예제 #1
0
def get_posts_by_hashtag(tag, number):
    """Fetch posts for ``tag`` through a freshly constructed ``InsCrawler``.

    Args:
        tag: Forwarded unchanged as the first argument of
            ``InsCrawler.get_latest_posts_by_tag``. Presumably the hashtag
            text -- confirm against ``InsCrawler``.
        number: Forwarded unchanged as the second argument. Presumably the
            number of posts requested -- confirm against ``InsCrawler``.

    Returns:
        Whatever ``InsCrawler.get_latest_posts_by_tag(tag, number)`` returns.
    """
    # A new crawler (default constructor arguments) is built on every call.
    return InsCrawler().get_latest_posts_by_tag(tag, number)
예제 #2
0
def get_posts_by_hashtag(tag, number, debug):
    """Fetch posts for ``tag`` with a crawler configured from ``debug``.

    Args:
        tag: Forwarded unchanged as the first argument of
            ``InsCrawler.get_latest_posts_by_tag``. Presumably the hashtag
            text -- confirm against ``InsCrawler``.
        number: Forwarded unchanged as the second argument. Presumably the
            number of posts requested -- confirm against ``InsCrawler``.
        debug: Passed to the ``InsCrawler`` constructor as ``has_screen``.

    Returns:
        Whatever ``InsCrawler.get_latest_posts_by_tag(tag, number)`` returns.
    """
    # A new crawler is built on every call; ``debug`` only affects construction.
    crawler = InsCrawler(has_screen=debug)
    posts = crawler.get_latest_posts_by_tag(tag, number)
    return posts
예제 #3
0
# Script fragment: crawl the latest posts for `tag`, then download the images
# of those posts whose caption matches a simple text filter.
# NOTE(review): `tag` and `number` are read below but are not defined in the
# lines shown here -- they must be provided by earlier code.

# Root output directory; the per-run image folder and CSV path live under it.
target_path = 'result_tag'
# Forwarded to InsCrawler as `has_screen` below.
debug = False

# Run timestamp (second resolution) embedded in the output names; presumably
# so that separate runs for the same tag get distinct outputs.
current_timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

# Per-run image directory: <target_path>/<tag>_<timestamp>
target_img_path = os.path.join(target_path, '%s_%s' % (tag, current_timestamp))
# CSV path: <target_path>/<tag>_<timestamp>.csv
# NOTE(review): `output_path` is not used in the lines shown here --
# presumably the CSV is written later; confirm.
output_filename = '%s_%s.csv' % (tag, current_timestamp)
output_path = os.path.join(target_path, output_filename)

# exist_ok=True: do not fail if the directories already exist.
os.makedirs(target_path, exist_ok=True)
os.makedirs(target_img_path, exist_ok=True)

ins_crawler = InsCrawler(has_screen=debug)

results = ins_crawler.get_latest_posts_by_tag(tag, number)

print('[*] %d results' % len(results))

# Empty frame with the expected result columns.
# NOTE(review): `df` is not populated in the lines shown here -- confirm
# where (or whether) rows are added.
df = pd.DataFrame(columns=['key', 'caption', 'img_url'])

for result in results:
    # Each result is indexed by 'caption' and 'img_url' below; the column list
    # above suggests it also carries 'key' (not read in the lines shown here).
    # Keep only posts whose caption contains both '1 person' and 'closeup'.
    # NOTE(review): the `in` test raises TypeError if a caption is None --
    # confirm the crawler always returns a string.
    if '1 person' in result['caption'] and 'closeup' in result['caption']:
        # Use the last path segment of the image URL (query string excluded
        # by urlparse) as the local file name.
        parsed = urlparse(result['img_url'])
        filename = parsed.path.split('/')[-1]
        # Record the local file name on the result dict itself.
        result['filename'] = filename

        # Download the image into this run's image directory.
        urllib.request.urlretrieve(result['img_url'],
                                   os.path.join(target_img_path, filename))