def get_posts_by_hashtag(tag, number):
    """Return the latest posts for a hashtag.

    A default-constructed ``InsCrawler`` is created for the call and both
    arguments are forwarded unchanged to its ``get_latest_posts_by_tag``
    method; whatever that method returns is returned as-is.

    Args:
        tag: Hashtag handed to the crawler.
        number: Second argument handed to the crawler (presumably the
            number of posts to fetch -- confirm against ``InsCrawler``).
    """
    return InsCrawler().get_latest_posts_by_tag(tag, number)
def get_posts_by_hashtag(tag, number, debug=False):
    """Return the latest posts for a hashtag.

    An ``InsCrawler`` is created with ``has_screen=debug`` and both ``tag``
    and ``number`` are forwarded unchanged to its
    ``get_latest_posts_by_tag`` method; whatever that method returns is
    returned as-is.

    Args:
        tag: Hashtag handed to the crawler.
        number: Second argument handed to the crawler (presumably the
            number of posts to fetch -- confirm against ``InsCrawler``).
        debug: Forwarded as the crawler's ``has_screen`` flag. Optional so
            that two-argument calls ``get_posts_by_hashtag(tag, number)``
            keep working instead of raising ``TypeError``.
            NOTE(review): assumes ``has_screen=False`` matches the
            crawler's own default -- confirm against ``InsCrawler``.
    """
    crawler = InsCrawler(has_screen=debug)
    return crawler.get_latest_posts_by_tag(tag, number)
# NOTE: `tag` and `number` are not assigned anywhere in this span; they are
# expected to be provided by the enclosing scope.

# Output layout: everything goes under `target_path`; each run gets its own
# CSV file name and image directory, both derived from the tag and the time
# the run started.
target_path = 'result_tag'
debug = False
current_timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
target_img_path = os.path.join(target_path, '%s_%s' % (tag, current_timestamp))
output_filename = '%s_%s.csv' % (tag, current_timestamp)
output_path = os.path.join(target_path, output_filename)

# Create the base directory first, then the per-run image directory.
for directory in (target_path, target_img_path):
    os.makedirs(directory, exist_ok=True)

# Crawl the hashtag; `debug` is passed as the crawler's `has_screen` flag.
ins_crawler = InsCrawler(has_screen=debug)
results = ins_crawler.get_latest_posts_by_tag(tag, number)
print('[*] %d results' % len(results))

# Empty frame with the three fields read from each result below.
df = pd.DataFrame(columns=['key', 'caption', 'img_url'])

for result in results:
    # Each result carries: key, caption, img_url.
    caption = result['caption']
    # Only download images whose caption contains both marker phrases.
    if all(marker in caption for marker in ('1 person', 'closeup')):
        # The local file name is the last segment of the URL path (the
        # query string is not part of `parsed.path`).
        parsed = urlparse(result['img_url'])
        filename = parsed.path.rsplit('/', 1)[-1]
        result['filename'] = filename
        destination = os.path.join(target_img_path, filename)
        urllib.request.urlretrieve(result['img_url'], destination)