def get_posts_by_user(username, number, detail, debug):
    """Fetch posts for *username*, logging in first when settings require it.

    Parameters:
        username: Instagram account name; a falsy value short-circuits.
        number: maximum number of posts to fetch.
        detail: whether to fetch full post details.
        debug: when True, run the crawler with a visible screen.

    Returns:
        The result of InsCrawler.get_user_posts, or None when no
        username is given (preserves the original implicit-None path).
    """
    # Guard clause replaces the original `else: pass`, which silently
    # fell through and returned None.
    if not username:
        return None
    ins_crawler = InsCrawler(has_screen=debug)
    if settings.login:
        ins_crawler.login()
    return ins_crawler.get_user_posts(username, number, detail)
def check_targets(debug, threads_number):
    """Spawn one worker thread per batch of unchecked targets.

    Parameters:
        debug: when True, run each crawler with a visible screen.
        threads_number: number of batches to fetch; falsy falls back to 4.
    """
    threads_number = threads_number or 4
    for batch in get_unchecked_targets(threads_number):
        crawler = InsCrawler(has_screen=debug)
        if settings.login:
            crawler.login()
        # Each batch gets its own crawler instance and thread.
        Thread(target=crawler.check_targets, args=(batch,)).start()
def get_popular_users(starting_user, debug, threads_number):
    """Spawn one worker thread per batch of unchecked profiles.

    Parameters:
        starting_user: unused here; kept so existing callers keep working.
        debug: when True, run each crawler with a visible screen.
        threads_number: number of batches to fetch; falsy falls back to 4.
    """
    # NOTE(review): `starting_user` is never read in this function.
    threads_number = threads_number or 4
    for batch in get_unchecked_profiles(threads_number):
        crawler = InsCrawler(has_screen=debug)
        if settings.login:
            crawler.login()
        Thread(target=crawler.check_popular_profiles_elastic,
               args=(batch,)).start()
def get_hashtags_by_post_key(post_key, debug, number):
    """Fetch post data for each key in *post_key*, with a progress bar.

    Parameters:
        post_key: iterable of post keys to fetch.
        debug: when True, run the crawler with a visible screen.
        number: worker index, used only to label the progress bar.

    Returns:
        A list with one fetched post per key (empty list for no keys).
    """
    if not post_key:
        return []
    ins_crawler = InsCrawler(has_screen=debug)
    result = []
    progress_bar = tqdm(total=len(post_key))
    progress_bar.set_description("fetching_2_" + str(number))
    try:
        for key in post_key:
            result.append(ins_crawler.fetch_post(key))
            progress_bar.update(1)
    finally:
        # The original never closed the bar; close it even if a fetch
        # raises, so the terminal/line state is restored.
        progress_bar.close()
    return result
def get_network_by_username(username, depth, debug):
    """Log in and crawl the network of *username* down to *depth* levels."""
    crawler = InsCrawler(has_screen=debug)
    crawler.login()
    network = crawler.get_network_by_username(username, depth)
    return network
def get_profile_from_script(username):
    """Read the profile of *username* from the page's shared-data script."""
    # Local renamed; the original used the typo `ins_cralwer`.
    crawler = InsCrawler()
    return crawler.get_user_profile_from_script_shared_data(username)
def get_posts_by_user(username, number, detail, debug):
    """Fetch up to *number* posts of *username* without logging in."""
    return InsCrawler(has_screen=debug).get_user_posts(username, number, detail)
number = 999 target_path = 'result_username' debug = False current_timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S') target_img_path = os.path.join(target_path, '%s_%s' % (username, current_timestamp)) output_filename = '%s_%s.csv' % (username, current_timestamp) output_path = os.path.join(target_path, output_filename) os.makedirs(target_path, exist_ok=True) os.makedirs(target_img_path, exist_ok=True) ins_crawler = InsCrawler(has_screen=debug) results = ins_crawler.get_user_posts(username, number, detail=False) print('[*] %d results' % len(results)) df = pd.DataFrame(columns=['key', 'caption', 'img_url']) for result in results: # key, caption, img_url if '1 person' in result['caption'] and 'closeup' in result['caption']: parsed = urlparse(result['img_url']) filename = parsed.path.split('/')[-1] result['filename'] = filename urllib.request.urlretrieve(result['img_url'],
def get_profile(username, debug=False, follow_list_enabled=False):
    """Log in and fetch the profile of *username*.

    follow_list_enabled is forwarded to get_user_profile (presumably it
    toggles crawling of follower/following lists — confirm in InsCrawler).
    """
    crawler = InsCrawler(has_screen=debug)
    crawler.login()
    return crawler.get_user_profile(username, follow_list_enabled)
logger = logging.getLogger(__name__)

if args.mode in ["posts", "posts_full"]:
    arg_required("username")
    posts = get_post_full(args.username, args.number, args.debug)
    output(
        posts,
        args.output,
    )
elif args.mode == "profile":
    arg_required("username")
    ins_crawler = InsCrawler(has_screen=args.debug)
    ins_crawler.login()
    profile = ins_crawler.get_user_profile(args.username, True)
    # Stamp the crawl time so consumers can tell how fresh the data is.
    profile['capture_time'] = int(datetime.now().timestamp())
    output(profile, args.output)
    persist = Persist()
    profile["username"] = args.username
    try:
        persist.persistProfile(profile)
    except Exception:
        # Narrowed from a bare `except:` (which also swallowed
        # KeyboardInterrupt/SystemExit) and log the failure instead of
        # discarding it silently; still roll back the transaction.
        logger.exception("failed to persist profile for %s", args.username)
        persist.db.rollback()
    id_profile = persist.getUserIdByUsername(args.username)
    if id_profile is None:
        logger.error(
            'The profile of specified username does not exist')
def get_user_posts_by_tags(tag, number):
    """Fetch up to *number* posts gathered from hashtag *tag*."""
    return InsCrawler().get_user_posts_from_tag(tag, number)
import json import boto3 import os from elasticsearch import Elasticsearch from inscrawler import InsCrawler from inscrawler.settings import settings from dynamodb_json import json_util as dynamo_json ins_crawler = InsCrawler() dynamodb = boto3.client('dynamodb') es = Elasticsearch([os.environ['ES_DOMAIN']]) """ Triggered when DynamoHook exist and crawl website """ def crawlUserProfile(event, context): event_details = json.loads(json.dumps(event['Records'][0]['dynamodb'])) converted_table = dynamo_json.loads(event_details['NewImage']) username = converted_table['username'] crawled_username = ins_crawler.get_user_profile(username) setattr(settings, "fetch_details", True) crawled_media = ins_crawler.get_user_posts(username, number=1) captions = [] # timestamp is missing from crawl locations = [] for post in crawled_media: captions.append({
tag = '증명사진' number = 999 target_path = 'result_tag' debug = False current_timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S') target_img_path = os.path.join(target_path, '%s_%s' % (tag, current_timestamp)) output_filename = '%s_%s.csv' % (tag, current_timestamp) output_path = os.path.join(target_path, output_filename) os.makedirs(target_path, exist_ok=True) os.makedirs(target_img_path, exist_ok=True) ins_crawler = InsCrawler(has_screen=debug) results = ins_crawler.get_latest_posts_by_tag(tag, number) print('[*] %d results' % len(results)) df = pd.DataFrame(columns=['key', 'caption', 'img_url']) for result in results: # key, caption, img_url if '1 person' in result['caption'] and 'closeup' in result['caption']: parsed = urlparse(result['img_url']) filename = parsed.path.split('/')[-1] result['filename'] = filename urllib.request.urlretrieve(result['img_url'],
def get_profile(username, debug=False):
    """Fetch the profile of *username* without logging in.

    Parameters:
        username: Instagram account name.
        debug: when True, run the crawler with a visible screen.
    """
    # Pass by keyword for consistency: every sibling helper calls
    # InsCrawler(has_screen=debug); the old positional call silently
    # relied on has_screen being the constructor's first parameter.
    ins_crawler = InsCrawler(has_screen=debug)
    return ins_crawler.get_user_profile(username)
def output_posts_info_from_list(filename: str):
    """Load a JSON list of posts from *filename* and fetch their info."""
    with open(filename, 'rb') as fp:
        posts = json.load(fp)
    return InsCrawler().get_posts_info_from_list(posts)
def get_posts_by_user(debug):
    """Fetch posts via InsCrawler.get_user_posts() with its defaults.

    NOTE(review): unlike the sibling helpers, this variant passes no
    username/number — get_user_posts presumably has defaults; verify.
    """
    return InsCrawler(has_screen=debug).get_user_posts()
from inscrawler import InsCrawler
import argparse
from multiprocessing import Process


def usage():
    """Return the command-line usage banner."""
    return '''
    python crawler.py [tag]
    '''


if __name__ == '__main__':
    index = 1
    cli = argparse.ArgumentParser(description='Instagram Explore Crawler',
                                  usage=usage())
    cli.add_argument('-n', '--number', type=int, default=100,
                     help='number of posts to crawling')
    args = cli.parse_args()
    # Crawl the explore feed with a visible browser window.
    crawler = InsCrawler(has_screen=True)
    crawler.get_explorePosts1(maximum=args.number, index=index)
import argparse

from inscrawler import InsCrawler


def usage():
    """Return the command-line usage banner."""
    return '''
    python crawler.py [tag]
    '''


if __name__ == '__main__':
    cli = argparse.ArgumentParser(description='Instagram Liker',
                                  usage=usage())
    cli.add_argument('hashtag', help='hashtag name')
    cli.add_argument('-n', '--number', type=int, default=1000,
                     help='number of posts to like')
    args = cli.parse_args()
    # Like up to --number posts under the given hashtag, visible browser.
    InsCrawler(has_screen=True).auto_like(tag=args.hashtag,
                                          maximum=args.number)
def get_posts_by_user(username, number):
    """Fetch up to *number* posts of *username* without logging in."""
    return InsCrawler().get_user_posts(username, number)
def get_urls_by_hashtag(tag, number, debug, filepath):
    """Collect up to *number* post URLs for hashtag *tag*.

    *filepath* is forwarded to the crawler (presumably an output
    location — verify in InsCrawler.get_urls_posts_by_tag).
    """
    crawler = InsCrawler(has_screen=debug)
    return crawler.get_urls_posts_by_tag(tag, number, filepath)
def get_posts_by_user(username, number, detail, debug, ins_crawler=None):
    """Fetch posts of *username*, reusing a caller-supplied crawler.

    When *ins_crawler* is None, a fresh crawler is created and logged
    in; a supplied crawler is assumed to already be ready to use.
    """
    crawler = ins_crawler
    if crawler is None:
        crawler = InsCrawler(has_screen=debug)
        crawler.login()
    return crawler.get_user_posts(username, number, detail)
def get_postnum_by_hashtag(tag, debug):
    """Return the article/post data reported for hashtag *tag*."""
    return InsCrawler(has_screen=debug).fetch_hashtag_articles(tag)
def comment(post_url, content):
    """Post *content* as a comment on the post at *post_url*."""
    crawler = InsCrawler()
    crawler.comment_post(post_url, content)
def get_key_by_hashtag(tag, debug):
    """Return the post key(s) found under hashtag *tag*."""
    return InsCrawler(has_screen=debug).get_key_by_hashtag(tag)
def get_posts_by_hashtag(tag, number):
    """Fetch up to *number* of the latest posts tagged with *tag*."""
    return InsCrawler().get_latest_posts_by_tag(tag, number)
for i, username in usernames.iterrows(): username = username['name'] print(username) target_path = 'result_username' debug = True current_timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S') target_img_path = os.path.join(target_path, '%s_%s' % (username, current_timestamp)) output_filename = '%s_%s.csv' % (username, current_timestamp) output_path = os.path.join(target_path, output_filename) ins_crawler = InsCrawler(has_screen=debug) ins_crawler.login() results = ins_crawler.get_user_posts(username, number, detail=True) print('[*] %d results' % len(results)) os.makedirs(target_path, exist_ok=True) os.makedirs(target_img_path, exist_ok=True) df = pd.DataFrame(columns=['key', 'caption', 'img_url', 'likes']) for result in results: # key, captions, img_urls, likes for img_url, caption in zip(result['img_urls'], result['captions']): if caption is not None and (
def get_profile(username):
    """Fetch the profile of *username* without logging in."""
    return InsCrawler().get_user_profile(username)
def get_images_from_profile(username, output):
    """Download profile images of *username*; *output* is forwarded
    to the crawler (presumably a destination path — verify)."""
    crawler = InsCrawler()
    return crawler.get_images_from_profile(username, output)
def get_posts_by_hashtag(tag, number, debug):
    """Fetch up to *number* of the latest posts tagged with *tag*."""
    return InsCrawler(has_screen=debug).get_latest_posts_by_tag(tag, number)
def get_posts_by_keys(keys, save, path, debug=False):
    """Fetch posts for the given *keys*.

    *save* and *path* are forwarded to the crawler (presumably whether
    and where to save results — verify in InsCrawler.get_posts_by_keys).
    """
    crawler = InsCrawler(has_screen=debug)
    return crawler.get_posts_by_keys(keys, save, path)