Beispiel #1
0
def get_posts_by_user(username, number, detail, debug):
    """Fetch a user's posts, logging in first when ``settings.login`` is set.

    ``debug`` is forwarded to ``InsCrawler`` as ``has_screen``; ``username``,
    ``number`` and ``detail`` are passed unchanged to ``get_user_posts``.

    Returns the value of ``InsCrawler.get_user_posts``, or ``None`` when
    ``username`` is falsy (no crawler is created in that case).
    """
    if not username:
        # Nothing to look up: skip crawler creation entirely.
        return None

    crawler = InsCrawler(has_screen=debug)
    if settings.login:
        crawler.login()
    return crawler.get_user_posts(username, number, detail)
Beispiel #2
0
def check_targets(debug, threads_number):
    """Start one crawler thread per batch of unchecked targets.

    ``threads_number`` falls back to 4 when falsy and is handed to
    ``get_unchecked_targets``. For every item that call yields, a new
    ``InsCrawler`` is created (``has_screen=debug``), optionally logged in
    when ``settings.login`` is set, and its ``check_targets`` method is run
    in a new ``Thread``. The threads are started but not joined here.
    """
    worker_count = threads_number or 4
    for batch in get_unchecked_targets(worker_count):
        crawler = InsCrawler(has_screen=debug)
        if settings.login:
            crawler.login()
        worker = Thread(target=crawler.check_targets, args=(batch,))
        worker.start()
Beispiel #3
0
def get_popular_users(starting_user, debug, threads_number):
    """Start one crawler thread per batch of unchecked profiles.

    ``threads_number`` falls back to 4 when falsy and is handed to
    ``get_unchecked_profiles``. For every item that call yields, a new
    ``InsCrawler`` is created (``has_screen=debug``), optionally logged in
    when ``settings.login`` is set, and its
    ``check_popular_profiles_elastic`` method is run in a new ``Thread``.
    The threads are started but not joined here.

    ``starting_user`` is accepted but not used by this function.
    """
    worker_count = threads_number or 4
    for batch in get_unchecked_profiles(worker_count):
        crawler = InsCrawler(has_screen=debug)
        if settings.login:
            crawler.login()
        worker = Thread(
            target=crawler.check_popular_profiles_elastic,
            args=(batch,),
        )
        worker.start()
Beispiel #4
0
def get_hashtags_by_post_key(post_key, debug, number):
    """Fetch every post listed in ``post_key`` with a single crawler.

    Args:
        post_key: Sized iterable of post keys; each key is passed to
            ``InsCrawler.fetch_post``.
        debug: Forwarded to ``InsCrawler`` as ``has_screen``.
        number: Only used to label the progress bar
            (``"fetching_2_<number>"``).

    Returns:
        A list holding one ``fetch_post`` result per key, in input order.
        An empty ``post_key`` returns ``[]`` without creating a crawler.
    """
    if len(post_key) == 0:
        return []

    ins_crawler = InsCrawler(has_screen=debug)
    result = []

    # Use the bar as a context manager so it is always closed, including
    # when fetch_post raises part-way through; the original never closed it.
    with tqdm(total=len(post_key)) as progress_bar:
        progress_bar.set_description("fetching_2_" + str(number))
        for key in post_key:
            result.append(ins_crawler.fetch_post(key))
            progress_bar.update(1)

    return result
def get_network_by_username(username, depth, debug):
    """Log in with a new crawler and return its network lookup result.

    ``debug`` is forwarded to ``InsCrawler`` as ``has_screen``; ``username``
    and ``depth`` are passed unchanged to
    ``InsCrawler.get_network_by_username``, whose return value is returned.
    """
    crawler = InsCrawler(has_screen=debug)
    crawler.login()
    network = crawler.get_network_by_username(username, depth)
    return network
Beispiel #6
0
def get_profile_from_script(username):
    """Return ``get_user_profile_from_script_shared_data(username)``.

    A default-constructed ``InsCrawler`` is created for the single call.
    """
    return InsCrawler().get_user_profile_from_script_shared_data(username)
Beispiel #7
0
def get_posts_by_user(username, number, detail, debug):
    """Return ``InsCrawler.get_user_posts(username, number, detail)``.

    The crawler is created per call with ``has_screen=debug``; no login is
    performed here.
    """
    crawler = InsCrawler(has_screen=debug)
    posts = crawler.get_user_posts(username, number, detail)
    return posts
Beispiel #8
0
# Script preamble: crawl one user's posts and prepare output locations.
# NOTE(review): `username` is not defined in the visible part of this
# snippet; it must be set before this point.

# Passed as the `number` argument of get_user_posts below.
number = 999

# Root directory for everything this run writes.
target_path = 'result_username'
# Forwarded to InsCrawler as has_screen.
debug = False

# Timestamp used to make this run's directory and CSV names unique.
current_timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

# Per-run image directory and CSV path, both named "<username>_<timestamp>".
target_img_path = os.path.join(target_path,
                               '%s_%s' % (username, current_timestamp))
output_filename = '%s_%s.csv' % (username, current_timestamp)
output_path = os.path.join(target_path, output_filename)

# exist_ok=True makes re-runs safe when the directories already exist.
os.makedirs(target_path, exist_ok=True)
os.makedirs(target_img_path, exist_ok=True)

ins_crawler = InsCrawler(has_screen=debug)

# The loop that follows reads 'caption' and 'img_url' from each result.
results = ins_crawler.get_user_posts(username, number, detail=False)

print('[*] %d results' % len(results))

# Empty frame with the three columns listed here.
df = pd.DataFrame(columns=['key', 'caption', 'img_url'])

for result in results:
    # key, caption, img_url
    if '1 person' in result['caption'] and 'closeup' in result['caption']:
        parsed = urlparse(result['img_url'])
        filename = parsed.path.split('/')[-1]
        result['filename'] = filename

        urllib.request.urlretrieve(result['img_url'],
def get_profile(username, debug=False, follow_list_enabled=False):
    """Log in with a new crawler and return the user's profile.

    ``debug`` is forwarded to ``InsCrawler`` as ``has_screen``; ``username``
    and ``follow_list_enabled`` are passed positionally to
    ``InsCrawler.get_user_profile``, whose return value is returned.
    """
    crawler = InsCrawler(has_screen=debug)
    crawler.login()
    profile = crawler.get_user_profile(username, follow_list_enabled)
    return profile
    logger = logging.getLogger(__name__)

    if args.mode in ["posts", "posts_full"]:
        arg_required("username")
        posts = get_post_full(args.username, args.number, args.debug)

        output(
            posts,
            args.output,
        )

    elif args.mode == "profile":
        arg_required("username")

        ins_crawler = InsCrawler(has_screen=args.debug)
        ins_crawler.login()
        profile = ins_crawler.get_user_profile(args.username, True)
        profile['capture_time'] = int(datetime.now().timestamp())

        output(profile, args.output)
        persist = Persist()
        profile["username"] = args.username
        try:
            persist.persistProfile(profile)
        except:
            persist.db.rollback()
            id_profile = persist.getUserIdByUsername(args.username)
            if id_profile is None:
                logger.error(
                    'The profile of specified username does not exist')
def get_user_posts_by_tags(tag, number):
    """Return ``InsCrawler.get_user_posts_from_tag(tag, number)``.

    A default-constructed ``InsCrawler`` is created for the single call.
    """
    return InsCrawler().get_user_posts_from_tag(tag, number)
Beispiel #12
0
import json
import boto3
import os
from elasticsearch import Elasticsearch
from inscrawler import InsCrawler
from inscrawler.settings import settings
from dynamodb_json import json_util as dynamo_json

# Module-level clients, created once at import time and used by
# crawlUserProfile below.
ins_crawler = InsCrawler()
dynamodb = boto3.client('dynamodb')
# Raises KeyError at import if the ES_DOMAIN environment variable is unset.
es = Elasticsearch([os.environ['ES_DOMAIN']])
"""
    Triggered when DynamoHook exist and crawl website
"""


def crawlUserProfile(event, context):
    event_details = json.loads(json.dumps(event['Records'][0]['dynamodb']))

    converted_table = dynamo_json.loads(event_details['NewImage'])
    username = converted_table['username']

    crawled_username = ins_crawler.get_user_profile(username)
    setattr(settings, "fetch_details", True)
    crawled_media = ins_crawler.get_user_posts(username, number=1)

    captions = []  # timestamp is missing from crawl
    locations = []

    for post in crawled_media:
        captions.append({
# Script preamble: crawl the latest posts for one hashtag and prepare
# output locations.

# Hashtag to crawl; also used in the output directory and CSV names.
tag = '증명사진'
# Passed as the `number` argument of get_latest_posts_by_tag below.
number = 999

# Root directory for everything this run writes.
target_path = 'result_tag'
# Forwarded to InsCrawler as has_screen.
debug = False

# Timestamp used to make this run's directory and CSV names unique.
current_timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

# Per-run image directory and CSV path, both named "<tag>_<timestamp>".
target_img_path = os.path.join(target_path, '%s_%s' % (tag, current_timestamp))
output_filename = '%s_%s.csv' % (tag, current_timestamp)
output_path = os.path.join(target_path, output_filename)

# exist_ok=True makes re-runs safe when the directories already exist.
os.makedirs(target_path, exist_ok=True)
os.makedirs(target_img_path, exist_ok=True)

ins_crawler = InsCrawler(has_screen=debug)

# The loop that follows reads 'caption' and 'img_url' from each result.
results = ins_crawler.get_latest_posts_by_tag(tag, number)

print('[*] %d results' % len(results))

# Empty frame with the three columns listed here.
df = pd.DataFrame(columns=['key', 'caption', 'img_url'])

for result in results:
    # key, caption, img_url
    if '1 person' in result['caption'] and 'closeup' in result['caption']:
        parsed = urlparse(result['img_url'])
        filename = parsed.path.split('/')[-1]
        result['filename'] = filename

        urllib.request.urlretrieve(result['img_url'],
Beispiel #14
0
def get_profile(username, debug=False):
    """Return ``InsCrawler.get_user_profile(username)``.

    ``debug`` is passed as the first positional argument of ``InsCrawler``.
    """
    return InsCrawler(debug).get_user_profile(username)
def output_posts_info_from_list(filename: str):
    """Load a JSON file and pass its content to the crawler.

    ``filename`` is opened in binary mode and parsed as JSON; the parsed
    value is handed to ``InsCrawler.get_posts_info_from_list`` on a
    default-constructed crawler, and that call's result is returned.
    """
    with open(filename, 'rb') as stream:
        raw = stream.read()
    posts = json.loads(raw)
    return InsCrawler().get_posts_info_from_list(posts)
Beispiel #16
0
def get_posts_by_user(debug):
    """Return ``InsCrawler.get_user_posts()`` called without arguments.

    The crawler is created per call with ``has_screen=debug``.
    """
    return InsCrawler(has_screen=debug).get_user_posts()
Beispiel #17
0
from inscrawler import InsCrawler
import argparse
from multiprocessing import Process


def usage():
    """Return the usage text passed to the argument parser."""
    return (
        '\n'
        '        python crawler.py [tag]\n'
        '    '
    )


if __name__ == '__main__':
    # Script entry point: parse -n/--number, then call get_explorePosts1 on
    # a crawler created with has_screen=True.
    parser = argparse.ArgumentParser(
        usage=usage(),
        description='Instagram Explore Crawler',
    )
    parser.add_argument(
        '-n', '--number',
        help='number of posts to crawling',
        default=100,
        type=int,
    )
    args = parser.parse_args()

    # Forwarded as the `index` keyword of get_explorePosts1.
    index = 1
    ins_crawler = InsCrawler(has_screen=True)
    ins_crawler.get_explorePosts1(index=index, maximum=args.number)
Beispiel #18
0
import argparse

from inscrawler import InsCrawler


def usage():
    """Return the usage text passed to the argument parser."""
    return (
        '\n'
        '        python crawler.py [tag]\n'
        '    '
    )


if __name__ == '__main__':
    # Script entry point: parse the hashtag and -n/--number, then call
    # auto_like on a crawler created with has_screen=True.
    parser = argparse.ArgumentParser(
        usage=usage(),
        description='Instagram Liker',
    )
    parser.add_argument('hashtag', help='hashtag name')
    parser.add_argument(
        '-n', '--number',
        help='number of posts to like',
        default=1000,
        type=int,
    )
    args = parser.parse_args()

    ins_crawler = InsCrawler(has_screen=True)
    ins_crawler.auto_like(maximum=args.number, tag=args.hashtag)
Beispiel #19
0
def get_posts_by_user(username, number):
    """Return ``InsCrawler.get_user_posts(username, number)``.

    A default-constructed ``InsCrawler`` is created for the single call.
    """
    return InsCrawler().get_user_posts(username, number)
Beispiel #20
0
def get_urls_by_hashtag(tag, number, debug, filepath):
    """Return ``InsCrawler.get_urls_posts_by_tag(tag, number, filepath)``.

    The crawler is created per call with ``has_screen=debug``.
    """
    crawler = InsCrawler(has_screen=debug)
    urls = crawler.get_urls_posts_by_tag(tag, number, filepath)
    return urls
def get_posts_by_user(username, number, detail, debug, ins_crawler=None):
    """Fetch a user's posts, reusing a supplied crawler when given.

    When ``ins_crawler`` is ``None`` a new ``InsCrawler`` is created with
    ``has_screen=debug`` and logged in; a supplied crawler is used as-is
    (no login call, ``debug`` ignored). Returns the value of
    ``get_user_posts(username, number, detail)`` on the chosen crawler.
    """
    if ins_crawler is not None:
        # Caller owns the crawler: use it without touching its session.
        return ins_crawler.get_user_posts(username, number, detail)

    fresh_crawler = InsCrawler(has_screen=debug)
    fresh_crawler.login()
    return fresh_crawler.get_user_posts(username, number, detail)
Beispiel #22
0
def get_postnum_by_hashtag(tag, debug):
    """Return ``InsCrawler.fetch_hashtag_articles(tag)``.

    The crawler is created per call with ``has_screen=debug``.
    """
    return InsCrawler(has_screen=debug).fetch_hashtag_articles(tag)
Beispiel #23
0
def comment(post_url, content):
    """Call ``InsCrawler.comment_post(post_url, content)``.

    A default-constructed ``InsCrawler`` is created for the call; the
    method's result is discarded and this function returns ``None``.
    """
    InsCrawler().comment_post(post_url, content)
Beispiel #24
0
def get_key_by_hashtag(tag, debug):
    """Return ``InsCrawler.get_key_by_hashtag(tag)``.

    The crawler is created per call with ``has_screen=debug``.
    """
    return InsCrawler(has_screen=debug).get_key_by_hashtag(tag)
Beispiel #25
0
def get_posts_by_hashtag(tag, number):
    """Return ``InsCrawler.get_latest_posts_by_tag(tag, number)``.

    A default-constructed ``InsCrawler`` is created for the single call.
    """
    return InsCrawler().get_latest_posts_by_tag(tag, number)
for i, username in usernames.iterrows():
    username = username['name']
    print(username)

    target_path = 'result_username'
    debug = True

    current_timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

    target_img_path = os.path.join(target_path,
                                   '%s_%s' % (username, current_timestamp))
    output_filename = '%s_%s.csv' % (username, current_timestamp)
    output_path = os.path.join(target_path, output_filename)

    ins_crawler = InsCrawler(has_screen=debug)

    ins_crawler.login()
    results = ins_crawler.get_user_posts(username, number, detail=True)

    print('[*] %d results' % len(results))

    os.makedirs(target_path, exist_ok=True)
    os.makedirs(target_img_path, exist_ok=True)

    df = pd.DataFrame(columns=['key', 'caption', 'img_url', 'likes'])

    for result in results:
        # key, captions, img_urls, likes
        for img_url, caption in zip(result['img_urls'], result['captions']):
            if caption is not None and (
Beispiel #27
0
def get_profile(username):
    """Return ``InsCrawler.get_user_profile(username)``.

    A default-constructed ``InsCrawler`` is created for the single call.
    """
    return InsCrawler().get_user_profile(username)
Beispiel #28
0
def get_images_from_profile(username, output):
    """Return ``InsCrawler.get_images_from_profile(username, output)``.

    A default-constructed ``InsCrawler`` is created for the single call.
    """
    return InsCrawler().get_images_from_profile(username, output)
Beispiel #29
0
def get_posts_by_hashtag(tag, number, debug):
    """Return ``InsCrawler.get_latest_posts_by_tag(tag, number)``.

    The crawler is created per call with ``has_screen=debug``.
    """
    crawler = InsCrawler(has_screen=debug)
    latest_posts = crawler.get_latest_posts_by_tag(tag, number)
    return latest_posts
def get_posts_by_keys(keys, save, path, debug=False):
    """Return ``InsCrawler.get_posts_by_keys(keys, save, path)``.

    The crawler is created per call with ``has_screen=debug``.
    """
    crawler = InsCrawler(has_screen=debug)
    posts = crawler.get_posts_by_keys(keys, save, path)
    return posts