Ejemplo n.º 1
0
def get_info(username):
    """Scrape one profile, save it as JSON and return the extracted data.

    A driver is created with ``init_chromedriver``. When
    ``Settings.login_username`` is non-empty, ``login`` is called first.
    The profile is then read with ``extract_information`` (called with
    ``Settings.limit_amount``) and passed to ``Datasaver.save_profile_json``.
    Cookies are cleared and the browser window is closed in every case.

    Args:
        username: Profile name handed to ``extract_information``.

    Returns:
        The ``information`` value returned by ``extract_information``, or an
        empty list if the run was interrupted with Ctrl-C before extraction
        completed.

    Raises:
        SystemExit: With status 1 when the driver cannot be started or when
            login/extraction fails.
    """
    try:
        browser = init_chromedriver(chrome_options, capabilities)
    except Exception as exc:
        print(exc)
        # Exit non-zero so the calling shell/process can detect the failure.
        sys.exit(1)

    information = []
    try:
        try:
            if Settings.login_username:
                login(browser, Settings.login_username,
                      Settings.login_password)
            # The second element (list of commenting users) is not used here.
            information, _ = extract_information(
                browser, username, Settings.limit_amount)
        except Exception as exc:
            # ``Exception`` (not a bare except) so that KeyboardInterrupt
            # reaches the 'Aborted...' handler below.
            print("Error with user " + username)
            print(exc)
            sys.exit(1)

        Datasaver.save_profile_json(username, information)

    except KeyboardInterrupt:
        print('Aborted...')

    finally:
        browser.delete_all_cookies()
        browser.close()

    return information
def crawl_profile(usernames):
    """Extract and save the profile data for every name in ``usernames``.

    One browser obtained from ``SetupBrowserEnvironment`` is shared by all
    profiles. For each profile the extracted data is converted with
    ``to_dict()`` and saved through ``Datasaver.save_profile_json``, then the
    number of users who commented is printed.
    """
    with SetupBrowserEnvironment() as browser:
        for name in usernames:
            print('Extracting information from ' + name)
            extracted = extract_information(browser, name, Settings.limit_amount)
            profile, commenters = extracted
            Datasaver.save_profile_json(name, profile.to_dict())
            commenter_count = len(commenters)
            print ("Number of users who commented on their profile is ", commenter_count,"\n")
Ejemplo n.º 3
0
def find_real_fans(target_user='******'):
    """Collect profile statistics for every follower of ``target_user``.

    The follower names come from ``grab_followers``. Each follower is read
    with ``extract_information`` and saved via ``Datasaver.save_profile_json``.
    The collected statistics are written to the tab-separated file
    ``real_fans_of_<target_user>.csv`` and returned.

    Args:
        target_user: Account whose followers are inspected.

    Returns:
        A ``pandas.DataFrame`` with the columns ``alias``, ``private``,
        ``num_posts``, ``num_followers`` and ``num_following``, one row per
        follower that was extracted before the run finished or was aborted.

    Raises:
        SystemExit: With status 1 when the driver cannot be started or when
            extracting a follower fails.
    """
    followers_list = grab_followers(target_user)
    # NOTE(review): fixed 30 s pause between grabbing followers and starting
    # the crawl — presumably to avoid rate limiting; confirm.
    sleep(30)

    fan_list = {}
    try:
        browser = init_chromedriver(chrome_options, capabilities)
    except Exception as exc:
        print(exc)
        # Exit non-zero so the calling shell/process can detect the failure.
        sys.exit(1)

    try:
        login(
            browser,
            Settings.login_username,
            Settings.login_password)

        for user in followers_list:
            print('Extracting information from ' + user)
            try:
                information = extract_information(browser, user)
            except Exception as exc:
                # ``Exception`` rather than ``BaseException`` so that Ctrl-C
                # reaches the 'Aborted...' handler below.
                print("Error with user " + user)
                print(exc)
                sys.exit(1)

            fan_list[user] = information
            Datasaver.save_profile_json(user, information)
        print("\nFinished.\n")

    except KeyboardInterrupt:
        print('Aborted...')

    finally:
        browser.delete_all_cookies()
        browser.close()

    columns = ['alias', 'private', 'num_posts', 'num_followers', 'num_following']
    # Gather plain rows and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    rows = []
    for index, (alias, details) in enumerate(fan_list.items()):
        info = [
            alias,
            details['isprivate'],
            details['num_of_posts'],
            details['followers']['count'],
            details['following']['count'],
        ]
        rows.append(info)
        print(index, info)

    df = pd.DataFrame(rows, columns=columns)
    df.to_csv('real_fans_of_{}.csv'.format(target_user), sep='\t', encoding='utf-8')
    return df
Ejemplo n.º 4
0
def main(usernames,
         chromedriver_path='C:/devl/inst_data/instagram_profilecrawl/assets/chromedriver'):
    """Crawl every profile in ``usernames`` with a headless Chrome driver.

    For each profile the extracted information is saved with
    ``Datasaver.save_profile_json`` and the commenting users with
    ``Datasaver.save_profile_commenters_txt``. A profile whose extraction
    fails is reported and saved with empty data, and the loop continues with
    the next profile. Ctrl-C stops the loop; cookies are cleared and the
    browser window is closed in every case.

    Args:
        usernames: Iterable of profile names to crawl.
        chromedriver_path: Location of the chromedriver executable. Defaults
            to the path that was previously hard-coded.
    """
    chrome_options = Options()
    chrome_options.add_argument('--dns-prefetch-disable')
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--lang=en-US')
    chrome_options.add_argument('--headless')
    chrome_options.add_experimental_option('prefs',
                                           {'intl.accept_languages': 'en-US'})
    browser = webdriver.Chrome(
        chromedriver_path,
        chrome_options=chrome_options)

    # makes sure slower connections work as well
    print("Waiting 10 sec")
    browser.implicitly_wait(10)
    try:
        for username in usernames:
            print('Extracting information from ' + username)
            information = []
            user_commented_list = []
            try:
                information, user_commented_list = extract_information(
                    browser, username, Settings.limit_amount)
            except Exception as exc:
                # ``Exception`` (not a bare except) so Ctrl-C still aborts
                # the whole run instead of being reported as a user error.
                print("Error with user " + username)
                print(exc)
            # Best effort: on failure the empty defaults above are saved.
            Datasaver.save_profile_json(username, information)

            print("Number of users who commented on his/her profile is ",
                  len(user_commented_list), "\n")

            Datasaver.save_profile_commenters_txt(username,
                                                  user_commented_list)
            print(
                "\nFinished. The json file and nicknames of users who commented were saved in profiles directory.\n"
            )

    except KeyboardInterrupt:
        print('Aborted...')

    finally:
        browser.delete_all_cookies()
        browser.close()
Ejemplo n.º 5
0
    usernames = get_all_user_names()

    for username in usernames:
        print('Extracting information from ' + username)
        information = []
        user_commented_list = []
        try:
            if len(Settings.login_username) != 0:
                login(browser, Settings.login_username,
                      Settings.login_password)
            information, user_commented_list = extract_information(
                browser, username, Settings.limit_amount)
        except:
            print("Error with user " + username)
            sys.exit(1)

        # pdb.set_trace()
        Datasaver.save_profile_json(username, information)

        # print ("Number of users who commented on their profile is ", len(user_commented_list),"\n")

        # Datasaver.save_profile_commenters_txt(username,user_commented_list)
        # print ("\nFinished. The json file and nicknames of users who commented were saved in profiles directory.\n")

except KeyboardInterrupt:
    print('Aborted...')

finally:
    browser.delete_all_cookies()
    browser.close()
Ejemplo n.º 6
0
print("Waiting 10 sec")
browser.implicitly_wait(10)

# Crawl every user name returned by get_all_user_names(). A profile whose
# extraction fails is reported and saved with empty data; Ctrl-C stops the run.
try:
    usernames = get_all_user_names()

    for username in usernames:
        print('Extracting information from ' + username)
        information = []
        user_commented_list = []
        try:
            information, user_commented_list = extract_information(
                browser, username, Settings.limit_amount)
        except Exception as exc:
            # ``Exception`` (not a bare except) so Ctrl-C reaches the
            # 'Aborted...' handler instead of being treated as a user error.
            print("Error with user " + username)
            print(exc)
        # Best effort: on failure the empty defaults above are saved.
        Datasaver.save_profile_json(username, information)

        print("Number of users who commented on his/her profile is ",
              len(user_commented_list), "\n")

        Datasaver.save_profile_commenters_txt(username, user_commented_list)
        print(
            "\nFinished. The json file and nicknames of users who commented were saved in profiles directory.\n"
        )

except KeyboardInterrupt:
    print('Aborted...')

finally:
    browser.delete_all_cookies()
    browser.close()
                             data=json.dumps(slack_data),
                             headers={'Content-Type': 'application/json'})
    if response.status_code != 200:
        raise ValueError(
            'Request to slack returned an error %s, the response is:\n%s' %
            (response.status_code, response.text))


# Crawl every tag returned by get_all_user_names(), save the extracted data
# and forward extraction error messages to Slack.
try:
    tags = get_all_user_names()

    for tag in tags:
        print('Extracting information from #' + tag)
        information, errMsg = extract_tag_information(browser, tag,
                                                      Settings.limit_amount)
        Datasaver.save_profile_json(tag, information)

        # Only notify Slack when the extractor reported a non-empty message.
        if errMsg is not None and errMsg != '':
            slackSendMsg(errMsg)

except KeyboardInterrupt:
    print('Aborted...')
except Exception as e:
    # Print before notifying: if the Slack call itself fails, the original
    # error has still been shown on the console.
    print(str(e))
    slackSendMsg(str(e))
finally:
    browser.delete_all_cookies()
    browser.close()
    browser.quit()
"""Goes through all usernames and collects their information"""
import sys

from util.account import login
from util.chromedriver import SetupBrowserEnvironment
from util.cli_helper import get_all_user_names
from util.datasaver import Datasaver
from util.extractor import extract_information
from util.extractor_posts import InstagramPost
from util.settings import Settings
from post_getter import di_csv_kan
from tqdm import tqdm

# Run configuration applied to the shared Settings object before crawling.
Settings.log_output_toconsole = True
Settings.scrape_posts_infos = True
Settings.chromedriver_location = '/Users/svmihar/Documents/chromedriver'

# Crawl each user name with one shared browser and save its data as JSON.
with SetupBrowserEnvironment() as browser:
    usernames = get_all_user_names()
    for username in tqdm(usernames):
        print('Extracting information from ' + username)
        extracted = extract_information(browser, username, Settings.limit_amount)
        information, user_commented_list = extracted
        Datasaver.save_profile_json(username, information.to_dict())

# Runs after the browser context has been closed.
print('convert to csv')
di_csv_kan()
      webhook_url, data=json.dumps(slack_data),
      headers={'Content-Type': 'application/json'}
  )
  if response.status_code != 200:
      raise ValueError(
          'Request to slack returned an error %s, the response is:\n%s'
          % (response.status_code, response.text)
      )

# Crawl every tag returned by get_all_user_names(), save the extracted data
# and forward extraction error messages to Slack.
try:
  tags = get_all_user_names()

  for tag in tags:
    print('Extracting information from #' + tag)
    information, errMsg = extract_tag_information(browser, tag, Settings.limit_amount)
    Datasaver.save_profile_json(tag, information)

    # Only notify Slack when the extractor reported a non-empty message.
    if errMsg is not None and errMsg != '':
      slackSendMsg(errMsg)

except KeyboardInterrupt:
  print('Aborted...')
except Exception as e:
  # Print before notifying: if the Slack call itself fails, the original
  # error has still been shown on the console.
  print(str(e))
  slackSendMsg(str(e))
finally:
  browser.delete_all_cookies()
  browser.close()
from util.extractor import extract_information

# Headless Chrome configured for English pages.
chrome_options = Options()
chrome_options.add_argument('--dns-prefetch-disable')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--lang=en-US')
chrome_options.add_argument('--headless')
chrome_options.add_experimental_option('prefs', {'intl.accept_languages': 'en-US'})
browser = webdriver.Chrome('./assets/chromedriver', chrome_options=chrome_options)

# makes sure slower connections work as well
# The message is built from the same value passed to implicitly_wait so the
# two cannot drift apart (it previously announced 10 while 5 was used).
implicit_wait_seconds = 5
print("Waiting {} sec".format(implicit_wait_seconds))
browser.implicitly_wait(implicit_wait_seconds)

Settings.limit_amount = 1
# NOTE(review): other scripts in this project spell this attribute
# `scrape_posts_infos`; if that is the real name, this assignment has no
# effect — confirm against the Settings class.
Settings.scrap_posts_infos = False
try:
  usernames = get_all_user_names()

  for username in usernames:
    print('Extracting information from ' + username)
    information, user_commented_list = extract_information(browser, username, Settings.limit_amount)
    Datasaver.save_profile_json(username, information)

except KeyboardInterrupt:
  print('Aborted...')

finally:
  browser.delete_all_cookies()
  browser.close()