def get_info(username):
    """Scrape a single profile, save it as JSON and return the extracted data.

    Starts a Chrome driver through ``init_chromedriver``, logs in when
    ``Settings.login_username`` is non-empty, runs ``extract_information``
    for ``username`` and hands the result to ``Datasaver.save_profile_json``.

    Args:
        username: Profile name to extract; also used in the log output and
            passed to the saver.

    Returns:
        The first element returned by ``extract_information``, or an empty
        list when the run was interrupted with Ctrl+C before extraction
        finished.

    Raises:
        SystemExit: With status 1 when the driver cannot be started or when
            login/extraction raises an ordinary exception.
    """
    try:
        browser = init_chromedriver(chrome_options, capabilities)
    except Exception as exc:
        print(exc)
        # A failed start-up is an error, so report a non-zero exit status.
        sys.exit(1)

    information = []
    try:
        try:
            if Settings.login_username:
                login(browser, Settings.login_username,
                      Settings.login_password)
            information, _ = extract_information(
                browser, username, Settings.limit_amount)
        except Exception:
            # Catch Exception only: a bare ``except`` would also swallow
            # KeyboardInterrupt and make the handler below unreachable.
            print("Error with user " + username)
            sys.exit(1)
        Datasaver.save_profile_json(username, information)
    except KeyboardInterrupt:
        print('Aborted...')
    finally:
        browser.delete_all_cookies()
        # quit() ends the WebDriver session and its driver process;
        # close() would only close the current window.
        browser.quit()
    return information
def crawl_profile(usernames):
    """Extract and save every profile in ``usernames`` using one browser.

    The browser comes from the ``SetupBrowserEnvironment`` context manager
    and is shared by all profiles. For each name the extracted profile is
    converted with ``to_dict()`` and passed to
    ``Datasaver.save_profile_json``; the number of commenters is printed.

    Args:
        usernames: Iterable of profile names to process, in order.
    """
    with SetupBrowserEnvironment() as browser:
        for profile_name in usernames:
            print('Extracting information from ' + profile_name)

            extracted = extract_information(
                browser, profile_name, Settings.limit_amount)
            profile, commenters = extracted

            profile_as_dict = profile.to_dict()
            Datasaver.save_profile_json(profile_name, profile_as_dict)

            commenter_count = len(commenters)
            print("Number of users who commented on their profile is ",
                  commenter_count, "\n")
def _fans_to_dataframe(fan_list):
    """Tabulate the scraped per-user information into one DataFrame.

    Prints ``row_number, row`` for every user, in the iteration order of
    ``fan_list``.
    """
    columns = ['alias', 'private', 'num_posts',
               'num_followers', 'num_following']
    rows = []
    for row_number, (alias, details) in enumerate(fan_list.items()):
        row = [
            alias,
            details['isprivate'],
            details['num_of_posts'],
            details['followers']['count'],
            details['following']['count'],
        ]
        rows.append(row)
        print(row_number, row)
    # Build the frame once: DataFrame.append copied the whole frame on every
    # call and no longer exists in pandas 2.0+.
    return pd.DataFrame(rows, columns=columns)


def find_real_fans(target_user='******'):
    """Scrape every follower of ``target_user`` and export a summary table.

    Fetches the follower list with ``grab_followers``, pauses 30 seconds,
    logs in, extracts and saves each follower's profile, then writes a
    tab-separated summary to ``real_fans_of_<target_user>.csv``.

    Args:
        target_user: Account whose followers are analysed.

    Returns:
        pandas.DataFrame with the columns ``alias``, ``private``,
        ``num_posts``, ``num_followers`` and ``num_following``; one row per
        follower that was extracted before the run ended.

    Raises:
        SystemExit: With status 1 when the driver cannot be started or when
            extracting a follower raises an ordinary exception.
    """
    followers_list = grab_followers(target_user)
    sleep(30)
    fan_list = {}

    try:
        browser = init_chromedriver(chrome_options, capabilities)
    except Exception as exc:
        print(exc)
        # A failed start-up is an error, so report a non-zero exit status.
        sys.exit(1)

    try:
        login(browser, Settings.login_username, Settings.login_password)
        for user in followers_list:
            print('Extracting information from ' + user)
            try:
                information = extract_information(browser, user)
                fan_list[user] = information
            except Exception:
                # Exception, not BaseException: Ctrl+C must reach the
                # KeyboardInterrupt handler below instead of being reported
                # as a per-user error.
                print("Error with user " + user)
                sys.exit(1)
            Datasaver.save_profile_json(user, information)
        print("\nFinished.\n")
    except KeyboardInterrupt:
        # Followers collected so far are still tabulated and written below.
        print('Aborted...')
    finally:
        browser.delete_all_cookies()
        # quit() ends the WebDriver session and its driver process;
        # close() would only close the current window.
        browser.quit()

    df = _fans_to_dataframe(fan_list)
    df.to_csv('real_fans_of_{}.csv'.format(target_user),
              sep='\t', encoding='utf-8')
    return df
def main(usernames,
         chromedriver_path='C:/devl/inst_data/instagram_profilecrawl/assets/chromedriver'):
    """Crawl every profile in ``usernames`` with a headless Chrome browser.

    For each name the profile is extracted with ``extract_information`` and
    saved through ``Datasaver.save_profile_json``; the list of commenters is
    saved through ``Datasaver.save_profile_commenters_txt``. A failure on
    one profile is reported and the (empty) defaults are saved, so the
    remaining profiles are still processed.

    Args:
        usernames: Iterable of profile names to process, in order.
        chromedriver_path: Location of the chromedriver executable. Defaults
            to the path that was previously hard-coded.
    """
    chrome_options = Options()
    chrome_options.add_argument('--dns-prefetch-disable')
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--lang=en-US')
    chrome_options.add_argument('--headless')
    chrome_options.add_experimental_option(
        'prefs', {'intl.accept_languages': 'en-US'})
    browser = webdriver.Chrome(chromedriver_path,
                               chrome_options=chrome_options)

    # makes sure slower connections work as well
    print("Waiting 10 sec")
    browser.implicitly_wait(10)

    try:
        for username in usernames:
            print('Extracting information from ' + username)
            information = []
            user_commented_list = []
            try:
                information, user_commented_list = extract_information(
                    browser, username, Settings.limit_amount)
            except Exception:
                # Best effort per profile. Exception (not a bare except) so
                # Ctrl+C still reaches the KeyboardInterrupt handler below.
                print("Error with user " + username)

            Datasaver.save_profile_json(username, information)
            print("Number of users who commented on his/her profile is ",
                  len(user_commented_list), "\n")
            Datasaver.save_profile_commenters_txt(username,
                                                  user_commented_list)
            print(
                "\nFinished. The json file and nicknames of users who commented were saved in profiles directory.\n"
            )
    except KeyboardInterrupt:
        print('Aborted...')
    finally:
        browser.delete_all_cookies()
        # quit() ends the WebDriver session and its driver process;
        # close() would only close the current window.
        browser.quit()
usernames = get_all_user_names() for username in usernames: print('Extracting information from ' + username) information = [] user_commented_list = [] try: if len(Settings.login_username) != 0: login(browser, Settings.login_username, Settings.login_password) information, user_commented_list = extract_information( browser, username, Settings.limit_amount) except: print("Error with user " + username) sys.exit(1) # pdb.set_trace() Datasaver.save_profile_json(username, information) # print ("Number of users who commented on their profile is ", len(user_commented_list),"\n") # Datasaver.save_profile_commenters_txt(username,user_commented_list) # print ("\nFinished. The json file and nicknames of users who commented were saved in profiles directory.\n") except KeyboardInterrupt: print('Aborted...') finally: browser.delete_all_cookies() browser.close()
# implicitly_wait() configures how long element lookups may poll before
# failing; `browser` is created earlier in the script.
print("Waiting 10 sec")
browser.implicitly_wait(10)

try:
    usernames = get_all_user_names()
    for username in usernames:
        print('Extracting information from ' + username)
        # Defaults that are saved as-is when extraction fails.
        information = []
        user_commented_list = []
        try:
            information, user_commented_list = extract_information(
                browser, username, Settings.limit_amount)
        except Exception:
            # Best effort per profile. Exception (not a bare except) so
            # Ctrl+C still reaches the KeyboardInterrupt handler below.
            print("Error with user " + username)

        Datasaver.save_profile_json(username, information)
        print("Number of users who commented on his/her profile is ",
              len(user_commented_list), "\n")
        Datasaver.save_profile_commenters_txt(username, user_commented_list)
        print(
            "\nFinished. The json file and nicknames of users who commented were saved in profiles directory.\n"
        )
except KeyboardInterrupt:
    print('Aborted...')
finally:
    browser.delete_all_cookies()
    # quit() ends the WebDriver session and its driver process;
    # close() would only close the current window.
    browser.quit()
data=json.dumps(slack_data), headers={'Content-Type': 'application/json'}) if response.status_code != 200: raise ValueError( 'Request to slack returned an error %s, the response is:\n%s' % (response.status_code, response.text)) try: tags = get_all_user_names() for tag in tags: print('Extracting information from #' + tag) information, errMsg = extract_tag_information(browser, tag, Settings.limit_amount) Datasaver.save_profile_json(tag, information) if (errMsg != None and errMsg != ''): #Send msg to slack slackSendMsg(errMsg) except KeyboardInterrupt: print('Aborted...') except Exception as e: slackSendMsg(str(e)) print(str(e)) pass finally: browser.delete_all_cookies() browser.close() browser.quit()
"""Goes through all usernames and collects their information""" import sys from util.account import login from util.chromedriver import SetupBrowserEnvironment from util.cli_helper import get_all_user_names from util.datasaver import Datasaver from util.extractor import extract_information from util.extractor_posts import InstagramPost from util.settings import Settings from post_getter import di_csv_kan from tqdm import tqdm Settings.chromedriver_location = '/Users/svmihar/Documents/chromedriver' Settings.scrape_posts_infos = True Settings.log_output_toconsole = True with SetupBrowserEnvironment() as browser: usernames = get_all_user_names() for username in tqdm(usernames): print('Extracting information from ' + username) information, user_commented_list = extract_information( browser, username, Settings.limit_amount) Datasaver.save_profile_json(username, information.to_dict()) # print ("Number of users who commented on their profile is ", len(user_commented_list),"\n") print('convert to csv') di_csv_kan()
webhook_url, data=json.dumps(slack_data), headers={'Content-Type': 'application/json'} ) if response.status_code != 200: raise ValueError( 'Request to slack returned an error %s, the response is:\n%s' % (response.status_code, response.text) ) try: tags = get_all_user_names() for tag in tags: print('Extracting information from #' + tag) information, errMsg = extract_tag_information(browser, tag, Settings.limit_amount) Datasaver.save_profile_json(tag,information) if (errMsg != None and errMsg != ''): #Send msg to slack slackSendMsg(errMsg) except KeyboardInterrupt: print('Aborted...') except Exception as e: slackSendMsg(str(e)) print(str(e)) pass finally: browser.delete_all_cookies() browser.close()
from util.extractor import extract_information

# Timeout handed to implicitly_wait(); also used in the console message so
# the two cannot drift apart.
IMPLICIT_WAIT_SECONDS = 5

chrome_options = Options()
chrome_options.add_argument('--dns-prefetch-disable')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--lang=en-US')
chrome_options.add_argument('--headless')
chrome_options.add_experimental_option(
    'prefs', {'intl.accept_languages': 'en-US'})
browser = webdriver.Chrome('./assets/chromedriver',
                           chrome_options=chrome_options)

# makes sure slower connections work as well
print("Waiting {} sec".format(IMPLICIT_WAIT_SECONDS))
browser.implicitly_wait(IMPLICIT_WAIT_SECONDS)

Settings.limit_amount = 1
# NOTE(review): the attribute is spelled `scrap_posts_infos` here; confirm it
# matches the name defined on Settings, otherwise this assignment silently
# creates a new attribute and has no effect.
Settings.scrap_posts_infos = False

try:
    usernames = get_all_user_names()
    for username in usernames:
        print('Extracting information from ' + username)
        # The second returned element (commenter list) is not used here.
        information, user_commented_list = extract_information(
            browser, username, Settings.limit_amount)
        Datasaver.save_profile_json(username, information)
except KeyboardInterrupt:
    print('Aborted...')
finally:
    browser.delete_all_cookies()
    # quit() ends the WebDriver session and its driver process;
    # close() would only close the current window.
    browser.quit()