def get_search_contact_fast(profile_data, cur, owner_id, search_id, search_result_cnt):
    """Insert one search-result row, then write the search's result count.

    Args:
        profile_data: Tuple of leading column values for the row; the owner
            id, search id and ``botstatus.CONNECT_REQ_N`` are appended to it.
        cur: Cursor object forwarded to ``add_to_db`` and ``add_to_db2``.
        owner_id: Value appended to the inserted row.
        search_id: Value appended to the inserted row and used as the second
            parameter of ``search_update_query``.
        search_result_cnt: First parameter of ``search_update_query``.
    """
    profile_row = profile_data + (owner_id, search_id, botstatus.CONNECT_REQ_N)
    print('value insert:', profile_row)
    add_to_db(cur, search_query, *profile_row)

    count_row = (search_result_cnt, search_id)
    print('count insert:', count_row)
    add_to_db2(cur, search_update_query, *count_row)
def _parse_result_total(heading_text):
    """Return the result count found in the search-results heading text.

    The count is the second space-separated token of *heading_text* with
    thousands separators (``,``) removed.
    """
    return int(heading_text.split(" ")[1].replace(",", ""))


def _collect_result_urls(driver, page_count, limit):
    """Collect up to *limit* distinct profile links over *page_count* pages.

    Returns a dict keyed by URL (values are a constant ``1``) so insertion
    order is kept and duplicates are dropped.
    """
    urls = {}
    for page in range(page_count):
        time.sleep(3)
        driver.execute_script("window.scrollBy(0, 1000);")
        time.sleep(3)
        for tag in driver.find_elements_by_class_name("search-result__result-link"):
            # The limit applies to the total number of URLs gathered, not to
            # the number found on the current page.
            if len(urls) >= limit:
                return urls
            url = tag.get_attribute('href')
            if not url or url in urls:
                continue
            urls[url] = 1
        if len(urls) >= limit or page == page_count - 1:
            # Nothing more to fetch: do not click "next" after the last page.
            break
        try:
            driver.find_element_by_class_name("next").click()
        except Exception as e:
            # Best effort: a missing/unclickable "next" button ends paging
            # but must not discard the URLs collected so far.
            print("no further result page:", e)
            break
    return urls


def search(email, password, kw, cur=None, search_id=None, owner_id=None, limit=750):
    """Run a keyword search and store every resulting profile.

    Logs in through ``login_linkedin_withwebdriver``, submits *kw* in the
    search box, gathers profile links from the result pages, parses each one
    with ``parse_connection_link`` and inserts it through ``add_to_db``.
    After each inserted profile the running count (1-based) is written with
    ``add_to_db2``.

    NOTE(review): the original source was whitespace-mangled, so whether the
    count update ran per profile or once after the loop could not be read
    from it; updating per profile yields the same final value. Confirm
    against ``search_update_query``.

    Args:
        email: Login e-mail passed to ``login_linkedin_withwebdriver``.
        password: Login password passed to ``login_linkedin_withwebdriver``.
        kw: Keyword typed into the search input.
        cur: Cursor object forwarded to ``add_to_db`` / ``add_to_db2``.
        search_id: Appended to each inserted row and used in the count update.
        owner_id: Appended to each inserted row.
        limit: Maximum number of distinct profile URLs to collect.

    Returns:
        Tuple ``(bot_status, lastrun_date, completed_date)``. ``bot_status``
        is ``botstatus.DONE`` even when an error was caught and printed.
    """
    print("==== SEARCH ======")
    lastrun_date = datetime.now()
    driver = login_linkedin_withwebdriver(email, password)
    try:
        time.sleep(5)
        # search connection
        search_input = driver.find_element_by_xpath(
            "/html/body/nav/div/form/div/div/div/artdeco-typeahead-deprecated/artdeco-typeahead-deprecated-input/input"
        )
        search_input.clear()
        search_input.send_keys(kw)
        search_input.send_keys(Keys.ENTER)
        print("-------click search button-----------")
        time.sleep(5)

        total_text = driver.find_element_by_css_selector(
            "h3.search-results__total").text
        total = _parse_result_total(total_text)
        print('counts:', total)
        # The page count is derived assuming 10 results per page.
        range_count = total // 10 + 1
        print('range_count:', range_count)

        print('parsing url:')
        parse_urls = _collect_result_urls(driver, range_count, limit)

        print('parsing profile:')
        for count, url in enumerate(parse_urls, start=1):
            result = parse_connection_link(driver, url)
            values = result + (owner_id, search_id, botstatus.CONNECT_REQ_N)
            print('value insert:', values)
            add_to_db(cur, search_query, *values)

            values = (count, search_id)
            print('count insert:', values)
            add_to_db2(cur, search_update_query, *values)

        bot_status = botstatus.DONE
    except Exception as e:
        # Errors are deliberately still reported as DONE (kept from the
        # original behaviour); the message is only printed.
        bot_status = botstatus.DONE
        print("ERROR:", e)
    finally:
        driver.close()

    completed_date = datetime.now()
    return bot_status, lastrun_date, completed_date
def _clean_message_text(text):
    """Strip each space-separated word of *text* and replace ``'`` by ``"``."""
    trimmed = ' '.join(word.strip() for word in text.split(' '))
    return trimmed.replace("'", '"')


def _read_inmail_message(driver, body_div, listed_time):
    """Return ``(created_at, message)`` for a ``msg-spinmail-thread`` body.

    *listed_time* is the timestamp text shown in the conversation list. When
    its second token is ``AM``/``PM`` it is passed to
    ``get_message_created_time`` as the time argument, otherwise as the date
    argument.
    """
    driver.execute_script(
        'arguments[0].scrollTop = arguments[0].scrollHeight', body_div)
    paragraphs = body_div.find_elements_by_tag_name("p")
    # Paragraphs are concatenated without a separator, as before.
    message = ''.join(_clean_message_text(p.text) for p in paragraphs)

    tokens = listed_time.split(' ')
    # A single-token timestamp has no AM/PM marker; treat it as a date
    # instead of failing on the missing second token.
    if len(tokens) > 1 and tokens[1] in ('AM', 'PM'):
        created_at = get_message_created_time('', listed_time)
    else:
        created_at = get_message_created_time(listed_time, '')
    return created_at, message


def _read_thread_messages(driver):
    """Return a list of ``(created_at, message)`` for the open thread.

    A list item without a date heading inherits the most recent heading seen.
    A list item without a timestamp is back-filled with the next timestamp
    that appears further down the list (or stays ``''`` if none follows).
    """
    container = driver.find_element_by_class_name(
        "msg-s-message-list-container")
    thread_ul = container.find_element_by_css_selector(
        "ul.msg-s-message-list")
    driver.execute_script(
        'arguments[0].scrollTop = arguments[0].scrollHeight', thread_ul)
    events = thread_ul.find_elements_by_css_selector(
        "li.msg-s-message-list__event")

    dates = []
    times = []
    messages = []
    pending_time_ids = []
    last_date = ''
    for index, event in enumerate(events):
        headings = event.find_elements_by_css_selector(
            "time.msg-s-message-list__time-heading")
        if headings:
            last_date = headings[0].text
        dates.append(last_date)

        stamps = event.find_elements_by_css_selector(
            "time.msg-s-message-group__timestamp")
        if stamps:
            stamp = stamps[0].text
            for pending in pending_time_ids:
                times[pending] = stamp
            pending_time_ids = []
        else:
            stamp = ''
            pending_time_ids.append(index)
        times.append(stamp)

        bubble = event.find_element_by_class_name(
            "msg-s-event-listitem__message-bubble")
        driver.execute_script(
            'arguments[0].scrollTop = arguments[0].scrollHeight', bubble)
        body = bubble.find_element_by_class_name(
            "msg-s-event-listitem__body")
        messages.append(_clean_message_text(body.text))

    return [
        (get_message_created_time(date_text, time_text), message)
        for date_text, time_text, message in zip(dates, times, messages)
    ]


def get_messages(email, password, cur, owner_id):
    """Read every listed conversation and store its messages.

    Logs in through ``login_linkedin_withwebdriver``, opens the messaging
    tab, clicks each conversation in the list and extracts its text: either
    the single ``msg-spinmail-thread__message-body`` block, when present, or
    every item of the regular message list. Each message is inserted with
    ``botdb.add_to_db`` when *cur* is not ``None``.

    Args:
        email: Login e-mail passed to ``login_linkedin_withwebdriver``.
        password: Login password passed to ``login_linkedin_withwebdriver``.
        cur: Cursor object for ``botdb.add_to_db``; ``None`` disables inserts.
        owner_id: Stored with every message row.

    Returns:
        Tuple ``(bot_status, lastrun_date, completed_date)``. ``bot_status``
        is ``botstatus.DONE`` even when an error was caught and printed.
    """
    print("==== GET MESSAGES ======")
    lastrun_date = datetime.now()
    # Constant column values stored with every message row.
    is_read = 1
    message_type = 7
    is_direct = 1
    driver = login_linkedin_withwebdriver(email, password)
    try:
        time.sleep(3)
        # Reading messages
        driver.find_element_by_css_selector("span#messaging-tab-icon").click()
        time.sleep(10)
        driver.find_element_by_tag_name("html").send_keys(Keys.END)
        conversations_ul = driver.find_element_by_class_name(
            "msg-conversations-container__conversations-list")
        # ``scrollTop`` is the DOM scroll offset; the previous ``scrollDown``
        # assignment only created an unused property and scrolled nothing.
        driver.execute_script(
            'arguments[0].scrollTop = arguments[0].scrollHeight',
            conversations_ul)
        conversations = driver.find_elements_by_css_selector(
            "li.msg-conversation-listitem")

        for conversation in conversations:
            listed_time = conversation.find_element_by_css_selector(
                "time.msg-conversation-listitem__time-stamp").text
            conversation.find_element_by_class_name(
                "msg-conversation-listitem__link").click()
            driver.execute_script("window.scrollBy(0, 1000);")

            # Detect the thread layout explicitly instead of relying on a
            # broad try/except that also hid unrelated failures.
            inmail_bodies = driver.find_elements_by_class_name(
                "msg-spinmail-thread__message-body")
            if inmail_bodies:
                rows = [_read_inmail_message(driver, inmail_bodies[0], listed_time)]
            else:
                rows = _read_thread_messages(driver)

            # add to db
            completed_date = datetime.now()
            updated_at = datetime.now()
            for created_at, message in rows:
                values = (created_at, updated_at, message, completed_date,
                          message_type, owner_id, is_direct, is_read)
                if cur is not None:
                    botdb.add_to_db(cur, getmessages_query, *values)
            time.sleep(5)

        time.sleep(5)
        bot_status = botstatus.DONE
    except Exception as e:
        # just consider all are okay now: errors are printed but the status
        # is still reported as DONE.
        bot_status = botstatus.DONE
        print("ERROR:", e)
    finally:
        driver.close()

    completed_date = datetime.now()
    return bot_status, lastrun_date, completed_date
def _connected_time(badge_text, now):
    """Convert a time-badge text into an absolute datetime.

    The second space-separated token of *badge_text* is read as the amount
    and the third as the unit (minute/hour/day/week/month/year, matched by
    substring; a month counts as 30 days and a year as 365 days).

    Falls back to *now* when the amount or unit is missing or unrecognised,
    instead of raising or reusing the previous badge's value.
    """
    unit_steps = (
        ("minute", timedelta(minutes=1)),
        ("hour", timedelta(hours=1)),
        ("day", timedelta(days=1)),
        ("week", timedelta(weeks=1)),
        ("month", timedelta(days=30)),
        ("year", timedelta(days=365)),
    )
    tokens = badge_text.split(" ")
    try:
        amount = int(tokens[1])
        unit = tokens[2]
    except (IndexError, ValueError):
        return now
    for name, step in unit_steps:
        if name in unit:
            return now - amount * step
    return now


def get_contacts(email, password, cur=None, owner_id=None):
    """Walk the connections page and store every connection.

    Logs in through ``login_linkedin_withwebdriver``, opens the "see all"
    connections list, scrolls it to the end, then parses every connection
    link with ``parse_connection_link`` and inserts the result through
    ``botdb.add_to_db`` when *cur* is not ``None``.

    Args:
        email: Login e-mail passed to ``login_linkedin_withwebdriver``.
        password: Login password passed to ``login_linkedin_withwebdriver``.
        cur: Cursor object for ``botdb.add_to_db``; ``None`` disables inserts.
        owner_id: Stored as the last column of every row.

    Returns:
        Tuple ``(bot_status, lastrun_date, completed_date)``. ``bot_status``
        is ``botstatus.DONE`` even when an error was caught and printed.
    """
    print("==== GET CONTACTS ======")
    lastrun_date = datetime.now()
    driver = login_linkedin_withwebdriver(email, password)
    try:
        time.sleep(15)
        # My Network contacts
        driver.find_element_by_class_name("nav-item--mynetwork").click()
        time.sleep(5)
        driver.find_element_by_css_selector(
            "a.mn-connections-summary__see-all").click()
        time.sleep(5)

        counts_text = driver.find_element_by_tag_name("h2").text
        # Strip thousands separators before converting, as search() does.
        act_count = counts_text.split(" ")[0].replace(",", "")
        # One END key press per 40 connections, plus one.
        loop_range = int(act_count) // 40 + 1
        elem = driver.find_element_by_tag_name("html")
        print("loop_range:", loop_range)
        for _ in range(loop_range):
            elem.send_keys(Keys.END)
            time.sleep(5)

        now = datetime.today()
        connection_time_list = [
            str(_connected_time(badge.text, now))
            for badge in driver.find_elements_by_css_selector("time.time-badge")
        ]

        connection_alink_lists = []
        for card_link in driver.find_elements_by_css_selector(
                "a.mn-connection-card__link"):
            connection_alink = card_link.get_attribute('href')
            connection_alink_lists.append(connection_alink)
            print('connection_alink:', connection_alink)

        for i, connection_link in enumerate(connection_alink_lists):
            # Log the link being processed (the old call printed the
            # function object itself).
            print('get_contacts:', connection_link)
            result = parse_connection_link(driver, connection_link)
            print('result:', result)
            # The connection time is stored twice: once right after the
            # parsed profile columns and once before the owner id.
            values = result + (
                connection_time_list[i],
                botstatus.OLD_CONNECT_N,
                1,
                connection_time_list[i],
                owner_id,
            )
            if cur is not None:
                botdb.add_to_db(cur, getcontacts_query, *values)

        bot_status = botstatus.DONE
    except Exception as e:
        # Errors are printed but the status is still reported as DONE.
        bot_status = botstatus.DONE
        print("ERROR:", e)
    finally:
        completed_date = datetime.now()
        driver.close()

    return bot_status, lastrun_date, completed_date
# Script run in the page for every profile: creates a holder <div> with the
# given id, requests the profile URL asynchronously and stores the raw
# response text in the holder once the request completes with status 200.
# arguments[0] = holder element id, arguments[1] = URL, arguments[2] = token.
_PROFILE_FETCH_JS = """
var element = document.createElement('div');
element.id = arguments[0];
element.appendChild(document.createTextNode(""));
document.body.appendChild(element);
var xhttp = new XMLHttpRequest();
xhttp.onreadystatechange = function() {
    if (this.readyState == 4 && this.status == 200) {
        element.textContent = this.responseText;
    }
};
xhttp.open('GET', arguments[1], true);
xhttp.setRequestHeader('Csrf-Token', arguments[2]);
xhttp.send();
"""


def _profile_slug(url):
    """Return the last path segment of *url*, ignoring trailing slashes."""
    return url.rstrip('/').split('/')[-1]


def get_search_contact_fast_with_urls(parse_urls, driver, cur, owner_id, search_id):
    """Fetch profiles through in-page XHR calls and store each one.

    For every URL an asynchronous request to the ``voyager`` profile
    endpoint is started inside the browser; after a fixed 5 second wait the
    responses are read back, decoded as JSON and inserted with
    ``add_to_db``. After each insert the number of profiles stored so far is
    written with ``add_to_db2``.

    A profile whose response is empty or not valid JSON (for example because
    the request had not finished or did not return status 200) is skipped
    with a printed message instead of aborting the whole batch. Profile
    fields that are absent from the JSON are stored as empty strings.

    NOTE(review): the original source was whitespace-mangled; the 5 second
    wait is assumed to sit between the two loops (not inside the first) and
    the count update inside the second loop — confirm against history.

    Args:
        parse_urls: Iterable of profile URLs (a dict keyed by URL works).
        driver: Logged-in webdriver; must expose a ``JSESSIONID`` cookie.
        cur: Cursor object forwarded to ``add_to_db`` / ``add_to_db2``.
        owner_id: Appended to each inserted row.
        search_id: Appended to each inserted row and used in the count update.
    """
    # The token is passed to the script as a string argument, so quotes
    # surrounding the cookie value are removed here. The previous code
    # spliced the raw cookie value into the script as JS source, which only
    # parsed when the value itself carried those quotes.
    csrf_token = driver.get_cookie('JSESSIONID')['value'].strip('"')

    # Materialise once: the URLs are walked twice with matching indices.
    urls = list(parse_urls)
    print('parsing profile:', parse_urls)

    for index, url in enumerate(urls):
        api_url = ('https://www.linkedin.com/voyager/api/identity/profiles/'
                   + _profile_slug(url) + '/')
        driver.execute_script(
            _PROFILE_FETCH_JS,
            'interceptedProfile_' + str(index),
            api_url,
            csrf_token,
        )

    # Give the asynchronous requests time to complete.
    time.sleep(5)

    stored = 0
    for index, url in enumerate(urls):
        print('============== count =========== :', index)
        holder = driver.find_element_by_id('interceptedProfile_' + str(index))
        # textContent returns the response exactly as stored, without the
        # HTML parsing / whitespace normalisation of innerHTML + .text.
        raw_profile = holder.get_attribute('textContent')
        try:
            profile = json.loads(raw_profile)
        except (TypeError, ValueError) as e:
            print('skip profile:', url, e)
            continue

        txt_industry = profile.get('industryName', '')
        txt_firstname = profile.get('firstName', '')
        txt_lastname = profile.get('lastName', '')
        txt_linkedin_id = _profile_slug(url)
        txt_location = profile.get('locationName', '')
        txt_occupation = profile.get('headline', '')

        if " at " in txt_occupation:
            # "<title> at <company>": text after a second " at " is dropped,
            # as before.
            title_company = txt_occupation.split(" at ")
            actor_title = title_company[0]
            actor_company = title_company[1]
        else:
            actor_company = ""
            actor_title = txt_occupation

        values = (actor_company, txt_industry, txt_location, actor_title,
                  txt_linkedin_id, txt_firstname + ' ' + txt_lastname,
                  owner_id, search_id, botstatus.CONNECT_REQ_N)
        print('value insert:', values)
        add_to_db(cur, search_query, *values)

        stored += 1
        values = (stored, search_id)
        print('count insert:', values)
        add_to_db2(cur, search_update_query, *values)