Ejemplo n.º 1
0
def get_search_contact(parse_urls, driver):
    """Fetch and print the contact data for every profile URL in ``parse_urls``.

    Args:
        parse_urls: Mapping whose keys are profile URLs. The second-to-last
            ``/``-separated segment of each URL is used as the profile id.
        driver: Browser driver handed to ``get_browser_csrf_tocken`` and
            ``get_contact_ajax``.

    Returns:
        None. Results are only printed.
    """
    print('parsing profile:')
    csrf_tocken = get_browser_csrf_tocken(driver)
    for count, url in enumerate(parse_urls, start=1):
        try:
            uid = url.split('/')[-2]
            contact = get_contact_ajax(driver, uid, csrf_tocken)
        except Exception as err:
            # Best effort: one failing profile must not stop the run, but the
            # failure is reported instead of being silently discarded (and
            # KeyboardInterrupt / SystemExit are no longer swallowed).
            print('error:', url, err)
            continue
        print('count:', count, contact)
Ejemplo n.º 2
0
def check_update_contact(driver, cur, owner_id):
    """Run ``update_contact`` for every ``messenger_inbox`` row of an owner.

    Args:
        driver: Browser driver handed to ``get_browser_csrf_tocken`` and
            ``update_contact``.
        cur: DB-API cursor used for the select and passed on to
            ``update_contact``.
        owner_id: Value bound to the ``owner_id=%s`` placeholder. A scalar is
            wrapped in a 1-tuple; a tuple/list/dict is passed through as is.

    Returns:
        None. Progress and elapsed time are printed.
    """
    sql = """ select id, linkedin_id from messenger_inbox where owner_id=%s"""

    # DB-API 2.0 expects the parameters as a sequence or mapping; several
    # drivers reject or mis-handle a bare scalar.
    if isinstance(owner_id, (tuple, list, dict)):
        params = owner_id
    else:
        params = (owner_id,)
    cur.execute(sql, params)

    csrf_tocken = get_browser_csrf_tocken(driver)
    starttime = time.time()
    # fetchall() materialises the rows first, so the cursor can be reused by
    # update_contact while iterating.
    for row in cur.fetchall():
        print('update row:', row)
        inbox_id, uid = row[0], row[1]
        update_contact(driver, cur, uid, csrf_tocken, inbox_id)
    print('Done in ', (time.time() - starttime), ' secs')
Ejemplo n.º 3
0
def get_fastcontacts(email, password):
    """Log in, request the connections list via an in-page XHR and print it.

    The connection count is read from the first ``h2`` element of the
    connections page and used as the ``count`` query parameter. The response
    text is written by the injected script into a ``div`` with id
    ``interceptedResponse`` and read back from there.

    Args:
        email: Account e-mail passed to ``get_driver``.
        password: Account password passed to ``get_driver``.

    Returns:
        None. The decoded JSON is only printed; errors are printed as well.
    """
    driver = get_driver(email, password)
    print("==== GET CONTACTS ======")

    try:
        driver.get(LINKEDIN_CONNECTIONS_URL)
        time.sleep(10)

        total_connection_counts = driver.find_element_by_tag_name("h2")
        # First whitespace-separated token of the heading, thousands
        # separators removed.
        cnt_all_connections = (
            total_connection_counts.text.split(" ")[0].replace(',', ''))
        csrf_tocken = get_browser_csrf_tocken(driver)

        url = "https://www.linkedin.com/voyager/api/relationships/connections?count=" + cnt_all_connections
        url = url + "&sortType=RECENTLY_ADDED&start=0"
        url = url + "&projection=(elements*(to~(id,localizedFirstName,localizedLastName)))"

        # NOTE(review): csrf_tocken is inserted into the script without
        # quotes, so it presumably already carries its own quotes -- confirm
        # against get_browser_csrf_tocken.
        driver.execute_script("""
          var element = document.createElement('div');
          element.id = "interceptedResponse";
          element.appendChild(document.createTextNode(""));
          document.body.appendChild(element);

          var xhttp = new XMLHttpRequest();

          xhttp.onreadystatechange = function() {
            if (this.readyState == 4 && this.status == 200) {
              document.getElementById("interceptedResponse").innerHTML = this.responseText;
            }
          };
          xhttp.open("GET", '""" + url + """', true);
          xhttp.setRequestHeader('Csrf-Token', """ + csrf_tocken + """)
          xhttp.send();""")

        # Fixed wait for the asynchronous request to complete.
        time.sleep(3)
        responsedata = driver.find_element_by_id('interceptedResponse').text

        targetjsondata = json.loads(responsedata)
        print('----------------targetjsondata------------------> :',
              targetjsondata)

    except Exception as err:
        print('errors:', err)
    finally:
        # Always release the browser window, even when a non-Exception error
        # (e.g. KeyboardInterrupt during the sleeps) propagates.
        driver.close()
Ejemplo n.º 4
0
def get_fastcontact_ajax(driver, cur, owner_id, check_existed=False):
    """Import the account's connections through an in-page XHR.

    The connection count is read from the first ``h2`` of the connections
    page, the full list is requested in one call, and every element is turned
    into a row for ``getcontacts_query``.

    Args:
        driver: Logged-in browser driver.
        cur: Database cursor, or None to skip inserting.
        owner_id: Owner the imported contacts belong to.
        check_existed: When true, contacts already found through
            ``inbox_check_id_query`` are skipped, and newly inserted contacts
            are passed to ``update_contact``.

    Returns:
        None. The number of connections in the response is printed.

    Raises:
        KeyError: If the response lacks one of the expected keys.
        ValueError: If the intercepted response is not valid JSON.
    """
    driver.get(LINKEDIN_CONNECTIONS_URL)
    time.sleep(10)

    total_connection_counts = driver.find_element_by_tag_name("h2")
    # First whitespace-separated token of the heading, thousands separators
    # removed.
    cnt_all_connections = (
        total_connection_counts.text.split(" ")[0].replace(',', ''))
    csrf_tocken = get_browser_csrf_tocken(driver)

    # NOTE(review): csrf_tocken is inserted into the script without quotes,
    # so it presumably already carries its own quotes -- confirm against
    # get_browser_csrf_tocken.
    driver.execute_script("""
      var element = document.createElement('div');
      element.id = "interceptedResponse";
      element.appendChild(document.createTextNode(""));
      document.body.appendChild(element);

      var xhttp = new XMLHttpRequest();

      xhttp.onreadystatechange = function() {
        if (this.readyState == 4 && this.status == 200) {
          document.getElementById("interceptedResponse").innerHTML = this.responseText;
        }
      };
      xhttp.open("GET", "https://www.linkedin.com/voyager/api/relationships/connections?count="""
                          + cnt_all_connections +
                          """&sortType=RECENTLY_ADDED&start=0", true);
      xhttp.setRequestHeader('Csrf-Token', """ + csrf_tocken + """)
      xhttp.send();""")

    # Fixed wait for the asynchronous request to complete.
    time.sleep(3)
    responsedata = driver.find_element_by_id('interceptedResponse').text
    response = json.loads(responsedata)
    print('----------------targetjsondata------------------> :',
          response)
    # NOTE(review): 100-second pause kept from the original; it looks like a
    # debugging leftover -- confirm before removing.
    time.sleep(100)

    elements = response['elements']
    for itemdata in elements:
        mini_profile = itemdata['miniProfile']
        firstName = mini_profile['firstName']
        lastName = mini_profile['lastName']
        occupation = mini_profile['occupation']
        publicIdentifier = mini_profile['publicIdentifier']
        # Only the first 10 digits of createdAt are used -- presumably a
        # millisecond epoch reduced to seconds; TODO confirm.
        createdAtTime = datetime.fromtimestamp(
            int(str(itemdata['createdAt'])[0:10]))

        # "<title> at <company>": pieces beyond the second are ignored.
        actor_company = ""
        if " at " in occupation:
            title_company = occupation.split(" at ")
            actor_title = title_company[0]
            actor_company = title_company[1]
        else:
            actor_title = occupation
        actor_title = get_contact_title(actor_title)

        values = (
            actor_company,
            "",
            "",
            actor_title,
            publicIdentifier,
            firstName + ' ' + lastName,
            createdAtTime,
            str(botstatus.OLD_CONNECT_N),
            '1',
            createdAtTime,
            str(owner_id),
        )

        if check_existed:
            contact_row = bot_db.get_db(cur, inbox_check_id_query, (
                publicIdentifier,
                owner_id,
            ))
            if contact_row is not None:
                # Already stored for this owner.
                continue

        if cur is None:
            continue

        bot_db.add_to_db2(cur, getcontacts_query, *values)

        if check_existed:
            # Look the row up again to obtain its id.
            contact_row = bot_db.get_db(cur, inbox_check_id_query, (
                publicIdentifier,
                owner_id,
            ))
            if contact_row is None:
                # The insert did not produce a readable row; skip the update
                # instead of failing on contact_row[0] and aborting the
                # remaining contacts.
                print('contact not found after insert:', publicIdentifier)
                continue
            update_contact(driver, cur, publicIdentifier, csrf_tocken,
                           contact_row[0])

    print(' Number of connections : ', len(elements))
Ejemplo n.º 5
0
def get_search_contact_salesurls(url, driver, cur=None, owner_id=None,
                                 search_id=None, counter=1,
                                 request_cookies_browser=None):
    """Request a sales profile URL through an in-page XHR and print the reply.

    The injected script stores the response text in a ``div`` with id
    ``interceptedSalesProfile_<counter>``, which is then read back.

    NOTE(review): the function currently returns right after printing the raw
    response, so the parsing and database code below that ``return`` never
    runs. It is kept unchanged because it looks like a debugging state --
    confirm before re-enabling.

    Args:
        url: Profile URL to request.
        driver: Logged-in browser driver.
        cur: Database cursor (only used by the currently unreachable part).
        owner_id: Owner id stored with the contact (unreachable part).
        search_id: Search id stored with the contact (unreachable part).
        counter: Suffix of the result ``div`` id; also the stored count.
        request_cookies_browser: Optional cookie string sent as a ``cookie``
            request header. When None, no cookie header is set.

    Returns:
        None. Any exception is caught and printed.
    """
    csrf_tocken = get_browser_csrf_tocken(driver)

    try:
        print('profile url:', url)

        divId = str(counter)

        # Previously a None cookie string raised TypeError while building the
        # script, so the default call could never issue the request.
        # NOTE(review): browsers treat "Cookie" as a forbidden request header
        # for XMLHttpRequest, so this line likely has no effect -- confirm.
        cookie_header = ""
        if request_cookies_browser is not None:
            cookie_header = ("xhttp.setRequestHeader('cookie', '"
                             + request_cookies_browser + "')")

        js = """
            var element = document.createElement('div');
            element.id = 'interceptedSalesProfile_""" + divId + """';
            element.appendChild(document.createTextNode(""));
            document.body.appendChild(element);
            
            var xhttp = new XMLHttpRequest();
            
            xhttp.onreadystatechange = function() {
            if (this.readyState == 4 && this.status == 200) {
            document.getElementById('interceptedSalesProfile_""" + divId + """').innerHTML = this.responseText;
            }
            };
            xhttp.open('GET', '""" + url + """', true);
            xhttp.setRequestHeader('Csrf-Token', """ + csrf_tocken + """)
            """ + cookie_header + """
            xhttp.send();
            """
        print('js:', js)

        driver.execute_script(js)
        # Fixed wait for the asynchronous request to complete.
        time.sleep(2)

        print('============== count =========== :', counter)

        profile_result = driver.find_element_by_id(
            'interceptedSalesProfile_' + divId).text

        print('profile_result:', profile_result)

        # NOTE(review): early exit kept from the original; everything below
        # is unreachable until this return is removed.
        return

        jsonProfileData = json.loads(profile_result)

        user_id = get_sale_nav_linkedid_id(url)

        viewee = jsonProfileData['viewee']
        txt_firstname = viewee['firstName']
        txt_lastname = viewee['lastName']
        txt_linkedin_id = user_id
        txt_location = viewee['location']
        txt_occupation = viewee['headline']
        txt_company = viewee['company']

        # "<title> at <company>": only the title part is kept here.
        if " at " in txt_occupation:
            actor_title = txt_occupation.split(" at ")[0]
        else:
            actor_title = txt_occupation

        actor_title = get_contact_title(actor_title)

        values = (txt_company, '', txt_location, actor_title, txt_linkedin_id,
                  txt_firstname + ' ' + txt_lastname)
        values = values + (owner_id, search_id, botstatus.CONNECT_REQ_N,)

        print('value insert:', values)

        if cur:
            add_to_db2(cur, search_query, *values)

            values = (counter, search_id,)
            print('count insert:', values)
            add_to_db2(cur, search_update_query, *values)
    except Exception as err:
        print('error:', err)
Ejemplo n.º 6
0
def fast_search(email, password, search_data, cur=None, search_id=None,
                owner_id=None, search_mode=0, limit=750):
    """Run a search in one of three modes and hand the hits to the importers.

    Modes:
        * ``search_mode == 0``: ``search_data`` is inserted as keywords into a
          voyager search request issued through an in-page XHR.
        * ``search_mode == 1``: ``search_data`` is a search URL; result links
          are collected page by page and passed to
          ``get_search_contact_fast_with_urls``.
        * any other value: ``search_data`` is a sales navigator search URL;
          member links are passed to ``get_search_contact_fast_with_salesurls``.

    NOTE(review): mode 0 currently closes the driver and returns None right
    after printing the total count; the paging code after that ``return`` is
    unreachable. It is kept unchanged because it looks like a debugging
    state -- confirm before re-enabling.

    Args:
        email: Account e-mail used for the login (and ``close_driver``).
        password: Account password.
        search_data: Keywords (mode 0) or a search URL (other modes).
        cur: Database cursor passed on to the import helpers.
        search_id: Id of the search record being processed.
        owner_id: Owner the found contacts belong to.
        search_mode: Selects the mode, see above.
        limit: Maximum number of results to request / collect.

    Returns:
        ``(bot_status, lastrun_date, completed_date)``; None for mode 0 (see
        the note above). Errors are printed and reported as
        ``botstatus.DONE``.
    """
    print('----------', search_data, '----------', search_mode)
    driver = login_linkedin_withwebdriver(email, password)
    max_count = limit
    lastrun_date = datetime.now()

    print("==== SEARCH ======")

    csrf_tocken = get_browser_csrf_tocken(driver)
    try:
        if search_mode == 0:
            time.sleep(5)
            js = """
              var element = document.createElement('div');
              element.id = "interceptedSearchResponse";
              element.appendChild(document.createTextNode(""));
              document.body.appendChild(element);

              var xhttp = new XMLHttpRequest();

              xhttp.onreadystatechange = function() {
                if (this.readyState == 4 && this.status == 200) {
                  document.getElementById("interceptedSearchResponse").innerHTML = this.responseText;
                }
              };
              xhttp.open("GET", "https://www.linkedin.com/voyager/api/search/cluster?blendedSrpEnabled=true&count=""" + str(max_count) + """&guides=List()&keywords=""" + search_data + """&origin=GLOBAL_SEARCH_HEADER&q=guided&start=0", true);
              xhttp.setRequestHeader('Csrf-Token', """ + csrf_tocken + """)
              xhttp.send();"""
            driver.execute_script(js)
            # Fixed wait for the asynchronous request to complete.
            time.sleep(3)
            searchListText = driver.find_element_by_id('interceptedSearchResponse').text
            jsondata = json.loads(searchListText)
            total_count = jsondata['paging']['total']
            print('total_count : ', total_count, searchListText.encode('utf-8'))
            # NOTE(review): early exit kept from the original; the rest of
            # this branch is unreachable and the function returns None here.
            close_driver(driver, email)
            return

            # One request per page of 10 results.
            targetcount = min(max_count, total_count) // 10 + 1

            for idx in range(targetcount):
                driver.execute_script("""

                          var element = document.createElement('div');
                          element.id = "interceptedSearchResponse_""" + str(idx) + """";
                          element.appendChild(document.createTextNode(""));
                          document.body.appendChild(element);

                          var xhttp = new XMLHttpRequest();

                          xhttp.onreadystatechange = function() {
                            if (this.readyState == 4 && this.status == 200) {
                              document.getElementById("interceptedSearchResponse_""" + str(idx) + """").innerHTML = this.responseText;
                            }
                          };
                          xhttp.open("GET", "https://www.linkedin.com/voyager/api/search/cluster?blendedSrpEnabled=true&count=10&guides=List()&keywords=""" + search_data + """&origin=GLOBAL_SEARCH_HEADER&q=guided&start=""" + str(
                    idx * 10) + """", true);
                          xhttp.setRequestHeader('Csrf-Token', """ + csrf_tocken + """)
                          xhttp.send();

                        """)
            time.sleep(10)

            search_result_cnt = 0
            for idx in range(targetcount):
                conversationListText = driver.find_element_by_id('interceptedSearchResponse_' + str(idx)).text
                jsondata = json.loads(conversationListText)

                elementsData = jsondata['elements'][0]['elements']

                for element in elementsData:
                    print(' ----------------------------------------')

                    profileData = element['hitInfo']['com.linkedin.voyager.search.SearchProfile']

                    txt_industry = profileData.get('industry', '') if isinstance(profileData, dict) else ''
                    txt_location = profileData.get('location', '') if isinstance(profileData, dict) else ''

                    txt_firstname = profileData['miniProfile']['firstName']
                    txt_lastname = profileData['miniProfile']['lastName']
                    txt_occupation = profileData['miniProfile']['occupation']
                    txt_linkedin_id = profileData['miniProfile']['publicIdentifier']

                    if txt_linkedin_id != 'UNKNOWN':
                        # "<title> at <company>": pieces beyond the second
                        # are ignored.
                        actor_company = ""
                        if " at " in txt_occupation:
                            title_company = txt_occupation.split(" at ")
                            actor_title = title_company[0]
                            actor_company = title_company[1]
                        else:
                            actor_title = txt_occupation

                        search_result_cnt += 1
                        values = (actor_company, txt_industry, txt_location, actor_title, txt_linkedin_id,
                                  txt_firstname + ' ' + txt_lastname)
                        get_search_contact_fast(values, cur, owner_id, search_id, search_result_cnt)

            bot_status = botstatus.DONE
        elif search_mode == 1:
            time.sleep(5)

            driver.get(search_data)
            print("-------Go to search url-----------")

            time.sleep(5)
            total_resultcounts_tag = driver.find_element_by_css_selector(
                "h3.search-results__total")
            total_resultcounts = total_resultcounts_tag.text
            # The second whitespace-separated token of the heading is taken
            # as the result count.
            real_counts = total_resultcounts.split(" ")[1]
            # Convert to int: comparing the int limit with a str raises
            # TypeError on Python 3, which aborted this whole branch.
            counts = int(real_counts.replace(",", ""))
            print('counts:', counts)

            # One iteration per page of 10 results.
            targetcount = min(max_count, counts) // 10 + 1

            print('range_count:', targetcount)

            parse_urls = {}
            print('parsing url:')
            for _ in range(targetcount):
                time.sleep(3)
                driver.execute_script("window.scrollBy(0, 1000);")
                time.sleep(3)

                search_list = driver.find_elements_by_class_name(
                    "search-result__result-link")
                # Counts the new URLs found on this page only.
                count = 0

                for tag in search_list:
                    url = tag.get_attribute('href')
                    if url in parse_urls:
                        continue
                    parse_urls[url] = 1
                    count += 1
                    if count >= limit:
                        break

                try:
                    driver.find_element_by_class_name("next").click()
                except Exception as err1:
                    print('No next:', err1)
                    break

            get_search_contact_fast_with_urls(parse_urls, driver, cur, owner_id, search_id)
            bot_status = botstatus.DONE
        else:
            time.sleep(2)
            driver.get(search_data)
            print("-------Go to sales navigator search url-----------")

            time.sleep(10)
            try:
                total_resultcounts_tag = driver.find_element_by_css_selector(
                    ".TR .spotlight-result-count")
            except Exception:
                # No result counter on the page: record zero results.
                print("search has not result")
                values = (0, search_id,)
                add_to_db2(cur, search_update_query, *values)
                bot_status = botstatus.DONE
                driver.close()
                completed_date = datetime.now()
                return bot_status, lastrun_date, completed_date

            total_resultcounts = total_resultcounts_tag.text
            # The counter may be abbreviated with an "M" or "K" suffix. Keep
            # the result an int so that it can be used with range() below.
            if total_resultcounts[-1:] == 'M':
                real_counts = int(float(total_resultcounts[:-1]) * 1000000)
            elif total_resultcounts[-1:] == 'K':
                real_counts = int(float(total_resultcounts[:-1]) * 1000)
            else:
                real_counts = int(total_resultcounts)
            print('counts:', real_counts)
            if real_counts == 0:
                print("search has not result")
                values = (0, search_id,)
                add_to_db2(cur, search_update_query, *values)
                bot_status = botstatus.DONE
                driver.close()
                completed_date = datetime.now()
                return bot_status, lastrun_date, completed_date

            # One iteration per page of 25 results.
            targetcount = min(max_count, real_counts) // 25 + 1

            print('range_count:', targetcount)

            print('parsing profile:')
            for _ in range(targetcount):
                parse_urls = {}
                time.sleep(7)

                search_list = driver.find_elements_by_class_name("member")

                count = 0
                for tag in search_list:
                    try:
                        # Raises when the member has no 'save-lead' element,
                        # which skips that member.
                        tag.find_element_by_class_name('save-lead')
                        url = tag.find_element_by_class_name(
                            'name-link').get_attribute('href')
                        if url in parse_urls:
                            continue
                        parse_urls[url] = 1
                        count += 1
                        if count >= limit:
                            break
                    except Exception:
                        continue

                    # NOTE(review): called once per member with the URLs
                    # accumulated so far on this page, as in the original;
                    # verify this is intended rather than one call per page.
                    get_search_contact_fast_with_salesurls(parse_urls, driver,
                                                           cur, owner_id,
                                                           search_id, count)

                try:
                    driver.find_element_by_class_name(
                        "next-pagination .pagination-text").click()
                except Exception as err1:
                    print('No next:', err1)
                    break

            bot_status = botstatus.DONE

    except Exception as e:
        # Errors are reported as DONE, not as an error status (kept from the
        # original).
        bot_status = botstatus.DONE
        print("ERROR:", e)

    driver.close()

    completed_date = datetime.now()

    return bot_status, lastrun_date, completed_date