Exemple #1
0
def update_contact(driver, cur, uid, csrf_tocken, row_id):
    """Refresh one stored contact row with freshly fetched profile details.

    Fetches the contact for ``uid`` through ``get_contact_ajax`` and runs
    ``inbox_update_query`` via ``add_to_db2`` with the fetched location,
    industry, countrycode, company and title, followed by ``row_id``.
    """
    details = get_contact_ajax(driver, uid, csrf_tocken)

    # Field order is significant: the values are handed to the query
    # positionally, with the row id last.
    columns = ('location', 'industry', 'countrycode', 'company', 'title')
    params = [details[name] for name in columns]
    params.append(row_id)

    add_to_db2(cur, inbox_update_query, *params)
Exemple #2
0
def get_search_contact_fast(profile_data, cur, owner_id, search_id, search_result_cnt):
    """Store one search hit and then record the running result count.

    ``profile_data`` is a tuple of profile fields; the owner id, search id
    and ``botstatus.CONNECT_REQ_N`` are appended before it is passed to
    ``search_query``. Afterwards ``search_update_query`` is run with
    ``(search_result_cnt, search_id)``.
    """
    insert_params = profile_data + (owner_id, search_id, botstatus.CONNECT_REQ_N)
    print('value insert:', insert_params)
    add_to_db(cur, search_query, *insert_params)

    counter_params = (search_result_cnt, search_id)
    print('count insert:', counter_params)
    add_to_db2(cur, search_update_query, *counter_params)
Exemple #3
0
def check_and_add_ip(cur=None):
    """Register the current IP address if ``check_bot_ip`` does not find it.

    Args:
        cur: Database cursor; when ``None`` one is obtained from
            ``get_cursor()``.
    """
    cursor = get_cursor() if cur is None else cur
    address = get_ip()

    # Nothing to do when the lookup already returns a row for this address.
    if check_bot_ip(cursor, address) is not None:
        return

    print('{0} ip is not in app_freebotip table'.format(address))
    add_to_db2(cursor, insert_query, address)
Exemple #4
0
def get_fastcontact_ajax(driver, cur, owner_id, check_existed=False):
    """Import connections by issuing an in-page XHR and storing each entry.

    Opens ``LINKEDIN_CONNECTIONS_URL``, reads the connection total from the
    first word of the ``h2`` element's text, then injects JavaScript that
    requests the connections endpoint and writes the response text into a
    ``div`` appended to the page. Every item under ``elements`` in that
    response becomes one row for ``getcontacts_query``.

    Args:
        driver: Selenium WebDriver used for navigation and script injection.
        cur: Database cursor. When ``None`` no rows are inserted.
        owner_id: Owner id stored with every row and used in the
            existence lookup.
        check_existed: When true, entries for which ``inbox_check_id_query``
            already returns a row are skipped, and newly inserted rows are
            re-read and passed to ``update_contact``.
    """
    driver.get(LINKEDIN_CONNECTIONS_URL)
    time.sleep(10)

    heading_text = driver.find_element_by_tag_name("h2").text
    cnt_all_connections = heading_text.split(" ")[0].replace(',', '')
    csrf_tocken = get_browser_csrf_tocken(driver)

    # NOTE(review): the CSRF token is spliced into the script unquoted, so it
    # must already be a valid JavaScript expression (presumably the value
    # carries its own quotes - confirm against get_browser_csrf_tocken).
    fetch_script = ("""
      var element = document.createElement('div');
      element.id = "interceptedResponse";
      element.appendChild(document.createTextNode(""));
      document.body.appendChild(element);

      var xhttp = new XMLHttpRequest();

      xhttp.onreadystatechange = function() {
        if (this.readyState == 4 && this.status == 200) {
          document.getElementById("interceptedResponse").innerHTML = this.responseText;
        }
      };
      xhttp.open("GET", "https://www.linkedin.com/voyager/api/relationships/connections?count="""
                    + cnt_all_connections +
                    """&sortType=RECENTLY_ADDED&start=0", true);
      xhttp.setRequestHeader('Csrf-Token', """ + csrf_tocken + """)
      xhttp.send();""")
    driver.execute_script(fetch_script)

    # Give the asynchronous request time to fill the div before reading it.
    time.sleep(3)
    raw_response = driver.find_element_by_id('interceptedResponse').text
    payload = json.loads(raw_response)
    print('----------------targetjsondata------------------> :',
          payload)
    # NOTE(review): 100 second pause right after the dump above - looks like
    # a debugging aid or deliberate throttling; confirm before shortening.
    time.sleep(100)

    connections = payload['elements']
    for entry in connections:
        mini = entry['miniProfile']
        first_name = mini['firstName']
        last_name = mini['lastName']
        occupation = mini['occupation']
        public_id = mini['publicIdentifier']
        # Only the first 10 digits of createdAt are used for the timestamp
        # (presumably a millisecond epoch reduced to seconds - TODO confirm).
        created_at = datetime.fromtimestamp(int(str(entry['createdAt'])[:10]))

        # "<title> at <company>": anything after a second " at " is dropped.
        if " at " in occupation:
            pieces = occupation.split(" at ")
            job_title, company = pieces[0], pieces[1]
        else:
            job_title, company = occupation, ""
        job_title = get_contact_title(job_title)

        row = (
            company,
            "",
            "",
            job_title,
            public_id,
            first_name + ' ' + last_name,
            created_at,
            str(botstatus.OLD_CONNECT_N),
            '1',
            created_at,
            str(owner_id),
        )

        if check_existed:
            existing = bot_db.get_db(cur, inbox_check_id_query,
                                     (public_id, owner_id))
            if existing is not None:
                continue

        if cur is None:
            continue

        bot_db.add_to_db2(cur, getcontacts_query, *row)

        if check_existed:
            # Re-read the row just inserted so its first column can be
            # handed to update_contact.
            inserted = bot_db.get_db(cur, inbox_check_id_query,
                                     (public_id, owner_id))
            update_contact(driver, cur, public_id, csrf_tocken, inserted[0])

    print(' Number of connections : ', len(connections))
def search(email,
           password,
           kw,
           cur=None,
           search_id=None,
           owner_id=None,
           limit=750):
    """Run a keyword search in the browser UI and store every profile found.

    Logs in, types ``kw`` into the search box, pages through the result
    list collecting profile links, then parses each link with
    ``parse_connection_link`` and writes it with ``search_query``. After
    each insert the running count is written with ``search_update_query``.

    Args:
        email: Login e-mail passed to ``login_linkedin_withwebdriver``.
        password: Login password passed to ``login_linkedin_withwebdriver``.
        kw: Keyword typed into the search input.
        cur: Database cursor handed to ``add_to_db`` / ``add_to_db2``.
        search_id: Id stored with each result and used by the count update.
        owner_id: Owner id stored with each result.
        limit: Maximum number of distinct profile links to collect.

    Returns:
        Tuple ``(bot_status, lastrun_date, completed_date)``. The status is
        ``botstatus.DONE`` even when an error was caught and printed.
    """
    print("==== SEARCH ======")
    lastrun_date = datetime.now()

    driver = login_linkedin_withwebdriver(email, password)

    try:
        time.sleep(5)
        # search connection
        search_input = driver.find_element_by_xpath(
            "/html/body/nav/div/form/div/div/div/artdeco-typeahead-deprecated/artdeco-typeahead-deprecated-input/input"
        )
        search_input.clear()
        search_input.send_keys(kw)
        search_input.send_keys(Keys.ENTER)

        print("-------click search button-----------")

        time.sleep(5)
        total_resultcounts = driver.find_element_by_css_selector(
            "h3.search-results__total").text
        # The total is the second word of the header text.
        counts = total_resultcounts.split(" ")[1].replace(",", "")
        print('counts:', counts)
        # The page count assumes 10 results per page.
        range_count = int(counts) // 10 + 1
        print('range_count:', range_count)

        parse_urls = {}
        print('parsing url:')
        for _ in range(range_count):
            time.sleep(3)
            driver.execute_script("window.scrollBy(0, 1000);")
            time.sleep(3)

            search_list = driver.find_elements_by_class_name(
                "search-result__result-link")

            for tag in search_list:
                # Enforce the limit on the overall number of distinct links,
                # not per page, so that it actually bounds the crawl.
                if len(parse_urls) >= limit:
                    break
                parse_urls[tag.get_attribute('href')] = 1

            if len(parse_urls) >= limit:
                break

            # A missing "next" control means the last page was reached. It
            # must not abort the whole search, otherwise every link gathered
            # so far would be discarded by the outer handler.
            try:
                driver.find_element_by_class_name("next").click()
            except Exception as err:
                print('No next:', err)
                break

        print('parsing profile:')
        # Count from 1 so the stored value equals the number of rows
        # inserted so far, as get_search_contact_fast_with_urls does.
        for count, url in enumerate(parse_urls, start=1):
            result = parse_connection_link(driver, url)

            values = result + (
                owner_id,
                search_id,
                botstatus.CONNECT_REQ_N,
            )
            print('value insert:', values)
            add_to_db(cur, search_query, *values)

            values = (
                count,
                search_id,
            )
            print('count insert:', values)
            add_to_db2(cur, search_update_query, *values)

        bot_status = botstatus.DONE

    except Exception as e:
        # Failures are reported but the run is still marked as done.
        bot_status = botstatus.DONE
        print("ERROR:", e)
    finally:
        # Always release the browser, even on KeyboardInterrupt/SystemExit.
        driver.close()

    completed_date = datetime.now()

    return bot_status, lastrun_date, completed_date
Exemple #6
0
def get_search_contact_salesurls(url, driver, cur=None, owner_id=None,
                                 search_id=None, counter=1,
                                 request_cookies_browser=None):
    """Fetch one profile URL through an in-page XHR and print the response.

    Injects JavaScript that requests ``url`` and stores the response text in
    a ``div`` whose id is ``interceptedSalesProfile_<counter>``, waits two
    seconds, then prints that text and returns.

    Args:
        url: Profile URL requested by the injected script.
        driver: Selenium WebDriver used to run the script.
        cur: Database cursor; only used by the currently unreachable
            storage code below the early return.
        owner_id: Owner id for the stored row (unreachable code only).
        search_id: Search id for the stored row (unreachable code only).
        counter: Number used to build a unique div id.
        request_cookies_browser: Optional cookie string for a ``cookie``
            request header. When ``None`` the header is omitted.

    Returns:
        None. Any exception raised inside the ``try`` is caught and printed.
    """
    csrf_tocken = get_browser_csrf_tocken(driver)

    try:
        print('profile url:', url)

        divId = str(counter)

        # Only emit the cookie header when a value was supplied; with the
        # default None the string concatenation would raise TypeError and
        # the request would never be sent.
        # NOTE(review): "Cookie" is a forbidden request-header name for
        # XMLHttpRequest, so browsers are expected to ignore this call.
        cookie_header_js = ""
        if request_cookies_browser is not None:
            cookie_header_js = ("xhttp.setRequestHeader('cookie', '"
                                + request_cookies_browser + "')")

        # NOTE(review): the CSRF token is spliced in unquoted, so it must
        # already be a valid JavaScript expression - confirm against
        # get_browser_csrf_tocken.
        js = """
            var element = document.createElement('div');
            element.id = 'interceptedSalesProfile_""" + divId + """';
            element.appendChild(document.createTextNode(""));
            document.body.appendChild(element);

            var xhttp = new XMLHttpRequest();

            xhttp.onreadystatechange = function() {
            if (this.readyState == 4 && this.status == 200) {
            document.getElementById('interceptedSalesProfile_""" + divId + """').innerHTML = this.responseText;
            }
            };
            xhttp.open('GET', '""" + url + """', true);
            xhttp.setRequestHeader('Csrf-Token', """ + csrf_tocken + """)
            """ + cookie_header_js + """
            xhttp.send();
            """
        print('js:', js)

        driver.execute_script(js)
        # Give the asynchronous request time to fill the div.
        time.sleep(2)

        print('============== count =========== :', counter)

        profile_result = driver.find_element_by_id('interceptedSalesProfile_' + divId).text

        print('profile_result:', profile_result)

        # NOTE(review): early exit left in place - the parsing and database
        # writes below never run. Remove this return to enable them once
        # the response format has been confirmed.
        return

        jsonProfileData = json.loads(profile_result)

        user_id = get_sale_nav_linkedid_id(url)

        txt_firstname = jsonProfileData['viewee']['firstName']
        txt_lastname = jsonProfileData['viewee']['lastName']
        txt_linkedin_id = user_id
        txt_location = jsonProfileData['viewee']['location']
        txt_occupation = jsonProfileData['viewee']['headline']
        txt_company = jsonProfileData['viewee']['company']

        # Keep only the part of the headline before " at " as the title.
        if " at " in txt_occupation:
            actor_title = txt_occupation.split(" at ")[0]
        else:
            actor_title = txt_occupation

        actor_title = get_contact_title(actor_title)

        values = (txt_company, '', txt_location, actor_title, txt_linkedin_id, txt_firstname + ' ' + txt_lastname)
        values = values + (owner_id, search_id, botstatus.CONNECT_REQ_N,)

        print('value insert:', values)

        if cur:
            add_to_db2(cur, search_query, *values)

            values = (counter, search_id,)
            print('count insert:', values)
            add_to_db2(cur, search_update_query, *values)
    except Exception as err:
        print('error:', err)
Exemple #7
0
def get_search_contact_fast_with_urls(parse_urls, driver, cur, owner_id, search_id):
    """Fetch every collected profile via in-page XHRs and store the results.

    First fires one XHR per key of ``parse_urls``, each writing its response
    into its own ``interceptedProfile_<n>`` div. After a single five second
    wait, it reads the divs back in the same order, inserts one row per
    profile with ``search_query`` and writes the 1-based running count with
    ``search_update_query``.

    Args:
        parse_urls: Mapping whose keys are profile URLs; the second-to-last
            path segment of each URL is used as the profile id.
        driver: Selenium WebDriver used to run the scripts and read the divs.
        cur: Database cursor handed to ``add_to_db`` / ``add_to_db2``.
        owner_id: Owner id stored with each row.
        search_id: Search id stored with each row and used by the count update.
    """
    # The cookie list is fetched but not used further in this function.
    request_cookies_browser = driver.get_cookies()
    # NOTE(review): the cookie value is spliced into the script unquoted, so
    # it must already be a valid JavaScript expression - confirm.
    csrf_tocken = driver.get_cookie('JSESSIONID')['value']

    print('parsing profile:', parse_urls)
    for position, link in enumerate(parse_urls.keys()):
        profile_slug = link.split('/')[-2]
        api_url = 'https://www.linkedin.com/voyager/api/identity/profiles/' + profile_slug + '/'
        holder_suffix = str(position)

        driver.execute_script("""
              var element = document.createElement('div');
              element.id = "interceptedProfile_""" + holder_suffix + """";
              element.appendChild(document.createTextNode(""));
              document.body.appendChild(element);

              var xhttp = new XMLHttpRequest();

              xhttp.onreadystatechange = function() {
                if (this.readyState == 4 && this.status == 200) {
                  document.getElementById("interceptedProfile_""" + holder_suffix + """").innerHTML = this.responseText;
                }
              };
              xhttp.open('GET', '""" + api_url + """', true);
              xhttp.setRequestHeader('Csrf-Token', """ + csrf_tocken + """)
              xhttp.send();
                """)

    # One shared wait for all the requests fired above.
    time.sleep(5)

    for position, link in enumerate(parse_urls.keys()):
        print('============== count =========== :', position)
        raw_profile = driver.find_element_by_id('interceptedProfile_' + str(position)).text
        profile = json.loads(raw_profile)

        industry = profile['industryName']
        first_name = profile['firstName']
        last_name = profile['lastName']
        linkedin_id = link.split('/')[-2]
        location = profile['locationName']
        headline = profile['headline']

        # "<title> at <company>": anything after a second " at " is dropped.
        if " at " in headline:
            pieces = headline.split(" at ")
            title, company = pieces[0], pieces[1]
        else:
            title, company = headline, ""

        insert_params = (
            company,
            industry,
            location,
            title,
            linkedin_id,
            first_name + ' ' + last_name,
            owner_id,
            search_id,
            botstatus.CONNECT_REQ_N,
        )
        print('value insert:', insert_params)
        add_to_db(cur, search_query, *insert_params)

        counter_params = (position + 1, search_id)
        print('count insert:', counter_params)
        add_to_db2(cur, search_update_query, *counter_params)
Exemple #8
0
def fast_search(email, password, search_data, cur=None, search_id=None,
                 owner_id=None, search_mode=0, limit=750):
    """Run a search in one of three modes and store the profiles found.

    Modes, selected by ``search_mode``:

    * ``0`` - ``search_data`` is a keyword string requested through the
      search API with an in-page XHR.
    * ``1`` - ``search_data`` is a search-results URL; result links are
      collected page by page and handed to
      ``get_search_contact_fast_with_urls``.
    * anything else - ``search_data`` is a Sales Navigator search URL;
      member links are collected page by page and handed to
      ``get_search_contact_fast_with_salesurls``.

    Args:
        email: Login e-mail passed to ``login_linkedin_withwebdriver``.
        password: Login password passed to ``login_linkedin_withwebdriver``.
        search_data: Keyword or URL, depending on ``search_mode``.
        cur: Database cursor handed to the storage helpers.
        search_id: Search id stored with results and count updates.
        owner_id: Owner id stored with results.
        search_mode: Mode selector, see above.
        limit: Upper bound on the number of results to process.

    Returns:
        Tuple ``(bot_status, lastrun_date, completed_date)``. The status is
        ``botstatus.DONE`` even when an error was caught and printed.
        NOTE(review): mode 0 currently returns ``None`` because of the
        early exit flagged in the body.
    """
    print('----------', search_data, '----------', search_mode)
    driver = login_linkedin_withwebdriver(email, password)
    max_count = limit
    lastrun_date = datetime.now()

    print("==== SEARCH ======")

    # NOTE(review): the token is spliced into the scripts unquoted, so it
    # must already be a valid JavaScript expression - confirm against
    # get_browser_csrf_tocken.
    csrf_tocken = get_browser_csrf_tocken(driver)
    try:
        if search_mode == 0:
            time.sleep(5)
            js = """
              var element = document.createElement('div');
              element.id = "interceptedSearchResponse";
              element.appendChild(document.createTextNode(""));
              document.body.appendChild(element);

              var xhttp = new XMLHttpRequest();

              xhttp.onreadystatechange = function() {
                if (this.readyState == 4 && this.status == 200) {
                  document.getElementById("interceptedSearchResponse").innerHTML = this.responseText;
                }
              };
              xhttp.open("GET", "https://www.linkedin.com/voyager/api/search/cluster?blendedSrpEnabled=true&count=""" + str(max_count) + """&guides=List()&keywords=""" + search_data + """&origin=GLOBAL_SEARCH_HEADER&q=guided&start=0", true);
              xhttp.setRequestHeader('Csrf-Token', """ + csrf_tocken + """)
              xhttp.send();"""
            driver.execute_script(js)
            time.sleep(3)
            searchListText = driver.find_element_by_id('interceptedSearchResponse').text
            jsondata = json.loads(searchListText)
            total_count = jsondata['paging']['total']
            print('total_count : ', total_count, searchListText.encode('utf-8'))
            # NOTE(review): early exit left in place - the rest of this
            # branch never runs and the function returns None here instead
            # of the status tuple. Remove these two lines to enable the
            # paged fetch below once the response format is confirmed.
            close_driver(driver, email)
            return

            targetcount = min(max_count, total_count) // 10 + 1

            # Fire one request per page of 10; each response goes into its
            # own div so they can all be read back after a single wait.
            for idx in range(targetcount):
                driver.execute_script("""

                          var element = document.createElement('div');
                          element.id = "interceptedSearchResponse_""" + str(idx) + """";
                          element.appendChild(document.createTextNode(""));
                          document.body.appendChild(element);

                          var xhttp = new XMLHttpRequest();

                          xhttp.onreadystatechange = function() {
                            if (this.readyState == 4 && this.status == 200) {
                              document.getElementById("interceptedSearchResponse_""" + str(idx) + """").innerHTML = this.responseText;
                            }
                          };
                          xhttp.open("GET", "https://www.linkedin.com/voyager/api/search/cluster?blendedSrpEnabled=true&count=10&guides=List()&keywords=""" + search_data + """&origin=GLOBAL_SEARCH_HEADER&q=guided&start=""" + str(
                    idx * 10) + """", true);
                          xhttp.setRequestHeader('Csrf-Token', """ + csrf_tocken + """)
                          xhttp.send();

                        """)
            time.sleep(10)

            search_result_cnt = 0
            for idx in range(targetcount):
                conversationListText = driver.find_element_by_id('interceptedSearchResponse_' + str(idx)).text
                jsondata = json.loads(conversationListText)

                elementsData = jsondata['elements'][0]['elements']

                for element in elementsData:
                    print(' ----------------------------------------')

                    profileData = element['hitInfo']['com.linkedin.voyager.search.SearchProfile']

                    txt_industry = profileData.get('industry', '')
                    txt_location = profileData.get('location', '')

                    txt_firstname = profileData['miniProfile']['firstName']
                    txt_lastname = profileData['miniProfile']['lastName']
                    txt_occupation = profileData['miniProfile']['occupation']
                    txt_linkedin_id = profileData['miniProfile']['publicIdentifier']

                    if txt_linkedin_id != 'UNKNOWN':
                        # "<title> at <company>": anything after a second
                        # " at " is dropped.
                        if " at " in txt_occupation:
                            title_company = txt_occupation.split(" at ")
                            actor_title = title_company[0]
                            actor_company = title_company[1]
                        else:
                            actor_company = ""
                            actor_title = txt_occupation

                        search_result_cnt += 1
                        values = (actor_company, txt_industry, txt_location, actor_title, txt_linkedin_id,
                                  txt_firstname + ' ' + txt_lastname)
                        get_search_contact_fast(values, cur, owner_id, search_id, search_result_cnt)

            bot_status = botstatus.DONE
        elif search_mode == 1:
            time.sleep(5)

            driver.get(search_data)
            print("-------Go to search url-----------")

            time.sleep(5)
            total_resultcounts = driver.find_element_by_css_selector(
                "h3.search-results__total").text
            # The total is the second word of the header text. Convert it to
            # int so it can be compared with the limit and floor-divided;
            # comparing the raw string raises TypeError on Python 3.
            counts = int(total_resultcounts.split(" ")[1].replace(",", ""))
            print('counts:', counts)

            # The page count assumes 10 results per page.
            targetcount = min(max_count, counts) // 10 + 1

            print('range_count:', targetcount)

            parse_urls = {}
            print('parsing url:')
            for _ in range(targetcount):
                time.sleep(3)
                driver.execute_script("window.scrollBy(0, 1000);")
                time.sleep(3)

                search_list = driver.find_elements_by_class_name(
                    "search-result__result-link")
                count = 0

                for tag in search_list:
                    url = tag.get_attribute('href')
                    if url in parse_urls:
                        continue
                    parse_urls[url] = 1
                    count += 1
                    if count >= limit:
                        break

                # A missing "next" control means the last page was reached.
                try:
                    driver.find_element_by_class_name("next").click()
                except Exception as err1:
                    print('No next:', err1)
                    break

            get_search_contact_fast_with_urls(parse_urls, driver, cur, owner_id, search_id)
            bot_status = botstatus.DONE
        else:
            time.sleep(2)
            driver.get(search_data)
            print("-------Go to sales navigator search url-----------")

            time.sleep(10)
            try:
                total_resultcounts_tag = driver.find_element_by_css_selector(
                    ".TR .spotlight-result-count")
            except Exception:
                # No result counter on the page: record zero results.
                print("search has not result")
                values = (0, search_id,)
                add_to_db2(cur, search_update_query, *values)
                bot_status = botstatus.DONE
                driver.close()
                completed_date = datetime.now()
                return bot_status, lastrun_date, completed_date

            total_resultcounts = total_resultcounts_tag.text
            # Expand the abbreviated totals ("1.5K", "2M") and keep the
            # result an int so it is usable with range() below.
            suffix = total_resultcounts[-1:]
            if suffix == 'M':
                real_counts = int(float(total_resultcounts[:-1]) * 1000000)
            elif suffix == 'K':
                real_counts = int(float(total_resultcounts[:-1]) * 1000)
            else:
                real_counts = int(total_resultcounts)
            print('counts:', real_counts)
            if real_counts == 0:
                print("search has not result")
                values = (0, search_id,)
                add_to_db2(cur, search_update_query, *values)
                bot_status = botstatus.DONE
                driver.close()
                completed_date = datetime.now()
                return bot_status, lastrun_date, completed_date

            # The page count assumes 25 results per page.
            targetcount = min(max_count, real_counts) // 25 + 1

            print('range_count:', targetcount)

            print('parsing profile:')
            for _ in range(targetcount):
                parse_urls = {}
                time.sleep(7)

                search_list = driver.find_elements_by_class_name("member")

                count = 0
                for tag in search_list:
                    try:
                        # The lookup result is not used; it raises for
                        # entries without a 'save-lead' element, which are
                        # then skipped by the handler below.
                        tag.find_element_by_class_name('save-lead')
                        url = tag.find_element_by_class_name(
                            'name-link').get_attribute('href')
                        if url in parse_urls:
                            continue
                        parse_urls[url] = 1
                        count += 1
                        if count >= limit:
                            break
                    except Exception:
                        continue

                    # NOTE(review): this runs once per accepted entry with
                    # the whole (growing) page dict - confirm that the
                    # helper is meant to be called here rather than once
                    # per page after the loop.
                    get_search_contact_fast_with_salesurls(parse_urls, driver,
                                                           cur, owner_id,
                                                           search_id, count)

                # A missing pagination control means the last page was reached.
                try:
                    driver.find_element_by_class_name(
                        "next-pagination .pagination-text").click()
                except Exception as err1:
                    print('No next:', err1)
                    break

            bot_status = botstatus.DONE

    except Exception as e:
        # Failures are reported but the run is still marked as done.
        bot_status = botstatus.DONE
        print("ERROR:", e)

    driver.close()

    completed_date = datetime.now()

    return bot_status, lastrun_date, completed_date