Esempio n. 1
0
def get_search_contact_fast(profile_data, cur, owner_id, search_id, search_result_cnt):
    """Store one search hit and then write the search's result counter.

    ``profile_data`` is extended with ``owner_id``, ``search_id`` and
    ``botstatus.CONNECT_REQ_N`` and passed to ``add_to_db`` together with
    ``search_query``. Afterwards ``(search_result_cnt, search_id)`` is passed
    to ``add_to_db2`` together with ``search_update_query``.

    Args:
        profile_data: tuple of profile fields; it is concatenated with a
            tuple, so it must itself be a tuple.
        cur: database cursor forwarded to both helpers.
        owner_id: owner identifier stored with the row.
        search_id: identifier of the search the row belongs to.
        search_result_cnt: counter value written for ``search_id``.
    """
    result_row = profile_data + (owner_id, search_id, botstatus.CONNECT_REQ_N)
    print('value insert:', result_row)
    add_to_db(cur, search_query, *result_row)

    counter_row = (search_result_cnt, search_id)
    print('count insert:', counter_row)
    add_to_db2(cur, search_update_query, *counter_row)
def search(email,
           password,
           kw,
           cur=None,
           search_id=None,
           owner_id=None,
           limit=750):
    """Run a LinkedIn people search for ``kw`` and store every hit.

    Logs in through ``login_linkedin_withwebdriver``, submits the keyword in
    the navigation search box, walks the result pages collecting unique
    profile links, parses each one with ``parse_connection_link`` and writes
    it through ``add_to_db``; after each profile the running count is written
    through ``add_to_db2``.

    Args:
        email: account e-mail passed to the login helper.
        password: account password passed to the login helper.
        kw: keyword typed into the search box.
        cur: database cursor forwarded to ``add_to_db`` / ``add_to_db2``.
        search_id: identifier of the search the rows belong to.
        owner_id: owner identifier stored with each row.
        limit: maximum number of profile URLs collected over all pages.

    Returns:
        ``(bot_status, lastrun_date, completed_date)``. ``bot_status`` is
        ``botstatus.DONE`` even when scraping failed; errors are only printed.
    """
    print("==== SEARCH ======")
    lastrun_date = datetime.now()
    driver = login_linkedin_withwebdriver(email, password)

    try:
        time.sleep(5)
        # search connection
        search_input = driver.find_element_by_xpath(
            "/html/body/nav/div/form/div/div/div/artdeco-typeahead-deprecated/artdeco-typeahead-deprecated-input/input"
        )
        search_input.clear()
        search_input.send_keys(kw)
        search_input.send_keys(Keys.ENTER)

        print("-------click search button-----------")

        time.sleep(5)
        total_text = driver.find_element_by_css_selector(
            "h3.search-results__total").text
        # The second space-separated token of the heading is the result
        # count, possibly with thousands separators.
        counts = total_text.split(" ")[1].replace(",", "")
        print('counts:', counts)
        # presumably 10 results per page -- TODO confirm against the site
        range_count = int(counts) // 10 + 1
        print('range_count:', range_count)

        # dict used as an insertion-ordered set of profile URLs
        parse_urls = {}
        print('parsing url:')
        for _ in range(range_count):
            time.sleep(3)
            driver.execute_script("window.scrollBy(0, 1000);")
            time.sleep(3)

            for tag in driver.find_elements_by_class_name(
                    "search-result__result-link"):
                # ``limit`` caps the total number of URLs; the previous
                # per-page counter never stopped the paging loop.
                if len(parse_urls) >= limit:
                    break
                url = tag.get_attribute('href')
                if url and url not in parse_urls:
                    parse_urls[url] = 1

            if len(parse_urls) >= limit:
                break

            # A missing "next" button means there are no more pages; stop
            # paging instead of raising and losing the URLs collected so far.
            next_buttons = driver.find_elements_by_class_name("next")
            if not next_buttons:
                break
            next_buttons[0].click()

        print('parsing profile:')
        # 1-based so the stored count equals the number of rows written,
        # matching get_search_contact_fast_with_urls.
        for count, url in enumerate(parse_urls, start=1):
            result = parse_connection_link(driver, url)

            values = result + (
                owner_id,
                search_id,
                botstatus.CONNECT_REQ_N,
            )
            print('value insert:', values)
            add_to_db(cur, search_query, *values)

            values = (
                count,
                search_id,
            )
            print('count insert:', values)
            add_to_db2(cur, search_update_query, *values)

        bot_status = botstatus.DONE

    except Exception as e:
        # Failures are deliberately still reported as DONE.
        bot_status = botstatus.DONE
        print("ERROR:", e)
    finally:
        # Release the browser even on non-Exception exits (e.g. Ctrl-C).
        driver.close()

    completed_date = datetime.now()

    return bot_status, lastrun_date, completed_date
def _clean_message_text(text):
    """Normalise scraped message text before it is stored.

    Every space-separated token is stripped of surrounding whitespace, the
    tokens are re-joined with single spaces, and each single quote is replaced
    by a double quote.
    """
    joined = ' '.join(token.strip() for token in text.split(' '))
    return joined.replace("'", '"')


def get_messages(email, password, cur, owner_id):
    """Scrape the LinkedIn messaging inbox and store every message found.

    Logs in through ``login_linkedin_withwebdriver``, opens the messaging
    tab and clicks each conversation list item. For each conversation it
    first tries to read a single ``msg-spinmail-thread__message-body``; if
    anything in that path raises, it falls back to reading every
    ``li.msg-s-message-list__event`` of the regular message list. Messages
    are inserted with ``botdb.add_to_db`` when ``cur`` is not ``None``.

    Args:
        email: account e-mail passed to the login helper.
        password: account password passed to the login helper.
        cur: database cursor, or ``None`` to scrape without storing.
        owner_id: owner identifier stored with each message.

    Returns:
        ``(bot_status, lastrun_date, completed_date)``. ``bot_status`` is
        ``botstatus.DONE`` even when scraping failed; errors are only printed.
    """
    print("==== GET MESSAGES ======")
    lastrun_date = datetime.now()
    is_read = 1
    message_type = 7  # local renamed from ``type`` to stop shadowing the builtin
    is_direct = 1
    driver = login_linkedin_withwebdriver(email, password)

    try:
        time.sleep(3)

        # Reading messages
        messageing_menu = driver.find_element_by_css_selector(
            "span#messaging-tab-icon")
        messageing_menu.click()
        time.sleep(10)

        driver.find_element_by_tag_name("html").send_keys(Keys.END)

        conversations_ul = driver.find_element_by_class_name(
            "msg-conversations-container__conversations-list")
        # ``scrollTop`` is the scroll-offset property; the former
        # ``scrollDown`` assignment had no effect on the list.
        driver.execute_script(
            'arguments[0].scrollTop = arguments[0].scrollHeight',
            conversations_ul)
        messaging_list = driver.find_elements_by_css_selector(
            "li.msg-conversation-listitem")

        for messaging in messaging_list:
            created_at = messaging.find_element_by_css_selector(
                "time.msg-conversation-listitem__time-stamp").text

            messaging.find_element_by_class_name(
                "msg-conversation-listitem__link").click()
            driver.execute_script("window.scrollBy(0, 1000);")

            # NOTE(review): the broad except below doubles as the
            # "this is not a spinmail thread" signal, as in the original.
            try:
                messaging_text_div = driver.find_element_by_class_name(
                    "msg-spinmail-thread__message-body")
                driver.execute_script(
                    'arguments[0].scrollTop = arguments[0].scrollHeight',
                    messaging_text_div)
                # NOTE(review): paragraphs are concatenated without a
                # separator, as before -- confirm whether that is intended.
                message = ''.join(
                    _clean_message_text(paragraph.text)
                    for paragraph in
                    messaging_text_div.find_elements_by_tag_name("p"))

                # add to db
                completed_date = datetime.now()
                updated_at = datetime.now()

                # A timestamp whose second token is AM/PM is passed as the
                # time argument, anything else as the date argument. The
                # length check keeps single-token stamps from raising
                # IndexError and wrongly triggering the fallback below.
                stamp_parts = created_at.split(' ')
                if len(stamp_parts) > 1 and stamp_parts[1] in ('AM', 'PM'):
                    created_at = get_message_created_time('', created_at)
                else:
                    created_at = get_message_created_time(created_at, '')

                values = (created_at, updated_at, message, completed_date,
                          message_type, owner_id, is_direct, is_read)
                if cur is not None:
                    botdb.add_to_db(cur, getmessages_query, *values)

            except Exception:
                messaging_div = driver.find_element_by_class_name(
                    "msg-s-message-list-container")
                thread_ul = messaging_div.find_element_by_css_selector(
                    "ul.msg-s-message-list")
                driver.execute_script(
                    'arguments[0].scrollTop = arguments[0].scrollHeight',
                    thread_ul)
                message_list = thread_ul.find_elements_by_css_selector(
                    "li.msg-s-message-list__event")

                create_at_dates = []
                created_at_times = []
                messages = []
                # indexes of events seen without a timestamp; they are
                # back-filled with the next timestamp that is found
                empty_time_ids = []
                prev_create_at_date = ''

                for index, message_li in enumerate(message_list):
                    # Events without their own date heading reuse the
                    # most recent one.
                    try:
                        create_at_date = message_li.find_element_by_css_selector(
                            "time.msg-s-message-list__time-heading").text
                        prev_create_at_date = create_at_date
                    except Exception:
                        create_at_date = prev_create_at_date
                    create_at_dates.append(create_at_date)

                    try:
                        created_at_time = message_li.find_element_by_css_selector(
                            "time.msg-s-message-group__timestamp").text
                        for time_id in empty_time_ids:
                            created_at_times[time_id] = created_at_time
                        empty_time_ids = []
                    except Exception:
                        created_at_time = ''
                        empty_time_ids.append(index)
                    created_at_times.append(created_at_time)

                    bubble = message_li.find_element_by_class_name(
                        "msg-s-event-listitem__message-bubble")
                    driver.execute_script(
                        'arguments[0].scrollTop = arguments[0].scrollHeight',
                        bubble)
                    body = bubble.find_element_by_class_name(
                        "msg-s-event-listitem__body")
                    messages.append(_clean_message_text(body.text))

                completed_date = datetime.now()
                updated_at = datetime.now()

                for date_text, time_text, message in zip(
                        create_at_dates, created_at_times, messages):
                    values = (get_message_created_time(date_text, time_text),
                              updated_at, message, completed_date,
                              message_type, owner_id, is_direct, is_read)
                    if cur is not None:
                        botdb.add_to_db(cur, getmessages_query, *values)

            time.sleep(5)

        time.sleep(5)
        bot_status = botstatus.DONE

    except Exception as e:
        # just consider all are okay now
        bot_status = botstatus.DONE
        print("ERROR:", e)
    finally:
        # Release the browser even on non-Exception exits (e.g. Ctrl-C).
        driver.close()

    completed_date = datetime.now()
    return bot_status, lastrun_date, completed_date
def _connected_since(badge_text):
    """Turn a relative "time ago" badge text into an absolute datetime.

    The second space-separated token is read as the amount and the third as
    the unit (minute/hour/day/week/month/year; a month counts as 30 days and
    a year as 365). Text that does not fit that shape, or names an unknown
    unit, yields the current time so the caller's list stays aligned with
    the connection links.
    """
    now = datetime.today()
    tokens = badge_text.split(" ")
    try:
        amount = int(tokens[1])
        unit = tokens[2]
    except (IndexError, ValueError):
        return now

    if "minute" in unit:
        return now - timedelta(minutes=amount)
    if "hour" in unit:
        return now - timedelta(hours=amount)
    if "day" in unit:
        return now - timedelta(days=amount)
    if "week" in unit:
        return now - timedelta(weeks=amount)
    if "month" in unit:
        return now - timedelta(days=amount * 30)
    if "year" in unit:
        return now - timedelta(days=amount * 365)
    return now


def get_contacts(email, password, cur=None, owner_id=None):
    """Scrape the account's LinkedIn connections and store them.

    Logs in through ``login_linkedin_withwebdriver``, opens "My Network" and
    the full connection list, scrolls to the bottom repeatedly, then parses
    every connection link with ``parse_connection_link`` and inserts it with
    ``botdb.add_to_db`` when ``cur`` is not ``None``.

    Args:
        email: account e-mail passed to the login helper.
        password: account password passed to the login helper.
        cur: database cursor, or ``None`` to scrape without storing.
        owner_id: owner identifier stored with each contact.

    Returns:
        ``(bot_status, lastrun_date, completed_date)``. ``bot_status`` is
        ``botstatus.DONE`` even when scraping failed; errors are only printed.
    """
    print("==== GET CONTACTS ======")
    lastrun_date = datetime.now()
    driver = login_linkedin_withwebdriver(email, password)

    try:
        time.sleep(15)
        # My Network contacts
        driver.find_element_by_class_name("nav-item--mynetwork").click()
        time.sleep(5)
        driver.find_element_by_css_selector(
            "a.mn-connections-summary__see-all").click()
        time.sleep(5)

        counts_text = driver.find_element_by_tag_name("h2").text
        # Strip thousands separators, as search() does, so int() does not
        # fail on counts such as "1,234".
        act_count = counts_text.split(" ")[0].replace(",", "")
        # presumably 40 connections load per scroll -- TODO confirm
        loop_range = int(act_count) // 40 + 1
        elem = driver.find_element_by_tag_name("html")
        print("loop_range:", loop_range)

        for _ in range(loop_range):
            elem.send_keys(Keys.END)
            time.sleep(5)

        connection_time_list = [
            str(_connected_since(badge.text))
            for badge in driver.find_elements_by_css_selector(
                "time.time-badge")
        ]

        connection_alink_lists = []
        for card_link in driver.find_elements_by_css_selector(
                "a.mn-connection-card__link"):
            connection_alink = card_link.get_attribute('href')
            connection_alink_lists.append(connection_alink)
            print('connection_alink:', connection_alink)

        for i, connection_link in enumerate(connection_alink_lists):
            # Log the link being processed (the original printed the
            # function object itself).
            print('get_contacts:', connection_link)
            result = parse_connection_link(driver, connection_link)
            print('result:', result)
            # The connection time is stored twice: once right after the
            # parsed profile fields and once before owner_id.
            values = result + (
                connection_time_list[i],
                botstatus.OLD_CONNECT_N,
                1,
                connection_time_list[i],
                owner_id,
            )

            if cur is not None:
                botdb.add_to_db(cur, getcontacts_query, *values)

        bot_status = botstatus.DONE

    except Exception as e:
        # Failures are deliberately still reported as DONE.
        bot_status = botstatus.DONE
        print("ERROR:", e)
    finally:
        # Release the browser even on non-Exception exits (e.g. Ctrl-C).
        driver.close()

    completed_date = datetime.now()
    return bot_status, lastrun_date, completed_date
Esempio n. 5
0
def get_search_contact_fast_with_urls(parse_urls, driver, cur, owner_id, search_id):
    """Fetch profile JSON for each URL inside the browser and store the hits.

    For every key of ``parse_urls`` an XMLHttpRequest to the
    ``/voyager/api/identity/profiles/<id>/`` endpoint is started from the
    page, with the ``JSESSIONID`` cookie value sent as ``Csrf-Token``. Each
    response is parked in a ``<div id="interceptedProfile_<n>">``. After a
    fixed 5 second wait the divs are read back, parsed as JSON and written
    through ``add_to_db``; after each profile the running count is written
    through ``add_to_db2``.

    Args:
        parse_urls: mapping whose keys are profile URLs; the second-to-last
            ``/``-separated segment of each URL is used as the profile id.
        driver: logged-in Selenium WebDriver.
        cur: database cursor forwarded to ``add_to_db`` / ``add_to_db2``.
        owner_id: owner identifier stored with each row.
        search_id: identifier of the search the rows belong to.

    Raises:
        ValueError: a response div is empty or does not hold valid JSON
            (e.g. the request had not finished within the wait).
        KeyError: an expected field is missing from the profile JSON.
    """
    # The original pasted the cookie value into the script unquoted, so the
    # script only parsed when the value carried its own double quotes.
    # Strip such quotes and hand the token over as a script argument.
    csrf_token = driver.get_cookie('JSESSIONID')['value'].strip('"')

    # Values are passed through ``arguments`` rather than spliced into the
    # source, so quotes in a URL or token cannot break the script. The
    # response is stored via ``textContent`` so it is kept verbatim and not
    # parsed as HTML.
    fetch_script = """
        var element = document.createElement('div');
        element.id = arguments[0];
        element.appendChild(document.createTextNode(""));
        document.body.appendChild(element);

        var xhttp = new XMLHttpRequest();
        xhttp.onreadystatechange = function() {
          if (this.readyState == 4 && this.status == 200) {
            element.textContent = this.responseText;
          }
        };
        xhttp.open('GET', arguments[1], true);
        xhttp.setRequestHeader('Csrf-Token', arguments[2]);
        xhttp.send();
    """

    print('parsing profile:', parse_urls)
    for count, url in enumerate(parse_urls):
        profile_id = url.split('/')[-2]
        api_url = ('https://www.linkedin.com/voyager/api/identity/profiles/'
                   + profile_id + '/')
        driver.execute_script(
            fetch_script, 'interceptedProfile_' + str(count), api_url,
            csrf_token)

    # Fixed wait for the asynchronous requests started above.
    time.sleep(5)

    for count, url in enumerate(parse_urls):
        print('============== count =========== :', count)
        holder = driver.find_element_by_id('interceptedProfile_' + str(count))
        profile = json.loads(holder.get_attribute('textContent'))

        txt_industry = profile['industryName']
        txt_firstname = profile['firstName']
        txt_lastname = profile['lastName']
        txt_linkedin_id = url.split('/')[-2]
        txt_location = profile['locationName']
        txt_occupation = profile['headline']

        # A headline shaped "<title> at <company>" is split into its two
        # parts; anything else is stored as the title with no company.
        if " at " in txt_occupation:
            title_company = txt_occupation.split(" at ")
            actor_title = title_company[0]
            actor_company = title_company[1]
        else:
            actor_title = txt_occupation
            actor_company = ""

        values = (actor_company, txt_industry, txt_location, actor_title,
                  txt_linkedin_id, txt_firstname + ' ' + txt_lastname)
        values = values + (owner_id, search_id, botstatus.CONNECT_REQ_N,)

        print('value insert:', values)
        add_to_db(cur, search_query, *values)

        # 1-based running count of rows written so far.
        values = (count + 1, search_id,)
        print('count insert:', values)
        add_to_db2(cur, search_update_query, *values)