Beispiel #1
0
    def start_requests(self):
        """Prepare a browser session and a database handle, then emit the
        initial request.

        A driver is obtained from ``AC.initialize_tor()``, wrapped in a
        ``WebDriverWait`` created with a timeout argument of 20, and passed to
        ``AC.login_as_worker``.  A MySQL connection to ``guru_crawler`` is
        opened, and the driver, wait, connection and cursor are handed to
        ``self.parse`` through the request ``meta``.
        """
        from general_utils_lib import account_creation as AC
        from selenium.webdriver.support.ui import WebDriverWait

        seed_urls = ['http://www.guru.com/d/jobs/pg/9/']

        browser = AC.initialize_tor()
        browser_wait = WebDriverWait(browser, 20)

        # No credentials are supplied here; an empty dict is passed through.
        AC.login_as_worker(browser, browser_wait, dict())

        connection = MySQLdb.connect("localhost", "root", "", "guru_crawler")

        shared_state = {
            "driver": browser,
            "driver_wait": browser_wait,
            "db": connection,
            "cursor": connection.cursor(),
        }

        yield scrapy.Request(url=seed_urls[0],
                             callback=self.parse,
                             meta=shared_state)
def check_login():
    """Log every account from the accounts CSV in and out once.

    Rows are read from ``guru_accounts.csv``; row 0 is skipped and, for each
    remaining row, column 0 is used as the username and column 2 as the
    password.  After every fourth account ``AC.wait_ip_change`` is called and
    the driver it returns replaces the current one.

    The driver is always closed on exit, including when a step raises.
    """
    from general_utils_lib import read_files, account_creation as AC

    login_dets = read_files.read_csv(
        "/Users/laveeshrohra/Documents/Workspace/job_RA/guru_accounts.csv")

    driver = AC.initialize_tor()
    try:
        driver_wait = AC.initialize_wait(driver)

        # Row 0 is skipped -- presumably a header row; confirm against the CSV.
        for i in range(1, len(login_dets)):
            row = login_dets[i]
            login = {"username": row[0], "password": row[2]}
            AC.login(login, driver, driver_wait)
            AC.check_and_click_el(driver_wait, {
                "name": "ctl00_ContentPlaceHolder1_ucSq_aSkip",
                "click": 1
            }, {"name": "e-topnav-dash-in"}, login)
            AC.logout(driver_wait)

            if (i % 4) == 0:
                driver = AC.wait_ip_change(driver)
                # The wait object is tied to the driver it was built from, so
                # rebuild it in case wait_ip_change returned a new driver
                # (harmless if it returned the same one).
                driver_wait = AC.initialize_wait(driver)
    finally:
        # Do not leave the browser running once the check is finished or has
        # failed part-way through.
        driver.close()
Beispiel #3
0

def execute_query(db, cursor, query):
    """Execute *query* on *cursor* and commit the transaction on *db*.

    If executing or committing raises an ordinary exception, the transaction
    is rolled back, the failing query and the error are printed, and the call
    pauses for 10 seconds before returning.  The error is not re-raised.

    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately not caught, so
    the process can be stopped without first sitting through the pause.

    Returns:
        None in both the success and the handled-failure case.
    """
    try:
        cursor.execute(query)
        db.commit()
    except Exception as exc:
        db.rollback()
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(query)
        print("query failed: %r" % (exc,))
        time.sleep(10)


def get_all_data(project_urls, driver, driver_wait, cursor, db):
    """Forward all arguments to ``get_project_data`` and return its result
    unchanged."""
    project_data = get_project_data(project_urls, driver, driver_wait, cursor,
                                    db)
    return project_data


#print get_skills_and_stars("http://www.guru.com/freelancers/intelex-informatics/reviews")
import MySQLdb
import os

# Script entry: collect project links, log in, store the project data, then
# delete the links file once everything has been processed successfully.
file_name = '/Users/laveeshrohra/Documents/Workspace/job_RA/new_project_desc.csv'

driver = AC.initialize_tor()
try:
    driver_wait = WebDriverWait(driver, 5)
    db = MySQLdb.connect("localhost", "root", "", "guru_crawler")
    try:
        cursor = db.cursor()

        links = get_project_links.get_all_links(driver, driver_wait, file_name)
        AC.login_as_worker(driver, driver_wait)
        get_all_data(links, driver, driver_wait, cursor, db)

        # Only reached when every step above succeeded, so the links file
        # survives a failed run.
        os.remove(file_name)
    finally:
        # Release the database connection whether or not the run succeeded.
        db.close()
finally:
    # Always shut the browser window, even after an error.
    driver.close()

#check_json_resp()
Beispiel #4
0
def post_pic():  #driver, driver_wait, login_dets
    """Update the location fields of every account in ``guru_accounts.csv``.

    For each row after the first, the account is logged in (column 0 as
    username, column 2 as password), the account-modification page is opened,
    the city is set to "Los Angeles", the country and state options with
    values '1' and '5' are selected, a postal code chosen at random from a
    fixed list is entered, the form is submitted and the account is logged
    out.

    A row that raises is recorded in ``logs.csv`` as the row's first column
    plus the formatted traceback, and processing continues with the next row.
    The picture upload itself (``post_pic_dom``) is currently disabled.

    The log file and the driver are closed on exit, including on unexpected
    errors.
    """
    from selenium.webdriver.support.ui import WebDriverWait
    from general_utils_lib import read_files
    from selenium.webdriver.support import expected_conditions as EC
    import random, csv, traceback

    url = "http://www.guru.com/emp/modifyaccount.aspx"

    # Only consumed by the disabled post_pic_dom call below; kept so the call
    # can be re-enabled without reconstructing the mapping.
    image_urls = {
        "wf": "https://s14.postimg.org/3znnyktrl/image.jpg",
        "wm": "https://s14.postimg.org/5dqvlozap/image.jpg",
        "bm": "https://s22.postimg.org/v4w2nat81/image.jpg",
        "bf": "https://s21.postimg.org/k4gqkxj4n/image.jpg",
        "im": "https://s18.postimg.org/vgy5qdkkp/image.jpg",
        "if": "https://s22.postimg.org/nd4nhy3c1/image.jpg",
        "am": "https://s21.postimg.org/azwep0vdz/image.jpg",
        "af": "https://s21.postimg.org/8ubm72z3r/image.jpg",
    }

    # Named zip_codes rather than zip so the builtin is not shadowed.
    zip_codes = ["90003", "90039", "90095", "90013", "90021"]

    driver = AC.initialize_tor()
    log = None
    try:
        driver_wait = WebDriverWait(driver, 20)

        #Login Module
        file_data = read_files.read_csv(
            "/Users/laveeshrohra/Documents/Workspace/job_RA/guru_accounts.csv")
        login_dets = dict()

        log = open('/Users/laveeshrohra/Documents/Workspace/job_RA/logs.csv',
                   "w+")
        writer = csv.writer(log,
                            delimiter=',',
                            quotechar='"',
                            quoting=csv.QUOTE_ALL)

        # Row 0 is skipped -- presumably a header row; confirm against the CSV.
        for i in range(1, len(file_data)):

            data = file_data[i]
            try:
                login_dets["username"] = data[0].strip()
                login_dets["password"] = data[2].strip()
                AC.login(login_dets, driver, driver_wait)
                AC.check_and_click_el(driver_wait, {
                    "name": "ctl00_ContentPlaceHolder1_ucSq_aSkip",
                    "click": 1
                }, {"name": "e-topnav-dash-in"})

                driver.get(url)

                city = driver.find_element(
                    AC.constants.BY_ID,
                    "ctl00_guB_ctl00_txtCity_txtCity_TextBox")
                city.clear()
                city.send_keys("Los Angeles")
                driver.find_element(
                    AC.constants.BY_XPATH,
                    "//select[@id='ctl00_guB_ctl00_ddlCountry_ddlCountry_Select']/option[@value='1']"
                ).click()
                # The state option is waited for before it is clicked.
                state = driver_wait.until(
                    EC.element_to_be_clickable((
                        AC.constants.BY_XPATH,
                        "//select[@id='ctl00_guB_ctl00_ddlState_ddlState_Select']/option[@value='5']"
                    )))
                state.click()

                zip_el = driver.find_element(
                    AC.constants.BY_ID,
                    "ctl00_guB_ctl00_txtPostalCode_txtPostalCode_TextBox")
                zip_el.clear()
                zip_el.send_keys(random.choice(zip_codes))

                AC.check_and_click_el(driver_wait, {
                    "name": "ctl00_guB_ctl00_btnSubmit_btnSubmit_Button",
                    "click": 1
                }, {"name": "ctl00_guB_navigation"})

                # Compare by value: `is "Unknown"` tested object identity and
                # was effectively never true for text read from a file.
                # NOTE(review): columns 3 and 4 presumably select the picture
                # from image_urls -- confirm against the CSV layout.
                has_picture_data = (data[4].strip() != "Unknown"
                                    and data[3].strip())
                if has_picture_data:
                    # post_pic_dom(driver, driver_wait, data, image_urls)
                    pass

                # Log out for every row, including rows without picture data;
                # otherwise the next AC.login starts from a live session.
                AC.logout(driver_wait)
            except Exception:
                # format_exc() returns the traceback text; print_exc() returns
                # None, which is what used to end up in the log.
                writer.writerow([data[0].strip(), traceback.format_exc()])
                continue
    finally:
        if log is not None:
            log.close()
        driver.close()
Beispiel #5
0
def post_guru_project():
    """Post one project per loop iteration, rotating through the accounts CSV.

    With ``iterations = 1`` the loop runs for ``i`` in 1..9.  For each ``i``:

    * an account row is chosen as ``login_dets[i % len(login_dets)]``, using
      row 1 when that index would be 0;
    * the project title is picked at random from ``titles`` and the
      description is ``project_desc[i - 1]``;
    * the account is logged in, the post-project form is filled and submitted,
      the freelancers link is clicked, and the account is logged out;
    * the username and project details are appended to
      ``project_details.csv``, followed by a 210 second pause.

    An iteration that raises is recorded in ``logs.csv`` as the first column
    of the account row plus the formatted traceback, and the loop continues.
    Both files and the driver are closed on exit, including on errors raised
    outside the per-iteration handler.
    """
    import menuPagesParse as Menu
    import random, csv, traceback

    titles = [
        "Web Scraping", "Scrape the website", "Data Extractor",
        "Extract data from website", "Gather data from site", "Data Collector",
        "Collect all data from website", "Website scraper",
        "Collection of data", "Extraction of Data", "Website scraping",
        "Scrape the website", "Webpage parsing", "Scraping data from website",
        "Network page scraper", "Collect the data from website",
        "Script for web scraping", "Extraction of data from Web",
        "Scrape data from web", "Collecting info from website",
        "Webpage Scraping", "Site scraping", "Web scraping", "Html scraper",
        "Script for web parsing", "Parsing web page", "Webs scraper",
        "Internet site scraping", "Scrape data from site"
    ]

    driver = AC.initialize_tor()
    of = None
    log = None
    try:
        driver_wait = AC.initialize_wait(driver)

        project_desc = Menu.get_project_desc(
            '/Users/laveeshrohra/Documents/Workspace/job_RA/project_desc.csv')

        # Row 2 of the sample file is used as the template that is modified
        # in place for every post.
        project_details = read_files.read_csv(
            "/Users/laveeshrohra/Documents/Workspace/job_RA/sample_project_posts.csv"
        )[2]

        login_dets = read_files.read_csv(
            "/Users/laveeshrohra/Documents/Workspace/job_RA/guru_accounts.csv")

        of = open(
            '/Users/laveeshrohra/Documents/Workspace/job_RA/project_details.csv',
            'a+')
        writer = csv.writer(of,
                            delimiter=',',
                            quotechar='"',
                            quoting=csv.QUOTE_ALL)
        log = open('/Users/laveeshrohra/Documents/Workspace/job_RA/logs.csv',
                   "w+")
        log_writer = csv.writer(log,
                                delimiter=',',
                                quotechar='"',
                                quoting=csv.QUOTE_ALL)

        iterations = 1
        prev_iter = 9 * (iterations - 1)

        for i in range(prev_iter + 1, (iterations * 9) + 1):
            # Wrap around the account list but never pick row 0.
            row_idx = i % len(login_dets)
            login = login_dets[row_idx if row_idx > 0 else 1]

            try:
                project_details[0] = random.choice(titles)
                # Original note: 45+ after every rotation.
                project_details[1] = project_desc[i - 1]
                input_data = get_input_data(login, project_details)

                AC.login(input_data["login"], driver, driver_wait)
                AC.check_and_click_el(driver_wait, {
                    "name": "ctl00_ContentPlaceHolder1_ucSq_aSkip",
                    "click": 1
                }, {"name": "e-topnav-dash-in"})

                AC.check_and_click_el(
                    driver_wait, {
                        "name": "e-topnav-postjob-in",
                        "click": 1
                    }, {"name": "ctl00_guB_ucPostProject_txtPT_txtPT_TextBox"})

                els_data = element_list(input_data["data"])
                submit_btn = {
                    "name": ".//button[text()='Continue']",
                    "type": AC.constants.BY_XPATH
                }
                assert_page = {"next_page": "Post Project Confirmation"}

                AC.fill_form(driver, driver_wait, None, assert_page, els_data,
                             submit_btn, False)
                AC.check_and_click_el(
                    driver_wait, {
                        "name": ".//a[@href='/d/freelancers/']",
                        "type": AC.constants.BY_XPATH,
                        "click": 1
                    }, {"name": "ctl00_guB_txtKeyWord"})
                AC.logout(driver_wait)

                write_data = [input_data["login"]['username']]
                write_data.extend(project_details)
                writer.writerow(write_data)
                # Pause between successful posts.
                time.sleep(210)

            except Exception:
                # format_exc() returns the traceback text; print_exc() returns
                # None, which is what used to end up in the log.
                log_writer.writerow([login[0], traceback.format_exc()])
                continue
    finally:
        for handle in (of, log):
            if handle is not None:
                handle.close()
        driver.close()
from selenium.webdriver.support.ui import WebDriverWait
from general_utils_lib import account_creation as AC
from general_utils_lib import general_utils as GU
from general_utils_lib import read_files
import gmail_read, time, csv, itertools, random, traceback

# Created at import time: a driver from AC.initialize_tor() and a
# WebDriverWait bound to it with a timeout argument of 5.
driver = AC.initialize_tor()  # alternatives left by the author: "_browser()" suffix variant / AC.initialize_driver()
driver_wait = WebDriverWait(driver, 5)


def fill_elem_list(name, email, password):
    element_ids = list()
    #freelancer_dets = ["freelancer", "btnCreateAccountFreelancer_btnCreateAccountFreelancer_Button"]
    element_ids.append({
        "name": "employer",
        "value": None,
        "click": 1
    })  #freelancer - for working
    element_ids.append({
        "name": "ucRegistration_txtFullName_txtFullName_TextBox",
        "value": name,
        "click": 0
    })
    element_ids.append({
        "name": "ucRegistration_txtEmail_txtEmail_TextBox",
        "value": email,
        "click": 0
    })
    element_ids.append({
        "name": "ucRegistration_txtPassword_txtPassword_TextBox",
        "value": password,