コード例 #1
0
 def __init__(self):
     """
     Set up the database layer for this object.

     Obtains an engine from ``db_connect()``, hands it to
     ``create_tables()``, and stores a session factory bound to that
     engine on ``self.Session``.
     """
     db_engine = db_connect()
     create_tables(db_engine)
     session_factory = sessionmaker(bind=db_engine)
     self.Session = session_factory
コード例 #2
0
 def __init__(self):
         """
         Initialize the database connection and session factory, then wipe data.

         Obtains an engine from ``db_connect()``, passes it to
         ``create_deals_table()``, and stores a bound ``sessionmaker`` on
         ``self.Session``. Afterwards every row of ``Data`` and ``JsonData``
         is deleted, with a commit after each delete.

         Raises:
             Whatever the delete/commit calls raise; the session is rolled
             back before the exception is re-raised.
         """
         engine = db_connect()
         create_deals_table(engine)
         self.Session = sessionmaker(bind=engine)

         # Clear data from both tables.
         print('wiping databases')
         # Create the session *before* entering the try block: if Session()
         # itself failed inside the try, the except/finally clauses would hit
         # an unbound `session` and mask the real error.
         session = self.Session()
         try:
             session.query(Data).delete()
             session.commit()
             session.query(JsonData).delete()
             session.commit()
         except:
             # Deliberately broad: roll back on any failure, then re-raise.
             session.rollback()
             raise
         finally:
             session.close()
コード例 #3
0
 def __init__(self):
     """Connect to the database and keep a bound session factory.

     The engine returned by ``db_connect()`` is passed to
     ``create_tables()`` and then used to bind the ``sessionmaker`` stored
     on ``self.Session``.
     """
     connection = db_connect()
     create_tables(connection)
     bound_factory = sessionmaker(bind=connection)
     self.Session = bound_factory
コード例 #4
0
from string import Template
import os
import random
from sqlalchemy.orm import sessionmaker
import scraper.models as models

# Database engine and session factory used to retrieve names
engine = models.db_connect()
Session = sessionmaker(bind=engine)

# Template substitutions, defined depending on the type
substitutions = {}


def subs_measurement(session):
    """Return the ``name`` of a randomly chosen ``Measurement`` row.

    All ``models.Measurement`` rows are loaded through *session* and one is
    picked with ``random.choice``.
    """
    all_rows = session.query(models.Measurement).all()
    picked = random.choice(all_rows)
    return picked.name


def subs_technology(session):
    """Return a randomly chosen technology name.

    The candidates are the entries of ``models.technologies`` followed by
    the ``name`` of every ``models.InstrumentType`` row loaded through
    *session*; one candidate is picked with ``random.choice``.
    """
    # Copy so the module-level collection is never mutated.
    candidates = list(models.technologies)
    # The loop variable is named explicitly instead of shadowing the
    # builtin `type`.
    candidates.extend(
        instrument_type.name
        for instrument_type in session.query(models.InstrumentType).all()
    )
    return random.choice(candidates)


def subs_mission(session):
    """Return the ``name`` of a randomly chosen ``Mission`` row.

    All ``models.Mission`` rows are loaded through *session* and one is
    picked with ``random.choice``.
    """
    mission_rows = session.query(models.Mission).all()
    chosen = random.choice(mission_rows)
    return chosen.name

コード例 #5
0
ファイル: monster.py プロジェクト: LordNoteworthy/workspider
    def execute_js():
        """Apply to every unprocessed Monster job stored in the database.

        Loads the URLs of ``Jobs`` rows whose URL starts with
        ``http://offre-emploi.monster.fr`` and whose ``processed`` flag is
        false, signs in to Monster through Firefox using ``EMAIL`` and
        ``PASSWORD``, then opens the apply page of each job and submits the
        form when the page contains 'Vous postulez'. Every URL is flagged
        ``processed`` once it has been attempted, whether or not the
        application went through.

        The browser and the database session are released even when an
        error interrupts the run.

        Raises:
            NoSuchElementException: if an expected page element is missing.
            IndexError: if a job URL contains no recognizable job id.
        """
        from scraper.models import Jobs, db_connect
        from selenium.webdriver.common.action_chains import ActionChains
        from sqlalchemy.orm import sessionmaker
        from selenium.common.exceptions import NoSuchElementException, UnexpectedAlertPresentException
        from selenium import webdriver
        import re
        import time

        # Get DB engine
        engine = db_connect()
        Session = sessionmaker()
        Session.configure(bind=engine)
        session = Session()

        browser = None
        try:
            # Collect the urls of the jobs still to be processed
            pending = session.query(Jobs).filter(
                (Jobs.url.like('http://offre-emploi.monster.fr%'))
                & (Jobs.processed == False)  # noqa: E712 (SQL expression, not a Python bool test)
            ).all()
            urls = [job.url for job in pending]

            # Init browser with every cache disabled
            profile = webdriver.FirefoxProfile()
            profile.set_preference("browser.cache.disk.enable", False)
            profile.set_preference("browser.cache.memory.enable", False)
            profile.set_preference("browser.cache.offline.enable", False)
            profile.set_preference("network.http.use-cache", False)

            browser = webdriver.Firefox(profile)
            action = ActionChains(browser)

            # Login to user space
            browser.get("https://login.monster.fr/Login/SignIn")
            browser.find_element_by_name("EmailAddress").send_keys(EMAIL)
            browser.find_element_by_name("Password").send_keys(PASSWORD)

            elem = browser.find_element_by_xpath("//*[@id=\"signInContent\"]/form/div[3]/input[1]")
            action.move_to_element(elem).click().perform()
            time.sleep(5)  # give the sign-in time to complete

            # for each url, click on 'postuler'
            link = "http://offre-emploi.monster.fr/Apply/Apply.aspx?JobID="
            job_id_pattern = re.compile(r"\b\d{6}\w+")
            for url in urls:
                job_ids = job_id_pattern.findall(url)
                try:
                    apply_link = link + job_ids[0]
                    print("* Processing %s" % url)
                    browser.get(apply_link)
                    # The marker is plain ASCII, so it can be searched for
                    # directly in the page text without encoding it first.
                    if 'Vous postulez' in browser.page_source:
                        browser.find_element_by_css_selector("#CoverLetter1_DropDownListLetters > option:nth-child(2)").click()
                        browser.find_element_by_css_selector("#rbAuthorizedNo0").click()

                        # Click on "POSTULER"
                        browser.find_element_by_id('btnSubmit').click()
                        time.sleep(5)

                except UnexpectedAlertPresentException:
                    # Switch to the alert (it is not dismissed) and move on
                    # to the next job.
                    browser.switch_to_alert()
                    continue

                finally:
                    # Update database: flag the url as processed no matter
                    # how the attempt ended.
                    session.query(Jobs).filter(Jobs.url == url).update({'processed': True})
                    session.commit()

        finally:
            # Release resources even when login or an application failed.
            session.close()
            if browser is not None:
                browser.close()
コード例 #6
0
ファイル: monster.py プロジェクト: rogerconner/workspider
    def execute_js():
        """Submit an application for each unprocessed Monster job in the database.

        Reads the URLs of ``Jobs`` rows whose URL starts with
        ``http://offre-emploi.monster.fr`` and whose ``processed`` flag is
        false, signs in to Monster through Firefox using ``EMAIL`` and
        ``PASSWORD``, then visits the apply page of each job and submits the
        form when the page contains 'Vous postulez'. Each URL is marked
        ``processed`` after the attempt, regardless of its outcome.

        The browser and the database session are always released, including
        when an error aborts the run.

        Raises:
            NoSuchElementException: if an expected page element is missing.
            IndexError: if a job URL contains no recognizable job id.
        """
        from scraper.models import Jobs, db_connect
        from selenium.webdriver.common.action_chains import ActionChains
        from sqlalchemy.orm import sessionmaker
        from selenium.common.exceptions import NoSuchElementException, UnexpectedAlertPresentException
        from selenium import webdriver
        import re
        import time

        # Get DB engine
        engine = db_connect()
        Session = sessionmaker()
        Session.configure(bind=engine)
        session = Session()

        browser = None
        try:
            # Gather the urls of the jobs that still need processing
            unprocessed = session.query(Jobs).filter(
                (Jobs.url.like('http://offre-emploi.monster.fr%'))
                & (Jobs.processed == False)  # noqa: E712 (SQL expression, not a Python bool test)
            ).all()
            urls = [job.url for job in unprocessed]

            # Init browser with every cache disabled
            profile = webdriver.FirefoxProfile()
            profile.set_preference("browser.cache.disk.enable", False)
            profile.set_preference("browser.cache.memory.enable", False)
            profile.set_preference("browser.cache.offline.enable", False)
            profile.set_preference("network.http.use-cache", False)

            browser = webdriver.Firefox(profile)
            action = ActionChains(browser)

            # Login to user space
            browser.get("https://login.monster.fr/Login/SignIn")
            browser.find_element_by_name("EmailAddress").send_keys(EMAIL)
            browser.find_element_by_name("Password").send_keys(PASSWORD)

            elem = browser.find_element_by_xpath(
                "//*[@id=\"signInContent\"]/form/div[3]/input[1]")
            action.move_to_element(elem).click().perform()
            time.sleep(5)  # give the sign-in time to complete

            # for each url, click on 'postuler'
            link = "http://offre-emploi.monster.fr/Apply/Apply.aspx?JobID="
            job_id_pattern = re.compile(r"\b\d{6}\w+")
            for url in urls:
                job_ids = job_id_pattern.findall(url)
                try:
                    apply_link = link + job_ids[0]
                    print("* Processing %s" % url)
                    browser.get(apply_link)
                    # The marker is plain ASCII, so it can be searched for
                    # directly in the page text without encoding it first.
                    if 'Vous postulez' in browser.page_source:
                        browser.find_element_by_css_selector(
                            "#CoverLetter1_DropDownListLetters > option:nth-child(2)"
                        ).click()
                        browser.find_element_by_css_selector(
                            "#rbAuthorizedNo0").click()

                        # Click on "POSTULER"
                        browser.find_element_by_id('btnSubmit').click()
                        time.sleep(5)

                except UnexpectedAlertPresentException:
                    # Switch to the alert (it is not dismissed) and carry on
                    # with the next job.
                    browser.switch_to_alert()
                    continue

                finally:
                    # Update database: mark the url as processed however the
                    # attempt ended.
                    session.query(Jobs).filter(Jobs.url == url).update(
                        {'processed': True})
                    session.commit()

        finally:
            # Release resources even when login or an application failed.
            session.close()
            if browser is not None:
                browser.close()