def __init__(self):
    """
    Set up database access for this object.

    Obtains an engine from db_connect(), passes it to create_tables(),
    and stores a sessionmaker bound to that engine as ``self.Session``.
    """
    db_engine = db_connect()
    create_tables(db_engine)
    session_factory = sessionmaker(bind=db_engine)
    self.Session = session_factory
def __init__(self):
    """
    Initializes database connection and sessionmaker.
    Calls create_deals_table() on the engine, then deletes every existing
    ``Data`` and ``JsonData`` row so the object starts from empty tables.

    Raises:
        Any exception raised while wiping the tables is re-raised after
        the session has been rolled back and closed.
    """
    engine = db_connect()
    create_deals_table(engine)
    self.Session = sessionmaker(bind=engine)

    # Clear data from both tables.
    print('wiping databases')
    # The session is opened *before* the try block: if creating it fails
    # there is nothing to roll back or close, and the cleanup clauses must
    # not reference an unbound name (which would hide the real error
    # behind a NameError).
    session = self.Session()
    try:
        session.query(Data).delete()
        session.commit()
        session.query(JsonData).delete()
        session.commit()
    except BaseException:
        # Same reach as the former bare ``except:``, spelled out explicitly;
        # the error is always re-raised after the rollback.
        session.rollback()
        raise
    finally:
        session.close()
def __init__(self):
    """
    Prepare the database handles used by this object.

    Gets an engine from db_connect(), hands it to create_tables(), and
    keeps a sessionmaker bound to the engine on ``self.Session``.
    """
    db_engine = db_connect()
    create_tables(db_engine)
    self.Session = sessionmaker(bind=db_engine)
from string import Template
import os
import random

from sqlalchemy.orm import sessionmaker

import scraper.models as models

# Bind a session factory to the database so names can be looked up
engine = models.db_connect()
Session = sessionmaker(bind=engine)

# Registry of template substitutions, keyed by type
substitutions = {}


def subs_measurement(session):
    """Return the ``name`` of a randomly chosen Measurement row."""
    all_measurements = session.query(models.Measurement).all()
    picked = random.choice(all_measurements)
    return picked.name


def subs_technology(session):
    """Return a random technology name.

    Candidates are the entries of ``models.technologies`` followed by the
    ``name`` of every InstrumentType row.
    """
    candidates = list(models.technologies)
    candidates.extend(
        instrument_type.name
        for instrument_type in session.query(models.InstrumentType).all()
    )
    return random.choice(candidates)


def subs_mission(session):
    """Return the ``name`` of a randomly chosen Mission row."""
    all_missions = session.query(models.Mission).all()
    picked = random.choice(all_missions)
    return picked.name
def execute_js():
    """Submit an application for every unprocessed Monster.fr job in the database.

    Loads the URL of each ``Jobs`` row whose URL starts with
    ``http://offre-emploi.monster.fr`` and whose ``processed`` flag is false,
    signs in through a Firefox WebDriver using the ``EMAIL`` and ``PASSWORD``
    names defined outside this function, then opens the application page of
    each job and submits the form when the page contains ``'Vous postulez'``.

    Every URL that is attempted is flagged ``processed`` in the database,
    whether or not the application went through.

    The database session and the browser are always closed, including when
    an error aborts the run.

    Raises:
        NoSuchElementException: if an expected page element is missing.
        IndexError: if a job URL contains no job id matching the pattern.
    """
    from scraper.models import Jobs, db_connect
    from selenium.webdriver.common.action_chains import ActionChains
    from sqlalchemy.orm import sessionmaker
    from selenium.common.exceptions import NoSuchElementException, UnexpectedAlertPresentException
    from selenium import webdriver
    import re
    import time

    # Get DB engine
    engine = db_connect()
    Session = sessionmaker()
    Session.configure(bind=engine)
    session = Session()

    browser = None
    try:
        # Collect the URLs of the jobs still to be processed
        pending = session.query(Jobs).filter(
            (Jobs.url.like('http://offre-emploi.monster.fr%')) &
            (Jobs.processed == False)  # noqa: E712 - builds a SQL expression
        ).all()
        urls = [job.url for job in pending]

        # Init browser with the disk, memory, offline and HTTP caches disabled
        profile = webdriver.FirefoxProfile()
        profile.set_preference("browser.cache.disk.enable", False)
        profile.set_preference("browser.cache.memory.enable", False)
        profile.set_preference("browser.cache.offline.enable", False)
        profile.set_preference("network.http.use-cache", False)
        browser = webdriver.Firefox(profile)
        action = ActionChains(browser)

        # Login to user space
        browser.get("https://login.monster.fr/Login/SignIn")
        browser.find_element_by_name("EmailAddress").send_keys(EMAIL)
        browser.find_element_by_name("Password").send_keys(PASSWORD)
        elem = browser.find_element_by_xpath("//*[@id=\"signInContent\"]/form/div[3]/input[1]")
        action.move_to_element(elem).click().perform()
        time.sleep(5)

        # For each url, open the application form and click on 'postuler'
        link = "http://offre-emploi.monster.fr/Apply/Apply.aspx?JobID="
        job_id_pattern = re.compile(r"\b\d{6}\w+")
        for url in urls:
            job_ids = job_id_pattern.findall(url)
            try:
                apply_link = link + job_ids[0]
                print("* Processing %s" % url)
                browser.get(apply_link)
                # Compare as text: encoding the page source first would make
                # this membership test raise TypeError on Python 3.
                if 'Vous postulez' in browser.page_source:
                    browser.find_element_by_css_selector("#CoverLetter1_DropDownListLetters > option:nth-child(2)").click()
                    browser.find_element_by_css_selector("#rbAuthorizedNo0").click()
                    # Click on "POSTULER"
                    browser.find_element_by_id('btnSubmit').click()
                    time.sleep(5)
            except NoSuchElementException:
                # A missing form element aborts the whole run.
                raise
            except UnexpectedAlertPresentException:
                # Switch to the alert and move on to the next job.
                # NOTE: the alert is not dismissed here.
                browser.switch_to_alert()
            finally:
                # Update database
                session.query(Jobs).filter(Jobs.url == url).update({'processed': True})
                session.commit()
    finally:
        # Release both resources even if the run was aborted, and even if
        # closing the session itself fails.
        try:
            session.close()
        finally:
            if browser is not None:
                browser.close()
def execute_js():
    """Submit an application for every unprocessed Monster.fr job in the database.

    Loads the URL of each ``Jobs`` row whose URL starts with
    ``http://offre-emploi.monster.fr`` and whose ``processed`` flag is false,
    signs in through a Firefox WebDriver using the ``EMAIL`` and ``PASSWORD``
    names defined outside this function, then opens the application page of
    each job and submits the form when the page contains ``'Vous postulez'``.

    Every URL that is attempted is flagged ``processed`` in the database,
    whether or not the application went through.

    The database session and the browser are always closed, including when
    an error aborts the run.

    Raises:
        NoSuchElementException: if an expected page element is missing.
        IndexError: if a job URL contains no job id matching the pattern.
    """
    from scraper.models import Jobs, db_connect
    from selenium.webdriver.common.action_chains import ActionChains
    from sqlalchemy.orm import sessionmaker
    from selenium.common.exceptions import NoSuchElementException, UnexpectedAlertPresentException
    from selenium import webdriver
    import re
    import time

    # Get DB engine
    engine = db_connect()
    Session = sessionmaker()
    Session.configure(bind=engine)
    session = Session()

    browser = None
    try:
        # Collect the URLs of the jobs still to be processed
        pending = session.query(Jobs).filter(
            (Jobs.url.like('http://offre-emploi.monster.fr%')) &
            (Jobs.processed == False)  # noqa: E712 - builds a SQL expression
        ).all()
        urls = [job.url for job in pending]

        # Init browser with the disk, memory, offline and HTTP caches disabled
        profile = webdriver.FirefoxProfile()
        profile.set_preference("browser.cache.disk.enable", False)
        profile.set_preference("browser.cache.memory.enable", False)
        profile.set_preference("browser.cache.offline.enable", False)
        profile.set_preference("network.http.use-cache", False)
        browser = webdriver.Firefox(profile)
        action = ActionChains(browser)

        # Login to user space
        browser.get("https://login.monster.fr/Login/SignIn")
        browser.find_element_by_name("EmailAddress").send_keys(EMAIL)
        browser.find_element_by_name("Password").send_keys(PASSWORD)
        elem = browser.find_element_by_xpath(
            "//*[@id=\"signInContent\"]/form/div[3]/input[1]")
        action.move_to_element(elem).click().perform()
        time.sleep(5)

        # For each url, open the application form and click on 'postuler'
        link = "http://offre-emploi.monster.fr/Apply/Apply.aspx?JobID="
        job_id_pattern = re.compile(r"\b\d{6}\w+")
        for url in urls:
            job_ids = job_id_pattern.findall(url)
            try:
                apply_link = link + job_ids[0]
                print("* Processing %s" % url)
                browser.get(apply_link)
                # Compare as text: encoding the page source first would make
                # this membership test raise TypeError on Python 3.
                if 'Vous postulez' in browser.page_source:
                    browser.find_element_by_css_selector(
                        "#CoverLetter1_DropDownListLetters > option:nth-child(2)"
                    ).click()
                    browser.find_element_by_css_selector(
                        "#rbAuthorizedNo0").click()
                    # Click on "POSTULER"
                    browser.find_element_by_id('btnSubmit').click()
                    time.sleep(5)
            except NoSuchElementException:
                # A missing form element aborts the whole run.
                raise
            except UnexpectedAlertPresentException:
                # Switch to the alert and move on to the next job.
                # NOTE: the alert is not dismissed here.
                browser.switch_to_alert()
            finally:
                # Update database
                session.query(Jobs).filter(Jobs.url == url).update(
                    {'processed': True})
                session.commit()
    finally:
        # Release both resources even if the run was aborted, and even if
        # closing the session itself fails.
        try:
            session.close()
        finally:
            if browser is not None:
                browser.close()