コード例 #1
0
class SeleniumAccess(object):
    """Own the remote Selenium WebDriver session used for scraping.

    The session is created by ``open_selenium`` and released by
    ``close_selenium``; in between it is available as ``self.driver``.
    """

    # Class-level defaults so the attributes exist before they are assigned.
    logger = None
    config = None
    driver = None

    def __init__(self, config, level_log):
        """Keep the configuration and build a logger named after this class.

        Args:
            config: object with a ``get`` method; ``get("urlSelenium")`` is
                read when the driver is opened.
            level_log: log level handed to ``Logger``.
        """
        self.logger = Logger(self.__class__.__name__, level_log).get()
        self.config = config

    def open_selenium(self):
        """Open a remote Chrome driver against the configured Selenium URL."""
        self.logger.debug("Open Selenium")
        self.driver = webdriver.Remote(
            command_executor=self.config.get("urlSelenium"),
            desired_capabilities=DesiredCapabilities.CHROME)
        self.logger.debug("IS selenium open %r", self.driver is not None)

    def close_selenium(self):
        """End the driver session, if one is open, and forget the driver.

        Safe to call when no driver has been opened.
        """
        driver = self.driver
        if driver is None:
            return
        try:
            # quit() ends the whole remote session; close() would only close
            # the current window and leave the session allocated on the
            # Selenium server (see the Selenium WebDriver API reference).
            driver.quit()
        finally:
            # Never keep a reference to a driver we attempted to shut down,
            # even if the shutdown request itself failed.
            self.driver = None
コード例 #2
0
class FindFlights(object):
    """Scrape flight search result pages and store one record per URL."""

    # Class-level defaults so the attributes exist before __init__ runs.
    seleniumaccess = None
    mongodbaccess = None
    logger = None
    holidays = None

    def __init__(self, config, mongo_db_access, level_log):
        """Wire up the collaborators used while scraping.

        Args:
            config: configuration handed to ``SeleniumAccess``.
            mongo_db_access: object providing ``insert`` and ``delete_one``.
            level_log: log level handed to every logger created here.
        """
        self.logger = Logger(self.__class__.__name__, level_log).get()
        self.mongodbaccess = mongo_db_access
        self.seleniumaccess = SeleniumAccess(config, level_log)
        self.holidays = Holidays(level_log)
        self.logger.info("Inicio: %s", datetime.datetime.now())

    def get_flights(self, urls):
        """Process every entry of ``urls`` with a single Selenium session.

        Args:
            urls: iterable of dict-like URL descriptors (see
                ``url_to_flight``).

        Returns:
            dict with the accumulated ``save`` / ``warn`` / ``error`` counts.
        """
        self.logger.info("Process each url")
        result = {"save": 0, "warn": 0, "error": 0}

        self.seleniumaccess.open_selenium()
        try:
            driver = self.seleniumaccess.driver
            time.sleep(1)
            driver.get("http://www.google.com")
            time.sleep(1)

            for url in urls:
                accumulate_dic(result, self.url_to_flight(url, driver))
        finally:
            # Release the remote browser even when a URL raises something
            # url_to_flight does not handle.
            self.seleniumaccess.close_selenium()
        return result

    def url_to_flight(self, url, driver):
        """Load one search URL, store the first result and drop the URL.

        Args:
            url: dict-like descriptor; the keys ``url``, ``type``, ``from``,
                ``to``, ``dateDirect`` and ``dateReturn`` are read.
            driver: open Selenium driver.

        Returns:
            dict with exactly one of ``save`` / ``warn`` / ``error`` set to 1.
            A missing element counts as ``warn``; a stale element or a
            timeout counts as ``error``.
        """
        driver.get(url.get("url", "http://google.es"))
        try:
            url_insert = self._scrape_flight(url, driver)
            self.logger.debug("Insert url elemento: %s", url_insert)
            self.mongodbaccess.insert("vuelos", url_insert)
            self.mongodbaccess.delete_one("urls", {"url": url.get("url", "")})
            print("from: {0}, to: {1}, dateDirect: {2}, dateReturn: {3}, price: {4}".format(
                url_insert["from"], url_insert["to"],
                self._format_date(url_insert["dateDirect"]),
                self._format_date(url_insert["dateReturn"]),
                url_insert["precio"]))
        except StaleElementReferenceException as error_ref:
            self._report_failure(url, error_ref)
            time.sleep(1)
            return {"save": 0, "warn": 0, "error": 1}
        except NoSuchElementException as error_no_such:
            self._report_failure(url, error_no_such)
            time.sleep(1)
            return {"save": 0, "warn": 1, "error": 0}
        except TimeoutException as error_time_out:
            self._report_failure(url, error_time_out,
                                 header="-- ERROR -- TimeOut *****************")
            return {"save": 0, "warn": 0, "error": 1}
        return {"save": 1, "warn": 0, "error": 0}

    def _scrape_flight(self, url, driver):
        """Read the result fields from the loaded page and build the record."""
        precio_string = driver.find_element_by_class_name(
            "gws-flights-results__price").text
        # The label describing the fare lives in a different element for
        # entries whose "type" is "o".
        if url.get("type", "") == "o":
            type_class = "gws-flights-form__menu-label"
        else:
            type_class = "gws-flights-results__price-annotation"
        type_flight = driver.find_element_by_class_name(type_class).text

        date_direct = url.get("dateDirect", "XXX")
        date_return = url.get("dateReturn", "YYY")
        return {
            "dBusqueda": datetime.datetime.now(),
            "precio": self._parse_price(precio_string),
            "type": type_flight,
            "horaS": driver.find_element_by_class_name(
                "gws-flights-results__times").text,
            "horaLl": "",
            "company": driver.find_element_by_class_name(
                "gws-flights-results__carriers").text,
            "duracion": driver.find_element_by_class_name(
                "gws-flights-results__duration").text,
            "escalas": driver.find_element_by_class_name(
                "gws-flights-results__itinerary-stops").text,
            "from": url.get("from", "XXX"),
            "to": url.get("to", "XXX"),
            "dateDirect": date_direct,
            "dateReturn": date_return,
            "holidays": self.holidays.get_number_holidays(date_direct,
                                                          date_return),
        }

    @staticmethod
    def _parse_price(text):
        """Turn a price label such as ``"€1.234,56"`` into a float.

        The first character is dropped (assumed to be a one-character
        currency symbol -- TODO confirm for every locale scraped), "." is
        treated as a thousands separator and "," as the decimal mark.
        """
        digits = text[1:].replace(".", "")
        # The previous code only replaced ", " (comma + space), which never
        # matches a decimal comma; that form is still accepted first.
        return float(digits.replace(", ", ".").replace(",", "."))

    @staticmethod
    def _format_date(value):
        """Format ``value`` as YYYY-MM-DD when it supports ``strftime``.

        Anything else (for instance the "XXX"/"YYY" placeholders used when a
        date is missing) is returned unchanged so the summary line can still
        be printed.
        """
        formatter = getattr(value, "strftime", None)
        if formatter is None:
            return value
        return formatter("%Y-%m-%d")

    @staticmethod
    def _report_failure(url, error, header=None):
        """Print the failing URL descriptor and the exception to stdout."""
        if header is not None:
            print(header)
        print("****************************")
        print(url)
        print(error)
コード例 #3
0
class MongoDBAccess(object):
    """Wrapper around a MongoDB client that checks the connection first.

    Every operation calls ``status()`` before touching the database; when the
    check fails the operation logs an error and returns ``None``.
    """

    # Class-level defaults: both stay None when the client cannot be built.
    db_access = None
    _client = None

    def __init__(self, config, levelLog):
        """Create the client and select the database named in ``config``.

        Args:
            config: object with a ``get`` method; ``get("url", "")`` and
                ``get("nameDB")`` are read.
            levelLog: log level handed to ``Logger``.
        """
        self.logger = Logger(self.__class__.__name__, levelLog).get()
        # NOTE(review): this overrides the level requested through
        # ``levelLog``; presumably deliberate to keep debug output quiet --
        # confirm.
        self.logger.setLevel('INFO')

        try:
            self.logger.debug(config.get("url", ""))
            self._client = MongoClient(config.get("url", ""))
            self.db_access = self._client[config.get("nameDB")]
            self.logger.info("-- INFO -- DATA BASE CONECT OK")
        except ConfigurationError:
            self.logger.error("ConfigurationErr")
        except ConnectionFailure:
            self.logger.error("ConnectionFailure")
        except OperationFailure:
            self.logger.error("Authentication failure")

    def status(self):
        """Return True when the server answers ``server_info()``, else False."""
        if self._client is None:
            return False
        try:
            self.logger.debug(self._client.server_info())
            return True
        except ConnectionFailure:
            self.logger.error("ConnectionFailure")
            return False
        except OperationFailure:
            self.logger.error("Authentication failure")
            return False

    def _collection(self, collection, operation):
        """Return the named collection, or None (and log) when not connected.

        ``operation`` is only used to name the caller in the error message.
        """
        if self.status():
            return self.db_access[collection]
        self.logger.error("Database Not INIT %s", operation)
        return None

    @staticmethod
    def _sort_pairs(sort):
        """Convert a ``{field: direction}`` mapping to a list of pairs.

        A real list is built because ``dict.items()`` is only a view on
        Python 3. ``None`` is passed through unchanged.
        """
        return None if sort is None else list(sort.items())

    def find_one(self, collection, query, sort=None):
        """Return the first document matching ``query``, or None.

        Args:
            collection: collection name.
            query: filter document.
            sort: optional ``{field: direction}`` mapping.
        """
        target = self._collection(collection, "Find_one")
        if target is None:
            return None
        self.logger.info("Access to collection: %s, query %s", collection,
                         query)
        return target.find_one(query, sort=self._sort_pairs(sort))

    def find(self, collection, query, sort=None, limite=None):
        """Return a cursor over the documents matching ``query``.

        Args:
            collection: collection name.
            query: filter document.
            sort: optional ``{field: direction}`` mapping.
            limite: maximum number of documents; ``None`` is sent as 0.

        Returns:
            The cursor, or None when the database is not reachable.
        """
        target = self._collection(collection, "Find")
        if target is None:
            return None
        self.logger.info("Access to collection Multi: %s, query: %s, sort: %s, limit: %s",
                         collection, query, sort, limite)
        return target.find(query,
                           sort=self._sort_pairs(sort),
                           limit=0 if limite is None else limite)

    def update_one(self, collection, query, change, is_set="set"):
        """Apply ``{"$<is_set>": change}`` to one document matching ``query``.

        Returns the result of the driver call, or None when not connected.
        """
        target = self._collection(collection, "Update_one")
        if target is None:
            return None
        self.logger.info("Modify collection: %s, query: %s, modify: %s, set: %s",
                         collection, query, change, is_set)
        return target.update_one(query, {"$" + is_set: change})

    def update_many(self, collection, query, change, is_set="set"):
        """Apply ``{"$<is_set>": change}`` to every document matching ``query``.

        Returns the result of the driver call, or None when not connected.
        """
        target = self._collection(collection, "Update_many")
        if target is None:
            return None
        self.logger.info("Modify Many collection: %s, query: %s, modify: %s, set: %s",
                         collection, query, change, is_set)
        return target.update_many(query, {"$" + is_set: change})

    def insert(self, collection, element):
        """Insert ``element`` and return what the driver's ``insert`` returns.

        Returns None when not connected or when the insert hits a duplicate
        key.
        """
        target = self._collection(collection, "Insert")
        if target is None:
            return None
        self.logger.debug("Insert collection: %s, data: %s", collection,
                          element)
        try:
            # NOTE(review): ``Collection.insert`` is deprecated in PyMongo 3
            # and removed in PyMongo 4; ``insert_one`` returns a different
            # type, so the call is kept to avoid changing what callers get.
            return target.insert(element)
        except DuplicateKeyError:
            # Duplicates are expected; record them instead of failing.
            self.logger.warning("Duplicate key in collection: %s", collection)
            return None

    def delete_one(self, collection, element):
        """Delete one document matching ``element``; None when not connected."""
        target = self._collection(collection, "Delete_one")
        if target is None:
            return None
        self.logger.info("Remove collection: %s, data: %s", collection,
                         element)
        return target.delete_one(element)

    def delete_many(self, collection, element):
        """Delete every document matching ``element``; None when not connected."""
        target = self._collection(collection, "Delete_many")
        if target is None:
            return None
        self.logger.info("Remove collection: %s, data: %s", collection,
                         element)
        return target.delete_many(element)

    def aggregate(self, collection, element):
        """Run the aggregation pipeline ``element``; None when not connected."""
        target = self._collection(collection, "Aggregate")
        if target is None:
            return None
        self.logger.info("Aggregate collection: %s, data: %s", collection,
                         element)
        return target.aggregate(element)

    def drop(self, collection):
        """Drop the collection; returns None when not connected."""
        target = self._collection(collection, "Drop")
        if target is None:
            return None
        self.logger.info("Drop collection: %s", collection)
        return target.drop()