class CleanFlights(object):
    """Housekeeping for the ``vuelos`` collection.

    Every stored flight is passed through the rules returned by
    ``create_all_rules``; each rule returns a dict of counters that is
    merged into the overall result.
    """

    mongodbaccess = None
    logger = None

    def __init__(self, mongo_db_access, level_log):
        """Keep the database accessor and create the class logger.

        mongo_db_access -- object exposing ``find``, ``insert`` and
                           ``delete_one``.
        level_log       -- level handed to the project ``Logger`` helper.
        """
        self.logger = Logger(self.__class__.__name__, level_log).get()
        self.mongodbaccess = mongo_db_access
        self.logger.info("Inicio: %s", datetime.datetime.now())

    def clean(self):
        """Run every rule over every document of ``vuelos``.

        Returns the accumulated counters; ``"total"`` is the number of
        flights analysed.
        """
        self.logger.info("++INFO-- CLEAN FASE I")
        result = {"total": 0}
        flights = self.mongodbaccess.find("vuelos", {})
        if flights is None:
            # The accessor returns None (and logs the reason) when the
            # database is not reachable: there is nothing to iterate.
            return result
        for vuelo in flights:
            result = self.analize_each_flight(result, vuelo)
        return result

    def analize_each_flight(self, result, vuelo):
        """Apply all rules to one flight and count it in ``result``.

        The counters returned by each rule are merged into ``result`` with
        ``accumulate_dic``; the same ``result`` object is returned.
        """
        # A plain loop works for any number of rules; the former
        # apply(func, rules) call unpacked the list as positional arguments
        # and therefore only worked with exactly one rule.
        for rule in self.create_all_rules():
            accumulate_dic(result, rule(vuelo))
        result["total"] += 1
        return result

    def create_all_rules(self):
        """Return the list of rule callables to run on each flight."""
        return [self.rule_older_than_15days]

    def rule_older_than_15days(self, elemento):
        """First rule: move flights older than 15 days to ``vuelosOld``.

        elemento -- flight document (dict) read from ``vuelos``.

        Returns ``{"deleted": 0|1, "inserted_old": 0|1}``. A document whose
        ``dateDirect`` is missing or is not a datetime is left untouched.
        """
        date15 = datetime.datetime.now() - datetime.timedelta(days=15)
        counters = {"deleted": 0, "inserted_old": 0}

        date_direct = elemento.get("dateDirect")
        if not isinstance(date_direct, datetime.datetime):
            # Without a usable date there is nothing to compare against.
            return counters
        if not date_direct < date15:
            return counters

        if self.mongodbaccess.insert("vuelosOld", elemento) is not None:
            counters["inserted_old"] = 1
        else:
            # insert() returns None for a duplicated key or an unreachable
            # database; the flight is still removed from "vuelos" below,
            # exactly as the message states.
            self.logger.error(
                "Error vuelo not insert backup but delete %s", elemento)
        self.mongodbaccess.delete_one("vuelos", {"_id": elemento.get("_id")})
        counters["deleted"] = 1
        return counters
class MongoDBAccess(object):
    """Thin wrapper around a MongoDB database.

    Every operation first checks the connection with ``status()``; when the
    server cannot be reached the operation logs an error naming itself and
    returns ``None`` instead of raising.
    """

    db_access = None
    _client = None

    def __init__(self, config, levelLog):
        """Connect using the ``url`` and ``nameDB`` entries of ``config``.

        config   -- dict-like object providing ``get``.
        levelLog -- level handed to the project ``Logger`` helper.

        Connection problems are logged, not raised; ``status()`` then
        reports ``False``.
        """
        self.logger = Logger(self.__class__.__name__, levelLog).get()
        # NOTE(review): this overrides levelLog with INFO, so the debug()
        # calls of this class are presumably never emitted - confirm that
        # this is intended.
        self.logger.setLevel('INFO')
        try:
            self.logger.debug(config.get("url", ""))
            self._client = MongoClient(config.get("url", ""))
            self.db_access = self._client[config.get("nameDB")]
            self.logger.info("-- INFO -- DATA BASE CONECT OK")
        except ConfigurationError:
            self.logger.error("ConfigurationErr")
        except ConnectionFailure:
            self.logger.error("ConnectionFailure")
        except OperationFailure:
            self.logger.error("Authentication failure")

    def status(self):
        """Return True when the server answers ``server_info``, else False."""
        if self._client is None:
            return False
        try:
            self.logger.debug(self._client.server_info())
            return True
        except ConnectionFailure:
            self.logger.error("ConnectionFailure")
            return False
        except OperationFailure:
            self.logger.error("Authentication failure")
            return False

    def _collection(self, collection, operation):
        """Return the named collection, or None (after logging) if offline.

        operation -- label used in the error message, e.g. ``"Find"``.
        """
        if self.status():
            return self.db_access[collection]
        self.logger.error("Database Not INIT %s", operation)
        return None

    @staticmethod
    def _sort_pairs(sort):
        """Turn a ``{field: direction}`` dict into a list of pairs.

        ``None`` is passed through. With more than one key the resulting
        order follows the dict's own iteration order.
        """
        return None if sort is None else list(sort.items())

    def find_one(self, collection, query, sort=None):
        """Return the first document matching ``query`` or None.

        sort -- optional ``{field: direction}`` dict.
        """
        target = self._collection(collection, "Find_one")
        if target is None:
            return None
        self.logger.info("Access to collection: %s, query %s", collection, query)
        return target.find_one(query, sort=self._sort_pairs(sort))

    def find(self, collection, query, sort=None, limite=None):
        """Return a cursor over the documents matching ``query``.

        sort   -- optional ``{field: direction}`` dict.
        limite -- maximum number of documents; None is sent as 0.

        Returns None when the database is not available.
        """
        target = self._collection(collection, "Find")
        if target is None:
            return None
        self.logger.info(
            "Access to collection Multi: %s, query: %s, sort: %s, limit: %s",
            collection, query, sort, limite)
        limite = 0 if limite is None else limite
        return target.find(query, sort=self._sort_pairs(sort), limit=limite)

    def update_one(self, collection, query, change, is_set="set"):
        """Update the first document matching ``query``.

        change -- payload of the update operator.
        is_set -- operator name without the dollar sign (``"set"`` gives
                  ``$set``).

        Returns the driver's result, or None when the database is not
        available.
        """
        target = self._collection(collection, "Update_one")
        if target is None:
            return None
        self.logger.info(
            "Modify collection: %s, query: %s, modify: %s, set: %s",
            collection, query, change, is_set)
        return target.update_one(query, {"$" + is_set: change})

    def update_many(self, collection, query, change, is_set="set"):
        """Update every document matching ``query``.

        Parameters and return value as in ``update_one``.
        """
        target = self._collection(collection, "Update_many")
        if target is None:
            return None
        self.logger.info(
            "Modify Many collection: %s, query: %s, modify: %s, set: %s",
            collection, query, change, is_set)
        return target.update_many(query, {"$" + is_set: change})

    def insert(self, collection, element):
        """Insert ``element`` and return what the driver's insert returns.

        Returns None for a duplicated key or when the database is not
        available.
        """
        target = self._collection(collection, "Insert")
        if target is None:
            return None
        self.logger.debug("Insert collection: %s, data: %s", collection, element)
        # NOTE(review): Collection.insert is deprecated in PyMongo 3; it is
        # kept because callers rely on its return value.
        try:
            return target.insert(element)
        except DuplicateKeyError:
            # A duplicated key is reported to the caller as None.
            return None

    def delete_one(self, collection, element):
        """Delete the first document matching the filter ``element``.

        Returns the driver's result, or None when the database is not
        available.
        """
        target = self._collection(collection, "Delete_one")
        if target is None:
            return None
        self.logger.info("Remove collection: %s, data: %s", collection, element)
        return target.delete_one(element)

    def delete_many(self, collection, element):
        """Delete every document matching the filter ``element``.

        Returns the driver's result, or None when the database is not
        available.
        """
        target = self._collection(collection, "Delete_many")
        if target is None:
            return None
        self.logger.info("Remove collection: %s, data: %s", collection, element)
        return target.delete_many(element)

    def aggregate(self, collection, element):
        """Run the aggregation pipeline ``element`` on the collection.

        Returns the driver's result, or None when the database is not
        available.
        """
        target = self._collection(collection, "Aggregate")
        if target is None:
            return None
        self.logger.info("Aggregate collection: %s, data: %s", collection, element)
        return target.aggregate(element)

    def drop(self, collection):
        """Drop the collection.

        Returns the driver's result, or None when the database is not
        available.
        """
        target = self._collection(collection, "Drop")
        if target is None:
            return None
        self.logger.info("Drop collection: %s", collection)
        return target.drop()
class FindFlights(object):
    """Scrape flight result pages with Selenium and store them in MongoDB."""

    seleniumaccess = None
    mongodbaccess = None
    logger = None
    holidays = None

    def __init__(self, config, mongo_db_access, level_log):
        """Create the Selenium accessor, the holidays helper and the logger.

        config          -- configuration handed to ``SeleniumAccess``.
        mongo_db_access -- object exposing ``insert`` and ``delete_one``.
        level_log       -- level handed to the project helpers.
        """
        self.logger = Logger(self.__class__.__name__, level_log).get()
        self.mongodbaccess = mongo_db_access
        self.seleniumaccess = SeleniumAccess(config, level_log)
        self.holidays = Holidays(level_log)
        self.logger.info("Inicio: %s", datetime.datetime.now())

    def get_flights(self, urls):
        """Process every url document and return the accumulated counters.

        urls -- iterable of url documents (dicts).

        Returns a dict with the keys ``save``, ``warn`` and ``error``.
        """
        self.logger.info("Process each url")
        result = {"save": 0, "warn": 0, "error": 0}
        self.seleniumaccess.open_selenium()
        try:
            driver = self.seleniumaccess.driver
            time.sleep(1)
            driver.get("http://www.google.com")
            time.sleep(1)
            for url in urls:
                accumulate_dic(result, self.url_to_flight(url, driver))
        finally:
            # Always release the browser, also when a page makes us fail.
            self.seleniumaccess.close_selenium()
        return result

    @staticmethod
    def _parse_price(precio_string):
        """Convert a price label such as ``X1.234,56`` into a float.

        The first character (the currency symbol) is dropped, dots are
        treated as thousands separators and the comma as the decimal mark.
        Raises ValueError when the remainder is not a number.
        """
        digits = precio_string[1:].replace(".", "").replace(" ", "")
        return float(digits.replace(",", "."))

    @staticmethod
    def _report_failure(url, error):
        """Print the url document that failed together with the error."""
        print("****************************")
        print(url)
        print(error)

    def url_to_flight(self, url, driver):
        """Load one url, store the flight found and return its counters.

        url    -- url document; the keys read are ``url``, ``type``,
                  ``from``, ``to``, ``dateDirect`` and ``dateReturn``.
        driver -- Selenium driver used to load and inspect the page.

        Returns ``{"save": 1, ...}`` on success, ``{"warn": 1, ...}`` when
        an element is missing or the price cannot be parsed, and
        ``{"error": 1, ...}`` for stale elements and timeouts. On success
        the flight is inserted in ``vuelos`` and the url is removed from
        ``urls``.
        """
        try:
            # Loading the page happens inside the try so that a timeout
            # while loading is reported through the TimeoutException
            # handler below instead of aborting the whole run.
            driver.get(url.get("url", "http://google.es"))
            precio_string = driver.find_element_by_class_name(
                "gws-flights-results__price").text
            try:
                precio = self._parse_price(precio_string)
            except ValueError as error_price:
                # No usable price on the page: count it as a warning and
                # leave the url in place.
                self._report_failure(url, error_price)
                return {"save": 0, "warn": 1, "error": 0}

            if url.get("type", "") == "o":
                type_class = "gws-flights-form__menu-label"
            else:
                type_class = "gws-flights-results__price-annotation"
            type_flight = driver.find_element_by_class_name(type_class).text

            url_insert = {
                "dBusqueda": datetime.datetime.now(),
                "precio": precio,
                "type": type_flight,
                "horaS": driver.find_element_by_class_name(
                    "gws-flights-results__times").text,
                "horaLl": "",
                "company": driver.find_element_by_class_name(
                    "gws-flights-results__carriers").text,
                "duracion": driver.find_element_by_class_name(
                    "gws-flights-results__duration").text,
                "escalas": driver.find_element_by_class_name(
                    "gws-flights-results__itinerary-stops").text,
                "from": url.get("from", "XXX"),
                "to": url.get("to", "XXX"),
                "dateDirect": url.get("dateDirect", "XXX"),
                "dateReturn": url.get("dateReturn", "YYY"),
                "holidays": self.holidays.get_number_holidays(
                    url.get("dateDirect", "XXX"),
                    url.get("dateReturn", "YYY")),
            }
            self.logger.debug("Insert url elemento: %s", url_insert)
            self.mongodbaccess.insert("vuelos", url_insert)
            self.mongodbaccess.delete_one("urls", {"url": url.get("url", "")})
            print(
                "from: {0}, to: {1}, dateDirect: {2}, dateReturn: {3}, price: {4}"
                .format(url_insert["from"], url_insert["to"],
                        url_insert["dateDirect"].strftime("%Y-%m-%d"),
                        url_insert["dateReturn"].strftime("%Y-%m-%d"),
                        url_insert["precio"]))
        except StaleElementReferenceException as error_ref:
            self._report_failure(url, error_ref)
            time.sleep(1)
            return {"save": 0, "warn": 0, "error": 1}
        except NoSuchElementException as error_no_such:
            self._report_failure(url, error_no_such)
            time.sleep(1)
            return {"save": 0, "warn": 1, "error": 0}
        except TimeoutException as error_time_out:
            print("-- ERROR -- TimeOut *****************")
            self._report_failure(url, error_time_out)
            return {"save": 0, "warn": 0, "error": 1}
        return {"save": 1, "warn": 0, "error": 0}
class Vuelos(object):
    """Entry point: decides which urls must be processed and runs them."""

    level_log = None
    config = None
    mongodbaccess = None
    logger = None

    def __init__(self, file_config, level_log):
        """Load the JSON configuration file and open the database access.

        file_config -- path of the JSON configuration file.
        level_log   -- level handed to the project helpers.

        When the file cannot be read the error is logged and an empty
        configuration is used.
        """
        self.level_log = level_log
        self.logger = Logger(self.__class__.__name__, level_log).get()
        try:
            # The with-block closes the file handle as soon as it is parsed.
            with open(file_config, "r") as config_file:
                self.config = json.load(config_file)
        except IOError:
            self.logger.error("File Error: %s", file_config)
            self.config = {}
        self.mongodbaccess = MongoDBAccess(self.config, level_log)
        self.logger.info("Inicio: %s", datetime.datetime.now())

    def _rebuild_urls(self):
        """Regenerate the pending urls and print what the builder returned."""
        urls = BuildUrls(self.mongodbaccess, self.level_log).build_urls()
        print("-- INFO -- construir urls -- numero de URLS: {0}".format(urls))
        return urls

    def ejecutar(self, nivel):
        """Run the load process.

        nivel -- ``"1"`` is the hard mode: today's flights are deleted and
                 the urls are rebuilt. Any other value is the soft mode:
                 urls are rebuilt only when the last stored search is older
                 than ``today()``; otherwise pending urls are resumed.

        In both modes the pending urls are then processed with
        ``FindFlights`` and the totals are printed.
        """
        print("++ INFO ++ MODULO PRINCIPAL MODO DE EJECUCION: {0}".format(nivel))
        if nivel == "1":
            print("-- INFO -- MODO 1 duro ejecuta y limpia los datos del dia")
            # Hard process: drop today's information and start again.
            print("++ INFO ++ Vaciamos informacion del dia")
            print("-- INFO -- dia: {0}".format(today()))
            borrados = self.vaciar_dia()
            print("-- INFO -- vaciamos informacion -- Vuelos borrados del dia: {0}"
                  .format(borrados.deleted_count))
            self._rebuild_urls()
        else:
            print("-- INFO -- MODO 0 suave solo si hay datos que ejecutar")
            # Soft process: check whether something is pending or whether
            # today has not been processed yet.
            pendientes = self.return_urls().count()
            dia_sin_procesar = self.find_last_day() < today()
            if pendientes == 0:
                if dia_sin_procesar:
                    # Nothing pending and the last search is older than
                    # today: first run of the day.
                    print("++ WARN ++ 1.1 PRIMERA VEZ DEL DIA creamos las URLS y seguimos")
                    self._rebuild_urls()
                else:
                    # Everything was already processed today.
                    print("++ WARN ++ 1.2 SE HA PROCESADO TODO Y NO HAY NADA QUE HACER")
            elif dia_sin_procesar:
                # Urls are left over from a previous day: restart.
                print("** ERROR ** 2.1 AYER NO SE EJECUTARON TODOS LOS VUELOS")
                print("** ERROR ** vuelos pendientes {0}".format(pendientes))
                self.logger.error("AYER no se ejecutaron todos los vuelos")
                self._rebuild_urls()
            else:
                # Today's run was interrupted: continue from that point.
                print("++ WARN ++ 2.2 HA HABIDO UNA CANCELACION y el "
                      + "SISTEMA SIGUE DESDE ESE PUNTO")
                print("++ WARN ++ vuelos pendientes {0}".format(pendientes))
                self.logger.error(
                    "Ha habido una cancelacion y se sigue desde ese punto")
        result = FindFlights(self.config, self.mongodbaccess, self.level_log)\
            .get_flights(self.return_urls())
        print("++ INFO -- TOTAL PROCESO, Save: {0}".format(
            result.get("save", 0)))
        print("++ INFO -- TOTAL PROCESO, errores sin Informacion: {0}".format(
            result.get("warn", 0)))
        print("++ INFO -- TOTAL PROCESO, errores NO ENCONTRADO: {0}".format(
            result.get("error", 0)))

    def vaciar_dia(self):
        """Delete the flights whose ``dBusqueda`` is later than ``today()``.

        Returns whatever the accessor's ``delete_many`` returns.
        """
        return self.mongodbaccess.delete_many(
            "vuelos", {"dBusqueda": {"$gt": today()}})

    def return_urls(self):
        """Return the accessor's ``find`` result over the ``urls`` collection."""
        return self.mongodbaccess.find("urls", {})

    def find_last_day(self):
        """Return the ``dBusqueda`` of the most recent stored flight.

        Falls back to 2000-01-01 when no flight is found or the document
        has no ``dBusqueda``, so the result can always be compared with a
        datetime.
        """
        print("++ INFO ++ find_last_day")
        sin_datos = datetime.datetime(2000, 1, 1)
        # One query is enough: keep the document instead of asking twice.
        ultimo = self.mongodbaccess.find_one(
            "vuelos", {}, sort={"dBusqueda": -1})
        if ultimo is None:
            return sin_datos
        return ultimo.get("dBusqueda", sin_datos)