def main():
    """
    Main module code.
    """
    # pylint: disable=locally-disabled,too-many-branches
    # Parse arguments
    args = parse_args()

    # Configure logging verbosity from the CLI flags. The sqlalchemy engine
    # logger is kept at WARNING unless -vv is given, since its INFO level is
    # way too loud.
    if args.vv:
        logging.getLogger('').setLevel(logging.DEBUG)
        logging.getLogger('sqlalchemy.engine').setLevel(logging.DEBUG)
    elif args.verbose:
        logging.getLogger('').setLevel(logging.INFO)
        logging.getLogger('sqlalchemy.engine').setLevel(logging.WARNING)
    else:
        logging.getLogger('').setLevel(logging.WARNING)
        logging.getLogger('sqlalchemy.engine').setLevel(logging.WARNING)

    # init-config is the only command that runs without a loaded config.
    if args.cmd == "init-config":
        flatisfy.config.init_config(args.output)
        sys.exit(0)

    # Load config. When building data, it is not yet available, so skip the
    # data-backed config checks.
    config = flatisfy.config.load_config(
        args, check_with_data=(args.cmd != "build-data")
    )
    if config is None:
        LOGGER.error("Invalid configuration. Exiting. "
                     "Run init-config before if this is the first time "
                     "you run Flatisfy.")
        sys.exit(1)

    # Purge command
    if args.cmd == "purge":
        cmds.purge_db(config)
        return

    # Build data files (forced rebuild for the explicit build-data command).
    try:
        data.preprocess_data(config, force=(args.cmd == "build-data"))
        LOGGER.info("Done building data!")
        if args.cmd == "build-data":
            sys.exit(0)
    except flatisfy.exceptions.DataBuildError as exc:
        LOGGER.error("%s", exc)
        sys.exit(1)

    # Fetch command: fetch, filter, sort by cost and dump to stdout.
    if args.cmd == "fetch":
        fetched_flats = fetch.fetch_flats(config)
        fetched_flats = cmds.filter_fetched_flats(config,
                                                  fetched_flats=fetched_flats,
                                                  fetch_details=True)
        fetched_flats = {
            constraint: tools.sort_list_of_dicts_by(statuses["new"], "cost")
            for constraint, statuses in fetched_flats.items()
        }
        print(tools.pretty_json(fetched_flats))
        return

    # Filter command: either re-filter a JSON dump, or re-filter the db.
    if args.cmd == "filter":
        if args.input:
            fetched_flats = fetch.load_flats_from_file(args.input, config)
            fetched_flats = cmds.filter_fetched_flats(
                config, fetched_flats=fetched_flats, fetch_details=False)
            fetched_flats = {
                constraint: tools.sort_list_of_dicts_by(statuses["new"], "cost")
                for constraint, statuses in fetched_flats.items()
            }
            # Output to stdout
            print(tools.pretty_json(fetched_flats))
        else:
            cmds.import_and_filter(config, load_from_db=True)
        return

    # Import command
    if args.cmd == "import":
        cmds.import_and_filter(config, load_from_db=False)
        return

    # Serve command
    if args.cmd == "serve":
        cmds.serve(config)
        return
def import_and_filter(config, load_from_db=False, new_only=False):
    """
    Fetch the available flats list. Then, filter it according to criteria.
    Finally, store it in the database.

    :param config: A config dict.
    :param load_from_db: Whether to load flats from database or fetch them
        using Woob.
    :param new_only: Whether to restrict filtering to flats not already seen
        in the database (past flats are passed to the filter step).
    :return: The list of ids of the flats that were imported with the ``new``
        status.
    """
    # Fetch and filter flats list. Past flats are always loaded so they can
    # be used for deduplication when new_only is set.
    past_flats = fetch.load_flats_from_db(config)
    if load_from_db:
        fetched_flats = past_flats
    else:
        fetched_flats = fetch.fetch_flats(config)
    # Do not fetch additional details if we loaded data from the db.
    flats_by_status = filter_fetched_flats(
        config,
        fetched_flats=fetched_flats,
        fetch_details=(not load_from_db),
        past_flats=past_flats if new_only else {},
    )

    # Create database connection
    get_session = database.init_db(config["database"], config["search_index"])

    new_flats = []
    result = []

    LOGGER.info("Merging fetched flats in database...")
    # Flatten the flats_by_status dict (grouped per constraint) into a single
    # status -> flats list mapping.
    flatten_flats_by_status = collections.defaultdict(list)
    for flats in flats_by_status.values():
        for status, flats_list in flats.items():
            flatten_flats_by_status[status].extend(flats_list)

    with get_session() as session:
        # Set is_expired to true for all existing flats.
        # This will be set back to false if we find them during importing.
        for flat in session.query(flat_model.Flat).all():
            flat.is_expired = True

        for status, flats_list in flatten_flats_by_status.items():
            # Build SQLAlchemy Flat model objects for every available flat
            flats_objects = {
                flat_dict["id"]: flat_model.Flat.from_dict(flat_dict)
                for flat_dict in flats_list
            }

            if flats_objects:
                # If there are some flats, try to merge them with the ones in
                # db
                existing_flats_queries = session.query(flat_model.Flat).filter(
                    flat_model.Flat.id.in_(flats_objects.keys())
                )
                for each in existing_flats_queries.all():
                    # For each flat to merge, take care not to overwrite the
                    # status if the user defined it
                    flat_object = flats_objects[each.id]
                    if each.status in flat_model.AUTOMATED_STATUSES:
                        flat_object.status = getattr(flat_model.FlatStatus,
                                                     status)
                    else:
                        flat_object.status = each.status

                    # Every flat we fetched isn't expired
                    flat_object.is_expired = False

                    # For each flat already in the db, merge it (UPDATE)
                    # instead of adding it
                    session.merge(flats_objects.pop(each.id))

            # For any other flat, it is not already in the database, so we can
            # just set the status field without worrying
            for flat in flats_objects.values():
                flat.status = getattr(flat_model.FlatStatus, status)
                if flat.status == flat_model.FlatStatus.new:
                    new_flats.append(flat)
                    result.append(flat.id)

            session.add_all(flats_objects.values())

    if config["send_email"]:
        email.send_notification(config, new_flats)

    # Lazy %-formatting: the message is only built if INFO is enabled.
    LOGGER.info("Found %d new flats.", len(result))

    # Touch a file to indicate last update timestamp
    ts_file = os.path.join(config["data_directory"], "timestamp")
    with open(ts_file, "w"):
        os.utime(ts_file, None)

    LOGGER.info("Done!")
    return result
def import_and_filter(config, load_from_db=False):
    """
    Fetch the available flats list, filter it according to criteria and store
    the result in the database.

    NOTE(review): another ``import_and_filter`` definition appears earlier in
    this file; at import time the later definition shadows the earlier one —
    confirm which version is intended to survive.

    :param config: A config dict.
    :param load_from_db: Whether to load flats from database or fetch them
        using WebOOB.
    :return: ``None``.
    """
    # Either reuse the flats stored in the db, or scrape fresh ones. Extra
    # details are only fetched for freshly-scraped flats.
    fetched_flats = (
        fetch.load_flats_from_db(config)
        if load_from_db
        else fetch.fetch_flats(config)
    )
    flats_by_status = filter_fetched_flats(
        config,
        fetched_flats=fetched_flats,
        fetch_details=(not load_from_db),
    )

    # Database session factory.
    get_session = database.init_db(config["database"], config["search_index"])

    new_flats = []

    LOGGER.info("Merging fetched flats in database...")
    # Collapse the per-constraint dict into a single status -> flats mapping.
    flatten_flats_by_status = collections.defaultdict(list)
    for per_constraint in flats_by_status.values():
        for status, flats_list in per_constraint.items():
            flatten_flats_by_status[status].extend(flats_list)

    with get_session() as session:
        for status, flats_list in flatten_flats_by_status.items():
            # One SQLAlchemy Flat object per fetched flat, keyed by id.
            flats_objects = {}
            for flat_dict in flats_list:
                flats_objects[flat_dict["id"]] = (
                    flat_model.Flat.from_dict(flat_dict)
                )

            if flats_objects:
                # Flats already in the db are merged (UPDATE), taking care
                # not to clobber a status the user set manually.
                existing_query = session.query(flat_model.Flat).filter(
                    flat_model.Flat.id.in_(flats_objects.keys())
                )
                for db_flat in existing_query.all():
                    merged = flats_objects.pop(db_flat.id)
                    if db_flat.status in flat_model.AUTOMATED_STATUSES:
                        merged.status = getattr(flat_model.FlatStatus, status)
                    else:
                        merged.status = db_flat.status
                    session.merge(merged)

            # Whatever remains is not in the database yet: set the status
            # directly and INSERT.
            for flat in flats_objects.values():
                flat.status = getattr(flat_model.FlatStatus, status)
                if flat.status == flat_model.FlatStatus.new:
                    new_flats.append(flat)
            session.add_all(flats_objects.values())

    if config["send_email"]:
        email.send_notification(config, new_flats)

    LOGGER.info("Done!")