Exemple #1
0
def main():
    """
    Command-line entry point.

    Parses the arguments, configures the logging verbosity, loads the
    configuration and then runs the command requested through ``args.cmd``.
    """
    # pylint: disable=locally-disabled,too-many-branches
    def sort_new_flats_by_cost(flats_by_key):
        """Keep the ``"new"`` flats of every entry, sorted on ``"cost"``."""
        return {
            key: tools.sort_list_of_dicts_by(flats["new"], "cost")
            for key, flats in flats_by_key.items()
        }

    args = parse_args()

    # Choose the verbosity first, then apply it to both loggers.
    # sqlalchemy INFO level is way too loud, so it never goes below WARNING
    # unless full debug output (-vv) was requested.
    if args.vv:
        root_level, sqlalchemy_level = logging.DEBUG, logging.DEBUG
    elif args.verbose:
        root_level, sqlalchemy_level = logging.INFO, logging.WARNING
    else:
        root_level, sqlalchemy_level = logging.WARNING, logging.WARNING
    logging.getLogger('').setLevel(root_level)
    logging.getLogger('sqlalchemy.engine').setLevel(sqlalchemy_level)

    command = args.cmd

    # init-config is the only command which runs without a loaded config.
    if command == "init-config":
        flatisfy.config.init_config(args.output)
        sys.exit(0)

    # When building data, the data is not available yet, so the config checks
    # must not rely on it.
    building_data = command == "build-data"
    config = flatisfy.config.load_config(
        args, check_with_data=not building_data
    )
    if config is None:
        LOGGER.error("Invalid configuration. Exiting. "
                     "Run init-config before if this is the first time "
                     "you run Flatisfy.")
        sys.exit(1)

    if command == "purge":
        cmds.purge_db(config)
        return

    # Build data files; the build is only forced for the build-data command.
    try:
        data.preprocess_data(config, force=building_data)
        LOGGER.info("Done building data!")
    except flatisfy.exceptions.DataBuildError as exc:
        LOGGER.error("%s", exc)
        sys.exit(1)
    if building_data:
        sys.exit(0)

    if command == "fetch":
        # Fetch, filter (with details), sort and dump to stdout
        fetched_flats = fetch.fetch_flats(config)
        fetched_flats = cmds.filter_fetched_flats(
            config, fetched_flats=fetched_flats, fetch_details=True
        )
        print(tools.pretty_json(sort_new_flats_by_cost(fetched_flats)))
    elif command == "filter":
        if not args.input:
            # No input file: filter the flats stored in the database
            cmds.import_and_filter(config, load_from_db=True)
            return
        # Input file given: filter its content and dump the result to stdout
        fetched_flats = fetch.load_flats_from_file(args.input, config)
        fetched_flats = cmds.filter_fetched_flats(
            config, fetched_flats=fetched_flats, fetch_details=False
        )
        print(tools.pretty_json(sort_new_flats_by_cost(fetched_flats)))
    elif command == "import":
        cmds.import_and_filter(config, load_from_db=False)
    elif command == "serve":
        cmds.serve(config)
Exemple #2
0
def import_and_filter(config, load_from_db=False, new_only=False):
    """
    Fetch the available flats list. Then, filter it according to criteria.
    Finally, store it in the database.

    Besides the database update, a notification email is sent when
    ``config["send_email"]`` is truthy, and a ``timestamp`` file is touched in
    ``config["data_directory"]``.

    :param config: A config dict.
    :param load_from_db: Whether to load flats from database or fetch them
        using Woob.
    :param new_only: When true, the flats already stored in the database are
        handed to the filtering step as ``past_flats``; otherwise an empty
        dict is passed instead.
    :return: The list of ids of the flats which were not yet in the database
        and which ended up with the ``new`` status.
    """
    # Fetch and filter flats list
    past_flats = fetch.load_flats_from_db(config)
    if load_from_db:
        fetched_flats = past_flats
    else:
        fetched_flats = fetch.fetch_flats(config)
    # Do not fetch additional details if we loaded data from the db.
    flats_by_status = filter_fetched_flats(
        config,
        fetched_flats=fetched_flats,
        fetch_details=(not load_from_db),
        past_flats=past_flats if new_only else {},
    )
    # Create database connection
    get_session = database.init_db(config["database"], config["search_index"])

    # Flat objects (for the notification) and their ids (returned value) of
    # the newly inserted flats having the "new" status.
    new_flats = []
    result = []

    LOGGER.info("Merging fetched flats in database...")
    # Flatten the flats_by_status dict: group all the flats by status,
    # whatever the outer key they were stored under.
    flatten_flats_by_status = collections.defaultdict(list)
    for flats in flats_by_status.values():
        for status, flats_list in flats.items():
            flatten_flats_by_status[status].extend(flats_list)

    with get_session() as session:
        # Set is_expired to true for all existing flats.
        # This will be set back to false if we find them during importing.
        for flat in session.query(flat_model.Flat).all():
            flat.is_expired = True

        for status, flats_list in flatten_flats_by_status.items():
            # Build SQLAlchemy Flat model objects for every available flat
            flats_objects = {
                flat_dict["id"]: flat_model.Flat.from_dict(flat_dict)
                for flat_dict in flats_list
            }

            if flats_objects:
                # If there are some flats, try to merge them with the ones in
                # db
                existing_flats_queries = session.query(flat_model.Flat).filter(
                    flat_model.Flat.id.in_(flats_objects.keys())
                )
                for each in existing_flats_queries.all():
                    # For each flat to merge, take care not to overwrite the
                    # status if the user defined it
                    flat_object = flats_objects[each.id]
                    if each.status in flat_model.AUTOMATED_STATUSES:
                        flat_object.status = getattr(
                            flat_model.FlatStatus, status
                        )
                    else:
                        flat_object.status = each.status

                    # Every flat we fetched isn't expired
                    flat_object.is_expired = False

                    # For each flat already in the db, merge it (UPDATE)
                    # instead of adding it. Popping it leaves only the flats
                    # unknown to the db in flats_objects.
                    session.merge(flats_objects.pop(each.id))

            # For any other flat, it is not already in the database, so we can
            # just set the status field without worrying
            for flat in flats_objects.values():
                flat.status = getattr(flat_model.FlatStatus, status)
                if flat.status == flat_model.FlatStatus.new:
                    new_flats.append(flat)
                    result.append(flat.id)

            session.add_all(flats_objects.values())

        if config["send_email"]:
            email.send_notification(config, new_flats)

    # Lazy %-formatting: the message is only built if the record is emitted.
    LOGGER.info("Found %d new flats.", len(result))

    # Touch a file to indicate last update timestamp
    ts_file = os.path.join(config["data_directory"], "timestamp")
    with open(ts_file, "w"):
        os.utime(ts_file, None)

    LOGGER.info("Done!")
    return result
Exemple #3
0
def import_and_filter(config, load_from_db=False):
    """
    Get the flats, run them through the filters and persist the outcome in
    the database. A notification email is sent for the new flats when
    ``config["send_email"]`` is truthy.

    :param config: A config dict.
    :param load_from_db: When true, the flats are read back from the database
        instead of being fetched using WebOOB.
    :return: ``None``.
    """
    loader = fetch.load_flats_from_db if load_from_db else fetch.fetch_flats
    fetched_flats = loader(config)

    # Additional details are only fetched for freshly fetched flats, not for
    # the ones reloaded from the database.
    flats_by_status = filter_fetched_flats(
        config,
        fetched_flats=fetched_flats,
        fetch_details=not load_from_db,
    )

    # Session factory bound to the configured database
    get_session = database.init_db(config["database"], config["search_index"])

    LOGGER.info("Merging fetched flats in database...")

    # Regroup every flat under its status, whatever the outer key it was
    # stored under.
    grouped_by_status = collections.defaultdict(list)
    for per_status in flats_by_status.values():
        for status, flats_list in per_status.items():
            grouped_by_status[status].extend(flats_list)

    new_flats = []
    with get_session() as session:
        for status, flats_list in grouped_by_status.items():
            # One SQLAlchemy Flat object per fetched flat, indexed by id
            pending = {}
            for flat_dict in flats_list:
                flat_id = flat_dict["id"]
                pending[flat_id] = flat_model.Flat.from_dict(flat_dict)

            if pending:
                # Flats already stored in the db are merged (UPDATE) rather
                # than added.
                stored_flats = session.query(flat_model.Flat).filter(
                    flat_model.Flat.id.in_(pending.keys())
                ).all()
                for stored in stored_flats:
                    candidate = pending.pop(stored.id)
                    # A status set by the user must survive the merge; only
                    # automated statuses get replaced.
                    if stored.status in flat_model.AUTOMATED_STATUSES:
                        candidate.status = getattr(
                            flat_model.FlatStatus, status
                        )
                    else:
                        candidate.status = stored.status
                    session.merge(candidate)

            # Whatever is left is unknown to the database: the status can be
            # set directly.
            for flat in pending.values():
                flat.status = getattr(flat_model.FlatStatus, status)
                if flat.status == flat_model.FlatStatus.new:
                    new_flats.append(flat)

            session.add_all(pending.values())

        if config["send_email"]:
            email.send_notification(config, new_flats)

    LOGGER.info("Done!")