def main():
    """Parse the command-line options and run the job-offer extraction.

    Command-line arguments:
        -f / --filename: base name of the output file (defaults to
            ``DEFAULT_FILENAME``); a suffix and ``.csv`` are appended.
        -wt / --wait_time: seconds to wait between requests (default 5).
        -nd / --number_of_days: collect only the last N days of data.
        -td / --today: collect only today's data. Takes precedence over
            ``--number_of_days`` when both are given.
        --testing: flag forwarded to ``export_data_to_file``.

    The output is written under ``../data/partial_datasets/`` (relative to
    the current working directory). The file name ends with today's date,
    or with ``_last_<N>_days`` when ``--number_of_days`` is the active mode.
    """
    logging.basicConfig(
        format='%(asctime)s | %(message)s',
        datefmt='%m/%d/%Y %I:%M:%S %p',
        level=logging.INFO,
    )

    parser = argparse.ArgumentParser(description='Extract job offers.')
    parser.add_argument(
        '-f', '--filename',
        type=str,
        help=('Filename where data will be saved. '
              'The date will be added at the end of it.\n'
              'This argument is optional, the default value '
              'will be saved under the data directory with the '
              'name: offers_XXXX-XX-XX.csv. Where XXXX-XX-XX '
              'is the actual date.'),
        default=DEFAULT_FILENAME,
    )
    parser.add_argument(
        '-wt', '--wait_time',
        type=int,
        help=('Time to wait between requests, used to prevent '
              'the overload of the server.\n'
              'The default wait time is 5 seconds.'),
        default=5,
    )
    parser.add_argument(
        '-nd', '--number_of_days',
        type=int,
        help="Set the number of days to collect data.",
        default=None,
    )
    parser.add_argument(
        '-td', '--today',
        action="store_true",
        help="Collect the data from today.",
    )
    parser.add_argument(
        '--testing',
        action="store_true",
        help="Test with a limited number of pages.",
    )
    args = parser.parse_args()

    # Get ready the different constants to be used.
    today_date = datetime.date.today()
    filename = f'../data/partial_datasets/{args.filename}_{today_date}.csv'
    wait_time = args.wait_time

    requestor = RequestHandler()

    # Select the query mode; --today wins over --number_of_days.
    if args.today:
        logging.info(f"Only todays data: {today_date}")
        requestor.conf_query_todays_spanish_offers()
    elif args.number_of_days is not None:
        # The original message lacked the f-prefix and logged the literal
        # "{args.number_of_days}" placeholder instead of the value.
        logging.info(
            f"Getting data from the last {args.number_of_days} days"
        )
        requestor.conf_query_custom_days(args.number_of_days)
        # In this mode the file is named after the time window, not the date.
        filename = (
            f'../data/partial_datasets/'
            f'{args.filename}_last_{args.number_of_days}_days.csv'
        )
    else:
        logging.info("All the data")
        requestor.conf_query_all_spanish_offers()

    logging.info(f'Data will be saved in: {filename}')

    export_data_to_file(filename, wait_time, requestor, args.testing)

    logging.info('Data extracted correctly.')