def main():
    """Run the command-line scraping workflow.

    Steps, in order:

    1. Parse the command-line arguments; ``--version`` and ``--init`` are
       handled immediately and terminate the process with status 0.
    2. Locate a config file (explicit path, then the per-user location, then
       ``config.yaml`` next to the install directory) and load it.  When no
       config file is found, email and Mattermost notifications are disabled.
    3. Resolve the ads file used to remember already-seen ads, unless all ads
       were requested via ``args.all``.
    4. Scrape every configured URL, print a summary, and send the new ads by
       email and/or Mattermost when those notifications are enabled.
    5. Save the ads when an ads file is in use.

    Raises:
        ValueError: if the config file does not contain exactly three YAML
            documents.
        SystemExit: after ``--version`` / ``--init`` (status 0) or when there
            is no URL to scrape (status -1).
    """
    args = parse_args()

    # ``sys.exit`` rather than the ``exit`` helper: the latter is injected by
    # the ``site`` module and is not guaranteed to exist (``python -S``,
    # frozen applications).
    if args.version:
        print("Version:\t\t{}".format(VERSION))
        sys.exit(0)
    if args.init:
        init()
        sys.exit(0)

    # Handle custom config file
    if args.conf:
        filepath = args.conf
    else:
        filename = ".kijiji_scraper/config.yaml"
        # Find the default config file from env variables
        filepath = find_file(['HOME', 'XDG_CONFIG_HOME', 'APPDATA'], [filename])
    if not filepath:
        # Find the default config file in the install directory
        abspath = os.path.abspath(__file__)
        dname = os.path.dirname(os.path.dirname(abspath))
        filepath = os.path.join(dname, "config.yaml")
        if not os.path.exists(filepath):
            filepath = None

    if filepath:
        # Open in binary mode so PyYAML detects the encoding itself (UTF-8 by
        # default) instead of depending on the platform's locale encoding.
        with open(filepath, "rb") as config_file:
            # safe_load_all is lazy: consume it while the file is still open.
            documents = list(yaml.safe_load_all(config_file))
        if len(documents) != 3:
            raise ValueError(
                "Config file {} must contain exactly 3 YAML documents "
                "(email config, Mattermost config, URLs to scrape); "
                "found {}".format(filepath, len(documents)))
        email_config, mattermost_config, urls_to_scrape = documents
        print("Loaded config file: %s" % filepath)
    else:
        print("No config file loaded")
        email_config, mattermost_config, urls_to_scrape = ({}, {}, [])
        # Do not try to send notifications if no config file is loaded
        args.skipmail = True
        args.skip_mattermost = True

    # Initialize the KijijiScraper
    ads_filepath = None
    if not args.all:
        if args.ads:
            ads_filepath = args.ads
        else:
            # Find default ads.json file in PWD directory for retro-compatibility
            if os.path.exists("ads.json"):
                ads_filepath = "ads.json"
            # Find default ads.json file in env variables
            if not ads_filepath:
                ads_filepath = find_file(
                    ['HOME', 'XDG_CONFIG_HOME', 'APPDATA'],
                    ['.kijiji_scraper/ads.json'],
                    default_content='{}',
                    create=True)
        print("Ads file: {}".format(ads_filepath))
    kijiji_scraper = KijijiScraper(ads_filepath)

    # Overwrite search URLs if specified
    if args.url:
        urls_to_scrape = [{'url': u} for u in args.url]

    # Nice quit if no URLs
    if not urls_to_scrape:
        print(
            "You must supply at least one URL to scrape. Use --url or configure URLs in the config file."
        )
        sys.exit(-1)

    # The Mattermost client is only created when notifications are enabled,
    # so a non-None client is the single condition checked in the loop below.
    mm_client = None
    if not args.skip_mattermost:
        mm_client = MattermostClient(mattermost_config)

    # Scrape each url given in config file
    for url_dict in urls_to_scrape:
        url = url_dict.get("url")
        exclude_words = url_dict.get("exclude", [])

        print("Scraping: {}".format(url))
        if len(exclude_words):
            print("Excluding: " + ", ".join(exclude_words))

        kijiji_scraper.set_exclude_list(exclude_words)
        ads, email_title = kijiji_scraper.scrape_kijiji_for_ads(url)

        if len(ads) != 1:
            info_string = "Found %s new ads" % len(ads)
        else:
            info_string = "Found 1 new ad"
        print(info_string)

        # Print ads summary list.  The summary is written as raw UTF-8 bytes
        # so it does not depend on the console encoding; flush the text layer
        # first so earlier print() output cannot end up after it when stdout
        # is block-buffered (e.g. piped), and flush again afterwards so later
        # print() output cannot overtake it.
        sys.stdout.flush()
        sys.stdout.buffer.write(get_ads_summary(ads).encode('utf-8'))
        sys.stdout.buffer.flush()

        # Send email
        if not args.skipmail and len(ads):
            email_client = EmailClient(email_config)
            # Overwrite email recipients if specified
            if args.email:
                email_client.receiver = ','.join(args.email)
            email_client.mail_ads(ads, email_title)
            print("Email sent to %s" % email_client.receiver)
        else:
            print("No email sent")

        # Post to Mattermost
        if mm_client and len(ads):
            mm_client.post_ads(ads)
            print("Ads sent to Mattermost in channel {}".format(
                mattermost_config.get("mm_channel")))

    if ads_filepath:
        kijiji_scraper.save_ads()
import os from kijiji_scraper.kijiji_scraper import KijijiScraper from kijiji_scraper.discord_client import DiscordClient if __name__ == "__main__": args = sys.argv skip_flag = "-s" in args current_directory = os.path.dirname(os.path.realpath(__file__)) # Get config values with open(current_directory + "/config.yaml", "r") as config_file: discord_config, urls_to_scrape = yaml.safe_load_all(config_file) # Initialize the KijijiScraper kijiji_scraper = KijijiScraper() discord_client = DiscordClient(discord_config) # Scrape each url given in config file for url_dict in urls_to_scrape: url = url_dict.get("url") exclude_words = url_dict.get("exclude", []) print(f"Scraping: {url}") if len(exclude_words): print("Excluding: " + ", ".join(exclude_words)) kijiji_scraper.set_exclude_list(exclude_words) ads, discord_title = kijiji_scraper.scrape_kijiji_for_ads(url) info_string = f"Found {len(ads)} new ads\n" \