コード例 #1
0
ファイル: launcher.py プロジェクト: bpjobin/Kijiji-Scraper
def main():
    """Run the scraper from the command line.

    Workflow, in order:

    1. Parse the arguments. ``args.version`` prints ``VERSION`` and
       ``args.init`` calls ``init()``; both then exit with status 0.
    2. Pick the config file: ``args.conf`` when given, otherwise
       ``.kijiji_scraper/config.yaml`` looked up through ``find_file`` with
       the ``HOME`` / ``XDG_CONFIG_HOME`` / ``APPDATA`` names, otherwise
       ``config.yaml`` two directory levels above this file's path.
    3. Load three YAML documents from it (email settings, Mattermost
       settings, URLs to scrape). Without a config file all three are empty
       and both email and Mattermost notifications are disabled.
    4. Pick the ads file (unless ``args.all`` is set): ``args.ads``, then
       ``ads.json`` in the current directory, then
       ``.kijiji_scraper/ads.json`` via ``find_file`` with ``create=True``.
    5. Scrape every URL (``args.url`` overrides the config), print a summary,
       and send the new ads by email and/or Mattermost.
    6. Call ``save_ads()`` on the scraper when an ads file is in use.

    Exits with status -1 when there is no URL to scrape.
    """
    # parse the arguments
    args = parse_args()

    # sys.exit() is used rather than the exit() builtin: the latter is
    # injected by the `site` module and is missing under `python -S` or in
    # frozen/embedded interpreters.
    if args.version:
        print("Version:\t\t{}".format(VERSION))
        sys.exit(0)

    if args.init:
        init()
        sys.exit(0)

    # Handle custom config file
    if args.conf:
        filepath = args.conf
    else:
        filename = ".kijiji_scraper/config.yaml"
        # Find the default config file from env variables
        filepath = find_file(['HOME', 'XDG_CONFIG_HOME', 'APPDATA'],
                             [filename])
        if not filepath:
            # Find the default config file in the install directory
            abspath = os.path.abspath(__file__)
            dname = os.path.dirname(os.path.dirname(abspath))
            filepath = os.path.join(dname, "config.yaml")
            if not os.path.exists(filepath):
                filepath = None

    if filepath:
        # Get config values. YAML streams are UTF-8, so do not rely on the
        # platform's default text encoding (e.g. cp1252 on Windows).
        with open(filepath, "r", encoding="utf-8") as config_file:
            # safe_load_all is lazy: it must be consumed while the file is
            # still open, which the unpacking below does. The config must
            # hold exactly three documents or the unpacking raises ValueError.
            email_config, mattermost_config, urls_to_scrape = yaml.safe_load_all(
                config_file)
        print("Loaded config file: %s" % filepath)
    else:
        print("No config file loaded")
        email_config, mattermost_config, urls_to_scrape = ({}, {}, {})
        # Do not try to send mail if no config file is loaded
        args.skipmail = True
        args.skip_mattermost = True

    # Initialize the KijijiScraper and email client
    ads_filepath = None
    if not args.all:
        if args.ads:
            ads_filepath = args.ads
        else:
            # Find default ads.json file in PWD directory for retro-compatibility
            if os.path.exists("ads.json"):
                ads_filepath = "ads.json"
            # Find default ads.json file in env variables
            if not ads_filepath:
                ads_filepath = find_file(
                    ['HOME', 'XDG_CONFIG_HOME', 'APPDATA'],
                    ['.kijiji_scraper/ads.json'],
                    default_content='{}',
                    create=True)
        print("Ads file: {}".format(ads_filepath))
    kijiji_scraper = KijijiScraper(ads_filepath)

    # Overwrite search URLs if specified
    if args.url:
        urls_to_scrape = [{'url': u} for u in args.url]

    # Nice quit if no URLs
    if not urls_to_scrape:
        print(
            "You must supply at least one URL to scrape. Use --url or configure URLs in the config file."
        )
        sys.exit(-1)

    # The client only exists when Mattermost posting is enabled, so its
    # presence alone is enough to gate posting further down.
    mm_client = None
    if not args.skip_mattermost:
        mm_client = MattermostClient(mattermost_config)

    # Scrape each url given in config file
    for url_dict in urls_to_scrape:
        url = url_dict.get("url")
        # An `exclude:` key with no value is parsed as None by YAML; fall
        # back to an empty list so the checks and join below cannot fail.
        exclude_words = url_dict.get("exclude") or []

        print("Scraping: {}".format(url))
        if exclude_words:
            print("Excluding: " + ", ".join(exclude_words))

        kijiji_scraper.set_exclude_list(exclude_words)
        ads, email_title = kijiji_scraper.scrape_kijiji_for_ads(url)

        ad_count = len(ads)
        info_string = "Found %s new ads" % ad_count \
            if ad_count != 1 else "Found 1 new ad"
        print(info_string)

        # Print ads summary list. The summary is written as raw UTF-8 bytes
        # to the underlying binary stream, so flush the text layer first;
        # otherwise earlier print() output still pending there could appear
        # after the summary when stdout is not line-buffered.
        sys.stdout.flush()
        sys.stdout.buffer.write(get_ads_summary(ads).encode('utf-8'))

        # Send email
        if not args.skipmail and ad_count:
            email_client = EmailClient(email_config)
            # Overwrite email recipients if specified
            if args.email:
                email_client.receiver = ','.join(args.email)
            email_client.mail_ads(ads, email_title)
            print("Email sent to %s" % email_client.receiver)
        else:
            print("No email sent")

        # Post to Mattermost
        if mm_client and ad_count:
            mm_client.post_ads(ads)
            print("Ads sent to Mattermost in channel {}".format(
                mattermost_config.get("mm_channel")))

    # Persist the ads only when an ads file is in use
    if ads_filepath:
        kijiji_scraper.save_ads()
コード例 #2
0
import os

from kijiji_scraper.kijiji_scraper import KijijiScraper
from kijiji_scraper.discord_client import DiscordClient

if __name__ == "__main__":
    args = sys.argv
    skip_flag = "-s" in args
    current_directory = os.path.dirname(os.path.realpath(__file__))

    # Get config values
    with open(current_directory + "/config.yaml", "r") as config_file:
        discord_config, urls_to_scrape = yaml.safe_load_all(config_file)

    # Initialize the KijijiScraper
    kijiji_scraper = KijijiScraper()
    discord_client = DiscordClient(discord_config)

    # Scrape each url given in config file
    for url_dict in urls_to_scrape:
        url = url_dict.get("url")
        exclude_words = url_dict.get("exclude", [])

        print(f"Scraping: {url}")
        if len(exclude_words):
            print("Excluding: " + ", ".join(exclude_words))

        kijiji_scraper.set_exclude_list(exclude_words)
        ads, discord_title = kijiji_scraper.scrape_kijiji_for_ads(url)

        info_string = f"Found {len(ads)} new ads\n" \