예제 #1
0
def download(workers, config, log_level):
    """Run the raw-file download tasks through a local luigi scheduler.

    Initializes ``RedditConfig`` from ``config``, prints it, copies its
    ``max_concurrent_downloads`` value into luigi's ``resources`` section,
    builds one ``DownloadRawFile`` task for every pair yielded by
    ``RedditConfig().make_all_dates_filetypes()``, and prints the summary
    text of the run.

    Args:
        workers: Forwarded to the luigi build as ``workers``.
        config: Passed to ``RedditConfig.initialize``.
        log_level: Forwarded to the luigi build as ``log_level``.
    """
    RedditConfig.initialize(config)
    print(RedditConfig())

    # Register the download limit under luigi's 'resources' section; the
    # value is stored as a string.
    luigi_config = luigi.configuration.get_config()
    download_limit = RedditConfig().max_concurrent_downloads
    luigi_config.set(
        'resources', 'max_concurrent_downloads', str(download_limit))

    # One task per pair produced by the config (presumably date/file type,
    # judging by the method name).
    tasks = []
    for date, filetype in RedditConfig().make_all_dates_filetypes():
        tasks.append(DownloadRawFile(date, filetype))

    run_result = luigi.interface.build(
        tasks,
        workers=workers,
        local_scheduler=True,
        log_level=log_level,
        detailed_summary=True,
    )
    print(run_result.summary_text)
예제 #2
0
def generate(workers, config, log_level):
    """Run the ``ZipDataset`` task through a local luigi scheduler.

    Initializes ``RedditConfig`` from ``config``, prints it, copies its
    three ``max_concurrent_*`` values into luigi's ``resources`` section,
    builds ``ZipDataset()``, and prints the summary text of the run.

    Args:
        workers: Forwarded to the luigi build as ``workers``.
        config: Passed to ``RedditConfig.initialize``.
        log_level: Forwarded to the luigi build as ``log_level``.
    """
    RedditConfig.initialize(config)
    print(RedditConfig())

    # Each resource key in luigi's 'resources' section has the same name as
    # the RedditConfig attribute it is read from; values are stored as
    # strings.
    luigi_config = luigi.configuration.get_config()
    for resource in (
        'max_concurrent_downloads',
        'max_concurrent_build',
        'max_concurrent_sample',
    ):
        limit = getattr(RedditConfig(), resource)
        luigi_config.set('resources', resource, str(limit))

    run_result = luigi.interface.build(
        [ZipDataset()],
        workers=workers,
        local_scheduler=True,
        log_level=log_level,
        detailed_summary=True,
    )
    print(run_result.summary_text)