コード例 #1
0
def run(date_interval, src="s3n://ooni-public/reports-sanitised/streams/",
        dst="s3n://ooni-public/processed/",
        imported_dir="s3n://ooni-public/reports-sanitised/yaml/",
        worker_processes=16):
    """Schedule and run a SparkResultsToDatabase task per imported date.

    Every date produced by ``get_date_interval(date_interval)`` whose string
    form is contained in the result of ``get_imported_dates`` gets one task
    added to a luigi worker; the worker is then run and stopped.

    Args:
        date_interval: Interval specification handed to
            ``get_date_interval``; the result is iterated date by date.
        src: Source location passed to each task.
        dst: Destination location passed to each task.
        imported_dir: Location handed to ``get_imported_dates`` together
            with the AWS credentials read from ``config``; its result is
            used to filter the dates.
        worker_processes: Number of processes given to the luigi worker.
    """
    sch = luigi.scheduler.CentralPlannerScheduler()
    w = luigi.worker.Worker(
        scheduler=sch, worker_processes=worker_processes)

    # Guard everything that happens once the worker exists so that it is
    # stopped even when the S3 lookup, scheduling or the run itself raises.
    try:
        imported_dates = get_imported_dates(
            imported_dir, aws_access_key_id=config.aws.access_key_id,
            aws_secret_access_key=config.aws.secret_access_key)

        for date in get_date_interval(date_interval):
            if str(date) not in imported_dates:
                continue

            # NOTE(review): the message names CountInterestingReports while
            # the task added below is SparkResultsToDatabase -- confirm the
            # wording is intended before changing it.
            logger.info("Running CountInterestingReports for %s on %s to %s",
                        date, src, dst)
            w.add(SparkResultsToDatabase(src=src, date=date, dst=dst))

        w.run()
    finally:
        w.stop()
コード例 #2
0
def run(src, dst_private, dst_public, date_interval, worker_processes=16):
    """Schedule and run an AggregateYAMLReports task per date in the interval.

    One task is added to a luigi worker for every date produced by
    ``get_date_interval(date_interval)``; the worker is then run and stopped.

    Args:
        src: Source location passed to each task.
        dst_private: Private destination passed to each task.
        dst_public: Public destination passed to each task.
        date_interval: Interval specification handed to
            ``get_date_interval``; the result is iterated date by date.
        worker_processes: Number of processes given to the luigi worker.
    """
    sch = luigi.scheduler.CentralPlannerScheduler()
    w = luigi.worker.Worker(scheduler=sch,
                            worker_processes=worker_processes)

    # Guard everything that happens once the worker exists so that it is
    # stopped even when scheduling or the run itself raises.
    try:
        for date in get_date_interval(date_interval):
            logger.debug("working on %s", date)
            w.add(AggregateYAMLReports(dst_private=dst_private,
                                       dst_public=dst_public,
                                       src=src, date=date))
        w.run()
    finally:
        w.stop()
コード例 #3
0
def run(src, dst_private, dst_public, date_interval, worker_processes=16):
    """Schedule and run a ReportHeadersToDatabase task per imported date.

    Every date produced by ``get_date_interval(date_interval)`` whose string
    form is contained in the result of ``get_imported_dates(src, ...)`` gets
    one task added to a luigi worker; the worker is then run and stopped.

    Args:
        src: Source location; handed to ``get_imported_dates`` (with the AWS
            credentials read from ``config``) and passed to each task.
        dst_private: Private destination passed to each task.
        dst_public: Public destination passed to each task.
        date_interval: Interval specification handed to
            ``get_date_interval``; the result is iterated date by date.
        worker_processes: Number of processes given to the luigi worker.
    """
    sch = luigi.scheduler.CentralPlannerScheduler()
    w = luigi.worker.Worker(scheduler=sch,
                            worker_processes=worker_processes)

    # Guard everything that happens once the worker exists so that it is
    # stopped even when the S3 lookup, scheduling or the run itself raises.
    try:
        imported_dates = get_imported_dates(
            src,
            aws_access_key_id=config.aws.access_key_id,
            aws_secret_access_key=config.aws.secret_access_key)

        for date in get_date_interval(date_interval):
            if str(date) not in imported_dates:
                continue
            # Logged through the root ``logging`` module, as in the original.
            logging.info("adding headers for date: %s", date)
            w.add(ReportHeadersToDatabase(dst_private=dst_private,
                                          dst_public=dst_public,
                                          src=src, date=date))
        w.run()
    finally:
        w.stop()