def update(dataset, submission_date, limit=None):
    """Run ``update_published_v4_files`` for one dataset/date and print stats.

    The S3 bucket and prefix for ``dataset`` are read from ``sources.json``
    in ``METADATA_BUCKET``. Daily stats are sampled before and after the
    update and their difference is printed, followed by the lambda stats.

    Args:
        dataset: Either "telemetry" or "telemetry-release".
        submission_date: Used as both ends of the stats range and forwarded
            to ``update_published_v4_files`` (format is not checked here).
        limit: Optional; truthy values are coerced with ``int()`` and
            forwarded to ``update_published_v4_files``.

    Raises:
        ValueError: If ``dataset`` is not supported, or ``limit`` cannot be
            converted to an integer.
        RuntimeError: If ``sources.json`` is missing from the metadata bucket.
    """
    if limit:
        limit = int(limit)

    if dataset not in ("telemetry", "telemetry-release"):
        raise ValueError("Unsupported dataset")

    conn = boto.connect_s3(host="s3-us-west-2.amazonaws.com")
    meta_bucket = conn.get_bucket(METADATA_BUCKET, validate=False)

    # boto's get_key() returns None for a missing key; fail with a clear
    # message instead of an AttributeError on None.
    sources_key = meta_bucket.get_key("sources.json")
    if sources_key is None:
        raise RuntimeError("sources.json not found in the metadata bucket")

    source = json.loads(sources_key.get_contents_as_string())[dataset]
    bucket = source["bucket"]
    prefix = source["prefix"]

    # Backwards compatibility: the "telemetry-2" prefix maps to "telemetry_v4".
    sdb_name = "telemetry_v4" if prefix == "telemetry-2" else prefix
    sdb = SDB(sdb_name, read_only=False)

    prev = sdb.get_daily_stats(submission_date, submission_date)
    update_published_v4_files(sdb, bucket, prefix, submission_date=submission_date, limit=limit)
    curr = sdb.get_daily_stats(submission_date, submission_date)

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("Filter service stats:")
    print("Note that the following numbers are correct only if there isn't another entity concurrently pushing new submissions:")
    sdb.diff_stats(prev, curr)

    print("AWS lambda stats:")
    sdb.print_lambda_stats(submission_date, submission_date)
# Example #2
# 0
def update(dataset, submission_date, limit=None):
    """Run ``update_published_v4_files`` for one dataset/date and print stats.

    Connects to S3 at ``S3_DEFAULT_ENDPOINT``, reads the bucket and prefix
    for ``dataset`` from ``sources.json`` in ``METADATA_BUCKET``, performs
    the update, and prints the difference between the daily stats sampled
    before and after it, followed by the lambda stats.

    Args:
        dataset: Either "telemetry" or "telemetry-release".
        submission_date: Used as both ends of the stats range and forwarded
            to ``update_published_v4_files`` (format is not checked here).
        limit: Optional; truthy values are coerced with ``int()`` and
            forwarded to ``update_published_v4_files``.

    Raises:
        ValueError: If ``dataset`` is not supported, or ``limit`` cannot be
            converted to an integer.
        RuntimeError: If ``sources.json`` is missing from the metadata bucket.
    """
    if limit:
        limit = int(limit)

    if dataset not in ("telemetry", "telemetry-release"):
        raise ValueError("Unsupported dataset")

    conn = boto.connect_s3(host=S3_DEFAULT_ENDPOINT)
    meta_bucket = conn.get_bucket(METADATA_BUCKET, validate=False)

    # get_key() yields None when the key does not exist; report that
    # explicitly rather than failing with an AttributeError on None.
    sources_key = meta_bucket.get_key("sources.json")
    if sources_key is None:
        raise RuntimeError("sources.json not found in the metadata bucket")

    sources = json.loads(sources_key.get_contents_as_string())
    source = sources[dataset]
    bucket = source["bucket"]
    prefix = source["prefix"]

    if prefix == "telemetry-2":
        sdb = SDB("telemetry_v4", read_only=False)  # Backwards compatibility
    else:
        sdb = SDB(prefix, read_only=False)

    prev = sdb.get_daily_stats(submission_date, submission_date)
    update_published_v4_files(sdb,
                              bucket,
                              prefix,
                              submission_date=submission_date,
                              limit=limit)
    curr = sdb.get_daily_stats(submission_date, submission_date)

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("Filter service stats:")
    print("Note that the following numbers are correct only if there isn't another entity concurrently pushing new submissions:")
    sdb.diff_stats(prev, curr)

    print("AWS lambda stats:")
    sdb.print_lambda_stats(submission_date, submission_date)
def main(limit=None, schema_version=None, from_date=None, to_date=None):
    """Run the published-files update for a schema version and print stats.

    Opens the ``telemetry_v2`` or ``telemetry_v4`` SDB depending on
    ``schema_version`` and calls the matching ``update_published_*_files``
    function. When ``from_date`` is given, daily stats are sampled before
    and after the update and their difference is printed, followed by the
    lambda stats for the same range.

    Args:
        limit: Optional; truthy values are coerced with ``int()`` and
            forwarded to the update function.
        schema_version: Must be "v2" or "v4".
        from_date: Optional start of the submission-date range.
        to_date: Optional end of the range; when ``from_date`` is set and
            this is not, it defaults to today's date formatted as
            ``%Y%m%d``.

    Raises:
        ValueError: If ``schema_version`` is neither "v2" nor "v4", or
            ``limit`` cannot be converted to an integer.
    """
    if from_date and not to_date:
        to_date = datetime.now().strftime("%Y%m%d")

    if limit:
        limit = int(limit)

    if schema_version not in ("v2", "v4"):
        raise ValueError("Unsupported schema version")

    # Pick the store and the matching update routine in one place.
    if schema_version == "v2":
        sdb = SDB("telemetry_v2", read_only=False)
        update_published_files = update_published_v2_files
    else:
        sdb = SDB("telemetry_v4", read_only=False)
        update_published_files = update_published_v4_files

    # Stats are only compared when a date range was requested.
    if from_date:
        prev = sdb.get_daily_stats(from_date, to_date)

    update_published_files(sdb, from_submission_date=from_date, to_submission_date=to_date, limit=limit)

    if from_date:
        curr = sdb.get_daily_stats(from_date, to_date)
        # print("") emits a single blank line on both Python 2 and 3.
        print("")
        print("Filter service stats:")
        print("Note that the following numbers are correct only if there isn't another entity concurrently pushing new submissions:")
        sdb.diff_stats(prev, curr)

        print("AWS lambda stats:")
        sdb.print_lambda_stats(from_date, to_date)