Example #1
def main(date, bucket, prefix, num_clusters, num_donors, kernel_bandwidth,
         num_pdf_points):
    spark = (SparkSession.builder.appName(
        "taar_similarity").enableHiveSupport().getOrCreate())

    if num_donors < 100:
        logger.warning("Less than 100 donors were requested.",
                       extra={"donors": num_donors})
        num_donors = 100

    logger.info("Loading the AMO whitelist...")
    whitelist = load_amo_curated_whitelist()

    logger.info("Computing the list of donors...")

    # Compute the donors clusters and the LR curves.
    cluster_ids, donors_df = get_donors(spark, num_clusters, num_donors,
                                        whitelist)
    lr_curves = get_lr_curves(spark, donors_df, cluster_ids, kernel_bandwidth,
                              num_pdf_points)

    # Store them.
    donors = format_donors_dictionary(donors_df)
    store_json_to_s3(json.dumps(donors, indent=2), 'donors', date, prefix,
                     bucket)
    store_json_to_s3(json.dumps(lr_curves, indent=2), 'lr_curves', date,
                     prefix, bucket)
    spark.stop()
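The job in Example #1 receives all of its parameters as plain function arguments. The sketch below shows one way such a main() could be exposed on the command line; the use of click and the option names are illustrative assumptions, not part of the original snippet.

import click

# Hypothetical CLI wrapper for the main() above; the option names simply
# mirror the function parameters and are not taken from the original source.
@click.command()
@click.option("--date", required=True)
@click.option("--bucket", required=True)
@click.option("--prefix", required=True)
@click.option("--num_clusters", type=int, required=True)
@click.option("--num_donors", type=int, required=True)
@click.option("--kernel_bandwidth", type=float, required=True)
@click.option("--num_pdf_points", type=int, required=True)
def cli(date, bucket, prefix, num_clusters, num_donors, kernel_bandwidth,
        num_pdf_points):
    main(date, bucket, prefix, num_clusters, num_donors, kernel_bandwidth,
         num_pdf_points)


if __name__ == "__main__":
    cli()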
Example #2
def load(self, jdata):
    date = datetime.date.today().strftime("%Y%m%d")
    store_json_to_s3(json.dumps(jdata),
                     FILTERED_AMO_BASE_FILENAME,
                     date,
                     AMO_DUMP_PREFIX,
                     AMO_DUMP_BUCKET)
Example #3
def main(date, bucket, prefix):
    logger.info("Retreiving AMO legacy addon replacements list")
    legacy_dict = fetch_legacy_replacement_masterlist()

    if len(legacy_dict) > 0:
        logger.info("Updating active legacy addon replacements list in s3")
        store_json_to_s3(json.dumps(legacy_dict, indent=2), EXPORT_FILE_NAME,
                         date, prefix, bucket)
    else:
        logger.info("EMPTY list retrieved from AMO legacy recs API")
Example #4
def main(date, bucket, prefix, num_addons):
    spark = (SparkSession.builder.appName(
        "taar_locale").enableHiveSupport().getOrCreate())

    logger.info("Processing top N addons per locale")
    locale_dict = generate_dictionary(spark, num_addons)
    store_json_to_s3(json.dumps(locale_dict, indent=2), LOCALE_FILE_NAME, date,
                     prefix, bucket)

    spark.stop()
Example #5
def main(date, workers, s3_prefix, s3_bucket):
    amodb = AMODatabase(int(workers))

    addon_map = amodb.fetch_addons()

    try:
        store_json_to_s3(json.dumps(addon_map), AMO_DUMP_FILENAME, date,
                         s3_prefix, s3_bucket)
        logger.info("Completed uploading s3://%s/%s%s.json" %
                    (s3_bucket, s3_prefix, AMO_DUMP_FILENAME))
    except Exception as e:
        logger.error("Error uploading data to S3: %s", e)
Example #6
def _load_s3_data(self, jdata, fname):
    date = datetime.date.today().strftime("%Y%m%d")
    store_json_to_s3(json.dumps(jdata), fname, date, AMO_DUMP_PREFIX,
                     AMO_DUMP_BUCKET)
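Every example above calls store_json_to_s3 with the same argument order: a JSON string, a base filename, a date stamp, an S3 key prefix, and a bucket name. The helper itself is not shown here; the following is a minimal sketch of what such a helper could look like with boto3, and the key layout (a dated snapshot plus an undated "latest" copy) is an assumption, not the actual implementation.

import boto3


def store_json_to_s3(json_data, base_filename, date, prefix, bucket):
    # Illustrative sketch only: upload the JSON string under the given
    # prefix, once with the date in the key and once as a "latest" copy.
    client = boto3.client("s3")
    body = json_data.encode("utf-8")

    # Dated snapshot, e.g. <prefix><base_filename><YYYYMMDD>.json
    client.put_object(
        Bucket=bucket,
        Key="{}{}{}.json".format(prefix, base_filename, date),
        Body=body,
        ContentType="application/json",
    )

    # Undated "latest" copy, matching the path logged in Example #5.
    client.put_object(
        Bucket=bucket,
        Key="{}{}.json".format(prefix, base_filename),
        Body=body,
        ContentType="application/json",
    )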