def main(date, bucket, prefix, num_clusters, num_donors, kernel_bandwidth,
         num_pdf_points):
    spark = (
        SparkSession.builder.appName("taar_similarity")
        .enableHiveSupport()
        .getOrCreate()
    )

    # Enforce a sane lower bound on the number of donors.
    if num_donors < 100:
        logger.warning("Less than 100 donors were requested.",
                       extra={"donors": num_donors})
        num_donors = 100

    logger.info("Loading the AMO whitelist...")
    whitelist = load_amo_curated_whitelist()

    logger.info("Computing the list of donors...")

    # Compute the donor clusters and the LR curves.
    cluster_ids, donors_df = get_donors(spark, num_clusters, num_donors, whitelist)
    lr_curves = get_lr_curves(spark, donors_df, cluster_ids, kernel_bandwidth,
                              num_pdf_points)

    # Store them to S3.
    donors = format_donors_dictionary(donors_df)
    store_json_to_s3(json.dumps(donors, indent=2), "donors", date, prefix, bucket)
    store_json_to_s3(json.dumps(lr_curves, indent=2), "lr_curves", date, prefix,
                     bucket)

    spark.stop()
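# Every job in this module persists its output through store_json_to_s3, which
# is defined elsewhere. The sketch below is a hypothetical minimal
# implementation, assuming boto3 and a "{prefix}{base_filename}{date}.json"
# key layout plus an undated "latest" copy; the real helper's key scheme and
# behavior may differ.
import boto3


def store_json_to_s3(json_data, base_filename, date, prefix, bucket):
    # Upload the serialized JSON under a dated key, plus an undated copy that
    # always points at the most recent run (assumed convention).
    client = boto3.client("s3")
    dated_key = "{}{}{}.json".format(prefix, base_filename, date)
    latest_key = "{}{}.json".format(prefix, base_filename)
    for key in (dated_key, latest_key):
        client.put_object(Bucket=bucket, Key=key, Body=json_data.encode("utf-8"))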
def load(self, jdata):
    date = datetime.date.today().strftime("%Y%m%d")
    store_json_to_s3(
        json.dumps(jdata),
        FILTERED_AMO_BASE_FILENAME,
        date,
        AMO_DUMP_PREFIX,
        AMO_DUMP_BUCKET,
    )
def main(date, bucket, prefix):
    logger.info("Retrieving AMO legacy addon replacements list")
    legacy_dict = fetch_legacy_replacement_masterlist()

    if len(legacy_dict) > 0:
        logger.info("Updating active legacy addon replacements list in S3")
        store_json_to_s3(
            json.dumps(legacy_dict, indent=2), EXPORT_FILE_NAME, date, prefix,
            bucket
        )
    else:
        logger.info("Empty list retrieved from AMO legacy replacements API")
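# fetch_legacy_replacement_masterlist is defined elsewhere. A hypothetical
# sketch follows, assuming AMO's paginated replacement-addon endpoint and
# "guid"/"replacement" result fields; the URL, pagination shape, and field
# names are assumptions for illustration, not confirmed API details.
import requests

AMO_REPLACEMENT_URL = "https://addons.mozilla.org/api/v4/addons/replacement-addon/"


def fetch_legacy_replacement_masterlist():
    # Walk the paginated API, mapping each legacy addon GUID to its list of
    # suggested WebExtension replacements.
    legacy_dict = {}
    url = AMO_REPLACEMENT_URL
    while url:
        payload = requests.get(url).json()
        for entry in payload.get("results", []):
            legacy_dict[entry["guid"]] = entry["replacement"]
        url = payload.get("next")
    return legacy_dict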
def main(date, bucket, prefix, num_addons):
    spark = (
        SparkSession.builder.appName("taar_locale")
        .enableHiveSupport()
        .getOrCreate()
    )

    logger.info("Processing top N addons per locale")
    locale_dict = generate_dictionary(spark, num_addons)
    store_json_to_s3(
        json.dumps(locale_dict, indent=2), LOCALE_FILE_NAME, date, prefix, bucket
    )

    spark.stop()
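# generate_dictionary is defined elsewhere. The sketch below shows one way a
# top-N-addons-per-locale computation could look in PySpark. The input table
# name ("addon_installs") and its (locale, addon_id) schema are hypothetical.
from pyspark.sql import functions as F
from pyspark.sql.window import Window


def generate_dictionary(spark, num_addons):
    # Assumed table of one (locale, addon_id) row per client install.
    df = spark.table("addon_installs")
    counts = df.groupBy("locale", "addon_id").count()

    # Rank addons within each locale by install count and keep the top N.
    w = Window.partitionBy("locale").orderBy(F.desc("count"))
    top = (
        counts.withColumn("rank", F.row_number().over(w))
        .filter(F.col("rank") <= num_addons)
    )

    # Collect into {locale: [addon_id, ...]} ordered by rank.
    locale_dict = {}
    for row in sorted(top.collect(), key=lambda r: (r["locale"], r["rank"])):
        locale_dict.setdefault(row["locale"], []).append(row["addon_id"])
    return locale_dict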
def main(date, workers, s3_prefix, s3_bucket):
    amodb = AMODatabase(int(workers))
    addon_map = amodb.fetch_addons()

    try:
        store_json_to_s3(json.dumps(addon_map), AMO_DUMP_FILENAME, date,
                         s3_prefix, s3_bucket)
        logger.info(
            "Completed uploading s3://%s/%s%s.json"
            % (s3_bucket, s3_prefix, AMO_DUMP_FILENAME)
        )
    except Exception:
        # Pass the message through lazy %-formatting and capture the traceback;
        # the original passed the exception as a spare positional argument,
        # which the logging module cannot format.
        logger.exception("Error uploading data to S3")
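# AMODatabase is defined elsewhere. A hypothetical sketch, assuming it crawls
# AMO's paginated addon search API with a pool of `workers` fetcher threads.
# The endpoint URL and the "page_count"/"results"/"guid" response fields are
# assumptions for illustration.
from concurrent.futures import ThreadPoolExecutor

import requests

AMO_SEARCH_URL = "https://addons.mozilla.org/api/v4/addons/search/"


class AMODatabase:
    def __init__(self, workers):
        self.workers = workers

    def _fetch_page(self, page):
        # Fetch one page of addon metadata.
        resp = requests.get(AMO_SEARCH_URL, params={"page": page})
        resp.raise_for_status()
        return resp.json().get("results", [])

    def fetch_addons(self):
        # The first page reports how many pages exist; fetch the remainder in
        # parallel and key the merged results by addon GUID.
        first = requests.get(AMO_SEARCH_URL, params={"page": 1}).json()
        page_count = first.get("page_count", 1)
        results = list(first.get("results", []))
        with ThreadPoolExecutor(max_workers=self.workers) as pool:
            for page_results in pool.map(self._fetch_page,
                                         range(2, page_count + 1)):
                results.extend(page_results)
        return {addon["guid"]: addon for addon in results}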
def _load_s3_data(self, jdata, fname):
    date = datetime.date.today().strftime("%Y%m%d")
    store_json_to_s3(
        json.dumps(jdata), fname, date, AMO_DUMP_PREFIX, AMO_DUMP_BUCKET
    )