コード例 #1
0
def save_train_predictions(prediction, objetivo, model_name, hyperparams):
    """Persist a trimmed copy of the training predictions.

    Keeps only the bias-analysis columns of ``prediction``, renames
    ``prediction`` to ``score`` and ``label`` to ``label_value``, tags every
    row with the model's ``s3_name`` and a ``fecha`` string derived from
    ``dayofmonth``, converts the result to pandas and hands it to
    ``save_rds_pandas`` together with the string ``"predictions.train"``.

    Args:
        prediction: DataFrame exposing the Spark DataFrame API (``select``,
            ``withColumn``, ``toPandas``). It must contain a ``dayofmonth``
            column; the other listed columns are kept only if present.
        objetivo: Passed through to ``parse_filename``.
        model_name: Passed through to ``parse_filename``.
        hyperparams: Passed through to ``parse_filename``.

    Returns:
        None.
    """
    # The stored name is parse_filename's result without its first two
    # characters.
    s3_name = parse_filename(objetivo, model_name, hyperparams)[2:]

    source_cols = [
        'dayofmonth', 'flight_number_reporting_airline', 'prediction',
        'originwac', 'label', 'distance'
    ]
    df_bias = prediction.select(
        [c for c in prediction.columns if c in source_cols])
    df_bias = (df_bias
               .withColumnRenamed("prediction", "score")
               .withColumnRenamed("label", "label_value")
               .withColumn('s3_name', lit(s3_name)))

    # Zero-pad every single-digit day (1-9) so that 'fecha' has a uniform
    # width. The bound must be 10, not 9: with `< 9` day 9 was left unpadded
    # and produced "2019129" instead of "20191209".
    df_bias = df_bias.withColumn(
        'aux',
        f.when(f.col('dayofmonth') < 10, "0").otherwise(""))
    # NOTE(review): year and month are hard-coded to 2019 / 12 here.
    df_bias = df_bias.withColumn(
        'fecha', concat(lit("2019"), lit("12"), col('aux'), col('dayofmonth')))

    # Drop the helper columns ('dayofmonth', 'aux'); 'prediction' no longer
    # exists at this point because it was renamed to 'score'.
    output_cols = [
        'flight_number_reporting_airline', 'originwac', 'label_value',
        'distance', 'score', 's3_name', 'fecha'
    ]
    df_bias = df_bias.select([c for c in df_bias.columns if c in output_cols])

    save_rds_pandas(df_bias.toPandas(), "predictions.train")
コード例 #2
0
	def output(self):
		"""Return the S3 target that this task produces.

		The path is ``s3://<bucname>`` followed by the result of
		``parse_filename`` with its first character removed, and the
		``.model.zip`` suffix.
		"""
		target_name, algorithm = self.obj, self.model
		settings = {
			"iter": int(self.numIt),
			"pca": int(self.numPCA),
		}

		key = parse_filename(target_name, algorithm, settings)
		# Drop the leading character of the parsed name before appending it
		# to the bucket.
		s3_path = "".join(["s3://", str(self.bucname), key[1:], ".model.zip"])

		return luigi.contrib.s3.S3Target(path=s3_path)
コード例 #3
0
def add_meta_data(objetivo, model_name, hyperparams, log, train_time,
                  test_split, train_nrows):
    """Insert one row describing a trained model into ``metadatos.models``.

    Args:
        objetivo: Stored in the ``objetivo`` column and passed to
            ``parse_filename``.
        model_name: Stored in the ``model_name`` column and passed to
            ``parse_filename``.
        hyperparams: Passed to ``parse_filename`` and stored JSON-encoded in
            the ``hyperparams`` column.
        log: Mapping that must provide the keys ``'AUROC'``, ``'AUPR'``,
            ``'precision'``, ``'recall'`` and ``'F1 Measure'``.
        train_time: Stored as given in the ``train_time`` column.
        test_split: Stored as given in the ``test_split`` column.
        train_nrows: Stored as given in the ``train_nrows`` column.

    Raises:
        KeyError: If ``log`` lacks one of the metric keys listed above.
    """
    # The stored name is parse_filename's result without its first two
    # characters.
    model_key = parse_filename(objetivo, model_name, hyperparams)[2:]

    # Metric values, in the column order expected by the INSERT statement.
    metric_keys = ('AUROC', 'AUPR', 'precision', 'recall', 'F1 Measure')
    metrics = tuple(log[key] for key in metric_keys)

    # 'fecha' is today's date formatted as YYYYMMDD.
    stamp = date.today().strftime("%Y%m%d")

    leading = (stamp, objetivo, model_name, model_key,
               json.dumps(hyperparams))
    trailing = (train_time, test_split, train_nrows)
    values = leading + metrics + trailing

    query = """ INSERT INTO metadatos.models (fecha, objetivo, model_name, s3_name, hyperparams, AUROC, AUPR, precision, recall, f1, train_time,  test_split, train_nrows )   VALUES ( %s, %s,%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s  ) """
    insert_query(query, values)