def main():
    """Run the full ETL pass: spin up a Dataproc cluster, execute the
    spark job, then publish a (currently mock) model to Remote Settings.

    Side effects only — uploads/runs a job on GCP Dataproc and writes to
    the Remote Settings service. Returns ``None``.
    """
    # The DataprocFacade will manage cluster creation
    # and destruction once the context exits
    with DataprocFacade(
        settings.GCP_PROJECT_ID, settings.DATAPROC_CLUSTER, settings.GCP_ZONE
    ) as dataproc:
        # Upload the script from the cfretl.scripts directory
        # TODO: this should just pass in a filename - a cluster
        # is only going to run a single job anyway
        dataproc.upload_sparkjob(settings.GCS_BUCKET_NAME, settings.DATAPROC_SCRIPT)

        # TODO: should probably pass a token in here so that we
        # can verify that results were successfully computed
        dataproc.run_job(settings.GCS_BUCKET_NAME, settings.DATAPROC_SCRIPT)

        remote_settings = CFRRemoteSettings()
        remote_settings.create_user_in_test()

        # TODO: do something to test that we have results we're looking for
        # and transform the bq result table
        # into a final model
        # NOTE(review): model is a mock for now — the job results above are
        # not yet consumed here.
        model = load_mock_model()
        remote_settings.write_models(model)
def test_update_weights(WEIGHT_VECTOR):
    """Writing a weight vector and reading it back yields an equal value.

    ``WEIGHT_VECTOR`` is a pytest fixture supplying the model payload.
    Exercises the private read-back hook ``_test_read_models`` to verify
    the round trip through Remote Settings.
    """
    cfr_remote = CFRRemoteSettings()
    # write_models returns a truthy value on success
    assert cfr_remote.write_models(WEIGHT_VECTOR)
    actual = cfr_remote._test_read_models()
    assert actual == WEIGHT_VECTOR
# NOTE(review): truncated fragment — this line begins mid-docstring; the
# enclosing `def` (presumably `generate_cfr_cfgdata(version_code)`, given the
# call visible later in this same line) and the start of its docstring are
# outside this view, and the nesting level of the trailing
# `remote_settings...`/`print(...)` statements cannot be determined from here.
# Left byte-identical pending full-file context — do not reformat blindly.
BQ output table and coerce it into values for RemoteSettings JSON blob """ model = CFRModel() data = [] for idx, cfr_id in enumerate(CFR_ID_LIST): prior_0 = random.random() snip = model.one_cfr( cfr_id, [prior_0, 1 - prior_0], [ [random.randint(1, 10000) for i in FEATURES_LIST], [random.randint(1, 10000) for i in FEATURES_LIST], ], ) data.append(snip) return model.generate_cfr_model(data, version_code) remote_settings.create_user_in_test() remote_settings.clone_to_cfr_control(CFRS) remote_settings.clone_to_cfr_experiment(CFRS) json_model = generate_cfr_cfgdata(version_code) remote_settings.write_models(json_model) print("Wrote out version : {:d}".format(version_code)) print("=" * 20, datetime.now(), "=" * 20)
def test_write_weights(WEIGHT_VECTOR):
    """Writing a weight vector succeeds and the stored value matches.

    ``WEIGHT_VECTOR`` is a pytest fixture supplying the model payload.
    Unlike ``test_update_weights``, comparison is delegated to the
    module-level ``_compare_weights`` helper rather than plain equality.
    """
    cfr_remote = CFRRemoteSettings()
    # write_models returns a truthy value on success
    assert cfr_remote.write_models(WEIGHT_VECTOR)
    actual = cfr_remote._test_read_models()
    _compare_weights(WEIGHT_VECTOR, actual)