def test_export_file_handles_we_do_not_own_copy_false_new_table(
        syn, new_project):
    """Exporting foreign file handles to a new table must fail without copy.

    Calls ``export_tables`` on a table whose file handles belong to another
    user, targeting ``new_project`` with ``copy_file_handles=False``, and
    expects a ``SynapseHTTPError``.
    """
    # User ID #3357179 owns these file handles
    foreign_table_id = "syn19002937"
    expected_error = sc.core.exceptions.SynapseHTTPError
    with pytest.raises(expected_error):
        export_tables(
            syn,
            table_mapping=foreign_table_id,
            target_project=new_project["id"],
            copy_file_handles=False,
        )
def test_export_file_handles_we_do_not_own_copy_false(syn, new_tables):
    """Exporting foreign file handles to an existing table must fail without copy.

    Maps a table whose file handles belong to another user onto the first
    table in ``new_tables`` with ``update=False`` and
    ``copy_file_handles=False``, and expects a ``SynapseHTTPError``.
    """
    # User ID #3357179 owns these file handles
    foreign_table_id = "syn19002937"
    expected_error = sc.core.exceptions.SynapseHTTPError
    with pytest.raises(expected_error):
        export_tables(
            syn,
            table_mapping={
                foreign_table_id: new_tables["schema"][0]["id"],
            },
            update=False,
            copy_file_handles=False,
        )
def main():
    """Log in to Synapse and export the tables in ``TABLE_MAPPING``.

    Credentials come from ``get_env_var_credentials``. Before exporting,
    ``verify_no_new_table_versions`` is run, and the export is restricted
    to the health codes returned by ``get_relevant_healthcodes`` via the
    ``healthCode`` identifier column.
    """
    creds = get_env_var_credentials()
    username = creds["synapseUsername"]
    password = creds["synapsePassword"]
    syn = sc.login(username, password)

    verify_no_new_table_versions(syn)
    healthcodes = get_relevant_healthcodes(syn)

    synapsebridgehelpers.export_tables(
        syn=syn,
        table_mapping=TABLE_MAPPING,
        identifier_col="healthCode",
        identifier=healthcodes,
    )
def main():
    """Log in to Synapse and export the configured tables with file handles.

    All configuration is read from ``get_env_var_args``. The table mapping
    is resolved with ``get_table_mapping`` and the export is restricted to
    the health codes returned by ``get_relevant_healthcodes`` for the
    configured participants table and substudy.
    """
    config = get_env_var_args()
    syn = sc.login(config["synapseUsername"], config["synapsePassword"])

    mapping = get_table_mapping(syn, config["tableMapping"])
    healthcodes = get_relevant_healthcodes(
        syn,
        participants_table=config["participantsTable"],
        substudy=config["substudy"],
    )

    synapsebridgehelpers.export_tables(
        syn=syn,
        table_mapping=mapping,
        identifier_col="healthCode",
        identifier=healthcodes,
        copy_file_handles=True,
    )
def test_manually_pass_source_tables_dict(syn, tables, new_project,
                                          sample_table):
    """Update an existing table from a caller-supplied source DataFrame.

    The first half of ``sample_table`` is stored as a new table in
    ``new_project``; the second half is passed to ``export_tables`` through
    ``source_tables`` with ``update=True``. The target table, queried back
    from Synapse, must equal the stored half followed by the frame that
    ``export_tables`` returned for the source table (ignoring the
    ``raw_data`` column).
    """
    source_table = tables["schema"][0]["id"]
    schema = sc.Schema(name=tables["schema"][0]["name"],
                       columns=tables["columns"][0],
                       parent=new_project["id"])
    split_at = len(sample_table) // 2
    incomplete_table = deepcopy(sample_table.iloc[:split_at])
    rest_of_the_table = deepcopy(sample_table.iloc[split_at:])
    table = syn.store(sc.Table(schema, incomplete_table))

    source_tables = {source_table: rest_of_the_table}
    exported_table = export_tables(
        syn,
        table_mapping={source_table: table.tableId},
        source_tables=source_tables,
        update=True)

    updated_table = syn.tableQuery("select * from {}".format(table.tableId))
    updated_table = updated_table.asDataFrame().reset_index(drop=True)
    updated_table_no_fh = updated_table.drop("raw_data", axis=1)

    update = exported_table[source_table][1]
    # DataFrame.append was removed in pandas 2.0; pd.concat with the same
    # ignore_index/sort arguments builds the identical frame.
    correct_table_no_fh = pd.concat([incomplete_table, update],
                                    ignore_index=True,
                                    sort=False)
    correct_table_no_fh = correct_table_no_fh.drop(
        "raw_data", axis=1).reset_index(drop=True)

    print("returned results \n", updated_table_no_fh)
    print("correct result \n", correct_table_no_fh)
    pd.testing.assert_frame_equal(updated_table_no_fh, correct_table_no_fh)
def test_schema_change(syn, tables, new_project, sample_table):
    """Export into a target table whose schema differs from the source.

    The target is created from a copy of the source columns with one column
    removed, one renamed to ``renamed_col`` and one given
    ``maximumSize = 100``, and is pre-filled with the first half of
    ``sample_table`` adjusted to that schema. After ``export_tables`` runs
    with ``update=False``, the target (restricted to the columns of
    ``sample_table`` minus ``raw_data``) must equal ``sample_table``.
    """
    source_id = tables["schema"][0]["id"]

    # Build the divergent target schema from a private copy of the columns.
    target_cols = deepcopy(tables["columns"][0])
    removed_col = target_cols.pop(2)
    # Index 2 now refers to the column that followed the removed one.
    old_name = target_cols[2]["name"]
    target_cols[2]["name"] = "renamed_col"
    target_cols[3]["maximumSize"] = 100
    target_schema = sc.Schema(name=tables["schema"][0]["name"],
                              columns=target_cols,
                              parent=new_project["id"])

    # Seed the target with half of the data, shaped to the new schema.
    half = len(sample_table) // 2
    seed = deepcopy(sample_table.iloc[:half])
    seed = seed.drop(removed_col["name"], axis=1)
    seed = seed.rename({old_name: "renamed_col"}, axis=1)
    stored = syn.store(sc.Table(target_schema, seed))

    # The return value is not needed; the result is verified by querying.
    export_tables(syn,
                  table_mapping={source_id: stored.tableId},
                  update=False)

    query = syn.tableQuery("select * from {}".format(stored.tableId))
    actual = query.asDataFrame().reset_index(drop=True)
    actual = actual.drop("raw_data", axis=1)

    expected = sample_table.drop("raw_data", axis=1).reset_index(drop=True)
    actual = actual[expected.columns]
    print(actual)
    print(expected)
    pd.testing.assert_frame_equal(actual, expected)
def main():
    """Log in to Synapse and export the configured tables with file handles.

    Configuration is read from ``get_env_var_args``. The export is
    restricted to the health codes from ``get_relevant_healthcodes``; when
    ``additionalHealthcodeJson`` is set, the health codes returned by
    ``get_additional_healthcodes`` are merged in and duplicates removed.
    """
    config = get_env_var_args()
    syn = sc.login(config["synapseUsername"], config["synapsePassword"])

    mapping = get_table_mapping(syn, config["tableMapping"])
    healthcodes = get_relevant_healthcodes(
        syn,
        participants_table=config["participantsTable"],
        substudy=config["substudy"],
    )

    if config["additionalHealthcodeJson"] is not None:
        extra_healthcodes = get_additional_healthcodes(
            syn, synapse_id=config["additionalHealthcodeJson"])
        # Union through a set so each health code appears only once.
        healthcodes = list(set().union(healthcodes, extra_healthcodes))

    synapsebridgehelpers.export_tables(
        syn=syn,
        table_mapping=mapping,
        identifier_col="healthCode",
        identifier=healthcodes,
        copy_file_handles=True,
    )
def test_export_one_table_to_new(syn, new_project, tables, sample_table):
    """Export a single table into a new project.

    The frame returned by ``export_tables`` for the source table must equal
    ``sample_table`` once the ``raw_data`` column is dropped from both and
    the indices are reset.
    """
    source_id = tables["schema"][0]["id"]
    result = export_tables(syn,
                           table_mapping=source_id,
                           target_project=new_project["id"])

    actual = result[source_id][1].drop("raw_data", axis=1)
    actual = actual.reset_index(drop=True)
    expected = sample_table.drop("raw_data", axis=1)
    expected = expected.reset_index(drop=True)
    pd.testing.assert_frame_equal(actual, expected)
def test_export_one_table_to_new_no_filehandles(syn, new_project, tables,
                                                sample_table):
    """Export a single table into a new project with ``copy_file_handles=False``.

    The frame returned for the source table, after its index is reset in
    place, must equal ``sample_table`` in full (``raw_data`` included).
    """
    source_id = tables["schema"][0]["id"]
    result = export_tables(syn,
                           table_mapping=source_id,
                           target_project=new_project["id"],
                           copy_file_handles=False)

    exported_frame = result[source_id][1]
    exported_frame.reset_index(drop=True, inplace=True)
    pd.testing.assert_frame_equal(exported_frame, sample_table)
def test_export_file_handles_we_do_not_own_copy_none_new_table(
        syn, new_project, sample_table):
    """Export foreign file handles to a new table with ``copy_file_handles=None``.

    The export is expected to succeed. The new table, whose ID is the first
    element of the result entry for the source table, is queried back and
    must equal ``sample_table`` once ``raw_data`` is dropped from both.
    """
    # User ID #3357179 owns these file handles
    foreign_table_id = "syn19002937"
    result = export_tables(syn,
                           table_mapping=foreign_table_id,
                           target_project=new_project["id"],
                           copy_file_handles=None)

    new_table_id = result[foreign_table_id][0]
    query = syn.tableQuery("select * from {}".format(new_table_id))
    actual = query.asDataFrame()
    actual = actual.reset_index(drop=True).drop("raw_data", axis=1)

    expected = sample_table.drop("raw_data", axis=1)
    pd.testing.assert_frame_equal(expected, actual)
def test_manually_pass_source_tables_str(syn, tables, new_project,
                                         sample_table):
    """Export one table to a new project from a caller-supplied DataFrame.

    ``table_mapping`` is a single table ID string and ``source_tables``
    maps that ID to ``sample_table``. The frame returned for the source
    table must equal ``sample_table`` once ``raw_data`` is dropped from
    both and the indices are reset.
    """
    source_id = tables["schema"][0]["id"]
    result = export_tables(syn,
                           table_mapping=source_id,
                           source_tables={source_id: sample_table},
                           target_project=new_project["id"])

    actual = result[source_id][1].drop("raw_data", axis=1)
    actual = actual.reset_index(drop=True)
    expected = sample_table.drop("raw_data", axis=1)
    expected = expected.reset_index(drop=True)
    pd.testing.assert_frame_equal(actual, expected)
def test_export_multiple_tables_to_new(syn, new_project, tables,
                                       sample_table):
    """Export every table in ``tables`` into a new project.

    ``table_mapping`` is the list of all source table IDs. The frames
    returned for the first two source tables must each equal
    ``sample_table`` once ``raw_data`` is dropped and the indices reset.
    """
    first_id = tables["schema"][0]["id"]
    second_id = tables["schema"][1]["id"]
    all_ids = [schema["id"] for schema in tables["schema"]]

    result = export_tables(syn,
                           table_mapping=all_ids,
                           target_project=new_project["id"])

    first_actual = result[first_id][1].drop(
        "raw_data", axis=1).reset_index(drop=True)
    second_actual = result[second_id][1].drop(
        "raw_data", axis=1).reset_index(drop=True)
    expected = sample_table.drop("raw_data", axis=1).reset_index(drop=True)

    assert first_actual.equals(expected)
    assert second_actual.equals(expected)
def test_export_file_handles_we_do_not_own_copy_none(syn, new_tables,
                                                     sample_table):
    """Export foreign file handles to an existing table with ``copy_file_handles=None``.

    The export (``update=False``) is expected to succeed. The target table
    is queried back and must equal ``sample_table`` once ``raw_data`` is
    dropped from both.
    """
    # User ID #3357179 owns these file handles
    foreign_table_id = "syn19002937"
    target_id = new_tables["schema"][0]["id"]

    # The return value is not needed; the result is verified by querying.
    export_tables(syn,
                  table_mapping={foreign_table_id: target_id},
                  update=False,
                  copy_file_handles=None)

    query = syn.tableQuery("select * from {}".format(target_id))
    actual = query.asDataFrame()
    actual = actual.reset_index(drop=True).drop("raw_data", axis=1)

    expected = sample_table.drop("raw_data", axis=1)
    pd.testing.assert_frame_equal(expected, actual)
def test_kwargs(syn, tables, new_project, sample_table):
    """Export one table while passing extra filter keyword arguments.

    ``substudy="my-study"`` and ``substudy_col="substudyMemberships"`` are
    forwarded to ``export_tables``. The returned frame must equal the rows
    of ``sample_table`` whose ``substudyMemberships`` value contains
    ``"my-study"`` (``raw_data`` dropped, indices reset on both sides).
    """
    source_table = tables["schema"][0]["id"]
    exported_table = export_tables(syn,
                                   table_mapping=source_table,
                                   target_project=new_project["id"],
                                   substudy="my-study",
                                   substudy_col="substudyMemberships")
    exported_table_no_fh = exported_table[source_table][1].drop(
        "raw_data", axis=1).reset_index(drop=True)

    testing_table_no_fh = sample_table.drop(
        "raw_data", axis=1).reset_index(drop=True)
    to_keep = [
        "my-study" in s
        for s in testing_table_no_fh.substudyMemberships.values
    ]
    # Reset the index again after filtering: the boolean mask leaves gaps
    # in the labels, and assert_frame_equal compares the index as well as
    # the values, so a dropped row in the middle would fail spuriously.
    testing_table_no_fh = testing_table_no_fh.loc[to_keep].reset_index(
        drop=True)
    pd.testing.assert_frame_equal(exported_table_no_fh, testing_table_no_fh)
def test_manually_pass_source_tables_list(syn, tables, new_project,
                                          sample_table):
    """Export several tables to a new project from caller-supplied DataFrames.

    ``table_mapping`` is the list of all source table IDs and
    ``source_tables`` maps the first two IDs to ``sample_table``. The frames
    returned for those two tables must each equal ``sample_table`` once
    ``raw_data`` is dropped and the indices reset.
    """
    first_id = tables["schema"][0]["id"]
    second_id = tables["schema"][1]["id"]
    provided_frames = {first_id: sample_table, second_id: sample_table}
    all_ids = [schema["id"] for schema in tables["schema"]]

    result = export_tables(syn,
                           table_mapping=all_ids,
                           source_tables=provided_frames,
                           target_project=new_project["id"])

    first_actual = result[first_id][1].drop(
        "raw_data", axis=1).reset_index(drop=True)
    second_actual = result[second_id][1].drop(
        "raw_data", axis=1).reset_index(drop=True)
    expected = sample_table.drop("raw_data", axis=1).reset_index(drop=True)

    assert first_actual.equals(expected)
    assert second_actual.equals(expected)
def test_export_multiple_tables_to_preexisting_update(syn, new_project,
                                                      tables, sample_table):
    """Update two pre-existing target tables in a single export.

    Two target tables are stored in ``new_project``, seeded with the first
    half and the first third of ``sample_table`` respectively. After
    ``export_tables`` runs with ``update=True``, each target, queried back
    from Synapse, must equal its seed rows followed by the frame that
    ``export_tables`` returned for the matching source table (ignoring the
    ``raw_data`` column).
    """
    source_table = tables["schema"][0]["id"]
    source_table_2 = tables["schema"][1]["id"]

    schema = sc.Schema(name=tables["schema"][0]["name"],
                       columns=tables["columns"][0],
                       parent=new_project["id"])
    incomplete_table = deepcopy(sample_table.iloc[:len(sample_table) // 2])
    table = syn.store(sc.Table(schema, incomplete_table))

    schema_2 = sc.Schema(name=tables["schema"][1]["name"],
                         columns=tables["columns"][1],
                         parent=new_project["id"])
    incomplete_table_2 = deepcopy(sample_table.iloc[:len(sample_table) // 3])
    table_2 = syn.store(sc.Table(schema_2, incomplete_table_2))

    exported_table = export_tables(syn,
                                   table_mapping={
                                       source_table: table.tableId,
                                       source_table_2: table_2.tableId
                                   },
                                   update=True)

    updated_table = syn.tableQuery("select * from {}".format(table.tableId))
    updated_table = updated_table.asDataFrame().reset_index(drop=True)
    updated_table_no_fh = updated_table.drop("raw_data", axis=1)
    update = exported_table[source_table][1]
    # DataFrame.append was removed in pandas 2.0; pd.concat with the same
    # ignore_index/sort arguments builds the identical frame.
    correct_table_no_fh = pd.concat([incomplete_table, update],
                                    ignore_index=True,
                                    sort=False)
    correct_table_no_fh = correct_table_no_fh.drop(
        "raw_data", axis=1).reset_index(drop=True)

    updated_table_2 = syn.tableQuery("select * from {}".format(
        table_2.tableId))
    updated_table_2 = updated_table_2.asDataFrame().reset_index(drop=True)
    updated_table_2_no_fh = updated_table_2.drop("raw_data", axis=1)
    update_2 = exported_table[source_table_2][1]
    correct_table_no_fh_2 = pd.concat([incomplete_table_2, update_2],
                                      ignore_index=True,
                                      sort=False)
    correct_table_no_fh_2 = correct_table_no_fh_2.drop(
        "raw_data", axis=1).reset_index(drop=True)

    print("returned results \n", updated_table_no_fh)
    print("correct result \n", correct_table_no_fh)
    # Separate asserts so a failure identifies which table mismatched.
    assert updated_table_no_fh.equals(correct_table_no_fh)
    assert updated_table_2_no_fh.equals(correct_table_no_fh_2)
def test_export_one_table_to_preexisting_no_update(syn, new_project, tables,
                                                   sample_table):
    """Export into a pre-existing table with ``update=False``.

    The target table is seeded with the first half of ``sample_table``.
    After the export, the target, queried back from Synapse, must equal the
    whole of ``sample_table`` (``raw_data`` dropped, indices reset).
    """
    source_id = tables["schema"][0]["id"]
    target_schema = sc.Schema(name=tables["schema"][0]["name"],
                              columns=tables["columns"][0],
                              parent=new_project["id"])
    half = len(sample_table) // 2
    seed = deepcopy(sample_table.iloc[:half])
    stored = syn.store(sc.Table(target_schema, seed))

    # The return value is not needed; the result is verified by querying.
    export_tables(syn,
                  table_mapping={source_id: stored.tableId},
                  update=False)

    query = syn.tableQuery("select * from {}".format(stored.tableId))
    actual = query.asDataFrame().reset_index(drop=True)
    actual = actual.drop("raw_data", axis=1)

    expected = sample_table.drop("raw_data", axis=1).reset_index(drop=True)
    print(actual)
    print(expected)
    pd.testing.assert_frame_equal(actual, expected)
def test_table_mapping_exception(syn):
    """``export_tables`` must raise ``TypeError`` for an integer ``table_mapping``."""
    invalid_mapping = 42
    with pytest.raises(TypeError):
        export_tables(syn, table_mapping=invalid_mapping, update=True)