def test_tables_csv(syn, project):

    # Define schema
    cols = [Column(name='Name', columnType='STRING'),
            Column(name='Born', columnType='INTEGER'),
            Column(name='Hipness', columnType='DOUBLE'),
            Column(name='Living', columnType='BOOLEAN')]
    schema = Schema(name='Jazz Guys', columns=cols, parent=project)

    data = [["John Coltrane", 1926, 8.65, False],
            ["Miles Davis", 1926, 9.87, False],
            ["Bill Evans", 1929, 7.65, False],
            ["Paul Chambers", 1935, 5.14, False],
            ["Jimmy Cobb", 1929, 5.78, True],
            ["Scott LaFaro", 1936, 4.21, False],
            ["Sonny Rollins", 1930, 8.99, True],
            ["Kenny Burrel", 1931, 4.37, True]]

    # the following creates a CSV file and uploads it to create a new table
    table = syn.store(Table(schema, data))

    # Query and download an identical CSV
    results = syn.tableQuery("select * from %s" % table.schema.id, resultsAs="csv",
                             includeRowIdAndRowVersion=False)

    # Test that CSV file came back as expected
    for expected_row, row in zip(data, results):
        assert expected_row == row, "expected %s but got %s" % (expected_row, row)
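# A minimal follow-on sketch (reusing the `syn` and `table` objects from the test above,
# which is an assumption): the same query results can also be loaded into a pandas
# DataFrame instead of being compared row by row against the downloaded CSV.
results = syn.tableQuery("select * from %s" % table.schema.id)
df = results.asDataFrame()  # requires pandas to be installed
print(df.head())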
def dontruntest_big_tables(syn, project):
    cols = [Column(name='name', columnType='STRING', maximumSize=1000),
            Column(name='foo', columnType='STRING', enumValues=['foo', 'bar', 'bat']),
            Column(name='x', columnType='DOUBLE'),
            Column(name='n', columnType='INTEGER'),
            Column(name='is_bogus', columnType='BOOLEAN')]

    table1 = syn.store(Schema(name='Big Table', columns=cols, parent=project))

    rows_per_append = 10
    for i in range(1000):
        rows = []
        for j in range(rows_per_append):
            foo = cols[1].enumValues[random.randint(0, 2)]
            rows.append(Row(('Robot ' + str(i * rows_per_append + j),
                             foo,
                             random.random() * 200.0,
                             random.randint(0, 100),
                             random.random() >= 0.5)))
        syn.store(RowSet(columns=cols, schema=table1, rows=rows))

    syn.tableQuery("select * from %s" % table1.id)

    results = syn.tableQuery("select n, COUNT(n), MIN(x), AVG(x), MAX(x), SUM(x) from %s group by n"
                             % table1.id)
    results.asDataFrame()
def dontruntest_big_csvs(syn, project, schedule_for_cleanup):
    cols = [Column(name='name', columnType='STRING', maximumSize=1000),
            Column(name='foo', columnType='STRING', enumValues=['foo', 'bar', 'bat']),
            Column(name='x', columnType='DOUBLE'),
            Column(name='n', columnType='INTEGER'),
            Column(name='is_bogus', columnType='BOOLEAN')]

    schema1 = syn.store(Schema(name='Big Table', columns=cols, parent=project))

    # write rows to CSV file
    with tempfile.NamedTemporaryFile(delete=False) as temp:
        schedule_for_cleanup(temp.name)
        filename = temp.name

    with io.open(filename, mode='w', encoding="utf-8", newline='') as temp:
        writer = csv.writer(temp, quoting=csv.QUOTE_NONNUMERIC, lineterminator=str(os.linesep))
        writer.writerow([col.name for col in cols])
        for i in range(10):
            for j in range(100):
                foo = cols[1].enumValues[random.randint(0, 2)]
                writer.writerow(('Robot ' + str(i * 100 + j),
                                 foo,
                                 random.random() * 200.0,
                                 random.randint(0, 100),
                                 random.random() >= 0.5))

    # upload CSV
    syn._uploadCsv(filepath=temp.name, schema=schema1)

    from synapseclient.table import CsvFileTable
    CsvFileTable.from_table_query(syn, "select * from %s" % schema1.id)
def test_synapse_integer_columns_with_missing_values_from_dataframe(syn, project, schedule_for_cleanup):
    # SYNPY-267
    cols = [Column(name='x', columnType='STRING'),
            Column(name='y', columnType='INTEGER'),
            Column(name='z', columnType='DOUBLE')]
    schema = syn.store(Schema(name='Big Table', columns=cols, parent=project))

    line_terminator = str(os.linesep)
    # write rows to CSV file
    with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as temp:
        schedule_for_cleanup(temp.name)
        # 2nd row is missing a value in its integer column
        temp.write('x,y,z' + line_terminator
                   + 'a,1,0.9' + line_terminator
                   + 'b,,0.8' + line_terminator
                   + 'c,3,0.7' + line_terminator)
        temp.flush()
        filename = temp.name

    # create a table from csv
    table = Table(schema, filename)
    df = table.asDataFrame()

    table_from_dataframe = Table(schema, df)
    assert table.filepath != table_from_dataframe.filepath
    df2 = table_from_dataframe.asDataFrame()
    assert_frame_equal(df, df2)
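# A minimal illustration (not part of the original test) of why SYNPY-267 matters:
# pandas promotes an integer column containing a missing value to float64, so a naive
# DataFrame round trip could write values like "1.0" back into an INTEGER column.
import io
import pandas as pd

csv_text = "x,y,z\na,1,0.9\nb,,0.8\nc,3,0.7\n"
df = pd.read_csv(io.StringIO(csv_text))
print(df['y'].dtype)     # float64, because of the missing value in row 'b'
print(df['y'].tolist())  # [1.0, nan, 3.0]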
def test_createColumns(syn):
    columns_to_create = [Column(name="FirstTestColumn", columnType="INTEGER"),
                         Column(name="SecondTestColumn", columnType="DOUBLE")]
    created_columns = syn.createColumns(columns_to_create)
    assert len(columns_to_create) == len(created_columns)
    for col_to_create, created_col in zip(columns_to_create, created_columns):
        assert 'id' in created_col
        assert set(col_to_create.items()).issubset(set(created_col.items()))
def test_createColumns():
    columns_to_create = [Column(name="FirstTestColumn", columnType="INTEGER"),
                         Column(name="SecondTestColumn", columnType="DOUBLE")]
    created_columns = syn.createColumns(columns_to_create)
    assert_equals(len(columns_to_create), len(created_columns))
    for col_to_create, created_col in zip(columns_to_create, created_columns):
        assert_in('id', created_col)
        assert_dict_contains_subset(col_to_create, created_col)
def process_new_table(args, syn):
    """
    Function: process_new_table

    Purpose: Create an annotations table with the specified name under the specified
             Synapse parent ID using the specified JSON schema. This function is called
             when the "new_table" option is specified when the program is called.

    Arguments: JSON schema file reference
               Synapse parent ID
               Synapse table name
               A Synapse client object
    """
    # Define the columns for the Synapse table.
    dcc_column_names = [
        Column(name="key", columnType="STRING", maximumSize=100),
        Column(name="description", columnType="STRING", maximumSize=250),
        Column(name="columnType", columnType="STRING", maximumSize=50),
        Column(name="maximumSize", columnType="DOUBLE"),
        Column(name="value", columnType="STRING", maximumSize=250),
        Column(name="valueDescription", columnType="LARGETEXT"),
        Column(name="source", columnType="STRING", maximumSize=250),
        Column(name="module", columnType="STRING", maximumSize=100)
    ]

    syn_table_df = process_schema(args.json_schema_file)

    # Build and populate the Synapse table.
    table_schema = Schema(name=args.synapse_table_name, columns=dcc_column_names,
                          parent=args.parent_synapse_id)
    dcc_table = syn.store(Table(table_schema, syn_table_df))
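# A hypothetical invocation sketch (not from the original module): the attribute names on
# `args` mirror the ones process_new_table reads, the parent Synapse ID and schema file name
# are placeholders, and process_schema() is assumed to be defined alongside this function.
import argparse
import synapseclient

syn = synapseclient.login()
args = argparse.Namespace(json_schema_file="dcc_schema.json",
                          parent_synapse_id="syn00000000",
                          synapse_table_name="DCC annotations dictionary")
process_new_table(args, syn)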
def to_column_objects(leaderboard_columns):
    """
    Turns a list of dictionaries of column configuration information (defined in
    conf.leaderboard_columns) into a list of Column objects.
    """
    column_keys = ['name', 'columnType', 'maximumSize', 'enumValues', 'defaultValue']
    return [Column(**{key: col[key] for key in column_keys if key in col})
            for col in leaderboard_columns]
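# A usage sketch with a hypothetical conf.leaderboard_columns value: only the keys listed
# in column_keys are forwarded to the Column constructor, any other keys are ignored.
leaderboard_columns = [
    {'name': 'objectId', 'columnType': 'STRING', 'maximumSize': 20},
    {'name': 'score', 'columnType': 'DOUBLE', 'defaultValue': '0'},
    {'name': 'status', 'columnType': 'STRING', 'enumValues': ['SCORED', 'INVALID'], 'rank': 1},
]
columns = to_column_objects(leaderboard_columns)
assert all(isinstance(c, Column) for c in columns)
assert 'rank' not in columns[2]  # non-column keys such as 'rank' are dropped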
def test_table_query(test_state):
    """Test command line ability to do table query."""

    cols = [Column(name='name', columnType='STRING', maximumSize=1000),
            Column(name='foo', columnType='STRING', enumValues=['foo', 'bar', 'bat']),
            Column(name='x', columnType='DOUBLE'),
            Column(name='age', columnType='INTEGER'),
            Column(name='cartoon', columnType='BOOLEAN')]

    project_entity = test_state.project

    schema1 = test_state.syn.store(Schema(name=str(uuid.uuid4()), columns=cols, parent=project_entity))
    test_state.schedule_for_cleanup(schema1.id)

    data1 = [['Chris', 'bar', 11.23, 45, False],
             ['Jen', 'bat', 14.56, 40, False],
             ['Jane', 'bat', 17.89, 6, False],
             ['Henry', 'bar', 10.12, 1, False]]

    test_state.syn.store(RowSet(schema=schema1, rows=[Row(r) for r in data1]))

    # Test query
    output = run(test_state,
                 'synapse' '--skip-checks', 'query', 'select * from %s' % schema1.id)

    output_rows = output.rstrip("\n").split("\n")

    # Check the length of the output
    assert len(output_rows) == 5, "got %s rows" % (len(output_rows),)

    # Check that headers are correct.
    # Should be column names in schema plus the ROW_ID and ROW_VERSION
    my_headers_set = output_rows[0].split("\t")
    expected_headers_set = ["ROW_ID", "ROW_VERSION"] + list(map(lambda x: x.name, cols))
    assert my_headers_set == expected_headers_set, "%r != %r" % (my_headers_set, expected_headers_set)
def test_store_table_datetime(syn, project):
    current_datetime = datetime.fromtimestamp(round(time.time(), 3))
    schema = syn.store(Schema("testTable", [Column(name="testerino", columnType='DATE')], project))
    rowset = RowSet(rows=[Row([current_datetime])], schema=schema)
    syn.store(Table(schema, rowset))

    query_result = syn.tableQuery("select * from %s" % utils.id_of(schema), resultsAs="rowset")
    assert current_datetime == query_result.rowset['rows'][0]['values'][0]
def _ensure_syt_view(self):
    """
    Ensure the syt table/view exists for the project.
    """
    try:
        # This will fail if the schema doesn't exist. This is a synapseclient bug.
        self._syt_view = self._synapse_client.get(
            EntityViewSchema(name=self.SYT_VIEW_NAME, parent=self._project), downloadFile=False)
    except:
        pass

    if self._syt_view is None:
        evs = EntityViewSchema(name=self.SYT_VIEW_NAME,
                               parent=self._project,
                               scopes=[self._project],
                               properties={'viewTypeMask': 9})
        # Delete the 'type' property so we can set our own viewTypeMask to Files and Folders.
        evs.pop('type')
        # Since we removed 'type' we have to manually populate the base columns.
        evs.addColumn(Column(name='id', columnType='ENTITYID'))
        evs.addColumn(Column(name='parentId', columnType='ENTITYID'))
        evs.addColumn(Column(name='projectId', columnType='ENTITYID'))
        evs.addColumn(Column(name='type', columnType='STRING'))
        evs.addColumn(Column(name='name', columnType='STRING', maximumSize=256))
        evs.addColumn(Column(name=self.ANNO_CHECKED_OUT_BY_ID, columnType='STRING'))
        evs.addColumn(Column(name=self.ANNO_CHECKED_OUT_BY_NAME, columnType='STRING'))
        evs.addColumn(Column(name=self.ANNO_CHECKED_OUT_DATE, columnType='DATE'))
        self._syt_view = self._synapse_client.store(evs)
def test_rowset_tables(syn, project):
    cols = [Column(name='name', columnType='STRING', maximumSize=1000),
            Column(name='foo', columnType='STRING', enumValues=['foo', 'bar', 'bat']),
            Column(name='x', columnType='DOUBLE'),
            Column(name='age', columnType='INTEGER'),
            Column(name='cartoon', columnType='BOOLEAN'),
            Column(name='description', columnType='LARGETEXT')]

    schema1 = syn.store(Schema(name='Foo Table', columns=cols, parent=project))

    data1 = [['Chris', 'bar', 11.23, 45, False, 'a'],
             ['Jen', 'bat', 14.56, 40, False, 'b'],
             ['Jane', 'bat', 17.89, 6, False, 'c' * 1002],
             ['Henry', 'bar', 10.12, 1, False, 'd']]

    row_reference_set1 = syn.store(RowSet(schema=schema1, rows=[Row(r) for r in data1]))
    assert len(row_reference_set1['rows']) == 4
def update_global_scores_table(global_data):
    import challenge_config as config
    from synapseclient import Schema, Column, Table, Row, RowSet, as_table_columns

    # 'principalId', 'name', 'score_lb', 'score_mean', 'score_ub', 'rank'
    cols = [
        Column(name='UserID', columnType='STRING', maximumSize=100),
        Column(name='Name', columnType='STRING', maximumSize=100),
        Column(name='score_lb', columnType='DOUBLE'),
        Column(name='score_mean', columnType='DOUBLE'),
        Column(name='score_ub', columnType='DOUBLE'),
        Column(name='rank', columnType='DOUBLE'),
    ]
    schema = Schema(name='Global Scores', columns=cols, parent=config.CHALLENGE_SYN_ID)

    # Clear out any existing rows before storing the new scores.
    results = syn.tableQuery("select * from {}".format('syn7237020'))
    if len(results) > 0:
        syn.delete(results.asRowSet())

    table = syn.store(Table(schema, global_data))
    results = syn.tableQuery("select * from {}".format(table.tableId))
    for row in results:
        print(row)
    return
def test_command_get_recursive_and_query(test_state):
    """Tests the 'synapse get -r' and 'synapse get -q' functions"""

    project_entity = test_state.project

    # Create Folders in Project
    folder_entity = test_state.syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))
    folder_entity2 = test_state.syn.store(Folder(name=str(uuid.uuid4()), parent=folder_entity))

    # Create and upload two files in sub-Folder
    uploaded_paths = []
    file_entities = []

    for i in range(2):
        f = utils.make_bogus_data_file()
        uploaded_paths.append(f)
        test_state.schedule_for_cleanup(f)
        file_entity = File(f, parent=folder_entity2)
        file_entity = test_state.syn.store(file_entity)
        file_entities.append(file_entity)
        test_state.schedule_for_cleanup(f)

    # Add a file in the Folder as well
    f = utils.make_bogus_data_file()
    uploaded_paths.append(f)
    test_state.schedule_for_cleanup(f)
    file_entity = File(f, parent=folder_entity)
    file_entity = test_state.syn.store(file_entity)
    file_entities.append(file_entity)

    # get -r uses syncFromSynapse() which uses getChildren(), which is not immediately consistent,
    # but faster than chunked queries.
    time.sleep(2)

    # Test recursive get
    run(test_state, 'synapse' '--skip-checks', 'get', '-r', folder_entity.id)

    # Verify that we downloaded files:
    new_paths = [os.path.join('.', folder_entity2.name, os.path.basename(f)) for f in uploaded_paths[:-1]]
    new_paths.append(os.path.join('.', os.path.basename(uploaded_paths[-1])))
    test_state.schedule_for_cleanup(folder_entity.name)
    for downloaded, uploaded in zip(new_paths, uploaded_paths):
        assert os.path.exists(downloaded)
        assert filecmp.cmp(downloaded, uploaded)
        test_state.schedule_for_cleanup(downloaded)

    # Test query get using a Table with an entity column
    # This should be replaced when Table File Views are implemented in the client
    cols = [Column(name='id', columnType='ENTITYID')]

    schema1 = test_state.syn.store(Schema(name='Foo Table', columns=cols, parent=project_entity))
    test_state.schedule_for_cleanup(schema1.id)

    data1 = [[x.id] for x in file_entities]

    test_state.syn.store(RowSet(schema=schema1, rows=[Row(r) for r in data1]))

    time.sleep(3)  # get -q is eventually consistent

    # Test Table/View query get
    run(test_state, 'synapse' '--skip-checks', 'get', '-q', "select id from %s" % schema1.id)

    # Verify that we downloaded files:
    new_paths = [os.path.join('.', os.path.basename(f)) for f in uploaded_paths[:-1]]
    new_paths.append(os.path.join('.', os.path.basename(uploaded_paths[-1])))
    test_state.schedule_for_cleanup(folder_entity.name)
    for downloaded, uploaded in zip(new_paths, uploaded_paths):
        assert os.path.exists(downloaded)
        assert filecmp.cmp(downloaded, uploaded)
        test_state.schedule_for_cleanup(downloaded)

    test_state.schedule_for_cleanup(new_paths[0])
def test_create_and_update_file_view(syn, project, schedule_for_cleanup):

    # Create a folder
    folder = Folder(str(uuid.uuid4()), parent=project, description='creating a file-view')
    folder = syn.store(folder)

    # Create dummy file with annotations in our folder
    path = utils.make_bogus_data_file()
    file_annotations = dict(fileFormat='jpg', dataType='image', artist='Banksy',
                            medium='print', title='Girl With Ballon')
    schedule_for_cleanup(path)
    a_file = File(path, parent=folder, annotations=file_annotations)
    a_file = syn.store(a_file)
    schedule_for_cleanup(a_file)

    # Add new columns for the annotations on this file and get their IDs
    my_added_cols = [syn.store(Column(name=k, columnType="STRING")) for k in file_annotations.keys()]
    my_added_cols_ids = [c['id'] for c in my_added_cols]
    view_default_ids = [c['id'] for c in syn._get_default_view_columns("entityview",
                                                                       EntityViewType.FILE.value)]
    col_ids = my_added_cols_ids + view_default_ids
    scopeIds = [folder['id'].lstrip('syn')]

    # Create an empty entity-view with defined scope as folder
    entity_view = EntityViewSchema(name=str(uuid.uuid4()),
                                   scopeIds=scopeIds,
                                   addDefaultViewColumns=True,
                                   addAnnotationColumns=False,
                                   type='file',
                                   columns=my_added_cols,
                                   parent=project)

    entity_view = syn.store(entity_view)
    schedule_for_cleanup(entity_view)

    assert set(scopeIds) == set(entity_view.scopeIds)
    assert set(col_ids) == set(entity_view.columnIds)
    assert EntityViewType.FILE.value == entity_view.viewTypeMask

    # get the current view-schema
    view = syn.tableQuery("select * from %s" % entity_view.id)
    schedule_for_cleanup(view.filepath)

    view_dict = list(csv.DictReader(io.open(view.filepath, encoding="utf-8", newline='')))

    # check that all of the annotations were retrieved from the view
    assert set(file_annotations.keys()).issubset(set(view_dict[0].keys()))

    updated_a_file = syn.get(a_file.id, downloadFile=False)

    # Check that the values are the same as what was set
    # Both in the view and on the entity itself
    for k, v in file_annotations.items():
        assert view_dict[0][k] == v
        assert updated_a_file.annotations[k][0] == v

    # Make a change to the view and store
    view_dict[0]['fileFormat'] = 'PNG'

    with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as temp:
        schedule_for_cleanup(temp.name)
        temp_filename = temp.name

    with io.open(temp_filename, mode='w', encoding="utf-8", newline='') as temp_file:
        dw = csv.DictWriter(temp_file, fieldnames=view_dict[0].keys(),
                            quoting=csv.QUOTE_NONNUMERIC,
                            lineterminator=str(os.linesep))
        dw.writeheader()
        dw.writerows(view_dict)
        temp_file.flush()

    syn.store(Table(entity_view.id, temp_filename))
    new_view_dict = list(csv.DictReader(io.open(temp_filename, encoding="utf-8", newline='')))
    assert new_view_dict[0]['fileFormat'] == 'PNG'

    # query for the change
    start_time = time.time()

    new_view_results = syn.tableQuery("select * from %s" % entity_view.id)
    schedule_for_cleanup(new_view_results.filepath)
    new_view_dict = list(csv.DictReader(io.open(new_view_results.filepath,
                                                encoding="utf-8", newline='')))
    # query until change is seen.
    while new_view_dict[0]['fileFormat'] != 'PNG':
        # check timeout
        assert time.time() - start_time < QUERY_TIMEOUT_SEC
        # query again
        new_view_results = syn.tableQuery("select * from %s" % entity_view.id)
        new_view_dict = list(csv.DictReader(io.open(new_view_results.filepath,
                                                    encoding="utf-8", newline='')))
    # paranoid check
    assert new_view_dict[0]['fileFormat'] == 'PNG'
def partial_rowset_test_state(syn, project):
    cols = [Column(name='foo', columnType='INTEGER'),
            Column(name='bar', columnType='INTEGER')]
    table_schema = syn.store(Schema(name='PartialRowTest' + str(uuid.uuid4()),
                                    columns=cols,
                                    parent=project))
    data = [[1, None],
            [None, 2]]
    syn.store(RowSet(schema=table_schema, rows=[Row(r) for r in data]))

    # set up a file view
    folder = syn.store(Folder(name="PartialRowTestFolder" + str(uuid.uuid4()), parent=project))
    syn.store(File("~/path/doesnt/matter", name="f1", parent=folder, synapseStore=False))
    syn.store(File("~/path/doesnt/matter/again", name="f2", parent=folder, synapseStore=False))

    cols = [Column(name='foo', columnType='INTEGER'),
            Column(name='bar', columnType='INTEGER')]
    view_schema = syn.store(EntityViewSchema(name='PartialRowTestViews' + str(uuid.uuid4()),
                                             columns=cols,
                                             addDefaultViewColumns=False,
                                             parent=project,
                                             scopes=[folder]))

    table_changes = [{'foo': 4}, {'bar': 5}]
    view_changes = [{'bar': 6}, {'foo': 7}]

    expected_table_cells = pd.DataFrame({'foo': [4.0, float('NaN')],
                                         'bar': [float('NaN'), 5.0]})
    expected_view_cells = pd.DataFrame({'foo': [float('NaN'), 7.0],
                                        'bar': [6.0, float('NaN')]})

    class TestState:
        def __init__(self):
            self.syn = syn
            self.project = project
            self.table_schema = table_schema
            self.view_schema = view_schema
            self.table_changes = table_changes
            self.view_changes = view_changes
            self.expected_table_cells = expected_table_cells
            self.expected_view_cells = expected_view_cells

    return TestState()
def pubmed(args, syn):
    """
    Given a list of grant numbers pulled from a Synapse table column, use the PubMed API
    to build a search query. The query is the union ('or' logic) of all the grant numbers,
    which pulls down the list of all PubMed publication ids associated with the grants.
    Each PubMed id is then scraped for basic publication information.

    :param args:
    :param syn:
    :return:
    """
    projectId = args.projectId
    project = syn.get(projectId)

    if args.grantviewId is not None:
        grantviewId = args.grantviewId
    else:
        grantviewId = "syn10142562"

    csbcGrants = csbcGrantList(syn, grantviewId)
    grantIds = getGrantQuery(csbcGrants)
    pubmedIds = getPubMedIds(grantIds)
    csbcView = getCenterIdsView(syn, grantviewId)

    # for utf encoding and debugging
    # finalTable.to_csv("csbc.csv", sep=',', index=False, encoding="utf-8")
    # finalTable = pandas.read_csv("csbc.csv", delimiter=',', encoding="utf-8")
    # os.remove("csbc.csv")

    if args.tableId:
        # update existing schema
        tableId = args.tableId
        schema = syn.get(tableId)

        publicationTable = syn.tableQuery("select * from %s" % tableId)
        currentTable = publicationTable.asDataFrame()

        new_pubmed_ids = list(set(pubmedIds) -
                              set([i.split("=")[1] for i in list(currentTable.PubMed)]))

        finalTable = getPMIDDF(new_pubmed_ids, csbcGrants, csbcView)

        table = synapseclient.Table(schema, finalTable.values.tolist())
        table = syn.store(table)

    else:
        # create a new schema
        # cols = synapseclient.as_table_columns(finalTable)
        finalTable = getPMIDDF(pubmedIds, csbcGrants, csbcView)

        cols = [
            Column(name='CSBC PSON Center', columnType='ENTITYID', maximumSize=50),
            Column(name='Consortium', columnType='STRING', maximumSize=100),
            Column(name='PubMed', columnType='LINK', maximumSize=100),
            Column(name='Journal', columnType='STRING', maximumSize=100),
            Column(name='Publication Year', columnType='DATE'),
            Column(name='Title', columnType='STRING', maximumSize=500),
            Column(name='Authors', columnType='STRING', maximumSize=990),
            Column(name='Grant', columnType='STRING', maximumSize=50),
            Column(name='Data Location', columnType='LINK', maximumSize=1000),
            Column(name='Synapse Location', columnType='ENTITYID', maximumSize=50),
            Column(name='Keywords', columnType='STRING', maximumSize=250)
        ]

        schema = synapseclient.Schema(name=args.tableName, columns=cols, parent=project)
        table = synapseclient.Table(schema, finalTable)
        table = syn.store(table)
def test_copy():
    """Tests the copy function"""
    # Create a Project
    project_entity = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)
    acl = syn.setPermissions(project_entity, other_user['principalId'],
                             accessType=['READ', 'CREATE', 'UPDATE', 'DOWNLOAD'])

    # Create two Folders in Project
    folder_entity = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))
    second_folder = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))
    third_folder = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(folder_entity.id)
    schedule_for_cleanup(second_folder.id)
    schedule_for_cleanup(third_folder.id)

    # Annotations and provenance
    repo_url = 'https://github.com/Sage-Bionetworks/synapsePythonClient'
    annots = {'test': ['hello_world']}
    prov = Activity(name="test", used=repo_url)

    # Create, upload, and set annotations/provenance on a file in Folder
    filename = utils.make_bogus_data_file()
    schedule_for_cleanup(filename)
    file_entity = syn.store(File(filename, parent=folder_entity))
    externalURL_entity = syn.store(File(repo_url, name='rand', parent=folder_entity, synapseStore=False))
    syn.setAnnotations(file_entity, annots)
    syn.setAnnotations(externalURL_entity, annots)
    syn.setProvenance(externalURL_entity.id, prov)
    schedule_for_cleanup(file_entity.id)
    schedule_for_cleanup(externalURL_entity.id)

    # ------------------------------------
    # TEST COPY FILE
    # ------------------------------------
    output = synapseutils.copy(syn, file_entity.id, destinationId=project_entity.id)
    output_URL = synapseutils.copy(syn, externalURL_entity.id, destinationId=project_entity.id,
                                   skipCopyAnnotations=True)

    # Verify that our copied files are identical
    copied_ent = syn.get(output[file_entity.id])
    copied_URL_ent = syn.get(output_URL[externalURL_entity.id], downloadFile=False)
    copied_ent_annot = syn.getAnnotations(copied_ent)
    copied_url_annot = syn.getAnnotations(copied_URL_ent)
    copied_prov = syn.getProvenance(copied_ent)
    copied_url_prov = syn.getProvenance(copied_URL_ent)
    schedule_for_cleanup(copied_ent.id)
    schedule_for_cleanup(copied_URL_ent.id)

    # TEST: set_Provenance = Traceback
    print("Test: setProvenance = Traceback")
    assert copied_prov['used'][0]['reference']['targetId'] == file_entity.id
    assert copied_url_prov['used'][0]['reference']['targetId'] == externalURL_entity.id

    # TEST: Make sure copied files are the same
    assert copied_ent_annot == annots
    assert copied_ent.dataFileHandleId == file_entity.dataFileHandleId

    # TEST: Make sure copied URLs are the same
    assert copied_url_annot == {}
    assert copied_URL_ent.externalURL == repo_url
    assert copied_URL_ent.name == 'rand'
    assert copied_URL_ent.dataFileHandleId == externalURL_entity.dataFileHandleId

    # TEST: Throw error if file is copied to a folder/project that has a file with the same filename
    assert_raises(ValueError, synapseutils.copy, syn, project_entity.id, destinationId=project_entity.id)
    assert_raises(ValueError, synapseutils.copy, syn, file_entity.id, destinationId=project_entity.id)
    assert_raises(ValueError, synapseutils.copy, syn, file_entity.id, destinationId=third_folder.id,
                  setProvenance="gib")
    assert_raises(ValueError, synapseutils.copy, syn, file_entity.id, destinationId=file_entity.id)

    print("Test: setProvenance = None")
    output = synapseutils.copy(syn, file_entity.id, destinationId=second_folder.id, setProvenance=None)
    assert_raises(SynapseHTTPError, syn.getProvenance, output[file_entity.id])
    schedule_for_cleanup(output[file_entity.id])

    print("Test: setProvenance = Existing")
    output_URL = synapseutils.copy(syn, externalURL_entity.id, destinationId=second_folder.id,
                                   setProvenance="existing")
    output_prov = syn.getProvenance(output_URL[externalURL_entity.id])
    schedule_for_cleanup(output_URL[externalURL_entity.id])
    assert output_prov['name'] == prov['name']
    assert output_prov['used'] == prov['used']

    if 'username' not in other_user or 'password' not in other_user:
        sys.stderr.write('\nWarning: no test-authentication configured. skipping testing copy function '
                         'when trying to copy file made by another user.\n')
        return

    try:
        print("Test: Other user copy should result in different data file handle")
        syn_other = synapseclient.Synapse(skip_checks=True)
        syn_other.login(other_user['username'], other_user['password'])

        output = synapseutils.copy(syn_other, file_entity.id, destinationId=third_folder.id)
        new_copied_ent = syn.get(output[file_entity.id])
        new_copied_ent_annot = syn.getAnnotations(new_copied_ent)
        schedule_for_cleanup(new_copied_ent.id)

        copied_URL_ent.externalURL = "https://www.google.com"
        copied_URL_ent = syn.store(copied_URL_ent)
        output = synapseutils.copy(syn_other, copied_URL_ent.id, destinationId=third_folder.id, version=1)
        new_copied_URL = syn.get(output[copied_URL_ent.id], downloadFile=False)
        schedule_for_cleanup(new_copied_URL.id)

        assert new_copied_ent_annot == annots
        assert new_copied_ent.dataFileHandleId != copied_ent.dataFileHandleId
        # Test if copying different versions gets you the correct file
        assert new_copied_URL.versionNumber == 1
        assert new_copied_URL.externalURL == repo_url
        assert new_copied_URL.dataFileHandleId != copied_URL_ent.dataFileHandleId
    finally:
        syn_other.logout()

    # ------------------------------------
    # TEST COPY LINKS
    # ------------------------------------
    print("Test: Copy Links")
    second_file = utils.make_bogus_data_file()
    # schedule_for_cleanup(filename)
    second_file_entity = syn.store(File(second_file, parent=project_entity))
    link_entity = Link(second_file_entity.id, parent=folder_entity.id)
    link_entity = syn.store(link_entity)

    # function under test uses queries which are eventually consistent but not immediately
    # after creating the entities
    start_time = time.time()
    while syn.query("select id from entity where id=='%s'" % link_entity.id).get('totalNumberOfResults') <= 0:
        assert_less(time.time() - start_time, QUERY_TIMEOUT_SEC)
        time.sleep(2)

    copied_link = synapseutils.copy(syn, link_entity.id, destinationId=second_folder.id)
    old = syn.get(link_entity.id, followLink=False)
    new = syn.get(copied_link[link_entity.id], followLink=False)
    assert old.linksTo['targetId'] == new.linksTo['targetId']
    assert old.linksTo['targetVersionNumber'] == new.linksTo['targetVersionNumber']
    schedule_for_cleanup(second_file_entity.id)
    schedule_for_cleanup(link_entity.id)
    schedule_for_cleanup(copied_link[link_entity.id])

    time.sleep(3)
    assert_raises(ValueError, synapseutils.copy, syn, link_entity.id, destinationId=second_folder.id)

    # ------------------------------------
    # TEST COPY TABLE
    # ------------------------------------
    second_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(second_project.id)
    print("Test: Copy Tables")
    cols = [Column(name='n', columnType='DOUBLE', maximumSize=50),
            Column(name='c', columnType='STRING', maximumSize=50),
            Column(name='i', columnType='INTEGER')]
    data = [[2.1, 'foo', 10],
            [2.2, 'bar', 20],
            [2.3, 'baz', 30]]

    schema = syn.store(Schema(name='Testing', columns=cols, parent=project_entity.id))
    row_reference_set = syn.store(RowSet(columns=cols, schema=schema, rows=[Row(r) for r in data]))

    table_map = synapseutils.copy(syn, schema.id, destinationId=second_project.id)
    copied_table = syn.tableQuery('select * from %s' % table_map[schema.id])
    rows = copied_table.asRowSet()['rows']
    # TEST: Check if all values are the same
    for i, row in enumerate(rows):
        assert row['values'] == data[i]

    assert_raises(ValueError, synapseutils.copy, syn, schema.id, destinationId=second_project.id)

    schedule_for_cleanup(schema.id)
    schedule_for_cleanup(table_map[schema.id])

    # ------------------------------------
    # TEST COPY FOLDER
    # ------------------------------------
    print("Test: Copy Folder")
    mapping = synapseutils.copy(syn, folder_entity.id, destinationId=second_project.id)
    for i in mapping:
        old = syn.get(i, downloadFile=False)
        new = syn.get(mapping[i], downloadFile=False)
        assert old.name == new.name
        assert old.annotations == new.annotations
        assert old.concreteType == new.concreteType

    assert_raises(ValueError, synapseutils.copy, syn, folder_entity.id, destinationId=second_project.id)

    # TEST: Throw error if excludeTypes isn't in file, link and table or isn't a list
    assert_raises(ValueError, synapseutils.copy, syn, second_folder.id, destinationId=second_project.id,
                  excludeTypes=["foo"])
    assert_raises(ValueError, synapseutils.copy, syn, second_folder.id, destinationId=second_project.id,
                  excludeTypes="file")

    # TEST: excludeType = ["file"], only the folder is created
    second = synapseutils.copy(syn, second_folder.id, destinationId=second_project.id,
                               excludeTypes=["file", "table", "link"])
    copied_folder = syn.get(second[second_folder.id])
    assert copied_folder.name == second_folder.name
    assert len(second) == 1

    # TEST: Make sure error is thrown if foldername already exists
    start_time = time.time()
    while syn.query("select id from entity where id=='%s'" % copied_folder.id).get('totalNumberOfResults') <= 0:
        assert_less(time.time() - start_time, QUERY_TIMEOUT_SEC)
        time.sleep(2)
    assert_raises(ValueError, synapseutils.copy, syn, second_folder.id, destinationId=second_project.id)

    # ------------------------------------
    # TEST COPY PROJECT
    # ------------------------------------
    print("Test: Copy Project")
    third_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(third_project.id)

    mapping = synapseutils.copy(syn, project_entity.id, destinationId=third_project.id)
    for i in mapping:
        old = syn.get(i, downloadFile=False)
        new = syn.get(mapping[i], downloadFile=False)
        if not isinstance(old, Project):
            assert old.name == new.name
        assert old.annotations == new.annotations
        assert old.concreteType == new.concreteType

    # TEST: Can't copy project to a folder
    assert_raises(ValueError, synapseutils.copy, syn, project_entity.id, destinationId=second_folder.id)
def test_copy():
    """Tests the copy function"""
    # Create a Project
    project_entity = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)

    # Create two Folders in Project
    folder_entity = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))
    second_folder = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))
    third_folder = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(folder_entity.id)
    schedule_for_cleanup(second_folder.id)
    schedule_for_cleanup(third_folder.id)

    # Annotations and provenance
    repo_url = 'https://github.com/Sage-Bionetworks/synapsePythonClient'
    annos = {'test': ['hello_world']}
    prov = Activity(name="test", used=repo_url)

    # Create, upload, and set annotations/provenance on a file in Folder
    filename = utils.make_bogus_data_file()
    schedule_for_cleanup(filename)
    file_entity = syn.store(File(filename, parent=folder_entity))
    externalURL_entity = syn.store(File(repo_url, name='rand', parent=folder_entity, synapseStore=False))
    syn.setAnnotations(file_entity, annos)
    syn.setAnnotations(externalURL_entity, annos)
    syn.setProvenance(externalURL_entity.id, prov)
    schedule_for_cleanup(file_entity.id)
    schedule_for_cleanup(externalURL_entity.id)

    # ------------------------------------
    # TEST COPY FILE
    # ------------------------------------
    output = synapseutils.copy(syn, file_entity.id, destinationId=project_entity.id)
    output_URL = synapseutils.copy(syn, externalURL_entity.id, destinationId=project_entity.id,
                                   skipCopyAnnotations=True)

    # Verify that our copied files are identical
    copied_ent = syn.get(output[file_entity.id])
    copied_URL_ent = syn.get(output_URL[externalURL_entity.id], downloadFile=False)
    copied_ent_annot = syn.getAnnotations(copied_ent)
    copied_url_annot = syn.getAnnotations(copied_URL_ent)

    copied_prov = syn.getProvenance(copied_ent)
    copied_url_prov = syn.getProvenance(copied_URL_ent)
    schedule_for_cleanup(copied_ent.id)
    schedule_for_cleanup(copied_URL_ent.id)

    # TEST: set_Provenance = Traceback
    assert_equals(copied_prov['used'][0]['reference']['targetId'], file_entity.id)
    assert_equals(copied_url_prov['used'][0]['reference']['targetId'], externalURL_entity.id)

    # TEST: Make sure copied files are the same
    assert_equals(copied_ent_annot, annos)
    assert_equals(copied_ent.dataFileHandleId, file_entity.dataFileHandleId)

    # TEST: Make sure copied URLs are the same
    assert_equals(copied_url_annot, {})
    assert_equals(copied_URL_ent.externalURL, repo_url)
    assert_equals(copied_URL_ent.name, 'rand')
    assert_equals(copied_URL_ent.dataFileHandleId, externalURL_entity.dataFileHandleId)

    # TEST: Throw error if file is copied to a folder/project that has a file with the same filename
    assert_raises(ValueError, synapseutils.copy, syn, project_entity.id, destinationId=project_entity.id)
    assert_raises(ValueError, synapseutils.copy, syn, file_entity.id, destinationId=project_entity.id)
    assert_raises(ValueError, synapseutils.copy, syn, file_entity.id, destinationId=third_folder.id,
                  setProvenance="gib")
    assert_raises(ValueError, synapseutils.copy, syn, file_entity.id, destinationId=file_entity.id)

    # Test: setProvenance = None
    output = synapseutils.copy(syn, file_entity.id, destinationId=second_folder.id, setProvenance=None)
    assert_raises(SynapseHTTPError, syn.getProvenance, output[file_entity.id])
    schedule_for_cleanup(output[file_entity.id])

    # Test: setProvenance = Existing
    output_URL = synapseutils.copy(syn, externalURL_entity.id, destinationId=second_folder.id,
                                   setProvenance="existing")
    output_prov = syn.getProvenance(output_URL[externalURL_entity.id])
    schedule_for_cleanup(output_URL[externalURL_entity.id])
    assert_equals(output_prov['name'], prov['name'])
    assert_equals(output_prov['used'], prov['used'])

    # ------------------------------------
    # TEST COPY LINKS
    # ------------------------------------
    second_file = utils.make_bogus_data_file()
    # schedule_for_cleanup(filename)
    second_file_entity = syn.store(File(second_file, parent=project_entity))
    link_entity = Link(second_file_entity.id, parent=folder_entity.id)
    link_entity = syn.store(link_entity)

    copied_link = synapseutils.copy(syn, link_entity.id, destinationId=second_folder.id)
    old = syn.get(link_entity.id, followLink=False)
    new = syn.get(copied_link[link_entity.id], followLink=False)
    assert_equals(old.linksTo['targetId'], new.linksTo['targetId'])

    schedule_for_cleanup(second_file_entity.id)
    schedule_for_cleanup(link_entity.id)
    schedule_for_cleanup(copied_link[link_entity.id])

    time.sleep(3)

    assert_raises(ValueError, synapseutils.copy, syn, link_entity.id, destinationId=second_folder.id)

    # ------------------------------------
    # TEST COPY TABLE
    # ------------------------------------
    second_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(second_project.id)
    cols = [Column(name='n', columnType='DOUBLE', maximumSize=50),
            Column(name='c', columnType='STRING', maximumSize=50),
            Column(name='i', columnType='INTEGER')]
    data = [[2.1, 'foo', 10],
            [2.2, 'bar', 20],
            [2.3, 'baz', 30]]

    schema = syn.store(Schema(name='Testing', columns=cols, parent=project_entity.id))
    syn.store(RowSet(schema=schema, rows=[Row(r) for r in data]))

    table_map = synapseutils.copy(syn, schema.id, destinationId=second_project.id)
    copied_table = syn.tableQuery('select * from %s' % table_map[schema.id])
    rows = copied_table.asRowSet()['rows']
    # TEST: Check if all values are the same
    for i, row in enumerate(rows):
        assert_equals(row['values'], data[i])

    assert_raises(ValueError, synapseutils.copy, syn, schema.id, destinationId=second_project.id)

    schedule_for_cleanup(schema.id)
    schedule_for_cleanup(table_map[schema.id])

    # ------------------------------------
    # TEST COPY FOLDER
    # ------------------------------------
    mapping = synapseutils.copy(syn, folder_entity.id, destinationId=second_project.id)
    for i in mapping:
        old = syn.get(i, downloadFile=False)
        new = syn.get(mapping[i], downloadFile=False)
        assert_equals(old.name, new.name)
        assert_equals(old.annotations, new.annotations)
        assert_equals(old.concreteType, new.concreteType)

    assert_raises(ValueError, synapseutils.copy, syn, folder_entity.id, destinationId=second_project.id)

    # TEST: Throw error if excludeTypes isn't in file, link and table or isn't a list
    assert_raises(ValueError, synapseutils.copy, syn, second_folder.id, destinationId=second_project.id,
                  excludeTypes=["foo"])
    assert_raises(ValueError, synapseutils.copy, syn, second_folder.id, destinationId=second_project.id,
                  excludeTypes="file")

    # TEST: excludeType = ["file"], only the folder is created
    second = synapseutils.copy(syn, second_folder.id, destinationId=second_project.id,
                               excludeTypes=["file", "table", "link"])
    copied_folder = syn.get(second[second_folder.id])
    assert_equals(copied_folder.name, second_folder.name)
    assert_equals(len(second), 1)

    # TEST: Make sure error is thrown if foldername already exists
    assert_raises(ValueError, synapseutils.copy, syn, second_folder.id, destinationId=second_project.id)

    # ------------------------------------
    # TEST COPY PROJECT
    # ------------------------------------
    third_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(third_project.id)

    mapping = synapseutils.copy(syn, project_entity.id, destinationId=third_project.id)
    for i in mapping:
        old = syn.get(i, downloadFile=False)
        new = syn.get(mapping[i], downloadFile=False)
        if not isinstance(old, Project):
            assert_equals(old.name, new.name)
        assert_equals(old.annotations, new.annotations)
        assert_equals(old.concreteType, new.concreteType)

    # TEST: Can't copy project to a folder
    assert_raises(ValueError, synapseutils.copy, syn, project_entity.id, destinationId=second_folder.id)