def test_entity_view_add_annotation_columns(syn, project, schedule_for_cleanup):
    """Store entity views built with and without the deprecated ``type`` field.

    Two annotated folders are stored under ``project`` and scheduled for
    cleanup; their ids form the scope of three ``EntityViewSchema`` objects
    that are stored one after another.  There are no explicit assertions:
    the test passes when every ``syn.store`` call returns without raising.
    """
    first_folder = syn.store(
        Folder(
            name=str(uuid.uuid4()) + 'test_entity_view_add_annotation_columns_proj1',
            parent=project,
            annotations={'strAnno': 'str1', 'intAnno': 1, 'floatAnno': 1.1},
        )
    )
    second_folder = syn.store(
        Folder(
            name=str(uuid.uuid4()) + 'test_entity_view_add_annotation_columns_proj2',
            parent=project,
            annotations={'dateAnno': datetime.now(), 'strAnno': 'str2', 'intAnno': 2},
        )
    )
    for stored_folder in (first_folder, second_folder):
        schedule_for_cleanup(stored_folder)

    scope_ids = [utils.id_of(first_folder), utils.id_of(second_folder)]

    # The first two variants ensure that user code which uses the deprecated
    # field `type` continues to work (alone, and combined with
    # `includeEntityTypes`); the last variant uses `includeEntityTypes` only.
    # TODO: remove the `type` variants in Synapse Python client 2.0
    view_variants = (
        {'type': 'project'},
        {'type': 'file', 'includeEntityTypes': [EntityViewType.PROJECT]},
        {'includeEntityTypes': [EntityViewType.PROJECT]},
    )
    for variant_kwargs in view_variants:
        view_schema = EntityViewSchema(
            name=str(uuid.uuid4()),
            scopeIds=scope_ids,
            addDefaultViewColumns=False,
            addAnnotationColumns=True,
            parent=project,
            **variant_kwargs
        )
        syn.store(view_schema)
def _view_setup(cls):
    """Store a folder with two placeholder files and return a view scoped to it.

    ``cls`` is accepted but not used.  ``syn`` and ``project`` are not
    parameters; they are resolved from the enclosing scope (presumably
    module-level test globals -- confirm against the rest of the file).

    Returns:
        Whatever ``syn.store`` returns for the new ``EntityViewSchema``.
    """
    # set up a file view
    folder_name = "PartialRowTestFolder" + str(uuid.uuid4())
    view_folder = syn.store(Folder(name=folder_name, parent=project))

    # Neither file is uploaded (synapseStore=False), so the paths are placeholders.
    placeholder_files = (
        ("~/path/doesnt/matter", "f1"),
        ("~/path/doesnt/matter/again", "f2"),
    )
    for file_path, file_name in placeholder_files:
        syn.store(
            File(file_path, name=file_name, parent=view_folder, synapseStore=False)
        )

    view_columns = [
        Column(name='foo', columnType='STRING', maximumSize=1000),
        Column(name='bar', columnType='STRING'),
    ]
    view_schema = EntityViewSchema(
        name='PartialRowTestViews' + str(uuid.uuid4()),
        columns=view_columns,
        addDefaultViewColumns=False,
        parent=project,
        scopes=[view_folder],
    )
    return syn.store(view_schema)
def test_entity_view_add_annotation_columns():
    """Store an annotation-column entity view scoped to two annotated folders.

    ``syn``, ``project`` and ``schedule_for_cleanup`` are resolved from the
    enclosing scope.  There are no explicit assertions: the test passes when
    the final ``syn.store`` call returns without raising.
    """
    first_folder = syn.store(
        Folder(
            name=str(uuid.uuid4()) + 'test_entity_view_add_annotation_columns_proj1',
            parent=project,
            annotations={'strAnno': 'str1', 'intAnno': 1, 'floatAnno': 1.1},
        )
    )
    second_folder = syn.store(
        Folder(
            name=str(uuid.uuid4()) + 'test_entity_view_add_annotation_columns_proj2',
            parent=project,
            annotations={'dateAnno': datetime.now(), 'strAnno': 'str2', 'intAnno': 2},
        )
    )
    for stored_folder in (first_folder, second_folder):
        schedule_for_cleanup(stored_folder)

    scope_ids = [utils.id_of(first_folder), utils.id_of(second_folder)]

    # Built with the `type` keyword and annotation columns requested.
    view_schema = EntityViewSchema(
        name=str(uuid.uuid4()),
        scopeIds=scope_ids,
        addDefaultViewColumns=False,
        addAnnotationColumns=True,
        type='project',
        parent=project,
    )
    syn.store(view_schema)
def test_table_file_view_csv_update_annotations__includeEntityEtag():
    """Update file annotations through an entity view via a rowset and a CSV.

    A folder with one annotated file is stored, then an entity view scoped to
    that folder with one STRING column per annotation.  The first annotation
    is changed through a rowset query result, the second through a CSV query
    result loaded as a DataFrame.  The test then polls the file entity until
    its annotations match the expected values, failing once
    ``QUERY_TIMEOUT_SEC`` has elapsed.

    ``syn`` and ``project`` are resolved from the enclosing scope.
    """
    folder = syn.store(
        synapseclient.Folder(name="updateAnnoFolder" + str(uuid.uuid4()),
                             parent=project))

    anno1_name = "annotationColumn1"
    anno2_name = "annotationColumn2"
    initial_annotations = {
        anno1_name: "initial_value1",
        anno2_name: "initial_value2"
    }
    file_entity = syn.store(
        File(name="test_table_file_view_csv_update_annotations__includeEntityEtag",
             path="~/fakepath",
             synapseStore=False,
             parent=folder,
             annotations=initial_annotations))

    annotation_columns = [
        Column(name=anno1_name, columnType='STRING'),
        Column(name=anno2_name, columnType='STRING')
    ]
    entity_view = syn.store(
        EntityViewSchema(name="TestEntityViewSchemaUpdateAnnotation" + str(uuid.uuid4()),
                         parent=project,
                         scopes=[folder],
                         columns=annotation_columns))

    query_str = "SELECT {anno1}, {anno2} FROM {proj_id}".format(
        anno1=anno1_name, anno2=anno2_name, proj_id=utils.id_of(entity_view))

    # modify first annotation using rowset
    rowset_query_result = syn.tableQuery(query_str, resultsAs="rowset")
    rowset = rowset_query_result.asRowSet()
    rowset_changed_anno_value = "rowset_value_change"
    rowset.rows[0].values[0] = rowset_changed_anno_value
    syn.store(rowset)

    # modify second annotation using csv
    csv_query_result = syn.tableQuery(query_str, resultsAs="csv")
    dataframe = csv_query_result.asDataFrame()
    csv_changed_anno_value = "csv_value_change"
    # `DataFrame.ix` was removed in pandas 1.0.  The first row is addressed by
    # position because the index labels produced by asDataFrame() are not
    # guaranteed to be the integers 0..n-1 (presumably row id/version labels
    # -- confirm); `.ix` fell back to positional access in that situation.
    dataframe.iloc[0, dataframe.columns.get_loc(anno2_name)] = csv_changed_anno_value
    syn.store(Table(utils.id_of(entity_view), dataframe))

    # check annotations in the file entity. Annotations may not be immediately
    # updated so we wait in a while loop
    expected_annotations = {
        anno1_name: [rowset_changed_anno_value],
        anno2_name: [csv_changed_anno_value]
    }
    start_time = time.time()
    while expected_annotations != file_entity.annotations:
        assert_less(time.time() - start_time, QUERY_TIMEOUT_SEC)
        time.sleep(2)
        file_entity = syn.get(file_entity, downloadFile=False)
def __init__(self, datasetId: str, synapse: Synapse, name: str = None, temporary: bool = True, parentId: str = None) -> None:
    """Create a file view scoped to a dataset folder.

    Args:
        datasetId (str): Synapse ID for a dataset folder/project.
        synapse (Synapse): Used for Synapse requests.
        name (str, optional): Name of the file view (temporary or not).
            Defaults to a name derived from ``datasetId``.
        temporary (bool): Whether to delete the file view on exit
            of either a 'with' statement or Python entirely.  When true,
            a random UID suffix is appended to the view name.
        parentId (str, optional): Synapse ID specifying where to
            store the file view. Defaults to datasetId.
    """
    self.datasetId = datasetId
    self.synapse = synapse
    self.is_temporary = temporary

    # Always assign self.name.  Previously it was only set when `name` was
    # None, so a caller-supplied name was dropped and the first read of
    # self.name below raised AttributeError.
    if name is None:
        name = f"schematic annotation file view for {self.datasetId}"
    self.name = name

    if self.is_temporary:
        # Random suffix so repeated temporary views get distinct names.
        uid = secrets.token_urlsafe(5)
        self.name = f"{self.name} - UID {uid}"

    # TODO: Allow a DCC admin to configure a "universal parent"
    # Such as a Synapse project writeable by everyone.
    self.parentId = datasetId if parentId is None else parentId

    # TODO: Create local sharing setting to hide from everyone else
    view_schema = EntityViewSchema(
        name=self.name,
        parent=self.parentId,
        scopes=self.datasetId,
        includeEntityTypes=[EntityViewType.FILE, EntityViewType.FOLDER],
        addDefaultViewColumns=False,
        addAnnotationColumns=True)

    # TODO: Handle failure due to insufficient permissions by
    # creating a temporary new project to store view
    self.view_schema = self.synapse.store(view_schema)

    # These are filled in after calling `self.query()`
    self.results = None
    self.table = None

    # Ensure deletion of the file view (last resort)
    if self.is_temporary:
        atexit.register(self.delete)
def get_or_create_view(self, **kwargs) -> EntityViewSchema:
    """Build a view schema from ``kwargs`` and resolve it via find-or-create.

    Args:
        **kwargs: Forwarded unchanged to ``synapseclient.EntityViewSchema``.

    Returns:
        The object returned by ``self._find_by_obj_or_create`` for the schema.
    """
    requested_schema = EntityViewSchema(**kwargs)
    resolved_view = self._find_by_obj_or_create(requested_schema)

    # self._update_str is read only after the lookup above has run.
    log_line = '{} View {} ({})'.format(
        self._update_str,
        resolved_view.name,
        resolved_view.id,
    )
    self.logger.info(log_line)
    return resolved_view
def test_create_and_update_file_view(syn, project, schedule_for_cleanup):
    """Create a file view over an annotated file, edit it via CSV, and poll for the change.

    Steps:
      1. Store a folder and one annotated file inside it.
      2. Store one STRING column per annotation and build a file view scoped
         to the folder using those columns plus the default view columns.
      3. Check the view's scope, column ids and type mask, then check that the
         annotations are visible both in the view query and on the entity.
      4. Change ``fileFormat`` in the downloaded rows, write them to a
         temporary CSV and store that CSV against the view.
      5. Re-query the view until the change shows up, failing once
         ``QUERY_TIMEOUT_SEC`` has elapsed.
    """

    def read_csv_rows(csv_path):
        """Return the rows of the CSV file at ``csv_path`` as a list of dicts."""
        # Opened in a `with` block so the handle is closed as soon as the rows
        # are materialised, instead of being leaked until garbage collection.
        with io.open(csv_path, encoding="utf-8", newline='') as csv_file:
            return list(csv.DictReader(csv_file))

    # Create a folder
    folder = Folder(str(uuid.uuid4()), parent=project, description='creating a file-view')
    folder = syn.store(folder)

    # Create dummy file with annotations in our folder
    path = utils.make_bogus_data_file()
    file_annotations = dict(fileFormat='jpg', dataType='image', artist='Banksy',
                            medium='print', title='Girl With Ballon')
    schedule_for_cleanup(path)
    a_file = File(path, parent=folder, annotations=file_annotations)
    a_file = syn.store(a_file)
    schedule_for_cleanup(a_file)

    # Add new columns for the annotations on this file and get their IDs
    my_added_cols = [
        syn.store(Column(name=k, columnType="STRING"))
        for k in file_annotations.keys()
    ]
    my_added_cols_ids = [c['id'] for c in my_added_cols]
    view_default_ids = [
        c['id'] for c in syn._get_default_view_columns(
            "entityview", EntityViewType.FILE.value)
    ]
    col_ids = my_added_cols_ids + view_default_ids
    scopeIds = [folder['id'].lstrip('syn')]

    # Create an empty entity-view with defined scope as folder
    entity_view = EntityViewSchema(name=str(uuid.uuid4()),
                                   scopeIds=scopeIds,
                                   addDefaultViewColumns=True,
                                   addAnnotationColumns=False,
                                   type='file',
                                   columns=my_added_cols,
                                   parent=project)
    entity_view = syn.store(entity_view)
    schedule_for_cleanup(entity_view)

    assert set(scopeIds) == set(entity_view.scopeIds)
    assert set(col_ids) == set(entity_view.columnIds)
    assert EntityViewType.FILE.value == entity_view.viewTypeMask

    # get the current view-schema
    view = syn.tableQuery("select * from %s" % entity_view.id)
    schedule_for_cleanup(view.filepath)
    view_dict = read_csv_rows(view.filepath)

    # check that all of the annotations were retrieved from the view
    assert set(file_annotations.keys()).issubset(set(view_dict[0].keys()))

    updated_a_file = syn.get(a_file.id, downloadFile=False)

    # Check that the values are the same as what was set
    # Both in the view and on the entity itself
    for k, v in file_annotations.items():
        assert view_dict[0][k] == v
        assert updated_a_file.annotations[k][0] == v

    # Make a change to the view and store
    view_dict[0]['fileFormat'] = 'PNG'

    # The named temporary file is only used to reserve a path; it is closed
    # before being reopened for writing below.
    with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as temp:
        schedule_for_cleanup(temp.name)
        temp_filename = temp.name
    with io.open(temp_filename, mode='w', encoding="utf-8", newline='') as temp_file:
        dw = csv.DictWriter(temp_file,
                            fieldnames=view_dict[0].keys(),
                            quoting=csv.QUOTE_NONNUMERIC,
                            lineterminator=str(os.linesep))
        dw.writeheader()
        dw.writerows(view_dict)
        temp_file.flush()
    syn.store(Table(entity_view.id, temp_filename))

    # Sanity-check the CSV that was just uploaded.
    new_view_dict = read_csv_rows(temp_filename)
    assert new_view_dict[0]['fileFormat'] == 'PNG'

    # query for the change
    start_time = time.time()

    new_view_results = syn.tableQuery("select * from %s" % entity_view.id)
    schedule_for_cleanup(new_view_results.filepath)
    new_view_dict = read_csv_rows(new_view_results.filepath)

    # query until change is seen.
    while new_view_dict[0]['fileFormat'] != 'PNG':
        # check timeout
        assert time.time() - start_time < QUERY_TIMEOUT_SEC
        # query again
        new_view_results = syn.tableQuery("select * from %s" % entity_view.id)
        new_view_dict = read_csv_rows(new_view_results.filepath)

    # paranoid check
    assert new_view_dict[0]['fileFormat'] == 'PNG'
def partial_rowset_test_state(syn, project):
    """Build the table, file view and expected values used by partial-rowset tests.

    A two-column INTEGER table is stored with two rows, then a folder with two
    placeholder files and a file view over that folder with the same two
    column definitions.  The stored schemas are bundled, together with the
    partial changes to apply and the cell values expected afterwards, into a
    ``TestState`` instance.

    Returns:
        A ``TestState`` object exposing ``syn``, ``project``, ``table_schema``,
        ``view_schema``, ``table_changes``, ``view_changes``,
        ``expected_table_cells`` and ``expected_view_cells``.
    """

    def make_int_columns():
        # A fresh pair of Column objects per schema, as in the original code
        # which built the list twice rather than sharing instances.
        return [
            Column(name='foo', columnType='INTEGER'),
            Column(name='bar', columnType='INTEGER'),
        ]

    # set up the table
    table_columns = make_int_columns()
    stored_table = syn.store(
        Schema(name='PartialRowTest' + str(uuid.uuid4()),
               columns=table_columns,
               parent=project))
    initial_rows = [Row(values) for values in [[1, None], [None, 2]]]
    syn.store(RowSet(schema=stored_table, rows=initial_rows))

    # set up a file view
    view_folder = syn.store(
        Folder(name="PartialRowTestFolder" + str(uuid.uuid4()), parent=project))
    for file_path, file_name in (("~/path/doesnt/matter", "f1"),
                                 ("~/path/doesnt/matter/again", "f2")):
        syn.store(File(file_path, name=file_name, parent=view_folder, synapseStore=False))

    view_columns = make_int_columns()
    stored_view = syn.store(
        EntityViewSchema(name='PartialRowTestViews' + str(uuid.uuid4()),
                         columns=view_columns,
                         addDefaultViewColumns=False,
                         parent=project,
                         scopes=[view_folder]))

    # Partial updates to apply, and the cells expected after applying them.
    changes_for_table = [{'foo': 4}, {'bar': 5}]
    changes_for_view = [{'bar': 6}, {'foo': 7}]

    cells_for_table = pd.DataFrame({
        'foo': [4.0, float('NaN')],
        'bar': [float('NaN'), 5.0]
    })
    cells_for_view = pd.DataFrame({
        'foo': [float('NaN'), 7.0],
        'bar': [6.0, float('NaN')]
    })

    class TestState:
        """Plain container for the objects assembled above."""

        def __init__(self):
            self.syn = syn
            self.project = project
            self.table_schema = stored_table
            self.view_schema = stored_view
            self.table_changes = changes_for_table
            self.view_changes = changes_for_view
            self.expected_table_cells = cells_for_table
            self.expected_view_cells = cells_for_view

    return TestState()
def test_entity_view_add_annotation_columns():
    """Check that annotation columns are added to an entity view on store.

    Two annotated folders form the scope of a view created with
    ``addAnnotationColumns=True``.  After the annotation-column lookup
    reports the expected number of columns, the view is stored and its column
    names/types are compared with the expected mapping.  A further annotation
    is then added to the first folder and the view is re-stored until exactly
    one more column id appears.  Both waits fail once ``QUERY_TIMEOUT_SEC``
    has elapsed.

    ``syn``, ``project`` and ``schedule_for_cleanup`` are resolved from the
    enclosing scope.
    """

    def column_types_of(view):
        # Map each column name of the view to its column type.
        return {col['name']: col['columnType'] for col in syn.getColumns(view)}

    first_folder = syn.store(
        Folder(
            name=str(uuid.uuid4()) + 'test_entity_view_add_annotation_columns_proj1',
            parent=project,
            annotations={'strAnno': 'str1', 'intAnno': 1, 'floatAnno': 1.1},
        )
    )
    second_folder = syn.store(
        Folder(
            name=str(uuid.uuid4()) + 'test_entity_view_add_annotation_columns_proj2',
            parent=project,
            annotations={'dateAnno': datetime.now(), 'strAnno': 'str2', 'intAnno': 2},
        )
    )
    schedule_for_cleanup(first_folder)
    schedule_for_cleanup(second_folder)
    scope_ids = [utils.id_of(first_folder), utils.id_of(second_folder)]

    view_schema = EntityViewSchema(
        name=str(uuid.uuid4()),
        scopeIds=scope_ids,
        addDefaultViewColumns=False,
        addAnnotationColumns=True,
        type='project',
        parent=project,
    )
    assert_true(view_schema['addAnnotationColumns'])

    # For some reason this call is eventually consistent but not immediately
    # consistent, so we just wait until the size returned is correct.
    expected_types = {
        'dateAnno': 'DATE',
        'intAnno': 'INTEGER',
        'strAnno': 'STRING',
        'floatAnno': 'DOUBLE',
    }
    expected_count = len(expected_types)
    annotation_columns = syn._get_annotation_entity_view_columns(scope_ids, 'project')
    wait_started = time.time()
    while len(annotation_columns) != expected_count:
        assert_less(time.time() - wait_started, QUERY_TIMEOUT_SEC)
        annotation_columns = syn._get_annotation_entity_view_columns(scope_ids, 'project')
        time.sleep(2)

    view_schema = syn.store(view_schema)
    assert_false(view_schema['addAnnotationColumns'])
    assert_dict_equal(expected_types, column_types_of(view_schema))

    # Add another annotation to the first folder and make sure that
    # EntityViewSchema only adds one more column.
    first_folder['anotherAnnotation'] = 'I need healing!'
    first_folder = syn.store(first_folder)

    target_count = len(view_schema.columnIds) + 1
    # Sometimes annotation columns are not immediately updated, so the view is
    # re-stored in a loop until the extra column shows up.
    wait_started = time.time()
    while len(view_schema.columnIds) != target_count:
        assert_less(time.time() - wait_started, QUERY_TIMEOUT_SEC)
        view_schema.addAnnotationColumns = True
        view_schema = syn.store(view_schema)

    expected_types['anotherAnnotation'] = 'STRING'
    assert_dict_equal(expected_types, column_types_of(view_schema))
def test_entity_view_add_annotation_columns():
    """Check that annotation columns are added to a project-scoped entity view.

    Two annotated projects form the scope of a view created with
    ``addAnnotationColumns=True``.  After the annotation-column lookup
    reports the expected number of columns, the view is stored and its column
    names/types are compared with the expected mapping.  A further annotation
    is then added to the first project, the view is stored again with
    ``addAnnotationColumns`` re-enabled, and the mapping is re-checked.

    ``syn``, ``project`` and ``schedule_for_cleanup`` are resolved from the
    enclosing scope.
    """
    proj1 = syn.store(
        Project(name=str(uuid.uuid4()) + 'test_entity_view_add_annotation_columns_proj1',
                annotations={
                    'strAnno': 'str1',
                    'intAnno': 1,
                    'floatAnno': 1.1
                }))
    proj2 = syn.store(
        Project(name=str(uuid.uuid4()) + 'test_entity_view_add_annotation_columns_proj2',
                annotations={
                    'dateAnno': datetime.now(),
                    'strAnno': 'str2',
                    'intAnno': 2
                }))
    schedule_for_cleanup(proj1)
    schedule_for_cleanup(proj2)
    scopeIds = [utils.id_of(proj1), utils.id_of(proj2)]

    entity_view = EntityViewSchema(name=str(uuid.uuid4()),
                                   scopeIds=scopeIds,
                                   addDefaultViewColumns=False,
                                   addAnnotationColumns=True,
                                   type='project',
                                   parent=project)
    assert_true(entity_view['addAnnotationColumns'])

    # For some reason this call is eventually consistent but not immediately
    # consistent, so we just wait until the size returned is correct.
    expected_column_types = {
        'dateAnno': 'DATE',
        'intAnno': 'INTEGER',
        'strAnno': 'STRING',
        'floatAnno': 'DOUBLE',
        'concreteType': 'STRING'
    }
    columns = syn._get_annotation_entity_view_columns(scopeIds, 'project')
    start_time = time.time()
    while len(columns) != len(expected_column_types):
        # Bound the wait: without this check the loop would spin forever if
        # the expected column count is never reached.
        assert time.time() - start_time < QUERY_TIMEOUT_SEC
        columns = syn._get_annotation_entity_view_columns(scopeIds, 'project')
        time.sleep(2)

    entity_view = syn.store(entity_view)
    assert_false(entity_view['addAnnotationColumns'])
    view_column_types = {
        column['name']: column['columnType']
        for column in syn.getColumns(entity_view.columnIds)
    }
    assert_dict_equal(expected_column_types, view_column_types)

    # Add another annotation to the project and make sure that
    # EntityViewSchema only adds one more column.
    proj1['anotherAnnotation'] = 'I need healing!'
    proj1 = syn.store(proj1)

    entity_view.addAnnotationColumns = True
    entity_view = syn.store(entity_view)

    expected_column_types.update({'anotherAnnotation': 'STRING'})
    view_column_types = {
        column['name']: column['columnType']
        for column in syn.getColumns(entity_view.columnIds)
    }
    assert_dict_equal(expected_column_types, view_column_types)
def _ensure_syt_view(self):
    """Ensure the syt table/view exists for the project.

    First tries to fetch an existing view named ``self.SYT_VIEW_NAME`` under
    ``self._project``.  If ``self._syt_view`` is still ``None`` afterwards, a
    new view is built with ``viewTypeMask`` 9, given its base and check-out
    columns explicitly, stored, and assigned to ``self._syt_view``.

    Assumes ``self._syt_view`` has already been initialised (presumably to
    ``None`` in the constructor -- confirm), since it is read below even when
    the lookup fails.
    """
    try:
        # This will fail if the schema doesn't exist. This is a synapseclient bug.
        self._syt_view = self._synapse_client.get(
            EntityViewSchema(name=self.SYT_VIEW_NAME, parent=self._project),
            downloadFile=False)
    except Exception:
        # Deliberately best-effort: a failed lookup is treated as "view not
        # found" and handled below.  Catching Exception rather than using a
        # bare except lets KeyboardInterrupt/SystemExit propagate.
        pass

    if self._syt_view is None:
        evs = EntityViewSchema(name=self.SYT_VIEW_NAME,
                               parent=self._project,
                               scopes=[self._project],
                               properties={'viewTypeMask': 9})

        # Delete the 'type' property so we can set our own viewTypeMask to Files and Folders.
        evs.pop('type')

        # Since we removed 'type' we have to manually populate the base columns.
        evs.addColumn(Column(name='id', columnType='ENTITYID'))
        evs.addColumn(Column(name='parentId', columnType='ENTITYID'))
        evs.addColumn(Column(name='projectId', columnType='ENTITYID'))
        evs.addColumn(Column(name='type', columnType='STRING'))
        evs.addColumn(Column(name='name', columnType='STRING', maximumSize=256))

        # Columns backing the check-out annotations.
        evs.addColumn(Column(name=self.ANNO_CHECKED_OUT_BY_ID, columnType='STRING'))
        evs.addColumn(Column(name=self.ANNO_CHECKED_OUT_BY_NAME, columnType='STRING'))
        evs.addColumn(Column(name=self.ANNO_CHECKED_OUT_DATE, columnType='DATE'))

        self._syt_view = self._synapse_client.store(evs)