def tag_using_seperator(self, tag: str, separator: str, value_position: int):
    """Tag every raw data of the experiment from a piece of its file name

    The base name of each raw data URI (directory and extension removed)
    is split on ``separator`` and the piece found at ``value_position`` is
    stored as the tag value. When the split does not yield more than
    ``value_position`` pieces, the tag is set to an empty string.

    Parameters
    ----------
    tag
        The name (or key) of the tag to add to the data
    separator
        The character used as a separator in the filename (ex: _)
    value_position
        Index of the piece to keep once the file name has been split

    """
    # declare the key on the experiment only; each data gets its value below
    self.set_tag(tag, False)

    dataset = RawDataSet(self.metadata.rawdataset)
    for index in range(dataset.size()):
        item = dataset.get(index)

        filename = os.path.basename(item.metadata.uri)
        stem, _extension = os.path.splitext(filename)
        pieces = stem.split(separator)

        tag_value = (
            pieces[value_position] if len(pieces) > value_position else ''
        )
        item.set_tag(tag, tag_value)
def set_tag(self, tag: str, add_to_data: bool = True):
    """Register a tag key on the experiment

    The key is appended to the experiment metadata when it is not already
    listed there, and the experiment is then written. Optionally, every
    data of the raw dataset that does not have the key yet receives it
    with an empty value.

    Parameters
    ----------
    tag
        Tag key to be added
    add_to_data
        if True add an empty tag to all the data in the RawDataSet

    """
    experiment_tags = self.metadata.tags
    if tag not in experiment_tags:
        experiment_tags.append(tag)
        # only write when the experiment metadata actually changed
        self.write()

    if not add_to_data:
        return

    dataset = RawDataSet(self.metadata.rawdataset)
    for index in range(int(dataset.size())):
        item = dataset.get(index)
        if tag in item.metadata.tags:
            # never overwrite a value the data already carries
            continue
        item.set_tag(tag, '')
def test_write_rawdataset(self):
    """Check that a written raw dataset file matches the reference file"""
    dataset = RawDataSet(self.tst_dataset_file)
    dataset.metadata = create_dataset()
    dataset.write()

    # shallow=False makes filecmp compare the file contents
    files_match = filecmp.cmp(
        self.tst_dataset_file, self.ref_dataset_file, shallow=False)
    self.assertTrue(files_match)
def tag_from_name(self, tag: str, values: list):
    """Tag the raw data of the experiment by searching their names

    For each raw data, the candidate values are tried in the given order
    and the first one contained in the data name is stored as the tag
    value. A data whose name contains none of the values is left as is.

    Parameters
    ----------
    tag
        The name (or key) of the tag to add to the data
    values
        List of possible values for the tag to find in the filename

    """
    # declare the key on the experiment only; matching data are tagged below
    self.set_tag(tag, False)

    dataset = RawDataSet(self.metadata.rawdataset)
    for index in range(dataset.size()):
        item = dataset.get(index)
        for candidate in values:
            if candidate not in item.metadata.name:
                continue
            item.set_tag(tag, candidate)
            # first match wins
            break
def get_data(
    self, dataset_name: str, query: str, origin_output_name: str = ''
) -> list:
    """Query a specific dataset of an experiment

    In this version only AND queries are supported
    (ex: tag1=value1 AND tag2=value2) and performed on the data set
    named dataset_name. The raw dataset is checked first, then the
    processed datasets in their stored order; the first dataset whose
    name matches is queried.

    Parameters
    ----------
    dataset_name
        Name of the dataset to query
    query
        String query with the key=value format.
    origin_output_name
        Name of the output origin (ex: -o) in the case of
        ProcessedDataset search. It is not used when the raw dataset
        is the one queried.

    Returns
    -------
    list
        List of selected data (md.json files urls are returned)

    Raises
    ------
    MetadataQueryError
        If no dataset of the experiment is named dataset_name

    """
    raw_dataset = RawDataSet(self.metadata.rawdataset)
    if raw_dataset.metadata.name == dataset_name:
        return raw_dataset.get_data(query)

    for entry in self.metadata.processeddatasets:
        processed_dataset = ProcessedDataSet(entry)
        if processed_dataset.metadata.name == dataset_name:
            return processed_dataset.get_data(query, origin_output_name)

    # Build one message string: passing the pieces as separate positional
    # arguments makes the exception display as a tuple instead of a sentence.
    raise MetadataQueryError(
        'Query dataset {} not found'.format(dataset_name))
def get_dataset(self, name: str):
    """Get a dataset of the experiment from its name

    The name 'data' selects the raw dataset. Any other name is looked up
    among the processed datasets, in their stored order, and the first
    one whose metadata name matches is returned.

    Parameters
    ----------
    name
        Name of the dataset

    Returns
    -------
    A RawDataSet, a ProcessedDataSet, or None when no processed dataset
    has the requested name

    """
    if name == 'data':
        return RawDataSet(self.metadata.rawdataset)

    # lazy: processed datasets are only built until the first match
    candidates = (
        ProcessedDataSet(entry) for entry in self.metadata.processeddatasets
    )
    return next(
        (found for found in candidates if found.metadata.name == name),
        None)
def test_read_rawdataset(self):
    """Check that the reference file is read as the expected metadata"""
    loaded_dataset = RawDataSet(self.ref_dataset_file)
    expected_metadata = create_dataset()

    # compare the serialized forms of both metadata
    actual = loaded_dataset.metadata.serialize()
    expected = expected_metadata.serialize()
    self.assertEqual(actual, expected)