def get_dataset_metadata(self):
    """
    Get dict of additional metadata associated with the current dataset.

    NOTE: In the case that multiple regions and/or datasets are supported,
    this function should return the metadata of the currently selected
    dataset only. If no particular dataset is selected, then all should
    be returned.

    :return: Instance of omsi.shared.metadata_data.metadata_dict
    """
    # This base implementation has no dataset-specific metadata to report,
    # so an empty metadata_dict is returned.
    return metadata_dict()
def get_metadata(self, key=None):
    """
    Get dict with the full description of the metadata for the given key or all
    metadata if no key is given.

    :param key: The name of the metadata object to be retrieved. Default is None
        in which case all metadata will be retrieved.

    :returns: `omsi.shared.metadata_data.metadata_value` object if a key is given
        or a `omsi.shared.metadata_data.metadata_dict` with all metadata
        if key is set to None.

    :raises: KeyError is raised in case that the specified key does not exist
    """
    if key is None:
        output_meta_dict = metadata_dict()
        for metadata_name, metadata_dataset in self.managed_group.iteritems():
            output_meta_dict[metadata_name] = self._read_metadata_value(metadata_name, metadata_dataset)
        return output_meta_dict
    else:
        # h5py raises KeyError here if the key does not exist, as documented above
        return self._read_metadata_value(key, self.managed_group[key])

def _read_metadata_value(self, metadata_name, metadata_dataset):
    """
    Internal helper to convert a single HDF5 metadata dataset to a metadata_value.

    Sharing this between the single-key and all-keys paths of `get_metadata`
    ensures both decode JSON-encoded values and handle scalar datasets the
    same way (previously the single-key path did neither).

    :param metadata_name: The name of the metadata object.
    :param metadata_dataset: The h5py dataset storing the metadata value.

    :returns: `omsi.shared.metadata_data.metadata_value` for the dataset.
    """
    descr_attr = omsi_format_metadata_collection.description_value_attribute
    unit_attr = omsi_format_metadata_collection.unit_value_attribute
    ontology_attr = omsi_format_metadata_collection.ontology_value_attribute
    isjson_attr = omsi_format_metadata_collection.is_json_dict_attribute
    unit = metadata_dataset.attrs[unit_attr] if unit_attr in metadata_dataset.attrs else None
    description = metadata_dataset.attrs[descr_attr] if descr_attr in metadata_dataset.attrs else None
    ontology = json.loads(metadata_dataset.attrs[ontology_attr]) \
        if ontology_attr in metadata_dataset.attrs else None
    # Scalar datasets (shape ()) must be read with [()]; slicing [:] would fail
    md_value = metadata_dataset[:] if len(metadata_dataset.shape) > 0 else metadata_dataset[()]
    if isjson_attr in metadata_dataset.attrs:
        try:
            md_value = json.loads(md_value)
        except (ValueError, TypeError):
            # Narrowed from a bare except: only JSON-decoding failures are
            # best-effort; anything else should propagate
            log_helper.error(__name__, "Parsing of json metadata object failed for " + str(metadata_name))
    return metadata_value(name=metadata_name,
                          value=md_value,
                          description=description,
                          unit=unit,
                          ontology=ontology)
def __create__(cls, parent_group, instrument_name=None, mzdata=None, flush_io=True):
    """
    Create an instrument group and populate it with the given data.

    :param parent_group: The parent h5py group where the instrument group should be created in.
    :type parent_group: h5py.Group
    :param instrument_name: The name of the instrument
    :type instrument_name: string, None
    :param mzdata: Numpy array of the mz data values of the instrument
    :type mzdata: numpy array or None
    :param flush_io: Call flush on the HDF5 file to ensure all HDF5 buffers are
        flushed so that all data has been written to file

    :returns: The function returns the h5py HDF5 handler to the instrument info
        group created for the experiment.
    """
    if instrument_name is not None or mzdata is not None:
        all_meta = metadata_dict()
        if instrument_name is not None:
            all_meta[omsi_format_instrument.instrument_name] = \
                metadata_value(value=instrument_name,
                               name=omsi_format_instrument.instrument_name,
                               description='Name of the instrument')
        if mzdata is not None:
            # BUGFIX: store the m/z axis under its own key (instrument_mz_name).
            # The original stored it under instrument_name, silently overwriting
            # the instrument-name entry whenever both arguments were given.
            all_meta[omsi_format_instrument.instrument_mz_name] = \
                metadata_value(value=mzdata,
                               name=omsi_format_instrument.instrument_mz_name,
                               description='The global m/z axis for the recordings')
    else:
        all_meta = None
    # Initialize the group and populate the data using the create method of the parent class
    metadata_obj = omsi_file_metadata_collection.___create___(
        parent_group=parent_group,
        group_name=omsi_format_instrument.instrument_groupname,
        metadata=all_meta,
        type_attr_value="omsi_file_instrument",
        version_attr_value=omsi_format_instrument.current_version,
        flush_io=flush_io)
    if flush_io:
        parent_group.file.flush()
    return omsi_file_instrument.__create_instrument_info___(instrument_group=metadata_obj.managed_group)
def __parse_scan_parameters(self):
    """
    Internal helper function used to parse out scan parameters from the scan filter string.

    :return: List with one `metadata_dict` per entry of self.scan_types, holding the
        parsed parameters: msn_value_of_n, dissociation_energy (if present),
        msn_precursor_mz (if present), dissociation_type, and polarity.
    """
    # example scan filter, MS2: ITMS + p MALDI Z ms2 [email protected] [500.00-700.00]
    # example scan filter, MS1: FTMS + p MALDI Full ms [850.00-1000.00]
    scan_params = []
    for scan_idx, scantype in enumerate(self.scan_types):
        # MSn value of n, e.g. "ms2" -> 2; a plain "ms" (MS1) has no digits -> default 1
        n = filter(None, re.findall(r'(?<=ms)\d*', scantype))
        msn_value_of_n = int(n[0]) if n else 1
        # precursor m/z: the number immediately before the "@"
        ms2pre = filter(None, re.findall(r'[\d.]+(?=@)', scantype))
        ms2_precursor = float(ms2pre[0]) if ms2pre else None
        # dissociation type: the letters directly after "<digit>@", e.g. "cid", "hcd"
        # BUGFIX: use [A-Za-z] -- the original [A-z] also matched "[", "]", "^", "_", "`"
        dissot = filter(None, re.findall(r'(?<=\d@)[A-Za-z]*', scantype))
        dissociationtype = dissot[0] if dissot else 'None'
        # dissociation energy: the number following the dissociation type
        dissoe = filter(None, re.findall('(?<=' + dissociationtype + ')' + r'[\d.]+', scantype))
        dissociationenergy = float(dissoe[0]) if dissoe else None
        # polarity: "+ p" -> positive, "- p" -> negative, otherwise unknown
        if filter(None, re.findall(r'([+][ ]p)', scantype)):
            polarity = 'pos'
        elif filter(None, re.findall(r'([-][ ]p)', scantype)):
            polarity = 'neg'
        else:
            polarity = 'unk'
        # put all params in dictionary
        paramdict = metadata_dict()
        msn_von_ontology = METADATA_ONTOLOGIES['msn_value_of_n']
        paramdict['msn_value_of_n'] = metadata_value(name='msn_value_of_n',
                                                     value=msn_value_of_n,
                                                     unit=msn_von_ontology['unit'],
                                                     description=msn_von_ontology['description'],
                                                     ontology=msn_von_ontology)
        # BUGFIX: test "is not None" so that a legitimate 0.0 energy is recorded
        # (consistent with the precursor check below)
        if dissociationenergy is not None:
            paramdict['dissociation_energy'] = metadata_value(name='dissociation_energy',
                                                              value=dissociationenergy,
                                                              unit='V',
                                                              description='Dissociation energy')
        if ms2_precursor is not None:
            paramdict['msn_precursor_mz'] = metadata_value(name='msn_precursor_mz',
                                                           value=ms2_precursor,
                                                           unit='m/z',
                                                           description='The precursor m/z value')
        paramdict['dissociation_type'] = metadata_value(name='dissociation_type',
                                                        value=dissociationtype,
                                                        unit=None,
                                                        description='Dissociation type')
        polarity_ontology = METADATA_ONTOLOGIES['polarity']
        paramdict['polarity'] = metadata_value(name='polarity',
                                               value=polarity,
                                               unit=polarity_ontology['unit'],
                                               description=polarity_ontology['description'],
                                               ontology=polarity_ontology)
        scan_params.append(paramdict)
    return scan_params
def __parse_scan_parameters(self):
    """
    Internal helper function used to parse out scan parameters from the scan filter string.

    :return: List with one `metadata_dict` per entry of self.scan_types, holding the
        parsed parameters: msn_value_of_n, dissociation_energy (if present),
        msn_precursor_mz (if present), dissociation_type, and polarity.
    """
    # example scan filter, MS2: ITMS + p MALDI Z ms2 [email protected] [500.00-700.00]
    # example scan filter, MS1: FTMS + p MALDI Full ms [850.00-1000.00]
    scan_params = []
    for scan_idx, scantype in enumerate(self.scan_types):
        # MSn value of n, e.g. "ms2" -> 2; a plain "ms" (MS1) has no digits -> default 1
        n = filter(None, re.findall(r'(?<=ms)\d*', scantype))
        msn_value_of_n = int(n[0]) if n else 1
        # precursor m/z: the number immediately before the "@"
        ms2pre = filter(None, re.findall(r'[\d.]+(?=@)', scantype))
        ms2_precursor = float(ms2pre[0]) if ms2pre else None
        # dissociation type: the letters directly after "<digit>@", e.g. "cid", "hcd"
        # BUGFIX: use [A-Za-z] -- the original [A-z] also matched "[", "]", "^", "_", "`"
        dissot = filter(None, re.findall(r'(?<=\d@)[A-Za-z]*', scantype))
        dissociationtype = dissot[0] if dissot else 'None'
        # dissociation energy: the number following the dissociation type
        dissoe = filter(None, re.findall('(?<=' + dissociationtype + ')' + r'[\d.]+', scantype))
        dissociationenergy = float(dissoe[0]) if dissoe else None
        # polarity: "+ p" -> positive, "- p" -> negative, otherwise unknown
        if filter(None, re.findall(r'([+][ ]p)', scantype)):
            polarity = 'pos'
        elif filter(None, re.findall(r'([-][ ]p)', scantype)):
            polarity = 'neg'
        else:
            polarity = 'unk'
        # put all params in dictionary
        paramdict = metadata_dict()
        msn_von_ontology = METADATA_ONTOLOGIES['msn_value_of_n']
        paramdict['msn_value_of_n'] = metadata_value(
            name='msn_value_of_n',
            value=msn_value_of_n,
            unit=msn_von_ontology['unit'],
            description=msn_von_ontology['description'],
            ontology=msn_von_ontology)
        # BUGFIX: test "is not None" so that a legitimate 0.0 energy is recorded
        # (consistent with the precursor check below)
        if dissociationenergy is not None:
            paramdict['dissociation_energy'] = metadata_value(
                name='dissociation_energy',
                value=dissociationenergy,
                unit='V',
                description='Dissociation energy')
        if ms2_precursor is not None:
            paramdict['msn_precursor_mz'] = metadata_value(
                name='msn_precursor_mz',
                value=ms2_precursor,
                unit='m/z',
                description='The precursor m/z value')
        paramdict['dissociation_type'] = metadata_value(
            name='dissociation_type',
            value=dissociationtype,
            unit=None,
            description='Dissociation type')
        polarity_ontology = METADATA_ONTOLOGIES['polarity']
        paramdict['polarity'] = metadata_value(
            name='polarity',
            value=polarity,
            unit=polarity_ontology['unit'],
            description=polarity_ontology['description'],
            ontology=polarity_ontology)
        scan_params.append(paramdict)
    return scan_params
def __compute_file_info(cls, filename, resolution):
    ## TODO completely refactor this to make it smartly handle profile or centroid datasets
    ## TODO: centroid datasets should take in a user parameter "Resolution" and resample data at that resolution
    ## TODO: profile datasets should work as is
    ## TODO: checks for profile data vs. centroid data on the variation in length of ['m/z array']
    """
    Internal helper function used to compute the mz axis, data type for the intensities,
    format type, and dataset/instrument/method metadata for an imzML file.

    :param filename: Path of the imzML file to inspect.
    :param resolution: Resolution (in ppm) used to re-interpolate the m/z axis
        for processed-mode files.

    :return: Numpy array with the spectrum coordinates
    :return: Numpy array with mz axis
    :return: string with data type
    :return: imzml file type
    :return: metadata_dict with dataset, instrument, and method metadata (3 values)
    """
    reader = ImzMLParser(filename)
    # Read the first spectrum
    mz_axes, intens = reader.getspectrum(0)   # NOTE: mz_axes is a tuple
    # Read the coordinates
    coordinates = np.asarray(reader.coordinates)
    # Determine the data type for the intensity values
    dtype = np.asarray(intens).dtype.str
    # Determine the file type: "continuous" if every spectrum shares the m/z axis
    # of spectrum 0, otherwise "processed" (per-spectrum m/z axes)
    file_type = cls.available_imzml_types['continuous']
    min_mz, max_mz = np.amin(mz_axes), np.amax(mz_axes)
    for ind in range(1, coordinates.shape[0]):   # spectrum 0 defined mz_axes; skip re-reading it
        mz, intens = reader.getspectrum(ind)
        if mz != mz_axes:
            file_type = cls.available_imzml_types['processed']
            if min_mz > np.amin(mz):
                min_mz = np.amin(mz)
            if max_mz < np.amax(mz):
                max_mz = np.amax(mz)
    # Reinterpolate the mz-axis if we have a processed mode imzml file
    if file_type == cls.available_imzml_types['processed']:
        # BUGFIX: np.logspace requires an integer number of samples; np.ceil returns a float
        num_samples = int(np.ceil(1e6 * np.log(max_mz / min_mz) / resolution))
        mz_axes = np.logspace(np.log10(min_mz), np.log10(max_mz), num_samples)
        log_helper.info(__name__, "Reinterpolated m/z axis for processed imzML file")

    # Construct the imzml metadata information
    dataset_metadata = metadata_dict()
    instrument_metadata = metadata_dict()
    method_metadata = metadata_dict()
    for k, v in reader.imzmldict.iteritems():
        dataset_metadata[k] = metadata_value(name=k,
                                             value=v,
                                             unit=None,
                                             description=k,
                                             ontology=None)
    # Delete the parser and read the metadata
    del reader

    # Parse the metadata for the file. We try to parse only the header and ignore the
    # <run> group in the XML file to avoid going through the whole file again
    # while extracting the majority of the relevant metadata
    try:
        with open(filename, 'r') as ins:
            metdata_header = ''
            for line in ins:
                if '<run' in line:
                    break
                else:
                    metdata_header += line
            metdata_header += '</mzML>'
            metdata_header_dict = xmltodict.parse(metdata_header)['mzML']
            for k, v in metdata_header_dict.iteritems():
                store_value = metadata_value(name=k,
                                             value=v,
                                             unit=None,
                                             description=str(k) + " extracted from imzML XML header.",
                                             ontology=None)
                # Route each top-level XML section to the appropriate metadata dict
                if k == 'instrumentConfigurationList':
                    instrument_metadata[k] = store_value
                elif k == 'dataProcessingList':
                    method_metadata[k] = store_value
                elif k == 'scanSettingsList':
                    dataset_metadata[k] = store_value
                elif k == 'softwareList':
                    method_metadata[k] = store_value
                elif k == 'sampleList':
                    method_metadata[k] = store_value
                else:
                    dataset_metadata[k] = store_value
            dataset_metadata['imzml_xml_metadata_header'] = metadata_value(
                name='imzml_xml_metadata_header',
                value=metdata_header,
                unit=None,
                description='XML imzML header',
                ontology=None)
    except Exception:
        # Best-effort only: header metadata is optional, but do not swallow
        # KeyboardInterrupt/SystemExit like the original bare except did
        log_helper.warning(__name__, "Extraction of additional imzML metadata failed")
    return coordinates, np.asarray(mz_axes), dtype, file_type, \
        dataset_metadata, instrument_metadata, method_metadata
def __compute_file_info(cls, filename, resolution):
    ## TODO completely refactor this to make it smartly handle profile or centroid datasets
    ## TODO: centroid datasets should take in a user parameter "Resolution" and resample data at that resolution
    ## TODO: profile datasets should work as is
    ## TODO: checks for profile data vs. centroid data on the variation in length of ['m/z array']
    """
    Internal helper function used to compute the mz axis, data type for the intensities,
    format type, and dataset/instrument/method metadata for an imzML file.

    :param filename: Path of the imzML file to inspect.
    :param resolution: Resolution (in ppm) used to re-interpolate the m/z axis
        for processed-mode files.

    :return: Numpy array with the spectrum coordinates
    :return: Numpy array with mz axis
    :return: string with data type
    :return: imzml file type
    :return: metadata_dict with dataset, instrument, and method metadata (3 values)
    """
    reader = ImzMLParser(filename)
    # Read the first spectrum
    mz_axes, intens = reader.getspectrum(0)   # NOTE: mz_axes is a tuple
    # Read the coordinates
    coordinates = np.asarray(reader.coordinates)
    # Determine the data type for the intensity values
    dtype = np.asarray(intens).dtype.str
    # Determine the file type: "continuous" if every spectrum shares the m/z axis
    # of spectrum 0, otherwise "processed" (per-spectrum m/z axes)
    file_type = cls.available_imzml_types['continuous']
    min_mz, max_mz = np.amin(mz_axes), np.amax(mz_axes)
    for ind in range(1, coordinates.shape[0]):   # spectrum 0 defined mz_axes; skip re-reading it
        mz, intens = reader.getspectrum(ind)
        if mz != mz_axes:
            file_type = cls.available_imzml_types['processed']
            if min_mz > np.amin(mz):
                min_mz = np.amin(mz)
            if max_mz < np.amax(mz):
                max_mz = np.amax(mz)
    # Reinterpolate the mz-axis if we have a processed mode imzml file
    if file_type == cls.available_imzml_types['processed']:
        # BUGFIX: np.logspace requires an integer number of samples; np.ceil returns a float
        num_samples = int(np.ceil(1e6 * np.log(max_mz / min_mz) / resolution))
        mz_axes = np.logspace(np.log10(min_mz), np.log10(max_mz), num_samples)
        log_helper.info(__name__, "Reinterpolated m/z axis for processed imzML file")

    # Construct the imzml metadata information
    dataset_metadata = metadata_dict()
    instrument_metadata = metadata_dict()
    method_metadata = metadata_dict()
    for k, v in reader.imzmldict.iteritems():
        dataset_metadata[k] = metadata_value(name=k,
                                             value=v,
                                             unit=None,
                                             description=k,
                                             ontology=None)
    # Delete the parser and read the metadata
    del reader

    # Parse the metadata for the file. We try to parse only the header and ignore the
    # <run> group in the XML file to avoid going through the whole file again
    # while extracting the majority of the relevant metadata
    try:
        with open(filename, 'r') as ins:
            metdata_header = ''
            for line in ins:
                if '<run' in line:
                    break
                else:
                    metdata_header += line
            metdata_header += '</mzML>'
            metdata_header_dict = xmltodict.parse(metdata_header)['mzML']
            for k, v in metdata_header_dict.iteritems():
                store_value = metadata_value(name=k,
                                             value=v,
                                             unit=None,
                                             description=str(k) + " extracted from imzML XML header.",
                                             ontology=None)
                # Route each top-level XML section to the appropriate metadata dict
                if k == 'instrumentConfigurationList':
                    instrument_metadata[k] = store_value
                elif k == 'dataProcessingList':
                    method_metadata[k] = store_value
                elif k == 'scanSettingsList':
                    dataset_metadata[k] = store_value
                elif k == 'softwareList':
                    method_metadata[k] = store_value
                elif k == 'sampleList':
                    method_metadata[k] = store_value
                else:
                    dataset_metadata[k] = store_value
            dataset_metadata['imzml_xml_metadata_header'] = metadata_value(
                name='imzml_xml_metadata_header',
                value=metdata_header,
                unit=None,
                description='XML imzML header',
                ontology=None)
    except Exception:
        # Best-effort only: header metadata is optional, but do not swallow
        # KeyboardInterrupt/SystemExit like the original bare except did
        log_helper.warning(__name__, "Extraction of additional imzML metadata failed")
    return coordinates, np.asarray(mz_axes), dtype, file_type, \
        dataset_metadata, instrument_metadata, method_metadata