def testMapAlignerAlgorithmPoseClustering():
    """
    @tests:
     MapAlignmentAlgorithmPoseClustering.__init__
     MapAlignmentAlgorithmPoseClustering.alignFeatureMaps
     MapAlignmentAlgorithmPoseClustering.alignPeakMaps
     MapAlignmentAlgorithmPoseClustering.fitModel
     MapAlignmentAlgorithmPoseClustering.getDefaults
     MapAlignmentAlgorithmPoseClustering.getName
     MapAlignmentAlgorithmPoseClustering.getParameters
     MapAlignmentAlgorithmPoseClustering.setName
     MapAlignmentAlgorithmPoseClustering.setParameters
     MapAlignmentAlgorithmPoseClustering.setReference
     MapAlignmentAlgorithmPoseClustering.transformFeatureMaps
     MapAlignmentAlgorithmPoseClustering.transformPeakMaps
    """
    # Smoke test of the wrapper: the getters must return the expected types,
    # setReference must accept (int, str), and the remaining methods only
    # have to be reachable as attributes (they are looked up, never called).
    aligner_cls = pyopenms.MapAlignmentAlgorithmPoseClustering
    aligner = aligner_cls()

    defaults = aligner.getDefaults()
    assert isinstance(defaults, pyopenms.Param)
    parameters = aligner.getParameters()
    assert isinstance(parameters, pyopenms.Param)
    name = aligner.getName()
    assert isinstance(name, str)

    aligner.setReference(0, "")

    # Looked up on the instance.
    for method_name in ("alignFeatureMaps", "alignPeakMaps"):
        getattr(aligner, method_name)

    # Looked up on the class itself.
    for method_name in ("transformPeakMaps", "transformFeatureMaps"):
        getattr(aligner_cls, method_name)
def getDefaultParameters():
    """Build the default parameter set for the pose-clustering map aligner.

    The result is a fresh ``pms.Param`` into which two parameter sets are
    inserted: what ``getModelDefaults("linear")`` returns under the prefix
    ``"model:"``, and the parameters of a newly created
    ``MapAlignmentAlgorithmPoseClustering`` under ``"algorithm:"``.
    """
    # (prefix, parameters) pairs, evaluated in the order they are inserted.
    sections = (
        ("model:", getModelDefaults("linear")),
        ("algorithm:", pms.MapAlignmentAlgorithmPoseClustering().getParameters()),
    )

    combined = pms.Param()
    for prefix, section_params in sections:
        combined.insert(prefix, section_params)
    return combined
def align(in_files, out_files, out_trafos, reference_index, reference_file, params):
    """Align every input map against one reference map using pose clustering.

    Inputs must either all be peak files (mzML / mzXML / mzData types) or all
    be featureXML files; any other combination raises ``Exception``.

    The reference is chosen in this order:

    1. ``reference_file`` when it is truthy,
    2. ``in_files[reference_index - 1]`` when ``reference_index > 0``
       (the index is 1-based),
    3. otherwise the input with the largest ``(size, file name)`` tuple, where
       size comes from ``FeatureXMLFile.loadSize`` for feature input or
       ``MSExperiment.getSize`` for peak input.

    The input that equals the reference gets an ``"identity"`` model, every
    other input is passed to ``algorithm.align``.  When ``out_files`` is
    given the transformed map is stored at ``out_files[i]``; when
    ``out_trafos`` is given the transformation is stored at ``out_trafos[i]``.

    ``params`` must provide an ``"algorithm:"`` subtree; it is also passed on
    to ``addDataProcessing``.
    """
    in_types = {pms.FileHandler.getType(path) for path in in_files}
    if in_types <= {pms.Type.MZML, pms.Type.MZXML, pms.Type.MZDATA}:
        align_features = False
    elif in_types == {pms.Type.FEATUREXML}:
        align_features = True
    else:
        raise Exception("different kinds of input files")

    algorithm = pms.MapAlignmentAlgorithmPoseClustering()
    algorithm.setParameters(params.copy("algorithm:", True))
    algorithm.setLogType(pms.LogType.CMD)

    progress = pms.ProgressLogger()
    progress.setLogType(pms.LogType.CMD)

    # ---- pick the reference file -------------------------------------
    if reference_file:
        ref_path = reference_file
    elif reference_index > 0:
        ref_path = in_files[reference_index - 1]
    else:
        # No explicit reference: rank the inputs by (size, path).
        candidates = []
        if align_features:
            size_reader = pms.FeatureXMLFile()
            progress.startProgress(0, len(in_files), "Determine Reference map")
            for idx, path in enumerate(in_files):
                candidates.append((size_reader.loadSize(path), path))
                progress.setProgress(idx)
        else:
            peak_reader = pms.MzMLFile()
            scratch = pms.MSExperiment()  # reused for every file
            progress.startProgress(0, len(in_files), "Determine Reference map")
            for idx, path in enumerate(in_files):
                peak_reader.load(path, scratch)
                scratch.updateRanges()
                candidates.append((scratch.getSize(), path))
                progress.setProgress(idx)
        progress.endProgress()
        ref_path = max(candidates)[1]

    # ---- featureXML load options -------------------------------------
    feature_file = pms.FeatureXMLFile()
    if not out_files:
        # Nothing gets written back, so convex hulls and subordinates are
        # switched off for loading.
        load_options = feature_file.getOptions()
        load_options.setLoadConvexHull(False)
        load_options.setLoadSubordinates(False)
        feature_file.setOptions(load_options)

    # ---- load the reference map --------------------------------------
    if align_features:
        map_ref = pms.FeatureMap()
        ref_reader = pms.FeatureXMLFile()
        # The reference is always loaded without hulls / subordinates.
        ref_options = feature_file.getOptions()
        ref_options.setLoadConvexHull(False)
        ref_options.setLoadSubordinates(False)
        ref_reader.setOptions(ref_options)
        ref_reader.load(ref_path, map_ref)
    else:
        map_ref = pms.MSExperiment()
        pms.MzMLFile().load(ref_path, map_ref)
    algorithm.setReference(map_ref)

    # ---- align each input --------------------------------------------
    progress.startProgress(0, len(in_files), "Align input maps")
    for idx, path in enumerate(in_files):
        trafo = pms.TransformationDescription()

        if align_features:
            current = pms.FeatureMap()
            feature_io = pms.FeatureXMLFile()
            feature_io.setOptions(feature_file.getOptions())
            feature_io.load(path, current)
        else:
            current = pms.MSExperiment()
            pms.MzMLFile().load(path, current)

        if path == ref_path:
            # The reference maps onto itself.
            trafo.fitModel("identity")
        else:
            algorithm.align(current, trafo)

        if out_files:
            if align_features:
                pms.MapAlignmentTransformer.transformSingleFeatureMap(
                    current, trafo)
                addDataProcessing(current, params,
                                  pms.ProcessingAction.ALIGNMENT)
                feature_io.store(out_files[idx], current)
            else:
                pms.MapAlignmentTransformer.transformSinglePeakMap(
                    current, trafo)
                addDataProcessing(current, params,
                                  pms.ProcessingAction.ALIGNMENT)
                pms.MzMLFile().store(out_files[idx], current)

        if out_trafos:
            pms.TransformationXMLFile().store(out_trafos[idx], trafo)

        progress.setProgress(idx + 1)

    progress.endProgress()
def align(in_files, out_files, trafo_out_files, reference_index, reference_file, params):
    """Align all input maps in one batch with pose clustering and write results.

    All inputs are loaded into memory, aligned together, optionally refined
    with the model named by ``params["model:type"]`` (skipped when that value
    is ``"none"``), transformed, and then stored.

    * Peak input (mzML / mzXML / mzData types) is read and written through
      ``pms.FileHandler``.
    * Feature input (featureXML only) is read and written through
      ``pms.FeatureXMLFile``.
    * Any other mix of types raises ``Exception``.

    ``out_files[i]`` receives the map loaded from ``in_files[i]`` after it has
    gone through ``addDataProcessing``.  If ``trafo_out_files`` is truthy the
    transformations are stored pairwise with those names.
    """
    aligner = pms.MapAlignmentAlgorithmPoseClustering()
    aligner.setReference(reference_index, reference_file)

    model_params = params.copy("model:", True)
    model_type = model_params.getValue("type").toString()

    progress = pms.ProgressLogger()
    progress.setLogType(pms.LogType.CMD)

    aligner.setParameters(params.copy("algorithm:", True))

    transformations = []
    in_types = {pms.FileHandler.getType(path) for path in in_files}
    in_maps = []

    def load_all(make_map, load):
        # Fill in_maps with one freshly created container per input file.
        progress.startProgress(0, len(in_files), "loading input files")
        for idx, path in enumerate(in_files):
            progress.setProgress(idx)
            container = make_map()
            load(path, container)
            in_maps.append(container)
        progress.endProgress()

    def store_all(store):
        # Write the i-th aligned map to the i-th output file.
        progress.startProgress(0, len(out_files), "writing output files")
        for idx, path in enumerate(out_files):
            progress.setProgress(idx)
            store(path, addDataProcessing(in_maps[idx], params))
        progress.endProgress()

    if in_types <= {pms.Type.MZML, pms.Type.MZXML, pms.Type.MZDATA}:
        handler = pms.FileHandler()
        load_all(pms.MSExperiment, handler.loadExperiment)
        aligner.alignPeakMaps(in_maps, transformations)
        if model_type != "none":
            aligner.fitModel(model_type, model_params, transformations)
        pms.MapAlignmentAlgorithmPoseClustering.transformPeakMaps(
            in_maps, transformations)
        store_all(handler.storeExperiment)
    elif in_types == {pms.Type.FEATUREXML}:
        handler = pms.FeatureXMLFile()
        load_all(pms.FeatureMap, handler.load)
        aligner.alignFeatureMaps(in_maps, transformations)
        if model_type != "none":
            aligner.fitModel(model_type, model_params, transformations)
        pms.MapAlignmentAlgorithmPoseClustering.transformFeatureMaps(
            in_maps, transformations)
        store_all(handler.store)
    else:
        raise Exception("can not handle input file format")

    if trafo_out_files:
        for trafo_path, trafo in zip(trafo_out_files, transformations):
            pms.TransformationXMLFile().store(trafo_path, trafo)
def rtAlign(tables, refTable = None, destination = None, nPeaks=-1, numBreakpoints=5, maxRtDifference = 100, maxMzDifference = 0.3, maxMzDifferencePairfinder = 0.5, forceAlign=False): """ aligns feature tables in respect to retention times. the algorithm produces new tables with aligned data. **input tables including the assiciatoted peakmap(s) are not modified**. Parameters: - *nPeaks*: max number of peaks matched by superimposer, -1 means: all peaks - *maxRtDifference*: max allowed difference in rt values for searching matching features. - *maxMzDifference*: max allowed difference in mz values for super imposer. - *maxMzDifferencePairfinder*: max allowed difference in mz values for pair finding. - *numBreakpoints*: number of break points of fitted spline. default:5, more points result in splines with higher variation. - *forceAlign*: has to be *True* to align already rt aligned tables. - *refTable*: extra reference table, if *None* the table with most features among *tables* is taken. 
""" import os.path import pyopenms import copy from libms.DataStructures.Table import toOpenMSFeatureMap, Table import custom_dialogs assert refTable is None or isinstance(refTable, Table) assert destination is None or isinstance(destination, basestring) for table in tables: # collect all maps maps = set(table.peakmap.values) assert len(maps) == 1, "can only align features from one single peakmap" map = maps.pop() assert map != None, "None value for peakmaps not allowed" if forceAlign: map.meta["rt_aligned"]=False else: if map.meta.get("rt_aligned"): message = "there are already rt_aligned peakmaps in the "\ "tables.\nyou have to provide the forceAlign "\ "parameter of this function\nto align all tables" raise Exception(message) assert isinstance(table, Table), "non table object in tables" table.requireColumn("mz"), "need mz column for alignment" table.requireColumn("rt"), "need rt column for alignment" if destination is None: destination = custom_dialogs.askForDirectory() if destination is None: print "aborted" return if refTable is not None: maps = set(refTable.peakmap.values) assert len(maps) == 1, "can only align features from one single peakmap" map = maps.pop() assert map != None, "None value for peakmaps not allowed" refTable.requireColumn("mz"), "need mz column in reftable" refTable.requireColumn("rt"), "need rt column in reftable" assert os.path.isdir(os.path.abspath(destination)), "target is no directory" # setup algorithm algo = pyopenms.MapAlignmentAlgorithmPoseClustering() algo.setLogType(pyopenms.LogType.CMD) ap = algo.getDefaults() ap["max_num_peaks_considered"] = nPeaks ap["superimposer:num_used_points"] = nPeaks ap["superimposer:mz_pair_max_distance"] = float(maxMzDifferencePairfinder) ap["pairfinder:distance_RT:max_difference"] = float(maxRtDifference) ap["pairfinder:distance_MZ:max_difference"] = float(maxMzDifference) ap["pairfinder:distance_MZ:unit"] = "Da" algo.setParameters(ap) # convert to pyOpenMS types and find map with max num features 
which # is taken as refamp: fms = [ (toOpenMSFeatureMap(table), table) for table in tables] if refTable is None: refMap, refTable = max(fms, key=lambda (fm, t): fm.size()) print print "REFMAP IS", print os.path.basename(refTable.meta.get("source","<noname>")) else: if refTable in tables: refMap = fms[tables.index(refTable)][0] else: refMap = toOpenMSFeatureMap(refTable) results = [] for fm, table in fms: # we do not modify existing table inkl. peakmaps: (rt-values # might change below in _transformTable) ! table = copy.deepcopy(table) if fm is refMap: results.append(table) continue sources = set(table.source.values) assert len(sources)==1, "multiple sources in table" source = sources.pop() filename = os.path.basename(source) print print "ALIGN FEATURES FROM ", filename print transformation = _computeTransformation(algo, refMap, fm, numBreakpoints) _plot_and_save(transformation, filename, destination) _transformTable(table, transformation) results.append(table) for t in results: t.meta["rt_aligned"] = True return results
def __init__(self, **kwargs):
    """Set up the entity around a new pose-clustering map aligner.

    A fresh ``oms.MapAlignmentAlgorithmPoseClustering`` is passed as the
    first positional argument to the parent initialiser; all keyword
    arguments are forwarded unchanged.
    """
    pose_clustering = oms.MapAlignmentAlgorithmPoseClustering()
    super(MAEntity, self).__init__(pose_clustering, **kwargs)
def align_feature_xmls(feature_xml_lis, consensus_map_out_path="", class_label_dict={}):
    """
    Align feature maps with pose clustering, link them into a consensus map
    and median-normalize that map.

    Each MS1 spectrum from a raw file yields one feature file - they are
    loaded and aligned pairwise against the file with the most features, then
    grouped across files to get unique and representative features.

    :param feature_xml_lis: iterable of featureXML paths (str or path-like);
        converted to str and sorted so the feature matrix has a stable order
    :param consensus_map_out_path: target for the consensusXML export;
        nothing is written when it is empty
    :param class_label_dict: optional mapping whose keys (in iteration order)
        replace the file names in the consensus column headers, matched by
        position against the sorted inputs; only read here, never modified
    :return: tuple ``(consensus_map, measurement_names)``
    :raises ValueError: if ``feature_xml_lis`` is empty
    """
    # openms won't deal with posix paths - it wants strings instead.
    # Sorting makes sure the feature matrix is sorted as well.
    feature_xml_lis = sorted(str(fx) for fx in feature_xml_lis)
    if not feature_xml_lis:
        # np.argmax would fail on an empty list with an unhelpful message
        raise ValueError("feature_xml_lis is empty - need at least one feature xml file to align")

    num_features_list = []
    for current_feature_xml_path in feature_xml_lis:
        # load features into FeatureMaps - only the size is of interest here
        cm = oms.FeatureMap()
        oms.FeatureXMLFile().load(current_feature_xml_path, cm)
        num_features_list.append(cm.size())
        del cm

    # choose the feature file / experiment with most features as reference
    max_index = np.argmax(num_features_list)
    reference_map_path = feature_xml_lis[max_index]

    # These two are raised by the retry below and then stay raised for all
    # following files.
    default_max_num_peaks_considered = 1000
    default_max_scaling_value = 10.0

    max_scaling_key = b'superimposer:max_scaling'
    max_num_peaks_key = b'max_num_peaks_considered'
    num_used_points_key = b'superimposer:num_used_points'
    max_shift_key = b'superimposer:max_shift'

    aligned_paths = []
    for i, current_feature_xml_path in enumerate(feature_xml_lis):
        # pairwise alignment - so the master map is needed every time
        reference_map = oms.FeatureMap()
        oms.FeatureXMLFile().load(reference_map_path, reference_map)

        current_map = oms.FeatureMap()
        oms.FeatureXMLFile().load(current_feature_xml_path, current_map)

        # transformation description is required as output argument of align
        transformation_description = oms.TransformationDescription()

        # max scaling and max num peaks are adjusted, the library defaults
        # lead to errors when running with algae samples
        aligner = oms.MapAlignmentAlgorithmPoseClustering()
        aligner_params = aligner.getParameters()
        aligner_params.setValue(max_scaling_key, default_max_scaling_value)
        aligner_params.setValue(max_num_peaks_key, default_max_num_peaks_considered)
        # decrease runtime by using fewer points in the superimposer
        aligner_params.setValue(num_used_points_key, 1000)

        aligner.setParameters(aligner_params)
        aligner.setReference(reference_map)

        alignment_succeeded = True
        try:
            aligner.align(current_map, transformation_description)
        except RuntimeError as align_error:
            if 'max_num_peaks_considered' in str(align_error):
                # retry with higher thresholds - required for algae dataset
                default_max_num_peaks_considered = 15000  # 15 fold - a lot slower but less error prone
                aligner_params.setValue(max_num_peaks_key, default_max_num_peaks_considered)
                default_max_scaling_value = 20.0
                aligner_params.setValue(max_scaling_key, default_max_scaling_value)
                # max shift could also be off - issue for ckd dataset
                default_max_shift_value = 2000.0  # increased from 1000 to 2000
                aligner_params.setValue(max_shift_key, default_max_shift_value)
                print(
                    f"Encountered GC/MS Clustering issue - setting 'max_num_peaks_considered' to {default_max_num_peaks_considered}, 'superimposer:max_scaling' to {default_max_scaling_value} and 'superimposer:max_shift' to {default_max_shift_value}"
                )
                aligner.setParameters(aligner_params)
                aligner.setReference(reference_map)
                aligner.align(current_map, transformation_description)
            else:
                # Stay best-effort (the map is used as loaded), but do not
                # hide the failure.
                alignment_succeeded = False
                print(f"Alignment of {current_feature_xml_path} failed - using it unaligned: {align_error}")

        if alignment_succeeded:
            # align() only computes the transformation; it has to be applied
            # explicitly, otherwise the map stored below is not aligned.
            oms.MapAlignmentTransformer().transformRetentionTimes(
                current_map, transformation_description, False)

        current_map.updateRanges()
        reference_map.updateRanges()

        # update feature XML files - both reference and current
        updated_current_map_path = default_store_aligned_feature_xml(
            current_map, current_feature_xml_path)
        updated_reference_path = default_store_aligned_feature_xml(
            reference_map, reference_map_path)
        reference_map_path = updated_reference_path
        aligned_paths.append(updated_current_map_path)
        print(f"Finished alignment of {i}/{len(feature_xml_lis)-1}")

    # the slot of the reference must point to the stored reference map
    aligned_paths[max_index] = reference_map_path

    # link/group them across features to create the consensus map,
    # parameters are left at their defaults.
    # according to openms documentation:
    #   call "setReference", "addToGroup" (n times), "getResultMap" in that order.
    grouper = oms.FeatureGroupingAlgorithmUnlabeled()
    for i, current_feature_map_path in enumerate(aligned_paths):
        print(f"Grouping features {i}/{len(aligned_paths)-1}")
        current_map = oms.FeatureMap()
        oms.FeatureXMLFile().load(current_feature_map_path, current_map)
        if i == 0:
            # first iteration - use as reference
            grouper.setReference(i, current_map)
        else:
            grouper.addToGroup(i, current_map)

    consensus_map = grouper.getResultMap()

    # consensus map requires a mapping between ids and filenames
    print("Mapping aligned results back to class labels")
    class_label_fns = list(class_label_dict.keys())
    fds = {i: oms.ColumnHeader() for i in range(len(aligned_paths))}
    measurement_names = []
    for i, aligned_path in enumerate(aligned_paths):
        current_fn = Path(aligned_path).name
        if class_label_dict:
            # feature xml names are replaced by the class label file names;
            # both are expected to be ordered the same way
            # NOTE(review): raises IndexError if there are fewer labels than
            # feature files - confirm callers always pass matching lengths
            fds[i].filename = class_label_fns[i]
        else:
            fds[i].filename = current_fn.encode("UTF8")  # needs bytestring representation
        measurement_names.append(current_fn)
    consensus_map.setColumnHeaders(fds)

    # cleanup aligned feature xmls - can be >30mb per file
    for ap in aligned_paths:
        os.remove(ap)

    # median normalization of the consensus map.
    # normalizeMaps(map, method, acc_filter, desc_filter):
    #   method is passed as int, calling with "NM_SCALE" gives a TypeError.
    #   from shell output the methods appear to be
    #     0: NM_SCALE - scale to same median using division/multiplication
    #     1: NM_SHIFT - shift using subtraction/addition
    #   acc_filter / desc_filter: regular expressions for filtering
    #   accessions / descriptions
    normalizer = oms.ConsensusMapNormalizerAlgorithmMedian()
    normalizer.normalizeMaps(consensus_map, 0, "", "")

    # export only if requested - requires more file management
    if consensus_map_out_path:
        print("Storing consensus xml")
        oms.ConsensusXMLFile().store(str(consensus_map_out_path), consensus_map)

    return consensus_map, measurement_names
def choose_ma_algorithm(self, **kwargs):
    """Select pose clustering as the map alignment algorithm.

    A new ``oms.MapAlignmentAlgorithmPoseClustering`` is stored on
    ``self.ma_algorithm``. ``kwargs`` are accepted but not used here.
    """
    pose_clustering = oms.MapAlignmentAlgorithmPoseClustering()
    self.ma_algorithm = pose_clustering