def main(self):
    """Align every feature map found under ``self.src`` to the reference map.

    For each source file the reference map (``self.reference_file``) and the
    source map are loaded, the configured alignment entity
    (``self.ma.entity``) computes a transformation, and the processed map is
    written as featureXML to the destination directory.

    Side effects: sets ``self.reference_map``, ``self.toAlign_map`` and
    ``self.dst_full_file_name`` (they hold the values of the last processed
    file once the loop finishes) and writes one file per source file.
    """
    # After the path_parsing method we have self.src_full_name_list.
    for f in get_list_full_names(self.src):
        print("Map Alignment implementation")
        print("Source file:", f)

        # Prepare (init) empty list and entity.
        self.init_entity(**self.kw)

        # 1st step: load the reference map and the map that has to be aligned.
        self.reference_map = oms.FeatureMap()
        self.toAlign_map = oms.FeatureMap()
        oms.FeatureXMLFile().load(self.reference_file, self.reference_map)
        oms.FeatureXMLFile().load(f, self.toAlign_map)

        # 2nd step: set the reference map.
        self.ma.entity.setReference(self.reference_map)

        # 3rd step: create the object receiving the computed transformation.
        transformation = oms.TransformationDescription()

        # 4th step: compute the alignment.
        # NOTE(review): the transformation is computed here but never applied
        # to the map explicitly - confirm that the entity's align() already
        # updates self.toAlign_map, otherwise a transformer step is missing.
        self.ma.entity.align(self.toAlign_map, transformation)

        # 5th step: store the result into a file.
        self.dst_full_file_name = os.path.join(
            self.dst,
            convert_src_to_dst_file_name(f,
                                         self.dst,
                                         self.suffix_dst_files,
                                         self.ext_dst_files))

        # Only the aligned map is written. The previous code stored the
        # reference map to the very same path right afterwards, which
        # overwrote the aligned output.
        oms.FeatureXMLFile().store(self.dst_full_file_name, self.toAlign_map)

        print("Aligned data stored into:", self.dst_full_file_name)
def detect_peaks_gcms_centroid(ms_experiment, parameters, debug=False):
    """
    Run the "centroided" feature finder on an experiment and store the result.

    Applicable to centroided experiments, also see
    https://abibuilder.informatik.uni-tuebingen.de/archive/openms/Documentation/nightly/html/a16103.html

    :param ms_experiment: experiment handed to the feature finder
    :param parameters: dict that must contain 'input_filename'; it is updated
        in place with 'detection_mode', 'pdm_name' and 'feature_storage'
    :param debug: log to the command line when true, no logging otherwise
    :return: the updated parameters dict
    """
    print(f"Detecting peaks with {GCMSPeakDetectionMethod.CENTROIDED}")

    finder = oms.FeatureFinder()
    finder.setLogType(oms.LogType.CMD if debug else oms.LogType.NONE)

    # Record which detection mode was used.
    algorithm_name = "centroided"
    parameters['detection_mode'] = algorithm_name
    parameters['pdm_name'] = GCMSPeakDetectionMethod.CENTROIDED.name

    # Run the feature finder with its default parameters and no seeds.
    found_features = oms.FeatureMap()
    empty_seeds = oms.FeatureMap()
    finder_params = oms.FeatureFinder().getParameters(algorithm_name)
    finder.run(algorithm_name, ms_experiment, found_features, finder_params,
               empty_seeds)
    found_features.ensureUniqueId()

    # Persist the features next to the input file and remember the location.
    storage_path = f"{parameters['input_filename']}_output.featureXML"
    oms.FeatureXMLFile().store(storage_path, found_features)
    parameters['feature_storage'] = storage_path

    print("Found", found_features.size(), "features")
    return parameters
def main(self):
    """Run the mass-trace / elution-peak / feature-finding chain on every source file.

    Each file returned by ``get_list_full_names(self.src)`` is loaded as a
    peak map, passed through ``self.mtd``, ``self.epd`` and ``self.ffm`` in
    that order, and the resulting feature map is written as featureXML into
    ``self.dst``.
    """
    print("FeatureFindingMetabo implementation")
    # After the path_parsing method we have self.src_full_name_list.
    for src_path in get_list_full_names(self.src):
        print("Source file:", src_path)

        # Prepare (init) empty list and entity.
        self.init_entity(**self.kw)

        # Step 1: load the peak map.
        peak_map = oms.PeakMap()
        feature_map = oms.FeatureMap()
        oms.MzMLFile().load(src_path, peak_map)

        # Step 2: mass traces -> split elution peaks -> features.
        self.mtd.entity.run(peak_map, self.output_mt)
        self.epd.entity.detectPeaks(self.output_mt, self.splitted_mt)
        self.ffm.entity.run(self.splitted_mt, feature_map, self.filtered_mt)

        # Step 3: store the result into a file.
        target_name = convert_src_to_dst_file_name(
            src_path, self.dst, self.suffix_dst_files, self.ext_dst_files)
        target_path = os.path.join(self.dst, target_name)
        oms.FeatureXMLFile().store(target_path, feature_map)

        print("Centroided data stored into:", target_path)
def load_feature_maps(self, **kwargs):
    """Load ``self.input_fm_1`` and ``self.input_fm_2`` into fresh feature maps.

    The maps are kept on the instance as ``self.reference`` and
    ``self.toAlign``; the reader is kept as ``self.xml_file``. ``kwargs`` is
    accepted but not used.
    """
    self.reference = oms.FeatureMap()
    self.toAlign = oms.FeatureMap()
    self.xml_file = oms.FeatureXMLFile()

    # Same reader instance for both files, reference first.
    for source_path, target_map in ((self.input_fm_1, self.reference),
                                    (self.input_fm_2, self.toAlign)):
        self.xml_file.load(source_path, target_map)
def main(options):
    """Pick and score MRM transition groups and store them as featureXML.

    Reads ``options.infile`` (chromatograms), ``options.traml_in`` (targeted
    experiment) and, if given, ``options.trafo_in`` (RT transformation),
    configures the transition group picker and the scorer, runs
    ``algorithm`` and writes its result to ``options.outfile``.
    """
    out = options.outfile
    chromat_in = options.infile
    traml_in = options.traml_in
    trafo_in = options.trafo_in

    picker = pyopenms.MRMTransitionGroupPicker()

    metabolomics = False
    # This is an important weight for RT-deviation -- the larger the value,
    # the less importance will be given to exact RT matches. For proteomics
    # data it tends to be a good idea to set it to the length of the RT space
    # (e.g. for 100 second RT space, set it to 100).
    rt_normalization_factor = 100.0

    # --- peak picker configuration -------------------------------------
    picker_params = picker.getDefaults()
    picker_params.setValue("PeakPickerMRM:remove_overlapping_peaks",
                           options.remove_overlapping_peaks, '')
    picker_params.setValue("PeakPickerMRM:method", options.method, '')
    if metabolomics:
        # Need to change those for metabolomics and very short peaks!
        metabolomics_overrides = (
            ("PeakPickerMRM:signal_to_noise", 0.01),
            ("PeakPickerMRM:peak_width", 0.1),
            ("PeakPickerMRM:gauss_width", 0.1),
            ("resample_boundary", 0.05),
            ("compute_peak_quality", "true"),
        )
        for key, value in metabolomics_overrides:
            picker_params.setValue(key, value, '')
    picker.setParameters(picker_params)

    # --- scorer configuration ------------------------------------------
    scorer = pyopenms.MRMFeatureFinderScoring()
    scorer_params = scorer.getDefaults()
    # Only report the top 5 features.
    scorer_params.setValue("stop_report_after_feature", 5, '')
    scorer_params.setValue("rt_normalization_factor",
                           rt_normalization_factor, '')
    scorer.setParameters(scorer_params)

    # --- input ----------------------------------------------------------
    chromatograms = pyopenms.MSExperiment()
    pyopenms.FileHandler().loadExperiment(chromat_in, chromatograms)

    targeted = pyopenms.TargetedExperiment()
    pyopenms.TraMLFile().load(traml_in, targeted)

    trafoxml = pyopenms.TransformationXMLFile()
    trafo = pyopenms.TransformationDescription()
    if trafo_in is not None:
        # Fit a linear model to the loaded transformation.
        model_params = pyopenms.Param()
        model_params.setValue("symmetric_regression", "false", "", [])
        trafoxml.load(trafo_in, trafo, True)
        trafo.fitModel("linear", model_params)

    # The scoring works on the light-weight representation of the targets.
    light_targeted = pyopenms.LightTargetedExperiment()
    pyopenms.OpenSwathDataAccessHelper().convertTargetedExp(targeted,
                                                            light_targeted)

    # --- run and store --------------------------------------------------
    output = algorithm(chromatograms, light_targeted, picker, scorer, trafo)
    pyopenms.FeatureXMLFile().store(out, output)
def run_featurefinder_centroided(input_map, params, seeds, out_path):
    """Run the "picked" feature finder algorithm on a map and store the features.

    :param input_map: map handed to the feature finder
    :param params: parameters for the algorithm
    :param seeds: seed feature map
    :param out_path: featureXML file the found features are written to
    """
    finder = pms.FeatureFinder()
    finder.setLogType(pms.LogType.CMD)

    found = pms.FeatureMap()
    algorithm_name = pms.FeatureFinderAlgorithmPicked.getProductName()
    finder.run(algorithm_name, input_map, found, params, seeds)

    pms.FeatureXMLFile().store(out_path, found)
def default_store_feature_xml(feature_map, parameters):
    """
    Store a feature map as featureXML at the standardized default location.

    The file name is ``parameters['input_filename']`` followed by the default
    featureXML storage suffix.

    :param feature_map: feature map to store
    :param parameters: dict containing 'input_filename'; updated in place
    :return: updated parameters with 'feature_storage' set to the path
    """
    suffix = get_default_feature_xml_storage_suffix()
    target = f"{parameters['input_filename']}{suffix}"

    oms.FeatureXMLFile().store(target, feature_map)

    parameters['feature_storage'] = target
    return parameters
def collect_convex_hulls(self):
    """Collect the convex hulls of the features matching the example peaks.

    Loads ``self.feature_xml_fname`` into ``self.fmap``, looks up every m/z of
    ``self.peaks_peaks`` among the feature m/z values and passes the hulls of
    the matching features (and of their subordinates) to
    ``self.extend_hulls``. Results end up in ``self.convex_hulls``,
    ``self.examples_oms_features`` and ``self.oms_feature_mzs``.
    """
    self.convex_hulls = []

    # Open the featureXML.
    reader = oms.FeatureXMLFile()
    self.fmap = oms.FeatureMap()
    reader.load(self.feature_xml_fname, self.fmap)

    # Table of (feature index, m/z), sorted by m/z for the lookup below.
    index_mz = np.array(
        [[idx, feat.getMZ()] for idx, feat in enumerate(self.fmap)])
    index_mz = index_mz[index_mz[:, 1].argsort(), :]

    # Look up the example features in the featureXML.
    self.examples_oms_features = {}
    for example, (_mz_t, mz_m) in self.peaks_peaks.items():
        hit = lookup.find(index_mz[:, 1], mz_m, t=10)
        if hit:
            self.examples_oms_features[index_mz[hit, 0]] = example

    # Collect the convex hulls of the matched features.
    for feat_idx, feat in enumerate(self.fmap):
        if feat_idx not in self.examples_oms_features:
            continue
        self.extend_hulls(feat.getConvexHulls(), feat_idx, 0)
        subordinates = feat.getSubordinates()
        if subordinates:
            for sub_feat in subordinates:
                self.extend_hulls(sub_feat.getConvexHulls(), feat_idx, 1)

    # columns: rt, mz, feature index, hull index, is sub-feature
    self.convex_hulls = np.vstack(self.convex_hulls)
    # Same table again, but ordered by feature index.
    self.oms_feature_mzs = index_mz[index_mz[:, 0].argsort(), :]
def id_mapper(in_file, id_file, out_file, params, use_centroid_rt,
              use_centroid_mz, use_subelements):
    """Annotate a quantitation file with identifications and store the result.

    The identifications are read from ``id_file`` (idXML). Depending on the
    type of ``in_file`` a consensus map, a feature map or the consensus maps
    of an mzQuantML file are annotated, tagged with an
    IDENTIFICATION_MAPPING processing step and written to ``out_file``.

    :raises Exception: if ``in_file`` is none of the supported types
    """
    in_type = pms.FileHandler.getType(in_file)

    protein_ids = []
    peptide_ids = []
    pms.IdXMLFile().load(id_file, protein_ids, peptide_ids)

    mapper = pms.IDMapper()
    mapper.setParameters(params)

    if in_type == pms.Type.CONSENSUSXML:
        consensus_io = pms.ConsensusXMLFile()
        consensus_map = pms.ConsensusMap()
        consensus_io.load(in_file, consensus_map)
        mapper.annotate(consensus_map, peptide_ids, protein_ids,
                        use_subelements)
        addDataProcessing(
            consensus_map, params,
            pms.DataProcessing.ProcessingAction.IDENTIFICATION_MAPPING)
        consensus_io.store(out_file, consensus_map)

    elif in_type == pms.Type.FEATUREXML:
        feature_io = pms.FeatureXMLFile()
        feature_map = pms.FeatureMap()
        feature_io.load(in_file, feature_map)
        mapper.annotate(feature_map, peptide_ids, protein_ids,
                        use_centroid_rt, use_centroid_mz)
        addDataProcessing(
            feature_map, params,
            pms.DataProcessing.ProcessingAction.IDENTIFICATION_MAPPING)
        feature_io.store(out_file, feature_map)

    elif in_type == pms.Type.MZQ:
        quant_io = pms.MzQuantMLFile()
        quantifications = pms.MSQuantifications()
        quant_io.load(in_file, quantifications)
        # Annotate every contained consensus map, then write them back.
        consensus_maps = quantifications.getConsensusMaps()
        for consensus_map in consensus_maps:
            mapper.annotate(consensus_map, peptide_ids, protein_ids,
                            use_subelements)
            addDataProcessing(
                consensus_map, params,
                pms.DataProcessing.ProcessingAction.IDENTIFICATION_MAPPING)
        quantifications.setConsensusMaps(consensus_maps)
        quant_io.store(out_file, quantifications)

    else:
        raise Exception("invalid input file format")
def default_store_aligned_feature_xml(feature_map, original_path):
    """
    Store a feature map as featureXML under the default "aligned" name.

    If ``original_path`` already ends with the aligned suffix the file is
    overwritten; otherwise the suffix replaces the extension of
    ``original_path``.

    :param feature_map: feature map to store
    :param original_path: path of the featureXML file the map originates from
    :return: path the feature map was stored to
    """
    aligned_suffix = get_default_feature_xml_storage_suffix('aligned')

    already_aligned = str(original_path).endswith(aligned_suffix)
    if already_aligned:
        # overwrite - probably writing the reference feature map
        target = original_path
    else:
        source = Path(original_path)
        target = f"{source.parent}/{source.stem}{aligned_suffix}"

    oms.FeatureXMLFile().store(target, feature_map)
    return target
def testFeatureXMLFile():
    """
    @tests:
     FeatureXMLFile.__init__
     FeatureXMLFile.load
     FeatureXMLFile.store
     FileHandler.__init__
     FileHandler.loadFeatures
    """
    feature_map = pyopenms.FeatureMap()
    feature_map.setUniqueIds()

    # Round trip through the dedicated featureXML reader/writer ...
    xml_io = pyopenms.FeatureXMLFile()
    xml_io.store("test.featureXML", feature_map)
    xml_io.load("test.featureXML", feature_map)

    # ... and read the same file once more via the generic file handler.
    handler = pyopenms.FileHandler()
    handler.loadFeatures("test.featureXML", feature_map)
def parse_featureXML_GT(feature_file):
    """Return the bounding boxes of all convex hulls in a featureXML file.

    One row is produced per convex hull of every feature. The box limits are
    the column-wise minima/maxima of the hull points (column 0 -> rt_*,
    column 1 -> mz_*); 'detected' is initialised to False and 'pic_id' to -1.
    """
    featuremap = pyopenms.FeatureMap()
    pyopenms.FeatureXMLFile().load(feature_file, featuremap)

    hulls = pd.DataFrame(
        columns=['rt_min', 'rt_max', 'mz_min', 'mz_max', 'detected', 'pic_id'])

    for feature_idx in range(featuremap.size()):
        for hull in featuremap[feature_idx].getConvexHulls():
            points = hull.getHullPoints()
            lower = points.min(0)
            upper = points.max(0)
            # Append as the next row.
            hulls.loc[len(hulls)] = [
                lower[0], upper[0], lower[1], upper[1], False, -1
            ]
    return hulls
def detect_peaks_gcms_isotopewavelet(ms_experiment, parameters, debug=False):
    """
    Use the isotope wavelet feature finder to process raw data - can perform
    poorly on centroided data - also see
    https://abibuilder.informatik.uni-tuebingen.de/archive/openms/Documentation/nightly/html/a16105.html

    TODO use "corrected" intensity_type

    :param ms_experiment: experiment handed to the feature finder
    :param parameters: dict that must contain 'input_filename' and may contain
        'hr_data'; it is updated in place with 'detection_mode', 'pdm_name'
        and 'feature_storage'
    :param debug: log to the command line when true, no logging otherwise
    :return: the updated parameters dict
    """
    print(f"Detecting peaks with {GCMSPeakDetectionMethod.ISOTOPEWAVELET}")

    finder = oms.FeatureFinder()
    finder.setLogType(oms.LogType.CMD if debug else oms.LogType.NONE)

    # Record which detection mode was used.
    algorithm_name = "isotope_wavelet"
    parameters['detection_mode'] = algorithm_name
    parameters['pdm_name'] = GCMSPeakDetectionMethod.ISOTOPEWAVELET.name

    found_features = oms.FeatureMap()
    empty_seeds = oms.FeatureMap()
    finder_params = finder.getParameters(algorithm_name)

    # The finder complains about "the extremal length of the wavelet is larger
    # (47661) than the number of data points"; wavelet_length is defined by
    # mz_cutoff / min_spacing.
    # hr_data must be true for high-resolution data (orbitrap, FTICR).
    if parameters.get("hr_data", False):
        # hr_data takes extremely long - >= 2h per measurement of (!)32MB -
        # there are way larger spectra...
        finder_params.setValue(b"hr_data", b"true")

    finder.run(algorithm_name, ms_experiment, found_features, finder_params,
               empty_seeds)
    found_features.setUniqueIds()

    # Persist the features next to the input file and remember the location.
    storage_path = f"{parameters['input_filename']}_output.featureXML"
    oms.FeatureXMLFile().store(storage_path, found_features)
    parameters['feature_storage'] = storage_path

    print("Found", found_features.size(), "features")
    return parameters
def FeatureFindingMetabo(mzfile, noise_threshold_int, snr,
                         finder='C:/Program Files/OpenMS/bin/FeatureFinderMetabo.exe'):
    """Run the FeatureFinderMetabo command line tool and return its features.

    The tool writes a temporary featureXML file into the current working
    directory, which is loaded into a ``pyopenms.FeatureMap`` and removed
    again.

    :param mzfile: input file passed to the tool via ``-in``
    :param noise_threshold_int: intensity threshold; it is divided by ``snr``
        before being passed on as ``noise_threshold_int``
    :param snr: value passed as ``chrom_peak_snr``
    :param finder: path of the FeatureFinderMetabo executable
    :return: feature map loaded from the tool output
    """
    feature_file = 'tmp.featureXML'
    noise_threshold_int = noise_threshold_int / snr
    subprocess.call([finder, '-in', mzfile, '-out', feature_file,
                     '-algorithm:common:noise_threshold_int', f'{noise_threshold_int}',
                     '-algorithm:common:chrom_peak_snr', f'{snr}',
                     '-algorithm:common:chrom_fwhm', '10',
                     '-algorithm:mtd:mass_error_ppm', '20',
                     '-algorithm:mtd:reestimate_mt_sd', 'true',
                     '-algorithm:mtd:min_sample_rate', '0',
                     '-algorithm:mtd:min_trace_length', '2',
                     '-algorithm:epd:width_filtering', 'off',
                     # Charge range 1..5. The second flag used to repeat
                     # charge_lower_bound, so the upper bound was never set.
                     '-algorithm:ffm:charge_lower_bound', '1',
                     '-algorithm:ffm:charge_upper_bound', '5'])

    featuremap = pyopenms.FeatureMap()
    featurexml = pyopenms.FeatureXMLFile()
    try:
        featurexml.load(feature_file, featuremap)
    finally:
        # Do not leave the temporary file behind when loading fails; guard
        # the removal so a missing file does not mask the original error.
        if os.path.exists(feature_file):
            os.remove(feature_file)
    return featuremap
def run_featurefinder_centroided(input_path, params, seeds, out_path):
    """Load an mzML file, run the "picked" feature finder and store the features.

    :param input_path: mzML file to read
    :param params: parameters for the feature finder algorithm
    :param seeds: seed feature map
    :param out_path: featureXML file the found features are written to
    """
    # Restrict loading to the MS levels given below.
    load_options = pms.PeakFileOptions()
    load_options.setMSLevels([1, 1])
    mzml_io = pms.MzMLFile()
    mzml_io.setOptions(load_options)

    input_map = pms.MSExperiment()
    mzml_io.load(input_path, input_map)
    input_map.updateRanges()

    finder = pms.FeatureFinder()
    finder.setLogType(pms.LogType.CMD)

    found = pms.FeatureMap()
    algorithm_name = pms.FeatureFinderAlgorithmPicked.getProductName()
    finder.run(algorithm_name, input_map, found, params, seeds)

    # Assign ids and record the processing step before writing.
    found.setUniqueIds()
    addDataProcessing(found, params, pms.ProcessingAction.QUANTITATION)

    pms.FeatureXMLFile().store(out_path, found)
def extract_data(in_file_name, mz_search):
    """Print the convex hull points of all features close to a given m/z.

    Every feature of ``in_file_name`` whose m/z differs from ``mz_search`` by
    less than 0.01 is reported: the two coordinates of each hull point of the
    feature, followed by those of its subordinate features, are printed one
    point per line.
    """
    def _print_hull_points(feature):
        # getConvexHulls() yields the hulls; getHullPoints() is a numpy.ndarray.
        for hull in feature.getConvexHulls():
            for point in hull.getHullPoints():
                print(point[0], point[1])

    reader = oms.FeatureXMLFile()
    fmap = oms.FeatureMap()
    reader.load(in_file_name, fmap)

    delta = 0.01
    for candidate in fmap:
        if not (abs(mz_search - candidate.getMZ()) < delta):
            continue

        _print_hull_points(candidate)

        subordinates = candidate.getSubordinates()
        if subordinates:
            for sub_feature in subordinates:
                _print_hull_points(sub_feature)
def main(options):
    """Pick MRM transition groups and store the result as featureXML.

    Reads ``options.infile`` (chromatograms) and ``options.traml_in``
    (targeted experiment), configures the transition group picker from
    ``options``, runs ``algorithm`` and writes its result to
    ``options.outfile``.
    """
    out = options.outfile
    chromat_in = options.infile
    traml_in = options.traml_in

    # Configure the picker.
    picker = pyopenms.MRMTransitionGroupPicker()
    picker_params = picker.getDefaults()
    picker_params.setValue("PeakPickerMRM:remove_overlapping_peaks",
                           options.remove_overlapping_peaks, '')
    picker_params.setValue("PeakPickerMRM:method", options.method, '')
    picker.setParameters(picker_params)

    # Load the inputs.
    chromatograms = pyopenms.MSExperiment()
    pyopenms.FileHandler().loadExperiment(chromat_in, chromatograms)

    targeted = pyopenms.TargetedExperiment()
    pyopenms.TraMLFile().load(traml_in, targeted)

    # Run and store.
    picked = algorithm(chromatograms, targeted, picker)
    pyopenms.FeatureXMLFile().store(out, picked)
def main(options): starttime = time.time() tolerance = float(options.tolerance) precursorshift = float(options.precursorshift) mswindow = float(options.mswindow) rtwindow = float(options.rtwindow) # load file timed("loading experiment", starttime) exp = glyxtoolms.lib.openOpenMSExperiment(options.infile) # assure sorted spectra exp.sortSpectra() timed("sort spectra", starttime) ms1, ms2 = sortSpectra(exp) links = [] noresult = 0 timed("finding precursors " + str(len(ms2)), starttime) for mz, spec2, spec1 in ms2: charge = spec2.getPrecursors()[0].getCharge() if charge == 0: charge = 1 peaks, results = findMonoIsotopicPeak(mz, charge, spec1, tolerance, precursorshift, mswindow) if len(results) == 0: noresult += 1 continue best = min(results, key=lambda r: r["mass"] * r["error"] / r["sum"]**2) rt = spec2.getRT() link = Link(rt, best) link.rt1 = spec1.getRT() link.nativeId = spec1.getNativeID() link.peaks = peaks links.append(link) print "could not find suitable starting pattern for ", noresult, "spectra from ", len( ms2) timed("group precursors", starttime) # group precursors for l1 in links: for l2 in links: if l1.charge != l2.charge: continue if abs(l1.mz - l2.mz) > tolerance: continue if abs(l1.rt - l2.rt) > rtwindow: continue l1.near.add(l2) timed("group into features", starttime) # group into features features = [] while True: # find link without feature working = set() for link in links: if link.feature == None: working.add(link) break if len(working) == 0: break feature = Feature() features.append(feature) while len(working) > 0: current = working.pop() current.feature = feature feature.ms2.append(current) for link in current.near: if link.feature == None: working.add(link) timed("calculate precursor positions", starttime) # calculate mz and charge of feature for feature in features: feature.mz = sum([l.mz for l in feature.ms2]) / len(feature.ms2) feature.charge = feature.ms2[0].charge # get lowest error feature.error = min([l.result["error"] for l in 
feature.ms2]) feature.extendRTDomain(ms1, tolerance) # calculate dimensions masses = [l.result["x"][-1] for l in feature.ms2] feature.mzLow = feature.mz feature.mzHigh = max(masses) timed("merge features", starttime) # check features against each other # remove feature if mzLow is within other feature and rtLow and rtHigh within too todelete = set() for f1 in features: for f2 in features: if f1 == f2: continue if abs(f1.mz - f2.mz) > tolerance: continue if not (f1.mzLow <= f2.mzLow <= f1.mzHigh): continue if not (f1.rtLow <= f2.rtLow <= f1.rtHigh): continue if not (f1.rtLow <= f2.rtHigh <= f1.rtHigh): continue todelete.add(f2) for f in todelete: features.remove(f) newfeatures = set() # merge features for f1 in features: if f1.rtHigh == 0.0: print "zero", "f1", [ms.rt for ms in f1.ms2] for f2 in features: if f1 == f2: continue if abs(f1.mzLow - f2.mzLow) > tolerance: continue if f1.charge != f2.charge: continue if not ((f1.rtLow <= f2.rtLow <= f1.rtHigh) or abs(f2.rtLow - f1.rtHigh) < 30): continue todelete.add(f1) todelete.add(f2) # create new feature newf = Feature() newf.error = min((f1.error, f2.error)) newf.mz = min((f1.mz, f2.mz)) newf.mzLow = min((f1.mzLow, f2.mzLow)) newf.mzHigh = max((f1.mzHigh, f2.mzHigh)) newf.rtLow = min((f1.rtLow, f2.rtLow)) newf.rtHigh = max((f1.rtHigh, f2.rtHigh)) if newf.rtHigh == 0.0: print "zero", "newf" newf.ms2 = sorted(set(f1.ms2).union(set(f2.ms2)), key=lambda ms: ms.rt) newf.rt = (f1.rt + f2.rt) / 2.0 newfeatures.add(newf) features += list(newfeatures) timed("write featuremap", starttime) fm = pyopenms.FeatureMap() N = 0 for feature in features: # calc bounding boxes from pattern f = pyopenms.Feature() f.ensureUniqueId() f.setRT(feature.rt) f.setMZ(feature.mz) f.setCharge(feature.charge) hulls = [] sumIntensity = 0 for i in feature.pattern: pattern = feature.pattern[i] minRT = feature.rtLow - 1 # eliminate rounding error by adding a rt tolerance maxRT = feature.rtHigh + 1 #minRT = min([p[0] for p in pattern]) #maxRT = 
max([p[0] for p in pattern]) minMZ = min([p[1] for p in pattern]) - tolerance maxMZ = max([p[1] for p in pattern]) + tolerance sumIntensity += sum([p[2] for p in pattern]) h = pyopenms.ConvexHull2D() h.addPoint([minRT, minMZ]) h.addPoint([maxRT, minMZ]) h.addPoint([maxRT, maxMZ]) h.addPoint([minRT, maxMZ]) hulls.append(h) f.setConvexHulls(hulls) f.getConvexHull().expandToBoundingBox() f.setIntensity(sumIntensity) f.setOverallQuality(feature.error) # check if boundingbox is fullfilles h = f.getConvexHull() b = h.getBoundingBox() minRT, minMZ = b.minPosition() maxRT, maxMZ = b.maxPosition() if sumIntensity == 0: N += 1 continue if charge == 0: N += 1 continue fm.push_back(f) print "ignoring ", N, " features that have no peaks or a charge of 0" fxml = pyopenms.FeatureXMLFile() fxml.store(options.outfile, fm)
parser.add_argument('-z', '--im', action='store', required=False, type=float, default=0.031, help='the IM threshold to use') parser.add_argument('--no-im', action='store_true', required=False, default=False, help='do not use IM comparisons (only use RT and m/z)') parser.add_argument('-q', '--quiet', action='store_true', required=False, default=False, help='suppress progress output when comparing features') args = parser.parse_args() output_file = args.out thresholds = [args.rt, args.mz, args.im] input_mask, ref_mask = ms.FeatureMap(), ms.FeatureMap() input_is_csv = True if args.in_.endswith('.csv') else False ref_is_csv = True if args.ref.endswith('.csv') else False if not input_is_csv and not args.in_.endswith('featureXML'): print('Error: input features must be in csv or featureXML formats') exit(1) if not ref_is_csv: # TODO: Add more comparison formats print('Error: comparison currently requires reference features in csv format') exit(1) if input_is_csv: input_mask = csv_to_list(args.in_) else: ms.FeatureXMLFile().load(args.in_, input_mask) if ref_is_csv: ref_mask = csv_to_list(args.ref) else: ms.FeatureXMLFile().load(args.ref, ref_mask) compare_features(input_mask, ref_mask, not args.no_im, args.quiet)
print('Error:', args.dir, 'is not an existing directory') exit(1) if not args.out.endswith('.featureXML') or args.out.endswith( '.mzML'): # TODO: implement mzML support print('Error:', args.out, 'must be a featureXML or mzML file') exit(1) if args.bench: time_out = open(args.dir + '/benchmark.txt', 'w') start_t = time.time() exp = ms.OnDiscMSExperiment() print('Loading mzML input file.', end=' ', flush=True) if not exp.openFile(args.in_): print('Error:', args.in_, 'is not an indexed mzML file') exit(1) print('Done', flush=True) if args.bench: total_t = time.time() - start_t time_out.write(f'mzml load: {total_t}s\n') time_out.close() ff = FeatureFinderIonMobility() features = ff.run(exp, args.num_bins, args.pp_type, args.peak_radius, args.window_radius, args.pp_mode, args.ff_type, args.dir, args.filter, args.debug, args.bench) ms.FeatureXMLFile().store(args.dir + '/' + args.out, features) print('Found', features.size(), 'features')
def align(in_files, out_files, out_trafos, reference_index, reference_file,
          params):
    """Align peak maps or feature maps to a reference with pose clustering.

    The reference is ``reference_file`` if given, else the input selected by
    the 1-based ``reference_index``, else the input with the largest size.
    Every input is aligned to it (the reference itself gets an identity
    transformation). If ``out_files`` is given the transformed maps are
    stored there; if ``out_trafos`` is given the transformations are stored
    there.

    :raises Exception: if the inputs are neither all peak files nor all
        featureXML files
    """
    in_types = {pms.FileHandler.getType(path) for path in in_files}

    if in_types <= {pms.Type.MZML, pms.Type.MZXML, pms.Type.MZDATA}:
        align_features = False
    elif in_types == {pms.Type.FEATUREXML}:
        align_features = True
    else:
        raise Exception("different kinds of input files")

    algorithm = pms.MapAlignmentAlgorithmPoseClustering()
    algorithm.setParameters(params.copy("algorithm:", True))
    algorithm.setLogType(pms.LogType.CMD)

    plog = pms.ProgressLogger()
    plog.setLogType(pms.LogType.CMD)

    # ---- choose the reference file --------------------------------------
    if reference_file:
        file_ = reference_file
    elif reference_index > 0:
        file_ = in_files[reference_index - 1]
    else:
        # No explicit reference: take the largest input.
        sizes = []
        if align_features:
            size_reader = pms.FeatureXMLFile()
            plog.startProgress(0, len(in_files), "Determine Reference map")
            for pos, candidate in enumerate(in_files):
                sizes.append((size_reader.loadSize(candidate), candidate))
                plog.setProgress(pos)
        else:
            size_reader = pms.MzMLFile()
            scratch = pms.MSExperiment()
            plog.startProgress(0, len(in_files), "Determine Reference map")
            for pos, candidate in enumerate(in_files):
                size_reader.load(candidate, scratch)
                scratch.updateRanges()
                sizes.append((scratch.getSize(), candidate))
                plog.setProgress(pos)
        plog.endProgress()
        file_ = max(sizes)[1]

    # Hulls and subordinates are only needed when maps are written out again.
    f_fmxl = pms.FeatureXMLFile()
    if not out_files:
        light_options = f_fmxl.getOptions()
        light_options.setLoadConvexHull(False)
        light_options.setLoadSubordinates(False)
        f_fmxl.setOptions(light_options)

    # ---- load the reference ----------------------------------------------
    if align_features:
        map_ref = pms.FeatureMap()
        ref_reader = pms.FeatureXMLFile()
        ref_options = f_fmxl.getOptions()
        ref_options.setLoadConvexHull(False)
        ref_options.setLoadSubordinates(False)
        ref_reader.setOptions(ref_options)
        ref_reader.load(file_, map_ref)
    else:
        map_ref = pms.MSExperiment()
        pms.MzMLFile().load(file_, map_ref)
    algorithm.setReference(map_ref)

    # ---- align every input -----------------------------------------------
    plog.startProgress(0, len(in_files), "Align input maps")
    for i, in_file in enumerate(in_files):
        trafo = pms.TransformationDescription()

        if align_features:
            map_ = pms.FeatureMap()
            feature_io = pms.FeatureXMLFile()
            feature_io.setOptions(f_fmxl.getOptions())
            feature_io.load(in_file, map_)
        else:
            map_ = pms.MSExperiment()
            pms.MzMLFile().load(in_file, map_)

        # The reference does not need to be aligned to itself.
        if in_file == file_:
            trafo.fitModel("identity")
        else:
            algorithm.align(map_, trafo)

        if out_files:
            if align_features:
                pms.MapAlignmentTransformer.transformSingleFeatureMap(
                    map_, trafo)
                addDataProcessing(map_, params, pms.ProcessingAction.ALIGNMENT)
                feature_io.store(out_files[i], map_)
            else:
                pms.MapAlignmentTransformer.transformSinglePeakMap(map_, trafo)
                addDataProcessing(map_, params, pms.ProcessingAction.ALIGNMENT)
                pms.MzMLFile().store(out_files[i], map_)

        if out_trafos:
            pms.TransformationXMLFile().store(out_trafos[i], trafo)

        plog.setProgress(i + 1)

    plog.endProgress()
def align_feature_xmls(feature_xml_lis, consensus_map_out_path="", class_label_dict=None):
    """
    Align feature maps pairwise and group them into a normalized consensus map.

    First pose clustering is applied to align every feature map against the
    map with the most features, next the aligned maps are linked/grouped
    across all files. Each MS1 spectrum from a raw file creates a feature
    file - they need to be loaded and aligned to get unique and
    representative features. The intermediate "aligned" featureXML files are
    removed again; the consensus map is median-normalized.

    :param feature_xml_lis: paths of the featureXML files (str or path-like)
    :param consensus_map_out_path: if non-empty, the consensus map is stored
        to this path as consensusXML
    :param class_label_dict: optional mapping; if non-empty, its keys (in
        order) are used as column header file names instead of the aligned
        file names
    :return: consensus_map, measurement_names
    """
    # A None default instead of a shared mutable {} default.
    if class_label_dict is None:
        class_label_dict = {}

    # openms won't deal with posix paths - wants to have strings instead.
    # Sort them to make sure the feature matrix is also sorted.
    feature_xml_lis = sorted([str(fx) for fx in feature_xml_lis])

    # Count the features per file to choose the reference.
    num_features_list = []
    for current_feature_xml_path in feature_xml_lis:
        cm = oms.FeatureMap()  # current map
        oms.FeatureXMLFile().load(current_feature_xml_path, cm)
        num_features_list.append(cm.size())
        del cm

    # The feature file / experiment with most features is the reference.
    max_index = np.argmax(num_features_list)
    reference_map_path = feature_xml_lis[max_index]

    default_max_num_peaks_considered = 1000
    default_max_scaling_value = 10.0

    aligned_paths = []
    for i, current_feature_xml_path in enumerate(feature_xml_lis):
        # Pairwise alignment - so the master map is (re)loaded each time.
        reference_map = oms.FeatureMap()
        oms.FeatureXMLFile().load(reference_map_path, reference_map)

        current_map = oms.FeatureMap()
        oms.FeatureXMLFile().load(current_feature_xml_path, current_map)

        # Transformation description required as init for the aligner.
        transformation_description = oms.TransformationDescription()

        # Adjust max scaling and max num peaks, otherwise this leads to
        # errors when running with algae samples.
        aligner = oms.MapAlignmentAlgorithmPoseClustering()
        aligner_params = aligner.getParameters()

        max_scaling_key = b'superimposer:max_scaling'
        aligner_params.setValue(max_scaling_key, default_max_scaling_value)

        max_num_peaks_key = b'max_num_peaks_considered'
        # default = 1000; a higher value is needed for algae
        aligner_params.setValue(max_num_peaks_key,
                                default_max_num_peaks_considered)

        # Decrease runtime by removing weak signals: half the default
        # parameter, speeds up the alignment.
        num_used_points_key = b'superimposer:num_used_points'
        aligner_params.setValue(num_used_points_key, 1000)

        aligner.setParameters(aligner_params)
        aligner.setReference(reference_map)

        try:
            # run alignment
            aligner.align(current_map, transformation_description)
        except RuntimeError as err:
            # NOTE(review): any other RuntimeError is swallowed here and the
            # unaligned map is stored below - confirm this is intended.
            if 'max_num_peaks_considered' in str(err):
                # Retry with higher thresholds - required for algae dataset.
                # The raised values stay in effect for all following files.
                default_max_num_peaks_considered = 15000  # 15 fold - a lot slower but less error prone
                aligner_params.setValue(max_num_peaks_key,
                                        default_max_num_peaks_considered)
                default_max_scaling_value = 20.0  # need to increase to 20
                aligner_params.setValue(max_scaling_key,
                                        default_max_scaling_value)
                # max shift could also be off - issue for ckd dataset
                default_max_shift_value = 2000.0  # need to increase from 1000 to 2000
                max_shift_key = b'superimposer:max_shift'
                aligner_params.setValue(max_shift_key, default_max_shift_value)

                print(
                    f"Encountered GC/MS Clustering issue - setting 'max_num_peaks_considered' to {default_max_num_peaks_considered}, 'superimposer:max_scaling' to {default_max_scaling_value} and 'superimposer:max_shift' to {default_max_shift_value}"
                )
                aligner.setParameters(aligner_params)
                aligner.setReference(reference_map)
                aligner.align(current_map, transformation_description)

        current_map.updateRanges()
        reference_map.updateRanges()

        # Update feature XML files - both reference and current.
        updated_current_map_path = default_store_aligned_feature_xml(
            current_map, current_feature_xml_path)
        updated_reference_path = default_store_aligned_feature_xml(
            reference_map, reference_map_path)
        reference_map_path = updated_reference_path

        aligned_paths.append(updated_current_map_path)
        print(f"Finished alignment of {i}/{len(feature_xml_lis)-1}")

    # Also replace here with the new path the reference map was stored to.
    aligned_paths[max_index] = reference_map_path

    # Link/group them across features to create the consensus map.
    # Parameters are left at their defaults. According to the openms
    # documentation: call "setReference", "addToGroup" (n times),
    # "getResultMap" in that order.
    grouper = oms.FeatureGroupingAlgorithmUnlabeled()
    for i, current_feature_map_path in enumerate(aligned_paths):
        print(f"Grouping features {i}/{len(aligned_paths)-1}")
        current_map = oms.FeatureMap()
        oms.FeatureXMLFile().load(current_feature_map_path, current_map)
        if not i:
            # first iteration - use as reference
            grouper.setReference(i, current_map)
        else:
            grouper.addToGroup(i, current_map)

    # get consensus map
    consensus_map = grouper.getResultMap()

    # The consensus map requires some mapping between ids and filenames -
    # otherwise it will complain.
    print(f"Mapping aligned results back to class labels")
    class_label_fns = list(class_label_dict.keys())
    fds = {i: oms.ColumnHeader() for i, _ in enumerate(aligned_paths)}
    measurement_names = []
    for i, aligned_path in enumerate(aligned_paths):
        # file name without directory (stem + suffix)
        current_fn = Path(aligned_path).name

        # This is where the feature_xml filenames are replaced with the ones
        # from class_labels. Both the featureXML input and class_label_dict
        # are expected to be ordered consistently.
        if class_label_dict:
            fds[i].filename = class_label_fns[i]
        else:
            fds[i].filename = current_fn.encode("UTF8")  # needs bytestring representation

        measurement_names.append(current_fn)

    consensus_map.setColumnHeaders(fds)

    # Cleanup of aligned feature xmls - can be >30mb per file.
    for ap in aligned_paths:
        os.remove(ap)

    # Consensus map normalization using median normalization (also available
    # are Quantile and "robust regression").
    #
    # normalizer.normalizeMaps(map, method, acc_filter, desc_filter)
    #   map          ConsensusMap
    #   method       whether to use scaling or shifting to same median
    #   acc_filter   regular expression for filtering accessions
    #   desc_filter  regular expression for filtering descriptions
    #
    # method is passed as an int (a string such as "NM_SCALE" gives a
    # TypeError); presumably - deduced from shell output -
    #   0: NM_SCALE scale to same median using division/multiplication
    #   1: NM_SHIFT shift using subtraction/addition
    normalizer = oms.ConsensusMapNormalizerAlgorithmMedian()
    normalizer.normalizeMaps(consensus_map, 0, "", "")

    # Don't export if not required - requires more file management.
    if consensus_map_out_path:
        print("Storing consensus xml")
        oms.ConsensusXMLFile().store(str(consensus_map_out_path),
                                     consensus_map)

    return consensus_map, measurement_names
def main():
    """Command-line entry point for the centroided feature finder.

    Two modes are accepted:

    * run mode   -- ``-in`` and ``-out`` together with ``-ini`` or ``-dict_ini``
    * write mode -- ``-write_ini`` and/or ``-write_dict_ini``

    If neither mode is satisfied the parser reports a usage error.  When
    ``writeParamsIfRequested`` reports that it wrote the parameters, nothing
    else is done; otherwise the defaults are updated from the arguments,
    optional seeds are loaded and the feature finder is run.
    """
    parser = argparse.ArgumentParser(description="FeatureFinderCentroided")

    # (flag, extra keyword arguments) in the order they are declared.
    option_table = (
        ("-in", {"dest": "in_", "metavar": "input_file"}),
        ("-seeds", {"metavar": "seeds_file"}),
        ("-out", {"metavar": "output_file"}),
        ("-ini", {"metavar": "ini_file"}),
        ("-dict_ini", {"metavar": "python_dict_ini_file"}),
        ("-write_ini", {"metavar": "ini_file"}),
        ("-write_dict_ini", {"metavar": "python_dict_ini_file"}),
    )
    for flag, extra in option_table:
        parser.add_argument(flag, action="store", type=str, **extra)

    args = parser.parse_args()

    has_ini = args.ini is not None or args.dict_ini is not None
    run_mode = args.in_ is not None and args.out is not None and has_ini
    write_mode = (args.write_ini is not None
                  or args.write_dict_ini is not None)
    if not (run_mode or write_mode):
        parser.error("either specify -in, -out and -(dict)ini for running "
                     "the peakpicker\nor -write(dict)ini for creating std "
                     "ini file")

    algorithm_name = pms.FeatureFinderAlgorithmPicked.getProductName()
    defaults = pms.FeatureFinder().getParameters(algorithm_name)

    # Writing the parameter file(s) replaces the actual run.
    if writeParamsIfRequested(args, defaults):
        return

    updateDefaults(args, defaults)
    seeds = pms.FeatureMap()
    if args.seeds:
        pms.FeatureXMLFile().load(args.seeds, seeds)
    run_featurefinder_centroided(args.in_, defaults, seeds, args.out)
def find_features(self, pp_type: str, peak_radius: int, window_radius: float,
                  pp_mode: str, ff_type: str, dir: str, filter: str,
                  debug: bool) -> List[List[ms.FeatureMap]]:
    """Filter, peak-pick and feature-find every IM bin of both passes.

    Each bin is read from ``<dir>/b-<pass>-<bin>.mzML``.  The first pass has
    ``self.num_bins`` bins; the second pass has ``self.num_bins + 1`` bins,
    or none at all when ``self.num_bins`` is 1.

    Keyword arguments:
    pp_type: the peak picker to use ('none', 'pphr', or 'custom')
    peak_radius: for the custom peak picker, the minimum peak radius of a
        peak set
    window_radius: for the custom peak picker, the maximum m/z window
        radius to consider
    pp_mode: for the custom peak picker, the mode to use ('ltr' or 'int')
    ff_type: the existing feature finder to use ('centroided' or
        'multiplex')
    dir: the directory holding the bin files; intermediate output files are
        written there as well
    filter: the noise filter to use ('none', 'gauss', or 'sgolay')
    debug: determines if intermediate output files should be written

    Returns: the per-bin feature maps of the first pass and of the second
        pass, as a pair of lists.
    """
    per_pass = [[], []]
    # Accumulated per-pass features; only written out in debug mode.
    per_pass_total = [ms.FeatureMap(), ms.FeatureMap()]

    # At most one smoother is configured, depending on the requested filter.
    smoother = None
    if filter == 'gauss':
        smoother = ms.GaussFilter()
        gauss_params = smoother.getDefaults()
        gauss_params.setValue(b'ppm_tolerance', 20.0)
        gauss_params.setValue(b'use_ppm_tolerance', b'true')
        smoother.setParameters(gauss_params)
    elif filter == 'sgolay':
        smoother = ms.SavitzkyGolayFilter()
        sgolay_params = smoother.getDefaults()
        sgolay_params.setValue(b'frame_length', 7)
        sgolay_params.setValue(b'polynomial_order', 3)
        smoother.setParameters(sgolay_params)

    hires_picker = ms.PeakPickerHiRes()
    im_picker = ppim.PeakPickerIonMobility()

    # Number of bins in each of the two passes.
    bins_per_pass = [self.num_bins,
                     0 if self.num_bins == 1 else self.num_bins + 1]

    for pass_idx, bin_count in enumerate(bins_per_pass):
        for bin_idx in range(bin_count):
            raw_exp, picked_exp = ms.MSExperiment(), ms.MSExperiment()
            ms.MzMLFile().load(f"{dir}/b-{pass_idx}-{bin_idx}.mzML", raw_exp)
            debug_stem = f"{dir}/pass{pass_idx}-bin{bin_idx}"

            # Optional noise filtering
            if smoother is not None:
                smoother.filterExperiment(raw_exp)
            if filter != 'none' and debug:
                ms.MzMLFile().store(debug_stem + '-filtered.mzML', raw_exp)

            # Optional peak picking
            if pp_type == 'pphr':
                hires_picker.pickExperiment(raw_exp, picked_exp)
            elif pp_type == 'custom':
                picked_exp = im_picker.pick_experiment(
                    raw_exp, peak_radius, window_radius, pp_mode,
                    self.MIN_INTENSITY, strict=True)
            else:
                picked_exp = raw_exp
            if pp_type != 'none' and debug:
                ms.MzMLFile().store(debug_stem + '-picked.mzML', picked_exp)

            # Feature finding (an empty map is used for bins without peaks)
            bin_features = ms.FeatureMap()
            if util.has_peaks(picked_exp):
                bin_features = self.run_ff(picked_exp, ff_type)
            bin_features = self.match_features_internal(bin_features)
            bin_features.setUniqueIds()

            if debug:
                ms.FeatureXMLFile().store(debug_stem + '.featureXML',
                                          bin_features)

            per_pass[pass_idx].append(bin_features)
            per_pass_total[pass_idx] += bin_features

    if debug:
        for pass_idx in range(2):
            per_pass_total[pass_idx].setUniqueIds()
            ms.FeatureXMLFile().store(f"{dir}/pass{pass_idx}.featureXML",
                                      per_pass_total[pass_idx])

    return per_pass[0], per_pass[1]
def export_features(self):
    """Store ``self.feature_map`` at ``self.features_file``.

    The writer object is kept on the instance as ``self.features_xml``.
    """
    target_path = self.features_file
    self._log('Saving features into `%s`.' % target_path)

    writer = oms.FeatureXMLFile()
    self.features_xml = writer
    writer.store(target_path, self.feature_map)
def _set_column_headers(out_map, fds, in_files, maps):
    # Record file name, size and unique id of every input map in the column headers of out_map.
    for i, (in_file, map_) in enumerate(zip(in_files, maps)):
        fd = fds.get(i, pms.ColumnHeader())
        fd.filename = in_file
        fd.size = map_.size()
        fd.unique_id = map_.getUniqueId()
        fds[i] = fd
    out_map.setColumnHeaders(fds)


def link(in_files, out_file, keep_subelements, params):
    """Group the features of several input maps into one consensus map.

    All inputs must be of the same kind, either all featureXML or all
    consensusXML.  The grouping is done with ``FeatureGroupingAlgorithmQT``
    configured from the ``algorithm:`` section of ``params``.  The result is
    written to ``out_file`` and a histogram of consensus-feature sizes is
    printed.

    :param in_files: paths of the input maps
    :param out_file: path of the consensusXML file to write
    :param keep_subelements: only used for consensusXML input; when true the
        sub-elements of the inputs are transferred to the result instead of
        describing the inputs in the column headers
    :param params: parameter object providing the ``algorithm:`` section
    :raises Exception: if the inputs are not all featureXML or all
        consensusXML
    """
    in_types = set(pms.FileHandler.getType(in_) for in_ in in_files)

    if in_types == {pms.Type.CONSENSUSXML}:
        link_features = False
    elif in_types == {pms.Type.FEATUREXML}:
        link_features = True
    else:
        raise Exception("different kinds of input files")

    algorithm_parameters = params.copy("algorithm:", True)
    algorithm = pms.FeatureGroupingAlgorithmQT()
    algorithm.setParameters(algorithm_parameters)

    out_map = pms.ConsensusMap()
    fds = out_map.getColumnHeaders()

    if link_features:
        f = pms.FeatureXMLFile()
        maps = []
        for in_file in in_files:
            map_ = pms.FeatureMap()
            f.load(in_file, map_)
            maps.append(map_)
        # For feature maps the headers are set before grouping.
        _set_column_headers(out_map, fds, in_files, maps)
        algorithm.group(maps, out_map)
    else:
        f = pms.ConsensusXMLFile()
        maps = []
        for in_file in in_files:
            map_ = pms.ConsensusMap()
            f.load(in_file, map_)
            maps.append(map_)
        algorithm.group(maps, out_map)

        if not keep_subelements:
            _set_column_headers(out_map, fds, in_files, maps)
        else:
            algorithm.transferSubelements(maps, out_map)

    out_map.setUniqueIds()
    addDataProcessing(out_map, params,
                      pms.DataProcessing.ProcessingAction.FEATURE_GROUPING)
    pms.ConsensusXMLFile().store(out_file, out_map)

    size_histogram = Counter(feat.size() for feat in out_map)

    # Single-argument print(...) calls: valid syntax on Python 3 and they
    # print the same text on Python 2, where the former print statements
    # were the only accepted form.
    print("Number of consensus features:")
    for size, count in size_histogram.most_common():
        print(" of size %2d : %6d" % (size, count))
    print(" total : %6d" % out_map.size())
def align(in_files, out_files, trafo_out_files, reference_index,
          reference_file, params):
    """Align a set of peak maps or feature maps with pose clustering.

    The inputs are loaded, aligned, optionally refined with a fitted model
    (``model:`` section of ``params``, skipped when its ``type`` is
    ``"none"``), transformed and written to ``out_files``.  If
    ``trafo_out_files`` is non-empty, the computed transformations are
    stored there as well, paired with the file names in order.

    :param in_files: input paths; either all mzML/mzXML/mzData or all
        featureXML
    :param out_files: output paths; the i-th output receives the i-th
        aligned input
    :param trafo_out_files: paths for the transformation files (may be empty)
    :param reference_index: passed to ``setReference`` of the algorithm
    :param reference_file: passed to ``setReference`` of the algorithm
    :param params: parameter object with ``model:`` and ``algorithm:``
        sections
    :raises Exception: if the input file types cannot be handled
    """
    aligner = pms.MapAlignmentAlgorithmPoseClustering()
    aligner.setReference(reference_index, reference_file)

    model_params = params.copy("model:", True)
    model_type = model_params.getValue("type").toString()

    progress = pms.ProgressLogger()
    progress.setLogType(pms.LogType.CMD)

    aligner.setParameters(params.copy("algorithm:", True))

    transformations = []
    in_types = {pms.FileHandler.getType(path) for path in in_files}
    in_maps = []

    def load_inputs(new_container, read_into):
        # Load every input file into a fresh container, reporting progress.
        progress.startProgress(0, len(in_files), "loading input files")
        for idx, path in enumerate(in_files):
            progress.setProgress(idx)
            container = new_container()
            read_into(path, container)
            in_maps.append(container)
        progress.endProgress()

    def fit_model_if_requested():
        # Refine the raw transformations unless the model type is "none".
        if model_type != "none":
            aligner.fitModel(model_type, model_params, transformations)

    def write_outputs(write):
        # Annotate each aligned map and write it to its output file.
        progress.startProgress(0, len(out_files), "writing output files")
        for idx, path in enumerate(out_files):
            progress.setProgress(idx)
            write(path, addDataProcessing(in_maps[idx], params))
        progress.endProgress()

    peak_input = in_types <= {pms.Type.MZML, pms.Type.MZXML,
                              pms.Type.MZDATA}
    if not peak_input and in_types != {pms.Type.FEATUREXML}:
        raise Exception("can not handle input file format")

    if peak_input:
        handler = pms.FileHandler()
        load_inputs(pms.MSExperiment, handler.loadExperiment)
        aligner.alignPeakMaps(in_maps, transformations)
        fit_model_if_requested()
        pms.MapAlignmentAlgorithmPoseClustering.transformPeakMaps(
            in_maps, transformations)
        write_outputs(handler.storeExperiment)
    else:
        handler = pms.FeatureXMLFile()
        load_inputs(pms.FeatureMap, handler.load)
        aligner.alignFeatureMaps(in_maps, transformations)
        fit_model_if_requested()
        pms.MapAlignmentAlgorithmPoseClustering.transformFeatureMaps(
            in_maps, transformations)
        write_outputs(handler.store)

    if trafo_out_files:
        for trafo_path, trafo in zip(trafo_out_files, transformations):
            pms.TransformationXMLFile().store(trafo_path, trafo)
exp = ms.MSExperiment() print('Loading mzML input file....................', end='', flush=True) ms.MzMLFile().load(args.source + '.mzML', exp) print('Done') # Remove MS2 scans spectra = exp.getSpectra() for i in range(len(spectra)): spec = spectra[i] if spec.getMSLevel() == 1: clean_exp.addSpectrum(spec) openms_features = run_ff(clean_exp, 'centroided') ms.FeatureXMLFile().store(args.outdir + '/openms.featureXML', openms_features) else: ms.FeatureXMLFile().load(args.openms + '.featureXML', openms_features) ms.FeatureXMLFile().load(args.found + '.featureXML', found_features) ms.FeatureXMLFile().load(args.baseline + '.featureXML', baseline_features) ms.FeatureXMLFile().load(args.truth + '.featureXML', truth_features) if not brute_force: print('Features loaded, beginning comparison', flush=True) common_features = compare_features(found_features, openms_features, baseline_features, truth_features) common_features.setUniqueIds() ms.FeatureXMLFile().store(args.outdir + '/common.featureXML', common_features) else:
"""
Generate the input feature maps used as test data for
TOPP_FeatureLinkerUnlabeledQT_5 and TOPP_FeatureLinkerUnlabeledQT_6.

Three feature maps with three features each are written to
output_0.featureXML, output_1.featureXML and output_2.featureXML.
"""


def _make_peptide_id(sequence):
    # Build a peptide identification with a single hit for `sequence`.
    pepid = pyopenms.PeptideIdentification()
    hit = pyopenms.PeptideHit()
    hit.setSequence(pyopenms.AASequence.fromString(sequence, True))
    pepid.insertHit(hit)
    pepid.setIdentifier("Protein0")
    return pepid


fmaps = [pyopenms.FeatureMap() for _ in range(3)]

pepseq = ["PEPTIDEA", "PEPTIDEK", "PEPTIDER"]
pepids = [_make_peptide_id(sequence) for sequence in pepseq]

protid = pyopenms.ProteinIdentification()
protid.setIdentifier("Protein0")

for map_index, fmap in enumerate(fmaps):
    fmap.setProteinIdentifications([protid])

    # Three features per map; the peptide assigned to a feature is rotated
    # by the map index, so different maps carry different peptide ids.
    for feature_index in range(3):
        peptide_index = (map_index + feature_index) % 3

        feature = pyopenms.Feature()
        feature.setRT(300 + feature_index * 100 + map_index * 10)
        feature.setMZ(500 + feature_index * 0.001 + map_index * 0.01)
        feature.setIntensity(500 + map_index * 100)
        # The plain sequence as a meta value makes viewing in TOPPView easier.
        feature.setMetaValue("sequence", pepseq[peptide_index])
        feature.setPeptideIdentifications([pepids[peptide_index]])
        fmap.push_back(feature)

    pyopenms.FeatureXMLFile().store("output_%s.featureXML" % map_index, fmap)
def export_chromatograms_data(self):
    """Store ``self.feature_map`` at the path held in ``self.chromatograms``.

    The writer object is kept on the instance as ``self.chromatogram_mzml``.

    NOTE(review): despite its name, this method writes the feature map
    through a featureXML writer, and the attribute ``chromatogram_mzml``
    holds a ``FeatureXMLFile``.  This looks like it was copied from
    ``export_features``; confirm whether an mzML writer and chromatogram
    data were intended.  The writer and the stored object are left as they
    were.
    """
    # Report the path that is actually handed to ``store`` below; the
    # message previously named ``self.features_file``, which this method
    # never writes to.  The 1-tuple keeps the formatting safe for any value.
    self._log('Saving chromatograms into `%s`.' % (self.chromatograms,))
    self.chromatogram_mzml = oms.FeatureXMLFile()
    self.chromatogram_mzml.store(self.chromatograms, self.feature_map)