def main():

    # register command line arguments
    model = CTDModel(
        name='NameOfThePyTOPPTool',  # required
        version='1.0',               # required
        description='This is an example tool showing how to write pyTOPP '
                    'tools compatible with the OpenMS workflow ecosystem.',
        manual='RTF',
        docurl='http://dummy.url/docurl.html',
        category='Example',
        executableName='exampletool',
        executablePath='/path/to/exec/exampletool-1.0/exampletool'
    )

    # register input / output parameters with the CTDModel
    model.add(
        'input',
        required=True,
        type='input-file',
        is_list=False,
        file_formats=['mzML'],  # filename restrictions
        description='Input file'
    )

    model.add(
        'output',
        required=True,
        type='output-file',
        is_list=False,
        file_formats=['mzML'],  # filename restrictions
        description='Output file'
    )

    defaults = pms.PeakPickerHiRes().getDefaults()

    # expose algorithm parameters as command line options
    addParamToCTDopts(defaults, model)

    # parse command line
    # if -write_ini is provided, store model in CTD file, exit with error code 0
    # if -ini is provided, load CTD file into defaults Param object and return
    # new model with parameters set as defaults
    arg_dict, openms_params = parseCTDCommandLine(sys.argv, model, defaults)

    # data processing
    fh = pms.MzMLFile()
    fh.setLogType(pms.LogType.CMD)
    input_map = pms.MSExperiment()
    fh.load(arg_dict["input"], input_map)

    pp = pms.PeakPickerHiRes()
    pp.setParameters(openms_params)
    out_map = pms.MSExperiment()
    pp.pickExperiment(input_map, out_map)

    out_map = addDataProcessing(
        out_map, openms_params,
        pms.DataProcessing.ProcessingAction.PEAK_PICKING)
    fh = pms.FileHandler()
    fh.storeExperiment(arg_dict["output"], out_map)
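To run the tool from the command line, the script needs an entry point. A minimal sketch; the script name exampletool.py and the invocations in the comments are illustrative assumptions, not part of the example above:

# Hypothetical usage, assuming this script is saved as exampletool.py:
#   python exampletool.py -write_ini exampletool.ini     # export defaults as a CTD file
#   python exampletool.py -ini exampletool.ini -input in.mzML -output out.mzML
if __name__ == "__main__":
    main()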
def run_featurefinder_centroided(input_path, params, seeds, out_path):
    fh = pms.MzMLFile()
    options = pms.PeakFileOptions()
    options.setMSLevels([1, 1])  # load only MS level 1 spectra
    fh.setOptions(options)
    input_map = pms.MSExperiment()
    fh.load(input_path, input_map)
    input_map.updateRanges()

    ff = pms.FeatureFinder()
    ff.setLogType(pms.LogType.CMD)

    features = pms.FeatureMap()
    name = pms.FeatureFinderAlgorithmPicked.getProductName()
    ff.run(name, input_map, features, params, seeds)

    features.setUniqueIds()
    addDataProcessing(features, params,
                      pms.DataProcessing.ProcessingAction.QUANTITATION)
    fh = pms.FeatureXMLFile()
    fh.store(out_path, features)
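A minimal usage sketch for the feature finder, assuming the function above is in scope; the file names and the empty seed map are placeholder assumptions:

import pyopenms as pms

# default parameters of the picked-peak feature finding algorithm
params = pms.FeatureFinder().getParameters(
    pms.FeatureFinderAlgorithmPicked.getProductName())
seeds = pms.FeatureMap()  # empty map: run without seeding
run_featurefinder_centroided("input.mzML", params, seeds,
                             "output.featureXML")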
def run_peak_picker(input_map, params, out_path):
    spec0 = input_map[0]
    if pms.PeakTypeEstimator().estimateType(spec0) == \
            pms.SpectrumSettings.SpectrumType.PEAKS:
        logging.warning("input peak map does not look like profile data")

    if any(not s.isSorted() for s in input_map):
        raise Exception("Not all spectra are sorted according to m/z")

    pp = pms.PeakPickerHiRes()
    pp.setParameters(params)
    out_map = pms.MSExperiment()
    pp.pickExperiment(input_map, out_map)

    out_map = addDataProcessing(
        out_map, params, pms.DataProcessing.ProcessingAction.PEAK_PICKING)
    fh = pms.FileHandler()
    fh.storeExperiment(out_path, out_map)
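A minimal usage sketch for the peak picker; the file names are placeholder assumptions:

import pyopenms as pms

input_map = pms.MSExperiment()
pms.MzMLFile().load("profile.mzML", input_map)  # profile-mode input
params = pms.PeakPickerHiRes().getDefaults()
run_peak_picker(input_map, params, "centroided.mzML")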
def id_mapper(in_file, id_file, out_file, params,
              use_centroid_rt, use_centroid_mz, use_subelements):

    in_type = pms.FileHandler.getType(in_file)

    protein_ids = []
    peptide_ids = []
    pms.IdXMLFile().load(id_file, protein_ids, peptide_ids)

    mapper = pms.IDMapper()
    mapper.setParameters(params)

    if in_type == pms.Type.CONSENSUSXML:
        file_ = pms.ConsensusXMLFile()
        map_ = pms.ConsensusMap()
        file_.load(in_file, map_)
        mapper.annotate(map_, peptide_ids, protein_ids, use_subelements)
        addDataProcessing(
            map_, params,
            pms.DataProcessing.ProcessingAction.IDENTIFICATION_MAPPING)
        file_.store(out_file, map_)

    elif in_type == pms.Type.FEATUREXML:
        file_ = pms.FeatureXMLFile()
        map_ = pms.FeatureMap()
        file_.load(in_file, map_)
        mapper.annotate(map_, peptide_ids, protein_ids,
                        use_centroid_rt, use_centroid_mz)
        addDataProcessing(
            map_, params,
            pms.DataProcessing.ProcessingAction.IDENTIFICATION_MAPPING)
        file_.store(out_file, map_)

    elif in_type == pms.Type.MZQ:
        file_ = pms.MzQuantMLFile()
        msq = pms.MSQuantifications()
        file_.load(in_file, msq)
        maps = msq.getConsensusMaps()
        for map_ in maps:
            mapper.annotate(map_, peptide_ids, protein_ids, use_subelements)
            addDataProcessing(
                map_, params,
                pms.DataProcessing.ProcessingAction.IDENTIFICATION_MAPPING)
        msq.setConsensusMaps(maps)
        file_.store(out_file, msq)

    else:
        raise Exception("invalid input file format")
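A minimal usage sketch with the IDMapper defaults; the paths and flag values are placeholder assumptions:

import pyopenms as pms

params = pms.IDMapper().getDefaults()
id_mapper("input.featureXML", "ids.idXML", "annotated.featureXML",
          params,
          use_centroid_rt=False,
          use_centroid_mz=False,
          use_subelements=False)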
def align(in_files, out_files, out_trafos, reference_index,
          reference_file, params):

    in_types = set(pms.FileHandler.getType(in_) for in_ in in_files)

    if in_types <= set((pms.Type.MZML, pms.Type.MZXML, pms.Type.MZDATA)):
        align_features = False
    elif in_types == set((pms.Type.FEATUREXML,)):
        align_features = True
    else:
        raise Exception("different kinds of input files")

    algorithm = pms.MapAlignmentAlgorithmPoseClustering()
    alignment_params = params.copy("algorithm:", True)
    algorithm.setParameters(alignment_params)
    algorithm.setLogType(pms.LogType.CMD)

    plog = pms.ProgressLogger()
    plog.setLogType(pms.LogType.CMD)

    if reference_file:
        file_ = reference_file
    elif reference_index > 0:
        file_ = in_files[reference_index - 1]
    else:
        # no reference given: pick the largest input map
        sizes = []
        if align_features:
            fh = pms.FeatureXMLFile()
            plog.startProgress(0, len(in_files), "Determine reference map")
            for i, in_f in enumerate(in_files):
                sizes.append((fh.loadSize(in_f), in_f))
                plog.setProgress(i)
        else:
            fh = pms.MzMLFile()
            mse = pms.MSExperiment()
            plog.startProgress(0, len(in_files), "Determine reference map")
            for i, in_f in enumerate(in_files):
                fh.load(in_f, mse)
                mse.updateRanges()
                sizes.append((mse.getSize(), in_f))
                plog.setProgress(i)
        plog.endProgress()
        __, file_ = max(sizes)

    f_fmxl = pms.FeatureXMLFile()
    if not out_files:
        options = f_fmxl.getOptions()
        options.setLoadConvexHull(False)
        options.setLoadSubordinates(False)
        f_fmxl.setOptions(options)

    if align_features:
        map_ref = pms.FeatureMap()
        f_fxml_tmp = pms.FeatureXMLFile()
        options = f_fmxl.getOptions()
        options.setLoadConvexHull(False)
        options.setLoadSubordinates(False)
        f_fxml_tmp.setOptions(options)
        f_fxml_tmp.load(file_, map_ref)
        algorithm.setReference(map_ref)
    else:
        map_ref = pms.MSExperiment()
        pms.MzMLFile().load(file_, map_ref)
        algorithm.setReference(map_ref)

    plog.startProgress(0, len(in_files), "Align input maps")
    for i, in_file in enumerate(in_files):
        trafo = pms.TransformationDescription()
        if align_features:
            map_ = pms.FeatureMap()
            f_fxml_tmp = pms.FeatureXMLFile()
            f_fxml_tmp.setOptions(f_fmxl.getOptions())
            f_fxml_tmp.load(in_file, map_)
            if in_file == file_:
                trafo.fitModel("identity")
            else:
                algorithm.align(map_, trafo)
            if out_files:
                pms.MapAlignmentTransformer.transformSingleFeatureMap(
                    map_, trafo)
                addDataProcessing(
                    map_, params,
                    pms.DataProcessing.ProcessingAction.ALIGNMENT)
                f_fxml_tmp.store(out_files[i], map_)
        else:
            map_ = pms.MSExperiment()
            pms.MzMLFile().load(in_file, map_)
            if in_file == file_:
                trafo.fitModel("identity")
            else:
                algorithm.align(map_, trafo)
            if out_files:
                pms.MapAlignmentTransformer.transformSinglePeakMap(
                    map_, trafo)
                addDataProcessing(
                    map_, params,
                    pms.DataProcessing.ProcessingAction.ALIGNMENT)
                pms.MzMLFile().store(out_files[i], map_)
        if out_trafos:
            pms.TransformationXMLFile().store(out_trafos[i], trafo)
        plog.setProgress(i + 1)
    plog.endProgress()
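Since align() strips an "algorithm:" prefix from params via params.copy("algorithm:", True), the pose-clustering defaults have to be nested under that prefix. A sketch, assuming Param.insert is exposed as in the C++ API; the file names are placeholders:

import pyopenms as pms

params = pms.Param()
params.insert("algorithm:",
              pms.MapAlignmentAlgorithmPoseClustering().getDefaults())
align(["run1.mzML", "run2.mzML"],                  # input maps
      ["run1_aligned.mzML", "run2_aligned.mzML"],  # aligned outputs
      ["run1.trafoXML", "run2.trafoXML"],          # transformation files
      reference_index=0, reference_file=None,      # pick reference by size
      params=params)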
def link(in_files, out_file, keep_subelements, params):
    in_types = set(pms.FileHandler.getType(in_) for in_ in in_files)

    if in_types == set((pms.Type.CONSENSUSXML,)):
        link_features = False
    elif in_types == set((pms.Type.FEATUREXML,)):
        link_features = True
    else:
        raise Exception("different kinds of input files")

    algorithm_parameters = params.copy("algorithm:", True)
    algorithm = pms.FeatureGroupingAlgorithmQT()
    algorithm.setParameters(algorithm_parameters)

    out_map = pms.ConsensusMap()
    fds = out_map.getColumnHeaders()
    if link_features:
        f = pms.FeatureXMLFile()
        maps = []
        for i, in_file in enumerate(in_files):
            map_ = pms.FeatureMap()
            f.load(in_file, map_)

            # set column headers
            fd = fds.get(i, pms.ColumnHeader())
            fd.filename = in_file
            fd.size = map_.size()
            fd.unique_id = map_.getUniqueId()
            fds[i] = fd
            maps.append(map_)
        out_map.setColumnHeaders(fds)
        algorithm.group(maps, out_map)
    else:
        f = pms.ConsensusXMLFile()
        maps = []
        for i, in_file in enumerate(in_files):
            map_ = pms.ConsensusMap()
            f.load(in_file, map_)
            maps.append(map_)
        algorithm.group(maps, out_map)

        if not keep_subelements:
            for i in range(len(in_files)):
                # set column headers
                fd = fds.get(i, pms.ColumnHeader())
                fd.filename = in_files[i]
                fd.size = maps[i].size()
                fd.unique_id = maps[i].getUniqueId()
                fds[i] = fd
            out_map.setColumnHeaders(fds)
        else:
            algorithm.transferSubelements(maps, out_map)

    out_map.setUniqueIds()
    addDataProcessing(out_map, params,
                      pms.DataProcessing.ProcessingAction.FEATURE_GROUPING)
    pms.ConsensusXMLFile().store(out_file, out_map)

    sizes = []
    for feat in out_map:
        sizes.append(feat.size())

    c = Counter(sizes)
    print("Number of consensus features:")
    for size, count in c.most_common():
        print("   of size %2d : %6d" % (size, count))
    print("        total : %6d" % out_map.size())
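link() expects its grouping parameters under the same "algorithm:" prefix. A minimal sketch under the same assumptions as above; the file names are placeholders:

import pyopenms as pms

params = pms.Param()
params.insert("algorithm:", pms.FeatureGroupingAlgorithmQT().getDefaults())
link(["run1.featureXML", "run2.featureXML"], "linked.consensusXML",
     keep_subelements=False, params=params)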