Example #1
def main():

    # register command line arguments
    model = CTDModel(
        name='NameOfThePyTOPPTool',  # required
        version='1.0',  # required
        description='This is an example tool showing how to write pyTOPP '
                    'tools compatible with the OpenMS workflow ecosystem.',
        manual='RTF',
        docurl='http://dummy.url/docurl.html',
        category='Example',
        executableName='exampletool',
        executablePath='/path/to/exec/exampletool-1.0/exampletool')

    # Register in / out etc. with CTDModel
    model.add(
        'input',
        required=True,
        type='input-file',
        is_list=False,
        file_formats=['mzML'],  # filename restrictions
        description='Input file')

    model.add(
        'output',
        required=True,
        type='output-file',
        is_list=False,
        file_formats=['mzML'],  # filename restrictions
        description='Output file')

    defaults = pms.PeakPickerHiRes().getDefaults()

    # expose algorithm parameters in command line options
    addParamToCTDopts(defaults, model)

    # parse command line
    # if -write_ini is provided, store the model in a CTD file and exit with code 0
    # if -ini is provided, load the CTD file into the defaults Param object and
    # return a new model with parameters set as defaults
    arg_dict, openms_params = parseCTDCommandLine(sys.argv, model, defaults)

    # data processing
    fh = pms.MzMLFile()
    fh.setLogType(pms.LogType.CMD)
    input_map = pms.MSExperiment()

    fh.load(arg_dict["input"], input_map)

    pp = pms.PeakPickerHiRes()
    pp.setParameters(openms_params)
    out_map = pms.MSExperiment()
    pp.pickExperiment(input_map, out_map)

    out_map = addDataProcessing(
        out_map, openms_params,
        pms.DataProcessing.ProcessingAction.PEAK_PICKING)
    fh = pms.FileHandler()
    fh.storeExperiment(arg_dict["output"], out_map)
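
# A minimal entry point to round the example off (a sketch only: it assumes
# the imports the snippet leaves out, i.e. sys, pyopenms as pms, and the
# CTDModel/CTDopts helper functions used above).
if __name__ == "__main__":
    main()

# Assumed shell usage, following the -write_ini/-ini conventions noted above:
#   python exampletool.py -write_ini exampletool.ini
#   python exampletool.py -input in.mzML -output out.mzML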
Example #2
def run_featurefinder_centroided(input_path, params, seeds, out_path):

    fh = pms.MzMLFile()
    options = pms.PeakFileOptions()
    options.setMSLevels([1])  # load only MS level 1 spectra
    fh.setOptions(options)
    input_map = pms.MSExperiment()
    fh.load(input_path, input_map)
    input_map.updateRanges()

    ff = pms.FeatureFinder()
    ff.setLogType(pms.LogType.CMD)

    features = pms.FeatureMap()
    name = pms.FeatureFinderAlgorithmPicked.getProductName()
    ff.run(name, input_map, features, params, seeds)

    features.setUniqueIds()
    addDataProcessing(features, params, pms.ProcessingAction.QUANTITATION)

    fh = pms.FeatureXMLFile()
    fh.store(out_path, features)
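
# Usage sketch for run_featurefinder_centroided. File names are placeholders,
# "import pyopenms as pms" is assumed, and FeatureFinder.getParameters() is
# assumed to return the algorithm defaults, as in older pyOpenMS releases.
name = pms.FeatureFinderAlgorithmPicked.getProductName()
params = pms.FeatureFinder().getParameters(name)
seeds = pms.FeatureMap()  # no seed features
run_featurefinder_centroided("input.mzML", params, seeds,
                             "features.featureXML")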
Example #3
def run_peak_picker(input_map, params, out_path):

    spec0 = input_map[0]
    if pms.PeakTypeEstimator().estimateType(spec0) == \
            pms.SpectrumSettings.SpectrumType.PEAKS:
        logging.warning("input peak map does not look like profile data")

    if any(not s.isSorted() for s in input_map):
        raise Exception("Not all spectra are sorted according to m/z")

    pp = pms.PeakPickerHiRes()
    pp.setParameters(params)
    out_map = pms.MSExperiment()
    pp.pickExperiment(input_map, out_map)

    out_map = addDataProcessing(out_map, params, pms.ProcessingAction.PEAK_PICKING)
    fh = pms.FileHandler()
    fh.storeExperiment(out_path, out_map)
Example #4
def run_peak_picker(input_map, params, out_path):

    spec0 = input_map[0]
    if pms.PeakTypeEstimator().estimateType(spec0) == \
            pms.SpectrumSettings.SpectrumType.PEAKS:
        logging.warning("input peak map does not look like profile data")

    if any(not s.isSorted() for s in input_map):
        raise Exception("Not all spectra are sorted according to m/z")

    pp = pms.PeakPickerHiRes()
    pp.setParameters(params)
    out_map = pms.MSExperiment()
    pp.pickExperiment(input_map, out_map)

    out_map = addDataProcessing(
        out_map, params, pms.DataProcessing.ProcessingAction.PEAK_PICKING)
    fh = pms.FileHandler()
    fh.storeExperiment(out_path, out_map)
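
# Usage sketch for run_peak_picker (placeholder file names; assumes
# "import pyopenms as pms"): load a profile-mode experiment and pick
# peaks with the PeakPickerHiRes defaults.
exp = pms.MSExperiment()
pms.MzMLFile().load("profile.mzML", exp)
run_peak_picker(exp, pms.PeakPickerHiRes().getDefaults(), "centroided.mzML")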
Example #5
def id_mapper(in_file, id_file, out_file, params, use_centroid_rt,
              use_centroid_mz, use_subelements):

    in_type = pms.FileHandler.getType(in_file)

    protein_ids = []
    peptide_ids = []

    pms.IdXMLFile().load(id_file, protein_ids, peptide_ids)

    mapper = pms.IDMapper()
    mapper.setParameters(params)

    if in_type == pms.Type.CONSENSUSXML:
        file_ = pms.ConsensusXMLFile()
        map_ = pms.ConsensusMap()
        file_.load(in_file, map_)
        mapper.annotate(map_, peptide_ids, protein_ids, use_subelements)
        addDataProcessing(
            map_, params,
            pms.DataProcessing.ProcessingAction.IDENTIFICATION_MAPPING)
        file_.store(out_file, map_)

    elif in_type == pms.Type.FEATUREXML:
        file_ = pms.FeatureXMLFile()
        map_ = pms.FeatureMap()
        file_.load(in_file, map_)
        mapper.annotate(map_, peptide_ids, protein_ids, use_centroid_rt,
                        use_centroid_mz)
        addDataProcessing(
            map_, params,
            pms.DataProcessing.ProcessingAction.IDENTIFICATION_MAPPING)
        file_.store(out_file, map_)

    elif in_type == pms.Type.MZQ:
        file_ = pms.MzQuantMLFile()
        msq = pms.MSQuantifications()
        file_.load(in_file, msq)
        maps = msq.getConsensusMaps()
        for map_ in maps:
            mapper.annotate(map_, peptide_ids, protein_ids, use_subelements)
            addDataProcessing(
                map_, params,
                pms.DataProcessing.ProcessingAction.IDENTIFICATION_MAPPING)
        msq.setConsensusMaps(maps)
        file_.store(out_file, msq)

    else:
        raise Exception("invalid input file format")
Example #6
def id_mapper(in_file, id_file, out_file, params, use_centroid_rt,
        use_centroid_mz, use_subelements):

    in_type = pms.FileHandler.getType(in_file)

    protein_ids = []
    peptide_ids = []

    pms.IdXMLFile().load(id_file, protein_ids, peptide_ids)

    mapper = pms.IDMapper()
    mapper.setParameters(params)

    if in_type == pms.Type.CONSENSUSXML:
        file_ = pms.ConsensusXMLFile()
        map_ = pms.ConsensusMap()
        file_.load(in_file, map_)
        mapper.annotate(map_, peptide_ids, protein_ids, use_subelements)
        addDataProcessing(map_, params, pms.ProcessingAction.IDENTIFICATION_MAPPING)
        file_.store(out_file, map_)

    elif in_type == pms.Type.FEATUREXML:
        file_ = pms.FeatureXMLFile()
        map_ = pms.FeatureMap()
        file_.load(in_file, map_)
        mapper.annotate(map_, peptide_ids, protein_ids, use_centroid_rt,
                use_centroid_mz)
        addDataProcessing(map_, params, pms.ProcessingAction.IDENTIFICATION_MAPPING)
        file_.store(out_file, map_)

    elif in_type == pms.Type.MZQ:
        file_ = pms.MzQuantMLFile()
        msq = pms.MSQuantifications()
        file_.load(in_file, msq)
        maps = msq.getConsensusMaps()
        for map_ in maps:
            mapper.annotate(map_, peptide_ids, protein_ids, use_subelements)
            addDataProcessing(map_, params, pms.ProcessingAction.IDENTIFICATION_MAPPING)
        msq.setConsensusMaps(maps)
        file_.store(out_file, msq)

    else:
        raise Exception("invalid input file format")
def align(in_files, out_files, out_trafos, reference_index,
        reference_file, params):

    in_types = set(pms.FileHandler.getType(in_) for in_ in in_files)

    if in_types <= set((pms.Type.MZML, pms.Type.MZXML, pms.Type.MZDATA)):
        align_features = False
    elif in_types == set((pms.Type.FEATUREXML,)):
        align_features = True
    else:
        raise Exception("different kinds of input files")

    algorithm = pms.MapAlignmentAlgorithmPoseClustering()
    alignment_params = params.copy("algorithm:", True)
    algorithm.setParameters(alignment_params)
    algorithm.setLogType(pms.LogType.CMD)

    plog = pms.ProgressLogger()
    plog.setLogType(pms.LogType.CMD)

    if reference_file:
        file_ = reference_file
    elif reference_index > 0:
        file_ = in_files[reference_index-1]
    else:
        sizes = []
        if align_features:
            fh = pms.FeatureXMLFile()
            plog.startProgress(0, len(in_files), "Determine Reference map")
            for i, in_f in enumerate(in_files):
                sizes.append((fh.loadSize(in_f), in_f))
                plog.setProgress(i)
        else:
            fh = pms.MzMLFile()
            mse = pms.MSExperiment()
            plog.startProgress(0, len(in_files), "Determine Reference map")
            for i, in_f in enumerate(in_files):
                fh.load(in_f, mse)
                mse.updateRanges()
                sizes.append((mse.getSize(), in_f))
                plog.setProgress(i)
        plog.endProgress()
        __, file_ = max(sizes)

    f_fmxl = pms.FeatureXMLFile()
    if not out_files:
        options = f_fmxl.getOptions()
        options.setLoadConvexHull(False)
        options.setLoadSubordinates(False)
        f_fmxl.setOptions(options)

    if align_features:
        map_ref = pms.FeatureMap()
        f_fxml_tmp = pms.FeatureXMLFile()
        options = f_fmxl.getOptions()
        options.setLoadConvexHull(False)
        options.setLoadSubordinates(False)
        f_fxml_tmp.setOptions(options)
        f_fxml_tmp.load(file_, map_ref)
        algorithm.setReference(map_ref)
    else:
        map_ref = pms.MSExperiment()
        pms.MzMLFile().load(file_, map_ref)
        algorithm.setReference(map_ref)

    plog.startProgress(0, len(in_files), "Align input maps")
    for i, in_file in enumerate(in_files):
        trafo = pms.TransformationDescription()
        if align_features:
            map_ = pms.FeatureMap()
            f_fxml_tmp = pms.FeatureXMLFile()
            f_fxml_tmp.setOptions(f_fmxl.getOptions())
            f_fxml_tmp.load(in_file, map_)
            if in_file == file_:
                trafo.fitModel("identity")
            else:
                algorithm.align(map_, trafo)
            if out_files:
                pms.MapAlignmentTransformer.transformSingleFeatureMap(map_, trafo)
                addDataProcessing(map_, params, pms.ProcessingAction.ALIGNMENT)
                f_fxml_tmp.store(out_files[i], map_)
        else:
            map_ = pms.MSExperiment()
            pms.MzMLFile().load(in_file, map_)
            if in_file == file_:
                trafo.fitModel("identity")
            else:
                algorithm.align(map_, trafo)
            if out_files:
                pms.MapAlignmentTransformer.transformSinglePeakMap(map_, trafo)
                addDataProcessing(map_, params, pms.ProcessingAction.ALIGNMENT)
                pms.MzMLFile().store(out_files[i], map_)
        if out_trafos:
            pms.TransformationXMLFile().store(out_trafos[i], trafo)

        plog.setProgress(i+1)

    plog.endProgress()
def align(in_files, out_files, out_trafos, reference_index, reference_file,
          params):

    in_types = set(pms.FileHandler.getType(in_) for in_ in in_files)

    if in_types <= set((pms.Type.MZML, pms.Type.MZXML, pms.Type.MZDATA)):
        align_features = False
    elif in_types == set((pms.Type.FEATUREXML, )):
        align_features = True
    else:
        raise Exception("different kinds of input files")

    algorithm = pms.MapAlignmentAlgorithmPoseClustering()
    alignment_params = params.copy("algorithm:", True)
    algorithm.setParameters(alignment_params)
    algorithm.setLogType(pms.LogType.CMD)

    plog = pms.ProgressLogger()
    plog.setLogType(pms.LogType.CMD)

    if reference_file:
        file_ = reference_file
    elif reference_index > 0:
        file_ = in_files[reference_index - 1]
    else:
        sizes = []
        if align_features:
            fh = pms.FeatureXMLFile()
            plog.startProgress(0, len(in_files), "Determine Reference map")
            for i, in_f in enumerate(in_files):
                sizes.append((fh.loadSize(in_f), in_f))
                plog.setProgress(i)
        else:
            fh = pms.MzMLFile()
            mse = pms.MSExperiment()
            plog.startProgress(0, len(in_files), "Determine Reference map")
            for i, in_f in enumerate(in_files):
                fh.load(in_f, mse)
                mse.updateRanges()
                sizes.append((mse.getSize(), in_f))
                plog.setProgress(i)
        plog.endProgress()
        __, file_ = max(sizes)

    f_fmxl = pms.FeatureXMLFile()
    if not out_files:
        options = f_fmxl.getOptions()
        options.setLoadConvexHull(False)
        options.setLoadSubordinates(False)
        f_fmxl.setOptions(options)

    if align_features:
        map_ref = pms.FeatureMap()
        f_fxml_tmp = pms.FeatureXMLFile()
        options = f_fmxl.getOptions()
        options.setLoadConvexHull(False)
        options.setLoadSubordinates(False)
        f_fxml_tmp.setOptions(options)
        f_fxml_tmp.load(file_, map_ref)
        algorithm.setReference(map_ref)
    else:
        map_ref = pms.MSExperiment()
        pms.MzMLFile().load(file_, map_ref)
        algorithm.setReference(map_ref)

    plog.startProgress(0, len(in_files), "Align input maps")
    for i, in_file in enumerate(in_files):
        trafo = pms.TransformationDescription()
        if align_features:
            map_ = pms.FeatureMap()
            f_fxml_tmp = pms.FeatureXMLFile()
            f_fxml_tmp.setOptions(f_fmxl.getOptions())
            f_fxml_tmp.load(in_file, map_)
            if in_file == file_:
                trafo.fitModel("identity")
            else:
                algorithm.align(map_, trafo)
            if out_files:
                pms.MapAlignmentTransformer.transformSingleFeatureMap(
                    map_, trafo)
                addDataProcessing(map_, params, pms.ProcessingAction.ALIGNMENT)
                f_fxml_tmp.store(out_files[i], map_)
        else:
            map_ = pms.MSExperiment()
            pms.MzMLFile().load(in_file, map_)
            if in_file == file_:
                trafo.fitModel("identity")
            else:
                algorithm.align(map_, trafo)
            if out_files:
                pms.MapAlignmentTransformer.transformSinglePeakMap(map_, trafo)
                addDataProcessing(map_, params, pms.ProcessingAction.ALIGNMENT)
                pms.MzMLFile().store(out_files[i], map_)
        if out_trafos:
            pms.TransformationXMLFile().store(out_trafos[i], trafo)

        plog.setProgress(i + 1)

    plog.endProgress()
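
# Usage sketch for align (placeholders throughout; assumes
# "import pyopenms as pms" and that Param.insert() nests the algorithm
# defaults under the "algorithm:" prefix the function strips off with
# params.copy("algorithm:", True)). With reference_index=0 and
# reference_file=None the function picks the largest map as reference.
params = pms.Param()
params.insert("algorithm:",
              pms.MapAlignmentAlgorithmPoseClustering().getDefaults())
align(["run1.mzML", "run2.mzML"],
      ["run1_aligned.mzML", "run2_aligned.mzML"],
      ["run1.trafoXML", "run2.trafoXML"],
      0, None, params)
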
def link(in_files, out_file, keep_subelements, params):

    in_types = set(pms.FileHandler.getType(in_) for in_ in in_files)

    if in_types == set((pms.Type.CONSENSUSXML, )):
        link_features = False
    elif in_types == set((pms.Type.FEATUREXML, )):
        link_features = True
    else:
        raise Exception("different kinds of input files")

    algorithm_parameters = params.copy("algorithm:", True)
    algorithm = pms.FeatureGroupingAlgorithmQT()
    algorithm.setParameters(algorithm_parameters)

    out_map = pms.ConsensusMap()
    fds = out_map.getColumnHeaders()
    if link_features:
        f = pms.FeatureXMLFile()
        maps = []
        for i, in_file in enumerate(in_files):
            map_ = pms.FeatureMap()
            f.load(in_file, map_)

            # set filedescriptions
            fd = fds.get(i, pms.ColumnHeader())
            fd.filename = in_file
            fd.size = map_.size()
            fd.unique_id = map_.getUniqueId()
            fds[i] = fd
            maps.append(map_)
        out_map.setColumnHeaders(fds)
        algorithm.group(maps, out_map)
    else:
        f = pms.ConsensusXMLFile()
        maps = []
        for i, in_file in enumerate(in_files):
            map_ = pms.ConsensusMap()
            f.load(in_file, map_)
            maps.append(map_)
        algorithm.group(maps, out_map)

        if not keep_subelements:
            for i in range(len(in_files)):
                # set filedescriptions
                fd = fds.get(i, pms.ColumnHeader())
                fd.filename = in_files[i]
                fd.size = maps[i].size()
                fd.unique_id = maps[i].getUniqueId()
                fds[i] = fd
            out_map.setColumnHeaders(fds)
        else:
            algorithm.transferSubelements(maps, out_map)

    out_map.setUniqueIds()
    addDataProcessing(out_map, params,
                      pms.DataProcessing.ProcessingAction.FEATURE_GROUPING)

    pms.ConsensusXMLFile().store(out_file, out_map)

    sizes = []
    for feat in out_map:
        sizes.append(feat.size())

    c = Counter(sizes)  # collections.Counter, imported in the full script
    print("Number of consensus features:")
    for size, count in c.most_common():
        print("   of size %2d : %6d" % (size, count))
    print("        total : %6d" % out_map.size())
def link(in_files, out_file, keep_subelements, params):

    in_types = set(pms.FileHandler.getType(in_) for in_ in in_files)

    if in_types == set((pms.Type.CONSENSUSXML,)):
        link_features = False
    elif in_types == set((pms.Type.FEATUREXML,)):
        link_features = True
    else:
        raise Exception("different kinds of input files")

    algorithm_parameters = params.copy("algorithm:", True)
    algorithm = pms.FeatureGroupingAlgorithmQT()
    algorithm.setParameters(algorithm_parameters)

    out_map = pms.ConsensusMap()
    fds = out_map.getFileDescriptions()
    if link_features:
        f = pms.FeatureXMLFile()
        maps = []
        for i, in_file in enumerate(in_files):
            map_ = pms.FeatureMap()
            f.load(in_file, map_)

            # set filedescriptions
            fd = fds.get(i, pms.FileDescription())
            fd.filename = in_file
            fd.size = map_.size()
            fd.unique_id = map_.getUniqueId()
            fds[i] = fd
            maps.append(map_)
        out_map.setFileDescriptions(fds)
        algorithm.group(maps, out_map)
    else:
        f = pms.ConsensusXMLFile()
        maps = []
        for i, in_file in enumerate(in_files):
            map_ = pms.ConsensusMap()
            f.load(in_file, map_)
            maps.append(map_)
        algorithm.group(maps, out_map)

        if not keep_subelements:
            for i in range(len(in_files)):
                # set filedescriptions
                fd = fds.get(i, pms.FileDescription())
                fd.filename = in_files[i]
                fd.size = maps[i].size()
                fd.unique_id = maps[i].getUniqueId()
                fds[i] = fd
            out_map.setFileDescriptions(fds)
        else:
            algorithm.transferSubelements(maps, out_map)

    out_map.setUniqueIds()
    addDataProcessing(out_map, params, pms.ProcessingAction.FEATURE_GROUPING)

    pms.ConsensusXMLFile().store(out_file, out_map)

    sizes = []
    for feat in out_map:
        sizes.append(feat.size())

    c = Counter(sizes)  # collections.Counter, imported in the full script
    print("Number of consensus features:")
    for size, count in c.most_common():
        print("   of size %2d : %6d" % (size, count))
    print("        total : %6d" % out_map.size())