Example #1
# Imports assumed by the examples below; addDataProcessing is a helper from
# the surrounding module (not shown here) that appends a DataProcessing entry
# to the given map.
import pyopenms as pms
from collections import Counter

def id_mapper(in_file, id_file, out_file, params, use_centroid_rt,
              use_centroid_mz, use_subelements):

    in_type = pms.FileHandler.getType(in_file)

    protein_ids = []
    peptide_ids = []

    pms.IdXMLFile().load(id_file, protein_ids, peptide_ids)

    mapper = pms.IDMapper()
    mapper.setParameters(params)

    if in_type == pms.Type.CONSENSUSXML:
        file_ = pms.ConsensusXMLFile()
        map_ = pms.ConsensusMap()
        file_.load(in_file, map_)
        mapper.annotate(map_, peptide_ids, protein_ids, use_subelements)
        addDataProcessing(
            map_, params,
            pms.DataProcessing.ProcessingAction.IDENTIFICATION_MAPPING)
        file_.store(out_file, map_)

    elif in_type == pms.Type.FEATUREXML:
        file_ = pms.FeatureXMLFile()
        map_ = pms.FeatureMap()
        file_.load(in_file, map_)
        mapper.annotate(map_, peptide_ids, protein_ids, use_centroid_rt,
                        use_centroid_mz)
        addDataProcessing(
            map_, params,
            pms.DataProcessing.ProcessingAction.IDENTIFICATION_MAPPING)
        file_.store(out_file, map_)

    elif in_type == pms.Type.MZQ:
        file_ = pms.MzQuantMLFile()
        msq = pms.MSQuantifications()
        file_.load(in_file, msq)
        maps = msq.getConsensusMaps()
        for map_ in maps:
            mapper.annotate(map_, peptide_ids, protein_ids, use_subelements)
            addDataProcessing(
                map_, params,
                pms.DataProcessing.ProcessingAction.IDENTIFICATION_MAPPING)
        msq.setConsensusMaps(maps)
        file_.store(out_file, msq)

    else:
        raise Exception("invalid input file format")
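
# A minimal usage sketch (hypothetical file names; assumes the imports above
# and that the IDMapper defaults fit the data):
params = pms.IDMapper().getDefaults()
id_mapper("features.featureXML", "ids.idXML", "annotated.featureXML",
          params, use_centroid_rt=False, use_centroid_mz=False,
          use_subelements=False)
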
Example #2

def link(in_files, out_file, keep_subelements, params):

    in_types = set(pms.FileHandler.getType(in_) for in_ in in_files)

    if in_types == set((pms.Type.CONSENSUSXML, )):
        link_features = False
    elif in_types == set((pms.Type.FEATUREXML, )):
        link_features = True
    else:
        raise Exception("different kinds of input files")

    algorithm_parameters = params.copy("algorithm:", True)
    algorithm = pms.FeatureGroupingAlgorithmQT()
    algorithm.setParameters(algorithm_parameters)

    out_map = pms.ConsensusMap()
    fds = out_map.getColumnHeaders()
    if link_features:
        f = pms.FeatureXMLFile()
        maps = []
        for i, in_file in enumerate(in_files):
            map_ = pms.FeatureMap()
            f.load(in_file, map_)

            # set filedescriptions
            fd = fds.get(i, pms.ColumnHeader())
            fd.filename = in_file
            fd.size = map_.size()
            fd.unique_id = map_.getUniqueId()
            fds[i] = fd
            maps.append(map_)
        out_map.setColumnHeaders(fds)
        algorithm.group(maps, out_map)
    else:
        f = pms.ConsensusXMLFile()
        maps = []
        for i, in_file in enumerate(in_files):
            map_ = pms.ConsensusMap()
            f.load(in_file, map_)
            maps.append(map_)
        algorithm.group(maps, out_map)

        if not keep_subelements:
            for i in range(len(in_files)):
                # set filedescriptions
                fd = fds.get(i, pms.ColumnHeader())
                fd.filename = in_files[i]
                fd.size = maps[i].size()
                fd.unique_id = maps[i].getUniqueId()
                fds[i] = fd
            out_map.setColumnHeaders(fds)
        else:
            algorithm.transferSubelements(maps, out_map)

    out_map.setUniqueIds()
    addDataProcessing(out_map, params,
                      pms.DataProcessing.ProcessingAction.FEATURE_GROUPING)

    pms.ConsensusXMLFile().store(out_file, out_map)

    sizes = []
    for feat in out_map:
        sizes.append(feat.size())

    c = Counter(sizes)
    print("Number of consensus features:")
    for size, count in c.most_common():
        print("   of size %2d : %6d" % (size, count))
    print("        total : %6d" % out_map.size())
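
# A minimal usage sketch (hypothetical file names; assumes the imports above;
# the "algorithm:" prefix matches the params.copy("algorithm:", True) call):
params = pms.Param()
params.insert("algorithm:", pms.FeatureGroupingAlgorithmQT().getDefaults())
link(["run1.featureXML", "run2.featureXML"], "linked.consensusXML",
     keep_subelements=False, params=params)
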
Example #3
# Imports assumed by this example; default_store_aligned_feature_xml is a
# helper from the surrounding module (not shown here) that stores an aligned
# FeatureMap and returns the path it was written to.
import os
from pathlib import Path

import numpy as np
import pyopenms as oms

def align_feature_xmls(feature_xml_lis,
                       consensus_map_out_path="",
                       class_label_dict={}):
    """
    first apply pose clustering to include all features maps
      next link/group them across all features

    Each MS1 spectrum from raw-file will create a feature file -
    we need to load and align them to get unique and representative features
    :param feature_xml_lis:
    :param consensus_map_out_path:
    :return: consensus_map, consensus_map_out_path, measurement_names
    """
    # consensus map normalization and export happen at the end of this
    # function (median normalization via normalizeMaps, see below)

    # openms won't deal with posix paths - it wants strings instead,
    # so we need to make sure it gets those
    # sort them to make sure the feature matrix is also sorted
    feature_xml_lis = sorted([str(fx) for fx in feature_xml_lis])

    num_features_list = []
    for current_feature_xml_path in feature_xml_lis:
        # load features into FeatureMaps
        cm = oms.FeatureMap()  # current_map
        oms.FeatureXMLFile().load(current_feature_xml_path, cm)
        # list_functions(current_map, prefix="")
        num_features_list.append(cm.size())
        del cm

    # should choose the feature file / experiment with most features as reference
    max_index = np.argmax(num_features_list)
    reference_map_path = feature_xml_lis[max_index]

    default_max_num_peaks_considered = 1000
    default_max_scaling_value = 10.0
    aligned_paths = []
    for i, current_feature_xml_path in enumerate(feature_xml_lis):
        # load features into FeatureMaps
        # pairwise alignment - so we need a master (reference) map
        reference_map = oms.FeatureMap()
        oms.FeatureXMLFile().load(reference_map_path, reference_map)

        current_map = oms.FeatureMap()
        oms.FeatureXMLFile().load(current_feature_xml_path, current_map)

        # create a transformation description required as init for aligner
        transformation_description = oms.TransformationDescription()

        # adjust the max scaling parameter, otherwise alignment errors out on algae samples
        # adjust max_num_peaks_considered as well - it would likewise lead to errors on algae samples

        aligner = oms.MapAlignmentAlgorithmPoseClustering()
        aligner_params = aligner.getParameters()

        # print(aligner_params.asDict().keys())
        max_scaling_key = b'superimposer:max_scaling'
        # aligner_params.getEntry(max_scaling_key)
        aligner_params.setValue(max_scaling_key, default_max_scaling_value)

        max_num_peaks_key = b'max_num_peaks_considered'
        # aligner_params.getEntry(max_num_peaks_key)
        aligner_params.setValue(
            max_num_peaks_key,
            default_max_num_peaks_considered)  # default = 1000
        # need higher default for algae

        # decrease runtime by removing weak signals
        # print(aligner_params.asDict())
        num_used_points_key = b'superimposer:num_used_points'
        # aligner_params.getEntry(num_used_points_key)
        aligner_params.setValue(
            num_used_points_key,
            1000)  # half the default parameter, speed up alignment

        aligner.setParameters(aligner_params)

        aligner.setReference(reference_map)

        try:
            # run alignment
            aligner.align(current_map, transformation_description)
        except RuntimeError as re:
            if 'max_num_peaks_considered' in str(re):
                # retry with higher threshold - required for algae dataset
                default_max_num_peaks_considered = 15000  # 15 fold - makes it a lot slower but less error prone
                aligner_params.setValue(max_num_peaks_key,
                                        default_max_num_peaks_considered)
                default_max_scaling_value = 20.0  # need to increase to 20
                aligner_params.setValue(max_scaling_key,
                                        default_max_scaling_value)

                # max shift could also be off - issue for ckd dataset
                default_max_shift_value = 2000.0  # need to increase from 1000 to 2000
                max_shift_key = b'superimposer:max_shift'
                aligner_params.setValue(max_shift_key, default_max_shift_value)

                print(
                    f"Encountered GC/MS Clustering issue - setting 'max_num_peaks_considered' to {default_max_num_peaks_considered}, 'superimposer:max_scaling' to {default_max_scaling_value} and 'superimposer:max_shift' to {default_max_shift_value}"
                )
                aligner.setParameters(aligner_params)
                aligner.setReference(reference_map)
                aligner.align(current_map, transformation_description)
            else:
                # a different alignment error - re-raise instead of silently
                # continuing with an unaligned map
                raise

        current_map.updateRanges()
        reference_map.updateRanges()

        # update feature XML files - both reference and current
        updated_current_map_path = default_store_aligned_feature_xml(
            current_map, current_feature_xml_path)
        updated_reference_path = default_store_aligned_feature_xml(
            reference_map, reference_map_path)
        reference_map_path = updated_reference_path

        aligned_paths.append(updated_current_map_path)
        print(f"Finished alignment of {i}/{len(feature_xml_lis)-1}")

    # replace the reference entry with the updated reference map path
    aligned_paths[max_index] = reference_map_path

    #   link/group them across features to create consensus map

    grouper = oms.FeatureGroupingAlgorithmUnlabeled()
    # leave parameters default

    # according to openms documentation:
    #   b) Call "setReference", "addToGroup" (n times), "getResultMap" in that order.

    for i, current_feature_map_path in enumerate(aligned_paths):
        print(f"Grouping features {i}/{len(aligned_paths)-1}")
        current_map = oms.FeatureMap()
        oms.FeatureXMLFile().load(current_feature_map_path, current_map)

        if not i:
            # first iteration - use as reference
            grouper.setReference(i, current_map)

        else:
            grouper.addToGroup(i, current_map)

    # get consensus map
    consensus_map = grouper.getResultMap()

    # consensus map requires some mapping between ids and filenames - otherwise will complain
    print(f"Mapping aligned results back to class labels")
    class_label_fns = list(class_label_dict.keys())
    fds = {i: oms.ColumnHeader() for i, _ in enumerate(aligned_paths)}
    measurement_names = []
    for i, aligned_path in enumerate(aligned_paths):
        # fds[i].filename = b"file0"
        current_fn = Path(aligned_path).name  # filename including suffix

        # this is where we need to replace the feature_xml filenames with the ones from class_labels
        if class_label_dict:
            # could do longest substring match with each of the fns in class_label dict to find matching filename
            #   django will rename duplicate filenames instead of overwriting
            # or we expect both featureXML input and class_label_dict to be ordered - which they should be when using the getter
            fds[i].filename = class_label_fns[i]

        else:
            fds[i].filename = current_fn.encode(
                "UTF8")  # needs bytestring representation

        measurement_names.append(current_fn)

    consensus_map.setColumnHeaders(fds)

    #  cleanup aligned_feature_xmls - can be >30mb per file - so better remove them
    for ap in aligned_paths:
        os.remove(ap)

    #   do consensus map normalization and export to consensus files
    # using median normalization, also available are Quantile and "robust regression"
    normalizer = oms.ConsensusMapNormalizerAlgorithmMedian()

    # The Python signature of ConsensusMapNormalizerAlgorithmMedian.normalizeMaps
    # is incomplete (*args/**kwargs for the required parameters), and calling it
    # with the method as a string,
    #   normalizer.normalizeMaps(consensus_map, "NM_SCALE", "", "")
    # raises a TypeError asking for an int.
    """
    normalizer.normalizeMaps(map, method, acc_filter, desc_filter)
    map	ConsensusMap
    method	whether to use scaling or shifting to same median 
    acc_filter	string describing the regular expression for filtering accessions
    desc_filter	string describing the regular expression for filtering descriptions 
    """
    """
        method: probably 0 / 1 - referenced as Enumerator in OpenMS documentation
        from shell output can deduce normalization methods are
        0: NM_SCALE 	scale to same median using division/multiplication  
        1: NM_SHIFT 	shift using subtraction/addition
    """
    normalizer.normalizeMaps(consensus_map, 0, "", "")

    # export only if a path was given - otherwise skip the extra file management
    if consensus_map_out_path:
        print("Storing consensus xml")
        oms.ConsensusXMLFile().store(str(consensus_map_out_path),
                                     consensus_map)

    return consensus_map, measurement_names
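
# A minimal usage sketch (hypothetical paths; assumes the imports above and the
# default_store_aligned_feature_xml helper from the surrounding module):
feature_xmls = sorted(Path("feature_dir").glob("*.featureXML"))
consensus_map, names = align_feature_xmls(
    feature_xmls, consensus_map_out_path="consensus.consensusXML")
print(f"{consensus_map.size()} consensus features from {len(names)} measurements")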