Beispiel #1
0
def testMapAlignerAlgorithmPoseClustering():
    """
    @tests:
     MapAlignmentAlgorithmPoseClustering.__init__
     MapAlignmentAlgorithmPoseClustering.alignFeatureMaps
     MapAlignmentAlgorithmPoseClustering.alignPeakMaps
     MapAlignmentAlgorithmPoseClustering.fitModel
     MapAlignmentAlgorithmPoseClustering.getDefaults
     MapAlignmentAlgorithmPoseClustering.getName
     MapAlignmentAlgorithmPoseClustering.getParameters
     MapAlignmentAlgorithmPoseClustering.setName
     MapAlignmentAlgorithmPoseClustering.setParameters
     MapAlignmentAlgorithmPoseClustering.setReference
     MapAlignmentAlgorithmPoseClustering.transformFeatureMaps
     MapAlignmentAlgorithmPoseClustering.transformPeakMaps
     """
    # Smoke test: construct the aligner, check the return types of the simple
    # accessors and make sure the remaining methods exist on the wrapper.
    aligner_cls = pyopenms.MapAlignmentAlgorithmPoseClustering
    aligner = aligner_cls()

    defaults = aligner.getDefaults()
    assert isinstance(defaults, pyopenms.Param)
    current = aligner.getParameters()
    assert isinstance(current, pyopenms.Param)
    name = aligner.getName()
    assert isinstance(name, str)

    aligner.setReference(0, "")

    # The methods below are only looked up, never called: a missing binding
    # surfaces as AttributeError.
    for instance_method in ("alignFeatureMaps", "alignPeakMaps"):
        getattr(aligner, instance_method)
    for class_level_method in ("transformPeakMaps", "transformFeatureMaps"):
        getattr(aligner_cls, class_level_method)
def getDefaultParameters():
    """Assemble the default ``Param`` tree for this tool.

    The defaults returned by ``getModelDefaults("linear")`` are inserted under
    the ``model:`` prefix and the parameters of a freshly constructed
    ``MapAlignmentAlgorithmPoseClustering`` under ``algorithm:``.

    :return: a new ``pms.Param`` holding both sections
    """
    sections = (
        ("model:", getModelDefaults("linear")),
        ("algorithm:",
         pms.MapAlignmentAlgorithmPoseClustering().getParameters()),
    )
    merged = pms.Param()
    for prefix, section in sections:
        merged.insert(prefix, section)
    return merged
def align(in_files, out_files, out_trafos, reference_index, reference_file,
          params):
    """Align every input map to one reference map with pose clustering.

    The inputs must either all be peak files (mzML / mzXML / mzData file
    types) or all be featureXML files; anything else raises ``Exception``.

    The reference is chosen in this order:

    1. ``reference_file`` if it is truthy,
    2. ``in_files[reference_index - 1]`` if ``reference_index > 0``
       (the index is 1-based),
    3. otherwise the input with the largest size (``loadSize`` for
       featureXML, ``getSize`` of the loaded experiment for peak files).

    :param in_files: paths of the maps to align
    :param out_files: if truthy, ``out_files[i]`` receives the transformed
        version of ``in_files[i]``
    :param out_trafos: if truthy, ``out_trafos[i]`` receives the
        transformation computed for ``in_files[i]``
    :param reference_index: 1-based index into ``in_files``; values <= 0 are
        treated as "not given"
    :param reference_file: explicit path of the reference map
    :param params: parameter object; its ``algorithm:`` subtree configures
        the aligner and the whole object is handed to ``addDataProcessing``
    :raises Exception: if the input file types are mixed or unsupported
    """
    detected_types = {pms.FileHandler.getType(path) for path in in_files}

    if detected_types <= {pms.Type.MZML, pms.Type.MZXML, pms.Type.MZDATA}:
        align_features = False
    elif detected_types == {pms.Type.FEATUREXML}:
        align_features = True
    else:
        raise Exception("different kinds of input files")

    aligner = pms.MapAlignmentAlgorithmPoseClustering()
    aligner.setParameters(params.copy("algorithm:", True))
    aligner.setLogType(pms.LogType.CMD)

    progress = pms.ProgressLogger()
    progress.setLogType(pms.LogType.CMD)

    # ---- pick the reference file -------------------------------------
    if reference_file:
        ref_path = reference_file
    elif reference_index > 0:
        ref_path = in_files[reference_index - 1]
    else:
        # No explicit reference: use the biggest input. Candidates are
        # (size, path) tuples, so ties on size fall back to path comparison.
        candidates = []
        if align_features:
            size_reader = pms.FeatureXMLFile()
            progress.startProgress(0, len(in_files),
                                   "Determine Reference map")
            for pos, path in enumerate(in_files):
                candidates.append((size_reader.loadSize(path), path))
                progress.setProgress(pos)
        else:
            # NOTE(review): every peak file is read with MzMLFile, although
            # the type check above also admits mzXML / mzData - confirm.
            peak_reader = pms.MzMLFile()
            scratch = pms.MSExperiment()
            progress.startProgress(0, len(in_files),
                                   "Determine Reference map")
            for pos, path in enumerate(in_files):
                peak_reader.load(path, scratch)
                scratch.updateRanges()
                candidates.append((scratch.getSize(), path))
                progress.setProgress(pos)
        progress.endProgress()
        ref_path = max(candidates)[1]

    # ---- featureXML load options -------------------------------------
    # Convex hulls and subordinates are skipped when no transformed maps
    # are written.
    feature_io = pms.FeatureXMLFile()
    if not out_files:
        slim = feature_io.getOptions()
        slim.setLoadConvexHull(False)
        slim.setLoadSubordinates(False)
        feature_io.setOptions(slim)

    # ---- load the reference map --------------------------------------
    if align_features:
        reference = pms.FeatureMap()
        ref_io = pms.FeatureXMLFile()
        # The reference is always loaded without hulls / subordinates.
        ref_options = feature_io.getOptions()
        ref_options.setLoadConvexHull(False)
        ref_options.setLoadSubordinates(False)
        ref_io.setOptions(ref_options)
        ref_io.load(ref_path, reference)
    else:
        reference = pms.MSExperiment()
        pms.MzMLFile().load(ref_path, reference)
    aligner.setReference(reference)

    # ---- align each input --------------------------------------------
    progress.startProgress(0, len(in_files), "Align input maps")
    for pos, in_file in enumerate(in_files):
        trafo = pms.TransformationDescription()

        # load
        if align_features:
            current = pms.FeatureMap()
            current_io = pms.FeatureXMLFile()
            current_io.setOptions(feature_io.getOptions())
            current_io.load(in_file, current)
        else:
            current = pms.MSExperiment()
            pms.MzMLFile().load(in_file, current)

        # The reference itself gets the identity transformation.
        if in_file == ref_path:
            trafo.fitModel("identity")
        else:
            aligner.align(current, trafo)

        # optionally transform and write the map
        if out_files:
            if align_features:
                pms.MapAlignmentTransformer.transformSingleFeatureMap(
                    current, trafo)
                addDataProcessing(current, params,
                                  pms.ProcessingAction.ALIGNMENT)
                current_io.store(out_files[pos], current)
            else:
                pms.MapAlignmentTransformer.transformSinglePeakMap(
                    current, trafo)
                addDataProcessing(current, params,
                                  pms.ProcessingAction.ALIGNMENT)
                pms.MzMLFile().store(out_files[pos], current)

        # optionally write the transformation itself
        if out_trafos:
            pms.TransformationXMLFile().store(out_trafos[pos], trafo)

        progress.setProgress(pos + 1)

    progress.endProgress()
Beispiel #4
0
def align(in_files, out_files, trafo_out_files, reference_index,
        reference_file, params):
    """Load all inputs, align them in one batch and write the results.

    The inputs must either all be peak files (mzML / mzXML / mzData file
    types) or all be featureXML files.

    :param in_files: paths of the maps to align
    :param out_files: ``out_files[i]`` receives the transformed version of
        ``in_files[i]``
    :param trafo_out_files: if truthy, paths that receive the computed
        transformations (paired with them via ``zip``)
    :param reference_index: passed to ``setReference`` of the aligner
    :param reference_file: passed to ``setReference`` of the aligner
    :param params: parameter object; ``model:`` selects and configures the
        model fit (type ``"none"`` skips it), ``algorithm:`` configures the
        aligner
    :raises Exception: if the input file types are mixed or unsupported
    """
    aligner = pms.MapAlignmentAlgorithmPoseClustering()
    aligner.setReference(reference_index, reference_file)

    model_section = params.copy("model:", True)
    model_name = model_section.getValue("type").toString()

    progress = pms.ProgressLogger()
    progress.setLogType(pms.LogType.CMD)

    aligner.setParameters(params.copy("algorithm:", True))

    trafos = []

    def read_all(new_container, read_into):
        # Load every input file into its own freshly created container.
        progress.startProgress(0, len(in_files), "loading input files")
        loaded = []
        for pos, path in enumerate(in_files):
            progress.setProgress(pos)
            container = new_container()
            read_into(path, container)
            loaded.append(container)
        progress.endProgress()
        return loaded

    def write_all(maps, write):
        # Annotate each map via addDataProcessing and write it to its
        # output path.
        progress.startProgress(0, len(out_files), "writing output files")
        for pos, path in enumerate(out_files):
            progress.setProgress(pos)
            write(path, addDataProcessing(maps[pos], params))
        progress.endProgress()

    found_types = {pms.FileHandler.getType(path) for path in in_files}

    if found_types <= {pms.Type.MZML, pms.Type.MZXML, pms.Type.MZDATA}:
        handler = pms.FileHandler()
        peak_maps = read_all(pms.MSExperiment, handler.loadExperiment)
        aligner.alignPeakMaps(peak_maps, trafos)
        if model_name != "none":
            aligner.fitModel(model_name, model_section, trafos)
        pms.MapAlignmentAlgorithmPoseClustering.transformPeakMaps(
            peak_maps, trafos)
        write_all(peak_maps, handler.storeExperiment)

    elif found_types == {pms.Type.FEATUREXML}:
        handler = pms.FeatureXMLFile()
        feature_maps = read_all(pms.FeatureMap, handler.load)
        aligner.alignFeatureMaps(feature_maps, trafos)
        if model_name != "none":
            aligner.fitModel(model_name, model_section, trafos)
        pms.MapAlignmentAlgorithmPoseClustering.transformFeatureMaps(
            feature_maps, trafos)
        write_all(feature_maps, handler.store)

    else:
        raise Exception("can not handle input file format")

    if trafo_out_files:
        for target, trafo in zip(trafo_out_files, trafos):
            pms.TransformationXMLFile().store(target, trafo)
Beispiel #5
0
def rtAlign(tables, refTable = None, destination = None, nPeaks=-1,
            numBreakpoints=5, maxRtDifference = 100, maxMzDifference = 0.3,
            maxMzDifferencePairfinder = 0.5, forceAlign=False):

    """ aligns feature tables with respect to retention times.
        the algorithm produces new tables with aligned data; the input
        tables are deep-copied before they are transformed.

        **note**: with *forceAlign=True* the ``rt_aligned`` meta flag of the
        peakmaps attached to the input tables is reset to *False*.

        Parameters:

            - *tables*: feature tables to align. each table must reference
              exactly one peakmap and provide the columns *mz* and *rt*.

            - *refTable*: extra reference table, if *None* the table
              with most features among *tables* is taken.

            - *destination*: target directory, handed to the plotting
              helper. if *None* the user is asked for a directory.

            - *nPeaks*: max number of peaks matched by superimposer, -1
              means: all peaks

            - *numBreakpoints*: number of break points of fitted spline.
              default:5, more points result in splines with higher variation.

            - *maxRtDifference*: max allowed difference in rt values for
              searching matching features
              (``pairfinder:distance_RT:max_difference``).

            - *maxMzDifference*: max allowed difference in mz values, in Da
              (``pairfinder:distance_MZ:max_difference``).

            - *maxMzDifferencePairfinder*: max allowed difference in mz
              values (``superimposer:mz_pair_max_distance``).

            - *forceAlign*: has to be *True* to align already rt aligned
              tables.

        Returns the list of aligned tables (same order as *tables*), or
        *None* if the directory dialog was cancelled.
    """

    import os.path
    import pyopenms
    import copy
    from  libms.DataStructures.Table import toOpenMSFeatureMap, Table
    import custom_dialogs

    # "basestring" only exists on Python 2; fall back to str elsewhere so the
    # type check below works on both major versions.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    assert refTable is None or isinstance(refTable, Table)
    assert destination is None or isinstance(destination, string_types)

    for table in tables:
        # check the type first: everything below relies on the Table API
        assert isinstance(table, Table), "non table object in tables"
        # collect all maps
        peakmaps = set(table.peakmap.values)
        assert len(peakmaps) == 1, "can only align features from one single peakmap"
        peakmap = peakmaps.pop()
        assert peakmap is not None, "None value for peakmaps not allowed"
        if forceAlign:
            peakmap.meta["rt_aligned"]=False
        elif peakmap.meta.get("rt_aligned"):
            message = "there are already rt_aligned peakmaps in the "\
                      "tables.\nyou have to provide the forceAlign "\
                      "parameter of this function\nto align all tables"
            raise Exception(message)
        # need mz and rt columns for alignment
        table.requireColumn("mz")
        table.requireColumn("rt")

    if destination is None:
        destination = custom_dialogs.askForDirectory()
        if destination is None:
            print("aborted")
            return

    if refTable is not None:
        peakmaps = set(refTable.peakmap.values)
        assert len(peakmaps) == 1, "can only align features from one single peakmap"
        peakmap = peakmaps.pop()
        assert peakmap is not None, "None value for peakmaps not allowed"
        # need mz and rt columns in reftable
        refTable.requireColumn("mz")
        refTable.requireColumn("rt")

    assert os.path.isdir(os.path.abspath(destination)), "target is no directory"

    # setup algorithm
    algo = pyopenms.MapAlignmentAlgorithmPoseClustering()
    algo.setLogType(pyopenms.LogType.CMD)

    ap = algo.getDefaults()
    ap["max_num_peaks_considered"] = nPeaks
    ap["superimposer:num_used_points"] = nPeaks
    ap["superimposer:mz_pair_max_distance"] = float(maxMzDifferencePairfinder)
    ap["pairfinder:distance_RT:max_difference"] = float(maxRtDifference)
    ap["pairfinder:distance_MZ:max_difference"] = float(maxMzDifference)
    ap["pairfinder:distance_MZ:unit"] = "Da"
    algo.setParameters(ap)

    # convert to pyOpenMS types and find map with max num features which
    # is taken as reference map:
    fms = [(toOpenMSFeatureMap(table), table) for table in tables]
    if refTable is None:
        refMap, refTable = max(fms, key=lambda pair: pair[0].size())
        print("")
        print(" ".join(("REFMAP IS",
                        os.path.basename(refTable.meta.get("source","<noname>")))))
    else:
        if refTable in tables:
            refMap = fms[tables.index(refTable)][0]
        else:
            refMap = toOpenMSFeatureMap(refTable)
    results = []
    for fm, table in fms:
        # we do not modify the existing table incl. peakmaps: (rt-values
        # might change below in _transformTable) !
        table = copy.deepcopy(table)
        if fm is refMap:
            # the reference itself needs no transformation
            results.append(table)
            continue
        sources = set(table.source.values)
        assert len(sources)==1, "multiple sources in table"
        source = sources.pop()
        filename = os.path.basename(source)
        print("")
        print(" ".join(("ALIGN FEATURES FROM ", filename)))
        print("")
        transformation = _computeTransformation(algo, refMap, fm, numBreakpoints)
        _plot_and_save(transformation, filename, destination)
        _transformTable(table, transformation)
        results.append(table)
    for t in results:
        t.meta["rt_aligned"] = True
    return results
Beispiel #6
0
 def __init__(self, **kwargs):
     """Initialise the entity around a new pose-clustering map aligner.

     A fresh ``MapAlignmentAlgorithmPoseClustering`` is passed as the first
     positional argument to the next ``__init__`` after ``MAEntity`` in the
     MRO; all keyword arguments are forwarded unchanged.
     """
     pose_clustering = oms.MapAlignmentAlgorithmPoseClustering()
     super(MAEntity, self).__init__(pose_clustering, **kwargs)
Beispiel #7
0
def align_feature_xmls(feature_xml_lis,
                       consensus_map_out_path="",
                       class_label_dict=None):
    """
    Align feature maps with pose clustering and group them into a
    median-normalized consensus map.

    Steps, in order:
      1. pick the featureXML with the most features as reference map
      2. align every map against the reference and store the result via
         ``default_store_aligned_feature_xml``
      3. link/group the aligned maps with ``FeatureGroupingAlgorithmUnlabeled``
      4. attach one column header (file name or class-label key) per map
      5. delete the aligned featureXML files again
      6. normalize the consensus map and optionally store it

    :param feature_xml_lis: paths of the featureXML files (str or path-like);
        they are converted to ``str`` and sorted before processing
    :param consensus_map_out_path: if truthy, the consensus map is stored at
        this path
    :param class_label_dict: optional mapping; if non-empty, its keys (in
        iteration order) are used as column header file names instead of the
        aligned file names - it is expected to be ordered like the sorted
        feature files
    :return: tuple ``(consensus_map, measurement_names)``
    """
    # A mutable default would be shared between calls - use None as sentinel.
    if class_label_dict is None:
        class_label_dict = {}

    # openms won't deal with posix paths - it wants strings instead.
    # Sorting makes sure the feature matrix is sorted as well.
    feature_xml_lis = sorted([str(fx) for fx in feature_xml_lis])

    num_features_list = []
    for current_feature_xml_path in feature_xml_lis:
        # load features into a FeatureMap just to count them
        cm = oms.FeatureMap()  # current_map
        oms.FeatureXMLFile().load(current_feature_xml_path, cm)
        num_features_list.append(cm.size())
        del cm

    # choose the feature file / experiment with most features as reference
    max_index = np.argmax(num_features_list)
    reference_map_path = feature_xml_lis[max_index]

    # These two values are raised on the first failing alignment and stay
    # raised for all following files.
    default_max_num_peaks_considered = 1000
    default_max_scaling_value = 10.0
    aligned_paths = []
    for i, current_feature_xml_path in enumerate(feature_xml_lis):
        # pairwise alignment - so the reference map is (re)loaded from its
        # current path in every iteration
        reference_map = oms.FeatureMap()
        oms.FeatureXMLFile().load(reference_map_path, reference_map)

        current_map = oms.FeatureMap()
        oms.FeatureXMLFile().load(current_feature_xml_path, current_map)

        # create a transformation description required as init for aligner
        transformation_description = oms.TransformationDescription()

        # max scaling and max num peaks are set explicitly - the library
        # defaults led to errors when running with algae samples
        aligner = oms.MapAlignmentAlgorithmPoseClustering()
        aligner_params = aligner.getParameters()

        max_scaling_key = b'superimposer:max_scaling'
        aligner_params.setValue(max_scaling_key, default_max_scaling_value)

        max_num_peaks_key = b'max_num_peaks_considered'
        aligner_params.setValue(max_num_peaks_key,
                                default_max_num_peaks_considered)

        # decrease runtime by using fewer points
        num_used_points_key = b'superimposer:num_used_points'
        aligner_params.setValue(
            num_used_points_key,
            1000)  # half the default parameter, speed up alignment

        aligner.setParameters(aligner_params)

        aligner.setReference(reference_map)

        try:
            # run alignment
            aligner.align(current_map, transformation_description)
        except RuntimeError as align_error:
            if 'max_num_peaks_considered' in str(align_error):
                # retry with higher thresholds - required for algae dataset
                # 15 fold - makes it a lot slower but less error prone
                default_max_num_peaks_considered = 15000
                aligner_params.setValue(max_num_peaks_key,
                                        default_max_num_peaks_considered)
                default_max_scaling_value = 20.0  # need to increase to 20
                aligner_params.setValue(max_scaling_key,
                                        default_max_scaling_value)

                # max shift could also be off - issue for ckd dataset
                # NOTE(review): unlike the two values above, the raised max
                # shift is not re-applied to later files - confirm intent.
                default_max_shift_value = 2000.0  # increase from 1000 to 2000
                max_shift_key = b'superimposer:max_shift'
                aligner_params.setValue(max_shift_key, default_max_shift_value)

                print(
                    f"Encountered GC/MS Clustering issue - setting 'max_num_peaks_considered' to {default_max_num_peaks_considered}, 'superimposer:max_scaling' to {default_max_scaling_value} and 'superimposer:max_shift' to {default_max_shift_value}"
                )
                aligner.setParameters(aligner_params)
                aligner.setReference(reference_map)
                aligner.align(current_map, transformation_description)
            else:
                # Any other RuntimeError used to be dropped without a trace.
                # Processing still continues with the map as it is, but the
                # failure is reported now.
                print(
                    f"Alignment of {current_feature_xml_path} raised an unexpected RuntimeError - continuing with the map as is: {align_error}"
                )

        current_map.updateRanges()
        reference_map.updateRanges()

        # update feature XML files - both reference and current
        updated_current_map_path = default_store_aligned_feature_xml(
            current_map, current_feature_xml_path)
        updated_reference_path = default_store_aligned_feature_xml(
            reference_map, reference_map_path)
        reference_map_path = updated_reference_path

        aligned_paths.append(updated_current_map_path)
        print(f"Finished alignment of {i}/{len(feature_xml_lis)-1}")

    # also replace here with new reference we updated the reference map to
    aligned_paths[max_index] = reference_map_path

    # link/group them across features to create consensus map
    grouper = oms.FeatureGroupingAlgorithmUnlabeled()
    # leave parameters default

    # according to openms documentation:
    #   b) Call "setReference", "addToGroup" (n times), "getResultMap" in that order.
    for i, current_feature_map_path in enumerate(aligned_paths):
        print(f"Grouping features {i}/{len(aligned_paths)-1}")
        current_map = oms.FeatureMap()
        oms.FeatureXMLFile().load(current_feature_map_path, current_map)

        if not i:
            # first iteration - use as reference
            grouper.setReference(i, current_map)
        else:
            grouper.addToGroup(i, current_map)

    # get consensus map
    consensus_map = grouper.getResultMap()

    # consensus map requires some mapping between ids and filenames -
    # otherwise it will complain
    print("Mapping aligned results back to class labels")
    class_label_fns = list(class_label_dict.keys())
    fds = {i: oms.ColumnHeader() for i in range(len(aligned_paths))}
    measurement_names = []
    for i, aligned_path in enumerate(aligned_paths):
        # file name without directory (stem + suffix)
        current_fn = Path(aligned_path).name

        if class_label_dict:
            # replace the feature_xml filenames with the ones from
            # class_labels; both are expected to be ordered the same way
            fds[i].filename = class_label_fns[i]
        else:
            # needs bytestring representation
            fds[i].filename = current_fn.encode("UTF8")

        measurement_names.append(current_fn)

    consensus_map.setColumnHeaders(fds)

    # cleanup aligned_feature_xmls - can be >30mb per file - so better remove them
    for ap in aligned_paths:
        os.remove(ap)

    # consensus map normalization using median normalization
    normalizer = oms.ConsensusMapNormalizerAlgorithmMedian()

    # normalizer.normalizeMaps(map, method, acc_filter, desc_filter)
    #   map          ConsensusMap
    #   method       whether to use scaling or shifting to same median
    #   acc_filter   regular expression for filtering accessions
    #   desc_filter  regular expression for filtering descriptions
    # Passing the method as a string ("NM_SCALE") gives a TypeError requiring
    # an int; the method is probably the enumerator value:
    #   0: NM_SCALE  scale to same median using division/multiplication
    #   1: NM_SHIFT  shift using subtraction/addition
    normalizer.normalizeMaps(consensus_map, 0, "", "")

    # don't export if not required - requires more file management
    if consensus_map_out_path:
        print("Storing consensus xml")
        oms.ConsensusXMLFile().store(str(consensus_map_out_path),
                                     consensus_map)

    return consensus_map, measurement_names
Beispiel #8
0
    def choose_ma_algorithm(self, **kwargs):
        """Store a new ``MapAlignmentAlgorithmPoseClustering`` on ``self.ma_algorithm``.

        ``**kwargs`` is accepted but not used in the visible part of the body.
        """

        #create map alignment algorithm
        self.ma_algorithm = oms.MapAlignmentAlgorithmPoseClustering()