def test_reference_2(self):

        rid = "0_1"
        self.tr_data.reference = "0_0" # set reference run to 0_0

        spl_aligner = SplineAligner(self.initial_alignment_cutoff)
        tree = MinimumSpanningTree(getDistanceMatrix(self.new_exp, self.multipeptides, spl_aligner))
        tree_mapped = [(self.new_exp.runs[a].get_id(), self.new_exp.runs[b].get_id()) for a,b in tree]

        # Select peakgroups, compute left/right border
        selected_pg = [pg for p in self.current_mpep1.getAllPeptides() for pg in p.get_all_peakgroups() if pg.get_cluster_id() == 1]
        border_l, border_r = integrationBorderReference(self.new_exp, selected_pg, 
            rid, self.tr_data, "median")

        # Reference 0_0 means that we transformed from 0_2 to 0_0 and then to 0_1
        self.assertAlmostEqual(border_l, 
                               self.tr_data.getTrafo("0_0", "0_1").predict(
                                 self.tr_data.getTrafo("0_2", "0_0").predict([ 240.0 ]) 
                               ))
        self.assertAlmostEqual(border_r, 
                               self.tr_data.getTrafo("0_0", "0_1").predict(
                                 self.tr_data.getTrafo("0_2", "0_0").predict([ 260.0 ]) 
                               ))

        self.assertAlmostEqual(border_l, 187.18146718146681)
        self.assertAlmostEqual(border_r, 202.00772200772167)
    def test_reference_1(self):

        rid = "0_0"
        self.tr_data.reference = "0_2" # set reference run to 0_2

        spl_aligner = SplineAligner(self.initial_alignment_cutoff)
        tree = MinimumSpanningTree(getDistanceMatrix(self.new_exp, self.multipeptides, spl_aligner))
        tree_mapped = [(self.new_exp.runs[a].get_id(), self.new_exp.runs[b].get_id()) for a,b in tree]

        # Select peakgroups, compute left/right border
        selected_pg = [pg for p in self.current_mpep1.getAllPeptides() for pg in p.get_all_peakgroups() if pg.get_cluster_id() == 1]
        border_l, border_r = integrationBorderReference(self.new_exp, selected_pg, 
            rid, self.tr_data, "median")

        # Direct transformation from 0_2 to 0_0
        self.assertAlmostEqual(border_l, self.tr_data.getTrafo("0_2", "0_0").predict([ 240.0 ])[0])
        self.assertAlmostEqual(border_r, self.tr_data.getTrafo("0_2", "0_0").predict([ 260.0 ])[0])

        self.assertAlmostEqual(border_l, 77.992277992277934)
        self.assertAlmostEqual(border_r, 84.1698841699)

        border_l, border_r = integrationBorderReference(self.new_exp, selected_pg, 
            rid, self.tr_data, "mean")
        self.assertAlmostEqual(border_l, 77.992277992277934)
        self.assertAlmostEqual(border_r, 84.1698841699)
        border_l, border_r = integrationBorderReference(self.new_exp, selected_pg, 
            rid, self.tr_data, "max_width")
        self.assertAlmostEqual(border_l, 77.992277992277934)
        self.assertAlmostEqual(border_r, 84.1698841699)

        self.assertRaises(Exception, integrationBorderReference, self.new_exp, selected_pg, 
            rid, self.tr_data, "dummy")
def doMSTAlignment(exp, multipeptides, max_rt_diff, rt_diff_isotope,
                   initial_alignment_cutoff, fdr_cutoff, aligned_fdr_cutoff,
                   smoothing_method, method, use_RT_correction,
                   stdev_max_rt_per_run, use_local_stdev, mst_use_ref, force):
    """
    Minimum Spanning Tree (MST) based local alignment
    """

    spl_aligner = SplineAligner(initial_alignment_cutoff, experiment=exp)

    if mst_use_ref:
        # force reference-based alignment
        bestrun = spl_aligner._determine_best_run(exp)
        ref = bestrun.get_id()
        refrun_id, refrun = [(i, run) for i, run in enumerate(exp.runs)
                             if run.get_id() == ref][0]
        tree = [(i, refrun_id) for i in range(len(exp.runs)) if i != refrun_id]
    else:
        tree = MinimumSpanningTree(
            getDistanceMatrix(exp, multipeptides, spl_aligner))

    print("Computed Tree:", tree)

    # Get alignments
    tr_data = LightTransformationData()
    for edge in tree:
        addDataToTrafo(tr_data,
                       exp.runs[edge[0]],
                       exp.runs[edge[1]],
                       spl_aligner,
                       multipeptides,
                       smoothing_method,
                       max_rt_diff,
                       force=force)

    tree_mapped = [(exp.runs[a].get_id(), exp.runs[b].get_id())
                   for a, b in tree]

    # Perform work
    al = TreeConsensusAlignment(max_rt_diff,
                                fdr_cutoff,
                                aligned_fdr_cutoff,
                                rt_diff_isotope=rt_diff_isotope,
                                correctRT_using_pg=use_RT_correction,
                                stdev_max_rt_per_run=stdev_max_rt_per_run,
                                use_local_stdev=use_local_stdev)

    if method == "LocalMST":
        al.alignBestCluster(multipeptides, tree_mapped, tr_data)
    elif method == "LocalMSTAllCluster":
        al.alignAllCluster(multipeptides, tree_mapped, tr_data)

    # Store number of ambiguous cases (e.g. where more than one peakgroup below
    # the strict quality cutoff was found in the RT window) and the number of
    # cases where multiple possibilities were found.
    exp.nr_ambiguous = al.nr_ambiguous
    exp.nr_multiple_align = al.nr_multiple_align

    return tree
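# doMSTAlignment() above builds the alignment tree in one of two ways: a
# "star" tree that connects every run to a fixed reference run (mst_use_ref),
# or a minimum spanning tree over the run-to-run distance matrix. A minimal
# stand-alone sketch of the star-tree construction, using plain integers in
# place of exp.runs (the refrun_id value is a hypothetical choice):

def build_star_tree(n_runs, refrun_id):
    """Connect every run index to the reference run index, as in the
    mst_use_ref branch above."""
    return [(i, refrun_id) for i in range(n_runs) if i != refrun_id]

# e.g. build_star_tree(4, 2) -> [(0, 2), (1, 2), (3, 2)]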
def doMSTAlignment(exp, multipeptides, max_rt_diff, rt_diff_isotope, initial_alignment_cutoff,
                   fdr_cutoff, aligned_fdr_cutoff, smoothing_method, method,
                   use_RT_correction, stdev_max_rt_per_run, use_local_stdev, mst_use_ref):
    """
    Minimum Spanning Tree (MST) based local alignment
    """

    spl_aligner = SplineAligner(initial_alignment_cutoff)

    if mst_use_ref:
        # force reference-based alignment
        bestrun = spl_aligner._determine_best_run(exp)
        ref = bestrun.get_id()
        refrun_id, refrun = [ (i,run) for i, run in enumerate(exp.runs) if run.get_id() == ref][0]
        tree = [( i, refrun_id) for i in range(len(exp.runs)) if i != refrun_id]
    else:
        tree = MinimumSpanningTree(getDistanceMatrix(exp, multipeptides, spl_aligner))

    print("Computed Tree:", tree)

    
    # Get alignments
    tr_data = LightTransformationData()
    for edge in tree:
        addDataToTrafo(tr_data, exp.runs[edge[0]], exp.runs[edge[1]],
                       spl_aligner, multipeptides, smoothing_method,
                       max_rt_diff)

    tree_mapped = [ (exp.runs[a].get_id(), exp.runs[b].get_id()) for a,b in tree]

    # Perform work
    al = TreeConsensusAlignment(max_rt_diff, fdr_cutoff, aligned_fdr_cutoff, 
                                rt_diff_isotope=rt_diff_isotope,
                                correctRT_using_pg=use_RT_correction,
                                stdev_max_rt_per_run=stdev_max_rt_per_run,
                                use_local_stdev=use_local_stdev)

    if method == "LocalMST":
        al.alignBestCluster(multipeptides, tree_mapped, tr_data)
    elif method == "LocalMSTAllCluster":
        al.alignAllCluster(multipeptides, tree_mapped, tr_data)

    # Store number of ambiguous cases (e.g. where more than one peakgroup below
    # the strict quality cutoff was found in the RT window) and the number of
    # cases where multiple possibilities were found.
    exp.nr_ambiguous = al.nr_ambiguous
    exp.nr_multiple_align = al.nr_multiple_align

    return tree
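# Both variants of doMSTAlignment() convert the index-based tree into
# tree_mapped, a list of (run_id, run_id) edges that TreeConsensusAlignment
# consumes. A minimal stand-alone sketch of that mapping step, using a
# hypothetical DummyRun class in place of the real run objects:

class DummyRun:
    def __init__(self, run_id):
        self._id = run_id

    def get_id(self):
        return self._id

runs = [DummyRun("0_0"), DummyRun("0_1"), DummyRun("0_2")]
tree = [(0, 1), (1, 2)]  # edges as indices into runs
tree_mapped = [(runs[a].get_id(), runs[b].get_id()) for a, b in tree]
# tree_mapped == [("0_0", "0_1"), ("0_1", "0_2")]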
    def test_shortestDistance_1(self):

        rid = "0_0"
        spl_aligner = SplineAligner(self.initial_alignment_cutoff)
        dist_matrix = getDistanceMatrix(self.new_exp, self.multipeptides, spl_aligner)

        # Select peakgroups, compute left/right border
        selected_pg = [pg for p in self.current_mpep1.getAllPeptides() for pg in p.get_all_peakgroups() if pg.get_cluster_id() == 1]
        rmap = dict([(r.get_id(),i) for i,r in enumerate(self.new_exp.runs) ])
        border_l, border_r = integrationBorderShortestDistance(selected_pg, 
            rid, self.tr_data, dist_matrix, rmap)

        # Direct transformation from 0_2 to 0_0
        self.assertAlmostEqual(border_l, self.tr_data.getTrafo("0_2", "0_0").predict([ 240.0 ])[0])
        self.assertAlmostEqual(border_r, self.tr_data.getTrafo("0_2", "0_0").predict([ 260.0 ])[0])

        self.assertAlmostEqual(border_l, 77.992277992277934)
        self.assertAlmostEqual(border_r, 84.1698841699)
    def test_shortestPath_2(self):

        rid = "0_1"
        spl_aligner = SplineAligner(self.initial_alignment_cutoff)
        tree = MinimumSpanningTree(getDistanceMatrix(self.new_exp, self.multipeptides, spl_aligner))
        tree_mapped = [(self.new_exp.runs[a].get_id(), self.new_exp.runs[b].get_id()) for a,b in tree]

        # Select peakgroups, compute left/right border
        selected_pg = [pg for p in self.current_mpep1.getAllPeptides() for pg in p.get_all_peakgroups() if pg.get_cluster_id() == 1]
        border_l, border_r = integrationBorderShortestPath(selected_pg, 
            rid, self.tr_data, tree_mapped)

        # Shortest path means that we transformed from 0_2 to 0_1
        self.assertAlmostEqual(border_l, self.tr_data.getTrafo("0_2", "0_1").predict( [ 240.0 ] ))
        self.assertAlmostEqual(border_r, self.tr_data.getTrafo("0_2", "0_1").predict( [ 260.0 ] ))

        self.assertAlmostEqual(border_l, 168.03088803088787)
        self.assertAlmostEqual(border_r, 183.32046332046318)
    def test_shortestDistance_2(self):

        rid = "0_1"
        spl_aligner = SplineAligner(self.initial_alignment_cutoff)
        dist_matrix = getDistanceMatrix(self.new_exp, self.multipeptides, spl_aligner)

        # Select peakgroups, compute left/right border
        selected_pg = [pg for p in self.current_mpep1.getAllPeptides() for pg in p.get_all_peakgroups() if pg.get_cluster_id() == 1]
        rmap = dict([(r.get_id(),i) for i,r in enumerate(self.new_exp.runs) ])
        border_l, border_r = integrationBorderShortestDistance(selected_pg, 
            rid, self.tr_data, dist_matrix, rmap)

        # Shortest distance means that we transformed directly from 0_2 to 0_1
        self.assertAlmostEqual(border_l, self.tr_data.getTrafo("0_2", "0_1").predict([ 240.0 ])[0])
        self.assertAlmostEqual(border_r, self.tr_data.getTrafo("0_2", "0_1").predict([ 260.0 ])[0])

        self.assertAlmostEqual(border_l, 168.03088803088787)
        self.assertAlmostEqual(border_r, 183.32046332)
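# test_shortestPath_2 and test_shortestDistance_2 above both end up using the
# direct 0_2 -> 0_1 transformation, but they reach it differently: the
# shortest-path variant walks edges of the MST (tree_mapped), while the
# shortest-distance variant picks the closest run from the distance matrix.
# A minimal sketch of finding the run-id path through an MST edge list, using
# a hypothetical find_path() helper (plain BFS, not the library code):

from collections import deque

def find_path(tree_mapped, source, target):
    """Return the list of run ids connecting source to target in the tree."""
    neighbours = {}
    for a, b in tree_mapped:
        neighbours.setdefault(a, []).append(b)
        neighbours.setdefault(b, []).append(a)
    queue = deque([[source]])
    seen = {source}
    while queue:
        path = queue.popleft()
        if path[-1] == target:
            return path
        for nxt in neighbours.get(path[-1], []):
            if nxt not in seen:
                seen.add(nxt)
                queue.append(path + [nxt])
    return None

# e.g. find_path([("0_0", "0_1"), ("0_1", "0_2")], "0_2", "0_1")
# -> ["0_2", "0_1"]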
Example #8
def doMSTAlignment(exp, multipeptides, max_rt_diff, rt_diff_isotope,
                   initial_alignment_cutoff, fdr_cutoff, aligned_fdr_cutoff,
                   smoothing_method, method, use_RT_correction,
                   stdev_max_rt_per_run, use_local_stdev):
    """
    Minimum Spanning Tree (MST) based local alignment
    """

    spl_aligner = SplineAligner(initial_alignment_cutoff)
    tree = MinimumSpanningTree(
        getDistanceMatrix(exp, multipeptides, spl_aligner))
    print "Computed Tree:", tree

    # Get alignments
    tr_data = LightTransformationData()
    for edge in tree:
        addDataToTrafo(tr_data, exp.runs[edge[0]], exp.runs[edge[1]],
                       spl_aligner, multipeptides, smoothing_method,
                       max_rt_diff)

    tree_mapped = [(exp.runs[a].get_id(), exp.runs[b].get_id())
                   for a, b in tree]

    # Perform work
    al = TreeConsensusAlignment(max_rt_diff,
                                fdr_cutoff,
                                aligned_fdr_cutoff,
                                rt_diff_isotope=rt_diff_isotope,
                                correctRT_using_pg=use_RT_correction,
                                stdev_max_rt_per_run=stdev_max_rt_per_run,
                                use_local_stdev=use_local_stdev)

    if method == "LocalMST":
        al.alignBestCluster(multipeptides, tree_mapped, tr_data)
    elif method == "LocalMSTAllCluster":
        al.alignAllCluster(multipeptides, tree_mapped, tr_data)

    # Store number of ambiguous cases (e.g. where more than one peakgroup below
    # the strict quality cutoff was found in the RT window) and the number of
    # cases where multiple possibilities were found.
    exp.nr_ambiguous = al.nr_ambiguous
    exp.nr_multiple_align = al.nr_multiple_align
def runSingleFileImputation(options, peakgroups_file, mzML_file, method, is_test):
    """Impute values across chromatograms

    Args:
        peakgroups_file(filename): CSV file containing all peakgroups
        mzML_file(filename): mzML file containing chromatograms
    Returns:
        A tuple of:
            new_exp(AlignmentExperiment): experiment containing the aligned peakgroups
            multipeptides(list(AlignmentHelper.Multipeptide)): list of multipeptides

    This function will read the csv file with all peakgroups as well as the
    provided chromatogram file (.chrom.mzML). It will then try to impute
    missing values for those peakgroups where no values is currently present,
    reading the raw chromatograms.
    """

    # We do not want to exclude any peakgroups for noiseIntegration (we assume
    # that alignment has already happened)
    fdr_cutoff_all_pg = 1.0

    start = time.time()
    reader = SWATHScoringReader.newReader([peakgroups_file],
                                          options.file_format,
                                          readmethod="complete",
                                          enable_isotopic_grouping = not options.disable_isotopic_grouping)
    new_exp = Experiment()
    new_exp.runs = reader.parse_files()
    multipeptides = new_exp.get_all_multipeptides(fdr_cutoff_all_pg, verbose=False)
    print("Parsing the peakgroups file took %ss" % (time.time() - start) )

    mapping = {}
    precursors_mapping = {}
    sequences_mapping = {}
    protein_mapping = {}
    inferMapping([ mzML_file ], [ peakgroups_file ], mapping, precursors_mapping, sequences_mapping, protein_mapping, verbose=False)
    mapping_inv = dict([(v[0], k) for k, v in mapping.items()])
    if VERBOSE:
        print(mapping)

    # Do only a single run : read only one single file
    start = time.time()
    swath_chromatograms = SwathChromatogramCollection()
    swath_chromatograms.parseFromMzML([ mzML_file ], mapping_inv)
    print("Reading the chromatogram files took %ss" % (time.time() - start) )
    assert len(swath_chromatograms.getRunIDs() ) == 1
    rid = swath_chromatograms.getRunIDs()[0]

    start = time.time()
    initial_alignment_cutoff = 0.0001
    max_rt_diff = 30
    sd_data = -1 # We do not use the standard deviation data in this algorithm
    tr_data = transformations.LightTransformationData()
    spl_aligner = SplineAligner(initial_alignment_cutoff)

    if method == "singleClosestRun":
        tree_mapped = None

        run_1 = [r for r in new_exp.runs if r.get_id() == rid][0]
        dist_matrix = getDistanceMatrix(new_exp, multipeptides, spl_aligner, singleRowId=run_1.get_id())
        print("Distance matrix took %ss" % (time.time() - start) )

        start = time.time()
        for run_0 in new_exp.runs:
            helper.addDataToTrafo(tr_data, run_0, run_1, spl_aligner, multipeptides,
                options.realign_method, max_rt_diff, sd_max_data_length=sd_data)

    elif method == "singleShortestPath":
        dist_matrix = None

        tree = MinimumSpanningTree(getDistanceMatrix(new_exp, multipeptides, spl_aligner))
        tree_mapped = [(new_exp.runs[a].get_id(), new_exp.runs[b].get_id()) for a,b in tree]
        print("Distance matrix took %ss" % (time.time() - start) )

        start = time.time()
        for edge in tree:
            helper.addDataToTrafo(tr_data, new_exp.runs[edge[0]], 
                new_exp.runs[edge[1]], spl_aligner, multipeptides, 
                options.realign_method, max_rt_diff, sd_max_data_length=sd_data)

    else:
        raise Exception("Unknown method: " + method)

    print("Alignment took %ss" % (time.time() - start) )
    start = time.time()
    multipeptides = analyze_multipeptides(new_exp, multipeptides, swath_chromatograms,
        tr_data, options.border_option, rid, tree=tree_mapped, mat=dist_matrix,
        disable_isotopic_transfer=options.disable_isotopic_transfer, is_test=is_test)
    print("Analyzing the runs took %ss" % (time.time() - start) )

    return new_exp, multipeptides
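# runSingleFileImputation() above dereferences several attributes on the
# options object (file_format, disable_isotopic_grouping, realign_method,
# border_option, disable_isotopic_transfer). A minimal usage sketch, assuming
# the module-level imports used above are available; the attribute values and
# file names below are placeholders, not values prescribed by the library:

from argparse import Namespace

options = Namespace(
    file_format="openswath",         # assumed reader format name
    disable_isotopic_grouping=False,
    realign_method="splineR",        # assumed smoothing method name
    border_option="median",
    disable_isotopic_transfer=False,
)

new_exp, multipeptides = runSingleFileImputation(
    options, "feature_alignment.csv", "run0.chrom.mzML",
    method="singleShortestPath", is_test=False)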
Example #11
def doMSTAlignment(exp, multipeptides, max_rt_diff, rt_diff_isotope,
                   initial_alignment_cutoff, fdr_cutoff, aligned_fdr_cutoff,
                   smoothing_method, method, use_RT_correction,
                   stdev_max_rt_per_run, use_local_stdev, mst_use_ref, force,
                   optimized_cython):
    """
    Minimum Spanning Tree (MST) based local alignment
    """

    spl_aligner = SplineAligner(initial_alignment_cutoff, experiment=exp)

    if mst_use_ref:
        # force reference-based alignment
        bestrun = spl_aligner._determine_best_run(exp)
        ref = bestrun.get_id()
        refrun_id, refrun = [(i, run) for i, run in enumerate(exp.runs)
                             if run.get_id() == ref][0]
        tree = [(i, refrun_id) for i in range(len(exp.runs)) if i != refrun_id]
    else:
        start = time.time()
        tree = MinimumSpanningTree(
            getDistanceMatrix(exp, multipeptides, spl_aligner))
        print("Computing tree took %0.2fs" % (time.time() - start))

    print("Computed Tree:", tree)

    # Get alignments
    start = time.time()
    try:
        from msproteomicstoolslib.cython._optimized import CyLightTransformationData
        if optimized_cython:
            tr_data = CyLightTransformationData()
        else:
            tr_data = LightTransformationData()
    except ImportError:
        print(
            "WARNING: cannot import CyLightTransformationData, will use Python version (slower)."
        )
        tr_data = LightTransformationData()

    for edge in tree:
        addDataToTrafo(tr_data,
                       exp.runs[edge[0]],
                       exp.runs[edge[1]],
                       spl_aligner,
                       multipeptides,
                       smoothing_method,
                       max_rt_diff,
                       force=force)

    tree_mapped = [(exp.runs[a].get_id(), exp.runs[b].get_id())
                   for a, b in tree]
    print("Computing transformations for all edges took %0.2fs" %
          (time.time() - start))

    # Perform work
    al = TreeConsensusAlignment(max_rt_diff,
                                fdr_cutoff,
                                aligned_fdr_cutoff,
                                rt_diff_isotope=rt_diff_isotope,
                                correctRT_using_pg=use_RT_correction,
                                stdev_max_rt_per_run=stdev_max_rt_per_run,
                                use_local_stdev=use_local_stdev)

    if method == "LocalMST":
        if optimized_cython:
            al.alignBestCluster(multipeptides, tree_mapped, tr_data)
        else:
            print(
                "WARNING: cannot utilize optimized MST alignment (needs readmethod = cminimal), will use Python version (slower)."
            )
            al.alignBestCluster_legacy(multipeptides, tree_mapped, tr_data)
    elif method == "LocalMSTAllCluster":
        al.alignAllCluster(multipeptides, tree_mapped, tr_data)

    # Store number of ambiguous cases (e.g. where more than one peakgroup below
    # the strict quality cutoff was found in the RT window) and the number of
    # cases where multiple possibilities were found.
    exp.nr_ambiguous = al.nr_ambiguous
    exp.nr_multiple_align = al.nr_multiple_align

    return tree
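# The try/except block in the function above is an optional-dependency
# fallback: prefer the Cython container when it is importable and requested,
# otherwise fall back to the pure-Python LightTransformationData. A minimal
# stand-alone sketch of the same pattern, with hypothetical module and class
# names standing in for the real ones:

class PurePythonContainer:
    """Hypothetical pure-Python fallback container."""
    pass

def make_container(prefer_optimized):
    try:
        # hypothetical compiled extension; the import fails if it is absent
        from fast_backend import FastContainer
        if prefer_optimized:
            return FastContainer()
    except ImportError:
        print("WARNING: optimized backend unavailable, using Python version.")
    return PurePythonContainer()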
def doMSTAlignment(exp, multipeptides, max_rt_diff, rt_diff_isotope, initial_alignment_cutoff,
                   fdr_cutoff, aligned_fdr_cutoff, smoothing_method, method,
                   use_RT_correction, stdev_max_rt_per_run, use_local_stdev, mst_use_ref, force, optimized_cython):
    """
    Minimum Spanning Tree (MST) based local alignment
    """

    spl_aligner = SplineAligner(initial_alignment_cutoff, experiment=exp)

    if mst_use_ref:
        # force reference-based alignment
        bestrun = spl_aligner._determine_best_run(exp)
        ref = bestrun.get_id()
        refrun_id, refrun = [ (i,run) for i, run in enumerate(exp.runs) if run.get_id() == ref][0]
        tree = [( i, refrun_id) for i in range(len(exp.runs)) if i != refrun_id]
    else:
        start = time.time()
        tree = MinimumSpanningTree(getDistanceMatrix(exp, multipeptides, spl_aligner))
        print("Computing tree took %0.2fs" % (time.time() - start) )

    print("Computed Tree:", tree)

    
    # Get alignments
    start = time.time()
    try:
        from msproteomicstoolslib.cython._optimized import CyLightTransformationData
        if optimized_cython:
            tr_data = CyLightTransformationData()
        else:
            tr_data = LightTransformationData()
    except ImportError:
        print("WARNING: cannot import CyLightTransformationData, will use Python version (slower).")
        tr_data = LightTransformationData()

    for edge in tree:
        addDataToTrafo(tr_data, exp.runs[edge[0]], exp.runs[edge[1]],
                       spl_aligner, multipeptides, smoothing_method,
                       max_rt_diff, force=force)

    tree_mapped = [ (exp.runs[a].get_id(), exp.runs[b].get_id()) for a,b in tree]
    print("Computing transformations for all edges took %0.2fs" % (time.time() - start) )

    # Perform work
    al = TreeConsensusAlignment(max_rt_diff, fdr_cutoff, aligned_fdr_cutoff, 
                                rt_diff_isotope=rt_diff_isotope,
                                correctRT_using_pg=use_RT_correction,
                                stdev_max_rt_per_run=stdev_max_rt_per_run,
                                use_local_stdev=use_local_stdev)

    if method == "LocalMST":
        if optimized_cython:
            al.alignBestCluster(multipeptides, tree_mapped, tr_data)
        else:
            print("WARNING: cannot utilize optimized MST alignment (needs readmethod = cminimal), will use Python version (slower).")
            al.alignBestCluster_legacy(multipeptides, tree_mapped, tr_data)
    elif method == "LocalMSTAllCluster":
        al.alignAllCluster(multipeptides, tree_mapped, tr_data)

    # Store number of ambiguous cases (e.g. where more than one peakgroup below
    # the strict quality cutoff was found in the RT window) and the number of
    # cases where multiple possibilities were found.
    exp.nr_ambiguous = al.nr_ambiguous
    exp.nr_multiple_align = al.nr_multiple_align

    return tree