def setUp(self):
    """Set up a file-based test fixture.

    Reads the peakgroups CSV test data, builds an MRExperiment with all
    multipeptides, aligns every run against every run (linear model) and
    selects two specific multipeptides for the individual tests.
    """
    # Set up dirs (os.path.join accepts multiple components)
    self.dirname = os.path.dirname(os.path.abspath(__file__))
    self.topdir = os.path.join(self.dirname, "..", "..")
    self.datadir = os.path.join(self.topdir, "test", "data")
    self.scriptdir = os.path.join(self.topdir, "analysis")

    # Set up files
    peakgroups_file = os.path.join(self.datadir, "imputeValues/imputeValues_5_input.csv")

    # Parameters
    self.initial_alignment_cutoff = 0.0001
    fdr_cutoff_all_pg = 1.0  # keep all peakgroups, no FDR filtering
    max_rt_diff = 30

    # Read input
    reader = SWATHScoringReader.newReader([peakgroups_file], "openswath", readmethod="complete")
    self.new_exp = MRExperiment()
    self.new_exp.runs = reader.parse_files()
    self.multipeptides = self.new_exp.get_all_multipeptides(fdr_cutoff_all_pg, verbose=False)

    # Align all against all
    self.tr_data = transformations.LightTransformationData()
    spl_aligner = SplineAligner(self.initial_alignment_cutoff)
    for run_0 in self.new_exp.runs:
        for run_1 in self.new_exp.runs:
            helper.addDataToTrafo(self.tr_data, run_0, run_1, spl_aligner,
                                  self.multipeptides, "linear", max_rt_diff)

    # Select two interesting peptides
    pepname = "21517_C[160]NVVISGGTGSGK/2_run0 0 0"
    self.current_mpep1 = [m for m in self.multipeptides if m.getAllPeptides()[0].get_id() == pepname][0]
    pepname = "26471_GYEDPPAALFR/2_run0 0 0"
    self.current_mpep2 = [m for m in self.multipeptides if m.getAllPeptides()[0].get_id() == pepname][0]
def setUp(self):
    """Set up a synthetic test fixture with five mock runs.

    Builds three "anchor" multipeptides (one high-confidence peakgroup per
    run, RTs increasing linearly across runs) used for the run-to-run
    alignment, plus one multipeptide under test ("precursor_1") in which
    some runs carry an extra peakgroup and some runs lack the correct one.
    Finally aligns all runs against all runs with a linear model.
    """
    import msproteomicstoolslib.data_structures.Precursor as precursor
    import msproteomicstoolslib.data_structures.PrecursorGroup as precursor_group
    import msproteomicstoolslib.format.TransformationCollection as transformations
    from msproteomicstoolslib.algorithms.alignment.SplineAligner import SplineAligner
    import msproteomicstoolslib.algorithms.alignment.AlignmentHelper as helper

    def add_precursor(mpep, run, seq, pg_tuples):
        # Build one Precursor holding the given peakgroup tuples, wrap it in
        # a PrecursorGroup and insert it into the multipeptide for this run.
        p = precursor.Precursor(seq, run)
        for tpl in pg_tuples:
            p.add_peakgroup_tpl(tpl, seq, -1)
        prgr = precursor_group.PrecursorGroup(p.get_id(), run)
        prgr.addPrecursor(p)
        mpep.insert(run.get_id(), prgr)

    # Peakgroup tuple layout:
    #   0. id
    #   1. quality score (FDR)
    #   2. retention time (normalized)
    #   3. intensity
    mpeps = [Multipeptide() for _ in range(3)]
    for m in mpeps:
        m.set_nr_runs(5)

    # Parameters
    self.initial_alignment_cutoff = 0.001

    runs = [MockRun("0_%s" % (i + 1)) for i in range(5)]
    ids = 0
    for i in range(5):
        # Two alignment (anchor) peptides plus one noise peptide per run;
        # peakgroup ids are numbered consecutively across peptides and runs.
        for mpep, (seq, rt) in zip(mpeps, [
                ("anchorpeptide_1", 100 + i * 10),
                ("anchorpeptide_2", 1000 + i * 100),
                ("anchorpeptide_3", 500 + i * 40)]):
            add_precursor(mpep, runs[i], seq,
                          [("id_%s" % ids, 0.0001, rt, 10000)])
            ids += 1

    # The multipeptide under test
    m = Multipeptide()
    m.set_nr_runs(5)
    # Run 1: peakgroup 1, RT = 100 [correct]
    add_precursor(m, runs[0], "precursor_1",
                  [("peakgroup1", 0.01, 100, 10000)])
    # Run 2: peakgroup 2, RT = 112 [correct]; peakgroup 3, RT = 130
    add_precursor(m, runs[1], "precursor_1",
                  [("peakgroup2", 0.2, 112, 10000),
                   ("peakgroup3", 0.18, 130, 10000)])
    # Run 3: peakgroup 4, RT = 120 [correct]; peakgroup 5, RT = 130
    add_precursor(m, runs[2], "precursor_1",
                  [("peakgroup4", 0.2, 120, 10000),
                   ("peakgroup5", 0.17, 130, 10000)])
    # Run 4: peakgroup 6, missing [correct]; peakgroup 7, RT = 145
    add_precursor(m, runs[3], "precursor_1",
                  [("peakgroup7", 0.18, 145, 10000)])
    # Run 5: peakgroup 8, RT = 139 [correct]; peakgroup 9, missing
    add_precursor(m, runs[4], "precursor_1",
                  [("peakgroup8", 0.1, 139, 10000)])

    self.mpep = m
    self.exp = Dummy()
    self.exp.runs = runs
    mpeps.append(m)
    self.multipeptides = mpeps

    # Align all against all (linear model, max RT diff 30)
    self.tr_data = transformations.LightTransformationData()
    spl_aligner = SplineAligner(self.initial_alignment_cutoff)
    for run_0 in self.exp.runs:
        for run_1 in self.exp.runs:
            helper.addDataToTrafo(self.tr_data, run_0, run_1, spl_aligner,
                                  self.multipeptides, "linear", 30)
def setUp(self):
    """Build a five-run synthetic fixture.

    Creates three anchor multipeptides (one confident peakgroup per run,
    with retention times rising linearly across runs) for alignment, plus
    a fourth multipeptide ("precursor_1") whose runs contain a mixture of
    correct, extra and missing peakgroups. All runs are then aligned
    pairwise with a linear model.
    """
    import msproteomicstoolslib.data_structures.Precursor as precursor
    import msproteomicstoolslib.data_structures.PrecursorGroup as precursor_group
    import msproteomicstoolslib.format.TransformationCollection as transformations
    from msproteomicstoolslib.algorithms.alignment.SplineAligner import SplineAligner
    import msproteomicstoolslib.algorithms.alignment.AlignmentHelper as helper

    n_runs = 5

    # Peakgroup tuples are (id, quality score (FDR), normalized RT, intensity)
    mpeps = [Multipeptide() for _ in range(3)]
    for mp in mpeps:
        mp.set_nr_runs(n_runs)

    # Parameters
    self.initial_alignment_cutoff = 0.001

    runs = [MockRun("0_%s" % (k + 1)) for k in range(n_runs)]

    # Per-run anchor/noise peptides: (sequence, base RT, RT step per run).
    # Peakgroup ids ("id_0", "id_1", ...) are assigned consecutively in
    # the order anchor_1, anchor_2, anchor_3 within each run.
    anchor_specs = [
        ("anchorpeptide_1", 100, 10),
        ("anchorpeptide_2", 1000, 100),
        ("anchorpeptide_3", 500, 40),  # the noise peptide
    ]
    ids = 0
    for k in range(n_runs):
        for target, (seq, base_rt, step) in zip(mpeps, anchor_specs):
            pep = precursor.Precursor(seq, runs[k])
            pep.add_peakgroup_tpl(
                ("id_%s" % ids, 0.0001, base_rt + k * step, 10000), seq, -1)
            group = precursor_group.PrecursorGroup(pep.get_id(), runs[k])
            group.addPrecursor(pep)
            target.insert(runs[k].get_id(), group)
            ids += 1

    # Peakgroups of "precursor_1" per run: run 1 has only the correct one
    # (RT 100); runs 2 and 3 carry an additional interfering peakgroup;
    # run 4 is missing the correct one; run 5 is missing the second one.
    pg_by_run = [
        [("peakgroup1", 0.01, 100, 10000)],
        [("peakgroup2", 0.2, 112, 10000), ("peakgroup3", 0.18, 130, 10000)],
        [("peakgroup4", 0.2, 120, 10000), ("peakgroup5", 0.17, 130, 10000)],
        [("peakgroup7", 0.18, 145, 10000)],
        [("peakgroup8", 0.1, 139, 10000)],
    ]
    test_mpep = Multipeptide()
    test_mpep.set_nr_runs(n_runs)
    for k, tuples in enumerate(pg_by_run):
        pep = precursor.Precursor("precursor_1", runs[k])
        for tpl in tuples:
            pep.add_peakgroup_tpl(tpl, "precursor_1", -1)
        group = precursor_group.PrecursorGroup(pep.get_id(), runs[k])
        group.addPrecursor(pep)
        test_mpep.insert(runs[k].get_id(), group)

    self.mpep = test_mpep
    self.exp = Dummy()
    self.exp.runs = runs
    mpeps.append(test_mpep)
    self.multipeptides = mpeps

    # Pairwise linear alignment of every run against every run
    self.tr_data = transformations.LightTransformationData()
    spl_aligner = SplineAligner(self.initial_alignment_cutoff)
    for run_0 in self.exp.runs:
        for run_1 in self.exp.runs:
            helper.addDataToTrafo(self.tr_data, run_0, run_1, spl_aligner,
                                  self.multipeptides, "linear", 30)
def runSingleFileImputation(options, peakgroups_file, mzML_file, method, is_test):
    """Impute values across chromatograms

    Args:
        options: parsed command-line options (uses file_format,
            disable_isotopic_grouping, realign_method, border_option,
            disable_isotopic_transfer)
        peakgroups_file(filename): CSV file containing all peakgroups
        mzML_file(filename): mzML file containing chromatograms
        method(str): either "singleClosestRun" or "singleShortestPath"
        is_test(bool): passed through to analyze_multipeptides

    Returns:
        A tuple of:
            new_exp(AlignmentExperiment): experiment containing the aligned peakgroups
            multipeptides(list(AlignmentHelper.Multipeptide)): list of multipeptides

    Raises:
        Exception: if method is not one of the two supported values.

    This function will read the csv file with all peakgroups as well as the
    provided chromatogram file (.chrom.mzML). It will then try to impute
    missing values for those peakgroups where no values is currently
    present, reading the raw chromatograms.
    """

    # We do not want to exclude any peakgroups for noiseIntegration (we assume
    # that alignment has already happened)
    fdr_cutoff_all_pg = 1.0

    start = time.time()
    reader = SWATHScoringReader.newReader([peakgroups_file],
                                          options.file_format,
                                          readmethod="complete",
                                          enable_isotopic_grouping = not options.disable_isotopic_grouping)
    new_exp = Experiment()
    new_exp.runs = reader.parse_files()
    multipeptides = new_exp.get_all_multipeptides(fdr_cutoff_all_pg, verbose=False)
    print("Parsing the peakgroups file took %ss" % (time.time() - start) )

    mapping = {}
    precursors_mapping = {}
    sequences_mapping = {}
    protein_mapping = {}
    inferMapping([ mzML_file ], [ peakgroups_file ], mapping, precursors_mapping,
                 sequences_mapping, protein_mapping, verbose=False)
    # items() (not the py2-only iteritems()) so this also runs on Python 3
    mapping_inv = {v[0]: k for k, v in mapping.items()}
    if VERBOSE:
        print(mapping)

    # Do only a single run : read only one single file
    start = time.time()
    swath_chromatograms = SwathChromatogramCollection()
    swath_chromatograms.parseFromMzML([ mzML_file ], mapping_inv)
    print("Reading the chromatogram files took %ss" % (time.time() - start) )
    assert len(swath_chromatograms.getRunIDs() ) == 1
    rid = swath_chromatograms.getRunIDs()[0]

    start = time.time()
    initial_alignment_cutoff = 0.0001
    max_rt_diff = 30
    sd_data = -1 # We do not use the standard deviation data in this algorithm
    tr_data = transformations.LightTransformationData()
    spl_aligner = SplineAligner(initial_alignment_cutoff)

    if method == "singleClosestRun":
        # Align every run directly against the run present in the mzML file
        tree_mapped = None
        run_1 = [r for r in new_exp.runs if r.get_id() == rid][0]
        dist_matrix = getDistanceMatrix(new_exp, multipeptides, spl_aligner,
                                        singleRowId=run_1.get_id())
        print("Distance matrix took %ss" % (time.time() - start) )
        start = time.time()
        for run_0 in new_exp.runs:
            helper.addDataToTrafo(tr_data, run_0, run_1,
                                  spl_aligner, multipeptides, options.realign_method,
                                  max_rt_diff, sd_max_data_length=sd_data)
    elif method == "singleShortestPath":
        # Align along the edges of the minimum spanning tree of the runs
        dist_matrix = None
        tree = MinimumSpanningTree(getDistanceMatrix(new_exp, multipeptides, spl_aligner))
        tree_mapped = [(new_exp.runs[a].get_id(), new_exp.runs[b].get_id()) for a, b in tree]
        print("Distance matrix took %ss" % (time.time() - start) )
        start = time.time()
        for edge in tree:
            helper.addDataToTrafo(tr_data, new_exp.runs[edge[0]], new_exp.runs[edge[1]],
                                  spl_aligner, multipeptides, options.realign_method,
                                  max_rt_diff, sd_max_data_length=sd_data)
    else:
        raise Exception("Unknown method: " + method)

    print("Alignment took %ss" % (time.time() - start) )
    start = time.time()
    multipeptides = analyze_multipeptides(new_exp, multipeptides, swath_chromatograms,
                                          tr_data, options.border_option, rid,
                                          tree=tree_mapped, mat=dist_matrix,
                                          disable_isotopic_transfer=options.disable_isotopic_transfer,
                                          is_test=is_test)
    print("Analyzing the runs took %ss" % (time.time() - start) )

    return new_exp, multipeptides