Esempio n. 1
0
 def addPrecursor(self, precursor, peptide_group_label):
     """Attach ``precursor`` to the precursor group named ``peptide_group_label``.

     If no group is registered under that label in
     ``self.all_precursor_groups_``, a new ``PrecursorGroup`` is created
     (constructed with the label and ``self``), the precursor is added to
     it, and the group is then stored under the label. Otherwise the
     precursor is added to the group returned by ``getPrecursorGroup``.
     """
     known_groups = self.all_precursor_groups_

     # Unknown label: build the group first, register it afterwards.
     if peptide_group_label not in known_groups:
         fresh_group = PrecursorGroup(peptide_group_label, self)
         fresh_group.addPrecursor(precursor)
         known_groups[peptide_group_label] = fresh_group
         return

     # Known label: delegate to the already registered group.
     self.getPrecursorGroup(peptide_group_label).addPrecursor(precursor)
Esempio n. 2
0
 def addPrecursor(self, precursor, peptide_group_label):
     """Add ``precursor`` to the precursor group labelled ``peptide_group_label``.

     When the label is already a key of ``self.all_precursor_groups_`` the
     precursor is handed to the group returned by ``getPrecursorGroup``.
     Otherwise a new ``PrecursorGroup`` is created (constructed with the
     label and ``self``), the precursor is added to it, and the group is
     stored under the label.
     """
     # Use the ``in`` operator rather than ``dict.has_key``: ``has_key`` was
     # removed in Python 3, while ``in`` behaves the same on Python 2 and 3.
     if peptide_group_label in self.all_precursor_groups_:
         self.getPrecursorGroup(peptide_group_label).addPrecursor(precursor)
     else:
         prec_gr = PrecursorGroup(peptide_group_label, self)
         prec_gr.addPrecursor(precursor)
         self.all_precursor_groups_[peptide_group_label] = prec_gr
def analyze_multipeptide_cluster(current_mpep, cnt, new_exp, swath_chromatograms,
                          transformation_collection_, border_option, selected_pg, cluster_id,
                          onlyExtractFromRun=None, tree=None, mat=None, is_test=False):
    """Try to impute a peakgroup for every run lacking one in this cluster.

    For each run in ``new_exp.runs`` that has no peakgroup in ``selected_pg``,
    integration borders are determined, the chromatogram is integrated with
    ``integrate_chromatogram`` and, unless that returns ``"NA"``, the new
    peakgroup is wrapped in a ``GeneralPrecursor`` and stored in
    ``current_mpep``.

    Border selection, in order of precedence:

    1. If ``current_mpep`` already holds a precursor group for the run that
       contains peakgroups with this ``cluster_id``, the ``leftWidth`` /
       ``rightWidth`` of the one with the lowest FDR score are reused.
    2. If ``tree`` is not None: ``integrationBorderShortestPath``.
    3. If ``mat`` is not None: ``integrationBorderShortestDistance``.
    4. Otherwise: ``integrationBorderReference``.

    Args:
        current_mpep: container queried with ``hasPrecursorGroup`` /
            ``getPrecursorGroup`` by run id and extended through ``insert``.
        cnt: counter object; ``peakgroups``, ``imputations`` and
            ``imputation_succ`` are incremented here, and it is also passed
            on to ``integrate_chromatogram``.
        new_exp: experiment object exposing ``runs``.
        swath_chromatograms: forwarded to ``integrate_chromatogram``.
        transformation_collection_: forwarded to the border functions.
        border_option: forwarded to ``integrationBorderReference``.
        selected_pg: peakgroups already selected for this cluster; the first
            entry serves as the template for integration and supplies the
            ``sequence`` / ``protein_name`` of new precursors.
        cluster_id: cluster id used to match existing peakgroups and set on
            every newly created peakgroup.
        onlyExtractFromRun: if not None, imputation is attempted only for the
            run with this id.
        tree: if not None, selects the shortest-path border approach.
        mat: if not None (and ``tree`` is None), selects the
            shortest-distance border approach.
        is_test: forwarded to ``integrate_chromatogram``.

    Returns:
        None. Results are recorded in ``current_mpep`` and ``cnt``.
    """
    # Run id -> position in new_exp.runs. This does not depend on the run
    # being processed, so it is built once instead of once per imputation.
    rmap = dict((r.get_id(), i) for i, r in enumerate(new_exp.runs))

    # Iterate over a snapshot of the runs themselves rather than collecting
    # ids and searching the run back by id (assumes run ids are unique).
    for current_run in list(new_exp.runs):
        rid = current_run.get_id()

        # NOTE: diagnostics use a single pre-formatted argument so that the
        # same text is printed by the Python 2 print statement and the
        # Python 3 print function.
        if VERBOSE:
            print("Shall work on run  %s" % (rid,))
        cnt.peakgroups += 1

        # A peakgroup was already selected for this run: nothing to impute.
        if any(pg.peptide.run.get_id() == rid for pg in selected_pg):
            continue

        # Skip if we should not extract from this run
        if onlyExtractFromRun is not None and onlyExtractFromRun != rid:
            continue

        cnt.imputations += 1

        if VERBOSE:
            print("Will try to fill NA in run %s for peptide %s" % (
                rid, selected_pg[0].peptide.get_id()))
            print("%s %s" % (tree, mat))

        inferred_from_isotope = False
        if current_mpep.hasPrecursorGroup(rid):
            # If another precursor was already picked for this run, check
            # if any peakgroups of the same cluster exist and if so, try to
            # use the one with the minimal FDR score to infer the boundaries.
            prgr = current_mpep.getPrecursorGroup(rid)
            candidates = [pg_ for prec_ in prgr for pg_ in prec_.peakgroups
                          if pg_.get_cluster_id() == cluster_id]
            if candidates:
                # TODO if the boundaries were inferred (e.g. fdr score > 1.0), should we not use them?
                # Select on the score alone: comparing (score, peakgroup)
                # tuples would fall back to comparing the peakgroup objects
                # themselves whenever two scores are equal.
                best_pg = min(candidates, key=lambda pg_: pg_.get_fdr_score())
                border_l = float(best_pg.get_value("leftWidth"))
                border_r = float(best_pg.get_value("rightWidth"))
                inferred_from_isotope = True

                if VERBOSE:
                    print("Will try to infer from isotopically modified version of the same run!")
                    print("Precursor is %s" % (prgr,))
                    for prec in prgr:
                        print(" -- %s" % (prec,))
                        for pg in prec.peakgroups:
                            print("  *  %s" % (pg,))
                    print("Min fdr pg %s  border %s / %s" % (best_pg, border_l, border_r))

        if inferred_from_isotope:
            # Borders were already taken from an existing peakgroup.
            pass
        elif tree is not None:
            ## Use the closest path approach
            border_l, border_r = integrationBorderShortestPath(
                selected_pg, rid, transformation_collection_, tree)
        elif mat is not None:
            ## Use the closest overall run approach
            border_l, border_r = integrationBorderShortestDistance(
                selected_pg, rid, transformation_collection_, mat, rmap)
        else:
            ## Use the reference-based approach
            border_l, border_r = integrationBorderReference(
                new_exp, selected_pg, rid, transformation_collection_, border_option)

        newpg = integrate_chromatogram(selected_pg[0], current_run, swath_chromatograms,
                                       border_l, border_r, cnt, is_test)

        # integrate_chromatogram signals failure with the string "NA".
        if newpg == "NA":
            continue

        if VERBOSE:
            print("Managed to fill NA in run %s with value %s / borders %s %s" % (
                rid, newpg.get_value("Intensity"), border_l, border_r))
        cnt.imputation_succ += 1

        transition_group_id = newpg.get_value("transition_group_id")
        try:
            peptide_label = newpg.get_value("peptide_group_label")
        except KeyError:
            # No explicit group label: fall back to the transition group id.
            peptide_label = transition_group_id

        # Create new precursor
        precursor = GeneralPrecursor(transition_group_id, current_run)
        newpg.setClusterID(cluster_id)
        newpg.peptide = precursor
        precursor.add_peakgroup(newpg)
        precursor.sequence = selected_pg[0].peptide.sequence
        precursor.protein_name = selected_pg[0].peptide.protein_name

        if current_mpep.hasPrecursorGroup(rid):
            prec_group = current_mpep.getPrecursorGroup(rid)
            existing_precursor = prec_group.getPrecursor(transition_group_id)
            if existing_precursor is None:
                # No precursors exists yet for this transition_group_id - this
                # means that we have a new run for which a precursor group already
                # exists (e.g. we have found a light version already) and are now
                # dealing with the heavy ...
                prec_group.addPrecursor(precursor)
            else:
                # Likely, another peakgroup from the same run and peptide was
                # picked already but not from the same cluster
                existing_precursor.add_peakgroup(newpg)
        else:
            # Create new precursor group and insert
            precursor_group = PrecursorGroup(peptide_label, current_run)
            precursor_group.addPrecursor(precursor)
            current_mpep.insert(rid, precursor_group)
def analyze_multipeptide_cluster(current_mpep, cnt, new_exp, swath_chromatograms,
                          transformation_collection_, border_option, selected_pg, cluster_id,
                          onlyExtractFromRun=None, tree=None, mat=None, is_test=False):
    """Try to impute a peakgroup for every run lacking one in this cluster.

    For each run in ``new_exp.runs`` that has no peakgroup in ``selected_pg``,
    integration borders are determined, the chromatogram is integrated with
    ``integrate_chromatogram`` and, unless that returns ``"NA"``, the new
    peakgroup is wrapped in a ``GeneralPrecursor`` and stored in
    ``current_mpep``.

    Border selection, in order of precedence:

    1. If ``current_mpep`` already holds a precursor group for the run that
       contains peakgroups with this ``cluster_id``, the ``leftWidth`` /
       ``rightWidth`` of the one with the lowest FDR score are reused.
    2. If ``tree`` is not None: ``integrationBorderShortestPath``.
    3. If ``mat`` is not None: ``integrationBorderShortestDistance``.
    4. Otherwise: ``integrationBorderReference``.

    Args:
        current_mpep: container queried with ``hasPrecursorGroup`` /
            ``getPrecursorGroup`` by run id and extended through ``insert``.
        cnt: counter object; ``peakgroups``, ``imputations`` and
            ``imputation_succ`` are incremented here, and it is also passed
            on to ``integrate_chromatogram``.
        new_exp: experiment object exposing ``runs``.
        swath_chromatograms: forwarded to ``integrate_chromatogram``.
        transformation_collection_: forwarded to the border functions.
        border_option: forwarded to ``integrationBorderReference``.
        selected_pg: peakgroups already selected for this cluster; the first
            entry serves as the template for integration and supplies the
            ``sequence`` / ``protein_name`` of new precursors.
        cluster_id: cluster id used to match existing peakgroups and set on
            every newly created peakgroup.
        onlyExtractFromRun: if not None, imputation is attempted only for the
            run with this id.
        tree: if not None, selects the shortest-path border approach.
        mat: if not None (and ``tree`` is None), selects the
            shortest-distance border approach.
        is_test: forwarded to ``integrate_chromatogram``.

    Returns:
        None. Results are recorded in ``current_mpep`` and ``cnt``.
    """
    # Run id -> position in new_exp.runs. This does not depend on the run
    # being processed, so it is built once instead of once per imputation.
    rmap = dict((r.get_id(), i) for i, r in enumerate(new_exp.runs))

    # Iterate over a snapshot of the runs themselves rather than collecting
    # ids and searching the run back by id (assumes run ids are unique).
    for current_run in list(new_exp.runs):
        rid = current_run.get_id()

        # NOTE: diagnostics use a single pre-formatted argument so that the
        # same text is printed by the Python 2 print statement and the
        # Python 3 print function.
        if VERBOSE:
            print("Shall work on run  %s" % (rid,))
        cnt.peakgroups += 1

        # A peakgroup was already selected for this run: nothing to impute.
        if any(pg.peptide.run.get_id() == rid for pg in selected_pg):
            continue

        # Skip if we should not extract from this run
        if onlyExtractFromRun is not None and onlyExtractFromRun != rid:
            continue

        cnt.imputations += 1

        if VERBOSE:
            print("Will try to fill NA in run %s for peptide %s" % (
                rid, selected_pg[0].peptide.get_id()))
            print("%s %s" % (tree, mat))

        inferred_from_isotope = False
        if current_mpep.hasPrecursorGroup(rid):
            # If another precursor was already picked for this run, check
            # if any peakgroups of the same cluster exist and if so, try to
            # use the one with the minimal FDR score to infer the boundaries.
            prgr = current_mpep.getPrecursorGroup(rid)
            candidates = [pg_ for prec_ in prgr for pg_ in prec_.peakgroups
                          if pg_.get_cluster_id() == cluster_id]
            if candidates:
                # TODO if the boundaries were inferred (e.g. fdr score > 1.0), should we not use them?
                # Select on the score alone: comparing (score, peakgroup)
                # tuples would fall back to comparing the peakgroup objects
                # themselves whenever two scores are equal.
                best_pg = min(candidates, key=lambda pg_: pg_.get_fdr_score())
                border_l = float(best_pg.get_value("leftWidth"))
                border_r = float(best_pg.get_value("rightWidth"))
                inferred_from_isotope = True

                if VERBOSE:
                    print("Will try to infer from isotopically modified version of the same run!")
                    print("Precursor is %s" % (prgr,))
                    for prec in prgr:
                        print(" -- %s" % (prec,))
                        for pg in prec.peakgroups:
                            print("  *  %s" % (pg,))
                    print("Min fdr pg %s  border %s / %s" % (best_pg, border_l, border_r))

        if inferred_from_isotope:
            # Borders were already taken from an existing peakgroup.
            pass
        elif tree is not None:
            ## Use the closest path approach
            border_l, border_r = integrationBorderShortestPath(
                selected_pg, rid, transformation_collection_, tree)
        elif mat is not None:
            ## Use the closest overall run approach
            border_l, border_r = integrationBorderShortestDistance(
                selected_pg, rid, transformation_collection_, mat, rmap)
        else:
            ## Use the reference-based approach
            border_l, border_r = integrationBorderReference(
                new_exp, selected_pg, rid, transformation_collection_, border_option)

        newpg = integrate_chromatogram(selected_pg[0], current_run, swath_chromatograms,
                                       border_l, border_r, cnt, is_test)

        # integrate_chromatogram signals failure with the string "NA".
        if newpg == "NA":
            continue

        if VERBOSE:
            print("Managed to fill NA in run %s with value %s / borders %s %s" % (
                rid, newpg.get_value("Intensity"), border_l, border_r))
        cnt.imputation_succ += 1

        transition_group_id = newpg.get_value("transition_group_id")
        try:
            peptide_label = newpg.get_value("peptide_group_label")
        except KeyError:
            # No explicit group label: fall back to the transition group id.
            peptide_label = transition_group_id

        # Create new precursor
        precursor = GeneralPrecursor(transition_group_id, current_run)
        newpg.setClusterID(cluster_id)
        newpg.peptide = precursor
        precursor.add_peakgroup(newpg)
        precursor.sequence = selected_pg[0].peptide.sequence
        precursor.protein_name = selected_pg[0].peptide.protein_name

        if current_mpep.hasPrecursorGroup(rid):
            prec_group = current_mpep.getPrecursorGroup(rid)
            existing_precursor = prec_group.getPrecursor(transition_group_id)
            if existing_precursor is None:
                # No precursors exists yet for this transition_group_id - this
                # means that we have a new run for which a precursor group already
                # exists (e.g. we have found a light version already) and are now
                # dealing with the heavy ...
                prec_group.addPrecursor(precursor)
            else:
                # Likely, another peakgroup from the same run and peptide was
                # picked already but not from the same cluster
                existing_precursor.add_peakgroup(newpg)
        else:
            # Create new precursor group and insert
            precursor_group = PrecursorGroup(peptide_label, current_run)
            precursor_group.addPrecursor(precursor)
            current_mpep.insert(rid, precursor_group)