예제 #1
0
  def test_runuis_swath(self):
    """Run the UIS calculation for the fixed precursor window Q1 500-525.

    The interfering precursors are collected in a plain Python list (every
    precursor whose isotope pattern reaches into the window) and the
    collisions are computed with
    ``collider.get_coll_per_peptide_from_precursors``.  The resulting rows
    are handed to ``self.get_final_report`` and checked with
    ``self.check_final_report_swath``.
    """
    self.assertEqual(len(self.precursors_to_evaluate), 905)
    par = self.par
    R = self.R
    prepare = []

    self.min_q1 = 500
    self.max_q1 = 525

    # Get the precursors (now for 500-525 instead of the full range)
    ###########################################################################
    myprecursors = Precursors()
    cursor = self.db.cursor()
    myprecursors.getFromDB(par, cursor, self.min_q1 - par.q1_window, self.max_q1 + par.q1_window)
    # The returned tree is not used in this test; the call itself is kept in
    # case build_rangetree() has side effects on myprecursors.
    myprecursors.build_rangetree()
    self.precursors_to_evaluate = myprecursors.getPrecursorsToEvaluate(self.min_q1, self.max_q1)
    self.assertEqual(len(self.precursors_to_evaluate), 39)

    # Candidate interferences: fetch from (min_q1 - isotope_correction) so that
    # lighter precursors are included, then keep only those for which
    # included_in_isotopic_range() reports an overlap with (min_q1, max_q1).
    isotope_correction = par.isotopes_up_to * R.mass_diffC13 / min(par.parent_charges)
    temp_precursors = Precursors()
    temp_precursors.getFromDB(par, self.db.cursor(), self.min_q1 - isotope_correction, self.max_q1)
    all_swath_precursors = [
        p for p in temp_precursors.precursors
        if p.included_in_isotopic_range(self.min_q1, self.max_q1, par)]

    for precursor in self.precursors_to_evaluate:

      q3_low, q3_high = par.get_q3range_transitions()
      transitions = precursor.calculate_transitions(q3_low, q3_high)
      nr_transitions = len(transitions)

      if par.ssrcalc_window > 1000:
          # Very wide SSRCalc window: skip the retention-time filter and only
          # exclude the precursor's own transition group.
          precursors_obj = [p for p in all_swath_precursors
                            if p.transition_group != precursor.transition_group]
      else:
          ssrcalc_low = precursor.ssrcalc - par.ssrcalc_window
          ssrcalc_high = precursor.ssrcalc + par.ssrcalc_window
          precursors_obj = [p for p in all_swath_precursors
                            if p.transition_group != precursor.transition_group
                            and ssrcalc_low < p.ssrcalc < ssrcalc_high]
      collisions_per_peptide = collider.get_coll_per_peptide_from_precursors(
          self.acollider, transitions, precursors_obj, par, precursor)

      non_uis_list = collider.get_nonuis_list(collisions_per_peptide, par.max_uis)

      # One row per order: (number of non-UIS combinations, number of possible
      # combinations, parent id, order, -1).
      for order in range(1, min(par.max_uis + 1, nr_transitions + 1)):
        prepare.append((len(non_uis_list[order]),
                        collider.choose(nr_transitions, order),
                        precursor.parent_id, order, -1))

    self.assertEqual(len(prepare), 39*par.max_uis)
    self.assertEqual(prepare[0], (5, 8.0, 69, 1, -1) )

    final_report = self.get_final_report(par, prepare)
    self.check_final_report_swath(final_report)
예제 #2
0
  def test_runuis_swath_rangetree(self):
    """Run the UIS calculation for the fixed window Q1 500-525 via a range tree.

    The precursors to evaluate are selected from the window extended by
    +/- q1_window; the range tree that is queried for interferences is then
    rebuilt from the precursors inside (min_q1, max_q1) only.  The resulting
    rows are handed to ``self.get_final_report`` and checked with
    ``self.check_final_report_swath``.
    """
    self.assertEqual(len(self.precursors_to_evaluate), 905)
    par = self.par
    prepare = []

    self.min_q1 = 500
    self.max_q1 = 525

    # Get the precursors (now for 500-525 instead of the full range)
    ###########################################################################
    myprecursors = Precursors()
    cursor = self.db.cursor()
    myprecursors.getFromDB(par, cursor, self.min_q1 - par.q1_window, self.max_q1 + par.q1_window)
    # This first tree is replaced below; the call is kept in case
    # build_rangetree() has side effects on myprecursors.
    myprecursors.build_rangetree()
    self.precursors_to_evaluate = myprecursors.getPrecursorsToEvaluate(self.min_q1, self.max_q1)
    self.assertEqual(len(self.precursors_to_evaluate), 39)

    # If we dont use the DB, we use the rangetree to query and get our list of
    # precursors that are interfering. In SWATH we dont include a +/- q1_window
    # around our range or precursors because the precursor window is fixed to
    # (min_q1,max_q1) and no other precursors are considered.
    myprecursors.getFromDB(par, cursor, self.min_q1, self.max_q1)
    rtree = myprecursors.build_rangetree()

    for precursor in self.precursors_to_evaluate:

      q3_low, q3_high = par.get_q3range_transitions()
      transitions = precursor.calculate_transitions(q3_low, q3_high)
      nr_transitions = len(transitions)

      # Use the rangetree, whether it is swath or not
      # NOTE(review): the query goes through self.myprecursors while rtree was
      # built from the local myprecursors -- confirm this is intended.
      collisions_per_peptide = self.myprecursors.get_collisions_per_peptide_from_rangetree(
          precursor, self.min_q1, self.max_q1, transitions, par, rtree)
      non_uis_list = collider.get_nonuis_list(collisions_per_peptide, par.max_uis)

      # One row per order: (number of non-UIS combinations, number of possible
      # combinations, parent id, order, -1).
      for order in range(1, min(par.max_uis + 1, nr_transitions + 1)):
        prepare.append((len(non_uis_list[order]),
                        collider.choose(nr_transitions, order),
                        precursor.parent_id, order, -1))

    self.assertEqual(len(prepare), 39*par.max_uis)
    # self.assertEqual(prepare[0], (5, 8.0, 69, 1, -1) )

    final_report = self.get_final_report(par, prepare)
    self.check_final_report_swath(final_report)
예제 #3
0
    def setUp(self):
        """Load parameters, raw tuples and precursors for Q1 440-450 from MySQL.

        On success the following attributes are set: ``cursor``, ``par``,
        ``mycollider``, ``min_q1``/``max_q1``, ``alltuples`` (rows of all
        isotopes inside the Q1 window), ``alltuples_isotope_correction``
        (rows with isotope_nr = 0, lower bound extended by the isotope
        correction), ``myprecursors`` and ``precursors_to_evaluate``.

        Any exception raised while doing so (e.g. MySQLdb missing or the
        database not reachable) is caught and only printed, so the fixture
        may be left incomplete.
        """
        # Sample-size limits.
        self.limit = 300
        self.limit_large = 600
        try:
            import MySQLdb
            #db_l = MySQLdb.connect(read_default_file="~/.my.cnf.local")
            db = MySQLdb.connect(read_default_file="~/.srm.cnf")
            cursor = db.cursor()
            self.cursor = cursor

            par = test_shared.get_default_setup_parameters()
            par.use_sqlite = True
            par.q1_window = 1.2 / 2.0
            par.q3_window = 2.0 / 2.0
            # Very wide window -- presumably to switch off SSRCalc filtering.
            par.ssrcalc_window = 9999 / 2.0
            par.peptide_tables = ['hroest.srmPeptides_yeast']
            par.transition_table = 'hroest.srmTransitions_yeast'
            par.isotopes_up_to = 3
            self.mycollider = collider.SRMcollider()
            par.select_by = "id"

            self.par = par
            self.min_q1 = 440
            self.max_q1 = 450

            ## For debugging
            ##self.max_q1 = 440.18
            ##par.q1_window = 0.009 / 2.0 
            ##par.q3_window = 2.0 / 2.0
            ##par.isotopes_up_to = 1

            import Residues
            R = Residues.Residues('mono')
            isotope_correction = par.isotopes_up_to * R.mass_diffC13 / min(par.parent_charges)

            # Q1 window of interest, extended by the Q1 tolerance on both sides.
            lower_q1 = self.min_q1 - par.q1_window
            upper_q1 = self.max_q1 + par.q1_window

            # All rows (every isotope_nr) inside the Q1 window.
            query =  """
            select modified_sequence, transition_group, parent_id, q1_charge, q1, ssrcalc, isotope_nr, 0, 0
            from %s where q1 between %s and %s
            #and isotope_nr = 0
                           """ % (par.peptide_tables[0], lower_q1, upper_q1)
            cursor.execute(query)
            self.alltuples = list(cursor.fetchall())

            # Rows with isotope_nr = 0 only; the lower bound is moved down by
            # isotope_correction.
            query =  """
            select modified_sequence, transition_group, parent_id, q1_charge, q1, ssrcalc, isotope_nr, 0, 0
            from %s where q1 between %s - %s and %s
            and isotope_nr = 0
                           """ % (par.peptide_tables[0], lower_q1, isotope_correction, upper_q1)
            cursor.execute(query)
            self.alltuples_isotope_correction = list(cursor.fetchall())

            self.myprecursors = Precursors()

            # myprecursors.getFromDB(par, db.cursor(), self.min_q1 - par.q1_window, self.max_q1 + par.q1_window)
            ##### LEGACY getFromDB -- have isotope_nr = 0 in there!

            # Get all precursors from the DB within a window of Q1
            self.myprecursors.precursors = []
            q =  """
            select modified_sequence, transition_group, parent_id, q1_charge, q1, ssrcalc, modifications, missed_cleavages, isotopically_modified
            from %(peptide_table)s where q1 between %(lowq1)s - %(isotope_correction)s and %(highq1)s
            and isotope_nr = 0
            """ % {'peptide_table' : par.peptide_tables[0],
                   'lowq1'  : lower_q1,  # min_q1 - par.q1_window
                   'highq1' : upper_q1,  # max_q1 + par.q1_window
                   'isotope_correction' : isotope_correction
                  }
            cursor.execute(q)
            for res in cursor.fetchall():
                p = Precursor()
                p.initialize(*res)
                # Only include those precursors that actually have isotopes in the specified range
                if p.included_in_isotopic_range(lower_q1, upper_q1, par):
                    self.myprecursors.precursors.append(p)

            ##### END LEGACY getFromDB

            self.precursors_to_evaluate = self.myprecursors.getPrecursorsToEvaluate(self.min_q1, self.max_q1)

        except Exception as inst:
            # Best effort: report the problem but do not abort the fixture.
            print("something went wrong")
            print(inst)
예제 #4
0
class Test_speed_integrated(unittest.TestCase):

    def setUp(self):
        """Load parameters, raw tuples and precursors for Q1 440-450 from MySQL.

        On success the following attributes are set: ``cursor``, ``par``,
        ``mycollider``, ``min_q1``/``max_q1``, ``alltuples`` (rows of all
        isotopes inside the Q1 window), ``alltuples_isotope_correction``
        (rows with isotope_nr = 0, lower bound extended by the isotope
        correction), ``myprecursors`` and ``precursors_to_evaluate``.

        Any exception raised while doing so (e.g. MySQLdb missing or the
        database not reachable) is caught and only printed, so the fixture
        may be left incomplete.
        """
        # Sample sizes: ``limit`` and ``limit_large`` cap how many
        # peptides / precursors the tests of this class iterate over.
        self.limit = 300
        self.limit_large = 600
        try:
            import MySQLdb
            #db_l = MySQLdb.connect(read_default_file="~/.my.cnf.local")
            db = MySQLdb.connect(read_default_file="~/.srm.cnf")
            cursor = db.cursor()
            self.cursor = cursor

            par = test_shared.get_default_setup_parameters()
            par.use_sqlite = True
            par.q1_window = 1.2 / 2.0
            par.q3_window = 2.0 / 2.0
            # Very wide window -- presumably to switch off SSRCalc filtering.
            par.ssrcalc_window = 9999 / 2.0
            par.peptide_tables = ['hroest.srmPeptides_yeast']
            par.transition_table = 'hroest.srmTransitions_yeast'
            par.isotopes_up_to = 3
            self.mycollider = collider.SRMcollider()
            par.select_by = "id"

            self.par = par
            self.min_q1 = 440
            self.max_q1 = 450

            ## For debugging
            ##self.max_q1 = 440.18
            ##par.q1_window = 0.009 / 2.0 
            ##par.q3_window = 2.0 / 2.0
            ##par.isotopes_up_to = 1

            import Residues
            R = Residues.Residues('mono')
            isotope_correction = par.isotopes_up_to * R.mass_diffC13 / min(par.parent_charges)

            # Q1 window of interest, extended by the Q1 tolerance on both sides.
            lower_q1 = self.min_q1 - par.q1_window
            upper_q1 = self.max_q1 + par.q1_window

            # All rows (every isotope_nr) inside the Q1 window.
            query =  """
            select modified_sequence, transition_group, parent_id, q1_charge, q1, ssrcalc, isotope_nr, 0, 0
            from %s where q1 between %s and %s
            #and isotope_nr = 0
                           """ % (par.peptide_tables[0], lower_q1, upper_q1)
            cursor.execute(query)
            self.alltuples = list(cursor.fetchall())

            # Rows with isotope_nr = 0 only; the lower bound is moved down by
            # isotope_correction.
            query =  """
            select modified_sequence, transition_group, parent_id, q1_charge, q1, ssrcalc, isotope_nr, 0, 0
            from %s where q1 between %s - %s and %s
            and isotope_nr = 0
                           """ % (par.peptide_tables[0], lower_q1, isotope_correction, upper_q1)
            cursor.execute(query)
            self.alltuples_isotope_correction = list(cursor.fetchall())

            self.myprecursors = Precursors()

            # myprecursors.getFromDB(par, db.cursor(), self.min_q1 - par.q1_window, self.max_q1 + par.q1_window)
            ##### LEGACY getFromDB -- have isotope_nr = 0 in there!

            # Get all precursors from the DB within a window of Q1
            self.myprecursors.precursors = []
            q =  """
            select modified_sequence, transition_group, parent_id, q1_charge, q1, ssrcalc, modifications, missed_cleavages, isotopically_modified
            from %(peptide_table)s where q1 between %(lowq1)s - %(isotope_correction)s and %(highq1)s
            and isotope_nr = 0
            """ % {'peptide_table' : par.peptide_tables[0],
                   'lowq1'  : lower_q1,  # min_q1 - par.q1_window
                   'highq1' : upper_q1,  # max_q1 + par.q1_window
                   'isotope_correction' : isotope_correction
                  }
            cursor.execute(q)
            for res in cursor.fetchall():
                p = Precursor()
                p.initialize(*res)
                # Only include those precursors that actually have isotopes in the specified range
                if p.included_in_isotopic_range(lower_q1, upper_q1, par):
                    self.myprecursors.precursors.append(p)

            ##### END LEGACY getFromDB

            self.precursors_to_evaluate = self.myprecursors.getPrecursorsToEvaluate(self.min_q1, self.max_q1)

        except Exception as inst:
            # Best effort: report the problem but do not abort the fixture.
            print("something went wrong")
            print(inst)

    @attr('comparison') 
    def test_integrated_cpp(self):
            """ Compare the fully integrated vs the mixed C++ rangetree / Python solution.

            Here we are comparing the fully integrated solution of storing all
            precursors in a C++ range tree and never passing them to Python vs
            the solution where we store the precursors in the rangetree, pass
            them to Python and then evaluate them.

            For at most self.limit_large precursors (taken in random order) the
            number of non-UIS combinations per order 1..MAX_UIS is computed
            both ways and asserted to be equal.  Running per-precursor average
            timings are written to stdout on a single, continuously
            overwritten terminal line.
            """

            # NOTE: `verbose` is assigned twice and not read in this method.
            verbose = True
            verbose = False
            print '\n'*1
            print "Comparing fully integrated solution (c_integrated.wrap_all)"
            par = self.par
            cursor = self.cursor

            # shuffle() works in place, so self.precursors_to_evaluate is
            # reordered as well.
            all_precursors = self.precursors_to_evaluate
            shuffle(all_precursors)
            all_precursors = all_precursors[:self.limit_large]

            self.myprecursors.build_parent_id_lookup()
            # `testrange` is not read below; the C++ tree `r` is what both
            # code paths receive.
            testrange = self.myprecursors.build_rangetree()
            import c_rangetree
            r = c_rangetree.ExtendedRangetree_Q1_RT.create()
            r.new_rangetree()
            r.create_tree(tuple(self.alltuples_isotope_correction))
            #c_integrated.create_tree(tuple(self.alltuples_isotope_correction))

            MAX_UIS = 5
            # Accumulated wall-clock times in seconds.  Only newtime, oldtime
            # and transitiontime are updated in the loop below.
            newtime = 0; oldtime = 0; ctime = 0
            oldsql = 0; newsql = 0
            alllocaltime = 0
            localprecursor = 0
            transitiontime = 0
            c_fromprecursortime = 0
            prepare  = []
            # False until the first status line was written (see the escape
            # sequences at the end of the loop).
            self._cursor = False
            print "Running experiment ", par.get_common_filename()
            print "calc_trans. = time to calculate the transitions of the precursor"
            print "old = use rangetree to get precursors, use C++ code to get collperpep"
            print "new = use rangetree to get precursors, use single C++ code to get collperpep"
            print "i\tcalc_trans.\tnew\t\told\t\t>>\tspeedup"
            for kk, precursor in enumerate(all_precursors):
                # 1-based count of processed precursors, used for the averages
                ii = kk + 1

                q1 =       precursor.q1
                ssrcalc =  precursor.ssrcalc
                sequence = precursor.modified_sequence
                peptide_key = precursor.transition_group
                p_id = precursor.parent_id

                q3_low, q3_high = par.get_q3range_transitions()

                #new way to calculate the precursors
                mystart = time.time()
                transitions = c_getnonuis.calculate_transitions_ch(
                    ((q1, sequence, p_id),), [1,2], q3_low, q3_high)
                nr_transitions = len( transitions )
                #fake some srm_id for the transitions
                transitions = tuple([ (t[0], i) for i,t in enumerate(transitions)])

                q1_low = q1 - par.q1_window 
                q1_high = q1 + par.q1_window
                # NOTE: innerstart is assigned but not read in this method.
                innerstart = time.time()
                #correct rounding errors, s.t. we get the same results as before!
                ssrcalc_low = ssrcalc - par.ssrcalc_window + 0.001
                ssrcalc_high = ssrcalc + par.ssrcalc_window - 0.001
                transitiontime += time.time() - mystart

                isotope_correction = par.isotopes_up_to * Residues.mass_diffC13 / min(par.parent_charges)

                ###################################
                # New way to calculate non_uislist 
                #  - start out from transitions, get non_uislist

                mystart = time.time()
                newresult = c_integrated.wrap_all_bitwise(transitions, q1_low, ssrcalc_low,
                    q1_high,  ssrcalc_high, peptide_key,
                    min(MAX_UIS,nr_transitions) , par.q3_window, #q3_low, q3_high,
                    par.ppm, par.isotopes_up_to, isotope_correction, par, r)
                newtime += time.time() - mystart

                ###################################
                # Old way to calculate non_uislist:
                #  - get collisions per peptide
                #  - get non_uislist

                mystart = time.time()
                collisions_per_peptide_obj = self.myprecursors.get_collisions_per_peptide_from_rangetree(
                    precursor, precursor.q1 - par.q1_window, precursor.q1 + par.q1_window, 
                    transitions, par, r)

                ## # if False:
                ## #     precursor_ids = tuple(c_rangetree.query_tree( q1_low, ssrcalc_low, 
                ## #         q1_high,  ssrcalc_high, par.isotopes_up_to, isotope_correction)) 
                ## #     precursors = tuple([parentid_lookup[myid[0]] for myid in precursor_ids
                ## #                         #dont select myself 
                ## #                        if parentid_lookup[myid[0]][2]  != peptide_key])
                ## #     collisions_per_peptide = c_getnonuis.calculate_collisions_per_peptide( 
                ## #         transitions, precursors, q3_low, q3_high, par.q3_window, par.ppm)

                # Index 0 stays an empty dict; orders 1..MAX_UIS are filled in.
                non_uis_list = [{} for i in range(MAX_UIS+1)]
                for order in range(1,MAX_UIS+1):
                    non_uis_list[order] = c_getnonuis.get_non_uis(
                        collisions_per_peptide_obj, order)

                oldtime += time.time() - mystart

                # NOTE: under Python 2 the comprehension variable `kk` leaks and
                # rebinds the loop index; this is harmless only because `ii`
                # was already derived from it above.
                non_uis_list_old_way = [set(kk.keys()) for kk in non_uis_list]
                non_uis_list_len = [len(kk) for kk in non_uis_list_old_way[1:]]

                ###################################
                # Assert equality, print out result
                self.assertEqual( newresult , non_uis_list_len) 

                # Running averages in ms per precursor plus old/new speedup.
                # NOTE: raises ZeroDivisionError if newtime is still 0.
                mys =  "%s\t%0.4fms\t%0.2fms\t\t%0.2fms\t\t>>\t%0.2f" % \
                (ii, transitiontime *1000/ ii, 
                        newtime*1000/ii, oldtime*1000/ii,
                oldtime *1.0 / newtime)
                #start a new line for each output?
                #mys += '\t%s\t%s' % ( len(precursors), len(precursor_new) )
                if False: mys += '\n'

                # Rewrite the status line in place with ANSI escape sequences:
                # ESC[2K erases the line, ESC[s saves and ESC[u restores the
                # cursor position.
                self.ESC = chr(27)
                sys.stdout.write(self.ESC + '[2K')
                if self._cursor:
                    sys.stdout.write(self.ESC + '[u')
                self._cursor = True
                sys.stdout.write(self.ESC + '[s')
                sys.stdout.write(mys)
                sys.stdout.flush()


            # except Exception as inst:
            #     print "something went wrong"
            #     print inst

    #@attr('comparison') 
    def test_two_table_mysql(self):
            """Compare the two table vs the one table MySQL approach
            
            Here we are comparing the old (querying the transitions database as
            well as the precursor database) and the new way (only query the
            precursor database and calculate the transitions on the fly) way of
            calculating the collisions.

            For at most self.limit peptides (taken in random order) the non-UIS
            lists obtained through the different code paths are asserted to
            agree, and running per-peptide average timings are written to
            stdout on a single, continuously overwritten terminal line.
            """

            print '\n'*1
            print "Comparing one vs two table MySQL solution"
            par = self.par
            cursor = self.cursor

            # A fresh collider is passed to _get_all_collisions(); the peptide
            # list itself is stored on self.mycollider.
            mycollider = collider.SRMcollider()
            mypepids = _get_unique_pepids(par, cursor)
            self.mycollider.pepids = mypepids
            self.mycollider.calcinner = 0
            shuffle( self.mycollider.pepids )
            self.mycollider.pepids = self.mycollider.pepids[:self.limit]

            MAX_UIS = 5
            # Accumulated wall-clock times in seconds, one per timed section
            # of the loop below.
            c_newuistime = 0; oldtime = 0; c_fromprecursortime = 0
            oldsql = 0; newsql = 0
            oldcalctime = 0; localsql = 0
            # False until the first status line was written.
            self._cursor = False
            print "oldtime = get UIS from collisions and transitions (getting all collisions from the transitions db)"
            print "cuis + oldsql = as oldtime but calculate UIS in C++"
            print "py+newsql = only get the precursors from the db and calculate collisions in Python"
            print "ctime + newsql = only get the precursors from the db and calculate collisions in C++"
            print "new = use fast SQL and C++ code"
            print "old = use slow SQL and Python code"
            print "i\toldtime\tcuis+oldsql\tpy+newsql\tctime+newsql\t>>>\toldsql\tnewsql\t...\t...\tspeedup"
            for kk, pep in enumerate(self.mycollider.pepids):
                # 1-based count of visited peptides, used for the averages
                ii = kk + 1
                p_id = pep['parent_id']
                q1 = pep['q1']
                q3_low, q3_high = par.get_q3range_transitions()
                precursor = Precursor(q1=pep['q1'], transition_group=pep['transition_group'], parent_id = pep['parent_id'], modified_sequence=pep['mod_sequence'], ssrcalc=pep['ssrcalc'])
                transitions = collider.calculate_transitions_ch(
                    ((q1, pep['mod_sequence'], p_id),), [1], q3_low, q3_high)
                #fake some srm_id for the transitions
                transitions = tuple([ (t[0], i) for i,t in enumerate(transitions)])
                ##### transitions = self.mycollider._get_all_transitions(par, pep, cursor)
                nr_transitions = len( transitions )
                if nr_transitions == 0: continue #no transitions in this window
                #
                # oldcalctime: _get_all_collisions_calculate_new()
                mystart = time.time()
                collisions = list(self.mycollider._get_all_collisions_calculate_new(par, precursor, cursor))
                oldcolllen = len(collisions)
                oldcalctime += time.time() - mystart
                #
                # oldsql: _get_all_collisions(); its result replaces
                # `collisions` and feeds both get_non_UIS_from_transitions*
                # calls below.
                mystart = time.time()
                collisions = _get_all_collisions(mycollider, par, pep, cursor, transitions = transitions)
                oldcsqllen = len(collisions)
                oldsql += time.time() - mystart
                #
                # newsql: fetch only the precursors
                par.query2_add = ' and isotope_nr = 0 ' # due to the new handling of isotopes
                mystart = time.time()
                self.mycollider.mysqlnewtime= 0
                precursors = self.mycollider._get_all_precursors(par, precursor, cursor)
                newsql += time.time() - mystart
                #
                # localsql: the timed call is commented out, so this only
                # measures the timer overhead.
                mystart = time.time()
                #precursors = self.mycollider._get_all_precursors(par, pep, cursor_l)
                localsql += time.time() - mystart
                par.query2_add = '' # due to the new handling of isotopes
                #
                # c_newuistime: get_non_UIS_from_transitions()
                mystart = time.time()
                non_uis_list = get_non_UIS_from_transitions(transitions, 
                                            tuple(collisions), par, MAX_UIS)
                cnewuis = non_uis_list
                c_newuistime += time.time() - mystart
                #
                # oldtime: get_non_UIS_from_transitions_old()
                mystart = time.time()
                non_uis_list = get_non_UIS_from_transitions_old(transitions, 
                                            collisions, par, MAX_UIS)
                oldnonuislist = non_uis_list
                oldtime += time.time() - mystart
                #
                # c_fromprecursortime: collisions per peptide from the
                # precursors, then the non-UIS per order via c_getnonuis
                mystart = time.time()
                q3_low, q3_high = par.get_q3range_transitions()
                collisions_per_peptide = c_getnonuis.calculate_collisions_per_peptide_other_ion_series( 
                    transitions, precursors, par, q3_low, q3_high, par.q3_window, par.ppm, False)
                # Index 0 stays an empty dict; orders 1..MAX_UIS are filled in.
                non_uis_list = [ {} for i in range(MAX_UIS+1)]
                for order in range(1,MAX_UIS+1):
                    non_uis_list[order] = c_getnonuis.get_non_uis(
                        collisions_per_peptide, order)
                c_fromprecursortime += time.time() - mystart

                # First compare only the sizes per order ...
                newl = [len(n) for n in non_uis_list]
                self.assertEqual(newl, [len(o) for o in cnewuis])
                self.assertEqual(newl, [len(o) for o in oldnonuislist])

                # ... then the actual contents, as sets of dictionary keys.
                non_uis_list = [set(k.keys()) for k in non_uis_list]
                cnewuis = [set(k.keys()) for k in cnewuis]

                self.assertEqual(non_uis_list, cnewuis)
                self.assertEqual(non_uis_list, oldnonuislist)

                # Running averages in ms per peptide, in the column order of
                # the header printed above.
                # NOTE: raises ZeroDivisionError if the last denominator is 0.
                mys =  "%s\t%0.fms\t%0.fms\t\t%0.fms\t\t%0.2fms\t\t>>>\t%0.fms\t%0.2fms\t...\t...\t%0.2f" % \
                 (ii,  #i
                 (oldtime + oldsql)*1000/ii,  #oldtime
                 (c_newuistime+oldsql)*1000/ii, #cuis + oldsql
                 (oldcalctime + newsql + oldtime)*1000/ii,  #py+newsql
                 (c_fromprecursortime + newsql)*1000/ii,  #ctime+newsql
                 #(c_fromprecursortime + localsql)*1000/ii,

                 oldsql*1000/ii, #oldsql
                 newsql*1000/ii, #newsql
                 #localsql*1000/ii,
                 #oldtime / c_newuistime
                 (oldtime + oldsql) / (c_fromprecursortime + newsql)
                )

                # Rewrite the status line in place with ANSI escape sequences:
                # ESC[2K erases the line, ESC[s saves and ESC[u restores the
                # cursor position.
                self.ESC = chr(27)
                sys.stdout.write(self.ESC + '[2K')
                if self._cursor:
                    sys.stdout.write(self.ESC + '[u')
                self._cursor = True
                sys.stdout.write(self.ESC + '[s')
                sys.stdout.write(mys)
                sys.stdout.flush()

    #@attr('comparison') 
    def test_mysql_vs_integrated(self):
            """Compare the one table MySQL approach vs the fully integrated Cpp approach
            
            The "old" way fetches the interfering precursors from the SQL
            database for every peptide and calculates the collisions per
            peptide in C++; the "new" way starts from the transitions and gets
            the non-UIS counts from a single call on a C++ range tree
            (c_integrated.wrap_all_bitwise).

            For at most self.limit doubly charged, monoisotopic peptides with
            min_q1 <= q1 < max_q1 (taken in random order) the non-UIS counts
            per order are asserted to be equal, and running per-peptide
            average timings are written to stdout on a single, continuously
            overwritten terminal line.
            """

            print '\n'*1
            print "Comparing one table MySQL solution vs integrated solution"
            par = self.par
            cursor = self.cursor

            # Build the peptide dictionaries from the raw tuples loaded in
            # setUp instead of querying the database again.
            # NOTE: under Python 2 the comprehension variable `r` leaks into
            # the method scope; it is rebound to the range tree further down.
            mypepids = [
                        {
                            'mod_sequence'  :  r[0],
                            'peptide_key' :r[1],
                            'transition_group' :r[1],
                            'parent_id' :  r[2],
                            'q1_charge' :  r[3],
                            'q1' :         r[4],
                            'ssrcalc' :    r[5],
                        }
                        for r in self.alltuples
                if r[3] == 2 #charge is 2
                and r[6] == 0 #isotope is 0
                and r[4] >= self.min_q1
                and r[4] < self.max_q1
            ]

            # NOTE: this local `mycollider` is not read below; the loop works
            # on self.mycollider.
            mycollider = collider.SRMcollider()
            #mypepids = _get_unique_pepids(par, cursor)
            self.mycollider.pepids = mypepids
            self.mycollider.calcinner = 0
            shuffle( self.mycollider.pepids )
            self.mycollider.pepids = self.mycollider.pepids[:self.limit]

            import c_rangetree
            r = c_rangetree.ExtendedRangetree_Q1_RT.create()
            r.new_rangetree()
            r.create_tree(tuple(self.alltuples_isotope_correction))
            #c_integrated.create_tree(tuple(self.alltuples_isotope_correction))

            MAX_UIS = 5
            # Accumulated wall-clock times in seconds.  Only newsql,
            # c_fromprecursortime and newtime are updated in the loop below.
            c_newuistime = 0; oldtime = 0; c_fromprecursortime = 0
            oldsql = 0; newsql = 0
            newtime = 0
            oldcalctime = 0; localsql = 0
            # False until the first status line was written.
            self._cursor = False
            print "i\toldtime\t\tnewtime\t>>\tspeedup"
            for kk, pep in enumerate(self.mycollider.pepids):
                # 1-based count of visited peptides, used for the averages
                ii = kk + 1
                p_id = pep['parent_id']
                q1 = pep['q1']
                q3_low, q3_high = par.get_q3range_transitions()
                q1_low = q1 - par.q1_window 
                q1_high = q1 + par.q1_window
                ssrcalc = pep['ssrcalc']
                peptide_key = pep['peptide_key']

                #correct rounding errors, s.t. we get the same results as before!
                ssrcalc_low = ssrcalc - par.ssrcalc_window + 0.001
                ssrcalc_high = ssrcalc + par.ssrcalc_window - 0.001
                isotope_correction = par.isotopes_up_to * Residues.mass_diffC13 / min(par.parent_charges)

                precursor = Precursor(q1=pep['q1'], transition_group=pep['transition_group'], parent_id = pep['parent_id'], modified_sequence=pep['mod_sequence'], ssrcalc=pep['ssrcalc'])

                transitions = collider.calculate_transitions_ch(
                    ((q1, pep['mod_sequence'], p_id),), [1], q3_low, q3_high)
                #fake some srm_id for the transitions
                transitions = tuple([ (t[0], i) for i,t in enumerate(transitions)])
                ##### transitions = self.mycollider._get_all_transitions(par, pep, cursor)
                nr_transitions = len( transitions )
                if nr_transitions == 0: continue #no transitions in this window

                ###################################
                # Old way to calculate non_uislist 
                #  - get all precursors from the SQL database
                #  - calculate collisions per peptide in C++

                par.query2_add = ' and isotope_nr = 0 ' # due to the new handling of isotopes
                mystart = time.time()
                self.mycollider.mysqlnewtime= 0
                precursors = self.mycollider._get_all_precursors(par, precursor, cursor)
                newsql += time.time() - mystart

                mystart = time.time()
                q3_low, q3_high = par.get_q3range_transitions()
                collisions_per_peptide = c_getnonuis.calculate_collisions_per_peptide_other_ion_series( 
                    transitions, precursors, par, q3_low, q3_high, par.q3_window, par.ppm, False)
                # Index 0 stays an empty dict; orders 1..MAX_UIS are filled in.
                non_uis_list = [ {} for i in range(MAX_UIS+1)]
                for order in range(1,MAX_UIS+1):
                    non_uis_list[order] = c_getnonuis.get_non_uis(
                        collisions_per_peptide, order)
                c_fromprecursortime += time.time() - mystart

                # NOTE: `newl` is not read below.  Under Python 2 the
                # comprehension variable `kk` leaks and rebinds the loop
                # index; harmless because `ii` was already derived from it.
                newl = [len(n) for n in non_uis_list]
                non_uis_list_old_way = [set(kk.keys()) for kk in non_uis_list]
                non_uis_list_len = [len(kk) for kk in non_uis_list_old_way[1:]]

                ###################################
                # New way to calculate non_uislist 
                #  - start out from transitions, get non_uislist
                mystart = time.time()
                newresult = c_integrated.wrap_all_bitwise(transitions, q1_low, ssrcalc_low,
                    q1_high,  ssrcalc_high, peptide_key,
                    min(MAX_UIS,nr_transitions) , par.q3_window, #q3_low, q3_high,
                    par.ppm, par.isotopes_up_to, isotope_correction, par, r)
                newtime += time.time() - mystart

                ###################################
                # Assert equality, print out result
                self.assertEqual( newresult , non_uis_list_len) 

                # Running averages in ms per peptide plus old/new speedup.
                # NOTE: raises ZeroDivisionError if newtime is still 0.
                mys =  "%s\t%0.1fms\t\t%0.2fms\t>>>\t%0.1f" % \
                 (ii,  #i
                 (c_fromprecursortime + newsql)*1000/ii,  # oldtime
                 (newtime)*1000/ii, # newtime
                 (c_fromprecursortime + newsql) / (newtime), # speedup
                )

                # Rewrite the status line in place with ANSI escape sequences:
                # ESC[2K erases the line, ESC[s saves and ESC[u restores the
                # cursor position.
                self.ESC = chr(27)
                sys.stdout.write(self.ESC + '[2K')
                if self._cursor:
                    sys.stdout.write(self.ESC + '[u')
                self._cursor = True
                sys.stdout.write(self.ESC + '[s')
                sys.stdout.write(mys)
                sys.stdout.flush()
예제 #5
0
        raise Exception("Your Q1 window needs to be at least as large as the min/max q1 you chose.")
        sys.exit()

###########################################################################
# Prepare the collider

db = par.get_db()
cursor = db.cursor()

if options.insert_mysql:
    # Writing the results to MySQL is not implemented.  Raise explicitly: an
    # ``assert False`` is removed under ``python -O`` and the option would then
    # be ignored silently.
    raise NotImplementedError("insert_mysql: you have to implement this yourself")

# Get the precursors
###########################################################################
from precursor import Precursors
# Background precursors: everything inside the Q1 range extended by the Q1
# tolerance on both sides.
myprecursors = Precursors()
myprecursors.getFromDB(par, db.cursor(), min_q1 - par.q1_window, max_q1 + par.q1_window)
if options.query_peptide_table is not None and options.query_peptide_table != "":
  # The peptides to evaluate come from a separate table: read them with a
  # copy of the parameters that only differs in its peptide_tables.
  print("Using a different table for the query peptides than for the background peptides!")
  print("Will use table %s " % options.query_peptide_table)
  query_precursors = Precursors()
  query_par = copy(par)
  query_par.peptide_tables = [options.query_peptide_table]
  query_precursors.getFromDB(query_par, db.cursor(), min_q1 - par.q1_window, max_q1 + par.q1_window)
  precursors_to_evaluate = query_precursors.getPrecursorsToEvaluate(min_q1, max_q1)
else:
  precursors_to_evaluate = myprecursors.getPrecursorsToEvaluate(min_q1, max_q1)
# The lookups are always built on the background precursors.
myprecursors.build_parent_id_lookup()
myprecursors.build_transition_group_lookup()

# If we don't use the DB, we use the rangetree to query and get our list of
예제 #6
0
exp_key = sys.argv[1]
min_q1 = float(sys.argv[2])
max_q1 = float(sys.argv[3])
outfile = options.outfile
strike3_ssrcalcwindow = options.ssr3strike
myorder =options.myorder
contamination_allow =options.allow_contamination
par.eval()
print par.get_common_filename()

skip_strike2 = True

# Get the precursors
###########################################################################
from precursor import Precursors
myprecursors = Precursors()
myprecursors.getFromDB(par, db.cursor(), min_q1 - par.q1_window, max_q1 + par.q1_window)
if options.GRAVY: myprecursors.use_GRAVY_scores()
rtree = myprecursors.build_rangetree()
precursors_to_evaluate = myprecursors.getPrecursorsToEvaluate(min_q1, max_q1)
myprecursors.build_parent_id_lookup()
myprecursors.build_transition_group_lookup()

print "Want to evaluate precursors", len(precursors_to_evaluate)

"""
The idea is to find UIS combinations that are globally UIS, locally
clean and also there are no cases in which all the UIS coelute when
they are in different peptides:
    * global UIS = whole RT
    * locally clean = no interferences around the peptide
예제 #7
0
    def setUp(self):
        """Connect to MySQL and load the precursor data the tests work on.

        On success the following attributes are set: ``limit`` and
        ``limit_large`` (sample sizes), ``cursor``, ``mycollider``, ``par``,
        ``min_q1`` / ``max_q1``, ``alltuples``,
        ``alltuples_isotope_correction``, ``myprecursors`` and
        ``precursors_to_evaluate``.

        Any exception raised while importing, connecting or querying is
        printed and swallowed, so attributes assigned after the failing
        statement are left unset.
        """
        self.limit = 300
        self.limit_large = 600
        try:
            import MySQLdb
            db = MySQLdb.connect(read_default_file="~/.srm.cnf")
            cursor = db.cursor()
            self.cursor = cursor

            par = test_shared.get_default_setup_parameters()
            par.use_sqlite = True
            # Windows are given as full width / 2.
            par.q1_window = 1.2 / 2.0
            par.q3_window = 2.0 / 2.0
            par.ssrcalc_window = 9999 / 2.0
            par.peptide_tables = ['hroest.srmPeptides_yeast']
            par.transition_table = 'hroest.srmTransitions_yeast'
            par.isotopes_up_to = 3
            self.mycollider = collider.SRMcollider()
            par.select_by = "id"

            self.par = par
            self.min_q1 = 440
            self.max_q1 = 450

            import Residues
            R = Residues.Residues('mono')
            isotope_correction = par.isotopes_up_to * R.mass_diffC13 / min(
                par.parent_charges)

            # Q1 range widened by the Q1 window; shared by all queries below.
            lower_q1 = self.min_q1 - par.q1_window
            upper_q1 = self.max_q1 + par.q1_window

            # Rows of every isotope_nr inside the widened Q1 range.
            query = """
            select modified_sequence, transition_group, parent_id, q1_charge, q1, ssrcalc, isotope_nr, 0, 0
            from %s where q1 between %s and %s
            #and isotope_nr = 0
                           """ % (par.peptide_tables[0], lower_q1, upper_q1)
            cursor.execute(query)
            self.alltuples = list(cursor.fetchall())

            # Only isotope_nr = 0 rows, with the lower bound extended by the
            # isotope correction.
            query = """
            select modified_sequence, transition_group, parent_id, q1_charge, q1, ssrcalc, isotope_nr, 0, 0
            from %s where q1 between %s - %s and %s
            and isotope_nr = 0
                           """ % (par.peptide_tables[0], lower_q1,
                                  isotope_correction, upper_q1)
            cursor.execute(query)
            self.alltuples_isotope_correction = list(cursor.fetchall())

            ##### LEGACY getFromDB -- have isotope_nr = 0 in there!
            # Done by hand instead of calling Precursors.getFromDB(par,
            # db.cursor(), lower_q1, upper_q1): fetch the isotope_nr = 0 rows
            # and keep those with isotopes inside the Q1 range.
            self.myprecursors = Precursors()
            self.myprecursors.precursors = []
            q = """
            select modified_sequence, transition_group, parent_id, q1_charge, q1, ssrcalc, modifications, missed_cleavages, isotopically_modified
            from %(peptide_table)s where q1 between %(lowq1)s - %(isotope_correction)s and %(highq1)s
            and isotope_nr = 0
            """ % {
                'peptide_table': par.peptide_tables[0],
                'lowq1': lower_q1,
                'highq1': upper_q1,
                'isotope_correction': isotope_correction
            }
            cursor.execute(q)
            for res in cursor.fetchall():
                p = Precursor()
                p.initialize(*res)
                # Only include those precursors that actually have isotopes
                # in the specified range
                if p.included_in_isotopic_range(lower_q1, upper_q1, par):
                    self.myprecursors.precursors.append(p)
            ##### END LEGACY getFromDB

            self.precursors_to_evaluate = self.myprecursors.getPrecursorsToEvaluate(
                self.min_q1, self.max_q1)

        except Exception as inst:
            # Best effort: report the problem and leave the fixture as far as
            # it got. Single-argument print calls print the same text under
            # Python 2 as the print statement does.
            print("something went wrong")
            print(inst)
예제 #8
0
class Test_speed_integrated(unittest.TestCase):
    def setUp(self):
        self.limit = 100
        self.limit_large = 100
        self.limit = 300
        self.limit_large = 600
        try:
            import MySQLdb
            #db_l = MySQLdb.connect(read_default_file="~/.my.cnf.local")
            db = MySQLdb.connect(read_default_file="~/.srm.cnf")
            cursor = db.cursor()
            self.cursor = cursor

            par = test_shared.get_default_setup_parameters()
            par.use_sqlite = True
            par.q1_window = 1.2 / 2.0
            par.q3_window = 2.0 / 2.0
            par.ssrcalc_window = 4 / 2.0
            par.ssrcalc_window = 9999 / 2.0
            par.peptide_tables = ['hroest.srmPeptides_yeast']
            par.transition_table = 'hroest.srmTransitions_yeast'
            par.isotopes_up_to = 3
            self.mycollider = collider.SRMcollider()
            par.select_by = "id"

            self.par = par
            self.min_q1 = 440
            self.max_q1 = 450

            ## For debugging
            ##self.max_q1 = 440.18
            ##par.q1_window = 0.009 / 2.0
            ##par.q3_window = 2.0 / 2.0
            ##par.isotopes_up_to = 1

            import Residues
            R = Residues.Residues('mono')
            isotope_correction = par.isotopes_up_to * R.mass_diffC13 / min(
                par.parent_charges)

            start = time.time()
            query = """
            select modified_sequence, transition_group, parent_id, q1_charge, q1, ssrcalc, isotope_nr, 0, 0
            from %s where q1 between %s and %s
            #and isotope_nr = 0
                           """ % (par.peptide_tables[0], self.min_q1 -
                                  par.q1_window, self.max_q1 + par.q1_window)
            cursor.execute(query)
            self.alltuples = list(cursor.fetchall())
            #print "len alltuples", len(self.alltuples)

            query = """
            select modified_sequence, transition_group, parent_id, q1_charge, q1, ssrcalc, isotope_nr, 0, 0
            from %s where q1 between %s - %s and %s
            and isotope_nr = 0
                           """ % (par.peptide_tables[0], self.min_q1 -
                                  par.q1_window, isotope_correction,
                                  self.max_q1 + par.q1_window)
            cursor.execute(query)
            self.alltuples_isotope_correction = list(cursor.fetchall())
            #print "len alltuples zero", len(self.alltuples_isotope_correction)

            self.myprecursors = Precursors()

            # myprecursors.getFromDB(par, db.cursor(), self.min_q1 - par.q1_window, self.max_q1 + par.q1_window)
            ##### LEGACY getFromDB -- have isotope_nr = 0 in there!

            # Get all precursors from the DB within a window of Q1
            lower_q1 = self.min_q1 - par.q1_window
            upper_q1 = self.max_q1 + par.q1_window
            self.myprecursors.precursors = []
            isotope_correction = par.isotopes_up_to * R.mass_diffC13 / min(
                par.parent_charges)
            q = """
            select modified_sequence, transition_group, parent_id, q1_charge, q1, ssrcalc, modifications, missed_cleavages, isotopically_modified
            from %(peptide_table)s where q1 between %(lowq1)s - %(isotope_correction)s and %(highq1)s
            and isotope_nr = 0
            """ % {
                'peptide_table': par.peptide_tables[0],
                'lowq1': lower_q1,  # min_q1 - par.q1_window
                'highq1': upper_q1,  # max_q1 + par.q1_window,
                'isotope_correction': isotope_correction
            }
            cursor.execute(q)
            for res in cursor.fetchall():
                p = Precursor()
                p.initialize(*res)
                # Only include those precursors that are actually have isotopes in the specified range
                if (p.included_in_isotopic_range(lower_q1, upper_q1, par)):
                    self.myprecursors.precursors.append(p)

            ##### END LEGACY getFromDB

            self.precursors_to_evaluate = self.myprecursors.getPrecursorsToEvaluate(
                self.min_q1, self.max_q1)

        except Exception as inst:
            print "something went wrong"
            print inst

    @attr('comparison')
    def test_integrated_cpp(self):
        """ Compare the fully integrated vs the mixed C++ rangetree / Python solution.

            Here we are comparing the fully integrated solution of storing all
            precursors in a C++ range tree and never passing them to Python vs
            the solution where we store the precursors in the rangetree, pass
            them to Python and then evaluate them.
            """

        verbose = True
        verbose = False
        print '\n' * 1
        print "Comparing fully integrated solution (c_integrated.wrap_all)"
        par = self.par
        cursor = self.cursor

        all_precursors = self.precursors_to_evaluate
        shuffle(all_precursors)
        all_precursors = all_precursors[:self.limit_large]

        self.myprecursors.build_parent_id_lookup()
        testrange = self.myprecursors.build_rangetree()
        import c_rangetree
        r = c_rangetree.ExtendedRangetree_Q1_RT.create()
        r.new_rangetree()
        r.create_tree(tuple(self.alltuples_isotope_correction))
        #c_integrated.create_tree(tuple(self.alltuples_isotope_correction))

        MAX_UIS = 5
        newtime = 0
        oldtime = 0
        ctime = 0
        oldsql = 0
        newsql = 0
        alllocaltime = 0
        localprecursor = 0
        transitiontime = 0
        c_fromprecursortime = 0
        prepare = []
        self._cursor = False
        print "Running experiment ", par.get_common_filename()
        print "calc_trans. = time to calculate the transitions of the precursor"
        print "old = use rangetree to get precursors, use C++ code to get collperpep"
        print "new = use rangetree to get precursors, use single C++ code to get collperpep"
        print "i\tcalc_trans.\tnew\t\told\t\t>>\tspeedup"
        for kk, precursor in enumerate(all_precursors):
            ii = kk + 1

            q1 = precursor.q1
            ssrcalc = precursor.ssrcalc
            sequence = precursor.modified_sequence
            peptide_key = precursor.transition_group
            p_id = precursor.parent_id

            q3_low, q3_high = par.get_q3range_transitions()

            #new way to calculate the precursors
            mystart = time.time()
            transitions = c_getnonuis.calculate_transitions_ch(
                ((q1, sequence, p_id), ), [1, 2], q3_low, q3_high)
            nr_transitions = len(transitions)
            #fake some srm_id for the transitions
            transitions = tuple([(t[0], i) for i, t in enumerate(transitions)])

            q1_low = q1 - par.q1_window
            q1_high = q1 + par.q1_window
            innerstart = time.time()
            #correct rounding errors, s.t. we get the same results as before!
            ssrcalc_low = ssrcalc - par.ssrcalc_window + 0.001
            ssrcalc_high = ssrcalc + par.ssrcalc_window - 0.001
            transitiontime += time.time() - mystart

            isotope_correction = par.isotopes_up_to * Residues.mass_diffC13 / min(
                par.parent_charges)

            ###################################
            # New way to calculate non_uislist
            #  - start out from transitions, get non_uislist

            mystart = time.time()
            newresult = c_integrated.wrap_all_bitwise(
                transitions,
                q1_low,
                ssrcalc_low,
                q1_high,
                ssrcalc_high,
                peptide_key,
                min(MAX_UIS, nr_transitions),
                par.q3_window,  #q3_low, q3_high,
                par.ppm,
                par.isotopes_up_to,
                isotope_correction,
                par,
                r)
            newtime += time.time() - mystart

            ###################################
            # Old way to calculate non_uislist:
            #  - get collisions per peptide
            #  - get non_uislist

            mystart = time.time()
            collisions_per_peptide_obj = self.myprecursors.get_collisions_per_peptide_from_rangetree(
                precursor, precursor.q1 - par.q1_window,
                precursor.q1 + par.q1_window, transitions, par, r)

            ## # if False:
            ## #     precursor_ids = tuple(c_rangetree.query_tree( q1_low, ssrcalc_low,
            ## #         q1_high,  ssrcalc_high, par.isotopes_up_to, isotope_correction))
            ## #     precursors = tuple([parentid_lookup[myid[0]] for myid in precursor_ids
            ## #                         #dont select myself
            ## #                        if parentid_lookup[myid[0]][2]  != peptide_key])
            ## #     collisions_per_peptide = c_getnonuis.calculate_collisions_per_peptide(
            ## #         transitions, precursors, q3_low, q3_high, par.q3_window, par.ppm)

            non_uis_list = [{} for i in range(MAX_UIS + 1)]
            for order in range(1, MAX_UIS + 1):
                non_uis_list[order] = c_getnonuis.get_non_uis(
                    collisions_per_peptide_obj, order)

            oldtime += time.time() - mystart

            non_uis_list_old_way = [set(kk.keys()) for kk in non_uis_list]
            non_uis_list_len = [len(kk) for kk in non_uis_list_old_way[1:]]

            ###################################
            # Assert equality, print out result
            self.assertEqual(newresult, non_uis_list_len)

            mys =  "%s\t%0.4fms\t%0.2fms\t\t%0.2fms\t\t>>\t%0.2f" % \
            (ii, transitiontime *1000/ ii,
                    newtime*1000/ii, oldtime*1000/ii,
            oldtime *1.0 / newtime)
            #start a new line for each output?
            #mys += '\t%s\t%s' % ( len(precursors), len(precursor_new) )
            if False: mys += '\n'

            self.ESC = chr(27)
            sys.stdout.write(self.ESC + '[2K')
            if self._cursor:
                sys.stdout.write(self.ESC + '[u')
            self._cursor = True
            sys.stdout.write(self.ESC + '[s')
            sys.stdout.write(mys)
            sys.stdout.flush()

        # except Exception as inst:
        #     print "something went wrong"
        #     print inst

    #@attr('comparison')
    def test_two_table_mysql(self):
        """Compare the two table vs the one table MySQL approach
            
            Here we are comparing the old (querying the transitions database as
            well as the precursor database) and the new way (only query the
            precursor database and calculate the transitions on the fly) way of
            calculating the collisions.
            """

        print '\n' * 1
        print "Comparing one vs two table MySQL solution"
        par = self.par
        cursor = self.cursor

        mycollider = collider.SRMcollider()
        mypepids = _get_unique_pepids(par, cursor)
        self.mycollider.pepids = mypepids
        self.mycollider.calcinner = 0
        shuffle(self.mycollider.pepids)
        self.mycollider.pepids = self.mycollider.pepids[:self.limit]

        MAX_UIS = 5
        c_newuistime = 0
        oldtime = 0
        c_fromprecursortime = 0
        oldsql = 0
        newsql = 0
        oldcalctime = 0
        localsql = 0
        self._cursor = False
        print "oldtime = get UIS from collisions and transitions (getting all collisions from the transitions db)"
        print "cuis + oldsql = as oldtime but calculate UIS in C++"
        print "py+newsql = only get the precursors from the db and calculate collisions in Python"
        print "ctime + newsql = only get the precursors from the db and calculate collisions in C++"
        print "new = use fast SQL and C++ code"
        print "old = use slow SQL and Python code"
        print "i\toldtime\tcuis+oldsql\tpy+newsql\tctime+newsql\t>>>\toldsql\tnewsql\t...\t...\tspeedup"
        for kk, pep in enumerate(self.mycollider.pepids):
            ii = kk + 1
            p_id = pep['parent_id']
            q1 = pep['q1']
            q3_low, q3_high = par.get_q3range_transitions()
            precursor = Precursor(q1=pep['q1'],
                                  transition_group=pep['transition_group'],
                                  parent_id=pep['parent_id'],
                                  modified_sequence=pep['mod_sequence'],
                                  ssrcalc=pep['ssrcalc'])
            transitions = collider.calculate_transitions_ch(
                ((q1, pep['mod_sequence'], p_id), ), [1], q3_low, q3_high)
            #fake some srm_id for the transitions
            transitions = tuple([(t[0], i) for i, t in enumerate(transitions)])
            ##### transitions = self.mycollider._get_all_transitions(par, pep, cursor)
            nr_transitions = len(transitions)
            if nr_transitions == 0: continue  #no transitions in this window
            #
            mystart = time.time()
            collisions = list(
                self.mycollider._get_all_collisions_calculate_new(
                    par, precursor, cursor))
            oldcolllen = len(collisions)
            oldcalctime += time.time() - mystart
            #
            mystart = time.time()
            collisions = _get_all_collisions(mycollider,
                                             par,
                                             pep,
                                             cursor,
                                             transitions=transitions)
            oldcsqllen = len(collisions)
            oldsql += time.time() - mystart
            #
            par.query2_add = ' and isotope_nr = 0 '  # due to the new handling of isotopes
            mystart = time.time()
            self.mycollider.mysqlnewtime = 0
            precursors = self.mycollider._get_all_precursors(
                par, precursor, cursor)
            newsql += time.time() - mystart
            #
            mystart = time.time()
            #precursors = self.mycollider._get_all_precursors(par, pep, cursor_l)
            localsql += time.time() - mystart
            par.query2_add = ''  # due to the new handling of isotopes
            #
            mystart = time.time()
            non_uis_list = get_non_UIS_from_transitions(
                transitions, tuple(collisions), par, MAX_UIS)
            cnewuis = non_uis_list
            c_newuistime += time.time() - mystart
            #
            mystart = time.time()
            non_uis_list = get_non_UIS_from_transitions_old(
                transitions, collisions, par, MAX_UIS)
            oldnonuislist = non_uis_list
            oldtime += time.time() - mystart
            #
            mystart = time.time()
            q3_low, q3_high = par.get_q3range_transitions()
            collisions_per_peptide = c_getnonuis.calculate_collisions_per_peptide_other_ion_series(
                transitions, precursors, par, q3_low, q3_high, par.q3_window,
                par.ppm, False)
            non_uis_list = [{} for i in range(MAX_UIS + 1)]
            for order in range(1, MAX_UIS + 1):
                non_uis_list[order] = c_getnonuis.get_non_uis(
                    collisions_per_peptide, order)
            c_fromprecursortime += time.time() - mystart

            newl = [len(n) for n in non_uis_list]
            self.assertEqual(newl, [len(o) for o in cnewuis])
            self.assertEqual(newl, [len(o) for o in oldnonuislist])

            non_uis_list = [set(k.keys()) for k in non_uis_list]
            cnewuis = [set(k.keys()) for k in cnewuis]

            self.assertEqual(non_uis_list, cnewuis)
            self.assertEqual(non_uis_list, oldnonuislist)

            mys =  "%s\t%0.fms\t%0.fms\t\t%0.fms\t\t%0.2fms\t\t>>>\t%0.fms\t%0.2fms\t...\t...\t%0.2f" % \
             (ii,  #i
             (oldtime + oldsql)*1000/ii,  #oldtime
             (c_newuistime+oldsql)*1000/ii, #cuis + oldsql
             (oldcalctime + newsql + oldtime)*1000/ii,  #newsql
             (c_fromprecursortime + newsql)*1000/ii,  #ctime+newsql
             #(c_fromprecursortime + localsql)*1000/ii,

             oldsql*1000/ii, #newsql
             newsql*1000/ii, #oldsql
             #localsql*1000/ii,
             #oldtime / c_newuistime
             (oldtime + oldsql) / (c_fromprecursortime + newsql)
            )

            self.ESC = chr(27)
            sys.stdout.write(self.ESC + '[2K')
            if self._cursor:
                sys.stdout.write(self.ESC + '[u')
            self._cursor = True
            sys.stdout.write(self.ESC + '[s')
            sys.stdout.write(mys)
            sys.stdout.flush()

    #@attr('comparison')
    def test_mysql_vs_integrated(self):
        """Compare the one table MySQL approach vs the fully integrated Cpp approach
            
            Here we are comparing the old (querying the transitions database as
            well as the precursor database) and the new way (only query the
            precursor database and calculate the transitions on the fly) way of
            calculating the collisions.
            """

        print '\n' * 1
        print "Comparing one table MySQL solution vs integrated solution"
        par = self.par
        cursor = self.cursor

        mypepids = [
            {
                'mod_sequence': r[0],
                'peptide_key': r[1],
                'transition_group': r[1],
                'parent_id': r[2],
                'q1_charge': r[3],
                'q1': r[4],
                'ssrcalc': r[5],
            } for r in self.alltuples if r[3] == 2  #charge is 2
            and r[6] == 0  #isotope is 0
            and r[4] >= self.min_q1 and r[4] < self.max_q1
        ]

        mycollider = collider.SRMcollider()
        #mypepids = _get_unique_pepids(par, cursor)
        self.mycollider.pepids = mypepids
        self.mycollider.calcinner = 0
        shuffle(self.mycollider.pepids)
        self.mycollider.pepids = self.mycollider.pepids[:self.limit]

        import c_rangetree
        r = c_rangetree.ExtendedRangetree_Q1_RT.create()
        r.new_rangetree()
        r.create_tree(tuple(self.alltuples_isotope_correction))
        #c_integrated.create_tree(tuple(self.alltuples_isotope_correction))

        MAX_UIS = 5
        c_newuistime = 0
        oldtime = 0
        c_fromprecursortime = 0
        oldsql = 0
        newsql = 0
        newtime = 0
        oldcalctime = 0
        localsql = 0
        self._cursor = False
        print "i\toldtime\t\tnewtime\t>>\tspeedup"
        for kk, pep in enumerate(self.mycollider.pepids):
            ii = kk + 1
            p_id = pep['parent_id']
            q1 = pep['q1']
            q3_low, q3_high = par.get_q3range_transitions()
            q1_low = q1 - par.q1_window
            q1_high = q1 + par.q1_window
            ssrcalc = pep['ssrcalc']
            peptide_key = pep['peptide_key']

            #correct rounding errors, s.t. we get the same results as before!
            ssrcalc_low = ssrcalc - par.ssrcalc_window + 0.001
            ssrcalc_high = ssrcalc + par.ssrcalc_window - 0.001
            isotope_correction = par.isotopes_up_to * Residues.mass_diffC13 / min(
                par.parent_charges)

            precursor = Precursor(q1=pep['q1'],
                                  transition_group=pep['transition_group'],
                                  parent_id=pep['parent_id'],
                                  modified_sequence=pep['mod_sequence'],
                                  ssrcalc=pep['ssrcalc'])

            transitions = collider.calculate_transitions_ch(
                ((q1, pep['mod_sequence'], p_id), ), [1], q3_low, q3_high)
            #fake some srm_id for the transitions
            transitions = tuple([(t[0], i) for i, t in enumerate(transitions)])
            ##### transitions = self.mycollider._get_all_transitions(par, pep, cursor)
            nr_transitions = len(transitions)
            if nr_transitions == 0: continue  #no transitions in this window

            ###################################
            # Old way to calculate non_uislist
            #  - get all precursors from the SQL database
            #  - calculate collisions per peptide in C++

            par.query2_add = ' and isotope_nr = 0 '  # due to the new handling of isotopes
            mystart = time.time()
            self.mycollider.mysqlnewtime = 0
            precursors = self.mycollider._get_all_precursors(
                par, precursor, cursor)
            newsql += time.time() - mystart

            mystart = time.time()
            q3_low, q3_high = par.get_q3range_transitions()
            collisions_per_peptide = c_getnonuis.calculate_collisions_per_peptide_other_ion_series(
                transitions, precursors, par, q3_low, q3_high, par.q3_window,
                par.ppm, False)
            non_uis_list = [{} for i in range(MAX_UIS + 1)]
            for order in range(1, MAX_UIS + 1):
                non_uis_list[order] = c_getnonuis.get_non_uis(
                    collisions_per_peptide, order)
            c_fromprecursortime += time.time() - mystart

            newl = [len(n) for n in non_uis_list]
            non_uis_list_old_way = [set(kk.keys()) for kk in non_uis_list]
            non_uis_list_len = [len(kk) for kk in non_uis_list_old_way[1:]]

            ###################################
            # New way to calculate non_uislist
            #  - start out from transitions, get non_uislist
            mystart = time.time()
            newresult = c_integrated.wrap_all_bitwise(
                transitions,
                q1_low,
                ssrcalc_low,
                q1_high,
                ssrcalc_high,
                peptide_key,
                min(MAX_UIS, nr_transitions),
                par.q3_window,  #q3_low, q3_high,
                par.ppm,
                par.isotopes_up_to,
                isotope_correction,
                par,
                r)
            newtime += time.time() - mystart

            ###################################
            # Assert equality, print out result
            self.assertEqual(newresult, non_uis_list_len)

            mys =  "%s\t%0.1fms\t\t%0.2fms\t>>>\t%0.1f" % \
             (ii,  #i
             (c_fromprecursortime + newsql)*1000/ii,  # oldtime
             (newtime)*1000/ii, # newtime
             (c_fromprecursortime + newsql) / (newtime), # speedup
            )

            self.ESC = chr(27)
            sys.stdout.write(self.ESC + '[2K')
            if self._cursor:
                sys.stdout.write(self.ESC + '[u')
            self._cursor = True
            sys.stdout.write(self.ESC + '[s')
            sys.stdout.write(mys)
            sys.stdout.flush()
예제 #9
0
  def setUp(self):

    self.transitions = transitions_def1
    self.collisions  = collisions_def1

    self.EPSILON = 10**-5

    self.min_q1 = 400
    self.max_q1 = 1500

    par = collider.SRM_parameters()
    par.q1_window = 1 / 2.0
    par.q3_window = 1 / 2.0
    par.ssrcalc_window = 10 / 2.0
    par.ppm = False
    par.isotopes_up_to = 3
    par.q3_low = 400
    par.q3_high = 1400
    par.max_uis = 5
    par.peptide_tables = [ PEPTIDE_TABLE_NAME ]
    par.mysql_config = '~/.my.cnf'
    par.sqlite_database = test_shared.SQLITE_DATABASE_LOCATION
    print par.sqlite_database 
    par.use_sqlite = USE_SQLITE
    par.quiet = False

    par.bions      =  True
    par.yions      =  True
    par.aions      =  False
    par.aMinusNH3  =  False
    par.bMinusH2O  =  False
    par.bMinusNH3  =  False
    par.bPlusH2O   =  False
    par.yMinusH2O  =  False
    par.yMinusNH3  =  False
    par.cions      =  False
    par.xions      =  False
    par.zions      =  False
    par.MMinusH2O  =  False
    par.MMinusNH3  =  False
    par.q3_range = [par.q3_low, par.q3_high]
    par.set_default_vars()
    par.eval()

    self.par = par
    self.R = Residues('mono')

    self.acollider = collider.SRMcollider()
    self.aparamset = collider.testcase()

    self.db = par.get_db()


    # Get the precursors
    ###########################################################################
    myprecursors = Precursors()
    cursor = self.db.cursor()
    myprecursors.getFromDB(par, cursor, self.min_q1 - par.q1_window, self.max_q1 + par.q1_window)
    testrange = myprecursors.build_rangetree()
    self.precursors_to_evaluate = myprecursors.getPrecursorsToEvaluate(self.min_q1, self.max_q1)
    myprecursors.build_parent_id_lookup()
    myprecursors.build_transition_group_lookup()
    self.myprecursors = myprecursors
    cursor.close()
예제 #10
0
    sys.exit()

#local arguments
exp_key = sys.argv[1]
min_q1 = float(sys.argv[2])
max_q1 = float(sys.argv[3])
db = par.get_db()
cursor = db.cursor()

if par.max_uis ==0: 
    print "Please change --max_uis option, 0 does not make sense here"
    sys.exit()

# Get the precursors
###########################################################################
myprecursors = Precursors()
myprecursors.getFromDB(par, db.cursor(), min_q1 - par.q1_window, max_q1 + par.q1_window)
if not options.query_peptide_table is None and not options.query_peptide_table == "":
  print "Using a different table for the query peptides than for the background peptides!"
  print "Will use table %s " % options.query_peptide_table
  query_precursors = Precursors()
  query_par = copy(par)
  query_par.peptide_tables = [options.query_peptide_table]
  query_precursors.getFromDB(query_par, db.cursor(), min_q1 - par.q1_window, max_q1 + par.q1_window)
  precursors_to_evaluate = query_precursors.getPrecursorsToEvaluate(min_q1, max_q1)
else:
  precursors_to_evaluate = myprecursors.getPrecursorsToEvaluate(min_q1, max_q1)
isotope_correction = par.calculate_isotope_correction()
r_tree = myprecursors.build_extended_rangetree ()

print "Will evaluate %s precursors" % len(precursors_to_evaluate)