def test_mysql_vs_integrated(self):
    """Compare the one table MySQL approach vs the fully integrated Cpp approach

    For every selected precursor the non-UIS counts are computed twice:

    - "old" way: fetch the precursors through
      ``self.mycollider._get_all_precursors`` and evaluate the collisions
      per peptide with ``c_getnonuis``.
    - "new" way: one call to ``c_integrated.wrap_all_bitwise`` backed by a
      range tree built from ``self.alltuples_isotope_correction``.

    Both results are asserted to be equal and a running timing line
    (average time per peptide for each way plus the speedup) is rewritten
    in place on stdout.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('\n' * 1)
    print("Comparing one table MySQL solution vs integrated solution")
    par = self.par
    cursor = self.cursor

    # Build the peptide records from the precursor tuples, keeping only
    # entries with charge 2 and isotope 0 whose q1 lies in
    # [self.min_q1, self.max_q1).
    mypepids = [
        {
            'mod_sequence': row[0],
            'peptide_key': row[1],
            'transition_group': row[1],
            'parent_id': row[2],
            'q1_charge': row[3],
            'q1': row[4],
            'ssrcalc': row[5],
        }
        for row in self.alltuples
        if row[3] == 2  # charge is 2
        and row[6] == 0  # isotope is 0
        and row[4] >= self.min_q1
        and row[4] < self.max_q1
    ]

    self.mycollider.pepids = mypepids
    self.mycollider.calcinner = 0
    shuffle(self.mycollider.pepids)
    self.mycollider.pepids = self.mycollider.pepids[:self.limit]

    import c_rangetree
    rangetree = c_rangetree.ExtendedRangetree_Q1_RT.create()
    rangetree.new_rangetree()
    rangetree.create_tree(tuple(self.alltuples_isotope_correction))

    MAX_UIS = 5
    c_fromprecursortime = 0  # accumulated C++ collision time (old way)
    newsql = 0               # accumulated precursor query time (old way)
    newtime = 0              # accumulated integrated time (new way)
    self._cursor = False     # becomes True once a cursor position was saved
    self.ESC = chr(27)
    print("i\toldtime\t\tnewtime\t>>\tspeedup")
    for kk, pep in enumerate(self.mycollider.pepids):
        ii = kk + 1
        p_id = pep['parent_id']
        q1 = pep['q1']
        q3_low, q3_high = par.get_q3range_transitions()
        q1_low = q1 - par.q1_window
        q1_high = q1 + par.q1_window
        ssrcalc = pep['ssrcalc']
        peptide_key = pep['peptide_key']

        # correct rounding errors, s.t. we get the same results as before!
        ssrcalc_low = ssrcalc - par.ssrcalc_window + 0.001
        ssrcalc_high = ssrcalc + par.ssrcalc_window - 0.001
        isotope_correction = (par.isotopes_up_to * Residues.mass_diffC13
                              / min(par.parent_charges))

        precursor = Precursor(q1=pep['q1'],
                              transition_group=pep['transition_group'],
                              parent_id=pep['parent_id'],
                              modified_sequence=pep['mod_sequence'],
                              ssrcalc=pep['ssrcalc'])

        transitions = collider.calculate_transitions_ch(
            ((q1, pep['mod_sequence'], p_id),), [1], q3_low, q3_high)
        # fake some srm_id for the transitions
        transitions = tuple((t[0], i) for i, t in enumerate(transitions))
        nr_transitions = len(transitions)
        if nr_transitions == 0:
            continue  # no transitions in this window

        ###################################
        # Old way to calculate non_uislist
        #  - get all precursors from the SQL database
        #  - calculate collisions per peptide in C++
        # NOTE(review): query2_add is never reset in this test; confirm
        # that no later user of self.par depends on its previous value.
        par.query2_add = ' and isotope_nr = 0 '  # due to the new handling of isotopes
        mystart = time.time()
        self.mycollider.mysqlnewtime = 0
        precursors = self.mycollider._get_all_precursors(par, precursor, cursor)
        newsql += time.time() - mystart

        mystart = time.time()
        q3_low, q3_high = par.get_q3range_transitions()
        collisions_per_peptide = c_getnonuis.calculate_collisions_per_peptide_other_ion_series(
            transitions, precursors, par, q3_low, q3_high,
            par.q3_window, par.ppm, False)
        non_uis_list = [{} for _ in range(MAX_UIS + 1)]
        for order in range(1, MAX_UIS + 1):
            non_uis_list[order] = c_getnonuis.get_non_uis(
                collisions_per_peptide, order)
        c_fromprecursortime += time.time() - mystart

        # Number of distinct non-UIS keys per order; index 0 is an unused
        # placeholder and therefore skipped. Distinct comprehension names
        # avoid clobbering the loop index under Python 2 scoping rules.
        non_uis_list_len = [len(set(entry.keys()))
                            for entry in non_uis_list[1:]]

        ###################################
        # New way to calculate non_uislist
        #  - start out from transitions, get non_uislist
        mystart = time.time()
        newresult = c_integrated.wrap_all_bitwise(
            transitions, q1_low, ssrcalc_low, q1_high, ssrcalc_high,
            peptide_key, min(MAX_UIS, nr_transitions), par.q3_window,
            par.ppm, par.isotopes_up_to, isotope_correction, par, rangetree)
        newtime += time.time() - mystart

        ###################################
        # Assert equality, print out result
        self.assertEqual(newresult, non_uis_list_len)

        old_total = c_fromprecursortime + newsql
        # time.time() has a finite resolution, so the accumulated time of
        # the fast path may still be 0.0; report an infinite speedup
        # instead of raising ZeroDivisionError.
        if newtime > 0:
            speedup = old_total / newtime
        else:
            speedup = float('inf')
        mys = "%s\t%0.1fms\t\t%0.2fms\t>>>\t%0.1f" % (
            ii,                     # i
            old_total * 1000 / ii,  # oldtime
            newtime * 1000 / ii,    # newtime
            speedup,                # speedup
        )

        # Rewrite the progress line in place using ANSI control sequences:
        # "[2K" erases the line, "[u" restores and "[s" saves the cursor.
        sys.stdout.write(self.ESC + '[2K')
        if self._cursor:
            sys.stdout.write(self.ESC + '[u')
        self._cursor = True
        sys.stdout.write(self.ESC + '[s')
        sys.stdout.write(mys)
        sys.stdout.flush()
def test_mysql_vs_integrated(self):
    """Compare the one table MySQL approach vs the fully integrated Cpp approach

    For every selected precursor the non-UIS counts are computed twice:

    - "old" way: fetch the precursors through
      ``self.mycollider._get_all_precursors`` and evaluate the collisions
      per peptide with ``c_getnonuis``.
    - "new" way: one call to ``c_integrated.wrap_all_bitwise`` backed by a
      range tree built from ``self.alltuples_isotope_correction``.

    Both results are asserted to be equal and a running timing line
    (average time per peptide for each way plus the speedup) is rewritten
    in place on stdout.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('\n' * 1)
    print("Comparing one table MySQL solution vs integrated solution")
    par = self.par
    cursor = self.cursor

    # Build the peptide records from the precursor tuples, keeping only
    # entries with charge 2 and isotope 0 whose q1 lies in
    # [self.min_q1, self.max_q1).
    mypepids = [
        {
            'mod_sequence': row[0],
            'peptide_key': row[1],
            'transition_group': row[1],
            'parent_id': row[2],
            'q1_charge': row[3],
            'q1': row[4],
            'ssrcalc': row[5],
        }
        for row in self.alltuples
        if row[3] == 2  # charge is 2
        and row[6] == 0  # isotope is 0
        and row[4] >= self.min_q1
        and row[4] < self.max_q1
    ]

    self.mycollider.pepids = mypepids
    self.mycollider.calcinner = 0
    shuffle(self.mycollider.pepids)
    self.mycollider.pepids = self.mycollider.pepids[:self.limit]

    import c_rangetree
    rangetree = c_rangetree.ExtendedRangetree_Q1_RT.create()
    rangetree.new_rangetree()
    rangetree.create_tree(tuple(self.alltuples_isotope_correction))

    MAX_UIS = 5
    c_fromprecursortime = 0  # accumulated C++ collision time (old way)
    newsql = 0               # accumulated precursor query time (old way)
    newtime = 0              # accumulated integrated time (new way)
    self._cursor = False     # becomes True once a cursor position was saved
    self.ESC = chr(27)
    print("i\toldtime\t\tnewtime\t>>\tspeedup")
    for kk, pep in enumerate(self.mycollider.pepids):
        ii = kk + 1
        p_id = pep['parent_id']
        q1 = pep['q1']
        q3_low, q3_high = par.get_q3range_transitions()
        q1_low = q1 - par.q1_window
        q1_high = q1 + par.q1_window
        ssrcalc = pep['ssrcalc']
        peptide_key = pep['peptide_key']

        # correct rounding errors, s.t. we get the same results as before!
        ssrcalc_low = ssrcalc - par.ssrcalc_window + 0.001
        ssrcalc_high = ssrcalc + par.ssrcalc_window - 0.001
        isotope_correction = (par.isotopes_up_to * Residues.mass_diffC13
                              / min(par.parent_charges))

        precursor = Precursor(q1=pep['q1'],
                              transition_group=pep['transition_group'],
                              parent_id=pep['parent_id'],
                              modified_sequence=pep['mod_sequence'],
                              ssrcalc=pep['ssrcalc'])

        transitions = collider.calculate_transitions_ch(
            ((q1, pep['mod_sequence'], p_id),), [1], q3_low, q3_high)
        # fake some srm_id for the transitions
        transitions = tuple((t[0], i) for i, t in enumerate(transitions))
        nr_transitions = len(transitions)
        if nr_transitions == 0:
            continue  # no transitions in this window

        ###################################
        # Old way to calculate non_uislist
        #  - get all precursors from the SQL database
        #  - calculate collisions per peptide in C++
        # NOTE(review): query2_add is never reset in this test; confirm
        # that no later user of self.par depends on its previous value.
        par.query2_add = ' and isotope_nr = 0 '  # due to the new handling of isotopes
        mystart = time.time()
        self.mycollider.mysqlnewtime = 0
        precursors = self.mycollider._get_all_precursors(par, precursor, cursor)
        newsql += time.time() - mystart

        mystart = time.time()
        q3_low, q3_high = par.get_q3range_transitions()
        collisions_per_peptide = c_getnonuis.calculate_collisions_per_peptide_other_ion_series(
            transitions, precursors, par, q3_low, q3_high,
            par.q3_window, par.ppm, False)
        non_uis_list = [{} for _ in range(MAX_UIS + 1)]
        for order in range(1, MAX_UIS + 1):
            non_uis_list[order] = c_getnonuis.get_non_uis(
                collisions_per_peptide, order)
        c_fromprecursortime += time.time() - mystart

        # Number of distinct non-UIS keys per order; index 0 is an unused
        # placeholder and therefore skipped. Distinct comprehension names
        # avoid clobbering the loop index under Python 2 scoping rules.
        non_uis_list_len = [len(set(entry.keys()))
                            for entry in non_uis_list[1:]]

        ###################################
        # New way to calculate non_uislist
        #  - start out from transitions, get non_uislist
        mystart = time.time()
        newresult = c_integrated.wrap_all_bitwise(
            transitions, q1_low, ssrcalc_low, q1_high, ssrcalc_high,
            peptide_key, min(MAX_UIS, nr_transitions), par.q3_window,
            par.ppm, par.isotopes_up_to, isotope_correction, par, rangetree)
        newtime += time.time() - mystart

        ###################################
        # Assert equality, print out result
        self.assertEqual(newresult, non_uis_list_len)

        old_total = c_fromprecursortime + newsql
        # time.time() has a finite resolution, so the accumulated time of
        # the fast path may still be 0.0; report an infinite speedup
        # instead of raising ZeroDivisionError.
        if newtime > 0:
            speedup = old_total / newtime
        else:
            speedup = float('inf')
        mys = "%s\t%0.1fms\t\t%0.2fms\t>>>\t%0.1f" % (
            ii,                     # i
            old_total * 1000 / ii,  # oldtime
            newtime * 1000 / ii,    # newtime
            speedup,                # speedup
        )

        # Rewrite the progress line in place using ANSI control sequences:
        # "[2K" erases the line, "[u" restores and "[s" saves the cursor.
        sys.stdout.write(self.ESC + '[2K')
        if self._cursor:
            sys.stdout.write(self.ESC + '[u')
        self._cursor = True
        sys.stdout.write(self.ESC + '[s')
        sys.stdout.write(mys)
        sys.stdout.flush()
def test_two_table_mysql(self):
    """Compare the two table vs the one table MySQL approach

    Here we are comparing the old way (querying the transitions database
    as well as the precursor database) and the new way (only query the
    precursor database and calculate the transitions on the fly) of
    calculating the collisions.

    For each peptide the non-UIS lists produced by
    ``get_non_UIS_from_transitions``, ``get_non_UIS_from_transitions_old``
    and ``c_getnonuis`` are asserted to agree, and a running timing line
    is rewritten in place on stdout.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('\n' * 1)
    print("Comparing one vs two table MySQL solution")
    par = self.par
    cursor = self.cursor

    mycollider = collider.SRMcollider()
    self.mycollider.pepids = _get_unique_pepids(par, cursor)
    self.mycollider.calcinner = 0
    shuffle(self.mycollider.pepids)
    self.mycollider.pepids = self.mycollider.pepids[:self.limit]

    MAX_UIS = 5
    # Accumulated wall-clock times of the individual stages.
    c_newuistime = 0
    oldtime = 0
    c_fromprecursortime = 0
    oldsql = 0
    newsql = 0
    oldcalctime = 0
    self._cursor = False  # becomes True once a cursor position was saved
    self.ESC = chr(27)
    print("oldtime = get UIS from collisions and transitions (getting all collisions from the transitions db)")
    print("cuis + oldsql = as oldtime but calculate UIS in C++")
    print("py+newsql = only get the precursors from the db and calculate collisions in Python")
    print("ctime + newsql = only get the precursors from the db and calculate collisions in C++")
    print("new = use fast SQL and C++ code")
    print("old = use slow SQL and Python code")
    print("i\toldtime\tcuis+oldsql\tpy+newsql\tctime+newsql\t>>>\toldsql\tnewsql\t...\t...\tspeedup")
    for kk, pep in enumerate(self.mycollider.pepids):
        ii = kk + 1
        p_id = pep['parent_id']
        q1 = pep['q1']
        q3_low, q3_high = par.get_q3range_transitions()
        precursor = Precursor(q1=pep['q1'],
                              transition_group=pep['transition_group'],
                              parent_id=pep['parent_id'],
                              modified_sequence=pep['mod_sequence'],
                              ssrcalc=pep['ssrcalc'])
        transitions = collider.calculate_transitions_ch(
            ((q1, pep['mod_sequence'], p_id),), [1], q3_low, q3_high)
        # fake some srm_id for the transitions
        transitions = tuple((t[0], i) for i, t in enumerate(transitions))
        nr_transitions = len(transitions)
        if nr_transitions == 0:
            continue  # no transitions in this window

        # Collisions calculated from the precursors; only the run time is
        # recorded, the result itself is not compared against anything.
        mystart = time.time()
        list(self.mycollider._get_all_collisions_calculate_new(
            par, precursor, cursor))
        oldcalctime += time.time() - mystart

        # Collisions fetched the old way.
        mystart = time.time()
        collisions = _get_all_collisions(mycollider, par, pep, cursor,
                                         transitions=transitions)
        oldsql += time.time() - mystart

        # Precursors only (new SQL).
        par.query2_add = ' and isotope_nr = 0 '  # due to the new handling of isotopes
        mystart = time.time()
        self.mycollider.mysqlnewtime = 0
        try:
            precursors = self.mycollider._get_all_precursors(
                par, precursor, cursor)
        finally:
            # Always drop the isotope filter again so that a failing query
            # cannot leak it into later queries that use par.
            par.query2_add = ''
        newsql += time.time() - mystart

        # non-UIS from the collisions via get_non_UIS_from_transitions.
        mystart = time.time()
        cnewuis = get_non_UIS_from_transitions(
            transitions, tuple(collisions), par, MAX_UIS)
        c_newuistime += time.time() - mystart

        # non-UIS from the collisions via get_non_UIS_from_transitions_old.
        mystart = time.time()
        oldnonuislist = get_non_UIS_from_transitions_old(
            transitions, collisions, par, MAX_UIS)
        oldtime += time.time() - mystart

        # non-UIS from the precursors via c_getnonuis.
        mystart = time.time()
        q3_low, q3_high = par.get_q3range_transitions()
        collisions_per_peptide = c_getnonuis.calculate_collisions_per_peptide_other_ion_series(
            transitions, precursors, par, q3_low, q3_high,
            par.q3_window, par.ppm, False)
        non_uis_list = [{} for _ in range(MAX_UIS + 1)]
        for order in range(1, MAX_UIS + 1):
            non_uis_list[order] = c_getnonuis.get_non_uis(
                collisions_per_peptide, order)
        c_fromprecursortime += time.time() - mystart

        # First compare the sizes, then the actual key sets.
        newl = [len(n) for n in non_uis_list]
        self.assertEqual(newl, [len(o) for o in cnewuis])
        self.assertEqual(newl, [len(o) for o in oldnonuislist])

        non_uis_list = [set(entry.keys()) for entry in non_uis_list]
        cnewuis = [set(entry.keys()) for entry in cnewuis]

        self.assertEqual(non_uis_list, cnewuis)
        self.assertEqual(non_uis_list, oldnonuislist)

        new_total = c_fromprecursortime + newsql
        # time.time() has a finite resolution, so the accumulated time of
        # the fast path may still be 0.0; report an infinite speedup
        # instead of raising ZeroDivisionError.
        if new_total > 0:
            speedup = (oldtime + oldsql) / new_total
        else:
            speedup = float('inf')
        mys = "%s\t%0.fms\t%0.fms\t\t%0.fms\t\t%0.2fms\t\t>>>\t%0.fms\t%0.2fms\t...\t...\t%0.2f" % (
            ii,                                            # i
            (oldtime + oldsql) * 1000 / ii,                # oldtime
            (c_newuistime + oldsql) * 1000 / ii,           # cuis + oldsql
            (oldcalctime + newsql + oldtime) * 1000 / ii,  # py + newsql
            new_total * 1000 / ii,                         # ctime + newsql
            oldsql * 1000 / ii,                            # oldsql
            newsql * 1000 / ii,                            # newsql
            speedup,                                       # speedup
        )

        # Rewrite the progress line in place using ANSI control sequences:
        # "[2K" erases the line, "[u" restores and "[s" saves the cursor.
        sys.stdout.write(self.ESC + '[2K')
        if self._cursor:
            sys.stdout.write(self.ESC + '[u')
        self._cursor = True
        sys.stdout.write(self.ESC + '[s')
        sys.stdout.write(mys)
        sys.stdout.flush()
def test_two_table_mysql(self):
    """Compare the two table vs the one table MySQL approach

    Here we are comparing the old way (querying the transitions database
    as well as the precursor database) and the new way (only query the
    precursor database and calculate the transitions on the fly) of
    calculating the collisions.

    For each peptide the non-UIS lists produced by
    ``get_non_UIS_from_transitions``, ``get_non_UIS_from_transitions_old``
    and ``c_getnonuis`` are asserted to agree, and a running timing line
    is rewritten in place on stdout.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('\n' * 1)
    print("Comparing one vs two table MySQL solution")
    par = self.par
    cursor = self.cursor

    mycollider = collider.SRMcollider()
    self.mycollider.pepids = _get_unique_pepids(par, cursor)
    self.mycollider.calcinner = 0
    shuffle(self.mycollider.pepids)
    self.mycollider.pepids = self.mycollider.pepids[:self.limit]

    MAX_UIS = 5
    # Accumulated wall-clock times of the individual stages.
    c_newuistime = 0
    oldtime = 0
    c_fromprecursortime = 0
    oldsql = 0
    newsql = 0
    oldcalctime = 0
    self._cursor = False  # becomes True once a cursor position was saved
    self.ESC = chr(27)
    print("oldtime = get UIS from collisions and transitions (getting all collisions from the transitions db)")
    print("cuis + oldsql = as oldtime but calculate UIS in C++")
    print("py+newsql = only get the precursors from the db and calculate collisions in Python")
    print("ctime + newsql = only get the precursors from the db and calculate collisions in C++")
    print("new = use fast SQL and C++ code")
    print("old = use slow SQL and Python code")
    print("i\toldtime\tcuis+oldsql\tpy+newsql\tctime+newsql\t>>>\toldsql\tnewsql\t...\t...\tspeedup")
    for kk, pep in enumerate(self.mycollider.pepids):
        ii = kk + 1
        p_id = pep['parent_id']
        q1 = pep['q1']
        q3_low, q3_high = par.get_q3range_transitions()
        precursor = Precursor(q1=pep['q1'],
                              transition_group=pep['transition_group'],
                              parent_id=pep['parent_id'],
                              modified_sequence=pep['mod_sequence'],
                              ssrcalc=pep['ssrcalc'])
        transitions = collider.calculate_transitions_ch(
            ((q1, pep['mod_sequence'], p_id),), [1], q3_low, q3_high)
        # fake some srm_id for the transitions
        transitions = tuple((t[0], i) for i, t in enumerate(transitions))
        nr_transitions = len(transitions)
        if nr_transitions == 0:
            continue  # no transitions in this window

        # Collisions calculated from the precursors; only the run time is
        # recorded, the result itself is not compared against anything.
        mystart = time.time()
        list(self.mycollider._get_all_collisions_calculate_new(
            par, precursor, cursor))
        oldcalctime += time.time() - mystart

        # Collisions fetched the old way.
        mystart = time.time()
        collisions = _get_all_collisions(mycollider, par, pep, cursor,
                                         transitions=transitions)
        oldsql += time.time() - mystart

        # Precursors only (new SQL).
        par.query2_add = ' and isotope_nr = 0 '  # due to the new handling of isotopes
        mystart = time.time()
        self.mycollider.mysqlnewtime = 0
        try:
            precursors = self.mycollider._get_all_precursors(
                par, precursor, cursor)
        finally:
            # Always drop the isotope filter again so that a failing query
            # cannot leak it into later queries that use par.
            par.query2_add = ''
        newsql += time.time() - mystart

        # non-UIS from the collisions via get_non_UIS_from_transitions.
        mystart = time.time()
        cnewuis = get_non_UIS_from_transitions(
            transitions, tuple(collisions), par, MAX_UIS)
        c_newuistime += time.time() - mystart

        # non-UIS from the collisions via get_non_UIS_from_transitions_old.
        mystart = time.time()
        oldnonuislist = get_non_UIS_from_transitions_old(
            transitions, collisions, par, MAX_UIS)
        oldtime += time.time() - mystart

        # non-UIS from the precursors via c_getnonuis.
        mystart = time.time()
        q3_low, q3_high = par.get_q3range_transitions()
        collisions_per_peptide = c_getnonuis.calculate_collisions_per_peptide_other_ion_series(
            transitions, precursors, par, q3_low, q3_high,
            par.q3_window, par.ppm, False)
        non_uis_list = [{} for _ in range(MAX_UIS + 1)]
        for order in range(1, MAX_UIS + 1):
            non_uis_list[order] = c_getnonuis.get_non_uis(
                collisions_per_peptide, order)
        c_fromprecursortime += time.time() - mystart

        # First compare the sizes, then the actual key sets.
        newl = [len(n) for n in non_uis_list]
        self.assertEqual(newl, [len(o) for o in cnewuis])
        self.assertEqual(newl, [len(o) for o in oldnonuislist])

        non_uis_list = [set(entry.keys()) for entry in non_uis_list]
        cnewuis = [set(entry.keys()) for entry in cnewuis]

        self.assertEqual(non_uis_list, cnewuis)
        self.assertEqual(non_uis_list, oldnonuislist)

        new_total = c_fromprecursortime + newsql
        # time.time() has a finite resolution, so the accumulated time of
        # the fast path may still be 0.0; report an infinite speedup
        # instead of raising ZeroDivisionError.
        if new_total > 0:
            speedup = (oldtime + oldsql) / new_total
        else:
            speedup = float('inf')
        mys = "%s\t%0.fms\t%0.fms\t\t%0.fms\t\t%0.2fms\t\t>>>\t%0.fms\t%0.2fms\t...\t...\t%0.2f" % (
            ii,                                            # i
            (oldtime + oldsql) * 1000 / ii,                # oldtime
            (c_newuistime + oldsql) * 1000 / ii,           # cuis + oldsql
            (oldcalctime + newsql + oldtime) * 1000 / ii,  # py + newsql
            new_total * 1000 / ii,                         # ctime + newsql
            oldsql * 1000 / ii,                            # oldsql
            newsql * 1000 / ii,                            # newsql
            speedup,                                       # speedup
        )

        # Rewrite the progress line in place using ANSI control sequences:
        # "[2K" erases the line, "[u" restores and "[s" saves the cursor.
        sys.stdout.write(self.ESC + '[2K')
        if self._cursor:
            sys.stdout.write(self.ESC + '[u')
        self._cursor = True
        sys.stdout.write(self.ESC + '[s')
        sys.stdout.write(mys)
        sys.stdout.flush()