def test_integrated_threepeptides(self):
    """Check c_integrated.wrap_all_bitwise against fixed expected counts.

    Builds an extended Q1/RT range tree from ``test_shared.runprecursors1``
    and queries it with the transitions from ``test_shared.runtransitions1``
    around q1 = 450 for three successively narrower window settings.  Each
    scenario must return the hard-coded list of five counts (one entry per
    order up to MAX_UIS).

    Note: the test mutates ``self.par`` (``max_uis``, ``q1_window``,
    ``q3_window``, ``isotopes_up_to``) and does not restore it.
    """
    import c_rangetree

    par = self.par
    par.max_uis = 15

    # Pair the first field of every transition with its positional index,
    # which serves as the transition identifier in the query below.
    transitions = tuple(
        [(t[0], idx) for idx, t in enumerate(test_shared.runtransitions1)])

    # Re-arrange each precursor record into the 9-field layout handed to
    # create_tree(); the trailing fields are constants for this fixture.
    alltuples = tuple([
        (p[1], p[2], p[2], p[3], p[0], 25, -1, -1, 0)
        for p in test_shared.runprecursors1])

    r = c_rangetree.ExtendedRangetree_Q1_RT.create()
    r.new_rangetree()
    r.create_tree(alltuples)

    # Settings shared by all scenarios.
    q1 = 450
    ssrcalc_low = 0
    ssrcalc_high = 100
    isotope_correction = 1
    MAX_UIS = 5
    par.isotopes_up_to = 2

    # (q1_window, q3_window, expected result).  A q3_window of None means
    # "leave par.q3_window as configured on self.par", which is what the
    # first scenario relies on.
    scenarios = (
        (5, None, [12, 66, 220, 495, 790]),
        (0.1, 0.1, [12, 35, 20, 3, 0]),
        (0.08, 0.01, [10, 10, 5, 1, 0]),
    )

    for q1_window, q3_window, expected in scenarios:
        par.q1_window = q1_window
        if q3_window is not None:
            par.q3_window = q3_window

        result = c_integrated.wrap_all_bitwise(
            transitions,
            q1 - par.q1_window, ssrcalc_low,
            q1 + par.q1_window, ssrcalc_high,
            -1, MAX_UIS, par.q3_window, par.ppm,
            par.isotopes_up_to, isotope_correction, par, r)

        # Spell out both values: a custom message replaces the default
        # diff unless longMessage is enabled on the test case.
        self.assertEqual(
            result, expected,
            "q1_window=%s q3_window=%s: got %s, expected %s"
            % (par.q1_window, par.q3_window, result, expected))
def test_mysql_vs_integrated(self):
    """Compare the one table MySQL approach vs the fully integrated Cpp approach

    Here we are comparing the old way (fetch the interfering precursors
    from the SQL database and compute the collisions per peptide in C++)
    with the new way (a single call to c_integrated.wrap_all_bitwise on a
    C++ range tree holding all precursors).

    For every selected peptide the number of non-UIS combinations per
    order (1..MAX_UIS) must be identical for both approaches.  A running
    timing comparison is written to stdout on a single, continuously
    rewritten terminal line.
    """
    print('\n' * 1)
    print("Comparing one table MySQL solution vs integrated solution")
    par = self.par
    cursor = self.cursor

    # Select doubly charged, monoisotopic precursors within the Q1 range.
    mypepids = [
        {
            'mod_sequence': row[0],
            'peptide_key': row[1],
            'transition_group': row[1],
            'parent_id': row[2],
            'q1_charge': row[3],
            'q1': row[4],
            'ssrcalc': row[5],
        }
        for row in self.alltuples
        if row[3] == 2  # charge is 2
        and row[6] == 0  # isotope is 0
        and row[4] >= self.min_q1
        and row[4] < self.max_q1
    ]

    # NOTE(review): this local instance is never used below; the test works
    # on self.mycollider (presumably created in setUp -- confirm).  The
    # construction is kept in case it has side effects.
    mycollider = collider.SRMcollider()

    self.mycollider.pepids = mypepids
    self.mycollider.calcinner = 0
    shuffle(self.mycollider.pepids)
    self.mycollider.pepids = self.mycollider.pepids[:self.limit]

    import c_rangetree
    r = c_rangetree.ExtendedRangetree_Q1_RT.create()
    r.new_rangetree()
    r.create_tree(tuple(self.alltuples_isotope_correction))

    MAX_UIS = 5
    # Accumulated wall-clock time of the individual stages.
    c_fromprecursortime = 0
    newsql = 0
    newtime = 0
    self._cursor = False
    print("i\toldtime\t\tnewtime\t>>\tspeedup")

    for kk, pep in enumerate(self.mycollider.pepids):
        ii = kk + 1
        p_id = pep['parent_id']
        q1 = pep['q1']
        q3_low, q3_high = par.get_q3range_transitions()
        q1_low = q1 - par.q1_window
        q1_high = q1 + par.q1_window
        ssrcalc = pep['ssrcalc']
        peptide_key = pep['peptide_key']

        # correct rounding errors, s.t. we get the same results as before!
        ssrcalc_low = ssrcalc - par.ssrcalc_window + 0.001
        ssrcalc_high = ssrcalc + par.ssrcalc_window - 0.001
        isotope_correction = (par.isotopes_up_to * Residues.mass_diffC13
                              / min(par.parent_charges))

        precursor = Precursor(
            q1=pep['q1'],
            transition_group=pep['transition_group'],
            parent_id=pep['parent_id'],
            modified_sequence=pep['mod_sequence'],
            ssrcalc=pep['ssrcalc'])

        transitions = collider.calculate_transitions_ch(
            ((q1, pep['mod_sequence'], p_id),), [1], q3_low, q3_high)
        # fake some srm_id for the transitions
        transitions = tuple(
            [(t[0], idx) for idx, t in enumerate(transitions)])
        nr_transitions = len(transitions)
        if nr_transitions == 0:
            continue  # no transitions in this window

        ###################################
        # Old way to calculate non_uislist
        #  - get all precursors from the SQL database
        #  - calculate collisions per peptide in C++
        par.query2_add = ' and isotope_nr = 0 '  # due to the new handling of isotopes
        mystart = time.time()
        self.mycollider.mysqlnewtime = 0
        precursors = self.mycollider._get_all_precursors(
            par, precursor, cursor)
        newsql += time.time() - mystart

        mystart = time.time()
        q3_low, q3_high = par.get_q3range_transitions()
        collisions_per_peptide = \
            c_getnonuis.calculate_collisions_per_peptide_other_ion_series(
                transitions, precursors, par, q3_low, q3_high,
                par.q3_window, par.ppm, False)
        # Index 0 is a placeholder so that the list can be indexed by order.
        non_uis_list = [{} for _ in range(MAX_UIS + 1)]
        for order in range(1, MAX_UIS + 1):
            non_uis_list[order] = c_getnonuis.get_non_uis(
                collisions_per_peptide, order)
        c_fromprecursortime += time.time() - mystart

        # Number of distinct non-UIS combinations per order.  The loop
        # variable deliberately differs from ``kk``: Python 2 list
        # comprehensions leak their variable into the enclosing scope.
        non_uis_list_len = [
            len(set(entry.keys())) for entry in non_uis_list[1:]]

        ###################################
        # New way to calculate non_uislist
        #  - start out from transitions, get non_uislist
        mystart = time.time()
        newresult = c_integrated.wrap_all_bitwise(
            transitions,
            q1_low, ssrcalc_low, q1_high, ssrcalc_high,
            peptide_key, min(MAX_UIS, nr_transitions), par.q3_window,
            par.ppm, par.isotopes_up_to, isotope_correction, par, r)
        newtime += time.time() - mystart

        ###################################
        # Assert equality, print out result
        self.assertEqual(newresult, non_uis_list_len)

        old_total = c_fromprecursortime + newsql
        # The integrated call may finish below the timer resolution; do not
        # let a zero denominator abort the test with ZeroDivisionError.
        if newtime > 0:
            speedup = old_total / newtime
        else:
            speedup = float('inf')

        mys = "%s\t%0.1fms\t\t%0.2fms\t>>>\t%0.1f" % (
            ii,                     # i
            old_total * 1000 / ii,  # oldtime
            newtime * 1000 / ii,    # newtime
            speedup,                # speedup
        )

        # Rewrite the status line in place using ANSI escape sequences:
        # erase line, restore the saved cursor position (from the second
        # report on), save the position again, then print.
        self.ESC = chr(27)
        sys.stdout.write(self.ESC + '[2K')
        if self._cursor:
            sys.stdout.write(self.ESC + '[u')
        self._cursor = True
        sys.stdout.write(self.ESC + '[s')
        sys.stdout.write(mys)
        sys.stdout.flush()
def test_integrated_cpp(self):
    """Compare the fully integrated vs the mixed C++ rangetree / Python solution.

    Here we are comparing the fully integrated solution of storing all
    precursors in a C++ range tree and never passing them to Python vs the
    solution where we store the precursors in the rangetree, pass them to
    Python and then evaluate them.

    For every evaluated precursor the number of non-UIS combinations per
    order (1..MAX_UIS) must be identical for both approaches.  A running
    timing comparison is written to stdout on a single, continuously
    rewritten terminal line.

    Note: ``self.precursors_to_evaluate`` is shuffled in place.
    """
    print('\n' * 1)
    print("Comparing fully integrated solution (c_integrated.wrap_all)")
    par = self.par

    all_precursors = self.precursors_to_evaluate
    shuffle(all_precursors)
    all_precursors = all_precursors[:self.limit_large]

    self.myprecursors.build_parent_id_lookup()
    # Return value is not needed here; the call is kept for whatever state
    # it sets up on self.myprecursors.
    self.myprecursors.build_rangetree()

    import c_rangetree
    r = c_rangetree.ExtendedRangetree_Q1_RT.create()
    r.new_rangetree()
    r.create_tree(tuple(self.alltuples_isotope_correction))

    MAX_UIS = 5
    # Accumulated wall-clock time of the individual stages.
    newtime = 0
    oldtime = 0
    transitiontime = 0
    self._cursor = False

    # Joining with a single space reproduces the separator that the
    # two-argument print statement inserted between its operands.
    print(" ".join(("Running experiment ", str(par.get_common_filename()))))
    print("calc_trans. = time to calculate the transitions of the precursor")
    print("old = use rangetree to get precursors, use C++ code to get collperpep")
    print("new = use rangetree to get precursors, use single C++ code to get collperpep")
    print("i\tcalc_trans.\tnew\t\told\t\t>>\tspeedup")

    for kk, precursor in enumerate(all_precursors):
        ii = kk + 1
        q1 = precursor.q1
        ssrcalc = precursor.ssrcalc
        sequence = precursor.modified_sequence
        peptide_key = precursor.transition_group
        p_id = precursor.parent_id
        q3_low, q3_high = par.get_q3range_transitions()

        # Time the transition calculation and the window set-up.
        mystart = time.time()
        transitions = c_getnonuis.calculate_transitions_ch(
            ((q1, sequence, p_id),), [1, 2], q3_low, q3_high)
        nr_transitions = len(transitions)
        # fake some srm_id for the transitions
        transitions = tuple(
            [(t[0], idx) for idx, t in enumerate(transitions)])
        q1_low = q1 - par.q1_window
        q1_high = q1 + par.q1_window
        # correct rounding errors, s.t. we get the same results as before!
        ssrcalc_low = ssrcalc - par.ssrcalc_window + 0.001
        ssrcalc_high = ssrcalc + par.ssrcalc_window - 0.001
        transitiontime += time.time() - mystart

        isotope_correction = (par.isotopes_up_to * Residues.mass_diffC13
                              / min(par.parent_charges))

        ###################################
        # New way to calculate non_uislist
        #  - start out from transitions, get non_uislist
        mystart = time.time()
        newresult = c_integrated.wrap_all_bitwise(
            transitions,
            q1_low, ssrcalc_low, q1_high, ssrcalc_high,
            peptide_key, min(MAX_UIS, nr_transitions), par.q3_window,
            par.ppm, par.isotopes_up_to, isotope_correction, par, r)
        newtime += time.time() - mystart

        ###################################
        # Old way to calculate non_uislist:
        #  - get collisions per peptide
        #  - get non_uislist
        mystart = time.time()
        collisions_per_peptide_obj = \
            self.myprecursors.get_collisions_per_peptide_from_rangetree(
                precursor,
                precursor.q1 - par.q1_window,
                precursor.q1 + par.q1_window,
                transitions, par, r)
        # Index 0 is a placeholder so that the list can be indexed by order.
        non_uis_list = [{} for _ in range(MAX_UIS + 1)]
        for order in range(1, MAX_UIS + 1):
            non_uis_list[order] = c_getnonuis.get_non_uis(
                collisions_per_peptide_obj, order)
        oldtime += time.time() - mystart

        # Number of distinct non-UIS combinations per order.  The loop
        # variable deliberately differs from ``kk``: Python 2 list
        # comprehensions leak their variable into the enclosing scope.
        non_uis_list_len = [
            len(set(entry.keys())) for entry in non_uis_list[1:]]

        ###################################
        # Assert equality, print out result
        self.assertEqual(newresult, non_uis_list_len)

        # The integrated call may finish below the timer resolution; do not
        # let a zero denominator abort the test with ZeroDivisionError.
        if newtime > 0:
            speedup = oldtime * 1.0 / newtime
        else:
            speedup = float('inf')

        mys = "%s\t%0.4fms\t%0.2fms\t\t%0.2fms\t\t>>\t%0.2f" % (
            ii,
            transitiontime * 1000 / ii,
            newtime * 1000 / ii,
            oldtime * 1000 / ii,
            speedup)

        # Rewrite the status line in place using ANSI escape sequences:
        # erase line, restore the saved cursor position (from the second
        # report on), save the position again, then print.
        self.ESC = chr(27)
        sys.stdout.write(self.ESC + '[2K')
        if self._cursor:
            sys.stdout.write(self.ESC + '[u')
        self._cursor = True
        sys.stdout.write(self.ESC + '[s')
        sys.stdout.write(mys)
        sys.stdout.flush()
def test_mysql_vs_integrated(self):
    """Compare the one table MySQL approach vs the fully integrated Cpp approach

    Here we are comparing the old way (fetch the interfering precursors
    from the SQL database and compute the collisions per peptide in C++)
    with the new way (a single call to c_integrated.wrap_all_bitwise on a
    C++ range tree holding all precursors).

    For every selected peptide the number of non-UIS combinations per
    order (1..MAX_UIS) must be identical for both approaches.  A running
    timing comparison is written to stdout on a single, continuously
    rewritten terminal line.
    """
    print('\n' * 1)
    print("Comparing one table MySQL solution vs integrated solution")
    par = self.par
    cursor = self.cursor

    # Select doubly charged, monoisotopic precursors within the Q1 range.
    mypepids = [
        {
            'mod_sequence': row[0],
            'peptide_key': row[1],
            'transition_group': row[1],
            'parent_id': row[2],
            'q1_charge': row[3],
            'q1': row[4],
            'ssrcalc': row[5],
        }
        for row in self.alltuples
        if row[3] == 2  # charge is 2
        and row[6] == 0  # isotope is 0
        and row[4] >= self.min_q1
        and row[4] < self.max_q1
    ]

    # NOTE(review): this local instance is never used below; the test works
    # on self.mycollider (presumably created in setUp -- confirm).  The
    # construction is kept in case it has side effects.
    mycollider = collider.SRMcollider()

    self.mycollider.pepids = mypepids
    self.mycollider.calcinner = 0
    shuffle(self.mycollider.pepids)
    self.mycollider.pepids = self.mycollider.pepids[:self.limit]

    import c_rangetree
    r = c_rangetree.ExtendedRangetree_Q1_RT.create()
    r.new_rangetree()
    r.create_tree(tuple(self.alltuples_isotope_correction))

    MAX_UIS = 5
    # Accumulated wall-clock time of the individual stages.
    sql_time = 0
    collision_time = 0
    integrated_time = 0
    self._cursor = False
    print("i\toldtime\t\tnewtime\t>>\tspeedup")

    for kk, pep in enumerate(self.mycollider.pepids):
        ii = kk + 1
        p_id = pep['parent_id']
        q1 = pep['q1']
        q3_low, q3_high = par.get_q3range_transitions()
        q1_low = q1 - par.q1_window
        q1_high = q1 + par.q1_window
        ssrcalc = pep['ssrcalc']
        peptide_key = pep['peptide_key']

        # correct rounding errors, s.t. we get the same results as before!
        ssrcalc_low = ssrcalc - par.ssrcalc_window + 0.001
        ssrcalc_high = ssrcalc + par.ssrcalc_window - 0.001
        isotope_correction = (par.isotopes_up_to * Residues.mass_diffC13
                              / min(par.parent_charges))

        precursor = Precursor(
            q1=pep['q1'],
            transition_group=pep['transition_group'],
            parent_id=pep['parent_id'],
            modified_sequence=pep['mod_sequence'],
            ssrcalc=pep['ssrcalc'])

        transitions = collider.calculate_transitions_ch(
            ((q1, pep['mod_sequence'], p_id),), [1], q3_low, q3_high)
        # fake some srm_id for the transitions
        transitions = tuple(
            [(t[0], idx) for idx, t in enumerate(transitions)])
        nr_transitions = len(transitions)
        if nr_transitions == 0:
            continue  # no transitions in this window

        ###################################
        # Old way to calculate non_uislist
        #  - get all precursors from the SQL database
        #  - calculate collisions per peptide in C++
        par.query2_add = ' and isotope_nr = 0 '  # due to the new handling of isotopes
        mystart = time.time()
        self.mycollider.mysqlnewtime = 0
        precursors = self.mycollider._get_all_precursors(
            par, precursor, cursor)
        sql_time += time.time() - mystart

        mystart = time.time()
        q3_low, q3_high = par.get_q3range_transitions()
        collisions_per_peptide = \
            c_getnonuis.calculate_collisions_per_peptide_other_ion_series(
                transitions, precursors, par, q3_low, q3_high,
                par.q3_window, par.ppm, False)
        # Index 0 is a placeholder so that the list can be indexed by order.
        non_uis_list = [{} for _ in range(MAX_UIS + 1)]
        for order in range(1, MAX_UIS + 1):
            non_uis_list[order] = c_getnonuis.get_non_uis(
                collisions_per_peptide, order)
        collision_time += time.time() - mystart

        # Number of distinct non-UIS combinations per order.  The loop
        # variable deliberately differs from ``kk``: Python 2 list
        # comprehensions leak their variable into the enclosing scope.
        non_uis_list_len = [
            len(set(entry.keys())) for entry in non_uis_list[1:]]

        ###################################
        # New way to calculate non_uislist
        #  - start out from transitions, get non_uislist
        mystart = time.time()
        newresult = c_integrated.wrap_all_bitwise(
            transitions,
            q1_low, ssrcalc_low, q1_high, ssrcalc_high,
            peptide_key, min(MAX_UIS, nr_transitions), par.q3_window,
            par.ppm, par.isotopes_up_to, isotope_correction, par, r)
        integrated_time += time.time() - mystart

        ###################################
        # Assert equality, print out result
        self.assertEqual(newresult, non_uis_list_len)

        old_total = collision_time + sql_time
        # The integrated call may finish below the timer resolution; do not
        # let a zero denominator abort the test with ZeroDivisionError.
        if integrated_time > 0:
            speedup = old_total / integrated_time
        else:
            speedup = float('inf')

        mys = "%s\t%0.1fms\t\t%0.2fms\t>>>\t%0.1f" % (
            ii,                           # i
            old_total * 1000 / ii,        # oldtime
            integrated_time * 1000 / ii,  # newtime
            speedup,                      # speedup
        )

        # Rewrite the status line in place using ANSI escape sequences:
        # erase line, restore the saved cursor position (from the second
        # report on), save the position again, then print.
        self.ESC = chr(27)
        sys.stdout.write(self.ESC + '[2K')
        if self._cursor:
            sys.stdout.write(self.ESC + '[u')
        self._cursor = True
        sys.stdout.write(self.ESC + '[s')
        sys.stdout.write(mys)
        sys.stdout.flush()
def test_integrated_cpp(self):
    """Compare the fully integrated vs the mixed C++ rangetree / Python solution.

    Here we are comparing the fully integrated solution of storing all
    precursors in a C++ range tree and never passing them to Python vs the
    solution where we store the precursors in the rangetree, pass them to
    Python and then evaluate them.

    For every evaluated precursor the number of non-UIS combinations per
    order (1..MAX_UIS) must be identical for both approaches.  A running
    timing comparison is written to stdout on a single, continuously
    rewritten terminal line.

    Note: ``self.precursors_to_evaluate`` is shuffled in place.
    """
    print('\n' * 1)
    print("Comparing fully integrated solution (c_integrated.wrap_all)")
    par = self.par

    all_precursors = self.precursors_to_evaluate
    shuffle(all_precursors)
    all_precursors = all_precursors[:self.limit_large]

    self.myprecursors.build_parent_id_lookup()
    # Return value is not needed here; the call is kept for whatever state
    # it sets up on self.myprecursors.
    self.myprecursors.build_rangetree()

    import c_rangetree
    r = c_rangetree.ExtendedRangetree_Q1_RT.create()
    r.new_rangetree()
    r.create_tree(tuple(self.alltuples_isotope_correction))

    MAX_UIS = 5
    # Accumulated wall-clock time of the individual stages.
    integrated_time = 0
    mixed_time = 0
    transition_time = 0
    self._cursor = False

    # Joining with a single space reproduces the separator that the
    # two-argument print statement inserted between its operands.
    print(" ".join(("Running experiment ", str(par.get_common_filename()))))
    print("calc_trans. = time to calculate the transitions of the precursor")
    print("old = use rangetree to get precursors, use C++ code to get collperpep")
    print("new = use rangetree to get precursors, use single C++ code to get collperpep")
    print("i\tcalc_trans.\tnew\t\told\t\t>>\tspeedup")

    for kk, precursor in enumerate(all_precursors):
        ii = kk + 1
        q1 = precursor.q1
        ssrcalc = precursor.ssrcalc
        sequence = precursor.modified_sequence
        peptide_key = precursor.transition_group
        p_id = precursor.parent_id
        q3_low, q3_high = par.get_q3range_transitions()

        # Time the transition calculation and the window set-up.
        mystart = time.time()
        transitions = c_getnonuis.calculate_transitions_ch(
            ((q1, sequence, p_id),), [1, 2], q3_low, q3_high)
        nr_transitions = len(transitions)
        # fake some srm_id for the transitions
        transitions = tuple(
            [(t[0], idx) for idx, t in enumerate(transitions)])
        q1_low = q1 - par.q1_window
        q1_high = q1 + par.q1_window
        # correct rounding errors, s.t. we get the same results as before!
        ssrcalc_low = ssrcalc - par.ssrcalc_window + 0.001
        ssrcalc_high = ssrcalc + par.ssrcalc_window - 0.001
        transition_time += time.time() - mystart

        isotope_correction = (par.isotopes_up_to * Residues.mass_diffC13
                              / min(par.parent_charges))

        ###################################
        # New way to calculate non_uislist
        #  - start out from transitions, get non_uislist
        mystart = time.time()
        newresult = c_integrated.wrap_all_bitwise(
            transitions,
            q1_low, ssrcalc_low, q1_high, ssrcalc_high,
            peptide_key, min(MAX_UIS, nr_transitions), par.q3_window,
            par.ppm, par.isotopes_up_to, isotope_correction, par, r)
        integrated_time += time.time() - mystart

        ###################################
        # Old way to calculate non_uislist:
        #  - get collisions per peptide
        #  - get non_uislist
        mystart = time.time()
        collisions_per_peptide_obj = \
            self.myprecursors.get_collisions_per_peptide_from_rangetree(
                precursor,
                precursor.q1 - par.q1_window,
                precursor.q1 + par.q1_window,
                transitions, par, r)
        # Index 0 is a placeholder so that the list can be indexed by order.
        non_uis_list = [{} for _ in range(MAX_UIS + 1)]
        for order in range(1, MAX_UIS + 1):
            non_uis_list[order] = c_getnonuis.get_non_uis(
                collisions_per_peptide_obj, order)
        mixed_time += time.time() - mystart

        # Number of distinct non-UIS combinations per order.  The loop
        # variable deliberately differs from ``kk``: Python 2 list
        # comprehensions leak their variable into the enclosing scope.
        non_uis_list_len = [
            len(set(entry.keys())) for entry in non_uis_list[1:]]

        ###################################
        # Assert equality, print out result
        self.assertEqual(newresult, non_uis_list_len)

        # The integrated call may finish below the timer resolution; do not
        # let a zero denominator abort the test with ZeroDivisionError.
        if integrated_time > 0:
            speedup = mixed_time * 1.0 / integrated_time
        else:
            speedup = float('inf')

        mys = "%s\t%0.4fms\t%0.2fms\t\t%0.2fms\t\t>>\t%0.2f" % (
            ii,
            transition_time * 1000 / ii,
            integrated_time * 1000 / ii,
            mixed_time * 1000 / ii,
            speedup)

        # Rewrite the status line in place using ANSI escape sequences:
        # erase line, restore the saved cursor position (from the second
        # report on), save the position again, then print.
        self.ESC = chr(27)
        sys.stdout.write(self.ESC + '[2K')
        if self._cursor:
            sys.stdout.write(self.ESC + '[u')
        self._cursor = True
        sys.stdout.write(self.ESC + '[s')
        sys.stdout.write(mys)
        sys.stdout.flush()
else: precursors_to_evaluate = myprecursors.getPrecursorsToEvaluate(min_q1, max_q1) isotope_correction = par.calculate_isotope_correction() r_tree = myprecursors.build_extended_rangetree () print "Will evaluate %s precursors" % len(precursors_to_evaluate) progressm = progress.ProgressMeter(total=len(precursors_to_evaluate), unit='peptides') prepare = [] for precursor in precursors_to_evaluate: transitions = precursor.calculate_transitions_from_param(par) #correct rounding errors, s.t. we get the same results as before! ssrcalc_low = precursor.ssrcalc - par.ssrcalc_window + 0.001 ssrcalc_high = precursor.ssrcalc + par.ssrcalc_window - 0.001 try: result = c_integrated.wrap_all_bitwise(transitions, precursor.q1 - par.q1_window, ssrcalc_low, precursor.q1 + par.q1_window, ssrcalc_high, precursor.transition_group, min(par.max_uis,len(transitions)), par.q3_window, par.ppm, par.isotopes_up_to, isotope_correction, par, r_tree) except ValueError: print "Too many transitions for", precursor.modification continue for order in range(1,min(par.max_uis+1, len(transitions)+1)): prepare.append( (result[order-1], collider.choose(len(transitions), order), precursor.parent_id , order, exp_key) ) #//break; progressm.update(1) for order in range(1,6): sum_all = sum([p[0]*1.0/p[1] for p in prepare if p[3] == order]) nr_peptides = len([p for p in prepare if p[3] == order]) if not par.quiet and not nr_peptides ==0: print "Order %s, Average non useable UIS %s" % (order, sum_all *1.0/ nr_peptides)