def test_get_non_uis2(self):
    """Check the C++ ``get_non_uis`` against the Python one and time both.

    Uses the ``runpep2`` / ``runtransitions2`` / ``runprecursors2``
    fixtures of ``test_shared``. The non-UIS combinations for the orders
    1..``self.MAX_UIS`` are first accumulated with
    ``collider.get_non_uis`` and then recomputed with
    ``c_getnonuis.get_non_uis``; both results have to be equal. The C++
    variant is repeated several times and averaged before both timings and
    the speedup are printed.
    """
    repeats = 10  # number of C++ runs that are averaged for the timing

    print('\nTesting non_uis_list')
    pep = test_shared.runpep2
    precursors = test_shared.runprecursors2
    # fake some srm_id for the transitions: (first field, running index)
    transitions = tuple(
        [(t[0], i) for i, t in enumerate(test_shared.runtransitions2)])
    # the timing reported by runcpp is not used, ctime is measured below
    collisions_per_peptide, _unused_time = runcpp(
        self, pep, transitions, precursors)

    MAX_UIS = self.MAX_UIS

    # Python implementation: get_non_uis fills the per-order set that is
    # handed to it, index 0 stays empty.
    non_uis_list = [set() for _order in range(MAX_UIS + 1)]
    st = time.time()
    for pepc in collisions_per_peptide.values():
        for order in range(1, MAX_UIS + 1):
            collider.get_non_uis(pepc, non_uis_list[order], order)
    oldtime = time.time() - st

    # C++ implementation: one call per order over all peptides at once.
    # The conversion to sets is part of the measured time.
    st = time.time()
    for _run in range(repeats):
        non_uis_list_new = [{} for _order in range(MAX_UIS + 1)]
        for order in range(1, MAX_UIS + 1):
            non_uis_list_new[order] = c_getnonuis.get_non_uis(
                collisions_per_peptide, order)
    non_uis_list_new = [set(res.keys()) for res in non_uis_list_new]
    ctime = (time.time() - st) / repeats

    self.assertEqual(non_uis_list_new, non_uis_list)
    print("%s %s" % (ctime, oldtime))
    # time.time() can be too coarse to resolve the C++ run; do not let the
    # purely informational speedup turn a passing test into an error.
    if ctime > 0:
        print("Speedup: %s" % (oldtime / ctime))
    else:
        print("Speedup: n/a (C++ run below timer resolution)")
def test_get_non_uis2(self):
    """Check the C++ ``get_non_uis`` against the Python one and time both.

    Uses the ``runpep2`` / ``runtransitions2`` / ``runprecursors2``
    fixtures of ``test_shared``. The non-UIS combinations for the orders
    1..``self.MAX_UIS`` are first accumulated with
    ``collider.get_non_uis`` and then recomputed with
    ``c_getnonuis.get_non_uis``; both results have to be equal. The C++
    variant is repeated several times and averaged before both timings and
    the speedup are printed.
    """
    repeats = 10  # number of C++ runs that are averaged for the timing

    print('\nTesting non_uis_list')
    pep = test_shared.runpep2
    precursors = test_shared.runprecursors2
    # fake some srm_id for the transitions: (first field, running index)
    transitions = tuple(
        [(t[0], i) for i, t in enumerate(test_shared.runtransitions2)])
    # the timing reported by runcpp is not used, ctime is measured below
    collisions_per_peptide, _unused_time = runcpp(
        self, pep, transitions, precursors)

    MAX_UIS = self.MAX_UIS

    # Python implementation: get_non_uis fills the per-order set that is
    # handed to it, index 0 stays empty.
    non_uis_list = [set() for _order in range(MAX_UIS + 1)]
    st = time.time()
    for pepc in collisions_per_peptide.values():
        for order in range(1, MAX_UIS + 1):
            collider.get_non_uis(pepc, non_uis_list[order], order)
    oldtime = time.time() - st

    # C++ implementation: one call per order over all peptides at once.
    # The conversion to sets is part of the measured time.
    st = time.time()
    for _run in range(repeats):
        non_uis_list_new = [{} for _order in range(MAX_UIS + 1)]
        for order in range(1, MAX_UIS + 1):
            non_uis_list_new[order] = c_getnonuis.get_non_uis(
                collisions_per_peptide, order)
    non_uis_list_new = [set(res.keys()) for res in non_uis_list_new]
    ctime = (time.time() - st) / repeats

    self.assertEqual(non_uis_list_new, non_uis_list)
    print("%s %s" % (ctime, oldtime))
    # time.time() can be too coarse to resolve the C++ run; do not let the
    # purely informational speedup turn a passing test into an error.
    if ctime > 0:
        print("Speedup: %s" % (oldtime / ctime))
    else:
        print("Speedup: n/a (C++ run below timer resolution)")
def write_csv_row(fragments, collisions_per_peptide, current_sequence, uis, wuis):
    """Write all UIS combinations up to order ``uis`` as rows to ``wuis``.

    For every order from 1 to ``uis`` the non-UIS combinations are computed
    with ``c_getnonuis.get_non_uis`` and turned into the UIS combinations
    with ``collider.get_uis``. Each UIS combination becomes one row::

        [current_sequence, order, q3_1, annotation_1, q3_2, annotation_2, ...]

    Nothing is written for an order without any UIS combination.

    Parameters:
        fragments: objects providing ``fragment_count`` (used as srm id),
            ``q3`` and ``annotation``.
        collisions_per_peptide: passed unchanged to
            ``c_getnonuis.get_non_uis``.
        current_sequence: value written into the first column of each row.
        uis: highest order to evaluate; nothing happens for ``uis < 1``.
        wuis: row writer offering ``writerow(list)``.
    """
    srm_ids = [fragment.fragment_count for fragment in fragments]
    # srm id -> fragment, to translate a combination back into fragments
    srm_lookup = dict(
        (fragment.fragment_count, fragment) for fragment in fragments)

    for order in range(1, uis + 1):
        # The non-UIS combinations only serve as input for get_uis here;
        # they are not written out themselves.
        non_uis = c_getnonuis.get_non_uis(collisions_per_peptide, order)
        uis_list = collider.get_uis(srm_ids, non_uis, order)
        for comb in uis_list:
            myrow = [current_sequence, order]
            for fragment in [srm_lookup[elem] for elem in comb]:
                myrow.extend([fragment.q3, fragment.annotation])
            wuis.writerow(myrow)
def get_nonuis_list(collisions_per_peptide, MAX_UIS):
    """Return the non-UIS combinations for every order up to ``MAX_UIS``.

    The result is a list of length ``MAX_UIS + 1``; entry ``order`` holds
    the non-UIS combinations of that order and entry 0 is always an empty
    set.

    If the ``c_getnonuis`` extension can be imported, entry ``order`` is
    whatever ``c_getnonuis.get_non_uis(collisions_per_peptide, order)``
    returns. Otherwise the entries are sets filled by the Python
    ``get_non_uis`` for every value of ``collisions_per_peptide``.

    Only a failing import selects the Python fallback. An error raised
    while the extension is computing is propagated instead of silently
    restarting on a partly overwritten result list.
    """
    try:
        import c_getnonuis
    except ImportError:
        c_getnonuis = None

    non_uis_list = [set() for _order in range(MAX_UIS + 1)]

    if c_getnonuis is not None:
        for order in range(1, MAX_UIS + 1):
            non_uis_list[order] = c_getnonuis.get_non_uis(
                collisions_per_peptide, order)
        return non_uis_list

    # pure Python fallback: get_non_uis adds to the set it is given
    for pepc in collisions_per_peptide.values():
        for order in range(1, MAX_UIS + 1):
            get_non_uis(pepc, non_uis_list[order], order)
    return non_uis_list
def get_non_UIS_from_transitions(transitions, collisions, par, MAX_UIS,
                                 forceset=False):
    """Get, per order, all transition combinations that are not UIS.

    With the ``c_getnonuis`` extension available the result is a list of
    length ``MAX_UIS + 1``: entry 0 is an empty dict and entry ``order`` is
    the return value of ``c_getnonuis.get_non_uis`` for that order (a
    dictionary). With ``forceset=True`` every entry is converted into the
    set of its keys.

    If an ImportError occurs on that path, the result of
    ``get_non_UIS_from_transitions_old`` is returned instead; ``forceset``
    is not looked at in that case.
    """
    try:
        # the C++ functions are the faster way of doing this
        import c_getnonuis
        per_order = [{} for _slot in range(MAX_UIS + 1)]
        per_peptide = getnonuis(
            transitions, collisions, par.q3_window, par.ppm)
        for current_order in range(1, MAX_UIS + 1):
            per_order[current_order] = c_getnonuis.get_non_uis(
                per_peptide, current_order)
        if not forceset:
            return per_order
        return [set(entry.keys()) for entry in per_order]
    except ImportError:
        # old way of doing it
        return get_non_UIS_from_transitions_old(
            transitions, collisions, par, MAX_UIS)
def test_mysql_vs_integrated(self):
    """Compare the one table MySQL approach vs the fully integrated Cpp approach

    For a random sample of at most ``self.limit`` precursors (charge 2,
    isotope 0, ``self.min_q1 <= q1 < self.max_q1``) the number of non-UIS
    combinations per order is computed in two ways:

    - old: fetch the interfering precursors with
      ``self.mycollider._get_all_precursors`` and evaluate them with
      ``c_getnonuis``;
    - new: one call to ``c_integrated.wrap_all_bitwise`` on a C++ range
      tree built from ``self.alltuples_isotope_correction``.

    Both results have to be equal. The average time per peptide of both
    ways and the speedup are printed on a single console line that is
    redrawn for every peptide.
    """
    print('\n' * 1)
    print("Comparing one table MySQL solution vs integrated solution")
    par = self.par
    cursor = self.cursor

    # Fields of self.alltuples as used here: 0 modified sequence,
    # 1 peptide key, 2 parent id, 3 charge, 4 q1, 5 ssrcalc, 6 isotope nr.
    mypepids = [
        {
            'mod_sequence': row[0],
            'peptide_key': row[1],
            'transition_group': row[1],
            'parent_id': row[2],
            'q1_charge': row[3],
            'q1': row[4],
            'ssrcalc': row[5],
        }
        for row in self.alltuples
        if row[3] == 2  # charge is 2
        and row[6] == 0  # isotope is 0
        and self.min_q1 <= row[4] < self.max_q1
    ]

    self.mycollider.pepids = mypepids
    self.mycollider.calcinner = 0
    shuffle(self.mycollider.pepids)
    self.mycollider.pepids = self.mycollider.pepids[:self.limit]

    import c_rangetree
    rangetree = c_rangetree.ExtendedRangetree_Q1_RT.create()
    rangetree.new_rangetree()
    rangetree.create_tree(tuple(self.alltuples_isotope_correction))

    MAX_UIS = 5
    newsql = 0  # old way: time spent in _get_all_precursors
    c_fromprecursortime = 0  # old way: time spent in c_getnonuis
    newtime = 0  # new way: time spent in wrap_all_bitwise
    self._cursor = False
    self.ESC = chr(27)
    print("i\toldtime\t\tnewtime\t>>\tspeedup")
    for index, pep in enumerate(self.mycollider.pepids):
        # skipped peptides are counted as well, as before
        ii = index + 1
        p_id = pep['parent_id']
        q1 = pep['q1']
        q3_low, q3_high = par.get_q3range_transitions()
        q1_low = q1 - par.q1_window
        q1_high = q1 + par.q1_window
        ssrcalc = pep['ssrcalc']
        peptide_key = pep['peptide_key']

        # correct rounding errors, s.t. we get the same results as before!
        ssrcalc_low = ssrcalc - par.ssrcalc_window + 0.001
        ssrcalc_high = ssrcalc + par.ssrcalc_window - 0.001
        isotope_correction = (par.isotopes_up_to * Residues.mass_diffC13
                              / min(par.parent_charges))

        precursor = Precursor(q1=pep['q1'],
                              transition_group=pep['transition_group'],
                              parent_id=pep['parent_id'],
                              modified_sequence=pep['mod_sequence'],
                              ssrcalc=pep['ssrcalc'])

        transitions = collider.calculate_transitions_ch(
            ((q1, pep['mod_sequence'], p_id),), [1], q3_low, q3_high)
        # fake some srm_id for the transitions
        transitions = tuple([(t[0], i) for i, t in enumerate(transitions)])
        nr_transitions = len(transitions)
        if nr_transitions == 0:
            continue  # no transitions in this window

        ###################################
        # Old way to calculate non_uislist
        # - get all precursors from the SQL database
        # - calculate collisions per peptide in C++

        # due to the new handling of isotopes
        # NOTE(review): par.query2_add is not restored afterwards.
        par.query2_add = ' and isotope_nr = 0 '
        mystart = time.time()
        self.mycollider.mysqlnewtime = 0
        precursors = self.mycollider._get_all_precursors(
            par, precursor, cursor)
        newsql += time.time() - mystart

        mystart = time.time()
        q3_low, q3_high = par.get_q3range_transitions()
        collisions_per_peptide = c_getnonuis.calculate_collisions_per_peptide_other_ion_series(
            transitions, precursors, par, q3_low, q3_high,
            par.q3_window, par.ppm, False)
        non_uis_list = [{} for _order in range(MAX_UIS + 1)]
        for order in range(1, MAX_UIS + 1):
            non_uis_list[order] = c_getnonuis.get_non_uis(
                collisions_per_peptide, order)
        c_fromprecursortime += time.time() - mystart

        # number of distinct non-UIS combinations for order 1..MAX_UIS
        non_uis_list_len = [
            len(set(entry.keys())) for entry in non_uis_list[1:]]

        ###################################
        # New way to calculate non_uislist
        # - start out from transitions, get non_uislist
        mystart = time.time()
        newresult = c_integrated.wrap_all_bitwise(
            transitions, q1_low, ssrcalc_low, q1_high, ssrcalc_high,
            peptide_key, min(MAX_UIS, nr_transitions), par.q3_window,
            par.ppm, par.isotopes_up_to, isotope_correction, par,
            rangetree)
        newtime += time.time() - mystart

        ###################################
        # Assert equality, print out result
        self.assertEqual(newresult, non_uis_list_len)

        oldtotal = c_fromprecursortime + newsql
        # the new way may be faster than the timer resolution; the
        # informational speedup must not abort the test in that case
        if newtime > 0:
            speedup = oldtotal / newtime
        else:
            speedup = float('inf')
        mys = "%s\t%0.1fms\t\t%0.2fms\t>>>\t%0.1f" % (
            ii,                      # i
            oldtotal * 1000 / ii,    # oldtime
            newtime * 1000 / ii,     # newtime
            speedup)

        # ANSI sequences: erase the line, jump back to the saved cursor
        # position (from the second peptide on), save it, then print.
        sys.stdout.write(self.ESC + '[2K')
        if self._cursor:
            sys.stdout.write(self.ESC + '[u')
        self._cursor = True
        sys.stdout.write(self.ESC + '[s')
        sys.stdout.write(mys)
        sys.stdout.flush()
def test_two_table_mysql(self):
    """Compare the two table vs the one table MySQL approach

    Here we are comparing the old (querying the transitions database as
    well as the precursor database) and the new way (only query the
    precursor database and calculate the transitions on the fly) way of
    calculating the collisions.

    For every sampled peptide the non-UIS lists are computed three times
    (``get_non_UIS_from_transitions``, ``get_non_UIS_from_transitions_old``
    and ``c_getnonuis`` on the fetched precursors) and have to be equal.
    The accumulated timings are printed on one console line that is
    redrawn for every peptide; the statement order matters because the
    timers are interleaved with the calls.
    """

    print '\n'*1
    print "Comparing one vs two table MySQL solution"
    par = self.par
    cursor = self.cursor

    # Note: the local mycollider is only handed to _get_all_collisions;
    # everything else uses self.mycollider.
    mycollider = collider.SRMcollider()
    mypepids = _get_unique_pepids(par, cursor)
    self.mycollider.pepids = mypepids
    self.mycollider.calcinner = 0
    shuffle( self.mycollider.pepids )
    self.mycollider.pepids = self.mycollider.pepids[:self.limit]

    MAX_UIS = 5
    # Accumulated wall clock times (seconds) over all peptides so far.
    c_newuistime = 0; oldtime = 0; c_fromprecursortime = 0
    oldsql = 0; newsql = 0
    oldcalctime = 0; localsql = 0
    self._cursor = False
    print "oldtime = get UIS from collisions and transitions (getting all collisions from the transitions db)"
    print "cuis + oldsql = as oldtime but calculate UIS in C++"
    print "py+newsql = only get the precursors from the db and calculate collisions in Python"
    print "ctime + newsql = only get the precursors from the db and calculate collisions in C++"
    print "new = use fast SQL and C++ code"
    print "old = use slow SQL and Python code"
    print "i\toldtime\tcuis+oldsql\tpy+newsql\tctime+newsql\t>>>\toldsql\tnewsql\t...\t...\tspeedup"
    for kk, pep in enumerate(self.mycollider.pepids):
        # ii also counts peptides that are skipped below
        ii = kk + 1
        p_id = pep['parent_id']
        q1 = pep['q1']
        q3_low, q3_high = par.get_q3range_transitions()
        precursor = Precursor(q1=pep['q1'], transition_group=pep['transition_group'], parent_id = pep['parent_id'], modified_sequence=pep['mod_sequence'], ssrcalc=pep['ssrcalc'])
        transitions = collider.calculate_transitions_ch(
            ((q1, pep['mod_sequence'], p_id),), [1], q3_low, q3_high)
        #fake some srm_id for the transitions
        transitions = tuple([ (t[0], i) for i,t in enumerate(transitions)])
        ##### transitions = self.mycollider._get_all_transitions(par, pep, cursor)
        nr_transitions = len( transitions )
        if nr_transitions == 0: continue #no transitions in this window
        #
        # oldcalctime: collisions via _get_all_collisions_calculate_new
        # (the result is overwritten by the next step, only the time counts)
        mystart = time.time()
        collisions = list(self.mycollider._get_all_collisions_calculate_new(par, precursor, cursor))
        oldcolllen = len(collisions)
        oldcalctime += time.time() - mystart
        #
        # oldsql: collisions via _get_all_collisions
        mystart = time.time()
        collisions = _get_all_collisions(mycollider, par, pep, cursor, transitions = transitions)
        oldcsqllen = len(collisions)
        oldsql += time.time() - mystart
        #
        # newsql: only fetch the precursors
        par.query2_add = ' and isotope_nr = 0 ' # due to the new handling of isotopes
        mystart = time.time()
        self.mycollider.mysqlnewtime= 0
        precursors = self.mycollider._get_all_precursors(par, precursor, cursor)
        newsql += time.time() - mystart
        #
        # localsql: the timed call is commented out, so this measures nothing
        mystart = time.time()
        #precursors = self.mycollider._get_all_precursors(par, pep, cursor_l)
        localsql += time.time() - mystart
        par.query2_add = '' # due to the new handling of isotopes
        #
        # c_newuistime: non-UIS from the collisions, current implementation
        mystart = time.time()
        non_uis_list = get_non_UIS_from_transitions(transitions,
            tuple(collisions), par, MAX_UIS)
        cnewuis = non_uis_list
        c_newuistime += time.time() - mystart
        #
        # oldtime: non-UIS from the collisions, old implementation
        mystart = time.time()
        non_uis_list = get_non_UIS_from_transitions_old(transitions,
            collisions, par, MAX_UIS)
        oldnonuislist = non_uis_list
        oldtime += time.time() - mystart
        #
        # c_fromprecursortime: non-UIS from the precursors in C++
        mystart = time.time()
        q3_low, q3_high = par.get_q3range_transitions()
        collisions_per_peptide = c_getnonuis.calculate_collisions_per_peptide_other_ion_series(
            transitions, precursors, par, q3_low, q3_high, par.q3_window, par.ppm, False)
        non_uis_list = [ {} for i in range(MAX_UIS+1)]
        for order in range(1,MAX_UIS+1):
            non_uis_list[order] = c_getnonuis.get_non_uis(
                collisions_per_peptide, order)
        c_fromprecursortime += time.time() - mystart

        # first compare the sizes per order, then the contents
        newl = [len(n) for n in non_uis_list]
        self.assertEqual(newl, [len(o) for o in cnewuis])
        self.assertEqual(newl, [len(o) for o in oldnonuislist])

        # keys() is called on these two results to compare them as sets;
        # oldnonuislist is compared as it is
        non_uis_list = [set(k.keys()) for k in non_uis_list]
        cnewuis = [set(k.keys()) for k in cnewuis]

        self.assertEqual(non_uis_list, cnewuis)
        self.assertEqual(non_uis_list, oldnonuislist)

        # columns in the order of the header line printed above
        mys = "%s\t%0.fms\t%0.fms\t\t%0.fms\t\t%0.2fms\t\t>>>\t%0.fms\t%0.2fms\t...\t...\t%0.2f" % \
        (ii,  #i
        (oldtime + oldsql)*1000/ii,  #oldtime
        (c_newuistime+oldsql)*1000/ii, #cuis + oldsql
        (oldcalctime + newsql + oldtime)*1000/ii,  #py+newsql
        (c_fromprecursortime + newsql)*1000/ii,  #ctime+newsql
        #(c_fromprecursortime + localsql)*1000/ii,
        oldsql*1000/ii, #oldsql
        newsql*1000/ii, #newsql
        #localsql*1000/ii,
        #oldtime / c_newuistime
        (oldtime + oldsql) / (c_fromprecursortime + newsql) #speedup
        )

        # ANSI sequences: erase the line, restore the saved cursor position
        # (from the second printed peptide on), save it, print the line
        self.ESC = chr(27)
        sys.stdout.write(self.ESC + '[2K')
        if self._cursor:
            sys.stdout.write(self.ESC + '[u')
        self._cursor = True
        sys.stdout.write(self.ESC + '[s')
        sys.stdout.write(mys)
        sys.stdout.flush()
def test_integrated_cpp(self):
    """ Compare the fully integrated vs the mixed C++ rangetree / Python solution.

    Here we are comparing the fully integrated solution of storing all
    precursors in a C++ range tree and never passing them to Python vs the
    solution where we store the precursors in the rangetree, pass them to
    Python and then evaluate them.

    At most ``self.limit_large`` randomly chosen precursors of
    ``self.precursors_to_evaluate`` are evaluated. For each of them the
    number of non-UIS combinations per order from
    ``c_integrated.wrap_all_bitwise`` has to equal the one obtained through
    ``self.myprecursors.get_collisions_per_peptide_from_rangetree`` and
    ``c_getnonuis.get_non_uis``. The average times per precursor and the
    speedup are printed on a single console line that is redrawn for every
    precursor.
    """
    print('\n' * 1)
    print("Comparing fully integrated solution (c_integrated.wrap_all)")
    par = self.par

    # NOTE: shuffle works in place, i.e. self.precursors_to_evaluate itself
    # is reordered; only the slice is a new list.
    all_precursors = self.precursors_to_evaluate
    shuffle(all_precursors)
    all_precursors = all_precursors[:self.limit_large]

    self.myprecursors.build_parent_id_lookup()
    self.myprecursors.build_rangetree()
    import c_rangetree
    rangetree = c_rangetree.ExtendedRangetree_Q1_RT.create()
    rangetree.new_rangetree()
    rangetree.create_tree(tuple(self.alltuples_isotope_correction))

    MAX_UIS = 5
    transitiontime = 0  # calculating the transitions
    newtime = 0  # integrated solution
    oldtime = 0  # rangetree query + c_getnonuis
    self._cursor = False
    self.ESC = chr(27)
    print("Running experiment  %s" % (par.get_common_filename(),))
    print("calc_trans. = time to calculate the transitions of the precursor")
    print("old = use rangetree to get precursors, use C++ code to get collperpep")
    print("new = use rangetree to get precursors, use single C++ code to get collperpep")
    print("i\tcalc_trans.\tnew\t\told\t\t>>\tspeedup")
    for index, precursor in enumerate(all_precursors):
        ii = index + 1

        q1 = precursor.q1
        ssrcalc = precursor.ssrcalc
        sequence = precursor.modified_sequence
        peptide_key = precursor.transition_group
        p_id = precursor.parent_id

        q3_low, q3_high = par.get_q3range_transitions()

        # new way to calculate the precursors
        mystart = time.time()
        transitions = c_getnonuis.calculate_transitions_ch(
            ((q1, sequence, p_id),), [1, 2], q3_low, q3_high)
        nr_transitions = len(transitions)
        # fake some srm_id for the transitions
        transitions = tuple([(t[0], i) for i, t in enumerate(transitions)])

        q1_low = q1 - par.q1_window
        q1_high = q1 + par.q1_window
        # correct rounding errors, s.t. we get the same results as before!
        ssrcalc_low = ssrcalc - par.ssrcalc_window + 0.001
        ssrcalc_high = ssrcalc + par.ssrcalc_window - 0.001
        transitiontime += time.time() - mystart

        isotope_correction = (par.isotopes_up_to * Residues.mass_diffC13
                              / min(par.parent_charges))

        ###################################
        # New way to calculate non_uislist
        # - start out from transitions, get non_uislist
        mystart = time.time()
        newresult = c_integrated.wrap_all_bitwise(
            transitions, q1_low, ssrcalc_low, q1_high, ssrcalc_high,
            peptide_key, min(MAX_UIS, nr_transitions), par.q3_window,
            par.ppm, par.isotopes_up_to, isotope_correction, par,
            rangetree)
        newtime += time.time() - mystart

        ###################################
        # Old way to calculate non_uislist:
        # - get collisions per peptide
        # - get non_uislist
        mystart = time.time()
        collisions_per_peptide_obj = self.myprecursors.get_collisions_per_peptide_from_rangetree(
            precursor, precursor.q1 - par.q1_window,
            precursor.q1 + par.q1_window, transitions, par, rangetree)
        non_uis_list = [{} for _order in range(MAX_UIS + 1)]
        for order in range(1, MAX_UIS + 1):
            non_uis_list[order] = c_getnonuis.get_non_uis(
                collisions_per_peptide_obj, order)
        oldtime += time.time() - mystart

        # number of distinct non-UIS combinations for order 1..MAX_UIS
        non_uis_list_len = [
            len(set(entry.keys())) for entry in non_uis_list[1:]]

        ###################################
        # Assert equality, print out result
        self.assertEqual(newresult, non_uis_list_len)

        # the integrated call may be faster than the timer resolution; the
        # informational speedup must not abort the test in that case
        if newtime > 0:
            speedup = oldtime * 1.0 / newtime
        else:
            speedup = float('inf')
        mys = "%s\t%0.4fms\t%0.2fms\t\t%0.2fms\t\t>>\t%0.2f" % (
            ii, transitiontime * 1000 / ii, newtime * 1000 / ii,
            oldtime * 1000 / ii, speedup)

        # ANSI sequences: erase the line, jump back to the saved cursor
        # position (from the second precursor on), save it, then print.
        sys.stdout.write(self.ESC + '[2K')
        if self._cursor:
            sys.stdout.write(self.ESC + '[u')
        self._cursor = True
        sys.stdout.write(self.ESC + '[s')
        sys.stdout.write(mys)
        sys.stdout.flush()
# We dont have experimental height data and cannot use C++ code collisions_per_peptide = collider.get_coll_per_peptide(mycollider, transitions, par, peptide_obj, cursor) min_needed = mycollider._sub_getMinNeededTransitions(par, transitions, collisions_per_peptide) #min_needed = mycollider.getMinNeededTransitions_direct(par, transitions, precursors) else: # here we consider the case that we have measured a number of # transitions experimentally and want to know how many of them are # sufficient to establish uniqueness. For this, all we need is # that one tuple of transitions establishes uniqueness since we # were able to measure it above the background noise. collisions_per_peptide = collider.get_coll_per_peptide(mycollider, transitions, par, pep, cursor) for order in range(1,nr_transitions+1): mymax = collider.choose(nr_transitions, order) if use_cpp: non_uis = c_getnonuis.get_non_uis(collisions_per_peptide, order) else: non_uis = set() for pepc in collisions_per_peptide.values(): get_non_uis(pepc, non_uis, i) if len(non_uis) < mymax: break if len(non_uis) < mymax: min_needed = order else: min_needed = -1 spectrum.score = min_needed * nr_transitions spectrum.min_needed = min_needed if min_needed != -1: spectrum.score = nr_transitions - min_needed if not par.quiet: progressm.update(1) get_min_tr_time += time.time() - tmp_time; tmp_time = time.time() # }}}
def test_get_non_uis2(self):
    """Check ``c_getnonuis.get_non_uis`` against the stored reference data.

    For the orders 1 to 5 the keys of the result computed from
    ``test_shared.refcollperpep2`` have to equal
    ``test_shared.refnonuis2_sorted[order]``.
    """
    collisions_per_peptide = test_shared.refcollperpep2
    expected_by_order = test_shared.refnonuis2_sorted
    for order in range(1, 6):
        computed = c_getnonuis.get_non_uis(collisions_per_peptide, order)
        self.assertEqual(set(computed.keys()), expected_by_order[order])
# Evaluate every precursor: count the transition tuples of order ``myorder``
# that survive "strike 1" and, unless skip_strike2 is set, collect the
# precursors needed for "strike 2".
# NOTE(review): par, myprecursors, rtree, myorder, skip_strike2,
# parentid_lookup, ssrcalc, q1_low, q1_high and pep are not defined in the
# visible part of the file - presumably they are set up before this loop.
for precursor in precursors_to_evaluate:
    q3_low, q3_high = par.get_q3range_transitions()
    transitions = precursor.calculate_transitions(q3_low, q3_high)
    nr_transitions = len(transitions)

    ###############################################################
    #strike 1: it has to be global UIS

    computed_collisions = myprecursors.get_collisions_per_peptide_from_rangetree(
        precursor, precursor.q1 - par.q1_window, precursor.q1 + par.q1_window,
        transitions, par, rtree)

    collisions_per_peptide = computed_collisions

    # see SRMCollider::Combinatorics::get_non_uis
    non_useable_combinations = c_getnonuis.get_non_uis( collisions_per_peptide, myorder)
    # second field of every transition (presumably its srm id - TODO confirm)
    srm_ids = [t[1] for t in transitions]
    # useable tuples = all possible tuples of this order minus the non-useable
    # ones; stays 0 when there are fewer transitions than myorder
    tuples_strike1 = 0
    if not nr_transitions < myorder:
        tuples_strike1 = collider.choose(nr_transitions, myorder ) - len(non_useable_combinations)

    ###############################################################
    #strike 2: it has to be locally clean
    if not skip_strike2:
        # NOTE(review): ssrcalc, q1_low, q1_high and pep are not derived from
        # the current ``precursor`` in the visible code - verify that they are
        # not stale values from an earlier loop.
        ssrcalc_low = ssrcalc - par.ssrcalc_window + 0.001
        ssrcalc_high = ssrcalc + par.ssrcalc_window - 0.001
        precursor_ids = tuple(c_rangetree.query_tree( q1_low, ssrcalc_low, q1_high, ssrcalc_high ) )
        precursors = tuple([parentid_lookup[myid[0]] for myid in precursor_ids
            #dont select myself
            if parentid_lookup[myid[0]][2] != pep['transition_group']])
def test_mysql_vs_integrated(self):
    """Compare the one table MySQL approach vs the fully integrated Cpp approach

    For a random sample of at most ``self.limit`` precursors (charge 2,
    isotope 0, ``self.min_q1 <= q1 < self.max_q1``) the number of non-UIS
    combinations per order is computed in two ways:

    - old: fetch the interfering precursors with
      ``self.mycollider._get_all_precursors`` and evaluate them with
      ``c_getnonuis``;
    - new: one call to ``c_integrated.wrap_all_bitwise`` on a C++ range
      tree built from ``self.alltuples_isotope_correction``.

    Both results have to be equal. The average time per peptide of both
    ways and the speedup are printed on a single console line that is
    redrawn for every peptide.
    """
    print('\n' * 1)
    print("Comparing one table MySQL solution vs integrated solution")
    par = self.par
    cursor = self.cursor

    # Fields of self.alltuples as used here: 0 modified sequence,
    # 1 peptide key, 2 parent id, 3 charge, 4 q1, 5 ssrcalc, 6 isotope nr.
    mypepids = [
        {
            'mod_sequence': row[0],
            'peptide_key': row[1],
            'transition_group': row[1],
            'parent_id': row[2],
            'q1_charge': row[3],
            'q1': row[4],
            'ssrcalc': row[5],
        }
        for row in self.alltuples
        if row[3] == 2  # charge is 2
        and row[6] == 0  # isotope is 0
        and self.min_q1 <= row[4] < self.max_q1
    ]

    self.mycollider.pepids = mypepids
    self.mycollider.calcinner = 0
    shuffle(self.mycollider.pepids)
    self.mycollider.pepids = self.mycollider.pepids[:self.limit]

    import c_rangetree
    rangetree = c_rangetree.ExtendedRangetree_Q1_RT.create()
    rangetree.new_rangetree()
    rangetree.create_tree(tuple(self.alltuples_isotope_correction))

    MAX_UIS = 5
    newsql = 0  # old way: time spent in _get_all_precursors
    c_fromprecursortime = 0  # old way: time spent in c_getnonuis
    newtime = 0  # new way: time spent in wrap_all_bitwise
    self._cursor = False
    self.ESC = chr(27)
    print("i\toldtime\t\tnewtime\t>>\tspeedup")
    for index, pep in enumerate(self.mycollider.pepids):
        # skipped peptides are counted as well, as before
        ii = index + 1
        p_id = pep['parent_id']
        q1 = pep['q1']
        q3_low, q3_high = par.get_q3range_transitions()
        q1_low = q1 - par.q1_window
        q1_high = q1 + par.q1_window
        ssrcalc = pep['ssrcalc']
        peptide_key = pep['peptide_key']

        # correct rounding errors, s.t. we get the same results as before!
        ssrcalc_low = ssrcalc - par.ssrcalc_window + 0.001
        ssrcalc_high = ssrcalc + par.ssrcalc_window - 0.001
        isotope_correction = (par.isotopes_up_to * Residues.mass_diffC13
                              / min(par.parent_charges))

        precursor = Precursor(q1=pep['q1'],
                              transition_group=pep['transition_group'],
                              parent_id=pep['parent_id'],
                              modified_sequence=pep['mod_sequence'],
                              ssrcalc=pep['ssrcalc'])

        transitions = collider.calculate_transitions_ch(
            ((q1, pep['mod_sequence'], p_id),), [1], q3_low, q3_high)
        # fake some srm_id for the transitions
        transitions = tuple([(t[0], i) for i, t in enumerate(transitions)])
        nr_transitions = len(transitions)
        if nr_transitions == 0:
            continue  # no transitions in this window

        ###################################
        # Old way to calculate non_uislist
        # - get all precursors from the SQL database
        # - calculate collisions per peptide in C++

        # due to the new handling of isotopes
        # NOTE(review): par.query2_add is not restored afterwards.
        par.query2_add = ' and isotope_nr = 0 '
        mystart = time.time()
        self.mycollider.mysqlnewtime = 0
        precursors = self.mycollider._get_all_precursors(
            par, precursor, cursor)
        newsql += time.time() - mystart

        mystart = time.time()
        q3_low, q3_high = par.get_q3range_transitions()
        collisions_per_peptide = c_getnonuis.calculate_collisions_per_peptide_other_ion_series(
            transitions, precursors, par, q3_low, q3_high,
            par.q3_window, par.ppm, False)
        non_uis_list = [{} for _order in range(MAX_UIS + 1)]
        for order in range(1, MAX_UIS + 1):
            non_uis_list[order] = c_getnonuis.get_non_uis(
                collisions_per_peptide, order)
        c_fromprecursortime += time.time() - mystart

        # number of distinct non-UIS combinations for order 1..MAX_UIS
        non_uis_list_len = [
            len(set(entry.keys())) for entry in non_uis_list[1:]]

        ###################################
        # New way to calculate non_uislist
        # - start out from transitions, get non_uislist
        mystart = time.time()
        newresult = c_integrated.wrap_all_bitwise(
            transitions, q1_low, ssrcalc_low, q1_high, ssrcalc_high,
            peptide_key, min(MAX_UIS, nr_transitions), par.q3_window,
            par.ppm, par.isotopes_up_to, isotope_correction, par,
            rangetree)
        newtime += time.time() - mystart

        ###################################
        # Assert equality, print out result
        self.assertEqual(newresult, non_uis_list_len)

        oldtotal = c_fromprecursortime + newsql
        # the new way may be faster than the timer resolution; the
        # informational speedup must not abort the test in that case
        if newtime > 0:
            speedup = oldtotal / newtime
        else:
            speedup = float('inf')
        mys = "%s\t%0.1fms\t\t%0.2fms\t>>>\t%0.1f" % (
            ii,                      # i
            oldtotal * 1000 / ii,    # oldtime
            newtime * 1000 / ii,     # newtime
            speedup)

        # ANSI sequences: erase the line, jump back to the saved cursor
        # position (from the second peptide on), save it, then print.
        sys.stdout.write(self.ESC + '[2K')
        if self._cursor:
            sys.stdout.write(self.ESC + '[u')
        self._cursor = True
        sys.stdout.write(self.ESC + '[s')
        sys.stdout.write(mys)
        sys.stdout.flush()
def test_two_table_mysql(self):
    """Compare the two table vs the one table MySQL approach

    Here we are comparing the old (querying the transitions database as
    well as the precursor database) and the new way (only query the
    precursor database and calculate the transitions on the fly) way of
    calculating the collisions.

    For every sampled peptide the non-UIS lists are computed three times
    (``get_non_UIS_from_transitions``, ``get_non_UIS_from_transitions_old``
    and ``c_getnonuis`` on the fetched precursors) and have to be equal.
    The accumulated timings are printed on one console line that is
    redrawn for every peptide; the statement order matters because the
    timers are interleaved with the calls.
    """

    print '\n' * 1
    print "Comparing one vs two table MySQL solution"
    par = self.par
    cursor = self.cursor

    # Note: the local mycollider is only handed to _get_all_collisions;
    # everything else uses self.mycollider.
    mycollider = collider.SRMcollider()
    mypepids = _get_unique_pepids(par, cursor)
    self.mycollider.pepids = mypepids
    self.mycollider.calcinner = 0
    shuffle(self.mycollider.pepids)
    self.mycollider.pepids = self.mycollider.pepids[:self.limit]

    MAX_UIS = 5
    # Accumulated wall clock times (seconds) over all peptides so far.
    c_newuistime = 0
    oldtime = 0
    c_fromprecursortime = 0
    oldsql = 0
    newsql = 0
    oldcalctime = 0
    localsql = 0
    self._cursor = False
    print "oldtime = get UIS from collisions and transitions (getting all collisions from the transitions db)"
    print "cuis + oldsql = as oldtime but calculate UIS in C++"
    print "py+newsql = only get the precursors from the db and calculate collisions in Python"
    print "ctime + newsql = only get the precursors from the db and calculate collisions in C++"
    print "new = use fast SQL and C++ code"
    print "old = use slow SQL and Python code"
    print "i\toldtime\tcuis+oldsql\tpy+newsql\tctime+newsql\t>>>\toldsql\tnewsql\t...\t...\tspeedup"
    for kk, pep in enumerate(self.mycollider.pepids):
        # ii also counts peptides that are skipped below
        ii = kk + 1
        p_id = pep['parent_id']
        q1 = pep['q1']
        q3_low, q3_high = par.get_q3range_transitions()
        precursor = Precursor(q1=pep['q1'],
                              transition_group=pep['transition_group'],
                              parent_id=pep['parent_id'],
                              modified_sequence=pep['mod_sequence'],
                              ssrcalc=pep['ssrcalc'])
        transitions = collider.calculate_transitions_ch(
            ((q1, pep['mod_sequence'], p_id), ), [1], q3_low, q3_high)
        #fake some srm_id for the transitions
        transitions = tuple([(t[0], i) for i, t in enumerate(transitions)])
        ##### transitions = self.mycollider._get_all_transitions(par, pep, cursor)
        nr_transitions = len(transitions)
        if nr_transitions == 0:
            continue  #no transitions in this window
        #
        # oldcalctime: collisions via _get_all_collisions_calculate_new
        # (the result is overwritten by the next step, only the time counts)
        mystart = time.time()
        collisions = list(
            self.mycollider._get_all_collisions_calculate_new(
                par, precursor, cursor))
        oldcolllen = len(collisions)
        oldcalctime += time.time() - mystart
        #
        # oldsql: collisions via _get_all_collisions
        mystart = time.time()
        collisions = _get_all_collisions(mycollider,
                                         par,
                                         pep,
                                         cursor,
                                         transitions=transitions)
        oldcsqllen = len(collisions)
        oldsql += time.time() - mystart
        #
        # newsql: only fetch the precursors
        par.query2_add = ' and isotope_nr = 0 '  # due to the new handling of isotopes
        mystart = time.time()
        self.mycollider.mysqlnewtime = 0
        precursors = self.mycollider._get_all_precursors(
            par, precursor, cursor)
        newsql += time.time() - mystart
        #
        # localsql: the timed call is commented out, so this measures nothing
        mystart = time.time()
        #precursors = self.mycollider._get_all_precursors(par, pep, cursor_l)
        localsql += time.time() - mystart
        par.query2_add = ''  # due to the new handling of isotopes
        #
        # c_newuistime: non-UIS from the collisions, current implementation
        mystart = time.time()
        non_uis_list = get_non_UIS_from_transitions(
            transitions, tuple(collisions), par, MAX_UIS)
        cnewuis = non_uis_list
        c_newuistime += time.time() - mystart
        #
        # oldtime: non-UIS from the collisions, old implementation
        mystart = time.time()
        non_uis_list = get_non_UIS_from_transitions_old(
            transitions, collisions, par, MAX_UIS)
        oldnonuislist = non_uis_list
        oldtime += time.time() - mystart
        #
        # c_fromprecursortime: non-UIS from the precursors in C++
        mystart = time.time()
        q3_low, q3_high = par.get_q3range_transitions()
        collisions_per_peptide = c_getnonuis.calculate_collisions_per_peptide_other_ion_series(
            transitions, precursors, par, q3_low, q3_high, par.q3_window,
            par.ppm, False)
        non_uis_list = [{} for i in range(MAX_UIS + 1)]
        for order in range(1, MAX_UIS + 1):
            non_uis_list[order] = c_getnonuis.get_non_uis(
                collisions_per_peptide, order)
        c_fromprecursortime += time.time() - mystart

        # first compare the sizes per order, then the contents
        newl = [len(n) for n in non_uis_list]
        self.assertEqual(newl, [len(o) for o in cnewuis])
        self.assertEqual(newl, [len(o) for o in oldnonuislist])

        # keys() is called on these two results to compare them as sets;
        # oldnonuislist is compared as it is
        non_uis_list = [set(k.keys()) for k in non_uis_list]
        cnewuis = [set(k.keys()) for k in cnewuis]

        self.assertEqual(non_uis_list, cnewuis)
        self.assertEqual(non_uis_list, oldnonuislist)

        # columns in the order of the header line printed above
        mys = "%s\t%0.fms\t%0.fms\t\t%0.fms\t\t%0.2fms\t\t>>>\t%0.fms\t%0.2fms\t...\t...\t%0.2f" % \
        (ii,  #i
        (oldtime + oldsql)*1000/ii,  #oldtime
        (c_newuistime+oldsql)*1000/ii, #cuis + oldsql
        (oldcalctime + newsql + oldtime)*1000/ii,  #py+newsql
        (c_fromprecursortime + newsql)*1000/ii,  #ctime+newsql
        #(c_fromprecursortime + localsql)*1000/ii,
        oldsql*1000/ii, #oldsql
        newsql*1000/ii, #newsql
        #localsql*1000/ii,
        #oldtime / c_newuistime
        (oldtime + oldsql) / (c_fromprecursortime + newsql) #speedup
        )

        # ANSI sequences: erase the line, restore the saved cursor position
        # (from the second printed peptide on), save it, print the line
        self.ESC = chr(27)
        sys.stdout.write(self.ESC + '[2K')
        if self._cursor:
            sys.stdout.write(self.ESC + '[u')
        self._cursor = True
        sys.stdout.write(self.ESC + '[s')
        sys.stdout.write(mys)
        sys.stdout.flush()
def test_integrated_cpp(self):
    """ Compare the fully integrated vs the mixed C++ rangetree / Python solution.

    Here we are comparing the fully integrated solution of storing all
    precursors in a C++ range tree and never passing them to Python vs the
    solution where we store the precursors in the rangetree, pass them to
    Python and then evaluate them.

    At most ``self.limit_large`` randomly chosen precursors of
    ``self.precursors_to_evaluate`` are evaluated. For each of them the
    number of non-UIS combinations per order from
    ``c_integrated.wrap_all_bitwise`` has to equal the one obtained through
    ``self.myprecursors.get_collisions_per_peptide_from_rangetree`` and
    ``c_getnonuis.get_non_uis``. The average times per precursor and the
    speedup are printed on a single console line that is redrawn for every
    precursor.
    """
    print('\n' * 1)
    print("Comparing fully integrated solution (c_integrated.wrap_all)")
    par = self.par

    # NOTE: shuffle works in place, i.e. self.precursors_to_evaluate itself
    # is reordered; only the slice is a new list.
    all_precursors = self.precursors_to_evaluate
    shuffle(all_precursors)
    all_precursors = all_precursors[:self.limit_large]

    self.myprecursors.build_parent_id_lookup()
    self.myprecursors.build_rangetree()
    import c_rangetree
    rangetree = c_rangetree.ExtendedRangetree_Q1_RT.create()
    rangetree.new_rangetree()
    rangetree.create_tree(tuple(self.alltuples_isotope_correction))

    MAX_UIS = 5
    transitiontime = 0  # calculating the transitions
    newtime = 0  # integrated solution
    oldtime = 0  # rangetree query + c_getnonuis
    self._cursor = False
    self.ESC = chr(27)
    print("Running experiment  %s" % (par.get_common_filename(),))
    print("calc_trans. = time to calculate the transitions of the precursor")
    print("old = use rangetree to get precursors, use C++ code to get collperpep")
    print("new = use rangetree to get precursors, use single C++ code to get collperpep")
    print("i\tcalc_trans.\tnew\t\told\t\t>>\tspeedup")
    for index, precursor in enumerate(all_precursors):
        ii = index + 1

        q1 = precursor.q1
        ssrcalc = precursor.ssrcalc
        sequence = precursor.modified_sequence
        peptide_key = precursor.transition_group
        p_id = precursor.parent_id

        q3_low, q3_high = par.get_q3range_transitions()

        # new way to calculate the precursors
        mystart = time.time()
        transitions = c_getnonuis.calculate_transitions_ch(
            ((q1, sequence, p_id),), [1, 2], q3_low, q3_high)
        nr_transitions = len(transitions)
        # fake some srm_id for the transitions
        transitions = tuple([(t[0], i) for i, t in enumerate(transitions)])

        q1_low = q1 - par.q1_window
        q1_high = q1 + par.q1_window
        # correct rounding errors, s.t. we get the same results as before!
        ssrcalc_low = ssrcalc - par.ssrcalc_window + 0.001
        ssrcalc_high = ssrcalc + par.ssrcalc_window - 0.001
        transitiontime += time.time() - mystart

        isotope_correction = (par.isotopes_up_to * Residues.mass_diffC13
                              / min(par.parent_charges))

        ###################################
        # New way to calculate non_uislist
        # - start out from transitions, get non_uislist
        mystart = time.time()
        newresult = c_integrated.wrap_all_bitwise(
            transitions, q1_low, ssrcalc_low, q1_high, ssrcalc_high,
            peptide_key, min(MAX_UIS, nr_transitions), par.q3_window,
            par.ppm, par.isotopes_up_to, isotope_correction, par,
            rangetree)
        newtime += time.time() - mystart

        ###################################
        # Old way to calculate non_uislist:
        # - get collisions per peptide
        # - get non_uislist
        mystart = time.time()
        collisions_per_peptide_obj = self.myprecursors.get_collisions_per_peptide_from_rangetree(
            precursor, precursor.q1 - par.q1_window,
            precursor.q1 + par.q1_window, transitions, par, rangetree)
        non_uis_list = [{} for _order in range(MAX_UIS + 1)]
        for order in range(1, MAX_UIS + 1):
            non_uis_list[order] = c_getnonuis.get_non_uis(
                collisions_per_peptide_obj, order)
        oldtime += time.time() - mystart

        # number of distinct non-UIS combinations for order 1..MAX_UIS
        non_uis_list_len = [
            len(set(entry.keys())) for entry in non_uis_list[1:]]

        ###################################
        # Assert equality, print out result
        self.assertEqual(newresult, non_uis_list_len)

        # the integrated call may be faster than the timer resolution; the
        # informational speedup must not abort the test in that case
        if newtime > 0:
            speedup = oldtime * 1.0 / newtime
        else:
            speedup = float('inf')
        mys = "%s\t%0.4fms\t%0.2fms\t\t%0.2fms\t\t>>\t%0.2f" % (
            ii, transitiontime * 1000 / ii, newtime * 1000 / ii,
            oldtime * 1000 / ii, speedup)

        # ANSI sequences: erase the line, jump back to the saved cursor
        # position (from the second precursor on), save it, then print.
        sys.stdout.write(self.ESC + '[2K')
        if self._cursor:
            sys.stdout.write(self.ESC + '[u')
        self._cursor = True
        sys.stdout.write(self.ESC + '[s')
        sys.stdout.write(mys)
        sys.stdout.flush()