Example #1
0
    def test_get_non_uis2(self):
        print '\nTesting non_uis_list'

        pep = test_shared.runpep2
        transitions = test_shared.runtransitions2
        precursors = test_shared.runprecursors2
        transitions = tuple([ (t[0], i) for i,t in enumerate(transitions)])
        collisions_per_peptide, ctime = runcpp(self, pep, transitions, precursors)

        MAX_UIS = self.MAX_UIS

        non_uis_list = [set() for i in range(MAX_UIS+1)]
        #here we calculate the UIS for this peptide with the given RT-range
        st = time.time()
        for pepc in collisions_per_peptide.values():
            for i in range(1,MAX_UIS+1):
                collider.get_non_uis(pepc, non_uis_list[i], i)
        oldtime = time.time() - st

        st = time.time()
        for kk in range(10):
            non_uis_list_new = [{} for i in range(MAX_UIS+1)]
            for order in range(1,MAX_UIS+1):
                non_uis_list_new[order] = c_getnonuis.get_non_uis(
                    collisions_per_peptide, order)

        non_uis_list_new = [set( res.keys() ) for res in non_uis_list_new]
        ctime = (time.time() - st)/10

        self.assertEqual(non_uis_list_new, non_uis_list)
        print ctime, oldtime
        print "Speedup:", oldtime / ctime
Example #2
0
    def test_get_non_uis2(self):
        print '\nTesting non_uis_list'

        pep = test_shared.runpep2
        transitions = test_shared.runtransitions2
        precursors = test_shared.runprecursors2
        transitions = tuple([(t[0], i) for i, t in enumerate(transitions)])
        collisions_per_peptide, ctime = runcpp(self, pep, transitions,
                                               precursors)

        MAX_UIS = self.MAX_UIS

        non_uis_list = [set() for i in range(MAX_UIS + 1)]
        #here we calculate the UIS for this peptide with the given RT-range
        st = time.time()
        for pepc in collisions_per_peptide.values():
            for i in range(1, MAX_UIS + 1):
                collider.get_non_uis(pepc, non_uis_list[i], i)
        oldtime = time.time() - st

        st = time.time()
        for kk in range(10):
            non_uis_list_new = [{} for i in range(MAX_UIS + 1)]
            for order in range(1, MAX_UIS + 1):
                non_uis_list_new[order] = c_getnonuis.get_non_uis(
                    collisions_per_peptide, order)

        non_uis_list_new = [set(res.keys()) for res in non_uis_list_new]
        ctime = (time.time() - st) / 10

        self.assertEqual(non_uis_list_new, non_uis_list)
        print ctime, oldtime
        print "Speedup:", oldtime / ctime
def write_csv_row(fragments, collisions_per_peptide, current_sequence, uis, wuis,
                  write_non_uis=False):
    """Write the transition combinations of orders 1..uis as CSV rows.

    For every order the non-UIS combinations are computed with
    c_getnonuis.get_non_uis. By default the UIS combinations derived from
    them (collider.get_uis) are written, one row per combination, each row
    starting with ``current_sequence`` and the order followed by the q3 and
    annotation of every fragment in the combination.

    Parameters:
        fragments: objects providing ``fragment_count``, ``q3`` and
            ``annotation``; ``fragment_count`` is the id used in the
            combinations.
        collisions_per_peptide: passed unchanged to c_getnonuis.get_non_uis.
        current_sequence: first column of every UIS row.
        uis: highest order to write; nothing is written when it is < 1.
        wuis: object with a ``writerow(list)`` method (e.g. a csv writer).
        write_non_uis: when true, write the non-UIS combinations instead.
            Those rows contain only the q3/annotation pairs, without the
            sequence/order prefix. This replaces a branch that used to be
            disabled with a hard-coded ``if False``.
    """
    srm_ids = [f.fragment_count for f in fragments]
    srm_lookup = dict((f.fragment_count, f) for f in fragments)
    for order in range(1, uis + 1):
        non_uis = c_getnonuis.get_non_uis(collisions_per_peptide, order)
        if write_non_uis:
            # the non-UIS combinations are often more informative than the
            # list of UIS combinations
            combinations = non_uis
            prefix = []
        else:
            combinations = collider.get_uis(srm_ids, non_uis, order)
            prefix = [current_sequence, order]
        for comb in combinations:
            myrow = list(prefix)
            for elem in comb:
                fragment = srm_lookup[elem]
                myrow.extend([fragment.q3, fragment.annotation])
            wuis.writerow(myrow)
def write_csv_row(fragments, collisions_per_peptide, current_sequence, uis, wuis,
                  write_non_uis=False):
    """Write the transition combinations of orders 1..uis as CSV rows.

    For every order the non-UIS combinations are computed with
    c_getnonuis.get_non_uis. By default the UIS combinations derived from
    them (collider.get_uis) are written, one row per combination, each row
    starting with ``current_sequence`` and the order followed by the q3 and
    annotation of every fragment in the combination.

    Parameters:
        fragments: objects providing ``fragment_count``, ``q3`` and
            ``annotation``; ``fragment_count`` is the id used in the
            combinations.
        collisions_per_peptide: passed unchanged to c_getnonuis.get_non_uis.
        current_sequence: first column of every UIS row.
        uis: highest order to write; nothing is written when it is < 1.
        wuis: object with a ``writerow(list)`` method (e.g. a csv writer).
        write_non_uis: when true, write the non-UIS combinations instead.
            Those rows contain only the q3/annotation pairs, without the
            sequence/order prefix. This replaces a branch that used to be
            disabled with a hard-coded ``if False``.
    """
    srm_ids = [f.fragment_count for f in fragments]
    srm_lookup = dict((f.fragment_count, f) for f in fragments)
    for order in range(1, uis + 1):
        non_uis = c_getnonuis.get_non_uis(collisions_per_peptide, order)
        if write_non_uis:
            # the non-UIS combinations are often more informative than the
            # list of UIS combinations
            combinations = non_uis
            prefix = []
        else:
            combinations = collider.get_uis(srm_ids, non_uis, order)
            prefix = [current_sequence, order]
        for comb in combinations:
            myrow = list(prefix)
            for elem in comb:
                fragment = srm_lookup[elem]
                myrow.extend([fragment.q3, fragment.annotation])
            wuis.writerow(myrow)
Example #5
0
def get_nonuis_list(collisions_per_peptide, MAX_UIS):
    """Return the non-UIS combinations for every order from 1 to MAX_UIS.

    The result is a list of length ``MAX_UIS + 1`` indexed by order; index 0
    is never filled and stays an empty set.

    If the ``c_getnonuis`` extension can be imported, entry ``order`` is
    whatever ``c_getnonuis.get_non_uis(collisions_per_peptide, order)``
    returns. Otherwise the Python ``get_non_uis`` is called for every value
    of ``collisions_per_peptide`` and fills the set of each order in place.

    Only a failing import selects the fallback: the ``try`` covers the import
    statement alone, so an ImportError raised while computing is not mistaken
    for a missing extension and the fallback never runs on a partially
    overwritten result list.
    """
    non_uis_list = [set() for i in range(MAX_UIS + 1)]
    try:
        import c_getnonuis
    except ImportError:
        c_getnonuis = None

    if c_getnonuis is not None:
        for order in range(1, MAX_UIS + 1):
            non_uis_list[order] = c_getnonuis.get_non_uis(
                collisions_per_peptide, order)
    else:
        for pepc in collisions_per_peptide.values():
            for order in range(1, MAX_UIS + 1):
                get_non_uis(pepc, non_uis_list[order], order)
    return non_uis_list
Example #6
0
def get_non_UIS_from_transitions(transitions, collisions, par, MAX_UIS, 
                                forceset=False):
    """ Get all combinations that are not UIS, for orders 1 to MAX_UIS

    Returns a list with MAX_UIS + 1 entries, indexed by order (entry 0 is
    left empty). With the C++ module available each entry is the result of
    c_getnonuis.get_non_uis; pass forceset=True to get the set of its keys
    instead. If importing c_getnonuis fails, the result of
    get_non_UIS_from_transitions_old is returned and forceset is ignored.
    """
    try:
        # the C++ implementation is the faster one, try it first
        import c_getnonuis
        per_order = [{} for _slot in range(MAX_UIS + 1)]
        collisions_per_peptide = getnonuis(
            transitions, collisions, par.q3_window, par.ppm)
        for order in range(1, MAX_UIS + 1):
            per_order[order] = c_getnonuis.get_non_uis(
                collisions_per_peptide, order)

        if not forceset:
            return per_order
        return [set(entry.keys()) for entry in per_order]

    except ImportError:
        # no C++ module: use the old pure Python implementation
        return get_non_UIS_from_transitions_old(
            transitions, collisions, par, MAX_UIS)
Example #7
0
    def test_mysql_vs_integrated(self):
            """Compare the one table MySQL approach vs the fully integrated Cpp approach
            
            Here we are comparing the old way (query the precursors from the
            SQL database, then calculate the collisions per peptide and the
            non-UIS combinations in C++) and the new way (start from the
            transitions and let c_integrated.wrap_all_bitwise do everything
            using a C++ range tree).

            For every peptide the number of non-UIS combinations per order
            (1..MAX_UIS) of both ways must be equal. The accumulated timings
            are written to stdout as a single status line that is rewritten
            after every peptide.
            """

            print '\n'*1
            print "Comparing one table MySQL solution vs integrated solution"
            par = self.par
            cursor = self.cursor

            # build the peptide dictionaries directly from self.alltuples
            # instead of querying the database (see the commented-out call
            # to _get_unique_pepids below)
            mypepids = [
                        {
                            'mod_sequence'  :  r[0],
                            'peptide_key' :r[1],
                            'transition_group' :r[1],
                            'parent_id' :  r[2],
                            'q1_charge' :  r[3],
                            'q1' :         r[4],
                            'ssrcalc' :    r[5],
                        }
                        for r in self.alltuples
                if r[3] == 2 #charge is 2
                and r[6] == 0 #isotope is 0
                and r[4] >= self.min_q1
                and r[4] < self.max_q1
            ]

            # NOTE(review): this local collider is never used below; all
            # further calls go through self.mycollider, which is presumably
            # created in the test setup - confirm.
            mycollider = collider.SRMcollider()
            #mypepids = _get_unique_pepids(par, cursor)
            self.mycollider.pepids = mypepids
            self.mycollider.calcinner = 0
            # evaluate a random sample of at most self.limit peptides
            shuffle( self.mycollider.pepids )
            self.mycollider.pepids = self.mycollider.pepids[:self.limit]

            # range tree handed to c_integrated.wrap_all_bitwise below
            import c_rangetree
            r = c_rangetree.ExtendedRangetree_Q1_RT.create()
            r.new_rangetree()
            r.create_tree(tuple(self.alltuples_isotope_correction))
            #c_integrated.create_tree(tuple(self.alltuples_isotope_correction))

            MAX_UIS = 5
            # timing accumulators; only newsql, c_fromprecursortime and
            # newtime are updated in this test, the others are unused here
            c_newuistime = 0; oldtime = 0; c_fromprecursortime = 0
            oldsql = 0; newsql = 0
            newtime = 0
            oldcalctime = 0; localsql = 0
            # becomes True once a cursor position was saved on the terminal
            self._cursor = False
            print "i\toldtime\t\tnewtime\t>>\tspeedup"
            for kk, pep in enumerate(self.mycollider.pepids):
                # 1-based count of peptides, used to average the timings
                ii = kk + 1
                p_id = pep['parent_id']
                q1 = pep['q1']
                q3_low, q3_high = par.get_q3range_transitions()
                q1_low = q1 - par.q1_window 
                q1_high = q1 + par.q1_window
                ssrcalc = pep['ssrcalc']
                peptide_key = pep['peptide_key']

                #correct rounding errors, s.t. we get the same results as before!
                ssrcalc_low = ssrcalc - par.ssrcalc_window + 0.001
                ssrcalc_high = ssrcalc + par.ssrcalc_window - 0.001
                isotope_correction = par.isotopes_up_to * Residues.mass_diffC13 / min(par.parent_charges)

                precursor = Precursor(q1=pep['q1'], transition_group=pep['transition_group'], parent_id = pep['parent_id'], modified_sequence=pep['mod_sequence'], ssrcalc=pep['ssrcalc'])

                transitions = collider.calculate_transitions_ch(
                    ((q1, pep['mod_sequence'], p_id),), [1], q3_low, q3_high)
                #fake some srm_id for the transitions
                transitions = tuple([ (t[0], i) for i,t in enumerate(transitions)])
                ##### transitions = self.mycollider._get_all_transitions(par, pep, cursor)
                nr_transitions = len( transitions )
                if nr_transitions == 0: continue #no transitions in this window

                ###################################
                # Old way to calculate non_uislist 
                #  - get all precursors from the SQL database
                #  - calculate collisions per peptide in C++

                par.query2_add = ' and isotope_nr = 0 ' # due to the new handling of isotopes
                mystart = time.time()
                self.mycollider.mysqlnewtime= 0
                precursors = self.mycollider._get_all_precursors(par, precursor, cursor)
                newsql += time.time() - mystart

                mystart = time.time()
                q3_low, q3_high = par.get_q3range_transitions()
                collisions_per_peptide = c_getnonuis.calculate_collisions_per_peptide_other_ion_series( 
                    transitions, precursors, par, q3_low, q3_high, par.q3_window, par.ppm, False)
                # index 0 stays an empty dict, orders start at 1
                non_uis_list = [ {} for i in range(MAX_UIS+1)]
                for order in range(1,MAX_UIS+1):
                    non_uis_list[order] = c_getnonuis.get_non_uis(
                        collisions_per_peptide, order)
                c_fromprecursortime += time.time() - mystart

                # newl is not used afterwards
                newl = [len(n) for n in non_uis_list]
                # NOTE(review): the comprehensions reuse the name kk of the
                # loop index (in Python 2 this rebinds it); ii was already
                # derived from the index above, so the output is unaffected.
                non_uis_list_old_way = [set(kk.keys()) for kk in non_uis_list]
                # number of non-UIS combinations per order, order 0 dropped
                non_uis_list_len = [len(kk) for kk in non_uis_list_old_way[1:]]

                ###################################
                # New way to calculate non_uislist 
                #  - start out from transitions, get non_uislist
                mystart = time.time()
                newresult = c_integrated.wrap_all_bitwise(transitions, q1_low, ssrcalc_low,
                    q1_high,  ssrcalc_high, peptide_key,
                    min(MAX_UIS,nr_transitions) , par.q3_window, #q3_low, q3_high,
                    par.ppm, par.isotopes_up_to, isotope_correction, par, r)
                newtime += time.time() - mystart

                ###################################
                # Assert equality, print out result
                self.assertEqual( newresult , non_uis_list_len) 

                # average time per peptide of both ways and their ratio;
                # the ratio raises ZeroDivisionError if newtime is still 0
                mys =  "%s\t%0.1fms\t\t%0.2fms\t>>>\t%0.1f" % \
                 (ii,  #i
                 (c_fromprecursortime + newsql)*1000/ii,  # oldtime
                 (newtime)*1000/ii, # newtime
                 (c_fromprecursortime + newsql) / (newtime), # speedup
                )

                # ANSI escape sequences: erase the current line, jump back to
                # the saved cursor position (if any), save the position again
                # and write the status line, so it is overwritten in place
                self.ESC = chr(27)
                sys.stdout.write(self.ESC + '[2K')
                if self._cursor:
                    sys.stdout.write(self.ESC + '[u')
                self._cursor = True
                sys.stdout.write(self.ESC + '[s')
                sys.stdout.write(mys)
                sys.stdout.flush()
Example #8
0
    def test_two_table_mysql(self):
            """Compare the two table vs the one table MySQL approach
            
            Here we are comparing the old (querying the transitions database as
            well as the precursor database) and the new way (only query the
            precursor database and calculate the transitions on the fly) way of
            calculating the collisions.

            For every peptide the non-UIS lists of three computations must
            agree, first in length and then as sets: the C++ computation
            from the precursors, get_non_UIS_from_transitions and
            get_non_UIS_from_transitions_old. The accumulated timings are
            written to stdout as a status line that is rewritten in place.
            """

            print '\n'*1
            print "Comparing one vs two table MySQL solution"
            par = self.par
            cursor = self.cursor

            # this local collider is only passed to _get_all_collisions
            # below; everything else goes through self.mycollider
            mycollider = collider.SRMcollider()
            mypepids = _get_unique_pepids(par, cursor)
            self.mycollider.pepids = mypepids
            self.mycollider.calcinner = 0
            # evaluate a random sample of at most self.limit peptides
            shuffle( self.mycollider.pepids )
            self.mycollider.pepids = self.mycollider.pepids[:self.limit]

            MAX_UIS = 5
            # timing accumulators, one per measured step
            c_newuistime = 0; oldtime = 0; c_fromprecursortime = 0
            oldsql = 0; newsql = 0
            oldcalctime = 0; localsql = 0
            # becomes True once a cursor position was saved on the terminal
            self._cursor = False
            print "oldtime = get UIS from collisions and transitions (getting all collisions from the transitions db)"
            print "cuis + oldsql = as oldtime but calculate UIS in C++"
            print "py+newsql = only get the precursors from the db and calculate collisions in Python"
            print "ctime + newsql = only get the precursors from the db and calculate collisions in C++"
            print "new = use fast SQL and C++ code"
            print "old = use slow SQL and Python code"
            print "i\toldtime\tcuis+oldsql\tpy+newsql\tctime+newsql\t>>>\toldsql\tnewsql\t...\t...\tspeedup"
            for kk, pep in enumerate(self.mycollider.pepids):
                # 1-based count of peptides, used to average the timings
                ii = kk + 1
                p_id = pep['parent_id']
                q1 = pep['q1']
                q3_low, q3_high = par.get_q3range_transitions()
                precursor = Precursor(q1=pep['q1'], transition_group=pep['transition_group'], parent_id = pep['parent_id'], modified_sequence=pep['mod_sequence'], ssrcalc=pep['ssrcalc'])
                transitions = collider.calculate_transitions_ch(
                    ((q1, pep['mod_sequence'], p_id),), [1], q3_low, q3_high)
                #fake some srm_id for the transitions
                transitions = tuple([ (t[0], i) for i,t in enumerate(transitions)])
                ##### transitions = self.mycollider._get_all_transitions(par, pep, cursor)
                nr_transitions = len( transitions )
                if nr_transitions == 0: continue #no transitions in this window
                #
                # oldcalctime: collisions calculated via the collider; only
                # the timing is used, the list is overwritten right below
                mystart = time.time()
                collisions = list(self.mycollider._get_all_collisions_calculate_new(par, precursor, cursor))
                oldcolllen = len(collisions)
                oldcalctime += time.time() - mystart
                #
                # oldsql: collisions fetched with _get_all_collisions; this
                # list is the input of both get_non_UIS_from_transitions calls
                mystart = time.time()
                collisions = _get_all_collisions(mycollider, par, pep, cursor, transitions = transitions)
                oldcsqllen = len(collisions)
                oldsql += time.time() - mystart
                #
                # newsql: only fetch the precursors
                par.query2_add = ' and isotope_nr = 0 ' # due to the new handling of isotopes
                mystart = time.time()
                self.mycollider.mysqlnewtime= 0
                precursors = self.mycollider._get_all_precursors(par, precursor, cursor)
                newsql += time.time() - mystart
                #
                # the measured call is commented out, so localsql only
                # accumulates the overhead of the two time.time() calls
                mystart = time.time()
                #precursors = self.mycollider._get_all_precursors(par, pep, cursor_l)
                localsql += time.time() - mystart
                par.query2_add = '' # due to the new handling of isotopes
                #
                # c_newuistime: non-UIS from the collisions, new function
                mystart = time.time()
                non_uis_list = get_non_UIS_from_transitions(transitions, 
                                            tuple(collisions), par, MAX_UIS)
                cnewuis = non_uis_list
                c_newuistime += time.time() - mystart
                #
                # oldtime: non-UIS from the collisions, old function
                mystart = time.time()
                non_uis_list = get_non_UIS_from_transitions_old(transitions, 
                                            collisions, par, MAX_UIS)
                oldnonuislist = non_uis_list
                oldtime += time.time() - mystart
                #
                # c_fromprecursortime: collisions per peptide and non-UIS
                # calculated in C++ from the precursors
                mystart = time.time()
                q3_low, q3_high = par.get_q3range_transitions()
                collisions_per_peptide = c_getnonuis.calculate_collisions_per_peptide_other_ion_series( 
                    transitions, precursors, par, q3_low, q3_high, par.q3_window, par.ppm, False)
                # index 0 stays an empty dict, orders start at 1
                non_uis_list = [ {} for i in range(MAX_UIS+1)]
                for order in range(1,MAX_UIS+1):
                    non_uis_list[order] = c_getnonuis.get_non_uis(
                        collisions_per_peptide, order)
                c_fromprecursortime += time.time() - mystart

                # compare the sizes first
                newl = [len(n) for n in non_uis_list]
                self.assertEqual(newl, [len(o) for o in cnewuis])
                self.assertEqual(newl, [len(o) for o in oldnonuislist])

                # then the content; both new results provide keys(), the old
                # result is compared as it is
                non_uis_list = [set(k.keys()) for k in non_uis_list]
                cnewuis = [set(k.keys()) for k in cnewuis]

                self.assertEqual(non_uis_list, cnewuis)
                self.assertEqual(non_uis_list, oldnonuislist)

                # one value per column of the header printed before the loop,
                # averaged per peptide; the speedup raises ZeroDivisionError
                # if c_fromprecursortime + newsql is still 0
                mys =  "%s\t%0.fms\t%0.fms\t\t%0.fms\t\t%0.2fms\t\t>>>\t%0.fms\t%0.2fms\t...\t...\t%0.2f" % \
                 (ii,  #i
                 (oldtime + oldsql)*1000/ii,  #oldtime
                 (c_newuistime+oldsql)*1000/ii, #cuis + oldsql
                 (oldcalctime + newsql + oldtime)*1000/ii,  #py+newsql
                 (c_fromprecursortime + newsql)*1000/ii,  #ctime+newsql
                 #(c_fromprecursortime + localsql)*1000/ii,

                 oldsql*1000/ii, #oldsql
                 newsql*1000/ii, #newsql
                 #localsql*1000/ii,
                 #oldtime / c_newuistime
                 (oldtime + oldsql) / (c_fromprecursortime + newsql)
                )

                # ANSI escape sequences: erase the current line, jump back to
                # the saved cursor position (if any), save the position again
                # and write the status line, so it is overwritten in place
                self.ESC = chr(27)
                sys.stdout.write(self.ESC + '[2K')
                if self._cursor:
                    sys.stdout.write(self.ESC + '[u')
                self._cursor = True
                sys.stdout.write(self.ESC + '[s')
                sys.stdout.write(mys)
                sys.stdout.flush()
Example #9
0
    def test_integrated_cpp(self):
            """ Compare the fully integrated vs the mixed C++ rangetree / Python solution.

            Here we are comparing the fully integrated solution of storing all
            precursors in a C++ range tree and never passing them to Python vs
            the solution where we store the precursors in the rangetree, pass
            them to Python and then evaluate them.

            For every precursor the number of non-UIS combinations per order
            (1..MAX_UIS) of both solutions must be equal. The accumulated
            timings are written to stdout as a status line that is rewritten
            in place.
            """

            # verbose is not read anywhere in this test
            verbose = True
            verbose = False
            print '\n'*1
            print "Comparing fully integrated solution (c_integrated.wrap_all)"
            par = self.par
            cursor = self.cursor

            # evaluate a random sample of at most self.limit_large precursors;
            # note that shuffle reorders self.precursors_to_evaluate in place
            all_precursors = self.precursors_to_evaluate
            shuffle(all_precursors)
            all_precursors = all_precursors[:self.limit_large]

            self.myprecursors.build_parent_id_lookup()
            # testrange is not used afterwards; the tree r built below is the
            # one handed to both solutions
            testrange = self.myprecursors.build_rangetree()
            import c_rangetree
            r = c_rangetree.ExtendedRangetree_Q1_RT.create()
            r.new_rangetree()
            r.create_tree(tuple(self.alltuples_isotope_correction))
            #c_integrated.create_tree(tuple(self.alltuples_isotope_correction))

            MAX_UIS = 5
            # timing accumulators; only transitiontime, newtime and oldtime
            # are updated in this test, the others are unused here
            newtime = 0; oldtime = 0; ctime = 0
            oldsql = 0; newsql = 0
            alllocaltime = 0
            localprecursor = 0
            transitiontime = 0
            c_fromprecursortime = 0
            prepare  = []
            # becomes True once a cursor position was saved on the terminal
            self._cursor = False
            print "Running experiment ", par.get_common_filename()
            print "calc_trans. = time to calculate the transitions of the precursor"
            print "old = use rangetree to get precursors, use C++ code to get collperpep"
            print "new = use rangetree to get precursors, use single C++ code to get collperpep"
            print "i\tcalc_trans.\tnew\t\told\t\t>>\tspeedup"
            for kk, precursor in enumerate(all_precursors):
                # 1-based count of precursors, used to average the timings
                ii = kk + 1

                q1 =       precursor.q1
                ssrcalc =  precursor.ssrcalc
                sequence = precursor.modified_sequence
                peptide_key = precursor.transition_group
                p_id = precursor.parent_id

                q3_low, q3_high = par.get_q3range_transitions()

                #new way to calculate the precursors
                # transitiontime covers everything from here up to the
                # ssrcalc window below, not only the transition calculation
                mystart = time.time()
                transitions = c_getnonuis.calculate_transitions_ch(
                    ((q1, sequence, p_id),), [1,2], q3_low, q3_high)
                nr_transitions = len( transitions )
                #fake some srm_id for the transitions
                transitions = tuple([ (t[0], i) for i,t in enumerate(transitions)])

                q1_low = q1 - par.q1_window 
                q1_high = q1 + par.q1_window
                # innerstart is not read afterwards
                innerstart = time.time()
                #correct rounding errors, s.t. we get the same results as before!
                ssrcalc_low = ssrcalc - par.ssrcalc_window + 0.001
                ssrcalc_high = ssrcalc + par.ssrcalc_window - 0.001
                transitiontime += time.time() - mystart

                isotope_correction = par.isotopes_up_to * Residues.mass_diffC13 / min(par.parent_charges)

                ###################################
                # New way to calculate non_uislist 
                #  - start out from transitions, get non_uislist

                mystart = time.time()
                newresult = c_integrated.wrap_all_bitwise(transitions, q1_low, ssrcalc_low,
                    q1_high,  ssrcalc_high, peptide_key,
                    min(MAX_UIS,nr_transitions) , par.q3_window, #q3_low, q3_high,
                    par.ppm, par.isotopes_up_to, isotope_correction, par, r)
                newtime += time.time() - mystart

                ###################################
                # Old way to calculate non_uislist:
                #  - get collisions per peptide
                #  - get non_uislist

                mystart = time.time()
                collisions_per_peptide_obj = self.myprecursors.get_collisions_per_peptide_from_rangetree(
                    precursor, precursor.q1 - par.q1_window, precursor.q1 + par.q1_window, 
                    transitions, par, r)

                ## # if False:
                ## #     precursor_ids = tuple(c_rangetree.query_tree( q1_low, ssrcalc_low, 
                ## #         q1_high,  ssrcalc_high, par.isotopes_up_to, isotope_correction)) 
                ## #     precursors = tuple([parentid_lookup[myid[0]] for myid in precursor_ids
                ## #                         #dont select myself 
                ## #                        if parentid_lookup[myid[0]][2]  != peptide_key])
                ## #     collisions_per_peptide = c_getnonuis.calculate_collisions_per_peptide( 
                ## #         transitions, precursors, q3_low, q3_high, par.q3_window, par.ppm)

                # index 0 stays an empty dict, orders start at 1
                non_uis_list = [{} for i in range(MAX_UIS+1)]
                for order in range(1,MAX_UIS+1):
                    non_uis_list[order] = c_getnonuis.get_non_uis(
                        collisions_per_peptide_obj, order)

                oldtime += time.time() - mystart

                # NOTE(review): the comprehensions reuse the name kk of the
                # loop index (in Python 2 this rebinds it); ii was already
                # derived from the index above, so the output is unaffected.
                non_uis_list_old_way = [set(kk.keys()) for kk in non_uis_list]
                # number of non-UIS combinations per order, order 0 dropped
                non_uis_list_len = [len(kk) for kk in non_uis_list_old_way[1:]]

                ###################################
                # Assert equality, print out result
                self.assertEqual( newresult , non_uis_list_len) 

                # average times per precursor and the old/new ratio; the
                # ratio raises ZeroDivisionError if newtime is still 0
                mys =  "%s\t%0.4fms\t%0.2fms\t\t%0.2fms\t\t>>\t%0.2f" % \
                (ii, transitiontime *1000/ ii, 
                        newtime*1000/ii, oldtime*1000/ii,
                oldtime *1.0 / newtime)
                #start a new line for each output?
                #mys += '\t%s\t%s' % ( len(precursors), len(precursor_new) )
                if False: mys += '\n'

                # ANSI escape sequences: erase the current line, jump back to
                # the saved cursor position (if any), save the position again
                # and write the status line, so it is overwritten in place
                self.ESC = chr(27)
                sys.stdout.write(self.ESC + '[2K')
                if self._cursor:
                    sys.stdout.write(self.ESC + '[u')
                self._cursor = True
                sys.stdout.write(self.ESC + '[s')
                sys.stdout.write(mys)
                sys.stdout.flush()
Example #10
0
        # We dont have experimental height data and cannot use C++ code
        collisions_per_peptide = collider.get_coll_per_peptide(mycollider, 
            transitions, par, peptide_obj, cursor)
        min_needed = mycollider._sub_getMinNeededTransitions(par, transitions, collisions_per_peptide)
        #min_needed = mycollider.getMinNeededTransitions_direct(par, transitions, precursors)
    else:
        # here we consider the case that we have measured a number of
        # transitions experimentally and want to know how many of them are
        # sufficient to establish uniqueness. For this, all we need is
        # that one tuple of transitions establishes uniqueness since we
        # were able to measure it above the background noise.
        collisions_per_peptide = collider.get_coll_per_peptide(mycollider, 
            transitions, par, pep, cursor)
        for order in range(1,nr_transitions+1): 
            mymax = collider.choose(nr_transitions, order)
            if use_cpp: non_uis = c_getnonuis.get_non_uis(collisions_per_peptide, order)
            else: 
                non_uis = set()
                for pepc in collisions_per_peptide.values():
                    get_non_uis(pepc, non_uis, i)
            if len(non_uis) < mymax: break
        if len(non_uis) < mymax: min_needed  = order
        else: min_needed = -1
    spectrum.score = min_needed * nr_transitions
    spectrum.min_needed = min_needed
    if min_needed != -1: spectrum.score = nr_transitions - min_needed
    if not par.quiet: progressm.update(1)
    get_min_tr_time += time.time() - tmp_time; tmp_time = time.time()


# }}}
Example #11
0
 def test_get_non_uis2(self):
     """The keys of c_getnonuis.get_non_uis must match the reference for orders 1 to 5."""
     collisions = test_shared.refcollperpep2
     for order in (1, 2, 3, 4, 5):
         non_uis = c_getnonuis.get_non_uis(collisions, order)
         found = set(non_uis.keys())
         expected = test_shared.refnonuis2_sorted[order]
         self.assertEqual(found, expected)
Example #12
0
# For every precursor: count the transition combinations of order myorder
# that are globally unique ("strike 1") and, unless skip_strike2 is set,
# collect the precursors in the local Q1 / SSRCalc window ("strike 2").
for precursor in precursors_to_evaluate:
    q3_low, q3_high = par.get_q3range_transitions()
    transitions = precursor.calculate_transitions(q3_low, q3_high)
    nr_transitions = len(transitions)

    ###############################################################
    #strike 1: it has to be global UIS

    computed_collisions = myprecursors.get_collisions_per_peptide_from_rangetree(
        precursor, precursor.q1 - par.q1_window, precursor.q1 + par.q1_window, 
        transitions, par, rtree)

    collisions_per_peptide = computed_collisions 

    # see SRMCollider::Combinatorics::get_non_uis
    non_useable_combinations = c_getnonuis.get_non_uis( collisions_per_peptide, myorder)
    # srm_ids is not used in the part of the loop shown here
    srm_ids = [t[1] for t in transitions]
    # usable tuples = all combinations of myorder transitions minus the
    # non-UIS ones; stays 0 when there are fewer transitions than myorder
    tuples_strike1 = 0
    if not nr_transitions < myorder:
      tuples_strike1 = collider.choose(nr_transitions, myorder ) - len(non_useable_combinations)

    ###############################################################
    #strike 2: it has to be locally clean
    if not skip_strike2:
      # NOTE(review): ssrcalc, q1_low, q1_high and pep are not assigned
      # anywhere in this loop - presumably they are defined earlier; confirm
      # that they really describe the current precursor.
      ssrcalc_low = ssrcalc - par.ssrcalc_window + 0.001
      ssrcalc_high = ssrcalc + par.ssrcalc_window - 0.001
      precursor_ids = tuple(c_rangetree.query_tree( q1_low, ssrcalc_low, 
                                                   q1_high,  ssrcalc_high )  )
      # look up every hit by its first field and drop the entries whose
      # third field equals the transition group of pep
      precursors = tuple([parentid_lookup[myid[0]] for myid in precursor_ids
                          #dont select myself 
                         if parentid_lookup[myid[0]][2]  != pep['transition_group']])
Example #13
0
    def test_mysql_vs_integrated(self):
        """Compare the one table MySQL approach vs the fully integrated Cpp approach
            
            Here we are comparing the old (querying the transitions database as
            well as the precursor database) and the new way (only query the
            precursor database and calculate the transitions on the fly) way of
            calculating the collisions.
            """

        print '\n' * 1
        print "Comparing one table MySQL solution vs integrated solution"
        par = self.par
        cursor = self.cursor

        mypepids = [
            {
                'mod_sequence': r[0],
                'peptide_key': r[1],
                'transition_group': r[1],
                'parent_id': r[2],
                'q1_charge': r[3],
                'q1': r[4],
                'ssrcalc': r[5],
            } for r in self.alltuples if r[3] == 2  #charge is 2
            and r[6] == 0  #isotope is 0
            and r[4] >= self.min_q1 and r[4] < self.max_q1
        ]

        mycollider = collider.SRMcollider()
        #mypepids = _get_unique_pepids(par, cursor)
        self.mycollider.pepids = mypepids
        self.mycollider.calcinner = 0
        shuffle(self.mycollider.pepids)
        self.mycollider.pepids = self.mycollider.pepids[:self.limit]

        import c_rangetree
        r = c_rangetree.ExtendedRangetree_Q1_RT.create()
        r.new_rangetree()
        r.create_tree(tuple(self.alltuples_isotope_correction))
        #c_integrated.create_tree(tuple(self.alltuples_isotope_correction))

        MAX_UIS = 5
        c_newuistime = 0
        oldtime = 0
        c_fromprecursortime = 0
        oldsql = 0
        newsql = 0
        newtime = 0
        oldcalctime = 0
        localsql = 0
        self._cursor = False
        print "i\toldtime\t\tnewtime\t>>\tspeedup"
        for kk, pep in enumerate(self.mycollider.pepids):
            ii = kk + 1
            p_id = pep['parent_id']
            q1 = pep['q1']
            q3_low, q3_high = par.get_q3range_transitions()
            q1_low = q1 - par.q1_window
            q1_high = q1 + par.q1_window
            ssrcalc = pep['ssrcalc']
            peptide_key = pep['peptide_key']

            #correct rounding errors, s.t. we get the same results as before!
            ssrcalc_low = ssrcalc - par.ssrcalc_window + 0.001
            ssrcalc_high = ssrcalc + par.ssrcalc_window - 0.001
            isotope_correction = par.isotopes_up_to * Residues.mass_diffC13 / min(
                par.parent_charges)

            precursor = Precursor(q1=pep['q1'],
                                  transition_group=pep['transition_group'],
                                  parent_id=pep['parent_id'],
                                  modified_sequence=pep['mod_sequence'],
                                  ssrcalc=pep['ssrcalc'])

            transitions = collider.calculate_transitions_ch(
                ((q1, pep['mod_sequence'], p_id), ), [1], q3_low, q3_high)
            #fake some srm_id for the transitions
            transitions = tuple([(t[0], i) for i, t in enumerate(transitions)])
            ##### transitions = self.mycollider._get_all_transitions(par, pep, cursor)
            nr_transitions = len(transitions)
            if nr_transitions == 0: continue  #no transitions in this window

            ###################################
            # Old way to calculate non_uislist
            #  - get all precursors from the SQL database
            #  - calculate collisions per peptide in C++

            par.query2_add = ' and isotope_nr = 0 '  # due to the new handling of isotopes
            mystart = time.time()
            self.mycollider.mysqlnewtime = 0
            precursors = self.mycollider._get_all_precursors(
                par, precursor, cursor)
            newsql += time.time() - mystart

            mystart = time.time()
            q3_low, q3_high = par.get_q3range_transitions()
            collisions_per_peptide = c_getnonuis.calculate_collisions_per_peptide_other_ion_series(
                transitions, precursors, par, q3_low, q3_high, par.q3_window,
                par.ppm, False)
            non_uis_list = [{} for i in range(MAX_UIS + 1)]
            for order in range(1, MAX_UIS + 1):
                non_uis_list[order] = c_getnonuis.get_non_uis(
                    collisions_per_peptide, order)
            c_fromprecursortime += time.time() - mystart

            newl = [len(n) for n in non_uis_list]
            non_uis_list_old_way = [set(kk.keys()) for kk in non_uis_list]
            non_uis_list_len = [len(kk) for kk in non_uis_list_old_way[1:]]

            ###################################
            # New way to calculate non_uislist
            #  - start out from transitions, get non_uislist
            mystart = time.time()
            newresult = c_integrated.wrap_all_bitwise(
                transitions,
                q1_low,
                ssrcalc_low,
                q1_high,
                ssrcalc_high,
                peptide_key,
                min(MAX_UIS, nr_transitions),
                par.q3_window,  #q3_low, q3_high,
                par.ppm,
                par.isotopes_up_to,
                isotope_correction,
                par,
                r)
            newtime += time.time() - mystart

            ###################################
            # Assert equality, print out result
            self.assertEqual(newresult, non_uis_list_len)

            mys =  "%s\t%0.1fms\t\t%0.2fms\t>>>\t%0.1f" % \
             (ii,  #i
             (c_fromprecursortime + newsql)*1000/ii,  # oldtime
             (newtime)*1000/ii, # newtime
             (c_fromprecursortime + newsql) / (newtime), # speedup
            )

            self.ESC = chr(27)
            sys.stdout.write(self.ESC + '[2K')
            if self._cursor:
                sys.stdout.write(self.ESC + '[u')
            self._cursor = True
            sys.stdout.write(self.ESC + '[s')
            sys.stdout.write(mys)
            sys.stdout.flush()
Example #14
0
    def test_two_table_mysql(self):
        """Compare the two table vs the one table MySQL approach
            
            Here we are comparing the old (querying the transitions database as
            well as the precursor database) and the new way (only query the
            precursor database and calculate the transitions on the fly) way of
            calculating the collisions.
            """

        print '\n' * 1
        print "Comparing one vs two table MySQL solution"
        par = self.par
        cursor = self.cursor

        mycollider = collider.SRMcollider()
        mypepids = _get_unique_pepids(par, cursor)
        self.mycollider.pepids = mypepids
        self.mycollider.calcinner = 0
        shuffle(self.mycollider.pepids)
        self.mycollider.pepids = self.mycollider.pepids[:self.limit]

        MAX_UIS = 5
        c_newuistime = 0
        oldtime = 0
        c_fromprecursortime = 0
        oldsql = 0
        newsql = 0
        oldcalctime = 0
        localsql = 0
        self._cursor = False
        print "oldtime = get UIS from collisions and transitions (getting all collisions from the transitions db)"
        print "cuis + oldsql = as oldtime but calculate UIS in C++"
        print "py+newsql = only get the precursors from the db and calculate collisions in Python"
        print "ctime + newsql = only get the precursors from the db and calculate collisions in C++"
        print "new = use fast SQL and C++ code"
        print "old = use slow SQL and Python code"
        print "i\toldtime\tcuis+oldsql\tpy+newsql\tctime+newsql\t>>>\toldsql\tnewsql\t...\t...\tspeedup"
        for kk, pep in enumerate(self.mycollider.pepids):
            ii = kk + 1
            p_id = pep['parent_id']
            q1 = pep['q1']
            q3_low, q3_high = par.get_q3range_transitions()
            precursor = Precursor(q1=pep['q1'],
                                  transition_group=pep['transition_group'],
                                  parent_id=pep['parent_id'],
                                  modified_sequence=pep['mod_sequence'],
                                  ssrcalc=pep['ssrcalc'])
            transitions = collider.calculate_transitions_ch(
                ((q1, pep['mod_sequence'], p_id), ), [1], q3_low, q3_high)
            #fake some srm_id for the transitions
            transitions = tuple([(t[0], i) for i, t in enumerate(transitions)])
            ##### transitions = self.mycollider._get_all_transitions(par, pep, cursor)
            nr_transitions = len(transitions)
            if nr_transitions == 0: continue  #no transitions in this window
            #
            mystart = time.time()
            collisions = list(
                self.mycollider._get_all_collisions_calculate_new(
                    par, precursor, cursor))
            oldcolllen = len(collisions)
            oldcalctime += time.time() - mystart
            #
            mystart = time.time()
            collisions = _get_all_collisions(mycollider,
                                             par,
                                             pep,
                                             cursor,
                                             transitions=transitions)
            oldcsqllen = len(collisions)
            oldsql += time.time() - mystart
            #
            par.query2_add = ' and isotope_nr = 0 '  # due to the new handling of isotopes
            mystart = time.time()
            self.mycollider.mysqlnewtime = 0
            precursors = self.mycollider._get_all_precursors(
                par, precursor, cursor)
            newsql += time.time() - mystart
            #
            mystart = time.time()
            #precursors = self.mycollider._get_all_precursors(par, pep, cursor_l)
            localsql += time.time() - mystart
            par.query2_add = ''  # due to the new handling of isotopes
            #
            mystart = time.time()
            non_uis_list = get_non_UIS_from_transitions(
                transitions, tuple(collisions), par, MAX_UIS)
            cnewuis = non_uis_list
            c_newuistime += time.time() - mystart
            #
            mystart = time.time()
            non_uis_list = get_non_UIS_from_transitions_old(
                transitions, collisions, par, MAX_UIS)
            oldnonuislist = non_uis_list
            oldtime += time.time() - mystart
            #
            mystart = time.time()
            q3_low, q3_high = par.get_q3range_transitions()
            collisions_per_peptide = c_getnonuis.calculate_collisions_per_peptide_other_ion_series(
                transitions, precursors, par, q3_low, q3_high, par.q3_window,
                par.ppm, False)
            non_uis_list = [{} for i in range(MAX_UIS + 1)]
            for order in range(1, MAX_UIS + 1):
                non_uis_list[order] = c_getnonuis.get_non_uis(
                    collisions_per_peptide, order)
            c_fromprecursortime += time.time() - mystart

            newl = [len(n) for n in non_uis_list]
            self.assertEqual(newl, [len(o) for o in cnewuis])
            self.assertEqual(newl, [len(o) for o in oldnonuislist])

            non_uis_list = [set(k.keys()) for k in non_uis_list]
            cnewuis = [set(k.keys()) for k in cnewuis]

            self.assertEqual(non_uis_list, cnewuis)
            self.assertEqual(non_uis_list, oldnonuislist)

            mys =  "%s\t%0.fms\t%0.fms\t\t%0.fms\t\t%0.2fms\t\t>>>\t%0.fms\t%0.2fms\t...\t...\t%0.2f" % \
             (ii,  #i
             (oldtime + oldsql)*1000/ii,  #oldtime
             (c_newuistime+oldsql)*1000/ii, #cuis + oldsql
             (oldcalctime + newsql + oldtime)*1000/ii,  #newsql
             (c_fromprecursortime + newsql)*1000/ii,  #ctime+newsql
             #(c_fromprecursortime + localsql)*1000/ii,

             oldsql*1000/ii, #newsql
             newsql*1000/ii, #oldsql
             #localsql*1000/ii,
             #oldtime / c_newuistime
             (oldtime + oldsql) / (c_fromprecursortime + newsql)
            )

            self.ESC = chr(27)
            sys.stdout.write(self.ESC + '[2K')
            if self._cursor:
                sys.stdout.write(self.ESC + '[u')
            self._cursor = True
            sys.stdout.write(self.ESC + '[s')
            sys.stdout.write(mys)
            sys.stdout.flush()
Example #15
0
    def test_integrated_cpp(self):
        """ Compare the fully integrated vs the mixed C++ rangetree / Python solution.

            Here we are comparing the fully integrated solution of storing all
            precursors in a C++ range tree and never passing them to Python vs
            the solution where we store the precursors in the rangetree, pass
            them to Python and then evaluate them.
            """

        verbose = True
        verbose = False
        print '\n' * 1
        print "Comparing fully integrated solution (c_integrated.wrap_all)"
        par = self.par
        cursor = self.cursor

        all_precursors = self.precursors_to_evaluate
        shuffle(all_precursors)
        all_precursors = all_precursors[:self.limit_large]

        self.myprecursors.build_parent_id_lookup()
        testrange = self.myprecursors.build_rangetree()
        import c_rangetree
        r = c_rangetree.ExtendedRangetree_Q1_RT.create()
        r.new_rangetree()
        r.create_tree(tuple(self.alltuples_isotope_correction))
        #c_integrated.create_tree(tuple(self.alltuples_isotope_correction))

        MAX_UIS = 5
        newtime = 0
        oldtime = 0
        ctime = 0
        oldsql = 0
        newsql = 0
        alllocaltime = 0
        localprecursor = 0
        transitiontime = 0
        c_fromprecursortime = 0
        prepare = []
        self._cursor = False
        print "Running experiment ", par.get_common_filename()
        print "calc_trans. = time to calculate the transitions of the precursor"
        print "old = use rangetree to get precursors, use C++ code to get collperpep"
        print "new = use rangetree to get precursors, use single C++ code to get collperpep"
        print "i\tcalc_trans.\tnew\t\told\t\t>>\tspeedup"
        for kk, precursor in enumerate(all_precursors):
            ii = kk + 1

            q1 = precursor.q1
            ssrcalc = precursor.ssrcalc
            sequence = precursor.modified_sequence
            peptide_key = precursor.transition_group
            p_id = precursor.parent_id

            q3_low, q3_high = par.get_q3range_transitions()

            #new way to calculate the precursors
            mystart = time.time()
            transitions = c_getnonuis.calculate_transitions_ch(
                ((q1, sequence, p_id), ), [1, 2], q3_low, q3_high)
            nr_transitions = len(transitions)
            #fake some srm_id for the transitions
            transitions = tuple([(t[0], i) for i, t in enumerate(transitions)])

            q1_low = q1 - par.q1_window
            q1_high = q1 + par.q1_window
            innerstart = time.time()
            #correct rounding errors, s.t. we get the same results as before!
            ssrcalc_low = ssrcalc - par.ssrcalc_window + 0.001
            ssrcalc_high = ssrcalc + par.ssrcalc_window - 0.001
            transitiontime += time.time() - mystart

            isotope_correction = par.isotopes_up_to * Residues.mass_diffC13 / min(
                par.parent_charges)

            ###################################
            # New way to calculate non_uislist
            #  - start out from transitions, get non_uislist

            mystart = time.time()
            newresult = c_integrated.wrap_all_bitwise(
                transitions,
                q1_low,
                ssrcalc_low,
                q1_high,
                ssrcalc_high,
                peptide_key,
                min(MAX_UIS, nr_transitions),
                par.q3_window,  #q3_low, q3_high,
                par.ppm,
                par.isotopes_up_to,
                isotope_correction,
                par,
                r)
            newtime += time.time() - mystart

            ###################################
            # Old way to calculate non_uislist:
            #  - get collisions per peptide
            #  - get non_uislist

            mystart = time.time()
            collisions_per_peptide_obj = self.myprecursors.get_collisions_per_peptide_from_rangetree(
                precursor, precursor.q1 - par.q1_window,
                precursor.q1 + par.q1_window, transitions, par, r)

            ## # if False:
            ## #     precursor_ids = tuple(c_rangetree.query_tree( q1_low, ssrcalc_low,
            ## #         q1_high,  ssrcalc_high, par.isotopes_up_to, isotope_correction))
            ## #     precursors = tuple([parentid_lookup[myid[0]] for myid in precursor_ids
            ## #                         #dont select myself
            ## #                        if parentid_lookup[myid[0]][2]  != peptide_key])
            ## #     collisions_per_peptide = c_getnonuis.calculate_collisions_per_peptide(
            ## #         transitions, precursors, q3_low, q3_high, par.q3_window, par.ppm)

            non_uis_list = [{} for i in range(MAX_UIS + 1)]
            for order in range(1, MAX_UIS + 1):
                non_uis_list[order] = c_getnonuis.get_non_uis(
                    collisions_per_peptide_obj, order)

            oldtime += time.time() - mystart

            non_uis_list_old_way = [set(kk.keys()) for kk in non_uis_list]
            non_uis_list_len = [len(kk) for kk in non_uis_list_old_way[1:]]

            ###################################
            # Assert equality, print out result
            self.assertEqual(newresult, non_uis_list_len)

            mys =  "%s\t%0.4fms\t%0.2fms\t\t%0.2fms\t\t>>\t%0.2f" % \
            (ii, transitiontime *1000/ ii,
                    newtime*1000/ii, oldtime*1000/ii,
            oldtime *1.0 / newtime)
            #start a new line for each output?
            #mys += '\t%s\t%s' % ( len(precursors), len(precursor_new) )
            if False: mys += '\n'

            self.ESC = chr(27)
            sys.stdout.write(self.ESC + '[2K')
            if self._cursor:
                sys.stdout.write(self.ESC + '[u')
            self._cursor = True
            sys.stdout.write(self.ESC + '[s')
            sys.stdout.write(mys)
            sys.stdout.flush()