예제 #1
0
 def getNewEnvelope(self, atomCnt_str, jP, precDigits):
     """Build, cache and return the aggregated isotopic envelope of a formula.

     Args:
         atomCnt_str: Formula string; parsed with ``self.formParser.parse``
             into an element -> atom count mapping.
         jP: Passed unchanged as the fourth argument of
             ``IsoSpecPy.IsoSpec`` (presumably the joint probability the
             envelope has to cover -- confirm against IsoSpecPy).
         precDigits: Passed unchanged to ``agg_spec_proper`` (presumably
             the number of digits used when merging peaks -- confirm).

     Returns:
         A ``(masses, probs)`` pair holding copies of the arrays that were
         stored in ``self.isotopicEnvelopes``.
     """
     atomCnt = self.formParser.parse(atomCnt_str)

     # One (count, isotope masses, isotope probabilities) row per element.
     per_element = [
         (cnt, self.isoMasses[el], self.isoProbs[el])
         for el, cnt in list(atomCnt.items())
     ]
     counts = [row[0] for row in per_element]
     isotope_masses = [row[1] for row in per_element]
     isotope_probs = [row[2] for row in per_element]

     # NOTE(review): the generator stays bound to a local while the raw
     # results are converted; presumably the raw buffers belong to it.
     generator = IsoSpecPy.IsoSpec(counts, isotope_masses, isotope_probs, jP)
     raw_masses, raw_logprobs, _ = generator.getConfsRaw()

     masses, probs = agg_spec_proper(
         cdata2numpyarray(raw_masses),
         np.exp(cdata2numpyarray(raw_logprobs)),
         precDigits,
     )

     # Memoize under the full argument triple; callers get private copies.
     cache_key = (atomCnt_str, jP, precDigits)
     self.isotopicEnvelopes[cache_key] = (masses, probs)
     return masses.copy(), probs.copy()
예제 #2
0
    def __getNewEnvelope(self, atomCnt_str, jP, prec_digits):
        """Generate a new isotopic envelope, cache it and return copies.

        The elapsed wall-clock time of the call is added to
        ``self.stats['Envelopes Generation Total T']``.

        Args:
            atomCnt_str: Formula string; parsed with
                ``self.formParser.parse`` into an element -> count mapping.
            jP: Forwarded as the fourth argument of ``IsoSpecPy.IsoSpec``
                (presumably the joint probability to cover -- confirm).
            prec_digits: Forwarded to ``aggregate_envelopes`` (presumably
                the rounding precision used for merging peaks -- confirm).

        Returns:
            ``(masses, probs)`` -- copies of the arrays stored in
            ``self.isotopicEnvelopes``.
        """
        started = time()

        atomCnt = self.formParser.parse(atomCnt_str)
        # One (count, isotope masses, isotope probabilities) row per element.
        rows = [
            (cnt, self.iso_masses[el], self.iso_probs[el])
            for el, cnt in atomCnt.items()
        ]
        counts = [row[0] for row in rows]
        isotope_masses = [row[1] for row in rows]
        isotope_probs = [row[2] for row in rows]

        # NOTE(review): the generator stays bound to a local while the raw
        # results are converted; presumably the raw buffers belong to it.
        generator = IsoSpecPy.IsoSpec(counts, isotope_masses, isotope_probs,
                                      jP)
        raw_masses, raw_logprobs, _ = generator.getConfsRaw()

        masses, probs = aggregate_envelopes(
            cdata2numpyarray(raw_masses),
            np.exp(cdata2numpyarray(raw_logprobs)),
            prec_digits,
        )

        # memoization
        # TODO: get rid of it when IsoSpec is using stats convolution
        cache_key = (atomCnt_str, jP, prec_digits)
        self.isotopicEnvelopes[cache_key] = (masses, probs)

        self.stats['Envelopes Generation Total T'] += time() - started

        return masses.copy(), probs.copy()
예제 #3
0
def main():
    """Simulate the isotopic pattern of a molecule and write it to CSV.

    Steps:
      1. Parse the options (molecule file, isotope table, output folder,
         charge, resolution, bin size, points, m/z offset, probability
         cutoff) with a ``GooeyParser``.
      2. Read the isotope table and the molecule composition, add
         ``z`` hydrogen atoms to the molecule.
      3. Ask ``IsoSpecPy.IsoSpec`` for the isotopologues covering the
         requested cumulative probability.
      4. Convert masses to m/z, print the first rows, shape the peaks with
         ``peak_shaper`` and write ``centroids.csv`` and ``spectrum.csv``
         into the output folder.

    Raises:
        ValueError: If an element of the molecule has no row in the
            isotope table.
    """
    parser = GooeyParser(prog='IsoTool')

    parser.add_argument('Molecule', type=str, widget='FileChooser')

    g = parser.add_argument_group()
    isos = g.add_mutually_exclusive_group(required=True)

    isos.add_argument('--terrestrial',
                      metavar='Terrestrial abundances',
                      default=os.path.join(data_folder,
                                           'isotopes_terrestrial.csv'),
                      dest='isotopes')

    isos.add_argument('--custom',
                      metavar='Custom isotope definitons',
                      dest='isotopes',
                      widget='FileChooser')

    parser.add_argument('-o',
                        '--output_folder',
                        metavar='Output Folder',
                        type=str,
                        widget='DirChooser',
                        default=folder)
    parser.add_argument(
        '-z',
        '--charge',
        metavar='Charge',
        help='z, adds protons and recalculates m/z accordingly',
        default=1,
        dest='z',
        type=int)

    parser.add_argument('-r',
                        '--resolution',
                        metavar='Resolution',
                        help='m/z*1/FWHM, determines Gaussian peak width',
                        type=int,
                        default=40000)
    parser.add_argument('-bs',
                        '--binsize',
                        help='for summing Gaussian peaks of isotopologues',
                        type=float,
                        default=0.002)
    parser.add_argument('-ps',
                        '--points',
                        help='no. of points for Gaussian peak',
                        type=int,
                        default=250)
    parser.add_argument('-dmz',
                        '--delta_mz',
                        metavar='Delta m/z',
                        help='two sided mass offset for Gaussian peaks',
                        type=float,
                        default=0.05)
    parser.add_argument(
        '-p',
        '--proba',
        metavar='Probability Cutoff',
        help='Cumulative probability threshold for isotopologue calculations',
        type=float,
        default=0.9999)

    args = parser.parse_args()

    isotopes = pd.read_csv(args.isotopes, index_col='element_symbol').dropna()

    mol = pd.read_csv(args.Molecule).set_index('element')

    # Charge carriers are added as whole hydrogen atoms; their electrons
    # are removed again when the masses are converted to m/z below.
    mol.loc['H', 'n'] += args.z

    # Only elements that are actually present take part in the calculation.
    elements = [
        e for e in mol.reset_index().element.values if mol.loc[e].n > 0
    ]
    atoms = [n for n in mol.n.values if n > 0]

    masses = []
    probs = []
    for e in elements:
        if e not in isotopes.index:
            raise ValueError('Element %s has no defined isotopes' % e)
        # Selecting with a list always yields a DataFrame, so elements with
        # a single isotope row are handled like all the others.
        element_isotopes = isotopes.loc[[e]]
        masses.append(tuple(element_isotopes.atomic_mass.values))
        probs.append(tuple(element_isotopes.abundance.values))

    iso_spec = IsoSpecPy.IsoSpec(atoms, masses, probs, args.proba)

    confs = iso_spec.getConfs()

    confs = pd.DataFrame(confs).transpose()
    confs.columns = ['mz', 'p', 'isotopologue']
    # IsoSpec reports log-probabilities.
    confs.p = confs.p.astype(np.float64).apply(np.exp)

    # z hydrogen atoms were added above, so z electrons have to be removed
    # to obtain the mass of the z-fold protonated ion.
    confs.mz = (confs.mz - args.z * m_electron) / args.z
    confs = confs.sort_values('mz')

    # One column per isotope, named "<mass number><element symbol>".
    f = isotopes.loc[elements]
    iso_cols = [
        ''.join([str(y), str(x)]) for (x, y) in zip(f.index, f.mass_number)
    ]
    temp_df = pd.DataFrame.from_records(confs.isotopologue, columns=iso_cols)
    confs = pd.concat(
        [
            confs[['mz', 'p']].reset_index(drop=True),
            temp_df.reset_index(drop=True)
        ],
        axis=1,
    )

    print(confs.head().to_string())

    spectrum = peak_shaper(confs[['mz', 'p']].values,
                           args.resolution,
                           delta_mz=args.delta_mz,
                           binsize=args.binsize,
                           no_points=args.points,
                           normalize=True,
                           process_binning=True)

    spectrum = pd.DataFrame(spectrum, columns=['mz', 'int'])

    confs.to_csv(os.path.join(args.output_folder, 'centroids.csv'),
                 index=False)
    spectrum.to_csv(os.path.join(args.output_folder, 'spectrum.csv'),
                    index=False)
예제 #4
0
파일: water.py 프로젝트: zmzhang/IsoSpec
# Every print below passes a single pre-formatted string, so the script
# produces the same output whether ``print`` is the Python 2 statement or the
# Python 3 function.

# Label plus (element index, isotope index) into a configuration's isotope
# counts. Element 0 is hydrogen and element 1 is oxygen, matching the order in
# which the elements are handed to IsoSpecPy.IsoSpec below.
_ISOTOPE_COUNT_FIELDS = (
    ("Number of Protium atoms:", 0, 0),
    ("Number of Deuterium atoms", 0, 1),
    ("Number of O16 atoms:", 1, 0),
    ("Number of O17 atoms:", 1, 1),
    ("Number of O18 atoms:", 1, 2),
)


def _print_isotope_counts(configuration):
    """Print the atom count of every isotope in one configuration.

    ``configuration`` is indexed as ``(mass, log-prob, counts)`` where
    ``counts[element][isotope]`` is the number of atoms of that isotope.
    """
    counts = configuration[2]
    for label, element_idx, isotope_idx in _ISOTOPE_COUNT_FIELDS:
        print("%s %s" % (label, counts[element_idx][isotope_idx]))


# Isotope counts of the first configuration computed earlier in the script.
_print_isotope_counts(confs[0])

print("")
print("Now what if both isotopes of hydrogen were equally probable, while prob. of O16 was 50%, O17 at 30% and O18 at 20%?")

hydrogen_probs = (0.5, 0.5)
oxygen_probs = (0.5, 0.3, 0.2)
hydrogen_masses = (1.00782503207, 2.0141017778)
oxygen_masses = (15.99491461956, 16.99913170, 17.9991610)
atom_counts = (2, 1)

# Probability requested from IsoSpec; also quoted in the message below.
probability_threshold = 0.9

i = IsoSpecPy.IsoSpec(atom_counts, (hydrogen_masses, oxygen_masses),
                      (hydrogen_probs, oxygen_probs), probability_threshold)

print("The isotopologue set containing at least %s probability has %s element(s)"
      % (probability_threshold, len(i)))

confs = i.getConfs()

print("The first configuration has the following parameters:")
print("Mass: %s" % (confs[0][0],))
print("log-prob: %s" % (confs[0][1],))
print("probability: %s" % (exp(confs[0][1]),))
_print_isotope_counts(confs[0])
예제 #5
0
    def _cal_isotopologues(self, formula_dict, min_abundance, current_abundance, ms_dynamic_range):
        """Yield the heavy isotopologues of a monoisotopic molecular formula.

        The isotopologue distribution is computed with IsoSpecPy, a wrapper
        around a C library (https://github.com/MatteoLacki/IsoSpec).

        Notes kept from the original author (last update 05-26-2020,
        Yuri E. Corilo):

        *   Meant to be used at runtime during molecular identification,
            only once a positive ID of the monoisotopic ion was observed.
        *   Can be used to simulate a mass spectrum, but needs a resolving
            power calculation (and plotting) to be fully operational.
        *   Might break when adding non-conventional atoms (not yet tested).

        Args:
            formula_dict: Monoisotopic formula, e.g.
                ``{'C': 10, 'H': 20, 'O': 2, ...}``. Atom labels need to
                follow the ``Atoms`` class labels. The ``Labels.ion_type``
                and ``'H'`` entries are copied to every isotopologue
                unchanged and take no part in the isotope calculation.
            min_abundance: An isotopologue is only yielded when its
                theoretical abundance is greater than this value.
            current_abundance: Abundance of the monoisotopic peak; multiplied
                by the isotopologue/monoisotopic probability ratio to get
                the theoretical abundance.
            ms_dynamic_range: The IsoSpecPy cut-off is
                ``1 - 1 / ms_dynamic_range``.

        Yields:
            ``(formula, ratio)`` tuples: the isotopologue formula as a dict
            without zero-count entries, and its probability divided by the
            probability of the monoisotopic formula.

        Raises:
            ValueError: If ``formula_dict`` is not among the formulas
                generated by IsoSpecPy.
        """
        # Updated to reflect the minimum possible mass peak abundance.
        cut_off_to_IsoSpeccPy = 1 - (1 / ms_dynamic_range)

        atoms_labels = (atom for atom in formula_dict.keys() if atom != Labels.ion_type and atom != 'H')

        atoms_count = []
        masses_list_tuples = []
        props_list_tuples = []
        all_atoms_list = []

        for atom_label in atoms_labels:

            isotopes_entry = Atoms.isotopes.get(atom_label)

            # The entry lists the heavy isotope labels at index 1; an entry
            # with fewer than two items (or an empty list there) means the
            # element contributes only its monoisotopic label.
            if len(isotopes_entry) > 1:
                element_labels = [atom_label] + list(isotopes_entry[1])
            else:
                element_labels = [atom_label]

            all_atoms_list.extend(element_labels)
            atoms_count.append(formula_dict.get(atom_label))
            masses_list_tuples.append([Atoms.atomic_masses.get(label) for label in element_labels])
            props_list_tuples.append([Atoms.isotopic_abundance.get(label) for label in element_labels])

        iso = IsoSpecPy.IsoSpec(atoms_count, masses_list_tuples, props_list_tuples, cut_off_to_IsoSpeccPy)

        conf = iso.getConfs()
        # conf[1] holds log-probabilities, conf[2] the per-isotope atom counts
        # in the same order as all_atoms_list.
        probs = exp(conf[1])
        molecular_formulas = conf[2]

        # These two entries are identical for every isotopologue.
        ion_type = formula_dict.get(Labels.ion_type)
        hydrogen_count = formula_dict.get('H')

        new_formulas = []
        for formula_list in molecular_formulas:
            new_formula_dict = dict(zip(all_atoms_list, formula_list))
            new_formula_dict[Labels.ion_type] = ion_type
            if hydrogen_count:
                new_formula_dict['H'] = hydrogen_count

            # Drop isotopes that do not occur in this isotopologue.
            new_formulas.append({x: y for x, y in new_formula_dict.items() if y != 0})

        if new_formulas:
            # Find where the monoisotopic formula is.
            index_mono = new_formulas.index(formula_dict)
            # Ratio isotopologue / monoisotopic.
            probs = list(probs / probs[index_mono])

            # The monoisotopic formula itself is not reported.
            del probs[index_mono]
            del new_formulas[index_mono]

            for formulas, prob in zip(new_formulas, probs):

                theor_abundance = current_abundance * prob
                if theor_abundance > min_abundance:
                    yield (formulas, prob)