def confs_from_formula(formula, threshold=0.001, total_prob=None, charge=1, adduct=None):
    """Simulate the isotopic envelope of a formula and return its peaks.

    Parameters
    ----------
    formula : str
        Nonempty chemical formula. Every match of an element symbol
        (capital letter plus optional lowercase letters) followed by an
        optional count is read; a missing count means 1 and repeated
        symbols are summed.
    threshold : float
        Passed to ``IsoSpecPy.IsoThreshold`` with ``absolute=False``.
        Only used when ``total_prob`` is None.
    total_prob : float or None
        When not None, ``IsoSpecPy.IsoTotalProb`` is used instead, with
        this value as ``prob_to_cover`` and ``get_minimal_pset=True``.
    charge : int
        Added to the count of ``adduct`` (when given); every mass is
        divided by ``abs(charge)``.
    adduct : str or None
        Element symbol whose atom count is increased by ``charge``.

    Returns
    -------
    list of (float, float)
        ``(mass / abs(charge), probability)`` pairs.

    Raises
    ------
    AssertionError
        If any element count is negative after applying the adduct.
    """
    element_counts = Counter()
    for symbol, number in re.findall('([A-Z][a-z]*)([0-9]*)', formula):
        element_counts[symbol] += int(number) if number else 1

    if adduct:
        element_counts[adduct] += charge
    assert all(count >= 0 for count in element_counts.values())

    # Elements whose count dropped to zero are left out of the formula.
    normalized = ''.join(
        symbol + str(count)
        for symbol, count in element_counts.items()
        if count
    )

    if total_prob is None:
        isospec = IsoSpecPy.IsoThreshold(formula=normalized,
                                         threshold=threshold,
                                         absolute=False,
                                         get_confs=False)
    else:
        isospec = IsoSpecPy.IsoTotalProb(formula=normalized,
                                         prob_to_cover=total_prob,
                                         get_minimal_pset=True,
                                         get_confs=False)

    return [
        (mass / abs(charge), prob)
        for mass, prob in zip(isospec.masses, isospec.probs)
    ]
def test_isospec_threshold_(formula, thr):
    """Return the wall-clock seconds needed to build an ``iso.IsoThreshold``.

    The distribution is constructed for ``formula`` with threshold ``thr``
    and ``get_confs=True``; only the elapsed time is returned.
    """
    started = time()
    # Bound to a name so the object stays alive until the clock is read.
    spectrum = iso.IsoThreshold(formula=formula, threshold=thr, get_confs=True)
    finished = time()
    return finished - started
def expand_isotopes(peptide, charge_states=(2, 3)):
    '''
    Convert a peptide record to a DataFrame of isotopic peaks.

    Input
        peptide - Series-like record. It is indexed with 'sequence', with
            '<z>+' for every z in charge_states (e.g. '2+', '3+'), and with
            every model name listed in params.ion_models.
        charge_states - iterable of charge states to expand. The default is
            an immutable tuple so no state is shared between calls.
    Return
        DataFrame with one row for each isotopic peak and charge state.
        Columns are:
            mz - m/z of ion, as computed by get_ions(masses, z)
            ic - isotope probability multiplied by peptide['<z>+']
            ic_XX - ion abundance according to XX model (ic * peptide[XX])
            z - charge
            sequence - peptide sequence
    '''
    # The states are iterated three times below, so materialize one-shot
    # iterables (generators) up front.
    charge_states = tuple(charge_states)

    formula = ''.join([
        '{}{}'.format(x, y)
        for x, y in mass.Composition(peptide['sequence']).items()
    ])
    cluster = IsoSpecPy.IsoThreshold(formula=formula, threshold=0.005,
                                     absolute=True)
    mz0 = cluster.np_masses()
    int0 = cluster.np_probs()

    # The three arrays are concatenated in the same charge-state order, so
    # row k of each describes the same peak.
    mz = np.concatenate([get_ions(mz0, z) for z in charge_states])
    ic = np.concatenate(
        [int0 * peptide['{}+'.format(z)] for z in charge_states])
    charge = np.concatenate(
        [np.repeat(z, mz0.shape[0]) for z in charge_states])

    result = pd.DataFrame({'mz': mz, 'ic': ic, 'z': charge})
    result['sequence'] = peptide['sequence']
    for model in params.ion_models:
        result['ic_{}'.format(model)] = result['ic'] * peptide[model]
    return result
def confs_from_layered_generator(formula, target_prob):
    """Collect the configurations of ``IsoSpecPy.IsoLayered`` for a formula.

    The distribution is built with ``prob_to_cover=target_prob``,
    ``get_confs=True`` and ``get_minimal_pset=True``.

    Returns whatever ``sort_confs`` returns for the tuple
    ``(masses, log_probabilities, flattened_configurations)``, where each
    flattened configuration is the per-element isotope counts joined into
    one flat list.
    """
    masses, log_probs, flat_confs = [], [], []
    spectrum = IsoSpecPy.IsoLayered(formula=formula,
                                    prob_to_cover=target_prob,
                                    get_confs=True,
                                    get_minimal_pset=True)
    for mass, prob, nested in spectrum:
        log_prob = log(prob)
        masses.append(mass)
        log_probs.append(log_prob)
        flat_confs.append([count for per_element in nested for count in per_element])
    return sort_confs((masses, log_probs, flat_confs))
def confs_from_ordered_generator(formula, target_prob):
    """Collect configurations from ``IsoSpecPy.IsoOrderedGenerator``.

    Configurations are taken in generator order until the accumulated
    probability reaches ``target_prob``. When ``target_prob`` is not below
    1.0 the generator is consumed completely.

    Returns the tuple ``(masses, log_probabilities, flattened_configurations)``
    in generator order (no extra sorting is applied).
    """
    masses, log_probs, flat_confs = [], [], []
    covered = 0.0
    for entry in IsoSpecPy.IsoOrderedGenerator(formula=formula, get_confs=True):
        log_prob = log(entry[1])
        # The stop test runs before the entry is stored, so the entry that
        # crosses the target is kept and the one after it is dropped.
        if target_prob < 1.0 and covered >= target_prob:
            break
        masses.append(entry[0])
        covered += exp(log_prob)
        log_probs.append(log_prob)
        flat_confs.append([count for per_element in entry[2] for count in per_element])
    return (masses, log_probs, flat_confs)
def confs_from_threshold_generator(formula, target_prob):
    """Collect configurations from ``IsoSpecPy.IsoThresholdGenerator``.

    Despite its name, ``target_prob`` is passed to the generator as the
    ``threshold`` argument, with ``absolute=True``.

    Returns whatever ``sort_confs`` returns for the tuple
    ``(masses, log_probabilities, flattened_configurations)``.
    """
    masses, log_probs, flat_confs = [], [], []
    generator = IsoSpecPy.IsoThresholdGenerator(formula=formula,
                                                threshold=target_prob,
                                                absolute=True,
                                                get_confs=True)
    for entry in generator:
        log_prob = log(entry[1])
        masses.append(entry[0])
        log_probs.append(log_prob)
        flat_confs.append([count for per_element in entry[2] for count in per_element])
    return sort_confs((masses, log_probs, flat_confs))
def sample_isospec(formula, count, precision): population = IsoSpecPy.IsoLayeredGenerator(formula, t_prob_hint = precision, reorder_marginals = False) #population = IsoSpecPy.IsoThresholdGenerator(formula = formula, threshold = -1.0) #for x in population: # yield x '''Performs sampling with replacement from population argument, with associated probabilities from second argument. The probabilities must sum to 1. Yields a stream of tuples: (population_member, times_chosen). Accepts generators as first and second argument. May return duplicate tuples and tuples with times_chosen == 0. ''' pprob = 0.0 cprob = 0.0 accumulated = 0 iso_iter = population.__iter__() while count > 0: if accumulated > 0: yield (pop_next, accumulated) accumulated = 0 pop_next, prob_next = next(iso_iter) pprob += prob_next # Beta mode while (pprob - cprob) * count / (1.0 - cprob) < 1.0: cprob += _beta_1_b(count) * (1.0 - cprob) while pprob < cprob: if accumulated > 0: yield (pop_next, accumulated) accumulated = 0 pop_next, prob_next = next(iso_iter) pprob += prob_next accumulated += 1 count -= 1 if count == 0: break if count == 0: break # Binomial mode nrtaken = _safe_binom(count, (pprob-cprob)/(1.0-cprob)) accumulated += nrtaken count -= nrtaken cprob = pprob if accumulated > 0: yield (pop_next, accumulated)
import IsoSpecPy
from math import exp
from IsoSpecPy.Formulas import *
from IsoSpecPy.approximations import approximate_subisotopologues

# Compare three counts of distinct per-element subisotopologues needed to
# cover `test_prob` of the total probability of `test_on`:
#   * the estimate reported by Iso.getMarginalLogSizeEstimates,
#   * the estimate from approximate_subisotopologues,
#   * the count actually observed in an IsoTotalProb run.
test_on = horse_myoglobin
test_prob = 0.9999

print("Formula:", test_on, "Probability:", test_prob)

i = IsoSpecPy.Iso(test_on)
# The estimates come back as logarithms, so exponentiate before printing.
print("From C++ code:", [exp(log_size) for log_size in i.getMarginalLogSizeEstimates(test_prob)])

symbols, _ = IsoSpecPy.ParseFormula(test_on)
dct = approximate_subisotopologues(test_on, test_prob)
print("From Python: ", [dct[s] for s in symbols])

v = IsoSpecPy.IsoTotalProb(formula=test_on,
                           prob_to_cover=test_prob,
                           get_confs=True,
                           get_minimal_pset=True)

# One set per dimension, holding every distinct sub-configuration seen there.
acc = [set() for _ in range(v.dimNumber)]
for conf in v.confs:
    for dim in range(v.dimNumber):
        acc[dim].add(conf[dim])

print("Real:", [len(seen) for seen in acc])
print(len(v), "total confs.")
for conf in IsoSpecPy.IsoThreshold(formula=formula, threshold=target_prob, absolute=True, get_confs=True): conf = (conf[0], log(conf[1]), conf[2]) ret[0].append(conf[0]) ret[1].append(conf[1]) ret[2].append([item for sublist in conf[2] for item in sublist]) return sort_confs(ret) is_ok = False try: i = IsoSpecPy.IsoThreshold(0.1, atomCounts=[100], isotopeMasses=[[1.0, 2.0, 3.0]], isotopeProbabilities=[[0.0, 0.6, 0.4]]) for x in i: print(x) except ValueError: is_ok = True assert is_ok total_confs = 0 for molecule in molecules: for parameter in parameters: if not silentish_run: sprint("{} {}... ".format(molecule, parameter)) old_ordered = OldIsoSpecPy.IsoSpecPy.IsoSpec.IsoFromFormula( molecule, parameter, method="ordered").getConfs()
import IsoSpecPy
from tqdm import tqdm

# Micro-benchmark: build an Iso object for a fixed formula 100000 times and
# sum its theoretical average mass. The sum is printed so the work has an
# observable result.
#
# `range` and the function form of `print` run on both Python 2 and 3;
# the former `xrange` / `print t` spelling only ran on Python 2.
t = 0.0
for x in tqdm(range(100000)):
    i = IsoSpecPy.Iso("C100H100N100O100")
    t += i.getTheoreticalAverageMass()
print(t)
# Element order that makeplot assumes for every configuration.
_MAKEPLOT_ELEMENTS = ('C', 'H', 'N', 'O', 'S')

# Nucleon numbers of the isotopes, indexed [element][isotope] in the element
# order above. The tuple lengths also say how many isotopes are counted.
_MAKEPLOT_NUCLEONS = (
    (12, 13),          # C
    (1, 2),            # H
    (14, 15),          # N
    (16, 17, 18),      # O
    (32, 33, 34, 36),  # S
)


def _first_ordered_conf(formula):
    """Return the configuration of the first item IsoOrderedGenerator yields."""
    return next(IsoSpecPy.IsoOrderedGenerator(formula, get_confs = True).__iter__())[2]


def _selected_quantity(conf, sth):
    """Return the quantity `sth` selects from `conf`, or None if `sth` is unknown."""
    if sth == 'nucleons':
        return sum(
            conf[element][isotope] * nucleons
            for element, isotopes in enumerate(_MAKEPLOT_NUCLEONS)
            for isotope, nucleons in enumerate(isotopes)
        )
    if sth in _MAKEPLOT_ELEMENTS:
        element = _MAKEPLOT_ELEMENTS.index(sth)
        return sum(
            conf[element][isotope]
            for isotope in range(len(_MAKEPLOT_NUCLEONS[element]))
        )
    return None


def makeplot(sth):
    """Scatter-plot the rows of "results205.csv", coloured by a difference.

    Each CSV row is read as ``x value, y value, formula 1, formula 2``.
    For both formulas the first configuration from
    ``IsoSpecPy.IsoOrderedGenerator`` is taken (presumably the most probable
    one - TODO confirm), and the quantity selected by ``sth`` is compared:

    * ``'nucleons'`` - total nucleon number of the configuration;
    * ``'C'``, ``'H'``, ``'N'``, ``'O'``, ``'S'`` - atom count of that element.

    Rows whose absolute difference is zero are skipped. Any other value of
    ``sth`` selects no rows, which gives an empty plot.

    Configurations are assumed to list the elements in the order
    C, H, N, O, S, with all five present.

    The figure is saved as ``"Plot<sth>.png"``; nothing is returned.
    """
    x = []
    y = []
    z = []
    with open("results205.csv", 'r') as csvfile:
        plots = csv.reader(csvfile, delimiter=",")
        for row in tqdm(plots):
            # Both configurations are computed for every row, whatever `sth` is.
            conf1 = _first_ordered_conf(row[2])
            conf2 = _first_ordered_conf(row[3])
            first = _selected_quantity(conf1, sth)
            if first is None:
                continue
            diff = abs(first - _selected_quantity(conf2, sth))
            if diff != 0:
                z.append(diff)
                x.append(float(row[0]))
                y.append(float(row[1]))

    label = '%s' % (sth)
    fig, ax = plt.subplots()
    plot = ax.scatter(x, y, c = z, alpha = 0.3, edgecolors = None, cmap = 'YlGnBu', s = 5.0)
    # The colour legend is added as a separate artist so that the plt.legend()
    # call below does not replace it.
    legend = ax.legend(*plot.legend_elements(alpha = 1.0), loc = "lower right",
                       title=label + ' ' + "number difference")
    ax.add_artist(legend)
    plt.xlabel('Mean mass difference')
    plt.ylabel('Wasserstein distance')
    plt.title('Mean mass difference, Wasserstein distance and' + ' ' + label + ' ' + 'difference plot ')
    plt.legend()
    plt.savefig("Plot" + label + ".png")
# 14C is not part of the standard isotopic distribution of carbon, so it is
# appended here as one extra isotope after the standard ones.
radiolabelled_carbon_masses = PeriodicTbl.symbol_to_masses["C"] + (14.003241989,)

# Assume the labelling was only 95% efficient: only 95% of the radiolabel
# atoms have standard C replaced with 14C. The atoms that were not replaced
# keep the standard isotopic abundances (relative to each other).
labelled_fraction = 0.95
unlabelled_fraction = 0.05
normal_carbon_probs = PeriodicTbl.symbol_to_probs["C"]
radiolabelled_carbon_probs = (
    unlabelled_fraction * normal_carbon_probs[0],
    unlabelled_fraction * normal_carbon_probs[1],
    labelled_fraction,
)

i = IsoSpecPy.IsoTotalProb(
    # The rest of the parameters for the configuration.
    prob_to_cover=0.99,
    get_confs=True,
    # The formula for glucose, sans the radiolabel atoms.
    formula="C4H12O6",
    # Additional "elements" which occur *in addition* to those from the formula.
    atomCounts=(2,),
    isotopeMasses=(radiolabelled_carbon_masses,),
    isotopeProbabilities=(radiolabelled_carbon_probs,),
)

# Radiolabelling (or isotopic labelling) with more than one element looks like this:
# Let's say we wanted to have glucose with one 14C carbon, and two deuteriums, all with 95% probability
# Then it would be:
#i = IsoSpecPy.IsoLayeredGenerator(formula = "C5H10O6", # The formula for glucose, sans the radiolabel atoms
#    atomCounts = (1, 2),
#    isotopeMasses = (radiolabelled_carbon_masses, PeriodicTbl.symbol_to_masses["H"]),
#    isotopeProbabilities = (radiolabelled_carbon_probs, (0.05, 0.95)),
#    # And the rest of parameters for configuration
#    prob_to_cover = 0.99,
#    get_confs=True)
while S.next(): print(S.confs_prob, S.chasing_prob) yield (S.current_conf, S.current_count) from IsoSpecPy.Formulas import * from scipy.stats import chisquare import sys if __name__ == '__main__': test_mol = surcose count = 10000000 print("Starting...") X = sorted(x for x in IsoSpecPy.IsoThresholdGenerator(formula=test_mol, threshold=sys.float_info.min, reorder_marginals = False) if x[1] > 0) print("No configs: " + str(len(X))) Y = dict([(v[0], 0) for v in X]) #print(Y) s = 0 for x in sample_ciic(test_mol, count, 0.999999): print(x) Y[x[0]] = x[1] s += x[1] print("S:", s) assert s == count #print(X)
def sample_isospec2(formula, count, precision):
    """Yield ``Sampler.current()`` for every successful ``Sampler.advance()``.

    The sampler is built as ``Sampler(population, count, precision, 1.0)``
    over an ``IsoSpecPy.IsoLayeredGenerator`` created from ``formula`` with
    ``t_prob_hint=precision`` and ``reorder_marginals=False``.
    """
    population = IsoSpecPy.IsoLayeredGenerator(
        formula,
        t_prob_hint=precision,
        reorder_marginals=False,
    )
    sampler = Sampler(population, count, precision, 1.0)
    while True:
        if not sampler.advance():
            return
        yield sampler.current()
''' import IsoSpecPy from math import exp try: if IsoSpecPy.__version__[:4] != '2.1.': raise AttributeError except AttributeError: print( "This file is meant to be used with IsoSpecPy version 2.0.X. You seem to have a different version installed on your system." ) import sys sys.exit(-1) i = IsoSpecPy.IsoTotalProb(formula="H2O1", prob_to_cover=0.999, get_confs=True) print( "Calculating isotopic distribution of water. Here's a list of configurations necessary to cover at least 0.999 of total probability:" ) for mass, prob, conf in i: print("") print("Mass: " + str(mass)) print("probability: " + str(prob)) print("Number of Protium atoms: " + str(conf[0][0])) print("Number of Deuterium atoms: " + str(conf[0][1])) print("Number of O16 atoms: " + str(conf[1][0])) print("Number of O17 atoms: " + str(conf[1][1])) print("Number of O18 atoms: " + str(conf[1][2]))
def sample_ciic(formula, count, precision):
    """Yield ``(current_conf, current_count)`` for every ``CIIC.next()`` step.

    The sampler is built as ``CIIC(population, count, precision, -1.0)`` over
    an ``IsoSpecPy.IsoLayeredGenerator`` created from ``formula`` with
    ``t_prob_hint=precision`` and ``reorder_marginals=False``.

    Before each yield the sampler's ``confs_prob`` and ``chasing_prob`` are
    printed to standard output.
    """
    population = IsoSpecPy.IsoLayeredGenerator(
        formula,
        t_prob_hint=precision,
        reorder_marginals=False,
    )
    sampler = CIIC(population, count, precision, -1.0)
    while True:
        if not sampler.next():
            return
        print(sampler.confs_prob, sampler.chasing_prob)
        yield (sampler.current_conf, sampler.current_count)
def get_real_confs(formula, P):
    """Count the distinct sub-configurations seen per element of ``formula``.

    ``iso.IsoLayered`` is run with ``prob_to_cover=P`` and ``get_confs=True``.
    For every position of the returned configurations, the distinct values
    seen at that position are collected.

    Returns a NumPy array with one count per entry of ``parse(formula)``.
    """
    # One bucket per (element, atom count) pair produced by parse().
    seen = [set() for _element, _atom_cnt in parse(formula)]

    spectrum = iso.IsoLayered(formula=formula, prob_to_cover=P, get_confs=True)
    for _mass, _prob, conf in spectrum:
        for position, sub_conf in enumerate(conf):
            seen[position].add(sub_conf)

    return np.array(list(map(len, seen)))
def count_totalprob(mass, formula, prob):
    """Build a normalized ``IsoSpecPy.IsoTotalProb`` spectrum for ``formula``.

    ``prob`` is the probability to cover. ``mass`` and ``formula`` are passed
    through unchanged so the result can be matched back to its input.

    Returns the tuple ``(mass, formula, spectrum)``.
    """
    spectrum = IsoSpecPy.IsoTotalProb(prob_to_cover=prob, formula=formula)
    spectrum.normalize()
    return mass, formula, spectrum
from __future__ import print_function import IsoSpecPy from IsoSpecPy.Formulas import * import math try: math.isclose except AttributeError: def isclose(a, b, rel_tol=1e-09, abs_tol=0.0): return abs(a-b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol) math.isclose = isclose glu = IsoSpecPy.IsoThreshold(0.0, formula=glucose) ca = IsoSpecPy.IsoThreshold(0.0, formula=caffeine) print("Checking Wasserstein distance...", end=' ') print(ca.wassersteinDistance(glu), end=' ') assert(math.isclose(ca.wassersteinDistance(glu), 14.03495145836358)) print("OK!") print("Checking normalization... ", end='') ubiq = IsoSpecPy.IsoTotalProb(0.9999, ubiquitin) print(ubiq.total_prob(), end=' ') assert(math.isclose(ubiq.total_prob(), 0.9999, rel_tol=0.01)) ubiq = IsoSpecPy.IsoTotalProb(0.9999, ubiquitin) ubiq.scale(0.5) assert(math.isclose(ubiq.total_prob(), 0.9999*0.5, rel_tol=0.01)) ubiq._recalculate_everything() assert(math.isclose(ubiq.total_prob(), 0.9999*0.5, rel_tol=0.01))
def _write_pairwise_distances(writer, group):
    """Write one CSV row for every unordered pair of entries in `group`."""
    # Build every spectrum once, before any row is written.
    spectra = []
    for _, formula in group:
        spectrum = IsoSpecPy.IsoTotalProb(0.99, formula)
        spectrum.normalize()
        spectra.append(spectrum)

    for i in range(len(group)):
        for j in range(i + 1, len(group)):
            wasserstein = spectra[i].wassersteinDistance(spectra[j])
            mass_difference = abs(group[i][0] - group[j][0])
            used_formulas = (group[i][1], group[j][1])
            # NOTE(review): the whole tuple goes into a single CSV cell.
            # This format is kept as it was; confirm that downstream
            # readers expect it.
            writer.writerow([(mass_difference, wasserstein, used_formulas)])


def progr(window):
    """Write Wasserstein distances between formulas of nearly equal mass.

    Reads 'testowy.txt', where each line is ``average_mass,formula,...``.
    Consecutive lines are grouped while each mass is at most ``window``
    above the previous one. For every group with more than one entry, each
    pair of formulas is compared: both get a normalized
    ``IsoSpecPy.IsoTotalProb(0.99, formula)`` spectrum, and the row
    ``(mass difference, Wasserstein distance, (formula_a, formula_b))`` is
    written to 'test.csv'.

    The group still open when the input ends is processed as well;
    previously it was silently dropped.

    Parameters
    ----------
    window : float
        Largest mass gap between consecutive lines that keeps them in the
        same group.
    """
    with open('testowy.txt') as file, open('test.csv', 'w', newline='') as write_file:
        writer = csv.writer(write_file)
        L = []  # (average_mass, formula) entries of the group being built
        limes = float('inf')  # upper mass limit for joining the current group
        for line in tqdm(file, total=177754527):
            fields = line.split(",")
            average_mass = float(fields[0])
            formula = fields[1].strip()
            if average_mass > limes:
                # Gap found: finish the current group, then start a new one.
                if len(L) > 1:
                    _write_pairwise_distances(writer, L)
                L.append((average_mass, formula))
                # Keep only the entries whose mass equals the new line's mass.
                L = [entry for entry in L if entry[0] == average_mass]
            else:
                L.append((average_mass, formula))
            limes = average_mass + window

        # Flush the last group; no later line exists to trigger it.
        if len(L) > 1:
            _write_pairwise_distances(writer, L)
def generate_isotopologues(formula_entry, smiles_entry, resolution_entry):
    """Build the isotopologue table and simulated profile for a compound.

    Parameters
    ----------
    formula_entry : str or None
        Chemical formula. When it is None or empty, the formula is fetched
        from the gnps-structure web service using ``smiles_entry``.
    smiles_entry : str
        SMILES string, used only when ``formula_entry`` is missing.
    resolution_entry : str or number
        Resolving power; converted with ``float``. The peak width (FWHM) is
        the m/z of the first peak divided by this value.

    Returns
    -------
    list
        ``[[table, graph]]``: a ``dash_table.DataTable`` of the peaks and a
        ``dcc.Graph`` with the profile normalized to 0-100.
    """
    import numpy as np

    if formula_entry is not None and len(formula_entry):
        formula = formula_entry
    else:
        # Resolve the molecular formula from the SMILES string.
        url = "https://gnps-structure.ucsd.edu/formula?smiles={}".format(
            urllib.parse.quote(smiles_entry))
        r = requests.get(url)
        # Drop stray whitespace/newlines around the returned formula.
        formula = r.text.strip()

    i = IsoSpecPy.IsoTotalProb(formula=formula,
                               prob_to_cover=0.99,
                               get_confs=True)

    # 0.00054858 is subtracted from every mass (presumably the electron
    # mass in Da, i.e. a singly charged cation - TODO confirm).
    output_list = [
        {"prob": prob, "mz": mass - 0.00054858}
        for mass, prob, conf in i
    ]

    table_fig = dash_table.DataTable(
        columns=[{
            "name": column,
            "id": column,
            "deletable": True,
            "selectable": True
        } for column in ["mz", "prob"]],
        data=output_list,
        editable=True,
        filter_action="native",
        sort_action="native",
        sort_mode="multi",
        column_selectable="single",
        selected_columns=[],
        selected_rows=[],
        page_action="native",
        page_current=0,
        page_size=10,
    )

    # Drawing Figure
    main_mz = output_list[0]["mz"]
    delta_m = main_mz / float(resolution_entry)
    # FWHM of a Gaussian is ~2.355 standard deviations.
    sigma = delta_m / 2.355
    # NOTE(review): one grid point per sigma is coarse for peaks of this
    # width; consider a finer step if the profile looks jagged.
    display_bins = sigma

    # The peaks are not guaranteed to be ordered by mass, so take the grid
    # limits from the actual extremes, not the first and last entry.
    peak_mzs = [peak["mz"] for peak in output_list]
    mz_grid = np.arange(min(peak_mzs) - 1, max(peak_mzs) + 1, display_bins)

    intensity = np.zeros_like(mz_grid)
    for peak in output_list:
        # Add gaussian peak shape centered around each theoretical peak.
        # The exponent is divided by 2 * sigma**2 (the variance), to match
        # the 1 / (sqrt(2*pi) * sigma) normalization.
        intensity += peak["prob"] * np.exp(
            -(mz_grid - peak["mz"])**2 /
            (2 * sigma**2)) / (np.sqrt(2 * np.pi) * sigma)

    # Normalize profile to 0-100
    intensity = (intensity / intensity.max()) * 100

    df = pd.DataFrame()
    df["mz"] = mz_grid
    df["intensity"] = intensity

    line_fig = px.line(
        df,
        x="mz",
        y="intensity",
        title='Isotopologue Distribution - {} - Resolution - {}'.format(
            formula, resolution_entry))

    return [[table_fig, dcc.Graph(figure=line_fig)]]