Example #1
0
    def confs_from_formula(formula,
                           threshold=0.001,
                           total_prob=None,
                           charge=1,
                           adduct=None):
        """Simulate the isotopic envelope of `formula` and return its peaks.

        Parameters as in __init__ method. `formula` must be a nonempty string.

        The formula is parsed into per-element counts (a missing count means
        one atom).  When `adduct` is given, `charge` atoms of it are added to
        the counts.  The envelope is computed with `IsoTotalProb` when
        `total_prob` is not None, otherwise with a relative `IsoThreshold`.

        Returns a list of ``(mass / abs(charge), probability)`` tuples.
        """
        element_counts = Counter()
        for symbol, digits in re.findall('([A-Z][a-z]*)([0-9]*)', formula):
            element_counts[symbol] += int(digits) if digits else 1
        if adduct:
            element_counts[adduct] += charge
        # A negative `charge` combined with an adduct may drive a count below
        # zero; such a composition is rejected here.
        assert all(count >= 0 for count in element_counts.values())
        # Rebuild a normalised formula string, dropping elements whose count
        # ended up at zero.
        normalized = ''.join(symbol + str(count)
                             for symbol, count in element_counts.items()
                             if count)
        if total_prob is None:
            isospec = IsoSpecPy.IsoThreshold(formula=normalized,
                                             threshold=threshold,
                                             absolute=False,
                                             get_confs=False)
        else:
            isospec = IsoSpecPy.IsoTotalProb(formula=normalized,
                                             prob_to_cover=total_prob,
                                             get_minimal_pset=True,
                                             get_confs=False)
        return [(mass / abs(charge), prob)
                for mass, prob in zip(isospec.masses, isospec.probs)]
def test_isospec_threshold_(formula, thr):
    """Time the construction of an `IsoThreshold` for `formula`.

    Returns the difference between two `time()` readings taken immediately
    before and after the constructor call with threshold `thr`.
    """
    started = time()
    # The result stays bound to a local so that it is still alive when the
    # second clock reading is taken.
    res = iso.IsoThreshold(formula=formula, threshold=thr, get_confs=True)
    elapsed = time() - started
    return elapsed
Example #3
0
def expand_isotopes(peptide, charge_states=(2, 3)):
    '''
    Convert peptide to DataFrame of isotopic peaks
    Input
        peptide - Series, should contain 'sequence', 'z+' columns (one per
            requested charge state, e.g. '2+'), and model columns (one per
            entry of params.ion_models)
        charge_states - iterable of charge states to expand; defaults to
            (2, 3).  The default is an immutable tuple so it cannot be
            modified between calls.
    Return
        DataFrame with one row for each isotopic peak
        columns are:
            mz - m/z of ion
            ic - isotope probability scaled by the peptide's 'z+' value
            ic_XX - ion abundance according to XX model ('ic' times the
                peptide's XX column)
            z  - charge
            sequence - peptide sequence
    '''
    sequence = peptide['sequence']
    formula = ''.join(
        '{}{}'.format(element, count)
        for element, count in mass.Composition(sequence).items())
    cluster = IsoSpecPy.IsoThreshold(formula=formula,
                                     threshold=0.005,
                                     absolute=True)
    mz0 = cluster.np_masses()
    int0 = cluster.np_probs()
    # Replicate the isotopic cluster once per charge state and stack the
    # copies; the three arrays below stay aligned row by row.
    mz = np.concatenate([get_ions(mz0, z) for z in charge_states])
    ic = np.concatenate(
        [int0 * peptide['{}+'.format(z)] for z in charge_states])
    charge = np.concatenate(
        [np.repeat(z, mz0.shape[0]) for z in charge_states])
    result = pd.DataFrame({'mz': mz, 'ic': ic, 'z': charge})
    result['sequence'] = sequence
    for model in params.ion_models:
        result['ic_{}'.format(model)] = result['ic'] * peptide[model]

    return result
Example #4
0
def confs_from_layered_generator(formula, target_prob):
    """Collect the configurations produced by `IsoSpecPy.IsoLayered`.

    The generator is asked to cover `target_prob` with a minimal set.
    Three parallel lists are built - masses, log-probabilities and flattened
    configurations - and passed as a tuple to `sort_confs`, whose result is
    returned.
    """
    masses, log_probs, flat_confs = [], [], []
    layered = IsoSpecPy.IsoLayered(formula=formula,
                                   prob_to_cover=target_prob,
                                   get_confs=True,
                                   get_minimal_pset=True)
    for entry in layered:
        mass, log_prob, marginals = entry[0], log(entry[1]), entry[2]
        masses.append(mass)
        log_probs.append(log_prob)
        # Concatenate the per-element isotope counts into one flat list.
        flat_confs.append([count for marginal in marginals
                           for count in marginal])

    return sort_confs((masses, log_probs, flat_confs))
Example #5
0
def confs_from_ordered_generator(formula, target_prob):
    """Collect configurations from `IsoOrderedGenerator` up to `target_prob`.

    Configurations are taken in the order the generator yields them.  When
    `target_prob` is below 1.0, collection stops as soon as the accumulated
    probability has reached it; otherwise the generator is exhausted.

    Returns a tuple of three parallel lists: masses, log-probabilities and
    flattened configurations.
    """
    masses, log_probs, flat_confs = [], [], []
    collected = (masses, log_probs, flat_confs)
    covered = 0.0
    for entry in IsoSpecPy.IsoOrderedGenerator(formula=formula,
                                               get_confs=True):
        mass, log_prob, marginals = entry[0], log(entry[1]), entry[2]
        if covered >= target_prob and target_prob < 1.0:
            break
        masses.append(mass)
        # The running total is built from exp(log(p)), as before.
        covered += exp(log_prob)
        log_probs.append(log_prob)
        flat_confs.append([count for marginal in marginals
                           for count in marginal])
    return collected
Example #6
0
def confs_from_threshold_generator(formula, target_prob):
    """Collect the configurations produced by `IsoThresholdGenerator`.

    `target_prob` is passed to the generator as an absolute threshold.
    Three parallel lists are built - masses, log-probabilities and flattened
    configurations - and passed as a tuple to `sort_confs`, whose result is
    returned.
    """
    masses, log_probs, flat_confs = [], [], []
    above_threshold = IsoSpecPy.IsoThresholdGenerator(formula=formula,
                                                      threshold=target_prob,
                                                      absolute=True,
                                                      get_confs=True)
    for entry in above_threshold:
        mass, log_prob, marginals = entry[0], log(entry[1]), entry[2]
        masses.append(mass)
        log_probs.append(log_prob)
        # Concatenate the per-element isotope counts into one flat list.
        flat_confs.append([count for marginal in marginals
                           for count in marginal])

    return sort_confs((masses, log_probs, flat_confs))
Example #7
0
def sample_isospec(formula, count, precision):
    """Lazily distribute `count` draws over the items of an IsoLayeredGenerator.

    Builds ``IsoSpecPy.IsoLayeredGenerator(formula, t_prob_hint=precision,
    reorder_marginals=False)`` and walks it once, yielding
    ``(item, times_chosen)`` tuples.  A tuple is only yielded when its count
    is positive.  Every draw that is handed out decrements `count`, so the
    yielded counts add up to the requested `count` once this generator has
    run to completion.

    Each population element is unpacked into two values, the second of which
    is accumulated as a probability (presumably the element is a
    ``(mass, probability)`` pair -- TODO confirm against IsoSpecPy).

    NOTE(review): ``next(iso_iter)`` is not guarded.  If the population runs
    out before all draws are placed, the StopIteration raised inside this
    generator surfaces as RuntimeError on Python 3.7+ (PEP 479).
    """
    population = IsoSpecPy.IsoLayeredGenerator(formula, t_prob_hint = precision, reorder_marginals = False)
    # Alternative population source, kept for reference:
    #population = IsoSpecPy.IsoThresholdGenerator(formula = formula, threshold = -1.0)

    #for x in population:
    #    yield x
    # NOTE(review): the string below is not a docstring (it is not the first
    # statement of the function).  It reads like the description of a more
    # generic sampler: this function takes no "second argument" of
    # probabilities and never yields a zero count.
    '''Performs sampling with replacement from population argument, with
    associated probabilities from second argument. The probabilities must 
    sum to 1. Yields a stream of tuples: (population_member, times_chosen).
    Accepts generators as first and second argument. May return duplicate
    tuples and tuples with times_chosen == 0.
    '''
    # pprob: total probability of the population items fetched so far.
    pprob = 0.0
    # cprob: probability position up to which draws have already been placed
    # (presumably the "chasing" cumulative probability -- confirm).
    cprob = 0.0
    # Draws assigned to the current item (pop_next) but not yet yielded.
    accumulated = 0
    iso_iter = population.__iter__()
    while count > 0:
        # Flush the previous item before moving on to the next one.
        if accumulated > 0:
            yield (pop_next, accumulated)
            accumulated = 0
        pop_next, prob_next = next(iso_iter)
        pprob += prob_next
        # Beta mode
        # Entered while `count` times the share of the not-yet-assigned
        # probability mass lying between cprob and pprob is below one; draws
        # are then placed one at a time.
        while (pprob - cprob) * count / (1.0 - cprob) < 1.0:
            # Advance cprob by a random fraction of the remaining mass
            # (presumably _beta_1_b draws from Beta(1, count) -- confirm).
            cprob += _beta_1_b(count) * (1.0 - cprob)
            # Skip forward to the item whose cumulative range contains cprob.
            while pprob < cprob:
                if accumulated > 0: 
                    yield (pop_next, accumulated)
                    accumulated = 0
                pop_next, prob_next = next(iso_iter)
                pprob += prob_next
            accumulated += 1
            count -= 1
            if count == 0: break
        if count == 0: break
        # Binomial mode
        # Place a whole batch of draws on the current item at once
        # (presumably _safe_binom is a binomial draw -- confirm), then mark
        # everything up to pprob as handled.
        nrtaken = _safe_binom(count, (pprob-cprob)/(1.0-cprob))
        accumulated += nrtaken
        count -= nrtaken
        cprob = pprob
    # Emit whatever is still pending for the last item.
    if accumulated > 0:
        yield (pop_next, accumulated)
Example #8
0
import IsoSpecPy
from math import exp
from IsoSpecPy.Formulas import *
from IsoSpecPy.approximations import approximate_subisotopologues

# Compare three views of the per-element (marginal) configuration counts for
# one molecule: the estimate exposed by the Iso object, the Python-side
# approximation, and the counts actually observed in a computed distribution.
test_on = horse_myoglobin
test_prob = 0.9999

print("Formula:", test_on, "Probability:", test_prob)

# Estimate 1: getMarginalLogSizeEstimates returns logarithms, so each value
# is passed through exp before printing.
i = IsoSpecPy.Iso(test_on)
print("From C++ code:", list(map(exp,
                                 i.getMarginalLogSizeEstimates(test_prob))))

# Estimate 2: approximate_subisotopologues returns a mapping indexed by
# element symbol; print it in the order given by ParseFormula.
symbols, _ = IsoSpecPy.ParseFormula(test_on)
dct = approximate_subisotopologues(test_on, test_prob)
print("From Python:  ", [dct[s] for s in symbols])

# Ground truth: compute the distribution and count the distinct
# sub-configurations seen in each dimension.
v = IsoSpecPy.IsoTotalProb(formula=test_on,
                           prob_to_cover=test_prob,
                           get_confs=True,
                           get_minimal_pset=True)

# One set of distinct sub-configurations per dimension.
acc = [set() for _ in range(v.dimNumber)]

for conf in v.confs:
    # NOTE: this loop index rebinds the module-level `i`, which held the Iso
    # object above; the Iso object is not used again after this point.
    for i in range(v.dimNumber):
        acc[i].add(conf[i])

print("Real:", list(map(len, acc)))
print(len(v), "total confs.")
Example #9
0
    for conf in IsoSpecPy.IsoThreshold(formula=formula,
                                       threshold=target_prob,
                                       absolute=True,
                                       get_confs=True):
        conf = (conf[0], log(conf[1]), conf[2])
        ret[0].append(conf[0])
        ret[1].append(conf[1])
        ret[2].append([item for sublist in conf[2] for item in sublist])

    return sort_confs(ret)


# Sanity check: building and iterating this IsoThreshold must raise
# ValueError.  The single element is given an isotope probability of 0.0,
# which is presumably what gets rejected -- confirm against IsoSpecPy.
is_ok = False
try:
    i = IsoSpecPy.IsoThreshold(0.1,
                               atomCounts=[100],
                               isotopeMasses=[[1.0, 2.0, 3.0]],
                               isotopeProbabilities=[[0.0, 0.6, 0.4]])
    # Only reached if construction did not raise; anything printed here
    # means the expected error did not occur at construction time.
    for x in i:
        print(x)
except ValueError:
    is_ok = True
assert is_ok

# Counter initialised here; it is not updated within the visible lines.
total_confs = 0

# Walk every (molecule, parameter) combination and fetch the configurations
# from the old implementation using its "ordered" method.
for molecule in molecules:
    for parameter in parameters:
        if not silentish_run:
            sprint("{} {}... ".format(molecule, parameter))
            # NOTE(review): as indented, the old implementation is only
            # queried when silentish_run is false.  This looks like it was
            # meant to sit outside the `if` -- confirm against the full file.
            old_ordered = OldIsoSpecPy.IsoSpecPy.IsoSpec.IsoFromFormula(
                molecule, parameter, method="ordered").getConfs()
Example #10
0
import IsoSpecPy
from tqdm import tqdm

# Micro-benchmark: construct an Iso object 100000 times and accumulate its
# theoretical average mass, with tqdm reporting progress.
# `range` and the `print(...)` call form are used so that the snippet parses
# on Python 3 (the previous `xrange` / `print t` were Python-2-only) while
# still running unchanged on Python 2.
t = 0.0
for x in tqdm(range(100000)):
    i = IsoSpecPy.Iso("C100H100N100O100")
    t += i.getTheoreticalAverageMass()
print(t)
Example #11
0
# Symbols of the supported elements, in the positional order (C H N O S)
# used to index a configuration.
_ELEMENT_ORDER = ('C', 'H', 'N', 'O', 'S')

# Nucleon numbers of the isotopes considered for each element, aligned with
# _ELEMENT_ORDER; the position within each inner tuple is the isotope index
# inside that element's part of a configuration.
_NUCLEON_NUMBERS = (
    (12, 13),
    (1, 2),
    (14, 15),
    (16, 17, 18),
    (32, 33, 34, 36),
)


def _conf_quantity(sth, conf):
    """Return the quantity selected by `sth` for one configuration.

    For ``'nucleons'`` this is the total nucleon number over all five
    elements; for an element symbol it is that element's atom count (the sum
    of its isotope counts).  Returns None when `sth` is not recognised.
    """
    if sth == 'nucleons':
        return sum(conf[element][isotope] * nucleons
                   for element, numbers in enumerate(_NUCLEON_NUMBERS)
                   for isotope, nucleons in enumerate(numbers))
    if sth in _ELEMENT_ORDER:
        element = _ELEMENT_ORDER.index(sth)
        return sum(conf[element][isotope]
                   for isotope in range(len(_NUCLEON_NUMBERS[element])))
    return None


def makeplot(sth, csv_path="results205.csv"):
    """Scatter-plot formula pairs coloured by how much they differ in `sth`.

    Parameters:
        sth: ``'nucleons'`` or one of ``'C'``, ``'H'``, ``'N'``, ``'O'``,
            ``'S'``.  Any other value matches nothing, so the plot is drawn
            from empty data.
        csv_path: CSV file to read.  Columns 0 and 1 are plotted on the x and
            y axes; columns 2 and 3 are the two formulas of the pair.

    For every row the first configuration yielded by
    `IsoSpecPy.IsoOrderedGenerator` is taken for each formula, and the
    absolute difference of the selected quantity is computed.  Rows with a
    zero difference are left out.  Configurations are indexed positionally
    in the order C, H, N, O, S, so the formulas are assumed to list their
    elements in that order -- verify against the producer of the CSV.

    The figure is written to ``"Plot<sth>.png"``; nothing is returned.
    """
    x = []
    y = []
    z = []

    # newline='' is what the csv module documents for files handed to it.
    with open(csv_path, 'r', newline='') as csvfile:
        for row in tqdm(csv.reader(csvfile, delimiter=",")):
            conf1 = next(iter(
                IsoSpecPy.IsoOrderedGenerator(row[2], get_confs=True)))[2]
            conf2 = next(iter(
                IsoSpecPy.IsoOrderedGenerator(row[3], get_confs=True)))[2]

            quantity1 = _conf_quantity(sth, conf1)
            if quantity1 is None:
                # Unrecognised `sth`: nothing to compare for any row.
                continue
            diff = abs(quantity1 - _conf_quantity(sth, conf2))
            if diff != 0:
                z.append(diff)
                x.append(float(row[0]))
                y.append(float(row[1]))

    label = str(sth)
    fig, ax = plt.subplots()
    plot = ax.scatter(x, y, c=z, alpha=0.3, edgecolors=None,
                      cmap='YlGnBu', s=5.0)
    legend = ax.legend(*plot.legend_elements(alpha=1.0),
                       loc="lower right",
                       title=label + ' ' + "number difference")
    ax.add_artist(legend)
    plt.xlabel('Mean mass difference')
    plt.ylabel('Wasserstein distance')
    plt.title('Mean mass difference, Wasserstein distance and' + ' ' +
              label + ' ' + 'difference plot ')
    # NOTE(review): this second legend call has no labelled artists to show;
    # it is kept to leave the rendered output unchanged -- confirm whether it
    # is needed.
    plt.legend()
    plt.savefig("Plot" + label + ".png")
Example #12
0
# 14C isn't normally considered in the isotopic distribution, so here we
# append it as an extra isotope after the standard carbon isotopes.
radiolabelled_carbon_masses = PeriodicTbl.symbol_to_masses["C"] + (
    14.003241989, )

# Assume the labelling was only 95% efficient: 95% of the radiolabel atoms
# have standard C replaced with 14C. Non-replaced atoms keep the standard
# isotopic abundances (relative to each other), scaled down to the remaining
# 5%. The probabilities are listed in the same order as the masses above.
normal_carbon_probs = PeriodicTbl.symbol_to_probs["C"]
radiolabelled_carbon_probs = (0.05 * normal_carbon_probs[0],
                              0.05 * normal_carbon_probs[1], 0.95)

i = IsoSpecPy.IsoTotalProb(
    formula="C4H12O6",  # The formula for glucose, sans the two radiolabel carbon atoms
    # Additional "elements" which occur *in addition* to those from the
    # formula: one entry per extra element, here two atoms of labelled carbon.
    atomCounts=(2, ),
    isotopeMasses=(radiolabelled_carbon_masses, ),
    isotopeProbabilities=(radiolabelled_carbon_probs, ),
    # And the rest of the parameters for the configuration
    prob_to_cover=0.99,
    get_confs=True)

# Radiolabelling (or isotopic labelling) with more than one element looks like this:
# Let's say we wanted to have glucose with one 14C carbon, and two deuteriums, all with 95% probability
# Then it would be:
#i = IsoSpecPy.IsoLayeredGenerator(formula = "C5H10O6", # The formula for glucose, sans the radiolabel atoms
#                                  atomCounts = (1, 2),
#                                  isotopeMasses = (radiolabelled_carbon_masses, PeriodicTbl.symbol_to_masses["H"]),
#                                  isotopeProbabilities = (radiolabelled_carbon_probs, (0.05, 0.95)),
#                                  # And the rest of parameters for configuration
#                                  prob_to_cover = 0.99,
#                                  get_confs=True)
Example #13
0
    while S.next():
        print(S.confs_prob, S.chasing_prob)
        yield (S.current_conf, S.current_count)



from IsoSpecPy.Formulas import *
from scipy.stats import chisquare
import sys

# Script entry point: draw `count` samples with sample_ciic and check that
# the per-configuration counts add up to the number requested.
if __name__ == '__main__':
    # NOTE(review): `surcose` is presumably supplied by the star import from
    # IsoSpecPy.Formulas -- confirm the spelling matches that module.
    test_mol = surcose
    count = 10000000

    print("Starting...")
    # Enumerate every configuration whose second field (x[1]) is positive,
    # using the smallest positive float as the threshold, and sort them.
    X = sorted(x for x in IsoSpecPy.IsoThresholdGenerator(formula=test_mol, threshold=sys.float_info.min, reorder_marginals = False) if x[1] > 0)

    print("No configs: " + str(len(X)))

    # Tally keyed by the first field of each configuration, initialised to 0.
    Y = dict([(v[0], 0) for v in X])
    #print(Y)

    # s accumulates the total number of draws reported by the sampler.
    s = 0
    for x in sample_ciic(test_mol, count, 0.999999):
        print(x)
        # Overwrites (does not add to) any earlier count stored for this key.
        Y[x[0]] = x[1]
        s += x[1]
    print("S:", s)
    # The sampler must hand out exactly `count` draws in total.
    assert s == count

    #print(X)
Example #14
0
def sample_isospec2(formula, count, precision):
    """Yield the successive states of a `Sampler` run over `formula`.

    The population is an `IsoLayeredGenerator` created with `precision` as
    its `t_prob_hint` and marginal reordering disabled.  The sampler is
    advanced until `advance()` returns a false value, yielding `current()`
    after every successful step.
    """
    sampler = Sampler(
        IsoSpecPy.IsoLayeredGenerator(formula,
                                      t_prob_hint=precision,
                                      reorder_marginals=False),
        count, precision, 1.0)
    while True:
        if not sampler.advance():
            return
        yield sampler.current()
Example #15
0
'''

import IsoSpecPy
from math import exp

# Refuse to run unless the installed IsoSpecPy reports a 2.1.x version.  A
# missing __version__ attribute is treated the same way as a mismatch.
# The message names 2.1.X so that it agrees with the prefix actually checked
# (it previously said 2.0.X).
try:
    if IsoSpecPy.__version__[:4] != '2.1.':
        raise AttributeError
except AttributeError:
    print(
        "This file is meant to be used with IsoSpecPy version 2.1.X. You seem to have a different version installed on your system."
    )
    import sys
    sys.exit(-1)

# Isotopic distribution of water, covering at least 0.999 of the total
# probability, with the configurations kept so they can be printed.
i = IsoSpecPy.IsoTotalProb(formula="H2O1", prob_to_cover=0.999, get_confs=True)

print(
    "Calculating isotopic distribution of water. Here's a list of configurations necessary to cover at least 0.999 of total probability:"
)

# Each item unpacks as (mass, probability, configuration).  The
# configuration is indexed first by element, in formula order (0 = hydrogen,
# 1 = oxygen), then by isotope.
for mass, prob, conf in i:
    print("")
    print("Mass: " + str(mass))
    print("probability: " + str(prob))
    print("Number of Protium atoms: " + str(conf[0][0]))
    print("Number of Deuterium atoms: " + str(conf[0][1]))
    print("Number of O16 atoms: " + str(conf[1][0]))
    print("Number of O17 atoms: " + str(conf[1][1]))
    print("Number of O18 atoms: " + str(conf[1][2]))
Example #16
0
def sample_ciic(formula, count, precision):
    """Yield ``(configuration, count)`` pairs from a `CIIC` sampler.

    The population is an `IsoLayeredGenerator` created with `precision` as
    its `t_prob_hint` and marginal reordering disabled.  After every
    successful `next()` step the sampler's `confs_prob` and `chasing_prob`
    are printed, then its current configuration and count are yielded.
    """
    sampler = CIIC(
        IsoSpecPy.IsoLayeredGenerator(formula,
                                      t_prob_hint=precision,
                                      reorder_marginals=False),
        count, precision, -1.0)
    while True:
        if not sampler.next():
            return
        # Progress trace emitted for every step.
        print(sampler.confs_prob, sampler.chasing_prob)
        yield (sampler.current_conf, sampler.current_count)
Example #17
0
def get_real_confs(formula, P):
    """Count the distinct sub-configurations seen for each element.

    One set is created per ``(element, atom count)`` pair returned by
    `parse(formula)`.  Every configuration produced by `IsoLayered` covering
    probability `P` contributes its i-th component to the i-th set.

    Returns a numpy array holding the size of each set.
    """
    seen = [set() for _symbol, _atom_cnt in parse(formula)]
    layered = iso.IsoLayered(formula=formula, prob_to_cover=P, get_confs=True)
    for _mass, _prob, conf in layered:
        for position, marginal in enumerate(conf):
            seen[position].add(marginal)
    return np.array(list(map(len, seen)))
Example #18
0
def count_totalprob(mass, formula, prob):
    """Build a normalised `IsoTotalProb` spectrum for `formula`.

    `prob` is passed as the first positional argument of `IsoTotalProb`.
    Returns ``(mass, formula, spectrum)`` with `mass` and `formula` passed
    through unchanged.
    """
    spectrum = IsoSpecPy.IsoTotalProb(prob, formula)
    spectrum.normalize()
    return mass, formula, spectrum
Example #19
0
from __future__ import print_function
import IsoSpecPy
from IsoSpecPy.Formulas import *
import math


# Backport math.isclose for interpreters whose math module lacks it; when it
# is already present the module is left untouched.
if not hasattr(math, 'isclose'):
    def isclose(a, b, rel_tol=1e-09, abs_tol=0.0):
        """Return True when `a` and `b` agree within the given tolerances."""
        largest = max(abs(a), abs(b))
        return abs(a - b) <= max(rel_tol * largest, abs_tol)
    math.isclose = isclose

# Distributions of glucose and caffeine built with a threshold of 0.0.
glu = IsoSpecPy.IsoThreshold(0.0, formula=glucose)
ca = IsoSpecPy.IsoThreshold(0.0, formula=caffeine)

# The distance between the two distributions must match the stored
# reference value (compared with math.isclose's default tolerance).
print("Checking Wasserstein distance...", end=' ')
print(ca.wassersteinDistance(glu), end=' ')
assert(math.isclose(ca.wassersteinDistance(glu), 14.03495145836358))
print("OK!")

print("Checking normalization... ", end='')

# A distribution asked to cover 0.9999 must report a total probability
# within 1% (relative) of that value.
ubiq = IsoSpecPy.IsoTotalProb(0.9999, ubiquitin)
print(ubiq.total_prob(), end=' ')
assert(math.isclose(ubiq.total_prob(), 0.9999, rel_tol=0.01))
# Rebuild the distribution and check that scale(0.5) halves the total.
ubiq = IsoSpecPy.IsoTotalProb(0.9999, ubiquitin)
ubiq.scale(0.5)
assert(math.isclose(ubiq.total_prob(), 0.9999*0.5, rel_tol=0.01))
# The scaled total must be unchanged after _recalculate_everything() (a
# private method, called here on purpose by the check).
ubiq._recalculate_everything()
assert(math.isclose(ubiq.total_prob(), 0.9999*0.5, rel_tol=0.01))
Example #20
0
def _write_pairwise_distances(writer, group):
    """Write one CSV row for every unordered pair of entries in `group`.

    `group` is a list of ``(average_mass, formula)`` tuples.  A normalised
    `IsoTotalProb` spectrum covering 0.99 is built once per entry; pairs are
    then visited in index order (i < j) and the absolute mass difference and
    the Wasserstein distance of the two spectra are written.
    """
    spectra = []
    for _mass, formula in group:
        spectrum = IsoSpecPy.IsoTotalProb(0.99, formula)
        spectrum.normalize()
        spectra.append(spectrum)

    for first, (mass_a, formula_a) in enumerate(group):
        for second in range(first + 1, len(group)):
            mass_b, formula_b = group[second]
            wasserstein = spectra[first].wassersteinDistance(spectra[second])
            mass_difference = abs(mass_a - mass_b)
            # NOTE(review): the row holds a single field containing the
            # tuple's text form.  Kept as is because readers of this file
            # may rely on it -- confirm whether separate columns were meant.
            writer.writerow([(mass_difference, wasserstein,
                              (formula_a, formula_b))])


def progr(window,
          input_path='testowy.txt',
          output_path='test.csv',
          total_lines=177754527):
    """Compare isotopic spectra of formulas whose average masses are close.

    Parameters:
        window: maximum gap between the average mass of a line and that of
            the previous line for both to stay in the same group.
        input_path: text file with one ``average_mass,formula`` record per
            line.
        output_path: CSV file to (over)write with the pairwise results.
        total_lines: line count given to tqdm for its progress bar.

    Lines are read in order and chained into groups: a line joins the
    current group while its mass does not exceed the previous line's mass
    plus `window`.  When a line breaks the chain, every pair within the
    finished group is compared (groups of a single entry are skipped) and a
    new group is started from that line.

    The group still open when the input ends is compared as well;
    previously it was silently dropped.
    """
    with open(input_path) as file:
        with open(output_path, 'w', newline='') as write_file:
            writer = csv.writer(write_file)
            group = []
            limes = float('inf')
            for line in tqdm(file, total=total_lines):
                fields = line.split(",")
                average_mass = float(fields[0])
                formula = fields[1].strip()
                if average_mass <= limes:
                    group.append((average_mass, formula))
                else:
                    if len(group) > 1:
                        _write_pairwise_distances(writer, group)
                    # Start the next group from the current line, carrying
                    # over only entries with exactly the same mass.
                    group.append((average_mass, formula))
                    group = [entry for entry in group
                             if entry[0] == average_mass]
                limes = average_mass + window
            # Flush the last group, which no later line can close.
            if len(group) > 1:
                _write_pairwise_distances(writer, group)
Example #21
0
def generate_isotopologues(formula_entry, smiles_entry, resolution_entry):
    """Build a table and a simulated profile of a molecule's isotopologues.

    Parameters:
        formula_entry: molecular formula; used when it is not None and not
            empty.
        smiles_entry: SMILES string, used only when no formula is given.  It
            is sent to the gnps-structure web service, whose response text
            is taken as the formula.
        resolution_entry: resolving power (anything `float()` accepts); the
            peak width is the first peak's m/z divided by this value.

    Returns:
        ``[[table, graph]]`` where `table` is a `dash_table.DataTable` of
        the peaks (columns "mz" and "prob") and `graph` is a `dcc.Graph`
        with the Gaussian-broadened profile scaled to a maximum of 100.
    """
    import numpy as np

    if formula_entry is not None and len(formula_entry):
        formula = formula_entry
    else:
        # Resolve the formula from the SMILES string.
        # NOTE(review): the request has no timeout and its status is not
        # checked, so an error page would be used as the formula.
        url = "https://gnps-structure.ucsd.edu/formula?smiles={}".format(
            urllib.parse.quote(smiles_entry))
        r = requests.get(url)
        formula = r.text

    # Isotopologues covering 99% of the total probability.
    iso = IsoSpecPy.IsoTotalProb(formula=formula,
                                 prob_to_cover=0.99,
                                 get_confs=True)
    # 0.00054858 is subtracted from every mass (presumably the electron
    # mass in Da, i.e. a singly charged positive ion -- confirm).
    output_list = [{"prob": prob, "mz": mass - 0.00054858}
                   for mass, prob, conf in iso]

    table_fig = dash_table.DataTable(
        columns=[{
            "name": column,
            "id": column,
            "deletable": True,
            "selectable": True
        } for column in ["mz", "prob"]],
        data=output_list,
        editable=True,
        filter_action="native",
        sort_action="native",
        sort_mode="multi",
        column_selectable="single",
        selected_columns=[],
        selected_rows=[],
        page_action="native",
        page_current=0,
        page_size=10,
    )

    # Drawing Figure
    # Peak width from the resolving power: delta_m is treated as the full
    # width at half maximum, and FWHM = 2*sqrt(2*ln 2)*sigma ~ 2.355*sigma.
    main_mz = output_list[0]["mz"]
    delta_m = main_mz / float(resolution_entry)
    sigma = delta_m / 2.355

    # The grid step equals the peak standard deviation.
    display_bins = sigma

    # Span the grid over the actual m/z range instead of assuming that the
    # peaks arrive sorted by mass.
    mz_values = [peak["mz"] for peak in output_list]
    mz_grid = np.arange(min(mz_values) - 1, max(mz_values) + 1, display_bins)
    intensity = np.zeros_like(mz_grid)

    for peak in output_list:
        # Add a Gaussian peak shape centred on each theoretical peak.  The
        # exponent uses the variance (sigma squared); dividing by 2*sigma
        # made the drawn peaks far wider than the requested resolution.
        intensity += peak["prob"] * np.exp(
            -(mz_grid - peak["mz"])**2 /
            (2 * sigma**2)) / (np.sqrt(2 * np.pi) * sigma)

    # Normalize profile to 0-100
    intensity = (intensity / intensity.max()) * 100

    df = pd.DataFrame()
    df["mz"] = mz_grid
    df["intensity"] = intensity

    line_fig = px.line(
        df,
        x="mz",
        y="intensity",
        title='Isotopologue Distribution - {} - Resolution - {}'.format(
            formula, resolution_entry))

    return [[table_fig, dcc.Graph(figure=line_fig)]]