'''
Generate a list of all pairwise comparisons of the exact matches
'''


import sys
import re
from phage import Phage
phage = Phage()

try:
    f = sys.argv[1]
except:
    sys.exit("Exact match file, probably phage.kmers.bacteria.rc.txt")


bg = phage.completeBacteriaIDs()
pg = phage.phageIDs()


matches={}
for p in pg:
    matches[p]={}
    for b in bg:
        matches[p][b] = 0

with open(f, 'r') as fin:
    for l in fin:
        p=l.strip().split("\t")
        m=re.findall('NC_\d+', l)
        if len(m) != 2:
'''
Calculate the coverage of each genome pair from the blastx results

Starting with the blastx converted to NC/NC ids, we want to calculate
the coverage at each position in every genome.

We will consider all the genomes with the most number of bases in the
phage as the top genomes

'''


import sys
from phage import Phage
phage=Phage()

try:
    f=sys.argv[1]
except:
    sys.exit(sys.argv[0] + " <blast output file converted to NC/NC format. Probably phage.genomes.blastx")

count={}

lens=phage.phageSequenceLengths()
bctG = set(phage.completeBacteriaIDs())
phgG = set(phage.phageIDs())

for p in phgG:
    count[p]={}

sys.stderr.write("Reading " + f + "\n")
Exemple #3
0
'''
Calculate the coverage of each genome pair from the blastx results

Starting with the blastx converted to NC/NC ids, we want to calculate
the coverage at each position in every genome.

We will consider all the genomes with the most number of bases in the
phage as the top genomes

'''

import sys
from phage import Phage
phage = Phage()

try:
    f = sys.argv[1]
except:
    sys.exit(
        sys.argv[0] +
        " <blast output file converted to NC/NC format. Probably phage.genomes.blastx"
    )

count = {}

lens = phage.phageSequenceLengths()
bctG = set(phage.completeBacteriaIDs())
phgG = set(phage.phageIDs())

for p in phgG:
    count[p] = {}
'''Figure out which phages we do not have blastn hits for'''

import os,sys
from phage import Phage
import re

# get a list of all phages
phage = Phage()
phages = phage.phageTaxonomyString()

# the blast file is the only command-line argument; without it print usage
try:
    blastf = sys.argv[1]
except IndexError:
    sys.exit(sys.argv[0] + " <blast file>")

# record every phage id (NC_ followed by digits) seen in the first column
found = {}
with open(blastf, 'r') as fin:
    for l in fin:
        p = l.split("\t")
        m = re.findall(r'(NC_\d+)', p[0])
        if not m:
            # a line without an NC_ id used to abort the run with IndexError;
            # report it on stderr and keep going
            sys.stderr.write("WARNING: No phage found in " + p[0].strip() + "\n")
            continue
        found[m[0]] = 1

# report every known phage that never appeared in the blast file
for p in phages:
    if p not in found:
        # parenthesised single-argument print works on Python 2 and Python 3
        print("MISSED " + p)

        return np.exp(-0.5 *
                      (np.square(x - mu) / sigma_r))  # gaussian r: ~(39.7, 10)


new_host = Host(
    c0=10**5,
    g_max=0.036,  #lit: 0.012
    yield_coeff=0.000000000001,
    half_sat=0.00000125,
    death_rate=0.001,
    t_dep=temperature_dependency_new_host,
)

original_phage = Phage(
    c0=1000,
    adsorption_rate=0.000000000001,
    burst_size=100,
    death_rate=0.00272,
)
new_phage = Phage(
    c0=1,
    adsorption_rate=0.0000001,
    burst_size=100,
    death_rate=0.00272,
)

s0 = 0.0000025  #stock concentration of nutrient (g/mL)#0.0000025
R_pnn = 1 / 1000  # fraction of new phage in library

# define system of differential equations
"""
def dXa_dt(X, t):
'''
For blastn searches we are going to calculate the percent coverage of the phage genome and score the longest coverage as the best hit. It doesn't matter where the hits are on the bacterial genome.

We are going to use a cutoff of 0.001 E value

'''

import sys, os, re
from phage import Phage
phage = Phage()

try:
    blastf = sys.argv[1]
except:
    sys.exit(sys.argv[0] + "< blast file>")

# read the fasta file of phages to get the lengths
lens = phage.phageSequenceLengths()
sys.stderr.write("Found " + str(len(lens)) + " sequences\n")
hits = {i: {} for i in lens}

with open(blastf, 'r') as fin:
    for l in fin:
        p = l.strip().split("\t")
        e = float(p[10])
        if e > 0.001:
            continue
        m = re.findall('(NC_\d+)', p[0])
        if m == []:
            sys.stderr.write("WARNING: No phage found in " + p[0] + "\n")
            continue
Exemple #7
0
'''
List complete bacteria or phage IDs. Set the bool to true to get bacteria
'''
import sys
from phage import Phage

phage = Phage()

# A non-empty first command-line argument selects the bacteria listing;
# with no argument (or an empty one) the phage IDs are listed instead.
bacteria = sys.argv[1] if len(sys.argv) > 1 else False

d = phage.completeBacteriaIDs() if bacteria else phage.phageIDs()

# one ID per output line
for p in d:
    print(p)
Exemple #8
0
 def lyse(self):
     """Remove this object from the model and add ``_burst_size`` new phages.

     ``model.things`` is rebuilt as a set without ``self``; every new
     ``Phage`` is created at the coordinates returned by ``get_location()``.
     """
     model.things = {thing for thing in model.things if thing != self}
     for _ in range(self._burst_size):
         x = self.get_location()[0]
         y = self.get_location()[1]
         model.add(Phage(x, y))
#!/usr/bin/python

'''Read the kmer counts from a series of files and plot a PCA'''

## start with reading the files and adding the kmer counts to the organism names
import sys
sys.path.append('/home3/redwards/bioinformatics/phage_host')
from phage import Phage
import re
import os

phage = Phage()
## we only choose those hosts with 5 phages that infect them.
#host = phage.phageHost()
host = phage.phageWithNHosts(5)

try:
    dir  = sys.argv[1]
    outf = sys.argv[2]
except:
    sys.exit(sys.argv[0] + " <directory of kmer counts> <file to write output table to>")

count={}
allkmers={}
organismId={}
for file in os.listdir(dir):
    match = re.findall('NC_\d+', file)
    id = match[0]
    if id not in host:
        sys.stderr.write("Found a sequence with id " + id + " but we don't have enough genomes for it\n")
        continue
Exemple #10
0
'''
List complete bacteria or phage IDs. Set the bool to true to get bacteria
'''
import sys
from phage import Phage

phage = Phage()

# A non-empty first command-line argument selects the bacteria listing;
# with no argument (or an empty one) the phage IDs are listed instead.
bacteria = sys.argv[1] if len(sys.argv) > 1 else False

d = phage.completeBacteriaIDs() if bacteria else phage.phageIDs()

# one ID per output line
for p in d:
    print(p)
Exemple #11
0
'''Figure out which phages we do not have blastn hits for'''

import os, sys
from phage import Phage
import re

# get a list of all phages
phage = Phage()
phages = phage.phageTaxonomyString()

# the blast file is the only command-line argument; without it print usage
try:
    blastf = sys.argv[1]
except IndexError:
    sys.exit(sys.argv[0] + " <blast file>")

# record every phage id (NC_ followed by digits) seen in the first column
found = {}
with open(blastf, 'r') as fin:
    for l in fin:
        p = l.split("\t")
        m = re.findall(r'(NC_\d+)', p[0])
        if not m:
            # a line without an NC_ id used to abort the run with IndexError;
            # report it on stderr and keep going
            sys.stderr.write("WARNING: No phage found in " + p[0].strip() + "\n")
            continue
        found[m[0]] = 1

# report every known phage that never appeared in the blast file
for p in phages:
    if p not in found:
        # parenthesised single-argument print works on Python 2 and Python 3
        print("MISSED " + p)
'''
Count the number of proteins in common between the phages and the
bacteria based on blastx
'''


import sys
from phage import Phage
phage=Phage()

try:
    f=sys.argv[1]
except:
    sys.exit(sys.argv[0] + " <blast output file converted to NC/NC format. Probably phage.genomes.blastx")

count={}


bctG = phage.completeBacteriaIDs()
phgG = phage.phageIDs()

for p in phgG:
    count[p]={}
    for b in bctG:
        count[p][b]=0

with open(f, 'r') as bin:
    for l in bin:
        p=l.strip().split("\t")
        if p[0] in count and p[1] in count[p[0]]:
            count[p[0]][p[1]] = count[p[0]].get(p[1], 0) + 1
Exemple #13
0
'''

Implement the Frac_Q and Frac_D methods from http://cge.cbs.dtu.dk/services/HostPhinder/.

We start with a directory of text files, where each genome has a list of kmers that are found in that genome. Then we need to know the number shared. That phage then predicts the host.

'''

import sys,os
from phage import Phage
phage=Phage()
phages=phage.phageTaxonomy()

try:
    kmerD=sys.argv[1]
    outf=sys.argv[2]
except:
    sys.exit(sys.argv[0] + " <15-mer host directory> <output file>")

# read in all the 15mers
kmer={}
allk={}
for p in phages:
    kmer[p]={}
    kmerfile = os.path.join(kmerD, p + ".tsv")
    if not os.path.exists(kmerfile):
        sys.stderr.write("No kmer file for " + p + "\n")
        # nothing to read for this phage: keep its empty kmer dict and move
        # on instead of crashing when the missing file is opened
        continue

    # the first tab-separated column of each line is the kmer
    with open(kmerfile, 'r') as fin:
        for l in fin:
            part=l.split("\t")
            kmer[p][part[0]]=1
class reaB():
    """Two-reactor host/phage simulation that runs and plots on construction.

    Reactor A grows the new host; reactor B receives host and nutrient from
    reactor A, fresh medium and a phage library (see the ``in_*`` / ``out_*``
    flow methods).  ``__init__`` sets the parameters, integrates
    :meth:`model` with ``ddeint`` over ``self.xs`` and shows a 3x3 figure.

    State vector order (as unpacked in :meth:`model`): host A, nutrient A,
    host B, nutrient B, hosts infected by the original phage, hosts infected
    by the new phage, original phage, new phage.
    """

    def __init__(self):
        """Set up the parameters, integrate the model and display the plots."""
        # simulation time and resolution of samples
        self.dt = 0.1
        # np.linspace needs an integer sample count; 120 / dt is a float
        self.xs = np.linspace(0, 120, int(round(120 / self.dt)))
        self.fluxA = 0.0
        self.fluxB = 0.0

        self.new_host = Host(
            c0=10**5,
            g_max=0.036,  # lit: 0.012
            yield_coeff=0.000000000001,
            half_sat=0.00000125,
            death_rate=0.001,
            t_dep=self.temperature_dependency_new_host,
        )
        self.original_phage = Phage(
            c0=10**9,
            adsorption_rate=0.000000000001,
            burst_size=100,
            death_rate=0.00272,
        )
        self.new_phage = Phage(
            c0=10**6,
            adsorption_rate=0.0000000001,
            burst_size=100,
            death_rate=0.00272,
        )

        self.s0 = 0.0000025  # stock concentration of nutrient (g/mL) #0.0000025
        # float literal: stays 0.001 even where "/" is integer division
        self.R_pnn = 1.0 / 1000  # fraction of new phage in library

        # running record of the value pushed by model() at each visited time
        self.myinterpolator = scipy.interpolate.interp1d(
            np.array([0 - 1, 0]),  # X
            np.array([0, 1]).T,  # Y
            kind="linear",
            bounds_error=False,
            fill_value=0)

        self.d = 13.3  # lysis time delay
        self.dd = 13
        # two queues pre-filled with 101 zeros each
        self.q_h_inf_poo = deque([0] * 101)
        self.q_h_inf_pnn = deque([0] * 101)

        data = ddeint(self.model,
                      self.initial_conditions,
                      self.xs,
                      fargs=(self.d, ))

        self._plot(data)

    def _plot(self, data):
        """Draw and show the 3x3 overview figure of the integrated states."""
        n_samples = len(self.xs)

        def series(k):
            # k-th state variable at every sample time
            return [data[t][k] for t in range(n_samples)]

        bacteria = 'concentration [#bacteria/mL]'
        nutrient = 'concentration [g/mL]'
        phages = 'concentration [#phage/mL]'
        # (subplot position, ((state index, legend label or None), ...),
        #  y-axis label, title) -- drawn in this order
        panels = (
            (1, ((0, "reactor A"), ), bacteria,
             'Host Concentration A over Time'),
            (2, ((1, "reactor A"), ), nutrient, 'Nutrient A over Time'),
            (3, ((2, None), ), bacteria, 'Host Concentration B over Time'),
            (4, ((3, None), ), nutrient, 'Nutrient B over Time'),
            (5, ((4, None), ), bacteria, 'Host infected OO over Time'),
            (6, ((5, None), ), bacteria, 'Host infected NN over Time'),
            (8, ((6, None), ), phages, 'Phage OO over Time'),
            (9, ((7, None), ), phages, 'Phage NN over Time'),
            (7, ((6, "original"), (7, "new")), phages, 'Phage over Time'),
        )

        plt.figure(figsize=(16, 16))
        for position, curves, ylabel, title in panels:
            plt.subplot(3, 3, position)
            labelled = False
            for k, label in curves:
                if label is None:
                    plt.plot(self.xs, series(k))
                else:
                    plt.plot(self.xs, series(k), label=label)
                    labelled = True
            plt.xlabel('time [min]')
            plt.ylabel(ylabel)
            plt.title(title)
            if labelled:
                # only panels with labelled curves get a legend
                plt.legend()

        plt.subplots_adjust(wspace=0.4, hspace=0.4)
        plt.show()

    # Reactor A
    # lb influx profile of reactor A
    def in_a_lb(self, t):
        """
        :param t: time t
        :return: lb influx to reactor a at time t
        """
        return self.fluxA

    # biomass outflux profile of reactor A
    def out_a(self, t):
        """
        :param t: time t
        :return: biomass outflux of reactor a at time t
        """
        return self.fluxA

    # temperature profile of reactor A
    def temperature_a(self, t):
        """
        :param t: time t
        :return: temperature at time t
        """
        return 39.7

    # Reactor B
    # lb influx profile of reactor B
    def in_b_lb(self, t):
        """
        :param t: time t
        :return: lb influx to reactor b at time t
        """
        return self.fluxB / 3

    # new host influx profile of reactor B
    def in_nh(self, t):
        """
        :param t: time t
        :return: new_host influx from reactor a to reactor b at time t
        """
        return self.fluxB / 3

    # phage library influx in reactor B
    def in_lib(self, t):
        """
        :param t: time t
        :return: library influx in reactor b at time t
        """
        return self.fluxB / 3

    # biomass outflux profile of reactor B
    def out_b(self, t):
        """
        :param t: time t
        :return: biomass outflux of reactor b at time t
        """
        return self.fluxB

    # temperature profile of reactor B
    def temperature_b(self, t):
        """
        :param t: time t
        :return: temperature at time t
        """
        return 39.7

    def temperature_dependency_new_host(self, x):
        """
        :param x: temperature
        :return: growth rate factor, 1.0 at 39.7 and falling off faster
                 above that temperature than below it
        """
        mu = 39.7
        sigma_l = 120.0
        sigma_r = 10.0
        # asymmetric bell curve: wide below mu, narrow above it
        sigma = sigma_l if x < mu else sigma_r
        return np.exp(-0.5 * (np.square(x - mu) / sigma))

    def update(self, myinterpolator, t, Y):
        """ Add one new (ti,yi) to the interpolator

        :param myinterpolator: existing ``interp1d`` whose samples are kept
        :param t: time of the new sample
        :param Y: value of the new sample (needs a ``size`` attribute); it
                  also becomes the fill value outside the sampled range
        :return: a new ``interp1d`` containing the old samples plus (t, Y)
        """
        Y2 = Y if (Y.size == 1) else np.array([Y]).T
        return scipy.interpolate.interp1d(
            np.hstack([myinterpolator.x, [t]]),  # X
            np.hstack([myinterpolator.y, Y2]),  # Y
            kind="linear",
            bounds_error=False,
            fill_value=Y)

    # define system of differential equations
    def model(self, Y, t, d):
        """Right-hand side of the delay differential system.

        :param Y: callable returning the eight-element state at a given time
        :param t: current time
        :param d: delay; the state at ``t - d`` is fetched
        :return: array with the eight time derivatives, in state order
        """
        (c_host_a, c_nutr_a, c_host_b, c_nutr_b, c_inf_poo, c_inf_pnn, c_poo,
         c_pnn) = Y(t)
        # NOTE(review): the delayed state is fetched but none of these values
        # is used below -- confirm whether the lysis terms should use them.
        (c_host_a_d, c_nutr_a_d, c_host_b_d, c_nutr_b_d, c_inf_poo_d,
         c_inf_pnn_d, c_poo_d, c_pnn_d) = Y(t - d)

        host = self.new_host
        old = self.original_phage
        new = self.new_phage

        # flow and temperature profiles at time t
        in_a = self.in_a_lb(t)
        out_a = self.out_a(t)
        in_b = self.in_b_lb(t)
        in_nh = self.in_nh(t)
        in_lib = self.in_lib(t)
        out_b = self.out_b(t)

        inf_poo = old.infection_rate(c_host_b, c_poo)
        inf_pnn = new.infection_rate(c_host_b, c_pnn)

        # value currently held by the interpolator, then push this step's
        # net change of hosts infected by the original phage
        y = self.myinterpolator(t)
        self.myinterpolator = self.update(self.myinterpolator, t,
                                          inf_poo - y - out_b * c_inf_poo)
        # NOTE(review): the same y (built from the original phage only) is
        # used as the lysis term for both phages below -- confirm intent.

        # each state that has gone negative is kept from decreasing further:
        # its derivative falls back to 0 or to the pure inflow term
        d_host_a = 0 if c_host_a < 0 else (
            host.per_cell_growth_rate(c_nutr_a, self.temperature_a(t)) *
            c_host_a - out_a * c_host_a - host.death_rate * c_host_a)

        d_nutr_a = self.s0 * in_a if c_nutr_a < 0 else (
            -host.yield_coeff *
            host.per_cell_growth_rate(c_nutr_a, self.temperature_a(t)) *
            c_host_a + self.s0 * in_a - c_nutr_a * out_a)

        d_host_b = 0 if c_host_b < 0 else (
            host.per_cell_growth_rate(c_nutr_b, self.temperature_b(t)) *
            c_host_b + in_nh * c_host_a - inf_pnn - inf_poo -
            host.death_rate * c_host_b - out_b * c_host_b)

        d_nutr_b = self.s0 * in_b + c_nutr_a * in_nh if c_nutr_b < 0 else (
            -host.yield_coeff *
            host.per_cell_growth_rate(c_nutr_b, self.temperature_b(t)) *
            (c_host_b + c_inf_poo + c_inf_pnn) + self.s0 * in_b +
            c_nutr_a * in_nh - c_nutr_b * out_b)

        d_inf_poo = inf_poo if c_inf_poo < 0 else (
            inf_poo - y - out_b * c_inf_poo)

        d_inf_pnn = inf_pnn if c_inf_pnn < 0 else (
            inf_pnn - y - out_b * c_inf_pnn)

        d_poo = 0 if c_poo < 0 else (
            old.burst_size * y - inf_poo - out_b * c_poo -
            old.death_rate * c_poo + in_lib * (1 - self.R_pnn))

        d_pnn = 0 if c_pnn < 0 else (
            new.burst_size * y - inf_pnn - out_b * c_pnn -
            new.death_rate * c_pnn + in_lib * self.R_pnn)

        return np.array([
            d_host_a, d_nutr_a, d_host_b, d_nutr_b, d_inf_poo, d_inf_pnn,
            d_poo, d_pnn
        ])

    def initial_conditions(self, t):
        """
        :param t: time t (ignored; the same state is returned for every t)
        :return: array with the eight starting values, in state order
        """
        return np.array([
            self.new_host.c0, self.s0, 10**9, self.s0 * 5 * 10**3, 0.0, 0.0,
            self.original_phage.c0, self.new_phage.c0
        ])
def test_phage_finder(phage_finder, input, refseq, name):
    """Check that Phage(input, phage_finder) yields the expected refseq and name."""
    parsed = Phage(input, phage_finder)
    assert refseq == parsed.refseq
    assert name == parsed.name
    def __init__(self):
        """Set up the parameters, integrate the model and display the plots.

        The model is integrated with ``ddeint`` over ``self.xs`` and the
        eight state variables are drawn on a 3x3 grid of subplots.
        """
        # simulation time and resolution of samples
        self.dt = 0.1
        # np.linspace needs an integer sample count; 120 / dt is a float
        self.xs = np.linspace(0, 120, int(round(120 / self.dt)))
        self.fluxA = 0.0
        self.fluxB = 0.0

        self.new_host = Host(
            c0=10**5,
            g_max=0.036,  # lit: 0.012
            yield_coeff=0.000000000001,
            half_sat=0.00000125,
            death_rate=0.001,
            t_dep=self.temperature_dependency_new_host,
        )
        self.original_phage = Phage(
            c0=10**9,
            adsorption_rate=0.000000000001,
            burst_size=100,
            death_rate=0.00272,
        )
        self.new_phage = Phage(
            c0=10**6,
            adsorption_rate=0.0000000001,
            burst_size=100,
            death_rate=0.00272,
        )

        self.s0 = 0.0000025  # stock concentration of nutrient (g/mL) #0.0000025
        # float literal: stays 0.001 even where "/" is integer division
        self.R_pnn = 1.0 / 1000  # fraction of new phage in library

        self.myinterpolator = scipy.interpolate.interp1d(
            np.array([0 - 1, 0]),  # X
            np.array([0, 1]).T,  # Y
            kind="linear",
            bounds_error=False,
            fill_value=0)

        self.d = 13.3  # lysis time delay
        self.dd = 13
        # two queues pre-filled with 101 zeros each
        self.q_h_inf_poo = deque([0] * 101)
        self.q_h_inf_pnn = deque([0] * 101)

        data = ddeint(self.model,
                      self.initial_conditions,
                      self.xs,
                      fargs=(self.d, ))

        n_samples = len(self.xs)

        def series(k):
            # k-th state variable at every sample time
            return [data[t][k] for t in range(n_samples)]

        bacteria = 'concentration [#bacteria/mL]'
        nutrient = 'concentration [g/mL]'
        phages = 'concentration [#phage/mL]'
        # (subplot position, ((state index, legend label or None), ...),
        #  y-axis label, title) -- drawn in this order
        panels = (
            (1, ((0, "reactor A"), ), bacteria,
             'Host Concentration A over Time'),
            (2, ((1, "reactor A"), ), nutrient, 'Nutrient A over Time'),
            (3, ((2, None), ), bacteria, 'Host Concentration B over Time'),
            (4, ((3, None), ), nutrient, 'Nutrient B over Time'),
            (5, ((4, None), ), bacteria, 'Host infected OO over Time'),
            (6, ((5, None), ), bacteria, 'Host infected NN over Time'),
            (8, ((6, None), ), phages, 'Phage OO over Time'),
            (9, ((7, None), ), phages, 'Phage NN over Time'),
            (7, ((6, "original"), (7, "new")), phages, 'Phage over Time'),
        )

        plt.figure(figsize=(16, 16))
        for position, curves, ylabel, title in panels:
            plt.subplot(3, 3, position)
            labelled = False
            for k, label in curves:
                if label is None:
                    plt.plot(self.xs, series(k))
                else:
                    plt.plot(self.xs, series(k), label=label)
                    labelled = True
            plt.xlabel('time [min]')
            plt.ylabel(ylabel)
            plt.title(title)
            if labelled:
                # only panels with labelled curves get a legend
                plt.legend()

        plt.subplots_adjust(wspace=0.4, hspace=0.4)
        plt.show()
sys.path.append('/home3/redwards/bioinformatics/phage_host')
sys.path.append('/home3/redwards/bioinformatics/Modules')
from phage import Phage
import re
import os
import taxon

''' Code to add all the phage hosts' taxonomic hierarchy to the phage host files'''

wanted = ['species', 'genus', 'family', 'order', 'class', 'phylum', 'superkingdom']



manual = {'Acinetobacter genomosp.' : '471', 'Actinobacillus actinomycetemcomitans' : '714', 'alpha proteobacterium' : '34025', 'Bacillus clarkii' : '79879', 'Brevibacterium flavum' : '92706', 'Celeribacter sp.' : '875171', 'Escherichia sp.' : '237777', 'Geobacillus sp.' : '340407', 'Gordonia rubropertincta' : '36822', 'Iodobacter sp.' : '641420', 'Listeria sp.' : '592375', 'Marinomonas sp.' : '127794', 'Methanobacterium thermoautotrophicum' : '145262', 'methicillin-resistant Staphylococcus' : '1280', 'Nitrincola sp.' : '459834', 'Persicivirga sp.' : '859306', 'Salisaeta sp.' : '1392396', 'Sulfitobacter sp.' : '191468'}

phage = Phage()
host = phage.phageHost()

taxa = taxon.readNodes()
names,blastname,genbankname,synonym = taxon.extendedNames()
divs = taxon.readDivisions()


name2id = {names[x].name:x for x in names}
name2id.update({blastname[x].name:x for x in blastname})
name2id.update({genbankname[x].name:x for x in genbankname})
name2id.update({synonym[x].name:x for x in synonym})

for id in host:
    if host[id] in manual:
        i = manual[host[id]]
#!/usr/bin/python

'''Read the kmer counts from a series of files and plot a PCA'''

## start with reading the files and adding the kmer counts to the organism names
import sys
sys.path.append('/home3/redwards/bioinformatics/phage_host')
from phage import Phage
import re
import os

phage = Phage()
## we only choose those hosts with 5 phages that infect them.
#host = phage.phageHost()
bacteria = phage.completeBacteria()


try:
    dir  = sys.argv[1]
    outf = sys.argv[2]
    merWanted = sys.argv[3]
except:
    sys.exit(sys.argv[0] + " <directory of kmer counts> <file to write output table to> <kmer size>")

count={}
allkmers={}
organismId={}
for file in os.listdir(dir):
    # our file names look kmers/165.kmers/NC_008025.1.3kmer.tsv
    match = re.match('(.*)\.\d+\.(\d+)kmer.tsv', file)
    if match ==None:
Exemple #19
0
    'Celeribacter sp.': '875171',
    'Escherichia sp.': '237777',
    'Geobacillus sp.': '340407',
    'Gordonia rubropertincta': '36822',
    'Iodobacter sp.': '641420',
    'Listeria sp.': '592375',
    'Marinomonas sp.': '127794',
    'Methanobacterium thermoautotrophicum': '145262',
    'methicillin-resistant Staphylococcus': '1280',
    'Nitrincola sp.': '459834',
    'Persicivirga sp.': '859306',
    'Salisaeta sp.': '1392396',
    'Sulfitobacter sp.': '191468'
}

phage = Phage()
host = phage.phageHost()

taxa = taxon.readNodes()
names, blastname, genbankname, synonym = taxon.extendedNames()
divs = taxon.readDivisions()

name2id = {names[x].name: x for x in names}
name2id.update({blastname[x].name: x for x in blastname})
name2id.update({genbankname[x].name: x for x in genbankname})
name2id.update({synonym[x].name: x for x in synonym})

for id in host:
    if host[id] in manual:
        i = manual[host[id]]
    else:
Exemple #20
0
    else:
        return np.exp(-0.5 *
                      (np.square(x - mu) / sigma_r))  # gaussian r: ~(39.7, 10)


new_host = Host(
    c0=10**5,
    g_max=0.036,  #lit: 0.012
    yield_coeff=0.000000000001,
    half_sat=0.00000125,
    death_rate=0.001,
    t_dep=temperature_dependency_new_host,
)
original_phage = Phage(
    c0=10**9,
    adsorption_rate=0.000000000001,
    burst_size=100,
    death_rate=0.00272,
)
new_phage = Phage(
    c0=10**6,
    adsorption_rate=0.0000000001,
    burst_size=100,
    death_rate=0.00272,
)

s0 = 0.0000025  #stock concentration of nutrient (g/mL) #0.0000025
R_pnn = 1 / 1000  # fraction of new phage in library


def c_nutr_b(tt, c_host_b, c_inf_poo, c_inf_pnn, c_nutr_a):
    return -new_host.yield_coeff * new_host.per_cell_growth_rate(
Exemple #21
0
'''
For blastn searches we are going to calculate the percent coverage of the phage genome and score the longest coverage as the best hit. It doesn't matter where the hits are on the bacterial genome.

We are going to use a cutoff of 0.001 E value

'''

import sys, os, re
from phage import Phage
phage = Phage()

try:
    blastf = sys.argv[1]
except:
    sys.exit(sys.argv[0] + "< blast file>")

# read the fasta file of phages to get the lengths
lens = phage.phageSequenceLengths()
sys.stderr.write("Found " + str(len(lens)) + " sequences\n")
hits = {i: {} for i in lens}

with open(blastf, 'r') as fin:
    for l in fin:
        p = l.strip().split("\t")
        e = float(p[10])
        if e > 0.001:
            continue
        m = re.findall('(NC_\d+)', p[0])
        if m == []:
            sys.stderr.write("WARNING: No phage found in " + p[0] + "\n")
            continue
def get_phagename_and_refseq(row, phage_finder):
    """Return the ``(name, refseq)`` pair of the Phage built from ``row``."""
    record = Phage(row, phage_finder)
    name = record.name
    refseq = record.refseq
    return name, refseq
'''
Calculate the distance between two codon usages. 
We have two files, the first with just the phages and the second
with their hosts. Then we need to calculate which of the hosts is
closest
'''

import os
import sys
sys.path.append('/home3/redwards/bioinformatics/Modules')
import numpy as np
import scipy
from phage import Phage

phage = Phage()
bctG = set(phage.completeBacteriaIDs())
phgG = set(phage.phageIDs())



remove_ambiguous = True # do we want ambiguous bases or not
codons = set([
    'AAA', 'AAC', 'AAG', 'AAT', 'ACA', 'ACC', 'ACG', 'ACT', 
    'AGA', 'AGC', 'AGG', 'AGT', 'ATA', 'ATC', 'ATG', 'ATT', 
    'CAA', 'CAC', 'CAG', 'CAT', 'CCA', 'CCC', 'CCG', 'CCT',
    'CGA', 'CGC', 'CGG', 'CGT', 'CTA', 'CTC', 'CTG', 'CTT', 
    'GAA', 'GAC', 'GAG', 'GAT', 'GCA', 'GCC', 'GCG', 'GCT',
    'GGA', 'GGC', 'GGG', 'GGT', 'GTA', 'GTC', 'GTG', 'GTT', 
    'TAA', 'TAC', 'TAG', 'TAT', 'TCA', 'TCC', 'TCG', 'TCT',
    'TGA', 'TGC', 'TGG', 'TGT', 'TTA', 'TTC', 'TTG', 'TTT'
])
'''
Generate a list of all pairwise comparisons of the exact matches
'''

import sys
import re
from phage import Phage
phage = Phage()

try:
    f = sys.argv[1]
except:
    sys.exit("Exact match file, probably phage.kmers.bacteria.rc.txt")

bg = phage.completeBacteriaIDs()
pg = phage.phageIDs()

matches = {}
for p in pg:
    matches[p] = {}
    for b in bg:
        matches[p][b] = 0

with open(f, 'r') as fin:
    for l in fin:
        p = l.strip().split("\t")
        m = re.findall('NC_\d+', l)
        if len(m) != 2:
            #sys.stderr.write("Error parsing two NC ids from " + l)
            continue
'''
For blastn searches we are going to calculate the percent coverage of the phage genome and score the longest coverage as the best hit. It doesn't matter where the hits are on the bacterial genome.

We are going to use a cutoff of 0.001 E value

'''

import sys,os,re
from phage import Phage
phage=Phage()

try:
    blastf=sys.argv[1]
except:
    sys.exit(sys.argv[0] + "< blast file>")

# read the fasta file of phages to get the lengths
lens=phage.phageSequenceLengths()
sys.stderr.write("Found " + str(len(lens)) + " sequences\n")
hits={i:{} for i in lens}


with open(blastf, 'r') as fin:
    for l in fin:
        p=l.strip().split("\t")
        e=float(p[10])
        if e > 0.001:
            continue
        m=re.findall('(NC_\d+)', p[0])
        if m == []:
            sys.stderr.write("WARNING: No phage found in " + p[0] + "\n")