예제 #1
0
def create_linePlot_data(accession, path, windows):
    """Write line-plot data for *accession*, once per entry in *windows*.

    The genome is fetched once via ``get_genome`` and its length is printed.
    For every window, ``calc_fractions`` is called on the genome and its two
    results are forwarded to ``write_plotData`` together with the accession,
    the output path and the window itself. A progress line is printed after
    each window and a blank line once all windows are done.

    Returns ``None``.
    """
    sequence = get_genome(accession)
    print(f"Length of genome: {len(sequence)}")

    for window in windows:
        window_fractions, window_bases = calc_fractions(sequence, window)
        write_plotData(accession, path, window_fractions, window_bases, window)
        print(f"Completed window {window}")

    print()
예제 #2
0
def get_gene_data(accession, path):
    """Extract gene data for *accession* and write the karyotype files.

    Steps, in order:
      1. fetch the genome (only its length is kept) and the ``get_gff`` text;
      2. run ``extract_genes`` on them;
      3. pass the result to ``write_karyotype`` and then ``write_band_labels``,
         both called with the accession, *path*, the data and the genome length.

    A status message is printed after each step. Returns ``None``.
    """
    genome_size = len(get_genome(accession))
    gff_text = get_gff(accession)
    gene_records = extract_genes(accession, gff_text, genome_size)
    print("Extracted gene data")

    write_karyotype(accession, path, gene_records, genome_size)
    print("Created karyotype file")

    write_band_labels(accession, path, gene_records, genome_size)
    print("Created gene label file\n")
예제 #3
0
                    strandOuter += f"mt1 {centre} {centre} {D[0]}\n"
                else:
                    strandInner += f"mt1 {centre} {centre} {D[0]}\n"
            else:
                if D[4] == "+":
                    strandOuter += f"mt1 {D[1]} {D[2]} {D[0]}\n"
                else:
                    strandInner += f"mt1 {D[1]} {D[2]} {D[0]}\n"
    write_file(f"{path}karyotype.{accession}.+.band_labels.txt", strandOuter)
    write_file(f"{path}karyotype.{accession}.-.band_labels.txt", strandInner)


def get_gene_data(accession, path):
    """Build the gene data for *accession* and emit the files derived from it.

    The genome length and the text returned by ``get_gff`` are handed to
    ``extract_genes``; its result is then written out by ``write_karyotype``
    followed by ``write_band_labels``. Each stage reports completion on
    stdout. Returns ``None``.
    """
    seq_length = len(get_genome(accession))
    annotation_text = get_gff(accession)

    genes_found = extract_genes(accession, annotation_text, seq_length)
    print("Extracted gene data")

    # Both writers take the same four arguments; the order of the calls is
    # kept as karyotype first, band labels second.
    write_karyotype(accession, path, genes_found, seq_length)
    print("Created karyotype file")
    write_band_labels(accession, path, genes_found, seq_length)
    print("Created gene label file\n")


if __name__ == "__main__":
    # Ad-hoc driver executed only when the module is run as a script.
    accession = "NC_012920.1"
    # NOTE(review): `path` is not used in the visible part of this block --
    # presumably consumed further down; confirm before removing.
    path = "../data/temp/"
    # Fetch the genome once and keep both the sequence and its length.
    gnome = get_genome(accession)
    genomeLength = len(gnome)
    # extract_genes receives the accession, whatever get_gff() returns for
    # it, and the genome length; the result is kept in `gff`.
    gff = extract_genes(accession, get_gff(accession), genomeLength)
예제 #4
0
from mtdna_utilities import get_genome
from read_wiki_table import *

# Genome for accession NC_012920.1.
gnome = get_genome("NC_012920.1")

# `genes` is not defined here; it is expected to arrive through the star
# import from read_wiki_table. Each row is indexed as [0] name, [1] and [2]
# integer-convertible coordinates, [3] an extra field stored unchanged.
glist = {
    g[0]: [int(g[1]), int(g[2]), g[3]]
    for g in genes
}
gorder = [g[0] for g in genes]

# For every gene, in table order, print its name and then the first 30
# symbols of the matching genome slice.
for g in gorder:
    print(g)
    first, last = glist[g][:2]
    # The start is shifted by one and the end is used as-is, i.e. the stored
    # coordinates are treated as 1-based and inclusive.
    gene = gnome[first - 1:last]
    print(gene[:30])
예제 #5
0
            stroke_color     = black
            """
        conf_plots += "</plot>\n"
    conf_plots += "</plots>\n"
    return conf_plots


# Accession to process -- exactly one of the assignments below is active.
#accession = "NC_012920.1" # Human reference sequence
#accession = "NC_005089.1" # Mouse reference sequence
accession = "NC_027264.1" # Baker's yeast (Saccharomyces cerevisiae) reference sequence
#accession = "NC_001224.1" # Another yeast

path = "../data/temp/"
ideoDims = [0.6, 0.075]
# Ratio of the second ideogram dimension to the first.
thickness = ideoDims[1]/ideoDims[0]
genomeLength = len(get_genome(accession))

conf_ideogram = create_ideogram(*ideoDims)
conf_image = create_image("../images/circos", accession)
conf_plots = ""

# The last create_ticks argument is 1 for genomes shorter than 30000 and 5
# otherwise; the other two arguments are the same in both cases.
tickArg = 1 if genomeLength < 30000 else 5
conf_ticks = create_ticks(f"{1-thickness}r+10p", "out", tickArg)

get_gene_data(accession, path)

# Two values are reused by the entries below: the second "text" entry spans
# textStart..textEnd and the "tile" entry begins at textEnd.
textStart = 0.78-(2*thickness)
textEnd = 0.98-(2*thickness)
plots = [
    ["text", f"{path}karyotype.{accession}.+.band_labels.txt", 1, 1.25, "yes"],
    ["text", f"{path}karyotype.{accession}.-.band_labels.txt", textStart, textEnd, "no"],
    ["tile", f"{path}karyotype.{accession}.-.txt", textEnd, 0.98-thickness],
]
예제 #6
0
    plt.ylabel("Proportion of base", fontsize=30)
    plt.tick_params(axis='both', which='major', labelsize=20)
    plt.ylim(0, yRange)
    plt.show()


def find_max(genomes, windows):
    """Return every ``calc_fractions`` result plus the peak value per window.

    ``fractions[w][g]`` holds ``calc_fractions(genomes[g], windows[w])``.
    Each such result is iterated and the maximum of every element is taken;
    ``maxValues[w]`` is the largest of those maxima across all genomes for
    ``windows[w]``.

    Returns:
        A ``(fractions, maxValues)`` tuple; ``maxValues`` has one entry per
        window.

    Raises:
        ValueError: from ``max`` if *genomes* is empty (and *windows* is
            not) or if any iterated collection is empty.
    """
    fractions = [[calc_fractions(G, W) for G in genomes] for W in windows]

    maxValues = []
    for perWindow in fractions:
        # Peak of each genome first, then the peak across genomes.
        genomePeaks = [
            max(max(series) for series in perGenome)
            for perGenome in perWindow
        ]
        maxValues.append(max(genomePeaks))

    return fractions, maxValues


accession = "NC_012920.1"
windows = [10, 100, 500]
numShuffles = 3
bases = ["A", "C", "T", "G"]

# genomes[0] is the fetched genome; it is followed by numShuffles outputs of
# generate_genome(), each produced from that same first genome.
reference = get_genome(accession)
genomes = [reference] + [generate_genome(reference) for _ in range(numShuffles)]

fractions, maxValues = find_max(genomes, windows)

# One plot per (window, genome) pair; the maximum for the window is passed
# as the last argument to every plot of that window.
for W, perGenome, peak in zip(windows, fractions, maxValues):
    for genomeFractions in perGenome:
        plot(W, genomeFractions, bases, peak)