def create_linePlot_data(accession, path, windows):
    """Write line-plot data for one genome, once per entry in ``windows``.

    The genome is fetched a single time with ``get_genome`` and its length is
    printed. Each window value is then passed to ``calc_fractions`` together
    with the genome, and the two values it returns are forwarded to
    ``write_plotData``. A progress line is printed after every window.

    Args:
        accession: Identifier handed to ``get_genome`` and ``write_plotData``.
        path: Forwarded unchanged to ``write_plotData``.
        windows: Sequence of values used as the second argument of
            ``calc_fractions`` (presumably window sizes), processed in order.
    """
    sequence = get_genome(accession)
    print(f"Length of genome: {len(sequence)}")

    for window in windows:
        window_fractions, window_bases = calc_fractions(sequence, window)
        write_plotData(accession, path, window_fractions, window_bases, window)
        print(f"Completed window {window}")

    # NOTE(review): the original layout was lost; the blank separator line is
    # assumed to be printed once after the loop -- confirm.
    print()
def get_gene_data(accession, path):
    """Extract the gene data for ``accession`` and write its output files.

    The genome length (from ``get_genome``) and the text returned by
    ``get_gff`` are passed to ``extract_genes``. The extracted data is then
    handed to ``write_karyotype`` and ``write_band_labels``, with a status
    message printed after each stage.

    Args:
        accession: Identifier passed to every helper called here.
        path: Forwarded unchanged to ``write_karyotype`` and
            ``write_band_labels``.
    """
    genome_size = len(get_genome(accession))
    # The genome is fetched before the GFF text, exactly as in the original.
    gene_records = extract_genes(accession, get_gff(accession), genome_size)
    print("Extracted gene data")

    write_karyotype(accession, path, gene_records, genome_size)
    print("Created karyotype file")

    write_band_labels(accession, path, gene_records, genome_size)
    # The trailing newline leaves a blank line after the final status message.
    print("Created gene label file\n")
strandOuter += f"mt1 {centre} {centre} {D[0]}\n" else: strandInner += f"mt1 {centre} {centre} {D[0]}\n" else: if D[4] == "+": strandOuter += f"mt1 {D[1]} {D[2]} {D[0]}\n" else: strandInner += f"mt1 {D[1]} {D[2]} {D[0]}\n" write_file(f"{path}karyotype.{accession}.+.band_labels.txt", strandOuter) write_file(f"{path}karyotype.{accession}.-.band_labels.txt", strandInner) def get_gene_data(accession, path): genomeLength = len(get_genome(accession)) text = get_gff(accession) data = extract_genes(accession, text, genomeLength) print("Extracted gene data") write_karyotype(accession, path, data, genomeLength) print("Created karyotype file") write_band_labels(accession, path, data, genomeLength) print("Created gene label file\n") if __name__ == "__main__": accession = "NC_012920.1" path = "../data/temp/" gnome = get_genome(accession) genomeLength = len(gnome) gff = extract_genes(accession, get_gff(accession), genomeLength)
from mtdna_utilities import get_genome
from read_wiki_table import *

# Prints every gene name followed by the first 30 characters of the genome
# slice that the gene's coordinates select.
gnome = get_genome("NC_012920.1")

# NOTE(review): ``genes`` is not defined in this script; presumably it is
# supplied by the star import above -- confirm.
# Each record is indexed as [name, start, end, fourth field].
glist = {row[0]: [int(row[1]), int(row[2]), row[3]] for row in genes}
gorder = [row[0] for row in genes]

for g in gorder:
    print(g)
    first, last = glist[g][0], glist[g][1]
    # start - 1 .. end: the stored coordinates are used as 1-based, inclusive.
    gene = gnome[first - 1:last]
    print(gene[:30])
stroke_color = black """ conf_plots += "</plot>\n" conf_plots += "</plots>\n" return conf_plots #accession = "NC_012920.1" # Human reference sequence #accession = "NC_005089.1" # Mouse reference sequence accession = "NC_027264.1" # Baker's yeast (Saccharomyces cerevisiae) reference sequence #accession = "NC_001224.1" # Another yeast path = "../data/temp/" ideoDims = [0.6, 0.075] thickness = ideoDims[1]/ideoDims[0] genomeLength = len(get_genome(accession)) conf_ideogram = create_ideogram(ideoDims[0], ideoDims[1]) conf_image = create_image("../images/circos", accession) conf_plots = "" conf_ticks = "" if genomeLength < 30000: conf_ticks = create_ticks(f"{1-thickness}r+10p", "out", 1) else: conf_ticks = create_ticks(f"{1-thickness}r+10p", "out", 5) get_gene_data(accession, path) plots = [["text", f"{path}karyotype.{accession}.+.band_labels.txt", 1, 1.25, "yes"], ["text", f"{path}karyotype.{accession}.-.band_labels.txt", 0.78-(2*thickness), 0.98-(2*thickness), "no"], ["tile", f"{path}karyotype.{accession}.-.txt", 0.98-(2*thickness), 0.98-thickness]]
plt.ylabel("Proportion of base", fontsize=30) plt.tick_params(axis='both', which='major', labelsize=20) plt.ylim(0, yRange) plt.show() def find_max(genomes, windows): '''Returns all of the fractions and maximum values for each window''' fractions = [[calc_fractions(G, W) for G in genomes] for W in windows] maxValues = [[[max(B) for B in fractions[w][g]] for g in range(len(fractions[w]))] for w in range(len(fractions))] maxValues = [[max(G) for G in maxValues[w]] for w in range(len(maxValues))] maxValues = [max(W) for W in maxValues] return fractions, maxValues accession = "NC_012920.1" windows = [10, 100, 500] numShuffles = 3 bases = ["A", "C", "T", "G"] genomes = [get_genome(accession)] for x in range(numShuffles): genomes.append(generate_genome(genomes[0])) fractions, maxValues = find_max(genomes, windows) for w, W in enumerate(windows): for g in range(len(genomes)): plot(W, fractions[w][g], bases, maxValues[w])