#Recurrence relation is defined as Fn = Fn-1 + k * Fn-2, where Fn-1 is the number of rabbit pairs that were alive one
# month ago and Fn-2 is the number of pairs that were alive two months ago, each of which produces k offspring pairs
# in month n.
# What to do:
#1. function must take n and k, where n is the month we're interested in and k is the number of offspring per pair of rabbits.
#2. Get the month and litter size from specified file
#3. write the result from the how_many_bunnies functions into a new file

from helperfunctions import write_to_file, make_output_filename, get_nums

# Input file; get_nums() is called on it below to obtain the month and the litter size.
input_filename = "filename.txt"


def how_many_bunnies(month, litter_size):
    """Return the number of rabbit pairs alive in the given month.

    Follows the recurrence F(n) = F(n-1) + litter_size * F(n-2) with
    F(1) = F(2) = 1, computed iteratively so the running time is linear in
    ``month`` instead of exponential.

    Args:
        month: 1-based month of interest.
        litter_size: number of offspring pairs produced by each reproducing pair.

    Returns:
        The number of rabbit pairs in ``month``.

    Raises:
        ValueError: if ``month`` is smaller than 1 (the recurrence has no
            value there; the recursive version never terminated).
    """
    if month < 1:
        raise ValueError("month must be at least 1")

    # previous = F(n-2), current = F(n-1); both start at the base case of 1.
    previous, current = 1, 1
    remaining = month - 2
    while remaining > 0:
        previous, current = current, current + previous * litter_size
        remaining -= 1
    return current


# The first two numbers in the input file are the month and the litter size.
nums = get_nums(input_filename)
month, litter_size = nums[0], nums[1]
bunnies = how_many_bunnies(month, litter_size)

# write_to_file receives text, so the count is converted to a string first.
write_to_file(input_filename, str(bunnies))
Ejemplo n.º 2
0
#What to do:
# 1. get the RNA string from the file
# 2. make the string into a Seq object with Biopython and specify RNA alphabet
# 3. use translate() from BioPython
# 4. write protein sequence to file

from helperfunctions import write_to_file, make_output_filename, get_string

from Bio.Seq import Seq
from Bio.Alphabet import IUPAC

input_filename = "filename.txt"

# NOTE(review): Bio.Alphabet (and the alphabet argument of Seq) was removed in
# Biopython 1.78, so this script needs an older Biopython release - confirm the
# pinned version before running.
rna_string = get_string(input_filename)
rna_sequence = Seq(rna_string, IUPAC.unambiguous_rna)

# to_stop=True ends the translation at the first in-frame stop codon.
protein_sequence = rna_sequence.translate(to_stop=True)

# translate() returns a Seq object; convert it to plain text so write_to_file
# receives a string, as it does in the other scripts.
write_to_file(input_filename, str(protein_sequence))
Ejemplo n.º 3
0
from helperfunctions import write_to_file, make_output_filename, make_iterator

from Bio import SeqIO
from Bio.SeqUtils import GC

# Input file handed to max_GC_content() and write_to_file() below.
input_filename = "filename.txt"
# Sequence file format name; presumably meant to be forwarded to make_iterator() - verify against helperfunctions.
format_type = "fasta"


def max_GC_content(input_file, format_type="fasta"):
    """Find the record with the highest GC content in a sequence file.

    Args:
        input_file: file handed to make_iterator() to obtain the records.
        format_type: format name forwarded to make_iterator(); defaults to
            "fasta" so callers that pass only the file keep working.

    Returns:
        A two-line string: the id of the winning record, a newline, and its
        GC content as computed by Bio.SeqUtils.GC. If no record scores above
        0, the id is empty and the value is 0.0.
    """
    best_gc = 0.00
    best_id = ""

    # Read from the file that was passed in, not the module-level input_filename.
    for record in make_iterator(input_file, format_type):
        gc_content = GC(record.seq)
        # Strict comparison: on a tie the earliest record keeps the lead.
        if gc_content > best_gc:
            best_gc = gc_content
            best_id = record.id

    return best_id + "\n" + str(best_gc)


# max_GC_content() takes the file and the sequence format; pass both so the
# call matches its signature.
answer = max_GC_content(input_filename, format_type)

write_to_file(input_filename, answer)
Ejemplo n.º 4
0
#tally the nucleotides position by position: every record of the FASTA file adds one count per base to profile
for record in make_iterator(input_filename, "fasta"):
    length = len(record.seq)
    #the matrix is created on the first record: make_matrix(length, 4), indexed as profile[position][column]
    if profile == []:
        profile = make_matrix(length, 4)
    for position, base in enumerate(record.seq):
        profile[position][nucleotides[base]] += 1

consensus_sequence = ""

#for each position, pick the column with the highest count (first one on a tie) and append the matching nucleotide
for position in range(length):
    counts = profile[position]
    winning_column = counts.index(max(counts))
    consensus_sequence += "".join(
        base for base in nucleotides if nucleotides[base] == winning_column)

#build the report: the consensus sequence on the first line, then one "X: counts" line per nucleotide in A, C, G, T order
profile_lines = [consensus_sequence]
for base in ("A", "C", "G", "T"):
    counts = " ".join(
        str(profile[position][nucleotides[base]]) for position in range(length))
    profile_lines.append(base + ": " + counts)

#every line, including the last one, ends with a newline
final_profile = "\n".join(profile_lines) + "\n"

write_to_file(input_filename, final_profile)
#What we're going to do
#1. get the DNA string
#2. switch each nucleotide to its complement but in lowercase, so it does not affect the conversion of other nucleotides
#3. make the strand uppercase, and then reverse it
#4. write the reverse complement to a new file

#import the functions from helperfunctions.py
from helperfunctions import get_string, write_to_file, make_output_filename

#name of the file holding the DNA string
input_filename = "filename.txt"

#read the DNA string
DNA_string = get_string(input_filename)

#map each uppercase nucleotide to its complement in lowercase in a single pass,
#so an already converted base can never be converted a second time
lowercase_complement = DNA_string.translate(str.maketrans("ACGT", "tgca"))

#uppercase the strand, then reverse it: a slice with step -1 walks the string backwards
reverse_complement = lowercase_complement.upper()[::-1]

#hand the reverse complement to write_to_file
write_to_file(input_filename, reverse_complement)
Ejemplo n.º 6
0
#What to do:
# 1. Get the strings from file
# 2. call find() on the DNA string repeatedly, each time resuming one base past the previous hit
#    (so overlapping matches are still found), until it returns -1
# 3. keep track of the indexes (converted to 1-based numbering)
# 4. write indexes to file

from helperfunctions import write_to_file, get_strings, make_output_filename

input_filename = "filename.txt"

dna_strings = get_strings(input_filename)
dna_string = dna_strings[0].strip()
substring = dna_strings[1].strip()

indexes = []

# An empty substring "matches" at every offset and would never let the search
# finish, so it is treated as having no occurrences.
substring_index = dna_string.find(substring) if substring else -1

while substring_index != -1:
    indexes.append(str(substring_index + 1))
    # Continue from the base after this hit instead of rebuilding the string.
    substring_index = dna_string.find(substring, substring_index + 1)

index_string = " ".join(indexes)

write_to_file(input_filename, index_string)

#this was tricky because the answer was in 1-based numbering and I goofed.
Ejemplo n.º 7
0
#What to do:
#1. make a little hammy distance function to compare two DNA sequences
#2. Extract both sequences from a single file
#3. write total point mutations to file

from helperfunctions import get_strings, write_to_file, make_output_filename

# Input file; get_strings() reads the two sequences to compare from it below.
input_filename = "filename.txt"


def hamming_distance(DNA_seq1, DNA_seq2):
    """Count the positions at which the two sequences differ.

    Positions are taken from DNA_seq1 and looked up in DNA_seq2, so DNA_seq2
    must be at least as long: a shorter DNA_seq2 raises IndexError, and any
    items of DNA_seq2 beyond the length of DNA_seq1 are ignored.
    """
    return sum(
        1 for position, base in enumerate(DNA_seq1)
        if base != DNA_seq2[position])


sequences = get_strings(input_filename)

# Strip surrounding whitespace first: if get_strings() keeps line endings
# (TODO confirm), a trailing newline on only one of the lines would make the
# lengths differ and break the position-by-position comparison.
point_mutations = hamming_distance(sequences[0].strip(), sequences[1].strip())

write_to_file(input_filename, str(point_mutations))
Ejemplo n.º 8
0
# heterozygote

# What to do:
# make a function for this
# 1. get the total number of organisms, and the total - 1 for the second organism
# 2. calculate the probability it will be dominant homozygote or heterozygote
# 3. write this probability into a file

from helperfunctions import write_to_file, get_nums, make_output_file

# Input file; get_nums() reads the three genotype counts (k, m, n) from it below.
input_filename = "filename.txt"


def prob_dominant_allele(k, m, n):
    """Return the probability that the offspring of two randomly chosen
    organisms carries at least one dominant allele.

    Args:
        k: number of homozygous dominant organisms.
        m: number of heterozygous organisms.
        n: number of homozygous recessive organisms.

    Returns:
        The sum, over every parent pairing that can yield a dominant
        offspring, of the chance of drawing that pairing times the chance
        the offspring is dominant.

    Raises:
        ZeroDivisionError: if the population holds fewer than two organisms.
    """
    total_organisms = k + m + n

    def pair_prob(first, second):
        # Chance of drawing one parent out of `first` candidates and then the
        # other out of `second` candidates, without replacement.
        return first / total_organisms * second / (total_organisms - 1)

    # Mixed pairings can be drawn in either order, hence the factor 2.
    both_dominant = pair_prob(k, k - 1)
    dominant_and_hetero = pair_prob(k, m) * 2
    dominant_and_recessive = pair_prob(k, n) * 2
    both_hetero = pair_prob(m, m - 1) * .75  # 3 of 4 offspring are dominant
    hetero_and_recessive = pair_prob(m, n) * .5 * 2  # half are dominant

    return (both_dominant + dominant_and_hetero + dominant_and_recessive +
            both_hetero + hetero_and_recessive)


# The first three numbers in the input file are the counts k, m and n.
organism_nums = get_nums(input_filename)
k, m, n = organism_nums[0], organism_nums[1], organism_nums[2]
probability = prob_dominant_allele(k, m, n)

# write_to_file receives text, so the probability is converted to a string first.
write_to_file(input_filename, str(probability))
#What I have to do
#1. Open file and read DNA string
#2. Replace every T nucleotide with a U
#3. write new RNA string into a file

from helperfunctions import get_string, write_to_file, make_output_filename

#name of the file holding the DNA string
input_filename = "filename.txt"

#read the DNA string
DNA_string = get_string(input_filename)

#transcribe: map every 'T' to 'U' in a single pass, leaving all other characters as they are
RNA_string = DNA_string.translate(str.maketrans("T", "U"))

#hand the RNA string to write_to_file
write_to_file(input_filename, RNA_string)
Ejemplo n.º 10
0
#What I have to do
#1. Open the file with the DNA string
#2. Read from the file
#3. Keep tally of nucleotides
#4. Create a new file and write the tally in the order A C G and T

from helperfunctions import get_string, write_to_file, make_output_filename

#name of the file holding the DNA string
input_filename = "filename.txt"

#read the DNA string that is going to be counted
DNA_string = get_string(input_filename)

#count each nucleotide in the order A, C, G, T and join the counts with single spaces
nucleotide_tally = " ".join(
    str(DNA_string.count(base)) for base in ("A", "C", "G", "T"))

#hand the tally to write_to_file
write_to_file(input_filename, nucleotide_tally)