# Rabbit recurrence: F(n) = F(n-1) + k * F(n-2), with F(1) = F(2) = 1.
# F(n-1) is the number of rabbit pairs that were alive one month ago; F(n-2)
# is the number of pairs that were alive two months ago, and each of those
# produces k new pairs in month n.
#
# What to do:
# 1. The function takes n and k, where n is the month we're interested in and
#    k is the number of offspring per pair of rabbits.
# 2. Get the month and litter size from the specified file.
# 3. Write the result of how_many_bunnies into a new file.

input_filename = "filename.txt"


def how_many_bunnies(month, litter_size):
    """Return the number of rabbit pairs alive in the given month.

    Args:
        month: 1-based month of interest.
        litter_size: Number of offspring pairs each mature pair produces.

    Returns:
        The population after ``month`` months, starting from a single pair
        (months 1 and 2 both hold exactly one pair).

    Raises:
        ValueError: If ``month`` is smaller than 1. The recursive version
            this replaces never terminated for such input.
    """
    if month < 1:
        raise ValueError("month must be at least 1")

    # Walk the recurrence forward instead of recursing on both branches:
    # the double recursion repeats the same sub-problems and takes
    # exponential time, while this loop does one step per month.
    two_months_ago, last_month = 1, 1
    # int() keeps a whole-number float month working, as it did before.
    for _ in range(int(month) - 2):
        two_months_ago, last_month = (
            last_month,
            last_month + two_months_ago * litter_size,
        )
    return last_month


def main():
    """Read month and litter size from the input file and write the result."""
    # Imported here so how_many_bunnies can be imported and tested without
    # the project-local helper module being available.
    from helperfunctions import write_to_file, make_output_filename, get_nums

    nums = get_nums(input_filename)
    bunnies = how_many_bunnies(nums[0], nums[1])
    write_to_file(input_filename, str(bunnies))


if __name__ == "__main__":
    main()
# What to do:
# 1. get the RNA string from the file
# 2. make the string into a Seq object with Biopython
# 3. use translate() from Biopython
# 4. write the protein sequence to file
from helperfunctions import write_to_file, make_output_filename, get_string
from Bio.Seq import Seq

input_filename = "filename.txt"

rna_string = get_string(input_filename)

# Bio.Alphabet (and the alphabet argument of Seq) was removed in
# Biopython 1.78, so the sequence is built from the plain string; translate()
# does not need an explicit alphabet.
rna_sequence = Seq(rna_string)

# to_stop=True ends the protein at the first stop codon and leaves the stop
# symbol out of the result.
protein_sequence = rna_sequence.translate(to_stop=True)

# Hand write_to_file a plain string, as every other script in this file does,
# rather than a Seq object.
write_to_file(input_filename, str(protein_sequence))
from helperfunctions import write_to_file, make_output_filename, make_iterator
from Bio import SeqIO
from Bio.SeqUtils import GC

input_filename = "filename.txt"
format_type = "fasta"


def max_GC_content(input_file, format_type="fasta"):
    """Find the record with the highest GC content in a sequence file.

    Args:
        input_file: Name of the sequence file handed to ``make_iterator``.
        format_type: Sequence file format handed to ``make_iterator``;
            defaults to ``"fasta"``.

    Returns:
        A string holding the winning record id and its GC content, separated
        by a newline. If no record has a GC content above zero the id part
        is empty and the value is ``0.0``.
    """
    best_gc = 0.00
    best_id = ""
    # Use the file passed in by the caller; the previous version ignored the
    # parameter and always read the module-level input_filename.
    for record in make_iterator(input_file, format_type):
        # Biopython's GC() gives the GC content of a sequence as a percentage.
        # NOTE(review): newer Biopython releases dropped GC() in favour of
        # gc_fraction() (a 0-1 fraction) - confirm the installed version.
        gc_percent = GC(record.seq)
        if gc_percent > best_gc:
            best_gc = gc_percent
            best_id = record.id
    return best_id + "\n" + str(best_gc)


# The function used to be called with one argument although it required two.
answer = max_GC_content(input_filename, format_type)
write_to_file(input_filename, answer)
# Tally the nucleotides seen at each position by adding to `profile` while
# walking through every sequence in the FASTA file.
# NOTE: `profile`, `nucleotides`, `make_matrix`, `make_iterator`,
# `write_to_file` and `input_filename` are not defined in this section; they
# have to exist already when it runs.
for sequence in make_iterator(input_filename, "fasta"):
    length = len(sequence.seq)
    if profile == []:
        # First record: build the count matrix (presumably one row per
        # position with four counters - confirm against make_matrix).
        profile = make_matrix(length, 4)
    for position, nucleotide in enumerate(sequence.seq):
        profile[position][nucleotides[nucleotide]] += 1

# For every position pick the column with the highest count (the first one
# wins a tie) and append the letter(s) mapped to that column.
consensus_letters = []
for position in range(length):
    counts = profile[position]
    top_column = counts.index(max(counts))
    consensus_letters.extend(
        letter for letter in nucleotides if nucleotides[letter] == top_column
    )
consensus_sequence = "".join(consensus_letters)

# Render the consensus sequence followed by one "X: n n n ..." row per
# nucleotide, in the order A, C, G, T.
profile_lines = [consensus_sequence + "\n"]
for nucleotide in ("A", "C", "G", "T"):
    row = " ".join(
        str(profile[position][nucleotides[nucleotide]])
        for position in range(length)
    )
    profile_lines.append(nucleotide + ": " + row + "\n")
final_profile = "".join(profile_lines)

write_to_file(input_filename, final_profile)
# Plan:
# 1. get the DNA string
# 2. swap every uppercase nucleotide for its complement in a single pass, so
#    converting one base can never affect the conversion of another
# 3. make the strand uppercase, and then reverse it
# 4. write the reverse complement to a new file

# helper functions live in helperfunctions.py
from helperfunctions import get_string, write_to_file, make_output_filename

# name of the file to read
input_filename = "filename.txt"

# read the DNA string from the file
DNA_string = get_string(input_filename)

# A<->T and C<->G; any other character passes through untouched
complement_table = str.maketrans("ACGT", "TGCA")
complemented = DNA_string.translate(complement_table)

# Slicing with a step of -1 walks the string backwards, see
# https://stackoverflow.com/questions/931092/reverse-a-string-in-python
reverse_complement = complemented.upper()[::-1]

# write the reverse complement to the output file
write_to_file(input_filename, reverse_complement)
# What to do:
# 1. Get the strings from file
# 2. use find() on the DNA string until it returns -1, resuming each search
#    one base after the previous hit so overlapping matches are found
# 3. keep track of the indexes
# 4. write indexes to file

input_filename = "filename.txt"


def find_motif_positions(dna_string, substring):
    """Return the 1-based start positions of ``substring`` in ``dna_string``.

    Overlapping occurrences are all reported.

    Args:
        dna_string: The sequence to search in.
        substring: The motif to look for.

    Returns:
        A list of 1-based start positions in ascending order; empty when the
        motif does not occur or is itself empty.
    """
    positions = []
    if not substring:
        # find("") matches at every index, which made the old loop spin
        # forever; an empty motif simply has no positions.
        return positions

    hit = dna_string.find(substring)
    while hit != -1:
        # the expected answer uses 1-based numbering
        positions.append(hit + 1)
        # Resume right after the start of this hit instead of copying the
        # whole string with a masked base and rescanning from the beginning.
        hit = dna_string.find(substring, hit + 1)
    return positions


def main():
    """Read sequence and motif from the input file and write the positions."""
    # Imported here so find_motif_positions can be imported and tested
    # without the project-local helper module being available.
    from helperfunctions import write_to_file, get_strings, make_output_filename

    dna_strings = get_strings(input_filename)
    dna_string = dna_strings[0].strip()
    substring = dna_strings[1].strip()
    indexes = [
        str(position)
        for position in find_motif_positions(dna_string, substring)
    ]
    index_string = " ".join(indexes)
    write_to_file(input_filename, index_string)


if __name__ == "__main__":
    main()

# this was tricky because the answer was in 1-based numbering and I goofed.
# What to do:
# 1. make a little Hamming distance function to compare two DNA sequences
# 2. Extract both sequences from a single file
# 3. write total point mutations to file

input_filename = "filename.txt"


def hamming_distance(DNA_seq1, DNA_seq2):
    """Count the positions at which two equally long sequences differ.

    Args:
        DNA_seq1: First sequence.
        DNA_seq2: Second sequence; must have the same length as the first.

    Returns:
        The number of mismatching positions.

    Raises:
        ValueError: If the sequences differ in length. Previously a shorter
            second sequence raised IndexError and a longer one was silently
            cut off.
    """
    if len(DNA_seq1) != len(DNA_seq2):
        raise ValueError("sequences must have the same length")
    return sum(1 for base1, base2 in zip(DNA_seq1, DNA_seq2) if base1 != base2)


def main():
    """Read both sequences from the input file and write their distance."""
    # Imported here so hamming_distance can be imported and tested without
    # the project-local helper module being available.
    from helperfunctions import get_strings, write_to_file, make_output_filename

    sequences = get_strings(input_filename)
    # Strip surrounding whitespace, as the motif-search script in this file
    # does with get_strings() output, so a trailing newline on only one of
    # the lines cannot skew the comparison.
    point_mutations = hamming_distance(
        sequences[0].strip(), sequences[1].strip()
    )
    write_to_file(input_filename, str(point_mutations))


if __name__ == "__main__":
    main()
# Probability that the offspring of two randomly chosen organisms carries a
# dominant allele, i.e. is a dominant homozygote or a heterozygote.
# What to do:
# make a function for this
# 1. get the total number of organisms, and the total - 1 for the second
#    organism
# 2. calculate the probability it will be dominant homozygote or heterozygote
# 3. write this probability into a file

input_filename = "filename.txt"


def prob_dominant_allele(k, m, n):
    """Return the probability that a random mating yields a dominant allele.

    Two different organisms are drawn from the population without
    replacement and mated.

    Args:
        k: Number of homozygous dominant organisms.
        m: Number of heterozygous organisms.
        n: Number of homozygous recessive organisms.

    Returns:
        The probability that the offspring shows the dominant phenotype.

    Raises:
        ValueError: If the population holds fewer than two organisms, so no
            pair can be drawn (this used to surface as ZeroDivisionError).
    """
    total_organisms = k + m + n
    if total_organisms < 2:
        raise ValueError("at least two organisms are needed to form a pair")

    # Mixed pairings are doubled because either parent can be drawn first.
    # The trailing factors are the chance of a dominant offspring for that
    # pairing: 1 when a homozygous dominant parent is involved, .75 for two
    # heterozygotes, .5 for heterozygote x homozygous recessive.
    prob_kk = (k / total_organisms * (k - 1) / (total_organisms - 1))
    prob_km = (k / total_organisms * m / (total_organisms - 1)) * 2
    prob_kn = (k / total_organisms * n / (total_organisms - 1)) * 2
    prob_mm = (m / total_organisms * (m - 1) / (total_organisms - 1) * .75)
    prob_mn = (m / total_organisms * n / (total_organisms - 1) * .5) * 2
    return prob_kk + prob_km + prob_kn + prob_mm + prob_mn


def main():
    """Read the three population counts and write the probability."""
    # Imported here so prob_dominant_allele can be imported and tested
    # without the project-local helper module being available. The helper is
    # imported as make_output_filename, the name every other script in this
    # file uses, instead of the former make_output_file.
    from helperfunctions import write_to_file, get_nums, make_output_filename

    organism_nums = get_nums(input_filename)
    probability = prob_dominant_allele(
        organism_nums[0], organism_nums[1], organism_nums[2]
    )
    write_to_file(input_filename, str(probability))


if __name__ == "__main__":
    main()
# Plan
# 1. Read the DNA string from the input file
# 2. Swap every T nucleotide for a U
# 3. Write the resulting RNA string into a file
from helperfunctions import get_string, write_to_file, make_output_filename

# file to read the DNA string from
input_filename = "filename.txt"

# load the DNA string
DNA_string = get_string(input_filename)

# transcribe: every 'T' becomes 'U', all other characters stay as they are
thymine_to_uracil = str.maketrans("T", "U")
RNA_string = DNA_string.translate(thymine_to_uracil)

# write the RNA string out
write_to_file(input_filename, RNA_string)
# Plan
# 1. Read the DNA string from the input file
# 2. Count how often each nucleotide occurs
# 3. Write the counts to a new file in the order A C G T
from helperfunctions import get_string, write_to_file, make_output_filename

# file to read the DNA string from
input_filename = "filename.txt"

# load the DNA string that is going to be counted
DNA_string = get_string(input_filename)

# one count per nucleotide, space separated, in the order A C G T
nucleotide_counts = [str(DNA_string.count(base)) for base in ("A", "C", "G", "T")]
nucleotide_tally = " ".join(nucleotide_counts)

# write the tally to the new file
write_to_file(input_filename, nucleotide_tally)