def MPRT(id_list):
    '''
    Finds locations of the N-glycosylation motif N{P}[ST]{P}.

    Input: List of Uniprot IDs
    Output: Writes each ID that contains the motif, followed by its list of
        1-based motif start positions, to outputs/mprt.txt
    '''
    motif_length = 4

    output_list = []
    fasta_dict = dict()
    for prot_id in id_list:
        # get fasta record from uniprot
        # NOTE(review): assumes resources.get_uniprot returns a mapping that
        # contains prot_id as a key -- confirm against resources.
        fasta_dict.update(resources.get_uniprot(prot_id))
        protein = fasta_dict[prot_id]

        # Only consider start positions where a full 4-residue window fits.
        # The previous bound (len(protein) - 2) allowed protein[i + 3] to be
        # read past the end of the sequence and raise IndexError.
        locations = [
            i + 1
            for i in range(len(protein) - motif_length + 1)
            if protein[i] == 'N'
            and protein[i + 1] != 'P'
            and protein[i + 2] in ('S', 'T')
            and protein[i + 3] != 'P'
        ]

        # only append if there are motifs
        if locations:
            output_list.append(prot_id)
            output_list.append(resources.list_to_str(locations))

    # write to file
    output_path = 'outputs/mprt.txt'
    resources.write_file(output_path, output_list)
def CONS(dna_dict):
    '''
    Input: Fasta Dictionary with ID as key and DNA sequence as value
    Output: Writes a consensus string and profile matrix for the collection
        to outputs/cons.txt
    '''
    sequences = list(dna_dict.values())
    # the first sequence decides how many columns are profiled
    width = len(sequences[0])

    # one row of per-column counts per nucleotide; the A, C, G, T key order
    # is also the tie-break order for the consensus
    profile = {'A': [], 'C': [], 'G': [], 'T': []}
    consensus_chars = []
    for col in range(width):
        # open a fresh column in every row
        for row in profile.values():
            row.append(0)

        # tally this column; symbols other than A/C/G/T are not counted
        for seq in sequences:
            base = seq[col]
            if base in profile:
                profile[base][col] += 1

        # most frequent nucleotide in this column (first one wins on a tie)
        consensus_chars.append(max(profile, key=lambda nt: profile[nt][col]))

    output_list = [''.join(consensus_chars)]
    for nt in ('A', 'C', 'G', 'T'):
        output_list.append(nt + ": " + resources.list_to_str(profile[nt]))

    output_path = 'outputs/cons.txt'
    resources.write_file(output_path, output_list)
def PERM(n):
    '''
    Input: length of permutations
    Output: Writes the number of permutations of 1..n, followed by one
        formatted permutation per entry, to 'outputs/perm.txt'
    '''
    num_list = list(range(1, n + 1))

    # Materialise the formatted rows first so they can be counted.
    formatted_list = [
        resources.list_to_str(perm).strip() for perm in permutations(num_list)
    ]

    # Count the rows rather than calling len() on the permutations object:
    # if `permutations` is itertools.permutations (presumably -- the import is
    # not shown here) it returns an iterator, which has no len().
    total = len(formatted_list)
    formatted_list.insert(0, str(total))

    output_path = 'outputs/perm.txt'
    resources.write_file(output_path, formatted_list)
def SUBS(s, t):
    '''
    Input: 2 DNA strings s,t
    Output: 1-based locations of t as a substring of s (overlapping matches
        included), formatted with resources.list_to_str. If t does not occur
        in s the list of locations is empty.
    '''
    locs = []
    start = 0
    while True:
        idx = s.find(t, start)
        if idx == -1:
            break
        # report 1-based positions
        locs.append(idx + 1)
        # resume one character after this match start so overlaps are found
        start = idx + 1

    # Previously the list was seeded with s.find(t) + 1, which reported a
    # bogus location 0 when t was absent from s.
    return resources.list_to_str(locs)
def REVP(dna_dict):
    '''
    Input: Dict with description as key and dna sequence as value
    Output: Writes list of position and length of all reverse palindromes of
        length 4-12 to outputs/revp.txt
    '''
    # element 1 of the fasta_to_string result is used as the dna sequence
    sequence = resources.fasta_to_string(dna_dict)[1]

    # position-wise complement (not reversed)
    complement = ''.join(resources.compliments[nt] for nt in sequence)

    total = len(sequence)
    hits = []
    for start in range(total):
        for size in range(4, 13):
            # windows only get longer, so stop once one runs off the end
            if start + size > total:
                break
            last = start + size - 1
            # a reverse palindrome reads the same as its complement read
            # backwards: compare each base with the mirrored complement
            if all(sequence[start + k] == complement[last - k]
                   for k in range(size - 1)):
                hits.append([start + 1, size])

    # convert to string
    lines = [resources.list_to_str(pair).strip() for pair in hits]

    output_path = 'outputs/revp.txt'
    resources.write_file(output_path, lines)
def GRPH(dna_dict):
    '''
    Input: Fasta Dictionary with ID as key and DNA sequence as value
    Output: Writes the adjacency list for suffix 3 to outputs/grph.txt
    '''
    k = 3

    # directed edge tail -> head whenever the last k nt of tail equal the
    # first k nt of head; a sequence is never paired with itself
    edges = []
    for tail_id, tail_seq in dna_dict.items():
        suffix = tail_seq[len(tail_seq) - k:]
        for head_id, head_seq in dna_dict.items():
            if head_id == tail_id:
                continue
            if suffix == head_seq[:k]:
                edges.append([tail_id, head_id])

    lines = [resources.list_to_str(edge) for edge in edges]

    output_path = 'outputs/grph.txt'
    resources.write_file(output_path, lines)
def old():
    '''
    Brute-force search for a longest increasing and a longest decreasing
    subsequence; writes both to outputs/lgis.txt.

    NOTE(review): this function reads `permutation`, which is neither a
    parameter nor assigned locally, so it depends on a module-level name of
    that name existing -- confirm, or remove this function. It writes to the
    same output path as LGIS and still contains debug print() calls, so it
    looks like a superseded draft -- TODO confirm.
    '''
    # increasing
    # NOTE(review): seeded with the bare first element rather than a
    # one-element list; list(...) below only works if elements are iterable
    # (e.g. strings) -- confirm the element type.
    increasing_possibilities = [permutation[0]]
    for num in permutation:
        for possibility in copy.copy(increasing_possibilities):
            # append num to possibility and check if it works
            # if it does, create a new possibility with that list
            possible = True
            temp = list(copy.copy(possibility))
            temp.append(num)
            for i in range(len(temp) - 1):
                if temp[i] > temp[i + 1]:
                    possible = False
                    break
            if possible:
                previous_possibilities = copy.copy(increasing_possibilities)
                increasing_possibilities.append(temp)
                # if there is another possibility of same length with lower/higher last number, remove higher possibility
                for possibility in previous_possibilities:
                    possibility = list(possibility)
                    print(possibility, temp)
                    if len(possibility) != len(temp):
                        continue
                    # NOTE(review): temp.pop() mutates temp, which was already
                    # appended to increasing_possibilities above.
                    temp1 = list(possibility).pop()
                    temp2 = temp.pop()
                    if temp1 == temp2:
                        if int(possibility[len(possibility) - 1]) > int(
                                temp[len(temp) - 1]):
                            increasing_possibilities.remove(possibility)
                        else:
                            increasing_possibilities.remove(temp)
        # NOTE(review): removes from the list that is being iterated.
        for possibility in increasing_possibilities:
            if len(possibility) != 1:
                continue
            if int(possibility[0]) > int(num):
                increasing_possibilities.remove(possibility)
        increasing_possibilities.append([num])
        print(increasing_possibilities)
    # find longest possibility
    increasing_longest = []
    increasing_length = 0
    for possibility in increasing_possibilities:
        if len(possibility) > increasing_length:
            increasing_longest = possibility
            increasing_length = len(possibility)
    # decreasing
    decreasing_possibilities = []
    for num in permutation:
        for possibility in copy.copy(decreasing_possibilities):
            # append num to possibility and check if it works
            # if it does, create a new possibility with that list
            possible = True
            temp = copy.copy(possibility)
            temp.append(num)
            for i in range(len(temp) - 1):
                if temp[i] < temp[i + 1]:
                    possible = False
                    break
            if possible:
                decreasing_possibilities.append(temp)
        # every element also starts a new one-element candidate
        decreasing_possibilities.append([num])
    # find longest possibility
    decreasing_longest = []
    decreasing_length = 0
    for possibility in decreasing_possibilities:
        if len(possibility) > decreasing_length:
            decreasing_longest = possibility
            decreasing_length = len(possibility)
    increasing_str = resources.list_to_str(increasing_longest)
    decreasing_str = resources.list_to_str(decreasing_longest)
    output_path = 'outputs/lgis.txt'
    resources.write_file(output_path, [increasing_str, decreasing_str])
def LGIS(n, permutation):
    '''
    Input: length of permutation, permutation of pi
    Output: Writes a longest increasing subsequence and a longest decreasing
        subsequence of the permutation to outputs/lgis.txt

    The n argument is overwritten with len(permutation), so the value passed
    in does not affect the result.
    NOTE(review): elements are compared with <, so they are presumably
    numbers rather than digit strings -- confirm against the caller.
    '''
    n = len(permutation)

    # LIS: traces[i] is a longest increasing subsequence starting at index i,
    # filled from right to left so every later trace is already known.
    traces = [None] * n
    for i in reversed(range(n)):
        best = None
        best_length = 0
        for j in range(i + 1, n):
            if (permutation[i] < permutation[j]
                    and len(traces[j]) > best_length):
                best = j
                best_length = len(traces[j])
        if best is None:
            traces[i] = [permutation[i]]
        else:
            traces[i] = [permutation[i]] + traces[best]
    # first longest trace wins, as before
    lis = max(traces, key=len, default=[])

    # LDS: traces[i] is a longest decreasing subsequence ending at index i,
    # filled from left to right.
    traces = [None] * n
    for i in range(n):
        # "No predecessor" is tracked with None, not 0: index 0 is a valid
        # predecessor here, and using 0 as the sentinel dropped every chain
        # whose best predecessor was the first element (e.g. [5, 4, 3] gave
        # an LDS of length 1).
        best = None
        best_length = 0
        for j in range(i):
            if (permutation[i] < permutation[j]
                    and len(traces[j]) > best_length):
                best = j
                best_length = len(traces[j])
        if best is None:
            traces[i] = [permutation[i]]
        else:
            traces[i] = traces[best] + [permutation[i]]
    lds = max(traces, key=len, default=[])

    output_path = 'outputs/lgis.txt'
    output_list = [resources.list_to_str(seq) for seq in [lis, lds]]
    resources.write_file(output_path, output_list)