def number_sequence(seq, gap='-'):
    """
    Numbers a sequence (as in, doesn't number the gaps).

    Arguments:
    seq -- Sequence to be numbered (any iterable of symbols, e.g. one
           row of an MSA).
    gap -- Symbol treated as a gap (default '-'). Gap positions do not
           advance the numbering.

    Returns:
    List with one entry per position of seq: the 1-based running number
    for non-gap positions, or the gap symbol itself for gap positions.
    """
    numbers = []
    count = 0
    for symbol in seq:
        if symbol == gap:
            # Gaps keep their place in the output but consume no number.
            numbers.append(gap)
        else:
            count += 1
            numbers.append(count)
    return numbers


# main code below here
if __name__ == '__main__':
    # Usage: <script> <pdb file> <fasta file>
    a = parse_pdb_file(open_pdb_file(sys.argv[1]), ['ATOM', 'DBREF', 'REMARK'], [465])
    a['ATOM'] = filter_target(a, name=['CA'])
    b = parse_fasta_file(open_fasta_file(sys.argv[2]))
    print(b)
    print(thread_sequence(b[0][1], a))
def calculate_edge_lengths(atoms, simplexlist):
    """
    Calculates the edge lengths of every simplex.

    Arguments:
    atoms -- Indexable collection of atoms from the pdb structure. Each
             atom is looked up by index and its 'coord' entry is used;
             coordinates must support subtraction (presumably numpy
             arrays -- confirm against the pdb parser).
    simplexlist -- List of simplices, each an iterable of indices into
                   atoms.

    Returns:
    A list with one entry per simplex, in input order. Each entry is a
    list of the distances between every pair of that simplex's vertices,
    in the order yielded by combinations(vertices, 2). A simplex with 4
    vertices therefore yields 6 edge lengths.
    """
    edge_lengths = []
    for simplex in simplexlist:
        coords = [atoms[index]['coord'] for index in simplex]
        # One length per unordered vertex pair of this simplex.
        edge_lengths.append(
            [norm(start - end) for start, end in combinations(coords, 2)]
        )
    return edge_lengths


# test code below here
if __name__ == '__main__':
    a = filter_target(parse_pdb_file(open_pdb_file(sys.argv[1]), ['ATOM', 'REMARK'], [465]), chain=['A'], name=['CA'])
    b = tessellate(a).vertices
    c = [[a[x]['res'] for x in y] for y in b]
    d = simplex_potential(c)
    # [print(x['resseq'],x['res'],y) for x,y in zip(a,residue_potential(a, b, d))]