def make_prematrixlist(pdbid, chainid, cutoff=10):
    """
    Builds the neighbor list, simplex list and residue lookup for one chain.

    Arguments:
    pdbid -- PDB id, handed to download_pdb_file.
    chainid -- Chain id used when filtering the atoms.
    cutoff -- Cutoff handed to filter_simplicies. Defaults to 10, the value
              that used to be hard-coded.

    Returns:
    A 3-tuple of (neighborlist(data, simplexlist), the filtered simplexlist,
    dict mapping each 'resseq' to its 'res').
    """
    # NOTE: data is not updated after simplices are removed. The original
    # author assumed it is very unlikely that a residue ends up with no
    # simplices with lengths below the cutoff.
    pdbstruct = parse_pdb_file(download_pdb_file(pdbid), ['ATOM'], [])

    # Keep only the CA atoms of the requested chain, then restrict them to
    # the span reported by find_optimal_span.
    atoms = filter_target(pdbstruct['ATOM'], name=['CA'], chain=[chainid])
    atoms = filter_target(atoms, name=['CA'], span=find_optimal_span(atoms))

    # One entry per simplex; every vertex index is expanded to a
    # (resseq, res, coord) tuple. Per the original comment, each simplex
    # groups four atoms.
    simplexlist = [
        [(atoms[i]['resseq'], atoms[i]['res'], atoms[i]['coord'])
         for i in simplex]
        for simplex in tessellate(atoms).vertices
    ]
    data = [(atom['resseq'], atom['res']) for atom in atoms]

    simplexlist = filter_simplicies(cutoff, simplexlist)
    return (neighborlist(data, simplexlist), simplexlist, dict(data))
List of sequence numbers and the sequence itself, both trimmed. """ return zip(*[(i,y) for i,x,y in zip(seq_numbers, orig_seq, mut_seq) if x != '-']) def number_sequence(seq): """ Numbers a sequence (as in, doesn't number the gaps). Arguments: seq -- Sequence to be numbered. Returns: List of numbers corresponding to each index of the MSA. """ numbers = [] count = 1 for x in seq: if x != '-': numbers+=[count] count+=1 else: numbers+=['-'] return numbers # main code below here if __name__ == '__main__': a = parse_pdb_file(open_pdb_file(sys.argv[1]), ['ATOM', 'DBREF', 'REMARK'], [465]) a['ATOM'] = filter_target(a, name=['CA']) b = parse_fasta_file(open_fasta_file(sys.argv[2])) print(b) print(thread_sequence(b[0][1], a))
max_l = cur_l+1 end_index = y cur_l = 0 if max_l < cur_l+1: max_l = cur_l+1 end_index = len(res_seq_numbers) return res_seq_numbers[end_index-max_l:end_index] def calculate_edge_lengths(atoms, simplexlist): """ Calculates the edge lengths of every simplex. Arguments: atoms -- List of atoms from the pdb structure. simplexlist -- List of simplices. Returns: A 6-tuple of edge lengths for given simplices. """ return [ [norm(y[0]-y[1]) for y in combinations([atoms[z]['coord'] for z in x], 2) ] for x in simplexlist] # test code below here if __name__ == '__main__': a = filter_target(parse_pdb_file(open_pdb_file(sys.argv[1]), ['ATOM', 'REMARK'], [465]), chain=['A'], name=['CA']) b = tessellate(a).vertices c = [[a[x]['res'] for x in y] for y in b] d = simplex_potential(c) # [print(x['resseq'],x['res'],y) for x,y in zip(a,residue_potential(a, b, d))]