sys.exit( "Error!! Can't find reference sequence...") else: try: print( "Finding reference sequence using global MSAsearch..." ) i_ref = sca.MSAsearch(headers_full, sequences_full, seq_pdb) options.i_ref = i_ref print("reference sequence index is: %i" % (i_ref)) print(headers_full[i_ref]) print(sequences_full[i_ref]) except: sys.exit("Error!! Can't find reference sequence...") sequences, ats = sca.makeATS(sequences_full, ats_pdb, seq_pdb, i_ref, options.truncate) dist_new = np.zeros((len(ats), len(ats))) for (j, pos1) in enumerate(ats): for (k, pos2) in enumerate(ats): if k != j: if (pos1 == '-') or (pos2 == '-'): dist_new[j, k] == 1000 else: ix_j = ats_pdb.index(pos1) ix_k = ats_pdb.index(pos2) dist_new[j, k] = dist_pdb[ix_j, ix_k] dist_pdb = dist_new except: sys.exit("Error!!! Something wrong with PDBid or path...") elif options.refseq is not None: print("Finding reference sequence using provided sequence file...")
# Create the ATS
# Uses the reference sequence already selected in options.i_ref and the
# position labels listed (one per line) in the file options.refpos.
# print(...) with a single argument behaves the same on Python 2 and 3.
i_ref = options.i_ref
print("Reference sequence %i:" % (i_ref))
print(headers_full[i_ref])
s_tmp = sequences_full[i_ref]

try:
    # `with` closes the file even if reading fails part-way through.
    with open(options.refpos, 'r') as f:
        ats_tmp = [line.rstrip('\n') for line in f]
    print(ats_tmp)
except Exception:
    # `Exception` rather than a bare except, so Ctrl-C is not reported as a
    # file-reading problem.
    sys.exit("Error!! Unable to read reference positions!")

try:
    sequences, ats = sca.makeATS(sequences_full, ats_tmp, s_tmp, i_ref)
except Exception:
    sys.exit("Error!! Unable to make ATS!")

print("Final alignment parameters:")
print("Number of positions: L = %i" % (len(s_tmp)))
print("Size of ats: %i" % len(ats))

# saving the important stuff. Everything is stored in a file called [MSAname]_sequence.db.
# fn is the last path component of the alignment file; fn_noext is everything
# before its first '.'.
path_list = options.alignment.split(os.sep)
fn = path_list[-1]
fn_noext = fn.split(".")[0]
D = {}
D['alg'] = sequences_full
D['hd'] = headers_full
print_("reference sequence index is: {:d}".format(i_ref)) print_(headers_full[i_ref]) print_(sequences_full[i_ref]) except: sys.exit("Error!! Can't find reference sequence...") else: try: print_("Finding reference sequence using global MSAsearch...") i_ref = sca.MSAsearch(headers_full, sequences_full, seq_pdb) options.i_ref = i_ref print_("reference sequence index is: {:d}".format(i_ref)) print_(headers_full[i_ref]) print_(sequences_full[i_ref]) except: sys.exit("Error!! Can't find reference sequence...") sequences, ats = sca.makeATS(sequences_full, ats_pdb, seq_pdb, i_ref, options.truncate) dist_new = np.zeros((len(ats), len(ats))) for (j, pos1) in enumerate(ats): for (k, pos2) in enumerate(ats): if k != j: if (pos1 == '-') or (pos2 == '-'): dist_new[j, k] == 1000 else: ix_j = ats_pdb.index(pos1) ix_k = ats_pdb.index(pos2) dist_new[j, k] = dist_pdb[ix_j, ix_k] dist_pdb = dist_new except: sys.exit("Error!!! Something wrong with PDBid or path...") elif options.refseq is not None: print_("Finding reference sequence using provided sequence file...")
# --- Structure-derived inputs -------------------------------------------
# Residues are fetched once and then fed to both the sequence/ATS parser and
# the distance calculation.
print('Collecting residue data from PDB')
pdb_residues = get_pdb_residues(
    options.pdbid,
    options.chainID,
    QUIET=True,
)

print('Parsing sequence and pdb_ats')
pdb_sequence, pdb_ats = get_pdb_sequence(pdb_residues)

print('Calculating distances')
pdb_distances = get_pdb_distances(pdb_residues)

# --- Reference sequence and ATS -----------------------------------------
# locate_reference returns the index that is then handed to makeATS and
# filterSeq as the reference row.
print("Finding reference sequence using Bio.pairwise2.align.globalxx")
reference_index = locate_reference(position_filtered_sequences, pdb_sequence)
print("Index of reference sequence: {}".format(reference_index))

sequences, ats = sca.makeATS(
    position_filtered_sequences,
    pdb_ats,
    pdb_sequence,
    reference_index,
    options.truncate,
)

# --- Sequence / position filtering --------------------------------------
# Report the sizes going into the filter before running it.
size_banner = (
    "Conducting sequence and position filtering: alignment size is {} seqs, {} pos"
)
print(size_banner.format(len(sequences), len(sequences[0])))
print(f'ATS size: {len(ats)}')
print(
    f'Dim Distance Matrix: {len(pdb_distances)} x {len(pdb_distances[0])}')

# Filter thresholds come from slots 1-3 of PARAMETERS.
seq_filter_limits = {
    'max_fracgaps': PARAMETERS[1],
    'min_seqid': PARAMETERS[2],
    'max_seqid': PARAMETERS[3],
}
alg0, seqw0, seqkeep = sca.filterSeq(
    sequences,
    reference_index,
    **seq_filter_limits
)