else: ats_tmp = range(len(sequences_full[0])) sequences, ats = sca.makeATS(sequences_full, ats_tmp, s_tmp, i_ref, options.truncate) except: sys.exit("Error!! Can't find reference sequence...") # filtering sequences and positions, calculations of effective number of seqs print_("Conducting sequence and position filtering: alignment size is {:d} seqs, {:d} pos".format(len(sequences), len(sequences[0]))) if options.pdbid is not None: print_("ATS and distmat size - ATS: {:d}, distmat: {:d} x {:d}".format(len(ats), len(dist_pdb), len(dist_pdb[0]))) else: print_("ATS should also have {:d} positions - ATS: {:d}".format(len(sequences[0]), len(ats))) if i_ref is not None: alg0, seqw0, seqkeep = sca.filterSeq(sequences, i_ref, max_fracgaps=options.parameters[1], min_seqid=options.parameters[2], max_seqid=options.parameters[3]) else: alg0, seqw0, seqkeep = sca.filterSeq(sequences, max_fracgaps=options.parameters[1], min_seqid=options.parameters[2], max_seqid=options.parameters[3]) headers = [headers_full[s] for s in seqkeep] alg1, iposkeep = sca.filterPos(alg0, seqw0, options.parameters[0]) ats = [ats[i] for i in iposkeep] if options.pdbid is not None: distmat = dist_pdb[np.ix_(iposkeep, iposkeep)] effseqsprelimit = int(seqw0.sum()) Nseqprelimit = len(alg1) print_("After filtering: alignment size is {:d} seqs, {:d} effective seqs, {:d} pos".format(len(alg1), effseqsprelimit, len(alg1[0]))) # Limitation of total sequences to [1.5 * # ofeffective sequences] if Nselect is set to True
except: sys.exit("Error!! Can't find reference sequence...") # filtering sequences and positions, calculations of effective number of seqs print( "Conducting sequence and position filtering: alignment size is %i seqs, %i pos" % (len(sequences), len(sequences[0]))) if options.pdbid is not None: print("ATS and distmat size - ATS: %i, distmat: %i x %i" \ % (len(ats), len(dist_pdb), len(dist_pdb[0]))) else: print("ATS should also have %i positions - ATS: %i" % (len(sequences[0]), len(ats))) if i_ref is not None: alg0, seqw0, seqkeep = sca.filterSeq(sequences, i_ref, max_fracgaps=options.parameters[1], min_seqid=options.parameters[2], \ max_seqid=options.parameters[3]) else: alg0, seqw0, seqkeep = sca.filterSeq(sequences, max_fracgaps=options.parameters[1], min_seqid=options.parameters[2], \ max_seqid=options.parameters[3]) headers = [headers_full[s] for s in seqkeep] alg1, iposkeep = sca.filterPos(alg0, seqw0, options.parameters[0]) ats = [ats[i] for i in iposkeep] if options.pdbid is not None: distmat = dist_pdb[np.ix_(iposkeep, iposkeep)] effseqsprelimit = int(seqw0.sum()) Nseqprelimit = len(alg1) print( "After filtering: alignment size is %i seqs, %i effective seqs, %i pos" % (len(alg1), effseqsprelimit, len(alg1[0])))
# Report the reference row, then derive the working alignment and its ATS
# labels from the position-filtered sequences and the PDB sequence data.
print("Index of reference sequence: {}".format(reference_index))
sequences, ats = sca.makeATS(
    position_filtered_sequences,
    pdb_ats,
    pdb_sequence,
    reference_index,
    options.truncate,
)

# Filtering sequences and positions, calculation of the effective number of seqs.
alignment_shape = (len(sequences), len(sequences[0]))
print(
    "Conducting sequence and position filtering: alignment size is {} seqs, {} pos"
    .format(*alignment_shape))
print(f'ATS size: {len(ats)}')
print(
    f'Dim Distance Matrix: {len(pdb_distances)} x {len(pdb_distances[0])}')

# Sequence filter: thresholds are taken from PARAMETERS[1..3].
seq_filter_kwargs = {
    "max_fracgaps": PARAMETERS[1],
    "min_seqid": PARAMETERS[2],
    "max_seqid": PARAMETERS[3],
}
alg0, seqw0, seqkeep = sca.filterSeq(sequences, reference_index,
                                     **seq_filter_kwargs)

# Headers of the sequences that survived the sequence filter.
headers = [standard_headers[kept] for kept in seqkeep]

# Position filter, driven by PARAMETERS[0]; iposkeep holds the indices of the
# retained positions, which are then applied to the ATS labels and to both
# axes of the distance matrix.
alg1, iposkeep = sca.filterPos(alg0, seqw0, PARAMETERS[0])
ats = [ats[pos] for pos in iposkeep]
kept_rows_cols = np.ix_(iposkeep, iposkeep)
distance_matrix = pdb_distances[kept_rows_cols]

# The summed sequence weights are reported as the effective number of sequences.
effseqsprelimit = int(seqw0.sum())
Nseqprelimit = len(alg1)
print(
    "After filtering: alignment size is {} seqs, {} effective seqs, {} pos"
    .format(Nseqprelimit, effseqsprelimit, len(alg1[0])))

# Working names for the filtered alignment and its headers.
alg, hd = alg1, headers