Ejemplo n.º 1
0
            else:
                ats_tmp = range(len(sequences_full[0]))
            sequences, ats = sca.makeATS(sequences_full, ats_tmp, s_tmp, i_ref, options.truncate)
        except:
            sys.exit("Error!!  Can't find reference sequence...")

    # filtering sequences and positions, calculations of effective number of seqs
    print_("Conducting sequence and position filtering: alignment size is {:d} seqs, {:d} pos".format(len(sequences),
                                                                                                      len(sequences[0])))
    if options.pdbid is not None:
        print_("ATS and distmat size - ATS: {:d}, distmat: {:d} x {:d}".format(len(ats), len(dist_pdb), len(dist_pdb[0])))
    else:
        print_("ATS should also have {:d} positions - ATS: {:d}".format(len(sequences[0]), len(ats)))

    if i_ref is not None:
        alg0, seqw0, seqkeep = sca.filterSeq(sequences, i_ref, max_fracgaps=options.parameters[1],
                                             min_seqid=options.parameters[2], max_seqid=options.parameters[3])
    else:
        alg0, seqw0, seqkeep = sca.filterSeq(sequences, max_fracgaps=options.parameters[1], min_seqid=options.parameters[2],
                                             max_seqid=options.parameters[3])

    headers = [headers_full[s] for s in seqkeep]
    alg1, iposkeep = sca.filterPos(alg0, seqw0, options.parameters[0])
    ats = [ats[i] for i in iposkeep]
    if options.pdbid is not None:
        distmat = dist_pdb[np.ix_(iposkeep, iposkeep)]
    effseqsprelimit = int(seqw0.sum())
    Nseqprelimit = len(alg1)
    print_("After filtering: alignment size is {:d} seqs, {:d} effective seqs, {:d} pos".format(len(alg1), effseqsprelimit,
                                                                                                len(alg1[0])))

    # Limitation of total sequences to [1.5 * # of effective sequences] if Nselect is set to True
Ejemplo n.º 2
0
        except:
            sys.exit("Error!!  Can't find reference sequence...")

    # filtering sequences and positions, calculations of effective number of seqs
    print(
        "Conducting sequence and position filtering: alignment size is %i seqs, %i pos"
        % (len(sequences), len(sequences[0])))
    if options.pdbid is not None:
        print("ATS and distmat size - ATS: %i, distmat: %i x %i" \
              % (len(ats), len(dist_pdb), len(dist_pdb[0])))
    else:
        print("ATS should also have %i positions - ATS: %i" %
              (len(sequences[0]), len(ats)))

    if i_ref is not None:
        alg0, seqw0, seqkeep = sca.filterSeq(sequences, i_ref, max_fracgaps=options.parameters[1], min_seqid=options.parameters[2], \
                                                           max_seqid=options.parameters[3])
    else:
        alg0, seqw0, seqkeep = sca.filterSeq(sequences, max_fracgaps=options.parameters[1], min_seqid=options.parameters[2], \
                                                           max_seqid=options.parameters[3])

    headers = [headers_full[s] for s in seqkeep]
    alg1, iposkeep = sca.filterPos(alg0, seqw0, options.parameters[0])
    ats = [ats[i] for i in iposkeep]
    if options.pdbid is not None:
        distmat = dist_pdb[np.ix_(iposkeep, iposkeep)]
    effseqsprelimit = int(seqw0.sum())
    Nseqprelimit = len(alg1)
    print(
        "After filtering: alignment size is %i seqs, %i effective seqs, %i pos"
        % (len(alg1), effseqsprelimit, len(alg1[0])))
Ejemplo n.º 3
0
    print("Index of reference sequence: {}".format(reference_index))
    sequences, ats = sca.makeATS(position_filtered_sequences, pdb_ats,
                                 pdb_sequence, reference_index,
                                 options.truncate)

    # filtering sequences and positions, calculations of effective number of seqs
    print(
        "Conducting sequence and position filtering: alignment size is {} seqs, {} pos"
        .format(len(sequences), len(sequences[0])))
    print(f'ATS size: {len(ats)}')
    print(
        f'Dim Distance Matrix: {len(pdb_distances)} x {len(pdb_distances[0])}')

    alg0, seqw0, seqkeep = sca.filterSeq(sequences,
                                         reference_index,
                                         max_fracgaps=PARAMETERS[1],
                                         min_seqid=PARAMETERS[2],
                                         max_seqid=PARAMETERS[3])

    headers = [standard_headers[s] for s in seqkeep]
    alg1, iposkeep = sca.filterPos(alg0, seqw0, PARAMETERS[0])
    ats = [ats[i] for i in iposkeep]
    distance_matrix = pdb_distances[np.ix_(iposkeep, iposkeep)]
    effseqsprelimit = int(seqw0.sum())
    Nseqprelimit = len(alg1)
    print(
        "After filtering: alignment size is {} seqs, {} effective seqs, {} pos"
        .format(len(alg1), effseqsprelimit, len(alg1[0])))

    alg = alg1
    hd = headers