예제 #1
0
        % (len(sequences_full)))

    # Initial trim: discard alignment columns that are excessively gapped.
    # This is critical for building a correct ATS.
    print("Trimming alignment for highly gapped positions (80% or more).")
    # 0.8 matches the 80% figure in the message above; the [1] argument is
    # presumably a weight vector -- confirm against sca.filterPos.
    alg_out, poskeep = sca.filterPos(sequences_full, [1], 0.8)
    # Keep the untrimmed alignment reachable as sequences_ori while
    # sequences_full is rebound to the trimmed one.
    sequences_ori, sequences_full = sequences_full, alg_out
    print("Alignment size post-trimming: %i positions" % len(sequences_full[0]))

    # If i_ref is directly provided, we use it, ignoring all else.  Otherwise,
    # we explore the other ways of specifying a reference sequence: (1)
    # providing a PDBid (chainID defaults to 'A'), (2) providing the protein
    # sequence with position numbers (defaults to just sequence numbering).
    # If none of these is provided, we just make an alignment-based numbering
    # for ats.  If a PDBid is provided, there is an option to also provide
    # species information to permit identifying the reference sequence in the
    # MSA without use of external packages for fast pairwise alignments.

    if options.i_ref is None:
        if options.pdbid is not None:
            try:
                seq_pdb, ats_pdb, dist_pdb = sca.pdbSeq(
                    options.pdbid, options.chainID)
                if options.species is not None:
                    try:
                        # Search the alignment for the PDB-derived sequence,
                        # passing the user-supplied species to MSAsearch.
                        print(
                            "Finding reference sequence using species-based best match.."
                        )
                        i_ref = sca.MSAsearch(headers_full, sequences_full,
                                              seq_pdb, options.species)
                        # Store the index on the options object.  This branch
                        # only runs when options.i_ref is None; the previous
                        # assignment to a stray name (Options_ref) left it None
                        # even though a reference sequence had been found.
                        options.i_ref = i_ref
                        # Echo the match so the user can check it.
                        print("reference sequence index is: %i" % (i_ref))
                        print(headers_full[i_ref])
                        print(sequences_full[i_ref])
                    except:
                        print(
                            "Cant find the reference sequence using species-based best_match! Using global MSAsearch..."
                        )
예제 #2
0
    sequences_full = alg_out
    print_("Alignment size post-trimming: {:d} positions".format(len(sequences_full[0])))

    # If i_ref is directly provided, we use it, ignoring all else.  Otherwise,
    # we explore the other ways of specifying a reference sequence: (1)
    # providing a PDBid (chainID defaults to 'A'), (2) providing the protein
    # sequence with position numbers (defaults to just sequence numbering).
    # If none of these is provided, we just make an alignment based numbering
    # for ats.  If a PDBid is provided, there is an option to also provide
    # species information to permit identifying the reference sequence in the
    # MSA without use of external packages for fast pairwise alignments.

    if options.i_ref is None:
        if options.pdbid is not None:
            try:
                seq_pdb, ats_pdb, dist_pdb = sca.pdbSeq(options.pdbid, options.chainID)
                if options.species is not None:
                    try:
                        # Search the alignment for the PDB-derived sequence,
                        # passing the user-supplied species to MSAsearch.
                        print_("Finding reference sequence using species-based best match..")
                        i_ref = sca.MSAsearch(
                            headers_full, sequences_full, seq_pdb, options.species)
                        # Store the index on the options object, matching what
                        # the global-search fallback in the except branch does.
                        # The previous assignment to a stray name (Options_ref)
                        # left options.i_ref as None on this path.
                        options.i_ref = i_ref
                        # Echo the match so the user can check it.
                        print_("reference sequence index is: {:d}".format(i_ref))
                        print_(headers_full[i_ref])
                        print_(sequences_full[i_ref])
                    except:
                        print_("Cant find the reference sequence using species-based best_match! Using global MSAsearch...")
                        try:
                            i_ref = sca.MSAsearch(headers_full, sequences_full, seq_pdb)
                            options.i_ref = i_ref
                            print_("reference sequence index is: {:d}".format(i_ref))