Example #1
0
                 try:
                     # Read one position label per line from the file named by
                     # options.refpos, stripping only the trailing newline.
                     f = open(options.refpos, 'r')
                     ats_tmp = [line.rstrip('\n') for line in f]
                     f.close()
                 except:
                     # NOTE(review): bare except -- any failure above (not only
                     # I/O errors) lands here, and `f` is not closed if reading
                     # raises after open() succeeded.
                     print_("Error reading reference position file! Using default numbering 1 to number of positions")
                     # The fallback labels are 0-based (range starts at 0)
                     # although the message says "1 to number of positions".
                     # NOTE(review): within this branch the only visible
                     # assignment to `sequences` is the makeATS call below --
                     # confirm it is already bound when this line runs.
                     ats_tmp = range(len(sequences[0]))
             else:
                 # No position file was given: use the same 0-based default labels.
                 print_("No reference position list provided.  Using default numbering 1 to number of positions")
                 ats_tmp = range(len(sequences[0]))
             # Build `sequences` and `ats` from the full alignment, the position
             # labels, the first entry of s_tmp, the reference index and the
             # truncate option (semantics live in sca.makeATS, not shown here).
             sequences, ats = sca.makeATS(
                 sequences_full, ats_tmp, s_tmp[0], i_ref, options.truncate)
         except:
             # NOTE(review): bare except -- every exception raised in the
             # enclosing try (which starts above this excerpt) exits with this
             # message, whatever the actual cause was.
             sys.exit("Error!!  Can't find reference sequence...")
     else:
         # No reference sequence was supplied (per the message below): let
         # sca.chooseRefSeq pick an index, keep the full alignment unchanged and
         # label positions 0 .. len(sequences[0]) - 1.
         msa_num = sca.lett2num(sequences_full)
         i_ref = sca.chooseRefSeq(sequences_full)
         print_("No reference sequence given, chose as default ({:d}): {}".format(i_ref, headers_full[i_ref]))
         sequences = sequences_full
         ats = range(len(sequences[0]))
 else:
     # A reference index is already available: report it together with its
     # header and take the matching entry of sequences_ori as s_tmp.
     print_("using provided reference index {:d}".format(i_ref))
     print_(headers_full[i_ref])
     s_tmp = sequences_ori[i_ref]
     try:
         if options.refpos is not None:
             # Same file format as above: one position label per line.
             f = open(options.refpos, 'r')
             ats_tmp = [line.rstrip('\n') for line in f]
             f.close()
         else:
Example #2
0
                 except:
                     # NOTE(review): bare except -- the matching try is above this
                     # excerpt; any exception it raises is reported as a position
                     # file read error.
                     print(
                         "Error reading reference position file! Using default numbering 1 to number of positions"
                     )
                     # The fallback labels are 0-based (range starts at 0)
                     # although the message says "1 to number of positions".
                     # NOTE(review): this fallback indexes `sequences`, while the
                     # otherwise identical fallback in the else branch below
                     # indexes `sequences_full`; within this branch `sequences` is
                     # only visibly assigned by the makeATS call further down --
                     # confirm which one is intended.
                     ats_tmp = range(len(sequences[0]))
             else:
                 # No position file was given: default to 0-based labels sized
                 # from the first sequence of the full alignment.
                 print(
                     "No reference position list provided.  Using default numbering 1 to number of positions"
                 )
                 ats_tmp = range(len(sequences_full[0]))
             # Build `sequences` and `ats` from the full alignment, the position
             # labels, the first entry of s_tmp, the reference index and the
             # truncate option (semantics live in sca.makeATS, not shown here).
             sequences, ats = sca.makeATS(sequences_full, ats_tmp, s_tmp[0],
                                          i_ref, options.truncate)
         except:
             # NOTE(review): bare except -- every exception raised in the
             # enclosing try (which starts above this excerpt) exits with this
             # message, whatever the actual cause was.
             sys.exit("Error!!  Can't find reference sequence...")
     else:
         # No reference sequence was supplied (per the message below): let
         # sca.chooseRefSeq pick an index, keep the full alignment unchanged and
         # label positions 0 .. len(sequences[0]) - 1.
         msa_num = sca.lett2num(sequences_full)
         i_ref = sca.chooseRefSeq(sequences_full)
         print("No reference sequence given, chose as default (%i): %s" %
               (i_ref, headers_full[i_ref]))
         sequences = sequences_full
         ats = range(len(sequences[0]))
 else:
     # A reference index is already available: report it together with its
     # header and take the matching entry of sequences_ori as s_tmp.
     print("using provided reference index %i" % (i_ref))
     print(headers_full[i_ref])
     s_tmp = sequences_ori[i_ref]
     try:
         if options.refpos is not None:
             # One position label per line; only the trailing newline is removed.
             f = open(options.refpos, 'r')
             ats_tmp = [line.rstrip('\n') for line in f]
             f.close()
         else:
Example #3
0
    # sca.filterPos returns a new alignment together with `iposkeep`; the same
    # indices are reused to subset the position labels and the distance matrix
    # so that all three stay aligned column for column.
    alg1, iposkeep = sca.filterPos(alg0, seqw0, PARAMETERS[0])
    ats = [ats[pos] for pos in iposkeep]
    distance_matrix = pdb_distances[np.ix_(iposkeep, iposkeep)]
    Nseqprelimit = len(alg1)
    effseqsprelimit = int(seqw0.sum())
    print(
        "After filtering: alignment size is {} seqs, {} effective seqs, {} pos"
        .format(Nseqprelimit, effseqsprelimit, len(alg1[0])))

    alg, hd = alg1, headers

    # Final MSA: sequence weights, their sum, the numeric encoding and its
    # (rows, columns) shape.
    seqw = sca.seqWeights(alg)
    effseqs = seqw.sum()
    msa_num = sca.lett2num(alg)
    Nseq, Npos = msa_num.shape
    # Indices of the positions whose label is anything other than '-'.
    structPos = [idx for idx, label in enumerate(ats) if label != '-']
    print(
        FINAL_MSG.format(Nseq, effseqs, Npos, len(ats), len(structPos),
                         len(distance_matrix), len(distance_matrix[0])))

    # Output name: the alignment's file name up to (not including) its first dot.
    path_list = os.path.split(options.alignment)
    fn = path_list[1]
    fn_noext = fn.partition(".")[0]

    # Write the processed alignment as FASTA: a ">header" line followed by the
    # sequence line for every entry.
    with open("Outputs/" + fn_noext + "processed" + ".fasta", "w") as f:
        for i, aligned_seq in enumerate(alg):
            f.write(">%s\n" % (hd[i]))
            f.write(aligned_seq + "\n")