Example #1
0
def write_and_plot(project, genes, dimen, has_ctrl=False):
    """Write the gene vectors to a text file, run plot_dist, show a 3D plot.

    The output file is ``project + '_tetra.txt'``. It starts with a header
    line ``Name,A0,A1,...`` (``dimen`` columns) followed by one
    comma-separated line per vector, each prefixed with the label of the
    group it belongs to. No trailing newline is written.

    Args:
        project: prefix used to build the output file name; it is also
            passed on to ``plot_dist``.
        genes: sequence of groups, paired in order with the labels
            'A', 'B', 'C', 'D' (groups beyond the fourth are ignored by
            ``zip``). Each group is iterated row by row and must expose
            ``.T`` -- presumably 2-D numpy arrays; confirm against callers.
        dimen: number of ``A<i>`` columns named in the header line.
        has_ctrl: when true, ``plot_dist`` is called with ``bio_ctrl='D'``.
    """
    names = ['A', 'B', 'C', 'D']
    # An earlier black/grey/white palette was replaced by these colors.
    colors = ['r', 'b', 'y', 'k']
    # Every group is drawn with its own letter, rendered as a mathtext
    # marker (this replaced plain 's', 'D', 'o', '+' markers).
    markers = ['$\\mathrm{' + label + '}$' for label in names]

    with open(project + '_tetra.txt', 'w') as out:
        column_ids = ['A' + str(col) for col in range(dimen)]
        out.write('Name,' + ','.join(column_ids))
        # Rows are written with a leading newline so the file never ends
        # with an empty line.
        for label, gene in zip(names, genes):
            for row in gene:
                out.write('\n' + label + ',' + ','.join(map(str, row)))

    # Only forward bio_ctrl when a control group was requested.
    ctrl_kwargs = {'bio_ctrl': 'D'} if has_ctrl else {}
    plot_dist(project, **ctrl_kwargs)

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    for marker, color, gene in zip(markers, colors, genes):
        # The first three columns of the group become the plot coordinates.
        coords = gene.T[:3]
        ax.scatter(*coords, edgecolor=color, s=30, alpha=1, marker=marker)

    # No legend is drawn; a former Line2D-proxy legend was disabled.
    plt.show()
Example #2
0
      help='Contigs need to be > than this to have tetra scores calculated')
    args = parser.parse_args()
    print(args.min_ctg_len, args.min_genes, args.hmm_e_val)

    project = op.join(args.working_dir, args.project)
    if args.build_orfs or args.build_all:
        find_classify_orfs(project, args.scaffolds, args.markov_model, \
                           args.metagenemark, args.hmmer, args.hmm_e_val)

    if args.build_tetra or args.build_all:
        generate_tetra(project, args.scaffolds, min_len=args.min_ctg_len)

    if args.build_plot or args.build_dist or args.build_all:
        M_DRAWS = 500  # number of Monte Carlo draws to do for controls
        FILT_LEN = args.min_genes

        gene_list = ['psaA', 'psaB', 'psbA', 'psbB', 'pufM', 'pufL', 'pr',
                     'pioA', 'pioC', 'iro', 'coxB', 'ompC', 'arch_amoA',
                     'bact_amoA', 'mmoZ', 'hszA', 'sqR-allo', 'sqR-rhodo',
                     'narG', 'nirK', 'dsrA', 'dsrB', 'mcrA', 'frhB', 'cdhD',
                     'fdhA', 'mvK', 'dxr', 'gggps', 'sqdB', 'cdsA-allo',
                     'cdsA-geo', 'cdsA-rhodo', 'cdsA-synn', 'mglcD', 'mgdA',
                     'btaA', 'olsB', 'shc', 'osc', 'cas1', 'crtI-allo',
                     'crtI-rhodo', 'crtP', 'nifH', 'luxI', 'raiI', 'por',
                     'bchF', 'rpoB']
        #gene_list = ['dsrA', 'dsrB', 'cdsA-synn', 'frhB', 'fdhA']
        #gene_list = None
        plot = args.build_plot or args.build_all
        save = args.build_dist or args.build_all
        plot_dist(project, gene_list, FILT_LEN, M_DRAWS, plot, save)