Example #1
0
def _plot_score_distribution(ax, pathogenic_scores, neutral_scores, title):
    """Draw the pathogenic and neutral score histograms as dashed lines on *ax*.

    ``hist`` is resolved from the enclosing module and is expected to return
    a ``(counts, bin_edges)`` pair. Both classes share the same 100 bins over
    ``(-0.005, 1.005)`` so the two curves are directly comparable.
    """
    hist_range = (-0.005, 1.005)
    patho_hist, bins = hist(pathogenic_scores, bins=100, range=hist_range)
    neutr_hist, bins = hist(neutral_scores, bins=100, range=hist_range)
    # Plot each count at the midpoint of its bin.
    center = (bins[:-1] + bins[1:]) / 2
    ax.plot(center, patho_hist, 'r--', label='pathogenic variants')
    ax.plot(center, neutr_hist, 'b--', label='neutral variants')
    ax.set_title(title)
    ax.set_ylabel('samples')
    ax.set_xlabel('score')
    ax.legend(bbox_to_anchor=(0.999, 0.999), loc=1)


def generate_figures():
    """Render the ROC, AUC and score-distribution figures for publication.

    Reads ``dev_const.PUB_CONDEL_PREDICTION_RESULT`` as a whitespace-separated
    table: column 4 holds the class label (``1`` = pathogenic, ``0`` =
    neutral) and columns 5-12 hold the scores of the eight predictors listed
    in ``predictor_names`` (same order).

    Three figures are written to disk:

    * ``dev_const.PUB_ROC_FIG`` - ROC curve of every predictor,
    * ``dev_const.PUB_AUC_FIG`` - bar chart of the area under each ROC curve,
    * ``dev_const.PUB_SCORES_DISTR_FIG`` - CombiVEP and Condel score
      distributions per class.

    Returns:
        A tuple with the three output paths, in the order listed above.
    """
    # Parse the file once; keep it as bytes so the label column and the score
    # columns can be sliced from the same array.
    raw_data = np.loadtxt(dev_const.PUB_CONDEL_PREDICTION_RESULT,
                          dtype='S20')
    header_data = raw_data[:, :5]
    # The builtin float replaces the np.float alias removed in NumPy 1.24.
    scores_data = raw_data[:, 5:13].astype(float)
    min_value = np.amin(scores_data)
    max_value = np.amax(scores_data)
    predictor_names = ('CombiVEP',
                       'Phylop',
                       'SIFT',
                       'PP2',
                       'LRT',
                       'MT',
                       'GERP',
                       'Condel',
                       )
    predictor_colors = ('k',
                        'm',
                        'c',
                        'g',
                        'b',
                        'coral',
                        'darkred',
                        'r',
                        )

    # Labels are stored as bytes ('S20'); compare against bytes literals so
    # the masks are correct on Python 3 as well as Python 2.
    labels = header_data[:, 4]
    pathogenic_scores = scores_data[labels == b'1']
    neutral_scores = scores_data[labels == b'0']

    #produce roc data from CombiVEP, Phylop, SIFT, PP2, LRT, MT, GERP, Condel
    fp_rates, tp_rates = calculate_roc(pathogenic_scores,
                                       neutral_scores,
                                       np.linspace(min_value, max_value, 5001))

    fig = plt.figure()
    ax = fig.add_subplot(111)
    for i, (name, color) in enumerate(zip(predictor_names, predictor_colors)):
        ax.plot(fp_rates[:, i],
                tp_rates[:, i],
                color,
                label=name)
    ax.set_ylabel('true positive rate')
    ax.set_xlabel('false positive rate')
    ax.legend(bbox_to_anchor=(0.9999, 0.0001), loc=4)
    fig.savefig(dev_const.PUB_ROC_FIG, bbox_inches='tight', pad_inches=0.05)
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

    #produce auc data from roc data
    bar_width = 0.3
    predictor_count = len(predictor_names)
    aucs = [auc(fp_rates[:, i], tp_rates[:, i])
            for i in range(predictor_count)]
    ind = [0.5*(i+1)-0.4 for i in range(predictor_count)]
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # 'ind' holds the left edge of each bar. matplotlib >= 2.0 centres bars by
    # default, so request edge alignment explicitly to keep the value labels
    # and the tick positions below lined up with the bars.
    ax.bar(ind, aucs, bar_width, color=predictor_colors, align='edge')
    for left, value in zip(ind, aucs):
        ax.text(left, value + 0.01, "%0.3f" % value)
    ax.set_ylim([0.7, 0.9])
    # Tick labels sit under the middle of each bar.
    ax.set_xticks(np.array(ind) + bar_width / 2)
    ax.set_xticklabels(predictor_names, rotation=30)
    fig.savefig(dev_const.PUB_AUC_FIG, bbox_inches='tight', pad_inches=0.05)
    plt.close(fig)

    #plot scores distribution
    fig = plt.figure()
    # Column 0 is CombiVEP and column 7 is Condel (see predictor_names).
    _plot_score_distribution(fig.add_subplot(211),
                             pathogenic_scores[:, 0],
                             neutral_scores[:, 0],
                             'CombiVEP score distribution')
    _plot_score_distribution(fig.add_subplot(212),
                             pathogenic_scores[:, 7],
                             neutral_scores[:, 7],
                             'Condel score distribution')
    fig.tight_layout()
    fig.savefig(dev_const.PUB_SCORES_DISTR_FIG,
                bbox_inches='tight',
                pad_inches=0.05)
    plt.close(fig)

    return (dev_const.PUB_ROC_FIG,
            dev_const.PUB_AUC_FIG,
            dev_const.PUB_SCORES_DISTR_FIG,
            )