コード例 #1
0
ファイル: gow_meta_analysis.py プロジェクト: Ursuline/dssp14
def plot_Gmeta(array1, array2, array3, title, ylabel, density):
    """Draw a grouped bar chart of three series on a log y axis and save it.

    Args:
        array1: values drawn with the 'Corpus' legend label.
        array2: values drawn with the 'k-core' legend label.
        array3: values drawn with the 'k-truss' legend label.
        title: plot title; also handed to ``clean_filename`` to build the
            name of the PNG file written to ``plot_directory``.
        ylabel: label of the y axis.
        density: when truthy, the value printed above each bar uses three
            decimals; otherwise it is printed without decimals.

    The x tick labels come from ``get_names()``; one name per group of bars
    is expected (presumably three -- confirm against ``get_names``).
    """
    bar_width = .3
    colors = ['tan', 'yellowgreen', 'midnightblue']
    labels = ['Corpus', 'k-core', 'k-truss']
    names = get_names()

    series = [array1, array2, array3]

    _, ax = plt.subplots()

    # x coordinates of the bar centres: every series is shifted one bar
    # width to the right of the previous one.
    positions = [np.arange(len(series[0])) + bar_width / 2]
    for _ in series[1:]:
        positions.append(positions[-1] + bar_width)

    # Make the plot
    for xs, values, color, legend_label in zip(positions, series, colors,
                                               labels):
        plt.bar(xs,
                values,
                color=color,
                width=bar_width,
                edgecolor='white',
                label=legend_label)

    # Print each value just above its bar. Iterating over the data itself
    # (instead of a fixed range(3)) labels every bar whatever the number of
    # groups.
    text_x_offset = .1
    text_y_offset = 1.5
    for xs, values in zip(positions, series):
        for x, value in zip(xs, values):
            x_pos = x - bar_width / 3 + text_x_offset
            if density:
                y_pos = value + np.log(1.0001)
                text = f'{value:.3f}'
            else:
                y_pos = value + np.exp(text_y_offset)
                text = f'{value:.0f}'
            plt.text(x_pos,
                     y_pos,
                     text,
                     horizontalalignment='center',
                     rotation=0,
                     color='black',
                     fontsize=10)

    # Put the ticks on the middle of each group, i.e. on the centre of the
    # second (middle) bar.
    plt.ylabel(ylabel, fontweight='normal')
    plt.xticks(positions[1], names)
    ax.set_yscale('log')
    plt.title(title)

    # Create legend, save to file, then show the graphic
    plt.legend(loc='best', fontsize='small')
    plt.savefig(clean_filename(title, 'png', plot_directory))
    plt.show()
コード例 #2
0
ファイル: classifier_plots.py プロジェクト: Ursuline/dssp14
def ROC_plot(fpr, tpr, roc_auc, n_classes, ix, gmean, algo, method, save=True):
    '''Draw one ROC curve per class with a marked point on each curve.

    fpr, tpr  -- per-class false/true positive rates, indexed by class
    roc_auc   -- per-class area under the curve, shown in the legend
    n_classes -- number of curves to draw
    ix        -- per-class index of the point marked with a black dot
    gmean     -- value printed in the legend entry of the marked points
    algo, method -- used to build the title 'ROC {method}+{algo}'
    save      -- when true, the figure is written as a PNG named after the
                 title (via clean_filename) before being shown
    '''
    palette = get_color_dict()
    class_names = get_names()
    accounts = get_handles()

    line_width = 1
    side = 6
    title = f'ROC {method}+{algo}'

    # Make plot
    plt.figure(figsize=(side, side))
    curve_colors = cycle([palette[accounts[k]] for k in range(3)])

    for i, color in zip(range(n_classes), curve_colors):
        plt.plot(fpr[i],
                 tpr[i],
                 color=color,
                 linewidth=line_width,
                 label=f'ROC {class_names[i]} (auc = {roc_auc[i]:0.2f})')

        # Only the first marker carries a legend entry, so the legend lists
        # the marked points once.
        marker_style = {'marker': 'o', 'color': 'black'}
        if i == 0:
            marker_style['label'] = f'Best (gmeans={gmean:.3f})'
        plt.scatter(fpr[i][ix[i]], tpr[i][ix[i]], **marker_style)

    # Diagonal reference line
    plt.plot([0, 1], [0, 1], 'k--', linewidth=line_width, label='No Skill')

    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(title)
    plt.legend(loc="lower right")
    if save:
        plt.savefig(clean_filename(title, 'png', plot_directory))
    plt.show()
コード例 #3
0
ファイル: classifier_plots.py プロジェクト: Ursuline/dssp14
def plot_confusion_matrix(y_pred,
                          y_test,
                          score,
                          handles,
                          algo,
                          method,
                          normalize,
                          save=True):
    """
    Plot a confusion matrix
    Expected True values x-axis & predicted y-axis
    obtained from : confusion_matrix(y_pred, y_test, labels = handles)
    note: this order is the opposite of that suggested in scikit-learn

    Parameters:
        y_pred, y_test: predicted and true labels, passed in that order to
            confusion_matrix.
        score: value printed next to the algorithm name (not used otherwise).
        handles: label values, in the order used for the matrix rows/columns.
        algo, method: used to build the plot title '{method}+{algo}'.
        normalize: when true, each row of the matrix is divided by its sum
            and rounded to two decimals before plotting.
        save: when true, the figure is written as a PNG named after the
            title (via clean_filename) before being shown.

    The accuracy shown under the x axis is computed from the raw counts,
    before any normalisation. Tick labels come from get_names(), which is
    presumably ordered like `handles` -- confirm against get_names.
    """
    figsize_x = 6.5
    figsize_y = 6
    cm = confusion_matrix(y_pred, y_test, labels=handles)
    print(cm)
    print(f'[confusion_matrix_wrapper] {algo} score = {score}\n')
    labels = get_names()
    accuracy = np.trace(cm) / float(np.sum(cm))
    print(f'[plot_confusion_matrix] Accuracy={accuracy}')
    print(f'[plot_confusion_matrix] confusion matrix:\n{cm}')
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        cm = np.round(cm, 2)

    fig = plt.figure(figsize=(figsize_x, figsize_y))
    ax = fig.add_subplot(111)
    ax.matshow(cm)

    title = f'{method}+{algo}'
    plt.title(title, y=1.1)
    # Draws the same matrix again on the current axes, this time with the
    # Wistia colormap.
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Wistia)

    # Fix the tick positions before labelling them: labelling without fixed
    # positions relies on whatever ticks the automatic locator produces.
    tick_positions = np.arange(len(labels))
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(labels)
    ax.set_yticks(tick_positions)
    ax.set_yticklabels(labels)
    plt.ylabel('Predicted')
    plt.xlabel(f'True\naccuracy={accuracy:0.3f}')

    # Write every cell value, whatever the size of the matrix.
    n_rows, n_cols = cm.shape
    for i in range(n_rows):
        for j in range(n_cols):
            plt.text(j, i, str(cm[i][j]))
    if save:
        plt.savefig(clean_filename(title, 'png', plot_directory))
    plt.show()
コード例 #4
0
ファイル: eda_plots.py プロジェクト: Ursuline/dssp14
def build_base_barplot(handles, array, x_pos, title, xlabel, ylabel, color_dict):
    '''Create a vertical bar plot with one coloured bar per handle.

    handles    -- keys into color_dict; the i-th handle colours the i-th bar
    array      -- bar heights
    x_pos      -- x positions of the bars, also used as tick positions
    title, xlabel, ylabel -- texts set on the axes
    color_dict -- maps a handle to its bar colour

    Tick labels are taken from get_names().
    Returns the (figure, axes) pair so callers can decorate the plot further.
    '''
    bar_width = .75
    tick_names = get_names()

    fig, ax = plt.subplots()
    bars = ax.bar(x_pos, array, bar_width)

    # Colour each bar after its handle
    for idx, handle in enumerate(handles):
        bars[idx].set_color(color_dict[handle])

    ax.set_xticks(x_pos)
    ax.set_xticklabels(tick_names)

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)

    return fig, ax
コード例 #5
0
ファイル: eda_plots.py プロジェクト: Ursuline/dssp14
def tweeting_period_plot(handles, start, end, color_dict):
    '''Draw one horizontal bar per handle for its tweeting period and save it.

    handles    -- keys into color_dict; the i-th handle colours the i-th bar
    start, end -- per-handle date strings; passed to get_ndays for the bar
                  lengths and printed on the plot as annotations
    color_dict -- maps a handle to its bar colour

    The annotation anchors below are hard-coded for three rows.
    The figure is written as a PNG named after the title, then shown.
    '''
    label_size = 14

    account_names = get_names()
    durations = get_ndays(start, end)

    _, ax = plt.subplots(figsize=(18, 4.875))

    rows = np.arange(len(handles))
    bars = ax.barh(rows, durations, .75)

    for idx, handle in enumerate(handles):
        bars[idx].set_color(color_dict[handle])

    # Annotate bars; anchors are expressed as fractions of the axes
    end_anchor_x = .89
    start_anchor_x = [.05, .275, .55]
    anchor_y = [.815, .485, .15]
    for idx in range(len(handles)):
        ax.annotate('<-- ' + start[idx],
                    xy=(start_anchor_x[idx], anchor_y[idx]),
                    xycoords='axes fraction',
                    fontsize=label_size)
        ax.annotate(end[idx] + ' -->',
                    xy=(end_anchor_x, anchor_y[idx]),
                    xycoords='axes fraction',
                    fontsize=label_size)

    plt.gca().invert_xaxis()  # right to left
    ax.set_yticks(rows)
    ax.set_yticklabels(account_names)
    ax.invert_yaxis()  # labels read top-to-bottom
    ax.set_xlabel('# of days')
    title = 'Tweeting time period'
    ax.set_title(title, fontsize=18)
    plt.savefig(clean_filename(title, 'png', plot_directory))

    plt.show()
コード例 #6
0
ファイル: tweets_eda.py プロジェクト: Ursuline/dssp14
def build_set(tweets):
    '''Return the set of distinct whitespace-separated words found in tweets.

    tweets -- iterable of strings; each one is split on whitespace
    '''
    return {token for text in tweets for token in text.split()}


if __name__ == '__main__':
    t0 = time.time()
    name_dict = get_name_dict()
    color_dict = get_color_dict()
    handles = get_handles()
    names = get_names()

    start_dates = ['05/01/2018', '03/07/2018', '02/02/2019']
    end_dates = ['25/01/2020', '26/01/2020', '25/01/2020']
    eda_plots.tweeting_period_plot(handles, start_dates, end_dates, color_dict)

    nraw = list()  # number of raw tweets for each handle
    ntweets = list()  # number of processed tweets for each handle
    word_counts = list()  # list of word counts
    char_counts = list()  # list of character counts
    unique_words = list()  # list of # of unique words for each handle
    unique_all = set()  # set of unique words overall
    wc_arrays = list()  # list of arrays of word_counts for sig. test

    for handle in handles:  # Loop over handles
        print(name_dict[handle])