Ejemplo n.º 1
0
def histo_datalink(mode,
                   lambda_val,
                   attribute,
                   ci=95,
                   datalinks=range(0, NUM_DATA_LINK),
                   save=False):
    """Draw one bar per data link with the mean of ``attribute`` and CI whiskers.

    The statistics come from ``scalar_stats(scalar_parse(mode, lambda_val))``.
    For each link ``u`` the column ``"<attribute>-<u>"`` is read from the
    ``'mean'``, ``'ci<ci>_l'`` and ``'ci<ci>_h'`` entries of that result.

    Args:
        mode: Key into ``MODE_DESCRIPTION``; also part of the output file name.
        lambda_val: Forwarded to ``scalar_parse``; concatenated into the file
            name, so it must be a string when ``save`` is true.
        attribute: Prefix of the per-link column names.
        ci: Confidence level used to build the ``ci<ci>_l`` / ``ci<ci>_h`` keys.
        datalinks: Iterable of link identifiers to plot.
        save: When true, write a PDF and clear the figure; otherwise show it.
    """
    stats = scalar_stats(scalar_parse(mode, lambda_val))
    lower_key = 'ci' + str(ci) + '_l'
    upper_key = 'ci' + str(ci) + '_h'

    for link in datalinks:
        column = attribute + '-' + str(link)
        mean_value = stats['mean'][column]
        # Asymmetric error bars are passed as a (2, 1) array: the first row is
        # the distance below the mean, the second the distance above it.
        whiskers = np.array([
            mean_value - stats[lower_key][column],
            stats[upper_key][column] - mean_value
        ]).reshape(2, 1)
        plt.bar('User ' + str(link),
                mean_value,
                yerr=whiskers,
                align='center',
                alpha=0.95,
                ecolor='k',
                capsize=7)

    plt.title(attribute + ": " + MODE_DESCRIPTION[mode])
    if not save:
        plt.show()
        return

    out_name = ("histousers_" + attribute + "_" + mode + "_" + lambda_val +
                ".pdf")
    plt.savefig(out_name, bbox_inches="tight")
    # Reset the figure so the next call starts from an empty canvas.
    plt.clf()
    def visualize(self):
        """Plot, per row of the reference list, whether a matching entry exists.

        Reads ``DSBDA.csv`` from the current working directory and a second
        ``DSBDA.csv`` from ``self.directory`` (both as a single ``Name``
        column), left-merges them, and draws a bar of height 1 for every
        reference row that also appears in the second file (0 otherwise).
        The chart is saved to ``<self.directory>/assigenmet1.pdf`` and shown.
        """
        # Start a fresh figure so earlier plots do not bleed into this chart.
        plt.figure()
        reference = pd.read_csv("DSBDA.csv", names=['Name'])
        received = pd.read_csv(self.directory + "/DSBDA.csv", names=['Name'])

        # A left merge keeps every reference row; the indicator column reads
        # 'both' only for names that were found in the second file as well.
        df = pd.merge(reference, received, how='left', indicator='Submitted')
        df['Submitted'] = df['Submitted'] == 'both'

        # Vectorised 0/1 conversion; unlike an index-range loop this also
        # works when the merge result is empty.
        y = df['Submitted'].astype(int).tolist()
        x = list(df.index + 1)  # 1-based bar positions

        barWidth = 0.85
        # Create green Bars
        plt.bar(x,
                y,
                color='green',
                edgecolor='white',
                width=barWidth,
                label='On Time')
        plt.savefig(self.directory + "/assigenmet1.pdf")
        print('Showing Plot')
        plt.show()
Ejemplo n.º 3
0
    def plot_stacked_barchart(self, dataframe, sort, title, xlable, ylable,
                              kurs):
        """Save a stacked bar chart of row counts per category, split by tutor.

        Args:
            dataframe: Table with a ``'tutor'`` column and the column ``sort``.
            sort: Name of the column whose distinct values form the x axis.
            title: Used only as the output file name (without extension).
            xlable: Label for the x axis.
            ylable: Label for the y axis.
            kurs: Sub-directory of ``./PDFcreater/Plots`` to write into.

        The figure is written to ``./PDFcreater/Plots/<kurs>/<title>.png`` and
        then discarded so the next plot starts clean.
        """
        # Distinct tutors in order of first appearance.
        tutors = []
        for name in dataframe['tutor']:
            if name not in tutors:
                tutors.append(name)

        # counts[t][c] = number of rows of tutor t that fall in category c.
        categories = []
        counts = [[] for _ in tutors]
        # Iterate both columns in lock-step so the pairing is positional and
        # does not depend on the dataframe having a 0..n-1 index.
        for name, value in zip(dataframe['tutor'], dataframe[sort]):
            if value not in categories:
                categories.append(value)
                for row in counts:
                    row.append(0)
            counts[tutors.index(name)][categories.index(value)] += 1

        # Each tutor's bars start where the previous tutor's bars ended;
        # without ``bottom`` the series would simply hide one another.
        positions = range(len(categories))
        bottoms = [0] * len(categories)
        for name, row in zip(tutors, counts):
            plt.bar(positions, row, bottom=bottoms, label=name)
            bottoms = [base + extra for base, extra in zip(bottoms, row)]

        plt.xticks(positions, [str(value) for value in categories])
        plt.xlabel(xlable)
        plt.ylabel(ylable)
        plt.legend(loc="best")
        plt.savefig('./PDFcreater/Plots/{}/{}.png'.format(kurs, title))
        # Clear and close the figure ready for the next plot.
        plt.clf()
        plt.cla()
        plt.close()
Ejemplo n.º 4
0
def print_pred_distrib_figure(filename, bins, histo, dx, J_opt):
    """Save overlaid 'neutral' / 'deleterious' score histograms as a PNG.

    Args:
        filename: Output path; its extension is replaced with ``.png``.
        bins: Bin edges; every edge except the last is used as a bar's left side.
        histo: Indexable whose items 0 and 1 are the bar heights of the
            'neutral' and 'deleterious' series respectively.
        dx: Bar width.
        J_opt: x position of the dashed vertical marker line.

    Returns silently without plotting when ``_try_import_matplotlib()``
    returns ``None``.
    """
    assert isinstance(filename, str), 'filename must be a string'
    png_path = os.path.splitext(filename)[0] + '.png'

    if _try_import_matplotlib() is None:
        return
    from matplotlib import pyplot as plt

    canvas = plt.figure(figsize=(7, 7))
    left_edges = bins[:-1]
    series = ((0, 'blue', 'neutral'), (1, 'red', 'deleterious'))
    for row, colour, name in series:
        plt.bar(left_edges,
                histo[row],
                width=dx,
                align='edge',
                color=colour,
                alpha=0.7,
                label=name)
    plt.axvline(x=J_opt, color='k', ls='--', lw=1)
    plt.ylabel('distribution')
    plt.xlabel('predicted score')
    plt.legend()
    canvas.savefig(png_path, format='png', bbox_inches='tight')
    plt.close()
    # Restore default rc settings after plotting.
    plt.rcParams.update(plt.rcParamsDefault)
    LOGGER.info(f'Predictions distribution saved to {png_path}')
def chartData(data, keyword):
    """Show a bar chart of occurrence counts per book for a searched phrase.

    Args:
        data: Iterable of ``(book_name, count)`` pairs (item 0 is a string,
            item 1 the number of occurrences).
        keyword: The searched phrase; used only in the chart title.

    Books are ordered by descending count. Each label is the book name from
    its first ``'LIBER'`` onwards; names without ``'LIBER'`` are kept whole.
    The window is opened non-blocking so the caller continues immediately.
    """
    from matplotlib import pyplot as plt
    import numpy as np
    import math

    ranked = sorted(data, key=lambda x: x[1], reverse=True)
    info = [row[1] for row in ranked]  # number of occurrences
    books = []  # tick labels
    for row in ranked:
        name = row[0]
        start = name.find('LIBER')
        # find() returns -1 when absent; slicing with -1 would keep only the
        # last character, so fall back to the full name instead.
        books.append(name[start:] if start != -1 else name)

    # create chart
    plt.close()
    fig = plt.figure()
    width = 0.4
    ind = np.arange(len(books))
    # Centre the bars on their positions explicitly so the tick labels line
    # up regardless of the Matplotlib version's default alignment.
    plt.bar(ind, info, width=width, align='center')
    plt.xticks(ind, books)
    top = max(info) if info else 0
    if top > 0:
        # Skipped for empty or all-zero data, where the step would be 0.
        plt.yticks(np.arange(0, top * 2, math.ceil(top / 5)))
    plt.ylabel('Number of Occurences')
    plt.xlabel('Books')
    plt.title('Occurences of "' + keyword + '" by book in Curtius Rufus (Latin)')
    fig.autofmt_xdate()
    plt.show(block=False)  # display plot, and continue with program
Ejemplo n.º 6
0
def musicType_plot(rap, randb, classical, indie, pop, df):
    """Show a five-bar chart of music volume per genre.

    Bars appear in the order randb, rap, classical, pop, indie. ``df`` is
    accepted but not used by this function.
    """
    genre_names = ["randb", "rap", "classical", "pop", "indie"]
    volumes = [randb, rap, classical, pop, indie]

    plt.figure()

    # Bar positions 0..4 and y ticks every 50 units up past the tallest bar.
    positions = np.arange(len(genre_names))
    y_ticks = np.arange(0, max(volumes) + 60, 50)

    plt.xlabel('Music Type')
    plt.ylabel('Music volume')

    plt.bar(positions, volumes, 0.45, align="center")

    plt.xticks(positions, genre_names)
    plt.yticks(y_ticks)

    plt.grid(True)
    plt.show()
Ejemplo n.º 7
0
def print_path_prob_figure(filename,
                           bins,
                           histo,
                           dx,
                           path_prob,
                           smooth_path_prob,
                           cutoff=200):
    """Save a PNG of pathogenicity probability per predicted-score bin.

    Args:
        filename: Output path; its extension is replaced with ``.png``.
        bins: Bin edges; all but the last are used as left bar edges.
        histo: Array summed along axis 0 to get the per-bin total count.
        dx: Bar width.
        path_prob: Per-bin probability drawn as bars.
        smooth_path_prob: Per-bin smoothed probability drawn as a line.
        cutoff: Bins whose total count reaches this value are drawn opaque;
            the others are drawn semi-transparent and excluded from the line
            (their line value is 0).

    Returns silently without plotting when ``try_import_matplotlib()``
    returns ``None``.
    """
    assert isinstance(filename, str), 'filename must be a string'
    png_path = os.path.splitext(filename)[0] + '.png'

    if try_import_matplotlib() is None:
        return
    from matplotlib import pyplot as plt

    canvas = plt.figure(figsize=(7, 7))
    left_edges = bins[:-1]
    total_per_bin = np.sum(histo, axis=0)
    enough = total_per_bin >= cutoff
    too_few = total_per_bin < cutoff

    solid = np.where(enough, path_prob, 0)
    faded = np.where(too_few, path_prob, 0)
    curve = np.where(enough, smooth_path_prob, 0.)

    for heights, opacity in ((solid, 1), (faded, 0.7)):
        plt.bar(left_edges,
                heights,
                width=dx,
                align='edge',
                color='red',
                alpha=opacity)
    # The line is drawn through the bin centres.
    plt.plot(left_edges + dx / 2, curve, color='orange')
    plt.ylabel('pathogenicity prob.')
    plt.xlabel('predicted score')
    plt.ylim((0, 1))
    canvas.savefig(png_path, format='png', bbox_inches='tight')
    plt.close()
    # Restore default rc settings after plotting.
    plt.rcParams.update(plt.rcParamsDefault)
    LOGGER.info(f'Pathogenicity plot saved to {png_path}')
Ejemplo n.º 8
0
def evaluation(X, y, model, n_preds=10, random=True, show_graph=True):
    """Predict ``n_preds`` samples, score direction accuracy, optionally plot.

    Args:
        X: Model input; ``X.shape[1]`` is taken as the sequence length and
            ``X[p:p + 1]`` is fed to ``model.predict``.
        y: Target array aligned with ``X`` (must support ``min()``, ``len``
            and slicing).
        model: Object whose ``predict`` returns a nested result; element
            ``[0][0]`` is used as the prediction.
        n_preds: Number of samples to evaluate.
        random: When true, positions are drawn uniformly from
            ``[0, len(y) - n_steps)``; otherwise positions ``0..n_preds-1``.
        show_graph: When true, show overlaid bars of true and predicted values.

    Returns:
        ``(slices, y_true, y_pred)`` where ``slices`` is a list of
        ``(true_window, window_with_prediction, hit)`` tuples and the other
        two are NumPy arrays.

    Prints the mean squared error and the direction accuracy in percent.
    """
    n_steps = X.shape[1]
    max_random_int = len(y) - n_steps
    y_true, y_pred, prediction_accuracy, slices = [], [], [], []

    # If the targets contain negative values they are treated as returns and
    # compared against 0; otherwise against the previous value of the series.
    targets_are_returns = y.min() < 0

    for i in range(n_preds):
        position = np.random.randint(0, max_random_int) if random else i

        y_hat = model.predict(X[position:position + 1])[0][0]
        y_current = y[position]
        y_pred.append(y_hat)
        y_true.append(y_current)

        # NOTE(review): for position == 0 this reads y[-1], and for
        # position < n_steps the window slices below start at a negative
        # index -- confirm that small positions are intended to be sampled.
        c = 0 if targets_are_returns else y[position - 1]

        # A hit means prediction and truth lie on the same side of ``c``.
        if ((y_hat > c) & (y_current > c)) or ((y_hat < c) & (y_current < c)):
            acc = 1
        else:
            acc = 0

        prediction_accuracy.append(acc)
        slices.append((list(y[position - n_steps:position + 1]),
                       list(y[position - n_steps:position]) + [y_hat], acc))

    if show_graph:
        plt.rcParams['figure.dpi'] = 227
        plt.style.use('seaborn-whitegrid')
        plt.figure(figsize=(16, 6))
        plt.bar(range(n_preds),
                y_true[:],
                width=.7,
                alpha=.6,
                color="#4ac2fb",
                label="True")
        plt.bar(range(n_preds),
                y_pred[:],
                width=.7,
                alpha=.6,
                color="#ff4e97",
                label="Predicted")
        plt.axhline(0, color="#333333", lw=.8)
        plt.legend(loc=1)
        # The keyword is lower-case ``fontsize``.
        plt.title('Daily Return Prediction', fontsize=15)
        plt.show()

    print('MSE:', mean_squared_error(y_true, y_pred))
    # Round to two decimals (the 2 belongs to round(), not to format()).
    accuracy_pct = sum(prediction_accuracy) / len(prediction_accuracy) * 100
    print('Accuracy: {}%'.format(round(accuracy_pct, 2)))
    return slices, np.array(y_true), np.array(y_pred)
Ejemplo n.º 9
0
def char_positive():
    """Show a bar chart of ``statistic_positive()`` labelled by ``Category``.

    One bar is drawn per value returned by ``statistic_positive()``; the
    module-level ``Category`` sequence supplies the tick labels.
    """
    tick_names = Category
    heights = statistic_positive()
    slots = np.arange(len(heights))

    plt.figure(figsize=(10, 5))
    plt.bar(slots, heights)
    plt.xticks(slots, tick_names)
    plt.title('What categories we choose the most')
    plt.show()
Ejemplo n.º 10
0
 def plot_boring_barchart(self,dataframe,x,y,title,xlable,ylable, kurs):
     """Save a plain blue bar chart of ``y`` over ``x`` as a PNG.

     ``title`` is used only as the file name; the image is written to
     ``./PDFcreater/Plots/<kurs>/<title>.png``. ``dataframe`` is accepted
     but not used by this method.
     """
     plt.bar(x, y, color='blue')
     plt.xlabel(xlable)
     plt.ylabel(ylable)
     out_path = './PDFcreater/Plots/{}/{}.png'.format(kurs,title)
     plt.savefig(out_path)
     # Clear and close the figure ready for the next plot.
     plt.clf()
     plt.cla()
     plt.close()
Ejemplo n.º 11
0
def getHistogramPGN(df):
    """Save a two-bar chart of the label balance and return the image path.

    The last column of ``df`` is taken as the label. The second bar is
    ``sum / count`` of that column and the first bar is its complement.

    Returns:
        The path ``WORKING_PATH + "/___histogram.png"`` that was written.
    """
    y = df.iloc[:, -1]
    perc_win = y.sum() / y.count()
    height = [1 - perc_win, perc_win]
    bars = ('win=0 (%)', 'win=1 (%)')
    y_pos = np.arange(len(bars))
    path = WORKING_PATH + "/___histogram.png"

    # Draw on a dedicated figure and always close it, so repeated calls do
    # not pile bars onto a shared figure or leak open figures.
    fig = plt.figure()
    try:
        plt.bar(y_pos, height)
        plt.xticks(y_pos, bars)
        plt.title("labels histogram")
        fig.savefig(path)
    finally:
        plt.close(fig)
    return path
Ejemplo n.º 12
0
 def _plot_lengths(self, lens, prec, figsize):
     '''Plots a list of file lengths displaying prec digits of precision.

     Each length is rounded to ``prec`` digits and one bar is drawn per
     distinct rounded value, its height being how often that value occurs.
     Ten evenly spaced x ticks span the integer range of the data.
     '''
     rounded = [round(i, prec) for i in lens]
     rounded_count = Counter(rounded)
     plt.figure(num=None, figsize=figsize, dpi=80, facecolor='w', edgecolor='k')
     labels = sorted(rounded_count)
     values = [rounded_count[i] for i in labels]
     # Neighbouring rounded values are 10**-prec apart, so use that as the
     # bar width; a fixed width of 1 makes bars overlap when prec > 0.
     width = 10.0 ** -prec if prec else 1
     plt.bar(labels, values, width)
     if rounded:
         # min()/max() are undefined for empty input; keep default ticks then.
         xticks = np.linspace(int(min(rounded)), int(max(rounded))+1, 10)
         plt.xticks(xticks)
     plt.show()
Ejemplo n.º 13
0
def plot_hist(a,b):
    """Save a bar chart of 10000 beta-binomial draws with 20 trials each.

    A success probability is sampled from ``Beta(a, b)`` for every draw, then
    one ``Binomial(20, p)`` value is sampled per probability. The frequency
    of each distinct outcome is plotted and written to
    ``../Figures/a<a>_b<int(b)>.pdf``.
    """
    v = np.random.beta(a, b, 10000)
    # binomial() broadcasts over the probability array: one draw per entry.
    s = np.random.binomial(20, v).astype(float)

    figure()

    # Distinct outcomes and how often each occurs (replaces the removed
    # scipy.stats.itemfreq, which was also being evaluated twice).
    center, hist = np.unique(s, return_counts=True)
    plt.bar(center, hist, align = 'center', width = 0.7)
    plt.savefig('../Figures/a'+str(a)+'_b'+str(int(b))+'.pdf')
Ejemplo n.º 14
0
def plot_hist(a, b):
    """Save a bar chart of 10000 beta-binomial draws with 20 trials each.

    A success probability is sampled from ``Beta(a, b)`` for every draw, then
    one ``Binomial(20, p)`` value is sampled per probability. The frequency
    of each distinct outcome is plotted and written to
    ``../Figures/a<a>_b<int(b)>.pdf``.
    """
    v = np.random.beta(a, b, 10000)
    # binomial() broadcasts over the probability array: one draw per entry.
    s = np.random.binomial(20, v).astype(float)

    figure()

    # np.unique with return_counts replaces scipy.stats.itemfreq, which is no
    # longer available in SciPy, and computes values and counts in one pass.
    center, hist = np.unique(s, return_counts=True)
    plt.bar(center, hist, align='center', width=0.7)
    plt.savefig('../Figures/a' + str(a) + '_b' + str(int(b)) + '.pdf')
Ejemplo n.º 15
0
def plotDisparityHistogram(network=None):
    """Show a bar chart of the total spike count per disparity value.

    For each disparity index ``d`` in ``0 .. maxDisparity - minDisparity``
    the populations ``network[x][1]`` for ``x`` in ``sameDisparityInd[d]``
    are collected and all their ``get_spike_counts()`` values are summed.
    The list of totals is printed and then plotted.

    Args:
        network: Indexable of entries whose item 1 exposes
            ``get_spike_counts()``; must not be ``None``.
    """
    assert network is not None, "Network is not initialised! Visualising failed."
    import matplotlib.pyplot as plt
    from NetworkBuilder import sameDisparityInd

    disparities = range(0, maxDisparity-minDisparity+1)
    spikesPerDisparityMap = []
    for d in disparities:
        cellsOut = [network[x][1] for x in sameDisparityInd[d]]
        spikesPerDisparityMap.append(
            sum(sum(x.get_spike_counts().values()) for x in cellsOut))

    # Parenthesised form prints identically on Python 2 and is valid Python 3.
    print(spikesPerDisparityMap)

    plt.bar(disparities, spikesPerDisparityMap, align='center')

    plt.show()
def _Learning(descriptor_list, n_clusters, n_images, train_labels, ret=None, std=None):
	"""Cluster descriptors into visual words and train an SVC on the histograms.

	Args:
		descriptor_list: Sequence with one 2-D descriptor array per image.
		n_clusters: Number of k-means clusters (visual words).
		n_images: Number of images; ``descriptor_list[0 .. n_images-1]`` is read.
		train_labels: Class labels passed to ``SVC.fit``.
		ret: Optional precomputed cluster index per stacked descriptor; when
			given it is used instead of the k-means assignments.
		std: Optional already-fitted scaler; when given its ``transform`` is
			used instead of fitting a new ``StandardScaler``.

	Returns:
		The fitted ``SVC`` classifier.

	Also shows a bar chart of how often each visual word occurs overall.
	"""
	#set up model
	kmeans_obj = KMeans(n_clusters=n_clusters)

	#format the data: stack all per-image descriptors into one array
	descriptor_vstack = np.vstack(descriptor_list)

	#perform clustering
	kmeans_ret = kmeans_obj.fit_predict(descriptor_vstack)
	assignments = kmeans_ret if ret is None else ret

	#develop vocabulary: one histogram of visual words per image
	mega_histogram = np.zeros((n_images, n_clusters))
	old_count = 0
	for i in range(n_images):
		l = len(descriptor_list[i])
		for j in range(l):
			mega_histogram[i][assignments[old_count+j]] += 1
		# Advance past ALL descriptors of this image; the assignments are laid
		# out in the same order as the stacked descriptor array.
		old_count += l

	#display trained vocabulary
	x_scaler = np.arange(n_clusters)
	y_scalar = mega_histogram.sum(axis=0).astype(np.int32)
	# Bars are centred explicitly so the ticks sit under them regardless of
	# the Matplotlib version's default alignment.
	plt.bar(x_scaler, y_scalar, align='center')
	plt.xlabel("Visual Word Index")
	plt.ylabel("Frequency")
	plt.title("Complete Vocabulary Generated")
	plt.xticks(x_scaler, x_scaler)
	plt.show()

	#standardize
	if std is None:
		scale = prep.StandardScaler().fit(mega_histogram)
		mega_histogram = scale.transform(mega_histogram)
	else:
		mega_histogram = std.transform(mega_histogram)

	#train--USES SVC!!
	clf = SVC()
	clf.fit(mega_histogram, train_labels)
	return clf
Ejemplo n.º 17
0
def color_info(color_dist, inner_deck_list):
    """Print and plot the colour distribution returned by ``color_dist``.

    Args:
        color_dist: Callable invoked as ``color_dist(inner_deck_list, "deck")``
            that returns a mapping of colour to count.
        inner_deck_list: Passed through to ``color_dist``.
    """
    color_dict = color_dist(inner_deck_list, "deck")
    print("colors:")
    pp.pprint(color_dict)

    _, ax = plt.subplots()
    ax.set(xlabel='Colors',
           ylabel='Count',
           title='Distribution of Colored cards in deck')

    # Split the mapping into parallel lists of colour names and counts.
    pairs = list(color_dict.items())
    names = [pair[0] for pair in pairs]
    counts = [pair[1] for pair in pairs]
    plt.bar(names, counts)
    plt.show()
Ejemplo n.º 18
0
def myplot(feat, i, j, df):
    """Show value counts of ``feat`` for rows ``i..j`` whose rate exceeds 3.8.

    Args:
        feat: Column whose distinct values are counted.
        i: First row position of the slice (inclusive).
        j: Last row position of the slice (inclusive).
        df: DataFrame containing ``feat`` and a ``"rate"`` column.

    Bars are ordered by the value of ``feat``, not by frequency.
    """
    window = df[i:j + 1]
    well_rated = window[window["rate"] > 3.8]
    counts = well_rated[feat].value_counts()

    # (count, value) pairs sorted by value.
    ordered = sorted(zip(list(counts), list(counts.keys())),
                     key=lambda pair: pair[1])
    height = [pair[0] for pair in ordered]
    bars = [pair[1] for pair in ordered]

    slots = np.arange(len(bars))
    plt.bar(slots, height, color=sns.color_palette())
    plt.xticks(slots, bars, rotation="vertical")
    plt.xlabel("Suitable values")
    plt.ylabel("Count")
    plt.title("Values for " + feat)
    plt.show()
Ejemplo n.º 19
0
 def fnctn(a,c):
     """Save a bar chart of the count of column ``c`` per group of ``a``.

     The module-level ``data`` frame is grouped by ``a``; the non-null count
     of ``c`` in each group becomes one bar. The image is written to
     ``./static/q18.jpg``.
     """
     df2 = pd.DataFrame(data.groupby(a)[c].count())
     df2=df2.astype(float)
     # pyplot.bar takes (x, height) and returns a bar container, not an Axes,
     # so draw on an explicit Axes and style/save through it and its figure.
     fig, ax = plt.subplots()
     ax.bar(df2.index, df2.iloc[:,0])
     ax.tick_params(axis='x', labelrotation=30, labelsize=6)
     fig.savefig("./static/q18.jpg")
     plt.close(fig)
Ejemplo n.º 20
0
def draw_bar(savename):
    """Save and show a grouped bar chart of two fixed timing series.

    Args:
        savename: File name appended to ``'../eps/'`` for the saved figure.
    """
    import matplotlib.pyplot as plt

    first_series = [34.89, 33.87, 72.37]
    second_series = [551.72, 552.87, 698.34]
    tick_text = ['training time \n on MNIST', 'training time on \n Fashion-MNIST', 'training time \n on CIFAR-10']

    # Two bars share a group of total width 0.54.
    group_width, series_count = 0.54, 2
    bar_width = group_width / series_count
    left = np.arange(3) - (group_width - bar_width)

    plt.figure()
    plt.bar(left, first_series, width=bar_width)
    plt.bar(left + bar_width, second_series, width=bar_width)
    plt.ylabel('time(s)')
    plt.xticks(left, tick_text)
    # NOTE(review): neither bar series carries a label, so this legend has
    # nothing to list -- confirm which names the two series should get.
    plt.legend()
    plt.savefig('../eps/' + savename)
    plt.show()
Ejemplo n.º 21
0
def print_feat_imp_figure(filename, feat_imp, featset):
    """Save a bar chart of feature importances as a PNG.

    Args:
        filename: Output path; its extension is replaced with ``.png``.
        feat_imp: Sequence of importance values, one bar each.
        featset: Tick labels for the bars, in the same order as ``feat_imp``.

    Returns silently without plotting when ``_try_import_matplotlib()``
    returns ``None``.
    """
    assert isinstance(filename, str), 'filename must be a string'
    png_path = os.path.splitext(filename)[0] + '.png'

    if _try_import_matplotlib() is None:
        return
    from matplotlib import pyplot as plt

    canvas = plt.figure(figsize=(7, 7))
    positions = range(len(feat_imp))
    plt.bar(positions, feat_imp, align='center', tick_label=featset)
    plt.xticks(rotation='vertical')
    plt.ylabel('feat. importance')
    canvas.savefig(png_path, format='png', bbox_inches='tight')
    plt.close()
    # Restore default rc settings after plotting.
    plt.rcParams.update(plt.rcParamsDefault)
    LOGGER.info(f'Feat. importance plot saved to {png_path}')
Ejemplo n.º 22
0
def drawTKY():
    """
    Function plots 10 most popular Venue Categories in Tokyo.

    Reads ``dataset_TSMC2014_TKY.csv`` from the current working directory,
    counts the rows per ``venueCategory`` and shows the ten categories with
    the highest counts in ascending order.
    """
    #Import the Tokyo dataset from the working directory
    TKY = pd.read_csv('dataset_TSMC2014_TKY.csv')

    TKY=TKY[["venueCategory","venueCategoryId"]]
    grouped2=TKY.groupby(["venueCategory"]).count()
    #Ascending sort puts the most frequent categories last; keep the last ten
    #whatever the total number of categories is
    grouped2=grouped2.sort_values('venueCategoryId').tail(10)

    #Plot bars of most popular venue categories
    plt.figure(figsize=(16,6))
    plt.style.use('fivethirtyeight')
    plt.bar(grouped2.index,grouped2["venueCategoryId"])
    plt.title("10 Most Popular Venue Categories \n Tokyo: 2012-2013",fontsize=14,color='black')
    plt.ylabel("Check-ins per Venue Category",fontsize=14)
    plt.show()
Ejemplo n.º 23
0
def drawNYC():
    """
    Function plots 10 most popular Venue Categories in New York.

    Reads ``dataset_TSMC2014_NYC.csv`` from the current working directory,
    counts the rows per ``venueCategory`` and shows the ten categories with
    the highest counts in ascending order.
    """
    #Import the New York dataset from the working directory
    NYC = pd.read_csv('dataset_TSMC2014_NYC.csv')

    NYC=NYC[["venueCategory","venueCategoryId"]]
    grouped=NYC.groupby(["venueCategory"]).count()
    #Ascending sort puts the most frequent categories last; keep the last ten
    #whatever the total number of categories is
    grouped=grouped.sort_values('venueCategoryId').tail(10)

    #Plot bars of most popular venue categories
    plt.figure(figsize=(16,6))
    plt.style.use('fivethirtyeight')
    plt.bar(grouped.index,grouped["venueCategoryId"])
    plt.title("10 Most Popular Venue Categories \n New York: 2012-2013",fontsize=14,color='black')
    plt.ylabel("Check-ins per Venue Category",fontsize=14)
    plt.show()
Ejemplo n.º 24
0
def visual_importnance(X, forest):
    """Print and plot the forest's feature importances, most important first.

    Args:
        X: Array whose second dimension is the number of features.
        forest: Fitted ensemble exposing ``feature_importances_`` and
            ``estimators_`` (each estimator with its own
            ``feature_importances_``).

    The error bars are the standard deviation of the importances across the
    individual estimators.
    """
    n_features = X.shape[1]
    importances = forest.feature_importances_
    per_tree = [tree.feature_importances_ for tree in forest.estimators_]
    std = np.std(per_tree, axis=0)
    # Feature indices ordered from highest to lowest importance.
    order = np.argsort(importances)[::-1]

    print("Feature ranking:")
    for position in range(n_features):
        feature = order[position]
        print("%d. feature %d (%f)" % (position + 1, feature, importances[feature]))

    slots = range(n_features)
    plt.figure()
    plt.title("Feature importances")
    plt.bar(slots, importances[order],
            color="r", yerr=std[order], align="center")
    plt.xticks(slots, order)
    plt.xlim([-1, n_features])
    plt.show()
Ejemplo n.º 25
0
def sunSat_plot(saturday_mean, sunday_mean, df):
    """Show a two-bar chart comparing the Saturday and Sunday means.

    ``df`` is accepted but not used by this function.
    """
    day_names = ["Saturday", "Sunday"]
    means = [saturday_mean, sunday_mean]

    plt.figure()

    # Bar positions 0..1 and y ticks every 50 units up past the taller bar.
    positions = np.arange(len(day_names))
    y_ticks = np.arange(0, max(means) + 50, 50)

    plt.bar(positions, means, 0.45, align="center")

    plt.xticks(positions, day_names)
    plt.yticks(y_ticks)

    plt.grid(True)
    plt.show()
Ejemplo n.º 26
0
def make_hists(self, X, y):
    """Save one class-stacked histogram image per non-categorical feature.

    The features are the keys of ``self.train_df`` minus ``'Vote'`` and minus
    the module-level ``categorical_features``. For each one, 15 common bins
    are computed over all of ``X[feature]``, the values are split by class
    label from ``y``, and the per-class histograms are stacked on top of each
    other. Each image is written to ``hists_label/<feature>.jpeg``.

    Args:
        X: DataFrame holding the feature columns.
        y: Object whose ``values.flatten()`` yields one class label per row.
    """
    colors = [
        'red', 'tan', 'lime', 'orange', 'black', 'yellow', 'green', 'pink',
        'red', 'brown', 'grey', 'purple', 'navy'
    ]
    features = (set(self.train_df.keys()) -
                {'Vote'}) - set(categorical_features)
    class_values = y.values.flatten()
    for feature in features:
        df = pd.DataFrame({
            "x": X[feature].values,
            "class": class_values
        })

        # Shared bin edges so every class is counted on the same grid.
        _, edges = np.histogram(df["x"], bins=15)
        bar_width = np.diff(edges)[0]
        histdata = []
        labels = []
        for n, group in df.groupby("class"):
            histdata.append(np.histogram(group["x"], bins=edges)[0])
            labels.append(n)

        hist = np.array(histdata)
        # Running totals: row i-1 is where class i's bars have to start.
        histcum = np.cumsum(hist, axis=0)

        # One figure per feature; without this every saved image would also
        # contain the bars of all previously processed features.
        fig = plt.figure()
        try:
            for i in range(len(hist)):
                extra = {}
                if i > 0:
                    # The first class keeps the default colour and sits on
                    # the axis; later classes are stacked and cycle through
                    # the palette.
                    extra = {
                        "bottom": histcum[i - 1, :],
                        "color": colors[i % len(colors)],
                    }
                plt.bar(edges[:-1],
                        hist[i, :],
                        width=bar_width,
                        label=labels[i],
                        align="edge",
                        **extra)

            plt.legend(title="class")
            fig.savefig('hists_label/' + feature + '.jpeg')
        finally:
            plt.close(fig)
Ejemplo n.º 27
0
    def scree_plot(self):
        """Show individual and cumulative explained variance per component.

        ``self.eig_vals`` is sorted in descending order and each value is
        expressed as a percentage of the total; bars show the individual
        shares and a step line shows their running sum.
        """
        total = sum(self.eig_vals)
        ranked = sorted(self.eig_vals, reverse=True)
        explained_var = [(value / total) * 100 for value in ranked]
        cum_var = np.cumsum(explained_var)
        component_ids = range(len(explained_var))

        with plt.style.context('seaborn-darkgrid'):
            plt.bar(component_ids,
                    explained_var,
                    align='center',
                    label='individual explained variance')
            plt.step(component_ids,
                     cum_var,
                     where='mid',
                     label='cumulative explained variance',
                     color="red")
            plt.ylabel("Cumulative variance")
            plt.xlabel("Principal components")
            plt.tight_layout()
            plt.legend(loc='best')
            plt.show()
Ejemplo n.º 28
0
    def plotAmod(self):
        """Draw a bar chart of the elapsed time recorded for each of five states.

        Reads ``FSM1_elapsed``, ``FSM2_elapsed``, ``FSM3_elapsed``,
        ``Loc_elapsed`` and ``Vel_elapsed`` from ``self`` and labels the chart
        with ``self.veh_name``. The figure is neither shown nor saved here.
        """
        # (tick label, elapsed value) for every tracked state, in bar order.
        durations = [
            ('Intersection Active', self.FSM1_elapsed),
            ('State Estimator Active', self.FSM2_elapsed),
            ('Indefinite Navigation Active', self.FSM3_elapsed),
            ('Localization Active', self.Loc_elapsed),
            ('Wheels Active', self.Vel_elapsed),
        ]
        names = [name for name, _ in durations]
        elapsed = [value for _, value in durations]
        slots = list(range(1, len(durations) + 1))

        # The two colours alternate across the five bars.
        plt.bar(slots,
                elapsed,
                tick_label=names,
                width=0.5,
                color=['blue', 'green'])
        plt.xlabel('Duckiebot states')
        plt.ylabel('Duration (s)')
        plt.title('Time spent by %s per state' % self.veh_name)
Ejemplo n.º 29
0
def plot_value_array(i, predictions_array, true_label, number_of_classes=3):
    """Highlight the predicted and the true class on a row of full-height bars.

    Args:
        i: Index into ``true_label`` selecting the sample's true class.
        predictions_array: Scores per class; only its argmax is used.
        true_label: Indexable of true class indices.
        number_of_classes: Number of bars to draw.

    Every bar has height 1 and is white; the predicted class is coloured red
    and the true class blue. Blue is applied last, so a correct prediction
    ends up blue.
    """
    actual = true_label[i]
    class_ids = range(number_of_classes)

    plt.style.use(['classic'])
    plt.grid(False)
    plt.xticks(class_ids)
    plt.yticks([])
    bars = plt.bar(class_ids, 1, color="#FFFFFF")
    plt.ylim([0, 1])

    guess = np.argmax(predictions_array)
    bars[guess].set_color('red')
    bars[actual].set_color('blue')
Ejemplo n.º 30
0
def make_bar_graph(data=housing):
    '''
    Draw a 10-bin bar histogram of every column of ``data`` in one figure.

    Each column gets its own subplot in a grid that is 4 wide and at least
    3 high. Missing values are dropped before binning; the columns are
    expected to be numeric.

    :param data: DataFrame whose columns are plotted (defaults to ``housing``)
    :return: None
    '''
    n_grid_cols = 4
    n_bins = 10
    columns = list(data.columns)
    # Keep the 3x4 layout and only add rows when there are more than 12 columns.
    n_grid_rows = max(3, -(-len(columns) // n_grid_cols))

    mp.figure('Bar', facecolor='lightgray')
    mp.title('Bar', fontsize=20)
    gs = mg.GridSpec(n_grid_rows, n_grid_cols)
    for index, column in enumerate(columns):
        # Fill the grid row by row.
        i, j = divmod(index, n_grid_cols)
        # Create the subplot
        mp.subplot(gs[i, j])
        # Add text inside the plot: position, content, alignment, size,
        # colour and transparency
        mp.text(0.5, 0.5, str(i) + '+' + str(j), ha='center', va='center', size=35, color='red', alpha=0.5)
        # Remove the axis ticks
        mp.xticks(())
        mp.yticks(())
        # Draw the bar chart: counts of values in 10 equal-width intervals
        # between the column's minimum and maximum
        single_data = data[column].dropna()
        y, _ = np.histogram(single_data, bins=n_bins)
        x = np.arange(len(y))
        mp.bar(x, y, 0.4, color='dodgerblue', label=column, alpha=0.75)

    # Switch to a compact layout
    mp.tight_layout()
Ejemplo n.º 31
0
def musicCategory_plt(randb_ser, rap_ser, classical_ser, pop_ser, indie_ser,df):
    """Show a five-bar chart of the length of each genre's series.

    Bars appear in the order randb, rap, classical, pop, indie; each bar's
    height is ``len()`` of the corresponding argument. ``df`` is accepted but
    not used by this function.
    """
    genre_names = ["randb", "rap", "classical", "pop", "indie"]
    series_in_order = (randb_ser, rap_ser, classical_ser, pop_ser, indie_ser)

    plt.figure()

    day_counts = [len(series) for series in series_in_order]

    # Bar positions 0..4 and y ticks every 50 units up past the tallest bar.
    positions = np.arange(len(genre_names))
    y_ticks = np.arange(0, max(day_counts) + 60, 50)

    plt.xlabel('Music Type')
    plt.ylabel('Days listened')

    plt.bar(positions, day_counts, 0.45, align="center")

    plt.xticks(positions, genre_names)
    plt.yticks(y_ticks)

    plt.grid(True)
    plt.show()
Ejemplo n.º 32
0
def plot_response_time_variousT():
    """Show, per mode, the mean response time for each capacity-change time.

    For every ``mode`` in the module-level ``modes`` and every value ``i`` in
    ``capacity_change_time`` the CSV ``<mode><i>.csv`` is parsed with
    ``scalar_df_parse``; the mean of the ``value`` column over rows named
    ``"responseTime"`` becomes one bar. Each mode's chart is shown on its
    own, and afterwards the current figure is labelled and saved as a PNG.
    """
    # NOTE(review): this frame is filled but never read afterwards.
    dataframe = pd.DataFrame()
    dataframe['index'] = [
        f"{mode},t={k}s" for k in capacity_change_time for mode in modes
    ]

    for mode in modes:
        change_times = []
        meanResponseTime = []

        for i in capacity_change_time:
            df = scalar_df_parse(
                f"C:\\Users\\Leonardo Poggiani\\Documents\\GitHub\\PECSNproject\\csv\\pool_classico_varia_T\\{mode}{i}.csv"
            )
            response_rows = df[df.name == "responseTime"]
            meanResponseTime.append(response_rows.value.mean())
            change_times.append(i)

        plt.bar(change_times, meanResponseTime)
        plt.title(f"{mode}")
        plt.rcParams["figure.figsize"] = (12, 10)
        plt.xticks(rotation=25)
        plt.show()

    # NOTE(review): the per-mode figures were already shown above and no bar
    # carries a label -- confirm what this final saved figure should contain.
    plt.xlabel("Value of t")
    plt.ylabel("Response time")
    plt.title("Comparison of various values of t")
    plt.legend(loc='best')
    plt.savefig(
        "C:\\Users\\Leonardo Poggiani\\Documents\\GitHub\\PECSNproject\\analysis\\variandoT\\responseTimeAlVariareDiT2.png"
    )
# Enlarge the current figure. ``plt`` is used as pyplot everywhere below
# (plt.axes, plt.bar, plt.xticks), so gcf() is called on it directly.
fig = plt.gcf()
fig.set_size_inches(16, 10)

# Count users per occupation (field 3) with an explicit map/reduceByKey.
count_by_occupation = user_fields.map(lambda fields: (fields[3], 1)).reduceByKey(lambda x, y: x + y).collect()
x_axis1 = np.array([c[0] for c in count_by_occupation])
y_axis1 = np.array([c[1] for c in count_by_occupation])

# Order the occupations by ascending count; compute the ordering once.
order = np.argsort(y_axis1)
x_axis = x_axis1[order]
y_axis = y_axis1[order]

pos = np.arange(len(x_axis))
width = 1.0
ax = plt.axes()
# Ticks sit at the middle of each bar; the bars are edge-aligned explicitly
# so this holds regardless of the Matplotlib version's default alignment.
ax.set_xticks(pos + (width / 2))
ax.set_xticklabels(x_axis)
plt.bar(pos, y_axis, width, color='lightblue', align='edge')
plt.xticks(rotation=30)
fig = plt.gcf()
fig.set_size_inches(16, 10)

# Same count via countByValue; print both so they can be compared. Each
# heading is paired with the result produced by that approach.
count_by_occupation2 = user_fields.map(lambda fields: fields[3]).countByValue()
print("Map-reduce approach:")
print(dict(count_by_occupation))
print("")
print("countByValue approach:")
print(dict(count_by_occupation2))

'''
    explore movie data
'''
Ejemplo n.º 34
0
import matplotlib as plt
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import datetime    


# Load the tab-separated file; it has no header row, so name the two columns.
Data = pd.read_csv(
    "wikipedia_by_month.txt",
    sep='\t',
    header=None,
    names=['Date', 'Count'],
)

# One bar per row at positions 0..N-1, each labelled with its date string.
fig = plt.figure()
ind = np.arange(len(Data))
plt.bar(ind, Data['Count'])
fig.autofmt_xdate()
plt.xticks(ind, Data['Date'], rotation='vertical')
plt.savefig("figure.pdf")