Esempio n. 1
0
def draw_histo(df, col_name, x_start, bin_width, n_bins, title, output_dir,
               output_filename):
    """
    Plot one column of a pandas dataframe as a histogram and save the image.

    The column's maximum and minimum are printed to stdout, and the figure is
    cleared once it has been written to disk.

    Args:
        df: a Pandas dataframe
        col_name: a string naming the dataframe column to plot
        x_start: offset added to every generated bin edge
        bin_width: the spacing between consecutive bin edges
        n_bins: how many bin edges are generated
        title: a string giving the title printed on the plot
        output_dir: the directory the output image is written to
        output_filename: the file name of the output image

    NOTE(review): the edges are ``x_start + bin_width * (i + 0.5)`` for
    ``i = 0 .. n_bins - 1``, so the first edge sits half a bin to the right of
    ``x_start`` and ``n_bins`` edges delimit ``n_bins - 1`` bars. Confirm this
    is the intended layout.
    """
    column = df[col_name]

    # Build the explicit list of bin edges handed to pandas.
    edges = []
    for step in np.arange(0, n_bins):
        edges.append(bin_width * (step + 0.5) + x_start)

    axes = column.plot.hist(bins=edges, color='#88d498', edgecolor='black')
    axes.set_title(title)

    highest = str(column.max())
    lowest = str(column.min())
    print(col_name + ' max: ' + highest + ', min: ' + lowest)

    # Scientific notation on the x axis outside the 10**0 .. 10**3 range.
    axes.ticklabel_format(axis='x', style='sci', scilimits=(0, 3))

    target = os.path.join(output_dir, output_filename)
    axes.get_figure().savefig(target)
    axes.get_figure().clf()
Esempio n. 2
0
 def getPlot(self, params):
     """Plot the column selected by ``params['sort']`` against the week.

     Args:
         params: mapping holding a ``'sort'`` entry that names the column to
             plot; the whole mapping is also forwarded to ``self.getData``.

     Returns:
         The figure that owns the axes pandas drew on.
     """
     column = str(params['sort'])
     df = self.getData(params)
     # DataFrame.plot's ``x`` argument must be a column label or position;
     # handing it the week Series raises on current pandas. Indexing the
     # selected column by the week values first yields the same x axis.
     selected = df[[column]].set_index(df['week'])
     axes = selected.plot()
     return axes.get_figure()
Esempio n. 3
0
def plotfigTimeSeriesThreetoOne(data):
    """Draw ``data`` as a line chart and save it below ``./plot/<PostObject>/``.

    The output directory name comes from the module-level ``PostObject``.
    The figure is cleared after it has been written.

    Args:
        data: object whose ``plot`` method returns the axes to label
            (presumably a pandas DataFrame or Series; confirm with callers).
    """
    axes = data.plot(title="wind predict before and after MOS ", lw=2)
    axes.set_ylabel("wind speed (m/s) ")
    axes.set_xlabel("time series")

    figure = axes.get_figure()
    target = './plot/' + PostObject + '/TimeSeriesThreetoOne.png'
    figure.savefig(target)
    figure.clf()
Esempio n. 4
0
def sizeplot(dataf):
    """Return a figure with the 'Number of households in simulation' bar chart.

    Args:
        dataf: container supporting ``copy()`` and ``["ml"]`` lookup; the
            ``"ml"`` entry is handed to ``prep_sizebplot`` to build the
            plotting table.

    Returns:
        The figure holding the seaborn bar plot of ``size`` per ``year``.
    """
    snapshot = dataf.copy()
    bars = prep_sizebplot(snapshot["ml"])

    axes = sns.barplot(data=bars, x='year', y='size', palette="Blues_d")
    axes.set_title('Number of households in simulation')
    return axes.get_figure()
Esempio n. 5
0
def countplot(dataf):
    """Return a figure with a bar chart of ``relative`` per ``year``.

    Only rows of the ``"standard"`` entry whose ``predicted`` value equals 0
    are passed to ``prep_countbplot`` to build the plotting table.

    Args:
        dataf: container supporting ``copy()`` and ``["standard"]`` lookup.

    Returns:
        The figure holding the seaborn bar plot.
    """
    snapshot = dataf.copy()
    standard = snapshot["standard"]
    not_predicted = standard[standard['predicted'] == 0]

    bars = prep_countbplot(not_predicted)
    axes = sns.barplot(data=bars, x='year', y='relative', palette="Blues_d")
    return axes.get_figure()
Esempio n. 6
0
def plot_freqs(files, new_route="./data",
    old_route="./not_treated_data", freqs=None, times=None, errors=None):
    """Plot one bar per input workbook and save it as ``Freqs_vs_Time.png``.

    For every file the first sheet is read, its first column is dropped and
    the remaining values are multiplied by 30. The bar height is the sum of
    those values divided by the number of columns; the error bar is their
    standard deviation. The bar label is the part of the file name after the
    last ``_`` and before the first following ``.``.

    Bars are ordered chronologically when the labels are exactly the twelve
    English month names, and alphabetically otherwise.

    Args:
        files: iterable of workbook file names located in ``old_route``.
        new_route: directory the image is written to.
        old_route: directory the workbooks are read from.
        freqs: optional list of already-computed bar heights; the values
            computed here are appended to it in place.
        times: optional list of already-known bar labels, extended in place.
        errors: optional list of already-computed deviations, extended in
            place.

    Raises:
        ValueError: if there is nothing to plot or the three lists end up
            with different lengths.
    """
    # Fresh lists per call: list defaults would be shared between calls and
    # keep growing with every invocation.
    freqs = [] if freqs is None else freqs
    times = [] if times is None else times
    errors = [] if errors is None else errors

    for filename in files:
        sheets = pd.read_excel(old_route + "/" + filename,
            sheet_name=None)
        # sheet_name=None returns every sheet; only the first one is used.
        _, df = next(iter(sheets.items()))
        df = df.drop(df.columns[0], axis=1)
        print(df)
        df = df * 30
        print(df)
        values = df.to_numpy()
        errors.append(values.std())
        freqs.append(values.sum() / len(df.columns))
        times.append(filename.split("_")[-1].split(".")[0])

    if not times:
        raise ValueError("plot_freqs: no data to plot")
    if not (len(times) == len(freqs) == len(errors)):
        raise ValueError(
            "plot_freqs: times, freqs and errors must have the same length")

    months = ["January", "February", "March", "April", "May",
        "June", "July", "August", "September", "October",
        "November", "December"]
    # A single ordering is computed and applied to both value lists, so a bar
    # and its error can never be separated, even when labels repeat.
    if sorted(times) == sorted(months):
        rank = {name: pos for pos, name in enumerate(months)}
        order = sorted(range(len(times)), key=lambda i: rank[times[i]])
        labels = list(months)
    else:  # not a full set of months: order by label
        order = sorted(range(len(times)), key=lambda i: times[i])
        labels = [times[i] for i in order]
    freqs = [freqs[i] for i in order]
    errors = [errors[i] for i in order]

    new_df = pd.DataFrame({"Times": labels, "Frequencies": freqs,
        "Deviations": errors})

    # Upper axis limit: tallest bar plus largest error, rounded up by the
    # file's roundup2 helper; four evenly spaced ticks from zero.
    maxy = roundup2(max(freqs) + max(errors),
        pos=len(str(ceil(max(freqs))))-1)
    miny = 0
    ticks = [miny, (miny + maxy)/3, 2*(miny + maxy)/3, maxy]

    ax = new_df.plot(kind="bar", x="Times", y="Frequencies",
        yerr="Deviations", yticks=ticks, legend=False, figsize=(10,10),
        capsize=5, fontsize=10)
    ax.tick_params(axis="x", labelsize=10, rotation=30)
    ax.set_xlabel("Months")
    ax.set_ylabel("Absolute Frequency")
    ax.set_title("Histogram of monthly absolute frequencies")
    ax.yaxis.set_major_formatter(FuncFormatter(scientific))

    fig = ax.get_figure()
    fig.savefig(new_route + "/Freqs_vs_Time.png")
Esempio n. 7
0
 def getPlot(self, params):
     """Plot one column of a province CSV against the week.

     Args:
         params: mapping with the entries ``'ticke'`` (column to plot),
             ``'ticker3'`` (inserted into the CSV file name) and ``'year'``
             (year to keep; 0 keeps every row).

     Returns:
         The figure that owns the axes pandas drew on.
     """
     ticke = str(params['ticke'])
     ticker3 = str(params['ticker3'])
     year = float(params['year'])
     df = pd.read_csv('provinces/vhi_id_%s.csv' % ticker3, index_col=False, header=1)
     # A year of 0 disables the filter.
     w = df[df['year'] == year] if year != 0 else df
     # DataFrame.plot's ``x`` argument must be a column label or position;
     # handing it the week Series raises on current pandas. Indexing the
     # selected column by the week values first yields the same x axis.
     selected = w[[ticke]].set_index(w['week'])
     axes = selected.plot()
     return axes.get_figure()
Esempio n. 8
0
def histogram():
    """Overlay the distributions of five yearly columns in ``overlay.png``.

    Reads the ``'2010'`` .. ``'2014'`` columns of the module-level
    ``pivot_cost``, converts each to numeric values and drops missing
    entries, then draws them with ``sns.distplot`` from 2014 down to 2010.
    The figure of the first drawn plot is saved.
    """
    years = ('2010', '2011', '2012', '2013', '2014')
    cleaned = [pd.to_numeric(pivot_cost[year]).dropna() for year in years]

    first_axes = None
    # Newest year first, matching the order the layers are stacked in.
    for series in reversed(cleaned):
        axes = sns.distplot(series)
        if first_axes is None:
            first_axes = axes

    first_axes.get_figure().savefig("overlay.png")
Esempio n. 9
0
# One colour per cluster label, sampled evenly from the Spectral colormap.
colors = [
    plt.cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))
]
for k, col in zip(unique_labels, colors):
    if k == -1:
        # Black used for noise.
        col = [0, 0, 0, 1]

    class_member_mask = (labels == k)

    # Core samples of this cluster: large markers.
    xy = X[class_member_mask & core_samples_mask]
    plt.plot(xy[:, 0],
             xy[:, 1],
             'o',
             markerfacecolor=tuple(col),
             markeredgecolor='k',
             markersize=14)

    # Remaining members of this cluster: small markers.
    xy = X[class_member_mask & ~core_samples_mask]
    plt.plot(xy[:, 0],
             xy[:, 1],
             'o',
             markerfacecolor=tuple(col),
             markeredgecolor='k',
             markersize=6)

plt.title('Estimated number of clusters: %d' % n_clusters_)

# Save before show(): the pyplot module has no get_figure(), and once show()
# returns the figure may already be closed, so a later save can come out empty.
plt.savefig('dbscan.png', bbox_inches='tight')
plt.show()
Esempio n. 10
0
                        epochs=10,
                        verbose=1,
                        validation_data=(x_test, y_test))
model.summary()

# Create bar chart with train and val accuracies
train_accuracy = fit_info.history['accuracy']
test_accuracy = fit_info.history['val_accuracy']
# Index rows by 1-based epoch number. The length comes from the recorded
# history rather than a separate `epochs` variable, so the frame always
# matches the number of epochs that actually ran.
df = pd.DataFrame(
    {'train_accuracy': train_accuracy, 'test_accuracy': test_accuracy},
    index=range(1, len(train_accuracy) + 1))

# NOTE(review): this rebinds the module-level name `plt` to the returned axes;
# if the file also imports matplotlib.pyplot as `plt`, that alias is shadowed
# from here on.
plt = df.plot.bar(rot = 1, figsize = (12,10))
plt.grid(axis = 'y')
# Zoom the y axis into the 0.8 - 1.0 accuracy band.
plt.set_ylim(0.8, 1)

fig = plt.get_figure()
fig.savefig("accuracy_plot.png")

# Best validation accuracy over all epochs.
print(df['test_accuracy'].max())

## Regularization sweep: loop over several regularization factors, with
## replicates, to find the optimal parameter value (judged by accuracy).
## New amount of units in layers: 300 & 500
from tensorflow.keras import regularizers

epochs = 40
model = Sequential()

# Candidate regularization factors to try.
regularization_factor = [1e-6, 2.5e-4, 5e-4, 7.5e-4, 1e-3]

# Accumulators filled in while sweeping.
max_accuracies = list()
factor_count = 0
Esempio n. 11
0
def draw_plot(filename):
    """Plot every column of a date-indexed CSV and save it under ``files/``.

    The image is named after the CSV's base name with ``_origin.png``
    appended.

    Args:
        filename: path of a CSV file with a ``Date`` column; that column is
            parsed as dates and used as the index.
    """
    ds = pd.read_csv(filename, index_col=['Date'], parse_dates=['Date'])
    # The former ds.as_matrix() call was dropped: its result was discarded and
    # the method no longer exists in pandas >= 1.0.
    ax = ds.plot(figsize=(12, 6))
    stem = os.path.splitext(os.path.basename(filename))[0]
    ax.get_figure().savefig("files/" + str(stem) + "_origin.png")
Esempio n. 12
0
def show_mappings(Z, path):
    """Scatter column 0 of ``Z`` against column 1, coloured by column 2.

    Args:
        Z: 2-D array-like indexable as ``Z[:, i]`` with at least three
            columns.
        path: destination passed to ``Figure.savefig``.
    """
    # x and y are passed by keyword: seaborn >= 0.12 no longer accepts them
    # positionally.
    ax = sns.scatterplot(x=Z[:, 0], y=Z[:, 1], hue=Z[:, 2])
    ax.get_figure().savefig(path)