예제 #1
0
def plot_graphs(history, string):
    """Plot a metric and its validation counterpart and show the figure.

    Reads ``history.history[string]`` and ``history.history['val_' + string]``
    and draws each as a line; the x axis is labelled "Epochs".
    """
    val_key = 'val_' + string
    for key in (string, val_key):
        plt.plot(history.history[key])
    plt.xlabel("Epochs")
    plt.ylabel(string)
    plt.legend([string, val_key])
    plt.show()
def draw_bar(grades):
    """Show a bar chart with the frequency of each grade 'A'..'E' in ``grades``."""
    categories = ['A', 'B', 'C', 'D', 'E']

    # Count how often every grade value occurs in the input.
    counts = {}
    for g in grades:
        if g in counts:
            counts[g] += 1
        else:
            counts[g] = 1

    # One bar per category: x positions first, bar heights second;
    # ``align`` positions each bar relative to its x position.
    positions = range(5)
    heights = [counts.get(category, 0) for category in categories]
    plt.bar(positions, heights, align='center')

    # Tick positions first, tick label texts second.
    plt.xticks(positions, categories)

    # Axis labels and title.
    plt.xlabel('Grade')
    plt.ylabel('Frequency')
    plt.title('Grades Of Male Students')

    # Render the figure.
    plt.show()
예제 #3
0
def predict_prices(dates, prices, x):
    """Fit linear, polynomial and RBF SVR models, plot them, and predict.

    Args:
        dates: sequence of numeric date values; reshaped to a single-feature
            column of shape ``(len(dates), 1)``.
        prices: target values, one per entry of ``dates``.
        x: value(s) to predict for. A scalar, a 1-D sequence or an ``(n, 1)``
            array are all accepted; it is reshaped to a single-feature column.

    Returns:
        Tuple ``(rbf, linear, polynomial)`` with each model's prediction for
        the first sample of ``x``.
    """
    dates = np.reshape(dates, (len(dates), 1))
    # predict() expects a 2-D (n_samples, n_features) input.
    x = np.reshape(x, (-1, 1))

    svr_lin = SVR(kernel='linear', C=1e3)
    svr_poly = SVR(kernel='poly', C=1e3, degree=2)
    svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)

    svr_lin.fit(dates, prices)
    svr_poly.fit(dates, prices)
    svr_rbf.fit(dates, prices)

    # Raw data points plus one fitted curve per kernel.
    plt.scatter(dates, prices, color='black', label='data')
    plt.plot(dates, svr_rbf.predict(dates), color='red', label='RBF model')
    plt.plot(dates,
             svr_lin.predict(dates),
             color='green',
             label='Linear model')
    plt.plot(dates,
             svr_poly.predict(dates),
             color='blue',
             label='Polynomial model')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.title('Support Vector Regression')
    plt.legend()
    plt.show()

    return svr_rbf.predict(x)[0], svr_lin.predict(x)[0], svr_poly.predict(x)[0]
예제 #4
0
def show_train_history(train_history, train, validation):
    """Plot a training metric against its validation metric and show it.

    Args:
        train_history: object whose ``history`` attribute maps metric names
            to per-epoch value sequences.
        train: key of the training metric in ``train_history.history``.
        validation: key of the validation metric in ``train_history.history``.
    """
    # The metric series live on the ``history`` attribute of the passed object.
    plt.plot(train_history.history[train])
    plt.plot(train_history.history[validation])
    plt.title('Train History')
    plt.ylabel(train)
    plt.xlabel('Epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()
def draw_scatter(heights, weights):
    """Show a scatter plot of ``weights`` (y axis) against ``heights`` (x axis)."""
    # x coordinates of the points first, y coordinates second.
    plt.scatter(x=heights, y=weights)
    plt.title('Heights & Weights Of Male Students')
    plt.xlabel('Heights')
    plt.ylabel('Weights')
    plt.show()
def draw_hist(heights):
    """Show a 100-bin histogram of ``heights``."""
    # The raw quantitative values are passed directly; unlike the categorical
    # case no frequency table is built beforehand. The second argument is the
    # number of bins.
    bin_count = 100
    plt.hist(heights, bin_count)
    plt.title('Heights Of Male Students')
    plt.xlabel('Heights')
    plt.ylabel('Frequency')
    plt.show()
def draw_cumulative_hist(heights):
    """Show a normalized cumulative step histogram of ``heights`` (20 bins)."""
    # Arguments to hist:
    #   heights          - quantitative data to plot
    #   20               - number of bins
    #   density=True     - normalize the counts (replaces the removed
    #                      ``normed`` keyword of older matplotlib releases)
    #   histtype='step'  - draw an unfilled step curve
    #   cumulative=True  - accumulate the bins from left to right
    plt.hist(heights, 20, density=True, histtype='step', cumulative=True)
    plt.xlabel('Heights')
    plt.ylabel('Frequency')
    plt.title('Heights Of Male Students')
    plt.show()
예제 #8
0
def plot_regression_line(x, y, b):
    """Scatter the points ``(x, y)`` and overlay the line ``b[0] + b[1] * x``."""
    intercept, slope = b[0], b[1]

    # Observed points.
    plt.scatter(x, y, color="m", marker="o", s=30)

    # Fitted line evaluated at the same x values.
    fitted = intercept + slope * x
    plt.plot(x, fitted, color="g")

    # Axis labels, then display.
    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()
예제 #9
0
def generate_plot(platforms, output_file):
    """Generate a bar chart of platform prices and save it to ``output_file``.

    Args:
        platforms: iterable of dicts providing the keys 'name',
            'abbreviation', 'adjusted_price' and 'original_price'.
        output_file: destination handed to ``plt.savefig``.

    Platforms whose original price exceeds 2000 are left out. Bars appear in
    the reverse of the input order.
    """
    # Convert the platforms into the two chart axes: "labels" become the
    # x-axis tick texts and "values" the bar heights.
    labels = []
    values = []
    for platform in platforms:
        name = platform['name']
        adjusted_price = platform['adjusted_price']
        price = platform['original_price']
        # Skip prices higher than 2000 USD because they would make the
        # output unusable.
        if price > 2000:
            continue

        # If the name of the platform is too long, use its abbreviation.
        if len(name) > 15:
            name = platform['abbreviation']
        labels.append(u"{0}\n$ {1}\n$ {2}".format(name, price,
                                                  round(adjusted_price, 2)))
        values.append(adjusted_price)

    # The chart lists the last platform first; reversing once replaces the
    # repeated insert-at-front of the lists.
    labels.reverse()
    values.reverse()

    # Width of each bar and size of the resulting figure. The figure is built
    # once, after all platforms were collected; max() keeps the width positive
    # when every platform was filtered out.
    width = 0.3
    ind = np.arange(len(values))
    fig = plt.figure(figsize=(max(len(labels), 1) * 1.8, 10))

    # Generate a subplot and put the values onto it.
    ax = fig.add_subplot(1, 1, 1)
    ax.bar(ind, values, width, align='center')

    # Format the x and y axis labels, place the ticks and tilt their labels.
    plt.ylabel('Adjusted price')
    plt.xlabel('Year/ Console')
    ax.set_xticks(ind + 0.3)
    ax.set_xticklabels(labels)
    fig.autofmt_xdate()
    plt.grid(True)
    plt.savefig(output_file, dpi=72)
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
        
        
# Work on plain lists so entries can be removed by position.
unique_provinces = list(unique_provinces)
province_confirmed_cases = list(province_confirmed_cases)

# Remove the entries at the NaN positions from both parallel lists. Pop from
# the highest index down so earlier removals do not shift the positions that
# still have to be removed.
for i in sorted(nan_indices, reverse=True):
    unique_provinces.pop(i)
    province_confirmed_cases.pop(i)


# Plot a bar graph to see the total confirmed cases across different countries

plt.figure(figsize=(32, 32))
plt.barh(unique_countries, country_confirmed_cases)
plt.title('Number of Covid-19 Confirmed Cases in Countries')
plt.xlabel('Number of Covid Confirmed Cases')
plt.show()

# Plot a bar graph to see the total confirmed cases b/w mainland china and outside mainland china

china_confirmed = latest_confirmed[confirmed_cases['Country/Region'] == 'China'].sum()
outside_mainland_china_confirmed = np.sum(country_confirmed_cases) - china_confirmed
plt.figure(figsize=(16, 9))
plt.barh('Mainland China', china_confirmed)
plt.barh('Outside Mainland China', outside_mainland_china_confirmed)
plt.title('Number of Confirmed Coronavirus cases')
plt.show()

# Print the total cases outside of mainland china

print('Outside Mainland China: {} cases'.format(outside_mainland_china_confirmed))
    weekRatings[day].append(d['stars'])
# Average rating for every key of weekRatings.
weekAverages = {}
for d in weekRatings:
    # Sum the ratings first, then divide by their count.
    weekAverages[d] = sum(weekRatings[d]) * 1.0 / len(weekRatings[d])
weekAverages
X = list(weekAverages.keys())
Y = [weekAverages[x] for x in X]
import matplotlib.pyplot as plt
plt.plot(X, Y)
plt.bar(X, Y)
# zoom in more to see the detail
plt.ylim(3.6, 3.8)
plt.bar(X, Y)

plt.ylim(3.6, 3.8)
plt.xlabel("Weekday")
plt.ylabel("Rating")
plt.xticks([0, 1, 2, 3, 4, 5, 6], ['S', 'M', 'T', 'W', 'T', 'F', 'S'])
plt.title("Rating as a function of weekday")
plt.bar(X, Y)

#L4 Live-coding: MatPlotLib
# Read up to 50000 JSON lines from the review file and attach a parsed time
# structure to each record.
path = "datasets/yelp_data/review.json"
# NOTE(review): the file handle is not closed in the visible code.
f = open(path,'r',encoding = 'utf8')
import json
import time
dataset = []
for i in range(50000):
    # One JSON document per line.
    d = json.loads(f.readline())
    # d['data']
    # NOTE(review): the key 'data' is parsed as a '%Y-%m-%d' date; presumably
    # the field is named 'date' - verify against the dataset.
    d['timeStruct'] = time.strptime(d['data'],'%Y-%m-%d')
예제 #12
0
# In[4]:

from matplotlib import pyplot as plt

# In[9]:

# Two small series drawn against the same x values.
x = [1, 2, 3]
y = [1, 4, 9]
z = [10, 5, 0]
plt.plot(x, y)
plt.plot(x, z)
plt.title("test plot")
plt.xlabel("x")
plt.ylabel("y and z")
plt.legend(["this is y", "this is z"])
plt.show()

# In[10]:

# NOTE(review): `pd` is not imported in the visible cells; presumably pandas
# was imported in an earlier cell - confirm.
sample_data = pd.read_csv('sample_data.csv')

# In[11]:

sample_data

# In[12]:

type(sample_data)
예제 #13
0
    return optimizer

def plot_accuracies(train_top1, train_top5, val_top1, val_top5, SWD):
    """Plot the top-1/top-5 accuracy per epoch and save the figure.

    Args:
        train_top1: per-epoch top-1 accuracy on the training set.
        train_top5: per-epoch top-5 accuracy on the training set.
        val_top1: per-epoch top-1 accuracy on the validation set.
        val_top5: per-epoch top-5 accuracy on the validation set.
        SWD: when truthy the figure is saved as 'SWD-results.png',
            otherwise as 'SGD-results.png'.
    """
    plt.figure()

    # The x axis is the epoch index, taken from the length of train_top1.
    epochs = range(len(train_top1))
    lw = 1

    plt.plot(epochs, train_top1, color='darkred', lw=lw, linestyle='dashed', label='top-1 (train)')
    plt.plot(epochs, train_top5, color='red', lw=lw, label='top-5 (train)')
    plt.plot(epochs, val_top1, color='darkblue', lw=lw*2, linestyle='dashed', label='top-1 (test)')
    plt.plot(epochs, val_top5, color='blue', lw=lw*2, label='top-5 (test)')

    plt.xlabel('Epoch #', fontsize=20)
    plt.ylabel('Accuracy (%)')
    # Without a legend the labels assigned to the curves are never displayed.
    plt.legend()

    if SWD:
        filename = 'SWD-results.png'
    else:
        filename = 'SGD-results.png'

    plt.savefig(filename)

class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
예제 #14
0
import numpy as np
import random as rd
import pandas as pd
import matplotlib.pyplot as plt

#data
#data = pd.read_csv('data/clustering.csv')
url = 'https://raw.githubusercontent.com/DUanalytics/pyAnalytics/master/data/clustering.csv'
data = pd.read_csv(url)
data.shape
data.head()
data.describe()
data.columns

#visualise
plt.scatter(data.ApplicantIncome, data.LoanAmount)
plt.xlabel('Income')
plt.ylabel('LoanAmt')
plt.show()

#standardize data : Scaling

#missing values
#https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.dropna.html
data.dtypes
data.isnull().any()  #per column: any value missing
data.isnull().any(axis=1)  #per row: any value missing
data.index[data.isnull().any(axis=1)]  #index labels of rows with missing values
data.iloc[6]
data.isnull().sum().sum()  #75 missing values
data.isnull().sum(axis=0)  #columns missing
data.isnull().sum(axis=1)
              step=0.01))
# Fill the grid spanned by X1/X2 with the class predicted for every grid point.
# NOTE(review): the name `classifer` is defined outside the visible code;
# presumably it is the fitted classifier - confirm the spelling matches.
plt.contourf(X1,
             X2,
             classifer.predict(np.array([X1.ravel(),
                                         X2.ravel()]).T).reshape(X1.shape),
             alpha=0.75,
             cmap=ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
# One scatter series per distinct label in y_set, coloured by its position
# in the sorted unique labels.
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0],
                X_set[y_set == j, 1],
                c=ListedColormap(('red', 'green'))(i),
                label=j)
plt.title('Classifier (Training set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()

# Visualising the Test set results
from matplotlib.colors import ListedColormap
X_set, y_set = X_test, y_test
# Grid with step 0.01 covering both feature columns, padded by 1 on each side.
X1, X2 = np.meshgrid(
    np.arange(start=X_set[:, 0].min() - 1,
              stop=X_set[:, 0].max() + 1,
              step=0.01),
    np.arange(start=X_set[:, 1].min() - 1,
              stop=X_set[:, 1].max() + 1,
              step=0.01))
plt.contourf(X1,
예제 #16
0
# Reference values, split into calibration and validation parts.
# `.values` replaces DataFrame.as_matrix, which newer pandas no longer has.
# NOTE(review): `xxx` is not defined in the visible code - presumably a
# placeholder for the split index; confirm before running.
reference_data = data['Ref AC'].values
Y_calib = reference_data[:xxx]
Y_valid = reference_data[xxx:]

# Get spectra
# NOTE(review): rows [:422] and [423:] leave row 422 out of both sets -
# confirm this is intended and consistent with the Y split above.
X_calib = data.iloc[:422, 2:].values
X_valid = data.iloc[423:, 2:].values

# Get wavelengths (They are in the first line which is considered a header from pandas)
wl = np.array(list(data)[2:])

# Plot spectra
plt.figure(figsize=(8, 4.5))
with plt.style.context('ggplot'):
    plt.plot(wl, X_calib.T)
    plt.xlabel('Wavelength (nm)')
    plt.ylabel('Absorbance')
plt.show()

# Calculate derivatives (window length 17, polynomial order 2, 2nd derivative)
X2_calib = savgol_filter(X_calib, 17, polyorder=2, deriv=2)
X2_valid = savgol_filter(X_valid, 17, polyorder=2, deriv=2)

# Plot second derivative
plt.figure(figsize=(8, 4.5))
with plt.style.context('ggplot'):
    plt.plot(wl, X2_calib.T)
    plt.xlabel('Wavelength (nm)')
    plt.ylabel('D2 Absorbance')
plt.show()
import matplotlib.pyplot as plt
import pandas as pd

#fetch best performing model
best_model = RF_gscv.best_estimator_
best_model2 = MLP_gscv.best_estimator_

#fit permutation importance on test data
perm = PermutationImportance(best_model).fit(test_img, test_lab)
perm2 = PermutationImportance(best_model2).fit(test_img, test_lab)

#show weights
wghts = eli5.format_as_dataframe(eli5.explain_weights(perm))
wghts2 = eli5.format_as_dataframe(eli5.explain_weights(perm2))

#write dataframes to csv
wghts.to_csv(
    'D:/studies/phd/WV3_Data_July2019/010039360030_01/L_Sabie_subset/rf_permImportance.csv',
    encoding='utf-8',
    index=False)
wghts2.to_csv(
    'D:/studies/phd/WV3_Data_July2019/010039360030_01/L_Sabie_subset/mlp_permImportance.csv',
    encoding='utf-8',
    index=False)

#scatter column 3 of mlp_map_prob against the 9th-from-last column of x_img_arr
gLawn = mlp_map_prob[:, 3]
w = x_img_arr[:, -9]
plt.scatter(w, gLawn)
plt.xlabel('proximity_to_water')
plt.ylabel('gLawn_probability')
plt.show()
예제 #18
0
import matplotlib.pyplot as plt

# Plot the squares of 1..5 as a thick line.
input_values = [1, 2, 3, 4, 5]
squares = [1, 4, 9, 16, 25]

plt.plot(input_values, squares, linewidth=5)
plt.title("Square Numbers", fontsize=28)
plt.xlabel("value", fontsize=14)
plt.ylabel("squares, of value", fontsize=14)
# Size of the tick labels on both axes.
plt.tick_params(axis='both', labelsize=14)
plt.show()
예제 #19
0
파일: session11.py 프로젝트: mntri4/Udemy
# coding: utf-8
import matplotlib.pyplot as plt
import numpy as np

# Coarse sine curve: 10 samples on [0, 10].
x = np.linspace(0, 10, 10)
y = np.sin(x)
plt.plot(x, y)
plt.show()

# Same data with axis labels and a title.
plt.plot(x, y)
plt.xlabel("Time")
plt.ylabel("Some function of time")
plt.title("sin")
plt.show()

# Finer sampling: 100 samples on [0, 10].
x = np.linspace(0, 10, 100)
y = np.sin(x)
plt.plot(x, y)
plt.show()
예제 #20
0
# Share of ratings selected by the popular_apps mask, in percent.
percent_popular = len(np_ratings[popular_apps]) / len(np_ratings) * 100
print("percent_popular", percent_popular)

# Ratings below 4 count as unpopular.
unpopular_apps = np_ratings < 4
print("unpopular count", len(np_ratings[unpopular_apps]))

percent_unpopular = len(np_ratings[unpopular_apps]) / len(np_ratings) * 100
print("percent_unpopular", percent_unpopular)

# Whatever is neither popular nor unpopular.
somewhat_popular = 100 - (percent_popular + percent_unpopular)
print("somewhat_popular", somewhat_popular)

# do a visualization with our new data
labels = "Sucks", "Meh", "Love it!"
# Wedge sizes are the three percentages, in the same order as the labels.
sizes = [percent_unpopular, somewhat_popular, percent_popular]
colors = ['yellowgreen', 'lightgreen', 'lightskyblue']
explode = (0.1, 0.1, 0.15)

# '%1.1f%%' prints each share with one decimal followed by a literal percent sign.
plt.pie(sizes, explode=explode, colors=colors, autopct='%1.1f%%', shadow=True, startangle=140)

plt.axis('equal')
plt.legend(labels, loc=1)
plt.title("Do we love our apps?")
plt.xlabel("User Ratings - App Installs (10,000+ apps)")
plt.show()

# print ('processed', line_count, 'lines of data')
print(categories)
print('first row of data', installs[0])
print('last row of data', installs[-1])
예제 #21
0
# Find the index of the largest value in red_fft (values <= 0 never replace
# the initial maximum of 0, so the index then stays 0).
max_val = 0
max_index = 0
for index, fft_val in enumerate(red_fft):
    if fft_val > max_val:
        max_val = fft_val
        max_index = index

# Scale the frequency at the peak by 60 to report it per minute.
heartrate = freqs[max_index] * 60
print('Estimated Heartrate: {} bpm'.format(heartrate))


# Plotting
if PLOT:
    plt.figure(figsize=(16,9))
    plt.plot(x, colors['red'], color='#fc4f30')
    plt.xlabel('Time [s]')
    plt.ylabel('Normalized Pixel Color')
    plt.title('Time-Series Red Channel Pixel Data')
    # Keep a handle on the figure so it can still be saved after show().
    fig1 = plt.gcf()
    plt.show()
    if SAVE:
        plt.draw()
        fig1.savefig('./{}_time_series.png'.format(filename), dpi=200)

    # Plot the highpass data
    plt.figure(figsize=(16,9))
    plt.plot(x_filt, colors['red_filt'], color='#fc4f30')
    plt.xlabel('Time [s]')
    plt.ylabel('Normalized Pixel Color')
    plt.title('Filtered Red Channel Pixel Data')
    fig2 = plt.gcf()
import pylab as pl
import matplotlib.pylot as plt
x = [1, 2, 3, 4, 5, 6, 7, 8]
y = [9, 8, 8.25, 8, 7.5, 8, 8, 8.75]
pl.plot(x, y, 'D')
plt.title("Grafica de promedios semestral")
plt.xlabel("Semestres cursados")
plt.ylabel("Promedio")
pl.savefig('promedios.png')
plt.show()
'''

import json
from textblob import TextBlob
from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Get the JSON data; the with-block closes the file even if parsing fails.
with open("tweets.json", "r") as tweetFile:
    tweetData = json.load(tweetFile)

tweets = []
polarity_values = []

# Collect every tweet text and its TextBlob polarity.
for tweet in tweetData:
    tweet_text = tweet["text"]
    tweets.append(tweet_text)

    tb = TextBlob(tweet_text)
    print("{}: {}".format(tweet_text, tb.polarity))
    polarity_values.append(tb.polarity)

# All tweet texts joined into one string.
giant_string = " ".join(tweets)

# Bin edges for the polarity histogram.
bins = [-1, -0.5, 0, 0.5, 1]

plt.hist(polarity_values, bins)
plt.title("tweet polarity")
plt.ylabel("Count of tweets")
plt.xlabel("Polarity")
plt.show()
예제 #24
0
파일: simple.py 프로젝트: nanonite9/WarZone
# Initial time and the initial x / y expenditure.
t_0 = 0
init_data = np.array([14, 5])

# Set up the RK45 integrator for `model`, starting from the initial state.
sys_1 = integrate.RK45(model, t_0, init_data, 1000, 0.001)

# Solution histories, seeded with the initial state and time.
sol_x, sol_y, time = [sys_1.y[0]], [sys_1.y[1]], [t_0]

for i in range(5000):
    # Advance the integrator by one step, then record its current state
    # (attribute `y`) and current time (attribute `t`).
    sys_1.step()
    state = sys_1.y
    sol_x.append(state[0])
    sol_y.append(state[1])
    time.append(sys_1.t)

plt.figure(figsize=(20, 10))

# Draw both expenditure curves against time.
for series, fmt, country in ((sol_x, 'b--', 'Country A'),
                             (sol_y, 'r--', 'Country B')):
    plt.plot(time, series, fmt, label=country)
plt.ylabel('Military Expenditure (billions USD)', fontsize=16)
plt.xlabel('Time (years)', fontsize=16)
plt.legend(loc='best', fontsize=22)
plt.title('Simple Arms Race: Aggressive vs. Passive', fontsize=28)
plt.show()
# NOTE(review): a, b, c and d are defined outside the visible code, so the
# shapes and results stated in the comments below cannot be checked here.
# Inner product of vectors
print(a.dot(b))
print(np.dot(a, b))

# Matrix / vector product; both produce the rank 1 array [29 67]
print(c.dot(d))
print(np.dot(c, d))

# Matrix / matrix product; both produce the rank 2 array
# [[19 22]
#  [43 50]]
# NOTE(review): the two calls below use different operand pairs (a, c) and
# (b, d), unlike the pairs above - confirm they are meant to agree.
print(a.dot(c))
print(np.dot(b, d))

# In[24]:

import numpy as np
from matplotlib import pyplot as plt

# Integers 1..10 and the line y = 2x + 5 evaluated on them.
x = np.arange(1, 11)
y = 2 * x + 5

plt.title("Matplotlib demo")
plt.xlabel("x axis caption")
plt.ylabel("y axis caption")
# "ob": blue circle markers, no connecting line.
plt.plot(x, y, "ob")
plt.show()

# In[ ]:
예제 #26
0
# Relative frequency of every possible outcome 0..NumberFlips in SumTrials.
Distribution = [
    SumTrials.count(OutcomeIndex1) / (1.0 * NumberTrials)
    for OutcomeIndex1 in range(0, NumberFlips + 1)
]

print(repr(Distribution))
# Print the sum of the elements in Distribution
sumDistrib = sum(Distribution)
print(repr(sumDistrib))

# Bar chart settings: one bar per outcome, ticks shifted by half a bar width.
OutcomeIndex2 = range(0, NumberFlips + 1)
num_bins = len(OutcomeIndex2)
bar_width = 0.8
opacity = 0.4
XticksIndex = [outcome + (0.5 * bar_width) for outcome in OutcomeIndex2]

plt.bar(OutcomeIndex2, Distribution, bar_width, alpha=opacity, color='b')
plt.xlabel("Value")
plt.ylabel("Probability")
plt.xticks(XticksIndex, OutcomeIndex2)
plt.show()

"""
Describe what happens to the figure as you vary ParameterP from zero to one.
-As ParameterP increases from zero to one, the figure shifts from left to right.  

What is the most likely outcome for ParameterP = 0.7 and NumberFlips = 8?
-With ParameterP = 0.7 and NumberFlips = 8, the most likely outcome is 6 with 29.7% probability. 
"""
예제 #27
0
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Identical x and y series 0..99.
x = np.arange(0, 100)
y = np.arange(0, 100)
print(x)
print(y)
lr = LinearRegression()

x.ndim
y.ndim

x.shape
y.shape

# fit() needs a 2-D feature matrix: one column, one row per sample.
x = x.reshape(-1, 1)
x.shape

x.ndim

lr.fit(x, y)

plt.scatter(x, y, color='red')

plt.plot(x, lr.predict(x), color='blue')
plt.title('Linear Regression Demo')
plt.xlabel('X')
plt.ylabel('y')

plt.show()
예제 #28
0
#Data Visualization Reference.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# %matplotlib inline  #jupyter notebook only (the magic must stand alone on its line).  below line for everything else.
plt.show()
x = np.arange(0, 10)
y = x ** 2
plt.plot(x, y, 'red') #shows red line.
plt.plot(x, y, '*') #shows stars on graph.
plt.plot(x, y, 'r--') #shows red line with dashes.
plt.xlim(0, 4) #shows x-axis limits at 0 and 4.
plt.ylim(0, 10) #shows y-axis limits at 0 and 10.
plt.title("title goes here")
plt.xlabel('x label goes here')
plt.ylabel('y label goes here')
mat = np.arange(0, 100).reshape(10, 10) #makes array.
plt.imshow(mat, cmap = 'RdYlGn')
mat = np.random.randint(0, 1000, (10, 10))
plt.imshow(mat)
plt.colorbar()
df = pd.read_csv('salaries.csv')
df.plot(x = 'salary', y = 'age', kind = 'scatter') #kind could be 'line' or whatever else you need.

#SciKit-Learn Reference/Pre-Processing.
import numpy as np
from sklearn.preprocessing import MinMaxScaler
data = np.random.randint(0, 100, (10, 2))
scaler_model = MinMaxScaler()
type(scaler_model)
예제 #29
0
import matplotlib.pyplot as plt

# Five-year steps from 1950 to 2015; one entry per value in pops / death.
years = [
    1950, 1955, 1960, 1965, 1970, 1975, 1980, 1985, 1990, 1995, 2000, 2005,
    2010, 2015
]

pops = [2.5, 2.7, 3, 3.3, 3.6, 4, 4.4, 4.8, 5.3, 5.7, 6.1, 6.5, 6.9, 7.3]
death = [1.2, 1.7, 1.8, 2.2, 2.5, 2.7, 2.9, 3, 3.1, 3.3, 3.5, 3.8, 4.0, 4.3]
'''
plt.plot(years, pops,'---', color=(255/255, 100/255, 100/255))
plt.plot(years, death, color=(.6, .6, .1))
'''
# Both series in one call; `lines` holds the resulting line objects.
lines = plt.plot(years, pops, years, death)
plt.grid(True)

# Apply the same colour and marker to every line.
plt.setp(lines, color=(1, .4, .4), marker='o')

plt.ylabel("Population in Billions")
plt.xlabel("Population growth by Year")
plt.title("Population Growth")
plt.show()
예제 #30
0
#import scipy as sp
import matplotlib.pyplot as plt

import pandas as pd

# Bar chart of how often each value of the 'bedrooms' column occurs.
data = pd.read_csv("scratch3.csv")
data['bedrooms'].value_counts().plot(kind='bar')
plt.title('number of bedrooms')
plt.xlabel('bedrooms')
plt.ylabel('count')
plt.show()