예제 #1
0
def scatter_plot(P, L, pcIdx1, pcIdx2, letterList, rev):
    """Scatter two columns of ``P`` against each other, one colour per letter.

    For every entry of ``letterList`` the rows of ``P`` where ``L == letter``
    are drawn with column ``pcIdx2`` on the x axis and column ``pcIdx1`` on
    the y axis.  The figure is saved as a PNG under the module-level ``pDir``
    directory and then shown.

    Parameters
    ----------
    P : array indexed as ``P[row_mask, column]``
        Data to plot.
    L : array
        Labels compared element-wise with each letter to build the row mask.
    pcIdx1, pcIdx2 : int
        Column indices used for the y axis and the x axis respectively.
    letterList : sequence of str
        Labels to plot; also joined into the output file name.
    rev : object
        Tag formatted into the output file name.
    """
    fig = plt.figure()
    # following the convention in lecture note ScatterPlot.html
    colors = ["r", "lime", "b", "y", "c", "m", "k", "tan", "pink", "darkred"]
    for i, letter in enumerate(letterList):
        selected = L == letter
        plt.scatter(P[selected, pcIdx2],
                    P[selected, pcIdx1],
                    s=0.1,
                    # Cycle through the palette so more than ten letters do
                    # not raise IndexError.
                    c=colors[i % len(colors)],
                    label=letter)
    # gca() returns the axes the scatter calls drew on; a bare plt.axes()
    # adds a fresh, empty axes on current Matplotlib releases.
    plt.gca().set_aspect('equal')
    plt.xlabel("Principle Component {}".format(pcIdx2))
    plt.ylabel("Principle Component {}".format(pcIdx1))
    plt.axhline(0, color='grey')
    plt.axvline(0, color='grey')
    plt.ylim([-5000, 5000])
    plt.xlim([-5000, 5000])
    plt.legend()
    plt.gca().invert_yaxis()
    fig.set_size_inches(8, 8)
    fName = os.path.join(
        pDir, 'scatter_PC{}_PC{}_{}_{}.png'.format(pcIdx1, pcIdx2,
                                                   "".join(letterList), rev))
    savefig(fName, bbox_inches='tight')
    plt.show()
예제 #2
0
def evaluation(X, y, model, n_preds=10, random=True, show_graph=True):
    """Compare single-step model predictions with the true series values.

    For ``n_preds`` positions the model predicts from ``X[position]`` and the
    prediction is scored as correct when it lies on the same side of a
    reference value ``c`` as the true value ``y[position]``.

    Parameters
    ----------
    X : array
        Model inputs; ``X.shape[1]`` is taken as the number of steps.
    y : array
        True values; must support ``min()``, indexing and slicing.
    model : object
        Must provide ``predict``; ``predict(X[p:p + 1])[0][0]`` is used as
        the predicted value.
    n_preds : int
        Number of positions to evaluate.
    random : bool
        When true, positions are drawn with ``np.random.randint``; otherwise
        positions ``0 .. n_preds - 1`` are used.
    show_graph : bool
        When true, draw a bar chart of true versus predicted values.

    Returns
    -------
    slices : list of tuple
        ``(true_window, window_ending_in_prediction, acc)`` per position.
    y_true, y_pred : numpy.ndarray
        True and predicted values in evaluation order.
    """
    n_steps = X.shape[1]
    max_random_int = len(y) - n_steps
    y_true, y_pred, prediction_accuracy, slices = [], [], [], []

    # If we predict returns (the series contains negative values) the
    # reference is 0, otherwise it is the previous value of the series.
    predicts_returns = y.min() < 0

    for i in range(n_preds):
        position = np.random.randint(0, max_random_int) if random else i

        y_hat = model.predict(X[position:position + 1])[0][0]
        y_current = y[position]
        y_pred.append(y_hat)
        y_true.append(y_current)

        # NOTE(review): at position 0 the "previous" value is y[-1], i.e. the
        # last element of the series - confirm this wrap-around is acceptable.
        c = 0 if predicts_returns else y[position - 1]

        same_direction = ((y_hat > c and y_current > c)
                          or (y_hat < c and y_current < c))
        acc = 1 if same_direction else 0

        prediction_accuracy.append(acc)
        # Clamp the window start: a negative start would be read as an offset
        # from the end of y and give a wrong or empty window.
        start = max(position - n_steps, 0)
        slices.append((list(y[start:position + 1]),
                       list(y[start:position]) + [y_hat], acc))

    if show_graph:
        plt.rcParams['figure.dpi'] = 227
        plt.style.use('seaborn-whitegrid')
        plt.figure(figsize=(16, 6))
        plt.bar(range(n_preds),
                y_true[:],
                width=.7,
                alpha=.6,
                color="#4ac2fb",
                label="True")
        plt.bar(range(n_preds),
                y_pred[:],
                width=.7,
                alpha=.6,
                color="#ff4e97",
                label="Predicted")
        plt.axhline(0, color="#333333", lw=.8)
        plt.legend(loc=1)
        # The text property is spelled "fontsize".
        plt.title('Daily Return Prediction', fontsize=15)
        plt.show()

    print('MSE:', mean_squared_error(y_true, y_pred))
    # Round the percentage itself to two decimals; the 2 used to be passed to
    # format() by mistake.
    accuracy_pct = sum(prediction_accuracy) / len(prediction_accuracy) * 100
    print('Accuracy: {}%'.format(round(accuracy_pct, 2)))
    return slices, np.array(y_true), np.array(y_pred)
예제 #3
0
def plot_binned_residuals(bin_df):
    '''
    Plot binned residual averages and their confidence-interval bounds.

    ins
    --
    bin_df ie from bin_residuals(resid, var, bins); the columns 'var',
    'resid', 'lower_ci' and 'upper_ci' are read.
    outs
    --
    None; the lines are drawn on the current matplotlib axes.
    '''
    # plot()/axhline() live in matplotlib.pyplot, not in the top-level
    # matplotlib package.
    import matplotlib.pyplot as plt

    plt.plot(bin_df['var'], bin_df['resid'], '.')
    plt.plot(bin_df['var'], bin_df['lower_ci'], '-r')
    plt.plot(bin_df['var'], bin_df['upper_ci'], '-r')
    # Zero reference line for the residuals.
    plt.axhline(0, color='gray', lw=.5)
예제 #4
0
def plot_binned_residuals(bin_df):
    '''
    Draw binned residuals and their confidence-interval bounds.

    Parameters
    ----------
    bin_df : DataFrame
       Binned dataframe from bin_residuals(residuals, feature, bin_count);
       the columns 'var', 'resid', 'lower_ci' and 'upper_ci' are read.

    Returns
    -------
    plt :
        The ``plt`` object the lines were drawn with (not a Figure instance).
    '''
    x_values = bin_df['var']
    # (column, format string): residuals as dots, both bounds as red lines.
    series = (('resid', '.'), ('lower_ci', '-r'), ('upper_ci', '-r'))
    for column, fmt in series:
        plt.plot(x_values, bin_df[column], fmt)
    # Zero reference line.
    plt.axhline(0, color='gray', lw=0.5)
    return plt
예제 #5
0
def plot_binned_residuals(bin_df):
    '''
    Plot the residual column and both interval bounds of a binned dataframe.

    Parameters
    ----------
    bin_df : DataFrame
       Binned dataframe from bin_residuals(residuals, feature, bin_count);
       must provide the columns 'var', 'resid', 'lower_ci' and 'upper_ci'.

    Returns
    -------
    plt :
        The ``plt`` object used for drawing (not a Figure instance).
    '''
    horizontal = bin_df['var']

    # Residuals are drawn as points.
    residuals = bin_df['resid']
    plt.plot(horizontal, residuals, '.')

    # Lower bound first, then upper bound, both as red lines.
    for bound in ('lower_ci', 'upper_ci'):
        plt.plot(horizontal, bin_df[bound], '-r')

    plt.axhline(0, color='gray', lw=0.5)
    return plt
예제 #6
0
    def display(self, data, candidates, fname, display):
        """Plot a histogram of ``data`` with the candidate points and save it.

        The first component of every candidate is split into a lower and an
        upper half; the separation of the two halves decides whether the plot
        is annotated as "One distribution" or "Two distributions".

        Parameters
        ----------
        data : sequence of numbers
            Values that are histogrammed.
        candidates : sequence of pairs
            ``c[0]`` is used for the split, and column 1 is plotted on the
            axis labelled as frequency.
        fname : str
            Input file name; the plot is written next to it with the
            extension replaced by ``png``.
        display : bool
            When true the figure is also shown on screen.
        """
        import os

        finallist = [c[0] for c in candidates]
        # Floor division keeps the split index an int on Python 3 as well.
        half = len(finallist) // 2
        part1 = finallist[:half]
        part2 = finallist[half:]

        # NOTE(review): when mean(part2) < mean(part1) the square root is NaN
        # and int() raises ValueError; when span is 0 the division below
        # fails. Confirm whether callers can hit either case.
        meandiff = int(np.sqrt(np.power(np.mean(part2), 2)
                               - np.power(np.mean(part1), 2)))
        rangeA = max(part1) - min(part1)
        rangeB = max(part2) - min(part2)
        span = int((rangeA + rangeB) / 2)
        dspan = int(meandiff / span)
        theta = float(meandiff / (rangeA + rangeB))

        # "and" binds tighter than "or"; the parentheses only make that explicit.
        if (dspan > 3 and meandiff > 20) or meandiff > 36:
            verdict = "Two distributions"
        else:
            verdict = "One distribution"
        oneortwo = verdict + (
            " \n\n MD: %d \n Span: %d \n Dspan: %d \n theta: %d"
            % (meandiff, span, dspan, theta))

        cans = np.array(candidates)
        plt.plot(cans[:, 0], cans[:, 1], 'ro')
        # Horizontal guides at 25%, 50% and 75% of the largest frequency.
        plt.axhline(max(cans[:, 1]) / 4, color='r')
        plt.axhline(max(cans[:, 1] / 2), color='r')
        plt.axhline(int(max(cans[:, 1])) * 0.75, color='r')
        red_patch = mpatches.Patch(
            color='red', label='75%, 50% and 25% \nof maximum frequency')
        plt.legend(handles=[red_patch])
        plt.ylabel('Frequency of occurence')
        plt.xlabel('separate items')
        plt.title('Frequency distribution estimation graph: %s' % (fname))
        plt.text(max(data) * 1.1, max(cans[:, 1]) * 0.62, oneortwo,
                 fontsize=11, color='r')
        plt.hist(data, range(int(min(data)), int(max(data)), 1))

        # Replace whatever extension fname has, not just three-letter ones.
        ofile = os.path.splitext(fname)[0] + ".png"
        # Format first, then print: works as a statement on Python 2 and as a
        # function call on Python 3.
        print("Writing outfile: %s" % ofile)
        plt.savefig(ofile, bbox_inches='tight')
        if display:
            plt.show()
예제 #7
0
plt.tight_layout()
plt.show()

# Drop missing values from the residual component (in place, so `residual`
# itself is modified too) and run the stationarity check on it.
ts_log_decompose = residual
ts_log_decompose.dropna(inplace=True)
res4 = check_stationarity(ts_log_decompose)
print(res4)

# ACF and PACF plots:
from statsmodels.tsa.stattools import acf, pacf

lag_acf = acf(ts_log_diff, nlags=20)
lag_pacf = pacf(ts_log_diff, nlags=20, method='ols')

# Reference band at +/- 1.96 / sqrt(N), shared by both panels.
conf_bound = 1.96 / np.sqrt(len(ts_log_diff))
acf_panels = (
    (121, lag_acf, 'Autocorrelation Function'),
    (122, lag_pacf, 'Partial Autocorrelation Function'),
)
for panel_position, panel_values, panel_title in acf_panels:
    plt.subplot(panel_position)
    plt.plot(panel_values)
    for band_level in (0, -conf_bound, conf_bound):
        plt.axhline(y=band_level, linestyle='--', color='gray')
    plt.title(panel_title)
plt.tight_layout()
plt.show()

# ARIMA model
model = ARIMA(dfl, order=(0, 1, 0))
예제 #8
0
# In[45]:

# For every x in digits: column-wise mean of the rows of P where T == x.
muList = np.array([P[T == x].mean(axis=0) for x in digits])

# In[46]:

# print() as a call works on both Python 2 and Python 3.
print(muList.shape)

# In[47]:

fig = plt.figure()
# Transposed so that each plotted line follows one row of muList.
plt.plot(muList.T)
plt.title("Mean of Principle Component")
plt.xlabel("Index of Principle Component")
plt.axhline(0, color='grey')
plt.legend(letters, loc='upper right')
fig.set_size_inches(8, 8)
fName = os.path.join(pDir, 'mean_of_pc_n{}_{}.png'.format(n, rev))
savefig(fName, bbox_inches='tight')
plt.show()

# #### Covariance matrix of the principal components of each letter

# In[48]:

# rowvar=False: columns of the selected rows are treated as the variables.
covList = np.array([np.cov(P[T == x], rowvar=False) for x in digits])

# In[49]:

print(covList.shape)
# Visualize performance predictions: per-match profits as overlaid bars.
plt.subplot(2, 3, 4)
bar_specs = (
    ('profits_B365', 'red', 'B365'),
    ('profits_DNN', 'blue', 'DNN'),
    ('profits_DNN_highConf', 'green', 'DNN HC'),
)
for profit_column, bar_color, bar_label in bar_specs:
    teams_pred[profit_column].plot(kind='bar',
                                   color=bar_color,
                                   label=bar_label,
                                   alpha=0.5)
plt.axhline(0, color='k')
plt.title('Predicted profits')
plt.xlabel('Match ID')
plt.ylabel('€')
plt.legend()

# Running totals of the same three profit columns as line plots.
ax = plt.subplot(2, 3, 5)
line_specs = (
    ('profits_B365', 'r', 'Bet365'),
    ('profits_DNN', 'b', 'DNN'),
    ('profits_DNN_highConf', 'g', 'DNN HC'),
)
for profit_column, line_format, line_label in line_specs:
    running_total = np.cumsum(teams_pred[profit_column].values.tolist())
    ax.plot(running_total, line_format, label=line_label)
plt.axhline(0, color='k')
plt.title('Predicted cumulated profits')
예제 #10
0
                                 n_jobs=20)
        out.append(scores.mean())

    return out


#run experiment

classif = RandomForestClassifier(random_state=42,
                                 n_jobs=20,
                                 n_estimators=100,
                                 max_depth=4)

numberofrepetitions = 1000
# Fold counts evaluated; reused below for the tick labels so the two cannot
# drift apart.
fold_counts = list(range(2, 15))
out = []
for i in fold_counts:
    out.append(repeatCV(numberofrepetitions, i))
    print(i)  # progress indicator

import matplotlib.pyplot as plt
fig = plt.figure(1)
ax = fig.add_subplot(111)
# One box per fold count.
bp = ax.boxplot(out)
ax.set_xticklabels(fold_counts)
plt.xlabel('Number of Folds')
plt.ylabel('Accuracy')
plt.ylim(0.55, 0.85)
# NOTE(review): 0.7285714285714285 looks like a reference accuracy - confirm
# its origin.
plt.axhline(y=0.7285714285714285, color="r")
# Save before show(): once the window opened by show() is closed the figure
# may be gone and savefig() would write an empty page.
plt.savefig('MOVIES_VAR_K_2.pdf')
plt.show()
예제 #11
0
# Run bisection on f with the arguments a, b and tolerance eps, then unpack
# the returned (solution, iteration count) pair.
bisection_result = bisection(f, a, b, eps=1.0e-6)
solution, no_iterations = bisection_result

# Reported call count: one call plus two per iteration.
function_calls = 1 + 2 * no_iterations
print("Number of function calls: %d" % function_calls)
print("A solution is: %f" % solution)

# In[334]:

import scipy.linalg as la


def f(x):
    """Return ``log(x) / 1 + x`` using numpy's natural logarithm."""
    # The division by 1 leaves the value unchanged; kept as in the original.
    log_part = np.log(x) / 1
    total = log_part + x
    return total


# Sample f at 100 evenly spaced points between -1 and 1.
# NOTE(review): f takes log(x), which is undefined for x <= 0, so half of
# this range produces NaN - confirm the intended interval.
x = np.linspace(-1, 1, num=100)
plt.axhline(0, c='red')  # zero line, to read off the root
curve_values = f(x)
plt.plot(x, curve_values)

# In[157]:

from sympy import symbols, hessian, Function, N

x, y = symbols('x y')
# This binding is replaced by the expression assigned to f just below.
f = symbols('f', cls=Function)

cross_term = 4 * x * y
squared_term = (x + y * y)**2
f = cross_term + squared_term

# Hessian of f with respect to (x, y), evaluated at x = 1, y = 1.
symbolic_hessian = hessian(f, [x, y])
H = symbolic_hessian.subs([(x, 1), (y, 1)])
# Bare expression: shown as the cell output when run in a notebook.
H, N(H.condition_number())

# In[676]:
예제 #12
0
T = 200
# Bare expression (notebook cell output): mask of samples above the threshold.
w > T

# =======================================

# Samples beyond the threshold on either side.
print(w[w > T])
print(w[w < -T])

# =======================================

# Bare expression: how many samples lie outside [-T, T].
len(w[w < -T]) + len(w[w > T])

# =======================================

plt.plot(w)
for threshold_level in (T, -T):
    plt.axhline(y=threshold_level, color="red", linestyle="-")
plt.show()

# =======================================

# Mark the samples above T on top of the signal.
above_threshold = w > T
x = np.where(above_threshold)
y = w[above_threshold]
plt.plot(w)
plt.scatter(x, y, color="purple")

# =======================================

# Indices and values of the local maxima of w.
max_index_tuple = argrelextrema(w, np.greater)
xx = max_index_tuple[0]
yy = w[xx]
예제 #13
0
ax = fig.add_subplot(111)

# One colour per run, shared by the with-gas and without-gas curves.
curve_colors = ('b', 'r', 'lime', 'orange')

#...Plots V over sig vs time: WITH GAS (solid lines)
gas_runs = ((time1, VoverS1), (time2, VoverS2),
            (time3, VoverS3), (time4, VoverS4))
for (run_time, run_ratio), run_color in zip(gas_runs, curve_colors):
    plt.plot(run_time, run_ratio, linewidth=2.5, color=run_color)

#...Plots V over sig vs time: WITHOUT GAS (dashed lines)
no_gas_runs = ((time1a, VoverS1a), (time2a, VoverS2a),
               (time3a, VoverS3a), (time4a, VoverS4a))
for (run_time, run_ratio), run_color in zip(no_gas_runs, curve_colors):
    plt.plot(run_time, run_ratio, linewidth=2.5, color=run_color,
             linestyle='--')

#...Other Dwarfs: (name, level of the horizontal line, y position of the label)
other_dwarfs = (
    ('Aquarius', 1.7, 1.65),
    ('LeoA', 1.99, 1.95),
    ('Pegasus', 1.43, 1.38),  # label was misspelled "Pegaus"
    ('WLM', 1.01, 0.95),
)
# xmin/xmax are fractions of the axes width, so each line covers the left 30%.
for dwarf_name, dwarf_level, label_y in other_dwarfs:
    plt.axhline(y=dwarf_level, xmin=0.0, xmax=0.3, color='lightblue')
for dwarf_name, dwarf_level, label_y in other_dwarfs:
    plt.text(0.35, label_y, dwarf_name + ' ', size=14)

#...Set up axis labels:
plt.yticks(np.arange(0, 2, 0.2))
plt.xlabel(r'$time\,[Gyr]$', fontsize=20)
plt.ylabel(r'$v_{rot}/\sigma_{*}$', fontsize=20)
plt.ylim([0, 2])
plt.xlim([0, 6])
예제 #14
0
                   np.random.normal(43000,100000,3650), 
                   np.random.normal(43500,140000,3650), 
                   np.random.normal(48000,70000,3650)], 
                  index=[1992,1993,1994,1995])

from scipy import stats
year_avg = df.mean(axis=1)
year_std = df.std(axis=1)
# Half-width of the two-sided 95% interval of each row mean: standard error
# times the 0.975 quantile of Student's t with n - 1 degrees of freedom.
n_samples = df.shape[1]
yerr = year_std / np.sqrt(n_samples) * stats.t.ppf(1 - 0.05 / 2, n_samples - 1)
import matplotlib.pyplot as plt
plt.figure()
# Colours are given as a list: a plain string such as 'rgby' is not accepted
# as a colour sequence by newer Matplotlib releases.
bars = plt.bar(range(df.shape[0]), year_avg, yerr=yerr, alpha=0.6,
               color=['r', 'g', 'b', 'y'])
threshold = 42000
plt.axhline(y=threshold, color='grey', alpha=1)

# Tick labels are taken from the row index instead of being hard-coded.
plt.xticks(range(df.shape[0]), [str(year) for year in df.index], alpha=0.8)
plt.title('Ferreira et al, 2014')
# show() comes last so the figure is displayed with its content.
plt.show()



#project
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as ss
import seaborn as sns
import os
# IPython magic (not plain Python syntax): selects the "notebook" Matplotlib
# backend for this session.
%matplotlib notebook
# Apply the 'seaborn-colorblind' style sheet to the figures created afterwards.
plt.style.use('seaborn-colorblind')