Code example #1
def trainer(num_epochs=5):
    # number of training epochs
    epochs = num_epochs

    for epo in range(epochs):
        # running-loss accumulator
        running_loss = 0

        # iterate through the data
        for img, labels in traindownloader:
            img, labels = img.to(device), labels.to(device)

            # clear the gradients
            optimin.zero_grad()

            # compute the forward pass
            output = model(img)

            # compute loss
            loss = criterion(output, labels)

            # accumulate the loss
            running_loss += loss.item()

            # compute backward pass
            loss.backward()

            # update weights
            optimin.step()

        print2(f"Epoch {epo+1}/{epochs}  "
               f"Training loss: {running_loss/len(traindownloader):.4f}")
Code example #2
File: group1.py Project: jocoder22/PythonDataScience
def strcbreak(data, breakpoint):
    import numpy as np
    import statsmodels.api as sm
    from scipy import stats as st
    from printdescribe import print2
    
    mr = data
    before = mr.loc[:breakpoint]
    after = mr.loc[breakpoint:]
#     print2("#"*20, before.tail())

    kmr = np.ones([mr.shape[0]])
    kkb = np.ones([before.shape[0]])
    kka = np.ones([after.shape[0]])

    mr_intercept  = sm.add_constant(kmr)
    before_with_intercept = sm.add_constant(kkb)
    after_with_intercept  = sm.add_constant(kka)

#     # Fit OLS regressions to the total period
#     result = sm.OLS(mr, mr_intercept).fit()

#     # Retrieve the sum-of-squared residuals
#     ssr_total = result.ssr

#     # Fit OLS regressions to each sub-period
#     r_b = sm.OLS(before, before_with_intercept).fit()
#     r_a = sm.OLS(after,  after_with_intercept).fit()

#     # Retrieve the sum-of-squared residuals
#     ssr_before = r_b.ssr
#     ssr_after = r_a.ssr

    # Fit robust (RLM) regressions to the total period
    result = sm.RLM(mr, mr_intercept, M=sm.robust.norms.HuberT()).fit()
    # Compute the residual norm (square root of the sum of squared residuals)
    ssr_total = np.sqrt(np.power(result.resid, 2).sum())

    # Fit robust (RLM) regressions to each sub-period
    r_b = sm.RLM(before, before_with_intercept, M=sm.robust.norms.HuberT()).fit()
    r_a = sm.RLM(after,  after_with_intercept, M=sm.robust.norms.HuberT()).fit()
    # Compute the residual norms for both sub-period regressions
    ssr_before = np.sqrt(np.power(r_b.resid, 2).sum())
    ssr_after = np.sqrt(np.power(r_a.resid, 2).sum())


    # Compute and display the Chow test statistic
    d_f = 1
    df2 = 2*d_f
    numerator = ((ssr_total - (ssr_before + ssr_after)) / d_f)
    denominator = ((ssr_before + ssr_after) / (mr.shape[0]/2 - df2))
    print("Chow test statistic: ", numerator / denominator)

    f = st.f.ppf(q=1-0.01, dfn=d_f, dfd=(mr.shape[0]/2 - df2))
    print2(f"F Critical point: {f}")
Code example #3
def return_var(prices, probb):
    r = [np.dot(np.array(i[0]), np.array(i[1])) for i in zip(prices, probb)]
    t = [
        np.sqrt(
            np.dot(np.array(i[2]),
                   np.subtract(np.array(i[0]), np.array(i[1]))**2))
        for i in zip(prices, r, probb)
    ]
    print2(r, t)
    return r, t
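In words, each element of r is the probability-weighted mean of a price scenario and each element of t is the corresponding standard deviation:

E[X] = \sum_i p_i x_i, \qquad \sigma_X = \sqrt{\sum_i p_i \,(x_i - E[X])^2}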
Code example #4
def check__nulls(df):
    """
    Test and report number of NAs in each column of the input data frame
    :param df: pandas.DataFrame
    :return: None
    
    """
    for col in df.columns:
        _nans = np.sum(df[col].isnull())
        if _nans > 0:
            print(f'{_nans} NaNs in column {col}')

    print2(f'New shape of {get__name(df)}: {df.shape}')
Code example #5
def getload_decade(start=1920, end=1929, extension='prn'):
    "specify the starting year of the decade eg. 1900, 2010, 2009"

    webaddress = f'https://www.nyse.com/publicdocs/nyse/data/Daily_Share_Volume_{start}-{end}.{extension}'

    try:
        link = requests.get(webaddress)

        print2(link.status_code)
        if link.status_code == 404:
            raise requests.HTTPError(f"{webaddress} returned HTTP 404")

        else:
            if extension == "prn":
                data = pd.read_csv(webaddress,
                                   sep='   ',
                                   parse_dates=['Date'],
                                   engine='python').iloc[2:, 0:2]
                print2(data.head(), data.columns)
                data.loc[:, "  Stock U.S Gov't"] = pd.to_numeric(
                    data.loc[:, "  Stock U.S Gov't"], errors='coerce')
                data.Date = pd.to_datetime(data.Date,
                                           format='%Y%m%d',
                                           errors="coerce")
                data.columns = ['Date', 'Volume']
                print2(f"Successfully downloaded {start}-{end}")
                return data

            else:
                data = pd.read_csv(webaddress)
                data.iloc[:,
                          0] = data.iloc[:,
                                         0].apply(lambda x: str(x).strip(' '))
                data = data.iloc[:, 0].str.split(' ', n=1, expand=True)
                data.columns = ['Date', 'Volume']
                data.loc[:, "Volume"] = pd.to_numeric(data.loc[:, "Volume"],
                                                      errors='coerce')
                data.Date = pd.to_datetime(data.Date,
                                           format='%Y%m%d',
                                           errors="coerce")
                print2(f"Successfully downloaded {start}-{end}")
                return data

    except Exception:
        print2("There was an issue with the download.\n"
               "You may need a different date range or file extension.\n"
               "Check out https://www.nyse.com/data/transactions-statistics-data-library")
Code example #6
def load_data(start=1920, end=1929, extension="prn"):
    # get the path
    path = os.path.join(path33, "Data",
                        f"Daily_Share_Volume_{start}-{end}.{extension}")
    # path = os.path.join(path33, f"Daily_Share_Volume_{start}-{end}.{extension}")

    if extension == "prn":
        data = pd.read_csv(path,
                           sep='   ',
                           parse_dates=['Date'],
                           engine='python').iloc[2:, 0:2]
        print2(data.head(), data.columns)
        data.loc[:, "  Stock U.S Gov't"] = pd.to_numeric(
            data.loc[:, "  Stock U.S Gov't"], errors='coerce')
        data.Date = pd.to_datetime(data.Date, format='%Y%m%d', errors="coerce")
        data.columns = ['Date', 'Volume']
        print2(f"Successfully downloaded {start}-{end}")
        return data

    else:
        data = pd.read_csv(path)
        data.iloc[:, 0] = data.iloc[:, 0].apply(lambda x: str(x).strip(' '))
        data = data.iloc[:, 0].str.split(' ', n=1, expand=True)
        data.columns = ['Date', 'Volume']
        data.loc[:, "Volume"] = pd.to_numeric(data.loc[:, "Volume"],
                                              errors='coerce')
        data.Date = pd.to_datetime(data.Date, format='%Y%m%d', errors="coerce")
        print2(f"Successfully downloaded {start}-{end}")
        return data
Code example #7
class Patients:
    def __init__(self, name, id, gender):
        self.name = name
        self.id = id
        self.gender = gender

    def __eq__(self, other):
        return self.name == other.name and self.id == other.id\
            and  type(self) == type(other)
        # return self.name == other.name and self.id == other.id\
        #     and isinstance(other, Patients)


class Staff:
    def __init__(self, name, id, gender):
        self.name = name
        self.id = id
        self.gender = gender

    def __eq__(self, other):
        return self.name == other.name and self.id == other.id\
            and isinstance(other, Staff)


patient1 = Patients("Charles", 459234, "Male")
patient2 = Patients("Charles", 876323, "Male")
patient3 = Patients("Marylene", 459234, "Female")
patient4 = Patients("Charles", 459234, "Male")
patient5 = Staff("Charles", 459234, "Male")

print2(patient1 == patient2, patient3 == patient1, patient1 == patient4)
print2("$" * 20)
print2(patient1 == patient5, patient5 == patient1)
Code example #8
import numpy as np
from sklearn.mixture import GaussianMixture

from printdescribe import print2

nn = np.array([-8.0, -3.0, 0.0, 6.5, 9.0, 45.5]).reshape(-1, 1)
cc = np.array([-1.5, -1.0, -0.5, 1.5, 2.0, 2.5]).reshape(-1, 1)
gm = GaussianMixture(n_components=2, covariance_type='full')
gmm = gm.fit(nn)

pred = gmm.fit_predict(nn)
logprob = gmm.score_samples(nn)
responsibilities = gmm.predict_proba(nn)
pdf = np.exp(logprob)
pdf_individual = responsibilities * pdf[:, np.newaxis]

print2(nn, logprob, responsibilities, pdf, pdf_individual)
print2(np.round(gmm.weights_, 2), np.round(gmm.means_, 2),
       np.round(gmm.covariances_, 2))

print2(np.round(gmm.precisions_, 2), np.round(gmm.precisions_cholesky_, 2))
print2(np.round(pred, 2))
print2(gmm.score(nn))

print("######################################################")
print("This is for  [-1.5,-1.0,-0.5,1.5,2.0,2.5] data")
gcc = gm.fit(cc)
pred = gcc.fit_predict(cc)
logprob = gcc.score_samples(cc)
responsibilities = gcc.predict_proba(cc)
pdf = np.exp(logprob)
pdf_individual = responsibilities * pdf[:, np.newaxis]
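The quantities computed from the fitted GaussianMixture relate as follows: score_samples returns the log-density log p(x), predict_proba returns the component responsibilities, and pdf_individual recovers each component's weighted density:

p(x) = \sum_k \pi_k \,\mathcal{N}(x \mid \mu_k, \Sigma_k), \qquad
\gamma_k(x) = \frac{\pi_k \,\mathcal{N}(x \mid \mu_k, \Sigma_k)}{p(x)}, \qquad
\gamma_k(x)\, p(x) = \pi_k \,\mathcal{N}(x \mid \mu_k, \Sigma_k)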
Code example #9
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
from printdescribe import print2, describe2, changepath

# import excel sheets
path = r"D:\Wqu_FinEngr\Portfolio Theory and Asset Pricing\GroupWork"

with changepath(path):
    data = pd.read_excel("GWP_PTAP_Data_2010.10.08.xlsx",
                         skiprows=1,
                         nrows=13,
                         sheet_name='10 SPDRs and S&P 500',
                         index_col=0)

describe2(data)
print2(data)

df_return = data.pct_change().dropna()
print2(df_return)

# df_activeReturn = df_return.sub(df_return.iloc[:,-1], axis=0).drop(['SP_500'], axis=1)
df_activeReturn = df_return.sub(df_return['S&P 500'], axis=0).drop(['S&P 500'],
                                                                   axis=1)
print2(df_activeReturn)

tracking_error = df_activeReturn.std()
mate_ = np.sqrt((df_activeReturn**2).sum() / df_activeReturn.shape[0])
print2(tracking_error, mate_)
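The two tracking-error measures above differ only in whether the mean active return is subtracted: tracking_error is the sample standard deviation of the active returns, while mate_ is the root mean of their squares:

TE = \sqrt{\tfrac{1}{T-1}\sum_{t}\bigl(r^{a}_{t} - \bar{r}^{a}\bigr)^{2}}, \qquad
MATE = \sqrt{\tfrac{1}{T}\sum_{t}\bigl(r^{a}_{t}\bigr)^{2}}

where r^a_t is the active return of a fund versus the S&P 500 in period t.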

# for col in df_return.columns[:-1]:
#     plt.figure(figsize=[10, 8])
Code example #10
# My two other ETFs are
# 1. Vanguard S&P 500 ETF (VOO)
# 2. iShares Core S&P 500 ETF (IVV)

etfs_tickers = ["IVV", "SPY", "VOO", "^GSPC"]

# using 2 years of data from January 01, 2018 to December 31, 2019
starttime = datetime.datetime(2018, 1, 1)
endtime = datetime.datetime(2019, 12, 31)

# get only the closing prices
etfs = pdr.get_data_yahoo(etfs_tickers, starttime, endtime)['Close']
etfs.columns = ["iShares", "SPDR", "Vanguard", "S&P500"]

# print out dataset head
print2(etfs.head())

# compute simple returns
etfs_return = etfs.pct_change().dropna()

# etfs_return.fillna(0, inplace=True)
returns2 = round(etfs_return * 100, 3)
print2(etfs_return, returns2)

# compute active returns
eft_index = etfs_return["S&P500"]
ppp = returns2.sub(returns2.iloc[:, -1], axis=0).drop(['S&P500'], axis=1)
ppp2 = returns2.sub(returns2['S&P500'], axis=0).drop(['S&P500'], axis=1)
etfs_activeR = etfs_return.sub(eft_index, axis=0)
etfs_activeR.drop("S&P500", axis=1, inplace=True)
Code example #11
import torch
from printdescribe import print2, describe2, changepath
from pytorchFunctions import sigmoid_activation

# set the seed
torch.manual_seed(90)

# create features
features = torch.randn((1, 10))

# define sizes of layers
input_size = features.shape[1]
n_hiddenlayers = 4
n_output = 1

# create weights
feature_weights = torch.randn((input_size, n_hiddenlayers))
hiddenlayer_weights = torch.randn((n_hiddenlayers, n_output))

# create biases
feature_bias = torch.randn((1, n_hiddenlayers))
hiddenlayer_bias = torch.randn((1, n_output))

# y = f2(f1(xW1)W2)
hiddenlayer_output = sigmoid_activation(
    torch.mm(features, feature_weights) + feature_bias)
y = sigmoid_activation(
    torch.mm(hiddenlayer_output, hiddenlayer_weights) + hiddenlayer_bias)

print2(y)
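Written out, the two sigmoid layers above implement

h = \sigma\!\left(x W_1 + b_1\right), \qquad y = \sigma\!\left(h W_2 + b_2\right)

with x of shape (1, 10), W_1 of shape (10, 4), and W_2 of shape (4, 1), matching the y = f2(f1(xW1)W2) comment.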
Code example #12
pp_labels = ["JPMorgan Chase", "Goldman Sachs", "BofA Securities", "Morgan Stanley", "Citigroup", "Credit Suisse"] 

starttime = datetime.datetime(2000, 1, 1)
endtime = datetime.datetime(2019, 10, 1)

# get only the closing prices
assets = pdr.get_data_yahoo(stocklist, starttime, endtime)['Close']

# initialize the weights
weights = [0.2, 0.15, 0.2, 0.15, 0.2, 0.1]

# compute the simple returns
returns = assets.pct_change().dropna()

# visualize the data
print2(assets.head(), returns.head())
describe2(assets, returns)

# compute portfolio returns
portfolioReturn = returns.dot(weights)

# compute portfolio value for $1 investment
portfolioValue = (1 + portfolioReturn).cumprod()

# Calculate individual mean daily returns
meanDailyReturns = returns.mean()

print2(portfolioReturn, meanDailyReturns, portfolioValue)

# Define weights for the portfolio
weights = np.array([0.2, 0.2, 0.2, 0.1, 0.15, 0.15])
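The portfolio quantities above follow the usual definitions: the portfolio return in each period is the weighted sum of asset returns, and the value of a $1 investment is the cumulative product of gross returns:

r_{p,t} = \sum_i w_i \, r_{i,t}, \qquad V_t = \prod_{s \le t} \left(1 + r_{p,s}\right)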
Code example #13
File: group1.py Project: jocoder22/PythonDataScience
from statsmodels.tsa.vector_ar.vecm import coint_johansen
import scipy.stats
from printdescribe import print2

plt.style.use("ggplot")
plt.rcParams["figure.figsize"] = 10,8
plt.rcParams["axes.facecolor"] = "0.92"
show = plt.show

# Obtaining Stock Data of Microsoft and Benchmark Data
start_date = '2013-01-01'
end_date = '2019-12-31'

assets = ['MSFT', 'FDN', 'JPM', 'XLF']
datasets = dr.DataReader(assets, data_source='yahoo', start=start_date, end=end_date)
print2(datasets['Adj Close'].head())


# matplotlib.rcParams['figure.figsize'] = [15, 7]
plt.plot(datasets['Adj Close'])
plt.ylabel('Price')
plt.legend(assets)
plt.grid()
show()


# Obtain the mean and standard deviation of the assets
means = datasets['Adj Close'].mean()
stddevs = datasets['Adj Close'].std()
print2(means, stddevs)
Code example #14
File: pca101.py Project: jocoder22/PythonDataScience
                           parse_dates=True,
                           index_col="Date")
    # datasets = pd.read_csv("assets.csv",  compression='gzip', index_col=0)
    # datasets, dataset2 = pd.read_csv(["assets.csv","assets2.csv"])

dataset2.rename(columns={"Adj Close": "SPX"}, inplace=True)
dataset3.drop(columns=["^GSPC"], inplace=True)

# # df.set_index('Date', inplace=True)
# print2(dataset2.head())
# datasets.reindex(dataset2.index)

alldata = pd.concat([dataset3, dataset2], axis=1)
data2 = alldata.copy()
data2 = data2.loc[:"2013-12-20", :]
print2(data2.iloc[:, :5].tail(), data2.shape)
# tt = "https://dumbstockapi.com/stock?format=tickers-only&exchange=NYSE"
# pp = pd.read_json(tt)
# pp = list(pp.values.ravel())

# download data and view
# data2 = dr.DataReader(pp, data_source='yahoo', start=start_date)['Adj Close']
# print2(f"Asset Adjusted Closing Pices shape: {data2.shape}", data2.iloc[:,:10].head())

# drop columns with NaN
data2 = data2.dropna(axis=1)

print(data2.iloc[:, :5].head())
# clean the datasets, remove NaN smartly
# Get a summary view of NaNs
oo = data2.isnull().sum()
Code example #15
import numpy as np
import matplotlib.pyplot as plt
from scipy.linalg import lu
from printdescribe import print2

plt.style.use("ggplot")
plt.rcParams["figure.figsize"] = 10, 8
plt.rcParams["axes.facecolor"] = "0.92"
np.random.seed(42)

# create matrix
A = np.linspace(1, 9, 9).reshape(-1, 3)
B = np.arange(10, 26).reshape(-1, 4)

# PLU decomposition; used for square matrix
P, L, U = lu(A)
Pb, Lb, Ub = lu(B)

# print results
print2(P, L, U)
print2(Pb, Lb, Ub)

# recombine the triangular factor matrices
A_ = P @ L @ U
B_ = Pb @ Lb @ Ub

# print results
print2(A_, B_)
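scipy.linalg.lu returns the factors so that the original matrix is recovered exactly as the product used above:

A = P\,L\,U

with P a permutation matrix, L unit lower triangular, and U upper triangular.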

#########################################################################################
#########################################################################################
# QR decomposition for all matrix
# create matrix
A2 = np.linspace(1, 35, 35).reshape(-1, 5)
B2 = np.arange(10, 34).reshape(-1, 4)
Code example #16
#!/usr/bin/env python
import torch
from printdescribe import print2, describe2, changepath
from pytorchFunctions import sigmoid_activation

# set the seed
torch.manual_seed(90)

# generate features vector
features = torch.randn((1, 10))

# generate weights
weights = torch.randn_like(features)

# generate bias
bias = torch.randn((1, 1))

# compute the prediction
prob = sigmoid_activation(torch.sum(features * weights) + bias)
# also can do sigmoid_activation((features * weights).sum() + bias)

prob2 = sigmoid_activation(torch.mm(features, weights.view(-1, 1)) + bias)

if __name__ == "__main__":
    print2(prob, prob2)

## y = f2(f1(xW1)W2)
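sigmoid_activation is imported from the author's pytorchFunctions module and is not shown here; a plausible minimal implementation, assuming it is just the elementwise logistic function, is:

# Hypothetical stand-in for pytorchFunctions.sigmoid_activation
# (assumption: a plain elementwise logistic sigmoid).
def sigmoid_activation(x):
    return 1 / (1 + torch.exp(-x))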
Code example #17
    "~/.pytch/MNIST_data/",
    download=True,
    train=True,
    transform=transformer,
)
traindownloader = tch.utils.data.DataLoader(traindata,
                                            batch_size=64,
                                            shuffle=True)

# create an iterator to read the dataset
iterloader = iter(traindownloader)
img, labels = next(iterloader)

# if __name__ == "__main__":

print2(type(img), type(labels), img.shape, labels.shape)

# display the image
plt.imshow(img[1].numpy().squeeze(), cmap="Greys")
plt.show()

# Flatten the 2D images to 1D images
flat1d_img = img.view(img.shape[0], -1)

# create model parameters
input_size = flat1d_img.shape[1]
n_hiddenlayers = 256
n_output = 10

# create weights
feature_weights = tch.randn((input_size, n_hiddenlayers))
Code example #18
#                                                 timeout=30, session=None, interval='day',
#                                                 span='year').read().reset_index()

# dw = durbin_watson(pd.to_numeric(apple.close_price).pct_change().dropna().values)
# print2(f'DW_Statistics: {dw}')

# Define start and end dates
starttime = '2018-01-01'
endtime = '2019-01-01'

# Download apple stock prices
apple = pdr.get_data_yahoo('AAPL', starttime, endtime)
dw = durbin_watson(pd.to_numeric(apple.Close).pct_change().dropna().values)

# Compute durbin_watson statistics
print2(f'DW_Statistics: {dw}')
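The Durbin-Watson statistic applied to the daily return series e_t above is

DW = \frac{\sum_{t=2}^{T}\left(e_t - e_{t-1}\right)^2}{\sum_{t=1}^{T} e_t^2}

where values near 2 suggest little first-order autocorrelation, and values toward 0 or 4 suggest positive or negative autocorrelation respectively.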

# Get Nasdaq tickers
tickers = pdr.nasdaq_trader.get_nasdaq_symbols(retry_count=3,
                                               timeout=300,
                                               pause=None)
etfs = tickers.loc[tickers.ETF == True, :]
symbols = etfs.sample(75).index.tolist()
print2(etfs.head(), etfs.shape, symbols)

# packet = pdr.robinhood.RobinhoodHistoricalReader(symbols, retry_count=3, pause=0.1,
#                                                 timeout=30, session=None, interval='day',
#                                                 span='year')
# data = packet.read().reset_index()
# pivot = data.loc[:['symbol', 'begins_at', 'close_price']].drop_duplicates(),pivot(
#     index='begins_at', columns = 'symbol', values='close_price'
Code example #19
# download test dataset
testdata = datasets.FashionMNIST(
    "~/.pytch/F_MNIST_data/",
    download=True,
    train=False,
    transform=transformer,
)
testdownloader = tch.utils.data.DataLoader(testdata,
                                           batch_size=64,
                                           shuffle=True)

# view the images
img, label = next(iter(traindownloader))
helper.imshow(img[10, :])
print2(label[10])
plt.show()


# define new classifier class
class MyNeuroNetwork(nn.Module):

    _inputs = 784
    _neuron1 = 128
    _neuron2 = 64
    _neuron3 = 32
    _output = 10

    def __init__(self):
        super().__init__()
Code example #20
File: pca102.py Project: jocoder22/PythonDataScience
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
import seaborn as sns
from printdescribe import print2

genes = ["gene" + str(i) for i in range(1, 101)]
wt = ["wt" + str(i) for i in range(1, 7)]
ko = ["ko" + str(i) for i in range(1, 7)]
data = pd.DataFrame(columns=[*wt, *ko], index=genes)

print2(wt, ko)
data = pd.DataFrame(columns=[*wt, *ko], index=genes)
print2(data.head())

n = 2
for gene in data.index:
    data.loc[gene, :"wt6"] = np.random.poisson(lam=np.random.randint(10, 100),
                                               size=6)
    np.random.seed(90 + n)
    data.loc[gene, "ko1":] = np.random.poisson(lam=np.random.randint(10, 100),
                                               size=6)
    n += 5

scaled_data = preprocessing.scale(data.T)
scaled_data[:20]
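preprocessing.scale standardizes each column of data.T (i.e., each gene, since the transpose puts samples in rows) to zero mean and unit variance:

z_{ij} = \frac{x_{ij} - \bar{x}_j}{s_j}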
Code example #21
def findIntersection(fun1, fun2, x0):
    return fsolve(lambda x: fun1(x) - fun2(x), x0)
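findIntersection hands the difference of the two functions to scipy.optimize.fsolve and returns the root it finds near x0. A hypothetical usage (the functions and starting point are made up for illustration):

# Hypothetical usage: find where x**2 and 2*x + 3 intersect, starting the search at x0 = 1.
# Assumes fsolve has been imported as in the original module: from scipy.optimize import fsolve.
crossing = findIntersection(lambda x: x**2, lambda x: 2 * x + 3, 1.0)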


country_list = ['USA', 'GBR', 'MEX', 'CAN', 'ZAF', 'NGA']
startdate = '1970'
enddate = '2019'
crisis_year = pd.to_datetime('1987-01-01')

gdp = wb.download(indicator='NY.GDP.PCAP.KD', country=country_list,
                start=pd.to_datetime(startdate, yearfirst=True),
                end=pd.to_datetime(enddate, yearfirst=True))\
                    .reset_index().dropna().iloc[::-1, :]

print2(gdp.shape, gdp.head(), gdp.info(),
       gdp.country.value_counts(dropna=False))

gdp2 = gdp.copy()
gdp2['year'] = pd.to_datetime(gdp2['year'])
gdp2.set_index('year', inplace=True)
gdp2.loc[:, "NY.GDP.PCAP.KD"] = gdp2.groupby('country')["NY.GDP.PCAP.KD"]\
    .apply(lambda x: pd.Series(x).interpolate())

gdp2.groupby(['country'])['NY.GDP.PCAP.KD'].plot()
plt.axvline(crisis_year, color="black")
plt.legend()
plt.show()

print2(gdp2.info())

gdp3 = gdp.copy()
Code example #22
import os
import sys
import matplotlib.pyplot as plt
import numpy as np
import shutil
# import tensorflow as tf

from ipywidgets import interact

import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()

from printdescribe import print2, changepath
from datetime import datetime

print2(" ")

path22 = r"D:\PythonDataScience"
sys.path.insert(0, path22)
import input_data

path2 = r"D:\Wqu_FinEngr\Machine Learning in Finance\CourseMaterials\Module5\WQU_MLiF_Module5_Notebooks\ML M5 Notebooks (updated)"

with changepath(path2):
    print2(os.getcwd())
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

m, n = mnist.train.images.shape
number_to_show = 100

Code example #23
File: ppp22.py Project: jocoder22/PythonDataScience
warnings.filterwarnings('ignore')

# instantiate start date
start_date = '2013-01-01'
# end_date = '2020-02-29'

# Download daily Amazon stock Adjusted close prices and indexes
assets = ['AMZN', "^GSPC", "^DJI", "^IXIC", "^RUT", "CL=F"]
datasets = dr.DataReader(assets, data_source='yahoo',
                         start=start_date)["Adj Close"]
datasets.tail()

# Name of the columns
col = ["Amazon", "Sp500", "Dow20", "Nasdaq", "R2000", "Crude20"]
datasets.columns = col
print2(datasets.head())

datasets.iloc[:, ~datasets.columns.isin(["Dow20", "Nasdaq"])].plot(figsize=(10, 5))
data = datasets.copy()
data['close'] = data["Amazon"]

# compute moving averages
fast_window = 20
slow_window = 50

data['fast_mavg'] = data['close'].rolling(window=fast_window,
                                          min_periods=fast_window,
                                          center=False).mean()
data['slow_mavg'] = data['close'].rolling(window=slow_window,
                                          min_periods=slow_window,
Code example #24
File: ml201.py Project: jocoder22/pytorch_ml
# %matplotlib inline
# %config InlineBackend.figure_format = "retina"

import numpy as np
import matplotlib.pyplot as plt
import helper
import torch as tch
from torch import nn, optim
import torch.nn.functional as F
from torchvision import datasets, transforms

from printdescribe import print2, describe2, changepath

plt.style.use("dark_background")

print2(tch.cuda.is_available())

# transformer to transform and normalize
transformer = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, ), (0.5, ))])

traindata = datasets.MNIST(
    "~/.pytch/MNIST_data/",
    download=True,
    train=True,
    transform=transformer,
)
traindownloader = tch.utils.data.DataLoader(traindata,
                                            batch_size=64,
                                            shuffle=True)
Code example #25
#!/usr/bin/env python
import numpy as np
import pandas as pd
from printdescribe import print2, describe2, changepath

S0 = [[120, 90], [140, 90], [130, 100]]
prob = [[0.5, 0.5], [0.5, 0.5], [0.5, 0.5]]


def return_var(prices, probb):
    r = [np.dot(np.array(i[0]), np.array(i[1])) for i in zip(prices, probb)]
    t = [
        np.sqrt(
            np.dot(np.array(i[2]),
                   np.subtract(np.array(i[0]), np.array(i[1]))**2))
        for i in zip(prices, r, probb)
    ]
    print2(r, t)
    return r, t


vars = return_var(S0, prob)
print2(S0, prob, vars)
Code example #26
File: M4.py Project: jocoder22/PythonDataScience
# select the Average Value Weighted Returns -- Daily (1220 rows x 100 cols)
portfolios100 = portfolios100[0]
factors5 = factors5[0]

# Checking for missing values in the portfolios dataset
print(portfolios100[portfolios100.iloc[:, 0] > 98.0].sum().sum(),
      portfolios100.isnull().sum().sum())


# Checking for missing values in factors dataset
print(factors5[portfolios100.iloc[:, 0] > 98.0].sum().sum(),
      factors5.isnull().sum().sum())

portfolios100.iloc[:, 90:100].head()
print2(portfolios100.shape, factors5.head(), factors5.shape)


# # 1a. Visually analyze the covariance between various factors and identify the variance explained in principal components of these factors.
# 
# ## 1b. Next, consider the ACF and PACF of the process and its square.
# 


pd.melt(factors5.add(1).cumprod().reset_index(), id_vars=["Date"]).hvplot.line(x='Date', y='value', by='variable')

factors_cov = factors5.cov()
factors_cov

plt.figure(figsize = [10, 6])
# Visualize the covariance matrix using a heatmap
Code example #27
    # print(pv_fcfe)
    comp1 = sum(pv_fcfe)
    # print(comp1)

    comp2_ = comp2(round(FCFE[-1], 2), listt[2], listt[3], n, one)
    # print(comp2_)
    mp_ggm = comp1 + comp2_

    return round(mp_ggm, 2)


Year = ["2008", "2009", "2010", "2011", "2012", "2013"]
FCFE_growth = [0.18, 0.18, 0.16, 0.12, 0.11, 0.06]
equity_discount_rate = 0.125
fcfe2007 = 2.0
g = 0.06

print(multi_ggm22(FCFE_growth, fcfe2007, g, equity_discount_rate))

initial = 5000
mmm = []
for i in range(40):
    final = initial * 1.05
    mmm.append(final)
    final += 5000
    initial = final
    if i > 20:
        initial -= 5000

print2(final, mmm, mmm[-1] / 5000)
Code example #28
classifier.means_init = np.array([X_train[y_train == i].mean(axis=0)
                                    for i in range(n_classes)])

# define colors and markers
markers = ["*","o", "+"]
# colors = ["r", "y", "k"]
colors = ['navy', 'turquoise', 'darkorange']
col = ['r*','yo','k+']
labels = y_test

# Fit to data and predict using pipelined scaling, PCA.
gmm = make_pipeline(StandardScaler(), classifier)
gmm.fit(X_train)
pred_train = gmm.predict(X_train)
train_accuracy = np.mean(pred_train.ravel() == y_train.ravel()) * 100
print2(f"Train accuracy: {np.round(train_accuracy,2)}")
  
pred_test = gmm.predict(X_test)
test_accuracy = np.mean(pred_test.ravel() == y_test.ravel()) * 100
print2(f"Test accuracy: {np.round(test_accuracy,2)}")


for n, color in enumerate(colors):
    # plot the original data
    data = iris.data[iris.target == n]
    plt.scatter(data[:, 0], data[:, 1], s=2.8, color=color, label=iris.target_names[n])

    # plot the test data
    data = X_test[y_test == n]
    plt.plot(data[:, 0], data[:, 1], 'x', color=color)
    
Code example #29
#!/usr/bin/env python
import numpy as np
from printdescribe import print2, describe2, changepath

nlist = [0.06, 0.07, 0.08, 0.09, 0.10]


def forward_rate(spotList, tstar, t):
    """The forward_rate function calculates the forward rate e.g f(T*, T)
      given yearly spot rates.
      
      Inputs:
          spotList (list) : yearly spot rates
          tstar (int): time until initiation of the rate
          t (int): the tiemto maturity from tstar
          
       Output:
            result (float): forward rate
  """

    nume = pow(1 + spotList[t + tstar - 1], t + tstar)
    denom = pow(1 + spotList[tstar - 1], tstar)
    result = pow((nume / denom), 1 / t) - 1

    return round(result * 100, 2)


print2(forward_rate(nlist, 1, 2))
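The function implements the standard no-arbitrage forward-rate relation for annually compounded spot rates,

f(T^{*}, T) = \left(\frac{(1 + s_{T^{*}+T})^{\,T^{*}+T}}{(1 + s_{T^{*}})^{\,T^{*}}}\right)^{1/T} - 1

so the sample call forward_rate(nlist, 1, 2) uses s_1 = 0.06 and s_3 = 0.08 and returns roughly 9.01 (percent).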
Code example #30
import numpy as np
import pandas as pd
from printdescribe import print2

# download dataset
url2 = "https://archive.ics.uci.edu/ml/machine-learning-databases/00529/diabetes_data_upload.csv"
df = pd.read_csv(url2)

# explore the dataset
print2(df.shape, df.dtypes, df.columns, df.info(), df.describe())

# Explore categorical features
for col in [x for x in df.select_dtypes(include=['object'])]:
    print2(df[col].value_counts(ascending=True, dropna=False))

# Add nan to dataset
num = df.Age.max()
for col in [x for x in df.select_dtypes(include=['object'])]:
    numm = num - 3
    df[col] = np.where(df.Age > numm, np.nan, df[col])
    num = numm

# view the new dataset
print2(df.isnull().sum())
print(df.info())

# explore the categorical datasets
for col in [x for x in df.select_dtypes(include=['object'])]:
    print2(df[col].value_counts(ascending=True, dropna=False))

# drop nan row