Ejemplo n.º 1
def main():
    """Run linear-regression analyses for INSTRUMENT.

    First pass: technical indicators (EDA with tech_analysis enabled).
    Second pass: macro/market indicators on the plain EDA frame.
    """
    # Work relative to this script's own directory so the data files resolve.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    os.chdir(script_dir)

    frame = EDA(INSTRUMENT, debug=False, tech_analysis=True)
    print('Technical indicators')
    techical_indicators_linreg(frame)

    frame = EDA(INSTRUMENT)
    print('Market indicators')
    macro_indicators_linreg(frame)
Ejemplo n.º 2
def main():
    """Run indicator analyses for INSTRUMENT: technical, then index/market.

    Uses the directory of this script as the working directory so relative
    data paths resolve regardless of the caller's CWD.
    """
    os.chdir(os.path.dirname(os.path.realpath(__file__)))

    # Positional flags: presumably debug=False, tech_analysis=True as in the
    # keyword form used by sibling examples — confirm against EDA's signature.
    df = EDA(INSTRUMENT, False, True)
    print('Technical indicators')
    techical_indicators(df)

    df = EDA(INSTRUMENT)
    # FIX: this section runs index/market indicators but was labelled
    # 'Technical indicators' (copy-paste error); label corrected to match
    # the sibling example that prints "Market indicators" here.
    print('Market indicators')
    index_indicators(df)
def main():
    """Run indicator analyses for INSTRUMENT with the Close_open column removed.

    Drops 'Close_open' from each frame before analysis and prints the
    remaining columns for inspection.
    """
    os.chdir(os.path.dirname(os.path.realpath(__file__)))

    df = EDA(INSTRUMENT, False, True)
    df = df.drop('Close_open', axis=1)
    print(df.columns)
    # FIX: user-facing label was misspelled "Techincal indicators".
    print("Technical indicators")
    techical_indicators(df)

    print()

    df = EDA(INSTRUMENT)
    df = df.drop('Close_open', axis=1)
    print(df.columns)
    print("Market indicators")
    index_indicators(df)
import tensorflow as tf
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from Recommender import Recommender
from EDA import EDA

# Load train/eval/test tf.data datasets plus the user and movie counts.
# NOTE(review): EDA() here returns datasets and counts, unlike the
# DataFrame-returning EDA used elsewhere — verify against the local module.
train_ds, eval_ds, test_ds, n_users, n_movies = EDA()

# NCF (neural collaborative filtering) hyperparameters.
r_emb_dim = 32            # embedding dimension for user/movie vectors
r_lr = 0.0005             # Adam learning rate
r_epochs = 10
r_l2 = 0.0000             # L2 regularisation disabled
r_dropout = 0.0           # dropout disabled
r_batch_size = 40960
r_dense_struct = [16, 4]  # widths of the dense layers after the embeddings

# Model instantiation: regression setup (MSE loss, RMSE metric).
recommender = Recommender(n_users, n_movies, r_emb_dim, r_dense_struct,
                          r_dropout, r_l2)
recommender.compile(tf.keras.optimizers.Adam(r_lr),
                    tf.keras.losses.MeanSquaredError(),
                    metrics=[tf.keras.metrics.RootMeanSquaredError()])
# Runs a single batch before summary() — presumably so the model builds its
# weights first; NOTE(review): this also applies one gradient update on that
# batch — confirm that is intended rather than a pure build step.
recommender.train_on_batch(train_ds.batch(r_batch_size))
print(recommender.summary())

#Model fit
recommender.fit(train_ds.batch(r_batch_size),
Ejemplo n.º 5
import tensorflow as tf
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from MatrixFactorizer import MatrixFactorizer
from EDA import EDA

# Load train/eval/test tf.data datasets plus the user and movie counts.
train_ds, eval_ds, test_ds, n_users, n_movies = EDA()

# Matrix-factorisation hyperparameters.
f_emb_dim = 16        # latent-factor dimension
f_lr = 0.0015         # Adam learning rate
f_epochs = 10
f_batch_size = 40960

# Model instantiation: regression setup (MSE loss, RMSE metric).
factorizer = MatrixFactorizer(n_users, n_movies, f_emb_dim)
factorizer.compile(tf.keras.optimizers.Adam(f_lr),
                   tf.keras.losses.MeanSquaredError(),
                   metrics=[tf.keras.metrics.RootMeanSquaredError()])
# Runs a single batch before summary() — presumably so the model builds its
# weights first; NOTE(review): this also applies one gradient update — confirm.
factorizer.train_on_batch(train_ds.batch(f_batch_size))
print(factorizer.summary())

# Model fitting.
# NOTE(review): training data is batched here but validation_data=eval_ds is
# passed as-is — verify eval_ds is already batched upstream.
factorizer.fit(train_ds.batch(f_batch_size),
               epochs=f_epochs,
               validation_data=eval_ds)

# Test performance of the factorizer (continues past this excerpt).
Ejemplo n.º 6
import random

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sn
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from EDA import EDA

# Users who were 60 days enrolled; predict churn in the next 30 days.
# NOTE(review): this requires pandas bound to `pd` — it is not imported in
# this example's visible import block; confirm upstream.
df = pd.read_csv('churn_data.csv')

# Project-specific cleaning / feature preparation (see the local EDA module).
df = EDA.eda(df)

# =============================================================================
# Data Preprocessing
# =============================================================================

# Keep the user ids aside so predictions can be mapped back to users later.
user_identifier = df['user']
df = df.drop(columns = ['user'])

# One-Hot Encoding
# NOTE(review): the next two expressions discard their results — they only
# show output in an interactive session, and are no-ops in a script.
df.housing.value_counts()
df.groupby('housing')['churn'].nunique().reset_index()
df = pd.get_dummies(df)
df.columns
# Drop the dummy columns encoding "not available" to avoid redundant features.
df = df.drop(columns = ['housing_na', 'zodiac_sign_na', 'payment_type_na'])
Ejemplo n.º 7
        .set("spark.driver.memory", "40g")

    # NOTE(review): Python 2 syntax (print statements) — this example cannot
    # run under Python 3 as written.
    sc = SparkContext(conf=conf)
    # Ship the project package to the Spark executors.
    sc.addPyFile("/home/tongxialiang/workspace/lixj/SPC-POSM/SPC-POSM.zip")

    with open('../midResult/edaResult.txt', 'w') as f:
        # Solve 20 problem instances; write one averaged result per instance.
        for i in range(20):
            print i, 'th instance ...'
            # po holds information about PROVIDERS and CUSTOMERS
            po = PO()
            # Read providers and customers data from the instance text file.
            po.PROVIDERS, po.CUSTOMERS = LoadDataFromText('../data/instance' +
                                                          str(i) + '.txt')
            '''
            for surrogateSizeRatio in ratioList:
                print "surrogate size ratio", surrogateSizeRatio
                eda = EDA(popSize, iterMax, blockMax, po, alpha, beta, D, surrogateFlag, surrogateSizeRatio)
                eda.evaluate()
                print "the best solution serial, fitness, mmd, sigmaCost, sigmaCapacity, sigmaDemand ", \
                    eda.m_BestSolution.geneSerial, eda.m_BestSolution.fitness, eda.m_BestSolution.mmd, eda.m_BestSolution.sigmaCost, eda.m_BestSolution.sigmaCapacity, eda.m_BestSolution.sigmaDemand
                print "---------------------------------"
            '''
            # Average the best cost over `times` independent EDA runs.
            # NOTE(review): last EDA argument 0 — presumably surrogate size
            # ratio disabled; confirm against the EDA constructor.
            sumAns = 0
            times = 5

            for _ in range(times):
                eda = EDA(sc, popSize, iterMax, blockMax, po, alpha, beta, D,
                          surrogateFlag, 0)
                eda.evaluate()
                sumAns += eda.m_BestSolution.sigmaCost
            # NOTE(review): under Python 2, `/` truncates if sigmaCost is an
            # int — verify sigmaCost is a float (or that truncation is wanted).
            f.write(str(sumAns / times) + '\n')
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from math import sqrt
from EDA import EDA

# Import the dataset.
# NOTE(review): requires pandas bound to `pd` — not visible in this example's
# import block; confirm it is imported upstream.
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
store = pd.read_csv('store.csv')

# =============================================================================
# EDA
# =============================================================================
# Project-specific cleaning of all three frames (see the local EDA module).
train, test, store = EDA.eda(train, test, store)

# =============================================================================
# Feature Engineering
# =============================================================================
# NOTE(review): the next expression discards its result — it only shows
# output interactively and is a no-op in a script.
store.isnull().sum()

# Convert categorical columns to integer category codes.
store.head()
store['StoreType'] = store['StoreType'].astype('category').cat.codes
store['Assortment'] = store['Assortment'].astype('category').cat.codes
train["StateHoliday"] = train["StateHoliday"].astype('category').cat.codes

# Attach store metadata to every training row (left join keeps all train rows).
merged = pd.merge(train, store, on='Store', how='left')

# remove NaNs