Code example #1
File: simpleNN.py Project: arcunique/tagNN
import numpy as np
from sklearn.preprocessing import MinMaxScaler as mms
# NN_1hid (the single-hidden-layer network class) is defined elsewhere in the tagNN project


def single_hidden_layer_train(X,
                              Y,
                              epoch=150,
                              lr=0.1,
                              hiddenlayer_neurons=3,
                              activation='sigmoid'):
    X, Y = np.reshape(X, (len(X), -1)), np.reshape(Y, (len(Y), -1))
    inputlayer_neurons = X.shape[1]  # number of features in data set
    output_neurons = Y.shape[1]
    if X.shape[0] != Y.shape[0]:
        raise ValueError(
            'The number of input samples ({}) is not equal to output samples ({})'
            .format(X.shape[0], Y.shape[0]))
    wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
    bh = np.random.uniform(size=(1, hiddenlayer_neurons))
    wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
    bout = np.random.uniform(size=(1, output_neurons))
    ipscaler = mms(feature_range=(-1, 1))
    ipscaler.fit(X)
    opscaler = mms(feature_range=(-1, 1))
    opscaler.fit(Y)
    net = dict(wh=wh,
               bh=bh,
               wout=wout,
               bout=bout,
               activation=activation,
               scaler=(ipscaler, opscaler))
    NNtask = NN_1hid(net)

    for i in range(epoch):
        NNtask.forward(X)
        NNtask.backward(Y, lr)
    NNtask.forward(X)
    return NNtask
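A minimal call sketch with toy data (shapes and hyperparameters below are invented for illustration; np and mms are NumPy and sklearn's MinMaxScaler, and NN_1hid must come from the tagNN project itself):

X = np.random.rand(20, 4)   # 20 samples, 4 input features
Y = np.random.rand(20, 1)   # 20 target values
net = single_hidden_layer_train(X, Y, epoch=100, lr=0.05, hiddenlayer_neurons=5)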
Code example #2
File: BRM.py Project: Manjunathsk92/dbanalysis
 def validate_neural_network(self):
     """
     Was used to test the model
     """
     self.train = self.data[self.data['year'] == 2016]
     self.test = self.data[self.data['year'] == 2017]
     from sklearn.preprocessing import MinMaxScaler as mms
     del (self.data)
     self.X_transformer = mms().fit(self.train[self.features])
     Y = self.train['traveltime'].values
     Y = Y.reshape(-1, 1)
     self.Y_transformer = mms().fit(Y)
     X = self.X_transformer.transform(self.train[self.features])
     Y = self.Y_transformer.transform(Y)
     del (self.train)
     self.model = self.rgr.fit(X, Y)
     del (X)
     del (Y)
     distances = sorted(self.test['distance'].unique())[1:]
     number_samples = []
     r2 = []
     mae = []
     mape = []
     from sklearn import metrics
     for i in range(0, len(distances) - 1):
         test = self.test[(self.test['distance'] >= distances[i])
                          & (self.test['distance'] < distances[i + 1])]
         Y = test['traveltime']
         number_samples.append(len(test))
         X = self.X_transformer.transform(test[self.features])
         preds = self.model.predict(X)
         real_preds = self.Y_transformer.inverse_transform(
             preds.reshape(-1, 1))
         real_preds = np.array([i[0] for i in real_preds])
         print(real_preds.mean())
         input()
         r2_score = metrics.r2_score(Y, real_preds)
         MAE = metrics.mean_absolute_error(Y, real_preds)
         MAPE = ((abs(Y - real_preds) / Y) * 100).mean()
         r2.append(r2_score)
         mae.append(MAE)
         mape.append(MAPE)
         print(r2_score, MAE, MAPE)
     self.distances = distances[:-1]
     del (self.test)
     del (test)
     del (preds)
Code example #3
 def normalizeColumnsUsingMinMax(self, df, columnNames):
     """
     Method to normalize the data in specific columns using minmax
     :param df: Dataframe to process
     :param columnNames: Names of columns to normalize
     :return: Processed dataframe
     """
     df[columnNames] = mms().fit_transform(df[columnNames])
     return df
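A hedged usage sketch (the DataFrame, its column names and the instance name prep are invented; mms is assumed to be sklearn's MinMaxScaler imported at module level):

df = pd.DataFrame({'a': [1.0, 2.0, 3.0], 'b': [10.0, 20.0, 30.0], 'label': [0, 1, 0]})
df = prep.normalizeColumnsUsingMinMax(df, ['a', 'b'])  # 'a' and 'b' rescaled to [0, 1], 'label' untouched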
Code example #4
File: BRM.py Project: diarmuidmorgan/dba
    def build_neural_network(self):
        import numpy as np
        msk = np.random.rand(len(self.data)) < 0.5
        self.train = self.data[msk]
        del (msk)

        from sklearn.preprocessing import MinMaxScaler as mms
        del (self.data)
        self.X_transformer = mms().fit(self.train[self.features])
        Y = self.train['traveltime'].values
        Y = Y.reshape(-1, 1)
        self.Y_transformer = mms().fit(Y)
        X = self.X_transformer.transform(self.train[self.features])
        Y = self.Y_transformer.transform(Y)

        self.model = self.rgr.fit(X, Y)
        print('Built')
        del (X)
        del (Y)
        del (self.train)
Code example #5
File: BRM.py Project: Manjunathsk92/dbanalysis
    def build_neural_network(self):
        """
        On the last iteration of this (in the notebooks), the MinMax scaler was
        replaced with a StandardScaler for both X and Y.
        """
        import numpy as np
        msk = np.random.rand(len(self.data)) < 0.5
        self.train = self.data[msk]
        del (msk)

        from sklearn.preprocessing import MinMaxScaler as mms
        del (self.data)
        self.X_transformer = mms().fit(self.train[self.features])
        Y = self.train['traveltime'].values
        Y = Y.reshape(-1, 1)
        self.Y_transformer = mms().fit(Y)
        X = self.X_transformer.transform(self.train[self.features])
        Y = self.Y_transformer.transform(Y)

        self.model = self.rgr.fit(X, Y)
        print('Built')
        del (X)
        del (Y)
        del (self.train)
Code example #6
File: nn.py Project: astrogilda/LSTM
def timeseries_scaling(X, is_training_data=True, list_of_transformers=None):
    #X += epsilon
    #X.shape = (nsamples, timesteps, features)
    # valid combinations: is_training_data=True with list_of_transformers=None
    # (fit new per-timestep scalers), or is_training_data=False with a previously
    # returned list_of_transformers (reuse the fitted scalers)
    X_new = np.zeros_like(X)
    for i in range(X.shape[0]):
        X_new[i] = mms().fit_transform(X[i])
    #
    #"""
    X_new2 = X_new.copy()
    if is_training_data:
        list_of_transformers = list()
    #
    for i in range(X.shape[1]):
        if is_training_data:
            tr = ss()
            tr.fit(X_new[:, i])
            list_of_transformers.append(tr)
        else:
            tr = list_of_transformers[i]
        X_new2[:, i] = tr.transform(X_new[:, i])
    return X_new2, list_of_transformers
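A usage sketch of the fit/apply protocol the two keyword arguments encode (toy shapes; mms and ss are assumed to be sklearn's MinMaxScaler and StandardScaler imported at module level):

X_train = np.random.rand(100, 24, 6)   # (nsamples, timesteps, features)
X_test = np.random.rand(30, 24, 6)
X_train_scaled, transformers = timeseries_scaling(X_train, is_training_data=True)
X_test_scaled, _ = timeseries_scaling(X_test, is_training_data=False,
                                      list_of_transformers=transformers)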
Code example #7
def perform_iteration(current_gen_spectra, current_gen_conc, desired_spectra,
                      n_parents, n_offspring, mutation_rate, mutation_rate_2):
    """
    Perform one iteration of the GA algorithm.

    Inputs:
      - current_gen_spectra: The spectra of the current generation (batch).
      It is a 2D array with the number of rows equal to the number of samples
      in the generation and number of colums equal to the number of spectra
      datapoints.
      - current_gen_conc: The concentration of the current generation (batch).
      It is a 2D array with the number of rows equal to the number of samples
      in the generation and the number of columns equal to the number of
      dimensions, for exmaple, 3 columns if we are mixing red, blue, green dyes
      - desired_spectra: The desired spectra. It is a 1D array with one row
      and number of columns equal to the number of datapoints in the spectra.
      - n_parents: Integer which determines how many parents to create from the
      current generation.
      - n_offspring: Integer which determines how many offspring to create
      from the current generation.
      - mutation_rate: Float from range 0-1 which determines how often a
      mutation occurs.
      - mutation_rate_2: Float from range 0-1 which deterines how often a
      mutation occurs.
    Outputs:
      - next_gen_conc: The concentrations of the next generation to be tested.
      It is a 2D array with number of rows equal to n_offspring and number of
      columns equal to the number of dimensions.
    """
    np.random.seed(seed)
    cgs = current_gen_spectra.T
    current_gen_spectra = mms().fit(cgs).transform(cgs).T
    desired_spectra = prepare_desired_spectra(desired_spectra)
    # Perfrom Genetic Algorithm to determine next Generation
    next_gen_conc, median_fitness, max_fitness = GA_algorithm(
        current_gen_spectra, current_gen_conc, desired_spectra, n_parents,
        n_offspring, mutation_rate, mutation_rate_2)
    return next_gen_conc, median_fitness, max_fitness
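A hedged call sketch following the shapes described in the docstring (all arrays and rates below are synthetic; GA_algorithm, prepare_desired_spectra and seed are assumed to be defined elsewhere in the same module):

current_gen_spectra = np.random.rand(12, 300)   # 12 samples x 300 spectral points
current_gen_conc = np.random.rand(12, 3)        # 12 samples x 3 dye concentrations
desired_spectra = np.random.rand(300, 1)        # target spectrum as a 2D column, as MinMaxScaler expects
next_gen_conc, median_fitness, max_fitness = perform_iteration(
    current_gen_spectra, current_gen_conc, desired_spectra,
    n_parents=4, n_offspring=12, mutation_rate=0.1, mutation_rate_2=0.05)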
Code example #8
routes = json.loads(
    open('/home/student/dbanalysis/dbanalysis/resources/trimmed_routes.json',
         'r').read())
route = routes['15'][1]
models = []
features = ['day', 'month', 'hour', 'weekend', 'vappr']
for i in range(1, len(route) - 1):
    stopA = str(route[i])
    stopB = str(route[i + 1])
    print('Building for', stopA, 'to', stopB)
    df = stop_tools.stop_data(stopA, stopB)
    df['traveltime'] = df['actualtime_arr_to'] - df['actualtime_arr_from']
    df['weekend'] = df['day'] > 4
    print(df['traveltime'].mean())
    import numpy
    Y = numpy.array(df['traveltime']).reshape(-1, 1)
    transformer2 = mms().fit(Y)
    Y = transformer2.transform(Y)
    transformer1 = mms().fit(df[features])
    X = transformer1.transform(df[features])
    model = mlp(hidden_layer_sizes=(40, 40, 40)).fit(X, Y)
    models.append({
        'transformer': transformer1,
        'transformer2': transformer2,
        'model': model
    })
    del (df)
    del (X)
    del (Y)
with open('/data/chained_models_neural.bin', 'wb') as handle:
    import pickle
Code example #9
dh_data_x = pd.DataFrame(data=x_data_cols)
dh_data_y = pd.DataFrame(data=y_data_cols)
grand_set = pd.DataFrame(data=data_total)

grand_set.to_csv("grand_set", sep=',')
dh_data_x.to_csv("dh_data_x", sep='\t', index=False, index_label=False)
dh_data_y.to_csv("dh_data_y", sep='\t', index_label=False, index=False)

#Splitting & Preprocessing Data

X_train, X_test, y_train, y_test = tts(dh_data_x,
                                       dh_data_y,
                                       test_size=0.33,
                                       random_state=101)
scaler = mms()
scaler.fit(X_train)

X_train_scaled = pd.DataFrame(data=scaler.transform(X_train),
                              columns=X_train.columns,
                              index=X_train.index)

X_test_scaled = pd.DataFrame(data=scaler.transform(X_test),
                             columns=X_test.columns,
                             index=X_test.index)

#Creating feature columns

feature_cols = [
    tf.feature_column.numeric_column('Inverse_X'),
    tf.feature_column.numeric_column('Inverse_Y')
Code example #10
import pandas as pd
import torch
from collections import OrderedDict
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
import pytorch_lightning as pl
import matplotlib.pyplot as plt

data = pd.read_csv("./dataset/creditcard.csv")
data.drop(["Time", "Class"], axis=1, inplace=True)
cuda = True if torch.cuda.is_available() else False

from sklearn.preprocessing import MinMaxScaler as mms
num_scaler = mms(feature_range=(-1, 1))
columns = data.columns.tolist()
data[columns] = num_scaler.fit_transform(data[columns])
data_np = data.values


class TabularDataModule(pl.LightningDataModule):
    def __init__(self, data, batch_size: int = 32, num_workers: int = 3):
        super().__init__()
        self.data = data
        self.batch_size = batch_size
        self.num_workers = num_workers

        self.dims = self.data.shape[1]

    def prepare_data(self):
Code example #11
#Self Organizing Map
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#Importing dataset
dataset = pd.read_csv('Credit_Card_Applications.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
#Feature Scaling
from sklearn.preprocessing import MinMaxScaler as mms
sc = mms(feature_range=(0, 1))
X = sc.fit_transform(X)
#Training an SOM
from minisom import MiniSom as ms
som = ms(x=10, y=10, input_len=15, sigma=1.0, learning_rate=0.5)
som.random_weights_init(X)
som.train_random(data=X, num_iteration=100)
#Visualizing the SOM results
from pylab import bone, pcolor, colorbar, plot, show
bone()
pcolor(som.distance_map().T)
colorbar()
markers = ['o', 's']
colors = ['r', 'g']
for i, x in enumerate(X):
    w = som.winner(x)
    plot(w[0] + 0.5,
         w[1] + 0.5,
         markers[y[i]],
         markeredgecolor=colors[y[i]],
         markerfacecolor='None',
Code example #12
def powertransform_func(x):
    # x is 2d array
    return pt().fit_transform(x)

def timeseries_powertransformation(X):
    # X is np.array, 3D: apply the power transform to each sample in parallel
    with mp.Pool() as pool:
        X_new = pool.map(powertransform_func, list(X))
    return np.asarray(X_new)

imputer_per_sample = make_pipeline(ft(timeseries_imputation, validate=False))

preprocessor_per_sample = make_pipeline(
    imputer_per_sample,
    ft(timeseries_powertransformation, validate=False),
    ft(timeseries_detrending, validate=False),
    ft(timeseries_normalization, validate=False))

preprocessor_per_timestep = make_pipeline(pt(), mms())
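A hedged application sketch (X stands for the 3D array of shape (nsamples, timesteps, features) built later in this script; the per-sample pipeline relies on the timeseries_* helpers defined above and elsewhere in nn.py):

X_clean = preprocessor_per_sample.fit_transform(X)          # impute, power-transform, detrend, normalize per sample
X_step0 = preprocessor_per_timestep.fit_transform(X[:, 0])   # PowerTransformer + MinMaxScaler on timestep 0 only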

"""
## Run this commented part only once, so you are able to save the pickled files. Then comment it out.

# Read in all data in a single file
all_input, labels, ids = convert_json_data_to_nparray(path_to_data, file_name, selected_features)

all_input_test, labels_test, ids_test = convert_json_data_to_nparray(path_to_data, file_name_test, selected_features)


# Change X and y to numpy.array in the correct shape.
X = np.array(all_input)
y = np.array([labels]).T
print("The shape of X is (sample_size x time_steps x feature_num) = {}.".format(X.shape))
print("the shape of y is (sample_size x 1) = {}, because it is a binary classification.".format(y.shape))
Code example #13
File: script.py Project: amarish-kumar/HackerEarth-4
for i in range(len(offerid_te)):
    features_test.append([
        day_of_week_te[i], hour_of_day_te[i], minute_of_hour_te[i],
        second_of_minute_te[i], abs_time_te[i], siteid_te[i], offerid_te[i],
        category_te[i], merchant_te[i], countrycode_te[i], browserid_te[i],
        devid_te[i]
    ])

features_test = np.asarray(features_test)

features_train = features_train.astype(np.float64)
features_test = features_test.astype(np.float64)

from sklearn.preprocessing import MinMaxScaler as mms

scaler = mms()
features_train = scaler.fit_transform(features_train)
# scale the test set with the scaler fitted on the training set (do not refit)
features_test = scaler.transform(features_test)
print(features_train)
print(features_test)


def random_forest(f_train, l_train, f_test):
    from sklearn.ensemble import RandomForestClassifier
    #from sklearn.grid_search import GridSearchCV
    #param={'criterion' : ('gini','entropy'),'min_samples_split':[2,5,10,15,20,25,30],'n_estimators':[100]}
    #svr=RandomForestClassifier()
    #clf=GridSearchCV(svr,param)
    clf = RandomForestClassifier()
    import time
    start_time = time.time()
Code example #14
def scaler(a: pd.DataFrame):
    # fit a MinMaxScaler and return only the rescaled values; the fitted scaler itself is discarded
    minmax = mms()
    minmax.fit(a)
    return minmax.transform(a)
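A throwaway usage example (the DataFrame is invented; mms is assumed to be sklearn's MinMaxScaler imported at module level):

df = pd.DataFrame({'x': [1.0, 2.0, 3.0], 'y': [5.0, 0.0, 10.0]})
scaled = scaler(df)   # numpy array with each column rescaled to [0, 1]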
Code example #15
                            temp[2+team_index] = team_info[team]
                        #"cheating features"
                        if team == 'towerKills':
                            temp[4+team_index] = team_info[team]
                        if team == 'inhibitorKills':
                            temp[6+team_index] = team_info[team] 
                        if team == 'winner' and team_info[team]:
                            winner_team = team_index
                      
                teams['data'].append(temp)
                teams['label'].append(winner_team)

    kf = KFold(len(teams['data']), n_folds=10)
    X = np.array(teams['data'])
    Y = np.array(teams['label'])
    mimas = mms()
    i = 0
    max_acc = 0
    max_k = 0
    k = 8
    acc_total = 0
    for train, test in kf:
        X_train, X_test, y_train, y_test = X[train], X[test], Y[train], Y[test]
        guesses = []
        # scaler = mimas.fit(X_train)
        # scaler_train = scaler.transform(X_train)
        # scaler_test = scaler.transform(X_test)        
        i+=1
        for x in range(len(X_test)):
            neighbors = knn(k, X_train, X_test)
            votes = vote(neighbors)
Code example #16
def prepare_desired_spectra(x_test):
    """Preprocess spectra."""
    x_test = mms().fit_transform(x_test).T
    x_test = x_test.reshape(1, -1)
    return x_test
Code example #17
def class_efficiency(t_act, t_pred):
    # cross-tabulate actual vs. predicted class labels passed in as column arrays
    cols = ['t_act', 't_pred']
    df = pd.DataFrame(np.concatenate(
        [t_act, t_pred.reshape([len(t_act), 1])], axis=1),
                      columns=cols)
    ct = pd.crosstab(df.t_act, df.t_pred)
    return ct


ohe1 = ohe(handle_unknown='ignore')
ohe1 = ohe1.fit(training[1])
targ = ohe1.transform(training[1]).toarray()

X = scores_trunc.copy()
dim = len(X.T)
mms1 = mms()
X = mms1.fit_transform(X)
train = np.concatenate([X, targ], axis=1)

n_cats = len(targ.T)
n_obs = len(train)
#dim = n_obs-n_cats

df_train = pd.DataFrame(train)
old_col = np.arange(dim, dim + n_cats).tolist()  # indices of the one-hot target columns
new_col = []
for i in range(0, n_cats):
    new_col.append("target" + str(i))
df_train.rename(columns={i: j for i, j in zip(old_col, new_col)}, inplace=True)

#Compute mean vector per class
Code example #18
File: tirosh.py Project: ahie/vptsne-results
    axl_corr[i], _ = pearsonr(malignant[:, i],
                              mitf_cell_scores[tirosh_cell_type_labels == 0])
axl_corr[np.isnan(axl_corr)] = np.inf
axl_program_gene_indices = np.argsort(axl_corr)[:100]
axl_cell_scores = control(axl_program_gene_indices,
                          tirosh_data_relative_expression)

#mel = axl_cell_scores[np.logical_and(tirosh_labels == 81, tirosh_cell_type_labels == 0)]
#plt.hist(mel)
#plt.show()
#mel = mitf_cell_scores[np.logical_and(tirosh_labels == 81, tirosh_cell_type_labels == 0)]
#plt.hist(mel)
#plt.show()

mitf[:, 0] = np.clip(
    mms().fit_transform(mitf_cell_scores.reshape(-1, 1)).reshape(-1), 0, 1)
axl[:, 0] = np.clip(
    mms().fit_transform(axl_cell_scores.reshape(-1, 1)).reshape(-1), 0, 1)

#for tumor in [53, 81, 82, 79, 80, 59, 84, 78, 88, 71]:
#  m = np.mean(mitf_cell_scores[np.logical_and(tirosh_labels == tumor, tirosh_cell_type_labels == 0)])
#  a = np.mean(axl_cell_scores[np.logical_and(tirosh_labels == tumor, tirosh_cell_type_labels == 0)])
#  plt.scatter(m, a)
#  plt.annotate("Mel" + str(tumor), (m, a))
#plt.show()
#
#plt.subplot(211)
#plt.hist(mitf_cell_scores)
#plt.subplot(212)
#plt.hist(axl_cell_scores)
#plt.show()