Exemple #1
0
def ANN_gridpoints(folder_pathmod, epochs=50, numlayers=1, units=[20]):
    '''
    Train and evaluate an ANN on the dataset returned by mergeNGA_cells

    Parameters
    ----------
    folder_pathmod: output folder, created if it does not exist; also
        forwarded to transform_data, create_ANN and plot_resid
    epochs: number of training epochs, forwarded to create_ANN
    numlayers: integer number of hidden layers, forwarded to create_ANN
    units: array of number of units for hidden layers, forwarded to
        create_ANN (the default list is shared between calls; this function
        only forwards it)

    Returns
    -------
    None.
    '''

    # exist_ok avoids the race between a separate exists() check and makedirs
    os.makedirs(folder_pathmod, exist_ok=True)

    transform_method = 'Norm'  # function or text
    n = 13
    # or n = 6, 4
    train_data1, test_data1, train_targets1, test_targets1, feature_names = mergeNGA_cells(
        nametrain=
        '/Users/aklimasewski/Documents/data/cybertrainyeti10_residfeb.csv',
        nametest=
        '/Users/aklimasewski/Documents/data/cybertestyeti10_residfeb.csv',
        filenamenga='/Users/aklimasewski/Documents/data/NGA_mag2_9.csv',
        # use the local setting so changing n above actually takes effect
        n=n)

    # only the transformed arrays are used below; the remaining return values
    # are unpacked to match the 7-tuple returned by transform_data
    x_train, y_train, x_test, y_test, x_range, x_train_raw, x_test_raw = transform_data(
        transform_method, train_data1, test_data1, train_targets1,
        test_targets1, feature_names, folder_pathmod)

    resid, resid_test, pre_train, pre_test = create_ANN(
        x_train, y_train, x_test, y_test, feature_names, numlayers, units,
        epochs, transform_method, folder_pathmod)

    plot_resid(resid, resid_test, folder_pathmod)
Exemple #2
0
    def __init__(self, datafile, voc={}, lens=()):
        """Load ``datafile``, encode it and store the transformed data.

        Args:
            datafile: forwarded to ``preprocessing.form_dataset``.
            voc: vocabulary used by ``preprocessing.encode_ind``; when empty
                it is built from the raw data with ``preprocessing.get_voc``.
                The shared default dict is only forwarded by this method.
            lens: value stored as ``self.lens`` and forwarded to
                ``preprocessing.transform_data``; when empty, the second
                value returned by ``preprocessing.encode_ind`` is used.
        """
        self.raw_data = preprocessing.form_dataset(datafile)
        if not voc:
            voc = preprocessing.get_voc(self.raw_data)

        # Encode once and reuse both results instead of running the same
        # encode_ind call a second time just to obtain the lengths.
        enc_data, enc_lens = preprocessing.encode_ind(self.raw_data, voc)
        self.lens = lens if lens else enc_lens

        self.data = preprocessing.transform_data(enc_data,
                                                 self.lens,
                                                 is_end=False)
Exemple #3
0
def ANN_gridpoints(folder_pathmod, epochs=50, numlayers=1, units=[20]):
    '''
    ANN with cell locations as additional features
    trained and tested with dataset including NGA data

    Parameters
    ----------
    folder_pathmod: path for saving files, created if it does not exist
    epochs: number of training epochs
    numlayers: integer number of hidden layers
    units: array of number of units for hidden layers (the default list is
        shared between calls; this function only forwards it)

    Returns
    -------
    None.
    creates ANN and saves model files and figures

    Notes
    -----
    transform_method is not defined in this function and must exist at
    module level when the function is called.
    '''

    # exist_ok avoids the race between a separate exists() check and makedirs
    os.makedirs(folder_pathmod, exist_ok=True)

    train_data1, test_data1, train_targets1, test_targets1, feature_names = mergeNGAdata_cells(
        nametrain=
        '/Users/aklimasewski/Documents/data/cybertrainyeti10_residfeb.csv',
        nametest=
        '/Users/aklimasewski/Documents/data/cybertestyeti10_residfeb.csv',
        filenamenga='/Users/aklimasewski/Documents/data/NGA_mag2_9.csv',
        n=13)

    # only the transformed arrays are used below; the remaining return values
    # are unpacked to match the 7-tuple returned by transform_data
    x_train, y_train, x_test, y_test, x_range, x_train_raw, x_test_raw = transform_data(
        transform_method, train_data1, test_data1, train_targets1,
        test_targets1, feature_names, folder_pathmod)

    resid, resid_test, pre_train, pre_test = create_ANN(
        x_train, y_train, x_test, y_test, feature_names, numlayers, units,
        epochs, transform_method, folder_pathmod)

    plot_resid(resid, resid_test, folder_pathmod)
Exemple #4
0
# nga_data1,feature_names = add_locfeatNGA(filenamenga,nga_data1,feature_names)

# when the az flag is set, pass the NGA data and feature names through add_azNGA
if az == True:
    nga_data1, feature_names = add_azNGA(nga_data1, feature_names)

# read in cyber shake training and testing data
# NOTE: feature_names is rebound here (and again by add_az below), replacing
# the value returned by add_azNGA above
train_data1, test_data1, train_targets1, test_targets1, feature_names = readindata(
    nametrain=
    '/Users/aklimasewski/Documents/data/cybertrainyeti10_residfeb.csv',
    nametest='/Users/aklimasewski/Documents/data/cybertestyeti10_residfeb.csv',
    n=n)
train_data1, test_data1, feature_names = add_az(train_data1, test_data1,
                                                feature_names)

# transform the training data together with the NGA data; test_data1 and
# test_targets1 are not used in the visible part of this script
# (folder_pathNGA is presumably an output folder -- confirm in transform_data)
x_train, y_train, x_nga, y_nga, x_range, x_train_raw, x_nga_raw = transform_data(
    transform_method, train_data1, nga_data1, train_targets1, nga_targets1,
    feature_names, folder_pathNGA)

# load the previously saved model; it is only used for prediction here,
# no fitting takes place
loadedmodel = keras.models.load_model(folder_path + 'model/')

# residual = target - prediction; the targets used are nga_targets1 /
# train_targets1, not the y_nga / y_train returned by transform_data
pre_nga = loadedmodel.predict(x_nga)
resid_nga = np.asarray(nga_targets1) - pre_nga

pre_train = loadedmodel.predict(x_train)
resid_train = np.asarray(train_targets1) - pre_train

# not used in the visible part of this script
period = [10, 7.5, 5, 4, 3, 2, 1, 0.5, 0.2, 0.1]

# standard deviation of the residuals along axis 0 (training and NGA)
diff = np.std(resid_train, axis=0)
difftest = np.std(resid_nga, axis=0)
Exemple #5
0
# read the NGA data and targets, then pass the data through add_azNGA
nga_data1, nga_targets1, feature_names = readindataNGA(filenamenga, n)
nga_data1, feature_names = add_azNGA(filenamenga, nga_data1, feature_names)

# split the NGA data into training and testing: 20% held out for testing,
# random_state fixed at 1
ngatrain, ngatest, ngatrain_targets, ngatest_targets = train_test_split(
    nga_data1, nga_targets1, test_size=0.2, random_state=1)

# append the NGA splits to the existing train/test arrays along axis 0
# (train_data1, test_data1 and their targets are defined outside this fragment)
train_data1 = np.concatenate([train_data1, ngatrain], axis=0)
test_data1 = np.concatenate([test_data1, ngatest], axis=0)
train_targets1 = np.concatenate([train_targets1, ngatrain_targets], axis=0)
test_targets1 = np.concatenate([test_targets1, ngatest_targets], axis=0)

# transform data
x_train, y_train, x_test, y_test, x_range, x_train_raw, x_test_raw = transform_data(
    transform_method, train_data1, test_data1, train_targets1, test_targets1,
    feature_names, folder_path)

# build and train the ANN
resid_train, resid_test, pre_train, pre_test = create_ANN(
    x_train, y_train, x_test, y_test, feature_names, numlayers, units, epochs,
    transform_method, folder_path)

# period is not used in the visible part of this script
period = [10, 7.5, 5, 4, 3, 2, 1, 0.5, 0.2, 0.1]
plot_resid(resid_train, resid_test, folder_path)

# additional names bound to the same prediction arrays returned by create_ANN
mean_x_test_allT = pre_test
mean_x_train_allT = pre_train
# empty lists, presumably filled by later code -- not visible here
predict_epistemic_allT = []
predict_epistemic_train_allT = []
def ANN_2step(folder_pathmod1,
              folder_pathmod2,
              epochs1=50,
              epochs2=50,
              numlayers1=1,
              numlayers2=1,
              units1=[20],
              units2=[20]):
    '''
    2 ANNs: 1st is the base ANN, 2nd ANN uses 1st model residuals as targets and cell location features

    Parameters
    ----------
    folder_pathmod1: path for saving png files for the first ANN
    folder_pathmod2: path for saving png files for the second ANN
    epochs1: number of training epochs for the first ANN
    epochs2: number of training epochs for the second ANN
    numlayers1: integer number of hidden layers for the first ANN
    numlayers2: integer number of hidden layers for the second ANN
    units1: array of number of units for hidden layers for first ANN
    units2: array of number of units for hidden layers for second ANN

    Returns
    -------
    None.
    creates two ANNS and saves model files and figures

    Notes
    -----
    n, folder_path and transform_method are read from module level. The
    first ANN uses the module-level transform_method; the second ANN uses a
    PowerTransformer.
    '''
    from sklearn.preprocessing import PowerTransformer

    # exist_ok avoids the race between a separate exists() check and makedirs
    os.makedirs(folder_pathmod1, exist_ok=True)

    # read in training, testing, and cell data
    train_data1, test_data1, train_targets1, test_targets1, feature_names = readindata(
        nametrain=
        '/Users/aklimasewski/Documents/data/cybertrainyeti10_residfeb.csv',
        nametest=
        '/Users/aklimasewski/Documents/data/cybertestyeti10_residfeb.csv',
        n=n)
    train_data1, test_data1, feature_names = add_az(train_data1, test_data1,
                                                    feature_names)

    cells = pd.read_csv(folder_path + 'gridpointslatlon_train.csv',
                        header=0,
                        index_col=0)
    cells_test = pd.read_csv(folder_path + 'gridpointslatlon_test.csv',
                             header=0,
                             index_col=0)

    # first ANN: transform_method must stay a global read here. Assigning to
    # that name anywhere in this function would make it local and turn these
    # reads into an UnboundLocalError.
    x_train, y_train, x_test, y_test, x_range, x_train_raw, x_test_raw = transform_data(
        transform_method, train_data1, test_data1, train_targets1,
        test_targets1, feature_names, folder_pathmod1)

    resid, resid_test, pre_train, pre_test = create_ANN(
        x_train, y_train, x_test, y_test, feature_names, numlayers1, units1,
        epochs1, transform_method, folder_pathmod1)

    plot_resid(resid, resid_test, folder_pathmod1)

    # second ANN
    os.makedirs(folder_pathmod2, exist_ok=True)

    # residuals of the first ANN become the targets of the second ANN
    train_targets1 = resid
    test_targets1 = resid_test

    # cell locations are the only features of the second ANN
    train_data1 = np.asarray(cells)
    test_data1 = np.asarray(cells_test)

    # separate name so the module-level transform_method is not shadowed
    transform_method2 = PowerTransformer()
    feature_names = np.asarray([
        'eventlat',
        'eventlon',
        'midlat',
        'midlon',
        'sitelat',
        'sitelon',
    ])

    x_train, y_train, x_test, y_test, x_range, x_train_raw, x_test_raw = transform_data(
        transform_method2, train_data1, test_data1, train_targets1,
        test_targets1, feature_names, folder_pathmod2)

    resid, resid_test, pre_train, pre_test = create_ANN(
        x_train, y_train, x_test, y_test, feature_names, numlayers2, units2,
        epochs2, transform_method2, folder_pathmod2)

    plot_resid(resid, resid_test, folder_pathmod2)
def ANN_gridpoints(folder_pathmod, epochs=50, numlayers=1, units=[20]):
    '''
    ANN with cell locations as additional features

    Parameters
    ----------
    folder_pathmod: path for saving png files, created if it does not exist
    epochs: number of training epochs
    numlayers: integer number of hidden layers
    units: array of number of units for hidden layers (the default list is
        shared between calls; this function only forwards it)

    Returns
    -------
    None.
    creates ANNS and saves model files and figures

    Notes
    -----
    folder_path (location of the grid point csv files) is read from module
    level.
    '''

    cells = pd.read_csv(folder_path + 'gridpointslatlon_train.csv',
                        header=0,
                        index_col=0)
    cells_test = pd.read_csv(folder_path + 'gridpointslatlon_test.csv',
                             header=0,
                             index_col=0)

    # create the folder that was passed in; the previous code referred to an
    # undefined name (folder_pathmod1) here and in the plot_resid call
    os.makedirs(folder_pathmod, exist_ok=True)

    transform_method = 'Norm'  # function or text
    n = 13

    train_data1, test_data1, train_targets1, test_targets1, feature_names = readindata(
        nametrain=
        '/Users/aklimasewski/Documents/data/cybertrainyeti10_residfeb.csv',
        nametest=
        '/Users/aklimasewski/Documents/data/cybertestyeti10_residfeb.csv',
        n=n)
    train_data1, test_data1, feature_names = add_az(train_data1, test_data1,
                                                    feature_names)

    # add the cell features as extra columns, with matching feature names
    train_data1 = np.concatenate([train_data1, cells], axis=1)
    test_data1 = np.concatenate([test_data1, cells_test], axis=1)
    feature_names = np.concatenate([
        feature_names,
        [
            'eventlat',
            'eventlon',
            'midlat',
            'midlon',
            'sitelat',
            'sitelon',
        ]
    ],
                                   axis=0)

    x_train, y_train, x_test, y_test, x_range, x_train_raw, x_test_raw = transform_data(
        transform_method, train_data1, test_data1, train_targets1,
        test_targets1, feature_names, folder_pathmod)

    resid, resid_test, pre_train, pre_test = create_ANN(
        x_train, y_train, x_test, y_test, feature_names, numlayers, units,
        epochs, transform_method, folder_pathmod)

    plot_resid(resid, resid_test, folder_pathmod)
Exemple #8
0
input_days = 4

# We get the weather produced by file meteo.py
meteo = pd.read_pickle('tmp/meteo')


# WE GET THE CLEANED DATA
# X = pd.read_csv('data/train_2011_2012.csv', sep=';')
# X_cleaned = clean_data(X, assignment_list)

# X_cleaned was saved in pickle format so it is easier to take it there
X_cleaned = pd.read_pickle('tmp/X_cod')


# WE GET THE TRANSFORMED DATA
# NOTE(review): list_cod and X_bis computed here are overwritten by the
# read_pickle on the next statement -- confirm whether this call is still
# needed (it may be kept for side effects not visible here)
list_cod, X_bis = transform_data(X_cleaned, meteo, assignment_list, leap_days)

# load list_cod, X_bis and scalage from the pickled file
list_cod, X_bis, scalage = pd.read_pickle('tmp/X_bis')


# WE CREATE THE TRAINING SET

# X_train, J_train, y_train = build_training_set(X_bis, assignment_list, list_cod, days_test)
# the training set is loaded from a pickle instead of being rebuilt
X_train, J_train, y_train = pd.read_pickle('tmp/training_set')


# WE TRAIN THE MODEL
# returns a dictionary of models for each ASSIGNMENT
# NOTE(review): input_days is passed as the literal 4 rather than the
# input_days variable defined at the top
model, MSE = model_creation(X_train, J_train, y_train, scalage, assignment_list, list_cod, input_days=4)

                                  hypotheses_lengths)
            loss = criterion(probs, labels)

            running_loss += loss.item()
            predictions = np.argmax(probs.data.cpu().numpy(), 1)
            correct += len(
                np.where(labels.data.cpu().numpy() == predictions)[0])
            total += premises.size(0)
        acc = correct / float(total)
        print("Accuracy:{0}".format(acc))
        return acc


if __name__ == '__main__':
    # 1.data load
    premises, hypotheses, labels = transform_data(train_data_path)
    train_data = SnliDataset(premises, hypotheses, labels, word2idx)
    trainloader = torch.utils.data.DataLoader(train_data,
                                              batch_size=batch_size,
                                              collate_fn=collate_snli,
                                              shuffle=True)

    premises, hypotheses, labels = transform_data(test_data_path)
    test_data = SnliDataset(premises,
                            hypotheses,
                            labels,
                            word2idx,
                            attack_label=0)
    testloader = torch.utils.data.DataLoader(test_data,
                                             batch_size=256,
                                             collate_fn=collate_snli,