Example #1
def model_train():
    all_types = os.listdir(root + '/type_err_feature/')
    result = pd.DataFrame()
    for t in all_types:
        all_cities = os.listdir(root + '/type_err_feature/' + t + '/center/')
        for c in all_cities:
            csv_path = root + '/type_err_feature/' + t + '/center/' + c + '/demand_data.csv'
            if os.path.exists(csv_path):
                data = pd.read_csv(csv_path, encoding='utf-8')
                rec_start = data.loc[0, 'WEEK']

                col = ['demand']
                data = data.loc[:, col]
                if data['demand'].unique().tolist() != [0]:
                    scaler = MaxAbsScaler()
                    data = scaler.fit_transform(data)
                    data = pd.DataFrame(data[:, 0], columns=['demand'])
                    train_sample = int(0.7 * data.shape[0]) + 1

                    steps = 3
                    X, y = pre_process(data, steps)
                    X = scale(X, axis=1, with_std=False, with_mean=False)
                    tr_X, tr_y = X[:train_sample], y[:train_sample]
                    t_X, t_y = X[train_sample:], y[train_sample:]
                    tr_X, t_X = np.reshape(tr_X, (tr_X.shape[0], 1, tr_X.shape[1])), np.reshape(t_X, (t_X.shape[0], 1, t_X.shape[1]))

                    print(t, c)
                    path = root + '/type_err_feature/' + t + '/center/' + c
                    tr_predict, t_predict, well_learnt = lstm_train(path, tr_X, tr_y, t_X, t_y, scaler)
                    data = scaler.inverse_transform(data)
                    plot_predict(data, steps, path, tr_predict, t_predict)
                    result = result.append(save_data(c, t, data, tr_predict, t_predict, rec_start, well_learnt))
    result.to_csv(root + '/info/parts_prepare.csv', header=True, index=False, encoding='utf-8')
Example #2
def train_and_estimate_once(np_x: np.ndarray, np_y: np.ndarray,
                            stock_np_x: np.ndarray, num_cols: int,
                            scalery: MaxAbsScaler, rand_state: int) -> float:
    """
    Train an XGBoost market-cap estimator using the entire market less one stock
    and then make a prediction using that estimator for that stock.

    Importantly, rand_state slightly perturbs the training, so the return value
    of this function is meant to be averaged over multiple rand_states (a usage
    sketch follows this function).
    """

    model = XGBRegressor(max_depth=num_cols // 4,
                         learning_rate=.5,
                         n_estimators=20,
                         subsample=.8,
                         random_state=rand_state)
    model.fit(np_x, np_y)
    scaled_pred = model.predict(stock_np_x)
    real_pred = scalery.inverse_transform(scaled_pred.reshape(-1, 1))
    return float(real_pred[0][0])
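# A minimal sketch of the averaging mentioned in the docstring above, assuming
# numpy is imported as np and that np_x, np_y, stock_np_x, num_cols and scalery
# are prepared elsewhere; the helper name and the seed list are illustrative.
def estimate_market_cap(np_x: np.ndarray, np_y: np.ndarray,
                        stock_np_x: np.ndarray, num_cols: int,
                        scalery: MaxAbsScaler,
                        rand_states=(0, 1, 2, 3, 4)) -> float:
    preds = [train_and_estimate_once(np_x, np_y, stock_np_x, num_cols,
                                     scalery, rs) for rs in rand_states]
    return float(np.mean(preds))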
Example #3
# test_generator = get_generator_cyclic(Xvalid,Xvalid2,y_valid,readin)
for e in range(epochs):
    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")
    print('Epoch', e, 'Time:', current_time)
    batches = 0
    while batches < len(X) / readin:
        Xtrain_1, Xtrain_2, ytrain_1 = next(training_generator)
#         Xtest_1, Xtest_2, ytest_1 =next(test_generator)
#         model.fit([Xtrain_1, Xtrain_2], ytrain_1, callbacks = [callback],validation_data = ([Xtest_1,Xtest_2], ytest_1),batch_size=batch_size,verbose = 0)
        model.fit([Xtrain_1, Xtrain_2], ytrain_1, callbacks=[callback], batch_size=batch_size, verbose=0)
        batches += 1
    
    # calculates and prints the running validation once per epoch
    losssc, msesc, maesc = model.evaluate([Xvalid / 255., Xvalid2], y_valid, verbose=0)
    mae = sc_y.inverse_transform(np.array(maesc).reshape(1, -1))[0][0]
    if mae < mae_best:
        modelbest = model
        mae_best = mae
    print('Mean absolute error at {:4.0f} is: {:4.2f}'.format(e,mae))
modelbest.save('cnn3v28')    


# In[32]:


save_notebookparams = 1
if save_notebookparams:
    pkl_filename = "depthforcemodelparam_cnn3v28_pb.pkl"
    randata = {}
    randata['nsamps']=nsamps
Example #4
def fitnenss(learning_rate, num_layers, num_nodes, optimiser, batch_size,
             steps_epoch, month, quarter, year, dayyear, weekyear, day,
             residualV):
    def predict(x, encoder_predict_model, decoder_predict_model,
                num_steps_to_predict):

        y_predicted = []

        # Encode the values as a state vector
        states = encoder_predict_model.predict(x)

        # The states must be a list
        if not isinstance(states, list):
            states = [states]

        # Generate first value of the decoder input sequence
        decoder_input = np.zeros((x.shape[0], 1, num_y_signals))

        for _ in range(num_steps_to_predict):
            outputs_and_states = decoder_predict_model.predict(
                [decoder_input] + states, batch_size=1)
            output = outputs_and_states[0]
            states = outputs_and_states[1:]

            # add predicted value
            y_predicted.append(output)

        return np.concatenate(y_predicted, axis=1)

    groupList = results[["group"]].groupby("group").sum().index  # number of groups

    index = pd.date_range(start="2012-03-04", end="2016-12-04", freq='w')
    residual = pd.DataFrame({"residual": np.zeros(len(index))}, index=index)

    qtys = []
    if residualV:
        qtys = ["residual"]

    total = pd.DataFrame(index=index)

    if dayyear:
        total = total.assign(
            Yearday=total.index.dayofyear)  #Time related features
    if weekyear:
        total = total.assign(Yearweek=total.index.weekofyear)
    if day:
        total = total.assign(Monthday=total.index.day)
    if year:
        total = total.assign(Year=total.index.year)
    if month:
        total = total.assign(month=total.index.month)
    if quarter:
        total = total.assign(quarter=total.index.quarter)

    k = 0
    groupsList = []

    for i, g in products:

        product = products.get_group(i)

        productName = product["product"].array[0]
        productGroup = product["group"].array[0]

        del product["group"]
        del product["product"]

        product = product.groupby("date").sum()
        product = product.astype("float64")

        # Count non-zero weeks in the last two years; products with fewer than
        # 20 such data points are pushed into the residual column instead of
        # being modelled individually.
        dataPoints = product.resample(
            "w").sum()["2014-12-04":"2016-12-04"].astype(bool).sum(
                axis=0).values[0]

        # Fill empty weeks so every series shares the same index; clip negative quantities to zero
        product = product.resample('w').sum().reindex(index).fillna(0)
        product["quantity"] = product["quantity"].apply(lambda x: 0.0 if x < 0 else x)

        if (dataPoints < 20):
            residual = residual.add(product.values)
        else:
            qtColName = productName + "_qty"
            ctColName = productName + "_group"
            groupsList.append(mapGroups(productGroup, groupList))

            tempDf = pd.DataFrame({
                qtColName: product["quantity"],
                ctColName: mapGroups(productGroup, groupList)
            })

            total = pd.concat([total, tempDf], axis=1, sort=False)

            qtys.append(qtColName)

    if residualV:
        total = pd.concat([total, residual], axis=1, sort=False)

    products_2 = results.groupby("product")
    from sklearn.metrics import mean_squared_error

    index = pd.date_range(start="2012-03-04", end="2016-12-04", freq='w')

    prods = []
    for i in range(1, len(qtys)):
        prods.append(qtys[i][:-4])

    t = 0
    k = 0

    naive = {}

    for i, g in products_2:

        product = products_2.get_group(i)
        prod = product["product"].array[0]
        product = product.groupby("date").sum()

        del product["group"]
        del product["product"]

        product = product.resample('w').sum().reindex(index).fillna(0)
        product["quantity"] = product["quantity"].apply(lambda x: 0.0 if x < 0 else x)

        coef = 0.8
        delimiter = int((1 - coef) * index.shape[0])

        validate = product.tail(delimiter)

        prediction = product[index.shape[0] - delimiter * 2:index.shape[0] -
                             delimiter]

        if (prod in prods):
            rmse = np.sqrt(
                mean_squared_error(prediction.values.astype("float"),
                                   validate.values.astype("float")))
            naive[prod] = rmse

            t = t + rmse
            k = k + 1

    product = total

    target = product[qtys]

    x_data = product.values  # trim the end of the series because of the NaN values resulting from the shift
    y_data = target.values

    data_count = len(x_data)
    train_split = 0.8

    num_train = int(train_split * data_count)
    num_test = data_count - num_train

    #Creating the test and the train data
    x_train = x_data[:num_train]
    x_test = x_data[num_train:]

    y_train = y_data[0:num_train]
    y_test = y_data[num_train:]

    num_x_signals = x_data.shape[1]
    num_y_signals = y_data.shape[1]

    x_scaler = MaxAbsScaler()  #Normalize the data
    x_train_scaled = x_scaler.fit_transform(x_train)
    x_test_scaled = x_scaler.transform(x_test)

    y_scaler = MaxAbsScaler()
    y_train_scaled = y_scaler.fit_transform(y_train)
    y_test_scaled = y_scaler.transform(y_test)

    def batch_generator(batch_size, input_seq_len, target_seq_len):
        """
        Generator function for creating random batches of training-data.
        """

        while True:

            x_shape = (batch_size, input_seq_len, num_x_signals)
            y_shape = (batch_size, target_seq_len, num_y_signals)

            encoder_input = np.zeros(shape=x_shape, dtype=np.float16)
            decoder_output = np.zeros(shape=y_shape, dtype=np.float16)
            decoder_input = np.zeros(shape=y_shape, dtype=np.float16)

            total_length = input_seq_len + target_seq_len

            for i in range(batch_size):

                idx = np.random.randint(num_train - total_length)

                encoder_input[i] = x_train_scaled[idx:idx + input_seq_len]
                decoder_output[i] = y_train_scaled[idx + input_seq_len:idx +
                                                   total_length]

            yield ([encoder_input, decoder_input], decoder_output)

    print('learning rate: {0:.1e}'.format(learning_rate))
    print('Number of layers:', num_layers)
    print('Number of nodes:', num_nodes)

    stri = ""
    if (month):
        stri = stri + "Month "
    if (quarter):
        stri = stri + "Quarter "
    if (year):
        stri = stri + "Year "
    if (dayyear):
        stri = stri + "Dayyear "
    if (weekyear):
        stri = stri + "Weekyear "
    if (day):
        stri = stri + "Day "
    if (residualV):
        stri = stri + "Residual "

    print("State " + stri)

    if optimiser == 0:
        print("Optimiser: RMSProp")
    else:
        print("Optimiser: Adam")

    print('Batch size:', batch_size)
    print('Steps epoch', steps_epoch)

    generator = batch_generator(batch_size=batch_size,
                                input_seq_len=15,
                                target_seq_len=15)

    input_seq_len = 15
    target_seq_len = 15
    validation_data = ([
        np.expand_dims(x_test_scaled[:input_seq_len], axis=0),
        np.zeros(shape=(1, target_seq_len, num_y_signals), dtype=np.float16)
    ],
                       np.expand_dims(
                           y_test_scaled[input_seq_len:input_seq_len +
                                         target_seq_len],
                           axis=0))

    layers = []
    for i in range(num_layers):
        layers.append(num_nodes)

    #Encoder
    encoder_inputs = keras.layers.Input(shape=(None, num_x_signals))

    encoder_cells = []
    for hidden_neurons in layers:
        encoder_cells.append(keras.layers.GRUCell(hidden_neurons))

    encoder = keras.layers.RNN(encoder_cells, return_state=True)

    encoder_outputs_and_states = encoder(encoder_inputs)

    encoder_states = encoder_outputs_and_states[1:]

    #Decoder
    decoder_inputs = keras.layers.Input(shape=(None, num_y_signals))

    decoder_cells = []
    for hidden_neurons in layers:
        decoder_cells.append(keras.layers.GRUCell(hidden_neurons))

    decoder = keras.layers.RNN(decoder_cells,
                               return_sequences=True,
                               return_state=True)

    decoder_outputs_and_states = decoder(decoder_inputs,
                                         initial_state=encoder_states)

    decoder_outputs = decoder_outputs_and_states[0]

    decoder_dense = keras.layers.Dense(num_y_signals, activation='linear')

    decoder_outputs = decoder_dense(decoder_outputs)

    if (optimiser == 0):
        optimiser_name = "RMSProp"
        optimiser = keras.optimizers.RMSprop(lr=learning_rate)
    else:
        optimiser_name = "Adam"
        optimiser = keras.optimizers.Adam(lr=learning_rate)

    model = keras.models.Model(inputs=[encoder_inputs, decoder_inputs],
                               outputs=decoder_outputs)
    model.compile(optimizer=optimiser, loss="mse")

    log_dir = log_dir_name(learning_rate, num_layers, num_nodes, optimiser_name,
                           batch_size, steps_epoch, stri)

    callback_early_stopping = EarlyStopping(monitor='val_loss',
                                            patience=10,
                                            verbose=1)

    checkpoint_name = "checks/lr_{0:.0e}_layers_{1}_nodes_{2}_{3}_batch{4}_epoch{5}{6}.keras".format(
        learning_rate, num_layers, num_nodes, optimiser_name, batch_size,
        steps_epoch, stri)

    path_checkpoint = checkpoint_name
    callback_checkpoint = ModelCheckpoint(filepath=path_checkpoint,
                                          monitor='val_loss',
                                          verbose=1,
                                          save_weights_only=True,
                                          save_best_only=True)

    callback_log = [
        callback_early_stopping, callback_checkpoint,
        TensorBoard(log_dir=log_dir,
                    histogram_freq=0,
                    write_graph=True,
                    write_grads=False,
                    write_images=False)
    ]

    # Use Keras to train the model.
    model.fit_generator(generator=generator,
                        epochs=200,
                        steps_per_epoch=steps_epoch,
                        validation_data=validation_data,
                        callbacks=callback_log)

    try:
        model.load_weights(path_checkpoint)
    except Exception as error:
        print("Error trying to load checkpoint.")
        print(error)

    loss = model.evaluate(validation_data[0], validation_data[1])

    print()
    print("Loss: " + str(loss))
    print()

    encoder_predict_model = keras.models.Model(encoder_inputs, encoder_states)

    decoder_states_inputs = []

    for hidden_neurons in layers[::-1]:
        # One state for GRU
        decoder_states_inputs.append(
            keras.layers.Input(shape=(hidden_neurons, )))

    decoder_outputs_and_states = decoder(decoder_inputs,
                                         initial_state=decoder_states_inputs,
                                         training=True)

    decoder_outputs = decoder_outputs_and_states[0]
    decoder_states = decoder_outputs_and_states[1:]

    decoder_outputs = decoder_dense(decoder_outputs)

    decoder_predict_model = keras.models.Model(
        [decoder_inputs] + decoder_states_inputs,
        [decoder_outputs] + decoder_states)

    horizon = 50

    X_for_pred = np.expand_dims(x_train_scaled[x_train_scaled.shape[0] -
                                               input_seq_len:],
                                axis=0)
    y_test_predicted = predict(X_for_pred, encoder_predict_model,
                               decoder_predict_model, horizon)

    pred = y_scaler.inverse_transform(y_test_predicted[0]).T
    true = y_test[:horizon].T

    t_rmse = 0

    rnn = {}

    for i in range(len(pred)):

        pred[i][pred[i] < 0] = 0

        rmse = np.sqrt(mean_squared_error(true[i], pred[i]))
        t_rmse = t_rmse + rmse

        rnn[qtys[i][:-4]] = rmse

    valsArr_old = [
        66.6851666549545, 67.88958611597897, 69.21681535327671,
        66.48135020266528, 64.27709785778202, 60.72749709928205,
        63.701465954928295, 65.17733810711289, 64.78680046203984,
        54.03158148686864, 50.996898683969604, 54.064319848800224,
        42.69338976116232, 7.7145067699141645, 58.73075744706745,
        42.21435723311817, 65.8428871293241, 55.54466048398387,
        37.459384554198074, 65.91935078588978, 59.44024944642978,
        66.75112087030772, 55.109361656982436, 48.873453068349015,
        46.018164004825145, 63.55124830274102, 58.51154392727636,
        65.20080941236168, 20.582912304727387, 65.47745592815562,
        29.05502729752538, 31.88538428636566, 66.19052965893539,
        25.209622432309466, 65.30813282106377, 48.47010482324254,
        55.35896540587586, 67.8970786052471, 63.95296694703776,
        65.58669446483145, 69.07507501099678, 53.27732807353019,
        59.13327495000382, 49.961020439339, 36.43109621690061,
        58.25244506735996, 73.47924335249874, 48.24591638102002,
        15.306627521117743, 54.619525753773665, 33.460658318138734,
        63.38118223469059, 21.4684065315376, 51.57643764331531,
        38.35348241078786, 57.49193432508641, 52.378319918141834,
        3.2970115809005662, 45.070092238160676, 50.820191022437356,
        20.009408502370174, 66.76133851423023, 15.381852603930446,
        55.844775832748425, 63.3844295901193, 62.06249668996134,
        18.796244866954524, 61.151276543758314, 69.76259052461357,
        32.22915185003426, 52.164895526117284, 55.51180174042507
    ]
    rnn["total"] = t_rmse / len(pred)

    valsArr = []
    for f in naive:

        val = (1 - (rnn[f] / naive[f])) * 100
        #print(f + ": " + str(np.round(val,decimals=2)) + "%")
        valsArr.append(val)

    print("Current : " + str(np.array(valsArr[:]).mean()))
    print("Old: " + str(np.array(valsArr_old[:]).mean()))

    percentage = np.array(valsArr).mean()

    global highest_per
    if percentage > highest_per:
        model.save(path_best_model)
        highest_per = percentage

    print("Highest Percentage: " + str(highest_per))
    print()

    del model
    K.clear_session()
    return -percentage
Example #5
# boxplots
#for col in data.columns:
#    plt.figure()
#    plt.boxplot(data[col])
#    plt.title(col)

# scaling the data
data_scaled = data.copy()
scaler = MaxAbsScaler()
data_scaled.loc[:, :] = scaler.fit_transform(data)
scaler_params = scaler.get_params()

# We will eventually need the physical (unscaled) data to display the results
extract_scaling_function = np.ones((1, data_scaled.shape[1]))
extract_scaling_function = scaler.inverse_transform(extract_scaling_function)
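# A toy illustration (hypothetical two-column DataFrame) of the trick above:
# inverse-transforming a row of ones recovers each column's max-abs value, so
# multiplying scaled columns by this row restores the physical units.
_toy = pd.DataFrame({'a': [1.0, -4.0, 2.0], 'b': [10.0, 5.0, -20.0]})
_toy_scaler = MaxAbsScaler().fit(_toy)
print(_toy_scaler.inverse_transform(np.ones((1, _toy.shape[1]))))  # [[ 4. 20.]]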

pd.set_option('display.max_columns', 7)
#print(data_scaled.iloc[:3,:])

## Shuffling data
#data = data.sample(frac=1,random_state=0).reset_index(drop=True)
#
#
## Separating inputs from outputs
#X_data = np.array(data.iloc[:,:6])
#y_data = np.array(data.iloc[:,-1])

# The dataset
datasets = {}
y = data_scaled['Residuary resistance'].values.reshape(-1, 1)
Example #6
class ANOMIGAN():
    def __init__(self):
        self.testfile = None  # TEST_FILE (path left unspecified in the original)
        self.data = None      # TRAIN_FILE (path left unspecified in the original)
        self.num_feature = 30
        self.X_test = 0 
        self.X_gen = 0 
        self.scaler = MaxAbsScaler() 
        self.input_shape = (-1,-1) 
        self.latent_dim = 100
        self.C= tf.placeholder(tf.float32, [None, 512])
        self.C_prime = tf.placeholder(tf.float32, [None, 512])

        # models
        self.generator = None
        self.discriminator = None
        self.preTrainedModel = Sequential()

        # hyperparameter for loss
        self.lambda_a = 0.5
        self.lambda_b = 1 - self.lambda_a
        self.confidence = 1.0
        self.batch_size = 32
        self.num_variance = 5

        # temp Lists
        self.bList = []
        self.aList = []

        self.bFpr = []
        self.bTpr = []
        self.bThresholds = []

        self.aFpr = []
        self.aTpr = []
        self.aThresholds = []

        self.t_var = {}

######## drawing functions ###############
    def butter_lowpass_filter(self, data, cutoff, fs, order=5):
        b, a = self.butter_lowpass(cutoff, fs, order=order)
        y = lfilter(b, a, data)
        return y

    def butter_lowpass(self, cutoff, fs, order=5):
        nyq = 0.5 * fs
        normal_cutoff = cutoff / nyq
        b, a = butter(order, normal_cutoff, btype='low', analog=False)
        return b, a

    def drawLoss(self, S_loss_list, E_loss_list):
        # Filter requirements.
        order = 6
        fs = 30.0       # sample rate, Hz
        cutoff = 3.667  # desired cutoff frequency of the filter, Hz

        s_filter = self.butter_lowpass_filter(S_loss_list, cutoff, fs, order)
        d_filter = self.butter_lowpass_filter(E_loss_list, cutoff, fs, order)

        ylim = [0,3]
        f = plt.figure(tight_layout=True)  
        ax = f.add_subplot(111, ylim=ylim)
        ax.set_xlabel("Epochs",fontsize=20)
        ax.set_ylabel("Loss",fontsize=20)
        ax.plot(s_filter, label='Discriminator', color='blue', linewidth=1, linestyle='--' )
        ax.plot(d_filter, label='Encoder', color='green', linewidth=1, alpha=0.5 )
        ax.legend(loc=1,fontsize=15)

        plt.show()

    def drawAccuracyPlot(self):
        ylim = [0,105]
        xlim = [0, 10]
        f = plt.figure(tight_layout=True)  
        ax = f.add_subplot(111, ylim=ylim)
        ax.set_xlabel("Random Iterative Steps",fontsize=20)
        ax.set_ylabel("Accuracy",fontsize=20)
        plt.plot(self.bList, label='Original Samples', color='blue', linewidth=1, linestyle='--' )
        plt.plot(self.aList, label='Generated Samples', color='green', linewidth=1, )
        plt.legend()

        plt.show()

    def drawRocPlot(self):
        fpr1, tpr1, thresholds1 = self.bFpr, self.bTpr, self.bThresholds   
        roc_auc1 = auc(fpr1, tpr1)
 
        fpr2, tpr2, thresholds2 = self.aFpr, self.aTpr, self.aThresholds
        roc_auc2 = auc(fpr2, tpr2)

        plt.figure()
        plt.plot(fpr1, tpr1, color='blue', linestyle='--', linewidth=2, label='ROC curve with original samples (area = %0.2f)' % roc_auc1)
        plt.plot(fpr2, tpr2, color='green', linewidth=1, label='ROC curve with generated samples (area = %0.2f)' % roc_auc2)
        plt.plot([0, 1], [0, 1], color='black', lw=1, linestyle=':')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.legend(loc="lower right")
        plt.show()

######## target classifier model functions ###############
    def get_pretrainModel(self):
        # Use an API call to load the target pretrained model into self.preTrainedModel
        pass

    def get_target_features(self):
        X = 1 # Define input features of X
        Y = 1 # Define label of input features of X
        return X, Y 

######## AnomiGAN model functions ###############
    def discriminator(self, x):
        with tf.variable_scope("discriminator"):
            x_reshaped = tf.reshape(x, (-1, self.num_feature, 1))
            conv1 = tf.layers.conv1d(x_reshaped, filters=32, kernel_size=4, 
                                                        strides=2,
                                                        padding='VALID',
                                                        activation=tf.nn.relu)
            conv2 = tf.layers.conv1d(conv1, filters=10,
                                                       kernel_size=2,
                                                       strides=1,
                                                       padding='SAME',
                                                       activation=tf.nn.tanh)
            conv3 = tf.layers.conv1d(conv1, filters=20,
                                                       kernel_size=2,
                                                       strides=1,
                                                       padding='SAME',
                                                       activation=tf.nn.tanh)
            conv4 = tf.layers.conv1d(conv1, filters=30,
                                                       kernel_size=2,
                                                       strides=1,
                                                       padding='SAME',
                                                       activation=tf.nn.tanh)
            flatten = tf.layers.flatten(conv4)
            out = tf.layers.dense(flatten, self.num_feature, activation=tf.nn.relu)
            return out

    def operation_mode(self, x, message, mode):
        if mode == 1:
            dtype = x.dtype
            x_btensor = tf.cast(x, tf.int32)
            m_btensor = tf.cast(message, tf.int32)
            xor = tf.bitwise.bitwise_xor(x_btensor, m_btensor)
            random = tf.cast(xor, dtype)
        else:
            random = x * message % np.amax(x)
        return random

    def encoder(self, x, message, mode):
        with tf.variable_scope("encoder"):
            random = self.operation_mode(x, message, mode)
            x_flatten = tf.layers.flatten(random)
            fc1 = tf.reshape(x_flatten, (-1, self.num_feature, 1)) 
            conv1d_t1 = tf.layers.conv1d(fc1, filters=64, kernel_size=4, 
                                                        strides=2,
                                                        padding='VALID',
                                                        activation=tf.nn.relu)
            bn1 = tf.layers.batch_normalization(conv1d_t1)
            conv1d_t2 = tf.layers.conv1d(bn1, filters=32,
                                                       kernel_size=2,
                                                       strides=1,
                                                       padding='SAME',
                                                       activation=tf.nn.tanh)

            bn2 = tf.layers.batch_normalization(conv1d_t2)
            conv1d_t3 = tf.layers.conv1d(bn2, filters=16,
                                                       kernel_size=2,
                                                       strides=1,
                                                       padding='SAME',
                                                       activation=tf.nn.tanh)

            bn3 = tf.layers.batch_normalization(conv1d_t3)
            conv1d_t4 = tf.layers.conv1d(bn3, filters=8,
                                                       kernel_size=2,
                                                       strides=1,
                                                       padding='SAME',
                                                       activation=tf.nn.tanh)

            bn4 = tf.layers.batch_normalization(conv1d_t4)
            conv1d_t5 = tf.layers.conv1d(bn4, filters=4,
                                                       kernel_size=2,
                                                       strides=1,
                                                       padding='SAME',
                                                       activation=tf.nn.tanh)
    
            bn5 = tf.layers.batch_normalization(conv1d_t5)
            conv1d_t6 = tf.layers.conv1d(bn5, filters=8, 
                                                       kernel_size=2,
                                                       strides=1,
                                                       padding='SAME',
                                                       activation=tf.nn.tanh)
 
            bn6 = tf.layers.batch_normalization(conv1d_t6)
            conv1d_t7 = tf.layers.conv1d(bn6, filters=16, 
                                                       kernel_size=2,
                                                       strides=1,
                                                       padding='SAME',
                                                       activation=tf.nn.tanh)

            bn7 = tf.layers.batch_normalization(conv1d_t7)
            conv1d_t8 = tf.layers.conv1d(bn7, filters=self.num_feature, 
                                                       kernel_size=2,
                                                       strides=1,
                                                       padding='SAME',
                                                       activation=tf.nn.tanh)
            flatten = tf.layers.flatten(conv1d_t8)
            out = tf.layers.dense(flatten, self.num_feature, activation=tf.nn.relu)
            return out

    def get_solvers(self, learning_rate=1e-3, beta1=0.5):
        E_solver = tf.train.AdamOptimizer(learning_rate, beta1)
        S_solver = tf.train.AdamOptimizer(learning_rate, beta1)
        return E_solver, S_solver

    def train(self, sess, E_train_step, S_train_step, E_loss, S_loss, epochs=3000, batch_size=10):
        X, Y = self.get_target_features()
        S_loss_list, E_loss_list = [], []
        for it in range(epochs):
            minibatch, labels = self.get_shuffle_batch(X, Y, batch_size)
            minibatch = minibatch.reshape(batch_size, -1)

            if it > (epochs - 2000):  # store parameters only during the last 2000 iterations
                self.store_parameters(sess)

            #randomize original data
            fake = np.random.normal(0, 1, (batch_size, 30))
            randomized = sess.run(self.C_prime, feed_dict = {self.C:minibatch, self.random:fake})
            loss = self.target_classifier(randomized, labels, batch_size) 

            _, S_loss_curr = sess.run([S_train_step, S_loss], feed_dict={self.C:minibatch, self.random:fake,
            self.loss:loss})

            _, E_loss_curr = sess.run([E_train_step, E_loss], feed_dict={self.C:minibatch, self.random:fake,
            self.loss:loss})

            S_loss_list.append(S_loss_curr)
            E_loss_list.append(np.mean(E_loss_curr))

        #self.drawLoss(S_loss_list, E_loss_list) 
        print("Train Finished")

    def target_classifier(self, fake, fake_label, batch_size=32):
        cvscores = []
        scores = self.preTrainedModel.evaluate(fake, fake_label, verbose=0)
        output = np.mean(scores[1])
        return output 

    def calculate_loss(self, C, C_prime, logit_real, logit_fake, loss):
        real_label = tf.ones_like(logit_real)
        fake_label = tf.zeros_like(logit_fake)

        loss_S_real = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=real_label, logits=logit_real)
        loss_S_fake = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=fake_label, logits=logit_fake)

        loss_S = (tf.reduce_mean(loss_S_real) + (tf.reduce_mean(loss_S_fake)* (1-tf.reduce_mean(loss))))
        C_flatten = tf.layers.flatten(C)
        C_prime_flatten = tf.layers.flatten(C_prime)
        distance = (tf.sqrt(tf.reduce_sum(tf.square(C_flatten - C_prime_flatten), axis=1))) 
        distance = tf.reduce_mean(distance)
        loss_E = (self.lambda_a * (distance*self.confidence) )  + (self.lambda_b * loss_S)
        return loss_E, loss_S 

    def get_session(self):
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        session = tf.Session(config=config)
        return session

    def get_shuffle_batch(self, X, Y, batch_size=32):
        idx = random.randint(1, len(X)-batch_size)
        return  X[idx:idx+batch_size], Y[idx:idx+batch_size]

    def get_next_batch(self, X, Y, start, end, batch_size=32):
        X_train = []
        Y_train = []
        start = 0
        end = batch_size
        for i in range(len(X)-batch_size):
            start+=i
            end+=i
            X_train.append(X[start:end])
            Y_train.append(Y[start:end])
        return X_train, Y_train 

    def anonymize_sample(self, sess, batch_size):
        minibatch, labels = self.get_target_features()
        batch_size = len(labels)
        fake = np.random.normal(0, 1, (batch_size, 30))
        randomized = sess.run(self.C_prime, feed_dict={self.C: minibatch, self.random: fake})
        scores1 = self.preTrainedModel.evaluate(randomized, labels, verbose=0)
        self.aList.append(scores1[1] * 100)  # accuracy on generated (anonymized) samples
        self.get_inversed(randomized)

    def get_inversed(self, normalized):
        np.set_printoptions(precision=6, suppress=True)
        inversed = self.scaler.inverse_transform(normalized)
        np.savetxt('fileout.txt', inversed, delimiter=',', fmt='%1.3f') 
        return inversed

    def store_parameters(self, sess):
        for i in range(1, 7):
            name = 'Encoder/conv1d_' + str(i) + '/kernel:0'
            conv = sess.graph.get_tensor_by_name(name)
            self.t_var[name] = sess.run(conv)

    def add_variance(self, sess, num_var):
        for i in range(num_var):
            num = random.randint(1, 7)
            name = 'Encoder/conv1d_' + str(num) + '/kernel:0'
            conv = sess.graph.get_tensor_by_name(name)
            var = np.var(self.t_var.get(name), axis=0)
            sess.run(tf.assign(conv, conv + var))

    def get_pvalue(self, a, b):
        a = a.flatten()
        b = b.flatten() 
        t, p = stats.pearsonr(a, b)
        return t, p

    def main(self):
        self.get_pretrainModel()

        tf.reset_default_graph()

        self.C = tf.placeholder(tf.float32, [None, self.num_feature])
        self.random = tf.placeholder(tf.float32, [None, self.num_feature])

        self.C_prime = self.encoder(self.C, self.random, mode=2)
        self.loss = tf.placeholder(tf.float32)

        with tf.variable_scope("") as scope:
            logit_real = self.discriminator(self.C)
            scope.reuse_variables()
            logit_fake = self.discriminator(self.C_prime)


        encoder_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "encoder")
        steganalayzer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "discriminator")

        E_solver, S_solver = self.get_solvers()

        E_loss, S_loss = self.calculate_loss(self.C, self.C_prime, logit_real, logit_fake, self.loss)
        E_train_step = E_solver.minimize(E_loss, var_list=encoder_vars)
        S_train_step = S_solver.minimize(S_loss, var_list=steganalayzer_vars)

        tf.executing_eagerly()
        sess = self.get_session() 
        sess.run(tf.global_variables_initializer())
        self.train(sess, E_train_step, S_train_step, E_loss, S_loss)

        self.add_variance(sess, self.num_variance) 
        self.anonymize_sample(sess, self.batch_size)       


if __name__ == '__main__':
    anomigan = ANOMIGAN()
    anomigan.main()
Example #7
class Simulation(object):
    """Class glueing all the pieces together. Performs whole simulation.

    :param dataset: Dataset which extends :py:obj:`mutabledataset.SimMixin`
    :param AgentCl: Class defining agent behavior, namely `benefit` and `cost`.
    :param learner: Class defining learner behavior, namely `fit` and `predict`.
    :param split: Portion of the dataset used for fitting the learner; the rest is used to determine the `eps` value of the epsilon equilibrium. The simulation itself runs on the whole dataset.
    :param cost_distribution: Passed on to AgentTransformer.
    :param cost_distribution_dep: Passed on to AgentTransformer.
    :param no_neighbors: Passed on to AgentTransformer.
    :param max_it: Passed on to AgentTransformer.
    :param collect_incentive_data: Passed on to AgentTransformer.
    """
    def __init__(self,
                 dataset,
                 AgentCl,
                 learner,
                 cost_distribution,
                 split=[0.5],
                 collect_incentive_data=False,
                 no_neighbors=60,
                 cost_distribution_dep=None,
                 max_it=130):
        self.dataset = dataset
        self.no_neighbors = no_neighbors
        self.cost_distribution = cost_distribution
        self.max_it = max_it
        self.learner = learner
        self.split = split
        self.AgentCl = AgentCl
        self.collect_incentive_data = collect_incentive_data
        self.cost_distribution_dep = cost_distribution_dep

    def no_classes(self, dataset):
        """
        :param dataset: Some AIF360 dataset
        :returns: Number of distinct labels (classes)
        """
        return len(set(dataset.labels.ravel()))

    def start_simulation(self, runs=1, scale=True):
        """
        :param runs: Run simulation multiple times with the same parameters
        :param scale: Perform scaling on dataset features.
        :returns: Modified dataset including new ground truth labels
        :rtype: :py:obj:`simulation.SimulationResultSet`
        """
        res_list = []
        for i in range(runs):
            res_list.append(self._simulate(scale))
        return SimulationResultSet(res_list, runs=runs)

    def _simulate(self, scale):
        """
        Private entrypoint to perform a single simulation

        :param scale: Perform scaling on dataset features
        :returns: Modified dataset including new ground truth labels
        :rtype: :py:obj:`simulation.SimulationResult`
        """
        self.scaler = MaxAbsScaler()
        dataset = self.dataset.copy(deepcopy=True)
        # we need at least one example for each class in each of the two splits
        while True:
            train, test = dataset.split(self.split, shuffle=False)
            if self.no_classes(train) >= 2 and self.no_classes(test) >= 2:
                break
        train_indices = list(map(int, train.instance_names))
        test_indices = list(map(int, test.instance_names))

        self.train, self.test = train, test
        if scale:
            train.features = self.scaler.fit_transform(train.features)
            test.features = self.scaler.transform(test.features)
            dataset.features = self.scaler.transform(dataset.features)

        dataset.infer_domain()

        # learner moves
        self.learner.fit(train)

        ft_names = dataset.protected_attribute_names
        ft_indices = list(
            map(lambda x: not x in ft_names, dataset.feature_names))

        self.Y_predicted = self.learner.predict(dataset.features)
        self.Y_predicted_pr = self.learner.predict_proba(dataset.features)

        # agents move
        at = AgentTransformer(
            self.AgentCl,
            self.learner,
            self.cost_distribution,
            collect_incentive_data=self.collect_incentive_data,
            no_neighbors=self.no_neighbors,
            cost_distribution_dep=self.cost_distribution_dep,
            max_it=self.max_it)

        dataset_ = at.transform(dataset)

        train_ = utils.dataset_from_matrix(
            np.hstack((dataset_.features[train_indices, :],
                       dataset_.labels[train_indices])), dataset)
        test_ = utils.dataset_from_matrix(
            np.hstack((dataset_.features[test_indices, :],
                       dataset_.labels[test_indices])), dataset)

        acc_h = self.learner.accuracy(test)

        # update changed features

        #dataset_ = dataset_from_matrix(np.hstack((np.vstack((train_.features, test_.features)), np.vstack((train_.labels, test_.labels)))), dataset)
        self.Y_new_predicted = self.learner.predict(dataset_.features)
        self.Y_new_predicted_pr = self.learner.predict_proba(dataset_.features)

        acc_h_post = self.learner.accuracy(test_)

        # fit data again, see if accuracy changes
        self.learner.fit(train_)
        acc_h_star_post = self.learner.accuracy(test_)

        # construct datasets for features
        # including predicted label
        if scale:
            dataset.features = self.scaler.inverse_transform(dataset.features)
        dataset_df = dataset.convert_to_dataframe(de_dummy_code=True)[0]
        dataset_df['credit_h'] = pd.Series(self.Y_predicted,
                                           index=dataset_df.index)
        dataset_df['credit_h_pr'] = pd.Series(self.Y_predicted_pr,
                                              index=dataset_df.index)
        if scale:
            dataset_.features = self.scaler.inverse_transform(
                dataset_.features)
        dataset_new_df = dataset_.convert_to_dataframe(de_dummy_code=True)[0]
        dataset_new_df['credit_h'] = pd.Series(self.Y_new_predicted,
                                               index=dataset_new_df.index)
        dataset_new_df['credit_h_pr'] = pd.Series(self.Y_new_predicted_pr,
                                                  index=dataset_new_df.index)

        res = SimulationResult()
        res.df = dataset_df
        res.df_new = dataset_new_df
        res.eps = abs(acc_h_star_post - acc_h_post)
        res.acc_h = acc_h
        res.acc_h_post = acc_h_post
        res.acc_h_star_post = acc_h_star_post
        res.incentives = at.incentives
        return res
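# A minimal usage sketch for the Simulation class above, assuming a dataset that
# extends mutabledataset.SimMixin plus agent and learner classes from the same
# project; the concrete names below are hypothetical stand-ins.
sim = Simulation(dataset=sim_dataset,                  # assumed SimMixin dataset
                 AgentCl=RationalAgent,                # hypothetical agent class (benefit/cost)
                 learner=LogisticLearner(),            # hypothetical learner (fit/predict)
                 cost_distribution=lambda size: np.random.uniform(0., 1., size),
                 split=[0.5])
result_set = sim.start_simulation(runs=5, scale=True)  # -> SimulationResultSet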
Example #8
    rfr = RandomForestRegressor(n_estimators=100, max_features="sqrt")
    knn = neighbors.KNeighborsRegressor(n_neighbors, weights='uniform')
    reg = ElasticNet(alpha = .5)
    mlp = MLPRegressor(hidden_layer_sizes=(100,100,100))
    
    prediction = {}
    prediction_valid = {}
     
    for clf, name in [(knn, 'KNeighborsRegressor'),
                  (reg, 'ElasticNet'),
#                  (svc, 'Support Vector Classification'),
                  (rfr, 'Random Forest'),
                  (mlp, 'NeuralNet')]: 
        clf.fit(X_train_valid, y_train_valid[:,0])
        prediction[str(name)] = (clf.predict(X_test))
        avg_error = (np.mean(((((scaler_output.inverse_transform(y_test.reshape(-1,1))) - scaler_output.inverse_transform(prediction[str(name)].reshape(-1,1))))/(scaler_output.inverse_transform(y_test.reshape(-1,1))))**2.))
        r2_test = r2_score(y_test, prediction[str(name)])
        print("Mean error (test): ",avg_error,"R2:",r2_test,name)
        prediction_valid[str(name)] = (clf.predict(X_valid))
        avg_error_valid = (np.mean((((scaler_output.inverse_transform(y_valid.reshape(-1,1)) - scaler_output.inverse_transform(prediction_valid[str(name)].reshape(-1,1))))/scaler_output.inverse_transform(y_valid.reshape(-1,1)))**2.))
        r2_valid = r2_score(y_valid, prediction_valid[str(name)])        
        print("Mean error (valid): ",avg_error_valid,"R2:",r2_valid,name)
        if name == 'KNeighborsRegressor':
            normalized_std_KNN.append(avg_error)
            normalized_std_valid_KNN.append(avg_error_valid)
            R2_KNN.append(r2_test)
            R2_valid_KNN.append(r2_valid)
        if name == 'ElasticNet':    
            normalized_std_EN.append(avg_error)
            normalized_std_valid_EN.append(avg_error_valid)
            R2_EN.append(r2_test)
Example #9
class DEAP_CMAES:
    def __init__(
            self,
            centroid=None,
            sigma=None,
            popSize=200,  # lambda_ in the algorithm
            evalFunc=defaultEvaluate,
            hofn=5):

        global randomizers
        randomizers = InitRandomizers()

        if (centroid is None):
            centroid = defaultInitializer()
        if (sigma is None):
            sigma = 0.20

        self.scaler = MaxAbsScaler()
        self.scaler.fit([
            centroid,
        ])

        # Reset centroid
        centroid = self.scaler.transform([
            centroid,
        ])[0]

        hof = tools.HallOfFame(hofn)
        self.hof = hof

        self.popSize = popSize

        toolbox = base.Toolbox()

        stats = tools.Statistics(key=lambda ind: 1.0 / ind.fitness.values[0])
        stats.register("avg", np.mean)
        stats.register("std", np.std)
        stats.register("min", np.min)
        stats.register("max", np.max)
        self.stats = stats

        # Our fitness already takes into account all the molecules simultaneously.
        # Therefore, there is no need for a multi-objective optimization.
        creator.create("FitnessMin", base.Fitness, weights=(1.0, ))
        creator.create("Individual", list, fitness=creator.FitnessMin)
        toolbox.register("evaluate", evalFunc, scaler=self.scaler)

        strategy = cma.Strategy(centroid=centroid,
                                sigma=sigma,
                                lambda_=popSize)
        toolbox.register("generate", strategy.generate, creator.Individual)
        toolbox.register("update", strategy.update)

        self.toolbox = toolbox

    def run(self, nGens, nprocs=1):
        # Start processes
        if (nprocs > 1):
            pool = multiprocessing.Pool(processes=nprocs)
            self.toolbox.register("map", pool.map)

        # Run CMA-ES and store final things
        self.output = algorithms.eaGenerateUpdate(self.toolbox,
                                                  ngen=nGens,
                                                  stats=self.stats,
                                                  halloffame=self.hof,
                                                  verbose=True)

    def getBest(self):
        # Return best multiprofile
        best = self.hof[0]
        best = self.scaler.inverse_transform([
            best,
        ])[0]
        mp = multiProfile()
        mp.setOptimizableParameters(slice(0, len(best), 1), list(best))
        mp.minimizeProfiles()
        return mp
Example #10
def trainNeuralNetworkModel(dataFrame, targetColumn, featureNames, seed=43):
    def get_lr(optimizer):
        for param_group in optimizer.param_groups:
            return param_group['lr']

    dataFrame = dataFrame[featureNames]

    FEATURE_NAMES = list(dataFrame.columns)
    FEATURE_NAMES.remove(targetColumn)
    COLUMNS = list(dataFrame.columns)
    LABEL = targetColumn

    Y_dataFrame = dataFrame[[targetColumn]]
    Y_values = Y_dataFrame.values
    X_dataFrame = dataFrame.drop(targetColumn, axis=1)
    X_values = X_dataFrame.values
    Y_values = Y_values

    print(X_dataFrame.describe())

    FEATURE_DEFAULTS = ((X_dataFrame.max() + X_dataFrame.min()) *
                        0.5).to_dict()

    #preprocessorY = MinMaxScaler()
    #preprocessorY = StandardScaler()
    preprocessorY = MaxAbsScaler()
    preprocessorY.fit(Y_values)
    preprocessorX = MinMaxScaler()
    #preprocessorX = StandardScaler()
    preprocessorX.fit(X_values)

    Y_values = preprocessorY.transform(Y_values)
    X_values = preprocessorX.transform(X_values)

    device = torch.device('cpu')
    #device = torch.device('cuda') # Uncomment this to run on GPU

    #Create model
    in_size = len(FEATURE_NAMES)
    #model = ConvolutionalNet( in_size ).to( device )
    model = LinearNet(in_size).to(device)
    #model = ImprovedLinearNet( in_size ).to( device )

    learning_rate = 0.01
    #loss_fn       = torch.nn.SmoothL1Loss()
    #loss_fn       = QuantileRegressionLoss( 0.5 )
    #loss_fn       = HuberRegressionLoss( 0.15 )
    #loss_fn       = torch.nn.MSELoss  ( size_average=False)
    loss_fn = torch.nn.L1Loss()
    #optimizer     = torch.optim.SGD   ( model.parameters(), lr=learning_rate, momentum=0.9)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate,
                                 amsgrad=True,
                                 weight_decay=0.001)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=250,
                                                gamma=0.5)

    batch_size = 256
    average_nbr_corrects = 0
    N = 100
    alpha = 2. / (N + 1)
    current_nbr_corrects = 0

    X_numpyTrainVal, X_numpyTest, Y_numpyTrainVal, Y_numpyTest = train_test_split(
        X_values, Y_values, test_size=0.1)
    X_torchTest = torch.from_numpy(X_numpyTest.astype(np.float32)).to(device)
    Y_torchTest = torch.from_numpy(Y_numpyTest.astype(np.float32)).to(device)
    X_torchTest_s = torch.split(X_torchTest, batch_size, dim=0)
    Y_torchTest_s = torch.split(Y_torchTest, batch_size, dim=0)

    for t in range(6000):
        model.train()
        X_numpyTrain, X_numpyVal, Y_numpyTrain, Y_numpyVal = train_test_split(
            X_numpyTrainVal, Y_numpyTrainVal, test_size=0.25)

        X_torchTrain = torch.from_numpy(X_numpyTrain.astype(
            np.float32)).to(device)
        X_torchVal = torch.from_numpy(X_numpyVal.astype(np.float32)).to(device)
        Y_torchTrain = torch.from_numpy(Y_numpyTrain.astype(
            np.float32)).to(device)
        Y_torchVal = torch.from_numpy(Y_numpyVal.astype(np.float32)).to(device)

        train_size = X_numpyTrain.shape[0]
        val_size = X_numpyVal.shape[0]

        train_index_s = torch.randperm(train_size)
        X_torchTrain_s = X_torchTrain[train_index_s]
        Y_torchTrain_s = Y_torchTrain[train_index_s]
        val_index_s = torch.randperm(val_size)
        X_torchVal_s = X_torchVal[val_index_s]
        Y_torchVal_s = Y_torchVal[val_index_s]

        X_torchTrain_s = torch.split(X_torchTrain_s, batch_size, dim=0)
        Y_torchTrain_s = torch.split(Y_torchTrain_s, batch_size, dim=0)
        X_torchVal_s = torch.split(X_torchVal_s, batch_size, dim=0)
        Y_torchVal_s = torch.split(Y_torchVal_s, batch_size, dim=0)

        length = (len(X_torchVal_s) - 1) * batch_size
        #Train
        for i in range(len(Y_torchTrain_s) - 1):
            x = X_torchTrain_s[i]
            y = Y_torchTrain_s[i]

            y_pred = model(x)
            loss = loss_fn((y_pred - y) / y, torch.zeros(y.shape))

            model.zero_grad()
            loss.backward()
            optimizer.step()
        scheduler.step()
        #Validate
        model.eval()

        ValLoss = 0
        Y_torchPredict = torch.zeros(Y_torchVal.shape,
                                     dtype=torch.float32).to(device)
        Y_torchPredict_s = torch.split(Y_torchPredict, batch_size, dim=0)
        for i in range(len(Y_torchPredict_s) - 1):
            x = X_torchVal_s[i]
            y = Y_torchVal_s[i]
            y_pred = model(x)

            Y_torchPredict_s[i].copy_(y_pred)
            ValLoss += loss_fn(y_pred, y)
        ValLoss /= (len(Y_torchPredict_s) - 1)
        Y_numpyPredict = Y_torchPredict.cpu().detach().numpy()

        threshold = 0.1
        eps = 0.001
        ValTrue_s = np.sum(
            np.abs((Y_numpyPredict - Y_numpyVal) /
                   (Y_numpyVal + eps)) <= threshold)
        ValFalse_s = np.sum(
            np.abs((Y_numpyPredict - Y_numpyVal) /
                   (Y_numpyVal + eps)) > threshold)
        ValAccuracy = float(ValTrue_s) / (ValTrue_s + ValFalse_s)

        TestLoss = 0
        TestAccuracy = 0
        if Y_torchTest.nelement() > 0:
            model.eval()
            TestLoss = 0
            Y_torchPredict = torch.zeros(Y_torchTest.shape,
                                         dtype=torch.float32).to(device)
            Y_torchPredict_s = torch.split(Y_torchPredict, batch_size, dim=0)
            for i in range(len(Y_torchPredict_s) - 1):
                x = X_torchTest_s[i]
                y = Y_torchTest_s[i]
                y_pred = model(x)

                Y_torchPredict_s[i].copy_(y_pred)
                TestLoss += loss_fn(y_pred, y)
            TestLoss /= (len(Y_torchPredict_s) - 1)
            Y_numpyPredict = Y_torchPredict.cpu().detach().numpy()

            threshold = 0.1
            eps = 0.001
            TestTrue_s = np.sum(
                np.abs((Y_numpyPredict - Y_numpyTest) /
                       (Y_numpyTest + eps)) <= threshold)
            TestFalse_s = np.sum(
                np.abs((Y_numpyPredict - Y_numpyTest) /
                       (Y_numpyTest + eps)) > threshold)
            TestAccuracy = float(TestTrue_s) / (TestTrue_s + TestFalse_s)

        print(
            "epoch: {:6d}, lr: {:8.6f}, val_loss: {:6.4f}, val_acc: {:6.4f}, test_loss: {:6.4f}, test_acc: {:6.4f}"
            .format(t, get_lr(optimizer), ValLoss, ValAccuracy, TestLoss,
                    TestAccuracy))

    # Check model
    model.eval()

    X_numpyTotal = X_values
    Y_numpyTotal = Y_values

    X_torchTotal = torch.from_numpy(X_numpyTotal.astype(np.float32)).to(device)
    Y_torchTotal = torch.from_numpy(Y_numpyTotal.astype(np.float32)).to(device)
    Y_torchPredict = model(X_torchTotal)
    Y_numpyPredict = Y_torchPredict.cpu().detach().numpy()
    Y_numpyTotal = Y_torchTotal.cpu().detach().numpy()

    eps = 0.001
    Y_relErr = np.abs(Y_numpyPredict - Y_numpyTotal) / (Y_numpyTotal + eps)
    for threshold in [0.025, 0.05, 0.10, 0.15]:
        bad_s = np.sum((Y_relErr > threshold))
        good_s = np.sum((Y_relErr <= threshold))
        total_s = Y_relErr.size
        print("threshold = {:5}, good = {:10}, bad = {:10}, err = {:4}".format(
            threshold, good_s, bad_s, bad_s / (good_s + bad_s)))

    Y_numpyPredict = preprocessorY.inverse_transform(Y_numpyPredict)
    Y_numpyTotal = preprocessorY.inverse_transform(Y_numpyTotal)

    modelPacket = dict()
    modelPacket['model'] = model
    modelPacket['preprocessorX'] = preprocessorX
    modelPacket['preprocessorY'] = preprocessorY

    modelPacket['feature_names'] = FEATURE_NAMES
    modelPacket['feature_defaults'] = FEATURE_DEFAULTS

    return modelPacket, (Y_numpyPredict, Y_numpyTotal)
Example #11
def XGBoost(dataFrame, targetColumn, featureNames):
    dataFrame_short = dataFrame[featureNames]
    FEATURE_NAMES = list(dataFrame_short.columns)
    FEATURE_NAMES.remove(targetColumn)
    COLUMNS = list(dataFrame_short.columns)
    LABEL = targetColumn

    Y_dataFrame = dataFrame_short[[targetColumn]]
    Y_values = Y_dataFrame.values
    X_dataFrame = dataFrame_short.drop(targetColumn, axis=1)
    X_values = X_dataFrame.values
    Y_values = Y_values

    print(X_dataFrame.describe())

    FEATURE_DEFAULTS = ((X_dataFrame.max() + X_dataFrame.min()) *
                        0.5).to_dict()

    # preprocessorY = MinMaxScaler()
    # preprocessorY = StandardScaler()
    preprocessorY = MaxAbsScaler()
    preprocessorY.fit(Y_values)
    preprocessorX = MinMaxScaler()
    # preprocessorX = StandardScaler()
    preprocessorX.fit(X_values)

    Y_values = preprocessorY.transform(Y_values)
    X_values = preprocessorX.transform(X_values)
    Y_values_indexes = np.arange(0, len(Y_values), 1)
    X_numpyTrainVal, X_numpyTest, Y_numpyTrainVal_indexes, Y_numpyTest_indexes = train_test_split(
        X_values, Y_values_indexes, test_size=0.1)
    Y_numpyTrainVal = Y_values[Y_numpyTrainVal_indexes]
    Y_numpyTest = Y_values[Y_numpyTest_indexes]

    FEATURE_DEFAULTS = ((X_dataFrame.max() + X_dataFrame.min()) *
                        0.5).to_dict()
    model = xgboost.XGBRegressor(max_depth=20,
                                 objective='reg:squarederror',
                                 n_estimators=120,
                                 learning_rate=0.1,
                                 n_jobs=-1)
    model.fit(X_numpyTrainVal, Y_numpyTrainVal)  # training
    Y_numpyPredict = model.predict(X_numpyTest)

    X_numpyTotal = X_values
    Y_numpyTotal = Y_values
    eps = 0.001
    Y_relErr = np.abs(Y_numpyPredict -
                      Y_numpyTest.flatten()) / (Y_numpyTest.flatten() + eps)

    for threshold in [0.025, 0.05, 0.10, 0.15]:
        bad_s = np.sum((Y_relErr > threshold))
        good_s = np.sum((Y_relErr <= threshold))
        total_s = Y_relErr.size
        print("threshold = {:5}, good = {:10}, bad = {:10}, err = {:4}".format(
            threshold, good_s, bad_s, bad_s / (good_s + bad_s)))

    Y_numpyPredict = preprocessorY.inverse_transform(
        Y_numpyPredict.reshape(-1, 1))
    Y_numpyTest = preprocessorY.inverse_transform(Y_numpyTest.reshape(-1, 1))
    modelPacket = dict()
    modelPacket['model'] = model
    modelPacket['preprocessorX'] = preprocessorX
    modelPacket['preprocessorY'] = preprocessorY

    modelPacket['feature_names'] = FEATURE_NAMES
    modelPacket['feature_defaults'] = FEATURE_DEFAULTS
    threshold = 10
    print()
    Y_relativeError = np.abs(Y_numpyPredict - Y_numpyTest) * 100 / Y_numpyTest

    allValues = dataFrame.loc[Y_numpyTest_indexes]
    mask = Y_relativeError > threshold
    badValues = allValues[mask]
    mask = Y_relativeError <= threshold
    goodValues = allValues[mask]
    #print(badValues)
    f_bad_values = open("bad_values.txt", 'w')
    f_bad_values.write(str(badValues[featureNames + ['source_url']]))
    f_bad_values.close()
    allValues = Y_numpyTest
    mask = Y_relativeError > threshold
    badValues = Y_numpyTest[mask]
    mask = Y_relativeError <= threshold
    goodValues = Y_numpyTest[mask]
    bins = range(1, 20)
    bins = [i * 0.5e6 for i in bins]

    figure, axes = plt.subplots(3, 1)
    axes[1].axis('tight')
    axes[1].axis('off')

    resultValues = axes[0].hist([allValues, goodValues, badValues],
                                bins=bins,
                                histtype='bar',
                                color=['green', 'yellow', 'red'])
    allValues = resultValues[0][0]
    goodValues = resultValues[0][1]
    badValues = resultValues[0][2]

    accuracy = goodValues * 100 / (allValues + 0.01)
    col_label = [
        '{:5d}'.format(int((bins[i + 0] + bins[i + 1]) / 2))
        for i in range(len(bins) - 1)
    ]
    cell_text = [
        ['{:2.1f}'.format(acc_) for acc_ in accuracy],
    ]

    table_ = axes[1].table(cellText=cell_text,
                           colLabels=col_label,
                           loc='center')
    table_.auto_set_font_size(False)
    table_.set_fontsize(8)

    Y_numpyTest_max = np.max(Y_numpyTest)
    Y_numpyTest_min = np.min(Y_numpyTest)

    # axes[2].set_position([Y_numpyTotal_min-Y_numpyTotal_width*0.1,Y_numpyTotal_min-Y_numpyTotal_width*0.1,Y_numpyTotal_width*0.2,Y_numpyTotal_width*0.2])
    axes[2].plot(Y_numpyTest, Y_numpyTest, c='blue')
    axes[2].plot(Y_numpyTest, Y_numpyTest * (1.0 + 0.1), c='red')
    axes[2].plot(Y_numpyTest, Y_numpyTest * (1.0 - 0.1), c='red')
    axes[2].scatter(Y_numpyPredict, Y_numpyTest)
    plt.show()

    # figure, axes =plt.subplots(3,1)
    # clust_data = np.random.random((10,3))
    # collabel=("col 1", "col 2", "col 3")
    # axs[0].axis('tight')
    # axs[0].axis('off')
    # the_table = axs[0].table(cellText=clust_data,colLabels=collabel,loc='center')

    # axs[1].plot(clust_data[:,0],clust_data[:,1])
    # plt.show()

    return modelPacket, (Y_numpyPredict, Y_numpyTotal)
Example #12
    print(question_AR)
    #question_AR = Reshape((-1, args.input_length))(question_AR)
    #print(question_AR)
    #question_AR =TimeDistributed(Dense(1, kernel_regularizer=regularizers.l2(0.01)))(question_AR)
    question_AR = TimeDistributed(Dense(1))(question_AR)
    print(question_AR)
    question_AR = Flatten()(question_AR)
    main_output = add([main_output, question_AR])
    #main_output = Dense(y_train.shape[1])(main_output)
    #question_AR = Reshape(input_X_train.shape[2])(question_AR)
    #question_AR = Flatten()(question_AR)
    print(main_output)

#y_valid_ = y_valid * math.sqrt(scaler.var_[-1]) + scaler.mean_[-1]
#y_test_ = y_test * math.sqrt(scaler.var_[-1]) + scaler.mean_[-1]
y_valid_ = scaler.inverse_transform(y_valid)
y_test_ = scaler.inverse_transform(y_test)

opt = optimizers.RMSprop(lr=0.0001, decay=1e-8)
model = Model(inputs=[memory, question], outputs=[main_output])
model.compile(loss='mae', optimizer=opt, metrics=['mse', 'mae', 'mape', RRSE])
filepath = './model/memdnn_AR_inputlength_' + str(
    args.input_length) + '_cnn_' + str(args.CNN_unit) + '_cnnkernel_' + str(
        args.CNN_kernel) + '_gru_' + str(args.GRU_unit) + '_horizon_' + str(
            args.horizon) + '.h5'
checkpoint = ModelCheckpoint(filepath,
                             monitor='val_loss',
                             save_weights_only=False,
                             save_best_only=True)
earlystop = EarlyStopping(monitor='val_loss', patience=100)
callbacks_list = [checkpoint, earlystop]
Example #13
model.reset_states()

plt.figure(figsize=(16, 10))
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss', fontsize=18)
plt.ylabel('Mean Squared Error (Loss)', fontsize=18)
plt.xlabel('Epoch', fontsize=18)
plt.legend(['Train', 'Test'], loc='upper right')
plt.show()

# Model Predict on test set

y_pred = model.predict(X_test, batch_size=batch_size)
y_pred.shape

y_pred

y_new_inverse = scalery.inverse_transform(y_pred)

y_new_inverse

y_real = scalery.inverse_transform(y_test)
y_real

for i in range(len(y_pred)):
    print(y_new_inverse[i])

for i in range(len(y_real)):
    print(y_real[i])