Example #1
def train(full_model, en_seq, fr_seq, batch_size, n_epochs=10):
    """ Training the model """

    for ep in range(n_epochs):
        losses = []
        for bi in range(0, en_seq.shape[0] - batch_size, batch_size):
            print("==================================================")
            en_onehot_seq = to_categorical(en_seq[bi:bi + batch_size, :],
                                           num_classes=en_vsize)
            fr_onehot_seq = to_categorical(fr_seq[bi:bi + batch_size, :],
                                           num_classes=fr_vsize)

            # decoder input:  fr_onehot_seq[:, :-1, :]
            # decoder labels: fr_onehot_seq[:, 1:, :]
            full_model.train_on_batch(
                [en_onehot_seq, fr_onehot_seq[:, :-1, :]],
                fr_onehot_seq[:, 1:, :])

            l = full_model.evaluate([en_onehot_seq, fr_onehot_seq[:, :-1, :]],
                                    fr_onehot_seq[:, 1:, :],
                                    batch_size=batch_size,
                                    verbose=0)

            losses.append(l)
        if (ep + 1) % 1 == 0:
            logger.info("Loss in epoch {}: {}".format(ep + 1, np.mean(losses)))
Example #2
    def _prepare(self, path):
        self.mapping = self._load_labels_mapping()
        self.number_of_classes = len(self.mapping)

        data = loadmat(path / "emnist-byclass.mat")

        # load training dataset
        x_train = data["dataset"][0][0][0][0][0][0].astype(np.float32)
        x_train = x_train.reshape(x_train.shape[0], 28, 28, 1, order="A")
        y_train = data["dataset"][0][0][0][0][0][1]
        # One hot encoding
        y_train = to_categorical(y_train,
                                 self.number_of_classes).astype(int)  # np.int was removed in NumPy 1.24

        # Calculate mean and standard deviation for input normalization
        self.mean = x_train.mean()
        self.std = x_train.std()

        print("Balancing train dataset...")
        x_train, y_train = self._sample_to_balance(x_train, y_train)

        # load test dataset
        x_test = data["dataset"][0][0][1][0][0][0].astype(np.float32)
        x_test = x_test.reshape(x_test.shape[0], 28, 28, 1, order="A")
        y_test = data["dataset"][0][0][1][0][0][1]
        y_test = to_categorical(y_test, self.number_of_classes).astype(int)

        # https://www.tensorflow.org/guide/datasets
        self.train_dataset = tf.data.Dataset.from_tensor_slices(
            (x_train, y_train))
        self.test_dataset = tf.data.Dataset.from_tensor_slices(
            (x_test, y_test))
        print(
            f"Dataset ready, with {len(y_train)} training entries and {len(y_test)} test entries"
        )
Example #3
def main():
    #Load the VGG16 model
    input_tensor = Input(shape=(224, 224, 3))
    vgg16 = VGG16(include_top=False,
                  weights='imagenet',
                  input_tensor=input_tensor)
    model = build_VGG16_model(vgg16)
    model.summary()

    #Create the dataset
    create_txt(txts[0], 'train_img\\')
    create_txt(txts[1], 'test_img\\')
    X_train, Y_train = make_dataset(txts[0])
    X_test, Y_test = make_dataset(txts[1])
    #Preprocessing (normalization)
    X_train = X_train.astype(float)  # np.float was removed in NumPy 1.24
    X_test = X_test.astype(float)
    X_train = X_train / 255
    X_test = X_test / 255
    Y_train = to_categorical(Y_train, 9)
    Y_test = to_categorical(Y_test, 9)

    #Train the CNN
    model, history = model_train(model, X_train, Y_train)
    #Plot the training results
    graph_plot(history)
    #Show the evaluation results on the test data
    model.evaluate(X_test, Y_test)

    #Save the trained model and weights
    model.save('./model/model.h5')
Example #4
def train(full_model, en_seq, fr_seq, batch_size, n_epochs=10):
    """ Training the model """

    for ep in range(n_epochs):
        losses = []
        for bi in tqdm(range(0, en_seq.shape[0] - batch_size, batch_size)):

            en_onehot_seq = to_categorical(en_seq[bi:bi + batch_size, :],
                                           num_classes=en_vsize)
            fr_onehot_seq = to_categorical(fr_seq[bi:bi + batch_size, :],
                                           num_classes=fr_vsize)

            full_model.train_on_batch(
                [en_onehot_seq, fr_onehot_seq[:, :-1, :]],
                fr_onehot_seq[:, 1:, :])

            l = full_model.evaluate([en_onehot_seq, fr_onehot_seq[:, :-1, :]],
                                    fr_onehot_seq[:, 1:, :],
                                    batch_size=batch_size,
                                    verbose=0)

            losses.append(l)
        if (ep + 1) % 1 == 0:
            logger.info("Loss in epoch {}: {}".format(ep + 1, np.mean(losses)))

        print('Epoch: {}, Loss: {}'.format(str(ep + 1), str(np.mean(losses))))
        """ Save model """
        model_save_dir = os.path.join(base_dir, 'output')
        if not os.path.exists(model_save_dir):
            os.mkdir(model_save_dir)
        full_model.save(
            os.path.join(model_save_dir, 'nmt_ep{}.h5'.format(str(ep + 1))))
Example #5
def valid_generator(val_batch_size, dim='1D'):
    while True:
        ids = list(range(valid_df.shape[0]))
        for start in range(0, len(ids), val_batch_size):
            x_batch = []
            x_batch_1d = []
            y_batch = []
            end = min(start + val_batch_size, len(ids))
            i_val_batch = ids[start:end]
            if dim == 'combi':
                for i in i_val_batch:
                    x_2d, x_1d = process_wav_file(valid_df.wav_file.values[i],
                                                  phase='TRAIN',
                                                  dim=dim)
                    x_batch.append(x_2d)
                    x_batch_1d.append(x_1d)
                    y_batch.append(valid_df.label_id.values[i])
                x_batch = np.array(x_batch)
                x_batch_1d = np.array(x_batch_1d)
                y_batch = to_categorical(y_batch,
                                         num_classes=len(POSSIBLE_LABELS))
                yield [x_batch, x_batch_1d], y_batch
            else:
                for i in i_val_batch:
                    x_batch.append(
                        process_wav_file(valid_df.wav_file.values[i],
                                         phase='TRAIN',
                                         dim=dim))
                    y_batch.append(valid_df.label_id.values[i])
                x_batch = np.array(x_batch)
                y_batch = to_categorical(y_batch,
                                         num_classes=len(POSSIBLE_LABELS))
                yield x_batch, y_batch
Example #6
def _ensemble_preds(image_id, save_path, binary_mask_path_list, data_key):

    for i, base_path in enumerate(binary_mask_path_list):
        # these .npy files should have been saved by _load_eval_w_thresh_list with save_binary=True
        data_path = base_path + '/' + image_id + '.npy'
        if i==0:
            if not path.exists(data_path):
                print('{} does not exist!'.format(data_path))
            else:
                #for tta, 'pred' -> 'mean_pred'?
                data = np.load(data_path, allow_pickle=True)[()]
                pred = data[data_key]
                pred = to_categorical(pred, num_classes=2)
        else:
            if not path.exists(data_path):
                print('{} does not exist!'.format(data_path))
            else:
                pred_2 = np.load(data_path, allow_pickle=True)[()][data_key]
                pred_2 = to_categorical(pred_2, num_classes=2)

                pred = np.add(pred, pred_2)
    # what if the number of models is even?
    pred = np.argmax(pred, axis=-1)
    # save data={'pred': ..., 'mask': ...} here in order to evaluate with _load_eval_w_thresh_list
    # for test data, mask is not supplied, so just save pred
    if 'mask' in data:
        new_data = {'mask': data['mask'], 'pred': pred}
    else:
        new_data = {'pred': pred}
    np.save(save_path + '/' + image_id, new_data)
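
The voting scheme above, seen in isolation: one-hot each binary mask, sum the one-hot stacks across models, and take the argmax as the majority class. A minimal sketch with made-up masks follows; note that np.argmax returns the first index on a tie, so with an even number of models a tied pixel falls back to class 0:

import numpy as np
from tensorflow.keras.utils import to_categorical

m1 = np.array([0, 1, 1])                      # binary masks from three models
m2 = np.array([0, 0, 1])
m3 = np.array([1, 1, 1])

votes = (to_categorical(m1, num_classes=2) +
         to_categorical(m2, num_classes=2) +
         to_categorical(m3, num_classes=2))   # per-pixel vote counts, shape (3, 2)
pred = np.argmax(votes, axis=-1)              # majority class per pixel
print(pred)                                   # [0 1 1]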
Example #7
def train_generator(train_batch_size, dim='1D'):
    while True:
        this_train = train_df.groupby('label_id').apply(
            lambda x: x.sample(n=2000))
        shuffled_ids = random.sample(range(this_train.shape[0]),
                                     this_train.shape[0])
        for start in range(0, len(shuffled_ids), train_batch_size):
            x_batch = []
            x_batch_1d = []
            y_batch = []
            end = min(start + train_batch_size, len(shuffled_ids))
            i_train_batch = shuffled_ids[start:end]
            if dim == 'combi':
                for i in i_train_batch:
                    x_2d, x_1d = process_wav_file(
                        this_train.wav_file.values[i], phase='TRAIN', dim=dim)
                    x_batch.append(x_2d)
                    x_batch_1d.append(x_1d)
                    y_batch.append(this_train.label_id.values[i])
                x_batch = np.array(x_batch)
                x_batch_1d = np.array(x_batch_1d)
                y_batch = to_categorical(y_batch,
                                         num_classes=len(POSSIBLE_LABELS))
                yield [x_batch, x_batch_1d], y_batch
            else:
                for i in i_train_batch:
                    x_batch.append(
                        process_wav_file(this_train.wav_file.values[i],
                                         phase='TRAIN',
                                         dim=dim))
                    y_batch.append(this_train.label_id.values[i])
                x_batch = np.array(x_batch)
                y_batch = to_categorical(y_batch,
                                         num_classes=len(POSSIBLE_LABELS))
                yield x_batch, y_batch
Example #8
    def __init__(self, train_from_idx, train_to_idx, included_characters):
        # input image dimensions
        img_rows, img_cols = 28, 28
        self.num_classes = 10

        # the data, shuffled and split between train and test sets
        (x_train, y_train), (x_test, y_test) = mnist.load_data()

        if K.image_data_format() == 'channels_first':
            x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
            x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
            self.input_shape = (1, img_rows, img_cols)
        else:
            x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
            x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
            self.input_shape = (img_rows, img_cols, 1)

        x_train = x_train[train_from_idx:train_to_idx, :, :, :]
        y_train = y_train[train_from_idx:train_to_idx]

        if included_characters is not None:
            train_idx = np.isin(y_train, included_characters)
            test_idx = np.isin(y_test, included_characters)
            y_train = y_train[train_idx]
            x_train = x_train[train_idx]
            y_test = y_test[test_idx]
            x_test = x_test[test_idx]

        x_train = x_train.astype('float32')
        x_test = x_test.astype('float32')
        self.x_train = x_train / 255
        self.x_test = x_test / 255

        self.y_train = to_categorical(y_train, self.num_classes)
        self.y_test = to_categorical(y_test, self.num_classes)
Example #9
    def load_raw_dataset(self):
        # download and extract the dataset.
        self.download_and_extract()

        # load the train and test data and labels.
        x_train = self.read_all_images(TRAIN_DATA_PATH)
        y_train = self.read_labels(TRAIN_LABELS_PATH)
        x_test = self.read_all_images(TEST_DATA_PATH)
        y_test = self.read_labels(TEST_LABELS_PATH)

        # convert all images to floats, shifted and scaled to roughly [0, 1]
        x_train = x_train.astype('float32')
        x_train = (x_train - 10) / 255.0
        x_test = x_test.astype('float32')
        x_test = (x_test - 10) / 255.0

        # convert the labels to be zero based.
        y_train -= 1
        y_test -= 1

        # convert the labels to one-hot vectors.
        y_train_cat = to_categorical(y_train, self.num_classes)
        y_test_cat = to_categorical(y_test, self.num_classes)

        return (x_train, y_train, y_train_cat), (x_test, y_test, y_test_cat)
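
The label shift above matters because to_categorical indexes columns by label value, so ids must lie in [0, num_classes); passing the raw 1-based labels would push the top label out of range. A minimal standalone sketch:

import numpy as np
from tensorflow.keras.utils import to_categorical

y = np.array([1, 2, 3])                    # 1-based labels
print(to_categorical(y - 1, 3))            # [[1. 0. 0.]
                                           #  [0. 1. 0.]
                                           #  [0. 0. 1.]]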
Example #10
def train(train_X, train_y, test_X, test_y, num_of_features, classes, layer_det, id, num_epochs=30, batch_size=100):
	# Train a neural network
	# Saves a model to disk

	train_y = to_categorical(train_y)  #converts to one hot
	test_y = to_categorical(test_y)

	model = Sequential()
	model.add(Dense(layer_det[0], input_dim=num_of_features, activation='relu'))
	model.add(layers.Dropout(0.3, noise_shape=None, seed=None))
	model.add(Dense(layer_det[1], activation='relu'))
	model.add(layers.Dropout(0.2, noise_shape=None, seed=None))
	model.add(Dense(layer_det[2], activation='relu'))
	model.add(layers.Dropout(0.2, noise_shape=None, seed=None))
	model.add(Dense(classes, activation='softmax'))
	# Compile model
	model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])


	# Fit the model
	model.fit(train_X, train_y, epochs=num_epochs, batch_size=batch_size)
	score = model.evaluate(train_X, train_y, verbose=0)
	print("%s: %.2f%%" % (model.metrics_names[1], score[1]*100))
	model.save('model'+str(id)+'.h5')  # creates an HDF5 file named 'model<id>.h5'

	score = model.evaluate(test_X,test_y, verbose=0)
	print("model"+str(id)+"====== %s: %.2f%%" % (model.metrics_names[1], score[1]*100))
Example #11
def infer_nmt(encoder_model, decoder_model, test_en_seq, en_vsize, fr_vsize):
    """
    Infer logic
    :param encoder_model: keras.Model
    :param decoder_model: keras.Model
    :param test_en_seq: sequence of word ids
    :param en_vsize: int
    :param fr_vsize: int
    :return:
    """

    test_fr_seq = sents2sequences(fr_tokenizer, ['sos'], fr_vsize)
    test_en_onehot_seq = to_categorical(test_en_seq, num_classes=en_vsize)
    test_fr_onehot_seq = np.expand_dims(to_categorical(test_fr_seq, num_classes=fr_vsize), 1)

    enc_outs, enc_fwd_state, enc_back_state = encoder_model.predict(test_en_onehot_seq)
    dec_state = np.concatenate([enc_fwd_state, enc_back_state], axis=-1)
    attention_weights = []
    fr_text = ''

    for i in range(fr_timesteps):

        dec_out, attention, dec_state = decoder_model.predict(
            [enc_outs, dec_state, test_fr_onehot_seq])
        dec_ind = np.argmax(dec_out, axis=-1)[0, 0]

        if dec_ind == 0:
            break
        test_fr_seq = sents2sequences(fr_tokenizer, [fr_index2word[dec_ind]], fr_vsize)
        test_fr_onehot_seq = np.expand_dims(to_categorical(test_fr_seq, num_classes=fr_vsize), 1)

        attention_weights.append((dec_ind, attention))
        fr_text += fr_index2word[dec_ind] + ' '

    return fr_text, attention_weights
Example #12
def prepare_training_and_testing_data():
    # All images have the same square size of 28×28 pixels.
    # the images are grayscale.
    (train_x, train_y), (test_x, test_y) = datasets.mnist.load_data()

    print('Train: X=%s, y=%s' % (train_x.shape, train_y.shape))
    print('Test: X=%s, y=%s' % (test_x.shape, test_y.shape))

    # plot first few images
    for i in range(9):
        # define subplot
        plt.subplot(330 + 1 + i)
        # plot raw pixel data
        plt.imshow(train_x[i], cmap=plt.get_cmap('gray'))
    # show the figure
    plt.show()

    # reshape dataset to set the number of color channels
    # (color channel  = 1, because the images are grayscale)
    train_x = train_x.reshape((train_x.shape[0], 28, 28, 1))
    test_x = test_x.reshape((test_x.shape[0], 28, 28, 1))

    train_y = to_categorical(train_y)
    test_y = to_categorical(test_y)

    train_x, test_x = normalize_image_pixel(train_x), normalize_image_pixel(
        test_x)

    return (train_x, train_y), (test_x, test_y)
Example #13
def load():
    """ Load the Information Bottleneck harmonics dataset
    Returns:
        Returns two namedtuples, the first one containing training
        and the second one containing test data respectively. Both come with fields X, y and Y:
        - X is the data
        - y is class, with numbers from 0 to 1
        - Y is class, but coded as a 2-dim vector with one entry set to 1 at the column index corresponding to the class
    """
    ID = '2017_12_21_16_51_3_275766'
    n_classes = 2
    data_file = Path('datasets/IB_data_' + str(ID) + '.npz')
    if not data_file.is_file():
        import_IB_data_from_mat(ID)
    data = np.load(data_file)

    X_train = data['X_train']
    y_train = data['y_train']
    X_test = data['X_test']
    y_test = data['y_test']

    Y_train = keras_utils.to_categorical(y_train, n_classes).astype('float32')
    Y_test = keras_utils.to_categorical(y_test, n_classes).astype('float32')

    Dataset = namedtuple('Dataset', ['X', 'Y', 'y', 'n_classes'])
    training = Dataset(X_train, Y_train, y_train, int(n_classes))
    test = Dataset(X_test, Y_test, y_test, int(n_classes))
    return training, test
Example #14
def create_dataset(file_path, persons):
    path = file_path + "{}_{}.txt"
    sgn = []
    lbl = []
    for i in persons:
        for j in range(9):
            with open(path.format(i, j + 1), "rb") as fp:  # Unpickling
                data = pickle.load(fp)

            for k in range(np.shape(data)[0]):
                sgn.append(data[k])
                lbl.append(j)

    sgn = np.asarray(sgn, dtype=np.float32)
    lbl = np.asarray(lbl, dtype=np.int32)

    c = list(zip(sgn, lbl))
    shuffle(c)
    sgn, lbl = zip(*c)

    sgn = np.asarray(sgn, dtype=np.float64)
    lbl = np.asarray(lbl, dtype=np.int64)

    train_signals = sgn[0:int(0.6 * len(sgn))]
    train_labels = lbl[0:int(0.6 * len(lbl))]
    val_signals = sgn[int(0.6*len(sgn)):int(0.8*len(sgn))]
    val_labels = lbl[int(0.6*len(lbl)):int(0.8*len(lbl))]
    test_signals = sgn[int(0.8*len(sgn)):]
    test_labels = lbl[int(0.8*len(lbl)):]

    train_labels = to_categorical(train_labels)
    val_labels = to_categorical(val_labels)
    test_labels = to_categorical(test_labels)

    return train_signals, train_labels, val_signals, val_labels, test_signals, test_labels
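
One caveat about the three calls above: when num_classes is omitted, to_categorical infers the width as max(label) + 1 on each call, so splits encoded separately can disagree in width if one split happens to miss the highest label. A small sketch:

import numpy as np
from tensorflow.keras.utils import to_categorical

print(to_categorical(np.array([0, 2])).shape)   # (2, 3): width inferred as 3
print(to_categorical(np.array([0, 8])).shape)   # (2, 9): width inferred as 9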
Example #15
    def __init__(self, train_from_idx, train_to_idx):
        # input image dimensions
        img_rows, img_cols = 28, 28
        self.num_classes = 10

        # the data, shuffled and split between train and test sets
        (x_train, y_train), (x_test, y_test) = mnist.load_data()

        if K.image_data_format() == 'channels_first':
            x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
            x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
            self.input_shape = (1, img_rows, img_cols)
        else:
            x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
            x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
            self.input_shape = (img_rows, img_cols, 1)

        x_train = x_train[train_from_idx:train_to_idx, :, :, :]
        y_train = y_train[train_from_idx:train_to_idx]

        x_train = x_train.astype('float32')
        x_test = x_test.astype('float32')
        self.x_train = x_train / 255
        self.x_test = x_test / 255

        # convert class vectors to binary class matrices
        self.y_train = to_categorical(y_train, self.num_classes)
        self.y_test = to_categorical(y_test, self.num_classes)
Example #16
def main():
    '''MAIN'''

    # setup data
    (train_data, train_labels), (test_data, test_labels) = load_data()
    one_hot_train_labels = to_categorical(train_labels)
    one_hot_test_labels = to_categorical(test_labels)

    # build model
    model = build_model()

    # train the model
    x_val = train_data[:1000]
    partial_x_train = train_data[1000:]

    y_val = one_hot_train_labels[:1000]
    partial_y_train = one_hot_train_labels[1000:]

    history = model.fit(partial_x_train,
                        partial_y_train,
                        epochs=20,
                        batch_size=512,
                        validation_data=(x_val, y_val))

    plot_loss(history)
    plot_accuracy(history)
Example #17
def train(full_model, en_seq, sp_seq, batch_size, n_epochs=1):
    """ Training the model """

    for ep in range(n_epochs):
        losses = []

        for bi in range(0, en_seq.shape[0] - batch_size, batch_size):
            try:
                en_onehot_seq = to_categorical(en_seq[bi:bi + batch_size, :],
                                               num_classes=en_vsize)
                sp_onehot_seq = to_categorical(sp_seq[bi:bi + batch_size, :],
                                               num_classes=sp_vsize)

                full_model.train_on_batch(
                    [en_onehot_seq, sp_onehot_seq[:, :-1, :]],
                    sp_onehot_seq[:, 1:, :])

                l = full_model.evaluate(
                    [en_onehot_seq, sp_onehot_seq[:, :-1, :]],
                    sp_onehot_seq[:, 1:, :],
                    batch_size=batch_size)
                losses.append(l)

                #Saving Weights
                if bi % 12800 == 0:
                    infer_dec_model.save_weights('decoder_weights_n.h5')
                    infer_enc_model.save_weights('encoder_weights_n.h5')
            except Exception:
                # skip any batch that fails; note this also hides real errors
                continue

        if (ep + 1) % 1 == 0:
            print("Loss in epoch {}: {}".format(ep + 1, np.mean(losses)))
Example #18
def get_mnist_datasets(img_h, img_w, batch_s):

    fashion_mnist = k_ds.fashion_mnist
    (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
    x_train, x_test = x_train / 255.0, x_test / 255.0

    # Further break training data into train / validation sets
    (x_train, x_valid) = x_train[5000:], x_train[:5000]
    (y_train, y_valid) = y_train[5000:], y_train[:5000]

    # Reshape input data from (28, 28) to (28, 28, 1)
    w, h = img_w, img_h
    x_train = x_train.reshape(x_train.shape[0], w, h, 1)
    x_valid = x_valid.reshape(x_valid.shape[0], w, h, 1)
    x_test = x_test.reshape(x_test.shape[0], w, h, 1)

    # One-hot encode the labels
    y_train = to_categorical(y_train, 10)
    y_valid = to_categorical(y_valid, 10)
    y_test = to_categorical(y_test, 10)

    train_ds = Dataset.from_tensor_slices((x_train, y_train)).shuffle(batch_s).batch(batch_s).repeat()
    validation_ds = Dataset.from_tensor_slices((x_valid, y_valid)).shuffle(batch_s).batch(batch_s).repeat()
    test_ds = Dataset.from_tensor_slices((x_test, y_test)).shuffle(batch_s).batch(batch_s)
    ds_lengths = (len(x_train), len(x_valid))

    return train_ds, validation_ds, ds_lengths
Example #19
def train_optimal_action_given_future_obs(
        model,
        target_history,
        target_stocks,
        weight_path='weights/optimal_3_stocks.h5'):
    (X_train, y_train), (X_test, y_test) = create_optimal_imitation_dataset(
        target_history)
    nb_classes = len(target_stocks) + 1

    Y_train = to_categorical(y_train, nb_classes)
    Y_test = to_categorical(y_test, nb_classes)

    continue_train = True
    while continue_train:
        model.fit(X_train,
                  Y_train,
                  batch_size=128,
                  epochs=50,
                  validation_data=(X_test, Y_test),
                  shuffle=True)
        save_weights = input('Type True to save weights\n')
        if save_weights == 'True':
            model.save(weight_path)
        continue_train = input('True to continue train, otherwise stop\n') == 'True'
Example #20
def data_mnist():
    # These values are specific to MNIST
    img_rows = 28
    img_cols = 28
    nb_classes = 10

    # the data, shuffled and split between train and test sets
    (X_train, y_train), (X_test, y_test) = mnist.load_data()

    if keras.backend.image_data_format() == 'channels_first':
        X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
        X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
    else:
        X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
        X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255
    X_test /= 255
    print('X_train shape:', X_train.shape)
    print(X_train.shape[0], 'train samples')
    print(X_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)
    from sklearn.utils import shuffle
    X_train, Y_train = shuffle(X_train, Y_train)
    return X_train, Y_train, X_test, Y_test
Example #21
    def preprocess(self, df):
        time_of_day = df["time_of_day"].map(tod_dict).values
        day_of_week = df["day_of_week"].map(dow_dict).values

        label = df["target"].map(target2idx).values
        play_song = df['play_song'].map(self._to_id).values
        save = df['save'].map(self._to_id).values

        # one hot for categorical features
        ohe_label = to_categorical(label, num_classes=TARGET_CLASSES)
        ohe_tod = to_categorical(time_of_day, num_classes=TOTAL_TOD_BINS)
        ohe_dow = to_categorical(day_of_week, num_classes=TOTAL_DOW_BINS)

        # padding
        play_song_pad = pad_sequences(play_song,
                                      maxlen=SEQ_LEN,
                                      padding='pre',
                                      truncating='pre')
        save_pad = pad_sequences(save,
                                 maxlen=SEQ_LEN,
                                 padding='pre',
                                 truncating='pre')

        X = [play_song_pad, save_pad, ohe_tod, ohe_dow]
        y = ohe_label

        return X, y
Example #22
    def _prepare(self, path):
        self.mapping = self._load_labels_mapping()
        self.number_of_classes = len(self.mapping)

        data = loadmat(path / "emnist-byclass.mat")

        # load training dataset
        x_train = data["dataset"][0][0][0][0][0][0].astype(np.float32)
        x_train = x_train.reshape(x_train.shape[0], 28, 28, 1, order="A")
        y_train = data["dataset"][0][0][0][0][0][1]
        # One hot encoding
        y_train = to_categorical(y_train, self.number_of_classes).astype(int)  # np.int was removed in NumPy 1.24

        # Calculate mean and standard deviation for input normalization
        self.mean = x_train.mean()#.astype(np.float32)
        self.std = x_train.std()#.astype(np.float32)

        print("Balancing train dataset...")
        x_train, y_train = self._sample_to_balance(x_train, y_train)

        # load test dataset
        x_test = data["dataset"][0][0][1][0][0][0].astype(np.float32)
        x_test = x_test.reshape(x_test.shape[0], 28, 28, 1, order="A")
        y_test = data["dataset"][0][0][1][0][0][1]
        y_test = to_categorical(y_test, self.number_of_classes).astype(int)

        # https://www.tensorflow.org/guide/datasets
        self.train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
        self.test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
        print(f"Dataset ready, with {len(y_train)} training entries and {len(y_test)} test entries")
Example #23
def onehot(data_tuple):
  """
  Takes a tuple of labels as input (train, test) or (train, validation, test) 
  and returns the one hot encoded labels for train and validation in the same 
  tuple format. The test set is not one hot encoded.

  Can be chained with test_val_rest_split() or test_rest_split()
  """
  if len(data_tuple) == 3:
    return (
      np_utils.to_categorical(data_tuple[0]),
      np_utils.to_categorical(data_tuple[1]),
      data_tuple[2]
    )
  elif len(data_tuple) == 2:
    return (
      np_utils.to_categorical(data_tuple[0]),
      data_tuple[1]
    )
  elif len(data_tuple) == 1:
    return (
      data_tuple[0]
    )
  else:
    raise TypeError('Wrong number of arguments')
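
A hypothetical usage of the helper above, assuming a (train, validation, test) tuple of integer label arrays; the first two members come back one hot encoded and the test labels pass through unchanged:

import numpy as np

labels = (np.array([0, 2, 1]), np.array([1, 0]), np.array([2]))
train_oh, val_oh, test_raw = onehot(labels)
print(train_oh.shape)   # (3, 3): one row per label, one column per class
print(test_raw)         # [2], returned as-is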
Example #24
def train(full_model, infer_enc_model, infer_dec_model, en_seq, fr_seq):
    """ Training the model """

    for ep in range(N_EPOCHS):
        losses = []
        start = time.time()
        for bi in tqdm(range(0, en_seq.shape[0] - BATCH_SIZE, BATCH_SIZE)):

            en_onehot_seq = to_categorical(en_seq[bi:bi + BATCH_SIZE, :],
                                           num_classes=en_vsize)
            fr_onehot_seq = to_categorical(fr_seq[bi:bi + BATCH_SIZE, :],
                                           num_classes=fr_vsize)

            full_model.train_on_batch(
                [en_onehot_seq, fr_onehot_seq[:, :-1, :]],
                fr_onehot_seq[:, 1:, :])

            l = full_model.evaluate([en_onehot_seq, fr_onehot_seq[:, :-1, :]],
                                    fr_onehot_seq[:, 1:, :],
                                    batch_size=BATCH_SIZE,
                                    verbose=0)

            losses.append(l)
        end = time.time()
        # if (ep + 1) % 5 == 0:
        # save model every 5 epochs
        # save_model(full_model, ep+1)

        # show test results after epoch
        test_inferring(infer_enc_model, infer_dec_model)
        logger.info("Elapsed: {} sec. Loss in epoch {}/{}: {}".format(
            round(end - start, 3), ep + 1, N_EPOCHS, np.mean(losses)))
Example #25
def load():
    """Load the MNIST handwritten digits dataset
    Returns:
        Returns two namedtuples, the first one containing training
        and the second one containing test data respectively. Both come with fields X, y and Y:
        - X is the data
        - y is class, with numbers from 0 to 9
        - Y is class, but coded as a 10-dim vector with one entry set to 1 at the column index corresponding to the class
    """
    n_classes = 10
    (X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()
    X_train = np.reshape(X_train,
                         [X_train.shape[0], -1]).astype('float32') / 255.0
    X_test = np.reshape(X_test,
                        [X_test.shape[0], -1]).astype('float32') / 255.0

    X_train = X_train * 2.0 - 1.0
    X_test = X_test * 2.0 - 1.0

    Y_train = keras_utils.to_categorical(y_train, n_classes).astype('float32')
    Y_test = keras_utils.to_categorical(y_test, n_classes).astype('float32')

    Dataset = namedtuple('Dataset', ['X', 'Y', 'y', 'n_classes'])
    training = Dataset(X_train, Y_train, y_train, n_classes)
    test = Dataset(X_test, Y_test, y_test, n_classes)

    return training, test
Example #26
def main():
    (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

    x_train = x_train.reshape(60000, 784)
    x_train = x_train / 255
    x_test = x_test.reshape(10000, 784)
    x_test = x_test / 255
    y_train = utils.to_categorical(y_train, 10)
    y_test = utils.to_categorical(y_test, 10)

    model = Sequential()
    model.add(Dense(784, input_dim=784, activation="relu"))
    model.add(Dense(10, activation="softmax"))

    model.compile(loss="categorical_crossentropy",
                  optimizer="SGD",
                  metrics=["accuracy"])
    model.summary()
    callback = [
        TensorBoard(log_dir='logs', histogram_freq=1, write_images=True)
    ]

    model.fit(x_train,
              y_train,
              batch_size=200,
              epochs=300,
              verbose=1,
              validation_split=0.2,
              callbacks=callback)

    model.save("fashion_model.h5")

    score = model.evaluate(x_test, y_test, verbose=1)
    print("Accuracy on test data is", score[1] * 100, "percent")
Example #27
def infer_nmt(encoder_model, decoder_model, test_en_seq, en_vsize, sp_vsize):
    """

    Infer logic

    :param encoder_model: keras.Model

    :param decoder_model: keras.Model

    :param test_en_seq: sequence of word ids

    :param en_vsize: int

    :param sp_vsize: int

    :return:

    """

    test_sp_seq = sents2sequences(sp_tokenizer, ['sos'], sp_vsize)

    test_en_onehot_seq = to_categorical(test_en_seq, num_classes=en_vsize)

    test_sp_onehot_seq = np.expand_dims(
        to_categorical(test_sp_seq, num_classes=sp_vsize), 1)

    enc_outs, enc_last_state = encoder_model.predict(test_en_onehot_seq)

    dec_state = enc_last_state

    attention_weights = []

    sp_text = ''

    for i in range(20):

        dec_out, attention, dec_state = decoder_model.predict(
            [enc_outs, dec_state, test_sp_onehot_seq])

        dec_ind = np.argmax(dec_out, axis=-1)[0, 0]

        #print('Decoder Output Top 10', dec_out[0,0,:10])

        if dec_ind == 0:

            break

        test_sp_seq = sents2sequences(sp_tokenizer,
                                      [sp_tokenizer.index_word[dec_ind]],
                                      sp_vsize)

        test_sp_onehot_seq = np.expand_dims(
            to_categorical(test_sp_seq, num_classes=sp_vsize), 1)

        attention_weights.append((dec_ind, attention))

        sp_text += sp_tokenizer.index_word[dec_ind] + ' '

    return sp_text, attention_weights
Example #28
def main():
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    model = ResNet50(input_shape=(32, 32, 3),
                     include_top=False,
                     weights='imagenet',
                     classes=10)
    model.summary()
Example #29
def load_mnist():
    (train_x, train_y), (test_x, test_y) = mnist.load_data()

    train_x = train_x.reshape(train_x.shape[0], 784).astype('float32') / 255.0
    test_x = test_x.reshape(test_x.shape[0], 784).astype('float32') / 255.0

    train_y = to_categorical(train_y, 10)
    test_y = to_categorical(test_y, 10)

    return (train_x, train_y), (test_x, test_y)
Example #30
def load_data():
    '''
    load data from MovieLens 100K Dataset
    http://grouplens.org/datasets/movielens/

    Note that this method uses ua.base and ua.test in the dataset.

    :return: train_users, train_x, test_users, test_x
    :rtype: list of int, numpy.array, list of int, numpy.array
    '''
    path = get_file(
        'ml-100k.zip', origin='http://files.grouplens.org/datasets/movielens/ml-100k.zip')
    with ZipFile(path, 'r') as ml_zip:
        max_item_id = -1
        train_history = {}
        with ml_zip.open('ml-100k/ua.base', 'r') as file:
            for line in file:
                user_id, item_id, rating, timestamp = line.decode(
                    'utf-8').rstrip().split('\t')
                if int(user_id) not in train_history:
                    train_history[int(user_id)] = [int(item_id)]
                else:
                    train_history[int(user_id)].append(int(item_id))

                if max_item_id < int(item_id):
                    max_item_id = int(item_id)

        test_history = {}
        with ml_zip.open('ml-100k/ua.test', 'r') as file:
            for line in file:
                user_id, item_id, rating, timestamp = line.decode(
                    'utf-8').rstrip().split('\t')
                if int(user_id) not in test_history:
                    test_history[int(user_id)] = [int(item_id)]
                else:
                    test_history[int(user_id)].append(int(item_id))

    max_item_id += 1  # item_id starts from 1
    train_users = list(train_history.keys())
    train_x = numpy.zeros((len(train_users), max_item_id), dtype=numpy.int32)
    print(train_x.shape)
    for i, hist in enumerate(train_history.values()):
        # print(hist)
        mat = to_categorical(hist, max_item_id)
        # print(mat.shape)
        train_x[i] = numpy.sum(mat, axis=0)
        # print(len(train_x[i]))

    test_users = list(test_history.keys())
    test_x = numpy.zeros((len(test_users), max_item_id), dtype=numpy.int32)
    for i, hist in enumerate(test_history.values()):
        mat = to_categorical(hist, max_item_id)
        test_x[i] = numpy.sum(mat, axis=0)

    return train_users, train_x, test_users, test_x
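
The summing trick above turns a user's item history into a single multi-hot row: to_categorical yields one row per item occurrence, and summing over the history axis collapses them into per-item counts. In isolation, with a toy history:

import numpy as np
from tensorflow.keras.utils import to_categorical

hist = [1, 3, 3, 5]               # item ids seen by one user
mat = to_categorical(hist, 6)     # (4, 6): one row per item occurrence
print(np.sum(mat, axis=0))        # [0. 1. 0. 2. 0. 1.]: counts per item id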
Example #31
def get_dataset(class_type='gender'):
    """@:param class_type: str - type of class needed to be in Y.
        values { 'gender' , 'speaker' }
    """
    train = []
    test = []
    valid = []
    speaker_ids = get_speaker_ids()

    for s in speaker_ids:
        file_list = glob.glob(DATA_DIR + s + '/*/*.wav')
        print("Loading Data from :", DATA_DIR + s)
        all_data = []
        for f in file_list:
            speaker_id = f.split("/")[SPEAKER_IDX]
            chapter_id = f.split("/")[CHAPTER_IDX]
            filename = f.split("/")[FILENAME_IDX]

            all_data.append(
                os.path.join(DATA_DIR, speaker_id, chapter_id, filename))

        random.shuffle(all_data)
        split_tuple = np.split(
            np.array(all_data),
            [int(0.7 * len(all_data)),
             int(0.9 * len(all_data))])
        train = train + split_tuple[0].tolist()
        test = test + split_tuple[1].tolist()
        valid = valid + split_tuple[2].tolist()

    if class_type == 'gender':
        x_train, y_train = get_XY_gender(train)
        x_test, y_test = get_XY_gender(test)
        x_valid, y_valid = get_XY_gender(valid)
        num_classes = len(GENDER_CLASSES)
    elif class_type == 'speaker':
        x_train, y_train = get_XY_speaker(train)
        x_test, y_test = get_XY_speaker(test)
        x_valid, y_valid = get_XY_speaker(valid)
        num_classes = NUM_CLASSES
    else:
        print("Invalid class_type. Required 'gender' or 'speaker'. Given: {}".
              format(class_type))
        return

    return (x_train, to_categorical(y_train, num_classes=num_classes)), \
           (x_test, to_categorical(y_test, num_classes=num_classes)), \
           (x_valid, to_categorical(y_valid, num_classes=num_classes))
Example #32
def get_input_datasets(use_bfloat16=False):
  """Downloads the MNIST dataset and creates train and eval dataset objects.

  Args:
    use_bfloat16: Boolean to determine if input should be cast to bfloat16

  Returns:
    Train dataset and eval dataset. The dataset doesn't include batch dim.

  """
  cast_dtype = dtypes.bfloat16 if use_bfloat16 else dtypes.float32

  # the data, split between train and test sets
  (x_train, y_train), (x_test, y_test) = mnist.load_data()

  train_data_shape = (x_train.shape[0],) + get_data_shape()
  test_data_shape = (x_test.shape[0],) + get_data_shape()
  if backend.image_data_format() == 'channels_first':
    x_train = x_train.reshape(train_data_shape)
    x_test = x_test.reshape(test_data_shape)
  else:
    x_train = x_train.reshape(train_data_shape)
    x_test = x_test.reshape(test_data_shape)

  x_train = x_train.astype('float32')
  x_test = x_test.astype('float32')
  x_train /= 255
  x_test /= 255

  # convert class vectors to binary class matrices
  y_train = utils.to_categorical(y_train, NUM_CLASSES)
  y_test = utils.to_categorical(y_test, NUM_CLASSES)

  # train dataset
  train_ds = dataset_ops.Dataset.from_tensor_slices((x_train, y_train))
  # TODO(rchao): Remove maybe_shard_dataset() once auto-sharding is done.
  train_ds = maybe_shard_dataset(train_ds)
  train_ds = train_ds.repeat()
  train_ds = train_ds.map(lambda x, y: (math_ops.cast(x, cast_dtype), y))
  train_ds = train_ds.batch(64, drop_remainder=True)

  # eval dataset
  eval_ds = dataset_ops.Dataset.from_tensor_slices((x_test, y_test))
  # TODO(rchao): Remove maybe_shard_dataset() once auto-sharding is done.
  eval_ds = maybe_shard_dataset(eval_ds)
  eval_ds = eval_ds.repeat()
  eval_ds = eval_ds.map(lambda x, y: (math_ops.cast(x, cast_dtype), y))
  eval_ds = eval_ds.batch(64, drop_remainder=True)

  return train_ds, eval_ds
Example #33
    def encode_x(self, x):
        idx_x = [self.s_to_idx[s] for s in x]
        return to_categorical(idx_x, num_classes=self.n_symbols)

    def encode_y(self, y):
        idx_y = self.c_to_idx[y]
        return to_categorical(idx_y, num_classes=self.n_classes)
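
A minimal standalone sketch of what the first encoder above returns, assuming a hypothetical s_to_idx-style lookup table mapping symbols to integer ids:

from tensorflow.keras.utils import to_categorical

s_to_idx = {'a': 0, 'b': 1, 'c': 2}   # hypothetical symbol table
idx_x = [s_to_idx[s] for s in 'cab']
print(to_categorical(idx_x, num_classes=3))
# [[0. 0. 1.]
#  [1. 0. 0.]
#  [0. 1. 0.]]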