Code Example #1
File: text_gen.py Project: mokul791/NLP_Prac
from keras.preprocessing.sequence import pad_sequences  # assumed import; the excerpt itself omits it

def generate_text(model, tokenizer, seq_length, seed_text, num_gen_words):
	output_text = []
	input_text = seed_text

	for i in range(num_gen_words):

		# take the input text string and encode it to a sequence
		encoded_text = tokenizer.texts_to_sequences([input_text])[0]

		# pad sequence to our seq_length
		pad_encoded = pad_sequences([encoded_text], maxlen=seq_length, truncating='pre')

		# predict the index of the most likely next word (predict_classes returns class indices)
		pred_word_index = model.predict_classes(pad_encoded, verbose=0)[0]

		# get the word
		pred_word = tokenizer.index_word[pred_word_index]

		# update the sequence of input text (shifting one over with the new word)
		input_text += ' ' + pred_word

		output_text.append(pred_word)

	return output_text
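A minimal usage sketch for the function above, assuming a Tokenizer already fitted on a training corpus and a model trained on sequences of that length; corpus_lines, the seed text, and the values 25 and 10 are placeholders, not part of the original project:

from keras.preprocessing.text import Tokenizer

tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus_lines)  # corpus_lines: hypothetical list of training sentences
seed = 'the quick brown fox'          # hypothetical seed text
generated = generate_text(model, tokenizer, seq_length=25, seed_text=seed, num_gen_words=10)
print(' '.join(generated))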
Code Example #2
        X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
        X_val = X_val.reshape(X_val.shape[0], img_rows, img_cols, 1)
        X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
        input_shape = (img_rows, img_cols, 1)

    print('X_train shape:', X_train.shape)
    # np_utils.to_categorical converts integer labels to one-hot vectors,
    # turning the label vector into a matrix here
    Y_train = np_utils.to_categorical(y_train, 40)
    Y_val = np_utils.to_categorical(y_val, 40)
    Y_test = np_utils.to_categorical(y_test, 40)

    model = model.cnn_model()
    # Train the model
    train.train_model(model, X_train, Y_train, X_val, Y_val, epochs)
    # Evaluate the model
    score = test.test_model(model, X_test, Y_test)
    print(score)
    # Load the trained weights
    model.load_weights('model_weights.h5')
    # Compute the predicted classes
    classes = model.predict_classes(X_test, verbose=0)
    # Compute the accuracy
    test_accuracy = np.mean(np.equal(y_test, classes))
    print("final accuracy:", test_accuracy)
    error_num = 0
    for i in range(0, 40):
        if y_test[i] != classes[i]:
            error_num += 1
            print(y_test[i], 'was misclassified as', classes[i])
    print("A total of " + str(error_num) + " images were misclassified")
Code Example #3
import pandas as pd
import tensorflow as tf

import model

# Load train data
train = pd.read_csv('data/train.csv')
train_x = train.drop('label', axis=1).values / 255.0
train_y = train['label'].values

# Load test data
test = pd.read_csv('data/test.csv')
test_x = test.values / 255.0

# Reshape the flattened pixel rows into 28x28 single-channel images
train_x = train_x.reshape(-1, 28, 28, 1)
test_x = test_x.reshape(-1, 28, 28, 1)

# Load model
model = model.build_model()

# Train model
model.fit(train_x, train_y, epochs=10)

# Predict test data and create submission file
preds = pd.DataFrame({
    'ImageId': list(range(1, test.shape[0] + 1)),
    'Label': model.predict_classes(test_x)
})
preds.to_csv('submission.csv', index=False, header=True)
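Sequential.predict_classes was deprecated and removed in TensorFlow 2.6; on current versions the same Label column can be produced with an argmax over model.predict. A sketch of the equivalent call, not part of the original script:

import numpy as np

preds = pd.DataFrame({
    'ImageId': list(range(1, test.shape[0] + 1)),
    'Label': np.argmax(model.predict(test_x), axis=-1)
})
preds.to_csv('submission.csv', index=False, header=True)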
Code Example #4
File: validate.py Project: luka-papez/master-thesis
for model_name in os.listdir(
        trained_models_path
):  #[m for m in natsorted(os.listdir(trained_models_path))[::-1] if m.count('-') == 2][int((len(os.listdir('.')) - 20) / 2):]:
    #model = load_model(os.path.join(trained_models_path, model_name))
    model = model.get_model('LSTM')
    model.load_weights(os.path.join(trained_models_path, model_name))

    print()
    print('-' * 50)
    print(model_name)
    print(test_batches[0])

    batch_correspondence = []
    batch_edit_distances = []
    printed_first = False
    data = zip(model.predict_classes(x_test, constants.BATCH_SIZE), y_test)

    for predicted_classes, y_true in data:
        words_pred = [vocabulary.vocabulary[c] for c in predicted_classes]
        words_true = [vocabulary.vocabulary[np.argmax(row)] for row in y_true]

        # the prediction is truncated where the ground truth reaches <end>, since the token at that
        # position can simply be replaced with <end> to recover the correct sequence; pads are ignored
        words_pred = words_pred[:words_true.index('<end>') +
                                1 if '<end>' in words_true else -1]
        words_true = words_true[:words_true.index('<end>') +
                                1 if '<end>' in words_true else -1]

        batch_correspondence.append(
            len([1 for x, y in zip(words_pred, words_true) if x == y]))
        batch_edit_distances.append(editdistance.eval(words_pred, words_true))
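One subtlety in the truncation above: the conditional expression binds to the whole slice bound, so words_pred[:a if b else -1] parses as words_pred[:(a if b else -1)], and when '<end>' is absent the bound is -1, which silently drops the last token. A standalone sketch of the parse:

words_true = ['x', '=', '1', '<end>', '<pad>']
cut = words_true.index('<end>') + 1 if '<end>' in words_true else -1
print(words_true[:cut])  # ['x', '=', '1', '<end>']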
Code Example #5
test_images_path = '../dataset_images/test'
test_labels_path = '../dataset_labels/test'

model_name = 'STATS'
#model_name = 'nothing'
model = model.get_model(model_name)

data_chunk_size = 1024
test_batches, x_test, y_test = next(BatchGenerator(test_images_path, test_labels_path).generate(data_chunk_size))

correspondence = []
edit_distances = []

batch_correspondence = []
batch_edit_distances = []
y_pred = model.predict_classes(x_test, batch_size=constants.BATCH_SIZE)

printed_first = False
for predicted, true in tqdm(zip(y_pred, y_test)):
  words_pred = [vocabulary.vocabulary[c] for c in predicted]
  words_true = [vocabulary.vocabulary[np.argmax(row)] for row in true]

  batch_correspondence.append(len([1 for x, y in zip(words_pred, words_true) if x == y]))
  batch_edit_distances.append(editdistance.eval(words_pred, words_true))

  if not printed_first: # print one sample for visualization
    print(['CORRECT' if x == y else (x, y) for x, y in zip(words_pred, words_true) if y != '<pad>'])
    print()
    printed_first = True

print('acc: {}, avg_edit_dst: {}'.format((np.array(batch_correspondence) / constants.MAX_SEQ_LEN).mean(), np.array(batch_edit_distances).mean()))
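The final print reports token-level accuracy as each sequence's match count divided by the fixed maximum sequence length, averaged over the batch; a worked sketch with made-up numbers (a MAX_SEQ_LEN of 4 is purely illustrative):

import numpy as np

batch_correspondence = [3, 4, 2]  # matched tokens per sequence (hypothetical)
MAX_SEQ_LEN = 4
print((np.array(batch_correspondence) / MAX_SEQ_LEN).mean())  # 0.75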
Code Example #6
    chunk = 1
    for batch_files, x_train, y_train in dataset:
        train_history = model.fit(x_train, y_train, batch_size=constants.BATCH_SIZE, epochs=1, verbose=0)

        scores = model.evaluate(x_val, y_val, verbose=0, batch_size=constants.BATCH_SIZE)
        print("Iteration %d, chunk %d, (%d/%d) | train_loss: %.2f, train_acc: %.2f%%, val_acc: %.2f%%" % \
          (iteration, \
           chunk, \
           chunk * data_chunk_size, \
           number_of_train_files, \
           train_history.history['loss'][0], \
           train_history.history['categorical_accuracy'][0] * 100, \
           scores[1] * 100 \
           ))

        for p, (classes, y_true) in enumerate(zip(model.predict_classes(x_exa, batch_size=constants.BATCH_SIZE), y_exa)):
          words_pred = [vocabulary.vocabulary[c] for c in classes]
          words_true = [vocabulary.vocabulary[np.argmax(row)] for row in y_true]

          if p == 0:
            print(['CORRECT' if x == y else (x, y) for x, y in zip(words_pred, words_true) if y != '<pad>'])
            print()

        # a single point of history for the graph
        # (train_loss, train_acc, val_loss, val_acc)
        history_point = (train_history.history['loss'][0], train_history.history['categorical_accuracy'][0], scores[0], scores[1])
        with open(os.path.join(history_folder, 'history-{}-{}-{}.pickle'.format(MODEL_NAME, iteration, chunk)), 'wb') as f:
            pickle.dump(history_point, f)

        if data_chunk_size > 1000:
            model.save(os.path.join(models_folder, 'model-{}-{}-{}.h5'.format(MODEL_NAME, iteration, chunk)))
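Because each chunk's history point is pickled to its own file, a later plotting script has to collect them again; a minimal sketch of reading the points back, assuming the same history_folder and naming scheme as above:

import glob
import os
import pickle

points = []
for path in sorted(glob.glob(os.path.join(history_folder, 'history-*.pickle'))):
    with open(path, 'rb') as f:
        points.append(pickle.load(f))  # (train_loss, train_acc, val_loss, val_acc)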
Code Example #7
File: train_keras.py Project: codeaudit/modeling
def main(args):
    model_id = build_model_id(args)
    model_path = build_model_path(args, model_id)
    setup_model_dir(args, model_path)
    sys.stdout, sys.stderr = setup_logging(args, model_path)

    x_train, y_train = load_model_data(args.train_file, args.data_name,
                                       args.target_name)
    x_validation, y_validation = load_model_data(args.validation_file,
                                                 args.data_name,
                                                 args.target_name)

    rng = np.random.RandomState(args.seed)

    if args.n_classes > -1:
        n_classes = args.n_classes
    else:
        n_classes = max(y_train) + 1

    n_classes, target_names, class_weight = load_target_data(args, n_classes)

    logging.debug("n_classes {0} min {1} max {2}".format(
        n_classes, min(y_train), max(y_train)))

    y_train_one_hot = np_utils.to_categorical(y_train, n_classes)
    y_validation_one_hot = np_utils.to_categorical(y_validation, n_classes)

    logging.debug("y_train_one_hot " + str(y_train_one_hot.shape))
    logging.debug("x_train " + str(x_train.shape))

    min_vocab_index = np.min(x_train)
    max_vocab_index = np.max(x_train)
    logging.debug("min vocab index {0} max vocab index {1}".format(
        min_vocab_index, max_vocab_index))

    json_cfg = load_model_json(args, x_train, n_classes)

    logging.debug("loading model")

    sys.path.append(args.model_dir)
    import model
    from model import build_model

    if args.subsetting_function:
        subsetter = getattr(model, args.subsetting_function)
    else:
        subsetter = None

    def take_subset(subsetter, path, x, y, y_one_hot, n):
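        # keep only the first n examples; when a subsetting function is
        # given, restrict first to the rows selected by subsetter(path)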
        if subsetter is None:
            return x[0:n], y[0:n], y_one_hot[0:n]
        else:
            mask = subsetter(path)
            idx = np.where(mask)[0]
            idx = idx[0:n]
        return x[idx], y[idx], y_one_hot[idx]

    x_train, y_train, y_train_one_hot = take_subset(subsetter,
                                                    args.train_file,
                                                    x_train,
                                                    y_train,
                                                    y_train_one_hot,
                                                    n=args.n_train)

    x_validation, y_validation, y_validation_one_hot = take_subset(
        subsetter,
        args.validation_file,
        x_validation,
        y_validation,
        y_validation_one_hot,
        n=args.n_validation)

    logging.debug("y_train_one_hot " + str(y_train_one_hot.shape))
    logging.debug("x_train " + str(x_train.shape))

    model_cfg = ModelConfig(**json_cfg)
    logging.info("model_cfg " + str(model_cfg))
    model = build_model(model_cfg)
    setattr(model, 'stop_training', False)

    logging.info('model has {n_params} parameters'.format(
        n_params=count_parameters(model)))

    if len(args.extra_train_file) > 1:
        callbacks = keras.callbacks.CallbackList()
    else:
        callbacks = []

    save_model_info(args, model_path, model_cfg)

    if not args.no_save:
        callbacks.append(
            ModelCheckpoint(filepath=model_path + '/model-{epoch:04d}.h5',
                            verbose=1,
                            save_best_only=True))

    callback_logger = logging.info if args.log else callable_print

    if args.n_epochs == sys.maxsize:
        # Number of epochs overrides patience.  If the number of epochs
        # is specified on the command line, the model is trained for
        # exactly that number; otherwise, the model is trained with
        # early stopping using the patience specified in the model
        # configuration.
        callbacks.append(
            EarlyStopping(monitor='val_loss',
                          patience=model_cfg.patience,
                          verbose=1))

    if args.classification_report:
        cr = ClassificationReport(x_validation,
                                  y_validation,
                                  callback_logger,
                                  target_names=target_names)
        callbacks.append(cr)

    if model_cfg.optimizer == 'SGD':
        callbacks.append(SingleStepLearningRateSchedule(patience=10))

    if len(args.extra_train_file) > 1:
        args.extra_train_file.append(args.train_file)
        logging.info("Using the following files for training: " +
                     ','.join(args.extra_train_file))

        train_file_iter = itertools.cycle(args.extra_train_file)
        current_train = args.train_file

        callbacks._set_model(model)
        callbacks.on_train_begin(logs={})

        epoch = batch = 0

        while True:
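            # each pass through this loop trains on one file; an epoch
            # completes once every file in extra_train_file has been cycled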
            iteration = batch % len(args.extra_train_file)

            logging.info(
                "epoch {epoch} iteration {iteration} - training with {train_file}"
                .format(epoch=epoch,
                        iteration=iteration,
                        train_file=current_train))
            callbacks.on_epoch_begin(epoch, logs={})

            n_train = x_train.shape[0]

            callbacks.on_batch_begin(batch, logs={'size': n_train})

            index_array = np.arange(n_train)
            if args.shuffle:
                rng.shuffle(index_array)

            batches = keras.models.make_batches(n_train, model_cfg.batch_size)
            logging.info(
                "epoch {epoch} iteration {iteration} - starting {n_batches} batches"
                .format(epoch=epoch,
                        iteration=iteration,
                        n_batches=len(batches)))

            avg_train_loss = avg_train_accuracy = 0.
            for batch_index, (batch_start, batch_end) in enumerate(batches):
                batch_ids = index_array[batch_start:batch_end]

                if isinstance(model, keras.models.Graph):
                    data = {
                        'input': x_train[batch_ids],
                        'output': y_train_one_hot[batch_ids]
                    }
                    train_loss = model.train_on_batch(
                        data, class_weight=class_weight)
                    train_accuracy = 0.
                else:
                    train_loss, train_accuracy = model.train_on_batch(
                        x_train[batch_ids],
                        y_train_one_hot[batch_ids],
                        accuracy=True,
                        class_weight=class_weight)

                batch_end_logs = {
                    'loss': train_loss,
                    'accuracy': train_accuracy
                }

                # running means via the incremental update
                # avg_n = (avg_{n-1} * (n-1) + x_n) / n
                avg_train_loss = (avg_train_loss * batch_index +
                                  train_loss) / (batch_index + 1)
                avg_train_accuracy = (avg_train_accuracy * batch_index +
                                      train_accuracy) / (batch_index + 1)

                callbacks.on_batch_end(batch, logs=batch_end_logs)

            logging.info(
                "epoch {epoch} iteration {iteration} - finished {n_batches} batches"
                .format(epoch=epoch,
                        iteration=iteration,
                        n_batches=len(batches)))

            logging.info(
                "epoch {epoch} iteration {iteration} - loss: {loss} - acc: {acc}"
                .format(epoch=epoch,
                        iteration=iteration,
                        loss=avg_train_loss,
                        acc=avg_train_accuracy))

            batch += 1

            # Validation frequency (this if-block) doesn't necessarily
            # coincide with the end of an epoch (next if-block), which is
            # why model.evaluate appears twice here.
            if (iteration + 1) % args.validation_freq == 0:
                val_loss, val_acc = model.evaluate(
                    x_validation,
                    y_validation_one_hot,
                    show_accuracy=True,
                    verbose=0 if args.log else 1)
                logging.info(
                    "epoch {epoch} iteration {iteration} - val_loss: {val_loss} - val_acc: {val_acc}"
                    .format(epoch=epoch,
                            iteration=iteration,
                            val_loss=val_loss,
                            val_acc=val_acc))
                epoch_end_logs = {
                    'iteration': iteration,
                    'val_loss': val_loss,
                    'val_acc': val_acc
                }
                callbacks.on_epoch_end(epoch, epoch_end_logs)

            if batch % len(args.extra_train_file) == 0:
                val_loss, val_acc = model.evaluate(
                    x_validation,
                    y_validation_one_hot,
                    show_accuracy=True,
                    verbose=0 if args.log else 1)
                logging.info(
                    "epoch {epoch} iteration {iteration} - val_loss: {val_loss} - val_acc: {val_acc}"
                    .format(epoch=epoch,
                            iteration=iteration,
                            val_loss=val_loss,
                            val_acc=val_acc))
                epoch_end_logs = {
                    'iteration': iteration,
                    'val_loss': val_loss,
                    'val_acc': val_acc
                }
                epoch += 1
                callbacks.on_epoch_end(epoch, epoch_end_logs)

            if model.stop_training:
                logging.info(
                    "epoch {epoch} iteration {iteration} - done training".
                    format(epoch=epoch, iteration=iteration))
                break

            current_train = next(train_file_iter)
            x_train, y_train = load_model_data(current_train, args.data_name,
                                               args.target_name)
            y_train_one_hot = np_utils.to_categorical(y_train, n_classes)

            if epoch > args.n_epochs:
                break

        callbacks.on_train_end(logs={})
    else:
        print('args.n_epochs', args.n_epochs)
        if isinstance(model, keras.models.Graph):
            data = {'input': x_train, 'output': y_train_one_hot}
            validation_data = {
                'input': x_validation,
                'output': y_validation_one_hot
            }
            model.fit(
                data,
                shuffle=args.shuffle,
                nb_epoch=args.n_epochs,
                batch_size=model_cfg.batch_size,
                #show_accuracy=True,
                validation_data=validation_data,
                callbacks=callbacks,
                class_weight=class_weight,
                verbose=2 if args.log else 1)
            y_hat = model.predict_classes(data)
            print('val_acc %.04f' % accuracy_score(y_validation, y_hat))
        else:
            model.fit(x_train,
                      y_train_one_hot,
                      shuffle=args.shuffle,
                      nb_epoch=args.n_epochs,
                      batch_size=model_cfg.batch_size,
                      show_accuracy=True,
                      validation_data=(x_validation, y_validation_one_hot),
                      callbacks=callbacks,
                      class_weight=class_weight,
                      verbose=2 if args.log else 1)