Example #1
    def testIrisES(self):
        random.seed(42)

        iris = datasets.load_iris()
        x_train, x_test, y_train, y_test = train_test_split(iris.data,
                                                            iris.target,
                                                            test_size=0.2,
                                                            random_state=42)

        x_train, x_val, y_train, y_val = train_test_split(x_train,
                                                          y_train,
                                                          test_size=0.2,
                                                          random_state=42)
        val_monitor = learn.monitors.ValidationMonitor(
            x_val,
            y_val,
            every_n_steps=50,
            early_stopping_rounds=100,
            early_stopping_metric='accuracy',
            early_stopping_metric_minimize=False)

        # classifier without early stopping - overfitting
        classifier1 = learn.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
                                                    n_classes=3,
                                                    steps=1000)
        classifier1.fit(x_train, y_train)
        _ = accuracy_score(y_test, classifier1.predict(x_test))

        # Full 1000 steps, 11 summaries and no evaluation summary.
        # 11 summaries = first + every 100 out of 1000 steps.
        self.assertEqual(11, len(_get_summary_events(classifier1.model_dir)))
        with self.assertRaises(ValueError):
            _get_summary_events(classifier1.model_dir + '/eval')

        # classifier with early stopping - improved accuracy on testing set
        classifier2 = learn.TensorFlowDNNClassifier(
            hidden_units=[10, 20, 10],
            n_classes=3,
            steps=2000,
            config=tf.contrib.learn.RunConfig(save_checkpoints_secs=1))

        classifier2.fit(x_train, y_train, monitors=[val_monitor])
        _ = accuracy_score(y_val, classifier2.predict(x_val))
        _ = accuracy_score(y_test, classifier2.predict(x_test))

        # Note, this test is unstable, so not checking for equality.
        # See stability_test for examples of stability issues.
        if val_monitor.early_stopped:
            self.assertLess(val_monitor.best_step, 2000)
            # Note: because the validation monitor stops after the best score occurs,
            # the accuracy at the current checkpoint is lower.
            # TODO(ipolosukhin): Time machine for restoring old checkpoints?
            # Flaky: best_value is still not always better than the score2 value.
            # self.assertGreater(val_monitor.best_value, score2_val)

            # Early stopped; unstable, so only checking < max.
            self.assertLess(len(_get_summary_events(classifier2.model_dir)),
                            21)
            self.assertLess(
                len(_get_summary_events(classifier2.model_dir + '/eval')), 4)
Example #2
  def testIrisES(self):
    random.seed(42)

    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        test_size=0.2,
                                                        random_state=42)

    x_train, x_val, y_train, y_val = train_test_split(
        x_train, y_train, test_size=0.2)
    val_monitor = learn.monitors.ValidationMonitor(x_val, y_val,
                                                   early_stopping_rounds=100)

    # classifier without early stopping - overfitting
    classifier1 = learn.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
                                                n_classes=3,
                                                steps=1000)
    classifier1.fit(x_train, y_train)
    accuracy_score(y_test, classifier1.predict(x_test))

    # classifier with early stopping - improved accuracy on testing set
    classifier2 = learn.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
                                                n_classes=3,
                                                steps=1000)

    classifier2.fit(x_train, y_train, monitors=[val_monitor])
    accuracy_score(y_test, classifier2.predict(x_test))
Example #3
    def testIrisES(self):
        random.seed(42)

        iris = datasets.load_iris()
        x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=42)

        x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=42)
        val_monitor = learn.monitors.ValidationMonitor(
            x_val,
            y_val,
            every_n_steps=50,
            early_stopping_rounds=100,
            early_stopping_metric="accuracy",
            early_stopping_metric_minimize=False,
        )

        # classifier without early stopping - overfitting
        classifier1 = learn.TensorFlowDNNClassifier(hidden_units=[10, 20, 10], n_classes=3, steps=1000)
        classifier1.fit(x_train, y_train)
        _ = accuracy_score(y_test, classifier1.predict(x_test))

        # Full 1000 steps, 12 summaries and no evaluation summary.
        # 12 summaries = global_step + first + every 100 out of 1000 steps.
        self.assertEqual(12, len(_get_summary_events(classifier1.model_dir)))
        with self.assertRaises(ValueError):
            _get_summary_events(classifier1.model_dir + "/eval")

        # classifier with early stopping - improved accuracy on testing set
        classifier2 = learn.TensorFlowDNNClassifier(
            hidden_units=[10, 20, 10],
            n_classes=3,
            steps=2000,
            config=tf.contrib.learn.RunConfig(save_checkpoints_secs=1),
        )

        classifier2.fit(x_train, y_train, monitors=[val_monitor])
        _ = accuracy_score(y_val, classifier2.predict(x_val))
        _ = accuracy_score(y_test, classifier2.predict(x_test))

        # Note, this test is unstable, so not checking for equality.
        # See stability_test for examples of stability issues.
        if val_monitor.early_stopped:
            self.assertLess(val_monitor.best_step, 2000)
            # Note: because the validation monitor stops after the best score occurs,
            # the accuracy at the current checkpoint is lower.
            # TODO(ipolosukhin): Time machine for restoring old checkpoints?
            # Flaky: best_value is still not always better than the score2 value.
            # self.assertGreater(val_monitor.best_value, score2_val)

            # Early stopped; unstable, so only checking < max.
            self.assertLess(len(_get_summary_events(classifier2.model_dir)), 21)
            # Eval typically has ~6 events, but it varies based on the run.
            self.assertLess(len(_get_summary_events(classifier2.model_dir + "/eval")), 8)
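
The TensorFlowDNNClassifier and ValidationMonitor APIs used in the examples above shipped with tf.contrib.learn and have since been removed from TensorFlow. The sketch below shows the same train/validation/test split plus early stopping with tf.keras and scikit-learn; the layer sizes mirror the test, while the epoch count, patience, and optimizer are illustrative assumptions.

import tensorflow as tf
from sklearn import datasets
from sklearn.model_selection import train_test_split

iris = datasets.load_iris()
x_train, x_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=42)
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42)

model = tf.keras.Sequential([
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(20, activation='relu'),
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(3, activation='softmax'),
])
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Stop once validation accuracy has not improved for 20 epochs, roughly
# mirroring the ValidationMonitor's early_stopping_rounds behaviour.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy', patience=20, restore_best_weights=True)
model.fit(x_train, y_train, epochs=200,
          validation_data=(x_val, y_val),
          callbacks=[early_stop], verbose=0)
print(model.evaluate(x_test, y_test, verbose=0))  # [test loss, test accuracy]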
Example #4
  def testIrisMomentum(self):
    random.seed(42)

    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        test_size=0.2,
                                                        random_state=42)

    def custom_optimizer(learning_rate):
      return tf.train.MomentumOptimizer(learning_rate, 0.9)

    cont_features = [
        tf.contrib.layers.real_valued_column("", dimension=4)]
    classifier = learn.TensorFlowDNNClassifier(
        feature_columns=cont_features,
        hidden_units=[10, 20, 10],
        n_classes=3,
        steps=400,
        learning_rate=0.01,
        optimizer=custom_optimizer)
    classifier.fit(x_train, y_train)
    score = accuracy_score(y_test, classifier.predict(x_test))

    self.assertGreater(score, 0.65, "Failed with score = {0}".format(score))
Example #5
def get_datasets(csv_path, test_size=0.1, image_size=28, color=True):
    """ トレーニングとテスト用のデータセットを取得
    @param
        csv_path        データセットcsv
        test_size       データセットをテストに利用する割合
        image_size      画像の1辺のpixel数
        color           カラー画像フラグ
    @return
        x_train         トレーニングデータセット(特徴)
        x_test          テストデータセット(特徴)
        y_train         トレーニングデータセット(答えラベル)
        y_test          テストデータセット(答えラベル)
    """
    csv_reader = csv.reader(io.open(csv_path, 'r', encoding='utf-8'),
                            delimiter=',')
    labels = get_labels(csv_path)
    X = []
    y = []
    for row in csv_reader:
        # vectorized image
        X.append(image_to_vector(row[0], image_size=image_size, color=color))
        # prepare the answer label in one-of-K (one-hot) form
        one_of_k = np.zeros(len(labels))
        one_of_k.put(labels.index(row[1]), 1)
        y.append(one_of_k)
    return train_test_split(np.array(X),
                            np.array(y),
                            test_size=test_size,
                            random_state=42)
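
A minimal usage sketch of get_datasets(), assuming a CSV whose rows are image_path,label pairs (the loop above reads row[0] as the image path and row[1] as the label); the file name 'dataset.csv' is a placeholder.

# Hypothetical call; 'dataset.csv' is a placeholder path.
x_train, x_test, y_train, y_test = get_datasets('dataset.csv',
                                                test_size=0.1,
                                                image_size=28,
                                                color=True)
print(x_train.shape, y_train.shape)  # image vectors and one-hot labels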
Example #6
    def testIrisMomentum(self):
        random.seed(42)

        iris = datasets.load_iris()
        x_train, x_test, y_train, y_test = train_test_split(iris.data,
                                                            iris.target,
                                                            test_size=0.2,
                                                            random_state=42)

        def custom_optimizer(learning_rate):
            return tf.train.MomentumOptimizer(learning_rate, 0.9)

        classifier = learn.TensorFlowDNNClassifier(
            hidden_units=[10, 20, 10],
            feature_columns=learn.infer_real_valued_columns_from_input(
                x_train),
            n_classes=3,
            steps=400,
            learning_rate=0.01,
            optimizer=custom_optimizer)
        classifier.fit(x_train, y_train)
        score = accuracy_score(y_test, classifier.predict(x_test))

        self.assertGreater(score, 0.65,
                           "Failed with score = {0}".format(score))
Example #7
  def testIrisMomentum(self):
    random.seed(42)

    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        test_size=0.2,
                                                        random_state=42)
    # setup exponential decay function
    def exp_decay(global_step):
      return tf.train.exponential_decay(learning_rate=0.1,
                                        global_step=global_step,
                                        decay_steps=100,
                                        decay_rate=0.001)

    def custom_optimizer(learning_rate):
      return tf.train.MomentumOptimizer(learning_rate, 0.9)

    classifier = learn.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
                                               n_classes=3,
                                               steps=400,
                                               learning_rate=exp_decay,
                                               optimizer=custom_optimizer)
    classifier.fit(x_train, y_train)
    score = accuracy_score(y_test, classifier.predict(x_test))

    self.assertGreater(score, 0.65, "Failed with score = {0}".format(score))
Example #8
    def testIrisMomentum(self):
        random.seed(42)

        iris = datasets.load_iris()
        x_train, x_test, y_train, y_test = train_test_split(iris.data,
                                                            iris.target,
                                                            test_size=0.2,
                                                            random_state=42)

        def custom_optimizer():
            return momentum_lib.MomentumOptimizer(learning_rate=0.01,
                                                  momentum=0.9)

        classifier = learn.DNNClassifier(
            hidden_units=[10, 20, 10],
            feature_columns=learn.infer_real_valued_columns_from_input(
                x_train),
            n_classes=3,
            optimizer=custom_optimizer,
            config=learn.RunConfig(tf_random_seed=1))
        classifier.fit(x_train, y_train, steps=400)
        predictions = np.array(list(classifier.predict_classes(x_test)))
        score = accuracy_score(y_test, predictions)

        self.assertGreater(score, 0.65,
                           "Failed with score = {0}".format(score))
Example #9
    def testIrisMomentum(self):
        random.seed(42)

        iris = datasets.load_iris()
        X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                            iris.target,
                                                            test_size=0.2,
                                                            random_state=42)

        # setup exponential decay function
        def exp_decay(global_step):
            return tf.train.exponential_decay(learning_rate=0.1,
                                              global_step=global_step,
                                              decay_steps=100,
                                              decay_rate=0.001)

        custom_optimizer = lambda learning_rate: tf.train.MomentumOptimizer(
            learning_rate, 0.9)
        classifier = learn.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
                                                   n_classes=3,
                                                   steps=800,
                                                   learning_rate=exp_decay,
                                                   optimizer=custom_optimizer)
        classifier.fit(X_train, y_train)
        score = accuracy_score(y_test, classifier.predict(X_test))

        self.assertGreater(score, 0.7, "Failed with score = {0}".format(score))
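
The learning_rate=exp_decay pattern above also depends on the removed TensorFlowDNNClassifier. A minimal sketch of the same momentum-plus-exponential-decay setup with tf.keras, reusing the constants from the test; the commented compile call assumes a Keras model named model exists.

import tensorflow as tf

# Exponential decay schedule, mirroring exp_decay() above.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.1, decay_steps=100, decay_rate=0.001)
# Momentum optimizer, mirroring custom_optimizer() above.
optimizer = tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9)
# model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy')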
Example #10
def train(tokenizer, model_dir):

    word_index, embeddings_matrix = generate_embeddings(tokenizer)

    x_train, x_validate, y_train, y_validate = train_test_split(
        data['content'], data['label'], test_size=0.1)

    list_tokenized_train = tokenizer.texts_to_sequences(x_train)
    input_train = sequence.pad_sequences(list_tokenized_train, maxlen=maxlen)

    list_tokenized_validation = tokenizer.texts_to_sequences(x_validate)
    input_validation = sequence.pad_sequences(list_tokenized_validation,
                                              maxlen=maxlen)

    y_train = keras.utils.to_categorical(y_train, num_classes=3)
    y_validate = keras.utils.to_categorical(y_validate, num_classes=3)

    model1 = CNN().model(embeddings_matrix, maxlen, word_index)
    file_path = model_dir + model_name % "{epoch:02d}"
    checkpoint = ModelCheckpoint(file_path, verbose=2, save_weights_only=True)
    metrics = Metrics()
    callbacks_list = [checkpoint, metrics]
    model1.fit(input_train,
               y_train,
               batch_size=batch_size,
               epochs=epochs,
               validation_data=(input_validation, y_validate),
               callbacks=callbacks_list,
               verbose=2)
    del model1
Example #11
def dividirDataset(dataset):
    X = dataset[:, 0:21]
    Y = dataset[:, 21]

    X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                        Y,
                                                        test_size=0.35,
                                                        random_state=0)
    print(X_train.shape)
    print(X_test.shape)
    print(Y_train.shape)
    print(Y_test.shape)

    return (X_train, X_test, Y_train, Y_test)
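
For a labeled dataset like this one, a stratified split keeps the class proportions comparable between the train and test sets. A minimal sketch assuming the same 22-column layout as dividirDataset above; the function name is hypothetical.

from sklearn.model_selection import train_test_split


def dividirDatasetEstratificado(dataset):
    # Same column layout as dividirDataset; stratify=Y preserves class ratios.
    X = dataset[:, 0:21]
    Y = dataset[:, 21]
    return train_test_split(X, Y, test_size=0.35, random_state=0, stratify=Y)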
Example #12
    def split_data(self, x, y):
        x_train, x_validate, y_train, y_validate = train_test_split(
            x, y, test_size=0.1)

        tokenizer = Tokenizer(num_words=self.max_words)
        tokenizer.fit_on_texts(x_train)
        train_seq = tokenizer.texts_to_sequences(x_train)
        x_train = sequence.pad_sequences(train_seq, maxlen=self.max_len)

        # Reuse the tokenizer fitted on the training texts; refitting on the
        # validation texts would shift the word indices.
        val_seq = tokenizer.texts_to_sequences(x_validate)
        x_validate = sequence.pad_sequences(val_seq, maxlen=self.max_len)

        y_train = keras.utils.to_categorical(y_train, num_classes=3)
        y_validate = keras.utils.to_categorical(y_validate, num_classes=3)

        return x_train, y_train, x_validate, y_validate
Example #13
File: main.py Project: MaxBurgert/mlgame
def train_model(training_data):
    X = np.array([i[0]
                  for i in training_data]).reshape(-1,
                                                   len(training_data[0][0]))
    y = np.array([i[1]
                  for i in training_data]).reshape(-1,
                                                   len(training_data[0][1]))
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    print("Shapes: X_train {},  X_test {}".format(X_train.shape, X_test.shape))
    model = build_model(input_size=len(X[0]), output_size=len(y[0]))

    history = model.fit(X_train,
                        y_train,
                        batch_size=512,
                        epochs=200,
                        verbose=0,
                        validation_data=(X_test, y_test),
                        shuffle=True)
    return model, history
Example #14
    def testIrisMomentum(self):
        random.seed(42)

        iris = datasets.load_iris()
        x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=42)

        def custom_optimizer():
            return tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9)

        classifier = learn.DNNClassifier(
            hidden_units=[10, 20, 10],
            feature_columns=learn.infer_real_valued_columns_from_input(x_train),
            n_classes=3,
            optimizer=custom_optimizer,
            config=learn.RunConfig(tf_random_seed=1),
        )
        classifier.fit(x_train, y_train, steps=400)
        score = accuracy_score(y_test, classifier.predict(x_test))

        self.assertGreater(score, 0.65, "Failed with score = {0}".format(score))
Example #15
    def __init__(self, load=False):
        self.model = Sequential([
            Embedding(1000, 32),
            LSTM(128, dropout=0.2, recurrent_dropout=0.2),
            Dense(128, activation='relu'),
            Dense(1, activation='sigmoid'),
        ])

        if load:
            self.model.load_weights('sms_model')
        else:
            self.model.compile(metrics=['acc'],
                               optimizer='adam',
                               loss='binary_crossentropy')
            df = pd.read_csv('14 spam data.csv',
                             delimiter=',',
                             encoding='latin-1')[['v1', 'v2']]
            Y = (df.v1 == 'ham').astype('int').values
            X = df.v2
            max_words = 1000
            max_len = 150
            tok = Tokenizer(num_words=max_words)
            tok.fit_on_texts(X)
            sequences = tok.texts_to_sequences(X)
            X = sequence.pad_sequences(sequences, maxlen=max_len)
            X_train, X_test, y_train, y_test = train_test_split(
                X, Y, test_size=0.1, random_state=42)

            for i in range(5):
                self.model.fit(X_train,
                               y_train,
                               epochs=1,
                               batch_size=32,
                               verbose=1)
                self.model.save_weights('sms_model')

            print(self.model.evaluate(X_test, y_test))
Example #16
    def testIrisMomentum(self):
        random.seed(42)

        iris = datasets.load_iris()
        x_train, x_test, y_train, y_test = train_test_split(iris.data,
                                                            iris.target,
                                                            test_size=0.2,
                                                            random_state=42)

        def custom_optimizer():
            return tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9)

        cont_features = [tf.contrib.layers.real_valued_column("", dimension=4)]
        classifier = learn.DNNClassifier(
            feature_columns=cont_features,
            hidden_units=[10, 20, 10],
            n_classes=3,
            optimizer=custom_optimizer,
            config=learn.RunConfig(tf_random_seed=1))
        classifier.fit(x_train, y_train, steps=400)
        score = accuracy_score(y_test, classifier.predict(x_test))

        self.assertGreater(score, 0.65,
                           "Failed with score = {0}".format(score))
Example #17
def train_model(X_data, y_data):
    # https://keras.io/examples/generative/lstm_character_level_text_generation/
    # Clean the dataset and generate the required data
    print('-' * 5 + ' ' * 3 + "Cleaning the dataset" + ' ' * 3 + '-' * 5)
    X_doc, y_doc = generate_data(X_data, y_data)
    output_example_data(X_doc, y_doc)
    # ----------------------------------------------------------------------
    print('-' * 5 + ' ' * 3 + "填充数据集" + ' ' * 3 + '-' * 5)
    X_seq = pad_sequences(X_doc, maxlen = max_len, padding = 'post')
    y_seq = y_doc
    # ----------------------------------------------------------------------
    print('-' * 5 + ' ' * 3 + "拆分数据集" + ' ' * 3 + '-' * 5)
    X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, random_state = seed, stratify = y_seq)
    print("训练数据集(train_data):%d 条数据;测试数据集(test_data):%d 条数据" % ((len(y_train)), (len(y_test))))
    # ----------------------------------------------------------------------
    # Build the model
    print('-' * 5 + ' ' * 3 + "Building the network model" + ' ' * 3 + '-' * 5)
    model = construct_model()
    print(model.summary())

    # ----------------------------------------------------------------------
    # Print the training results
    def output_result():
        print("Model prediction -->", end = '')
        print("Loss = {}, accuracy = {}".format(results[0], results[1]))
        if label_name == 'age':
            np_argmax = np.argmax(predictions, 1)
            # print("前 30 个真实的预测数据 =", np.array(X_test[:30], dtype = int))
            print("前 30 个真实的目标数据 =", np.array(y_test[:30], dtype = int))
            print("前 30 个预测的目标数据 =", np.array(np.argmax(predictions[:30], 1), dtype = int))
            print("前 30 个预测的结果数据 =", )
            print(predictions[:30])
            for i in range(10):
                print("类别 {0} 的真实数目:{1},预测数目:{2}".format(i, sum(y_test == i), sum(np_argmax == i)))
        elif label_name == 'gender':
            predict_gender = np.array(predictions > 0.5, dtype = int)
            print("sum(abs(predictions>0.5-y_test_scaled))/sum(y_test_scaled) = error% =",
                  sum(abs(predict_gender - y_test)) / sum(y_test) * 100, '%')
            print("前100个真实的目标数据 =", np.array(y_test[:100], dtype = int))
            print("前100个预测的目标数据 =", np.array(predict_gender[:100], dtype = int))
            print("sum(predictions>0.5) =", sum(predict_gender))
            print("sum(y_test) =", sum(y_test))
            print("sum(abs(predictions-y_test))=error_number=", sum(abs(predict_gender - y_test)))
        else:
            print("错误的标签名称:", label_name)
            pass
        pass

    # ----------------------------------------------------------------------
    # Train the network model
    # With a validation set
    print('-' * 5 + ' ' * 3 + "Training the network model with a validation set" + ' ' * 3 + '-' * 5)
    model.fit(X_train, y_train, epochs = epochs, batch_size = batch_size,
              validation_split = 0.2, use_multiprocessing = True, verbose = 2)
    results = model.evaluate(X_test, y_test, verbose = 0)
    predictions = model.predict(X_test).squeeze()
    output_result()

    # ----------------------------------------------------------------------
    # Without a validation set, with half as many epochs
    print('-' * 5 + ' ' * 3 + "Training the network model without a validation set, half as many epochs" + ' ' * 3 + '-' * 5)
    model.fit(X_train, y_train, epochs = epochs // 2, batch_size = batch_size,
              use_multiprocessing = True, verbose = 2)
    results = model.evaluate(X_test, y_test, verbose = 0)
    predictions = model.predict(X_test).squeeze()
    output_result()
    pass
Example #18
def main(_):
    if not FLAGS.data_path:
        raise ValueError("Must set --data_path to PTB data directory")
    gpus = [
        x.name for x in device_lib.list_local_devices()
        if x.device_type == "GPU"
    ]
    if FLAGS.num_gpus > len(gpus):
        raise ValueError("Your machine has only %d gpus "
                         "which is less than the requested --num_gpus=%d." %
                         (len(gpus), FLAGS.num_gpus))

    i = 0
    data = []
    L = file_name(FLAGS.data_path)
    for dir in L:
        if i == 0:
            data = np.loadtxt(dir)
        else:
            tmp = np.loadtxt(dir)
            data = np.vstack((data, tmp))
        i += 1

    random.shuffle(data)
    train_x, test_x, train_y, test_y = train_test_split(data[:, :-1],
                                                        data[:, -1],
                                                        test_size=0.2,
                                                        random_state=0)

    train_x, valid_x, train_y, valid_y = train_test_split(train_x[:, :],
                                                          train_y[:],
                                                          test_size=0.2,
                                                          random_state=42)

    config = get_config()
    eval_config = get_config()
    eval_config.batch_size = 1

    # with tf.Graph().as_default():

    initializer = tf.random_uniform_initializer(-config.init_scale,
                                                config.init_scale)
    with tf.name_scope("Train"):
        train_input = PTBInput(config=config,
                               x_data=train_x,
                               y_data=train_y,
                               name="TrainInput")
        with tf.variable_scope("Model", reuse=None, initializer=initializer):
            m = PTBModel(is_training=True, config=config, input_=train_input)
        tf.summary.scalar("Training Loss", m.cost)
        tf.summary.scalar("Learning Rate", m.lr)

    with tf.name_scope("Valid"):
        valid_input = PTBInput(config=config,
                               x_data=valid_x,
                               y_data=valid_y,
                               name="ValidInput")
        with tf.variable_scope("Model", reuse=True, initializer=initializer):
            mvalid = PTBModel(is_training=False,
                              config=config,
                              input_=valid_input)
        tf.summary.scalar("Validation Loss", mvalid.cost)

    with tf.name_scope("Test"):
        test_input = PTBInput(config=eval_config,
                              x_data=test_x,
                              y_data=test_y,
                              name="TestInput")
        with tf.variable_scope("Model", reuse=True, initializer=initializer):
            mtest = PTBModel(is_training=False,
                             config=eval_config,
                             input_=test_input)

    # models = {"Train": m, "Valid": mvalid, "Test": mtest}
    # for name, model in models.items():
    #   model.export_ops(name)
    # metagraph = tf.train.export_meta_graph()
    # if tf.__version__ < "1.1.0" and FLAGS.num_gpus > 1:
    #   raise ValueError("num_gpus > 1 is not supported for TensorFlow versions "
    #                    "below 1.1.0")
    soft_placement = False
    #   if FLAGS.num_gpus > 1:
    #     soft_placement = True
    #     util.auto_parallel(metagraph, m)
    #
    # with tf.Graph().as_default():
    #   tf.train.import_meta_graph(metagraph)
    #   # tf.train.import_meta_graph("models/model.ckpt-0.meta")
    #   for model in models.values():
    #     model.import_ops()
    sv = tf.train.Supervisor(logdir=FLAGS.save_path)
    #   sv = tf.train.Supervisor()
    config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)
    with sv.managed_session(config=config_proto) as session:
        for i in range(config.max_max_epoch):
            lr_decay = config.lr_decay**max(i + 1 - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)
            print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
            train_perplexity, cost = run_epoch(session,
                                               m,
                                               eval_op=m.train_op,
                                               verbose=True)
            print("Epoch: %d Train Perplexity: %.3f Cost: %.3f" %
                  (i + 1, train_perplexity, cost))
            valid_perplexity, cost = run_epoch(session, mvalid)
            print("Epoch: %d Valid Perplexity: %.3f Cost: %.3f" %
                  (i + 1, valid_perplexity, cost))

        test_perplexity, cost = run_epoch(session, mtest)
        print("Test Perplexity: %.3f accuracy: %.3f" %
              (test_perplexity, 100 - cost))

        if FLAGS.save_path:
            print("Saving model to %s." % FLAGS.save_path)
            sv.saver.save(session, FLAGS.save_path, global_step=sv.global_step)
Example #19
def main(_):
  if not FLAGS.data_path:
    raise ValueError("Must set --data_path to PTB data directory")
  gpus = [
    x.name for x in device_lib.list_local_devices() if x.device_type == "GPU"
  ]
  if FLAGS.num_gpus > len(gpus):
    raise ValueError(
      "Your machine has only %d gpus "
      "which is less than the requested --num_gpus=%d."
      % (len(gpus), FLAGS.num_gpus))

  i = 0
  data = []
  L = file_name(FLAGS.data_path)
  for dir in L:
    if i == 0:
      data = np.loadtxt(dir)
    else:
      tmp = np.loadtxt(dir)
      data = np.vstack((data, tmp))
    i += 1

  random.shuffle(data)
  train_x, test_x, train_y, test_y = train_test_split(data[:, :-1],
                                                      data[:, -1],
                                                      test_size=0.2,
                                                      random_state=0)

  train_x, valid_x, train_y, valid_y = train_test_split(train_x[:, :],
                                                        train_y[:],
                                                        test_size=0.2,
                                                        random_state=42)

  config = get_config()
  eval_config = get_config()
  eval_config.batch_size = 1

  train_epoch = len(train_y) // config.batch_size
  valid_epoch = len(valid_y) // config.batch_size
  test_epoch = len(test_y) // eval_config.batch_size

  '''
  train_x1 = tf.convert_to_tensor(train_x1, name="train_datax1", dtype=tf.float32)
  train_y1 = tf.convert_to_tensor(train_y1, name="train_datay1", dtype=tf.int32)
  valid_x1 = tf.convert_to_tensor(valid_x1, name="valid_datax1", dtype=tf.float32)
  valid_y1 = tf.convert_to_tensor(valid_y1, name="valid_datay1", dtype=tf.int32)
  test_x1 = tf.convert_to_tensor(test_x1, name="test_datax1", dtype=tf.float32)
  test_y1 = tf.convert_to_tensor(test_y1, name="test_datay1", dtype=tf.int32)
  '''
  with tf.Graph().as_default(), tf.Session() as session:

    initializer = tf.random_uniform_initializer(-config.init_scale,
                                               config.init_scale)
    with tf.name_scope("Train"):
      with tf.variable_scope("Model", reuse=tf.AUTO_REUSE, initializer=initializer):
        m = PTB(config=config, is_training=True, name="Train")
        tf.summary.scalar("Training Loss", m.cost)
        tf.summary.scalar("Learning Rate", m.lr)
        summary_op_m = tf.summary.merge_all()
    with tf.name_scope("Valid"):
      with tf.variable_scope("Model", reuse=True, initializer=initializer):
        mvalid = PTB(config=config, is_training=False, name="Valid")
    with tf.name_scope("Test"):
      with tf.variable_scope("Model", reuse=True, initializer=initializer):
        mtest = PTB(config=eval_config, is_training=False, name="Test")


    summary_writer = tf.summary.FileWriter('./lstm_logs',session.graph)

    tf.initialize_all_variables().run()  # initialize the variables

    for i in range(config.max_max_epoch):

      lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
      m.assign_lr(session, config.learning_rate * lr_decay)
      print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
      train_perplexity, cost, accuracy = run_epoch(session, m, train_x, train_y, train_epoch, eval_op=m.train_op,
                                         verbose=True)
      print("Epoch: %d Train Perplexity: %.3f Cost: %.3f Accuracy: %.3f" % (i + 1, train_perplexity, cost, accuracy))
      valid_perplexity, cost, accuracy = run_epoch(session, mvalid, valid_x, valid_y, valid_epoch)
      print("Epoch: %d Valid Perplexity: %.3f Cost: %.3f Accuracy: %.3f" % (i + 1, valid_perplexity, cost, accuracy))

      test_perplexity, cost, accuracy = run_epoch(session, mtest, test_x, test_y, test_epoch)
      print("Test Perplexity: %.3f accuracy: %.3f" % (test_perplexity, accuracy))
      saver = tf.train.Saver()
      saver.save(session, './model/model.ckpt', global_step=i)
      summary_str = run_epoch_summary(session, m, summary_op_m, train_x, train_y, eval_op=m.train_op)
      summary_writer.add_summary(summary_str, i)
Example #20
from tensorflow.contrib.learn.python.learn.estimators._sklearn import train_test_split
from tensorflow.python.keras import Sequential
from tensorflow.python.keras.layers import Dense, Dropout

numpy.random.seed(2)

# loading load prima indians diabetes dataset, past 5 years of medical history
dataset = numpy.loadtxt("prima-indians-diabetes.csv", delimiter=",")

# split into input (X) and output (Y) variables, splitting csv data
X = dataset[:, 0:8]
Y = dataset[:, 8]

# split X, Y into a train and test set
x_train, x_test, y_train, y_test = train_test_split(X,
                                                    Y,
                                                    test_size=0.2,
                                                    random_state=42)

# create model, add dense layers one by one specifying activation function
model = Sequential()
model.add(Dense(15, input_dim=8,
                activation='relu'))  # input layer requires input_dim param
model.add(Dense(10, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dropout(.2))
model.add(Dense(1, activation='sigmoid')
          )  # sigmoid instead of relu for final probability between 0 and 1

# compile the model, adam gradient descent (optimized)
model.compile(loss="binary_crossentropy",
              optimizer="adam",
Example #21
def run():
    # Load data from the local cache
    title_count, title_set, genres2int, features, targets_values, \
    ratings, users, movies, data, movies_orig, users_orig = pickle.load(open('preprocess.p', mode='rb'))


    # Basic parameters
    # Dimension of the embedding matrices
    embed_dim = 32
    # Number of user IDs
    # take: the first argument is the column, the second is the axis
    uid_max = max(features.take(0, 1)) + 1  # 6040
    # Number of genders
    gender_max = max(features.take(2, 1)) + 1  # 1 + 1 = 2
    # Number of age categories
    age_max = max(features.take(3, 1)) + 1  # 6 + 1 = 7
    # Number of occupations
    job_max = max(features.take(4, 1)) + 1  # 20 + 1 = 21

    # Number of movie IDs
    movie_id_max = max(features.take(1, 1)) + 1  # 3952
    # Number of movie genres
    movie_categories_max = max(genres2int.values()) + 1  # 18 + 1 = 19
    # Number of distinct words in movie titles
    movie_title_max = len(title_set)  # 5216

    # Combine the movie-genre embedding vectors by summation; using mean was considered but not implemented
    combiner = "sum"

    # Movie title length
    sentences_size = title_count  # = 15
    # Sliding windows for the text convolution: 2, 3, 4 and 5 words
    window_sizes = {2, 3, 4, 5}
    # Number of text convolution filters
    filter_num = 8

    # Map movie ID to index; IDs in the dataset do not match row indices (e.g. the movie in row 5 is not necessarily ID 5)
    movieid2idx = {val[0]: i for i, val in enumerate(movies.values)}


    # Hyperparameters

    # Number of Epochs
    num_epochs = 5
    # Batch Size
    batch_size = 256

    dropout_keep = 0.5
    # Learning Rate
    learning_rate = 0.0001
    # Show stats for every n number of batches
    show_every_n_batches = 20

    save_dir = './save'



    def get_user_embedding(uid, user_gender, user_age, user_job):
        """
        Define the user embedding matrices
        :param uid:
        :param user_gender:
        :param user_age:
        :param user_job:
        :return:
        """
        with tf.name_scope("user_embedding"):
            # Create a uid_max x embed_dim matrix
            uid_embed_matrix = tf.Variable(tf.random_uniform([uid_max, embed_dim], -1, 1), name="uid_embed_matrix")
            uid_embed_layer = tf.nn.embedding_lookup(uid_embed_matrix, uid, name="uid_embed_layer")

            gender_embed_matrix = tf.Variable(tf.random_uniform([gender_max, embed_dim // 2], -1, 1),
                                              name="gender_embed_matrix")
            gender_embed_layer = tf.nn.embedding_lookup(gender_embed_matrix, user_gender, name="gender_embed_layer")

            age_embed_matrix = tf.Variable(tf.random_uniform([age_max, embed_dim // 2], -1, 1), name="age_embed_matrix")
            age_embed_layer = tf.nn.embedding_lookup(age_embed_matrix, user_age, name="age_embed_layer")

            job_embed_matrix = tf.Variable(tf.random_uniform([job_max, embed_dim // 2], -1, 1), name="job_embed_matrix")
            job_embed_layer = tf.nn.embedding_lookup(job_embed_matrix, user_job, name="job_embed_layer")
        return uid_embed_layer, gender_embed_layer, age_embed_layer, job_embed_layer


    def get_user_feature_layer(uid_embed_layer, gender_embed_layer, age_embed_layer, job_embed_layer):
        """
        Build the user feature network
        :param uid_embed_layer:
        :param gender_embed_layer:
        :param age_embed_layer:
        :param job_embed_layer:
        :return:
        """
        with tf.name_scope("user_fc"):
            # First fully connected layer
            uid_fc_layer = tf.layers.dense(uid_embed_layer, embed_dim, name="uid_fc_layer", activation=tf.nn.relu)
            gender_fc_layer = tf.layers.dense(gender_embed_layer, embed_dim, name="gender_fc_layer", activation=tf.nn.relu)
            age_fc_layer = tf.layers.dense(age_embed_layer, embed_dim, name="age_fc_layer", activation=tf.nn.relu)
            job_fc_layer = tf.layers.dense(job_embed_layer, embed_dim, name="job_fc_layer", activation=tf.nn.relu)

            # Second fully connected layer: concatenate all the outputs above
            # (?, 1, 128), 0: batch, 1: height, 2: width
            user_combine_layer = tf.concat([uid_fc_layer, gender_fc_layer, age_fc_layer, job_fc_layer], 2)
            user_combine_layer = tf.contrib.layers.fully_connected(user_combine_layer, 200, tf.tanh)  # (?, 1, 200)

            # Rearrange the output shape
            user_combine_layer_flat = tf.reshape(user_combine_layer, [-1, 200])
        return user_combine_layer, user_combine_layer_flat

    def get_movie_id_embed_layer(movie_id):
        """
        Movie ID embedding layer
        :param movie_id:
        :return:
        """
        with tf.name_scope("movie_embedding"):
            movie_id_embed_matrix = tf.Variable(tf.random_uniform([movie_id_max, embed_dim], -1, 1),  # -1, 1 are the min and max values
                                                name="movie_id_embed_matrix")
            movie_id_embed_layer = tf.nn.embedding_lookup(movie_id_embed_matrix, movie_id, name="movie_id_embed_layer")
        return movie_id_embed_layer

    def get_movie_categories_layers(movie_categories):
        """
        Movie genre embedding layer
        :param movie_categories:
        :return:
        """
        with tf.name_scope("movie_categories_layers"):
            movie_categories_embed_matrix = tf.Variable(tf.random_uniform([movie_categories_max, embed_dim], -1, 1),
                                                        name="movie_categories_embed_matrix")
            movie_categories_embed_layer = tf.nn.embedding_lookup(movie_categories_embed_matrix, movie_categories,
                                                                  name="movie_categories_embed_layer")
            # Either sum or average here
            if combiner == "sum":
                # Sum each row, i.e. add up the features of all the genres
                movie_categories_embed_layer = tf.reduce_sum(movie_categories_embed_layer, axis=1, keep_dims=True)
        # elif combiner == "mean":

        return movie_categories_embed_layer

    def get_movie_cnn_layer(movie_titles):
        """
        Movie title embedding and convolution
        :param movie_titles:
        :return:
        """
        # Look up the embedding vector of each word of the movie title from the embedding matrix
        with tf.name_scope("movie_embedding"):
            movie_title_embed_matrix = tf.Variable(tf.random_uniform([movie_title_max, embed_dim], -1, 1),
                                                   name="movie_title_embed_matrix")
            movie_title_embed_layer = tf.nn.embedding_lookup(movie_title_embed_matrix, movie_titles,
                                                             name="movie_title_embed_layer")
            # Add a channels dimension
            movie_title_embed_layer_expand = tf.expand_dims(movie_title_embed_layer, -1)  # -1 means the last axis

        # Apply convolutions with different kernel sizes and max pooling to the text embedding layer
        pool_layer_lst = []
        for window_size in window_sizes:
            with tf.name_scope("movie_txt_conv_maxpool_{}".format(window_size)):
                # Truncated normal distribution
                filter_weights = tf.Variable(tf.truncated_normal([window_size, embed_dim, 1, filter_num], stddev=0.1),
                                             name="filter_weights")
                filter_bias = tf.Variable(tf.constant(0.1, shape=[filter_num]), name="filter_bias")

                conv_layer = tf.nn.conv2d(movie_title_embed_layer_expand, filter_weights, [1, 1, 1, 1], padding="VALID",
                                          name="conv_layer")
                relu_layer = tf.nn.relu(tf.nn.bias_add(conv_layer, filter_bias), name="relu_layer")

                maxpool_layer = tf.nn.max_pool(relu_layer, [1, sentences_size - window_size + 1, 1, 1], [1, 1, 1, 1],
                                               padding="VALID", name="maxpool_layer")
                pool_layer_lst.append(maxpool_layer)

        # Dropout layer
        with tf.name_scope("pool_dropout"):
            pool_layer = tf.concat(pool_layer_lst, 3, name="pool_layer")
            max_num = len(window_sizes) * filter_num
            pool_layer_flat = tf.reshape(pool_layer, [-1, 1, max_num], name="pool_layer_flat")

            dropout_layer = tf.nn.dropout(pool_layer_flat, dropout_keep_prob, name="dropout_layer")
        return pool_layer_flat, dropout_layer

    def get_movie_feature_layer(movie_id_embed_layer, movie_categories_embed_layer, dropout_layer):
        """

        :param movie_id_embed_layer:
        :param movie_categories_embed_layer:
        :param dropout_layer:
        :return:
        """
        with tf.name_scope("movie_fc"):
            # First fully connected layer
            movie_id_fc_layer = tf.layers.dense(movie_id_embed_layer, embed_dim, name="movie_id_fc_layer",
                                                activation=tf.nn.relu)
            movie_categories_fc_layer = tf.layers.dense(movie_categories_embed_layer, embed_dim,
                                                        name="movie_categories_fc_layer", activation=tf.nn.relu)

            # Second fully connected layer
            movie_combine_layer = tf.concat([movie_id_fc_layer, movie_categories_fc_layer, dropout_layer],
                                            2)  # (?, 1, 96)
            movie_combine_layer = tf.contrib.layers.fully_connected(movie_combine_layer, 200, tf.tanh)  # (?, 1, 200)

            movie_combine_layer_flat = tf.reshape(movie_combine_layer, [-1, 200])
        return movie_combine_layer, movie_combine_layer_flat


    # Build the computation graph
    tf.reset_default_graph()
    train_graph = tf.Graph()
    with train_graph.as_default():
        # Get the input placeholders
        uid, user_gender, user_age, user_job, movie_id, movie_categories, movie_titles, targets, lr, dropout_keep_prob = get_inputs()
        # Get the 4 user embedding vectors
        uid_embed_layer, gender_embed_layer, age_embed_layer, job_embed_layer = get_user_embedding(uid, user_gender,
                                                                                                   user_age, user_job)
        # Get the user features
        user_combine_layer, user_combine_layer_flat = get_user_feature_layer(uid_embed_layer, gender_embed_layer,
                                                                             age_embed_layer, job_embed_layer)
        # Get the movie ID embedding vector
        movie_id_embed_layer = get_movie_id_embed_layer(movie_id)
        # Get the movie genre embedding vector
        movie_categories_embed_layer = get_movie_categories_layers(movie_categories)
        # Get the movie title feature vector
        pool_layer_flat, dropout_layer = get_movie_cnn_layer(movie_titles)
        # Get the movie features
        movie_combine_layer, movie_combine_layer_flat = get_movie_feature_layer(movie_id_embed_layer,
                                                                                movie_categories_embed_layer,
                                                                                dropout_layer)
        # Compute the rating. Note that the two alternative schemes give the inference op different names; the tensor must be fetched by name when making recommendations later
        with tf.name_scope("inference"):
            # Scheme 1: feed the user and movie features through a fully connected layer that outputs a single value
            #         inference_layer = tf.concat([user_combine_layer_flat, movie_combine_layer_flat], 1)  #(?, 200)
            #         inference = tf.layers.dense(inference_layer, 1,
            #                                     kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
            #                                     kernel_regularizer=tf.nn.l2_loss, name="inference")
            # Scheme 2: simply multiply the user and movie feature matrices to get a predicted rating
            #        inference = tf.matmul(user_combine_layer_flat, tf.transpose(movie_combine_layer_flat))
            inference = tf.reduce_sum(user_combine_layer_flat * movie_combine_layer_flat, axis=1)
            inference = tf.expand_dims(inference, axis=1)

        with tf.name_scope("loss"):
            # MSE loss, regressing the computed value onto the rating
            cost = tf.losses.mean_squared_error(targets, inference)
            loss = tf.reduce_mean(cost)
            # Optimize the loss
            #     train_op = tf.train.AdamOptimizer(lr).minimize(loss)  #cost
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(lr)
        gradients = optimizer.compute_gradients(loss)  # cost
        train_op = optimizer.apply_gradients(gradients, global_step=global_step)


    import time
    import datetime

    losses = {'train': [], 'test': []}

    with tf.Session(graph=train_graph) as sess:

        # Collect data for TensorBoard
        # Keep track of gradient values and sparsity
        grad_summaries = []
        for g, v in gradients:
            if g is not None:
                grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name.replace(':', '_')), g)
                sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name.replace(':', '_')),
                                                     tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
        grad_summaries_merged = tf.summary.merge(grad_summaries)

        # Output directory for models and summaries
        timestamp = str(int(time.time()))
        out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
        print("Writing to {}\n".format(out_dir))

        # Summaries for loss and accuracy
        loss_summary = tf.summary.scalar("loss", loss)

        # Train Summaries
        train_summary_op = tf.summary.merge([loss_summary, grad_summaries_merged])
        train_summary_dir = os.path.join(out_dir, "summaries", "train")
        train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

        # Inference summaries
        inference_summary_op = tf.summary.merge([loss_summary])
        inference_summary_dir = os.path.join(out_dir, "summaries", "inference")
        inference_summary_writer = tf.summary.FileWriter(inference_summary_dir, sess.graph)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        for epoch_i in range(num_epochs):

            # Split the dataset into training and test sets
            train_X, test_X, train_y, test_y = train_test_split(features,
                                                                targets_values,
                                                                test_size=0.2,
                                                                random_state=0)

            train_batches = get_batches(train_X, train_y, batch_size)
            test_batches = get_batches(test_X, test_y, batch_size)

            # Training iterations; record the training loss
            for batch_i in range(len(train_X) // batch_size):
                x, y = next(train_batches)

                categories = np.zeros([batch_size, 18])
                for i in range(batch_size):
                    categories[i] = x.take(6, 1)[i]

                titles = np.zeros([batch_size, sentences_size])
                for i in range(batch_size):
                    titles[i] = x.take(5, 1)[i]

                feed = {
                    uid: np.reshape(x.take(0, 1), [batch_size, 1]),
                    user_gender: np.reshape(x.take(2, 1), [batch_size, 1]),
                    user_age: np.reshape(x.take(3, 1), [batch_size, 1]),
                    user_job: np.reshape(x.take(4, 1), [batch_size, 1]),
                    movie_id: np.reshape(x.take(1, 1), [batch_size, 1]),
                    movie_categories: categories,  # x.take(6,1)
                    movie_titles: titles,  # x.take(5,1)
                    targets: np.reshape(y, [batch_size, 1]),
                    dropout_keep_prob: dropout_keep,  # dropout_keep
                    lr: learning_rate}

                step, train_loss, summaries, _ = sess.run([global_step, loss, train_summary_op, train_op], feed)  # cost
                losses['train'].append(train_loss)
                train_summary_writer.add_summary(summaries, step)  #

                # Show every <show_every_n_batches> batches
                if (epoch_i * (len(train_X) // batch_size) + batch_i) % show_every_n_batches == 0:
                    time_str = datetime.datetime.now().isoformat()
                    print('{}: Epoch {:>3} Batch {:>4}/{}   train_loss = {:.3f}'.format(
                        time_str,
                        epoch_i,
                        batch_i,
                        (len(train_X) // batch_size),
                        train_loss))

            # Iterate over the test data
            for batch_i in range(len(test_X) // batch_size):
                x, y = next(test_batches)

                categories = np.zeros([batch_size, 18])
                for i in range(batch_size):
                    categories[i] = x.take(6, 1)[i]

                titles = np.zeros([batch_size, sentences_size])
                for i in range(batch_size):
                    titles[i] = x.take(5, 1)[i]

                feed = {
                    uid: np.reshape(x.take(0, 1), [batch_size, 1]),
                    user_gender: np.reshape(x.take(2, 1), [batch_size, 1]),
                    user_age: np.reshape(x.take(3, 1), [batch_size, 1]),
                    user_job: np.reshape(x.take(4, 1), [batch_size, 1]),
                    movie_id: np.reshape(x.take(1, 1), [batch_size, 1]),
                    movie_categories: categories,  # x.take(6,1)
                    movie_titles: titles,  # x.take(5,1)
                    targets: np.reshape(y, [batch_size, 1]),
                    dropout_keep_prob: 1,
                    lr: learning_rate}

                step, test_loss, summaries = sess.run([global_step, loss, inference_summary_op], feed)  # cost

                # Record the test loss
                losses['test'].append(test_loss)
                inference_summary_writer.add_summary(summaries, step)  #

                time_str = datetime.datetime.now().isoformat()
                if (epoch_i * (len(test_X) // batch_size) + batch_i) % show_every_n_batches == 0:
                    print('{}: Epoch {:>3} Batch {:>4}/{}   test_loss = {:.3f}'.format(
                        time_str,
                        epoch_i,
                        batch_i,
                        (len(test_X) // batch_size),
                        test_loss))

        # Save Model
        saver.save(sess, save_dir)  # , global_step=epoch_i
        print('Model Trained and Saved')

    # save_params((save_dir))
    # load_dir = load_params()

    plt.plot(losses['train'], label='Training loss')
    plt.legend()
    _ = plt.ylim()
Example #22
nos = [
    filename for filename in os.listdir('originals/no')
    if filename.lower().endswith('.jpg')
]
for filename in nos:
    image = cv2.imread(f'originals/no/{filename}')
    image = cv2.resize(image, (299, 299))
    image = img_to_array(image)
    data.append(image)
    labels.append(0)  # 0 for no firetruck

data = np.array(data, dtype='float') / 255
labels = np.array(labels)

train_x, test_x, train_y, test_y = train_test_split(data,
                                                    labels,
                                                    test_size=0.25,
                                                    random_state=24601)
train_y = to_categorical(train_y, num_classes=2)
test_y = to_categorical(test_y, num_classes=2)

# train the model on the new data for a few epochs
history1 = model.fit(train_x,
                     train_y,
                     batch_size=64,
                     epochs=3,
                     validation_data=(test_x, test_y))

with open('history_new_layers.json', 'w') as f:
    json.dump(history1.history, f)

# at this point, the top layers are well trained and we can start fine-tuning
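
A minimal sketch of the fine-tuning step the comment above refers to, assuming model is a Keras model with a pretrained convolutional base; the number of unfrozen layers and the learning rate are assumptions.

from tensorflow.keras.optimizers import Adam

# Unfreeze only the last few layers of the pretrained base.
for layer in model.layers[:-20]:
    layer.trainable = False
for layer in model.layers[-20:]:
    layer.trainable = True

# Recompile with a low learning rate so the pretrained weights are only nudged.
model.compile(optimizer=Adam(learning_rate=1e-5),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

history2 = model.fit(train_x,
                     train_y,
                     batch_size=64,
                     epochs=3,
                     validation_data=(test_x, test_y))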
Example #23
    def testIrisES(self):
        random.seed(42)

        iris = datasets.load_iris()
        x_train, x_test, y_train, y_test = train_test_split(iris.data,
                                                            iris.target,
                                                            test_size=0.2,
                                                            random_state=42)

        x_train, x_val, y_train, y_val = train_test_split(x_train,
                                                          y_train,
                                                          test_size=0.2,
                                                          random_state=42)
        val_monitor = learn.monitors.ValidationMonitor(
            x_val,
            y_val,
            every_n_steps=50,
            early_stopping_rounds=100,
            early_stopping_metric='loss',
            early_stopping_metric_minimize=False)

        feature_columns = learn.infer_real_valued_columns_from_input(iris.data)

        # classifier without early stopping - overfitting
        classifier1 = learn.DNNClassifier(feature_columns=feature_columns,
                                          hidden_units=[10, 20, 10],
                                          n_classes=3)
        classifier1.fit(x_train, y_train, steps=1000)
        _ = accuracy_score(y_test, classifier1.predict(x_test))

        # Full 1000 steps, 19 summaries and no evaluation summary:
        # 1 summary of net at step 1
        # 9 x (1 summary of net and 1 summary of global step) for steps 101, 201,...
        self.assertEqual(19, len(_get_summary_events(classifier1.model_dir)))
        with self.assertRaises(ValueError):
            _get_summary_events(classifier1.model_dir + '/eval')

        # classifier with early stopping - improved accuracy on testing set
        classifier2 = learn.DNNClassifier(
            hidden_units=[10, 20, 10],
            feature_columns=feature_columns,
            n_classes=3,
            config=tf.contrib.learn.RunConfig(save_checkpoints_secs=1))

        classifier2.fit(x_train, y_train, monitors=[val_monitor], steps=2000)
        _ = accuracy_score(y_val, classifier2.predict(x_val))
        _ = accuracy_score(y_test, classifier2.predict(x_test))

        # Note, this test is unstable, so not checking for equality.
        # See stability_test for examples of stability issues.
        if val_monitor.early_stopped:
            self.assertLess(val_monitor.best_step, 2000)
            # Note: because the validation monitor stops after the best score occurs,
            # the accuracy at the current checkpoint is lower.
            # TODO(ipolosukhin): Time machine for restoring old checkpoints?
            # Flaky: best_value is still not always better than the score2 value.
            # self.assertGreater(val_monitor.best_value, score2_val)

            # Early stopped; unstable, so only checking < max.
            self.assertLess(len(_get_summary_events(classifier2.model_dir)),
                            21)
            # Eval typically has ~6 events, but it varies based on the run.
            self.assertLess(
                len(_get_summary_events(classifier2.model_dir + '/eval')), 8)
Example #24
import os
import matplotlib.pyplot as plt
import sys
import numpy
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.estimators._sklearn import train_test_split

seed = 0
numpy.random.seed(seed)
tf.set_random_seed(seed)

npz_file = numpy.load('NPInter.npz')
npz_x_list = numpy.hstack([npz_file['XP'], npz_file['XR']])
X_train, X_test, Y_train, Y_test = train_test_split(npz_x_list,
                                                    npz_file['Y'],
                                                    test_size=0.1,
                                                    random_state=seed)

model = Sequential()
model.add(Conv1D(32, kernel_size=4, input_shape=(739, 1), activation='relu'))
model.add(Conv1D(64, 4, activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.25))
model.add(Flatten())  # 2D -> 1D
model.add(Dense(128, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
Example #25
    return np.squeeze(H.T)


f = open("train.pkl", 'rb')
x_train, y_train = pkl.load(f)

f1 = open("hogged.pkl", 'rb')
x_train = pkl.load(f1)
f1.close()
x_train = np.asarray(x_train)
x_train = x_train.squeeze()
y_train = y_train.transpose()
y_train = y_train[0]

x_train, x_test, y_train, y_test = train_test_split(x_train,
                                                    y_train,
                                                    test_size=0.1)

print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

name = "Bmodel2.h5"
layers = [
    Dense(1000, activation="relu", input_shape=(x_train[0].shape[0], )),
    Dense(500, activation="relu"),
    Dense(100, activation="relu"),
    Dense(36, activation="softmax")
]
Example #26
batch_size = 100
learning_rate = 0.01
samples_per_epoch = 100  # Assuming 10,000 images
nb_epoch = 25
test_size = 0.3
keep_prob = 0.5
# </editor-fold>

# <editor-fold desc="Load Data">
data_df = pd.read_csv(os.path.join(dataset_dir + data, 'driving_log.csv'))

X = data_df[['center', 'left', 'right']].values
y = data_df['steering'].values

X_train, X_valid, y_train, y_valid = train_test_split(X,
                                                      y,
                                                      test_size=test_size,
                                                      random_state=0)

# </editor-fold>


class Model():
    def __init__(self, INPUT_SHAPE, keep_prob):
        self.model = self.load(INPUT_SHAPE, keep_prob)

    def load(self, INPUT_SHAPE, keep_prob):

        model = Sequential()
        model.add(Lambda(lambda x: x / 127.5 - 1.0, input_shape=INPUT_SHAPE))
        model.add(Conv2D(24, 5, 5, activation='elu', subsample=(2, 2)))
        model.add(Conv2D(36, 5, 5, activation='elu', subsample=(2, 2)))
Example #27
            if dir.startswith('.'):
                continue
            img = cv2.imread(os.path.join(root, file), cv2.COLOR_BGR2GRAY)
            img = cv2.resize(img, (28, 28), interpolation=cv2.INTER_CUBIC)
            labels.append(dir)
            datasets.append(img)

datasets = np.array(datasets)
labels = np.array(labels)

# 处理数据集
datasets = datasets.astype('float32')
datasets /= 255
datasets = datasets.reshape(datasets.shape[0], 28, 28, 1)
x_train, x_test, y_train, y_test = train_test_split(datasets,
                                                    labels,
                                                    test_size=0.3,
                                                    random_state=0)

y_prepare_train = []
for obj in y_train:
    y_prepare_train.append(int(re.sub(r"\D", "", obj)) - 1)

y_prepare_test = []
for obj in y_test:
    y_prepare_test.append(int(re.sub(r"\D", "", obj)) - 1)

y_train = keras.utils.to_categorical(y_prepare_train, type_num)
y_test = keras.utils.to_categorical(y_prepare_test, type_num)

model = Sequential()
model.add(
Example #28
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_predict
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
dota2results = np.loadtxt('data/dota2Dataset/dota2Train.csv', delimiter=',')

dota2x = dota2results[:, 1:]
dota2y = dota2results[:, 0]
spamx = spambase[:, :57]
spamy = spambase[:, 57]
from sklearn.preprocessing import StandardScaler
from tensorflow.contrib.learn.python.learn.estimators._sklearn import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(spamx, spamy, test_size=0.1, random_state=0)
spamx = spambase[:, :57]
spamy = spambase[:, 57]
# Standardize the training set
ss = StandardScaler()
# X_train = ss.fit_transform(X_train)  # fit the data first, then standardize
lr = LogisticRegressionCV(multi_class="ovr", fit_intercept=True, Cs=np.logspace(-2, 2, 20), cv=2, penalty="l2",
                          solver="lbfgs", tol=0.01)
re = lr.fit(X_train, Y_train)
#
# # Prediction
# X_test = ss.transform(X_test)  # standardize the data
# Y_predict = lr.predict(X_test)  # predict

# Dota2 data
dx_train, dx_test, dy_train, dy_test = train_test_split(dota2x, dota2y, test_size=0.1, random_state=0)
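
The precision/recall/F1 imports at the top of this example are unused in the excerpt; a minimal evaluation sketch with them, reusing the fitted lr model and the spambase split from the code above (everything else is assumed).

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

y_pred = lr.predict(X_test)  # spambase test-set predictions (binary labels)
print("accuracy :", accuracy_score(Y_test, y_pred))
print("precision:", precision_score(Y_test, y_pred))
print("recall   :", recall_score(Y_test, y_pred))
print("f1       :", f1_score(Y_test, y_pred))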