Example #1
        # Sort neighbours by distance, then vote among the k nearest labels.
        norms = np.array(sorted(norms, key=lambda x: x[1]), dtype=object)
        counts = np.bincount(norms[:self.k, 0].astype('int32'))
        return np.argmax(counts)


def plot_knn_results(data, k):
    knn_classifier = KNN(k=k)
    knn_classifier.fit(*data)
    # Integer bounding box of the data; range() needs ints, not numpy floats.
    sup_x = (int(np.floor(np.min(data[0][:, 0]))),
             int(np.ceil(np.max(data[0][:, 0]))))
    sup_y = (int(np.floor(np.min(data[0][:, 1]))),
             int(np.ceil(np.max(data[0][:, 1]))))
    # Classify every integer grid point inside the bounding box.
    results = np.array(
        [[knn_classifier.predict((x, y)) for y in range(*sup_y)]
         for x in range(*sup_x)])
    points, labels = data
    colors_dict = {0: 'red', 1: 'blue', 2: 'green'}
    brush = np.vectorize(lambda x: colors_dict[x])
    colors = brush(labels)
    # pcolormesh expects numeric class values (rows = y, columns = x), so pass
    # the transposed grid with explicit coordinates so it lines up with the points.
    from matplotlib.colors import ListedColormap  # not imported at module level in this snippet
    xs = np.arange(sup_x[0], sup_x[1] + 1)
    ys = np.arange(sup_y[0], sup_y[1] + 1)
    cmap = ListedColormap([colors_dict[i] for i in sorted(colors_dict)])
    plt.pcolormesh(xs, ys, results.T, cmap=cmap, alpha=0.3)
    plt.scatter(points[:, 0], points[:, 1], c=colors)
    plt.show()


if __name__ == '__main__':
    data = data_processing.data_generator(no_samples_per_class=10,
                                          no_classes=3,
                                          no_dimensions=2,
                                          centers=((0, 0), (5, 5), (-5, 5)))
    plot_knn_results(data, 7)
Example #2
        for i in range(epochs):
            # Stochastic update: pick one random training sample per step.
            idx = np.random.randint(0, data.shape[0])
            x, y = data[idx], pretty_labels[idx]
            # Append a constant 1 so the last component of beta acts as the bias.
            x = np.append(x, 1)
            self.beta -= learning_rate(i) * self.gradient(x, y)

    def predict(self, x):
        x = np.append(x, 1)
        return 1 if self.kernel(x, self.beta) >= 0 else -1


if __name__ == '__main__':
    # np.random.seed(1)
    data = data_processing.data_generator(no_samples_per_class=10,
                                          no_classes=2,
                                          no_dimensions=2,
                                          centers=((0, 0), (15, 5)))
    # data_processing.plot_data(data)
    # svm = SVM(softing_parameter=10, kernel=lambda x, y: (np.dot(x, y) + 1000)**10)
    svm = SVM(softing_parameter=10000, kernel=lambda x, y: np.dot(x, y))

    # print(svm.predict(np.array([15, 5])))

    # Use only the first two iris classes so the problem is binary.
    data, target = datasets.load_iris(as_frame=True, return_X_y=True)
    data['target'] = target
    data = data[data['target'] < 2]
    data = shuffle(data)
    print(data.shape)
    training_data = data.iloc[:30, :]
    test_data = data.iloc[30:, :]
    print(len(test_data))
Example #3
def train(sess, data, *,
          input_shape=None,
          epochs=10,
          batch_size=32,
          learning_rate=1e-4,
          weight_decay=0.0,
          save_dir='./saved_fcn8s',
          rootname='fcn8s',
          finalize_dir=None):
    """Train neural network and print out the loss during training.

    :param sess: TF Session
    :param data: Data object
        Data set.
    :param input_shape: tuple, (w, h)
        Input shape for the neural network.
    :param epochs: int
        Number of epochs.
    :param batch_size: int
        Batch size.
    :param learning_rate: float
        Learning rate.
    :param weight_decay: float
        L2 regularization strength.
    :param save_dir: string
        Directory of the saved model and weights.
    :param rootname: string
        Rootname for saved file.
    :param finalize_dir: string or None
        If given and a saved model can be restored, export it as a
        TensorFlow SavedModel to this directory and return without training.
    """
    input_ts, output_ts, keep_prob_ts = build_model(sess, data.n_classes)

    # cross entropy loss
    logits_ts = tf.reshape(output_ts, (-1, data.n_classes))
    labels_ts = tf.placeholder(tf.float32, (None, None, None, data.n_classes))
    cross_entropy_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        logits=logits_ts, labels=tf.reshape(labels_ts, (-1, data.n_classes))))

    # L2 regularization
    trainable_vars = tf.trainable_variables()
    l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in trainable_vars
                        if 'bias' not in v.name]) * weight_decay

    total_loss = cross_entropy_loss + l2_loss

    # Optimizer for training
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(total_loss)

    # Initialization
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    loss_history_file = os.path.join(save_dir, rootname + '_loss_history.pkl')
    loss_history = dict()
    loss_history['loss'] = []
    loss_history['vali_loss'] = []

    # load the model if exists
    if os.path.exists(save_dir):
        try:
            print("--- Loading saved models! ---")
            saver.restore(sess, os.path.join(save_dir, rootname))
            if finalize_dir is not None:
                builder = tf.saved_model.builder.SavedModelBuilder(finalize_dir)
                builder.add_meta_graph_and_variables(
                    sess,
                    ['fcn8s'],
                    signature_def_map={
                        "model": tf.saved_model.signature_def_utils.predict_signature_def(
                            inputs={"input": input_ts},
                            outputs={"output": output_ts})
                    }
                )
                builder.save()
                return
        except Exception as e:
            print("Cannot load existing model: {}".format(e))
    else:
        os.mkdir(save_dir)

    if finalize_dir is not None:
        print("Cannot finalize: no saved model could be restored!")
        return

    # train the model
    print("--- Training ---")
    pbar = tqdm(total=epochs)
    for i in range(epochs):
        gen = data_generator(
            data.image_files_train,
            data.label_files_train,
            data.label_colors if data.label_colors else data.background_color,
            batch_size=batch_size,
            input_shape=input_shape)
        epoch_loss = 0  # renamed so it does not shadow the total_loss tensor
        count = 0
        for X, Y in gen:
            _, loss = sess.run([optimizer, cross_entropy_loss],
                               feed_dict={keep_prob_ts: 1.0,
                                          input_ts: X,
                                          labels_ts: Y
                                          })
            count += X.shape[0]
            epoch_loss += loss*X.shape[0]
            loss_history['loss'].append(loss)
            print("mini-batch loss: {:.4f}".format(loss), end='\r')
        avg_loss = epoch_loss / count

        # validation
        if data.image_files_vali is not None:
            vali_count = 0
            total_vali_loss = 0
            gen = data_generator(
                data.image_files_vali,
                data.label_files_vali,
                data.label_colors if data.label_colors else data.background_color,
                batch_size=batch_size,
                input_shape=input_shape,
                is_training=False)
            for X, Y in gen:
                vali_loss = sess.run(cross_entropy_loss,
                                     feed_dict={keep_prob_ts: 1.0,
                                                input_ts: X,
                                                labels_ts: Y})
                vali_count += X.shape[0]
                total_vali_loss += vali_loss*X.shape[0]
                loss_history['vali_loss'].append(vali_loss)
            avg_vali_loss = total_vali_loss / vali_count
        else:
            avg_vali_loss = None

        update_description(pbar, avg_loss, avg_vali_loss)
        pbar.update()

    # save the model
    saver.save(sess, os.path.join(save_dir, rootname))
    save_history(loss_history, loss_history_file)
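
A minimal, hypothetical driver for the train() function above; the Data container, its constructor argument, and the input shape are assumptions inferred from the attributes train() reads, not part of the original example.

import tensorflow as tf

if __name__ == '__main__':
    # Assumed Data object: train() expects n_classes, image_files_train,
    # label_files_train, label_colors/background_color and the *_vali fields.
    data = Data('./data')  # hypothetical constructor
    with tf.Session() as sess:
        train(sess, data,
              input_shape=(576, 160),   # example (w, h), not from the source
              epochs=20,
              batch_size=8,
              learning_rate=1e-4,
              weight_decay=1e-5,
              save_dir='./saved_fcn8s',
              rootname='fcn8s')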
Example #4
def getGenerator(csv_path, length, k_folds, dim, target_height, target_width,
                 batch_size):
    """ Reading data from csv file and constructing data generators
    Args:
    csv_path: path of csv file
    length: length of sequence data to be read
    k_folds: number of folds for training and validation
    dim: the dimension of samples used for training or validation. Selectable
         value is 1, 2 or 3.
    target_height, target_width: target height and width of input samples
    batch_size: the size of data batch

    Retrun:
    two lists which save train_set and valid_set respectively
    two lists which save train_steps_per_epoch and valid_steps_per_epoch
    
    Remark:
    - random_state was set at every shuffle operaton to ensure that 
      all the models use the same training set and validation set.
    """
    # loading data
    dataset = csv_read(csv_path, length)
    dataset = shuffle(dataset, random_state=0)

    train_data, test_data = train_test_split(dataset,
                                             test_size=0.2,
                                             shuffle=True,
                                             random_state=10)

    test_generator = data_generator(Data=test_data,
                                    dim=dim,
                                    target_height=target_height,
                                    target_width=target_width,
                                    batch_size=len(test_data),
                                    random_state=20)

    train_steps, valid_steps = [], []
    train_generators, valid_generators = [], []
    # get data generator
    if k_folds == 1:
        train_set, valid_set = train_test_split(train_data,
                                                test_size=0.2,
                                                shuffle=True,
                                                random_state=30)
        train_steps_per_epoch = len(train_set) // batch_size
        valid_steps_per_epoch = len(valid_set) // batch_size

        train_steps.append(train_steps_per_epoch)
        valid_steps.append(valid_steps_per_epoch)

        train_generator = data_generator(Data=train_set,
                                         dim=dim,
                                         target_height=target_height,
                                         target_width=target_width,
                                         batch_size=batch_size)

        valid_generator = data_generator(Data=valid_set,
                                         dim=dim,
                                         target_height=target_height,
                                         target_width=target_width,
                                         batch_size=batch_size)
        train_generators.append(train_generator)
        valid_generators.append(valid_generator)

    elif k_folds >= 2:
        kfold = KFold(n_splits=k_folds, shuffle=True, random_state=40)

        # Fold only the training portion so the held-out test set never
        # leaks into a training or validation fold.
        for train_index, valid_index in kfold.split(train_data):
            train_set = np.array(train_data)[train_index]
            valid_set = np.array(train_data)[valid_index]

            train_steps_per_epoch = len(train_set) // batch_size
            valid_steps_per_epoch = len(valid_set) // batch_size

            train_steps.append(train_steps_per_epoch)
            valid_steps.append(valid_steps_per_epoch)

            train_generator = data_generator(Data=train_set,
                                             dim=dim,
                                             target_height=target_height,
                                             target_width=target_width,
                                             batch_size=batch_size)

            valid_generator = data_generator(Data=valid_set,
                                             dim=dim,
                                             target_height=target_height,
                                             target_width=target_width,
                                             batch_size=batch_size)

            # Each call to data_generator returns an independent generator
            # object, so it is safe to collect one per fold here.
            train_generators.append(train_generator)
            valid_generators.append(valid_generator)
    else:
        raise ValueError("Undefined k_folds value: {}".format(k_folds))

    return (train_generators, train_steps, valid_generators, valid_steps,
            test_generator)
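
A short, hypothetical call sketch for getGenerator(); the csv path, sequence length, image size, and fold count below are placeholder values, not taken from the original code.

(train_gens, train_steps, valid_gens, valid_steps, test_gen) = getGenerator(
    csv_path='./labels.csv',   # placeholder path
    length=16,
    k_folds=5,
    dim=2,
    target_height=224,
    target_width=224,
    batch_size=32)

# One model can then be trained per fold, e.g. with a Keras model:
# for gen, steps, v_gen, v_steps in zip(train_gens, train_steps,
#                                       valid_gens, valid_steps):
#     model.fit(gen, steps_per_epoch=steps, epochs=10,
#               validation_data=v_gen, validation_steps=v_steps)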