コード例 #1
0
def train_face_classifier(ntrain, ntest, orientations, wrap180,
                          model_save_file):
    """Fit a logistic classifier on face descriptors, save its parameters,
    and report classification rates on the training and test splits.

    Args:
        ntrain: number of training examples to load.
        ntest: number of test examples to load.
        orientations: descriptor orientation-bin count passed to the loaders.
        wrap180: forwarded to the data loaders (angle wrapping flag).
        model_save_file: path where the fitted parameter array is saved.
    """
    print("Loading training data...")
    train_desc, train_cls = get_training_data(ntrain,
                                              orientations,
                                              wrap180=wrap180)
    print("Finished loading training data.")
    print("Loading test data...")
    test_desc, test_cls = get_testing_data(ntest,
                                           orientations,
                                           wrap180=wrap180)
    print("Finished loading test data.")
    print("Start training...")

    t0 = time.time()
    params, _ = logistic_fit(train_desc, train_cls)
    print("Training took {} seconds.".format(time.time() - t0))

    # Persist the fitted parameter vector before evaluating.
    np.save(model_save_file, params)

    # Evaluate both splits in the same order as before: train first, test second.
    evaluations = (
        (train_desc, train_cls, True, "Training classification rate: {}"),
        (test_desc, test_cls, False, "Testing classification rate: {}"),
    )
    for desc, cls, is_train, message in evaluations:
        predicted = logistic_prob(desc, params)
        plot_errors(predicted, cls, is_training=is_train)
        print(message.format(classification_rate(predicted, cls)))
コード例 #2
0
def locations(path=None):
    """Print the occurrence count of each location label in the training
    data and return the distinct labels in first-seen order.

    NOTE(review): the `path` parameter is currently unused — confirm whether
    it was meant to be forwarded to get_training_data().
    """
    _, y = get_training_data()
    counts = Counter(y)
    for place, n in counts.items():
        print("{}: {}".format(place, n))
    # Counter preserves insertion order, so this matches the printed order.
    return list(counts)
コード例 #3
0
def train_random_forest():
    """Train a random-forest location classifier and pickle it to disk.

    Returns:
        The fitted sklearn pipeline (DictVectorizer -> RandomForestClassifier).

    Raises:
        ValueError: when the training set is empty, matching the behaviour of
            the sibling train_* helpers in this module.
    """
    model_file = "../data/rf.pkl"
    X, y = get_training_data()
    # Guard against fitting on an empty dataset — the other training helpers
    # raise here instead of letting sklearn fail with an obscure error.
    if len(X) == 0:
        raise ValueError("No wifi access points have been found during training")
    lp = make_pipeline(DictVectorizer(sparse=False),
                       RandomForestClassifier(n_estimators=100,
                                              class_weight="balanced"))
    lp.fit(X, y)
    with open(model_file, "wb") as f:
        pickle.dump(lp, f)
    return lp
コード例 #4
0
def train():
    """Train a BentesModel on the iceberg dataset with a KL-regularized
    cross-entropy objective, printing metrics and first-layer parameters
    every step.

    NOTE(review): the loop has no stopping condition beyond exhausting
    range(1_000_000_000) — presumably training is interrupted manually.
    """
    PYRNG = Random(0)  # fixed seed so splits and batch sampling are reproducible
    ttv_proportions = dict(test=0.001, train=.96, validation=0.039)
    # Whiten, add flips, mask regions outside circle, train/test/val split
    DATA = (get_training_data().to_gpu().normalize().enrich().mask_circle().
            test_train_validation(PYRNG, **ttv_proportions))
    # Fixed validation minibatch of 50 examples, re-evaluated every step.
    VALDATA = DATA.validation.get_examples(50, PYRNG)
    VALIMGS = Variable(T.from_numpy(VALDATA.images).type(TP.FloatTensor))
    VALCLASSES = Variable(TP.LongTensor(VALDATA.is_iceberg))
    BETA = 1e1  # weight of the KL penalty term, annealed multiplicatively below
    BETA_FACTOR = .9999  # per-step decay factor applied to BETA
    BATCH_SIZE = 32
    model = BentesModel()
    if T.cuda.is_available():
        model = model.cuda()
    optimizer = optim.Adam(model.parameters())
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9999)
    for i in range(1_000_000_000):
        # NOTE(review): scheduler.step() before optimizer.step() — newer
        # torch versions expect the opposite order; confirm intended.
        scheduler.step()
        optimizer.zero_grad()
        # Fresh training batch with random rotation augmentation.
        batch = DATA.train.get_examples(BATCH_SIZE, PYRNG).rotate(PYRNG)
        imgvar = Variable(T.from_numpy(batch.images).type(TP.FloatTensor))
        result = model(imgvar)
        classvar = Variable(TP.LongTensor(batch.is_iceberg))
        # "accuracy" here is actually the cross-entropy loss term, not an
        # accuracy metric.
        accuracy = F.cross_entropy(result.activations, classvar)
        kl = T.mean(result.kl)
        loss = accuracy + BETA * kl
        loss.backward()
        # Validation forward pass on the fixed minibatch (no gradient use).
        valresult = model(VALIMGS)
        valaccuracy = F.cross_entropy(valresult.activations, VALCLASSES)
        optimizer.step()
        gf = lambda t: f'{t.data[0]:12.3f}'  # noqa: E731
        print(f'Step: {i:6d} CE: {gf(accuracy)} KL: {gf(kl)} loss: {gf(loss)} '
              f'val: {gf(valaccuracy)}')
        # Log-probability assigned to the true class of each batch example,
        # summarized as a histogram.
        scores = (F.log_softmax(
            result.activations,
            dim=1).data.cpu().numpy()[list(range(BATCH_SIZE)),
                                      batch.is_iceberg])
        print(
            np.array(
                list(zip(*(s.astype(float) for s in np.histogram(scores))))).T)
        probs = F.softmax(result.activations).data.cpu().numpy().tolist()
        pprint(list(zip(batch.is_iceberg, probs)))
        BETA *= BETA_FACTOR
        print('first layer parameters/gradients for first kernel')
        print('convolution')
        print(model.layers[1].layer.weight[0])
        print(model.layers[1].layer.weight.grad[0])
        print('noise')
        print(model.layers[1].noise.weight[0])
        print(model.layers[1].noise.weight.grad[0])
        print('prior mean')
        print(model.layers[1].prior.mean[0])
        print(model.layers[1].prior.mean.grad[0])
        print('prior alpha')
        print(model.layers[1].prior.alpha[0])
        print(model.layers[1].prior.alpha.grad[0])
コード例 #5
0
def train_model_neural_network():
    """Fit and pickle the location model stored at ../data/nn.pkl.

    NOTE(review): despite the function name, the pipeline fits a
    DecisionTreeClassifier, not a neural network — confirm the intended
    estimator.

    Raises:
        ValueError: when no training samples are available.
    """
    model_path = "../data/nn.pkl"
    features, labels = get_training_data()
    if len(features) == 0:
        raise ValueError("No wifi access points have been found during training")
    pipeline = make_pipeline(
        DictVectorizer(sparse=False),
        DecisionTreeClassifier(max_depth=None, min_samples_split=2,
                               random_state=0))
    pipeline.fit(features, labels)
    with open(model_path, "wb") as handle:
        pickle.dump(pipeline, handle)
    return pipeline
コード例 #6
0
ファイル: model_def.py プロジェクト: wbwatkinson/determined
 def build_training_data_loader(self) -> keras.InputData:
     """Build the training data loader (a tf.keras.Sequence).

     Augmentation settings come from the experiment hyperparameters and
     fall back to "no augmentation" when absent.
     """
     hp = self.context.get_hparams()
     shift_w = hp.get("width_shift_range", 0.0)
     shift_h = hp.get("height_shift_range", 0.0)
     flip = hp.get("horizontal_flip", False)
     return get_training_data(
         data_directory=self.download_directory,
         batch_size=self.context.get_per_slot_batch_size(),
         width_shift_range=shift_w,
         height_shift_range=shift_h,
         horizontal_flip=flip,
     )
コード例 #7
0
def train_xgb():
    """Fit and pickle the classifier saved at ../data/xgb.pkl.

    NOTE(review): the name suggests XGBoost, but the active estimator is an
    MLPClassifier (a GradientBoostingClassifier variant is commented out) —
    confirm which estimator is intended.

    Raises:
        ValueError: when no training samples are available.
    """
    model_path = "../data/xgb.pkl"
    samples, targets = get_training_data()
    if len(samples) == 0:
        raise ValueError("No wifi access points have been found during training")
    #lp = make_pipeline(DictVectorizer(sparse=False), GradientBoostingClassifier(n_estimators=100))
    classifier_pipeline = make_pipeline(
        DictVectorizer(sparse=False),
        MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(15,),
                      random_state=1))
    classifier_pipeline.fit(samples, targets)
    with open(model_path, "wb") as handle:
        pickle.dump(classifier_pipeline, handle)
    return classifier_pipeline
コード例 #8
0
def main():
  """Command-line driver (Python 2 syntax): trains a background model, then
  enrolls one client model per identity for the 'devel' and 'test' groups.

  The project module named on the command line must export
  create_background_model() and enroll().
  """

  parser = argparse.ArgumentParser(description=__doc__, epilog=__epilog__,
      formatter_class=argparse.RawDescriptionHelpFormatter)

  parser.add_argument('-w', '--work-directory', metavar='DIR', default='tmp',
      help='Path to the working directory to use for storing files (defaults to `%(default)s\')')

  parser.add_argument('module', default='project', nargs='?', metavar='MODULE',
      help='Name of the module containing the code to execute')

  args = parser.parse_args()

  # Create the working directory on demand; reuse it when it already exists.
  if not os.path.exists(args.work_directory):
    print("Creating directory `%s'..." % args.work_directory)
    os.makedirs(args.work_directory)
  else:
    print("Using existing directory `%s'..." % args.work_directory)

  print("Loading your project from `%s'..." % args.module)
  # NOTE(review): Python 2 exec statement on a user-supplied module name —
  # binds the project's create_background_model and enroll in this scope.
  # exec on external input is a known security hazard.
  exec 'from %s import create_background_model, enroll' % args.module

  from data import get_training_data, get_data

  # (Re)build the background model from scratch on every run.
  background_filename = os.path.join(args.work_directory, 'background.model')
  print("Creating background model -> `%s'..." % background_filename)
  if os.path.exists(background_filename): os.unlink(background_filename)
  create_background_model(get_training_data(), background_filename)

  print("Training models...")

  # Enroll one model file per client identity in each evaluation group.
  for group in ('devel', 'test'):
    print("... for `%s' group ..." % group)
    data = get_data(group, 'enroll')
    for identity, images in data.iteritems():
      filename = os.path.join(args.work_directory, 'client-%d.model' % identity)
      if os.path.exists(filename): os.unlink(filename)
      print("Enrolling client %d -> `%s'..." % (identity, filename))
      enroll(images, background_filename, filename)

  print("All done. Models saved at directory `%s'." % args.work_directory)
  print("You can proceed with the evaluation using `compute_performance.py'.")
コード例 #9
0
    label = label.ravel()
    return ((pred > 0.5) == label).mean()


# setting: seed MXNet's RNG (from Python's random module) and enable
# debug-level logging for the run
mx.random.seed(random.randint(1, 10000))
logging.basicConfig(level=logging.DEBUG)

# create output dir; deliberately ignore OSError when it already exists
try:
    os.makedirs(opt.data_path)
except OSError:
    pass

# get training data
train_data = get_training_data(opt.batch_size)

# get model: the GAN's generator and discriminator ("descriptor") networks
g_net = get_generator()
d_net = get_descriptor(CTX)

# define loss function
loss = gluon.loss.SigmoidBinaryCrossEntropyLoss()

# initialization: Xavier weight init on the target context, plus one Adam
# trainer per network with shared hyperparameters
g_net.collect_params().initialize(mx.init.Xavier(), ctx=CTX)
d_net.collect_params().initialize(mx.init.Xavier(), ctx=CTX)
g_trainer = gluon.Trainer(
    g_net.collect_params(), 'Adam', {'learning_rate': LEARNING_RATE, 'beta1': BETA, 'clip_gradient': CLIP_GRADIENT})
d_trainer = gluon.Trainer(
    d_net.collect_params(), 'Adam', {'learning_rate': LEARNING_RATE, 'beta1': BETA, 'clip_gradient': CLIP_GRADIENT})
コード例 #10
0
from http.server import HTTPServer, BaseHTTPRequestHandler
from io import BytesIO
import json

# get relevant training data
import data
# NOTE(review): this module-level copy is never used below — the classifier
# calls data.get_training_data() again itself, so the data is loaded twice.
training_data = data.get_training_data()

# instantiate classifier
import classifier
nltk_classifier = classifier.NLTKClassifier(data.get_training_data())

# define a simple http server that will use the chatbot to respond to a user
class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
    """Minimal handler: POST a JSON body containing a 'message' key and
    receive the classifier's result as a UTF-8 text response."""

    def do_POST(self):
        # Read exactly Content-Length bytes of the body and parse as JSON.
        content_length = int(self.headers['Content-Length'])
        body = json.loads(self.rfile.read(content_length))
        classification = nltk_classifier.classify(body['message'])
        self.send_response(200)
        self.end_headers()
        # Buffer the reply, then write it out in one call.
        response = BytesIO()
        response.write(classification.encode('utf-8'))
        self.wfile.write(response.getvalue())


# run the server on port 8000; serve_forever() blocks until interrupted
httpd = HTTPServer(('localhost', 8000), SimpleHTTPRequestHandler)
httpd.serve_forever()
コード例 #11
0
ファイル: network.py プロジェクト: adiman9/tensorTinker
import time
from data import get_training_data
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.callbacks import TensorBoard

# Load the full training set (images and labels).
x_train, y_train = get_training_data()

# Normalise the data: scale pixel values from [0, 255] into [0, 1]
x_train = x_train / 255.0

# Parameter options for the architecture grid search in the loops below
dense_layers = [0, 1, 2]
layer_sizes = [32, 64, 128]
conv_layers = [1, 2, 3]

for num_dense_layers in dense_layers:
    for layer_size in layer_sizes:
        for num_conv_layers in conv_layers:
            NAME = f'{num_conv_layers}-conv-{layer_size}-nodes-{num_dense_layers}-dense-{int(time.time())}'
            tensorboard = TensorBoard(log_dir=f'logs/{NAME}')

            # Create model
            model = Sequential()
            # (3, 3) is the convolution kernel size (window size)
            model.add(Conv2D(layer_size, (3, 3), input_shape = x_train.shape[1:]))
            model.add(Activation('relu'))
            model.add(MaxPooling2D(pool_size=(2, 2)))

            for _ in range(num_conv_layers - 1):
コード例 #12
0
ファイル: train.py プロジェクト: tsholmes/witness
def main():
    """End-to-end training entry point: configures a Weights & Biases run,
    trains a U-Net segmentation model, then logs per-image evaluation results
    to the run summary.
    """
    # Get args
    args = parser.parse_args()

    # Init wandb; CLI arguments override these fallback defaults
    run = wandb.init()
    run.config.learning_rate = args.learning_rate or 1e-4
    run.config.num_epochs = args.epochs or 100
    run.config.steps_per_epoch = args.steps or 300
    run.config.batch_size = args.batch_size or 8
    run.config.image_size = (288, 512)
    run.config.num_predictions = args.num_predictions or 24
    run.config.beta = args.beta or 50

    wandb.save('*.py')

    # Augmented generator for training; two un-augmented ({}) generators for
    # validation — the second exists solely for WandbCallback example images.
    training_data_generator = get_training_data(
        run.config.batch_size,
        'data/train',
        'images',
        'labels',
        augmentation_params,
        target_size=run.config.image_size)

    validation_data_generator = get_training_data(
        run.config.batch_size,
        'data/valid',
        'images',
        'labels', {},
        target_size=run.config.image_size)

    validation_data_generator_2 = get_training_data(
        run.config.batch_size,
        'data/valid',
        'images',
        'labels', {},
        target_size=run.config.image_size)

    os.makedirs('model', exist_ok=True)

    model = unet(image_size=run.config.image_size)
    metrics = ['accuracy', km.precision(), km.recall()]

    model.compile(
        optimizer=Adam(lr=run.config.learning_rate),
        loss=weighted_cross_entropy(run.config.beta),
        # loss='binary_crossentropy',
        metrics=metrics)

    # Save best model
    model_path = 'model/unet_witness.hdf5'
    model_checkpoint = ModelCheckpoint(model_path,
                                       monitor='loss',
                                       verbose=1,
                                       save_best_only=True)

    # Upload examples to W&B
    wandb_callback = WandbCallback(data_type='image',
                                   predictions=run.config.num_predictions,
                                   generator=validation_data_generator_2,
                                   save_model=True,
                                   monitor='loss',
                                   mode='min',
                                   labels=['void', 'puzzle'])

    # Save to tensorboard
    tensorboard_callback = TensorBoard(log_dir=wandb.run.dir,
                                       histogram_freq=0,
                                       write_graph=True,
                                       write_images=True)

    callbacks = [
        model_checkpoint,
        wandb_callback,
        #       tensorboard_callback,
    ]

    # NOTE(review): validation_steps reuses num_predictions — confirm this is
    # intentional rather than a leftover from a separate setting.
    model.fit_generator(training_data_generator,
                        validation_data=validation_data_generator,
                        validation_steps=run.config.num_predictions,
                        steps_per_epoch=run.config.steps_per_epoch,
                        epochs=run.config.num_epochs,
                        callbacks=callbacks)

    # Upload best model to W&B

    wandb.save(model_path)

    # Evaluate on every image; label masks live beside the images with the
    # same basename but a .png extension under /labels/.
    all_imgs = glob.glob('data/all/images/*.jpg')
    all_labels = [
        f.replace('/images/', '/labels/').replace('.jpg', '.png')
        for f in all_imgs
    ]

    run.summary['results'] = make_segmentation_dataframe(
        model,
        all_imgs,
        all_labels,
        image_size=run.config.image_size,
        loss_function=weighted_cross_entropy(run.config.beta),
    )
コード例 #13
0
def main(_):
    """Entry point: fetch batched training data and run the training loop."""
    total_elements = FLAGS.batch_size * FLAGS.time_steps
    x_batches, y_batches = get_training_data(total_elements)
    run_training(x_batches, y_batches)
コード例 #14
0
ファイル: nn.py プロジェクト: aakhundov/mnist-challenge
        elif option == "-dropout_rates":
            dropout_rates = [float(r) for r in sys.argv[1].split(",")]
            del sys.argv[1]
        else:
            print sys.argv[0], ": invalid option", option
            sys.exit(1)

    np.seterr(over="ignore", divide="ignore")

    print "Neural Networks"
    print

    print "Reading data..."
    # reading the data, applying configured pre-processing, and adding 1.0 to each vector as a bias input
    X_train, T_train = data.get_training_data(ntrain,
                                              normalize=normalize,
                                              deskew=deskew,
                                              add_ones=True)
    X_test, T_test = data.get_testing_data(ntest,
                                           normalize=normalize,
                                           deskew=deskew,
                                           add_ones=True)
    print "{0} training data read".format(len(X_train))
    print "{0} testing data read".format(len(X_test))
    print

    input_dim = X_train.shape[1]
    output_dim = T_train.max() + 1
    weights, errors, params = [], [], []

    print "{0:40}\tV. Loss\t\tV. Error".format(
        "(Func, Hidden, Batch, Learn, Drop)")
コード例 #15
0
ファイル: training.py プロジェクト: yash1802/TensorBot
import pickle

import tensorflow as tf
import tflearn

import data

# Load the preprocessed training matrices plus the vocabulary and class lists.
train_x, train_y, words, classes = data.get_training_data()

# Reset underlying graph data
tf.reset_default_graph()


def build_model():
    """Construct the tflearn DNN: input sized to the training vectors, two
    8-unit fully-connected layers, and a softmax output over the classes.
    """
    input_layer = tflearn.input_data(shape=[None, len(train_x[0])])
    hidden = tflearn.fully_connected(input_layer, 8)
    hidden = tflearn.fully_connected(hidden, 8)
    output = tflearn.fully_connected(hidden, len(train_y[0]),
                                     activation='softmax')
    regression = tflearn.regression(output)
    # Tensorboard summaries are written under tflearn_logs/.
    return tflearn.DNN(regression, tensorboard_dir='tflearn_logs')


if __name__ == '__main__':
    model = build_model()
    model.fit(train_x, train_y, n_epoch=1000, batch_size=8, show_metric=True)
    model.save('model.tflearn')
    # save all data structures
    pickle.dump(
コード例 #16
0
ファイル: main.py プロジェクト: iliakplv/ml-math
# input layer size must match the number of features
# output layer size must match the number of classes
network_architecture = [4, 5, 3]
activation_function = activation.tanh
activation_function_back = activation.tanh_back  # derivative used in backprop

loss_metric = metrics.mse
learning_rate = 0.000001
training_epochs = 5000

metrics_period = 10000  # calculate metrics every `metrics_period` iterations
test_examples = 10  # number of test examples for the trained network

if __name__ == '__main__':
    features, labels = data.get_training_data()

    # Time the full training run.
    start = time.time()

    train.train(features, labels, activation_function,
                activation_function_back, network_architecture, loss_metric,
                learning_rate, training_epochs, metrics_period)

    finish = time.time()

    print('\nTraining finished in {0:.1f} seconds\n'.format(finish - start))

    # Uncomment to do the random test on the trained NN
    # test_features = []
    # test_labels = []
    # for i in range(test_examples):
コード例 #17
0
def main(args):
    """Train the selected model on GPU 0 (Python 2 / TF1 graph-mode script).

    Alternates training steps with periodic validation passes until the
    training input pipeline raises OutOfRangeError, then runs one final
    validation pass.
    """
    # Since we are doing batch normalization, we have to keep track of
    # whether we are training or testing. Also determines dropout
    # probability.
    training = tf.placeholder(tf.bool, name='training')
    # Fixed-size placeholders: 700 positions x 22 features in, 9 classes out.
    example_sequence = tf.placeholder(tf.float32, [None, 700, 22])
    example_profile = tf.placeholder(tf.float32, [None, 700, 22])
    labels = tf.placeholder(tf.float32, [None, 700, 9])
    with tf.device('/gpu:0'):
        model = MODEL_MAPPING[args.model_num](example_sequence,
                                              example_profile, training)
        losses = tf.nn.softmax_cross_entropy_with_logits(labels=labels,
                                                         logits=model.logits)
        loss = tf.reduce_mean(losses)
        tf.summary.scalar('loss', loss)

        accuracy = validate.accuracy(model.logits, labels)

        # We need to execute update_ops before training for batch
        # normalization.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            training_step = tf.train.AdamOptimizer(.001).minimize(loss)

        training_data_sequence, training_data_profile, training_labels = (
            data.get_training_data(args.train_files[0], args.num_epochs,
                                   args.batch_size))
        validation_step, validation_initializer = data.get_validation_data(
            args.eval_files[0], args.batch_size)
        (validation_data_sequence, validation_data_profile,
         validation_labels) = validation_step

    summary = tf.summary.merge_all()
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          log_device_placement=True)) as sess:
        print "BEGINNING TRANING..."

        def validation_pass():
            # Run the whole validation set once; report mean loss/accuracy.
            print "VALIDATING..."
            sess.run([validation_initializer])
            losses = []
            accuracies = []
            count_val = 0
            while True:
                print count_val
                count_val += 1
                try:
                    val_ex_seq, val_ex_prof, val_label = sess.run([
                        validation_data_sequence, validation_data_profile,
                        validation_labels
                    ])
                    _loss, _accuracy = sess.run(
                        [loss, accuracy],
                        feed_dict={
                            example_sequence: val_ex_seq,
                            example_profile: val_ex_prof,
                            labels: val_label,
                            training: False
                        })
                    losses.append(_loss)
                    accuracies.append(_accuracy)
                except tf.errors.OutOfRangeError:
                    # Validation iterator exhausted.
                    break
            total_loss = sum(losses) / float(len(losses))
            total_accuracy = sum(accuracies) / float(len(accuracies))
            print " -- TOTAL LOSS: " + str(total_loss)
            print " -- TOTAL ACCURACY: " + str(total_accuracy)

        sess.run(tf.global_variables_initializer())
        summary_writer = tf.summary.FileWriter(args.job_dir, sess.graph)
        step = 0
        while True:
            try:
                step += 1
                print step
                itr_ex_sequence, itr_ex_profile, itr_label = sess.run([
                    training_data_sequence, training_data_profile,
                    training_labels
                ])
                _, s, l = sess.run(
                    [training_step, summary, loss],
                    feed_dict={
                        example_sequence: itr_ex_sequence,
                        example_profile: itr_ex_profile,
                        labels: itr_label,
                        training: True
                    })
                # Log every step for now
                summary_writer.add_summary(s, step)
                print "LOSS: " + str(l)

                # Validation
                if step % args.validation_step == 0:
                    validation_pass()

            except tf.errors.OutOfRangeError:
                # Training iterator exhausted: final validation, then stop.
                validation_pass()
                print("DONE TRAINING")
                break
コード例 #18
0
ファイル: gp.py プロジェクト: aakhundov/mnist-challenge
            ntest = int(sys.argv[1]); del sys.argv[1]
        elif option == "-deskew":
            deskew = int(sys.argv[1]); del sys.argv[1]
        elif option == "-normalize":
            normalize = int(sys.argv[1]); del sys.argv[1]
        elif option == "-lsquared":
            lsquared = float(sys.argv[1]); del sys.argv[1]
        else:
            print sys.argv[0], ": invalid option", option
            sys.exit(1)

    print "Gaussian Processes"
    print

    print "Reading data..."
    # reading the data and applying configured pre-processing steps
    X_train, T_train = data.get_training_data(ntrain, normalize=normalize, deskew=deskew)
    X_test, T_test = data.get_testing_data(ntest, normalize=normalize, deskew=deskew)
    print "{0} training data read".format(len(X_train))
    print "{0} testing data read".format(len(X_test))
    print

    # running a Gaussian process on training and testing sets, with "lsquared"
    T_predicted = gaussian_process(X_train, T_train, X_test, lsquared=lsquared)

    # evaluating the model performance on the testing set
    print "Testing Set Error: {0:.3f}".format(
        get_error_score(T_predicted, T_test)
    )
    print
コード例 #19
0
import numpy as np
from data import input_neurons, hidden_neurons, output_neurons
from data import get_test_data, get_training_data
from network import Network

# Network sized by the neuron counts exported from data.py.
network = Network([input_neurons, hidden_neurons, output_neurons])
# uncomment this stretch to start the network with a great test result
# saved_weights = np.load("weights.npy",mmap_mode=None, allow_pickle=True)
# network.weights = saved_weights

training_inputs = get_training_data()
test_inputs = get_test_data()

# 1000 epochs at a fixed learning rate; the test set is passed in so it can
# be evaluated during training.
network.start_training(training_inputs,
                       1000,
                       learning_rate=0.7,
                       test_inputs=test_inputs)

print("\nRESULTADOS:")
# Test inputs appear to be grouped by noise level; the index thresholds below
# mark the group boundaries (section labels printed in Portuguese).
for x, single_test in enumerate(test_inputs):
    if x == 0:
        print("\nSEM RUIDOS:")
    if x == 34:
        print("\nRUIDO MÍNIMO:")
    if x == 54:
        print("\nRUIDO MÉDIO:")
    if x == 74:
        print("\nRUIDO AVANÇADO:")
    if x == 94:
        print("\nNÃO FAZEM PARTE:")
    network.identify(single_test, log=True)