Esempio n. 1
0
def main(args):
    """Train LeNet on the MNIST 'train' split using slim's training loop.

    Builds the batched input pipeline, the LeNet graph and a softmax
    cross-entropy loss multiplied by ``LOSS_SCALING_FACTOR``, prints every
    trainable variable, and then runs ``slim.learning.train`` with
    ``./log/train_3`` as the log directory.

    Args:
        args: Not used by this function.
    """
    train_split = mnist.get_split('train', '/tmp/mnist')
    images, labels = load_batch(train_split, BATCHSIZE, is_training=True)

    with slim.arg_scope(lenet.lenet_arg_scope()):
        logits, _ = lenet.lenet(images, is_training=True)

    # The return value is not needed: the loss is collected again through
    # tf.losses.get_total_loss() just below.
    tf.losses.softmax_cross_entropy(
        slim.one_hot_encoding(labels, train_split.num_classes), logits)

    scaled_loss = tf.losses.get_total_loss() * LOSS_SCALING_FACTOR

    # Report the loss with the scaling factor divided back out.
    tf.summary.scalar('loss', scaled_loss / LOSS_SCALING_FACTOR)

    # NOTE(review): scale_down_grads is defined elsewhere; presumably it
    # undoes LOSS_SCALING_FACTOR on the gradients -- confirm.
    train_op = tf.contrib.training.create_train_op(
        scaled_loss,
        tf.train.GradientDescentOptimizer(LEARNING_RATE),
        summarize_gradients=True,
        transform_grads_fn=scale_down_grads)

    for variable in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
        print(variable)

    # session_wrapper=tf_debug.LocalCLIDebugWrapperSession was left disabled.
    slim.learning.train(train_op, './log/train_3', save_summaries_secs=2)
Esempio n. 2
0
    def __init__(self, train_conf, test_conf={}, erase_model=False):
        """Build the classifier graph, its loss/accuracy and the trainer.

        Args:
            train_conf: Mapping of training settings. This constructor reads
                'batch', 'network' and 'initLr', plus 'share', 'module',
                'length', 'channels' and 'resid' for the networks that
                take them.
            test_conf: Not used by this constructor (the default dict is
                never read or mutated here).
            erase_model: Passed to ``tf_utils.TFNet`` as ``eraseModels``.

        Raises:
            ValueError: If ``train_conf['network']`` is not one of 'lenet',
                'lenet2', 'lenet3' or 'rcnn'.
        """
        print("setting up network")
        self.name = tf_utils.dict_to_string(train_conf)
        self.network = tf_utils.TFNet(self.name,
                                      logDir=tf_data_dir +
                                      'tf_training/openai/',
                                      modelDir=tf_data_dir + 'tf_models/',
                                      outputDir=tf_data_dir + 'tf_outputs/',
                                      eraseModels=erase_model)

        self.conf = train_conf
        self.batch_size = self.conf['batch']

        # Flattened 28x28 inputs and 10-wide label rows (presumably one-hot;
        # the class is taken as the argmax below).
        X = tf.placeholder("float", [None, 28 * 28])
        Y = tf.placeholder("float", [None, 10])
        image = tf.reshape(X, [self.batch_size, 28, 28, 1])

        self.inputs = [X, Y]

        with slim.arg_scope(
            [slim.conv2d, slim.fully_connected],
                weights_regularizer=slim.l2_regularizer(0.00001),
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.1),
                activation_fn=tf.nn.relu):
            network = self.conf['network']
            if network == 'lenet':
                preds = lenet.lenet(image)
            elif network == 'lenet2':
                preds = lenet.lenet2(image, self.conf['share'])
            elif network == 'lenet3':
                preds = lenet.lenet3(image, self.conf['share'])
            elif network == 'rcnn':
                preds = rcnn.rcnn(image, self.conf['module'],
                                  self.conf['length'], self.conf['share'],
                                  self.conf['channels'], self.conf['resid'])
            else:
                # Fail here with a clear message instead of hitting an
                # unbound `preds` a few lines further down.
                raise ValueError("unknown network type: %r" % (network,))

        correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(Y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_preds, tf.float32))
        # Keyword arguments keep the logits/labels roles unambiguous and are
        # required by TensorFlow releases that reject positional use.
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=preds, labels=Y))

        self.network.add_to_losses(self.loss)

        # make a training network
        self.train_network = tf_utils.TFTrain(self.inputs,
                                              self.network,
                                              batchSz=self.batch_size,
                                              initLr=self.conf['initLr'])
        self.train_network.add_loss_summaries([self.loss, self.accuracy],
                                              ['loss', 'acc'])

        self.outputs = [self.loss, self.accuracy, preds]
        self.output_names = ['loss', 'acc', 'pred']
        print("done with network setup")
def show():
    """Plot 32 channels of LeNet's second returned feature map for one image.

    Loads one image via ``get_one_image`` from the evaluation folder, builds
    the LeNet graph on it, restores the latest checkpoint found in the log
    folder (printing 'error' when there is none) and shows the first 32
    channels of the ``conv2`` output in a 6x6 matplotlib grid.
    """
    eval_dir = 'C:/tensorflow/food/evaluation/'
    checkpoint_dir = 'C:/tensorflow/food/log/'
    image_array, _ = get_one_image(eval_dir)

    with tf.Graph().as_default():
        n_classes = 11

        # cast -> per-image standardisation -> add the batch dimension
        image_r = tf.reshape(
            tf.image.per_image_standardization(
                tf.cast(image_array, tf.float32)),
            [1, 64, 64, 3])
        keep_prob = tf.placeholder(tf.float32)
        _, conv2, _, _ = lenet.lenet(image_r,
                                     keep_prob,
                                     n_classes,
                                     is_train=False,
                                     is_pretrain=True)

        # NOTE(review): this placeholder is fed below, but the network was
        # built on image_r, so the feed does not appear to reach conv2.
        x = tf.placeholder(tf.float32, shape=[1, 64, 64, 3])

        saver = tf.train.Saver()

        with tf.Session() as sess:
            image_value = sess.run(image_r)
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
            if not (ckpt and ckpt.model_checkpoint_path):
                print('error')
            else:
                checkpoint_path = ckpt.model_checkpoint_path
                # The step is the text after the last '-' of the file name.
                global_step = checkpoint_path.split('/')[-1].split('-')[-1]
                saver.restore(sess, checkpoint_path)
                print('sucess, training step %s' % global_step)

            conv2_value = sess.run(conv2,
                                   feed_dict={
                                       x: image_value,
                                       keep_prob: 1.
                                   })

            # Drop the batch dimension and convert to uint8 for display.
            channels = sess.run(
                tf.image.convert_image_dtype(
                    tf.reshape(conv2_value, [8, 8, 64]), dtype=tf.uint8))

            plt.figure(figsize=(6, 6))
            for channel in range(32):
                plt.subplot(6, 6, channel + 1)
                plt.axis('off')
                plt.imshow(channels[:, :, channel])
            plt.show()
Esempio n. 4
0
    test_label = np.array(test_label)

    np.random.seed(80)
    np.random.shuffle(test_data)
    np.random.seed(80)
    np.random.shuffle(test_label)

    return train_data, train_label, test_data, test_label, dic


# Load the train/test arrays and report their shapes.
(train_data, train_label,
 test_data, test_label, dic) = data_process("data/train/", "data/test/")
print(train_data.shape, train_label.shape)
print(test_data.shape, test_label.shape)

# Instantiate the network on 32x32 single-channel inputs.
model = lenet()(Input(shape=(32, 32, 1)))

# Training callbacks, all driven by the validation loss.
early_stopping = EarlyStopping(monitor='val_loss', verbose=1, patience=10)
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=3, verbose=1)
# Weights-only checkpoints; the file name embeds epoch, loss and val_loss.
checkpoint = ModelCheckpoint(
    filepath="logs/ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5",
    save_weights_only=True,
    save_best_only=False,
    period=10,
    verbose=1)

# Optimisation hyper-parameters.
init_learning_rate = 1e-4
BATCH_SIZE = 2
Esempio n. 5
0
def main():
    """Fit the LeNet symbol with an MXNet ``Module``, saving every epoch.

    All settings are read from the module-level ``args``. Checkpoints are
    written under ``./model`` with a prefix that includes the kvstore rank.
    When ``args.retrain`` is set, parameters are first loaded from the
    checkpoint at ``args.model_load_epoch``. No validation data is used.
    """
    symbol = lenet(num_classes=args.num_classes)
    kv = mx.kvstore.create(args.kv_store)

    # CPU when no GPU list is given, otherwise one context per listed id.
    if args.gpus is None:
        devs = mx.cpu()
    else:
        devs = [mx.gpu(int(gpu_id)) for gpu_id in args.gpus.split(',')]

    # Batches each worker processes per epoch, never less than one.
    batches_per_worker = int(
        args.num_examples / args.batch_size / kv.num_workers)
    epoch_size = max(batches_per_worker, 1)
    begin_epoch = args.model_load_epoch or 0

    if not os.path.exists("./model"):
        os.mkdir("./model")
    model_prefix = "model/lenet-mnist-{}".format(kv.rank)
    checkpoint = mx.callback.do_checkpoint(model_prefix)

    arg_params = aux_params = None
    if args.retrain:
        _, arg_params, aux_params = mx.model.load_checkpoint(
            model_prefix, args.model_load_epoch)

    # Each worker reads its own partition of the record file.
    train = mx.io.ImageRecordIter(
        path_imgrec=os.path.join("data", "rec.rec"),
        label_width=1,
        data_shape=(3, 28, 28),
        shuffle=True,
        num_parts=kv.num_workers,
        part_index=kv.rank,
        batch_size=args.batch_size,
    )

    model = mx.mod.Module(
        symbol=symbol,
        data_names=('data', ),
        label_names=('softmax_label', ),
        context=devs,
    )

    speedometer = mx.callback.Speedometer(args.batch_size, args.frequent)
    lr_scheduler = multi_factor_scheduler(begin_epoch,
                                          epoch_size,
                                          step=[10, 20])
    optimizer_params = (
        ('learning_rate', args.lr),
        ('lr_scheduler', lr_scheduler),
    )
    initializer = mx.init.Xavier(rnd_type='gaussian',
                                 factor_type="in",
                                 magnitude=2)

    model.fit(
        train_data=train,
        eval_metric=['acc'],
        epoch_end_callback=checkpoint,
        batch_end_callback=speedometer,
        kvstore=kv,
        optimizer='nag',
        optimizer_params=optimizer_params,
        initializer=initializer,
        arg_params=arg_params,
        aux_params=aux_params,
        begin_epoch=begin_epoch,
        num_epoch=args.end_epoch,
    )
Esempio n. 6
0
def main():
    """Train LeNet on CIFAR-10 with PaddlePaddle, then classify one image.

    After every pass the parameters are saved to ``params_pass_<id>.tar``
    and the model is evaluated on the CIFAR-10 test reader. The inference
    topology is serialized to ``inference_topology.pkl`` before training
    starts. Once training finishes, the predicted label for
    ``image/dog.png`` (located next to this file) is printed.
    """
    datadim = 3 * 32 * 32
    classdim = 10

    # PaddlePaddle init
    paddle.init(use_gpu=with_gpu, trainer_count=7)

    image = paddle.layer.data(
        name="image", type=paddle.data_type.dense_vector(datadim))

    # Add neural network config
    # option 1. resnet
    # net = resnet_cifar10(image, depth=32)
    # option 2. lenet (the one in use)
    net = lenet(image)

    out = paddle.layer.fc(
        input=net, size=classdim, act=paddle.activation.Softmax())

    lbl = paddle.layer.data(
        name="label", type=paddle.data_type.integer_value(classdim))
    cost = paddle.layer.classification_cost(input=out, label=lbl)

    # Create parameters
    parameters = paddle.parameters.create(cost)

    # Create optimizer
    momentum_optimizer = paddle.optimizer.Momentum(
        momentum=0.9,
        regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128),
        learning_rate=0.1 / 128.0,
        learning_rate_decay_a=0.1,
        learning_rate_decay_b=50000 * 100,
        learning_rate_schedule='discexp')

    # End batch and end pass event handler
    def event_handler(event):
        """Report progress per batch; save and evaluate at each pass end."""
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print("\nPass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics))
            else:
                sys.stdout.write('.')
                sys.stdout.flush()
        if isinstance(event, paddle.event.EndPass):
            # Save parameters. The tar archive is binary data, so the file
            # must be opened in binary mode.
            with open('params_pass_%d.tar' % event.pass_id, 'wb') as f:
                parameters.to_tar(f)

            # `trainer` is bound further down in main(); it exists by the
            # time this handler is invoked from trainer.train().
            result = trainer.test(
                reader=paddle.batch(
                    paddle.dataset.cifar.test10(), batch_size=128),
                feeding={'image': 0,
                         'label': 1})
            print("\nTest with Pass %d, %s" % (event.pass_id, result.metrics))

    # Create trainer
    trainer = paddle.trainer.SGD(
        cost=cost, parameters=parameters, update_equation=momentum_optimizer)

    # Serialize the inference topology to a file.
    inference_topology = paddle.topology.Topology(layers=out)
    with open("inference_topology.pkl", 'wb') as f:
        inference_topology.serialize_for_inference(f)

    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.cifar.train10(), buf_size=50000),
            batch_size=128),
        num_passes=200,
        event_handler=event_handler,
        feeding={'image': 0,
                 'label': 1})

    # inference
    from PIL import Image
    import numpy as np
    import os

    def load_image(path):
        """Load an image as a flat float32 vector scaled by 1/255."""
        im = Image.open(path)
        im = im.resize((32, 32), Image.ANTIALIAS)
        im = np.array(im).astype(np.float32)
        # The array built from a PIL image is laid out H(height),
        # W(width), C(channel). PaddlePaddle requires the CHW order,
        # so transpose it.
        im = im.transpose((2, 0, 1))  # CHW
        # In the training phase, the channel order of the CIFAR images
        # is B(blue), G(green), R(red), but PIL opens images in RGB
        # mode, so the channel order must be swapped.
        im = im[(2, 1, 0), :, :]  # BGR
        im = im.flatten()
        im = im / 255.0
        return im

    test_data = []
    cur_dir = os.path.dirname(os.path.realpath(__file__))
    test_data.append((load_image(cur_dir + '/image/dog.png'), ))

    # users can remove the comments and change the model name
    # with open('params_pass_50.tar', 'r') as f:
    #    parameters = paddle.parameters.Parameters.from_tar(f)

    probs = paddle.infer(
        output_layer=out, parameters=parameters, input=test_data)
    lab = np.argsort(-probs)  # probs and lab are the results of one batch data
    print("Label of image/dog.png is: %d" % lab[0][0])
Esempio n. 7
0
def main():
    """Train LeNet on CIFAR-10 with PaddlePaddle, then classify one image.

    After every pass the parameters are saved to ``params_pass_<id>.tar``
    and the model is evaluated on the CIFAR-10 test reader. The inference
    topology is serialized to ``inference_topology.pkl`` before training
    starts. Once training finishes, the predicted label for
    ``image/dog.png`` (located next to this file) is printed.
    """
    datadim = 3 * 32 * 32
    classdim = 10

    # PaddlePaddle init
    paddle.init(use_gpu=with_gpu, trainer_count=7)

    image = paddle.layer.data(name="image",
                              type=paddle.data_type.dense_vector(datadim))

    # Add neural network config
    # option 1. resnet
    # net = resnet_cifar10(image, depth=32)
    # option 2. lenet (the one in use)
    net = lenet(image)

    out = paddle.layer.fc(input=net,
                          size=classdim,
                          act=paddle.activation.Softmax())

    lbl = paddle.layer.data(name="label",
                            type=paddle.data_type.integer_value(classdim))
    cost = paddle.layer.classification_cost(input=out, label=lbl)

    # Create parameters
    parameters = paddle.parameters.create(cost)

    # Create optimizer
    momentum_optimizer = paddle.optimizer.Momentum(
        momentum=0.9,
        regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128),
        learning_rate=0.1 / 128.0,
        learning_rate_decay_a=0.1,
        learning_rate_decay_b=50000 * 100,
        learning_rate_schedule='discexp')

    # End batch and end pass event handler
    def event_handler(event):
        """Report progress per batch; save and evaluate at each pass end."""
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print("\nPass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics))
            else:
                sys.stdout.write('.')
                sys.stdout.flush()
        if isinstance(event, paddle.event.EndPass):
            # Save parameters. The tar archive is binary data, so the file
            # must be opened in binary mode.
            with open('params_pass_%d.tar' % event.pass_id, 'wb') as f:
                parameters.to_tar(f)

            # `trainer` is bound further down in main(); it exists by the
            # time this handler is invoked from trainer.train().
            test_reader = paddle.batch(paddle.dataset.cifar.test10(),
                                       batch_size=128)
            result = trainer.test(reader=test_reader,
                                  feeding={
                                      'image': 0,
                                      'label': 1
                                  })
            print("\nTest with Pass %d, %s" % (event.pass_id, result.metrics))

    # Create trainer
    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=momentum_optimizer)

    # Serialize the inference topology to a file.
    inference_topology = paddle.topology.Topology(layers=out)
    with open("inference_topology.pkl", 'wb') as f:
        inference_topology.serialize_for_inference(f)

    train_reader = paddle.batch(paddle.reader.shuffle(
        paddle.dataset.cifar.train10(), buf_size=50000),
                                batch_size=128)
    trainer.train(reader=train_reader,
                  num_passes=200,
                  event_handler=event_handler,
                  feeding={
                      'image': 0,
                      'label': 1
                  })

    # inference
    from PIL import Image
    import numpy as np
    import os

    def load_image(path):
        """Load an image as a flat float32 vector scaled by 1/255."""
        im = Image.open(path)
        im = im.resize((32, 32), Image.ANTIALIAS)
        im = np.array(im).astype(np.float32)
        # The array built from a PIL image is laid out H(height),
        # W(width), C(channel). PaddlePaddle requires the CHW order,
        # so transpose it.
        im = im.transpose((2, 0, 1))  # CHW
        # In the training phase, the channel order of the CIFAR images
        # is B(blue), G(green), R(red), but PIL opens images in RGB
        # mode, so the channel order must be swapped.
        im = im[(2, 1, 0), :, :]  # BGR
        im = im.flatten()
        im = im / 255.0
        return im

    test_data = []
    cur_dir = os.path.dirname(os.path.realpath(__file__))
    test_data.append((load_image(cur_dir + '/image/dog.png'), ))

    # users can remove the comments and change the model name
    # with open('params_pass_50.tar', 'r') as f:
    #    parameters = paddle.parameters.Parameters.from_tar(f)

    probs = paddle.infer(output_layer=out,
                         parameters=parameters,
                         input=test_data)
    lab = np.argsort(-probs)  # probs and lab are the results of one batch data
    print("Label of image/dog.png is: %d" % lab[0][0])
def evaluate_one_image():
    """Run LeNet on one image from the training folder and print the result.

    Prints the raw logits, the arg-max index, the integer label returned by
    ``get_one_image`` and finally a ``pred:<class> label:<class>`` line.
    """
    tf.reset_default_graph()
    image_dir = 'C:/tensorflow/food/training/'
    image_array, label = get_one_image(image_dir)
    food = [
        'bread', 'dairy', 'dessert', 'egg', 'fried_food', 'meat', 'noodles',
        'rice', 'seafood', 'soup', 'vegetables'
    ]
    # Names used in the "pred:" part of the output; two of them are spelled
    # differently from the entries in `food`.
    predicted_names = ('bread', 'dairy', 'dessert', 'egg', 'fried food',
                       'meat', 'noodles', 'rice', 'seafood', 'soup',
                       'vegetable')

    with tf.Graph().as_default():
        n_classes = 11

        # cast -> per-image standardisation -> add the batch dimension
        image_r = tf.reshape(
            tf.image.per_image_standardization(
                tf.cast(image_array, tf.float32)),
            [1, 64, 64, 3])
        # NOTE(review): x is fed below, but the network is built on image_r,
        # so the feed does not appear to reach `logits`.
        x = tf.placeholder(tf.float32, shape=[1, 64, 64, 3])
        keep_prob = tf.placeholder(tf.float32)
        _, _, _, logits = lenet.lenet(image_r,
                                      keep_prob,
                                      n_classes,
                                      is_train=False,
                                      is_pretrain=True)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # NOTE(review): the saver comes from import_meta_graph rather
            # than from the variables created by lenet.lenet above --
            # confirm the restored weights are the ones `logits` reads.
            saver = tf.train.import_meta_graph(
                'C:/tensorflow/food/log/model.ckpt-19.meta')
            saver.restore(
                sess, tf.train.latest_checkpoint('C:/tensorflow/food/log/'))

            image_test = sess.run(image_r)
            prediction = sess.run(logits,
                                  feed_dict={
                                      x: image_test,
                                      keep_prob: 1.
                                  })
            print(prediction)
            max_index = np.argmax(prediction)
            print(max_index)
            label = int(label)
            print(label)

            # Indices outside the known classes print nothing.
            if 0 <= max_index < len(predicted_names):
                print('pred:%s label:%s' %
                      (predicted_names[max_index], food[label]))
def getNet(net):
    """Return a freshly constructed network for the given name.

    'lenet' and 'resnet18' are recognised; any other value yields None.
    """
    if net == 'resnet18':
        return resnet18()
    if net == 'lenet':
        return lenet()
    return None
Esempio n. 10
0
def _add_weight_histograms(writer, layers, epoch, scaled):
    """Write one weight histogram per layer that has a ``weight`` attribute.

    Args:
        writer: Summary writer providing ``add_histogram``.
        layers: Iterable of layers; those without ``weight`` are skipped.
        epoch: Step value recorded with each histogram.
        scaled: When True the logged tensor is
            ``weight.float() * 2**weight_exp.float()``; otherwise the raw
            ``weight`` is logged.
    """
    for idx, layer in enumerate(layers):
        if not hasattr(layer, 'weight'):
            continue
        weight = layer.weight
        if scaled:
            weight = layer.weight.float() * 2**layer.weight_exp.float()
        writer.add_histogram(
            'Weight/' + layer.__class__.__name__ + '_' + str(idx), weight,
            epoch)


def main():
    """Parse the CLI arguments, build the model and train or evaluate it.

    Requires CUDA; returns immediately when it is unavailable. Results, logs
    and checkpoints are written under ``args.results_dir/args.save``. With
    ``args.evaluate`` set, a single validation pass is run and the function
    returns; otherwise the model is trained from ``args.start_epoch`` up to
    ``args.epochs``, validated and checkpointed after every epoch.
    """
    #check gpu exist
    if not torch.cuda.is_available():
        print('Require nvidia gpu with tensor core to run')
        return

    global args
    args = parser.parse_args()
    #set up log and training results save path
    time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    # Compare by value: identity checks against a string literal depend on
    # interpreter interning and raise a SyntaxWarning on recent Pythons.
    if args.save == '':
        args.save = time_stamp
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    setup_logging(os.path.join(save_path, 'results.log'),
                  resume=args.resume != '')
    results_path = os.path.join(save_path, 'results')
    results = ResultsLog(results_path,
                         title=args.model_type +
                         ' training results - %s' % args.save)

    logging.info("saving to %s", save_path)
    logging.info("run arguments: %s", args)

    if args.weight_hist:
        writer_path = os.path.join(save_path, 'runs')
        logging.info("writing weight histogram to %s", writer_path)
        # Start from an empty directory so old event files are not mixed in.
        if os.path.exists(writer_path) and os.path.isdir(writer_path):
            shutil.rmtree(writer_path)
        writer_weight = SummaryWriter(writer_path)
    else:
        writer_weight = None

    # random seed config
    if args.seed > 0:
        torch.manual_seed(args.seed)
        logging.info("random seed: %s", args.seed)
    else:
        logging.info("random seed: None")

    logging.info("act rounding scheme: %s", ti_torch.ACT_ROUND_METHOD.__name__)
    logging.info("err rounding scheme: %s",
                 ti_torch.ERROR_ROUND_METHOD.__name__)
    logging.info("gradient rounding scheme: %s",
                 ti_torch.GRAD_ROUND_METHOD.__name__)
    if args.weight_frac:
        ti_torch.UPDATE_WITH_FRAC = True
        logging.info("Update WITH Fraction")
    else:
        ti_torch.UPDATE_WITH_FRAC = False

    if args.weight_decay:
        ti_torch.WEIGHT_DECAY = True
        logging.info("Update WITH WEIGHT DECAY")
    else:
        ti_torch.WEIGHT_DECAY = False

    # Create Network
    # NOTE(review): dataset/model combinations not listed below leave
    # `model` unbound -- confirm the argument parser rules them out.
    if args.model_type == 'int':
        logging.info('Create integer model')
        optimizer = None
        if args.dataset == 'mnist':
            model = TiLenet()
        elif args.dataset == 'cifar10':
            if args.model == 'vgg':
                model = TiVGG_cifar(args.depth, 10)

        if args.weight_frac:
            regime = model.regime_frac
        else:
            regime = model.regime
    else:
        if args.dataset == 'mnist' and args.model == 'lenet':
            model = lenet().to('cuda:0')

        elif args.dataset == 'cifar10':
            if args.model == 'vgg':
                model = VGG_cifar(args.depth, 10).to('cuda:0')

        num_parameters = sum(p.nelement() for p in model.parameters())
        logging.info("created float network on %s", args.dataset)
        logging.info("number of parameters: %d", num_parameters)
        regime = getattr(model, 'regime')
        optimizer = OptimRegime(model.parameters(), regime)

    # Optionally load weights: evaluate > resume > init, in that priority.
    best_prec1 = 0
    if args.evaluate:
        if not os.path.isfile(args.evaluate):
            parser.error('invalid checkpoint: {}'.format(args.evaluate))
        checkpoint = torch.load(args.evaluate)
        model.load_state_dict(checkpoint['state_dict'])
        logging.info("loaded checkpoint '%s' (epoch %s)", args.evaluate,
                     checkpoint['epoch'])
    elif args.resume:
        checkpoint_file = args.resume
        # A directory means "resume that run": reload its results table
        # and pick its best checkpoint.
        if os.path.isdir(checkpoint_file):
            results.load(os.path.join(checkpoint_file, 'results.csv'))
            checkpoint_file = os.path.join(checkpoint_file,
                                           'model_best.pth.tar')
        if os.path.isfile(checkpoint_file):
            logging.info("loading checkpoint '%s'", args.resume)
            checkpoint = torch.load(checkpoint_file)
            args.start_epoch = checkpoint['epoch'] + 1
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            logging.info("loaded checkpoint '%s' (epoch %s), best_prec1 %f",
                         checkpoint_file, checkpoint['epoch'],
                         checkpoint['best_prec1'])
        else:
            logging.error("no checkpoint found at '%s'", args.resume)
    elif args.init:
        if not os.path.isfile(args.init):
            parser.error('invalid checkpoint: {}'.format(args.init))
        checkpoint = torch.load(args.init)
        model.load_state_dict(checkpoint['state_dict'])
        logging.info("initial weights from checkpoint '%s' ", args.init)

    # dataset loading code
    default_transform = {
        'train': get_transform(args.dataset, augment=True),
        'eval': get_transform(args.dataset, augment=False)
    }
    criterion = nn.CrossEntropyLoss()
    criterion.to('cuda:0')

    test_data = get_dataset(name=args.dataset,
                            split='val',
                            transform=default_transform['eval'],
                            download=args.download,
                            datasets_path=args.data_dir)

    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True)

    # Evaluation-only run: one validation pass, then stop.
    if args.evaluate:
        val_loss, val_prec1, val_prec5 = forward(test_loader,
                                                 model,
                                                 criterion,
                                                 0,
                                                 training=False,
                                                 model_type=args.model_type)
        logging.info('Validation Prec@1 {val_prec1:.3f} '
                     'Validation Prec@5 {val_prec5:.3f} \n'.format(
                         val_prec1=val_prec1, val_prec5=val_prec5))
        return

    train_data = get_dataset(name=args.dataset,
                             split='train',
                             transform=default_transform['train'],
                             download=args.download,
                             datasets_path=args.data_dir)
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    logging.info('training regime: %s', regime)
    uses_int_regime = args.model_type == 'int' or args.model_type == 'hybrid'
    # Seed the gradient bit-width from the regime entry for epoch 0.
    if uses_int_regime:
        for s in regime:
            if s['epoch'] == 0:
                ti_torch.GRAD_BITWIDTH = s['gb']
                break

    for epoch in range(args.start_epoch, args.epochs):
        # Switch the gradient bit-width when the regime has an entry for
        # this epoch.
        if uses_int_regime:
            for s in regime:
                if s['epoch'] == epoch:
                    ti_torch.GRAD_BITWIDTH = s['gb']
                    logging.info('changing gradient bitwidth: %d',
                                 ti_torch.GRAD_BITWIDTH)
                    break
        # train
        train_loss, train_prec1, train_prec5 = forward(
            train_loader,
            model,
            criterion,
            epoch,
            training=True,
            model_type=args.model_type,
            optimizer=optimizer,
            writer=writer_weight)

        val_loss, val_prec1, val_prec5 = forward(test_loader,
                                                 model,
                                                 criterion,
                                                 epoch,
                                                 training=False,
                                                 model_type=args.model_type)

        is_best = val_prec1 > best_prec1
        best_prec1 = max(val_prec1, best_prec1)
        logging.info("best_prec1: %f %s", best_prec1, save_path)
        save_checkpoint(
            {
                'epoch': epoch,
                'model': args.model,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'regime': regime
            }, is_best, save_path, 'checkpoint.pth.tar', args.save_all)
        #record results
        logging.info(
            args.model_type + ' '
            'Epoch: {0} '
            'Train Prec@1 {train_prec1:.3f} '
            'Train Prec@5 {train_prec5:.3f} '
            'Valid Prec@1 {val_prec1:.3f} '
            'Valid Prec@5 {val_prec5:.3f} \n'.format(epoch,
                                                     train_prec1=train_prec1,
                                                     val_prec1=val_prec1,
                                                     train_prec5=train_prec5,
                                                     val_prec5=val_prec5))
        results.add(
            epoch=epoch,
            train_error1=100 - train_prec1,
            val_error1=100 - val_prec1,
            train_error5=100 - train_prec5,
            val_error5=100 - val_prec5,
        )

        results.plot(x='epoch',
                     y=['train_error1', 'val_error1'],
                     legend=['train', 'val'],
                     title='Error@1',
                     ylabel='error %')

        results.plot(x='epoch',
                     y=['train_error5', 'val_error5'],
                     legend=['train', 'val'],
                     title='Error@5',
                     ylabel='error %')

        results.save()

        if args.weight_hist:
            logging.info("writing weight histogram to %s", save_path)
            writer_weight.add_scalar('Loss/train', train_loss, epoch)
            writer_weight.add_scalar('Loss/test', val_loss, epoch)
            writer_weight.add_scalar('Accuracy/train', train_prec1, epoch)
            writer_weight.add_scalar('Accuracy/test', val_prec1, epoch)
            # Integer layers are logged after applying their exponent;
            # floating-point layers are logged as they are.
            if args.model_type == 'int':
                _add_weight_histograms(writer_weight, model.forward_layers,
                                       epoch, scaled=True)
            elif args.model_type == 'float':
                _add_weight_histograms(writer_weight, model.layers, epoch,
                                       scaled=False)
                _add_weight_histograms(writer_weight, model.classifier,
                                       epoch, scaled=False)
            elif args.model_type == 'hybrid':
                _add_weight_histograms(writer_weight, model.forward_layers,
                                       epoch, scaled=True)
                _add_weight_histograms(writer_weight, model.fp32_layers,
                                       epoch, scaled=False)
Esempio n. 11
0
                                           download=True)

# MNIST test split, converted to tensors.
test_dataset = torchvision.datasets.MNIST(root='data/',
                                          train=False,
                                          transform=transforms.ToTensor())

# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

model = lenet().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize: clear the gradients left over from the
        # previous step, back-propagate this batch's loss, then update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
Esempio n. 12
0
    print("Validation set length: {}".format(len(x_validation)))
    print("Test set length: {}".format(len(x_test)))

    x_train = np.pad(x_train, ((0, 0), (2, 2), (2, 2), (0, 0)), 'constant')
    x_validation = np.pad(x_validation, ((0, 0), (2, 2), (2, 2), (0, 0)),
                          'constant')
    x_test = np.pad(x_test, ((0, 0), (2, 2), (2, 2), (0, 0)), 'constant')

    print()
    print("Image shape padded: {}".format(x_train[0].shape))

    x = tf.placeholder(tf.float32, (None, 32, 32, 1))
    y = tf.placeholder(tf.int32, (None))
    one_hot_y = tf.one_hot(y, 10)

    logits = lenet.lenet(x)
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_y,
                                                            logits=logits)
    loss_operation = tf.reduce_mean(cross_entropy)
    optimizer = tf.train.AdamOptimizer(learning_rate=config.rate)
    training_operation = optimizer.minimize(loss_operation)

    correct_prediction = tf.equal(tf.argmax(logits, 1),
                                  tf.argmax(one_hot_y, 1))
    accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction,
                                                tf.float32))
    saver = tf.train.Saver()

    with tf.Session() as sess:
        print("Start training", file=sys.stderr)
        print("Start training")
def train():
    """Train the LeNet food classifier and write one checkpoint per epoch.

    Builds the training and validation input batches with
    ``input_data.get_batch``, the model/loss/accuracy/optimiser ops with the
    ``lenet`` module, and then runs ``Max_epoch`` epochs of ``iterations``
    training steps each.  Every 50th step additionally evaluates one
    validation batch and prints loss and accuracy for both batches; the
    training loss of those steps is collected and plotted at the end.  After
    each epoch a checkpoint ``model.ckpt-<epoch>`` is saved to the log
    directory.

    The function reads the module-level settings ``IMG_W``, ``IMG_H``,
    ``BATCH_SIZE``, ``CAPACITY``, ``N_CLASSES``, ``IS_TRAIN``,
    ``IS_PRETRAIN``, ``learning_rate``, ``Max_epoch`` and ``iterations``.

    The queue-runner threads are always stopped and joined, and the session
    is always closed, even when training aborts with an unexpected exception.
    """
    train_dir = 'C:/tensorflow/food/training/'
    vali_dir = 'C:/tensorflow/food/validation/'
    train_log_dir = 'C:/tensorflow/food/log/'

    # Local names avoid ``train`` so this function is not shadowed by its own
    # file list.
    train_files, train_file_labels = input_data.get_files(train_dir)
    vali_files, vali_file_labels = input_data.get_files(vali_dir)
    tr_loss = []

    # tf.device() scopes the default graph, which is where every op below is
    # created.  (tf.Graph().device() would only scope a brand-new, unused
    # graph and therefore pin nothing.)
    with tf.device('/gpu:0'):
        my_global_step = tf.Variable(0, name='global_step', trainable=False)
        train_image_batch, train_label_batch, _ = input_data.get_batch(
            train_files,
            train_file_labels,
            IMG_W,
            IMG_H,
            BATCH_SIZE,
            CAPACITY,
            N_CLASSES,
            distortion=True)
        test_image_batch, test_label_batch, _ = input_data.get_batch(
            vali_files,
            vali_file_labels,
            IMG_W,
            IMG_H,
            BATCH_SIZE,
            CAPACITY,
            N_CLASSES,
            distortion=False)
        x = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMG_W, IMG_H, 3])
        y_ = tf.placeholder(tf.int16, shape=[BATCH_SIZE, N_CLASSES])
        keep_prob = tf.placeholder(tf.float32)
        _, _, _, logits = lenet.lenet(x, keep_prob, N_CLASSES, IS_TRAIN,
                                      IS_PRETRAIN)
        loss = lenet.loss(logits, y_)
        accuracy = lenet.accuracy(logits, y_)
        train_op = lenet.optimize(loss, learning_rate, my_global_step)

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=Max_epoch)
    init = tf.global_variables_initializer()
    checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')

    # Soft placement lets ops that have no GPU kernel (or a machine without a
    # GPU) fall back to the CPU instead of failing on the explicit device.
    session_config = tf.ConfigProto(allow_soft_placement=True)

    # The context manager closes the session on every exit path, and only
    # after the queue-runner threads have been joined in the ``finally``.
    with tf.Session(config=session_config) as sess:
        sess.run(init)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            for epoch in range(Max_epoch):
                print('epoch:', epoch)
                for iteration in range(iterations):
                    # Only leaves the inner loop; the checkpoint for this
                    # epoch is still written below.
                    if coord.should_stop():
                        break

                    report_step = iteration % 50 == 0
                    if report_step:
                        start = time.time()

                    tra_images, tra_labels = sess.run(
                        [train_image_batch, train_label_batch])
                    # NOTE(review): keep_prob is fed as 1. on training steps
                    # as well; if it is a dropout keep-probability, dropout
                    # is never active -- confirm this is intended.
                    train_feed = {
                        x: tra_images,
                        y_: tra_labels,
                        keep_prob: 1.
                    }

                    if not report_step:
                        sess.run([train_op], feed_dict=train_feed)
                        continue

                    # Reporting step: train, then score one validation batch.
                    _, tra_loss, tra_acc = sess.run(
                        [train_op, loss, accuracy], feed_dict=train_feed)
                    val_images, val_labels = sess.run(
                        [test_image_batch, test_label_batch])
                    val_loss, val_acc = sess.run([loss, accuracy],
                                                 feed_dict={
                                                     x: val_images,
                                                     y_: val_labels,
                                                     keep_prob: 1.
                                                 })
                    end = time.time()
                    print('iteration: {} '.format(iteration),
                          'loss_train: {:.4f} '.format(tra_loss),
                          'accu_train: {:>5.2%} '.format(tra_acc),
                          'loss_test: {:.4f} '.format(val_loss),
                          'accu_valid: {:>5.2%} '.format(val_acc),
                          '{:.4f} sec/batch'.format((end - start)))
                    tr_loss.append(tra_loss)

                # One checkpoint per epoch: model.ckpt-<epoch>.
                saver.save(sess, checkpoint_path, global_step=epoch)
        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')
        finally:
            # Stop and join the input threads while the session is still
            # open, whatever the reason for leaving the loop.
            coord.request_stop()
            coord.join(threads)

    # One point per reporting step (every 50th iteration).
    plt.title('learning curve')
    plt.grid(True)
    plt.plot(tr_loss, 'b-')
    plt.xlabel('iteration')
    plt.show()
def test():
    """Score every per-epoch checkpoint on the evaluation and training data.

    For each ``i`` in ``range(Max_epoch)`` the checkpoint
    ``C:/tensorflow/food/log/model.ckpt-<i>`` is restored and the accuracy op
    is averaged over up to 52 evaluation batches and up to ``iterations``
    training batches.  Both averages are printed per checkpoint and plotted
    (as percentages) once all checkpoints have been processed.

    The function reads the module-level settings ``IMG_W``, ``IMG_H``,
    ``BATCH_SIZE``, ``CAPACITY``, ``N_CLASSES``, ``IS_PRETRAIN``,
    ``Max_epoch`` and ``iterations``.

    The queue-runner threads are always stopped and joined, and the session
    is always closed, even when evaluation aborts with an unexpected
    exception.
    """
    iterations_te = 52

    test_dir = 'C:/tensorflow/food/evaluation/'
    train_dir = 'C:/tensorflow/food/training/'
    # Local names avoid ``train`` so the module-level function of that name
    # is not shadowed here.
    train_files, train_file_labels = input_data.get_files(train_dir)
    test_files, test_file_labels = input_data.get_files(test_dir)

    train_image_batch, train_label_batch, _ = input_data.get_batch(
        train_files,
        train_file_labels,
        IMG_W,
        IMG_H,
        BATCH_SIZE,
        CAPACITY,
        N_CLASSES,
        distortion=False)
    test_image_batch, test_label_batch, _ = input_data.get_batch(
        test_files,
        test_file_labels,
        IMG_W,
        IMG_H,
        BATCH_SIZE,
        CAPACITY,
        N_CLASSES,
        distortion=False)

    total_te_accuracy = []
    total_tr_accuracy = []
    x = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMG_W, IMG_H, 3])
    y_ = tf.placeholder(tf.int16, shape=[BATCH_SIZE, N_CLASSES])
    keep_prob = tf.placeholder(tf.float32)
    _, _, _, logits = lenet.lenet(x, keep_prob, N_CLASSES, False, IS_PRETRAIN)
    accuracy = lenet.accuracy(logits, y_)
    # The former bare ``tf.Graph().device('/gpu:0')`` statement was dropped:
    # it built a context manager on a new graph and discarded it, so it never
    # influenced the ops above.
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    # The context manager closes the session on every exit path, and only
    # after the queue-runner threads have been joined in the ``finally``.
    with tf.Session() as sess:
        sess.run(init)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        def mean_accuracy(image_batch, label_batch, num_batches):
            """Average the accuracy op over up to *num_batches* batches.

            The mean is taken over the batches actually evaluated, so an
            early stop requested through the coordinator does not deflate
            the result; 0.0 is returned when no batch was evaluated.
            """
            scores = []
            for _ in range(num_batches):
                if coord.should_stop():
                    break
                images, labels = sess.run([image_batch, label_batch])
                scores.append(
                    sess.run(accuracy,
                             feed_dict={
                                 x: images,
                                 y_: labels,
                                 keep_prob: 1.
                             }))
            return sum(scores) / len(scores) if scores else 0.0

        try:
            for i in range(Max_epoch):
                saver.restore(sess,
                              'C:/tensorflow/food/log/model.ckpt-' + str(i))
                tmp = mean_accuracy(test_image_batch, test_label_batch,
                                    iterations_te)
                print('epoch' + str(i) + ',test acc: {:>5.2%}'.format(tmp))
                tmp_tr = mean_accuracy(train_image_batch, train_label_batch,
                                       iterations)
                print('epoch' + str(i) +
                      ',train acc: {:>5.2%}'.format(tmp_tr))
                total_te_accuracy.append(tmp * 100)
                total_tr_accuracy.append(tmp_tr * 100)
        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')
        finally:
            # Stop and join the input threads while the session is still
            # open, whatever the reason for leaving the loop.
            coord.request_stop()
            coord.join(threads)

    # One point per restored checkpoint: blue = training, red = evaluation.
    plt.title('Accuracy')
    plt.grid(True)
    plt.plot(total_tr_accuracy, 'b-')
    plt.plot(total_te_accuracy, 'r-')
    plt.xlabel('iteration')
    plt.ylabel('%')
    plt.show()