Example #1
def main(_):
    tf.logging.set_verbosity(tf.logging.DEBUG)

    with tf.variable_scope('data'):
        train_images, train_labels, num_classes = dog_tensor(
            FLAGS.dogdir, FLAGS.batch_size, class_regex=FLAGS.dog_regex)

    net = MobileNet(num_classes, alpha=FLAGS.alpha)

    train_logits = net(train_images, is_training=True)
    tf.logging.info('built model on training data')
    param_stats = tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(),
        tfprof_options=tfprof.model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS
    )
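    # (prints a per-layer breakdown of trainable-parameter counts via tfprof)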

    with tf.variable_scope('training'):
        loss = tf.losses.sparse_softmax_cross_entropy(labels=train_labels,
                                                      logits=train_logits)
        loss = tf.reduce_mean(loss)
        tf.summary.scalar('train/xent', loss)

        global_step = tf.train.get_or_create_global_step()

        opt = tf.train.AdamOptimizer(FLAGS.learning_rate)
        train_step = opt.minimize(loss, global_step=global_step)
        # make sure we update running averages
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if update_ops:
            train_step = tf.group(train_step, *update_ops)
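            # (UPDATE_OPS holds the batch-norm moving-average updates, so they
            # now run together with every training step)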

    # valid/test

    sv = tf.train.Supervisor(logdir=FLAGS.logdir,
                             global_step=global_step,
                             save_summaries_secs=15)

    with sv.managed_session() as sess, sv.stop_on_exception():
        tf.logging.debug('ready to run things')

        # sess = tfdbg.LocalCLIDebugWrapperSession(sess)

        step = sess.run(global_step)
        while step < FLAGS.max_steps:
            step, train_loss, _ = sess.run([global_step, loss, train_step])
            tf.logging.info('(%d) train loss: %f', step, train_loss)
Example #2
def train():
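    # Augmentation settings; the keys appear to mirror Keras-style parameters
    # (rotation range in degrees, width/height shift and zoom fractions).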
    tr_config = {
        'flag': True,
        'rg': 25,  # 7, 5
        'wrg': 0.25,  # 1, 3
        'hrg': 0.25,  # 1, 3
        'zoom': 0.25  # 1, 1
    }
    callbacks = get_callbacks('mynet_v4_bias', patience=30)

    paths, y = search_file('set1/segmented_set1')
    paths, y = search_file('set2/segmented_set2', paths=paths, y=y)

    ds = DataSet(nframe=30,
                 fstride=6,
                 name='UT interaction',
                 size=[224, 224, 3],
                 filepaths=paths,
                 y=y,
                 kernel_size=4)
    ds.make_set(op='msqr', name='train')
    ds.make_set(op='msqr', name='valid')

    #opt = Adam(lr=1e-4, beta_1=0.9, beta_2=0.999, decay=0.1)
    #opt = SGD(lr=2*1e-1, momentum=0.9, nesterov=True, decay=0.2)
    opt = RMSprop(lr=0.001, rho=0.9, decay=0.01)

    model = MobileNet(alpha=1.0, shape=[29, 56, 56, 1], nframe=29)
    model.compile(optimizer=opt,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()

    #model.load_weights('mynet_v4.h5')
    model.fit_generator(generator=ds.train_gen(batch_size=5,
                                               aug_config=tr_config),
                        steps_per_epoch=100,
                        epochs=300,
                        validation_data=ds.valid_gen(),
                        verbose=1,
                        validation_steps=ds.getVlen,
                        callbacks=callbacks)
Example #3
class KeypointModel(BasicModule):
    def __init__(self, opt):
        super(KeypointModel, self).__init__(opt)
        self.pretrained = MobileNet()
        self.trf = nn.Sequential(nn.Conv2d(256, 256, 3, 1, 1),
                                 nn.BatchNorm2d(256), nn.ReLU(True),
                                 nn.Conv2d(256, 128, 3, 1, 1),
                                 nn.BatchNorm2d(128), nn.ReLU(True))
        # self.ReturnType = namedtuple('ReturnType',['out1','out2','out3','out4','out5','out6'])
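        # Stage 1 consumes the 128-channel features from self.trf; later stages
        # take those features concatenated with the previous stage's output,
        # hence the 169 input channels.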
        stages = [Stage(128)] + [Stage(169) for _ in range(2, 7)]
        self.stages = nn.ModuleList(stages)

    def forward(self, img):
        img = self.pretrained(img)
        #if self.optimizer.param_groups[0]['lr'] == 0:
        #    img = img.detach()
        features = self.trf(img)

        output = self.stages[0](features)
        outputs = [output]
        for ii in range(1, 6):
            stage = self.stages[ii]
            stage_input = t.cat([features, output], dim=1)  # avoid shadowing builtin input()
            output = stage(stage_input)
            outputs.append(output)

        return outputs

    def get_optimizer(self, lr1, lr2):
        param_groups = [{
            'params': self.pretrained.parameters(),
            'lr': lr1
        }, {
            'params': self.stages.parameters(),
            'lr': lr2
        }, {
            'params': self.trf.parameters(),
            'lr': lr2
        }]

        self.optimizer = t.optim.Adam(param_groups)
        return self.optimizer
Example #4
def load_model(args):
    if args.model == 'inception':
        model = InceptionV3(include_top=False, weights='imagenet', pooling=args.pooling)
        preprocess_mode = 'tf'
    elif args.model == 'xception':
        model = Xception(include_top=False, weights='imagenet', pooling=args.pooling)
        preprocess_mode = 'tf'
    elif args.model == 'inceptionresnet':
        model = InceptionResNetV2(include_top=False, weights='imagenet', pooling=args.pooling)
        preprocess_mode = 'tf'
    elif args.model == 'mobilenet':
        model = MobileNet(include_top=False, weights='imagenet', pooling=args.pooling)
        preprocess_mode = 'tf'
    elif args.model == 'mobilenet2':
        model = MobileNetV2(include_top=False, weights='imagenet', pooling=args.pooling)
        preprocess_mode = 'tf'
    elif args.model == 'nasnet':
        model = NASNetLarge(include_top=False, weights='imagenet', pooling=args.pooling)
        preprocess_mode = 'tf'
    elif args.model == 'resnet':
        model = ResNet50(include_top=False, weights='imagenet', pooling=args.pooling)
        preprocess_mode = 'caffe'
    elif args.model == 'vgg16':
        model = VGG16(include_top=False, weights='imagenet', pooling=args.pooling)
        preprocess_mode = 'caffe'
    elif args.model == 'vgg19':
        model = VGG19(include_top=False, weights='imagenet', pooling=args.pooling)
        preprocess_mode = 'caffe'
    else:
        # return a (None, None) pair so callers can still unpack the result
        print("Model not found")
        return None, None

    return model, preprocess_mode
Example #5
def run_training(config,
                 n_classes,
                 train_loader,
                 valid_loader,
                 width=1,
                 mb_version=1):
    """
    Full training procedure, with a fine-tuning pass after the regular training.
    """
    # defining model
    if width > 1:
        model = tvm.resnet18(num_classes=n_classes)
    else:
        if mb_version == 1:
            model = MobileNet(n_classes=n_classes, width_mult=width)
        else:
            model = MobileNetV2(n_classes=n_classes, width_mult=width)
    model = model.to(config['device'])

    # print out number of parameters
    num_params = 0
    for p in model.parameters():
        num_params += np.prod(p.size())
    print(f"width={width}, num_params {num_params}")

    # defining loss criterion, optimizer and learning rate scheduler
    criterion = t.nn.CrossEntropyLoss()
    opt = t.optim.Adam(model.parameters(), config['lr'])
    sched = t.optim.lr_scheduler.MultiStepLR(opt, [3, 6])
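    # MultiStepLR decays the learning rate by its default factor of 0.1 after epochs 3 and 6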

    # training process with Adam
    tr_loss, tr_accuracy, valid_loss, valid_accuracy = train(
        config, model, train_loader, valid_loader, criterion, opt, sched)
    # training process with SGDR
    opt = t.optim.SGD(model.parameters(), config['lr'] / 10, momentum=0.9)
    sched = SGDR(opt, 3, 1.2)
    tr_loss_finetune, tr_accuracy_finetune, valid_loss_finetune, valid_accuracy_finetune = train(
        config, model, train_loader, valid_loader, criterion, opt, sched)
    return [
        tr_loss + tr_loss_finetune, tr_accuracy + tr_accuracy_finetune,
        valid_loss + valid_loss_finetune,
        valid_accuracy + valid_accuracy_finetune
    ]
Example #6
class SSD(nn.Module):
    def __init__(self, num_classes):
        super(SSD, self).__init__()
        self.num_classes = num_classes

        # Setup the backbone network (base_net)
        self.base_net = MobileNet(num_classes)

        # Feature maps will be extracted from layers [11] and [13] of base_net
        self.base_output_layer_indices = (11, 13)
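        # (these two maps play the roles of the conv4_3- and FC7-style source layers in the original SSD)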

        # Define the Additional feature extractor
        self.additional_feat_extractor = nn.ModuleList([
            # Conv8_2
            nn.Sequential(
                nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=256,
                          out_channels=512,
                          kernel_size=3,
                          stride=2,
                          padding=1), nn.ReLU()),
            # Conv9_2
            nn.Sequential(
                nn.Conv2d(in_channels=512, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128,
                          out_channels=256,
                          kernel_size=3,
                          stride=2,
                          padding=1), nn.ReLU()),
            nn.Sequential(
                nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128,
                          out_channels=256,
                          kernel_size=3,
                          stride=2,
                          padding=1), nn.ReLU()),
            nn.Sequential(
                nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128,
                          out_channels=256,
                          kernel_size=3,
                          stride=2,
                          padding=1), nn.ReLU())
        ])

        # Bounding box offset regressor
        num_prior_bbox = 6  # num of prior bounding boxes
        self.loc_regressor = nn.ModuleList([
            nn.Conv2d(in_channels=512,
                      out_channels=num_prior_bbox * 4,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=1024,
                      out_channels=num_prior_bbox * 4,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=512,
                      out_channels=num_prior_bbox * 4,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=256,
                      out_channels=num_prior_bbox * 4,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=256,
                      out_channels=num_prior_bbox * 4,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=256,
                      out_channels=num_prior_bbox * 4,
                      kernel_size=3,
                      padding=1)
        ])

        # Bounding box classification confidence for each label
        self.classifier = nn.ModuleList([
            nn.Conv2d(in_channels=512,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=1024,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=512,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=256,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=256,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=256,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1)
        ])

        # Load pre-trained weights into self.base_net; fine-tuning from them improves accuracy
        temp_state = torch.load('pretrained/mobienetv2.pth')
        #self.base_net.load_state_dict(cur_state)

        cur_dict = self.base_net.state_dict()
        input_state = {
            k: v
            for k, v in temp_state.items()
            if k in cur_dict and v.size() == cur_dict[k].size()
        }
        cur_dict.update(input_state)
        self.base_net.load_state_dict(cur_dict)

        def init_with_xavier(m):
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight)

        self.loc_regressor.apply(init_with_xavier)
        self.classifier.apply(init_with_xavier)
        self.additional_feat_extractor.apply(init_with_xavier)
Example #7
from mobilenet import MobileNet
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
# from my_generator2 import My_Generator

# net = applications.mobilenet_v2.MobileNetV2(include_top=False, pooling='avg', weights='imagenet',
#                                 input_shape = (223,223,3))
# net = applications.nasnet.NASNetMobile(input_shape=(223, 223, 3), include_top=False, weights='imagenet', 
#                                 pooling='avg')

# print(len(net.layers))
# model = Sequential()
# model.add(net)
# model.add(Dense(2, activation='softmax'))

# for layer in net.layers[:-45]:
#    layer.trainable = False
model = MobileNet((64, 64, 3), 200)

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
# exit(0)
print("Compile model done!")


earlyStopping = EarlyStopping(monitor='val_acc', patience=20, verbose=1)
filepath = "models/imagenet_clf_model_test.h5"
mcp_save = ModelCheckpoint(filepath, save_best_only=True, monitor='val_acc')
reduce_lr = ReduceLROnPlateau('val_acc', factor=0.5,
                                patience=4, verbose=1)
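# halve the learning rate after 4 epochs without improvement in val_acc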

train_data_dir = 'data/train'
validation_data_dir = 'data/validation'
Example #8
import keras
from keras.models import load_model
from keras.preprocessing import image
from keras.preprocessing.image import load_img
from mobilenet import MobileNet
from keras.applications.imagenet_utils import decode_predictions

img_file = 'demo.png'
img = load_img(img_file, target_size=(32, 32))
x = image.img_to_array(img)  # use a new name so the `image` module is not shadowed
x = x.reshape((1, x.shape[0], x.shape[1], x.shape[2]))
model = MobileNet()
model.load_weights('mobileV1-lite.h5')

# model.summary()
result = model.predict(x)
print(result)
Example #9
class SSD(nn.Module):
    def __init__(self, num_classes):
        super(SSD, self).__init__()
        self.num_classes = num_classes

        # Setup the backbone network (base_net)
        self.base_net = MobileNet(num_classes)

        # Feature maps will be extracted from layers [11] and [13] of base_net
        self.base_output_layer_indices = (11, 13)
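        # (these two maps play the roles of the conv4_3- and FC7-style source layers in the original SSD)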

        # Define the Additional feature extractor
        self.additional_feat_extractor = nn.ModuleList([
            # Conv8_2
            nn.Sequential(
                nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=256,
                          out_channels=512,
                          kernel_size=3,
                          stride=2,
                          padding=1), nn.ReLU()),
            # Conv9_2
            nn.Sequential(
                nn.Conv2d(in_channels=512, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128,
                          out_channels=256,
                          kernel_size=3,
                          stride=2,
                          padding=1), nn.ReLU()),
            nn.Sequential(
                nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128,
                          out_channels=256,
                          kernel_size=3,
                          stride=2,
                          padding=1), nn.ReLU()),
            nn.Sequential(
                nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128,
                          out_channels=256,
                          kernel_size=3,
                          stride=2,
                          padding=1), nn.ReLU())
        ])

        # Bounding box offset regressor
        num_prior_bbox = 6  # num of prior bounding boxes
        self.loc_regressor = nn.ModuleList([
            nn.Conv2d(in_channels=512,
                      out_channels=num_prior_bbox * 4,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=1024,
                      out_channels=num_prior_bbox * 4,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=512,
                      out_channels=num_prior_bbox * 4,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=256,
                      out_channels=num_prior_bbox * 4,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=256,
                      out_channels=num_prior_bbox * 4,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=256,
                      out_channels=num_prior_bbox * 4,
                      kernel_size=3,
                      padding=1)
        ])

        # Bounding box classification confidence for each label
        self.classifier = nn.ModuleList([
            nn.Conv2d(in_channels=512,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=1024,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=512,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=256,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=256,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=256,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1)
        ])

        # Load pre-trained weights into self.base_net; fine-tuning from them improves accuracy
        temp_state = torch.load('pretrained/mobienetv2.pth')
        #self.base_net.load_state_dict(cur_state)

        cur_dict = self.base_net.state_dict()
        input_state = {
            k: v
            for k, v in temp_state.items()
            if k in cur_dict and v.size() == cur_dict[k].size()
        }
        cur_dict.update(input_state)
        self.base_net.load_state_dict(cur_dict)

        def init_with_xavier(m):
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight)

        self.loc_regressor.apply(init_with_xavier)
        self.classifier.apply(init_with_xavier)
        self.additional_feat_extractor.apply(init_with_xavier)

    def feature_to_bbbox(self, loc_regress_layer, confidence_layer,
                         input_feature):
        """
        Compute the bounding box class scores and the bounding box offset
        :param loc_regress_layer: offset regressor layer to run forward
        :param confidence_layer: confidence layer to run forward
        :param input_feature: feature map to be fed in
        :return: confidence and location, with dim:(N, num_priors, num_classes) and dim:(N, num_priors, 4) respectively.
        """
        conf = confidence_layer(input_feature)
        loc = loc_regress_layer(input_feature)

        # Confidence post-processing:
        # 1: (N, num_prior_bbox * n_classes, H, W) to (N, H*W*num_prior_bbox, n_classes) = (N, num_priors, num_classes)
        #    where H*W*num_prior_bbox = num_priors
        conf = conf.permute(0, 2, 3, 1).contiguous()
        num_batch = conf.shape[0]
        c_channels = int(conf.shape[1] * conf.shape[2] * conf.shape[3] /
                         self.num_classes)
        #print('conf shape',conf.shape)

        conf = conf.view(num_batch, c_channels, self.num_classes)

        # Bounding Box loc and size post-processing
        # 1: (N, num_prior_bbox*4, H, W) to (N, num_priors, 4)
        loc = loc.permute(0, 2, 3, 1).contiguous()
        #print('loc shape',loc.shape)
        l_channels = int(loc.shape[1] * loc.shape[2] * loc.shape[3] / 4)
        #print('l chanel', l_channels)
        loc = loc.view(num_batch, l_channels, 4)

        return conf, loc

    def forward(self, input):

        confidence_list = []
        loc_list = []

        # Run the backbone network over layers [0, 11] and fetch the bbox class
        # confidence as well as position and size
        y = module_util.forward_from(self.base_net.base_net, 0,
                                     self.base_output_layer_indices[0] + 1,
                                     input)
        #print('y',y.shape)
        confidence, loc = self.feature_to_bbbox(self.loc_regressor[0],
                                                self.classifier[0], y)
        confidence_list.append(confidence)
        loc_list.append(loc)
        #print('cof, loc size', confidence.shape, loc.shape)

        # Run the backbone network over layers [12, 13] and compute the
        # corresponding bbox loc and confidence (the start index is +1 so that
        # layer [11] is not run a second time)
        y = module_util.forward_from(self.base_net.base_net,
                                     self.base_output_layer_indices[0] + 1,
                                     self.base_output_layer_indices[1] + 1, y)
        #print('y', y.shape)
        confidence, loc = self.feature_to_bbbox(self.loc_regressor[1],
                                                self.classifier[1], y)
        confidence_list.append(confidence)
        loc_list.append(loc)
        #print('cof, loc size', confidence.shape, loc.shape)

        #conv to 12
        #y = module_util.forward_from(self.base_net.base_net, self.base_output_layer_indices[1], self.base_output_layer_indices[2]+1, y)
        # Forward 'y' through the additional layers to extract coarser features
        for idx in range(0, len(self.additional_feat_extractor)):
            #print('current idx', idx)
            #print('y', y.shape)
            y = module_util.forward_from(self.additional_feat_extractor[idx],
                                         0, 4, y)
            confidence, loc = self.feature_to_bbbox(
                self.loc_regressor[idx + 2], self.classifier[idx + 2], y)
            confidence_list.append(confidence)
            loc_list.append(loc)
            #print('cof, loc size', confidence.shape, loc.shape)

        confidences = torch.cat(confidence_list, 1)
        locations = torch.cat(loc_list, 1)
        #print('cof, loc size after cat',  np.asarray(confidences).shape, np.asarray(locations).shape)

        # [Debug] check the output
        assert confidences.dim() == 3  # should be (N, num_priors, num_classes)
        assert locations.dim() == 3  # should be (N, num_priors, 4)
        assert confidences.shape[1] == locations.shape[1]
        assert locations.shape[2] == 4

        if not self.training:
            # If in testing/evaluating mode, normalize the output with Softmax
            confidences = F.softmax(confidences, dim=2)

        return confidences, locations
Example #10
def _model_fn(num_bits, features, labels, mode, params):

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    # weight regularization
    regularizer = tf.contrib.layers.l2_regularizer(scale=config.weight_decay)
    # create model
    num_classes = 10
    model = MobileNet(num_classes,
                      is_training,
                      num_bits,
                      width_multiplier=config.width_multiplier,
                      quant_mode=config.quant_method,
                      conv2d_regularizer=regularizer)

    # forward pass
    logits = model.forward_pass(features)
    predict_class = tf.argmax(input=logits, axis=1)
    #predict_class = tf.Print(predict_class, [predict_class])
    predictions = {
        'classes': predict_class,
        'probabilities': tf.nn.softmax(logits)
    }

    # calculate accuracy
    accuracy = tf.metrics.accuracy(labels, predictions['classes'])
    metrics = {'accuracy': accuracy}

    # loss function
    loss = tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels)

    # regularization loss
    reg_variables = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    reg_term = tf.contrib.layers.apply_regularization(regularizer,
                                                      reg_variables)
    loss += reg_term

    if mode == tf.estimator.ModeKeys.TRAIN:

        # add fake_quant to 'normal' graph
        if config.quant_method == 'tensorflow':
            print("TF quantize create training graph")
            g = tf.get_default_graph()
            tf.contrib.quantize.create_training_graph(input_graph=g,
                                                      quant_delay=0)
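            # (quant_delay=0 activates the fake-quant ops from the very first step)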

        # learning rate decay
        global_step = tf.train.get_global_step()
        steps_per_epoch = num_training_per_epoch / config.train_batch_size
        decay_steps = steps_per_epoch * config.decay_per_epoch
        decay_rate = config.decay_rate

        learning_rate = tf.train.exponential_decay(config.learning_rate,
                                                   global_step, decay_steps,
                                                   decay_rate)

        learning_rate = tf.maximum(learning_rate, config.learning_rate * 0.01)
        # optimize loss
        optimizer = tf.train.AdamOptimizer(learning_rate)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(
                loss=loss, global_step=tf.train.get_global_step())

        # logging
        tf.summary.scalar("accuracy", accuracy[1])
        tf.summary.scalar("learning_rate", learning_rate)

        # printing
        tensors_to_log = {
            'learning_rate': learning_rate,
            'loss': loss,
            'accuracy': accuracy[1]
        }

        train_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                                every_n_iter=1000)

        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          loss=loss,
                                          train_op=train_op,
                                          training_hooks=[train_hook],
                                          eval_metric_ops=metrics)

    elif mode == tf.estimator.ModeKeys.EVAL:
        if config.quant_method == 'tensorflow':
            g = tf.get_default_graph()
            tf.contrib.quantize.create_eval_graph(input_graph=g)

        tf.summary.scalar("accuracy", accuracy[1])
        eval_tensors_to_log = {'eval_loss': loss, 'eval_accuracy': accuracy[1]}
        evaluation_hook = tf.train.LoggingTensorHook(
            tensors=eval_tensors_to_log, every_n_iter=1000)

        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          loss=loss,
                                          evaluation_hooks=[evaluation_hook],
                                          eval_metric_ops=metrics)
Example #11
import os

import matplotlib as mlp
import matplotlib.patches
import numpy as np

np.set_printoptions(precision=2)

os.chdir("./handwritten_digit_recognition/")
# from wide_resnet_28_10 import WideResNet28_10
from mobilenet import MobileNet
from utils import load_mnist
os.chdir("../")

PATH = './handwritten_digit_recognition/models/'
#model_name = "WideResNet28_10"
#model=WideResNet28_10()
model_name = "MobileNet"
model = MobileNet()
model.compile()
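# this project's MobileNet wrapper presumably supplies its own loss/optimizer defaults in compile()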

model2 = MobileNet()
model2.compile()

print('Loading pretrained weights for ', model_name, '...', sep='')
model.load_weights(PATH + model_name + "_ajustado_1" + '.h5')
model2.load_weights(PATH + model_name + '.h5')


def mesaImagen(mesa, boleta=1):
    fname = 'actas/{0:06d}'.format(mesa * 10 + boleta) + '.jpg'
    data_name = "mesas_rv/" + '{}'.format(mesa) + '.json'
    path = "./"
    out_path = './results/'
Example #12
class SSD(nn.Module):
    def __init__(self, num_classes):
        super(SSD, self).__init__()
        self.num_classes = num_classes
        # Setup the backbone network (base_net)
        self.base_net = MobileNet(num_classes)
        # Feature maps will be extracted from layers [11] and [13] of base_net
        self.base_output_layer_indices = (11, 13)
        # Define the Additional feature extractor
        self.additional_feat_extractor = nn.ModuleList([
            # Conv8_2
            nn.Sequential(
                nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=256,
                          out_channels=512,
                          kernel_size=3,
                          stride=2,
                          padding=1), nn.ReLU()),
            # Conv9_2
            nn.Sequential(
                nn.Conv2d(in_channels=512, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128,
                          out_channels=256,
                          kernel_size=3,
                          stride=2,
                          padding=1), nn.ReLU()),
            # Conv10_2
            nn.Sequential(
                nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128,
                          out_channels=256,
                          kernel_size=3,
                          stride=1,
                          padding=1),
                nn.ReLU(),
            ),
            # Conv11_2
            nn.Sequential(
                nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128,
                          out_channels=256,
                          kernel_size=3,
                          stride=1),
                nn.ReLU(),
            ),
        ])

        # Bounding box offset regressor
        num_prior_bbox = 6  # num of prior bounding boxes
        self.loc_regressor = nn.ModuleList([
            nn.Conv2d(in_channels=512,
                      out_channels=num_prior_bbox * 4,
                      kernel_size=3,
                      padding=1),  #Cov5_3
            nn.Conv2d(in_channels=1024,
                      out_channels=num_prior_bbox * 4,
                      kernel_size=3,
                      padding=1),  #FC7
            nn.Conv2d(in_channels=512,
                      out_channels=num_prior_bbox * 4,
                      kernel_size=3,
                      padding=1),  #Conv8_2
            # TODO: implement remaining layers.
            nn.Conv2d(in_channels=256,
                      out_channels=num_prior_bbox * 4,
                      kernel_size=3,
                      padding=1),  #Conv9_2
            nn.Conv2d(in_channels=256,
                      out_channels=num_prior_bbox * 4,
                      kernel_size=3,
                      padding=1),  #Conv10_2
            nn.Conv2d(in_channels=256,
                      out_channels=num_prior_bbox * 4,
                      kernel_size=3,
                      padding=1),  #Conv11_2
        ])

        # Bounding box classification confidence for each label
        self.classifier = nn.ModuleList([
            nn.Conv2d(in_channels=512,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=1024,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=512,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=256,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=256,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=256,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1),
        ])

        # Load the pre-trained weights for self.base_net; fine-tuning from them improves accuracy
        basenet_state = torch.load('pretrained/mobienetv2.pth',
                                   map_location='cpu')
        base_net_1 = {
            key: value
            for key, value in basenet_state.items() if 'base_net' in key
        }
        self.base_net.load_state_dict(base_net_1)

        def init_with_xavier(m):
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight)

        self.loc_regressor.apply(init_with_xavier)
        self.classifier.apply(init_with_xavier)
        self.additional_feat_extractor.apply(init_with_xavier)

    def feature_to_bbbox(self, loc_regress_layer, confidence_layer,
                         input_feature):
        conf = confidence_layer(input_feature)
        loc = loc_regress_layer(input_feature)
        conf = conf.permute(0, 2, 3, 1).contiguous()
        num_batch = conf.shape[0]
        c_channels = int(conf.shape[1] * conf.shape[2] * conf.shape[3] /
                         self.num_classes)
        conf = conf.view(num_batch, c_channels, self.num_classes)
        loc = loc.permute(0, 2, 3, 1).contiguous()
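        # c_channels equals H*W*num_prior_bbox, so the same count indexes the
        # 4-dim location offsets below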
        loc = loc.view(num_batch, c_channels, 4)
        return conf, loc

    def forward(self, input):
        confidence_list = []
        loc_list = []
        y = module_util.forward_from(self.base_net.base_net, 0,
                                     self.base_output_layer_indices[0] + 1,
                                     input)  #11 , 13
        confidence, loc = self.feature_to_bbbox(self.loc_regressor[0],
                                                self.classifier[0], y)
        confidence_list.append(confidence)
        loc_list.append(loc)
        y = module_util.forward_from(self.base_net.base_net,
                                     self.base_output_layer_indices[0] + 1,
                                     self.base_output_layer_indices[1] + 1, y)
        confidence, loc = self.feature_to_bbbox(self.loc_regressor[1],
                                                self.classifier[1], y)
        confidence_list.append(confidence)
        loc_list.append(loc)
        for i in range(len(self.additional_feat_extractor)):
            y = module_util.forward_from(self.additional_feat_extractor, i,
                                         i + 1, y)
            confidence, loc = self.feature_to_bbbox(self.loc_regressor[i + 2],
                                                    self.classifier[i + 2], y)
            confidence_list.append(confidence)
            loc_list.append(loc)
        confidences = torch.cat(confidence_list, 1)
        locations = torch.cat(loc_list, 1)
        # [Debug] check the output
        assert confidences.dim() == 3  # should be (N, num_priors, num_classes)
        assert locations.dim() == 3  # should be (N, num_priors, 4)
        assert confidences.shape[1] == locations.shape[1]
        assert locations.shape[2] == 4
        if not self.training:
            confidences = F.softmax(confidences, dim=2)
        return confidences, locations
Example #13
    def test_compression(self):
        """
        Model: mobilenet_v1
        data: mnist
        step1: Training one epoch
        step2: pruning flops
        step3: fine-tune one epoch
        step4: check top1_acc.
        """
        if not fluid.core.is_compiled_with_cuda():
            return
        class_dim = 10
        image_shape = [1, 28, 28]
        image = fluid.layers.data(
            name='image', shape=image_shape, dtype='float32')
        image.stop_gradient = False
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        out = MobileNet().net(input=image, class_dim=class_dim)
        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
        acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
        val_program = fluid.default_main_program().clone(for_test=False)

        cost = fluid.layers.cross_entropy(input=out, label=label)
        avg_cost = fluid.layers.mean(x=cost)

        optimizer = fluid.optimizer.Momentum(
            momentum=0.9,
            learning_rate=0.01,
            regularization=fluid.regularizer.L2Decay(4e-5))

        place = fluid.CUDAPlace(0)
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())

        val_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)

        val_feed_list = [('img', image.name), ('label', label.name)]
        val_fetch_list = [('acc_top1', acc_top1.name), ('acc_top5',
                                                        acc_top5.name)]

        train_reader = paddle.batch(
            paddle.dataset.mnist.train(), batch_size=128)
        train_feed_list = [('img', image.name), ('label', label.name)]
        train_fetch_list = [('loss', avg_cost.name)]

        com_pass = Compressor(
            place,
            fluid.global_scope(),
            fluid.default_main_program(),
            train_reader=train_reader,
            train_feed_list=train_feed_list,
            train_fetch_list=train_fetch_list,
            eval_program=val_program,
            eval_reader=val_reader,
            eval_feed_list=val_feed_list,
            eval_fetch_list=val_fetch_list,
            train_optimizer=optimizer)
        com_pass.config('./filter_pruning/compress.yaml')
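        # the YAML file supplies the filter-pruning (FLOPs) strategy for the Compressor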
        eval_graph = com_pass.run()
        self.assertTrue(
            abs((com_pass.context.eval_results['acc_top1'][-1] - 0.969) / 0.969)
            < 0.02)
Example #14
# The excerpt below assumes these imports and hyperparameters, which are not
# shown in the original snippet; the values are placeholders.
import keras
from keras.datasets import cifar10
from mobilenet import MobileNet  # assumed local module, as in the other examples

num_classes = 10  # CIFAR-10 has ten classes
batch_size = 32   # assumed value
epochs = 100      # assumed value

# Get the data.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Preprocess the images.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

# Get the model and compile it.
img_input = keras.layers.Input(shape=(32, 32, 3))

model = MobileNet(input_tensor=img_input, classes=num_classes)

model.compile(loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'])

print("Training model.")

model.fit(x_train, y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_test, y_test),
    shuffle=True,
    verbose=1)
Example #15
#                May the divine beast protect this code
#                There can't possibly be any bugs!
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

from mobilenet import MobileNet
import numpy as np
np.random.seed(10)
from keras.datasets import cifar10
from keras.utils import np_utils

(x_img_train, y_label_train),(x_img_test, y_label_test) = cifar10.load_data()

x_img_train = x_img_train.astype('float')/255.0
x_img_test = x_img_test.astype('float')/255.0

y_label_train = np_utils.to_categorical(y_label_train)
y_label_test = np_utils.to_categorical(y_label_test)

model = MobileNet()
try:
    model.load_weights("mobileV1-lite.h5")
    print("Model loaded successfully; resuming training.")
except Exception:
    print("Failed to load the model; training from scratch.")

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
train_history = model.fit(x_img_train, y_label_train, validation_split=0.2, epochs=10, batch_size=128, verbose=2)
model.save_weights("mobileV1-lite.h5")
print("Model saved successfully!")
Example #16
from utils import load_mnist
from keras.utils import np_utils
from vgg16 import VGG16
from resnet164 import ResNet164
from mobilenet import MobileNet
from wide_resnet_28_10 import WideResNet28_10
from super_learner import SuperLearner
# from super_learner_extension import SuperLearnerExtension
import argparse
import numpy as np
import os

PATH = './models/'
models = [VGG16(), ResNet164(), WideResNet28_10(), MobileNet()]
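# base learners to be combined by the SuperLearner ensemble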

def get_argument_parser():
    '''
    Parse and return the command-line options provided by the user.

    Args:
        None

    Returns:
        argparse.ArgumentParser().parse_args()
    '''

    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset',
                        help = 'training set: 0, validation set: 1, test set: 2',
                        type = int, default = 1)
    args = parser.parse_args()

    return args
Example #17
def demo(data,
         save,
         efficient=True,
         valid_size=5000,
         n_epochs=30,
         batch_size=64,
         seed=None):
    """
    A demo to show off training of efficient DenseNets.
    Trains and evaluates a DenseNet-BC on CIFAR-10.

    Args:
        data (str) - path to directory where data should be loaded from/downloaded
            (default $DATA_DIR)
        save (str) - path to save the model to (default /tmp)

        depth (int) - depth of the network (number of convolution layers) (default 40)
        growth_rate (int) - number of features added per DenseNet layer (default 12)
        efficient (bool) - use the memory efficient implementation? (default True)

        valid_size (int) - size of validation set
        n_epochs (int) - number of epochs for training (default 300)
        batch_size (int) - size of minibatch (default 256)
        seed (int) - manually set the random seed (default None)
    """

    # Data transforms
    mean = [0.5071, 0.4867, 0.4408]
    stdv = [0.2675, 0.2565, 0.2761]
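    # normalization statistics (these match the widely quoted CIFAR-100 values)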
    train_transforms = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=stdv),
    ])
    test_transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=stdv),
    ])

    # Datasets
    train_set = datasets.CIFAR10(data,
                                 train=True,
                                 transform=train_transforms,
                                 download=True)
    test_set = datasets.CIFAR10(data,
                                train=False,
                                transform=test_transforms,
                                download=False)

    # Models
    model = MobileNet()
    print(model)

    # Make save directory
    if not os.path.exists(save):
        os.makedirs(save)
    if not os.path.isdir(save):
        raise Exception('%s is not a dir' % save)

    # Train the model
    train(model=model,
          train_set=train_set,
          test_set=test_set,
          save=save,
          valid_size=valid_size,
          n_epochs=n_epochs,
          batch_size=batch_size,
          seed=seed)
    print('Done!')
Example #18
## Flask application acting as a REST service for the Android app, plus a dummy frontend

from flask import Flask, render_template, request, redirect, Response
from base64 import b64decode
from hashlib import md5
from mobilenet import MobileNet
from os import remove as remove_file

app = Flask(__name__)
mb = MobileNet()
TEMP_FILENAME = "infer_input.jpg"


# dummy frontend to make sure server is up
@app.route("/")
def main():
    return render_template("index.html")


# inference POST route
@app.route("/infer", methods=["POST"])
def infer_image():
    # find the contents and save the image from them
    for value in request.values:
        f = open(TEMP_FILENAME, "wb")

        # first replace all special characters then decode the base64 encoding
        base64encoded = value.replace('@', '=').replace('*', '+')
        base64decoded = b64decode(base64encoded)

        # write the image to a temporary file
Example #19
def main():
    global opt, start_epoch, best_prec1
    opt = cfg
    opt.gpuids = list(map(int, opt.gpuids))

    if opt.cuda and not torch.cuda.is_available():
        raise Exception("No GPU found, please run without --cuda")

    model = MobileNet()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=opt.lr,
                          momentum=opt.momentum,
                          weight_decay=opt.weight_decay,
                          nesterov=True)
    start_epoch = 0

    ckpt_file = join("model", opt.ckpt)

    if opt.cuda:
        torch.cuda.set_device(opt.gpuids[0])
        with torch.cuda.device(opt.gpuids[0]):
            model = model.cuda()
            criterion = criterion.cuda()
        model = nn.DataParallel(model,
                                device_ids=opt.gpuids,
                                output_device=opt.gpuids[0])
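        # let cuDNN autotune convolution algorithms for the fixed input size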
        cudnn.benchmark = True

    # for resuming training
    if opt.resume:
        if isfile(ckpt_file):
            print("==> Loading Checkpoint '{}'".format(opt.ckpt))
            if opt.cuda:
                checkpoint = torch.load(ckpt_file,
                                        map_location=lambda storage, loc:
                                        storage.cuda(opt.gpuids[0]))
                try:
                    model.module.load_state_dict(checkpoint['model'])
                except Exception:
                    model.load_state_dict(checkpoint['model'])
            else:
                checkpoint = torch.load(
                    ckpt_file, map_location=lambda storage, loc: storage)
                try:
                    model.load_state_dict(checkpoint['model'])
                except Exception:
                    # create new OrderedDict that does not contain `module.`
                    new_state_dict = OrderedDict()
                    for k, v in checkpoint['model'].items():
                        if k[:7] == 'module.':
                            name = k[7:]  # remove `module.`
                        else:
                            name = k[:]
                        new_state_dict[name] = v

                    model.load_state_dict(new_state_dict)

            start_epoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])

            print("==> Loaded Checkpoint '{}' (epoch {})".format(
                opt.ckpt, start_epoch))
        else:
            print("==> no checkpoint found at '{}'".format(opt.ckpt))
            return

    # Download & Load Dataset
    print('==> Preparing data..')
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    transform_val = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    trainset = torchvision.datasets.CIFAR10(root='./data',
                                            train=True,
                                            download=True,
                                            transform=transform_train)
    train_loader = torch.utils.data.DataLoader(trainset,
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.workers)

    valset = torchvision.datasets.CIFAR10(root='./data',
                                          train=False,
                                          download=True,
                                          transform=transform_val)
    val_loader = torch.utils.data.DataLoader(valset,
                                             batch_size=opt.test_batch_size,
                                             shuffle=False,
                                             num_workers=opt.workers)

    classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
               'ship', 'truck')

    # for evaluation
    if opt.eval:
        if isfile(ckpt_file):
            print("==> Loading Checkpoint '{}'".format(opt.ckpt))
            if opt.cuda:
                checkpoint = torch.load(ckpt_file,
                                        map_location=lambda storage, loc:
                                        storage.cuda(opt.gpuids[0]))
                try:
                    model.module.load_state_dict(checkpoint['model'])
                except Exception:
                    model.load_state_dict(checkpoint['model'])
            else:
                checkpoint = torch.load(
                    ckpt_file, map_location=lambda storage, loc: storage)
                try:
                    model.load_state_dict(checkpoint['model'])
                except Exception:
                    # create new OrderedDict that does not contain `module.`
                    new_state_dict = OrderedDict()
                    for k, v in checkpoint['model'].items():
                        if k[:7] == 'module.':
                            name = k[7:]  # remove `module.`
                        else:
                            name = k[:]
                        new_state_dict[name] = v

                    model.load_state_dict(new_state_dict)

            start_epoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])

            print("==> Loaded Checkpoint '{}' (epoch {})".format(
                opt.ckpt, start_epoch))

            # evaluate on validation set
            print("\n===> [ Evaluation ]")
            start_time = time.time()
            prec1 = validate(val_loader, model, criterion)
            elapsed_time = time.time() - start_time
            print("====> {:.2f} seconds to evaluate this model\n".format(
                elapsed_time))
            return
        else:
            print("==> no checkpoint found at '{}'".format(opt.ckpt))
            return

    # train...
    train_time = 0.0
    validate_time = 0.0
    for epoch in range(start_epoch, opt.epochs):
        adjust_learning_rate(optimizer, epoch)

        print('\n==> Epoch: {}, lr = {}'.format(
            epoch, optimizer.param_groups[0]["lr"]))

        # train for one epoch
        print("===> [ Training ]")
        start_time = time.time()
        train(train_loader, model, criterion, optimizer, epoch)
        elapsed_time = time.time() - start_time
        train_time += elapsed_time
        print(
            "====> {:.2f} seconds to train this epoch\n".format(elapsed_time))

        # evaluate on validation set
        print("===> [ Validation ]")
        start_time = time.time()
        prec1 = validate(val_loader, model, criterion)
        elapsed_time = time.time() - start_time
        validate_time += elapsed_time
        print("====> {:.2f} seconds to validate this epoch\n".format(
            elapsed_time))

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        state = {
            'epoch': epoch + 1,
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        save_model(state, epoch, is_best)

    avg_train_time = train_time / opt.epochs
    avg_valid_time = validate_time / opt.epochs
    total_train_time = train_time + validate_time
    print("====> average training time per epoch: {}m {:.2f}s".format(
        int(avg_train_time // 60), avg_train_time % 60))
    print("====> average validation time per epoch: {}m {:.2f}s".format(
        int(avg_valid_time // 60), avg_valid_time % 60))
    print("====> training time: {}m {:.2f}s".format(int(train_time // 60),
                                                    train_time % 60))
    print("====> validation time: {}m {:.2f}s".format(int(validate_time // 60),
                                                      validate_time % 60))
    print("====> total training time: {}m {:.2f}s".format(
        int(total_train_time // 60), total_train_time % 60))
Example #20
def main():
    random.seed(SEED)
    np.random.seed(SEED)

    if os.path.exists(DICO_PKL):
        with open(DICO_PKL, 'rb') as f:
            word_to_id, id_to_word = pickle.load(f)
    else:
        word_to_id, id_to_word = create_dico(DICO)
        with open(DICO_PKL, 'wb') as f:
            pickle.dump([word_to_id, id_to_word], f)

    gen_data_loader = Gen_Data_loader(BATCH_SIZE, word_to_id)
    dis_data_loader = Dis_Data_loader(BATCH_SIZE, word_to_id)
    vocab_size = len(word_to_id)
    assert START_TOKEN == word_to_id['sos']

    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          SEQ_LENGTH, START_TOKEN)
    discriminator = BLEUCNN(SEQ_LENGTH, 2, EMB_DIM, generator)
    mobilenet = MobileNet(BATCH_SIZE)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    mobilenet.load_pretrained_weights(sess)
    sess.run(tf.global_variables_initializer())

    log = open('experiment-log.txt', 'w', encoding='utf-8')
    #  pre-train generator and discriminator
    log.write('pre-training...\n')
    print('Start pre-training discriminator...')
    datas = create_data(DICO, word_to_id)
    gen_data_loader.create_batches(CORPUS, IMAGE)
    samples = []
    for it in range(gen_data_loader.num_batch):
        inp_batch, image_batch = gen_data_loader.next_batch()
        feed_dict = {mobilenet.X: image_batch, mobilenet.is_training: False}
        hidden_batch = sess.run(mobilenet.y_output, feed_dict=feed_dict)
        samples.extend(generator.generate(sess, hidden_batch).tolist())
    dis_data_loader.create_batches(random.sample(datas, 3000), samples)
    for _ in range(PRE_EPOCH_NUM):
        dis_data_loader.reset_pointer()
        for it in range(dis_data_loader.num_batch):
            x_batch, labels = dis_data_loader.next_batch()
            feed = {
                discriminator.input_x: x_batch,
                discriminator.labels: labels,
                discriminator.dropout_keep_prob: 0.75
            }
            _ = sess.run(discriminator.train_op, feed)

    print('Start pre-training generator...')
    for epoch in range(PRE_EPOCH_NUM):
        supervised_g_losses = []
        gen_data_loader.reset_pointer()
        for it in range(gen_data_loader.num_batch):
            inp_batch, image_batch = gen_data_loader.next_batch()
            feed_dict = {
                mobilenet.X: image_batch,
                mobilenet.is_training: False
            }
            hidden_batch = sess.run(mobilenet.y_output, feed_dict=feed_dict)
            _, g_loss = generator.pretrain_step(sess, inp_batch, hidden_batch)
            supervised_g_losses.append(g_loss)
        loss = np.mean(supervised_g_losses)
        if epoch % 5 == 0:
            print('pre-train epoch ', epoch, 'train_loss ', loss)
            buffer = 'epoch:\t' + str(epoch) + '\ttrain_loss:\t' + str(
                loss) + '\n'
            log.write(buffer)

    rollout = ROLLOUT(generator, 0.8)
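    # 0.8 is the rollout network's update rate (a SeqGAN-style delayed copy of the generator)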

    print(
        '#########################################################################'
    )
    print('Start REINFORCE Training...')
    log.write('REINFORCE training...\n')
    for total_batch in range(RL_EPOCH_NUM):
        gen_data_loader.reset_pointer()
        for it in range(gen_data_loader.num_batch):
            ra = random.randint(0, 1)
            inp_batch, image_batch = gen_data_loader.next_batch(shuffle=ra)
            feed_dict = {
                mobilenet.X: image_batch,
                mobilenet.is_training: False
            }
            hidden_batch = sess.run(mobilenet.y_output, feed_dict=feed_dict)
            samples = generator.generate(sess, hidden_batch)
            rewards = rollout.get_reward(sess, samples, hidden_batch, 16,
                                         discriminator)
            feed = {
                generator.x: inp_batch,
                generator.rewards: rewards,
                generator.hiddens: hidden_batch
            }
            _ = sess.run(generator.g_updates, feed_dict=feed)

        # Test
        if total_batch % 5 == 0 or total_batch == RL_EPOCH_NUM - 1:
            mean_rewards = []
            gen_data_loader.reset_pointer()
            for it in range(gen_data_loader.num_batch):
                inp_batch, image_batch = gen_data_loader.next_batch()
                feed_dict = {
                    mobilenet.X: image_batch,
                    mobilenet.is_training: False
                }
                hidden_batch = sess.run(mobilenet.y_output,
                                        feed_dict=feed_dict)
                samples = generator.generate(sess, hidden_batch)
                rewards = rollout.get_reward(sess, samples, hidden_batch, 16,
                                             discriminator)
                mean_rewards.append(np.mean(rewards[:, -1]))
            reward = np.mean(mean_rewards)
            buffer = 'epoch:\t' + str(total_batch) + '\treward:\t' + str(
                reward) + '\n'
            print('total_batch: ', total_batch, 'reward: ', reward)
            log.write(buffer)
            generator.save_weight(sess)

        # Update roll-out parameters
        rollout.update_params()
        discriminator.update_embedding()

        # Train the discriminator
        samples = []
        for it in range(gen_data_loader.num_batch):
            inp_batch, image_batch = gen_data_loader.next_batch()
            feed_dict = {
                mobilenet.X: image_batch,
                mobilenet.is_training: False
            }
            hidden_batch = sess.run(mobilenet.y_output, feed_dict=feed_dict)
            samples.extend(generator.generate(sess, hidden_batch).tolist())
        dis_data_loader.create_batches(random.sample(datas, 3000), samples)
        dis_data_loader.reset_pointer()
        for it in range(dis_data_loader.num_batch):
            x_batch, labels = dis_data_loader.next_batch()
            feed = {
                discriminator.input_x: x_batch,
                discriminator.labels: labels,
                discriminator.dropout_keep_prob: 0.75
            }
            _ = sess.run(discriminator.train_op, feed)

    # final test
    gen_data_loader.reset_pointer()
    _, image_batch = gen_data_loader.next_batch()
    feed_dict = {mobilenet.X: image_batch, mobilenet.is_training: False}
    hidden_batch = sess.run(mobilenet.y_output, feed_dict=feed_dict)
    samples = generator.generate(sess, hidden_batch)
    sams = []
    for sam in samples.tolist():
        sams.append(''.join(id_to_word[i] for i in sam))
    for sam in sams:
        log.write(sam + '\n')
    log.close()
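
# A small helper (a sketch, not from the original source) that truncates a
# generated sample at an end-of-sequence id before decoding; it assumes an
# 'eos' entry exists in word_to_id, mirroring the 'sos' START_TOKEN asserted
# at the top of this script.
def decode_sample(sample, id_to_word, eos_id):
    words = []
    for token_id in sample:
        if token_id == eos_id:
            break  # stop at the end-of-sequence marker
        words.append(id_to_word[token_id])
    return ''.join(words)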
Example #21
    def quan(self, config_file):
        if not fluid.core.is_compiled_with_cuda():
            return
        class_dim = 10
        image_shape = [1, 28, 28]

        train_program = fluid.Program()
        startup_program = fluid.Program()

        with fluid.program_guard(train_program, startup_program):
            with fluid.unique_name.guard():
                image = fluid.layers.data(
                    name='image', shape=image_shape, dtype='float32')
                image.stop_gradient = False
                label = fluid.layers.data(
                    name='label', shape=[1], dtype='int64')
                out = MobileNet(name='quan').net(input=image,
                                                 class_dim=class_dim)
                print("out: {}".format(out.name))
                acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
                acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
                cost = fluid.layers.cross_entropy(input=out, label=label)
                avg_cost = fluid.layers.mean(x=cost)

        val_program = train_program.clone(for_test=True)  # clone in test mode for evaluation

        optimizer = fluid.optimizer.Momentum(
            momentum=0.9,
            learning_rate=0.01,
            regularization=fluid.regularizer.L2Decay(4e-5))

        scope = fluid.Scope()
        place = fluid.CUDAPlace(0)
        exe = fluid.Executor(place)
        exe.run(startup_program, scope=scope)

        val_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)

        val_feed_list = [('img', image.name), ('label', label.name)]
        val_fetch_list = [('acc_top1', acc_top1.name), ('acc_top5',
                                                        acc_top5.name)]

        train_reader = paddle.batch(
            paddle.dataset.mnist.train(), batch_size=128)
        train_feed_list = [('img', image.name), ('label', label.name)]
        train_fetch_list = [('loss', avg_cost.name)]

        com_pass = Compressor(
            place,
            scope,
            train_program,
            train_reader=train_reader,
            train_feed_list=train_feed_list,
            train_fetch_list=train_fetch_list,
            eval_program=val_program,
            eval_reader=val_reader,
            eval_feed_list=val_feed_list,
            eval_fetch_list=val_fetch_list,
            train_optimizer=optimizer)
        com_pass.config(config_file)
        eval_graph = com_pass.run()
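
# A plausible config for the Compressor above (an assumption: the key names
# follow the paddle.fluid.contrib.slim YAML conventions and may differ across
# versions). Written to a file, its path would be passed to quan() to drive
# one epoch of quantization-aware training.
EXAMPLE_QUAN_CONFIG = """
version: 1.0
strategies:
    quantization_strategy:
        class: 'QuantizationStrategy'
        start_epoch: 0
        end_epoch: 0
        weight_bits: 8
        activation_bits: 8
compressor:
    epoch: 1
    checkpoint_path: './checkpoints_quan/'
    strategies:
        - quantization_strategy
"""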
Example #22
class SSD(nn.Module):
    
    def __init__(self, num_classes):
        super(SSD, self).__init__()
        self.num_classes = num_classes

        # Setup the backbone network (base_net)
        self.base_net = MobileNet(num_classes)

        # The feature maps will be extracted after layer[6] and layer[11] of (base_net)
        self.base_output_layer_indices = (6, 11)

        # Define the Additional feature extractor
        self.additional_feat_extractor = nn.ModuleList([
            # Conv8_2
            nn.Sequential(
                nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=2, padding=1),
                nn.ReLU()
            ),
            # Conv9_2
            nn.Sequential(
                nn.Conv2d(in_channels=512, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1),
                nn.ReLU()
            ),
            # Conv10_2
            nn.Sequential(
                nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1),
                nn.ReLU()
            ),
            # Conv11_2
            nn.Sequential(
                nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2),
                nn.ReLU()
            )
        ])

        # Bounding box offset regressor
        num_prior_bbox = 6                                                               # num of prior bounding boxes
        self.loc_regressor = nn.ModuleList([
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=512, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=512, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1),
            # TODO: implement remaining layers.
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1)
        ])
        
        # Bounding box classification confidence for each label
        self.classifier = nn.ModuleList([
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=512, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=512, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
            # TODO: implement remaining layers.
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
        ])

        # Load the pre-trained weights into self.base_net; fine-tuning from them improves accuracy
        pretrained_dict = torch.load('./pretrained/mobienetv2.pth')
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if 'base_net' in k}
        model_dict = self.base_net.state_dict()

        keys = []
        for k,v in pretrained_dict.items():
            keys.append(k)

        # Greedy, order-based alignment: copy each pretrained tensor into the
        # next model tensor of matching shape; this assumes both state dicts
        # enumerate layers in the same order.
        i = 0
        for k,v in model_dict.items():
            if v.size() == pretrained_dict[keys[i]].size():
                model_dict[k] = pretrained_dict[keys[i]]
                i += 1
                if i == len(keys):
                    break
        
        self.base_net.load_state_dict(model_dict)
        self.base_net.eval()  # keep the backbone's BN/Dropout in eval mode (backbone treated as frozen)

        def init_with_xavier(m):
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight)
        self.loc_regressor.apply(init_with_xavier)
        self.classifier.apply(init_with_xavier)
        self.additional_feat_extractor.apply(init_with_xavier)

    def feature_to_bbbox(self, loc_regress_layer, confidence_layer, input_feature):
        """
        Compute the bounding box class scores and the bounding box offset
        :param loc_regress_layer: offset regressor layer to run forward
        :param confidence_layer: confidence layer to run forward
        :param input_feature: feature map to be fed in
        :return: confidence and location, with dim:(N, num_priors, num_classes) and dim:(N, num_priors, 4) respectively.
        """
        conf = confidence_layer(input_feature)
        loc = loc_regress_layer(input_feature)

        # Confidence post-processing:
        # 1: (N, num_prior_bbox * n_classes, H, W) to (N, H*W*num_prior_bbox, n_classes) = (N, num_priors, num_classes)
        #    where H*W*num_prior_bbox = num_priors
        conf = conf.permute(0, 2, 3, 1).contiguous()
        num_batch = conf.shape[0]
        c_channels = int(conf.shape[1]*conf.shape[2]*conf.shape[3] / self.num_classes)
        conf = conf.view(num_batch, c_channels, self.num_classes)

        # Bounding Box loc and size post-processing
        # 1: (N, num_prior_bbox*4, H, W) to (N, num_priors, 4)
        loc = loc.permute(0, 2, 3, 1).contiguous()
        loc = loc.view(num_batch, c_channels, 4)

        return conf, loc

    def forward(self, input):

        confidence_list = []
        loc_list = []

        # Run the backbone network up to the first output layer and fetch the
        # bbox class confidence as well as position and size
        y = module_util.forward_from(self.base_net.conv_layers, 0, self.base_output_layer_indices[0], input)
        confidence, loc = self.feature_to_bbbox(self.loc_regressor[0], self.classifier[0], y)
        confidence_list.append(confidence)
        loc_list.append(loc)

        # run the backbone network from the first to the second output layer and compute the corresponding bbox loc and confidence
        y = module_util.forward_from(self.base_net.conv_layers, self.base_output_layer_indices[0], self.base_output_layer_indices[1], y)
        confidence, loc = self.feature_to_bbbox(self.loc_regressor[1], self.classifier[1], y)
        confidence_list.append(confidence)
        loc_list.append(loc)

        # forward the 'y' to additional layers for extracting coarse features
        for i in range(4):
            y = module_util.forward_from(self.additional_feat_extractor, i, i+1, y)
            confidence, loc = self.feature_to_bbbox(self.loc_regressor[i+2], self.classifier[i+2], y)
            confidence_list.append(confidence)
            loc_list.append(loc)

        confidences = torch.cat(confidence_list, 1)
        locations = torch.cat(loc_list, 1)

        # [Debug] check the output
        assert confidences.dim() == 3  # should be (N, num_priors, num_classes)
        assert locations.dim() == 3   # should be (N, num_priors, 4)
        assert confidences.shape[1] == locations.shape[1]
        assert locations.shape[2] == 4

        if not self.training:
            # If in testing/evaluating mode, normalize the output with Softmax
            confidences = F.softmax(confidences, dim=2)

        return confidences, locations
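
# A minimal smoke test for the class above (a sketch: the 3x300x300 input size
# follows standard SSD300 setups and, like the batch size, is an assumption,
# not taken from the original source; it also requires the imports used by the
# class and the pretrained checkpoint file to be present).
if __name__ == '__main__':
    model = SSD(num_classes=4)
    model.eval()
    with torch.no_grad():
        conf, loc = model(torch.randn(1, 3, 300, 300))
    print(conf.shape)  # (1, num_priors, num_classes)
    print(loc.shape)   # (1, num_priors, 4)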
Example #23
def train():
    height = args.height
    width = args.width
    _step = 0

    if True:  # kept to preserve the original indentation (likely a leftover graph or device scope)
        #glob_pattern = os.path.join(args.dataset_dir,"*_train.tfrecord")
        #tfrecords_list = glob.glob(glob_pattern)
        #filename_queue = tf.train.string_input_producer(tfrecords_list, num_epochs=None)
        img_batch, label_batch = get_batch("cifar10/cifar10_train.tfrecord",
                                           args.batch_size,
                                           shuffle=True)

        mobilenet = MobileNet(img_batch, num_classes=args.num_classes)
        logits = mobilenet.logits
        pred = mobilenet.predictions

        cross = tf.nn.softmax_cross_entropy_with_logits(labels=label_batch,
                                                        logits=logits)
        loss = tf.reduce_mean(cross)

        # L2 regularization
        list_reg = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        if len(list_reg) > 0:
            l2_loss = tf.add_n(list_reg)
            total_loss = loss + l2_loss
        else:
            total_loss = loss

        # evaluate model, for classification
        preds = tf.argmax(pred, 1)
        labels = tf.argmax(label_batch, 1)
        #correct_pred = tf.equal(tf.argmax(pred, 1), tf.cast(label_batch, tf.int64))
        correct_pred = tf.equal(preds, labels)
        acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

        # learning rate decay
        global_step = tf.Variable(0, trainable=False)
        lr = tf.train.exponential_decay(args.learning_rate,
                                        global_step=global_step,
                                        decay_steps=args.lr_decay_step,
                                        decay_rate=args.lr_decay,
                                        staircase=True)

        # optimizer
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = tf.train.AdamOptimizer(learning_rate=lr,
                                              beta1=args.beta1).minimize(
                                                  total_loss,  # include the L2 term so regularization is actually applied
                                                  global_step=global_step)

        max_steps = int(args.num_samples / int(args.batch_size) *
                        int(args.epoch))

        # summary
        tf.summary.scalar('total_loss', total_loss)
        tf.summary.scalar('accuracy', acc)
        tf.summary.scalar('learning_rate', lr)
        summary_op = tf.summary.merge_all()

        with tf.Session() as sess:

            # summary writer
            writer = tf.summary.FileWriter(args.logs_dir, sess.graph)

            sess.run(tf.global_variables_initializer())

            saver = tf.train.Saver()
            _, _step = load(sess, saver, args.checkpoint_dir)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            for step in range(_step + 1, max_steps + 1):

                start_time = time.time()

                _, _lr = sess.run([train_op, lr])

                if step % args.num_log == 0:
                    summ, _loss, _acc = sess.run([summary_op, total_loss, acc])
                    writer.add_summary(summ, step)
                    print(
                        'number to eval:{0}, time:{1:.3f}, lr:{2:.8f}, acc:{3:.6f}, loss:{4:.6f}'
                        .format(step * args.batch_size,
                                time.time() - start_time, _lr, _acc, _loss))
                    save_path = saver.save(sess,
                                           os.path.join(
                                               args.checkpoint_dir,
                                               args.model_name),
                                           global_step=step)

                if step % 100 == 0:
                    totalloss = 0.0
                    totalacc = 0.0
                    for e_step in range(200):
                        _loss, _acc = sess.run([total_loss, acc])
                        totalloss = totalloss + _loss
                        totalacc = totalacc + _acc

                    print('evaluated %g samples, time:%g, avg acc:%g, avg loss:%g' %
                          ((e_step + 1) * args.batch_size,
                           time.time() - start_time, totalacc /
                           (e_step + 1), totalloss / (e_step + 1)))

            tf.train.write_graph(sess.graph_def, args.checkpoint_dir,
                                 args.model_name + '.pb')
            save_path = saver.save(sess,
                                   os.path.join(args.checkpoint_dir,
                                                args.model_name),
                                   global_step=max_steps)

            coord.request_stop()
            coord.join(threads)
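
# One plausible implementation of the `load` helper used above (an assumption:
# the original helper is not shown in this snippet). It restores the newest
# checkpoint, if any, and recovers the step from the '-<step>' suffix that
# saver.save(..., global_step=step) appends to checkpoint filenames.
def load(sess, saver, checkpoint_dir):
    ckpt = tf.train.latest_checkpoint(checkpoint_dir)
    if ckpt is None:
        return False, 0  # nothing to restore; start from step 0
    saver.restore(sess, ckpt)
    return True, int(ckpt.split('-')[-1])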
Example #24
    def __init__(self, num_classes):
        super(SSD, self).__init__()
        self.num_classes = num_classes

        # Setup the backbone network (base_net)
        self.base_net = MobileNet(num_classes)

        # The feature maps will be extracted after layer[6] and layer[11] of (base_net)
        self.base_output_layer_indices = (6, 11)

        # Define the Additional feature extractor
        self.additional_feat_extractor = nn.ModuleList([
            # Conv8_2
            nn.Sequential(
                nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=2, padding=1),
                nn.ReLU()
            ),
            # Conv9_2
            nn.Sequential(
                nn.Conv2d(in_channels=512, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1),
                nn.ReLU()
            ),
            # Conv10_2
            nn.Sequential(
                nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1),
                nn.ReLU()
            ),
            # Conv11_2
            nn.Sequential(
                nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2),
                nn.ReLU()
            )
        ])

        # Bounding box offset regressor
        num_prior_bbox = 6                                                               # num of prior bounding boxes
        self.loc_regressor = nn.ModuleList([
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=512, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=512, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1),
            # TODO: implement remaining layers.
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1)
        ])
        
        # Bounding box classification confidence for each label
        self.classifier = nn.ModuleList([
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=512, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=512, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
            # TODO: implement remaining layers.
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
        ])

        # Load the pre-trained weights into self.base_net; fine-tuning from them improves accuracy
        pretrained_dict = torch.load('./pretrained/mobienetv2.pth')
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if 'base_net' in k}
        model_dict = self.base_net.state_dict()

        keys = []
        for k,v in pretrained_dict.items():
            keys.append(k)

        i = 0
        for k,v in model_dict.items():
            if v.size() == pretrained_dict[keys[i]].size():
                model_dict[k] = pretrained_dict[keys[i]]
                i += 1
                if i == len(keys):
                    break
        
        self.base_net.load_state_dict(model_dict)
        self.base_net.eval()  # keep the backbone's BN/Dropout in eval mode (backbone treated as frozen)

        def init_with_xavier(m):
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight)
        self.loc_regressor.apply(init_with_xavier)
        self.classifier.apply(init_with_xavier)
        self.additional_feat_extractor.apply(init_with_xavier)
Example #25
    def __init__(self, num_classes):
        super(SSD, self).__init__()
        self.num_classes = num_classes
        # Setup the backbone network (base_net)
        self.base_net = MobileNet(num_classes)
        # The feature maps will be extracted from layer[11] and layer[13] in (base_net)
        self.base_output_layer_indices = (11, 13)
        # Define the Additional feature extractor
        self.additional_feat_extractor = nn.ModuleList([
            # Conv8_2
            nn.Sequential(
                nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=256,
                          out_channels=512,
                          kernel_size=3,
                          stride=2,
                          padding=1), nn.ReLU()),
            # Conv9_2
            nn.Sequential(
                nn.Conv2d(in_channels=512, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128,
                          out_channels=256,
                          kernel_size=3,
                          stride=2,
                          padding=1), nn.ReLU()),
            # Conv10_2
            nn.Sequential(
                nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128,
                          out_channels=256,
                          kernel_size=3,
                          stride=1,
                          padding=1),
                nn.ReLU(),
            ),
            # Conv11_2
            nn.Sequential(
                nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128,
                          out_channels=256,
                          kernel_size=3,
                          stride=1),
                nn.ReLU(),
            ),
        ])

        # Bounding box offset regressor
        num_prior_bbox = 6  # num of prior bounding boxes
        self.loc_regressor = nn.ModuleList([
            nn.Conv2d(in_channels=512,
                      out_channels=num_prior_bbox * 4,
                      kernel_size=3,
                      padding=1),  #Cov5_3
            nn.Conv2d(in_channels=1024,
                      out_channels=num_prior_bbox * 4,
                      kernel_size=3,
                      padding=1),  #FC7
            nn.Conv2d(in_channels=512,
                      out_channels=num_prior_bbox * 4,
                      kernel_size=3,
                      padding=1),  #Conv8_2
            # TODO: implement remaining layers.
            nn.Conv2d(in_channels=256,
                      out_channels=num_prior_bbox * 4,
                      kernel_size=3,
                      padding=1),  #Conv9_2
            nn.Conv2d(in_channels=256,
                      out_channels=num_prior_bbox * 4,
                      kernel_size=3,
                      padding=1),  #Conv10_2
            nn.Conv2d(in_channels=256,
                      out_channels=num_prior_bbox * 4,
                      kernel_size=3,
                      padding=1),  #Conv11_2
        ])

        # Bounding box classification confidence for each label
        self.classifier = nn.ModuleList([
            nn.Conv2d(in_channels=512,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=1024,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=512,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=256,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=256,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=256,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1),
        ])

        # Load the pre-trained weights into self.base_net; fine-tuning from
        # them improves accuracy
        basenet_state = torch.load('pretrained/mobienetv2.pth',
                                   map_location='cpu')
        base_net_1 = {
            # strip the 'base_net.' prefix so keys line up with base_net's own
            # state dict (an assumption: the checkpoint was saved from a
            # wrapping SSD module, as the original 'base_net' filter implies)
            key[len('base_net.'):]: value
            for key, value in basenet_state.items()
            if key.startswith('base_net.')
        }
        # strict=False tolerates head layers present in only one of the dicts
        self.base_net.load_state_dict(base_net_1, strict=False)

        def init_with_xavier(m):
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight)

        self.loc_regressor.apply(init_with_xavier)
        self.classifier.apply(init_with_xavier)
        self.additional_feat_extractor.apply(init_with_xavier)
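
# A quick sanity check for the weight loading above (a sketch, not from the
# original source): confirm that the remapped pretrained keys actually overlap
# base_net's own state dict, since with strict=False an empty match would
# leave the backbone randomly initialized without raising an error.
def check_backbone_loaded(base_net, loaded_state):
    own_keys = set(base_net.state_dict().keys())
    matched = own_keys.intersection(loaded_state.keys())
    print('matched %d / %d backbone tensors' % (len(matched), len(own_keys)))
    return len(matched) > 0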
Example #26
class SSD(nn.Module):
    
    def __init__(self, num_classes = 4):
        super(SSD, self).__init__()
        self.num_classes = num_classes

        # Setup the backbone network (base_net)
        self.base_net = MobileNet(num_classes)

        # The feature maps will be extracted from layer[11] and layer[13] in (base_net)
        self.base_output_layer_indices = (11, 13)

        # Define the Additional feature extractor
        self.additional_feat_extractor = nn.ModuleList([
            # Conv8_2
            nn.Sequential(
                nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=2, padding=1),
                nn.ReLU()
            ),
            # Conv9_2
            nn.Sequential(
                nn.Conv2d(in_channels=512, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1),
                nn.ReLU()
            ),
            # Conv10_2
            nn.Sequential(
                nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
                nn.ReLU()
            ),
            # Conv11_2
            nn.Sequential(
                nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
                nn.ReLU()
            ),
        ])

        # Bounding box offset regressor
        num_prior_bbox = 6                                                               # num of prior bounding boxes
        self.loc_regressor = nn.ModuleList([
            nn.Conv2d(in_channels=512, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=1024, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=512, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1)
        ])

        # Bounding box classification confidence for each label
        self.classifier = nn.ModuleList([
            nn.Conv2d(in_channels=512, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=1024, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=512, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1)
        ])


        # Load the pre-trained weights into self.base_net; fine-tuning from
        # them improves accuracy.
        pretrained_model = torch.load("./pretrained/mobienetv2.pth")
        my_model = self.base_net.state_dict()

        # 1. keep only the pretrained keys that this model also defines
        pretrained_model = {k: v for k, v in pretrained_model.items() if k in my_model}
        # 2. overwrite the matching entries in the existing state dict
        my_model.update(pretrained_model)
        # 3. load the merged state dict
        self.base_net.load_state_dict(my_model)

        def init_with_xavier(m):
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight)
        self.loc_regressor.apply(init_with_xavier)
        self.classifier.apply(init_with_xavier)
        self.additional_feat_extractor.apply(init_with_xavier)

    def feature_to_bbbox(self, loc_regress_layer, confidence_layer, input_feature):
        """
        Compute the bounding box class scores and the bounding box offset
        :param loc_regress_layer: offset regressor layer to run forward
        :param confidence_layer: confidence layer to run forward
        :param input_feature: feature map to be fed in
        :return: confidence and location, with dim:(N, num_priors, num_classes) and dim:(N, num_priors, 4) respectively.
        """
        conf = confidence_layer(input_feature)
        loc = loc_regress_layer(input_feature)

        # Confidence post-processing:
        # 1: (N, num_prior_bbox * n_classes, H, W) to (N, H*W*num_prior_bbox, n_classes) = (N, num_priors, num_classes)
        #    where H*W*num_prior_bbox = num_priors
        conf = conf.permute(0, 2, 3, 1).contiguous()
        num_batch = conf.shape[0]
        c_channels = int(conf.shape[1]*conf.shape[2]*conf.shape[3] / self.num_classes)
        conf = conf.view(num_batch, c_channels, self.num_classes)

        # Bounding Box loc and size post-processing
        # 1: (N, num_prior_bbox*4, H, W) to (N, num_priors, 4)
        loc = loc.permute(0, 2, 3, 1).contiguous()
        loc = loc.view(num_batch, c_channels, 4)

        return conf, loc

    def forward(self, input):

        confidence_list = []
        loc_list = []

        # Run the backbone network from layer [0 to 11] and fetch the bbox
        # class confidence as well as position and size
        y = module_util.forward_from(self.base_net.conv_layers, 0, self.base_output_layer_indices[0], input)
        confidence, loc = self.feature_to_bbbox(self.loc_regressor[0], self.classifier[0], y)
        confidence_list.append(confidence)
        loc_list.append(loc)

        # run the backbone network from layer [11 to 13] and compute the corresponding bbox loc and confidence
        y = module_util.forward_from(self.base_net.conv_layers, self.base_output_layer_indices[0], self.base_output_layer_indices[1], y)
        confidence, loc = self.feature_to_bbbox(self.loc_regressor[1], self.classifier[1], y)
        confidence_list.append(confidence)
        loc_list.append(loc)

        # forward 'y' through the additional layers to extract coarse features
        for i in range(4):  # all four additional maps; the original range(0, 3) skipped Conv11_2
            y = module_util.forward_from(self.additional_feat_extractor, i, i + 1, y)
            confidence, loc = self.feature_to_bbbox(self.loc_regressor[i + 2], self.classifier[i + 2], y)
            confidence_list.append(confidence)
            loc_list.append(loc)

        confidences = torch.cat(confidence_list, 1)
        locations = torch.cat(loc_list, 1)
        # [Debug] check the output
        assert confidences.dim() == 3  # should be (N, num_priors, num_classes)
        assert locations.dim() == 3   # should be (N, num_priors, 4)
        assert confidences.shape[1] == locations.shape[1]
        assert locations.shape[2] == 4

        if not self.training:
            # If in testing/evaluating mode, normalize the output with Softmax
            confidences = F.softmax(confidences, dim=2)
        return confidences, locations
Example #27
        model = Resnet_interpretable_gradcam(num_classes=num_classe)
    elif args.model_type == 'ex_gradcam2':
        model = VGG_interpretable_gradcam2(num_classes=num_classe)
    else:
        model = Resnet(num_classes=num_classe)
elif args.model == 'mobilenet':
    if args.model_type == 'ex_atten':
        model = VGG_interpretable_atten(num_classes=num_classe)
    elif args.model_type == 'ex':
        model = VGG_interpretable(num_classes=num_classe)
    elif args.model_type == 'ex_gradcam':
        model = Mobile_interpretable_gradcam(num_classes=num_classe)
    elif args.model_type == 'ex_gradcam2':
        model = VGG_interpretable_gradcam2(num_classes=num_classe)
    else:
        model = MobileNet(num_classes=num_classe)
elif args.model == 'alexnet':
    if args.model_type == 'ex_atten':
        model = VGG_interpretable_atten(num_classes=num_classe)
    elif args.model_type == 'ex':
        model = Alexnet_interpretable(num_classes=num_classe)
    elif args.model_type == 'ex_gradcam':
        model = Alexnet_interpretable_gradcam(num_classes=num_classe)
    else:
        model = Alexnet(num_classes=num_classe)
use_gpu = torch.cuda.is_available()  # check whether GPU acceleration is available
if use_gpu:
    model = model.cuda()
if model_half:
    model = model.half()
if args.model_init:
Example #28
    def __init__(self, num_classes = 4):
        super(SSD, self).__init__()
        self.num_classes = num_classes

        # Setup the backbone network (base_net)
        self.base_net = MobileNet(num_classes)

        # The feature maps will be extracted from layer[11] and layer[13] in (base_net)
        self.base_output_layer_indices = (11, 13)

        # Define the Additional feature extractor
        self.additional_feat_extractor = nn.ModuleList([
            # Conv8_2
            nn.Sequential(
                nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=2, padding=1),
                nn.ReLU()
            ),
            # Conv9_2
            nn.Sequential(
                nn.Conv2d(in_channels=512, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1),
                nn.ReLU()
            ),
            # Conv10_2
            nn.Sequential(
                nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
                nn.ReLU()
            ),
            # Conv11_2
            nn.Sequential(
                nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
                nn.ReLU()
            ),
        ])

        # Bounding box offset regressor
        num_prior_bbox = 6                                                               # num of prior bounding boxes
        self.loc_regressor = nn.ModuleList([
            nn.Conv2d(in_channels=512, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=1024, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=512, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1)
        ])

        # Bounding box classification confidence for each label
        self.classifier = nn.ModuleList([
            nn.Conv2d(in_channels=512, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=1024, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=512, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1)
        ])


        # Load the pre-trained weights into self.base_net; fine-tuning from
        # them improves accuracy.
        pretrained_model = torch.load("./pretrained/mobienetv2.pth")
        my_model = self.base_net.state_dict()

        # 1. keep only the pretrained keys that this model also defines
        pretrained_model = {k: v for k, v in pretrained_model.items() if k in my_model}
        # 2. overwrite the matching entries in the existing state dict
        my_model.update(pretrained_model)
        # 3. load the merged state dict
        self.base_net.load_state_dict(my_model)

        def init_with_xavier(m):
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight)
        self.loc_regressor.apply(init_with_xavier)
        self.classifier.apply(init_with_xavier)
        self.additional_feat_extractor.apply(init_with_xavier)
Example #29
    def __init__(self, num_classes):
        super(SSD, self).__init__()
        self.num_classes = num_classes

        # Setup the backbone network (base_net)
        self.base_net = MobileNet(num_classes)

        # The feature maps will be extracted from layer[11] and layer[13] in (base_net)
        self.base_output_layer_indices = (11, 13)

        # Define the Additional feature extractor
        self.additional_feat_extractor = nn.ModuleList([
            # Conv8_2 : 256 x 5 x 5
            nn.Sequential(
                nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=256,
                          out_channels=512,
                          kernel_size=3,
                          stride=2,
                          padding=1), nn.ReLU()),
            # Conv9_2 : 256 x 3 x 3
            nn.Sequential(
                nn.Conv2d(in_channels=512, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128,
                          out_channels=256,
                          kernel_size=3,
                          stride=2,
                          padding=1), nn.ReLU()),
            # TODO: implement two more layers.
            # Conv10_2: 256 x 2 x 2
            nn.Sequential(
                nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128,
                          out_channels=256,
                          kernel_size=3,
                          stride=2,
                          padding=1), nn.ReLU()),
            # Conv11_2: 256 x 1 x 1
            nn.Sequential(
                nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128,
                          out_channels=256,
                          kernel_size=3,
                          stride=2,
                          padding=1), nn.ReLU())
        ])

        # Bounding box offset regressor
        num_prior_bbox = 6  # num of prior bounding boxes
        self.loc_regressor = nn.ModuleList([
            nn.Conv2d(in_channels=512,
                      out_channels=(num_prior_bbox * 4),
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=1024,
                      out_channels=(num_prior_bbox * 4),
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=512,
                      out_channels=(num_prior_bbox * 4),
                      kernel_size=3,
                      padding=1),
            # TODO: implement remaining layers.
            nn.Conv2d(in_channels=256,
                      out_channels=(num_prior_bbox * 4),
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=256,
                      out_channels=(num_prior_bbox * 4),
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=256,
                      out_channels=(num_prior_bbox * 4),
                      kernel_size=3,
                      padding=1),
        ])

        # Bounding box classification confidence for each label
        self.classifier = nn.ModuleList([
            nn.Conv2d(in_channels=512,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=1024,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=512,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1),
            # TODO: implement remaining layers.
            nn.Conv2d(in_channels=256,
                      out_channels=(num_prior_bbox * num_classes),
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=256,
                      out_channels=(num_prior_bbox * num_classes),
                      kernel_size=3,
                      padding=1),
            nn.Conv2d(in_channels=256,
                      out_channels=(num_prior_bbox * num_classes),
                      kernel_size=3,
                      padding=1),
        ])

        # Load the pre-trained weights into self.base_net; fine-tuning from them improves accuracy
        def init_pretrained_weights(net_dict, pretrained_dict):
            ext_keys = []
            new_keys = []
            del_keys = []
            for key in pretrained_dict.keys():
                # change key names
                if key.find('base_net') > -1:
                    ext_keys.append(key)
                    new_keys.append('conv_layers' + key[len('base_net'):])
                # discard parameters not in mobilenet
                if key not in net_dict.keys():
                    del_keys.append(key)
            #copy value from ext_keys to new_keys
            for idx in range(len(ext_keys)):
                pretrained_dict[new_keys[idx]] = pretrained_dict[ext_keys[idx]]
            #delete unmatched keys
            for key in del_keys:
                pretrained_dict.pop(key)

            # add undefined name (FC is not used in our model, just initialize with default)
            for key in net_dict.keys():
                if key not in pretrained_dict.keys():
                    pretrained_dict[key] = net_dict[key]

            return pretrained_dict

        model_dict = self.state_dict()
        pretrained_dict = torch.load('./pretrained/mobienetv2.pth')
        pretrained_weights = init_pretrained_weights(model_dict,
                                                     pretrained_dict)
        model_dict.update(pretrained_weights)
        # actually apply the merged weights; update() alone does not load them
        self.load_state_dict(model_dict)

        def init_with_xavier(m):
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight)

        self.loc_regressor.apply(init_with_xavier)
        self.classifier.apply(init_with_xavier)
        self.additional_feat_extractor.apply(init_with_xavier)
Example #30
        if args.depth == 18:
            model = ResNet18()
        elif args.depth == 50:
            model = ResNet50()
        else:
            sys.exit("resnet doesn't implement that depth!")
    elif args.arch == "convnet":
        args.depth = 4
        model = ConvNet()
        print("convnet selected")
    elif args.arch == "lenet":
        args.depth = 5
        model = LeNet()
    elif args.arch == "mobilenet":
        args.depth = 13
        model = MobileNet()
    if args.multi_gpu:
        model = torch.nn.DataParallel(model)
    model.cuda()

#############

criterion = CrossEntropyLossMaybeSmooth(smooth_eps=args.smooth_eps).cuda()
# args.smooth = args.smooth_eps > 0.0
# args.mixup = config.alpha > 0.0
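
# One plausible definition of the CrossEntropyLossMaybeSmooth used above (an
# assumption -- the original class is not shown in this snippet). It implements
# standard label smoothing and falls back to plain cross-entropy when
# smooth_eps == 0; in a real script it would be defined before its first use.
import torch.nn as nn
import torch.nn.functional as F

class CrossEntropyLossMaybeSmooth(nn.Module):
    def __init__(self, smooth_eps=0.0):
        super(CrossEntropyLossMaybeSmooth, self).__init__()
        self.smooth_eps = smooth_eps

    def forward(self, output, target):
        if self.smooth_eps <= 0.0:
            return F.cross_entropy(output, target)
        n_class = output.size(1)
        one_hot = F.one_hot(target, n_class).float()
        # blend the one-hot target with a uniform distribution over classes
        soft = one_hot * (1.0 - self.smooth_eps) + self.smooth_eps / n_class
        return (-soft * F.log_softmax(output, dim=1)).sum(dim=1).mean()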

optimizer_init_lr = args.warmup_lr if args.warmup else args.lr

optimizer = None
if args.optmzr == 'sgd':
    optimizer = torch.optim.SGD(model.parameters(),