def main(_):
    """Build the MobileNet training graph and run the training loop.

    The single positional argument is ignored (presumably the argv list
    handed over by ``tf.app.run`` -- TODO confirm against the caller).
    All settings are read from the module-level ``FLAGS``.
    """
    tf.logging.set_verbosity(tf.logging.DEBUG)

    # Input pipeline: dog_tensor yields the image batch, the label batch and
    # the number of classes.
    with tf.variable_scope('data'):
        train_images, train_labels, num_classes = dog_tensor(
            FLAGS.dogdir, FLAGS.batch_size, class_regex=FLAGS.dog_regex)

    net = MobileNet(num_classes, alpha=FLAGS.alpha)
    train_logits = net(train_images, is_training=True)
    tf.logging.info('built model on training data')

    # Prints the trainable-parameter statistics of the graph; the returned
    # value is not used afterwards.
    param_stats = tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(),
        tfprof_options=tfprof.model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS
    )

    with tf.variable_scope('training'):
        loss = tf.losses.sparse_softmax_cross_entropy(labels=train_labels,
                                                      logits=train_logits)
        loss = tf.reduce_mean(loss)
        tf.summary.scalar('train/xent', loss)

        global_step = tf.train.get_or_create_global_step()
        opt = tf.train.AdamOptimizer(FLAGS.learning_rate)
        train_step = opt.minimize(loss, global_step=global_step)
        # make sure we update running averages
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if update_ops:
            # Group the update ops with the optimizer step so one sess.run
            # executes both.
            train_step = tf.group(train_step, *update_ops)

    # valid/test

    # The Supervisor owns the session, checkpoints into FLAGS.logdir and
    # writes summaries every 15 seconds.
    sv = tf.train.Supervisor(logdir=FLAGS.logdir,
                             global_step=global_step,
                             save_summaries_secs=15)

    with sv.managed_session() as sess, sv.stop_on_exception():
        tf.logging.debug('ready to run things')
        # sess = tfdbg.LocalCLIDebugWrapperSession(sess)
        # Start from whatever global step the session currently holds.
        step = sess.run(global_step)
        while step < FLAGS.max_steps:
            step, train_loss, _ = sess.run([global_step, loss, train_step])
            tf.logging.info('(%d) train loss: %f', step, train_loss)
def train():
    """Fit the video MobileNet on the two segmented sets.

    Collects file paths and labels from both segmented directories, builds
    the train/valid splits, compiles the network with RMSprop and runs
    ``fit_generator`` with augmentation enabled on the training generator.
    """
    # Augmentation settings handed to the training generator.
    augmentation = {
        'flag': True,
        'rg': 25,  # 7, 5
        'wrg': 0.25,  # 1, 3
        'hrg': 0.25,  # 1, 3
        'zoom': 0.25  # 1, 1
    }
    callbacks = get_callbacks('mynet_v4_bias', patience=30)

    # The second search extends the path/label lists found by the first.
    paths, y = search_file('set1/segmented_set1')
    paths, y = search_file('set2/segmented_set2', paths=paths, y=y)

    dataset = DataSet(nframe=30,
                      fstride=6,
                      name='UT interaction',
                      size=[224, 224, 3],
                      filepaths=paths,
                      y=y,
                      kernel_size=4)
    for split in ('train', 'valid'):
        dataset.make_set(op='msqr', name=split)

    # Earlier experiments used Adam(lr=1e-4, decay=0.1) and
    # SGD(lr=2e-1, momentum=0.9, nesterov=True, decay=0.2).
    optimizer = RMSprop(lr=0.001, rho=0.9, decay=0.01)

    model = MobileNet(alpha=1.0, shape=[29, 56, 56, 1], nframe=29)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    # To resume from a previous run: model.load_weights('mynet_v4.h5')

    train_batches = dataset.train_gen(batch_size=5, aug_config=augmentation)
    valid_batches = dataset.valid_gen()
    # NOTE(review): getVlen is passed without being called -- confirm it is
    # a property/attribute rather than a method.
    model.fit_generator(generator=train_batches,
                        steps_per_epoch=100,
                        epochs=300,
                        validation_data=valid_batches,
                        verbose=1,
                        validation_steps=dataset.getVlen,
                        callbacks=callbacks)
class KeypointModel(BasicModule):
    """Multi-stage keypoint network on top of a MobileNet feature extractor.

    The backbone output goes through a small conv/BN/ReLU transform; six
    stages then each produce one prediction. Every stage after the first
    receives the transformed features concatenated with the previous
    stage's output.
    """

    def __init__(self, opt):
        super(KeypointModel, self).__init__(opt)
        self.pretrained = MobileNet()

        # Two 3x3 conv blocks: 256 -> 256 -> 128 channels.
        self.trf = nn.Sequential(
            nn.Conv2d(256, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            nn.Conv2d(256, 128, 3, 1, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
        )

        # One entry stage on the 128-channel features, then five refinement
        # stages built with 169 as their constructor argument.
        entry_stage = Stage(128)
        refinement_stages = [Stage(169) for _ in range(5)]
        self.stages = nn.ModuleList([entry_stage] + refinement_stages)

    def forward(self, img):
        """Return the list of per-stage outputs, first stage first."""
        backbone_out = self.pretrained(img)
        features = self.trf(backbone_out)

        entry_stage, *refinement_stages = self.stages
        output = entry_stage(features)
        outputs = [output]
        for stage in refinement_stages:
            # Each refinement stage sees the features plus the previous guess.
            stage_in = t.cat([features, output], dim=1)
            output = stage(stage_in)
            outputs.append(output)
        return outputs

    def get_optimizer(self, lr1, lr2):
        """Create, store and return an Adam optimizer.

        ``lr1`` is used for the backbone, ``lr2`` for the stages and the
        feature transform.
        """
        backbone_group = {'params': self.pretrained.parameters(), 'lr': lr1}
        stages_group = {'params': self.stages.parameters(), 'lr': lr2}
        transform_group = {'params': self.trf.parameters(), 'lr': lr2}
        self.optimizer = t.optim.Adam(
            [backbone_group, stages_group, transform_group])
        return self.optimizer
def load_model(args):
    """Instantiate the ImageNet-pretrained, headless network named by ``args.model``.

    Args:
        args: object with a ``model`` attribute (one of the keys below) and
            a ``pooling`` attribute forwarded to the Keras constructor.

    Returns:
        ``(model, preprocess_mode)`` where ``preprocess_mode`` is ``'tf'``
        or ``'caffe'``; ``0`` (after printing a message) when the model
        name is not recognised.
    """
    # name -> (lazy class lookup, preprocessing mode). The lambdas defer the
    # name lookup so only the selected architecture is ever touched.
    registry = {
        'inception': (lambda: InceptionV3, 'tf'),
        'xception': (lambda: Xception, 'tf'),
        'inceptionresnet': (lambda: InceptionResNetV2, 'tf'),
        'mobilenet': (lambda: MobileNet, 'tf'),
        'mobilenet2': (lambda: MobileNetV2, 'tf'),
        'nasnet': (lambda: NASNetLarge, 'tf'),
        'resnet': (lambda: ResNet50, 'caffe'),
        'vgg16': (lambda: VGG16, 'caffe'),
        'vgg19': (lambda: VGG19, 'caffe'),
    }

    entry = registry.get(args.model)
    if entry is None:
        print ("Model not found")
        return 0

    resolve_class, preprocess_mode = entry
    network_class = resolve_class()
    model = network_class(include_top=False,
                          weights='imagenet',
                          pooling=args.pooling)
    return model, preprocess_mode
def run_training(config, n_classes, train_loader, valid_loader, width=1, mb_version=1):
    """Run the regular training phase followed by a fine-tuning phase.

    Phase one uses Adam with a MultiStepLR schedule (milestones 3 and 6);
    phase two continues on the same model with SGD (momentum 0.9, a tenth
    of the configured learning rate) under an SGDR schedule.

    Args:
        config: mapping read for ``'device'`` and ``'lr'``; also handed to
            ``train``.
        n_classes: number of output classes of the model.
        train_loader: training data loader handed to ``train``.
        valid_loader: validation data loader handed to ``train``.
        width: width multiplier; values above 1 select a ResNet-18 instead
            of a MobileNet.
        mb_version: 1 selects ``MobileNet``, anything else ``MobileNetV2``.

    Returns:
        ``[train_loss, train_accuracy, valid_loss, valid_accuracy]`` where
        each entry is the phase-one history ``+`` the phase-two history.
    """
    # Model selection.
    if width > 1:
        model = tvm.resnet18(num_classes=n_classes)
    elif mb_version == 1:
        model = MobileNet(n_classes=n_classes, width_mult=width)
    else:
        model = MobileNetV2(n_classes=n_classes, width_mult=width)
    model = model.to(config['device'])

    # Report the total number of parameter elements.
    num_params = sum(np.prod(p.size()) for p in model.parameters())
    print(f"width={width}, num_params {num_params}")

    criterion = t.nn.CrossEntropyLoss()

    # Phase one: Adam.
    adam = t.optim.Adam(model.parameters(), config['lr'])
    step_schedule = t.optim.lr_scheduler.MultiStepLR(adam, [3, 6])
    loss_a, acc_a, val_loss_a, val_acc_a = train(
        config, model, train_loader, valid_loader, criterion, adam,
        step_schedule)

    # Phase two: SGD with warm restarts.
    sgd = t.optim.SGD(model.parameters(), config['lr'] / 10, momentum=0.9)
    restart_schedule = SGDR(sgd, 3, 1.2)
    loss_b, acc_b, val_loss_b, val_acc_b = train(
        config, model, train_loader, valid_loader, criterion, sgd,
        restart_schedule)

    return [
        loss_a + loss_b,
        acc_a + acc_b,
        val_loss_a + val_loss_b,
        val_acc_a + val_acc_b,
    ]
def __init__(self, num_classes):
    """Build the SSD layers and load the pre-trained backbone weights.

    Args:
        num_classes: number of classes; passed to the MobileNet backbone and
            used to size each classification head.

    The checkpoint ``pretrained/mobienetv2.pth`` is read from disk. Only
    entries whose key and tensor size match the backbone's state dict are
    copied; everything else keeps its fresh initialisation.
    """
    super(SSD, self).__init__()
    self.num_classes = num_classes

    # Setup the backbone network (base_net)
    self.base_net = MobileNet(num_classes)

    # The feature maps are extracted from layer[11] and layer[13] of base_net
    self.base_output_layer_indices = (11, 13)

    def extra_block(in_channels, mid_channels, out_channels):
        # 1x1 channel squeeze followed by a stride-2 3x3 convolution.
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels,
                      out_channels=mid_channels,
                      kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=mid_channels,
                      out_channels=out_channels,
                      kernel_size=3,
                      stride=2,
                      padding=1),
            nn.ReLU())

    # Additional feature extractors stacked on the backbone output.
    self.additional_feat_extractor = nn.ModuleList([
        extra_block(1024, 256, 512),  # Conv8_2
        extra_block(512, 128, 256),  # Conv9_2
        extra_block(256, 128, 256),
        extra_block(256, 128, 256),
    ])

    num_prior_bbox = 6  # num of prior bounding boxes
    # Input channels of the six feature maps the heads are attached to.
    head_in_channels = (512, 1024, 512, 256, 256, 256)

    # Bounding box offset regressor: 4 offsets per prior box.
    self.loc_regressor = nn.ModuleList([
        nn.Conv2d(in_channels=channels,
                  out_channels=num_prior_bbox * 4,
                  kernel_size=3,
                  padding=1) for channels in head_in_channels
    ])

    # Bounding box classification confidence for each label.
    self.classifier = nn.ModuleList([
        nn.Conv2d(in_channels=channels,
                  out_channels=num_prior_bbox * num_classes,
                  kernel_size=3,
                  padding=1) for channels in head_in_channels
    ])

    # Load the pre-trained weights for self.base_net. map_location='cpu'
    # lets a checkpoint saved from a GPU load on a CPU-only machine;
    # load_state_dict then copies the values into the backbone parameters.
    temp_state = torch.load('pretrained/mobienetv2.pth', map_location='cpu')
    cur_dict = self.base_net.state_dict()
    input_state = {
        k: v
        for k, v in temp_state.items()
        if k in cur_dict and v.size() == cur_dict[k].size()
    }
    cur_dict.update(input_state)
    self.base_net.load_state_dict(cur_dict)

    def init_with_xavier(m):
        if isinstance(m, nn.Conv2d):
            nn.init.xavier_uniform_(m.weight)

    # Only the layers that are not pre-trained get Xavier-initialised weights.
    self.loc_regressor.apply(init_with_xavier)
    self.classifier.apply(init_with_xavier)
    self.additional_feat_extractor.apply(init_with_xavier)
from mobilenet import MobileNet # from my_generator2 import My_Generator # net = applications.mobilenet_v2.MobileNetV2(include_top=False, pooling='avg', weights='imagenet', # input_shape = (223,223,3)) # net = applications.nasnet.NASNetMobile(input_shape=(223, 223, 3), include_top=False, weights='imagenet', # pooling='avg') # print(len(net.layers)) # model = Sequential() # model.add(net) # model.add(Dense(2, activation='softmax')) # for layer in net.layers[:-45]: # layer.trainable = False model = MobileNet((64, 64, 3), 200) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) model.summary() # exit(0) print("Compile model done!") earlyStopping = EarlyStopping(monitor='val_acc', patience=20, verbose=1) filepath = "models/imagenet_clf_model_test.h5" mcp_save = ModelCheckpoint(filepath, save_best_only=True, monitor='val_acc') reduce_lr = ReduceLROnPlateau('val_acc', factor=0.5, patience=4, verbose=1) train_data_dir = 'data/train' validation_data_dir = 'data/validation'
import keras
from keras.models import load_model
from keras.preprocessing import image
from keras.preprocessing.image import load_img
from mobilenet import MobileNet
from keras.applications.imagenet_utils import decode_predictions

# Single-image inference demo: load one picture, resize it to the 32x32 input
# of the network and print the raw prediction vector.
img_file = 'demo.png'
img = load_img(img_file, target_size=(32, 32))

# Scale the pixels to [0, 1]. The training script that writes
# 'mobileV1-lite.h5' divides its input images by 255, so raw 0..255 values
# would not match what the weights were trained on.
# NOTE: from here on `image` names the array, not the keras module.
image = image.img_to_array(img) / 255.0

# Add the leading batch dimension expected by model.predict.
image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))

model = MobileNet()
model.load_weights('mobileV1-lite.h5')
# model.summary()
result = model.predict(image)
print(result)
class SSD(nn.Module):
    """Single-shot detector on a MobileNet backbone.

    Six feature maps feed the detection heads: two taken from the backbone
    (after layer 11 and after layer 13) and four produced by the additional
    feature extractors. Each map gets one box-offset head and one
    class-confidence head with 6 prior boxes per location.
    """

    def __init__(self, num_classes):
        """Build all layers and load the pre-trained backbone weights.

        Args:
            num_classes: number of classes; passed to the backbone and used
                to size each classification head.

        The checkpoint ``pretrained/mobienetv2.pth`` is read from disk.
        Only entries whose key and tensor size match the backbone's state
        dict are copied.
        """
        super(SSD, self).__init__()
        self.num_classes = num_classes

        # Setup the backbone network (base_net)
        self.base_net = MobileNet(num_classes)

        # The feature maps are extracted from layer[11] and layer[13] of base_net
        self.base_output_layer_indices = (11, 13)

        def extra_block(in_channels, mid_channels, out_channels):
            # 1x1 channel squeeze followed by a stride-2 3x3 convolution.
            return nn.Sequential(
                nn.Conv2d(in_channels=in_channels,
                          out_channels=mid_channels,
                          kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=mid_channels,
                          out_channels=out_channels,
                          kernel_size=3,
                          stride=2,
                          padding=1),
                nn.ReLU())

        # Additional feature extractors stacked on the backbone output.
        self.additional_feat_extractor = nn.ModuleList([
            extra_block(1024, 256, 512),  # Conv8_2
            extra_block(512, 128, 256),  # Conv9_2
            extra_block(256, 128, 256),
            extra_block(256, 128, 256),
        ])

        num_prior_bbox = 6  # num of prior bounding boxes
        # Input channels of the six feature maps the heads are attached to.
        head_in_channels = (512, 1024, 512, 256, 256, 256)

        # Bounding box offset regressor: 4 offsets per prior box.
        self.loc_regressor = nn.ModuleList([
            nn.Conv2d(in_channels=channels,
                      out_channels=num_prior_bbox * 4,
                      kernel_size=3,
                      padding=1) for channels in head_in_channels
        ])

        # Bounding box classification confidence for each label.
        self.classifier = nn.ModuleList([
            nn.Conv2d(in_channels=channels,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3,
                      padding=1) for channels in head_in_channels
        ])

        # Load the pre-trained weights for self.base_net. map_location='cpu'
        # lets a checkpoint saved from a GPU load on a CPU-only machine;
        # load_state_dict then copies the values into the backbone.
        temp_state = torch.load('pretrained/mobienetv2.pth',
                                map_location='cpu')
        cur_dict = self.base_net.state_dict()
        input_state = {
            k: v
            for k, v in temp_state.items()
            if k in cur_dict and v.size() == cur_dict[k].size()
        }
        cur_dict.update(input_state)
        self.base_net.load_state_dict(cur_dict)

        def init_with_xavier(m):
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight)

        # Only the layers that are not pre-trained get Xavier weights.
        self.loc_regressor.apply(init_with_xavier)
        self.classifier.apply(init_with_xavier)
        self.additional_feat_extractor.apply(init_with_xavier)

    def feature_to_bbbox(self, loc_regress_layer, confidence_layer, input_feature):
        """Run one pair of detection heads on a feature map.

        :param loc_regress_layer: offset regressor layer to run forward
        :param confidence_layer: confidence layer to run forward
        :param input_feature: feature map fed to both layers
        :return: ``(conf, loc)`` reshaped to ``(N, num_priors, num_classes)``
            and ``(N, num_priors, 4)``, where
            ``num_priors = H * W * num_prior_bbox`` of this feature map.
        """
        conf = confidence_layer(input_feature)
        loc = loc_regress_layer(input_feature)

        # (N, num_prior_bbox * n_classes, H, W) -> (N, H, W, C) so that the
        # per-location values are contiguous before flattening.
        conf = conf.permute(0, 2, 3, 1).contiguous()
        num_batch = conf.shape[0]
        # -1 lets view compute H*W*num_prior_bbox with exact integer
        # arithmetic instead of a float division.
        conf = conf.view(num_batch, -1, self.num_classes)

        # (N, num_prior_bbox * 4, H, W) -> (N, num_priors, 4)
        loc = loc.permute(0, 2, 3, 1).contiguous()
        loc = loc.view(num_batch, -1, 4)

        return conf, loc

    def forward(self, input):
        """Compute class confidences and box offsets for every prior box.

        :param input: image batch fed to the backbone
        :return: ``(confidences, locations)`` of shape
            ``(N, num_priors, num_classes)`` and ``(N, num_priors, 4)``.
            In evaluation mode the confidences are softmax-normalised over
            the class dimension.

        NOTE: the second backbone segment now starts at layer 12. It
        previously started at layer 11, which had already been applied by
        the first segment, so that layer ran twice. Checkpoints trained
        with the old behaviour still load but will produce different
        outputs.
        """
        confidence_list = []
        loc_list = []
        first_idx, second_idx = self.base_output_layer_indices

        # Backbone layers [0, 11]: first detection feature map.
        y = module_util.forward_from(self.base_net.base_net, 0,
                                     first_idx + 1, input)
        confidence, loc = self.feature_to_bbbox(self.loc_regressor[0],
                                                self.classifier[0], y)
        confidence_list.append(confidence)
        loc_list.append(loc)

        # Backbone layers [12, 13]: second detection feature map.
        y = module_util.forward_from(self.base_net.base_net, first_idx + 1,
                                     second_idx + 1, y)
        confidence, loc = self.feature_to_bbbox(self.loc_regressor[1],
                                                self.classifier[1], y)
        confidence_list.append(confidence)
        loc_list.append(loc)

        # Additional layers extract progressively coarser features; head
        # index is offset by the two backbone feature maps.
        for idx, extractor in enumerate(self.additional_feat_extractor):
            y = module_util.forward_from(extractor, 0, 4, y)
            confidence, loc = self.feature_to_bbbox(
                self.loc_regressor[idx + 2], self.classifier[idx + 2], y)
            confidence_list.append(confidence)
            loc_list.append(loc)

        confidences = torch.cat(confidence_list, 1)
        locations = torch.cat(loc_list, 1)

        # [Debug] check the output
        assert confidences.dim() == 3  # should be (N, num_priors, num_classes)
        assert locations.dim() == 3  # should be (N, num_priors, 4)
        assert confidences.shape[1] == locations.shape[1]
        assert locations.shape[2] == 4

        if not self.training:
            # If in testing/evaluating mode, normalize the output with Softmax
            confidences = F.softmax(confidences, dim=2)

        return confidences, locations
def _model_fn(num_bits, features, labels, mode, params):
    """Estimator model function for the (optionally quantized) MobileNet.

    Args:
        num_bits: forwarded to ``MobileNet`` (presumably the quantization
            bit width -- TODO confirm against the model class).
        features: input tensor handed to ``model.forward_pass``.
        labels: integer class labels; not used in PREDICT mode.
        mode: a ``tf.estimator.ModeKeys`` value.
        params: unused.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for TRAIN, EVAL or PREDICT.
    """
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    uses_tf_quantize = (config.quant_method == 'tensorflow')

    # weight regularization
    regularizer = tf.contrib.layers.l2_regularizer(scale=config.weight_decay)

    # create model
    num_classes = 10
    model = MobileNet(num_classes,
                      is_training,
                      num_bits,
                      width_multiplier=config.width_multiplier,
                      quant_mode=config.quant_method,
                      conv2d_regularizer=regularizer)

    # forward pass
    logits = model.forward_pass(features)
    predict_class = tf.argmax(input=logits, axis=1)
    predictions = {
        'classes': predict_class,
        'probabilities': tf.nn.softmax(logits)
    }

    # PREDICT has no labels, so it must return before any metric or loss is
    # built; without this branch the function touched labels=None and then
    # fell through without returning a spec.
    if mode == tf.estimator.ModeKeys.PREDICT:
        if uses_tf_quantize:
            # Same inference-graph rewrite as in EVAL mode.
            g = tf.get_default_graph()
            tf.contrib.quantize.create_eval_graph(input_graph=g)
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # calculate accuracy; accuracy[1] is the update op of the metric
    accuracy = tf.metrics.accuracy(labels, predictions['classes'])
    metrics = {'accuracy': accuracy}

    # loss function
    loss = tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels)

    # regularization loss
    # NOTE(review): REGULARIZATION_LOSSES normally already holds loss
    # tensors; applying the L2 regularizer to them again may not be the
    # intended weight decay -- confirm how MobileNet uses
    # conv2d_regularizer before changing this.
    reg_variables = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    reg_term = tf.contrib.layers.apply_regularization(regularizer, reg_variables)
    loss += reg_term

    if mode == tf.estimator.ModeKeys.TRAIN:
        # add fake_quant to 'normal' graph
        if uses_tf_quantize:
            print("TF quantize create training graph")
            g = tf.get_default_graph()
            tf.contrib.quantize.create_training_graph(input_graph=g,
                                                      quant_delay=0)

        # learning rate decay, floored at 1% of the initial rate
        global_step = tf.train.get_global_step()
        steps_per_epoch = num_training_per_epoch / config.train_batch_size
        decay_steps = steps_per_epoch * config.decay_per_epoch
        decay_rate = config.decay_rate
        learning_rate = tf.train.exponential_decay(config.learning_rate,
                                                   global_step, decay_steps,
                                                   decay_rate)
        learning_rate = tf.maximum(learning_rate, config.learning_rate * 0.01)

        # optimize loss; the control dependency makes every train step also
        # run the ops collected in UPDATE_OPS
        optimizer = tf.train.AdamOptimizer(learning_rate)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss=loss, global_step=global_step)

        # logging
        tf.summary.scalar("accuracy", accuracy[1])
        tf.summary.scalar("learning_rate", learning_rate)

        # printing
        tensors_to_log = {
            'learning_rate': learning_rate,
            'loss': loss,
            'accuracy': accuracy[1]
        }
        train_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                                every_n_iter=1000)

        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          loss=loss,
                                          train_op=train_op,
                                          training_hooks=[train_hook],
                                          eval_metric_ops=metrics)

    elif mode == tf.estimator.ModeKeys.EVAL:
        if uses_tf_quantize:
            g = tf.get_default_graph()
            tf.contrib.quantize.create_eval_graph(input_graph=g)

        tf.summary.scalar("accuracy", accuracy[1])
        eval_tensors_to_log = {'eval_loss': loss, 'eval_accuracy': accuracy[1]}
        evaluation_hook = tf.train.LoggingTensorHook(
            tensors=eval_tensors_to_log, every_n_iter=1000)

        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          loss=loss,
                                          evaluation_hooks=[evaluation_hook],
                                          eval_metric_ops=metrics)
import matplotlib as mlp
import matplotlib.patches

np.set_printoptions(precision=2)

# Temporarily switch into the digit-recognition project directory while its
# modules are imported, then switch back (presumably those modules or their
# imports depend on the working directory -- TODO confirm).
os.chdir("./handwritten_digit_recognition/")
# from wide_resnet_28_10 import WideResNet28_10
from mobilenet import MobileNet
from utils import load_mnist
os.chdir("../")

# Directory holding the pre-trained weight files.
PATH = './handwritten_digit_recognition/models/'

#model_name = "WideResNet28_10"
#model=WideResNet28_10()
model_name = "MobileNet"

# Two instances of the same architecture: `model` gets the "_ajustado_1"
# weights, `model2` the plain ones.
model = MobileNet()
model.compile()
model2 = MobileNet()
model2.compile()

print('Loading pretrained weights for ', model_name, '...', sep='')
model.load_weights(PATH + model_name + "_ajustado_1" + '.h5')
model2.load_weights(PATH + model_name + '.h5')


def mesaImagen(mesa, boleta=1):
    # Builds the file names that belong to one `mesa` number:
    #   fname     -- 'actas/<6-digit mesa*10+boleta>.jpg'
    #   data_name -- 'mesas_rv/<mesa>.json'
    # NOTE(review): only these local assignments are visible here; the rest
    # of the function body appears to be missing -- confirm against the
    # full file.
    fname = 'actas/{0:06d}'.format(mesa * 10 + boleta) + '.jpg'
    data_name = "mesas_rv/" + '{}'.format(mesa) + '.json'
    path = "./"
    out_path = './results/'
class SSD(nn.Module):
    """Single-shot detector on a MobileNet backbone.

    Six feature maps feed the detection heads: two taken from the backbone
    (after layer 11 and after layer 13) and four produced by the additional
    feature extractors. Each map gets one box-offset head and one
    class-confidence head with 6 prior boxes per location.
    """

    def __init__(self, num_classes):
        """Build all layers and load the pre-trained backbone weights.

        Args:
            num_classes: number of classes; passed to the backbone and used
                to size each classification head.

        The checkpoint ``pretrained/mobienetv2.pth`` is read from disk and
        every entry whose key contains ``'base_net'`` is loaded into the
        backbone.
        """
        super(SSD, self).__init__()
        self.num_classes = num_classes

        # Setup the backbone network (base_net)
        self.base_net = MobileNet(num_classes)

        # The feature maps are extracted from layer[11] and layer[13] of base_net
        self.base_output_layer_indices = (11, 13)

        # Define the additional feature extractors
        self.additional_feat_extractor = nn.ModuleList([
            # Conv8_2
            nn.Sequential(
                nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=2, padding=1),
                nn.ReLU()),
            # Conv9_2
            nn.Sequential(
                nn.Conv2d(in_channels=512, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1),
                nn.ReLU()),
            # Conv10_2
            nn.Sequential(
                nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
                nn.ReLU(),
            ),
            # Conv11_2 (no padding on the 3x3 convolution)
            nn.Sequential(
                nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1),
                nn.ReLU(),
            ),
        ])

        num_prior_bbox = 6  # num of prior bounding boxes

        # Bounding box offset regressor: 4 offsets per prior box.
        self.loc_regressor = nn.ModuleList([
            nn.Conv2d(in_channels=512, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1),  # Conv5_3
            nn.Conv2d(in_channels=1024, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1),  # FC7
            nn.Conv2d(in_channels=512, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1),  # Conv8_2
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1),  # Conv9_2
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1),  # Conv10_2
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * 4, kernel_size=3, padding=1),  # Conv11_2
        ])

        # Bounding box classification confidence for each label
        self.classifier = nn.ModuleList([
            nn.Conv2d(in_channels=512, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=1024, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=512, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=num_prior_bbox * num_classes, kernel_size=3, padding=1),
        ])

        # Load the pre-trained weights for self.base_net; fine-tuning from
        # them is intended to improve accuracy.
        basenet_state = torch.load('pretrained/mobienetv2.pth', map_location='cpu')
        base_net_1 = {
            key: value
            for key, value in basenet_state.items() if 'base_net' in key
        }
        self.base_net.load_state_dict(base_net_1)

        def init_with_xavier(m):
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight)

        # Only the layers that are not pre-trained get Xavier weights.
        self.loc_regressor.apply(init_with_xavier)
        self.classifier.apply(init_with_xavier)
        self.additional_feat_extractor.apply(init_with_xavier)

    def feature_to_bbbox(self, loc_regress_layer, confidence_layer, input_feature):
        """Run one pair of detection heads on a feature map.

        :param loc_regress_layer: offset regressor layer to run forward
        :param confidence_layer: confidence layer to run forward
        :param input_feature: feature map fed to both layers
        :return: ``(conf, loc)`` reshaped to ``(N, num_priors, num_classes)``
            and ``(N, num_priors, 4)``.
        """
        conf = confidence_layer(input_feature)
        loc = loc_regress_layer(input_feature)

        # Channels last, so the values of one location are contiguous
        # before flattening.
        conf = conf.permute(0, 2, 3, 1).contiguous()
        num_batch = conf.shape[0]
        # Number of prior boxes on this feature map.
        c_channels = int(conf.shape[1] * conf.shape[2] * conf.shape[3] / self.num_classes)
        conf = conf.view(num_batch, c_channels, self.num_classes)

        loc = loc.permute(0, 2, 3, 1).contiguous()
        loc = loc.view(num_batch, c_channels, 4)

        return conf, loc

    def forward(self, input):
        """Compute class confidences and box offsets for every prior box.

        :param input: image batch fed to the backbone
        :return: ``(confidences, locations)`` of shape
            ``(N, num_priors, num_classes)`` and ``(N, num_priors, 4)``.
            In evaluation mode the confidences are softmax-normalised over
            the class dimension.
        """
        confidence_list = []
        loc_list = []

        # Backbone layers [0, 11]: first detection feature map.
        y = module_util.forward_from(self.base_net.base_net, 0,
                                     self.base_output_layer_indices[0] + 1, input)
        confidence, loc = self.feature_to_bbbox(self.loc_regressor[0], self.classifier[0], y)
        confidence_list.append(confidence)
        loc_list.append(loc)

        # Backbone layers [12, 13]: second detection feature map.
        y = module_util.forward_from(self.base_net.base_net,
                                     self.base_output_layer_indices[0] + 1,
                                     self.base_output_layer_indices[1] + 1, y)
        confidence, loc = self.feature_to_bbbox(self.loc_regressor[1], self.classifier[1], y)
        confidence_list.append(confidence)
        loc_list.append(loc)

        # Each additional extractor runs as a single step; head index is
        # offset by the two backbone feature maps.
        for i in range(len(self.additional_feat_extractor)):
            y = module_util.forward_from(self.additional_feat_extractor, i, i + 1, y)
            confidence, loc = self.feature_to_bbbox(self.loc_regressor[i + 2], self.classifier[i + 2], y)
            confidence_list.append(confidence)
            loc_list.append(loc)

        confidences = torch.cat(confidence_list, 1)
        locations = torch.cat(loc_list, 1)

        # [Debug] check the concatenated outputs (the first check used to
        # look at the last per-head tensor instead of the concatenation).
        assert confidences.dim() == 3  # should be (N, num_priors, num_classes)
        assert locations.dim() == 3  # should be (N, num_priors, 4)
        assert confidences.shape[1] == locations.shape[1]
        assert locations.shape[2] == 4

        if not self.training:
            # In evaluation mode, normalise the confidences with softmax.
            confidences = F.softmax(confidences, dim=2)

        return confidences, locations
def test_compression(self):
    """
    Filter-pruning compression test.

    Model: mobilenet_v1
    Data: mnist
    Intended flow (driven by ./filter_pruning/compress.yaml):
        1. train one epoch
        2. prune flops
        3. fine-tune one epoch
        4. check that the final top-1 accuracy is within 2% (relative)
           of 0.969.

    The test is a no-op when Paddle is not compiled with CUDA.
    """
    if not fluid.core.is_compiled_with_cuda():
        return

    num_classes = 10
    input_shape = [1, 28, 28]

    # Network inputs.
    image = fluid.layers.data(name='image',
                              shape=input_shape,
                              dtype='float32')
    image.stop_gradient = False
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    # Classifier and evaluation metrics.
    logits = MobileNet().net(input=image, class_dim=num_classes)
    top1 = fluid.layers.accuracy(input=logits, label=label, k=1)
    top5 = fluid.layers.accuracy(input=logits, label=label, k=5)

    # Snapshot the program before the loss is added; it serves as the
    # evaluation program of the compressor.
    eval_program = fluid.default_main_program().clone(for_test=False)

    # Training objective.
    per_sample_cost = fluid.layers.cross_entropy(input=logits, label=label)
    mean_cost = fluid.layers.mean(x=per_sample_cost)

    optimizer = fluid.optimizer.Momentum(
        momentum=0.9,
        learning_rate=0.01,
        regularization=fluid.regularizer.L2Decay(4e-5))

    place = fluid.CUDAPlace(0)
    executor = fluid.Executor(place)
    executor.run(fluid.default_startup_program())

    # Evaluation side.
    val_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)
    val_feed_list = [('img', image.name), ('label', label.name)]
    val_fetch_list = [('acc_top1', top1.name), ('acc_top5', top5.name)]

    # Training side.
    train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                batch_size=128)
    train_feed_list = [('img', image.name), ('label', label.name)]
    train_fetch_list = [('loss', mean_cost.name)]

    compressor = Compressor(place,
                            fluid.global_scope(),
                            fluid.default_main_program(),
                            train_reader=train_reader,
                            train_feed_list=train_feed_list,
                            train_fetch_list=train_fetch_list,
                            eval_program=eval_program,
                            eval_reader=val_reader,
                            eval_feed_list=val_feed_list,
                            eval_fetch_list=val_fetch_list,
                            train_optimizer=optimizer)
    compressor.config('./filter_pruning/compress.yaml')
    compressor.run()

    # Relative deviation of the last recorded top-1 accuracy from the
    # reference value.
    final_top1 = compressor.context.eval_results['acc_top1'][-1]
    relative_error = abs((final_top1 - 0.969) / 0.969)
    self.assertTrue(relative_error < 0.02)
# Get the data. (x_train, y_train), (x_test, y_test) = cifar10.load_data() # Convert class vectors to binary class matrices. y_train = keras.utils.to_categorical(y_train, num_classes) y_test = keras.utils.to_categorical(y_test, num_classes) # Preprocess the images. x_train = x_train.astype('float32') x_test = x_test.astype('float32') x_train /= 255 x_test /= 255 # Get the model and compile it. img_input = keras.layers.Input(shape=(32, 32, 3)) model = MobileNet(input_tensor=img_input, classes=num_classes) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) print("Training model.") model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(x_test, y_test), shuffle=True, verbose=1)
# May the sacred beast bless this code
# There cannot possibly be any bugs!
import os
# Must be set before TensorFlow is imported.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
from mobilenet import MobileNet
import numpy as np
np.random.seed(10)
from keras.datasets import cifar10
from keras.utils import np_utils

# Load CIFAR-10, scale the images to [0, 1] and one-hot encode the labels.
(x_img_train, y_label_train), (x_img_test, y_label_test) = cifar10.load_data()
x_img_train = x_img_train.astype('float') / 255.0
x_img_test = x_img_test.astype('float') / 255.0
y_label_train = np_utils.to_categorical(y_label_train)
y_label_test = np_utils.to_categorical(y_label_test)

model = MobileNet()

# Best-effort resume: continue from saved weights when they can be loaded,
# otherwise train from scratch. `except Exception` (instead of a bare
# `except`) keeps Ctrl-C and SystemExit from being swallowed here.
try:
    model.load_weights("mobileV1-lite.h5")
    # Message: "model loaded successfully, continuing training"
    print("模型加载成功!继续训练")
except Exception:
    # Message: "failed to load model, training from scratch"
    print("模型加载失败!从头开始训练")

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# 20% of the training data is held out for validation.
train_history = model.fit(x_img_train,
                          y_label_train,
                          validation_split=0.2,
                          epochs=10,
                          batch_size=128,
                          verbose=2)

model.save_weights("mobileV1-lite.h5")
# Message: "model saved successfully"
print("保存模型成功!")
from utils import load_mnist
from keras.utils import np_utils
from vgg16 import VGG16
from resnet164 import ResNet164
from mobilenet import MobileNet
from wide_resnet_28_10 import WideResNet28_10
from super_learner import SuperLearner
# from super_learner_extension import SuperLearnerExtension
import argparse
import numpy as np
import os

# Directory holding the model files.
PATH = './models/'
# One instance of every base architecture, built at import time.
models = [VGG16(), ResNet164(), WideResNet28_10(), MobileNet()]


def get_argument_parser():
    '''
    Parse the command-line options given by the user.

    Args:
        None

    Returns:
        argparse.Namespace with one attribute:
            dataset (int) - training set: 0, validation set: 1,
                            test set: 2 (default 1)
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset',
                        help = 'training set: 0, validation set: 1, test set: 2',
                        type = int, default = 1)
    args = parser.parse_args()
    # The parsed options were computed but never handed back to the caller.
    return args
def demo(data, save, efficient=True, valid_size=5000, n_epochs=30, batch_size=64, seed=None):
    """
    Train and evaluate a MobileNet on CIFAR-10.

    Args:
        data (str) - directory the CIFAR-10 data is loaded from (the
            training split is downloaded there if necessary)
        save (str) - directory handed to train(); created when missing
        efficient (bool) - accepted but not used by this function
            (default True)
        valid_size (int) - size of validation set (default 5000)
        n_epochs (int) - number of epochs for training (default 30)
        batch_size (int) - size of minibatch (default 64)
        seed (int) - manually set the random seed (default None)

    Raises:
        Exception - when `save` exists but is not a directory
    """
    # Per-channel normalisation statistics.
    channel_mean = [0.5071, 0.4867, 0.4408]
    channel_std = [0.2675, 0.2565, 0.2761]

    # Training images are augmented (random crop + flip); test images are
    # only converted and normalised.
    augmenting_pipeline = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=channel_mean, std=channel_std),
    ])
    plain_pipeline = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=channel_mean, std=channel_std),
    ])

    train_set = datasets.CIFAR10(data,
                                 train=True,
                                 transform=augmenting_pipeline,
                                 download=True)
    test_set = datasets.CIFAR10(data,
                                train=False,
                                transform=plain_pipeline,
                                download=False)

    model = MobileNet()
    print(model)

    # Make sure the save location is a usable directory.
    if os.path.exists(save):
        if not os.path.isdir(save):
            raise Exception('%s is not a dir' % save)
    else:
        os.makedirs(save)

    train(model=model,
          train_set=train_set,
          test_set=test_set,
          save=save,
          valid_size=valid_size,
          n_epochs=n_epochs,
          batch_size=batch_size,
          seed=seed)
    print('Done!')
## Flask application to act as a REST service for android app, dummy frontend from flask import Flask, render_template, request, redirect, Response from base64 import b64decode from hashlib import md5 from mobilenet import MobileNet from os import remove as remove_file app = Flask(__name__) mb = MobileNet() TEMP_FILENAME = "infer_input.jpg" # dummy frontend to make sure server is up @app.route("/") def main(): return render_template("index.html") # inference POST route @app.route("/infer", methods=["POST"]) def infer_image(): # find the contents and save the image from them for value in request.values: f = open(TEMP_FILENAME, "wb") # first replace all special characters then decode the base64 encoding base64encoded = value.replace('@', '=').replace('*', '+') base64decoded = b64decode(base64encoded) # write the image to a temporary file
def _strip_module_prefix(state_dict):
    """Return a copy of *state_dict* whose keys have no leading ``module.``."""
    stripped = OrderedDict()
    for key, value in state_dict.items():
        # remove `module.` when present, keep the key unchanged otherwise
        stripped[key[7:] if key[:7] == 'module.' else key] = value
    return stripped


def _restore_checkpoint(ckpt_file, model, optimizer):
    """Load model and optimizer state from *ckpt_file* and return its epoch.

    Reads the module-level ``opt`` (set by ``main``) to decide whether tensors
    are mapped to the first configured GPU or kept on the CPU.
    """
    if opt.cuda:
        checkpoint = torch.load(
            ckpt_file,
            map_location=lambda storage, loc: storage.cuda(opt.gpuids[0]))
        try:
            model.module.load_state_dict(checkpoint['model'])
        except Exception:
            # fall back to loading through the wrapper itself
            model.load_state_dict(checkpoint['model'])
    else:
        checkpoint = torch.load(
            ckpt_file, map_location=lambda storage, loc: storage)
        try:
            model.load_state_dict(checkpoint['model'])
        except Exception:
            # retry with a state dict that does not contain `module.`
            model.load_state_dict(_strip_module_prefix(checkpoint['model']))
    optimizer.load_state_dict(checkpoint['optimizer'])
    return checkpoint['epoch']


def main():
    """Train, resume or evaluate a MobileNet on CIFAR-10 according to ``cfg``.

    Side effects: sets the module globals ``opt``, ``start_epoch`` and
    ``best_prec1``; downloads CIFAR-10 into ``./data``; saves a checkpoint
    after every epoch through ``save_model``.

    Returns early (None) when ``opt.resume`` or ``opt.eval`` is set and the
    checkpoint file does not exist, and after evaluation when ``opt.eval`` is
    set.

    Raises:
        Exception: if ``opt.cuda`` is set but no GPU is available.
    """
    global opt, start_epoch, best_prec1

    opt = cfg
    opt.gpuids = list(map(int, opt.gpuids))

    if opt.cuda and not torch.cuda.is_available():
        raise Exception("No GPU found, please run without --cuda")

    model = MobileNet()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=opt.lr,
                          momentum=opt.momentum,
                          weight_decay=opt.weight_decay, nesterov=True)
    start_epoch = 0

    ckpt_file = join("model", opt.ckpt)

    if opt.cuda:
        torch.cuda.set_device(opt.gpuids[0])
        with torch.cuda.device(opt.gpuids[0]):
            model = model.cuda()
            criterion = criterion.cuda()
        model = nn.DataParallel(model, device_ids=opt.gpuids,
                                output_device=opt.gpuids[0])
        cudnn.benchmark = True

    # for resuming training
    if opt.resume:
        if not isfile(ckpt_file):
            print("==> no checkpoint found at '{}'".format(opt.ckpt))
            return
        print("==> Loading Checkpoint '{}'".format(opt.ckpt))
        start_epoch = _restore_checkpoint(ckpt_file, model, optimizer)
        print("==> Loaded Checkpoint '{}' (epoch {})".format(
            opt.ckpt, start_epoch))

    # Download & Load Dataset
    print('==> Preparing data..')
    normalize = transforms.Normalize((0.4914, 0.4822, 0.4465),
                                     (0.2023, 0.1994, 0.2010))
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    transform_val = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                            download=True,
                                            transform=transform_train)
    train_loader = torch.utils.data.DataLoader(trainset,
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.workers)
    valset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                          download=True,
                                          transform=transform_val)
    val_loader = torch.utils.data.DataLoader(valset,
                                             batch_size=opt.test_batch_size,
                                             shuffle=False,
                                             num_workers=opt.workers)

    # for evaluation
    if opt.eval:
        if not isfile(ckpt_file):
            print("==> no checkpoint found at '{}'".format(opt.ckpt))
            return
        print("==> Loading Checkpoint '{}'".format(opt.ckpt))
        start_epoch = _restore_checkpoint(ckpt_file, model, optimizer)
        print("==> Loaded Checkpoint '{}' (epoch {})".format(
            opt.ckpt, start_epoch))

        # evaluate on validation set
        print("\n===> [ Evaluation ]")
        start_time = time.time()
        prec1 = validate(val_loader, model, criterion)
        elapsed_time = time.time() - start_time
        print("====> {:.2f} seconds to evaluate this model\n".format(
            elapsed_time))
        return

    # train...
    train_time = 0.0
    validate_time = 0.0
    for epoch in range(start_epoch, opt.epochs):
        adjust_learning_rate(optimizer, epoch)
        print('\n==> Epoch: {}, lr = {}'.format(
            epoch, optimizer.param_groups[0]["lr"]))

        # train for one epoch
        print("===> [ Training ]")
        start_time = time.time()
        train(train_loader, model, criterion, optimizer, epoch)
        elapsed_time = time.time() - start_time
        train_time += elapsed_time
        print(
            "====> {:.2f} seconds to train this epoch\n".format(elapsed_time))

        # evaluate on validation set
        print("===> [ Validation ]")
        start_time = time.time()
        prec1 = validate(val_loader, model, criterion)
        elapsed_time = time.time() - start_time
        validate_time += elapsed_time
        print("====> {:.2f} seconds to validate this epoch\n".format(
            elapsed_time))

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        state = {
            'epoch': epoch + 1,
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        save_model(state, epoch, is_best)

    # Average over the epochs that actually ran in this process: a resumed run
    # starts at start_epoch, and at least 1 avoids dividing by zero.
    epochs_run = max(opt.epochs - start_epoch, 1)
    avg_train_time = train_time / epochs_run
    avg_valid_time = validate_time / epochs_run
    total_train_time = train_time + validate_time
    print("====> average training time per epoch: {}m {:.2f}s".format(
        int(avg_train_time // 60), avg_train_time % 60))
    print("====> average validation time per epoch: {}m {:.2f}s".format(
        int(avg_valid_time // 60), avg_valid_time % 60))
    print("====> training time: {}m {:.2f}s".format(int(train_time // 60),
                                                    train_time % 60))
    print("====> validation time: {}m {:.2f}s".format(int(validate_time // 60),
                                                      validate_time % 60))
    print("====> total training time: {}m {:.2f}s".format(
        int(total_train_time // 60), total_train_time % 60))
def main():
    """Pre-train and adversarially train an image-conditioned text generator.

    Stages, in order: load or build the vocabulary, pre-train the
    discriminator on generated vs. real samples, pre-train the generator with
    the supervised loss, run REINFORCE training with roll-out rewards, then
    write one batch of decoded samples. Progress is written to
    'experiment-log.txt', which is closed even if training raises.
    """
    random.seed(SEED)
    np.random.seed(SEED)

    # NOTE: DICO_PKL is a cache written by this function; pickle must not be
    # pointed at files from untrusted sources.
    if os.path.exists(DICO_PKL):
        with open(DICO_PKL, 'rb') as f:
            word_to_id, id_to_word = pickle.load(f)
    else:
        word_to_id, id_to_word = create_dico(DICO)
        with open(DICO_PKL, 'wb') as f:
            pickle.dump([word_to_id, id_to_word], f)

    gen_data_loader = Gen_Data_loader(BATCH_SIZE, word_to_id)
    dis_data_loader = Dis_Data_loader(BATCH_SIZE, word_to_id)
    vocab_size = len(word_to_id)
    assert START_TOKEN == word_to_id['sos']

    generator = Generator(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                          SEQ_LENGTH, START_TOKEN)
    discriminator = BLEUCNN(SEQ_LENGTH, 2, EMB_DIM, generator)
    mobilenet = MobileNet(BATCH_SIZE)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    mobilenet.load_pretrained_weights(sess)
    sess.run(tf.global_variables_initializer())

    def encode_images(image_batch):
        """Run the MobileNet in inference mode and return its output."""
        feed_dict = {mobilenet.X: image_batch, mobilenet.is_training: False}
        return sess.run(mobilenet.y_output, feed_dict=feed_dict)

    def generate_samples():
        """Generate one sample batch per generator-loader batch, as lists."""
        generated = []
        for _ in range(gen_data_loader.num_batch):
            _, image_batch = gen_data_loader.next_batch()
            hidden_batch = encode_images(image_batch)
            generated.extend(generator.generate(sess, hidden_batch).tolist())
        return generated

    def train_discriminator_epoch():
        """Run one pass of discriminator updates over its current batches."""
        dis_data_loader.reset_pointer()
        for _ in range(dis_data_loader.num_batch):
            x_batch, labels = dis_data_loader.next_batch()
            feed = {
                discriminator.input_x: x_batch,
                discriminator.labels: labels,
                discriminator.dropout_keep_prob: 0.75
            }
            sess.run(discriminator.train_op, feed)

    with open('experiment-log.txt', 'w', encoding='utf-8') as log:
        # pre-train generator and discriminator
        log.write('pre-training...\n')
        print('Start pre-training discriminator...')
        datas = create_data(DICO, word_to_id)
        gen_data_loader.create_batches(CORPUS, IMAGE)
        samples = generate_samples()
        dis_data_loader.create_batches(random.sample(datas, 3000), samples)
        for _ in range(PRE_EPOCH_NUM):
            train_discriminator_epoch()

        print('Start pre-training generator...')
        for epoch in range(PRE_EPOCH_NUM):
            supervised_g_losses = []
            gen_data_loader.reset_pointer()
            for _ in range(gen_data_loader.num_batch):
                inp_batch, image_batch = gen_data_loader.next_batch()
                hidden_batch = encode_images(image_batch)
                _, g_loss = generator.pretrain_step(sess, inp_batch,
                                                    hidden_batch)
                supervised_g_losses.append(g_loss)
            loss = np.mean(supervised_g_losses)
            if epoch % 5 == 0:
                print('pre-train epoch ', epoch, 'train_loss ', loss)
                buffer = 'epoch:\t' + str(epoch) + '\ttrain_loss:\t' + str(
                    loss) + '\n'
                log.write(buffer)

        rollout = ROLLOUT(generator, 0.8)

        print(
            '#########################################################################'
        )
        print('Start REINFORCE Training...')
        log.write('REINFORCE training...\n')
        for total_batch in range(RL_EPOCH_NUM):
            # Generator update from roll-out rewards, one step per batch
            gen_data_loader.reset_pointer()
            for _ in range(gen_data_loader.num_batch):
                ra = random.randint(0, 1)
                inp_batch, image_batch = gen_data_loader.next_batch(shuffle=ra)
                hidden_batch = encode_images(image_batch)
                samples = generator.generate(sess, hidden_batch)
                rewards = rollout.get_reward(sess, samples, hidden_batch, 16,
                                             discriminator)
                feed = {
                    generator.x: inp_batch,
                    generator.rewards: rewards,
                    generator.hiddens: hidden_batch
                }
                sess.run(generator.g_updates, feed_dict=feed)

            # Test
            if total_batch % 5 == 0 or total_batch == RL_EPOCH_NUM - 1:
                mean_rewards = []
                gen_data_loader.reset_pointer()
                for _ in range(gen_data_loader.num_batch):
                    inp_batch, image_batch = gen_data_loader.next_batch()
                    hidden_batch = encode_images(image_batch)
                    samples = generator.generate(sess, hidden_batch)
                    rewards = rollout.get_reward(sess, samples, hidden_batch,
                                                 16, discriminator)
                    mean_rewards.append(np.mean(rewards[:, -1]))
                reward = np.mean(mean_rewards)
                buffer = 'epoch:\t' + str(total_batch) + '\treward:\t' + str(
                    reward) + '\n'
                print('total_batch: ', total_batch, 'reward: ', reward)
                log.write(buffer)
                generator.save_weight(sess)

            # Update roll-out parameters
            rollout.update_params()
            discriminator.update_embedding()

            # Train the discriminator
            samples = generate_samples()
            dis_data_loader.create_batches(random.sample(datas, 3000), samples)
            train_discriminator_epoch()

        # final test
        gen_data_loader.reset_pointer()
        _, image_batch = gen_data_loader.next_batch()
        hidden_batch = encode_images(image_batch)
        samples = generator.generate(sess, hidden_batch)
        for sam in samples.tolist():
            # ids are mapped back to words and joined without a separator
            log.write(''.join([id_to_word[i] for i in sam]) + '\n')
def quan(self, config_file):
    """Build an MNIST MobileNet program and run a Compressor over it.

    Does nothing when Paddle was compiled without CUDA.

    :param config_file: path passed to ``Compressor.config``.
    """
    if not fluid.core.is_compiled_with_cuda():
        return

    class_dim = 10
    image_shape = [1, 28, 28]

    train_program = fluid.Program()
    startup_program = fluid.Program()

    with fluid.program_guard(train_program, startup_program):
        with fluid.unique_name.guard():
            image = fluid.layers.data(
                name='image', shape=image_shape, dtype='float32')
            image.stop_gradient = False
            label = fluid.layers.data(
                name='label', shape=[1], dtype='int64')
            net = MobileNet(name='quan')
            out = net.net(input=image, class_dim=class_dim)
            print("out: {}".format(out.name))
            acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
            acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
            cost = fluid.layers.cross_entropy(input=out, label=label)
            avg_cost = fluid.layers.mean(x=cost)

    # The evaluation program is a clone taken before any optimizer is applied
    val_program = train_program.clone(for_test=False)

    optimizer = fluid.optimizer.Momentum(
        momentum=0.9,
        learning_rate=0.01,
        regularization=fluid.regularizer.L2Decay(4e-5))

    scope = fluid.Scope()
    place = fluid.CUDAPlace(0)
    fluid.Executor(place).run(startup_program, scope=scope)

    # Evaluation side: MNIST test split, top-1 / top-5 accuracy
    val_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)
    val_feed_list = [('img', image.name), ('label', label.name)]
    val_fetch_list = [('acc_top1', acc_top1.name),
                      ('acc_top5', acc_top5.name)]

    # Training side: MNIST train split, mean cross-entropy loss
    train_reader = paddle.batch(
        paddle.dataset.mnist.train(), batch_size=128)
    train_feed_list = [('img', image.name), ('label', label.name)]
    train_fetch_list = [('loss', avg_cost.name)]

    compressor = Compressor(
        place,
        scope,
        train_program,
        train_reader=train_reader,
        train_feed_list=train_feed_list,
        train_fetch_list=train_fetch_list,
        eval_program=val_program,
        eval_reader=val_reader,
        eval_feed_list=val_feed_list,
        eval_fetch_list=val_fetch_list,
        train_optimizer=optimizer)
    compressor.config(config_file)
    compressor.run()
class SSD(nn.Module):
    """SSD detection heads on top of a MobileNet backbone.

    Six feature maps are used: two taken from the backbone and four produced
    by the additional feature extractor. Each feature map feeds one
    box-offset regressor and one class-confidence classifier.
    """

    def __init__(self, num_classes):
        """Build the network and copy pre-trained weights into the backbone.

        :param num_classes: number of classes scored per prior box.
        """
        super(SSD, self).__init__()
        self.num_classes = num_classes

        # Setup the backbone network (base_net)
        self.base_net = MobileNet(num_classes)

        # Split points handed to module_util.forward_from in forward():
        # the first feature map ends at index 6, the second at index 11.
        self.base_output_layer_indices = (6, 11)

        # Define the Additional feature extractor
        self.additional_feat_extractor = nn.ModuleList([
            self._extra_block(512, 256, 512, stride=2, padding=1),  # Conv8_2
            self._extra_block(512, 128, 256, stride=2, padding=1),  # Conv9_2
            self._extra_block(256, 128, 256, stride=2, padding=1),  # Conv10_2
            self._extra_block(256, 128, 256, stride=2, padding=0),  # Conv11_2
        ])

        num_prior_bbox = 6  # num of prior bounding boxes
        # Input channels of the six feature maps, in forward() order
        head_in_channels = (256, 512, 512, 256, 256, 256)

        # Bounding box offset regressor
        self.loc_regressor = nn.ModuleList([
            nn.Conv2d(in_channels=channels, out_channels=num_prior_bbox * 4,
                      kernel_size=3, padding=1)
            for channels in head_in_channels
        ])

        # Bounding box classification confidence for each label
        self.classifier = nn.ModuleList([
            nn.Conv2d(in_channels=channels,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3, padding=1)
            for channels in head_in_channels
        ])

        # load the pre-trained model for self.base_net, it will increase the
        # accuracy by fine-tuning
        self._load_pretrained_base_net('./pretrained/mobienetv2.pth')
        self.base_net.eval()

        def init_with_xavier(m):
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight)

        self.loc_regressor.apply(init_with_xavier)
        self.classifier.apply(init_with_xavier)
        self.additional_feat_extractor.apply(init_with_xavier)

    @staticmethod
    def _extra_block(in_channels, mid_channels, out_channels, stride, padding):
        """Return a 1x1 conv + ReLU followed by a 3x3 conv + ReLU."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=mid_channels,
                      kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=mid_channels, out_channels=out_channels,
                      kernel_size=3, stride=stride, padding=padding),
            nn.ReLU()
        )

    def _load_pretrained_base_net(self, path):
        """Copy checkpoint tensors whose key contains 'base_net' into base_net.

        Tensors are paired with the backbone's own entries in order, by shape.
        A checkpoint without any 'base_net' key leaves the backbone unchanged.
        """
        # map_location keeps loading usable on hosts without a GPU
        pretrained_dict = torch.load(path, map_location='cpu')
        pretrained_tensors = [v for k, v in pretrained_dict.items()
                              if 'base_net' in k]
        model_dict = self.base_net.state_dict()
        next_idx = 0
        for name, tensor in model_dict.items():
            if next_idx == len(pretrained_tensors):
                break
            candidate = pretrained_tensors[next_idx]
            # NOTE(review): the checkpoint pointer only advances on a shape
            # match - confirm this is the intended pairing.
            if tensor.size() == candidate.size():
                model_dict[name] = candidate
                next_idx += 1
        self.base_net.load_state_dict(model_dict)

    def feature_to_bbbox(self, loc_regress_layer, confidence_layer, input_feature):
        """
        Compute the bounding box class scores and the bounding box offset
        :param loc_regress_layer: offset regressor layer to run forward
        :param confidence_layer: confidence layer to run forward
        :param input_feature: feature map to be feed in
        :return: confidence and location, with dim:(N, num_priors, num_classes) and dim:(N, num_priors, 4) respectively.
        """
        conf = confidence_layer(input_feature)
        loc = loc_regress_layer(input_feature)

        # Confidence post-processing:
        # (N, num_prior_bbox * n_classes, H, W) to (N, H*W*num_prior_bbox, n_classes)
        # where H*W*num_prior_bbox = num_priors
        conf = conf.permute(0, 2, 3, 1).contiguous()
        num_batch = conf.shape[0]
        num_priors = (conf.shape[1] * conf.shape[2] * conf.shape[3]) // self.num_classes
        conf = conf.view(num_batch, num_priors, self.num_classes)

        # Bounding Box loc and size post-processing
        # (N, num_prior_bbox*4, H, W) to (N, num_priors, 4)
        loc = loc.permute(0, 2, 3, 1).contiguous()
        loc = loc.view(num_batch, num_priors, 4)

        return conf, loc

    def forward(self, input):
        """Return (confidences, locations) for a batch of images.

        :param input: batch fed to the backbone.
        :return: confidences (N, num_priors, num_classes) and locations
            (N, num_priors, 4); confidences are softmax-normalized over the
            class axis when the module is not in training mode.
        """
        confidence_list = []
        loc_list = []

        # First backbone segment and its bbox confidence / location
        first_idx, second_idx = self.base_output_layer_indices
        y = module_util.forward_from(self.base_net.conv_layers, 0, first_idx, input)
        confidence, loc = self.feature_to_bbbox(self.loc_regressor[0], self.classifier[0], y)
        confidence_list.append(confidence)
        loc_list.append(loc)

        # Second backbone segment, continuing from the first feature map
        y = module_util.forward_from(self.base_net.conv_layers, first_idx, second_idx, y)
        confidence, loc = self.feature_to_bbbox(self.loc_regressor[1], self.classifier[1], y)
        confidence_list.append(confidence)
        loc_list.append(loc)

        # forward the 'y' to additional layers for extracting coarse features
        for i in range(4):
            y = module_util.forward_from(self.additional_feat_extractor, i, i + 1, y)
            confidence, loc = self.feature_to_bbbox(self.loc_regressor[i + 2], self.classifier[i + 2], y)
            confidence_list.append(confidence)
            loc_list.append(loc)

        confidences = torch.cat(confidence_list, 1)
        locations = torch.cat(loc_list, 1)

        # [Debug] check the output
        assert confidences.dim() == 3  # should be (N, num_priors, num_classes)
        assert locations.dim() == 3  # should be (N, num_priors, 4)
        assert confidences.shape[1] == locations.shape[1]
        assert locations.shape[2] == 4

        if not self.training:
            # If in testing/evaluating mode, normalize the output with Softmax
            confidences = F.softmax(confidences, dim=2)

        return confidences, locations
def train():
    """Train a MobileNet classifier on the CIFAR-10 TFRecord file.

    Builds the graph from the module-level ``args``, restores the latest
    checkpoint from ``args.checkpoint_dir`` through ``load``, then runs the
    training loop. Every ``args.num_log`` steps it writes summaries and a
    checkpoint; every 100 steps it averages loss/accuracy over 200 batches.
    The graph definition and a final checkpoint are written at the end.
    """
    img_batch, label_batch = get_batch("cifar10/cifar10_train.tfrecord",
                                       args.batch_size, shuffle=True)

    mobilenet = MobileNet(img_batch, num_classes=args.num_classes)
    logits = mobilenet.logits
    pred = mobilenet.predictions

    cross = tf.nn.softmax_cross_entropy_with_logits(labels=label_batch,
                                                    logits=logits)
    loss = tf.reduce_mean(cross)

    # L2 regularization
    list_reg = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    if len(list_reg) > 0:
        l2_loss = tf.add_n(list_reg)
        total_loss = loss + l2_loss
    else:
        total_loss = loss

    # evaluate model, for classification
    preds = tf.argmax(pred, 1)
    labels = tf.argmax(label_batch, 1)
    correct_pred = tf.equal(preds, labels)
    acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # learning rate decay
    # base_lr is not used below; it is kept so the graph contains the same ops
    base_lr = tf.constant(args.learning_rate)
    # the step counter must not be part of the trainable variables
    global_step = tf.Variable(0, trainable=False)
    lr = tf.train.exponential_decay(args.learning_rate,
                                    global_step=global_step,
                                    decay_steps=args.lr_decay_step,
                                    decay_rate=args.lr_decay,
                                    staircase=True)

    # optimizer: minimize the regularized objective so the L2 term computed
    # above actually contributes to the gradients
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = tf.train.AdamOptimizer(
            learning_rate=lr, beta1=args.beta1).minimize(
                total_loss, global_step=global_step)

    max_steps = int(args.num_samples / int(args.batch_size) * int(args.epoch))

    # summary
    tf.summary.scalar('total_loss', total_loss)
    tf.summary.scalar('accuracy', acc)
    tf.summary.scalar('learning_rate', lr)
    summary_op = tf.summary.merge_all()

    with tf.Session() as sess:
        # summary writer
        writer = tf.summary.FileWriter(args.logs_dir, sess.graph)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        _, _step = load(sess, saver, args.checkpoint_dir)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        checkpoint_path = os.path.join(args.checkpoint_dir, args.model_name)

        try:
            for step in range(_step + 1, max_steps + 1):
                start_time = time.time()
                _, _lr = sess.run([train_op, lr])

                if step % args.num_log == 0:
                    summ, _loss, _acc = sess.run([summary_op, total_loss, acc])
                    writer.add_summary(summ, step)
                    print(
                        'number to eval:{0}, time:{1:.3f}, lr:{2:.8f}, acc:{3:.6f}, loss:{4:.6f}'
                        .format(step * args.batch_size,
                                time.time() - start_time, _lr, _acc, _loss))
                    save_path = saver.save(sess, checkpoint_path,
                                           global_step=step)

                if step % 100 == 0:
                    # Average loss/accuracy over 200 batches drawn from the
                    # same input pipeline
                    totalloss = 0.0
                    totalacc = 0.0
                    for e_step in range(200):
                        _loss, _acc = sess.run([total_loss, acc])
                        totalloss = totalloss + _loss
                        totalacc = totalacc + _acc
                    print('global_step:%g, time:%g, t acc:%g, t loss:%g' %
                          ((e_step + 1) * args.batch_size,
                           time.time() - start_time,
                           totalacc / (e_step + 1),
                           totalloss / (e_step + 1)))

            tf.train.write_graph(sess.graph_def, args.checkpoint_dir,
                                 args.model_name + '.pb')
            save_path = saver.save(sess, checkpoint_path,
                                   global_step=max_steps)
        finally:
            # stop the input threads even when a step raises
            coord.request_stop()
            coord.join(threads)
def __init__(self, num_classes):
    """Build the SSD network and copy pre-trained weights into the backbone.

    :param num_classes: number of classes scored per prior box.
    """
    super(SSD, self).__init__()
    self.num_classes = num_classes

    # Setup the backbone network (base_net)
    self.base_net = MobileNet(num_classes)

    # Backbone split points: the first feature map ends at index 6, the
    # second at index 11.
    self.base_output_layer_indices = (6, 11)

    def extra_block(in_channels, mid_channels, out_channels, stride, padding):
        # 1x1 conv + ReLU followed by a 3x3 conv + ReLU
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=mid_channels,
                      kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=mid_channels, out_channels=out_channels,
                      kernel_size=3, stride=stride, padding=padding),
            nn.ReLU()
        )

    # Define the Additional feature extractor
    self.additional_feat_extractor = nn.ModuleList([
        extra_block(512, 256, 512, stride=2, padding=1),  # Conv8_2
        extra_block(512, 128, 256, stride=2, padding=1),  # Conv9_2
        extra_block(256, 128, 256, stride=2, padding=1),  # Conv10_2
        extra_block(256, 128, 256, stride=2, padding=0),  # Conv11_2
    ])

    num_prior_bbox = 6  # num of prior bounding boxes
    # Input channels of the six feature maps the heads are attached to
    head_in_channels = (256, 512, 512, 256, 256, 256)

    # Bounding box offset regressor
    self.loc_regressor = nn.ModuleList([
        nn.Conv2d(in_channels=channels, out_channels=num_prior_bbox * 4,
                  kernel_size=3, padding=1)
        for channels in head_in_channels
    ])

    # Bounding box classification confidence for each label
    self.classifier = nn.ModuleList([
        nn.Conv2d(in_channels=channels,
                  out_channels=num_prior_bbox * num_classes,
                  kernel_size=3, padding=1)
        for channels in head_in_channels
    ])

    # load the pre-trained model for self.base_net, it will increase the
    # accuracy by fine-tuning; map_location keeps this usable without a GPU
    pretrained_dict = torch.load('./pretrained/mobienetv2.pth',
                                 map_location='cpu')
    pretrained_tensors = [v for k, v in pretrained_dict.items()
                          if 'base_net' in k]
    model_dict = self.base_net.state_dict()
    next_idx = 0
    for name, tensor in model_dict.items():
        # stop once every checkpoint tensor is used (also covers a checkpoint
        # that has no 'base_net' key at all)
        if next_idx == len(pretrained_tensors):
            break
        candidate = pretrained_tensors[next_idx]
        # NOTE(review): the checkpoint pointer only advances on a shape
        # match - confirm this is the intended pairing.
        if tensor.size() == candidate.size():
            model_dict[name] = candidate
            next_idx += 1
    self.base_net.load_state_dict(model_dict)
    self.base_net.eval()

    def init_with_xavier(m):
        if isinstance(m, nn.Conv2d):
            nn.init.xavier_uniform_(m.weight)

    self.loc_regressor.apply(init_with_xavier)
    self.classifier.apply(init_with_xavier)
    self.additional_feat_extractor.apply(init_with_xavier)
def __init__(self, num_classes):
    """Assemble the SSD model: backbone, extra feature layers and heads.

    :param num_classes: number of classes scored per prior box.
    """
    super(SSD, self).__init__()
    self.num_classes = num_classes

    # Backbone network
    self.base_net = MobileNet(num_classes)

    # The feature map will extracted from layer[11] and layer[13] in (base_net)
    self.base_output_layer_indices = (11, 13)

    # Additional feature extractor, one row per block:
    # (in, mid, out, stride of the 3x3 conv, padding of the 3x3 conv)
    extra_specs = (
        (1024, 256, 512, 2, 1),  # Conv8_2
        (512, 128, 256, 2, 1),   # Conv9_2
        (256, 128, 256, 1, 1),   # Conv10_2
        (256, 128, 256, 1, 0),   # Conv11_2
    )
    extra_blocks = []
    for c_in, c_mid, c_out, stride, padding in extra_specs:
        extra_blocks.append(nn.Sequential(
            nn.Conv2d(in_channels=c_in, out_channels=c_mid, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=c_mid, out_channels=c_out, kernel_size=3,
                      stride=stride, padding=padding),
            nn.ReLU(),
        ))
    self.additional_feat_extractor = nn.ModuleList(extra_blocks)

    num_prior_bbox = 6  # num of prior bounding boxes
    # Input channels per head: Conv5_3, FC7, Conv8_2, Conv9_2, Conv10_2, Conv11_2
    head_channels = (512, 1024, 512, 256, 256, 256)

    def make_heads(values_per_box):
        # One 3x3 conv per feature map, emitting values_per_box per prior box
        return nn.ModuleList([
            nn.Conv2d(in_channels=c, out_channels=num_prior_bbox * values_per_box,
                      kernel_size=3, padding=1)
            for c in head_channels
        ])

    # Bounding box offset regressor
    self.loc_regressor = make_heads(4)

    # Bounding box classification confidence for each label
    self.classifier = make_heads(num_classes)

    # Load the pre-trained model for self.base_net, it will increase the
    # accuracy by fine-tuning
    basenet_state = torch.load('pretrained/mobienetv2.pth', map_location='cpu')
    base_net_1 = {}
    for key, value in basenet_state.items():
        if 'base_net' in key:
            base_net_1[key] = value
    self.base_net.load_state_dict(base_net_1)

    def init_with_xavier(m):
        if isinstance(m, nn.Conv2d):
            nn.init.xavier_uniform_(m.weight)

    for module_list in (self.loc_regressor, self.classifier,
                        self.additional_feat_extractor):
        module_list.apply(init_with_xavier)
class SSD(nn.Module):
    """SSD-style detector with a MobileNet backbone.

    The model holds a backbone (``base_net``), four extra feature blocks and
    six pairs of convolutional heads (box offsets and class confidences).
    ``forward`` returns per-prior class confidences and box offsets.
    """

    # Number of prior bounding boxes predicted per feature-map cell.
    _NUM_PRIOR_BBOX = 6
    # Input channels of the feature maps fed to the six head pairs, in order.
    _HEAD_IN_CHANNELS = (512, 1024, 512, 256, 256, 256)

    def __init__(self, num_classes = 4):
        """Create all layers and load the pre-trained backbone weights.

        :param num_classes: number of classes scored for each prior box; it
            is also passed to the ``MobileNet`` backbone constructor.
        """
        import warnings

        super(SSD, self).__init__()
        self.num_classes = num_classes

        # Setup the backbone network (base_net)
        self.base_net = MobileNet(num_classes)

        # Feature maps are taken from layer[11] and layer[13] of base_net
        self.base_output_layer_indices = (11, 13)

        # Additional feature extractor applied after the backbone
        self.additional_feat_extractor = nn.ModuleList([
            self._extra_block(1024, 256, 512, stride=2),  # Conv8_2
            self._extra_block(512, 128, 256, stride=2),   # Conv9_2
            self._extra_block(256, 128, 256, stride=1),   # Conv10_2
            self._extra_block(256, 128, 256, stride=1),   # Conv11_2
        ])

        num_prior_bbox = self._NUM_PRIOR_BBOX

        # Bounding box offset regressor: 4 offsets per prior box
        self.loc_regressor = nn.ModuleList([
            nn.Conv2d(in_channels=channels, out_channels=num_prior_bbox * 4,
                      kernel_size=3, padding=1)
            for channels in self._HEAD_IN_CHANNELS
        ])

        # Bounding box classification confidence: one score per class per prior
        self.classifier = nn.ModuleList([
            nn.Conv2d(in_channels=channels,
                      out_channels=num_prior_bbox * num_classes,
                      kernel_size=3, padding=1)
            for channels in self._HEAD_IN_CHANNELS
        ])

        # Load the pre-trained weights for self.base_net (fine-tuning start
        # point). map_location='cpu' lets a checkpoint saved on a GPU be read
        # on a CPU-only host; load_state_dict copies onto the module's device.
        pretrained_model = torch.load("./pretrained/mobienetv2.pth",
                                      map_location='cpu')
        my_model = self.base_net.state_dict()
        # 1. keep only checkpoint entries the backbone actually has
        matched = {k: v for k, v in pretrained_model.items() if k in my_model}
        if not matched:
            # Previously this case passed silently and left the backbone
            # randomly initialised.
            warnings.warn(
                "No checkpoint key matches the backbone state dict; "
                "base_net keeps its default initialisation."
            )
        # 2. overwrite entries in the existing state dict
        my_model.update(matched)
        # 3. load the merged state dict
        self.base_net.load_state_dict(my_model)

        def init_with_xavier(m):
            """Xavier-uniform initialise the weight of every Conv2d module."""
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight)

        # Only the newly added layers are re-initialised.
        self.loc_regressor.apply(init_with_xavier)
        self.classifier.apply(init_with_xavier)
        self.additional_feat_extractor.apply(init_with_xavier)

    @staticmethod
    def _extra_block(in_channels, mid_channels, out_channels, stride):
        """Return a 1x1 conv + ReLU followed by a padded 3x3 conv + ReLU."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=mid_channels,
                      kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=mid_channels, out_channels=out_channels,
                      kernel_size=3, stride=stride, padding=1),
            nn.ReLU(),
        )

    def feature_to_bbbox(self, loc_regress_layer, confidence_layer, input_feature):
        """
        Compute the bounding box class scores and the bounding box offset
        :param loc_regress_layer: offset regressor layer to run forward
        :param confidence_layer: confidence layer to run forward
        :param input_feature: feature map to be feed in
        :return: confidence and location, with dim:(N, num_priors, num_classes) and dim:(N, num_priors, 4) respectively.
        """
        conf = confidence_layer(input_feature)
        loc = loc_regress_layer(input_feature)
        num_batch = conf.shape[0]

        # Confidence post-processing:
        # (N, num_prior_bbox * n_classes, H, W) -> (N, H*W*num_prior_bbox, n_classes)
        # Moving channels last keeps the values of one prior contiguous; -1
        # lets view() derive num_priors exactly instead of via float division.
        conf = conf.permute(0, 2, 3, 1).contiguous()
        conf = conf.view(num_batch, -1, self.num_classes)

        # Bounding box loc and size post-processing:
        # (N, num_prior_bbox * 4, H, W) -> (N, num_priors, 4)
        loc = loc.permute(0, 2, 3, 1).contiguous()
        loc = loc.view(num_batch, -1, 4)

        return conf, loc

    def forward(self, input):
        """Run the detector on a batch.

        :param input: batch passed to the backbone's ``conv_layers``.
        :return: ``(confidences, locations)`` with dims
            (N, num_priors, num_classes) and (N, num_priors, 4). When the
            module is not in training mode the confidences are softmax
            normalised over the class dimension.
        """
        confidence_list = []
        loc_list = []
        first_idx, second_idx = self.base_output_layer_indices

        def collect(head_idx, feature):
            """Run head pair `head_idx` on `feature` and store its outputs."""
            confidence, loc = self.feature_to_bbbox(
                self.loc_regressor[head_idx], self.classifier[head_idx], feature)
            confidence_list.append(confidence)
            loc_list.append(loc)

        # Backbone up to the first tap (module_util.forward_from presumably
        # runs layers in the given index range - confirm its end-inclusivity).
        y = module_util.forward_from(self.base_net.conv_layers, 0, first_idx, input)
        collect(0, y)

        # Backbone from the first tap to the second tap
        y = module_util.forward_from(self.base_net.conv_layers, first_idx, second_idx, y)
        collect(1, y)

        # Extra blocks for coarser features.
        # NOTE(review): only the first three extra blocks (heads 2-4) are
        # used although four blocks and six heads are constructed; confirm
        # whether the last pair is skipped on purpose (changing this alters
        # the number of priors returned).
        for i in range(3):
            y = module_util.forward_from(self.additional_feat_extractor, i, i + 1, y)
            collect(i + 2, y)

        confidences = torch.cat(confidence_list, 1)
        locations = torch.cat(loc_list, 1)

        # [Debug] check the output
        assert confidences.dim() == 3  # should be (N, num_priors, num_classes)
        assert locations.dim() == 3  # should be (N, num_priors, 4)
        assert confidences.shape[1] == locations.shape[1]
        assert locations.shape[2] == 4

        if not self.training:
            # If in testing/evaluating mode, normalize the output with Softmax
            confidences = F.softmax(confidences, dim=2)

        return confidences, locations
model = Resnet_interpretable_gradcam(num_classes=num_classe) elif args.model_type == 'ex_gradcam2': model = VGG_interpretable_gradcam2(num_classes=num_classe) else: model = Resnet(num_classes=num_classe) elif args.model == 'mobilenet': if args.model_type == 'ex_atten': model = VGG_interpretable_atten(num_classes=num_classe) elif args.model_type == 'ex': model = VGG_interpretable(num_classes=num_classe) elif args.model_type == 'ex_gradcam': model = Mobile_interpretable_gradcam(num_classes=num_classe) elif args.model_type == 'ex_gradcam2': model = VGG_interpretable_gradcam2(num_classes=num_classe) else: model = MobileNet(num_classes=num_classe) elif args.model == 'alexnet': if args.model_type == 'ex_atten': model = VGG_interpretable_atten(num_classes=num_classe) elif args.model_type == 'ex': model = Alexnet_interpretable(num_classes=num_classe) elif args.model_type == 'ex_gradcam': model = Alexnet_interpretable_gradcam(num_classes=num_classe) else: model = Alexnet(num_classes=num_classe) use_gpu = torch.cuda.is_available() # 判断是否有GPU加速 if use_gpu: model = model.cuda() if model_half: model = model.half() if args.model_init:
def __init__(self, num_classes = 4):
    """Build the SSD detector and load the pre-trained backbone weights.

    :param num_classes: number of classes scored for each prior box; it is
        also passed to the ``MobileNet`` backbone constructor.
    """
    import warnings

    super(SSD, self).__init__()
    self.num_classes = num_classes

    # Setup the backbone network (base_net)
    self.base_net = MobileNet(num_classes)

    # Feature maps are taken from layer[11] and layer[13] of base_net
    self.base_output_layer_indices = (11, 13)

    def extra_block(in_channels, mid_channels, out_channels, stride):
        """Return a 1x1 conv + ReLU followed by a padded 3x3 conv + ReLU."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=mid_channels,
                      kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=mid_channels, out_channels=out_channels,
                      kernel_size=3, stride=stride, padding=1),
            nn.ReLU(),
        )

    # Additional feature extractor applied after the backbone
    self.additional_feat_extractor = nn.ModuleList([
        extra_block(1024, 256, 512, stride=2),  # Conv8_2
        extra_block(512, 128, 256, stride=2),   # Conv9_2
        extra_block(256, 128, 256, stride=1),   # Conv10_2
        extra_block(256, 128, 256, stride=1),   # Conv11_2
    ])

    num_prior_bbox = 6  # num of prior bounding boxes per feature-map cell

    # Input channels of the feature maps fed to the six head pairs, in order.
    head_in_channels = (512, 1024, 512, 256, 256, 256)

    # Bounding box offset regressor: 4 offsets per prior box
    self.loc_regressor = nn.ModuleList([
        nn.Conv2d(in_channels=channels, out_channels=num_prior_bbox * 4,
                  kernel_size=3, padding=1)
        for channels in head_in_channels
    ])

    # Bounding box classification confidence: one score per class per prior
    self.classifier = nn.ModuleList([
        nn.Conv2d(in_channels=channels,
                  out_channels=num_prior_bbox * num_classes,
                  kernel_size=3, padding=1)
        for channels in head_in_channels
    ])

    # Load the pre-trained weights for self.base_net (fine-tuning start point).
    # map_location='cpu' lets a checkpoint saved on a GPU be read on a
    # CPU-only host; load_state_dict copies onto the module's own device.
    pretrained_model = torch.load("./pretrained/mobienetv2.pth",
                                  map_location='cpu')
    my_model = self.base_net.state_dict()
    # 1. keep only checkpoint entries the backbone actually has
    matched = {k: v for k, v in pretrained_model.items() if k in my_model}
    if not matched:
        # Previously this case passed silently and left the backbone
        # randomly initialised.
        warnings.warn(
            "No checkpoint key matches the backbone state dict; "
            "base_net keeps its default initialisation."
        )
    # 2. overwrite entries in the existing state dict
    my_model.update(matched)
    # 3. load the merged state dict
    self.base_net.load_state_dict(my_model)

    def init_with_xavier(m):
        """Xavier-uniform initialise the weight of every Conv2d module."""
        if isinstance(m, nn.Conv2d):
            nn.init.xavier_uniform_(m.weight)

    # Only the newly added layers are re-initialised.
    self.loc_regressor.apply(init_with_xavier)
    self.classifier.apply(init_with_xavier)
    self.additional_feat_extractor.apply(init_with_xavier)
def __init__(self, num_classes):
    """Build the SSD detector and load the pre-trained backbone weights.

    :param num_classes: number of classes scored for each prior box; it is
        also passed to the ``MobileNet`` backbone constructor.
    """
    import warnings

    super(SSD, self).__init__()
    self.num_classes = num_classes

    # Setup the backbone network (base_net)
    self.base_net = MobileNet(num_classes)

    # Feature maps are taken from layer[11] and layer[13] of base_net
    self.base_output_layer_indices = (11, 13)

    def extra_block(in_channels, mid_channels, out_channels):
        """Return a 1x1 conv + ReLU, then a stride-2 padded 3x3 conv + ReLU."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=mid_channels,
                      kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=mid_channels, out_channels=out_channels,
                      kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
        )

    # Additional feature extractor; every block halves the spatial size.
    # Spatial sizes below are the original author's notes (they depend on
    # the input resolution - confirm).
    self.additional_feat_extractor = nn.ModuleList([
        extra_block(1024, 256, 512),  # Conv8_2 : 512 channels, 5 x 5
        extra_block(512, 128, 256),   # Conv9_2 : 256 channels, 3 x 3
        extra_block(256, 128, 256),   # Conv10_2: 256 channels, 2 x 2
        extra_block(256, 128, 256),   # Conv11_2: 256 channels, 1 x 1
    ])

    num_prior_bbox = 6  # num of prior bounding boxes per feature-map cell

    # Input channels of the feature maps fed to the six head pairs, in order.
    head_in_channels = (512, 1024, 512, 256, 256, 256)

    # Bounding box offset regressor: 4 offsets per prior box
    self.loc_regressor = nn.ModuleList([
        nn.Conv2d(in_channels=channels, out_channels=(num_prior_bbox * 4),
                  kernel_size=3, padding=1)
        for channels in head_in_channels
    ])

    # Bounding box classification confidence: one score per class per prior
    self.classifier = nn.ModuleList([
        nn.Conv2d(in_channels=channels,
                  out_channels=(num_prior_bbox * num_classes),
                  kernel_size=3, padding=1)
        for channels in head_in_channels
    ])

    # Load the pre-trained weights for self.base_net (fine-tuning start point).
    # The previous code merged the checkpoint into a dict obtained from
    # state_dict() and then dropped it, so no weight was ever applied:
    # updating that dict does not write into the module. The merged dict is
    # now passed to load_state_dict.
    checkpoint = torch.load('./pretrained/mobienetv2.pth', map_location='cpu')
    base_state = self.base_net.state_dict()
    old_prefix = 'base_net'
    remapped = {}
    for key, value in checkpoint.items():
        # Checkpoint names starting with 'base_net' are renamed to the
        # backbone's 'conv_layers' prefix (same renaming as before).
        if key.startswith(old_prefix):
            key = 'conv_layers' + key[len(old_prefix):]
        # Discard parameters the backbone does not have or whose shape
        # differs; those keep their default initialisation.
        if key in base_state and base_state[key].shape == value.shape:
            remapped[key] = value
    if not remapped:
        warnings.warn(
            "No checkpoint entry could be mapped onto the backbone; "
            "base_net keeps its default initialisation."
        )
    base_state.update(remapped)
    self.base_net.load_state_dict(base_state)

    def init_with_xavier(m):
        """Xavier-uniform initialise the weight of every Conv2d module."""
        if isinstance(m, nn.Conv2d):
            nn.init.xavier_uniform_(m.weight)

    # Only the newly added layers are re-initialised; the backbone keeps the
    # weights loaded above.
    self.loc_regressor.apply(init_with_xavier)
    self.classifier.apply(init_with_xavier)
    self.additional_feat_extractor.apply(init_with_xavier)
if args.depth == 18: model = ResNet18() elif args.depth == 50: model = ResNet50() else: sys.exit("resnet doesn't implement those depth!") elif args.arch == "convnet": args.depth = 4 model = ConvNet() print("convnet selected") elif args.arch == "lenet": args.depth = 5 model = LeNet() elif args.arch == "mobilenet": args.depth = 13 model = MobileNet() if args.multi_gpu: model = torch.nn.DataParallel(model) model.cuda() ############# criterion = CrossEntropyLossMaybeSmooth(smooth_eps=args.smooth_eps).cuda() # args.smooth = args.smooth_eps > 0.0 # args.mixup = config.alpha > 0.0 optimizer_init_lr = args.warmup_lr if args.warmup else args.lr optimizer = None if (args.optmzr == 'sgd'): optimizer = torch.optim.SGD(model.parameters(),