def __init__(self):
    """Create the global backbone, the local attention module and both classifier heads."""
    super(Delg, self).__init__()

    # Global branch: backbone followed by the Arcface head.
    self.globalmodel = ResNet()
    descriptor_dim = cfg.MODEL.HEADS.REDUCTION_DIM
    n_classes = cfg.MODEL.NUM_CLASSES
    self.desc_cls = Arcface(descriptor_dim, n_classes)

    # Local branch: spatial attention followed by its own classifier head.
    self.localmodel = SpatialAttention2d(1024)
    self.att_cls = ResHead(512, n_classes)
def test_train(self):
    """A single training epoch on the dummy dataset must report a 'loss' entry."""
    model = ResNet(dataset=DummyDataset(batch_size=2), block_nums=1, epochs=1)
    training_history = model.train()
    ok_('loss' in training_history)
def __init__(self, n_classes=21, No_feature=3):
    """
    Model initialization

    :param n_classes: number of output channels of the final transposed convolution
    :type n_classes: int
    :param No_feature: forwarded to the ResNet backbone as ``num_input_feature``
    :type No_feature: int
    """
    super(LinkNet, self).__init__()

    backbone = ResNet(BasicBlock, [2, 2, 2, 2], num_input_feature=No_feature)

    # Input block reuses the backbone stem layers.
    stem_layers = [backbone.conv1, backbone.bn1, backbone.relu, backbone.maxpool]
    self.in_block = nn.Sequential(*stem_layers)

    # Encoders are the four residual stages of the backbone, in order.
    for stage in range(1, 5):
        setattr(self, 'encoder%d' % stage, getattr(backbone, 'layer%d' % stage))

    # Decoders are registered in the same order as before (decoder1 .. decoder4).
    decoder_args = (
        (64, 64, 3, 1, 1, 0),
        (128, 64, 3, 2, 1, 1),
        (256, 128, 3, 2, 1, 1),
        (512, 256, 3, 2, 1, 1),
    )
    for stage, spec in enumerate(decoder_args, start=1):
        setattr(self, 'decoder%d' % stage, Decoder(*spec))

    # Classifier
    first_upsample = nn.ConvTranspose2d(64, 32, 3, 2, 1, 1)
    self.tp_conv1 = nn.Sequential(
        first_upsample,
        nn.BatchNorm2d(32),
        nn.ReLU(inplace=True),
    )
    refine_conv = nn.Conv2d(32, 32, 3, 1, 1)
    self.conv2 = nn.Sequential(
        refine_conv,
        nn.BatchNorm2d(32),
        nn.ReLU(inplace=True),
    )
    self.tp_conv2 = nn.ConvTranspose2d(32, n_classes, 2, 2, 0)
def main(argv=None):  # pylint: disable=unused-argument
    """Build a depth-50 ResNet in training mode and run ``train`` inside a TF session."""
    net = ResNet(depth=50, training=True, weight_decay=args.weight_decay)
    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=False)
    with tf.Session(config=session_config) as sess:
        # The data handler is bound to the session it feeds.
        train(sess, DataHandler(sess), net)
def __init__(self):
    """Set up the image backbone, pooling, dropout and a two-way linear classifier."""
    super(model_fas_classifier, self).__init__()
    self.backbone_img = ResNet("resnet18", -1, use_pretrain=False)
    self.global_pool = nn.AdaptiveAvgPool2d(1)
    self.dropout = nn.Dropout(0.5)
    # Module.apply returns the module itself, so the initialised layer can be
    # assigned in a single expression.
    self.classifier = nn.Linear(512, 2).apply(weights_init_classifier)
def build_model(network='resnet101', base_model_cfg='resnet'):
    """Assemble a ``JL_DCF`` model on top of the selected backbone family.

    Args:
        network: backbone variant; ``'resnet101'`` selects stage depths
            ``[3, 4, 23, 3]``, any other value ``[3, 4, 6, 3]`` (resnet
            family only). For ``'vgg'`` it is forwarded to ``vgg``.
        base_model_cfg: one of ``'resnet'``, ``'vgg'`` or ``'densenet'``.

    Returns:
        The ``JL_DCF`` instance, or ``None`` when ``base_model_cfg`` is not
        one of the three supported families.

    Note:
        ``k`` is read from the enclosing module scope.
    """
    feature_aggregation_module = [FAModule() for _ in range(5)]

    # Row i holds i + 1 transposed convolutions with doubling kernel/stride.
    upsampling = [
        [
            nn.ConvTranspose2d(k, k,
                               kernel_size=2**(level + 2),
                               stride=2**(level + 1),
                               padding=2**(level))
            for level in range(0, row + 1)
        ]
        for row in range(0, 4)
    ]

    if base_model_cfg == 'resnet':
        stage_depths = [3, 4, 23, 3] if network == 'resnet101' else [3, 4, 6, 3]
        joint_module = JLModule(ResNet(Bottleneck, stage_depths))
    elif base_model_cfg == 'vgg':
        joint_module = JLModuleVGG(vgg(network=network))
    elif base_model_cfg == 'densenet':
        joint_module = JLModuleDensenet(densenet161())
    else:
        return None

    return JL_DCF(base_model_cfg, joint_module, CMLayer(),
                  feature_aggregation_module, ScoreLayer(k), ScoreLayer(k),
                  upsampling)
def create_model(self, depth, drop_ratio, net_mode, model_path):
    """Build a data-parallel ResNet on ``self.device``, load its state and return it in eval mode."""
    backbone = ResNet(depth, drop_ratio, net_mode)
    model = DataParallel(backbone)
    model = model.to(self.device)
    load_state(model, None, None, model_path, True, False)
    model.eval()
    return model
def se_resnet34(num_classes):
    """Constructs a ResNet-34 model from ``SEBasicBlock`` blocks.

    Args:
        num_classes: forwarded to ``ResNet`` as ``num_classes``.

    Returns:
        The model with its ``avgpool`` replaced by ``nn.AdaptiveAvgPool2d(1)``.
    """
    stage_depths = [3, 4, 6, 3]
    net = ResNet(SEBasicBlock, stage_depths, num_classes=num_classes)
    net.avgpool = nn.AdaptiveAvgPool2d(1)
    return net
def se_resnet152(num_classes):
    """Constructs a ResNet-152 model from ``SEBottleneck`` blocks.

    Args:
        num_classes: forwarded to ``ResNet`` as ``num_classes``.

    Returns:
        The model with its ``avgpool`` replaced by ``nn.AdaptiveAvgPool2d(1)``.
    """
    stage_depths = [3, 8, 36, 3]
    net = ResNet(SEBottleneck, stage_depths, num_classes=num_classes)
    net.avgpool = nn.AdaptiveAvgPool2d(1)
    return net
def __init__(self, loss_type='focal', depth_type=None):
    """Build the ResNet-based encoder/decoder network.

    Args:
        loss_type: stored on the instance as ``self.loss_type``.
        depth_type: accepted but not used by this constructor.
    """
    super().__init__()
    self.loss_type = loss_type

    backbone = ResNet(BasicBlock, [3, 4, 6, 3], num_classes=1)
    self.resnet = backbone

    # Standalone stem layers; encoder1 below builds its own copies instead of
    # sharing these.
    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=1, padding=3, bias=False)
    self.bn1 = nn.BatchNorm2d(64)

    stem = [
        nn.Conv2d(3, 64, kernel_size=7, stride=1, padding=3, bias=False),
        nn.BatchNorm2d(64),
        nn.ReLU(inplace=True),
    ]
    self.encoder1 = nn.Sequential(*stem)
    self.encoder2 = nn.Sequential(
        nn.MaxPool2d(kernel_size=2, stride=2),
        backbone.layer1,  # 64
    )
    self.encoder3 = backbone.layer2  # 128
    self.encoder4 = backbone.layer3  # 256
    self.encoder5 = backbone.layer4  # 512

    center_layers = [
        ConvBn2d(512, 512, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        ConvBn2d(512, 256, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
    ]
    self.center = nn.Sequential(*center_layers)

    # Decoders are created deepest-first, matching the registration order
    # decoder5 .. decoder1.
    decoder_args = (
        (5, (256, 512, 512, 64)),
        (4, (64, 256, 256, 64)),
        (3, (64, 128, 128, 64)),
        (2, (64, 64, 64, 64)),
        (1, (64, 64, 32, 64)),
    )
    for level, dims in decoder_args:
        setattr(self, 'decoder%d' % level, Decoder(*dims))

    self.fuse_pixel = nn.Sequential(
        nn.Conv2d(64, 8, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(8, 1, kernel_size=1, padding=0),
    )
    self.fuse = nn.Sequential(nn.Conv2d(6, 1, kernel_size=1, padding=0))
    self.logit_image = nn.Sequential(
        nn.Linear(512, 64),
        nn.ReLU(inplace=True),
        nn.Linear(64, 1),
    )
def create_model(self, depth, drop_ratio, net_mode, model_path, head):
    """Build a data-parallel ResNet and head on ``self.device``, load their state, return both in eval mode.

    ``head`` is a zero-argument callable; it is called here to create the head module.
    """
    backbone = ResNet(depth, drop_ratio, net_mode)
    model = DataParallel(backbone).to(self.device)
    head_module = head()
    head = DataParallel(head_module).to(self.device)
    load_state(model, head, None, model_path, True)
    for module in (model, head):
        module.eval()
    return model, head
def __init__(self):
    """Create the backbone, the tanh activation and the five upsampling stages."""
    super(U_Net, self).__init__()
    self.backbone = ResNet("resnet18", 1)
    self.tanh = nn.Tanh()
    self.upsample_func = Upsample

    # One argument triple per stage, registered as Upsample1 .. Upsample5.
    stage_args = (
        (512, 256, 256),
        (256, 128, 128),
        (128, 64, 64),
        (64, 64, 64),
        (64, 0, 3),
    )
    for index, dims in enumerate(stage_args, start=1):
        setattr(self, 'Upsample%d' % index, self.upsample_func(*dims))
def se_resnet152(num_classes=1000, pretrained=True):
    """Constructs a ResNet-152 model from ``SEBottleneck`` blocks.

    Args:
        num_classes: forwarded to ``ResNet`` as ``num_classes``.
        pretrained (bool): If True, the state dict registered under
            ``model_urls["resnet152"]`` is downloaded and passed through
            ``load_pretrained`` together with the model.

    Returns:
        The constructed model (the result of ``load_pretrained`` when
        ``pretrained`` is true).
    """
    net = ResNet(SEBottleneck, [3, 8, 36, 3], num_classes=num_classes)
    net.avgpool = nn.AdaptiveAvgPool2d(1)
    if not pretrained:
        return net
    weights = load_state_dict_from_url(model_urls["resnet152"], progress=True)
    return load_pretrained(net, weights)
def __init__(self, bn=False):
    """Create the ResNet feature extractor and the convolutional heads.

    Args:
        bn: forwarded as ``bn`` to every ``ConvReLU`` block.
    """
    super(WSDR, self).__init__()

    def conv3x3(in_channels, out_channels):
        # All plain convolutions in this model share kernel 3, stride 1, padding 1.
        return nn.Conv2d(in_channels, out_channels,
                         kernel_size=3, stride=1, padding=1)

    self.resnet_feature = ResNet(BasicBlock, [2, 2, 2, 2])
    self.feature_gap = nn.Sequential(
        ConvReLU(512, 256, 3, pd=True, bn=bn),
        ConvReLU(256, 128, 3, pd=True, bn=bn),
        conv3x3(128, 1),
    )
    self.feature_fwd = conv3x3(512, 1)
    self.sing_flow = conv3x3(512, 20)
    self.sing_flow_2 = nn.Sequential(
        ConvReLU(512, 256, 3, pd=True, bn=bn),
        conv3x3(256, 20),
    )
    self.gap = nn.AvgPool2d(kernel_size=7, stride=7)
    self.box = nn.Sigmoid()
# -*- coding: utf-8 -*- import torch import torch.nn as nn from model.networkmodel import NetworkModel from model.resnet import ResNet from model.resnet import BasicBlock import torch.optim as optim from CTData import dataloader import time import numpy as np if __name__ == "__main__": resnet18_model = ResNet(BasicBlock, [2, 2, 2, 2]) resnet18_model.load_state_dict( torch.load("./weights/resnet18-5c106cde.pth")) model = NetworkModel(pretrained_net=resnet18_model, n_class=1) fcn_model = model.cuda() criterion = nn.BCELoss().cuda() # optimizer = optim.SGD(fcn_model.parameters(), lr=1e-2, momentum=0.7) optimizer = optim.Adam(fcn_model.parameters(), lr=1e-4) for epo in range(20): index = 0 epo_loss = 0 for item in dataloader: # time_end = time.time()
testset = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform_test) num_classes = 100 testloader = torch.utils.data.DataLoader(testset, batch_size=args.bs, shuffle=False, num_workers=2) # Model print('\n[Phase 2] : Model setup') print('| Building net type [' + args.net + ']...') if args.net == 'resnet34': net = ResNet(34, num_classes, 0.5) elif args.net == 'densenet': net = DenseNet3(100, num_classes, 12, 0.5, True, 0.2) elif args.net == 'vgg16': net = VGGNet(num_classes, 0.5, False, 2048, True) else: print('Error : Network should be either [ResNet34]') sys.exit(0) checkpoint = torch.load(args.model_path) net.load_state_dict(checkpoint['model']) net.to(device) avg = 0 for i in range(10):
def __init__(self, model, data_format, batch_size):
    """Select the network, derive device names from FLAGS and build the estimator callbacks.

    Args:
        model: model name; its prefix ('vgg', 'resnet' or 'inception')
            selects the network class.
        data_format: 'NCHW' yields channel-first fake images, any other
            value channel-last.
        batch_size: batch dimension of the generated fake inputs.

    The constructed closures are stored as ``self._model_fn`` and
    ``self._input_fn``.
    """
    # NOTE(review): there is no else branch, so self._network stays unset for
    # any other model name.
    if (model[:3] == 'vgg'):
        self._network = Vgg(data_format, model)
    elif (model[:6] == 'resnet'):
        self._network = ResNet(data_format, model)
    elif (model[:9] == 'inception'):
        self._network = Inception(data_format, model)
    self._model = model
    self._data_format = data_format
    self._batch_size = batch_size
    # Device strings are prefixed with the worker job/task only when a job
    # name is configured.
    if FLAGS.job_name:
        self.worker_prefix = '/job:worker/task:%s' % FLAGS.task_index
    else:
        self.worker_prefix = ''
    self.cpu_device = '%s/cpu:0' % self.worker_prefix
    self.gpu_devices = [
        '%s/%s:%i' % (self.worker_prefix, 'gpu', i)
        for i in range(FLAGS.num_gpus)
    ]
    # Parameters live on the first GPU or on the CPU, depending on the flag.
    if FLAGS.local_parameter_device == 'gpu':
        self.param_server_device = self.gpu_devices[0]
    else:
        self.param_server_device = self.cpu_device
    # One replica device setter per GPU, all sharing the same parameter device.
    self.replica_devices = [
        tf.train.replica_device_setter(worker_device=d,
                                       ps_device=self.param_server_device,
                                       ps_tasks=1) for d in self.gpu_devices
    ]
    self.global_step_device = self.param_server_device
    self.v_mgr = None

    def model_fn(features, labels, mode):
        """Return the EstimatorSpec for PREDICT, EVAL or TRAIN mode.

        Falls through (returns None) for any other mode value.
        """
        last_layer = self._network.inference(features)
        predictions = {
            'classes': tf.argmax(input=last_layer, axis=1, name='classes'),
            'probabilities': tf.nn.softmax(last_layer, name='softmax_tensor')
        }
        # Prediction needs neither labels nor a loss.
        if (mode == tf.estimator.ModeKeys.PREDICT):
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=predictions)
        with tf.name_scope('xentropy'):
            cross_entropy = tf.losses.sparse_softmax_cross_entropy(
                logits=last_layer, labels=labels)
            loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
        with tf.name_scope('accuracy'):
            accuracy = tf.metrics.accuracy(
                labels=labels,
                predictions=predictions['classes'],
                name='accuracy')
            # Accuracy of the current batch only; used by the logging hook.
            batch_accuracy = tf.reduce_mean(
                tf.cast(tf.equal(labels, predictions['classes']),
                        tf.float32))
        eval_metric_ops = {'accuracy': accuracy}
        # with tf.device(self.cpu_device):
        #     tf.summary.scalar('accuracy', batch_accuracy)
        if (mode == tf.estimator.ModeKeys.EVAL):
            return tf.estimator.EstimatorSpec(
                mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
        # Logs loss, batch accuracy and global step every 10 iterations.
        logging_hook = tf.train.LoggingTensorHook(
            {
                "loss": loss,
                "accuracy": batch_accuracy,
                "step": tf.train.get_global_step()
            },
            every_n_iter=10)
        if (mode == tf.estimator.ModeKeys.TRAIN):
            with tf.name_scope('GD_Optimizer'):
                # Plain gradient descent with a fixed learning rate of 0.001.
                train_step = tf.train.GradientDescentOptimizer(
                    0.001).minimize(loss, tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(
                mode=mode,
                loss=loss,
                train_op=train_step,
                eval_metric_ops=eval_metric_ops,
                training_hooks=[logging_hook])

    def fake_input_fn():
        """Return a dataset that repeats one random image batch and an all-ones label batch 100 times."""
        # Inception models use 299x299 inputs, every other model 224x224.
        if (self._model[:9] == "inception"):
            image_size = 299
        else:
            image_size = 224
        if self._data_format == 'NCHW':
            image_shape = [self._batch_size, 3, image_size, image_size]
        else:
            image_shape = [self._batch_size, image_size, image_size, 3]
        # ----------------------- Fake Input Images -----------------------
        with tf.device(
                self.cpu_device), tf.name_scope('Fake_Input_Images'):
            ori_images = tf.Variable(tf.random_normal(image_shape,
                                                      dtype=tf.float32,
                                                      stddev=1e-1),
                                     trainable=False)
            ori_labels = tf.Variable(tf.ones([self._batch_size],
                                             dtype=tf.int64),
                                     trainable=False)
            images = tf.data.Dataset.from_tensors(ori_images).repeat(100)
            labels = tf.data.Dataset.from_tensors(ori_labels).repeat(100)
            return tf.data.Dataset.zip((images, labels))

    self._model_fn = model_fn
    self._input_fn = fake_input_fn
def __init__(self):
    """Create the global backbone and the local spatial-attention module."""
    super(DelgExtraction, self).__init__()
    backbone = ResNet()
    self.globalmodel = backbone
    attention = SpatialAttention2d(1024)
    self.localmodel = attention
def test_init(self):
    """A ResNet built with ``block_nums=1`` exposes exactly three blocks."""
    model = ResNet(dataset=DummyDataset(), block_nums=1)
    block_count = len(model.blocks)
    eq_(block_count, 3)
def test_init_usese(self):
    """With ``use_se=True`` the first block's residual path ends in an SEBlock."""
    model = ResNet(dataset=DummyDataset(), block_nums=1, use_se=True)
    residual_path = model.blocks[0]['residual_path']
    ok_(isinstance(residual_path[-1], SEBlock))
def test_init_usext(self):
    """With ``use_xt=True`` the fourth-from-last residual path entry is a list."""
    model = ResNet(dataset=DummyDataset(), block_nums=1, use_xt=True)
    residual_path = model.blocks[0]['residual_path']
    ok_(isinstance(residual_path[-4], list))
def __init__(self, config):
    """Wire up data loaders, model, head, optimizer and optional schedulers from ``config``.

    Side effects: opens a ``SummaryWriter`` at ``config.log_path``, writes
    the model graph to it, prints progress information, stores the class
    weights on ``config.weights`` and saves ``config.txt`` and
    ``optimizer.txt`` through ``self.save_file``.
    """
    self.config = config
    cfg = self.config
    ATTR_HEAD = {
        'race': RaceHead,
        'gender': GenderHead,
        'age': AgeHead,
        'recognition': cfg.recognition_head,
    }
    self.writer = SummaryWriter(config.log_path)

    # --- training data: a single file means LMDB, otherwise a custom list ---
    train_is_lmdb = path.isfile(cfg.train_source)
    if train_is_lmdb:
        self.train_loader = LMDBDataLoader(cfg, cfg.train_source)
    else:
        self.train_loader = CustomDataLoader(cfg, cfg.train_source,
                                             cfg.train_list)
    class_num = self.train_loader.class_num()
    print(len(self.train_loader.dataset))
    print(f'Classes: {class_num}')

    # --- backbone and head; only the recognition head takes a margin ---
    self.model = ResNet(cfg.depth, cfg.drop_ratio, cfg.net_mode)
    head_factory = ATTR_HEAD[cfg.attribute]
    if cfg.attribute == 'recognition':
        self.head = head_factory(classnum=class_num, m=cfg.margin)
    else:
        self.head = head_factory(classnum=class_num)

    # Parameters are split before any DataParallel wrapping.
    paras_only_bn, paras_wo_bn = separate_bn_param(self.model)

    self.writer.add_graph(self.model, torch.zeros(1, 3, 112, 112))

    if torch.cuda.device_count() > 1:
        print(f"Model will use {torch.cuda.device_count()} GPUs!")
        self.model = DataParallel(self.model)
        self.head = DataParallel(self.head)
    self.model = self.model.to(cfg.device)
    self.head = self.head.to(cfg.device)

    # --- class weights: max count divided by per-class count ---
    self.weights = None
    if cfg.attribute in ['race', 'gender']:
        targets = self.train_loader.dataset.get_targets()
        _, counts = np.unique(targets, return_counts=True)
        balanced = np.max(counts) / counts
        self.weights = torch.tensor(balanced,
                                    dtype=torch.float,
                                    device=cfg.device)
        cfg.weights = self.weights
        print(self.weights)

    # --- validation data ---
    if cfg.val_source is not None:
        if cfg.attribute == 'recognition':
            self.validation_list = []
            for val_name in config.val_list:
                dataset, issame = get_val_pair(cfg.val_source, val_name)
                self.validation_list.append([dataset, issame, val_name])
        elif path.isfile(cfg.val_source):
            self.val_loader = LMDBDataLoader(cfg, cfg.val_source, False)
        else:
            self.val_loader = CustomDataLoader(cfg, cfg.val_source,
                                               cfg.val_list, False)

    # --- optimizer: weight decay on everything except batch-norm params ---
    param_groups = [
        {'params': paras_wo_bn, 'weight_decay': cfg.weight_decay},
        {'params': self.head.parameters(), 'weight_decay': cfg.weight_decay},
        {'params': paras_only_bn},
    ]
    self.optimizer = optim.SGD(param_groups,
                               lr=cfg.lr,
                               momentum=cfg.momentum)

    if cfg.resume:
        print(f'Resuming training from {cfg.resume}')
        load_state(self.model, self.head, self.optimizer, cfg.resume, False)
    if cfg.pretrained:
        print(f'Loading pretrained weights from {cfg.pretrained}')
        load_state(self.model, self.head, None, cfg.pretrained, True)

    print(cfg)
    self.save_file(cfg, 'config.txt')
    print(self.optimizer)
    self.save_file(self.optimizer, 'optimizer.txt')

    # Logging / evaluation cadence, never less than one step.
    steps_per_epoch = len(self.train_loader)
    self.tensorboard_loss_every = max(steps_per_epoch // 100, 1)
    self.evaluate_every = max(len(self.train_loader) // 5, 1)

    if cfg.lr_plateau:
        self.scheduler = ReduceLROnPlateau(self.optimizer,
                                           mode=cfg.max_or_min,
                                           factor=0.1,
                                           patience=3,
                                           verbose=True,
                                           threshold=0.001,
                                           cooldown=1)
    if cfg.early_stop:
        self.early_stop = EarlyStop(mode=cfg.max_or_min)
def __init__(self):
    """Configure model, devices, strategy and input pipeline from FLAGS.

    Reads every setting from the module-level ``FLAGS`` object. On an
    unknown ``FLAGS.strategy`` an error is logged and the constructor
    returns early, leaving ``_strategy``, ``_model_fn`` and ``_input_fn``
    unset. On an unknown ``FLAGS.work_mode`` the process exits with -1.
    """
    # -------------- Model Config --------------
    self._model = FLAGS.model
    self._data_format = FLAGS.data_format
    # NOTE(review): there is no else branch, so self._network stays unset for
    # any other model name.
    if (self._model[:3] == 'vgg'):
        self._network = Vgg(self._data_format, self._model)
    elif (self._model[:6] == 'resnet'):
        self._network = ResNet(self._data_format, self._model)
    elif (self._model[:9] == 'inception'):
        self._network = Inception(self._data_format, self._model)
    self._batch_size = FLAGS.batch_size
    self._optimizer = FLAGS.optimizer
    # -------------- Device Config --------------
    self._num_gpus = FLAGS.num_gpus
    if FLAGS.worker_hosts:
        # Distributed: cpu_device is a list (one per worker) and gpu_devices
        # is a list of per-worker GPU lists.
        self._worker_hosts = FLAGS.worker_hosts.split(",")
        self._num_workers = self._worker_hosts.__len__()
        self._worker_prefix = [
            '/job:worker/replica:0/task:%s' % i
            for i in range(self._num_workers)
        ]
        self.cpu_device = [
            '%s/cpu:0' % prefix for prefix in self._worker_prefix
        ]
        self.gpu_devices = [[
            '%s/%s:%i' % (prefix, 'device:GPU', i)
            for i in range(self._num_gpus)
        ] for prefix in self._worker_prefix]
        self._param_server_device = self.cpu_device
        self._global_step_device = self._param_server_device[0]
        if FLAGS.strategy == 'ps':
            self._strategy = DistributedPSStrategy(self, FLAGS.staged_vars)
        elif FLAGS.strategy == 'allreduce':
            self._strategy = DistributedAllreduceStrategy(self)
        else:
            tf.logging.error("Strategy not found.")
            return
    else:
        # Local: cpu_device is a single string and gpu_devices a flat list.
        self._worker_prefix = None
        self.cpu_device = '/device:CPU:0'
        self.gpu_devices = [
            '/device:GPU:%i' % i for i in range(self._num_gpus)
        ]
        if FLAGS.local_parameter_device == 'gpu':
            self._param_server_device = self.gpu_devices[0]
        else:
            self._param_server_device = self.cpu_device
        self._global_step_device = self._param_server_device
        if FLAGS.strategy == 'ps':
            self._strategy = LocalPSStrategy(self, FLAGS.staged_vars)
        elif FLAGS.strategy == 'allreduce':
            self._strategy = LocalAllreduceStrategy(self)
        else:
            tf.logging.error("Strategy not found.")
            return

    # -------------- Model_fn & Input_fn --------------
    def model_fn(features, labels):
        """Return (loss, top-1 hit count, top-5 hit count) for one batch."""
        last_layer = self._network.inference(features)
        with tf.name_scope('xentropy'):
            cross_entropy = tf.losses.sparse_softmax_cross_entropy(
                logits=last_layer, labels=labels)
            loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
        # NOTE(review): in the distributed branch self.cpu_device is a list,
        # while the input functions below pick element 0 -- confirm that
        # passing the list to tf.device here is intended.
        with tf.device(self.cpu_device):
            top_1_op = tf.reduce_sum(
                tf.cast(tf.nn.in_top_k(last_layer, labels, 1), tf.float32))
            top_5_op = tf.reduce_sum(
                tf.cast(tf.nn.in_top_k(last_layer, labels, 5), tf.float32))
        return loss, top_1_op, top_5_op

    def fake_input_fn():
        """Return an endlessly repeating dataset of one random image batch with all-ones labels."""
        # Inception models use 299x299 inputs, every other model 224x224.
        if (self._model[:9] == "inception"):
            image_size = 299
        else:
            image_size = 224
        if self._data_format == 'NCHW':
            image_shape = [self._batch_size, 3, image_size, image_size]
        else:
            image_shape = [self._batch_size, image_size, image_size, 3]
        # ----------------------- Fake Input Images -----------------------
        # In distributed mode the inputs are placed on the first worker's CPU.
        if self._worker_prefix:
            image_device = self.cpu_device[0]
        else:
            image_device = self.cpu_device
        with tf.device(image_device), tf.name_scope('Fake_Input_Images'):
            ori_images = tf.Variable(tf.random_normal(image_shape,
                                                      dtype=tf.float32,
                                                      stddev=1e-1),
                                     trainable=False)
            ori_labels = tf.Variable(tf.ones([self._batch_size],
                                             dtype=tf.int64),
                                     trainable=False)
            images = tf.data.Dataset.from_tensors(ori_images).repeat()
            labels = tf.data.Dataset.from_tensors(ori_labels).repeat()
            return tf.data.Dataset.zip((images, labels)).prefetch(1)

    def imagenet_input_fn():
        """Return the parsed, shuffled, repeated and batched ``ImageNet_Data`` dataset."""
        if (self._model[:9] == "inception"):
            image_size = 299
        else:
            image_size = 224
        self._image_size = image_size
        # image_shape is computed but not used further in this function.
        if self._data_format == 'NCHW':
            image_shape = [self._batch_size, 3, image_size, image_size]
        else:
            image_shape = [self._batch_size, image_size, image_size, 3]
        if self._worker_prefix:
            image_device = self.cpu_device[0]
        else:
            image_device = self.cpu_device

        def map_fn(example_serialized):
            """Parse one serialized example into (processed image, label index)."""
            image_buffer, label_index, bbox, _ = rdread.parse_example_proto(
                example_serialized)
            return rdread.simple_process(image_buffer, bbox, image_size,
                                         image_size, 3, True), label_index

        with tf.device(image_device), tf.name_scope(
                'ImageNet_Input_Images'):
            data = ImageNet_Data('ImageNet', FLAGS.work_mode)
            dataset = data.dataset()
            dataset = dataset.map(
                map_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE)
            dataset = dataset.shuffle(buffer_size=10000)
            dataset = dataset.prefetch(
                buffer_size=tf.data.experimental.AUTOTUNE)
            dataset = dataset.repeat(FLAGS.num_epochs)
            dataset = dataset.batch(batch_size=self._batch_size)
            return dataset

    self._model_fn = model_fn
    # 'test' runs on synthetic data; 'train' and 'validation' read ImageNet.
    if FLAGS.work_mode == 'test':
        self._input_fn = fake_input_fn
    elif FLAGS.work_mode in ['train', 'validation']:
        self._input_fn = imagenet_input_fn
    else:
        print("work_mode Error")
        exit(-1)
args = parser.parse_args()
pprint(args)

# Create the checkpoint and log directories. exist_ok=True replaces the
# previous exists()-then-makedirs() sequence, which could fail when another
# process created the directory between the check and the call.
os.makedirs(args.checkpoint, exist_ok=True)
os.makedirs(args.log, exist_ok=True)

# Checkpoint file name: <arch><dataset>_<num_class>.pt inside args.checkpoint.
arch = 'resnet18_'
filename = arch + args.dataset + '_' + str(args.num_class)
checkpoint_filename = os.path.join(args.checkpoint, filename + '.pt')

model = ResNet(num_classes=args.num_class)
criterion = torch.nn.CrossEntropyLoss(size_average=True)
weight_criterion = CE(aggregate='sum')

# Move model and losses to the GPU and seed CUDA when a GPU is available.
use_gpu = torch.cuda.is_available()
if use_gpu:
    model = model.cuda()
    criterion = criterion.cuda()
    weight_criterion.cuda()
    torch.cuda.manual_seed(args.seed)

optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

# Adjust learning rate and betas for Adam Optimizer
n_epoch = 200
from visualization.iterator_sample_ploter import display_iterator_sample from data_loader.dataset_creator import DatasetCreator torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False dataset_creator = DatasetCreator(root_dir='./dataset') trainset = dataset_creator.get_train_iterator() display_iterator_sample(trainset) trainloader = torch.utils.data.DataLoader(trainset, batch_size=16, shuffle=True, num_workers=0) net = ResNet(22) net.cuda() log_datatime = str(datetime.now().time()) loss_writer = SummaryWriter(os.path.join('logs', log_datatime, 'loss')) accuracy_writer = SummaryWriter(os.path.join('logs', log_datatime, 'accuracy')) validationset = dataset_creator.get_validation_iterator() validationloader = torch.utils.data.DataLoader(validationset, batch_size=32, shuffle=False, num_workers=0) fit(net, trainloader, validationloader,
def _predict_in_batches(sess, predict, x, y, phase_train, images, labels):
    """Run ``predict`` over ``images`` in BATCH_SIZE chunks and return the predicted class indices."""
    # The builtin int replaces the removed np.int alias (same dtype).
    predicted = np.zeros(shape=len(images), dtype=int)
    for start in range(0, len(images), BATCH_SIZE):
        # Slicing clamps at the end of the data, so the final short batch
        # needs no special case.
        stop = start + BATCH_SIZE
        predicted[start:stop] = sess.run(predict,
                                         feed_dict={
                                             x: images[start:stop],
                                             y: labels[start:stop],
                                             phase_train: False
                                         })
    return predicted


def train():
    """Train the 'res50' ResNet with K-fold validation, then evaluate on the test set.

    Reads its configuration from module-level names (``args``, ``IMAGE_SIZE``,
    ``IMAGE_CHANNEL``, ``NUM_CLASSES``, ``WEIGHT_DECAY``, ``SPLIT_SIZE``,
    ``EPOCH``, ``BATCH_SIZE``, ``SAVE_PATH``).

    After every epoch the validation fold is scored and a checkpoint is
    saved whenever the accuracy beats the best value seen so far. After all
    folds the test set is scored and a final checkpoint is written.
    """
    # data
    train_images, train_labels = get_train_batch()
    test_images, test_labels = get_test_batch()

    # reshape
    # NOTE(review): tf.reshape returns a tensor while the code below indexes
    # the images NumPy-style -- confirm what `tf`/the loaders provide here.
    train_images = tf.reshape(train_images,
                              [-1, IMAGE_SIZE, IMAGE_SIZE, IMAGE_CHANNEL])
    test_images = tf.reshape(test_images,
                             [-1, IMAGE_SIZE, IMAGE_SIZE, IMAGE_CHANNEL])
    train_images, train_labels = augment(train_images,
                                         train_labels,
                                         horizontal_flip=True,
                                         rotate=15,
                                         crop_probability=0.8,
                                         mixup=4)

    # model
    sess = tf.InteractiveSession()

    # variables
    phase_train = tf.placeholder(tf.bool, name='phase_train')
    global_step = tf.Variable(initial_value=0,
                              trainable=False,
                              name='global_step')
    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32,
                           shape=[None, IMAGE_SIZE, IMAGE_SIZE, IMAGE_CHANNEL],
                           name='x_input')
        y = tf.placeholder(tf.float32,
                           shape=[None, train_labels.shape[1]],
                           name='y_input')

    # network
    outputs = ResNet(x, NUM_CLASSES, phase_train, 'res50')
    var = tf.trainable_variables()
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=outputs, labels=y))
    # L2 penalty over every trainable variable whose name lacks 'bias'.
    l2 = tf.add_n([tf.nn.l2_loss(v) for v in var if 'bias' not in v.name
                   ]) * WEIGHT_DECAY
    total_loss = loss + l2
    tf.summary.scalar('loss', total_loss)
    predict = tf.argmax(outputs, axis=1)
    acc = tf.equal(predict, tf.argmax(y, axis=1))
    accuracy = tf.reduce_mean(tf.cast(acc, tf.float32))
    tf.summary.scalar('accuracy', accuracy)
    optimizer = tf.train.MomentumOptimizer(
        learning_rate=args.lr, momentum=0.9).minimize(total_loss,
                                                      global_step=global_step)

    # saver
    merged = tf.summary.merge_all()
    saver = tf.train.Saver()
    train_writer = tf.summary.FileWriter(SAVE_PATH, sess.graph)

    if args.ckpt is not None:
        print("Trying to restore from checkpoint ...")
        try:
            last_chk_path = tf.train.latest_checkpoint(
                checkpoint_dir=args.ckpt)
            saver.restore(sess, save_path=last_chk_path)
        except ValueError:
            print(
                "Failed to restore checkpoint. Initializing variables instead."
            )
            sess.run(tf.global_variables_initializer())
    else:
        sess.run(tf.global_variables_initializer())

    global_val_acc = 0
    for train_idx, val_idx in KFold(n_splits=SPLIT_SIZE).split(
            train_images, train_labels):
        train_x = train_images[train_idx]
        train_y = train_labels[train_idx]
        val_x = train_images[val_idx]
        val_y = train_labels[val_idx]

        for e in range(EPOCH):
            for batch_index in range(0, len(train_x), BATCH_SIZE):
                # Slice within the current fold; the end index is clamped for
                # the last (possibly shorter) batch.
                data = train_x[batch_index:batch_index + BATCH_SIZE]
                label = train_y[batch_index:batch_index + BATCH_SIZE]

                start_time = time.time()
                step, _, batch_loss, batch_acc = sess.run(
                    [global_step, optimizer, total_loss, accuracy],
                    feed_dict={
                        x: data,
                        y: label,
                        phase_train: True
                    })
                duration = time.time() - start_time

                # progress bar
                if batch_index % 20 == 0:
                    percentage = float(batch_index + BATCH_SIZE +
                                       e * len(train_x)) / float(
                                           len(train_x) * EPOCH) * 100.
                    bar_len = 29
                    filled_len = int((bar_len * int(percentage)) / 100)
                    bar = '=' * filled_len + '>' + '-' * (bar_len - filled_len)
                    msg = "Epoch: {:}/{:} - Step: {:>5} - [{}] {:.2f}% - Batch Acc: {:.2f} - Loss: {:.4f} - {:} Sample/sec"
                    print(
                        msg.format((e + 1), EPOCH, step, bar, percentage,
                                   batch_acc, batch_loss,
                                   int(BATCH_SIZE / duration)))

                summary = tf.Summary(value=[
                    tf.Summary.Value(tag='Epoch', simple_value=e),
                    tf.Summary.Value(tag='Loss', simple_value=batch_loss)
                ])
                train_writer.add_summary(summary, step)

            # validation
            predicted_matrix = _predict_in_batches(sess, predict, x, y,
                                                   phase_train, val_x, val_y)
            correct = (np.argmax(val_y, axis=1) == predicted_matrix)
            acc = correct.mean() * 100
            correct_numbers = correct.sum()
            mes = "\nValidation Accuracy: {:.2f}% ({}/{})\n"
            print(mes.format(acc, correct_numbers, len(val_y)))

            if acc > global_val_acc:
                ckpt_path = (SAVE_PATH + str(e) + '_' + str(args.lr) +
                             '_acc:' + str(acc) + '.ckpt')
                saver.save(sess, ckpt_path)
                # Remember the new best so later epochs only save when they
                # actually improve on it.
                global_val_acc = acc
                print(
                    "\nReach a better validation accuracy at epoch: {:} with {:.2f}%"
                    .format(e + 1, acc))
                # Report the path that was really written.
                print("Saving at ... %s\n" % ckpt_path)

    train_writer.close()

    # test section
    predicted_matrix = _predict_in_batches(sess, predict, x, y, phase_train,
                                           test_images, test_labels)
    correct = (np.argmax(test_labels, axis=1) == predicted_matrix)
    acc = correct.mean() * 100
    correct_numbers = correct.sum()
    mes = "\nTest Accuracy: {:.2f}% ({}/{})"
    print(mes.format(acc, correct_numbers, len(test_labels)))

    test_ckpt_path = SAVE_PATH + 'test' + '_acc:' + str(acc) + '.ckpt'
    saver.save(sess, test_ckpt_path)
    print(
        "\nReach a better testing accuracy at epoch: {:} with {:.2f}%".format(
            e, acc))
    print("Saving at ... %s" % test_ckpt_path)
    sess.close()
def main():
    """Train one fresh ResNet per symmetric label-noise rate (0.2, 0.5, 0.8).

    For every rate the datasets are loaded, the training labels are
    corrupted with ``GN.noisify``, the loaders are built and ``train`` is
    called with the clean labels kept aside as ``true_label``.
    """
    print('==> Preparing data..')
    transforms_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor()
    ])
    transforms_test = transforms.Compose([transforms.ToTensor()])

    noise_rates = np.squeeze([0.2, 0.5, 0.8])
    # Iterate over the rates directly; the previous index loop shadowed the
    # builtin `iter`.
    for noise_rate in noise_rates:
        model = ResNet(num_classes=args.num_class)
        if use_gpu:
            model = model.cuda()
            model = torch.nn.DataParallel(model)
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.learning_rate)

        image_datasets = {
            'train':
            Cifar10(root='./datasets',
                    train=True,
                    transform=None,
                    download=True),
            'test':
            Cifar10(root='./datasets',
                    train=False,
                    transform=None,
                    download=True)
        }
        trainData = image_datasets['train'].train_data
        trainLabel = image_datasets['train'].train_labels
        testData = image_datasets['test'].test_data
        testLabel = image_datasets['test'].test_labels

        # Keep an untouched copy of the clean labels before adding noise.
        true_label = np.squeeze(trainLabel).copy()
        trainLabel, actual_noise_rate = GN.noisify(
            nb_classes=args.num_class,
            train_labels=np.squeeze(trainLabel),
            noise_type='symmetric',
            noise_rate=noise_rate)

        trainData = np.array(trainData)
        trainLabel = np.squeeze(trainLabel)
        testData = np.array(testData)
        testLabel = np.squeeze(testLabel)

        # Same noisy training data twice: once augmented for training, once
        # with the test transform for evaluation on the training set.
        train_data = DT(trainData=trainData,
                        trainLabel=trainLabel,
                        transform=transforms_train)
        train_data_test = DT(trainData=trainData,
                             trainLabel=trainLabel,
                             transform=transforms_test)
        test_data = DT(trainData=testData,
                       trainLabel=testLabel,
                       transform=transforms_test)

        train_loader = torch.utils.data.DataLoader(
            train_data,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.workers)
        train_loader_test = torch.utils.data.DataLoader(
            train_data_test,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.workers)
        test_loader = torch.utils.data.DataLoader(
            test_data,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.workers)

        train(model, optimizer, train_loader, test_loader, train_loader_test,
              true_label, noise_rate)
num_classes = 100 trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.bs, shuffle=True, num_workers=2) testloader = torch.utils.data.DataLoader(testset, batch_size=args.bs, shuffle=False, num_workers=2) # Model print('\n[Phase 2] : Model setup') print('| Building net type [' + args.net + ']...') if args.net == 'resnet34': net = ResNet(34, num_classes, args.stoch_depth) else: print('Error : Network should be either [ResNet34]') sys.exit(0) checkpoint = torch.load(args.pretrained_model_path) net.load_state_dict(checkpoint['model']) net.to(device) # Training print('\n[Phase 3] : Training model') print('| Training Epochs = ' + str(args.num_epochs)) print('| Initial Learning Rate = ' + str(args.lr)) optimizer = optim.SGD(net.parameters(), lr=args.lr,
def try_load_model(save_dir,
                   step_ckpt=-1,
                   return_new_model=True,
                   verbose=True):
    """
    Tries to load a model from the provided directory, otherwise returns a new initialized model.

    A checkpoint is only looked up when ``configs.config_values.resume`` is set.

    :param save_dir: directory with checkpoints
    :param step_ckpt: step of checkpoint where to resume the model from; -1 selects the latest checkpoint
    :param return_new_model: when resuming and no checkpoint is found, return the freshly
        initialized model (True) or ``None`` for model, optimizer and step (False)
    :param verbose: true for printing the model summary
    :return: tuple ``(model, optimizer, step)``
    :raises ValueError: if ``configs.config_values.model`` is not a supported model name
    """
    import tensorflow as tf
    tf.compat.v1.enable_v2_behavior()

    if configs.config_values.model == 'baseline':
        configs.config_values.num_L = 1

    # initialize return values
    model_name = configs.config_values.model
    if model_name == 'resnet':
        model = ResNet(filters=configs.config_values.filters,
                       activation=tf.nn.elu)
    elif model_name in ['refinenet', 'baseline']:
        model = RefineNet(filters=configs.config_values.filters,
                          activation=tf.nn.elu)
    elif model_name == 'refinenet_twores':
        model = RefineNetTwoResidual(filters=configs.config_values.filters,
                                     activation=tf.nn.elu)
    else:
        # Previously `model` stayed unbound and the function failed later
        # with an unrelated-looking error.
        raise ValueError("Unknown model name: {!r}".format(model_name))

    optimizer = tf.keras.optimizers.Adam(
        learning_rate=configs.config_values.learning_rate)
    step = 0

    # if resuming training, overwrite model parameters from checkpoint
    if configs.config_values.resume:
        if step_ckpt == -1:
            print("Trying to load latest model from " + save_dir)
            checkpoint = tf.train.latest_checkpoint(save_dir)
        else:
            print("Trying to load checkpoint with step", step_ckpt,
                  " model from " + save_dir)
            pattern = re.compile(".*step_{}-.*".format(step_ckpt))
            name_all_checkpoints = sorted(
                f for f in os.listdir(save_dir)
                if os.path.isfile(os.path.join(save_dir, f))
                and pattern.match(f))
            if name_all_checkpoints:
                # Retrieve name of the last checkpoint with that number of
                # steps; the final 6 characters are dropped (presumably the
                # '.index' suffix -- confirm against the saved file names).
                name_ckpt = name_all_checkpoints[-1][:-6]
                checkpoint = os.path.join(save_dir, name_ckpt)
            else:
                # No file for that step: treat it like "no checkpoint found"
                # instead of raising IndexError.
                checkpoint = None

        if checkpoint is None:
            print("No model found.")
            if return_new_model:
                print("Using a new model")
            else:
                print("Returning None")
                model = None
                optimizer = None
                step = None
        else:
            step = tf.Variable(0)
            ckpt = tf.train.Checkpoint(step=step,
                                       optimizer=optimizer,
                                       model=model)
            ckpt.restore(checkpoint)
            step = int(step)
            print("Loaded model: " + checkpoint)

    evaluate_print_model_summary(model, verbose)
    return model, optimizer, step
def train(self):
    '''
    Creates a deep model (e.g. Resnet or SE based) and trains it using the given dataset (e.g. CIFAR10)

    The model is built from ``self.params``, trained for
    ``self.params.MAX_EPOCH`` epochs with a cross-entropy loss, evaluated
    after every epoch when ``self.dataset_eval`` is set, and the per-epoch
    statistics are passed to ``LogStats``.
    '''
    # Define the deep model, (e.g. Resnet, SE, etc)
    net = ResNet(self.params)

    # Object instances for computing confusion matrix & mean class accuracy for train & test data
    self.train_accuracy = Accuracy(self.num_classes)
    self.eval_accuracy = Accuracy(self.num_classes)

    # Total number of parameters of the defined model
    self.net_total_params = sum(p.numel() for p in net.parameters())
    print('Total number of model parameters: {}'.format(
        self.net_total_params))

    # Object for logging training info (e.g. train loss & accuracy and evaluation accuracy)
    log_stats = LogStats(self.params, total_params=self.net_total_params)

    # Find the device to run the model on (if there is a gpu use that)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print(" The device selected is {}".format(device))
    net = net.to(device)

    # Define the multi-gpu training if needed. Compare the device *type*:
    # comparing the torch.device object itself with the string 'cpu' does not
    # test what was intended.
    if device.type != 'cpu' and torch.cuda.device_count() > 1:
        net = nn.DataParallel(net, device_ids=self.params.DEVICES)

    # Define the cross entropy loss (combines softmax + negative-log-likelihood loss)
    loss_fn = torch.nn.CrossEntropyLoss().to(device)

    # Optimizer defined
    optimizer = AdjustableOptim(self.params, net, self.data_size)
    start_epoch = 0

    # Define multi-thread dataloader
    dataloader, eval_dataloader = self.create_dataloaders()

    train_time = 0
    eval_time = 0

    # Training script
    for epoch in range(start_epoch, self.params.MAX_EPOCH):
        # (Re-)enter train mode every epoch so that an evaluation pass that
        # switched the model to eval mode cannot leak into training.
        net.train()

        # Externally shuffle
        if self.params.SHUFFLE_MODE == 'external':
            self.dataset_train.shuffle()

        time_start = time.time()
        print('')

        # Learning rate decay (the lr update is like the original Resnet paper)
        optimizer.scheduler_step()

        # Iteration
        for step, (img, label, idx) in enumerate(dataloader):
            optimizer.zero_grad()
            img = img.to(device)
            label = label.to(device)

            # Feed forward
            pred = net(img)

            # Loss computation & backward
            loss = loss_fn(pred, label)
            # if orthogonality of SE weights is set to True
            # NOTE(review): when `net` is wrapped in DataParallel this
            # attribute lookup goes through the wrapper -- confirm that
            # `orthogonal_loss` is reachable in the multi-GPU case.
            if self.params.ORTHOGONAL != "none":
                loss += self.params.ORTH_WEIGHT * net.orthogonal_loss
            loss.backward()

            # Optimize (updates weights)
            optimizer.step()

            # Train accuracy calculation
            train_acc = self.train_accuracy.per_class_accuracy_cumulative(
                pred, label)
            loss_np = loss.item() / self.params.BATCH_SIZE

            if self.params.VERBOSE:
                print(
                    "\r[epoch %2d][step %4d/%4d][%s] loss: %.4f, acc: %.3f, lr: %.2e"
                    % (epoch + 1, step,
                       int(self.data_size / self.params.BATCH_SIZE), 'train',
                       loss_np, train_acc, optimizer.lr()),
                    end=' ')

        train_time += int(time.time() - time_start)
        eval_acc = 0.0

        # Eval after every epoch
        if self.dataset_eval is not None:
            time_start = time.time()
            eval_acc = self.eval(net, eval_dataloader, epoch)
            eval_time += int(time.time() - time_start)

        # Updates log info for train & test accuracies & losses
        log_stats.update_stats(epoch=epoch,
                               epoch_loss=loss_np,
                               epoch_acc=[train_acc, eval_acc])

        # Reset all computed variables of logs for next epoch
        self.train_accuracy.reset()
        self.eval_accuracy.reset()

        print("\ntrain acc: {}, eval acc: {}".format(train_acc, eval_acc))

    # Keeps a log of total training time & eval time in file "output/stats_logs/all_runs.txt"
    log_stats.log_finalize(train_time, eval_time)