def augmentation(self, img, shape):
    augset = Augment(self.args, self.mode)
    if self.args.task in ['v1', 'v2']:
        img_list = []
        for _ in range(2):  # query, key
            aug_img = tf.identity(img)
            if self.args.task == 'v1':
                aug_img = augset._augmentv1(aug_img, shape)  # MoCo v1
            else:
                radius = np.random.choice([3, 5])
                aug_img = augset._augmentv2(aug_img, shape, (radius, radius))  # MoCo v2
            img_list.append(aug_img)
        return img_list
    else:
        return augset._augment_lincls(img, shape)
def augmentation(img, label, shape):
    # `args` and `mode` are taken from the enclosing scope
    augment = Augment(args, mode)
    img = augment(img, shape)
    # one-hot encoding of the label
    label = tf.one_hot(label, args.classes)
    return img, label
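# A minimal, self-contained illustration of the one-hot step above (TensorFlow
# 2.x; the class index 3 and depth 10 are arbitrary example values, not
# settings from this code):
import tensorflow as tf

label = tf.one_hot(3, 10)
print(label.numpy())  # [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]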
def __init__(self, args, task, mode, datalist, batch_size,
             num_workers=1, shuffle=True):
    self.args = args
    self.task = task
    self.mode = mode
    self.datalist = datalist
    self.batch_size = batch_size
    self.num_workers = num_workers
    self.shuffle = shuffle
    self.augset = Augment(self.args, self.mode)
    self.dataloader = self._dataloader()
def augmentation(self, img, shape):
    augset = Augment(self.args, self.mode)
    if self.args.task == 'pretext':
        img_dict = {}
        offset_list = []
        size_list = []
        isflip_list = []
        # Asymmetric augmentation: view1 is always blurred and never solarized,
        # view2 is rarely blurred and sometimes solarized
        prob_list = [{'p_blur': 1., 'p_solar': 0.},
                     {'p_blur': .1, 'p_solar': .2}]
        for i, view in enumerate(['view1', 'view2']):
            aug_img = tf.identity(img)
            aug_img, offset, size, isflip = augset._augment_pretext(
                aug_img, shape, **prob_list[i])
            img_dict[view] = aug_img
            offset_list.append(offset)
            size_list.append(size)
            isflip_list.append(isflip)
        A_dict = self.get_distance_A(offset_list, size_list, isflip_list)
        img_dict.update(A_dict)
        if self.mode == 'train':
            return img_dict
        else:
            return img_dict, {
                'img': img,
                'offset_list': offset_list,
                'size_list': size_list,
                'isflip_list': isflip_list,
            }
    else:
        raise NotImplementedError('lincls is not implemented yet.')
def __init__(self, args, logger, num_workers=1, **kwargs):
    super(BarlowTwins, self).__init__(**kwargs)
    self.args = args
    self._num_workers = num_workers
    norm = 'bn' if self._num_workers == 1 else 'syncbn'

    # preprocess
    augment = Augment(args)
    self.preprocess = tf.keras.Sequential(name='preprocess')
    self.preprocess.add(Lambda(lambda x: augment._random_color_jitter(x)))
    self.preprocess.add(Lambda(lambda x: augment._random_grayscale(x)))
    if self.args.dataset == 'imagenet':
        self.preprocess.add(Lambda(lambda x: augment._random_gaussian_blur(x)))
    self.preprocess.add(Lambda(lambda x: augment._random_hflip(x)))
    self.preprocess.add(Lambda(lambda x: augment._standardize(x)))

    # encoder
    DEFAULT_ARGS = {
        "use_bias": self.args.use_bias,
        "kernel_regularizer": l2(self.args.weight_decay)}
    FAMILY_DICT[self.args.backbone].Conv2D = _conv2d(**DEFAULT_ARGS)
    FAMILY_DICT[self.args.backbone].BatchNormalization = _batchnorm(norm=norm)
    FAMILY_DICT[self.args.backbone].Dense = _dense(**DEFAULT_ARGS)
    DEFAULT_ARGS.update({'norm': norm})  # for resnet18
    self.encoder = MODEL_DICT[self.args.backbone](
        include_top=False,
        weights=None,
        input_shape=(self.args.img_size, self.args.img_size, 3),
        pooling='avg',
        **DEFAULT_ARGS if self.args.backbone == 'resnet18' else {})
    DEFAULT_ARGS.pop('norm')  # for resnet18

    # projector: 3-layer MLP, BN and ReLU between the layers
    num_mlp = 3
    self.projector = tf.keras.Sequential(name='projector')
    for i in range(num_mlp - 1):
        self.projector.add(
            _dense(**DEFAULT_ARGS)(self.args.proj_dim, name=f'proj_fc{i+1}'))
        self.projector.add(
            _batchnorm(norm=norm)(epsilon=1.001e-5, name=f'proj_bn{i+1}'))
        self.projector.add(Activation('relu', name=f'proj_relu{i+1}'))
    self.projector.add(
        _dense(**DEFAULT_ARGS)(self.args.proj_dim, name=f'proj_fc{num_mlp}'))
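# The constructor above only builds the preprocessing, encoder, and projector.
# For context, below is a self-contained sketch of the Barlow Twins objective
# that the projections feed into (the cross-correlation loss of Zbontar et al.,
# 2021). The function name `barlow_twins_loss` and the default `lambda_` are
# illustrative, not taken from this code:
import tensorflow as tf

def barlow_twins_loss(z_a, z_b, lambda_=5e-3):
    """z_a, z_b: (batch, proj_dim) projections of two augmented views."""
    n = tf.cast(tf.shape(z_a)[0], z_a.dtype)
    # Standardize each feature dimension across the batch
    z_a = (z_a - tf.reduce_mean(z_a, axis=0)) / (tf.math.reduce_std(z_a, axis=0) + 1e-12)
    z_b = (z_b - tf.reduce_mean(z_b, axis=0)) / (tf.math.reduce_std(z_b, axis=0) + 1e-12)
    # Cross-correlation matrix, shape (proj_dim, proj_dim)
    c = tf.matmul(z_a, z_b, transpose_a=True) / n
    # Push the diagonal to 1 (invariance) and the off-diagonal to 0 (redundancy reduction)
    on_diag = tf.reduce_sum(tf.square(tf.linalg.diag_part(c) - 1.0))
    off_diag = tf.reduce_sum(tf.square(c)) - tf.reduce_sum(tf.square(tf.linalg.diag_part(c)))
    return on_diag + lambda_ * off_diag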
def augment(in_path: str, anomaly_type: str, iterations: int,
            block_size: int = 1024) -> Tuple[np.ndarray, np.ndarray]:
    # the return annotation requires `from typing import Tuple` at module level
    global BLOCKSIZE
    PLT_CHANNELS = list(range(12))  # use all 12 ECG channels for plotting
    a = Augment(use_path=True, path=in_path, anomaly_type=anomaly_type)
    ANOMALY = (anomaly_type, "", ".")  # anomaly type, "", out path
    ITERATIONS = iterations
    BLOCKSIZE = block_size
    ecg, ann = main(PLT_CHANNELS, ITERATIONS, a, ANOMALY,
                    blocksize=BLOCKSIZE, ret=True)
    return np.array(ecg), np.array(ann)
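# Hypothetical call to the wrapper above; the record path and anomaly label
# below are illustrative placeholders, not values from this code:
#
#     ecg, ann = augment('data/record.csv', anomaly_type='noise',
#                        iterations=5, block_size=1024)
#     print(ecg.shape, ann.shape)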
def trainModel():
    # Parse args
    parser = argparse.ArgumentParser(description='Train the CNN')
    parser.add_argument('--expt_dir', default='./logs', help='save dir for experiment logs')
    parser.add_argument('--train', default='./data', help='path to training set')
    parser.add_argument('--val', default='./data', help='path to validation set')
    parser.add_argument('--test', default='./data', help='path to test set')
    parser.add_argument('--save_dir', default='./models', help='path to save model')
    parser.add_argument('--arch', default='models/cnn.json', help='path to model architecture')
    parser.add_argument('--model_name', default='model', help='name of the model to save logs, weights')
    parser.add_argument('--lr', default=0.001, help='learning rate')
    parser.add_argument('--init', default='1', help='initialization')
    parser.add_argument('--batch_size', default=20, help='batch size')
    args = parser.parse_args()

    # Load data
    train_path, valid_path, test_path = args.train, args.val, args.test
    logs_path = args.expt_dir
    model_path, model_name = args.save_dir, args.model_name
    model_path = os.path.join(model_path, model_name)
    if not os.path.isdir(model_path):
        os.mkdir(model_path)
    lr, batch_size, init = float(args.lr), int(args.batch_size), int(args.init)

    data = loadData(train_path, valid_path, test_path)
    train_X, train_Y = data['train']['X'], data['train']['Y']
    valid_X, valid_Y = data['valid']['X'], data['valid']['Y']
    test_X, test_Y = data['test']['X'], data['test']['Y']

    # Logging
    train_log_name = '{}.train.log'.format(model_name)
    valid_log_name = '{}.valid.log'.format(model_name)
    train_log = setup_logger('train-log', os.path.join(logs_path, train_log_name))
    valid_log = setup_logger('valid-log', os.path.join(logs_path, valid_log_name))

    # Train
    num_epochs = 500
    num_batches = int(float(train_X.shape[0]) / batch_size)
    patience = 100
    early_stop = 0

    model = getModel(lr)
    loss_history = [np.inf]
    for epoch in range(num_epochs):
        print('Epoch {}'.format(epoch))
        steps = 0
        # Shuffle the training set each epoch
        indices = np.arange(train_X.shape[0])
        np.random.shuffle(indices)
        train_X, train_Y = train_X[indices], train_Y[indices]
        for batch in range(num_batches):
            start, end = batch * batch_size, (batch + 1) * batch_size
            # Augment only the current mini-batch
            x, y = Augment(train_X[start:end]).batch, train_Y[start:end]
            model.fit(x.reshape((-1, 1, 28, 28)), y, batch_size=batch_size, verbose=0)
            steps += batch_size
            if steps % train_X.shape[0] == 0 and steps != 0:
                train_loss, train_acc = model.evaluate(train_X.reshape((-1, 1, 28, 28)), train_Y)
                train_log.info('Epoch {}, Step {}, Loss: {}, Accuracy: {}, lr: {}'.format(
                    epoch, steps, train_loss, train_acc, lr))
                valid_loss, valid_acc = model.evaluate(valid_X.reshape((-1, 1, 28, 28)), valid_Y)
                valid_log.info('Epoch {}, Step {}, Loss: {}, Accuracy: {}, lr: {}'.format(
                    epoch, steps, valid_loss, valid_acc, lr))
                if valid_loss < min(loss_history):
                    # Checkpoint on improvement and reset the early-stopping counter
                    save_path = os.path.join(model_path, 'model')
                    model.save(save_path)
                    early_stop = 0
                else:
                    early_stop += 1
                if early_stop >= patience:
                    print('No improvement in validation loss for '
                          + str(patience) + ' steps - stopping training!')
                    print('Optimization Finished!')
                    return 1
                loss_history.append(valid_loss)
    print('Optimization Finished!')
def trainModel():
    # Parse args
    parser = argparse.ArgumentParser(description='Train the CNN')
    parser.add_argument('--expt_dir', default='./logs', help='save dir for experiment logs')
    parser.add_argument('--train', default='./data', help='path to training set')
    parser.add_argument('--val', default='./data', help='path to validation set')
    parser.add_argument('--test', default='./data', help='path to test set')
    parser.add_argument('--save_dir', default='./models', help='path to save model')
    parser.add_argument('--arch', default='models/cnn.json', help='path to model architecture')
    parser.add_argument('--model_name', default='model', help='name of the model to save logs, weights')
    parser.add_argument('--lr', default=0.001, help='learning rate')
    parser.add_argument('--init', default='1', help='initialization')
    parser.add_argument('--batch_size', default=20, help='batch size')
    args = parser.parse_args()

    # Load data
    train_path, valid_path, test_path = args.train, args.val, args.test
    logs_path = args.expt_dir
    model_path, model_arch, model_name = args.save_dir, args.arch, args.model_name
    model_path = os.path.join(model_path, model_name)
    if not os.path.isdir(model_path):
        os.mkdir(model_path)
    lr, batch_size, init = float(args.lr), int(args.batch_size), int(args.init)

    data = loadData(train_path, valid_path, test_path)
    train_X, train_Y = data['train']['X'], data['train']['Y']
    valid_X, valid_Y = data['valid']['X'], data['valid']['Y']
    test_X, test_Y = data['test']['X'], data['test']['Y']

    # Load architecture
    arch = loadArch(model_arch)

    # Logging
    train_log_name = '{}.train.log'.format(model_name)
    valid_log_name = '{}.valid.log'.format(model_name)
    train_log = setup_logger('train-log', os.path.join(logs_path, train_log_name))
    valid_log = setup_logger('valid-log', os.path.join(logs_path, valid_log_name))

    # GPU config
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)

    # Train
    num_epochs = 100
    num_batches = int(float(train_X.shape[0]) / batch_size)
    patience = 50
    early_stop = 0

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as session:
        model = CNN(arch, session, logs_path, init, lr)
        loss_history = [np.inf]
        for epoch in range(num_epochs):
            print('Epoch {}'.format(epoch))
            steps = 0
            # Shuffle the training set each epoch
            indices = np.arange(train_X.shape[0])
            np.random.shuffle(indices)
            train_X, train_Y = train_X[indices], train_Y[indices]
            for batch in range(num_batches):
                start, end = batch * batch_size, (batch + 1) * batch_size
                # Augment only the current mini-batch
                x, y = Augment(train_X[start:end]).batch, train_Y[start:end]
                try:
                    model.step(x, y)
                except MemoryError:
                    print('Memory error in step')
                    exit()
                steps += batch_size
                if steps % train_X.shape[0] == 0 and steps != 0:
                    try:
                        train_loss, train_acc = testModel(model, train_X, train_Y, batch_size)
                    except MemoryError:
                        print('Memory error in test for train')
                        exit()
                    train_log.info('Epoch {}, Step {}, Loss: {}, Accuracy: {}, lr: {}'.format(
                        epoch, steps, train_loss, train_acc, model.lr))
                    try:
                        valid_loss, valid_acc = testModel(model, valid_X, valid_Y, batch_size)
                    except MemoryError:
                        print('Memory error in test for valid')
                        exit()
                    valid_log.info('Epoch {}, Step {}, Loss: {}, Accuracy: {}, lr: {}'.format(
                        epoch, steps, valid_loss, valid_acc, model.lr))
                    if valid_loss < min(loss_history):
                        # Checkpoint on improvement and reset the early-stopping counter
                        save_path = os.path.join(model_path, 'model')
                        model.save(save_path)
                        early_stop = 0
                    else:
                        early_stop += 1
                    if early_stop >= patience:
                        print('No improvement in validation loss for '
                              + str(patience) + ' steps - stopping training!')
                        print('Optimization Finished!')
                        return 1
                    loss_history.append(valid_loss)
    print('Optimization Finished!')
class DataLoader:
    def __init__(self, args, task, mode, datalist, batch_size,
                 num_workers=1, shuffle=True):
        self.args = args
        self.task = task
        self.mode = mode
        self.datalist = datalist
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.shuffle = shuffle
        self.augset = Augment(self.args, self.mode)
        self.dataloader = self._dataloader()

    def __len__(self):
        return len(self.datalist)

    def fetch_dataset(self, path, y=None):
        x = tf.io.read_file(path)
        if y is not None:
            return tf.data.Dataset.from_tensors((x, y))
        return tf.data.Dataset.from_tensors(x)

    def augmentation(self, img, shape):
        if self.task == 'pretext':
            img_list = []
            for _ in range(2):  # query, key
                aug_img = tf.identity(img)
                aug_img = self.augset._augment_simsiam(aug_img, shape)
                img_list.append(aug_img)
            return img_list
        return self.augset._augment_lincls(img, shape)

    def dataset_parser(self, value, label=None):
        if self.args.dataset == 'imagenet':
            shape = tf.image.extract_jpeg_shape(value)
            img = tf.io.decode_jpeg(value, channels=3)
        elif self.args.dataset == 'cifar10':
            shape = (32, 32, 3)
            img = value
        if label is None:  # pretext
            return self.augmentation(img, shape)
        # lincls
        inputs = self.augmentation(img, shape)
        # labels = tf.one_hot(label, self.args.classes)
        return (inputs, label)

    def _dataloader(self):
        self.imglist = self.datalist[:, 0].tolist()
        if self.task == 'pretext':
            dataset = tf.data.Dataset.from_tensor_slices(self.imglist)
        else:
            self.labellist = self.datalist[:, 1].tolist()
            dataset = tf.data.Dataset.from_tensor_slices((self.imglist, self.labellist))

        dataset = dataset.repeat()
        if self.shuffle:
            dataset = dataset.shuffle(len(self.datalist))
        if self.args.dataset == 'imagenet':
            dataset = dataset.interleave(self.fetch_dataset, num_parallel_calls=AUTO)
        dataset = dataset.map(self.dataset_parser, num_parallel_calls=AUTO)
        dataset = dataset.batch(self.batch_size)
        dataset = dataset.prefetch(AUTO)
        return dataset
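# For context, a minimal, self-contained tf.data pipeline that mirrors the
# two-view pretext pattern in DataLoader above. Random tensors stand in for
# decoded images, and the flip/crop ops are illustrative stand-ins for
# Augment._augment_simsiam, not the repository's actual augmentations:
import tensorflow as tf

AUTO = tf.data.AUTOTUNE

def two_views(img):
    def aug(x):
        x = tf.image.random_flip_left_right(x)
        return tf.image.random_crop(x, (28, 28, 3))
    # Each call draws independent random ops, giving two distinct views
    return aug(img), aug(img)

images = tf.random.uniform((8, 32, 32, 3))  # stand-in for a decoded image set
ds = (tf.data.Dataset.from_tensor_slices(images)
      .map(two_views, num_parallel_calls=AUTO)
      .batch(4)
      .prefetch(AUTO))
query, key = next(iter(ds))
print(query.shape, key.shape)  # (4, 28, 28, 3) (4, 28, 28, 3)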
def main():
    args = parse_args()
    if args.augment.lower() == 'none':
        args.augment = None
    device = to_device(args.gpu)

    args.seed = args.seed + args.fold
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    data = load_data(args.data)
    num_features = data.num_features
    num_classes = data.num_classes

    trn_graphs, test_graphs = load_data_fold(args.data, args.fold)
    trn_loader = DataLoader(trn_graphs, batch_size=256)
    test_loader = DataLoader(test_graphs, batch_size=256)

    if args.iters == 'auto':
        args.iters = math.ceil(len(trn_graphs) / args.batch_size)
    else:
        args.iters = int(args.iters)

    model = GIN(num_features, num_classes, args.units, args.layers, args.dropout)
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.decay)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)
    loss_func = SoftCELoss()
    augment = Augment(trn_graphs, args.augment, aug_size=args.aug_size)

    if args.verbose > 0:
        print(' epochs\t   loss\ttrn_acc\tval_acc')

    out_list = dict(trn_loss=[], trn_acc=[], test_loss=[], test_acc=[])
    for epoch in range(args.epochs):
        model.train()
        loss_sum = 0
        for _ in range(args.iters):
            idx = torch.randperm(len(trn_graphs))[:args.batch_size]
            data = augment(idx).to(device)
            output = model(data.x, data.edge_index, data.batch)
            loss = loss_func(output, data.y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_sum += loss.item()
        if args.schedule:
            scheduler.step(epoch)

        trn_loss = loss_sum / args.iters
        trn_acc = eval_acc(model, trn_loader, device)
        test_loss = eval_loss(model, loss_func, test_loader, device)
        test_acc = eval_acc(model, test_loader, device)

        out_list['trn_loss'].append(trn_loss)
        out_list['trn_acc'].append(trn_acc)
        out_list['test_loss'].append(test_loss)
        out_list['test_acc'].append(test_acc)

        if args.verbose > 0 and (epoch + 1) % args.verbose == 0:
            print(f'{epoch + 1:7d}\t{trn_loss:7.4f}\t{trn_acc:7.4f}\t{test_acc:7.4f}')

    if args.print_all:
        out = {arg: getattr(args, arg) for arg in vars(args)}
        out['all'] = out_list
        print(json.dumps(out))
    else:
        print(f'Training accuracy: {out_list["trn_acc"][-1]}')
        print(f'Test accuracy: {out_list["test_acc"][-1]}')
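# `SoftCELoss` is defined elsewhere in the project; below is a hedged,
# self-contained sketch of a common soft-target cross-entropy it plausibly
# implements (reducing to standard CE for hard labels). This is an assumption
# for illustration, not the project's actual code:
import torch
import torch.nn.functional as F

class SoftCELoss(torch.nn.Module):
    def forward(self, output, target):
        # output: (batch, classes) logits; target: hard indices or soft labels
        if target.dim() == 1:  # hard labels: fall back to standard CE
            return F.cross_entropy(output, target)
        return -(target * F.log_softmax(output, dim=1)).sum(dim=1).mean()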
#!/usr/bin/env python
from __future__ import print_function

from augment import Augment

augment = Augment()
augment.do_grayscale()
augment.do_flip()
augment.do_resize()
augment.do_augment()
augment.do_clean()