def miniset(paths, train=True, batch_size=BS):
    if isinstance(paths, str):
        paths = [paths]
    transf = transform_train if train else transform_test
    ifd_all = None
    for ipath, path in enumerate(paths):
        ifd = ImageFolderDataset(path, flag=0)
        # use folder names ("synsets") to map the loader class to our class index (in "classes")
        for ii, (f, cl) in enumerate(ifd.items):
            icl, ncl = folder_to_class(ifd.synsets[cl])
            ifd.items[ii] = (f, icl)
        ifd.synsets = list(classes)
        # ifd = ifd.transform_first(transf)
        if ipath == 0:
            ifd_all = ifd
        else:
            ifd_all.items += ifd.items
    ifd_all = ifd_all.transform_first(transf)
    loader = DataLoader(ifd_all, shuffle=True, batch_size=batch_size)
    return ifd_all, loader
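
# Usage sketch for miniset(): the paths below are hypothetical, and the module-level
# names it relies on (BS, transform_train, transform_test, classes, folder_to_class)
# are assumed to be defined as above.
mini_data, mini_loader = miniset(['data/setA', 'data/setB'], train=True, batch_size=32)
for images, labels in mini_loader:
    print(images.shape, labels.shape)  # one transformed batch and its integer labels
    break
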
def get_folder_data(train_path, val_path, data_shape, batch_size,
                    num_workers=os.cpu_count()):
    train_dataset = ImageFolderDataset(train_path)
    val_dataset = ImageFolderDataset(val_path)

    train_transformer = transforms.Compose([
        transforms.RandomFlipLeftRight(),
        transforms.RandomResizedCrop(data_shape, scale=(0.5, 1.0)),
        transforms.RandomBrightness(0.5),
        transforms.RandomHue(0.1),
        transforms.Resize(data_shape),
        transforms.ToTensor()
    ])
    val_transformer = transforms.Compose([
        transforms.Resize(data_shape),
        transforms.ToTensor()
    ])

    train_dataloader = data.DataLoader(train_dataset.transform_first(train_transformer),
                                       batch_size=batch_size, shuffle=True,
                                       last_batch='rollover', num_workers=num_workers)
    val_dataloader = data.DataLoader(val_dataset.transform_first(val_transformer),
                                     batch_size=batch_size, shuffle=True,
                                     last_batch='rollover', num_workers=num_workers)
    return train_dataloader, val_dataloader
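
# Usage sketch for get_folder_data(): './data/train' and './data/val' are placeholder
# ImageFolderDataset-style directories (one subfolder per class).
train_loader, val_loader = get_folder_data('./data/train', './data/val',
                                           data_shape=224, batch_size=32)
for batch_data, batch_label in train_loader:
    print(batch_data.shape, batch_label.shape)  # expected (32, 3, 224, 224) and (32,)
    break
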
def make_dataset_viewer(reldir):
    import urllib.parse

    dir = reldir[:-1] if reldir.endswith('/') else reldir

    class ListDict(dict):
        def __missing__(self, key):
            self[key] = []
            return self[key]

    ifd = ImageFolderDataset(dir, flag=0)
    class_images = ListDict()
    for ii, (f, cl) in enumerate(ifd.items):
        icl, ncl = folder_to_class(ifd.synsets[cl])
        class_images[icl].append(f)

    base = dir.split('/')[-1]

    def relpath(im):
        return urllib.parse.quote(im[im.index(base):])

    def imgs(l):
        return '\n'.join(f'<img src="{relpath(im)}"/>' for im in l)

    body = ''.join(f"""
<div>Class {icl} <span>{classes[icl]}</span> ({len(class_images[icl])})</div>
{imgs(class_images[icl])}
""" for icl in sorted(class_images.keys()))

    ofname = dir + '.html'
    with open(ofname, 'w') as f:
        print("""
<html>
<head>
<style>
div span { border: 1px solid black; background: gray; font-size: 150%; padding: 0.2em; }
</style>
</head>
<body>
<div>All """ + str(sum(len(l) for l in class_images.values())) + """</div>
""" + body + """
</body>
</html>
""", file=f)
    print("Generated:", ofname)
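
# Usage sketch: writes 'data/valset.html' next to the image folder (the path is a
# placeholder) and assumes the module-level `classes` and `folder_to_class` used above.
make_dataset_viewer('data/valset/')
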
def transform(image, label):
    # resize the shorter edge to 224; the longer edge will be greater or equal to 224
    resized = mx.image.resize_short(image, TARGET_SIZE)
    # center-crop an area of size (224, 224)
    cropped, crop_info = mx.image.center_crop(resized, SIZE)
    # transpose the channels to (3, 224, 224)
    transposed = nd.transpose(cropped, (2, 0, 1))
    return transposed, label


################################################
# Loading Images from folders
################################################
dataset_train = ImageFolderDataset(root=train_data_dir, transform=transform)
dataset_test = ImageFolderDataset(root=validation_data_dir, transform=transform)

dataloader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True,
                              num_workers=NUM_WORKERS)  # last_batch='discard' (removed for testing)
dataloader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE,
                             # last_batch='discard',
                             shuffle=True, num_workers=NUM_WORKERS)

print("Train dataset: {} images, Test dataset: {} images".format(len(dataset_train),
                                                                  len(dataset_test)))

################################################
# Check categories - for debugging only
################################################
categories = dataset_train.synsets
NUM_CLASSES = len(categories)
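
# Quick sanity check (a sketch): pull one batch and confirm the transform above yields
# channel-first images.
for batch_data, batch_label in dataloader_train:
    print(batch_data.shape, batch_label.shape)  # expected (BATCH_SIZE, 3, 224, 224) and (BATCH_SIZE,)
    break
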
net = vision.resnet18_v2(pretrained=True, ctx=ctx)
net = net.features


def transform(image, label):
    resized = mx.image.resize_short(image, SIZE[0]).astype('float32')
    cropped, crop_info = mx.image.center_crop(resized, SIZE)
    cropped /= 255.
    normalized = mx.image.color_normalize(cropped,
                                          mean=MEAN_IMAGE,
                                          std=STD_IMAGE)
    transposed = nd.transpose(normalized, (2, 0, 1))
    return transposed, label


# Create an ImageFolderDataset on an empty temporary folder
empty_folder = tempfile.mkdtemp()
dataset = ImageFolderDataset(root=empty_folder, transform=transform)
print(dataset)


def download_files():
    import urllib.request
    for asin, url in data_image.items():
        path = os.path.join(images_path, asin + '.jpg')
        if not os.path.isfile(path):
            response = urllib.request.urlopen(url)
            print(path)
            with open(path, "wb") as local_file:
                local_file.write(response.read())


download_files()
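
# Featurization sketch: run one downloaded image through the pretrained feature
# extractor. 'B000123456' is a placeholder ASIN; SIZE, MEAN_IMAGE, STD_IMAGE, ctx and
# images_path are assumed to be defined above.
img = mx.image.imread(os.path.join(images_path, 'B000123456.jpg'))
img_data, _ = transform(img, 0)
features = net(img_data.expand_dims(axis=0).as_in_context(ctx))
print(features.shape)  # penultimate-layer ResNet-18 features for a single image
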
def transform(image, label):
    # resize the shorter edge to TARGET_SIZE
    resized = mx.image.resize_short(image, TARGET_SIZE)
    # center-crop an area of size (224, 224)
    cropped, crop_info = mx.image.center_crop(resized, SIZE)
    # transpose the channels to (3, 224, 224)
    transposed = nd.transpose(cropped, (2, 0, 1))
    return transposed, label


################################################
# Loading Images from folders
################################################
dataset_test = ImageFolderDataset(root=test_data_dir, transform=transform)

dataloader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE,
                             # last_batch='discard',
                             shuffle=True, num_workers=NUM_WORKERS)

print("Test dataset: {} images".format(len(dataset_test)))

################################################
# Check categories - for debugging only
################################################
categories = dataset_test.synsets
NUM_CLASSES = len(categories)
print(categories)
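
# Evaluation sketch: compute accuracy over the test loader, assuming a trained `net`
# and a `ctx` (e.g. mx.gpu(0) or mx.cpu()) are defined elsewhere in this script.
metric = mx.metric.Accuracy()
for batch_data, batch_label in dataloader_test:
    batch_data = batch_data.astype('float32').as_in_context(ctx)
    batch_label = batch_label.as_in_context(ctx)
    metric.update(labels=batch_label, preds=net(batch_data))
print("Test accuracy: %.3f" % metric.get()[1])
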
def main(train_list, val_list, model, exp, saved_model, batch_size, optimizer,
         nb_epochs, augment, max_lr, min_lr, loss_function, train_all, nb_frames,
         eager, params=None, **kwargs):
    print("Unused arguments:", kwargs)

    setname = train_list.split(os.sep)[0]

    # Timestamp to name experiment folder
    xptime = strftime("%Y-%m-%d_%Hh%Mm%Ss", gmtime())
    xp_folder = "experiments/%s-%s-%s_%s" % (setname, model, exp, xptime)

    # Make folders
    mkdir_p(xp_folder)
    mkdir_p(os.path.join(xp_folder, 'checkpoints'))
    mkdir_p(os.path.join(xp_folder, 'tb'))
    print("\nSaving experiment data to:", xp_folder)

    # Save command (as well as possible)
    with open(os.path.join(xp_folder, 'command.sh'), "w") as f:
        command = " ".join(sys.argv[:]) + "\n"
        f.write(command)

    # Save employed parameters for future reference
    if params is not None:
        write_params(os.path.join(xp_folder, 'params.json'), params)

    #############
    # Callbacks #
    #############

    # Helper: Save the model.
    ckpt_fmt = os.path.join(
        xp_folder, 'checkpoints',
        model + '-' + exp + '.{epoch:03d}-loss{val_loss:.3f}-acc{val_acc:.3f}.hdf5')
    checkpointer = ModelCheckpoint(filepath=ckpt_fmt,
                                   verbose=1,
                                   save_best_only=True,
                                   monitor='val_acc')

    # Helper: TensorBoard
    tb = HistoryKeeper(logdir=os.path.join(xp_folder),
                       keys=['val_acc', 'val_loss', 'train_time', 'val_time'])

    # Helper: Stop when we stop learning.
    # early_stopper = EarlyStopper(patience=15)

    # Helper: Terminate when finding a NaN loss
    nan_term = TerminateOnNaN()

    callbacks = [tb, checkpointer, nan_term]

    #############
    #  Loading  #
    #############
    if augment:
        augmenter = default_augmenter(strip_size=4)
    else:
        augment = False
        augmenter = None

    # Dataset classes (fall back to preprocessing only when no augmenter is configured)
    if augmenter is not None:
        transform = lambda data, label: (augmenter(preprocess(data)), label)
    else:
        transform = lambda data, label: (preprocess(data), label)
    train_data = ImageFolderDataset(train_list, transform=transform)
    val_data = ImageFolderDataset(val_list)
    img_shape = train_data[0][0].shape

    # Train loader
    train_loader = DataLoader(train_data,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=10)
    nb_samples = len(train_data)  # loader should provide the number of samples

    # Validation loader
    val_loader = DataLoader(val_data,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=10)
    nb_validation = len(val_data)  # loader should provide the number of samples

    # Compute number of steps
    steps_per_epoch = math.ceil(nb_samples / batch_size)
    validation_steps = math.ceil(nb_validation / batch_size)

    # The model
    net = ResearchModels(8, model, saved_model, input_shape=img_shape,
                         train_all=train_all).model

    # A little more verbosity
    print("************************************")
    if train_all:
        print("Train all layers.")
    print("Max lr:", max_lr, " Min lr:", min_lr)
    print("Batch size:", batch_size)
    print(nb_samples, "training samples,", steps_per_epoch, "steps per epoch")
    print(nb_validation, "validation samples,", validation_steps, "validation steps")
    print("Optimizer:", optimizer)
    if augment:
        print("Using data augmentation")
    else:
        print("WARNING: Not using data augmentation")
    print("************************************")

    ############################
    #  Loss and Optimization   #
    ############################
    trainer = gluon.Trainer(net.collect_params(), optimizer,
                            {'learning_rate': max_lr})
    if loss_function == 'categorical_crossentropy':
        loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
        loss_fn.hybridize()

    ############
    # Training #
    ############
    progress_desc = "Epoch %03d - acc %.3f - loss %.3f "
    acc = Accuracy()
    loss = Loss()

    start_time = time()
    for epoch in range(1, nb_epochs + 1):
        nb_batches = 0
        tic = time()
        acc.reset()
        loss.reset()
        train_time = 0

        t = tqdm(train_loader, unit='batch')
        for data, label in t:
            size = data.shape[0]
            data = data.copyto(mx.gpu(0))
            label = label.copyto(mx.gpu(0))

            start = time()
            with autograd.record():
                output = net(data)
                l = loss_fn(output, label)
            l.backward()
            end = time()
            train_time += end - start

            # update parameters
            trainer.step(size)

            acc.update(preds=output, labels=label)
            loss.update(preds=l, _=None)
            nb_batches += 1
            t.set_description(progress_desc % (epoch, acc.get()[1], loss.get()[1]))

        train_loss = loss.get()[1]
        train_acc = acc.get()[1]
        # reset both metrics before validation (otherwise training values leak in)
        acc.reset()
        loss.reset()

        val_time = 0
        # calculate validation accuracy
        tval = tqdm(val_loader, leave=False, desc='Running validation', unit='batch')
        for data, label in tval:
            data = data.copyto(mx.gpu(0))
            label = label.copyto(mx.gpu(0))

            # Compute outputs
            start = time()
            output = net(data)
            l = loss_fn(output, label)
            end = time()
            val_time += end - start

            # Compute metrics
            loss.update(preds=l, _=None)
            acc.update(preds=output, labels=label)

        val_loss = loss.get()[1]
        val_acc = acc.get()[1]

        print("Epoch %d: loss %.3f, acc %.3f, val_loss %.3f, val_acc %.3f, in %.1f sec"
              % (epoch, train_loss, train_acc, val_loss, val_acc, time() - tic))
        print("--------------------------------------------------------------------------------")

        stop = False
        train_info = {
            'epoch': epoch,
            'loss': train_loss,
            'acc': train_acc,
            'val_loss': val_loss,
            'val_acc': val_acc,
            'train_time': train_time,
            'val_time': val_time
        }
        for cb in callbacks:
            if cb(net, train_info):
                stop = True
        if stop:
            break
        print()

    hours, rem = divmod(time() - start_time, 3600)
    days, hours = divmod(hours, 24)
    minutes, seconds = divmod(rem, 60)
    print("%d training epochs in %dd, %dh%dm%.2fs."
          % (epoch, int(days), int(hours), int(minutes), seconds))
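
# Invocation sketch with placeholder arguments; in practice these values presumably
# come from a command-line parser.
if __name__ == '__main__':
    main(train_list='myset/train', val_list='myset/val', model='resnet18',
         exp='baseline', saved_model=None, batch_size=64, optimizer='adam',
         nb_epochs=50, augment=True, max_lr=1e-3, min_lr=1e-5,
         loss_function='categorical_crossentropy', train_all=False,
         nb_frames=1, eager=False)
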
def prepare(transaction, dataset, n_worker):
    # `resize_worker` and `sizes` are assumed to be defined elsewhere in this script
    resize_fn = partial(resize_worker, sizes=sizes)

    files = sorted(dataset.items, key=lambda x: x[0])
    files = [(i, file) for i, (file, label) in enumerate(files)]
    total = 0

    with multiprocessing.Pool(n_worker) as pool:
        for i, imgs in tqdm(pool.imap_unordered(resize_fn, files)):
            for size, img in zip(sizes, imgs):
                key = f'{size}-{str(i).zfill(5)}'.encode('utf-8')
                transaction.put(key, img)

            total += 1

        transaction.put('length'.encode('utf-8'), str(total).encode('utf-8'))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--out', type=str)
    parser.add_argument('--n_worker', type=int, default=8)
    parser.add_argument('--path', type=str)
    args = parser.parse_args()

    imgset = ImageFolderDataset(args.path)

    with lmdb.open(args.out, map_size=1024 ** 4, readahead=False) as env:
        with env.begin(write=True) as txn:
            prepare(txn, imgset, args.n_worker)
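
# Read-back sketch: open the LMDB written above and fetch one record. 'out.lmdb' is a
# placeholder path and 256 an assumed entry of `sizes`; keys follow the
# '<size>-<5-digit index>' layout used by prepare().
import lmdb

with lmdb.open('out.lmdb', readonly=True, lock=False, readahead=False) as env:
    with env.begin(write=False) as txn:
        length = int(txn.get('length'.encode('utf-8')))
        img_bytes = txn.get('256-00000'.encode('utf-8'))
print(length, 'images;', len(img_bytes), 'bytes in the first 256px record')
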
def train(current_host, hosts, num_gpus, log_interval, channel_input_dirs, batch_size,
          epochs, learning_rate, momentum, wd, resnet_size):
    print("Using Resnet {} model".format(resnet_size))

    model_options = {
        '18': models.resnet18_v2,
        '34': models.resnet34_v2,
        '50': models.resnet50_v2,
        '101': models.resnet101_v2,
        '152': models.resnet152_v2
    }
    if resnet_size not in model_options:
        raise Exception('Resnet size must be one of 18, 34, 50, 101, or 152')

    if len(hosts) == 1:
        kvstore = 'device' if num_gpus > 0 else 'local'
    else:
        kvstore = 'dist_device_sync'

    if num_gpus > 0:
        ctx = mx.gpu()
    else:
        ctx = mx.cpu()
    print(ctx)

    selected_model = model_options[resnet_size]
    pretrained_net = selected_model(ctx=ctx, pretrained=True)
    net = selected_model(ctx=ctx, pretrained=False, classes=2)  # two output classes
    net.features = pretrained_net.features

    part_index = 0
    for i, host in enumerate(hosts):
        if host == current_host:
            part_index = i
            break

    data_dir = channel_input_dirs

    CHECKPOINTS_DIR = '/opt/ml/checkpoints'
    os.makedirs(CHECKPOINTS_DIR, exist_ok=True)
    checkpoints_enabled = os.path.exists(CHECKPOINTS_DIR)

    train_dataset = ImageFolderDataset('/opt/ml/input/data/training/train')
    test_dataset = ImageFolderDataset('/opt/ml/input/data/training/test')

    transform_func = transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.49139969, 0.48215842, 0.44653093],
                             std=[0.20220212, 0.19931542, 0.20086347])
    ])

    train_transformed = train_dataset.transform_first(transform_func)
    test_transformed = test_dataset.transform_first(transform_func)
    print("Transformed Training and Test Files")

    train_data = gluon.data.DataLoader(train_transformed, batch_size=batch_size,
                                       shuffle=True, num_workers=1)
    test_data = gluon.data.DataLoader(test_transformed, batch_size=batch_size,
                                      num_workers=1)
    print("Initialized Batching Operation")

    net.initialize(mx.init.Xavier(), ctx=ctx)

    # Trainer updates the parameters with the gradients.
    criterion = gluon.loss.SoftmaxCrossEntropyLoss()
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            optimizer_params={
                                'learning_rate': learning_rate,
                                'momentum': momentum,
                                'wd': wd
                            },
                            kvstore=kvstore)
    metric = mx.metric.Accuracy()
    net.hybridize()

    best_loss = 5.0
    for epoch in range(epochs):
        # training loop
        cumulative_train_loss = mx.nd.zeros(1, ctx=ctx)
        training_samples = 0
        metric.reset()
        for batch_idx, (data, label) in enumerate(train_data):
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            with ag.record():
                output = net(data)
                loss = criterion(output, label)
            loss.backward()
            trainer.step(data.shape[0])
            metric.update(label, output)

            cumulative_train_loss += loss.sum()
            training_samples += data.shape[0]

        train_loss = cumulative_train_loss.asscalar() / training_samples
        name, train_acc = metric.get()

        # validation loop
        cumulative_valid_loss = mx.nd.zeros(1, ctx)
        valid_samples = 0
        metric.reset()
        for batch_idx, (data, label) in enumerate(test_data):
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            output = net(data)
            loss = criterion(output, label)
            cumulative_valid_loss += loss.sum()
            valid_samples += data.shape[0]
            metric.update(label, output)

        valid_loss = cumulative_valid_loss.asscalar() / valid_samples
        name, val_acc = metric.get()

        print("Epoch {}, training loss: {:.2f}, validation loss: {:.2f}, "
              "train accuracy: {:.2f}, validation accuracy: {:.2f}"
              .format(epoch, train_loss, valid_loss, train_acc, val_acc))

        # only save params on primary host
        if checkpoints_enabled and current_host == hosts[0]:
            if valid_loss < best_loss:
                best_loss = valid_loss
                logging.info('Saving the model, params and optimizer state')
                net.export(CHECKPOINTS_DIR + "/%.4f-hotdog" % (best_loss), epoch)
                save(net, CHECKPOINTS_DIR)
                trainer.save_states(CHECKPOINTS_DIR + '/%.4f-hotdog-%d.states'
                                    % (best_loss, epoch))

    return net
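
# Checkpoint re-loading sketch (standalone): net.export(prefix, epoch) above writes
# '<prefix>-symbol.json' and '<prefix>-<epoch, 4 digits>.params'; the file names below
# stand in for one such pair.
import mxnet as mx
from mxnet import gluon

loaded_net = gluon.SymbolBlock.imports(
    '/opt/ml/checkpoints/0.1234-hotdog-symbol.json', ['data'],
    '/opt/ml/checkpoints/0.1234-hotdog-0000.params', ctx=mx.cpu())
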