def log_record(self, config):
    """Record the loss value of the testing data during training.

    :param config: experiment configuration to log
    :return: None
    """
    log_dir = "log_{}".format('AI_GAN')
    tl.files.exists_or_mkdir(log_dir)
    self.log_all, self.log_all_filename = utils.logging_setup(log_dir)
    utils.log_config(self.log_all_filename, config)
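# utils.logging_setup(log_dir) is not shown in this section; here it takes a
# directory and is unpacked into a logger plus a log-file path. A minimal sketch
# of such a helper (an assumption, not necessarily the project's exact code):
import logging
import os
import time

def logging_setup(log_dir):
    """Create a timestamped log file inside log_dir; return the logger and the file path."""
    log_filename = os.path.join(log_dir, 'log_{}.txt'.format(time.strftime("%Y%m%d-%H%M%S")))
    logger = logging.getLogger(log_dir)
    logger.setLevel(logging.INFO)
    logger.addHandler(logging.FileHandler(log_filename))
    logger.addHandler(logging.StreamHandler())
    return logger, log_filename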
from utils import spacy_nlp, logging_setup
from rasa.nlu.tokenizers.whitespace_tokenizer import WhitespaceTokenizer
from rasa.nlu.tokenizers.spacy_tokenizer import SpacyTokenizer
from rasa.nlu.tokenizers.mitie_tokenizer import MitieTokenizer
from rasa.nlu.tokenizers.convert_tokenizer import ConveRTTokenizer
from rasa.nlu.tokenizers.lm_tokenizer import LanguageModelTokenizer
from rasa.nlu.utils.hugging_face.hf_transformers import HFTransformersNLP
from rasa.nlu.training_data import Message
from rasa.nlu.constants import (TEXT, SPACY_DOCS)

logger = logging_setup()

test_input = "Okay, pick up this yellow banana for me."
message = Message(test_input)

tk = WhitespaceTokenizer()
tokens = tk.tokenize(message, attribute=TEXT)
logger.info('Whitespace: {}'.format([t.text for t in tokens]))

tk = SpacyTokenizer()
message.set(SPACY_DOCS[TEXT], spacy_nlp(test_input))
tokens = tk.tokenize(message, attribute=TEXT)
logger.info('SpaCy: {}'.format([t.text for t in tokens]))

tk = MitieTokenizer()
tokens = tk.tokenize(message, attribute=TEXT)
logger.info('Mitie: {}'.format([t.text for t in tokens]))

tk = ConveRTTokenizer()
# (the snippet was truncated here; completed following the same tokenize-and-log pattern as above)
tokens = tk.tokenize(message, attribute=TEXT)
logger.info('ConveRT: {}'.format([t.text for t in tokens]))
def log_record(self, config):
    log_dir = "log_{}".format('BrainQuantAI_Part_one')
    tl.files.exists_or_mkdir(log_dir)
    self.log_all, self.log_all_filename = utils.logging_setup(log_dir)
    utils.log_config(self.log_all_filename, config)
from logging import log
import os
import sys
from argparse import ArgumentParser

sys.path.append(os.path.realpath("framework"))
from nydus_anchor import NydusAnchor
from nydusify import Nydusify
from utils import logging_setup

logging_setup()

# alpine:3.10.2 fedora:30 rethinkdb:2.3.6 postgres:13.1 redis:5.0.5 mariadb:10.5 python:3.9 golang:1.12.9 gcc:10.2.0 jruby:9.2.8.0
# perl:5.30 php:7.3.8 pypy:3.5 r-base:3.6.1 drupal:8.7.6 jenkins:2.60.3 node:13.13.0 tomcat:10.0.0-jdk15-openjdk-buster wordpress:5.7

if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument(
        "--sources",
        nargs="+",
        type=str,
        default="",
    )
    parser.add_argument(
        "--backend",
        type=str,
        default="",
parser.add_argument('--label_smooth', type=float, default=0.1, help='label smoothing')
parser.add_argument('--gamma', type=float, default=0.97, help='learning rate decay')
parser.add_argument('--decay_period', type=int, default=1, help='epochs between two learning rate decays')
parser.add_argument('--parallel', action='store_true', default=False, help='data parallelism')
parser.add_argument('--ops', type=str, default='OPS',
                    help='which operations to use, options are OPS and DARTS_OPS')
parser.add_argument('--primitives', type=str, default='PRIMITIVES',
                    help='which primitive layers to use inside a cell search space,'
                         ' options are PRIMITIVES and DARTS_PRIMITIVES')
parser.add_argument('--flops', action='store_true', default=False,
                    help='count flops (floating point operations) and exit.')
args = parser.parse_args()

args.save = 'eval-{}-{}-{}-{}'.format(time.strftime("%Y%m%d-%H%M%S"), args.save, args.dataset, args.arch)
utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))
log_file_path = os.path.join(args.save, 'log.txt')
logger = utils.logging_setup(log_file_path)
params_path = os.path.join(args.save, 'commandline_args.json')
with open(params_path, 'w') as f:
    json.dump(vars(args), f)

CLASSES = 1000


class CrossEntropyLabelSmooth(nn.Module):

    def __init__(self, num_classes, epsilon):
        super(CrossEntropyLabelSmooth, self).__init__()
        self.num_classes = num_classes
        self.epsilon = epsilon
        self.logsoftmax = nn.LogSoftmax(dim=1)
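    # NOTE: the snippet above is truncated before the class's forward method.
    # A minimal sketch of the standard label-smoothing forward pass used in
    # DARTS-style repositories; the (inputs, targets) signature is an assumption.
    def forward(self, inputs, targets):
        log_probs = self.logsoftmax(inputs)  # [batch, num_classes]
        # one-hot targets, smoothed toward the uniform distribution
        targets = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1)
        targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes
        # batch mean of the smoothed cross entropy
        return (-targets * log_probs).mean(0).sum()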
def main():
    parser = argparse.ArgumentParser("Common Argument Parser")
    parser.add_argument('--data', type=str, default='../data', help='location of the data corpus')
    parser.add_argument('--dataset', type=str, default='cifar10',
                        help='which dataset: cifar10, mnist, emnist, fashion, svhn, stl10, devanagari')
    parser.add_argument('--batch_size', type=int, default=64, help='batch size')
    parser.add_argument('--learning_rate', type=float, default=0.025, help='init learning rate')
    parser.add_argument('--learning_rate_min', type=float, default=1e-8, help='min learning rate')
    parser.add_argument('--lr_power_annealing_exponent_order', type=float, default=2,
                        help='Cosine Power Annealing Schedule Base, larger numbers make '
                             'the exponential more dominant, smaller make cosine more dominant, '
                             '1 returns to standard cosine annealing.')
    parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
    parser.add_argument('--weight_decay', '--wd', dest='weight_decay', type=float, default=3e-4,
                        help='weight decay')
    parser.add_argument('--partial', default=1/8, type=float,
                        help='partially adaptive parameter p in Padam')
    parser.add_argument('--report_freq', type=float, default=50, help='report frequency')
    parser.add_argument('--gpu', type=int, default=0, help='gpu device id')
    parser.add_argument('--epochs', type=int, default=2000, help='num of training epochs')
    parser.add_argument('--start_epoch', default=1, type=int, metavar='N',
                        help='manual epoch number (useful for restarts)')
    parser.add_argument('--warmup_epochs', type=int, default=5, help='num of warmup training epochs')
    parser.add_argument('--warm_restarts', type=int, default=20, help='warm restarts of cosine annealing')
    parser.add_argument('--init_channels', type=int, default=36, help='num of init channels')
    parser.add_argument('--mid_channels', type=int, default=32, help='C_mid channels in choke SharpSepConv')
    parser.add_argument('--layers', type=int, default=20, help='total number of layers')
    parser.add_argument('--model_path', type=str, default='saved_models', help='path to save the model')
    parser.add_argument('--auxiliary', action='store_true', default=False, help='use auxiliary tower')
    parser.add_argument('--mixed_auxiliary', action='store_true', default=False,
                        help='Learn weights for auxiliary networks during training. Overrides auxiliary flag')
    parser.add_argument('--auxiliary_weight', type=float, default=0.4, help='weight for auxiliary loss')
    parser.add_argument('--cutout', action='store_true', default=False, help='use cutout')
    parser.add_argument('--cutout_length', type=int, default=16, help='cutout length')
    parser.add_argument('--autoaugment', action='store_true', default=False,
                        help='use cifar10 autoaugment https://arxiv.org/abs/1805.09501')
    parser.add_argument('--random_eraser', action='store_true', default=False, help='use random eraser')
    parser.add_argument('--drop_path_prob', type=float, default=0.2, help='drop path probability')
    parser.add_argument('--save', type=str, default='EXP', help='experiment name')
    parser.add_argument('--seed', type=int, default=0, help='random seed')
    parser.add_argument('--arch', type=str, default='DARTS', help='which architecture to use')
    parser.add_argument('--ops', type=str, default='OPS',
                        help='which operations to use, options are OPS and DARTS_OPS')
    parser.add_argument('--primitives', type=str, default='PRIMITIVES',
                        help='which primitive layers to use inside a cell search space,'
                             ' options are PRIMITIVES, SHARPER_PRIMITIVES, and DARTS_PRIMITIVES')
    parser.add_argument('--optimizer', type=str, default='sgd',
                        help='which optimizer to use, options are padam and sgd')
    parser.add_argument('--load', type=str, default='', metavar='PATH',
                        help='load weights at specified location')
    parser.add_argument('--grad_clip', type=float, default=5, help='gradient clipping')
    parser.add_argument('--flops', action='store_true', default=False,
                        help='count flops (floating point operations) and exit.')
    parser.add_argument('-e', '--evaluate', dest='evaluate', type=str, metavar='PATH', default='',
                        help='evaluate model at specified path on training, test, and validation datasets')
    parser.add_argument('--multi_channel', action='store_true', default=False,
                        help='perform multi channel search, a completely separate search space')
    parser.add_argument('--load_args', type=str, default='', metavar='PATH',
                        help='load command line args from a json file, this will override '
                             'all currently set args except for --evaluate, and arguments '
                             'that did not exist when the json file was originally saved out.')
    parser.add_argument('--layers_of_cells', type=int, default=8,
                        help='total number of cells in the whole network, default is 8 cells')
    parser.add_argument('--layers_in_cells', type=int, default=4,
                        help='Total number of nodes in each cell, aka number of steps,'
                             ' default is 4 nodes, which implies 8 ops')
    parser.add_argument('--weighting_algorithm', type=str, default='scalar',
                        help='which weighting algorithm to use, options are '
                             '"max_w" ((1. - max_w + w) * op) and "scalar" (w * op)')
    # TODO(ahundt) remove final path and switch back to genotype
    parser.add_argument('--load_genotype', type=str, default=None, help='Name of genotype to be used')
    parser.add_argument('--simple_path', default=True, action='store_false',
                        help='Final model is a simple path (MultiChannelNetworkModel)')
    args = parser.parse_args()
    args = utils.initialize_files_and_args(args)
    logger = utils.logging_setup(args.log_file_path)

    if not torch.cuda.is_available():
        logger.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logger.info('gpu device = %d' % args.gpu)
    logger.info("args = %s", args)

    DATASET_CLASSES = dataset.class_dict[args.dataset]
    DATASET_CHANNELS = dataset.inp_channel_dict[args.dataset]
    DATASET_MEAN = dataset.mean_dict[args.dataset]
    DATASET_STD = dataset.std_dict[args.dataset]
    logger.info('output channels: ' + str(DATASET_CLASSES))

    # load the correct ops dictionary
    op_dict_to_load = "operations.%s" % args.ops
    logger.info('loading op dict: ' + str(op_dict_to_load))
    op_dict = eval(op_dict_to_load)

    # load the correct primitives list
    primitives_to_load = "genotypes.%s" % args.primitives
    logger.info('loading primitives: ' + primitives_to_load)
    primitives = eval(primitives_to_load)
    logger.info('primitives: ' + str(primitives))
    genotype = eval("genotypes.%s" % args.arch)

    # create the neural network
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    if args.multi_channel:
        final_path = None
        if args.load_genotype is not None:
            genotype = getattr(genotypes, args.load_genotype)
            print(genotype)
            if type(genotype[0]) is str:
                logger.info('Path :%s', genotype)
        # TODO(ahundt) remove final path and switch back to genotype
        cnn_model = MultiChannelNetwork(
            args.init_channels, DATASET_CLASSES, layers=args.layers_of_cells,
            criterion=criterion, steps=args.layers_in_cells,
            weighting_algorithm=args.weighting_algorithm, genotype=genotype)
        flops_shape = [1, 3, 32, 32]
    elif args.dataset == 'imagenet':
        cnn_model = NetworkImageNet(args.init_channels, DATASET_CLASSES, args.layers,
                                    args.auxiliary, genotype, op_dict=op_dict, C_mid=args.mid_channels)
        flops_shape = [1, 3, 224, 224]
    else:
        cnn_model = NetworkCIFAR(args.init_channels, DATASET_CLASSES, args.layers,
                                 args.auxiliary, genotype, op_dict=op_dict, C_mid=args.mid_channels)
        flops_shape = [1, 3, 32, 32]
    cnn_model = cnn_model.cuda()
    logger.info("param size = %fMB", utils.count_parameters_in_MB(cnn_model))
    if args.flops:
        logger.info('flops_shape = ' + str(flops_shape))
        logger.info("flops = " + utils.count_model_flops(cnn_model, data_shape=flops_shape))
        return

    optimizer = torch.optim.SGD(
        cnn_model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    # Get preprocessing functions (i.e. transforms) to apply on data
    train_transform, valid_transform = utils.get_data_transforms(args)
    if args.evaluate:
        # evaluate the train dataset without augmentation
        train_transform = valid_transform

    # Get the training queue, use full training and test set
    train_queue, valid_queue = dataset.get_training_queues(
        args.dataset, train_transform, valid_transform, args.data,
        args.batch_size, train_proportion=1.0, search_architecture=False)

    test_queue = None
    if args.dataset == 'cifar10':
        # evaluate best model weights on cifar 10.1
        # https://github.com/modestyachts/CIFAR-10.1
        test_data = cifar10_1.CIFAR10_1(root=args.data, download=True, transform=valid_transform)
        test_queue = torch.utils.data.DataLoader(
            test_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=8)

    if args.evaluate:
        # evaluate the loaded model, print the result, and return
        logger.info("Evaluating inference with weights file: " + args.load)
        eval_stats = evaluate(
            args, cnn_model, criterion, args.load,
            train_queue=train_queue, valid_queue=valid_queue, test_queue=test_queue)
        with open(args.stats_file, 'w') as f:
            arg_dict = vars(args)
            arg_dict.update(eval_stats)
            json.dump(arg_dict, f)
        logger.info("flops = " + utils.count_model_flops(cnn_model))
        logger.info(utils.dict_to_log_string(eval_stats))
        logger.info('\nEvaluation of Loaded Model Complete! Save dir: ' + str(args.save))
        return

    lr_schedule = cosine_power_annealing(
        epochs=args.epochs, max_lr=args.learning_rate, min_lr=args.learning_rate_min,
        warmup_epochs=args.warmup_epochs, exponent_order=args.lr_power_annealing_exponent_order)
    epochs = np.arange(args.epochs) + args.start_epoch
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs))
    epoch_stats = []
    stats_csv = args.epoch_stats_file
    stats_csv = stats_csv.replace('.json', '.csv')
    with tqdm(epochs, dynamic_ncols=True) as prog_epoch:
        best_valid_acc = 0.0
        best_epoch = 0
        best_stats = {}
        stats = {}
        epoch_stats = []
        weights_file = os.path.join(args.save, 'weights.pt')
        for epoch, learning_rate in zip(prog_epoch, lr_schedule):
            # update the drop_path_prob augmentation
            cnn_model.drop_path_prob = args.drop_path_prob * epoch / args.epochs
            # update the learning rate
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate  # scheduler.get_lr()[0]
            train_acc, train_obj = train(args, train_queue, cnn_model, criterion, optimizer)
            val_stats = infer(args, valid_queue, cnn_model, criterion)
            stats.update(val_stats)
            stats['train_acc'] = train_acc
            stats['train_loss'] = train_obj
            stats['lr'] = learning_rate
            stats['epoch'] = epoch

            if stats['valid_acc'] > best_valid_acc:
                # new best epoch, save weights
                utils.save(cnn_model, weights_file)
                best_epoch = epoch
                best_stats.update(copy.deepcopy(stats))
                best_valid_acc = stats['valid_acc']
                best_train_loss = train_obj
                best_train_acc = train_acc
            # else:
            #     # not best epoch, load best weights
            #     utils.load(cnn_model, weights_file)
            logger.info('epoch, %d, train_acc, %f, valid_acc, %f, train_loss, %f, valid_loss, %f, '
                        'lr, %e, best_epoch, %d, best_valid_acc, %f, ' + utils.dict_to_log_string(stats),
                        epoch, train_acc, stats['valid_acc'], train_obj, stats['valid_loss'],
                        learning_rate, best_epoch, best_valid_acc)
            stats['train_acc'] = train_acc
            stats['train_loss'] = train_obj
            epoch_stats += [copy.deepcopy(stats)]
            with open(args.epoch_stats_file, 'w') as f:
                json.dump(epoch_stats, f, cls=utils.NumpyEncoder)
            utils.list_of_dicts_to_csv(stats_csv, epoch_stats)

    # get stats from best epoch including cifar10.1
    eval_stats = evaluate(args, cnn_model, criterion, weights_file,
                          train_queue, valid_queue, test_queue)
    with open(args.stats_file, 'w') as f:
        arg_dict = vars(args)
        arg_dict.update(eval_stats)
        json.dump(arg_dict, f, cls=utils.NumpyEncoder)
    with open(args.epoch_stats_file, 'w') as f:
        json.dump(epoch_stats, f, cls=utils.NumpyEncoder)
    logger.info(utils.dict_to_log_string(eval_stats))
    logger.info('Training of Final Model Complete! Save dir: ' + str(args.save))
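# The training loop above serializes stats with json.dump(..., cls=utils.NumpyEncoder).
# utils.NumpyEncoder is not shown in this section; a minimal sketch of such an encoder
# (an assumption, not necessarily the project's exact implementation):
import json
import numpy as np

class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts numpy scalars and arrays to plain Python types."""
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)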
def main():
    global best_combined_error, args, logger
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1
    # commented because it is now set as an argparse param.
    # args.gpu = 0
    args.world_size = 1

    if args.distributed:
        args.gpu = args.local_rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
        args.world_size = torch.distributed.get_world_size()

    # note the gpu is used for directory creation and log files,
    # which is needed when run as multiple processes
    args = utils.initialize_files_and_args(args)
    logger = utils.logging_setup(args.log_file_path)

    # load the correct ops dictionary
    op_dict_to_load = "operations.%s" % args.ops
    logger.info('loading op dict: ' + str(op_dict_to_load))
    op_dict = eval(op_dict_to_load)

    # load the correct primitives list
    primitives_to_load = "genotypes.%s" % args.primitives
    logger.info('loading primitives: ' + primitives_to_load)
    primitives = eval(primitives_to_load)
    logger.info('primitives: ' + str(primitives))

    # get the number of output channels
    classes = dataset.costar_class_dict[args.feature_mode]

    if args.arch == 'NetworkResNetCOSTAR':
        # baseline model for comparison
        model = NetworkResNetCOSTAR(args.init_channels, classes, args.layers, args.auxiliary,
                                    None, vector_size=VECTOR_SIZE, op_dict=op_dict,
                                    C_mid=args.mid_channels)
    else:
        # create the neural network from the chosen genotype
        genotype = eval("genotypes.%s" % args.arch)
        model = NetworkCOSTAR(args.init_channels, classes, args.layers, args.auxiliary,
                              genotype, vector_size=VECTOR_SIZE, op_dict=op_dict,
                              C_mid=args.mid_channels)
    model.drop_path_prob = 0.0
    # if args.pretrained:
    #     logger.info("=> using pre-trained model '{}'".format(args.arch))
    #     model = models.__dict__[args.arch](pretrained=True)
    # else:
    #     logger.info("=> creating model '{}'".format(args.arch))
    #     model = models.__dict__[args.arch]()

    if args.sync_bn:
        import apex
        logger.info("using apex synced BN")
        model = apex.parallel.convert_syncbn_model(model)

    model = model.cuda()
    if args.distributed:
        # By default, apex.parallel.DistributedDataParallel overlaps communication with
        # computation in the backward pass.
        # model = DDP(model)
        # delay_allreduce delays all communication to the end of the backward pass.
        model = DDP(model, delay_allreduce=True)

    # define loss function (criterion) and optimizer
    criterion = nn.MSELoss().cuda()
    # NOTE(rexxarchl): MSLE loss, indicated as better for rotation in
    # costar_hyper/costar_block_stacking_train_regression.py,
    # is not available in PyTorch by default

    # Scale learning rate based on global batch size
    args.learning_rate = args.learning_rate * float(args.batch_size * args.world_size) / 256.
    init_lr = args.learning_rate / args.warmup_lr_divisor
    optimizer = torch.optim.SGD(model.parameters(), init_lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    # epoch_count = args.epochs - args.start_epoch
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(epoch_count))
    # scheduler = warmup_scheduler.GradualWarmupScheduler(
    #     optimizer, args.warmup_lr_divisor, args.warmup_epochs, scheduler)

    # Optionally resume from a checkpoint
    if args.resume or args.evaluate:
        if args.evaluate:
            args.resume = args.evaluate

        # Use a local scope to avoid dangling references
        def resume():
            # declare the global so the restored value propagates out of this local scope
            global best_combined_error
            if os.path.isfile(args.resume):
                logger.info("=> loading checkpoint '{}'".format(args.resume))
                checkpoint = torch.load(
                    args.resume,
                    map_location=lambda storage, loc: storage.cuda(args.gpu))
                args.start_epoch = checkpoint['epoch'] + 1
                if 'best_combined_error' in checkpoint:
                    best_combined_error = checkpoint['best_combined_error']
                model.load_state_dict(checkpoint['state_dict'])
                optimizer.load_state_dict(checkpoint['optimizer'])
                # TODO(ahundt) make sure scheduler loading isn't broken
                if 'lr_scheduler' in checkpoint:
                    scheduler.load_state_dict(checkpoint['lr_scheduler'])
                elif 'lr_schedule' in checkpoint:
                    lr_schedule = checkpoint['lr_schedule']
                logger.info("=> loaded checkpoint '{}' (epoch {})".format(
                    args.resume, checkpoint['epoch']))
            else:
                logger.info("=> no checkpoint found at '{}'".format(args.resume))

        resume()

    # Get preprocessing functions (i.e. transforms) to apply on data.
    # normalize_as_tensor = False because we normalize and convert to a
    # tensor in our custom prefetching function, rather than as part of
    # the transform preprocessing list.
    # train_transform, valid_transform = utils.get_data_transforms(args, normalize_as_tensor=False)
    # NOTE(rexxarchl): data transforms are not applicable for CoSTAR BSD at the moment
    train_transform = valid_transform = None

    evaluate = True if args.evaluate else False
    # Get the training queue, select training and validation from training set
    train_loader, val_loader = dataset.get_training_queues(
        args.dataset, train_transform, valid_transform, args.data, args.batch_size,
        train_proportion=1.0, collate_fn=fast_collate, distributed=args.distributed,
        num_workers=args.workers, costar_set_name=args.set_name,
        costar_subset_name=args.subset_name, costar_feature_mode=args.feature_mode,
        costar_version=args.version,
        costar_num_images_per_example=args.num_images_per_example,
        costar_output_shape=(224, 224, 3), costar_random_augmentation=None,
        costar_one_hot_encoding=True, evaluate=evaluate)

    if args.evaluate:
        # Load the test set
        test_loader = dataset.get_costar_test_queue(
            args.data, costar_set_name=args.set_name, costar_subset_name=args.subset_name,
            collate_fn=fast_collate, costar_feature_mode=args.feature_mode,
            costar_version=args.version,
            costar_num_images_per_example=args.num_images_per_example,
            costar_output_shape=(224, 224, 3), costar_random_augmentation=None,
            costar_one_hot_encoding=True)
        # Evaluate on all splits, without any augmentation
        validate(train_loader, model, criterion, args, prefix='evaluate_train_')
        validate(val_loader, model, criterion, args, prefix='evaluate_val_')
        validate(test_loader, model, criterion, args, prefix='evaluate_test_')
        return

    lr_schedule = cosine_power_annealing(
        epochs=args.epochs, max_lr=args.learning_rate, min_lr=args.learning_rate_min,
        warmup_epochs=args.warmup_epochs,
        exponent_order=args.lr_power_annealing_exponent_order,
        restart_lr=args.restart_lr)
    epochs = np.arange(args.epochs) + args.start_epoch
    stats_csv = args.epoch_stats_file
    stats_csv = stats_csv.replace('.json', '.csv')
    with tqdm(epochs, dynamic_ncols=True, disable=args.local_rank != 0,
              leave=False, initial=args.start_epoch) as prog_epoch:
        best_stats = {}
        stats = {}
        epoch_stats = []
        best_epoch = 0
        logger.info('Initial Learning Rate: ' + str(lr_schedule[0]))
        for epoch, learning_rate in zip(prog_epoch, lr_schedule):
            if args.distributed and train_loader.sampler is not None:
                train_loader.sampler.set_epoch(int(epoch))
            # if args.distributed:
            #     train_sampler.set_epoch(epoch)
            # update the learning rate
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate
            # scheduler.step()
            model.drop_path_prob = args.drop_path_prob * float(epoch) / float(args.epochs)
            # train for one epoch
            train_stats = train(train_loader, model, criterion, optimizer, int(epoch), args)
            if args.prof:
                break
            # evaluate on validation set
            combined_error, val_stats = validate(val_loader, model, criterion, args)
            stats.update(train_stats)
            stats.update(val_stats)
            # stats['lr'] = '{0:.5f}'.format(scheduler.get_lr()[0])
            stats['lr'] = '{0:.5f}'.format(learning_rate)
            stats['epoch'] = epoch

            # remember best combined_error and save checkpoint
            if args.local_rank == 0:
                is_best = combined_error < best_combined_error
                best_combined_error = min(combined_error, best_combined_error)
                stats['best_combined_error'] = '{0:.3f}'.format(best_combined_error)
                if is_best:
                    best_epoch = epoch
                    best_stats = copy.deepcopy(stats)
                stats['best_epoch'] = best_epoch

                stats_str = utils.dict_to_log_string(stats)
                logger.info(stats_str)
                save_checkpoint(
                    {
                        'epoch': epoch,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_combined_error': best_combined_error,
                        'optimizer': optimizer.state_dict(),
                        # 'lr_scheduler': scheduler.state_dict()
                        'lr_schedule': lr_schedule,
                        'stats': best_stats
                    }, is_best, path=args.save)
                prog_epoch.set_description(
                    'Overview ***** best_epoch: {0} best_valid_combined_error: {1:.2f} ***** Progress'
                    .format(best_epoch, best_combined_error))
            epoch_stats += [copy.deepcopy(stats)]
            with open(args.epoch_stats_file, 'w') as f:
                json.dump(epoch_stats, f, cls=utils.NumpyEncoder)
            utils.list_of_dicts_to_csv(stats_csv, epoch_stats)

    stats_str = utils.dict_to_log_string(best_stats, key_prepend='best_')
    logger.info(stats_str)
    with open(args.stats_file, 'w') as f:
        arg_dict = vars(args)
        arg_dict.update(best_stats)
        json.dump(arg_dict, f, cls=utils.NumpyEncoder)
    with open(args.epoch_stats_file, 'w') as f:
        json.dump(epoch_stats, f, cls=utils.NumpyEncoder)
    utils.list_of_dicts_to_csv(stats_csv, epoch_stats)
    logger.info('Training of Final Model Complete!')

    # Do a final evaluation
    logger.info('Final evaluation')
    # Load the best model
    best_model_path = os.path.join(args.save, 'model_best.pth.tar')
    best_model = torch.load(best_model_path,
                            map_location=lambda storage, loc: storage.cuda(args.gpu))
    model.load_state_dict(best_model['state_dict'])
    # optimizer.load_state_dict(best_model['optimizer'])
    logger.info("=> loaded best_model '{}' from epoch {}".format(
        best_model_path, best_model['epoch']))

    # Get the train and validation set in evaluate mode
    train_loader, val_loader = dataset.get_training_queues(
        args.dataset, train_transform, valid_transform, args.data, args.batch_size,
        train_proportion=1.0, collate_fn=fast_collate, distributed=args.distributed,
        num_workers=args.workers, costar_set_name=args.set_name,
        costar_subset_name=args.subset_name, costar_feature_mode=args.feature_mode,
        costar_version=args.version,
        costar_num_images_per_example=args.num_images_per_example,
        costar_output_shape=(224, 224, 3),
        costar_random_augmentation=None, costar_one_hot_encoding=True, evaluate=evaluate)
    # Get the test set
    test_loader = dataset.get_costar_test_queue(
        args.data, costar_set_name=args.set_name, costar_subset_name=args.subset_name,
        collate_fn=fast_collate, costar_feature_mode=args.feature_mode,
        costar_version=args.version,
        costar_num_images_per_example=args.num_images_per_example,
        costar_output_shape=(224, 224, 3), costar_random_augmentation=None,
        costar_one_hot_encoding=True)
    # Evaluate on all splits, without any augmentation
    validate(train_loader, model, criterion, args, prefix='best_final_train_')
    validate(val_loader, model, criterion, args, prefix='best_final_val_')
    validate(test_loader, model, criterion, args, prefix='best_final_test_')
    logger.info('Final evaluation complete! Save dir: ' + str(args.save))
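# save_checkpoint is called above but not defined in this section. A minimal sketch
# of the usual PyTorch checkpoint helper (an assumption, but consistent with the
# 'model_best.pth.tar' path loaded during the final evaluation):
import os
import shutil
import torch

def save_checkpoint(state, is_best, path='.', filename='checkpoint.pth.tar'):
    """Save the latest checkpoint; copy it to model_best.pth.tar when it is the best so far."""
    checkpoint_path = os.path.join(path, filename)
    torch.save(state, checkpoint_path)
    if is_best:
        shutil.copyfile(checkpoint_path, os.path.join(path, 'model_best.pth.tar'))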
NUM_CHANNELS = config.NUM_CHANNELS
MODEL_NAME = config.MODEL_NAME
tfrecord_filename = config.tfrecord_filename
TESTING_NUM = config.TESTING_NUM
Test_Batch_size = config.Test_Batch_size

### filenames
tfrecord_filename = config.tfrecord_filename
save_model_filename = config.save_model_filename
save_model_filename_best = config.save_model_filename_best
restore_model_filename = config.restore_model_filename
tl.files.exists_or_mkdir(save_model_filename)
tl.files.exists_or_mkdir(save_model_filename_best)

# log
log_dir = "log_{}".format(MODEL_NAME)
tl.files.exists_or_mkdir(log_dir)
log_all, log_eval, log_all_filename, log_eval_filename = utils.logging_setup(log_dir)
log_config(log_all_filename, config)
log_config(log_eval_filename, config)

# Create Input and Output
with tf.name_scope('input'):
    low_res_holder = tf.placeholder(
        tf.float32, shape=[BATCH_SIZE, crop_size_FE, crop_size_PE, NUM_CHANNELS], name='low')
    high_res_holder = tf.placeholder(
        tf.float32, shape=[BATCH_SIZE, crop_size_FE, crop_size_PE, NUM_CHANNELS])
    low_res_holder_validation = tf.placeholder(
        tf.float32, shape=[Test_Batch_size, validation_FE, validation_PE, NUM_CHANNELS])
def main():
    global best_top1, args, logger
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1
    # commented because it is now set as an argparse param.
    # args.gpu = 0
    args.world_size = 1

    if args.distributed:
        args.gpu = args.local_rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
        args.world_size = torch.distributed.get_world_size()

    # note the gpu is used for directory creation and log files,
    # which is needed when run as multiple processes
    args = utils.initialize_files_and_args(args)
    logger = utils.logging_setup(args.log_file_path)

    if args.fp16:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."
    if args.static_loss_scale != 1.0:
        if not args.fp16:
            logger.info("Warning: if --fp16 is not used, static_loss_scale will be ignored.")

    # load the correct ops dictionary
    op_dict_to_load = "operations.%s" % args.ops
    logger.info('loading op dict: ' + str(op_dict_to_load))
    op_dict = eval(op_dict_to_load)

    # load the correct primitives list
    primitives_to_load = "genotypes.%s" % args.primitives
    logger.info('loading primitives: ' + primitives_to_load)
    primitives = eval(primitives_to_load)
    logger.info('primitives: ' + str(primitives))

    # create model
    genotype = eval("genotypes.%s" % args.arch)
    # get the number of output channels
    classes = dataset.class_dict[args.dataset]
    # create the neural network
    if args.dataset == 'imagenet':
        model = NetworkImageNet(args.init_channels, classes, args.layers, args.auxiliary,
                                genotype, op_dict=op_dict, C_mid=args.mid_channels)
        flops_shape = [1, 3, 224, 224]
    else:
        model = NetworkCIFAR(args.init_channels, classes, args.layers, args.auxiliary,
                             genotype, op_dict=op_dict, C_mid=args.mid_channels)
        flops_shape = [1, 3, 32, 32]
    model.drop_path_prob = 0.0
    # if args.pretrained:
    #     logger.info("=> using pre-trained model '{}'".format(args.arch))
    #     model = models.__dict__[args.arch](pretrained=True)
    # else:
    #     logger.info("=> creating model '{}'".format(args.arch))
    #     model = models.__dict__[args.arch]()

    if args.flops:
        model = model.cuda()
        logger.info("param size = %fMB", utils.count_parameters_in_MB(model))
        logger.info("flops_shape = " + str(flops_shape))
        logger.info("flops = " + utils.count_model_flops(model, data_shape=flops_shape))
        return

    if args.sync_bn:
        import apex
        logger.info("using apex synced BN")
        model = apex.parallel.convert_syncbn_model(model)

    model = model.cuda()
    if args.fp16:
        model = network_to_half(model)
    if args.distributed:
        # By default, apex.parallel.DistributedDataParallel overlaps communication with
        # computation in the backward pass.
        # model = DDP(model)
        # delay_allreduce delays all communication to the end of the backward pass.
        model = DDP(model, delay_allreduce=True)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    # Scale learning rate based on global batch size
    args.learning_rate = args.learning_rate * float(args.batch_size * args.world_size) / 256.
    init_lr = args.learning_rate / args.warmup_lr_divisor
    optimizer = torch.optim.SGD(model.parameters(), init_lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    # epoch_count = args.epochs - args.start_epoch
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(epoch_count))
    # scheduler = warmup_scheduler.GradualWarmupScheduler(
    #     optimizer, args.warmup_lr_divisor, args.warmup_epochs, scheduler)
    if args.fp16:
        optimizer = FP16_Optimizer(optimizer,
                                   static_loss_scale=args.static_loss_scale,
                                   dynamic_loss_scale=args.dynamic_loss_scale)

    # Optionally resume from a checkpoint
    if args.resume or args.evaluate:
        if args.evaluate:
            args.resume = args.evaluate

        # Use a local scope to avoid dangling references
        def resume():
            # declare the global so the restored value propagates out of this local scope
            global best_top1
            if os.path.isfile(args.resume):
                logger.info("=> loading checkpoint '{}'".format(args.resume))
                checkpoint = torch.load(
                    args.resume,
                    map_location=lambda storage, loc: storage.cuda(args.gpu))
                args.start_epoch = checkpoint['epoch']
                if 'best_top1' in checkpoint:
                    best_top1 = checkpoint['best_top1']
                model.load_state_dict(checkpoint['state_dict'])
                # An FP16_Optimizer instance's state dict internally stashes the master params.
                optimizer.load_state_dict(checkpoint['optimizer'])
                # TODO(ahundt) make sure scheduler loading isn't broken
                if 'lr_scheduler' in checkpoint:
                    scheduler.load_state_dict(checkpoint['lr_scheduler'])
                elif 'lr_schedule' in checkpoint:
                    lr_schedule = checkpoint['lr_schedule']
                logger.info("=> loaded checkpoint '{}' (epoch {})".format(
                    args.resume, checkpoint['epoch']))
            else:
                logger.info("=> no checkpoint found at '{}'".format(args.resume))

        resume()

    # # Data loading code
    # traindir = os.path.join(args.data, 'train')
    # valdir = os.path.join(args.data, 'val')
    # if args.arch == "inception_v3":
    #     crop_size = 299
    #     val_size = 320  # I chose this value arbitrarily, we can adjust.
    # else:
    #     crop_size = 224
    #     val_size = 256
    # train_dataset = datasets.ImageFolder(
    #     traindir,
    #     transforms.Compose([
    #         transforms.RandomResizedCrop(crop_size),
    #         transforms.RandomHorizontalFlip(),
    #         autoaugment.ImageNetPolicy(),
    #         # transforms.ToTensor(),  # Too slow, moved to data_prefetcher()
    #         # normalize,
    #     ]))
    # val_dataset = datasets.ImageFolder(valdir, transforms.Compose([
    #     transforms.Resize(val_size),
    #     transforms.CenterCrop(crop_size)
    # ]))
    # train_sampler = None
    # val_sampler = None
    # if args.distributed:
    #     train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    #     val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)
    # train_loader = torch.utils.data.DataLoader(
    #     train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
    #     num_workers=args.workers, pin_memory=True, sampler=train_sampler, collate_fn=fast_collate)
    # val_loader = torch.utils.data.DataLoader(
    #     val_dataset,
    #     batch_size=args.batch_size, shuffle=False,
    #     num_workers=args.workers, pin_memory=True,
    #     sampler=val_sampler,
    #     collate_fn=fast_collate)

    # Get preprocessing functions (i.e. transforms) to apply on data.
    # normalize_as_tensor = False because we normalize and convert to a
    # tensor in our custom prefetching function, rather than as part of
    # the transform preprocessing list.
    train_transform, valid_transform = utils.get_data_transforms(args, normalize_as_tensor=False)

    # Get the training queue, select training and validation from training set
    train_loader, val_loader = dataset.get_training_queues(
        args.dataset, train_transform, valid_transform, args.data, args.batch_size,
        train_proportion=1.0, collate_fn=fast_collate, distributed=args.distributed,
        num_workers=args.workers)

    if args.evaluate:
        if args.dataset == 'cifar10':
            # evaluate best model weights on cifar 10.1
            # https://github.com/modestyachts/CIFAR-10.1
            train_transform, valid_transform = utils.get_data_transforms(args)
            # Get the training queue, use full training and test set
            train_queue, valid_queue = dataset.get_training_queues(
                args.dataset, train_transform, valid_transform, args.data,
                args.batch_size, train_proportion=1.0, search_architecture=False)
            test_data = cifar10_1.CIFAR10_1(root=args.data, download=True, transform=valid_transform)
            test_queue = torch.utils.data.DataLoader(
                test_data, batch_size=args.batch_size, shuffle=False,
                pin_memory=True, num_workers=args.workers)
            eval_stats = evaluate(args, model, criterion,
                                  train_queue=train_queue, valid_queue=valid_queue,
                                  test_queue=test_queue)
            with open(args.stats_file, 'w') as f:
                # TODO(ahundt) fix "TypeError: 1869 is not JSON serializable" to include arg info, see train.py
                # arg_dict = vars(args)
                # arg_dict.update(eval_stats)
                # json.dump(arg_dict, f)
                json.dump(eval_stats, f)
            logger.info("flops = " + utils.count_model_flops(model))
            logger.info(utils.dict_to_log_string(eval_stats))
            logger.info('\nEvaluation of Loaded Model Complete! Save dir: ' + str(args.save))
        else:
            validate(val_loader, model, criterion, args)
        return

    lr_schedule = cosine_power_annealing(
        epochs=args.epochs, max_lr=args.learning_rate, min_lr=args.learning_rate_min,
        warmup_epochs=args.warmup_epochs,
        exponent_order=args.lr_power_annealing_exponent_order,
        restart_lr=args.restart_lr)
    epochs = np.arange(args.epochs) + args.start_epoch
    stats_csv = args.epoch_stats_file
    stats_csv = stats_csv.replace('.json', '.csv')
    with tqdm(epochs, dynamic_ncols=True, disable=args.local_rank != 0, leave=False) as prog_epoch:
        best_stats = {}
        stats = {}
        epoch_stats = []
        best_epoch = 0
        for epoch, learning_rate in zip(prog_epoch, lr_schedule):
            if args.distributed and train_loader.sampler is not None:
                train_loader.sampler.set_epoch(int(epoch))
            # if args.distributed:
            #     train_sampler.set_epoch(epoch)
            # update the learning rate
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate
            # scheduler.step()
            model.drop_path_prob = args.drop_path_prob * float(epoch) / float(args.epochs)
            # train for one epoch
            train_stats = train(train_loader, model, criterion, optimizer, int(epoch), args)
            if args.prof:
                break
            # evaluate on validation set
            top1, val_stats = validate(val_loader, model, criterion, args)
            stats.update(train_stats)
            stats.update(val_stats)
            # stats['lr'] = '{0:.5f}'.format(scheduler.get_lr()[0])
            stats['lr'] = '{0:.5f}'.format(learning_rate)
            stats['epoch'] = epoch

            # remember best top1 and save checkpoint
            if args.local_rank == 0:
                is_best = top1 > best_top1
                best_top1 = max(top1, best_top1)
                stats['best_top1'] = '{0:.3f}'.format(best_top1)
                if is_best:
                    best_epoch = epoch
                    best_stats = copy.deepcopy(stats)
                stats['best_epoch'] = best_epoch

                stats_str = utils.dict_to_log_string(stats)
                logger.info(stats_str)
                save_checkpoint(
                    {
                        'epoch': epoch,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_top1': best_top1,
                        'optimizer': optimizer.state_dict(),
                        # 'lr_scheduler': scheduler.state_dict()
                        'lr_schedule': lr_schedule,
                        'stats': best_stats
                    }, is_best, path=args.save)
                prog_epoch.set_description(
                    'Overview ***** best_epoch: {0} best_valid_top1: {1:.2f} ***** Progress'
                    .format(best_epoch, best_top1))
            epoch_stats += [copy.deepcopy(stats)]
            with open(args.epoch_stats_file, 'w') as f:
                json.dump(epoch_stats, f, cls=utils.NumpyEncoder)
            utils.list_of_dicts_to_csv(stats_csv, epoch_stats)

    stats_str = utils.dict_to_log_string(best_stats, key_prepend='best_')
    logger.info(stats_str)
    with open(args.stats_file, 'w') as f:
        arg_dict = vars(args)
        arg_dict.update(best_stats)
        json.dump(arg_dict, f, cls=utils.NumpyEncoder)
    with open(args.epoch_stats_file, 'w') as f:
        json.dump(epoch_stats, f, cls=utils.NumpyEncoder)
    utils.list_of_dicts_to_csv(stats_csv, epoch_stats)
    logger.info('Training of Final Model Complete! Save dir: ' + str(args.save))
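# utils.dict_to_log_string is used throughout these scripts, including the
# key_prepend keyword above. A minimal sketch of such a helper (an assumption,
# not necessarily the project's exact implementation):
def dict_to_log_string(stats, key_prepend=''):
    """Flatten a stats dict into 'key, value, key, value, ...' for log lines."""
    parts = []
    for key, value in stats.items():
        parts += [key_prepend + str(key), str(value)]
    return ', '.join(parts)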
    data_json['annotations'] = annotations
    data_json['categories'] = categories
    return data_json


def save_json(json_data, output_path):
    dir_path = os.path.dirname(output_path)
    if dir_path and not os.path.exists(dir_path):
        os.makedirs(dir_path)
    with open(output_path, 'w') as fp:
        json.dump(json_data, fp, indent=4)
    logging.info(f'JSON was saved. File path: {output_path}')


logger = utils.logging_setup(__name__)  # create logger


def main():
    args = parser.parse_args()
    categories = get_categories(args)
    mapper_label_to_category_id = get_mapper_from_label_to_category_id(categories)
    if not args.split:
        images, annotations = process_data(args, mapper_label_to_category_id)
        coco_json_data = create_coco_json(images, annotations, categories)
        save_json(coco_json_data, args.output_path)
    else:
        images_train, annotations_train, images_test, annotations_test = \
            process_data_split_test_train(args, mapper_label_to_category_id)
def main():
    args = docopt.docopt(__doc__, version=__version__)
    log_level = 'DEBUG' if args['--debug'] else 'INFO'
    if sys.platform == 'win32':
        log_fn = 'c:/atxpkg/atxpkg.log'
    else:
        log_fn = '/tmp/atxpkg/atxpkg.log'
    utils.logging_setup(log_level, log_fn, print_=True)

    logging.info('*' * 40)
    logging.info('starting atxpkg v%s' % __version__)
    logging.debug('args: %s' % dict(args))

    if sys.platform == 'win32':
        logging.debug('detected win32')
        db_fn = 'c:/atxpkg/installed.json'
        repos_fn = 'c:/atxpkg/repos.txt'
        prefix = 'c:'
        cache_dir = 'c:/atxpkg/cache'
    else:
        logging.debug('detected non-win32')
        db_fn = '/tmp/atxpkg/installed.json'
        repos_fn = '/tmp/atxpkg/repos.txt'
        prefix = ''
        cache_dir = '/tmp/atxpkg/cache'

    repos = utils.get_repos(repos_fn)
    repos.append(cache_dir)
    #logging.debug(str(args))
    prefix = args['--prefix'] if args['--prefix'] else ''

    if not os.path.isfile(db_fn):
        logging.info('%s not found, creating empty one' % db_fn)
        with open(db_fn, 'w') as f:
            f.write('{}')
    if not os.path.isdir(cache_dir):
        logging.info('%s not found, creating empty one' % cache_dir)
        os.makedirs(cache_dir)

    installed_packages = utils.get_installed_packages(db_fn)
    force = args['--force']
    yes, no = args['--yes'], args['--no']

    if args['install']:
        available_packages = utils.get_available_packages(repos)
        for package in args['<package>']:
            package_name = utils.get_package_name(package)
            if package_name not in available_packages:
                raise Exception('unable to find package %s' % package_name)
            if package_name in installed_packages and not force:
                raise Exception('package %s already installed' % package_name)
        for package in args['<package>']:
            package_name = utils.get_package_name(package)
            package_version = utils.get_package_version(package)
            if package_version:
                url = utils.get_specific_version_url(available_packages[package_name], package_version)
            else:
                url = utils.get_max_version_url(available_packages[package_name])
            ver = utils.get_package_version(utils.get_package_fn(url))
            print('install %s-%s' % (package_name, ver))
        if no or not (yes or utils.yes_no('continue?', default='y')):
            return
        for package in args['<package>']:
            package_name = utils.get_package_name(package)
            package_version = utils.get_package_version(package)
            if package_version:
                url = utils.get_specific_version_url(available_packages[package_name], package_version)
            else:
                url = utils.get_max_version_url(available_packages[package_name])
            local_fn = utils.download_package(url, cache_dir)
            if not args['--downloadonly']:
                package_info = utils.install_package(local_fn, prefix, force)
                installed_packages[package_name] = package_info
                utils.save_installed_packages(installed_packages, db_fn)
    elif args['update']:
        available_packages = utils.get_available_packages(repos)
        if args['<package>']:
            packages = args['<package>']
            for package in packages:
                if '..' in package:
                    package_old, package_new = package.split('..')
                    package_name_old = utils.get_package_name(package_old)
                    package_name_new = utils.get_package_name(package_new)
                else:
                    package_name_old = package_name_new = utils.get_package_name(package)
                if package_name_old not in installed_packages:
                    raise Exception('package %s not installed' % package_name_old)
        else:
            packages = installed_packages.keys()
        packages_to_update = set()
        for package in packages:
            if '..' in package:
                package_old, package_new = package.split('..')
                package_name_old = utils.get_package_name(package_old)
                package_name_new = utils.get_package_name(package_new)
                package_version = utils.get_package_version(package_new)
            else:
                package_name_old = package_name_new = utils.get_package_name(package)
                package_version = utils.get_package_version(package)
            if package_name_new not in available_packages:
                logging.warning('%s not available in any repository' % package_name_new)
                continue
            if package_version:
                url = utils.get_specific_version_url(available_packages[package_name_new], package_version)
            else:
                url = utils.get_max_version_url(available_packages[package_name_new])
            ver_cur = installed_packages[package_name_old]['version']
            ver_avail = utils.get_package_version(utils.get_package_fn(url))
            if package_name_old != package_name_new or ver_avail != ver_cur or force:
                print('update %s-%s -> %s-%s' % (package_name_old, ver_cur, package_name_new, ver_avail))
                packages_to_update.add(package)
        if not packages_to_update:
            print('nothing to update')
            return
        if no or not (yes or utils.yes_no('continue?', default='y')):
            return
        for package in packages_to_update:
            if '..' in package:
                package_old, package_new = package.split('..')
                package_name_old = utils.get_package_name(package_old)
                package_name_new = utils.get_package_name(package_new)
                package_version = utils.get_package_version(package_new)
            else:
                package_name_old = package_name_new = utils.get_package_name(package)
                package_version = utils.get_package_version(package)
            if package_version:
                url = utils.get_specific_version_url(available_packages[package_name_new], package_version)
            else:
                url = utils.get_max_version_url(available_packages[package_name_new])
            ver_cur = installed_packages[package_name_old]['version']
            ver_avail = utils.get_package_version(utils.get_package_fn(url))
            if package_name_old != package_name_new or ver_avail != ver_cur or force:
                local_fn = utils.download_package(url, cache_dir)
                if not args['--downloadonly']:
                    package_info = utils.update_package(local_fn, package_name_old,
                                                        installed_packages[package_name_old], prefix, force)
                    del installed_packages[package_name_old]
                    installed_packages[package_name_new] = package_info
                    utils.save_installed_packages(installed_packages, db_fn)
    elif args['merge_config']:
        if args['<package>']:
            packages = args['<package>']
            for package in packages:
                package_name = utils.get_package_name(package)
                if package_name not in installed_packages:
                    raise Exception('package %s not installed' % package_name)
        else:
            packages = installed_packages.keys()
            for package in packages:
                package_name = utils.get_package_name(package)
                if package_name not in installed_packages:
                    raise Exception('package %s not installed' % package_name)
        for package in packages:
            utils.mergeconfig_package(package, installed_packages, prefix)
    elif args['remove']:
        for package_name in args['<package>']:
            if package_name not in installed_packages:
                raise Exception('package %s not installed' % package_name)
        for package_name in args['<package>']:
            package_version = installed_packages[package_name]['version']
            print('remove %s-%s' % (package_name, package_version))
        if no or not (yes or utils.yes_no('continue?', default='n')):
            return
        for package_name in args['<package>']:
            utils.remove_package(package_name, installed_packages, prefix)
            del installed_packages[package_name]
            utils.save_installed_packages(installed_packages, db_fn)
    elif args['list_available']:
        available_packages = utils.get_available_packages(repos)
        for package_name in sorted(available_packages.keys()):
            print(package_name)
    elif args['list_installed']:
        for package_name, package_info in installed_packages.items():
            package_version = package_info['version']
            print('%s-%s' % (package_name, package_version))
    elif args['show_untracked']:
        recursive = args['--recursive']
        fn_to_package_name = utils.gen_fn_to_package_name_mapping(installed_packages, prefix)
        if args['<path>']:
            paths = set([args['<path>'], ])
        else:
            paths = set()
            for fn in fn_to_package_name.keys():
                paths.add(os.path.dirname(fn))
        while paths:
            for path in paths.copy():
                for fn in os.listdir(path):
                    if os.path.isdir('%s/%s' % (path, fn)) and not os.path.islink('%s/%s' % (path, fn)):
                        if recursive:
                            paths.add('%s/%s' % (path, fn))
                        else:
                            continue
                    if '%s/%s' % (path, fn) in fn_to_package_name:
                        continue
                    print('%s/%s' % (path, fn))
                paths.remove(path)
    elif args['clean_cache']:
        utils.clean_cache(cache_dir)

    logging.debug('exit')
    return 0
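# utils.yes_no drives the interactive confirmations above. A minimal sketch of
# such a prompt helper (an assumption, not necessarily atxpkg's exact code):
def yes_no(question, default='y'):
    """Ask a y/n question on stdin; an empty answer selects the default."""
    suffix = '[Y/n]' if default == 'y' else '[y/N]'
    while True:
        answer = input('%s %s ' % (question, suffix)).strip().lower()
        if not answer:
            answer = default
        if answer in ('y', 'yes'):
            return True
        if answer in ('n', 'no'):
            return False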
def main():
    args = docopt.docopt(__doc__, version=__version__)
    log_level = 'DEBUG' if args['--debug'] else 'INFO'
    if sys.platform == 'win32':
        log_fn = 'c:/atxpkg/atxpkg.log'
    else:
        log_fn = '/tmp/atxpkg/atxpkg.log'
    utils.logging_setup(log_level, log_fn, print_=True)

    logging.info('*' * 40)
    logging.info('starting atxpkg v%s' % __version__)
    logging.debug('args: %s' % dict(args))

    if sys.platform == 'win32':
        logging.debug('detected win32')
        db_fn = 'c:/atxpkg/installed.json'
        repos_fn = 'c:/atxpkg/repos.txt'
        prefix = 'c:'
        cache_dir = 'c:/atxpkg/cache'
    else:
        logging.debug('detected non-win32')
        db_fn = '/tmp/atxpkg/installed.json'
        repos_fn = '/tmp/atxpkg/repos.txt'
        prefix = ''
        cache_dir = '/tmp/atxpkg/cache'

    repos = utils.get_repos(repos_fn)
    repos.append(cache_dir)
    #logging.debug(str(args))
    prefix = args['--prefix'] if args['--prefix'] else ''

    if not os.path.isfile(db_fn):
        logging.info('%s not found, creating empty one' % db_fn)
        with open(db_fn, 'w') as f:
            f.write('{}')
    if not os.path.isdir(cache_dir):
        logging.info('%s not found, creating empty one' % cache_dir)
        os.makedirs(cache_dir)

    installed_packages = utils.get_installed_packages(db_fn)
    force = args['--force']
    yes, no = args['--yes'], args['--no']

    if args['install']:
        available_packages = utils.get_available_packages(repos)
        for package in args['<package>']:
            package_name = utils.get_package_name(package)
            if package_name not in available_packages:
                raise Exception('unable to find package %s' % package_name)
            if package_name in installed_packages and not force:
                raise Exception('package %s already installed' % package_name)
        for package in args['<package>']:
            package_name = utils.get_package_name(package)
            package_version = utils.get_package_version(package)
            if package_version:
                url = utils.get_specific_version_url(
                    available_packages[package_name], package_version)
            else:
                url = utils.get_max_version_url(available_packages[package_name])
            ver = utils.get_package_version(utils.get_package_fn(url))
            print('install %s-%s' % (package_name, ver))
        if no or not (yes or utils.yes_no('continue?', default='y')):
            return
        for package in args['<package>']:
            package_name = utils.get_package_name(package)
            package_version = utils.get_package_version(package)
            if package_version:
                url = utils.get_specific_version_url(
                    available_packages[package_name], package_version)
            else:
                url = utils.get_max_version_url(available_packages[package_name])
            local_fn = utils.download_package(url, cache_dir)
            if not args['--downloadonly']:
                package_info = utils.install_package(local_fn, prefix, force)
                installed_packages[package_name] = package_info
                utils.save_installed_packages(installed_packages, db_fn)
                logging.info('%s-%s is now installed' % (package_name, package_version))
    elif args['update']:
        available_packages = utils.get_available_packages(repos)
        if args['<package>']:
            packages = args['<package>']
            for package in packages:
                if '..' in package:
                    package_old, package_new = package.split('..')
                    package_name_old = utils.get_package_name(package_old)
                    package_name_new = utils.get_package_name(package_new)
                else:
                    package_name_old = package_name_new = utils.get_package_name(package)
                if package_name_old not in installed_packages:
                    raise Exception('package %s not installed' % package_name_old)
        else:
            packages = installed_packages.keys()
        packages_to_update = set()
        for package in packages:
            if '..' in package:
                package_old, package_new = package.split('..')
                package_name_old = utils.get_package_name(package_old)
                package_name_new = utils.get_package_name(package_new)
                package_version = utils.get_package_version(package_new)
            else:
                package_name_old = package_name_new = utils.get_package_name(package)
                package_version = utils.get_package_version(package)
            if package_name_new not in available_packages:
                logging.warning('%s not available in any repository' % package_name_new)
                continue
            if package_version:
                url = utils.get_specific_version_url(
                    available_packages[package_name_new], package_version)
            else:
                url = utils.get_max_version_url(available_packages[package_name_new])
            ver_cur = installed_packages[package_name_old]['version']
            ver_avail = utils.get_package_version(utils.get_package_fn(url))
            if package_name_old != package_name_new or ver_avail != ver_cur or force:
                print('update %s-%s -> %s-%s' % (package_name_old, ver_cur, package_name_new, ver_avail))
                packages_to_update.add(package)
        if not packages_to_update:
            print('nothing to update')
            return
        if no or not (yes or utils.yes_no('continue?', default='y')):
            return
        for package in packages_to_update:
            if '..' in package:
                package_old, package_new = package.split('..')
                package_name_old = utils.get_package_name(package_old)
                package_name_new = utils.get_package_name(package_new)
                package_version = utils.get_package_version(package_new)
            else:
                package_name_old = package_name_new = utils.get_package_name(package)
                package_version = utils.get_package_version(package)
            if package_version:
                url = utils.get_specific_version_url(
                    available_packages[package_name_new], package_version)
            else:
                url = utils.get_max_version_url(available_packages[package_name_new])
            ver_cur = installed_packages[package_name_old]['version']
            ver_avail = utils.get_package_version(utils.get_package_fn(url))
            if package_name_old != package_name_new or ver_avail != ver_cur or force:
                local_fn = utils.download_package(url, cache_dir)
                if not args['--downloadonly']:
                    package_info = utils.update_package(
                        local_fn, package_name_old,
                        installed_packages[package_name_old], prefix, force)
                    del installed_packages[package_name_old]
                    installed_packages[package_name_new] = package_info
                    utils.save_installed_packages(installed_packages, db_fn)
                    logging.info('%s-%s updated to %s-%s'
                                 % (package_name_old, ver_cur, package_name_new, ver_avail))
    elif args['merge_config']:
        if args['<package>']:
            packages = args['<package>']
            for package in packages:
                package_name = utils.get_package_name(package)
                if package_name not in installed_packages:
                    raise Exception('package %s not installed' % package_name)
        else:
            packages = installed_packages.keys()
            for package in packages:
                package_name = utils.get_package_name(package)
                if package_name not in installed_packages:
                    raise Exception('package %s not installed' % package_name)
        for package in packages:
            utils.mergeconfig_package(package, installed_packages, prefix)
    elif args['remove']:
        for package_name in args['<package>']:
            if package_name not in installed_packages:
                raise Exception('package %s not installed' % package_name)
        for package_name in args['<package>']:
            package_version = installed_packages[package_name]['version']
            print('remove %s-%s' % (package_name, package_version))
        if no or not (yes or utils.yes_no('continue?', default='n')):
            return
        for package_name in args['<package>']:
            utils.remove_package(package_name, installed_packages, prefix)
            del installed_packages[package_name]
            utils.save_installed_packages(installed_packages, db_fn)
    elif args['list_available']:
        available_packages = utils.get_available_packages(repos)
        for package_name in sorted(available_packages.keys()):
            print(package_name)
    elif args['list_installed']:
        for package_name, package_info in installed_packages.items():
            package_version = package_info['version']
            print('%s-%s' % (package_name, package_version))
    elif args['show_untracked']:
        recursive = args['--recursive']
        fn_to_package_name = utils.gen_fn_to_package_name_mapping(installed_packages, prefix)
        if args['<path>']:
            paths = set([args['<path>'], ])
        else:
            paths = set()
            for fn in fn_to_package_name.keys():
                paths.add(os.path.dirname(fn))
        while paths:
            for path in paths.copy():
                for fn in os.listdir(path):
                    if os.path.isdir('%s/%s' % (path, fn)) and not os.path.islink('%s/%s' % (path, fn)):
                        if recursive:
                            paths.add('%s/%s' % (path, fn))
                        else:
                            continue
                    if '%s/%s' % (path, fn) in fn_to_package_name:
                        continue
                    print('%s/%s' % (path, fn))
                paths.remove(path)
    elif args['clean_cache']:
        utils.clean_cache(cache_dir)
    elif args['check']:
        if args['<package>']:
            packages = args['<package>']
            for package in packages:
                if package not in installed_packages.keys():
                    packages = []
                    print('%s not installed' % package)
        else:
            packages = installed_packages.keys()
        if packages:
            for package in packages:
                for fn in installed_packages[package]['md5sums']:
                    if not os.path.isfile('%s/%s' % (prefix, fn)):
                        logging.info('%s/%s does not exist' % (prefix, fn))
                    elif utils.get_md5sum('%s/%s' % (prefix, fn)) != installed_packages[package]['md5sums'][fn]:
                        logging.info('sum of %s/%s differs' % (prefix, fn))
                print('check of %s complete' % package)
                logging.info('check of %s complete' % package)

    logging.debug('exit')
    return 0
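# Package arguments above look like 'name' or 'name-1.2.3'; get_package_name and
# get_package_version split off a trailing version that starts with a digit. A
# minimal sketch of these helpers (an assumption, not atxpkg's exact parsing rules):
import re

def get_package_name(package):
    """Return the name part of 'name-1.2.3' (or the whole string if no version)."""
    match = re.match(r'^(.+?)-(\d.*)$', package)
    return match.group(1) if match else package

def get_package_version(package):
    """Return the version part of 'name-1.2.3', or None when absent."""
    match = re.match(r'^(.+?)-(\d.*)$', package)
    return match.group(2) if match else None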
def main(args, parallel_func):
    if not os.path.exists(args.output_path):
        logger.info(f'Create folder to save images: {args.output_path}')
        os.makedirs(args.output_path)

    tfrecord_paths = get_all_tfrecords_path_files(args)
    parallel_func = partial(parallel_func, args)
    outputs_codes = []
    tasks = []
    with Pool(processes=args.num_workers) as pool:
        for path_to_tfrecord in tqdm(tfrecord_paths):
            tasks.append(
                pool.apply_async(parallel_func,
                                 args=(path_to_tfrecord, ),
                                 error_callback=lambda e: logger.info(e)))
        for task in tasks:
            task.wait()
            outputs_codes.append(task.get())
        pool.close()
        pool.join()
    logger.info('FINISH')


logger = logging_setup(__name__)

if __name__ == '__main__':
    args = build_argparser().parse_args()
    main(args, process_data)
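# build_argparser is referenced above but not shown. A minimal sketch covering only
# the attributes main() actually reads (output_path and num_workers); the flag names
# and defaults are assumptions, and any other arguments the real script defines are omitted:
import argparse

def build_argparser():
    parser = argparse.ArgumentParser(description='Process TFRecord files in parallel.')
    parser.add_argument('--output_path', type=str, required=True,
                        help='folder where extracted images are saved')
    parser.add_argument('--num_workers', type=int, default=4,
                        help='number of worker processes in the Pool')
    return parser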