def __init__(self, zone, config):
    """Initialise the source/destination of the log messages.

    :param zone: prefix (zone name/path) for the encapsulated output dir.
    :param config: parsed configuration mapping; reads the
        ``rabbitmq_input``, ``encalupated_out`` and ``processing`` sections.
    :raises ValueError: if the configured input type is not ``'File'``.
    """
    self.data_source_path = config['rabbitmq_input']['dir']
    self.encapsulate_dest_path = zone + config['encalupated_out']['dir'] + '/'
    self.data_source = config['rabbitmq_input']['type']
    # BUG FIX: this previously called mkdir_p(self.data_source), which created
    # a directory named after the input *type* string (e.g. "File").  The
    # directory that must exist is the encapsulated-output destination, as in
    # the sibling analyzer __init__.
    mkdir_p(self.encapsulate_dest_path)
    self.n_mprocessing = config['processing']['ncores_per_zone']
    self.part = 0  # count number of the file (output parts written so far)
    # Replacement tokens used to normalise escaped/embedded JSON fragments
    # found in the raw log payloads before parsing.
    rep_tokens = {
        '\\': '',
        '"{': '{',
        '}"': '}',
        '\'{': '{',
        '}\'': '}',
        '{}': '"NA"'
    }
    self.rep_tokens = dict(
        (re.escape(k), v) for k, v in rep_tokens.iteritems())
    self.pattern_tokens = re.compile("|".join(self.rep_tokens.keys()))
    rep_for_exchange = r'"oslo.message"'
    self.pattern_for_exchange = re.compile(rep_for_exchange)
    if self.data_source != 'File':
        raise ValueError("Data source other than File is not implemented")
    # Only the 'File' source can reach this point (the raise above exits
    # otherwise), so the former duplicate `if self.data_source == 'File'`
    # guard was removed.
    self.files_to_load = None
    # NOTE(ab981s) change number of processors to modify the processing speed
    self.pool = Pool(self.n_mprocessing)
    LOG.debug("Completed initialization ")
# NOTE(review): fragment — the opening parser.add_argument(...) call for the
# first option begins before this chunk, and the trailing MNIST(...) call
# continues past it; code kept verbatim.
                    default=0.1, type=float,
                    help='learning rate Decay factor')  # works for MNIST
parser.add_argument('--stage2_lr_step',
                    default=6, type=float,
                    help='learning rate Decay step')  # works for MNIST
parser.add_argument('--stage2_bs', default=128, type=int, help='batch size')
args = parser.parse_args()

# Prefer GPU when available; later .to(device)/cuda calls follow this choice.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Checkpoint directory name encodes the experiment hyper-parameters.
args.checkpoint = './checkpoints/mnist/%s-%s-%s-dim%s-T%s-alpha%s' % (
    args.train_class_num, args.test_class_num, args.arch, args.embed_dim,
    args.temperature, args.alpha)
if not os.path.isdir(args.checkpoint):
    mkdir_p(args.checkpoint)

# folder to save figures
args.plotfolder = os.path.join(args.checkpoint, "plotter")
if not os.path.isdir(args.plotfolder):
    mkdir_p(args.plotfolder)
# folder to save histogram
args.histfolder = os.path.join(args.checkpoint, "histogram")
if not os.path.isdir(args.histfolder):
    mkdir_p(args.histfolder)

print('==> Preparing data..')
# (0.1307, 0.3081) are the standard MNIST mean/std normalisation constants.
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.1307, ), (0.3081, ))])
trainset = MNIST(root='../../data',  # NOTE(review): call truncated here in this view
# NOTE(review): fragment — the final testset = MNIST(...) call continues past
# this chunk; code kept verbatim.
parser.add_argument('--hist_bins',
                    default=100,
                    type=int,
                    help='divided into n bins')
# NOTE(review): default=True combined with action='store_true' makes this
# flag always True — presumably default=False was intended; left unchanged.
parser.add_argument('--hist_norm',
                    default=True,
                    action='store_true',
                    help='if norm the frequency to [0,1]')
args = parser.parse_args()

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Checkpoint directory name encodes the experiment hyper-parameters.
args.checkpoint = './checkpoints/mnist/%s_%s_%s_dim%s_gamma%s' % (
    args.train_class_num, args.test_class_num, args.arch, args.embed_dim,
    args.gamma)
if not os.path.isdir(args.checkpoint):
    mkdir_p(args.checkpoint)

print('==> Preparing data..')
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.1307, ), (0.3081, ))])
trainset = MNIST(root='../../data',
                 train=True,
                 download=True,
                 transform=transform,
                 train_class_num=args.train_class_num,
                 test_class_num=args.test_class_num,
                 includes_all_train_class=args.includes_all_train_class)
testset = MNIST(root='../../data',
                train=False,
                download=True,  # NOTE(review): call truncated here in this view
# Plotting-related CLI options, then checkpoint/plot folder setup.
parser.add_argument('--plot_quality', default=200, type=int,
                    help='DPI of plot figure')
parser.add_argument('--bins', default=50, type=int,
                    help='divided into n bins')
parser.add_argument('--tail_number', default=50, type=int,
                    help='number of maximum distance we do not take into account, '
                         'which may be anomaly or wrong labeled.')
args = parser.parse_args()

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Checkpoint directory name encodes alpha/beta/embed_dim hyper-parameters.
args.checkpoint = './checkpoints/mnist/' + args.arch +\
                  '/A%s_B%s_embed%s' % (args.alpha, args.beta,args.embed_dim)
if not os.path.isdir(args.checkpoint):
    mkdir_p(args.checkpoint)
# folder to save figures
args.plotfolder1 = os.path.join(args.checkpoint,"plotter_Stage1")
if not os.path.isdir(args.plotfolder1):
    mkdir_p(args.plotfolder1)
# folder to save figures
args.plotfolder2 = os.path.join(args.checkpoint,"plotter_Stage2")
if not os.path.isdir(args.plotfolder2):
    mkdir_p(args.plotfolder2)

print('==> Preparing data..')
# (0.1307, 0.3081) are the standard MNIST mean/std normalisation constants.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])
parser.add_argument('--distance', default='l2',
                    choices=['l2', 'l1', 'dotproduct'],
                    type=str, help='choosing distance metric')
# NOTE(review): default=True combined with action='store_true' makes this
# flag always True — presumably default=False was intended; left unchanged.
parser.add_argument('--scaled', default=True, action='store_true',
                    help='If scale distance by sqrt(embed_dim)')

# Parameters for stage 1
parser.add_argument('--stage1_resume', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint')
parser.add_argument('--bins', default=20, type=int,
                    help='divided into n bins')

# Parameters for plotting
args = parser.parse_args()

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Folder where per-class distance results are stored.
args.distance_folder = './checkpoints/mnist/' + args.arch + '/distance_%s_%s' % (args.alpha, args.beta)
if not os.path.isdir(args.distance_folder):
    mkdir_p(args.distance_folder)

print('==> Preparing data..')
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])
trainset = MNIST(root='../../data', train=True, download=True, transform=transform,
                 train_class_num=args.train_class_num, test_class_num=args.test_class_num,
                 includes_all_train_class=args.includes_all_train_class)
testset = MNIST(root='../../data', train=False, download=True, transform=transform,
                train_class_num=args.train_class_num, test_class_num=args.test_class_num,
                includes_all_train_class=args.includes_all_train_class)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.bs, shuffle=True, num_workers=4)
def main():
    """Train a classifier on the open-set CIFAR100 split.

    Reads all settings from the module-level ``args`` namespace.  Test loss
    and accuracy are logged as 0 placeholders during training; the real
    evaluation happens via ``test(...)`` after the loop.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(device)
    best_acc = 0  # best test accuracy
    start_epoch = 0  # start from epoch 0 or last checkpoint epoch

    # checkpoint
    args.checkpoint = './checkpoints/cifar/' + args.arch
    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data
    print('==> Preparing data..')
    # Standard CIFAR augmentation for training; mean/std are the usual
    # CIFAR channel statistics.
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    trainset = CIFAR100(root='../../data', train=True, download=True, transform=transform_train,
                        train_class_num=args.train_class_num, test_class_num=args.test_class_num,
                        includes_all_train_class=args.includes_all_train_class)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.bs, shuffle=True, num_workers=4)
    testset = CIFAR100(root='../../data', train=False, download=True, transform=transform_test,
                       train_class_num=args.train_class_num, test_class_num=args.test_class_num,
                       includes_all_train_class=args.includes_all_train_class)
    testloader = torch.utils.data.DataLoader(testset, batch_size=args.bs, shuffle=False, num_workers=4)

    # Model
    print('==> Building model..')
    net = models.__dict__[args.arch](num_classes=args.train_class_num)  # CIFAR 100
    net = net.to(device)
    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    if args.resume:
        # Load checkpoint.
        if os.path.isfile(args.resume):
            print('==> Resuming from checkpoint..')
            checkpoint = torch.load(args.resume)
            net.load_state_dict(checkpoint['net'])
            # best_acc = checkpoint['acc']
            # print("BEST_ACCURACY: "+str(best_acc))
            start_epoch = checkpoint['epoch']
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'), resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'))
        logger.set_names(['Epoch', 'Learning Rate', 'Train Loss','Train Acc.', 'Test Loss', 'Test Acc.'])

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)

    # test(0, net, trainloader, testloader, criterion, device)
    # epoch pre-initialised so the final test(...) call below works even when
    # --evaluate skips the training loop entirely.
    epoch=0
    if not args.evaluate:
        for epoch in range(start_epoch, start_epoch + args.es):
            print('\nEpoch: %d   Learning rate: %f' % (epoch+1, optimizer.param_groups[0]['lr']))
            adjust_learning_rate(optimizer, epoch, args.lr)
            train_loss, train_acc = train(net,trainloader,optimizer,criterion,device)
            save_model(net, None, epoch, os.path.join(args.checkpoint,'last_model.pth'))
            # No per-epoch evaluation: log zeros for the test columns.
            test_loss, test_acc = 0, 0
            #
            logger.append([epoch+1, optimizer.param_groups[0]['lr'], train_loss, train_acc, test_loss, test_acc])
    test(epoch, net, trainloader, testloader, criterion, device)
    logger.close()
def main():
    """Validate an (optionally resumed) ImageNet model with a DALI pipeline.

    Reads all settings from the module-level ``args`` namespace.  Supports
    distributed (one process per GPU) and fp16 execution; the training
    pipeline is commented out, so this entry point only runs ``validate``.
    """
    global best_prec1, args

    args.gpu = 0
    args.world_size = 1

    if args.distributed:
        # one process per GPU: local_rank selects this process's device
        args.gpu = args.local_rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
        args.world_size = torch.distributed.get_world_size()
    args.total_batch_size = args.world_size * args.batch_size

    # only rank 0 touches the filesystem
    if not os.path.isdir(args.checkpoint) and args.local_rank == 0:
        mkdir_p(args.checkpoint)

    if args.fp16:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."
    if args.static_loss_scale != 1.0:
        if not args.fp16:
            print("Warning:  if --fp16 is not used, static_loss_scale will be ignored.")

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = BuildNet(backbone=args.arch, num_classes=args.train_class_num)

    model = model.cuda()
    if args.fp16:
        model = network_to_half(model)
    if args.distributed:
        # shared param/delay all reduce turns off bucketing in DDP, for lower latency runs this can improve perf
        # for the older version of APEX please use shared_param, for newer one it is delay_allreduce
        model = DDP(model, delay_allreduce=True)

    # define loss function (criterion) and optimizer
    # equals to psoftmax if input is ["normweight_fea2cen"]
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    if args.fp16:
        optimizer = FP16_Optimizer(optimizer,
                                   static_loss_scale=args.static_loss_scale,
                                   dynamic_loss_scale=args.dynamic_loss_scale,
                                   verbose=False)

    # optionally resume from a checkpoint
    title = 'ImageNet-' + args.arch
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            # map tensors onto this process's GPU regardless of save device
            checkpoint = torch.load(args.resume,
                                    map_location=lambda storage, loc: storage.cuda(args.gpu))
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            try:
                model.load_state_dict(checkpoint['state_dict'])
            # FIX: was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt.  A mismatch here means the checkpoint was
            # saved from a DataParallel/DDP-wrapped model; retry with the
            # `module.` prefix stripped from every key.
            except Exception:
                from collections import OrderedDict
                new_check_point = OrderedDict()
                for k, v in checkpoint['state_dict'].items():
                    name = k[7:]  # remove `module.`
                    new_check_point[name] = v
                model.load_state_dict(new_check_point)
            # optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        if args.local_rank == 0:
            # NOTE(review): this logger is created but never appended to in
            # this validate-only entry point — confirm before removing.
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
            logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss',
                              'Train Acc.', 'Valid Acc.', 'Valid Top5.'])

    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, args.val)
    crop_size = 224
    val_size = 256

    # Training pipeline intentionally disabled — validation only:
    # pipe = HybridTrainPipe(batch_size=args.batch_size, num_threads=args.workers, device_id=args.local_rank, data_dir=traindir, crop=crop_size, dali_cpu=args.dali_cpu)
    # pipe.build()
    # train_loader = DALIClassificationIterator(pipe, size=int(pipe.epoch_size("Reader") / args.world_size))

    pipe = HybridValPipe(batch_size=args.batch_size, num_threads=args.workers,
                         device_id=args.local_rank, data_dir=valdir,
                         crop=crop_size, size=val_size)
    pipe.build()
    val_loader = DALIClassificationIterator(pipe, size=int(pipe.epoch_size("Reader") / args.world_size))

    validate(val_loader, model)
def main():
    """Distributed/fp16 ImageNet training entry point (DALI input pipeline).

    Reads all settings from the module-level ``args`` namespace; trains for
    ``args.epochs`` epochs and checkpoints after every epoch on rank 0.
    Validation is commented out, so the logger records 0.0 placeholders for
    the validation columns and ``is_best`` is always False.
    """
    global best_prec1, args
    args.gpu = 0
    args.world_size = 1
    if args.distributed:
        # one process per GPU: local_rank selects this process's device
        args.gpu = args.local_rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()
    args.total_batch_size = args.world_size * args.batch_size
    # only rank 0 touches the filesystem
    if not os.path.isdir(args.checkpoint) and args.local_rank == 0:
        mkdir_p(args.checkpoint)
    if args.fp16:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."
    if args.static_loss_scale != 1.0:
        if not args.fp16:
            print(
                "Warning:  if --fp16 is not used, static_loss_scale will be ignored."
            )
    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = Network(backbone=args.arch, num_classes=args.train_class_num)
    model = model.cuda()
    if args.fp16:
        model = network_to_half(model)
    if args.distributed:
        # shared param/delay all reduce turns off bucketing in DDP, for lower latency runs this can improve perf
        # for the older version of APEX please use shared_param, for newer one it is delay_allreduce
        model = DDP(model, delay_allreduce=True)
    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    if args.fp16:
        optimizer = FP16_Optimizer(optimizer,
                                   static_loss_scale=args.static_loss_scale,
                                   dynamic_loss_scale=args.dynamic_loss_scale,
                                   verbose=False)
    # optionally resume from a checkpoint
    title = 'ImageNet-' + args.arch
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            # map tensors onto this process's GPU regardless of save device
            checkpoint = torch.load(
                args.resume,
                map_location=lambda storage, loc: storage.cuda(args.gpu))
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            if args.local_rank == 0:
                logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                                title=title,
                                resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        if args.local_rank == 0:
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                            title=title)
            logger.set_names([
                'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.',
                'Valid Acc.', 'Valid Top5.'
            ])
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    if (args.arch == "inception_v3"):
        crop_size = 299
        val_size = 320  # I chose this value arbitrarily, we can adjust.
    else:
        crop_size = 224
        val_size = 256
    pipe = HybridTrainPipe(batch_size=args.batch_size,
                           num_threads=args.workers,
                           device_id=args.local_rank,
                           data_dir=traindir,
                           crop=crop_size,
                           dali_cpu=args.dali_cpu)
    pipe.build()
    train_loader = DALIClassificationIterator(
        pipe, size=int(pipe.epoch_size("Reader") / args.world_size))
    # pipe = HybridValPipe(batch_size=args.batch_size, num_threads=args.workers, device_id=args.local_rank, data_dir=valdir, crop=crop_size, size=val_size)
    # pipe.build()
    # val_loader = DALIClassificationIterator(pipe, size=int(pipe.epoch_size("Reader") / args.world_size))
    # if args.evaluate:
    #     validate(val_loader, model, criterion)
    #     return
    total_time = AverageMeter()
    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        adjust_learning_rate(optimizer, epoch, args)
        if args.local_rank == 0:
            print('\nEpoch: [%d | %d] LR: %f' %
                  (epoch + 1, args.epochs, optimizer.param_groups[0]['lr']))
        [train_loss, train_acc,
         avg_train_time] = train(train_loader, model, criterion, optimizer,
                                 epoch)
        total_time.update(avg_train_time)
        # evaluate on validation set
        # [test_loss, prec1, prec5] = validate(val_loader, model, criterion)
        # remember best prec@1 and save checkpoint
        if args.local_rank == 0:
            # append logger file
            # logger.append([optimizer.param_groups[0]['lr'], train_loss, test_loss, train_acc, prec1, prec5])
            logger.append([
                optimizer.param_groups[0]['lr'], train_loss, 0.0, train_acc,
                0.0, 0.0
            ])
            # is_best = prec1 > best_prec1
            is_best = False
            # best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                checkpoint=args.checkpoint,
                filename="checkpoint.pth.tar")
        # if epoch == args.epochs - 1:
        #     print('##Top-1 {0}\n'
        #           '##Top-5 {1}\n'
        #           '##Perf {2}'.format(prec1, prec5, args.total_batch_size / total_time.avg))
        # reset DALI iterators
        train_loader.reset()
        # val_loader.reset()
    if args.local_rank == 0:
        logger.close()
def main():
    """Train an MNIST open-set model with softmax + center loss.

    Uses two optimizers: SGD over the network parameters and a separate SGD
    (with its own learning rate ``args.center_lr``) over the center-loss
    class centers.  Reads all settings from the module-level ``args``
    namespace, then evaluates with ``test`` after the training loop.
    """
    args.checkpoint = './checkpoints/mnist/' + args.arch
    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)
    # folder to save figures
    args.plotfolder = './checkpoints/mnist/' + args.arch + '/plotter'
    if not os.path.isdir(args.plotfolder):
        mkdir_p(args.plotfolder)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(device)
    start_epoch = 0  # start from epoch 0 or last checkpoint epoch

    print('==> Preparing data..')
    # (0.1307, 0.3081) are the standard MNIST mean/std normalisation constants.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    trainset = MNIST(root='../../data', train=True, download=True, transform=transform,
                     train_class_num=args.train_class_num, test_class_num=args.test_class_num,
                     includes_all_train_class=args.includes_all_train_class)
    testset = MNIST(root='../../data', train=False, download=True, transform=transform,
                    train_class_num=args.train_class_num, test_class_num=args.test_class_num,
                    includes_all_train_class=args.includes_all_train_class)
    # data loader
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.bs, shuffle=True, num_workers=4)
    testloader = torch.utils.data.DataLoader(testset, batch_size=args.bs, shuffle=False, num_workers=4)

    print('==> Building model..')
    net = Network(backbone=args.arch, num_classes=args.train_class_num, embed_dim=args.embed_dim)
    # Feature dimension feeding the classifier; the center loss operates in
    # this same space.
    fea_dim = net.classifier.in_features
    net = net.to(device)
    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    # FIX: renamed the misspelled local `criterion_softamx` ->
    # `criterion_softmax` (consistent with `optimizer_softmax`).
    criterion_softmax = nn.CrossEntropyLoss()
    criterion_centerloss = CenterLoss(num_classes=args.train_class_num, feat_dim=fea_dim).to(device)
    optimizer_softmax = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
    optimizer_centerloss = torch.optim.SGD(criterion_centerloss.parameters(), lr=args.center_lr,
                                           momentum=0.9, weight_decay=5e-4)

    if args.resume:
        # Load checkpoint.
        if os.path.isfile(args.resume):
            print('==> Resuming from checkpoint..')
            checkpoint = torch.load(args.resume)
            net.load_state_dict(checkpoint['net'])
            # the learned class centers are part of the checkpoint too
            criterion_centerloss.load_state_dict(checkpoint['centerloss'])
            # best_acc = checkpoint['acc']
            # print("BEST_ACCURACY: "+str(best_acc))
            start_epoch = checkpoint['epoch']
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'), resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'))
        logger.set_names(['Epoch', 'Total Loss','Softmax Loss', 'Center Loss', 'train Acc.'])

    if not args.evaluate:
        # decay the softmax LR by 10x every 20 epochs
        scheduler = lr_scheduler.StepLR(optimizer_softmax, step_size=20, gamma=0.1)
        for epoch in range(start_epoch, start_epoch + args.es):
            print('\nEpoch: %d   Learning rate: %f' % (epoch + 1, optimizer_softmax.param_groups[0]['lr']))
            train_loss, softmax_loss, center_loss, train_acc = train(
                net, trainloader, optimizer_softmax, optimizer_centerloss,
                criterion_softmax, criterion_centerloss, device)
            save_model(net, criterion_centerloss, epoch, os.path.join(args.checkpoint, 'last_model.pth'))
            # plot the training data
            if args.plot:
                plot_feature(net, criterion_centerloss, trainloader, device, args.plotfolder,
                             epoch=epoch, plot_class_num=args.train_class_num,
                             maximum=args.plot_max, plot_quality=args.plot_quality)
            logger.append([epoch + 1, train_loss, softmax_loss, center_loss, train_acc])
            scheduler.step()

    test(net, testloader, device)
    if args.plot:
        # +1 class: the extra "unknown" class present in the open-set test set
        plot_feature(net, criterion_centerloss, testloader, device, args.plotfolder,
                     epoch="test", plot_class_num=args.train_class_num + 1,
                     maximum=args.plot_max, plot_quality=args.plot_quality)
    logger.close()
def main():
    """Train an MNIST open-set classifier and plot embedding features.

    Reads all settings from the module-level ``args`` namespace.  Test loss
    and accuracy are logged as 0 placeholders during training; real
    evaluation and the final feature plot happen after the loop.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(device)
    best_acc = 0  # best test accuracy
    start_epoch = 0  # start from epoch 0 or last checkpoint epoch
    # checkpoint
    args.checkpoint = './checkpoints/mnist/' + args.arch
    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)
    # folder to save figures
    args.plotfolder = './checkpoints/mnist/' + args.arch + '/plotter'
    if not os.path.isdir(args.plotfolder):
        mkdir_p(args.plotfolder)
    # Data
    print('==> Preparing data..')
    # (0.1307, 0.3081) are the standard MNIST mean/std normalisation constants.
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])
    trainset = MNIST(root='../../data',
                     train=True,
                     download=True,
                     transform=transform,
                     train_class_num=args.train_class_num,
                     test_class_num=args.test_class_num,
                     includes_all_train_class=args.includes_all_train_class)
    testset = MNIST(root='../../data',
                    train=False,
                    download=True,
                    transform=transform,
                    train_class_num=args.train_class_num,
                    test_class_num=args.test_class_num,
                    includes_all_train_class=args.includes_all_train_class)
    # data loader
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=args.bs,
                                              shuffle=True,
                                              num_workers=4)
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=args.bs,
                                             shuffle=False,
                                             num_workers=4)
    # Model
    net = Network(backbone=args.arch,
                  num_classes=args.train_class_num,
                  embed_dim=args.embed_dim)
    # feature dimension feeding the classifier
    fea_dim = net.classifier.in_features
    net = net.to(device)
    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True
    if args.resume:
        # Load checkpoint.
        if os.path.isfile(args.resume):
            print('==> Resuming from checkpoint..')
            checkpoint = torch.load(args.resume)
            net.load_state_dict(checkpoint['net'])
            # best_acc = checkpoint['acc']
            # print("BEST_ACCURACY: "+str(best_acc))
            start_epoch = checkpoint['epoch']
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                            resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'))
        logger.set_names([
            'Epoch', 'Learning Rate', 'Train Loss', 'Train Acc.', 'Test Loss',
            'Test Acc.'
        ])
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=0.9,
                          weight_decay=5e-4)
    # test(0, net, trainloader, testloader, criterion, device)
    # epoch pre-initialised so the calls after the loop work even when
    # --evaluate skips training entirely.
    epoch = 0
    if not args.evaluate:
        for epoch in range(start_epoch, args.es):
            print('\nEpoch: %d   Learning rate: %f' %
                  (epoch + 1, optimizer.param_groups[0]['lr']))
            adjust_learning_rate(optimizer, epoch, args.lr, step=20)
            train_loss, train_acc = train(net, trainloader, optimizer,
                                          criterion, device)
            save_model(net, None, epoch,
                       os.path.join(args.checkpoint, 'last_model.pth'))
            # No per-epoch evaluation: log zeros for the test columns.
            test_loss, test_acc = 0, 0
            #
            logger.append([
                epoch + 1, optimizer.param_groups[0]['lr'], train_loss,
                train_acc, test_loss, test_acc
            ])
            plot_feature(net,
                         trainloader,
                         device,
                         args.plotfolder,
                         epoch=epoch,
                         plot_class_num=args.train_class_num,
                         maximum=args.plot_max,
                         plot_quality=args.plot_quality)
            test(epoch, net, trainloader, testloader, criterion, device)
    # NOTE(review): 99999 looks like a sentinel epoch label for the final
    # evaluation pass — confirm against test()'s signature.
    test(99999, net, trainloader, testloader, criterion, device)
    # +1 class: the extra "unknown" class present in the open-set test set
    plot_feature(net,
                 testloader,
                 device,
                 args.plotfolder,
                 epoch="test",
                 plot_class_num=args.train_class_num + 1,
                 maximum=args.plot_max,
                 plot_quality=args.plot_quality)
    logger.close()
# NOTE(review): fragment — the opening parser.add_argument(...) call begins
# before this chunk and the trailing trainset/testset MNIST(...) call
# continues past it; code kept verbatim.
                    metavar='PATH',
                    help='path to latest checkpoint')
# Parameters for plotting
parser.add_argument(
    '--plot_max',
    default=0,
    type=int,
    help='max examples to plot in each class, 0 indicates all.')
args = parser.parse_args()

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Folder where the per-(alpha, beta) feature plots are stored.
args.plotter = './checkpoints/mnist/' + args.arch + '/plotter_%s_%s' % (
    args.alpha, args.beta)
if not os.path.isdir(args.plotter):
    mkdir_p(args.plotter)

print('==> Preparing data..')
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.1307, ), (0.3081, ))])
trainset = MNIST(root='../../data',
                 train=True,
                 download=True,
                 transform=transform,
                 train_class_num=args.train_class_num,
                 test_class_num=args.test_class_num,
                 includes_all_train_class=args.includes_all_train_class)
testset = MNIST(root='../../data',  # NOTE(review): call truncated here in this view
def __init__(self, zone, config):
    """Initialise the source/destination of the log messages.

    Sets up the transaction-tracking state (VM/request/token bookkeeping
    dataframes), the time-interval ticks and their refresh timer, the
    JSON-normalisation regexes, and either the file- or stream-based input
    depending on ``config['rabbitmq_input']['type']``.

    :param zone: zone name/path joined with the encapsulated output dir.
    :param config: parsed configuration mapping.
    :raises ValueError: if the input type is neither 'file' nor 'stream'.
    """
    self.transaction = config['analysis']['transaction']
    # if there is no end event after this period, the transaction is failed
    # (FIX: this value was previously assigned twice with the identical
    # expression; the duplicate assignment has been removed.)
    self.maxtransactionduration = config['analysis']['maxtransactionduration']
    self.data_source = config['rabbitmq_input']['type']
    self.save_all_logs = config['analysis']['save_all_logs']
    self.save_capsules = config['analysis']['save_capsules']
    self.encapsulate_dest_path = os.path.join(zone, config['encalupated_out']['dir'])
    # Start/end markers delimiting one VM-launch transaction.
    self.startevent = 'compute.instance.create.start'
    self.endevent = 'compute.instance.create.end'
    self.VMidList = []
    self.ReqidList = []
    self.TokenidList = []
    self.GlobalidList = []
    self.token2reqidDic = collections.defaultdict(dict)
    self.token2insidDic = collections.defaultdict(dict)
    self.token2starttimeDic = collections.defaultdict(dict)
    self.actiondfColList = ['transaction', 'token', 'request_id', 'instance_id',
                            'global_id', 'project_id', 'tenant_id', 'user_id',
                            'transaction_tstart', 'transaction_tend']
    self.actionstartdf = pd.DataFrame(columns=self.actiondfColList)
    self.cerror = -1  # counter for the number of exception errors happend (can be canceled out)
    self.rowcount = -1  # counter for tracking the number of records/logs/lines (can be canceled out)
    # dataframe for storing the required infromation of observed VMs that have been launched
    self.transaction_aggregated_df = pd.DataFrame()
    self.debug_file_flag = int(config['debug']['debug_file_flag'])
    self.debug_sherlock_flag = int(config['debug']['debug_sherlock_flag'])
    if self.debug_sherlock_flag:
        self.debug_sherlock_file = config['debug']['debug_sherlock_file']
    self.timeinterval = config['analysis']['cache_flush_interval']
    if self.debug_file_flag == 1:
        # In file-debug mode, replay from a fixed configured start time.
        self.current_interval_tick = np.datetime64(pd.to_datetime(
            str(config['debug']['startingdate']) + '-' + str(config['debug']['startingtime'])))
    else:
        # NOTE(review): pd.datetime is a deprecated alias (removed in
        # pandas >= 2.0); kept for compatibility with this Py2-era codebase.
        self.current_interval_tick = np.datetime64(pd.datetime.now())
    self.previous_interval_tick = self.current_interval_tick - np.timedelta64(self.timeinterval, 's')
    self.future_interval_tick = self.current_interval_tick + np.timedelta64(self.timeinterval, 's')
    self.interval_tickList = [self.previous_interval_tick, self.current_interval_tick, self.future_interval_tick]
    # Timer that calls "update_current_tick_and_more" every "self.timeinterval" seconds
    threading.Timer(1.0 * int(self.timeinterval), self.update_current_tick_and_more).start()
    mkdir_p(self.encapsulate_dest_path)
    self.n_mprocessing = config['processing']['ncores_per_zone']
    self.part = 0  # count number of the file (output parts written so far)
    # Replacement tokens used to normalise escaped/embedded JSON fragments.
    rep_tokens = {'\\': '', '"{': '{', '}"': '}', '\'{': '{', '}\'': '}', '{}': '"NA"'}
    self.rep_tokens = dict((re.escape(k), v) for k, v in rep_tokens.iteritems())
    self.pattern_tokens = re.compile("|".join(self.rep_tokens.keys()))
    rep_for_exchange = r'"oslo.message"'
    self.pattern_for_exchange = re.compile(rep_for_exchange)
    if self.data_source == 'file':
        self.files_to_load = None
        self.data_source_path = config['rabbitmq_input']['file_parameters']['dir']
    elif self.data_source == 'stream':
        self.flush_sherlock_msglist_interval = config['rabbitmq_input']['stream_parameters']['flush_sherlock_msglist_interval']
        LOG.debug("starting the timer " + str(self.flush_sherlock_msglist_interval))
        # Periodically drain the buffered stream messages into a frame.
        threading.Timer(self.flush_sherlock_msglist_interval, self.sherlock_retrive_df_process_frame).start()
        if self.debug_sherlock_flag == 0:
            self.sherlock_listener = SherlockListener(config)
            self.sherlock_listener.start_listener()
        else:
            # Debug mode: read pre-recorded messages from a local file.
            self.msg_list = []
            self.load_and_process_new_logs(filename='./sherlock')
    else:
        raise ValueError("Data source other than File is not implemented")
    # Re-derive the current tick after input setup, since the wall clock has
    # advanced while listeners/timers were being started.
    if config['debug']['debug_file_flag'] == 1:
        self.current_interval_tick = np.datetime64(pd.to_datetime(
            str(config['debug']['startingdate']) + '-' + str(config['debug']['startingtime'])))
    else:
        self.current_interval_tick = np.datetime64(pd.datetime.now())
    LOG.debug("Completed initialization ")