def main():
    """Export the embedding tables from a saved architecture-search checkpoint.

    Side effects: pins the CUDA device and seeds every RNG named in ``args``,
    builds the search ``Network`` (only so its parameter count can be logged),
    reads ``weights.pt`` from the hard-coded experiment directory, and writes
    the two embedding weight matrices to ``embeddings.pt`` in the same
    directory.  Exits with status 1 when no CUDA device is available.
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    # Pin the device and seed every RNG we touch so runs are reproducible.
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    dataset = Dataset(args.dataset)
    # Number of output classes = number of entities (first dim of the shape).
    CLASSES = dataset.get_shape()[0]

    criterion = nn.CrossEntropyLoss(reduction='mean')
    criterion = criterion.cuda()

    # Regularizer is selected by name; an unknown name raises KeyError,
    # which is the intended fail-fast behavior for a bad CLI argument.
    regularizer = {
        'N2': N2(args.reg),
        'N3': N3(args.reg),
    }[args.regularizer]

    model = Network(args.channels, CLASSES, args.layers, criterion,
                    regularizer, args.interleaved, dataset.get_shape(),
                    args.emb_dim, args.init, args.steps)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    # NOTE(review): the literal '%f' in this path looks like a leftover of an
    # unfilled format string -- confirm the checkpoint directory really is
    # named 'search-EXP-20190823-173036%f' on disk before relying on this.
    weights = torch.load('search-EXP-20190823-173036%f/weights.pt')
    # The checkpoint's state dict stores the entity/relation embedding
    # matrices under 'embeddings.0.weight' / 'embeddings.1.weight'; save
    # just those two tensors as a standalone artifact.
    embeddings = [weights['embeddings.0.weight'],
                  weights['embeddings.1.weight']]
    torch.save(embeddings, 'search-EXP-20190823-173036%f/embeddings.pt')
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) dataset = Dataset(args.dataset) train_examples = torch.from_numpy(dataset.get_train().astype('int64')) valid_examples = torch.from_numpy(dataset.get_valid().astype('int64')) CLASSES = dataset.get_shape()[0] criterion = nn.CrossEntropyLoss(reduction='mean') #criterion = CrossEntropyLabelSmooth(CLASSES, args.label_smooth) criterion = criterion.cuda() regularizer = { 'N2': N2(args.reg), 'N3': N3(args.reg), }[args.regularizer] model = Network(args.channels, CLASSES, args.layers, criterion, regularizer, args.interleaved, dataset.get_shape(), args.emb_dim, args.init, args.steps) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = { 'Adagrad': lambda: optim.Adagrad(model.parameters(), lr=args.learning_rate), #momentum=args.momentum, #weight_decay=args.weight_decay) 'Adam': lambda: optim.Adam(model.parameters(), lr=args.learning_rate, betas=(args.decay1, args.decay2)), 'SGD': lambda: optim.SGD(model.parameters(), lr=args.learning_rate) }[args.optimizer]() # optimizer = torch.optim.SGD( # model.parameters(), # args.learning_rate, # #TODO can we reintroduce these? 
# momentum=args.momentum, # weight_decay=args.weight_decay) train_queue = torch.utils.data.DataLoader( train_examples, batch_size=args.batch_size, shuffle=True, #sampler=torch.utils.data.sampler.RandomSampler(), pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader( valid_examples, batch_size=args.batch_size, shuffle=True, #sampler=torch.utils.data.sampler.RandomSampler(), pin_memory=True, num_workers=2) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) best_acc = 0 patience = 0 curve = {'valid': [], 'test': []} architect = Architect(model, args) for epoch in range(args.epochs): model.epoch = epoch print('model temperature param', 1.05**model.epoch) scheduler.step() lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) genotype = model.genotype() logging.info('genotype = %s', genotype) print(F.softmax((1.05**epoch) * model.alphas_normal, dim=-1)) train_epoch(train_examples, train_queue, valid_queue, model, architect, criterion, optimizer, regularizer, args.batch_size, args.learning_rate) if (epoch + 1) % args.report_freq == 0: valid, test = [ avg_both(*dataset.eval(model, split, -1 if split != 'train' else 50000)) for split in ['valid', 'test'] ] curve['valid'].append(valid) curve['test'].append(test) #curve['train'].append(train) #print("\t TRAIN: ", train) print("\t VALID: ", valid) print("\t TEST: ", test) is_best = False if valid['MRR'] > best_acc: best_acc = valid['MRR'] is_best = True patience = 0 else: patience += 1