# Utility entry point: rebuilds the search model, then extracts the learned
# embedding tables from a saved search checkpoint.
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    # Reproducibility and cuDNN settings.
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    dataset = Dataset(args.dataset)
    train_examples = torch.from_numpy(dataset.get_train().astype('int64'))
    valid_examples = torch.from_numpy(dataset.get_valid().astype('int64'))

    CLASSES = dataset.get_shape()[0]

    criterion = nn.CrossEntropyLoss(reduction='mean')
    # criterion = CrossEntropyLabelSmooth(CLASSES, args.label_smooth)
    criterion = criterion.cuda()

    regularizer = {
        'N2': N2(args.reg),
        'N3': N3(args.reg),
    }[args.regularizer]

    model = Network(args.channels, CLASSES, args.layers, criterion, regularizer,
                    args.interleaved, dataset.get_shape(), args.emb_dim,
                    args.init, args.steps)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    # Pull the entity and relation embedding tables out of the checkpoint
    # and store them separately.
    # model = utils.load(model, 'search-EXP-20190823-173036%f/weights.pt')
    weights = torch.load('search-EXP-20190823-173036%f/weights.pt')
    # print(weights)
    embeddings = [weights['embeddings.0.weight'], weights['embeddings.1.weight']]
    torch.save(embeddings, 'search-EXP-20190823-173036%f/embeddings.pt')
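# A minimal sketch (not part of the original script) of how the tensor pair
# saved in 'embeddings.pt' above could be reloaded and wrapped back into
# trainable nn.Embedding layers; the two-table layout (entities, relations)
# is assumed from the 'embeddings.0.weight' / 'embeddings.1.weight' keys.
import torch
import torch.nn as nn

entity_weights, relation_weights = torch.load(
    'search-EXP-20190823-173036%f/embeddings.pt')
entity_emb = nn.Embedding.from_pretrained(entity_weights, freeze=False)
relation_emb = nn.Embedding.from_pretrained(relation_weights, freeze=False)
print(entity_emb.weight.shape, relation_emb.weight.shape)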
# Architecture-search entry point: alternates weight updates with
# architecture (alpha) updates via the DARTS-style Architect.
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    dataset = Dataset(args.dataset)
    train_examples = torch.from_numpy(dataset.get_train().astype('int64'))
    valid_examples = torch.from_numpy(dataset.get_valid().astype('int64'))

    CLASSES = dataset.get_shape()[0]

    criterion = nn.CrossEntropyLoss(reduction='mean')
    # criterion = CrossEntropyLabelSmooth(CLASSES, args.label_smooth)
    criterion = criterion.cuda()

    regularizer = {
        'N2': N2(args.reg),
        'N3': N3(args.reg),
    }[args.regularizer]

    model = Network(args.channels, CLASSES, args.layers, criterion, regularizer,
                    args.interleaved, dataset.get_shape(), args.emb_dim,
                    args.init, args.steps)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = {
        'Adagrad': lambda: optim.Adagrad(model.parameters(), lr=args.learning_rate),
                                         # momentum=args.momentum,
                                         # weight_decay=args.weight_decay)
        'Adam': lambda: optim.Adam(model.parameters(), lr=args.learning_rate,
                                   betas=(args.decay1, args.decay2)),
        'SGD': lambda: optim.SGD(model.parameters(), lr=args.learning_rate)
    }[args.optimizer]()
    # optimizer = torch.optim.SGD(
    #     model.parameters(),
    #     args.learning_rate,
    #     # TODO can we reintroduce these?
    #     momentum=args.momentum,
    #     weight_decay=args.weight_decay)

    train_queue = torch.utils.data.DataLoader(
        train_examples, batch_size=args.batch_size,
        shuffle=True,
        # sampler=torch.utils.data.sampler.RandomSampler(),
        pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        valid_examples, batch_size=args.batch_size,
        shuffle=True,
        # sampler=torch.utils.data.sampler.RandomSampler(),
        pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    best_acc = 0
    patience = 0
    curve = {'valid': [], 'test': []}

    architect = Architect(model, args)

    for epoch in range(args.epochs):
        model.epoch = epoch
        print('model temperature param', 1.05 ** model.epoch)
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        # Temperature-annealed softmax over the architecture parameters.
        print(F.softmax((1.05 ** epoch) * model.alphas_normal, dim=-1))

        train_epoch(train_examples, train_queue, valid_queue, model, architect,
                    criterion, optimizer, regularizer, args.batch_size,
                    args.learning_rate)

        if (epoch + 1) % args.report_freq == 0:
            valid, test = [
                avg_both(*dataset.eval(model, split, -1 if split != 'train' else 50000))
                for split in ['valid', 'test']
            ]
            curve['valid'].append(valid)
            curve['test'].append(test)
            # curve['train'].append(train)
            # print("\t TRAIN: ", train)
            print("\t VALID: ", valid)
            print("\t TEST: ", test)

            is_best = False
            if valid['MRR'] > best_acc:
                best_acc = valid['MRR']
                is_best = True
                patience = 0
            else:
                patience += 1
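# Standalone illustration of the temperature annealing used above:
# multiplying the architecture logits by 1.05**epoch sharpens the softmax
# over candidate operations as the search progresses.
import torch
import torch.nn.functional as F

alphas = torch.tensor([0.1, 0.3, 0.2])   # toy architecture logits
for epoch in [0, 20, 50]:
    temp = 1.05 ** epoch
    print(epoch, F.softmax(temp * alphas, dim=-1))
# Early epochs give a near-uniform mixture over operations; later epochs
# concentrate probability mass on the highest-scoring operation.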
help="Learning rate" ) parser.add_argument( '--decay1', default=0.9, type=float, help="decay rate for the first moment estimate in Adam" ) parser.add_argument( '--decay2', default=0.999, type=float, help="decay rate for second moment estimate in Adam" ) args = parser.parse_args() dataset = Dataset(args.dataset) examples = torch.from_numpy(dataset.get_train().astype('int64')) print(dataset.get_shape()) model = { 'CP': lambda: CP(dataset.get_shape(), args.rank, args.init), 'ComplEx': lambda: ComplEx(dataset.get_shape(), args.rank, args.init), 'DistMult': lambda: DistMult(dataset.get_shape(), args.rank, args.init) }[args.model]() regularizer = { 'F2': F2(args.reg), 'N3': N3(args.reg), }[args.regularizer] has_cuda = torch.cuda.is_available() if has_cuda: device = 'cuda' else:
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    dataset = Dataset(args.dataset)
    train_examples = torch.from_numpy(dataset.get_train().astype('int64'))
    # TODO: does below need reintroducing somewhere?
    # device = 'cuda'
    # model.to(device)

    CLASSES = dataset.get_shape()[0]

    criterion = nn.CrossEntropyLoss(reduction='mean')
    # criterion = CrossEntropyLabelSmooth(CLASSES, args.label_smooth)
    criterion = criterion.cuda()

    regularizer = {
        'N2': N2(args.reg),
        'N3': N3(args.reg),
    }[args.regularizer]

    genotype = eval("genotypes.%s" % args.arch)
    logging.info('genotype = %s', genotype)
    model = Network(args.channels, CLASSES, args.layers, criterion, regularizer,
                    genotype, args.interleaved, dataset.get_shape(),
                    args.emb_dim, args.init)
    model = model.cuda()

    optimizer = {
        'Adagrad': lambda: optim.Adagrad(model.parameters(), lr=args.learning_rate),
        'Adam': lambda: optim.Adam(model.parameters(), lr=args.learning_rate,
                                   weight_decay=args.weight_decay,
                                   betas=(args.decay1, args.decay2)),
        'SGD': lambda: optim.SGD(model.parameters(), lr=args.learning_rate)
    }[args.optimizer]()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
    # optimizer = torch.optim.SGD(
    #     model.parameters(),
    #     args.learning_rate,
    #     momentum=args.momentum,
    #     weight_decay=args.weight_decay)

    train_queue = torch.utils.data.DataLoader(
        train_examples, batch_size=args.batch_size,
        shuffle=True,
        # sampler=torch.utils.data.sampler.RandomSampler(),
        pin_memory=True, num_workers=2)

    # TODO do we want the learning rate min here?
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    best_acc = 0
    patience = 0
    curve = {'valid': [], 'test': []}

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_epoch(train_examples, train_queue, model, optimizer, regularizer,
                    args.batch_size)

        if (epoch + 1) % args.report_freq == 0:
            valid, test = [
                avg_both(*dataset.eval(model, split, -1 if split != 'train' else 50000))
                for split in ['valid', 'test']
            ]
            curve['valid'].append(valid)
            curve['test'].append(test)
            # curve['train'].append(train)
            # print("\t TRAIN: ", train)
            print("\t VALID : ", valid)
            print("\t TEST: ", test)

            is_best = False
            if valid['MRR'] > best_acc:
                best_acc = valid['MRR']
                is_best = True
                patience = 0
            else:
                patience += 1

            utils.save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_acc_top1': best_acc,
                'optimizer': optimizer.state_dict(),
            }, is_best, args.save)

            if patience >= 5:
                print('early stopping...')
                break

    # utils.save(model, os.path.join(args.save, 'weights.pt'))
    results = dataset.eval(model, 'test', -1)
    print("\n\nTEST : ", results)

    with open(os.path.join(args.save, 'curve.pkl'), 'wb') as f:
        pickle.dump(curve, f, pickle.HIGHEST_PROTOCOL)
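# Aside on the `eval("genotypes.%s" % args.arch)` lookup above: a hedged
# alternative sketch using getattr, which resolves the named architecture
# without evaluating an arbitrary string (assumes `genotypes` is the module
# of named genotypes that the eval call already relies on).
import genotypes

def lookup_genotype(arch_name):
    # Equivalent to eval("genotypes.%s" % arch_name), minus code execution.
    return getattr(genotypes, arch_name)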
if not os.path.exists(save_path):
    os.mkdir(save_path)
with open(os.path.join(save_path, 'config.json'), 'w') as f:
    json.dump(vars(args), f, indent=4)

data_path = "../data"
dataset = Dataset(data_path, args.dataset)
examples = torch.from_numpy(dataset.get_train().astype('int64'))

if args.do_ce_weight:
    ce_weight = torch.Tensor(dataset.get_weight()).cuda()
else:
    ce_weight = None

print(dataset.get_shape())
model = None
regularizer = None
# NOTE: the exec()-based dispatch rebinds these names only because this code
# runs at module scope; inside a function, exec() cannot rebind locals in
# Python 3.
exec('model = ' + args.model + '(dataset.get_shape(), args.rank, args.init)')
exec('regularizer = ' + args.regularizer + '(args.reg)')
regularizer = [regularizer, N3(args.reg)]

device = 'cuda'
model.to(device)
for reg in regularizer:
    reg.to(device)

optim_method = {
    'Adagrad': lambda: optim.Adagrad(model.parameters(), lr=args.learning_rate),
    'Adam': lambda: optim.Adam(model.parameters(), lr=args.learning_rate,
                               betas=(args.decay1, args.decay2)),
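# Minimal demonstration of the exec() scope gotcha noted above: if this
# script were wrapped in a main() function, the exec assignments would not
# rebind the local names in Python 3.
def broken():
    model = None
    exec('model = "ComplEx"')
    return model

print(broken())  # still None: exec wrote to a throwaway locals() dict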
utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))

# Log to stdout and to a file in the experiment directory.
log_format = '%(asctime)s %(message)s'
logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                    format=log_format, datefmt='%m/%d %I:%M:%S %p')
fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)

dataset = Dataset(args.dataset)
examples = torch.from_numpy(dataset.get_train().astype('int64'))

# NOTE: this model is immediately overwritten by the Network below.
model = {
    'CP': lambda: CP(dataset.get_shape(), args.rank, args.init),
    'ComplEx': lambda: ComplEx(dataset.get_shape(), args.rank, args.init),
    'MLP': lambda: MLP(dataset.get_shape(), args.rank, args.init)
}[args.model]()
# device = 'cuda'
# model.to(device)

genotype = eval("genotypes.%s" % args.arch)  # check this
CLASSES = dataset.get_shape()[0]
model = Network(args.init_channels, CLASSES, args.layers, args.auxiliary, genotype)

if args.parallel:
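    # Hedged sketch (an assumption, not the original body of the
    # `args.parallel` branch above): DARTS-derived training scripts commonly
    # wrap the network in nn.DataParallel at this point so batches are split
    # across visible GPUs.
    model = torch.nn.DataParallel(model).cuda()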