def main():
    """Load a trained search checkpoint and export its embedding tables.

    Reads all configuration from the module-level ``args`` namespace and
    exits immediately if no GPU is available.  NOTE(review): the checkpoint
    directory below is hard-coded and contains a literal ``%f`` — presumably
    a leftover from an unfilled format string; confirm the directory exists
    on disk exactly as written.
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    # Seed numpy and torch (CPU + CUDA) for reproducibility.
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled=True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    dataset = Dataset(args.dataset)
    # Train/valid triples as int64 tensors.  They are loaded but unused
    # below — the weights come from the saved checkpoint instead.
    train_examples = torch.from_numpy(dataset.get_train().astype('int64'))
    valid_examples = torch.from_numpy(dataset.get_valid().astype('int64'))

    # First component of the dataset shape — used as the output class count.
    CLASSES = dataset.get_shape()[0]

    criterion = nn.CrossEntropyLoss(reduction='mean')
    #criterion = CrossEntropyLabelSmooth(CLASSES, args.label_smooth)
    criterion = criterion.cuda()

    # Weight regularizer chosen by CLI flag (N2 or N3 norm).
    regularizer = {
        'N2': N2(args.reg),
        'N3': N3(args.reg),
    }[args.regularizer]

    model = Network(args.channels, CLASSES, args.layers, criterion, regularizer, args.interleaved, dataset.get_shape(), args.emb_dim, args.init, args.steps)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    #model = utils.load(model, 'search-EXP-20190823-173036%f/weights.pt')
    # Load the raw state dict and pull out the two embedding tables
    # (keys 'embeddings.0.weight' / 'embeddings.1.weight' — presumably
    # entity and relation embeddings; verify against the Network class).
    weights = torch.load('search-EXP-20190823-173036%f/weights.pt')
    #print(weights)
    embeddings = [weights['embeddings.0.weight'], weights['embeddings.1.weight']]
    torch.save(embeddings, 'search-EXP-20190823-173036%f/embeddings.pt')
def main():
    """Architecture-search entry point: alternately trains model weights
    and architecture parameters (via ``Architect``) on a KBC dataset.

    Reads all configuration from the module-level ``args`` namespace and
    exits immediately if no GPU is available.  NOTE(review): this chunk
    ends mid-epoch-loop — the remainder of the loop (checkpointing, early
    stopping, final evaluation) is outside this view.
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    # Seed numpy and torch (CPU + CUDA) for reproducibility.
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    dataset = Dataset(args.dataset)
    # Training / validation triples as int64 tensors.
    train_examples = torch.from_numpy(dataset.get_train().astype('int64'))
    valid_examples = torch.from_numpy(dataset.get_valid().astype('int64'))

    # First component of the dataset shape — used as the output class count.
    CLASSES = dataset.get_shape()[0]

    criterion = nn.CrossEntropyLoss(reduction='mean')
    #criterion = CrossEntropyLabelSmooth(CLASSES, args.label_smooth)
    criterion = criterion.cuda()

    # Weight regularizer chosen by CLI flag (N2 or N3 norm).
    regularizer = {
        'N2': N2(args.reg),
        'N3': N3(args.reg),
    }[args.regularizer]

    model = Network(args.channels, CLASSES, args.layers, criterion, regularizer, args.interleaved, dataset.get_shape(), args.emb_dim, args.init, args.steps)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    # Weight optimizer selected by CLI flag; lambdas defer construction so
    # only the chosen optimizer is instantiated.
    optimizer = {
        'Adagrad': lambda: optim.Adagrad(model.parameters(), lr=args.learning_rate),
        #momentum=args.momentum,
        #weight_decay=args.weight_decay)
        'Adam': lambda: optim.Adam(model.parameters(), lr=args.learning_rate, betas=(args.decay1, args.decay2)),
        'SGD': lambda: optim.SGD(model.parameters(), lr=args.learning_rate)
    }[args.optimizer]()
    # optimizer = torch.optim.SGD(
    #     model.parameters(),
    #     args.learning_rate,
    #     #TODO can we reintroduce these?
    #     momentum=args.momentum,
    #     weight_decay=args.weight_decay)

    train_queue = torch.utils.data.DataLoader(
        train_examples, batch_size=args.batch_size,
        shuffle=True,
        #sampler=torch.utils.data.sampler.RandomSampler(),
        pin_memory=True, num_workers=2)
    valid_queue = torch.utils.data.DataLoader(
        valid_examples, batch_size=args.batch_size,
        shuffle=True,
        #sampler=torch.utils.data.sampler.RandomSampler(),
        pin_memory=True, num_workers=2)

    # Cosine-annealed learning rate over the full run, floored at
    # learning_rate_min.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    best_acc = 0    # best validation MRR seen so far
    patience = 0    # evaluation rounds since the last improvement
    curve = {'valid': [], 'test': []}

    # Bilevel optimizer for the architecture parameters.
    architect = Architect(model, args)

    for epoch in range(args.epochs):
        model.epoch = epoch
        # Architecture-weight softmax temperature is annealed as
        # 1.05**epoch (mixing weights sharpen over time).
        print('model temperature param', 1.05**model.epoch)
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        print(F.softmax((1.05**epoch) * model.alphas_normal, dim=-1))

        train_epoch(train_examples, train_queue, valid_queue, model, architect, criterion, optimizer, regularizer, args.batch_size, args.learning_rate)

        # Periodic evaluation.  NOTE(review): presumably -1 means "use all
        # examples" and 50000 caps the train split — confirm in Dataset.eval.
        if (epoch + 1) % args.report_freq == 0:
            valid, test = [
                avg_both(*dataset.eval(model, split, -1 if split != 'train' else 50000))
                for split in ['valid', 'test']
            ]
            curve['valid'].append(valid)
            curve['test'].append(test)
            #curve['train'].append(train)
            #print("\t TRAIN: ", train)
            print("\t VALID: ", valid)
            print("\t TEST: ", test)

            # Track best validation MRR; patience feeds early stopping.
            is_best = False
            if valid['MRR'] > best_acc:
                best_acc = valid['MRR']
                is_best = True
                patience = 0
            else:
                patience += 1
# NOTE(review): this chunk begins mid-statement — the ')' below closes a
# `parser.add_argument(...)` call whose opening lines are outside this view.
)
args = parser.parse_args()

dataset = Dataset(args.dataset)
# Training triples as an int64 tensor.
examples = torch.from_numpy(dataset.get_train().astype('int64'))
print(dataset.get_shape())

# Factored KBC model selected by CLI flag; lambdas defer construction so
# only the chosen model is instantiated.
model = {
    'CP': lambda: CP(dataset.get_shape(), args.rank, args.init),
    'ComplEx': lambda: ComplEx(dataset.get_shape(), args.rank, args.init),
    'DistMult': lambda: DistMult(dataset.get_shape(), args.rank, args.init)
}[args.model]()

# Weight regularizer chosen by CLI flag (F2 or N3 norm).
regularizer = {
    'F2': F2(args.reg),
    'N3': N3(args.reg),
}[args.regularizer]

# Fall back to CPU when no GPU is available (unlike the main() variants,
# which hard-exit without CUDA).
has_cuda = torch.cuda.is_available()
if has_cuda:
    device = 'cuda'
else:
    device = 'cpu'
model.to(device)

# Optimizer selected by CLI flag.
optim_method = {
    'Adagrad': lambda: optim.Adagrad(model.parameters(), lr=args.learning_rate),
    'Adam': lambda: optim.Adam(model.parameters(), lr=args.learning_rate, betas=(args.decay1, args.decay2)),
    'SGD': lambda: optim.SGD(model.parameters(), lr=args.learning_rate)
}[args.optimizer]()
def main():
    """Train a fixed architecture (genotype) on a KBC dataset, checkpoint
    on validation MRR, early-stop, and report final test metrics.

    Reads all configuration from the module-level ``args`` namespace and
    exits immediately if no GPU is available.
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    # Seed numpy and torch (CPU + CUDA) for reproducibility.
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled=True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    dataset = Dataset(args.dataset)
    # Training triples as an int64 tensor.
    train_examples = torch.from_numpy(dataset.get_train().astype('int64'))
    #TODO: does below need reintroducing somewhere?
    # device = 'cuda'
    # model.to(device)

    # First component of the dataset shape — used as the output class count.
    CLASSES = dataset.get_shape()[0]

    criterion = nn.CrossEntropyLoss(reduction='mean')
    #criterion = CrossEntropyLabelSmooth(CLASSES, args.label_smooth)
    criterion = criterion.cuda()

    # Weight regularizer chosen by CLI flag (N2 or N3 norm).
    regularizer = {
        'N2': N2(args.reg),
        'N3': N3(args.reg),
    }[args.regularizer]

    # Look up the architecture by name in the `genotypes` module.
    # NOTE(review): eval() on a CLI-supplied string is unsafe on untrusted
    # input — getattr(genotypes, args.arch) would be the safer equivalent.
    genotype = eval("genotypes.%s" % args.arch)
    logging.info('genotype = %s', genotype)
    model = Network(args.channels, CLASSES, args.layers, criterion, regularizer, genotype, args.interleaved, dataset.get_shape(), args.emb_dim, args.init)
    model = model.cuda()

    # Optimizer selected by CLI flag; only the Adam branch applies
    # weight decay here.
    optimizer = {
        'Adagrad': lambda: optim.Adagrad(model.parameters(), lr=args.learning_rate),
        'Adam': lambda: optim.Adam(model.parameters(), lr=args.learning_rate, weight_decay = args.weight_decay, betas=(args.decay1, args.decay2)),
        'SGD': lambda: optim.SGD(model.parameters(), lr=args.learning_rate)
    }[args.optimizer]()

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    #optimizer = torch.optim.SGD(
    #    model.parameters(),
    #    args.learning_rate,
    #    #momentum=args.momentum,
    #    #weight_decay=args.weight_decay
    #    )

    train_queue = torch.utils.data.DataLoader(
        train_examples, batch_size=args.batch_size,
        shuffle = True,
        #sampler=torch.utils.data.sampler.RandomSampler(),
        pin_memory=True, num_workers=2)

    #TODO do we want the learning rate min here?
    # Cosine-annealed learning rate over the full run, floored at
    # learning_rate_min.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    best_acc = 0    # best validation MRR seen so far
    patience = 0    # evaluation rounds since the last improvement
    curve = {'valid': [], 'test': []}

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)
        # Linearly ramp the drop-path probability over training.
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_epoch(train_examples, train_queue, model, optimizer, regularizer, args.batch_size)

        # Periodic evaluation.  NOTE(review): presumably -1 means "use all
        # examples" and 50000 caps the train split — confirm in Dataset.eval.
        if (epoch + 1) % args.report_freq == 0:
            valid, test = [
                avg_both(*dataset.eval(model, split, -1 if split != 'train' else 50000))
                for split in ['valid', 'test']
            ]
            curve['valid'].append(valid)
            curve['test'].append(test)
            #curve['train'].append(train)
            #print("\t TRAIN: ", train)
            print("\t VALID : ", valid)
            print("\t TEST: ", test)

            is_best = False
            if valid['MRR'] > best_acc:
                best_acc = valid['MRR']
                is_best = True
                patience = 0
            else:
                patience +=1

            # NOTE(review): indentation reconstructed — checkpointing and
            # early stopping are kept inside the evaluation branch because
            # `is_best` and `patience` are only defined/updated here.
            utils.save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_acc_top1': best_acc,
                'optimizer' : optimizer.state_dict(),
            }, is_best, args.save)

            # Stop after 5 evaluation rounds with no MRR improvement.
            if patience >= 5:
                print('early stopping...')
                break

    #utils.save(model, os.path.join(args.save, 'weights.pt'))

    # Final full evaluation on the test split, then persist the metric
    # curves for later plotting.
    results = dataset.eval(model, 'test', -1)
    print("\n\nTEST : ", results)
    with open(os.path.join(args.save, 'curve.pkl'), 'wb') as f:
        pickle.dump(curve, f, pickle.HIGHEST_PROTOCOL)
# Temporal KBC training script (top-level statements).
dataset = TemporalDataset(args.dataset)
sizes = dataset.get_shape()

# Model selected by CLI flag.  NOTE(review): unlike the dict-of-lambdas
# pattern used elsewhere in this file, every model here is constructed
# eagerly before the lookup; only the chosen one is kept.
model = {
    'ComplEx': ComplEx(sizes, args.rank),
    'TComplEx': TComplEx(sizes, args.rank, no_time_emb=args.no_time_emb),
    'TNTComplEx': TNTComplEx(sizes, args.rank, no_time_emb=args.no_time_emb),
    'TuckERT': TuckERT(sizes, args),
    'TuckERTNT': TuckERTNT(sizes, args)
}[args.model]
model = model.cuda()
model.init()

opt = optim.Adagrad(model.parameters(), lr=args.learning_rate)

# Separate regularizers: N3 on the embeddings, Lambda3 on the temporal
# smoothness term.
emb_reg = N3(args.emb_reg)
time_reg = Lambda3(args.time_reg)

for epoch in range(args.max_epochs):
    # Re-fetch training triples each epoch as an int64 tensor.
    examples = torch.from_numpy(dataset.get_train().astype('int64'))
    model.train()
    if dataset.has_intervals():
        # Interval-valued facts use the dedicated IKBC optimizer wrapper,
        # which runs one full pass over `examples`.
        optimizer = IKBCOptimizer(model, emb_reg, time_reg, opt, dataset, batch_size=args.batch_size)
        optimizer.epoch(examples)
        # NOTE(review): chunk ends here — the non-interval branch (and any
        # remainder of the loop) is outside this view.