def propagate(self, image, superpixels, pairwise):
    elapsed = tic()
    nLayers = 1 + len(self.layers)
    act = [ActiveLayer() for i in range(nLayers)]
    act[0].x = image.reshape(image.shape + tuple([1]))
    mi = ModelInputs(superpixels, pairwise)
    for i in range(nLayers - 1):
        lyr = self.layers[i]
        when = tic()
        if lyr.type in ['conv', 'logistic', 'pool', 'relu', 'custom']:
            act[i + 1].x = lyr.propagate(act[i].x, mi)
        elif lyr.type == 'structured_loss':
            act[i + 1].labels = self.structured_loss(act[i].x, lyr, mi)
        else:
            error('Unknown layer type %s', lyr.type)
        act[i].x = None  # Recover memory.
        act[i].time = toc(when)  # Save time elapsed processing the layer.
        if act[i + 1].x is not None:
            print("L[%d]: %s layer '%s'\t produced a %s\t output in %f seconds."
                  % (i, str(lyr.type), str(lyr.name), str(act[i + 1].x.shape), act[i].time))
        else:
            print("L[%d]: %s layer '%s'\t produced a %s\t output in %f seconds."
                  % (i, str(lyr.type), str(lyr.name), str(act[i + 1].labels.shape), act[i].time))
    print("NN forward propagation completed in %f seconds." % (toc(elapsed)))
    return act
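# The snippets in this section rely on tic()/toc() timing helpers that are not
# shown. A minimal sketch of the handle-based variant used above, where tic()
# returns a start time and toc(start) returns the elapsed seconds. This is an
# assumed implementation matching the call sites, not the original authors' code.
import time

def tic():
    """Return a timestamp to be passed to toc()."""
    return time.time()

def toc(start):
    """Return the number of seconds elapsed since `start`."""
    return time.time() - start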
def cluster_analysis(dim, k_cls):
    X_dim_red = {}

    print "Reducing dimensions..."
    util.tic()
    svd_cls = TruncatedSVD(n_components=dim, algorithm='arpack')
    svd = make_pipeline(normalizer_svd, svd_cls)
    X_dim_red['SVD'] = svd.fit_transform(X_tfidf)
    nmf_cls = NMF(n_components=dim, init='random', random_state=random_state)
    nmf = make_pipeline(normalizer_nmf, nmf_cls)
    X_dim_red['NMF'] = nmf.fit_transform(X_tfidf)
    util.toc()

    # transform
    #X_dim_red['SVD'] += np.max(X_dim_red['SVD'])
    #X_dim_red['SVD'] = X_dim_red['SVD'] ** 2
    X_dim_red['NMF'] = util.clamp(-10, np.log(X_dim_red['NMF']), 10)

    # clustering
    print "Clustering..."
    util.tic()
    kmeans = {}
    kmeans['SVD'] = KMeans(n_clusters=k_cls, random_state=random_state).fit(X_dim_red['SVD'])
    kmeans['NMF'] = KMeans(n_clusters=k_cls, random_state=random_state).fit(X_dim_red['NMF'])
    util.toc()

    #--------------------------------
    ### Evaluation
    # Purity statistics
    if k_cls == 6:
        y_true = group6_true
    elif k_cls == 20:
        y_true = group20_true
    print "Purity stats report:"
    print "Dimension = %d" % dim
    print "No. of groups = {}".format(k_cls)
    for method in ['SVD', 'NMF']:
        conf_mat = metrics.confusion_matrix(y_true, kmeans[method].labels_)
        print "======== Method: %s ========" % method
        print "Confusion Matrix:"
        print conf_mat
        #print "Confusion Matrix (w/ best permutation):"
        #print util.sort_matrix_diagonally(conf_mat)
        print "Homogeneity_score = {:4f}".format(
            homogeneity_score(y_true, kmeans[method].labels_))
        print "Completeness_score = {:4f}".format(
            completeness_score(y_true, kmeans[method].labels_))
        print "Adjusted_rand_score = {:4f}".format(
            adjusted_rand_score(y_true, kmeans[method].labels_))
        print "Adjusted_mutual_info_score = {:4f}".format(
            adjusted_mutual_info_score(y_true, kmeans[method].labels_))
    return
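# util.clamp() is not shown in this section. Judging from the call
# util.clamp(-10, np.log(X), 10), it presumably bounds an array elementwise;
# a minimal sketch under the assumed signature clamp(lo, arr, hi):
import numpy as np

def clamp(lo, arr, hi):
    """Clip `arr` elementwise to the interval [lo, hi]."""
    return np.clip(arr, lo, hi)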
def prCIn(nm, nRep, sca):
    """
    Start a prompter for displaying information about a loop.

    Input
      nm   - name
      nRep - #steps
      sca  - scale of moving, (0, 1) | 1 | 2 | ...
    """
    # variables set in "prSet()"
    global lPr, nmPrs, ticPrs, ticPr0s, nRepPrs, scaPrs

    # insert
    nmPrs[lPr] = nm
    ticPrs[lPr] = tic()
    ticPr0s[lPr] = ticPrs[lPr]
    nRepPrs[lPr] = nRep

    # scaling
    if sca < 1:
        sca = round(nRep * sca)
    if sca == 0:
        sca = 1
    scaPrs[lPr] = sca

    # print
    pr('%s: %d %d' % (nm, nRep, sca))

    lPr = lPr + 1
def _solve(m):
    """Solve instance of switch model, using the specified objective, then load the results"""

    tic()
    results = opt.solve(m, keepfiles=False, tee=True,
        symbolic_solver_labels=True, suffixes=['dual', 'iis'])
    log("Solver finished; "); toc()

    # results.write()
    log("loading solution... "); tic()
    # Pyomo changed their interface for loading results somewhere
    # between 4.0.x and 4.1.x in a way that was not backwards compatible.
    # Make the code accept either version
    if hasattr(m, 'solutions'):
        # Works in Pyomo version 4.1.x
        m.solutions.load_from(results)
    else:
        # Works in Pyomo version 4.0.9682
        m.load(results)
    toc()

    return results
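# The SWITCH-style scripts in this section call tic()/toc() with no arguments,
# together with a log() helper. A minimal sketch of that convention, assuming
# tic() pushes a start time onto a stack and toc() pops it and logs the
# elapsed time; these are assumed implementations, not the project's actual
# utilities.
import sys, time

_timers = []

def tic():
    _timers.append(time.time())

def toc():
    elapsed = time.time() - _timers.pop()
    log("time taken: {:.2f} s.\n".format(elapsed))

def log(msg):
    sys.stdout.write(msg)
    sys.stdout.flush()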
def prC(iRep):
    """
    Prompt information of a counter.

    Input
      iRep - current step
    """
    # variables defined in "prSet()"
    global lPr, nmPrs, ticPrs, nRepPrs, scaPrs

    lPr = lPr - 1
    if (iRep != 0 and iRep % scaPrs[lPr] == 0) or (iRep == nRepPrs[lPr]):
        # time
        t = toc(ticPrs[lPr])

        # print
        pr('%s: %d/%d, %.2f secs' % (nmPrs[lPr], iRep, nRepPrs[lPr], t))

        # re-start a timer
        ticPrs[lPr] = tic()
    lPr = lPr + 1
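# A sketch of how the prCIn()/prC() prompter pair above is presumably used
# around a loop. prCOut() (closing the prompter) and prSet() (allocating the
# global arrays) are assumed to exist elsewhere in the same module, and
# do_work() is a hypothetical per-iteration task.
n = 1000
prCIn('processing', n, .1)   # report roughly every 10% of the iterations
for i in range(n):
    do_work(i)               # hypothetical work
    prC(i)                   # prints progress and elapsed time at each step mark
prCOut(n)                    # assumed closing call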
def go(arg): global repeats repeats = arg.repeats tbdir = arg.tb_dir if arg.tb_dir is not None else os.path.join( './runs', get_slug(arg))[:250] tbw = SummaryWriter(log_dir=tbdir) dev = 'cuda' if torch.cuda.is_available() else 'cpu' train_mrrs = [] test_mrrs = [] train, val, test, (n2i, i2n), (r2i, i2r) = \ kgmodels.load_lp(arg.name) print(len(i2n), 'nodes') print(len(i2r), 'relations') print(train.size(0), 'training triples') print(test.size(0), 'test triples') print(train.size(0) + test.size(0), 'total triples') # print(train) # print(test) # sys.exit() # set of all triples (for filtering) alltriples = set() for s, p, o in torch.cat([train, test], dim=0): s, p, o = s.item(), p.item(), o.item() alltriples.add((s, p, o)) if arg.final: train, test = torch.cat([train, val], dim=0), test else: train, test = train, val if arg.decomp == 'block': # -- pad the node list to make it divisible by the nr. of blocks added = 0 while len(i2n) % arg.num_blocks != 0: label = 'null' + str(added) i2n.append(label) n2i[label] = len(i2n) - 1 added += 1 print( f'nodes padded to {len(i2n)} to make it divisible by {arg.num_blocks} (added {added} null nodes).' ) if repeats > 1: RP, EP = trange, range else: RP, EP = range, trange for r in RP(repeats): """ Define model """ if arg.model == 'classic': model = kgmodels.LinkPrediction(triples=train, n=len(i2n), r=len(i2r), hidden=arg.emb, out=arg.emb, decomp=arg.decomp, numbases=arg.num_bases, numblocks=arg.num_blocks, depth=arg.depth, do=arg.do, biases=arg.biases, prune=arg.prune, dropout=arg.edge_dropout) elif arg.model == 'narrow': model = kgmodels.LPNarrow(triples=train, n=len(i2n), r=len(i2r), emb=arg.emb, hidden=arg.hidden, decomp=arg.decomp, numbases=arg.num_bases, numblocks=arg.num_blocks, depth=arg.depth, do=arg.do, biases=arg.biases, prune=arg.prune, edge_dropout=arg.edge_dropout) elif arg.model == 'sampling': model = kgmodels.SimpleLP(triples=train, n=len(i2n), r=len(i2r), emb=arg.emb, h=arg.hidden, ksample=arg.k, csample=arg.c, multi=arg.multi, decoder=arg.decoder) else: raise Exception(f'model not recognized: {arg.model}') if torch.cuda.is_available(): prt('Using CUDA.') model.cuda() if arg.opt == 'adam': opt = torch.optim.Adam(model.parameters(), lr=arg.lr[0]) elif arg.opt == 'adamw': opt = torch.optim.AdamW(model.parameters(), lr=arg.lr[0]) elif arg.opt == 'adagrad': opt = torch.optim.Adagrad(model.parameters(), lr=arg.lr[0]) elif arg.opt == 'sgd': opt = torch.optim.SGD(model.parameters(), lr=arg.lr[0], nesterov=True, momentum=arg.momentum) else: raise Exception() # nr of negatives sampled ng = arg.negative_rate seen = 0 for e in range(sum(arg.epochs)): depth = 0 set_lr(opt, arg.lr[0]) if e >= arg.epochs[0]: depth = 1 set_lr(opt, arg.lr[1]) if e >= sum(arg.epochs[:2]): depth = 2 set_lr(opt, arg.lr[2]) seeni, sumloss = 0, 0.0 if arg.c is not None: tic() model.precompute_globals() print(f'precomp took {toc():.2}s') tsample, tforward, tbackward, ttotal, tloss, tstep = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 for fr in EP(0, train.size(0), arg.batch): tic() model.train(True) if arg.limit is not None and seeni > arg.limit: break # # if torch.cuda.is_available() and random.random() < 0.01: # print(f'\nPeak gpu memory use is {torch.cuda.max_memory_cached() / 1e9:.2} Gb') to = min(train.size(0), fr + arg.batch) with torch.no_grad(): positives = train[fr:to] b, _ = positives.size() tic() # sample negatives if arg.corrupt_global: # global corruption (sample random true triples to corrupt) indices = torch.randint(size=(b * ng, ), low=0, high=train.size(0)) negatives = 
train[indices, :].view( b, ng, 3) # -- triples to be corrupted else: # local corruption (directly corrupt the current batch) negatives = positives.clone()[:, None, :].expand( b, ng, 3).contiguous() corrupt(negatives, len(i2n)) triples = torch.cat([positives[:, None, :], negatives], dim=1) if torch.cuda.is_available(): triples = triples.cuda() if arg.loss == 'bce': labels = torch.cat( [torch.ones(b, 1), torch.zeros(b, ng)], dim=1) elif arg.loss == 'ce': labels = torch.zeros(b, dtype=torch.long) # -- CE loss treats the problem as a multiclass classification problem: for a positive triple, # together with its k corruptions, identify which is the true triple. This is always triple 0, # but the score function is order equivariant, so i can't see the index of the triple it's # classifying. if torch.cuda.is_available(): labels = labels.cuda() tsample += toc() opt.zero_grad() tic() out = model(triples, depth=depth) assert out.size() == (b, ng + 1) tic() if arg.loss == 'bce': loss = F.binary_cross_entropy_with_logits(out, labels) elif arg.loss == 'ce': loss = F.cross_entropy(out, labels) if arg.l2weight is not None: l2 = sum([p.pow(2).sum() for p in model.parameters()]) loss = loss + arg.l2weight * l2 tloss += toc() tforward += toc() tic() loss.backward() tbackward += toc() sumloss += float(loss.item()) tic() opt.step() tstep += toc() seen += b seeni += b ttotal += toc() prt(f'epoch {e} (d{depth}); training loss {sumloss/seeni:.4} s {tsample:.3}s, f {tforward:.3}s (loss {tloss:.3}s), b {tbackward:.3}, st {tstep:.3}, t {ttotal:.3}s' ) # Evaluate if (e % arg.eval_int == 0 and e != 0) or e == sum(arg.epochs) - 1: with torch.no_grad(): model.train(False) ranks = [] mrr = hitsat1 = hitsat3 = hitsat10 = 0.0 if arg.eval_size is None: testsub = test else: testsub = test[random.sample(range(test.size(0)), k=arg.eval_size)] tseen = 0 for tail in [True, False]: # head or tail prediction for s, p, o in (testsub if repeats > 1 else tqdm.tqdm(testsub)): s, p, o = s.item(), p.item(), o.item() if tail: ot = o del o raw_candidates = [(s, p, o) for o in range(len(i2n))] candidates = filter(raw_candidates, alltriples, (s, p, ot)) else: st = s del s raw_candidates = [(s, p, o) for s in range(len(i2n))] candidates = filter(raw_candidates, alltriples, (st, p, o)) candidates = torch.tensor(candidates) scores = util.batch(model, candidates, batch_size=arg.batch * 2, depth=depth) # -- the batch size needs to be a little conservative here, due to the high variance in nr of # triples sampled. sorted_candidates = [ tuple(p[0]) for p in sorted(zip(candidates.tolist(), scores.tolist()), key=lambda p: -p[1]) ] rank = (sorted_candidates.index((s, p, ot)) + 1) if tail else (sorted_candidates.index( (st, p, o)) + 1) ranks.append(rank) hitsat1 += (rank == 1) hitsat3 += (rank <= 3) hitsat10 += (rank <= 10) mrr += 1.0 / rank tseen += 1 mrr = mrr / tseen hitsat1 = hitsat1 / tseen hitsat3 = hitsat3 / tseen hitsat10 = hitsat10 / tseen prt(f'epoch {e}: MRR {mrr:.4}\t hits@1 {hitsat1:.4}\t hits@3 {hitsat3:.4}\t hits@10 {hitsat10:.4}' ) prt(f' ranks : {ranks[:10]}') test_mrrs.append(mrr) print('training finished.') temrrs = torch.tensor(test_mrrs) print( f'mean test MRR {temrrs.mean():.3} ({temrrs.std():.3}) \t{test_mrrs}' )
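# corrupt() is called above for local negative sampling but not shown. A
# minimal sketch under the assumption that it overwrites, in place, either the
# head (column 0) or the tail (column 2) of each candidate triple with a
# uniformly random entity index; the real helper may differ in detail.
import torch

def corrupt(batch, n):
    """In-place corruption of a (b, ng, 3) batch of triples over n entities."""
    bs, ns, _ = batch.size()
    part = torch.randint(0, 2, (bs, ns)) * 2          # choose head (0) or tail (2) per candidate
    new_entities = torch.randint(0, n, (bs, ns))      # random replacement entities
    rows = torch.arange(bs)[:, None].expand(bs, ns)
    cols = torch.arange(ns)[None, :].expand(bs, ns)
    batch[rows, cols, part] = new_entities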
def from_washington(loc):
    return loc == 'Washington' or ', Washington' in loc or loc == 'WA' or ', WA' in loc

def from_massachusetts(loc):
    return loc == 'Massachusetts' or ', Massachusetts' in loc or loc == 'MA' or ', MA' in loc

# extracting tweet data
hashtag = '#superbowl'
fname = retreive_filename_by_hashtag(hashtag)
#out = util.get_linecount(fname, timeit=True)
util.tic('Loading tweets w/ %s' % hashtag)
tweet_text = []
tweet_addr = []
i = 0
filestream = open(fname, 'r')
for line in filestream:
    tweet = json.loads(line)
    tweet_content = tweet['title']
    user_location = tweet['tweet']['user']['location']
    if from_washington(user_location):
        tweet_text.append(tweet_content)
        tweet_addr.append(1)
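# A quick illustration of the location predicates above (the example strings
# are made up for demonstration):
assert from_washington('Seattle, WA')
assert from_washington('Washington')
assert from_massachusetts('Boston, Massachusetts')
assert not from_massachusetts('Seattle, WA')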
def cluster_n_analyze(dim=2):
    print "Dimension = %d" % dim

    # reducing dimensions
    print "Reducing dimensions..."
    util.tic()
    normalizer = Normalizer(copy=False)
    svd_cls = TruncatedSVD(n_components=dim, n_iter=10, random_state=random_state)
    nmf_cls = NMF(n_components=dim, init='random', random_state=random_state)
    X_dim_red = {}
    #svd = make_pipeline(svd_cls, normalizer) if normalize else svd_cls
    svd = make_pipeline(normalizer, svd_cls) if normalize else svd_cls
    X_dim_red['SVD'] = svd.fit_transform(X_tfidf)
    print svd_cls.explained_variance_ratio_
    #nmf = make_pipeline(nmf_cls, normalizer) if normalize else nmf_cls
    nmf = make_pipeline(normalizer, nmf_cls) if normalize else nmf_cls
    X_dim_red['NMF'] = nmf.fit_transform(X_tfidf)
    #print X_dim_red['SVD'][0]
    util.toc()

    # apply non-linear transformation here
    if transform_method == 'exp':
        X_dim_red['SVD'] = np.exp(X_dim_red['SVD'])
        X_dim_red['NMF'] = np.exp(X_dim_red['NMF'])
    elif transform_method == 'sqrt':
        X_dim_red['NMF'] = np.sqrt(X_dim_red['NMF'])
    elif transform_method == 'log':
        X_dim_red['NMF'] = util.clamp(-10, np.log(X_dim_red['NMF']), 10)
    elif transform_method == 'customized':
        # different transformation to SVD and NMF
        X_dim_red['SVD'] += np.max(X_dim_red['SVD'])
        X_dim_red['SVD'] = X_dim_red['SVD']**2
        X_dim_red['NMF'] = util.clamp(-10, np.log(X_dim_red['NMF']), 10)

    # clustering
    #print X_dim_red['SVD'][0]
    print "Clustering..."
    util.tic()
    kmeans = {}
    kmeans['SVD'] = KMeans(n_clusters=2, random_state=random_state).fit(X_dim_red['SVD'])
    kmeans['NMF'] = KMeans(n_clusters=2, random_state=random_state).fit(X_dim_red['NMF'])
    util.toc()

    # Purity statistics
    print "Purity stats report:"
    print "Dimension = %d" % dim
    for method in ['SVD', 'NMF']:
        scores = []
        scores.append(homogeneity_score(y_true, kmeans[method].labels_))
        scores.append(completeness_score(y_true, kmeans[method].labels_))
        scores.append(adjusted_rand_score(y_true, kmeans[method].labels_))
        scores.append(adjusted_mutual_info_score(y_true, kmeans[method].labels_))

        # document statistics
        if method not in perf_stats:
            perf_stats[method] = [[], [], [], []]
        for idx, arr in enumerate(perf_stats[method]):
            arr.append(scores[idx])
            perf_stats[method][idx] = arr

        # print...
        print "======== Method: %s ========" % method
        print "Confusion Matrix:"
        print metrics.confusion_matrix(y_true, kmeans[method].labels_)
        print "Homogeneity_score = {:4f}".format(scores[0])
        print "Completeness_score = {:4f}".format(scores[1])
        print "Adjusted_rand_score = {:4f}".format(scores[2])
        print "Adjusted_mutual_info_score = {:4f}".format(scores[3])

    # stop if no visualization is needed
    if not visualize:
        return

    # Visualization
    for method in ['SVD', 'NMF']:
        print method
        xs, ys = [[], []], [[], []]
        xt, yt = [[], []], [[], []]
        xe0, ye0 = [], []
        xe1, ye1 = [], []
        labels = kmeans[method].labels_
        # switch group if confusion matrix proved it to be a better match
        conf_mat = metrics.confusion_matrix(y_true, kmeans[method].labels_)
        if conf_mat[0][0] + conf_mat[1][1] < conf_mat[0][1] + conf_mat[1][0]:
            # since it only contains 0 and 1...
            labels = [1 - x for x in labels]
        for idx, val in enumerate(X_dim_red[method]):
            # projection: may find some other method
            xval = val[0]
            yval = val[1]
            label = labels[idx]
            truth = y_true[idx]
            if label == 0 and truth == 1:
                # erroneously put into category 0
                xe0.append(xval)
                ye0.append(yval)
            elif label == 1 and truth == 0:
                # erroneously put into category 1
                xe1.append(xval)
                ye1.append(yval)
            xs[label].append(xval)
            ys[label].append(yval)
            xt[truth].append(xval)
            yt[truth].append(yval)
        #if method == 'NMF':
        #    plt.xscale('log')
        #    plt.yscale('log')
        plt.figure()
        plt.scatter(xs[0], ys[0], c='r' if colorcode else 'k', marker=',', lw=0, s=1)
        plt.scatter(xs[1], ys[1], c='b' if colorcode else 'k', marker=',', lw=0, s=1)
        if not showtrue and colorcode:
            plt.scatter(xe0, ye0, c='g', marker=',', lw=0, s=1)
            plt.scatter(xe1, ye1, c='y', marker=',', lw=0, s=1)
        optional_str = ": kmeans\n" if showtrue else ""
        plt.title("%s%s" % (method, optional_str))
        transf_opt = 'pre_transf' if transform_method == 'none' else 'post_transf'
        plt.savefig('scatter_%s_dim%d_%s.png' % (method, dim, transf_opt))
        plt.show()
        if showtrue:
            plt.figure()
            plt.scatter(xt[0], yt[0], c='r', marker=',', lw=0, s=1)
            plt.scatter(xt[1], yt[1], c='b', marker=',', lw=0, s=1)
            plt.title("%s: true\ndim = %d" % (method, dim))
            plt.show()
from sklearn.datasets import fetch_20newsgroups   # needed for fetch_20newsgroups() below
from sklearn.feature_extraction import text       # needed for text.CountVectorizer / text.TfidfTransformer
from sklearn.decomposition import *
from sklearn.cluster import KMeans
from sklearn.metrics.cluster import *
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import Normalizer
import matplotlib.pyplot as plt
import numpy as np
import random
import util
import scipy.sparse.linalg as ssl

#--------------------------------
### Loading and converting data
print "Loading comp and rec data..."
util.tic()
newsgroup = fetch_20newsgroups(subset='all', shuffle=True, random_state=42)
util.toc()

print "Vectorizing and converting to TFxIDF..."
util.tic()
count_vect = text.CountVectorizer(min_df=1,
                                  stop_words='english',
                                  analyzer='word',
                                  tokenizer=util.my_tokenizer)
tfidf_transformer = text.TfidfTransformer()
X_counts = count_vect.fit_transform(newsgroup.data)
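# X_tfidf (used by the clustering functions above) is presumably produced from
# X_counts with the TfidfTransformer created here; the snippet is cut off
# before that step, so this is an assumed continuation:
X_tfidf = tfidf_transformer.fit_transform(X_counts)
util.toc()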
def go(arg):

    if arg.seed < 0:
        seed = random.randint(0, 1000000)
        print('random seed: ', seed)
    else:
        torch.manual_seed(arg.seed)

    tbw = SummaryWriter(log_dir=arg.tb_dir)  # Tensorboard logging

    # load the data (validation unless arg.final is true, then test)
    arg.data = here('data/enwik8.gz') if arg.data is None else arg.data

    data_train, data_val, data_test = enwik8(arg.data)
    data_train, data_test = (torch.cat([data_train, data_val], dim=0), data_test) \
                            if arg.final else (data_train, data_val)

    # create the model
    model = GTransformer(emb=arg.embedding_size, heads=arg.num_heads, depth=arg.depth,
                         seq_length=arg.context, num_tokens=NUM_TOKENS,
                         attention_type=arg.attention_type)
    if torch.cuda.is_available():
        model.cuda()

    opt = torch.optim.Adam(lr=arg.lr, params=model.parameters())

    # Linear learning rate warmup
    sch = torch.optim.lr_scheduler.LambdaLR(
        opt, lambda i: min(i / (arg.lr_warmup / arg.batch_size), 1.0))

    # Training loop
    # -- We don't loop over the data, instead we sample a batch of random subsequences each time. This is not strictly
    #    better or worse as a training method, it's just a little simpler.
    #
    instances_seen = 0
    for i in tqdm.trange(arg.num_batches):

        opt.zero_grad()

        source, target = sample_batch(data_train, length=arg.context, batch_size=arg.batch_size)
        instances_seen += source.size(0)

        if torch.cuda.is_available():
            source, target = source.cuda(), target.cuda()

        tic()
        output = model(source)  # forward pass
        t = toc()

        # Compute the loss
        loss = F.nll_loss(output.transpose(2, 1), target, reduction='mean')

        tbw.add_scalar('transformer/train-loss', float(loss.item()) * LOG2E, i * arg.batch_size, instances_seen)
        tbw.add_scalar('transformer/time-forward', t, instances_seen)

        loss.backward()  # backward pass

        # clip gradients
        # -- If the total gradient vector has a length > x, we clip it back down to x.
        if arg.gradient_clipping > 0.0:
            nn.utils.clip_grad_norm_(model.parameters(), arg.gradient_clipping)

        opt.step()  # stochastic gradient descent step
        sch.step()  # update the learning rate

        # Validate every `arg.test_every` steps. First we compute the
        # compression on the validation data (or a subset),
        # then we generate some random text to monitor progress.
        if i != 0 and (i % arg.test_every == 0 or i == arg.num_batches - 1):
            with torch.no_grad():

                ## Sample and print a random sequence
                # Slice a random seed from the test data, and sample a continuation from the model.
                seedfr = random.randint(0, data_test.size(0) - arg.context)
                seed = data_test[seedfr:seedfr + arg.context].to(torch.long)

                if torch.cuda.is_available():
                    seed = seed.cuda()

                sample_sequence(model, seed=seed, max_context=arg.context, verbose=True,
                                length=arg.sample_length)

                ## Compute validation bits per byte
                upto = data_test.size(0) if i == arg.num_batches - 1 else arg.test_subset
                data_sub = data_test[:upto]

                bits_per_byte = compute_compression(model, data_sub, context=arg.context,
                                                    batch_size=arg.test_batchsize)
                # -- Since we're not computing gradients, we can increase the batch size a little from what we used in
                #    training.

                print(f'epoch{i}: {bits_per_byte:.4} bits per byte')
                tbw.add_scalar(f'transformer/eval-loss', bits_per_byte, i * arg.batch_size, instances_seen)
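# sample_batch() is used above but defined elsewhere. A sketch of what it
# presumably does: slice `batch_size` random subsequences of `length` tokens
# from the 1-D training tensor, with targets shifted one character ahead.
# This is an assumed implementation matching the call site, not necessarily
# the original.
import torch

def sample_batch(data, length, batch_size):
    starts = torch.randint(low=0, high=data.size(0) - length - 1, size=(batch_size,))
    source = torch.stack([data[s:s + length] for s in starts], dim=0).to(torch.long)
    target = torch.stack([data[s + 1:s + length + 1] for s in starts], dim=0).to(torch.long)
    return source, target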
def go(arg): global repeats repeats = arg.repeats tbdir = arg.tb_dir if arg.tb_dir is not None else os.path.join('./runs', get_slug(arg))[:250] tbw = SummaryWriter(log_dir=tbdir) dev = 'cuda' if torch.cuda.is_available() else 'cpu' test_mrrs = [] train, val, test, (n2i, i2n), (r2i, i2r) = \ embed.load(arg.name) # set of all triples (for filtering) alltriples = set() for s, p, o in torch.cat([train, val, test], dim=0): s, p, o = s.item(), p.item(), o.item() alltriples.add((s, p, o)) truedicts = util.truedicts(alltriples) if arg.final: train, test = torch.cat([train, val], dim=0), test else: train, test = train, val subjects = torch.tensor(list({s for s, _, _ in train}), dtype=torch.long, device=d()) predicates = torch.tensor(list({p for _, p, _ in train}), dtype=torch.long, device=d()) objects = torch.tensor(list({o for _, _, o in train}), dtype=torch.long, device=d()) ccandidates = (subjects, predicates, objects) print(len(i2n), 'nodes') print(len(i2r), 'relations') print(train.size(0), 'training triples') print(test.size(0), 'test triples') print(train.size(0) + test.size(0), 'total triples') for r in tqdm.trange(repeats) if repeats > 1 else range(repeats): """ Define model """ model = embed.LinkPredictor( triples=train, n=len(i2n), r=len(i2r), embedding=arg.emb, biases=arg.biases, edropout = arg.edo, rdropout=arg.rdo, decoder=arg.decoder, reciprocal=arg.reciprocal, init_method=arg.init_method, init_parms=arg.init_parms) if torch.cuda.is_available(): prt('Using CUDA.') model.cuda() if arg.opt == 'adam': opt = torch.optim.Adam(model.parameters(), lr=arg.lr) elif arg.opt == 'adamw': opt = torch.optim.AdamW(model.parameters(), lr=arg.lr) elif arg.opt == 'adagrad': opt = torch.optim.Adagrad(model.parameters(), lr=arg.lr) elif arg.opt == 'sgd': opt = torch.optim.SGD(model.parameters(), lr=arg.lr, nesterov=True, momentum=arg.momentum) else: raise Exception() sched = torch.optim.lr_scheduler.ReduceLROnPlateau(patience=arg.patience, optimizer=opt, mode='max', factor=0.95, threshold=0.0001) \ if arg.sched else None #-- defaults taken from libkge # nr of negatives sampled weight = torch.tensor([arg.nweight, 1.0], device=d()) if arg.nweight else None seen = 0 for e in range(arg.epochs): seeni, sumloss = 0, 0.0 tforward = tbackward = 0 rforward = rbackward = 0 tprep = tloss = 0 tic() for fr in trange(0, train.size(0), arg.batch): to = min(train.size(0), fr + arg.batch) model.train(True) opt.zero_grad() positives = train[fr:to].to(d()) for ctarget in [0, 1, 2]: # which part of the triple to corrupt ng = arg.negative_rate[ctarget] if ng > 0: with torch.no_grad(): bs, _ = positives.size() tic() if arg.limit_negatives: cand = ccandidates[ctarget] mx = cand.size(0) idx = torch.empty(bs, ng, dtype=torch.long, device=d()).random_(0, mx) corruptions = cand[idx] else: mx = len(i2r) if ctarget == 1 else len(i2n) corruptions = torch.empty(bs, ng, dtype=torch.long, device=d()).random_(0, mx) tprep += toc() s, p, o = positives[:, 0:1], positives[:, 1:2], positives[:, 2:3] if ctarget == 0: s = torch.cat([s, corruptions], dim=1) if ctarget == 1: p = torch.cat([p, corruptions], dim=1) if ctarget == 2: o = torch.cat([o, corruptions], dim=1) # -- NB: two of the index vectors s, p o are now size (bs, 1) and the other is (bs, ng+1) # We will let the model broadcast these to give us a score tensor of (bs, ng+1) # In most cases we can optimize the decoder to broadcast late for better speed. 
if arg.loss == 'bce': labels = torch.cat([torch.ones(bs, 1, device=d()), torch.zeros(bs, ng, device=d())], dim=1) elif arg.loss == 'ce': labels = torch.zeros(bs, dtype=torch.long, device=d()) # -- CE loss treats the problem as a multiclass classification problem: for a positive triple, # together with its k corruptions, identify which is the true triple. This is always triple 0. # (It may seem like the model could easily cheat by always choosing triple 0, but the score # function is order equivariant, so it can't choose by ordering.) recip = None if not arg.reciprocal else ('head' if ctarget == 0 else 'tail') # -- We use the tail relations if the target is the relation (usually p-corruption is not used) tic() out = model(s, p, o, recip=recip) tforward += toc() assert out.size() == (bs, ng + 1), f'{out.size()=} {(bs, ng + 1)=}' tic() if arg.loss == 'bce': loss = F.binary_cross_entropy_with_logits(out, labels, weight=weight, reduction=arg.lred) elif arg.loss == 'ce': loss = F.cross_entropy(out, labels, reduction=arg.lred) assert not torch.isnan(loss), 'Loss has become NaN' sumloss += float(loss.item()) seen += bs; seeni += bs tloss += toc() tic() loss.backward() tbackward += toc() # No step yet, we accumulate the gradients over all corruptions. # -- this causes problems with modules like batchnorm, so be careful when porting. tic() regloss = None if arg.reg_eweight is not None: regloss = model.penalty(which='entities', p=arg.reg_exp, rweight=arg.reg_eweight) if arg.reg_rweight is not None: regloss = model.penalty(which='relations', p=arg.reg_exp, rweight=arg.reg_rweight) rforward += toc() tic() if regloss is not None: sumloss += float(regloss.item()) regloss.backward() rbackward += toc() opt.step() tbw.add_scalar('biases/train_loss', float(loss.item()), seen) if e == 0: print(f'\n pred: forward {tforward:.4}, backward {tbackward:.4}') print (f' reg: forward {rforward:.4}, backward {rbackward:.4}') print (f' prep {tprep:.4}, loss {tloss:.4}') print (f' total: {toc():.4}') # -- NB: these numbers will not be accurate for GPU runs unless CUDA_LAUNCH_BLOCKING is set to 1 # Evaluate if ((e+1) % arg.eval_int == 0) or e == arg.epochs - 1: with torch.no_grad(): model.train(False) if arg.eval_size is None: testsub = test else: testsub = test[random.sample(range(test.size(0)), k=arg.eval_size)] mrr, hits, ranks = util.eval( model=model, valset=testsub, truedicts=truedicts, n=len(i2n), batch_size=arg.test_batch, verbose=True) if arg.check_simple: # double-check using a separate, slower implementation mrrs, hitss, rankss = util.eval_simple( model=model, valset=testsub, alltriples=alltriples, n=len(i2n), verbose=True) assert ranks == rankss assert mrr == mrrs print(f'epoch {e}: MRR {mrr:.4}\t hits@1 {hits[0]:.4}\t hits@3 {hits[1]:.4}\t hits@10 {hits[2]:.4}') tbw.add_scalar('biases/mrr', mrr, e) tbw.add_scalar('biases/h@1', hits[0], e) tbw.add_scalar('biases/h@3', hits[1], e) tbw.add_scalar('biases/h@10', hits[2], e) if sched is not None: sched.step(mrr) # reduce lr if mrr stalls test_mrrs.append(mrr) print('training finished.') temrrs = torch.tensor(test_mrrs) print(f'mean test MRR {temrrs.mean():.3} ({temrrs.std():.3}) \t{test_mrrs}')
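# util.truedicts() (used in the script above for filtered evaluation) is not
# shown. It presumably indexes the set of all true triples so that, for a
# query (s, p, ?) or (?, p, o), the known true answers can be filtered out of
# the ranking. A minimal assumed sketch:
from collections import defaultdict

def truedicts(alltriples):
    heads, tails = defaultdict(set), defaultdict(set)
    for s, p, o in alltriples:
        heads[(p, o)].add(s)   # true subjects for a (predicate, object) pair
        tails[(s, p)].add(o)   # true objects for a (subject, predicate) pair
    return heads, tails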
def solve( inputs='inputs', outputs='outputs', rps=True, renewables=True, demand_response=False, ev=None, pumped_hydro=False, ph_year=None, ph_mw=None, tag=None, thread=None, nthreads=3 ): # load and solve the model, using specified configuration # NOTE: this version solves repeatedly with different DR targets global switch_model, switch_instance, results, output_dir modules = ['switch_mod', 'fuel_cost', 'project.no_commit', 'switch_patch', 'batteries'] if rps: modules.append('rps') if not renewables: modules.append('no_renewables') if demand_response: modules.append('simple_dr') # repeat with a range of DR shares all_dr_shares = [0.00, 0.20, 0.40, 0.05, 0.15, 0.25, 0.35, 0.30, 0.10] if thread is None: dr_shares = all_dr_shares else: # take every nth element from all_dr_shares, starting with element i, where i=thread (1-based) and n=nthreads dr_shares = [all_dr_shares[x] for x in range(thread-1, len(all_dr_shares), nthreads)] else: # no_demand_response dr_shares = [0.00] if ev is None: # not specified, leave out ev's pass elif ev: # user asked for ev modules.append('ev') else: # user asked for no_ev (count transport emissions but don't allow EVs) modules.append('no_ev') if pumped_hydro: modules.append('pumped_hydro') log('using modules: {m}\n'.format(m=modules)) log("defining model... "); tic() switch_model = define_AbstractModel(*modules) switch_model.iis = Suffix(direction=Suffix.IMPORT) switch_model.dual = Suffix(direction=Suffix.IMPORT) # force construction of a fixed amount of pumped hydro if ph_mw is not None: print "Forcing construction of {m} MW of pumped hydro.".format(m=ph_mw) switch_model.Build_Pumped_Hydro_MW = Constraint(switch_model.LOAD_ZONES, rule=lambda m, z: m.Pumped_Hydro_Capacity_MW[z, m.PERIODS.last()] == ph_mw ) # force construction of pumped hydro only in a certain period if ph_year is not None: print "Allowing construction of pumped hydro only in {p}.".format(p=ph_year) switch_model.Build_Pumped_Hydro_Year = Constraint( switch_model.LOAD_ZONES, switch_model.PERIODS, rule=lambda m, z, p: m.BuildPumpedHydroMW[z, p] == 0 if p != ph_year else Constraint.Skip ) toc() # done defining model log("loading model data from {} dir... ".format(inputs)); tic() switch_instance = switch_model.load_inputs(inputs_dir=inputs) toc() output_dir = outputs setup_results_dir() create_batch_results_file(switch_instance, tag=tag) log("dr_shares = " + str(dr_shares) + "\n") for dr_share in dr_shares: if demand_response: switch_instance.demand_response_max_share = dr_share switch_instance.preprocess() tic() log("solving model with max DR={dr}...\n".format(dr=dr_share)) results = opt.solve(switch_instance, keepfiles=False, tee=True, # options='dualopt', # not sure how to put this in opt.options symbolic_solver_labels=True, suffixes=['dual', 'iis']) log("Solver finished; "); toc() # results.write() log("loading solution... "); tic() # Pyomo changed their interface for loading results somewhere # between 4.0.x and 4.1.x in a way that was not backwards compatible. # Make the code accept either version if hasattr(switch_instance, 'solutions'): # Works in Pyomo version 4.1.x switch_instance.solutions.load_from(results) else: # Works in Pyomo version 4.0.9682 switch_instance.load(results) toc() if results.solver.termination_condition == TerminationCondition.infeasible: print "Model was infeasible; Irreducible Infeasible Set (IIS) returned by solver:" print "\n".join(c.cname() for c in switch_instance.iis) if util.interactive_session: print "Unsolved model is available as switch_instance." 
raise RuntimeError("Infeasible model") if util.interactive_session: print "Model solved successfully." print "Solved model is available as switch_instance." print "\n\n=======================================================" print "Solved model" print "=======================================================" print "Total cost: ${v:,.0f}".format(v=value(switch_instance.Minimize_System_Cost)) if pumped_hydro: switch_instance.BuildPumpedHydroMW.pprint() append_batch_results(switch_instance, tag=tag) t = "" if tag is None else str(tag) + "_" write_results(switch_instance, tag=t+'dr_share_'+str(dr_share))
def solve( inputs_dir='inputs', inputs_subdir='', outputs_dir='outputs', rps=True, renewables=True, wind=None, central_pv=None, batteries=True, demand_response_simple=True, dr_shares=[0.3], ev=True, pumped_hydro=True, ph_year=None, ph_mw=None, hydrogen=True, fed_subsidies=False, biofuel_limit=0.05, ev_flat=False, scenario_name=None, tag=None ): # load and solve the model, using specified configuration # NOTE: this version solves repeatedly with different DR targets global switch_model, switch_instance, results, output_dir # quick fix to use scenario name and (optional) tag tag = None if scenario_name is None else append_tag(scenario_name, tag) # quick fix for inputs_dir / inputs_subdir inputs_dir = os.path.join(inputs_dir, inputs_subdir) modules = ['switch_mod', 'fuel_markets', 'fuel_markets_expansion', 'project.no_commit', 'switch_patch', 'rps'] modules.append('emission_rules') # no burning LSFO after 2017 except in cogen plants for m in ['ev', 'pumped_hydro', 'fed_subsidies', 'demand_response_simple', 'hydrogen', 'batteries']: if locals()[m] is True: modules.append(m) if demand_response_simple is not True: dr_shares = [0.00] # TODO: treat the 'no_*' modules as standard scenario names # (i.e., include no_renewables, etc. instead of excluding renewables, etc.) if renewables is False: modules.append('no_renewables') if wind is False: modules.append('no_wind') if central_pv is False: modules.append('no_central_pv') if ev is False: # user asked for no_ev (count transport emissions but don't allow EVs) modules.append('no_ev') log('using modules: {m}\n'.format(m=modules)) log("defining model... "); tic() switch_model = define_AbstractModel(*modules) switch_model.iis = Suffix(direction=Suffix.IMPORT) switch_model.dual = Suffix(direction=Suffix.IMPORT) # TODO: put scenario flags into a switch_model.config dictionary and then # do the following model modifications within the respective modules. # force construction of a fixed amount of pumped hydro if ph_mw is not None: print "Forcing construction of {m} MW of pumped hydro.".format(m=ph_mw) switch_model.Build_Pumped_Hydro_MW = Constraint(switch_model.LOAD_ZONES, rule=lambda m, z: m.Pumped_Hydro_Capacity_MW[z, m.PERIODS.last()] == ph_mw ) # force construction of pumped hydro only in a certain period if ph_year is not None: print "Allowing construction of pumped hydro only in {p}.".format(p=ph_year) switch_model.Build_Pumped_Hydro_Year = Constraint( switch_model.PH_PROJECTS, switch_model.PERIODS, rule=lambda m, pr, pe: m.BuildPumpedHydroMW[pr, pe] == 0 if pe != ph_year else Constraint.Skip ) if biofuel_limit is not None: print "Limiting (bio)fuels to {l}% of electricity production.".format(l=biofuel_limit*100.0) switch_model.rps_fuel_limit = biofuel_limit if ev_flat and ev: print "Charging EVs as baseload." switch_model.ChargeEVs_flat = Constraint( switch_model.LOAD_ZONES, switch_model.TIMEPOINTS, rule=lambda m, z, tp: m.ChargeEVs[z, tp] * m.ts_duration_hrs[m.tp_ts[tp]] == m.ev_mwh_ts[z, m.tp_ts[tp]] ) # add an alternative objective function that smoothes out various non-cost variables def Smooth_Free_Variables_obj_rule(m): # minimize production (i.e., maximize curtailment / minimize losses) obj = sum( getattr(m, component)[lz, t] for lz in m.LOAD_ZONES for t in m.TIMEPOINTS for component in m.LZ_Energy_Components_Produce) # also minimize the magnitude of demand adjustments if hasattr(m, "DemandResponse"): print "Will smooth DemandResponse." 
obj = obj + sum(m.DemandResponse[z, t]*m.DemandResponse[z, t] for z in m.LOAD_ZONES for t in m.TIMEPOINTS) # also minimize the magnitude of EV charging if hasattr(m, "ChargeEVs"): print "Will smooth EV charging." obj = obj + sum(m.ChargeEVs[z, t]*m.ChargeEVs[z, t] for z in m.LOAD_ZONES for t in m.TIMEPOINTS) return obj switch_model.Smooth_Free_Variables = Objective(rule=Smooth_Free_Variables_obj_rule, sense=minimize) toc() # done defining model log("loading model data from {} dir... ".format(inputs_dir)); tic() switch_instance = switch_model.load_inputs(inputs_dir=inputs_dir) toc() if rps is False: # deactivate the main RPS constraint # (we do this instead of omitting the whole RPS module, # so we can report RPS-qualified power even if the RPS is not in effect) # NOTE: for now, there's no easy way to pass solver flags into individual modules # which would probably be a cleaner solution switch_instance.RPS_Enforce.deactivate() switch_instance.preprocess() # investigate the cost_components_annual elements # import pdb; pdb.set_trace() output_dir = outputs_dir # assign to global variable with slightly different name (ugh) setup_results_dir() create_batch_results_file(switch_instance, scenario=tag) log("dr_shares = " + str(dr_shares) + "\n") for dr_share in dr_shares: if demand_response_simple: switch_instance.demand_response_max_share = dr_share switch_instance.preprocess() log("solving model with max DR={dr}...\n".format(dr=dr_share)) # make sure the minimum-cost objective is in effect switch_instance.Smooth_Free_Variables.deactivate() switch_instance.Minimize_System_Cost.activate() results = _solve(switch_instance) if results.solver.termination_condition == TerminationCondition.infeasible: print "Model was infeasible; Irreducible Infeasible Set (IIS) returned by solver:" print "\n".join(c.cname() for c in switch_instance.iis) if util.interactive_session: print "Unsolved model is available as switch_instance." raise RuntimeError("Infeasible model") append_batch_results(switch_instance, scenario=tag+'_unsmooth') if len(dr_shares) > 1: t = ("" if tag is None else str(tag) + '_') + 'dr_share_' + str(dr_share) else: t = tag if solver == "cplex": # Freeze all direct-cost variables, and then solve the model against # a smoothing objective instead of a cost objective. # (only applied for quadratic solvers, i.e., cplex) write_results(switch_instance, tag=t+'_unsmooth') # keep pre-smoothing results, in case smoothing crashes old_duals = [ (z, t, switch_instance.dual[switch_instance.Energy_Balance[z, t]]) for z in switch_instance.LOAD_ZONES for t in switch_instance.TIMEPOINTS] fix_obj_expression(switch_instance.Minimize_System_Cost) switch_instance.Minimize_System_Cost.deactivate() switch_instance.Smooth_Free_Variables.activate() switch_instance.preprocess() log("smoothing free variables...\n") results = _solve(switch_instance) # restore hourly duals from the original solution for (z, t, d) in old_duals: switch_instance.dual[switch_instance.Energy_Balance[z, t]] = d # unfix the variables fix_obj_expression(switch_instance.Minimize_System_Cost, False) log("finished smoothing free variables; "); toc() if util.interactive_session: print "Model solved successfully." print "Solved model is available as switch_instance." 
print "\n\n=======================================================" print "Solved model" print "=======================================================" print "Total cost: ${v:,.0f}".format(v=value(switch_instance.Minimize_System_Cost)) # if pumped_hydro: # switch_instance.BuildPumpedHydroMW.pprint() if hasattr(switch_instance, "ChargeBattery"): double_charge = [ ( z, t, switch_instance.ChargeBattery[z, t].value, switch_instance.DischargeBattery[z, t].value ) for z in switch_instance.LOAD_ZONES for t in switch_instance.TIMEPOINTS if switch_instance.ChargeBattery[z, t].value > 0 and switch_instance.DischargeBattery[z, t].value > 0 ] if len(double_charge) > 0: print "" print "WARNING: batteries are simultaneously charged and discharged in some hours." print "This is usually done to relax the biofuel limit." for (z, t, c, d) in double_charge: print 'ChargeBattery[{z}, {t}]={c}, DischargeBattery[{z}, {t}]={d}'.format( z=z, t=switch_instance.tp_timestamp[t], c=c, d=d ) append_batch_results(switch_instance, scenario=tag) if len(dr_shares) > 1: t = ("" if tag is None else str(tag) + '_') + 'dr_share_' + str(dr_share) else: t = tag write_results(switch_instance, tag=t)
import util
import os

BASE_DIR = 'test_data/'
flist = os.listdir(BASE_DIR)
flist = [BASE_DIR + f for f in flist]

util.tic('Loading test data...')
test_tweets = util.get_tweets(flist[0])
util.toc()

"""
In [3]: tweet = test_tweets[0]

In [6]: tweet['tweet']['entities']['hashtags']
Out[6]: []
"""
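# util.get_tweets() is not shown here. Given the line-by-line json.loads()
# pattern used in the #superbowl snippet above, it presumably reads a
# JSON-lines file into a list of tweet dicts; a minimal assumed sketch:
import json

def get_tweets(fname):
    """Read one JSON-encoded tweet per line and return them as a list of dicts."""
    with open(fname, 'r') as f:
        return [json.loads(line) for line in f]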
def solve( inputs='inputs', rps=True, demand_response=True, renewables=True, ev=None, pumped_hydro=True, ph_year=None, ph_mw=None, tag=None ): global switch_model, switch_instance, results modules = ['switch_mod', 'fuel_cost', 'project.no_commit', 'switch_patch', 'batteries'] if rps: modules.append('rps') if not renewables: modules.append('no_renewables') if demand_response: modules.append('simple_dr') if ev is None: # not specified, leave out ev's pass elif ev: # user asked for ev modules.append('ev') else: # user asked for no_ev modules.append('no_ev') if pumped_hydro: modules.append('pumped_hydro') log('using modules: {m}\n'.format(m=modules)) log("defining model... "); tic() switch_model = define_AbstractModel(*modules) switch_model.iis = Suffix(direction=Suffix.IMPORT) switch_model.dual = Suffix(direction=Suffix.IMPORT) # force construction of a fixed amount of pumped hydro if pumped_hydro and ph_mw is not None: print "Forcing construction of {m} MW of pumped hydro.".format(m=ph_mw) switch_model.Build_Pumped_Hydro_MW = Constraint(switch_model.LOAD_ZONES, rule=lambda m, z: m.Pumped_Hydro_Capacity_MW[z, m.PERIODS.last()] == ph_mw ) # force construction of pumped hydro only in a certain period if pumped_hydro and ph_year is not None: print "Allowing construction of pumped hydro only in {p}.".format(p=ph_year) switch_model.Build_Pumped_Hydro_Year = Constraint( switch_model.LOAD_ZONES, switch_model.PERIODS, rule=lambda m, z, p: m.BuildPumpedHydroMW[z, p] == 0 if p != ph_year else Constraint.Skip ) toc() # done defining model log("loading model data from {} dir... ".format(inputs)); tic() switch_instance = switch_model.load_inputs(inputs_dir=inputs) toc() log("solving model...\n"); tic() results = opt.solve(switch_instance, keepfiles=False, tee=True, symbolic_solver_labels=True, suffixes=['dual', 'iis']) log("Solver finished; "); toc() # results.write() log("loading solution... "); tic() # Pyomo changed their interface for loading results somewhere # between 4.0.x and 4.1.x in a way that was not backwards compatible. # Make the code accept either version if hasattr(switch_instance, 'solutions'): # Works in Pyomo version 4.1.x switch_instance.solutions.load_from(results) else: # Works in Pyomo version 4.0.9682 switch_instance.load(results) toc() if results.solver.termination_condition == TerminationCondition.infeasible: print "Model was infeasible; Irreducible Infeasible Set (IIS) returned by solver:" print "\n".join(c.cname() for c in switch_instance.iis) if util.interactive_session: print "Unsolved model is available as switch_instance." raise RuntimeError("Infeasible model") if util.interactive_session: print "Model solved successfully." print "Solved model is available as switch_instance." print "\n\n=======================================================" print "Solved model" print "=======================================================" print "Total cost: ${v:,.0f}".format(v=value(switch_instance.Minimize_System_Cost)) if pumped_hydro: switch_instance.BuildPumpedHydroMW.pprint() write_results(switch_instance, tag=tag)
def go(arg): global repeats repeats = arg.repeats tbdir = arg.tb_dir if arg.tb_dir is not None else os.path.join('./runs', get_slug(arg))[:250] tbw = SummaryWriter(log_dir=tbdir) dev = 'cuda' if torch.cuda.is_available() else 'cpu' test_mrrs = [] train, val, test, (n2i, i2n), (r2i, i2r) = \ kgmodels.load_lp(arg.name) # set of all triples (for filtering) alltriples = set() for s, p, o in torch.cat([train, val, test], dim=0): s, p, o = s.item(), p.item(), o.item() alltriples.add((s, p, o)) truedicts = util.truedicts(alltriples) if arg.final: train, test = torch.cat([train, val], dim=0), test else: train, test = train, val subjects = list({s for s, _, _ in train}) predicates = list({p for _, p, _ in train}) objects = list({o for _, _, o in train}) ccandidates = (subjects, predicates, objects) print(len(i2n), 'nodes') print(len(i2r), 'relations') print(train.size(0), 'training triples') print(test.size(0), 'test triples') print(train.size(0) + test.size(0), 'total triples') for r in tqdm.trange(repeats) if repeats > 1 else range(repeats): """ Define model """ model = kgmodels.LPShallow( triples=train, n=len(i2n), r=len(i2r), embedding=arg.emb, biases=arg.biases, edropout = arg.edo, rdropout=arg.rdo, decoder=arg.decoder) if torch.cuda.is_available(): prt('Using CUDA.') model.cuda() if arg.opt == 'adam': opt = torch.optim.Adam(model.parameters(), lr=arg.lr) elif arg.opt == 'adamw': opt = torch.optim.AdamW(model.parameters(), lr=arg.lr) elif arg.opt == 'adagrad': opt = torch.optim.Adagrad(model.parameters(), lr=arg.lr) elif arg.opt == 'sgd': opt = torch.optim.SGD(model.parameters(), lr=arg.lr, nesterov=True, momentum=arg.momentum) else: raise Exception() sched = torch.optim.lr_scheduler.ReduceLROnPlateau(patience=arg.patience, optimizer=opt, mode='max', factor=0.95, threshold=0.0001) \ if arg.sched else None #-- defaults taken from libkge # nr of negatives sampled weight = torch.tensor([arg.nweight, 1.0], device=d()) if arg.nweight else None seen = 0 for e in range(arg.epochs): seeni, sumloss = 0, 0.0 for fr in trange(0, train.size(0), arg.batch): tic() model.train(True) # if arg.limit is not None and seeni > arg.limit: # break # if torch.cuda.is_available() and random.random() < 0.01: # print(f'\nPeak gpu memory use is {torch.cuda.max_memory_cached() / 1e9:.2} Gb') to = min(train.size(0), fr + arg.batch) with torch.no_grad(): positives = train[fr:to] b, _ = positives.size() # # sample negatives # if arg.corrupt_global: # global corruption (sample random true triples to corrupt) # indices = torch.randint(size=(b*ng,), low=0, high=train.size(0)) # negatives = train[indices, :].view(b, ng, 3) # -- triples to be corrupted # # else: # local corruption (directly corrupt the current batch) # negatives = positives.clone()[:, None, :].expand(b, ng, 3).contiguous() ttriples = [] for target, ng in zip([0, 1, 2], arg.negative_rate): if ng > 0: negatives = positives.clone()[:, None, :].expand(b, ng, 3).contiguous() corrupt_one(negatives, ccandidates[target] if arg.limit_negatives else range(len(i2n)), target) ttriples.append(torch.cat([positives[:, None, :], negatives], dim=1)) triples = torch.cat(ttriples, dim=0) b, _, _ = triples.size() if arg.loss == 'bce': labels = torch.cat([torch.ones(b, 1), torch.zeros(b, ng)], dim=1) elif arg.loss == 'ce': labels = torch.zeros(b, dtype=torch.long) # -- CE loss treats the problem as a multiclass classification problem: for a positive triple, # together with its k corruptions, identify which is the true triple. This is always triple 0. 
# (It may seem like the model could easily cheat by always choosing triple 0, but the score # function is order equivariant, so it can't choose by ordering.) if torch.cuda.is_available(): triples = triples.cuda() labels = labels.cuda() opt.zero_grad() out = model(triples) if arg.loss == 'bce': loss = F.binary_cross_entropy_with_logits(out, labels, weight=weight, reduction=arg.lred) elif arg.loss == 'ce': loss = F.cross_entropy(out, labels, reduction=arg.lred) if arg.reg_eweight is not None: loss = loss + model.penalty(which='entities', p=arg.reg_exp, rweight=arg.reg_eweight) if arg.reg_rweight is not None: loss = loss + model.penalty(which='relations', p=arg.reg_exp, rweight=arg.reg_rweight) assert not torch.isnan(loss), 'Loss has become NaN' loss.backward() sumloss += float(loss.item()) #print('emean: ', model.relations.grad.mean().item()) opt.step() seen += b; seeni += b tbw.add_scalar('biases/train_loss', float(loss.item()), seen) # Evaluate if ((e+1) % arg.eval_int == 0) or e == arg.epochs - 1: with torch.no_grad(): model.train(False) if arg.eval_size is None: testsub = test else: testsub = test[random.sample(range(test.size(0)), k=arg.eval_size)] mrr, hits, ranks = util.eval_batch( model=model, valset=testsub, truedicts=truedicts, n=len(i2n), batch_size=arg.test_batch, verbose=True) if arg.check_simple: mrrs, hitss, rankss = util.eval_simple( model=model, valset=testsub, alltriples=alltriples, n=len(i2n), verbose=True) assert ranks == rankss assert mrr == mrrs print(f'epoch {e}: MRR {mrr:.4}\t hits@1 {hits[0]:.4}\t hits@3 {hits[1]:.4}\t hits@10 {hits[2]:.4}') print(f' ranks : {ranks[:10]}') print('len check', len(ranks), len(testsub)) tbw.add_scalar('biases/mrr', mrr, e) tbw.add_scalar('biases/h@1', hits[0], e) tbw.add_scalar('biases/h@3', hits[1], e) tbw.add_scalar('biases/h@10', hits[2], e) if sched is not None: sched.step(mrr) # reduce lr if mrr stalls test_mrrs.append(mrr) print('training finished.') temrrs = torch.tensor(test_mrrs) print(f'mean test MRR {temrrs.mean():.3} ({temrrs.std():.3}) \t{test_mrrs}')
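# corrupt_one() is called above but not shown. It presumably overwrites, in
# place, one fixed position of each candidate triple (0 = subject,
# 1 = predicate, 2 = object) with values drawn uniformly from `candidates`.
# An assumed sketch matching the call site, not the authors' implementation:
import random
import torch

def corrupt_one(batch, candidates, target):
    """In-place corruption of column `target` of a (b, ng, 3) batch of triples."""
    candidates = [int(c) for c in candidates]
    bs, ns, _ = batch.size()
    picks = torch.tensor([random.choice(candidates) for _ in range(bs * ns)],
                         dtype=torch.long).view(bs, ns)
    batch[:, :, target] = picks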