def train_d2v(config):
    """Run one Doc2Vec (SGNS) trial and report its metrics to Ray Tune.

    Trains on freshly built D2V pairs, evaluates the embedding from the
    final iteration, and logs macro-F1, accuracy and the final average loss.
    """
    adjust_config()
    data = dataset()
    pairs = D2VPairs(dataset=data, **config["pairs"])
    model = SGNS(pairs, dataset=data, **config["model"])
    # Embeddings/losses are keyed by iteration number as a string.
    final_key = str(config["model"]["iterations"])
    f1, accuracy = eval_model(model.embeddings[final_key], dataset=data)
    tune.track.log(
        f1_macro=f1,
        accuracy=accuracy,
        avg_loss=model.losses[final_key],
    )
def train_jce(config):
    """Run one JCE trial (joint D2V + DeepWalk pairs) and report to Ray Tune.

    Builds both pair sets, trains the joint embedding, evaluates the
    final-iteration embedding, and logs macro-F1, accuracy and average loss.
    """
    adjust_config()
    data = dataset()
    d2v_pairs = D2VPairs(dataset=data, **config["pairs_d2v"])
    dw_pairs = DWPairs(dataset=data, **config["pairs_dw"])
    model = JCE(data=[d2v_pairs, dw_pairs], dataset=data, **config["model"])
    # Embeddings/losses are keyed by iteration number as a string.
    final_key = str(config["model"]["iterations"])
    f1, accuracy = eval_model(model.embeddings[final_key], dataset=data)
    tune.track.log(
        f1_macro=f1,
        accuracy=accuracy,
        avg_loss=model.losses[final_key],
    )
def train(self):
    """Train the SGNS model for ``self.iterations`` epochs.

    Each epoch regenerates the training pairs, iterates over shuffled
    mini-batches stepping the optimizer per batch, steps the LR scheduler
    once per epoch, reports the epoch's average loss, and (when
    ``self.dataset`` is set) prints a 10-fold evaluation of the current
    embedding.
    """
    for epoch in range(self.iterations):
        self.data.make_pairs()
        loader = DataLoader(self.data,
                            batch_size=self.batch_size,
                            shuffle=True,
                            num_workers=self.workers,
                            pin_memory=True)
        total_batches = len(loader)
        # Guard against tiny datasets: with < 10 batches the original
        # ``int(total_batches / 10)`` was 0 and ``i % tenth`` crashed.
        tenth = max(1, total_batches // 10)
        epoch_loss = 0.0
        epoch_batches = 0
        avg_loss = 0.0
        loop = tqdm(enumerate(loader),
                    total=total_batches,
                    disable=(not config.progress))
        for i, (pos_u, pos_v, neg_v) in loop:
            epoch_batches += 1
            if self.use_cuda:
                pos_u = pos_u.cuda()
                pos_v = pos_v.cuda()
                neg_v = neg_v.cuda()
            # Zero gradients
            self.optimizer.zero_grad()
            # Execute forward pass and get loss
            loss = self.model.forward(pos_u, pos_v, neg_v)
            # ``.item()`` detaches the scalar; accumulating the raw tensor
            # kept every batch's autograd graph alive for the whole epoch.
            epoch_loss += loss.item()
            # Execute backward pass
            loss.backward()
            self.optimizer.step()
            if i % tenth == 0:
                avg_loss = epoch_loss / epoch_batches
                config.debug(
                    f'Epoch {epoch+1}/{self.iterations} - {int(i/total_batches * 100)}%'
                )
                if config.progress:
                    loop.set_description(
                        f'Epoch {epoch+1}/{self.iterations}, Total Loss {round(epoch_loss)}, Avg. Loss {round(avg_loss)}'
                        + f', LR {self.get_current_lr()}')
        config.debug(f'Epoch {epoch+1}/{self.iterations} - 100%')
        loop.close()
        del loader, loop
        self.scheduler.step()
        # max(1, …) avoids ZeroDivisionError on an empty loader.
        self.report_values(epoch, epoch_loss / max(1, epoch_batches))
        if self.dataset:
            print("Evaluation: ",
                  eval_model(self.get_embedding(), self.dataset, folds=10))
def train(self):
    """Train the joint (JCE/SINE) model for ``self.iterations`` epochs.

    Each epoch rebuilds the pairs of every source algorithm, interleaves
    their batches round-robin, and steps the per-algorithm optimizer. When
    ``self.disable_grad`` is set, gradients of all ``u_embeddings`` except
    the current algorithm's are frozen for the batch; otherwise a single
    shared optimizer (``self.optimizers[0]``) is used, as in SINE.
    """
    for epoch in range(self.iterations):
        for i, data in enumerate(self.data):
            data.make_pairs(alg_index=i)
        loaders = [
            DataLoader(pairs,
                       batch_size=self.batch_sizes[i],
                       shuffle=True,
                       num_workers=self.workers,
                       pin_memory=True)
            for i, pairs in enumerate(self.data)
        ]
        batches = round_robin([iter(loader) for loader in loaders])
        total_batches = np.sum([len(loader) for loader in loaders])
        # Guard against < 10 total batches: a tenth of 0 crashed ``i % tenth``.
        tenth = max(1, int(total_batches / 10))
        epoch_loss = 0.0
        epoch_batches = 0
        avg_loss = 0.0
        loop = tqdm(enumerate(batches),
                    total=total_batches,
                    disable=(not config.progress))
        for i, (pos_u, pos_v, neg_v, alg_index) in loop:
            alg_index = alg_index.data[0]  # Reduce [batch_size] to int
            epoch_batches += 1
            if self.use_cuda:
                pos_u = pos_u.cuda()
                pos_v = pos_v.cuda()
                neg_v = neg_v.cuda()
                alg_index = alg_index.cuda()
            # Disable backward autograd for all embeddings except the one of
            # the current algorithm.
            disabled_embeddings = []
            if self.disable_grad:
                optimizer = self.optimizers[alg_index]
                for name, param in self.model.named_parameters():
                    if name.startswith('u_embeddings') and not name.startswith(
                            f'u_embeddings.{alg_index}'):
                        param.requires_grad = False
                        disabled_embeddings.append(param)
            else:
                # for SINE (not disabling grad) we select the same optimizer
                # for everything
                optimizer = self.optimizers[0]
            # Zero gradients
            optimizer.zero_grad()
            # Execute forward pass and get loss
            loss = self.model.forward(pos_u, pos_v, neg_v, alg_index)
            # ``.item()`` detaches the scalar; accumulating the raw tensor
            # kept every batch's autograd graph alive for the whole epoch.
            epoch_loss += loss.item()
            # Execute backward pass
            loss.backward()
            optimizer.step()
            # Re-enable all disabled embeddings for next batch
            for disabled_emb in disabled_embeddings:
                disabled_emb.requires_grad = True
            if i % tenth == 0:
                avg_loss = epoch_loss / epoch_batches
                config.debug(
                    f'Epoch {epoch+1}/{self.iterations} - {int(i/total_batches * 100)}%'
                )
                if config.progress:
                    loop.set_description(
                        f'Epoch {epoch+1}/{self.iterations}, Total Loss {round(epoch_loss)}, Avg. Loss {round(avg_loss)}'
                        + f', LR {self.get_current_lr()}')
        config.debug(f'Epoch {epoch+1}/{self.iterations} - 100%')
        loop.close()
        del loop, batches, loaders
        for scheduler in self.schedulers:
            scheduler.step()
        # max(1, …) avoids ZeroDivisionError when every loader is empty.
        self.report_values(epoch, epoch_loss / max(1, epoch_batches))
        if self.dataset:
            print("Evaluation: ",
                  eval_model(self.get_embedding(), self.dataset, folds=10))
def main():
    """Run the edge-removal robustness experiment.

    For every drop percentage, and for every removal strategy in
    ``test_cases`` (or just the untouched "full_dataset" run when the
    percentage is 0), this: deep-copies the complete dataset, removes edges
    via the strategy method named by the test case, caches the mutilated
    dataset to disk, then trains and evaluates Doc2Vec (SGNS), DeepWalk
    (SGNS) and the joint JCE/SINE model on it.
    """
    for drop_percentage in drop_percentages:
        # os.makedirs replaces the shelled-out `mkdir … 2> /dev/null`:
        # portable, no shell, and real errors are no longer discarded.
        os.makedirs(f"{TIMESTAMP_CACHE_DIR}/{drop_percentage}", exist_ok=True)
        if drop_percentage == 0:
            # run on Full dataset
            tests_to_run = ["full_dataset"]
        else:
            tests_to_run = test_cases
        for test in tests_to_run:
            print(f"Performing test {test} with drop_percentage = {drop_percentage}")
            dataset = copy.deepcopy(complete_dataset)
            # do not perform any edge removal when running on full dataset
            if test != "full_dataset":
                print("Removing process ...")
                # The test-case name doubles as the removal method's name.
                remove_method = getattr(dataset, test)
                remove_method(drop_percentage)
            cache_dir = f"{TIMESTAMP_CACHE_DIR}/{drop_percentage}/{test}"
            os.makedirs(cache_dir, exist_ok=True)
            params["d2v"]["model"]["cache_file"] = cache_dir + "/d2v"
            params["dw"]["model"]["cache_file"] = cache_dir + "/dw"
            params["sine"]["cache_file"] = cache_dir + "/sine"

            ################################################################
            # Saving dataset
            ################################################################
            print(f"saving missing dataset in {cache_dir}/missing_edges_dataset.p")
            # `with` guarantees the handle is closed (the original leaked it).
            with open(f"{cache_dir}/missing_edges_dataset.p", "wb") as handle:
                pickle.dump(dataset, handle)

            ################################################################
            # Making pairs
            ################################################################
            d2vpairs = D2VPairs(dataset=dataset, **params["d2v"]["pairs"])
            dwpairs = DWPairs(dataset=dataset, **params["dw"]["pairs"])

            #############################################
            # Doc2Vec
            #############################################
            print("Running Doc2Vec ...")
            d2vmodel = SGNS(d2vpairs, **params["d2v"]["model"])
            d2vEmb = d2vmodel.embeddings[str(params["d2v"]["model"]["iterations"])]
            print("Eval D2V: ", eval_model(d2vEmb, dataset=dataset))

            #############################################
            # DeepWalk
            #############################################
            print("Running DeepWalk ...")
            dwmodel = SGNS(dwpairs, **params["dw"]["model"])
            dwEmb = dwmodel.embeddings[str(params["dw"]["model"]["iterations"])]
            print("Eval DW: ", eval_model(dwEmb, dataset=dataset))

            #############################################
            # SINE
            #############################################
            print("Running SINE ...")
            sinemodel = JCE(data=[d2vpairs, dwpairs],
                            disable_grad=False,
                            **params["sine"])
            sineEmb = sinemodel.embeddings[str(params["sine"]["iterations"])]
            print("Eval JCE (SINE): ", eval_model(sineEmb, dataset=dataset))