def _get_neighbors(self, web_page):
    """Get indices of at most |max_neighbors| neighbors for each relation.

    Args:
        web_page (WebPage)

    Returns:
        neighbors: SequenceBatch of shape num_nodes x ??? containing the
            neighbor refs (??? is at most max_neighbors * len(neighbor_rels))
        rels: SequenceBatch of shape num_nodes x ??? containing the
            relation indices
    """
    G = web_page.graph
    batch_neighbors = [[] for _ in xrange(len(web_page.nodes))]
    batch_rels = [[] for _ in xrange(len(web_page.nodes))]
    for src, tgts in G.nodes.iteritems():
        # Group by relation
        rel_to_tgts = defaultdict(list)
        for tgt, rels in tgts.iteritems():
            for rel in rels:
                rel_to_tgts[rel].append(tgt)
        # Sample if needed
        for rel, index in self._neighbor_rels.iteritems():
            tgts = rel_to_tgts[rel]
            random.shuffle(tgts)
            if not tgts:
                continue
            if len(tgts) > self._max_neighbors:
                tgts = tgts[:self._max_neighbors]
            batch_neighbors[src].extend(tgts)
            batch_rels[src].extend([index] * len(tgts))
    # Create SequenceBatches
    max_len = max(len(x) for x in batch_neighbors)
    batch_mask = []
    for neighbors, rels in izip(batch_neighbors, batch_rels):
        assert len(neighbors) == len(rels)
        this_len = len(neighbors)
        batch_mask.append([1.] * this_len + [0.] * (max_len - this_len))
        neighbors.extend([0] * (max_len - this_len))
        rels.extend([0] * (max_len - this_len))
    return (SequenceBatch(V(LT(batch_neighbors)), V(FT(batch_mask))),
            SequenceBatch(V(LT(batch_rels)), V(FT(batch_mask))))
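The padding and masking at the end of _get_neighbors is the generic ragged-to-rectangular trick; a minimal sketch of just that step with plain torch tensors (SequenceBatch, V, LT, and FT come from the surrounding codebase and are not used here) might look like this:

import torch

batch_neighbors = [[4, 7], [2], []]   # ragged neighbor lists per node
batch_rels = [[0, 1], [0], []]
max_len = max(len(x) for x in batch_neighbors)
batch_mask = []
for neighbors, rels in zip(batch_neighbors, batch_rels):
    this_len = len(neighbors)
    batch_mask.append([1.] * this_len + [0.] * (max_len - this_len))
    neighbors.extend([0] * (max_len - this_len))
    rels.extend([0] * (max_len - this_len))

print(torch.LongTensor(batch_neighbors))   # padded neighbor indices
print(torch.FloatTensor(batch_mask))       # 1.0 for real entries, 0.0 for padding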
def __init__(self, embedding, vocab_size=20000, n_negs=20, weights=None, pad=None):
    super(SGNS, self).__init__()
    self.embedding = embedding
    self.vocab_size = vocab_size
    self.n_negs = n_negs
    self.weights = None
    self.pad = pad
    if weights is not None:
        wf = np.power(weights, 0.75)
        wf = wf / wf.sum()
        self.weights = FT(wf)
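A small sketch of how the `weights` argument passed to this constructor is typically prepared: raw per-index corpus frequencies, smoothed with the 3/4 power and normalized, then usable as a multinomial negative-sampling table (the frequency values below are made up):

import numpy as np
import torch

word_counts = np.array([0, 120, 80, 40, 10], dtype=np.float64)  # index 0 = <pad>, never sampled
wf = np.power(word_counts, 0.75)
wf = wf / wf.sum()
neg_sampling_dist = torch.FloatTensor(wf)
negatives = torch.multinomial(neg_sampling_dist, 5, replacement=True)  # 5 negative indices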
def __init__(self, base_model, vocab_size, n_negs, weights, loss_method, device):
    super(SGNS, self).__init__()
    self.ai2v = base_model
    self.vocab_size = vocab_size
    self.n_negs = n_negs
    self.device = device
    self.weights = None
    if weights is not None:
        wf = np.power(weights, 0.75)
        wf = wf / wf.sum()
        self.weights = FT(wf)
    self.loss_method = loss_method
def closure():
    noise = V(FT(np.random.randn(self.noise_dim)))
    states = self.hallucinator.forward(noise.unsqueeze(0))
    # Merge the batch dimension (currently 1) with the hallucinated-state dimension
    states = states.view(states.size(0) * self.hallucinator.n, -1)
    actions = policy.forward(states)
    actions = actions.view(1, -1)
    states = states.view(1, -1)
    reward = self.critic(states, actions)[0]
    return reward
def __init__(self, emb_dim, token2path_dict):
    super(word2vec_module, self).__init__()
    self.vocab_size = len(token2path_dict)
    self.V = max([len(_) for _ in token2path_dict.values()])
    self.emb_layer = nn.Embedding(self.vocab_size + 1, emb_dim)
    nn.init.uniform_(self.emb_layer.weight, -1.0, 1.0)
    self.W_matrix = torch.nn.parameter.Parameter(
        FT(np.random.random([self.V, emb_dim]))
    )
    nn.init.uniform_(self.W_matrix, -1.0, 1.0)
    self.token2path_dict = token2path_dict
    return
def __init__(self, is_cuda=True, vocab_size=20000, embedding_size=300, padding_idx=0,
             numeral_weighted_fn=weighted_log):
    super(Word2VecFixed, self).__init__()
    self.vocab_size = vocab_size
    self.embedding_size = embedding_size
    self.ivectors = nn.Embedding(self.vocab_size, self.embedding_size, padding_idx=padding_idx)
    self.ovectors = nn.Embedding(self.vocab_size, self.embedding_size, padding_idx=padding_idx)
    # initialize weights
    self.ivectors.weight = nn.Parameter(
        t.cat([
            t.zeros(1, self.embedding_size),
            FT(self.vocab_size - 1, self.embedding_size).uniform_(
                -0.5 / self.embedding_size, 0.5 / self.embedding_size)
        ]))
    self.ovectors.weight = nn.Parameter(
        t.cat([
            t.zeros(1, self.embedding_size),
            FT(self.vocab_size - 1, self.embedding_size).uniform_(
                -0.5 / self.embedding_size, 0.5 / self.embedding_size)
        ]))
    self.is_cuda = is_cuda
    self.ivectors.weight.requires_grad = True
    self.ovectors.weight.requires_grad = True
    self.numeral_weighted_fn = numeral_weighted_fn
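A usage sketch for the module above (vocabulary size and indices are illustrative; row 0 is the zero-initialized padding row, and the weighted_log default in the signature is assumed to be defined alongside the class):

import torch as t

w2v = Word2VecFixed(is_cuda=False, vocab_size=1000, embedding_size=50)
word_ids = t.LongTensor([[1, 2, 3, 0]])   # 0 is the padding index
center_vecs = w2v.ivectors(word_ids)      # 1 x 4 x 50 "input" embeddings
context_vecs = w2v.ovectors(word_ids)     # 1 x 4 x 50 "output" embeddings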
def forward(self, iword, owords):
    batch_size = iword.size()[0]
    context_size = owords.size()[1]
    if self.weights is not None:
        nwords = t.multinomial(self.weights, batch_size * context_size * self.n_negs,
                               replacement=True).view(batch_size, -1)
    else:
        nwords = FT(batch_size, context_size * self.n_negs).uniform_(
            0, self.vocab_size - 1).long()
    ivectors = self.embedding.forward_i(iword).unsqueeze(2)
    ovectors = self.embedding.forward_o(owords)
    nvectors = self.embedding.forward_o(nwords).neg()
    oloss = t.bmm(ovectors, ivectors).squeeze().sigmoid().log().mean(1)
    nloss = t.bmm(nvectors, ivectors).squeeze().sigmoid().log().view(
        -1, context_size, self.n_negs).sum(2).mean(1)
    return -(oloss + nloss).mean()
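A minimal end-to-end sketch of this loss, assuming the forward above and the SGNS constructor shown earlier (the variant taking embedding, vocab_size, n_negs, weights, pad) belong to the same class, with a hypothetical ToyEmbedding standing in for the word2vec module that exposes forward_i/forward_o:

import torch as t
import torch.nn as nn

class ToyEmbedding(nn.Module):
    """Stand-in for the word2vec embedding module (forward_i / forward_o lookups)."""
    def __init__(self, vocab_size=100, dim=16):
        super(ToyEmbedding, self).__init__()
        self.ivectors = nn.Embedding(vocab_size, dim)
        self.ovectors = nn.Embedding(vocab_size, dim)

    def forward_i(self, idx):
        return self.ivectors(idx)

    def forward_o(self, idx):
        return self.ovectors(idx)

sgns = SGNS(embedding=ToyEmbedding(), vocab_size=100, n_negs=5)
iword = t.randint(0, 100, (8,))      # batch of 8 center words
owords = t.randint(0, 100, (8, 4))   # 4 context words per center word
loss = sgns(iword, owords)           # scalar negative-sampling loss
loss.backward()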
def __init__(self, device, latent_dim, encoder_structure_config, decoder_structure_config,
             loss_structure_config, ae_dropout, fc_dropout, turn_on_noise=True):
    super(model_6_v1, self).__init__()
    self.turn_on_noise = turn_on_noise
    self.device = device
    self.latent_dim = latent_dim
    self.ae_module = AE(device=device,
                        latent_dim=latent_dim,
                        encoder_structure_config=encoder_structure_config,
                        decoder_structure_config=decoder_structure_config,
                        dropout=ae_dropout)
    self.ae_module = self.ae_module.to(self.device)
    self.ae_loss_module = AE_loss_module(self.device, loss_structure_config)
    self.ae_loss_module = self.ae_loss_module.to(self.device)
    self.num_fields = len(encoder_structure_config['discrete_dims']) + 1
    latent_dim = self.ae_module.encoder.ae_latent_dimension
    self.score_layer = nn.Sequential(
        nn.Linear(latent_dim, latent_dim // 2),
        nn.Dropout(fc_dropout),
        nn.Tanh(),
        nn.Linear(latent_dim // 2, 1),
        nn.Sigmoid())
    self.score_layer.to(device)
    # Unit-scale Gaussian noise over the latent space
    self.normal_noise_dist = Normal(loc=FT(np.zeros(latent_dim)),
                                    scale=FT(np.ones(latent_dim)))
    print(self.normal_noise_dist)
    # Possible values: train, test
    self.mode = 'train'
    return
def get_compressed_embedding(self, data):
    self.network_module.eval()
    self.network_module.mode = 'test'
    self.network_module.ae_module.mode = 'compress'
    X = FT(data).to(self.device)
    bs = 500
    num_batches = data.shape[0] // bs + 1
    output = []
    for b in range(num_batches):
        _x = X[b * bs:(b + 1) * bs]
        if _x.shape[0] == 0:
            continue
        z = self.network_module.ae_module(_x)
        z_data = z.clone().cpu().data.numpy()
        output.extend(z_data)
    return output
def train_model(dagmm_obj, data, _DEVICE, num_epochs=400, batch_size=512, LR=0.001):
    optimizer = torch.optim.Adam(dagmm_obj.parameters(), lr=LR)
    dagmm_obj.train()
    log_interval = 100
    for epoch in tqdm(range(num_epochs)):
        num_batches = data.shape[0] // batch_size + 1
        epoch_losses = []
        np.random.shuffle(data)
        # X = FT(data).to(DEVICE)
        X = data
        lambda_energy = 0.1
        lambda_cov_diag = 0.005
        for b in range(num_batches):
            optimizer.zero_grad()
            input_data = X[b * batch_size: (b + 1) * batch_size]
            input_data = FT(input_data).to(_DEVICE)
            enc, dec, z, gamma = dagmm_obj(input_data)
            total_loss, sample_energy, recon_error, cov_diag = dagmm_obj.loss_function(
                input_data, dec, z, gamma, lambda_energy, lambda_cov_diag
            )
            dagmm_obj.zero_grad()
            total_loss.backward()
            epoch_losses.append(total_loss.cpu().data.numpy())
            torch.nn.utils.clip_grad_norm_(dagmm_obj.parameters(), 5)
            optimizer.step()
            loss = {}
            loss['total_loss'] = total_loss.data.item()
            loss['sample_energy'] = sample_energy.item()
            loss['recon_error'] = recon_error.item()
            loss['cov_diag'] = cov_diag.item()
            if (b + 1) % log_interval == 0:
                log = ' '
                for tag, value in loss.items():
                    log += ", {}: {:.4f}".format(tag, value)
                print(log)
        print('Epoch loss ::', np.mean(epoch_losses))
    return dagmm_obj
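To make the interface train_model expects from dagmm_obj concrete, here is a toy stand-in (hypothetical, not the real DAGMM: its loss_function keeps only the reconstruction term and uses zero placeholders for the energy and covariance terms), followed by a call on synthetic data:

import numpy as np
import torch
import torch.nn as nn

class ToyDAGMM(nn.Module):
    def __init__(self, dim=8, latent=2, k=3):
        super(ToyDAGMM, self).__init__()
        self.enc = nn.Linear(dim, latent)
        self.dec = nn.Linear(latent, dim)
        self.est = nn.Linear(latent, k)

    def forward(self, x):
        z = self.enc(x)
        x_hat = self.dec(z)
        gamma = torch.softmax(self.est(z), dim=1)
        return z, x_hat, z, gamma   # matches the enc, dec, z, gamma unpacking above

    def loss_function(self, x, x_hat, z, gamma, lambda_energy, lambda_cov_diag):
        recon_error = ((x - x_hat) ** 2).mean()
        sample_energy = gamma.new_tensor(0.0)   # placeholder for the GMM energy term
        cov_diag = gamma.new_tensor(0.0)        # placeholder for the covariance penalty
        total_loss = recon_error + lambda_energy * sample_energy + lambda_cov_diag * cov_diag
        return total_loss, sample_energy, recon_error, cov_diag

data = np.random.randn(2000, 8).astype('float32')
trained = train_model(ToyDAGMM(), data, torch.device('cpu'), num_epochs=2, batch_size=256)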
def forward(self, iword, owords):
    batch_size = iword.size()[0]
    context_size = owords.size()[1]
    if self.weights is not None:
        nwords = t.multinomial(self.weights, batch_size * context_size * self.n_negs,
                               replacement=True).view(batch_size, -1)
    else:
        nwords = FT(batch_size, context_size * self.n_negs).uniform_(
            0, self.vocab_size - 1).long()
    ivectors = self.embedding.forward_i(iword).unsqueeze(2)
    ovectors = self.embedding.forward_o(owords)
    non_pad = FT((owords != self.pad).float())
    non_pad = non_pad.cuda() if self.embedding.ovectors.weight.is_cuda else non_pad
    N = non_pad.sum()
    nvectors = self.embedding.forward_o(nwords).neg()
    # oloss = t.bmm(ovectors, ivectors).squeeze().sigmoid().log().mean(1)
    oloss = t.sum(ls(t.bmm(ovectors, ivectors).squeeze()) * non_pad) / N
    nloss = ls(t.bmm(nvectors, ivectors).squeeze()).view(
        -1, context_size, self.n_negs).sum(2).mean(1)
    return -(oloss + nloss).mean()
def get_score(self, data):
    self.network_module.eval()
    self.network_module.mode = 'test'
    self.network_module.ae_module.mode = 'test'
    X = FT(data).to(self.device)
    bs = 500
    num_batches = data.shape[0] // bs + 1
    output = []
    for b in range(num_batches):
        _x = X[b * bs:(b + 1) * bs]
        if _x.shape[0] == 0:
            continue
        z = self.network_module(_x)
        z_data = z.clone().cpu().data.numpy()
        output.extend(z_data)
    return output
def score_samples(self, data):
    bs = self.batch_size
    num_batches = data.shape[0] // bs + 1
    res = []
    for b in tqdm(range(num_batches)):
        x = data[b * bs: (b + 1) * bs]
        x = FT(x).to(self.device)
        if x.shape[0] == 0:
            break
        r = self.__score_sample(x)
        r = r.cpu().data.numpy()
        res.extend(r)
    res = np.array(res)
    return res
def __init__(self, encoder, output_embedding_size, output_vocab_size, weights=None,
             n_negs=20, padding_idx=0):
    super(CBOWNet, self).__init__()
    self.encoder = encoder
    self.n_negs = n_negs
    self.weights = weights
    self.output_vocab_size = output_vocab_size
    self.output_embedding_size = output_embedding_size
    self.outputembeddings = nn.Embedding(output_vocab_size + 1, output_embedding_size,
                                         padding_idx=padding_idx)
    if self.weights is not None:
        wf = np.power(self.weights, 0.75)
        wf = wf / wf.sum()
        self.weights = FT(wf)
def random(model, gpu=False, num_samples=20):
    if not os.path.isdir('results'):
        os.mkdir('results')
    if not os.path.isdir('results/random'):
        os.mkdir('results/random')
    for i, idol in enumerate(idols):
        hot = np.zeros((num_samples, len(idols)))
        hot[:, i] = 1
        c = V(FT(hot), requires_grad=False)
        c = c.cuda() if gpu else c
        x_ = model.predict(c)
        x_ = x_.cpu() if gpu else x_
        for j in range(num_samples):
            imsave('results/random/{}_{}.jpg'.format(idol, j + 1),
                   denormalize(x_.data[j].numpy()))
def train_model(self, X):
    self.network_module.ae_module.mode = 'train'
    self.network_module.ae_module.train()
    self.network_module.ae_loss_module.train()
    self.network_module.mode = 'train'
    learning_rate = self.LR
    parameters = list(self.network_module.parameters())
    self.optimizer = torch.optim.Adam(parameters, lr=learning_rate)
    log_interval = self.log_interval
    losses = []
    bs = self.batch_size
    for epoch in tqdm(range(1, self.num_epochs + 1)):
        t = epoch
        epoch_losses = []
        num_batches = X.shape[0] // bs + 1
        idx = np.arange(X.shape[0])
        np.random.shuffle(idx)
        X_P = X[idx]
        X_P = FT(X_P).to(self.device)
        for b in range(num_batches):
            # self.network_module.zero_grad()
            self.optimizer.zero_grad()
            _x_p = X_P[b * bs:(b + 1) * bs]
            batch_loss = self.network_module(_x_p)  # Standard AE loss
            batch_loss = batch_loss.squeeze(1)
            batch_loss = torch.mean(batch_loss, dim=0, keepdim=False)
            # ====================
            # Clip Gradient
            # ====================
            batch_loss.backward()
            torch.nn.utils.clip_grad_norm_(
                self.network_module.parameters(), 2)
            self.optimizer.step()
            loss_value = batch_loss.clone().cpu().data.numpy()
            losses.append(loss_value)
            if b % log_interval == 0:
                print(' Epoch {} Batch {} Loss {:.4f} '.format(
                    epoch, b, batch_loss))
            epoch_losses.append(loss_value)
        print('Epoch loss ::', np.mean(epoch_losses))
    self.network_module.mode = 'test'
    return epoch_losses
def __init__(self, embedding_size, window_size, device, num_h, d_k=50, d_v=50):
    super(MultiHeadAttention, self).__init__()
    self.emb_size = embedding_size
    self.window_size = window_size
    self.device = device
    self.d_k = d_k
    self.d_v = d_v
    self.num_h = num_h
    self.Ac = nn.Linear(self.emb_size, self.num_h * self.d_k)
    self.At = nn.Linear(self.emb_size, self.num_h * self.d_k)
    self.cos = nn.CosineSimilarity(dim=-1, eps=1e-6)
    self.Bc = nn.Linear(self.emb_size, self.num_h * self.d_v)
    self.pos_bias = nn.Parameter(FT(self.window_size).uniform_(
        -0.5 / self.window_size, 0.5 / self.window_size))
    self.pos_bias.requires_grad = True
    self.R = nn.Linear(self.num_h * self.d_v, self.emb_size)
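A quick instantiation sketch for the attention module above (sizes are illustrative, and only __init__ is shown here, so the forward pass is not exercised):

mha = MultiHeadAttention(embedding_size=300, window_size=5, device='cpu', num_h=4)
print(mha.Ac)               # Linear(in_features=300, out_features=200, bias=True)
print(mha.pos_bias.shape)   # torch.Size([5]): one learnable bias per window position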
def sample_episode(self, policy, n=1, skip=3):
    total_reward = 0
    for i in range(n):
        done = False
        cur_obs = self.env.new_episode()
        t = 0
        while not done:
            cur_obs = V(FT(cur_obs)).unsqueeze(0)
            display = (t % skip == 0)
            cur_action = policy.forward(cur_obs).data.cpu().numpy()
            cur_obs, cur_reward, done = self.env.next_obs(
                cur_action.squeeze(0), render=display)
            total_reward += cur_reward
            t += 1
    avg_episode_reward = total_reward / n
    return avg_episode_reward
def __init__(self, data, bs, top_decoder, bottom_decoder):
    self.data = FT(data).cuda()
    self.n = data.shape[0]
    self.bs = bs
    self.td = top_decoder
    self.bd = bottom_decoder
    l1_zeros = np.zeros((self.n, L1_SIZE))
    l2_zeros = np.zeros((self.n, L2_SIZE))
    self.l1 = (t.tensor(l1_zeros, dtype=t.float32, requires_grad=True, device="cuda"),
               t.tensor(l1_zeros, dtype=t.float32, requires_grad=True, device="cuda"))
    self.l2 = (t.tensor(l2_zeros, dtype=t.float32, requires_grad=True, device="cuda"),
               t.tensor(l2_zeros, dtype=t.float32, requires_grad=True, device="cuda"))
    self.td_opt = opt.Adam(self.td.parameters(), lr=0.001)
    self.bd_opt = opt.Adam(self.bd.parameters(), lr=0.003)
    self.l1_opt = opt.Adam(self.l1, lr=0.1)
    self.l2_opt = opt.Adam(self.l2, lr=0.1)
def forward(self, iword, owords):
    batch_size = iword.size()[0]
    context_size = owords.size()[1]
    if self.fake_indices is None:
        if self.weights is not None:
            nwords = t.multinomial(self.weights, batch_size * context_size * self.n_negs,
                                   replacement=True).view(batch_size, -1)
        else:
            nwords = FT(batch_size, context_size * self.n_negs).uniform_(
                0, self.vocab_size - 1).long()
    else:
        if self.weights is not None:
            # broadcast against fake_indices to flag which center words are fake
            is_fake = iword.view(-1, 1).eq(self.fake_indices).sum(1).type(t.bool)
            n_fake = is_fake.sum()
            n_real = batch_size - n_fake
            # sample negatives separately for fake and real center words
            nwords_fake = t.multinomial(self.weights_fake,
                                        n_fake * context_size * self.n_negs,
                                        replacement=True).view(n_fake, -1)
            nwords_real = t.multinomial(self.weights_real,
                                        n_real * context_size * self.n_negs,
                                        replacement=True).view(n_real, -1)
            # scatter both sets of samples back into one tensor via the is_fake mask
            nwords = t.zeros(batch_size, context_size * self.n_negs).type(t.long)
            nwords[is_fake] = nwords_fake
            nwords[~is_fake] = nwords_real
        else:
            raise NotImplementedError()
    ivectors = self.embedding.forward_i(iword).unsqueeze(2)
    if self.tie_weights:
        ovectors = self.embedding.forward_i(owords)
        nvectors = self.embedding.forward_i(nwords).neg()
    else:
        ovectors = self.embedding.forward_o(owords)
        nvectors = self.embedding.forward_o(nwords).neg()
    oloss = t.bmm(ovectors, ivectors).squeeze().sigmoid().log().mean(1)
    nloss = t.bmm(nvectors, ivectors).squeeze().sigmoid().log().view(
        -1, context_size, self.n_negs).sum(2).mean(1)
    return -(oloss + nloss).mean()
def get_cluster(self, data):
    batch_size = self.batch_size
    num_batches = data.shape[0] // batch_size + 1
    C = []
    for b in range(num_batches):
        _x = data[b * batch_size: (b + 1) * batch_size]
        _x = FT(_x).to(self.device)
        z = self.ae(_x)
        _q_ = self.calc_q_ij(z)
        _c_ = torch.max(_q_, dim=1, keepdim=False)[1]
        C.append(_c_)
    C = torch.cat(C, dim=0)
    return C.cpu().data.numpy()
def __init__(self, base_model, vocab_size=20000, n_negs=20, weights=None,
             loss_method='CCE', device='cpu'):
    super(SGNS, self).__init__()
    self.embedding = base_model
    self.vocab_size = vocab_size
    self.n_negs = n_negs
    self.weights = None
    if weights is not None:
        wf = np.power(weights, 0.75)
        wf = wf / wf.sum()
        self.weights = FT(wf)
    self.loss_method = loss_method
    self.device = device
def __init__(self, embedding, vocab_size=20000, n_negs=20, weights=None,
             previous_model=None):
    super(SGNS, self).__init__()
    self.embedding = embedding
    self.vocab_size = vocab_size
    self.n_negs = n_negs
    self.weights = None
    if weights is not None:
        wf = np.power(weights, 0.75)
        wf = wf / wf.sum()
        self.weights = FT(wf)
    if previous_model is not None:
        self.previous_model = t.from_numpy(previous_model).cuda()
    else:
        self.previous_model = None
def __init__(self, embedding, vocab1_size, vocab2_size, num_poly, num_negs=20, weights=None):
    super(PolyPTE, self).__init__()
    self.embedding = embedding
    self.vocab1_size = vocab1_size
    self.vocab2_size = vocab2_size
    self.num_embedding1 = vocab1_size * num_poly
    self.num_embedding2 = vocab2_size * num_poly
    self.num_negs = num_negs
    self.weights = None
    if weights is not None:
        wf = np.power(weights, 0.75)
        wf = wf / wf.sum()
        self.weights = FT(wf)
def forward(self, input_s, missing_word):
    embedding = self.encoder(input_s)
    batch_size = embedding.size()[0]
    emb_size = embedding.size()[1]
    # draw negative samples
    if self.weights is not None:
        nwords = torch.multinomial(self.weights, batch_size * self.n_negs,
                                   replacement=True).view(batch_size, -1)
    else:
        nwords = FT(batch_size, self.n_negs).uniform_(0, self.vocab_size).long()
    nwords = Variable(torch.LongTensor(nwords), requires_grad=False).cuda()
    # lookup the embeddings of output words
    missing_word_vector = self.outputembeddings(missing_word)
    nvectors = self.outputembeddings(nwords).neg()
    # compute loss for correct word
    oloss = torch.bmm(missing_word_vector.view(batch_size, 1, emb_size),
                      embedding.view(batch_size, emb_size, 1))
    oloss = oloss.squeeze().sigmoid()
    # add epsilon to prediction to avoid numerical instabilities
    oloss = self._add_epsilon(oloss)
    oloss = oloss.log()
    # compute loss for negative samples
    nloss = torch.bmm(nvectors, embedding.view(batch_size, -1, 1)).squeeze().sigmoid()
    # add epsilon to prediction to avoid numerical instabilities
    nloss = self._add_epsilon(nloss)
    nloss = nloss.log()
    nloss = nloss.mean(1)
    # combine losses
    return -(oloss + nloss)
def main():
    vsize = 10
    args = get_args()
    mcv_file = args.mcv
    ldp_file = args.ldp
    batch_size = args.bsize
    model_file = args.model
    ldp = read_file(ldp_file)
    df = pd.read_csv(mcv_file, sep=' ', header=None)
    data = np.array(df.iloc[:, 1:], dtype='float32')
    X = data.reshape(-1, vsize ** 3)
    X = V(FT(X), requires_grad=False).view(-1, 1, vsize, vsize, vsize)
    data_num = len(X)
    n_batches = ceil(data_num / batch_size)
    model = DenseNet().cuda()
    model = nn.DataParallel(model)
    model.module.load_state_dict(torch.load(model_file))
    model.eval()
    y = np.zeros((data_num, 7), dtype='float32')
    for i in tqdm(range(n_batches)):
        X_batch = X[i * batch_size: (i + 1) * batch_size].cuda()
        y_batch = model(X_batch).cpu().detach().numpy()
        y[i * batch_size: (i + 1) * batch_size] = y_batch
    i = 0
    for line in ldp:
        if line[0:4] == "ATOM":
            print("{}{:6d}{:6d}".format(line, np.argmax(y[i, :4]), np.argmax(y[i, 4:])))
            i += 1
        else:
            print(line)
def forward(self, titems, citems):
    batch_size = titems.size()[0]
    context_size = citems.size()[1]
    if self.weights is not None:
        nitems = t.multinomial(self.weights, batch_size * context_size * self.n_negs,
                               replacement=True).view(batch_size, -1)
    else:
        nitems = FT(batch_size, self.n_negs).uniform_(0, self.vocab_size - 1).long()
    nitems = nitems.to(self.device)
    tvectors = self.embedding.forward_t(titems)
    cvectors = self.embedding.forward_c(citems)
    nvectors = self.embedding.forward_t(nitems).neg()
    all_tvectors = t.cat([tvectors.unsqueeze(1), nvectors], dim=1)
    if self.loss_method == 'CCE':
        loss = t.bmm(cvectors, all_tvectors.transpose(1, 2))
        loss = -loss.sigmoid().log().sum(2).sum(1).mean()
        return loss
    else:
        raise NotImplementedError
def __init__(self, emb_dim, domain_dims):
    super(APE, self).__init__()
    self.save_path = None
    self.num_domains = len(domain_dims)
    self.emb_dim = emb_dim
    self.num_entities = sum(domain_dims.values())
    self.emb_layer = nn.Embedding(num_embeddings=self.num_entities,
                                  embedding_dim=emb_dim)
    self.c = nn.Parameter(torch.from_numpy(np.random.random(1)))
    self.mode = 'train'
    k = 0
    self.pair_W = nn.ParameterDict({})
    for i in range(self.num_domains):
        for j in range(i + 1, self.num_domains):
            w_k = nn.Parameter(data=FT(np.random.random(1)))
            self.pair_W[str(k)] = w_k
            k += 1
    return
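A usage sketch for APE as defined above, with made-up domain sizes, to show how the entity count and pairwise weights are laid out:

ape = APE(emb_dim=16, domain_dims={'user': 100, 'item': 250, 'category': 12})
print(ape.num_entities)   # 362: all domains share one embedding table
print(len(ape.pair_W))    # 3: one weight per unordered pair of the 3 domains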
def forward(self, nodes):
    """Embeds a batch of Nodes.

    Args:
        nodes (list[Node])

    Returns:
        embeddings (Tensor): num_nodes x embed_dim
    """
    texts = []
    for node in nodes:
        if self._recursive_texts:
            text = ' '.join(node.all_texts(max_words=self._max_words))
        else:
            text = node.text or ''
        texts.append(word_tokenize(text.lower()))
    text_embeddings = self._utterance_embedder(texts)
    # num_nodes x attr_embed_dim
    tag_embeddings = self._tag_embedder.embed_tokens(
        [node.tag for node in nodes])
    # num_nodes x attr_embed_dim
    id_embeddings = self._id_embedder(
        [word_tokenize(node.id_) for node in nodes])
    # num_nodes x attr_embed_dim
    class_embeddings = self._classes_embedder(
        [word_tokenize(' '.join(node.classes)) for node in nodes])
    # num_nodes x 3
    coords = V(
        FT([[elem.x_ratio, elem.y_ratio, float(elem.visible)]
            for elem in nodes]))
    # num_nodes x dom_embed_dim
    dom_embeddings = torch.cat((text_embeddings, tag_embeddings, id_embeddings,
                                class_embeddings, coords), dim=1)
    # dom_embeddings = text_embeddings
    return self.fc(dom_embeddings)
def forward(self, true_vecs, out_vecs):
    batch_size = true_vecs.size()[0]
    context_size = true_vecs.size()[1]
    if self.weights is not None:
        nwords = torch.multinomial(self.weights, batch_size * context_size * self.n_negs,
                                   replacement=True).view(batch_size, -1)
    else:
        nwords = FT(batch_size, context_size * self.n_negs).uniform_(
            0, self.vocab_size - 1).long().to(device)
    nvectors = self.embedding(nwords).neg()
    # print(out_vecs.size())
    # print(true_vecs.size())
    # print(nvectors.size())
    oloss = torch.bmm(out_vecs, true_vecs.transpose(1, 2))
    oloss = (oloss.sigmoid() + 1e-05).log()
    oloss = oloss.mean(1)
    nloss = torch.bmm(nvectors, true_vecs.transpose(1, 2))
    nloss = (nloss.squeeze().sigmoid() + 1e-05).log()
    nloss = nloss.view(-1, context_size, self.n_negs)
    nloss = nloss.sum(2).mean(1)
    # print(oloss.size())
    # print(nloss.size())
    return -(oloss + nloss).mean()