def __init__(self, num_bits, embed_dim, type_embed_dim, hidden_dim, num_layers=1, dropout=0, linear_end=False):
    super(PrefetchBinary, self).__init__()
    self.num_bits = num_bits
    self.pc_embed = nn.EmbeddingBag(num_bits, embed_dim, mode="max")
    self.delta_embed = nn.EmbeddingBag(2 * num_bits, embed_dim, mode="max")
    self.type_embed = nn.Embedding(3, type_embed_dim)
    self.linear_end = linear_end
    if linear_end:
        self.lstm = nn.LSTM(2 * embed_dim + type_embed_dim, hidden_dim, num_layers,
                            batch_first=True, dropout=dropout)
        self.out_lin = nn.Linear(hidden_dim, 2 * num_bits + 1)
    else:
        self.lstm = nn.LSTM(2 * embed_dim + type_embed_dim, 2 * num_bits + 1, num_layers,
                            batch_first=True, dropout=dropout)

    # Automatically sigmoids and calculates cross entropy loss with given weights
    weights = torch.arange(num_bits - 1, -1, -1)
    weights = torch.cat([weights, weights, torch.tensor([num_bits])])
    # weights = torch.ones(2*num_bits+1)
    self.loss_func = nn.BCEWithLogitsLoss(weight=weights, reduction='mean')
def __init__(self, config: Mapping[str, Any], info: Mapping[str, Any]):
    super().__init__()
    self.config = config
    self.info = info
    self.input_code_embedding = nn.EmbeddingBag(
        config["num_first"] + 1, config["size"] + 1, mode="mean"
    )
    self.input_code_embedding1 = nn.EmbeddingBag(
        config["num_second"] + 1, (config["size"] // 4) + 1, mode="mean"
    )
    self.input_code_embedding.weight.data.normal_(mean=0.0, std=0.02)
    self.input_code_embedding1.weight.data.normal_(mean=0.0, std=0.02)
    self.norm = nn.LayerNorm(config["size"])
    self.drop = nn.Dropout(config["dropout"])
    self.model: nn.Module
    if config["use_gru"]:
        input_size = config["size"]
        self.model = torch.nn.LSTM(
            input_size=input_size,
            hidden_size=config["size"],
            num_layers=config["gru_layers"],
            dropout=config["dropout"] if config["gru_layers"] > 1 else 0,
        )
    else:
        print("Transformer")
        self.model = Encoder(d_model=config["size"])
def __init__(self, args, padding_idx=-1):
    super(UtteranceEmbedder, self).__init__()
    self.padding_idx = padding_idx
    input_dims = [args.vocabulary_size, args.num_entities]
    embeddings_specs = [args.token_emb, args.speaker_emb]

    # Token embeddings, either pre-trained or random weights
    if isinstance(embeddings_specs[0], np.ndarray):
        emb_weights = torch.Tensor(embeddings_specs[0])
        self.token_emb = nn.Embedding(emb_weights.shape[0], emb_weights.shape[1])
        self.token_emb.weight.data = emb_weights
    else:
        self.token_emb = nn.Embedding(input_dims[0], embeddings_specs[0])

    # Speaker embeddings, either pre-trained or random weights
    if isinstance(embeddings_specs[1], np.ndarray):
        emb_weights = torch.Tensor(embeddings_specs[1])
        self.speaker_emb = nn.EmbeddingBag(emb_weights.shape[0], emb_weights.shape[1])
        self.speaker_emb.weight.data = emb_weights
    else:
        self.speaker_emb = nn.EmbeddingBag(input_dims[1], embeddings_specs[1], mode='sum')

    self.embedding_dim = self.token_emb.embedding_dim + self.speaker_emb.embedding_dim
def test_nn_embeddings(self):
    emb1, emb2 = nn.Embedding(10, 3), nn.Embedding(20, 3)
    emb1_bag, emb2_bag = nn.EmbeddingBag(10, 3), nn.EmbeddingBag(20, 3)
    emb3, emb3_bag = nn.Embedding(15, 3), nn.EmbeddingBag(20, 3)
    data_list = [('emb1', emb1), ('emb1_bag', emb1_bag), ('emb2', emb2), ('emb2_bag', emb2_bag)]
    defaults = {'test': 3}
    data_with_config = [
        {
            'name': 'emb3',
            'data': emb3,
            'config': {
                'test': 7
            }
        },
        {
            'name': 'emb3_bag',
            'data': emb3_bag,
            'config': {
                'test': 8
            }
        },
    ]
    self.run_all_checks(data_list=data_list, defaults=defaults, data_with_config=data_with_config)
def __init__(self, numUsers, numItems, embedding_dim, cuda_available, gpunum):
    super(modeler, self).__init__()
    self.userEmbed = nn.EmbeddingBag(numUsers, embedding_dim, mode='mean')
    self.itemEmbed = nn.EmbeddingBag(numItems, embedding_dim, mode='mean')
    self.cuda_available = cuda_available
    self.init_weights()
    self.gpunum = gpunum
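# A minimal, assumed usage sketch (not part of modeler above): nn.EmbeddingBag
# in 'mean' mode takes a flat index tensor plus an offsets tensor marking where
# each bag starts, and returns one pooled vector per bag. All sizes and ids
# below are illustrative only.
import torch
import torch.nn as nn

user_embed = nn.EmbeddingBag(100, 16, mode='mean')
flat_user_ids = torch.tensor([1, 2, 4, 5, 4, 3, 2, 9])  # two bags, concatenated
offsets = torch.tensor([0, 4])                           # bag i starts at offsets[i]
pooled = user_embed(flat_user_ids, offsets)              # shape: (2, 16)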
def __init__(self):
    super().__init__()
    self.emb1 = nn.Embedding(100, 3)
    self.embbag1 = nn.EmbeddingBag(200, 32)
    self.emb_seq = nn.Sequential(nn.Embedding(150, 3), nn.EmbeddingBag(100, 3))
    self.linear1 = nn.Linear(32, 32)
    self.linear2 = nn.Linear(16, 16)
def __init__(self, vs_ap, ed_ap, vs_cid, ed_cid, num_class, mode="mean"):
    super().__init__()
    self.embedding_ap = nn.EmbeddingBag(vs_ap, ed_ap, mode=mode)
    self.embedding_cid = nn.EmbeddingBag(vs_cid, ed_cid, mode=mode)
    self.emb_drop = nn.Dropout(0.)
    self.fc1 = BasicLinearBlock(ed_cid + ed_ap, 512, 0.)
    self.fc2 = BasicLinearBlock(512, 256, 0.)
    self.fc = nn.Linear(256, num_class)
    self.init_weights()
def __init__(self, args, metric_module, **kwargs):
    super(encoder_model_avepool, self).__init__(args, metric_module, **kwargs)

    if 'pretrained_weight' in kwargs:
        ## !! MAKE SURE PADDING HAS 0 VECTOR VALUE
        self.label_embedding = nn.EmbeddingBag(kwargs['num_of_word'], kwargs['word_vec_dim'], mode="sum")
        self.label_embedding.weight.data.copy_(torch.from_numpy(kwargs['pretrained_weight']))
    else:
        self.label_embedding = nn.EmbeddingBag(args.num_vocab, args.word_emb_dim, mode="sum")  ## word embedding
def __init__(self, embedding, n, dim, pre_train):
    super(DenseNetwork, self).__init__()
    if pre_train:
        # from_pretrained is a classmethod; pass mode='sum' to it directly so
        # the intended pooling mode is kept.
        self.emb = nn.EmbeddingBag.from_pretrained(embedding.float(), freeze=False, mode='sum')
    else:
        self.emb = nn.EmbeddingBag(n, dim, mode='sum')
    self.relu = nn.ReLU()
    self.fc1 = nn.Linear(dim, 32)
    self.bn = nn.BatchNorm1d(32)
    self.fc2 = nn.Linear(32, 4)
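# A possible forward pass for DenseNetwork (an assumption; the original forward
# is not shown in this snippet): pool the token ids, then run the small MLP
# head defined in __init__ above.
def forward(self, text, offsets):
    pooled = self.emb(text, offsets)               # (batch, dim)
    hidden = self.relu(self.bn(self.fc1(pooled)))  # (batch, 32)
    return self.fc2(hidden)                        # (batch, 4) class scores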
def __init__(self, library_size, slot_dim, settings, embedder=None):
    super(LibraryFixedSize, self).__init__()
    self.size = library_size
    self.slot_dim = slot_dim
    self.use_keys = settings.entlib_key
    self.dynamic = (settings.entity_library == "dynamic")
    self.gate_type = settings.gate_type
    self.normalization = settings.entlib_normalization
    self.value_weights = settings.entlib_value_weights
    self.values = None  # Will contain activations

    # Initial library activations (i.e., after calling reset_activations())
    if not settings.entlib_weights:
        # Initialize as zeroes every new sequence; not trained weights.
        self.weights = None
    else:
        if embedder is None:
            # Initialize to an initially random embedding, trained with backprop.
            self.weights = nn.EmbeddingBag(library_size, slot_dim, mode='sum')
        elif settings.entlib_shared:
            # Initialize to an existing embedding; trained with weight sharing.
            self.weights = embedder
        else:
            # Initialize to a clone of an existing embedding; trained separately.
            self.weights = nn.EmbeddingBag(library_size, slot_dim, mode='sum')
            self.weights.weight.data = embedder.weight.data.clone()

    # Gates compute weights over the entity library given the entity query:
    if settings.gate_type == 'mlp':
        if self.use_keys:
            self.gate = MLPGate(settings.gate_mlp_hidden, settings.gate_nonlinearity,
                                slot_dim, slot_dim, slot_dim)
        else:
            self.gate = MLPGate(settings.gate_mlp_hidden, settings.gate_nonlinearity,
                                slot_dim, slot_dim)
    else:
        # similarity-based gate instead:
        self.gate = SimGate(settings.gate_type, settings.gate_nonlinearity,
                            settings.gate_sum_keys_values)

    # Optionally take a 'global' perspective (followed by (log)softmax):
    self.gate_keeper = None
    self.gate_softmax = None
    if settings.gate_softmax:
        self.gate_keeper = nn.Linear(self.size, self.size)
        self.gate_softmax = nn.Softmax(dim=-1)

    # Components for computing information to update dynamic library with:
    if self.dynamic:
        self.nonlin = nn.PReLU()
        self.value_linear = nn.Linear(self.slot_dim, self.slot_dim)
        self.query_linear = nn.Linear(self.slot_dim, self.slot_dim)
        if self.use_keys:
            self.key_linear = nn.Linear(self.slot_dim, self.slot_dim)
def fit(self, questions, answers):
    questions, answers = np.asarray(questions), np.asarray(answers)
    self.to_ix.fit(np.append(questions, answers))
    questions_numpy = self.to_ix.transform(questions)
    answers_numpy = self.to_ix.transform(answers)
    num_embeddings, hdim = self.to_ix.vocabulary_size_, self.hidden_size

    self.embedding1 = nn.Sequential(
        nn.EmbeddingBag(num_embeddings, hdim, mode='mean'),
        nn.Linear(hdim, hdim), nn.Dropout(0.2), nn.ReLU(),
        nn.Linear(hdim, hdim), nn.Dropout(0.2), nn.ReLU(),
        nn.Linear(hdim, hdim))
    self.embedding2 = nn.Sequential(
        nn.EmbeddingBag(num_embeddings, hdim, mode='mean'),
        nn.Linear(hdim, hdim), nn.Dropout(0.2), nn.ReLU(),
        nn.Linear(hdim, hdim), nn.Dropout(0.2), nn.ReLU(),
        nn.Linear(hdim, hdim))
    self.embedding1 = self.embedding1.cuda()
    self.embedding2 = self.embedding2.cuda()
    self.optimizer1 = optim.Adam(self.embedding1.parameters(), lr=self.lr)
    self.optimizer2 = optim.Adam(self.embedding2.parameters(), lr=self.lr)

    n_samples = questions_numpy.shape[0]
    batch_size = self.n_negative + 1
    try:
        self.last_epoch = -1
        for epoch in range(self.n_epochs):
            q_shuffled, a_shuffled = shuffle(questions_numpy, answers_numpy)
            q_shuffled, a_shuffled = V(LT(q_shuffled)), V(LT(a_shuffled))
            for start in range(0, n_samples, batch_size):
                end = start + batch_size
                if end > n_samples:
                    break
                q_batch = q_shuffled[start:end]
                a_batch = a_shuffled[start:end]
                self.partial_fit(q_batch, a_batch)
            self.last_epoch = epoch
    except KeyboardInterrupt:
        print("STAHP!.")
    finally:
        print()
        self.embedding1 = self.embedding1.cpu()
        self.embedding2 = self.embedding2.cpu()
        self.embedding2.eval()
        self._embedded_answers = self.embedding2(V(LT(answers_numpy)))
        self._answers = answers
def __init__(self, emb_length, num_units, num_heads, dnn_units):
    super().__init__()
    self.token_emb_layer = nn.EmbeddingBag.from_pretrained(word_embeddings, freeze=True, mode='mean')
    self.emb_layers = nn.ModuleList([nn.Linear(word_embeddings.size()[1], emb_length, bias=False)])
    for i in range(1, len(field_dims)):
        if i in multihot_idx:
            self.emb_layers.append(nn.EmbeddingBag(field_dims[i], emb_length, mode='mean'))
        elif i in onehot_idx:
            self.emb_layers.append(nn.Embedding(field_dims[i], emb_length))

    self.dnn_layer = None
    if dnn_units:
        dnn = [Flatten()]
        dnn_units = [len(field_dims) * emb_length] + dnn_units
        for i in range(len(dnn_units) - 2):
            dnn.append(nn.Linear(dnn_units[i], dnn_units[i + 1], bias=False))
            dnn.append(nn.BatchNorm1d(dnn_units[i + 1]))
            dnn.append(nn.ReLU(True))
            dnn.append(nn.Dropout(drop_rate))
        dnn.append(nn.Linear(dnn_units[-2], dnn_units[-1]))
        self.dnn_layer = nn.Sequential(*dnn)

    for layer in self.emb_layers[1:]:
        nn.init.normal_(layer.weight, 0, emb_length ** -0.5)

    layers = []
    for i in range(len(num_units)):
        if i == 0:
            layers.append(MutiheadAttention(emb_length, num_units[0], num_heads[0]))
        else:
            layers.append(MutiheadAttention(num_units[i - 1] * num_heads[i - 1], num_units[i], num_heads[i]))
    layers.append(Flatten())
    self.att_layers = nn.Sequential(*layers)

    self.last_layer = nn.Linear(
        len(field_dims) * num_units[-1] * num_heads[-1] + (dnn_units[-1] if dnn_units else 0),
        1, bias=True)
    self.loss_function = nn.BCELoss()
def create_emb(self, m, ln):
    emb_l = nn.ModuleList()
    for i in range(0, ln.size):
        n = ln[i]
        # construct embedding operator
        if self.qr_flag and n > self.qr_threshold:
            EE = QREmbeddingBag(n, m, self.qr_collisions,
                                operation=self.qr_operation, mode="sum", sparse=True)
        else:
            EE = nn.EmbeddingBag(n, m, mode="sum", sparse=True)
            # initialize embeddings
            # nn.init.uniform_(EE.weight, a=-np.sqrt(1 / n), b=np.sqrt(1 / n))
            W = np.random.uniform(low=-np.sqrt(1 / n), high=np.sqrt(1 / n),
                                  size=(n, m)).astype(np.float32)
            # approach 1
            EE.weight.data = torch.tensor(W, requires_grad=True)
            # EE.weight.register_hook(print)
            # approach 2
            # EE.weight.data.copy_(torch.tensor(W))
            # approach 3
            # EE.weight = Parameter(torch.tensor(W),requires_grad=True)
        emb_l.append(EE)
    return emb_l
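# A minimal consumption sketch (an assumption, not part of create_emb above):
# the returned ModuleList is typically driven with one (indices, offsets) pair
# per sparse feature, each pooled by its own EmbeddingBag.
def apply_emb_sketch(emb_l, lS_i, lS_o):
    # emb_l: nn.ModuleList produced by create_emb
    # lS_i / lS_o: lists of per-feature index and offset tensors
    return [E(indices, offsets) for E, indices, offsets in zip(emb_l, lS_i, lS_o)]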
def hashEmbeddingBagTest():
    # test hashEmbeddingBag
    embedding_bag = HashVectorEmbeddingBag(10, 5, 1.0)
    # test_input = torch.randint(0, 10, torch.Size([5,]))
    # print("Test input", test_input)
    print("Embedding weight before forward: ", embedding_bag.hashed_weight)
    print("Result:",
          embedding_bag(torch.tensor([0, 0, 1, 1, 2]), torch.tensor([0, 1, 2, 3, 4])))
    print("Embedding weight after forward: ", embedding_bag.hashed_weight)

    # the original EmbeddingBag
    n, m = 10, 5
    emb = nn.EmbeddingBag(n, m, mode="sum", sparse=True)
    emb.weight.data = embedding_bag.hashed_weight.data.reshape(n, m)
    # initialize embeddings
    # nn.init.uniform_(EE.weight, a=-np.sqrt(1 / n), b=np.sqrt(1 / n))
    # W = np.random.uniform(
    #     low=-np.sqrt(1 / n), high=np.sqrt(1 / n), size=(n, m)
    # ).astype(np.float32)
    # approach 1
    # emb.weight.data = torch.tensor(W, requires_grad=True)
    print("Original emb: ", emb.weight.data,
          emb(torch.tensor([0, 0, 1, 1, 2]), torch.tensor([0, 1, 2, 3, 4])))

    target = [0, 0, 0, 0, 1]
def __init__(self, config):
    super(Encoder, self).__init__()
    self.feature_emb = nn.EmbeddingBag(config['feature_dim'], config['n_rels'], mode='sum')
    self.feature_bias = nn.Parameter(torch.Tensor(config['n_rels']))
    self.init()
def __init__(self, in_dim, hidden_dim1, hidden_dim2, half=False):
    super(SparseMultiAE, self).__init__()
    # use_embeddings = True
    print("SparseMultiAE: input dim = {}, hidden_dim1 = {} hidden_dim2 = {}".format(
        in_dim, hidden_dim1, hidden_dim2))
    self.in_dim = in_dim
    self.title_emb_dim = 512
    self.hidden_dim2 = hidden_dim2
    self.half = half

    self.emb1 = nn.EmbeddingBag(in_dim, hidden_dim1, mode='sum', sparse=True)
    self.l1 = nn.Linear(hidden_dim1, self.hidden_dim2)

    # Decode
    self.l2 = nn.Linear(self.hidden_dim2, hidden_dim1)
    self.l3 = nn.Linear(hidden_dim1, in_dim)

    # TODO: Is this activated in eval?
    self.drop = nn.Dropout(0.5)
def __init__(self, basedir, split="train",
             fasttext_model_path="models/europarl_lid.model.min5.bin",
             use_spaces=False, default_char="</s>"):
    self.basedir = basedir
    self.split = split
    self.use_spaces = use_spaces
    self.default_char = default_char
    self.f = open(os.path.join(self.basedir, self.splits[self.split]), "r")

    fasttext_model = load_model(os.path.join(basedir, fasttext_model_path))
    input_matrix = fasttext_model.get_input_matrix()  # numpy
    num_emb, emb_dim = input_matrix.shape
    self.embbag = nn.EmbeddingBag(num_emb, emb_dim)
    self.embbag.weight.data.copy_(torch.from_numpy(input_matrix))
    self.embbag.eval()
    self.word_dict = {
        w: i for i, w in enumerate(fasttext_model.get_words(include_freq=False))
    }
    self.label_dict = {
        l: i for i, l in enumerate(fasttext_model.get_labels(include_freq=False))
    }
    self.data = []
    self.labels = []
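# A hedged helper sketch (not in the original class): map one tokenized
# sentence to ids via word_dict and pool it with the fastText-derived bag
# above; unknown words are simply skipped here.
def embed_sentence(self, words):
    ids = torch.tensor([self.word_dict[w] for w in words if w in self.word_dict])
    # a 2D input to EmbeddingBag treats each row as one bag (default mode is 'mean')
    return self.embbag(ids.unsqueeze(0))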
def __init__(self, vocab_size, embed_dim, num_class, model_type, hidden_layer):
    super().__init__()
    embedding = nn.EmbeddingBag(vocab_size, embed_dim, sparse=True)
    self.add_module('0', embedding)
    if model_type == 'cnn':
        rnn_model = nn.Conv2d(1, 1, kernel_num, (5, embed_dim))
    elif model_type == 'bi_lstm':
        rnn_model = nn.LSTM(
            input_size=embed_dim,
            hidden_size=embed_dim,
            num_layers=hidden_layer,
            batch_first=True,
            bidirectional=True
        )
        self.add_module('1', rnn_model)
    elif model_type == 'lstm':
        rnn_model = nn.LSTM(
            input_size=embed_dim,
            hidden_size=embed_dim,
            num_layers=hidden_layer,
            batch_first=True,
            bidirectional=True
        )
        self.add_module('1', rnn_model)
    else:
        raise ValueError
    self.add_module('2', nn.Dropout(0.3))
    self.add_module('3', nn.Linear(embed_dim, embed_dim))
    self.add_module('4', nn.Dropout(0.3))
    self.add_module('5', nn.ReLU(True))
    self.add_module('6', nn.Linear(embed_dim, num_class))
def create_emb(self, dim_size: int, vocab_size_list: np.ndarray):
    # Example: vocab_size_list = [2, 3, 4], dim_size = 2 means three separate
    # embedding matrices are created, each with embedding dimension 2.
    emb_module_list = nn.ModuleList()
    for i in range(0, vocab_size_list.size):
        vocab_size = vocab_size_list[i]
        # construct embedding operator
        embedding_bag = nn.EmbeddingBag(vocab_size, dim_size, mode="sum", sparse=True)
        # initialize embeddings
        # nn.init.uniform_(embedding_bag.weight, a=-np.sqrt(1 / n), b=np.sqrt(1 / n))
        W = np.random.uniform(low=-np.sqrt(1 / vocab_size), high=np.sqrt(1 / vocab_size),
                              size=(vocab_size, dim_size)).astype(np.float32)
        # approach 1: initialize the embedding weights in place
        embedding_bag.weight.data = torch.tensor(W, requires_grad=True)
        # approach 2
        # embedding_bag.weight.data.copy_(torch.tensor(W))
        # approach 3
        # embedding_bag.weight = Parameter(torch.tensor(W),requires_grad=True)
        emb_module_list.append(embedding_bag)
    return emb_module_list
def __init__(self, opt):
    super(noCluster, self).__init__()
    self.opt = opt
    self.emblen = opt['emb_len']
    self.word_size = opt['word_size']
    self.type_size = opt['type_size']
    self.bag_weighting = opt['bag_weighting']
    self.label_distribution = opt['label_distribution']
    self.word_emb_bag = nn.EmbeddingBag(opt['word_size'], opt['emb_len'])
    self.word_emb_bag.weight = self.word_emb_bag.weight
    if opt['bias'] == 'fix':
        self.linear = nn.Linear(opt['emb_len'], opt['type_size'], bias=False)
        self.linear.weight.data.zero_()
        self.linear_bias = torch.log(
            torch.autograd.Variable(torch.cuda.FloatTensor(opt['label_distribution']),
                                    requires_grad=False))
    else:
        self.linear = nn.Linear(opt['emb_len'], opt['type_size'], bias=True)
        self.linear.weight.data.zero_()
        self.linear.bias.data.zero_()
    self.crit = obj.partCE(if_average=opt['if_average'])
    self.drop_prob = opt['output_dropout']
def __init__(self, dim, heads=4, num_keys=128, topk=32, dim_head=256,
             input_dropout=0., query_dropout=0., value_dropout=0.):
    super().__init__()
    assert dim % heads == 0, 'dimension must be divisible by number of heads'
    self.topk = topk
    self.heads = heads
    self.num_keys = num_keys

    dim_query = dim_head * heads
    self.to_queries = nn.Linear(dim, dim_query, bias=False)
    self.norm = MaskedBatchNorm1D(nn.BatchNorm1d(dim_query))

    self.keys = nn.Parameter(torch.zeros(heads, num_keys, 2, dim_head // 2))
    self.values = nn.EmbeddingBag(num_keys ** 2, dim, mode='sum')
    init_(self.keys)
    init_(self.values.weight)

    self.input_dropout = nn.Dropout(input_dropout)
    self.query_dropout = nn.Dropout(query_dropout)
    self.value_dropout = nn.Dropout(value_dropout)
def __init__(self, vocab_size, embed_dim, num_class):
    super().__init__()
    # TODO(tilo): sparse=True leads to error in SGD gradient momentum calculation
    self.embedding = nn.EmbeddingBag(vocab_size, embed_dim, sparse=False)
    self.fc = nn.Linear(embed_dim, num_class)
    self.init_weights()
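# A plausible init_weights for the classifier above (an assumption; its body is
# not shown in this snippet). Small-range uniform initialization is the common
# pattern for this kind of bag-of-embeddings text classifier.
def init_weights(self):
    initrange = 0.5
    self.embedding.weight.data.uniform_(-initrange, initrange)
    self.fc.weight.data.uniform_(-initrange, initrange)
    self.fc.bias.data.zero_()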
def __init__(self, vocab_size, embed_dim, num_class):
    super().__init__()
    self.embedding = nn.EmbeddingBag(vocab_size, embed_dim, sparse=True)
    self.fc = nn.Linear(embed_dim, embed_dim)
    self.fc2 = nn.Linear(embed_dim, num_class)
    # self.fc = nn.Linear(embed_dim, num_class)
    self.init_weights()
def __init__(
    self,
    embedding_dim: int,
    weight_scale: float,
    embedding_bag_mode: str,
    ignore_weight: bool,
    pooling_type: str,
    mlp_layer_dims: List[int],
    feature_buckets: Dict[int, int],
) -> None:
    super().__init__(embedding_dim)
    self.weight_scale = weight_scale
    self.ignore_weight = ignore_weight
    if not ignore_weight:
        assert embedding_bag_mode == "sum"  # EmbeddingBag required.
    self.pooling_type = pooling_type
    self.mlp_layer_dims = mlp_layer_dims
    self.feature_buckets = {int(k): v for k, v in feature_buckets.items()}
    self.feature_embeddings = nn.ModuleDict({
        str(k): nn.EmbeddingBag(v, embedding_dim, mode=embedding_bag_mode)
        for k, v in feature_buckets.items()
    })
    self.num_intput_features = len(feature_buckets)
    input_dim = (self.num_intput_features * embedding_dim
                 if self.pooling_type == "none" else embedding_dim)
    self.mlp = nn.Sequential(*(
        nn.Sequential(nn.Linear(m, n), nn.ReLU())
        for m, n in zip([input_dim] + list(mlp_layer_dims), mlp_layer_dims)
    ))
    log_class_usage(__class__)
def __init__(self, vocab_dim, embed_dim, latent_dim, b_dim, m_dim, s_dim, d_dim):
    super(Net, self).__init__()
    price_dim = 3
    price_embed_dim = 32
    other_dim = 5
    slope = 0.2
    small_slope = 0.02
    img_dim = 2048
    reduced_img_dim = 128
    latent_dim = embed_dim + price_embed_dim + other_dim + reduced_img_dim

    # text embeddings
    self._embedding = nn.EmbeddingBag(vocab_dim, embed_dim, mode='sum')

    # price embedding (sorta)
    self._price_proc = nn.Sequential(nn.Linear(price_dim, 128),
                                     nn.LeakyReLU(slope),
                                     nn.Linear(128, price_embed_dim),
                                     nn.LeakyReLU(slope),
                                     nn.BatchNorm1d(price_embed_dim))

    # image processing
    self._img_proc = nn.Sequential(nn.Linear(img_dim, reduced_img_dim),
                                   nn.LeakyReLU(slope))

    # residual unit
    self._res = nn.Sequential(
        nn.Linear(latent_dim, 3200),
        nn.LeakyReLU(slope),
        # nn.Dropout(0.2),
        nn.Linear(3200, 2 * latent_dim),
        nn.LeakyReLU(slope),
        nn.Linear(2 * latent_dim, latent_dim),
        nn.LeakyReLU(small_slope),
        nn.BatchNorm1d(latent_dim))

    # decision layers
    self._b = nn.Sequential(nn.Linear(latent_dim, b_dim), nn.LogSoftmax(dim=1))
    self._m = nn.Sequential(nn.Linear(latent_dim, m_dim), nn.LogSoftmax(dim=1))
    self._s = nn.Sequential(nn.Linear(latent_dim, s_dim), nn.LogSoftmax(dim=1))
    self._d = nn.Sequential(nn.Linear(latent_dim, d_dim), nn.LogSoftmax(dim=1))

    leakyrelu_gain = nn.init.calculate_gain("leaky_relu", param=slope)
    for layer in (self._price_proc[0], self._price_proc[2],
                  self._img_proc[0], self._res[0], self._res[2]):
        nn.init.xavier_normal_(layer.weight, gain=leakyrelu_gain)

    leakyrelu_gain2 = nn.init.calculate_gain("leaky_relu", param=small_slope)
    for layer in (self._res[4],):
        nn.init.xavier_normal_(layer.weight, gain=small_slope)
def __init__(self, vocab_size, emb_dim):
    super(AverageEmbeddings, self).__init__()
    self.embedding = nn.EmbeddingBag(num_embeddings=vocab_size,
                                     embedding_dim=emb_dim,
                                     mode="mean")
    self.embedding.weight.requires_grad = False
def test_ddp_dist_autograd_sparse_grads(self):
    # Each trainer uses a different random seed. Otherwise, they are going
    # to have exactly the same initial model parameters, input, and
    # therefore grads. That means the grads will be the same before and
    # after DDP's all-reduce.
    torch.manual_seed(self.rank)
    dist.init_process_group(
        backend="gloo",
        init_method=INIT_METHOD_TEMPLATE.format(file_name=self.file_name),
        world_size=self.world_size,
        rank=self.rank,
    )

    model = nn.EmbeddingBag(10, 3, sparse=True)
    ddp_model = DistributedDataParallel(model)

    # Different inputs for each trainer.
    input = torch.LongTensor(10).random_(0, 10)
    offsets = torch.LongTensor([0, 4])

    # Run local.
    loss = ddp_model(input, offsets).sum()
    loss.backward()

    with dist_autograd.context() as context_id:
        loss = ddp_model(input, offsets).sum()
        dist_autograd.backward(context_id, [loss])
        grads_dict = dist_autograd.get_gradients(context_id)
        self.assertEqual(1, len(grads_dict))
        self.assertEqual(model.weight.grad, grads_dict[model.weight])
def create_node2vec_embedding_layer(user_embedding_path, freeze_weights):
    model = gensim.models.KeyedVectors.load_word2vec_format(user_embedding_path)
    num_authors = len(model.vocab) + 2
    EMBEDDING_DIM = model.vector_size
    embedding_matrix = np.zeros((num_authors, EMBEDDING_DIM))
    author_to_pos_dict = {}
    for i, word in enumerate(model.vocab):
        vector = model[word]
        embedding_matrix[i + 1] = vector
        author_id = int(word)
        author_to_pos_dict[author_id] = i + 1
    # embedding_matrix[0] = np.random.rand(EMBEDDING_DIM)
    embedding_matrix = torch.from_numpy(embedding_matrix)
    emb_layer = nn.EmbeddingBag(num_authors, EMBEDDING_DIM, mode='mean')
    # fallback embedding at 0
    emb_layer.weight = nn.Parameter(embedding_matrix)
    if freeze_weights:
        emb_layer.weight.requires_grad = False
    return EMBEDDING_DIM, num_authors, emb_layer, author_to_pos_dict
def create_embedding_matrix(self, feature_columns, embedding_size, init_std=0.0001, sparse=False):
    # Return nn.ModuleDict: for sparse features, {embedding_name: nn.Embedding};
    # for varlen sparse features, {embedding_name: nn.EmbeddingBag}
    sparse_feature_columns = list(
        filter(lambda x: isinstance(x, SparseFeat), feature_columns)) if len(feature_columns) else []
    varlen_sparse_feature_columns = list(
        filter(lambda x: isinstance(x, VarLenSparseFeat), feature_columns)) if len(feature_columns) else []

    embedding_dict = nn.ModuleDict({
        feat.embedding_name: nn.Embedding(feat.dimension, embedding_size, sparse=sparse)
        for feat in sparse_feature_columns
    })

    for feat in varlen_sparse_feature_columns:
        embedding_dict[feat.embedding_name] = nn.EmbeddingBag(
            feat.dimension, embedding_size, sparse=sparse, mode=feat.combiner)

    for tensor in embedding_dict.values():
        nn.init.normal_(tensor.weight, mean=0, std=init_std)

    return embedding_dict
def __init__(self, voca_size, input_size, bilstm_hidden_size, ffnn_hidden_size, ffnn_output_size, freeze=True):
    super(BOW_BiLSTM_RANDOM, self).__init__()
    # Get the dimension of embeddings as the input size of bilstm
    self.voca_size = voca_size
    self.input_size = input_size
    self.bilstm_hidden_size = bilstm_hidden_size
    self.ffnn_hidden_size = ffnn_hidden_size
    self.ffnn_output_size = ffnn_output_size
    self.freeze = freeze
    # BOW layer
    self.bow = nn.EmbeddingBag(voca_size, input_size, mode='mean')
    self.bow.weight.requires_grad = not self.freeze
    # Bilstm network
    self.bilstm = nn.LSTM(self.input_size, self.bilstm_hidden_size, bidirectional=True)
    # Feed forward neural network with one hidden layer
    self.ffnn = FFNN(self.bilstm_hidden_size * 2, self.ffnn_hidden_size, self.ffnn_output_size)
    # Softmax layer
    self.log_softmax = nn.LogSoftmax(dim=1)