def data_group_by_sentence_len(self, data_file_path):
    data_by_sentence_len = {}
    with open(data_file_path, "r") as f:
        reader = csv.DictReader(f)
        for row in tqdm(reader, desc="Read data from {} into memory".format(
                data_file_path)):
            data = data_by_sentence_len.setdefault(
                row["sentence_len"], {key: [] for key in DATA_KEYS})
            sent_words = row["sentence"].split()
            assert len(sent_words) > 1
            sent_inds = [
                self.word2index.get(word, self.word2index[UNK_TOKEN])
                for word in sent_words
            ]
            score = (float(row[self.src_aspect_score])
                     if len(row[self.src_aspect_score].strip()) > 0 else 0.0)
            aspect_index = self.aspect2index.get(
                row["aspect"], self.aspect2index[UNK_TOKEN])
            aspect1hot = to_one_hot(aspect_index, self.n_aspect)
            user1hot = to_one_hot(self.user2index[row["reviewerID"]],
                                  self.n_user)
            item1hot = to_one_hot(self.item2index[row["asin"]], self.n_item)
            data["sentences"].append(sent_inds)
            data["scores"].append([score])
            data["aspects"].append(row["aspect"])
            data["aspects_pos"].append(row["aspect_pos"])
            aspect_w = self.word2index.get(row["aspect"],
                                           self.word2index[UNK_TOKEN])
            data["aspects_w"].append(aspect_w)
            data["aspect1hot"].append(aspect1hot)
            data["aspect_senti"].append(aspect1hot * score)
            data["uias_vec"].append(
                np.concatenate((user1hot, item1hot, aspect1hot * score)))
            data["random_aspect_senti"].append(
                to_one_hot(
                    np.random.choice(self.n_aspect),
                    self.n_aspect,
                    np.random.random(1).astype(np.float32) * 4 + 1,
                ))  # scores range from 1 to 5
            data["opinions"].append(row["opinion"])
            data["opinions_pos"].append(row["opinion_pos"])
            opinion_w = self.word2index.get(row["opinion"],
                                            self.word2index[UNK_TOKEN])
            data["opinions_w"].append(opinion_w)
            data["ratings"].append(row["overall"])
    return data_by_sentence_len
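# The loader above uses to_one_hot in two forms: to_one_hot(index, size) and a
# three-argument form whose last argument overrides the value written at the
# hot position (used for the random 1-5 sentiment score). A minimal NumPy
# sketch matching those call sites -- an assumption, not the project's actual
# helper:
import numpy as np


def to_one_hot(index, size, value=1.0):
    vec = np.zeros(size, dtype=np.float32)
    vec[index] = value  # `value` may be a length-1 array, as at the call site
    return vec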
def train_and_save():
    x = tf.placeholder(tf.float32, shape=[None, INPUT_SAMPLE_LENGHT],
                       name=INPUT_TENSOR_NAME)
    y_ = tf.placeholder(tf.float32, shape=[None, NUMBER_OF_CLASSES],
                        name=LABEL_TENSOR_NAME)
    logits = create_computation_graph(x)
    output = add_predictor(logits)
    dataset = suffle_dataset(read_dataset(FEATURE_FILENAME))
    dataset.train_labels = to_one_hot(dataset.train_labels)
    train_op, loss_op = make_traingin_step(logits, y_)
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(init)
        train_model(sess, x, y_, train_op, dataset, loss_op)
        saver.save(sess, TRAIN_MODEL_SAVE_NAME)
        tf.train.write_graph(sess.graph_def, "res", "tgraph.pb", as_text=False)
        save_labels(MODEL_METAFILE_NAME, dataset)
        dataset.train_labels = from_one_hot(dataset.train_labels)
        print_acc(sess, output, x, dataset)
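# train_and_save one-hot-encodes the labels before training and decodes them
# again before print_acc. A minimal sketch of the decoding step, assuming
# integer class labels and a 2-D one-hot array (the real from_one_hot may
# differ):
import numpy as np


def from_one_hot(one_hot_labels):
    # Recover integer labels as the argmax along the class axis.
    return np.argmax(one_hot_labels, axis=1)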
def get_ranked_opinions(self, user, item, sentence, aspect_position,
                        opinion_position, top_k=None):
    sentence = sentence.split()
    aspect_position = int(float(aspect_position))
    opinion_position = int(float(opinion_position))
    aspect = sentence[aspect_position]
    if 'aspect-opinion' in self.strategy:
        self.candidates = [
            self.word2index[w] for w in self.aspect_opinions[aspect]
            if w in self.word2index and w != '<UNK>'
        ]
        self.w_candidates = self.model_reader.w[self.candidates]
    aspect_index = self.aspect2index.get(aspect, self.aspect2index['<UNK>'])
    aspect1hot = to_one_hot(aspect_index, self.n_aspect)
    score = self.preference.get_aspect_score(user, item, aspect)
    sentence_v = [
        self.word2index.get(word, self.word2index['<UNK>'])
        for word in sentence
    ]
    x = ([sentence_v], [opinion_position], [aspect1hot * score])
    similarity = cosine_similarity(
        self.model_reader.model.get_context_vector(x),
        self.w_candidates).reshape(len(self.candidates))
    ranked_ids = (-similarity).argsort()[:top_k]
    ranked_candidates = np.array(self.candidates).take(ranked_ids)
    return [self.index2word[idx] for idx in ranked_candidates]
def test_one_hot(self):
    arr = [1, 2, 0, 1, 2]
    K = 4
    actual = util.to_one_hot(arr, K)
    expected = numpy.array(
        [[0, 1, 0, 0],
         [0, 0, 1, 0],
         [1, 0, 0, 0],
         [0, 1, 0, 0],
         [0, 0, 1, 0]],
        dtype=numpy.int32)
    numpy.testing.assert_equal(actual, expected)
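# The test above pins down a vectorized behaviour: a length-N index list maps
# to an (N, K) int32 matrix. A sketch that satisfies it via identity-matrix
# row lookup -- an assumption about util.to_one_hot, not its actual source:
import numpy


def to_one_hot(indices, K):
    return numpy.eye(K, dtype=numpy.int32)[numpy.asarray(indices)]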
def forward(self, ins):
    # Append a one-hot object-ID channel to every object slot in the batch.
    obj_ids = torch.arange(self.num_objects)
    obj_ids = util.to_one_hot(obj_ids, self.num_objects).unsqueeze(0)
    # ins.device (rather than ins.get_device()) also works for CPU tensors.
    obj_ids = obj_ids.repeat((ins.size(0), 1, 1)).to(ins.device)
    h = torch.cat((ins, obj_ids), -1)
    h = self.act1(self.fc1(h))
    h = self.act2(self.fc2(h))
    h = self.fc3(h).sum(1)
    return h.view(-1, self.output_size[0], self.output_size[1],
                  self.output_size[2])
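# Both torch forward passes here (this one and the relational one further
# down) feed index tensors to util.to_one_hot and expect a float result they
# can concatenate. A minimal torch sketch under that assumption (the repo's
# helper may be implemented differently):
import torch.nn.functional as F


def to_one_hot(indices, max_index):
    # One float32 one-hot row per index, kept on the indices' device.
    return F.one_hot(indices, num_classes=max_index).float()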
def read_batch(self, reader, batchsize, word2index):
    batch = {key: [] for key in DATA_KEYS}
    for row in reader:
        if len(batch["sentences"]) >= batchsize:
            break
        sent_words = row["sentence"].split()
        assert len(sent_words) > 1
        sent_inds = [
            word2index.get(word, word2index[UNK_TOKEN])
            for word in sent_words
        ]
        score = (float(row[self.src_aspect_score])
                 if len(row[self.src_aspect_score].strip()) > 0 else 0.0)
        aspect_index = self.aspect2index.get(row["aspect"],
                                             self.aspect2index[UNK_TOKEN])
        aspect1hot = to_one_hot(aspect_index, self.n_aspect)
        user1hot = to_one_hot(self.user2index[row["reviewerID"]], self.n_user)
        item1hot = to_one_hot(self.item2index[row["asin"]], self.n_item)
        batch["sentences"].append(sent_inds)
        batch["scores"].append([score])
        batch["aspects"].append([row["aspect"]])
        batch["aspects_pos"].append([row["aspect_pos"]])
        aspect_w = word2index.get(row["aspect"], word2index[UNK_TOKEN])
        batch["aspects_w"].append(aspect_w)
        batch["aspect1hot"].append(aspect1hot)
        batch["aspect_senti"].append(aspect1hot * score)
        batch["uias_vec"].append(
            np.concatenate((user1hot, item1hot, aspect1hot * score)))
        batch["random_aspect_senti"].append(
            to_one_hot(
                np.random.choice(self.n_aspect),
                self.n_aspect,
                np.random.random(1).astype(np.float32) * 4 + 1,
            ))  # scores range from 1 to 5
        batch["opinions"].append(row["opinion"])
        batch["opinions_pos"].append(row["opinion_pos"])
        opinion_w = word2index.get(row["opinion"], word2index[UNK_TOKEN])
        batch["opinions_w"].append(opinion_w)
        batch["ratings"].append(row["overall"])
    return batch
def forward(self, states, action):
    cuda = states.is_cuda
    batch_size = states.size(0)
    num_nodes = states.size(1)

    # states: [batch_size (B), num_objects, embedding_dim]
    # node_attr: flatten states tensor to [B * num_objects, embedding_dim]
    node_attr = states.view(-1, self.input_dim)

    edge_attr = None
    edge_index = None

    if num_nodes > 1:
        # edge_index: [B * (num_objects * (num_objects - 1)), 2] edge list
        edge_index = self._get_edge_list_fully_connected(
            batch_size, num_nodes, cuda)
        row, col = edge_index
        edge_attr = self._edge_model(node_attr[row], node_attr[col], edge_attr)

    if not self.ignore_action:
        if self.copy_action:
            # The same action one-hot is copied to every object slot.
            action_vec = util.to_one_hot(action, self.action_dim).repeat(
                1, self.num_objects)
            action_vec = action_vec.view(-1, self.action_dim)
        else:
            # One action slot per object, split after the one-hot encoding.
            action_vec = util.to_one_hot(action, self.action_dim * num_nodes)
            action_vec = action_vec.view(-1, self.action_dim)

        # Attach action to each state.
        node_attr = torch.cat([node_attr, action_vec], dim=-1)

    node_attr = self._node_model(node_attr, edge_index, edge_attr)

    # [batch_size, num_nodes, hidden_dim]
    return node_attr.view(batch_size, num_nodes, -1)
def __call__(self, M: np.ndarray,
             A: np.ndarray = None,
             B: np.ndarray = None) -> Tuple[np.ndarray, np.ndarray]:
    # Pass the matrix through unchanged; the second output one-hot-selects
    # column 1. (Requires `from typing import Tuple` at module level.)
    return M, to_one_hot(1, M.shape[1])
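# Hypothetical usage of the __call__ transform above, assuming
# to_one_hot(1, n) yields a length-n vector with a 1 at index 1
# (`transform` stands in for an instance of the unnamed class):
#
#     M_out, col_mask = transform(np.zeros((3, 5)))
#     # M_out.shape == (3, 5)
#     # col_mask == [0., 1., 0., 0., 0.]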