Example 1
    def data_group_by_sentence_len(self, data_file_path):
        data_by_sentence_len = {}
        with open(data_file_path, "r") as f:
            reader = csv.DictReader(f)
            for row in tqdm(reader,
                            desc="Read data from {} into memory".format(
                                data_file_path)):
                data = data_by_sentence_len.setdefault(
                    row["sentence_len"], {key: []
                                          for key in DATA_KEYS})

                sent_words = row["sentence"].split()
                assert len(sent_words) > 1
                sent_inds = [
                    self.word2index.get(word, self.word2index[UNK_TOKEN])
                    for word in sent_words
                ]
                score = (float(row[self.src_aspect_score]) if
                         len(row[self.src_aspect_score].strip()) > 0 else 0.0)
                aspect_index = self.aspect2index.get(
                    row["aspect"], self.aspect2index[UNK_TOKEN])
                aspect1hot = to_one_hot(aspect_index, self.n_aspect)
                user1hot = to_one_hot(self.user2index[row["reviewerID"]],
                                      self.n_user)
                item1hot = to_one_hot(self.item2index[row["asin"]],
                                      self.n_item)
                data["sentences"].append(sent_inds)
                data["scores"].append([score])
                data["aspects"].append(row["aspect"])
                data["aspects_pos"].append(row["aspect_pos"])
                aspect_w = self.word2index.get(row["aspect"],
                                               self.word2index[UNK_TOKEN])
                data["aspects_w"].append(aspect_w)
                data["aspect1hot"].append(aspect1hot)
                data["aspect_senti"].append(aspect1hot * score)
                data["uias_vec"].append(
                    np.concatenate((user1hot, item1hot, aspect1hot * score)))
                data["random_aspect_senti"].append(
                    to_one_hot(
                        np.random.choice(self.n_aspect),
                        self.n_aspect,
                        np.random.random(1).astype(np.float32) * 4 + 1,
                    ))  # score ranges from 1 to 5
                data["opinions"].append(row["opinion"])
                data["opinions_pos"].append(row["opinion_pos"])
                opinion_w = self.word2index.get(row["opinion"],
                                                self.word2index[UNK_TOKEN])
                data["opinions_w"].append(opinion_w)
                data["ratings"].append(row["overall"])
        return data_by_sentence_len
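
Note that to_one_hot is called above both with two arguments (aspect/user/item indices) and with a third random value for random_aspect_senti, so the helper presumably accepts an optional fill value. A minimal NumPy sketch consistent with that usage (the signature is an assumption, not this project's actual code):

import numpy as np

def to_one_hot(index, n, value=1.0):
    # Hypothetical signature inferred from the calls above: a length-n
    # vector with `value` at position `index` and zeros elsewhere.
    vec = np.zeros(n, dtype=np.float32)
    vec[index] = value
    return vec

Multiplying the result by a score, as in aspect1hot * score, then simply scales the single hot entry.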
Example 2
def train_and_save():
    x = tf.placeholder(tf.float32,
                       shape=[None, INPUT_SAMPLE_LENGHT],
                       name=INPUT_TENSOR_NAME)
    y_ = tf.placeholder(tf.float32,
                        shape=[None, NUMBER_OF_CLASSES],
                        name=LABEL_TENSOR_NAME)
    logits = create_computation_graph(x)
    output = add_predictor(logits)
    dataset = suffle_dataset(read_dataset(FEATURE_FILENAME))
    dataset.train_labels = to_one_hot(dataset.train_labels)
    train_op, loss_op = make_traingin_step(logits, y_)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(init)

        train_model(sess, x, y_, train_op, dataset, loss_op)

        saver.save(sess, TRAIN_MODEL_SAVE_NAME)
        tf.train.write_graph(sess.graph_def, "res", "tgraph.pb", as_text=False)
        save_labels(MODEL_METAFILE_NAME, dataset)
        dataset.train_labels = from_one_hot(dataset.train_labels)
        print_acc(sess, output, x, dataset)
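
The labels make a round trip through one-hot form: to_one_hot before training, from_one_hot before printing the accuracy. A hedged sketch of such a pair, assuming integer class labels in a NumPy array (the real helpers are not shown in the snippet):

import numpy as np

def to_one_hot(labels, num_classes=None):
    # Index rows of an identity matrix; infer the width when not given,
    # matching the single-argument call above.
    if num_classes is None:
        num_classes = int(labels.max()) + 1
    return np.eye(num_classes, dtype=np.float32)[labels]

def from_one_hot(one_hot):
    # Inverse mapping: the position of the hot entry in each row.
    return np.argmax(one_hot, axis=1)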
Example 3
    def get_ranked_opinions(self,
                            user,
                            item,
                            sentence,
                            aspect_position,
                            opinion_position,
                            top_k=None):
        sentence = sentence.split()
        aspect_position = int(float(aspect_position))
        opinion_position = int(float(opinion_position))
        aspect = sentence[aspect_position]
        if 'aspect-opinion' in self.strategy:
            self.candidates = [
                self.word2index[w] for w in self.aspect_opinions[aspect]
                if w in self.word2index and w != '<UNK>'
            ]
            self.w_candidates = self.model_reader.w[self.candidates]
        aspect_index = self.aspect2index.get(aspect,
                                             self.aspect2index['<UNK>'])
        aspect1hot = to_one_hot(aspect_index, self.n_aspect)
        score = self.preference.get_aspect_score(user, item, aspect)
        sentence_v = [
            self.word2index.get(word, self.word2index['<UNK>'])
            for word in sentence
        ]
        x = ([sentence_v], [opinion_position], [aspect1hot * score])
        similarity = cosine_similarity(
            self.model_reader.model.get_context_vector(x),
            self.w_candidates).reshape(len(self.candidates))
        ranked_ids = (-similarity).argsort()[0:top_k]
        ranked_candidates = np.array(self.candidates).take(ranked_ids)
        return [self.index2word[idx] for idx in ranked_candidates]
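
A hypothetical call, with the user/item IDs and the sentence invented for illustration; the positions arrive as strings like "1.0", which is why the method parses them with int(float(...)):

# All values below are made up; positions index into sentence.split().
ranker.get_ranked_opinions(
    user="A1B2C3D4",
    item="B000123",
    sentence="the battery life is great",
    aspect_position="1.0",   # -> sentence.split()[1] == "battery"
    opinion_position="4.0",  # -> "great"
    top_k=5,
)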
Example 4
    def test_one_hot(self):
        arr = [1, 2, 0, 1, 2]
        K = 4
        actual = util.to_one_hot(arr, K)
        expected = numpy.array(
            [[0, 1, 0, 0],
             [0, 0, 1, 0],
             [1, 0, 0, 0],
             [0, 1, 0, 0],
             [0, 0, 1, 0]], dtype=numpy.int32)
        numpy.testing.assert_equal(actual, expected)
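
The test fixes the contract for sequence input: util.to_one_hot(arr, K) returns a (len(arr), K) integer matrix with one hot entry per row. A minimal implementation that would pass it (a sketch, not necessarily util's actual code):

import numpy

def to_one_hot(indices, k):
    # One row per index; advanced indexing sets the hot entry in each row.
    out = numpy.zeros((len(indices), k), dtype=numpy.int32)
    out[numpy.arange(len(indices)), indices] = 1
    return out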
Example 5
    def forward(self, ins):
        # Build a one-hot ID per object ([num_objects, num_objects]), add a
        # batch axis, and tile to [batch_size, num_objects, num_objects].
        obj_ids = torch.arange(self.num_objects)
        obj_ids = util.to_one_hot(obj_ids, self.num_objects).unsqueeze(0)
        obj_ids = obj_ids.repeat((ins.size(0), 1, 1)).to(ins.device)

        # Concatenate each object's ID onto its features, run the MLP,
        # sum over objects, and reshape to the target output size.
        h = torch.cat((ins, obj_ids), -1)
        h = self.act1(self.fc1(h))
        h = self.act2(self.fc2(h))
        h = self.fc3(h).sum(1)
        return h.view(-1, self.output_size[0], self.output_size[1],
                      self.output_size[2])
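
Here util.to_one_hot consumes a 1-D LongTensor of object IDs, and the result is unsqueezed and tiled across the batch. A PyTorch sketch compatible with that call (an assumed implementation, matching the usage in Examples 5 and 7):

import torch

def to_one_hot(indices, max_index):
    # [len(indices), max_index] float matrix; scatter a 1 at each index.
    zeros = torch.zeros(len(indices), max_index, dtype=torch.float32,
                        device=indices.device)
    return zeros.scatter_(1, indices.unsqueeze(1), 1)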
Example 6
    def read_batch(self, reader, batchsize, word2index):
        batch = {key: [] for key in DATA_KEYS}
        for row in reader:
            if len(batch["sentences"]) >= batchsize:
                break
            sent_words = row["sentence"].split()
            assert len(sent_words) > 1
            sent_inds = [
                word2index.get(word, word2index[UNK_TOKEN])
                for word in sent_words
            ]
            score = (float(row[self.src_aspect_score])
                     if len(row[self.src_aspect_score].strip()) > 0 else 0.0)
            aspect_index = self.aspect2index.get(row["aspect"],
                                                 self.aspect2index[UNK_TOKEN])
            aspect1hot = to_one_hot(aspect_index, self.n_aspect)
            user1hot = to_one_hot(self.user2index[row["reviewerID"]],
                                  self.n_user)
            item1hot = to_one_hot(self.item2index[row["asin"]], self.n_item)
            batch["sentences"].append(sent_inds)
            batch["scores"].append([score])
            batch["aspects"].append([row["aspect"]])
            batch["aspects_pos"].append([row["aspect_pos"]])
            aspect_w = word2index.get(row["aspect"], word2index[UNK_TOKEN])
            batch["aspects_w"].append(aspect_w)
            batch["aspect1hot"].append(aspect1hot)
            batch["aspect_senti"].append(aspect1hot * score)
            batch["uias_vec"].append(
                np.concatenate((user1hot, item1hot, aspect1hot * score)))
            batch["random_aspect_senti"].append(
                to_one_hot(
                    np.random.choice(self.n_aspect),
                    self.n_aspect,
                    np.random.random(1).astype(np.float32) * 4 + 1,
                ))  # score ranges from 1 to 5
            batch["opinions"].append(row["opinion"])
            batch["opinions_pos"].append(row["opinion_pos"])
            opinion_w = word2index.get(row["opinion"], word2index[UNK_TOKEN])
            batch["opinions_w"].append(opinion_w)
            batch["ratings"].append(row["overall"])
        return batch
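
This is the streaming counterpart of data_group_by_sentence_len in Example 1: instead of bucketing every row by sentence_len up front, it pulls at most batchsize rows from the CSV reader per call, so a single batch may mix sentence lengths.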
Example 7
    def forward(self, states, action):
        cuda = states.is_cuda
        batch_size = states.size(0)
        num_nodes = states.size(1)

        # states: [batch_size (B), num_objects, embedding_dim]
        # node_attr: Flatten states tensor to [B * num_objects, embedding_dim]
        node_attr = states.view(-1, self.input_dim)

        edge_attr = None
        edge_index = None

        if num_nodes > 1:
            # edge_index: [2, B * num_objects * (num_objects - 1)] edge list
            # (the first axis holds source/target rows, unpacked below)
            edge_index = self._get_edge_list_fully_connected(
                batch_size, num_nodes, cuda)

            row, col = edge_index
            edge_attr = self._edge_model(node_attr[row], node_attr[col],
                                         edge_attr)

        if not self.ignore_action:
            if self.copy_action:
                action_vec = util.to_one_hot(action, self.action_dim).repeat(
                    1, self.num_objects)
                action_vec = action_vec.view(-1, self.action_dim)
            else:
                action_vec = util.to_one_hot(action,
                                             self.action_dim * num_nodes)
                action_vec = action_vec.view(-1, self.action_dim)

            # Attach action to each state
            node_attr = torch.cat([node_attr, action_vec], dim=-1)

        node_attr = self._node_model(node_attr, edge_index, edge_attr)

        # [batch_size, num_nodes, hidden_dim]
        return node_attr.view(batch_size, num_nodes, -1)
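
A quick shape check of the two action encodings, assuming action is a [batch_size] LongTensor and to_one_hot behaves as sketched after Example 5 (the concrete sizes are invented):

# B = 2 samples, num_objects = 3, action_dim = 4.
action = torch.tensor([2, 0])                    # [B]
vec = to_one_hot(action, 4)                      # [B, action_dim]
copied = vec.repeat(1, 3).view(-1, 4)            # copy_action: every object row
                                                 # repeats its sample's action
per_obj = to_one_hot(action, 4 * 3).view(-1, 4)  # else branch: only one object
                                                 # row per sample is non-zero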
Example 8
    def __call__(self,
                 M: np.ndarray,
                 A: np.ndarray = None,
                 B: np.ndarray = None) -> tuple[np.ndarray, np.ndarray]:
        return M, to_one_hot(1, M.shape[1])
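
Note that A and B are accepted but never used, and the second return value is always the same selector: a one-hot vector of width M.shape[1] with the hot entry at index 1. The scalar-index variant sketched after Example 1 would satisfy this call.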