Ejemplo n.º 1
0
    def sample(self, root, sample_num, sample_for_dis: bool):
        """Sample nodes for ``root`` according to the score matrix.

        Args:
            root: start node; used as a key into ``self.graph`` and as the
                row index into ``self.all_score``.
            sample_num: number of nodes to return.
            sample_for_dis: selects the sampling strategy (see Returns).

        Returns:
            list: when ``sample_for_dis`` is true, ``sample_num`` nodes drawn
            (with replacement) from a random subset of ``self.root_nodes``
            that excludes the neighbors of ``root``, weighted by the softmax
            of their scores; ``[]`` if that subset is empty.
            Otherwise a random walk that starts at ``root`` and follows
            neighbors weighted by the softmax of their scores until the
            trace holds ``sample_num`` nodes. The trace always contains
            ``root``, so ``sample_num <= 1`` yields ``[root]``.
        """
        all_score = self.all_score

        if sample_for_dis:
            root_neighbor = self.graph[root]
            k = min(int(0.1 * len(self.root_nodes)),
                    sample_num * len(root_neighbor))
            # Build the exclusion set once instead of scanning the neighbor
            # list for every candidate.
            excluded = set(root_neighbor)
            sampled_nodes = [
                i for i in random.sample(self.root_nodes, k=k)
                if i not in excluded
            ]
            if not sampled_nodes:
                return []

            prob = softmax(all_score[root, sampled_nodes])
            return np.random.choice(sampled_nodes, size=sample_num,
                                    p=prob).tolist()

        trace = [root]
        node_select = root
        # `<` rather than `==`: the trace starts with one element, so an
        # equality test can never be met when sample_num <= 1.
        while len(trace) < sample_num:
            node_neighbor = self.graph[node_select]
            prob = softmax(all_score[node_select, node_neighbor])
            node_select = np.random.choice(node_neighbor, size=1,
                                           p=prob)[0]
            trace.append(node_select)
        return trace
Ejemplo n.º 2
0
    def find_top10(self, img_path):
        """Rank the stored reference images against the image at ``img_path``.

        The query image is loaded, embedded together with ``self.img_list``
        and compared with ``self.truth_table`` through
        ``self.similarity_class2``. Despite the name, only the first six
        ranked indices are used.

        Args:
            img_path: location of the query image, passed to
                ``dataloader.loaddata``.

        Returns:
            dict: ``{"high": [...], "low": [...]}``. ``"high"`` holds the
            first three candidates with keys ``name``, ``similarity`` (a
            string rounded to two decimals) and ``img`` (result of
            ``utils.crawl_img_url``). ``"low"`` holds the next three with
            keys ``name`` and ``similarity`` (the negated raw score).
        """
        input_img = dataloader.loaddata(img_path)
        # Build a fresh batch. Appending to self.img_list itself would leave
        # the query in the shared list, growing it on every call.
        img = list(self.img_list) + [input_img]
        test_emb = image_encoder.embed_shoe(img)

        simi, ans = self.similarity_class2(self.truth_table, test_emb)
        cand = [{"name": self.ground_img[index],
                 "similarity": -1 * simi[index]}
                for index in ans[:6]]

        # NOTE(review): the softmax is taken over the scores of cand[3:] but
        # the results are attached to cand[:3] below - confirm this pairing
        # is intended.
        trans_simi = list(utils.softmax(
            np.array([info.get('similarity') for info in cand[3:]])))

        high_cand = [{"name": cand_info.get('name'),
                      "similarity": str(round(score, 2))}
                     for cand_info, score in zip(cand[:3], trans_simi)]
        low_cand = cand[3:]

        for cand_info in high_cand:
            cand_info['img'] = utils.crawl_img_url(cand_info.get('name'))

        cand = {"high": high_cand, "low": low_cand}
        return cand
Ejemplo n.º 3
0
def get_recs(g,
             h,
             model,
             embed_dim,
             k,
             user_ids,
             already_bought_dict,
             remove_already_bought=True,
             cuda=False,
             device=None,
             pred: str = 'cos',
             use_popularity: bool = False,
             weight_popularity=1):
    """
    Computes K recommendation for all users, given hidden states, the model and what they already bought.

    Args:
        g: graph object; ``g.num_nodes('item')`` gives the item count and,
            when ``use_popularity`` is set, ``g.ndata['popularity']['item']``
            gives one popularity score per item.
        h: mapping with ``'user'`` and ``'item'`` embedding tensors.
        model: model providing ``pred_fn.layer_nn``; only used when
            ``pred == 'nn'`` (and moved to ``device`` when ``cuda`` is set).
        embed_dim: size of one embedding vector.
        k: number of recommendations to keep per user.
        user_ids: iterable of user indices into ``h['user']``.
        already_bought_dict: maps a user id to the items to exclude; only
            consulted when ``remove_already_bought`` is true.
        remove_already_bought: drop the items listed for the user.
        cuda: move ``model`` to ``device`` first.
        device: target device for ``model`` when ``cuda`` is set.
        pred: ``'cos'`` for cosine similarity, ``'nn'`` for the model's
            prediction layer.
        use_popularity: add weighted popularity to the softmaxed ratings.
        weight_popularity: multiplier applied to the popularity scores.

    Returns:
        dict: user id -> the first ``k`` item indices by descending score
        (a list when ``remove_already_bought`` is true, otherwise a NumPy
        array slice).

    Raises:
        KeyError: if ``pred`` is not recognized, or if filtering is
            requested for a user missing from ``already_bought_dict``.
    """
    if cuda:  # model is already in cuda?
        model = model.to(device)

    # Everything below is identical for every user, so compute it once.
    num_items = g.num_nodes('item')
    print('Computing recommendations on {} users, for {} items'.format(
        len(user_ids), num_items))
    item_emb = h['item']
    cos = nn.CosineSimilarity(dim=1, eps=1e-6)
    popularity_scores = None
    if use_popularity:
        # .cpu() keeps this working when the graph data lives on a GPU.
        popularity_scores = g.ndata['popularity']['item'].cpu().numpy(
        ).reshape(num_items, )

    recs = {}
    for user in user_ids:
        # Broadcast view of the user embedding, one row per item, instead
        # of materializing num_items concatenated copies.
        user_emb_rpt = h['user'][user].reshape(1, embed_dim).expand(
            num_items, embed_dim)

        if pred == 'cos':
            ratings = cos(user_emb_rpt, item_emb)

        elif pred == 'nn':
            cat_embed = torch.cat((user_emb_rpt, item_emb), 1)
            ratings = model.pred_fn.layer_nn(cat_embed)

        else:
            raise KeyError(f'Prediction function {pred} not recognized.')

        ratings_formatted = ratings.cpu().detach().numpy().reshape(
            num_items, )
        if use_popularity:
            softmax_ratings = softmax(ratings_formatted)
            ratings_formatted = np.add(softmax_ratings,
                                       popularity_scores * weight_popularity)

        order = np.argsort(-ratings_formatted)
        if remove_already_bought:
            # Look the purchases up only when they are needed, and test
            # membership against a set rather than rescanning a list.
            already_bought = set(already_bought_dict[user])
            order = [item for item in order if item not in already_bought]
        recs[user] = order[:k]
    return recs
Ejemplo n.º 4
0
    def sample(self, root, tree, sample_num, for_d):
        """ sample nodes from BFS-tree

        Each sample is produced by a walk that starts at ``root`` and moves
        to a neighbor chosen with probability given by the softmax of the
        generator scores. The walk stops when it steps back to the node it
        just came from; the node it was standing on is the sample.

        Args:
            root: int, root node
            tree: dict, BFS-tree; ``tree[node]`` is the neighbor list of
                ``node`` (for the root the first entry is skipped)
            sample_num: the number of required samples
            for_d: bool, whether the samples are used for the generator or the discriminator
        Returns:
            samples: list, the indices of the sampled nodes
            paths: list, paths from the root to the sampled nodes
            Both are ``None`` when a walk reaches a node with no usable
            neighbor.
        """

        all_score = self.sess.run(self.generator.all_score)
        samples = []
        paths = []

        while len(samples) < sample_num:
            current_node = root
            previous_node = -1
            path = [current_node]
            paths.append(path)
            is_root = True
            while True:
                node_neighbor = tree[current_node][1:] if is_root else tree[
                    current_node]
                is_root = False
                if len(node_neighbor) == 0:  # the tree only has a root
                    return None, None
                if for_d:  # skip 1-hop nodes (positive samples)
                    if node_neighbor == [root]:
                        # in current version, None is returned for simplicity
                        return None, None
                    if root in node_neighbor:
                        # Remove from a copy: for non-root nodes
                        # node_neighbor is the caller's own list, and
                        # removing in place would alter the tree.
                        node_neighbor = list(node_neighbor)
                        node_neighbor.remove(root)
                relevance_probability = all_score[current_node, node_neighbor]
                relevance_probability = utils.softmax(relevance_probability)
                next_node = np.random.choice(
                    node_neighbor, size=1,
                    p=relevance_probability)[0]  # select next node
                path.append(next_node)
                if next_node == previous_node:  # terminating condition
                    samples.append(current_node)
                    break
                previous_node = current_node
                current_node = next_node
        return samples, paths
Ejemplo n.º 5
0
    def sample(self, root, tree, sample_num, for_d):
        """Sample nodes from a BFS-tree.

        Each sample is produced by a walk that starts at ``root`` and moves
        to a neighbor chosen with probability given by the softmax of the
        generator scores. The walk stops when it steps back to the node it
        just came from; the node it was standing on is the sample.

        Args:
            root: int, root node
            tree: dict, BFS-tree; ``tree[node]`` is the neighbor list of
                ``node`` (for the root the first entry is skipped)
            sample_num: the number of required samples
            for_d: bool, whether the samples are used for the generator or
                the discriminator

        Return:
            samples: list, the indices of the sampled nodes
            paths: list, paths from the root to the sampled nodes
            Both are ``None`` when a walk reaches a node with no usable
            neighbor.
        """

        all_score = self.generator.all_score
        samples = []
        paths = []

        while len(samples) < sample_num:
            current_node = root
            previous_node = -1
            path = [current_node]
            paths.append(path)
            is_root = True
            while True:
                node_neighbor = tree[current_node][1:] if is_root else tree[current_node]
                is_root = False
                if len(node_neighbor) == 0:   # the tree only has a root
                    return None, None
                if for_d:  # skip 1-hop nodes (positive samples)
                    if node_neighbor == [root]:
                        # in the current version None is returned
                        return None, None
                    if root in node_neighbor:
                        # Remove from a copy: for non-root nodes
                        # node_neighbor is the caller's own list, and
                        # removing in place would alter the tree.
                        node_neighbor = list(node_neighbor)
                        node_neighbor.remove(root)
                relevance_probability = all_score[current_node, node_neighbor]
                relevance_probability = utils.softmax(relevance_probability)
                next_node = np.random.choice(node_neighbor, size=1, p=relevance_probability)[0]  # select next node
                path.append(next_node)
                if next_node == previous_node:  # terminating condition
                    samples.append(current_node)
                    break
                previous_node = current_node
                current_node = next_node
        return samples, paths
def perform_predict(predictor,
                    loader,
                    model_weight,
                    label_weight,
                    weights,
                    save_weights=True):
    """Run ``predictor`` over ``loader`` and record its fusion weights.

    Every batch is passed through the predictor eight times; the outputs
    are averaged and passed through ``softmax``.

    Args:
        predictor: callable model; ``predictor.default_cfg['model_name']``
            is used as the key for all weight dictionaries.
        loader: iterable yielding ``(input, target)`` batches.
        model_weight: dict updated in place with this model's weight.
        label_weight: dict updated in place with this model's per-label
            weights.
        weights: dict updated in place and dumped to
            ``weights/fusion_weights_map.json`` when ``save_weights`` is true.
        save_weights: when true, evaluate the predictions (accuracy and mAP
            via ``eval_map``), write ``fusions/fv.json`` and the weights
            file. When false, load previously stored weights from
            ``./weights/fusion_weights_tta.json`` instead.

    Returns:
        tuple: ``(total_pred_output, total_true_output)`` - the per-sample
        averaged outputs and the matching target indices (the latter stays
        empty when ``save_weights`` is false).
    """
    total_true_output = []
    total_pred_output = []
    total_pred_idx = []
    total_true_idx = []
    with torch.no_grad():
        for inputs, target in tqdm(loader):
            # Move the batch to the GPU once, not once per pass.
            inputs = inputs.cuda()
            test_pred_tta = [
                predictor(inputs).data.cpu().numpy() for _ in range(8)
            ]
            output_data = softmax(np.mean(test_pred_tta, axis=0))

            if save_weights:
                # Predict from the averaged output; using the raw output of
                # the last pass would discard the other seven passes.
                predict_idx = np.argmax(output_data, axis=-1)
                target_idx = target.cpu().numpy()

                total_pred_idx.extend(predict_idx)
                total_true_idx.extend(target_idx)
                total_true_output.extend(target_idx)
                total_pred_output.extend(output_data[:len(target_idx)])
            else:
                total_pred_output.extend(output_data)

    model_key = predictor.default_cfg['model_name']
    if save_weights:
        with open("./txts/v-info-new.json", 'r', encoding="utf-8") as f:
            shape_dict = json.load(f)

        # NOTE(review): `dataset` is neither a parameter nor a local here, so
        # it must exist at module level - confirm it matches `loader`.
        filenames = dataset.filenames()
        dets_info = {}
        class_2_index = {0: 'normal', 1: 'phone', 2: 'smoke'}

        # NOTE(review): total_pred_output already went through softmax in
        # the loop above, so it is applied a second time here - confirm.
        probs = np.max(softmax(np.array(total_pred_output)), axis=-1)
        for i, filename in enumerate(filenames):
            name = filename.split('/')[-1].split('.')[0]
            dets_info[name] = [
                class_2_index[int(total_pred_idx[i])], probs[i],
                shape_dict[name][1], shape_dict[name][2]
            ]

        with open("fusions/fv.json", "w", encoding="utf-8") as f:
            json.dump(dets_info, f, cls=MyEncoder)
        accuracy = round(accuracy_score(total_true_idx, total_pred_idx), 4)

        test_map, ap_list = eval_map(detFolder="fusions/fv.json",
                                     gtFolder="txts/v-info-new.json",
                                     return_each_ap=True)
        print("Accuracy: %s, map: %.4f" % (accuracy, test_map))

        with open("weights/fusion_weights_map.json", 'w',
                  encoding="utf-8") as f:
            weights[model_key] = {}
            weights[model_key]["model_weight"] = test_map
            weights[model_key]["label_weight"] = ap_list

            json.dump(weights, f, cls=MyEncoder, indent=2)

        model_weight[model_key] = test_map
        label_weight[model_key] = ap_list
    else:
        # NOTE(review): this reads fusion_weights_tta.json while the branch
        # above writes fusion_weights_map.json - confirm which is intended.
        with open('./weights/fusion_weights_tta.json', 'r') as json_file:
            json_data = json.load(json_file)
        model_weight[model_key] = np.array(
            [json_data[model_key]['model_weight']])
        label_weight[model_key] = np.array(
            [json_data[model_key]['label_weight']])

    return total_pred_output, total_true_output
Ejemplo n.º 7
0
 def sample_taxo(self, data_size, update_ratio, sample_for_dis: bool):
     """Draw (node, category) training pairs by walking taxonomy paths.

     Nodes are picked at random from ``self.root_nodes`` (nodes without an
     entry in ``self.nodeid2path`` are skipped) until at least
     ``data_size`` pairs have been collected. For each node one of its
     paths is chosen at random and followed from ``self.taxo_rootid``; at
     every level a category that is not one of the node's own categories
     is drawn from the children of the current parent, weighted by the
     softmax of ``self.taxo_all_score``.

     Args:
         data_size: minimum number of pairs to collect.
         update_ratio: not used by this method.
         sample_for_dis: when true, return labelled pairs (path categories
             labelled 1, drawn categories labelled 0); when false, return
             for each level the drawn category followed by the path
             category, without labels.

     Returns:
         ``(nodes, cates, labels, levels)`` when ``sample_for_dis`` is
         true, otherwise ``(nodes, cates, levels)``. ``levels`` holds
         ``self.taxo_child2parents[c]`` for every entry of ``cates``.
     """
     score_table = self.taxo_all_score
     picked_nodes = []
     picked_cates = []

     if not sample_for_dis:
         while len(picked_nodes) < data_size:
             node = random.choice(self.root_nodes)
             candidate_paths = self.nodeid2path.get(node)
             if candidate_paths is None:
                 continue
             own_categories = self.nodeid2category[node]
             path = random.choice(candidate_paths)
             parent = self.taxo_rootid
             for step in path:
                 others = [
                     child for child in self.taxo_parent2children[parent]
                     if child not in own_categories
                 ]
                 if not others:
                     break
                 if len(others) > 1:
                     # Narrow several candidates down to a single draw.
                     weights = softmax(score_table[node, others])
                     others = np.random.choice(others, size=1,
                                               p=weights).tolist()
                 others.append(step)
                 picked_cates.extend(others)
                 picked_nodes.extend([node] * len(others))
                 parent = step
         levels = [self.taxo_child2parents[cate] for cate in picked_cates]
         return picked_nodes, picked_cates, levels

     picked_labels = []
     while len(picked_nodes) < data_size:
         node = random.choice(self.root_nodes)
         candidate_paths = self.nodeid2path.get(node)
         if candidate_paths is None:
             continue
         own_categories = self.nodeid2category[node]
         path = random.choice(candidate_paths)
         parent = self.taxo_rootid
         negatives = []
         for step in path:
             others = [
                 child for child in self.taxo_parent2children[parent]
                 if child not in own_categories
             ]
             if not others:
                 # Keep only as many path entries as draws were made.
                 path = path[:len(negatives)]
                 break
             weights = softmax(score_table[node, others])
             negatives.append(np.random.choice(others, p=weights))
             parent = step
         positive_count = len(path)
         negative_count = len(negatives)
         picked_cates.extend(path)
         picked_cates.extend(negatives)
         picked_labels.extend([1] * positive_count)
         picked_labels.extend([0] * negative_count)
         picked_nodes.extend([node] * (positive_count + negative_count))
     levels = [self.taxo_child2parents[cate] for cate in picked_cates]
     return picked_nodes, picked_cates, picked_labels, levels