def yield_test_questions_K_edges(self,
                                 resampled=False,
                                 K=1,
                                 subset=False,
                                 expand_outfit=False):
    """Yield FITB test questions, each with its own adjacency matrix.

    Each candidate (choice) node of a question is expanded with up to K
    edges obtained by BFS over the remaining test graph, so the yielded
    adjacency contains roughly K*N extra edges. Unless ``expand_outfit``
    is set, the intra-outfit edges are activated directly in the question
    adjacency; the query edges toward the candidates are always removed
    from the BFS graph so they cannot be leaked back in.

    Args:
        resampled: if True, use the resampled version of the questions.
        K: number of BFS edges to expand for each expanded node (K >= 0).
        subset: if True, use only a random 3-item subset of the outfit as
            the query, and use the rest as links to the choices.
        expand_outfit: if True, BFS-expand the outfit items as well, and
            keep intra-outfit edges in the BFS graph instead of the
            question adjacency.

    Yields:
        Tuple ``(question_adj, outfit_ids, choices_ids, gt, valid)`` where
        ``gt`` flags the correct (first) candidate of each query edge and
        ``valid`` flags candidates whose category matches the missing item.
    """
    assert K >= 0
    from utils import Graph  # local import (kept from original placement)

    # Each question consists of N*4 edges to predict.
    # self.questions is a list of questions; question[0] = outfit node
    # ids, question[1] = the 4 candidate node ids (correct one first),
    # question[2] = candidate categories, question[3] = missing category.
    questions = self.questions if not resampled else self.questions_resampled
    n_nodes = self.test_adj.shape[0]

    for question in questions:
        outfit_ids = []
        choices_ids = []
        gt = []
        valid = []

        # keep only a subset of the outfit
        if subset:
            outfit_subset = np.random.choice(question[0], 3, replace=False)
        else:
            outfit_subset = question[0]

        # Build every (outfit item, candidate) query edge.
        for index in outfit_subset:  # indexes of outfit nodes
            for i, index_answer in enumerate(question[1]):
                outfit_ids.append(index)
                choices_ids.append(index_answer)
                # the correct connection is the first candidate
                gt.append(int(i == 0))
                # a link is valid if the candidate item is from the same
                # category as the missing item
                valid.append(int(question[2][i] == question[3]))

        # question adj with only the outfit edges
        question_adj = sp.csr_matrix((n_nodes, n_nodes))
        question_adj = question_adj.tolil()
        if not expand_outfit:
            for j, u in enumerate(outfit_subset[:-1]):
                for v in outfit_subset[j + 1:]:
                    question_adj[u, v] = 1
                    question_adj[v, u] = 1

        if K > 0:
            # The K edges sampled from each node must not belong to the
            # outfit and must not be query edges, so deactivate them in
            # the graph the BFS will run on.
            available_adj = self.test_adj.copy()
            available_adj = available_adj.tolil()
            for j, u in enumerate(question[0][:-1]):
                for v in question[0][j + 1:]:
                    available_adj[u, v] = 0
                    available_adj[v, u] = 0
            if expand_outfit:  # activate intra-outfit edges
                for j, u in enumerate(outfit_subset[:-1]):
                    for v in outfit_subset[j + 1:]:
                        available_adj[u, v] = 1
                        available_adj[v, u] = 1
            for u, v in zip(outfit_ids, choices_ids):
                available_adj[u, v] = 0
                available_adj[v, u] = 0
            available_adj = available_adj.tocsr()
            available_adj.eliminate_zeros()
            G = Graph(available_adj)

            # Fill the adjacency with the expanded edges for each node.
            # choices_ids[:4] are exactly the 4 candidate nodes (each
            # candidate repeats once per outfit item in choices_ids).
            nodes_to_expand = choices_ids[:4]
            if expand_outfit:  # expand the outfit items as well
                nodes_to_expand.extend(outfit_subset)
            for node in nodes_to_expand:
                for u, v in G.run_K_BFS(node, K):
                    question_adj[u, v] = 1
                    question_adj[v, u] = 1

        question_adj = question_adj.tocsr()
        yield question_adj, np.array(outfit_ids), np.array(
            choices_ids), np.array(gt), np.array(valid)
def test_compatibility(args):
    """Evaluate a trained CompatibilityGAE model on outfit compatibility.

    Restores the best checkpoint found in ``args.load_from``, scores every
    test outfit as the mean predicted edge probability between its item
    pairs, and prints the resulting AUC together with control values taken
    from the training log.

    Args:
        args: mapping with keys 'load_from' (experiment directory),
            'k' (number of BFS edges to expand per node), 'subset'
            (use only a 3-item subset of each outfit) and 'resampled'
            (use the resampled compatibility set).
    """
    args = namedtuple("Args", args.keys())(*args.values())
    load_from = args.load_from
    config_file = load_from + '/results.json'
    log_file = load_from + '/log.json'

    with open(config_file) as f:
        config = json.load(f)
    with open(log_file) as f:
        log = json.load(f)

    # Dataloader
    DATASET = config['dataset']
    if DATASET == 'polyvore':
        # load dataset
        dl = DataLoaderPolyvore()
        orig_train_features, adj_train, train_labels, train_r_indices, train_c_indices = dl.get_phase(
            'train')
        full_train_adj = dl.train_adj
        orig_val_features, adj_val, val_labels, val_r_indices, val_c_indices = dl.get_phase(
            'valid')
        orig_test_features, adj_test, test_labels, test_r_indices, test_c_indices = dl.get_phase(
            'test')
        full_test_adj = dl.test_adj
        dl.setup_test_compatibility(resampled=args.resampled)
    elif DATASET == 'ssense':
        dl = DataLoaderFashionGen()
        orig_train_features, adj_train, train_labels, train_r_indices, train_c_indices = dl.get_phase(
            'train')
        orig_val_features, adj_val, val_labels, val_r_indices, val_c_indices = dl.get_phase(
            'valid')
        orig_test_features, adj_test, test_labels, test_r_indices, test_c_indices = dl.get_phase(
            'test')
        adj_q, q_r_indices, q_c_indices, q_labels, q_ids, q_valid = dl.get_test_questions(
        )
        full_train_adj = dl.train_adj
        full_test_adj = dl.test_adj
        dl.setup_test_compatibility(resampled=args.resampled)
    else:
        raise NotImplementedError(
            'A data loader for dataset {} does not exist'.format(DATASET))

    NUMCLASSES = 2
    BN_AS_TRAIN = False
    ADJ_SELF_CONNECTIONS = True

    def norm_adj(adj_to_norm):
        # Normalization applied to every support matrix of degree > 0.
        return normalize_nonsym_adj(adj_to_norm)

    train_features, mean, std = dl.normalize_features(orig_train_features,
                                                      get_moments=True)
    val_features = dl.normalize_features(orig_val_features, mean=mean, std=std)
    test_features = dl.normalize_features(orig_test_features,
                                          mean=mean,
                                          std=std)

    train_support = get_degree_supports(adj_train,
                                        config['degree'],
                                        adj_self_con=ADJ_SELF_CONNECTIONS)
    val_support = get_degree_supports(adj_val,
                                      config['degree'],
                                      adj_self_con=ADJ_SELF_CONNECTIONS)
    test_support = get_degree_supports(adj_test,
                                       config['degree'],
                                       adj_self_con=ADJ_SELF_CONNECTIONS)
    # Support 0 is the (self-connection) identity; normalize the rest.
    for i in range(1, len(train_support)):
        train_support[i] = norm_adj(train_support[i])
        val_support[i] = norm_adj(val_support[i])
        test_support[i] = norm_adj(test_support[i])

    num_support = len(train_support)
    placeholders = {
        'row_indices': tf.compat.v1.placeholder(tf.int32, shape=(None, )),
        'col_indices': tf.compat.v1.placeholder(tf.int32, shape=(None, )),
        'dropout': tf.compat.v1.placeholder_with_default(0., shape=()),
        'weight_decay': tf.compat.v1.placeholder_with_default(0., shape=()),
        'is_train': tf.compat.v1.placeholder_with_default(True, shape=()),
        'support': [
            tf.compat.v1.sparse_placeholder(tf.float32, shape=(None, None))
            for sup in range(num_support)
        ],
        'node_features': tf.compat.v1.placeholder(tf.float32,
                                                  shape=(None, None)),
        'labels': tf.compat.v1.placeholder(tf.float32, shape=(None, ))
    }

    model = CompatibilityGAE(placeholders,
                             input_dim=train_features.shape[1],
                             num_classes=NUMCLASSES,
                             num_support=num_support,
                             hidden=config['hidden'],
                             learning_rate=config['learning_rate'],
                             logging=True,
                             batch_norm=config['batch_norm'])

    # Construct feed dicts for train, val and test phases
    train_feed_dict = construct_feed_dict(placeholders, train_features,
                                          train_support, train_labels,
                                          train_r_indices, train_c_indices,
                                          config['dropout'])
    val_feed_dict = construct_feed_dict(placeholders,
                                        val_features,
                                        val_support,
                                        val_labels,
                                        val_r_indices,
                                        val_c_indices,
                                        0.,
                                        is_train=BN_AS_TRAIN)
    test_feed_dict = construct_feed_dict(placeholders,
                                         test_features,
                                         test_support,
                                         test_labels,
                                         test_r_indices,
                                         test_c_indices,
                                         0.,
                                         is_train=BN_AS_TRAIN)

    # Add ops to save and restore all the variables.
    saver = tf.compat.v1.train.Saver()

    def run_control_eval():
        # Control value: if the model was restored correctly, this matches
        # the validation accuracy stored in the training log.
        # (Renamed from `eval`, which shadowed the builtin.)
        val_avg_loss, val_acc, conf, pred = sess.run(
            [model.loss, model.accuracy, model.confmat,
             model.predict()],
            feed_dict=val_feed_dict)
        print("val_loss=", "{:.5f}".format(val_avg_loss), "val_acc=",
              "{:.5f}".format(val_acc))

    with tf.compat.v1.Session() as sess:
        saver.restore(sess, load_from + '/' + 'best_epoch.ckpt')

        count = 0
        preds = []
        labels = []

        # evaluate the model for accuracy prediction
        run_control_eval()

        prob_act = tf.nn.sigmoid
        # Build the scoring op once: creating `prob_act(model.outputs)`
        # inside the loop would add a new node to the TF graph per outfit.
        score_op = prob_act(model.outputs)
        K = args.k

        for outfit in dl.comp_outfits:
            before_item = time.time()
            items, score = outfit

            num_new = test_features.shape[0]
            new_adj = sp.csr_matrix((num_new, num_new))  # no connections

            if args.k > 0:
                # add edges to the adj matrix
                available_adj = dl.test_adj.copy()
                available_adj = available_adj.tolil()

                i = 0
                for idx_from in items[:-1]:
                    for idx_to in items[i + 1:]:
                        # remove outfit edges, they won't be expanded
                        available_adj[idx_to, idx_from] = 0
                        available_adj[idx_from, idx_to] = 0
                    i += 1
                available_adj = available_adj.tocsr()
                available_adj.eliminate_zeros()

            if args.subset:  # use only a subset (of size 3) of the outfit
                # replace=False so the same item cannot be drawn twice,
                # consistent with the FITB subset sampling.
                items = np.random.choice(items, 3, replace=False)

            new_features = test_features

            # predict edges between the items
            query_r = []
            query_c = []

            i = 0
            item_indexes = items
            for idx_from in item_indexes[:-1]:
                for idx_to in item_indexes[i + 1:]:
                    query_r.append(idx_from)
                    query_c.append(idx_to)
                i += 1

            if args.k > 0:
                # Expand each outfit item with K BFS edges over the graph
                # that has the outfit's own edges removed.
                G = Graph(available_adj)
                nodes_to_expand = np.unique(items)
                for node in nodes_to_expand:
                    edges = G.run_K_BFS(node, K)
                    for edge in edges:
                        u, v = edge
                        new_adj[u, v] = 1
                        new_adj[v, u] = 1

            query_r = np.array(query_r)
            query_c = np.array(query_c)

            new_adj = new_adj.tocsr()
            new_support = get_degree_supports(
                new_adj,
                config['degree'],
                adj_self_con=ADJ_SELF_CONNECTIONS,
                verbose=False)
            for i in range(1, len(new_support)):
                new_support[i] = norm_adj(new_support[i])
            new_support = [sparse_to_tuple(sup) for sup in new_support]

            # Labels are not used at prediction time; train_labels merely
            # fills the 'labels' placeholder.
            new_feed_dict = construct_feed_dict(placeholders,
                                                new_features,
                                                new_support,
                                                train_labels,
                                                query_r,
                                                query_c,
                                                0.,
                                                is_train=BN_AS_TRAIN)

            pred = sess.run(score_op, feed_dict=new_feed_dict)
            predicted_score = pred.mean()
            print("[{}] Mean scores between outfit: {:.4f}, label: {}".format(
                count, predicted_score, score))
            # TODO: remove this print
            print("Total Elapsed: {:.4f}".format(time.time() - before_item))
            count += 1

            preds.append(predicted_score)
            labels.append(score)

        preds = np.array(preds)
        labels = np.array(labels)

        AUC = compute_auc(preds, labels)

        # use this as a control value, if the model is ok, the value will
        # be the same as in log
        run_control_eval()

        print('The AUC compat score is: {}'.format(AUC))
        print('Best val score saved in log: {}'.format(
            config['best_val_score']))
        print('Last val score saved in log: {}'.format(log['val']['acc'][-1]))
        print("mean positive prediction: {}".format(
            preds[labels.astype(bool)].mean()))
        print("mean negative prediction: {}".format(preds[np.logical_not(
            labels.astype(bool))].mean()))