def compute_scores_for_inference(self, clusters_mx, per_example_negs):
    """Compute pairwise affinity scores (embedding dot products) for the
    positive pairs implied by `clusters_mx` and the sampled negatives in
    `per_example_negs`, and return them as a sparse affinity graph.
    Only rank 0 builds and returns the graph; other ranks return None."""
    args = self.args

    # get all of the unique examples
    examples = clusters_mx.data.tolist()
    examples.extend(flatten(per_example_negs.tolist()))
    examples = unique(examples)
    examples = list(filter(lambda x: x >= 0, examples))

    # create dataset and dataloader
    dataset = InferenceEmbeddingDataset(args, examples, args.train_cache_dir)
    dataloader = InferenceEmbeddingDataLoader(args, dataset)

    # get the unique idxs and embeds for each idx
    idxs, embeds = self.get_embeddings(dataloader, evaluate=False)

    sparse_graph = None
    if get_rank() == 0:
        # create inverse index for mapping
        inverse_idxs = {v: k for k, v in enumerate(idxs)}

        ## make the list of pairs of dot products we need
        _row = clusters_mx.row

        # positives:
        local_pos_a, local_pos_b = np.where(
            np.triu(_row[np.newaxis, :] == _row[:, np.newaxis], k=1))
        pos_a = clusters_mx.data[local_pos_a]
        pos_b = clusters_mx.data[local_pos_b]

        # negatives:
        local_neg_a = np.tile(
            np.arange(per_example_negs.shape[0])[:, np.newaxis],
            (1, per_example_negs.shape[1])).flatten()
        neg_a = clusters_mx.data[local_neg_a]
        neg_b = per_example_negs.flatten()

        neg_mask = (neg_b != -1)
        neg_a = neg_a[neg_mask]
        neg_b = neg_b[neg_mask]

        # create subset of the sparse graph we care about
        a = np.concatenate((pos_a, neg_a), axis=0)
        b = np.concatenate((pos_b, neg_b), axis=0)
        edges = list(zip(a, b))
        affinities = [
            np.dot(embeds[inverse_idxs[i]], embeds[inverse_idxs[j]])
            for i, j in edges
        ]

        # convert to coo_matrix
        edges = np.asarray(edges).T
        affinities = np.asarray(affinities)
        _sparse_num = np.max(edges) + 1
        sparse_graph = coo_matrix((affinities, edges),
                                  shape=(_sparse_num, _sparse_num))

    synchronize()
    return sparse_graph
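# Illustrative sketch (not part of the original module): how the np.triu trick
# above enumerates each unordered within-cluster pair exactly once, and how the
# resulting edge list becomes a coo_matrix. The toy `row`/`data` values below
# are hypothetical; in the real code they come from the sampled `clusters_mx`.
def _demo_pair_graph():
    import numpy as np
    from scipy.sparse import coo_matrix

    # two clusters: `row` identifies the cluster, `data` holds global example idxs
    row = np.array([0, 0, 0, 1, 1])
    data = np.array([10, 11, 12, 20, 21])

    # upper triangle (k=1) of the "same cluster?" comparison matrix keeps each
    # unordered pair exactly once and drops self-pairs
    local_a, local_b = np.where(
        np.triu(row[np.newaxis, :] == row[:, np.newaxis], k=1))
    pos_a, pos_b = data[local_a], data[local_b]
    # pairs: (10, 11), (10, 12), (11, 12), (20, 21)

    edges = np.stack((pos_a, pos_b))
    affinities = np.ones(edges.shape[1])  # stand-in for embedding dot products
    n = edges.max() + 1
    return coo_matrix((affinities, edges), shape=(n, n))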
async def _set_actions(self, ctx, query, flags, *, color, default_op):
    flags = unique(flags)
    reduced = reduce(operator.or_, flags)
    default = default_op(reduced)

    channel_id, events = await ctx.db.fetchrow(
        query, ctx.guild.id, reduced.value, default)
    enabled_flags = ', '.join(flag.name for flag in ActionFlag if events & flag)

    embed = (discord.Embed(description=', '.join(flag.name for flag in ActionFlag), color=color)
             .set_author(name=f'Successfully {ctx.command.name}d the following actions')
             .add_field(name='The following mod actions will now be logged:',
                        value=enabled_flags, inline=False))

    await self._check_modlog_channel(ctx, channel_id, embed=embed)
def create_val_dataloader(self):
    args = self.args

    # load and cache examples and get the metadata for the dataset
    self.load_and_cache_examples(split='val', evaluate=True)

    if args.available_entities in ['candidates_only', 'knn_candidates']:
        examples = flatten(
            [[k] + v for k, v in self.val_metadata.midx2cand.items()])
    elif args.available_entities == 'open_domain':
        examples = list(self.val_metadata.idx2uid.keys())
    else:
        raise ValueError('Invalid available_entities')
    examples = unique(examples)

    self.val_dataset = InferenceEmbeddingDataset(
        args, examples, args.val_cache_dir)
    self.val_dataloader = InferenceEmbeddingDataLoader(
        args, self.val_dataset)
def _build_temp_sparse_graph(self, clusters_mx, per_example_negs):
    args = self.args

    # get all of the unique examples
    examples = clusters_mx.data.tolist()
    examples.extend(flatten(per_example_negs.tolist()))
    examples = unique(examples)
    examples = list(filter(lambda x: x >= 0, examples))

    sparse_graph = None
    if get_rank() == 0:
        ## make the list of pairs of dot products we need
        _row = clusters_mx.row

        # positives:
        local_pos_a, local_pos_b = np.where(
            np.triu(_row[np.newaxis, :] == _row[:, np.newaxis], k=1))
        pos_a = clusters_mx.data[local_pos_a]
        pos_b = clusters_mx.data[local_pos_b]

        # negatives:
        local_neg_a = np.tile(
            np.arange(per_example_negs.shape[0])[:, np.newaxis],
            (1, per_example_negs.shape[1])).flatten()
        neg_a = clusters_mx.data[local_neg_a]
        neg_b = per_example_negs.flatten()

        neg_mask = (neg_b != -1)
        neg_a = neg_a[neg_mask]
        neg_b = neg_b[neg_mask]

        # create subset of the sparse graph we care about
        a = np.concatenate((pos_a, neg_a), axis=0)
        b = np.concatenate((pos_b, neg_b), axis=0)
        edges = list(zip(a, b))
        affinities = [0.0 for _ in edges]

        # convert to coo_matrix
        edges = np.asarray(edges).T
        affinities = np.asarray(affinities)
        _sparse_num = np.max(edges) + 1
        sparse_graph = coo_matrix((affinities, edges),
                                  shape=(_sparse_num, _sparse_num))

    synchronize()
    return sparse_graph
async def _bulk_set_permissions(connection, guild_id, name, *entities, whitelist):
    ids = tuple(unique(entity.id for entity in entities))
    # F**k this
    query = """
        DELETE FROM permissions
        WHERE guild_id = $1 AND name = $2 AND snowflake = ANY($3::BIGINT[]);
    """
    await connection.execute(query, guild_id, name, ids)

    if not whitelist:
        # Permissions shall not be created during a reset
        return

    columns = ('guild_id', 'name', 'snowflake', 'whitelist')
    to_insert = [(guild_id, name, id, whitelist) for id in ids]
    await connection.copy_records_to_table('permissions', columns=columns, records=to_insert)
def build_sparse_affinity_graph(args,
                                midxs,
                                example_dir,
                                metadata,
                                knn_index,
                                sub_trainer,
                                build_coref_graph=False,
                                build_linking_graph=False):
    assert build_coref_graph or build_linking_graph

    coref_graph = None
    linking_graph = None

    if get_rank() == 0:
        mention_knn = None
        if build_coref_graph or args.available_entities == 'knn_candidates':
            ## get all of the mention kNN
            #mention_knn = knn_index.get_knn_limited_index(
            #    midxs, include_index_idxs=midxs, k=args.k+1
            #)
            #mention_knn = mention_knn[:,1:]
            midx2doc = {}
            doc2midx = defaultdict(list)
            for doc_id, wdoc_clusters in metadata.wdoc_clusters.items():
                doc2midx[doc_id] = flatten(list(wdoc_clusters.values()))
                for midx in doc2midx[doc_id]:
                    midx2doc[midx] = doc_id
            mention_knn = []
            for midx in midxs:
                mention_knn.append([
                    x for x in doc2midx[midx2doc[midx]]
                    if x != midx and x >= args.num_entities
                ])

    if build_coref_graph:
        # list of edges for sparse graph we will build
        coref_graph_edges = []
        if get_rank() == 0:
            # add mention-mention edges to list
            coref_graph_edges.extend([
                tuple(sorted((a, b)))
                for a, l in zip(midxs, mention_knn)
                for b in l if a != b
            ])
            coref_graph_edges = unique(coref_graph_edges)

        # broadcast edges to all processes to compute affinities
        coref_graph_edges = broadcast(coref_graph_edges, src=0)
        affinities = sub_trainer.get_edge_affinities(
            coref_graph_edges, example_dir, knn_index)

        # affinities are gathered to only rank 0 process
        if get_rank() == 0:
            # build the graph
            coref_graph_edges = np.asarray(coref_graph_edges).T
            _sparse_num = metadata.num_mentions + metadata.num_entities
            coref_graph = coo_matrix((affinities, coref_graph_edges),
                                     shape=(_sparse_num, _sparse_num))

    if build_linking_graph:
        # list of edges for sparse graph we will build
        linking_graph_edges = []
        if get_rank() == 0:
            # get mention-entity edges
            if args.available_entities == 'candidates_only':
                for midx in midxs:
                    candidates = metadata.midx2cand.get(midx, [])
                    if len(candidates) > 0:
                        linking_graph_edges.extend([
                            tuple(sorted((midx, eidx))) for eidx in candidates
                        ])
            elif args.available_entities == 'knn_candidates':
                # get all of the mention kNN
                if args.clustering_domain == 'within_doc':
                    for midx in midxs:
                        candidates = metadata.midx2cand.get(midx, [])
                        if len(candidates) > 0:
                            linking_graph_edges.extend([
                                tuple(sorted((midx, eidx))) for eidx in candidates
                            ])
                elif args.clustering_domain == 'cross_doc':
                    raise NotImplementedError('unsupported clustering_domain')
                else:
                    raise ValueError('unsupported clustering_domain')
            else:  # 'open_domain'
                # get all of the mention kNN
                cand_gen_knn = knn_index.get_knn_limited_index(
                    midxs,
                    include_index_idxs=np.arange(metadata.num_entities),
                    k=args.k)
                linking_graph_edges.extend([
                    tuple(sorted((a, b)))
                    for a, l in zip(midxs, cand_gen_knn)
                    for b in l
                ])

            # get all of the edges
            linking_graph_edges = unique(linking_graph_edges)

        # broadcast edges to all processes to compute affinities
        linking_graph_edges = broadcast(linking_graph_edges, src=0)
        affinities = sub_trainer.get_edge_affinities(
            linking_graph_edges, example_dir, knn_index)

        # affinities are gathered to only rank 0 process
        if get_rank() == 0:
            # build the graph
            linking_graph_edges = np.asarray(linking_graph_edges).T
            _sparse_num = metadata.num_mentions + metadata.num_entities
            linking_graph = coo_matrix((affinities, linking_graph_edges),
                                       shape=(_sparse_num, _sparse_num))

        if args.available_entities == 'knn_candidates':
            assert args.clustering_domain == 'within_doc'

            # pick expansion edges based on coref knn mentions
            expansion_factor = 5
            expansion_edges = []
            if get_rank() == 0:
                def _get_top_k(midx, graph, k):
                    # neighbors of `midx` can appear on either axis, since each
                    # undirected edge is stored once with sorted endpoints
                    row_entries = graph.getrow(midx).tocoo()
                    col_entries = graph.getcol(midx).tocoo()
                    entries = zip(
                        np.concatenate((row_entries.col, col_entries.row), axis=0),
                        np.concatenate((row_entries.data, col_entries.data), axis=0))
                    entries = list(entries)
                    if len(entries) == 0:
                        return []
                    sorted_data = sorted(entries, key=lambda x: x[1], reverse=True)
                    top_k, _ = zip(*sorted_data[:k])
                    return top_k

                top_k_coref = lambda i: _get_top_k(i, coref_graph, expansion_factor)
                top_k_linking = lambda i: _get_top_k(i, linking_graph, expansion_factor)
                for midx in midxs:
                    for coref_midx in top_k_coref(midx):
                        expansion_edges.extend([
                            tuple(sorted((x, midx)))
                            for x in top_k_linking(coref_midx)
                            if x not in metadata.midx2cand[midx]
                        ])
                expansion_edges = unique(expansion_edges)

            # score the expanded candidate edges
            expansion_edges = broadcast(expansion_edges, src=0)
            expansion_affinities = sub_trainer.get_edge_affinities(
                expansion_edges, example_dir, knn_index)

            if get_rank() == 0:
                # build the graph
                expansion_edges = np.asarray(expansion_edges).T
                linking_graph_edges = np.concatenate(
                    (linking_graph_edges, expansion_edges), axis=1)
                affinities += expansion_affinities
                _sparse_num = metadata.num_mentions + metadata.num_entities
                linking_graph = coo_matrix((affinities, linking_graph_edges),
                                           shape=(_sparse_num, _sparse_num))

    return coref_graph, linking_graph
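# Illustrative sketch (not part of the original module): why _get_top_k above
# scans both the row and the column of a node. Because each undirected edge is
# stored once with sorted endpoints, a node's neighbors can sit on either axis
# of the COO matrix. The toy graph and node index below are hypothetical.
def _demo_top_k_neighbors():
    import numpy as np
    from scipy.sparse import coo_matrix

    # undirected edges stored once, with the smaller endpoint as the row
    rows = np.array([0, 0, 1])
    cols = np.array([1, 2, 2])
    data = np.array([0.9, 0.1, 0.5])
    graph = coo_matrix((data, (rows, cols)), shape=(3, 3))

    node, k = 1, 2
    row_entries = graph.getrow(node).tocoo()   # edges stored as (node, x)
    col_entries = graph.getcol(node).tocoo()   # edges stored as (x, node)
    neighbors = np.concatenate((row_entries.col, col_entries.row))
    scores = np.concatenate((row_entries.data, col_entries.data))
    order = np.argsort(-scores)[:k]
    return list(zip(neighbors[order], scores[order]))  # [(0, 0.9), (2, 0.5)]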