def add_edges(self, max_citations=50):
    """
    Turn the citations of each syllabus into weighted graph edges.

    Every unordered pair of control numbers aggregated for one document
    becomes an edge in `self.graph`. A new pair is added with
    `weight=1`; a pair that already has an edge gets its `weight`
    incremented by one.

    Args:
        max_citations (int): Documents whose joined record count is
            greater than this value are excluded from the query.
    """

    # Per-document array of the cited records' control numbers.
    # NOTE(review): coerce(False) presumably keeps the ORM from
    # converting the aggregated array -- confirm against peewee docs.
    cited_cns = fn.array_agg(HLOM_Record.control_number)
    cited_cns = cited_cns.coerce(False).alias('texts')

    # One row per document, limited to documents with at most
    # `max_citations` joined records.
    query = HLOM_Citation.select(HLOM_Citation.document, cited_cns)
    query = query.join(HLOM_Record)
    query = query.having(fn.count(HLOM_Record.id) <= max_citations)
    query = query.distinct(HLOM_Citation.document)
    query = query.group_by(HLOM_Citation.document)

    for doc in query_bar(query):
        for source, target in combinations(doc.texts, 2):

            # First time this pair is seen: create the edge.
            if not self.graph.has_edge(source, target):
                self.graph.add_edge(source, target, weight=1)
                continue

            # The pair already has an edge: bump its weight.
            self.graph[source][target]['weight'] += 1
def add_edges(self, max_texts=20):
    """
    Turn the citations of each syllabus into weighted graph edges.

    Every unordered pair of text ids aggregated for one document becomes
    an edge in `self.graph`. A new pair is added with `weight=1`; a pair
    that already has an edge gets its `weight` incremented by one.

    Args:
        max_texts (int): Documents with more than this many cited texts
            (counted after the `display` / `valid` filters) are excluded
            from the query.
    """

    # Per-document array of the cited text ids.
    cited_ids = fn.array_agg(Text.id)
    cited_ids = cited_ids.coerce(False).alias('text_ids')

    # One row per document. The `== True` comparisons are deliberate:
    # they build query expressions rather than Python booleans.
    query = Citation.select(Citation.document, cited_ids)
    query = query.join(Text)
    query = query.having(fn.count(Text.id) <= max_texts)
    query = query.where(Text.display == True)
    query = query.where(Text.valid == True)
    query = query.group_by(Citation.document)

    for doc in query_bar(query):
        for source, target in combinations(doc.text_ids, 2):

            # First time this pair is seen: create the edge.
            if not self.graph.has_edge(source, target):
                self.graph.add_edge(source, target, weight=1)
                continue

            # The pair already has an edge: bump its weight.
            self.graph[source][target]['weight'] += 1
def add_edges(self):
    """
    Turn the citations of each syllabus into weighted graph edges.

    Every unordered pair of text ids aggregated for one document becomes
    an edge in `self.graph`. A new pair is added with `weight=1`; a pair
    that already has an edge gets its `weight` incremented by one. Only
    texts flagged both `display` and `valid` are included.
    """

    # Per-document array of the cited text ids.
    cited_ids = fn.array_agg(Text.id)
    cited_ids = cited_ids.coerce(False).alias('text_ids')

    # One row per document. The `== True` comparisons are deliberate:
    # they build query expressions rather than Python booleans.
    query = Citation.select(Citation.document, cited_ids)
    query = query.join(Text)
    query = query.where(Text.display==True)
    query = query.where(Text.valid==True)
    query = query.group_by(Citation.document)

    for doc in query_bar(query):
        for source, target in combinations(doc.text_ids, 2):

            # First time this pair is seen: create the edge.
            if not self.graph.has_edge(source, target):
                self.graph.add_edge(source, target, weight=1)
                continue

            # The pair already has an edge: bump its weight.
            self.graph[source][target]['weight'] += 1