Example #1
0
    def add_edges(self, max_citations=50):
        """
        Add a weighted edge for each pair of texts cited by the same syllabus.

        Every document row yields an array of control numbers; each 2-element
        combination of that array becomes an edge in `self.graph`. The first
        time a pair is seen the edge is created with weight 1; every later
        occurrence adds 1 to the weight.

        Args:
            max_citations (int): Documents whose citation count exceeds this
                value are left out of the query.
        """

        # Array of the control numbers cited by a single document.
        cn_array = fn.array_agg(HLOM_Record.control_number)
        texts = cn_array.coerce(False).alias('texts')

        # Only documents at or under the citation cap pass the HAVING filter.
        under_cap = fn.count(HLOM_Record.id) <= max_citations

        # One row per document, carrying its aggregated control numbers.
        cited = HLOM_Citation.select(HLOM_Citation.document, texts)
        documents = (
            cited
            .join(HLOM_Record)
            .having(under_cap)
            .distinct(HLOM_Citation.document)
            .group_by(HLOM_Citation.document)
        )

        for doc_row in query_bar(documents):
            for source, target in combinations(doc_row.texts, 2):

                # First co-citation of this pair: create the edge.
                if not self.graph.has_edge(source, target):
                    self.graph.add_edge(source, target, weight=1)

                # Pair seen before: bump the existing weight.
                else:
                    self.graph[source][target]['weight'] += 1
Example #2
0
    def add_edges(self, max_texts=20):
        """
        Add a weighted edge for each pair of texts cited by the same syllabus.

        Each document row carries an array of text ids; every 2-element
        combination of that array becomes an edge in `self.graph`. A new pair
        starts at weight 1, and each repeat occurrence adds 1 to the weight.

        Args:
            max_texts (int): Documents whose counted texts exceed this value
                are left out of the query.
        """

        # Array of the text ids cited by a single document.
        id_array = fn.array_agg(Text.id)
        text_ids = id_array.coerce(False).alias('text_ids')

        # Filters handed to the query builder. The `== True` comparisons are
        # passed to .where() as query expressions, so they are kept verbatim
        # rather than rewritten as truthiness checks.
        under_cap = fn.count(Text.id) <= max_texts
        is_displayed = Text.display == True
        is_valid = Text.valid == True

        # One row per document, restricted to displayed and valid texts.
        docs = (
            Citation
            .select(Citation.document, text_ids)
            .join(Text)
            .having(under_cap)
            .where(is_displayed)
            .where(is_valid)
            .group_by(Citation.document)
        )

        for doc_row in query_bar(docs):
            for source, target in combinations(doc_row.text_ids, 2):

                # First co-citation of this pair: create the edge.
                if not self.graph.has_edge(source, target):
                    self.graph.add_edge(source, target, weight=1)

                # Pair seen before: bump the existing weight.
                else:
                    self.graph[source][target]['weight'] += 1
Example #3
0
    def add_edges(self):
        """
        Add a weighted edge for each pair of texts cited by the same syllabus.

        Each document row carries an array of text ids; every 2-element
        combination of that array becomes an edge in `self.graph`. A new pair
        starts at weight 1, and each repeat occurrence adds 1 to the weight.
        """

        # Array of the text ids cited by a single document.
        id_array = fn.array_agg(Text.id)
        text_ids = id_array.coerce(False).alias('text_ids')

        # Build the query step by step: one row per document, restricted to
        # displayed and valid texts. The `== True` comparisons are passed to
        # .where() as query expressions, so they are kept verbatim.
        docs = Citation.select(Citation.document, text_ids)
        docs = docs.join(Text)
        docs = docs.where(Text.display == True)
        docs = docs.where(Text.valid == True)
        docs = docs.group_by(Citation.document)

        for doc_row in query_bar(docs):
            for source, target in combinations(doc_row.text_ids, 2):

                # First co-citation of this pair: create the edge.
                if not self.graph.has_edge(source, target):
                    self.graph.add_edge(source, target, weight=1)

                # Pair seen before: bump the existing weight.
                else:
                    self.graph[source][target]['weight'] += 1