Exemple #1
0
 def test_init(self):
     """Check construction from a corpus and failure on ``corpus.domain``."""
     deerwester = Corpus.from_file('deerwester')
     converter = CorpusToNetwork(deerwester)

     # The converter exposes the corpus it was built from.
     held = converter.corpus
     self.assertIsInstance(held, Corpus)
     self.assertEqual(len(held), 9)

     # Passing the domain instead of a corpus is expected to raise.
     with self.assertRaises(Exception):
         CorpusToNetwork(deerwester.domain)
Exemple #2
0
 def test_call_word(self):
     """Call the converter with ``document_nodes=False`` and check outputs."""
     deerwester = Corpus.from_file('deerwester')
     converter = CorpusToNetwork(deerwester)
     options = {
         "document_nodes": False,
         "window_size": 1,
         "threshold": 1,
         "freq_threshold": 1,
     }
     network = converter(**options)
     node_items = converter.get_current_items(False)

     self.assertIsInstance(network, Network)
     self.assertIsInstance(node_items, Table)
     # One item row per node, and more nodes than corpus entries.
     node_count = network.number_of_nodes()
     self.assertEqual(len(node_items), node_count)
     self.assertGreater(node_count, len(deerwester))
Exemple #3
0
 def test_empty(self):
     """A zero-length corpus slice yields a network with no nodes or edges."""
     no_documents = Corpus.from_file('deerwester')[:0]
     converter = CorpusToNetwork(no_documents)
     options = {
         "document_nodes": True,
         "window_size": 1,
         "threshold": 1,
         "freq_threshold": 1,
     }
     network = converter(**options)
     self.assertEqual(network.number_of_nodes(), 0)
     self.assertEqual(network.number_of_edges(), 0)
Exemple #4
0
 def test_cache(self):
     """Two calls with identical arguments return the very same object."""
     converter = CorpusToNetwork(Corpus.from_file('deerwester'))
     options = {
         "document_nodes": True,
         "window_size": 1,
         "threshold": 1,
         "freq_threshold": 1,
     }
     first = converter(**options)
     second = converter(**options)
     # Identity (not just equality) is what is being asserted here.
     self.assertIs(first, second)
Exemple #5
0
    def set_data(self, data):
        """Handle a new input: rebuild the converter or clear the outputs.

        Shows the ``no_network_addon`` error and does nothing else when
        ``Network`` is ``None``. Otherwise any current task is cancelled,
        and a falsy ``data`` clears the outputs while a truthy one is stored
        and committed.
        """
        if Network is None:
            # presumably the optional network add-on is not installed
            self.Error.no_network_addon()
            return

        self.cancel()
        self._task_state = "running"
        self.button.setText("Stop")

        if data:
            self.corpus = data
            self._corpus_to_network = CorpusToNetwork(corpus=data)
            self.commit()
        else:
            self._corpus_to_network = None
            self.clear_outputs()
Exemple #6
0
def run(corpus_to_network: CorpusToNetwork, document_nodes: bool,
        threshold: int, window_size: int, freq_threshold: int,
        state: TaskState) -> Tuple[Network, Table]:
    """Invoke ``corpus_to_network`` and fetch its current items.

    Progress values handed to the callback are forwarded to
    ``state.set_progress_value``.

    Returns:
        A ``(network, items)`` tuple: the result of calling
        ``corpus_to_network`` and of its ``get_current_items(document_nodes)``.

    Raises:
        InterruptedError: from the progress callback, when
            ``state.is_interruption_requested()`` is true.
    """
    def report_progress(value):
        # Check for interruption first so no progress is reported after it.
        if state.is_interruption_requested():
            raise InterruptedError
        state.set_progress_value(value)

    build_options = {
        "document_nodes": document_nodes,
        "window_size": window_size,
        "threshold": threshold,
        "freq_threshold": freq_threshold,
        "progress_callback": report_progress,
    }
    graph = corpus_to_network(**build_options)
    node_items = corpus_to_network.get_current_items(document_nodes)
    return graph, node_items
    def set_data(self, data):
        """Handle a new input, updating the input summary along the way.

        Shows the ``no_network_addon`` error and does nothing else when
        ``Network`` is ``None``. Otherwise any current task is cancelled.
        A falsy ``data`` resets the input summary and clears the outputs;
        a truthy one is stored, summarised by its length, and committed.
        """
        if Network is None:
            # presumably the optional network add-on is not installed
            self.Error.no_network_addon()
            return

        self.cancel()
        self._task_state = "running"
        self.button.setText("Stop")

        if data:
            self.corpus = data
            n_documents = len(self.corpus)
            self.info.set_input_summary(
                str(n_documents),
                "Corpus with {} documents.".format(n_documents))
            self._corpus_to_network = CorpusToNetwork(corpus=data)
            self.commit()
        else:
            self._corpus_to_network = None
            self.info.set_input_summary(self.info.NoInput)
            self.clear_outputs()
Exemple #8
0
 def test_params(self):
     """Compare networks built with one parameter changed at a time."""
     converter = CorpusToNetwork(Corpus.from_file('deerwester'))

     def build(**overrides):
         # Baseline settings with the given overrides applied on top.
         settings = {
             "document_nodes": False,
             "window_size": 1,
             "threshold": 1,
             "freq_threshold": 1,
         }
         settings.update(overrides)
         return converter(**settings)

     baseline = build()

     # Raising the threshold leaves fewer edges.
     variant = build(threshold=100)
     self.assertGreater(baseline.number_of_edges(),
                        variant.number_of_edges())

     # Widening the window produces more edges.
     variant = build(window_size=10)
     self.assertLess(baseline.number_of_edges(), variant.number_of_edges())

     # Raising the frequency threshold leaves fewer nodes.
     variant = build(freq_threshold=100)
     self.assertGreater(baseline.number_of_nodes(),
                        variant.number_of_nodes())