Python Digester Beispiele

Programmiersprache: Python

Namespace / Paketname: argos.core.digester

Klasse / Typ: Digester

Beispiele auf hotexamples.com: 6

Python Digester – 6 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für die Klasse argos.core.digester.Digester, die aus Open-Source-Projekten extrahiert wurden. Sie können die Beispiele bewerten, um ihre Qualität zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

Digester(1)

__init__(1)

iterate(1)

Beispiel #1

Datei anzeigen

    def __init__(self,
                 file,
                 namespace=NAMESPACE,
                 db=DATABASE,
                 url=None,
                 silent=True):
        """
        Initialize the WikiDigester with a file and a namespace.

        Args:
            | file (str)        -- path to XML file (or bzipped XML) to digest.
            | namespace (str)   -- namespace of the file. Defaults to MediaWiki namespace.
            | db (str)          -- the name of the database to save to.
            | url (str)         -- the url from where the dump can be fetched.
            | silent (bool)     -- whether or not to send an email upon digestion completion.
        """

        # Call the base initializer explicitly. This spelling is valid on both
        # Python 2.7 and Python 3, so no version fallback is needed. The
        # previous `try: super().__init__() / except TypeError:` shim also
        # caught genuine TypeErrors raised inside the base initializer on
        # Python 3 and then ran that initializer a second time.
        # NOTE(review): assumes Digester is the direct base class -- confirm.
        Digester.__init__(self, file, namespace)

        self.database = db
        # No database handle yet; presumably set up elsewhere -- not shown here.
        self._db = None
        self.url = url
        self.silent = silent

        # Keep track of number of docs.
        # Necessary for performing TF-IDF processing.
        self.num_docs = 0

Beispiel #2

Datei anzeigen

Datei: wikidigester.py Projekt: keho98/argos

    def __init__(self, file, namespace=NAMESPACE, db=DATABASE, url=None, silent=True):
        """
        Initialize the WikiDigester with a file and a namespace.

        Args:
            | file (str)        -- path to XML file (or bzipped XML) to digest.
            | namespace (str)   -- namespace of the file. Defaults to MediaWiki namespace.
            | db (str)          -- the name of the database to save to.
            | url (str)         -- the url from where the dump can be fetched.
            | silent (bool)     -- whether or not to send an email upon digestion completion.
        """

        # Call the base initializer explicitly. This spelling is valid on both
        # Python 2.7 and Python 3, so no version fallback is needed. The
        # previous `try: super().__init__() / except TypeError:` shim also
        # caught genuine TypeErrors raised inside the base initializer on
        # Python 3 and then ran that initializer a second time.
        # NOTE(review): assumes Digester is the direct base class -- confirm.
        Digester.__init__(self, file, namespace)

        self.database = db
        # No database handle yet; presumably set up elsewhere -- not shown here.
        self._db = None
        self.url = url
        self.silent = silent

        # Keep track of number of docs.
        # Necessary for performing TF-IDF processing.
        self.num_docs = 0

Beispiel #3

Datei anzeigen

Datei: digester_test.py Projekt: keho98/argos

class DigesterTest(unittest.TestCase):
    """Checks that a Digester can be constructed and iterated over 'page'."""

    def setUp(self):
        # Every test starts from a fresh Digester over the plain XML file.
        source_path = 'tests/data/article.xml'
        xml_namespace = 'http://www.mediawiki.org/xml/export-0.8'
        self.d = Digester(source_path, xml_namespace)

    def tearDown(self):
        # Drop the reference to the digester once the test has finished.
        self.d = None

    def _assert_pages_not_none(self):
        """Iterate over 'page' on the current digester; each item must not be None."""
        for item in self.d.iterate('page'):
            self.assertIsNotNone(item)

    def test_instance(self):
        self.assertIsInstance(self.d, Digester)

    def test_iterate(self):
        self._assert_pages_not_none()

    def test_iterate_bz2(self):
        # Point the same digester at the bzipped file before iterating.
        self.d.file = 'tests/data/article.xml.bz2'
        self._assert_pages_not_none()

Beispiel #4

Datei anzeigen

class DigesterTest(unittest.TestCase):
    """Checks that a Digester can be constructed and iterated over 'page'."""

    def setUp(self):
        # Every test starts from a fresh Digester over the plain XML file.
        source_path = 'tests/data/article.xml'
        xml_namespace = 'http://www.mediawiki.org/xml/export-0.8'
        self.d = Digester(source_path, xml_namespace)

    def tearDown(self):
        # Drop the reference to the digester once the test has finished.
        self.d = None

    def _assert_pages_not_none(self):
        """Iterate over 'page' on the current digester; each item must not be None."""
        for item in self.d.iterate('page'):
            self.assertIsNotNone(item)

    def test_instance(self):
        self.assertIsInstance(self.d, Digester)

    def test_iterate(self):
        self._assert_pages_not_none()

    def test_iterate_bz2(self):
        # Point the same digester at the bzipped file before iterating.
        self.d.file = 'tests/data/article.xml.bz2'
        self._assert_pages_not_none()

Beispiel #5

Datei anzeigen

Datei: digester_test.py Projekt: keho98/argos

 def setUp(self):
     # Build the Digester under test from the XML file and its namespace.
     xml_path = 'tests/data/article.xml'
     export_namespace = 'http://www.mediawiki.org/xml/export-0.8'
     self.d = Digester(xml_path, export_namespace)

Beispiel #6

Datei anzeigen

 def setUp(self):
     # Build the Digester under test from the XML file and its namespace.
     xml_path = 'tests/data/article.xml'
     export_namespace = 'http://www.mediawiki.org/xml/export-0.8'
     self.d = Digester(xml_path, export_namespace)