Beispiel #1
0
Datei: db.py Projekt: imclab/iepy
    def create_document(self, identifier, text, metadata=None):
        """Build an IEDocument from raw text, persist it and return it.

        The resulting document is meant to be fed to the information
        extraction pipeline (tokenization, POS tagging, and so on).

        Args:
            identifier: Value stored as the document's ``human_identifier``.
                It must be unique, since it is what distinguishes one
                document from another.
            text: The raw text of the document.
            metadata: Optional dictionary of arbitrary data to persist
                alongside the document. IEPY does not interpret it; it is
                only preserved. When omitted, an empty dict is stored.

        Returns:
            The newly created IEDocument, after ``save()`` has been called.
        """
        document = IEDocument(
            human_identifier=identifier,
            text=text,
            # A new dict per call, so documents never share a default.
            metadata={} if metadata is None else metadata,
        )
        document.save()
        return document
Beispiel #2
0
 def setUp(self):
     """Replace ``IEDocument.save`` with a mock and build a document fixture.

     The mock returned by starting the patch is kept on ``self.mock_save``
     and the document on ``self.doc``.
     """
     save_patch = mock.patch.object(IEDocument, 'save')
     self.mock_save = save_patch.start()
     # Registered right after starting, so the real ``save`` is restored
     # once the test is over.
     self.addCleanup(save_patch.stop)

     fixture_metadata = {'raw_text': 'hello world'}
     self.doc = IEDocument(metadata=fixture_metadata)