Python Document Examples

Programming Language: Python

Namespace/Package Name: bibim.document.document

Class/Type: Document

Examples at hotexamples.com: 6

Python Document - 6 examples found. These are the top rated real world Python examples of bibim.document.document.Document extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Document(3)

content(1)

get_metadata_field(1)

set_metadata_field(1)

Example #1

Show file

File: extraction.py Project: Alex-Linhares/bibtexIndexMaker

 def extract(self, input_file):
     """Read ``input_file`` and return its text wrapped in a Document.

     The path is first passed through ``self._check_input_file``; whatever
     that helper returns is the path that actually gets opened.

     Returns:
         A new Document whose ``content`` attribute holds everything read
         from the file.
     """
     input_file = self._check_input_file(input_file)

     document = Document()
     # The context manager closes the handle even if read() raises, and the
     # local name no longer shadows the Python 2 builtin ``file``.
     with open(input_file) as source:
         document.content = source.read()

     return document

Example #2

Show file

class TestPDFTextExtractor(unittest.TestCase):
    """Unit tests for the metadata and content accessors of Document.

    NOTE(review): the class name mentions the PDF text extractor, but every
    test here only exercises a Document instance.
    """

    def setUp(self):
        """Give each test its own Document instance."""
        self.document = Document()

    def tearDown(self):
        """Nothing to clean up."""
        pass

    def test_metadata_fields(self):
        """A metadata field can be read back after being set."""
        self.document.set_metadata_field('Name', 'Document name')
        # assertEqual replaces the deprecated failUnless(a == b) form and
        # reports both values when the comparison fails.
        self.assertEqual(
            self.document.get_metadata_field('Name'), 'Document name')

    def test_available_metadata(self):
        """available_metadata lists each field that was set exactly once."""
        self.document.set_metadata_field('Name', 'Document name')
        self.document.set_metadata_field('CreationDate', 'Today')
        fields = self.document.available_metadata
        self.assertEqual(len(fields), 2)
        self.assertEqual(fields.count('Name'), 1)
        self.assertEqual(fields.count('CreationDate'), 1)

    def test_content(self):
        """The content attribute returns the value assigned to it."""
        self.document.content = "Some text content"
        self.assertEqual(self.document.content, "Some text content")

Example #3

Show file

File: test_document.py Project: rxuriguera/bibtexIndexMaker

class TestPDFTextExtractor(unittest.TestCase):
    """Checks that Document stores and returns metadata fields and content.

    NOTE(review): despite the name, no PDF extractor is involved in these
    tests; they only touch a Document instance.
    """

    def setUp(self):
        """Build the Document instance used by the test about to run."""
        self.document = Document()

    def tearDown(self):
        """No resources to release."""
        pass

    def test_metadata_fields(self):
        """Setting a metadata field makes it retrievable by name."""
        self.document.set_metadata_field('Name', 'Document name')
        # failUnless is a deprecated alias that newer unittest versions no
        # longer provide; assertEqual also shows both operands on failure.
        self.assertEqual(self.document.get_metadata_field('Name'),
                         'Document name')

    def test_available_metadata(self):
        """Two distinct fields yield two entries, each present once."""
        self.document.set_metadata_field('Name', 'Document name')
        self.document.set_metadata_field('CreationDate', 'Today')
        fields = self.document.available_metadata
        self.assertEqual(len(fields), 2)
        self.assertEqual(fields.count('Name'), 1)
        self.assertEqual(fields.count('CreationDate'), 1)

    def test_content(self):
        """Assigned content is returned unchanged."""
        self.document.content = "Some text content"
        self.assertEqual(self.document.content, "Some text content")

Example #4

Show file

File: extraction.py Project: Alex-Linhares/bibtexIndexMaker

    def extract(self, input_file):
        """Run the external PDF tool on ``input_file`` and build a Document.

        The tool named by ``self._pdf_extraction_tool`` is invoked with a
        fixed set of options and its standard output is parsed with
        BeautifulSoup. Metadata and content are then filled in by
        ``self._extract_metadata`` and ``self._extract_content``.

        Returns:
            The Document populated from the tool's output.

        Raises:
            ExtractionError: if the extraction tool cannot be launched, or
                if it produces no output.
        """
        input_file = self._check_input_file(input_file)
        # Extraction command and its options. They may be parametrized in the
        # future
        command = [self._pdf_extraction_tool, '-q', '-f', '1', '-l', '2',
                   '-enc', 'ASCII7', '-htmlmeta', input_file, '-']
        try:
            pop = subprocess.Popen(command, stdout=subprocess.PIPE)
        except OSError:
            log.error('PDF extraction tool not found') #@UndefinedVariable
            # There is no process to read from, so stop here instead of
            # falling through to an unbound ``pop`` below.
            raise ExtractionError('PDF extraction tool not found')

        stdout = pop.communicate()[0]
        # Popen never raises CalledProcessError, so the exit status has to be
        # inspected explicitly once the process has finished.
        if pop.returncode:
            log.error('Error executing PDF text extraction tool. Return code: ' #@UndefinedVariable
                      + repr(pop.returncode))
        if not stdout:
            raise ExtractionError('Corrupted file')

        parser = BeautifulSoup(stdout)
        document = Document()
        self._extract_metadata(parser, document)
        self._extract_content(parser, document)

        return document

Example #5

Show file

File: test_document.py Project: rxuriguera/bibtexIndexMaker

 def setUp(self):
     """Fixture hook: bind a newly constructed Document to ``self.document``."""
     fresh_document = Document()
     self.document = fresh_document

Example #6

Show file

 def setUp(self):
     """Construct a Document and store it on the test instance."""
     new_document = Document()
     self.document = new_document