コード例 #1
0
    def _create_document(self, sender, recipient, date, keywords, files):
        """Create a Document, register it and its files, and return it.

        The new document is added to ``self.archive`` first. Then, for each
        entry in ``files``, a ``RepoFile`` is built from an ``ArchiveToken``
        (keyed on the entry and the document's ``uuid``), attached to the
        document, and the entry is added to the archive under that repo
        file's token. The sender, recipient, date and keywords are assigned
        to the document last.
        """
        import os.path

        document = Document()
        self.archive.add_document(document)

        for path in files:
            # NOTE(review): os.path.splitext returns a (root, ext) tuple and
            # the whole tuple is passed to RepoFile as its second argument --
            # confirm that this is what RepoFile expects.
            attachment = RepoFile(
                ArchiveToken(path, document.uuid),
                os.path.splitext(path),
            )
            document.add_file(attachment)
            self.archive.add_file(path, attachment.token)

        document.sender = sender
        document.recipient = recipient
        document.date = date
        document.keywords = keywords
        return document
コード例 #2
0
ファイル: repository.py プロジェクト: scottgw/paperless
    def _create_document(self, sender, recipient, date, keywords, files):
        """Create a Document, register it and its files, and return it.

        The new document is added to ``self.archive`` first. Then, for each
        entry in ``files``, a ``RepoFile`` is built from an ``ArchiveToken``
        (keyed on the entry and the document's ``uuid``), attached to the
        document, and the entry is added to the archive under that repo
        file's token. The sender, recipient, date and keywords are assigned
        to the document last.
        """
        import os.path

        document = Document()
        self.archive.add_document(document)

        for path in files:
            # NOTE(review): os.path.splitext returns a (root, ext) tuple and
            # the whole tuple is passed to RepoFile as its second argument --
            # confirm that this is what RepoFile expects.
            attachment = RepoFile(
                ArchiveToken(path, document.uuid),
                os.path.splitext(path),
            )
            document.add_file(attachment)
            self.archive.add_file(path, attachment.token)

        document.sender = sender
        document.recipient = recipient
        document.date = date
        document.keywords = keywords
        return document
コード例 #3
0
ファイル: indexEngine.py プロジェクト: rfarmaha/MSCI_541
        if DOC_OPEN_TAG in line:
            # Create a Document object
            document = Document()
        elif DOC_NO_TAG in line:
            # Generate document internal id
            docno = re.search('(LA|RF)\d{6}-\d{4}', line).group()
            docno_list = docno.split('-')
            date = docno_list[0][2:]
            doc_id += 1
            document.doc_id = doc_id
            document.docno = docno

            # Generate formatted date
            date_obj = time.strptime(date, '%m%d%y')
            formatted_date = time.strftime('%B %d, %Y', date_obj)
            document.date = formatted_date

        elif DOC_CLOSE_TAG in line:
            raw_document_string = "".join(raw_document)
            document.raw_document = raw_document_string

            # Insert into docno to id map
            doc_id_no[doc_id] = document.docno

            # Build document metadata
            build_doc_metadata(document)

            # Build in-memory inversion index
            build_inversion_index(doc_id, document)

            # Insert into directory as YY/MM/DD/NNNN.p