def _create_document(self, sender, recipient, date, keywords, files):
    """Create a Document, register it and its files with the archive.

    A new ``Document`` is added to ``self.archive`` first.  Each entry of
    *files* is then wrapped in a ``RepoFile`` (keyed by an ``ArchiveToken``
    built from the file name and the document's ``uuid``), attached to the
    document, and handed to the archive together with the repo file's token.
    Finally the metadata fields are set on the document.

    Args:
        sender: Value stored in ``doc.sender``.
        recipient: Value stored in ``doc.recipient``.
        date: Value stored in ``doc.date``.
        keywords: Value stored in ``doc.keywords``.
        files: Iterable of file names to attach to the document.

    Returns:
        The newly created document.
    """
    import os.path

    doc = Document()
    # The document is registered before its metadata is filled in.
    self.archive.add_document(doc)

    for filename in files:
        token = ArchiveToken(filename, doc.uuid)
        # NOTE(review): splitext() returns a (root, ext) tuple and the whole
        # tuple is passed on as the file type -- confirm RepoFile expects that.
        repo_file = RepoFile(token, os.path.splitext(filename))
        doc.add_file(repo_file)
        self.archive.add_file(filename, repo_file.token)

    doc.sender = sender
    doc.recipient = recipient
    doc.date = date
    doc.keywords = keywords
    return doc
# Dispatch on which document tag the current input line contains.
if DOC_OPEN_TAG in line:
    # Opening tag: start a fresh Document object.
    document = Document()
elif DOC_NO_TAG in line:
    # Document-number line: extract the external id, e.g. "LA010189-0001".
    # The pattern is a raw string so that \d reaches the regex engine as-is
    # instead of relying on Python passing through an invalid string escape.
    # NOTE: re.search() returns None when the line holds no such id, in
    # which case .group() raises AttributeError.
    docno = re.search(r'(LA|RF)\d{6}-\d{4}', line).group()
    docno_list = docno.split('-')
    # Drop the two-letter prefix; the six digits that remain are MMDDYY.
    date = docno_list[0][2:]

    # Generate the document's internal (sequential) id.
    doc_id += 1
    document.doc_id = doc_id
    document.docno = docno

    # Generate the formatted date, e.g. "January 01, 1989".
    date_obj = time.strptime(date, '%m%d%y')
    formatted_date = time.strftime('%B %d, %Y', date_obj)
    document.date = formatted_date
elif DOC_CLOSE_TAG in line:
    # Closing tag: the document is complete, so store its raw text.
    raw_document_string = "".join(raw_document)
    document.raw_document = raw_document_string

    # Record the internal id -> docno mapping.
    doc_id_no[doc_id] = document.docno

    # Build document metadata
    build_doc_metadata(document)

    # Build in-memory inversion index
    build_inversion_index(doc_id, document)

    # Insert into directory as YY/MM/DD/NNNN.p