def ingest(self, meta, local_path):
    """Unpack a zipped bundle and ingest the extracted directory.

    The archive at ``local_path`` is extracted into a fresh temporary
    directory, which is then handed to ``self.ingest_directory`` together
    with ``meta``. The temporary directory is removed afterwards, whether
    or not extraction or ingestion raised.
    """
    scratch_dir = make_tempdir()
    try:
        log.info("Unpacking bundle: %r", meta.file_name)
        with ZipFile(local_path, 'r') as archive:
            archive.extractall(path=scratch_dir)
        self.ingest_directory(meta, scratch_dir)
    finally:
        # Always discard the extraction directory, even on failure.
        remove_tempdir(scratch_dir)
def ingest(self, meta, local_path):
    """Unpack a package file and ingest the directory it extracts to.

    The package is unpacked via ``self.unpack`` into a temporary directory
    which is then passed to ``ingest_directory``. A
    ``rarfile.NeedFirstVolume`` error raised along the way is logged and
    otherwise ignored. The temporary directory is removed and the previous
    working directory restored on every exit path.
    """
    # Work-around: try to unpack multi-part files by changing into
    # the directory containing the file.
    prev_cwd = os.getcwd()
    # Resolve against the current directory *before* changing it, so a
    # relative path keeps pointing at the same file and a bare file name
    # does not yield an empty directory for chdir().
    local_path = os.path.abspath(local_path)
    temp_dir = None
    try:
        os.chdir(os.path.dirname(local_path))
        temp_dir = make_tempdir(meta.file_name)
        log.info("Descending into package: %r", meta.file_name)
        self.unpack(meta, local_path, temp_dir)
        ingest_directory(self.collection_id, meta, temp_dir,
                         base_path=meta.foreign_id, move=True)
    except rarfile.NeedFirstVolume:
        # Deliberately not an error; record the skip instead of hiding it.
        log.debug("Skipping package, first volume needed: %r",
                  meta.file_name)
    finally:
        try:
            # temp_dir is still None if we failed before creating it.
            if temp_dir is not None:
                remove_tempdir(temp_dir)
        finally:
            # Restore the working directory even if cleanup itself fails.
            os.chdir(prev_cwd)
def ingest(self, meta, local_path):
    """Convert an Outlook PST file with ``readpst`` and ingest its output.

    The ``readpst`` binary (overridable through the ``READPST_BIN``
    environment variable) is run with ``work_dir`` as its ``-o`` argument.
    Every file found under ``work_dir`` afterwards is ingested through
    ``ingest_file`` with a child of ``meta`` that carries:

    * one keyword per path component of the file's directory relative to
      ``work_dir``;
    * a ``foreign_id`` built from ``meta.foreign_id``, that relative
      directory and the file name.

    The working directory is removed on every exit path.
    """
    work_dir = make_tempdir()
    try:
        bin_path = os.environ.get('READPST_BIN', 'readpst')
        args = [bin_path, '-D', '-e', '-o', work_dir, local_path]
        log.debug('Converting Outlook PST file: %r', ' '.join(args))
        exit_code = subprocess.call(args)
        if exit_code != 0:
            # Best effort: still ingest whatever was written, but do not
            # let a failed conversion go unnoticed.
            log.warning('readpst exited with status %r for: %r',
                        exit_code, local_path)
        for dirpath, _dirnames, filenames in os.walk(work_dir):
            reldir = os.path.relpath(dirpath, work_dir)
            for filename in filenames:
                child = meta.make_child()
                for kw in reldir.split(os.path.sep):
                    child.add_keyword(kw)
                child.foreign_id = os.path.join(meta.foreign_id, reldir,
                                                filename)
                # Ingest with the child metadata prepared above; passing
                # the parent ``meta`` would discard the keywords and the
                # per-file foreign_id.
                ingest_file(self.source_id, child,
                            os.path.join(dirpath, filename), move=True)
    finally:
        remove_tempdir(work_dir)
def emit_bundle(self, meta, directory, files):
    """Zip ``files`` from ``directory`` into a bundle and ingest it.

    A child of ``meta`` describes the bundle: its MIME type is
    ``self.BUNDLE_MIME`` and its file name is ``meta.file_name`` with
    ``self.BUNDLE_EXTENSION`` appended. When ``meta.foreign_id`` is set,
    the child's ``source_path`` is derived from it as well. The archive is
    written without compression into a temporary directory, handed to
    ``ingest_file``, and the temporary directory is removed afterwards.
    """
    child_meta = meta.make_child()
    if meta.foreign_id:
        child_meta.source_path = os.path.join(meta.foreign_id,
                                              self.BUNDLE_EXTENSION)
    child_meta.mime_type = self.BUNDLE_MIME
    child_meta.file_name = '%s.%s' % (meta.file_name, self.BUNDLE_EXTENSION)
    log.info("Creating bundle: %r", child_meta.file_name)

    staging_dir = make_tempdir()
    try:
        archive_path = os.path.join(staging_dir, child_meta.file_name)
        with ZipFile(archive_path, mode='w', compression=ZIP_STORED) as archive:
            for member in files:
                # Store each file under its name relative to ``directory``.
                archive.write(os.path.join(directory, member), member)
        ingest_file(self.collection_id, child_meta, archive_path, move=True)
    finally:
        remove_tempdir(staging_dir)
def ingest(self, meta, local_path):
    """Convert an Outlook PST file with ``readpst`` and ingest its output.

    The ``readpst`` binary (overridable through the ``READPST_BIN``
    environment variable) is run with ``work_dir`` as its ``-o`` argument.
    Every file found under ``work_dir`` afterwards is ingested through
    ``ingest_file`` with a child of ``meta`` that carries:

    * one keyword per path component of the file's directory relative to
      ``work_dir``;
    * a ``foreign_id`` built from ``meta.foreign_id``, that relative
      directory and the file name.

    The working directory is removed on every exit path.
    """
    work_dir = make_tempdir()
    try:
        bin_path = os.environ.get('READPST_BIN', 'readpst')
        args = [bin_path, '-D', '-e', '-o', work_dir, local_path]
        log.debug('Converting Outlook PST file: %r', ' '.join(args))
        exit_code = subprocess.call(args)
        if exit_code != 0:
            # Best effort: still ingest whatever was written, but do not
            # let a failed conversion go unnoticed.
            log.warning('readpst exited with status %r for: %r',
                        exit_code, local_path)
        for dirpath, _dirnames, filenames in os.walk(work_dir):
            reldir = os.path.relpath(dirpath, work_dir)
            for filename in filenames:
                child = meta.make_child()
                for kw in reldir.split(os.path.sep):
                    child.add_keyword(kw)
                child.foreign_id = os.path.join(meta.foreign_id, reldir,
                                                filename)
                # Ingest with the child metadata prepared above; passing
                # the parent ``meta`` would discard the keywords and the
                # per-file foreign_id.
                ingest_file(self.collection_id, child,
                            os.path.join(dirpath, filename), move=True)
    finally:
        remove_tempdir(work_dir)