def crawl(self, directory=None, collection=None, meta=None):
    """Ingest every file below ``directory`` into a collection.

    Args:
        directory: Path of the directory to crawl. It is passed through
            ``string_value`` before use.
        collection: Optional label for the target collection. When it is
            falsy, the normalised absolute directory path is used instead.
        meta: Optional metadata handed to ``self.make_meta``. ``None``
            (the default) is treated as an empty dict.

    Returns:
        None. When the directory is missing or does not exist, an error
        is logged and nothing is created or ingested.
    """
    # A ``{}`` default would be one dict object shared by every call;
    # build a fresh one per invocation instead.
    if meta is None:
        meta = {}

    directory = string_value(directory)
    if directory is None or not os.path.exists(directory):
        log.error("Invalid directory: %r", directory)
        return

    directory = os.path.abspath(os.path.normpath(directory))

    # Keep the label in its own local so the ``collection`` argument is
    # not rebound from a string to the created collection object.
    label = collection or directory
    target = Collection.create({
        'foreign_id': 'directory:%s' % slugify(label),
        'label': label
    })
    # Committed before ingesting; ``target.id`` is read right below.
    db.session.commit()

    meta = self.make_meta(meta)
    meta.source_path = directory
    ingest_directory(target.id, meta, directory)
def ingest(self, meta, local_path):
    """Unpack the package at ``local_path`` and ingest its contents.

    The package is unpacked into a temporary directory via
    ``self.unpack`` and that directory is handed to ``ingest_directory``.
    The temporary directory is always removed and the previous working
    directory is always restored, even when unpacking or ingesting fails.

    Args:
        meta: Metadata object for the package; ``file_name`` and
            ``foreign_id`` are read from it.
        local_path: File system path of the package file.
    """
    # Work-around: try to unpack multi-part files by changing into
    # the directory containing the file.
    # NOTE(review): a relative ``local_path`` that has a directory
    # component would no longer resolve after the chdir - presumably
    # callers pass absolute paths; confirm.
    prev_cwd = os.getcwd()
    # dirname() is '' for a bare file name and os.chdir('') raises, so
    # fall back to the current directory in that case.
    os.chdir(os.path.dirname(local_path) or os.curdir)
    try:
        # Created inside the outer try so that a failure here still
        # restores the process-wide working directory.
        temp_dir = make_tempdir(meta.file_name)
        try:
            log.info("Descending into package: %r", meta.file_name)
            self.unpack(meta, local_path, temp_dir)
            ingest_directory(self.collection_id, meta, temp_dir,
                             base_path=meta.foreign_id, move=True)
        except rarfile.NeedFirstVolume:
            # Deliberate best-effort: this file is a later volume of a
            # multi-part archive, so it is skipped rather than treated
            # as an error.
            log.debug("Skipping non-first volume: %r", meta.file_name)
        finally:
            remove_tempdir(temp_dir)
    finally:
        os.chdir(prev_cwd)
def crawl(self, directory=None, foreign_id=None, meta=None):
    """Ingest every file below ``directory`` into a managed collection.

    Args:
        directory: Path of the directory to crawl. It is passed through
            ``string_value`` before use.
        foreign_id: Optional foreign ID of the target collection. When
            it is ``None``, ``'directory:<slug>'`` is derived from the
            normalised absolute directory path.
        meta: Optional metadata handed to ``self.make_meta``. ``None``
            (the default) is treated as an empty dict.

    Returns:
        None. When the directory is missing or does not exist, an error
        is logged and nothing is loaded or ingested.
    """
    # A ``{}`` default would be one dict object shared by every call;
    # build a fresh one per invocation instead.
    if meta is None:
        meta = {}

    directory = string_value(directory)
    if directory is None or not os.path.exists(directory):
        log.error("Invalid directory: %r", directory)
        return

    directory = os.path.abspath(os.path.normpath(directory))
    if foreign_id is None:
        foreign_id = 'directory:%s' % slugify(directory)

    collection = self.load_collection({
        'foreign_id': foreign_id,
        'label': directory,
        'managed': True
    })
    # Committed before ingesting; ``collection.id`` is read right below.
    db.session.commit()

    meta = self.make_meta(meta)
    meta.source_path = directory
    ingest_directory(collection.id, meta, directory)