def ingest_upload(collection_id):
    """Ingest one or more uploaded files into the given collection.

    Expects a multipart form whose file parts are the documents to
    ingest, plus an optional ``meta`` field holding a JSON object of
    shared document metadata.

    :raises BadRequest: if ``meta`` is not valid JSON or does not
        conform to the ``metadata.json#`` schema.
    """
    collection = obj_or_404(Collection.by_id(collection_id))
    request.authz.require(request.authz.collection_write(collection.id))
    log_event(request)
    crawler_run = make_textid()
    # Parse and validate the shared metadata exactly once, up front.
    # (Previously it was parsed before the loop without validation and
    # then re-parsed and validated once per uploaded file.)
    try:
        meta = json.loads(request.form.get('meta', '{}'))
        validate(meta, 'metadata.json#')
    except Exception as ex:
        raise BadRequest(unicode(ex))
    documents = []
    for storage in request.files.values():
        sec_fn = os.path.join(upload_folder,
                              secure_filename(storage.filename))
        storage.save(sec_fn)
        try:
            content_hash = checksum(sec_fn)
            document = Document.by_keys(collection=collection,
                                        content_hash=content_hash)
            document.crawler = 'user_upload:%s' % request.authz.role.id
            document.crawler_run = crawler_run
            document.mime_type = storage.mimetype
            document.file_name = storage.filename
            document.meta.update(meta)
            ingest_document(document, sec_fn, user_queue=True)
        finally:
            # Always remove the temporary upload, even when checksum or
            # ingest fails — the original leaked the file on error.
            os.unlink(sec_fn)
        documents.append(document)
    return jsonify({'status': 'ok', 'documents': documents})
def archive_file(self, file_path, content_hash=None):
    """Store a file in the bucket, keyed by its content hash.

    The hash is computed from the file when not supplied. Uploading is
    skipped entirely when an object for this hash is already present.

    :returns: the content hash of the archived file.
    """
    if content_hash is None:
        content_hash = checksum(file_path)
    existing = self._locate_key(content_hash)
    if existing is None:
        # Not yet archived: upload under the hash-derived prefix.
        key = os.path.join(self._get_prefix(content_hash), 'data')
        self.bucket.upload_file(file_path, key)
    return content_hash
def archive_file(self, file_path, content_hash=None):
    """Import the given file into the archive.

    Files are stored under a directory derived from their content
    hash; when a file with the same hash has already been archived,
    nothing is copied.

    :returns: the content hash of the archived file.
    """
    if content_hash is None:
        content_hash = checksum(file_path)
    if self._locate_key(content_hash):
        # Already archived under this hash; nothing to do.
        return content_hash
    archive_path = os.path.join(self.path, self._get_prefix(content_hash))
    try:
        os.makedirs(archive_path)
    except OSError as exc:
        # Only tolerate the directory already existing (e.g. created by
        # a concurrent writer). The previous bare `except:` swallowed
        # every error, including permission failures and interrupts.
        import errno
        if exc.errno != errno.EEXIST:
            raise
    file_name = make_filename(file_path, default='data')
    archive_path = os.path.join(archive_path, file_name)
    shutil.copy(file_path, archive_path)
    return content_hash
def handle_child(self, parent, file_path, title=None, mime_type=None,
                 id=None, file_name=None):
    """Register a child document of *parent* and queue it for ingest."""
    file_path = decode_path(file_path)
    file_name = decode_path(file_name) or os.path.basename(file_path)
    # Directories carry no content of their own, so they get no hash.
    is_directory = os.path.isdir(file_path)
    content_hash = None if is_directory else checksum(file_path)
    parent_doc = parent.document
    document = Document.by_keys(parent_id=parent_doc.id,
                                collection=parent_doc.collection,
                                foreign_id=id,
                                content_hash=content_hash)
    # Explicit arguments win; otherwise keep what the stored meta holds.
    overrides = (('title', title),
                 ('file_name', file_name),
                 ('mime_type', mime_type))
    for field, value in overrides:
        setattr(document, field, value or document.meta.get(field))
    from aleph.ingest import ingest_document
    ingest_document(document, file_path, user_queue=parent.user_queue)
def _update_metadata(self, filename, meta):
    """Stamp *meta* with the content hash of *filename* and return it."""
    digest = checksum(filename)
    meta.content_hash = digest
    return meta