Beispiel #1
0
def ingest_upload(collection_id):
    """Ingest every file uploaded in the current request into a collection.

    The optional ``meta`` form field (a JSON object) is parsed and validated
    against ``metadata.json#`` once, before any upload is written to disk,
    and then merged into the metadata of each resulting document.

    :param collection_id: id of the collection to add the documents to; the
        requesting role must have write access to it.
    :returns: a JSON response with ``status`` and the list of ``documents``.
    :raises BadRequest: if ``meta`` is not valid JSON, fails validation, or
        cannot be merged into a document's metadata.
    """
    collection = obj_or_404(Collection.by_id(collection_id))
    request.authz.require(request.authz.collection_write(collection.id))
    log_event(request)
    crawler_run = make_textid()

    # Parse and validate the metadata a single time, up front: it is the
    # same for every file, and rejecting it here means no temporary file
    # has been created yet when the request is refused.
    try:
        meta = json.loads(request.form.get('meta', '{}'))
        validate(meta, 'metadata.json#')
    except Exception as ex:
        raise BadRequest(unicode(ex))

    documents = []
    for storage in request.files.values():
        sec_fn = os.path.join(upload_folder, secure_filename(storage.filename))
        try:
            storage.save(sec_fn)
            content_hash = checksum(sec_fn)
            document = Document.by_keys(collection=collection,
                                        content_hash=content_hash)
            document.crawler = 'user_upload:%s' % request.authz.role.id
            document.crawler_run = crawler_run
            document.mime_type = storage.mimetype
            document.file_name = storage.filename

            try:
                document.meta.update(meta)
            except Exception as ex:
                raise BadRequest(unicode(ex))

            ingest_document(document, sec_fn, user_queue=True)
        finally:
            # Always remove the temporary upload, even when ingesting
            # fails; the existence check avoids masking the original error
            # if the file was never written.
            if os.path.exists(sec_fn):
                os.unlink(sec_fn)
        documents.append(document)
    return jsonify({'status': 'ok', 'documents': documents})
Beispiel #2
0
Datei: s3.py Projekt: wcyn/aleph
    def archive_file(self, file_path, content_hash=None):
        """Upload the file to the bucket unless its hash is already stored.

        The content hash is computed with ``checksum`` when the caller does
        not supply one. The object is uploaded under ``<prefix>/data``.
        Returns the content hash in either case.
        """
        digest = checksum(file_path) if content_hash is None else content_hash

        # Nothing to upload when a key for this hash can already be located.
        if self._locate_key(digest) is not None:
            return digest

        prefix = self._get_prefix(digest)
        self.bucket.upload_file(file_path, os.path.join(prefix, 'data'))
        return digest
Beispiel #3
0
    def archive_file(self, file_path, content_hash=None):
        """Import the given file into the archive.

        The file is copied into a directory derived from its content hash
        below ``self.path``. If a key for the hash can already be located,
        nothing is copied.

        :param file_path: path of the file to import.
        :param content_hash: hash of the file; computed with ``checksum``
            when omitted.
        :returns: the content hash of the file.
        :raises OSError: if the target directory cannot be created.
        """
        if content_hash is None:
            content_hash = checksum(file_path)

        if self._locate_key(content_hash):
            return content_hash

        archive_path = os.path.join(self.path, self._get_prefix(content_hash))
        try:
            os.makedirs(archive_path)
        except OSError:
            # An already existing directory is expected and harmless; any
            # other failure (permissions, a file in the way, ...) would make
            # the copy below fail anyway, so surface it here.
            if not os.path.isdir(archive_path):
                raise
        file_name = make_filename(file_path, default='data')
        archive_path = os.path.join(archive_path, file_name)
        shutil.copy(file_path, archive_path)
        return content_hash
Beispiel #4
0
    def handle_child(self, parent, file_path, title=None, mime_type=None,
                     id=None, file_name=None):
        """Create or look up a child document of ``parent`` and ingest it.

        The document is keyed by the parent's document id and collection,
        the given foreign ``id`` and the file's content hash. ``title``,
        ``file_name`` and ``mime_type`` fall back to the values already in
        the document's metadata when not supplied.
        """
        file_path = decode_path(file_path)
        file_name = decode_path(file_name)
        if not file_name:
            file_name = os.path.basename(file_path)

        # Directories are not checksummed; they are keyed with no hash.
        if os.path.isdir(file_path):
            content_hash = None
        else:
            content_hash = checksum(file_path)

        parent_doc = parent.document
        document = Document.by_keys(
            parent_id=parent_doc.id,
            collection=parent_doc.collection,
            foreign_id=id,
            content_hash=content_hash,
        )

        if not title:
            title = document.meta.get('title')
        document.title = title

        if not file_name:
            file_name = document.meta.get('file_name')
        document.file_name = file_name

        if not mime_type:
            mime_type = document.meta.get('mime_type')
        document.mime_type = mime_type

        # Imported at call time rather than at module level.
        from aleph.ingest import ingest_document
        ingest_document(document, file_path, user_queue=parent.user_queue)
Beispiel #5
0
 def _update_metadata(self, filename, meta):
     """Store the checksum of ``filename`` on ``meta`` and return ``meta``."""
     digest = checksum(filename)
     meta.content_hash = digest
     return meta
Beispiel #6
0
 def _update_metadata(self, filename, meta):
     """Store the checksum of ``filename`` on ``meta`` and return ``meta``."""
     digest = checksum(filename)
     meta.content_hash = digest
     return meta