Example #1
    def test_basic_archive(self):
        checksum_ = checksum(self.file)
        assert checksum_ is not None, checksum_
        out = self.archive.archive_file(self.file)
        assert checksum_ == out, (checksum_, out)
        out2 = self.archive.archive_file(self.file)
        assert out == out2, (out, out2)
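Note: the examples in this listing rely on a checksum() helper that is not shown. A minimal sketch of what it plausibly does, assuming a chunked SHA1 digest as the docstrings in the later examples suggest (the name, chunk size and error handling here are assumptions, not the library's actual code):

import hashlib

def checksum(file_path, chunk_size=8192):
    """Hypothetical sketch: SHA1 hex digest of a file, read in chunks."""
    digest = hashlib.sha1()
    try:
        with open(file_path, 'rb') as fh:
            for chunk in iter(lambda: fh.read(chunk_size), b''):
                digest.update(chunk)
    except OSError:
        # The callers above treat None as "no checksum available".
        return None
    return digest.hexdigest()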
Example #2
def complete_export(export_id, file_path):
    export = Export.by_id(export_id)
    file_path = ensure_path(file_path)
    export.file_name = safe_filename(file_path)
    export.file_size = file_path.stat().st_size
    export.content_hash = checksum(file_path)
    try:
        archive.archive_file(file_path,
                             content_hash=export.content_hash,
                             mime_type=export.mime_type)
        export.set_status(status=Status.SUCCESS)
    except Exception:
        log.exception("Failed to upload export: %s", export)
        export.set_status(status=Status.FAILED)

    db.session.commit()
    params = {"export": export}
    role = Role.by_id(export.creator_id)
    log.info("Export [%r] complete: %s", export, export.status)
    publish(
        Events.COMPLETE_EXPORT,
        params=params,
        channels=[role],
    )
    send_export_notification(export)
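Several of these examples also use ensure_path() and safe_filename(), which are not shown either. The following are rough, assumption-based sketches of their behaviour, not the library's actual implementations:

import os
from pathlib import Path

def ensure_path(file_path):
    """Hypothetical sketch: normalise the argument to a pathlib.Path."""
    if file_path is None:
        return None
    return Path(file_path).resolve()

def safe_filename(file_path, default=None):
    """Hypothetical sketch: reduce a path to a plain, safe file name."""
    if file_path is None:
        return default
    name = os.path.basename(str(file_path)).strip()
    return name or default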
Example #3
def ingest_upload(collection_id):
    require(request.authz.can(collection_id, request.authz.WRITE))
    sync = get_flag('sync')
    meta, foreign_id = _load_metadata()
    parent_id = _load_parent(collection_id, meta)
    upload_dir = mkdtemp(prefix='aleph.upload.')
    try:
        path = None
        content_hash = None
        for storage in request.files.values():
            path = safe_filename(storage.filename, default='upload')
            path = os.path.join(upload_dir, path)
            storage.save(path)
            content_hash = checksum(path)
        document = Document.by_keys(collection_id=collection_id,
                                    parent_id=parent_id,
                                    foreign_id=foreign_id,
                                    content_hash=content_hash)
        document.update(meta)
        document.schema = Document.SCHEMA
        if content_hash is None:
            document.schema = Document.SCHEMA_FOLDER
        ingest_document(document,
                        path,
                        role_id=request.authz.id,
                        content_hash=content_hash)
    finally:
        shutil.rmtree(upload_dir)

    # Make sure collection counts are always accurate.
    update_document(document, shallow=True, sync=sync)
    return jsonify({'status': 'ok', 'id': stringify(document.id)}, status=201)
Example #4
def ingest_upload(collection_id):
    require(request.authz.can(collection_id, request.authz.WRITE))
    sync = get_flag('sync')
    meta, foreign_id = _load_metadata()
    parent_id = _load_parent(collection_id, meta)
    upload_dir = mkdtemp(prefix='aleph.upload.')
    try:
        path = None
        content_hash = None
        for storage in request.files.values():
            path = safe_filename(storage.filename, default='upload')
            path = os.path.join(upload_dir, path)
            storage.save(path)
            content_hash = checksum(path)
        document = Document.by_keys(collection_id=collection_id,
                                    parent_id=parent_id,
                                    foreign_id=foreign_id,
                                    content_hash=content_hash)
        document.update(meta)
        document.schema = Document.SCHEMA
        if content_hash is None:
            document.schema = Document.SCHEMA_FOLDER
        ingest_document(document, path,
                        role_id=request.authz.id,
                        content_hash=content_hash)
    finally:
        shutil.rmtree(upload_dir)

    if document.collection.casefile:
        # Make sure collection counts are always accurate.
        update_document(document, sync=sync)
    return jsonify({
        'status': 'ok',
        'id': stringify(document.id)
    }, status=201)
Example #5
    def set_filepath(self, file_path):
        file_path = ensure_path(file_path)
        file_name = safe_filename(file_path)
        file_size = file_path.stat().st_size
        self.file_name = file_name
        self.file_size = file_size
        self._file_path = file_path
        self.content_hash = checksum(file_path)
Example #6
    def archive_file(self, file_path, content_hash=None, mime_type=None):
        """Import the given file into the archive."""
        if content_hash is None:
            content_hash = checksum(file_path)

        if content_hash is None:
            return

        if self._locate_key(content_hash):
            return content_hash

        archive_prefix = self._get_prefix(content_hash)
        archive_path = self.path.joinpath(archive_prefix)
        archive_path.mkdir(parents=True, exist_ok=True)
        file_name = safe_filename(file_path, default='data')
        archive_path = archive_path.joinpath(file_name)
        with open(file_path, 'rb') as fin:
            with open(archive_path, 'wb') as fout:
                shutil.copyfileobj(fin, fout, BUF_SIZE)
        return content_hash
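The file-system variant above and the S3/Google variants below all derive the storage location from the content hash via a prefix helper (_get_prefix / path_prefix) that is not included in this listing. A plausible sketch, assuming the hex digest is sharded into short leading segments (the exact segment lengths are an assumption):

def _get_prefix(content_hash):
    """Hypothetical sketch: shard the hex digest into nested path segments,
    e.g. 'abcdef0123...' -> 'ab/cd/ef/abcdef0123...'."""
    if content_hash is None:
        return None
    return '/'.join((content_hash[:2],
                     content_hash[2:4],
                     content_hash[4:6],
                     content_hash))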
Example #7
    def archive_file(self, file_path, content_hash=None, mime_type=None):
        """Store the file located at the given path on S3, based on a path
        made up from its SHA1 content hash."""
        file_path = ensure_path(file_path)
        if content_hash is None:
            content_hash = checksum(file_path)

        if content_hash is None:
            return

        obj = self._locate_key(content_hash)
        if obj is not None:
            return content_hash

        path = '{}/data'.format(self._get_prefix(content_hash))
        extra = {}
        if mime_type is not None:
            extra['ContentType'] = mime_type
        with open(file_path, 'rb') as fh:
            self.client.upload_fileobj(fh, self.bucket, str(path),
                                       ExtraArgs=extra)
        return content_hash
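The S3 variant assumes self.client and self.bucket are configured elsewhere on the class. A minimal initialisation sketch using boto3 (the constructor and its parameters are assumptions, not the archive's real interface):

import boto3

class S3Archive:
    def __init__(self, bucket_name, region_name=None):
        # upload_fileobj in archive_file streams the open file to this bucket.
        self.client = boto3.client('s3', region_name=region_name)
        self.bucket = bucket_name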
Example #8
    def archive_file(self, file_path, content_hash=None, mime_type=None):
        """Store the file located at the given path on Google, based on a path
        made up from its SHA1 content hash."""
        file_path = ensure_path(file_path)
        if content_hash is None:
            content_hash = checksum(file_path)

        if content_hash is None:
            return

        file_path = ensure_posix_path(file_path)
        for attempt in service_retries():
            try:
                # blob = self._locate_contenthash(content_hash)
                # if blob is not None:
                #     return content_hash

                path = os.path.join(path_prefix(content_hash), "data")
                blob = Blob(path, self.bucket)
                blob.upload_from_filename(file_path, content_type=mime_type)
                return content_hash
            except FAILURES:
                log.exception("Store error in GS")
                backoff(failures=attempt)
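The Google Storage variant retries transient failures using service_retries() and backoff(), which are not shown in this listing. A rough sketch of the retry pattern they appear to implement (the retry count and delay are made up):

import time

def service_retries(count=5):
    """Hypothetical sketch: attempt counter for a bounded retry loop."""
    return range(count)

def backoff(failures=0):
    """Hypothetical sketch: wait a little longer after each failure."""
    time.sleep(failures + 1)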