def test_gen_metadata(monkeypatch):
    """Check that ``_generate_metadata()`` builds the expected metadata.

    ``Checksums.get_checksums`` is monkeypatched so that the test file
    has a known MD5 checksum.
    """
    relative_path = "tests/data/test.txt"
    expected_md5 = "150b62e4e7d58c70503bd5fc8a26463c"
    storage_id = "pid:uuid:storage_id"
    fpath = os.path.abspath(relative_path)

    # Serve a fixed checksum for the test file instead of the real lookup
    monkeypatch.setattr(db.Checksums, "get_checksums",
                        lambda self: {fpath: expected_md5})

    checksums = db.Database().checksums.get_checksums()
    metadata = md._generate_metadata(relative_path, "tests", "data",
                                     storage_id, checksums)

    assert len(metadata["identifier"]) == 45

    # Fields with a fixed expected value
    expected_fields = (
        ("file_name", "test.txt"),
        ("file_format", "text/plain"),
        ("byte_size", 31),
        ("file_path", "/test.txt"),
        ("project_identifier", "data"),
        ("file_storage", storage_id),
    )
    for field, expected in expected_fields:
        assert metadata[field] == expected

    # Timestamps only need to be present
    for field in ("file_uploaded", "file_modified", "file_frozen"):
        assert field in metadata

    checksum = metadata["checksum"]
    assert checksum["algorithm"] == "MD5"
    assert checksum["value"] == expected_md5
    assert "checked" in checksum
Beispiel #2
0
def extract_task(fpath, dir_path, task_id):
    """Extract an uploaded archive and record the outcome on the task.

    The MD5 digest of the archive is calculated first, after which the
    archive is extracted into ``dir_path``. The archive file itself is
    removed both when the extraction succeeds and when it is rejected.
    Finally the message and status of the task are updated in the
    database.

    :param str fpath: file path of the archive
    :param str dir_path: directory to where the archive will be
                         extracted
    :param str task_id: mongo identifier of the task
    """
    database = db.Database()

    database.tasks.update_message(task_id, "Extracting archive")
    md5 = gen_metadata.md5_digest(fpath)

    try:
        extract(fpath, dir_path)
    except (MemberNameError, MemberTypeError, MemberOverwriteError) as error:
        logging.error(str(error), exc_info=error)
        # The archive was rejected: remove it and mark the task failed
        os.remove(fpath)
        database.tasks.update_status(task_id, "error")
        failure = {"message": str(error)}
        database.tasks.update_message(task_id, json.dumps(failure))
        return

    # Store the checksums of the extracted files
    database.checksums.insert(_get_archive_checksums(fpath, dir_path))

    # The archive is no longer needed; post-process the extracted files
    os.remove(fpath)
    _process_extracted_files(dir_path)

    success = {"message": "Archive uploaded and extracted", "md5": md5}
    database.tasks.update_message(task_id, json.dumps(success))
    database.tasks.update_status(task_id, "done")
Beispiel #3
0
def get_path(fpath):
    """Return information about a file or directory of the user.

    For a file the response contains its return path, Metax identifier,
    MD5 checksum and timestamp. For a directory the response contains
    the directory tree. Anything else results in a 404 response.

    :param fpath: path given in the request
    :returns: HTTP Response
    """
    username = request.authorization.username
    database = db.Database()
    project = database.user(username).get_project()
    root_upload_path = current_app.config.get("UPLOAD_PATH")
    dir_path, fname = utils.get_upload_path(project, fpath, root_upload_path)
    fpath = os.path.join(dir_path, fname)

    if os.path.isfile(fpath):
        body = {
            "file_path": utils.get_return_path(
                project, fpath, root_upload_path
            ),
            "metax_identifier": database.files.get_identifier(fpath),
            # Checksums are looked up by absolute path
            "md5": database.checksums.get_checksum(os.path.abspath(fpath)),
            "timestamp": md.iso8601_timestamp(fpath)
        }
    elif os.path.isdir(fpath):
        body = {
            "file_path": _get_dir_tree(project, fpath, root_upload_path)
        }
    else:
        return utils.make_response(404, "File not found")

    response = jsonify(body)
    response.status_code = 200
    return response
def enqueue_background_job(task_func, queue_name, username, job_kwargs):
    """Create a task in the database and enqueue a matching RQ job.

    The identifier of the created task is used as the RQ job ID and is
    also added to ``job_kwargs`` under the key ``task_id``. Note that
    the ``job_kwargs`` dictionary given by the caller is modified in
    place.

    :param str task_func: Python function to run as a string to import
                          eg. "upload_rest_api.jobs.upload.extract_archive"
    :param str queue_name: Queue used to run the job
    :param str username: Username
    :param dict job_kwargs: Keyword arguments to pass to the background
                            task
    :returns: Task identifier as a string
    """
    queue = get_job_queue(queue_name)

    database = db.Database()
    project = database.user(username).get_project()
    task_id = database.tasks.create(project)
    database.tasks.update_message(task_id, "processing")

    job_id = str(task_id)
    job_kwargs["task_id"] = job_id

    job_timeout = CONFIG.get("RQ_JOB_TIMEOUT", DEFAULT_JOB_TIMEOUT)
    failure_ttl = CONFIG.get("RQ_FAILED_JOB_TTL", DEFAULT_FAILED_JOB_TTL)

    # The timeout is passed under both names because the keyword
    # changed between rq releases
    queue.enqueue(
        task_func,
        job_id=job_id,
        timeout=job_timeout,  # rq 0.12.0 or older
        job_timeout=job_timeout,  # rq 0.13.0 and newer
        failure_ttl=failure_ttl,
        kwargs=job_kwargs)

    return job_id
Beispiel #5
0
def delete_files():
    """Start a background job that deletes all files of a user.

    If the project directory of the user does not exist, a 404 response
    is returned. Otherwise the job is enqueued and a 202 response is
    returned with the polling URL of the task in the body and the task
    status URL in the ``Location`` header.

    :returns: HTTP Response
    """
    username = request.authorization.username
    project = db.Database().user(username).get_project()
    root_upload_path = current_app.config.get("UPLOAD_PATH")
    fpath = safe_join(root_upload_path, secure_filename(project))

    if not os.path.exists(fpath):
        return utils.make_response(404, "No files found")

    job_kwargs = {"fpath": fpath, "username": username}
    task_id = enqueue_background_job(
        task_func="upload_rest_api.jobs.files.delete_files",
        queue_name=FILES_QUEUE,
        username=username,
        job_kwargs=job_kwargs)

    api_name = TASK_STATUS_API_V1.name
    body = {
        "file_path": "/",
        "message": "Deleting files and metadata",
        "polling_url": utils.get_polling_url(api_name, task_id),
        "status": "pending"
    }
    response = jsonify(body)
    response.headers[b'Location'] = url_for(api_name + ".task_status",
                                            task_id=task_id)
    response.status_code = 202

    return response
Beispiel #6
0
def upload_file(fpath):
    """Save the uploaded file at <UPLOAD_PATH>/project/fpath.

    The upload is first validated with ``up.validate_upload()``; if
    that returns a response, it is returned as is. After the file has
    been saved, the used quota of the user is updated.

    :param fpath: path given in the request
    :returns: HTTP Response. Status 409 is returned if saving the file
              raises ``up.OverwriteError``.
    """
    username = request.authorization.username
    database = db.Database()
    project = database.user(username).get_project()

    response = up.validate_upload(database)
    if response:
        return response

    file_path, file_name = utils.get_upload_path(project, fpath)

    # Create the directory if it does not exist. The creation is
    # attempted directly instead of checking for existence first, so
    # that a concurrent request creating the same directory between the
    # check and the creation can not make this request fail.
    try:
        os.makedirs(file_path)
    except OSError:
        if not os.path.isdir(file_path):
            raise

    file_path = os.path.join(file_path, file_name)
    try:
        response = up.save_file(database, project, file_path)
    except up.OverwriteError as error:
        return utils.make_response(409, str(error))

    database.user(username).update_used_quota(
        current_app.config.get("UPLOAD_PATH"))

    return response
def test_store_identifiers(monkeypatch):
    """Check that ``store_identifiers()`` saves the POSTed identifiers.

    A Metax-style response with three file objects is stored, after
    which all three identifiers must be returned by
    ``files.get_all_ids()``.
    """
    # Use the given path as is instead of resolving an absolute path
    monkeypatch.setattr(db, "_get_abs_path",
                        lambda path, _root_path, _project: path)

    monkeypatch.setattr(db.User, "get_project", lambda self: "project_path")

    # (identifier, file_path) pairs of the files in the Metax response
    posted_files = (
        ("pid:urn:1", "1"),
        ("pid:urn:2", "2"),
        ("pid:urn:3", "3"),
    )
    metax_response = [
        {"object": {"identifier": identifier, "file_path": file_path}}
        for identifier, file_path in posted_files
    ]

    database = db.Database()
    database.store_identifiers(metax_response, "/tmp", "user")

    expected_ids = [identifier for identifier, _ in posted_files]
    assert database.files.get_all_ids() == expected_ids
Beispiel #8
0
def upload_archive():
    """Upload and extract the archive at <UPLOAD_PATH>/project.

    The upload is first validated with ``up.validate_upload()``; if
    that returns a response, it is returned as is. The archive is saved
    to a temporary upload path given by ``utils.get_tmp_upload_path()``
    and handed over to ``up.save_archive()``. The optional ``dir``
    query parameter is passed on as the upload directory.

    :returns: HTTP Response. Errors raised while saving the archive are
              mapped to status codes: 409 for overwrite errors, 415 for
              ``MemberTypeError``, 400 for ``MemberNameError`` and 413
              for ``up.QuotaError``.
    """
    database = db.Database()
    response = up.validate_upload(database)
    if response:
        return response

    upload_dir = request.args.get("dir", default=None)
    file_path, file_name = utils.get_tmp_upload_path()

    # Create the directory if it does not exist. The creation is
    # attempted directly instead of checking for existence first, so
    # that a concurrent request creating the same directory between the
    # check and the creation can not make this request fail.
    try:
        os.makedirs(file_path)
    except OSError:
        if not os.path.isdir(file_path):
            raise

    file_path = safe_join(file_path, file_name)
    try:
        response = up.save_archive(database, file_path, upload_dir)
    except (MemberOverwriteError, up.OverwriteError) as error:
        return utils.make_response(409, str(error))
    except MemberTypeError as error:
        return utils.make_response(415, str(error))
    except MemberNameError as error:
        return utils.make_response(400, str(error))
    except up.QuotaError as error:
        return utils.make_response(413, str(error))

    return response
Beispiel #9
0
def clean_mongo():
    """Clean old tasks and stale file identifiers from mongo.

    Tasks older than the configured ``CLEANUP_TIMELIM`` are removed
    first. After that, file identifiers that are found in Mongo but not
    in Metax are deleted from Mongo.

    :returns: Count of cleaned Mongo documents, as returned by
              ``files.delete()``
    """
    conf = parse_conf("/etc/upload_rest_api.conf")
    metax_url = conf["METAX_URL"]
    metax_user = conf["METAX_USER"]
    metax_password = conf["METAX_PASSWORD"]
    verify_ssl = conf["METAX_SSL_VERIFICATION"]
    expiration_time = conf["CLEANUP_TIMELIM"]

    _clean_old_tasks(expiration_time)

    projects = _get_projects()
    metax_client = md.MetaxClient(metax_url, metax_user,
                                  metax_password, verify_ssl)
    metax_ids = metax_client.get_all_ids(projects)

    files = db.Database().files

    # Identifiers found in Mongo but not in Metax
    stale_ids = [
        identifier for identifier in files.get_all_ids()
        if identifier not in metax_ids
    ]

    # Remove identifiers from mongo
    return files.delete(stale_ids)
def test_delete_user(mock_mongo):
    """Check that the ``delete`` command removes an existing user."""
    db.Database().user("test").create("test_project")

    cli_args = ['upload-rest-api', 'delete', 'test']
    with mock.patch.object(sys, 'argv', cli_args):
        upload_rest_api.__main__.main()

    remaining_users = mock_mongo.upload.users.count({"_id": "test"})
    assert remaining_users == 0
Beispiel #11
0
def files_col(mock_mongo):
    """Return a Files instance that uses the mongomock connection.

    The ``files`` collection of the instance is replaced with the
    ``upload.files`` collection of ``mock_mongo``.
    """
    files = db.Database().files
    files.files = mock_mongo.upload.files
    return files
Beispiel #12
0
def user(mock_mongo):
    """Return a User instance that uses the mongomock connection.

    The ``users`` collection of the ``test_user`` instance is replaced
    with the ``upload.users`` collection of ``mock_mongo``.
    """
    mocked_user = db.Database().user("test_user")
    mocked_user.users = mock_mongo.upload.users
    return mocked_user
def test_create_existing_user():
    """Check that the ``create`` command raises UserExistsError when
    the user already exists.
    """
    db.Database().user("test").create("test_project")

    cli_args = ['upload-rest-api', 'create', 'test', 'test']
    with mock.patch.object(sys, 'argv', cli_args), \
            pytest.raises(db.UserExistsError):
        upload_rest_api.__main__.main()
Beispiel #14
0
def tasks_col(mock_mongo):
    """Return a Tasks instance that uses the mongomock connection.

    Both the ``tasks`` and the ``task_messages`` collections of the
    instance are replaced with the corresponding collections of
    ``mock_mongo.upload``.
    """
    mocked_tasks = db.Database().tasks
    upload_db = mock_mongo.upload
    mocked_tasks.tasks = upload_db.tasks
    mocked_tasks.task_messages = upload_db.task_messages
    return mocked_tasks
def post_metadata(fpath, username, storage_id, task_id):
    """Create file metadata in Metax.

    This function creates the metadata in Metax for the file(s) denoted
    by fpath argument: a single file, or every file found under a
    directory. Finally the message and status of the task are updated
    in the database. The status is "done" only if posting the metadata
    succeeded; otherwise it is "error".

    :param str fpath: file path
    :param str username: current user
    :param str storage_id: pas storage identifier in Metax
    :param str task_id: mongo identifier of the task
    """
    root_upload_path = CONFIG["UPLOAD_PATH"]

    metax_client = md.MetaxClient()
    database = db.Database()

    project = database.user(username).get_project()

    dir_path, fname = utils.get_upload_path(project, fpath, root_upload_path)
    fpath = os.path.join(dir_path, fname)
    ret_path = utils.get_return_path(project, fpath, root_upload_path)

    database.tasks.update_message(task_id, "Creating metadata: %s" % ret_path)

    # Resolve the list of files whose metadata is POSTed
    if os.path.isdir(fpath):
        # All files under dir fpath
        fpaths = []
        for dirpath, _, filenames in os.walk(fpath):
            fpaths.extend(
                os.path.join(dirpath, filename) for filename in filenames
            )
    elif os.path.isfile(fpath):
        fpaths = [fpath]
    else:
        fpaths = None

    status = "error"
    if fpaths is None:
        response = {"code": 404, "error": "File not found"}
    else:
        status_code = 200
        try:
            metax_response = metax_client.post_metadata(
                fpaths, root_upload_path, username, storage_id
            )
            status = "done"
        except HTTPError as error:
            logging.error(str(error), exc_info=error)
            metax_response = error.response.json()
            status_code = error.response.status_code

        # Create upload-rest-api response
        response = {"code": status_code, "metax_response": metax_response}

    database.tasks.update_message(task_id, json.dumps(response))
    database.tasks.update_status(task_id, status)
    def wrapper(*args, **kwargs):
        """Call ``func`` and mark the task as failed if it raises.

        ``func`` is taken from the enclosing scope. The identifier of
        the task has to be given as the keyword argument ``task_id``.

        :returns: Return value of ``func``
        :raises KeyError: if ``task_id`` is not given as a keyword
                          argument
        """
        task_id = kwargs["task_id"]

        try:
            return func(*args, **kwargs)
        except Exception:
            # Record the failure on the task with a generic message and
            # let the original exception propagate to the caller
            tasks = db.Database().tasks
            tasks.update_status(task_id, "error")
            tasks.update_message(task_id, "Internal server error")
            raise
Beispiel #17
0
def _clean_old_tasks(time_lim):
    """Delete every task that is older than ``time_lim``.

    The age of a task is the difference between the current time and
    the ``timestamp`` field of the task.

    :param time_lim: expiration time in seconds
    """
    now = time.time()
    tasks = db.Database().tasks
    for task in tasks.get_all_tasks():
        age = now - task["timestamp"]
        if age > time_lim:
            tasks.delete_one(task["_id"])
def test_get(capsys):
    """Check that ``get --users`` prints the names of the users."""
    database = db.Database()
    for username in ("test1", "test2"):
        database.user(username).create("test_project")

    cli_args = ['upload-rest-api', 'get', '--users']
    with mock.patch.object(sys, 'argv', cli_args):
        upload_rest_api.__main__.main()

    stdout, _ = capsys.readouterr()
    assert stdout == "test1\ntest2\n"
Beispiel #19
0
def clean_project(project, fpath, metax=True):
    """Remove all expired files of a given project.

    The expiration time limit (``CLEANUP_TIMELIM``) and the upload path
    are read from /etc/upload_rest_api.conf. Every file under ``fpath``
    that ``_is_expired()`` reports as expired is cleaned with
    ``_clean_file()``. After that empty directories are removed and the
    checksums of the deleted files are deleted from mongo. If ``metax``
    is True, a Metax client is created from the configuration and used
    to remove file metadata from Metax as well.

    :param project: Project identifier used to search files from Metax
    :param fpath: Path to the dir to cleanup
    :param metax: Boolean. if True metadata is removed also from Metax

    :returns: Number of deleted files
    """
    conf = parse_conf("/etc/upload_rest_api.conf")
    time_lim = conf["CLEANUP_TIMELIM"]
    upload_path = conf["UPLOAD_PATH"]

    current_time = time.time()
    # Filled in by _clean_file(); given to Metax for deletion at the end
    fpaths = []
    deleted_files = []

    if metax:
        metax_client = md.MetaxClient(
            url=conf["METAX_URL"],
            user=conf["METAX_USER"],
            password=conf["METAX_PASSWORD"],
            verify=conf["METAX_SSL_VERIFICATION"]
        )
        file_dict = metax_client.get_files_dict(project)
    else:
        metax_client = None
        file_dict = None

    # Remove all old files
    for dirpath, _, filenames in os.walk(fpath):
        for filename in filenames:
            candidate = os.path.join(dirpath, filename)
            if not _is_expired(candidate, current_time, time_lim):
                continue

            _clean_file(candidate, upload_path, fpaths,
                        file_dict, metax_client)
            deleted_files.append(candidate)

    # Remove all empty dirs
    _clean_empty_dirs(fpath)

    # Clean checksums of the deleted files from mongo
    db.Database().checksums.delete(deleted_files)

    # Remove Metax entries of deleted files that are not part of any
    # datasets
    if metax:
        metax_client.delete_metadata(project, fpaths)

    return len(deleted_files)
def test_modify():
    """Check that the ``modify`` command changes the quota and the
    project of a user.
    """
    user = db.Database().user("test")
    user.create("test_project")

    cli_args = [
        'upload-rest-api', 'modify', 'test',
        "--quota", "1",
        "--project", "X"
    ]
    with mock.patch.object(sys, 'argv', cli_args):
        upload_rest_api.__main__.main()

    assert user.get_quota() == 1
    assert user.get_project() == "X"
Beispiel #21
0
def test_auth_user(user, password, result):
    """Check ``_auth_user()`` with different username-password
    combinations.

    :param user: username of user
    :param password: password of user
    :param bool result: Expected result of authentication
    """
    # The database contains exactly one known user
    db.Database().user('test_user').create('test_project', 'test_password')

    # pylint: disable=protected-access
    authenticated = auth._auth_user(user, password)
    assert authenticated is result
def _auth_user(username, password):
    """Authenticate user.

    The digest stored for the user is compared with the digest
    calculated from the given password and the stored salt.

    :param username: name of the user
    :param password: password given by the user
    :returns: result of the digest comparison
    """
    user = db.Database().user(username)

    try:
        user_doc = user.get()
    except db.UserNotFoundError:
        # Calculate and compare a dummy digest even if the user does
        # not exist, to avoid leaking information about which users
        # exist
        dummy_digest = db.hash_passwd("passwd", "salt")
        return compare_digest(b"hash" * 16, dummy_digest)

    salt = user_doc["salt"]
    stored_digest = user_doc["digest"]
    given_digest = db.hash_passwd(password, salt)

    return compare_digest(stored_digest, given_digest)
Beispiel #23
0
def init_db(mock_mongo):
    """Initialize user db.

    The ``upload`` database is dropped and three users are created
    through a single User instance whose ``username`` is changed
    between the calls.
    """
    mock_mongo.drop_database("upload")

    # First user
    user = db.Database().user("test")
    user.users = mock_mongo.upload.users
    user.create("test_project", password="******")

    # Second user with the same project and third user with a
    # different project
    other_users = (
        ("******", "test_project"),
        ("******", "project"),
    )
    for username, project in other_users:
        user.username = username
        user.create(project, password="******")
Beispiel #24
0
def get_files():
    """Return the directory tree of the project of the user.

    A 404 response is returned if the project directory does not exist.

    :return: HTTP Response
    """
    username = request.authorization.username
    project = db.Database().user(username).get_project()
    root_upload_path = current_app.config.get("UPLOAD_PATH")
    project_dir = safe_join(root_upload_path, secure_filename(project))

    if os.path.exists(project_dir):
        dir_tree = _get_dir_tree(project, project_dir, root_upload_path)
        response = jsonify(dir_tree)
        response.status_code = 200
        return response

    return utils.make_response(404, "No files found")
def post_metadata(fpath):
    """POST file metadata to Metax.

    A background task is launched to run the job. The ``Location``
    header and the body of the response contain the URL to be used for
    polling the status of the task. Status code is set to HTTP
    202(Accepted).

    :param fpath: path given in the request; passed unchanged to the
                  background job
    :returns: HTTP Response
    """
    username = request.authorization.username
    project = db.Database().user(username).get_project()
    root_upload_path = current_app.config.get("UPLOAD_PATH")
    dir_path, fname = utils.get_upload_path(project, fpath, root_upload_path)
    file_path = os.path.join(dir_path, fname)

    job_kwargs = {
        "fpath": fpath,
        "username": username,
        "storage_id": current_app.config.get("STORAGE_ID")
    }
    task_id = enqueue_background_job(
        task_func="upload_rest_api.jobs.metadata.post_metadata",
        queue_name=METADATA_QUEUE,
        username=username,
        job_kwargs=job_kwargs)

    api_name = TASK_STATUS_API_V1.name
    polling_url = utils.get_polling_url(api_name, task_id)
    body = {
        "file_path": utils.get_return_path(
            project, file_path, root_upload_path
        ),
        "message": "Creating metadata",
        "polling_url": polling_url,
        "status": "pending"
    }
    response = jsonify(body)
    response.headers[b'Location'] = url_for(api_name + ".task_status",
                                            task_id=task_id)
    response.status_code = 202

    return response
def delete_files(fpath, username, task_id):
    """Delete files and metadata denoted by fpath directory under user's
    project. The whole directory is recursively removed.

    The metadata is deleted from Metax first. If that fails, the task
    is marked as failed and the error is raised again without removing
    any files. Otherwise the checksums and the directory are removed,
    the used quota of the user is updated and the task is marked done.

    :param str fpath: path to directory
    :param str username: current user
    :param str task_id: mongo identifier of the task
    :raises MetaxError, HTTPError: if deleting the metadata fails
    """
    root_upload_path = CONFIG["UPLOAD_PATH"]

    metax_client = md.MetaxClient()
    database = db.Database()
    project = database.user(username).get_project()
    ret_path = utils.get_return_path(project, fpath, root_upload_path)
    database.tasks.update_message(
        task_id, "Deleting files and metadata: %s" % ret_path
    )

    # Remove metadata from Metax
    try:
        metax_response = metax_client.delete_all_metadata(
            project, fpath, root_upload_path)
    except (MetaxError, HTTPError) as error:
        database.tasks.update_status(task_id, "error")
        failure = {"message": str(error)}
        database.tasks.update_message(task_id, json.dumps(failure))
        raise

    # Remove checksum from mongo
    database.checksums.delete_dir(fpath)

    # Remove project directory and update used_quota
    rmtree(fpath)
    database.user(username).update_used_quota(root_upload_path)

    result = {
        "file_path": ret_path,
        "status": "done",
        "metax": metax_response
    }
    database.tasks.update_message(task_id, json.dumps(result))
    database.tasks.update_status(task_id, "done")
def delete_metadata(fpath, username, task_id):
    """Delete file metadata.

    This function deletes the metadata in Metax for the file(s) denoted
    by fpath argument: a single file, or all files under a directory.
    Finally the message and status of the task are updated in the
    database. The status is "done" only if the deletion succeeded;
    otherwise it is "error".

    :param str fpath: file path
    :param str username: current user
    :param str task_id: mongo identifier of the task
    """
    root_upload_path = CONFIG["UPLOAD_PATH"]

    metax_client = md.MetaxClient()
    database = db.Database()

    project = database.user(username).get_project()
    dir_path, fname = utils.get_upload_path(project, fpath, root_upload_path)
    fpath = os.path.join(dir_path, fname)
    ret_path = utils.get_return_path(project, fpath, root_upload_path)
    database.tasks.update_message(task_id, "Deleting metadata: %s" % ret_path)

    # Select the client method matching the type of the path
    if os.path.isfile(fpath):
        # Remove metadata of a single file from Metax
        delete_func = metax_client.delete_file_metadata
    elif os.path.isdir(fpath):
        # Remove all file metadata of files under dir fpath from Metax
        delete_func = metax_client.delete_all_metadata
    else:
        delete_func = None

    status = "error"
    if delete_func is None:
        response = {"code": 404, "error": "File not found"}
    else:
        try:
            metax_response = delete_func(project, fpath, root_upload_path,
                                         force=True)
        except HTTPError as error:
            logging.error(str(error), exc_info=error)
            response = {
                "file_path": utils.get_return_path(
                    project, fpath, root_upload_path
                ),
                "metax": error.response.json()
            }
        except md.MetaxClientError as error:
            logging.error(str(error), exc_info=error)
            response = {"code": 400, "error": str(error)}
        else:
            status = "done"
            response = {
                "file_path": utils.get_return_path(
                    project, fpath, root_upload_path
                ),
                "metax": metax_response
            }

    database.tasks.update_message(task_id, json.dumps(response))
    database.tasks.update_status(task_id, status)
Beispiel #28
0
def delete_path(fpath):
    """Delete fpath under user's project.

    A file is deleted immediately: its metadata is removed from Metax,
    its checksum is removed from mongo, the file itself is removed and
    the used quota of the user is updated. If removing the metadata
    raises ``md.MetaxClientError``, the error message is included in
    the response and the file is deleted anyway.

    If fpath resolves to a directory, a background job is enqueued to
    recursively remove the whole directory and a 202 response with the
    polling URL of the task is returned.

    :param fpath: path given in the request
    :returns: HTTP Response
    """
    root_upload_path = current_app.config.get("UPLOAD_PATH")
    username = request.authorization.username
    database = db.Database()
    project = database.user(username).get_project()
    dir_path, fname = utils.get_upload_path(project, fpath)
    fpath = os.path.join(dir_path, fname)

    if not os.path.isfile(fpath):
        if not os.path.isdir(fpath):
            return utils.make_response(404, "File not found")

        # Directory: files and metadata are removed in a background job
        task_id = enqueue_background_job(
            task_func="upload_rest_api.jobs.files.delete_files",
            queue_name=FILES_QUEUE,
            username=username,
            job_kwargs={"fpath": fpath, "username": username})

        api_name = TASK_STATUS_API_V1.name
        polling_url = utils.get_polling_url(api_name, task_id)
        # Path of the directory with the project directory prefix
        # stripped off
        project_prefix = os.path.join(root_upload_path, project)
        response = jsonify({
            "file_path": fpath[len(project_prefix):],
            "message": "Deleting files and metadata",
            "polling_url": polling_url,
            "status": "pending"
        })
        response.headers[b'Location'] = url_for(
            api_name + ".task_status", task_id=task_id
        )
        response.status_code = 202
        return response

    # Single file: remove metadata from Metax
    try:
        metax_response = md.MetaxClient().delete_file_metadata(
            project, fpath, root_upload_path)
    except md.MetaxClientError as exception:
        metax_response = str(exception)

    # Remove checksum from mongo
    database.checksums.delete_one(os.path.abspath(fpath))
    os.remove(fpath)

    database.user(username).update_used_quota(root_upload_path)

    response = jsonify({
        "file_path": utils.get_return_path(project, fpath, root_upload_path),
        "message": "deleted",
        "metax": metax_response
    })
    response.status_code = 200

    return response
Beispiel #29
0
    def post_metadata(self, fpaths, root_upload_path, username, storage_id):
        """Generate file metadata and POST it to Metax in 5k chunks.

        The identifiers reported by Metax as successfully created are
        stored to the database after each chunk.

        :param fpaths: List of files for which to generate the metadata
        :param root_upload_path: root upload directory
        :param username: current user
        :param storage_id: pas storage identifier in Metax
        :returns: Stripped HTTP response returned by Metax, merged from
                  the responses of all chunks into a dictionary with
                  keys "success" and "failed".
                  Success list contains succesfully generated file
                  metadata in format:
                  [
                      {
                          "object": {
                              "identifier": identifier,
                              "file_path": file_path,
                              "checksum": {"value": checksum},
                              "parent_directory": {
                                  "identifier": identifier
                              }
                          }
                      },
                      .
                      .
                      .
                  ]
        """
        chunk_size = 5000
        database = db.Database()
        project = database.user(username).get_project()
        checksums = database.checksums.get_checksums()
        stripped_responses = []

        def _post_chunk(chunk):
            """POST one chunk of metadata and store created identifiers."""
            metax_response = self.client.post_file(chunk)
            stripped_responses.append(_strip_metax_response(metax_response))
            # Add created identifiers to Mongo
            if "success" in metax_response and metax_response["success"]:
                database.store_identifiers(
                    metax_response["success"], root_upload_path, username
                )

        pending = []
        for fpath in fpaths:
            pending.append(_generate_metadata(
                fpath, root_upload_path,
                project, storage_id, checksums
            ))

            # POST metadata to Metax whenever a full chunk is collected
            if len(pending) == chunk_size:
                _post_chunk(pending)
                pending = []

        # POST remaining metadata
        if pending:
            _post_chunk(pending)

        # Merge all responses into one response
        merged = {"success": [], "failed": []}
        for stripped in stripped_responses:
            for key in ("success", "failed"):
                if key in stripped:
                    merged[key].extend(stripped[key])

        return merged
def test_mongo_cleanup(
        app, test_auth, monkeypatch, background_job_runner
):
    """Check that ``clean_mongo()`` removes exactly those file
    identifiers from mongo that haven't been posted to Metax.
    """
    test_client = app.test_client()

    # Make Files connect to the Mongo instance configured for the app
    def _mock_init(self, client):
        mongo_client = pymongo.MongoClient(
            app.config.get("MONGO_HOST"), app.config.get("MONGO_PORT")
        )
        self.files = mongo_client.upload.files

    monkeypatch.setattr(db.Files, "__init__", _mock_init)

    # Parse the bundled configuration file if the given one is missing
    # and override the values that depend on the test environment
    def _mock_conf(fpath):
        if os.path.isfile(fpath):
            conf_path = fpath
        else:
            conf_path = "include/etc/upload_rest_api.conf"

        conf = run_path(conf_path)
        conf["METAX_PASSWORD"] = PASSWORD
        conf["UPLOAD_PATH"] = app.config.get("UPLOAD_PATH")
        conf["CLEANUP_TIMELIM"] = -1

        return conf

    monkeypatch.setattr(clean, "parse_conf", _mock_conf)

    files_col = db.Database().files

    # ----- Fake identifiers are removed by the cleanup
    fake_documents = [
        {"_id": "pid:urn:1", "file_path": "1"},
        {"_id": "pid:urn:2", "file_path": "2"}
    ]
    files_col.insert(fake_documents)
    assert len(files_col.get_all_ids()) == 2

    clean.clean_mongo()
    assert not files_col.get_all_ids()

    # ----- Identifiers of files posted to Metax survive the cleanup
    # Upload integration.zip, which is extracted by the server
    upload_poll = _upload_file(
        test_client, "/v1/archives",
        test_auth, "tests/data/integration.zip"
    )
    upload_result = background_job_runner(test_client, "upload", upload_poll)
    assert upload_result.status_code == 200

    # Generate and POST metadata for all the files in test_project
    metadata_poll = test_client.post("/v1/metadata/*", headers=test_auth)
    metadata_result = background_job_runner(
        test_client, "metadata", metadata_poll
    )
    assert metadata_result.status_code == 200

    # Check that generated identifiers were added to Mongo
    assert len(files_col.get_all_ids()) == 2

    # Check that generated file_paths resolve to actual files
    for file_doc in files_col.files.find():
        assert os.path.isfile(file_doc["file_path"])

    # Try to clean file documents that still exist in Metax
    clean.clean_mongo()

    assert len(files_col.get_all_ids()) == 2