def test_fetch_flowcell_pdc_retrieval_failed(
    mock_store,
    mock_pdc,
    mock_flowcell,
    mock_check_processing,
    mock_maximum_flowcells_ondisk,
    caplog,
):
    """Verify that fetch_flowcell logs the failure and lets the error propagate
    when the PDC retrieval command fails."""

    caplog.set_level(logging.INFO)

    # GIVEN a backup API that is allowed to start a retrieval: the processing
    # check passes and the on-disk maximum has not been reached
    api = BackupApi(
        mock_store,
        mock_pdc,
        max_flowcells_on_disk=1250,
        root_dir="/path/to/root_dir",
    )
    api.check_processing.return_value = True
    api.maximum_flowcells_ondisk.return_value = False

    # AND a PDC client whose retrieve call fails with a non-zero exit status
    retrieval_error = subprocess.CalledProcessError(1, "echo")
    mock_pdc.retrieve_flowcell.side_effect = retrieval_error

    # WHEN the flowcell is fetched, THEN the error reaches the caller
    with pytest.raises(subprocess.CalledProcessError):
        api.fetch_flowcell(mock_flowcell, dry_run=False)

    # AND the failed retrieval has been written to the log
    assert "retrieval failed" in caplog.text
def test_fetch_flowcell_no_flowcells_requested(
    mock_store,
    mock_pdc,
    mock_check_processing,
    mock_maximum_flowcells_ondisk,
    mock_pop_flowcell,
    caplog,
):
    """Verify that fetch_flowcell returns None and logs a message when no
    flowcell is specified and none is waiting in the requested queue."""

    caplog.set_level(logging.INFO)

    # GIVEN a backup API that would be allowed to start a retrieval
    api = BackupApi(
        mock_store,
        mock_pdc,
        max_flowcells_on_disk=1250,
        root_dir="/path/to/root_dir",
    )

    # AND popping the requested queue yields nothing
    mock_pop_flowcell.return_value = None
    api.check_processing.return_value = True
    api.maximum_flowcells_ondisk.return_value = False

    # WHEN fetching without specifying a flowcell
    unspecified_flowcell = None
    elapsed = api.fetch_flowcell(unspecified_flowcell, dry_run=False)

    # THEN nothing is fetched
    assert elapsed is None

    # AND the log states that no flowcells were requested
    assert "no flowcells requested" in caplog.text
def test_fetch_flowcell_retrieve_specified_flowcell(
    mock_store,
    mock_pdc,
    mock_flowcell,
    mock_check_processing,
    mock_maximum_flowcells_ondisk,
    caplog,
):
    """Verify the happy path of fetch_flowcell for an explicitly given flowcell:
    retrieval is logged, the status becomes "retrieved", the store is committed
    and a positive elapsed time is returned."""

    caplog.set_level(logging.INFO)

    # GIVEN a backup API that is allowed to start a retrieval
    api = BackupApi(
        mock_store,
        mock_pdc,
        max_flowcells_on_disk=1250,
        root_dir="/path/to/root_dir",
    )
    api.check_processing.return_value = True
    api.maximum_flowcells_ondisk.return_value = False

    # WHEN a specific flowcell is fetched
    elapsed = api.fetch_flowcell(mock_flowcell, dry_run=False)

    # THEN the start of the PDC retrieval is logged
    logged = caplog.text
    assert "retrieving from PDC" in logged

    # AND the flowcell ends up with status "retrieved", which is logged as well
    expected_status_message = f'Status for flowcell {mock_flowcell.name} set to "retrieved"'
    assert expected_status_message in logged
    assert mock_flowcell.status == "retrieved"

    # AND the change has been committed to status-db
    assert mock_store.commit.called

    # AND the elapsed retrieval time is returned
    assert elapsed > 0
def test_fetch_flowcell_max_flowcells_ondisk(
    mock_store,
    mock_pdc,
    mock_flowcell,
    mock_check_processing,
    mock_maximum_flowcells_ondisk,
    caplog,
):
    """Verify that fetch_flowcell fetches nothing and logs a message when the
    maximum number of flowcells on disk has been reached."""

    caplog.set_level(logging.INFO)

    # GIVEN a backup API
    api = BackupApi(
        mock_store,
        mock_pdc,
        max_flowcells_on_disk=1250,
        root_dir="/path/to/root_dir",
    )

    # AND the processing queue has room, but the on-disk maximum is reached
    api.check_processing.return_value = True
    api.maximum_flowcells_ondisk.return_value = True

    # WHEN a flowcell is fetched
    elapsed = api.fetch_flowcell(mock_flowcell, dry_run=False)

    # THEN nothing is fetched
    assert elapsed is None

    # AND the log states that the on-disk maximum has been reached
    assert "maximum flowcells ondisk reached" in caplog.text
Beispiel #5
0
def fetch_flowcell(context: click.Context, dry_run: bool, flowcell: str):
    """Fetch the first flowcell in the requested queue from backup."""
    # NOTE(review): the docstring is left untouched because it is presumably
    # shown as the CLI help text of this command -- confirm before extending it.
    #
    # context:  click context; ``context.obj`` must provide "database" and may
    #           provide "max_flowcells" (falls back to MAX_FLOWCELLS_ON_DISK).
    # dry_run:  forwarded to the backup API; when true the status of the named
    #           flowcell is not changed here either.
    # flowcell: optional flowcell name; when empty no specific flowcell is
    #           handed to the backup API.
    status_api = Store(context.obj["database"])
    max_flowcells_on_disk = context.obj.get("max_flowcells", MAX_FLOWCELLS_ON_DISK)
    pdc_api = PdcApi()
    backup_api = BackupApi(
        status=status_api,
        pdc_api=pdc_api,
        max_flowcells_on_disk=max_flowcells_on_disk,
    )

    # Resolve the optional flowcell name to a database object; an unknown name
    # is an error and aborts the command.
    flowcell_obj = None
    if flowcell:
        flowcell_obj = status_api.flowcell(flowcell)
        if flowcell_obj is None:
            LOG.error(f"{flowcell}: not found in database")
            context.abort()

    retrieval_time = backup_api.fetch_flowcell(flowcell_obj=flowcell_obj, dry_run=dry_run)

    if retrieval_time:
        # retrieval_time is presumably in seconds (it is divided by 60 twice
        # to obtain hours) -- verify against BackupApi.fetch_flowcell.
        hours = retrieval_time / 60 / 60
        # ".1f" prints one decimal place. The former ".1" (no presentation
        # type) meant "one significant digit" and rendered e.g. 12.5 hours as
        # "1e+01".
        LOG.info(f"Retrieval time: {hours:.1f}h")
        return

    # Nothing was retrieved; without an explicit flowcell there is nothing
    # left to do.
    if not flowcell:
        return

    # The named flowcell was not retrieved in this run: mark it as requested.
    if not dry_run:
        LOG.info(f"{flowcell}: updating flowcell status to requested")
        flowcell_obj.status = "requested"
        status_api.commit()
def test_maximum_processing_queue_not_full(mock_store, mock_pdc):
    """Verify that check_processing returns True when no flowcells are being processed."""
    # GIVEN a backup API
    api = BackupApi(
        mock_store,
        mock_pdc,
        max_flowcells_on_disk=1250,
        root_dir="/path/to/root_dir",
    )

    # AND status-db reports zero flowcells with status "processing"
    processing_query = mock_store.flowcells(status="processing")
    processing_query.count.return_value = 0

    # WHEN checking the processing queue with room for one flowcell
    queue_has_room = api.check_processing(max_processing_flowcells=1)

    # THEN the check passes
    assert queue_has_room is True
def test_maximum_flowcells_ondisk_not_reached(mock_store, mock_pdc):
    """Verify that maximum_flowcells_ondisk returns False while below the limit."""
    # GIVEN a backup API that allows at most 1250 flowcells on disk
    api = BackupApi(
        mock_store,
        mock_pdc,
        max_flowcells_on_disk=1250,
        root_dir="/path/to/root_dir",
    )

    # AND status-db reports 1000 flowcells with status "ondisk"
    ondisk_query = mock_store.flowcells(status="ondisk")
    ondisk_query.count.return_value = 1000

    # WHEN asking whether the on-disk maximum has been reached
    limit_reached = api.maximum_flowcells_ondisk()

    # THEN the answer is no
    assert limit_reached is False
def test_pop_flowcell_no_flowcell_requested(mock_store, mock_pdc):
    """Verify that pop_flowcell returns None when the requested queue is empty."""
    # GIVEN a backup API
    api = BackupApi(
        mock_store,
        mock_pdc,
        max_flowcells_on_disk=1250,
        root_dir="/path/to/root_dir",
    )

    # AND status-db has no flowcell with status "requested"
    requested_query = mock_store.flowcells(status="requested")
    requested_query.first.return_value = None

    # WHEN popping the next requested flowcell
    next_flowcell = api.pop_flowcell(dry_run=False)

    # THEN nothing is returned
    assert next_flowcell is None
def test_pop_flowcell_dry_run(mock_store, mock_pdc, mock_flowcell):
    """Verify that pop_flowcell returns a flowcell without committing in a dry run."""
    # GIVEN a backup API backed by the mocked status-db
    api = BackupApi(
        mock_store,
        mock_pdc,
        max_flowcells_on_disk=1250,
        root_dir="/path/to/root_dir",
    )

    # WHEN the next requested flowcell is popped as a dry run
    next_flowcell = api.pop_flowcell(dry_run=True)

    # THEN a flowcell is returned
    assert next_flowcell is not None

    # AND nothing has been committed to status-db
    assert not mock_store.commit.called
def test_pop_flowcell_next_requested(mock_store, mock_pdc, mock_flowcell):
    """Verify that pop_flowcell returns the next requested flowcell and commits
    the change to status-db when it is not a dry run."""
    # GIVEN status-db needs to be checked for flowcells to be retrieved from PDC
    backup_api = BackupApi(
        mock_store, mock_pdc, max_flowcells_on_disk=1250, root_dir="/path/to/root_dir"
    )

    # WHEN a flowcell is requested to be retrieved from PDC
    mock_store.flowcells(status="requested").first.return_value = mock_flowcell

    popped_flowcell = backup_api.pop_flowcell(dry_run=False)

    # THEN a flowcell is returned
    assert popped_flowcell is not None

    # AND status-db is updated with the new status. This is the counterpart of
    # the dry-run test, which asserts that no commit happens; it was described
    # in the original THEN comment but never actually asserted.
    assert mock_store.commit.called
Beispiel #11
0
def backup(context: CGConfig):
    """Backup utilities."""
    # Build a BackupApi from the configuration and publish it under the
    # "backup_api" key of context.meta_apis.
    pdc_api = PdcApi()
    status_db = context.status_db
    # Fall back to the module default when the config has no (truthy) limit.
    flowcell_limit = context.max_flowcells or MAX_FLOWCELLS_ON_DISK
    root_dir = context.backup.root.dict()
    backup_api = BackupApi(
        status=status_db,
        pdc_api=pdc_api,
        max_flowcells_on_disk=flowcell_limit,
        root_dir=root_dir,
    )
    context.meta_apis["backup_api"] = backup_api
Beispiel #12
0
def fixture_backup_context(cg_context: CGConfig) -> CGConfig:
    """Return cg_context with a BackupApi registered as meta_apis["backup_api"]."""
    status_db = cg_context.status_db
    pdc_api = PdcApi()
    # Fall back to the module default when the config has no (truthy) limit.
    flowcell_limit = cg_context.max_flowcells or MAX_FLOWCELLS_ON_DISK
    root_dir = cg_context.backup.root.dict()
    backup_api = BackupApi(
        status=status_db,
        pdc_api=pdc_api,
        max_flowcells_on_disk=flowcell_limit,
        root_dir=root_dir,
    )
    cg_context.meta_apis["backup_api"] = backup_api
    return cg_context
def test_fetch_flowcell_retrieve_next_flowcell(
    mock_store,
    mock_pdc,
    mock_check_processing,
    mock_maximum_flowcells_ondisk,
    mock_pop_flowcell,
    caplog,
):
    """Verify the happy path of fetch_flowcell when no flowcell is specified and
    the next flowcell is taken from the requested queue."""

    caplog.set_level(logging.INFO)

    # GIVEN a backup API
    api = BackupApi(
        mock_store,
        mock_pdc,
        max_flowcells_on_disk=1250,
        root_dir="/path/to/root_dir",
    )

    # AND a flowcell with status "requested" that is handed out by pop_flowcell
    requested_flowcell = mock_store.add_flowcell(status="requested")
    mock_pop_flowcell.return_value = requested_flowcell

    # AND the backup API is allowed to start a retrieval
    api.check_processing.return_value = True
    api.maximum_flowcells_ondisk.return_value = False

    # WHEN fetching without specifying a flowcell
    unspecified_flowcell = None
    elapsed = api.fetch_flowcell(unspecified_flowcell, dry_run=False)

    # THEN the start of the PDC retrieval is logged
    assert "retrieving from PDC" in caplog.text

    # AND the popped flowcell ends up with status "retrieved", which is logged as well
    popped = mock_pop_flowcell.return_value
    expected_status_message = f'Status for flowcell {popped.name} set to "retrieved"'
    assert expected_status_message in caplog.text
    assert mock_pop_flowcell.return_value.status == "retrieved"

    # AND the change has been committed to status-db
    assert mock_store.commit.called

    # AND the elapsed retrieval time is returned
    assert elapsed > 0
def test_fetch_flowcell_processing_queue_full(
    mock_store,
    mock_pdc,
    mock_flowcell,
    mock_check_processing,
    caplog,
):
    """Verify that fetch_flowcell fetches nothing and logs a message when the
    processing queue is full."""

    caplog.set_level(logging.INFO)

    # GIVEN a backup API
    api = BackupApi(
        mock_store,
        mock_pdc,
        max_flowcells_on_disk=1250,
        root_dir="/path/to/root_dir",
    )

    # AND the processing check reports a full queue
    api.check_processing.return_value = False

    # WHEN a flowcell is fetched
    elapsed = api.fetch_flowcell(mock_flowcell, dry_run=False)

    # THEN nothing is fetched
    assert elapsed is None

    # AND the log states that the processing queue is full
    assert "processing queue is full" in caplog.text