def test_find_file_by_search_terms(
        gdrive_service: DriveService,
        gdrive_folder: DriveObject,
        subdir="test_find_file_by_search_terms",
        search_terms=("hello", "world", "who"),
        filenames=("hello____world-who.txt", "hello____world----who.txt"),
):
    """Search for files by search terms."""

    test_dir = mkdir(gdrive_service, gdrive_folder.id, subdir)

    first_put = put_file(gdrive_service, test_dir.id, filenames[0])
    with first_put as fh:
        fh.write("this is random text 1")

    # a single file matches all the terms; it should be found.
    found = find_file_by_search_terms(gdrive_service, test_dir.id,
                                      search_terms)
    assert found.id == first_put.id

    # appending a term that no file satisfies should make the lookup fail.
    with pytest.raises(RuntimeError):
        find_file_by_search_terms(gdrive_service, test_dir.id,
                                  search_terms + ("chicken", ))

    second_put = put_file(gdrive_service, test_dir.id, filenames[1])
    with second_put as fh:
        fh.write("this is random text 2")

    # with two files matching the terms, the lookup should also fail.
    with pytest.raises(RuntimeError):
        find_file_by_search_terms(gdrive_service, test_dir.id, search_terms)
def test_find_file_by_name_most_recent(
    gdrive_service,
    gdrive_folder: DriveObject,
    filename="test_find_file_by_name_most_recent.txt",
):
    """Puts a file, and then put it again with overwrite_if_present=False.  Finding by
    filename, using require single result mode, should fail.  Finding by the filename,
    using the most recent mode, should find the second file."""
    put_request = put_file(gdrive_service, gdrive_folder.id, filename)
    with put_request as fh:
        fh.write("first")

    put_request = put_file(gdrive_service,
                           gdrive_folder.id,
                           filename,
                           overwrite_if_present=False)
    with put_request as fh:
        fh.write("second")

    # two files with the same name now exist, so single-result mode fails.
    with pytest.raises(MultipleMatchesError):
        find_file_by_name(gdrive_service, gdrive_folder.id, filename)

    # renamed from `id` to avoid shadowing the builtin.
    found_id = find_file_by_name(gdrive_service, gdrive_folder.id, filename,
                                 FindMode.MOST_RECENTLY_MODIFIED).id
    assert found_id == put_request.id

    # a name that matches nothing should raise even in most-recent mode.
    with pytest.raises(NoMatchesError):
        find_file_by_name(
            gdrive_service,
            gdrive_folder.id,
            "this_file_does_not_exist",
            FindMode.MOST_RECENTLY_MODIFIED,
        )
# Example #3
 def write_marker_file(self, sample_barcode):
     """Writes a marker file for the given results"""
     # resolve the configured markers folder, then create an empty file
     # named after the sample barcode inside it.
     folder_id = drive.get_folder_id_of_path(
         self.drive_service, self.cfg.ACCESSSION_TRACKING_MARKERS_FOLDER)
     with drive.put_file(self.drive_service, folder_id, sample_barcode):
         ...
# Example #4
def create_layout_pdf(cfg: Config, entry_data: Dict[str, str]):
    """Main function to read a layout file and write the resulting plate layout map.

    Parameters
    ----------
    cfg: Config
        configuration information
    entry_data: Dict[str, str]
        dictionary containing the response that was submitted to Sample Plate Metada.
        The required keys are: the researcher, timestamp, sample plate barcode, and a
        link to the sample plate map in Google Drive. Optionally, the "local_run" key
        is used as a flag to indicate the script is being run from the command line
        rather than on AWS.
    """
    sample_barcode = entry_data[SampleMetadata.SAMPLE_PLATE_BARCODE]
    output_filename = f"{sample_barcode}.pdf"

    # pick the output destination: a local file when run from the command
    # line, otherwise a drive upload in the configured layout-PDF folder.
    if LOCAL_RUN in entry_data:
        output_path, drive_service = entry_data[LOCAL_RUN]
        # NOTE(review): this handle is opened eagerly; if BadDriveURL is
        # raised below it is never entered/closed — consider deferring.
        output_file_object = (output_path / output_filename).open("wb")
    else:
        logger.debug("getting gdrive credentials")
        google_creds = gutils.get_secrets_manager_credentials()
        drive_service = drive.get_service(google_creds)

        processed_layout_folder_id = drive.get_folder_id_of_path(
            drive_service, cfg.LAYOUT_PDF_FOLDER)

        output_file_object = drive.put_file(
            drive_service,
            processed_layout_folder_id,
            output_filename,
            binary=True,
        )

    # a missing/invalid plate-map link surfaces as KeyError here; re-raise
    # as a domain-specific error naming the offending sheet and barcode.
    try:
        plate_map_file = drive.get_layout_file_from_url(
            drive_service, entry_data[SampleMetadata.SAMPLE_PLATE_MAP])
    except KeyError:
        raise BadDriveURL(
            f"Bad URL in {SampleMetadata.SHEET_NAME} for {sample_barcode}")

    # the plate map's filename determines how its contents are parsed.
    plate_map_type = accession.get_plate_map_type_from_name(
        plate_map_file.name)

    with plate_map_file.open() as fh:
        accession_data = accession.read_accession_data(plate_map_type, fh)

    logger.info(f"Writing layout map to {output_filename}")
    with output_file_object as output_fh:
        format_pdf(
            entry_data[SampleMetadata.SAMPLE_PLATE_BARCODE],
            accession_data,
            entry_data[SampleMetadata.RESEARCHER_NAME],
            format_time(cfg, entry_data[SampleMetadata.TIMESTAMP]),
            output_fh,
        )
# Example #5
def test_get_contents_by_folder_id_paging(
    gdrive_service: DriveService,
    gdrive_folder: DriveObject,
    new_folder_name="test_get_contents_by_folder_id_paging",
):
    """Verify that get_contents_by_folder_id can iterate through pages of results correctly."""
    target = mkdir(gdrive_service, gdrive_folder.id, new_folder_name)

    # create two files so a page size of one forces multiple pages.
    for index in range(2):
        with put_file(gdrive_service, target.id, f"file{index}") as fh:
            fh.write(b"this is a file")

    # even with page_size=1, every entry should still be returned.
    listing = get_contents_by_folder_id(
        gdrive_service, target.id, only_files=True, page_size=1
    )
    assert len(listing) == 2
# Example #6
def test_put_overwrite_multiple(
    gdrive_service: DriveService,
    gdrive_folder: DriveObject,
    filename="test_put_overwrite_multiple.txt",
):
    """Test the case where we are overwriting and there are multiple files we
    could possibly overwrite.  It should overwrite the newest file."""
    request = put_file(gdrive_service, gdrive_folder.id, filename)
    with request as fh:
        fh.write("first")
    first_id = request.id

    # a second, distinct file with the same name.
    request = put_file(gdrive_service,
                       gdrive_folder.id,
                       filename,
                       overwrite_if_present=False)
    with request as fh:
        fh.write("second")
    second_id = request.id

    # overwriting should target the newest copy, i.e. the second file.
    request = put_file(gdrive_service,
                       gdrive_folder.id,
                       filename,
                       overwrite_if_present=True)
    with request as fh:
        fh.write("third")
    assert request.id == second_id

    # exactly two files with this name remain in the folder.
    entries = get_contents_by_folder_id(gdrive_service,
                                        gdrive_folder.id,
                                        only_files=True)
    assert sum(1 for entry in entries if entry.name == filename) == 2

    # the first copy is untouched; the second now holds the overwrite.
    with get_file(gdrive_service, first_id, True) as fh:
        assert fh.read() == b"first"
    with get_file(gdrive_service, second_id, False) as fh:
        assert fh.read() == "third"
# Example #7
def test_put_no_overwrite(
    gdrive_service: DriveService,
    gdrive_folder: DriveObject,
    filename="test_put_no_overwrite.txt",
):
    """Puts a file, and then put it again with overwrite_if_present=False.
    Both files should be found."""
    first_put = put_file(gdrive_service, gdrive_folder.id, filename)
    with first_put as fh:
        fh.write("first")

    # put the same name again without overwriting the existing file.
    second_put = put_file(gdrive_service,
                          gdrive_folder.id,
                          filename,
                          overwrite_if_present=False)
    with second_put as fh:
        fh.write("second")

    # both copies should appear in the folder listing.
    entries = get_contents_by_folder_id(gdrive_service,
                                        gdrive_folder.id,
                                        only_files=True)
    assert sum(1 for entry in entries if entry.name == filename) == 2
# Example #8
def test_put_guess_mimetype(
    gdrive_service: DriveService,
    gdrive_folder: DriveObject,
    filename="test_put_guess_mimetype.txt",
):
    """Puts a file, guessing the mimetype from the filename, and ensure we can
    see the file."""
    upload = put_file(gdrive_service, gdrive_folder.id, filename)
    with upload as fh:
        fh.write("hello world")

    # look the file up by name and confirm the contents round-trip.
    with get_file_by_name(gdrive_service, gdrive_folder.id, filename) as fh:
        assert fh.read() == "hello world"
def test_find_file_by_search_terms_exclude_contents(
        gdrive_service: DriveService,
        gdrive_folder: DriveObject,
        subdir="test_find_file_by_search_terms_exclude_contents",
        search_terms=("hello", "world", "who"),
        filenames=("keywords_not_in_filename.txt",
                   "hello____world----who.txt"),
):
    """Search for files by search terms."""

    test_dir = mkdir(gdrive_service, gdrive_folder.id, subdir)

    # this file carries the keywords only in its body, not in its name.
    with put_file(gdrive_service, test_dir.id, filenames[0]) as fh:
        fh.write(" ".join(search_terms))

    # this file carries the keywords in its filename.
    name_match = put_file(gdrive_service, test_dir.id, filenames[1])
    with name_match as fh:
        fh.write("this is random text 2")

    # only the filename match should be returned.
    found = find_file_by_search_terms(gdrive_service, test_dir.id,
                                      search_terms)
    assert found.id == name_match.id
def test_find_file_by_name(
    gdrive_service,
    gdrive_folder: DriveObject,
    new_filename="find_file_by_name.txt",
):
    """Tests that we can search for a file by name successfully and unsuccessfully."""
    # before the file exists, the lookup must fail.
    with pytest.raises(RuntimeError):
        find_file_by_name(gdrive_service, gdrive_folder.id, new_filename)

    upload = put_file(gdrive_service, gdrive_folder.id, new_filename)
    with upload as fh:
        fh.write("this is random text 1")

    # now the lookup should locate the file we just wrote.
    match = find_file_by_name(gdrive_service, gdrive_folder.id, new_filename)
    assert upload.id == match.id
# Example #11
def test_put_read_after(
    gdrive_service: DriveService,
    gdrive_folder: DriveObject,
    filename="test_put_read_after.txt",
):
    """Puts a file and ensure we can read what was written to the file."""
    upload = put_file(gdrive_service, gdrive_folder.id, filename)
    with upload as fh:
        fh.write("hello world")

    # look the file up by name and verify its contents.
    with get_file_by_name(gdrive_service, gdrive_folder.id, filename) as fh:
        assert fh.read() == "hello world"

    # re-entering the same put request should read back what was written.
    with upload as fh:
        assert fh.read() == "hello world"
# Example #12
def test_put_enforce_binary(
    gdrive_service: DriveService,
    gdrive_folder: DriveObject,
    filename="test_put_enforce_binary.txt",
):
    """Puts a file, and ensure we can see the file."""
    upload = put_file(gdrive_service,
                      gdrive_folder.id,
                      filename,
                      binary=True)
    with upload as fh:
        fh.write(b"hello world")

    # the content-type is guessed from the filename, so the readback is
    # still treated as text even though the write was binary.
    with get_file_by_name(gdrive_service, gdrive_folder.id, filename) as fh:
        assert fh.read() == "hello world"
# Example #13
def test_get_put_has_name(
    gdrive_service: DriveService,
    gdrive_folder: DriveObject,
    filename="test_get_put_has_name.txt",
):
    """Puts and gets files.  Ensure they both have sane 'name' fields for the file
    handle."""
    upload = put_file(gdrive_service, gdrive_folder.id, filename)
    with upload as fh:
        fh.write("hello world")
        assert fh.name is not None
    assert upload.fh.name is not None

    # the read-side handle should also expose a usable name.
    with get_file_by_name(gdrive_service, gdrive_folder.id, filename) as fh:
        assert fh.read() == "hello world"
        assert fh.name is not None
def test_find_file_by_name_not_dir(
    gdrive_service,
    gdrive_folder: DriveObject,
    new_filename="find_file_by_name_not_dir.txt",
):
    """Tests that we can search for a file and not get matched to a directory that is present."""
    # a directory with the target name must not satisfy the file lookup.
    mkdir(gdrive_service, gdrive_folder.id, new_filename)
    with pytest.raises(RuntimeError):
        find_file_by_name(gdrive_service, gdrive_folder.id, new_filename)

    # now we put a file with the same name (yeah, google drive is weird in that this is permitted)
    upload = put_file(gdrive_service, gdrive_folder.id, new_filename)
    with upload as fh:
        fh.write("this is random text 1")

    # the lookup should return the file, not the directory.
    match = find_file_by_name(gdrive_service, gdrive_folder.id, new_filename)
    assert upload.id == match.id
# Example #15
def test_put_override_content_type(
    gdrive_service: DriveService,
    gdrive_folder: DriveObject,
    filename="test_put_override_content_type.txt",
):
    """Puts a file, and ensure we can see the file."""
    upload = put_file(
        gdrive_service,
        gdrive_folder.id,
        filename,
        content_type="application/i-made-this-up",
    )
    with upload as fh:
        fh.write(b"hello world")

    # the explicit non-text content-type means the readback is binary.
    with get_file_by_name(gdrive_service, gdrive_folder.id, filename) as fh:
        assert fh.read() == b"hello world"
# Example #16
def test_get_put_http_client(
    gdrive_service: DriveService,
    gdrive_folder: DriveObject,
    filename="test_get_put_http_client.txt",
):
    """Puts a file.  Then retrieve the file using a custom HTTP client."""
    upload = put_file(gdrive_service, gdrive_folder.id, filename)
    with upload as fh:
        fh.write("hello world")
        assert fh.name is not None
    assert upload.fh.name is not None

    # build a fresh http client that reuses the service's credentials, and
    # fetch the file through it.
    client = new_http_client_from_service(gdrive_service)
    with get_file_by_name(gdrive_service,
                          gdrive_folder.id,
                          filename,
                          http=client) as fh:
        assert fh.read() == "hello world"
        assert fh.name is not None
# Example #17
def gdrive_hamilton_folder(
    gdrive_service, gdrive_folder, hamilton_plate_layout, sample_plate_barcode
):
    """This fixture creates a WellLit plate layout file and then removes it at
    the end."""
    upload = put_file(
        gdrive_service,
        gdrive_folder.id,
        f"test_04082020-173053_{sample_plate_barcode}_hamilton - Hanna Retallack.csv",
        binary=False,
    )

    # rewind the layout fixture and copy it line by line into drive.
    hamilton_plate_layout.seek(0)
    with upload as fh:
        for row in hamilton_plate_layout:
            fh.write(row)

    yield gdrive_folder.id

    # teardown: delete the uploaded layout file.
    gdrive_service.files().delete(fileId=upload.id).execute(
        num_retries=NUM_RETRIES)
# Example #18
def test_get_contents_by_folder_id_types(
    gdrive_service: DriveService,
    gdrive_folder: DriveObject,
    new_folder_name="test_get_contents_by_folder_id_types",
):
    """Verify that get_contents_by_folder_id can filter by only files correctly."""
    target = mkdir(gdrive_service, gdrive_folder.id, new_folder_name)

    # populate the folder with one file and one subdirectory.
    with put_file(gdrive_service, target.id, "file") as fh:
        fh.write(b"this is a file")
    mkdir(gdrive_service, target.id, "another-subdir")

    # only_files=True should hide the subdirectory.
    files_only = get_contents_by_folder_id(gdrive_service, target.id,
                                           only_files=True)
    assert len(files_only) == 1
    assert files_only[0].name == "file"

    # only_files=False should return both entries.
    everything = get_contents_by_folder_id(gdrive_service, target.id,
                                           only_files=False)
    assert len(everything) == 2
    names = {entry.name for entry in everything}
    assert "file" in names
    assert "another-subdir" in names
# Example #19
def gdrive_legacy_folder(
    gdrive_service, gdrive_folder, legacy_plate_layout, sample_plate_barcode
):
    """This fixture creates a legacy plate layout file and then removes it at
    the end."""
    upload = put_file(
        gdrive_service,
        gdrive_folder.id,
        f"test_welllit_{sample_plate_barcode}_plate_layout.xlsx",
        binary=True,
    )

    # rewind the layout fixture and copy it line by line into drive.
    legacy_plate_layout.seek(0)
    with upload as fh:
        for row in legacy_plate_layout:
            fh.write(row)

    yield gdrive_folder.id

    # teardown: delete the uploaded layout file.
    gdrive_service.files().delete(fileId=upload.id).execute(
        num_retries=NUM_RETRIES)
# Example #20
def processing(cfg: Config, google_credentials: service_account.Credentials):
    """Run one pass of the qPCR results-processing loop.

    Scans the logs folder for run files, groups them by barcode, skips
    barcodes that already have a marker file, and for each complete set of
    files: loads metadata, processes well data and controls, writes CSV and
    PDF reports to drive, emails the report, and writes a marker file so the
    barcode is not processed again.

    Parameters
    ----------
    cfg: Config
        configuration information (folder paths, email settings, etc.)
    google_credentials: service_account.Credentials
        credentials used for both drive access and sending email
    """
    git_info = get_git_info()
    drive_service = drive.get_service(google_credentials)
    logger.info(msg=f"Starting processing loop with code version: {git_info}")

    # qpcr logs folder
    logs_folder_id = drive.get_folder_id_of_path(drive_service,
                                                 cfg.PCR_LOGS_FOLDER)

    # markers folder
    markers_folder_id = drive.get_folder_id_of_path(drive_service,
                                                    cfg.PCR_MARKERS_FOLDER)

    # csv results folder
    csv_results_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.CSV_RESULTS_FOLDER)

    # CB rad results folder
    cb_report_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.CHINA_BASIN_CSV_REPORTS_FOLDER)

    # final reports folder
    final_results_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.FINAL_REPORTS_FOLDER)

    # get the collection spreadsheet
    collective_form = CollectiveForm(
        drive_service, cfg["DATA"]["collection_form_spreadsheet_id"])

    logs_folder_contents = drive.get_contents_by_folder_id(drive_service,
                                                           logs_folder_id,
                                                           only_files=True)
    marker_folder_contents = drive.get_contents_by_folder_id(drive_service,
                                                             markers_folder_id,
                                                             only_files=True)
    plate_layout_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.PLATE_LAYOUT_FOLDER)
    # marker file names double as the set of already-processed barcodes.
    completed_barcodes = set(marker_folder_entry.name
                             for marker_folder_entry in marker_folder_contents)

    sample_metadata_form = collective_form[SampleMetadata.SHEET_NAME]
    rerun_form = collective_form[SampleRerun.SHEET_NAME]

    # group log file entries by barcode
    logger.info(msg="Checking for samples to process")

    barcodes_to_process = defaultdict(RunFiles)
    for entry in logs_folder_contents:
        # skip files that don't match the qPCR naming scheme, and barcodes
        # that already have a marker.
        m = RunFiles.get_qpcr_file_type(entry.name)
        if m is None or m[RunFiles.BARCODE] in completed_barcodes:
            continue
        else:
            barcodes_to_process[m[RunFiles.BARCODE]].add_file(m, entry)

    for barcode, barcode_files in barcodes_to_process.items():
        # all files must be present, at least one quant_amp file
        if not barcode_files.all_files:
            message = f"Missing files for: {barcode}. Skipping for now"
            logger.critical(msg=message, extra={"notify_slack": True})
            continue

        try:
            logger.info(msg=f"Found sample to process, barcode: {barcode}")

            logger.info(msg=f"Getting metadata and data for: {barcode}")
            bravo_metadata = BravoMetadata.load_from_spreadsheet(
                barcode,
                collective_form,
            )
            # a plate without a protocol cannot be processed; skip it.
            if bravo_metadata.sop_protocol is None:
                message = f"Skipping sample plate: {barcode}, no protocol"
                logger.critical(msg=message, extra={"notify_slack": True})
                continue

            protocol = get_protocol(bravo_metadata.sop_protocol)

            # every quant-amp file the protocol mapping requires must exist.
            if not set(barcode_files.quant_amp).issuperset(protocol.mapping):
                missing = map(
                    str,
                    set(protocol.mapping) - set(barcode_files.quant_amp))
                message = f"Missing quant amp files for {barcode}: {', '.join(missing)}"
                logger.critical(msg=message, extra={"notify_slack": True})
                continue

            # process well data and check controls, return results
            logger.info(
                msg=f"Processing well data and controls for: {barcode}")
            accession_data = accession.get_accession_data_with_rerun(
                drive_service,
                plate_layout_folder_id,
                sample_metadata_form,
                rerun_form,
                bravo_metadata.sample_barcode,
            )

            control_wells = get_control_wells_from_type(
                controls_type=bravo_metadata.controls_type,
                accession_data=accession_data,
            )
            update_accession_data_with_controls(control_wells, accession_data,
                                                barcode)

            processing_results = process_barcode(
                cfg,
                barcode,
                barcode_files,
                bravo_metadata,
                protocol,
                control_wells,
                accession_data,
            )

            # upload the main CSV results.
            with drive.put_file(
                    drive_service,
                    csv_results_folder_id,
                    processing_results.results_filename,
            ) as fh:
                processing_results.write_results(fh)

            # upload the China Basin CSV report; the request's id is used
            # later in the email body.
            china_basin_result_file = drive.put_file(
                drive_service,
                cb_report_folder_id,
                processing_results.cb_report_filename,
            )
            with china_basin_result_file as fh:
                processing_results.write_cb_report(fh)

            # create pdf report
            logger.info(
                msg=f"Generating and uploading results PDF for: {barcode}")
            final_pdf = io.BytesIO()
            create_final_pdf(processing_results, final_pdf)
            pdf_results_file = drive.put_file(
                drive_service,
                final_results_folder_id,
                processing_results.final_pdf_filename,
            )
            with pdf_results_file as out_fh:
                out_fh.write(final_pdf.getvalue())

            logger.info(msg=f"Sending email report: {barcode}")
            mail.send_email(
                google_credentials,
                sender=cfg["EMAIL"].get("sender"),
                recipients=cfg["EMAIL"].get("recipients"),
                subject=_format_email_subject(
                    sample_barcode=bravo_metadata.sample_barcode,
                    qpcr_barcode=barcode,
                ),
                body=_format_email_body(
                    sample_barcode=bravo_metadata.sample_barcode,
                    results_file_id=china_basin_result_file.id,
                ),
                attachments={processing_results.final_pdf_filename: final_pdf},
            )

            message = (
                f"Processed sample plate: {bravo_metadata.sample_barcode}-{barcode}"
                f" using rev {git_info}")
            logger.critical(msg=message, extra={"notify_slack": True})
            # write a marker so we don't process this file again.
            processing_results.write_marker_file(drive_service,
                                                 markers_folder_id)

        except Exception as err:
            # one failing barcode must not abort the rest of the loop; log
            # (with the traceback) and move on to the next barcode.
            logger.critical(f"Error in [{cfg.aws_env}]: {err}",
                            extra={"notify_slack": True})
            logger.exception("Details:")
# Example #21
def test_processing(gdrive_service: DriveService, gdrive_folder: DriveObject):
    """Test to validate the gdrive processing pipeline.  This test has four phases:
    1. Create directories on a test gdrive space for the files that are staged.
    2. Stage the files so that it mimics the production environment.  This includes
       the logs, the plate layout files, and the control layout files.
    3. Run the main processsing loop.
    4. Verify that the output csv files are correct.
    5. Verify that the expected markers and PDFs are present.

    If this test fails, verify that all the data necessary to run the pipeline is staged
    correctly."""

    ####################################################################################
    # STEP 1: Create directories on a test gdrive space for the files that are staged.

    # set up the test space
    cfg = AlternateGDriveConfig(gdrive_folder.name)

    # make the necessary input folders
    logs_folder_id = mkdir_recursive(gdrive_service, "root",
                                     cfg.PCR_LOGS_FOLDER)
    plate_layout_folder_id = mkdir_recursive(gdrive_service, "root",
                                             cfg.PLATE_LAYOUT_FOLDER)

    # make the necessary output folders
    mkdir_recursive(gdrive_service, "root", cfg.CSV_RESULTS_FOLDER)
    cb_csv_folder_id = mkdir_recursive(gdrive_service, "root",
                                       cfg.CHINA_BASIN_CSV_REPORTS_FOLDER)
    final_results_folder_id = mkdir_recursive(gdrive_service, "root",
                                              cfg.FINAL_REPORTS_FOLDER)
    markers_folder_id = mkdir_recursive(gdrive_service, "root",
                                        cfg.PCR_MARKERS_FOLDER)

    ####################################################################################
    # STEP 2: Stage the files.

    # copy all the files to the appropriate places
    for filename in LOG_FILES:
        # mode is "r" plus the per-file suffix — presumably "" or "b" for
        # text vs binary; TODO confirm against file_mode's definition.
        mode = f"r{file_mode(filename)}"
        with (EXAMPLE_FILE_DIR / filename).open(mode) as src_fh, put_file(
                gdrive_service, logs_folder_id, filename) as dst_fh:
            dst_fh.write(src_fh.read())

    for filename in PLATE_LAYOUT_FILES:
        mode = f"r{file_mode(filename)}"
        with (EXAMPLE_FILE_DIR / filename).open(mode) as src_fh, put_file(
                gdrive_service, plate_layout_folder_id, filename) as dst_fh:
            dst_fh.write(src_fh.read())

    ####################################################################################
    # STEP 3: Run the processing pipeline

    processing(cfg, credentials_for_tests())

    ####################################################################################
    # STEP 4: Verify the csv files.

    # compare each uploaded csv row-by-row against the local reference copy.
    for remote_filename, local_filename in EXPECTED_CSV_FILES.items():
        with (EXAMPLE_FILE_DIR /
              local_filename).open("r") as t1, get_file_by_name(
                  gdrive_service, cb_csv_folder_id, remote_filename) as t2:
            rdr1 = csv.reader(t1)
            rdr2 = csv.reader(t2)
            for row1, row2, in zip(rdr1, rdr2):
                assert (
                    row1 == row2
                ), f"mismatch between {local_filename} and {remote_filename}"

    ####################################################################################
    # STEP 5: Verify that markers and PDFs were created and uploaded

    marker_folder_contents = get_contents_by_folder_id(gdrive_service,
                                                       markers_folder_id,
                                                       only_files=True)
    marker_set = {
        marker_folder_entry.name
        for marker_folder_entry in marker_folder_contents
    }

    assert marker_set == EXPECTED_MARKERS

    pdf_folder_contents = get_contents_by_folder_id(gdrive_service,
                                                    final_results_folder_id,
                                                    only_files=True)
    pdf_report_set = {
        pdf_folder_entry.name
        for pdf_folder_entry in pdf_folder_contents
    }

    assert pdf_report_set == EXPECTED_PDFS
 def write_marker_file(self, drive_service, markers_folder_id):
     """Writes a marker file for the given results"""
     # the marker is an empty file named after the pcr barcode.
     marker_name = self.bravo_metadata.pcr_barcode
     with drive.put_file(drive_service, markers_folder_id, marker_name):
         ...