Beispiel #1
0
 def write_marker_file(self, sample_barcode):
     """Writes a marker file for the given results"""
     marker_folder_id = drive.get_folder_id_of_path(
         self.drive_service, self.cfg.ACCESSSION_TRACKING_MARKERS_FOLDER)
     with drive.put_file(self.drive_service, marker_folder_id,
                         sample_barcode):
         ...
Beispiel #2
0
def create_layout_pdf(cfg: Config, entry_data: Dict[str, str]):
    """Main function to read a layout file and write the resulting plate layout map.

    Parameters
    ----------
    cfg: Config
        configuration information
    entry_data: Dict[str, str]
        dictionary containing the response that was submitted to Sample Plate Metada.
        The required keys are: the researcher, timestamp, sample plate barcode, and a
        link to the sample plate map in Google Drive. Optionally, the "local_run" key
        is used as a flag to indicate the script is being run from the command line
        rather than on AWS.
    """
    sample_barcode = entry_data[SampleMetadata.SAMPLE_PLATE_BARCODE]
    output_filename = f"{sample_barcode}.pdf"

    if LOCAL_RUN in entry_data:
        output_path, drive_service = entry_data[LOCAL_RUN]
        output_file_object = (output_path / output_filename).open("wb")
    else:
        logger.debug("getting gdrive credentials")
        google_creds = gutils.get_secrets_manager_credentials()
        drive_service = drive.get_service(google_creds)

        processed_layout_folder_id = drive.get_folder_id_of_path(
            drive_service, cfg.LAYOUT_PDF_FOLDER)

        output_file_object = drive.put_file(
            drive_service,
            processed_layout_folder_id,
            output_filename,
            binary=True,
        )

    try:
        plate_map_file = drive.get_layout_file_from_url(
            drive_service, entry_data[SampleMetadata.SAMPLE_PLATE_MAP])
    except KeyError:
        raise BadDriveURL(
            f"Bad URL in {SampleMetadata.SHEET_NAME} for {sample_barcode}")

    plate_map_type = accession.get_plate_map_type_from_name(
        plate_map_file.name)

    with plate_map_file.open() as fh:
        accession_data = accession.read_accession_data(plate_map_type, fh)

    logger.info(f"Writing layout map to {output_filename}")
    with output_file_object as output_fh:
        format_pdf(
            entry_data[SampleMetadata.SAMPLE_PLATE_BARCODE],
            accession_data,
            entry_data[SampleMetadata.RESEARCHER_NAME],
            format_time(cfg, entry_data[SampleMetadata.TIMESTAMP]),
            output_fh,
        )
def test_mkdir(
    gdrive_service: DriveService,
    gdrive_folder: DriveObject,
    new_folder_name="mkdir-test",
):
    subdir = mkdir(gdrive_service, gdrive_folder.id, new_folder_name)
    traversed_folder_id = get_folder_id_of_path(
        gdrive_service, [gdrive_folder.name, new_folder_name]
    )
    assert subdir.id == traversed_folder_id
 def __init__(self, drive_service, cfg):
     self.completed_pcr_barcodes = get_completed_pcr_barcodes(
         drive_service, cfg)
     self.results_folder_id = drive.get_folder_id_of_path(
         drive_service, cfg.CSV_RESULTS_FOLDER_TRACKING)
     self.accession_folder_id = drive.get_folder_id_of_path(
         drive_service, cfg.PLATE_LAYOUT_FOLDER)
     self.accession_locations = get_accession_locations(drive_service, cfg)
     self.mark_as_processed_sample_barcodes = get_mark_as_processed_sample_barcodes(
         drive_service, cfg)
     self.accessions_sheet = cfg["DATA"].get("accession_tracking_sheet")
     self.clin_lab_sheet = cfg["DATA"].get("clin_lab_reporting_sheet")
     self.supervisor_plate_queue_sheet = cfg["DATA"].get(
         "supervisor_plate_queue_sheet")
     form_responses = CollectiveForm(
         drive_service, cfg["DATA"].get("collection_form_spreadsheet_id"))
     self.registered_df = form_responses[SampleRegistration.SHEET_NAME]
     self.bravo_rna_df = form_responses[BravoRNAExtraction.SHEET_NAME]
     self.check_in_df = form_responses[FridgeCheckin.SHEET_NAME]
     self.starting_bravo_df = form_responses[BravoStart.SHEET_NAME]
     self.freezer_check_in_df = form_responses[FreezerCheckin.SHEET_NAME]
def get_already_tracked_samples(drive_service, cfg):
    """Return a list of processed qPCR barcodes by checking our marker file folder"""
    marker_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.ACCESSSION_TRACKING_MARKERS_FOLDER)

    marker_folder_contents = drive.get_contents_by_folder_id(drive_service,
                                                             marker_folder_id,
                                                             only_files=True)

    completed_barcodes = set(marker_folder_entry.name
                             for marker_folder_entry in marker_folder_contents)
    return completed_barcodes
Beispiel #6
0
 def __init__(
     self,
     session: Session,
     drive_service: drive.DriveService,
     folder_path_components: List[str] = None,
 ):
     super().__init__(session=session)
     self.session = session
     self.drive_service = drive_service
     self.folder_path_components = folder_path_components
     self.folder_id = drive.get_folder_id_of_path(
         self.drive_service, self.folder_path_components)
     self.data = self.initialize_data_from_source()
def test_mkdir_recursive(
    gdrive_service: DriveService,
    gdrive_folder: DriveObject,
    new_folder_name="mkdir-recursive",
):
    new_path_components = [new_folder_name, "abc", "abc", "def"]
    full_path = [gdrive_folder.name] + new_path_components
    subdir_id = mkdir_recursive(gdrive_service, gdrive_folder.id, new_path_components)
    lookup = get_folder_id_of_path(gdrive_service, full_path)

    assert subdir_id == lookup

    second_subdir_id = mkdir_recursive(
        gdrive_service, gdrive_folder.id, new_path_components
    )

    assert second_subdir_id == lookup
Beispiel #8
0
def fetch_barcodes(args, cfg):
    google_credentials = gutils.get_secrets_manager_credentials(args.secret_id)
    drive_service = drive.get_service(google_credentials)

    # qpcr logs folder
    logs_folder_id = drive.get_folder_id_of_path(drive_service,
                                                 cfg.PCR_LOGS_FOLDER)
    logs_folder_contents = drive.get_contents_by_folder_id(drive_service,
                                                           logs_folder_id,
                                                           only_files=True)

    barcodes_to_fetch = defaultdict(RunFiles)
    for entry in logs_folder_contents:
        m = RunFiles.get_qpcr_file_type(entry.name)
        if m is None:
            continue
        elif m[RunFiles.BARCODE] in args.barcodes:
            barcodes_to_fetch[m[RunFiles.BARCODE]].add_file(m, entry)

    for barcode, barcode_files in barcodes_to_fetch.items():
        # all files must be present, at least one quant_amp file
        if not barcode_files.all_files:
            logger.warning(msg=f"Missing files for {barcode}!")
            continue

        logger.info(msg=f"Found sample to fetch: {barcode}")

        # read in the run information and quant cq
        run_info = barcode_files.run_info
        logger.info(msg=f"    Downloading: {run_info.name}")
        with drive.get_file(drive_service, run_info.id, binary=False) as fh:
            with (args.output_dir / run_info.name).open("w") as out:
                out.write(fh.read())

        quant_cq = barcode_files.quant_cq
        logger.info(msg=f"    Downloading: {quant_cq.name}")
        with drive.get_file(drive_service, quant_cq.id, binary=False) as fh:
            with (args.output_dir / quant_cq.name).open("w") as out:
                out.write(fh.read())

        for quant_amp in barcode_files.quant_amp.values():
            logger.info(msg=f"    Downloading: {quant_amp.name}")
            with drive.get_file(drive_service, quant_amp.id,
                                binary=False) as fh:
                with (args.output_dir / quant_amp.name).open("w") as out:
                    out.write(fh.read())
def get_accession_locations(drive_service, cfg) -> Dict[str, Tuple[str, str]]:
    """return a mapping between accession ID's and their origin location"""
    accession_locations = {}
    accession_location_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.ACCESSION_LOCATIONS_FOLDER)
    accession_location_files = drive.get_contents_by_folder_id(
        drive_service, accession_location_folder_id, only_files=True)
    for accession_location_file in accession_location_files:
        with drive.get_file(drive_service, accession_location_file.id) as fh:
            accession_location_reader = csv.reader(fh, delimiter=",")
            for row in accession_location_reader:
                if row[0] == "Accession":
                    # header row
                    continue
                submitter_id = ""
                if len(row) == 3:
                    accession, location, submitter_id = row
                else:
                    accession, location = row
                accession_locations[accession] = location, submitter_id
    return accession_locations
def test_get_folder_id_of_path_one_level(gdrive_service: DriveService,
                                         gdrive_folder: DriveObject):
    """Tests that get_folder_id_of_path works with an unnested folder."""
    retrieved_folder_id = get_folder_id_of_path(gdrive_service,
                                                [gdrive_folder.name])
    assert retrieved_folder_id == gdrive_folder.id
Beispiel #11
0
def processing(cfg: Config, google_credentials: service_account.Credentials):
    git_info = get_git_info()
    drive_service = drive.get_service(google_credentials)
    logger.info(msg=f"Starting processing loop with code version: {git_info}")

    # qpcr logs folder
    logs_folder_id = drive.get_folder_id_of_path(drive_service,
                                                 cfg.PCR_LOGS_FOLDER)

    # markers folder
    markers_folder_id = drive.get_folder_id_of_path(drive_service,
                                                    cfg.PCR_MARKERS_FOLDER)

    # csv results folder
    csv_results_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.CSV_RESULTS_FOLDER)

    # CB rad results folder
    cb_report_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.CHINA_BASIN_CSV_REPORTS_FOLDER)

    # final reports folder
    final_results_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.FINAL_REPORTS_FOLDER)

    # get the collection spreadsheet
    collective_form = CollectiveForm(
        drive_service, cfg["DATA"]["collection_form_spreadsheet_id"])

    logs_folder_contents = drive.get_contents_by_folder_id(drive_service,
                                                           logs_folder_id,
                                                           only_files=True)
    marker_folder_contents = drive.get_contents_by_folder_id(drive_service,
                                                             markers_folder_id,
                                                             only_files=True)
    plate_layout_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.PLATE_LAYOUT_FOLDER)
    completed_barcodes = set(marker_folder_entry.name
                             for marker_folder_entry in marker_folder_contents)

    sample_metadata_form = collective_form[SampleMetadata.SHEET_NAME]
    rerun_form = collective_form[SampleRerun.SHEET_NAME]

    # group log file entries by barcode
    logger.info(msg="Checking for samples to process")

    barcodes_to_process = defaultdict(RunFiles)
    for entry in logs_folder_contents:
        m = RunFiles.get_qpcr_file_type(entry.name)
        if m is None or m[RunFiles.BARCODE] in completed_barcodes:
            continue
        else:
            barcodes_to_process[m[RunFiles.BARCODE]].add_file(m, entry)

    for barcode, barcode_files in barcodes_to_process.items():
        # all files must be present, at least one quant_amp file
        if not barcode_files.all_files:
            message = f"Missing files for: {barcode}. Skipping for now"
            logger.critical(msg=message, extra={"notify_slack": True})
            continue

        try:
            logger.info(msg=f"Found sample to process, barcode: {barcode}")

            logger.info(msg=f"Getting metadata and data for: {barcode}")
            bravo_metadata = BravoMetadata.load_from_spreadsheet(
                barcode,
                collective_form,
            )
            if bravo_metadata.sop_protocol is None:
                message = f"Skipping sample plate: {barcode}, no protocol"
                logger.critical(msg=message, extra={"notify_slack": True})
                continue

            protocol = get_protocol(bravo_metadata.sop_protocol)

            if not set(barcode_files.quant_amp).issuperset(protocol.mapping):
                missing = map(
                    str,
                    set(protocol.mapping) - set(barcode_files.quant_amp))
                message = f"Missing quant amp files for {barcode}: {', '.join(missing)}"
                logger.critical(msg=message, extra={"notify_slack": True})
                continue

            # process well data and check controls, return results
            logger.info(
                msg=f"Processing well data and controls for: {barcode}")
            accession_data = accession.get_accession_data_with_rerun(
                drive_service,
                plate_layout_folder_id,
                sample_metadata_form,
                rerun_form,
                bravo_metadata.sample_barcode,
            )

            control_wells = get_control_wells_from_type(
                controls_type=bravo_metadata.controls_type,
                accession_data=accession_data,
            )
            update_accession_data_with_controls(control_wells, accession_data,
                                                barcode)

            processing_results = process_barcode(
                cfg,
                barcode,
                barcode_files,
                bravo_metadata,
                protocol,
                control_wells,
                accession_data,
            )

            with drive.put_file(
                    drive_service,
                    csv_results_folder_id,
                    processing_results.results_filename,
            ) as fh:
                processing_results.write_results(fh)

            china_basin_result_file = drive.put_file(
                drive_service,
                cb_report_folder_id,
                processing_results.cb_report_filename,
            )
            with china_basin_result_file as fh:
                processing_results.write_cb_report(fh)

            # create pdf report
            logger.info(
                msg=f"Generating and uploading results PDF for: {barcode}")
            final_pdf = io.BytesIO()
            create_final_pdf(processing_results, final_pdf)
            pdf_results_file = drive.put_file(
                drive_service,
                final_results_folder_id,
                processing_results.final_pdf_filename,
            )
            with pdf_results_file as out_fh:
                out_fh.write(final_pdf.getvalue())

            logger.info(msg=f"Sending email report: {barcode}")
            mail.send_email(
                google_credentials,
                sender=cfg["EMAIL"].get("sender"),
                recipients=cfg["EMAIL"].get("recipients"),
                subject=_format_email_subject(
                    sample_barcode=bravo_metadata.sample_barcode,
                    qpcr_barcode=barcode,
                ),
                body=_format_email_body(
                    sample_barcode=bravo_metadata.sample_barcode,
                    results_file_id=china_basin_result_file.id,
                ),
                attachments={processing_results.final_pdf_filename: final_pdf},
            )

            message = (
                f"Processed sample plate: {bravo_metadata.sample_barcode}-{barcode}"
                f" using rev {git_info}")
            logger.critical(msg=message, extra={"notify_slack": True})
            # write a marker so we don't process this file again.
            processing_results.write_marker_file(drive_service,
                                                 markers_folder_id)

        except Exception as err:
            logger.critical(f"Error in [{cfg.aws_env}]: {err}",
                            extra={"notify_slack": True})
            logger.exception("Details:")
Beispiel #12
0
def parse_qpcr_csv(args):
    cfg = Config()
    create_logger(cfg, debug=args.debug)

    logger.info(msg=f"Started local processing in: {args.qpcr_run_path}")

    if args.use_gdrive and not args.barcodes:
        raise ValueError(
            "You must specify barcodes to process from Google Drive")

    run_path = pathlib.Path(args.qpcr_run_path)

    google_credentials = gutils.get_secrets_manager_credentials(args.secret_id)

    drive_service = drive.get_service(google_credentials)
    collective_form = CollectiveForm(
        drive_service, cfg["DATA"]["collection_form_spreadsheet_id"])

    sample_metadata_form = collective_form[SampleMetadata.SHEET_NAME]
    rerun_form = collective_form[SampleRerun.SHEET_NAME]

    if args.use_gdrive:
        logs_folder_id = drive.get_folder_id_of_path(drive_service,
                                                     cfg.PCR_LOGS_FOLDER)
        logs_folder_contents = [
            drive_file for drive_file in drive.get_contents_by_folder_id(
                drive_service, logs_folder_id, only_files=True)
        ]

        plate_layout_folder_id = drive.get_folder_id_of_path(
            drive_service, cfg.PLATE_LAYOUT_FOLDER)
    else:
        logs_folder_contents = run_path.glob("*.csv")

    barcodes_to_process = defaultdict(RunFiles)
    for run_file in logs_folder_contents:
        m = RunFiles.get_qpcr_file_type(run_file.name)
        if m is None:
            continue
        elif args.barcodes and m[RunFiles.BARCODE] not in args.barcodes:
            continue
        else:
            barcodes_to_process[m[RunFiles.BARCODE]].add_file(m, run_file)

    for barcode, barcode_files in barcodes_to_process.items():
        # all files must be present, at least one quant_amp file
        if not barcode_files.all_files:
            message = f"Missing files for: {barcode}. Skipping for now"
            logger.info(msg=message)
            continue

        logger.info(msg=f"Found sample to process, barcode: {barcode}")

        logger.info(msg=f"Getting metadata and data for: {barcode}")
        bravo_metadata = BravoMetadata.load_from_spreadsheet(
            barcode, collective_form)
        if args.protocol is not None:
            # user specified the protocol
            protocol = get_protocol(args.protocol)
        else:
            protocol = get_protocol(bravo_metadata.sop_protocol)

        if not set(barcode_files.quant_amp).issuperset(protocol.mapping):
            missing = map(str,
                          set(protocol.mapping) - set(barcode_files.quant_amp))
            message = f"Missing quant amp files for {barcode}: {', '.join(missing)}"
            logger.critical(msg=message)
            continue

        if args.plate_map_file is not None:
            plate_map_type = accession.get_plate_map_type_from_name(
                args.plate_map_file.name)
            accession_data = accession.read_accession_data(
                plate_map_type, args.plate_map_file)
        elif args.use_gdrive:
            accession_data = accession.get_accession_data_with_rerun(
                drive_service,
                plate_layout_folder_id,
                sample_metadata_form,
                rerun_form,
                bravo_metadata.sample_barcode,
            )
        else:
            raise ValueError(
                "You must provide a plate map file or use Google Drive")

        control_wells = get_control_wells_from_type(
            controls_type=bravo_metadata.controls_type,
            accession_data=accession_data,
        )
        # check for valid accessions
        update_accession_data_with_controls(control_wells, accession_data,
                                            barcode)

        # process well data and check controls, return results
        logger.info(msg=f"Processing well data and controls for: {barcode}")

        processing_results = process_barcode(
            cfg,
            barcode,
            barcode_files,
            bravo_metadata,
            protocol,
            control_wells,
            accession_data,
        )

        with (run_path / processing_results.results_filename).open("w") as fh:
            processing_results.write_results(fh)

        with (run_path /
              processing_results.cb_report_filename).open("w") as fh:
            processing_results.write_cb_report(fh)

        # create pdf report
        logger.info(msg=f"Generating results PDF for: {barcode}")
        final_pdf_filename = run_path / processing_results.final_pdf_filename
        with open(final_pdf_filename, "wb") as output_file:
            create_final_pdf(processing_results, output_file)