Example #1
0
def google_sheets(gdrive_service, drive_folder_scoped_config):
    """Fixture: build the CollectiveForm for the configured collection form.

    The spreadsheet id is read from the ``"DATA"`` section of
    ``drive_folder_scoped_config`` under ``"collection_form_spreadsheet_id"``.
    """
    data_section = drive_folder_scoped_config["DATA"]
    spreadsheet_id = data_section["collection_form_spreadsheet_id"]
    return CollectiveForm(gdrive_service, spreadsheet_id)
def get_mark_as_processed_sample_barcodes(drive_service, cfg):
    """List the sample barcodes from the "Do Not Process" Samples Spreadsheet.

    These are the sample barcodes that were never run and are never going to
    be run.  They are read from the ``"sample_barcodes"`` column of the
    ``"barcodes"`` sheet; the spreadsheet is loaded with ``skip_header=False``.
    """
    spreadsheet_id = cfg["DATA"]["do_not_process_spreadsheet_id"]
    do_not_process_form = CollectiveForm(
        drive_service, spreadsheet_id, skip_header=False
    )
    barcodes_sheet = do_not_process_form["barcodes"]
    return list(barcodes_sheet["sample_barcodes"].values)
Example #3
0
    def updated_sheet_data(
        self,
        spreadsheet_name,
        source_data_sheet_id,
        sheet_name,
        barcode_column,
        new_data,
    ):
        """
        Merge new rows into the existing prod sheet data and write the result

        Existing rows whose barcode also appears in ``new_data`` are dropped,
        the new rows are placed ahead of the remaining existing rows, missing
        values are replaced with empty strings, and the combined values are
        written to the ``<sheet_name>!A2`` range of the target spreadsheet.

        Parameters
        ----------
        :param spreadsheet_name: The name of the google spreadsheet to update
        :param source_data_sheet_id: The ID of the prod sheet to get existing data from
        :param sheet_name: The name of the worksheet within the spreadsheet to update
        :param barcode_column: The name sample plate barcode column in the sheet
        :param new_data: The new data to update the sheet with; the first
            element is used as the column names, the rest as the rows
        """
        source_form = CollectiveForm(
            self.drive_service, source_data_sheet_id, skip_header=False
        )
        current_df = source_form[sheet_name]

        # first element of new_data is the header row, the remainder is data
        incoming_df = pd.DataFrame(data=new_data[1:], columns=new_data[0])

        # drop every existing row that is about to be replaced by a new one
        replaced_barcodes = set(incoming_df[barcode_column].values)
        is_replaced = current_df[barcode_column].isin(replaced_barcodes)
        retained_df = current_df[~is_replaced]

        # new rows go first, followed by the untouched existing rows
        merged_df = pd.concat([incoming_df, retained_df], ignore_index=True)
        merged_df = merged_df.fillna("")

        target_range = f"{sheet_name}!A2"
        target_spreadsheet = self.gc.open(spreadsheet_name)
        # NOTE(review): the clear is issued for the same "!A2" range as the
        # write; confirm this clears everything that needs clearing.
        target_spreadsheet.values_clear(range=target_range)
        target_spreadsheet.values_update(
            target_range,
            params={"valueInputOption": "RAW"},
            body={"values": merged_df.values.tolist()},
        )
 def __init__(self, drive_service, cfg):
     """Load the barcodes, folder ids, sheet names and form sheets used later.

     :param drive_service: Drive service handed to every lookup helper and to
         the CollectiveForm loader
     :param cfg: configuration; folder paths are read as attributes
         (``CSV_RESULTS_FOLDER_TRACKING``, ``PLATE_LAYOUT_FOLDER``) and the
         sheet names / spreadsheet id from its ``"DATA"`` section
     """
     # values resolved through Drive lookups
     self.completed_pcr_barcodes = get_completed_pcr_barcodes(
         drive_service, cfg
     )
     self.results_folder_id = drive.get_folder_id_of_path(
         drive_service, cfg.CSV_RESULTS_FOLDER_TRACKING
     )
     self.accession_folder_id = drive.get_folder_id_of_path(
         drive_service, cfg.PLATE_LAYOUT_FOLDER
     )
     self.accession_locations = get_accession_locations(drive_service, cfg)
     self.mark_as_processed_sample_barcodes = (
         get_mark_as_processed_sample_barcodes(drive_service, cfg)
     )

     # tracking sheet names; .get() is used for every "DATA" entry
     data_section = cfg["DATA"]
     self.accessions_sheet = data_section.get("accession_tracking_sheet")
     self.clin_lab_sheet = data_section.get("clin_lab_reporting_sheet")
     self.supervisor_plate_queue_sheet = data_section.get(
         "supervisor_plate_queue_sheet"
     )

     # individual sheets of the collection form, looked up by sheet name
     collective_form = CollectiveForm(
         drive_service, data_section.get("collection_form_spreadsheet_id")
     )
     self.registered_df = collective_form[SampleRegistration.SHEET_NAME]
     self.bravo_rna_df = collective_form[BravoRNAExtraction.SHEET_NAME]
     self.check_in_df = collective_form[FridgeCheckin.SHEET_NAME]
     self.starting_bravo_df = collective_form[BravoStart.SHEET_NAME]
     self.freezer_check_in_df = collective_form[FreezerCheckin.SHEET_NAME]
Example #5
0
def main():
    """Create a layout PDF for every sample plate barcode given on the CLI.

    The sample plate metadata sheet of the collective form is downloaded
    once.  For each barcode the matching row is looked up; barcodes with zero
    or multiple matching rows are logged as errors and skipped.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("barcodes", nargs="+")
    arg_parser.add_argument("--output-dir", type=Path, default=Path("."))
    arg_parser.add_argument("--debug", action="store_true")
    arg_parser.add_argument("--secret-id", default="covid-19/google_creds")
    cli_args = arg_parser.parse_args()

    cfg = Config()
    create_logger(cfg, debug=cli_args.debug)

    credentials = gutils.get_secrets_manager_credentials(cli_args.secret_id)
    drive_service = drive.get_service(credentials)

    logger.debug("Downloading collective form")
    spreadsheet_id = cfg["DATA"]["collection_form_spreadsheet_id"]
    form = CollectiveForm(drive_service, spreadsheet_id)
    plate_metadata = form[SampleMetadata.SHEET_NAME]

    for barcode in cli_args.barcodes:
        try:
            row = clean_single_row(
                plate_metadata,
                SampleMetadata.SAMPLE_PLATE_BARCODE,
                barcode,
            )
        except MetadataNotFoundError:
            logger.error(f"0 results for {barcode}, skipping")
            continue
        except MultipleRowsError as ex:
            logger.error(f"{ex.match_count} results for {barcode}, skipping")
            continue

        # the timestamp is handed on as a string
        timestamp = row[SampleMetadata.TIMESTAMP]
        row[SampleMetadata.TIMESTAMP] = str(timestamp)
        # output directory and drive service travel with the row
        row[LOCAL_RUN] = (cli_args.output_dir, drive_service)

        logger.debug(f"Making layout PDF for {barcode}")
        create_layout_pdf(cfg=cfg, entry_data=row)
Example #6
0
def processing(cfg: Config, google_credentials: service_account.Credentials) -> None:
    """Process every qPCR run in the Drive logs folder that has no marker yet.

    Files in the PCR logs folder are grouped by barcode; barcodes whose name
    already appears in the markers folder are ignored.  For each remaining
    barcode with a complete set of files the function loads the metadata and
    accession data, processes the run, uploads the results file, the CB
    report and the final PDF to Drive, emails the report, and finally writes
    a marker file.

    An exception raised while handling one barcode is logged (with
    ``notify_slack`` set in the log record's ``extra``) and the loop moves on
    to the next barcode.

    :param cfg: configuration; folder paths are read as attributes, the
        spreadsheet id from ``cfg["DATA"]``, the email sender/recipients from
        ``cfg["EMAIL"]`` and ``cfg.aws_env`` for the error message
    :param google_credentials: credentials used to build the Drive service
        and passed on to ``mail.send_email``
    """
    git_info = get_git_info()
    drive_service = drive.get_service(google_credentials)
    logger.info(msg=f"Starting processing loop with code version: {git_info}")

    # qpcr logs folder
    logs_folder_id = drive.get_folder_id_of_path(drive_service,
                                                 cfg.PCR_LOGS_FOLDER)

    # markers folder
    markers_folder_id = drive.get_folder_id_of_path(drive_service,
                                                    cfg.PCR_MARKERS_FOLDER)

    # csv results folder
    csv_results_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.CSV_RESULTS_FOLDER)

    # CB rad results folder
    cb_report_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.CHINA_BASIN_CSV_REPORTS_FOLDER)

    # final reports folder
    final_results_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.FINAL_REPORTS_FOLDER)

    # get the collection spreadsheet
    collective_form = CollectiveForm(
        drive_service, cfg["DATA"]["collection_form_spreadsheet_id"])

    logs_folder_contents = drive.get_contents_by_folder_id(drive_service,
                                                           logs_folder_id,
                                                           only_files=True)
    marker_folder_contents = drive.get_contents_by_folder_id(drive_service,
                                                             markers_folder_id,
                                                             only_files=True)
    plate_layout_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.PLATE_LAYOUT_FOLDER)
    # the name of each entry in the markers folder is treated as a barcode
    # that has already been processed
    completed_barcodes = set(marker_folder_entry.name
                             for marker_folder_entry in marker_folder_contents)

    sample_metadata_form = collective_form[SampleMetadata.SHEET_NAME]
    rerun_form = collective_form[SampleRerun.SHEET_NAME]

    # group log file entries by barcode
    logger.info(msg="Checking for samples to process")

    # a RunFiles is created on first access for each new barcode
    barcodes_to_process = defaultdict(RunFiles)
    for entry in logs_folder_contents:
        m = RunFiles.get_qpcr_file_type(entry.name)
        # skip entries that are not recognised or whose barcode is completed
        if m is None or m[RunFiles.BARCODE] in completed_barcodes:
            continue
        else:
            barcodes_to_process[m[RunFiles.BARCODE]].add_file(m, entry)

    for barcode, barcode_files in barcodes_to_process.items():
        # all files must be present, at least one quant_amp file
        if not barcode_files.all_files:
            message = f"Missing files for: {barcode}. Skipping for now"
            logger.critical(msg=message, extra={"notify_slack": True})
            continue

        try:
            logger.info(msg=f"Found sample to process, barcode: {barcode}")

            logger.info(msg=f"Getting metadata and data for: {barcode}")
            bravo_metadata = BravoMetadata.load_from_spreadsheet(
                barcode,
                collective_form,
            )
            # without a protocol the plate cannot be processed; report and skip
            if bravo_metadata.sop_protocol is None:
                message = f"Skipping sample plate: {barcode}, no protocol"
                logger.critical(msg=message, extra={"notify_slack": True})
                continue

            protocol = get_protocol(bravo_metadata.sop_protocol)

            # every entry of protocol.mapping needs a matching quant amp file
            if not set(barcode_files.quant_amp).issuperset(protocol.mapping):
                missing = map(
                    str,
                    set(protocol.mapping) - set(barcode_files.quant_amp))
                message = f"Missing quant amp files for {barcode}: {', '.join(missing)}"
                logger.critical(msg=message, extra={"notify_slack": True})
                continue

            # process well data and check controls, return results
            logger.info(
                msg=f"Processing well data and controls for: {barcode}")
            accession_data = accession.get_accession_data_with_rerun(
                drive_service,
                plate_layout_folder_id,
                sample_metadata_form,
                rerun_form,
                bravo_metadata.sample_barcode,
            )

            control_wells = get_control_wells_from_type(
                controls_type=bravo_metadata.controls_type,
                accession_data=accession_data,
            )
            update_accession_data_with_controls(control_wells, accession_data,
                                                barcode)

            processing_results = process_barcode(
                cfg,
                barcode,
                barcode_files,
                bravo_metadata,
                protocol,
                control_wells,
                accession_data,
            )

            # upload the results file to the csv results folder
            with drive.put_file(
                    drive_service,
                    csv_results_folder_id,
                    processing_results.results_filename,
            ) as fh:
                processing_results.write_results(fh)

            # the CB report file object is kept in a variable because its
            # ``id`` is used in the email body below
            china_basin_result_file = drive.put_file(
                drive_service,
                cb_report_folder_id,
                processing_results.cb_report_filename,
            )
            with china_basin_result_file as fh:
                processing_results.write_cb_report(fh)

            # create pdf report
            logger.info(
                msg=f"Generating and uploading results PDF for: {barcode}")
            # the PDF is rendered into memory: its bytes are uploaded to Drive
            # here and the same buffer is attached to the email below
            final_pdf = io.BytesIO()
            create_final_pdf(processing_results, final_pdf)
            pdf_results_file = drive.put_file(
                drive_service,
                final_results_folder_id,
                processing_results.final_pdf_filename,
            )
            with pdf_results_file as out_fh:
                out_fh.write(final_pdf.getvalue())

            logger.info(msg=f"Sending email report: {barcode}")
            mail.send_email(
                google_credentials,
                sender=cfg["EMAIL"].get("sender"),
                recipients=cfg["EMAIL"].get("recipients"),
                subject=_format_email_subject(
                    sample_barcode=bravo_metadata.sample_barcode,
                    qpcr_barcode=barcode,
                ),
                body=_format_email_body(
                    sample_barcode=bravo_metadata.sample_barcode,
                    results_file_id=china_basin_result_file.id,
                ),
                attachments={processing_results.final_pdf_filename: final_pdf},
            )

            # logged at critical level with notify_slack set, like the
            # failure messages above
            message = (
                f"Processed sample plate: {bravo_metadata.sample_barcode}-{barcode}"
                f" using rev {git_info}")
            logger.critical(msg=message, extra={"notify_slack": True})
            # write a marker so we don't process this file again.
            # NOTE(review): this is the last step, after uploads and email;
            # presumably a failure here means the barcode is picked up again
            # on the next call - confirm that is acceptable.
            processing_results.write_marker_file(drive_service,
                                                 markers_folder_id)

        except Exception as err:
            # per-barcode boundary: report the failure and carry on with the
            # next barcode instead of aborting the whole loop
            logger.critical(f"Error in [{cfg.aws_env}]: {err}",
                            extra={"notify_slack": True})
            logger.exception("Details:")
Example #7
0
def parse_qpcr_csv(args):
    """Process qPCR run files and write the outputs into the run directory.

    Run files are taken from the Drive PCR logs folder when
    ``args.use_gdrive`` is set, otherwise from the ``*.csv`` files found in
    ``args.qpcr_run_path``.  For every barcode with a complete set of files
    the results file, the CB report and the final PDF are written under
    ``args.qpcr_run_path``.

    :param args: parsed command line namespace; the attributes read here are
        ``debug``, ``qpcr_run_path``, ``use_gdrive``, ``barcodes``,
        ``secret_id``, ``protocol`` and ``plate_map_file``
    :raises ValueError: when Google Drive is requested without barcodes, or
        when a barcode is reached with neither a plate map file nor Google
        Drive to get accession data from
    """
    cfg = Config()
    create_logger(cfg, debug=args.debug)

    logger.info(f"Started local processing in: {args.qpcr_run_path}")

    if args.use_gdrive and not args.barcodes:
        raise ValueError(
            "You must specify barcodes to process from Google Drive")

    output_dir = pathlib.Path(args.qpcr_run_path)

    credentials = gutils.get_secrets_manager_credentials(args.secret_id)
    drive_service = drive.get_service(credentials)

    spreadsheet_id = cfg["DATA"]["collection_form_spreadsheet_id"]
    collective_form = CollectiveForm(drive_service, spreadsheet_id)
    sample_metadata_form = collective_form[SampleMetadata.SHEET_NAME]
    rerun_form = collective_form[SampleRerun.SHEET_NAME]

    if args.use_gdrive:
        logs_folder_id = drive.get_folder_id_of_path(
            drive_service, cfg.PCR_LOGS_FOLDER
        )
        candidate_files = list(
            drive.get_contents_by_folder_id(
                drive_service, logs_folder_id, only_files=True
            )
        )
        # only needed (and only defined) for the Google Drive code path
        plate_layout_folder_id = drive.get_folder_id_of_path(
            drive_service, cfg.PLATE_LAYOUT_FOLDER
        )
    else:
        candidate_files = output_dir.glob("*.csv")

    # group the recognised run files by barcode
    runs_by_barcode = defaultdict(RunFiles)
    for run_file in candidate_files:
        file_match = RunFiles.get_qpcr_file_type(run_file.name)
        if file_match is None:
            continue
        file_barcode = file_match[RunFiles.BARCODE]
        # when barcodes were requested, ignore every other barcode
        if args.barcodes and file_barcode not in args.barcodes:
            continue
        runs_by_barcode[file_barcode].add_file(file_match, run_file)

    for barcode, barcode_files in runs_by_barcode.items():
        # all files must be present, at least one quant_amp file
        if not barcode_files.all_files:
            logger.info(f"Missing files for: {barcode}. Skipping for now")
            continue

        logger.info(f"Found sample to process, barcode: {barcode}")

        logger.info(f"Getting metadata and data for: {barcode}")
        bravo_metadata = BravoMetadata.load_from_spreadsheet(
            barcode, collective_form
        )

        # a protocol given by the user wins over the one from the metadata
        protocol_name = (
            args.protocol
            if args.protocol is not None
            else bravo_metadata.sop_protocol
        )
        protocol = get_protocol(protocol_name)

        # every entry of protocol.mapping needs a matching quant amp file
        missing_keys = set(protocol.mapping) - set(barcode_files.quant_amp)
        if missing_keys:
            missing = [str(key) for key in missing_keys]
            message = f"Missing quant amp files for {barcode}: {', '.join(missing)}"
            logger.critical(message)
            continue

        if args.plate_map_file is None and not args.use_gdrive:
            raise ValueError(
                "You must provide a plate map file or use Google Drive")

        if args.plate_map_file is not None:
            # a local plate map file takes precedence over Google Drive
            plate_map_type = accession.get_plate_map_type_from_name(
                args.plate_map_file.name
            )
            accession_data = accession.read_accession_data(
                plate_map_type, args.plate_map_file
            )
        else:
            accession_data = accession.get_accession_data_with_rerun(
                drive_service,
                plate_layout_folder_id,
                sample_metadata_form,
                rerun_form,
                bravo_metadata.sample_barcode,
            )

        control_wells = get_control_wells_from_type(
            controls_type=bravo_metadata.controls_type,
            accession_data=accession_data,
        )
        # check for valid accessions
        update_accession_data_with_controls(
            control_wells, accession_data, barcode
        )

        # process well data and check controls, return results
        logger.info(f"Processing well data and controls for: {barcode}")

        processing_results = process_barcode(
            cfg,
            barcode,
            barcode_files,
            bravo_metadata,
            protocol,
            control_wells,
            accession_data,
        )

        results_path = output_dir / processing_results.results_filename
        with open(results_path, "w") as results_fh:
            processing_results.write_results(results_fh)

        cb_report_path = output_dir / processing_results.cb_report_filename
        with open(cb_report_path, "w") as report_fh:
            processing_results.write_cb_report(report_fh)

        # create pdf report
        logger.info(f"Generating results PDF for: {barcode}")
        final_pdf_path = output_dir / processing_results.final_pdf_filename
        with final_pdf_path.open("wb") as pdf_fh:
            create_final_pdf(processing_results, pdf_fh)