def google_sheets(gdrive_service, drive_folder_scoped_config):
    """Fixture that builds the collective form used by the tests.

    The spreadsheet ID is taken from the ``collection_form_spreadsheet_id``
    entry in the ``DATA`` section of ``drive_folder_scoped_config``.
    """
    spreadsheet_id = drive_folder_scoped_config["DATA"][
        "collection_form_spreadsheet_id"
    ]
    return CollectiveForm(gdrive_service, spreadsheet_id)
def get_mark_as_processed_sample_barcodes(drive_service, cfg):
    """List the sample barcodes from the "Do Not Process" Samples Spreadsheet.

    These are samples that were never run and are never going to be run.
    The values come from the ``sample_barcodes`` column of the ``barcodes``
    sheet; the spreadsheet is opened with ``skip_header=False``.
    """
    do_not_process_form = CollectiveForm(
        drive_service,
        cfg["DATA"]["do_not_process_spreadsheet_id"],
        skip_header=False,
    )
    barcodes_sheet = do_not_process_form["barcodes"]
    return list(barcodes_sheet["sample_barcodes"].values)
def updated_sheet_data(
    self,
    spreadsheet_name,
    source_data_sheet_id,
    sheet_name,
    barcode_column,
    new_data,
):
    """Merge new rows into a worksheet, replacing rows with matching barcodes.

    The current contents of ``sheet_name`` are read from the spreadsheet
    identified by ``source_data_sheet_id``. Every existing row whose
    ``barcode_column`` value also appears in ``new_data`` is dropped, the new
    rows are placed in front of the remaining ones, missing values become
    empty strings, and the result is written to the spreadsheet named
    ``spreadsheet_name`` starting at cell A2.

    :param spreadsheet_name: The name of the google spreadsheet to update
    :param source_data_sheet_id: The ID of the sheet to get existing data from
    :param sheet_name: The name of the worksheet within the spreadsheet to
        update
    :param barcode_column: The name of the barcode column in the sheet
    :param new_data: The new rows; the first row holds the column names
    """
    source_form = CollectiveForm(
        self.drive_service, source_data_sheet_id, skip_header=False
    )
    current_df = source_form[sheet_name]

    # First row of new_data is the header, the rest are the processed values.
    incoming_df = pd.DataFrame(new_data[1:], columns=new_data[0])
    replaced_barcodes = set(incoming_df[barcode_column].values)

    # Keep only the existing rows that are not being replaced.
    is_replaced = current_df[barcode_column].isin(replaced_barcodes)
    kept_df = current_df[~is_replaced]

    # New rows go first, followed by the untouched existing rows.
    merged_df = pd.concat([incoming_df, kept_df], ignore_index=True)
    merged_df = merged_df.fillna("")

    first_data_cell = f"{sheet_name}!A2"
    target_spreadsheet = self.gc.open(spreadsheet_name)
    # NOTE(review): the clear range is the single reference "<sheet>!A2";
    # confirm whether rows below A2 are also expected to be cleared.
    target_spreadsheet.values_clear(range=first_data_cell)
    target_spreadsheet.values_update(
        first_data_cell,
        params={"valueInputOption": "RAW"},
        body={"values": merged_df.values.tolist()},
    )
def __init__(self, drive_service, cfg):
    """Load the barcode lists, Drive folder IDs and form sheets.

    :param drive_service: Drive service handle passed to every Drive helper
    :param cfg: Configuration providing the folder paths (as attributes) and
        the ``DATA`` section with spreadsheet IDs and sheet names
    """
    # Barcodes and folder locations looked up through Drive.
    self.completed_pcr_barcodes = get_completed_pcr_barcodes(
        drive_service, cfg
    )
    self.results_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.CSV_RESULTS_FOLDER_TRACKING
    )
    self.accession_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.PLATE_LAYOUT_FOLDER
    )
    self.accession_locations = get_accession_locations(drive_service, cfg)
    self.mark_as_processed_sample_barcodes = (
        get_mark_as_processed_sample_barcodes(drive_service, cfg)
    )

    # Sheet names from the DATA section; ``.get`` is used, so an absent
    # entry is not an error here.
    data_cfg = cfg["DATA"]
    self.accessions_sheet = data_cfg.get("accession_tracking_sheet")
    self.clin_lab_sheet = data_cfg.get("clin_lab_reporting_sheet")
    self.supervisor_plate_queue_sheet = data_cfg.get(
        "supervisor_plate_queue_sheet"
    )

    # Individual sheets of the collection form.
    form_responses = CollectiveForm(
        drive_service, data_cfg.get("collection_form_spreadsheet_id")
    )
    self.registered_df = form_responses[SampleRegistration.SHEET_NAME]
    self.bravo_rna_df = form_responses[BravoRNAExtraction.SHEET_NAME]
    self.check_in_df = form_responses[FridgeCheckin.SHEET_NAME]
    self.starting_bravo_df = form_responses[BravoStart.SHEET_NAME]
    self.freezer_check_in_df = form_responses[FreezerCheckin.SHEET_NAME]
def main():
    """Create layout PDFs locally for the barcodes given on the command line.

    For each barcode the matching row of the sample metadata sheet is looked
    up. Barcodes with zero or multiple matching rows are logged as errors and
    skipped. The remaining rows are passed to ``create_layout_pdf`` with the
    output directory and drive service stored under ``LOCAL_RUN``.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("barcodes", nargs="+")
    arg_parser.add_argument("--output-dir", type=Path, default=Path("."))
    arg_parser.add_argument("--debug", action="store_true")
    arg_parser.add_argument("--secret-id", default="covid-19/google_creds")
    args = arg_parser.parse_args()

    cfg = Config()
    create_logger(cfg, debug=args.debug)

    credentials = gutils.get_secrets_manager_credentials(args.secret_id)
    drive_service = drive.get_service(credentials)

    logger.debug("Downloading collective form")
    spreadsheet_id = cfg["DATA"]["collection_form_spreadsheet_id"]
    form = CollectiveForm(drive_service, spreadsheet_id)
    plate_metadata = form[SampleMetadata.SHEET_NAME]

    for barcode in args.barcodes:
        try:
            row = clean_single_row(
                plate_metadata,
                SampleMetadata.SAMPLE_PLATE_BARCODE,
                barcode,
            )
        except MetadataNotFoundError:
            logger.error(f"0 results for {barcode}, skipping")
        except MultipleRowsError as ex:
            logger.error(f"{ex.match_count} results for {barcode}, skipping")
        else:
            # The timestamp is passed on as a string.
            row[SampleMetadata.TIMESTAMP] = str(row[SampleMetadata.TIMESTAMP])
            row[LOCAL_RUN] = (args.output_dir, drive_service)

            logger.debug(f"Making layout PDF for {barcode}")
            create_layout_pdf(cfg=cfg, entry_data=row)
def processing(cfg: Config, google_credentials: service_account.Credentials):
    """Process every qPCR run in the logs folder that has no marker file yet.

    Log files are grouped by barcode. For each barcode with a complete file
    set, the results CSV, the China Basin report and the final PDF are
    uploaded to their Drive folders, an email report is sent, and a marker
    file is written so the barcode is skipped on later runs.

    Barcodes with missing files, no protocol, or missing quant amp files are
    reported and skipped. Any exception raised while handling one barcode is
    logged and the loop moves on to the next barcode.

    :param cfg: Configuration providing the Drive folder paths, the ``DATA``
        and ``EMAIL`` sections and ``aws_env``
    :param google_credentials: Credentials used for the Drive service and for
        sending the email report
    """
    slack = {"notify_slack": True}

    git_info = get_git_info()
    drive_service = drive.get_service(google_credentials)
    logger.info(msg=f"Starting processing loop with code version: {git_info}")

    # Drive folders: qPCR logs, markers, CSV results, CB reports, final PDFs.
    logs_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.PCR_LOGS_FOLDER
    )
    markers_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.PCR_MARKERS_FOLDER
    )
    csv_results_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.CSV_RESULTS_FOLDER
    )
    cb_report_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.CHINA_BASIN_CSV_REPORTS_FOLDER
    )
    final_results_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.FINAL_REPORTS_FOLDER
    )

    # The collection spreadsheet.
    collective_form = CollectiveForm(
        drive_service, cfg["DATA"]["collection_form_spreadsheet_id"]
    )

    log_entries = drive.get_contents_by_folder_id(
        drive_service, logs_folder_id, only_files=True
    )
    marker_entries = drive.get_contents_by_folder_id(
        drive_service, markers_folder_id, only_files=True
    )
    plate_layout_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.PLATE_LAYOUT_FOLDER
    )

    # A marker file named after a barcode means that barcode is done.
    completed_barcodes = {marker.name for marker in marker_entries}

    sample_metadata_form = collective_form[SampleMetadata.SHEET_NAME]
    rerun_form = collective_form[SampleRerun.SHEET_NAME]

    # Group the log file entries by barcode, ignoring completed barcodes.
    logger.info(msg="Checking for samples to process")
    barcodes_to_process = defaultdict(RunFiles)
    for entry in log_entries:
        file_match = RunFiles.get_qpcr_file_type(entry.name)
        if file_match is None:
            continue
        entry_barcode = file_match[RunFiles.BARCODE]
        if entry_barcode not in completed_barcodes:
            barcodes_to_process[entry_barcode].add_file(file_match, entry)

    for barcode, barcode_files in barcodes_to_process.items():
        # All files must be present, at least one quant_amp file.
        if not barcode_files.all_files:
            message = f"Missing files for: {barcode}. Skipping for now"
            logger.critical(msg=message, extra=slack)
            continue

        try:
            logger.info(msg=f"Found sample to process, barcode: {barcode}")
            logger.info(msg=f"Getting metadata and data for: {barcode}")
            bravo_metadata = BravoMetadata.load_from_spreadsheet(
                barcode,
                collective_form,
            )
            if bravo_metadata.sop_protocol is None:
                message = f"Skipping sample plate: {barcode}, no protocol"
                logger.critical(msg=message, extra=slack)
                continue

            protocol = get_protocol(bravo_metadata.sop_protocol)

            # Every entry of the protocol mapping needs a quant amp file.
            missing_quant_amp = set(protocol.mapping) - set(
                barcode_files.quant_amp
            )
            if missing_quant_amp:
                missing_names = ", ".join(map(str, missing_quant_amp))
                message = (
                    f"Missing quant amp files for {barcode}: {missing_names}"
                )
                logger.critical(msg=message, extra=slack)
                continue

            # Process well data and check controls, return results.
            logger.info(
                msg=f"Processing well data and controls for: {barcode}"
            )
            accession_data = accession.get_accession_data_with_rerun(
                drive_service,
                plate_layout_folder_id,
                sample_metadata_form,
                rerun_form,
                bravo_metadata.sample_barcode,
            )
            control_wells = get_control_wells_from_type(
                controls_type=bravo_metadata.controls_type,
                accession_data=accession_data,
            )
            update_accession_data_with_controls(
                control_wells, accession_data, barcode
            )

            processing_results = process_barcode(
                cfg,
                barcode,
                barcode_files,
                bravo_metadata,
                protocol,
                control_wells,
                accession_data,
            )

            # Upload the results CSV.
            with drive.put_file(
                drive_service,
                csv_results_folder_id,
                processing_results.results_filename,
            ) as results_fh:
                processing_results.write_results(results_fh)

            # Upload the China Basin report; the file object is kept because
            # its id is used in the email body below.
            china_basin_result_file = drive.put_file(
                drive_service,
                cb_report_folder_id,
                processing_results.cb_report_filename,
            )
            with china_basin_result_file as cb_fh:
                processing_results.write_cb_report(cb_fh)

            # Create the PDF report in memory, then upload it.
            logger.info(
                msg=f"Generating and uploading results PDF for: {barcode}"
            )
            final_pdf = io.BytesIO()
            create_final_pdf(processing_results, final_pdf)
            with drive.put_file(
                drive_service,
                final_results_folder_id,
                processing_results.final_pdf_filename,
            ) as pdf_fh:
                pdf_fh.write(final_pdf.getvalue())

            logger.info(msg=f"Sending email report: {barcode}")
            email_cfg = cfg["EMAIL"]
            mail.send_email(
                google_credentials,
                sender=email_cfg.get("sender"),
                recipients=email_cfg.get("recipients"),
                subject=_format_email_subject(
                    sample_barcode=bravo_metadata.sample_barcode,
                    qpcr_barcode=barcode,
                ),
                body=_format_email_body(
                    sample_barcode=bravo_metadata.sample_barcode,
                    results_file_id=china_basin_result_file.id,
                ),
                attachments={processing_results.final_pdf_filename: final_pdf},
            )

            message = (
                f"Processed sample plate: {bravo_metadata.sample_barcode}-{barcode}"
                f" using rev {git_info}"
            )
            logger.critical(msg=message, extra=slack)

            # Write a marker so we don't process this file again.
            processing_results.write_marker_file(
                drive_service, markers_folder_id
            )
        except Exception as err:
            # One failing barcode must not stop the rest of the loop.
            logger.critical(
                f"Error in [{cfg.aws_env}]: {err}", extra=slack
            )
            logger.exception("Details:")
def parse_qpcr_csv(args):
    """Process qPCR runs locally and write the outputs into the run path.

    Run files come either from the Google Drive logs folder (when
    ``args.use_gdrive`` is set) or from the ``*.csv`` files in
    ``args.qpcr_run_path``. For every barcode with a complete file set, the
    results CSV, the CB report and the final PDF are written into
    ``args.qpcr_run_path``.

    :param args: Parsed command-line arguments. The attributes read are
        ``debug``, ``qpcr_run_path``, ``use_gdrive``, ``barcodes``,
        ``secret_id``, ``protocol`` and ``plate_map_file``.
    :raises ValueError: If ``use_gdrive`` is set without any barcodes, or if
        a barcode is reached with neither a plate map file nor Google Drive
        to take the accession data from.
    """
    cfg = Config()
    create_logger(cfg, debug=args.debug)

    logger.info(msg=f"Started local processing in: {args.qpcr_run_path}")

    if args.use_gdrive and not args.barcodes:
        raise ValueError(
            "You must specify barcodes to process from Google Drive"
        )

    run_path = pathlib.Path(args.qpcr_run_path)

    google_credentials = gutils.get_secrets_manager_credentials(
        args.secret_id
    )
    drive_service = drive.get_service(google_credentials)

    collective_form = CollectiveForm(
        drive_service, cfg["DATA"]["collection_form_spreadsheet_id"]
    )
    sample_metadata_form = collective_form[SampleMetadata.SHEET_NAME]
    rerun_form = collective_form[SampleRerun.SHEET_NAME]

    if args.use_gdrive:
        logs_folder_id = drive.get_folder_id_of_path(
            drive_service, cfg.PCR_LOGS_FOLDER
        )
        run_files = list(
            drive.get_contents_by_folder_id(
                drive_service, logs_folder_id, only_files=True
            )
        )
        # Only needed (and only defined) when reading from Google Drive.
        plate_layout_folder_id = drive.get_folder_id_of_path(
            drive_service, cfg.PLATE_LAYOUT_FOLDER
        )
    else:
        run_files = run_path.glob("*.csv")

    # Group run files by barcode, honouring the optional barcode filter.
    barcodes_to_process = defaultdict(RunFiles)
    for run_file in run_files:
        file_match = RunFiles.get_qpcr_file_type(run_file.name)
        if file_match is None:
            continue
        file_barcode = file_match[RunFiles.BARCODE]
        if args.barcodes and file_barcode not in args.barcodes:
            continue
        barcodes_to_process[file_barcode].add_file(file_match, run_file)

    for barcode, barcode_files in barcodes_to_process.items():
        # All files must be present, at least one quant_amp file.
        if not barcode_files.all_files:
            message = f"Missing files for: {barcode}. Skipping for now"
            logger.info(msg=message)
            continue

        logger.info(msg=f"Found sample to process, barcode: {barcode}")
        logger.info(msg=f"Getting metadata and data for: {barcode}")
        bravo_metadata = BravoMetadata.load_from_spreadsheet(
            barcode, collective_form
        )

        # A protocol given by the user overrides the one from the metadata.
        if args.protocol is not None:
            protocol_name = args.protocol
        else:
            protocol_name = bravo_metadata.sop_protocol
        protocol = get_protocol(protocol_name)

        # Every entry of the protocol mapping needs a quant amp file.
        missing_quant_amp = set(protocol.mapping) - set(
            barcode_files.quant_amp
        )
        if missing_quant_amp:
            missing_names = ", ".join(map(str, missing_quant_amp))
            message = f"Missing quant amp files for {barcode}: {missing_names}"
            logger.critical(msg=message)
            continue

        # Accession data: a local plate map wins over Google Drive.
        if args.plate_map_file is not None:
            plate_map_type = accession.get_plate_map_type_from_name(
                args.plate_map_file.name
            )
            accession_data = accession.read_accession_data(
                plate_map_type, args.plate_map_file
            )
        elif args.use_gdrive:
            accession_data = accession.get_accession_data_with_rerun(
                drive_service,
                plate_layout_folder_id,
                sample_metadata_form,
                rerun_form,
                bravo_metadata.sample_barcode,
            )
        else:
            raise ValueError(
                "You must provide a plate map file or use Google Drive"
            )

        control_wells = get_control_wells_from_type(
            controls_type=bravo_metadata.controls_type,
            accession_data=accession_data,
        )
        # Check for valid accessions.
        update_accession_data_with_controls(
            control_wells, accession_data, barcode
        )

        # Process well data and check controls, return results.
        logger.info(msg=f"Processing well data and controls for: {barcode}")
        processing_results = process_barcode(
            cfg,
            barcode,
            barcode_files,
            bravo_metadata,
            protocol,
            control_wells,
            accession_data,
        )

        results_path = run_path / processing_results.results_filename
        with results_path.open("w") as results_fh:
            processing_results.write_results(results_fh)

        cb_report_path = run_path / processing_results.cb_report_filename
        with cb_report_path.open("w") as cb_fh:
            processing_results.write_cb_report(cb_fh)

        # Create the PDF report.
        logger.info(msg=f"Generating results PDF for: {barcode}")
        final_pdf_path = run_path / processing_results.final_pdf_filename
        with final_pdf_path.open("wb") as pdf_fh:
            create_final_pdf(processing_results, pdf_fh)