def create_layout_pdf(cfg: Config, entry_data: Dict[str, str]):
    """Main function to read a layout file and write the resulting plate
    layout map.

    Parameters
    ----------
    cfg: Config
        configuration information
    entry_data: Dict[str, str]
        dictionary containing the response that was submitted to Sample
        Plate Metada. The required keys are: the researcher, timestamp,
        sample plate barcode, and a link to the sample plate map in Google
        Drive. Optionally, the "local_run" key is used as a flag to indicate
        the script is being run from the command line rather than on AWS.

    Raises
    ------
    BadDriveURL
        If the sample plate map link in the form response is missing/bad.
    """
    sample_barcode = entry_data[SampleMetadata.SAMPLE_PLATE_BARCODE]
    output_filename = f"{sample_barcode}.pdf"

    # Establish the Drive service first, but do NOT open/create the output
    # destination yet: the original code opened it before fetching the plate
    # map, leaking an open "wb" handle (or creating an orphan empty file in
    # the Drive folder) whenever BadDriveURL or a plate-map error was raised.
    if LOCAL_RUN in entry_data:
        output_path, drive_service = entry_data[LOCAL_RUN]
    else:
        logger.debug("getting gdrive credentials")
        google_creds = gutils.get_secrets_manager_credentials()
        drive_service = drive.get_service(google_creds)

    try:
        plate_map_file = drive.get_layout_file_from_url(
            drive_service, entry_data[SampleMetadata.SAMPLE_PLATE_MAP])
    except KeyError as err:
        # chain the original KeyError so the root cause stays visible
        raise BadDriveURL(
            f"Bad URL in {SampleMetadata.SHEET_NAME} for {sample_barcode}"
        ) from err

    plate_map_type = accession.get_plate_map_type_from_name(
        plate_map_file.name)
    with plate_map_file.open() as fh:
        accession_data = accession.read_accession_data(plate_map_type, fh)

    # Accession data read successfully -- now it is safe to create the
    # output destination (local file or Drive upload object).
    if LOCAL_RUN in entry_data:
        output_file_object = (output_path / output_filename).open("wb")
    else:
        processed_layout_folder_id = drive.get_folder_id_of_path(
            drive_service, cfg.LAYOUT_PDF_FOLDER)
        output_file_object = drive.put_file(
            drive_service,
            processed_layout_folder_id,
            output_filename,
            binary=True,
        )

    logger.info(f"Writing layout map to {output_filename}")
    with output_file_object as output_fh:
        format_pdf(
            entry_data[SampleMetadata.SAMPLE_PLATE_BARCODE],
            accession_data,
            entry_data[SampleMetadata.RESEARCHER_NAME],
            format_time(cfg, entry_data[SampleMetadata.TIMESTAMP]),
            output_fh,
        )
def __init__(self, cfg, google_creds):
    """Set up Drive and gspread connections plus the in-memory report rows.

    Parameters
    ----------
    cfg:
        configuration information, stored for later use
    google_creds:
        Google credentials used for both the Drive service and the
        gspread connection
    """
    self.cfg = cfg
    self.drive_service = drive.get_service(google_creds)
    # Each output sheet accumulates rows beneath its header row.
    self.verbose_data = [VERBOSE_SHEET_HEADER]
    self.clin_lab_data = [CLIN_LAB_SHEET_HEADER]
    self.supervisor_plate_queue_data = [SUPERVISOR_PLATE_QUEUE_HEADER]
    self.gc = gutils.get_gspread_connection(google_creds)
    logger.info(msg="Initializing processing resources")
    self.processing_resources = ProcessingResources(
        self.drive_service, self.cfg)
def lambda_handler(event, context):
    """AWS Lambda entry point: dispatch a submitted form action to its handler.

    Parses the request body (when present) to determine the action, routes
    it via a dispatch table, and always answers 200/OK on success. Any
    exception is reported to Slack and re-raised.
    """
    try:
        cfg = Config()
        create_logger(cfg)
        creds = gutils.get_secrets_manager_credentials()
        os.chdir("/tmp")

        # Set up for Google Drive
        drive_service = drive.get_service(creds)
        sheets_service = sheets.get_service(creds)

        # Route based on the actions
        action = None
        if "body" in event:
            event_body = json.loads(event["body"])
            logger.info(msg=f"EVENT_BODY: {event_body}")
            action = event_body["action"]
            logger.info(msg=f"ACTION: {action}")

        # Dispatch table; lambdas defer evaluation so event_body is only
        # touched when an action actually matched (i.e. "body" was present).
        routes = {
            "external_sample_shipment": lambda:
                external_sample_shipment.handle_external_sample_shipment_request(
                    cfg, drive_service, event_body),
            "sample_database": lambda:
                sample_database.external_sample_database(
                    cfg, drive_service, sheets_service, event_body),
            "draw_96_plate_map": lambda:
                draw_96_plate_map.draw_96_plate_map(
                    cfg, drive_service, sheets_service, event_body),
            "concat_96_384": lambda:
                concat_96_384.concat_96_384(
                    cfg, drive_service, sheets_service, event_body),
            "bind_index_plate": lambda:
                bind_index_plate.handle_bind_index_plate_request(
                    cfg, drive_service, sheets_service, event_body),
            "update_ripe_samples": lambda:
                update_ripe_samples.update_ripe_samples(
                    cfg, drive_service, sheets_service, event_body),
            "metadata_lookup": lambda:
                metadata_lookup.metadata_lookup(
                    cfg, drive_service, sheets_service, event_body),
        }

        handler = routes.get(action)
        if handler is not None:
            handler()

        return {
            "statusCode": 200,
            "headers": {
                "Content-Type": "text/html"
            },
            "body": "OK",
        }
    except Exception as err:
        slack.post(f"*Error in mNGS scripts:*\n{err}")
        raise
def fetch_barcodes(args, cfg):
    """Download the qPCR run files for the requested barcodes from Drive.

    For every barcode in ``args.barcodes`` that has a complete set of run
    files in the qPCR logs folder, download the run-info, quant-cq and all
    quant-amp files as text into ``args.output_dir``.

    Parameters
    ----------
    args:
        parsed command-line arguments; uses ``secret_id``, ``barcodes``
        and ``output_dir``
    cfg:
        configuration information; uses ``PCR_LOGS_FOLDER``
    """
    google_credentials = gutils.get_secrets_manager_credentials(args.secret_id)
    drive_service = drive.get_service(google_credentials)

    def _download_to_output_dir(drive_entry):
        # Fetch one Drive file as text and write it into args.output_dir.
        # (This was copy-pasted three times in the original body.)
        logger.info(msg=f" Downloading: {drive_entry.name}")
        with drive.get_file(drive_service, drive_entry.id, binary=False) as fh:
            with (args.output_dir / drive_entry.name).open("w") as out:
                out.write(fh.read())

    # qpcr logs folder
    logs_folder_id = drive.get_folder_id_of_path(drive_service,
                                                 cfg.PCR_LOGS_FOLDER)
    logs_folder_contents = drive.get_contents_by_folder_id(drive_service,
                                                           logs_folder_id,
                                                           only_files=True)

    # group the log files by barcode, keeping only requested barcodes
    barcodes_to_fetch = defaultdict(RunFiles)
    for entry in logs_folder_contents:
        m = RunFiles.get_qpcr_file_type(entry.name)
        if m is None:
            continue
        elif m[RunFiles.BARCODE] in args.barcodes:
            barcodes_to_fetch[m[RunFiles.BARCODE]].add_file(m, entry)

    for barcode, barcode_files in barcodes_to_fetch.items():
        # all files must be present, at least one quant_amp file
        if not barcode_files.all_files:
            logger.warning(msg=f"Missing files for {barcode}!")
            continue
        logger.info(msg=f"Found sample to fetch: {barcode}")

        # read in the run information and quant cq, then every quant amp
        _download_to_output_dir(barcode_files.run_info)
        _download_to_output_dir(barcode_files.quant_cq)
        for quant_amp in barcode_files.quant_amp.values():
            _download_to_output_dir(quant_amp)
def main():
    """CLI entry point: build a layout PDF for each barcode on the command line.

    Looks each barcode up in the Sample Plate Metadata sheet, skipping any
    barcode that matches zero rows or more than one row.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("barcodes", nargs="+")
    arg_parser.add_argument("--output-dir", type=Path, default=Path("."))
    arg_parser.add_argument("--debug", action="store_true")
    arg_parser.add_argument("--secret-id", default="covid-19/google_creds")
    args = arg_parser.parse_args()

    cfg = Config()
    create_logger(cfg, debug=args.debug)

    creds = gutils.get_secrets_manager_credentials(args.secret_id)
    service = drive.get_service(creds)

    logger.debug("Downloading collective form")
    form = CollectiveForm(
        service, cfg["DATA"]["collection_form_spreadsheet_id"])
    plate_metadata = form[SampleMetadata.SHEET_NAME]

    for barcode in args.barcodes:
        # exactly one metadata row must match the barcode
        try:
            row = clean_single_row(
                plate_metadata, SampleMetadata.SAMPLE_PLATE_BARCODE, barcode)
        except MetadataNotFoundError:
            logger.error(f"0 results for {barcode}, skipping")
            continue
        except MultipleRowsError as ex:
            logger.error(f"{ex.match_count} results for {barcode}, skipping")
            continue

        row[SampleMetadata.TIMESTAMP] = str(row[SampleMetadata.TIMESTAMP])
        # mark this as a local run so create_layout_pdf writes to output_dir
        row[LOCAL_RUN] = (args.output_dir, service)

        logger.debug(f"Making layout PDF for {barcode}")
        create_layout_pdf(cfg=cfg, entry_data=row)
def __init__(self, google_credentials, config):
    """Store the configuration and build a Google Drive service.

    Parameters
    ----------
    google_credentials:
        credentials used to create the Drive service
    config:
        configuration information, stored for later use
    """
    self.drive_service = drive.get_service(google_credentials)
    self.config = config
def gdrive_service():
    """Fixture: build a Google Drive service from the test credentials."""
    test_creds = credentials_for_tests()
    return get_service(test_creds)
def processing(cfg: Config, google_credentials: service_account.Credentials):
    """Scan the qPCR logs folder on Drive and process every new sample plate.

    For each barcode that has a complete file set and no marker file yet:
    load its metadata, process well data and controls, upload CSV results,
    the China Basin report and the final PDF to Drive, email the report, and
    write a marker file so the plate is not processed again. Errors inside
    the per-barcode loop are logged (with Slack notification) and do not
    stop the loop.

    Parameters
    ----------
    cfg: Config
        configuration information; indexed for folder paths and email settings
    google_credentials: service_account.Credentials
        credentials for the Drive service and for sending email
    """
    git_info = get_git_info()
    drive_service = drive.get_service(google_credentials)
    logger.info(msg=f"Starting processing loop with code version: {git_info}")
    # qpcr logs folder
    logs_folder_id = drive.get_folder_id_of_path(drive_service,
                                                 cfg.PCR_LOGS_FOLDER)
    # markers folder
    markers_folder_id = drive.get_folder_id_of_path(drive_service,
                                                    cfg.PCR_MARKERS_FOLDER)
    # csv results folder
    csv_results_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.CSV_RESULTS_FOLDER)
    # CB rad results folder
    cb_report_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.CHINA_BASIN_CSV_REPORTS_FOLDER)
    # final reports folder
    final_results_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.FINAL_REPORTS_FOLDER)
    # get the collection spreadsheet
    collective_form = CollectiveForm(
        drive_service, cfg["DATA"]["collection_form_spreadsheet_id"])
    logs_folder_contents = drive.get_contents_by_folder_id(drive_service,
                                                           logs_folder_id,
                                                           only_files=True)
    marker_folder_contents = drive.get_contents_by_folder_id(drive_service,
                                                             markers_folder_id,
                                                             only_files=True)
    plate_layout_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.PLATE_LAYOUT_FOLDER)
    # marker file names double as the set of already-processed barcodes
    completed_barcodes = set(marker_folder_entry.name
                             for marker_folder_entry in marker_folder_contents)
    sample_metadata_form = collective_form[SampleMetadata.SHEET_NAME]
    rerun_form = collective_form[SampleRerun.SHEET_NAME]
    # group log file entries by barcode
    logger.info(msg="Checking for samples to process")
    barcodes_to_process = defaultdict(RunFiles)
    for entry in logs_folder_contents:
        m = RunFiles.get_qpcr_file_type(entry.name)
        if m is None or m[RunFiles.BARCODE] in completed_barcodes:
            # not a recognized qPCR file, or already processed
            continue
        else:
            barcodes_to_process[m[RunFiles.BARCODE]].add_file(m, entry)
    for barcode, barcode_files in barcodes_to_process.items():
        # all files must be present, at least one quant_amp file
        if not barcode_files.all_files:
            message = f"Missing files for: {barcode}. Skipping for now"
            logger.critical(msg=message, extra={"notify_slack": True})
            continue
        try:
            logger.info(msg=f"Found sample to process, barcode: {barcode}")
            logger.info(msg=f"Getting metadata and data for: {barcode}")
            bravo_metadata = BravoMetadata.load_from_spreadsheet(
                barcode,
                collective_form,
            )
            if bravo_metadata.sop_protocol is None:
                message = f"Skipping sample plate: {barcode}, no protocol"
                logger.critical(msg=message, extra={"notify_slack": True})
                continue
            protocol = get_protocol(bravo_metadata.sop_protocol)
            # every quant-amp file the protocol maps must be present
            if not set(barcode_files.quant_amp).issuperset(protocol.mapping):
                missing = map(
                    str,
                    set(protocol.mapping) - set(barcode_files.quant_amp))
                message = f"Missing quant amp files for {barcode}: {', '.join(missing)}"
                logger.critical(msg=message, extra={"notify_slack": True})
                continue
            # process well data and check controls, return results
            logger.info(
                msg=f"Processing well data and controls for: {barcode}")
            accession_data = accession.get_accession_data_with_rerun(
                drive_service,
                plate_layout_folder_id,
                sample_metadata_form,
                rerun_form,
                bravo_metadata.sample_barcode,
            )
            control_wells = get_control_wells_from_type(
                controls_type=bravo_metadata.controls_type,
                accession_data=accession_data,
            )
            update_accession_data_with_controls(control_wells,
                                                accession_data,
                                                barcode)
            processing_results = process_barcode(
                cfg,
                barcode,
                barcode_files,
                bravo_metadata,
                protocol,
                control_wells,
                accession_data,
            )
            # upload the CSV results
            with drive.put_file(
                    drive_service,
                    csv_results_folder_id,
                    processing_results.results_filename,
            ) as fh:
                processing_results.write_results(fh)
            # upload the China Basin report; keep the file object so its
            # Drive id can be referenced in the email body below
            china_basin_result_file = drive.put_file(
                drive_service,
                cb_report_folder_id,
                processing_results.cb_report_filename,
            )
            with china_basin_result_file as fh:
                processing_results.write_cb_report(fh)
            # create pdf report
            logger.info(
                msg=f"Generating and uploading results PDF for: {barcode}")
            final_pdf = io.BytesIO()
            create_final_pdf(processing_results, final_pdf)
            pdf_results_file = drive.put_file(
                drive_service,
                final_results_folder_id,
                processing_results.final_pdf_filename,
            )
            with pdf_results_file as out_fh:
                out_fh.write(final_pdf.getvalue())
            logger.info(msg=f"Sending email report: {barcode}")
            mail.send_email(
                google_credentials,
                sender=cfg["EMAIL"].get("sender"),
                recipients=cfg["EMAIL"].get("recipients"),
                subject=_format_email_subject(
                    sample_barcode=bravo_metadata.sample_barcode,
                    qpcr_barcode=barcode,
                ),
                body=_format_email_body(
                    sample_barcode=bravo_metadata.sample_barcode,
                    results_file_id=china_basin_result_file.id,
                ),
                attachments={processing_results.final_pdf_filename: final_pdf},
            )
            message = (
                f"Processed sample plate: {bravo_metadata.sample_barcode}-{barcode}"
                f" using rev {git_info}")
            logger.critical(msg=message, extra={"notify_slack": True})
            # write a marker so we don't process this file again.
            processing_results.write_marker_file(drive_service,
                                                 markers_folder_id)
        except Exception as err:
            # boundary handler: report to Slack and keep processing the
            # remaining barcodes
            logger.critical(f"Error in [{cfg.aws_env}]: {err}",
                            extra={"notify_slack": True})
            logger.exception("Details:")
def parse_qpcr_csv(args):
    """Process qPCR run files locally, reading inputs from disk or Drive.

    Groups the run files by barcode, loads plate metadata, processes well
    data and controls, then writes the results CSV, the China Basin report
    and the final PDF into the local run directory.

    Parameters
    ----------
    args:
        parsed command-line arguments; uses ``qpcr_run_path``, ``debug``,
        ``use_gdrive``, ``barcodes``, ``secret_id``, ``protocol`` and
        ``plate_map_file``

    Raises
    ------
    ValueError
        If ``--use-gdrive`` is given without barcodes, or if neither a
        plate map file nor Google Drive is available for accession data.
    """
    cfg = Config()
    create_logger(cfg, debug=args.debug)
    logger.info(msg=f"Started local processing in: {args.qpcr_run_path}")
    if args.use_gdrive and not args.barcodes:
        raise ValueError(
            "You must specify barcodes to process from Google Drive")
    run_path = pathlib.Path(args.qpcr_run_path)
    google_credentials = gutils.get_secrets_manager_credentials(args.secret_id)
    drive_service = drive.get_service(google_credentials)
    collective_form = CollectiveForm(
        drive_service, cfg["DATA"]["collection_form_spreadsheet_id"])
    sample_metadata_form = collective_form[SampleMetadata.SHEET_NAME]
    rerun_form = collective_form[SampleRerun.SHEET_NAME]
    # input files come either from the Drive logs folder or from local CSVs
    if args.use_gdrive:
        logs_folder_id = drive.get_folder_id_of_path(drive_service,
                                                     cfg.PCR_LOGS_FOLDER)
        logs_folder_contents = [
            drive_file for drive_file in drive.get_contents_by_folder_id(
                drive_service, logs_folder_id, only_files=True)
        ]
        plate_layout_folder_id = drive.get_folder_id_of_path(
            drive_service, cfg.PLATE_LAYOUT_FOLDER)
    else:
        logs_folder_contents = run_path.glob("*.csv")
    # group run files by barcode, honoring an optional barcode filter
    barcodes_to_process = defaultdict(RunFiles)
    for run_file in logs_folder_contents:
        m = RunFiles.get_qpcr_file_type(run_file.name)
        if m is None:
            continue
        elif args.barcodes and m[RunFiles.BARCODE] not in args.barcodes:
            continue
        else:
            barcodes_to_process[m[RunFiles.BARCODE]].add_file(m, run_file)
    for barcode, barcode_files in barcodes_to_process.items():
        # all files must be present, at least one quant_amp file
        if not barcode_files.all_files:
            message = f"Missing files for: {barcode}. Skipping for now"
            logger.info(msg=message)
            continue
        logger.info(msg=f"Found sample to process, barcode: {barcode}")
        logger.info(msg=f"Getting metadata and data for: {barcode}")
        bravo_metadata = BravoMetadata.load_from_spreadsheet(
            barcode, collective_form)
        if args.protocol is not None:
            # user specified the protocol
            protocol = get_protocol(args.protocol)
        else:
            protocol = get_protocol(bravo_metadata.sop_protocol)
        # every quant-amp file the protocol maps must be present
        if not set(barcode_files.quant_amp).issuperset(protocol.mapping):
            missing = map(str,
                          set(protocol.mapping) - set(barcode_files.quant_amp))
            message = f"Missing quant amp files for {barcode}: {', '.join(missing)}"
            logger.critical(msg=message)
            continue
        # accession data comes from an explicit plate map file, or Drive
        if args.plate_map_file is not None:
            plate_map_type = accession.get_plate_map_type_from_name(
                args.plate_map_file.name)
            accession_data = accession.read_accession_data(
                plate_map_type, args.plate_map_file)
        elif args.use_gdrive:
            accession_data = accession.get_accession_data_with_rerun(
                drive_service,
                plate_layout_folder_id,
                sample_metadata_form,
                rerun_form,
                bravo_metadata.sample_barcode,
            )
        else:
            raise ValueError(
                "You must provide a plate map file or use Google Drive")
        control_wells = get_control_wells_from_type(
            controls_type=bravo_metadata.controls_type,
            accession_data=accession_data,
        )
        # check for valid accessions
        update_accession_data_with_controls(control_wells, accession_data,
                                            barcode)
        # process well data and check controls, return results
        logger.info(msg=f"Processing well data and controls for: {barcode}")
        processing_results = process_barcode(
            cfg,
            barcode,
            barcode_files,
            bravo_metadata,
            protocol,
            control_wells,
            accession_data,
        )
        with (run_path / processing_results.results_filename).open("w") as fh:
            processing_results.write_results(fh)
        with (run_path / processing_results.cb_report_filename).open("w") as fh:
            processing_results.write_cb_report(fh)
        # create pdf report
        logger.info(msg=f"Generating results PDF for: {barcode}")
        final_pdf_filename = run_path / processing_results.final_pdf_filename
        with open(final_pdf_filename, "wb") as output_file:
            create_final_pdf(processing_results, output_file)