def write_marker_file(self, sample_barcode):
    """Writes a marker file for the given results.

    The marker's *name* (the sample barcode) is the only information that
    matters; the file body is empty. Downstream code lists this folder and
    treats each file name as an already-tracked barcode.

    Parameters
    ----------
    sample_barcode:
        barcode of the processed sample plate; becomes the marker file name.
    """
    # NOTE(review): "ACCESSSION" (triple S) matches the spelling used by
    # get_already_tracked_samples, so it is presumably the actual attribute
    # name on Config — confirm before "correcting" it.
    marker_folder_id = drive.get_folder_id_of_path(
        self.drive_service, self.cfg.ACCESSSION_TRACKING_MARKERS_FOLDER)
    # Create the (empty) marker file and immediately close it via the
    # context manager; no content is written on purpose.
    with drive.put_file(self.drive_service, marker_folder_id, sample_barcode):
        ...
def create_layout_pdf(cfg: Config, entry_data: Dict[str, str]):
    """Read a plate layout file and write the resulting plate layout map PDF.

    Parameters
    ----------
    cfg: Config
        configuration information
    entry_data: Dict[str, str]
        dictionary containing the response that was submitted to Sample
        Plate Metadata. The required keys are: the researcher, timestamp,
        sample plate barcode, and a link to the sample plate map in Google
        Drive. Optionally, the "local_run" key is used as a flag to indicate
        the script is being run from the command line rather than on AWS;
        its value must be an ``(output_path, drive_service)`` pair.

    Raises
    ------
    BadDriveURL
        if the plate map link in the form response cannot be resolved.
    """
    sample_barcode = entry_data[SampleMetadata.SAMPLE_PLATE_BARCODE]
    output_filename = f"{sample_barcode}.pdf"

    # Resolve the Drive service first, but defer opening the output file
    # until all fallible work is done. The previous version opened the
    # local output file up front, leaking the handle if BadDriveURL (or a
    # plate-map parse error) was raised before the `with` block.
    local_run = LOCAL_RUN in entry_data
    if local_run:
        output_path, drive_service = entry_data[LOCAL_RUN]
    else:
        logger.debug("getting gdrive credentials")
        google_creds = gutils.get_secrets_manager_credentials()
        drive_service = drive.get_service(google_creds)

    try:
        plate_map_file = drive.get_layout_file_from_url(
            drive_service, entry_data[SampleMetadata.SAMPLE_PLATE_MAP])
    except KeyError:
        raise BadDriveURL(
            f"Bad URL in {SampleMetadata.SHEET_NAME} for {sample_barcode}")

    plate_map_type = accession.get_plate_map_type_from_name(
        plate_map_file.name)
    with plate_map_file.open() as fh:
        accession_data = accession.read_accession_data(plate_map_type, fh)

    # Only now that we have data to write do we create the output file.
    if local_run:
        output_file_object = (output_path / output_filename).open("wb")
    else:
        processed_layout_folder_id = drive.get_folder_id_of_path(
            drive_service, cfg.LAYOUT_PDF_FOLDER)
        output_file_object = drive.put_file(
            drive_service,
            processed_layout_folder_id,
            output_filename,
            binary=True,
        )

    logger.info(f"Writing layout map to {output_filename}")
    with output_file_object as output_fh:
        format_pdf(
            sample_barcode,
            accession_data,
            entry_data[SampleMetadata.RESEARCHER_NAME],
            format_time(cfg, entry_data[SampleMetadata.TIMESTAMP]),
            output_fh,
        )
def test_mkdir(
    gdrive_service: DriveService,
    gdrive_folder: DriveObject,
    new_folder_name="mkdir-test",
):
    """A folder created via mkdir must be reachable by path traversal."""
    created = mkdir(gdrive_service, gdrive_folder.id, new_folder_name)
    path = [gdrive_folder.name, new_folder_name]
    assert created.id == get_folder_id_of_path(gdrive_service, path)
def __init__(self, drive_service, cfg):
    """Resolve tracking folders, load accession state, and pull form sheets."""
    # Completed/processed barcode sets and Drive folder IDs.
    self.completed_pcr_barcodes = get_completed_pcr_barcodes(
        drive_service, cfg)
    self.results_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.CSV_RESULTS_FOLDER_TRACKING)
    self.accession_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.PLATE_LAYOUT_FOLDER)
    self.accession_locations = get_accession_locations(drive_service, cfg)
    self.mark_as_processed_sample_barcodes = get_mark_as_processed_sample_barcodes(
        drive_service, cfg)

    # Spreadsheet identifiers from the [DATA] config section.
    data_section = cfg["DATA"]
    self.accessions_sheet = data_section.get("accession_tracking_sheet")
    self.clin_lab_sheet = data_section.get("clin_lab_reporting_sheet")
    self.supervisor_plate_queue_sheet = data_section.get(
        "supervisor_plate_queue_sheet")

    # Individual response sheets from the collection form workbook.
    responses = CollectiveForm(
        drive_service, data_section.get("collection_form_spreadsheet_id"))
    self.registered_df = responses[SampleRegistration.SHEET_NAME]
    self.bravo_rna_df = responses[BravoRNAExtraction.SHEET_NAME]
    self.check_in_df = responses[FridgeCheckin.SHEET_NAME]
    self.starting_bravo_df = responses[BravoStart.SHEET_NAME]
    self.freezer_check_in_df = responses[FreezerCheckin.SHEET_NAME]
def get_already_tracked_samples(drive_service, cfg):
    """Return a list of processed qPCR barcodes by checking our marker file folder"""
    # Each marker file is named after a barcode; the folder listing *is*
    # the set of completed barcodes.
    folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.ACCESSSION_TRACKING_MARKERS_FOLDER)
    entries = drive.get_contents_by_folder_id(
        drive_service, folder_id, only_files=True)
    return {entry.name for entry in entries}
def __init__(
    self,
    session: Session,
    drive_service: drive.DriveService,
    folder_path_components: "List[str] | None" = None,
):
    """Bind the session and Drive service, resolve the backing folder, and
    load the initial data set.

    Parameters
    ----------
    session: Session
        session object; also forwarded to the superclass initializer.
    drive_service: drive.DriveService
        authenticated Drive service used to resolve the folder path.
    folder_path_components: list of str, optional
        path components of the Drive folder backing this object.
        NOTE(review): the default is None even though the original hint was
        a plain List[str]; presumably drive.get_folder_id_of_path tolerates
        None — confirm before relying on the default.
    """
    super().__init__(session=session)
    self.session = session
    self.drive_service = drive_service
    self.folder_path_components = folder_path_components
    # Resolve the folder once at construction; later code reads the cached id.
    self.folder_id = drive.get_folder_id_of_path(
        self.drive_service, self.folder_path_components)
    self.data = self.initialize_data_from_source()
def test_mkdir_recursive(
    gdrive_service: DriveService,
    gdrive_folder: DriveObject,
    new_folder_name="mkdir-recursive",
):
    """mkdir_recursive must create the nested path and be idempotent."""
    components = [new_folder_name, "abc", "abc", "def"]
    expected_path = [gdrive_folder.name] + components

    first_id = mkdir_recursive(gdrive_service, gdrive_folder.id, components)
    resolved = get_folder_id_of_path(gdrive_service, expected_path)
    assert first_id == resolved

    # Repeating the call must not create duplicates: same ID comes back.
    repeat_id = mkdir_recursive(gdrive_service, gdrive_folder.id, components)
    assert repeat_id == resolved
def fetch_barcodes(args, cfg):
    """Download the qPCR log files for the requested barcodes from Drive.

    For every barcode in ``args.barcodes`` that has a complete set of run
    files in the PCR logs folder, the run-info file, the quant-cq file, and
    every quant-amp file are written into ``args.output_dir``.

    Parameters
    ----------
    args:
        parsed CLI arguments; uses ``secret_id``, ``barcodes``, ``output_dir``.
    cfg:
        configuration object; uses ``PCR_LOGS_FOLDER``.
    """
    google_credentials = gutils.get_secrets_manager_credentials(args.secret_id)
    drive_service = gutils_drive_service = drive.get_service(google_credentials)

    def _download(entry):
        # Copy one Drive file into args.output_dir under its own name.
        # Text mode matches the original behavior for all three file types.
        logger.info(msg=f" Downloading: {entry.name}")
        with drive.get_file(drive_service, entry.id, binary=False) as fh:
            with (args.output_dir / entry.name).open("w") as out:
                out.write(fh.read())

    # qpcr logs folder
    logs_folder_id = drive.get_folder_id_of_path(drive_service,
                                                 cfg.PCR_LOGS_FOLDER)
    logs_folder_contents = drive.get_contents_by_folder_id(drive_service,
                                                           logs_folder_id,
                                                           only_files=True)

    # Group the folder entries by barcode, keeping only requested barcodes.
    barcodes_to_fetch = defaultdict(RunFiles)
    for entry in logs_folder_contents:
        m = RunFiles.get_qpcr_file_type(entry.name)
        if m is None:
            continue
        elif m[RunFiles.BARCODE] in args.barcodes:
            barcodes_to_fetch[m[RunFiles.BARCODE]].add_file(m, entry)

    for barcode, barcode_files in barcodes_to_fetch.items():
        # all files must be present, at least one quant_amp file
        if not barcode_files.all_files:
            logger.warning(msg=f"Missing files for {barcode}!")
            continue

        logger.info(msg=f"Found sample to fetch: {barcode}")
        # Previously this download sequence was duplicated three times
        # verbatim; it is now factored into _download above.
        _download(barcode_files.run_info)
        _download(barcode_files.quant_cq)
        for quant_amp in barcode_files.quant_amp.values():
            _download(quant_amp)
def get_accession_locations(drive_service, cfg) -> Dict[str, Tuple[str, str]]:
    """Return a mapping between accession IDs and their origin location.

    Reads every CSV file in the accession-locations folder. Rows may have
    two columns (accession, location) or three (accession, location,
    submitter_id); with two columns the submitter id defaults to "".

    Parameters
    ----------
    drive_service:
        authenticated Drive service.
    cfg:
        configuration object; uses ``ACCESSION_LOCATIONS_FOLDER``.

    Returns
    -------
    Dict[str, Tuple[str, str]]
        accession id -> (location, submitter_id)
    """
    accession_locations = {}
    folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.ACCESSION_LOCATIONS_FOLDER)
    location_files = drive.get_contents_by_folder_id(
        drive_service, folder_id, only_files=True)
    for location_file in location_files:
        with drive.get_file(drive_service, location_file.id) as fh:
            reader = csv.reader(fh, delimiter=",")
            for row in reader:
                # csv.reader yields [] for blank lines (common as trailing
                # newlines in exported CSVs); previously that crashed with
                # IndexError on row[0]. Skip blanks and the header row.
                if not row or row[0] == "Accession":
                    continue
                submitter_id = ""
                if len(row) == 3:
                    accession, location, submitter_id = row
                else:
                    accession, location = row
                accession_locations[accession] = location, submitter_id
    return accession_locations
def test_get_folder_id_of_path_one_level(gdrive_service: DriveService,
                                         gdrive_folder: DriveObject):
    """Tests that get_folder_id_of_path works with an unnested folder."""
    resolved = get_folder_id_of_path(gdrive_service, [gdrive_folder.name])
    assert resolved == gdrive_folder.id
def processing(cfg: Config, google_credentials: service_account.Credentials):
    """Main qPCR processing loop.

    Scans the PCR logs folder for barcodes that have a complete file set and
    no marker file yet, then for each one: loads Bravo metadata, processes
    well data against the protocol's controls, uploads CSV/report/PDF
    results to Drive, emails the report, and finally writes a marker file so
    the barcode is not processed again. Per-barcode errors are caught,
    reported to Slack, and the loop moves on to the next barcode.

    Parameters
    ----------
    cfg: Config
        configuration with folder paths and [DATA]/[EMAIL] sections.
    google_credentials: service_account.Credentials
        credentials used for both Drive access and sending email.
    """
    git_info = get_git_info()
    drive_service = drive.get_service(google_credentials)
    logger.info(msg=f"Starting processing loop with code version: {git_info}")

    # qpcr logs folder
    logs_folder_id = drive.get_folder_id_of_path(drive_service,
                                                 cfg.PCR_LOGS_FOLDER)
    # markers folder
    markers_folder_id = drive.get_folder_id_of_path(drive_service,
                                                    cfg.PCR_MARKERS_FOLDER)
    # csv results folder
    csv_results_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.CSV_RESULTS_FOLDER)
    # CB rad results folder
    cb_report_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.CHINA_BASIN_CSV_REPORTS_FOLDER)
    # final reports folder
    final_results_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.FINAL_REPORTS_FOLDER)

    # get the collection spreadsheet
    collective_form = CollectiveForm(
        drive_service, cfg["DATA"]["collection_form_spreadsheet_id"])

    logs_folder_contents = drive.get_contents_by_folder_id(drive_service,
                                                           logs_folder_id,
                                                           only_files=True)
    marker_folder_contents = drive.get_contents_by_folder_id(drive_service,
                                                             markers_folder_id,
                                                             only_files=True)
    plate_layout_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.PLATE_LAYOUT_FOLDER)

    # Marker file names are the qPCR barcodes that were already processed.
    completed_barcodes = set(marker_folder_entry.name
                             for marker_folder_entry in marker_folder_contents)

    sample_metadata_form = collective_form[SampleMetadata.SHEET_NAME]
    rerun_form = collective_form[SampleRerun.SHEET_NAME]

    # group log file entries by barcode
    logger.info(msg="Checking for samples to process")
    barcodes_to_process = defaultdict(RunFiles)
    for entry in logs_folder_contents:
        m = RunFiles.get_qpcr_file_type(entry.name)
        if m is None or m[RunFiles.BARCODE] in completed_barcodes:
            continue
        else:
            barcodes_to_process[m[RunFiles.BARCODE]].add_file(m, entry)

    for barcode, barcode_files in barcodes_to_process.items():
        # all files must be present, at least one quant_amp file
        if not barcode_files.all_files:
            message = f"Missing files for: {barcode}. \nSkipping for now"
            logger.critical(msg=message, extra={"notify_slack": True})
            continue

        try:
            logger.info(msg=f"Found sample to process, barcode: {barcode}")
            logger.info(msg=f"Getting metadata and data for: {barcode}")
            bravo_metadata = BravoMetadata.load_from_spreadsheet(
                barcode,
                collective_form,
            )
            if bravo_metadata.sop_protocol is None:
                message = f"Skipping sample plate: {barcode}, no protocol"
                logger.critical(msg=message, extra={"notify_slack": True})
                continue

            protocol = get_protocol(bravo_metadata.sop_protocol)
            # Every quant-amp file the protocol maps must be present on Drive.
            if not set(barcode_files.quant_amp).issuperset(protocol.mapping):
                missing = map(
                    str,
                    set(protocol.mapping) - set(barcode_files.quant_amp))
                message = f"Missing quant amp files for {barcode}: {', '.join(missing)}"
                logger.critical(msg=message, extra={"notify_slack": True})
                continue

            # process well data and check controls, return results
            logger.info(
                msg=f"Processing well data and controls for: {barcode}")
            accession_data = accession.get_accession_data_with_rerun(
                drive_service,
                plate_layout_folder_id,
                sample_metadata_form,
                rerun_form,
                bravo_metadata.sample_barcode,
            )
            control_wells = get_control_wells_from_type(
                controls_type=bravo_metadata.controls_type,
                accession_data=accession_data,
            )
            update_accession_data_with_controls(control_wells, accession_data,
                                                barcode)
            processing_results = process_barcode(
                cfg,
                barcode,
                barcode_files,
                bravo_metadata,
                protocol,
                control_wells,
                accession_data,
            )

            # Upload per-well CSV results.
            with drive.put_file(
                    drive_service,
                    csv_results_folder_id,
                    processing_results.results_filename,
            ) as fh:
                processing_results.write_results(fh)

            # Upload the China Basin report; the file object is kept so its
            # Drive id can be referenced in the email body below.
            china_basin_result_file = drive.put_file(
                drive_service,
                cb_report_folder_id,
                processing_results.cb_report_filename,
            )
            with china_basin_result_file as fh:
                processing_results.write_cb_report(fh)

            # create pdf report
            logger.info(
                msg=f"Generating and uploading results PDF for: {barcode}")
            final_pdf = io.BytesIO()
            create_final_pdf(processing_results, final_pdf)
            pdf_results_file = drive.put_file(
                drive_service,
                final_results_folder_id,
                processing_results.final_pdf_filename,
            )
            with pdf_results_file as out_fh:
                out_fh.write(final_pdf.getvalue())

            logger.info(msg=f"Sending email report: {barcode}")
            mail.send_email(
                google_credentials,
                sender=cfg["EMAIL"].get("sender"),
                recipients=cfg["EMAIL"].get("recipients"),
                subject=_format_email_subject(
                    sample_barcode=bravo_metadata.sample_barcode,
                    qpcr_barcode=barcode,
                ),
                body=_format_email_body(
                    sample_barcode=bravo_metadata.sample_barcode,
                    results_file_id=china_basin_result_file.id,
                ),
                attachments={processing_results.final_pdf_filename: final_pdf},
            )

            # critical level is used here so the success is relayed to Slack.
            message = (
                f"Processed sample plate: {bravo_metadata.sample_barcode}-{barcode}"
                f" using rev {git_info}")
            logger.critical(msg=message, extra={"notify_slack": True})

            # write a marker so we don't process this file again.
            processing_results.write_marker_file(drive_service,
                                                 markers_folder_id)
        except Exception as err:
            # Broad catch is deliberate: one bad plate must not kill the
            # loop. The failure is surfaced to Slack with a traceback and
            # the next barcode is attempted. No marker is written, so the
            # failed barcode is retried on the next run.
            logger.critical(f"Error in [{cfg.aws_env}]: {err}",
                            extra={"notify_slack": True})
            logger.exception("Details:")
def parse_qpcr_csv(args):
    """Process qPCR run CSVs from a local directory (or fetched from Drive).

    Command-line counterpart to the main processing loop: groups run files
    by barcode, loads metadata and accession data, processes well data and
    controls, and writes the CSV results, CB report, and final PDF into the
    local run directory. Unlike the service loop, nothing is uploaded and
    no marker files are written.

    Parameters
    ----------
    args:
        parsed CLI arguments; uses ``qpcr_run_path``, ``debug``,
        ``use_gdrive``, ``barcodes``, ``secret_id``, ``protocol``, and
        ``plate_map_file``.

    Raises
    ------
    ValueError
        if ``use_gdrive`` is set without barcodes, or if neither a plate
        map file nor Drive access is available for accession data.
    """
    cfg = Config()
    create_logger(cfg, debug=args.debug)
    logger.info(msg=f"Started local processing in: {args.qpcr_run_path}")

    if args.use_gdrive and not args.barcodes:
        raise ValueError(
            "You must specify barcodes to process from Google Drive")

    run_path = pathlib.Path(args.qpcr_run_path)
    # Drive access is set up unconditionally: even a local run reads the
    # collection form spreadsheet below.
    google_credentials = gutils.get_secrets_manager_credentials(args.secret_id)
    drive_service = drive.get_service(google_credentials)

    collective_form = CollectiveForm(
        drive_service, cfg["DATA"]["collection_form_spreadsheet_id"])
    sample_metadata_form = collective_form[SampleMetadata.SHEET_NAME]
    rerun_form = collective_form[SampleRerun.SHEET_NAME]

    if args.use_gdrive:
        logs_folder_id = drive.get_folder_id_of_path(drive_service,
                                                     cfg.PCR_LOGS_FOLDER)
        logs_folder_contents = [
            drive_file for drive_file in drive.get_contents_by_folder_id(
                drive_service, logs_folder_id, only_files=True)
        ]
        plate_layout_folder_id = drive.get_folder_id_of_path(
            drive_service, cfg.PLATE_LAYOUT_FOLDER)
    else:
        # Local mode: any CSV in the run directory is a candidate run file.
        logs_folder_contents = run_path.glob("*.csv")

    # Group run files by barcode, honoring the optional barcode filter.
    barcodes_to_process = defaultdict(RunFiles)
    for run_file in logs_folder_contents:
        m = RunFiles.get_qpcr_file_type(run_file.name)
        if m is None:
            continue
        elif args.barcodes and m[RunFiles.BARCODE] not in args.barcodes:
            continue
        else:
            barcodes_to_process[m[RunFiles.BARCODE]].add_file(m, run_file)

    for barcode, barcode_files in barcodes_to_process.items():
        # all files must be present, at least one quant_amp file
        if not barcode_files.all_files:
            message = f"Missing files for: {barcode}. \nSkipping for now"
            logger.info(msg=message)
            continue

        logger.info(msg=f"Found sample to process, barcode: {barcode}")
        logger.info(msg=f"Getting metadata and data for: {barcode}")
        bravo_metadata = BravoMetadata.load_from_spreadsheet(
            barcode, collective_form)

        if args.protocol is not None:
            # user specified the protocol
            protocol = get_protocol(args.protocol)
        else:
            protocol = get_protocol(bravo_metadata.sop_protocol)

        # Every quant-amp file the protocol maps must be present.
        if not set(barcode_files.quant_amp).issuperset(protocol.mapping):
            missing = map(str,
                          set(protocol.mapping) - set(barcode_files.quant_amp))
            message = f"Missing quant amp files for {barcode}: {', '.join(missing)}"
            logger.critical(msg=message)
            continue

        # Accession data comes from an explicit plate-map file if given,
        # otherwise from Drive (only possible with --use-gdrive).
        if args.plate_map_file is not None:
            plate_map_type = accession.get_plate_map_type_from_name(
                args.plate_map_file.name)
            accession_data = accession.read_accession_data(
                plate_map_type, args.plate_map_file)
        elif args.use_gdrive:
            accession_data = accession.get_accession_data_with_rerun(
                drive_service,
                plate_layout_folder_id,
                sample_metadata_form,
                rerun_form,
                bravo_metadata.sample_barcode,
            )
        else:
            raise ValueError(
                "You must provide a plate map file or use Google Drive")

        control_wells = get_control_wells_from_type(
            controls_type=bravo_metadata.controls_type,
            accession_data=accession_data,
        )
        # check for valid accessions
        update_accession_data_with_controls(control_wells, accession_data,
                                            barcode)

        # process well data and check controls, return results
        logger.info(msg=f"Processing well data and controls for: {barcode}")
        processing_results = process_barcode(
            cfg,
            barcode,
            barcode_files,
            bravo_metadata,
            protocol,
            control_wells,
            accession_data,
        )

        # Write outputs next to the input run files.
        with (run_path / processing_results.results_filename).open("w") as fh:
            processing_results.write_results(fh)
        with (run_path / processing_results.cb_report_filename).open("w") as fh:
            processing_results.write_cb_report(fh)

        # create pdf report
        logger.info(msg=f"Generating results PDF for: {barcode}")
        final_pdf_filename = run_path / processing_results.final_pdf_filename
        with open(final_pdf_filename, "wb") as output_file:
            create_final_pdf(processing_results, output_file)