def ready_jobs(self):
    """Return an ordered dict of all jobs that are ready to be processed
    (all dependencies are present) and the dates that dependencies were created.
    """
    slices = all_slices()
    ready = OrderedDict()
    for ts, path in slices.items():
        if path == 'place_holder':
            ready['%.3f' % 0.0] = {'dep_time': timestamp_to_datetime(0.0), 'meta': {'source': path}}
        else:
            mtime = os.stat(os.path.join(path, '.index')).st_mtime
            # test file updates:
            # import random
            # if random.random() > 0.8:
            #     mtime *= 2
            ready[ts] = {'dep_time': timestamp_to_datetime(mtime), 'meta': {'source': path}}
    return ready
def get_expts_in_range(expts, start, end):
    in_range = []
    for expt in expts:
        expt_date = timestamp_to_datetime(float(os.path.basename(expt))).date()
        if start <= expt_date <= end:
            in_range.append(expt)

    if len(in_range) == 0:
        print('No experiments found within date range %s - %s' % (
            datetime.strftime(start, "%m/%d/%Y"), datetime.strftime(end, "%m/%d/%Y")))
        exit()

    return in_range
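# Usage sketch (hypothetical date range; assumes `all_paths` holds experiment
# directories whose basenames are POSIX timestamps, as elsewhere in this module):
#
#   expts = get_expts_in_range(all_paths, date(2021, 6, 1), date(2021, 6, 30))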
def generate_daily_report(day):
    """Generate a daily PatchSeq report for Kim's team.

    PatchSeq metadata is collected from the acq4 directories for every experiment.
    Only metadata associated with a Patched Cell Container is processed.
    """
    if day == datetime.today().date():
        day = day - timedelta(hours=24)

    file_name = '%s_mps_Transcriptomics_report.xlsx' % datetime.strftime(day, "%y%m%d")
    file_path = config.patchseq_report_path + '/' + file_name
    project_code = '102-01-010-10'

    columns = [
        'Patch Tube Name',
        'Blank Fill Date',
        'Patch Date',
        'Library Prep Day1 Date',
        'Species',
        'Specimen ID',
        'Cell Line',
        'ROI Major',
        'ROI Minor',
        'Comments',
        'Project Code',
    ]

    # collect experiments for the specified day
    expt_paths = get_expts_in_range(all_paths, day, day)
    site_paths = [glob.glob(os.path.join(path, 'slice_*', 'site_*')) for path in expt_paths]
    site_paths = [sp for paths in site_paths for sp in paths]  # flatten site paths if nested list

    row_data = []
    # look through each site directory
    for site in site_paths:
        if os.path.isdir(site) is False:
            continue
        errors = []
        site_source = open(os.path.join(site, 'sync_source')).read()
        errors.append(site_source)
        site_dh = getHandle(site)
        site_info = site_dh.info()
        slice_info = site_dh.parent().info()
        day_info = site_dh.parent().parent().info()
        pip_meta = PipetteMetadata(site)
        headstages = site_info.get('headstages')

        # check that pipette yml file is present
        if pip_meta.pipettes is None:
            print('%s\tCheck pipette.yml file' % site_source)
            continue

        # check that there are recorded headstages and patchseq tubes, else move to the next site
        if headstages is None:
            print('%s\tNo recorded headstages' % site_source)
            continue
        tubes = [hs['Tube ID'] for hs in headstages.values()]
        no_tubes = all([t == '' for t in tubes])
        if no_tubes:
            print('No tubes for %s' % site_source)
            continue

        patch_date_dt = timestamp_to_datetime(day_info.get('__timestamp__'))
        patch_date = datetime.strftime(patch_date_dt, "%m/%d/%Y") if isinstance(patch_date_dt, datetime) else None
        specimen_id = day_info.get('animal_ID')
        species = lims.specimen_species(slice_info.get('specimen_ID', '').strip())
        species = organism.get(species)
        if species == 'Mouse':
            genotype = day_info.get('LIMS_donor_info', {}).get('genotype')
        else:
            genotype = None
        roi_major = format_roi_major(day_info.get('target_region'))

        blank_fill_date = slice_info.get('blank_fill_date', '')
        try:
            datetime.strptime(blank_fill_date, "%m/%d/%Y")
        except ValueError:
            errors.append('\tblank fill date has improper format')
            blank_fill_date = None

        # for headstages that have patchseq tubes, log metadata
        for hs, info in headstages.items():
            tube_name, tube_id, msg = parse_tube(info, patch_date_dt)
            if tube_name is None:
                if msg is not None:
                    print('\t\t%s ' % hs + msg)
                continue

            row = OrderedDict((k, None) for k in columns)
            pip = pip_meta.pipettes[hs[-1]]
            nucleus_state = nucleus[info.get('Nucleus', '')]
            roi_minor = format_roi_minor(pip['target_layer'])
            row.update({
                'Blank Fill Date': blank_fill_date,
                'Patch Date': patch_date,
                'Specimen ID': specimen_id,
                'Species': species,
                'Cell Line': genotype,
                'Patch Tube Name': tube_name,
                'tube_id': tube_id,
                'Comments': nucleus_state,
                'ROI Major': roi_major,
                'ROI Minor': roi_minor,
                'Project Code': project_code,
            })

            # check that all required columns are filled in
            for k, v in row.items():
                if v is None and k != 'Library Prep Day1 Date':
                    if k == 'Cell Line' and row['Species'] == 'Human':
                        continue
                    row[k] = 'CHECK DATA'
            row_data.append(row)

        if len(errors) > 1:
            print('\n'.join(errors))

    # convert report to a dataframe and export to excel
    report_df = to_df(row_data, report_type='daily')
    if report_df is not None:
        report_df.to_excel(file_path, index=False)
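# Usage sketch (hypothetical date): build the report for a specific day; passing
# today's date writes yesterday's report, per the guard at the top of the function.
#
#   generate_daily_report(date(2021, 6, 2))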
def generate_monthly_report(start_date, end_date):
    """Generate a monthly PatchSeq report for Shiny.

    PatchSeq metadata is collected from the acq4 directories for every experiment.
    Only metadata associated with a Patched Cell Container is processed.
    """
    file_name = '%s_%s_mps_metadata_report.xlsx' % (
        datetime.strftime(start_date, "%y%m%d"), datetime.strftime(end_date, "%y%m%d"))
    file_path = config.patchseq_report_path + '/' + file_name

    required_cols = {
        'tubeID': 'A',
        'patch.date': 'B',
        'rigOperator': 'C',
        'rigNumber': 'D',
        'Fill.Date': 'E',
        'internalFillDate': 'F',
        'creCell': 'H',
        'manualRoi': 'J',
        'postPatch': 'S',
        'endPipetteR': 'T',
    }

    not_required_cols = {
        'pilotName': 'G',
        'autoRoi': 'I',
        'cell_depth': 'K',
        'sliceHealth': 'L',
        'timeWholeCellStart': 'M',
        'timeExtractionStart': 'N',
        'pressureApplied': 'O',
        'timeExtractionEnd': 'P',
        'retractionPressureApplied': 'Q',
        'timeRetractionEnd': 'R',
    }

    # not all columns are required, but they must appear in the specified order
    columns = required_cols.copy()
    columns.update(not_required_cols)
    columns = [k for k, v in sorted(columns.items(), key=lambda item: item[1])]

    # collect experiments for the date range provided
    expt_paths = get_expts_in_range(all_paths, start_date, end_date)
    site_paths = [glob.glob(os.path.join(path, 'slice_*', 'site_*')) for path in expt_paths]
    site_paths = [sp for paths in site_paths for sp in paths]  # flatten site paths if nested list

    row_data = []
    # look through each site directory for patchseq data
    for site in site_paths:
        if os.path.isdir(site) is False:
            continue
        errors = []
        site_source = open(os.path.join(site, 'sync_source')).read()
        errors.append(site_source)
        site_dh = getHandle(site)
        site_info = site_dh.info()
        slice_info = site_dh.parent().info()
        day_dh = site_dh.parent().parent()
        day_info = day_dh.info()
        pip_meta = PipetteMetadata(site)
        headstages = site_info.get('headstages')

        # check that pipette yml file is present
        if pip_meta.pipettes is None:
            print('%s\tCheck pipette.yml file' % site_source)
            continue

        # if no headstages were recorded or tubes collected, move along
        if headstages is None:
            print('%s\tNo recorded headstages' % site_source)
            continue
        tubes = [hs['Tube ID'] for hs in headstages.values()]
        no_tubes = all([t == '' for t in tubes])
        if no_tubes:
            continue

        index_file = pg.configfile.readConfigFile(os.path.join(day_dh.path, '.index'))
        rig_name = index_file['.'].get('rig_name')
        patch_date_dt = timestamp_to_datetime(day_info.get('__timestamp__'))
        patch_date = datetime.strftime(patch_date_dt, "%m/%d/%Y") if isinstance(patch_date_dt, datetime) else None
        operator = day_info.get('rig_operator', '')
        roi = format_roi_major(day_info.get('target_region'))

        slic = Slice(site_dh.parent().name())
        genotype = slic.genotype
        if genotype is None and slic.species == 'Mouse':
            errors.append('\tno genotype for %s, this may affect the creCell column' % slic.lims_specimen_name)

        blank_fill_date = slice_info.get('blank_fill_date', '')
        try:
            datetime.strptime(blank_fill_date, "%m/%d/%Y")
        except ValueError:
            errors.append('\tblank fill date has improper format')
            blank_fill_date = None

        internal_fill_date = slice_info.get('internal_fill_date', '')
        try:
            datetime.strptime(internal_fill_date, "%m/%d/%Y")
        except ValueError:
            errors.append('\tinternal fill date has improper format')
            internal_fill_date = None

        for hs, info in headstages.items():
            tube_name, tube_id, msg = parse_tube(info, patch_date_dt)
            if tube_name is None:
                if msg is not None:
                    print('\t\t%s ' % hs + msg)
                continue

            row = OrderedDict((k, None) for k in columns)
            human_culture = tube_name[1] == 'T'
            if human_culture and genotype is None:
                errors.append('\tno genotype for %s, this may affect the creCell column' % slic.lims_specimen_name)
            color = info.get('Reporter')
            reporter = None
            if color == '-':
                reporter = color
            elif color in ['red', 'green', 'yellow'] and genotype is not None:
                reporter = genotype.color_to_reporter(color)
            elif color == 'NA':
                reporter = ''

            pip = pip_meta.pipettes[hs[-1]]
            layer = pip['target_layer']
            manual_roi = roi + layer if (roi not in [None, ''] and layer not in [None, '']) else None
            nucleus_state = nucleus[info.get('Nucleus', '')]
            end_seal = info['End Seal']
            end_seal = 1000 if end_seal else 0  # in MOhms

            row.update({
                'internalFillDate': internal_fill_date,
                'Fill.Date': blank_fill_date,
                'tubeID': tube_name,
                'tube_id': tube_id,
                'patch.date': patch_date,
                'rigOperator': operator,
                'rigNumber': rig_name,
                'creCell': reporter,
                'manualRoi': manual_roi,
                'postPatch': nucleus_state,
                'endPipetteR': end_seal,
            })

            # check that there is metadata for all required columns
            for k in required_cols.keys():
                v = row[k]
                if v is None:
                    row[k] = 'CHECK DATA'
                    errors.append('\t\t%s %s has no data' % (hs, k))
            row_data.append(row)

        if len(errors) > 1:
            print('\n'.join(errors))

    report_df = to_df(row_data, report_type='monthly')
    # cross-check with daily reports to make sure all tubes are accounted for
    tube_cross_check(report_df['tubeID'], start_date, end_date)
    report_df.to_excel(file_path, index=False)
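# Usage sketch (hypothetical date range): build the metadata report for May 2021.
#
#   generate_monthly_report(date(2021, 5, 1), date(2021, 5, 31))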