Esempio n. 1
0
 def ready_jobs(self):
     """Return an ordered dict describing every job whose dependencies are present.

     Each entry returned by ``all_slices()`` yields one record of the form
     ``{'dep_time': <datetime>, 'meta': {'source': <path>}}``:

     * a ``'place_holder'`` path is stored under the key ``'0.000'`` with a
       dependency time built from timestamp 0.0;
     * any other path is stored under its original key, with the dependency
       time taken from the modification time of the ``.index`` file inside
       that path.
     """
     jobs = OrderedDict()
     for slice_ts, slice_path in all_slices().items():
         if slice_path == 'place_holder':
             job_key = '%.3f' % 0.0
             dep_time = timestamp_to_datetime(0.0)
         else:
             job_key = slice_ts
             index_path = os.path.join(slice_path, '.index')
             dep_time = timestamp_to_datetime(os.stat(index_path).st_mtime)
         jobs[job_key] = {'dep_time': dep_time, 'meta': {'source': slice_path}}
     return jobs
Esempio n. 2
0
def get_expts_in_range(expts, start, end):
    """Return the experiment paths whose date lies within ``[start, end]``.

    The basename of each path in ``expts`` is parsed as a float timestamp and
    converted with ``timestamp_to_datetime``; the calendar date of the result
    is compared (inclusively) against ``start`` and ``end``.

    If nothing falls inside the range, a message is printed and the process
    is terminated by raising ``SystemExit`` (exit status 0).
    """
    in_range = []
    for expt in expts:
        expt_date = timestamp_to_datetime(float(os.path.basename(expt))).date()
        if start <= expt_date <= end:
            in_range.append(expt)

    if not in_range:
        print('No experiments found within date range %s - %s' %
              (start.strftime("%m/%d/%Y"), end.strftime("%m/%d/%Y")))
        # Raise SystemExit directly instead of calling the ``exit()`` helper:
        # that helper is injected by the ``site`` module, so it is missing
        # under ``python -S`` / frozen interpreters, and it closes stdin.
        raise SystemExit
    return in_range
Esempio n. 3
0
def generate_daily_report(day):
    """ Generate a daily PatchSeq report for Kim's team. PatchSeq metadata is collected from the acq4 directories
    for every experiment. Only metadata associated with a Patched Cell Container are processed.

    ``day`` is compared against ``datetime.today().date()``; when it equals
    today, the report is generated for the previous day instead. One row is
    built per headstage for which ``parse_tube`` returns a tube name, and the
    rows are written to an .xlsx file under ``config.patchseq_report_path``
    (only if ``to_df`` returns something other than None). Problems found
    along the way are printed rather than raised.
    """

    # a request for "today" is shifted back by one day
    if day == datetime.today().date():
        day = day - timedelta(hours=24)

    file_name = '%s_mps_Transcriptomics_report.xlsx' % datetime.strftime(
        day, "%y%m%d")
    file_path = config.patchseq_report_path + '/' + file_name
    project_code = '102-01-010-10'
    columns = [
        'Patch Tube Name',
        'Blank Fill Date',
        'Patch Date',
        'Library Prep Day1 Date',
        'Species',
        'Specimen ID',
        'Cell Line',
        'ROI Major',
        'ROI Minor',
        'Comments',
        'Project Code',
    ]

    # collect experiments for the specified day and flatten their site paths
    expt_paths = get_expts_in_range(all_paths, day, day)
    site_paths = [
        sp for path in expt_paths
        for sp in glob.glob(os.path.join(path, 'slice_*', 'site_*'))
    ]

    row_data = []
    # look through each site directory
    for site in site_paths:
        if not os.path.isdir(site):
            continue
        # read the sync source with a context manager so the handle is closed
        # promptly instead of leaking one open file per site
        with open(os.path.join(site, 'sync_source')) as source_file:
            site_source = source_file.read()
        # the first entry is only a header; it is printed when at least one
        # real error is appended after it (see the len(errors) > 1 check)
        errors = [site_source]
        site_dh = getHandle(site)
        site_info = site_dh.info()
        slice_info = site_dh.parent().info()
        day_info = site_dh.parent().parent().info()
        pip_meta = PipetteMetadata(site)
        headstages = site_info.get('headstages')

        # check that pipette yml file is present
        if pip_meta.pipettes is None:
            print('%s\tCheck pipette.yml file' % site_source)
            continue

        # check to make sure there are recorded headstages and patchseq tubes, else move to next site
        if headstages is None:
            print('%s\tNo recorded headstages' % site_source)
            continue
        tubes = [hs['Tube ID'] for hs in headstages.values()]
        if all(t == '' for t in tubes):
            print('No tubes for %s' % site_source)
            continue

        patch_date_dt = timestamp_to_datetime(day_info.get('__timestamp__'))
        if isinstance(patch_date_dt, datetime):
            patch_date = datetime.strftime(patch_date_dt, "%m/%d/%Y")
        else:
            patch_date = None
        specimen_id = day_info.get('animal_ID')
        species = lims.specimen_species(
            slice_info.get('specimen_ID', '').strip())
        species = organism.get(species)
        if species == 'Mouse':
            genotype = day_info.get('LIMS_donor_info', {}).get('genotype')
        else:
            genotype = None
        roi_major = format_roi_major(day_info.get('target_region'))

        # validate the blank fill date; a non-string value (e.g. a stored
        # None) makes strptime raise TypeError, which is treated the same as
        # a malformed string instead of aborting the whole report
        blank_fill_date = slice_info.get('blank_fill_date', '')
        try:
            datetime.strptime(blank_fill_date, "%m/%d/%Y")
        except (ValueError, TypeError):
            errors.append('\tblank fill date has improper format')
            blank_fill_date = None

        # for headstages that have patchseq tubes log metadata
        for hs, info in headstages.items():
            tube_name, tube_id, msg = parse_tube(info, patch_date_dt)
            if tube_name is None:
                if msg is not None:
                    print('\t\t%s ' % hs + msg)
                continue
            row = OrderedDict((k, None) for k in columns)

            # pipettes are looked up by the last character of the headstage key
            pip = pip_meta.pipettes[hs[-1]]
            nucleus_state = nucleus[info.get('Nucleus', '')]
            roi_minor = format_roi_minor(pip['target_layer'])

            row.update({
                'Blank Fill Date': blank_fill_date,
                'Patch Date': patch_date,
                'Specimen ID': specimen_id,
                'Species': species,
                'Cell Line': genotype,
                'Patch Tube Name': tube_name,
                'tube_id': tube_id,
                'Comments': nucleus_state,
                'ROI Major': roi_major,
                'ROI Minor': roi_minor,
                'Project Code': project_code,
            })

            # check that all required columns are filled in; only existing
            # keys are reassigned, so mutating while iterating is safe
            for k, v in row.items():
                if v is None and k != 'Library Prep Day1 Date':
                    if k == 'Cell Line' and row['Species'] == 'Human':
                        continue
                    row[k] = 'CHECK DATA'
            row_data.append(row)
        if len(errors) > 1:
            print('\n'.join(errors))

    # convert report to a dataframe and export to excel
    report_df = to_df(row_data, report_type='daily')

    if report_df is not None:
        report_df.to_excel(file_path, index=False)
Esempio n. 4
0
def generate_monthly_report(start_date, end_date):
    """ Generate a monthly PatchSeq report for Shiny. PatchSeq metadata is collected from the acq4 directories
    for every experiment. Only metadata associated with a Patched Cell Container are processed.

    Experiments dated between ``start_date`` and ``end_date`` are selected via
    ``get_expts_in_range``. One row is built per headstage for which
    ``parse_tube`` returns a tube name; required columns left empty are
    marked 'CHECK DATA' and reported. The resulting dataframe is cross-checked
    with ``tube_cross_check`` and written to an .xlsx file under
    ``config.patchseq_report_path``. Problems are printed rather than raised.
    """

    file_name = '%s_%s_mps_metadata_report.xlsx' % (datetime.strftime(
        start_date, "%y%m%d"), datetime.strftime(end_date, "%y%m%d"))
    file_path = config.patchseq_report_path + '/' + file_name

    # column name -> spreadsheet column letter
    required_cols = {
        'tubeID': 'A',
        'patch.date': 'B',
        'rigOperator': 'C',
        'rigNumber': 'D',
        'Fill.Date': 'E',
        'internalFillDate': 'F',
        'creCell': 'H',
        'manualRoi': 'J',
        'postPatch': 'S',
        'endPipetteR': 'T',
    }

    not_required_cols = {
        'pilotName': 'G',
        'autoRoi': 'I',
        'cell_depth': 'K',
        'sliceHealth': 'L',
        'timeWholeCellStart': 'M',
        'timeExtractionStart': 'N',
        'pressureApplied': 'O',
        'timeExtractionEnd': 'P',
        'retractionPressureApplied': 'Q',
        'timeRetractionEnd': 'R',
    }

    # not all columns are required but they must be in a specified order
    columns = required_cols.copy()
    columns.update(not_required_cols)
    columns = [k for k, v in sorted(columns.items(), key=lambda item: item[1])]

    # collect experiments for the date range provided and flatten their site paths
    expt_paths = get_expts_in_range(all_paths, start_date, end_date)
    site_paths = [
        sp for path in expt_paths
        for sp in glob.glob(os.path.join(path, 'slice_*', 'site_*'))
    ]

    row_data = []
    # look through each site directory for patchseq data
    for site in site_paths:
        if not os.path.isdir(site):
            continue
        # read the sync source with a context manager so the handle is closed
        # promptly instead of leaking one open file per site
        with open(os.path.join(site, 'sync_source')) as source_file:
            site_source = source_file.read()
        # the first entry is only a header; it is printed when at least one
        # real error is appended after it (see the len(errors) > 1 check)
        errors = [site_source]
        site_dh = getHandle(site)
        site_info = site_dh.info()
        slice_info = site_dh.parent().info()
        day_dh = site_dh.parent().parent()
        day_info = day_dh.info()
        pip_meta = PipetteMetadata(site)
        headstages = site_info.get('headstages')

        # check that pipette yml file is present
        if pip_meta.pipettes is None:
            print('%s\tCheck pipette.yml file' % site_source)
            continue

        # if no headstages were recorded or tubes collected, move along
        if headstages is None:
            print('%s\tNo recorded headstages' % site_source)
            continue
        tubes = [hs['Tube ID'] for hs in headstages.values()]
        if all(t == '' for t in tubes):
            continue

        index_file = pg.configfile.readConfigFile(
            os.path.join(day_dh.path, '.index'))
        rig_name = index_file['.'].get('rig_name')
        patch_date_dt = timestamp_to_datetime(day_info.get('__timestamp__'))
        if isinstance(patch_date_dt, datetime):
            patch_date = datetime.strftime(patch_date_dt, "%m/%d/%Y")
        else:
            patch_date = None
        operator = day_info.get('rig_operator', '')
        roi = format_roi_major(day_info.get('target_region'))
        slic = Slice(site_dh.parent().name())
        genotype = slic.genotype
        if genotype is None and slic.species == 'Mouse':
            errors.append(
                '\tno genotype for %s, this may affect the creCell column' %
                slic.lims_specimen_name)

        # validate the fill dates; a non-string value (e.g. a stored None)
        # makes strptime raise TypeError, which is treated the same as a
        # malformed string instead of aborting the whole report
        blank_fill_date = slice_info.get('blank_fill_date', '')
        try:
            datetime.strptime(blank_fill_date, "%m/%d/%Y")
        except (ValueError, TypeError):
            errors.append('\tblank fill date has improper format')
            blank_fill_date = None

        internal_fill_date = slice_info.get('internal_fill_date', '')
        try:
            datetime.strptime(internal_fill_date, "%m/%d/%Y")
        except (ValueError, TypeError):
            errors.append('\tinternal fill date has improper format')
            internal_fill_date = None

        for hs, info in headstages.items():
            tube_name, tube_id, msg = parse_tube(info, patch_date_dt)
            if tube_name is None:
                if msg is not None:
                    print('\t\t%s ' % hs + msg)
                continue
            row = OrderedDict((k, None) for k in columns)

            # a 'T' in the second position of the tube name is treated as a
            # human culture sample
            human_culture = tube_name[1] == 'T'
            if human_culture and genotype is None:
                errors.append(
                    '\tno genotype for %s, this may affect the creCell column'
                    % slic.lims_specimen_name)

            # map the recorded reporter color onto the creCell value;
            # anything unrecognised stays None and is flagged below
            color = info.get('Reporter')
            reporter = None
            if color == '-':
                reporter = color
            elif color in ['red', 'green', 'yellow'] and genotype is not None:
                reporter = genotype.color_to_reporter(color)
            elif color == 'NA':
                reporter = ''

            # pipettes are looked up by the last character of the headstage key
            pip = pip_meta.pipettes[hs[-1]]
            layer = pip['target_layer']
            if roi not in [None, ''] and layer not in [None, '']:
                manual_roi = roi + layer
            else:
                manual_roi = None
            nucleus_state = nucleus[info.get('Nucleus', '')]
            end_seal = 1000 if info['End Seal'] else 0  # in MOhms

            row.update({
                'internalFillDate': internal_fill_date,
                'Fill.Date': blank_fill_date,
                'tubeID': tube_name,
                'tube_id': tube_id,
                'patch.date': patch_date,
                'rigOperator': operator,
                'rigNumber': rig_name,
                'creCell': reporter,
                'manualRoi': manual_roi,
                'postPatch': nucleus_state,
                'endPipetteR': end_seal,
            })

            # check that there is metadata for all required columns
            for k in required_cols:
                if row[k] is None:
                    row[k] = 'CHECK DATA'
                    errors.append('\t\t%s %s has no data' % (hs, k))
            row_data.append(row)
        if len(errors) > 1:
            print('\n'.join(errors))

    report_df = to_df(row_data, report_type='monthly')

    # NOTE(review): generate_daily_report guards against to_df returning None
    # (presumably when there are no rows - confirm against to_df); apply the
    # same guard here instead of failing on report_df['tubeID'].
    if report_df is None:
        print('No PatchSeq data found for monthly report')
        return

    # cross-check with daily reports to make sure all tubes are accounted for
    tube_cross_check(report_df['tubeID'], start_date, end_date)

    report_df.to_excel(file_path, index=False)