예제 #1
0
    def create_db_entries(cls, job, session):
        """Import stimulus pulses, pulse responses and baselines for one experiment.

        The experiment record is looked up from ``job['job_id']``, its NWB data
        is opened through ``OptoExperimentLoader`` and, for every sync recording
        in the file, the following entries are added to *session*:

        * ``SyncRec`` and one ``Recording`` per recorded device
        * ``PatchClampRecording`` / ``TestPulse`` for patch clamp channels
        * ``StimPulse`` for electrical, 2p ('Fidelity') and LED stimuli
        * ``PulseResponse`` for each (stimulus, postsynaptic) recording pair
        * ``Baseline`` chunks matched to responses by their QC results

        Parameters
        ----------
        job : dict
            Must provide the keys ``'database'`` and ``'job_id'``.
        session :
            Database session that receives the new entries. Nothing is
            committed here.

        Raises
        ------
        Exception
            If a recording's type is not recognised, or if more than 10 pulse
            responses are found for a single stimulus/post recording pair.
        """
        db = job['database']
        job_id = job['job_id']

        # Load experiment from DB
        expt_entry = db.experiment_from_ext_id(job_id, session=session)
        elecs_by_ad_channel = {elec.device_id:elec for elec in expt_entry.electrodes}
        cell_entries = expt_entry.cells ## do this once here instead of multiple times later because it's slooooowwwww
        pairs_by_cell_id = expt_entry.pairs

        # load NWB file
        path = os.path.join(config.synphys_data, expt_entry.storage_path)
        expt = AI_Experiment(loader=OptoExperimentLoader(site_path=path))
        nwb = expt.data
        stim_log = expt.loader.load_stimulation_log()
        if stim_log['version'] < 3:
            ## gonna need to load an image in order to calculate spiral size later
            from acq4.util.DataManager import getHandle

        # clock time of the most recent stimulus pulse, keyed by cell ext_id;
        # carried across sync recordings to compute previous_pulse_dt
        last_stim_pulse_time = {}
        # Load all data from NWB into DB
        for srec in nwb.contents:
            temp = srec.meta.get('temperature', None)
            srec_entry = db.SyncRec(ext_id=srec.key, experiment=expt_entry, temperature=temp)
            session.add(srec_entry)

            rec_entries = {}        # device_id -> Recording entry
            all_pulse_entries = {}  # device_id -> {pulse number -> StimPulse entry}
            for rec in srec.recordings:

                # import all recordings
                electrode_entry = elecs_by_ad_channel.get(rec.device_id, None)

                rec_entry = db.Recording(
                    sync_rec=srec_entry,
                    electrode=electrode_entry,
                    start_time=rec.start_time,
                    device_name=str(rec.device_id)
                )
                session.add(rec_entry)
                rec_entries[rec.device_id] = rec_entry

                # import patch clamp recording information
                if isinstance(rec, PatchClampRecording):
                    qc_pass, qc_failures = qc.recording_qc_pass(rec)
                    pcrec_entry = db.PatchClampRecording(
                        recording=rec_entry,
                        clamp_mode=rec.clamp_mode,
                        patch_mode=rec.patch_mode,
                        stim_name=rec.stimulus.description,
                        baseline_potential=rec.baseline_potential,
                        baseline_current=rec.baseline_current,
                        baseline_rms_noise=rec.baseline_rms_noise,
                        qc_pass=qc_pass,
                        meta=None if len(qc_failures) == 0 else {'qc_failures': qc_failures},
                    )
                    session.add(pcrec_entry)

                    # import test pulse information
                    tp = rec.nearest_test_pulse
                    if tp is not None:
                        indices = tp.indices or [None, None]
                        tp_entry = db.TestPulse(
                            electrode=electrode_entry,
                            recording=rec_entry,
                            start_index=indices[0],
                            stop_index=indices[1],
                            baseline_current=tp.baseline_current,
                            baseline_potential=tp.baseline_potential,
                            access_resistance=tp.access_resistance,
                            input_resistance=tp.input_resistance,
                            capacitance=tp.capacitance,
                            time_constant=tp.time_constant,
                        )
                        session.add(tp_entry)
                        pcrec_entry.nearest_test_pulse = tp_entry

                    psa = PatchClampStimPulseAnalyzer.get(rec)
                    pulses = psa.pulse_chunks()
                    pulse_entries = {}
                    all_pulse_entries[rec.device_id] = pulse_entries
                    # NOTE(review): electrode_entry is None when no electrode matches
                    # rec.device_id; this line then raises AttributeError -- confirm
                    # that every patch clamp channel is guaranteed an electrode.
                    cell_entry = electrode_entry.cell

                    for pulse in pulses:
                        # Record information about all pulses, including test pulse.
                        t0, t1 = pulse.meta['pulse_edges']
                        resampled = pulse['primary'].resample(sample_rate=20000)

                        clock_time = t0 + datetime_to_timestamp(rec_entry.start_time)
                        prev_pulse_dt = clock_time - last_stim_pulse_time.get(cell_entry.ext_id, -np.inf)
                        last_stim_pulse_time[cell_entry.ext_id] = clock_time

                        pulse_entry = db.StimPulse(
                            recording=rec_entry,
                            pulse_number=pulse.meta['pulse_n'],
                            onset_time=t0,
                            amplitude=pulse.meta['pulse_amplitude'],
                            duration=t1-t0,
                            data=resampled.data,
                            data_start_time=resampled.t0,
                            cell=cell_entry,
                            previous_pulse_dt=prev_pulse_dt
                        )
                        session.add(pulse_entry)
                        pulse_entries[pulse.meta['pulse_n']] = pulse_entry


                elif rec.device_type == 'Fidelity':
                    ## This is a 2p stimulation

                    ## get cell entry
                    stim_num = rec.meta['notebook']['USER_stim_num']
                    if stim_num is None: ### this is a trace that would have been labeled as 'unknown'
                        continue
                    stim = stim_log[str(int(stim_num))]
                    cell_entry = cell_entries[stim['stimulationPoint']['name']]

                    ## get stimulation shape parameters
                    if stim_log['version'] >=3:
                        shape={'spiral_revolutions':stim['shape']['spiral revolutions'], 'spiral_size':stim['shape']['size']}
                    else:
                        ## need to calculate spiral size from reference image, cause stimlog is from before we were saving spiral size
                        shape={'spiral_revolutions':stim.get('prairieCmds', {}).get('spiralRevolutions')}
                        prairie_size = stim['prairieCmds']['spiralSize']
                        ref_image = os.path.join(expt.path, stim['prairieImage'][-23:])
                        if os.path.exists(ref_image):
                            h = getHandle(ref_image)
                            xPixels = h.info()['PrairieMetaInfo']['Environment']['PixelsPerLine']
                            pixelLength = h.info()['PrairieMetaInfo']['Environment']['XAxis_umPerPixel']
                            size = prairie_size * pixelLength * xPixels * 1e-6
                            shape['spiral_size'] = size
                        else:
                            shape['spiral_size'] = None

                    ## calculate offset_distance
                    offset = stim.get('offset')
                    if offset is not None:
                        offset_distance = (offset[0]**2 + offset[1]**2 + offset[2]**2)**0.5
                    else:
                        offset_distance = None

                    pulse_entries = {}
                    all_pulse_entries[rec.device_id] = pulse_entries

                    ospa = GenericStimPulseAnalyzer.get(rec)

                    for i, pulse in enumerate(ospa.pulses(channel='reporter')):
                        ### pulse is (start, stop, amplitude)
                        t0, t1 = pulse[0], pulse[1]

                        clock_time = t0 + datetime_to_timestamp(rec_entry.start_time)
                        prev_pulse_dt = clock_time - last_stim_pulse_time.get(cell_entry.ext_id, -np.inf)
                        last_stim_pulse_time[cell_entry.ext_id] = clock_time
                        pulse_entry = db.StimPulse(
                            recording=rec_entry,
                            cell=cell_entry,
                            pulse_number=i,
                            onset_time=t0,
                            amplitude=power_cal.convert_voltage_to_power(pulse[2], timestamp_to_datetime(expt_entry.acq_timestamp), expt_entry.rig_name), ## need to fill in laser/objective correctly
                            duration=t1-t0,
                            previous_pulse_dt=prev_pulse_dt,
                            position=stim['stimPos'],
                            meta = {'shape': shape,
                                    'pockel_cmd':stim.get('prairieCmds',{}).get('laserPower', [None]*100)[i],
                                    'pockel_voltage': float(pulse[2]),
                                    'position_offset':offset,
                                    'offset_distance':offset_distance,
                                    'wavelength': 1070e-9
                                    } # TODO: put in light_source and wavelength
                            )
                        qc_pass, qc_failures = qc.opto_stim_pulse_qc_pass(pulse_entry)
                        pulse_entry.qc_pass = qc_pass
                        if not qc_pass:
                            pulse_entry.meta['qc_failures'] = qc_failures

                        session.add(pulse_entry)
                        pulse_entries[i] = pulse_entry


                elif 'LED' in rec.device_type:
                    ### This is an LED stimulation
                    pulse_entries = {}
                    all_pulse_entries[rec.device_id] = pulse_entries

                    spa = PWMStimPulseAnalyzer.get(rec)
                    pulses = spa.pulses(channel='reporter')
                    max_power=power_cal.get_led_power(timestamp_to_datetime(expt_entry.acq_timestamp), expt_entry.rig_name, rec.device_id)

                    for i, pulse in enumerate(pulses):
                        pulse_entry = db.StimPulse(
                            recording=rec_entry,
                            #cell=cell_entry, ## we're not stimulating just one cell here TODO: but maybe this should be a list of cells in the fov?
                            pulse_number=i,
                            onset_time=pulse.global_start_time,
                            amplitude=max_power*pulse.amplitude,
                            duration=pulse.duration,
                            # no data stored: it's just a square pulse
                            # no position stored: don't have a 3D position, have a field
                            meta = {'shape': 'wide-field', ## TODO: description of field of view
                                    'LED_voltage':str(pulse.amplitude),
                                    'light_source':rec.device_id,
                                    'pulse_width_modulation': spa.pwm_params(channel='reporter', pulse_n=i),
                                    } ## TODO: put in lightsource and wavelength
                            )
                        ## TODO: make qc function for LED stimuli

                        session.add(pulse_entry)
                        pulse_entries[i] = pulse_entry

                elif rec.device_id == 'unknown':
                    ## At the end of some .nwbs there are vc traces to check access resistance.
                    ## These have an AD6(fidelity) channel, but do not have an optical stimulation and
                    ## this channel is labeled unknown when it gets created in OptoRecording
                    pass

                elif rec.device_id== 'Prairie_Command':
                    ### this is just the TTL command sent to the laser, the actual data about when the laser was active is in the Fidelity channel
                    pass

                else:
                    raise Exception('Need to figure out recording type for %s (device_id:%s)' % (rec, rec.device_id))

            # collect and shuffle baseline chunks for each recording
            baseline_chunks = {}
            for post_rec in [rec for rec in srec.recordings if isinstance(rec, PatchClampRecording)]:
                post_dev = post_rec.device_id

                base_dist = BaselineDistributor.get(post_rec)
                chunks = list(base_dist.baseline_chunks())

                # generate a different random shuffle for each combination pre,post device
                # (we are not allowed to reuse the same baseline chunks for a particular pre-post pair,
                # but it is ok to reuse them across pairs)
                for pre_dev in srec.devices:
                    # shuffle baseline chunks in a deterministic way:
                    # convert expt_id/srec_id/pre/post into an integer seed
                    seed_str = ("%s %s %s %s" % (job_id, srec.key, pre_dev, post_dev)).encode()
                    seed = struct.unpack('I', hashlib.sha1(seed_str).digest()[:4])[0]
                    rng = np.random.RandomState(seed)
                    rng.shuffle(chunks)

                    # store a copy; `chunks` is reshuffled in place for the next pre_dev
                    baseline_chunks[pre_dev, post_dev] = chunks[:]

            # (post device, start, stop) -> (data, ex_qc_pass, in_qc_pass, qc_failures)
            baseline_qc_cache = {}
            # (post device, start, stop) -> Baseline entry
            baseline_entry_cache = {}

            ### import postsynaptic responses
            unmatched = 0
            osra = OptoSyncRecAnalyzer.get(srec)
            for stim_rec in srec.recordings:
                if stim_rec.device_type in ['Prairie_Command', 'unknown']: ### these don't actually contain data we want to use -- ignore them
                    continue
                if isinstance(stim_rec, PatchClampRecording):
                    ### exclude trying to analyze intrinsic pulses
                    stim_name = stim_rec.stimulus.description
                    if any(substr in stim_name for substr in ['intrins']):
                        continue

                for post_rec in [x for x in srec.recordings if isinstance(x, PatchClampRecording)]:
                    if stim_rec == post_rec:
                        continue

                    # Reset for every pair so that a stimulus type matched by none of
                    # the branches below cannot inherit the previous pair's entry.
                    pair_entry = None

                    if 'Fidelity' in stim_rec.device_type:
                        stim_num = stim_rec.meta['notebook']['USER_stim_num']
                        if stim_num is None: ## happens when last sweep records a voltage offset - used to be labelled as 'unknown' device
                            continue

                        stim = stim_log[str(int(stim_num))]
                        pre_cell_name = str(stim['stimulationPoint']['name'])

                        post_cell_name = str('electrode_'+ str(post_rec.device_id))

                        pair_entry = pairs_by_cell_id.get((pre_cell_name, post_cell_name))

                    elif 'led' in stim_rec.device_type.lower():
                        # LED stimulation is not tied to a single presynaptic cell
                        pair_entry = None

                    elif isinstance(stim_rec, PatchClampRecording):
                        pre_cell_name = str('electrode_' + str(stim_rec.device_id))
                        post_cell_name = str('electrode_'+ str(post_rec.device_id))
                        pair_entry = pairs_by_cell_id.get((pre_cell_name, post_cell_name))

                    # get all responses, regardless of the presence of a spike
                    responses = osra.get_responses(stim_rec, post_rec)
                    if len(responses) > 10:
                        raise Exception('Found more than 10 pulse responses for %s. Please investigate.'%srec)
                    for resp in responses:
                        if pair_entry is not None: ### when recordings are crappy cells are not always included in connections files so won't exist as pairs in the db, also led stimulations don't have pairs
                            if resp['ex_qc_pass']:
                                pair_entry.n_ex_test_spikes += 1
                            if resp['in_qc_pass']:
                                pair_entry.n_in_test_spikes += 1

                        resampled = resp['response']['primary'].resample(sample_rate=20000)
                        resp_entry = db.PulseResponse(
                            recording=rec_entries[post_rec.device_id],
                            stim_pulse=all_pulse_entries[stim_rec.device_id][resp['pulse_n']],
                            pair=pair_entry,
                            data=resampled.data,
                            data_start_time=resampled.t0,
                            ex_qc_pass=resp['ex_qc_pass'],
                            in_qc_pass=resp['in_qc_pass'],
                            meta=None if resp['ex_qc_pass'] and resp['in_qc_pass'] else {'qc_failures': resp['qc_failures']},
                        )
                        session.add(resp_entry)

                        # find a baseline chunk from this recording with compatible qc metrics
                        got_baseline = False
                        for i, (start, stop) in enumerate(baseline_chunks[stim_rec.device_id, post_rec.device_id]):
                            key = (post_rec.device_id, start, stop)

                            # pull data and run qc if needed
                            if key not in baseline_qc_cache:
                                data = post_rec['primary'].time_slice(start, stop).resample(sample_rate=db.default_sample_rate).data
                                ex_qc_pass, in_qc_pass, base_qc_failures = qc.opto_pulse_response_qc_pass(post_rec, [start, stop])
                                # Cache the failure list together with the flags; otherwise a
                                # cache hit would leave the failures of an unrelated chunk in
                                # scope when the Baseline entry is created below.
                                baseline_qc_cache[key] = (data, ex_qc_pass, in_qc_pass, base_qc_failures)
                            else:
                                (data, ex_qc_pass, in_qc_pass, base_qc_failures) = baseline_qc_cache[key]

                            if resp_entry.ex_qc_pass is True and ex_qc_pass is not True:
                                continue
                            elif resp_entry.in_qc_pass is True and in_qc_pass is not True:
                                continue
                            else:
                                got_baseline = True
                                # consume the chunk so it is not reused for this pre/post pair;
                                # mutating the list is safe because we leave the loop right away
                                baseline_chunks[stim_rec.device_id, post_rec.device_id].pop(i)
                                break

                        if not got_baseline:
                            # no matching baseline available
                            unmatched += 1
                            continue

                        if key not in baseline_entry_cache:
                            # create a db record for this baseline chunk if it has not already appeared elsewhere
                            base_entry = db.Baseline(
                                recording=rec_entries[post_rec.device_id],
                                data=data,
                                data_start_time=start,
                                mode=float_mode(data),
                                ex_qc_pass=ex_qc_pass,
                                in_qc_pass=in_qc_pass,
                                meta=None if ex_qc_pass is True and in_qc_pass is True else {'qc_failures': base_qc_failures},
                            )
                            session.add(base_entry)
                            baseline_entry_cache[key] = base_entry

                        resp_entry.baseline = baseline_entry_cache[key]

            if unmatched > 0:
                print("%s %s: %d pulse responses without matched baselines" % (job_id, srec, unmatched))
예제 #2
0
    def create_db_entries(cls, job, session):
        """Add a PatchSeq record for every cell of the experiment that has results.

        The experiment is looked up from ``job['job_id']``. Tube ID, nucleus and
        end-seal values are read from the site's ``headstages`` info, and the
        amplification and mapping results of each tube are merged, hashed and
        written to a new ``db.PatchSeq`` entry that is added to *session*.
        Cells without a tube ID, or whose tube has no results, are skipped.
        """
        db = job['database']
        job_id = job['job_id']

        # Load experiment from DB together with the external result tables
        expt = db.experiment_from_timestamp(job_id, session=session)
        amp_results = get_amp_results()
        mapping_results = get_mapping_results()
        cell_species = get_cell_species(db)

        site_dir = os.path.join(config.synphys_data, expt.storage_path)
        headstages = getHandle(site_dir).info().get('headstages')
        if headstages is None:
            return

        def by_headstage(field):
            # map the part of the headstage name after 'HS' to the requested field
            return {name.split('HS')[1]: hs[field] for name, hs in headstages.items()}

        patchseq_tubes = by_headstage('Tube ID')
        nucleus = by_headstage('Nucleus')
        reseal = by_headstage('End Seal')

        # nothing to import when no headstage has a tube assigned
        if all(tube == '' for tube in patchseq_tubes.values()):
            return

        for cell_ext_id, cell in expt.cells.items():
            tube_id = patchseq_tubes.get(cell_ext_id, '').strip()
            if tube_id == '':
                continue

            # amplification results first, mapping results take precedence
            merged = amp_results.get(tube_id, {}).copy()
            merged.update(mapping_results.get(tube_id, {}))
            if not merged:
                continue

            cell_nucleus = nucleus.get(cell_ext_id, None)
            digest = hashlib.md5(str(tuple(merged.values())).encode()).hexdigest()
            results = {
                'tube_id': tube_id,
                'nucleus': None if cell_nucleus == '' else cell_nucleus,
                'reseal': reseal.get(cell_ext_id, False),
                'patchseq_hash': digest,
            }

            # copy the result fields into their DB columns
            for result_name, col_name in col_names.items():
                value = merged.get(result_name)
                if value is None:
                    continue
                if col_name == 'meta':
                    value = {'amplification_comments': value}
                elif col_name == 'genes_detected':
                    value = int(value)
                results[col_name] = value

            if results.get('tree_call') in ('Core', 'I1'):
                results['t_type'] = results['tree_first_cluster']

            results['mapped_subclass'] = get_mapped_subclass(cell, results)

            # Write new record to DB
            session.add(db.PatchSeq(cell_id=cell.id, **results))
예제 #3
0
    def ready_jobs(self):
        """Return an ordered dict of all jobs that are ready to be processed (all dependencies are present)
        and the dates that dependencies were created.

        Every successfully finished 'experiment' job is listed with the
        experiment's own time as ``dep_time``. When the patch-seq results
        currently available for one of its cells do not match the hash stored
        in that cell's PatchSeq record (or the cell does not have exactly one
        record), ``dep_time`` is replaced by the current time.

        Returns an empty dict when loading the patch-seq results raises
        ImportError.
        """
        db = self.database
        # All experiments and their creation times in the DB
        expts = self.pipeline.get_module('experiment').finished_jobs()

        session = db.session()
        session.rollback()

        ready = OrderedDict()

        try:
            amp_results = get_amp_results()
            mapping_results = get_mapping_results()
        except ImportError as exc:
            print("Skipping patchseq: %s" % str(exc))
            return ready

        for expt_id, (expt_mtime, success) in expts.items():
            if success is not True:
                continue

            expt = session.query(
                db.Experiment).filter(db.Experiment.ext_id == expt_id).all()[0]
            ready[expt_id] = {'dep_time': expt_mtime}

            path = os.path.join(config.synphys_data, expt.storage_path)
            headstages = getHandle(path).info().get('headstages')
            if headstages is None:
                continue

            patchseq_tubes = {
                hs_name.split('HS')[1]: hs['Tube ID']
                for hs_name, hs in headstages.items()
            }

            up_to_date = True
            for cell_ext_id, cell in expt.cells.items():
                tube_id = patchseq_tubes.get(cell_ext_id, '').strip()
                if tube_id == '':
                    continue

                # Merge the two result sets per tube, field by field, exactly as
                # create_db_entries does before hashing. Merging the top-level
                # dicts instead lets a mapping result replace the amplification
                # result wholesale, so the hash could never match the stored one.
                patchseq_data = amp_results.get(tube_id, {}).copy()
                patchseq_data.update(mapping_results.get(tube_id, {}))
                if not patchseq_data:
                    # create_db_entries writes no record for these cells
                    continue

                patchseq_data_hash = hashlib.md5(
                    str(tuple(patchseq_data.values())).encode()).hexdigest()
                patchseq_rec = session.query(db.PatchSeq).join(
                    db.Cell).filter(db.Cell.id == cell.id).all()
                if len(patchseq_rec) != 1 or patchseq_rec[0].patchseq_hash != patchseq_data_hash:
                    # one stale or missing record is enough to reprocess the experiment
                    up_to_date = False
                    break

            if not up_to_date:
                ready[expt_id] = {'dep_time': datetime.datetime.now()}

        return ready
예제 #4
0
def generate_daily_report(day):
    """ Generate a daily PatchSeq report for Kim's team. PatchSeq metadata is collected from the acq4 directories
    for every experiment. Only metadata associated with a Patched Cell Container are processed.

    Parameters
    ----------
    day :
        Day to report on. If it equals today's date, the previous day is
        reported instead.

    The report is written as ``<yymmdd>_mps_Transcriptomics_report.xlsx`` under
    ``config.patchseq_report_path`` when ``to_df`` returns a dataframe. Sites
    that are skipped and per-site problems are printed to stdout.
    """

    if day == datetime.today().date():
        day = day - timedelta(hours=24)

    file_name = '%s_mps_Transcriptomics_report.xlsx' % datetime.strftime(
        day, "%y%m%d")
    file_path = config.patchseq_report_path + '/' + file_name
    project_code = '102-01-010-10'
    columns = [
        'Patch Tube Name',
        'Blank Fill Date',
        'Patch Date',
        'Library Prep Day1 Date',
        'Species',
        'Specimen ID',
        'Cell Line',
        'ROI Major',
        'ROI Minor',
        'Comments',
        'Project Code',
    ]

    # collect experiments for the specified day and flatten their site directories
    expt_paths = get_expts_in_range(all_paths, day, day)
    site_paths = [
        sp
        for path in expt_paths
        for sp in glob.glob(os.path.join(path, 'slice_*', 'site_*'))
    ]

    row_data = []
    # look through each site directory
    for site in site_paths:
        if not os.path.isdir(site):
            continue
        # close the file promptly instead of leaving the handle to the garbage collector
        with open(os.path.join(site, 'sync_source')) as source_file:
            site_source = source_file.read()
        # first entry identifies the site; anything after it is a real error
        errors = [site_source]
        site_dh = getHandle(site)
        site_info = site_dh.info()
        slice_info = site_dh.parent().info()
        day_info = site_dh.parent().parent().info()
        pip_meta = PipetteMetadata(site)
        headstages = site_info.get('headstages')

        # check that pipette yml file is present
        if pip_meta.pipettes is None:
            print('%s\tCheck pipette.yml file' % site_source)
            continue

        # check to make sure there are recorded headstages and patchseq tubes, else move to next site
        if headstages is None:
            print('%s\tNo recorded headstages' % site_source)
            continue
        if all(hs['Tube ID'] == '' for hs in headstages.values()):
            print('No tubes for %s' % site_source)
            continue

        patch_date_dt = timestamp_to_datetime(day_info.get('__timestamp__'))
        if isinstance(patch_date_dt, datetime):
            patch_date = datetime.strftime(patch_date_dt, "%m/%d/%Y")
        else:
            patch_date = None
        specimen_id = day_info.get('animal_ID')
        species = lims.specimen_species(
            slice_info.get('specimen_ID', '').strip())
        species = organism.get(species)
        if species == 'Mouse':
            genotype = day_info.get('LIMS_donor_info', {}).get('genotype')
        else:
            genotype = None
        roi_major = format_roi_major(day_info.get('target_region'))

        blank_fill_date = slice_info.get('blank_fill_date', '')
        try:
            # parsed only to validate the format; the original string is reported
            datetime.strptime(blank_fill_date, "%m/%d/%Y")
        except (TypeError, ValueError):
            # TypeError: the stored value is not a string (e.g. None); treat it
            # like a badly formatted date instead of aborting the whole report
            errors.append('\tblank fill date has improper format')
            blank_fill_date = None

        # for headstages that have patchseq tubes log metadata
        for hs, info in headstages.items():
            tube_name, tube_id, msg = parse_tube(info, patch_date_dt)
            if tube_name is None:
                if msg is not None:
                    print('\t\t%s ' % hs + msg)
                continue
            row = OrderedDict([k, None] for k in columns)

            # pipettes are keyed by the last character of the headstage name
            pip = pip_meta.pipettes[hs[-1]]
            nucleus_state = nucleus[info.get('Nucleus', '')]
            roi_minor = format_roi_minor(pip['target_layer'])

            row.update({
                'Blank Fill Date': blank_fill_date,
                'Patch Date': patch_date,
                'Specimen ID': specimen_id,
                'Species': species,
                'Cell Line': genotype,
                'Patch Tube Name': tube_name,
                'tube_id': tube_id,
                'Comments': nucleus_state,
                'ROI Major': roi_major,
                'ROI Minor': roi_minor,
                'Project Code': project_code,
            })

            # check that all required columns are filled in
            for k, v in row.items():
                if v is None and k != 'Library Prep Day1 Date':
                    if k == 'Cell Line' and row['Species'] == 'Human':
                        continue
                    row[k] = 'CHECK DATA'
            row_data.append(row)
        if len(errors) > 1:
            print('\n'.join(errors))

    # convert report to a dataframe and export to excel
    report_df = to_df(row_data, report_type='daily')

    if report_df is not None:
        report_df.to_excel(file_path, index=False)
예제 #5
0
def _checked_fill_date(raw_date, label, errors):
    """Return *raw_date* if it is a MM/DD/YYYY string, otherwise None.

    When the value cannot be parsed, a message naming *label* (e.g. 'blank' or
    'internal') is appended to *errors*. Non-string values such as None are
    treated as improperly formatted rather than raising.
    """
    try:
        datetime.strptime(raw_date, "%m/%d/%Y")
    except (TypeError, ValueError):
        errors.append('\t%s fill date has improper format' % label)
        return None
    return raw_date


def generate_monthly_report(start_date, end_date):
    """ Generate a monthly PatchSeq report for Shiny. PatchSeq metadata is collected from the acq4 directories
    for every experiment. Only metadata associated with a Patched Cell Container are processed.

    Parameters
    ----------
    start_date, end_date :
        Bounds of the reporting period. They are formatted with ``strftime``
        to build the file name and passed on to ``get_expts_in_range`` and
        ``tube_cross_check``.

    The report is written as
    ``<start>_<end>_mps_metadata_report.xlsx`` (dates as YYMMDD) inside
    ``config.patchseq_report_path``. Problems found for a site are printed to
    stdout, and required columns without data are filled with 'CHECK DATA'.
    Nothing is written when ``to_df`` returns None.
    """

    file_name = '%s_%s_mps_metadata_report.xlsx' % (
        start_date.strftime("%y%m%d"), end_date.strftime("%y%m%d"))
    file_path = os.path.join(config.patchseq_report_path, file_name)

    # column name -> spreadsheet column letter
    required_cols = {
        'tubeID': 'A',
        'patch.date': 'B',
        'rigOperator': 'C',
        'rigNumber': 'D',
        'Fill.Date': 'E',
        'internalFillDate': 'F',
        'creCell': 'H',
        'manualRoi': 'J',
        'postPatch': 'S',
        'endPipetteR': 'T',
    }

    not_required_cols = {
        'pilotName': 'G',
        'autoRoi': 'I',
        'cell_depth': 'K',
        'sliceHealth': 'L',
        'timeWholeCellStart': 'M',
        'timeExtractionStart': 'N',
        'pressureApplied': 'O',
        'timeExtractionEnd': 'P',
        'retractionPressureApplied': 'Q',
        'timeRetractionEnd': 'R',
    }

    # not all columns are required but they must be in a specified order
    column_letters = required_cols.copy()
    column_letters.update(not_required_cols)
    columns = sorted(column_letters, key=column_letters.get)

    # collect every site directory of the experiments in the date range
    expt_paths = get_expts_in_range(all_paths, start_date, end_date)
    site_paths = [
        site_path
        for path in expt_paths
        for site_path in glob.glob(os.path.join(path, 'slice_*', 'site_*'))
    ]

    row_data = []
    # look through each site directory for patchseq data
    for site in site_paths:
        if not os.path.isdir(site):
            continue

        # close the file promptly instead of leaking one handle per site
        with open(os.path.join(site, 'sync_source')) as source_file:
            site_source = source_file.read()
        # the first entry only identifies the site; real errors follow it
        errors = [site_source]

        site_dh = getHandle(site)
        site_info = site_dh.info()
        slice_info = site_dh.parent().info()
        day_dh = site_dh.parent().parent()
        day_info = day_dh.info()
        pip_meta = PipetteMetadata(site)
        headstages = site_info.get('headstages')

        # check that pipette yml file is present
        if pip_meta.pipettes is None:
            print('%s\tCheck pipette.yml file' % site_source)
            continue

        # if no headstages were recorded or tubes collected, move along
        if headstages is None:
            print('%s\tNo recorded headstages' % site_source)
            continue
        if all(hs_info['Tube ID'] == '' for hs_info in headstages.values()):
            continue

        index_file = pg.configfile.readConfigFile(
            os.path.join(day_dh.path, '.index'))
        rig_name = index_file['.'].get('rig_name')
        patch_date_dt = timestamp_to_datetime(day_info.get('__timestamp__'))
        if isinstance(patch_date_dt, datetime):
            patch_date = patch_date_dt.strftime("%m/%d/%Y")
        else:
            patch_date = None
        operator = day_info.get('rig_operator', '')
        roi = format_roi_major(day_info.get('target_region'))
        slic = Slice(site_dh.parent().name())
        genotype = slic.genotype

        # the same warning can apply to every headstage; report it once per site
        genotype_warned = False
        if genotype is None and slic.species == 'Mouse':
            errors.append(
                '\tno genotype for %s, this may affect the creCell column' %
                slic.lims_specimen_name)
            genotype_warned = True

        blank_fill_date = _checked_fill_date(
            slice_info.get('blank_fill_date', ''), 'blank', errors)
        internal_fill_date = _checked_fill_date(
            slice_info.get('internal_fill_date', ''), 'internal', errors)

        for hs, info in headstages.items():
            tube_name, tube_id, msg = parse_tube(info, patch_date_dt)
            if tube_name is None:
                if msg is not None:
                    print('\t\t%s ' % hs + msg)
                continue
            row = OrderedDict.fromkeys(columns)

            # a 'T' as second character of the tube name marks human culture
            human_culture = tube_name[1] == 'T'
            if human_culture and genotype is None and not genotype_warned:
                errors.append(
                    '\tno genotype for %s, this may affect the creCell column'
                    % slic.lims_specimen_name)
                genotype_warned = True

            color = info.get('Reporter')
            reporter = None
            if color == '-':
                reporter = color
            elif color in ('red', 'green', 'yellow') and genotype is not None:
                reporter = genotype.color_to_reporter(color)
            elif color == 'NA':
                reporter = ''

            # pipettes are keyed by the last character of the headstage name
            pip = pip_meta.pipettes[hs[-1]]
            layer = pip['target_layer']
            if roi not in (None, '') and layer not in (None, ''):
                manual_roi = roi + layer
            else:
                manual_roi = None
            nucleus_state = nucleus[info.get('Nucleus', '')]
            end_seal = 1000 if info['End Seal'] else 0  # in MOhms

            row.update({
                'internalFillDate': internal_fill_date,
                'Fill.Date': blank_fill_date,
                'tubeID': tube_name,
                'tube_id': tube_id,
                'patch.date': patch_date,
                'rigOperator': operator,
                'rigNumber': rig_name,
                'creCell': reporter,
                'manualRoi': manual_roi,
                'postPatch': nucleus_state,
                'endPipetteR': end_seal,
            })

            # check that there is metadata for all required columns
            for col in required_cols:
                if row[col] is None:
                    row[col] = 'CHECK DATA'
                    errors.append('\t\t%s %s has no data' % (hs, col))
            row_data.append(row)

        if len(errors) > 1:
            print('\n'.join(errors))

    report_df = to_df(row_data, report_type='monthly')

    # to_df can return None (the daily report guards against it as well,
    # presumably when there are no rows); subscripting None would raise.
    if report_df is None:
        print('No report data for %s - %s, monthly report not written' %
              (start_date, end_date))
        return

    # cross-check with daily reports to make sure all tubes are accounted for
    tube_cross_check(report_df['tubeID'], start_date, end_date)

    report_df.to_excel(file_path, index=False)
예제 #6
0
def NAM(store=True, interval=30, lag=3, initialize=False, protocol_path=None):
    """Run a Task Runner sequence while stepping through solution valves V1-V6.

    Valve V8 is held on for the whole run. On a background thread, valves
    V1..V6 are switched on one after another for ``interval`` seconds each;
    afterwards V8 is switched off and the Task Runner sequence is stopped.
    The calling thread waits ``interval - lag`` seconds and then starts the
    sequence. To terminate at any time, make the terminal window the active
    window and press any key.

    Parameters
    ----------
    store :
        Passed to ``tr.runSequence``.
    interval :
        Seconds each valve stays on; also used as the task 'cycleTime'.
    lag :
        Seconds by which the acquisition start precedes the end of the first
        valve interval.
    initialize :
        If true, set the holding value of V1..V8 to 0 before starting.
    protocol_path :
        Path of the acq4 protocol to load. Defaults to the original
        hard-coded 'wa62\\500ms' protocol.
    """
    # Import required python modules
    import os
    import threading
    from acq4.Manager import getManager
    from acq4.util.DataManager import getHandle

    if protocol_path is None:
        protocol_path = (
            'C:\\Users\\Public\\Documents\\acq4 Settings\\config\\protocols\\wa62\\500ms'
        )

    # Get valve devices, keyed by valve number (replaces eval() name lookups)
    man = getManager()
    valves = {number: man.getDevice('V%d' % number) for number in range(1, 9)}

    def set_valve(number, state):
        # each valve device is addressed through the channel named after its number
        valves[number].setChanHolding(str(number), state)

    # Prepare task for acquisition
    tr = man.getModule('Task Runner')
    tr.loadTask(getHandle(protocol_path))
    tr.protoStateGroup.setState({'cycleTime': interval, 'repetitions': 6})

    # Set once a key press ends the "pause" command. The event exists before
    # any thread starts, so no thread can observe an undefined abort flag.
    abort = threading.Event()

    # Solution switches: runs on its own thread
    def switch_solutions():
        set_valve(8, 1)
        for number in range(1, 7):
            if abort.is_set():
                break
            set_valve(number, 1)
            # returns early when the run is aborted
            abort.wait(interval)
            set_valve(number, 0)
        set_valve(8, 0)
        tr.stopSequence()

    # Keyboard monitor: "pause" returns 0 once a key has been pressed
    def monitor_keyboard():
        if os.system("pause") == 0:
            abort.set()

    # Initialise valves
    if initialize:
        for number in range(1, 9):
            set_valve(number, 0)

    # Run the protocol
    for target in (monitor_keyboard, switch_solutions):
        worker = threading.Thread(target=target)
        # daemon threads, like the thread-module threads used before, do not
        # keep the interpreter alive on exit
        worker.daemon = True
        worker.start()

    # Start the acquisition `lag` seconds before the first valve closes,
    # unless the run was aborted in the meantime.
    abort.wait(max(0, interval - lag))
    if not abort.is_set():
        tr.runSequence(store)