Example #1
    def load_data(self,
                  key,
                  tier=None,
                  batch_size=1,
                  key_order=None,
                  exclude_from_normalization=None,
                  stimulus_types=None,
                  Sampler=None):
        log.info('Loading {} dataset with tier={}'.format(
            self._stimulus_type, tier))
        datasets = StaticMultiDataset().fetch_data(key, key_order=key_order)
        for k, dat in datasets.items():
            if 'stats_source' in key:
                log.info(
                    'Adding stats_source "{stats_source}" to dataset'.format(
                        **key))
                dat.stats_source = key['stats_source']

        if 'stats_source' in key:
            log.info('Using statistics source ' + key['stats_source'])

        datasets = self.add_transforms(key,
                                       datasets,
                                       exclude=exclude_from_normalization)

        loaders = self.get_loaders(datasets, tier, batch_size, stimulus_types,
                                   Sampler)
        return datasets, loaders
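
A hedged usage sketch for this loader, mirroring the call pattern in Example #2 below; the key contents and tier here are illustrative, not from the source:

    # Hypothetical usage -- key fields are illustrative.
    key = dict(group_id=1, stats_source='all')
    datasets, loaders = DataConfig().load_data(key, tier='train', batch_size=32)
    for readout_key, loader in loaders.items():
        for batch in loader:  # batch layout depends on the dataset's data keys
            pass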
Example #2
    def evaluate(self, key=None):
        if key is None:
            key = self.fetch1('KEY')

        model = self.load_network(key)
        model.eval()
        model.cuda()

        # get network configuration information
        net_key = NetworkConfig().net_key(key)
        train_key = TrainConfig().train_key(net_key)
        testsets, testloaders = DataConfig().load_data(net_key, tier='test', cuda=True, **train_key)

        scores, unit_scores = [], []
        for readout_key, testloader in testloaders.items():
            log.info('Computing test scores for ' + readout_key)

            y, y_hat = compute_predictions(testloader, model, readout_key)
            perf_scores = compute_scores(y, y_hat)

            member_key = (StaticMultiDataset.Member() & key & dict(name=readout_key)).fetch1(dj.key)
            member_key.update(key)

            unit_ids = testloader.dataset.neurons.unit_ids
            member_key['neurons'] = len(unit_ids)
            member_key['pearson'] = perf_scores.pearson.mean()

            scores.append(member_key)
            unit_scores.extend(
                [dict(member_key, unit_id=u, pearson=c) for u, c in zip(unit_ids, perf_scores.pearson)])
        return scores, unit_scores
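
Assuming evaluate() lives on a DataJoint computed table, a minimal caller could look like the following sketch; TestScores is an assumed table name and the restriction is illustrative:

    # Hypothetical usage -- TestScores and the restriction are assumptions.
    table = TestScores() & dict(group_id=1)   # restrict to a single model row
    scores, unit_scores = table.evaluate()    # key=None -> uses table.fetch1('KEY')
    print(scores[0]['neurons'], scores[0]['pearson'])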
Example #3
        def load_data(self, key, tier=None, batch_size=1,
                      Sampler=None, t_first=False, cuda=False):
            from .stats import BootstrapOracleTTest
            assert tier in [None, 'train', 'validation', 'test']
            datasets, loaders = super().load_data(
                key, tier=tier, batch_size=batch_size, Sampler=Sampler,
                cuda=cuda)
            for rok, dataset in datasets.items():
                member_key = (StaticMultiDataset.Member() & key &
                              dict(name=rok)).fetch1(dj.key)
                all_units, all_pvals = (
                    BootstrapOracleTTest.UnitPValue & member_key).fetch(
                        'unit_id', 'unit_p_value')
                assert len(all_pvals) > 0, \
                    'You forgot to populate BootstrapOracleTTest for group_id={}'.format(
                        member_key['group_id'])
                units_mask = np.isin(all_units, dataset.neurons.unit_ids)
                units, pvals = all_units[units_mask], all_pvals[units_mask]
                assert np.all(
                    units == dataset.neurons.unit_ids), 'order of neurons has changed'
                pval_thresh = np.power(10, float(key['p_val_power']))
                selection = pvals < pval_thresh
                log.info('Subsampling to {} neurons with BootstrapOracleTTest p-val < {:.0E}'.format(
                    selection.sum(), pval_thresh))
                dataset.transforms.insert(
                    -1, Subsample(np.where(selection)[0]))

                assert np.all(dataset.neurons.unit_ids ==
                              units[selection]), 'Units are inconsistent'
            return datasets, loaders
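
The heart of this override is the p-value subsampling. A standalone NumPy sketch of just that selection step, with illustrative inputs in place of the BootstrapOracleTTest fetch:

    import numpy as np

    # Illustrative stand-ins for the fetched per-unit p-values and key['p_val_power'].
    pvals = np.array([1e-4, 0.2, 1e-6, 0.03])
    pval_thresh = np.power(10, float(-3))   # p_val_power = -3 -> threshold 1e-3
    selection = pvals < pval_thresh         # boolean mask over units
    keep_idx = np.where(selection)[0]       # indices handed to Subsample
    print(keep_idx)                         # -> [0 2]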
Example #4
        def load_data(self, key, tier=None, batch_size=1, key_order=None, stimulus_types=None, Sampler=None):
            from .stats import Oracle
            datasets, loaders = super().load_data(
                key, tier=tier, batch_size=batch_size, key_order=key_order,
                stimulus_types=stimulus_types, Sampler=Sampler)
            for rok, dataset in datasets.items():
                member_key = (StaticMultiDataset.Member() & key &
                              dict(name=rok)).fetch1(dj.key)

                okey = dict(key, **member_key)
                okey['data_hash'] = okey.pop('oracle_source')
                units, pearson = (Oracle.UnitScores() & okey).fetch(
                    'unit_id', 'pearson')
                assert len(pearson) > 0, 'You forgot to populate oracle for data_hash="{}"'.format(
                    key['oracle_source'])
                assert len(units) == len(
                    dataset.neurons.unit_ids), 'Number of neurons has changed'
                assert np.all(
                    units == dataset.neurons.unit_ids), 'order of neurons has changed'

                low, high = np.percentile(
                    pearson, [key['percent_low'], key['percent_high']])
                selection = (pearson >= low) & (pearson <= high)
                log.info(
                    'Subsampling to {} neurons with oracle between {:.2f} and {:.2f}'.format(
                        selection.sum(), low, high))
                dataset.transforms.insert(-1,
                                          Subsample(np.where(selection)[0]))

                assert np.all(dataset.neurons.unit_ids ==
                              units[selection]), 'Units are inconsistent'
            return datasets, loaders
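
The analogous selection here keeps units inside a percentile band of oracle correlations. A standalone sketch with synthetic scores in place of the Oracle.UnitScores fetch:

    import numpy as np

    # Synthetic stand-ins for the fetched per-unit oracle correlations and key bounds.
    pearson = np.random.RandomState(0).uniform(0.0, 1.0, size=100)
    low, high = np.percentile(pearson, [25, 75])   # percent_low=25, percent_high=75
    selection = (pearson >= low) & (pearson <= high)
    print(selection.sum())                         # roughly half the units survive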
Example #5
    def compute_test_score_tuples(self, key, testloaders, model):
        scores, unit_scores = [], []
        for readout_key, testloader in testloaders.items():
            log.info('Computing test scores for ' + readout_key)

            y, y_hat = compute_predictions(testloader, model, readout_key)
            perf_scores = compute_scores(y, y_hat)

            member_key = (StaticMultiDataset.Member() & key & dict(name=readout_key)).fetch1(dj.key)
            member_key.update(key)

            unit_ids = testloader.dataset.neurons.unit_ids
            member_key['neurons'] = len(unit_ids)
            member_key['pearson'] = perf_scores.pearson.mean()

            scores.append(member_key)
            unit_scores.extend(
                [dict(member_key, unit_id=u, pearson=c) for u, c in zip(unit_ids, perf_scores.pearson)])
        return scores, unit_scores
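
The returned dicts are shaped for direct DataJoint insertion. A minimal sketch of a computed table's make() consuming them; the UnitScores part-table name and the load_data call shape are assumptions:

    # Hypothetical make() body on a computed table -- UnitScores part name is an assumption.
    def make(self, key):
        model = self.load_network(key)  # load the trained network as in evaluate() above
        model.eval()
        model.cuda()
        _, testloaders = DataConfig().load_data(key, tier='test', cuda=True)
        scores, unit_scores = self.compute_test_score_tuples(key, testloaders, model)
        self.insert(scores, ignore_extra_fields=True)
        self.UnitScores().insert(unit_scores, ignore_extra_fields=True)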
    @staticmethod
    def process_static_scans(target_scans):
        """
        Function that goes and check for every table that needs to be populate as well as provide an option
        to manaully populate AreaMembership and LayerMembership, assuming that all the neurons can be label the same Area and Layer
        if not, they the user should manually do it.

        Please refer to neuro_data/notebooks/pipeline_management notebook for an example

        Args:
            target_scans (list(dict(animal_id, session, scan_idx))): A list of dicts where each dicts contains the keys for a specific scan

        Returns:
            None
        """

        for target_scan in target_scans:
            print('[NeuroDataPipelineManagement]: Processing ' +
                  str(target_scan))

            # Check if the scan has been processed completely
            if pipeline_fuse.ScanDone() & target_scan:
                print('[Preprocessing Check]: ScanDone Check Passed')
            else:
                print(
                    '[Preprocessing Check]: ' + str(target_scan) +
                    ' scan has not been processed yet; please check the pipeline for details'
                )
                return

            # Check if neuron areas are labeled
            if pipeline_anatomy.AreaMembership() & target_scan:
                print('[Preprocessing Check]: AreaMembership Check Passed')
            else:
                print('[Preprocessing Check]: ' + str(target_scan) +
                      " AreaMembership is not populated")
                user_input = None
                while user_input not in ['y', 'n']:
                    user_input = input(
                        'Should we manually insert the area if it is known and the same for all neurons? [y/n]'
                    )

                    if user_input == 'y':
                        area = input(
                            'Input area to label neurons with [type exit to cancel]:'
                        )
                        while area not in [
                                'V1', 'LM', 'AL', 'RL', 'all-unknown'
                        ]:
                            if area == 'exit':
                                return
                            print('Invalid Area!')
                            area = input('Input area to label neurons with:')

                        NeuroDataPipelineManagement.manually_insert_area_for_scan(
                            target_scan, area)
                    elif user_input == 'n':
                        return

            # Check if neuron layers are labeled
            if pipeline_anatomy.LayerMembership() & target_scan:
                print('[Preprocessing Check]: LayerMembership Check Passed')
            else:
                print('[Preprocessing Check]: ' + str(target_scan) +
                      " LayerMembership is not populated")

                user_input = None
                while user_input not in ['y', 'n']:
                    user_input = input(
                        'Should we manually insert the layer if it is known and the same for all neurons? [y/n]'
                    )

                    if user_input == 'y':
                        layer = input(
                            'Input layer to label neurons with [type exit to cancel]:'
                        )
                        while layer not in ['L1', 'L2/3', 'L4']:
                            if layer == 'exit':
                                return
                            print('Invalid Layer!')
                            layer = input('Input layer to label neurons with:')

                        NeuroDataPipelineManagement.manually_insert_layer_for_scan(
                            target_scan, layer)
                    elif user_input == 'n':
                        return

            # Check pipeline_stimulus.Sync() table
            if pipeline_stimulus.Sync() & target_scan:
                print('[Preprocessing Check]: Sync Check Passed')
            else:
                print(
                    '[Preprocessing Check]: ' + str(target_scan) +
                    ' pipeline_stimulus.Sync() table has not been processed or failed to process'
                )
                return

            # All table requirements are met; begin neurodata dataset population
            print(
                '[Preprocessing Check]: All table requirements passed, beginning neuro_data population:'
            )

            # Get the ScanDone primary key reference
            target_scan_done_key = (pipeline_fuse.ScanDone()
                                    & target_scan).fetch1('KEY')

            # Insert into StaticScanCandidate
            if StaticScanCandidate & target_scan_done_key:
                print(
                    '[NeuroData.Static Populate]: Scan has already been added to StaticScanCandidate'
                )
            else:
                StaticScanCandidate.insert1(target_scan_done_key)
                print(
                    '[NeuroData.Static Populate]: Successfully inserted Scan into StaticScanCandidate'
                )

            # Populate StaticScan
            print("[NeuroData.Static Populate]: Populating StaticScan:")
            StaticScan().populate(target_scan_done_key)

            # Populate ImageNetSplit
            print("[NeuroData.Static Populate]: Populating ImageNetSplit:")
            ImageNetSplit().fill(target_scan_done_key)

            # Populate ConditionTier
            print("[NeuroData.Static Populate]: Populating ConditionTier:")
            ConditionTier.populate(target_scan_done_key)

            # Check for incorrect flip times
            print(
                "[NeuroData.Static Populate]: Checking for Incorrect Flip Times:"
            )
            trials = (pipeline_stimulus.Trial()
                      & target_scan).proj('flip_times').fetch(as_dict=True)
            for trial in trials:
                if trial['flip_times'].shape[1] != 3:  # 3 is the expected number of flips (hardcoded)
                    ExcludedTrial.insert1(trial, ignore_extra_fields=True)

            # Populate Frame
            print("[NeuroData.Static Populate]: Populating Frame:")
            Frame.populate(dict(preproc_id=0))

            # Populate InputResponse
            print("[NeuroData.Static Populate]: Populating InputResponse:")
            InputResponse().populate(target_scan_done_key, dict(preproc_id=0))

            # Populate Eye
            print("[NeuroData.Static Populate]: Populating Eye:")
            Eye().populate(target_scan_done_key)

            # Populate Treadmill
            print("[NeuroData.Static Populate]: Populating Treadmill:")
            Treadmill().populate(target_scan_done_key)

            # Insert scan into StaticMultiDatasetGroupAssignment with the next highest group_id
            print(
                "[NeuroData.Static Populate]: Inserting Scan into StaticMultiDatasetGroupAssignment with next largest group_id:"
            )
            target_input_response_key = (InputResponse & target_scan
                                         & dict(preproc_id=0)).fetch1('KEY')
            if StaticMultiDatasetGroupAssignment & target_input_response_key:
                print(
                    "[NeuroData.Static Populate]: Scan is already in StaticMultiDatasetGroupAssignment, skipping"
                )
            else:
                target_input_response_key['group_id'] = \
                    StaticMultiDatasetGroupAssignment().fetch('group_id').max() + 1
                target_input_response_key['description'] = 'Inserted from PipelineManagement'
                StaticMultiDatasetGroupAssignment.insert1(target_input_response_key)

            # Fill StaticMultiDataset
            print("[NeuroData.Static Populate]: Filling StaticMultiDataset:")
            StaticMultiDataset().fill()

            print('[NeuroData.Static Populate]: Generating HDF5 File')
            InputResponse().get_filename(target_scan)

            print('[PROCESSING COMPLETED FOR SCAN: ' + str(target_scan) +
                  ']\n')
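
A hedged usage sketch for the whole routine; the scan key values are illustrative:

    # Hypothetical usage -- scan key values are illustrative.
    target_scans = [dict(animal_id=12345, session=1, scan_idx=1)]
    NeuroDataPipelineManagement.process_static_scans(target_scans)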