Code Example #1
    def sample_and_compute_oracle(self, dataset, condition_hashes, sample_size, min_frames):
        num_of_neurons = dataset[0].responses.shape[1]
        dataset_condition_hashes = dataset.condition_hashes

        # Oracle computation
        true_responses = np.empty(
            shape=[len(condition_hashes), sample_size * min_frames, num_of_neurons])
        true_oracles = np.empty(
            shape=[len(condition_hashes), sample_size * min_frames, num_of_neurons])

        null_responses = np.empty(
            shape=[len(condition_hashes), sample_size * min_frames, num_of_neurons])
        null_oracles = np.empty(
            shape=[len(condition_hashes), sample_size * min_frames, num_of_neurons])

        for i in range(0, len(condition_hashes)):
            # True Oracle Computation
            # For each condition hash, sample (sample_size) trials to construct the true response matrix
            # Select (sample_size) trials

            true_target_indices = self.sample_from_condition_hash(
                condition_hashes[i], dataset_condition_hashes, sample_size)

            # Check inputs for true_oracles
            self.check_input(true_target_indices, dataset, min_frames)

            response_matrix = self.sample_frames_from_dataset(
                true_target_indices, dataset, min_frames, num_of_neurons)
            true_responses[i] = response_matrix.reshape(
                -1, response_matrix.shape[-1])
            true_oracles[i] = self.compute_oracle(response_matrix)

            # Null Oracle Computation
            # Select (sample_size) hashes and sample one trial from each
            target_hashes = np.random.choice(
                dataset_condition_hashes, sample_size, replace=False)

            # Get null_target_indices
            null_target_indices = np.array([self.sample_from_condition_hash(
                h, dataset_condition_hashes, 1)[0] for h in target_hashes])

            # Sample for each target index
            response_matrix = self.sample_frames_from_dataset(
                null_target_indices, dataset, min_frames, num_of_neurons)
            null_responses[i] = response_matrix.reshape(
                -1, response_matrix.shape[-1])
            null_oracles[i] = self.compute_oracle(response_matrix)

        true_responses = true_responses.reshape(-1, num_of_neurons)
        true_oracles = true_oracles.reshape(-1, num_of_neurons)
        null_responses = null_responses.reshape(-1, num_of_neurons)
        null_oracles = null_oracles.reshape(-1, num_of_neurons)

        return corr(true_responses, true_oracles, axis=0), corr(null_responses, null_oracles, axis=0)
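Example #1 calls a `compute_oracle` helper that is not shown. A minimal sketch of what it might look like, assuming the leave-one-out mean that Code Examples #2-#4 compute inline (the name and call signature come from the code above; the body is an assumption):

    def compute_oracle(self, response_matrix):
        # Assumed implementation: for each trial, the oracle is the mean
        # response of the remaining r - 1 repeats of the same stimulus.
        r = response_matrix.shape[0]
        mu = response_matrix.mean(axis=0, keepdims=True)
        return (mu * r - response_matrix) / (r - 1)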
Code Example #2
File: stats.py Project: mudphudwang/neuro_data
    def make(self, key):
        fname = InputResponse().get_filename(key)
        dset = MovieSet(fname, 'inputs', 'responses')
        test_index = np.where(dset.tiers == 'test')[0]
        condition_hashes = dset.condition_hashes
        hashes, counts = np.unique(condition_hashes, return_counts=True)
        repeat_hashes = hashes[counts > 2]

        oracles, data = [], []
        for cond_hash in repeat_hashes:
            repeat_index = np.where(condition_hashes == cond_hash)[0]
            index = np.intersect1d(repeat_index, test_index).tolist()
            if len(index) < 3:
                continue
            inputs = np.stack([dset.inputs[str(i)][()] for i in index], axis=0)
            outputs = np.stack([dset.responses[str(i)][()] for i in index],
                               axis=0)
            assert (np.diff(inputs, axis=0) == 0
                    ).all(), 'Video inputs of oracle trials do not match'
            new_shape = (-1, outputs.shape[-1])
            r = outputs.shape[0]
            mu = outputs.mean(axis=0, keepdims=True)
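            # Leave-one-out mean: each trial's oracle is the average of the other r - 1 repeats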
            oracle = (mu * r - outputs) / (r - 1)
            oracles.append(oracle.reshape(new_shape))
            data.append(outputs.reshape(new_shape))
        pearsons = corr(np.vstack(data), np.vstack(oracles), axis=0)
        unit_ids = dset._fid['neurons']['unit_ids'][()]

        self.insert1(
            dict(key, n_neurons=len(pearsons), pearson=np.mean(pearsons)))
        self.Unit.insert([
            dict(key, unit_id=u, pearson=p)
            for u, p in zip(unit_ids, pearsons)
        ])
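The `oracle` above is the leave-one-out mean: for each of the r repeats, the average of the other r - 1 responses. A quick standalone check of the algebra with illustrative values:

    import numpy as np

    outputs = np.array([[1.0], [2.0], [6.0]])  # r = 3 repeats, 1 neuron
    r = outputs.shape[0]
    mu = outputs.mean(axis=0, keepdims=True)   # 3.0
    oracle = (mu * r - outputs) / (r - 1)
    # Row 0 equals the mean of rows 1 and 2: (2 + 6) / 2 = 4
    print(oracle.ravel())                      # [4.  3.5 1.5]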
Code Example #3
    def _make_tuples(self, key):
        log.info('Populating ' + repr(key))
        # --- load data
        testsets, testloaders = DataConfig().load_data(key, tier='test', oracle=True)

        self.insert1(dict(key))
        for readout_key, loader in testloaders.items():
            log.info('Computing oracle for ' + readout_key)
            oracles, data = [], []
            for inputs, *_, outputs in loader:
                inputs = inputs.numpy()
                assert np.all(np.abs(np.diff(inputs, axis=0)) == 0), \
                    'Video inputs of oracle trials do not match'
                outputs = outputs.numpy()
                new_shape = (-1, outputs.shape[-1])
                r, _, n = outputs.shape  # repeats x time x neurons
                mu = outputs.mean(axis=0, keepdims=True)
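                # Leave-one-out mean in an equivalent factored form (see the note after this example)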
                oracle = (mu - outputs / r) * r / (r - 1)
                oracles.append(oracle.reshape(new_shape))
                data.append(outputs.reshape(new_shape))

            pearson = corr(np.vstack(data), np.vstack(oracles), axis=0)
            member_key = (MovieMultiDataset.Member() & key &
                          dict(name=readout_key)).fetch1(dj.key)
            member_key = dict(member_key, **key)
            self.Pearson().insert1(dict(member_key, pearson=np.mean(pearson), n_neurons=len(pearson)),
                                   ignore_extra_fields=True)
            unit_ids = testsets[readout_key].neurons.unit_ids
            assert len(unit_ids) == len(
                pearson) == outputs.shape[-1], 'Neuron numbers do not add up'
            self.UnitPearson().insert(
                [dict(member_key, pearson=c, unit_id=u)
                 for u, c in tqdm(zip(unit_ids, pearson), total=len(unit_ids))],
                ignore_extra_fields=True)
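The formula here, (mu - outputs / r) * r / (r - 1), is algebraically the same leave-one-out mean as Code Example #2's (mu * r - outputs) / (r - 1). A quick equivalence check on random data:

    import numpy as np

    outputs = np.random.rand(5, 2, 7)         # repeats x time x neurons
    r = outputs.shape[0]
    mu = outputs.mean(axis=0, keepdims=True)
    a = (mu - outputs / r) * r / (r - 1)      # form used in Code Example #3
    b = (mu * r - outputs) / (r - 1)          # form used in Code Example #2
    assert np.allclose(a, b)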
Code Example #4
    def make(self, key):
        # --- load data
        testsets, testloaders = DataConfig().load_data(key, tier='test', oracle=True)

        self.insert1(dict(key))
        for readout_key, loader in testloaders.items():
            log.info('Computing oracle for ' + readout_key)
            oracles, data = [], []
            for inputs, *_, outputs in loader:
                inputs = inputs.numpy()
                outputs = outputs.numpy()
                assert np.all(np.abs(np.diff(inputs, axis=0)) == 0), \
                    'Images of oracle trials do not match'
                r, n = outputs.shape  # repeats x neurons
                log.info('\t    {} responses for {} neurons'.format(r, n))
                assert r > 4, 'need more than 4 trials for oracle computation'
                mu = outputs.mean(axis=0, keepdims=True)
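                # Same leave-one-out oracle as in Code Examples #2 and #3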
                oracle = (mu - outputs / r) * r / (r - 1)
                oracles.append(oracle)
                data.append(outputs)
            if len(data) == 0:
                log.error('Found no oracle trials! Skipping ...')
                return

            # Stack all oracle trials once (rather than re-stacking per use)
            data_stacked = np.vstack(data)
            oracles_stacked = np.vstack(oracles)

            # Pearson correlation
            pearson = corr(data_stacked, oracles_stacked, axis=0)

            # Spearman correlation: rank-transform each neuron's responses
            # (a double argsort yields ranks), then correlate the ranks
            data_rank = np.empty(data_stacked.shape)
            oracles_rank = np.empty(oracles_stacked.shape)
            for i in range(data_stacked.shape[1]):
                data_rank[:, i] = np.argsort(np.argsort(data_stacked[:, i]))
                oracles_rank[:, i] = np.argsort(np.argsort(oracles_stacked[:, i]))
            spearman = corr(data_rank, oracles_rank, axis=0)

            member_key = (StaticMultiDataset.Member() & key &
                          dict(name=readout_key)).fetch1(dj.key)
            member_key = dict(member_key, **key)
            self.Scores().insert1(dict(member_key, pearson=np.mean(pearson), spearman=np.mean(spearman)), ignore_extra_fields=True)
            unit_ids = testsets[readout_key].neurons.unit_ids
            assert len(unit_ids) == len(
                pearson) == len(spearman) == outputs.shape[-1], 'Neuron numbers do not add up'
            self.UnitScores().insert(
                [dict(member_key, pearson=c, spearman=s, unit_id=u)
                 for u, c, s in tqdm(zip(unit_ids, pearson, spearman), total=len(unit_ids))],
                ignore_extra_fields=True)
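Every example here relies on a `corr` helper that is not shown. A plausible stand-in, column-wise Pearson correlation along `axis` (the eps guard against zero-variance columns is an assumption; the project's actual utility may differ):

    import numpy as np

    def corr(y1, y2, axis=0, eps=1e-8):
        # Normalize each column to zero mean and (near) unit variance,
        # then average the elementwise product: the Pearson correlation.
        y1 = (y1 - y1.mean(axis=axis, keepdims=True)) / (y1.std(axis=axis, keepdims=True) + eps)
        y2 = (y2 - y2.mean(axis=axis, keepdims=True)) / (y2.std(axis=axis, keepdims=True) + eps)
        return (y1 * y2).mean(axis=axis)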
Code Example #5
    def sample_and_compute_oracle(self, dataset, frame_image_ids, condition_hashes, sample_size):
        num_natim = len(frame_image_ids)
        num_noise = len(condition_hashes)
        total_imgs = num_natim + num_noise

        # Construct boolean mask for null sampling
        is_natim_list = [True] * num_natim
        is_natim_list += [False] * num_noise

        # Cache these in memory to avoid re-fetching from the dataset each time
        dataset_responses = dataset.responses
        dataset_frame_image_id = dataset.info.frame_image_id
        dataset_condition_hashes = dataset.condition_hashes
        dataset_images = dataset.images

        # True oracle computation
        # Matrices to store results
        true_responses = np.empty(
            shape=[total_imgs, sample_size, dataset_responses.shape[1]])
        true_oracles = np.empty(
            shape=[total_imgs, sample_size, dataset_responses.shape[1]])

        for i in range(0, num_natim):
            true_target_index = self.sample_from_id_or_hash(
                frame_image_ids[i], dataset_frame_image_id, sample_size)
            self.check_input(dataset_images[true_target_index])

            response_matrix = dataset_responses[true_target_index]
            true_responses[i] = response_matrix
            true_oracles[i] = self.compute_oracle(response_matrix)

        for i in range(0, num_noise):
            true_target_index = self.sample_from_id_or_hash(
                condition_hashes[i], dataset_condition_hashes, sample_size)
            self.check_input(dataset_images[true_target_index])

            response_matrix = dataset_responses[true_target_index]
            true_responses[i + num_natim] = response_matrix
            true_oracles[i + num_natim] = self.compute_oracle(response_matrix)

        # Null oracle computation
        null_responses = np.empty(
            shape=[total_imgs, sample_size, dataset_responses.shape[1]])
        null_oracles = np.empty(
            shape=[total_imgs, sample_size, dataset_responses.shape[1]])

        for i in range(0, total_imgs):
            id_or_hash_indices = np.random.choice(
                len(is_natim_list), sample_size, replace=False)
            null_target_indices = np.empty(
                shape=[len(id_or_hash_indices)], dtype='int_')

            for j, id_or_hash_indx in enumerate(id_or_hash_indices):
                # Determine whether this index refers to a natural image or a noise stimulus
                if id_or_hash_indx < num_natim:
                    id_or_hash = frame_image_ids[id_or_hash_indx]
                else:
                    id_or_hash = condition_hashes[id_or_hash_indx - num_natim]

                # Sample from respective datasets
                if is_natim_list[id_or_hash_indx]:
                    null_target_indices[j] = self.sample_from_id_or_hash(
                        id_or_hash, dataset_frame_image_id, 1)
                else:
                    null_target_indices[j] = self.sample_from_id_or_hash(
                        id_or_hash, dataset_condition_hashes, 1)

            response_matrix = dataset_responses[null_target_indices]
            null_responses[i] = response_matrix
            null_oracles[i] = self.compute_oracle(response_matrix)

        true_responses = true_responses.reshape(
            [-1, dataset_responses.shape[1]])
        true_oracles = true_oracles.reshape([-1, dataset_responses.shape[1]])
        null_responses = null_responses.reshape(
            [-1, dataset_responses.shape[1]])
        null_oracles = null_oracles.reshape([-1, dataset_responses.shape[1]])

        return corr(true_responses, true_oracles, axis=0), corr(null_responses, null_oracles, axis=0)
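Like Example #1, this method leans on helpers that are not shown. A minimal sketch of `sample_from_id_or_hash`, consistent with how it is called above (the body is an assumption; numpy is assumed imported as np):

    def sample_from_id_or_hash(self, id_or_hash, dataset_keys, sample_size):
        # Hypothetical helper: draw `sample_size` distinct trial indices
        # whose frame_image_id / condition_hash equals `id_or_hash`.
        candidates = np.where(np.asarray(dataset_keys) == id_or_hash)[0]
        return np.random.choice(candidates, sample_size, replace=False)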