def sample_and_compute_oracle(self, dataset, condition_hashes, sample_size, min_frames):
    num_of_neurons = dataset[0].responses.shape[1]
    dataset_condition_hashes = dataset.condition_hashes

    # Oracle computation
    true_responses = np.empty(
        shape=[len(condition_hashes), sample_size * min_frames, num_of_neurons])
    true_oracles = np.empty(
        shape=[len(condition_hashes), sample_size * min_frames, num_of_neurons])
    null_responses = np.empty(
        shape=[len(condition_hashes), sample_size * min_frames, num_of_neurons])
    null_oracles = np.empty(
        shape=[len(condition_hashes), sample_size * min_frames, num_of_neurons])

    for i in range(len(condition_hashes)):
        # True oracle computation: for each condition hash, sample
        # (sample_size) repeated trials to construct the true response matrix
        true_target_indices = self.sample_from_condition_hash(
            condition_hashes[i], dataset_condition_hashes, sample_size)

        # Check inputs for true_oracles
        self.check_input(true_target_indices, dataset, min_frames)
        response_matrix = self.sample_frames_from_dataset(
            true_target_indices, dataset, min_frames, num_of_neurons)
        true_responses[i] = response_matrix.reshape(-1, response_matrix.shape[-1])
        true_oracles[i] = self.compute_oracle(response_matrix)

        # Null oracle computation: select (sample_size) distinct hashes
        # and sample one trial from each
        target_hashes = np.random.choice(
            dataset_condition_hashes, sample_size, replace=False)
        null_target_indices = np.array([
            self.sample_from_condition_hash(h, dataset_condition_hashes, 1)[0]
            for h in target_hashes])

        # Sample frames for each target index
        response_matrix = self.sample_frames_from_dataset(
            null_target_indices, dataset, min_frames, num_of_neurons)
        null_responses[i] = response_matrix.reshape(-1, response_matrix.shape[-1])
        null_oracles[i] = self.compute_oracle(response_matrix)

    true_responses = true_responses.reshape(-1, num_of_neurons)
    true_oracles = true_oracles.reshape(-1, num_of_neurons)
    null_responses = null_responses.reshape(-1, num_of_neurons)
    null_oracles = null_oracles.reshape(-1, num_of_neurons)

    return (corr(true_responses, true_oracles, axis=0),
            corr(null_responses, null_oracles, axis=0))
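# NOTE: `compute_oracle`, `sample_from_condition_hash`, `check_input`, and
# `sample_frames_from_dataset` are class helpers not shown in this listing.
# A minimal sketch of what `compute_oracle` presumably does, assuming it uses
# the same leave-one-out mean as the make() methods below (hypothetical body,
# not the repository's actual helper):
import numpy as np

def _compute_oracle_sketch(response_matrix):
    """Leave-one-out oracle: for each trial, the mean over the other trials."""
    r = response_matrix.shape[0]  # trials x frames x neurons
    mu = response_matrix.mean(axis=0, keepdims=True)
    oracle = (mu * r - response_matrix) / (r - 1)
    # flatten trials x frames into rows, matching true_oracles[i] above
    return oracle.reshape(-1, response_matrix.shape[-1])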
def make(self, key):
    fname = InputResponse().get_filename(key)
    dset = MovieSet(fname, 'inputs', 'responses')
    test_index = np.where(dset.tiers == 'test')[0]
    condition_hashes = dset.condition_hashes
    hashes, counts = np.unique(condition_hashes, return_counts=True)
    repeat_hashes = hashes[counts > 2]
    oracles, data = [], []
    for cond_hash in repeat_hashes:
        repeat_index = np.where(condition_hashes == cond_hash)[0]
        index = np.intersect1d(repeat_index, test_index).tolist()
        if len(index) < 3:
            continue
        inputs = np.stack([dset.inputs[str(i)][()] for i in index], axis=0)
        outputs = np.stack([dset.responses[str(i)][()] for i in index], axis=0)
        assert (np.diff(inputs, axis=0) == 0).all(), \
            'Video inputs of oracle trials do not match'
        new_shape = (-1, outputs.shape[-1])
        r = outputs.shape[0]  # number of repeats
        mu = outputs.mean(axis=0, keepdims=True)
        oracle = (mu * r - outputs) / (r - 1)
        oracles.append(oracle.reshape(new_shape))
        data.append(outputs.reshape(new_shape))
    pearsons = corr(np.vstack(data), np.vstack(oracles), axis=0)
    unit_ids = dset._fid['neurons']['unit_ids'][()]
    self.insert1(
        dict(key, n_neurons=len(pearsons), pearson=np.mean(pearsons)))
    self.Unit.insert([
        dict(key, unit_id=u, pearson=p) for u, p in zip(unit_ids, pearsons)
    ])
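# The closed form above is the jackknife (leave-one-out) mean: for repeat i,
# (mu * r - outputs[i]) / (r - 1) is exactly the average of the other r - 1
# repeats, so each trial is correlated against an oracle that excludes it.
# A quick self-contained check (illustrative only):
import numpy as np

_outputs = np.random.rand(5, 7)  # r = 5 repeats x 7 neurons
_r = _outputs.shape[0]
_mu = _outputs.mean(axis=0, keepdims=True)
_oracle = (_mu * _r - _outputs) / (_r - 1)
for _i in range(_r):
    assert np.allclose(_oracle[_i], np.delete(_outputs, _i, axis=0).mean(axis=0))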
def _make_tuples(self, key):
    log.info('Populating ' + repr(key))
    # --- load data
    testsets, testloaders = DataConfig().load_data(key, tier='test', oracle=True)

    self.insert1(dict(key))
    for readout_key, loader in testloaders.items():
        log.info('Computing oracle for ' + readout_key)
        oracles, data = [], []
        for inputs, *_, outputs in loader:
            inputs = inputs.numpy()
            assert np.all(np.abs(np.diff(inputs, axis=0)) == 0), \
                'Video inputs of oracle trials do not match'
            outputs = outputs.numpy()
            new_shape = (-1, outputs.shape[-1])
            r, _, n = outputs.shape  # repeats x frames x neurons
            mu = outputs.mean(axis=0, keepdims=True)
            # algebraically identical to (mu * r - outputs) / (r - 1) above
            oracle = (mu - outputs / r) * r / (r - 1)
            oracles.append(oracle.reshape(new_shape))
            data.append(outputs.reshape(new_shape))
        pearson = corr(np.vstack(data), np.vstack(oracles), axis=0)

        member_key = (MovieMultiDataset.Member() & key &
                      dict(name=readout_key)).fetch1(dj.key)
        member_key = dict(member_key, **key)
        self.Pearson().insert1(dict(member_key, pearson=np.mean(pearson),
                                    n_neurons=len(pearson)),
                               ignore_extra_fields=True)
        unit_ids = testsets[readout_key].neurons.unit_ids
        assert len(unit_ids) == len(pearson) == outputs.shape[-1], \
            'Neuron numbers do not add up'
        self.UnitPearson().insert(
            [dict(member_key, pearson=c, unit_id=u)
             for u, c in tqdm(zip(unit_ids, pearson), total=len(unit_ids))],
            ignore_extra_fields=True)
def make(self, key):
    # --- load data
    testsets, testloaders = DataConfig().load_data(key, tier='test', oracle=True)

    self.insert1(dict(key))
    for readout_key, loader in testloaders.items():
        log.info('Computing oracle for ' + readout_key)
        oracles, data = [], []
        for inputs, *_, outputs in loader:
            inputs = inputs.numpy()
            outputs = outputs.numpy()
            assert np.all(np.abs(np.diff(inputs, axis=0)) == 0), \
                'Images of oracle trials do not match'
            r, n = outputs.shape  # responses x neurons
            log.info('\t {} responses for {} neurons'.format(r, n))
            assert r > 4, 'need more than 4 trials for oracle computation'
            mu = outputs.mean(axis=0, keepdims=True)
            oracle = (mu - outputs / r) * r / (r - 1)
            oracles.append(oracle)
            data.append(outputs)
        if len(data) == 0:
            log.error('Found no oracle trials! Skipping ...')
            return

        data = np.vstack(data)
        oracles = np.vstack(oracles)

        # Pearson correlation
        pearson = corr(data, oracles, axis=0)

        # Spearman correlation: rank each column via double argsort, then
        # correlate the ranks
        data_rank = np.empty(data.shape)
        oracles_rank = np.empty(oracles.shape)
        for i in range(data.shape[1]):
            data_rank[:, i] = np.argsort(np.argsort(data[:, i]))
            oracles_rank[:, i] = np.argsort(np.argsort(oracles[:, i]))
        spearman = corr(data_rank, oracles_rank, axis=0)

        member_key = (StaticMultiDataset.Member() & key &
                      dict(name=readout_key)).fetch1(dj.key)
        member_key = dict(member_key, **key)
        self.Scores().insert1(dict(member_key, pearson=np.mean(pearson),
                                   spearman=np.mean(spearman)),
                              ignore_extra_fields=True)

        unit_ids = testsets[readout_key].neurons.unit_ids
        assert len(unit_ids) == len(pearson) == len(spearman) == outputs.shape[-1], \
            'Neuron numbers do not add up'
        self.UnitScores().insert(
            [dict(member_key, pearson=c, spearman=s, unit_id=u)
             for u, c, s in tqdm(zip(unit_ids, pearson, spearman),
                                 total=len(unit_ids))],
            ignore_extra_fields=True)
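# The double argsort above turns each column into ordinal ranks (0..r-1), so
# correlating the ranks with Pearson yields a Spearman correlation. Note that
# np.argsort(np.argsort(x)) breaks ties by position, whereas
# scipy.stats.rankdata averages tied ranks; for continuous responses the
# difference is usually negligible. Illustration:
import numpy as np
from scipy.stats import rankdata

_x = np.array([0.3, 0.1, 0.3, 0.7])
print(np.argsort(np.argsort(_x)))  # ordinal ranks, ties broken by position: [1 0 2 3]
print(rankdata(_x))                # tie-averaged ranks: [2.5 1.  2.5 4. ]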
def sample_and_compute_oracle(self, dataset, frame_image_ids, condition_hashes, sample_size):
    num_natim = len(frame_image_ids)
    num_noise = len(condition_hashes)
    total_imgs = num_natim + num_noise

    # Construct boolean list for null sampling: True for natural images,
    # False for noise conditions
    is_natim_list = [True] * num_natim + [False] * num_noise

    # Cache in memory to avoid pulling from the dataset on every access
    dataset_responses = dataset.responses
    dataset_frame_image_id = dataset.info.frame_image_id
    dataset_condition_hashes = dataset.condition_hashes
    dataset_images = dataset.images

    # True oracle computation
    # Matrices to store results
    true_responses = np.empty(
        shape=[total_imgs, sample_size, dataset_responses.shape[1]])
    true_oracles = np.empty(
        shape=[total_imgs, sample_size, dataset_responses.shape[1]])

    for i in range(num_natim):
        true_target_index = self.sample_from_id_or_hash(
            frame_image_ids[i], dataset_frame_image_id, sample_size)
        self.check_input(dataset_images[true_target_index])
        response_matrix = dataset_responses[true_target_index]
        true_responses[i] = response_matrix
        true_oracles[i] = self.compute_oracle(response_matrix)

    for i in range(num_noise):
        true_target_index = self.sample_from_id_or_hash(
            condition_hashes[i], dataset_condition_hashes, sample_size)
        self.check_input(dataset_images[true_target_index])
        response_matrix = dataset_responses[true_target_index]
        true_responses[i + num_natim] = response_matrix
        true_oracles[i + num_natim] = self.compute_oracle(response_matrix)

    # Null oracle computation
    null_responses = np.empty(
        shape=[total_imgs, sample_size, dataset_responses.shape[1]])
    null_oracles = np.empty(
        shape=[total_imgs, sample_size, dataset_responses.shape[1]])

    for i in range(total_imgs):
        id_or_hash_indices = np.random.choice(
            len(is_natim_list), sample_size, replace=False)
        null_target_indices = np.empty(
            shape=[len(id_or_hash_indices)], dtype='int_')

        for j, id_or_hash_indx in enumerate(id_or_hash_indices):
            # Determine whether the index refers to a natural image or a
            # noise condition
            if id_or_hash_indx < num_natim:
                id_or_hash = frame_image_ids[id_or_hash_indx]
            else:
                id_or_hash = condition_hashes[id_or_hash_indx - num_natim]

            # Sample one trial from the respective pool
            if is_natim_list[id_or_hash_indx]:
                null_target_indices[j] = self.sample_from_id_or_hash(
                    id_or_hash, dataset_frame_image_id, 1)
            else:
                null_target_indices[j] = self.sample_from_id_or_hash(
                    id_or_hash, dataset_condition_hashes, 1)

        response_matrix = dataset_responses[null_target_indices]
        null_responses[i] = response_matrix
        null_oracles[i] = self.compute_oracle(response_matrix)

    true_responses = true_responses.reshape([-1, dataset_responses.shape[1]])
    true_oracles = true_oracles.reshape([-1, dataset_responses.shape[1]])
    null_responses = null_responses.reshape([-1, dataset_responses.shape[1]])
    null_oracles = null_oracles.reshape([-1, dataset_responses.shape[1]])

    return (corr(true_responses, true_oracles, axis=0),
            corr(null_responses, null_oracles, axis=0))
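# NOTE: `sample_from_id_or_hash` is another helper not shown in this listing.
# A plausible sketch, assuming it draws `sample_size` distinct trial indices
# whose frame_image_id / condition_hash matches the given key (hypothetical
# body, not the repository's code):
import numpy as np

def _sample_from_id_or_hash_sketch(id_or_hash, dataset_keys, sample_size):
    candidates = np.where(np.asarray(dataset_keys) == id_or_hash)[0]
    return np.random.choice(candidates, sample_size, replace=False)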