# Assumed imports for the snippets below: exact module paths vary across
# brainio / brain-score versions (older releases expose the assembly classes
# under `brainio_base.assemblies`). Project-specific helpers used further down
# (Score, align, apply_aggregate, find_nearest, standard_error_of_the_mean,
# BrainModel, ...) come from the surrounding brain-score / neural-nlp codebases.
import itertools
import os
import re
import warnings
from collections import Counter, defaultdict

import numpy as np
from sklearn.preprocessing import scale
from tqdm import tqdm
from xarray import DataArray

from brainio.assemblies import (BehavioralAssembly, DataAssembly, NeuroidAssembly,
                                array_is_element, merge_data_arrays, walk_coords)


def _coords_match(elements, dim, match_values=False):
    # note: walk_coords yields (name, dims, values); compare coord names
    # (and optionally their values), not the dims tuple
    first_coords = [(name, tuple(values)) if match_values else name
                    for name, _, values in walk_coords(elements[0][dim])]
    other_coords = [[(name, tuple(values)) if match_values else name
                     for name, _, values in walk_coords(element[dim])]
                    for element in elements[1:]]
    return all(tuple(first_coords) == tuple(coords) for coords in other_coords)

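# Hypothetical usage sketch for `_coords_match`: two assemblies that agree on
# their presentation coords but hold different neuroids. The assembly contents
# here are illustrative assumptions, built with brainio's DataAssembly as
# imported above.
def _example_coords_match():
    a = DataAssembly([[1, 2], [3, 4]],
                     coords={'stimulus_id': ('presentation', ['s1', 's2']),
                             'neuroid_id': ('neuroid', ['n1', 'n2'])},
                     dims=['presentation', 'neuroid'])
    b = DataAssembly([[5, 6], [7, 8]],
                     coords={'stimulus_id': ('presentation', ['s1', 's2']),
                             'neuroid_id': ('neuroid', ['n3', 'n4'])},
                     dims=['presentation', 'neuroid'])
    assert _coords_match([a, b], dim='presentation', match_values=True)  # same keys and values
    assert not _coords_match([a, b], dim='neuroid', match_values=True)  # same keys, different values
    return a, b
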
def _package(self, layer_activations, stimuli_paths):
    shapes = [a.shape for a in layer_activations.values()]
    self._logger.debug('Activations shapes: {}'.format(shapes))
    self._logger.debug("Packaging individual layers")
    layer_assemblies = [self._package_layer(single_layer_activations, layer=layer, stimuli_paths=stimuli_paths)
                        for layer, single_layer_activations in tqdm(layer_activations.items(),
                                                                    desc='layer packaging')]
    # merge manually instead of using merge_data_arrays since `xarray.merge` is very slow with these large arrays
    # complication: (non)neuroid_coords are taken from the structure of layer_assemblies[0] i.e. the 1st assembly;
    # using these names/keys for all assemblies results in KeyError if the first layer contains flatten_coord_names
    # (see _package_layer) not present in later layers, e.g. first layer = conv, later layer = transformer layer
    self._logger.debug("Merging layer assemblies")
    model_assembly = np.concatenate([a.values for a in layer_assemblies],
                                    axis=layer_assemblies[0].dims.index('neuroid'))
    nonneuroid_coords = {coord: (dims, values) for coord, dims, values in walk_coords(layer_assemblies[0])
                         if set(dims) != {'neuroid'}}
    neuroid_coords = {coord: [dims, values] for coord, dims, values in walk_coords(layer_assemblies[0])
                      if set(dims) == {'neuroid'}}
    for layer_assembly in layer_assemblies[1:]:
        for coord in neuroid_coords:
            neuroid_coords[coord][1] = np.concatenate((neuroid_coords[coord][1], layer_assembly[coord].values))
        assert layer_assemblies[0].dims == layer_assembly.dims
        for dim in set(layer_assembly.dims) - {'neuroid'}:
            for coord in layer_assembly[dim].coords:
                assert (layer_assembly[coord].values == nonneuroid_coords[coord][1]).all()
    neuroid_coords = {coord: (dims_values[0], dims_values[1])  # re-package as tuple instead of list for xarray
                      for coord, dims_values in neuroid_coords.items()}
    model_assembly = type(layer_assemblies[0])(model_assembly,
                                               coords={**nonneuroid_coords, **neuroid_coords},
                                               dims=layer_assemblies[0].dims)
    return model_assembly

def __call__(self, model_activations, target_rdm):
    model_activations = align(model_activations, target_rdm, on='stimulus_sentence')
    model_rdm = self._rdm(model_activations)
    values = model_rdm.values
    if np.isnan(values.flatten()).any():
        warnings.warn(f"{np.isnan(values.flatten()).sum()} nan values found in model rdm - setting to 0")
        values[np.isnan(values)] = 0
        model_rdm = type(model_rdm)(values,
                                    coords={coord: (dims, vals) for coord, dims, vals in walk_coords(model_rdm)},
                                    dims=model_rdm.dims)
    leave_one_out = self.LeaveOneOutWrapper(self._similarity)
    # multi-dimensional coords with repeated dimensions not yet supported in CrossValidation
    drop_coords = [coord for coord, dims, value in walk_coords(target_rdm) if dims == ('stimulus', 'stimulus')]
    target_rdm = target_rdm.drop(drop_coords)
    return self._cross_validation(model_rdm, target_rdm, apply=leave_one_out)

def _package(self, layer_activations, stimuli_paths):
    shapes = [a.shape for a in layer_activations.values()]
    self._logger.debug('Activations shapes: {}'.format(shapes))
    self._logger.debug("Packaging individual layers")
    layer_assemblies = [self._package_layer(single_layer_activations, layer=layer, stimuli_paths=stimuli_paths)
                        for layer, single_layer_activations in tqdm(layer_activations.items(),
                                                                    desc='layer packaging')]
    # merge manually instead of using merge_data_arrays since `xarray.merge` is very slow with these large arrays
    self._logger.debug("Merging layer assemblies")
    model_assembly = np.concatenate([a.values for a in layer_assemblies],
                                    axis=layer_assemblies[0].dims.index('neuroid'))
    nonneuroid_coords = {coord: (dims, values) for coord, dims, values in walk_coords(layer_assemblies[0])
                         if set(dims) != {'neuroid'}}
    neuroid_coords = {coord: [dims, values] for coord, dims, values in walk_coords(layer_assemblies[0])
                      if set(dims) == {'neuroid'}}
    for layer_assembly in layer_assemblies[1:]:
        for coord in neuroid_coords:
            neuroid_coords[coord][1] = np.concatenate((neuroid_coords[coord][1], layer_assembly[coord].values))
        assert layer_assemblies[0].dims == layer_assembly.dims
        for dim in set(layer_assembly.dims) - {'neuroid'}:
            for coord in layer_assembly[dim].coords:
                assert (layer_assembly[coord].values == nonneuroid_coords[coord][1]).all()
    neuroid_coords = {coord: (dims_values[0], dims_values[1])  # re-package as tuple instead of list for xarray
                      for coord, dims_values in neuroid_coords.items()}
    model_assembly = type(layer_assemblies[0])(model_assembly,
                                               coords={**nonneuroid_coords, **neuroid_coords},
                                               dims=layer_assemblies[0].dims)
    return model_assembly

def __call__(self, prediction, target):
    # align
    prediction = prediction.sortby([self._correlation_coord, self._neuroid_coord])
    target = target.sortby([self._correlation_coord, self._neuroid_coord])
    assert np.array(prediction[self._correlation_coord].values == target[self._correlation_coord].values).all()
    assert np.array(prediction[self._neuroid_coord].values == target[self._neuroid_coord].values).all()
    # compute correlation per neuroid
    neuroid_dims = target[self._neuroid_coord].dims
    assert len(neuroid_dims) == 1
    correlations = []
    for i, coord_value in enumerate(target[self._neuroid_coord].values):
        target_neuroids = target.isel(**{neuroid_dims[0]: i})  # `isel` is about 10x faster than `sel`
        prediction_neuroids = prediction.isel(**{neuroid_dims[0]: i})
        r, p = self._correlation(target_neuroids, prediction_neuroids)
        correlations.append(r)
    # package
    result = Score(correlations,
                   coords={coord: (dims, values) for coord, dims, values in walk_coords(target)
                           if dims == neuroid_dims},
                   dims=neuroid_dims)
    return result

def predict_proba(self, X):
    import tensorflow as tf
    assert len(X.shape) == 2, "expected 2-dimensional input"
    if self._zscore_feats:
        scaled_X = self._scaler.transform(X)
    else:
        scaled_X = X
    with self._graph.as_default():
        preds = []
        for batch in self._iterate_minibatches(scaled_X, batchsize=self._eval_batch_size, shuffle=False):
            feed_dict = {self._input_placeholder: batch, self._fc_keep_prob: 1.0}
            softmax = self._sess.run([tf.nn.softmax(self._predictions)], feed_dict=feed_dict)
            preds.append(np.squeeze(softmax))
        proba = np.concatenate(preds, axis=0)
    # we take only the 0th dimension because the 1st dimension is just the features
    X_coords = {coord: (dims, value) for coord, dims, value in walk_coords(X)
                if array_is_element(dims, X.dims[0])}
    proba = BehavioralAssembly(proba,
                               coords={**X_coords, **{'choice': list(self._label_mapping.values())}},
                               dims=[X.dims[0], 'choice'])
    return proba

def build_response_matrix_from_responses(self, responses):
    num_choices = [(image_id, choice) for image_id, choice in zip(responses['image_id'].values, responses.values)]
    num_choices = Counter(num_choices)
    num_objects = [[(image_id, sample_obj), (image_id, dist_obj)] for image_id, sample_obj, dist_obj in
                   zip(responses['image_id'].values, responses['sample_obj'].values, responses['dist_obj'].values)]
    num_objects = Counter(itertools.chain(*num_objects))

    choices = np.unique(responses)
    image_ids, indices = np.unique(responses['image_id'], return_index=True)
    truths = responses['truth'].values[indices]
    image_dim = responses['image_id'].dims
    coords = {**{coord: (dims, value) for coord, dims, value in walk_coords(responses)},
              **{'choice': ('choice', choices)}}
    coords = {coord: (dims, value if dims != image_dim else value[indices])  # align image_dim coords with indices
              for coord, (dims, value) in coords.items()}
    response_matrix = np.zeros((len(image_ids), len(choices)))
    for (image_index, image_id), (choice_index, choice) in itertools.product(
            enumerate(image_ids), enumerate(choices)):
        if truths[image_index] == choice:  # object == choice, ignore
            p = np.nan
        else:
            # divide by number of times where object was one of the two choices (target or distractor)
            p = (num_choices[(image_id, choice)] / num_objects[(image_id, choice)]) \
                if num_objects[(image_id, choice)] > 0 else np.nan
        response_matrix[image_index, choice_index] = p
    response_matrix = DataAssembly(response_matrix, coords=coords, dims=responses.dims + ('choice',))
    return response_matrix

def _dim_coord_values(assembly):
    dim_coord_values = defaultdict(dict)
    for coord, dims, values in walk_coords(assembly):
        assert len(dims) == 1
        dim = dims[0]
        dim_coord_values[dim][coord] = values.tolist()
    return dim_coord_values

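# Hypothetical usage sketch: `_dim_coord_values` builds a dim -> {coord: values}
# mapping, handy for comparing two assemblies' structure. Reuses the assumed
# example assembly from `_example_coords_match` above; the exact coord names in
# the output depend on how brainio indexes single-coordinate dimensions.
def _example_dim_coord_values():
    a, _ = _example_coords_match()
    print(dict(_dim_coord_values(a)))
    # roughly: {'presentation': {'stimulus_id': ['s1', 's2']},
    #           'neuroid': {'neuroid_id': ['n1', 'n2']}}
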
def from_paths(self, *args, **kwargs):
    raw_activations = super(TemporalExtractor, self).from_paths(*args, **kwargs)
    # introduce time dimension
    regions = defaultdict(list)
    for layer in set(raw_activations['layer'].values):
        match = re.match(r'(([^-]*)\..*|logits|avgpool)-t([0-9]+)', layer)
        region, timestep = match.group(2) if match.group(2) else match.group(1), match.group(3)
        stripped_layer = match.group(1)
        regions[region].append((layer, stripped_layer, timestep))
    activations = {}
    for region, time_layers in regions.items():
        for (full_layer, stripped_layer, timestep) in time_layers:
            region_time_activations = raw_activations.sel(layer=full_layer)
            region_time_activations['layer'] = 'neuroid', [stripped_layer] * len(region_time_activations['neuroid'])
            activations[(region, timestep)] = region_time_activations
    for key, key_activations in activations.items():
        region, timestep = key
        key_activations['region'] = 'neuroid', [region] * len(key_activations['neuroid'])
        activations[key] = NeuroidAssembly([key_activations.values], coords={
            **{coord: (dims, values) for coord, dims, values in walk_coords(activations[key])
               if coord != 'neuroid_id'},  # otherwise, neuroid dim will be as large as before with nans
            **{'time_step': [int(timestep)]}
        }, dims=['time_step'] + list(key_activations.dims))
    activations = list(activations.values())
    activations = merge_data_arrays(activations)
    # rebuild neuroid_id without timestep
    neuroid_id = [".".join([f"{value}" for value in values]) for values in zip(*[
        activations[coord].values for coord in ['model', 'region', 'neuroid_num']])]
    activations['neuroid_id'] = 'neuroid', neuroid_id
    return activations

def _package_prediction(self, predicted_values, source):
    coords = {coord: (dims, values) for coord, dims, values in walk_coords(source)
              if not array_is_element(dims, self._neuroid_dim)}
    # re-package neuroid coords
    dims = source.dims
    # if there is only one neuroid coordinate, it would get discarded and the dimension would be used as coordinate.
    # to avoid this, we can build the assembly first and then stack on the neuroid dimension.
    neuroid_level_dim = None
    if len(self._target_neuroid_values) == 1:  # extract single key: https://stackoverflow.com/a/20145927/2225200
        (neuroid_level_dim, _), = self._target_neuroid_values.items()
        dims = [dim if dim != self._neuroid_dim else neuroid_level_dim for dim in dims]
    for target_coord, target_value in self._target_neuroid_values.items():
        # this might overwrite values which is okay
        coords[target_coord] = (neuroid_level_dim or self._neuroid_dim), target_value
    prediction = NeuroidAssembly(predicted_values, coords=coords, dims=dims)
    if neuroid_level_dim:
        prediction = prediction.stack(**{self._neuroid_dim: [neuroid_level_dim]})
    return prediction

def get_modified_coords(assembly, modifier=lambda name, dims, values: (name, (dims, values))):
    coords = {}
    for name, dims, values in walk_coords(assembly):
        name_dims_vals = modifier(name, dims, values)
        if name_dims_vals is not None:
            name, (dims, vals) = name_dims_vals
            coords[name] = dims, vals
    return coords

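# Hypothetical usage sketch: rename a single coordinate by rebuilding the
# assembly from modified coords, the same pattern `fix_timebin_naming` below
# uses. The `old`/`new` coord names are illustrative assumptions.
def _example_rename_coord(assembly, old='stimulus_id', new='image_id'):
    coords = get_modified_coords(assembly, modifier=lambda name, dims, values: (
        (new, (dims, values)) if name == old else (name, (dims, values))))
    return type(assembly)(assembly.values, coords=coords, dims=assembly.dims)
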
def avg_repr(assembly):
    presentation_coords = [coord for coord, dims, values in walk_coords(assembly)
                           if array_is_element(dims, 'presentation') and coord != 'repetition']
    assembly = assembly.multi_groupby(presentation_coords).mean(dim='presentation', skipna=True)
    return assembly

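# Hypothetical usage sketch: averaging two repetitions of the same stimulus
# collapses them into a single presentation row. The assembly contents are
# illustrative assumptions built with brainio's DataAssembly.
def _example_avg_repr():
    assembly = DataAssembly([[1.0], [3.0]],
                            coords={'stimulus_id': ('presentation', ['s1', 's1']),
                                    'repetition': ('presentation', [0, 1]),
                                    'neuroid_id': ('neuroid', ['n1'])},
                            dims=['presentation', 'neuroid'])
    averaged = avg_repr(assembly)
    assert averaged.values.squeeze() == 2.0  # (1.0 + 3.0) / 2
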
def average_repetition(self, assembly):
    attrs = assembly.attrs  # workaround to keep attrs
    presentation_coords = [coord for coord, dims, values in walk_coords(assembly)
                           if array_is_element(dims, 'presentation')]
    presentation_coords = set(presentation_coords) - {'repetition_id', 'id'}
    assembly = assembly.multi_groupby(presentation_coords).mean(dim='presentation', skipna=True)
    assembly.attrs = attrs
    return assembly

def _average_repetitions(self, assembly):
    repetition_dims = assembly[self._split_coord].dims
    nonrepetition_coords = [coord for coord, dims, values in walk_coords(assembly)
                            if dims == repetition_dims and coord != self._split_coord]
    average = assembly.multi_groupby(nonrepetition_coords).mean(dim=repetition_dims)
    return average

def multishape_preserved_sort(self, assembly):
    comparison_dims = assembly[self._comparison_coord].dims
    assert set(assembly.dims) == set(comparison_dims), "multi-dimensional case not implemented"
    indices = np.argsort(assembly[self._comparison_coord].values)
    assembly = type(assembly)(assembly.values[np.ix_(indices, indices)],
                              coords={coord: (dims, values[indices] if dims == comparison_dims else values)
                                      for coord, dims, values in walk_coords(assembly)},
                              dims=assembly.dims)
    return assembly

def look_at_cached(self, model_identifier, stimuli_identifier, stimuli):
    responses = self.activations_model(stimuli, layers=self.recording_layers)
    # map time
    regions = set(responses['region'].values)
    if len(regions) > 1:
        raise NotImplementedError("cannot handle more than one simultaneous region")
    region = list(regions)[0]
    time_bins = [self.time_mapping[region][timestep] if timestep in self.time_mapping[region] else (None, None)
                 for timestep in responses['time_step'].values]
    responses['time_bin_start'] = 'time_step', [time_bin[0] for time_bin in time_bins]
    responses['time_bin_end'] = 'time_step', [time_bin[1] for time_bin in time_bins]
    responses = NeuroidAssembly(responses.rename({'time_step': 'time_bin'}))
    responses = responses[{'time_bin': [not np.isnan(time_start) for time_start in responses['time_bin_start']]}]
    # select time
    time_responses = []
    for time_bin in tqdm(self.recording_time_bins, desc='CORnet-time to recording time'):
        time_bin = time_bin if not isinstance(time_bin, np.ndarray) else time_bin.tolist()
        time_bin_start, time_bin_end = time_bin
        nearest_start = find_nearest(responses['time_bin_start'].values, time_bin_start)
        bin_responses = responses.sel(time_bin_start=nearest_start)
        bin_responses = NeuroidAssembly(bin_responses.values, coords={
            **{coord: (dims, values) for coord, dims, values in walk_coords(bin_responses)
               if coord not in ['time_bin_level_0', 'time_bin_end']},
            **{'time_bin_start': ('time_bin', [time_bin_start]),
               'time_bin_end': ('time_bin', [time_bin_end])}
        }, dims=bin_responses.dims)
        time_responses.append(bin_responses)
    responses = merge_data_arrays(time_responses)
    return responses

def __call__(self, source, target):
    scaled_values = scale(target, copy=True)
    target = target.__class__(scaled_values,
                              coords={coord: (dims, value) for coord, dims, value in walk_coords(target)},
                              dims=target.dims)
    return self.cross_regressed_correlation(source, target)

def average_trials(assembly):
    non_repetition_coords = [coord for coord, dim, values in walk_coords(assembly['presentation'])
                             if array_is_element(dim, 'presentation') and coord != 'repetition']
    grouped = assembly.multi_groupby(non_repetition_coords)
    if np.issubdtype(assembly.dtype, np.number):
        return grouped.mean('presentation')
    else:  # for non-numbers take majority
        return grouped.max('presentation')

def _strip_presentation_coords(assembly):
    presentation_columns = [coord for coord, dim, values in walk_coords(assembly['presentation'])]
    stimulus_set_columns = assembly.stimulus_set.columns
    redundant_coords = set(presentation_columns).intersection(set(stimulus_set_columns)) - {'image_id'}
    assembly = DataArray(assembly)
    assembly = assembly.reset_index('presentation')
    assembly = assembly.drop(redundant_coords)
    return assembly

def predict_proba(self, X):
    assert len(X.shape) == 2, "expected 2-dimensional input"
    scaled_X = self._scaler.transform(X)
    proba = self._classifier.predict_proba(scaled_X)
    # we take only the 0th dimension because the 1st dimension is just the features
    X_coords = {coord: (dims, value) for coord, dims, value in walk_coords(X)
                if array_is_element(dims, X.dims[0])}
    proba = BehavioralAssembly(proba,
                               coords={**X_coords, **{'choice': list(self._label_mapping.values())}},
                               dims=[X.dims[0], 'choice'])
    return proba

def fit(self, source, target):
    source, target = self._align(source), self._align(target)
    source, target = source.sortby(self._stimulus_coord), target.sortby(self._stimulus_coord)

    self._regression.fit(source, target)

    self._target_neuroid_values = {}
    for name, dims, values in walk_coords(target):
        if self._neuroid_dim in dims:
            assert array_is_element(dims, self._neuroid_dim)
            self._target_neuroid_values[name] = values

def fix_timebin_naming(assembly):
    """
    Renames coordinate time_bin_level_0 to time_bin_start and time_bin_level_1 to time_bin_end
    to work around a bug introduced in xarray 0.16.2 (and still present in 0.17.0).
    """
    # jjpr had found that xarray 0.16.2 introduced a bug where xarray.core.alignment._get_joiner assumes Index
    # when the object is a MultiIndex.
    # xarray.rename does not work here: it cannot find the coords.
    rename = dict(time_bin_level_0='time_bin_start', time_bin_level_1='time_bin_end')
    assembly = type(assembly)(assembly.values,
                              coords={rename[coord] if coord in rename else coord: (dims, values)
                                      for coord, dims, values in walk_coords(assembly)},
                              dims=assembly.dims)
    return assembly

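# Hypothetical usage sketch: an assembly whose MultiIndex levels got mangled to
# time_bin_level_{0,1} has its coords restored. The assembly contents are
# illustrative assumptions built with brainio's DataAssembly.
def _example_fix_timebin_naming():
    broken = DataAssembly([[1.0]],
                          coords={'time_bin_level_0': ('time_bin', [70]),
                                  'time_bin_level_1': ('time_bin', [170]),
                                  'neuroid_id': ('neuroid', ['n1'])},
                          dims=['time_bin', 'neuroid'])
    fixed = fix_timebin_naming(broken)
    assert 'time_bin_start' in fixed.coords and 'time_bin_end' in fixed.coords
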
def look_at(self, stimuli, number_of_trials=1):
    if self.current_task is BrainModel.Task.passive:
        return
    logits = self.activations_model(stimuli, layers=['logits'])
    assert len(logits['neuroid']) == 1000
    logits = logits.transpose('presentation', 'neuroid')
    prediction_indices = logits.values.argmax(axis=1)
    with open(os.path.join(os.path.dirname(__file__), 'imagenet_classes.txt')) as f:
        synsets = f.read().splitlines()
    prediction_synsets = [synsets[index] for index in prediction_indices]
    return BehavioralAssembly([prediction_synsets], coords={
        **{coord: (dims, values) for coord, dims, values in walk_coords(logits['presentation'])},
        **{'synset': ('presentation', prediction_synsets),
           'logit': ('presentation', prediction_indices)}
    }, dims=['choice', 'presentation'])

def manual_merge(*elements, on='neuroid'):
    dims = elements[0].dims
    assert all(element.dims == dims for element in elements[1:])
    merge_index = dims.index(on)
    # the coordinates in the merge index should have the same keys
    assert _coords_match(elements, dim=on, match_values=False), \
        f"coords in {[element[on] for element in elements]} do not match"
    # all other dimensions, their coordinates and values should already align
    for dim in set(dims) - {on}:
        assert _coords_match(elements, dim=dim, match_values=True), \
            f"coords in {[element[dim] for element in elements]} do not match"
    # merge values without meta
    merged_values = np.concatenate([element.values for element in elements], axis=merge_index)
    # piece together with meta
    result = type(elements[0])(merged_values, coords={
        **{coord: (dims, values) for coord, dims, values in walk_coords(elements[0])
           if not array_is_element(dims, on)},
        **{coord: (dims, np.concatenate([element[coord].values for element in elements]))
           for coord, dims, _ in walk_coords(elements[0])
           if array_is_element(dims, on)}
    }, dims=elements[0].dims)
    return result

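# Hypothetical usage sketch: merging the two assumed assemblies from
# `_example_coords_match` above concatenates along neuroid while keeping the
# shared presentation meta intact.
def _example_manual_merge():
    a, b = _example_coords_match()
    merged = manual_merge(a, b, on='neuroid')
    assert merged.shape == (2, 4)  # 2 presentations, 2 + 2 neuroids
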
def _package_prediction(self, predicted_values, source):
    coords = {coord: (dims, values) for coord, dims, values in walk_coords(source)
              if not array_is_element(dims, self._neuroid_dim)}
    # re-package neuroid coords
    dims = source.dims
    for target_coord, target_value in self._target_neuroid_values.items():
        # this might overwrite values which is okay
        coords[target_coord] = self._neuroid_dim, target_value
    prediction = NeuroidAssembly(predicted_values, coords=coords, dims=dims)
    return prediction

def aggregate(cls, values):
    center = values.mean('split')
    error = standard_error_of_the_mean(values, 'split')
    return Score([center, error],
                 coords={**{'aggregation': ['center', 'error']},
                         **{coord: (dims, values) for coord, dims, values in walk_coords(center)}},
                 dims=('aggregation',) + center.dims)

def cross_correlation(prediction, target, cross, correlation):
    assert (prediction[cross] == target[cross]).all()
    scores = []
    coords = [coord for coord, dims, values in walk_coords(target[cross])]
    for cross_value in target[cross].values:
        _prediction = prediction.sel(**{cross: cross_value})
        _target = target.sel(**{cross: cross_value})
        score = correlation(_prediction, _target)
        for coord, coord_value in zip(coords, cross_value):
            score = score.expand_dims(coord)
            score[coord] = [coord_value]
        score = score.stack(**{cross: coords})
        scores.append(score)
    score = merge_data_arrays(scores)
    score = apply_aggregate(lambda score: score.mean(cross), score)
    return score

def build_assembly(assembly, coord_list=['ty', 'tz']):
    values = np.stack([getattr(assembly, coord).values for coord in coord_list], axis=1)
    coords = {'neuroid_id': ('neuroid', list(range(len(coord_list)))),
              'neuroid_meaning': ('neuroid', coord_list)}
    for coord, dims, value in walk_coords(assembly):
        if len(dims) == 0:
            continue
        if dims[0] == 'presentation':
            coords[coord] = ('presentation', value)
    new_assembly = NeuroidAssembly(values, coords=coords, dims=['presentation', 'neuroid'])
    new_assembly.attrs['stimulus_set'] = assembly.stimulus_set
    return new_assembly

def average_subregions(self, bold_shift, assembly):
    attrs = assembly.attrs
    del assembly['threshold']
    # group by stimuli, fROI, and subject, one after the other.
    # this gets rid of adjacent coords unfortunately, but we accept that for now.
    averaged_assembly = assembly.groupby('stimulus_id').apply(
        lambda stimulus_group: stimulus_group.groupby('fROI_area').apply(
            lambda fROI_group: fROI_group.groupby('subject_UID').mean()))
    averaged_assembly = averaged_assembly.stack(presentation=['stimulus_id'], neuroid=['fROI_area', 'subject_UID'])
    # copy presentation coords back since those are needed for e.g. metric stratification
    order = [averaged_assembly['stimulus_id'].values.tolist().index(stimulus_id)
             for stimulus_id in assembly['stimulus_id'].values]
    for copy_coord, dims, copy_value in walk_coords(assembly):
        if not array_is_element(dims, 'presentation') or hasattr(averaged_assembly, copy_coord):
            continue
        averaged_assembly[copy_coord] = dims, copy_value[order]
    averaged_assembly.attrs = attrs
    averaged_assembly['neuroid_id'] = 'neuroid', [".".join([str(value) for value in values]) for values in zip(*[
        averaged_assembly[coord].values for coord in ['subject_UID', 'fROI_area']])]
    return averaged_assembly

def build_cate_assembly(assembly):
    category_names = assembly.category_name.values
    unique_cate_names = np.unique(category_names)
    # duplicate each category name across both neuroids (downstream code expects this shape)
    new_category_names = [[curr_name, curr_name] for curr_name in category_names]
    coords = {'neuroid_id': ('neuroid', [0, 1]),
              'neuroid_meaning': ('neuroid', ['category', 'category'])}
    for coord, dims, value in walk_coords(assembly):
        if len(dims) == 0:
            continue
        if dims[0] == 'presentation':
            coords[coord] = ('presentation', value)
    new_assembly = NeuroidAssembly(new_category_names, coords=coords, dims=['presentation', 'neuroid'])
    new_assembly.attrs['stimulus_set'] = assembly.stimulus_set
    return new_assembly