Example #1
 def predict_proba(self, X):
     import tensorflow as tf
     assert len(X.shape) == 2, "expected 2-dimensional input"
     if self._zscore_feats:
         scaled_X = self._scaler.transform(X)
     else:
         scaled_X = X
     with self._graph.as_default():
         preds = []
         for batch in self._iterate_minibatches(
                 scaled_X, batchsize=self._eval_batch_size, shuffle=False):
             feed_dict = {
                 self._input_placeholder: batch,
                 self._fc_keep_prob: 1.0
             }
             softmax = self._sess.run([tf.nn.softmax(self._predictions)],
                                      feed_dict=feed_dict)
             preds.append(np.squeeze(softmax))
         proba = np.concatenate(preds, axis=0)
     # we take only the 0th dimension because the 1st dimension is just the features
     X_coords = {
         coord: (dims, value)
         for coord, dims, value in walk_coords(X)
         if array_is_element(dims, X.dims[0])
     }
     proba = BehavioralAssembly(proba,
                                coords={
                                    **X_coords,
                                    **{
                                        'choice':
                                        list(self._label_mapping.values())
                                    }
                                },
                                dims=[X.dims[0], 'choice'])
     return proba
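Every example on this page uses the same coordinate-copying idiom built on walk_coords and array_is_element. A minimal sketch of their assumed contract (the import path and the `assembly` variable are illustrative):

    # Assumed contract of the two helpers used throughout these examples:
    # walk_coords(assembly) yields one (name, dims, values) triple per coordinate, and
    # array_is_element(dims, 'presentation') is True only for coords that live exactly
    # on the 'presentation' dimension.
    from brainio.assemblies import walk_coords, array_is_element  # assumed import path

    presentation_coords = {
        name: (dims, values)
        for name, dims, values in walk_coords(assembly)  # `assembly` stands for any xarray-backed assembly
        if array_is_element(dims, 'presentation')
    }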
Example #2
 def average_subregions(self, bold_shift, assembly):
     attrs = assembly.attrs
     del assembly['threshold']
     # group by stimuli, fROI, subject after one another.
     # this gets rid of adjacent coords unfortunately, but we accept that for now.
     averaged_assembly = assembly.groupby('stimulus_id').apply(
         lambda stimulus_group: stimulus_group.groupby('fROI_area').apply(
             lambda fROI_group: fROI_group.groupby('subject_UID').mean()))
     averaged_assembly = averaged_assembly.stack(
         presentation=['stimulus_id'], neuroid=['fROI_area', 'subject_UID'])
     # copy presentation coords back since those are needed for e.g. metric stratification
     order = [
         averaged_assembly['stimulus_id'].values.tolist().index(stimulus_id)
         for stimulus_id in assembly['stimulus_id'].values
     ]
     for copy_coord, dims, copy_value in walk_coords(assembly):
         if not array_is_element(dims, 'presentation') or hasattr(
                 averaged_assembly, copy_coord):
             continue
         averaged_assembly[copy_coord] = dims, copy_value[order]
     averaged_assembly.attrs = attrs
     averaged_assembly['neuroid_id'] = 'neuroid', [
         ".".join([str(value) for value in values]) for values in zip(*[
             averaged_assembly[coord].values
             for coord in ['subject_UID', 'fROI_area']
         ])
     ]
     return averaged_assembly
Example #3
    def _package_prediction(self, predicted_values, source):
        coords = {
            coord: (dims, values)
            for coord, dims, values in walk_coords(source)
            if not array_is_element(dims, self._neuroid_dim)
        }
        # re-package neuroid coords
        dims = source.dims
        # if there is only one neuroid coordinate, it would get discarded and the dimension would be used as coordinate.
        # to avoid this, we can build the assembly first and then stack on the neuroid dimension.
        neuroid_level_dim = None
        if len(self._target_neuroid_values) == 1:  # extract single key: https://stackoverflow.com/a/20145927/2225200
            (neuroid_level_dim, _), = self._target_neuroid_values.items()
            dims = [
                dim if dim != self._neuroid_dim else neuroid_level_dim
                for dim in dims
            ]
        for target_coord, target_value in self._target_neuroid_values.items():
            # this might overwrite values which is okay
            coords[target_coord] = (neuroid_level_dim or self._neuroid_dim), target_value
        prediction = NeuroidAssembly(predicted_values,
                                     coords=coords,
                                     dims=dims)
        if neuroid_level_dim:
            prediction = prediction.stack(
                **{self._neuroid_dim: [neuroid_level_dim]})

        return prediction
Example #4
 def avg_repr(assembly):
     presentation_coords = [
         coord for coord, dims, values in walk_coords(assembly)
         if array_is_element(dims, 'presentation') and coord != 'repetition'
     ]
     assembly = assembly.multi_groupby(presentation_coords).mean(
         dim='presentation', skipna=True)
     return assembly
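A hedged usage sketch for avg_repr, assuming `assembly` is an assembly whose 'presentation' dimension carries a 'repetition' coordinate alongside stimulus coordinates:

    # Hypothetical call: the mean is taken over repetitions of each presentation, so the
    # 'repetition' coordinate is consumed while the remaining presentation coords survive.
    averaged = avg_repr(assembly)
    assert 'repetition' not in averaged.coords
    assert 'presentation' in averaged.dims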
Example #5
    def fit(self, source, target):
        source, target = self._align(source), self._align(target)
        source, target = source.sortby(self._stimulus_coord), target.sortby(
            self._stimulus_coord)

        self._regression.fit(source, target)

        self._target_neuroid_values = {}
        for name, dims, values in walk_coords(target):
            if self._neuroid_dim in dims:
                assert array_is_element(dims, self._neuroid_dim)
                self._target_neuroid_values[name] = values
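For reference, a hedged sketch of what the stored mapping might look like after fit, assuming the target assembly carries 'neuroid_id' and 'region' coordinates on its neuroid dimension (both coordinate names are illustrative):

    import numpy as np

    # Hypothetical contents of self._target_neuroid_values after fit(): every coordinate
    # that lives purely on the neuroid dimension is remembered so that predictions can
    # later be re-packaged with the same neuroid metadata (compare _package_prediction
    # in Example #3).
    target_neuroid_values = {
        'neuroid_id': np.array(['n1', 'n2', 'n3']),
        'region': np.array(['IT', 'IT', 'IT']),
    }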
Example #6
def manual_merge(*elements, on='neuroid'):
    dims = elements[0].dims
    assert all(element.dims == dims for element in elements[1:])
    merge_index = dims.index(on)
    # the coordinates in the merge index should have the same keys
    assert _coords_match(
        elements, dim=on, match_values=False
    ), f"coords in {[element[on] for element in elements]} do not match"
    # all other dimensions, their coordinates and values should already align
    for dim in set(dims) - {on}:
        assert _coords_match(
            elements, dim=dim, match_values=True
        ), f"coords in {[element[dim] for element in elements]} do not match"
    # merge values without meta
    merged_values = np.concatenate([element.values for element in elements],
                                   axis=merge_index)
    # piece together with meta
    result = type(elements[0])(
        merged_values,
        coords={
            **{coord: (dims, values)
               for coord, dims, values in walk_coords(elements[0])
               if not array_is_element(dims, on)},
            **{coord: (dims, np.concatenate([element[coord].values for element in elements]))
               for coord, dims, _ in walk_coords(elements[0])
               if array_is_element(dims, on)}
        },
        dims=elements[0].dims)
    return result
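A hedged usage sketch for manual_merge; the NeuroidAssembly import path and the exact behavior of the _coords_match helper are assumptions here:

    import numpy as np
    from brainio.assemblies import NeuroidAssembly  # assumed import path

    left = NeuroidAssembly(
        np.random.rand(3, 2),
        coords={'stimulus_id': ('presentation', ['a', 'b', 'c']),
                'neuroid_id': ('neuroid', ['n1', 'n2'])},
        dims=['presentation', 'neuroid'])
    right = NeuroidAssembly(
        np.random.rand(3, 4),
        coords={'stimulus_id': ('presentation', ['a', 'b', 'c']),
                'neuroid_id': ('neuroid', ['n3', 'n4', 'n5', 'n6'])},
        dims=['presentation', 'neuroid'])

    merged = manual_merge(left, right, on='neuroid')
    # merged has shape (3, 6): values are concatenated along 'neuroid', the neuroid coords
    # of all inputs are concatenated, and presentation coords are taken from `left`.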
Example #7
 def average_repetition(self, assembly):
     attrs = assembly.attrs  # workaround to keeping attrs
     presentation_coords = [
         coord for coord, dims, values in walk_coords(assembly)
         if array_is_element(dims, 'presentation')
     ]
     presentation_coords = set(presentation_coords) - {
         'repetition_id', 'id'
     }
     assembly = assembly.multi_groupby(presentation_coords).mean(
         dim='presentation', skipna=True)
     assembly, stimulus_set = self.dropna(
         assembly, stimulus_set=attrs['stimulus_set'])
     attrs['stimulus_set'] = stimulus_set
     assembly.attrs = attrs
     return assembly
Example #8
 def predict_proba(self, X):
     assert len(X.shape) == 2, "expected 2-dimensional input"
     scaled_X = self._scaler.transform(X)
     proba = self._classifier.predict_proba(scaled_X)
     # we take only the 0th dimension because the 1st dimension is just the features
     X_coords = {
         coord: (dims, value)
         for coord, dims, value in walk_coords(X)
         if array_is_element(dims, X.dims[0])
     }
     proba = BehavioralAssembly(
         proba,
         coords={
             **X_coords,
             **{
                 'choice': list(self._label_mapping.values())
             }
         },
         dims=[X.dims[0], 'choice'])
     return proba
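A hedged usage sketch for this variant, assuming `clf` is a fitted instance of the surrounding classifier class and `features` is a 2-dimensional assembly (presentations x features):

    # Hypothetical call: probabilities come back as a BehavioralAssembly whose first
    # dimension and coords are copied from `features` and whose 'choice' coordinate lists
    # the class labels from the label mapping.
    proba = clf.predict_proba(features)
    print(proba.dims)                    # e.g. ('presentation', 'choice')
    top_choice = proba.argmax('choice')  # index of the most likely label per presentation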
Example #9
 def add_neuroid_meta(self, target, source):
     target = target.expand_dims(self.extrapolation_dimension)
     for coord, dims, values in walk_coords(source):
         if array_is_element(dims, self.extrapolation_dimension):
             target[coord] = dims, values
     return target
Example #10
def _merge_voxel_meta(data, meta, bold_shift_seconds):
    data_missing = set(meta['story'].values) - set(data['story'].values)
    if data_missing:
        warnings.warn(f"Stories missing from the data: {data_missing}")
    meta_missing = set(data['story'].values) - set(meta['story'].values)
    if meta_missing:
        warnings.warn(f"Stories missing from the meta: {meta_missing}")

    ignored_words = [None, '', '<s>', '</s>', '<s']
    annotated_data = []
    for story in tqdm(ordered_set(data['story'].values), desc='merge meta'):
        if story not in meta['story'].values:
            continue
        story_meta = meta.sel(story=story)
        story_meta = story_meta.sortby('time_end')

        story_data = data.sel(story=story).stack(timepoint=['timepoint_value'])
        story_data = story_data.sortby('timepoint_value')
        timepoints = story_data['timepoint_value'].values.tolist()
        assert is_sorted(timepoints)
        timepoints = [
            timepoint - bold_shift_seconds for timepoint in timepoints
        ]
        sentences = []
        last_timepoint = -np.inf
        for timepoint in timepoints:
            if last_timepoint >= max(story_meta['time_end'].values):
                break
            if timepoint <= 0:
                sentences.append(None)
                continue  # ignore fixation period
            timebin_meta = [
                last_timepoint < end <= timepoint
                for end in story_meta['time_end'].values
            ]
            timebin_meta = story_meta[{'time_bin': timebin_meta}]
            sentence = ' '.join(word.strip() for word in timebin_meta.values
                                if word not in ignored_words)
            sentence = sentence.lower().strip()
            # quick-fixes
            if story == 'Boar' and sentence == 'interactions the the':  # Boar duplicate
                sentence = 'interactions the'
            if story == 'KingOfBirds' and sentence == 'the fact that the larger':  # missing word in TextGrid
                sentence = 'earth ' + sentence
            if story == 'MrSticky' and sentence == 'worry don\'t worry i went extra slowly since it\'s':
                sentence = 'don\'t worry i went extra slowly since it\'s'
            sentences.append(sentence)
            last_timepoint = timebin_meta['time_end'].values[-1]
        sentence_index = [
            i for i, sentence in enumerate(sentences) if sentence
        ]
        sentences = np.array(sentences)[sentence_index]
        if story not in ['Boar', 'KingOfBirds',
                         'MrSticky']:  # ignore quick-fixes
            annotated_sentence = ' '.join(sentences)
            meta_sentence = ' '.join(word.strip() for word in story_meta.values if word not in ignored_words) \
                .lower().strip()
            assert annotated_sentence == meta_sentence
        # re-interpret timepoints as stimuli
        coords = {}
        for coord_name, dims, coord_value in walk_coords(story_data):
            dims = [
                dim if not dim.startswith('timepoint') else 'presentation'
                for dim in dims
            ]
            # discard the timepoints for which the stimulus did not change (empty word)
            if array_is_element(dims, 'presentation'):
                coord_value = coord_value[sentence_index]
            coords[coord_name] = dims, coord_value
        coords = {
            **coords,
            **{
                'stimulus_sentence': ('presentation', sentences)
            }
        }
        story_data = story_data[{
            dim: slice(None) if dim != 'timepoint' else sentence_index
            for dim in story_data.dims
        }]
        dims = [
            dim if not dim.startswith('timepoint') else 'presentation'
            for dim in story_data.dims
        ]
        story_data = xr.DataArray(story_data.values, coords=coords, dims=dims)
        story_data['story'] = 'presentation', [story] * len(
            story_data['presentation'])
        gather_indexes(story_data)
        annotated_data.append(story_data)
    annotated_data = merge_data_arrays(annotated_data)
    return annotated_data
Example #11
def package(
        features_path='/braintree/data2/active/users/qbilius/computed/hvm/ait'
):
    assert os.path.isdir(features_path)
    features_paths = [
        os.path.join(features_path, 'basenets_hvm_feats_V4'),
        os.path.join(features_path, 'basenets_hvm_feats_pIT'),
        os.path.join(features_path, 'basenets_hvm_feats')
    ]

    # alignment
    meta = pd.read_pickle(
        os.path.join(os.path.dirname(__file__), 'basenets-meta.pkl'))
    meta = meta[meta['var'] == 6]
    meta_ids = meta['id'].values.tolist()

    hvm = brainscore.get_assembly('dicarlo.Majaj2015') \
        .sel(variation=6) \
        .multi_groupby(['category_name', 'object_name', 'image_id']) \
        .mean(dim="presentation") \
        .squeeze("time_bin")
    hvm_ids = hvm['image_id'].values.tolist()

    assert len(hvm_ids) == len(meta_ids)
    indexes = [meta_ids.index(id) for id in hvm_ids]

    basenets = []
    for activations_path_v4 in glob.glob(
            os.path.join(features_paths[0], '*.npy')):
        activations_path_pit = os.path.abspath(
            os.path.join(features_paths[1],
                         os.path.basename(activations_path_v4)))
        activations_path_ait = os.path.abspath(
            os.path.join(features_paths[2],
                         os.path.basename(activations_path_v4)))
        assert os.path.isfile(activations_path_pit)
        assert os.path.isfile(activations_path_ait)
        print(activations_path_v4,
              activations_path_pit,
              activations_path_ait,
              end='')
        activations_v4 = np.load(activations_path_v4)
        activations_pit = np.load(activations_path_pit)
        activations_ait = np.load(activations_path_ait)
        assert (activations_v4.shape[0] == activations_pit.shape[0]
                == activations_ait.shape[0] == len(indexes))
        # reorder rows to follow the hvm image order (via the meta index mapping above)
        activations_v4 = activations_v4[indexes, :]
        activations_pit = activations_pit[indexes, :]
        activations_ait = activations_ait[indexes, :]
        coords = {
            coord: (dims, values)
            for coord, dims, values in walk_coords(hvm)
            if array_is_element(dims, 'presentation')
        }
        coords['neuroid_id'] = 'neuroid', list(range(3000))
        coords['layer'] = 'neuroid', np.concatenate([
            np.repeat('basenet-layer_v4', 1000),
            np.repeat('basenet-layer_pit', 1000),
            np.repeat('basenet-layer_ait', 1000)
        ])
        activations = np.concatenate(
            [activations_v4, activations_pit, activations_ait], axis=1)
        print(activations.shape, end='')
        assert activations.shape[0] == len(indexes)
        assembly = NeuroidAssembly(activations,
                                   coords=coords,
                                   dims=['presentation', 'neuroid'])
        model_name = os.path.splitext(
            os.path.basename(activations_path_pit))[0]
        basenets.append(model_name)
        target_path = os.path.abspath(
            os.path.join(
                os.path.dirname(__file__), '..', '..', '..',
                'output/candidate_models.models.model_activations',
                'model={},stimulus_set=dicarlo.hvm,weights=imagenet,image_size=224,pca_components=1000.pkl'
                .format(model_name)))
        print("-->", target_path)
        with open(target_path, 'wb') as target_file:
            pickle.dump({'data': assembly}, target_file)

    print(" ".join(basenets))