Ejemplo n.º 1
0
    def _extract(self, stim):
        """Build per-word part-of-speech indicator features for ``stim``.

        The ``text`` of every element of ``stim`` is passed to
        ``nltk.pos_tag`` in one call. For each distinct tag returned, an
        indicator column over all words is built, holding 1 at the
        positions where that tag was assigned and 0 elsewhere.

        Args:
            stim: Iterable of word elements, each exposing ``text``,
                ``onset`` and ``duration``. It is iterated twice.

        Returns:
            ExtractorResult: Built from the transposed indicator array,
            with the tags (in order of first appearance) as ``features``
            and the per-word onsets and durations.

        Raises:
            PliersError: If the tagger returns a different number of
                entries than there are words.
        """
        tokens = [elem.text for elem in stim]
        tagged = nltk.pos_tag(tokens)
        n_tokens = len(tokens)
        if n_tokens != len(tagged):
            raise PliersError(
                "The number of words in the ComplexTextStim does not match "
                "the number of tagged words returned by nltk's part-of-speech"
                " tagger.")

        # Maps tag -> indicator column; insertion order fixes feature order.
        indicators = {}
        onsets, durations = [], []
        for idx, elem in enumerate(stim):
            tag = tagged[idx][1]
            column = indicators.get(tag)
            if column is None:
                # First time this tag is seen: start an all-zero column.
                column = [0] * n_tokens
                indicators[tag] = column
            column[idx] += 1
            onsets.append(elem.onset)
            durations.append(elem.duration)

        # Columns are stored tag-major; transpose so words come first.
        values = np.array(list(indicators.values())).T
        return ExtractorResult(values,
                               stim,
                               self,
                               features=list(indicators.keys()),
                               onsets=onsets,
                               durations=durations)
Ejemplo n.º 2
0
    def _extract(self, stims):
        """Tag every stim's text and return one one-hot result per stim.

        The ``text`` of all elements of ``stims`` is passed to
        ``nltk.pos_tag`` in a single call. For each stim, a vector keyed by
        the entries of nltk's ``upenn_tagset`` help resource is built, with
        1 at the assigned tag and 0 everywhere else.

        Args:
            stims: Iterable of stim objects exposing a ``text`` attribute.
                It is iterated twice.

        Returns:
            list: One ExtractorResult per stim, each holding a single row
            of values and the vector's keys as ``features``.

        Raises:
            PliersError: If the tagger returns a different number of
                entries than there are words.
        """
        words = [w.text for w in stims]
        pos = nltk.pos_tag(words)
        if len(words) != len(pos):
            # Adjacent literals are concatenated verbatim, so the leading
            # space on the second piece is required.
            raise PliersError(
                "The number of words does not match the number of tagged words"
                " returned by nltk's part-of-speech tagger.")

        results = []
        # Loaded once; presumably the keys are the Penn Treebank tag names
        # -- TODO confirm against the nltk data package.
        tagset = nltk.data.load('help/tagsets/upenn_tagset.pickle').keys()
        # Lengths were verified above, so pairing with zip is safe.
        for s, tagged in zip(stims, pos):
            pos_vector = dict.fromkeys(tagset, 0)
            # NOTE(review): a tag missing from ``tagset`` would add a new
            # key here, making this result's feature list longer than the
            # others -- confirm whether the tagger can emit such a tag.
            pos_vector[tagged[1]] = 1
            values = [list(pos_vector.values())]
            results.append(
                ExtractorResult(values,
                                s,
                                self,
                                features=list(pos_vector.keys())))

        return results