def _extract(self, stim):
    """Tag every word in ``stim`` and return a word-by-tag indicator matrix.

    The ``text`` of each element in ``stim`` is passed to ``nltk.pos_tag``.
    For every distinct tag encountered, an indicator column is built that
    holds 1 at the positions of the words carrying that tag and 0
    elsewhere. Each element's ``onset`` and ``duration`` are collected
    alongside.

    Args:
        stim: Iterable of elements exposing ``text``, ``onset`` and
            ``duration`` attributes. It is iterated more than once.

    Returns:
        A single ``ExtractorResult`` whose data array has one row per word
        and one column per distinct tag, with the tags as feature names.

    Raises:
        PliersError: If the tagger returns a different number of entries
            than the number of words supplied.
    """
    tokens = [element.text for element in stim]
    tagged = nltk.pos_tag(tokens)
    n_tokens = len(tokens)
    if n_tokens != len(tagged):
        raise PliersError(
            "The number of words in the ComplexTextStim does not match "
            "the number of tagged words returned by nltk's part-of-speech"
            " tagger.")

    indicators = {}
    onsets, durations = [], []
    for position, (element, entry) in enumerate(zip(stim, tagged)):
        tag = entry[1]
        column = indicators.get(tag)
        if column is None:
            # First time this tag is seen: start an all-zero column.
            column = indicators[tag] = [0] * n_tokens
        column[position] += 1
        onsets.append(element.onset)
        durations.append(element.duration)

    # Columns were built tag-major; transpose so that rows index words.
    matrix = np.array(list(indicators.values())).transpose()
    feature_names = list(indicators.keys())
    return ExtractorResult(matrix, stim, self, features=feature_names,
                           onsets=onsets, durations=durations)
def _extract(self, stims):
    """Return one one-hot part-of-speech result per element of ``stims``.

    The ``text`` of every element is tagged in a single ``nltk.pos_tag``
    call. For each element a vector is built with one entry per tag in the
    loaded ``upenn_tagset`` resource: 1 for the tag assigned to that
    element and 0 for all others.

    Args:
        stims: Iterable of elements exposing a ``text`` attribute. It is
            iterated more than once.

    Returns:
        A list of ``ExtractorResult`` objects, one per element, each
        holding a single-row vector with the tag names as features.

    Raises:
        PliersError: If the tagger returns a different number of entries
            than the number of words supplied.
    """
    words = [w.text for w in stims]
    pos = nltk.pos_tag(words)
    if len(words) != len(pos):
        # The trailing space matters: adjacent string literals are joined
        # verbatim, and without it the message read "wordsreturned".
        raise PliersError(
            "The number of words does not match the number of tagged words "
            "returned by nltk's part-of-speech tagger.")

    # Load the tag inventory once; every per-word vector is seeded from it.
    tagset = list(nltk.data.load('help/tagsets/upenn_tagset.pickle').keys())

    results = []
    # Lengths were verified equal above, so zip pairs each stim with its tag.
    for s, tagged in zip(stims, pos):
        pos_vector = dict.fromkeys(tagset, 0)
        # NOTE(review): a tag that is absent from the loaded tagset is added
        # as an extra key here, making this vector longer than the others.
        pos_vector[tagged[1]] = 1
        values = [list(pos_vector.values())]
        results.append(
            ExtractorResult(values, s, self,
                            features=list(pos_vector.keys())))
    return results