def test_complex_text_stim(self):
    """Load complex text stims from files with and without header rows."""
    text_dir = join(_get_test_data_path(), 'text')
    # Columns 'ot' = onset, text; each element falls back to default_duration.
    stim = ComplexTextStim(join(text_dir, 'complex_stim_no_header.txt'),
                           columns='ot', default_duration=0.2)
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(len(stim.elements), 4)
    self.assertEqual(stim.elements[2].onset, 34)
    self.assertEqual(stim.elements[2].duration, 0.2)
    # Header file supplies its own column names and per-element durations.
    stim = ComplexTextStim(join(text_dir, 'complex_stim_with_header.txt'))
    self.assertEqual(len(stim.elements), 4)
    self.assertEqual(stim.elements[2].duration, 0.1)
def test_text_extractor(self):
    """Run a DictionaryExtractor over a timed text stim and check the output df."""
    stim = ComplexTextStim(join(TEXT_DIR, 'sample_text.txt'),
                           columns='to', default_duration=1)
    td = DictionaryExtractor(join(TEXT_DIR, 'test_lexical_dictionary.txt'),
                             variables=['length', 'frequency'])
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(td.data.shape, (7, 2))
    timeline = stim.extract([td])
    df = TimelineExporter.timeline_to_df(timeline)
    self.assertEqual(df.shape, (12, 4))
    self.assertEqual(df.iloc[9, 3], 10.6)
def test_complex_stim_from_text():
    """Build ComplexTextStims from raw text via word/sentence tokenizers."""
    textfile = join(get_test_data_path(), 'text', 'scandal.txt')
    # Use a context manager so the file handle is closed deterministically.
    with open(textfile) as f:
        text = f.read().strip()
    stim = ComplexTextStim.from_text(text)
    target = ['To', 'Sherlock', 'Holmes']
    assert [w.text for w in stim.elements[:3]] == target
    assert len(stim.elements) == 231
    stim = ComplexTextStim.from_text(text, unit='sent')
    # Custom tokenizer; raw string avoids the invalid-escape DeprecationWarning
    # (identical string value, since \w is not a recognized escape).
    stim = ComplexTextStim.from_text(text, tokenizer=r'(\w+)')
    assert len(stim.elements) == 209
def test_complex_stim_from_text(self):
    """Build ComplexTextStims from raw text via word/sentence tokenizers."""
    textfile = join(_get_test_data_path(), 'text', 'scandal.txt')
    # Use a context manager so the file handle is closed deterministically.
    with open(textfile) as f:
        text = f.read().strip()
    stim = ComplexTextStim.from_text(text)
    target = ['To', 'Sherlock', 'Holmes']
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual([w.text for w in stim.elements[:3]], target)
    self.assertEqual(len(stim.elements), 231)
    stim = ComplexTextStim.from_text(text, unit='sent')
    # Custom tokenizer; raw string avoids the invalid-escape DeprecationWarning
    # (identical string value, since \w is not a recognized escape).
    stim = ComplexTextStim.from_text(text, tokenizer=r'(\w+)')
    self.assertEqual(len(stim.elements), 209)
def test_complex_stim_from_text():
    """Build ComplexTextStims from raw text via word/sentence tokenizers."""
    textfile = join(_get_test_data_path(), 'text', 'scandal.txt')
    # Use a context manager so the file handle is closed deterministically.
    with open(textfile) as f:
        text = f.read().strip()
    stim = ComplexTextStim.from_text(text)
    target = ['To', 'Sherlock', 'Holmes']
    assert [w.text for w in stim.elements[:3]] == target
    assert len(stim.elements) == 231
    stim = ComplexTextStim.from_text(text, unit='sent')
    # Custom tokenizer; raw string avoids the invalid-escape DeprecationWarning
    # (identical string value, since \w is not a recognized escape).
    stim = ComplexTextStim.from_text(text, tokenizer=r'(\w+)')
    assert len(stim.elements) == 209
def test_complex_text_stim():
    """Read complex text stims from files with and without header rows."""
    data_dir = join(_get_test_data_path(), 'text')
    no_header = join(data_dir, 'complex_stim_no_header.txt')
    stim = ComplexTextStim(no_header, columns='ot', default_duration=0.2)
    elems = stim.elements
    assert len(elems) == 4
    third = elems[2]
    assert third.onset == 34
    assert third.duration == 0.2
    # The header variant carries its own column names and durations.
    with_header = join(data_dir, 'complex_stim_with_header.txt')
    stim = ComplexTextStim(with_header)
    assert len(stim.elements) == 4
    assert stim.elements[2].duration == 0.1
def test_text_extractor():
    """Run a DictionaryExtractor over a timed text stim and check the frame."""
    stim = ComplexTextStim(join(TEXT_DIR, 'sample_text.txt'),
                           columns='to', default_duration=1)
    extractor = DictionaryExtractor(
        join(TEXT_DIR, 'test_lexical_dictionary.txt'),
        variables=['length', 'frequency'])
    assert extractor.data.shape == (7, 2)
    df = stim.extract([extractor]).to_df()
    # Words missing from the dictionary come back as NaN.
    assert np.isnan(df.iloc[0, 3])
    assert df.shape == (12, 4)
    match = df.query('name=="frequency" & onset==5')['value'].values
    assert match == 10.6
def test_complex_stim_from_srt():
    """SRT subtitles should parse into the same texts as the reference file."""
    data_dir = join(_get_test_data_path(), 'text')
    srtfile = join(data_dir, 'wonderful.srt')
    textfile = join(data_dir, 'wonderful.txt')
    expected = pd.read_csv(textfile, sep='\t')["text"].tolist()
    srt_stim = ComplexTextStim(srtfile)
    # NOTE(review): .decode assumes element text is bytes (Python 2 era);
    # on a Python 3 str this would raise AttributeError — confirm.
    actual = [sent.text.decode('UTF-8') for sent in srt_stim.elements]
    assert actual == expected
def test_predefined_dictionary_extractor():
    """PredefinedDictionaryExtractor should yield per-word dictionary values."""
    text = """enormous chunks of ice that have been frozen for thousands of years are breaking apart and melting away"""
    stim = ComplexTextStim.from_text(text)
    extractor = PredefinedDictionaryExtractor(
        ['aoa/Freq_pm', 'affect/V.Mean.Sum'])
    df = TimelineExporter.timeline_to_df(stim.extract([extractor]))
    assert df.shape == (36, 4)
    # Only a handful of the words have affect norms; the rest are NaN.
    affect_rows = df.query('name == "affect_V.Mean.Sum"').dropna()
    assert len(affect_rows) == 3
def test_indicoAPI_extractor():
    """Smoke-test the Indico emotion model when an API key is available."""
    srtfile = join(_get_test_data_path(), 'text', 'wonderful.srt')
    srt_stim = ComplexTextStim(srtfile)
    # Guard clause: without credentials the test is a silent no-op,
    # matching the original conditional-block behavior.
    if 'INDICO_APP_KEY' not in os.environ:
        return
    ext = IndicoAPIExtractor(api_key=os.environ['INDICO_APP_KEY'],
                             model='emotion')
    names = set(ext.apply(srt_stim).to_df()['name'])
    expected = {'emotion_anger', 'emotion_fear', 'emotion_joy',
                'emotion_sadness', 'emotion_surprise'}
    assert names == expected
def __init__(self, filename, transcription, **kwargs):
    """Initialize an audio stim paired with its transcription.

    Parameters
    ----------
    filename : str
        Path to the audio file; forwarded to the parent constructor.
    transcription : str or ComplexTextStim
        A path/string is coerced into a ComplexTextStim (with **kwargs
        forwarded); an existing stim instance is used as-is.
    """
    self.transcription = (
        ComplexTextStim(transcription, **kwargs)
        if isinstance(transcription, six.string_types)
        else transcription)
    super(TranscribedAudioStim, self).__init__(filename)
def test_part_of_speech_extractor():
    """POS tagging over a headered text stim should yield one tag per word."""
    input_file = join(TEXT_DIR, 'complex_stim_with_header.txt')
    stim = ComplexTextStim(input_file)
    df = stim.extract([PartOfSpeechExtractor()]).to_df()
    assert df.iloc[1, 3] == 'NN'
    assert df.shape == (4, 4)
def test_part_of_speech_extractor(self):
    """POS tagging over a headered text stim should yield one tag per word."""
    stim = ComplexTextStim(join(TEXT_DIR, 'complex_stim_with_header.txt'))
    tl = stim.extract([PartOfSpeechExtractor()])
    df = tl.to_df()
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(df.iloc[1, 3], 'NN')
    self.assertEqual(df.shape, (4, 4))
def test_part_of_speech_extractor(self):
    """POS tagging over a headered text stim should yield one tag per word."""
    stim = ComplexTextStim(join(TEXT_DIR, 'complex_stim_with_header.txt'))
    tl = stim.extract([PartOfSpeechExtractor()]).to_df()
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(tl.iloc[1, 3], 'NN')
    self.assertEqual(tl.shape, (4, 4))
def _convert(self, audio):
    """Transcribe an audio stim into a ComplexTextStim.

    Records the whole file via speech_recognition, then dispatches to the
    recognizer method named by self.recognize_method with self.api_key.
    """
    import speech_recognition as sr
    with sr.AudioFile(audio.filename) as source:
        clip = self.recognizer.record(source)
    # The recorded clip is independent of the (now closed) source file.
    recognize = getattr(self.recognizer, self.recognize_method)
    return ComplexTextStim.from_text(text=recognize(clip, self.api_key))