Example #1
    def _convert(self, audio):
        import speech_recognition as sr

        with audio.get_filename() as filename:
            with sr.AudioFile(filename) as source:
                clip = self.recognizer.record(source)

        _json = self._query_api(clip)
        if 'results' in _json:
            results = _json['results']
        else:
            raise Exception(
                'received invalid results from API: {0}'.format(str(_json)))
        elements = []
        for result in results:
            if result['final'] is True:
                timestamps = result['alternatives'][0]['timestamps']
                if self.resolution == 'words':
                    for entry in timestamps:
                        text = entry[0]
                        start = entry[1]
                        end = entry[2]
                        elements.append(TextStim(text=text, onset=start,
                                                 duration=end-start))
                elif self.resolution == 'phrases':
                    text = result['alternatives'][0]['transcript']
                    start = timestamps[0][1]
                    end = timestamps[-1][2]
                    elements.append(TextStim(text=text, onset=start,
                                             duration=end-start))
        return ComplexTextStim(elements=elements)
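A usage sketch for a converter with this _convert method, assuming a pliers-style API (the class name, import paths, and credential handling via environment variables are assumptions, not confirmed details):

from pliers.stimuli import AudioStim
from pliers.converters import IBMSpeechAPIConverter

# Hypothetical usage; the converter is assumed to read API
# credentials from the environment.
converter = IBMSpeechAPIConverter(resolution='words')
transcript = converter.transform(AudioStim('speech.wav'))
for elem in transcript.elements:
    print(elem.text, elem.onset, elem.duration)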
Example #2
    def _convert(self, audio):
        import speech_recognition as sr
        with sr.AudioFile(audio.filename) as source:
            clip = self.recognizer.record(source)
        text = getattr(self.recognizer, self.recognize_method)(clip,
                                                               self.api_key)
        return ComplexTextStim(text=text)
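For reference, the same pattern as a self-contained script against the speech_recognition package (the file name and Wit.ai key are placeholders):

import speech_recognition as sr

recognizer = sr.Recognizer()
with sr.AudioFile('speech.wav') as source:
    clip = recognizer.record(source)  # read the entire file into memory
# recognize_wit takes the API key as its second positional argument,
# which is what the getattr(...)(clip, self.api_key) call relies on.
text = recognizer.recognize_wit(clip, 'YOUR_WIT_AI_KEY')
print(text)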
Example #3
    def _convert(self, audio):
        verify_dependencies(['sr'])
        with audio.get_filename() as filename:
            with sr.AudioFile(filename) as source:
                clip = self.recognizer.record(source)

        text = getattr(self.recognizer, self.recognize_method)(clip, self.api_key)

        return ComplexTextStim(text=text)
Example #4
    def _convert(self, audio):
        import speech_recognition as sr
        with sr.AudioFile(audio.filename) as source:
            clip = self.recognizer.record(source)

        result = self._query_api(clip)

        timestamps = result['results'][0]['alternatives'][0]['timestamps']
        elements = []
        for entry in timestamps:
            elements.append(
                TextStim(text=entry[0],
                         onset=entry[1],
                         duration=entry[2] - entry[1]))
        return ComplexTextStim(elements=elements)
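To make the index arithmetic concrete, this is the response shape the loop assumes; each timestamp entry is a [word, start, end] triple in seconds (values below are illustrative):

result = {
    'results': [{
        'alternatives': [{
            'timestamps': [
                ['hello', 0.11, 0.45],
                ['world', 0.52, 0.98],
            ],
        }],
    }],
}
entry = result['results'][0]['alternatives'][0]['timestamps'][0]
text, onset, duration = entry[0], entry[1], entry[2] - entry[1]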
Example #5
    def getVector(self, stim, cbow=False):

        if not isinstance(stim, list):
            stim = [stim]

        if cbow:
            stim = self.generateCbows(stim)

        # Only average word embeddings are implemented; the embeddings
        # are already loaded in self.wvModel. `stim` is a list, and we
        # build one average embedding per stimulus. TODO: decide whether
        # to move this functionality to a util class later.
        num_dims = self.wvModel.vector_size

        embedding_average_vectors = []
        for s in stim:
            complex_s = ComplexTextStim(text=s.lower())
            embeddings = self.transform(complex_s)
            embedding_average_vector = np.zeros(num_dims)

            numWords = 0
            for embedding in embeddings:
                if self.content_only:
                    # Skip stopwords and non-alphanumeric tokens.
                    if embedding.stim.data in self.stop_words or \
                            not embedding.stim.data.isalnum():
                        continue

                embedding_average_vector += embedding._data[0]
                numWords += 1

            if numWords > 0:
                embedding_average_vector /= numWords

            embedding_average_vectors.append(embedding_average_vector)

        features = ['%s%d' % (self.prefix, i) for i in range(num_dims)]
        return ExtractorResult(embedding_average_vectors,
                               stim,
                               self,
                               features=features)
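The inner accumulation can be written more compactly with NumPy; a sketch under the same assumptions as the method above (embedding._data[0] holds the word vector, and the filter mirrors the content_only check):

import numpy as np

def average_embedding(embeddings, num_dims, stop_words=frozenset()):
    # Collect vectors for content words only.
    vectors = [np.asarray(e._data[0]) for e in embeddings
               if e.stim.data not in stop_words and e.stim.data.isalnum()]
    # Fall back to a zero vector when every token was filtered out,
    # matching the loop-based version.
    return np.mean(vectors, axis=0) if vectors else np.zeros(num_dims)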
Example #6
    def _convert(self, stim):
        request = self._build_request(stim)
        response = self._query_api(request)

        if 'error' in response:
            raise Exception(response['error']['message'])

        words = []
        if 'results' in response:
            for result in response['results']:
                transcription = result['alternatives'][0]
                for w in transcription['words']:
                    onset = float(w['startTime'][:-1])
                    duration = float(w['endTime'][:-1]) - onset
                    words.append(TextStim(text=w['word'],
                                          onset=onset,
                                          duration=duration))

        return ComplexTextStim(elements=words)
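The [:-1] slices assume Google-style duration strings with a trailing 's'; a short illustration with made-up values:

w = {'word': 'hello', 'startTime': '0.300s', 'endTime': '0.800s'}
onset = float(w['startTime'][:-1])           # '0.300s' -> 0.3
duration = float(w['endTime'][:-1]) - onset  # 0.5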
Example #7
    def _convert(self, audio):
        verify_dependencies(['rev_ai'])
        msg = "Beginning audio transcription with a timeout of %fs. Even for "\
              "small audios, full transcription may take awhile." % self.timeout
        logging.warning(msg)

        if audio.url:
            job = self.client.submit_job_url(audio.url)
        else:
            with audio.get_filename() as filename:
                job = self.client.submit_job_local_file(filename)

        operation_start = time.time()
        response = self.client.get_job_details(job.id)
        while (response.status == rev_ai.JobStatus.IN_PROGRESS) and \
              (time.time() - operation_start) < self.timeout:
            response = self.client.get_job_details(job.id)
            time.sleep(self.request_rate)

        if (time.time() - operation_start) >= self.timeout:
            msg = "Conversion reached the timeout limit of %fs." % self.timeout
            logging.warning(msg)

        if response.status == rev_ai.JobStatus.FAILED:
            raise Exception('API failed: %s' % response.failure_detail)

        result = self.client.get_transcript_object(job.id)

        elements = []
        order = 0
        for m in result.monologues:
            for e in m.elements:
                if e.type_ == 'text':
                    start = e.timestamp
                    end = e.end_timestamp
                    elements.append(
                        TextStim(text=e.value,
                                 onset=start,
                                 duration=end - start,
                                 order=order))
                    order += 1

        return ComplexTextStim(elements=elements, onset=audio.onset)
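The status-polling loop above generalizes to any asynchronous job API; a small reusable sketch (not rev_ai-specific, all names are illustrative):

import time

def poll_until_done(get_status, is_done, timeout, interval):
    """Re-check get_status() every `interval` seconds until
    is_done(status) returns True or `timeout` seconds elapse."""
    start = time.time()
    status = get_status()
    while not is_done(status) and (time.time() - start) < timeout:
        time.sleep(interval)
        status = get_status()
    return status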
Example #8
    def _convert(self, audio):
        verify_dependencies(['sr'])
        offset = 0.0 if audio.onset is None else audio.onset

        with audio.get_filename() as filename:
            with sr.AudioFile(filename) as source:
                clip = self.recognizer.record(source)

        _json = self._query_api(clip)
        if 'results' in _json:
            results = _json['results']
        else:
            raise Exception('received invalid results from API: {0}'.format(
                str(_json)))
        elements = []
        order = 0
        for result in results:
            if result['final'] is True:
                timestamps = result['alternatives'][0]['timestamps']
                if self.resolution == 'words':
                    for entry in timestamps:
                        text = entry[0]
                        start = entry[1]
                        end = entry[2]
                        elements.append(
                            TextStim(text=text,
                                     onset=offset + start,
                                     duration=end - start,
                                     order=order))
                        order += 1
                elif self.resolution == 'phrases':
                    text = result['alternatives'][0]['transcript']
                    start = timestamps[0][1]
                    end = timestamps[-1][2]
                    elements.append(
                        TextStim(text=text,
                                 onset=offset + start,
                                 duration=end - start,
                                 order=order))
                    order += 1
        return ComplexTextStim(elements=elements, onset=audio.onset)