return args[1] class DelayMiner(DataMiner): """ Data miner that sleeps for args[0] seconds and then stores args[1] """ def build(self, data): sleep(data[0]) return data[1] pipe = Pipeline() pipe.addAcquirer(DelayAcquirer(), 'da') pipe.addMiner(DelayMiner(), 'delay') print "\n ########################## Using blocking versions in an external thread ##################\n" t = Thread(target=pipe.acquireAndBuildCorpus, args=('da', 'delay', 'delay', [1, [2, "lalala"]], 0)) t.start() while (not pipe.rawData.has_key('da') or pipe.acquire['da'].checkStatus() > 0 ) or (not pipe.corpus.has_key('delay') or pipe.mine['delay'].checkStatus() > 0): print "Waiting..." sleep(0.5) print "rawData['da']:", pipe.rawData['da'] print "corpus['delay']:", pipe.corpus['delay']
print "" # add our various pipeline components here pl.addAcquirer( YoutubeAudioAcquirer(), 'ytaudio' ) # downloads a video from youtube (.webm), and converts it to wav for speech recognition pl.addAcquirer(AlwaysFailAcquirer(), 'fail') # Sample acquirer that does nothing but fail pl.addAcquirer( YoutubeAutoVSSAcquirer(), 'ytautosub' ) # downloads an autogenerated VSS file from Youtube to temp folder pl.addAcquirer( YoutubeVideoAcquirer(), 'ytvid' ) # Downloads a video from youtube at the highest possible quality pl.addMiner(FileToLineMiner(), 'fileline') # processes a file into a list of lines pl.addMiner( VSSChunkMiner(), 'vssminer' ) # processes a list of lines in VSS format into a list of SRTChunks pl.addMiner( VSSChunkMiner(), 'vssminer2' ) # processes a list of lines in VSS format into a list of SRTChunks pl.addMiner( AudioSplitSpeechRecog(3, 1, 'en-US'), 'speechRecog' ) # processes a single audio file in wav format into a list of SRTChunks pl.addMiner( SRTChunkListToRIDict(), 'chunkToRIDict' ) # builds a reverse-indexed dict of word => list of chunks containing word pl.addMiner( VideoFaceFinder(), 'faceFinder') # Finds faces in the frames of a video and outputs them.