def generate_data(sub_names, mov_names):
    """Collect speech and noise features from subtitle/movie pairs.

    For every (subtitle, movie) pair, the ``Dialogue:`` lines that follow
    the ``[Events]`` marker are converted to ``(start, end, is_speech)``
    intervals, overlapping intervals are merged, and the movie is pushed
    through ``ffmpeg_decoder`` -> ``spectrum`` -> ``feature_extractor``.
    The extractor receives the merged intervals together with the shared
    ``speech`` and ``noise`` lists (presumably appending to them).

    Args:
        sub_names: paths of the subtitle files.
        mov_names: paths of the movie files, paired with ``sub_names`` by
            position; surplus entries of the longer sequence are ignored.

    Returns:
        The tuple ``(speech, noise)``.

    Raises:
        ValueError: if a subtitle file contains no ``[Events]`` line.
    """
    speech = []
    noise = []
    for sub_name, mov_name in zip(sub_names, mov_names):
        intervals = []
        # The context manager closes the subtitle file even when parsing fails.
        with open(sub_name) as fsub:
            # Skip the header. Iterating (instead of polling readline())
            # stops at end of file rather than spinning forever when the
            # marker is absent.
            for line in fsub:
                if line.strip() == '[Events]':
                    break
            else:
                raise ValueError(
                    "no [Events] section in subtitle file %r" % (sub_name,))

            for line in fsub:
                if not line.startswith('Dialogue:'):
                    continue
                fields = line.strip().split(',')
                # Only 'Default'-style lines whose text does not open with an
                # override block ('{') are labelled True.
                is_speech = (fields[3] == 'Default'
                             and not fields[9].startswith('{'))
                intervals.append(
                    (parse_time(fields[1]), parse_time(fields[2]), is_speech))

        # key= is stable like the old cmp= form and works on Python 2 and 3.
        intervals.sort(key=lambda interval: interval[0])

        # Single-pass merge of overlapping intervals.
        merged = []
        for start, end, label in intervals:
            if merged and merged[-1][1] > start:
                prev_start, prev_end, prev_label = merged[-1]
                # Keep the later end: the new interval may lie entirely
                # inside the previous one, and must not shorten it.
                merged[-1] = (prev_start, max(prev_end, end),
                              prev_label and label)
            else:
                merged.append((start, end, label))

        dec = ffmpeg_decoder(mov_name, SAMPLE_RATE)
        spec = spectrum(dec.ostream.get_handle(), squared=False)
        feat = feature_extractor(spec.ostream.get_handle(), merged,
                                 speech, noise)
        dec.start()
        spec.start()
        feat.start()
        # Only the final stage is joined before moving on to the next pair.
        feat.join()
    return speech, noise
def generate_data(sub_names, mov_names):
    """Collect speech and noise features from subtitle/movie pairs.

    For every (subtitle, movie) pair, the ``Dialogue:`` lines that follow
    the ``[Events]`` marker are converted to ``(start, end, is_speech)``
    intervals, overlapping intervals are merged, and the movie is pushed
    through ``ffmpeg_decoder`` -> ``spectrum`` -> ``feature_extractor``.
    The extractor receives the merged intervals together with the shared
    ``speech`` and ``noise`` lists (presumably appending to them).

    Args:
        sub_names: paths of the subtitle files.
        mov_names: paths of the movie files, paired with ``sub_names`` by
            position; surplus entries of the longer sequence are ignored.

    Returns:
        The tuple ``(speech, noise)``.

    Raises:
        ValueError: if a subtitle file contains no ``[Events]`` line.
    """
    speech = []
    noise = []
    for sub_name, mov_name in zip(sub_names, mov_names):
        intervals = []
        # The context manager closes the subtitle file even when parsing fails.
        with open(sub_name) as fsub:
            # Skip the header. Iterating (instead of polling readline())
            # stops at end of file rather than spinning forever when the
            # marker is absent.
            for line in fsub:
                if line.strip() == '[Events]':
                    break
            else:
                raise ValueError(
                    "no [Events] section in subtitle file %r" % (sub_name,))

            for line in fsub:
                if not line.startswith('Dialogue:'):
                    continue
                fields = line.strip().split(',')
                # Only 'Default'-style lines whose text does not open with an
                # override block ('{') are labelled True.
                is_speech = (fields[3] == 'Default'
                             and not fields[9].startswith('{'))
                intervals.append(
                    (parse_time(fields[1]), parse_time(fields[2]), is_speech))

        # key= is stable like the old cmp= form and works on Python 2 and 3.
        intervals.sort(key=lambda interval: interval[0])

        # Single-pass merge of overlapping intervals.
        merged = []
        for start, end, label in intervals:
            if merged and merged[-1][1] > start:
                prev_start, prev_end, prev_label = merged[-1]
                # Keep the later end: the new interval may lie entirely
                # inside the previous one, and must not shorten it.
                merged[-1] = (prev_start, max(prev_end, end),
                              prev_label and label)
            else:
                merged.append((start, end, label))

        dec = ffmpeg_decoder(mov_name, SAMPLE_RATE)
        spec = spectrum(dec.ostream.get_handle(), squared=False)
        feat = feature_extractor(spec.ostream.get_handle(), merged,
                                 speech, noise)
        dec.start()
        spec.start()
        feat.start()
        # Only the final stage is joined before moving on to the next pair.
        feat.join()
    return speech, noise
def OnOpen(self, evt):
    """Let the user pick a media file, then start playback and subtitling.

    Stops the current playback, shows a file dialog, loads the chosen file
    into the player, asks for the recognition/translation options through
    ``SelectDialog``, starts playback, and finally launches the
    decoder -> ``naive_vad`` -> ``sub_generator`` chain plus a second
    ``spectrum`` stage whose output handle is given to ``self.spec``.

    If the file dialog is cancelled, nothing beyond stopping playback
    happens.

    Args:
        evt: the triggering event; it is not used.
    """
    panel = self.playerpanel
    panel.OnStop(None)

    # File dialog opened in the user's home directory, showing all files.
    dlg = wx.FileDialog(self, "Choose a file", user.home, "", "*.*", wx.OPEN)
    try:
        if dlg.ShowModal() != wx.ID_OK:
            # Cancelled: no file was chosen, so do not rebuild the
            # processing pipeline on stale (or missing) state.
            return
        dirname = dlg.GetDirectory()
        filename = dlg.GetFilename()
    finally:
        # The dialog is released on every path, including errors.
        dlg.Destroy()

    panel.mediapath = unicode(os.path.join(dirname, filename))
    panel.Media = panel.Instance.media_new(panel.mediapath)
    panel.player.set_media(panel.Media)

    # Fall back to the file name when the player reports no title (-1).
    title = panel.player.get_title()
    if title == -1:
        title = filename

    # Window handle where VLC renders its video output.
    panel.player.set_hwnd(panel.videopanel.GetHandle())
    # Set the volume slider to the current volume.
    panel.volslider.SetValue(panel.player.audio_get_volume() / 2)
    panel.title = title

    # Ask which recognition / translation languages to use.
    panel.select_dialog = SelectDialog(None, "Choice")
    panel.select_dialog.ShowModal()

    # Finally play. FIXME: this should be made cross-platform.
    self.OnPlay(None)

    # NOTE(review): presumably gives the player time to start so that
    # get_length() is meaningful -- confirm.
    time.sleep(0.5)
    leng = panel.player.get_length()
    self.spec.GetLength(self.spec, length=leng)
    panel.timer.Start(100)
    self.SetTitle("%s - AutoSub" % panel.title)

    lan = {"English": "en", "Chinese": "zh-cn", "Japanese": "ja"}
    lang_from = None
    lang_to = None
    if panel.select_dialog.isrecognize:
        lang_from = lan[panel.select_dialog.sorcelan]
    if panel.select_dialog.istranslate:
        lang_to = lan[panel.select_dialog.targetlan]

    source = panel.mediapath
    # splitext removes only a real extension; slicing at rfind('.') dropped
    # the last character of extension-less names and could cut at a dot
    # inside a directory name.
    target = os.path.splitext(source)[0] + '.srt'
    self.subtitle = target
    self.currentfile = None

    self.dec = fd.ffmpeg_decoder(source, output_rate=8000)
    self.vad = naive_vad(self.dec.ostream.get_handle())
    self.sub = sg.sub_generator(self.vad.ostream.get_handle(), source,
                                target, lang_from=lang_from, lang_to=lang_to)
    self.ohandle = self.sub.ostream.get_handle()
    self.specd = sp.spectrum(self.dec.ostream.get_handle(), window_size=1024)
    handle = self.specd.ostream.get_handle()

    self.dec.start()
    self.vad.start()
    self.sub.start()
    self.specd.start()
    self.spec.OpenData(self.spec, handle)
def OnOpen(self, evt):
    """Let the user pick a media file, then start playback and subtitling.

    Stops the current playback, shows a file dialog, loads the chosen file
    into the player, asks for the recognition/translation options through
    ``SelectDialog``, starts playback, and finally launches the
    decoder -> ``naive_vad`` -> ``sub_generator`` chain plus a second
    ``spectrum`` stage whose output handle is given to ``self.spec``.

    If the file dialog is cancelled, nothing beyond stopping playback
    happens.

    Args:
        evt: the triggering event; it is not used.
    """
    panel = self.playerpanel
    panel.OnStop(None)

    # File dialog opened in the user's home directory, showing all files.
    dlg = wx.FileDialog(self, "Choose a file", user.home, "", "*.*", wx.OPEN)
    try:
        if dlg.ShowModal() != wx.ID_OK:
            # Cancelled: no file was chosen, so do not rebuild the
            # processing pipeline on stale (or missing) state.
            return
        dirname = dlg.GetDirectory()
        filename = dlg.GetFilename()
    finally:
        # The dialog is released on every path, including errors.
        dlg.Destroy()

    panel.mediapath = unicode(os.path.join(dirname, filename))
    panel.Media = panel.Instance.media_new(panel.mediapath)
    panel.player.set_media(panel.Media)

    # Fall back to the file name when the player reports no title (-1).
    title = panel.player.get_title()
    if title == -1:
        title = filename

    # Window handle where VLC renders its video output.
    panel.player.set_hwnd(panel.videopanel.GetHandle())
    # Set the volume slider to the current volume.
    panel.volslider.SetValue(panel.player.audio_get_volume() / 2)
    panel.title = title

    # Ask which recognition / translation languages to use.
    panel.select_dialog = SelectDialog(None, "Choice")
    panel.select_dialog.ShowModal()

    # Finally play. FIXME: this should be made cross-platform.
    self.OnPlay(None)

    # NOTE(review): presumably gives the player time to start so that
    # get_length() is meaningful -- confirm.
    time.sleep(0.5)
    leng = panel.player.get_length()
    self.spec.GetLength(self.spec, length=leng)
    panel.timer.Start(100)
    self.SetTitle("%s - AutoSub" % panel.title)

    lan = {"English": "en", "Chinese": "zh-cn", "Japanese": "ja"}
    lang_from = None
    lang_to = None
    if panel.select_dialog.isrecognize:
        lang_from = lan[panel.select_dialog.sorcelan]
    if panel.select_dialog.istranslate:
        lang_to = lan[panel.select_dialog.targetlan]

    source = panel.mediapath
    # splitext removes only a real extension; slicing at rfind('.') dropped
    # the last character of extension-less names and could cut at a dot
    # inside a directory name.
    target = os.path.splitext(source)[0] + '.srt'
    self.subtitle = target
    self.currentfile = None

    self.dec = fd.ffmpeg_decoder(source, output_rate=8000)
    self.vad = naive_vad(self.dec.ostream.get_handle())
    self.sub = sg.sub_generator(self.vad.ostream.get_handle(), source,
                                target, lang_from=lang_from, lang_to=lang_to)
    self.ohandle = self.sub.ostream.get_handle()
    self.specd = sp.spectrum(self.dec.ostream.get_handle(), window_size=1024)
    handle = self.specd.ostream.get_handle()

    self.dec.start()
    self.vad.start()
    self.sub.start()
    self.specd.start()
    self.spec.OpenData(self.spec, handle)
if line[3] != 'Default' or line[9].startswith('{\\an8'): intervels.append( (parse_time(line[1]), parse_time(line[2]), False) ) else: intervels.append( (parse_time(line[1]), parse_time(line[2]), True) ) intervels.sort(cmp=lambda x,y: cmp(x[0], y[0])) i = 0 while i < len(intervels)-1: if intervels[i][1] > intervels[i+1][0]: intervels[i] = (intervels[i][0], intervels[i+1][1], intervels[i][2] and intervels[i+1][2]) del intervels[i+1] else: i = i + 1 dec = ffmpeg_decoder(mov_name, SAMPLE_RATE) spec = spectrum(dec.ostream.get_handle(), squared = False) feat = feature_extractor(spec.ostream.get_handle(), intervels, speech, noise) dec.start() spec.start() feat.start() feat.join() random.shuffle(speech) random.shuffle(noise) for f in speech: plt.imshow(np.log(abs(f)**2).reshape((5, 128))) plt.show()