class Window(QtGui.QDialog):
    """Main dialog of the Multi-mp4-Sync tool.

    The dialog loads mp4 files (menu, drag & drop, or a text list), extracts
    a mono wav track for each one with ffmpeg, estimates the relative time
    shift between the tracks by FFT-based cross-correlation, averages the
    aligned tracks into one "fused" signal, lets the user click split points
    on that signal, and finally writes the split points as per-video frame
    numbers.

    Each loaded file is kept in ``self.lsMp4`` as a dict with the keys
    ``'mp4-file'``, ``'wav-file'``, ``'wav'``, ``'name'``, ``'time-end'`` and
    ``'time-shift'``.
    """

    def __init__(self, parent=None):
        """Build the widgets, wire the signals and initialise the state."""
        super(Window, self).__init__(parent)
        self.setWindowTitle("Multi-mp4-Sync")
        self.resize(1280, 720)
        self.setAcceptDrops(True)

        # Menu bar (kept inside the dialog instead of the native OS bar).
        self.menuBar = QtGui.QMenuBar(self)
        self.menuBar.setNativeMenuBar(False)
        menuFile = self.menuBar.addMenu('File')

        actOpen = QtGui.QAction('Open', self)
        actOpen.setShortcut("Ctrl+O")
        actOpen.triggered.connect(self.openFiles)
        menuFile.addAction(actOpen)

        actExit = QtGui.QAction('Exit', self)
        actExit.setShortcut("Ctrl+Q")
        actExit.triggered.connect(exit)
        menuFile.addAction(actExit)

        # Matplotlib canvas and its navigation toolbar.
        self.figure = Figure()
        self.canvas = FigureCanvas(self.figure)
        self.toolbar = NavigationToolbar(self.canvas, self)

        # Action buttons.
        self.btnSync = QtGui.QPushButton('Sync')
        self.btnSync.clicked.connect(self.sync)
        self.btnSync.setFixedWidth(100)

        self.btnFuse = QtGui.QPushButton('Fuse')
        self.btnFuse.clicked.connect(self.fuse)
        self.btnFuse.setFixedWidth(100)

        self.btnClick = QtGui.QPushButton('Click')
        self.btnClick.clicked.connect(self.click)
        self.btnClick.setFixedWidth(100)

        self.btnGenerate = QtGui.QPushButton('Generate')
        self.btnGenerate.clicked.connect(self.generate)
        self.btnGenerate.setFixedWidth(100)

        # Options.
        self.cbBlank = QtGui.QCheckBox("Insert Blank")
        self.lbMinutes = QtGui.QLabel("Expected Maximum gap(min) :")
        self.lbMinutes.setFixedWidth(100)
        self.tbMinutes = QtGui.QLineEdit("")
        self.tbMinutes.setFixedWidth(100)
        self.tbMinutes.setText('2')

        layoutControl = QtGui.QGridLayout()
        layoutControl.addWidget(self.btnSync, 0, 0, 1, 1)
        layoutControl.addWidget(self.btnFuse, 1, 0, 1, 1)
        layoutControl.addWidget(self.btnClick, 2, 0, 1, 1)
        layoutControl.addWidget(self.btnGenerate, 3, 0, 1, 1)
        layoutControl.addWidget(self.cbBlank, 0, 1, 1, 1)
        layoutControl.addWidget(self.lbMinutes, 1, 1, 1, 1)
        layoutControl.addWidget(self.tbMinutes, 2, 1, 1, 1)

        # Read-only log pane that keeps only the last 10 blocks of text.
        self.edt = QtGui.QPlainTextEdit()
        self.edt.setDisabled(True)
        self.edt.setMaximumBlockCount(10)

        # List of loaded files; key presses are routed through eventFilter.
        self.listFile = QtGui.QListWidget()
        self.listFile.installEventFilter(self)
        self.listFile.setFixedWidth(100)

        layout = QtGui.QGridLayout()
        layout.addWidget(self.menuBar, 0, 0, 1, 3)
        layout.addWidget(self.toolbar, 1, 0, 1, 3)
        layout.addWidget(self.canvas, 2, 0, 1, 3)
        layout.addLayout(layoutControl, 3, 0, 1, 1)
        layout.addWidget(self.listFile, 3, 1, 1, 1)
        layout.addWidget(self.edt, 3, 2, 1, 1)
        self.setLayout(layout)

        # Application state.
        self.lsMp4 = []            # one dict per loaded mp4 (see class docstring)
        self.dictWav = {}          # derived signals; 'fuse' is set by fuse()
        self.bClick = False        # True once a fused signal is available
        self.lsSplitPosition = []  # sorted split times (seconds) chosen by click()
        self.keyPlot = None        # key of the mp4 dict entry drawn by plot()

        self.ax = self.figure.add_subplot(111)

    # ------------------------------------------------------------------
    # small private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _ensureDir(path):
        """Create directory *path* if it does not exist yet."""
        if not os.path.isdir(path):
            os.makedirs(path)

    @staticmethod
    def _beep():
        """Write the terminal bell character to stdout to signal completion."""
        sys.stdout.write('\a')
        sys.stdout.flush()

    def _addMp4(self, mp4):
        """Register a dict returned by loadMp4; falsy values are ignored."""
        if not mp4:
            return
        self.lsMp4.append(mp4)
        self.listFile.addItem(QtGui.QListWidgetItem(mp4['name']))

    # ------------------------------------------------------------------
    # Qt event handling
    # ------------------------------------------------------------------
    def eventFilter(self, obj, event):
        """Remove the selected files when Delete is pressed in the file list.

        Always returns the base-class result, so the filter yields a bool on
        every path and never consumes the event itself.
        """
        isDeleteKey = (obj == self.listFile
                       and event.type() == QEvent.KeyPress
                       and event.key() == Qt.Key_Delete)
        if isDeleteKey:
            listItems = self.listFile.selectedItems()
            for item in listItems:
                self.listFile.takeItem(self.listFile.row(item))
                # Entries are matched by display name; only the first match
                # is dropped.
                for mp4 in self.lsMp4:
                    if mp4['name'] == item.text():
                        self.lsMp4.remove(mp4)
                        break
            if listItems:
                self.plot()
        return super(Window, self).eventFilter(obj, event)

    def dragEnterEvent(self, event):
        """Accept a drag only when it carries URLs."""
        if event.mimeData().hasUrls():
            event.accept()
        else:
            event.ignore()

    def dropEvent(self, event):
        """Load dropped ``.mp4`` files, or ``.txt`` lists of them.

        A ``.txt`` file is read with ``np.genfromtxt`` and must contain two
        whitespace-separated columns per row: the mp4 path and its initial
        time shift in seconds (the same layout getTimeShift appends to
        ``result/sync.txt``).
        """
        lsUrl = [unicode(u.toLocalFile()) for u in event.mimeData().urls()]
        for url in lsUrl:
            ext = os.path.splitext(url)[1].lower()
            if ext == ".txt":
                # atleast_2d: a single-row file is returned as a 1-D array,
                # which would otherwise break the (path, t0) unpacking.
                txt = np.atleast_2d(np.genfromtxt(url, dtype='str'))
                if txt.size == 0:
                    continue
                for url_txt, t0 in txt:
                    self._addMp4(self.loadMp4(url_txt, t0=float(t0)))
            elif ext == ".mp4":
                self._addMp4(self.loadMp4(url))
        self.keyPlot = 'wav'
        self.plot()

    def openFiles(self):
        """Let the user pick mp4 files with a file dialog and load them."""
        dlg = QtGui.QFileDialog()
        dlg.setFileMode(QtGui.QFileDialog.ExistingFiles)
        dlg.setDirectory(os.getcwd())
        dlg.setFilter("MP4 files (*.mp4)")
        if dlg.exec_():
            for url in dlg.selectedFiles():
                self._addMp4(self.loadMp4(str(url)))
            self.keyPlot = 'wav'
            self.plot()

    # ------------------------------------------------------------------
    # loading and plotting
    # ------------------------------------------------------------------
    def loadMp4(self, url, sec_cut = 3000, t0 = 0):
        """Extract and load the audio track of one mp4 file.

        The audio is converted once by ffmpeg to ``wav/<name>.wav`` (mono);
        an existing wav file is reused.

        Args:
            url: path of the mp4 file.
            sec_cut: maximum number of seconds of audio kept in memory.
            t0: initial time shift in seconds; the signal is left-padded
                with the corresponding number of zero samples.

        Returns:
            The dict describing the file, or None when the file is already
            loaded, is not an ``.mp4``, or no wav file could be produced.
        """
        if any(mp4['mp4-file'] == url for mp4 in self.lsMp4):
            return None

        strBase = os.path.basename(url)
        strFilename, strExtension = os.path.splitext(strBase)
        if strExtension.lower() != ".mp4":
            return None

        strFileWav = os.path.join("wav", strFilename + ".wav")
        if not os.path.exists(strFileWav):
            self._ensureDir("wav")
            # Argument list instead of a shell string: paths containing
            # spaces or shell metacharacters are passed through verbatim.
            command = ["ffmpeg", "-n", "-i", url, "-ac", "1", "-vn", strFileWav]
            try:
                subprocess.call(command)
            except OSError as err:
                print("ffmpeg could not be started: %s" % err)
                return None

        if not os.path.isfile(strFileWav):
            return None

        wavfile = wave.open(strFileWav, 'r')
        try:
            numCh = wavfile.getnchannels()
            fr = float(wavfile.getframerate())
            wav = np.frombuffer(wavfile.readframes(-1), dtype=np.int16)
        finally:
            wavfile.close()

        # Duration of the untrimmed, unshifted recording in seconds.
        t_end = wav.size / (numCh * fr)
        pad = int(t0 * (numCh * fr))
        wav = np.pad(wav, (pad, 0), 'constant')
        # Keep at most sec_cut seconds and average the channels.
        wav = wav[:int(numCh * sec_cut * fr)].reshape(-1, numCh).mean(1)
        sigWav = MySignal(x=wav, f = fr)

        mp4 = {'mp4-file': url,
               'wav-file': strFileWav,
               'wav': sigWav,
               'name': strFilename,
               'time-end': t_end,
               'time-shift': t0}
        print("%s loaded" % url)
        return mp4

    def plot(self):
        """Redraw the signal stored under ``self.keyPlot`` for every file.

        Only every 1000th sample is drawn. Files that no longer hold the
        requested signal (fuse() releases ``'wav'``) are skipped.
        """
        key = self.keyPlot
        if key is None:
            return
        step = 1000
        self.ax.clear()
        lsLegend = []
        for mp4 in self.lsMp4:
            if key not in mp4:
                continue
            sig = mp4[key]
            legend, = self.ax.plot(sig.getTimeAxis()[::step], sig.x[::step],
                                   label=mp4['name'])
            lsLegend.append(legend)
        if lsLegend:
            self.ax.legend(handles=lsLegend)
        self.ax.set_xlabel('t(sec)')
        self.canvas.draw()

    # ------------------------------------------------------------------
    # synchronisation
    # ------------------------------------------------------------------
    def sync(self):
        """Estimate the time shifts, replot the shifted signals and beep."""
        if not self.getTimeShift():
            return
        self.keyPlot = 'wav'
        self.plot()
        self.edt.appendPlainText("Sync Done")
        self._beep()

    def getTimeShift(self):
        """Estimate and apply the sample shift of every file.

        The longest signal is used as the base. The base is processed in
        power-of-two sized chunks; for every chunk and file the circular
        cross-correlation with the base is computed through the FFT, and the
        normalised correlations of all chunks are multiplied together. The
        argmax of that product is the shift of the file. Shifts are then
        offset so that the smallest one is zero, applied to the signals and
        appended to ``result/sync.txt``.

        Returns:
            True when shifts were computed and applied, False when the
            inputs were unusable (nothing loaded, audio already released by
            fuse(), or an invalid gap value).
        """
        if not self.lsMp4:
            self.edt.appendPlainText("No files to sync")
            return False
        if any('wav' not in mp4 for mp4 in self.lsMp4):
            self.edt.appendPlainText("Sync unavailable: audio was released by Fuse")
            return False
        try:
            minutes = float(self.tbMinutes.text())
        except (TypeError, ValueError):
            minutes = 0.0
        if minutes <= 0:
            self.edt.appendPlainText("Invalid maximum gap")
            return False

        # 4x the expected gap, assuming 48 kHz audio, rounded up to the next
        # power of two strictly greater than that count.
        nChunkSize = 1 << int(minutes * 4 * 60 * 48000).bit_length()

        # find base signal - longest one
        lsT = [mp4['wav'].getTEnd() for mp4 in self.lsMp4]
        idxBase = lsT.index(max(lsT))
        signalBase = self.lsMp4[idxBase]['wav']
        wavBase = signalBase.x
        tBase = signalBase.getTimeAxis()
        nBase = signalBase.getLength()

        numChunk = int(np.ceil(float(nBase) / nChunkSize))
        numMp4 = len(self.lsMp4)
        npCorrProd = np.ones((numMp4, nChunkSize))

        for i in range(numChunk):
            print('Chunk %d / %d' % (i, numChunk))
            idxS = i * nChunkSize
            idxE = min(idxS + nChunkSize, nBase)
            wavBaseChunk = wavBase[idxS:idxE]
            tBaseChunk = tBase[idxS:idxE]

            # FFT of the base signal raised to the 4th power; the power
            # emphasises peaks. The last chunk is zero-padded to full size.
            wavBaseChunk = np.pad(wavBaseChunk,
                                  (0, nChunkSize - wavBaseChunk.size),
                                  'constant')
            print('FFT base %d' % wavBaseChunk.size)
            fftBaseChunk = np.fft.fft(wavBaseChunk * wavBaseChunk
                                      * wavBaseChunk * wavBaseChunk)

            for j, mp4 in enumerate(self.lsMp4):
                # Resample the file onto the base time axis (zero outside its
                # own range) and take the same 4th power.
                wav = np.interp(tBaseChunk, mp4['wav'].getTimeAxis(),
                                mp4['wav'].x, left=0, right=0)
                wav = np.pad(wav, (0, nChunkSize - wav.size), 'constant')
                print('FFT %s %d' % (mp4['name'], wav.size))
                fftWav = np.fft.fft(wav * wav * wav * wav)

                # correlation through the FFT: product with the conjugate
                corr = np.abs(np.fft.ifft(fftBaseChunk * np.conj(fftWav)))

                # the +1.0 offset keeps an all-zero chunk from wiping out
                # the accumulated product
                npCorrProd[j] = npCorrProd[j] * (corr / (corr.max() + 1) + 1.0)

        idxPeak = np.argmax(npCorrProd, axis=1)
        # The correlation is circular: peaks in the upper half represent
        # negative lags.
        isWrapped = idxPeak > nChunkSize // 2
        idxPeak[isWrapped] -= nChunkSize

        # align the minimum shift to zero
        sampleShift = idxPeak - idxPeak.min()

        self._ensureDir("result")
        with open(os.path.join("result", "sync.txt"), "a") as f:
            for j, mp4 in enumerate(self.lsMp4):
                # NOTE(review): MySignal is defined elsewhere; shiftSample
                # presumably updates its t0 attribute - confirm.
                mp4['wav'].shiftSample(sampleShift[j])
                mp4['time-end'] += float(sampleShift[j]) / mp4['wav'].f
                mp4['time-shift'] += mp4['wav'].t0
                f.write(mp4['mp4-file'] + ' ' + str(mp4['time-shift']) + '\n')
        print('Done')
        return True

    # ------------------------------------------------------------------
    # fusing
    # ------------------------------------------------------------------
    def fuse(self):
        """Average all tracks on a common 48 kHz time axis and plot the mean.

        The in-memory ``'wav'`` signals are released first and every wav
        file is re-read in full (loadMp4 keeps only ``sec_cut`` seconds).
        The result is stored in ``self.dictWav['fuse']`` and enables
        click().
        """
        if not self.lsMp4:
            self.edt.appendPlainText("No files to fuse")
            return

        fBase = 48000.0
        TMax = max(mp4['time-end'] for mp4 in self.lsMp4)
        tBase = np.arange(0, TMax, 1.0 / fBase)
        numMp4 = len(self.lsMp4)
        wavMean = np.zeros(tBase.size)

        # Free the truncated signals before the full files are read; pop()
        # keeps a second fuse() run from failing on the missing key.
        for mp4 in self.lsMp4:
            mp4.pop('wav', None)

        for mp4 in self.lsMp4:
            print(mp4['wav-file'])
            wavfile = wave.open(mp4['wav-file'], 'r')
            try:
                numCh = wavfile.getnchannels()
                fr = float(wavfile.getframerate())
                raw = wavfile.readframes(-1)
            finally:
                wavfile.close()
            wav = np.frombuffer(raw, dtype=np.int16).reshape(-1, numCh).mean(1)
            del raw
            sigWav = MySignal(x=wav, f = fr, t0 = mp4['time-shift'])
            wav = np.interp(tBase, sigWav.getTimeAxis(), sigWav.x,
                            left=0, right=0)
            wavMean = wavMean + wav
            del wav

        wavMean = (wavMean / numMp4).astype(int)
        self.dictWav['fuse'] = MySignal(x=wavMean, f = fBase)

        self.ax.clear()
        self.ax.plot(tBase[::100], wavMean[::100], label='mean')
        self.ax.set_xlabel('t(sec)')
        self.canvas.draw()
        self.bClick = True
        self.edt.appendPlainText("Fuse Done")
        self._beep()

    # ------------------------------------------------------------------
    # split point selection
    # ------------------------------------------------------------------
    def click(self):
        """Let the user pick one split point on the fused signal.

        The audio around the picked sample is played back; after the user
        confirms, the point is marked on the plot, added to
        ``self.lsSplitPosition`` and the whole list is saved to
        ``result/click.txt``. Does nothing before fuse() has run.
        """
        if not self.bClick:
            return

        X, play = self.getClickedPoint()
        if X is None:
            return
        self.playSound(play, f = self.dictWav['fuse'].f)

        answer = QtGui.QMessageBox.question(
            self, '', "Is it the cutting point?",
            QtGui.QMessageBox.Yes | QtGui.QMessageBox.No)
        if answer != QtGui.QMessageBox.Yes:
            return

        # 3 s wide red band centred on the chosen time, plus a green dot.
        rect = patches.Rectangle((X[0] - 1.5, -40000), 3, 80000,
                                 facecolor='r', ec='none', zorder=10)
        self.ax.add_patch(rect)
        self.ax.plot(X[0], X[1], 'go')
        self.canvas.draw()

        self.lsSplitPosition.append(X[0])
        self.lsSplitPosition.sort()
        self.edt.appendPlainText(" ".join(str(x) for x in self.lsSplitPosition))
        self._ensureDir("result")
        np.savetxt(os.path.join("result", "click.txt"),
                   np.array(self.lsSplitPosition), fmt='%f')

    def getClickedPoint(self):
        """Wait for one mouse click and snap it to the fused signal.

        Returns:
            ``(X, play)`` where ``X`` is the ``[time, value]`` of the signal
            sample closest to the click (distance measured in figure
            inches) and ``play`` holds the samples from 1.5 s before to
            1.5 s after it, limited to the searched window. ``(None, None)``
            is returned when no click was received.
        """
        # Re-applying the current limits; presumably this pins the view so
        # that later drawing does not rescale it.
        self.ax.set_xlim(self.ax.get_xlim())
        self.ax.set_ylim(self.ax.get_ylim())

        self.edt.appendPlainText("Click point")
        lsClicked = self.figure.ginput(1)
        if not lsClicked:
            self.edt.appendPlainText("No point clicked")
            return None, None
        X_clicked = lsClicked[0]
        self.edt.appendPlainText(str(X_clicked))

        sigFuse = self.dictWav['fuse']
        x_plotted = sigFuse.x
        t_plotted = sigFuse.getTimeAxis()
        len_plotted = sigFuse.getLength()

        # Inches per data unit along each axis, so that distances are
        # compared as they appear on screen.
        xmin, xmax = self.ax.get_xlim()
        ymin, ymax = self.ax.get_ylim()
        sx, sy = self.figure.get_size_inches()
        npScale = np.array([float(sx) / (xmax - xmin),
                            float(sy) / (ymax - ymin)]).reshape(2, 1)

        # Search only +/- 10% of the signal around the clicked time. The
        # index is clamped so that a click outside the data range still
        # yields a non-empty window.
        x_range = int(len_plotted / 10)
        idxX = int(X_clicked[0] / t_plotted[-1] * len_plotted)
        idxX = min(max(idxX, 0), len_plotted - 1)
        idxFrom = max(idxX - x_range, 0)
        idxTo = min(idxX + x_range, len_plotted - 1)
        subset = np.vstack([t_plotted[idxFrom:idxTo], x_plotted[idxFrom:idxTo]])
        if subset.shape[1] == 0:
            self.edt.appendPlainText("No point clicked")
            return None, None

        npX = np.array(X_clicked).reshape(2, 1)
        diff = npScale * (subset - npX)
        dist = diff[0] * diff[0] + diff[1] * diff[1]
        idxMin = np.argmin(dist)
        X = subset[:, idxMin]

        # 1.5 s on each side of the snapped sample. The start is clamped at
        # 0: a negative start would be read as an index from the end of the
        # window and select the wrong samples.
        nHalf = 1.5 * sigFuse.f
        idxPlayFrom = max(int(idxMin - nHalf), 0)
        idxPlayTo = int(idxMin + nHalf)
        play = subset[1, idxPlayFrom:idxPlayTo]
        return X, play

    def playSound(self, play, f):
        """Play the samples in *play* as 16-bit mono audio at rate *f*.

        Samples are clipped to the int16 range and written in blocks of
        1024; the stream and the PyAudio instance are released afterwards,
        even when playback fails.
        """
        nChunkSize = 1024
        samples = np.clip(np.asarray(play), -32768, 32767).astype(np.int16)

        p = pyaudio.PyAudio()
        try:
            stream = p.open(format = p.get_format_from_width(2),
                            channels = 1,
                            rate = int(f),
                            output = True)
            try:
                for idxS in range(0, samples.size, nChunkSize):
                    stream.write(samples[idxS:idxS + nChunkSize].tobytes())
            finally:
                stream.stop_stream()
                stream.close()
        finally:
            p.terminate()

    # ------------------------------------------------------------------
    # output
    # ------------------------------------------------------------------
    def generate(self):
        """Write the split points of every video as frame numbers.

        For each file the frame rate is read with OpenCV and each split time
        is converted to a frame index relative to the start of that video
        (its time shift is subtracted). The result is saved to
        ``result/<name>_frames.txt``.
        """
        self._ensureDir("result")
        for mp4 in self.lsMp4:
            cap = cv2.VideoCapture(mp4['mp4-file'])
            fps = cap.get(cv2.CAP_PROP_FPS)
            cap.release()

            strFilename, _ = os.path.splitext(os.path.basename(mp4['mp4-file']))
            nFrameShift = int(fps * mp4['time-shift'])
            ls_nFrameEnd = [int(fps * t) - nFrameShift
                            for t in self.lsSplitPosition]
            np.savetxt(os.path.join("result", strFilename + "_frames.txt"),
                       np.array(ls_nFrameEnd), fmt='%d')
        self.edt.appendPlainText("Done")
        self._beep()