# Driver: detect multiword n-grams from the longest (n=curr) down to bigrams,
# rewriting the working corpus after every pass, then project the annotation
# back onto the original corpus.
#
# Every print below is a single-argument call, so the output is identical on
# Python 2 (where the parentheses are just grouping) and valid on Python 3.
print("Starting preprocessing...")
unigram_f, freqBand = unigram_frequencies(preprocessor, WORK, MAX_F, N_DOCS)

while curr > 1:
    print("ngrams of n=%d\nngrams of n=%d: parsing...\nngrams of n=%d: starting parser..." % (curr, curr, curr))
    parser = Parser(curr, MIN_F, unigram_f, freqBand)
    print("ngrams of n=%d: started!\nngrams of n=%d: parsing input from stream..." % (curr, curr))
    streamer = Streamer(WORK, n=N_DOCS)
    for line in streamer:
        # Only the untouched corpus still needs preprocessing; later passes
        # read the snapshot written by persist() at the end of the previous pass.
        parser(preprocessor(line) if WORK == CORPUS else line,
               flush_at=FLUSHING_RATIO)
    print("ngrams of n=%d: finished parsing." % curr)
    print("ngrams of n=%d: rewriting corpus with multiwords..." % curr)
    parser.rewrite()
    print("ngrams of n=%d: storing temporary snapshot of the data for feedback..." % curr)
    # From here on WORK points at the snapshot, so the next (shorter) n-gram
    # pass sees the multiwords found in this one.
    WORK = persist(TMP, parser)
    print("ngrams of n=%d: done!" % curr)
    curr -= 1

print("Applying annotation on original corpus...")
restore(CORPUS, TMP, OUT)
print("Applying linguistic smoothing around high frequency tokens...")
smooth(OUT, TMP, CONFIDENCE)
print("Done!\nCleaning up...")
clean_up(TMP)
print("Done!\nComplete.")
def main():
    """Plot summary statistics for the movie project directory in sys.argv[1].

    Reads ``project.xml`` (root attributes ``fps`` and ``frames``) from the
    project directory, changes into ``OUTPUT_DIR_NAME`` (created if missing)
    and then reads ``../shots.txt``, ``../motion_shot-avg.txt`` and
    ``../subtitles.txt``.  Four matplotlib figures are shown in turn:
    duration, shot layout, shot-length/motion trend lines and a polar
    "radar" overview.  Basic statistics are printed to stdout.

    All prints are single-argument calls so that the output is the same on
    Python 2 and Python 3.

    Raises:
        IndexError: if ``sys.argv[1]`` is missing or a data file is empty.
        OSError/IOError: if the project directory or a data file is missing.
        ValueError: if a data line cannot be parsed as numbers.
    """
    os.chdir(sys.argv[1])
    try:
        os.mkdir(OUTPUT_DIR_NAME)
    except OSError:
        # Best effort: normally the directory already exists.  A genuine
        # failure still surfaces at os.chdir(OUTPUT_DIR_NAME) below.
        pass

    movie = et.parse("project.xml").getroot()
    fps = float(movie.attrib["fps"])
    frames = float(movie.attrib["frames"])
    os.chdir(OUTPUT_DIR_NAME)

    seconds = frames / fps
    minutes = seconds / 60.0
    hours = seconds / float(60 * 60)
    # NOTE(review): despite the name and the "%" labels, this is the fraction
    # of a two-hour movie (1.0 == 120 min), not a percentage -- confirm intent.
    percent = hours / 2.0
    print("%s seconds" % seconds)
    print("%.2f hours" % hours)
    print("%.2f %%" % percent)
    print("%d:%02d" % (math.floor(hours), (hours - math.floor(hours)) * 60))

    # ===== DURATION =====
    plt.axis(ymin=0, ymax=10, xmin=0, xmax=3 * 60)
    plt.xlabel("%d mins, %.2f %% of 2 hours" % (seconds / 60, percent))
    lw = 20
    # Black reference bar (two hours) above a blue bar for this movie.
    plt.plot([0, 2 * 60], [1, 1], "k-", linewidth=lw, solid_capstyle="butt")
    plt.plot([0, hours * 60], [2, 2], "b-", linewidth=lw, solid_capstyle="butt")
    # Same comparison as circles: the filled disc's area scales with `percent`.
    r = 200 // 2
    r2 = math.sqrt(percent * r * r)
    plt.plot([100], [6], "o", markeredgewidth=0, markersize=2 * r2, markerfacecolor="b")
    plt.plot([100], [6], "o", markeredgewidth=1, markersize=2 * r, markerfacecolor="none")
    plt.axis("off")
    plt.show()
    #plt.savefig(os.path.join(OUTPUT_DIR_NAME, "duration.ps"))

    # ===== SHOTS =====
    # One shot per line: frame_start <TAB> frame_end <TAB> length.
    # line.strip() skips blank lines (a bare "\n" is still truthy).
    with open(os.path.join("..", "shots.txt"), "r") as f:
        shots = [[int(field) for field in line.split("\t")[:3]]
                 for line in f if line.strip()]

    fig = plt.figure()
    ax = fig.add_subplot(111)
    # Positional limits: the ymin=/ymax= keywords are gone in current matplotlib.
    plt.ylim(5, 15.5)
    #ax.set_yscale("log")
    for i, (frame_start, frame_end, _length) in enumerate(shots):
        # Alternate colour and height so neighbouring shots can be told apart.
        if i % 2 == 0:
            y, color = 10, (0, 0, 0)
        else:
            y, color = 10.5, (0.5, 0.5, 0.5)
        #ax.hlines(length/100.0, frame_start, frame_end, color=color, lw=100)
        ax.hlines(y, frame_start, frame_end, color=color, lw=30)
    ax.axis("off")
    plt.show()

    # ===== TRENDLINES =====
    # One shot per line: average motion <TAB> duration in frames.
    with open(os.path.join("..", "motion_shot-avg.txt"), "r") as f:
        rows = [line.split("\t") for line in f if line.strip()]
    motions = [float(row[0]) for row in rows]
    durations = [int(row[1]) for row in rows]
    durations_sec = [float(d / fps) for d in durations]

    print("%d shots" % len(durations))
    print("%.1f cuts per minute" % (len(durations) / minutes))
    print("min: %s s" % min(durations_sec))
    print("max: %s s" % max(durations_sec))
    print("range: %s s" % (max(durations_sec) - min(durations_sec)))
    print("asl: %s s" % numpy.mean(durations_sec))
    print("std: %s s" % numpy.std(durations_sec))
    print("var: %s s" % numpy.var(durations_sec))

    with open(os.path.join("..", "subtitles.txt")) as subtitle_file:
        word_count = len(subtitle_file.read().split())
    words_per_minute = word_count / minutes
    print("%s words / minute" % words_per_minute)

    WINDOW_LEN = 20
    TREND_DEGREE = 1  # degree of the fitted trend polynomial (1 = straight line)

    # Pad both ends by WINDOW_LEN samples; the padding is cut off again when
    # the smoothed curve is plotted.
    # NOTE(review): smooth() is defined elsewhere; presumably a window-based
    # 1-D smoother -- confirm its edge behaviour.
    data = numpy.array(WINDOW_LEN * [durations_sec[0]] + durations_sec + WINDOW_LEN * [0])
    trend_duration = numpy.poly1d(
        numpy.polyfit(numpy.arange(len(data)), data, TREND_DEGREE))
    smooth_data = smooth(data, window_len=WINDOW_LEN, window='hanning')

    plt.axis(ymin=0, ymax=60.0, xmin=0, xmax=len(durations_sec) - 1)
    plt.plot(smooth_data[WINDOW_LEN:-WINDOW_LEN], "r-", label="shot length (in seconds)")
    plt.plot(trend_duration(numpy.arange(len(data))), "m-", label="shot length trend")
    plt.legend(loc="upper left")

    data = numpy.array(WINDOW_LEN * [motions[0]] + motions + WINDOW_LEN * [0])
    trend_motion = numpy.poly1d(
        numpy.polyfit(numpy.arange(len(data)), data, TREND_DEGREE))
    smooth_data = smooth(data, window_len=WINDOW_LEN, window='hanning')

    plt.xlabel("shot / %d" % (len(durations)))
    plt.twinx()  # second y axis: motion is plotted on a 0..1 scale
    plt.axis(ymin=0, ymax=1.0)
    plt.plot(smooth_data[WINDOW_LEN:-WINDOW_LEN], "b-", label="motion (0..1)")
    plt.plot(trend_motion(numpy.arange(len(data))), "c-", label="motion trend")
    plt.legend(loc="upper right")
    plt.show()

    # ===== TEST =====
    # Experimental polar plot of motion and audio; deliberately disabled.
    if False:
        smooth_duration = 0.5 * smooth_data / numpy.max(smooth_data)
        smooth_deriv = 100 * smooth(
            numpy.diff(smooth_data), window_len=10 * WINDOW_LEN)[WINDOW_LEN:-WINDOW_LEN]
        smooth_motion = smooth_data[WINDOW_LEN:-WINDOW_LEN]

        # Disabled variant, kept for reference:
        # for x, y in enumerate(smooth_deriv):
        #     m = smooth_motion[x]
        #     if x % 2 == 0:
        #         plt.vlines(x, y-m, y+m, lw=m*2)
        # plt.plot(smooth_deriv, "w-", lw=1)
        # mini = min(len(smooth_deriv), len(smooth_motion))
        # plt.fill_between(range(mini), smooth_deriv[:mini], smooth_deriv[:mini]+smooth_motion[mini], color="y")
        # plt.axis(ymin=-1, ymax=1, xmin=0, xmax=len(durations_sec)-1)
        # plt.show()

        # audio
        with open(os.path.join("..", "smooth_audio.txt"), "r") as f:
            audio_values = [float(line) for line in f if line.strip()]
        audio_step = float(len(audio_values)) / float(len(smooth_deriv))
        audio_counter = 0

        fig = plt.figure()
        ax = fig.add_subplot(111, polar=True)
        STEP = math.ceil(0.01 * len(smooth_deriv) / float(2 * math.pi))

        for x, y in enumerate(smooth_deriv):
            if x % STEP == 0:
                x = 2 * math.pi * float(x) / len(smooth_deriv)
                y += 2
                audio_value = 0.75 * audio_values[int(audio_counter * audio_step)]
                ax.vlines(x, y + 0.01, y + 0.01 + audio_value, lw=audio_value * 2, color="y")
            audio_counter += 1

        for x, y in enumerate(smooth_deriv):
            motion = smooth_motion[x]
            #d = smooth_duration[x]
            if x % STEP == 0:
                x = 2 * math.pi * float(x) / len(smooth_deriv)
                y += 2
                ax.vlines(x, y + 0.01, y + 0.01 + motion, lw=motion * 2)
                # audio_value = 0.75 * values[int( audio_counter * audio_step )]
                # ax.vlines(x, y-0.01, y-0.01-audio_value, lw=audio_value*2)
                # audio_counter += 1
        plt.show()

    # ===== RADAR =====
    avg_motion = numpy.mean(motions)

    # Each property is divided by a hand-picked constant before plotting.
    properties = {}
    properties["duration"] = percent
    #properties["average shot length"] = 0.1 * asl / float(fps)
    properties["cuts / minute"] = (len(durations) / minutes) / 20.0
    properties["average motion"] = avg_motion / 0.25
    properties["words / minute"] = words_per_minute / 60.0
    properties["average loudness"] = 0.5

    # Materialise keys and values once: dict views cannot be indexed on
    # Python 3, and both lists share the same order.
    labels = list(properties.keys())
    radii = [properties[label] for label in labels]
    count = len(labels)

    angle_step = 360.0 / count
    angles = [math.radians(i * angle_step) for i in range(count)]

    fig = plt.figure()
    ax = fig.add_subplot(111, polar=True)
    ax.set_rmax(5.0)
    ax.set_xticks([i * 2 * math.pi / count for i in range(count)])
    ax.set_xticklabels(labels)
    ax.plot(angles, radii)
    for angle, radius in zip(angles, radii):
        ax.vlines(angle, 0, radius, lw=15)
    #ax.axis("off")
    plt.show()

    # ===== COLOR ===== (disabled, kept for reference)
    # f = open("colors.txt", "r")
    # colors = [[int(values[0]), int(values[1]), int(values[2]), int(values[3])] for values in [line.split(", ") for line in f if line]]
    # f.close()
    # #print colors
    # x = numpy.arange(0, 2*math.pi, 2*math.pi/len(colors))
    # #print x
    # y = [values[3] for values in [color for color in colors]]
    # #print y
    # total = sum(y)
    # for i, yps in enumerate(y):
    #     faktor = float(yps) / total
    #     y[i] = math.sqrt(faktor * total*total)
    # fig = plt.figure()
    # ax = fig.add_subplot(111, polar=True)
    # for i, color in enumerate(colors):
    #     ax.bar(x[i], y[i], width=0.2*math.pi, edgecolor="none", color=(color[0]/255.0, color[1]/255.0, color[2]/255.0))
    # ax.axis("off")
    # plt.show()

    #raw_input("- done -")
def main():
    """Plot summary statistics for the movie project directory in sys.argv[1].

    Reads ``project.xml`` (root attributes ``fps`` and ``frames``) from the
    project directory, changes into ``OUTPUT_DIR_NAME`` (created if missing)
    and then reads ``shots.txt``, ``motion_shot-avg.txt`` and
    ``subtitles.txt`` from the parent directory.  Four matplotlib figures are
    shown in turn: duration, shot layout, shot-length/motion trend lines and
    a polar "radar" overview.  Basic statistics are printed to stdout.

    Input paths are built with ``os.path.join`` so they work on every
    platform, not only where ``\\`` is the separator.  All prints are
    single-argument calls so the output is the same on Python 2 and 3.

    Raises:
        IndexError: if ``sys.argv[1]`` is missing or a data file is empty.
        OSError/IOError: if the project directory or a data file is missing.
        ValueError: if a data line cannot be parsed as numbers.
    """
    os.chdir(sys.argv[1])
    try:
        os.mkdir(OUTPUT_DIR_NAME)
    except OSError:
        # Best effort: normally the directory already exists.  A genuine
        # failure still surfaces at os.chdir(OUTPUT_DIR_NAME) below.
        pass

    movie = et.parse("project.xml").getroot()
    fps = float(movie.attrib["fps"])
    frames = float(movie.attrib["frames"])
    os.chdir(OUTPUT_DIR_NAME)

    seconds = frames / fps
    minutes = seconds / 60.0
    hours = seconds / float(60 * 60)
    # NOTE(review): despite the name and the "%" labels, this is the fraction
    # of a two-hour movie (1.0 == 120 min), not a percentage -- confirm intent.
    percent = hours / 2.0
    print("%s seconds" % seconds)
    print("%.2f hours" % hours)
    print("%.2f %%" % percent)
    print("%d:%02d" % (math.floor(hours), (hours - math.floor(hours)) * 60))

    # ===== DURATION =====
    plt.axis(ymin=0, ymax=10, xmin=0, xmax=3 * 60)
    plt.xlabel("%d mins, %.2f %% of 2 hours" % (seconds / 60, percent))
    lw = 20
    # Black reference bar (two hours) above a blue bar for this movie.
    plt.plot([0, 2 * 60], [1, 1], "k-", linewidth=lw, solid_capstyle="butt")
    plt.plot([0, hours * 60], [2, 2], "b-", linewidth=lw, solid_capstyle="butt")
    # Same comparison as circles: the filled disc's area scales with `percent`.
    r = 200 // 2
    r2 = math.sqrt(percent * r * r)
    plt.plot([100], [6], "o", markeredgewidth=0, markersize=2 * r2, markerfacecolor="b")
    plt.plot([100], [6], "o", markeredgewidth=1, markersize=2 * r, markerfacecolor="none")
    plt.axis("off")
    plt.show()
    #plt.savefig(os.path.join(OUTPUT_DIR_NAME, "duration.ps"))

    # ===== SHOTS =====
    # One shot per line: frame_start <TAB> frame_end <TAB> length.
    # line.strip() skips blank lines (a bare "\n" is still truthy).
    with open(os.path.join("..", "shots.txt"), "r") as f:
        shots = [[int(field) for field in line.split("\t")[:3]]
                 for line in f if line.strip()]

    fig = plt.figure()
    ax = fig.add_subplot(111)
    # Positional limits: the ymin=/ymax= keywords are gone in current matplotlib.
    plt.ylim(5, 15.5)
    #ax.set_yscale("log")
    for i, (frame_start, frame_end, _length) in enumerate(shots):
        # Alternate colour and height so neighbouring shots can be told apart.
        if i % 2 == 0:
            y, color = 10, (0, 0, 0)
        else:
            y, color = 10.5, (0.5, 0.5, 0.5)
        #ax.hlines(length/100.0, frame_start, frame_end, color=color, lw=100)
        ax.hlines(y, frame_start, frame_end, color=color, lw=30)
    ax.axis("off")
    plt.show()

    # ===== TRENDLINES =====
    # One shot per line: average motion <TAB> duration in frames.
    with open(os.path.join("..", "motion_shot-avg.txt"), "r") as f:
        rows = [line.split("\t") for line in f if line.strip()]
    motions = [float(row[0]) for row in rows]
    durations = [int(row[1]) for row in rows]
    durations_sec = [float(d / fps) for d in durations]

    print("%d shots" % len(durations))
    print("%.1f cuts per minute" % (len(durations) / minutes))
    print("min: %s s" % min(durations_sec))
    print("max: %s s" % max(durations_sec))
    print("range: %s s" % (max(durations_sec) - min(durations_sec)))
    print("asl: %s s" % numpy.mean(durations_sec))
    print("std: %s s" % numpy.std(durations_sec))
    print("var: %s s" % numpy.var(durations_sec))

    with open(os.path.join("..", "subtitles.txt")) as subtitle_file:
        word_count = len(subtitle_file.read().split())
    words_per_minute = word_count / minutes
    print("%s words / minute" % words_per_minute)

    WINDOW_LEN = 20
    TREND_DEGREE = 1  # degree of the fitted trend polynomial (1 = straight line)

    # Pad both ends by WINDOW_LEN samples; the padding is cut off again when
    # the smoothed curve is plotted.
    # NOTE(review): smooth() is defined elsewhere; presumably a window-based
    # 1-D smoother -- confirm its edge behaviour.
    data = numpy.array(WINDOW_LEN * [durations_sec[0]] + durations_sec + WINDOW_LEN * [0])
    trend_duration = numpy.poly1d(
        numpy.polyfit(numpy.arange(len(data)), data, TREND_DEGREE))
    smooth_data = smooth(data, window_len=WINDOW_LEN, window='hanning')

    plt.axis(ymin=0, ymax=60.0, xmin=0, xmax=len(durations_sec) - 1)
    plt.plot(smooth_data[WINDOW_LEN:-WINDOW_LEN], "r-", label="shot length (in seconds)")
    plt.plot(trend_duration(numpy.arange(len(data))), "m-", label="shot length trend")
    plt.legend(loc="upper left")

    data = numpy.array(WINDOW_LEN * [motions[0]] + motions + WINDOW_LEN * [0])
    trend_motion = numpy.poly1d(
        numpy.polyfit(numpy.arange(len(data)), data, TREND_DEGREE))
    smooth_data = smooth(data, window_len=WINDOW_LEN, window='hanning')

    plt.xlabel("shot / %d" % (len(durations)))
    plt.twinx()  # second y axis: motion is plotted on a 0..1 scale
    plt.axis(ymin=0, ymax=1.0)
    plt.plot(smooth_data[WINDOW_LEN:-WINDOW_LEN], "b-", label="motion (0..1)")
    plt.plot(trend_motion(numpy.arange(len(data))), "c-", label="motion trend")
    plt.legend(loc="upper right")
    plt.show()

    # ===== TEST =====
    # Experimental polar plot of motion and audio; deliberately disabled.
    if False:
        smooth_duration = 0.5 * smooth_data / numpy.max(smooth_data)
        smooth_deriv = 100 * smooth(
            numpy.diff(smooth_data), window_len=10 * WINDOW_LEN)[WINDOW_LEN:-WINDOW_LEN]
        smooth_motion = smooth_data[WINDOW_LEN:-WINDOW_LEN]

        # Disabled variant, kept for reference:
        # for x, y in enumerate(smooth_deriv):
        #     m = smooth_motion[x]
        #     if x % 2 == 0:
        #         plt.vlines(x, y-m, y+m, lw=m*2)
        # plt.plot(smooth_deriv, "w-", lw=1)
        # mini = min(len(smooth_deriv), len(smooth_motion))
        # plt.fill_between(range(mini), smooth_deriv[:mini], smooth_deriv[:mini]+smooth_motion[mini], color="y")
        # plt.axis(ymin=-1, ymax=1, xmin=0, xmax=len(durations_sec)-1)
        # plt.show()

        # audio
        with open(os.path.join("..", "smooth_audio.txt"), "r") as f:
            audio_values = [float(line) for line in f if line.strip()]
        audio_step = float(len(audio_values)) / float(len(smooth_deriv))
        audio_counter = 0

        fig = plt.figure()
        ax = fig.add_subplot(111, polar=True)
        STEP = math.ceil(0.01 * len(smooth_deriv) / float(2 * math.pi))

        for x, y in enumerate(smooth_deriv):
            if x % STEP == 0:
                x = 2 * math.pi * float(x) / len(smooth_deriv)
                y += 2
                audio_value = 0.75 * audio_values[int(audio_counter * audio_step)]
                ax.vlines(x, y + 0.01, y + 0.01 + audio_value, lw=audio_value * 2, color="y")
            audio_counter += 1

        for x, y in enumerate(smooth_deriv):
            motion = smooth_motion[x]
            #d = smooth_duration[x]
            if x % STEP == 0:
                x = 2 * math.pi * float(x) / len(smooth_deriv)
                y += 2
                ax.vlines(x, y + 0.01, y + 0.01 + motion, lw=motion * 2)
                # audio_value = 0.75 * values[int( audio_counter * audio_step )]
                # ax.vlines(x, y-0.01, y-0.01-audio_value, lw=audio_value*2)
                # audio_counter += 1
        plt.show()

    # ===== RADAR =====
    avg_motion = numpy.mean(motions)

    # Each property is divided by a hand-picked constant before plotting.
    properties = {}
    properties["duration"] = percent
    #properties["average shot length"] = 0.1 * asl / float(fps)
    properties["cuts / minute"] = (len(durations) / minutes) / 20.0
    properties["average motion"] = avg_motion / 0.25
    properties["words / minute"] = words_per_minute / 60.0
    properties["average loudness"] = 0.5

    # Materialise keys and values once: dict views cannot be indexed on
    # Python 3, and both lists share the same order.
    labels = list(properties.keys())
    radii = [properties[label] for label in labels]
    count = len(labels)

    angle_step = 360.0 / count
    angles = [math.radians(i * angle_step) for i in range(count)]

    fig = plt.figure()
    ax = fig.add_subplot(111, polar=True)
    ax.set_rmax(5.0)
    ax.set_xticks([i * 2 * math.pi / count for i in range(count)])
    ax.set_xticklabels(labels)
    ax.plot(angles, radii)
    for angle, radius in zip(angles, radii):
        ax.vlines(angle, 0, radius, lw=15)
    #ax.axis("off")
    plt.show()

    # ===== COLOR ===== (disabled, kept for reference)
    # f = open("colors.txt", "r")
    # colors = [[int(values[0]), int(values[1]), int(values[2]), int(values[3])] for values in [line.split(", ") for line in f if line]]
    # f.close()
    # #print colors
    # x = numpy.arange(0, 2*math.pi, 2*math.pi/len(colors))
    # #print x
    # y = [values[3] for values in [color for color in colors]]
    # #print y
    # total = sum(y)
    # for i, yps in enumerate(y):
    #     faktor = float(yps) / total
    #     y[i] = math.sqrt(faktor * total*total)
    # fig = plt.figure()
    # ax = fig.add_subplot(111, polar=True)
    # for i, color in enumerate(colors):
    #     ax.bar(x[i], y[i], width=0.2*math.pi, edgecolor="none", color=(color[0]/255.0, color[1]/255.0, color[2]/255.0))
    # ax.axis("off")
    # plt.show()

    #raw_input("- done -")
def main(wavFileName):
    """Cut a WAV file at quiet points and run speech recognition on each cut.

    The signal's spectrogram is reduced to the energy in the 300-3500 Hz
    band, smoothed, and its local minima below a histogram-derived threshold
    become candidate cut points.  Candidates are thinned so that consecutive
    cuts are more than ``avgSec`` seconds apart.  Every chunk except the last
    one is written next to the input as ``<stem>.<X>_<Y><ext>`` (X and Y in
    whole seconds), passed to ``Pygsr``, and the result is logged together
    with an SRT-like time range.

    Args:
        wavFileName: path of the input WAV file.
            NOTE(review): samples are always decoded as 16-bit integers,
            whatever ``sampwidth`` says -- presumably 16-bit mono input is
            expected; confirm.

    Returns:
        Always 1.
    """
    wavFile = wave.open(wavFileName)
    try:
        (nchannels, sampwidth, framerate, nframes, comptype, compname) = wavFile.getparams()
        frames = wavFile.readframes(-1)
    finally:
        # The input handle used to stay open for the whole run.
        wavFile.close()
    # frombuffer replaces the deprecated fromstring/"Int16" spelling; the
    # resulting read-only view is fine because the samples are never modified.
    npFrames = np.frombuffer(frames, dtype=np.int16)

    ## compute the spectrogram
    ## make sure FFT size is not too big for good accuracy
    nFft = 64
    nOverlap = 32
    fftWindow = nFft - nOverlap  # hop size: samples per spectrogram column

    ##TODO: check if this is needed
    ## pad the input for perfect FFT match
    ## npFrames = np.r_[npFrames, np.zeros(nFft - nframes % nFft)]

    # Pxx: power array, freqs: frequency of each row, bins: time points,
    # im: the AxesImage instance (only Pxx and freqs are used).
    (Pxx, freqs, bins, im) = plt.specgram(npFrames, Fs=framerate, NFFT=nFft, noverlap=nOverlap)
    #plt.show()
    plt.clf()

    ## extract the voice frequencies, from 300Hz to 3500Hz
    # voiceArray is a 0/1 mask over freqs; the dot product sums the power of
    # the masked frequency rows for every time point.
    f300Ind = lib.overflow(freqs, 300)
    f3500Ind = lib.overflow(freqs, 3500)
    voiceArray = np.zeros(len(freqs))
    voiceArray[f300Ind:f3500Ind] = 1
    voiceFreq = np.transpose(np.dot(np.transpose(Pxx), voiceArray))

    ## compute the interesting minimums based on minimums and threshold
    #TODO: consider using the mlab/numpy function
    histData = plt.hist(voiceFreq, bins=100, range=(min(voiceFreq), np.mean(voiceFreq)))
    #plt.show()
    plt.clf()
    overflowPercent = 0.7
    overflowIndex = lib.overflow_hist(histData[0], overflowPercent)
    overflowValue = histData[1][overflowIndex]

    ## smooth the curve to find the minimums
    voiceFreqSmooth = lib.smooth(voiceFreq, 128)
    # True where a sample is strictly below both neighbours (ends count).
    minimums = np.r_[True, voiceFreqSmooth[1:] < voiceFreqSmooth[:-1]] & \
        np.r_[voiceFreqSmooth[:-1] < voiceFreqSmooth[1:], True]

    ##TODO: change name
    ## cutting points are local minimums under the histogram threshold
    cutPoints = np.where(minimums & (voiceFreqSmooth < overflowValue))[0]

    ## thin the minimums so that cuts are more than avgSec seconds apart
    avgSec = 3
    minGap = framerate * avgSec
    cutPointsNSec = [0]
    for pt in cutPoints:
        # Spectrogram column -> sample index; plain ints keep the later
        # str()/timedelta calls independent of numpy scalar types.
        samplePos = int(pt) * fftWindow
        if samplePos - cutPointsNSec[-1] > minGap:
            cutPointsNSec.append(samplePos)

    ## create the cuts as additional files
    # Floor division keeps whole seconds on Python 3 as well; true division
    # would put floats into the file names and the time stamps.
    cutPointsNSecInSec = [x // framerate for x in cutPointsNSec]

    timestamp = [(start, stop) for start, stop in lib.pairwise(cutPointsNSec, fillvalue=0)]
    timestampNFrames = [stop - start for start, stop in timestamp]

    # generate the extension to the filename, e.g. filename.X_Y.wav for a cut
    # from seconds X to Y
    timestampInSec = [(start, stop) for start, stop in lib.pairwise(cutPointsNSecInSec, fillvalue="end")]
    addExtension = [str(start) + "_" + str(stop) for start, stop in timestampInSec]

    logger = logging.getLogger(__name__)
    logger.debug("%s %s %s", timestamp, timestampNFrames, addExtension)
    logger.debug("%s %s %s", len(timestamp), len(timestampNFrames), len(addExtension))

    # splitext keeps the real extension for names with several dots, and
    # path.join avoids writing to "/" when the input has no directory part.
    outDir = path.dirname(wavFileName)
    stem, ext = path.splitext(path.basename(wavFileName))

    totalRes = []
    #TODO: take care of the last index, when cutPointNSecInSec is "end"
    for idx in range(len(timestamp) - 1):
        #TODO: make a lib function out of that
        filename = path.join(outDir, stem + "." + addExtension[idx] + ext)
        start, stop = timestamp[idx]

        wavChunk = wave.open(filename, "w")
        try:
            wavChunk.setparams((nchannels, sampwidth, framerate, timestampNFrames[idx], comptype, compname))
            wavChunk.writeframes(npFrames[start:stop].tobytes())
        finally:
            wavChunk.close()

        pygsr = Pygsr(filename)
        pygsr.convert()
        res = pygsr.speech_to_text("en", indx=idx)
        totalRes.append(res)

        logger.debug("%s %s %s", idx, addExtension[idx], timestamp[idx])
        # SRT-like range, shrunk by 200 ms at each end.
        h1 = str(datetime.timedelta(seconds=timestampInSec[idx][0])) + ",200"
        h2 = str(datetime.timedelta(seconds=timestampInSec[idx][1] - 1)) + ",800"
        logger.info("%s", idx)
        logger.info("%s --> %s", h1, h2)
        logger.info("%s", res)
        logger.info("")
        #logger.debug("this should not appear in the srt file")

    logger.debug("%s", totalRes)
    return 1
def main():
    """Write a smoothed loudness curve for the project's trimmed audio track.

    Changes into the project directory given in ``sys.argv[1]``, reads
    ``project.xml`` (root attributes ``path`` and ``fps``) and loads
    ``audio_trimmed.wav`` from the directory of ``path``.  The signal is cut
    into chunks of 1/8 second; for each chunk the RMS of the peak-normalised
    samples is converted to decibels.  The smoothed curve, shifted so that
    -60 dB maps to 0 and 0 dB to 1, is plotted and written (negative values
    clamped to 0) to ``smooth_audio.txt``, one value per line.

    Integer (floor) division and single-argument prints keep the function
    working identically on Python 2 and Python 3.

    Raises:
        IndexError: if ``sys.argv[1]`` is missing.
        OSError/IOError: if the project directory or the WAV file is missing.
    """
    os.chdir(sys.argv[1])

    movie = et.parse("project.xml").getroot()
    movie_dir = os.path.dirname(movie.attrib["path"])
    fps = float(movie.attrib["fps"])
    # os.chdir(path)

    wav_path = os.path.join(movie_dir, "audio_trimmed.wav")
    print(wav_path)

    wav = wave.open(wav_path, "rb")
    try:
        bit = wav.getsampwidth() * 8
    finally:
        wav.close()
    print("%d bit" % bit)  # usually: signed 16 bit [-32768, 32767]

    rate, data = scipy.io.wavfile.read(wav_path)
    print("%s hz" % rate)

    # http://en.wikipedia.org/wiki/Sound_level_meter#Exponentially_averaging_sound_level_meter
    chunk = rate // 8  # samples per chunk (1/8 s); must stay an int for slicing

    # Largest magnitude, computed in float: abs() on an int16 array would wrap
    # -32768 back to -32768.  A silent file gets peak 1.0 to avoid 0/0.
    peak = max(float(numpy.max(data)), -float(numpy.min(data)))
    if peak == 0:
        peak = 1.0

    # Disabled experiment, kept for reference:
    # fft = numpy.fft.rfft(data, chunk)
    # fft = numpy.absolute(fft)
    # print fft
    # plt.plot(fft)
    # plt.show()

    # Collect in a list and convert once; numpy.append copies the whole
    # array on every call.
    levels_db = []
    for i in range(len(data) // chunk):
        # normalize to [-1, 1]
        values = numpy.asarray(data[i * chunk:(i + 1) * chunk], dtype=float) / peak
        # root mean square
        rms = numpy.sqrt(numpy.mean(numpy.power(values, 2)))
        # decibel; the tiny offset keeps log10 finite for silent chunks
        levels_db.append(20 * numpy.log10(1e-20 + rms))
    data_db = numpy.array(levels_db)

    #plt.ylim(-60, 0)
    #plt.plot(smooth(data_db, window_len=rate/fps), "g-")
    # NOTE(review): smooth() is defined elsewhere and receives a float
    # window_len here, as before -- confirm it accepts that.
    smooth_db = 1 + smooth(data_db, window_len=rate / (fps * 3)) / (60.0)  # [0..1]

    plt.ylim(0, 1)
    plt.plot(smooth_db, "g-")

    # Opened only now, so a failure above no longer leaves an empty,
    # truncated output file behind, and the handle is always closed.
    with open("smooth_audio.txt", "w") as f_out:
        for item in smooth_db:
            if item < 0:
                item = 0
            f_out.write("%f\n" % float(item))

    #plt.plot(data_db)
    plt.show()
def main():
    """Write a smoothed loudness curve for the project's trimmed audio track.

    Changes into the project directory given in ``sys.argv[1]``, reads
    ``project.xml`` (root attributes ``path`` and ``fps``) and loads
    ``audio_trimmed.wav`` from the directory of ``path``.  The signal is cut
    into chunks of 1/8 second; for each chunk the RMS of the peak-normalised
    samples is converted to decibels.  The smoothed curve, shifted so that
    -60 dB maps to 0 and 0 dB to 1, is plotted and written (negative values
    clamped to 0) to ``smooth_audio.txt``, one value per line.

    Integer (floor) division and single-argument prints keep the function
    working identically on Python 2 and Python 3.

    Raises:
        IndexError: if ``sys.argv[1]`` is missing.
        OSError/IOError: if the project directory or the WAV file is missing.
    """
    os.chdir(sys.argv[1])

    movie = et.parse("project.xml").getroot()
    movie_dir = os.path.dirname(movie.attrib["path"])
    fps = float(movie.attrib["fps"])
    # os.chdir(path)

    wav_path = os.path.join(movie_dir, "audio_trimmed.wav")
    print(wav_path)

    wav = wave.open(wav_path, "rb")
    try:
        bit = wav.getsampwidth() * 8
    finally:
        wav.close()
    print("%d bit" % bit)  # usually: signed 16 bit [-32768, 32767]

    rate, data = scipy.io.wavfile.read(wav_path)
    print("%s hz" % rate)

    # http://en.wikipedia.org/wiki/Sound_level_meter#Exponentially_averaging_sound_level_meter
    chunk = rate // 8  # samples per chunk (1/8 s); must stay an int for slicing

    # Largest magnitude, computed in float: abs() on an int16 array would wrap
    # -32768 back to -32768.  A silent file gets peak 1.0 to avoid 0/0.
    peak = max(float(numpy.max(data)), -float(numpy.min(data)))
    if peak == 0:
        peak = 1.0

    # Disabled experiment, kept for reference:
    # fft = numpy.fft.rfft(data, chunk)
    # fft = numpy.absolute(fft)
    # print fft
    # plt.plot(fft)
    # plt.show()

    # Collect in a list and convert once; numpy.append copies the whole
    # array on every call.
    levels_db = []
    for i in range(len(data) // chunk):
        # normalize to [-1, 1]
        values = numpy.asarray(data[i * chunk:(i + 1) * chunk], dtype=float) / peak
        # root mean square
        rms = numpy.sqrt(numpy.mean(numpy.power(values, 2)))
        # decibel; the tiny offset keeps log10 finite for silent chunks
        levels_db.append(20 * numpy.log10(1e-20 + rms))
    data_db = numpy.array(levels_db)

    #plt.ylim(-60, 0)
    #plt.plot(smooth(data_db, window_len=rate/fps), "g-")
    # NOTE(review): smooth() is defined elsewhere and receives a float
    # window_len here, as before -- confirm it accepts that.
    smooth_db = 1 + smooth(data_db, window_len=rate / (fps * 3)) / (60.0)  # [0..1]

    plt.ylim(0, 1)
    plt.plot(smooth_db, "g-")

    # Opened only now, so a failure above no longer leaves an empty,
    # truncated output file behind, and the handle is always closed.
    with open("smooth_audio.txt", "w") as f_out:
        for item in smooth_db:
            if item < 0:
                item = 0
            f_out.write("%f\n" % float(item))

    #plt.plot(data_db)
    plt.show()