コード例 #1
0
    # Unigram statistics are computed once and handed to every Parser below.
    print "Starting preprocessing..."
    unigram_f, freqBand = unigram_frequencies(preprocessor, WORK, MAX_F, N_DOCS)

    # One pass per n-gram order: starts at the current value of `curr`,
    # decrements it each round and stops once it reaches 1.
    while curr > 1:

        print "ngrams of n=%d\nngrams of n=%d: parsing...\nngrams of n=%d: starting parser..." % (curr, curr, curr)
        parser = Parser(curr, MIN_F, unigram_f, freqBand)
        print "ngrams of n=%d: started!\nngrams of n=%d: parsing input from stream..." % (curr, curr)
        streamer = Streamer(WORK, n=N_DOCS)
        for line in streamer:
            # Lines are preprocessed only while WORK still equals CORPUS;
            # after WORK has been rebound below they are parsed as-is
            # (presumably the snapshot is already preprocessed - TODO confirm).
            if WORK == CORPUS:
                parser(preprocessor(line), flush_at=FLUSHING_RATIO)
            else:
                parser(line, flush_at=FLUSHING_RATIO)
        print "ngrams of n=%d: finished parsing." % curr
        print "ngrams of n=%d: rewriting corpus with multiwords..." % curr
        parser.rewrite()

        print "ngrams of n=%d: storing temporary snapshot of the data for feedback..." % curr
        # The value returned by persist() becomes the input of the next round.
        WORK = persist(TMP, parser)
        print "ngrams of n=%d: done!" % curr
        curr -= 1

    # Post-processing once all n-gram orders have been handled.
    print "Applying annotation on original corpus..."
    restore(CORPUS, TMP, OUT)
    print "Applying linguistic smoothing around high frequency tokens..."
    smooth(OUT, TMP, CONFIDENCE)
    print "Done!\nCleaning up..."
    clean_up(TMP)
    print "Done!\nComplete."
コード例 #2
0
def main():
    """Print and plot summary statistics for one analysed movie project.

    The project directory is taken from ``sys.argv[1]``. It must contain
    ``project.xml`` (root element with ``fps`` and ``frames`` attributes),
    ``shots.txt``, ``motion_shot-avg.txt`` and ``subtitles.txt``. The
    function changes into ``OUTPUT_DIR_NAME`` below the project directory
    (creating it when missing), prints duration, shot-length and word-rate
    figures, and shows four matplotlib figures one after another:
    duration, shot bar, shot-length/motion trends and a radar chart.
    """
    os.chdir(sys.argv[1])
    # Create the output directory only when it is missing, so that real
    # failures (e.g. permissions) are not silently swallowed.
    if not os.path.isdir(OUTPUT_DIR_NAME):
        os.mkdir(OUTPUT_DIR_NAME)

    movie = et.parse("project.xml").getroot()
    fps = float(movie.attrib["fps"])
    frames = float(movie.attrib["frames"])

    # From here on the data files live one level up ("..").
    os.chdir(OUTPUT_DIR_NAME)

    s = frames / fps  # running time in seconds
    m = s / 60.0  # running time in minutes
    h = s / float(60 * 60)  # running time in hours
    # Running time as a fraction of a two-hour reference length.
    # NOTE(review): this is a 0..1 ratio, yet it is displayed with a "%"
    # sign without being multiplied by 100 - confirm the intended display.
    percent = h / 2.0

    # Single-argument print() calls behave the same on Python 2 and 3.
    print("%s seconds" % s)
    print("%.2f hours" % h)
    print("%.2f %%" % percent)
    print("%d:%02d" % (math.floor(h), (h - math.floor(h)) * 60))

    # ===== DURATION =====
    plt.axis(ymin=0, ymax=10, xmin=0, xmax=3 * 60)
    plt.xlabel("%d mins, %.2f %% of 2 hours" % (s / 60, percent))

    lw = 20
    # Black bar: the two-hour reference; blue bar: this movie (in minutes).
    plt.plot([0, 2 * 60], [1, 1], "k-", linewidth=lw, solid_capstyle="butt")
    plt.plot([0, h * 60], [2, 2], "b-", linewidth=lw, solid_capstyle="butt")

    # Two concentric discs: the outline has radius r; the filled disc is
    # sized so that r2 * r2 == percent * r * r.
    r = 200 / 2
    r2 = math.sqrt(percent * r * r)
    plt.plot([100], [6],
             "o",
             markeredgewidth=0,
             markersize=2 * r2,
             markerfacecolor="b")
    plt.plot([100], [6],
             "o",
             markeredgewidth=1,
             markersize=2 * r,
             markerfacecolor="none")

    plt.axis("off")
    plt.show()

    # ===== SHOTS =====
    # Each non-blank line holds three tab-separated integers.
    with open(os.path.join("..", "shots.txt"), "r") as f:
        shots = [(int(cols[0]), int(cols[1]), int(cols[2]))
                 for cols in (line.split("\t") for line in f
                              if line.strip())]

    fig = plt.figure()
    ax = fig.add_subplot(111)
    plt.ylim(5, 15.5)
    for i, (frame_start, frame_end, _length) in enumerate(shots):
        # Alternate colour and height so neighbouring shots stay apart.
        if i % 2 == 0:
            color, y = (0, 0, 0), 10
        else:
            color, y = (0.5, 0.5, 0.5), 10.5
        ax.hlines(y, frame_start, frame_end, color=color, lw=30)

    ax.axis("off")
    plt.show()

    # ===== TRENDLINES =====
    # Each non-blank line holds "<motion float>\t<duration int>".
    with open(os.path.join("..", "motion_shot-avg.txt"), "r") as f:
        rows = [(float(cols[0]), int(cols[1]))
                for cols in (line.split("\t") for line in f
                             if line.strip())]

    motions = [motion for motion, _ in rows]
    durations = [duration for _, duration in rows]
    durations_sec = [d / fps for d in durations]

    print("%d shots" % len(durations))
    print("%.1f cuts per minute" % (len(durations) / m))
    print("min: %s s" % min(durations_sec))
    print("max: %s s" % max(durations_sec))
    print("range: %s s" % (max(durations_sec) - min(durations_sec)))
    print("asl: %s s" % numpy.mean(durations_sec))
    print("std: %s s" % numpy.std(durations_sec))
    print("var: %s s" % numpy.var(durations_sec))

    with open(os.path.join("..", "subtitles.txt")) as subtitle_file:
        word_count = len(subtitle_file.read().split())
    words_per_minute = word_count / m
    print("%s words / minute" % words_per_minute)

    WINDOW_LEN = 20
    TREND_DEGREE = 1  # first-degree polynomial, i.e. a straight line

    # Pad both ends by WINDOW_LEN samples; the padding is sliced off again
    # after smoothing.
    data = numpy.array(WINDOW_LEN * [durations_sec[0]] + durations_sec +
                       WINDOW_LEN * [0])
    trend_duration = numpy.poly1d(
        numpy.polyfit(numpy.arange(len(data)), data, TREND_DEGREE))
    smooth_data = smooth(data, window_len=WINDOW_LEN, window='hanning')
    plt.axis(ymin=0, ymax=60.0, xmin=0, xmax=len(durations_sec) - 1)
    plt.plot(smooth_data[WINDOW_LEN:-WINDOW_LEN],
             "r-",
             label="shot length (in seconds)")
    plt.plot(trend_duration(numpy.arange(len(data))),
             "m-",
             label="shot length trend")
    plt.legend(loc="upper left")

    data = numpy.array(WINDOW_LEN * [motions[0]] + motions + WINDOW_LEN * [0])
    trend_motion = numpy.poly1d(
        numpy.polyfit(numpy.arange(len(data)), data, TREND_DEGREE))
    smooth_data = smooth(data, window_len=WINDOW_LEN, window='hanning')
    plt.xlabel("shot / %d" % (len(durations)))
    plt.twinx()  # motion gets its own y axis on the right
    plt.axis(ymin=0, ymax=1.0)
    plt.plot(smooth_data[WINDOW_LEN:-WINDOW_LEN], "b-", label="motion (0..1)")
    plt.plot(trend_motion(numpy.arange(len(data))), "c-", label="motion trend")
    plt.legend(loc="upper right")

    plt.show()

    # ===== TEST =====
    # Experimental polar plot, deliberately disabled.
    if False:
        smooth_deriv = 100 * smooth(
            numpy.diff(smooth_data),
            window_len=10 * WINDOW_LEN)[WINDOW_LEN:-WINDOW_LEN]
        smooth_motion = smooth_data[WINDOW_LEN:-WINDOW_LEN]

        # audio
        with open(os.path.join("..", "smooth_audio.txt"), "r") as f:
            audio_values = [float(line) for line in f if line.strip()]
        audio_step = float(len(audio_values)) / float(len(smooth_deriv))
        audio_counter = 0

        fig = plt.figure()
        ax = fig.add_subplot(111, polar=True)

        STEP = math.ceil(0.01 * len(smooth_deriv) / float(2 * math.pi))

        for x, y in enumerate(smooth_deriv):
            if x % STEP == 0:
                x = 2 * math.pi * float(x) / len(smooth_deriv)
                y += 2
                audio_value = 0.75 * audio_values[int(audio_counter *
                                                      audio_step)]
                ax.vlines(x,
                          y + 0.01,
                          y + 0.01 + audio_value,
                          lw=audio_value * 2,
                          color="y")
                audio_counter += 1

        for x, y in enumerate(smooth_deriv):
            # Named `motion`, not `m`, so the minutes value used by the
            # radar section below is never overwritten.
            motion = smooth_motion[x]
            if x % STEP == 0:
                x = 2 * math.pi * float(x) / len(smooth_deriv)
                y += 2
                ax.vlines(x, y + 0.01, y + 0.01 + motion, lw=motion * 2)

        plt.show()

    # ===== RADAR =====
    avg_motion = numpy.mean(motions)

    # Each property is divided by a fixed constant before plotting.
    properties = {}
    properties["duration"] = percent
    properties["cuts / minute"] = (len(durations) / m) / 20.0
    properties["average motion"] = avg_motion / 0.25
    properties["words / minute"] = words_per_minute / 60.0
    properties["average loudness"] = 0.5

    # Freeze one key order so labels, line and spokes always agree.
    labels = list(properties.keys())
    radii = [properties[label] for label in labels]

    angle_step = 360.0 / len(labels)
    angles = [math.radians(i * angle_step) for i in range(len(labels))]

    fig = plt.figure()
    ax = fig.add_subplot(111, polar=True)
    ax.set_rmax(5.0)
    ax.set_xticks(
        [i * 2 * math.pi / len(labels) for i in range(len(labels))])
    ax.set_xticklabels(labels)
    ax.plot(angles, radii)
    for angle, radius in zip(angles, radii):
        ax.vlines(angle, 0, radius, lw=15)
    plt.show()

    return
コード例 #3
0
def main():
	"""Print and plot summary statistics for one analysed movie project.

	The project directory is taken from ``sys.argv[1]``. It must contain
	``project.xml`` (root element with ``fps`` and ``frames`` attributes),
	``shots.txt``, ``motion_shot-avg.txt`` and ``subtitles.txt``. The
	function changes into ``OUTPUT_DIR_NAME`` below the project directory
	(creating it when missing), prints duration, shot-length and word-rate
	figures, and shows four matplotlib figures one after another:
	duration, shot bar, shot-length/motion trends and a radar chart.
	"""
	os.chdir(sys.argv[1])
	# Create the output directory only when it is missing, so that real
	# failures (e.g. permissions) are not silently swallowed.
	if not os.path.isdir(OUTPUT_DIR_NAME):
		os.mkdir(OUTPUT_DIR_NAME)

	movie = et.parse("project.xml").getroot()
	fps = float(movie.attrib["fps"])
	frames = float(movie.attrib["frames"])

	# From here on the data files live one level up; os.pardir keeps the
	# paths portable instead of hard-coding a Windows separator.
	os.chdir(OUTPUT_DIR_NAME)

	s = frames / fps  # running time in seconds
	m = s / 60.0  # running time in minutes
	h = s / float(60 * 60)  # running time in hours
	# Running time as a fraction of a two-hour reference length.
	# NOTE(review): this is a 0..1 ratio, yet it is displayed with a "%"
	# sign without being multiplied by 100 - confirm the intended display.
	percent = h / 2.0

	# Single-argument print() calls behave the same on Python 2 and 3.
	print("%s seconds" % s)
	print("%.2f hours" % h)
	print("%.2f %%" % percent)
	print("%d:%02d" % (math.floor(h), (h - math.floor(h)) * 60))

	# ===== DURATION =====
	plt.axis(ymin=0, ymax=10, xmin=0, xmax=3 * 60)
	plt.xlabel("%d mins, %.2f %% of 2 hours" % (s / 60, percent))

	lw = 20
	# Black bar: the two-hour reference; blue bar: this movie (in minutes).
	plt.plot([0, 2 * 60], [1, 1], "k-", linewidth=lw, solid_capstyle="butt")
	plt.plot([0, h * 60], [2, 2], "b-", linewidth=lw, solid_capstyle="butt")

	# Two concentric discs: the outline has radius r; the filled disc is
	# sized so that r2 * r2 == percent * r * r.
	r = 200 / 2
	r2 = math.sqrt(percent * r * r)
	plt.plot([100], [6], "o", markeredgewidth=0, markersize=2 * r2, markerfacecolor="b")
	plt.plot([100], [6], "o", markeredgewidth=1, markersize=2 * r, markerfacecolor="none")

	plt.axis("off")
	plt.show()

	# ===== SHOTS =====
	# Each non-blank line holds three tab-separated integers.
	with open(os.path.join(os.pardir, "shots.txt"), "r") as f:
		shots = [
			(int(cols[0]), int(cols[1]), int(cols[2]))
			for cols in (line.split("\t") for line in f if line.strip())
		]

	fig = plt.figure()
	ax = fig.add_subplot(111)
	plt.ylim(5, 15.5)
	for i, (frame_start, frame_end, _length) in enumerate(shots):
		# Alternate colour and height so neighbouring shots stay apart.
		if i % 2 == 0:
			color, y = (0, 0, 0), 10
		else:
			color, y = (0.5, 0.5, 0.5), 10.5
		ax.hlines(y, frame_start, frame_end, color=color, lw=30)

	ax.axis("off")
	plt.show()

	# ===== TRENDLINES =====
	# Each non-blank line holds "<motion float>\t<duration int>".
	with open(os.path.join(os.pardir, "motion_shot-avg.txt"), "r") as f:
		rows = [
			(float(cols[0]), int(cols[1]))
			for cols in (line.split("\t") for line in f if line.strip())
		]

	motions = [motion for motion, _ in rows]
	durations = [duration for _, duration in rows]
	durations_sec = [d / fps for d in durations]

	print("%d shots" % len(durations))
	print("%.1f cuts per minute" % (len(durations) / m))
	print("min: %s s" % min(durations_sec))
	print("max: %s s" % max(durations_sec))
	print("range: %s s" % (max(durations_sec) - min(durations_sec)))
	print("asl: %s s" % numpy.mean(durations_sec))
	print("std: %s s" % numpy.std(durations_sec))
	print("var: %s s" % numpy.var(durations_sec))

	with open(os.path.join(os.pardir, "subtitles.txt")) as subtitle_file:
		word_count = len(subtitle_file.read().split())
	words_per_minute = word_count / m
	print("%s words / minute" % words_per_minute)

	WINDOW_LEN = 20
	TREND_DEGREE = 1  # first-degree polynomial, i.e. a straight line

	# Pad both ends by WINDOW_LEN samples; the padding is sliced off again
	# after smoothing.
	data = numpy.array(WINDOW_LEN * [durations_sec[0]] + durations_sec + WINDOW_LEN * [0])
	trend_duration = numpy.poly1d(numpy.polyfit(numpy.arange(len(data)), data, TREND_DEGREE))
	smooth_data = smooth(data, window_len=WINDOW_LEN, window='hanning')
	plt.axis(ymin=0, ymax=60.0, xmin=0, xmax=len(durations_sec) - 1)
	plt.plot(smooth_data[WINDOW_LEN:-WINDOW_LEN], "r-", label="shot length (in seconds)")
	plt.plot(trend_duration(numpy.arange(len(data))), "m-", label="shot length trend")
	plt.legend(loc="upper left")

	data = numpy.array(WINDOW_LEN * [motions[0]] + motions + WINDOW_LEN * [0])
	trend_motion = numpy.poly1d(numpy.polyfit(numpy.arange(len(data)), data, TREND_DEGREE))
	smooth_data = smooth(data, window_len=WINDOW_LEN, window='hanning')
	plt.xlabel("shot / %d" % (len(durations)))
	plt.twinx()  # motion gets its own y axis on the right
	plt.axis(ymin=0, ymax=1.0)
	plt.plot(smooth_data[WINDOW_LEN:-WINDOW_LEN], "b-", label="motion (0..1)")
	plt.plot(trend_motion(numpy.arange(len(data))), "c-", label="motion trend")
	plt.legend(loc="upper right")

	plt.show()

	# ===== TEST =====
	# Experimental polar plot, deliberately disabled.
	if False:
		smooth_deriv = 100 * smooth(numpy.diff(smooth_data), window_len=10 * WINDOW_LEN)[WINDOW_LEN:-WINDOW_LEN]
		smooth_motion = smooth_data[WINDOW_LEN:-WINDOW_LEN]

		# audio
		with open(os.path.join(os.pardir, "smooth_audio.txt"), "r") as f:
			audio_values = [float(line) for line in f if line.strip()]
		audio_step = float(len(audio_values)) / float(len(smooth_deriv))
		audio_counter = 0

		fig = plt.figure()
		ax = fig.add_subplot(111, polar=True)

		STEP = math.ceil(0.01 * len(smooth_deriv) / float(2 * math.pi))

		for x, y in enumerate(smooth_deriv):
			if x % STEP == 0:
				x = 2 * math.pi * float(x) / len(smooth_deriv)
				y += 2
				audio_value = 0.75 * audio_values[int(audio_counter * audio_step)]
				ax.vlines(x, y + 0.01, y + 0.01 + audio_value, lw=audio_value * 2, color="y")
				audio_counter += 1

		for x, y in enumerate(smooth_deriv):
			# Named `motion`, not `m`, so the minutes value used by the
			# radar section below is never overwritten.
			motion = smooth_motion[x]
			if x % STEP == 0:
				x = 2 * math.pi * float(x) / len(smooth_deriv)
				y += 2
				ax.vlines(x, y + 0.01, y + 0.01 + motion, lw=motion * 2)

		plt.show()

	# ===== RADAR =====
	avg_motion = numpy.mean(motions)

	# Each property is divided by a fixed constant before plotting.
	properties = {}
	properties["duration"] = percent
	properties["cuts / minute"] = (len(durations) / m) / 20.0
	properties["average motion"] = avg_motion / 0.25
	properties["words / minute"] = words_per_minute / 60.0
	properties["average loudness"] = 0.5

	# Freeze one key order so labels, line and spokes always agree.
	labels = list(properties.keys())
	radii = [properties[label] for label in labels]

	angle_step = 360.0 / len(labels)
	angles = [math.radians(i * angle_step) for i in range(len(labels))]

	fig = plt.figure()
	ax = fig.add_subplot(111, polar=True)
	ax.set_rmax(5.0)
	ax.set_xticks([i * 2 * math.pi / len(labels) for i in range(len(labels))])
	ax.set_xticklabels(labels)
	ax.plot(angles, radii)
	for angle, radius in zip(angles, radii):
		ax.vlines(angle, 0, radius, lw=15)
	plt.show()

	return
コード例 #4
0
ファイル: main.py プロジェクト: toinsson/googlevoice
def main(wavFileName):
    """Cut a WAV file at quiet points and run speech recognition on each cut.

    The samples are read as 16-bit integers and a spectrogram is computed.
    The spectrogram power inside the band selected via ``lib.overflow``
    (300 and 3500 are passed as bounds) is summed per time step, smoothed,
    and its local minima below a histogram-derived threshold become
    candidate cut points. Candidates are thinned so that consecutive cuts
    are more than ``avgSec`` seconds apart. Every cut except the last one is
    written next to the input file as ``<name>.<from>_<to><ext>``, handed to
    ``Pygsr`` and logged together with its start/end time stamps.

    Args:
        wavFileName: Path of the input WAV file.

    Returns:
        Always 1.
    """
    # ---- read the input -------------------------------------------------
    wavFile = wave.open(wavFileName)
    try:
        (nchannels, sampwidth, framerate, nframes, comptype, compname) = wavFile.getparams()
        frames = wavFile.readframes(-1)
    finally:
        # Release the file handle even when reading fails.
        wavFile.close()
    # NOTE(review): samples are always decoded as int16, whatever
    # `sampwidth` says - confirm that only 16-bit input is expected.
    npFrames = np.frombuffer(frames, dtype=np.int16)

    # ---- spectrogram ----------------------------------------------------
    # A small FFT size is used on purpose (see the original author's note:
    # "make sure FFT size is not too big for good accuracy").
    nFft = 64
    nOverlap = 32
    fftWindow = nFft - nOverlap  # hop between two spectrogram columns

    # specgram returns (Pxx, freqs, bins, im). Pxx is indexed
    # [frequency, time]: it is multiplied below with a vector of
    # len(freqs) along its first axis.
    (Pxx, freqs, bins, im) = plt.specgram(npFrames, Fs=framerate, NFFT=nFft, noverlap=nOverlap)
    plt.clf()

    # ---- power in the voice band ---------------------------------------
    # voiceArray is a 0/1 mask over the frequency axis.
    f300Ind = lib.overflow(freqs, 300)
    f3500Ind = lib.overflow(freqs, 3500)
    voiceArray = np.zeros(len(freqs))
    voiceArray[f300Ind:f3500Ind] = 1
    # Sum of the masked frequency rows for every time step.
    voiceFreq = np.dot(voiceArray, Pxx)

    # ---- threshold from the histogram ----------------------------------
    #TODO: consider using the mlab/numpy function
    histData = plt.hist(voiceFreq, bins=100, range=(voiceFreq.min(), voiceFreq.mean()))
    plt.clf()

    overflowPercent = 0.7
    overflowIndex = lib.overflow_hist(histData[0], overflowPercent)
    overflowValue = histData[1][overflowIndex]

    # Smooth the curve, then mark samples that are strictly lower than
    # both neighbours (the two end points only need one lower side).
    voiceFreqSmooth = lib.smooth(voiceFreq, 128)
    minimums = np.r_[True, voiceFreqSmooth[1:] < voiceFreqSmooth[:-1]] & \
               np.r_[voiceFreqSmooth[:-1] < voiceFreqSmooth[1:], True]

    ##TODO: change name
    # Cut candidates: local minima below the histogram threshold.
    cutPoints = np.where(minimums & (voiceFreqSmooth < overflowValue))[0]

    # ---- thin the candidates -------------------------------------------
    # Keep a candidate only if it lies more than avgSec seconds after the
    # previously kept one. Spectrogram indices are converted to sample
    # indices by multiplying with the hop size.
    avgSec = 3
    minGap = framerate * avgSec
    cutPointsNSec = [0]
    for pt in cutPoints:
        # Plain ints keep later slicing and timedelta() calls independent
        # of numpy scalar types.
        frameIndex = int(pt) * fftWindow
        if frameIndex - cutPointsNSec[-1] > minGap:
            cutPointsNSec.append(frameIndex)

    # ---- describe every cut --------------------------------------------
    # Whole seconds; `//` makes the integer division explicit.
    cutPointsNSecInSec = [x // framerate for x in cutPointsNSec]

    timestamp = [(start, stop) for start, stop in lib.pairwise(cutPointsNSec, fillvalue=0)]
    timestampNFrames = [stop - start for start, stop in timestamp]

    # Extension added to the file name, e.g. filename.X_Y.wav for a cut
    # from second X to second Y.
    timestampInSec = [(start, stop) for start, stop in lib.pairwise(cutPointsNSecInSec, fillvalue="end")]
    addExtension = [str(start) + "_" + str(stop) for start, stop in timestampInSec]

    logger = logging.getLogger(__name__)
    logger.debug("%s %s %s", timestamp, timestampNFrames, addExtension)
    logger.debug("%s %s %s", len(timestamp), len(timestampNFrames), len(addExtension))
    totalRes = []

    # splitext/join handle names with several dots and inputs given
    # without a directory part (a bare "x.wav" must not end up in "/").
    outDir = path.dirname(wavFileName)
    baseName, extension = path.splitext(path.basename(wavFileName))

    #TODO: take care of the last index, when cutPointNSecInSec is "end"
    for index in range(len(timestamp) - 1):
        filename = path.join(outDir, baseName + "." + addExtension[index] + extension)

        start, stop = timestamp[index]
        wavChunk = wave.open(filename, "w")
        try:
            wavChunk.setparams((nchannels, sampwidth, framerate, timestampNFrames[index], comptype, compname))
            wavChunk.writeframes(npFrames[start:stop].tobytes())
        finally:
            wavChunk.close()

        pygsr = Pygsr(filename)
        pygsr.convert()
        res = pygsr.speech_to_text("en", indx=index)
        totalRes.append(res)
        logger.debug("%s %s %s", index, addExtension[index], timestamp[index])

        h1 = str(datetime.timedelta(seconds=timestampInSec[index][0])) + ",200"
        h2 = str(datetime.timedelta(seconds=timestampInSec[index][1] - 1)) + ",800"

        logger.info("%s", index)
        logger.info("%s --> %s", h1, h2)
        logger.info("%s", res)
        logger.info("")

    logger.debug("%s", totalRes)

    return 1
コード例 #5
0
ファイル: 05_2_audio.py プロジェクト: jnv/cinemetrics
def main():
	"""Write a smoothed loudness curve for a project's audio track.

	The project directory is taken from ``sys.argv[1]``. ``project.xml``
	supplies the movie ``path`` and ``fps``; ``audio_trimmed.wav`` is read
	from the directory of that path. The audio is cut into chunks of one
	eighth of a second, each chunk's RMS level (relative to the file's
	peak) is converted to decibel, the series is smoothed and mapped with
	``1 + dB / 60``. The result is plotted and written, clamped at 0, one
	value per line to ``smooth_audio.txt`` in the project directory.
	"""
	os.chdir(sys.argv[1])

	tree = et.parse("project.xml")
	movie = tree.getroot()
	path = os.path.dirname(movie.attrib["path"])
	fps = float(movie.attrib["fps"])

	wav_path = os.path.join(path, "audio_trimmed.wav")
	print(wav_path)

	f = wave.open(wav_path, "rb")
	try:
		bit = f.getsampwidth() * 8
	finally:
		f.close()
	print("%d bit" % bit)  # usually: signed 16 bit [-32768, 32767]

	rate, data = scipy.io.wavfile.read(wav_path)
	print("%s hz" % rate)
	# http://en.wikipedia.org/wiki/Sound_level_meter#Exponentially_averaging_sound_level_meter
	chunk = rate // 8  # samples per 1/8 s; integer so it can be used for slicing

	# Take the absolute value in floating point: abs(-32768) does not fit
	# into int16 and would wrap around.
	peak = float(numpy.max(numpy.absolute(data.astype(numpy.float64))))
	if peak == 0:
		# Completely silent input: avoid dividing by zero below.
		peak = 1.0

	# Collect in a list; growing a numpy array per chunk is quadratic.
	levels_db = []
	for i in range(len(data) // chunk):
		# normalize to [-1, 1] relative to the loudest sample
		values = data[i * chunk:(i + 1) * chunk] / peak

		# root mean square
		rms = numpy.sqrt(numpy.mean(numpy.power(values, 2)))

		# decibel; the tiny offset keeps log10 finite for silent chunks
		levels_db.append(20 * numpy.log10(1e-20 + rms))
	data_db = numpy.array(levels_db)

	# 0 dB maps to 1 and -60 dB maps to 0.
	smooth_db = 1 + smooth(data_db, window_len=rate / (fps * 3)) / (60.0)
	plt.ylim(0, 1)
	plt.plot(smooth_db, "g-")

	# Open the output only once there is something to write, so a failure
	# above does not leave an empty, truncated file behind.
	with open("smooth_audio.txt", "w") as f_out:
		for item in smooth_db:
			f_out.write("%f\n" % max(float(item), 0.0))

	plt.show()
コード例 #6
0
ファイル: 05_2_audio.py プロジェクト: jnv/cinemetrics
def main():
    """Write a smoothed loudness curve for a project's audio track.

    The project directory is taken from ``sys.argv[1]``. ``project.xml``
    supplies the movie ``path`` and ``fps``; ``audio_trimmed.wav`` is read
    from the directory of that path. The audio is cut into chunks of one
    eighth of a second, each chunk's RMS level (relative to the file's
    peak) is converted to decibel, the series is smoothed and mapped with
    ``1 + dB / 60``. The result is plotted and written, clamped at 0, one
    value per line to ``smooth_audio.txt`` in the project directory.
    """
    os.chdir(sys.argv[1])

    tree = et.parse("project.xml")
    movie = tree.getroot()
    path = os.path.dirname(movie.attrib["path"])
    fps = float(movie.attrib["fps"])

    wav_path = os.path.join(path, "audio_trimmed.wav")
    print(wav_path)

    f = wave.open(wav_path, "rb")
    try:
        bit = f.getsampwidth() * 8
    finally:
        f.close()
    print("%d bit" % bit)  # usually: signed 16 bit [-32768, 32767]

    rate, data = scipy.io.wavfile.read(wav_path)
    print("%s hz" % rate)
    # http://en.wikipedia.org/wiki/Sound_level_meter#Exponentially_averaging_sound_level_meter
    chunk = rate // 8  # samples per 1/8 s; integer so it can be used for slicing

    # Take the absolute value in floating point: abs(-32768) does not fit
    # into int16 and would wrap around.
    peak = float(numpy.max(numpy.absolute(data.astype(numpy.float64))))
    if peak == 0:
        # Completely silent input: avoid dividing by zero below.
        peak = 1.0

    # Collect in a list; growing a numpy array per chunk is quadratic.
    levels_db = []
    for i in range(len(data) // chunk):
        # normalize to [-1, 1] relative to the loudest sample
        values = data[i * chunk:(i + 1) * chunk] / peak

        # root mean square
        rms = numpy.sqrt(numpy.mean(numpy.power(values, 2)))

        # decibel; the tiny offset keeps log10 finite for silent chunks
        levels_db.append(20 * numpy.log10(1e-20 + rms))
    data_db = numpy.array(levels_db)

    # 0 dB maps to 1 and -60 dB maps to 0.
    smooth_db = 1 + smooth(data_db, window_len=rate / (fps * 3)) / (60.0)
    plt.ylim(0, 1)
    plt.plot(smooth_db, "g-")

    # Open the output only once there is something to write, so a failure
    # above does not leave an empty, truncated file behind.
    with open("smooth_audio.txt", "w") as f_out:
        for item in smooth_db:
            f_out.write("%f\n" % max(float(item), 0.0))

    plt.show()