Beispiel #1
0
        # Fragment of a detection routine; the enclosing definition is not
        # visible here.  Build the Caffe network from the model definition
        # (prototxt) and the trained weights (caffemodel) in TEST mode.
    	net = caffe.Net(prototxt, caffemodel, caffe.TEST)
        print '\n\nLoaded network {:s}'.format(caffemodel)

        # Warmup on a dummy image
        # Two throw-away im_detect passes over a constant (value 128)
        # 1000x512x3 uint8 array; both return values are discarded.
        im = 128 * np.ones((1000, 512, 3), dtype=np.uint8)
        for i in xrange(2):
            _, __= im_detect(net, im)
   
        print '\nDetect begin: \n'
        # The timer is started once, before the loop; the per-file tic()/toc()
        # calls further down are commented out.
        timer=Timer()
        timer.tic()
        # Visit the entries of test_folder in sorted name order and process
        # only those whose name ends with '.wav'.
        for wav_name in sorted(os.listdir(test_folder)):
	    if wav_name.endswith('.wav'):
	        full_wav_name=os.path.join(test_folder,wav_name)		
	        # NOTE(review): presumably load_audio resamples to common_fs and
	        # returns (samples, rate) -- confirm against its definition.
	        # `rate` is not used in the visible part of this loop.
            	x,rate=load_audio(full_wav_name,common_fs)
	        # NOTE(review): stfft presumably returns a 2-D spectrogram of x
	        # computed with window nfft and overlap noverlap -- confirm.
	        image=stfft(x,nfft,noverlap)
	        # Round-trip through a temporary JPEG: cast to uint8, save with
	        # PIL, read back with OpenCV, then delete the file.
	        # NOTE(review): presumably done to obtain the multi-channel array
	        # layout im_detect expects; JPEG encoding is lossy, so `im` is not
	        # guaranteed to be pixel-identical to `image` -- confirm intent.
	        # The fixed name 'temp.jpg' is relative to the current working
	        # directory.
	        IMG=Image.fromarray(image.astype(np.uint8))
	        IMG.save('temp.jpg')
	        im=cv2.imread('temp.jpg')
	        os.remove('temp.jpg')
	        # The triple-quoted string below is a no-op expression statement
	        # holding disabled code: an in-memory alternative that copies the
	        # uint8 spectrogram into each of three channels.
	        '''
	        im=np.zeros((image.shape[0],image.shape[1],3))
	        for k in range(3):
	            im[:,:,k]=np.uint8(image)
	        '''
	        #timer.tic()
	        # Run the detector on the image read back from disk.
	        scores, boxes = im_detect(net, im)
	        #timer.toc()   
	        #print ('Detection took {:.3f}s for '
		        #'{:d} object proposals').format(timer.total_time, boxes.shape[0])
Beispiel #2
0
        # Fragment of a dataset-generation routine; the enclosing definition is
        # not visible here.  For every entry of `data` whose audio file exists,
        # it writes a spectrogram JPEG, records the image in the list files
        # and starts a per-image label file.
        #
        # image_num is incremented for every entry, including entries skipped
        # because the audio file is missing, so the numbering of the written
        # images can contain gaps.
        image_num = 0
        for item in data:
            image_num += 1
            # Plain string concatenation: `audio` must already end with a path
            # separator for this to form a valid path.
            full_audio_name = audio + item['mixture_audio_filename']
            if os.path.exists(full_audio_name):
                x, rate = load_audio(full_audio_name, common_fs)
            else:
                # Missing audio file: skip this entry without writing anything.
                continue
            # Convert the event's start and end times (seconds) into units of
            # (nfft - noverlap) samples, truncated to int.
            # NOTE(review): presumably these are the spectrogram column indices
            # bounding the event (hop size = nfft - noverlap) -- confirm against
            # stfft.  Neither value is used in the visible part of the loop.
            picture_interest_start = int(
                rate * item['event_start_in_mixture_seconds'] /
                (nfft - noverlap))
            picture_interest_end = int(
                rate * (item['event_start_in_mixture_seconds'] +
                        item['event_length_seconds']) / (nfft - noverlap))

            # Compute the spectrogram and cast it to uint8 for saving as an image.
            im = stfft(x, nfft, noverlap)
            image = Image.fromarray(im.astype(np.uint8))
            # Name layout: 'IMG_' + i + EBR code + 4-digit zero-padded image_num.
            # `i` comes from the enclosing scope (presumably the class index,
            # as it also indexes CLASS_NAMES below -- confirm).
            image_name = 'IMG_' + str(i) + str(
                EBR[item['ebr']]) + "%04d" % (image_num)
            image.save(data_folder + 'JPEGImages/' + image_name + '.jpg')
            # Record the "<image file> <audio path>" mapping and add the image to
            # the combined train+val list.
            fpicture.write(image_name + '.jpg ' + full_audio_name + '\n')
            ftrainval.write(image_name + '\n')
            # Split by running index: entries whose image_num is below 80% of
            # num_of_every_class go to the train list, all others to the val list.
            if (image_num < int(num_of_every_class * 0.8)):
                ftrain.write(image_name + '\n')
            else:
                fval.write(image_name + '\n')
            # Open the per-image label file in text write mode; it is not closed
            # within the visible part of this loop.
            label_txt = data_folder + 'Labels/' + image_name + '.txt'
            flabel = open(label_txt, 'wt')
            # Header: the image name on one line, then "<size[0]> <size[1]> 1"
            # (width and height of the PIL image).
            # NOTE(review): the trailing 1 is presumably the channel count --
            # confirm against the label-file consumer.
            flabel.write(image_name+'\n' \
          +str(image.size[0])+' '+str(image.size[1])+' 1\n')
            flabel.write(CLASS_NAMES[i]+'\n' \