# Local import: only needed for the in-memory JPEG round trip below.
import io

# Build the network from the prototxt / caffemodel pair, constructed with
# the caffe.TEST phase flag.
net = caffe.Net(prototxt, caffemodel, caffe.TEST)
print('\n\nLoaded network {:s}'.format(caffemodel))

# Warmup on a dummy image: two throw-away forward passes on a constant
# 1000x512x3 uint8 image; the detections are discarded.
im = 128 * np.ones((1000, 512, 3), dtype=np.uint8)
for _ in range(2):
    im_detect(net, im)

print('\nDetect begin: \n')

# A single timer is started before the loop; the per-file tic()/toc() calls
# of the original are disabled, so it spans the whole run.
# NOTE(review): the matching toc() is presumably outside this view - confirm.
timer = Timer()
timer.tic()

# Process every .wav file of the test folder in sorted (deterministic) order.
for wav_name in sorted(os.listdir(test_folder)):
    if wav_name.endswith('.wav'):
        full_wav_name = os.path.join(test_folder, wav_name)
        x, rate = load_audio(full_wav_name, common_fs)

        # Spectrogram of the recording, as returned by stfft().
        image = stfft(x, nfft, noverlap)

        # Round-trip the spectrogram through JPEG so the detector receives
        # the image exactly as cv2 decodes it from a JPEG file.  This used
        # to go through a fixed 'temp.jpg' in the working directory, which
        # overwrote/deleted any file of that name, was unsafe for
        # concurrent runs and leaked the file if reading failed.  Encoding
        # into a memory buffer keeps the same Pillow encoder and OpenCV
        # decoder without touching the disk.
        IMG = Image.fromarray(image.astype(np.uint8))
        jpeg_buffer = io.BytesIO()
        IMG.save(jpeg_buffer, format='JPEG')
        encoded = np.frombuffer(jpeg_buffer.getvalue(), dtype=np.uint8)
        # IMREAD_COLOR is the default mode of cv2.imread, which the
        # original code relied on.
        im = cv2.imdecode(encoded, cv2.IMREAD_COLOR)

        # Run detection on the decoded image.
        scores, boxes = im_detect(net, im)
image_num = 0 for item in data: image_num += 1 full_audio_name = audio + item['mixture_audio_filename'] if os.path.exists(full_audio_name): x, rate = load_audio(full_audio_name, common_fs) else: continue picture_interest_start = int( rate * item['event_start_in_mixture_seconds'] / (nfft - noverlap)) picture_interest_end = int( rate * (item['event_start_in_mixture_seconds'] + item['event_length_seconds']) / (nfft - noverlap)) im = stfft(x, nfft, noverlap) image = Image.fromarray(im.astype(np.uint8)) image_name = 'IMG_' + str(i) + str( EBR[item['ebr']]) + "%04d" % (image_num) image.save(data_folder + 'JPEGImages/' + image_name + '.jpg') fpicture.write(image_name + '.jpg ' + full_audio_name + '\n') ftrainval.write(image_name + '\n') if (image_num < int(num_of_every_class * 0.8)): ftrain.write(image_name + '\n') else: fval.write(image_name + '\n') label_txt = data_folder + 'Labels/' + image_name + '.txt' flabel = open(label_txt, 'wt') flabel.write(image_name+'\n' \ +str(image.size[0])+' '+str(image.size[1])+' 1\n') flabel.write(CLASS_NAMES[i]+'\n' \