def main():
    """Convert every waifu2x-chainer UpResNet10 ``.npz`` model to Caffe.

    For each ``.npz`` file in the model directory, the chainer parameters are
    loaded and copied into a Caffe net built from ``upresnet10_3.prototxt``.
    The net is saved as ``<name>.json.caffemodel`` and the prototxt is copied
    to ``<name>.prototxt``, where ``<name>`` is looked up in
    ``fname_convert_table`` by the file's base name.
    """
    caffe.set_mode_cpu()
    model_name = 'UpResNet10'
    model_dir = 'waifu2x-chainer/models/{}'.format(model_name.lower())
    model_class = srcnn.archs[model_name]
    for filename in os.listdir(model_dir):
        basename, ext = os.path.splitext(filename)
        if ext != '.npz':
            continue
        model_path = os.path.join(model_dir, filename)
        print(model_path)
        channels = 3 if 'rgb' in filename else 1
        model = model_class(channels)
        chainer.serializers.load_npz(model_path, model)
        params = {path: param.array for path, param in model.namedparams()}

        # NOTE(review): the same prototxt is used for 1- and 3-channel
        # models -- confirm this is intended.
        net = caffe.Net('upresnet10_3.prototxt', caffe.TEST)
        for key in net.params:
            blobs = net.params[key]
            blobs[0].data[...] = params[key + '/W']
            # A second blob, when present, receives the bias.
            if len(blobs) >= 2:
                blobs[1].data[...] = params[key + '/b']

        converted_name = fname_convert_table[basename]
        prototxt_path = '{}.prototxt'.format(converted_name)
        caffemodel_path = '{}.json.caffemodel'.format(converted_name)
        net.save(caffemodel_path)
        shutil.copy('upresnet10_3.prototxt', prototxt_path)
def setCaffeMode(gpu, device=0):
    """Select the Caffe compute mode; in GPU mode also select ``device``."""
    if not gpu:
        caffe.set_mode_cpu()
        return
    caffe.set_mode_gpu()
    caffe.set_device(device)
def main(args):
    """Run ``make_fully_conv`` on the VGG-16 model; always returns 0.

    ``args`` is accepted but not used. The fc6/fc7/fc8 layers are paired with
    their ``-conv`` counterparts, and all model paths are hard-coded.
    (An earlier variant targeted the bvlc_reference_caffenet files under
    CAFFE_ROOT with the same layer pairs.)
    """
    caffe.set_mode_cpu()

    src_proto = "/home/kashefy/data/models/vgg-16/VGG_ILSVRC_16_layers_deploy.prototxt"
    src_weights = "/home/kashefy/data/models/vgg-16/VGG_ILSVRC_16_layers.caffemodel"
    dst_proto = "/home/kashefy/data/models/vgg-16/VGG_ILSVRC_16_layers_fcn_deploy.prototxt"
    dst_weights = "/home/kashefy/data/models/vgg-16/VGG_ILSVRC_16_layers_fcn.caffemodel"

    param_pairs = [("fc6", "fc6-conv"), ("fc7", "fc7-conv"), ("fc8", "fc8-conv")]
    make_fully_conv(src_proto, src_weights, dst_proto, param_pairs, dst_weights)
    return 0
def __init__(self):
    """Load the test-phase net on the CPU and set up its input transformer."""
    caffe.set_mode_cpu()
    net = caffe.Net(PROTOBUF_PATH, MODEL_PATH, caffe.TEST)
    self.net = net
    # The transformer is configured from the shape of the net's 'data' blob.
    input_shape = net.blobs['data'].data.shape
    transformer = caffe.io.Transformer({'data': input_shape})
    transformer.set_transpose('data', (2, 0, 1))
    self.transformer = transformer
def load_caffe(model_desc, model_file):
    """Load a Caffe model and return its parameters as a dict.

    Args:
        model_desc: path to the network definition passed to ``caffe.Net``.
        model_file: path to the weights file passed to ``caffe.Net``.

    Returns:
        dict built by merging the results of the per-layer-type processors
        returned by ``get_processor()``.

    Raises:
        AssertionError: if a layer type without a processor carries blobs.
    """
    param_dict = {}
    param_processors = get_processor()
    with change_env('GLOG_minloglevel', '2'):
        import caffe
        caffe.set_mode_cpu()
        net = caffe.Net(model_desc, model_file, caffe.TEST)
    layer_names = net._layer_names
    # Materialise the keys: on Python 3 a dict view has neither .index()
    # nor integer indexing.
    blob_names = list(net.blobs.keys())
    for layername, layer in zip(layer_names, net.layers):
        try:
            # NOTE(review): when the layer's blob is first in the list this
            # wraps around to the last blob (index -1) -- confirm intended.
            prev_blob_name = blob_names[blob_names.index(layername) - 1]
            prev_data_shape = net.blobs[prev_blob_name].data.shape[1:]
        except ValueError:
            # No blob is named after this layer.
            prev_data_shape = None

        if layer.type in param_processors:
            param_dict.update(param_processors[layer.type](
                layername, layer.blobs, prev_data_shape))
        else:
            assert len(layer.blobs) == 0, len(layer.blobs)
    logger.info("Model loaded from caffe. Params: " +
                " ".join(sorted(param_dict.keys())))
    return param_dict
def __init__(self, weights_path, image_net_proto, lstm_net_proto,
             vocab_path, device_id=-1):
    """Load the image net, the sentence net and the vocabulary.

    A non-negative ``device_id`` selects GPU mode on that device; otherwise
    CPU mode is used. Both nets are loaded from ``weights_path`` in test
    phase. Raises ``Exception`` when the vocabulary length differs from the
    size of the third axis of the LSTM net's 'predict' blob.
    """
    if device_id < 0:
        caffe.set_mode_cpu()
    else:
        caffe.set_mode_gpu()
        caffe.set_device(device_id)

    # Setup image processing net.
    phase = caffe.TEST
    self.image_net = caffe.Net(image_net_proto, weights_path, phase)
    image_data_shape = self.image_net.blobs['data'].data.shape
    self.transformer = caffe.io.Transformer({'data': image_data_shape})
    channel_mean = np.zeros(image_data_shape[1:])
    channel_mean_values = [104, 117, 123]
    assert channel_mean.shape[0] == len(channel_mean_values)
    # Fill each channel plane with its constant mean value.
    for plane, mean_val in zip(channel_mean, channel_mean_values):
        plane[...] = mean_val
    self.transformer.set_mean('data', channel_mean)
    self.transformer.set_channel_swap('data', (2, 1, 0))
    self.transformer.set_transpose('data', (2, 0, 1))

    # Setup sentence prediction net.
    self.lstm_net = caffe.Net(lstm_net_proto, weights_path, phase)
    self.vocab = ['<EOS>']
    with open(vocab_path, 'r') as vocab_file:
        for word in vocab_file.readlines():
            self.vocab.append(word.strip())
    assert self.vocab[1] == '<unk>'
    self.vocab_inv = {word: index for index, word in enumerate(self.vocab)}
    net_vocab_size = self.lstm_net.blobs['predict'].data.shape[2]
    if len(self.vocab) != net_vocab_size:
        raise Exception('Invalid vocab file: contains %d words; '
                        'net expects vocab with %d words' %
                        (len(self.vocab), net_vocab_size))
def __init__(self, model_def_file, pretrained_model_file, mean_file,
             raw_scale, class_labels_file, bet_file, image_dim, gpu_mode):
    """Load the classifier, the class labels and the pickled ``bet`` data.

    Args:
        model_def_file: network definition passed to ``caffe.Classifier``.
        pretrained_model_file: weights passed to ``caffe.Classifier``.
        mean_file: ``.npy`` file; its mean over axes 1 and 2 is used as mean.
        raw_scale: forwarded to ``caffe.Classifier``.
        class_labels_file: text file, one "<synset_id> <names>" per line.
        bet_file: pickle file providing 'infogain' and 'preferences'.
        image_dim: side length used for ``image_dims``.
        gpu_mode: truthy selects GPU mode, otherwise CPU mode.
    """
    logging.info('Loading net and associated files...')
    if gpu_mode:
        caffe.set_mode_gpu()
    else:
        caffe.set_mode_cpu()
    self.net = caffe.Classifier(
        model_def_file, pretrained_model_file,
        image_dims=(image_dim, image_dim), raw_scale=raw_scale,
        mean=np.load(mean_file).mean(1).mean(1), channel_swap=(2, 1, 0)
    )

    with open(class_labels_file) as f:
        labels_df = pd.DataFrame([
            {
                'synset_id': l.strip().split(' ')[0],
                # Keep only the first comma-separated name.
                'name': ' '.join(l.strip().split(' ')[1:]).split(',')[0]
            }
            for l in f.readlines()
        ])
    # DataFrame.sort() was removed from pandas; sort_values() replaces it.
    self.labels = labels_df.sort_values('synset_id')['name'].values

    # Pickles are binary data; the context manager also closes the handle.
    # NOTE: only load pickle files from a trusted source.
    with open(bet_file, 'rb') as bet_handle:
        self.bet = cPickle.load(bet_handle)
    # A bias to prefer children nodes in single-chain paths
    # I am setting the value to 0.1 as a quick, simple model.
    # We could use better psychological models here...
    self.bet['infogain'] -= np.array(self.bet['preferences']) * 0.1
def main(argv):
    """Compute the 'deconv1' blob for every ``*.jpg`` under the root folder.

    Command-line options (``-r/-o/-s/-c/-g``) override the defaults returned
    by ``get_params()``. For each image the blob is written to
    ``<out>/<image name>.p`` (pickle) and ``<out>/<image name>.mat``.

    Args:
        argv: option list handed to ``getopt.getopt``.
    """
    params = get_params()  # check get_params.py in the same directory to see the parameters
    try:
        opts, args = getopt.getopt(
            argv, "hr:o:s:c:g:",
            ["root=", "out=", "saliency_model=", "caffe_path=", "gpu="])
    except getopt.GetoptError:
        print('ERROR')
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print('saliency.py -r <root> -o <out> -s <saliency_model> -c <caffe_path> -g <gpu>')
            sys.exit()
        elif opt in ("-r", "--root"):
            params['root'] = arg
        elif opt in ("-o", "--out"):
            params['out'] = arg
        elif opt in ("-s", "--saliency_model"):
            params['saliency_model'] = arg
        elif opt in ("-c", "--caffe_path"):
            params['caffe_path'] = arg
        elif opt in ("-g", "--gpu"):
            params['gpu'] = arg

    # caffe is imported only after its location is known.
    sys.path.insert(0, os.path.join(params['caffe_path'], 'python'))
    import caffe

    compute = 'True'  # write 'true' or 'false' in case you want to compute or just visualize
    if compute == 'true' or compute == 'True':
        deploy_file = os.path.join(params['saliency_model'], 'deploy.prototxt')
        model_file = os.path.join(params['saliency_model'], 'model.caffemodel')
        # I am using the mean file from caffenet...but I guess we could use a grey image as well ?
        mean_file = '/media/HDD_2TB/mcarne/keyframe-extractor/src/Saliency/deep/meanfile.npy'

        # A value given on the command line is a string and never equals
        # True, so interpret it explicitly; other values keep the
        # original comparison.
        gpu_flag = params['gpu']
        if isinstance(gpu_flag, str):
            use_gpu = gpu_flag.strip().lower() in ('1', 'true', 'yes')
        else:
            use_gpu = (gpu_flag == True)  # noqa: E712 - original semantics

        if use_gpu:
            caffe.set_mode_gpu()
            print('GPU mode selected')
        else:
            caffe.set_mode_cpu()
            print('CPU mode selected')

        net = caffe.Classifier(deploy_file, model_file,
                               mean=np.load(mean_file).mean(1).mean(1),
                               channel_swap=(2, 1, 0), raw_scale=255)
        if not os.path.exists(params['out']):
            os.makedirs(params['out'])

        for imagepath in glob.glob(params['root'] + "/*.jpg"):
            print("Procressing image...")
            # The prediction itself is unused; the call fills the blobs.
            net.predict([caffe.io.load_image(imagepath)])
            feat = net.blobs['deconv1'].data
            print('%s %s' % (feat, np.shape(feat)))

            # saves to disk
            fout = params['out'] + '/' + os.path.splitext(os.path.basename(imagepath))[0]
            with open(fout + '.p', 'wb') as pickle_file:
                pickle.dump(feat, pickle_file)
            scipy.io.savemat(fout + '.mat', mdict={'isal': feat})
def __init__(self, model_def_file, pretrained_model_file, raw_scale,
             class_labels_file, image_dim, gpu_mode):
    """Load the classifier, a bit-count lookup table, the reference
    database and the class labels.

    Args:
        model_def_file: network definition passed to ``caffe.Classifier``.
        pretrained_model_file: weights passed to ``caffe.Classifier``.
        raw_scale: forwarded to ``caffe.Classifier``.
        class_labels_file: text file, one "<synset_id> <names>" per line.
        image_dim: side length used for ``image_dims``.
        gpu_mode: truthy selects GPU mode, otherwise CPU mode.
    """
    logging.info('Loading net and associated files...')
    if gpu_mode:
        caffe.set_mode_gpu()
    else:
        caffe.set_mode_cpu()

    ## load models googlenet
    self.net = caffe.Classifier(
        model_def_file, pretrained_model_file,
        image_dims=(image_dim, image_dim), raw_scale=raw_scale,
        mean=np.array([104.0, 116.0, 122.0]), channel_swap=(2, 1, 0))
    logging.info('Load vision model, %s', model_def_file)

    # generate N bit lookup table: number of set bits for every 16-bit value
    self.lookup = np.asarray([bin(i).count('1') for i in range(1 << 16)])

    # load reference bit model
    # NOTE(review): self.database_param is not assigned here; presumably a
    # class attribute -- confirm.
    # NOTE: only load pickle files from a trusted source.
    with open(self.database_param, 'rb') as file_reader:
        self.database = cPickle.load(file_reader)
    logging.info('Load database from {}'.format(self.database_param))
    logging.info('database shape {}'.format(self.database['ref'].shape))

    with open(class_labels_file) as f:
        labels_df = pd.DataFrame([
            {
                'synset_id': l.strip().split(' ')[0],
                # Keep only the first comma-separated name.
                'name': ' '.join(l.strip().split(' ')[1:]).split(',')[0]
            }
            for l in f.readlines()
        ])
    # DataFrame.sort() was removed from pandas; sort_values() replaces it.
    self.labels = labels_df.sort_values('synset_id')['name'].values
def caffe_set_device(gpu=True, devid='0'):
    """Switch Caffe to GPU ``devid`` (a string) or, when ``gpu`` is falsy, to CPU.

    In GPU mode ``CUDA_VISIBLE_DEVICES`` is also set to ``devid``.
    """
    if not gpu:
        caffe.set_mode_cpu()
        return
    caffe.set_mode_gpu()
    os.environ["CUDA_VISIBLE_DEVICES"] = devid
    caffe.set_device(int(devid))
def run():
    """Classify every image listed in ``fake_data/test.txt`` and print accuracy.

    Each useful line of the list holds "<image path> <integer label>"; lines
    with any other number of fields are skipped. An image counts as correct
    when the argmax of the first 'prob' output equals its label.
    """
    caffe.set_mode_cpu()
    net = caffe.Net('fake_model/deploy.prototxt',
                    'snapshots/fake.caffemodel', caffe.TEST)
    transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
    transformer.set_transpose('data', (2, 0, 1))
    #transformer.set_raw_scale('data', 255)
    net.blobs['data'].reshape(49, 1, 140, 140)

    n = 0
    ok = 0
    with open('fake_data/test.txt', 'r') as f:
        lines = f.readlines()
    for line in lines:
        fields = line.split()
        if len(fields) != 2:
            continue
        # split() already removed surrounding whitespace.
        image_path, label = fields
        print('Processing image %s' % image_path)
        n += 1
        img = caffe.io.load_image(image_path, color=False)
        img = img[:, :, [0]]  # keep the first channel, preserving the axis
        net.blobs['data'].data[...] = transformer.preprocess('data', img)
        out = net.forward()
        if out['prob'][0].argmax() == int(label):
            ok += 1

    print('')
    if n == 0:
        # Avoid a ZeroDivisionError when the list has no usable lines.
        print('Accuracy: n/a (no images processed)')
    else:
        print('Accuracy: %d%%' % ((ok * 100.0) / n))
def run_color(image, image_out):
    """Colorize the image file ``image`` and save the RGB result to ``image_out``.

    The L channel of the original image is combined with the network's
    upsampled 'class8_ab' output; the Lab result is converted back to RGB
    and clipped to [0, 1] before saving.
    """
    caffe.set_mode_cpu()
    net = caffe.Net('colorization_deploy_v0.prototxt',
                    'colorization_release_v0.caffemodel', caffe.TEST)
    in_h, in_w = net.blobs['data_l'].data.shape[2:]       # network input size
    out_h, out_w = net.blobs['class8_ab'].data.shape[2:]  # network output size
    net.blobs['Trecip'].data[...] = 6/np.log(10)  # 1/T, set annealing temperature

    rgb = caffe.io.load_image(image)
    lightness = color.rgb2lab(rgb)[:, :, 0]  # L channel at original resolution
    orig_h, orig_w = rgb.shape[:2]

    # Feed the L channel of the resized image, mean-centred by 50.
    rgb_resized = caffe.io.resize_image(rgb, (in_h, in_w))
    lightness_resized = color.rgb2lab(rgb_resized)[:, :, 0]
    net.blobs['data_l'].data[0, 0, :, :] = lightness_resized - 50
    net.forward()

    # Channels-last view of the result, upsampled to the original size.
    ab = net.blobs['class8_ab'].data[0, :, :, :].transpose((1, 2, 0))
    ab_upsampled = sni.zoom(ab, (1.*orig_h/out_h, 1.*orig_w/out_w, 1))
    lab_out = np.concatenate((lightness[:, :, np.newaxis], ab_upsampled), axis=2)
    rgb_out = np.clip(color.lab2rgb(lab_out), 0, 1)
    scipy.misc.imsave(image_out, rgb_out)
def train(solver_prototxt_filename):
    """Run the solver described by ``solver_prototxt_filename`` on the CPU."""
    caffe.set_mode_cpu()
    caffe.get_solver(solver_prototxt_filename).solve()
def __init__(self, params):
    """Store the extraction settings from ``params`` and load the network.

    Keys read from ``params``: 'dimension', 'dataset', 'pooling',
    'query_list', 'frame_list', 'layer', 'database_feats', 'gpu',
    'net_proto' and 'net'.
    """
    self.dimension = params['dimension']
    self.dataset = params['dataset']
    self.pooling = params['pooling']

    # Read image lists
    with open(params['query_list'], 'r') as f:
        self.query_names = f.read().splitlines()
    with open(params['frame_list'], 'r') as f:
        self.database_list = f.read().splitlines()

    # Parameters needed
    self.layer = params['layer']
    self.save_db_feats = params['database_feats']

    # Init network
    if params['gpu']:
        caffe.set_mode_gpu()
        caffe.set_device(0)
    else:
        caffe.set_mode_cpu()
    # Single-argument print() gives the same output on Python 2 and 3.
    print("Extracting from: %s" % (params['net_proto'],))
    cfg.TEST.HAS_RPN = True
    self.net = caffe.Net(params['net_proto'], params['net'], caffe.TEST)
def setUp(self):
    """Build an SGD solver from a temporary solver file and fill its labels.

    The temporary solver file and the net file it references are removed
    once the solver has been created and the label blobs are filled.
    """
    self.num_output = 13
    net_f = simple_net_file(self.num_output)
    solver_settings = (
        "' test_iter: 10 test_interval: 10 base_lr: 0.01 momentum: 0.9 "
        "weight_decay: 0.0005 lr_policy: 'inv' gamma: 0.0001 power: 0.75 "
        "display: 100 max_iter: 100 snapshot_after_train: false "
        'snapshot_prefix: "models" '
    )
    # delete=False: the file must outlive the handle so caffe can read it.
    with tempfile.NamedTemporaryFile(mode="w+", delete=False) as f:
        f.write("net: '" + net_f + solver_settings)
    solver_path = f.name

    self.solver = caffe.SGDSolver(solver_path)
    # also make sure get_solver runs
    caffe.get_solver(solver_path)
    caffe.set_mode_cpu()

    # fill in valid labels
    for net in (self.solver.net, self.solver.test_nets[0]):
        labels = net.blobs["label"].data
        labels[...] = np.random.randint(self.num_output, size=labels.shape)

    os.remove(solver_path)
    os.remove(net_f)
def load_and_fill_biases(src_model, src_weights, dst_model, dst_weights):
    """Write a copy of ``src_model`` whose Convolution layers all have a
    bias term, and return a net built from it with the source weights.

    Convolution layers without a bias get one with a constant 0.0 filler.
    The edited definition is written to ``dst_model``. ``dst_weights`` is
    accepted but nothing is stored to it.
    """
    model = caffe.proto.caffe_pb2.NetParameter()
    with open(src_model) as f:
        pb.text_format.Merge(f.read(), model)

    for layer in model.layer:
        if layer.type != 'Convolution':  # or layer.type == 'Scale':
            continue
        conv = layer.convolution_param
        # Add bias layer if needed
        if conv.bias_term == False:
            conv.bias_term = True
            conv.bias_filler.type = 'constant'
            conv.bias_filler.value = 0.0

    with open(dst_model, 'w') as f:
        f.write(pb.text_format.MessageToString(model))

    caffe.set_mode_cpu()
    net_src = caffe.Net(src_model, src_weights, caffe.TEST)
    net_dst = caffe.Net(dst_model, caffe.TEST)
    # Copy every source blob into the same slot of the destination net.
    for key in net_src.params.keys():
        for index, src_blob in enumerate(net_src.params[key]):
            net_dst.params[key][index].data[:] = src_blob.data[:]

    if dst_weights is not None:
        # Store params
        pass

    return net_dst
def set_caffe_mode(gpu):
    """Use CPU mode when ``gpu`` equals 0, otherwise GPU mode on device 0.

    Always returns 0.
    """
    if gpu != 0:
        # gpu mode
        caffe.set_device(0)
        caffe.set_mode_gpu()
    else:
        # cpu mode
        caffe.set_mode_cpu()
    return 0
def main():
    """Script entry point: convert a YOLO cfg file into a Caffe prototxt.

    The result is written to ``<output>_train_val.prototxt`` when
    ``--train`` is given, otherwise to ``<output>_deploy.prototxt``.
    """
    parser = argparse.ArgumentParser(description='Convert a YOLO cfg file.')
    parser.add_argument('model', type=str, help='YOLO cfg model')
    parser.add_argument('output', type=str, help='output prototxt')
    parser.add_argument('--loclayer', action='store_true',
                        help='use locally connected layer')
    parser.add_argument('--train', action='store_true',
                        help='generate train_val prototxt')
    args = parser.parse_args()

    config = load_configuration(args.model)
    caffe.set_mode_cpu()

    # Report which of the layer types of interest this caffe build knows.
    for known_type in caffe.layer_type_list():
        if known_type in ("Reorg", "convolutional"):
            print("{0} layer recognized".format(known_type))

    model = convert_configuration(config, args.train, args.loclayer)

    if args.train:
        suffix = "train_val"
    else:
        suffix = "deploy"
    model_filename = "{}_{}.prototxt".format(args.output, suffix)

    if args.loclayer:
        model = adjust_params(model, model_filename)

    with open(model_filename, 'w') as fproto:
        fproto.write("{0}".format(model.to_proto()))
def run_crfasrnn(inputfile, outputfile, gpudevice):
    """Segment ``inputfile`` with CRF-RNN and save a figure to ``outputfile``.

    Args:
        inputfile: path of the image to segment.
        outputfile: path handed to ``plt.savefig``.
        gpudevice: values greater than 0 select that GPU device; any other
            value runs on the CPU.

    Raises:
        ValueError: if the image is larger than 500 pixels in height or
            width (the input is padded up to 500x500, never cropped).
    """
    MODEL_FILE = 'TVG_CRFRNN_new_deploy.prototxt'
    PRETRAINED = 'TVG_CRFRNN_COCO_VOC.caffemodel'
    IMAGE_FILE = inputfile

    use_gpu = gpudevice > 0
    if use_gpu:
        caffe.set_device(gpudevice)
        caffe.set_mode_gpu()
    else:
        caffe.set_mode_cpu()

    tic()
    net = caffe.Segmenter(MODEL_FILE, PRETRAINED, use_gpu)
    toc()

    input_image = 255 * caffe.io.load_image(IMAGE_FILE)
    image = PILImage.fromarray(np.uint8(input_image))
    image = np.array(image)

    pallete = getpallete(256)

    mean_vec = np.array([103.939, 116.779, 123.68], dtype=np.float32)
    reshaped_mean_vec = mean_vec.reshape(1, 1, 3)

    # Rearrange channels to form BGR
    im = image[:, :, ::-1]
    # Subtract mean
    im = im - reshaped_mean_vec

    # Pad as necessary
    cur_h, cur_w = im.shape[:2]
    pad_h = 500 - cur_h
    pad_w = 500 - cur_w
    if pad_h < 0 or pad_w < 0:
        # np.pad would fail with an unhelpful message on negative widths.
        raise ValueError(
            'image is %dx%d; at most 500x500 is supported' % (cur_h, cur_w))
    im = np.pad(im, pad_width=((0, pad_h), (0, pad_w), (0, 0)),
                mode='constant', constant_values=0)

    # Get predictions, then drop the padded area again.
    segmentation = net.predict([im])
    segmentation2 = segmentation[0:cur_h, 0:cur_w]
    output_im = PILImage.fromarray(segmentation2)
    output_im.putpalette(pallete)

    plt.imshow(output_im)
    plt.savefig(outputfile)
def init_net():
    """Load the Faster R-CNN test net, run two warm-up detections, return it.

    The prototxt and caffemodel paths are hard-coded; the CPU/GPU choice
    and the GPU id come from ``parse_args()``.

    Raises:
        IOError: if the caffemodel file does not exist.
    """
    cfg.TEST.HAS_RPN = True  # Use RPN for proposals

    args = parse_args()

    # Previously derived from cfg.MODELS_DIR / cfg.DATA_DIR and NETS.
    prototxt = os.path.join('/home/dean/Documents/py-faster-rcnn/models/WIDER_FACE/VGG16/faster_rcnn_end2end', 'test.prototxt')
    caffemodel = os.path.join('/home/dean/Documents/py-faster-rcnn/output/faster_rcnn_end2end/voc_2007_train', 'vgg16_faster_rcnn_iter_50000.caffemodel')

    if not os.path.isfile(caffemodel):
        raise IOError(('{:s} not found.\nDid you run ./data/script/'
                       'fetch_faster_rcnn_models.sh?').format(caffemodel))

    if args.cpu_mode:
        caffe.set_mode_cpu()
    else:
        caffe.set_mode_gpu()
        caffe.set_device(args.gpu_id)
        cfg.GPU_ID = args.gpu_id
    net = caffe.Net(prototxt, caffemodel, caffe.TEST)

    # Single-argument print() gives the same output on Python 2 and 3.
    print('\n\nLoaded network {:s}'.format(caffemodel))

    # Run two detections on a uniform grey image; results are discarded.
    im = 128 * np.ones((300, 500, 3), dtype=np.uint8)
    for _ in range(2):
        im_detect(net, im)
    return net
def __init__(self):
    """Load the reference CaffeNet, its input transformer and class labels.

    The pre-trained weights are downloaded first when the caffemodel file
    is missing under ``~/rapp_platform/caffe/``.
    """
    self.caffe_root = expanduser("~")+'/rapp_platform/caffe/'
    root = self.caffe_root
    weights_path = root + \
        'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'
    if not os.path.isfile(weights_path):
        print("Downloading pre-trained CaffeNet model...")
        os.system(root+"scripts/download_model_binary.py \
            ../models/bvlc_reference_caffenet")

    caffe.set_mode_cpu()
    self.net = caffe.Net(
        root + 'models/bvlc_reference_caffenet/deploy.prototxt',
        weights_path,
        caffe.TEST)

    mean = np.load(
        root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy').mean(1).mean(1)
    transformer = caffe.io.Transformer(
        {'data': self.net.blobs['data'].data.shape})
    self.transformer = transformer
    transformer.set_transpose('data', (2, 0, 1))
    transformer.set_mean('data', mean)
    transformer.set_raw_scale('data', 255)
    transformer.set_channel_swap('data', (2, 1, 0))

    self.net.blobs['data'].reshape(1, 3, 227, 227)
    imagenet_labels_filename = root + 'data/ilsvrc12/synset_words.txt'
    self.labels = np.loadtxt(imagenet_labels_filename, str, delimiter='\t')
def get_gradients(imagepath, neuron_num, layer_num):
    """Return per-filter gradient norms of one layer for one fc8 neuron.

    Args:
        imagepath: image file to feed through the reference CaffeNet.
        neuron_num: index into the 'fc8' blob whose diff is set to 1.
        layer_num: index into ``net.layers`` of the layer to inspect.

    Returns:
        list with one ``np.linalg.norm`` value per entry along the first
        axis of the layer's first parameter blob's diff.
    """
    caffe.set_mode_cpu()
    net = caffe.Net('models/bvlc_reference_caffenet/deploy.prototxt',
                    'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel',
                    caffe.TEST)
    transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
    transformer.set_transpose('data', (2, 0, 1))
    transformer.set_mean('data', np.load('caffe/imagenet/ilsvrc_2012_mean.npy').mean(1).mean(1))  # mean pixel
    transformer.set_raw_scale('data', 255)
    transformer.set_channel_swap('data', (2, 1, 0))
    net.blobs['data'].reshape(1, 3, 227, 227)
    net.blobs['data'].data[...] = transformer.preprocess('data', caffe.io.load_image(imagepath))
    net.blobs['label'].data[...] = 281

    # Forward pass, seed the chosen output neuron, then backpropagate;
    # the returned dicts are not needed, only the stored diffs.
    net.forward()
    net.blobs['fc8'].diff[0, neuron_num] = 1
    net.backward()

    df_dwi = net.layers[layer_num].blobs[0].diff  # Gradients obtained
    return [np.linalg.norm(df_dwi[i, :, :, :]) for i in range(df_dwi.shape[0])]
def pred_f(image, stepSize=stepSize, windowSize=windowSize, param=param,
           marge=None, marge_cut_off=0, ClearSmallObjects=20, list_f=list_f):
    """Segment ``image`` with two averaged nets and analyse the objects found.

    Returns ``(image, table, segmentation_mask, prob_image)``. Contour
    pixels are set to black in ``image`` in place, and the mask is returned
    as 0/255 values. When ``marge_cut_off`` is non-zero, that many pixels
    are cropped from every border of the three returned images.
    """
    caffe.set_mode_cpu()

    weights_dir = "/share/data40T_v2/Peter/pretrained_models"
    net_1 = GetNet("FCN_0.01_0.99_0.0005", weights_dir)
    net_2 = GetNet("DeconvNet_0.01_0.99_0.0005", weights_dir)

    prob_image, bin_image, thresh = pred_image_from_two_nets(
        image, net_1, net_2, stepSize, windowSize,
        param=param, marge=marge, method="avg",
        ClearBorder="Reconstruction")

    mask = DynamicWatershedAlias(prob_image, param)
    mask = remove_small_objects(mask, ClearSmallObjects)
    table = bin_analyser(image, mask, list_f, marge_cut_off)

    # Binarise, then take dilation minus erosion as the object contours.
    mask[mask > 0] = 1.
    dilated = dilation(mask, disk(2))
    eroded = erosion(mask, disk(2))
    contours = dilated - eroded
    rows, cols = np.where(contours == 1)
    image[rows, cols] = np.array([0, 0, 0])

    mask = img_as_ubyte(mask)
    mask[mask > 0] = 255

    if marge_cut_off != 0:
        inner = slice(marge_cut_off, -marge_cut_off)
        image = image[inner, inner]
        mask = mask[inner, inner]
        prob_image = prob_image[inner, inner]

    return image, table, mask, prob_image
def main(argv):
    """Extract network features for every video in a list file.

    For each video that yields at least one frame, the requested blobs are
    extracted and saved with ``save_matrix`` to
    ``<output_dir>/<video base name>.mat``.

    Args:
        argv: accepted for compatibility; options are read by
            ``parser.parse_args()`` from the process arguments.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'video_list',
        help='Input video list. Put path to video file on each line.')
    parser.add_argument(
        'output_dir',
        help='Output directory.')
    parser.add_argument(
        '--sample_rate',
        type=float,
        default=5.0,
        help='Number of frames sampled per second')
    parser.add_argument(
        '--model_def',
        default='/auto/iris-00/rn/chensun/ThirdParty/caffe_models/vgg_16/VGG_ILSVRC_16_layers_deploy.prototxt',
        help='Model definition file (default VGG16)')
    parser.add_argument(
        '--pretrained_model',
        default='/auto/iris-00/rn/chensun/ThirdParty/caffe_models/vgg_16/VGG_ILSVRC_16_layers.caffemodel',
        help='Model parameter file (default VGG16)')
    parser.add_argument(
        '--layers',
        default='fc6,fc7',
        help='Layers to be extracted, separated by commas')
    parser.add_argument(
        '--cpu',
        action='store_true',
        help='Use CPU if set')
    parser.add_argument(
        '--oversample',
        action='store_true',
        help='Oversample 10 patches per frame if set')
    args = parser.parse_args()

    # Single-argument print() gives the same output on Python 2 and 3.
    if args.cpu:
        caffe.set_mode_cpu()
        print('CPU mode')
    else:
        caffe.set_mode_gpu()
        print('GPU mode')

    # store_true already yields a bool, so it is passed straight through.
    extractor = FeatExtractor(args.model_def, args.pretrained_model,
                              oversample=args.oversample)
    blobs = args.layers.split(',')

    with open(args.video_list) as f:
        videos = [l.rstrip() for l in f]

    for video_file in videos:
        frames = load_video(video_file, args.sample_rate)
        if len(frames) < 1:  # failed to open the video
            continue

        start = time.time()
        feats = extractor.extract_batch(frames, blobs)
        print('%s feature extracted in %f seconds.'
              % (os.path.basename(video_file), time.time() - start))
        # save the features
        for blob in blobs:
            feats[blob] = np.array(feats[blob])
        save_matrix(feats, os.path.join(
            args.output_dir,
            '%s.mat' % os.path.basename(video_file).split('.')[0]))

    return
def __init__(self, model_file, pretrained_file, gpu=False, mean=None,
             input_scale=None, raw_scale=None, channel_swap=None,
             context_pad=None):
    """
    Take
    gpu, mean, input_scale, raw_scale, channel_swap: params for
        preprocessing options.
    context_pad: amount of surrounding context to take s.t. a `context_pad`
        sized border of pixels in the network input image is context, as in
        R-CNN feature extraction.
    """
    caffe.Net.__init__(self, model_file, pretrained_file)
    caffe.set_phase_test()

    if gpu:
        caffe.set_mode_gpu()
    else:
        caffe.set_mode_cpu()

    # Apply only the preprocessing options that were given, in the fixed
    # order mean, input_scale, raw_scale, channel_swap.
    options = (
        ('mean', mean),
        ('input_scale', input_scale),
        ('raw_scale', raw_scale),
        ('channel_swap', channel_swap),
    )
    for option, value in options:
        if value is not None:
            getattr(self, 'set_' + option)(self.inputs[0], value)

    self.configure_crop(context_pad)
def main():
    """Parse the command line and run ``predict`` on one input image.

    When no output path is given, ``<input stem>_<dataset>.png`` is used.

    Raises:
        IOError: if the input path is empty or does not exist.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('dataset', nargs='?',
                        choices=['pascal_voc', 'camvid', 'kitti', 'cityscapes'])
    parser.add_argument('input_path', nargs='?', default='',
                        help='Required path to input image')
    parser.add_argument('-o', '--output_path', default=None)
    parser.add_argument('--gpu', type=int, default=-1,
                        help='GPU ID to run CAFFE. '
                             'If -1 (default), CPU is used')
    args = parser.parse_args()

    image_path = args.input_path
    if image_path == '':
        raise IOError('Error: No path to input image')
    if not exists(image_path):
        raise IOError("Error: Can't find input image " + image_path)

    if args.gpu < 0:
        caffe.set_mode_cpu()
        print('Using CPU')
    else:
        caffe.set_mode_gpu()
        caffe.set_device(args.gpu)
        print('Using GPU ', args.gpu)

    if args.output_path is None:
        stem = splitext(image_path)[0]
        args.output_path = '{}_{}.png'.format(stem, args.dataset)
    predict(args.dataset, args.input_path, args.output_path)
def __init__(self, model_def_file, pretrained_model_file, mean_file,
             raw_scale, class_labels_file, image_dim, gpu_mode):
    """Load the classifier with a mean image taken from a serialized
    ``BlobProto`` file, and load the class labels.

    Args:
        model_def_file: network definition passed to ``caffe.Classifier``.
        pretrained_model_file: weights passed to ``caffe.Classifier``.
        mean_file: serialized ``BlobProto`` holding the mean image.
        raw_scale: forwarded to ``caffe.Classifier``.
        class_labels_file: text file, one "<synset_id> <names>" per line.
        image_dim: side length used for ``image_dims``.
        gpu_mode: truthy selects GPU mode, otherwise CPU mode.
    """
    logging.info('Loading net and associated files...')
    if gpu_mode:
        caffe.set_mode_gpu()
    else:
        caffe.set_mode_cpu()

    # Serialized protobuf is binary: text mode would decode or corrupt it.
    with open(mean_file, 'rb') as f:
        blob = caffe.proto.caffe_pb2.BlobProto()
        blob.ParseFromString(f.read())
    mean_arr = np.array(caffe.io.blobproto_to_array(blob))
    # Resize the first mean image to 224x224; resize_image is given a
    # channels-last array, so transpose in and back out.
    mean_resize = caffe.io.resize_image(
        mean_arr[0].transpose((1, 2, 0)), (224, 224)).transpose((2, 0, 1))

    self.net = caffe.Classifier(
        model_def_file, pretrained_model_file,
        image_dims=(image_dim, image_dim), raw_scale=raw_scale,
        # mean=np.load(mean_file).mean(1).mean(1), channel_swap=(2, 1, 0)
        mean=mean_resize, channel_swap=(2, 1, 0)
    )

    with open(class_labels_file) as f:
        labels_df = pd.DataFrame([
            {
                'synset_id': l.strip().split(' ')[0],
                # Keep only the first comma-separated name.
                'name': ' '.join(l.strip().split(' ')[1:]).split(',')[0]
            }
            for l in f.readlines()
        ])
    # DataFrame.sort() was removed from pandas; sort_values() replaces it.
    self.labels = labels_df.sort_values('synset_id')['name'].values
def initialize_model(model, mode):
    """Build a ``caffe_feat`` extractor for a named model and compute mode.

    Args:
        model: model name, matched case-insensitively by substring against
            "cafferef", "vgg" (with "vgg16" or "vgg19") and "action".
            ``None`` selects "cafferef". When several match, later matches
            override earlier ones.
        mode: string containing "cpu" and/or "gpu" (case-insensitive);
            ``None`` selects "cpu".

    Returns:
        The result of ``caffe_feat(proto, weights, image mean, image size)``.

    Raises:
        ValueError: if ``model`` does not select a network definition and
            weights file.
    """
    if mode is None:
        mode = "cpu"
    if model is None:
        model = "cafferef"
    mode_key = mode.lower()
    model_key = model.lower()

    # Resolve the model first so an unknown name fails with a clear error
    # before any Caffe state is touched.
    cnn_proto = cnn_model = cnn_imgmean = cnn_imgsize = None
    if "cafferef" in model_key:
        cnn_proto = caffe_root + "models/bvlc_reference_caffenet/deploy.prototxt"
        cnn_model = caffe_root + "models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel"
        cnn_imgmean = np.load(caffe_root + "python/caffe/imagenet/ilsvrc_2012_mean.npy").mean(1).mean(1)
        cnn_imgsize = 227
    if "vgg" in model_key:
        if "vgg16" in model_key:
            cnn_proto = caffe_root + "models/VGG_ILSVRC_16_layers/VGG_ILSVRC_16_layers_deploy.prototxt"
            cnn_model = caffe_root + "models/VGG_ILSVRC_16_layers/VGG_ILSVRC_16_layers.caffemodel"
        if "vgg19" in model_key:
            cnn_proto = caffe_root + "models/VGG_ILSVRC_19_layers/VGG_ILSVRC_19_layers_deploy.prototxt"
            cnn_model = caffe_root + "models/VGG_ILSVRC_19_layers/VGG_ILSVRC_19_layers.caffemodel"
        cnn_imgmean = np.array([103.939, 116.779, 123.68])
        cnn_imgsize = 224
    if "action" in model_key:
        cnn_proto = caffe_root + "models/action_cube/deploy_extractpred.prototxt"
        cnn_model = caffe_root + "models/action_cube/action_cube.caffemodel"
        cnn_imgmean = np.array([128, 128, 128])
        cnn_imgsize = 227

    if cnn_proto is None or cnn_model is None:
        raise ValueError("unsupported model name: %r" % (model,))

    if "cpu" in mode_key:
        caffe.set_mode_cpu()
    if "gpu" in mode_key:
        caffe.set_device(0)
        caffe.set_mode_gpu()

    cf = caffe_feat(cnn_proto, cnn_model, cnn_imgmean, cnn_imgsize)
    return cf
def __init__(self, model_file, pretrained_file, image_dims=None,
             gpu=False, mean=None, input_scale=None, raw_scale=None,
             channel_swap=None):
    """
    Take
    image_dims: dimensions to scale input for cropping/sampling.
        Default is to scale to net input size for whole-image crop.
    gpu, mean, input_scale, raw_scale, channel_swap: params for
        preprocessing options.
    """
    caffe.Net.__init__(self, model_file, pretrained_file)
    caffe.set_phase_test()

    if gpu:
        caffe.set_mode_gpu()
    else:
        caffe.set_mode_cpu()

    # Apply only the preprocessing options that were given, in the fixed
    # order mean, input_scale, raw_scale, channel_swap.
    options = (
        ('mean', mean),
        ('input_scale', input_scale),
        ('raw_scale', raw_scale),
        ('channel_swap', channel_swap),
    )
    for option, value in options:
        if value is not None:
            getattr(self, 'set_' + option)(self.inputs[0], value)

    #self.crop_dims = np.array(self.blobs[self.inputs[0]].data.shape[2:])
    # Fall back to self.crop_dims when no (or empty) image_dims is given.
    self.image_dims = image_dims if image_dims else self.crop_dims
def __init__(self, config, gpu=False):
    """Load the test-phase net described by ``config``, then select the
    GPU or CPU compute mode.

    ``config.deploy_file`` and ``config.model_file`` are converted to
    ``str`` before being passed to the base class.
    """
    deploy_file = str(config.deploy_file)
    model_file = str(config.model_file)
    super(Net, self).__init__(deploy_file, model_file, caffe.TEST)
    select_mode = caffe.set_mode_gpu if gpu else caffe.set_mode_cpu
    select_mode()
def main():
    """Run the three-stage face detector on every image in ``./imglist.txt``.

    The list file holds one image path per line. Images that cannot be
    read are reported and skipped. Detected boxes are drawn on a copy of
    each image held in memory; nothing is displayed or saved here.
    """
    imglistfile = "./imglist.txt"

    minsize = 20
    caffe_model_path = "./model"
    threshold = [0.6, 0.7, 0.7]
    factor = 0.709

    caffe.set_mode_cpu()
    PNet = caffe.Net(caffe_model_path + "/det1.prototxt",
                     caffe_model_path + "/det1.caffemodel", caffe.TEST)
    RNet = caffe.Net(caffe_model_path + "/det2.prototxt",
                     caffe_model_path + "/det2.caffemodel", caffe.TEST)
    ONet = caffe.Net(caffe_model_path + "/det3.prototxt",
                     caffe_model_path + "/det3.caffemodel", caffe.TEST)

    # The context manager closes the list file even if detection fails.
    with open(imglistfile, 'r') as f:
        for imgpath in f.readlines():
            imgpath = imgpath.split('\n')[0]
            # Same output as the Python 2 form: marker line, then the path.
            print("######\n%s" % imgpath)

            img = cv2.imread(imgpath)
            if img is None:
                # cv2.imread returns None for unreadable or missing files.
                print("could not read image, skipping: %s" % imgpath)
                continue

            # Swap channels 0 and 2 on a copy (check rgb position).
            img_matlab = img.copy()
            tmp = img_matlab[:, :, 2].copy()
            img_matlab[:, :, 2] = img_matlab[:, :, 0]
            img_matlab[:, :, 0] = tmp

            boundingboxes, _ = detect_face(img_matlab, minsize, PNet, RNet,
                                           ONet, threshold, False, factor)

            for box in boundingboxes:
                cv2.rectangle(img,
                              (int(box[1]), int(box[0])),
                              (int(box[3]), int(box[2])),
                              (0, 255, 0), 1)

            img = drawBoxes(img, boundingboxes)
            #cv2.imshow('img', img)
            #ch = cv2.waitKey(0) & 0xFF
            #if ch == 27:
            #    break
# insert into correct place in_data[0:len(batch_range), :, :, :] = batch_images # predict features ftrs = predict(in_data, net) toc = time.time() for j in range(len(batch_range)): allftrs[i+j,:] = ftrs[j,:] return allftrs input_idx = int(sys.argv[1]) model_def = '/home/t-yuche/neuraltalk/python_features/VGG_ILSVRC_16_layers_deploy.prototxt' model = '/home/t-yuche/caffe/models/vgg_ilsvrc_16/VGG_ILSVRC_16_layers.caffemodel' caffe.set_mode_cpu() net = caffe.Net(model_def, model, 0) OUTPUT_FOLDER = '/mnt/tags/fei-caption-all' # Load all images FRAME_FOLDER = '/mnt/frames' all_video_names = os.listdir(FRAME_FOLDER) if input_idx >= len(all_video_names): exit(-1) video_name = all_video_names[input_idx] all_frames = [os.path.join(FRAME_FOLDER, video_name, x) for x in os.listdir(os.path.join(FRAME_FOLDER, video_name))] # Load unprocessed frames to filenames
def customClassifyImages(jobPath, socketid, result_path):
    """Classify every file in ``<jobPath>/test`` and publish results to Redis.

    The model and label files are read from ``<jobPath>/util``. One
    'web_result' message is published on the 'chat' channel per image,
    followed by a completion message. If anything fails, the formatted
    traceback is published instead of being raised.

    Args:
        jobPath: job directory containing the 'test' and 'util' folders.
        socketid: identifier attached (as a string) to every message.
        result_path: prefix joined with each file name to form result keys.
    """
    # Establishing connection to send results and write messages
    rs = redis.StrictRedis(host=config.REDIS_HOST, port=6379)

    try:
        ImagePath = os.path.join(jobPath, 'test')
        modelPath = os.path.join(jobPath, 'util')

        new_labels = sio.loadmat(os.path.join(modelPath, 'new_labels.mat'))
        new_labels_cells = new_labels['WNID']

        # Set the right path to your model file, pretrained model,
        # and the image you would like to classify.
        MODEL_FILE = os.path.join(modelPath, 'newCaffeModel.prototxt')
        PRETRAINED = os.path.join(modelPath, 'newCaffeModel.caffemodel')

        # caffe.set_phase_test()
        caffe.set_mode_cpu()

        CAFFE_DIR = os.path.normpath(
            os.path.join(os.path.dirname(caffe.__file__), "..", ".."))
        mean_path = os.path.join(
            CAFFE_DIR, 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
        net = caffe.Classifier(
            MODEL_FILE,
            PRETRAINED,
            mean=np.load(mean_path).mean(1).mean(1),
            channel_swap=(2, 1, 0),
            raw_scale=255,
            image_dims=(256, 256))

        if os.path.isdir(ImagePath):
            for file_name in os.listdir(ImagePath):
                image_path = os.path.join(ImagePath, file_name)
                if os.path.isfile(image_path):
                    tags = caffe_classify_image(
                        net, image_path, new_labels_cells
                    )  # NOTE: UNDEFINED NAME caffe_classify_image
                    webResult = {os.path.join(result_path, file_name): tags}
                    rs.publish(
                        'chat',
                        json.dumps({
                            'web_result': json.dumps(webResult),
                            'socketid': str(socketid)
                        }))

            rs.publish(
                'chat',
                json.dumps({
                    'message':
                    'Classification completed. Thank you for using CloudCV',
                    'socketid': str(socketid)
                }))

    except Exception:
        # Report the failure to the client. KeyboardInterrupt and
        # SystemExit are no longer swallowed, so the process can be stopped.
        rs.publish(
            'chat',
            json.dumps({
                'message': str(traceback.format_exc()),
                'socketid': str(socketid)
            }))
def main():
    """Caption videos with a trained S2VT LSTM and write the sentences to disk.

    Parses the command line, loads ``<DIR>/<modelname>.caffemodel`` into the
    deploy network, then processes the validation (default) or test split in
    chunks of ``NUM_OUT_PER_CHUNK`` videos.  Each chunk's sentences are
    appended to one text file per decoding strategy; with ``--htmlout`` an
    HTML file is also written per chunk.

    Relies on the module-level ``DEVICE_ID``, ``BUFFER_SIZE`` and
    ``MAX_WORDS`` settings.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--modelname", type=str, required=True,
                        help='Name of model without ".caffemodel" extension')
    parser.add_argument(
        "-t", "--testset", action='store_true',
        help='Evaluate on test set. If unspecified then val set.')
    parser.add_argument("-o", "--htmlout", action='store_true',
                        help='output sentences as html to visually compare')
    parser.add_argument("-g", "--gold", action='store_true',
                        help='groundtruth sentences for scoring/retrieval')
    args = parser.parse_args()

    # TODO: Input the snapshot directory, vocab path, frames (and sents) path
    DIR = './snapshots'
    VOCAB_FILE = './data/yt_coco_mvad_mpiimd_vocabulary.txt'
    FRAMEFEAT_FILE_PATTERN = './data/yt_allframes_vgg_fc7_{0}.txt'
    LSTM_NET_FILE = './s2vt.words_to_preds.deploy.prototxt'
    RESULTS_DIR = './results'
    MODEL_FILE = '%s/%s.caffemodel' % (DIR, args.modelname)
    # NOTE(review): --gold is a store_true flag, so this is True or None,
    # never a path -- confirm what the sequence generator expects here.
    SENTS_FILE = args.gold if args.gold else None  # optional
    NET_TAG = args.modelname

    if DEVICE_ID >= 0:
        caffe.set_mode_gpu()
        caffe.set_device(DEVICE_ID)
    else:
        caffe.set_mode_cpu()
    print("Setting up LSTM NET")
    lstm_net = caffe.Net(LSTM_NET_FILE, MODEL_FILE, caffe.TEST)
    print("Done")

    STRATEGIES = [
        {'type': 'beam', 'beam_size': 1},
    ]
    NUM_OUT_PER_CHUNK = 30
    START_CHUNK = 0

    DATASETS = []  # split_name, data_split_name, aligned
    if args.testset:
        DATASETS.append(('test', 'test', False))
    else:
        DATASETS.append(('valid', 'val', False))

    for split_name, data_split_name, aligned in DATASETS:
        filenames = [(FRAMEFEAT_FILE_PATTERN.format(data_split_name),
                      SENTS_FILE)]
        fsg = fc7FrameSequenceGenerator(filenames, BUFFER_SIZE, VOCAB_FILE,
                                        max_words=MAX_WORDS, align=aligned,
                                        shuffle=False, pad=aligned,
                                        truncate=aligned)
        video_gt_pairs = all_video_gt_pairs(fsg)
        print('Read %d videos pool feats' % len(fsg.vid_framefeats))
        # Floor division keeps the chunk count an int on Python 2 and 3.
        NUM_CHUNKS = (len(fsg.vid_framefeats) // NUM_OUT_PER_CHUNK) + 1
        eos_string = '<EOS>'
        # add english inverted vocab
        vocab_list = [eos_string] + fsg.vocabulary_inverted
        # Materialise the keys once: a list on Python 2, and sliceable on
        # Python 3 where keys() is a view.
        video_ids = list(video_gt_pairs.keys())

        if not os.path.exists(RESULTS_DIR):
            os.makedirs(RESULTS_DIR)
        text_out_filename = '%s/%s.%s_' % \
            (RESULTS_DIR, data_split_name, NET_TAG)

        for c in range(START_CHUNK, NUM_CHUNKS):
            chunk_start = c * NUM_OUT_PER_CHUNK
            chunk_end = (c + 1) * NUM_OUT_PER_CHUNK
            chunk = video_ids[chunk_start:chunk_end]
            html_out_filename = '%s/%s.%s.%d_to_%d.html' % \
                (RESULTS_DIR, data_split_name, NET_TAG, chunk_start, chunk_end)

            outputs = run_pred_iters(lstm_net, chunk, video_gt_pairs, fsg,
                                     strategies=STRATEGIES,
                                     display_vocab=vocab_list)
            if args.htmlout:
                html_out = to_html_output(outputs, vocab_list)
                with open(html_out_filename, 'w') as html_out_file:
                    html_out_file.write(html_out)

            text_out_types = to_text_output(outputs, vocab_list)
            # Pre-set so the progress line below cannot hit an unbound name
            # when no strategy produced output.
            text_out_fname = ''
            for strat_type in text_out_types:
                text_out_fname = text_out_filename + strat_type + '.txt'
                # Append: every chunk adds to the same per-strategy file.
                with open(text_out_fname, 'a') as text_out_file:
                    text_out_file.write(''.join(text_out_types[strat_type]))
            print('(%d-%d) Appending to file: %s' %
                  (chunk_start, chunk_end, text_out_fname))
def __init__(self, params): '''''' self.dataset = params['dataset'] self.image_path = params['database_images'] self.dimension = params['dimension'] self.layer = params['layer'] self.top_n = params['num_rerank'] self.reranking_path = params['reranking_path'] self.REG_BOXES = params['use_regressed_boxes'] self.pooling = params['pooling'] self.stage = params['stage'] self.N_QE = params['N_QE'] self.class_scores = params['use_class_scores'] print "\n\n" print "dataset:", self.dataset print "image_path:", self.image_path print "dimension:", self.dimension print "layer:", self.layer print "top_n:", self.top_n print "reranking_path:", self.reranking_path print "REG_BOXES:", self.REG_BOXES print "pooling:", self.pooling print "stage:", self.stage print "N_QE:", self.N_QE print "class_scores:", self.class_scores print "\n\n" with open(params['frame_list'], 'r') as f: self.database_list = f.read().splitlines() print "len of database:", len(self.database_list) print "example:", self.database_list[0] print "\n" with open(params['query_list'], 'r') as f: self.query_names = f.read().splitlines() print "len of queries:", len(self.query_names) print "example:", self.query_names[0] print "\n" # Distance type self.dist_type = params['distance'] # Where to store the rankings self.rankings_dir = params['rankings_dir'] print "dist_type:", self.dist_type print "\n" # Init network if params['gpu']: caffe.set_mode_gpu() caffe.set_device(0) else: caffe.set_mode_cpu() cfg.TEST.HAS_RPN = True self.net = caffe.Net(params['net_proto'], params['net'], caffe.TEST) self.queries = params['query_names'] # List of queries print "\n\n" for query in self.queries: print "query:", query print "len of queries:", len(self.queries) print "\n\n" if self.pooling is 'sum': # PCA Models if self.dataset is 'paris': pca_model_pkl = params['pca_model'] + '_oxford.pkl' elif self.dataset is 'oxford': pca_model_pkl = params['pca_model'] + '_paris.pkl' # Load self.pca = pickle.load(open(pca_model_pkl, 'rb')) print 
"pca_model_pkl:", pca_model_pkl print "\n\n" print "Init Done!" time.sleep(sleep_time)
def Inpt_OPT_New_Bias(original_prototxt_path, original_model_path,
                      optimized_prototxt_path, new_model_path, mean_vector,
                      scale, H, W, input_channel):
    """Fold input mean subtraction into the bias of the first learnable layer.

    A throw-away network made of an Input layer plus the first non-input
    layer is fed ``-mean_vector * scale`` at every pixel.  The per-channel
    average of its response is the bias correction.  The correction is added
    to the existing bias, or installed as a new bias (with ``bias_term``
    switched on in the optimized prototxt) when the layer had none.

    Args:
        original_prototxt_path: Network definition to read.
        original_model_path: Weights matching ``original_prototxt_path``.
        optimized_prototxt_path: Where the resulting prototxt is written.
        new_model_path: Where the resulting caffemodel is written.
        mean_vector: Per-channel mean; reshaped to ``(input_channel, 1)``.
        scale: Input scale multiplied into the mean.
        H, W: Spatial size of the probe input.
        input_channel: Number of input channels.

    Raises:
        AssertionError: If the first non-input layer is not Convolution or
            InnerProduct, or is a Convolution whose ``pad`` is non-zero.
    """
    net_param = caffe_pb2.NetParameter()
    with open(original_prototxt_path, 'rt') as f:
        Parse(f.read(), net_param)
    layer_num = len(net_param.layer)

    # Probe network: a 1 x C x H x W Input layer followed by the first layer.
    new_net_param = caffe_pb2.NetParameter()
    new_net_param.name = 'calc_new_bias'
    new_net_param.layer.add()
    new_net_param.layer[-1].name = "data"
    new_net_param.layer[-1].type = 'Input'
    new_net_param.layer[-1].top.append('data')
    new_net_param.layer[-1].input_param.shape.add()
    new_net_param.layer[-1].input_param.shape[-1].dim.append(1)
    new_net_param.layer[-1].input_param.shape[-1].dim.append(
        int(input_channel))
    new_net_param.layer[-1].input_param.shape[-1].dim.append(int(H))
    new_net_param.layer[-1].input_param.shape[-1].dim.append(int(W))

    target_blob_name = ''
    target_layer_name = ''
    input_layer_type = ['Data', 'Input', 'AnnotatedData']
    for layer_idx in range(0, layer_num):
        layer = net_param.layer[layer_idx]
        if layer.type in input_layer_type:
            continue
        assert (layer.type == 'Convolution' or layer.type == 'InnerProduct'
                ), "## ERROR : First Layer MUST BE CONV or IP. ##"
        new_net_param.layer.extend([layer])
        if layer.type == 'Convolution':
            # An unset pad is an empty repeated field.  Test the length
            # instead of catching the IndexError, so the assertion itself
            # can no longer be swallowed.
            pads = new_net_param.layer[-1].convolution_param.pad
            assert (
                len(pads) == 0 or pads[0] == 0
            ), '## ERROR : MEAN cannot be mearged into CONV with padding > 0. ##'
        target_blob_name = layer.top[0]
        target_layer_name = layer.name
        break

    new_proto_name = './tmpfile.prototxt'
    with open(new_proto_name, 'wt') as f:
        f.write(MessageToString(new_net_param))

    caffe.set_mode_cpu()
    try:
        net = caffe.Net(new_proto_name, str(original_model_path), caffe.TEST)
    finally:
        # The temporary prototxt is only needed while the net is built.
        os.remove(new_proto_name)

    mean_array = mean_vector * (-1.0) * scale
    mean_array = mean_array.reshape(input_channel, 1)
    mean_array = np.tile(mean_array, (1, H * W)).reshape(1, input_channel,
                                                         H, W)

    net.blobs['data'].data[...] = mean_array
    net.forward()
    mean_data = net.blobs[target_blob_name].data[...]
    # One row per output channel.  "-1" covers both the 4-D convolution
    # response and the 2-D InnerProduct response.
    mean_data = mean_data.reshape(mean_data.shape[1], -1)
    new_bias = np.mean(mean_data, 1)
    print("INPUT PREPROCESS (SUB MEAN) OPT : Calc New Bias Done.")
    del net

    caffe.set_mode_cpu()
    net = caffe.Net(original_prototxt_path, str(original_model_path),
                    caffe.TEST)
    if len(net.params[target_layer_name]) == 2:
        # with bias
        net.params[target_layer_name][1].data[...] += new_bias[...]
        net.save(new_model_path)
        try:
            shutil.copyfile(original_prototxt_path, optimized_prototxt_path)
        except shutil.Error:
            # same file, not need to copy
            pass
    else:
        # No bias yet: enable bias_term in the prototxt, rebuild the net
        # from it, copy every parameter over and install the new bias.
        net_param = caffe_pb2.NetParameter()
        with open(original_prototxt_path, 'rt') as f:
            Parse(f.read(), net_param)
        layer_num = len(net_param.layer)
        for layer_idx in range(0, layer_num):
            layer = net_param.layer[layer_idx]
            if layer.name == target_layer_name:
                if layer.type == 'Convolution':
                    net_param.layer[
                        layer_idx].convolution_param.bias_term = True
                else:
                    net_param.layer[
                        layer_idx].inner_product_param.bias_term = True
                break
        with open(optimized_prototxt_path, 'wt') as f:
            f.write(MessageToString(net_param))

        net_param_dict = net.params
        del net
        new_net = caffe.Net(optimized_prototxt_path, caffe.TEST)
        for param_name in net_param_dict.keys():
            for i in range(0, len(net_param_dict[param_name])):
                new_net.params[param_name][i].data[
                    ...] = net_param_dict[param_name][i].data[...]
        new_net.params[target_layer_name][1].data[...] = new_bias[...]
        new_net.save(new_model_path)

    print("INPUT PREPROCESS (SUB MEAN) OPT : Merge Mean Done.")
    print(bcolors.OKGREEN + "INPUT PREPROCESS (SUB MEAN) OPT : Model at " +
          new_model_path + "." + bcolors.ENDC)
    print(bcolors.OKGREEN + "INPUT PREPROCESS (SUB MEAN) OPT : Prototxt at " +
          optimized_prototxt_path + "." + bcolors.ENDC)
    print(bcolors.WARNING +
          "INPUT PREPROCESS (SUB MEAN) OPT : ** WARNING ** Remember to set mean values to zero before test !!!"
          + bcolors.ENDC)
def AFFine_OPT_Create_Caffemodel(original_prototxt_path, original_model_path,
                                 optimized_prototxt_path, new_model_path):
    """Write a caffemodel in which BatchNorm/Scale layers are folded away.

    The optimized prototxt must already exist.  For every parameterised
    layer in it, the weights are copied from the original model.  For a
    Convolution / InnerProduct layer, the run of in-place Scale and
    BatchNorm layers that directly follows it in the original prototxt is
    folded into that layer's weights and bias.

    Args:
        original_prototxt_path: Definition of the original network.
        original_model_path: Weights of the original network.
        optimized_prototxt_path: Definition of the merged network.
        new_model_path: Where the merged caffemodel is saved.
    """
    net_param = caffe_pb2.NetParameter()
    with open(original_prototxt_path, 'rt') as f:
        Parse(f.read(), net_param)

    param_layer_type_list = [layer.type for layer in net_param.layer]
    param_layer_name_list = [layer.name for layer in net_param.layer]
    target_layer_type = ['Convolution', 'InnerProduct']
    merge_layer_type = ['Scale', 'BatchNorm']

    caffe.set_mode_cpu()
    net = caffe.Net(original_prototxt_path, original_model_path, caffe.TEST)
    net_param_dict = net.params
    del net
    new_net = caffe.Net(optimized_prototxt_path, caffe.TEST)

    layer_count = len(param_layer_type_list)
    for param_name in new_net.params.keys():
        param_layer_idx = param_layer_name_list.index(param_name)
        param_layer_type = param_layer_type_list[param_layer_idx]

        if param_layer_type not in target_layer_type:
            # OTHER LAYERS: plain copy of every blob.
            for i in range(0, len(net_param_dict[param_name])):
                new_net.params[param_name][i].data[
                    ...] = net_param_dict[param_name][i].data[...]
            continue

        kernel_num = net_param_dict[param_name][0].num
        new_net.params[param_name][0].data[
            ...] = net_param_dict[param_name][0].data[...]
        if len(net_param_dict[param_name]) == 2:
            new_net.params[param_name][1].data[
                ...] = net_param_dict[param_name][1].data[...]

        if param_layer_idx + 1 >= layer_count:
            # LAST LAYER, then BREAK
            break

        for i in range(param_layer_idx + 1, layer_count):
            # CHECK : CONV + BN +SCALE / CONV + BN / IP + ...
            affine_layer_type = param_layer_type_list[i]
            affine_layer_name = param_layer_name_list[i]
            if affine_layer_type not in merge_layer_type:
                # NOT BN or SCALE, then BREAK
                break

            # MERGE BN/SCALE
            if affine_layer_type == "Scale":
                if len(net_param.layer[i].bottom) >= 2:
                    # NOT In-place Scale: kept as a layer, so just copy its
                    # blobs -- when it has any.
                    try:
                        for j in range(
                                0, len(net_param_dict[affine_layer_name])):
                            new_net.params[affine_layer_name][j].data[
                                ...] = net_param_dict[affine_layer_name][
                                    j].data[...]
                    except KeyError:
                        # no parameter
                        break
                else:
                    # In-place Scale: w' = w * s, b' = b * s + beta
                    scale = net_param_dict[affine_layer_name][0].data
                    if len(net_param_dict[affine_layer_name]) == 2:
                        bias = net_param_dict[affine_layer_name][1].data
                    else:
                        bias = 0.0 * scale
                    for k in range(0, kernel_num):
                        new_net.params[param_name][0].data[
                            k] = new_net.params[param_name][0].data[
                                k] * scale[k]
                        new_net.params[param_name][1].data[
                            k] = new_net.params[param_name][1].data[
                                k] * scale[k] + bias[k]
            elif affine_layer_type == "BatchNorm":
                epsilon = 1e-5
                # The third blob's first value divides the stored mean and
                # variance, unless it is zero.
                scale = net_param_dict[affine_layer_name][2].data[0]
                if scale != 0:
                    mean = net_param_dict[affine_layer_name][0].data / scale
                    std = np.sqrt(
                        net_param_dict[affine_layer_name][1].data / scale +
                        epsilon)
                else:
                    mean = net_param_dict[affine_layer_name][0].data
                    std = np.sqrt(
                        net_param_dict[affine_layer_name][1].data + epsilon)
                # w' = w / std, b' = (b - mean) / std
                for k in range(0, kernel_num):
                    new_net.params[param_name][0].data[
                        k] = new_net.params[param_name][0].data[k] / std[k]
                    new_net.params[param_name][1].data[k] = (
                        new_net.params[param_name][1].data[k] -
                        mean[k]) / std[k]
            else:
                # TODO -- unreachable while merge_layer_type lists only Scale
                # and BatchNorm.  Raised explicitly so "python -O" cannot
                # strip it.
                raise AssertionError(
                    "## TODO ## : Other layers haven't been supported yet. ##"
                )

    new_net.save(new_model_path)
    print(bcolors.OKGREEN + "BN SCALE OPT : Model at " + new_model_path +
          "." + bcolors.ENDC)
def __init__(self, settings, key_bindings): super(CaffeVisApp, self).__init__(settings, key_bindings) print 'Got settings', settings self.settings = settings self.bindings = key_bindings self._net_channel_swap = settings.caffe_net_channel_swap if self._net_channel_swap is None: self._net_channel_swap_inv = None else: self._net_channel_swap_inv = tuple([ self._net_channel_swap.index(ii) for ii in range(len(self._net_channel_swap)) ]) self._range_scale = 1.0 # not needed; image already in [0,255] # Set the mode to CPU or GPU. Note: in the latest Caffe # versions, there is one Caffe object *per thread*, so the # mode must be set per thread! Here we set the mode for the # main thread; it is also separately set in CaffeProcThread. sys.path.insert(0, os.path.join(settings.caffevis_caffe_root, 'python')) import caffe if settings.caffevis_mode_gpu: caffe.set_mode_gpu() print 'CaffeVisApp mode (in main thread): GPU' else: caffe.set_mode_cpu() print 'CaffeVisApp mode (in main thread): CPU' self.net = caffe.Classifier( settings.caffevis_deploy_prototxt, settings.caffevis_network_weights, mean= None, # Set to None for now, assign later # self._data_mean, channel_swap=self._net_channel_swap, raw_scale=self._range_scale, ) if isinstance(settings.caffevis_data_mean, basestring): # If the mean is given as a filename, load the file try: filename, file_extension = os.path.splitext( settings.caffevis_data_mean) if file_extension == ".npy": # load mean from numpy array self._data_mean = np.load(settings.caffevis_data_mean) print "Loaded mean from numpy file, data_mean.shape: ", self._data_mean.shape elif file_extension == ".binaryproto": # load mean from binary protobuf file blob = caffe.proto.caffe_pb2.BlobProto() data = open(settings.caffevis_data_mean, 'rb').read() blob.ParseFromString(data) self._data_mean = np.array( caffe.io.blobproto_to_array(blob)) self._data_mean = np.squeeze(self._data_mean) print "Loaded mean from binaryproto file, data_mean.shape: ", self._data_mean.shape 
else: # unknown file extension, trying to load as numpy array self._data_mean = np.load(settings.caffevis_data_mean) print "Loaded mean from numpy file, data_mean.shape: ", self._data_mean.shape except IOError: print '\n\nCound not load mean file:', settings.caffevis_data_mean print 'Ensure that the values in settings.py point to a valid model weights file, network' print 'definition prototxt, and mean. To fetch a default model and mean file, use:\n' print '$ cd models/caffenet-yos/' print '$ ./fetch.sh\n\n' raise input_shape = self.net.blobs[self.net.inputs[0]].data.shape[ -2:] # e.g. 227x227 # Crop center region (e.g. 227x227) if mean is larger (e.g. 256x256) excess_h = self._data_mean.shape[1] - input_shape[0] excess_w = self._data_mean.shape[2] - input_shape[1] assert excess_h >= 0 and excess_w >= 0, 'mean should be at least as large as %s' % repr( input_shape) self._data_mean = self._data_mean[:, (excess_h / 2):(excess_h / 2 + input_shape[0]), (excess_w / 2):(excess_w / 2 + input_shape[1])] elif settings.caffevis_data_mean is None: self._data_mean = None else: # The mean has been given as a value or a tuple of values self._data_mean = np.array(settings.caffevis_data_mean) # Promote to shape C,1,1 while len(self._data_mean.shape) < 1: self._data_mean = np.expand_dims(self._data_mean, -1) #if not isinstance(self._data_mean, tuple): # # If given as int/float: promote to tuple # self._data_mean = tuple(self._data_mean) if self._data_mean is not None: self.net.transformer.set_mean(self.net.inputs[0], self._data_mean) check_force_backward_true(settings.caffevis_deploy_prototxt) self.labels = None if self.settings.caffevis_labels: self.labels = read_label_file(self.settings.caffevis_labels) self.proc_thread = None self.jpgvis_thread = None self.handled_frames = 0 if settings.caffevis_jpg_cache_size < 10 * 1024**2: raise Exception( 'caffevis_jpg_cache_size must be at least 10MB for normal operation.' 
) self.img_cache = FIFOLimitedArrayCache( settings.caffevis_jpg_cache_size) self._populate_net_layer_info()
def _load_mean_value(spec):
    """Turn the ``--mean_value`` argument into a mean array (None if empty).

    ``spec`` is either a comma-separated list of numbers, or the path of a
    mean file (``.npy``, otherwise parsed as a Caffe binaryproto).  The
    option's default is such a path, which the number parser alone rejects.
    """
    if not spec:
        return None
    try:
        return np.array([float(s) for s in spec.split(',')])
    except ValueError:
        pass  # not a list of numbers: treat it as a mean file below

    if spec.endswith('.npy'):
        mean = np.load(spec)
    else:
        blob = caffe.proto.caffe_pb2.BlobProto()
        with open(spec, 'rb') as mean_file:
            blob.ParseFromString(mean_file.read())
        mean = np.squeeze(caffe.io.blobproto_to_array(blob))
    # NOTE(review): assumes SClassifier wants one value per channel, as the
    # numeric form supplies -- confirm against SClassifier.
    if mean.ndim == 3:
        mean = mean.mean(1).mean(1)
    return mean


def main(argv):
    """Extract a feature blob and a score blob for every input image.

    Inputs come from a single image, a directory of ``*.<ext>`` images, or a
    list file read with ``load_txt``.  Each image is pushed through the
    classifier on its own; the requested blobs are flattened into one row
    per image and saved as ``.mat`` files.  A running accuracy against the
    labels is printed after every image.

    Args:
        argv: Unused; the command line is read by ``argparse`` itself.
    """
    parser = argparse.ArgumentParser()
    # Required arguments: input and output files.
    parser.add_argument("input_file", help="Input image, directory")
    parser.add_argument("feature_file", help="Feature mat filename.")
    parser.add_argument("score_file", help="Score Output mat filename.")
    # Optional arguments.
    parser.add_argument(
        "--model_def",
        default=os.path.join("./models/market1501/caffenet/feature.proto"),
        help="Model definition file.")
    parser.add_argument(
        "--pretrained_model",
        default=os.path.join(
            "./models/market1501/caffenet/caffenet_iter_17000.caffemodel"),
        help="Trained model weights file.")
    parser.add_argument("--gpu", type=int, default=-1,
                        help="Switch for gpu computation.")
    parser.add_argument(
        "--center_only", action='store_true',
        help="Switch for prediction from center crop alone instead of " +
        "averaging predictions across crops (default).")
    parser.add_argument(
        "--images_dim", default='256,256',
        help="Canonical 'height,width' dimensions of input images.")
    parser.add_argument(
        "--mean_value",
        default=os.path.join(
            'examples/market1501/market1501_mean.binaryproto'),
        help="Data set image mean of [Channels x Height x Width] dimensions "
        + "(numpy array). Set to '' for no mean subtraction.")
    parser.add_argument(
        "--input_scale", type=float,
        help="Multiply input features by this scale to finish preprocessing.")
    parser.add_argument(
        "--raw_scale", type=float, default=255.0,
        help="Multiply raw input by this scale before preprocessing.")
    parser.add_argument(
        "--channel_swap", default='2,1,0',
        help="Order to permute input channels. The default converts " +
        "RGB -> BGR since BGR is the Caffe default by way of OpenCV.")
    parser.add_argument(
        "--ext", default='jpg',
        help="Image file extension to take as input when a directory " +
        "is given as the input file.")
    parser.add_argument("--feature_name", default="fc7",
                        help="feature blob name.")
    parser.add_argument("--score_name", default="prediction",
                        help="prediction score blob name.")
    args = parser.parse_args()

    image_dims = [int(s) for s in args.images_dim.split(',')]

    channel_swap = None
    if args.channel_swap:
        channel_swap = [int(s) for s in args.channel_swap.split(',')]

    mean_value = _load_mean_value(args.mean_value)

    if args.gpu >= 0:
        caffe.set_mode_gpu()
        caffe.set_device(args.gpu)
        print("GPU mode, device : {}".format(args.gpu))
    else:
        caffe.set_mode_cpu()
        print("CPU mode")

    # Make classifier.
    classifier = SClassifier(args.model_def, args.pretrained_model,
                             image_dims=image_dims, mean_value=mean_value,
                             input_scale=args.input_scale,
                             raw_scale=args.raw_scale,
                             channel_swap=channel_swap)

    # Load numpy, directory glob (*.jpg), or image file.
    args.input_file = os.path.expanduser(args.input_file)
    if args.input_file.endswith(args.ext):
        print("Loading file: %s" % args.input_file)
        inputs = [caffe.io.load_image(args.input_file)]
        labels = [-1]
    elif os.path.isdir(args.input_file):
        print("Loading folder: %s" % args.input_file)
        inputs = [caffe.io.load_image(im_f)
                  for im_f in glob.glob(args.input_file + '/*.' + args.ext)]
        # No ground truth for a bare folder: -1 never matches a prediction.
        labels = [-1] * len(inputs)
    else:
        # Image List Files
        print("Loading file: %s" % args.input_file)
        img_files, labels = load_txt(args.input_file)
        inputs = [caffe.io.load_image(im_f) for im_f in img_files]

    print("Classifying %d inputs." % len(inputs))

    # Classify.
    ok = 0.0
    save_feature = None
    save_score = None
    start_time = time.time()
    epoch_time = AverageMeter()
    for idx, _input in enumerate(inputs):
        _ = classifier.predict([_input], not args.center_only)
        feature = classifier.get_blob_data(args.feature_name)
        score = classifier.get_blob_data(args.score_name)
        assert (feature.shape[0] == 1 and score.shape[0] == 1)

        # The output matrices are allocated on the first image, once the
        # blob sizes are known.
        if save_feature is None:
            print('feature : {} : {}'.format(args.feature_name,
                                             feature.shape))
            save_feature = np.zeros((len(inputs), feature.size),
                                    dtype=np.float32)
        save_feature[idx, :] = feature.reshape(1, feature.size)

        if save_score is None:
            print('score : {} : {}'.format(args.score_name, score.shape))
            save_score = np.zeros((len(inputs), score.size),
                                  dtype=np.float32)
        save_score[idx, :] = score.reshape(1, score.size)

        mx_idx = np.argmax(score.view())
        ok = ok + int(int(mx_idx) == int(labels[idx]))

        epoch_time.update(time.time() - start_time)
        start_time = time.time()
        # Remaining time estimated from the average per-image duration.
        need_hour, need_mins, need_secs = convert_secs2time(
            epoch_time.avg * (len(inputs) - idx - 1))
        need_time = '{:02d}:{:02d}:{:02d}'.format(need_hour, need_mins,
                                                  need_secs)
        print("{:5d} / {:5d} images, need {:s}. [PRED: {:3d}] vs [OK: {:3d}] accuracy: {:.4f} = good: {:5d} bad: {:5d}".format(
            idx + 1, len(inputs), need_time, mx_idx, labels[idx],
            ok / (idx + 1), int(ok), idx + 1 - int(ok)))

    # Save
    if (args.feature_file):
        print("Saving feature into %s" % args.feature_file)
        sio.savemat(args.feature_file, {'feature': save_feature})
    else:
        print("Without saving feature")

    if (args.score_file):
        print("Saving score into %s" % args.score_file)
        # NOTE(review): scores are stored under the key 'feature' as well --
        # confirm the downstream readers before renaming it.
        sio.savemat(args.score_file, {'feature': save_score})
    else:
        print("Without saving score")
def run(self):
    """Worker loop: run forward/backward passes on frames posted to the state.

    Until ``is_timed_out()`` or ``state.quit``, each iteration takes the
    pending frame and backprop settings under ``state.lock``, runs the
    requested passes outside the lock, then marks the net free and the
    drawing stale.  When there is nothing to do it sleeps ``loop_sleep``
    seconds.
    """
    print('CaffeProcThread.run called')

    frame = None

    import caffe
    # Set the mode to CPU or GPU. Note: in the latest Caffe
    # versions, there is one Caffe object *per thread*, so the
    # mode must be set per thread! Here we set the mode for the
    # CaffeProcThread thread; it is also set in the main thread.
    if self.mode_gpu:
        caffe.set_mode_gpu()
        print('CaffeVisApp mode (in CaffeProcThread): GPU')
    else:
        caffe.set_mode_cpu()
        print('CaffeVisApp mode (in CaffeProcThread): CPU')

    while not self.is_timed_out():
        with self.state.lock:
            if self.state.quit:
                break

            frame = None
            run_fwd = False
            run_back = False
            # Only pick up work when the net is idle and the user has
            # stopped pressing keys for pause_after_keys seconds.
            if self.state.caffe_net_state == 'free' and time.time(
            ) - self.state.last_key_at > self.pause_after_keys:
                frame = self.state.next_frame
                self.state.next_frame = None
                back_enabled = self.state.back_enabled
                back_mode = self.state.back_mode
                back_stale = self.state.back_stale
                backprop_layer_def = self.state.get_current_backprop_layer_definition(
                )
                backprop_unit = self.state.backprop_unit

                # Forward should be run for every new frame
                run_fwd = (frame is not None)
                # Backward should be run if back_enabled and (there was a new frame OR back is stale (new backprop layer/unit selected))
                run_back = (back_enabled and (run_fwd or back_stale))
                self.state.caffe_net_state = 'proc' if (
                    run_fwd or run_back) else 'free'

        if run_fwd:
            self.frames_processed_fwd += 1

            if self.settings.is_siamese and ((type(frame), len(frame)) ==
                                             (tuple, 2)):
                im_small = self.state.convert_image_pair_to_network_input_format(
                    self.settings, frame, self.input_dims)
            else:
                im_small = resize_without_fit(frame, self.input_dims)

            with WithTimer('CaffeProcThread:forward',
                           quiet=self.debug_level < 1):
                net_preproc_forward(self.settings, self.net, im_small,
                                    self.input_dims)

        if run_back:
            # Count backward passes too; the summary printed at the end
            # reports this counter.
            self.frames_processed_back += 1

            if back_mode == BackpropMode.GRAD:
                with WithTimer('CaffeProcThread:backward',
                               quiet=self.debug_level < 1):
                    self.state.backward_from_layer(self.net,
                                                   backprop_layer_def,
                                                   backprop_unit)
            elif back_mode == BackpropMode.DECONV_ZF:
                with WithTimer('CaffeProcThread:deconv',
                               quiet=self.debug_level < 1):
                    self.state.deconv_from_layer(self.net,
                                                 backprop_layer_def,
                                                 backprop_unit,
                                                 'Zeiler & Fergus')
            elif back_mode == BackpropMode.DECONV_GB:
                with WithTimer('CaffeProcThread:deconv',
                               quiet=self.debug_level < 1):
                    self.state.deconv_from_layer(self.net,
                                                 backprop_layer_def,
                                                 backprop_unit,
                                                 'Guided Backprop')

            with self.state.lock:
                self.state.back_stale = False

        if run_fwd or run_back:
            with self.state.lock:
                self.state.caffe_net_state = 'free'
                self.state.drawing_stale = True
            now = time.time()
            if self.last_process_finished_at:
                self.last_process_elapsed = now - self.last_process_finished_at
            self.last_process_finished_at = now
        else:
            time.sleep(self.loop_sleep)

    print('CaffeProcThread.run: finished')
    print('CaffeProcThread.run: processed %d frames fwd, %d frames back' %
          (self.frames_processed_fwd, self.frames_processed_back))
def main():
    """Caption videos with a trained S2VT LSTM and write the sentences to disk.

    Parses the command line (model name, split, optional snapshot, vocabulary
    and frame-feature paths), loads the model into the deploy network, then
    processes the chosen split in chunks of ``num_out_per_chunk`` videos.
    Each chunk's sentences are appended to one text file per decoding
    strategy; with ``--htmlout`` an HTML file is also written per chunk.

    Relies on the module-level ``DEVICE_ID``, ``BUFFER_SIZE`` and
    ``MAX_WORDS`` settings.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--modelname", type=str, required=True,
                        help='Name of model without ".caffemodel" extension')
    parser.add_argument(
        "-t", "--testset", action='store_true',
        help='Evaluate on test set. If unspecified then val set.')
    parser.add_argument("-o", "--htmlout", action='store_true',
                        help='output sentences as html to visually compare')
    parser.add_argument("-g", "--gold", action='store_true',
                        help='groundtruth sentences for scoring/retrieval')
    parser.add_argument("-s", "--snapshots", type=str,
                        help='the snapshot directory')
    parser.add_argument("-vc", "--vocab", type=str, help='vocabulary path')
    parser.add_argument("-f", "--frames", type=str, help='frames path')
    args = parser.parse_args()

    snap_dir = args.snapshots if args.snapshots else './utils/data'
    vocab_file = args.vocab if args.vocab else './utils/data/yt_coco_mvad_mpiimd_vocabulary.txt'
    frame_feat_file = args.frames if args.frames else './utils/data/yt_allframes_vgg_fc7_{0}.txt'
    lstm_net_file = './utils/data/s2vt.words_to_preds.deploy.prototxt'
    results_dir = './utils/data/results'
    model_file = '%s/%s.caffemodel' % (snap_dir, args.modelname)
    # NOTE(review): --gold is a store_true flag, so this is True or None,
    # never a path -- confirm what the sequence generator expects here.
    sents_file = args.gold if args.gold else None  # optional
    net_tag = args.modelname

    if DEVICE_ID >= 0:
        caffe.set_mode_gpu()
        caffe.set_device(DEVICE_ID)
    else:
        caffe.set_mode_cpu()
    print("Setting up LSTM NET")
    lstm_net = caffe.Net(lstm_net_file, model_file, caffe.TEST)
    print("Done")

    strategies = [
        {'type': 'beam', 'beam_size': 1},
    ]
    num_out_per_chunk = 30
    start_chunk = 0

    data_sets = []  # split_name, data_split_name, aligned
    if args.testset:
        data_sets.append(('test', 'test', False))
    else:
        data_sets.append(('valid', 'val', False))

    for split_name, data_split_name, aligned in data_sets:
        file_names = [(frame_feat_file.format(data_split_name), sents_file)]
        fsg = fc7FrameSequenceGenerator(file_names, BUFFER_SIZE, vocab_file,
                                        max_words=MAX_WORDS, align=aligned,
                                        shuffle=False, pad=aligned,
                                        truncate=aligned)
        video_gt_pairs = all_video_gt_pairs(fsg)
        print('Read %d videos pool feats' % len(fsg.vid_framefeats))
        # Floor division gives the same count as int(len / n) + 1.
        num_chunks = (len(fsg.vid_framefeats) // num_out_per_chunk) + 1
        eos_string = '<EOS>'
        # add english inverted vocab
        vocab_list = [eos_string] + fsg.vocabulary_inverted
        # keys() is a non-sliceable view on Python 3; take a list once.
        video_ids = list(video_gt_pairs.keys())

        if not os.path.exists(results_dir):
            os.makedirs(results_dir)
        text_out_filename = '%s/%s.%s_' % \
            (results_dir, data_split_name, net_tag)

        for c in range(start_chunk, num_chunks):
            chunk_start = c * num_out_per_chunk
            chunk_end = (c + 1) * num_out_per_chunk
            chunk = video_ids[chunk_start:chunk_end]
            html_out_filename = '%s/%s.%s.%d_to_%d.html' % \
                (results_dir, data_split_name, net_tag, chunk_start, chunk_end)

            outputs = run_pred_iters(lstm_net, chunk, video_gt_pairs, fsg,
                                     strategies=strategies,
                                     display_vocab=vocab_list)
            if args.htmlout:
                html_out = to_html_output(outputs, vocab_list)
                with open(html_out_filename, 'w') as html_out_file:
                    html_out_file.write(html_out)

            text_out_types = to_text_output(outputs, vocab_list)
            text_out_fname = ''
            for strat_type in text_out_types:
                text_out_fname = text_out_filename + strat_type + '.txt'
                # Append: every chunk adds to the same per-strategy file.
                with open(text_out_fname, 'a') as text_out_file:
                    text_out_file.write(''.join(text_out_types[strat_type]))
            print('(%d-%d) Appending to file: %s' %
                  (chunk_start, chunk_end, text_out_fname))
def run(self):
    """Worker loop: run forward/backward passes on frames posted to the state.

    Until ``is_timed_out()`` or ``state.quit``, each iteration takes the
    pending frame and backprop settings under ``state.lock``, runs the
    requested passes outside the lock, then marks the net free and the
    drawing stale.  When there is nothing to do it sleeps ``loop_sleep``
    seconds.

    Raises:
        AttributeError: Re-raised when the net lacks ``backward_from_layer``
            or ``deconv_from_layer``.
    """
    print('CaffeProcThread.run called')

    frame = None

    import caffe
    # Set the mode to CPU or GPU. Note: in the latest Caffe
    # versions, there is one Caffe object *per thread*, so the
    # mode must be set per thread! Here we set the mode for the
    # CaffeProcThread thread; it is also set in the main thread.
    if self.mode_gpu:
        caffe.set_mode_gpu()
        print('CaffeVisApp mode (in CaffeProcThread): GPU')
    else:
        caffe.set_mode_cpu()
        print('CaffeVisApp mode (in CaffeProcThread): CPU')

    while not self.is_timed_out():
        with self.state.lock:
            if self.state.quit:
                break

            frame = None
            run_fwd = False
            run_back = False
            # Only pick up work when the net is idle and the user has
            # stopped pressing keys for pause_after_keys seconds.
            if self.state.caffe_net_state == 'free' and time.time() - self.state.last_key_at > self.pause_after_keys:
                frame = self.state.next_frame
                self.state.next_frame = None
                back_enabled = self.state.back_enabled
                back_mode = self.state.back_mode
                back_stale = self.state.back_stale
                backprop_layer = self.state.backprop_layer
                backprop_unit = self.state.backprop_unit

                # Forward should be run for every new frame
                run_fwd = (frame is not None)
                # Backward should be run if back_enabled and (there was a new frame OR back is stale (new backprop layer/unit selected))
                run_back = (back_enabled and (run_fwd or back_stale))
                self.state.caffe_net_state = 'proc' if (run_fwd or run_back) else 'free'

        if run_fwd:
            self.frames_processed_fwd += 1

            if self.settings.static_files_input_mode == "siamese_image_list":
                # The frame holds two images: resize each and stack them
                # along axis 2.
                frame1 = frame[0]
                frame2 = frame[1]
                im_small1 = cv2.resize(frame1, self.input_dims)
                im_small2 = cv2.resize(frame2, self.input_dims)
                im_small = np.concatenate((im_small1, im_small2), axis=2)
            else:
                im_small = cv2.resize(frame, self.input_dims)

            with WithTimer('CaffeProcThread:forward', quiet=self.debug_level < 1):
                net_preproc_forward(self.settings, self.net, im_small, self.input_dims)

        if run_back:
            # Count backward passes too; the summary printed at the end
            # reports this counter.
            self.frames_processed_back += 1

            # Zero diffs everywhere except the selected unit, which is
            # seeded with its own activation.
            diffs = self.net.blobs[backprop_layer].diff * 0
            diffs[0][backprop_unit] = self.net.blobs[backprop_layer].data[0, backprop_unit]

            assert back_mode in ('grad', 'deconv')
            if back_mode == 'grad':
                with WithTimer('CaffeProcThread:backward', quiet=self.debug_level < 1):
                    try:
                        self.net.backward_from_layer(backprop_layer, diffs, zero_higher=True)
                    except AttributeError:
                        print('ERROR: required bindings (backward_from_layer) not found! Try using the deconv-deep-vis-toolbox branch as described here: https://github.com/yosinski/deep-visualization-toolbox')
                        raise
            else:
                with WithTimer('CaffeProcThread:deconv', quiet=self.debug_level < 1):
                    try:
                        self.net.deconv_from_layer(backprop_layer, diffs, zero_higher=True)
                    except AttributeError:
                        print('ERROR: required bindings (deconv_from_layer) not found! Try using the deconv-deep-vis-toolbox branch as described here: https://github.com/yosinski/deep-visualization-toolbox')
                        raise

            with self.state.lock:
                self.state.back_stale = False

        if run_fwd or run_back:
            with self.state.lock:
                self.state.caffe_net_state = 'free'
                self.state.drawing_stale = True
            now = time.time()
            if self.last_process_finished_at:
                self.last_process_elapsed = now - self.last_process_finished_at
            self.last_process_finished_at = now
        else:
            time.sleep(self.loop_sleep)

    print('CaffeProcThread.run: finished')
    print('CaffeProcThread.run: processed %d frames fwd, %d frames back' % (self.frames_processed_fwd, self.frames_processed_back))
def main(argv):
    """Classify input image(s) with a Caffe classifier and save the predictions.

    Prints the five highest-scoring entries of the flattened prediction array
    together with their labels from ``../data/ilsvrc12/synset_words.txt``,
    then saves the raw predictions to ``output_file`` with ``np.save``.

    Args:
        argv: Not used directly; ``parse_args()`` is called without arguments.
    """
    pycaffe_dir = os.path.dirname(__file__)

    parser = argparse.ArgumentParser()
    # Required arguments: input and output files.
    parser.add_argument("input_file", help="Input image, directory, or npy.")
    parser.add_argument("output_file", help="Output npy filename.")
    # Optional arguments.
    parser.add_argument(
        "--model_def",
        default=os.path.join(
            pycaffe_dir, "../models/bvlc_reference_caffenet/deploy.prototxt"),
        help="Model definition file.")
    parser.add_argument(
        "--pretrained_model",
        default=os.path.join(
            pycaffe_dir,
            "../models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel"
        ),
        help="Trained model weights file.")
    parser.add_argument("--gpu", action='store_true',
                        help="Switch for gpu computation.")
    parser.add_argument(
        "--center_only", action='store_true',
        help="Switch for prediction from center crop alone instead of " +
        "averaging predictions across crops (default).")
    parser.add_argument(
        "--images_dim", default='256,256',
        help="Canonical 'height,width' dimensions of input images.")
    parser.add_argument(
        "--mean_file",
        default=os.path.join(pycaffe_dir,
                             'caffe/imagenet/ilsvrc_2012_mean.npy'),
        help="Data set image mean of [Channels x Height x Width] dimensions " +
        "(numpy array). Set to '' for no mean subtraction.")
    parser.add_argument(
        "--input_scale", type=float,
        help="Multiply input features by this scale to finish preprocessing.")
    parser.add_argument(
        "--raw_scale", type=float, default=255.0,
        help="Multiply raw input by this scale before preprocessing.")
    parser.add_argument(
        "--channel_swap", default='2,1,0',
        help="Order to permute input channels. The default converts " +
        "RGB -> BGR since BGR is the Caffe default by way of OpenCV.")
    parser.add_argument(
        "--ext", default='jpg',
        help="Image file extension to take as input when a directory " +
        "is given as the input file.")
    args = parser.parse_args()

    image_dims = [int(s) for s in args.images_dim.split(',')]

    mean, channel_swap = None, None
    if args.mean_file:
        # Collapse the per-pixel mean image to one value per channel.
        mean = np.load(args.mean_file)
        mean = mean.mean(1).mean(1)
    if args.channel_swap:
        channel_swap = [int(s) for s in args.channel_swap.split(',')]

    if args.gpu:
        caffe.set_mode_gpu()
        print("GPU mode")
    else:
        caffe.set_mode_cpu()
        print("CPU mode")

    # Make classifier.
    classifier = caffe.Classifier(args.model_def, args.pretrained_model,
                                  image_dims=image_dims, mean=mean,
                                  input_scale=args.input_scale,
                                  raw_scale=args.raw_scale,
                                  channel_swap=channel_swap)

    # Load numpy array (.npy), directory glob (*.jpg), or image file.
    args.input_file = os.path.expanduser(args.input_file)
    if args.input_file.endswith('npy'):
        print("Loading file: %s" % args.input_file)
        inputs = np.load(args.input_file)
    elif os.path.isdir(args.input_file):
        print("Loading folder: %s" % args.input_file)
        inputs = [
            caffe.io.load_image(im_f)
            for im_f in glob.glob(args.input_file + '/*.' + args.ext)
        ]
    else:
        print("Loading file: %s" % args.input_file)
        inputs = [caffe.io.load_image(args.input_file)]

    print("Classifying %d inputs." % len(inputs))

    # Classify.
    start = time.time()
    predictions = classifier.predict(inputs, not args.center_only)
    print("Done in %.2f s." % (time.time() - start))

    imagenet_labels_filename = '../data/ilsvrc12/synset_words.txt'
    labels = np.loadtxt(imagenet_labels_filename, str, delimiter='\t')
    # Indices of the five largest scores, best first.
    top_k = predictions.flatten().argsort()[-1:-6:-1]
    for idx in top_k:
        # %-formatting prints identically on Python 2 and Python 3.
        print("%s %s" % (idx, labels[idx]))

    # Save
    print("Saving results into %s" % args.output_file)
    np.save(args.output_file, predictions)
def mat2feat(stimulus, layer, phrase):
    """Get features of `layer` derived from CNN.

    The stimuli are resized to 227 x 227, converted to BGR, mean-subtracted
    and fed through the BVLC reference CaffeNet in batches of 10. The
    flattened activations of `layer` are saved as ``.npy`` files in the
    current directory: one file for ``phrase == 'val'`` and ten part files
    (``..._0.npy`` to ``..._9.npy``) for ``phrase == 'train'``.

    Args:
        stimulus: 4-D array of images, transposed here with axes (0, 3, 2, 1).
        layer: name of the network blob to read features from.
        phrase: 'train' splits the stimuli into 10 parts; any other value
            uses a single part. Only 'val' selects the un-numbered filename.

    Note:
        Stimuli beyond ``part * (len(stimulus) // part)`` are not processed.
    """
    caffe.set_mode_cpu()
    model_dir = r'/nfs/diskstation/workshop/huanglijie/caffe_models'
    batch_size = 10  # batch dimension the deploy net is fed with

    # reorder the data shape: to NxHxWxC
    stimulus = np.transpose(stimulus, (0, 3, 2, 1))
    print('stimulus size : %s' % (stimulus.shape,))
    stim_len = stimulus.shape[0]
    part = 10 if phrase == 'train' else 1
    # Floor division keeps `unit` an int on Python 3 as well.
    unit = stim_len // part

    # The mean image and the network do not depend on the part, so they are
    # prepared once instead of once per part.
    mean_file = os.path.join(model_dir, 'python', 'caffe', 'imagenet',
                             'ilsvrc_2012_mean.npy')
    mean_im = np.load(mean_file)
    # take center crop
    center = np.array((256, 256)) / 2.0
    crop = np.tile(center, (1, 2))[0] + np.concatenate(
        [-np.array([227, 227]) / 2.0, np.array([227, 227]) / 2.0])
    crop = crop.astype(int)
    mean_im = mean_im[:, crop[0]:crop[2], crop[1]:crop[3]]
    mean_im = np.expand_dims(mean_im, 0)

    caffenet_dir = os.path.join(model_dir, 'models', 'bvlc_reference_caffenet')
    caffenet = caffe.Net(os.path.join(caffenet_dir, 'deploy.prototxt'),
                         os.path.join(caffenet_dir,
                                      'bvlc_reference_caffenet.caffemodel'),
                         caffe.TEST)
    feat_s = caffenet.blobs[layer].data.shape
    feat_dim = int(np.prod(feat_s[1:]))

    for i in range(part):
        # resize to 227 x 227
        input_ = np.zeros((unit, 227, 227, 3), dtype=np.float32)
        print('input size : %s' % (input_.shape,))
        print('Resize input image ...')
        for ix, im in enumerate(stimulus[(i * unit):(i + 1) * unit]):
            input_[ix] = caffe.io.resize_image(im.astype(np.float32),
                                               (227, 227))
        # reorder the data shape: to NxCxHxW
        input_ = np.transpose(input_, (0, 3, 1, 2))
        # RGB to BGR
        input_ = input_[:, ::-1]
        # subtract mean
        input_ -= mean_im

        # feedforward
        n_images = input_.shape[0]
        feat = np.zeros((n_images, feat_dim), dtype=np.float32)
        for start in range(0, n_images, batch_size):
            batch_input = input_[start:start + batch_size]
            count = batch_input.shape[0]
            if count < batch_size:
                # Zero-pad the trailing partial batch so its images are
                # processed instead of being left as all-zero features.
                padded = np.zeros((batch_size,) + batch_input.shape[1:],
                                  dtype=np.float32)
                padded[:count] = batch_input
                batch_input = padded
            caffenet.forward(data=batch_input)
            out = caffenet.blobs[layer].data.reshape(batch_size, -1)
            feat[start:start + count] = out[:count]

        if phrase == 'val':
            np.save('%s_sti_%s.npy' % (layer, phrase), feat)
        else:
            np.save('%s_sti_%s_%s.npy' % (layer, phrase, i), feat)

    del caffenet
def __init__(self, *argc, **argv):
    """Initialise the tree and switch Caffe to CPU mode.

    Args:
        *argc: Positional arguments; not used here.
        **argv: Keyword arguments; ``size_mstack`` is required and is stored
            on ``self.size_mstack``.

    Raises:
        TypeError: if ``size_mstack`` is not supplied.
    """
    super(gTree, self).__init__()
    try:
        self.size_mstack = argv['size_mstack']
    except KeyError:
        # Raising a plain string is not a valid exception and itself fails
        # with TypeError; raise a real TypeError carrying the message.
        raise TypeError("memory stack size is not provided!")

    # setup CPU computations
    caffe.set_mode_cpu()
def main():
    """Run MTCNN face detection on the Pi camera stream and display the result.

    Frames are read from a threaded ``PiVideoStream``, converted to gray and
    back to three channels, channel-reversed and passed to ``detect_face``.
    Boxes and landmark points are drawn on the original frame and shown in
    the 'cam' window until 'q' is pressed.
    """
    # Blocking camera I/O is moved to a separate thread (PiVideoStream) in an
    # effort to improve FPS; .read() on the camera itself is blocking.
    camera = PiCamera()
    camera.resolution = (352, 240)
    camera.framerate = 32
    rawCapture = PiRGBArray(camera, size=(352, 240))
    stream = camera.capture_continuous(rawCapture, format="bgr",
                                       use_video_port=True)
    camera.close()

    vs = PiVideoStream().start()
    time.sleep(2.0)
    fps = FPS().start()

    minsize = 20
    caffe_model_path = "./model"
    threshold = [0.6, 0.7, 0.7]  # initial threshold: 0.6 0.7 0.7
    factor = 0.709

    caffe.set_mode_cpu()

    def load_stage(stem):
        # Build one cascade stage from its prototxt/caffemodel pair.
        return caffe.Net(caffe_model_path + "/" + stem + ".prototxt",
                         caffe_model_path + "/" + stem + ".caffemodel",
                         caffe.TEST)

    PNet = load_stage("det1")
    RNet = load_stage("det2")
    ONet = load_stage("det3")

    while True:
        tick = timer()
        print("---------------------------------------------")
        frame = vs.read()

        # convert the frame to gray scale and restore the BGR info
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        img = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)

        # Swap channels 0 and 2 (reverse the channel axis of the 3-channel image).
        img_matlab = img[:, :, ::-1].copy()

        boundingboxes, points = detect_face(img_matlab, minsize, PNet, RNet,
                                            ONet, threshold, False, factor)

        img = drawBoxes(frame, boundingboxes)
        for landmarks in points:
            # First five entries are paired with the next five as (x, y).
            for k in range(5):
                cv2.circle(img, (landmarks[k], landmarks[k + 5]), 1,
                           (0, 255, 0), -1)

        cv2.namedWindow('cam', cv2.WINDOW_NORMAL)
        cv2.resizeWindow('cam', 960, 720)
        cv2.imshow('cam', img)
        pressed = cv2.waitKey(1) & 0xFF
        if pressed == ord('q'):
            break

        tock = timer()
        print("Total time:", tock - tick)
        fps.update()

    # When everything's done, release the windows and the stream.
    cv2.destroyAllWindows()
    vs.stop()
    vs.update()
def main(argv):
    """Classify input image(s), optionally print the top-5 labels, and save results.

    With ``--print_results`` the five best (label, score) pairs are printed
    and the saved array holds those five label names; otherwise the raw
    prediction array is saved.

    Args:
        argv: Not used directly; ``parse_args()`` is called without arguments.
    """
    pycaffe_dir = os.path.dirname(__file__)

    parser = argparse.ArgumentParser()
    # Required arguments: input and output files.
    parser.add_argument("input_file", help="Input image, directory, or npy.")
    parser.add_argument("output_file", help="Output npy filename.")
    # Optional arguments.
    parser.add_argument(
        "--model_def",
        default=os.path.join(
            pycaffe_dir, "../models/bvlc_reference_caffenet/deploy.prototxt"),
        help="Model definition file.")
    parser.add_argument(
        "--pretrained_model",
        default=os.path.join(
            pycaffe_dir,
            "../models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel"
        ),
        help="Trained model weights file.")
    parser.add_argument("--gpu", action='store_true',
                        help="Switch for gpu computation.")
    parser.add_argument(
        "--center_only", action='store_true',
        help="Switch for prediction from center crop alone instead of " +
        "averaging predictions across crops (default).")
    parser.add_argument(
        "--images_dim", default='256,256',
        help="Canonical 'height,width' dimensions of input images.")
    parser.add_argument(
        "--mean_file",
        default=os.path.join(pycaffe_dir,
                             'caffe/imagenet/ilsvrc_2012_mean.npy'),
        help="Data set image mean of [Channels x Height x Width] dimensions " +
        "(numpy array). Set to '' for no mean subtraction.")
    parser.add_argument(
        "--input_scale", type=float,
        help="Multiply input features by this scale to finish preprocessing.")
    parser.add_argument(
        "--raw_scale", type=float, default=255.0,
        help="Multiply raw input by this scale before preprocessing.")
    parser.add_argument(
        "--channel_swap", default='2,1,0',
        help="Order to permute input channels. The default converts " +
        "RGB -> BGR since BGR is the Caffe default by way of OpenCV.")
    parser.add_argument(
        "--ext", default='jpg',
        help="Image file extension to take as input when a directory " +
        "is given as the input file.")
    parser.add_argument("--labels_file",
                        default=os.path.join(
                            pycaffe_dir, "../data/ilsvrc12/synset_words.txt"),
                        help="Readable label definition file.")
    parser.add_argument(
        "--print_results", action='store_true',
        help="Write output text to stdout rather than serializing to a file.")
    parser.add_argument(
        "--force_grayscale", action='store_true',
        help="Converts RGB images down to single-channel grayscale versions," +
        "useful for single-channel networks like MNIST.")
    args = parser.parse_args()

    image_dims = [int(s) for s in args.images_dim.split(',')]

    mean, channel_swap = None, None
    if not args.force_grayscale:
        channel_swap = [int(s) for s in args.channel_swap.split(',')]
        if args.mean_file:
            # Actually load the mean: previously --mean_file was parsed but
            # never read, so no mean subtraction was ever applied.
            mean = np.load(args.mean_file).mean(1).mean(1)

    # Make classifier.
    classifier = caffe.Classifier(args.model_def, args.pretrained_model,
                                  image_dims=image_dims, mean=mean,
                                  input_scale=args.input_scale,
                                  raw_scale=args.raw_scale,
                                  channel_swap=channel_swap)

    if args.gpu:
        # NOTE(review): the set_mode_gpu() call is disabled in this script,
        # so --gpu only changes the message printed here.
        print("GPU mode")
    else:
        caffe.set_mode_cpu()
        print("CPU mode")

    # Load numpy array (.npy), directory glob (*.jpg), or image file.
    args.input_file = os.path.expanduser(args.input_file)
    if args.input_file.endswith('npy'):
        print("Loading file: %s" % args.input_file)
        inputs = np.load(args.input_file)
    elif os.path.isdir(args.input_file):
        print("Loading folder: %s" % args.input_file)
        inputs = [
            caffe.io.load_image(im_f)
            for im_f in glob.glob(args.input_file + '/*.' + args.ext)
        ]
    else:
        print("Loading file: %s" % args.input_file)
        inputs = [caffe.io.load_image(args.input_file)]

    if args.force_grayscale:
        inputs = [rgb2gray(image) for image in inputs]

    print("Classifying %d inputs." % len(inputs))

    # Classify once and reuse the result for the flattened scores.
    start = time.time()
    predictions = classifier.predict(inputs, not args.center_only)
    scores = predictions.flatten()
    print("Done in %.2f s." % (time.time() - start))

    if args.print_results:
        with open(args.labels_file) as f:
            labels_df = pd.DataFrame([{
                'synset_id': l.strip().split(' ')[0],
                'name': ' '.join(l.strip().split(' ')[1:]).split(',')[0]
            } for l in f.readlines()])
        # DataFrame.sort was removed from pandas; sort_values replaces it.
        labels = labels_df.sort_values('synset_id')['name'].values

        indices = (-scores).argsort()[:5]
        predictions = labels[indices]

        meta = [(p, '%.5f' % scores[i]) for i, p in zip(indices, predictions)]
        print(meta)

    # Save
    print("Saving results into %s" % args.output_file)
    np.save(args.output_file, predictions)
def mainTest():
    # Evaluate visual-concept detection snapshots on the COCO 'valid2' split:
    # for each configured solver and each snapshot whose iteration is listed
    # in evalIters, run detection, run three evaluations, and print summaries.
    # Lock files (lock_utils) guard each output file.
    # NOTE(review): block nesting below was reconstructed from a
    # whitespace-collapsed source; confirm the placement of the statements
    # that follow each if/else against the original file.
    ##DO NOT CHANGE
    numReferencesToEval = 5
    minWords = 3
    precThresh = 0.5
    #####

    testSetName = 'coco'
    testSetSplit = 'valid2'
    imdb = meu.get_imdb(testSetName, testSetSplit)

    # GPU use is hard-coded off; flip has_gpu to run on device gpuId.
    has_gpu = False
    if has_gpu:
        gpuId = 1
        caffe.set_mode_gpu()
        caffe.set_device(gpuId)
    else:
        caffe.set_mode_cpu()
        print 'using CPU'

    #list of paths where we keep our caffe models
    caffeModelPaths = ['./experiments']

    #output directory to write results
    #make sure it has >2GB free space
    detOutPath = './det-output'

    #list of models we want to evaluate
    #make sure they have an entry in the function modelVocabConfig() in data_model_utils.py
    solverProtoList = [ 'vgg/mil_finetune_solver.prototxt',\
    ]

    #iterations to evaluate
    # evalIters = [80000, 160000, 240000, 320000, 400000];
    evalIters = [320000]

    for i in range(len(solverProtoList)):
        solverProtoName = solverProtoList[i]
        vocab = meu.get_model_vocab(solverProtoName)
        infType = meu.get_model_inference_type(solverProtoList[i])
        baseImageSize = meu.get_model_image_size(solverProtoList[i])
        gtKeyedLabel = None  # not read again in this function

        for caffeModelPath in caffeModelPaths:
            solverProtoPath = os.path.join(caffeModelPath, solverProtoName)
            auxFiles = caffe_utils.get_model_aux_files_from_solver(\
                solverProtoPath = solverProtoPath, caffeModelPath=caffeModelPath)
            if auxFiles == None:
                print 'could not find solver in %s' % (solverProtoPath)
                continue
            if len(auxFiles['snapshotFiles']) == 0:
                print 'no snapshots found ', solverProtoPath
                continue

            expSubDirBase = auxFiles['expSubDirBase']
            expName = getExpNameFromSolverProtoName(solverProtoPath)
            expDirBase = os.path.join(expSubDirBase, expName)

            # Visit snapshots in the order given by argsort(..., reverse=True)
            # over their iteration numbers.
            modelIterNums = [ caffe_utils.get_iter_from_model_file(snapFilePath)\
                for snapFilePath in auxFiles['snapshotFiles'] ]
            runInds = im_utils.argsort(modelIterNums, reverse=True)

            for ci, s in enumerate(runInds):
                snapFilePath = auxFiles['snapshotFiles'][s]
                modelIterNumber = caffe_utils.get_iter_from_model_file( snapFilePath)
                if modelIterNumber not in evalIters:
                    continue
                print solverProtoPath, modelIterNumber

                modelOuts = getModelOutputPaths(detOutPath, expDirBase,\
                    expName, snapFilePath , testSetName, testSetSplit,\
                    numReferencesToEval = numReferencesToEval, minWords = minWords, precThresh = precThresh, ext='.h5')
                detectionFile = modelOuts['detectionFile']
                evalFile = modelOuts['evalFile']  #evaluate as in MILVC
                evalNoRefFile = evalFile.replace('.h5', '_noref.h5')  #evaluate using standard definition of AP
                evalCocoManualGtFile = evalFile.replace( '.h5', '_cocomanualgt.h5')  #evaluate using COCO fully-labeled ground truth

                bdir = os.path.split(detectionFile)[0]
                sg_utils.mkdir_if_missing(bdir)

                # Run detection unless the detection file is already locked.
                if not lock_utils.is_locked(detectionFile):
                    model = loadModel(auxFiles['deployProtoPath'], snapFilePath, vocab, baseImageSize, infType)
                    testModelBatch(imdb, model, detectionFile)
                    lock_utils.unlock(detectionFile)
                else:
                    print '%s locked' % (detectionFile)

                # From here on only a lightweight model description
                # (inference type + vocabulary) is passed around.
                model = {}
                model['inf_type'] = infType
                model['vocab'] = vocab
                gtLabel = getLabels(imdb, model, solverProtoName)

                #evaluate as in MILVC: using "weighted" version of AP; requires multiple gt references per image
                #e.g. in COCO captions we have 5 captions per image. So we for each "visual concept" we have 5 gt references
                if imdb._name == 'coco' and \
                    lock_utils.file_ready_to_read(detectionFile) and (not lock_utils.is_locked(evalFile)):
                    model = {}
                    model['inf_type'] = infType
                    model['vocab'] = vocab
                    if infType == 'MILNoise':
                        evalModelBatch(imdb, model, gtLabel, \
                            numReferencesToEval, detectionFile, evalFile, evalNoiseKey='noisy_comb_noimage')
                    else:
                        evalModelBatch(imdb, model, gtLabel,\
                            numReferencesToEval, detectionFile, evalFile)
                    lock_utils.unlock(evalFile)

                #evaluate using standard AP definition. Does not need multiple references. Hence the name "NoRef"
                if imdb._name == 'coco' and \
                    lock_utils.file_ready_to_read(detectionFile) and (not lock_utils.is_locked(evalNoRefFile)):
                    model = {}
                    model['inf_type'] = infType
                    model['vocab'] = vocab
                    if infType == 'MILNoise':
                        evalModelBatchNoRef(imdb, model, gtLabel,\
                            numReferencesToEval, detectionFile, evalNoRefFile, evalNoiseKey='noisy_comb_noimage')
                    else:
                        evalModelBatchNoRef(imdb, model, gtLabel,\
                            numReferencesToEval, detectionFile, evalNoRefFile)
                    lock_utils.unlock(evalNoRefFile)

                #evaluate using fully labeled ground truth from COCO 80 detection classes.
                #we have a manual mapping defined from COCO 80 classes to the 1000 visual concepts
                if imdb._name == 'coco' and \
                    lock_utils.file_ready_to_read(detectionFile)\
                    and (not lock_utils.is_locked(evalCocoManualGtFile)):
                    model = {}
                    model['inf_type'] = infType
                    model['vocab'] = vocab
                    cocoFile = './data/coco_instancesGT_eval_%s.h5' % ( testSetSplit)
                    dt = sg_utils.load(detectionFile)
                    mil_prob = dt['mil_prob']
                    evalModelBatchOnClassificationCOCOManual(imdb, model,\
                        mil_prob, evalCocoManualGtFile, cocoFile)
                    if infType == 'MILNoise':
                        # Second pass on the 'noisy_comb_noimage' scores,
                        # written to a separate '_noise' file.
                        mil_prob = dt['noisy_comb_noimage']
                        evalCocoManualGtNoiseFile = evalCocoManualGtFile.replace( '.h5', '_noise.h5')
                        evalModelBatchOnClassificationCOCOManual(imdb, model,\
                            mil_prob, evalCocoManualGtNoiseFile, cocoFile)
                    lock_utils.unlock(evalCocoManualGtFile)

                # NOTE(review): the checks above read imdb._name while the
                # ones below read imdb.name; confirm both attributes exist.
                if imdb.name == 'coco' and lock_utils.file_ready_to_read( evalFile):
                    print '==' * 20
                    print 'AP (as computed in MILVC)'
                    N_WORDS = len(vocab['words'])  # not read again in this function
                    model = {}
                    model['inf_type'] = infType
                    model['vocab'] = vocab
                    cap_eval_utils.print_benchmark_latex(evalFile, vocab=vocab)
                    # Also report the '_noise' variant when that file exists.
                    evalFile = evalFile.replace('.h5', '_noise.h5')
                    if os.path.isfile(evalFile):
                        print 'noise'
                        cap_eval_utils.print_benchmark_latex(evalFile, vocab=vocab)

                if imdb.name == 'coco' and lock_utils.file_ready_to_read( evalNoRefFile):
                    print '==' * 20
                    print 'AP (as computed in PASCAL VOC)'
                    N_WORDS = len(vocab['words'])  # not read again in this function
                    model = {}
                    model['inf_type'] = infType
                    model['vocab'] = vocab
                    cap_eval_utils.print_benchmark_latex(evalNoRefFile, vocab=vocab)
                    evalNoRefFile = evalNoRefFile.replace('.h5', '_noise.h5')
                    if os.path.isfile(evalNoRefFile):
                        print 'noise'
                        cap_eval_utils.print_benchmark_latex(evalNoRefFile, vocab=vocab)

                if imdb.name == 'coco' and lock_utils.file_ready_to_read( evalCocoManualGtFile):
                    dt = sg_utils.load(evalCocoManualGtFile)
                    dtMeta = sg_utils.load( evalCocoManualGtFile.replace('.h5', '_meta.pkl'))
                    classesFound = dtMeta['classesFound']
                    srtInds = im_utils.argsort(classesFound)
                    # Sum the per-class AP values; divided by the class count
                    # when printed.
                    accAP = np.zeros((1), dtype=np.float32)
                    for ind in srtInds:
                        accAP += dt['ap'][ind]
                    print 'evaluate on fully-labeled GT:',
                    print 'AP %.2f; classes %d' % ( 100 * accAP / len(classesFound), len(classesFound))
                    evalCocoManualGtNoiseFile = evalCocoManualGtFile.replace( '.h5', '_noise.h5')
                    if os.path.isfile(evalCocoManualGtNoiseFile):
                        dt = sg_utils.load(evalCocoManualGtNoiseFile)
                        print '--noise--'
                        accAP = np.zeros((1), dtype=np.float32)
                        for ind in srtInds:
                            print '{:.2f} '.format(100 * dt['ap'][ind]),
                            accAP += dt['ap'][ind]
                        print ''
                        print '%.2f; %d' % (100 * accAP / len(classesFound), len(classesFound))
                    print '--' * 10
def initFeatureExtractingNetwork(self):
    """Switch Caffe to CPU mode and build the TEST-phase network.

    Returns:
        A ``caffe.Net`` created from ``self.prototxt`` with the weights in
        ``self.weights``.
    """
    caffe.set_mode_cpu()
    network = caffe.Net(self.prototxt, caffe.TEST, weights=self.weights)
    return network
import os
# Set before caffe is imported; presumably lowers Caffe's glog verbosity.
os.environ['GLOG_minloglevel'] = '2'
import caffe
import cv2
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from time import time

caffe.set_mode_cpu()

cap = cv2.VideoCapture(0)

# Hand structure: parent index of each of the 21 entries (0 for the first
# entry of every group of four).
o1_parent = np.concatenate([
    [0], np.arange(0, 4),
    [0], np.arange(5, 8),
    [0], np.arange(9, 12),
    [0], np.arange(13, 16),
    [0], np.arange(17, 20),
])

net = caffe.Net('RegNet_deploy.prototxt', 'RegNet_weights.caffemodel', caffe.TEST)

plt.ion()
fig = plt.figure()
# fig.gca(projection=...) no longer accepts keyword arguments in recent
# Matplotlib; add_subplot creates the same single 3-D axes on all versions.
ax = fig.add_subplot(111, projection='3d')
fig.show()
fig.canvas.draw()

try:
    while True:
        grabbed, img = cap.read()
        if not grabbed:
            # No frame available (camera missing or stream ended); showing
            # a None image would raise inside OpenCV.
            break
        cv2.imshow('img', img)
        if cv2.waitKey(1) == ord('q'):
            break
finally:
    # Always give the camera back and close the preview window.
    cap.release()
    cv2.destroyAllWindows()
def main(input_file, ext='jpg'):
    """Classify food image(s) and return the five best (name, score) pairs.

    Args:
        input_file: Sequence whose last element is the path to classify
            (an ``.npy`` array, a directory of images, or one image file).
        ext: Image file extension used when the path is a directory.

    Returns:
        A list of ``(foodname, score)`` tuples for the five highest scores
        of the flattened prediction array, best first.
    """
    print(input_file)
    pycaffe_dir = os.path.dirname(__file__)
    print(pycaffe_dir)

    model_def = os.path.join(pycaffe_dir, "network/foodclassify.prototxt")
    pretrained_model = os.path.join(pycaffe_dir, "model/mobilenet.caffemodel")
    image_dims = [224, 224]
    # Collapse the mean image to one value per channel.
    mean = np.load(os.path.join(pycaffe_dir,
                                'ilsvrc_2012_mean.npy')).mean(1).mean(1)
    channel_swap = [2, 1, 0]
    input_scale = None
    raw_scale = 255.0
    labels_file = os.path.join(pycaffe_dir, "foodnames.txt")

    caffe.set_mode_cpu()

    # Make classifier.
    classifier = caffe.Classifier(model_def, pretrained_model,
                                  image_dims=image_dims, mean=mean,
                                  input_scale=input_scale,
                                  raw_scale=raw_scale,
                                  channel_swap=channel_swap)

    # Load numpy array (.npy), directory glob (*.jpg), or image file.
    input_file = os.path.expanduser(input_file[-1])
    if input_file.endswith('npy'):
        print("Loading file: %s" % input_file)
        # Use the local path: there is no `args` object in this function.
        inputs = np.load(input_file)
    elif os.path.isdir(input_file):
        print("Loading folder: %s" % input_file)
        inputs = [caffe.io.load_image(im_f)
                  for im_f in glob.glob(input_file + '/*.' + ext)]
    else:
        print("Loading file: %s" % input_file)
        inputs = [caffe.io.load_image(input_file)]

    print("Classifying %d inputs." % len(inputs))

    # Classify.
    start = time.time()
    print("INPUTS")
    print(inputs)
    scores = classifier.predict(inputs, True).flatten()
    print("Done in %.2f s." % (time.time() - start))

    # Map score positions to food names.
    with open(labels_file) as f:
        labels_df = pd.DataFrame([
            {
                'id': int(l.strip().split(' ')[0]),
                'foodname': ' '.join(l.strip().split(' ')[1:]).split(',')[0]
            }
            for l in f.readlines()
        ])
    # .values gives positional indexing in id order; indexing the Series
    # directly would look rows up by their original file position instead.
    labels = labels_df.sort_values('id')['foodname'].values

    indices = (-scores).argsort()[:5]
    predictions = labels[indices]

    meta = [
        (p, float(scores[i]))
        for i, p in zip(indices, predictions)
    ]
    return meta
def detect_faces(self, img, return_best=False):
    """
    Computes a list of faces detected in the input image in the form of a list of bounding-boxes, one per each detected face.

    Arguments:
        img: The image to be input to the Faster R-CNN model
        return_best: boolean indicating whether to return just the best detection or the complete list of detections

    Returns:
        A list of lists. Each sublist contains the image coordinates of the corners of a bounding-box and the score of the detection
        in the form [x1,y1,x2,y2,score], where (x1,y1) are the integer coordinates of the top-left corner of the box and (x2,y2) are
        the coordinates of the bottom-right corner of the box. The score is a floating-point number.
        When return_best is True, the returned list will contain only one bounding-box.
        None is returned when img is None, when nothing passes the confidence threshold, or when detection raises an exception.
    """
    if img is None:
        return None

    def expand_and_clip(row):
        # Build [x1, y1, x2, y2, score] from one detection row, grow the box
        # to a square around its centre and clip it to the image.
        det = [int(row[0]), int(row[1]), int(row[2]), int(row[3]), row[4]]
        extend_factor = self.face_rect_expand_factor
        width = round(det[2] - det[0])
        height = round(det[3] - det[1])
        length = (width + height) / 2.0
        centrepoint = [round(det[0]) + width / 2.0,
                       round(det[1]) + height / 2.0]
        half_side = round((1 + extend_factor) * length / 2.0)
        # prevent going off image
        det[0] = int(max(centrepoint[0] - half_side, 0))
        det[1] = int(max(centrepoint[1] - half_side, 0))
        det[2] = int(min(centrepoint[0] + half_side, img.shape[1]))
        det[3] = int(min(centrepoint[1] + half_side, img.shape[0]))
        return det

    try:
        if not self.is_cuda_enable:
            caffe.set_mode_cpu()
        else:
            caffe.set_mode_gpu()
            caffe.set_device(cfg.GPU_ID)

        scores, boxes = im_detect(self.net, img)

        # Class index 1 holds the face boxes/scores.
        cls_ind = 1
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = numpy.hstack(
            (cls_boxes, cls_scores[:, numpy.newaxis])).astype(numpy.float32)

        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        keep = numpy.where(dets[:, 4] > CONF_THRESH)
        dets = dets[keep]

        if len(dets) == 0:
            return None
        if return_best:
            # dets is ordered by confidence dets[:, 4], so the first one is the best
            return [expand_and_clip(dets[0])]
        # Each detection keeps its own score (row j, not row 0).
        return [expand_and_clip(dets[j]) for j in range(dets.shape[0])]
    except Exception as e:
        # Best-effort detector: report the failure and signal "no result".
        print('Exception in FaceDetectorFasterRCNN: ' + str(e))

    return None
def dump_weights(model_proto, model_weights, weight_output, shape_output=None, caffe_home='~/caffe'):
    """Helper function to dump caffe model weights in keras tf format

    # Arguments
        model_proto: path to the caffe model .prototxt file
        model_weights: path to the caffe model .caffemodel file
        weight_output: path to HDF5 output file
        shape_output: path to pickle output file
        caffe_home: caffe checkout whose ``python`` directory is put on
            ``sys.path`` before importing caffe

    # Notes
        The original author notes that caffe requires running this function
        in python 2.x.
    """
    def expand(path):
        return os.path.abspath(os.path.expanduser(path))

    caffe_home = expand(caffe_home)
    model_proto = expand(model_proto)
    model_weights = expand(model_weights)

    # import caffe
    sys.path.insert(0, os.path.join(caffe_home, 'python'))
    import caffe

    # create model
    caffe.set_mode_cpu()
    net = caffe.Net(model_proto, model_weights, caffe.TEST)

    if os.path.exists(weight_output):
        os.remove(weight_output)

    layer_names = list(net._layer_names)
    weights_shape = {}

    # The context manager closes the HDF5 file even if a layer fails to convert.
    with h5py.File(weight_output, 'w') as h5_file:
        # process the layers
        for name in net.params:
            layer = net.layers[layer_names.index(name)]
            blobs = net.params[name]
            blobs_shape = [list(b.shape) for b in blobs]
            weights_shape[name] = blobs_shape

            print('%-25s %-20s %-3s %s' % (name, layer.type, len(blobs), blobs_shape))

            params = collections.OrderedDict()
            if layer.type == 'Convolution':
                W = blobs[0].data
                # Move the first two axes to the end, reversed: (2, 3, 1, 0).
                W = W.transpose(2, 3, 1, 0)
                params[name + '_W_1:0'] = W
                if len(blobs) > 1:
                    b = blobs[1].data
                    params[name + '_b_1:0'] = b
            elif layer.type == 'Normalize':
                gamma = blobs[0].data
                params[name + '_gamma_1:0'] = gamma
            elif layer.type == 'BatchNorm':
                size = blobs[0].shape[0]
                running_mean = blobs[0].data
                running_std = blobs[1].data
                # NOTE(review): gamma is filled with the first value of the
                # third blob and beta is zero; confirm this mapping.
                gamma = np.empty(size)
                gamma.fill(blobs[2].data[0])
                beta = np.zeros(size)
                params[name + '_gamma_1:0'] = gamma
                params[name + '_beta_1:0'] = beta
                params[name + '_running_mean_1:0'] = running_mean
                params[name + '_running_std_1:0'] = running_std
            elif layer.type == 'Scale':
                gamma = blobs[0].data
                beta = blobs[1].data
                params[name + '_gamma_1:0'] = gamma
                params[name + '_beta_1:0'] = beta
            elif layer.type == 'InnerProduct':
                W = blobs[0].data
                W = W.T
                params[name + '_W_1:0'] = W
                # Bias is optional, as in the Convolution branch.
                if len(blobs) > 1:
                    b = blobs[1].data
                    params[name + '_b_1:0'] = b
            else:
                if len(blobs) > 0:
                    print('UNRECOGNISED BLOBS')

            # create group and add parameters
            g = h5_file.create_group(name)
            for weight_name, value in params.items():
                param_dset = g.create_dataset(weight_name, value.shape, dtype=value.dtype)
                if not value.shape:
                    # scalar
                    param_dset[()] = value
                else:
                    param_dset[:] = value
            g.attrs['weight_names'] = list(params.keys())

        h5_file.attrs['layer_names'] = layer_names

    # output model shape
    if shape_output is not None:
        output_shape = {}
        # items() works on both Python 2 and Python 3 (iteritems is 2-only).
        for layer_name, blob in net.blobs.items():
            output_shape[layer_name] = blob.data.shape

        shape = {}
        shape['output_shape'] = output_shape
        shape['weights_shape'] = weights_shape

        shape_output = expand(shape_output)
        if os.path.exists(shape_output):
            os.remove(shape_output)

        with open(shape_output, 'wb') as shape_file:
            pickle.dump(shape, shape_file, protocol=pickle.HIGHEST_PROTOCOL)
def main(argv):
    """Run a Caffe window detector over the listed images and save the results.

    The input is a ``.txt`` list of filenames or a ``.csv`` of windows
    (``filename, xmin, ymin, xmax, ymax``). Detections are collected into a
    DataFrame indexed by filename and written as CSV or HDF5 depending on
    the output file extension.

    Args:
        argv: Not used directly; ``parse_args()`` is called without arguments.

    Raises:
        Exception: if the input file is neither ``.txt`` nor ``.csv``.
    """
    pycaffe_dir = os.path.dirname(__file__)

    parser = argparse.ArgumentParser()
    # Required arguments: input and output.
    parser.add_argument(
        "input_file",
        help="Input txt/csv filename. If .txt, must be list of filenames.\
        If .csv, must be comma-separated file with header\
        'filename, xmin, ymin, xmax, ymax'")
    parser.add_argument(
        "output_file",
        help="Output h5/csv filename. Format depends on extension.")
    # Optional arguments.
    parser.add_argument(
        "--model_def",
        default=os.path.join(
            pycaffe_dir,
            "../imagenet_models/bvlc_reference_caffenet/deploy.prototxt"),
        help="Model definition file.")
    parser.add_argument(
        "--pretrained_model",
        default=os.path.join(
            pycaffe_dir,
            "../imagenet_models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel"
        ),
        help="Trained model weights file.")
    parser.add_argument("--crop_mode", default="selective_search",
                        choices=CROP_MODES,
                        help="How to generate windows for detection.")
    parser.add_argument("--gpu", action='store_true',
                        help="Switch for gpu computation.")
    parser.add_argument(
        "--mean_file",
        default=os.path.join(pycaffe_dir,
                             'caffe/imagenet/ilsvrc_2012_mean.npy'),
        help="Data set image mean of H x W x K dimensions (numpy array). " +
        "Set to '' for no mean subtraction.")
    parser.add_argument(
        "--input_scale", type=float,
        help="Multiply input features by this scale to finish preprocessing.")
    parser.add_argument(
        "--raw_scale", type=float, default=255.0,
        help="Multiply raw input by this scale before preprocessing.")
    parser.add_argument(
        "--channel_swap", default='2,1,0',
        help="Order to permute input channels. The default converts " +
        "RGB -> BGR since BGR is the Caffe default by way of OpenCV.")
    parser.add_argument(
        "--context_pad", type=int, default=16,
        help="Amount of surrounding context to collect in input window.")
    args = parser.parse_args()

    mean, channel_swap = None, None
    if args.mean_file:
        mean = np.load(args.mean_file)
        # Reduce a full mean image to per-channel values; leave a mean that
        # is already (K, 1, 1) untouched.
        if mean.shape[1:] != (1, 1):
            mean = mean.mean(1).mean(1)
    if args.channel_swap:
        channel_swap = [int(s) for s in args.channel_swap.split(',')]

    if args.gpu:
        caffe.set_mode_gpu()
        print("GPU mode")
    else:
        caffe.set_mode_cpu()
        print("CPU mode")

    # Make detector.
    detector = caffe.Detector(args.model_def, args.pretrained_model,
                              mean=mean, input_scale=args.input_scale,
                              raw_scale=args.raw_scale,
                              channel_swap=channel_swap,
                              context_pad=args.context_pad)

    # Load input.
    t = time.time()
    print("Loading input...")
    if args.input_file.lower().endswith('txt'):
        with open(args.input_file) as f:
            inputs = [_.strip() for _ in f.readlines()]
    elif args.input_file.lower().endswith('csv'):
        inputs = pd.read_csv(args.input_file, sep=',', dtype={'filename': str})
        inputs.set_index('filename', inplace=True)
    else:
        raise Exception("Unknown input file type: not in txt or csv.")

    # Detect.
    if args.crop_mode == 'list':
        # Unpack sequence of (image filename, windows).
        images_windows = [
            (ix, inputs.iloc[np.where(inputs.index == ix)][COORD_COLS].values)
            for ix in inputs.index.unique()
        ]
        detections = detector.detect_windows(images_windows)
    else:
        detections = detector.detect_selective_search(inputs)
    print("Processed {} windows in {:.3f} s.".format(len(detections),
                                                     time.time() - t))

    # Collect into dataframe with labeled fields.
    df = pd.DataFrame(detections)
    df.set_index('filename', inplace=True)
    df[COORD_COLS] = pd.DataFrame(data=np.vstack(df['window']),
                                  index=df.index, columns=COORD_COLS)
    del (df['window'])

    # Save results.
    t = time.time()
    if args.output_file.lower().endswith('csv'):
        # csv
        # Enumerate the class probabilities.
        class_cols = ['class{}'.format(x) for x in range(NUM_OUTPUT)]
        df[class_cols] = pd.DataFrame(data=np.vstack(df['feat']),
                                      index=df.index, columns=class_cols)
        # `columns` is the current keyword; the old `cols` spelling is rejected.
        df.to_csv(args.output_file, columns=COORD_COLS + class_cols)
    else:
        # h5
        df.to_hdf(args.output_file, 'df', mode='w')
    print("Saved to {} in {:.3f} s.".format(args.output_file,
                                            time.time() - t))
def main(argv):
    """Classify the images listed in ``--input_file`` and save the predictions.

    The input file is read as text, one image per line; when a line holds
    several space-separated fields only the first one (the image path) is
    used. Predictions from ``caffe.Classifier.predict`` are written with
    ``np.save`` to ``--output_file``.

    Args:
        argv: accepted for call compatibility but not consulted here;
            ``parser.parse_args()`` is called without arguments.
    """
    pycaffe_dir = caffe_root + 'python/'
    default_mean_file = os.path.join(
        pycaffe_dir, 'caffe/imagenet/ilsvrc_2012_mean.npy')

    parser = argparse.ArgumentParser()
    # Required arguments: input and output files.
    parser.add_argument("--input_file", help="Input image, directory, or npy.")
    parser.add_argument("--output_file", help="Output npy filename.")
    # Optional arguments.
    parser.add_argument(
        "--model_def",
        default=os.path.join(
            pycaffe_dir, "../examples/imagenet/imagenet_deploy.prototxt"),
        help="Model definition file.")
    parser.add_argument(
        "--pretrained_model",
        default=os.path.join(
            pycaffe_dir,
            "../examples/imagenet/caffe_reference_imagenet_model"),
        help="Trained model weights file.")
    parser.add_argument(
        "--gpu", action='store_true', help="Switch for gpu computation.")
    parser.add_argument(
        "--center_only",
        action='store_true',
        help="Switch for prediction from center crop alone instead of " +
        "averaging predictions across crops (default).")
    parser.add_argument(
        "--images_dim",
        default='256,256',
        help="Canonical 'height,width' dimensions of input images.")
    parser.add_argument(
        "--mean_file",
        default=default_mean_file,
        help="Data set image mean of H x W x K dimensions (numpy array). " +
        "Set to '' for no mean subtraction.")
    parser.add_argument(
        "--input_scale",
        type=float,
        default=255,
        help="Multiply input features by this scale before input to net")
    parser.add_argument(
        "--channel_swap",
        default='2,1,0',
        help="Order to permute input channels. The default converts " +
        "RGB -> BGR since BGR is the Caffe default by way of OpenCV.")
    parser.add_argument(
        "--ext",
        default='jpg',
        help="Image file extension to take as input when a directory " +
        "is given as the input file.")
    args = parser.parse_args()

    image_dims = [int(s) for s in args.images_dim.split(',')]
    channel_swap = [int(s) for s in args.channel_swap.split(',')]

    mean = None
    if args.mean_file:
        mean = np.load(args.mean_file)
        # Resize mean (which requires H x W x K input in range [0,1]).
        if args.mean_file == default_mean_file:
            in_shape = (227, 227)
        else:
            in_shape = image_dims
        # Normalise to [0, 1] for resize_image, then restore the value range.
        m_min, m_max = mean.min(), mean.max()
        normal_mean = (mean - m_min) / (m_max - m_min)
        resized = caffe.io.resize_image(
            normal_mean.transpose((1, 2, 0)), in_shape)
        mean = resized.transpose((2, 0, 1)) * (m_max - m_min) + m_min

    if args.gpu:
        caffe.set_mode_gpu()
        print("GPU mode")
    else:
        caffe.set_mode_cpu()
        print("CPU mode")

    # Make classifier.
    classifier = caffe.Classifier(args.model_def,
                                  args.pretrained_model,
                                  image_dims=image_dims,
                                  mean=mean,
                                  input_scale=1.0,
                                  raw_scale=255.0,
                                  channel_swap=channel_swap)

    # Load image file list.
    args.input_file = os.path.expanduser(args.input_file)
    im_files = []
    with open(args.input_file) as f:
        for line in f:
            # strip() removes the newline whether or not the last line has
            # one; blank lines would otherwise become empty image paths.
            entry = line.strip()
            if not entry:
                continue
            # Only the first space-separated field is the image path.
            im_files.append(entry.split(' ')[0])
    inputs = [caffe.io.load_image(im_f) for im_f in im_files]
    print("Classifying %d inputs." % len(inputs))

    # Classify.
    start = time.time()
    predictions = classifier.predict(inputs, not args.center_only)
    print("Done in %.2f s." % (time.time() - start))

    # Save
    np.save(args.output_file, predictions)
    print("Saved %s." % args.output_file)
def load_model(self):
    """Put Caffe in CPU mode and build the text detector.

    The result is stored on ``self.text_detector``.
    """
    caffe.set_mode_cpu()
    # Build the pipeline inside-out: network -> proposal detector -> detector.
    net = CaffeModel(NET_DEF_FILE, MODEL_FILE)
    proposal_detector = TextProposalDetector(net)
    self.text_detector = TextDetector(proposal_detector)
def main(args):
    """Compare network features on LFW matched and unmatched image pairs.

    For ``total`` matched pairs and then ``total`` unmatched pairs obtained
    from ``ParseLFW``, the features of layer ``args.layer_name`` are extracted
    for both images of the pair and their cosine similarity and Euclidean
    distance are recorded. Features, similarities and distances are saved
    with ``np.save`` under names prefixed by ``args.layer_name``, printed,
    and an ROC curve over a sweep of distance thresholds is sent to visdom.

    Args:
        args: parsed command-line namespace. Attributes read here:
            ``images_dim``, ``mean``, ``channel_swap``, ``gpu``,
            ``model_def``, ``pretrained_model``, ``input_scale``,
            ``raw_scale``, ``input_lfw_path``, ``draw_visdom``,
            ``layer_name``.
    """
    image_dims = [int(s) for s in args.images_dim.split(',')]
    # The mean is given as comma-separated integers, not loaded from a file.
    mean = np.array([int(s) for s in args.mean.split(',')])
    channel_swap = None
    if args.channel_swap:
        channel_swap = [int(s) for s in args.channel_swap.split(',')]

    if args.gpu:
        caffe.set_mode_gpu()
        print("GPU mode")
    else:
        caffe.set_mode_cpu()
        print("CPU mode")

    extractor = FeatureExtractor(args.model_def, args.pretrained_model,
                                 image_dims, mean, args.input_scale,
                                 args.raw_scale, channel_swap)
    total = 1100
    lfwParser = ParseLFW.ParseLFW(args.input_lfw_path)

    # Builtin float replaces the np.float alias, which newer NumPy removed.
    featuredot_match = np.zeros(total, dtype=float)
    featuredist_match = np.zeros(total, dtype=float)
    featuredot_Unmatch = np.zeros(total, dtype=float)
    featuredist_Unmatch = np.zeros(total, dtype=float)
    featuresL_match = []
    featuresL_Unmatch = []
    featuresR_match = []
    featuresR_Unmatch = []
    crop_lb = 80
    crop_ub = 210
    viz = visdom.Visdom(port=10141, env='LFW' + args.pretrained_model)

    print("Matched Pairs:")
    for i in range(total):
        matchimgL, matchimgR = lfwParser.MatchPair_extract()
        # NOTE(review): the nesting below was reconstructed from a
        # whitespace-collapsed source; as written, images are cropped only
        # when draw_visdom is set -- confirm against the original file.
        if args.draw_visdom:
            matchimgL = matchimgL[crop_lb:crop_ub, crop_lb:crop_ub, :]
            matchL_RGB = matchimgL.transpose(2, 0, 1)
            # NOTE(review): the right image crop is shifted by 20 px here but
            # not in the unmatched loop -- confirm this is intentional.
            matchimgR = matchimgR[crop_lb - 20:crop_ub - 20,
                                  crop_lb - 20:crop_ub - 20, :]
            matchR_RGB = matchimgR.transpose(2, 0, 1)
            viz.image(matchL_RGB, win='MatchedPairs:L',
                      opts=dict(title='MatchedPairs:L'))
            # Title previously read 'MatchedPairs:L' on the right-hand window.
            viz.image(matchR_RGB, win='MatchedPairs:R',
                      opts=dict(title='MatchedPairs:R'))
        features = extractor.extractFeatureFromMultiImage(
            [matchimgL, matchimgR], args.layer_name)
        featureL = features[0]
        featureR = features[1]
        featuresL_match.append(featureL)
        featuresR_match.append(featureR)
        # Cosine similarity and Euclidean distance of the two feature vectors.
        featuredot_match[i] = np.dot(featureL, featureR) / (
            np.linalg.norm(featureL) * np.linalg.norm(featureR))
        featuredist_match[i] = np.linalg.norm(featureL - featureR)

    np.save(args.layer_name + "L_match", featuresL_match)
    np.save(args.layer_name + "R_match", featuresR_match)
    np.save(args.layer_name + "correlation_match", featuredot_match)
    np.save(args.layer_name + "Distance_match", featuredist_match)

    print("Unmatched pairs:\n")
    for i in range(total):
        UnmatchimgL, UnmatchimgR = lfwParser.UnMatchPair_extract()
        if args.draw_visdom:
            UnmatchimgL = UnmatchimgL[crop_lb:crop_ub, crop_lb:crop_ub, :]
            UnmatchL_RGB = UnmatchimgL.transpose(2, 0, 1)
            UnmatchimgR = UnmatchimgR[crop_lb:crop_ub, crop_lb:crop_ub, :]
            UnmatchR_RGB = UnmatchimgR.transpose(2, 0, 1)
            viz.image(UnmatchL_RGB, win='UnMatchedPairs:L',
                      opts=dict(title='UnMatchedPairs:L'))
            viz.image(UnmatchR_RGB, win='UnMatchedPairs:R',
                      opts=dict(title='UnMatchedPairs:R'))
        features = extractor.extractFeatureFromMultiImage(
            [UnmatchimgL, UnmatchimgR], args.layer_name)
        featureL = features[0]
        featureR = features[1]
        featuresL_Unmatch.append(featureL)
        featuresR_Unmatch.append(featureR)
        featuredot_Unmatch[i] = np.dot(featureL, featureR) / (
            np.linalg.norm(featureL) * np.linalg.norm(featureR))
        featuredist_Unmatch[i] = np.linalg.norm(featureL - featureR)

    np.save(args.layer_name + "L_Unmatch", featuresL_Unmatch)
    np.save(args.layer_name + "R_Unmatch", featuresR_Unmatch)
    np.save(args.layer_name + "correlation_Unmatch", featuredot_Unmatch)
    np.save(args.layer_name + "Distance_Unmatch", featuredist_Unmatch)

    print("Correlations for matched pairs:", featuredot_match)
    print("Correlations for Unmatched pairs:", featuredot_Unmatch)
    print("Distances for matched pairs:", featuredist_match)
    print("Distances for Unmatched pairs:", featuredist_Unmatch)

    # ROC: for each distance threshold, count the matched pairs below it
    # (detections) and the unmatched pairs below it (false alarms).
    dis_match = featuredist_match
    dis_unmatch = featuredist_Unmatch
    threshold = np.arange(np.min(dis_match), np.max(dis_unmatch), 0.001)
    counts_d = np.array(
        [np.count_nonzero(dis_match < t) for t in threshold], dtype=float)
    counts_fa = np.array(
        [np.count_nonzero(dis_unmatch < t) for t in threshold], dtype=float)
    viz.line(X=counts_fa / total,
             Y=counts_d / total,
             win='ROC Curve',
             opts=dict(title='ROC Curve', xlabel='P_fa', ylabel='P_d'))
def caffeDetect(filename):
    """Locate the eyes in an image and return an aligned grayscale crop.

    Steps, as performed below:
      1. Load the eye-detector network and run it on the full image.
      2. Crop around the first estimate (``pre_process``) and run the
         network again; map the result back to full-image coordinates.
      3. Rotate the image by the angle of the line through both eyes.
      4. Scale so the eye distance equals ``EYE_DISTANCE``.
      5. Crop a ``CROP_WIDTH`` x ``CROP_HEIGHT`` window positioned relative
         to the left eye, paste it on a white canvas, and convert the
         top-left 200 x 155 region to grayscale.

    Args:
        filename: path of the image, read with ``cv2.imread``.

    Returns:
        A 2-D float array: the weighted channel sum of the 200 x 155 region.
    """
    model_def = caffe_root + 'model/eyeDetector/deploy_output_resize_map.prototxt'
    model_weights = caffe_root + 'model/eyeDetector/conf5_iter_162000.caffemodel'
    if os.path.isfile(model_weights):
        print('Model found.')
    else:
        print('Model Not Exist!.')
    caffe.set_mode_cpu()
    net = caffe.Net(model_def, model_weights, caffe.TEST)

    im_orig = cv2.imread(filename)
    img = im_orig

    # Coarse estimate on the whole image, then refine on a crop around it.
    lefteye, righteye = get_netoutput(im_orig, net)
    estimate_img, crop_x, crop_y = pre_process(im_orig, lefteye)
    lefteye, righteye = get_netoutput(estimate_img, net)
    # Network output is indexed [0] = y, [1] = x at this stage; shift the
    # refined positions back into full-image coordinates.
    lefteye[1] += crop_x
    righteye[1] += crop_x
    lefteye[0] += crop_y
    righteye[0] += crop_y

    # Angle of the line through both eyes.
    dis_x = righteye[1] - lefteye[1]
    dis_y = righteye[0] - lefteye[0]
    if dis_x == 0:
        # Eyes vertically stacked: compare the y components (index 0).
        # The previous string index 'y' failed on these integer-indexed values.
        angle = -90 if (lefteye[0] > righteye[0]) else 90
    else:
        angle = math.atan(dis_y / dis_x) * 180 / PI

    center = (img.shape[1] / 2, img.shape[0] / 2)
    affineMat = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotate_img = cv2.warpAffine(img, affineMat, (img.shape[1], img.shape[0]))
    # From here on the eye positions are used as [0] = x, [1] = y.
    lefteye = point_rot(lefteye, affineMat)
    righteye = point_rot(righteye, affineMat)

    # Rescale so the distance between the eyes equals EYE_DISTANCE.
    dis_x = righteye[0] - lefteye[0]
    dis_y = righteye[1] - lefteye[1]
    distance = math.sqrt(dis_x * dis_x + dis_y * dis_y)
    scale = EYE_DISTANCE / distance
    img = cv2.resize(rotate_img, None, fx=scale, fy=scale,
                     interpolation=cv2.INTER_CUBIC)
    lefteye = lefteye * scale
    righteye = righteye * scale

    # Crop window anchored relative to the left eye, clamped to the image.
    min_x = max(int(lefteye[0] - EYE_EDGE_DISTANCE_X), 0)
    min_y = max(int(lefteye[1] - EYE_EDGE_DISTANCE_Y), 0)
    max_x = min(min_x + CROP_WIDTH, img.shape[1])
    max_y = min(min_y + CROP_HEIGHT, img.shape[0])
    img = img[min_y:max_y, min_x:max_x]

    # White canvas; when the window was clamped at the top/left border the
    # crop is pasted with an offset so the eye keeps its intended position.
    ret = np.ones((1000, 500, 3), dtype=np.uint8) * 255
    start_x = 0
    start_y = 0
    # int(): the eye coordinates are scaled floats and slice bounds must be
    # integers.
    if lefteye[0] - EYE_EDGE_DISTANCE_X < 0:
        start_x = int(EYE_EDGE_DISTANCE_X - lefteye[0])
    if lefteye[1] - EYE_EDGE_DISTANCE_Y < 0:
        start_y = int(EYE_EDGE_DISTANCE_Y - lefteye[1])
    ret[start_y:start_y + max_y - min_y,
        start_x:start_x + max_x - min_x, :] = img

    # Weighted channel sum over the top-left 200 x 155 region.
    ret = (0.2989 * ret[0:200, 0:155, 0] + 0.5870 * ret[0:200, 0:155, 1] +
           0.1140 * ret[0:200, 0:155, 2])
    return ret
def set_gpu(gpuID):
    """Select the Caffe compute mode from a device id.

    A ``gpuID`` of zero or more switches to GPU mode on that device;
    anything else selects CPU mode.
    """
    use_gpu = gpuID >= 0
    if not use_gpu:
        caffe.set_mode_cpu()
        return
    caffe.set_mode_gpu()
    caffe.set_device(gpuID)
def load_all():
    """
    Load every model and table needed for generating.

    Returns a dict with the keys 'stv', 'dec', 'vse', 'net', 'cap', 'cvec',
    'bneg' and 'bpos'.
    """
    paths = config.paths
    print(paths['decmodel'])

    # Skip-thoughts
    print('Loading skip-thoughts...')
    skip_model = skipthoughts.load_model(paths['skmodels'], paths['sktables'])

    # Decoder
    print('Loading decoder...')
    dec_model = decoder.load_model(paths['decmodel'], paths['dictionary'])

    # Image-sentence embedding
    print('Loading image-sentence embedding...')
    print(paths['vsemodel'])
    vse_model = embedding.load_model(paths['vsemodel'])

    # VGG-19
    print('Loading and initializing ConvNet...')
    if not config.FLAG_CPU_MODE:
        convnet = build_convnet(paths['vgg'])
    else:
        # pycaffe is only put on the path and imported for the CPU branch.
        sys.path.insert(0, paths['pycaffe'])
        import caffe
        caffe.set_mode_cpu()
        convnet = caffe.Net(paths['vgg_proto_caffe'],
                            paths['vgg_model_caffe'], caffe.TEST)

    # Captions
    print('Loading captions...')
    with open(paths['captions'], 'rb') as f:
        captions = [line.strip().decode("utf-8") for line in f]

    # Caption embeddings
    print('Embedding captions...')
    caption_vecs = embedding.encode_sentences(vse_model, captions,
                                              verbose=False)

    # Biases
    print('Loading biases...')
    neg_bias = numpy.load(paths['negbias'], encoding='latin1')
    pos_bias = numpy.load(paths['posbias'], encoding='latin1')

    # Pack up
    return {
        'stv': skip_model,
        'dec': dec_model,
        'vse': vse_model,
        'net': convnet,
        'cap': captions,
        'cvec': caption_vecs,
        'bneg': neg_bias,
        'bpos': pos_bias,
    }