def setup_net(self):
    """Load the test-phase network and the class names for this object.

    Switches caffe to GPU mode on device 0, looks up the deploy/weights
    files for ``self.prms_.netName`` via ``cfg.get_caffe_net_files`` and
    stores the loaded net in ``self.net_``.  The class names returned by
    ``cfg.dataset2classnames`` for ``self.prms_.trainDataSet`` are stored
    in ``self.cls_``.
    """
    caffe.set_mode_gpu()
    caffe.set_device(0)

    net_files = cfg.get_caffe_net_files(self.prms_.netName)
    deploy_path = net_files.deployFile
    weights_path = net_files.netFile
    self.net_ = caffe.Net(deploy_path, weights_path, caffe.TEST)

    self.cls_ = cfg.dataset2classnames(self.prms_.trainDataSet)
def setCaffeMode(gpu, device = 0):
    """Put caffe into CPU or GPU mode.

    Args:
        gpu: truthy to run on the GPU, falsy to run on the CPU.
        device: GPU device id passed to ``caffe.set_device``; only used
            when ``gpu`` is truthy.
    """
    if not gpu:
        caffe.set_mode_cpu()
        return
    caffe.set_mode_gpu()
    caffe.set_device(device)
def justCheckGradients(solver_file,deploy_file,model_file):
    """Print per-layer ratios obtained from ``getRatios`` after one pass.

    One forward pass of the solver's net supplies a batch; its ``data`` and
    ``thelabelscoarse`` blobs are copied into a deploy net loaded from
    ``model_file``, which is then run forward and backward before
    ``getRatios`` is called on a fixed list of layers.

    Args:
        solver_file: solver prototxt used to build the ``caffe.SGDSolver``.
        deploy_file: network prototxt for the net that is inspected.
        model_file: weights file loaded into the deploy net.
    """
    weights_path = model_file
    print(weights_path)
    print(os.path.exists(weights_path))

    caffe.set_device(1)
    caffe.set_mode_gpu()

    # The solver is only used as a data source for one batch.
    sgd = caffe.SGDSolver(solver_file)
    sgd.net.forward()

    net = caffe.Net(deploy_file, weights_path)
    print(list(net._layer_names))
    print(net.blobs.keys())

    for blob_name in ('data', 'thelabelscoarse'):
        net.blobs[blob_name].data[...] = sgd.net.blobs[blob_name].data
    net.forward()
    net.backward()

    layers_to_explore = ['conv1', 'conv2', 'conv3', 'conv4', 'conv5',
                         'fc6_fix', 'fc7_fix', 'fc8_fix']
    ratios = getRatios(net, layers_to_explore)
    for layer_name in ratios.keys():
        print("%s %s" % (layer_name, ratios[layer_name]))
def test(net_file,model_file,predict_file,gpunum,outdir,outputlayer):
    """Run a trained net over HDF5 batches and save the requested blobs.

    Args:
        net_file: network prototxt.
        model_file: trained weights.
        predict_file: text file listing one HDF5 batch file per line; each
            batch file must contain a ``data`` dataset.
        gpunum: GPU device id.
        outdir: output directory, created if missing.
        outputlayer: blob names to extract, joined with ``_``.

    Writes ``bestiter.pred`` (tab-separated rows of the first requested
    blob) and ``bestiter.pred.params.pkl`` (pickled tuple of all stacked
    blobs and the blob-name list) into ``outdir``.
    """
    caffe.set_device(gpunum)
    caffe.set_mode_gpu()
    if not exists(outdir):
        makedirs(outdir)
    outfile = os.path.join(outdir, 'bestiter.pred')
    layer_names = outputlayer.split('_')

    net = caffe.Net(realpath(net_file), realpath(model_file), caffe.TEST)
    with open(predict_file, 'r') as f:
        files = [x.strip() for x in f]

    # Collect per-batch results and stack once at the end instead of
    # re-copying the growing array on every batch.
    chunks = [[] for _ in layer_names]
    for batchfile in files:
        # Close every HDF5 file as soon as its data has been read.
        with h5py.File(batchfile, 'r') as fi:
            dataset = np.asarray(fi['data'])
        out = net.forward_all(data=dataset, blobs=layer_names)
        for per_layer, name in zip(chunks, layer_names):
            per_layer.append(np.vstack(np.asarray(out[name])))

    # With no batch files there is nothing to stack; emit empty outputs
    # rather than failing on outdata[0].
    outdata = [np.vstack(per_layer) for per_layer in chunks] if files else []

    with open(outfile, 'w') as f:
        if outdata:
            for row in outdata[0]:
                f.write('%s\n' % '\t'.join([str(x) for x in row]))
    with open(join(outdir, 'bestiter.pred.params.pkl'), 'wb') as f:
        cPickle.dump((outdata, layer_names), f,
                     protocol=cPickle.HIGHEST_PROTOCOL)
def init(self):
    """Load the CNN named by ``self.image_net`` and its input transformer.

    Supports ``'caffenet'`` and ``'vggnet'``; any other value raises
    ``StandardError``.  Sets ``self.net``, ``self.transformer``,
    ``self.BATCH_SIZE`` (100) and ``self.initialized`` and reshapes the
    ``data`` blob to the batch size and the crop size of the chosen net.
    """
    image_net = self.image_net
    caffe.set_device(0)
    caffe.set_mode_gpu()

    # name -> (deploy prototxt, weights, per-channel mean, square crop size)
    net_specs = (
        ('caffenet', (
            './caffe/models/bvlc_reference_caffenet/deploy.prototxt',
            './caffe/models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel',
            [104, 117, 123],
            227)),
        ('vggnet', (
            './caffe/models/vggnet/VGG_ILSVRC_16_layers_deploy.prototxt',
            './caffe/models/vggnet/VGG_ILSVRC_16_layers.caffemodel',
            [103.939, 116.779, 123.68],
            224)),
    )
    spec = None
    for name, candidate in net_specs:
        if image_net == name:
            spec = candidate
            break
    if spec is None:
        raise StandardError('Unknown CNN %s' % image_net)
    convnet_proto, convnet_model, channel_mean, crop = spec

    self.net = caffe.Net(convnet_proto, convnet_model, caffe.TEST)

    self.transformer = caffe.io.Transformer(
        {'data': self.net.blobs['data'].data.shape})
    transformer = self.transformer
    transformer.set_transpose('data', (2, 0, 1))
    transformer.set_mean('data', np.array(channel_mean))
    transformer.set_raw_scale('data', 255)
    transformer.set_channel_swap('data', (2, 1, 0))

    self.BATCH_SIZE = 100
    self.net.blobs['data'].reshape(self.BATCH_SIZE, 3, crop, crop)

    self.image_net = image_net
    self.initialized = True
    print("Done initializing image feature extractor")
def run_quantized_network(self):
    """Measure accuracy with quantized weights, then with quantized activations.

    The net is built from ``self.model_file`` and ``self.quant_weight_file``.
    A first pass of ``self.iterations`` plain forward calls reports the
    accuracy with quantized weights only.  A second pass runs the net in
    the segments given by ``self.start_layer`` / ``self.end_layer`` and,
    after every segment except the last, rounds the segment's output blob
    to fixed point using ``self.act_dec_bits`` and saturates it to the
    range [-128, 127] before scaling it back.

    Returns:
        Mean accuracy (in percent) of the second pass.
    """
    if self.gpu == True:
        caffe.set_mode_gpu()
    net = caffe.Net(self.model_file, self.quant_weight_file, caffe.TEST)

    n_iter = self.iterations
    acc = np.zeros(n_iter)

    for it in range(n_iter):
        result = net.forward()
        acc[it] = result[self.accuracy_layer] * 100
    print("Accuracy with quantized weights/biases: %.2f%%" % (acc.mean()))

    n_stages = len(self.start_layer)
    for it in range(n_iter):
        for stage in range(n_stages):
            stage_end = self.end_layer[stage]
            if stage == 0:
                net.forward(end=str(stage_end))
            else:
                net.forward(start=str(self.start_layer[stage]),
                            end=str(stage_end))
            if stage >= n_stages - 1:
                # not quantizing accuracy layer
                continue
            scale = 2 ** self.act_dec_bits[stage_end]
            blob = net.blobs[stage_end]
            blob.data[:] = np.floor(blob.data * scale)
            # Saturate to the signed 8-bit range.
            blob.data[blob.data > 126] = 127
            blob.data[blob.data < -127] = -128
            blob.data[:] = blob.data / scale
        acc[it] = net.blobs[self.accuracy_layer].data * 100

    accuracy = acc.mean()
    print("Accuracy with quantized weights/biases and activations: %.2f%%" % (accuracy))
    return accuracy
def detectFace(img_path,threshold):
    """Run the module-level ``net_12`` over an image pyramid.

    Args:
        img_path: path of the image to load with ``cv2.imread``.
        threshold: sequence whose first element is passed to
            ``tools.detect_face_12net`` as the score threshold.

    Returns:
        List of rectangles collected from ``tools.detect_face_12net`` over
        all scales returned by ``tools.calculateScales``.

    Raises:
        IOError: if the image cannot be read.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('Could not read image: %s' % img_path)
    # NOTE(review): if img is an unsigned 8-bit array this subtraction
    # wraps around rather than producing negative values - confirm this
    # matches how net_12 was trained.
    caffe_img = img.copy() - 128
    origin_h, origin_w, ch = caffe_img.shape
    scales = tools.calculateScales(img)

    # Device selection does not depend on the scale, so do it once.
    caffe.set_device(0)
    caffe.set_mode_gpu()

    out = []
    for scale in scales:
        hs = int(origin_h * scale)
        ws = int(origin_w * scale)
        scale_img = cv2.resize(caffe_img, (ws, hs))
        # (h, w, c) -> (c, w, h), matching the blob shape set below.
        scale_img = np.swapaxes(scale_img, 0, 2)
        net_12.blobs['data'].reshape(1, 3, ws, hs)
        net_12.blobs['data'].data[...] = scale_img
        out.append(net_12.forward())

    rectangles = []
    for scale, net_out in zip(scales, out):
        cls_prob = net_out['cls_score'][0][1]
        roi = net_out['conv4-2'][0]
        out_h, out_w = cls_prob.shape
        out_side = max(out_h, out_w)
        rectangles.extend(
            tools.detect_face_12net(cls_prob, roi, out_side, 1/scale,
                                    origin_w, origin_h, threshold[0]))
    return rectangles
def style_labeler():
    """Evaluate ``BongguNet`` on the first 10000 Flickr-style test images.

    Reads ``data/flickr_style/test.txt`` below ``caffe_root`` (one
    ``path label`` pair per line), predicts every image with
    ``BongguNet.predict_our`` and prints the top-1 accuracy (first element
    of the prediction) and the accuracy of the label being contained in
    the full prediction.
    """
    flickr_test_set = np.loadtxt(caffe_root + 'data/flickr_style/test.txt',
                                 str, delimiter='\t')
    fields = [readline.split() for readline in flickr_test_set]
    paths = [parts[0] for parts in fields][:10000]
    labels = [int(parts[1]) for parts in fields][:10000]

    caffe.set_mode_gpu()
    our_model = BongguNet()

    true_res = []
    our_res = []
    our_res5 = []
    for i, (path, label) in enumerate(zip(paths, labels)):
        # Report progress and collect garbage once per 1000 images; the
        # previous truthiness test did so on every index *except*
        # multiples of 1000.
        if i % 1000 == 0:
            print(i)
            gc.collect()
        img = caffe.io.load_image(path)
        res = our_model.predict_our(img)
        our_res.append(res[0])
        our_res5.append(res)
        true_res.append(label)

    print("accuarcy@1: %s" % np.mean([a == b for a, b in zip(true_res, our_res)]))
    print("accuarcy@5: %s" % np.mean([a in b for a, b in zip(true_res, our_res5)]))
def __init__(self, model_def_file, pretrained_model_file, mean_file,
             raw_scale, class_labels_file, bet_file, image_dim, gpu_mode):
    """Load the classifier, its class labels and the 'bet' structure.

    Args:
        model_def_file: network prototxt.
        pretrained_model_file: trained weights.
        mean_file: ``.npy`` mean image; reduced to a per-channel mean.
        raw_scale: raw input scale passed to ``caffe.Classifier``.
        class_labels_file: text file with ``synset_id name[, ...]`` lines.
        bet_file: pickle file providing at least the ``'infogain'`` and
            ``'preferences'`` entries.
        image_dim: side length of the square input images.
        gpu_mode: truthy to run on the GPU, otherwise CPU.
    """
    logging.info('Loading net and associated files...')
    if gpu_mode:
        caffe.set_mode_gpu()
    else:
        caffe.set_mode_cpu()
    self.net = caffe.Classifier(
        model_def_file, pretrained_model_file,
        image_dims=(image_dim, image_dim), raw_scale=raw_scale,
        mean=np.load(mean_file).mean(1).mean(1), channel_swap=(2, 1, 0)
    )

    with open(class_labels_file) as f:
        rows = []
        for l in f:
            parts = l.strip().split(' ')
            rows.append({
                'synset_id': parts[0],
                'name': ' '.join(parts[1:]).split(',')[0]
            })
    labels_df = pd.DataFrame(rows)
    # DataFrame.sort was removed from newer pandas; prefer sort_values and
    # fall back to sort only where sort_values does not exist yet.
    sort_by = getattr(labels_df, 'sort_values', None) or labels_df.sort
    self.labels = sort_by('synset_id')['name'].values

    # Close the pickle file deterministically instead of leaking the handle.
    # NOTE(review): unpickling executes code from the file; only load
    # trusted bet files.
    with open(bet_file) as bet_handle:
        self.bet = cPickle.load(bet_handle)
    # A bias to prefer children nodes in single-chain paths
    # I am setting the value to 0.1 as a quick, simple model.
    # We could use better psychological models here...
    self.bet['infogain'] -= np.array(self.bet['preferences']) * 0.1
def __init__(self, weights_path, image_net_proto, lstm_net_proto,
             vocab_path, device_id=-1):
    """Build the image net, the sentence-prediction net and the vocabulary.

    Args:
        weights_path: weights file loaded into both nets.
        image_net_proto: prototxt of the image-processing net.
        lstm_net_proto: prototxt of the sentence-prediction net.
        vocab_path: text file with one word per line; its first word must
            be ``<unk>``.
        device_id: GPU id; a negative value selects CPU mode.

    Raises:
        Exception: if the vocabulary size (including the leading
            ``<EOS>``) differs from axis 2 of the net's ``predict`` blob.
    """
    if device_id < 0:
        caffe.set_mode_cpu()
    else:
        caffe.set_mode_gpu()
        caffe.set_device(device_id)

    # Setup image processing net.
    phase = caffe.TEST
    self.image_net = caffe.Net(image_net_proto, weights_path, phase)
    data_shape = self.image_net.blobs['data'].data.shape
    self.transformer = caffe.io.Transformer({'data': data_shape})

    # Fill a mean image in which every channel holds one constant value.
    mean_values = [104, 117, 123]
    mean_image = np.zeros(data_shape[1:])
    assert mean_image.shape[0] == len(mean_values)
    for channel in range(len(mean_values)):
        mean_image[channel, ...] = mean_values[channel]
    self.transformer.set_mean('data', mean_image)
    self.transformer.set_channel_swap('data', (2, 1, 0))
    self.transformer.set_transpose('data', (2, 0, 1))

    # Setup sentence prediction net.
    self.lstm_net = caffe.Net(lstm_net_proto, weights_path, phase)

    with open(vocab_path, 'r') as vocab_file:
        words = [line.strip() for line in vocab_file]
    self.vocab = ['<EOS>'] + words
    assert self.vocab[1] == '<unk>'
    self.vocab_inv = {word: index for index, word in enumerate(self.vocab)}

    expected_size = self.lstm_net.blobs['predict'].data.shape[2]
    actual_size = len(self.vocab)
    if actual_size != expected_size:
        raise Exception('Invalid vocab file: contains %d words; '
                        'net expects vocab with %d words' %
                        (actual_size, expected_size))
def main(input, output, disp, gpu):
    """Run ``deepdream`` over the numbered frames of a directory.

    Starts from ``<input>/0001.jpg``; every iteration dreams on the current
    frame, saves it to ``<output>/NNNN.jpg`` and blends it with
    ``<input>/NNNN.jpg`` through ``morphPicture`` to obtain the next frame.
    ``net``, ``layersloop``, ``deepdream`` and ``morphPicture`` are taken
    from module scope.

    Args:
        input: directory holding the source frames.
        output: directory receiving the processed frames.
        disp: forwarded to ``deepdream`` as ``disp``.
        gpu: truthy to switch caffe to GPU mode on device 0.
    """
    make_sure_path_exists(input)
    make_sure_path_exists(output)

    # should be picked up by caffe by default, but just in case
    if gpu:
        caffe.set_mode_gpu()
        caffe.set_device(0)

    frame = np.float32(PIL.Image.open(input + '/0001.jpg'))

    # Count the files of the *given* input directory.  The count used to be
    # taken from a hard-coded './input' and tested bare file names against
    # the working directory, so it ignored the ``input`` argument.
    nrframes = len([name for name in os.listdir(input)
                    if os.path.isfile(os.path.join(input, name))])

    for frame_i in range(1, nrframes):
        frame = deepdream(
            net, frame, end=layersloop[frame_i % len(layersloop)],
            disp=disp, iter_n=5)
        saveframe = output + "/%04d.jpg" % frame_i
        PIL.Image.fromarray(np.uint8(frame)).save(saveframe)
        newframe = input + "/%04d.jpg" % frame_i
        # give it back 50% of original picture
        frame = morphPicture(saveframe, newframe)
        frame = np.float32(frame)
def flowWarp(img, flow):
    """Feed an image and a flow field through the ``apply_flow`` network.

    Args:
        img: array indexed (height, width, channels).
        flow: array indexed (height, width, channels), fed to the net's
            ``flow`` blob.

    Returns:
        The net's ``output`` blob moved back to channels-last order with
        singleton dimensions squeezed out.
    """
    import caffe

    width = img.shape[1]
    height = img.shape[0]
    print('processing (%dx%d)' % (width, height))

    defFile = tempFilename('.prototxt')
    preprocessFile('/home/ilge/hackathon2/common/prototmp/apply_flow.prototmp',
                   defFile, {'WIDTH': width, 'HEIGHT': height})
    try:
        caffe.set_logging_disabled()
        caffe.set_mode_gpu()
        net = caffe.Net(defFile, caffe.TEST)
    finally:
        # Remove the generated prototxt even when building the net fails.
        os.remove(defFile)

    print('network forward pass')
    # (H, W, C) -> (1, C, H, W)
    img_input = img[np.newaxis, :, :, :].transpose(0, 3, 1, 2)
    flow_input = flow[np.newaxis, :, :, :].transpose(0, 3, 1, 2)
    net.blobs['image'].reshape(*img_input.shape)
    net.blobs['image'].data[...] = img_input
    net.blobs['flow'].reshape(*flow_input.shape)
    net.blobs['flow'].data[...] = flow_input
    net.forward()

    output = net.blobs['output'].data[...].transpose(0, 2, 3, 1).squeeze()
    return output
def __init__(self, model_def_file, pretrained_model_file, raw_scale,
             class_labels_file, image_dim, gpu_mode):
    """Load the classifier, the bit-count table, the database and labels.

    ``self.database_param`` is read here but not assigned, so it must
    already be available on the instance (presumably a class attribute -
    confirm against the class definition).

    Args:
        model_def_file: network prototxt.
        pretrained_model_file: trained weights.
        raw_scale: raw input scale passed to ``caffe.Classifier``.
        class_labels_file: text file with ``synset_id name[, ...]`` lines.
        image_dim: side length of the square input images.
        gpu_mode: truthy to run on the GPU, otherwise CPU.
    """
    logging.info('Loading net and associated files...')
    if gpu_mode:
        caffe.set_mode_gpu()
    else:
        caffe.set_mode_cpu()

    ## load models googlenet
    self.net = caffe.Classifier(
        model_def_file, pretrained_model_file,
        image_dims=(image_dim, image_dim), raw_scale=raw_scale,
        mean=np.array([104.0, 116.0, 122.0]), channel_swap=(2, 1, 0))
    logging.info('Load vision model, %s', model_def_file)

    # generate N bit lookup table: number of set bits of every 16-bit value
    self.lookup = np.asarray([bin(i).count('1') for i in range(1 << 16)])

    # load reference bit model; the with-block closes the file even when
    # unpickling fails.
    # NOTE(review): unpickling executes code from the file; only load
    # trusted database files.
    with open(self.database_param, 'rb') as file_reader:
        self.database = cPickle.load(file_reader)
    logging.info('Load database from {}'.format(self.database_param))
    logging.info('database shape {}'.format(self.database['ref'].shape))

    with open(class_labels_file) as f:
        rows = []
        for l in f:
            parts = l.strip().split(' ')
            rows.append({
                'synset_id': parts[0],
                'name': ' '.join(parts[1:]).split(',')[0]
            })
    labels_df = pd.DataFrame(rows)
    # DataFrame.sort was removed from newer pandas; prefer sort_values and
    # fall back to sort only where sort_values does not exist yet.
    sort_by = getattr(labels_df, 'sort_values', None) or labels_df.sort
    self.labels = sort_by('synset_id')['name'].values
def evaluate_caffe_nn(train_data, valid_data, test_data):
    """Train a small fully connected caffe net and print its test score.

    Generates ``temp/net.prototxt`` and ``temp/solver.txt``, trains with
    ``caffe_utils.train`` on GPU 0 and prints the value returned by
    ``caffe_utils.evaluate`` for the test blobs.

    Args:
        train_data: training samples; ``train_data[0][0].size`` defines the
            input width of the net.
        valid_data: validation samples.
        test_data: test samples.
    """
    import caffe
    from caffe import layers as L, params as P
    import caffe_utils as utils

    def gen_net(net_path, data_shape, label_shape):
        """Write the net definition: Input -> FC(30) -> ReLU -> FC(10)."""
        spec = caffe.NetSpec()
        spec.data = L.Input(shape=dict(dim=list(data_shape)))
        spec.label = L.Input(shape=dict(dim=list(label_shape)))
        spec.fc0 = L.InnerProduct(spec.data, num_output=30,
                                  weight_filler=dict(type='xavier'))
        spec.relu0 = L.ReLU(spec.fc0, in_place=True)
        spec.output = L.InnerProduct(spec.relu0, num_output=10,
                                     weight_filler=dict(type='xavier'))
        spec.loss = L.SoftmaxWithLoss(spec.output, spec.label)
        spec.accuracy = L.Accuracy(spec.output, spec.label)
        proto_text = str(spec.to_proto())
        with open(net_path, 'w') as f:
            f.write(proto_text)

    def gen_solver(solver_path, net_path):
        """Write the SGD solver definition that trains ``net_path``."""
        from caffe.proto import caffe_pb2
        settings = (
            ('train_net', net_path),
            ('type', 'SGD'),
            ('momentum', 0.9),
            ('base_lr', 0.5),
            ('lr_policy', 'step'),
            ('gamma', 0.999),
            ('stepsize', 1),
            ('weight_decay', 0.0003),
        )
        params = caffe_pb2.SolverParameter()
        for field, value in settings:
            setattr(params, field, value)
        with open(solver_path, 'w') as f:
            f.write(str(params))

    batch_size = 1024
    data_shape = (batch_size, train_data[0][0].size)
    label_shape = (batch_size, 1)

    train_blobs, valid_blobs, test_blobs = [
        utils.as_dl_blobs(samples, batch_size, data_shape, label_shape)
        for samples in (train_data, valid_data, test_data)
    ]

    net_path = 'temp/net.prototxt'
    solver_path = 'temp/solver.txt'
    gen_net(net_path, data_shape, label_shape)
    gen_solver(solver_path, net_path)

    caffe.set_device(0)
    caffe.set_mode_gpu()
    solver = caffe.SGDSolver(solver_path)
    utils.train(solver, train_blobs, valid_blobs, 50, 5)

    score = utils.evaluate(solver, test_blobs)
    print('caffe nn: %.2f%%' % (score))
def main():
    """Classify every LMDB image and append the results to a CSV file.

    Command line arguments (``sys.argv[1:6]``): model prototxt, pretrained
    weights, ``.npy`` mean file, LMDB folder and training folder (passed to
    ``setup_submission_file``).  One row per LMDB record is appended to
    ``seaNet_submission_<timestamp>.csv``: the record key without its first
    ``_``-separated field, followed by the ``prob`` outputs.
    """
    MODEL_FILE = sys.argv[1]
    PRETRAINED = sys.argv[2]
    mean_file = sys.argv[3]
    lmdb_folder = sys.argv[4]
    train_folder = sys.argv[5]

    seaNet = caffe.Net(MODEL_FILE, PRETRAINED, caffe.TEST)
    caffe.set_mode_gpu()
    image_mean = np.load(mean_file)

    file_name = 'seaNet_submission_' + ('%0.f' % time.time()) + '.csv'
    setup_submission_file(train_folder, file_name)

    env = lmdb.open(lmdb_folder)
    try:
        # The with-block closes the CSV file even if a record fails.
        with open(file_name, 'a') as submission_file:
            submission_writer = csv.writer(submission_file)
            txn = env.begin()
            cursor = txn.cursor()
            count = 0
            for key, value in cursor:
                count += 1
                if count % 500 == 0:
                    print('Number of Images Processed: ' + str(count))
                datum = caffe.proto.caffe_pb2.Datum()
                datum.ParseFromString(value)
                image = caffe.io.datum_to_array(datum)
                image = image.astype(np.uint8)
                image = image - image_mean
                image = image * 0.00390625  # 1 / 256
                result = seaNet.forward_all(data=np.array([image]))
                probs = result['prob'][0]
                img_row = ['_'.join(key.split('_')[1:])]
                img_row.extend(probs)
                submission_writer.writerow(img_row)
    finally:
        # Release the LMDB environment on success and on failure.
        env.close()
def setup():
    """Initialise the module-level mean image and the two networks.

    Sets the globals ``resnet_mean`` (mean image resized to 448x448 in
    channel-first layout), ``resnet_net`` and ``vqa_net`` and creates the
    upload and visualisation folders if they do not exist.
    """
    global resnet_mean
    global resnet_net
    global vqa_net

    # data provider
    vqa_data_provider_layer.CURRENT_DATA_SHAPE = EXTRACT_LAYER_SIZE

    # mean subtraction
    blob = caffe.proto.caffe_pb2.BlobProto()
    # Read through a with-block so the file handle is not leaked.
    with open(RESNET_MEAN_PATH, 'rb') as mean_file:
        data = mean_file.read()
    blob.ParseFromString(data)
    resnet_mean = np.array(
        caffe.io.blobproto_to_array(blob)).astype(np.float32).reshape(3, 224, 224)
    # cv2.resize works on channel-last images, hence the two transposes.
    resnet_mean = np.transpose(
        cv2.resize(np.transpose(resnet_mean, (1, 2, 0)), (448, 448)),
        (2, 0, 1))

    # resnet
    caffe.set_device(GPU_ID)
    caffe.set_mode_gpu()
    resnet_net = caffe.Net(RESNET_LARGE_PROTOTXT_PATH, RESNET_CAFFEMODEL_PATH,
                           caffe.TEST)

    # our net
    vqa_net = caffe.Net(VQA_PROTOTXT_PATH, VQA_CAFFEMODEL_PATH, caffe.TEST)

    # uploads
    for folder in (UPLOAD_FOLDER, VIZ_FOLDER):
        if not os.path.exists(folder):
            os.makedirs(folder)

    print('Finished setup')
def main(argv):
    """Extract the ``deconv1`` blob for every JPEG below the root folder.

    Command line options override the values from ``get_params()``:
    ``-r/--root``, ``-o/--out``, ``-s/--saliency_model``,
    ``-c/--caffe_path`` and ``-g/--gpu``.  For every ``*.jpg`` in the root
    folder the blob is written to ``<out>/<name>.p`` (pickle) and
    ``<out>/<name>.mat`` (key ``isal``).

    Args:
        argv: argument list without the program name.
    """
    params = get_params()  # check get_params.py in the same directory to see the parameters
    try:
        opts, args = getopt.getopt(
            argv, "hr:o:s:c:g:",
            ["root=", "out=", "saliency_model=", "caffe_path=", "gpu="])
    except getopt.GetoptError:
        print('ERROR')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('saliency.py -r <root> -o <out> -s <saliency_model> -c <caffe_path> -g <gpu>')
            sys.exit()
        elif opt in ("-r", "--root"):
            params['root'] = arg
        elif opt in ("-o", "--out"):
            params['out'] = arg
        elif opt in ("-s", "--saliency_model"):
            params['saliency_model'] = arg
        elif opt in ("-c", "--caffe_path"):
            params['caffe_path'] = arg
        elif opt in ("-g", "--gpu"):
            params['gpu'] = arg

    sys.path.insert(0, os.path.join(params['caffe_path'], 'python'))
    import caffe

    compute = 'True'  # sys.argv[1] # write 'true' or 'false' in case you want to compute or just visualize
    if compute == 'true' or compute == 'True':
        deploy_file = os.path.join(params['saliency_model'], 'deploy.prototxt')
        model_file = os.path.join(params['saliency_model'], 'model.caffemodel')
        # I am using the mean file from caffenet...but I guess we could use a grey image as well ?
        mean_file = '/media/HDD_2TB/mcarne/keyframe-extractor/src/Saliency/deep/meanfile.npy'

        # A value given with -g is a string and therefore never compares
        # equal to True; interpret the usual spellings explicitly.  Values
        # that are not strings keep the original ``== True`` test.
        use_gpu = params['gpu']
        if isinstance(use_gpu, str):
            use_gpu = use_gpu.strip().lower() in ('1', 'true', 'yes', 'on')
        if use_gpu == True:
            caffe.set_mode_gpu()
            print('GPU mode selected')
        else:
            caffe.set_mode_cpu()
            print('CPU mode selected')

        net = caffe.Classifier(deploy_file, model_file,
                               mean=np.load(mean_file).mean(1).mean(1),
                               channel_swap=(2, 1, 0), raw_scale=255)
        if not os.path.exists(params['out']):
            os.makedirs(params['out'])

        for imagepath in glob.glob(params['root'] + "/*.jpg"):
            print("Procressing image...")
            # predict() is run for its effect on the net's blobs.
            scores = net.predict([caffe.io.load_image(imagepath)])
            feat = net.blobs['deconv1'].data
            print("%s %s" % (feat, np.shape(feat)))

            # saves to disk
            fout = params['out'] + '/' + os.path.splitext(os.path.basename(imagepath))[0]
            with open(fout + '.p', 'wb') as pickle_file:
                pickle.dump(feat, pickle_file)
            scipy.io.savemat(fout + '.mat', mdict={'isal': feat})
def train_cnn(db_name, train_data):
    """Write a solver file for ``db_name`` and train with it.

    The template ``./models/solver_template.prototxt`` is loaded, its
    ``stepsize`` is set to the number of samples and ``max_iter`` to ten
    times that number, and the result is saved as
    ``./models/solver_template_a7_<db_name>.prototxt``.

    Args:
        db_name: suffix used in the generated solver file name.
        train_data: training samples, or ``None`` to assume 1000 samples.

    Returns:
        Path of the final snapshot, built from the solver's
        ``snapshot_prefix`` and ``max_iter``.
    """
    caffe.set_mode_gpu()

    # Identity test: ``== None`` is evaluated element-wise for array-like
    # training data and then cannot be used as a condition.
    n_samples = 1000 if train_data is None else len(train_data)

    solver_param = caffe_pb2.SolverParameter()
    with open('./models/solver_template.prototxt') as f:
        google.protobuf.text_format.Merge(f.read(), solver_param)

    solver_param.stepsize = n_samples
    solver_param.max_iter = int(n_samples * 10)
    print(solver_param)

    solver_path = './models/solver_template_a7_' + db_name + '.prototxt'
    with open(solver_path, 'w') as f:
        f.write(google.protobuf.text_format.MessageToString(solver_param))
    print(solver_path)

    solver = caffe.SGDSolver(solver_path)
    solver.solve()

    trained_model = (str(solver_param.snapshot_prefix) + '_iter_' +
                     str(solver_param.max_iter) + '.caffemodel')
    return trained_model
def caffe_set_device(gpu=True, devid='0'):
    """Select CPU mode, or GPU mode on the device named by ``devid``.

    Args:
        gpu: truthy for GPU mode, falsy for CPU mode.
        devid: device id as a string; exported as ``CUDA_VISIBLE_DEVICES``
            and converted with ``int`` for ``caffe.set_device``.
    """
    if not gpu:
        caffe.set_mode_cpu()
        return
    caffe.set_mode_gpu()
    # NOTE(review): the same id is used both as the visibility mask and as
    # the device index - confirm this is intended for ids other than '0'.
    os.environ["CUDA_VISIBLE_DEVICES"] = devid
    device_index = int(devid)
    caffe.set_device(device_index)
def __init__(self,params):
    """Store the extraction settings and load the test-phase network.

    Args:
        params: dict providing ``dimension``, ``dataset``, ``pooling``,
            ``query_list`` and ``frame_list`` (text files with one entry
            per line), ``layer``, ``database_feats``, ``gpu``,
            ``net_proto`` and ``net``.
    """
    self.dimension = params['dimension']
    self.dataset = params['dataset']
    self.pooling = params['pooling']

    # Read image lists
    with open(params['query_list'], 'r') as query_file:
        query_text = query_file.read()
    self.query_names = query_text.splitlines()
    with open(params['frame_list'], 'r') as frame_file:
        frame_text = frame_file.read()
    self.database_list = frame_text.splitlines()

    # Parameters needed
    self.layer = params['layer']
    self.save_db_feats = params['database_feats']

    # Init network
    if not params['gpu']:
        caffe.set_mode_cpu()
    else:
        caffe.set_mode_gpu()
        caffe.set_device(0)

    print("Extracting from: %s" % (params['net_proto'],))
    cfg.TEST.HAS_RPN = True
    self.net = caffe.Net(params['net_proto'], params['net'], caffe.TEST)
def main():
    """Classify ``../cat.jpg`` with the reference CaffeNet and print the result.

    Prints the index of the highest ``prob`` output and the matching line
    of ``synset_words.txt``.
    """
    import caffe
    import numpy as np

    caffe_dir = "../caffe"
    MODEL_FILE = caffe_dir + "/models/bvlc_reference_caffenet/deploy.prototxt"
    PRETRAINED = caffe_dir + "/models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel"
    IMAGE_FILE = "../cat.jpg"

    # Build a real list: the words are indexed below, which a lazy ``map``
    # object does not support.
    with open("synset_words.txt") as f:
        words = [line.strip() for line in f]

    net = caffe.Classifier(
        MODEL_FILE, PRETRAINED,
        mean=np.load(caffe_dir + '/python/caffe/imagenet/ilsvrc_2012_mean.npy'),
        channel_swap=(2, 1, 0), raw_scale=255, image_dims=(256, 256))
    # NOTE(review): set_phase_test and net.preprocess are kept as in the
    # original; confirm they exist in the caffe version in use.
    caffe.set_phase_test()
    caffe.set_mode_gpu()

    input_image = caffe.io.load_image(IMAGE_FILE)
    prediction = net.forward_all(
        data=np.asarray([net.preprocess('data', input_image)]))
    i = prediction["prob"].argmax()
    print(i)
    print(words[i])
def main(args_list):
    """Test a trained detection network on an image database.

    Parses ``args_list``, applies configuration overrides, optionally waits
    for the caffemodel file to appear, loads the net on the requested GPU
    and hands it to ``test_net``.

    Args:
        args_list: command line arguments understood by ``parse_args``.
    """
    args = parse_args(args_list)
    print('Called with args:')
    print(args)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)
    cfg.GPU_ID = args.gpu_id

    print('Using config:')
    pprint.pprint(cfg)

    # Poll every 10 seconds until the model exists, unless waiting is off.
    while True:
        if os.path.exists(args.caffemodel) or not args.wait:
            break
        print('Waiting for {} to exist...'.format(args.caffemodel))
        time.sleep(10)

    caffe.set_mode_gpu()
    caffe.set_device(args.gpu_id)
    net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST)
    model_basename = os.path.basename(args.caffemodel)
    net.name = os.path.splitext(model_basename)[0]

    imdb = get_imdb(args.imdb_name)
    imdb.competition_mode(args.comp_mode)
    if not cfg.TEST.HAS_RPN:
        imdb.set_proposal_method(cfg.TEST.PROPOSAL_METHOD)

    test_net(net, imdb, max_per_image=args.max_per_image, vis=args.vis)
def make_train_featvec(X, y):
    """Extract 4096-wide features in batches of 1000 and pickle them.

    This method needs sufficient memory.  Only complete batches are
    processed: trailing samples beyond the last multiple of 1000 are not
    included in the result.  The feature matrix is written to
    ``feat_test_place.pickle`` with pickle protocol 2.

    Args:
        X: training inputs, sliced per batch for ``get_train_dataset``.
        y: training labels, sliced the same way.
    """
    ntrain = len(X)
    batch = 1000
    # Floor division keeps the batch count an integer on every Python version.
    nbatch = ntrain // batch
    feat_vec = np.zeros((nbatch * batch, 4096))

    caffe.set_mode_gpu()
    our_model = OurFlickr()
    for n in range(nbatch):
        print("## batch {}/{} ##".format(n, nbatch))
        lo, hi = batch * n, batch * (n + 1)
        X_batch, _ = get_train_dataset(X[lo:hi], y[lo:hi])
        feat_vec[lo:hi] = our_model.net_extractor(X=X_batch)
        # Drop the batch inside the loop; deleting it after the loop failed
        # with a NameError when there was not a single full batch.
        del X_batch
        gc.collect()

    del our_model
    del X
    del y
    gc.collect()

    print(feat_vec)
    print(feat_vec.shape)
    with open('feat_test_place.pickle', 'wb') as pickle_file:
        pickle.dump(feat_vec, pickle_file, protocol=2)
def set_caffe_mode(gpu):
    """Select CPU mode when ``gpu`` equals 0, otherwise GPU mode on device 0.

    Returns:
        Always 0.
    """
    if gpu != 0:
        # gpu mode
        caffe.set_device(0)
        caffe.set_mode_gpu()
    else:
        # cpu mode
        caffe.set_mode_cpu()
    return 0
def style_labeler():
    """Fit ``OurFlickr`` on the training set and label the test set.

    Uses the module-level ``flickr_train_set_*`` / ``flickr_test_set_*``
    lists.  One CSV line per test image (path, true label, softmax
    prediction, first SVM prediction) is written to
    ``./label_result_all.csv`` and both accuracies are printed.
    """
    ntrain = 70000  # upper bound on the number of training samples used
    X_train, y_train = get_train_dataset(flickr_train_set_path[:ntrain],
                                         flickr_train_set_label[:ntrain])

    caffe.set_mode_gpu()
    our_model = OurFlickr()
    our_model.fit(X_train, y_train)  # TODO SVM batch fitting
    our_model.transform()
    our_model.compile()

    true_res = []
    svm_res = []
    sfmax_res = []
    with open('./label_result_all.csv', 'w') as f:
        for i in range(len(flickr_test_set_path)):
            # Collect garbage and report progress once per 1000 images; the
            # previous truthiness test did so on every index *except*
            # multiples of 1000.
            if i % 1000 == 0:
                gc.collect()
                print(i)
            path = flickr_test_set_path[i]
            label = flickr_test_set_label[i]
            img = caffe.io.load_image(path)
            sfmax, svm = our_model.predict_our(img)
            sfmax_res.append(sfmax)
            svm_res.append(svm)
            true_res.append(label)
            f.write(",".join([path, str(label), str(sfmax), str(svm[0])]) + "\n")

    print("svm accuarcy: %s" % np.mean([a == b for a, b in zip(true_res, svm_res)]))
    print("sfmax accuracy: %s" % np.mean([a == b for a, b in zip(true_res, sfmax_res)]))
def init_caffe_net(gpu_id, raw_image_size, crop_size, batch_size):
    '''
    Initialize the caffe configuration used to extract features from RGB
    images. If your dataset contains gray images, change the channel count
    from 3 to 1 and comment out the transposing and channel swapping of the
    image preprocessing.

    Args:
        gpu_id: GPU to use; converted with int().
        raw_image_size: value passed (as int) to the transformer's raw scale.
        crop_size: side length (as int) of the square network input.
        batch_size: number of images per batch (as int).

    Returns:
        Tuple (net, transformer).
    '''
    caffe.set_mode_gpu()
    caffe.set_device(int(gpu_id))  # {0, 1, 2, 3} to four GPUs you want to choose.

    model_dir = '/home/u514/caffe-i/caffe-master/caffe/models/vgg/vgg_2048/'
    # The deploy prototxt file definition.
    model_def = model_dir + 'deploy-bak.prototxt'
    # The pre-trained model.
    caffemodel = model_dir + 'pretrain_ilsvrc2012_vgg_2048.caffemodel'
    # The mean file of the image set used to train the model.
    mean_file = model_dir + 'vgg_mean.npy'

    net = caffe.Net(model_def, caffemodel, caffe.TEST)

    input_shape = net.blobs['data'].data.shape
    transformer = caffe.io.Transformer({'data': input_shape})
    transformer.set_transpose('data', (2, 0, 1))  # (H,W,C) to (C,H,W)
    channel_mean = np.load(mean_file).mean(1).mean(1)
    transformer.set_mean('data', channel_mean)
    transformer.set_raw_scale('data', int(raw_image_size))
    transformer.set_channel_swap('data', (2, 1, 0))  # RGB to BGR

    # Set batch size (default: 50).
    side = int(crop_size)
    net.blobs['data'].reshape(int(batch_size), 3, side, int(crop_size))
    return net, transformer
def init_testnet(test_net, trained_model=None, test_device=0):
    """Create a test-phase net on the given GPU.

    Args:
        test_net: network prototxt.
        trained_model: optional weights file; when ``None`` the net is
            created without loading weights.
        test_device: device id passed to ``caffe.select_device``.

    Returns:
        The constructed ``caffe.Net``.
    """
    caffe.set_mode_gpu()
    caffe.select_device(test_device, False)
    # Compare with None by identity, not equality.
    if trained_model is None:
        return caffe.Net(test_net, caffe.TEST)
    return caffe.Net(test_net, trained_model, caffe.TEST)
def main():
    """Evaluate ``OurFlickr`` on the first 15000 test images.

    The model is fitted from the pickled training features in
    ``feat_train.pickle`` with the first 63000 training labels, then every
    test image is predicted and the SVM and softmax accuracies are printed.
    """
    caffe.set_mode_gpu()
    our_model = OurFlickr()

    #### use pickled training data features ####
    train_labels = flickr_train_set_label[:63000]
    our_model._fit(train_labels, pickle_name='feat_train.pickle')
    our_model.compile()

    true_res, svm_res, sfmax_res = [], [], []
    for index in range(15000):
        image = caffe.io.load_image(flickr_test_set_path[index])
        sfmax, svm = our_model.predict_our(image)
        sfmax_res.append(sfmax)
        svm_res.append(svm)
        label = flickr_test_set_label[index]
        true_res.append(label)
        print("true label %s" % (flickr_test_set_label[index],))

    svm_hits = [a == b for a, b in zip(true_res, svm_res)]
    sfmax_hits = [a == b for a, b in zip(true_res, sfmax_res)]
    print("svm accuarcy: %s" % (np.mean(svm_hits),))
    print("sfmax accuracy: %s" % (np.mean(sfmax_hits),))
def imgs_to_lmdb(path_src, src_imgs, path_dst, labels=None):
    '''
    Generate LMDB file from set of images

    Source: https://github.com/BVLC/caffe/issues/1698#issuecomment-70211045
    credit: Evan Shelhamer

    Args:
        path_src: directory containing the images.
        src_imgs: image file names relative to path_src.
        path_dst: path of the LMDB to create.
        labels: optional per-image labels; defaults to 0 for every image.

    Returns:
        Always 0.
    '''
    caffe.set_mode_gpu()

    # Identity test: ``== None`` is evaluated element-wise for array labels
    # and then cannot be used as a condition.
    if labels is None:
        labels = [0] * len(src_imgs)

    db = lmdb.open(path_dst, map_size=int(1e12))
    try:
        with db.begin(write=True) as in_txn:
            for idx, img_name in enumerate(src_imgs):
                path_ = os.path.join(path_src, img_name)
                img = np.array(Image.open(path_).convert('RGB')).astype("uint8")
                img = img[:, :, ::-1]           # reverse the channel order
                img = img.transpose((2, 0, 1))  # (H, W, C) -> (C, H, W)
                img_dat = caffe.io.array_to_datum(img, labels[idx])
                in_txn.put('{:0>10d}'.format(idx), img_dat.SerializeToString())
    finally:
        # Close the environment even if an image fails to load.
        db.close()
    return 0
def train():
    """Train the low-resolution segmentation model and log running statistics.

    Writes the training prototxt generated by ``segmodel.generate_model``,
    creates the solver from ``./seg_low_res_model/solver.prototxt``,
    optionally copies initial weights from ``config.weights`` and then
    performs ``config.max_iter`` single solver steps.  After every step the
    loss and the accuracies from ``compute_accuracy`` are printed together
    with their exponential moving averages (decay 0.99).
    """
    train_proto = str(segmodel.generate_model('train', config.N))
    with open('./seg_low_res_model/proto_train.prototxt', 'w') as f:
        f.write(train_proto)

    caffe.set_device(config.gpu_id)
    caffe.set_mode_gpu()
    solver = caffe.get_solver('./seg_low_res_model/solver.prototxt')
    if config.weights is not None:
        solver.net.copy_from(config.weights)

    decay = 0.99
    keep = 1 - decay
    cls_loss_avg = 0.0
    avg_accuracy_all = 0.0
    avg_accuracy_pos = 0.0
    avg_accuracy_neg = 0.0

    for it in range(config.max_iter):
        solver.step(1)

        blobs = solver.net.blobs
        cls_loss_val = blobs['loss'].data
        scores_val = blobs['fcn_scores'].data.copy()
        label_val = blobs['label'].data.copy()

        cls_loss_avg = decay*cls_loss_avg + keep*cls_loss_val
        print('\titer = %d, cls_loss (cur) = %f, cls_loss (avg) = %f'
              % (it, cls_loss_val, cls_loss_avg))

        # Accuracy
        accuracy_all, accuracy_pos, accuracy_neg = compute_accuracy(
            scores_val, label_val)
        avg_accuracy_all = decay*avg_accuracy_all + keep*accuracy_all
        avg_accuracy_pos = decay*avg_accuracy_pos + keep*accuracy_pos
        avg_accuracy_neg = decay*avg_accuracy_neg + keep*accuracy_neg
        print('\titer = %d, accuracy (cur) = %f (all), %f (pos), %f (neg)'
              % (it, accuracy_all, accuracy_pos, accuracy_neg))
        print('\titer = %d, accuracy (avg) = %f (all), %f (pos), %f (neg)'
              % (it, avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg))
def classify(net_config_location, net_weights, image_location, alpha_channel,
             use_gpu, win_size, crop_size):
    """Classify a large image patch by patch and stitch the results.

    The image is padded symmetrically and cut into ``win_size`` windows
    that overlap by ``crop_size`` on each side; each window is run through
    the net and the central part of its ``prob`` output is kept.

    Args:
        net_config_location: network prototxt without the input header; the
            header is prepended and the result written to ``temp.prototxt``
            in the working directory.
        net_weights: trained weights.
        image_location: image path passed to ``getimage``.
        alpha_channel: truthy if the input has 4 channels, otherwise 3.
        use_gpu: truthy for GPU mode on device 0, otherwise CPU mode.
        win_size: side length of the network input window.
        crop_size: border removed on each side of a window's output.

    Returns:
        ``uint8`` prediction map: rounded output when the net has one
        output channel, otherwise the per-pixel argmax over channels.
    """
    if use_gpu:
        caffe.set_device(0)
        caffe.set_mode_gpu()
    else:
        caffe.set_mode_cpu()

    channels = 4 if alpha_channel else 3

    # Read the config through a with-block so its handle is closed; it used
    # to be overwritten by the handle of the temporary file and leaked.
    with open(net_config_location, 'r') as f:
        s = f.read()
    header = ('input: "data" input_dim: 1 input_dim: %d input_dim: 512 input_dim: 512 \n'
              % channels)
    with open('temp.prototxt', 'w') as f:
        f.write(header + s)
    net = caffe.Net('temp.prototxt', net_weights, caffe.TEST)

    print('Opening image...')
    in_ = getimage(image_location, alpha_channel)
    print('Image opened...')

    # height and width of original image
    orig_img_h = in_.shape[1]
    orig_img_w = in_.shape[2]

    # size of valid output patch
    out_size = win_size - 2 * crop_size

    # number of patches horizontally and vertically
    n_patch_horiz = int(math.ceil(orig_img_w / float(out_size)))
    n_patch_vert = int(math.ceil(orig_img_h / float(out_size)))

    # pad so that every window lies fully inside the image
    pad_w_before = crop_size
    pad_h_before = crop_size
    pad_w_after = n_patch_horiz * out_size + crop_size - orig_img_w
    pad_h_after = n_patch_vert * out_size + crop_size - orig_img_h
    in_ = np.pad(in_, ((0, 0), (pad_h_before, pad_h_after),
                       (pad_w_before, pad_w_after)), mode='symmetric')

    # shape for input (data blob is N x C x H x W)
    net.blobs['data'].reshape(1, channels, win_size, win_size)

    print('Predicting...')
    rows = []
    for i in range(n_patch_vert):
        patches_in_row = []
        top = out_size * i
        for j in range(n_patch_horiz):
            left = out_size * j
            net.blobs['data'].data[...] = in_[:, top:top + win_size,
                                              left:left + win_size]
            # run net prediction
            net.forward()
            patch_out = net.blobs['prob'].data[0]

            # compute offset in case output patch provided by the network
            # is larger than it should be; floor division keeps the
            # offsets usable as slice indices on every Python version.
            h_offset = (patch_out.shape[1] - out_size) // 2
            w_offset = (patch_out.shape[2] - out_size) // 2

            # crop
            patch_out = patch_out[:, h_offset:h_offset + out_size,
                                  w_offset:w_offset + out_size]
            patches_in_row.append(np.copy(patch_out))
        rows.append(np.concatenate(patches_in_row, 2))
    entire_output = np.concatenate(rows, 1)

    # remove excess border
    output = entire_output[:, 0:orig_img_h, 0:orig_img_w]

    if output.shape[0] == 1:
        pred = np.rint(np.squeeze(output)).astype(np.uint8)
    else:
        pred = output.argmax(axis=0).astype(np.uint8)
    print('Done predicting.')
    return pred
if args.cfg_file is not None: cfg_from_file(args.cfg_file) if args.set_cfgs is not None: cfg_from_list(args.set_cfgs) cfg.GPU_ID = args.gpu_id print('Using config:') pprint.pprint(cfg) if not args.randomize: # fix the random seeds (numpy and caffe) for reproducibility np.random.seed(cfg.RNG_SEED) caffe.set_random_seed(cfg.RNG_SEED) # set up caffe caffe.set_mode_gpu() caffe.set_device(args.gpu_id) imdb, roidb = combined_roidb(args.imdb_name) print '{:d} roidb entries'.format(len(roidb)) output_dir = get_output_dir(imdb) print 'Output will be saved to `{:s}`'.format(output_dir) train_net(args.solver, roidb, output_dir, pretrained_model=args.pretrained_model, max_iters=args.max_iters)
def main():
    """Create the INT8 calibration table for a caffe model.

    Reads its settings from the module-level ``args``.  When the prototxt,
    the model, the mean or the image folder is missing, the usage text is
    printed and ``None`` is returned.  Otherwise the weights and then the
    activations are quantized and the calibration table is saved to
    ``args.output``.
    """
    # time start
    time_start = datetime.datetime.now()

    print(args)

    required = ('proto', 'model', 'mean', 'images')
    if any(getattr(args, option) is None for option in required):
        usage_info()
        return None

    net_file = args.proto        # deploy caffe prototxt path
    caffe_model = args.model     # trained caffemodel path
    mean = args.mean             # mean value
    # norm value
    norm = args.norm[0] if args.norm != 1.0 else 1.0
    images_path = args.images    # calibration dataset
    calibration_path = args.output  # the output calibration file
    group_on = args.group        # enable the group scale

    # the CPU is used for the forward pass unless a GPU is requested
    if args.gpu != 0:
        caffe.set_device(0)
        caffe.set_mode_gpu()

    # initial caffe net and the forward model (GPU or CPU)
    net = caffe.Net(net_file, caffe_model, caffe.TEST)

    # prepare the cnn network
    transformer = network_prepare(net, mean, norm)

    # get the calibration dataset image file paths
    images_files = file_name(images_path)

    # quantize kernel weights of the caffemodel to find their calibration table
    weight_quantize(net, net_file, group_on)

    # quantize activation values of the caffemodel to find their calibration table
    activation_quantize(net, transformer, images_files)

    # save the calibration tables
    save_calibration_file(calibration_path)

    # time end
    time_end = datetime.datetime.now()
    elapsed = time_end - time_start

    print(
        "\nCaffe Int8 Calibration table create success, it's cost %s, best wish for your INT8 inference has a low accuracy loss...\(^▽^)/...2333..."
        % (elapsed))
def main():
    """Evaluate the hidden two-stream net on the videos of testlist02.txt.

    Every line of ``./testlist02.txt`` holds a video path and a 1-based
    label.  The per-video ``fc8_vgg16`` predictions from
    ``HiddenTemporalPrediction`` are averaged, passed through ``softmax``
    and compared with the label.  The averaged scores before and after the
    softmax are saved to two ``.mat`` files and the accuracy is printed.
    """
    # caffe init
    gpu_id = 0
    caffe.set_device(gpu_id)
    caffe.set_mode_gpu()

    # spatial prediction
    model_def_file = '../stack_motionnet_vgg16_deploy.prototxt'
    model_file = '../logs_end/hmdb51_split2_vgg16_hidden.caffemodel'
    FRAME_PATH = "TODO"
    spatial_net = caffe.Net(model_def_file, model_file, caffe.TEST)

    val_file = "./testlist02.txt"
    # Read the list through a with-block so the handle is closed.
    with open(val_file, "r") as f_val:
        val_list = f_val.readlines()
    print("we got %d test videos" % len(val_list))

    start_frame = 0
    num_categories = 51
    feature_layer = 'fc8_vgg16'
    spatial_mean_file = './rgb_mean.mat'
    dims = (len(val_list), num_categories)
    predict_results_before = np.zeros(shape=dims, dtype=np.float64)
    predict_results = np.zeros(shape=dims, dtype=np.float64)

    correct = 0
    for line_id, line in enumerate(val_list):
        line_info = line.split(" ")
        input_video_dir_part = line_info[0]
        # Drop the last four characters of the listed name (presumably the
        # file extension - confirm against the list format).
        input_video_dir = os.path.join(FRAME_PATH, input_video_dir_part[:-4])
        input_video_label = int(line_info[1])

        spatial_prediction = HiddenTemporalPrediction(
            input_video_dir, spatial_mean_file, spatial_net,
            num_categories, feature_layer, start_frame)
        avg_spatial_pred_fc8 = np.mean(spatial_prediction, axis=1)
        avg_spatial_pred = np.asarray(softmax(avg_spatial_pred_fc8))
        predict_label = np.argmax(avg_spatial_pred)

        predict_results_before[line_id, :] = avg_spatial_pred_fc8
        predict_results[line_id, :] = avg_spatial_pred

        print(input_video_dir)
        print("%s %s" % (input_video_label - 1, predict_label))
        # Labels in the list are 1-based, predictions 0-based.
        if predict_label == input_video_label - 1:
            correct += 1

    print(correct)
    # An empty list yields an accuracy of 0 instead of a division by zero.
    total = len(val_list)
    accuracy = float(correct) / total if total else 0.0
    print("prediction accuracy is %4.4f" % accuracy)

    spatial_results_before = {"hidden_prediction_before": predict_results_before}
    spatial_results = {"hidden_prediction": predict_results}
    sio.savemat("./hmdb51_split2_hidden_before.mat", spatial_results_before)
    sio.savemat("./hmdb51_split2_hidden.mat", spatial_results)
def caffe_load_from_ckpt(prototxt, checkpoint, to_caffemodel):
    """Fill a Caffe net with MobilenetV2 weights restored from a TF checkpoint.

    Builds the TF MobilenetV2 graph (using the module-level ``image_scale``
    and ``factor``), restores the exponential-moving-average variables from
    ``checkpoint``, walks the Caffe parameter blobs in order alongside the TF
    global variables, copies the values across, saves the result to
    ``to_caffemodel`` and returns the Caffe net.

    Caffe blobs of shape ``[1]`` have no TF counterpart; they are set to 1
    and do not advance the TF variable index.
    """
    # TF name fragment -> index offset of the TF variable whose values go
    # into the current Caffe blob.  The Caffe blobs met at the positions of
    # TF gamma/beta receive moving_mean/moving_variance and vice versa.
    batchnorm_sources = (
        ('BatchNorm/gamma:0', 2),
        ('BatchNorm/beta:0', 2),
        ('BatchNorm/moving_mean:0', -2),
        ('BatchNorm/moving_variance:0', -2),
    )

    ### load caffe model and weights
    caffe.set_mode_gpu()
    net = caffe.Net(prototxt, caffe.TEST)

    ### load tf model
    tf.reset_default_graph()
    images = tf.placeholder(tf.float32,
                            shape=(None, image_scale, image_scale, 3))
    scope = mobilenet_v2.training_scope(is_training=False)
    with tf.contrib.slim.arg_scope(scope):
        logits, endpoints = mobilenet_v2.mobilenet(
            images,
            num_classes=1001,
            depth_multiplier=factor,
            finegrain_classification_mode=True)
    ema = tf.train.ExponentialMovingAverage(0.999)
    restore_map = ema.variables_to_restore()
    saver = tf.train.Saver(restore_map)

    ### convert variables from tf checkpoints to caffemodel
    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        tf_all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        print(
            '------------------------------------------------------------------'
        )

        # First pass: print the caffe <-> tf pairing only.
        idx = 0
        for caffe_var_name in net.params.keys():
            for n in range(len(net.params[caffe_var_name])):
                blob_shape = net.params[caffe_var_name][n].data.shape
                if list(blob_shape) == [1]:
                    continue
                var = tf_all_vars[idx]
                print(idx, caffe_var_name, blob_shape, var.name,
                      var.shape.as_list())
                idx += 1

        # Second pass: copy the values.
        idx = 0
        for caffe_var_name in net.params.keys():
            for n in range(len(net.params[caffe_var_name])):
                target = net.params[caffe_var_name][n].data
                if list(target.shape) == [1]:
                    ### moving average factor, must set to 1
                    target[...] = 1.
                    continue

                tf_name = tf_all_vars[idx].name
                if 'weights:0' in tf_name:
                    ### weight layer: reorder the axes with (3, 2, 0, 1);
                    ### depthwise weights additionally swap axes 0 and 1.
                    values = sess.run(tf_all_vars[idx])
                    values = np.transpose(values, axes=(3, 2, 0, 1))
                    if '/depthwise_weights' in tf_name:
                        values = np.swapaxes(values, axis1=0, axis2=1)
                    if 'Logits/' in tf_name:
                        ### mismatched num_classes
                        ### tf class 0: 'background'
                        target[:, ...] = values[1:, ...]
                    else:
                        target[...] = values[...]
                elif 'biases:0' in tf_name:
                    ### bias layer: drop the first TF entry (background)
                    values = sess.run(tf_all_vars[idx])
                    target[:] = values[1:]
                else:
                    for fragment, offset in batchnorm_sources:
                        if fragment in tf_name:
                            values = sess.run(tf_all_vars[idx + offset])
                            target[...] = values[...]
                            break
                idx += 1

    net.save(to_caffemodel)
    print('Save converted caffemodel to', to_caffemodel)
    return net
def build_tsv(gpu_id=0):
    """Render every viewpoint, run detection on it and write one TSV row each.

    For each (scan, viewpoint) pair returned by ``load_viewpointids`` the
    simulator is stepped through ``VIEWPOINT_SIZE`` views, the detector is
    run on every rendered image, the record is reduced with ``filter`` and
    written to ``OUTFILE % gpu_id`` with numpy arrays base64-encoded.
    When ``DRY_RUN`` is set, overlay figures are saved and the function
    returns after the first viewpoint.
    """
    # Set up the simulator
    sim = MatterSim.Simulator()
    sim.setCameraResolution(WIDTH, HEIGHT)
    sim.setCameraVFOV(math.radians(VFOV))
    sim.setDiscretizedViewingAngles(False)
    sim.setBatchSize(1)
    sim.setPreloadingEnabled(True)
    sim.initialize()

    # Set up Caffe Faster R-CNN
    cfg_from_file(CFG_FILE)
    caffe.set_mode_gpu()
    caffe.set_device(gpu_id)
    net = caffe.Net(PROTO, caffe.TEST, weights=MODEL)
    classes, attributes = load_classes()

    processed = 0
    render_timer = Timer()
    net_timer = Timer()

    with open(OUTFILE % gpu_id, 'wt') as tsvfile:
        writer = csv.DictWriter(tsvfile,
                                delimiter='\t',
                                fieldnames=TSV_FIELDNAMES)

        # Loop all the viewpoints in the simulator
        viewpointIds = load_viewpointids(gpu_id)
        for scanId, viewpointId in viewpointIds:
            render_timer.tic()

            # Loop all discretized views from this location
            images = []
            sim.newEpisode([scanId], [viewpointId], [0],
                           [math.radians(ELEVATION_START)])
            for view_idx in range(VIEWPOINT_SIZE):
                state = sim.getState()[0]

                # Transform and save generated image
                images.append(transform_img(state.rgb))

                # The record is created from the first view of the pano
                if view_idx == 0:
                    record = {
                        'scanId': state.scanId,
                        'viewpointId': state.location.viewpointId,
                        'viewHeading': np.zeros(VIEWPOINT_SIZE,
                                                dtype=np.float32),
                        'viewElevation': np.zeros(VIEWPOINT_SIZE,
                                                  dtype=np.float32),
                        'image_h': HEIGHT,
                        'image_w': WIDTH,
                        'vfov': VFOV
                    }
                record['viewHeading'][view_idx] = state.heading
                record['viewElevation'][view_idx] = state.elevation

                # Move the sim viewpoint so it ends in the same place:
                # constant heading step, elevation bump on the last view
                # of each sweep.
                heading_step = math.pi * 2 / VIEWS_PER_SWEEP
                last_in_sweep = (view_idx % VIEWS_PER_SWEEP) + 1 == VIEWS_PER_SWEEP
                elevation_step = math.radians(ELEVATION_INC) if last_in_sweep else 0.0
                sim.makeAction([0], [heading_step], [elevation_step])
            render_timer.toc()

            net_timer.tic()
            # Run detection
            for view_idx in range(VIEWPOINT_SIZE):
                get_detections_from_im(record, net, images[view_idx])
            if DRY_RUN:
                print('%d: Detected %d objects in pano' %
                      (gpu_id, record['features'].shape[0]))
            filter(record, MAX_TOTAL_BOXES)
            if DRY_RUN:
                print('%d: Reduced to %d objects in pano' %
                      (gpu_id, record['features'].shape[0]))
                for view_idx in range(VIEWPOINT_SIZE):
                    fig = visual_overlay(images[view_idx], record, view_idx,
                                         classes, attributes)
                    fig.savefig('img_features/examples/%s-%s-%d.png' %
                                (record['scanId'], record['viewpointId'],
                                 view_idx))
                    plt.close()

            # Arrays cannot go into the TSV directly: base64-encode them.
            for field, value in record.items():
                if isinstance(value, np.ndarray):
                    record[field] = str(base64.b64encode(value), "utf-8")
            writer.writerow(record)
            processed += 1
            net_timer.toc()

            if processed % 10 == 0:
                print('%d: Processed %d / %d viewpoints, %.1fs avg render time, %.1fs avg net time, projected %.1f hours' %\
                  (gpu_id, processed, len(viewpointIds), render_timer.average_time, net_timer.average_time,
                  (render_timer.average_time+net_timer.average_time)*len(viewpointIds)/3600))

            if DRY_RUN:
                return
def image2mat(net, transformer, inputimagedir, outdir, labelfilepath, layername):
    """Run every image under ``inputimagedir`` through ``net`` and dump features.

    The ground-truth label of an image is the integer name of its parent
    directory.  For each image the second-to-last blob of the network is
    flattened and collected; misclassified images (according to
    ``out['prob']``) are copied into ``./error/<outdir stem>``.  Features and
    labels are saved to the MATLAB file ``outdir`` every 100 images and once
    more after the last image.

    ``labelfilepath`` and ``layername`` are accepted for interface
    compatibility but are not used.  Images that fail to load or preprocess
    are reported and skipped.
    """
    import random

    mat = []
    labels = []
    nn = 0

    def save_results():
        # savemat writes binary data, so the file is opened in 'wb' mode.
        with open(outdir, 'wb') as f:
            scipy.io.savemat(f, {'data': mat, 'labels': labels})

    caffe.set_mode_gpu()
    allimages = GetFileList(inputimagedir, [])
    print(allimages)
    testimages = allimages
    random.shuffle(testimages)

    errorimagelist = "./error/" + outdir.split(".")[0]
    if not os.path.exists(errorimagelist):
        os.makedirs(errorimagelist)

    # list() keeps the indexing below valid on Python 3 as well.
    blob_names = list(net.blobs.keys())

    for image in testimages:
        print(image)
        # Accept both "\" and "/" separators: the original split on "\\" for
        # the label but on "/" for the file name, which disagree on any one
        # path style.
        path_parts = image.replace("\\", "/").split("/")
        gtlabel = int(path_parts[-2])
        image_stem = path_parts[-1].split(".")[0]

        try:
            net.blobs['data'].data[...] = transformer.preprocess(
                'data', caffe.io.load_image(image))
        except Exception as e:
            # Best effort: report the unreadable image and carry on.
            print(nn)
            print(str(e))
            nn += 1
            continue

        out = net.forward()
        predicted = out['prob'].argmax()
        if predicted != gtlabel:
            print("%s %s" % (predicted, gtlabel))
            shutil.copy(
                image,
                errorimagelist + "/" + image_stem + "_pred_" +
                str(predicted) + ".png")

        feat = net.blobs[blob_names[-2]].data[0]
        # flatten() copies, so the stored row survives the next forward pass
        mat.append(feat.flatten())
        labels.append(gtlabel)

        if nn % 100 == 0:
            save_results()
        nn += 1

    # Final save so images processed after the last multiple of 100 are
    # not lost.
    save_results()
def extract_feature(network_proto_path,
                    network_model_path,
                    image_list,
                    data_mean,
                    layer_name,
                    image_as_grey = False):
    """Extract the output of one layer for every image in ``image_list``.

    Input
        network_proto_path: network definition file, in prototxt format.
        network_model_path: trained network model file.
        image_list: list of image paths to feed through the network.
        data_mean: unused; a fixed mean pixel (104, 117, 122) is applied.
        layer_name: name of the blob whose output is extracted.
        image_as_grey: unused.

    Returns
        float32 array of shape ``(len(image_list),) + blob_shape[1:4]``,
        where ``blob_shape`` is the shape of the ``layer_name`` blob.

    Images are pushed through the net in batches of the prototxt's batch
    size; the final batch may be smaller.
    """
    caffe.set_mode_gpu()
    net = caffe.Net(network_proto_path, network_model_path, caffe.TEST)

    # input preprocessing: 'data' is the name of the input blob
    input_shape = net.blobs['data'].data.shape
    transformer = caffe.io.Transformer({'data': input_shape})
    transformer.set_transpose('data', (2, 0, 1))
    transformer.set_mean('data', np.float32([104.0, 117.0, 122.0]))  # mean pixel
    # the model works on the [0,255] range, load_image gives [0,1]
    transformer.set_raw_scale('data', 255)
    transformer.set_channel_swap('data', (2, 1, 0))

    shp = net.blobs[layer_name].data.shape
    print(input_shape)
    batch_size = input_shape[0]
    print(shp)

    num_images = len(image_list)
    features = np.empty((num_images, shp[1], shp[2], shp[3]),
                        dtype='float32', order='C')

    img_batch = []
    for cnt, path in enumerate(image_list):
        img_batch.append(
            transformer.preprocess('data', caffe.io.load_image(path)))

        if len(img_batch) == batch_size or cnt == num_images - 1:
            # Reshape to the actual batch length while keeping the net's own
            # channel/height/width instead of a hard-coded 3x112x112.
            net.blobs['data'].reshape(len(img_batch), *input_shape[1:])
            net.blobs['data'].data[...] = img_batch
            dst = net.forward(end=layer_name)
            print('%d images processed' % (cnt + 1,))
            # Assigning into ``features`` copies the blob contents, so the
            # next forward pass cannot overwrite them.
            features[cnt - len(img_batch) + 1:cnt + 1, :, :, :] = dst[layer_name]
            img_batch = []

    features = np.asarray(features, dtype='float32')
    return features
def main():
    """Caption the validation videos with the S2VT LSTM net.

    Command line: ``sys.argv[1]`` is the caffemodel to load and
    ``sys.argv[2]`` the text file the generated captions are appended to.
    Videos are processed in chunks of ``NUM_OUT_PER_CHUNK``.
    """
    # Fail early with a usage message rather than an IndexError after the
    # model has been loaded and the first chunk has been processed.
    if len(sys.argv) < 3:
        sys.exit('usage: %s MODEL_FILE OUTPUT_TEXT_FILE' % sys.argv[0])

    LSTM_DIR = '/scratch/ay937/caffe-recurrent/examples/s2vt'
    LSTM_NET_FILE = './s2vt.deploy.prototxt'
    ITER = 10000  # 8k, 10k, 14k seem good for surfacelm
    MODEL_FILE = sys.argv[1]
    text_out_fname = sys.argv[2]
    TAG = 's2s_vgg_cocolm40k_surfacelm120k_predlr0.5'
    NET_TAG = '%s_iter_%d' % (TAG, ITER)
    FRAMEFEAT_FILE_PATTERN = '/scratch/ay937/caffe-recurrent/examples/s2vt/youtube/splits/yt_allframes_vgg_fc7_{0}.txt'
    SENTS_FILE_PATTERN = '/scratch/ay937/caffe-recurrent/examples/s2vt/youtube/splits/sents_{0}_lc_nopunc.txt'

    if DEVICE_ID >= 0:
        caffe.set_mode_gpu()
        caffe.set_device(DEVICE_ID)
    else:
        caffe.set_mode_cpu()

    # Set up the nets.
    print("Setting up LSTM NET")
    lstm_net = caffe.Net(LSTM_NET_FILE, MODEL_FILE, caffe.TEST)
    print("Done")

    RESULTS_DIR = './results_lm'
    STRATEGIES = [
        {'type': 'beam', 'beam_size': 1},
    ]
    NUM_OUT_PER_CHUNK = 30
    START_CHUNK = 0

    vocab_file = '%s/vocab/vocab.txt' % LSTM_DIR
    DATASETS = [  # split_name, data_split_name, aligned
        ('valid', 'val', False),
    ]
    for split_name, data_split_name, aligned in DATASETS:
        filenames = [(FRAMEFEAT_FILE_PATTERN.format(data_split_name),
                      SENTS_FILE_PATTERN.format(data_split_name))]
        fsg = fc7FrameSequenceGenerator(filenames,
                                        BUFFER_SIZE,
                                        vocab_file,
                                        max_words=MAX_WORDS,
                                        align=aligned,
                                        shuffle=False,
                                        pad=aligned,
                                        truncate=aligned)
        video_gt_pairs = all_video_gt_pairs(fsg)
        print('Read %d videos pool feats' % len(fsg.vid_framefeats))
        # Floor division keeps the chunk count an int on Python 3 too.
        NUM_CHUNKS = (len(fsg.vid_framefeats) // NUM_OUT_PER_CHUNK) + 1
        eos_string = '<EOS>'
        # add english inverted vocab
        vocab_list = [eos_string] + fsg.vocabulary_inverted

        for c in range(START_CHUNK, NUM_CHUNKS):
            chunk_start = c * NUM_OUT_PER_CHUNK
            chunk_end = (c + 1) * NUM_OUT_PER_CHUNK
            # list() so the key view can be sliced on Python 3 as well
            chunk = list(video_gt_pairs.keys())[chunk_start:chunk_end]
            html_out_filename = '%s/%s.%s.%d_to_%d.html' % \
              (RESULTS_DIR, data_split_name, NET_TAG, chunk_start, chunk_end)
            # NOTE(review): nothing visible here writes the HTML file any
            # more, so this skip only triggers for files created elsewhere.
            if os.path.exists(html_out_filename):
                print('HTML output exists, skipping: %s' % html_out_filename)
                continue
            print('HTML output will be written to: %s' % html_out_filename)

            outputs = run_pred_iters(lstm_net,
                                     chunk,
                                     video_gt_pairs,
                                     fsg,
                                     strategies=STRATEGIES,
                                     display_vocab=vocab_list)
            text_out_types = to_text_output(outputs, vocab_list)
            print(text_out_types)
            for strat_type in text_out_types:
                # Append mode: every chunk and strategy lands in one file.
                # The context manager closes the handle even if the write
                # raises.
                with open(text_out_fname, 'a') as text_out_file:
                    text_out_file.write(''.join(text_out_types[strat_type]))
            print('Wrote HTML output to: %s' % html_out_filename)
def main(argv):
    """Classify the images listed in ``--input_file`` and save the predictions.

    ``--input_file`` is a text file with one image per line; only the text
    before the first space of each line is used as the image path.
    Predictions are written to ``--output_file`` with ``np.save``.

    NOTE: options are parsed from ``sys.argv`` (``parser.parse_args()``
    without arguments); the ``argv`` parameter is not consulted.
    """
    pycaffe_dir = caffe_root + 'python/'

    parser = argparse.ArgumentParser()
    # Required arguments: input and output files.
    parser.add_argument("--input_file", help="Input image, directory, or npy.")
    parser.add_argument("--output_file", help="Output npy filename.")
    # Optional arguments.
    parser.add_argument("--model_def",
                        default=os.path.join(
                            pycaffe_dir,
                            "../examples/imagenet/imagenet_deploy.prototxt"),
                        help="Model definition file.")
    parser.add_argument(
        "--pretrained_model",
        default=os.path.join(
            pycaffe_dir,
            "../examples/imagenet/caffe_reference_imagenet_model"),
        help="Trained model weights file.")
    parser.add_argument("--gpu",
                        action='store_true',
                        help="Switch for gpu computation.")
    parser.add_argument(
        "--center_only",
        action='store_true',
        help="Switch for prediction from center crop alone instead of " +
        "averaging predictions across crops (default).")
    parser.add_argument(
        "--images_dim",
        default='256,256',
        help="Canonical 'height,width' dimensions of input images.")
    parser.add_argument(
        "--mean_file",
        default=os.path.join(pycaffe_dir,
                             'caffe/imagenet/ilsvrc_2012_mean.npy'),
        help="Data set image mean of H x W x K dimensions (numpy array). " +
        "Set to '' for no mean subtraction.")
    parser.add_argument(
        "--input_scale",
        type=float,
        default=255,
        help="Multiply input features by this scale before input to net")
    parser.add_argument(
        "--channel_swap",
        default='2,1,0',
        help="Order to permute input channels. The default converts " +
        "RGB -> BGR since BGR is the Caffe default by way of OpenCV.")
    parser.add_argument(
        "--ext",
        default='jpg',
        help="Image file extension to take as input when a directory " +
        "is given as the input file.")
    args = parser.parse_args()

    image_dims = [int(s) for s in args.images_dim.split(',')]
    channel_swap = [int(s) for s in args.channel_swap.split(',')]

    mean = None
    if args.mean_file:
        mean = np.load(args.mean_file)
        # Resize mean (which requires H x W x K input in range [0,1]):
        # normalise, resize to the image dimensions, then undo the
        # normalisation.
        in_shape = image_dims
        m_min, m_max = mean.min(), mean.max()
        normal_mean = (mean - m_min) / (m_max - m_min)
        mean = caffe.io.resize_image(
            normal_mean.transpose((1, 2, 0)),
            in_shape).transpose((2, 0, 1)) * (m_max - m_min) + m_min

    if args.gpu:
        caffe.set_mode_gpu()
        print("GPU mode")
    else:
        caffe.set_mode_cpu()
        print("CPU mode")

    # Make classifier.
    classifier = caffe.Classifier(args.model_def,
                                  args.pretrained_model,
                                  image_dims=image_dims,
                                  mean=mean,
                                  input_scale=1.0,
                                  raw_scale=255.0,
                                  channel_swap=channel_swap)

    # Load image file list.
    args.input_file = os.path.expanduser(args.input_file)
    im_files = []
    with open(args.input_file) as f:
        for line in f:
            # Strip only the line terminator; slicing off the last character
            # would truncate a final line that has no trailing newline.
            entry = line.rstrip('\r\n')
            if not entry.strip():
                continue  # blank line: nothing to load
            im_files.append(entry.split(' ')[0])
    inputs = [caffe.io.load_image(im_f) for im_f in im_files]

    print("Classifying %d inputs." % len(inputs))

    # Classify.
    start = time.time()
    predictions = classifier.predict(inputs, not args.center_only)
    print("Done in %.2f s." % (time.time() - start))

    # Save
    np.save(args.output_file, predictions)
    print("Saved %s." % args.output_file)
def exec_validation(device_id, mode, it='', visualize=False):
    """Run one pass of the VQA net over the ``mode`` split and dump predictions.

    Args:
        device_id: GPU id handed to ``caffe.set_device``.
        mode: ``'val'``, ``'test-dev'`` or ``'test'``.
        it: iteration tag, zero-padded into the test result file names.
        visualize: when true, collect per-question records and pass them to
            ``visualize_failures``.

    Returns:
        For ``mode == 'val'``: ``(mean_testloss, acc_overall,
        acc_perQuestionType, acc_perAnswerType)``.  For the test modes the
        predictions are only written to a JSON file and ``None`` is returned.
    """
    caffe.set_device(device_id)
    caffe.set_mode_gpu()
    net = caffe.Net('./result/proto_test.prototxt',
                    './result/tmp.caffemodel',
                    caffe.TEST)

    dp = VQADataProvider(mode=mode, batchsize=64)
    total_questions = len(dp.getQuesIds())
    epoch = 0

    pred_list = []
    testloss_list = []
    stat_list = []

    # The test splits get blank answers in the visualisation records.
    # The original condition ``mode == 'test-dev' or 'test'`` was always
    # true, so 'val' never took the branch that fills in answers.
    is_test_split = mode in ('test-dev', 'test')

    # The data provider returns the epoch counter with each batch; it leaves
    # 0 once the split has been traversed.
    while epoch == 0:
        (t_word, t_cont, t_img_feature, t_answer,
         t_qid_list, t_iid_list, epoch) = dp.get_batch_vec()
        net.blobs['data'].data[...] = np.transpose(t_word, (1, 0))
        net.blobs['cont'].data[...] = np.transpose(t_cont, (1, 0))
        net.blobs['img_feature'].data[...] = t_img_feature
        net.blobs['label'].data[...] = t_answer
        net.forward()

        t_pred_list = net.blobs['prediction'].data.argmax(axis=1)
        t_pred_str = [
            dp.vec_to_answer(pred_symbol) for pred_symbol in t_pred_list
        ]
        # Copy the loss: the blob's array is a view on memory the next
        # forward pass overwrites, so storing the view would make every
        # entry equal to the last batch's loss.
        testloss_list.append(np.array(net.blobs['loss'].data))

        for qid, iid, ans, pred in zip(t_qid_list, t_iid_list,
                                       t_answer.tolist(), t_pred_str):
            pred_list.append({
                u'answer': pred,
                u'question_id': int(dp.getStrippedQuesId(qid))
            })
            if visualize:
                q_list = dp.seq_to_list(dp.getQuesStr(qid))
                if is_test_split:
                    ans_str = ''
                    ans_list = [''] * 10
                else:
                    ans_str = dp.vec_to_answer(ans)
                    ans_list = [
                        dp.getAnsObj(qid)[i]['answer'] for i in range(10)
                    ]
                stat_list.append({
                    'qid': qid,
                    'q_list': q_list,
                    'iid': iid,
                    'answer': ans_str,
                    'ans_list': ans_list,
                    'pred': pred
                })

        percent = 100 * float(len(pred_list)) / total_questions
        sys.stdout.write('\r' + ('%.2f' % percent) + '%')
        sys.stdout.flush()

    mean_testloss = np.array(testloss_list).mean()

    if mode == 'val':
        valFile = './result/val2015_resfile'
        with open(valFile, 'w') as f:
            json.dump(pred_list, f)
        if visualize:
            visualize_failures(stat_list, mode)
        annFile = config.DATA_PATHS['val']['ans_file']
        quesFile = config.DATA_PATHS['val']['ques_file']
        vqa = VQA(annFile, quesFile)
        vqaRes = vqa.loadRes(valFile, quesFile)
        vqaEval = VQAEval(vqa, vqaRes, n=2)
        vqaEval.evaluate()
        acc_overall = vqaEval.accuracy['overall']
        acc_perQuestionType = vqaEval.accuracy['perQuestionType']
        acc_perAnswerType = vqaEval.accuracy['perAnswerType']
        return mean_testloss, acc_overall, acc_perQuestionType, acc_perAnswerType
    elif mode == 'test-dev':
        filename = './result/vqa_OpenEnded_mscoco_test-dev2015_v3t' + str(
            it).zfill(8) + '_results'
        with open(filename + '.json', 'w') as f:
            json.dump(pred_list, f)
        if visualize:
            visualize_failures(stat_list, mode)
    elif mode == 'test':
        filename = './result/vqa_OpenEnded_mscoco_test2015_v3c' + str(
            it).zfill(8) + '_results'
        with open(filename + '.json', 'w') as f:
            json.dump(pred_list, f)
        if visualize:
            visualize_failures(stat_list, mode)
def test_network(model_file, weights_file, image_file) :
    """Switch Caffe to GPU mode and build the TEST-phase network.

    ``model_file`` is the network definition and ``weights_file`` the
    trained weights.  ``image_file`` is not used by the code visible here,
    and the constructed net is not returned.
    """
    caffe.set_mode_gpu()
    net_kwargs = {'weights': weights_file}
    net = caffe.Net(model_file, caffe.TEST, **net_kwargs)
def main(argv, image_name):
    """Count objects in one image with the Hydra CNN.

    Args:
        argv: option list for ``getopt``; recognised long options are
            ``--prototxt=``, ``--caffemodel=``, ``--cpu_only``, ``--dev=``
            and ``--cfg=`` (required).
        image_name: name of the image, resolved against the image folder
            given by the configuration file.

    Returns:
        The predicted count ``npred``, or ``None`` when the options cannot
        be parsed or ``--cfg`` is missing.
    """
    use_cpu = False
    gpu_dev = 0
    cfg_file = None
    prototxt_path = 'models/trancos/hydra2/hydra_deploy.prototxt'
    caffemodel_path = 'models/trancos/hydra2/trancos_hydra2.caffemodel'

    try:
        opts, _ = getopt.getopt(
            argv, "h:",
            ["prototxt=", "caffemodel=", "cpu_only", "dev=", "cfg="])
    except getopt.GetoptError as err:
        print("Error while parsing parameters: ", err)
        return

    # Compare with ``==``: ``opt in ("--prototxt")`` is a substring test on
    # a plain string, not tuple membership.
    for opt, arg in opts:
        if opt == "--prototxt":
            prototxt_path = arg
        elif opt == "--caffemodel":
            caffemodel_path = arg
        elif opt == "--cpu_only":
            use_cpu = True
        elif opt == "--dev":
            gpu_dev = int(arg)
        elif opt == "--cfg":
            cfg_file = arg

    # Without --cfg the configuration cannot be loaded; report it the same
    # way as a parse error instead of failing on an unbound variable.
    if cfg_file is None:
        print("Error while parsing parameters: ",
              "missing required option --cfg")
        return

    (dataset, use_mask, mask_file, test_names_file, im_folder, dot_ending, pw,
     sigmadots, n_scales, perspective_path, use_perspective, is_colored,
     results_file, resize_im) = init_parameters_from_config(cfg_file)

    if use_cpu:
        caffe.set_mode_cpu()
    else:
        # Use GPU
        caffe.set_device(gpu_dev)
        caffe.set_mode_gpu()

    # Init CNN
    CNN = CaffePredictor(prototxt_path, caffemodel_path, n_scales)

    print("\nStart prediction for " + image_name)
    im_path = utl.extendName(image_name, im_folder)
    im = load_image(im_path, color=is_colored)
    if resize_im > 0:
        # presumably caps the longer image side at resize_im -- see
        # utl.resizeMaxSize
        im = utl.resizeMaxSize(im, resize_im)

    mask = None
    if use_mask:
        mask_im_path = utl.extendName(image_name,
                                      im_folder,
                                      use_ending=True,
                                      pattern=mask_file)
        mask = sio.loadmat(mask_im_path,
                           chars_as_strings=1,
                           matlab_compatible=1)
        mask = mask.get('BW')

    s = time.time()
    npred, resImg = count_objects(CNN, im, pw, mask)
    print("image : %s, npred = %.2f , time =%.2f sec" %
          (image_name, npred, time.time() - s))
    return npred
def main():
    """Visually and numerically check a Fast-RCNN style Python data layer.

    Loads the net given by ``--net_file`` in TEST phase, attaches the JSON
    dataset to its first layer, then for ``--epochs`` epochs runs forward
    passes and asserts that the produced blobs (image scales, flippings,
    ROIs, boxes, center projections, viewpoints) have the expected shapes
    and ranges, while showing each image with its ROIs.  ESC aborts the
    current epoch, ``p`` toggles pausing.  Finally the layer's data samples
    are compared with the dataset.
    """
    import sys

    description = ('Test Fast-RCNN style datalayer')
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument("dataset", help="ImageDataset JSON file")
    parser.add_argument("-n",
                        "--net_file",
                        required=True,
                        help="Net (prototxt) file")
    parser.add_argument("-g", "--gpu", type=int, default=0, help="Gpu Id.")
    parser.add_argument("-e",
                        "--epochs",
                        type=int,
                        default=2,
                        help="Number of epochs")
    parser.add_argument(
        "-p",
        "--pause",
        default=0,
        type=int,
        help="Set number of milliseconds to pause. Use 0 to pause indefinitely"
    )
    args = parser.parse_args()

    # init caffe
    caffe.set_device(args.gpu)
    caffe.set_mode_gpu()

    assert osp.exists(args.net_file), 'Net file "{}" do not exist'.format(
        args.net_file)
    net = caffe.Net(args.net_file, caffe.TEST)

    print('Loading dataset from {}'.format(args.dataset))
    dataset = ImageDataset.from_json(args.dataset)
    print('Loaded {} dataset with {} annotations'.format(
        dataset.name(), dataset.num_of_images()))

    data_layer = net.layers[0]
    data_layer.add_dataset(dataset)
    data_layer.print_params()
    data_layer.generate_datum_ids()

    required_object_info_fields = data_layer.required_object_info_fields
    print(
        "required_object_info_fields = {}".format(required_object_info_fields))

    # Make sure we remove bad objects like the data layer does
    filter_dataset(dataset, required_object_info_fields)
    number_of_images = dataset.num_of_images()
    assert data_layer.number_of_datapoints() == number_of_images

    num_of_layer_objects = sum(
        len(img_info['object_infos']) for img_info in data_layer.data_samples)
    num_of_dataset_objects = sum(
        len(img_info['object_infos']) for img_info in dataset.image_infos())
    assert num_of_layer_objects == num_of_dataset_objects, "{} != {}".format(
        num_of_layer_objects, num_of_dataset_objects)

    cv2.namedWindow('blob_image', cv2.WINDOW_AUTOSIZE)
    cv2.namedWindow('original_image', cv2.WINDOW_AUTOSIZE)

    image_blob_shape = net.blobs['input_image'].data.shape
    assert len(image_blob_shape) == 4, 'Expects 4D data blob'
    assert image_blob_shape[
        1] == 3, 'Expects 2nd channel to be 3 for BGR image'

    batch_size = image_blob_shape[0]
    num_of_batches = int(np.ceil(dataset.num_of_images() / float(batch_size)))

    exit_loop = False
    for epoch_id in range(args.epochs):
        print("-----------------------Epoch # {} / {} -----------------------------".format(
            epoch_id, args.epochs))
        for b in trange(num_of_batches):
            start_idx = batch_size * b
            end_idx = min(batch_size * (b + 1), number_of_images)

            # Run forward pass
            _ = net.forward()

            # Get image_scales and image_flippings.  The builtin ``bool`` is
            # used because the ``np.bool`` alias was removed from NumPy.
            image_scales = net.blobs['image_scales'].data
            image_flippings = net.blobs['image_flippings'].data.astype(bool)
            assert image_scales.shape == image_flippings.shape == (
                batch_size, )

            # Get roi_blob and from that determine number_of_rois
            roi_blob = net.blobs['roi'].data
            assert roi_blob.ndim == 2 and roi_blob.shape[1] == 5
            number_of_rois = roi_blob.shape[0]
            for roi_id in range(number_of_rois):
                roi_batch_index = roi_blob[roi_id, 0]
                # A batch index addresses one of ``batch_size`` images, so
                # the upper bound is exclusive.
                assert 0 <= roi_batch_index < batch_size
                assert_bbx(roi_blob[roi_id, -4:])

            # Check the bbx blobs
            for bbx_blob_name in ['gt_bbx_amodal', 'gt_bbx_crop']:
                if bbx_blob_name in net.blobs:
                    bbx_blob = net.blobs[bbx_blob_name].data
                    assert bbx_blob.shape == (number_of_rois, 4)
                    for roi_id in range(number_of_rois):
                        assert_bbx(bbx_blob[roi_id, :])

            # Check the center proj blobs
            center_proj_blob = net.blobs['gt_center_proj'].data
            assert center_proj_blob.shape == (number_of_rois, 2)

            # Check vp blobs
            vp_blob = net.blobs['gt_viewpoint'].data
            assert vp_blob.shape == (number_of_rois,
                                     3), "Weird vp shape = {}".format(
                                         vp_blob.shape)
            assert (vp_blob >= -np.pi).all() and (
                vp_blob < np.pi).all(), "Bad vp = \n{}".format(vp_blob)

            for i in range(start_idx, end_idx):
                in_batch_idx = i - start_idx
                original_image = cv2.imread(
                    osp.join(dataset.rootdir(),
                             dataset.image_infos()[i]['image_file']))
                cv2.imshow('original_image', original_image)

                image_blob = net.blobs['input_image'].data[in_batch_idx]
                image_blob_bgr8 = data_layer.make_bgr8_from_blob(
                    image_blob).copy()

                # Draw every ROI that belongs to this image of the batch
                for roi_id in range(roi_blob.shape[0]):
                    roi_batch_index = roi_blob[roi_id, 0]
                    if roi_batch_index == in_batch_idx:
                        bbx_roi = roi_blob[roi_id, -4:].astype(np.float32)
                        cv2.rectangle(image_blob_bgr8, tuple(bbx_roi[:2]),
                                      tuple(bbx_roi[2:]), (0, 255, 0), 1)

                cv2.imshow('blob_image', image_blob_bgr8)
                cv2.displayOverlay(
                    'blob_image',
                    'Flipped' if image_flippings[in_batch_idx] else 'Original')

                key = cv2.waitKey(args.pause)
                if key == 27:
                    cv2.destroyAllWindows()
                    exit_loop = True
                    break
                elif key == ord('p'):
                    args.pause = not args.pause

            if exit_loop is True:
                print('User presessed ESC. Exiting epoch {}'.format(epoch_id))
                exit_loop = False
                break
        print("-----------------------End of epoch -----------------------------")

    # Now check the data_layer.data_samples
    # Written without a newline so "Done." ends up on the same line.
    sys.stdout.write("Verifying data_samples ... ")
    sys.stdout.flush()
    for im_info_layer, im_info_dataset in zip(data_layer.data_samples,
                                              dataset.image_infos()):
        for im_field in ['image_size', 'image_intrinsic']:
            if im_field in im_info_dataset:
                assert np.all(
                    im_info_layer[im_field] == im_info_dataset[im_field])

        layer_obj_infos = im_info_layer['object_infos']
        dataset_obj_infos = im_info_dataset['object_infos']

        assert len(layer_obj_infos) == len(
            dataset_obj_infos), "{} != {}".format(len(layer_obj_infos),
                                                  len(dataset_obj_infos))
        for obj_info_layer, obj_info_dataset in zip(layer_obj_infos,
                                                    dataset_obj_infos):
            assert obj_info_layer['id'] == obj_info_dataset['id']
            assert obj_info_layer['category'] == obj_info_dataset[
                'category']
            for obj_field in required_object_info_fields:
                assert np.all(obj_info_layer[obj_field] == np.array(obj_info_dataset[obj_field])), \
                    "For obj_field '{}': {} vs {}".format(obj_field, obj_info_layer[obj_field], obj_info_dataset[obj_field])
    print("Done.")
#coding=utf-8 import numpy as np import sys, os import cv2 import time caffe_root = '/home/gjw/caffe-ssd-mobile/' sys.path.insert(0, caffe_root + 'python') import caffe caffe.set_mode_gpu() ### 设置GPU模式 CLASSES = ('background', 'car', 'cyclist', 'pedestrain') # 全局变量 colours = np.random.rand(32, 3) * 255 class MobileNet_SSD: # 构造函数 def __init__(self, net_file, caffe_model): self.net = caffe.Net(net_file, caffe_model, caffe.TEST) # 图像归一化 def preprocess(self, src): img = cv2.resize(src, (300, 300)) return (img - 127.5) * 0.007843 def detect(self, frame):
def test():
    """Run EVD-Net over the frames in ``VIDEO_test_img`` and save the outputs.

    For every ``.jpg`` whose name contains
    ``ILSVRC2015_train_00124006_<6-digit index>_1_3`` and whose two
    preceding and two following frames exist, the five frames are fed into
    the ``img_1``, ``img_2``, ``img``, ``img_3`` and ``img_4`` blobs and the
    ``sum`` blob is written to ``result/<name>_EVD-Net.jpg``.  Other files
    are skipped.
    """
    caffe.set_mode_gpu()
    caffe.set_device(0)

    image_dir = 'VIDEO_test_img'
    frame_name = 'ILSVRC2015_train_00124006_%s_1_3.jpg'
    model = r'EVD-Net.caffemodel'
    net = caffe.Net('test.prototxt', model, caffe.TEST)

    def frame_path(index):
        # os.path.join instead of a hard-coded "\" separator
        return os.path.join(image_dir, frame_name % str(index).zfill(6))

    # Compiled once; the dot is escaped so only a real ".jpg" suffix matches.
    jpg_pattern = re.compile(r'(.*)\.jpg$')
    index_pattern = re.compile(r'ILSVRC2015_train_00124006_([0-9]{6})_1_3')

    # input blob name -> frame offset relative to the current frame
    blob_offsets = (('img_1', -2), ('img_2', -1), ('img', 0),
                    ('img_3', 1), ('img_4', 2))

    imagesnum = 0
    for entry in os.listdir(image_dir):
        jpg_match = jpg_pattern.match(entry)
        if jpg_match is None:
            continue
        imagename = str(jpg_match.group(1))

        index_match = index_pattern.search(imagename)
        if index_match is None:
            # A jpg that is not one of the expected frames: skip it instead
            # of failing on an empty match list.
            continue
        labelnum = int(index_match.group(1))

        # All four neighbouring frames are required.
        neighbours = (labelnum - 1, labelnum - 2, labelnum + 1, labelnum + 2)
        if not all(os.path.isfile(frame_path(idx)) for idx in neighbours):
            continue
        imagesnum = imagesnum + 1

        for blob_name, offset in blob_offsets:
            frame = caffe.io.load_image(frame_path(labelnum + offset))
            # HWC -> CHW, wrapped in a list to form a batch of one
            net.blobs[blob_name].data[...] = [frame.transpose((2, 0, 1))]

        net.forward()

        data = net.blobs['sum'].data[0]
        data = data.transpose((1, 2, 0))
        # reverse the channel axis before handing the image to OpenCV
        data = data[:, :, ::-1]

        savepath = os.path.join('result', imagename + '_EVD-Net.jpg')
        cv2.imwrite(savepath, data * 255.0, [cv2.IMWRITE_JPEG_QUALITY, 100])

        print(imagename)

    print('image numbers: %s' % imagesnum)
def run_test_save_result():
    """Run the sg_vrd Faster R-CNN net on the test images and store the
    detections of every image in ``output/sg_vrd_2016_test_more.hdf5``.

    For each image found under ``data/sg_vrd_2016/Data/sg_test_images/``
    the detections of classes 1..100 scoring above 0.01 go through
    per-class NMS (threshold 0.2).  Survivors scoring above 0.15 are
    written as four datasets below ``<image id>/``: ``classemes`` (the
    full score rows), ``visuals`` (the ``fc7`` rows), ``locations`` (the
    class boxes) and ``cls_confs`` (``[class index, score]`` rows).
    Timing information is printed after every image.
    """
    caffe.set_mode_gpu()
    caffe.set_device(0)
    net = caffe.Net(
        'models/sg_vrd/vgg16/faster_rcnn_end2end/test.prototxt',
        'output/faster_rcnn_end2end/sg_vrd_2016_train/'
        'vgg16_faster_rcnn_finetune_iter_80000.caffemodel',
        caffe.TEST)
    net.name = 'sgvrd'
    imdb = get_imdb('sg_vrd_2016_test')
    imdb.competition_mode(0)
    if not cfg.TEST.HAS_RPN:
        imdb.set_proposal_method(cfg.TEST.PROPOSAL_METHOD)

    root = 'data/sg_vrd_2016/Data/sg_test_images/'
    h5path = 'output/sg_vrd_2016_test_more.hdf5'
    _t = {'im_detect': Timer(), 'misc': Timer()}
    cnt = 0
    thresh = .15

    # Mode 'a' (read/write, create if missing) is spelled out because the
    # default mode of h5py.File differs between h5py releases.
    h5f = h5py.File(h5path, 'a')
    try:
        for path, subdirs, files in os.walk(root):
            for name in files:
                cnt += 1
                im_idx = name.split('.')[0]
                fpath = os.path.join(path, name)
                im = cv2.imread(fpath)
                if im is None:
                    # Unreadable / non-image file: report it and move on
                    # instead of passing None to the detector.
                    print(fpath)
                    continue

                _t['im_detect'].tic()
                score_raw, scores, fc7, boxes = im_detect(net, im, None)
                _t['im_detect'].toc()

                res_locations = []
                res_visuals = []
                res_classemes = []
                res_cls_confs = []
                _t['misc'].tic()
                for j in range(1, 101):
                    inds = np.where(scores[:, j] > 0.01)[0]
                    if len(inds) == 0:
                        continue
                    cls_scores = scores[inds, j]
                    cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
                    cls_dets = np.hstack(
                        (cls_boxes, cls_scores[:, np.newaxis])
                    ).astype(np.float32, copy=False)
                    # Column 0: class index, column 1: class score.
                    cls_conf = np.column_stack(
                        (np.zeros(cls_scores.shape[0]) + j, cls_scores))

                    keep = nms(cls_dets, .2, force_cpu=True)  # nms threshold
                    res_classemes.extend(scores[inds][keep])
                    res_visuals.extend(fc7[inds][keep])
                    res_locations.extend(cls_boxes[keep])
                    res_cls_confs.extend(cls_conf[keep])

                # Keep only the detections that are confident enough.
                inds = np.where(np.array(res_cls_confs)[:, 1] > thresh)[0]
                res_classemes = np.array(res_classemes)[inds]
                res_visuals = np.array(res_visuals)[inds]
                res_locations = np.array(res_locations)[inds]
                res_cls_confs = np.array(res_cls_confs)[inds]
                h5f.create_dataset(im_idx + '/classemes', dtype='float16',
                                   data=res_classemes.astype(np.float16))
                h5f.create_dataset(im_idx + '/visuals', dtype='float16',
                                   data=res_visuals.astype(np.float16))
                h5f.create_dataset(im_idx + '/locations', dtype='short',
                                   data=res_locations.astype(np.short))
                h5f.create_dataset(im_idx + '/cls_confs', dtype='float16',
                                   data=res_cls_confs.astype(np.float16))
                _t['misc'].toc()

                print('im_detect: {:d} {:.3f}s {:.3f}s'.format(
                    cnt, _t['im_detect'].average_time,
                    _t['misc'].average_time))
    finally:
        # Flush and release the output file even when an image fails.
        h5f.close()
def convertToFeatureVecs(inputPath, inputfile, outputFile):
    """Open the feature shelf ``outputFile``, print its size and exit.

    As written, the function counts the lines of ``inputfile``, builds the
    Caffe classifier, reads the label file, opens the existing shelf
    ``outputFile`` (``flag='w'``), prints how many entries it holds, closes
    it and calls ``exit()``.

    NOTE(review): everything after ``exit()`` is unreachable.  That code
    looks like the real job of this function (extract ``layer_name``
    features for every image listed in ``inputfile`` and store them in the
    shelf, skipping ids that are already present), and the
    ``print(len(dataMap)) / dataMap.close() / exit()`` sequence looks like
    a leftover debugging aid -- confirm before removing either part.

    ``model_prototxt``, ``model_trained``, ``mean_path``,
    ``imagenet_labels``, ``layer_name`` and ``getImageID`` are not defined
    in this function; presumably they are module-level names.

    Args:
        inputPath: prefix prepended to every image path read from
            ``inputfile`` (plain string concatenation).
        inputfile: text file with one image path per line.
        outputFile: path of the ``shelve`` database to open.
    """
    count = 0
    # First pass over the list file: count its lines.
    with open(inputfile, 'r') as reader:
        for path in reader:
            count += 1
    print('Preparing to read {} images'.format(count))
    caffe.set_device(0)
    caffe.set_mode_gpu()
    # Loading the Caffe model, setting preprocessing parameters
    net = caffe.Classifier(model_prototxt, model_trained,
                           mean=np.load(mean_path).mean(1).mean(1),
                           channel_swap=(2, 1, 0),
                           raw_scale=255,
                           image_dims=(448, 448))
    # Loading class labels
    with open(imagenet_labels) as f:
        labels = f.readlines()
    print('Results edited in: {}'.format(outputFile))
    errorMessages = []
    countDone = 0
    # NOTE(review): bare expression statement with no effect.
    count
    # flag='w' opens an existing database for reading and writing.
    dataMap = shelve.open(outputFile, flag='w', protocol=pickle.HIGHEST_PROTOCOL)
    print(len(dataMap))
    dataMap.close()
    # NOTE(review): terminates the interpreter here; the code below never
    # runs, and it would use ``dataMap`` after it has been closed.
    exit()
    print('Extracting from layer: {}'.format(layer_name))
    with open(inputfile, 'r') as reader:
        for image_path in reader:
            image_path = image_path.strip()
            img_id = getImageID(image_path)
            # Skip images whose features are already stored.
            if (str(img_id) in dataMap):
                if countDone % 10 == 0:
                    print('Contains {}, count {}'.format(img_id, countDone))
                countDone += 1
                continue
            input_image = caffe.io.load_image(inputPath + image_path)
            prediction = net.predict([input_image], oversample=False)
            msg = ('{} : {} ( {} )'.format(
                os.path.basename(image_path),
                labels[prediction[0].argmax()].strip(),
                prediction[0][prediction[0].argmax()]))
            # NOTE(review): ``count`` still holds the line count from the
            # first pass, so it does not start at zero here.
            count = count + 1
            try:
                # filename, array data to be saved, format, delimiter
                featureData = net.blobs[layer_name].data[0]
                dataMap[str(img_id)] = featureData
                msg2 = ('\nImages processed: {}\n'.format(count))
            except ValueError:
                print('Error reading image_path')
                errorMessages.append(image_path)
            # NOTE(review): ``featureData`` / ``msg2`` are only bound when
            # the try block above got far enough.
            if count % 200 == 0:
                print(featureData.shape)
                print(msg)
                print(msg2)
            if count % 1000 == 0:
                print('Doing a data sync...')
                dataMap.sync()
                print('Data sync done.')
    dataMap.close()
    print('Completed processing {} images'.format(count))
    print('Error messages: {}'.format(errorMessages))
def run_test_visualize():
    """Run the sg_vrd Faster R-CNN net on the test images and show the
    detections interactively.

    For each image found under ``data/sg_vrd_2016/Data/sg_test_images/``
    the detections of classes 1..100 scoring above 0.05 go through
    per-class NMS (threshold 0.2).  Every surviving detection with a score
    of at least 0.2 is drawn (class name and box) and printed.  The
    annotated image is shown in a window, saved as ``<counter>.jpg`` in
    the working directory, and the function waits for a key press;
    ESC terminates the program.
    """
    caffe.set_mode_gpu()
    caffe.set_device(0)
    net = caffe.Net(
        'models/sg_vrd/vgg16/faster_rcnn_end2end/test.prototxt',
        'output/faster_rcnn_end2end/sg_vrd_2016_train/'
        'vgg16_faster_rcnn_finetune_iter_60000.caffemodel',
        caffe.TEST)
    net.name = 'sgvrd'
    imdb = get_imdb('sg_vrd_2016_test')
    imdb.competition_mode(0)
    if not cfg.TEST.HAS_RPN:
        imdb.set_proposal_method(cfg.TEST.PROPOSAL_METHOD)

    root = 'data/sg_vrd_2016/Data/sg_test_images/'
    _t = {'im_detect': Timer(), 'misc': Timer()}
    cnt = 0
    thresh = .05

    # Meta file that maps class indices to class names.
    m = h5py.File('/home/zawlin/Dropbox/proj/sg_vrd_meta.h5', 'r', 'core')
    try:
        for path, subdirs, files in os.walk(root):
            for name in files:
                cnt += 1
                im_idx = name.split('.')[0]
                fpath = os.path.join(path, name)
                im = cv2.imread(fpath)
                if im is None:
                    # Unreadable / non-image file: report it and move on
                    # instead of passing None to the detector.
                    print(fpath)
                    continue

                _t['im_detect'].tic()
                score_raw, scores, fc7, boxes = im_detect(net, im, None)
                _t['im_detect'].toc()

                boxes_tosort = []
                for j in range(1, 101):
                    inds = np.where(scores[:, j] > thresh)[0]
                    cls_scores = scores[inds, j]
                    cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
                    cls_dets = np.hstack(
                        (cls_boxes, cls_scores[:, np.newaxis])
                    ).astype(np.float32, copy=False)
                    if len(cls_scores) > 0:
                        keep = nms(cls_dets, .2, force_cpu=True)  # nms threshold
                        cls_dets = cls_dets[keep, :]
                    boxes_tosort.append(cls_dets)

                for j, cls_dets in enumerate(boxes_tosort):
                    if cls_dets.shape[0] == 0:
                        continue
                    cls_idx = j + 1
                    cls_name = str(
                        m['meta/cls/idx2name/' + str(cls_idx)][...])
                    for det in cls_dets:
                        score = min(det[-1], 1)
                        if score < 0.2:
                            continue
                        # Keep the label away from the image border.
                        x, y = int(det[0]), int(det[1])
                        if x < 10:
                            x = 15
                        if y < 10:
                            y = 15
                        cv2.putText(im, cls_name, (x, y),
                                    cv2.FONT_HERSHEY_COMPLEX, 1,
                                    (0, 0, 255), 1)
                        # The box corners are float32; cast them to int
                        # before passing them to OpenCV as pixel points.
                        cv2.rectangle(im,
                                      (int(det[0]), int(det[1])),
                                      (int(det[2]), int(det[3])),
                                      (255, 0, 0), 2)
                        print('%s %d %f %d %d %d %f\n' % (
                            im_idx, cls_idx, score,
                            det[0], det[1], det[2], det[3]))

                cv2.imshow('im', im)
                cv2.imwrite(str(cnt) + '.jpg', im)
                if cv2.waitKey(0) & 0xFF == 27:  # ESC
                    exit(0)
    finally:
        m.close()
def iou(model, weights, input_source, input_source_label):
    """Measure per-class mean IoU and mean inference time on a video.

    Frames are read in lock step from ``input_source`` (images) and
    ``input_source_label`` (label images, class id taken from channel 0).
    Each frame is cropped with ``crop_input`` to the spatial size of the
    ``data`` blob, mean-subtracted, repeated along the batch axis to fill
    the whole batch, and run through the net.  The ``prob`` blob is
    averaged over the batch axis and the arg-max over the class axis is
    the predicted class map.

    Args:
        model: path of the network definition.
        weights: path of the trained weights.
        input_source: anything ``cv2.VideoCapture`` accepts.
        input_source_label: same, for the label frames.

    Returns:
        ``(miou, mtimer)``: ``miou`` is a list with one entry per class
        (``NUM_CLASSES`` entries), the mean IoU over the frames in which
        the class occurs in the label or the prediction, rounded to three
        decimals, or NaN when it never occurs; ``mtimer`` is the mean
        duration of a forward pass in seconds.
    """
    iou_list = [[] for _ in range(NUM_CLASSES)]
    timer = []

    caffe.set_mode_gpu()
    net = caffe.Net(model, weights, caffe.TEST)
    input_shape = net.blobs['data'].data.shape
    # The whole batch holds copies of one frame.
    MCDO_samples = input_shape[0]

    # Per-channel mean, in the order the channels are indexed below.
    # NOTE(review): OpenCV normally delivers BGR frames, which would put
    # R_mean on the blue channel -- confirm the intended order.
    B_mean = 129
    G_mean = 126
    R_mean = 126
    channel_mean = numpy.array([R_mean, G_mean, B_mean], dtype=numpy.int32)

    cap = cv2.VideoCapture(input_source)
    cap_label = cv2.VideoCapture(input_source_label)
    try:
        while True:
            rval, frame = cap.read()
            rval_lab, frame_lab = cap_label.read()
            # Stop as soon as either stream runs out; a missing label
            # frame would otherwise surface as an error on None below.
            if not (rval and rval_lab):
                print("No image found!")
                break

            resized_image = crop_input(frame,
                                       (input_shape[3], input_shape[2]))
            cropped = numpy.int32(resized_image) - channel_mean
            # (y, x, 3) -> (3, y, x), then repeat to fill the batch.
            input_image = cropped.transpose((2, 0, 1))
            input_image = numpy.repeat(input_image[numpy.newaxis, :, :, :],
                                       MCDO_samples, axis=0)

            start = time.time()
            net.forward_all(data=input_image)
            end = time.time()
            timer.append(end - start)

            mean_confidence = numpy.mean(net.blobs['prob'].data, axis=0,
                                         dtype=numpy.float64)
            classes = numpy.argmax(mean_confidence, axis=0)

            # NOTE(review): the label frame is compared at its native
            # size; this assumes it already matches the network output.
            labels = frame_lab[:, :, 0]
            for cls in range(NUM_CLASSES):
                in_label = labels == cls
                in_prediction = classes == cls
                union = int(numpy.sum(in_label | in_prediction))
                if union == 0:
                    # Class absent from both masks: IoU is undefined, and
                    # a 0/0 here would turn the class mean into NaN.
                    continue
                intersection = int(numpy.sum(in_label & in_prediction))
                # float() keeps this a true division on Python 2 as well.
                frame_iou = float(intersection) / union
                print(round(frame_iou, 3))
                iou_list[cls].append(frame_iou)
    finally:
        cap.release()
        cap_label.release()
        cv2.destroyAllWindows()

    miou = [numpy.round(numpy.mean(values), 3) if values else float('nan')
            for values in iou_list]
    print("NAZDAR")
    mtimer = numpy.mean(timer)
    return miou, mtimer
def extractor(video_dir, detection_file, output_file, gpu_id, model,
              model_weights):
    """Append a CNN feature vector to every detection row and save them.

    Args:
        video_dir: directory with the frames; each file name without its
            extension must be the integer frame index.
        detection_file: comma-separated text file, one detection per row.
            Column 0 is the frame index; columns 2..5 are used as
            x, y, width and height of the box.
        output_file: destination passed to ``np.save``.
        gpu_id: GPU device to run Caffe on.
        model: path of the network definition.
        model_weights: path of the trained weights.

    Each box is cropped from its frame, resized to 56x144 (width x
    height), mean-subtracted and pushed through the net as one batch per
    frame.  The feature is read from blob ``fc7_bn`` when the net has it,
    otherwise from ``fc7``.  The saved array holds the original detection
    row followed by its feature, one row per detection.
    """
    caffe.set_device(gpu_id)
    caffe.set_mode_gpu()
    net = caffe.Net(model, model_weights, caffe.TEST)
    feature_blob = 'fc7_bn' if 'fc7_bn' in net.blobs else 'fc7'

    img_dir = video_dir
    print(img_dir)
    img_filenames = {
        int(os.path.splitext(f)[0]): os.path.join(img_dir, f)
        for f in os.listdir(img_dir)
    }

    # ndmin=2 keeps a single-row file two-dimensional.  The builtin
    # float/int replace np.float/np.int, which newer NumPy removed.
    dets_in = np.loadtxt(detection_file, delimiter=',',
                         ndmin=2).astype(float)
    dets_out = []
    frame_indices = dets_in[:, 0].astype(int)
    min_frame_idx = frame_indices.min()
    max_frame_idx = frame_indices.max()
    print('min_idx: %d, max_idx: %d' % (min_frame_idx, max_frame_idx))

    # Per-channel mean, shaped to broadcast over a (3, H, W) patch.
    pixel_mean = np.array([102.0, 102.0, 101.0],
                          dtype=np.float32).reshape(3, 1, 1)

    for idx in range(min_frame_idx, max_frame_idx + 1):
        sub_dets = dets_in[frame_indices == idx]
        if idx not in img_filenames:
            print('WARNING could not find image for frame %d' % idx)
            continue
        if len(sub_dets) > 0:
            bgr_img = cv2.imread(img_filenames[idx])
            if bgr_img is None:
                print('WARNING could not read image for frame %d' % idx)
                continue

            batch_patch = np.zeros((len(sub_dets), 3, 144, 56),
                                   dtype=np.float32)
            for i, det in enumerate(sub_dets):
                # NOTE: its img[y: y + h, x: x + w] and *not*
                # img[x: x + w, y: y + h]
                top = abs(int(det[3]))
                bottom = abs(int(det[3] + det[5])) - 1
                left = abs(int(det[2]))
                right = abs(int(det[2] + det[4])) - 1
                # cv2.resize allocates its own output, so the source
                # image does not need to be copied first.
                patch = cv2.resize(bgr_img[top:bottom, left:right],
                                   (56, 144))
                patch = np.transpose(patch, (2, 0, 1))
                batch_patch[i] = patch.astype(np.float32) - pixel_mean

            net.blobs['data'].reshape(*batch_patch.shape)
            net.blobs['data'].data[:, :, :, :] = batch_patch
            net.forward()
            feature = net.blobs[feature_blob].data[:]
            # merge dets and features
            dets_out += [np.r_[(d, f)] for d, f in zip(sub_dets, feature)]
        if idx % 100 == 0:
            print('processed {}'.format(idx))

    np.save(output_file, np.asarray(dets_out), allow_pickle=False)
def run_test_object_detection_eval():
    """Run the sg_vrd Faster R-CNN net on the listed test images and dump
    the raw detector output to ``output/vr_object_detections.hdf5``.

    ``data/sg_vrd_2016/ImageSets/test.txt`` is read as space-separated
    lines whose first field is the image name (without ``.jpg``) and whose
    second field is the image id.  For every image the full ``scores`` and
    ``boxes`` arrays returned by ``im_detect`` are stored as
    ``<image id>/scores`` (float16) and ``<image id>/boxes`` (short).
    Timing information is printed after every image.
    """
    caffe.set_mode_gpu()
    caffe.set_device(0)
    net = caffe.Net(
        'models/sg_vrd/vgg16/faster_rcnn_end2end/test.prototxt',
        'output/faster_rcnn_end2end/sg_vrd_2016_train/'
        'vgg16_faster_rcnn_finetune_iter_60000.caffemodel',
        caffe.TEST)
    net.name = 'sgvrd'
    imdb = get_imdb('sg_vrd_2016_test')
    imdb.competition_mode(0)
    if not cfg.TEST.HAS_RPN:
        imdb.set_proposal_method(cfg.TEST.PROPOSAL_METHOD)

    root = 'data/sg_vrd_2016/Data/sg_test_images/'
    img_set_file = 'data/sg_vrd_2016/ImageSets/test.txt'
    _t = {'im_detect': Timer(), 'misc': Timer()}
    cnt = 0

    # image id -> image name; a later line with the same id wins.
    imlist = {}
    with open(img_set_file) as list_file:
        for line in list_file:
            fields = line.strip().split(' ')
            if len(fields) < 2:
                continue  # blank or malformed line
            imlist[fields[1]] = fields[0]

    # Mode 'a' (read/write, create if missing) is spelled out because the
    # default mode of h5py.File differs between h5py releases.
    h5f = h5py.File('output/vr_object_detections.hdf5', 'a')
    try:
        for imid, im_name in imlist.items():
            im_path = root + im_name + '.jpg'
            cnt += 1
            im = cv2.imread(im_path)
            if im is None:
                # Unreadable image: report it and move on instead of
                # passing None to the detector.
                print(im_path)
                continue

            _t['im_detect'].tic()
            score_raw, scores, fc7, boxes = im_detect(net, im, None)
            _t['im_detect'].toc()

            _t['misc'].tic()
            h5f.create_dataset(imid + '/scores', dtype='float16',
                               data=scores.astype(np.float16))
            h5f.create_dataset(imid + '/boxes', dtype='short',
                               data=boxes.astype(np.short))
            _t['misc'].toc()

            print('im_detect: {:d} {:.3f}s {:.3f}s'.format(
                cnt, _t['im_detect'].average_time,
                _t['misc'].average_time))
    finally:
        # Flush and release the output file even when an image fails.
        h5f.close()
parser1 = make_parser() args = parser1.parse_args() net_file= args.ssd_model_def caffe_model= args.ssd_model_weights ccpd_file= args.recog_model_def ccpd_model= args.recog_model_weights test_dir = "../images" if not os.path.exists(caffe_model): print(caffe_model + " does not exist") exit() if not os.path.exists(net_file): print(net_file + " does not exist") exit() caffe.set_mode_gpu(); caffe.set_device(0); net = caffe.Net(net_file,caffe_model,caffe.TEST) ccpd_net = caffe.Net(ccpd_file,ccpd_model,caffe.TEST) inputShape = net.blobs['data'].data.shape det_inputSize = (inputShape[3], inputShape[2]) inputShape = ccpd_net.blobs['data'].data.shape rec_inputSize = (inputShape[3], inputShape[2]) CLASSES = ('background', 'liceneseplate') def max_(m,n):
def run_test_save_pool5():
    """Run the sg_vrd Faster R-CNN net on the test images and store the
    detections together with their ``pool5`` / ``rois`` blob rows in
    ``output/<imdb name>_pool5.hdf5``.

    For each image found under ``data/sg_vrd_2016/Data/sg_test_images/``
    that does not yet have a ``<image id>/classemes`` entry in the output
    file, the detections of classes 1..100 scoring above 0.01 go through
    per-class NMS (threshold 0.2).  Survivors scoring above 0.2 are
    written as six datasets below ``<image id>/``: ``classemes``,
    ``visuals``, ``locations``, ``cls_confs``, ``rpn_rois`` and
    ``pool5s``.  Timing information is printed after every processed
    image, and the running file count every 100 files.
    """
    caffe.set_mode_gpu()
    caffe.set_device(0)
    net = caffe.Net(
        'models/sg_vrd/vgg16/faster_rcnn_end2end/test.prototxt',
        'output/faster_rcnn_end2end/sg_vrd_2016_train/'
        'vgg16_faster_rcnn_finetune_iter_40000.caffemodel',
        caffe.TEST)
    net.name = 'sgvrd'
    imdb = get_imdb('sg_vrd_2016_test')
    imdb.competition_mode(0)
    if not cfg.TEST.HAS_RPN:
        imdb.set_proposal_method(cfg.TEST.PROPOSAL_METHOD)

    root = 'data/sg_vrd_2016/Data/sg_test_images/'
    h5path = 'output/' + imdb.name + '_pool5.hdf5'
    _t = {'im_detect': Timer(), 'misc': Timer()}
    cnt = 0
    thresh = .01

    # Mode 'a' (read/write, create if missing) is spelled out because the
    # default mode of h5py.File differs between h5py releases; it is also
    # what lets a rerun skip the images that are already stored.
    h5f = h5py.File(h5path, 'a')
    try:
        for path, subdirs, files in os.walk(root):
            for name in files:
                # Counted once per file (the second increment at the end
                # of the loop body made the counter run at double speed).
                cnt += 1
                if cnt % 100 == 0:
                    print(cnt)
                im_idx = name.split('.')[0]
                fpath = os.path.join(path, name)
                im = cv2.imread(fpath)
                if im is None:
                    # Unreadable / non-image file: report it and move on
                    # instead of passing None to the detector.
                    print(fpath)
                    continue
                if im_idx + '/classemes' in h5f:
                    continue  # already processed by an earlier run

                _t['im_detect'].tic()
                score_raw, scores, fc7, boxes = im_detect(net, im, None)
                _t['im_detect'].toc()
                rpn_rois = net.blobs['rois'].data
                pool5 = net.blobs['pool5'].data

                res_rpn_rois = []
                res_pool5s = []
                res_locations = []
                res_visuals = []
                res_classemes = []
                res_cls_confs = []
                _t['misc'].tic()
                for j in range(1, 101):
                    inds = np.where(scores[:, j] > thresh)[0]
                    if len(inds) == 0:
                        continue
                    cls_scores = scores[inds, j]
                    cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
                    cls_dets = np.hstack(
                        (cls_boxes, cls_scores[:, np.newaxis])
                    ).astype(np.float32, copy=False)
                    # Column 0: class index, column 1: class score.
                    cls_conf = np.column_stack(
                        (np.zeros(cls_scores.shape[0]) + j, cls_scores))

                    keep = nms(cls_dets, .2, force_cpu=True)  # nms threshold
                    res_classemes.extend(scores[inds][keep])
                    res_visuals.extend(fc7[inds][keep])
                    res_locations.extend(cls_boxes[keep])
                    res_cls_confs.extend(cls_conf[keep])
                    res_pool5s.extend(pool5[inds][keep])
                    res_rpn_rois.extend(rpn_rois[inds][keep])

                # Keep only the detections that are confident enough.
                inds = np.where(np.array(res_cls_confs)[:, 1] > 0.2)[0]
                res_classemes = np.array(res_classemes)[inds]
                res_visuals = np.array(res_visuals)[inds]
                res_locations = np.array(res_locations)[inds]
                res_cls_confs = np.array(res_cls_confs)[inds]
                res_pool5s = np.array(res_pool5s)[inds]
                res_rpn_rois = np.array(res_rpn_rois)[inds]
                h5f.create_dataset(im_idx + '/classemes', dtype='float16',
                                   data=res_classemes.astype(np.float16))
                h5f.create_dataset(im_idx + '/visuals', dtype='float16',
                                   data=res_visuals.astype(np.float16))
                h5f.create_dataset(im_idx + '/locations', dtype='short',
                                   data=res_locations.astype(np.short))
                h5f.create_dataset(im_idx + '/cls_confs', dtype='float16',
                                   data=res_cls_confs.astype(np.float16))
                h5f.create_dataset(im_idx + '/rpn_rois', dtype='float16',
                                   data=res_rpn_rois.astype(np.float16))
                h5f.create_dataset(im_idx + '/pool5s', dtype='float16',
                                   data=res_pool5s.astype(np.float16))
                _t['misc'].toc()

                print('im_detect: {:d} {:.3f}s {:.3f}s'.format(
                    cnt, _t['im_detect'].average_time,
                    _t['misc'].average_time))
    finally:
        # Flush and release the output file even when an image fails.
        h5f.close()
def main():
    """Evaluate the five cross-validation models and print their metrics.

    For fold ``k`` in 1..5 the weights ``../models/k/hinge_R3CNN.caffemodel``
    are loaded into ``./resnext_deploy.prototxt``, every image listed in
    ``../data/k/test_k.txt`` (``<relative image path> <label>`` per line,
    images below ``../data/faces``) is pre-processed with the fold's mean
    file and run through the net, and the first value of blob ``feat1`` is
    taken as the prediction.  Pearson correlation, MAE and RMSE between
    labels and predictions are printed for each fold.
    """
    root = '../data/faces'
    network_file = './resnext_deploy.prototxt'
    folds = range(1, 6)
    pretrained_model = ['../models/%d/hinge_R3CNN.caffemodel' % k
                        for k in folds]
    mean_file = ['../data/%d/256_train_mean.binaryproto' % k for k in folds]
    test_file = ['../data/%d/test_%d.txt' % (k, k) for k in folds]

    batch_shape = (1, 3, 224, 224)
    caffe.set_mode_gpu()

    for model_path, mean_path, list_path in zip(pretrained_model,
                                                mean_file, test_file):
        print('start testing------')
        means = get_mean_npy(mean_path, crop_size=batch_shape[2:])
        net = _load_net_quietly(network_file, model_path)

        label_list = []
        prec_list = []
        with open(list_path, 'r') as f:
            for line in f:
                if not line.strip():
                    continue  # tolerate blank lines in the list file
                linesplit = line.split(' ')
                # Drop a trailing carriage return from Windows-style files.
                label = float(linesplit[1].split("\r")[0])
                img = os.path.join(root, linesplit[0])
                img_data = load_img(img, resize=(256, 256), isColor=True,
                                    crop_size=224, crop_type='center_crop',
                                    raw_scale=255, means=means)
                net.blobs['data'].data[...] = img_data
                net.forward()
                prec = net.blobs['feat1'].data[...][0][0]
                label_list.append(label)
                prec_list.append(prec)

        label_list = np.array(label_list)
        prec_list = np.array(prec_list)
        correlation = np.corrcoef(label_list, prec_list)[0][1]
        mae = np.mean(np.abs(label_list - prec_list))
        rmse = np.sqrt(np.mean(np.square(label_list - prec_list)))
        print('Model: {name}\t'
              'Correlation: {correlation:.4f}\t'
              'Mae: {mae:.4f}\t'
              'Rmse: {rmse:.4f}\t'.format(name=model_path,
                                          correlation=float(correlation),
                                          mae=float(mae),
                                          rmse=float(rmse)))


def _load_net_quietly(network_file, weights_file):
    """Build a TEST-phase net while file descriptor 2 points at os.devnull.

    The original stderr is restored and both temporary descriptors are
    closed even when constructing the net raises.
    """
    null_fd = os.open(os.devnull, os.O_RDWR)
    saved_stderr = os.dup(2)
    try:
        os.dup2(null_fd, 2)
        return caffe.Net(network_file, weights_file, caffe.TEST)
    finally:
        os.dup2(saved_stderr, 2)
        os.close(saved_stderr)
        os.close(null_fd)
def main(input, output, model_path, model_name, octaves, octave_scale,
         iterations, jitter, stepsize, blend, layers, guide_image,
         start_frame, end_frame, verbose):
    """Deep-dream a numbered frame sequence, carrying the dream from one
    frame to the next along the optical flow.

    Frames are read from ``<input>/%08d.png`` and results are written to
    ``<output>/%08d.png`` under the same numbers.  The first frame is
    dreamed directly.  For every following frame the difference between
    the previous result and the previous reference image is warped with
    the Farneback optical flow, added to the new frame, and the sum is
    dreamed again.

    Args:
        input: directory with the source frames.
        output: directory for the dreamed frames.
        model_path: prefix of the model files; ``deploy.prototxt`` and
            ``model_name`` are appended to it by string concatenation.
        model_name: file name of the weights.
        octaves, octave_scale, iterations, jitter, stepsize: deepdream
            parameters; ``None`` selects 5, 1.4, 4, 32 and 1.5.
        blend: ``None`` selects 0.5.  NOTE(review): the value is not used;
            the blend factor passed to ``morphPicture`` is fixed at 0.5.
        layers: blob to maximise; ``None`` selects
            ``'inception_5a/pool_proj'``.
        guide_image: optional image path; when given, guided dreaming is
            used.
        start_frame: first frame number; ``None`` selects 1.
        end_frame: last frame number; ``None`` selects the number of files
            in ``input``.
        verbose: ``None`` selects 1; 3 imports the IPython display
            helpers.
    """
    make_sure_path_exists(input)
    make_sure_path_exists(output)

    # The number of files in the input directory bounds the sequence.
    frame_count = len([
        name for name in os.listdir(input)
        if os.path.isfile(os.path.join(input, name))
    ])
    if frame_count == 0:
        print("no frames to process found")
        sys.exit(0)

    if octaves is None:
        octaves = 5
    if octave_scale is None:
        octave_scale = 1.4
    if iterations is None:
        iterations = 4
    if jitter is None:
        jitter = 32
    if stepsize is None:
        stepsize = 1.5
    if blend is None:
        blend = 0.5  # can be nr (constant), random, or loop
    if verbose is None:
        verbose = 1
    if layers is None:
        layers = 'inception_5a/pool_proj'  # ['inception_4c/output']

    frame_i = 1 if start_frame is None else int(start_frame)
    last_frame = frame_count if end_frame is None else int(end_frame)

    caffe.set_mode_gpu()
    # caffe.set_device(0)  # select GPU device if multiple devices exist

    # Loading DNN Model
    net_fn = model_path + 'deploy.prototxt'
    param_fn = model_path + model_name

    # Patch the model so that gradients can be computed ("force_backward:
    # true").  Both files are closed before Caffe reads the patched copy.
    model = caffe.io.caffe_pb2.NetParameter()
    with open(net_fn) as proto_file:
        text_format.Merge(proto_file.read(), model)
    model.force_backward = True
    with open('tmp.prototxt', 'w') as patched_file:
        patched_file.write(str(model))

    net = caffe.Classifier(
        'tmp.prototxt',
        param_fn,
        mean=np.float32([104.0, 116.0, 122.0]),  # ImageNet mean, training set dependent
        channel_swap=(2, 1, 0))  # the reference model has channels in BGR order instead of RGB

    if verbose == 3:
        from IPython.display import clear_output, Image, display
        print("display turned on")

    def dream(frame):
        # Normal or guided dreaming, depending on guide_image.
        if guide_image is None:
            return deepdream(net, frame, iter_n=iterations,
                             step_size=stepsize, octave_n=octaves,
                             octave_scale=octave_scale, jitter=jitter,
                             end=layers)
        print('Setting up Guide with selected image')
        guide_features = prepare_guide(net, PIL.Image.open(guide_image),
                                       end=layers)
        return deepdream_guided(net, frame, iter_n=iterations,
                                step_size=stepsize, octave_n=octaves,
                                octave_scale=octave_scale, jitter=jitter,
                                end=layers, objective_fn=objective_guide,
                                guide_features=guide_features)

    img = np.float32(PIL.Image.open(input + '/%08d.png' % frame_i))
    h, w = img.shape[:2]
    hallu = dream(img)
    np.clip(hallu, 0, 255, out=hallu)
    PIL.Image.fromarray(np.uint8(hallu)).save(output + '/%08d.png' % frame_i)
    grayImg = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    # The first frame is done above, so continue with the one after it and
    # stop at last_frame inclusive.  (Iterating up to last_frame + 1 tried
    # to open a frame past the end of the sequence.)
    for frame_no in range(frame_i + 1, last_frame + 1):
        previousImg = img
        previousGrayImg = grayImg
        img = np.float32(PIL.Image.open(input + '/%08d.png' % frame_no))
        grayImg = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

        flow = cv2.calcOpticalFlowFarneback(previousGrayImg, grayImg, None,
                                            pyr_scale=0.5, levels=3,
                                            winsize=15, iterations=3,
                                            poly_n=5, poly_sigma=1.2,
                                            flags=0)
        # Turn the displacement field into the absolute sampling map that
        # cv2.remap expects.
        flow = -flow
        flow[:, :, 0] += np.arange(w)
        flow[:, :, 1] += np.arange(h)[:, np.newaxis]

        halludiff = hallu - previousImg
        halludiff = cv2.remap(halludiff, flow, None, cv2.INTER_LINEAR)
        hallu = dream(img + halludiff)
        np.clip(hallu, 0, 255, out=hallu)
        PIL.Image.fromarray(np.uint8(hallu)).save(
            output + '/%08d.png' % frame_no)

        blendval = 0.5
        # The blend of the current input frame and the previous output
        # frame becomes the reference image of the next iteration.
        img = morphPicture(input + '/%08d.png' % frame_no,
                           output + '/%08d.png' % (frame_no - 1), blendval)
def main(args):
    """Train ``solver1`` step by step on three image sets, validating and
    snapshotting at regular intervals.

    In every iteration one image/ground-truth pair is taken from data set
    3 (probability 0.3), data set 2 (probability 0.4) or data set 1
    (otherwise), optionally mirrored, pre-processed and written to blob
    ``R1``; after a forward pass a weighted binary cross-entropy of blob
    ``fc8_saliency_reg`` is recorded for logging, and a backward pass plus
    solver update follow.  Every ``args.display_every`` iterations the
    mean recorded loss is written to stderr.  Every
    ``args.snapshot_every`` iterations the validation set is evaluated
    (predictions are saved to ``tmp/`` and scored by the MATLAB function
    ``callEvalFunc``), a line is appended to the log file and the solver
    is snapshotted.

    ``prior``, ``weight_`` and ``cuda0`` are not defined in this function;
    presumably they are module-level names.

    Args:
        args: parsed options; the attributes read here are ``dss_proto``,
            ``dss_weights``, ``datalist1..3``, ``inputdir1..3``,
            ``gtdir1..3``, ``valdata``, ``logfile``, ``prefix``,
            ``start_snapshot``, ``max_iter``, ``flip_prob``,
            ``display_every``, ``snapshot_every`` and ``snapshot_folder``.
    """
    caffe.set_mode_gpu()
    caffe.set_device(0)
    solver1 = caffe.SGDSolver(args.dss_proto)
    if 'vgg' in args.dss_weights:
        interp_layers = [k for k in solver1.net.params.keys() if 'up' in k]
        interp_surgery(solver1.net, interp_layers)
    solver1.net.copy_from(args.dss_weights)
    print('loaded solver1')

    inputsize = 500
    gt_ = np.zeros(shape=(1, 1, inputsize, inputsize))
    img_ = np.zeros(shape=(1, 4, inputsize, inputsize))

    matfile1 = sio.loadmat(args.datalist1)['trainImgSet']
    datalist1 = [matfile1[i][0][0] for i in range(matfile1.shape[0])]
    matfile2 = sio.loadmat(args.datalist2)['trainImgSet']
    datalist2 = [matfile2[i][0][0] for i in range(matfile2.shape[0])]
    matfile3 = sio.loadmat(args.datalist3)['trainImgSet']
    datalist3 = [matfile3[i][0][0] for i in range(matfile3.shape[0])]

    valinput = './dataset/' + args.valdata + '/imgs/'
    valgt = './dataset/' + args.valdata + '/gt/'
    valmatfile = './dataset/' + args.valdata + '/valImgSet.mat'
    valmatfile = sio.loadmat(valmatfile)['valImgSet']
    vallist = [valmatfile[i][0][0] for i in range(valmatfile.shape[0])]

    logfile = args.logfile
    if logfile == '':
        logfile = args.prefix + '.log'
    # Start from an empty log; os.remove avoids handing a file name to a
    # shell.
    if os.path.isfile(logfile):
        os.remove(logfile)

    learn_data3_prob = 0.3
    learn_data2_prob = 0.4
    loss_arch = []
    loss_bigarch = []
    start_t = time.time()
    it = args.start_snapshot
    while it < args.max_iter:
        # Pick the data set for this iteration.
        tmpinput = args.inputdir1
        tmpgt = args.gtdir1
        tmplist = datalist1
        tmpext = '.jpg'
        r = random.uniform(0., 1.)
        if r < learn_data3_prob:
            tmpinput = args.inputdir3
            tmpgt = args.gtdir3
            tmplist = datalist3
            tmpext = '.png'
        elif r < learn_data2_prob + learn_data3_prob:
            tmpinput = args.inputdir2
            tmpgt = args.gtdir2
            tmplist = datalist2

        i = it % len(tmplist)
        gt = Image.open(tmpgt + tmplist[i][:-4] + '.png')
        img = Image.open(tmpinput + tmplist[i][:-4] + tmpext)
        # NOTE(review): this mirrors when the draw EXCEEDS flip_prob, i.e.
        # with probability 1 - flip_prob -- confirm that is intended.
        if random.random() > args.flip_prob:
            gt = gt.transpose(Image.FLIP_LEFT_RIGHT)
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
        gt = preprocess_gt(gt)
        img = vgg_preprocess(img)
        img_[:, :3, :, :] = img
        img_[:, 3, :, :] = prior

        solver1.net.clear_param_diffs()
        solver1.net.blobs['R1'].data[...] = img_
        solver1.net.forward()
        sm = solver1.net.blobs['fc8_saliency_reg'].data.copy()
        # NOTE(review): gt_ is never filled from ``gt``, so the target of
        # this loss is all zeros; the loss is only logged and is not fed
        # into the backward pass below -- confirm both are intended.
        gt_t = torch.tensor(gt_, dtype=torch.float32, device=cuda0,
                            requires_grad=False)
        sm_t = torch.tensor(sm, dtype=torch.float32, device=cuda0,
                            requires_grad=True)
        weight_t = torch.tensor(weight_, dtype=torch.float32, device=cuda0,
                                requires_grad=False)
        loss = F.binary_cross_entropy(sm_t, gt_t, weight=weight_t,
                                      size_average=False)

        # Ring buffers with the most recent losses.
        if len(loss_arch) < args.display_every:
            loss_arch.append(float(loss))
        else:
            loss_arch[it % args.display_every] = float(loss)
        if len(loss_bigarch) < args.snapshot_every:
            loss_bigarch.append(float(loss))
        else:
            loss_bigarch[it % args.snapshot_every] = float(loss)

        solver1.net.backward()
        solver1.apply_update()
        solver1.increment_iter()

        if it % args.display_every == 0:
            meanloss = sum(loss_arch) * 1.0 / len(loss_arch)
            sys.stderr.write(
                "[%s] Iteration %d: %.2f seconds loss:%.4f" % (
                    time.strftime("%c"), it, time.time() - start_t,
                    meanloss) + "\n")

        if it % args.snapshot_every == 0:
            trainloss = sum(loss_bigarch) * 1.0 / len(loss_bigarch)
            vallosses = []
            tmpdir = 'tmp/'
            if os.path.isdir(tmpdir):
                # Same effect as ``rm tmp/*`` without spawning a shell:
                # plain, non-hidden files only.
                for stale in os.listdir(tmpdir):
                    stale_path = os.path.join(tmpdir, stale)
                    if not stale.startswith('.') and os.path.isfile(stale_path):
                        os.remove(stale_path)
            else:
                os.makedirs(tmpdir)

            for j in range(len(vallist)):
                gt = Image.open(valgt + vallist[j][:-4] + '.png')
                img = Image.open(valinput + vallist[j][:-4] + '.jpg')
                gt = preprocess_gt(gt)
                img = vgg_preprocess(img)
                solver1.net.clear_param_diffs()
                solver1.net.blobs['data'].reshape(*img.shape)
                solver1.net.blobs['data'].data[...] = img
                solver1.net.blobs['label'].reshape(*gt.shape)
                solver1.net.blobs['label'].data[...] = gt
                solver1.net.forward()
                loss = solver1.net.blobs['loss-fuse'].data.copy()
                vallosses.append(float(loss))
                sigmoid_fuse = solver1.net.blobs['sigmoid-fuse'].data.copy()
                pred = Image.fromarray(np.squeeze(
                    np.rint(sigmoid_fuse * 255.0).astype(np.uint8)))
                pred.save(tmpdir + vallist[j][:-4] + '.png')
            valloss = sum(vallosses) * 1.0 / len(vallosses)

            import matlab.engine
            eng = matlab.engine.start_matlab()
            try:
                eng.addpath('/research/adv_saliency/evaluation')
                mae, p, r, fm = eng.callEvalFunc(tmpdir, valgt, nargout=4)
            finally:
                # A new engine is started for every snapshot; shut it down
                # so MATLAB processes do not pile up over a long run.
                eng.quit()

            with open(logfile, 'a') as f:
                f.write('iter:%d trainloss:%.4f valloss:%.4f mae:%.4f p:%.4f r:%.4f f:%.4f\n' % (
                    it, trainloss, valloss, mae, p, r, fm))
            curr_snapshot_folder = args.snapshot_folder + '/' + str(it)
            sys.stderr.write('\n === Saving snapshot to ' +
                             curr_snapshot_folder + ' ===\n' + '\n')
            solver1.snapshot()
        it = it + 1
def __init__(self, settings, key_bindings): super(CaffeVisApp, self).__init__(settings, key_bindings) print 'Got settings', settings self.settings = settings self.bindings = key_bindings self._net_channel_swap = (2, 1, 0) self._net_channel_swap_inv = tuple([ self._net_channel_swap.index(ii) for ii in range(len(self._net_channel_swap)) ]) self._range_scale = 1.0 # not needed; image already in [0,255] # Set the mode to CPU or GPU. Note: in the latest Caffe # versions, there is one Caffe object *per thread*, so the # mode must be set per thread! Here we set the mode for the # main thread; it is also separately set in CaffeProcThread. sys.path.insert(0, os.path.join(settings.caffevis_caffe_root, 'python')) import caffe if settings.caffevis_mode_gpu: caffe.set_mode_gpu() print 'CaffeVisApp mode (in main thread): GPU' else: caffe.set_mode_cpu() print 'CaffeVisApp mode (in main thread): CPU' self.net = caffe.Classifier( settings.caffevis_deploy_prototxt, settings.caffevis_network_weights, mean= None, # Set to None for now, assign later # self._data_mean, channel_swap=self._net_channel_swap, raw_scale=self._range_scale, ) if isinstance(settings.caffevis_data_mean, basestring): # If the mean is given as a filename, load the file try: self._data_mean = np.load(settings.caffevis_data_mean) except IOError: print '\n\nCound not load mean file:', settings.caffevis_data_mean print 'Ensure that the values in settings.py point to a valid model weights file, network' print 'definition prototxt, and mean. To fetch a default model and mean file, use:\n' print '$ cd models/caffenet-yos/' print '$ ./fetch.sh\n\n' raise input_shape = self.net.blobs[self.net.inputs[0]].data.shape[ -2:] # e.g. 227x227 # Crop center region (e.g. 227x227) if mean is larger (e.g. 
256x256) excess_h = self._data_mean.shape[1] - input_shape[0] excess_w = self._data_mean.shape[2] - input_shape[1] assert excess_h >= 0 and excess_w >= 0, 'mean should be at least as large as %s' % repr( input_shape) self._data_mean = self._data_mean[:, (excess_h / 2):(excess_h / 2 + input_shape[0]), (excess_w / 2):(excess_w / 2 + input_shape[1])] elif settings.caffevis_data_mean is None: self._data_mean = None else: # The mean has been given as a value or a tuple of values self._data_mean = np.array(settings.caffevis_data_mean) # Promote to shape C,1,1 while len(self._data_mean.shape) < 1: self._data_mean = np.expand_dims(self._data_mean, -1) #if not isinstance(self._data_mean, tuple): # # If given as int/float: promote to tuple # self._data_mean = tuple(self._data_mean) if self._data_mean is not None: self.net.transformer.set_mean(self.net.inputs[0], self._data_mean) check_force_backward_true(settings.caffevis_deploy_prototxt) self.labels = None if self.settings.caffevis_labels: self.labels = read_label_file(self.settings.caffevis_labels) self.proc_thread = None self.jpgvis_thread = None self.handled_frames = 0 if settings.caffevis_jpg_cache_size < 10 * 1024**2: raise Exception( 'caffevis_jpg_cache_size must be at least 10MB for normal operation.' ) self.img_cache = FIFOLimitedArrayCache( settings.caffevis_jpg_cache_size) self._populate_net_layer_info()
def gpu(cls, id=0):
    """Select the Caffe device with index ``id`` and enable GPU mode.

    The device is chosen first, then GPU mode is switched on. Nothing is
    returned.
    """
    device_index = id
    caffe.set_device(device_index)
    caffe.set_mode_gpu()
def build_tsv():
    """Render all discretized views of every viewpoint and save CNN features.

    For each ``(scanId, viewpointId)`` pair returned by
    ``load_viewpointids()`` the simulator is stepped through
    ``VIEWPOINT_SIZE`` views. Every rendered frame goes through
    ``transform_img`` and is fed to the Caffe net in batches of
    ``BATCH_SIZE``. The ``pool5`` activations are collected and written,
    base64-encoded, as one row of the tab-separated file ``OUTFILE``.

    Raises:
        AssertionError: if ``VIEWPOINT_SIZE`` is not a multiple of
            ``BATCH_SIZE``, or if the simulator reports a view index that
            differs from the expected one.
    """
    # Check the batching configuration up front, before any expensive setup
    # and before OUTFILE is opened (and truncated). Floor division keeps the
    # pass count an int so it stays usable with range().
    assert VIEWPOINT_SIZE % BATCH_SIZE == 0
    forward_passes = VIEWPOINT_SIZE // BATCH_SIZE

    # Set up the simulator
    sim = MatterSim.Simulator()
    sim.setCameraResolution(WIDTH, HEIGHT)
    sim.setCameraVFOV(math.radians(VFOV))
    sim.setDiscretizedViewingAngles(True)
    sim.init()

    # Set up Caffe resnet
    caffe.set_device(GPU_ID)
    caffe.set_mode_gpu()
    net = caffe.Net(PROTO, MODEL, caffe.TEST)
    net.blobs['data'].reshape(BATCH_SIZE, 3, HEIGHT, WIDTH)

    count = 0
    t_render = Timer()
    t_net = Timer()
    # NOTE(review): binary mode plus a raw b64encode() result matches the
    # Python 2 csv conventions; a Python 3 port would presumably need text
    # mode and a decoded string here - confirm before migrating.
    with open(OUTFILE, 'wb') as tsvfile:
        writer = csv.DictWriter(tsvfile, delimiter='\t', fieldnames=TSV_FIELDNAMES)

        # Loop all the viewpoints in the simulator
        viewpointIds = load_viewpointids()
        total_viewpoints = len(viewpointIds)
        for scanId, viewpointId in viewpointIds:
            t_render.tic()
            # Loop all discretized views from this location
            blobs = []
            features = np.empty([VIEWPOINT_SIZE, FEATURE_SIZE], dtype=np.float32)
            for ix in range(VIEWPOINT_SIZE):
                if ix == 0:
                    sim.newEpisode(scanId, viewpointId, 0, math.radians(-30))
                elif ix % 12 == 0:
                    # Every 12th view also passes a non-zero third argument
                    # (presumably an elevation step - verify against the
                    # simulator API).
                    sim.makeAction(0, 1.0, 1.0)
                else:
                    sim.makeAction(0, 1.0, 0)

                state = sim.getState()
                assert state.viewIndex == ix

                # Transform and save generated image
                blobs.append(transform_img(state.rgb))
            t_render.toc()

            t_net.tic()
            # Run as many forward passes as necessary
            for pass_ix in range(forward_passes):
                start = pass_ix * BATCH_SIZE
                stop = start + BATCH_SIZE
                # Copy this batch of image blobs to the net
                for n, blob in enumerate(blobs[start:stop]):
                    net.blobs['data'].data[n, :, :, :] = blob
                # Forward pass; results are read back from the pool5 blob
                net.forward()
                features[start:stop, :] = net.blobs['pool5'].data[:, :, 0, 0]

            writer.writerow({
                'scanId': scanId,
                'viewpointId': viewpointId,
                'image_w': WIDTH,
                'image_h': HEIGHT,
                'vfov': VFOV,
                'features': base64.b64encode(features)
            })
            count += 1
            t_net.toc()

            if count % 100 == 0:
                avg_total = t_render.average_time + t_net.average_time
                print('Processed %d / %d viewpoints, %.1fs avg render time, %.1fs avg net time, projected %.1f hours' %
                      (count, total_viewpoints, t_render.average_time,
                       t_net.average_time, avg_total * total_viewpoints / 3600))