Example #1
	def setup_net(self):
		caffe.set_mode_gpu()
		caffe.set_device(0)
		netFiles  = cfg.get_caffe_net_files(self.prms_.netName)
		self.net_ = caffe.Net(netFiles.deployFile,
				netFiles.netFile, caffe.TEST)
		self.cls_ = cfg.dataset2classnames(self.prms_.trainDataSet)
Example #2
def setCaffeMode(gpu, device = 0):
    """Initialise caffe"""
    if gpu:
        caffe.set_mode_gpu()
        caffe.set_device(device)
    else:
        caffe.set_mode_cpu()
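A minimal usage sketch for the helper above; the prototxt and weights paths are placeholders:

setCaffeMode(gpu=True, device=0)  # pick the mode before constructing any nets
net = caffe.Net('deploy.prototxt', 'weights.caffemodel', caffe.TEST)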
Example #3
def justCheckGradients(solver_file, deploy_file, model_file):
    model = model_file
    print model
    print os.path.exists(model)

    caffe.set_device(1)
    caffe.set_mode_gpu()

    solver = caffe.SGDSolver(solver_file)
    solver.net.forward()

    # caffe.Net needs the phase in addition to the weights file
    net = caffe.Net(deploy_file, model, caffe.TEST)

    print list(net._layer_names)
    print net.blobs.keys()

    net.blobs['data'].data[...] = solver.net.blobs['data'].data
    net.blobs['thelabelscoarse'].data[...] = solver.net.blobs['thelabelscoarse'].data

    net.forward()
    net.backward()

    layers_to_explore = ['conv1', 'conv2', 'conv3', 'conv4', 'conv5', 'fc6_fix', 'fc7_fix', 'fc8_fix']
    ratios = getRatios(net, layers_to_explore)

    for layer_name in ratios.keys():
        print layer_name, ratios[layer_name]
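getRatios is not shown in this snippet; a plausible sketch, assuming it reports the per-layer ratio of mean gradient magnitude to mean weight magnitude (a common gradient sanity check) and that numpy is imported as np:

def getRatios(net, layer_names):
    # hypothetical helper: mean |gradient| / mean |weight| for each layer
    ratios = {}
    for name in layer_names:
        weights = net.params[name][0].data
        grads = net.params[name][0].diff
        ratios[name] = np.abs(grads).mean() / (np.abs(weights).mean() + 1e-12)
    return ratios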
Example #4
def test(net_file,model_file,predict_file,gpunum,outdir,outputlayer):
    caffe.set_device(gpunum)
    caffe.set_mode_gpu()
    if not exists(outdir):
        makedirs(outdir)
    outfile = os.path.join(outdir,'bestiter.pred')
    outputlayer_split = outputlayer.split('_')
    outputlayer_cnt = len(outputlayer_split)
    flag = False
    outdata = []

    net = caffe.Net(realpath(net_file), realpath(model_file),caffe.TEST)
    with open(predict_file,'r') as f:
        files = [x.strip() for x in f]

    with open(outfile,'w') as f:
        for batchfile in files:
            with h5py.File(batchfile, 'r') as fi:
                dataset = np.asarray(fi['data'])
            out = net.forward_all(data=dataset, blobs=outputlayer_split)
            for i in range(outputlayer_cnt):
                if not flag:
                    outdata.append(np.vstack(np.asarray(out[outputlayer_split[i]])))
                else:
                    outdata[i] = np.vstack((outdata[i], np.vstack(np.asarray(out[outputlayer_split[i]]))))
            flag = True
        for out in outdata[0]:
            f.write('%s\n' % '\t'.join([str(x) for x in out]))

    with open(join(outdir,'bestiter.pred.params.pkl'),'wb') as f:
        cPickle.dump((outdata,outputlayer_split),f,protocol=cPickle.HIGHEST_PROTOCOL)
Example #5
  def init(self):
    image_net = self.image_net
    caffe.set_device(0)
    caffe.set_mode_gpu()
    if image_net == 'caffenet':
      convnet_proto = './caffe/models/bvlc_reference_caffenet/deploy.prototxt'
      convnet_model = './caffe/models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'
    elif image_net == 'vggnet':
      convnet_proto = './caffe/models/vggnet/VGG_ILSVRC_16_layers_deploy.prototxt'
      convnet_model = './caffe/models/vggnet/VGG_ILSVRC_16_layers.caffemodel'
    else:
      raise StandardError('Unknown CNN %s' % image_net)

    self.net = caffe.Net(convnet_proto, convnet_model, caffe.TEST)

    if image_net == 'caffenet':
      self.transformer = caffe.io.Transformer({'data': self.net.blobs['data'].data.shape})
      self.transformer.set_transpose('data', (2, 0, 1))
      self.transformer.set_mean('data', np.array([104, 117, 123]))
      self.transformer.set_raw_scale('data', 255)
      self.transformer.set_channel_swap('data', (2, 1, 0))
      self.BATCH_SIZE = 100
      self.net.blobs['data'].reshape(self.BATCH_SIZE, 3, 227, 227)
    elif image_net == 'vggnet':
      self.transformer = caffe.io.Transformer({'data': self.net.blobs['data'].data.shape})
      self.transformer.set_transpose('data', (2, 0, 1))
      self.transformer.set_mean('data', np.array([103.939, 116.779, 123.68]))
      self.transformer.set_raw_scale('data', 255)
      self.transformer.set_channel_swap('data', (2, 1, 0))
      self.BATCH_SIZE = 100
      self.net.blobs['data'].reshape(self.BATCH_SIZE, 3, 224, 224)

    self.image_net = image_net
    self.initialized = True
    print "Done initializing image feature extractor"
Example #6
 def run_quantized_network(self):
     if self.gpu:
         caffe.set_mode_gpu()
     net = caffe.Net(self.model_file,self.quant_weight_file,caffe.TEST)
     acc = np.zeros(self.iterations)
     for i in range(0,self.iterations):
         out = net.forward()
         acc[i] = out[self.accuracy_layer]*100
     print("Accuracy with quantized weights/biases: %.2f%%" %(acc.mean()))
     for i in range(0,self.iterations):
         for layer_no in range(0,len(self.start_layer)):
             if layer_no==0:
                 net.forward(end=str(self.end_layer[layer_no]))
             else:
                 net.forward(start=str(self.start_layer[layer_no]),end=str(self.end_layer[layer_no]))
             if layer_no < len(self.start_layer)-1: # not quantizing accuracy layer
                 net.blobs[self.end_layer[layer_no]].data[:]=np.floor(net.blobs[self.end_layer[layer_no]].data*\
                     (2**self.act_dec_bits[self.end_layer[layer_no]]))
                 net.blobs[self.end_layer[layer_no]].data[net.blobs[self.end_layer[layer_no]].data>126]=127
                 net.blobs[self.end_layer[layer_no]].data[net.blobs[self.end_layer[layer_no]].data<-127]=-128
                 net.blobs[self.end_layer[layer_no]].data[:]=net.blobs[self.end_layer[layer_no]].data/\
                     (2**self.act_dec_bits[self.end_layer[layer_no]])
         acc[i] = net.blobs[self.accuracy_layer].data*100
     accuracy = acc.mean()
     print("Accuracy with quantized weights/biases and activations: %.2f%%" %(accuracy))
     return accuracy
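The activation handling above is a floor-to-fixed-point, saturate-to-int8, rescale round trip. The same arithmetic as a standalone sketch (names illustrative, numpy assumed as np):

def quantize_activations(x, dec_bits):
    # simulate fixed point with dec_bits fractional bits: floor, saturate to int8, rescale
    q = np.floor(x * (2 ** dec_bits))
    q = np.clip(q, -128, 127)
    return q / (2 ** dec_bits)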
Example #7
def detectFace(img_path, threshold):
    img = cv2.imread(img_path)
    caffe_img = img.copy() - 128
    origin_h, origin_w, ch = caffe_img.shape
    scales = tools.calculateScales(img)
    out = []
    # select the device once, before the loop
    caffe.set_device(0)
    caffe.set_mode_gpu()
    for scale in scales:
        hs = int(origin_h * scale)
        ws = int(origin_w * scale)
        scale_img = cv2.resize(caffe_img, (ws, hs))
        scale_img = np.swapaxes(scale_img, 0, 2)
        net_12.blobs['data'].reshape(1, 3, ws, hs)
        net_12.blobs['data'].data[...] = scale_img
        out_ = net_12.forward()
        out.append(out_)
    image_num = len(scales)
    rectangles = []
    for i in range(image_num):    
        cls_prob = out[i]['cls_score'][0][1]
        roi      = out[i]['conv4-2'][0]
        out_h,out_w = cls_prob.shape
        out_side = max(out_h,out_w)
        rectangle = tools.detect_face_12net(cls_prob,roi,out_side,1/scales[i],origin_w,origin_h,threshold[0])
        rectangles.extend(rectangle)
    return rectangles
Example #8
def style_labeler():
    flickr_test_set = np.loadtxt(caffe_root + 'data/flickr_style/test.txt', str, delimiter='\t')
    flickr_test_set_path = [readline.split()[0] for readline in flickr_test_set]
    flickr_test_set_label = [int(readline.split()[1]) for readline in flickr_test_set]
    flickr_test_set_path = flickr_test_set_path[:10000]
    flickr_test_set_label = flickr_test_set_label[:10000]

    caffe.set_mode_gpu()
    our_model = BongguNet()

    true_res = []
    our_res = []
    our_res5 = []

    #with open('./label_result_bonggunet_for_test.csv', 'w') as f:
    for i in range(len(flickr_test_set_path)):
        if i % 1000 == 0:
            print i
            gc.collect()
        img = caffe.io.load_image(flickr_test_set_path[i])
        res = our_model.predict_our(img)

        our_res.append(res[0])
        our_res5.append(res)
        true_res.append(flickr_test_set_label[i])
        #print our_res, true_res
        #f.write(",".join([str(flickr_test_set_label[i]),
                            #flickr_test_set_path[i],
                            #str(true_res[i]),
                            #str(our_res[i])]) + "\n")

    print "accuarcy@1:", np.mean([a == b for a, b in zip(true_res, our_res)])
    print "accuarcy@5:", np.mean([a in b for a, b in zip(true_res, our_res5)])
Example #9
File: app.py Project: 0hm/caffe
    def __init__(self, model_def_file, pretrained_model_file, mean_file,
                 raw_scale, class_labels_file, bet_file, image_dim, gpu_mode):
        logging.info('Loading net and associated files...')
        if gpu_mode:
            caffe.set_mode_gpu()
        else:
            caffe.set_mode_cpu()
        self.net = caffe.Classifier(
            model_def_file, pretrained_model_file,
            image_dims=(image_dim, image_dim), raw_scale=raw_scale,
            mean=np.load(mean_file).mean(1).mean(1), channel_swap=(2, 1, 0)
        )

        with open(class_labels_file) as f:
            labels_df = pd.DataFrame([
                {
                    'synset_id': l.strip().split(' ')[0],
                    'name': ' '.join(l.strip().split(' ')[1:]).split(',')[0]
                }
                for l in f.readlines()
            ])
        self.labels = labels_df.sort('synset_id')['name'].values

        self.bet = cPickle.load(open(bet_file))
        # A bias to prefer children nodes in single-chain paths
        # I am setting the value to 0.1 as a quick, simple model.
        # We could use better psychological models here...
        self.bet['infogain'] -= np.array(self.bet['preferences']) * 0.1
Example #10
 def __init__(self, weights_path, image_net_proto, lstm_net_proto,
              vocab_path, device_id=-1):
   if device_id >= 0:
     caffe.set_mode_gpu()
     caffe.set_device(device_id)
   else:
     caffe.set_mode_cpu()
   # Setup image processing net.
   phase = caffe.TEST
   self.image_net = caffe.Net(image_net_proto, weights_path, phase)
   image_data_shape = self.image_net.blobs['data'].data.shape
   self.transformer = caffe.io.Transformer({'data': image_data_shape})
   channel_mean = np.zeros(image_data_shape[1:])
   channel_mean_values = [104, 117, 123]
   assert channel_mean.shape[0] == len(channel_mean_values)
   for channel_index, mean_val in enumerate(channel_mean_values):
     channel_mean[channel_index, ...] = mean_val
   self.transformer.set_mean('data', channel_mean)
   self.transformer.set_channel_swap('data', (2, 1, 0))
   self.transformer.set_transpose('data', (2, 0, 1))
   # Setup sentence prediction net.
   self.lstm_net = caffe.Net(lstm_net_proto, weights_path, phase)
   self.vocab = ['<EOS>']
   with open(vocab_path, 'r') as vocab_file:
     self.vocab += [word.strip() for word in vocab_file.readlines()]
   assert(self.vocab[1] == '<unk>')
   self.vocab_inv = dict([(w,i) for i,w in enumerate(self.vocab)])
   net_vocab_size = self.lstm_net.blobs['predict'].data.shape[2]
   if len(self.vocab) != net_vocab_size:
     raise Exception('Invalid vocab file: contains %d words; '
         'net expects vocab with %d words' % (len(self.vocab), net_vocab_size))
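A sketch of pushing one preprocessed image through the image net configured above; the method name is hypothetical, the 'data' blob name follows the code:

 def encode_image(self, image):
   # image: HxWx3 RGB array; the transformer's mean values assume a [0, 255] range
   data = self.transformer.preprocess('data', image)
   self.image_net.blobs['data'].data[0, ...] = data
   self.image_net.forward()
   return self.image_net.blobs[self.image_net.outputs[0]].data[0].copy()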
Example #11
def main(input, output, disp, gpu):
    make_sure_path_exists(input)
    make_sure_path_exists(output)

    # should be picked up by caffe by default, but just in case
    # add by macpod
    if gpu:
        caffe.set_mode_gpu()
        caffe.set_device(0)
        
    frame = np.float32(PIL.Image.open(input+'/0001.jpg'))
    frame_i = 1
    
    # count the input frames
    nrframes = len([name for name in os.listdir(input) if os.path.isfile(os.path.join(input, name))])

    for i in xrange(frame_i,nrframes):
        frame = deepdream(
            net, frame, end = layersloop[frame_i % len(layersloop)], disp=disp, iter_n=5)
        saveframe = output + "/%04d.jpg" % frame_i
        PIL.Image.fromarray(np.uint8(frame)).save(saveframe)
        newframe = input + "/%04d.jpg" % frame_i
        frame = morphPicture(saveframe, newframe) # give it back 50% of original picture
        frame = np.float32(frame)
        frame_i += 1
Example #12
def flowWarp(img, flow):
    import caffe
    width = img.shape[1]
    height = img.shape[0]

    print 'processing (%dx%d)' % (width, height)

    defFile = tempFilename('.prototxt')
    preprocessFile('/home/ilge/hackathon2/common/prototmp/apply_flow.prototmp',
                   defFile, {
                       'WIDTH': width,
                       'HEIGHT': height
                   })

    caffe.set_logging_disabled()
    caffe.set_mode_gpu()
    net = caffe.Net(defFile, caffe.TEST)

    os.remove(defFile)

    print 'network forward pass'

    img_input = img[np.newaxis, :, :, :].transpose(0, 3, 1, 2)
    flow_input = flow[np.newaxis, :, :, :].transpose(0, 3, 1, 2)

    net.blobs['image'].reshape(*img_input.shape)
    net.blobs['image'].data[...] = img_input
    net.blobs['flow'].reshape(*flow_input.shape)
    net.blobs['flow'].data[...] = flow_input

    net.forward()
    output = net.blobs['output'].data[...].transpose(0, 2, 3, 1).squeeze()

    return output
Example #13
  def __init__(self, model_def_file, pretrained_model_file,
         raw_scale, class_labels_file, image_dim, gpu_mode):
    logging.info('Loading net and associated files...')
    if gpu_mode: caffe.set_mode_gpu()
    else: caffe.set_mode_cpu()

    ## load models googlenet
    self.net = caffe.Classifier(
      model_def_file, pretrained_model_file,
      image_dims=(image_dim, image_dim), raw_scale=raw_scale,
      mean=np.array([104.0, 116.0, 122.0]), channel_swap=(2, 1, 0))
    logging.info('Load vision model, %s', model_def_file)

    # generate N bit lookup table
    self.lookup = np.asarray([bin(i).count('1') for i in range(1<<16)])

    # load reference bit model
    with open(self.database_param, 'rb') as file_reader:
      self.database = cPickle.load(file_reader)
    logging.info('Load database from {}'.format(self.database_param))
    logging.info('database shape {}'.format(self.database['ref'].shape))

    with open(class_labels_file) as f:
      labels_df = pd.DataFrame([
        {
          'synset_id': l.strip().split(' ')[0],
          'name': ' '.join(l.strip().split(' ')[1:]).split(',')[0]
        }
        for l in f.readlines()
      ])
    self.labels = labels_df.sort('synset_id')['name'].values
Example #14
def evaluate_caffe_nn(train_data, valid_data, test_data):
    import caffe
    from caffe import layers as L, params as P
    import caffe_utils as utils

    def gen_net(net_path, data_shape, label_shape):
        net = caffe.NetSpec()
        
        net.data = L.Input(shape=dict(dim=list(data_shape)))
        net.label = L.Input(shape=dict(dim=list(label_shape)))
        
        net.fc0 = L.InnerProduct(net.data, num_output=30, weight_filler=dict(type='xavier'))
        net.relu0 = L.ReLU(net.fc0, in_place=True)
        net.output = L.InnerProduct(net.relu0, num_output=10, weight_filler=dict(type='xavier'))

        net.loss = L.SoftmaxWithLoss(net.output, net.label)
        net.accuracy = L.Accuracy(net.output, net.label)
        
        with open(net_path, 'w') as f:
            f.write(str(net.to_proto()))

    def gen_solver(solver_path, net_path):
        from caffe.proto import caffe_pb2

        params = caffe_pb2.SolverParameter()

        params.train_net = net_path
        
        params.type = 'SGD'
        params.momentum = 0.9
        params.base_lr = 0.5

        params.lr_policy = 'step'
        params.gamma = 0.999
        params.stepsize = 1

        params.weight_decay = 0.0003

        with open(solver_path, 'w') as f:
            f.write(str(params))

    batch_size = 1024
    data_shape = (batch_size, train_data[0][0].size)
    label_shape = (batch_size, 1)
    train_blobs = utils.as_dl_blobs(train_data, batch_size, data_shape, label_shape)
    valid_blobs = utils.as_dl_blobs(valid_data, batch_size, data_shape, label_shape)
    test_blobs = utils.as_dl_blobs(test_data, batch_size, data_shape, label_shape)

    net_path = 'temp/net.prototxt'
    solver_path = 'temp/solver.txt'
    gen_net(net_path, data_shape, label_shape)
    gen_solver(solver_path, net_path)

    caffe.set_device(0)
    caffe.set_mode_gpu()

    solver = caffe.SGDSolver(solver_path)

    utils.train(solver, train_blobs, valid_blobs, 50, 5)
    print('caffe nn: %.2f%%' % (utils.evaluate(solver, test_blobs)))
Example #15
def main():
    MODEL_FILE = sys.argv[1]
    PRETRAINED = sys.argv[2]
    mean_file = sys.argv[3]
    lmdb_folder = sys.argv[4]
    train_folder = sys.argv[5]
    caffe.set_mode_gpu()
    seaNet = caffe.Net(MODEL_FILE, PRETRAINED, caffe.TEST)
    image_mean = np.load(mean_file)
    file_name = 'seaNet_submission_' + ('%0.f' % time.time()) + '.csv'
    setup_submission_file(train_folder, file_name)
    submission_file = open(file_name, 'a')
    submission_writer = csv.writer(submission_file)
    env = lmdb.open(lmdb_folder)
    txn = env.begin()
    cursor = txn.cursor()
    count = 0
    for key, value in cursor:
        count += 1
        if count % 500 == 0:
            print 'Number of Images Processed: ' + str(count)
        datum = caffe.proto.caffe_pb2.Datum()
        datum.ParseFromString(value)
        label = datum.label
        image = caffe.io.datum_to_array(datum)
        image = image.astype(np.uint8)
        image = image - image_mean
        image = image * 0.00390625
        result = seaNet.forward_all(data=np.array([image]))
        probs = result['prob'][0]
        img_row = [ '_'.join(key.split('_')[1:])]
        img_row.extend(probs)
        submission_writer.writerow(img_row)
    submission_file.close()
Example #16
def setup():
    global resnet_mean
    global resnet_net
    global vqa_net
    # data provider
    vqa_data_provider_layer.CURRENT_DATA_SHAPE = EXTRACT_LAYER_SIZE

    # mean subtraction
    blob = caffe.proto.caffe_pb2.BlobProto()
    with open(RESNET_MEAN_PATH, 'rb') as f:
        data = f.read()
    blob.ParseFromString(data)
    resnet_mean = np.array( caffe.io.blobproto_to_array(blob)).astype(np.float32).reshape(3,224,224)
    resnet_mean = np.transpose(cv2.resize(np.transpose(resnet_mean,(1,2,0)), (448,448)),(2,0,1))

    # resnet
    caffe.set_device(GPU_ID)
    caffe.set_mode_gpu()

    resnet_net = caffe.Net(RESNET_LARGE_PROTOTXT_PATH, RESNET_CAFFEMODEL_PATH, caffe.TEST)

    # our net
    vqa_net = caffe.Net(VQA_PROTOTXT_PATH, VQA_CAFFEMODEL_PATH, caffe.TEST)

    # uploads
    if not os.path.exists(UPLOAD_FOLDER):
        os.makedirs(UPLOAD_FOLDER)

    if not os.path.exists(VIZ_FOLDER):
        os.makedirs(VIZ_FOLDER)

    print 'Finished setup'
Example #17
def main(argv):
    
    params = get_params() # check get_params.py in the same directory to see the parameters
    
    try:
      opts, args = getopt.getopt(argv,"hr:o:s:c:g:",["root=","out=","saliency_model=","caffe_path=", "gpu="])
    except getopt.GetoptError:
      print 'ERROR'
      sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
              print 'saliency.py -r <root> -o <out> -s <saliency_model> -c <caffe_path> -g <gpu>'
              sys.exit()
        elif opt in ("-r", "--root"):
              params['root'] = arg
        elif opt in ("-o", "--out"):
              params['out'] = arg
        elif opt in ("-s", "--saliency_model"):
              params['saliency_model'] = arg
        elif opt in ("-c", "--caffe_path"):
              params['caffe_path'] = arg
        elif opt in ("-g", "--gpu"):
              params['gpu'] = arg
              
    sys.path.insert(0,os.path.join(params['caffe_path'],'python'))
    import caffe
    
    compute = 'True'  # originally read from sys.argv[1]: 'true' to compute features, anything else to just visualize

    if compute in ('true', 'True'):
        
        
        deploy_file = os.path.join(params['saliency_model'],'deploy.prototxt')
        model_file = os.path.join(params['saliency_model'],'model.caffemodel')
        # I am using the mean file from caffenet...but I guess we could use a grey image as well ?
        mean_file = '/media/HDD_2TB/mcarne/keyframe-extractor/src/Saliency/deep/meanfile.npy'
        
        if params['gpu'] == True:
            caffe.set_mode_gpu()
            print 'GPU mode selected'
        else: 
            caffe.set_mode_cpu()
            print 'CPU mode selected'
            
        net = caffe.Classifier(deploy_file, model_file, mean=np.load(mean_file).mean(1).mean(1), channel_swap=(2,1,0),raw_scale=255)
        if not os.path.exists(params['out']):
            os.makedirs(params['out'])
        
        for imagepath in glob.glob(params['root'] + "/*.jpg"):
            print "Processing image..."
            scores = net.predict([caffe.io.load_image(imagepath)])
            feat = net.blobs['deconv1'].data
            #feat = np.reshape(feat, (10,4096))
            print feat, np.shape(feat)
            #meanfeat = np.average( feat, axis = 0 )
            # save to disk
            fout = params['out'] + '/' + os.path.splitext(os.path.basename(imagepath))[0]
            pickle.dump(feat, open(fout + '.p', 'wb'))
            scipy.io.savemat(fout + '.mat', mdict={'isal': feat})
Example #18
def train_cnn(db_name, train_data):

    caffe.set_mode_gpu()
    if train_data is None:
        n_samples = 1000
    else:
        n_samples = len(train_data) 
    
    solver_param = caffe_pb2.SolverParameter()
    with open('./models/solver_template.prototxt') as f:
        google.protobuf.text_format.Merge(f.read(), solver_param)


    # scale the LR step schedule with the dataset size
    solver_param.stepsize = n_samples
    solver_param.max_iter = int(n_samples * 10)

    print solver_param 
    with open('./models/solver_template_a7_' + db_name + '.prototxt', 'w') as f:
        f.write(google.protobuf.text_format.MessageToString(solver_param))
    name = f.name

    print name
    solver = caffe.SGDSolver(name)

    solver.solve()

    trained_model = str(solver_param.snapshot_prefix) + '_iter_' + str(solver_param.max_iter) + '.caffemodel'

    return trained_model
Example #19
def caffe_set_device(gpu=True, devid='0'):
	if gpu:
		caffe.set_mode_gpu()
		# once CUDA_VISIBLE_DEVICES limits visibility to a single GPU,
		# that GPU is re-indexed as device 0
		os.environ["CUDA_VISIBLE_DEVICES"] = devid
		caffe.set_device(0)
	else:
		caffe.set_mode_cpu()
Example #20
    def __init__(self,params):

        self.dimension = params['dimension']
        self.dataset = params['dataset']
        self.pooling = params['pooling']
        # Read image lists
        with open(params['query_list'],'r') as f:
            self.query_names = f.read().splitlines()

        with open(params['frame_list'],'r') as f:
            self.database_list = f.read().splitlines()

        # Parameters needed
        self.layer = params['layer']
        self.save_db_feats = params['database_feats']

        # Init network
        if params['gpu']:
            caffe.set_mode_gpu()
            caffe.set_device(0)
        else:
            caffe.set_mode_cpu()
        print "Extracting from:", params['net_proto']
        cfg.TEST.HAS_RPN = True
        self.net = caffe.Net(params['net_proto'], params['net'], caffe.TEST)
Example #21
File: test.py Project: haneul/mcdnn
def main():
    import caffe
    import numpy as np
    caffe_dir = "../caffe"
    MODEL_FILE = caffe_dir + "/models/bvlc_reference_caffenet/deploy.prototxt"
    PRETRAINED = caffe_dir + "/models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel"
    IMAGE_FILE = "../cat.jpg"

    with open("synset_words.txt") as f:
        words = f.readlines()
    words = map(lambda x: x.strip(), words)

    net = caffe.Classifier(MODEL_FILE, PRETRAINED,
                           mean=np.load(caffe_dir + '/python/caffe/imagenet/ilsvrc_2012_mean.npy'),
                           channel_swap=(2,1,0),
                           raw_scale=255,
                           image_dims=(256, 256)) 
    caffe.set_phase_test()  # old Caffe API; newer versions take the phase in the Net/Classifier constructor
    caffe.set_mode_gpu()
    input_image = caffe.io.load_image(IMAGE_FILE)
    #prediction = net.predict([input_image])
    prediction = net.forward_all(data=np.asarray([net.preprocess('data', input_image)]))

    i = prediction["prob"].argmax()
    print(i)
    print(words[i])
Example #22
def main(args_list):
    args = parse_args(args_list)

    print('Called with args:')
    print(args)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    cfg.GPU_ID = args.gpu_id

    print('Using config:')
    pprint.pprint(cfg)

    while not os.path.exists(args.caffemodel) and args.wait:
        print('Waiting for {} to exist...'.format(args.caffemodel))
        time.sleep(10)

    caffe.set_mode_gpu()
    caffe.set_device(args.gpu_id)
    net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST)
    net.name = os.path.splitext(os.path.basename(args.caffemodel))[0]

    imdb = get_imdb(args.imdb_name)
    imdb.competition_mode(args.comp_mode)
    if not cfg.TEST.HAS_RPN:
        imdb.set_proposal_method(cfg.TEST.PROPOSAL_METHOD)

    test_net(net, imdb, max_per_image=args.max_per_image, vis=args.vis)
Example #23
def make_train_featvec(X, y):
    """
    This methods needs sufficient memories
    """
    ntrain = len(X)
    batch = 1000
    nbatch = ntrain/batch
    feat_vec = np.zeros((nbatch*batch, 4096))

    caffe.set_mode_gpu()
    our_model = OurFlickr()

    for n in range(nbatch):
        print "## batch {}/{} ##".format(n, nbatch)
        X_batch, _ = get_train_dataset(X[batch*n:batch*(n+1)], y[batch*n:batch*(n+1)])
        featvec_batch = our_model.net_extractor(X=X_batch)
        feat_vec[batch*n:batch*(n+1)] = featvec_batch
        gc.collect()

    del our_model
    del X_batch
    del X
    del y
    gc.collect()

    print feat_vec
    print feat_vec.shape
    pickle.dump(feat_vec, open('feat_test_place.pickle', 'wb'), protocol=2)
Example #24
def set_caffe_mode(gpu):
    if gpu == 0: # cpu mode
        caffe.set_mode_cpu()
    else: # gpu mode
        caffe.set_device(0)
        caffe.set_mode_gpu()
    return 0
Example #25
def style_labeler():
    ntrain = 70000  # max is 7000, not 7500
    X_train, y_train = get_train_dataset(flickr_train_set_path[:ntrain], flickr_train_set_label[:ntrain])

    caffe.set_mode_gpu()
    our_model = OurFlickr()
    our_model.fit(X_train, y_train)  # TODO SVM batch fitting
    our_model.transform()
    our_model.compile()

    true_res = []
    svm_res = []
    sfmax_res = []
    with open('./label_result_all.csv', 'w') as f:
        for i in range(len(flickr_test_set_path)):
            if i % 1000 == 0: gc.collect()
            print i
            img = caffe.io.load_image(flickr_test_set_path[i])
            sfmax, svm = our_model.predict_our(img)
            sfmax_res.append(sfmax)
            svm_res.append(svm)
            true_res.append(flickr_test_set_label[i])
            f.write(",".join([flickr_test_set_path[i],
                              str(flickr_test_set_label[i]),
                              str(sfmax),
                              str(svm[0])]) + "\n")

    print "svm accuarcy:", np.mean([a == b for a, b in zip(true_res, svm_res)])
    print "sfmax accuracy:", np.mean([a == b for a, b in zip(true_res, sfmax_res)])
Example #26
def init_caffe_net(gpu_id, raw_image_size, crop_size, batch_size):
    '''
    Initialize the caffe configuration.
    This function extracts features from RGB images.
    If your dataset contains grayscale images, change the channel count from 3 to 1
    and drop the transpose and channel-swap preprocessing below.
    '''
    caffe.set_mode_gpu()
    caffe.set_device(int(gpu_id)) # choose among the available GPUs, e.g. {0, 1, 2, 3}
    # The deploy prototxt definition.
    model_def = '/home/u514/caffe-i/caffe-master/caffe/models/vgg/vgg_2048/deploy-bak.prototxt'
    # The pre-trained model.
    caffemodel = '/home/u514/caffe-i/caffe-master/caffe/models/vgg/vgg_2048/pretrain_ilsvrc2012_vgg_2048.caffemodel'
    # The mean file of the image set used to train the model.
    mean_file = '/home/u514/caffe-i/caffe-master/caffe/models/vgg/vgg_2048/vgg_mean.npy'
    net = caffe.Net(model_def, caffemodel, caffe.TEST)

    transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
    transformer.set_transpose('data', (2, 0, 1)) # (H,W,C) to (C,H,W)
    transformer.set_mean('data', np.load(mean_file).mean(1).mean(1))
    transformer.set_raw_scale('data', int(raw_image_size))
    transformer.set_channel_swap('data', (2, 1, 0)) # RGB to BGR

    # Set batch size (default: 50).
    net.blobs['data'].reshape(int(batch_size), 3, int(crop_size), int(crop_size))
    return net, transformer
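A sketch of scoring a batch of images with the returned net and transformer; the image list and the 'prob' output blob are assumptions:

net, transformer = init_caffe_net(gpu_id=0, raw_image_size=255, crop_size=224, batch_size=50)
images = [caffe.io.load_image(p) for p in image_paths]  # image_paths is a hypothetical list
data = np.asarray([transformer.preprocess('data', im) for im in images])
net.blobs['data'].data[:len(data), ...] = data
probs = net.forward()['prob'][:len(data)]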
Example #27
def init_testnet(test_net, trained_model=None, test_device=0):
    caffe.set_mode_gpu()
    caffe.select_device(test_device, False)
    if trained_model is None:
        return caffe.Net(test_net, caffe.TEST)
    else:
        return caffe.Net(test_net, trained_model, caffe.TEST)
Example #28
def main():
    ntrain = 4000  # max is 7000, not 7500
    #itest = 9

    caffe.set_mode_gpu()

    our_model = OurFlickr()
    #### past fail model####
    #X_train, y_train = get_train_dataset(flickr_train_set_path[:ntrain], flickr_train_set_label[:ntrain])
    #our_model.fit(X_train, y_train)  # TODO SVM batch fitting
    #our_model.transform()
    #our_model.compile()

    #### use pickled training data features ####
    our_model._fit(flickr_train_set_label[:63000], pickle_name='feat_train.pickle')
    our_model.compile()

    true_res = []
    svm_res = []
    sfmax_res = []
    for i in range(15000):
        img = caffe.io.load_image(flickr_test_set_path[i])
        sfmax, svm = our_model.predict_our(img)
        sfmax_res.append(sfmax)
        svm_res.append(svm)
        true_res.append(flickr_test_set_label[i])
        print "true label", flickr_test_set_label[i]

    print "svm accuarcy:", np.mean([a == b for a, b in zip(true_res, svm_res)])
    print "sfmax accuracy:", np.mean([a == b for a, b in zip(true_res, sfmax_res)])
Example #29
def imgs_to_lmdb(path_src, src_imgs, path_dst, labels=None):
    '''
    Generate LMDB file from set of images
    Source: https://github.com/BVLC/caffe/issues/1698#issuecomment-70211045
    credit: Evan Shelhamer
    '''

    caffe.set_mode_gpu()

    if labels is None:
        labels = [0] * len(src_imgs)

    db = lmdb.open(path_dst, map_size=int(1e12))

    with db.begin(write=True) as in_txn:
        for idx, img_name in enumerate(src_imgs):

            path_ = os.path.join(path_src, img_name)

            img = np.array(Image.open(path_).convert('RGB')).astype("uint8")
            img = img[:,:,::-1]
            img = img.transpose((2,0,1))
            img_dat = caffe.io.array_to_datum(img, labels[idx])
            in_txn.put('{:0>10d}'.format(idx), img_dat.SerializeToString())

    db.close()

    return 0
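A short read-back sketch to verify the LMDB written above; the key format matches the writer's '{:0>10d}':

db = lmdb.open(path_dst, readonly=True)  # path_dst: the path passed to imgs_to_lmdb
with db.begin() as txn:
    datum = caffe.proto.caffe_pb2.Datum()
    datum.ParseFromString(txn.get('{:0>10d}'.format(0)))
    img = caffe.io.datum_to_array(datum)  # C x H x W, BGR as stored
    print img.shape, datum.label
db.close()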
Example #30
def train():
    with open('./seg_low_res_model/proto_train.prototxt', 'w') as f:
        f.write(str(segmodel.generate_model('train', config.N)))

    caffe.set_device(config.gpu_id)
    caffe.set_mode_gpu()

    solver = caffe.get_solver('./seg_low_res_model/solver.prototxt')
    if config.weights is not None:
        solver.net.copy_from(config.weights)

    cls_loss_avg = 0.0
    avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg = 0.0, 0.0, 0.0
    decay = 0.99

    for it in range(config.max_iter):
        solver.step(1)

        cls_loss_val = solver.net.blobs['loss'].data
        scores_val = solver.net.blobs['fcn_scores'].data.copy()
        label_val = solver.net.blobs['label'].data.copy()

        cls_loss_avg = decay*cls_loss_avg + (1-decay)*cls_loss_val
        print('\titer = %d, cls_loss (cur) = %f, cls_loss (avg) = %f'
            % (it, cls_loss_val, cls_loss_avg))

        # Accuracy
        accuracy_all, accuracy_pos, accuracy_neg = compute_accuracy(scores_val, label_val)
        avg_accuracy_all = decay*avg_accuracy_all + (1-decay)*accuracy_all
        avg_accuracy_pos = decay*avg_accuracy_pos + (1-decay)*accuracy_pos
        avg_accuracy_neg = decay*avg_accuracy_neg + (1-decay)*accuracy_neg
        print('\titer = %d, accuracy (cur) = %f (all), %f (pos), %f (neg)'
              % (it, accuracy_all, accuracy_pos, accuracy_neg))
        print('\titer = %d, accuracy (avg) = %f (all), %f (pos), %f (neg)'
              % (it, avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg))
Example #31
def classify(net_config_location, net_weights, image_location, alpha_channel,
             use_gpu, win_size, crop_size):

    if use_gpu:
        caffe.set_device(0)
        caffe.set_mode_gpu()
    else:
        caffe.set_mode_cpu()

    with open(net_config_location, 'r') as f:
        s = f.read()

    if alpha_channel:
        append = 'input: "data" input_dim: 1 input_dim: 4 input_dim: 512 input_dim: 512 \n'
    else:
        append = 'input: "data" input_dim: 1 input_dim: 3 input_dim: 512 input_dim: 512 \n'

    s = append + s

    with open('temp.prototxt', 'w') as f:
        f.write(s)

    net = caffe.Net('temp.prototxt', net_weights, caffe.TEST)

    print 'Opening image...'

    #open image
    in_ = getimage(image_location, alpha_channel)

    print 'Image opened...'

    #height and width of original image
    orig_img_h = in_.shape[1]
    orig_img_w = in_.shape[2]

    #size of valid output patch
    out_size = win_size - 2 * crop_size

    #number of patches horizontally and vertically
    n_patch_horiz = int(math.ceil(orig_img_w / float(out_size)))
    n_patch_vert = int(math.ceil(orig_img_h / float(out_size)))

    #pad image...

    #how much to pad?
    pad_w_before = crop_size
    pad_h_before = crop_size
    pad_w_after = n_patch_horiz * out_size + crop_size - orig_img_w
    pad_h_after = n_patch_vert * out_size + crop_size - orig_img_h

    #do padding
    in_ = np.pad(in_, ((0, 0), (pad_h_before, pad_h_after),
                       (pad_w_before, pad_w_after)),
                 mode='symmetric')

    # shape for input (data blob is N x C x H x W), set data

    if alpha_channel:
        channels = 4
    else:
        channels = 3

    net.blobs['data'].reshape(1, channels, win_size, win_size)

    print 'Predicting...'

    rows = []

    for i in range(0, n_patch_vert):
        patches_in_row = []
        for j in range(0, n_patch_horiz):

            input_ = in_[:, out_size * i:out_size * i + win_size,
                         out_size * j:out_size * j + win_size]
            net.blobs['data'].data[...] = input_

            # run net prediction
            net.forward()
            patch_out = net.blobs['prob'].data[0]

            #compute offset in case output patch provided by the network
            #is larger than it should be
            h_offset = (net.blobs['prob'].data[0].shape[1] - out_size) / 2
            w_offset = (net.blobs['prob'].data[0].shape[2] - out_size) / 2

            #crop
            patch_out = patch_out[:, h_offset:h_offset + out_size,
                                  w_offset:w_offset + out_size]

            patches_in_row.append(np.copy(patch_out))

        row = np.concatenate(patches_in_row, 2)

        rows.append(np.copy(row))

    entire_output = np.concatenate(rows, 1)

    #remove excess border
    output = entire_output[:, 0:orig_img_h, 0:orig_img_w]

    #out.astype('double').tofile("prob.dat");

    if output.shape[0] == 1:
        pred = np.rint(np.squeeze(output)).astype(np.uint8)
    else:
        pred = output.argmax(axis=0).astype(np.uint8)

    print 'Done predicting.'

    return pred
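A quick worked example of the tiling arithmetic above, with illustrative values (win_size=512, crop_size=64) and math imported:

win_size, crop_size = 512, 64
out_size = win_size - 2 * crop_size                               # 384 valid pixels per patch
orig_img_h, orig_img_w = 1000, 1500
n_patch_horiz = int(math.ceil(orig_img_w / float(out_size)))      # ceil(1500/384) = 4
n_patch_vert = int(math.ceil(orig_img_h / float(out_size)))       # ceil(1000/384) = 3
pad_w_after = n_patch_horiz * out_size + crop_size - orig_img_w   # 4*384 + 64 - 1500 = 100
pad_h_after = n_patch_vert * out_size + crop_size - orig_img_h    # 3*384 + 64 - 1000 = 216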
Example #32
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    cfg.GPU_ID = args.gpu_id

    print('Using config:')
    pprint.pprint(cfg)

    if not args.randomize:
        # fix the random seeds (numpy and caffe) for reproducibility
        np.random.seed(cfg.RNG_SEED)
        caffe.set_random_seed(cfg.RNG_SEED)

    # set up caffe
    caffe.set_mode_gpu()
    caffe.set_device(args.gpu_id)

    imdb, roidb = combined_roidb(args.imdb_name)
    print '{:d} roidb entries'.format(len(roidb))

    output_dir = get_output_dir(imdb)
    print 'Output will be saved to `{:s}`'.format(output_dir)

    train_net(args.solver,
              roidb,
              output_dir,
              pretrained_model=args.pretrained_model,
              max_iters=args.max_iters)
Example #33
def main():
    """
    main function
    """

    # time start
    time_start = datetime.datetime.now()

    print(args)

    if args.proto is None or args.model is None or args.mean is None or args.images is None:
        usage_info()
        return None

    # deploy caffe prototxt path
    net_file = args.proto

    # trained caffemodel path
    caffe_model = args.model

    # mean value
    mean = args.mean

    # norm value
    norm = 1.0
    if args.norm != 1.0:
        norm = args.norm[0]

    # calibration dataset
    images_path = args.images

    # the output calibration file
    calibration_path = args.output

    # enable the group scale
    group_on = args.group

    # use the CPU for the forward pass by default
    if args.gpu != 0:
        caffe.set_device(0)
        caffe.set_mode_gpu()

    # initialize the caffe net and the forward mode (GPU or CPU)
    net = caffe.Net(net_file, caffe_model, caffe.TEST)

    # prepare the cnn network
    transformer = network_prepare(net, mean, norm)

    # get the calibration datasets images files path
    images_files = file_name(images_path)

    # quantize the kernel weights of the caffemodel to find the calibration table
    weight_quantize(net, net_file, group_on)

    # quantize the activation values of the caffemodel to find their calibration table
    activation_quantize(net, transformer, images_files)

    # save the calibration tables; best wishes for a low accuracy loss on your INT8 inference :)
    save_calibration_file(calibration_path)

    # time end
    time_end = datetime.datetime.now()

    print(
        "\nCaffe INT8 calibration table created successfully; it took %s. Best wishes for a low accuracy loss on your INT8 inference...\(^▽^)/...2333..."
        % (time_end - time_start))
Example #34
def main():

    # caffe init
    gpu_id = 0
    caffe.set_device(gpu_id)
    caffe.set_mode_gpu()

    # spatial prediction
    model_def_file = '../stack_motionnet_vgg16_deploy.prototxt'
    model_file = '../logs_end/hmdb51_split2_vgg16_hidden.caffemodel'
    FRAME_PATH = "TODO"
    spatial_net = caffe.Net(model_def_file, model_file, caffe.TEST)

    val_file = "./testlist02.txt"
    f_val = open(val_file, "r")
    val_list = f_val.readlines()
    print "we got %d test videos" % len(val_list)

    start_frame = 0
    num_categories = 51
    feature_layer = 'fc8_vgg16'
    spatial_mean_file = './rgb_mean.mat'
    dims = (len(val_list), num_categories)
    predict_results_before = np.zeros(shape=dims, dtype=np.float64)
    predict_results = np.zeros(shape=dims, dtype=np.float64)

    correct = 0
    line_id = 0
    spatial_results_before = {}
    spatial_results = {}

    for line in val_list:
        line_info = line.split(" ")
        input_video_dir_part = line_info[0]
        input_video_dir = os.path.join(FRAME_PATH, input_video_dir_part[:-4])
        input_video_label = int(line_info[1])

        spatial_prediction = HiddenTemporalPrediction(
                input_video_dir,
                spatial_mean_file,
                spatial_net,
                num_categories,
                feature_layer,
                start_frame)
        avg_spatial_pred_fc8 = np.mean(spatial_prediction, axis=1)
        avg_spatial_pred = np.asarray(softmax(avg_spatial_pred_fc8))
        predict_label = np.argmax(avg_spatial_pred)

        predict_results_before[line_id, :] = avg_spatial_pred_fc8
        predict_results[line_id, :] = avg_spatial_pred

        print input_video_dir
        print input_video_label-1, predict_label

        line_id += 1
        if predict_label == input_video_label-1:
            correct += 1

    print correct
    print "prediction accuracy is %4.4f" % (float(correct)/len(val_list))

    spatial_results_before["hidden_prediction_before"] = predict_results_before
    spatial_results["hidden_prediction"] = predict_results

    sio.savemat("./hmdb51_split2_hidden_before.mat", spatial_results_before)
    sio.savemat("./hmdb51_split2_hidden.mat", spatial_results)
Example #35
def caffe_load_from_ckpt(prototxt, checkpoint, to_caffemodel):
    ### load caffe model and weights
    caffe.set_mode_gpu()
    net = caffe.Net(prototxt, caffe.TEST)

    ### load tf model
    tf.reset_default_graph()
    images = tf.placeholder(tf.float32,
                            shape=(None, image_scale, image_scale, 3))
    with tf.contrib.slim.arg_scope(
            mobilenet_v2.training_scope(is_training=False)):
        logits, endpoints = mobilenet_v2.mobilenet(
            images,
            num_classes=1001,
            depth_multiplier=factor,
            finegrain_classification_mode=True)
    ema = tf.train.ExponentialMovingAverage(0.999)
    vars = ema.variables_to_restore()
    saver = tf.train.Saver(vars)

    ### convert variables from tf checkpoints to caffemodel
    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        tf_all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        # for i, var in enumerate(tf_all_vars):
        #     print(i, var.name, var.shape.as_list())
        print(
            '------------------------------------------------------------------'
        )
        i = 0  # index
        for caffe_var_name in net.params.keys():
            for n in range(len(net.params[caffe_var_name])):
                if list(net.params[caffe_var_name][n].data.shape) != [1]:
                    var = tf_all_vars[i]
                    print(i, caffe_var_name,
                          net.params[caffe_var_name][n].data.shape, var.name,
                          var.shape.as_list())
                    i += 1
        # exit()
        """ tf name scope:
        convolutional layer:
        "MobilenetV2/....../...weights:0"
        "MobilenetV2/....../BatchNorm/gamma:0"
        "MobilenetV2/....../BatchNorm/beta:0"
        "MobilenetV2/....../BatchNorm/moving_mean:0"
        "MobilenetV2/....../BatchNorm/moving_variance:0"
        fully connected layer:
        "MobilenetV2/....../...weights:0"
        "MobilenetV2/....../biases:0"
        """

        #            name,           shape list
        # caffe_var: caffe_var_name, list(net.params[caffe_var_name][n].data.shape)
        # tf_var   : tf_var.name,    tf_var.shape.as_list()

        ### 262 variables to convert from tf.ckpt to caffemodel

        i = 0  # index
        for caffe_var_name in net.params.keys():
            for n in range(len(net.params[caffe_var_name])):
                if list(net.params[caffe_var_name][n].data.shape) != [1]:

                    ### Compare caffe_var and tf_var here

                    # caffe_var_name = caffe_var_name
                    caffe_var_data = net.params[caffe_var_name][n].data
                    caffe_var_shape = list(caffe_var_data.shape)

                    tf_var_name = tf_all_vars[i].name
                    tf_var_shape = tf_all_vars[i].shape.as_list()
                    if 'weights:0' in tf_var_name:
                        ### weight layer
                        # print(caffe_var_name, caffe_var_shape, '|||||||||||', tf_var_name, tf_var_shape)

                        tf_var_data = sess.run(tf_all_vars[i])

                        ### swap tf_var axis for caffe_var:
                        ### tf_var shape: (height, width, channel_out, channel_in) for depthwise_weights
                        ###               (height, width, channel_in, channel_out) for other weights
                        ### caffe_var shape: (channel_out, channel_in, height, width)

                        tf_var_data = np.transpose(tf_var_data,
                                                   axes=(3, 2, 0, 1))

                        if '/depthwise_weights' in tf_var_name:
                            tf_var_data = np.swapaxes(tf_var_data,
                                                      axis1=0,
                                                      axis2=1)

                        if 'Logits/' in tf_var_name:
                            ### mismatched num_classes
                            ### tf class 0: 'background'
                            caffe_var_data[:, ...] = tf_var_data[1:, ...]
                        else:
                            caffe_var_data[...] = tf_var_data[...]

                    if 'biases:0' in tf_var_name:
                        ### bias layer
                        # print(caffe_var_name, caffe_var_shape, '|||||||||||', tf_var_name, tf_var_shape)
                        ### tf_var_shape: (1001,)
                        ### caffe_var_shape: (1000,)
                        tf_var_data = sess.run(tf_all_vars[i])
                        caffe_var_data[:] = tf_var_data[1:]

                    if 'BatchNorm/gamma:0' in tf_var_name:
                        ### batchnorm scaling layer, but convert mean
                        # print(caffe_var_name, n, caffe_var_shape, '|||||||||||', tf_all_vars[i+2].name, tf_all_vars[i+2].shape.as_list())
                        ### tf_var_shape: (channel,)
                        ### caffe_var_shape: (channel,)
                        tf_var_data = sess.run(tf_all_vars[i + 2])
                        caffe_var_data[...] = tf_var_data[...]

                    if 'BatchNorm/beta:0' in tf_var_name:
                        ### batchnorm scaling layer, but convert variance
                        # print(caffe_var_name, n, caffe_var_shape, '|||||||||||', tf_all_vars[i+2].name, tf_all_vars[i+2].shape.as_list())
                        ### tf_var_shape: (channel,)
                        ### caffe_var_shape: (channel,)
                        tf_var_data = sess.run(tf_all_vars[i + 2])
                        caffe_var_data[...] = tf_var_data[...]  # + 1e-3 -1e-5

                    if 'BatchNorm/moving_mean:0' in tf_var_name:
                        ### batchnorm moving average layer, but convert gamma
                        # print(caffe_var_name, n, caffe_var_shape, '|||||||||||', tf_all_vars[i-2].name, tf_all_vars[i-2].shape.as_list())
                        ### tf_var_shape: (channel,)
                        ### caffe_var_shape: (channel,)
                        tf_var_data = sess.run(tf_all_vars[i - 2])
                        caffe_var_data[...] = tf_var_data[...]

                    if 'BatchNorm/moving_variance:0' in tf_var_name:
                        ### batchnorm moving average layer, but convert beta
                        # print(caffe_var_name, n, caffe_var_shape, '|||||||||||', tf_all_vars[i-2].name, tf_all_vars[i-2].shape.as_list())
                        ### tf_var_shape: (channel,)
                        ### caffe_var_shape: (channel,)
                        tf_var_data = sess.run(tf_all_vars[i - 2])
                        caffe_var_data[...] = tf_var_data[...]
                    i += 1
                else:
                    ### moving average factor, must set to 1
                    net.params[caffe_var_name][n].data[...] = 1.
                    # print(caffe_var_name, n, list(net.params[caffe_var_name][n].data.shape), '|||||||||||', net.params[caffe_var_name][n].data)

    net.save(to_caffemodel)
    print('Save converted caffemodel to', to_caffemodel)
    return net
Example #36
def build_tsv(gpu_id=0):
    # Set up the simulator
    sim = MatterSim.Simulator()
    sim.setCameraResolution(WIDTH, HEIGHT)
    sim.setCameraVFOV(math.radians(VFOV))
    sim.setDiscretizedViewingAngles(False)
    sim.setBatchSize(1)
    sim.setPreloadingEnabled(True)
    sim.initialize()

    # Set up Caffe Faster R-CNN
    cfg_from_file(CFG_FILE)
    caffe.set_mode_gpu()
    caffe.set_device(gpu_id)
    net = caffe.Net(PROTO, caffe.TEST, weights=MODEL)
    classes, attributes = load_classes()

    count = 0
    t_render = Timer()
    t_net = Timer()
    with open(OUTFILE % gpu_id, 'wt') as tsvfile:
        writer = csv.DictWriter(tsvfile,
                                delimiter='\t',
                                fieldnames=TSV_FIELDNAMES)

        # Loop all the viewpoints in the simulator
        viewpointIds = load_viewpointids(gpu_id)
        for scanId, viewpointId in viewpointIds:
            t_render.tic()
            # Loop all discretized views from this location
            ims = []
            sim.newEpisode([scanId], [viewpointId], [0],
                           [math.radians(ELEVATION_START)])
            for ix in range(VIEWPOINT_SIZE):
                state = sim.getState()[0]

                # Transform and save generated image
                ims.append(transform_img(state.rgb))
                # Build state
                if ix == 0:
                    record = {
                        'scanId': state.scanId,
                        'viewpointId': state.location.viewpointId,
                        'viewHeading': np.zeros(VIEWPOINT_SIZE,
                                                dtype=np.float32),
                        'viewElevation': np.zeros(VIEWPOINT_SIZE,
                                                  dtype=np.float32),
                        'image_h': HEIGHT,
                        'image_w': WIDTH,
                        'vfov': VFOV
                    }
                record['viewHeading'][ix] = state.heading
                record['viewElevation'][ix] = state.elevation

                # Move the sim viewpoint so it ends in the same place
                elev = 0.0
                heading_chg = math.pi * 2 / VIEWS_PER_SWEEP
                view = ix % VIEWS_PER_SWEEP
                sweep = ix // VIEWS_PER_SWEEP
                if view + 1 == VIEWS_PER_SWEEP:  # Last viewpoint in sweep
                    elev = math.radians(ELEVATION_INC)
                sim.makeAction([0], [heading_chg], [elev])

            t_render.toc()
            t_net.tic()
            # Run detection
            for ix in range(VIEWPOINT_SIZE):
                get_detections_from_im(record, net, ims[ix])
            if DRY_RUN:
                print('%d: Detected %d objects in pano' %
                      (gpu_id, record['features'].shape[0]))
            filter(record, MAX_TOTAL_BOXES)
            if DRY_RUN:
                print('%d: Reduced to %d objects in pano' %
                      (gpu_id, record['features'].shape[0]))
                for ix in range(VIEWPOINT_SIZE):
                    fig = visual_overlay(ims[ix], record, ix, classes,
                                         attributes)
                    fig.savefig('img_features/examples/%s-%s-%d.png' %
                                (record['scanId'], record['viewpointId'], ix))
                    plt.close()

            for k, v in record.items():
                if isinstance(v, np.ndarray):
                    record[k] = str(base64.b64encode(v), "utf-8")
            writer.writerow(record)
            count += 1
            t_net.toc()
            if count % 10 == 0:
                print('%d: Processed %d / %d viewpoints, %.1fs avg render time, %.1fs avg net time, projected %.1f hours' %\
                  (gpu_id, count,len(viewpointIds), t_render.average_time, t_net.average_time,
                  (t_render.average_time+t_net.average_time)*len(viewpointIds)/3600))
                if DRY_RUN:
                    return
Example #37
def image2mat(net, transformer, inputimagedir, outdir, labelfilepath,
              layername):
    mat = []
    # lines =  labelfile(labelfilepath)
    #  print lines
    labels = []
    pred = []
    predroc = []
    nn = 0
    caffe.set_mode_gpu()
    allimages = GetFileList(inputimagedir, [])
    print allimages
    testimages = allimages
    # from random import shuffle
    import random
    # print allimages
    random.shuffle(testimages)
    errorimagelist = "./error/" + outdir.split(".")[0]
    if not os.path.exists(errorimagelist):
        os.makedirs(errorimagelist)

    # print testimages

    for image in testimages:
        print image
        gtlabel = int(image.split("\\")[-2])
        # print gtlabel
        try:
            net.blobs['data'].data[...] = transformer.preprocess(
                'data', caffe.io.load_image(image))
        except Exception, e:
            print nn
            print str(e)
            nn += 1
            continue
        out = net.forward()
        # pred.append(str(out['prob'].argmax()))
        #  print (out['prob'].shape)
        #  pred.append(out['prob'][1])
        # print("image is {}Predicted class is #{}.".format(image,out['prob'].argmax()))
        if out['prob'].argmax() != gtlabel:
            print out['prob'].argmax(), gtlabel
            shutil.copy(
                image,
                errorimagelist + "/" + image.split("/")[-1].split(".")[0] +
                "_pred_" + str(out['prob'].argmax()) + ".png")
        # caffe.set_mode_gpu()
        # caffe.set_device(0)
        #net.forward()  # call once for allocation
        # %timeit net.forward()
        # feat = net.blobs[layername].data[1]
        feat = net.blobs[net.blobs.keys()[-2]].data[0]
        # for layer_name, param in net.params.iteritems():
        # print layer_name + '\t' + str(param[0].data.shape), str(param[1].data.shape)
        # print net.blobs.keys()
        # filters = net.params['conv1'][0].data
        # print filters
        predroc.append(net.blobs[net.blobs.keys()[-1]].data[0].flatten())
        pred.append(
            np.argmax(net.blobs[net.blobs.keys()[-1]].data[0].flatten()))
        # print "===>>",net.blobs[net.blobs.keys()[-1]].data[0].flatten()
        # pred.append(out['prob'])
        # print out['prob']
        # print net.blobs[net.blobs.keys()[-2]].data[0]
        #np.savetxt(image+'feature.txt', feat.flat)
        #print type(feat.flat)
        featline = feat.flatten()
        # print featline
        #print type(featline)
        #featlinet= zip(*(featline))
        mat.append(featline)
        label = image.split("\\")[-2]
        # labels.append(str(lines[nn][1]))
        labels.append(int(label))
        #  print "===>>",out['prob'].argmax()
        #  print "=====>>",lines[nn][1]
        if nn % 100 == 0:
            with open(outdir, 'w') as f:
                scipy.io.savemat(f, {'data': mat, 'labels': labels})  # periodic snapshot; overwritten each time
        nn += 1
Example #38
def extract_feature(network_proto_path,
                    network_model_path,
                    image_list, data_mean, layer_name, image_as_grey = False):
    """
    Extracts features for given model and image list.

    Input
    network_proto_path: network definition file, in prototxt format.
    network_model_path: trainded network model file
    image_list: A list contains paths of all images, which will be fed into the
                network and their features would be saved.
    layer_name: The name of layer whose output would be extracted.
    save_path: The file path of extracted features to be saved.
    """

    caffe.set_mode_gpu()


    net = caffe.Net(network_proto_path, network_model_path,caffe.TEST)
    


    # input preprocessing: 'data' is the name of the input blob == net.inputs[0]
    transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
    transformer.set_transpose('data', (2,0,1))
    transformer.set_mean('data', np.float32([104.0, 117.0, 122.0])) # mean pixel
    transformer.set_raw_scale('data', 255)  # the reference model operates on images in [0,255] range instead of [0,1]
    transformer.set_channel_swap('data', (2,1,0))


    blobs = OrderedDict( [(k, v.data) for k, v in net.blobs.items()])

    shp = blobs[layer_name].shape
    print blobs['data'].shape

    batch_size = blobs['data'].shape[0]
    print blobs[layer_name].shape
    #print 'debug-------\nexit'
    #exit()

    #params = OrderedDict( [(k, (v[0].data,v[1].data)) for k, v in net.params.items()])
    features_shape = (len(image_list), shp[1], shp[2], shp[3])
    features = np.empty(features_shape, dtype='float32', order='C')
    img_batch = []
    for cnt, path in zip(range(features_shape[0]), image_list):
        img_batch.append(transformer.preprocess('data', caffe.io.load_image(path)))

        # dst = net.forward(end=layer_name)
        # features[cnt:cnt+1, :,:,:] = dst[layer_name]
        # print '%d images processed' % (cnt+1,)

        #print 'image shape: ', img.shape
        #print path, type(img), img.mean()
        if (len(img_batch) == batch_size) or cnt==features_shape[0]-1:
            net.blobs['data'].reshape(len(img_batch), 3, 112, 112)
            net.blobs['data'].data[...] = img_batch
            dst = net.forward(end=layer_name)

            # syncs the memory between GPU and CPU
            blobs = OrderedDict([(k, v.data) for k, v in net.blobs.items()])

            print '%d images processed' % (cnt + 1,)

            # items of blobs are references, so the batch output must be copied out
            features[cnt - len(img_batch) + 1:cnt + 1, :, :, :] = dst[layer_name]
            img_batch = []

    features = np.asarray(features, dtype='float32')
    return features
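A minimal usage sketch for extract_feature; the prototxt/caffemodel paths, image list, and layer name below are placeholder assumptions, not files shipped with this example.

# Hypothetical invocation (all paths and the layer name are placeholders):
image_list = ['imgs/0001.jpg', 'imgs/0002.jpg']
feats = extract_feature('deploy.prototxt', 'model.caffemodel',
                        image_list, data_mean=None, layer_name='pool5')
print feats.shape  # (num_images, channels, height, width)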
Ejemplo n.º 39
0
def main():
    LSTM_DIR = '/scratch/ay937/caffe-recurrent/examples/s2vt'
    # LSTM_NET_FILE = '/home/sv/jeff-recurrent/caffe/examples/coco_caption/yts2se2e_combo_si_noembed.words_to_preds.deploy.prototxt'
    LSTM_NET_FILE = './s2vt.deploy.prototxt'
    # ITER = 5000 # 5k,8k seem good for onlycoco
    ITER = 10000  # 8k, 10k, 14k seem good for surfacelm
    # ITER = 8000 # 5k, 8k, 10k seem good for surface+cocolm
    # MODEL_FILE = 'models/s2s_30k_iter_30000.caffemodel'
    MODEL_FILE = sys.argv[1]
    #  's2s_vgg_surfacelm120k_fac2_iter_%d.caffemodel' % ITER
    #  'cocolm40k_cont_surfacelm120k_fac2_iter_%d.caffemodel' % ITER
    #  's2s_vgg_onlycocolm40k_fac2_iter_%d.caffemodel' % ITER
    TAG = 's2s_vgg_cocolm40k_surfacelm120k_predlr0.5'
    # TAG = 's2s_vgg_surfacelm120k'
    # TAG = 's2s_vgg_onlycocolm40k'
    NET_TAG = '%s_iter_%d' % (TAG, ITER)
    FRAMEFEAT_FILE_PATTERN = '/scratch/ay937/caffe-recurrent/examples/s2vt/youtube/splits/yt_allframes_vgg_fc7_{0}.txt'
    SENTS_FILE_PATTERN = '/scratch/ay937/caffe-recurrent/examples/s2vt/youtube/splits/sents_{0}_lc_nopunc.txt'

    if DEVICE_ID >= 0:
        caffe.set_mode_gpu()
        caffe.set_device(DEVICE_ID)
    else:
        caffe.set_mode_cpu()
    # caffe.set_phase_test() #check if this is correct
    # Set up the nets.
    # import ipdb; ipdb.set_trace()
    print "Setting up LSTM NET"
    lstm_net = caffe.Net(LSTM_NET_FILE, MODEL_FILE, caffe.TEST)
    print "Done"
    nets = [lstm_net]

    RESULTS_DIR = './results_lm'
    STRATEGIES = [
        {
            'type': 'beam',
            'beam_size': 1
        },
        # {'type': 'beam', 'beam_size': 3},
    ]
    NUM_OUT_PER_CHUNK = 30
    START_CHUNK = 0

    vocab_file = '%s/vocab/vocab.txt' % LSTM_DIR
    DATASETS = [  # split_name, data_split_name, aligned
        ('valid', 'val', False),
        #  ('test', 'test', False),
    ]
    for split_name, data_split_name, aligned in DATASETS:
        filenames = [(FRAMEFEAT_FILE_PATTERN.format(data_split_name),
                      SENTS_FILE_PATTERN.format(data_split_name))]
        fsg = fc7FrameSequenceGenerator(filenames,
                                        BUFFER_SIZE,
                                        vocab_file,
                                        max_words=MAX_WORDS,
                                        align=aligned,
                                        shuffle=False,
                                        pad=aligned,
                                        truncate=aligned)
        video_gt_pairs = all_video_gt_pairs(fsg)
        print 'Read %d videos pool feats' % len(fsg.vid_framefeats)
        NUM_CHUNKS = (len(fsg.vid_framefeats) /
                      NUM_OUT_PER_CHUNK) + 1  # num videos in batches of 30
        eos_string = '<EOS>'
        # add english inverted vocab
        vocab_list = [eos_string] + fsg.vocabulary_inverted
        offset = 0
        for c in range(START_CHUNK, NUM_CHUNKS):
            chunk_start = c * NUM_OUT_PER_CHUNK
            chunk_end = (c + 1) * NUM_OUT_PER_CHUNK
            # Process NUM_OUT_PER_CHUNK videos at a time.
            chunk = video_gt_pairs.keys()[chunk_start:chunk_end]
            # chunk = image_gt_pairs[chunk_start:chunk_end]
            html_out_filename = '%s/%s.%s.%d_to_%d.html' % \
                (RESULTS_DIR, data_split_name, NET_TAG, chunk_start, chunk_end)
            text_out_filename = '%s/%s.%s_' % \
                (RESULTS_DIR, data_split_name, NET_TAG)
            if os.path.exists(html_out_filename):
                print 'HTML output exists, skipping:', html_out_filename
                continue
            else:
                print 'HTML output will be written to:', html_out_filename
            outputs = run_pred_iters(lstm_net,
                                     chunk,
                                     video_gt_pairs,
                                     fsg,
                                     strategies=STRATEGIES,
                                     display_vocab=vocab_list)
            # html_out = to_html_output(outputs, vocab_list)
            # if not os.path.exists(RESULTS_DIR): os.makedirs(RESULTS_DIR)
            # html_out_file = open(html_out_filename, 'w')
            # html_out_file.write(html_out)
            # html_out_file.close()
            text_out_types = to_text_output(outputs, vocab_list)
            print text_out_types
            for strat_type in text_out_types:
                #text_out_fname = text_out_filename + strat_type + '.txt'
                text_out_fname = sys.argv[2]
                text_out_file = open(text_out_fname, 'a')
                text_out_file.write(''.join(text_out_types[strat_type]))
                text_out_file.close()
            offset += NUM_OUT_PER_CHUNK
            print 'Wrote text output to:', sys.argv[2]
Ejemplo n.º 40
0
def main(argv):
    pycaffe_dir = caffe_root + 'python/'

    parser = argparse.ArgumentParser()
    # Required arguments: input and output files.
    parser.add_argument("--input_file", help="Input image, directory, or npy.")
    parser.add_argument("--output_file", help="Output npy filename.")
    # Optional arguments.
    parser.add_argument("--model_def",
                        default=os.path.join(
                            pycaffe_dir,
                            "../examples/imagenet/imagenet_deploy.prototxt"),
                        help="Model definition file.")
    parser.add_argument(
        "--pretrained_model",
        default=os.path.join(
            pycaffe_dir,
            "../examples/imagenet/caffe_reference_imagenet_model"),
        help="Trained model weights file.")
    parser.add_argument("--gpu",
                        action='store_true',
                        help="Switch for gpu computation.")
    parser.add_argument(
        "--center_only",
        action='store_true',
        help="Switch for prediction from center crop alone instead of " +
        "averaging predictions across crops (default).")
    parser.add_argument(
        "--images_dim",
        default='256,256',
        help="Canonical 'height,width' dimensions of input images.")
    parser.add_argument(
        "--mean_file",
        default=os.path.join(pycaffe_dir,
                             'caffe/imagenet/ilsvrc_2012_mean.npy'),
        help="Data set image mean of H x W x K dimensions (numpy array). " +
        "Set to '' for no mean subtraction.")
    parser.add_argument(
        "--input_scale",
        type=float,
        default=255,
        help="Multiply input features by this scale before input to net")
    parser.add_argument(
        "--channel_swap",
        default='2,1,0',
        help="Order to permute input channels. The default converts " +
        "RGB -> BGR since BGR is the Caffe default by way of OpenCV.")
    parser.add_argument(
        "--ext",
        default='jpg',
        help="Image file extension to take as input when a directory " +
        "is given as the input file.")
    args = parser.parse_args()

    image_dims = [int(s) for s in args.images_dim.split(',')]
    channel_swap = [int(s) for s in args.channel_swap.split(',')]

    mean = None
    if args.mean_file:
        mean = np.load(args.mean_file)
        # Resize mean (which requires H x W x K input in range [0,1]).
        in_shape = image_dims
        m_min, m_max = mean.min(), mean.max()
        normal_mean = (mean - m_min) / (m_max - m_min)
        mean = caffe.io.resize_image(normal_mean.transpose(
            (1, 2, 0)), in_shape).transpose(
                (2, 0, 1)) * (m_max - m_min) + m_min

    if args.gpu:
        caffe.set_mode_gpu()
        print("GPU mode")
    else:
        caffe.set_mode_cpu()
        print("CPU mode")

    # Make classifier.
    classifier = caffe.Classifier(args.model_def,
                                  args.pretrained_model,
                                  image_dims=image_dims,
                                  mean=mean,
                                  input_scale=1.0,
                                  raw_scale=255.0,
                                  channel_swap=channel_swap)

    # Load the list of image paths (first whitespace-separated token per line).
    args.input_file = os.path.expanduser(args.input_file)
    with open(args.input_file) as f:
        im_files = [line.split(' ')[0].rstrip('\n') for line in f]

    inputs = [caffe.io.load_image(im_f) for im_f in im_files]

    print "Classifying %d inputs." % len(inputs)

    # Classify.
    start = time.time()
    predictions = classifier.predict(inputs, not args.center_only)
    print "Done in %.2f s." % (time.time() - start)

    # Save
    np.save(args.output_file, predictions)
    print "Saved %s." % args.output_file
Ejemplo n.º 41
0
def exec_validation(device_id, mode, it='', visualize=False):

    caffe.set_device(device_id)
    caffe.set_mode_gpu()
    net = caffe.Net('./result/proto_test.prototxt',\
              './result/tmp.caffemodel',\
              caffe.TEST)

    dp = VQADataProvider(mode=mode, batchsize=64)
    total_questions = len(dp.getQuesIds())
    epoch = 0

    pred_list = []
    testloss_list = []
    stat_list = []

    while epoch == 0:
        t_word, t_cont, t_img_feature, t_answer, t_qid_list, t_iid_list, epoch = dp.get_batch_vec(
        )
        net.blobs['data'].data[...] = np.transpose(t_word, (1, 0))
        net.blobs['cont'].data[...] = np.transpose(t_cont, (1, 0))
        net.blobs['img_feature'].data[...] = t_img_feature
        net.blobs['label'].data[...] = t_answer
        net.forward()
        t_pred_list = net.blobs['prediction'].data.argmax(axis=1)
        t_pred_str = [
            dp.vec_to_answer(pred_symbol) for pred_symbol in t_pred_list
        ]
        testloss_list.append(net.blobs['loss'].data)
        for qid, iid, ans, pred in zip(t_qid_list, t_iid_list,
                                       t_answer.tolist(), t_pred_str):
            pred_list.append({
                u'answer': pred,
                u'question_id': int(dp.getStrippedQuesId(qid))
            })
            if visualize:
                q_list = dp.seq_to_list(dp.getQuesStr(qid))
                if mode == 'test-dev' or mode == 'test':
                    ans_str = ''
                    ans_list = [''] * 10
                else:
                    ans_str = dp.vec_to_answer(ans)
                    ans_list = [
                        dp.getAnsObj(qid)[i]['answer'] for i in xrange(10)
                    ]
                stat_list.append({\
                                    'qid'   : qid,
                                    'q_list' : q_list,
                                    'iid'   : iid,
                                    'answer': ans_str,
                                    'ans_list': ans_list,
                                    'pred'  : pred })
        percent = 100 * float(len(pred_list)) / total_questions
        sys.stdout.write('\r' + ('%.2f' % percent) + '%')
        sys.stdout.flush()

    mean_testloss = np.array(testloss_list).mean()

    if mode == 'val':
        valFile = './result/val2015_resfile'
        with open(valFile, 'w') as f:
            json.dump(pred_list, f)
        if visualize:
            visualize_failures(stat_list, mode)
        annFile = config.DATA_PATHS['val']['ans_file']
        quesFile = config.DATA_PATHS['val']['ques_file']
        vqa = VQA(annFile, quesFile)
        vqaRes = vqa.loadRes(valFile, quesFile)
        vqaEval = VQAEval(vqa, vqaRes, n=2)
        vqaEval.evaluate()
        acc_overall = vqaEval.accuracy['overall']
        acc_perQuestionType = vqaEval.accuracy['perQuestionType']
        acc_perAnswerType = vqaEval.accuracy['perAnswerType']
        return mean_testloss, acc_overall, acc_perQuestionType, acc_perAnswerType
    elif mode == 'test-dev':
        filename = './result/vqa_OpenEnded_mscoco_test-dev2015_v3t' + str(
            it).zfill(8) + '_results'
        with open(filename + '.json', 'w') as f:
            json.dump(pred_list, f)
        if visualize:
            visualize_failures(stat_list, mode)
    elif mode == 'test':
        filename = './result/vqa_OpenEnded_mscoco_test2015_v3c' + str(
            it).zfill(8) + '_results'
        with open(filename + '.json', 'w') as f:
            json.dump(pred_list, f)
        if visualize:
            visualize_failures(stat_list, mode)
Ejemplo n.º 42
0
def test_network(model_file, weights_file, image_file):
    caffe.set_mode_gpu()
    net = caffe.Net(model_file, caffe.TEST, weights=weights_file)
Ejemplo n.º 43
0
def main(argv, image_name):
    use_cpu = False
    gpu_dev = 0
    prototxt_path = 'models/trancos/hydra2/hydra_deploy.prototxt'
    caffemodel_path = 'models/trancos/hydra2/trancos_hydra2.caffemodel'
    cfg_file = None  # must be provided via --cfg

    try:
        opts, _ = getopt.getopt(
            argv, "h:",
            ["prototxt=", "caffemodel=", "cpu_only", "dev=", "cfg="])
    except getopt.GetoptError as err:
        print("Error while parsing parameters: ", err)
        return

    for opt, arg in opts:
        if opt == "--prototxt":
            prototxt_path = arg
        elif opt == "--caffemodel":
            caffemodel_path = arg
        elif opt == "--cpu_only":
            use_cpu = True
        elif opt == "--dev":
            gpu_dev = int(arg)
        elif opt == "--cfg":
            cfg_file = arg

    (dataset, use_mask, mask_file, test_names_file, im_folder, dot_ending, pw,
     sigmadots, n_scales, perspective_path, use_perspective, is_colored,
     results_file, resize_im) = init_parameters_from_config(cfg_file)

    if use_cpu:
        caffe.set_mode_cpu()
    else:
        # Use GPU
        caffe.set_device(gpu_dev)
        caffe.set_mode_gpu()

    # Init CNN
    CNN = CaffePredictor(prototxt_path, caffemodel_path, n_scales)

    print("\nStart prediction for " + image_name)

    im_path = utl.extendName(image_name, im_folder)
    im = load_image(im_path, color=is_colored)

    if resize_im > 0:
        im = utl.resizeMaxSize(im, resize_im)

    mask = None
    if use_mask:
        mask_im_path = utl.extendName(image_name,
                                      im_folder,
                                      use_ending=True,
                                      pattern=mask_file)
        mask = sio.loadmat(mask_im_path,
                           chars_as_strings=1,
                           matlab_compatible=1)
        mask = mask.get('BW')

    s = time.time()
    npred, resImg = count_objects(CNN, im, pw, mask)
    print("image : %s, npred = %.2f , time =%.2f sec" %
          (image_name, npred, time.time() - s))

    return npred
Ejemplo n.º 44
0
def main():
    """Main function"""
    description = ('Test Fast-RCNN style datalayer')
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument("dataset", help="ImageDataset JSON file")
    parser.add_argument("-n",
                        "--net_file",
                        required=True,
                        help="Net (prototxt) file")
    parser.add_argument("-g", "--gpu", type=int, default=0, help="Gpu Id.")
    parser.add_argument("-e",
                        "--epochs",
                        type=int,
                        default=2,
                        help="Number of epochs")
    parser.add_argument(
        "-p",
        "--pause",
        default=0,
        type=int,
        help="Set number of milliseconds to pause. Use 0 to pause indefinitely"
    )
    args = parser.parse_args()

    # init caffe
    caffe.set_device(args.gpu)
    caffe.set_mode_gpu()

    assert osp.exists(args.net_file), 'Net file "{}" do not exist'.format(
        args.net_file)
    net = caffe.Net(args.net_file, caffe.TEST)

    print 'Loading dataset from {}'.format(args.dataset)
    dataset = ImageDataset.from_json(args.dataset)
    print 'Loaded {} dataset with {} annotations'.format(
        dataset.name(), dataset.num_of_images())

    net.layers[0].add_dataset(dataset)
    net.layers[0].print_params()
    net.layers[0].generate_datum_ids()

    required_object_info_fields = net.layers[0].required_object_info_fields
    print(
        "required_object_info_fields = {}".format(required_object_info_fields))

    # Make sure we remove bad objects like tha data layer does
    filter_dataset(dataset, required_object_info_fields)

    number_of_images = dataset.num_of_images()
    assert net.layers[0].number_of_datapoints() == number_of_images
    num_of_layer_objects = sum([
        len(img_info['object_infos'])
        for img_info in net.layers[0].data_samples
    ])
    num_of_dataset_objects = sum(
        [len(img_info['object_infos']) for img_info in dataset.image_infos()])
    assert num_of_layer_objects == num_of_dataset_objects, "{} != {}".format(
        num_of_layer_objects, num_of_dataset_objects)

    cv2.namedWindow('blob_image', cv2.WINDOW_AUTOSIZE)
    cv2.namedWindow('original_image', cv2.WINDOW_AUTOSIZE)

    image_blob_shape = net.blobs['input_image'].data.shape
    assert len(image_blob_shape) == 4, 'Expects 4D data blob'
    assert image_blob_shape[
        1] == 3, 'Expects 2nd channel to be 3 for BGR image'
    batch_size = image_blob_shape[0]
    num_of_batches = int(np.ceil(dataset.num_of_images() / float(batch_size)))

    exit_loop = False
    for epoch_id in xrange(args.epochs):
        print "-----------------------Epoch # {} / {} -----------------------------".format(
            epoch_id, args.epochs)
        for b in trange(num_of_batches):
            start_idx = batch_size * b
            end_idx = min(batch_size * (b + 1), number_of_images)
            # print 'Working on batch: {}/{} (Images# {} - {}) of epoch {}'.format(b, num_of_batches, start_idx, end_idx, epoch_id)

            # Run forward pass
            _ = net.forward()

            # Get image_scales and image_flippings
            image_scales = net.blobs['image_scales'].data
            image_flippings = net.blobs['image_flippings'].data.astype(np.bool)
            assert image_scales.shape == image_flippings.shape == (
                batch_size, )

            # Get roi_blob and from that determine number_of_rois
            roi_blob = net.blobs['roi'].data
            assert roi_blob.ndim == 2 and roi_blob.shape[1] == 5

            number_of_rois = roi_blob.shape[0]
            for roi_id in xrange(number_of_rois):
                roi_batch_index = roi_blob[roi_id, 0]
                assert 0 <= roi_batch_index < batch_size
                assert_bbx(roi_blob[roi_id, -4:])

            # Check the bbx blobs
            for bbx_blob_name in ['gt_bbx_amodal', 'gt_bbx_crop']:
                if bbx_blob_name in net.blobs:
                    bbx_blob = net.blobs[bbx_blob_name].data
                    assert bbx_blob.shape == (number_of_rois, 4)
                    for roi_id in xrange(number_of_rois):
                        assert_bbx(bbx_blob[roi_id, :])

            # Check the center proj blobs
            center_proj_blob = net.blobs['gt_center_proj'].data
            assert center_proj_blob.shape == (number_of_rois, 2)

            # Check vp blobs
            vp_blob = net.blobs['gt_viewpoint'].data
            assert vp_blob.shape == (number_of_rois,
                                     3), "Weird vp shape = {}".format(vp_blob)
            assert (vp_blob >= -np.pi).all() and (
                vp_blob < np.pi).all(), "Bad vp = \n{}".format(vp_blob)

            for i in xrange(start_idx, end_idx):
                original_image = cv2.imread(
                    osp.join(dataset.rootdir(),
                             dataset.image_infos()[i]['image_file']))
                cv2.imshow('original_image', original_image)

                image_blob = net.blobs['input_image'].data[i - start_idx]
                image_blob_bgr8 = net.layers[0].make_bgr8_from_blob(
                    image_blob).copy()

                for roi_id in xrange(roi_blob.shape[0]):
                    roi_batch_index = roi_blob[roi_id, 0]
                    if roi_batch_index == (i - start_idx):
                        bbx_roi = roi_blob[roi_id, -4:].astype(np.float32)
                        cv2.rectangle(image_blob_bgr8, tuple(bbx_roi[:2]),
                                      tuple(bbx_roi[2:]), (0, 255, 0), 1)

                cv2.imshow('blob_image', image_blob_bgr8)
                cv2.displayOverlay(
                    'blob_image',
                    'Flipped' if image_flippings[i -
                                                 start_idx] else 'Original')

                key = cv2.waitKey(args.pause)
                if key == 27:
                    cv2.destroyAllWindows()
                    exit_loop = True
                    break
                elif key == ord('p'):
                    args.pause = not args.pause

            if exit_loop is True:
                print 'User pressed ESC. Exiting epoch {}'.format(epoch_id)
                exit_loop = False
                break
        print "-----------------------End of epoch -----------------------------"

        # Now check the data_layer.data_samples
        print "Verifying data_samples ...",
        for im_info_layer, im_info_dataset in zip(net.layers[0].data_samples,
                                                  dataset.image_infos()):
            for im_field in ['image_size', 'image_intrinsic']:
                if im_field in im_info_dataset:
                    assert np.all(
                        im_info_layer[im_field] == im_info_dataset[im_field])

            layer_obj_infos = im_info_layer['object_infos']
            dataset_obj_infos = im_info_dataset['object_infos']

            assert len(layer_obj_infos) == len(
                dataset_obj_infos), "{} != {}".format(len(layer_obj_infos),
                                                      len(dataset_obj_infos))
            for obj_info_layer, obj_info_dataset in zip(
                    layer_obj_infos, dataset_obj_infos):
                assert obj_info_layer['id'] == obj_info_dataset['id']
                assert obj_info_layer['category'] == obj_info_dataset[
                    'category']
                for obj_field in required_object_info_fields:
                    assert np.all(obj_info_layer[obj_field] == np.array(obj_info_dataset[obj_field])), \
                        "For obj_field '{}': {} vs {}".format(obj_field, obj_info_layer[obj_field], obj_info_dataset[obj_field])
        print "Done."
Ejemplo n.º 45
0
#coding=utf-8

import numpy as np
import sys, os
import cv2

import time

caffe_root = '/home/gjw/caffe-ssd-mobile/'
sys.path.insert(0, caffe_root + 'python')

import caffe
caffe.set_mode_gpu()  # set GPU mode

CLASSES = ('background', 'car', 'cyclist', 'pedestrian')

# global variables
colours = np.random.rand(32, 3) * 255


class MobileNet_SSD:
    # constructor
    def __init__(self, net_file, caffe_model):
        self.net = caffe.Net(net_file, caffe_model, caffe.TEST)

    # image normalization: resize and scale pixels to roughly [-1, 1]
    # (0.007843 is approximately 1/127.5)
    def preprocess(self, src):
        img = cv2.resize(src, (300, 300))
        return (img - 127.5) * 0.007843

    def detect(self, frame):
Ejemplo n.º 46
0
def test():
    caffe.set_mode_gpu()
    caffe.set_device(0)

    info = os.listdir(r'VIDEO_test_img')

    model = r'EVD-Net.caffemodel'

    net = caffe.Net('test.prototxt', model, caffe.TEST)

    imagesnum = 0
    for line in info:
        reg = re.compile(r'(.*?).jpg')
        all = reg.findall(line)
        if all != []:
            imagename = str(all[0])
            line = imagename
            reg = re.compile(r'ILSVRC2015_train_00124006_([0-9]{6})_1_3')
            all = reg.findall(line)
            labelnum = int(all[0])
            # Skip frames whose two preceding or two following neighbours are missing.
            if (not os.path.isfile(r'VIDEO_test_img\ILSVRC2015_train_00124006_%s_1_3.jpg' % str(labelnum - 1).zfill(6)) or
                not os.path.isfile(r'VIDEO_test_img\ILSVRC2015_train_00124006_%s_1_3.jpg' % str(labelnum - 2).zfill(6)) or
                not os.path.isfile(r'VIDEO_test_img\ILSVRC2015_train_00124006_%s_1_3.jpg' % str(labelnum + 1).zfill(6)) or
                not os.path.isfile(r'VIDEO_test_img\ILSVRC2015_train_00124006_%s_1_3.jpg' % str(labelnum + 2).zfill(6))):
                continue
            else:
                imagesnum = imagesnum + 1

                npstore_1 = caffe.io.load_image(r'VIDEO_test_img\ILSVRC2015_train_00124006_%s_1_3.jpg' % str(labelnum - 2).zfill(6))
                npstore_2 = caffe.io.load_image(r'VIDEO_test_img\ILSVRC2015_train_00124006_%s_1_3.jpg' % str(labelnum - 1).zfill(6))
                npstore = caffe.io.load_image(r'VIDEO_test_img\ILSVRC2015_train_00124006_%s_1_3.jpg' % str(labelnum).zfill(6))
                npstore_3 = caffe.io.load_image(r'VIDEO_test_img\ILSVRC2015_train_00124006_%s_1_3.jpg' % str(labelnum + 1).zfill(6))
                npstore_4 = caffe.io.load_image(r'VIDEO_test_img\ILSVRC2015_train_00124006_%s_1_3.jpg' % str(labelnum + 2).zfill(6))

                # Feed the five consecutive frames into their input blobs (HWC -> CHW).
                frames = [('img_1', npstore_1), ('img_2', npstore_2), ('img', npstore),
                          ('img_3', npstore_3), ('img_4', npstore_4)]
                for blob_name, frame in frames:
                    net.blobs[blob_name].data[...] = [frame.transpose((2, 0, 1))]
                net.forward()

                data = net.blobs['sum'].data[0]
                data = data.transpose((1, 2, 0))
                data = data[:, :, ::-1]  # RGB -> BGR for OpenCV

                savepath = 'result\\' + imagename + '_EVD-Net.jpg'
                cv2.imwrite(savepath, data * 255.0, [cv2.IMWRITE_JPEG_QUALITY, 100])

                print imagename

    print 'image numbers:', imagesnum
Ejemplo n.º 47
0
def run_test_save_result():
    caffe.set_mode_gpu()
    caffe.set_device(0)
    m = h5py.File('/home/zawlin/Dropbox/proj/sg_vrd_meta.h5', 'r', 'core')

    net = caffe.Net('models/sg_vrd/vgg16/faster_rcnn_end2end/test.prototxt',
                    'output/faster_rcnn_end2end/sg_vrd_2016_train/vgg16_faster_rcnn_finetune_iter_80000.caffemodel',
                    caffe.TEST)
    # net.name = os.path.splitext(os.path.basename(args.caffemodel))[0]
    net.name = 'sgvrd'
    imdb = get_imdb('sg_vrd_2016_test')
    imdb.competition_mode(0)
    if not cfg.TEST.HAS_RPN:
        imdb.set_proposal_method(cfg.TEST.PROPOSAL_METHOD)
    h5path = 'output/sg_vrd_2016_test_more.hdf5'
    #h5path = 'output/' + imdb.name + '.hdf5'

    # if os.path.exists(h5path):
    # os.remove(h5path)
    h5f = h5py.File(h5path)
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    root = 'data/sg_vrd_2016/Data/sg_test_images/'
    _t = {'im_detect': Timer(), 'misc': Timer()}
    cnt = 0
    thresh = .15
    for path, subdirs, files in os.walk(root):
        for name in files:
            cnt += 1
            im_idx = name.split('.')[0]
            fpath = os.path.join(path, name)
            im = cv2.imread(fpath)
            if im is None:
                print fpath
                continue
            box_proposals = None
            _t['im_detect'].tic()
            score_raw, scores, fc7, boxes = im_detect(net, im, box_proposals)
            _t['im_detect'].toc()
            # scores = score_raw
            res_locations = []
            res_visuals = []
            res_classemes = []
            res_cls_confs = []
            boxes_tosort = []
            _t['misc'].tic()
            for j in xrange(1, 101):
                inds = np.where(scores[:, j] > 0.01)[0]
                cls_scores = scores[inds, j]
                cls_boxes = boxes[inds, j * 4:(j + 1) * 4]

                cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                    .astype(np.float32, copy=False)

                if len(cls_scores) <= 0:
                    boxes_tosort.append(cls_dets)
                    continue

                res_loc = cls_boxes
                res_vis = fc7[inds]
                res_classeme = scores[inds]
                res_cls_conf = np.column_stack((np.zeros(cls_scores.shape[0]) + j, cls_scores))

                keep = nms(cls_dets, .2, force_cpu=True)  # nms threshold
                cls_dets = cls_dets[keep, :]

                res_loc = res_loc[keep]
                res_vis = res_vis[keep]
                res_classeme = res_classeme[keep]
                res_cls_conf = res_cls_conf[keep]

                res_classemes.extend(res_classeme)
                res_visuals.extend(res_vis)
                res_locations.extend(res_loc)
                res_cls_confs.extend(res_cls_conf)

                boxes_tosort.append(cls_dets)
            # filter based on confidence
            inds = np.where(np.array(res_cls_confs)[:, 1] > thresh)[0]

            res_classemes = np.array(res_classemes)[inds]
            res_visuals = np.array(res_visuals)[inds]
            res_locations = np.array(res_locations)[inds]
            res_cls_confs = np.array(res_cls_confs)[inds]

            h5f.create_dataset(im_idx + '/classemes', dtype='float16', data=res_classemes.astype(np.float16))
            h5f.create_dataset(im_idx + '/visuals', dtype='float16', data=res_visuals.astype(np.float16))
            h5f.create_dataset(im_idx + '/locations', dtype='short', data=res_locations.astype(np.short))
            h5f.create_dataset(im_idx + '/cls_confs', dtype='float16', data=res_cls_confs.astype(np.float16))
            # filter end
            '''
            image_scores = np.hstack(boxes_tosort[j][:, -1] for j in xrange(30))
            #print len(image_scores)
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(30):
                    keep = np.where(boxes_tosort[j][:, -1] >= image_thresh)[0]
                    boxes_tosort[j] = boxes_tosort[j][keep, :]
            '''
            for j in xrange(len(boxes_tosort)):
                cls_dets = boxes_tosort[j]
                for di in xrange(cls_dets.shape[0]):
                    #    print 'here'
                    di = cls_dets[di]
                    score = di[-1]
                    cls_idx = j + 1
                    cls_name = str(m['meta/cls/idx2name/' + str(cls_idx)][...])
                    if score > 1:
                        score = 1
                    x, y = int(di[0]), int(di[1])
                    if x < 10:
                        x = 15
                    if y < 10:
                        y = 15
                    # cv2.putText(im, cls_name, (x, y), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255), 1)
                    # cv2.rectangle(im, (di[0], di[1]), (di[2], di[3]), (255, 0, 0), 2)
                    # print '%s %d %f %d %d %d %f\n' % (im_idx, j + 1, score, di[0], di[1], di[2], di[3])
            # cv2.imshow('im', im)
            # cv2.imwrite(str(cnt) + '.jpg', im)
            # if cv2.waitKey(0) & 0xFF == 27:
                # exit(0)

            _t['misc'].toc()

            print 'im_detect: {:d} {:.3f}s {:.3f}s' \
                .format(cnt, _t['im_detect'].average_time,
                        _t['misc'].average_time)
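A short read-back sketch (not part of the original example) for the HDF5 file written above; the dataset keys match those created in the loop, and picking the first stored image index is an arbitrary choice for illustration.

import h5py
h5f = h5py.File('output/sg_vrd_2016_test_more.hdf5', 'r')
im_idx = h5f.keys()[0]                       # pick any stored image index
classemes = h5f[im_idx + '/classemes'][...]  # float16 class scores per box
locations = h5f[im_idx + '/locations'][...]  # boxes as short ints
cls_confs = h5f[im_idx + '/cls_confs'][...]  # [class_idx, score] per box
print classemes.shape, locations.shape, cls_confs.shape
h5f.close()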
Ejemplo n.º 48
0
def convertToFeatureVecs(inputPath, inputfile, outputFile):
    count = 0
    with open(inputfile, 'r') as reader:
        for path in reader:
            count += 1
    print('Preparing to read {} images'.format(count))

    caffe.set_device(0)
    caffe.set_mode_gpu()
    # Loading the Caffe model, setting preprocessing parameters
    net = caffe.Classifier(model_prototxt,
                           model_trained,
                           mean=np.load(mean_path).mean(1).mean(1),
                           channel_swap=(2, 1, 0),
                           raw_scale=255,
                           image_dims=(448, 448))

    # Loading class labels
    with open(imagenet_labels) as f:
        labels = f.readlines()

    print('Results edited in: {}'.format(outputFile))

    errorMessages = []

    countDone = 0
    dataMap = shelve.open(outputFile,
                          flag='w',
                          protocol=pickle.HIGHEST_PROTOCOL)
    print('Extracting from layer: {}'.format(layer_name))
    with open(inputfile, 'r') as reader:
        for image_path in reader:
            image_path = image_path.strip()

            img_id = getImageID(image_path)
            if (str(img_id) in dataMap):
                if countDone % 10 == 0:
                    print('Contains {}, count {}'.format(img_id, countDone))
                countDone += 1
                continue

            input_image = caffe.io.load_image(inputPath + image_path)
            prediction = net.predict([input_image], oversample=False)
            msg = ('{} : {} ( {} )'.format(
                os.path.basename(image_path),
                labels[prediction[0].argmax()].strip(),
                prediction[0][prediction[0].argmax()]))

            count = count + 1

            try:

                # filename, array data to be saved, format, delimiter
                featureData = net.blobs[layer_name].data[0]
                dataMap[str(img_id)] = featureData

                msg2 = ('\nImages processed: {}\n'.format(count))
            except ValueError:
                print('Error reading image_path')
                errorMessages.append(image_path)

            if count % 200 == 0:
                print(featureData.shape)
                print(msg)
                print(msg2)
            if count % 1000 == 0:
                print('Doing a data sync...')
                dataMap.sync()
                print('Data sync done.')
    dataMap.close()

    print('Completed processing {} images'.format(count))
    print('Error messages: {}'.format(errorMessages))
Ejemplo n.º 49
0
def run_test_visualize():
    caffe.set_mode_gpu()
    caffe.set_device(0)
    m = h5py.File('/home/zawlin/Dropbox/proj/sg_vrd_meta.h5', 'r', 'core')
    net = caffe.Net('models/sg_vrd/vgg16/faster_rcnn_end2end/test.prototxt',
                    'output/faster_rcnn_end2end/sg_vrd_2016_train/vgg16_faster_rcnn_finetune_iter_60000.caffemodel',
                    caffe.TEST)
    # net.name = os.path.splitext(os.path.basename(args.caffemodel))[0]
    net.name = 'sgvrd'
    imdb = get_imdb('sg_vrd_2016_test')
    imdb.competition_mode(0)
    if not cfg.TEST.HAS_RPN:
        imdb.set_proposal_method(cfg.TEST.PROPOSAL_METHOD)

    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    root = 'data/sg_vrd_2016/Data/sg_test_images/'
    _t = {'im_detect': Timer(), 'misc': Timer()}
    cnt = 0
    thresh = .05
    for path, subdirs, files in os.walk(root):
        for name in files:
            cnt += 1
            im_idx = name.split('.')[0]
            fpath = os.path.join(path, name)
            im = cv2.imread(fpath)
            if im is None:
                print fpath
                continue
            box_proposals = None
            _t['im_detect'].tic()
            score_raw, scores, fc7, boxes = im_detect(net, im, box_proposals)
            _t['im_detect'].toc()

            # scores = score_raw

            # scores=np.apply_along_axis(softmax,1,scores)
            # scores[:,16]+=icr
            boxes_tosort = []
            for j in xrange(1, 101):
                inds = np.where(scores[:, j] > thresh)[0]
                cls_scores = scores[inds, j]
                cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
                # cls_boxes = boxes[inds]
                cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                    .astype(np.float32, copy=False)
                keep = nms(cls_dets, .2, force_cpu=True)  # nms threshold
                # keep = nms_fast(cls_dets,.3)
                cls_dets = cls_dets[keep, :]
                boxes_tosort.append(cls_dets)
            for j in xrange(len(boxes_tosort)):
                cls_dets = boxes_tosort[j]
                for di in xrange(cls_dets.shape[0]):
                    #    print 'here'
                    di = cls_dets[di]
                    score = di[-1]
                    cls_idx = j + 1
                    cls_name = str(m['meta/cls/idx2name/' + str(cls_idx)][...])
                    if score > 1:
                        score = 1
                    if score < 0.2:
                        continue
                    x, y = int(di[0]), int(di[1])
                    if x < 10:
                        x = 15
                    if y < 10:
                        y = 15
                    cv2.putText(im, cls_name, (x, y), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255), 1)
                    cv2.rectangle(im, (int(di[0]), int(di[1])), (int(di[2]), int(di[3])), (255, 0, 0), 2)
                    print '%s %d %f %d %d %d %f\n' % (im_idx, j + 1, score, di[0], di[1], di[2], di[3])
            cv2.imshow('im', im)
            cv2.imwrite(str(cnt) + '.jpg', im)
            if cv2.waitKey(0) & 0xFF == 27:
                exit(0)
Ejemplo n.º 50
0
def iou(model, weights, input_source, input_source_label):
    iou_list = []
    timer = []
    for i in range(NUM_CLASSES):
        iou_list.append([])
    caffe.set_mode_gpu()
    # Load Caffe network
    net = caffe.Net(model, weights, caffe.TEST)

    # Access blob data
    input_shape = net.blobs['data'].data.shape
    confidence_output = net.blobs['prob'].data
    cap = cv2.VideoCapture(input_source)
    cap_label = cv2.VideoCapture(input_source_label)

    rval = True
    while rval:
        # Get image from VideoCapture
        rval, frame = cap.read()
        rval_lab, frame_lab = cap_label.read()
        if not rval:
            print("No image found!")
            break

        # Resize input image
        resized_image = crop_input(frame, (input_shape[3], input_shape[2]))
        cropped = numpy.int32(resized_image)
        # Subtract per-channel mean
        B_mean = 129
        G_mean = 126
        R_mean = 126
        cropped[:, :, 0] -= R_mean
        cropped[:, :, 1] -= G_mean
        cropped[:, :, 2] -= B_mean
        # Input shape is (y, x, 3), needs to be reshaped to (3, y, x)
        input_image = cropped.transpose((2, 0, 1))
        # Repeat image according to batch size for inference.
        MCDO_samples = input_shape[0]
        input_image = numpy.repeat(input_image[numpy.newaxis, :, :, :],
                                   MCDO_samples,
                                   axis=0)
        # Inference using Bayesian SegNet
        start = time.time()
        out = net.forward_all(data=input_image)
        end = time.time()
        timer.append(end - start)

        # By Alex Kendall
        mean_confidence = numpy.mean(confidence_output,
                                     axis=0,
                                     dtype=numpy.float64)
        var_confidence = numpy.var(confidence_output,
                                   axis=0,
                                   dtype=numpy.float64)
        # Prepare segmented image results
        classes = numpy.argmax(mean_confidence, axis=0)

        # Calculate per-class IoU against the label frame
        frame_lab = frame_lab[:, :, 0]

        for i in range(NUM_CLASSES):
            boolean_frame = numpy.int32(frame_lab == i)
            boolean_classes = numpy.int32(classes == i)
            union = boolean_frame | boolean_classes
            intersection = boolean_frame & boolean_classes
            # Cast to float: with Python 2 integer division the ratio
            # would silently floor to 0 or 1.
            iou = float(numpy.sum(intersection)) / numpy.sum(union)
            print(round(iou, 3))
            iou_list[i].append(iou)

    miou = []
    for i in range(NUM_CLASSES):
        miou.append(numpy.round(numpy.mean(iou_list[i]), 3))

    cap.release()
    cv2.destroyAllWindows()
    print("NAZDAR")
    mtimer = numpy.mean(timer)
    return miou, mtimer
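A tiny self-contained check (not part of the original example) of the per-class IoU computation above, on toy 2x2 arrays with arbitrarily chosen values.

import numpy
label = numpy.array([[0, 1], [1, 1]])
pred = numpy.array([[0, 1], [1, 0]])
b_lab = numpy.int32(label == 1)    # pixels labelled as class 1
b_pred = numpy.int32(pred == 1)    # pixels predicted as class 1
union = b_lab | b_pred             # 3 pixels
intersection = b_lab & b_pred      # 2 pixels
print(float(numpy.sum(intersection)) / numpy.sum(union))  # prints 0.666...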
Ejemplo n.º 51
0
def extractor(video_dir, detection_file, output_file, gpu_id, model,
              model_weights):
    # deploy pre-trained caffe model
    caffe.set_device(gpu_id)
    caffe.set_mode_gpu()
    net = caffe.Net(model, model_weights, caffe.TEST)
    '''
	try:
		os.makedirs(output_dir)
	except OSError as exception:
		if exception.errno == errno.EEXIST and os.path.isdir(output_dir):
			pass
		else:
			raise ValueError('Failed to created output directory %s' % output_dir)
	'''
    #img_dir = os.path.join(video_dir, 'img1')
    img_dir = video_dir
    print img_dir
    img_filenames = {
        int(os.path.splitext(f)[0]): os.path.join(img_dir, f)
        for f in os.listdir(img_dir)
    }
    #det_file = os.path.join(detection_dir, 'dets0704.txt')
    dets_in = np.loadtxt(detection_file, delimiter=',').astype(np.float)
    dets_out = []

    frame_indices = dets_in[:, 0].astype(np.int)
    min_frame_idx = frame_indices.min()
    max_frame_idx = frame_indices.max()

    print 'min_idx: %d, max_idx: %d' % (min_frame_idx, max_frame_idx)

    for idx in range(min_frame_idx, max_frame_idx + 1):
        mask = (frame_indices == idx)
        sub_dets = dets_in[mask]

        if idx not in img_filenames:
            print('WARNING could not find image for frame %d' % idx)
            continue

        batch_patch = np.zeros((len(sub_dets), 3, 144, 56), dtype=np.float32)
        bgr_img = cv2.imread(img_filenames[idx])
        for i in range(len(sub_dets)):
            # NOTE: it's img[y: y + h, x: x + w] and *not* img[x: x + w, y: y + h]
            patch = bgr_img.copy(
            )[abs(int(sub_dets[i][3])):abs(int(sub_dets[i][3] +
                                               sub_dets[i][5])) - 1,
              abs(int(sub_dets[i][2])):abs(int(sub_dets[i][2] +
                                               sub_dets[i][4])) - 1]
            patch = cv2.resize(patch, (56, 144))
            patch = np.transpose(patch, (2, 0, 1))
            norm_patch = np.array(patch, dtype=np.float32)
            norm_patch[0, :] = norm_patch[0, :] - 102.0
            norm_patch[1, :] = norm_patch[1, :] - 102.0
            norm_patch[2, :] = norm_patch[2, :] - 101.0
            batch_patch[i, :, :, :] = norm_patch

        net.blobs['data'].reshape(*(batch_patch.shape))
        net.blobs['data'].data[:, :, :, :] = batch_patch
        output = net.forward()
        # Prefer the batch-normalised fc7 blob when the net defines one.
        try:
            feature = net.blobs['fc7_bn'].data[:]
        except KeyError:
            feature = net.blobs['fc7'].data[:]

        #merge dets and features
        dets_out += [np.r_[(d, f)] for d, f in zip(sub_dets, feature)]
        if idx % 100 == 0:
            print 'processed {}'.format(idx)

    #out_path = os.path.join(output_dir, 'demo0707.npy')
    np.save(output_file, np.asarray(dets_out), allow_pickle=False)
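A hypothetical invocation of extractor() (not part of the original example); every path below is a placeholder assumption.

extractor('data/video1', 'data/dets.txt', 'out/dets_feats.npy',
          gpu_id=0, model='deploy.prototxt', model_weights='model.caffemodel')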
Ejemplo n.º 52
0
def run_test_object_detection_eval():
    caffe.set_mode_gpu()
    caffe.set_device(0)
    h5f = h5py.File('output/vr_object_detections.hdf5')
    m = h5py.File('/home/zawlin/Dropbox/proj/sg_vrd_meta.h5', 'r', 'core')
    net = caffe.Net('models/sg_vrd/vgg16/faster_rcnn_end2end/test.prototxt',
                    'output/faster_rcnn_end2end/sg_vrd_2016_train/vgg16_faster_rcnn_finetune_iter_60000.caffemodel',
                    caffe.TEST)
    # net.name = os.path.splitext(os.path.basename(args.caffemodel))[0]
    net.name = 'sgvrd'
    imdb = get_imdb('sg_vrd_2016_test')
    imdb.competition_mode(0)
    if not cfg.TEST.HAS_RPN:
        imdb.set_proposal_method(cfg.TEST.PROPOSAL_METHOD)

    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    root = 'data/sg_vrd_2016/Data/sg_test_images/'
    _t = {'im_detect': Timer(), 'misc': Timer()}
    cnt = 0
    thresh = .05
    img_set_file = 'data/sg_vrd_2016/ImageSets/test.txt'
    imlist = {line.strip().split(' ')[1]:line.strip().split(' ')[0] for line in open(img_set_file)}
    for imid in imlist.keys():
        im_path = root  + imlist[imid] + '.jpg'
        cnt += 1
        im = cv2.imread(im_path)
        if im is None:
            print im_path
            continue
        box_proposals = None
        _t['im_detect'].tic()
        score_raw, scores, fc7, boxes = im_detect(net, im, box_proposals)
        _t['im_detect'].toc()

        # scores = score_raw
        _t['misc'].tic()
        h5f.create_dataset(imid + '/scores',dtype='float16', data=scores.astype(np.float16))
        h5f.create_dataset(imid + '/boxes',dtype='short', data=boxes.astype(np.short))
        # scores=np.apply_along_axis(softmax,1,scores)
        # scores[:,16]+=icr
        # boxes_tosort = []
        # for j in xrange(1, 101):
            # inds = np.where(scores[:, j] > 0.01)[0]
            # cls_scores = scores[inds, j]
            # cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            # # cls_boxes = boxes[inds]
            # cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                # .astype(np.float32, copy=False)
            # keep = nms(cls_dets, .2, force_cpu=True)  # nms threshold
            # # keep = nms_fast(cls_dets,.3)
            # cls_dets = cls_dets[keep, :]
            # boxes_tosort.append(cls_dets)
        # for j in xrange(len(boxes_tosort)):
            # cls_dets = boxes_tosort[j]
            # for di in xrange(cls_dets.shape[0]):
                # #    print 'here'
                # di = cls_dets[di]
                # score = di[-1]
                # cls_idx = j + 1
                # cls_name = str(m['meta/cls/idx2name/' + str(cls_idx)][...])
                # if score > 1:
                    # score = 1
                # if score < thresh:
                    # continue
                # x, y = int(di[0]), int(di[1])
                # if x < 10:
                    # x = 15
                # if y < 10:
                    # y = 15
                # res_line = '%s %d %f %d %d %d %d'%(imid,cls_idx,score,di[0],di[1],di[2],di[3])
                # output.write(res_line+'\n')

        _t['misc'].toc()

        print 'im_detect: {:d} {:.3f}s {:.3f}s' \
            .format(cnt, _t['im_detect'].average_time,
                    _t['misc'].average_time)
Ejemplo n.º 53
0
parser1 = make_parser()
args = parser1.parse_args()
net_file= args.ssd_model_def
caffe_model= args.ssd_model_weights
ccpd_file= args.recog_model_def
ccpd_model= args.recog_model_weights
test_dir = "../images"

if not os.path.exists(caffe_model):
    print(caffe_model + " does not exist")
    exit()
if not os.path.exists(net_file):
    print(net_file + " does not exist")
    exit()
caffe.set_mode_gpu()
caffe.set_device(0)
net = caffe.Net(net_file, caffe_model, caffe.TEST)
ccpd_net = caffe.Net(ccpd_file,ccpd_model,caffe.TEST)

inputShape = net.blobs['data'].data.shape
det_inputSize = (inputShape[3], inputShape[2])

inputShape = ccpd_net.blobs['data'].data.shape
rec_inputSize = (inputShape[3], inputShape[2])

CLASSES = ('background',
           'liceneseplate')


def max_(m,n):
Ejemplo n.º 54
0
def run_test_save_pool5():
    caffe.set_mode_gpu()
    caffe.set_device(0)
    m = h5py.File('/home/zawlin/Dropbox/proj/sg_vrd_meta.h5', 'r', 'core')

    net = caffe.Net('models/sg_vrd/vgg16/faster_rcnn_end2end/test.prototxt',
                    'output/faster_rcnn_end2end/sg_vrd_2016_train/vgg16_faster_rcnn_finetune_iter_40000.caffemodel',
                    caffe.TEST)
    # net.name = os.path.splitext(os.path.basename(args.caffemodel))[0]
    net.name = 'sgvrd'
    imdb = get_imdb('sg_vrd_2016_test')
    imdb.competition_mode(0)
    if not cfg.TEST.HAS_RPN:
        imdb.set_proposal_method(cfg.TEST.PROPOSAL_METHOD)
    h5path = 'output/' + imdb.name + '_pool5.hdf5'

    # if os.path.exists(h5path):
    # os.remove(h5path)
    h5f = h5py.File(h5path)
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    root = 'data/sg_vrd_2016/Data/sg_test_images/'
    _t = {'im_detect': Timer(), 'misc': Timer()}
    cnt = 0
    thresh = .01
    for path, subdirs, files in os.walk(root):
        for name in files:
            cnt += 1
            if cnt %100==0:
                print cnt
            im_idx = name.split('.')[0]
            fpath = os.path.join(path, name)
            im = cv2.imread(fpath)
            if im is None:
                print fpath
                continue
            if im_idx + '/classemes' in h5f:
                continue
            box_proposals = None
            _t['im_detect'].tic()
            score_raw, scores, fc7, boxes = im_detect(net, im, box_proposals)
            _t['im_detect'].toc()
            rpn_rois = net.blobs['rois'].data
            pool5 = net.blobs['pool5'].data
            # scores = score_raw
            res_rpn_rois = []
            res_pool5s = []
            res_locations = []
            res_visuals = []
            res_classemes = []
            res_cls_confs = []
            boxes_tosort = []
            _t['misc'].tic()
            for j in xrange(1, 101):
                inds = np.where(scores[:, j] > thresh)[0]
                cls_scores = scores[inds, j]
                cls_boxes = boxes[inds, j * 4:(j + 1) * 4]

                cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                    .astype(np.float32, copy=False)

                if len(cls_scores) <= 0:
                    boxes_tosort.append(cls_dets)
                    continue

                res_loc = cls_boxes
                res_vis = fc7[inds]
                res_classeme = scores[inds]
                res_cls_conf = np.column_stack((np.zeros(cls_scores.shape[0]) + j, cls_scores))
                res_pool5 = pool5[inds]
                res_rpn_roi = rpn_rois[inds]

                keep = nms(cls_dets, .2, force_cpu=True)  # nms threshold
                cls_dets = cls_dets[keep, :]

                res_loc = res_loc[keep]
                res_vis = res_vis[keep]
                res_classeme = res_classeme[keep]
                res_cls_conf = res_cls_conf[keep]
                res_pool5 = res_pool5[keep]
                res_rpn_roi = res_rpn_roi[keep]

                res_classemes.extend(res_classeme)
                res_visuals.extend(res_vis)
                res_locations.extend(res_loc)
                res_cls_confs.extend(res_cls_conf)
                res_pool5s.extend(res_pool5)
                res_rpn_rois.extend(res_rpn_roi)

                boxes_tosort.append(cls_dets)
            # filter based on confidence
            inds = np.where(np.array(res_cls_confs)[:, 1] > 0.2)[0]

            res_classemes = np.array(res_classemes)[inds]
            res_visuals = np.array(res_visuals)[inds]
            res_locations = np.array(res_locations)[inds]
            res_cls_confs = np.array(res_cls_confs)[inds]

            res_pool5s = np.array(res_pool5s)[inds]
            res_rpn_rois = np.array(res_rpn_rois)[inds]

            h5f.create_dataset(im_idx + '/classemes', dtype='float16', data=res_classemes.astype(np.float16))
            h5f.create_dataset(im_idx + '/visuals', dtype='float16', data=res_visuals.astype(np.float16))
            h5f.create_dataset(im_idx + '/locations', dtype='short', data=res_locations.astype(np.short))
            h5f.create_dataset(im_idx + '/cls_confs', dtype='float16', data=res_cls_confs.astype(np.float16))
            h5f.create_dataset(im_idx + '/rpn_rois', dtype='float16', data=res_rpn_rois.astype(np.float16))
            h5f.create_dataset(im_idx + '/pool5s', dtype='float16', data=res_pool5s.astype(np.float16))
            _t['misc'].toc()
            print 'im_detect: {:d} {:.3f}s {:.3f}s' \
                .format(cnt, _t['im_detect'].average_time,
                        _t['misc'].average_time)
Ejemplo n.º 55
0
def main():
    '''
    5-fold cross validation
    '''
    root = '../data/faces'
    network_file = './resnext_deploy.prototxt'
    pretrained_model = ['../models/1/hinge_R3CNN.caffemodel', '../models/2/hinge_R3CNN.caffemodel', \
        '../models/3/hinge_R3CNN.caffemodel', '../models/4/hinge_R3CNN.caffemodel', '../models/5/hinge_R3CNN.caffemodel']
    # pretrained_model = ['../models/1/lsep_R3CNN.caffemodel', '../models/2/lsep_R3CNN.caffemodel', 
    #     '../models/3/lsep_R3CNN.caffemodel', '../models/4/lsep_R3CNN.caffemodel', '../models/5/lsep_R3CNN.caffemodel']     
    
    mean_file = ["../data/1/256_train_mean.binaryproto", "../data/2/256_train_mean.binaryproto", \
        "../data/3/256_train_mean.binaryproto", "../data/4/256_train_mean.binaryproto", "../data/5/256_train_mean.binaryproto"]
        
    test_file = ['../data/1/test_1.txt', '../data/2/test_2.txt', '../data/3/test_3.txt', \
            '../data/4/test_4.txt', '../data/5/test_5.txt']

    for i in range(5):
        print('start testing------')

        # get mean file
        batch_shape = (1, 3, 224, 224)
        means = get_mean_npy(mean_file[i], crop_size = batch_shape[2:])

        # set mode
        caffe.set_mode_gpu()

        # set caffe model; temporarily redirect stderr to /dev/null to
        # silence Caffe's verbose logging while the net is constructed
        null_fds = os.open(os.devnull, os.O_RDWR)
        out_orig = os.dup(2)
        os.dup2(null_fds, 2)
        net = caffe.Net(network_file, pretrained_model[i], caffe.TEST)
        os.dup2(out_orig, 2)
        os.close(null_fds)

        # open test file
        with open(test_file[i], 'r') as f:
            lines = f.readlines()

        label_list = []
        prec_list = []

        for line in lines:
            linesplit = line.split(' ')
            label = float(linesplit[1].split("\r")[0])
            img = os.path.join(root, linesplit[0])
            img_data = load_img(img, resize = (256, 256), isColor = True, crop_size = 224, crop_type = 'center_crop',
                     raw_scale = 255, means = means)

            net.blobs['data'].data[...] = img_data
            out = net.forward()
            prec = net.blobs['feat1'].data[...][0][0]
            label_list.append(label)
            prec_list.append(prec)

        label_list = np.array(label_list)
        prec_list = np.array(prec_list)
        correlation = np.corrcoef(label_list, prec_list)[0][1]
        mae = np.mean(np.abs(label_list - prec_list))
        rmse = np.sqrt(np.mean(np.square(label_list - prec_list)))

        print('Model: {name}\t'
            'Correlation: {correlation:.4f}\t'
            'Mae: {mae:.4f}\t'
            'Rmse: {rmse:.4f}\t'.format(name=pretrained_model[i], correlation=float(correlation), mae=float(mae), rmse=float(rmse)))
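A toy check (not part of the original example) of the three metrics computed above, on made-up score arrays.

import numpy as np
label_list = np.array([1.0, 2.0, 3.0, 4.0])
prec_list = np.array([1.1, 2.2, 2.8, 3.9])
print(np.corrcoef(label_list, prec_list)[0][1])             # Pearson correlation
print(np.mean(np.abs(label_list - prec_list)))              # MAE
print(np.sqrt(np.mean(np.square(label_list - prec_list))))  # RMSE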
Ejemplo n.º 56
0
def main(input, output, model_path, model_name, octaves, octave_scale,
         iterations, jitter, stepsize, blend, layers, guide_image, start_frame,
         end_frame, verbose):

    make_sure_path_exists(input)
    make_sure_path_exists(output)

    # get the max number of frames to process
    nrframes = len([
        name for name in os.listdir(input)
        if os.path.isfile(os.path.join(input, name))
    ])
    if nrframes == 0:
        print("no frames to process found")
        sys.exit(0)

    if octaves is None: octaves = 5
    if octave_scale is None: octave_scale = 1.4
    if iterations is None: iterations = 4
    if jitter is None: jitter = 32
    if stepsize is None: stepsize = 1.5
    if blend is None: blend = 0.5  #can be nr (constant), random, or loop
    if verbose is None: verbose = 1
    if layers is None:
        layers = 'inception_5a/pool_proj'  #['inception_4c/output']
    if start_frame is None:
        frame_i = 1
    else:
        frame_i = int(start_frame)
    if end_frame is not None:
        nrframes = int(end_frame) + 1
    else:
        nrframes = nrframes + 1

    # If your GPU supports CUDA and Caffe was built with CUDA support,
    # uncomment the following to run Caffe operations on the GPU.
    caffe.set_mode_gpu()
    # caffe.set_device(0) # select GPU device if multiple devices exist

    # Loading DNN Model
    net_fn = model_path + 'deploy.prototxt'
    param_fn = model_path + model_name

    # Patching model to be able to compute gradients.
    # Note that you can also manually add "force_backward: true" line to "deploy.prototxt".
    model = caffe.io.caffe_pb2.NetParameter()
    with open(net_fn) as f:
        text_format.Merge(f.read(), model)
    model.force_backward = True
    with open('tmp.prototxt', 'w') as f:
        f.write(str(model))

    net = caffe.Classifier(
        'tmp.prototxt',
        param_fn,
        mean=np.float32([104.0, 116.0,
                         122.0]),  # ImageNet mean, training set dependent
        channel_swap=(
            2, 1,
            0))  # the reference model has channels in BGR order instead of RGB

    if verbose == 3:
        from IPython.display import clear_output, Image, display
        print("display turned on")
    img = np.float32(PIL.Image.open(input + '/%08d.png' % (frame_i)))
    h, w, c = img.shape

    #Choosing between normal dreaming, and guided dreaming
    if guide_image is None:
        hallu = deepdream(net,
                          img,
                          iter_n=iterations,
                          step_size=stepsize,
                          octave_n=octaves,
                          octave_scale=octave_scale,
                          jitter=jitter,
                          end=layers)
    else:
        guide = np.float32(PIL.Image.open(guide_image))
        print('Setting up Guide with selected image')
        guide_features = prepare_guide(net,
                                       PIL.Image.open(guide_image),
                                       end=layers)
        hallu = deepdream_guided(net,
                                 img,
                                 iter_n=iterations,
                                 step_size=stepsize,
                                 octave_n=octaves,
                                 octave_scale=octave_scale,
                                 jitter=jitter,
                                 end=layers,
                                 objective_fn=objective_guide,
                                 guide_features=guide_features)

    np.clip(hallu, 0, 255, out=hallu)
    PIL.Image.fromarray(np.uint8(hallu)).save(output + '/%08d.png' % (frame_i))
    grayImg = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    blend_forward = True
    blend_at = 0
    blend_step = 0.02
    for i in xrange(frame_i, nrframes):
        previousImg = img
        previousGrayImg = grayImg
        img = np.float32(PIL.Image.open(input + '/%08d.png' % (i + 1)))
        grayImg = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        flow = cv2.calcOpticalFlowFarneback(previousGrayImg,
                                            grayImg,
                                            None,
                                            pyr_scale=0.5,
                                            levels=3,
                                            winsize=15,
                                            iterations=3,
                                            poly_n=5,
                                            poly_sigma=1.2,
                                            flags=0)
        flow = -flow
        flow[:, :, 0] += np.arange(w)
        flow[:, :, 1] += np.arange(h)[:, np.newaxis]
        halludiff = hallu - previousImg
        halludiff = cv2.remap(halludiff, flow, None, cv2.INTER_LINEAR)
        hallu = img + halludiff
        if guide_image is None:
            hallu = deepdream(net,
                              hallu,
                              iter_n=iterations,
                              step_size=stepsize,
                              octave_n=octaves,
                              octave_scale=octave_scale,
                              jitter=jitter,
                              end=layers)
        else:
            guide = np.float32(PIL.Image.open(guide_image))
            print('Setting up Guide with selected image')
            guide_features = prepare_guide(net,
                                           PIL.Image.open(guide_image),
                                           end=layers)
            hallu = deepdream_guided(net,
                                     hallu,
                                     iter_n=iterations,
                                     step_size=stepsize,
                                     octave_n=octaves,
                                     octave_scale=octave_scale,
                                     jitter=jitter,
                                     end=layers,
                                     objective_fn=objective_guide,
                                     guide_features=guide_features)
        np.clip(hallu, 0, 255, out=hallu)
        PIL.Image.fromarray(np.uint8(hallu)).save(output + '/%08d.png' %
                                                  (i + 1))

        # if blend_at > 1 - blend_step: blend_forward = False
        # elif blend_at <= 0.5: blend_forward = True
        # if blend_forward: blend_at += blend_step
        # else: blend_at -= blend_step
        # blendval = blend_at
        blendval = 0.5
        # Blend the next input frame with the previous dreamed output so the
        # hallucination is carried forward into the next iteration.
        img = morphPicture(input + '/%08d.png' % (i + 1),
                           output + '/%08d.png' % (i), blendval)
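morphPicture is not shown in this snippet; a minimal sketch of what it presumably does, assuming a plain PIL blend (the name, argument order, and the float32 result are inferred from the call sites):

import numpy as np
import PIL.Image

def morphPicture(filename1, filename2, blend):
    # Blend the next input frame with the dreamed frame so the
    # hallucination carries over smoothly between frames.
    img1 = PIL.Image.open(filename1)
    img2 = PIL.Image.open(filename2)
    return np.float32(PIL.Image.blend(img1, img2, blend))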
Example No. 57
def main(args):
	caffe.set_mode_gpu()
	caffe.set_device(0)
	solver1 = caffe.SGDSolver(args.dss_proto)
	if 'vgg' in args.dss_weights:
		interp_layers = [k for k in solver1.net.params.keys() if 'up' in k]
		interp_surgery(solver1.net, interp_layers)
	solver1.net.copy_from(args.dss_weights)
	print('loaded solver1')

	inputsize = 500
	gt_ = np.zeros(shape=(1,1,inputsize,inputsize))
	img_ = np.zeros(shape=(1,4,inputsize,inputsize))

	matfile1 = sio.loadmat(args.datalist1)['trainImgSet']
	datalist1 = [matfile1[i][0][0] for i in range(matfile1.shape[0])]
	matfile2 = sio.loadmat(args.datalist2)['trainImgSet']
	datalist2 = [matfile2[i][0][0] for i in range(matfile2.shape[0])]
	matfile3 = sio.loadmat(args.datalist3)['trainImgSet']
	datalist3 = [matfile3[i][0][0] for i in range(matfile3.shape[0])]

	valinput = './dataset/'+args.valdata+'/imgs/'
	valgt = './dataset/'+args.valdata+'/gt/' 
	valmatfile = './dataset/'+args.valdata+'/valImgSet.mat'
	valmatfile = sio.loadmat(valmatfile)['valImgSet'] 
	vallist = [valmatfile[i][0][0] for i in range(valmatfile.shape[0])] 

	logfile = args.logfile
	if logfile == '': logfile = args.prefix+'.log'
	if os.path.isfile(logfile): os.system('rm '+logfile)

	learn_data3_prob = 0.3
	learn_data2_prob = 0.4
	# learn_data2_prob = 0 
	# learn_data3_prob = 0
	loss_arch = []
	loss_bigarch = []
	start_t = time.time()
	it = args.start_snapshot 
	while it < args.max_iter:

		'''if it < args.max_iter/3 and it+1 >= args.max_iter/3:
			learn_data3_prob = 0.6
			learn_data2_prob = 0.3
		if it < args.max_iter*2/3 and it+1 >= args.max_iter*2/3:
			learn_data3_prob = 0.4
			learn_data2_prob = 0.4'''

		tmpinput = args.inputdir1
		tmpgt = args.gtdir1
		tmplist = datalist1
		tmpext = '.jpg'
		r = random.uniform(0.,1.)
		if r < learn_data3_prob:
			tmpinput = args.inputdir3
			tmpgt = args.gtdir3
			tmplist = datalist3
			tmpext = '.png'
		elif r < learn_data2_prob + learn_data3_prob:
			tmpinput = args.inputdir2 
			tmpgt = args.gtdir2 
			tmplist = datalist2 

		i = it%len(tmplist) 
		gt = Image.open(tmpgt + tmplist[i][:-4] + '.png')
		img = Image.open(tmpinput + tmplist[i][:-4] + tmpext)
		if random.random() > args.flip_prob:
			gt = gt.transpose(Image.FLIP_LEFT_RIGHT)
			img = img.transpose(Image.FLIP_LEFT_RIGHT)
		imgw, imgh = img.size

		gt = preprocess_gt(gt)
		img = vgg_preprocess(img)

		# Fill the fixed-size input buffers. 'prior' is defined elsewhere in
		# the full script; the gt_ copy is assumed (otherwise gt_ stays all zeros).
		gt_[...] = gt
		img_[:,:3,:,:] = img
		img_[:,3,:,:] = prior
		solver1.net.clear_param_diffs()
		solver1.net.blobs['R1'].data[...] = img_
		solver1.net.forward()
		sm = solver1.net.blobs['fc8_saliency_reg'].data.copy()
		gt_t = torch.tensor(gt_, dtype=torch.float32, device=cuda0, requires_grad=False)
		sm_t = torch.tensor(sm, dtype=torch.float32, device=cuda0, requires_grad=True)
		weight_t = torch.tensor(weight_, dtype=torch.float32, device=cuda0, requires_grad=False)
		loss = F.binary_cross_entropy(sm_t, gt_t, weight=weight_t, size_average=False)
		if len(loss_arch) < args.display_every:
			loss_arch.append(float(loss))
		else:
			loss_arch[it % args.display_every] = float(loss)
		if len(loss_bigarch) < args.snapshot_every:
			loss_bigarch.append(float(loss)) 
		else:
			loss_bigarch[it % args.snapshot_every] = float(loss)

		# sigmoid_fuse = solver1.net.blobs['sigmoid-fuse'].data.copy()

		# Assumed bridge back into Caffe: backprop the PyTorch loss and copy
		# the gradient into the output blob's diff, since otherwise the Caffe
		# backward pass below would propagate zeros.
		loss.backward()
		solver1.net.blobs['fc8_saliency_reg'].diff[...] = sm_t.grad.cpu().numpy()
		solver1.net.backward()
		solver1.apply_update()
		solver1.increment_iter()

		if it % args.display_every == 0:
			meanloss = sum(loss_arch) * 1.0 / len(loss_arch)
			print >> sys.stderr, "[%s] Iteration %d: %.2f seconds loss:%.4f" % (
				time.strftime("%c"), it, time.time() - start_t, meanloss)

		if it % args.snapshot_every == 0:
			trainloss = sum(loss_bigarch) * 1.0 / len(loss_bigarch) 
			vallosses = []
			tmpdir = 'tmp/'
			if os.path.isdir(tmpdir):
				os.system('rm '+tmpdir+'*')
			else:
				os.makedirs(tmpdir) 
			for j in range(len(vallist)):
				gt = Image.open(valgt+vallist[j][:-4]+'.png') 
				img = Image.open(valinput+vallist[j][:-4]+'.jpg') 
				gt = preprocess_gt(gt) 
				img = vgg_preprocess(img)
				solver1.net.clear_param_diffs()
				solver1.net.blobs['data'].reshape(*img.shape)
				solver1.net.blobs['data'].data[...] = img
				solver1.net.blobs['label'].reshape(*gt.shape)
				solver1.net.blobs['label'].data[...] = gt
				solver1.net.forward()
				loss = solver1.net.blobs['loss-fuse'].data.copy()
				vallosses.append(float(loss))
				sigmoid_fuse = solver1.net.blobs['sigmoid-fuse'].data.copy()
				pred = Image.fromarray(np.squeeze(np.rint(sigmoid_fuse*255.0).astype(np.uint8)))
				pred.save(tmpdir+vallist[j][:-4]+'.png')
			valloss = sum(vallosses) * 1.0 / len(vallosses)
			import matlab.engine
			eng  = matlab.engine.start_matlab()
			eng.addpath('/research/adv_saliency/evaluation')
			mae,p,r,fm = eng.callEvalFunc(tmpdir, valgt, nargout=4)
			with open(logfile,'a') as f:
				f.write('iter:%d trainloss:%.4f valloss:%.4f mae:%.4f p:%.4f r:%.4f f:%.4f\n'%(
					it,trainloss,valloss,mae,p,r,fm))

			curr_snapshot_folder = args.snapshot_folder +'/' + str(it)
			print >> sys.stderr, '\n === Saving snapshot to ' + curr_snapshot_folder + ' ===\n'
			solver1.snapshot()
		it = it + 1
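The interp_surgery call near the top of this example is the standard FCN recipe: initialize the deconvolution ('up*') layers with fixed bilinear-interpolation kernels so they start out as plain upsamplers. A sketch under that assumption (the real helper may differ in details):

import numpy as np

def upsample_filt(size):
    # Build a square 2-D bilinear-interpolation kernel.
    factor = (size + 1) // 2
    center = factor - 1.0 if size % 2 == 1 else factor - 0.5
    og = np.ogrid[:size, :size]
    return ((1 - abs(og[0] - center) / factor) *
            (1 - abs(og[1] - center) / factor))

def interp_surgery(net, layers):
    for l in layers:
        m, k, h, w = net.params[l][0].data.shape
        if m != k or h != w:
            raise ValueError('expected square, channel-preserving deconv: ' + l)
        # Zero everything, then set one bilinear filter per channel
        # (channel i maps only to channel i).
        net.params[l][0].data[...] = 0
        net.params[l][0].data[range(m), range(k), :, :] = upsample_filt(h)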
Example No. 58
    def __init__(self, settings, key_bindings):
        super(CaffeVisApp, self).__init__(settings, key_bindings)
        print 'Got settings', settings
        self.settings = settings
        self.bindings = key_bindings

        self._net_channel_swap = (2, 1, 0)
        self._net_channel_swap_inv = tuple([
            self._net_channel_swap.index(ii)
            for ii in range(len(self._net_channel_swap))
        ])
        self._range_scale = 1.0  # not needed; image already in [0,255]

        # Set the mode to CPU or GPU. Note: in the latest Caffe
        # versions, there is one Caffe object *per thread*, so the
        # mode must be set per thread! Here we set the mode for the
        # main thread; it is also separately set in CaffeProcThread.
        sys.path.insert(0, os.path.join(settings.caffevis_caffe_root,
                                        'python'))
        import caffe
        if settings.caffevis_mode_gpu:
            caffe.set_mode_gpu()
            print 'CaffeVisApp mode (in main thread):     GPU'
        else:
            caffe.set_mode_cpu()
            print 'CaffeVisApp mode (in main thread):     CPU'
        self.net = caffe.Classifier(
            settings.caffevis_deploy_prototxt,
            settings.caffevis_network_weights,
            mean=None,  # set to None for now; assigned below from self._data_mean
            channel_swap=self._net_channel_swap,
            raw_scale=self._range_scale,
        )

        if isinstance(settings.caffevis_data_mean, basestring):
            # If the mean is given as a filename, load the file
            try:
                self._data_mean = np.load(settings.caffevis_data_mean)
            except IOError:
                print '\n\nCould not load mean file:', settings.caffevis_data_mean
                print 'Ensure that the values in settings.py point to a valid model weights file, network'
                print 'definition prototxt, and mean. To fetch a default model and mean file, use:\n'
                print '$ cd models/caffenet-yos/'
                print '$ ./fetch.sh\n\n'
                raise
            input_shape = self.net.blobs[self.net.inputs[0]].data.shape[
                -2:]  # e.g. 227x227
            # Crop center region (e.g. 227x227) if mean is larger (e.g. 256x256)
            excess_h = self._data_mean.shape[1] - input_shape[0]
            excess_w = self._data_mean.shape[2] - input_shape[1]
            assert excess_h >= 0 and excess_w >= 0, 'mean should be at least as large as %s' % repr(
                input_shape)
            self._data_mean = self._data_mean[:,
                                              (excess_h / 2):(excess_h / 2 +
                                                              input_shape[0]),
                                              (excess_w / 2):(excess_w / 2 +
                                                              input_shape[1])]
        elif settings.caffevis_data_mean is None:
            self._data_mean = None
        else:
            # The mean has been given as a value or a tuple of values
            self._data_mean = np.array(settings.caffevis_data_mean)
            # Promote to shape (C, 1, 1)
            while len(self._data_mean.shape) < 3:
                self._data_mean = np.expand_dims(self._data_mean, -1)

            #if not isinstance(self._data_mean, tuple):
            #    # If given as int/float: promote to tuple
            #    self._data_mean = tuple(self._data_mean)
        if self._data_mean is not None:
            self.net.transformer.set_mean(self.net.inputs[0], self._data_mean)

        check_force_backward_true(settings.caffevis_deploy_prototxt)

        self.labels = None
        if self.settings.caffevis_labels:
            self.labels = read_label_file(self.settings.caffevis_labels)
        self.proc_thread = None
        self.jpgvis_thread = None
        self.handled_frames = 0
        if settings.caffevis_jpg_cache_size < 10 * 1024**2:
            raise Exception(
                'caffevis_jpg_cache_size must be at least 10MB for normal operation.'
            )
        self.img_cache = FIFOLimitedArrayCache(
            settings.caffevis_jpg_cache_size)

        self._populate_net_layer_info()
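As the comment near the top of this example notes, recent Caffe builds keep the CPU/GPU mode per thread, so the mode chosen here in the main thread does not carry over to workers. A minimal illustration (the worker class is hypothetical; only the caffe calls are real API):

import threading

class Worker(threading.Thread):
    def run(self):
        import caffe
        # Must be repeated in every thread that touches the net.
        caffe.set_mode_gpu()
        caffe.set_device(0)
        # ... per-thread forward/backward work goes here ...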
Example No. 59
    @classmethod
    def gpu(cls, id=0):
        """Switch Caffe to GPU mode on the given device."""
        caffe.set_device(id)
        caffe.set_mode_gpu()
Example No. 60
def build_tsv():
    # Set up the simulator
    sim = MatterSim.Simulator()
    sim.setCameraResolution(WIDTH, HEIGHT)
    sim.setCameraVFOV(math.radians(VFOV))
    sim.setDiscretizedViewingAngles(True)
    sim.init()

    # Set up Caffe resnet
    caffe.set_device(GPU_ID)
    caffe.set_mode_gpu()
    net = caffe.Net(PROTO, MODEL, caffe.TEST)
    net.blobs['data'].reshape(BATCH_SIZE, 3, HEIGHT, WIDTH)

    count = 0
    t_render = Timer()
    t_net = Timer()
    with open(OUTFILE, 'wb') as tsvfile:
        writer = csv.DictWriter(tsvfile,
                                delimiter='\t',
                                fieldnames=TSV_FIELDNAMES)

        # Loop all the viewpoints in the simulator
        viewpointIds = load_viewpointids()
        for scanId, viewpointId in viewpointIds:
            t_render.tic()
            # Loop all discretized views from this location
            blobs = []
            features = np.empty([VIEWPOINT_SIZE, FEATURE_SIZE],
                                dtype=np.float32)
            for ix in range(VIEWPOINT_SIZE):
                if ix == 0:
                    sim.newEpisode(scanId, viewpointId, 0, math.radians(-30))
                elif ix % 12 == 0:
                    sim.makeAction(0, 1.0, 1.0)  # step up to the next elevation ring
                else:
                    sim.makeAction(0, 1.0, 0)  # rotate one 30-degree heading step

                state = sim.getState()
                assert state.viewIndex == ix

                # Transform and save generated image
                blobs.append(transform_img(state.rgb))

            t_render.toc()
            t_net.tic()
            # Run as many forward passes as necessary
            assert VIEWPOINT_SIZE % BATCH_SIZE == 0
            forward_passes = VIEWPOINT_SIZE // BATCH_SIZE
            ix = 0
            for f in range(forward_passes):
                for n in range(BATCH_SIZE):
                    # Copy image blob to the net
                    net.blobs['data'].data[n, :, :, :] = blobs[ix]
                    ix += 1
                # Forward pass
                output = net.forward()
                features[f * BATCH_SIZE:(f + 1) *
                         BATCH_SIZE, :] = net.blobs['pool5'].data[:, :, 0, 0]

            writer.writerow({
                'scanId': scanId,
                'viewpointId': viewpointId,
                'image_w': WIDTH,
                'image_h': HEIGHT,
                'vfov': VFOV,
                'features': base64.b64encode(features)
            })
            count += 1
            t_net.toc()
            if count % 100 == 0:
                print('Processed %d / %d viewpoints, %.1fs avg render time, %.1fs avg net time, projected %.1f hours' %\
                  (count,len(viewpointIds), t_render.average_time, t_net.average_time,
                  (t_render.average_time+t_net.average_time)*len(viewpointIds)/3600))
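To make the output format concrete, here is a sketch of reading the TSV back and decoding the base64-encoded feature matrix. The field list mirrors TSV_FIELDNAMES as used by the writer, and the 36 x 2048 shape assumes the usual VIEWPOINT_SIZE and FEATURE_SIZE for ResNet pool5 features; both are assumptions about this particular setup.

import base64
import csv
import sys

import numpy as np

csv.field_size_limit(sys.maxsize)  # feature fields far exceed the default limit

def read_tsv(path, viewpoint_size=36, feature_size=2048):
    fieldnames = ['scanId', 'viewpointId', 'image_w', 'image_h', 'vfov',
                  'features']
    items = []
    with open(path) as tsvfile:
        reader = csv.DictReader(tsvfile, delimiter='\t', fieldnames=fieldnames)
        for item in reader:
            # Decode the base64 blob back into a (views, features) matrix.
            item['features'] = np.frombuffer(
                base64.b64decode(item['features']),
                dtype=np.float32).reshape((viewpoint_size, feature_size))
            items.append(item)
    return items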