def data_loader(dataset_file):
    db = leveldb.LevelDB(dataset_file)
    datum = caffe_pb2.Datum()
    img1s = []
    img2s = []
    labels = []
    for key, value in db.RangeIter():
        datum.ParseFromString(value)
        label = datum.label
        data = caffe.io.datum_to_array(datum)
        # split data from 6-channel image into 2 3-channel images
        img1 = data[:3, :, :]
        img2 = data[3:, :, :]
        labels.append(label)
        img1s.append(img1)
        img2s.append(img2)
    return labels, img1s, img2s
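# A minimal usage sketch for data_loader above. The LevelDB path
# 'pairs_leveldb' is a hypothetical example, not taken from the original code.
labels, img1s, img2s = data_loader('pairs_leveldb')
print('loaded {} pairs; first pair shapes: {} / {}'.format(
    len(labels), img1s[0].shape, img2s[0].shape))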
def write_lmdb(db_path, list_filename, height, width, count_start=0, encode=False):
    map_size = 2 * 50000 * 256 * 256 * 3
    db = lmdb.open(db_path, map_size=map_size)
    writer = db.begin(write=True)
    datum = caffe_pb2.Datum()
    for count, line in enumerate(open(list_filename, 'r')):
        img_filename, label = line.strip().split(' ')
        datum.label = int(label)
        datum.channels = 3
        datum.height = height
        datum.width = width
        # read the image once so both branches can use it; note that
        # cv2.resize expects dsize as (width, height)
        img = cv2.resize(cv2.imread(img_filename, 1), (width, height))
        if encode:
            datum.encoded = True
            _, img_jpg = cv2.imencode('.jpg', img)
            datum.data = img_jpg.tostring()
        else:
            # HxWxC -> CxHxW
            datum.data = np.rollaxis(img, 2, 0).tostring()
        key = '%010d' % (count + count_start)
        writer.put(key, datum.SerializeToString(), append=True)
        print(key, label)
    writer.commit()
    db.close()
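# Hypothetical call to write_lmdb above; 'train_list.txt' is assumed to hold
# one "<image_path> <integer_label>" pair per line (both names illustrative).
write_lmdb('train_lmdb', 'train_list.txt', 256, 256, encode=True)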
def load_lmdb_label(lmdb_path, label_cnt):
    """load the label vector

    Returns:
        label_vector
    """
    print 'loading lmdb label vector ...'
    feature_lmdb_env = lmdb.open(lmdb_path)
    lmdb_txn = feature_lmdb_env.begin()
    lmdb_cursor = lmdb_txn.cursor()
    datum = caffe_pb2.Datum()
    label_vector = np.zeros((label_cnt), dtype=np.int32)
    for ix, (key, value) in enumerate(lmdb_cursor):
        if ix == label_cnt:
            break
        datum.ParseFromString(value)
        label_vector[ix] = datum.label
        if (ix + 1) % 2000 == 0:
            print 'label vector process %d' % (ix + 1)
    print 'finished loading lmdb label ...'
    return label_vector
def load_lmdb(lmdb_path, feature_cnt, feature_dim, pre_mode):
    """load features from an lmdb into a dense matrix

    Args:
        pre_mode : [0 : no process] [1 : L2-normalize] [2 : binarize] [3 : sign-power]

    Returns:
        data_feature_vector
    """
    print 'loading lmdb ...'
    feature_lmdb_env = lmdb.open(lmdb_path)
    lmdb_txn = feature_lmdb_env.begin()
    lmdb_cursor = lmdb_txn.cursor()
    datum = caffe_pb2.Datum()
    num = feature_cnt
    data_feature_vector = np.zeros((num, feature_dim), dtype=np.float64)
    for ix, (key, value) in enumerate(lmdb_cursor):
        if ix == feature_cnt:
            break
        datum.ParseFromString(value)
        data = caffe.io.datum_to_array(datum)
        data = np.squeeze(data)[:]
        data_feature_vector[ix, :] = data
        if pre_mode > 0:
            if pre_mode == 1:
                data_feature_vector[ix, :] = normalize_1d(data)
            elif pre_mode == 3:
                data_feature_vector[ix, :] = sign_power(data)
            else:
                data_feature_vector[ix, :] = binarize(data, feature_dim)
        if (ix + 1) % 2000 == 0:
            print 'feature process %d' % (ix + 1)
    print 'finished loading lmdb ...'
    return data_feature_vector
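# Illustrative usage of the two loaders above: fetch 10000 labels and 2048-d
# features, L2-normalizing each feature row (pre_mode=1). The paths and sizes
# are assumptions, not from the original code.
labels = load_lmdb_label('labels_lmdb', 10000)
feats = load_lmdb('features_lmdb', 10000, 2048, 1)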
def convertData2Lmdb(datapath):
    datadict = sio.loadmat(datapath)
    datadict.pop('__version__')
    datadict.pop('__header__')
    datadict.pop('__globals__')
    allplans = getPlan()
    for index, currplan in enumerate(allplans):
        map_size = currplan[0][0] * 20659 * 8
        env = lmdb.open('gene_' + str(index) + '_lmdb', map_size=map_size)
        txn = env.begin(write=True)
        count = 0
        for dictkeys in datadict:
            keywords = dictkeys.split('_')
            if keywords[1] not in currplan:
                continue
            if keywords[2] not in currplan[keywords[1]]:
                continue
            if currplan[keywords[1]][keywords[2]] == 0:
                continue
            currplan[keywords[1]][keywords[2]] -= 1
            data = [[[]]]
            label = 0 if keywords[2] == 'F' else 1
            for dictvals in datadict[dictkeys]:
                for dataelem in dictvals:
                    #dataelem = (dataelem - datamini) / (datamaxi - datamini) * (usermaxi - usermini) + usermini
                    data[0][0].append(float(dataelem))
            dataarr = np.array(data)
            datum = caffe.io.array_to_datum(dataarr, label)
            str_id = '{:08}'.format(count)
            txn.put(str_id, datum.SerializeToString())
            count += 1
            if count % 1000 == 0 or count == currplan[0][0]:
                print('train: already handled with {} samples'.format(count))
                txn.commit()
                txn = env.begin(write=True)
            if count == currplan[0][0]:
                break
        txn.commit()
        env.close()
def write_lmdb(db_path, dataset_name, count_start=0, encode=False):
    dataset = unpickle(dataset_name)
    map_size = (count_start + len(dataset['labels'])) * 2 * (3 * 32 * 32 + 128)
    db = lmdb.open(db_path, map_size=map_size)
    writer = db.begin(write=True)
    datum = caffe_pb2.Datum()
    for count, (img, label) in enumerate(zip(dataset['data'], dataset['labels'])):
        datum.label = label
        datum.channels = 3
        datum.height = 32
        datum.width = 32
        if encode:
            datum.encoded = True
            # CxHxW -> HxWxC for imencode
            img_rgb = np.rollaxis(img.reshape(3, 32, 32), 0, 3)
            _, img_jpg = cv2.imencode('.jpg', img_rgb)
            datum.data = img_jpg.tostring()
        else:
            datum.data = img.tostring()
        key = '%010d' % (count + count_start)
        writer.put(key, datum.SerializeToString(), append=True)
        print(key, label)
    writer.commit()
    db.close()
def Read_lmdb(lmdb_path, opt, mode='RGB'):  # lmdb_path, train/val, RGB
    if opt == 'train':
        lmdb_file = lmdb_path + 'train_lmdb/'
    elif opt == 'val':
        lmdb_file = lmdb_path + 'val_lmdb/'
    lmdb_env = lmdb.open(lmdb_file)
    lmdb_txn = lmdb_env.begin()
    lmdb_cursor = lmdb_txn.cursor()
    datum = caffe_pb2.Datum()
    images = []
    labels = []
    for key, value in lmdb_cursor:
        datum.ParseFromString(value)
        label = datum.label
        data = caffe.io.datum_to_array(datum)
        im = data.astype(np.uint8)
        images.append(im)  # add image to array
        labels.append(label)  # add label to array
    return {'images': images, 'labels': labels}
def read_images_from_lmdb(db_name, visualize):
    env = lmdb.open(db_name)
    txn = env.begin()
    cursor = txn.cursor()
    X = []
    y = []
    idxs = []
    for idx, (key, value) in enumerate(cursor):
        datum = caffe_pb2.Datum()
        datum.ParseFromString(value)
        # CxHxW -> WxHxC
        X.append(np.array(datum_to_array(datum)).swapaxes(0, 2))
        y.append(datum.label)
        idxs.append(idx)
    if visualize:
        print("Visualizing a few images...")
        for i in range(9):
            img = X[i]**(1 / 8)  # gamma stretch for display
            plt.subplot(3, 3, i + 1)
            plt.imshow(img)
            plt.title(y[i])
            plt.axis('off')
        plt.show()
    print(" ".join(["Reading from", db_name, "done!"]))
    return X, y, idxs
def main(): """ 从头遍历所有图片 """ db = lmdb.open('train_lmdb') txn = db.begin() cursor = txn.cursor() datum = caffe_pb2.Datum() cnt = 0 for key, value in cursor: datum.ParseFromString(value) label = datum.label data = caffe.io.datum_to_array(datum) # CxHxW to HxWxC in cv2 image = np.transpose(data, (1, 2, 0)) if cnt == 2: cv2.imwrite('test.png', image) print('{},{}'.format(key, label)) break cnt += 1 print(cnt)
def get_classes(self):
    """
    :return: the different classes available inside the LMDB file.
    """
    lmdb_env = lmdb.open(self.lmdb_folder)
    lmdb_txn = lmdb_env.begin()
    lmdb_cursor = lmdb_txn.cursor()
    classes = {}
    datum = caffe_pb2.Datum()
    for key, value in lmdb_cursor:
        datum.ParseFromString(value)
        label = datum.label
        if label not in classes:
            classes[label] = 0
        classes[label] += 1
    lmdb_env.close()
    return classes
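# Hypothetical usage of get_classes: it is written as a method, so assume a
# small owner object exposing an lmdb_folder attribute (names illustrative).
class DatasetStats(object):
    def __init__(self, lmdb_folder):
        self.lmdb_folder = lmdb_folder
    get_classes = get_classes  # reuse the function above as a method

stats = DatasetStats('train_lmdb')
for label, n in sorted(stats.get_classes().items()):
    print('class {}: {} samples'.format(label, n))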
def save_to_lmdb(images, labels, lmdb_file):
    if not os.path.exists(lmdb_file):
        batch_size = 256
        lmdb_env = lmdb.open(lmdb_file, map_size=int(1e12))
        lmdb_txn = lmdb_env.begin(write=True)
        item_id = 0
        datum = caffe_pb2.Datum()
        for i in range(images.shape[0]):
            im = cv2.imread(images[i])
            if im is None:
                continue
            im = cv2.resize(im, (IM_HEIGHT, IM_WIDTH))
            datum.channels = im.shape[2]
            datum.height = im.shape[0]
            datum.width = im.shape[1]
            datum.data = im.tobytes()
            datum.label = labels[i]
            keystr = '{:0>8d}'.format(item_id)
            lmdb_txn.put(keystr, datum.SerializeToString())
            # write batch
            if (item_id + 1) % batch_size == 0:
                lmdb_txn.commit()
                lmdb_txn = lmdb_env.begin(write=True)
                print('converted {} images'.format(item_id + 1))
            item_id += 1
        # write last batch; after the loop item_id equals the total count,
        # so the remainder check must use item_id, not item_id + 1
        if item_id % batch_size != 0:
            lmdb_txn.commit()
            print('converted {} images'.format(item_id))
        print('Generated ' + lmdb_file)
    else:
        print(lmdb_file + ' already exists')
def NegativeSampleMining(lmdbWriter, imgData, gtList, stride):
    width = gtList[0][4]
    height = gtList[0][5]
    datum = caffe_pb2.Datum()
    for row in range(0, imgData.size[1] - height - 1, stride):
        for col in range(0, imgData.size[0] - width - 1, stride):
            hit = False
            imgDisp = imgData.copy()
            draw = ImageDraw.Draw(imgDisp)
            b = (col, row, col + width, row + height, width, height)
            for gt in gtList:
                # print col, row, width, height, CalcIoU(a, b)
                # draw.rectangle([(col, row), (col + width, row + height)])
                iou = CalcIoU(gt, b)
                # plt.title(iou)  # debug leftover
                if iou > 0.2:
                    hit = True
                    break
            if not hit:
                draw.rectangle([(col, row), (col + width, row + height)])
                count[0] = count[0] + 1
                patch = GetCropImage(imgData, 0, (col, row, col + width, row + height))
                lmdbWriter.Put(np.array(patch), 0)
import sys
caffe_root = '/usr/local/caffe/'
sys.path.insert(0, caffe_root + 'python')
import caffe
import lmdb
import numpy as np
from caffe.proto import caffe_pb2
from util import yuv2rgb
from PIL import Image

y_lmdb_file = '/home/jiangliang/code/caffe_colorization/data/flowers/train_y.lmdb'
uv_lmdb_file = '/home/jiangliang/code/caffe_colorization/data/flowers/train_uv.lmdb'
y_lmdb = lmdb.open(y_lmdb_file, map_size=int(1e12))
uv_lmdb = lmdb.open(uv_lmdb_file, map_size=int(1e12))
y_txn = y_lmdb.begin()
y_cursor = y_txn.cursor()
y_datum = caffe_pb2.Datum()
uv_txn = uv_lmdb.begin()
uv_cursor = uv_txn.cursor()
uv_datum = caffe_pb2.Datum()
for key, value in y_cursor:
    print key
    y_datum.ParseFromString(value)
    data = caffe.io.datum_to_array(y_datum)
    # fetch the uv record with the same key instead of re-iterating the whole
    # uv cursor for every y record, which never pairs the entries correctly
    uv_datum.ParseFromString(uv_txn.get(key))
    label = caffe.io.datum_to_array(uv_datum)
    yuv = np.zeros((3, 227, 227))
    yuv[0, :, :] = data
    yuv[1:3, :, :] = label
def convertData2Lmdb(datapath):
    datadict = {}
    domnlist = [
        x for x in os.listdir(datapath)
        if os.path.isdir(os.path.join(datapath, x))
    ]
    domnlist.sort()
    for domnitem in domnlist:
        datadict[domnitem] = {}
        labllist = [
            x for x in os.listdir(os.path.join(datapath, domnitem))
            if os.path.isdir(os.path.join(datapath, domnitem, x))
        ]
        labllist.sort()
        for c, lablitem in enumerate(labllist):
            datadict[domnitem][c] = []
            lablpath = os.path.join(datapath, domnitem, lablitem)
            filelist = glob.iglob(os.path.join(lablpath, '*.jpg'))
            for f in filelist:
                img = cv2.imread(f)
                img = cv2.resize(img, (256, 256))
                img = img.transpose((2, 0, 1))  # HxWxC -> CxHxW
                datadict[domnitem][c].append(img)

    # round-robin over domains/classes until every quota in the plan is met
    tranplan, testplan = getPlan()
    map_size = 4000 * 196608 * 8
    env = lmdb.open('image_train_lmdb', map_size=map_size)
    txn = env.begin(write=True)
    count = 0
    counter = {}
    checker = True
    while checker:
        checker = False
        for domnitem in tranplan:
            if domnitem not in counter:
                counter[domnitem] = {}
            for c in tranplan[domnitem]:
                if c not in counter[domnitem]:
                    counter[domnitem][c] = 0
                if counter[domnitem][c] < tranplan[domnitem][c]:
                    index = counter[domnitem][c] % len(datadict[domnitem][c])
                    data = datadict[domnitem][c][index]
                    datum = caffe_pb2.Datum()
                    datum.channels = 3
                    datum.height = data.shape[1]
                    datum.width = data.shape[2]
                    datum.data = data.tostring()
                    datum.label = c
                    str_id = '{:08}'.format(count)
                    txn.put(str_id, datum.SerializeToString())
                    count += 1
                    if count % 1000 == 0:
                        print('train: already handled with {} samples'.format(count))
                        txn.commit()
                        txn = env.begin(write=True)
                    counter[domnitem][c] += 1
                    checker = True
    txn.commit()
    env.close()

    map_size = 2000 * 196608 * 8
    env = lmdb.open('image_test_lmdb', map_size=map_size)
    txn = env.begin(write=True)
    count = 0
    counter = {}
    checker = True
    while checker:
        checker = False
        for domnitem in testplan:
            if domnitem not in counter:
                counter[domnitem] = {}
            for c in testplan[domnitem]:
                if c not in counter[domnitem]:
                    counter[domnitem][c] = 0
                if counter[domnitem][c] < testplan[domnitem][c]:
                    index = counter[domnitem][c] % len(datadict[domnitem][c])
                    data = datadict[domnitem][c][index]
                    datum = caffe_pb2.Datum()
                    datum.channels = 3
                    datum.height = data.shape[1]
                    datum.width = data.shape[2]
                    datum.data = data.tostring()
                    datum.label = c
                    str_id = '{:08}'.format(count)
                    txn.put(str_id, datum.SerializeToString())
                    count += 1
                    if count % 1000 == 0:
                        print('test: already handled with {} samples'.format(count))
                        txn.commit()
                        txn = env.begin(write=True)
                    counter[domnitem][c] += 1
                    checker = True
    txn.commit()
    env.close()

    # domain-label lmdb: each record is a 1x1x1 placeholder whose label is
    # the domain index
    tranplan, testplan = getPlan()
    map_size = 4000 * 128 * 8
    env = lmdb.open('image_train_domain_lmdb', map_size=map_size)
    txn = env.begin(write=True)
    count = 0
    counter = {}
    checker = True
    while checker:
        checker = False
        for d, domnitem in enumerate(tranplan):
            if domnitem not in counter:
                counter[domnitem] = {}
            for c in tranplan[domnitem]:
                if c not in counter[domnitem]:
                    counter[domnitem][c] = 0
                if counter[domnitem][c] < tranplan[domnitem][c]:
                    #index = counter[domnitem][c] % len(datadict[domnitem][c])
                    data = np.array([[[0]]])
                    datum = caffe_pb2.Datum()
                    # the original hard-coded 3 channels here, but the
                    # placeholder array is 1x1x1
                    datum.channels = data.shape[0]
                    datum.height = data.shape[1]
                    datum.width = data.shape[2]
                    datum.data = data.tostring()
                    datum.label = d
                    str_id = '{:08}'.format(count)
                    txn.put(str_id, datum.SerializeToString())
                    count += 1
                    if count % 1000 == 0:
                        print('train: already handled with {} samples'.format(count))
                        txn.commit()
                        txn = env.begin(write=True)
                    counter[domnitem][c] += 1
                    checker = True
    txn.commit()
    env.close()
import caffe  # needed for caffe.io.datum_to_array below
import lmdb
import cv2
import numpy as np
from caffe.proto import caffe_pb2

lmdb_train = '../data/mnist/mnist_train_lmdb'
lmdb_test = '../data/mnist/mnist_test_lmdb'
lmdb_train_new = '../data/mnist/mnist_500'
# train_cnt_list = [50,50,50,2000,4000,10,100,1000,2000,4000]
train_cnt_list = [50, 50, 50, 50, 50, 50, 50, 50, 50, 50]

# read from the training lmdb
lmdb_env_read = lmdb.open(lmdb_train)
lmdb_txn_read = lmdb_env_read.begin()
lmdb_cursor_read = lmdb_txn_read.cursor()
datum_read = caffe_pb2.Datum()

write_train_lmdb_env = lmdb.open(lmdb_train_new, map_size=int(1e8))
write_train_lmdb_txn = write_train_lmdb_env.begin(write=True)

id_list = [-1] * 10
id_train = -1
id_test = -1
batch_size = 1000
for key, value in lmdb_cursor_read:
    datum_read.ParseFromString(value)
    label = datum_read.label
    data = caffe.io.datum_to_array(datum_read)
    id_list[label] += 1
    if id_list[label] < train_cnt_list[label]:
        id_train += 1
        keystr = '{:0>8d}'.format(id_train)
def test():
    import time
    labels = load_label()
    deploy_prototxt = "./models/bvlc_googlenet-modified/deploy-color.prototxt"
    weight_file = "./models/bvlc_googlenet-modified/snapshot/googlenet__iter_55000.caffemodel"
    test_lmdb = lmdb.open("./create_lmdb/test_color_lmdb_200")
    test_lmdb_txn = test_lmdb.begin()
    test_lmdb_cursor = test_lmdb_txn.cursor()
    batch_size = 300
    new_deploy_file = get_dynamic_batch_size(batch_size, deploy_prototxt)
    caffe.set_mode_gpu()
    caffe.set_device(0)
    net = caffe.Net(new_deploy_file, weight_file, caffe.TEST)
    datum = caffe_pb2.Datum()
    total, main_acc, sub_acc = 0, 0, 0
    index = 0
    key_name = []
    # labels are integer class ids; an int dtype lets them index the label list
    ground_truth_label = np.zeros((batch_size,), dtype=np.int32)
    start_time = time.time()
    for key, value in test_lmdb_cursor:
        datum.ParseFromString(value)
        label = datum.label
        key_name.append(key)
        ground_truth_label[index] = label
        img = caffe.io.datum_to_array(datum)
        img = np.transpose(img, (1, 2, 0))
        img = cv2.resize(img, (224, 224)).astype(np.float32)
        img = np.transpose(img, (2, 0, 1))
        #img = img[:, 1:225, 1:225].astype(np.float32)
        mean = np.array([126.88187408, 137.67976379, 162.78265381])
        mean = mean[:, np.newaxis, np.newaxis]
        img -= mean
        net.blobs['data'].data[index] = img
        index = index + 1
        total = total + 1
        if index == batch_size:
            print total, time.time() - start_time
            out = net.forward()
            predict_labels = np.argmax(out['prob'], axis=1)
            for idx in range(batch_size):
                if ground_truth_label[idx] == predict_labels[idx]:
                    sub_acc = sub_acc + 1
                    main_acc = main_acc + 1
                else:
                    label_name = labels[ground_truth_label[idx]]
                    predict_label_name = labels[predict_labels[idx]]
                    if label_name.split('_')[2] == predict_label_name.split('_')[2]:
                        main_acc = main_acc + 1
                    predict_log.info('{} {} {} {} {}'.format(
                        key_name[idx], label_name, predict_label_name,
                        label_name.split('_')[2], predict_label_name.split('_')[2]))
            index = 0
            key_name = []
    # flush the final partial batch, if any
    if index != 0:
        out = net.forward()
        predict_labels = np.argmax(out['prob'], axis=1)
        for idx in range(index):
            if ground_truth_label[idx] == predict_labels[idx]:
                sub_acc = sub_acc + 1
                main_acc = main_acc + 1
            else:
                label_name = labels[ground_truth_label[idx]]
                predict_label_name = labels[predict_labels[idx]]
                if label_name.split('_')[2] == predict_label_name.split('_')[2]:
                    main_acc = main_acc + 1
                predict_log.info('{} {} {} {} {}'.format(
                    key_name[idx], label_name, predict_label_name,
                    label_name.split('_')[2], predict_label_name.split('_')[2]))
    predict_log.info('{} {} {}'.format(total, sub_acc * 1.0 / total, main_acc * 1.0 / total))
    print 'finished', time.time() - start_time, batch_size
def convertData2Lmdb(datapath):
    datadict = {}
    domnlist = [
        x for x in os.listdir(datapath)
        if os.path.isdir(os.path.join(datapath, x))
    ]
    domnlist.sort()
    for domnitem in domnlist:
        labllist = [
            x for x in os.listdir(os.path.join(datapath, domnitem))
            if os.path.isdir(os.path.join(datapath, domnitem, x))
        ]
        labllist.sort()
        for c, lablitem in enumerate(labllist):
            lablpath = os.path.join(datapath, domnitem, lablitem)
            filelist = glob.iglob(os.path.join(lablpath, '*.jpg'))
            filenumb = 0
            for f in filelist:
                dictkeys = '{}_{}_{}'.format(filenumb, domnitem, c)
                img = cv2.imread(f)
                img = cv2.resize(img, (256, 256))
                img = img.transpose((2, 0, 1))  # HxWxC -> CxHxW
                datadict[dictkeys] = img
                filenumb = filenumb + 1

    # infodict = getInfoFromDict(datadict)
    # plantupl = getPlanFromDict(infodict)
    tranplan, testplan = getPlan()
    map_size = tranplan[0][0] * 196608 * 8
    env = lmdb.open('image_train_lmdb', map_size=map_size)
    txn = env.begin(write=True)
    count = 0
    for dictkeys in datadict:
        keywords = dictkeys.split('_')
        if keywords[1] not in tranplan:
            continue
        if int(keywords[2]) not in tranplan[keywords[1]]:
            continue
        if tranplan[keywords[1]][int(keywords[2])] == 0:
            continue
        tranplan[keywords[1]][int(keywords[2])] -= 1
        data = datadict[dictkeys]
        label = int(keywords[2])
        datum = caffe_pb2.Datum()
        datum.channels = 3
        datum.height = data.shape[1]
        datum.width = data.shape[2]
        datum.data = data.tostring()
        datum.label = label
        str_id = '{:08}'.format(count)
        txn.put(str_id, datum.SerializeToString())
        count += 1
        if count % 1000 == 0 or count == tranplan[0][0]:
            print('train: already handled with {} samples'.format(count))
            txn.commit()
            txn = env.begin(write=True)
        if count == tranplan[0][0]:
            break
    txn.commit()
    env.close()

    map_size = testplan[0][0] * 196608 * 8
    env = lmdb.open('image_test_lmdb', map_size=map_size)
    txn = env.begin(write=True)
    count = 0
    for dictkeys in datadict:
        keywords = dictkeys.split('_')
        if keywords[1] not in testplan:
            continue
        if int(keywords[2]) not in testplan[keywords[1]]:
            continue
        if testplan[keywords[1]][int(keywords[2])] == 0:
            continue
        testplan[keywords[1]][int(keywords[2])] -= 1
        data = datadict[dictkeys]
        label = int(keywords[2])
        datum = caffe_pb2.Datum()
        datum.channels = 3
        datum.height = data.shape[1]
        datum.width = data.shape[2]
        datum.data = data.tostring()
        datum.label = label
        str_id = '{:08}'.format(count)
        txn.put(str_id, datum.SerializeToString())
        count += 1
        if count % 1000 == 0 or count == testplan[0][0]:
            print('test: already handled with {} samples'.format(count))
            txn.commit()
            txn = env.begin(write=True)
        if count == testplan[0][0]:
            break
    txn.commit()
    env.close()

    dom2numb = {}
    tranplan, testplan = getPlan()
    map_size = tranplan[0][0] * 128 * 8
    env = lmdb.open('image_train_domain_lmdb', map_size=map_size)
    txn = env.begin(write=True)
    count = 0
    for dictkeys in datadict:
        keywords = dictkeys.split('_')
        if keywords[1] not in tranplan:
            continue
        if int(keywords[2]) not in tranplan[keywords[1]]:
            continue
        if tranplan[keywords[1]][int(keywords[2])] == 0:
            continue
        if keywords[1] not in dom2numb:
            numb = len(dom2numb)
            dom2numb[keywords[1]] = numb
        tranplan[keywords[1]][int(keywords[2])] -= 1
        data = [[[]]]
        # source_1 and source_2 are assumed to be module-level domain ids
        label = -1
        if dom2numb[keywords[1]] == source_1:
            label = 0
        if dom2numb[keywords[1]] == source_2:
            label = 1
        data[0][0].append(float(dom2numb[keywords[1]]))
        dataarr = np.array(data)
        datum = caffe.io.array_to_datum(dataarr, label)
        str_id = '{:08}'.format(count)
        txn.put(str_id, datum.SerializeToString())
        count += 1
        if count % 1000 == 0 or count == tranplan[0][0]:
            print('train: already handled with {} samples'.format(count))
            txn.commit()
            txn = env.begin(write=True)
        if count == tranplan[0][0]:
            break
    txn.commit()
    env.close()
def main(argv):
    db_path_test_label = ''
    db_path_test_feats = ''
    db_path_train_label = ''
    db_path_train_feats = ''
    mat_file = ''
    print argv
    try:
        # every option takes an argument, so each flag needs a trailing colon
        opts, args = getopt.getopt(argv, "t:l:r:g:m:", [
            "test_label_db=", "test_feature_db=", "train_label_db=",
            "train_feature_db=", "mat_file="
        ])
    except getopt.GetoptError:
        print 'feature_LDB_to_mat.py -t <test_label_db> -l <test_feature_db> -r <train_label_db> -g <train_feature_db> -m <output_mat_file>'
        sys.exit(2)
    print opts
    print args
    for opt, arg in opts:
        if opt in ("-t", "--test_label_db"):
            db_path_test_label = arg
        elif opt in ("-l", "--test_feature_db"):
            db_path_test_feats = arg
        elif opt in ("-r", "--train_label_db"):
            db_path_train_label = arg
        elif opt in ("-g", "--train_feature_db"):
            db_path_train_feats = arg
        elif opt in ("-m", "--mat_file"):
            mat_file = arg
    if not os.path.exists(db_path_test_label):
        raise Exception('db test label not found')
    if not os.path.exists(db_path_test_feats):
        raise Exception('db test feature not found')
    if not os.path.exists(db_path_train_label):
        print 'db_path_train_label is: ' + db_path_train_label
        raise Exception('db train label not found')
    if not os.path.exists(db_path_train_feats):
        raise Exception('db train feature not found')
    db_test_label = leveldb.LevelDB(db_path_test_label)
    db_test_feats = leveldb.LevelDB(db_path_test_feats)
    db_train_label = leveldb.LevelDB(db_path_train_label)
    db_train_feats = leveldb.LevelDB(db_path_train_feats)
    datum = caffe_pb2.Datum()
    datum_lb = caffe_pb2.Datum()
    start = time.time()

    # probe the test dbs for the record count and feature/label dimensions
    window_num = 0
    for key in db_test_feats.RangeIter(include_value=False):
        window_num = window_num + 1
    for key, value in db_test_feats.RangeIter():
        datum.ParseFromString(value)
        f_size = len(datum.float_data)
        break
    for key, value in db_test_label.RangeIter():
        datum.ParseFromString(value)
        l_size = len(datum.float_data)
        break
    te_ft = np.zeros((window_num, f_size))
    te_lb = np.zeros((window_num, l_size))

    # probe the train dbs the same way
    window_num = 0
    for key in db_train_feats.RangeIter(include_value=False):
        window_num = window_num + 1
    for key, value in db_train_feats.RangeIter():
        datum.ParseFromString(value)
        f_size = len(datum.float_data)
        break
    for key, value in db_train_label.RangeIter():
        datum.ParseFromString(value)
        l_size = len(datum.float_data)
        break
    tr_ft = np.zeros((window_num, f_size))
    tr_lb = np.zeros((window_num, l_size))

    count = 0
    for key in db_test_feats.RangeIter(include_value=False):
        datum.ParseFromString(db_test_feats.Get(key))
        datum_lb.ParseFromString(db_test_label.Get(key))
        te_ft[count, :] = datum.float_data
        te_lb[count, :] = datum_lb.float_data
        count = count + 1
        print 'convert feature # : %d key is %s' % (count, key)
    count = 0
    for key in db_train_feats.RangeIter(include_value=False):
        datum.ParseFromString(db_train_feats.Get(key))
        datum_lb.ParseFromString(db_train_label.Get(key))
        tr_ft[count, :] = datum.float_data
        tr_lb[count, :] = datum_lb.float_data
        count = count + 1
    print 'time 1: %f' % (time.time() - start)
    prob = problem(tr_lb[1, :], tr_ft)
    m = train(prob, '-c 4')
    # the original passed an undefined name `te`; the test features are te_ft
    p_label, p_acc, p_val = predict(te_lb[:], te_ft, m)
    print 'done!'
import lmdb
import numpy as np
from caffe.proto import caffe_pb2

npy_path = '/home/zhaoliu/car_brand/lmdb_data_new/train.npy'
npy_path_label = '/home/zhaoliu/car_brand/datasets_new/tongji/train_tongji.npy'
lmdb_path = "/mnt/disk/zhaoliu_data/small_car_lmdb/train_val_lmdb"
txt = '/home/zhaoliu/car_brand/lmdb_data_new/weight_txt/weight.txt'

lmdb_env = lmdb.open(lmdb_path)
lmdb_txn = lmdb_env.begin()
datum = caffe_pb2.Datum()

keys_list = np.load(npy_path).tolist()
label_list = np.load(npy_path_label).tolist()
f = open(txt, 'a')
# i = 0
txt_list = []
print(len(keys_list))
print(len(label_list))
# for i in range(len(keys_list)):
def generate_arrays_from_file(params_transform, params_train):
    path = params_train['lmdb_path']
    batch_size = params_train['batch_size']
    lmdb_env = lmdb.open(path, readonly=True)
    with lmdb_env.begin() as lmdb_txn:
        lmdb_cursor = lmdb_txn.cursor()
        datum = caffe_pb2.Datum()
        cnt = 0
        X = []
        Y = []
        GT = []
        while True:
            # rewind the cursor so each pass starts a fresh epoch
            lmdb_cursor.first()
            for idx, (key, value) in enumerate(lmdb_cursor):
                datum.ParseFromString(value)
                data = caffe.io.datum_to_array(datum)
                cocoImg = COCOLmdb(data, params_transform)
                cocoImg.add_neck()
                cocoImg.aug_scale()
                cocoImg.aug_croppad()
                cocoImg.aug_flip()
                cocoImg.set_ground_truth()
                # (optional debug hooks: cocoImg.visualize(),
                # cocoImg.visualize_heat_maps(),
                # cocoImg.visualize_pafs_single_figure())
                sample, label, gt = cocoImg.get_sample_label()
                X.append(sample)
                Y.append(label)
                gt = np.zeros((1,))
                GT.append(gt)
                cnt += 1
                if cnt == batch_size:
                    cnt = 0
                    X = np.array(X)
                    Y = np.array(Y)
                    GT = np.array(GT)
                    GTs = [GT for i in range(6)]
                    yield (dict(image=X, label=Y), GTs)
                    X = []
                    Y = []
                    GT = []
test_labels = np.array(test_labels)
print("\t\t" + str(len(test_labels)) + " labels successfully loaded.\n")

#######################
##  Load Features   ##
#######################
# train
print("Loading features from train_" + layer + "_" + net + " database...")
db1 = lmdb.open("train_" + layer + "_" + net)
txn1 = db1.begin()
kvpairs1 = list(txn1.cursor().iternext(keys=True, values=True))
blob1 = cpb.Datum()
for key, value in kvpairs1:
    blob1.ParseFromString(value)
    feature_vector = np.array(blob1.float_data)
    train_features.append(feature_vector)
train_features = np.array(train_features)  # converting back to np array
print("\t\tfeatures successfully saved!\n")

# test
print("Loading features from test_" + layer + "_" + net + " database...")
db2 = lmdb.open("test_" + layer + "_" + net)
txn2 = db2.begin()
kvpairs2 = list(txn2.cursor().iternext(keys=True, values=True))
blob2 = cpb.Datum()
def convertData2Lmdb(datapath):
    datadict = sio.loadmat(datapath)
    datadict.pop('__version__')
    datadict.pop('__header__')
    datadict.pop('__globals__')
    infodict = getInfoFromDict(datadict)
    plantupl = getPlanFromDict(infodict)
    #scaltupl = getScalFromPlan(datadict, plantupl)
    #seeScalFromTupl(scaltupl)
    #datamini = scaltupl[0][4]
    #datamaxi = scaltupl[1][4]
    #usermini = int(input('please input the minimal value for scaling: '))
    #usermaxi = int(input('please input the maximal value for scaling: '))
    tranplan, testplan = cpyPlanFromPlan(plantupl)

    map_size = tranplan[0][0] * 20659 * 8
    env = lmdb.open('gene_train_lmdb', map_size=map_size)
    txn = env.begin(write=True)
    count = 0
    for dictkeys in datadict:
        keywords = dictkeys.split('_')
        if keywords[1] not in tranplan:
            continue
        if keywords[2] not in tranplan[keywords[1]]:
            continue
        if tranplan[keywords[1]][keywords[2]] == 0:
            continue
        tranplan[keywords[1]][keywords[2]] -= 1
        data = [[[]]]
        label = 0 if keywords[2] == 'F' else 1
        for dictvals in datadict[dictkeys]:
            for dataelem in dictvals:
                #dataelem = (dataelem - datamini) / (datamaxi - datamini) * (usermaxi - usermini) + usermini
                data[0][0].append(float(dataelem))
        dataarr = np.array(data)
        datum = caffe.io.array_to_datum(dataarr, label)
        str_id = '{:08}'.format(count)
        txn.put(str_id, datum.SerializeToString())
        count += 1
        if count % 1000 == 0 or count == tranplan[0][0]:
            print('train: already handled with {} samples'.format(count))
            txn.commit()
            txn = env.begin(write=True)
        if count == tranplan[0][0]:
            break
    txn.commit()
    env.close()

    map_size = testplan[0][0] * 20659 * 8
    env = lmdb.open('gene_test_lmdb', map_size=map_size)
    txn = env.begin(write=True)
    count = 0
    for dictkeys in datadict:
        keywords = dictkeys.split('_')
        if keywords[1] not in testplan:
            continue
        if keywords[2] not in testplan[keywords[1]]:
            continue
        if testplan[keywords[1]][keywords[2]] == 0:
            continue
        testplan[keywords[1]][keywords[2]] -= 1
        data = [[[]]]
        label = 0 if keywords[2] == 'F' else 1
        for dictvals in datadict[dictkeys]:
            for dataelem in dictvals:
                data[0][0].append(float(dataelem))
        dataarr = np.array(data)
        datum = caffe.io.array_to_datum(dataarr, label)
        str_id = '{:08}'.format(count)
        txn.put(str_id, datum.SerializeToString())
        count += 1
        if count % 1000 == 0 or count == testplan[0][0]:
            print('test: already handled with {} samples'.format(count))
            txn.commit()
            txn = env.begin(write=True)
        if count == testplan[0][0]:
            break
    txn.commit()
    env.close()
def main(leveldb_dir, limit):
    datum = caffe_pb2.Datum()
    db = leveldb.LevelDB(leveldb_dir)
    for i in range(0, limit):
        datum.ParseFromString(db.Get(str(i)))
        print datum.float_data, datum.label
def find_image_dimension(leveldb):
    _, v = leveldb.iterator().next()
    datum = caffe_pb2.Datum()
    datum.ParseFromString(v)
    return datum.height, datum.width, datum.channels
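# Hedged usage sketch for find_image_dimension, assuming a plyvel-style DB
# handle (the function calls .iterator(), which py-leveldb does not expose);
# the database path is an illustrative assumption.
import plyvel
db = plyvel.DB('train_leveldb')
h, w, c = find_image_dimension(db)
print('stored images are {} x {} x {} (HxWxC)'.format(h, w, c))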
def main():
    train = sio.loadmat('data_svhn/train_32x32.mat')
    test = sio.loadmat('data_svhn/test_32x32.mat')
    train_data = train['X']
    train_label = train['y']
    test_data = test['X']
    test_label = test['y']
    # HxWxCxN -> NxCxHxW
    train_data = np.swapaxes(train_data, 0, 3)
    train_data = np.swapaxes(train_data, 1, 2)
    train_data = np.swapaxes(train_data, 2, 3)
    test_data = np.swapaxes(test_data, 0, 3)
    test_data = np.swapaxes(test_data, 1, 2)
    test_data = np.swapaxes(test_data, 2, 3)

    N = train_label.shape[0]
    map_size = train_data.nbytes * 10
    env = lmdb.open('svhn_train_lmdb', map_size=map_size)
    txn = env.begin(write=True)
    # shuffle the training data
    r = list(range(N))
    random.shuffle(r)
    count = 0
    for i in r:
        label = int(train_label[i][0])
        if label == 10:
            label = 0  # SVHN stores digit 0 as class 10
        datum = caffe.io.array_to_datum(train_data[i], label)
        str_id = '{:08}'.format(count)
        txn.put(str_id, datum.SerializeToString())
        count += 1
        if count % 1000 == 0:
            print('already handled with {} pictures'.format(count))
            txn.commit()
            txn = env.begin(write=True)
    txn.commit()
    env.close()

    map_size = test_data.nbytes * 10
    env = lmdb.open('svhn_test_lmdb', map_size=map_size)
    txn = env.begin(write=True)
    count = 0
    for i in range(test_label.shape[0]):
        label = int(test_label[i][0])
        if label == 10:
            label = 0
        datum = caffe.io.array_to_datum(test_data[i], label)
        str_id = '{:08}'.format(count)
        txn.put(str_id, datum.SerializeToString())
        count += 1
        if count % 1000 == 0:
            print('already handled with {} pictures'.format(count))
            txn.commit()
            txn = env.begin(write=True)
    txn.commit()
    env.close()
def make_datum(img, label):
    return caffe_pb2.Datum(channels=3,
                           width=SUB_W,
                           height=SUB_H,
                           label=label,
                           data=np.rollaxis(img, 2).tostring())
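# Minimal sketch of writing one make_datum record into an lmdb. The image
# path, database name, and map_size are illustrative assumptions; SUB_W and
# SUB_H come from the surrounding module.
import cv2
import lmdb
img = cv2.resize(cv2.imread('patch.jpg'), (SUB_W, SUB_H))
with lmdb.open('patch_lmdb', map_size=int(1e9)) as env:
    with env.begin(write=True) as txn:
        txn.put(b'00000000', make_datum(img, 0).SerializeToString())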
mkdir_if_missing(lmdb_file)
# lmdb buffers writes and flushes them in one pass, so batch_size controls
# how many records are written per commit
batch_size = 200
channel = 6
resize_height = 160
resize_width = 72

# get file_name from txt
#txt_file = '/home/jiening/dgd_person_reid/external/exp/db/SYSU_3/val.txt'
txt_file = '/home/jiening/dgd_person_reid/external/exp/db/SYSU_3/train.txt'
data_b_path = '/home/jiening/dgd_person_reid/external/exp/datasets/SYSU_3_b_x2'
data_path = '/home/jiening/dgd_person_reid/external/exp/datasets/SYSU_3'

# create the lmdb file
lmdb_env = lmdb.open(lmdb_file, map_size=int(1e12))  # open the database with a generous maximum size
lmdb_txn = lmdb_env.begin(write=True)  # write transaction handle
datum = caffe_pb2.Datum()  # the core Caffe record type

f = open(txt_file, 'r')
file_length = len(f.readlines())
print(file_length)
f.close()

# img_new = np.zeros((channel, resize_height, resize_width))
f = open(txt_file, 'r')
count = 0
count_cam1 = 0
for line in open(txt_file):
    count += 1
    line = f.readline()
    # print line
    line = line.split(' ')  # divide the file_name and label
def make_datum(img, label):
    return caffe_pb2.Datum(channels=3,
                           width=IMAGE_WIDTH,
                           height=IMAGE_HEIGHT,
                           label=label,
                           data=np.rollaxis(img, 2).tostring())
def convert_imageid2shapeid(datum_string, def_param=imageid2shapeid_mapping):
    datum = caffe_pb2.Datum()
    datum.ParseFromString(datum_string)
    datum.label = imageid2shapeid_mapping[datum.label]
    assert datum.label != -1
    return datum.SerializeToString()
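# Hedged sketch of applying convert_imageid2shapeid while copying one lmdb
# into another; both database names are illustrative assumptions.
import lmdb
src = lmdb.open('image_lmdb', readonly=True)
dst = lmdb.open('shape_lmdb', map_size=int(1e12))
with src.begin() as rtxn, dst.begin(write=True) as wtxn:
    for key, value in rtxn.cursor():
        wtxn.put(key, convert_imageid2shapeid(value))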
def main(argv):
    db_path_label = ''
    db_path_feats = ''
    mat_file = ''
    print argv
    try:
        # every option takes an argument, so each flag needs a trailing colon
        opts, args = getopt.getopt(argv, "l:f:o:",
                                   ["label_db=", "feature_db=", "mat_file="])
    except getopt.GetoptError:
        print 'feature_LDB_to_mat.py -l <label_db> -f <feature_db> -o <output_mat_file>'
        sys.exit(2)
    print opts
    print args
    for opt, arg in opts:
        if opt in ("-l", "--label_db"):
            db_path_label = arg
        elif opt in ("-f", "--feature_db"):
            db_path_feats = arg
        elif opt in ("-o", "--mat_file"):
            mat_file = arg
        print arg + " " + opt
    print(db_path_label)
    print(db_path_feats)
    print(mat_file)
    if not os.path.exists(db_path_label):
        raise Exception('db label not found')
    if not os.path.exists(db_path_feats):
        raise Exception('db feature not found')
    db_label = leveldb.LevelDB(db_path_label)
    db_feats = leveldb.LevelDB(db_path_feats)
    datum = caffe_pb2.Datum()
    datum_lb = caffe_pb2.Datum()
    start = time.time()

    is_float_data = True
    window_num = 0
    for key in db_feats.RangeIter(include_value=False):
        window_num = window_num + 1
    print 'window_num = ' + str(window_num)
    # probe the first record to find the feature size and storage type
    for key, value in db_feats.RangeIter():
        datum.ParseFromString(value)
        f_size = len(datum.float_data)
        if f_size == 0:
            f_size = len(datum.data)
            is_float_data = False
        break
    print f_size
    for key, value in db_label.RangeIter():
        datum.ParseFromString(value)
        l_size = len(datum.float_data)
        break
    ft = np.zeros((window_num, f_size))
    lb = np.zeros((window_num, l_size))

    count = 0
    for key in db_feats.RangeIter(include_value=False):
        datum.ParseFromString(db_feats.Get(key))
        datum_lb.ParseFromString(db_label.Get(key))
        if f_size > 0:
            if is_float_data:
                ft[count, :] = datum.float_data
            else:
                # raw bytes must be decoded before they can fill a float row
                ft[count, :] = np.frombuffer(datum.data, dtype=np.uint8)
        lb[count, :] = datum_lb.float_data
        print 'convert feature # : %d key is %s' % (count, key)
        count = count + 1
    print 'time 1: %f' % (time.time() - start)
    data = {
        u'feat_label': {
            u'feat': ft,
            u'label': lb,
        }
    }
    print 'save result to : %s' % (mat_file)
    hdf5storage.savemat(mat_file, data, format='7.3')
    print 'time 2: %f' % (time.time() - start)
    print 'done!'