def train(poss, negs, modelname):
    """Train a two-output MLP on a balanced positive/negative set and save it.

    Only the first ``min(len(poss), len(negs))`` entries of each sequence are
    used, so both classes contribute the same number of rows.  Positive rows
    are given target 1 and negative rows target 0.

    Args:
        poss: sequence of positive feature vectors; the feature dimension is
            taken from ``len(poss[0])``.
        negs: sequence of negative feature vectors.
        modelname: handed to ``mlpbase.MLP_PROXY``.
            NOTE(review): presumably the path the model is saved to - confirm.

    Raises:
        ValueError: if ``poss`` or ``negs`` is empty.
    """
    outsize = 2
    # Validate before building anything: an empty class used to surface as a
    # bare IndexError on poss[0] or as training on zero rows.
    half = min(len(poss), len(negs))
    if half == 0:
        raise ValueError(
            'train() needs at least one positive and one negative sample')

    net = mlpbase.MLP_PROXY(modelname)
    featdim = len(poss[0])
    samples = np.zeros((2 * half, featdim))
    targets = [[0] for _ in range(2 * half)]
    # Rows [0, half) hold positives, rows [half, 2*half) hold negatives.
    # Plain integer offsets are used so the code does not depend on the
    # Python 2 meaning of "/".
    for k in range(half):
        samples[k, :] = np.array(poss[k])
        targets[k][0] = 1
        samples[half + k, :] = np.array(negs[k])

    targets = net.target_vec2mat(targets, outsize)
    samples, targets = net.shuffle(samples, targets)
    # NOTE(review): pre_normalization presumably records the statistics that
    # normalization() then applies - confirm against mlpbase.
    net.pre_normalization(samples)
    samples = net.normalization(samples)

    insize = samples.shape[1]
    # The middle layer size is one third of (inputs + outputs), floored.
    net.create([insize, np.int64((insize + outsize) // 3), outsize])
    print('train : %s' % (samples.shape,))
    # NOTE(review): 1000 and 0.001 are passed through unchanged; their exact
    # meaning (iterations / rate or tolerance?) is defined by MLP_PROXY.train.
    net.train(samples, targets, 1000, 0.001)
    net.save()
    print('%s  saved!' % modelname)
def predict_images(indir, posdir, negdir, netname):
    """Classify every JPEG below ``indir`` and optionally sort copies by label.

    The directory tree is walked recursively.  Files whose extension is
    ``.jpg`` or ``.jpeg`` (compared case-insensitively) are run through
    ``gen_feat`` and ``predictK``; a label of 0 counts as negative, anything
    else as positive.

    Args:
        indir: root directory to scan.
        posdir: directory that receives copies of positive images, or None
            to skip copying.
        negdir: directory that receives copies of negative images, or None
            to skip copying.
        netname: handed to ``mlpbase.MLP_PROXY`` before ``load()``.

    Returns:
        None.  Progress and the final positive ratio are printed.
    """
    net = mlpbase.MLP_PROXY(netname)
    net.load()
    posnum = 0
    negnum = 0
    for root, _dirs, names in os.walk(indir):
        for name in names:
            # str.lower() returns a new string; the original discarded the
            # result, so upper-case extensions such as ".JPG" were skipped.
            ext = os.path.splitext(name)[1].lower()
            if ext not in ('.jpg', '.jpeg'):
                continue
            path = os.path.join(root, name)
            feat = gen_feat(path)
            label = predictK(np.reshape(np.array(feat), (1, -1)), net)[0]
            if label == 0:
                negnum += 1
                if negdir is not None:
                    shutil.copy(path, negdir)
            else:
                posnum += 1
                if posdir is not None:
                    shutil.copy(path, posdir)
            total = negnum + posnum
            # Progress report every 1000 classified images.
            if total % 1000 == 0:
                print('pos ratio =  %s' % (posnum * 1.0 / total))
    total = negnum + posnum
    # No matching image means there is no ratio to compute; report 0.0
    # instead of dividing by zero.
    ratio = posnum * 1.0 / total if total else 0.0
    print('finished! pos ratio =  %s' % ratio)
def demo(rootdir):
    """Train a 4-output MLP on ``<rootdir>/train`` and score it on ``<rootdir>/test``.

    The network is stored through ``mb.MLP_PROXY('ocrmlp.dat')``, saved after
    training and loaded again before prediction.  The last printed line is
    ``<number of predictions> , <hits> , <hit ratio>``.
    """
    class_count = 4
    mlp = mb.MLP_PROXY('ocrmlp.dat')

    # --- training -----------------------------------------------------
    train_feats, train_labels = load_all_sample(os.path.join(rootdir, 'train'))
    samples, targets = mlp.shuffle(
        np.array(train_feats),
        mlp.target_vec2mat(train_labels, class_count))
    mlp.create([samples.shape[1], 64, class_count])
    # Joined by hand so the output is the same whether print is a statement
    # or a function.
    print(' '.join(['train ', str(samples.shape), ',', str(targets.shape)]))

    # Normalise the training samples before fitting.
    mlp.pre_normalization(samples)
    samples = mlp.normalization(samples)
    mlp.train(samples, targets, 1000)
    mlp.save()

    # --- prediction ---------------------------------------------------
    mlp.load()
    print('predict...')
    test_feats, expected = load_all_sample(os.path.join(rootdir, 'test'))
    # Test samples go through the same normalisation call as training ones.
    test_samples = mlp.normalization(np.array(test_feats))
    predicted = mlp.target_mat2vec(mlp.predict(test_samples), class_count, -1)

    # A hit requires both label lists to hold exactly one, identical, entry.
    hit = sum(
        1 for want, got in zip(expected, predicted)
        if len(want) == len(got) == 1 and want[0] == got[0])
    total = len(predicted)
    print(' '.join([str(total), ',', str(hit), ',', str(hit * 1.0 / total)]))
def load_net_for_predict(netinfo):
    """Load the networks listed in a comma-separated description file.

    Every non-blank line of ``netinfo`` must contain exactly three
    comma-separated fields: ``netname,featid,votew``.  Whitespace around each
    field is ignored.  Blank lines (for example a trailing newline at the end
    of the file) are skipped.

    Args:
        netinfo: path of the description file.

    Returns:
        A list of ``[net, featid, votew]`` entries in file order, where
        ``net`` is a loaded ``mlpbase.MLP_PROXY``, ``featid`` is an
        ``np.int64`` and ``votew`` an ``np.float64``.

    Raises:
        ValueError: if a line does not have three fields or a numeric field
            cannot be converted.
    """
    nets = []
    with open(netinfo, 'r') as f:
        for lineno, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                # A blank line used to crash the three-way unpack below.
                continue
            fields = [field.strip() for field in line.split(',')]
            if len(fields) != 3:
                raise ValueError(
                    '%s:%d: expected "netname,featid,votew", got %r'
                    % (netinfo, lineno, line))
            netname, featid, votew = fields
            net = mlpbase.MLP_PROXY(netname)
            net.load()
            nets.append([net, np.int64(featid), np.float64(votew)])
    return nets
def predict_slidingwindowK(imgpath, netname, outpath, netsinfo):
    """Scan an image with multi-scale sliding windows and collect detections.

    Five window widths are tried, starting at 90 pixels and growing by a
    factor of 1.2 each time; the window height is one third of the width.
    Each window is shown to every network in turn.  A window is rejected as
    soon as one network returns a label other than 1; otherwise the weighted
    scores are summed and the window is kept when the sum exceeds 0.5.

    Args:
        imgpath: path of the image to scan.
        netname: if not None, a single network is loaded from it and used
            with feature id 0 and vote weight 100.
        outpath: if not None, the image with the detections drawn as
            rectangles is written there.
        netsinfo: description file for ``load_net_for_predict``; only used
            when ``netname`` is None.

    Returns:
        A list of ``[x0, y0, x1, y1]`` boxes.  With no networks loaded the
        list is empty.

    Raises:
        IOError: if the image cannot be read.
    """
    if netname is not None:
        net = mlpbase.MLP_PROXY(netname)
        net.load()
        nets = [[net, 0, 100]]
    else:
        nets = load_net_for_predict(netsinfo)

    img = cv2.imread(imgpath, 1)
    if img is None:
        # imread reports failure by returning None, which previously only
        # surfaced as an opaque error inside cvtColor.
        raise IOError('cannot read image: %s' % imgpath)
    # Features are computed on the YUV representation.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2YUV)

    sizelist = []
    lastw = 90
    for _ in range(5):
        sizelist.append(np.int64(lastw))
        lastw = lastw * 1.2

    objs = []
    for objw in sizelist:
        # Floor division keeps sizes and steps integral regardless of how
        # "/" behaves for the running interpreter.
        objh = objw // 3
        if objh < 10:
            continue
        # Step by a fifth of the window, but never less than 5 pixels.
        stepw = max(objw // 5, 5)
        steph = max(objh // 5, 5)
        # NOTE(review): the range ends are exclusive, so a window lying flush
        # with the bottom or right edge is never evaluated - confirm intended.
        for y in range(0, img.shape[0] - objh, steph):
            for x in range(0, img.shape[1] - objw, stepw):
                subimg = img[y:y + objh, x:x + objw, :]
                votescore = 0
                for net, fid, votew in nets:
                    feat = gen_featK(subimg, fid)
                    # NOTE(review): the meaning of -2048 is defined by
                    # predictK, which is not visible here.
                    label, score = predictK(
                        np.reshape(np.array(feat), (1, -1)), net, -2048)[0]
                    if label != 1:
                        break
                    votescore += votew * score
                else:
                    # Reached only when no network rejected the window.  This
                    # also covers an empty net list, where the old code read
                    # an unbound (or stale) ``label``.
                    if votescore > 0.5:
                        objs.append([x, y, x + objw, y + objh])

    if outpath is not None:
        # Back to BGR before drawing and writing the annotated image.
        img = cv2.cvtColor(img, cv2.COLOR_YUV2BGR)
        for x0, y0, x1, y1 in objs:
            cv2.rectangle(img, (x0, y0), (x1, y1), (255, 0, 0), 2)
        cv2.imwrite(outpath, img)
    return objs
# Command-line dispatch.  The action is chosen by the flag in argv[1] together
# with the total number of arguments; any other combination does nothing.
# ``netpath`` is defined elsewhere in the file.
_argc = len(sys.argv)
_request = (sys.argv[1] if _argc > 1 else None, _argc)

if _request == ('-train', 4):
    # -train <posdir> <negdir>
    posdir = sys.argv[2]
    negdir = sys.argv[3]
    poss = load_feats(posdir)
    negs = load_feats(negdir)
    train(poss, negs, netpath)
elif _request == ('-prdimg', 3):
    # -prdimg <indir>: classify only, no copies are made.
    indir = sys.argv[2]
    predict_images(indir, None, None, netpath)
elif _request == ('-prdimg', 5):
    # -prdimg <indir> <posdir> <negdir>: classify and copy by label.
    indir = sys.argv[2]
    posdir = sys.argv[3]
    negdir = sys.argv[4]
    predict_images(indir, posdir, negdir, netpath)
elif _request == ('-prdsw', 4):
    # -prdsw <indir> <outdir>: sliding window with the default network.
    # NOTE(review): this call passes three positional arguments while the
    # variant below passes four - confirm predict_slidingwindow accepts both.
    indir = sys.argv[2]
    outdir = sys.argv[3]
    predict_slidingwindow(indir, outdir, netpath)
elif _request == ('-prdsw', 5):
    # -prdsw <indir> <outdir> <netsinfo>: sliding window with several nets.
    indir = sys.argv[2]
    outdir = sys.argv[3]
    netsinfo = sys.argv[4]
    predict_slidingwindow(indir, outdir, None, netsinfo)
elif _request == ('-cfmt', 4):
    # -cfmt <modelpath> <outpath>: export a stored model in C format.
    modelpath = sys.argv[2]
    outpath = sys.argv[3]
    net = mlpbase.MLP_PROXY(modelpath)
    net.load()
    net.write_in_c_format(outpath)
    print(' '.join([outpath, ' saved']))