def get_data_from_susig_file(fileName):
    preProcessor = PreProcessor()
    print "Getting data from %s" % fileName
    X = []
    Y = []
    P = []
    with open(fileName) as fp:
        lines = fp.readlines()
        # skip the first two lines (file header); each remaining line
        # holds whitespace-separated columns with x, y, and pressure
        for line in lines[2:]:
            items = line.split()
            x = float(items[0])
            y = float(items[1])
            p = float(items[3])
            X.append(x)
            Y.append(y)
            P.append(p)
    X, Y = preProcessor.size_normalization(X, Y, 400, 200)
    return X, Y, P
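# A hypothetical usage sketch of the reader above: the file path is a
# placeholder, and PreProcessor must be importable for this to run.
X, Y, P = get_data_from_susig_file("U1S1.sig")
print "read %d points, pressure range %f-%f" % (len(X), min(P), max(P))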
def inference(bmodel_path, input_path, loops, tpu_id, compare_path):
    """ Load a bmodel and do inference.

    Args:
      bmodel_path: Path to bmodel
      input_path: Path to input file
      loops: Number of loops to run
      tpu_id: ID of TPU to use
      compare_path: Path to correct result file

    Returns:
      True for success and False for failure
    """
    # init Engine to load bmodel and allocate input and output tensors
    engine = sail.Engine(bmodel_path, tpu_id, sail.SYSIO)
    # init preprocessor and postprocessor
    preprocessor = PreProcessor([127.5, 127.5, 127.5], 0.0078125)
    postprocessor = PostProcessor([0.5, 0.3, 0.7])
    reference = postprocessor.get_reference(compare_path)
    status = True
    # pipeline of inference
    for i in range(loops):
        # read image
        image = cv2.imread(input_path)
        image = cv2.transpose(image)
        # run PNet, the first stage of MTCNN
        boxes = run_pnet(engine, preprocessor, postprocessor, image)
        if boxes is not None and len(boxes) > 0:
            # run RNet, the second stage of MTCNN
            boxes = run_rnet(engine, preprocessor, postprocessor, boxes, image)
            if boxes is not None and len(boxes) > 0:
                # run ONet, the third stage of MTCNN
                boxes = run_onet(engine, preprocessor, postprocessor, boxes, image)
        # print detected result
        if postprocessor.compare(reference, boxes, i):
            print_result(boxes, tpu_id)
        else:
            status = False
            break
    return status
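# A minimal invocation sketch, assuming the sail runtime and the run_pnet/
# run_rnet/run_onet helpers are importable; the bmodel, image, and reference
# paths below are hypothetical placeholders, not files from this repo.
import sys

if __name__ == '__main__':
    ok = inference('mtcnn.bmodel', 'face.jpg', 1, 0, 'reference.txt')
    sys.exit(0 if ok else 1)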
class AutoEncoderDriver(AutoEncoderFeatureDriver):

    def __init__(self):
        # data structure
        self.data = []
        self.features = []
        self.width = 27
        self.height = 27
        self.featureExtractor = AutoEncoderFeatureExtractor(self.width, self.height)
        self.processor = PreProcessor()
        self.driver = ProbDriver()

    def load_data(self):
        dataDir = "../data/Task2"
        os.chdir(dataDir)
        self.data = []
        for uid in range(1, settings.USER_COUNT + 1):
            uidData = []
            for sid in range(1, 41):
                fileName = "U%dS%d.TXT" % (uid, sid)
                X, Y, T, P = self.get_data_from_file(fileName)
                uidData.append((X, Y))
            self.data.append(uidData)
        os.chdir("../..")

    def size_normalization(self):
        data = []
        for uid in range(40):
            uidData = []
            for sid in range(40):
                X, Y = self.processor.size_normalization(
                    self.data[uid][sid][0], self.data[uid][sid][1],
                    self.width, self.height)
                uidData.append((X, Y))
            data.append(uidData)
        self.data = data

    def imagize(self):
        data = []
        for uid in range(40):
            uidData = []
            for sid in range(40):
                image = self.featureExtractor.imagize(
                    self.data[uid][sid][0], self.data[uid][sid][1])
                uidData.append(image)
            data.append(uidData)
        self.data = data

    def train(self, layer_sizes=[500, 300, 100, 50], epoch=1000):
        if not self.data:
            self.imagize()
        train_set_x = numpy.asarray(self.data)
        (uCnt, sCnt, pCnt) = train_set_x.shape
        train_set_x = train_set_x.reshape((uCnt * sCnt, pCnt))
        n_ins = (self.width + 1) * (self.height + 1)
        # train data
        # self.featureExtractor.train(train_set_x, n_ins, layer_sizes, epoch)
        self.featureExtractor.train_with_mnist(
            pretraining_epochs=15,
            training_epochs=epoch,
            hidden_layers_sizes=layer_sizes)

    def generate_features(self):
        """Generate features from rasterized images using the stacked autoencoder."""
        # train the autoencoder first
        self.train()
        print "generating features..."
        self.features = []
        for uid in range(40):
            uidFeatures = []
            for sid in range(40):
                feature = self.featureExtractor.generate_features(self.data[uid][sid])
                uidFeatures.append(feature)
            self.features.append(uidFeatures)

    def dump_feature(self):
        print "... dumping features"
        dataDir = "./data"
        os.chdir(dataDir)
        autoFeatureDir = "auto_features"
        if not os.path.exists(autoFeatureDir):
            os.mkdir(autoFeatureDir)
        os.chdir(autoFeatureDir)
        for uid in range(40):
            for sid in range(40):
                fileName = "u%ds%d.txt" % (uid, sid)
                numpy.savetxt(fileName, self.features[uid][sid], fmt="%10.5f")
        os.chdir("../..")

    def train_test_set(self, uid, cnt):
        uidFeatures = self.features[uid]
        train_set_x = []
        pos_set_x = []
        neg_set_x_ori = []
        neg_set_x_oth = []
        # first cnt genuine signatures are the training set
        for sid in range(cnt):
            train_set_x.append(uidFeatures[sid].tolist())
        # remaining genuine signatures (up to 20) are positive test samples
        for sid in range(cnt, 20):
            pos_set_x.append(uidFeatures[sid].tolist())
        # signatures 20-39 are treated as forgeries of this user
        for sid in range(20, 40):
            neg_set_x_ori.append(uidFeatures[sid].tolist())
        # all signatures of other users serve as additional negatives
        for i in range(40):
            if i == uid:
                continue
            for sid in range(40):
                neg_set_x_oth.append(self.features[i][sid].tolist())
        return train_set_x, pos_set_x, neg_set_x_ori, neg_set_x_oth

    def score_of_uid(self, uid, cnt):
        train_set_x, pos_set_x, neg_set_x_ori, neg_set_x_oth = self.train_test_set(uid, cnt)
        driver = ProbDriver()
        driver.ps_temp(train_set_x)
        # the smallest training score defines the acceptance threshold
        trainPS = []
        for X in train_set_x:
            ps = driver.PS(X)
            trainPS.append(ps)
        threshold = min(trainPS)

        def _score_of_set(set_x, pos=True):
            size = len(set_x)
            setPS = []
            for X in set_x:
                ps = driver.PS(X)
                setPS.append(ps)
            if pos:
                correctSize = len([ps for ps in setPS if ps >= threshold])
            else:
                correctSize = len([ps for ps in setPS if ps <= threshold])
            return correctSize / float(size)

        # positive test set: genuine signatures should score at or above the threshold
        scoreOfPos = _score_of_set(pos_set_x, pos=True)
        # negative test sets: forgeries should score at or below the threshold
        scoreOfNegOri = _score_of_set(neg_set_x_ori, pos=False)
        scoreOfNegOth = _score_of_set(neg_set_x_oth, pos=False)
        return scoreOfPos, scoreOfNegOri, scoreOfNegOth

    def score(self):
        self.load_feature()
        scoreOfPos = []
        scoreOfNegOri = []
        scoreOfNegOth = []
        for cnt in [3, 5, 7, 10, 15]:
            for uid in range(40):
                pos, negOri, negOth = self.score_of_uid(uid, cnt)
                scoreOfPos.append(pos)
                scoreOfNegOri.append(negOri)
                scoreOfNegOth.append(negOth)
        print numpy.mean(scoreOfPos), numpy.mean(scoreOfNegOri), numpy.mean(scoreOfNegOth)
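# A sketch of the end-to-end driver pipeline, assuming the Task2 data
# directory used by load_data() exists; the call order is inferred from
# the methods above rather than taken verbatim from the repo.
driver = AutoEncoderDriver()
driver.load_data()            # raw (X, Y) trajectories per user/signature
driver.size_normalization()   # scale trajectories into the 27x27 grid
driver.imagize()              # rasterize trajectories into flat image vectors
driver.generate_features()    # train the stacked autoencoder and encode images
driver.dump_feature()         # write features to ./data/auto_features
driver.score()                # threshold-based verification accuracy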
def __init__(self):
    self.svmProcessor = SVMProcessor()
    self.probFeatureExtractor = ProbFeatureExtractor()
    self.preProcessor = PreProcessor()
def test_pre_processor():
    file = get_file("tests/test_sample/82251504.png")
    new_file = PreProcessor(100).run(file)
    assert type(new_file) == io.BytesIO
def test_ocr_processor():
    file = get_file("tests/test_sample/82251504.png")
    fixed_text = ocr_precess(file, PreProcessor(100),
                             OCRTesseractProcessor(), PostProcessor())
    assert type(fixed_text) == str