Example #1
def __init__(self):
    # data structure
    self.data = []
    self.features = []
    self.width = 27
    self.height = 27
    self.featureExtractor = AutoEncoderFeatureExtractor(self.width, self.height)
    self.processor = PreProcessor()
    self.driver = ProbDriver()
Example #2
def get_data_from_susig_file(fileName):
    preProcessor = PreProcessor()
    print "Getting data from %s" % fileName
    X = []
    Y = []
    P = []
    with open(fileName) as fp:
        lines = fp.readlines()
        for line in lines[2:]:
            items = line.split()
            x = float(items[0])
            y = float(items[1])
            p = float(items[3])
            X.append(x)
            Y.append(y)
            P.append(p)
    X, Y = preProcessor.size_normalization(X, Y, 400, 200)
    return X, Y, P
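A minimal usage sketch for the loader above, assuming a SUSIG-style text file with a two-line header followed by whitespace-separated x, y, timestamp and pressure columns; the file name is purely illustrative and not part of the original sample.

if __name__ == "__main__":
    # Illustrative path only; any SUSIG-format signature file will do.
    X, Y, P = get_data_from_susig_file("USER1_1.sig")
    print("%d points, x in [%.1f, %.1f], y in [%.1f, %.1f]" %
          (len(X), min(X), max(X), min(Y), max(Y)))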
Example #3
def inference(bmodel_path, input_path, loops, tpu_id, compare_path):
    """ Load a bmodel and do inference.

  Args:
   bmodel_path: Path to bmodel
   input_path: Path to input file
   loops: Number of loops to run
   tpu_id: ID of TPU to use
   compare_path: Path to correct result file

  Returns:
    True for success and False for failure
  """
    # init Engine to load bmodel and allocate input and output tensors
    engine = sail.Engine(bmodel_path, tpu_id, sail.SYSIO)
    # init preprocessor and postprocessor
    preprocessor = PreProcessor([127.5, 127.5, 127.5], 0.0078125)
    postprocessor = PostProcessor([0.5, 0.3, 0.7])
    reference = postprocessor.get_reference(compare_path)
    status = True
    # pipeline of inference
    for i in range(loops):
        # read image
        image = cv2.imread(input_path)
        image = cv2.transpose(image)
        # run PNet, the first stage of MTCNN
        boxes = run_pnet(engine, preprocessor, postprocessor, image)
        if boxes is not None and len(boxes) > 0:
            # run RNet, the second stage of MTCNN
            boxes = run_rnet(engine, preprocessor, postprocessor, boxes, image)
            if boxes is not None and len(boxes) > 0:
                # run ONet, the third stage of MTCNN
                boxes = run_onet(engine, preprocessor, postprocessor, boxes,
                                 image)
        # print detected result
        if postprocessor.compare(reference, boxes, i):
            print_result(boxes, tpu_id)
        else:
            status = False
            break
    return status
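A hedged sketch of a command-line entry point for the inference function above; the flag names and defaults are assumptions for illustration, not part of the original sample.

if __name__ == "__main__":
    import argparse
    import sys
    parser = argparse.ArgumentParser(description="MTCNN inference demo")
    parser.add_argument("--bmodel", required=True, help="path to the bmodel")
    parser.add_argument("--input", required=True, help="path to the input image")
    parser.add_argument("--loops", type=int, default=1, help="number of inference loops")
    parser.add_argument("--tpu_id", type=int, default=0, help="TPU device id")
    parser.add_argument("--compare", default="", help="path to the reference result file")
    args = parser.parse_args()
    ok = inference(args.bmodel, args.input, args.loops, args.tpu_id, args.compare)
    sys.exit(0 if ok else 1)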
Example #4
class AutoEncoderDriver(AutoEncoderFeatureDriver):
    def __init__(self):
        # data structure
        self.data = []
        self.features = []
        self.width = 27
        self.height = 27
        self.featureExtractor = AutoEncoderFeatureExtractor(self.width, self.height)
        self.processor = PreProcessor()
        self.driver = ProbDriver()

    def load_data(self):
        dataDir = "../data/Task2"
        os.chdir(dataDir)
        curDir = os.getcwd()
        self.data = []
        for uid in range(1, settings.USER_COUNT + 1):
            uidData = []
            for sid in range(1, 41):
                fileName = "U%dS%d.TXT" % (uid, sid)
                X, Y, T, P = self.get_data_from_file(fileName)
                uidData.append((X, Y))
            self.data.append(uidData)
        os.chdir("../..")

    def size_normalization(self):
        data = []
        for uid in range(40):
            uidData = []
            for sid in range(40):
                X, Y = self.processor.size_normalization(
                    self.data[uid][sid][0], self.data[uid][sid][1], self.width, self.height
                )
                uidData.append((X, Y))
            data.append(uidData)
        self.data = data

    def imagize(self):
        data = []
        for uid in range(40):
            uidData = []
            for sid in range(40):
                image = self.featureExtractor.imagize(self.data[uid][sid][0], self.data[uid][sid][1])
                uidData.append(image)
            data.append(uidData)
        self.data = data

    def train(self, layer_sizes=[500, 300, 100, 50], epoch=1000):
        if not self.data:
            self.imagize()

        train_set_x = numpy.asarray(self.data)
        (uCnt, sCnt, pCnt) = train_set_x.shape
        train_set_x = train_set_x.reshape((uCnt * sCnt, pCnt))

        n_ins = (self.width + 1) * (self.height + 1)
        # train data
        # self.featureExtractor.train(train_set_x, n_ins, layer_sizes, epoch)
        self.featureExtractor.train_with_mnist(
            pretraining_epochs=15, training_epochs=epoch, hidden_layers_sizes=layer_sizes
        )

    def generate_features(self):
        """
        generate feature from image to features using stacked autoencoder 
        """

        # train data first
        self.train()

        print "generating features..."
        self.features = []
        for uid in range(40):
            uidFeatures = []
            for sid in range(40):
                feature = self.featureExtractor.generate_features(self.data[uid][sid])
                uidFeatures.append(feature)
                # print ">>>uid: %d, sid: %d ends" % (uid, sid)
            self.features.append(uidFeatures)

    def dump_feature(self):
        print "... dumpint features"
        dataDir = "./data"
        os.chdir(dataDir)
        autoFeatureDir = "auto_features"
        if not os.path.exists(autoFeatureDir):
            os.mkdir(autoFeatureDir)
        os.chdir(autoFeatureDir)
        for uid in range(40):
            for sid in range(40):
                fileName = "u%ds%d.txt" % (uid, sid)
                numpy.savetxt(fileName, self.features[uid][sid], fmt="%10.5f")
        os.chdir("../..")

    def train_test_set(self, uid, cnt):
        uidFeatures = self.features[uid]
        train_set_x = []
        pos_set_x = []
        neg_set_x_ori = []
        neg_set_x_oth = []
        for sid in range(cnt):
            train_set_x.append(uidFeatures[sid].tolist())
        for sid in range(cnt, 20):
            pos_set_x.append(uidFeatures[sid].tolist())
        for sid in range(20, 40):
            neg_set_x_ori.append(uidFeatures[sid].tolist())
        for i in range(40):
            if i == uid:
                continue
            for sid in range(40):
                neg_set_x_oth.append(self.features[i][sid].tolist())

        return train_set_x, pos_set_x, neg_set_x_ori, neg_set_x_oth

    def score_of_uid(self, uid, cnt):

        train_set_x, pos_set_x, neg_set_x_ori, neg_set_x_oth = self.train_test_set(uid, cnt)

        driver = ProbDriver()
        # print ">>> training..."
        driver.ps_temp(train_set_x)

        # print ">>> train set"
        trainPS = []
        for X in train_set_x:
            ps = driver.PS(X)
            # print ps
            trainPS.append(ps)
        threshold = min(trainPS)
        # print ">>> train set min is ", threshold

        def _score_of_set(set_x, pos=True):
            size = len(set_x)
            setPS = []
            for X in set_x:
                ps = driver.PS(X)
                setPS.append(ps)
            if pos:
                correctSize = len([ps for ps in setPS if ps >= threshold])
            else:
                correctSize = len([ps for ps in setPS if ps <= threshold])
            return correctSize / float(size)

        # testing process
        # print ">>> postive test set"
        scoreOfPos = _score_of_set(pos_set_x, pos=True)
        # print ">>> total postive set %d, greater than threshold %f" % (len(pos_set_x), scoreOfPos)

        # print ">>> negtive test set"
        scoreOfNegOri = _score_of_set(neg_set_x_ori, pos=False)
        # print ">>> original negtive set %d, less than threshold %f" % (len(neg_set_x_ori), scoreOfNegOri)

        # print ">>> other negtive test set"
        scoreOfNegOth = _score_of_set(neg_set_x_oth, pos=False)
        # print ">>> total negtive set %d, less than threhold %f" % (len(neg_set_x_oth), scoreOfNegOth)

        return scoreOfPos, scoreOfNegOri, scoreOfNegOth

    def score(self):
        self.load_feature()
        scoreOfPos = []
        scoreOfNegOri = []
        scoreOfNegOth = []
        for cnt in [3, 5, 7, 10, 15]:
            for uid in range(40):
                pos, negOri, negOth = self.score_of_uid(uid, cnt)
                scoreOfPos.append(pos)
                scoreOfNegOri.append(negOri)
                scoreOfNegOth.append(negOth)
            print(numpy.mean(scoreOfPos), numpy.mean(scoreOfNegOri), numpy.mean(scoreOfNegOth))
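A rough sketch of how the driver's methods are meant to chain, based only on the class above; load_feature (used by score) is presumably provided by AutoEncoderFeatureDriver and is not shown here, so this sketch stops after dumping the features.

if __name__ == "__main__":
    driver = AutoEncoderDriver()
    driver.load_data()            # read the raw signature files from ../data/Task2
    driver.size_normalization()   # rescale every signature to the 27x27 grid
    driver.imagize()              # rasterize the (X, Y) point lists into images
    driver.generate_features()    # train the stacked autoencoder and encode the images
    driver.dump_feature()         # write one feature file per (user, signature) pair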
Example #5
def __init__(self):
    self.svmProcessor = SVMProcessor()
    self.probFeatureExtractor = ProbFeatureExtractor()
    self.preProcessor = PreProcessor()
Example #6
def test_pre_processor():
    file = get_file("tests/test_sample/82251504.png")
    new_file = PreProcessor(100).run(file)
    assert type(new_file) == io.BytesIO
Example #7
def test_ocr_processor():
    file = get_file("tests/test_sample/82251504.png")
    fixed_text = ocr_precess(file, PreProcessor(100), OCRTesseractProcessor(), PostProcessor())
    assert type(fixed_text) == str
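The two tests above rely on a get_file helper that is not part of this listing; a minimal stand-in, under the assumption that it simply wraps the image bytes in an in-memory buffer, might look like this.

import io

def get_file(path):
    # Assumed behaviour: read the file as raw bytes and return an io.BytesIO
    # buffer, which is what PreProcessor(100).run() is expected to consume.
    with open(path, "rb") as fp:
        return io.BytesIO(fp.read())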