Example no. 1
0
    def __init__(self):
        # data structure
        self.data = []
        self.features = []
        self.width = 27
        self.height = 27
        self.featureExtractor = AutoEncoderFeatureExtractor(
            self.width, self.height)
        self.processor = PreProcessor()
        self.driver = ProbDriver()
Example no. 2
0
    def __init__(self):
        # data structure
        self.data = []
        self.features = []
        self.width = 27
        self.height = 27
        self.featureExtractor = AutoEncoderFeatureExtractor(self.width, self.height)
        self.processor = PreProcessor()
        self.driver = ProbDriver()
Example no. 3
0
import os

import numpy

# settings, AutoEncoderFeatureExtractor, PreProcessor, ProbDriver and
# AutoEncoderFeatureDriver are assumed to come from project-local modules.


class AutoEncoderDriver(AutoEncoderFeatureDriver):
    def __init__(self):
        # data structure
        self.data = []
        self.features = []
        self.width = 27
        self.height = 27
        self.featureExtractor = AutoEncoderFeatureExtractor(self.width, self.height)
        self.processor = PreProcessor()
        self.driver = ProbDriver()

    def load_data(self):
        dataDir = "../data/Task2"
        os.chdir(dataDir)
        curDir = os.getcwd()
        self.data = []
        for uid in range(1, settings.USER_COUNT + 1):
            uidData = []
            for sid in range(1, 41):
                fileName = "U%dS%d.TXT" % (uid, sid)
                X, Y, T, P = self.get_data_from_file(fileName)
                uidData.append((X, Y))
            self.data.append(uidData)
        os.chdir("../..")

    def size_normalization(self):
        data = []
        for uid in range(40):
            uidData = []
            for sid in range(40):
                X, Y = self.processor.size_normalization(
                    self.data[uid][sid][0], self.data[uid][sid][1], self.width, self.height
                )
                uidData.append((X, Y))
            data.append(uidData)
        self.data = data

    def imagize(self):
        data = []
        for uid in range(40):
            uidData = []
            for sid in range(40):
                image = self.featureExtractor.imagize(self.data[uid][sid][0], self.data[uid][sid][1])
                uidData.append(image)
            data.append(uidData)
        self.data = data

    def train(self, layer_sizes=[500, 300, 100, 50], epoch=1000):
        if not self.data:
            self.imagize()

        train_set_x = numpy.asarray(self.data)
        (uCnt, sCnt, pCnt) = train_set_x.shape
        train_set_x = train_set_x.reshape((uCnt * sCnt, pCnt))

        n_ins = (self.width + 1) * (self.height + 1)
        # train data
        # self.featureExtractor.train(train_set_x, n_ins, layer_sizes, epoch)
        self.featureExtractor.train_with_mnist(
            pretraining_epochs=15, training_epochs=epoch, hidden_layers_sizes=layer_sizes
        )

    def generate_features(self):
        """
        Generate features from the images using the stacked autoencoder.
        """

        # train data first
        self.train()

        print "generating features..."
        self.features = []
        for uid in range(40):
            uidFeatures = []
            for sid in range(40):
                feature = self.featureExtractor.generate_features(self.data[uid][sid])
                uidFeatures.append(feature)
                # print ">>>uid: %d, sid: %d ends" % (uid, sid)
            self.features.append(uidFeatures)

    def dump_feature(self):
        print "... dumpint features"
        dataDir = "./data"
        os.chdir(dataDir)
        autoFeatureDir = "auto_features"
        if not os.path.exists(autoFeatureDir):
            os.mkdir(autoFeatureDir)
        os.chdir(autoFeatureDir)
        for uid in range(40):
            for sid in range(40):
                fileName = "u%ds%d.txt" % (uid, sid)
                numpy.savetxt(fileName, self.features[uid][sid], fmt="%10.5f")
        os.chdir("../..")

    def train_test_set(self, uid, cnt):
        uidFeatures = self.features[uid]
        train_set_x = []
        pos_set_x = []
        neg_set_x_ori = []
        neg_set_x_oth = []
        for sid in range(cnt):
            train_set_x.append(uidFeatures[sid].tolist())
        for sid in range(cnt, 20):
            pos_set_x.append(uidFeatures[sid].tolist())
        for sid in range(20, 40):
            neg_set_x_ori.append(uidFeatures[sid].tolist())
        for i in range(40):
            if i == uid:
                continue
            for sid in range(40):
                neg_set_x_oth.append(self.features[i][sid].tolist())

        return train_set_x, pos_set_x, neg_set_x_ori, neg_set_x_oth

    def score_of_uid(self, uid, cnt):

        train_set_x, pos_set_x, neg_set_x_ori, neg_set_x_oth = self.train_test_set(uid, cnt)

        driver = ProbDriver()
        # print ">>> training..."
        driver.ps_temp(train_set_x)

        # print ">>> train set"
        trainPS = []
        for X in train_set_x:
            ps = driver.PS(X)
            # print ps
            trainPS.append(ps)
        threshold = min(trainPS)
        # print ">>> train set min is ", threshold

        def _score_of_set(set_x, pos=True):
            size = len(set_x)
            setPS = []
            for X in set_x:
                ps = driver.PS(X)
                setPS.append(ps)
            if pos:
                correctSize = len([ps for ps in setPS if ps >= threshold])
            else:
                correctSize = len([ps for ps in setPS if ps <= threshold])
            return correctSize / float(size)

        # testing process
        # print ">>> postive test set"
        scoreOfPos = _score_of_set(pos_set_x, pos=True)
        # print ">>> total postive set %d, greater than threshold %f" % (len(pos_set_x), scoreOfPos)

        # print ">>> negtive test set"
        scoreOfNegOri = _score_of_set(neg_set_x_ori, pos=False)
        # print ">>> original negtive set %d, less than threshold %f" % (len(neg_set_x_ori), scoreOfNegOri)

        # print ">>> other negtive test set"
        scoreOfNegOth = _score_of_set(neg_set_x_oth, pos=False)
        # print ">>> total negtive set %d, less than threhold %f" % (len(neg_set_x_oth), scoreOfNegOth)

        return scoreOfPos, scoreOfNegOri, scoreOfNegOth

    def score(self):
        self.load_feature()
        scoreOfPos = []
        scoreOfNegOri = []
        scoreOfNegOth = []
        for cnt in [3, 5, 7, 10, 15]:
            for uid in range(40):
                pos, negOri, negOth = self.score_of_uid(uid, cnt)
                scoreOfPos.append(pos)
                scoreOfNegOri.append(negOri)
                scoreOfNegOth.append(negOth)
            # note: the score lists are not reset for each cnt, so these means accumulate across training-set sizes
            print numpy.mean(scoreOfPos), numpy.mean(scoreOfNegOri), numpy.mean(scoreOfNegOth)
Example no. 4
0
import os

import numpy

# settings, AutoEncoderFeatureExtractor, PreProcessor, ProbDriver and
# AutoEncoderFeatureDriver are assumed to come from project-local modules.


class AutoEncoderDriver(AutoEncoderFeatureDriver):
    def __init__(self):
        # data structure
        self.data = []
        self.features = []
        self.width = 27
        self.height = 27
        self.featureExtractor = AutoEncoderFeatureExtractor(
            self.width, self.height)
        self.processor = PreProcessor()
        self.driver = ProbDriver()

    def load_data(self):
        dataDir = "../data/Task2"
        os.chdir(dataDir)
        curDir = os.getcwd()
        self.data = []
        for uid in range(1, settings.USER_COUNT + 1):
            uidData = []
            for sid in range(1, 41):
                fileName = "U%dS%d.TXT" % (uid, sid)
                X, Y, T, P = self.get_data_from_file(fileName)
                uidData.append((X, Y))
            self.data.append(uidData)
        os.chdir("../..")

    def size_normalization(self):
        data = []
        for uid in range(40):
            uidData = []
            for sid in range(40):
                X, Y = self.processor.size_normalization(
                    self.data[uid][sid][0], self.data[uid][sid][1], self.width,
                    self.height)
                uidData.append((X, Y))
            data.append(uidData)
        self.data = data

    def imagize(self):
        data = []
        for uid in range(40):
            uidData = []
            for sid in range(40):
                image = self.featureExtractor.imagize(self.data[uid][sid][0],
                                                      self.data[uid][sid][1])
                uidData.append(image)
            data.append(uidData)
        self.data = data

    def train(self, layer_sizes=[500, 300, 100, 50], epoch=1000):
        if not self.data:
            self.imagize()

        train_set_x = numpy.asarray(self.data)
        (uCnt, sCnt, pCnt) = train_set_x.shape
        train_set_x = train_set_x.reshape((uCnt * sCnt, pCnt))

        n_ins = (self.width + 1) * (self.height + 1)
        # train data
        # self.featureExtractor.train(train_set_x, n_ins, layer_sizes, epoch)
        self.featureExtractor.train_with_mnist(pretraining_epochs=15,
                                               training_epochs=epoch,
                                               hidden_layers_sizes=layer_sizes)

    def generate_features(self):
        """
        Generate features from the images using the stacked autoencoder.
        """

        # train data first
        self.train()

        print "generating features..."
        self.features = []
        for uid in range(40):
            uidFeatures = []
            for sid in range(40):
                feature = self.featureExtractor.generate_features(
                    self.data[uid][sid])
                uidFeatures.append(feature)
                # print ">>>uid: %d, sid: %d ends" % (uid, sid)
            self.features.append(uidFeatures)

    def dump_feature(self):
        print "... dumpint features"
        dataDir = "./data"
        os.chdir(dataDir)
        autoFeatureDir = "auto_features"
        if not os.path.exists(autoFeatureDir):
            os.mkdir(autoFeatureDir)
        os.chdir(autoFeatureDir)
        for uid in range(40):
            for sid in range(40):
                fileName = "u%ds%d.txt" % (uid, sid)
                numpy.savetxt(fileName, self.features[uid][sid], fmt="%10.5f")
        os.chdir("../..")

    def train_test_set(self, uid, cnt):
        uidFeatures = self.features[uid]
        train_set_x = []
        pos_set_x = []
        neg_set_x_ori = []
        neg_set_x_oth = []
        for sid in range(cnt):
            train_set_x.append(uidFeatures[sid].tolist())
        for sid in range(cnt, 20):
            pos_set_x.append(uidFeatures[sid].tolist())
        for sid in range(20, 40):
            neg_set_x_ori.append(uidFeatures[sid].tolist())
        for i in range(40):
            if i == uid:
                continue
            for sid in range(40):
                neg_set_x_oth.append(self.features[i][sid].tolist())

        return train_set_x, pos_set_x, neg_set_x_ori, neg_set_x_oth

    def score_of_uid(self, uid, cnt):

        train_set_x, pos_set_x, neg_set_x_ori, neg_set_x_oth = self.train_test_set(
            uid, cnt)

        driver = ProbDriver()
        # print ">>> training..."
        driver.ps_temp(train_set_x)

        # print ">>> train set"
        trainPS = []
        for X in train_set_x:
            ps = driver.PS(X)
            # print ps
            trainPS.append(ps)
        threshold = min(trainPS)

        # print ">>> train set min is ", threshold

        def _score_of_set(set_x, pos=True):
            size = len(set_x)
            setPS = []
            for X in set_x:
                ps = driver.PS(X)
                setPS.append(ps)
            if pos:
                correctSize = len([ps for ps in setPS if ps >= threshold])
            else:
                correctSize = len([ps for ps in setPS if ps <= threshold])
            return correctSize / float(size)

        # testing process
        # print ">>> postive test set"
        scoreOfPos = _score_of_set(pos_set_x, pos=True)
        # print ">>> total postive set %d, greater than threshold %f" % (len(pos_set_x), scoreOfPos)

        # print ">>> negtive test set"
        scoreOfNegOri = _score_of_set(neg_set_x_ori, pos=False)
        # print ">>> original negtive set %d, less than threshold %f" % (len(neg_set_x_ori), scoreOfNegOri)

        # print ">>> other negtive test set"
        scoreOfNegOth = _score_of_set(neg_set_x_oth, pos=False)
        # print ">>> total negtive set %d, less than threhold %f" % (len(neg_set_x_oth), scoreOfNegOth)

        return scoreOfPos, scoreOfNegOri, scoreOfNegOth

    def score(self):
        self.load_feature()
        scoreOfPos = []
        scoreOfNegOri = []
        scoreOfNegOth = []
        for cnt in [3, 5, 7, 10, 15]:
            for uid in range(40):
                pos, negOri, negOth = self.score_of_uid(uid, cnt)
                scoreOfPos.append(pos)
                scoreOfNegOri.append(negOri)
                scoreOfNegOth.append(negOth)
            # note: the score lists are not reset for each cnt, so these means accumulate across training-set sizes
            print numpy.mean(scoreOfPos), numpy.mean(
                scoreOfNegOri), numpy.mean(scoreOfNegOth)
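
A minimal usage sketch for the AutoEncoderDriver class shown in examples 3 and 4, assuming its project-local dependencies (settings, AutoEncoderFeatureExtractor, PreProcessor, ProbDriver, AutoEncoderFeatureDriver) are importable and that load_feature is provided by the parent class; the method order simply follows the pipeline defined above and is not a documented entry point.

if __name__ == "__main__":
    driver = AutoEncoderDriver()
    driver.load_data()           # read the U<uid>S<sid>.TXT files from ../data/Task2
    driver.size_normalization()  # rescale each (X, Y) trajectory to the configured width/height
    driver.imagize()             # rasterize the trajectories into images
    driver.generate_features()   # train the stacked autoencoder and encode every image
    driver.dump_feature()        # write features to ./data/auto_features/u<uid>s<sid>.txt
    driver.score()               # reload features and evaluate thresholds for cnt in [3, 5, 7, 10, 15]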