Esempio n. 1
2
	def getHouseList(self):
		"""
		Load house records from the spreadsheet and handle them in batches.

		Reads sheet 'houseinfo' of 'houseInfo_origin.xls', skipping row 0,
		and builds one House per row from columns 0-3 (houseId, address,
		area, price). Whenever the row index is a multiple of PageNumber the
		collected houses are processed, saved, counted into self.excelIndex
		and cleared. Whatever is left after the last row is processed and
		saved as well.
		"""
		workbook = xlrd.open_workbook('houseInfo_origin.xls')
		sheet = workbook.sheet_by_name(u'houseinfo')
		for rowIdx in range(1, sheet.nrows):
			# cell values in column order: id, address, area, price
			rowId, rowAddress, rowArea, rowPrice = [sheet.cell(rowIdx, col).value for col in range(4)]

			record = House()
			record.houseId = rowId
			record.address = rowAddress
			record.price = rowPrice
			record.area = rowArea
			self.houses.append(record)

			if rowIdx % PageNumber != 0:
				continue
			# batch boundary: flush everything collected so far
			self.processHouses()
			self.saveToFile()
			self.excelIndex += len(self.houses)
			self.houses = []
		# trailing partial batch (self.excelIndex is not advanced for it)
		if len(self.houses) > 0:
			self.processHouses()
			self.saveToFile()
			self.houses = []
Esempio n. 2
0
    def processHouse(self, houseStr):
        """
		对每个学区房记录进行处理
		"""
        houseInfo = houseStr.find("div", attrs={"class": "inventory_list_r_tit_list"}).find_all("a")
        houseId = houseInfo[0]["href"][len(PrefixURL) :][:-1]
        address = houseStr.find("div", attrs={"class": "inventory_list_r_name_ad"}).text
        detailInfo = houseStr.find("div", attrs={"class": "inventory_list_r_details_r"})
        details = detailInfo.find_all("span")
        price = details[2].text
        area = details[1].text
        # 如果houseId没被处理过
        if not houseId in self.success_houseIds:
            print houseId, address, price, area

            house = House()
            house.houseId = houseId
            house.address = address
            house.price = price
            house.area = area
            house.flage = 0
            self.houses.append(house)
            self.success_houseIds[houseId] = house

        else:
            print "id:%s  exist" % houseId
            print houseId, address, price, area
            house = self.success_houseIds[houseId]
            if houseId == house.houseId and address == house.address and price == house.price and area == house.area:
                house.flage = 1
            else:
                house.flage = 2
                print "The same houseId have different data"
            self.houses.append(house)
Esempio n. 3
0
    def load(self):
        """Recreate the four houses and restore each one from its '.sv' file.

        Afterwards self.houses lists them in the order slytherin,
        gryffindor, hufflepuff, ravenclaw.
        """
        saved = (
            ('slytherin', 'Slytherin.sv'),
            ('gryffindor', 'Gryffindor.sv'),
            ('ravenclaw', 'Ravenclaw.sv'),
            ('hufflepuff', 'Hufflepuff.sv'),
        )

        # create every house first, then load them in the same order
        for attr, _ in saved:
            setattr(self, attr, House())
        for attr, path in saved:
            getattr(self, attr).load(path)

        self.houses = [self.slytherin, self.gryffindor, self.hufflepuff, self.ravenclaw]
Esempio n. 4
0
	def processHouse(self,houseStr):
		"""
		对每个学区房记录进行处理
		"""
		houseInfo = houseStr.find_all('a')
		houseId = houseInfo[0]['href'][len(self.prefixText):][:-5]
		addressUrl = self.prefixUrl + houseInfo[1]['href']
		address = self.processAddress(addressUrl)
		price = (houseStr.find('div',attrs={"class": "price-pre"})).text
		area = (houseStr.find('div',attrs={"class": "where"}).find_all('span'))[3].text
		#如果houseId没被处理过
		if not houseId in self.success_houseIds:
			print houseId,address,price,area
			house = House()
			house.houseId = houseId
			house.address = address
			house.price = price
			house.area = area
			house.flage = 0
			self.houses.append(house)
			self.success_houseIds[houseId] = house
		else:
			print "id:%s  exist" % houseId
			print houseId,address,price,area
			house = self.success_houseIds[houseId]
			if(houseId == house.houseId and address == house.address and price == house.price and area == house.area):
				house.flage = 1
			else:
				house.flage = 2
				print "The same houseId have different data"
			self.houses.append(house)
Esempio n. 5
0
    def assign_house_slots(self):
        """Create the four houses with their per-house student limits.

        Each house gets a minimum of floor(num_students / 4) and a maximum
        of ceil(num_students / 4) students. Reads self.num_students and
        sets self.slytherin, self.ravenclaw, self.hufflepuff,
        self.gryffindor and self.houses.
        """
        # Divide by 4.0 so the ceiling is taken over the real quotient. With
        # integer operands Python 2's `/` floors first, which made the
        # maximum equal to the minimum and left the remainder students
        # without a slot.
        max_students_per_house = int(math.ceil(self.num_students / 4.0))
        min_students_per_house = self.num_students // 4

        # purely laptop class
        self.slytherin = House('Slytherin', min_students_per_house, max_students_per_house, 0, -1)

        # 10 slots without laptop, the rest with laptops
        self.ravenclaw = House('Ravenclaw', min_students_per_house, max_students_per_house, 10, -1)

        # normal labs
        self.hufflepuff = House('Hufflepuff', min_students_per_house, max_students_per_house)
        self.gryffindor = House('Gryffindor', min_students_per_house, max_students_per_house)

        self.houses = [self.slytherin, self.ravenclaw, self.hufflepuff, self.gryffindor]
	def __init__(self, filename=None, size_of_player=20, turns=10000000, players_number=20, mask_size=3, convergence=1000, mask=MaskMoveCore):
		"""
		Set up the run: parameters, house, event and the list of players.

		filename       -- when given, the Event is built from this file,
		                  otherwise it is built with size=size_of_player
		size_of_player -- Event size used only when no filename is given
		turns, mask_size, convergence, mask -- stored unchanged on the instance
		players_number -- how many Player objects to create

		self.size_of_player is always taken from len(self.event), so it may
		differ from the size_of_player argument when a file is loaded.
		"""
		self.convergence = convergence
		self.mask_size = mask_size
		self.mask = mask
		self.turns = turns
		self.house = House()
		if filename is not None:
			self.event = Event(filename=filename)
		else:
			self.event = Event(size=size_of_player)

		self.size_of_player = len(self.event)

		# Build a fresh list per instance. Appending to self.players without
		# creating it first either fails with AttributeError or, if the
		# class declares a `players` list, shares one list between instances.
		self.players = [Player(self.size_of_player) for _ in range(players_number)]
def test():
    """Build a House with entries 'a'..'j', connect them, then traverse it.

    Progress markers are printed before and after each phase, and the
    finished house is passed to traverse_house.
    """
    print("SETUP")
    house = House('house')
    for label in 'abcdefghij':
        house.add(label)

    print("CONNECTIONS")
    # connection pairs, in the order they are registered
    pairs = [
        ('a', 'b'), ('c', 'd'), ('a', 'e'), ('b', 'e'), ('a', 'c'), ('f', 'c'),
        ('g', 'd'), ('h', 'b'), ('i', 'c'), ('j', 'a'), ('j', 'h'),
    ]
    for left, right in pairs:
        house.connect(left, right)
    print("CONNECTIONS COMPLETE")
    print("SETUP COMPLETE")

    traverse_house(house)
e_filt_lpf = moving_average_erssi_filter(e, 5)

# Kalman (linear); its output is computed but not the one selected below
e_filt_kalman, kalman_error_cov = kalman(e)

# final filter chosen
e_filt = e_filt_lpf

# map RSSI values to approximate location in house
beaconcoors = {'bedroom 1': (18, 6, 10), 'bathroom': (20, 18, 10), 'living room': (6, 25, 10),
                 'kitchen': (5, 10, 8), 'bedroom 2': (22, 27, 3), 'bedroom 3': (6, 45, 10)}
myhouse = House(beaconcoors, roomdict)


# separate into training and test sets
# the first two thirds of the samples define the training set
# (floor division keeps the limit an integer so it can be used as a slice bound)
TRAIN_LIMT = 2*len(t)//3

ttrainr = t[:TRAIN_LIMT]
etrainr = e_filt[:TRAIN_LIMT]
gtrainr = g[:TRAIN_LIMT]
# clean training set
(ttrain, etrain, gtrain) = CSVRoomReader.clean_teg(ttrainr, etrainr, gtrainr)
# find WPL functionals
ecoortrain = myhouse.get_coor_from_rssi(etrain)

# test timestamps, shifted so the first test sample is at time zero
ttestr = t[TRAIN_LIMT:] - t[TRAIN_LIMT]
Esempio n. 9
0
class SortingHat:
    """Interactive tool that distributes students over four houses.

    `start` reads the expected number of students from 'config.cfg',
    creates the houses with matching slot limits and then prompts for
    students one at a time. Students who can bring a laptop are placed in
    Slytherin or Ravenclaw; the others in Ravenclaw, Gryffindor or
    Hufflepuff.
    """

    def start(self):
        """Run a full session: load config, create houses, sort students."""
        self.load_config()
        self.assign_house_slots()
        self.sort_students()

    def load_config(self):
        """Read 'config.cfg' and store its 'num_students' value as an int.

        The file holds one ``key:value`` pair per line. Lines without a
        colon (for example blank lines) are ignored, and only the first
        colon on a line separates the key from the value.

        Raises KeyError when there is no 'num_students' entry and
        ValueError when its value is not an integer.
        """
        config = dict()
        with open('config.cfg', 'r') as f:
            for raw in f:
                line = raw.strip()
                if ':' not in line:
                    continue
                key, val = line.split(':', 1)
                config[key.strip()] = val.strip()

        self.num_students = int(config['num_students'])

    @staticmethod
    def _slot_bounds(num_students):
        """Return (min, max) students per house for four houses."""
        # Divide by 4.0 so the ceiling is taken over the real quotient. With
        # integer operands Python 2's `/` floors first, which made the
        # maximum equal to the minimum and left the remainder students
        # without a slot.
        return num_students // 4, int(math.ceil(num_students / 4.0))

    def assign_house_slots(self):
        """Create the four houses with their per-house student limits."""
        min_students_per_house, max_students_per_house = self._slot_bounds(self.num_students)

        # purely laptop class
        self.slytherin = House('Slytherin', min_students_per_house, max_students_per_house, 0, -1)

        # 10 slots without laptop, the rest with laptops
        self.ravenclaw = House('Ravenclaw', min_students_per_house, max_students_per_house, 10, -1)

        # normal labs
        self.hufflepuff = House('Hufflepuff', min_students_per_house, max_students_per_house)
        self.gryffindor = House('Gryffindor', min_students_per_house, max_students_per_house)

        self.houses = [self.slytherin, self.ravenclaw, self.hufflepuff, self.gryffindor]

    def sort_students(self):
        """Prompt for students until the operator answers 'n', then report and save.

        Each round clears the screen, shows the two ASCII-art files, asks
        for a name and whether the student can bring a laptop, and assigns
        the student accordingly.
        """
        while True:
            os.system('clear')
            self.display_art('bootcamp_ascii.txt')
            self.display_art('sorting_hat_ascii.txt')

            name = raw_input('Enter your name:\t\t')
            answer = raw_input('Can you bring a laptop (y/n):\t').lower()
            has_laptop = answer in ('y', 'yes')

            if has_laptop:
                self.assign_student_with_laptop(name)
            else:
                self.assign_student_without_laptop(name)

            if raw_input('Add another student? (y/n): ') == 'n':
                break
        self.print_students_to_file()
        self.print_students()
        self.save()

    def save(self):
        """Ask every house to save itself."""
        for x in self.houses:
            x.save()

    def load(self):
        """Recreate the four houses and restore each one from its '.sv' file."""
        self.slytherin = House()
        self.gryffindor = House()
        self.ravenclaw = House()
        self.hufflepuff = House()

        self.slytherin.load('Slytherin.sv')
        self.gryffindor.load('Gryffindor.sv')
        self.ravenclaw.load('Ravenclaw.sv')
        self.hufflepuff.load('Hufflepuff.sv')

        self.houses = [self.slytherin, self.gryffindor, self.hufflepuff, self.ravenclaw]

    def print_students_to_file(self):
        """Print a blank line per house and have the house write its students to file."""
        for x in self.houses:
            print('')
            x.print_students_to_file()

    def print_students(self):
        """Have every house print its students."""
        for x in self.houses:
            x.print_students()

    def display_art(self, filename):
        """Print the contents of *filename* line by line."""
        with open(filename, 'r') as f:
            lines = [x.strip('\n') for x in f.readlines()]
        for x in lines:
            print(x)

    def _assign_student(self, name, choices, slots_attr, add_method):
        """Add *name* to a uniformly random house among those that can take it.

        A house can take the student when either
          * its `slots_attr` counter is non-zero and it is still below its
            minimum, or
          * it sits between its minimum and maximum and every house in
            *choices* has already reached its minimum.

        Eligibility does not change while choosing, so picking uniformly
        among the eligible houses selects the same way as drawing from all
        choices and retrying until one fits. Unlike that retry loop, this
        terminates when nothing is eligible.

        Raises RuntimeError when no house in *choices* can take the student.
        """
        all_at_minimum = all(h.slots_taken >= h.min_slots for h in choices)

        def can_accept(h):
            if getattr(h, slots_attr) != 0 and h.slots_taken < h.min_slots:
                return True
            return all_at_minimum and h.min_slots <= h.slots_taken < h.max_slots

        eligible = [h for h in choices if can_accept(h)]
        if not eligible:
            raise RuntimeError('No house has a free slot for %s' % (name,))
        getattr(random.choice(eligible), add_method)(name)

    def assign_student_with_laptop(self, name):
        """Place a laptop-owning student in Slytherin or Ravenclaw.

        Raises RuntimeError when neither house can take the student.
        """
        self._assign_student(name, [self.slytherin, self.ravenclaw],
                             'slots_with_laptop', 'add_student_with_laptop')

    def assign_student_without_laptop(self, name):
        """Place a student without laptop in Ravenclaw, Gryffindor or Hufflepuff.

        Raises RuntimeError when none of the houses can take the student.
        """
        self._assign_student(name, [self.ravenclaw, self.gryffindor, self.hufflepuff],
                             'slots_without_laptop', 'add_student_without_laptop')
    def train_classifier(self, rssilist, gtlist):
        """Train and select a room classifier from raw RSSI and ground-truth data.

        This is the main function to be used from this class. It filters
        the RSSI data, runs 3-fold cross validation over a fixed set of
        scikit-learn classifiers plus a highest-RSSI baseline, picks the
        entry with the lowest mean error rate and refits that entry on the
        complete data set.

        Inputs:
          rssilist - 2D list of rssi data in the raw format as rows of strings
          gtlist   - 2D list of ground truth data in the raw format as rows of strings
        Outputs:
          trainedclassifier - classifier object trained on the full data set
          self.summarymap   - map of description:value pairs that summarize training results
        """

        def error_rate(predicted, truth):
            # fraction of samples whose predicted room differs from the ground truth
            misses = sum(1 for p, g in zip(predicted, truth) if p != g)
            return float(misses) / len(truth)

        # read in training data
        rssiarray, groundmatcharray = self.read_training_pair(rssilist, gtlist)

        # create house object
        self.myhouse = House(self.beaconcoordict, self.roomlist)

        ## filter
        # Exactly one filter is active (LMS momentum, below). The alternatives
        # that were tried are kept here as a menu:
        #   no filter                              efilt = rssiarray
        #   simple LPF                             efilt = self.moving_average_erssi_filter(rssiarray)
        #   Kalman w.r.t. time, no prior model     efilt = self.kalman_time_erssi_filter(rssiarray, np.eye(6))
        #   Kalman w.r.t. time, LPF prior          efilt = self.kalman_time_erssi_filter(rssiarray, A)
        #   a priori gradient adaptive             efilt = self.adaptive_grad_erssi_filter(rssiarray)
        #   a priori momentum adaptive             efilt = self.adaptive_momen_erssi_filter(rssiarray)
        #   a priori Newton adaptive               efilt = self.adaptive_newt_erssi_filter(rssiarray)
        #   LMS gradient adaptive                  efilt = self.LMS_grad_erssi_filter(rssiarray)
        #   LMS HB adaptive                        efilt = self.LMS_HB_erssi_filter(rssiarray)
        #   LMS Newton adaptive (stochastic 2nd)   efilt = self.LMS_newt_erssi_filter(rssiarray)
        #   RLS sliding window                     efilt = self.RLS_erssi_filter(rssiarray)
        #   LMS gradient, l1 error function        efilt = self.LMS_l1_grad_erssi_filter(rssiarray)
        #   LMS momentum, l1 error function        efilt = self.LMS_l1_momen_erssi_filter(rssiarray)

        # state transition matrix for time prior built from LPF coefficients
        # (only consumed by the Kalman alternative listed above)
        filter_coeff = np.array([0.0525, 0.1528, 0.2947, 0.2947, 0.1528, 0.0525])
        A = np.zeros([len(filter_coeff), len(filter_coeff)])
        A[:, 0] = filter_coeff
        A[1:, 1] = filter_coeff[:5]
        A[2:, 2] = filter_coeff[:4]
        A[3:, 3] = filter_coeff[:3]
        A[4:, 4] = filter_coeff[:2]
        A[5:, 5] = filter_coeff[:1]

        # LMS momentum adaptive filter
        efilt = self.LMS_momen_erssi_filter(rssiarray, restarts=True)

        # state transition matrix for Kalman - power law distribution (fat tailed)
        # (not consumed by the active filter)
        gamma = 0.1
        A = np.array([
            [gamma, gamma**2, gamma**2, gamma**2, gamma**3, gamma**3],
            [gamma**2, gamma, gamma**2, gamma**2, gamma**2, gamma**2],
            [gamma**2, gamma**2, gamma, gamma**2, gamma**2, gamma**2],
            [gamma**2, gamma**2, gamma**2, gamma, gamma**3, gamma**3],
            [gamma**3, gamma**2, gamma**2, gamma**3, gamma, gamma**2],
            [gamma**3, gamma**2, gamma**2, gamma**3, gamma**2, gamma]
        ])
        # normalize each row
        for i in range(A.shape[0]):
            rownorm = np.linalg.norm(A[i, :], ord=1)
            for j in range(A.shape[1]):
                A[i, j] /= rownorm

        # separate the data into three consecutive folds
        # (floor division keeps the break points usable as slice bounds)
        SETBREAK1 = len(self.timearray) // 3
        SETBREAK2 = 2 * len(self.timearray) // 3

        t0 = self.timearray[:SETBREAK1]
        e0 = efilt[:SETBREAK1, :]
        g0 = groundmatcharray[:SETBREAK1]

        t1 = self.timearray[SETBREAK1:SETBREAK2]
        e1 = efilt[SETBREAK1:SETBREAK2, :]
        g1 = groundmatcharray[SETBREAK1:SETBREAK2]

        t2 = self.timearray[SETBREAK2:]
        e2 = efilt[SETBREAK2:, :]
        g2 = groundmatcharray[SETBREAK2:]

        tset = [t0, t1, t2]
        eset = [e0, e1, e2]
        gset = [g0, g1, g2]

        # error dictionary with mapping {classifier name: [error rate of fold 1, ...]}
        # 'SVMl' (linear-kernel SVM) is currently not evaluated, so its list stays empty.
        errordict = {'logistic regression': [], 'SVMl': [], 'SVMr': [], 'LDA': [], 'decision tree': [],
                     'random forest': [], 'extra trees': [], 'ada boosting': [], 'gradient boosting': [],
                     'filtered RSSI error': [], 'KNN': [], 'KNNb': []}

        # Classifiers evaluated in every fold, in evaluation order. Notes from
        # earlier experiments:
        #   - SVMr parameters were picked by hand; a GridSearchCV over
        #     C in logspace(-2, 10, 13) and gamma in logspace(-9, 3, 13) was tried.
        #   - gradient boosting was also tried with a GridSearchCV over
        #     n_estimators and learning_rate.
        #   - LASSO was tested and removed: no gain, and it needs a second
        #     step in testing to pin the result to a beacon.
        #   - feature standardisation (StandardScaler) is currently disabled.
        cv_factories = [
            ('logistic regression', lambda: linear_model.LogisticRegression()),
            ('SVMr', lambda: svm.SVC(kernel='rbf', C=100, gamma=10**-6)),
            ('LDA', lambda: LDA(solver='svd')),
            # decision tree - seems to work very well; the ensembles below extend it
            ('decision tree', lambda: tree.DecisionTreeClassifier()),
            ('random forest', lambda: RandomForestClassifier(n_estimators=1000, criterion='entropy', bootstrap=False, n_jobs=-1)),
            ('extra trees', lambda: ExtraTreesClassifier(n_estimators=1000, n_jobs=-1)),
            ('ada boosting', lambda: AdaBoostClassifier(n_estimators=100, learning_rate=0.075)),
            ('gradient boosting', lambda: GradientBoostingClassifier(n_estimators=100, learning_rate=0.075)),
            ('KNN', lambda: neighbors.KNeighborsClassifier(n_neighbors=10)),
            # bagged k-nearest neighbors
            ('KNNb', lambda: BaggingClassifier(neighbors.KNeighborsClassifier(n_neighbors=10), n_estimators=100, n_jobs=-1)),
        ]

        # iterate over each fold of cross validation:
        # train on folds idx and idx+1, test on fold idx+2 (cyclically)
        for idx in range(len(tset)):

            print("Starting iteration " + str(idx+1) + " of 3")

            second = (idx + 1) % len(tset)
            held_out = (idx + 2) % len(tset)

            ttrain = np.hstack((tset[idx], tset[second]))
            ttest = tset[held_out]
            etrain = np.vstack((eset[idx], eset[second]))
            etest = eset[held_out]
            gtrain = np.hstack((gset[idx], gset[second]))
            gtest = gset[held_out]

            # find WPL functionals
            ecoortrain = self.myhouse.get_coor_from_rssi(etrain)
            ecoortest = self.myhouse.get_coor_from_rssi(etest)

            # create total data vector + functionals
            etotaltrain = np.hstack([etrain, ecoortrain])
            etotaltest = np.hstack([etest, ecoortest])

            # highest rssi as indicator of current room, for baseline comparison
            errordict['filtered RSSI error'].append(error_rate(self.get_highest_rssi(etest), gtest))

            # fit each classifier on the training folds and score it on the held-out fold
            for name, make in cv_factories:
                predicted = make().fit(etotaltrain, gtrain).predict(etotaltest)
                errordict[name].append(error_rate(predicted, gtest))

        # average over cross validation values; entries that were never
        # evaluated are reported as NaN
        ferrordict = {}
        for key in errordict:
            ferrordict[key] = np.mean(errordict[key]) if errordict[key] else np.nan

        # determine minimal error classifier and store summary data.
        # NaN entries are left out of the ranking: NaN compares False against
        # everything, so sorting with it gives an arbitrary order and could
        # even select a classifier that was never evaluated.
        sorted_error = sorted(
            [(name, err) for name, err in ferrordict.items() if not np.isnan(err)],
            key=operator.itemgetter(1))
        # sorted_error is sorted low to high by error rate so that the first element is the one with minimum error

        # create summary (set sizes are those of the last cross validation fold)
        self.summarymap['size of training set'] = len(ttrain)
        self.summarymap['size of test set'] = len(ttest)
        self.summarymap['classifier used'] = sorted_error[0][0]
        self.summarymap['classifier error'] = sorted_error[0][1]
        self.summarymap['filtered RSSI error'] = ferrordict['filtered RSSI error']
        self.summarymap['error dictionary'] = ferrordict

        # retrain on all of the data (i.e., include test set)
        ttrain = self.timearray
        etrain = efilt
        gtrain = groundmatcharray

        # find WPL functionals
        self.myhouse = House(self.beaconcoordict, self.roomlist)
        ecoortrain = self.myhouse.get_coor_from_rssi(etrain)

        # create total data vector + functionals
        etotaltrain = np.hstack([etrain, ecoortrain])

        print("Training final classifier")

        # Factories instead of fitted objects, so that only the selected
        # classifier is trained on the full data set.
        # NOTE(review): 'SVMr', 'random forest' and 'extra trees' use different
        # hyperparameters here than during cross validation - confirm intended.
        final_factories = {
            'logistic regression': lambda: linear_model.LogisticRegression(),
            'SVMr': lambda: svm.SVC(kernel='rbf'),
            'LDA': lambda: LDA(solver='svd'),
            'decision tree': lambda: tree.DecisionTreeClassifier(),
            'random forest': lambda: RandomForestClassifier(n_estimators=100, criterion='entropy', bootstrap=False, n_jobs=-1),
            'extra trees': lambda: ExtraTreesClassifier(n_estimators=100, n_jobs=-1),
            'ada boosting': lambda: AdaBoostClassifier(n_estimators=100, learning_rate=0.075),
            'gradient boosting': lambda: GradientBoostingClassifier(n_estimators=100, learning_rate=0.075),
            'KNN': lambda: neighbors.KNeighborsClassifier(n_neighbors=10),
            'KNNb': lambda: BaggingClassifier(neighbors.KNeighborsClassifier(n_neighbors=10), n_estimators=100, n_jobs=-1),
            'filtered RSSI error': lambda: NaiveRSSIClassifier(roomlist=self.roomlist),
        }
        trainedclassifier = final_factories[self.summarymap['classifier used']]().fit(etotaltrain, gtrain)

        self.summarymap['classifier size'] = getsizeof(trainedclassifier)

        print("classifier used %s" % (self.summarymap['classifier used'],))
        print("classifier error %s" % (self.summarymap['classifier error'],))

        return trainedclassifier, self.summarymap
class TrainingRoomEstimator(RoomEstimator):

    def __init__(self, beaconcoordict):
        """
        set up an estimator that is trained from labelled rssi data
        :param beaconcoordict: dictionary mapping room names to (x,y,z) tuples with beacon location
        """
        super(TrainingRoomEstimator, self).__init__()
        # house object for WPL functionals; train_classifier replaces this with a House instance
        self.myhouse = None
        # map of description:value pairs holding summary results from training
        self.summarymap = {}
        self.beaconcoordict = beaconcoordict

    def get_highest_rssi(self, erssiarray):
        # create an array where the room is estimated from the highest rssi value
        # erssiarray = estimote rssi array (d data points by n beacons)
        # output = n x 1 array of expected rooms from each data point
        eroom = []
        for erssi in erssiarray:
            maxerssi = -1000
            room = 0
            for i in range(0, len(erssi)):
                if erssi[i] > maxerssi:
                    maxerssi = erssi[i]
                    room = i
            eroom.append(self.roomlist[room])
        return np.array(eroom)

    def read_ground_match_single_point_window(self, earray, gtlist):
        """
        read the ground truth list where each entry is (roomname, starttime, endtime)
        create list of ground truth values at matching times for the estimote rssi array
        using self.timearray
        :param erray: estimote array at all initially collected values
        :param gtlist: ground truth as list of (roomname, starttime, endtime) values
        :return: ground truth at times matching estimote rssi array and estimote array of actual values used
        """

        print "DEBUG: earray", earray
        print "DEBUG: gtlist", gtlist

        gformat = []
        for row in gtlist:
            # [room, start, end] -> [[start, room], [end, room]]
            gformat.append([row[1], row[0]])
            gformat.append([row[2], row[0]])

        rawarray = np.array(gformat)

        gtimer = []
        for timestamp in rawarray[:, 0]:
            graw = self._get_time(timestamp)
            gtimer.append(graw)
        gtime = np.array(gtimer)

        # get ground truth at matching times
        groomarray = []
        gcounter = 0  # marks progression in ground truth array
        i = 0
        while i < len(self.timearray):
            # define bounds within the ground truth is known
            gtimelower = gtime[gcounter]
            if self.timearray[i] > (int(rawarray[-1, 0]) - int(rawarray[0, 0])):
                earray[i-1:-1, :].fill(-1)
                break
            elif gcounter < len(gtime):
                gtimeupper = gtime[gcounter+1]
            else:
                 gtimeupper = maxint
            # append NULL if before the first ground truth has been acquired
            if gcounter == 0 and self.timearray[i] < gtimelower:
                groomarray.append('NULL')
            elif self.timearray[i] >= gtimeupper:
                # increase bounding box once self.timearray has progressed past it
                # cannot simply increment because estimote may miss a whole room
                # if the watch wearer just walks through
                for j in range(gcounter, len(gtime)):
                    if gtime[j] > self.timearray[i]:
                        gcounter = j-1
                        break
                # check if still within window of matching ground truth room names
                if self._get_room(rawarray[gcounter, 1]) != self._get_room(rawarray[gcounter+1, 1]):
                    gtimeupper = gtime[gcounter+1]
                    while self.timearray[i] < gtimeupper:
                        # set to -1 to identify value should be removed (cannot removed yet because of indexing)
                        self.timearray[i] = -1
                        earray[i, :].fill(-1)
                        i += 1
                    gcounter += 1

                groomarray.append(self._get_room(rawarray[gcounter, 1]))
                i += 1
            else:
                groomarray.append(self._get_room(rawarray[gcounter, 1]))
                i += 1

        # remove unused values from self.timearray
        timelist = self.timearray.tolist()
        timelistf = [time for time in timelist if time != -1]
        self.timearray = np.array(timelistf)

        # remove unused values from estimote array
        elist = earray.tolist()
        erowlist = [row for row in elist if row[0] != -1]
        errayfinal = np.array(erowlist)

        # remove unused values from ground trust array
        estimoteRSSIarray, groundmatcharray = self._remove_NULL(errayfinal, groomarray)

        return estimoteRSSIarray, groundmatcharray

    def read_ground_match_windowed(self, earray, gtlist):
        # read the ground truth 2D list and create array at time points
        # matching the estimote measurement points. assumes estimote values fall within a window of ground
        # truth values with the same room name
        # Input:
        #   earray = dxr numpy array of estimote rssi values where r is number of rooms
        #   gtlist = dx2 list of strings with timestamp in first column and room in second
        # Output:
        #   groundmatcharray = numpy array of ground truth rooms at times matching the estimote data collection times
        #            in the training set -- may contain 'NULL' values at the start if the first estimote value
        #            appears before the first ground trust value
        #   estimoteRSSIarray = numpy array of estimote rssi values

        rawarray = np.array(gtlist)

        rawarray = np.vstack([rawarray, [rawarray[-1, 0], maxint]])
        gtimer = []
        for timestamp in rawarray[:, 0]:
            graw = self._get_time(timestamp)
            gtimer.append(graw)
        gtime = np.array(gtimer)

        # get ground truth at matching times
        groomarray = []
        gcounter = 0  # marks progression in ground truth array
        i = 0
        while i < len(self.timearray):
            # define bounds within the ground truth is known
            gtimelower = gtime[gcounter]
            if gcounter < len(gtime):
                gtimeupper = gtime[gcounter+1]
            else:
                 gtimeupper = maxint
            # append NULL if before the first ground truth has been acquired
            if gcounter == 0 and self.timearray[i] < gtimelower:
                groomarray.append('NULL')
            elif self.timearray[i] >= gtimeupper:
                # increase bounding box once self.timearray has progressed past it
                # cannot simply increment because estimote may miss a whole room
                # if the watch wearer just walks through
                for j in range(gcounter, len(gtime)):
                    if gtime[j] > self.timearray[i]:
                        gcounter = j-1
                        break
                # check if still within window of matching ground truth room names
                if self._get_room(rawarray[gcounter, 1]) != self._get_room(rawarray[gcounter+1, 1]):
                    gtimeupper = gtime[gcounter+1]
                    while self.timearray[i] < gtimeupper:
                        # set to -1 to identify value should be removed (cannot removed yet because of indexing)
                        self.timearray[i] = -1
                        earray[i, :].fill(-1)
                        i += 1
                    gcounter += 1

                groomarray.append(self._get_room(rawarray[gcounter, 1]))
                i += 1
            else:
                groomarray.append(self._get_room(rawarray[gcounter, 1]))
                i += 1

        # remove unused values from self.timearray
        timelist = self.timearray.tolist()
        timelistf = [time for time in timelist if time != -1]
        self.timearray = np.array(timelistf)

        # remove unused values from estimote array
        elist = earray.tolist()
        erowlist = [row for row in elist if row[0] != -1]
        errayfinal = np.array(erowlist)

        # remove unused values from ground trust array
        estimoteRSSIarray, groundmatcharray = self._remove_NULL(errayfinal, groomarray)

        return estimoteRSSIarray, groundmatcharray

    def read_ground_match_general(self, gtlist):
        # read the ground truth 2D list and create array at time points
        # matching the estimote measurement points
        # if ground truth is not yet known at an estimote measurement point,
        # the room value is set 'NULL' and removed by _remove_NULL()
        # Input:
        #   gtlist = dx2 list of strings with timestamp in first column and room in second
        # Output:
        #   groomarray = numpy array of ground truth rooms at times matching the estimote data collection times
        #                in the training set -- may contain 'NULL' values at the start if the first estimote value
        #                appears before the first ground trust value

        rawarray = np.array(gtlist)

        rawarray = np.vstack([rawarray, [rawarray[-1, 0], maxint]])
        gtimer = []
        for timestamp in rawarray[:, 0]:
            graw = self._get_time(timestamp)
            gtimer.append(graw)
        gtime = np.array(gtimer)

        # get ground truth at matching times
        groomarray = []
        gcounter = 0  # marks progression in ground truth array
        for i in range(len(self.timearray)):
            # define bounds within the ground truth is known
            gtimelower = gtime[gcounter]
            if gcounter < len(gtime):
                gtimeupper = gtime[gcounter+1]
            else:
                 gtimeupper = maxint
             # append NULL if before the first ground truth has been acquired
            if gcounter == 0 and self.timearray[i] < gtimelower:
                groomarray.append('NULL')
            elif self.timearray[i] >= gtimeupper:
                # increase bounding box once self.timearray has progressed past it
                # cannot simply increment because estimote may miss a whole room
                # if the watch wearer just walks through
                for j in range(gcounter, len(gtime)):
                    if gtime[j] > self.timearray[i]:
                        gcounter = j-1
                        break
                #groomarray[i] = self._get_room(rawarray[gcounter, 1])
                groomarray.append(self._get_room(rawarray[gcounter, 1]))
            else:
                #groomarray[i] = self._get_room(rawarray[gcounter, 1])
                groomarray.append(self._get_room(rawarray[gcounter, 1]))
                #print (groomarray[i], self.timearray[i])

        return groomarray

    def read_training_pair(self, rssilist, gtlist):
        # this method reads an rssi 2D list and the matching ground truth 2D list
        # it returns 3 numpy arrays
        #   self.timearray - array of time points when estimote measurements taken
        #   estimoteRSSIarray - matrix of estimote RSSI values (measurements x rooms)
        #   groundmatcharray - ground truth array of equal length with ground truth
        #                      value at each point in timearray

        estimoteRSSIarray = self.read_rssi_list(rssilist)
        estimoteRSSIarray, groundmatcharray = self.read_ground_match_single_point_window(estimoteRSSIarray, gtlist)

        # if data is not contained within ground trust windows, use the following two lines
        # estimoteRSSIarray = self.read_ground_match_general(gtlist)
        # estimoteRSSIarray, groundmatcharray = self._remove_NULL(estimoteRSSIarray, groundmatcharray)

        # quick sanity checking
        if len(self.timearray) == 0:
            raise ValueError('{timearray} cannot be size zero'.format(
                timearray=repr(self.timearray)))
        elif len(self.timearray) != len(groundmatcharray) or len(self.timearray) != len(estimoteRSSIarray):
            raise ValueError('timearray: {tlen} or estimoteRSSIarray: {elen} '
                'or groundmatcharray: {glen} is not the right size'.format(
                tlen=repr(len(self.timearray)), elen=len(estimoteRSSIarray),
                glen=len(groundmatcharray)))
        return estimoteRSSIarray, groundmatcharray

    def _remove_NULL(self, erssiarray, gmatcharray):
        """
        this method removes NULL values that may be at the start of the gmatcharray and
        shifts the erssiarray and self.timearray appropriately
        :param erssiarray: the numpy array of estimote rssi data
        :param gmatcharray: the ground truth list at matching time points that may contain NULL values
        :return: processed rssi, ground trust, and time arrays
        """
        idx = 0
        for room in gmatcharray:
            if room != 'NULL':
                idx = gmatcharray.index(room)
                break

        toffset = self.timearray[idx]

        rfinal = np.zeros([erssiarray.shape[0]-idx, erssiarray.shape[1]])
        gfinal = []
        tfinal = []
        for i in range(idx, len(gmatcharray)):
            rfinal[i-idx, :] = erssiarray[i, :]
            gfinal.append(gmatcharray[i])
            tfinal.append(self.timearray[i] - toffset)

        self.timearray = np.array(tfinal)
        return rfinal, gfinal


    def train_classifier(self, rssilist, gtlist):
        """
        main function of this class: train a classifier from the raw rssi and ground truth data

        steps: read and align the data, filter the rssi values with the LMS momentum adaptive
        filter, score every candidate classifier with 3-fold cross validation (folds are
        consecutive thirds of the recording), then retrain the candidate with the lowest mean
        error on all of the data. the highest-rssi baseline ('filtered RSSI error') takes part
        in the comparison and can be selected as well.

        NOTE(review): the cross validation uses SVC(C=100, gamma=10**-6) and 1000 trees for the
        random forest / extra trees, while the final model is built with default SVC parameters
        and 100 trees (kept as found) - the reported error may not describe the returned model.

        :param rssilist: 2D list of rssi data in the raw format as rows of strings
        :param gtlist: 2D list of ground truth data in the raw format as rows of strings
        :return: trainedclassifier (classifier object trained on the full data set) and
                 self.summarymap (map of description:value pairs that summarize training results)
        """
        # read in training data
        rssiarray, groundmatcharray = self.read_training_pair(rssilist, gtlist)

        # create house object
        self.myhouse = House(self.beaconcoordict, self.roomlist)

        # LMS momentum adaptive filter (the Kalman transition matrices that used to be built
        # here were never passed to anything and have been dropped)
        efilt = self.LMS_momen_erssi_filter(rssiarray, restarts=True)

        # separate into three consecutive folds; // keeps the break points integral
        nsamples = len(self.timearray)
        SETBREAK1 = nsamples // 3
        SETBREAK2 = 2 * nsamples // 3

        tset = [self.timearray[:SETBREAK1], self.timearray[SETBREAK1:SETBREAK2], self.timearray[SETBREAK2:]]
        eset = [efilt[:SETBREAK1, :], efilt[SETBREAK1:SETBREAK2, :], efilt[SETBREAK2:, :]]
        gset = [groundmatcharray[:SETBREAK1], groundmatcharray[SETBREAK1:SETBREAK2], groundmatcharray[SETBREAK2:]]

        # error dictionary with mapping {classifier name: [error of fold 1, error of fold 2, ...]}
        # 'SVMl' is currently not evaluated, so its list stays empty
        errordict = {'logistic regression': [], 'SVMl': [], 'SVMr': [], 'LDA': [], 'decision tree': [],
                     'random forest': [], 'extra trees': [], 'ada boosting': [], 'gradient boosting': [],
                     'filtered RSSI error': [], 'KNN': [], 'KNNb': []}

        # iterate over each fold of cross validation
        nfolds = len(tset)
        for idx in range(nfolds):

            print("Starting iteration " + str(idx+1) + " of 3")

            # two folds for training, the remaining one for testing
            second = (idx+1) % nfolds
            heldout = (idx+2) % nfolds
            ttrain = np.hstack((tset[idx], tset[second]))
            ttest = tset[heldout]
            etrain = np.vstack((eset[idx], eset[second]))
            etest = eset[heldout]
            gtrain = np.hstack((gset[idx], gset[second]))
            gtest = gset[heldout]

            # find WPL functionals
            ecoortrain = self.myhouse.get_coor_from_rssi(etrain)
            ecoortest = self.myhouse.get_coor_from_rssi(etest)

            # create total data vector + functionals
            etotaltrain = np.hstack([etrain, ecoortrain])
            etotaltest = np.hstack([etest, ecoortest])

            # grab highest rssi as indicator of current room for baseline comparison
            efcompare = self.get_highest_rssi(etest)

            # fit and predict with every candidate before recording anything for this fold
            predictions = []
            for name, make in self._cv_classifier_factories():
                predictions.append((name, make().fit(etotaltrain, gtrain).predict(etotaltest)))
            predictions.append(('filtered RSSI error', efcompare))

            for name, predicted in predictions:
                errordict[name].append(self._error_rate(predicted, gtest))

        # average over cross validation values; candidates without any fold result get NaN
        ferrordict = {}
        for key in errordict:
            folderrors = errordict[key]
            ferrordict[key] = np.mean(folderrors) if folderrors else float('nan')

        # determine minimal error classifier. NaN entries are left out first: NaN compares
        # false against everything, so sorting the raw values could return the wrong minimum
        # (or the unevaluated 'SVMl', which has no final classifier)
        scored = [(name, error) for name, error in ferrordict.items() if not np.isnan(error)]
        bestname, besterror = min(scored, key=operator.itemgetter(1))

        # create summary (set sizes are those of the last fold)
        self.summarymap['size of training set'] = len(ttrain)
        self.summarymap['size of test set'] = len(ttest)
        self.summarymap['classifier used'] = bestname
        self.summarymap['classifier error'] = besterror
        self.summarymap['filtered RSSI error'] = ferrordict['filtered RSSI error']
        self.summarymap['error dictionary'] = ferrordict

        # retrain on all of the data (i.e., include test set)
        etrain = efilt
        gtrain = groundmatcharray

        # find WPL functionals
        self.myhouse = House(self.beaconcoordict, self.roomlist)
        ecoortrain = self.myhouse.get_coor_from_rssi(etrain)

        # create total data vector + functionals
        etotaltrain = np.hstack([etrain, ecoortrain])

        print("Training final classifier")

        # only the selected classifier is built and fitted
        trainedclassifier = self._final_classifier_factories()[bestname]().fit(etotaltrain, gtrain)

        # getsizeof is shallow: it does not follow references held by the classifier object
        self.summarymap['classifier size'] = getsizeof(trainedclassifier)

        print("classifier used %s" % (self.summarymap['classifier used'],))
        print("classifier error %s" % (self.summarymap['classifier error'],))

        return trainedclassifier, self.summarymap

    def _error_rate(self, predicted, truth):
        # fraction of entries of predicted that differ from the ground truth at the same position
        wrong = sum(1 for guess, actual in zip(predicted, truth) if guess != actual)
        return float(wrong) / len(truth)

    def _cv_classifier_factories(self):
        # (name, constructor) pairs of the classifiers scored during cross validation, in fit order
        return [
            ('logistic regression', lambda: linear_model.LogisticRegression()),
            ('SVMr', lambda: svm.SVC(kernel='rbf', C=100, gamma=10**-6)),
            ('LDA', lambda: LDA(solver='svd')),
            ('decision tree', lambda: tree.DecisionTreeClassifier()),
            ('random forest', lambda: RandomForestClassifier(n_estimators=1000, criterion='entropy', bootstrap=False, n_jobs=-1)),
            ('extra trees', lambda: ExtraTreesClassifier(n_estimators=1000, n_jobs=-1)),
            ('ada boosting', lambda: AdaBoostClassifier(n_estimators=100, learning_rate=0.075)),
            ('gradient boosting', lambda: GradientBoostingClassifier(n_estimators=100, learning_rate=0.075)),
            ('KNN', lambda: neighbors.KNeighborsClassifier(n_neighbors=10)),
            ('KNNb', lambda: BaggingClassifier(neighbors.KNeighborsClassifier(n_neighbors=10), n_estimators=100, n_jobs=-1)),
        ]

    def _final_classifier_factories(self):
        # name -> constructor of the classifier that is retrained on the complete data set
        return {
            'logistic regression': lambda: linear_model.LogisticRegression(),
            'SVMr': lambda: svm.SVC(kernel='rbf'),
            'LDA': lambda: LDA(solver='svd'),
            'decision tree': lambda: tree.DecisionTreeClassifier(),
            'random forest': lambda: RandomForestClassifier(n_estimators=100, criterion='entropy', bootstrap=False, n_jobs=-1),
            'extra trees': lambda: ExtraTreesClassifier(n_estimators=100, n_jobs=-1),
            'ada boosting': lambda: AdaBoostClassifier(n_estimators=100, learning_rate=0.075),
            'gradient boosting': lambda: GradientBoostingClassifier(n_estimators=100, learning_rate=0.075),
            'KNN': lambda: neighbors.KNeighborsClassifier(n_neighbors=10),
            'KNNb': lambda: BaggingClassifier(neighbors.KNeighborsClassifier(n_neighbors=10), n_estimators=100, n_jobs=-1),
            'filtered RSSI error': lambda: NaiveRSSIClassifier(roomlist=self.roomlist),
        }
class Game():
	"""
	Iterative search in which players bet on the moves of a mask.

	Every turn a mask is generated, the players and the house calculate
	weights, the players bet, and the best move found by the mask replaces
	the current solution whenever it has a smaller event.f() value.
	"""
	# class-level defaults; __init__ gives every instance its own values
	turns=5
	event=None
	house=None
	players=[]
	solution=[]
	fileName = None
	
	def __init__(self, filename=None, size_of_player=20, turns=10000000, players_number=20, mask_size=3, convergence=1000, mask=MaskMoveCore):
		"""
		filename:       passed to Event(filename=...) when given
		size_of_player: passed to Event(size=...) when no filename is given
		turns:          maximum number of turns played by play()
		players_number: number of Player objects created
		mask_size:      first argument handed to the mask constructor
		convergence:    number of consecutive turns without improvement that ends play()
		mask:           callable building the mask, called as mask(mask_size, size_of_player)
		"""
		self.convergence = convergence
		self.mask_size = mask_size
		self.mask = mask
		self.turns = turns
		self.house = House()
		if filename is not None:
			self.event = Event(filename=filename)
		else:
			self.event = Event(size=size_of_player)

		self.size_of_player = len(self.event)
		
		# a fresh list per game: appending to the class-level list made every
		# Game instance share (and keep growing) the same players
		self.players = [Player(self.size_of_player) for _ in range(players_number)]
	
	def generateMask(self):
		"""Build a new mask from the configured mask class and sizes."""
		return self.mask(self.mask_size, self.size_of_player)
	
	def play(self):
		"""
		Run the game until self.turns turns were played or the solution did
		not improve for self.convergence consecutive turns.
		Returns the best solution found.
		"""
		self.solution = self.event.getInitialSolution()
		convergence = 0
		# single parenthesised string: prints the same text on Python 2 and 3
		print("iteration: -1; f(solution) = "+str(self.event.f(self.solution)))
		for turn in range(self.turns):
			mask = self.generateMask()
			
			for player in self.players:
				player.calculateWeights(mask)
			
			self.house.calculateWeights(self.players)
			
			for player in self.players:
				player.makeBets(self.house)
			
			result = mask.calculateBestMask(self.event, self.solution)
			
			for index, player in enumerate(self.players):
				if player.isWinner(result["index"]):
					player.receiveAward(self.house, result["index"])
				if player.isBroken():
					# a broken player leaves and a new one takes the same seat
					self.players[index] = player.createNew()
			
			if result["distance"] < self.event.f(self.solution):
				# improvement found: accept it and restart the stall counter
				convergence = 0
				self.solution = result["solution"]
			else:
				convergence += 1
			
			if convergence >= self.convergence:
				break
		
		return self.solution
Esempio n. 13
0
def main():
    """Open an 800x800 window showing a house that is moved with the arrow keys."""
    pygame.init()
    FPS = 30  # 30 frames per second
    fps_clock = pygame.time.Clock()

    # create the initial window and set its title
    window_size = (800, 800)
    screen = pygame.display.set_mode(window_size)
    pygame.display.set_caption("A House")

    # set of colors
    WHITE = pygame.Color(255, 255, 255)
    GOLD = pygame.Color(255, 215, 0)
    RED = pygame.Color(255, 0, 0)

    screen.fill(WHITE)
    colors_for_house = {'house': GOLD, 'roof': RED}

    house = House(200, 200, 200, colors_for_house)
    house.draw(screen)

    # which arrow keys are currently held down
    held = {K_UP: False, K_DOWN: False, K_LEFT: False, K_RIGHT: False}

    while True:  # <--- main game loop
        for event in pygame.event.get():
            if event.type == QUIT:  # QUIT event to exit the game
                pygame.quit()
                sys.exit()

            # a key press sets its flag, the matching release clears it
            if event.type in (KEYDOWN, KEYUP) and event.key in held:
                held[event.key] = (event.type == KEYDOWN)

        # apply the moves in the order up, down, left, right (5 pixels per frame)
        if held[K_UP]:
            house.change_y(-5)
        if held[K_DOWN]:
            house.change_y(5)
        if held[K_LEFT]:
            house.change_x(-5)
        if held[K_RIGHT]:
            house.change_x(5)

        screen.fill(WHITE)
        house.draw(screen)
        pygame.display.update()  # Update the display when all events have been processed
        fps_clock.tick(FPS)
Esempio n. 14
0
from datetime import datetime
import BeautifulSoup

from house import House
from HouseList import HouseList
from requestParams import *
from getPrice import PriceData

# Ad-hoc check script: builds a few House objects, asks some of them for a price
# and prints the result of HouseList.compMetPriceByRooms().

# today's date as a 'YYYY-MM-DD' string; handed to every House below
dateDate = datetime.today()
date = dateDate.strftime('%Y-%m-%d')
# region value looked up by name; state_id presumably comes from the star import
# of requestParams - verify
region = state_id[u'Винницкая']

# sample houses; the meaning of the positional arguments is defined by House
# (not visible here). Some arguments are None for h4, h5 and h6.
# NOTE(review): h5 and h6 are both created with 5 as first argument - confirm
# whether h6 was meant to use 6
h1 = House(1,10,2,100,3,date,region)
h2 = House(2,20,4,100,2,date,region)
h3 = House(3,29,2,39,1,date,region)
h4 = House(4,45,9,None,4,date,region)
h5 = House(5,3,None,45,1,date,region)
h6 = House(5,56,None,40,2,date,region)

# per-house price results; not read again in the visible part of the script
p3 = h3.getMetGrnPrice()
p4 = h4.getMetGrnPrice()
p5 = h5.getMetGrnPrice()
p6 = h6.getMetGrnPrice()

houseList = HouseList([h1,h2,h3,h4,h5,h6])

# aggregate computed by HouseList over all six houses
prices = houseList.compMetPriceByRooms()
print(prices)

# params is presumably the request-parameter mapping from requestParams - verify
params['period'] = 'per_hour'