Code Example #1
File: onlineMLC.py  Project: pi224/TopkBoostingMLR
	def update(self, Y, X=None, verbose=False):
		# TODO: change this to use topkY feedback set
		'''Runs the entire updating procedure, updating internal
		tracking of wl_weights and expert_weights.
		Args:
			Y (string): The true class.
			X (list): A list of the covariates of the current data point.
					  Float for numerical features, string for categorical.
					  Categorical values must have appeared in the initial
					  dataset. If not given, the last X used for prediction
					  is reused.
		'''

		if X is None:
			X = self.X

		self.X = np.array(X)
		Ystr = ''.join(Y)  # concatenate the per-label characters
		Yset = utils.str_to_set(Ystr)
		self.Y = Yset
		self.num_data += 1
		self.cum_error += utils.rank_loss(self.Yhat, Yset)
		expert_votes = np.zeros(self.num_classes)
		cost_vec = self.compute_cost(expert_votes, 0)
			
		for i in xrange(self.num_wls):
			alpha = self.wl_weights[i]
			w = self.weight_consts[i]
			# if self.loss == 'zero_one':
			#     w *= 5
			data_indices = self.data_indices[i]
			_max = max(cost_vec)
			for l in Yset:
				full_inst = self.make_full_instance(self.X[data_indices], l)
				full_inst.set_weight(w*(_max - cost_vec[l]))
				self.weaklearners[i].update_classifier(full_inst) 

			if verbose:
				print i, _max - min(cost_vec)

			# updating the quality weights and weighted vote vector
			expert_votes = self.expert_votes_mat[i,:]
			cost_vec = self.compute_cost(expert_votes, i+1)
			self.wl_weights[i] = self.update_alpha(cost_vec, i, alpha)
			if self.loss == 'logistic':
				self.expert_weights[i] *= np.exp(
					-utils.rank_loss(expert_votes, Yset) * self.exp_step_size)

		self.expert_weights = self.expert_weights / sum(self.expert_weights)
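
This update routine leans on utils.rank_loss (used both for the cumulative error and for the expert-weight update), whose source is not included in these excerpts. The sketch below shows what it plausibly computes, assuming the standard multi-label rank loss: the fraction of (relevant, irrelevant) label pairs that the score vector orders incorrectly, with ties counted as half an error. The name and signature mirror the calls above, but the body is an assumption, not the project's actual code.

def rank_loss(scores, Yset):
    # Assumed implementation: count the (relevant, irrelevant) label pairs
    # that the scores order incorrectly, ties counting as half an error,
    # normalized by the total number of such pairs.
    relevant = list(Yset)
    irrelevant = [l for l in range(len(scores)) if l not in Yset]
    if not relevant or not irrelevant:
        return 0.0
    errors = 0.0
    for l in relevant:
        for lp in irrelevant:
            if scores[l] < scores[lp]:
                errors += 1.0
            elif scores[l] == scores[lp]:
                errors += 0.5
    return errors / (len(relevant) * len(irrelevant))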
Code Example #2
File: batchMLC.py  Project: pi224/TopkBoostingMLR
    def record_losses(self, cum_votes):
        # Record the average surrogate (logistic) and rank losses of the
        # cumulative vote vectors over the training data.
        exp_sum = 0.0
        rank_sum = 0.0
        for t in xrange(self.num_data):
            s = cum_votes[t]
            Y = self.class_sets[t]
            exp_sum += utils.univ_logistic_loss(s, Y)
            rank_sum += utils.rank_loss(s, Y)
        self.exp_losses.append(exp_sum / self.num_data)
        self.rank_losses.append(rank_sum / self.num_data)
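
record_losses also depends on utils.univ_logistic_loss, which is likewise not shown. If it is the usual pairwise logistic surrogate for the rank loss, it could look roughly like the sketch below; both the normalization and the choice of natural logarithm are guesses.

import numpy as np

def univ_logistic_loss(scores, Yset):
    # Assumed pairwise logistic surrogate for the rank loss: each
    # (relevant, irrelevant) pair contributes log(1 + exp(margin)),
    # where margin = s_irrelevant - s_relevant.
    relevant = list(Yset)
    irrelevant = [l for l in range(len(scores)) if l not in Yset]
    if not relevant or not irrelevant:
        return 0.0
    total = 0.0
    for l in relevant:
        for lp in irrelevant:
            total += np.log(1.0 + np.exp(scores[lp] - scores[l]))
    return total / (len(relevant) * len(irrelevant))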
Code Example #3
File: batchMLC.py  Project: pi224/TopkBoostingMLR
    def get_test_results(self, num_wls=0):
        # Evaluate the boosted ensemble on the held-out test set, using
        # only the first num_wls weak learners (all of them by default).
        if num_wls == 0:
            num_wls = self.num_wls
        cum_votes = np.zeros((self.test_num_data, self.num_classes))
        for i in xrange(num_wls):
            alpha = self.wl_weights[i]
            wl = self.weaklearners[i]
            # accumulate each weak learner's scores, weighted by its alpha
            preds = wl.predict_proba(self.test_data[:, self.data_indices[i]])
            for t in xrange(self.test_num_data):
                cum_votes[t] += alpha*preds[t]

        exp_sum = 0.0
        rank_sum = 0.0
        for t in xrange(self.test_num_data):
            s = cum_votes[t]
            Y = self.test_class_sets[t]
            exp_sum += utils.exp_loss(s, Y)
            rank_sum += utils.rank_loss(s, Y)
        exp_sum /= self.test_num_data
        rank_sum /= self.test_num_data
        return exp_sum, rank_sum
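
Because get_test_results takes a num_wls cutoff, a natural use is tracing how the test losses evolve as weak learners are added. A small driver along those lines, assuming a fitted model object that exposes num_wls and get_test_results exactly as above:

# Trace the test losses as the ensemble grows one weak learner at a time.
for n in xrange(1, model.num_wls + 1):
    exp_loss, rank_loss = model.get_test_results(num_wls=n)
    print n, round(exp_loss, 4), round(rank_loss, 4)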
Code Example #4
File: testtopkMLC.py  Project: pi224/TopkBoostingMLR
cum_error = 0
cum_unnorm_error = 0
total_unnorm_error = 0   # running totals for the error history below
history_counter = 0.0
error_history = []

model.verbose = True

for row in test_rows:
    X = row[:class_index]
    Y = row[class_index:]
    pred = model.predict(X)

    topk = utils.topk(pred, k)
    Yset = utils.label_array_to_set(Y)
    topkRel = topk.intersection(Yset)
    model.update(topkRel)

    cum_error += utils.rank_loss(pred, Yset)
    unnorm = utils.unnormalized_rank_loss(pred, Yset)
    cum_unnorm_error += unnorm
    total_unnorm_error += unnorm
    history_counter += 1.0
    error_history.append(total_unnorm_error / history_counter)

end = time.time()
# 'start' and 'mid' are timestamps taken before and after training in the
# setup code that precedes this excerpt.
print 'Training time', mid - start
print 'Test time', end - mid
if loss == 'zero_one':
    print 'cache hit percentage:', float(model.cache_hits) / float(
        model.potential_calls)
print 'Average rank loss', round(cum_error / float(len(test_rows)), 4)
print 'Average unnormalized rank loss', round(
    cum_unnorm_error / float(len(test_rows)), 4)
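
The loop above relies on utils.topk to select the k highest-scoring labels. Assuming it returns those labels as a set (so that .intersection works on the result, as in the loop), a minimal sketch:

def topk(scores, k):
    # Assumed helper: return the indices of the k largest scores as a
    # set, suitable for intersecting with the relevant-label set.
    order = sorted(range(len(scores)), key=lambda l: scores[l], reverse=True)
    return set(order[:k])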
Code Example #5
# Imports needed by this snippet. The AdaOLMR import path is an
# assumption: the class ships with the project, but its module is not
# shown in these excerpts.
import time

import arff
import numpy as np

import utils
from onlineMLC import AdaOLMR


def main():
    seed = np.random.randint(1, 999)
    # Read params.csv file and parse the options
    params = utils.read_params()
    loss = params['loss']
    data_source = params['data_source']
    num_wls = int(params['num_wls'])
    num_covs = int(params['num_covs'])
    M = int(params['M'])
    gamma = params['gamma']

    # Load the train data
    fp = utils.get_filepath(data_source, 'train')
    data = arff.load(open(fp, 'rb'))
    class_index, _, _ = utils.parse_attributes(data)
    train_rows = data['data']

    # Load the test data
    fp = utils.get_filepath(data_source, 'test')
    data = arff.load(open(fp, 'rb'))
    test_rows = data['data']

    start = time.time()

    model = AdaOLMR(data_source, loss=loss, num_covs=num_covs, gamma=gamma)
    model.M = M
    model.gen_weaklearners(num_wls,
                           min_grace=5,
                           max_grace=20,
                           min_tie=0.01,
                           max_tie=0.9,
                           min_conf=0.01,
                           max_conf=0.9,
                           min_weight=3,
                           max_weight=10,
                           seed=seed)

    # Online training pass: predict() caches the instance X internally,
    # and update(Y) consumes that cached instance (see update's docstring).
    for row in train_rows:
        X = row[:class_index]
        Y = row[class_index:]
        pred = model.predict(X)
        model.update(Y)

    cum_error = 0

    # Progressive evaluation on the test stream: predict, then update,
    # accumulating the rank loss of each pre-update prediction.
    for row in test_rows:
        X = row[:class_index]
        Y = row[class_index:]
        pred = model.predict(X)
        model.update(Y)
        cum_error += utils.rank_loss(pred, model.Y)

    end = time.time()
    runtime = round(end - start, 2)
    avg_loss = round(cum_error / float(len(test_rows)), 4)

    print 'data_source', data_source
    print 'loss', loss
    print 'gamma', gamma
    print 'num_wls', num_wls
    print 'num_covs', num_covs
    print 'M', M
    print 'seed', seed
    print 'runtime', runtime
    print 'avg_loss', avg_loss
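
utils.read_params is also not shown; main() only needs it to return a dict of option strings keyed by name. One plausible reading, assuming params.csv is a simple two-column key,value file (the format is a guess):

import csv

def read_params(path='params.csv'):
    # Assumed helper: parse a key,value CSV into a dict of strings.
    # Callers cast numeric options themselves, as main() does above.
    params = {}
    with open(path, 'rb') as f:
        for key, value in csv.reader(f):
            params[key] = value
    return params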