예제 #1
0
    features = []

    f = open(data_features_file, 'r+')
    for line in f:
        features.append(line.strip())
    f.close()

    return features


def get_data():
    """Load the training data from ``data_training_file``.

    The file is parsed by ``np.loadtxt`` as comma-delimited text.

    Returns:
        The array produced by ``np.loadtxt``.
    """
    return np.loadtxt(data_training_file, delimiter=',')


if __name__ == '__main__':
    # Every MPI process executes this script; the rank tells them apart.
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    is_root = rank == p_root

    # Only the root process reads the inputs from disk; every other rank
    # hands None to the forest builder.
    data = features = None
    if is_root:
        data = get_data()
        features = get_features()

    trees = parallel_create_random_forest(comm, rank, data, features)

    # The root process alone wraps the trees in a forest and saves it.
    if is_root:
        forest = RandomForest(trees)
        forest.dump(data_classifier_file)
예제 #2
0

def get_features():
    """Return the lines of ``data_features_file`` as a list of strings.

    Each line has its leading and trailing whitespace (including the
    newline) stripped; blank lines are kept as empty strings.
    Presumably the file lists one feature name per line -- confirm
    against whatever produces it.

    Returns:
        list: the stripped lines, in file order.
    """
    # Read-only mode is sufficient: nothing is written here, and the
    # previous 'r+' mode failed on files without write permission. The
    # context manager closes the handle even if reading raises.
    with open(data_features_file, 'r') as f:
        return [line.strip() for line in f]


if __name__ == '__main__':
    # Load a saved forest, classify every review and tally how far each
    # wrong prediction is from the recorded star value.
    forest = RandomForest.load(forest_file)

    total_diff = 0  # sum of absolute errors over the wrong predictions
    errors = 0      # number of wrong predictions
    reviews = get_reviews()
    # off_by[k] counts wrong predictions whose absolute error truncates
    # to k. Assumes errors stay below 5 (presumably 1-5 star ratings) --
    # TODO confirm.
    off_by = [0] * 5

    for review in reviews:
        answer = forest.classify(review)
        # Convert once and compare numerically. Comparing against the raw
        # review['star'] would flag every review as an error whenever the
        # stored value is not already a number (e.g. the string '4').
        star = float(review['star'])
        if answer != star:
            diff = abs(answer - star)
            off_by[int(diff)] += 1
            #print "Answer: %f, Star: %f, Diff: %f" %(answer, star, diff)
            errors += 1
            total_diff += diff
예제 #3
0

def get_features():
	"""Read ``data_features_file`` and return its lines, stripped.

	Leading and trailing whitespace (including the newline) is removed
	from every line; empty lines yield empty strings. The file is
	presumably a list of feature names, one per line -- verify with the
	code that writes it.

	Returns:
		list: one string per line, in file order.
	"""
	# 'r' instead of the former 'r+': the file is only read, so write
	# permission should not be required. ``with`` guarantees the handle
	# is closed even when reading fails part-way through.
	with open(data_features_file, 'r') as f:
		return [line.strip() for line in f]


if __name__ == '__main__':
	# Score a saved forest against the reviews: count wrong predictions
	# and record how large each miss was.
	forest = RandomForest.load(forest_file)

	total_diff = 0  # accumulated absolute error of the wrong predictions
	errors = 0      # how many predictions were wrong
	reviews = get_reviews()
	# Bucket k holds the wrong predictions whose absolute error truncates
	# to k; five buckets assume the error never reaches 5 -- TODO confirm.
	off_by = [0]*5

	for review in reviews:
		answer = forest.classify(review)
		# Use the numeric star value for both the comparison and the
		# difference. The old check compared against the raw field, which
		# never equals a numeric answer if the field is stored as text.
		star = float(review['star'])
		if answer != star:
			diff = abs(answer - star)
			off_by[int(diff)] += 1
			#print "Answer: %f, Star: %f, Diff: %f" %(answer, star, diff)
			errors += 1
			total_diff += diff
예제 #4
0
def get_features():
	"""Return every line of ``data_features_file`` with whitespace stripped.

	Blank lines are preserved as empty strings. Presumably each line is
	a feature name -- confirm against the file's producer.

	Returns:
		list: the stripped lines, in the order they appear in the file.
	"""
	# Open read-only (the earlier 'r+' demanded write access for a pure
	# read) and let the context manager close the file on any exit path.
	with open(data_features_file, 'r') as f:
		return [line.strip() for line in f]

def get_data():
	"""Parse ``data_training_file`` as comma-separated text.

	Returns:
		The array that ``np.loadtxt`` builds from the file.
	"""
	training_array = np.loadtxt(data_training_file, delimiter=',')
	return training_array


if __name__ == '__main__':
	# Each MPI process runs this block and identifies itself by rank.
	comm = MPI.COMM_WORLD
	rank = comm.Get_rank()

	# The root rank loads the training inputs; all other ranks pass None.
	if rank != p_root:
		data = features = None
	else:
		data = get_data()
		features = get_features()

	trees = parallel_create_random_forest(comm, rank, data, features)

	# Only the root rank builds the forest object and writes it out.
	if rank == p_root:
		forest = RandomForest(trees)
		forest.dump(data_classifier_file)