features = [] f = open(data_features_file, 'r+') for line in f: features.append(line.strip()) f.close() return features def get_data(): data = np.loadtxt(fname=data_training_file, delimiter=',') return data if __name__ == '__main__': comm = MPI.COMM_WORLD rank = comm.Get_rank() if rank == p_root: data = get_data() features = get_features() else: data, features = None, None trees = parallel_create_random_forest(comm, rank, data, features) if rank == p_root: forest = RandomForest(trees) forest.dump(data_classifier_file)
def get_features():
    """Return the lines of ``data_features_file`` as a list of strings.

    Each line is stripped of leading/trailing whitespace (including the
    newline). Presumably the file holds one feature name per line.

    The file is opened read-only: nothing is ever written, so requesting
    write access ('r+') would only make the call fail on read-only files.
    The ``with`` block guarantees the handle is closed even if reading
    raises.
    """
    with open(data_features_file, 'r') as f:
        return [line.strip() for line in f]


if __name__ == '__main__':
    # Load a previously saved forest and classify every review with it.
    forest = RandomForest.load(forest_file)
    total_diff = 0  # sum of absolute errors over the counted reviews
    errors = 0      # number of reviews where answer != review['star']
    reviews = get_reviews()
    # off_by[k] counts the errors whose absolute difference truncates to k.
    # NOTE(review): only indices 0..4 exist; a difference >= 5 would raise
    # IndexError -- confirm the range of values involved.
    off_by = [0] * 5
    for review in reviews:
        answer = forest.classify(review)
        # NOTE(review): the comparison uses review['star'] unconverted while
        # the arithmetic below uses float(review['star']). If 'star' is stored
        # as a string this test is always true -- confirm the intended type.
        if answer != review['star']:
            diff = abs(answer - float(review['star']))
            off_by[int(diff)] += 1
            #print "Answer: %f, Star: %f, Diff: %f" %(answer, float(review['star']), diff)
            errors += 1
            total_diff += diff
def get_features():
    """Read ``data_features_file`` and return its lines as a list.

    Surrounding whitespace (including the trailing newline) is stripped
    from every line. Presumably each line is one feature name.

    The file is only read, so it is opened in 'r' mode rather than 'r+'
    (which needlessly requires write permission), and a ``with`` block
    ensures the handle is closed even when reading fails.
    """
    with open(data_features_file, 'r') as f:
        return [line.strip() for line in f]


if __name__ == '__main__':
    # Evaluate a stored forest against the reviews from get_reviews().
    forest = RandomForest.load(forest_file)
    total_diff = 0  # accumulated absolute error of the counted reviews
    errors = 0      # how many reviews had answer != review['star']
    reviews = get_reviews()
    # Histogram of errors, indexed by the truncated absolute difference.
    # NOTE(review): the list has five slots (0..4); a difference of 5 or more
    # would raise IndexError -- confirm this cannot happen.
    off_by = [0]*5
    for review in reviews:
        answer = forest.classify(review)
        # NOTE(review): review['star'] is compared unconverted here but
        # passed through float() below; if it is a string, every review is
        # counted as an error -- confirm the stored type.
        if answer != review['star']:
            diff = abs(answer-float(review['star']))
            off_by[int(diff)] += 1
            #print "Answer: %f, Star: %f, Diff: %f" %(answer, float(review['star']), diff)
            errors += 1
            total_diff += diff
def get_features():
    """Return the lines of ``data_features_file`` as a list of strings.

    Every line is stripped of surrounding whitespace (including the
    newline). Presumably the file lists one feature name per line.

    The file is opened read-only because it is never written; the previous
    'r+' mode demanded write permission for no benefit. The ``with`` block
    closes the handle even if an error occurs while reading.
    """
    with open(data_features_file, 'r') as f:
        return [line.strip() for line in f]


def get_data():
    """Load ``data_training_file`` as comma-delimited numeric text.

    Returns whatever ``np.loadtxt`` produces for that file (a NumPy array).
    """
    return np.loadtxt(fname=data_training_file, delimiter=',')


if __name__ == '__main__':
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    # Only the root rank reads the inputs from disk; every other rank passes
    # None placeholders into the forest-building call.
    if rank == p_root:
        data = get_data()
        features = get_features()
    else:
        data, features = None, None

    trees = parallel_create_random_forest(comm, rank, data, features)

    # The root rank alone wraps the trees in a RandomForest and saves it.
    if rank == p_root:
        forest = RandomForest(trees)
        forest.dump(data_classifier_file)