def read_data(filename):
    """Load a whitespace-delimited data file of labelled samples.

    Each non-blank line is split on whitespace; the first field is parsed as
    an integer label and every remaining field as a float feature.

    Args:
        filename: Path of the file to read.

    Returns:
        A ``(samples, target)`` tuple where ``samples`` is a list of feature
        lists and ``target`` is the list of integer labels, one per sample
        and in file order.

    Raises:
        ValueError: If a label or feature field cannot be parsed as a number.
    """
    samples = []
    target = []
    with open(filename) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank (or whitespace-only) lines carry no sample; skip them
                # instead of failing on fields[0].
                continue
            # Append, do not rebind: the label list must stay aligned with
            # the sample list so both can be handed to a classifier.
            target.append(int(fields[0]))
            samples.append([float(x) for x in fields[1:]])
    return samples, target


def _format_row(row):
    """Render one data row as a single space-separated string."""
    if isinstance(row, str):
        # A plain string is already one field; joining it would split it
        # into individual characters.
        return row
    return " ".join(str(field) for field in row)


def write_delimited_file(filename, data, labels=None):
    """Write rows to a text file, one row per line, fields space-separated.

    When ``labels`` is given, each line is written as the label followed by
    a space and the row's fields, which is the layout ``read_data`` parses.
    When ``labels`` is omitted, only the rows are written.

    Args:
        filename: Path of the file to create or overwrite.
        data: Iterable of rows. A row is either a string (written as-is) or
            an iterable of fields, each converted with ``str``.
        labels: Optional iterable of labels paired with ``data`` via ``zip``,
            so output stops at the shorter of the two.
    """
    with open(filename, "w") as f:
        if labels is None:
            for row in data:
                f.write(_format_row(row) + "\n")
        else:
            for row, label in zip(data, labels):
                f.write("%s %s\n" % (label, _format_row(row)))


def main():
    """Train a random forest on ``$HOME/out.cvs`` and write test predictions.

    Reads the training set and the test set from the user's home directory,
    fits the classifier, and writes one probability per test sample to
    ``./result.cvs``.

    Raises:
        RuntimeError: If the ``HOME`` environment variable is not set.
    """
    # Local import keeps this entry point self-contained; no import of `os`
    # is visible in this part of the file.
    import os

    home_dir = os.environ.get("HOME")
    if home_dir is None:
        # Explicit check instead of `assert`, which is stripped under -O.
        raise RuntimeError("HOME environment variable is not set")

    train, target = read_data(os.path.join(home_dir, "out.cvs"))
    test, _ = read_data(os.path.join(home_dir, "test"))

    # NOTE(review): RandomForestClassifier is not imported in the visible
    # part of this file; presumably sklearn.ensemble.RandomForestClassifier.
    # Confirm the import exists at the top of the file.
    rf = RandomForestClassifier(n_estimators=100, min_samples_split=2)
    rf.fit(train, target)

    predicted_probs = rf.predict_proba(test)
    # Column 1 is kept for every sample; this assumes the training labels
    # contain at least two classes - TODO confirm against the data.
    predicted_probs = ["%f" % x[1] for x in predicted_probs]
    write_delimited_file("./result.cvs", predicted_probs)


if __name__ == '__main__':
    main()