Ejemplo n.º 1
0
def main():
    """Split ``train.csv`` into train and test CSV files by creation date.

    Loads the data through ``cu.get_dataframe``, orders the rows by the
    ``PostCreationDate`` column, cuts them into three consecutive parts, and
    writes the first two parts to ``train_data.csv`` and the last part to
    ``test_data.csv`` under ``cu.data_path``.
    """
    # Single-argument print calls emit the same text on Python 2 and 3.
    print("get data")
    data = cu.get_dataframe("train.csv")

    print("sort by creation date")
    # ``sort_index(by=...)`` was deprecated in pandas 0.17 and later removed;
    # ``sort_values`` is the supported way to order rows by a column.
    data = data.sort_values(by="PostCreationDate")

    print("cut off")
    header = cu.get_header("train.csv")
    # Split row *positions* rather than the frame itself: ``np.array_split``
    # on a DataFrame goes through the deprecated ``DataFrame.swapaxes``.
    # The resulting partition (three near-equal consecutive parts) is the same.
    positions = np.array_split(np.arange(len(data)), 3)
    splits = [data.iloc[part] for part in positions]
    train_data = pd.concat(splits[:2])
    test_data = splits[2]

    print("write to csv")
    # NOTE(review): this uses the same file name as the ``to_csv`` call right
    # below; presumably one of the two writes is redundant -- confirm against
    # ``cu.write_sample`` before removing either.
    cu.write_sample("train_data.csv", header, train_data)
    train_data.to_csv(
        os.path.join(cu.data_path, "train_data.csv"), index=False, header=header
    )
    test_data.to_csv(
        os.path.join(cu.data_path, "test_data.csv"), index=False, header=header
    )
def main():
    """Run ``sample_train`` on ``train.csv`` and ``train-A.csv`` and save each result.

    Every source file is resolved under ``cu.data_path`` and handed to
    ``sample_train``; the header and sample it returns are then passed to
    ``cu.write_sample`` together with the matching output file name.
    """
    jobs = (
        ("train.csv", "train-sample1.csv"),
        ("train-A.csv", "train-A-sample1.csv"),
    )
    for source_name, target_name in jobs:
        source_path = os.path.join(cu.data_path, source_name)
        columns, rows = sample_train(source_path)
        cu.write_sample(target_name, columns, rows)
def main():
    """Fetch the header and records of ``train_file`` and write them out.

    ``train_file``, ``lines`` and ``output_file`` are not defined in this
    function; they are read from the enclosing scope. The header comes from
    ``cu.get_header`` and the records from ``cu.get_lines(train_file, lines)``;
    both are handed to ``cu.write_sample`` with ``output_file`` as the target.
    """
    print("Reading the data", train_file)

    column_names = cu.get_header(train_file)
    selected = cu.get_lines(train_file, lines)

    cu.write_sample(output_file, column_names, selected)
Ejemplo n.º 4
0
def main():
    """Run ``sample_train`` on ``train.csv`` and ``train-A.csv`` and save each result.

    Both source files are resolved under ``cu.data_path``. The first result
    is written with ``cu.write_sample``; the second is written with
    ``save_sample``.
    """
    train_path = os.path.join(cu.data_path, "train.csv")
    columns, rows = sample_train(train_path)
    cu.write_sample("train-sample1.csv", columns, rows)

    train_a_path = os.path.join(cu.data_path, "train-A.csv")
    columns, rows = sample_train(train_a_path)
    # NOTE(review): this call uses the bare ``save_sample`` rather than
    # ``cu.write_sample`` as above -- confirm the difference is intentional.
    save_sample("train-A-sample1.csv", columns, rows)
def main():
    """Run ``sample_train`` on ``train_data.csv`` and save the result.

    The header and sample returned by ``sample_train`` are passed to
    ``cu.write_sample`` with ``train-sample.csv`` as the output file name.
    """
    # An earlier variant read "train.csv" and wrote "train-sample1.csv";
    # only the "train_data.csv" variant is active.
    columns, rows = sample_train("train_data.csv")
    cu.write_sample("train-sample.csv", columns, rows)
def main():
    """Run ``sample_train`` on ``train_data.csv`` and save the result.

    The header and sample returned by ``sample_train`` are passed to
    ``cu.write_sample`` with ``train-sample.csv`` as the output file name.
    """
    # An earlier variant read "train.csv" and wrote "train-sample1.csv";
    # only the "train_data.csv" variant is active.
    sample_header, sample_rows = sample_train("train_data.csv")
    cu.write_sample("train-sample.csv", sample_header, sample_rows)