Example #1
0
    for row in reader:
        my_obj = {}
        for index, col in enumerate(row):
            my_obj[headers[index]] = col
        data.append(my_obj)

# Shuffle the records in place so the split below does not depend on the
# order in which the rows were read.
np.random.shuffle(data)
# The first 90% of the shuffled records are used for training, the rest for
# testing.
split_point = int(np.floor(len(data) * 0.9))
train = data[:split_point]
# Slice to the end of the list: an end bound of -1 would exclude the final
# record, leaving it in neither the training nor the test set.
test = data[split_point:]

# Collect the 'datatype' label of every training record, then count how many
# times each distinct label occurs.
train_datatypes = [item['datatype'] for item in train]
train_unique_datatypes, train_unique_datatypes_count = np.unique(
    train_datatypes, return_counts=True)

# The builtin `print` is a function with no `.info` attribute, so it must be
# called directly (NOTE(review): assumes `print` is not rebound to a logger
# elsewhere in this file -- confirm).
print("Lets look at the statistics of the training dataset")
# Negating the counts makes argsort order the labels from most to least
# frequent.
sort_index = np.argsort(-train_unique_datatypes_count)

# One bar position per distinct label.
y_pos = np.arange(len(train_unique_datatypes))

plt.bar(y_pos,
        train_unique_datatypes_count[sort_index],
        align='center',
        alpha=0.5)
# Label each bar with its datatype; rotate the labels vertically and enlarge
# the bottom margin so they have room.
plt.xticks(y_pos, train_unique_datatypes[sort_index], rotation=90)
plt.subplots_adjust(bottom=0.35)
plt.ylabel('Usage')
plt.title('Data type')

plt.show()