# Convert every row from `reader` into a dict keyed by the entry of `headers`
# at the same column position, and collect the records in `data`.
# Indexing `headers` explicitly (rather than zipping) keeps the original
# behavior of failing loudly when a row has more columns than `headers`.
for row in reader:
    my_obj = {headers[index]: col for index, col in enumerate(row)}
    data.append(my_obj)

# Shuffle the records in place, then use the first 90% for training and the
# remainder for testing.
np.random.shuffle(data)
split_point = int(np.floor(len(data) * 0.9))
train = data[:split_point]
# Slice through the end of the list: the earlier `data[split_point:-1]`
# silently dropped the final record from the test set.
test = data[split_point:]

# Count how often each 'datatype' value occurs among the training records.
train_datatypes = [item['datatype'] for item in train]
train_unique_datatypes, train_unique_datatypes_count = np.unique(
    train_datatypes, return_counts=True)

# The builtin `print` is a function and has no `.info` attribute, so the
# earlier `print.info(...)` raised AttributeError; call it directly instead.
# NOTE(review): assumes `print` is not rebound to a logger elsewhere in this
# file -- confirm.
print("Lets look at the statistics of the training dataset")

# Bar chart of the datatype counts, most frequent first (argsort of the
# negated counts yields descending order).
sort_index = np.argsort(-train_unique_datatypes_count)
y_pos = np.arange(len(train_unique_datatypes))
plt.bar(y_pos, train_unique_datatypes_count[sort_index],
        align='center', alpha=0.5)
plt.xticks(y_pos, train_unique_datatypes[sort_index], rotation=90)
# Extra bottom margin leaves room for the rotated tick labels.
plt.subplots_adjust(bottom=0.35)
plt.ylabel('Usage')
plt.title('Data type')
plt.show()