Beispiel #1
0
            plt.bar([a for a, b in wrong_np_num_l], [b for a, b in wrong_np_num_l])
            plt.title('Wrong NP Num', fontsize=10)

            plt.subplot(2, 3, 4)
            plt.bar([a for a, b in np_dis_l], [b for a, b in np_dis_l])
            plt.title('NP Dis', fontsize=10)
            plt.subplot(2, 3, 5)
            plt.bar([a for a, b in correct_np_dis_l], [b for a, b in correct_np_dis_l])
            plt.title('Correct NP Dis', fontsize=10)
            plt.subplot(2, 3, 6)
            plt.bar([a for a, b in wrong_np_dis_l], [b for a, b in wrong_np_dis_l])
            plt.title('Wrong NP Dis', fontsize=10)

            plt.savefig('results/data_analysis.png')


if __name__ == '__main__':
    # Script entry point: preprocess the corpus, divide the training data,
    # persist the divided generator to disk, then run the data analysis.

    # Build data from the raw OntoNotes data.
    # Parenthesised single-argument print emits the same text whether it is
    # parsed as a Python 2 print statement or as a print() call.
    print('Processing')
    generate_vector_data()
    generate_input_data()

    # Split the training data into dev and train.
    print('Dividing')
    train_generator = DataGenerator("train", args.batch_size)
    train_generator.devide()

    # Save the divided generator in ./data/train_data. The context manager
    # guarantees the file is flushed and closed even if pickling raises,
    # which the previous manual file()/close() pairing did not.
    with open("./data/train_data", 'wb') as save_f:
        cPickle.dump(train_generator, save_f, protocol=cPickle.HIGHEST_PROTOCOL)

    print('Analysing')
    analysis_data(train_generator)