Exemple #1
0
def run_train_data(train_data_csv):
    """Run the similarity pipeline on slices of one CSV and save the results.

    Two ``AristoData`` objects are built from ``train_data_csv``: one with
    ``range(0, 2000)`` used as training data and one with ``range(100, 110)``
    used as test data. Their summaries are printed, the
    ``SimilarityPipeline`` is run, and its output is written to a freshly
    created, timestamped ``outputdata/train_<YYYYmmdd_HHMMSS>`` directory
    located three levels above this module.

    NOTE(review): the second AristoData argument presumably selects row
    indices; if so, the test slice 100-109 is contained in the training
    slice 0-1999 -- confirm that the overlap is intended.

    :param train_data_csv: path of the data file handed to ``AristoData``.
    """
    training_set = AristoData(train_data_csv, range(0, 2000))
    evaluation_set = AristoData(train_data_csv, range(100, 110))

    # The test-side summary is printed before the training-side one.
    for dataset in (evaluation_set, training_set):
        dataset.print_summary()

    similarity = SimilarityPipeline(train_data=training_set, test_data=evaluation_set)
    similarity.run_pipeline()

    # The output directory is named and created only after the pipeline has run.
    module_dir = os.path.dirname(__file__)
    run_stamp = time.strftime('%Y%m%d_%H%M%S')
    out_dir = os.path.join(module_dir, "../../../outputdata/train_{}".format(run_stamp))
    os.makedirs(out_dir)

    similarity.write_to_disk(out_dir)
def run_test_data(data_csv):
    """Run the Solr/Wikipedia pipeline on ``data_csv`` and print its score.

    A timestamped ``outputdata/test_<YYYYmmdd_HHMMSS>`` directory (three
    levels above this module) is created first, because the logger returned
    by ``setup_log`` is configured with that directory. The data summary is
    printed, the pipeline is run, its output is written to the directory and
    finally ``pipeline.score()`` is printed.

    :param data_csv: path of the data file handed to ``AristoData``.
    """
    module_dir = os.path.dirname(__file__)
    run_stamp = time.strftime('%Y%m%d_%H%M%S')
    out_dir = os.path.join(module_dir, "../../../outputdata/test_{}".format(run_stamp))
    os.makedirs(out_dir)

    run_logger = setup_log(out_dir)

    dataset = AristoData(data_csv)
    dataset.print_summary()

    solr_pipeline = SolrWikipediaAllAnswerThenQuestionPipeline(data=dataset, logger=run_logger)
    solr_pipeline.run_pipeline()
    solr_pipeline.write_to_disk(out_dir)

    final_score = solr_pipeline.score()
    print(final_score)
class IntegrationTestAristoData(unittest.TestCase):
    """Integration tests for ``AristoData`` built from the training-set TSV.

    Every test constructs ``AristoData`` from ``inputdata/training_set.tsv``,
    resolved three directory levels above this module, so the expectations
    below are tied to the contents of that particular file.
    """

    def setUp(self):
        # Resolve the data file relative to this module rather than the
        # current working directory.
        data_file_path = os.path.join(os.path.dirname(__file__), "../../../inputdata/training_set.tsv")
        # Echo the resolved path so a wrong location is easy to spot in the test output.
        print(os.path.abspath(data_file_path))
        self._aristo_data = AristoData(data_file_path)

    def test_should_print_summary(self):
        # Smoke test: passes as long as print_summary() does not raise.
        self._aristo_data.print_summary()

    def test_should_get_x(self):
        # The ``x`` attribute is expected to expose exactly five columns.
        self.assertEqual(len(self._aristo_data.x.columns), 5,
                         "The expected number of columns does not match the actual")

    def test_should_get_y_columns(self):
        # The ``y`` attribute is expected to expose exactly one column.
        self.assertEqual(len(self._aristo_data.y.columns), 1,
                         "The expected number of columns does not match the actual")

    def test_should_get_all_questions_as_raw(self):
        # assertIsInstance reports the offending type on failure, whereas
        # assertTrue(type(...) is str) only says "False is not true".
        self.assertIsInstance(self._aristo_data.get_all_questions_as_raw(), str)

    def test_should_get_all_questions_as_list(self):
        # NOTE(review): 2500 * 5 presumably means 2500 rows times the five
        # ``x`` columns of the training set -- confirm against the data file.
        self.assertEqual(len(self._aristo_data.get_all_questions_answers_as_list()), 2500 * 5)