예제 #1
0
    def test_save_and_load_model_pmi(self):
        # Round-trip a PMI model through save_model()/load_model() using a
        # throwaway directory, then check that what comes back is a two-item
        # sequence holding the algorithm name and the original scores.
        expected_scores = {('ъ', 'ь'): 0.42}

        with tempfile.TemporaryDirectory() as scratch_dir:
            model_path = os.path.join(scratch_dir, 'model')
            save_model(model_path, 'pmi', expected_scores)
            loaded = load_model(model_path)

        # The checks run after the temporary directory has been cleaned up.
        self.assertEqual(len(loaded), 2)
        self.assertEqual(loaded[0], 'pmi')
        self.assertEqual(loaded[1], expected_scores)
예제 #2
0
    def run(self, raw_args=None):
        """
        Parse the command-line args and run the whole pipeline on them.

        If raw_args is None, the parser falls back to sys.argv, which is what
        you would want unless you are unit testing.

        The steps are: optionally set up an IPA-to-sound-class transform, read
        the dataset (trying CLDFDataset first and falling back to Dataset if
        that raises JSONDecodeError), load the model, score the dataset with
        apply_phmm or apply_pmi depending on the model's algorithm, cluster,
        and write the clusters out in the format matching the input. Dataset
        and model errors are reported through self.parser.error(). The running
        time and the f-score are printed only if the respective args are set.
        """
        args = self.parser.parse_args(raw_args)

        started_at = time.time()

        # Without --ipa the dataset is read as is, i.e. with no transform.
        transform = None
        if args.ipa:
            sound_classes = pyclts.SoundClasses(args.sound_class_model)

            def transform(sound):
                return sound_classes[sound]

        # Assume CLDF input until the CLDF reader fails to decode it; the flag
        # later decides which writer is used for the output.
        is_cldf = True
        try:
            try:
                dataset = CLDFDataset(args.dataset, transform=transform)
            except JSONDecodeError:
                is_cldf = False
                dataset = Dataset(
                    args.dataset, args.dialect_input, transform=transform)
            algorithm, model = load_model(args.model)
        except (DatasetError, ModelError) as err:
            self.parser.error(str(err))

        # The sound class model is shown as 'ipa→<model>' when converting from
        # IPA and as '(<model>)' when it is not being applied.
        if args.ipa:
            opening, closing = 'ipa→', ''
        else:
            opening, closing = '(', ')'
        print('running {} on {}, conversion={}{}{}'.format(
            args.model, args.dataset, opening,
            args.sound_class_model, closing))

        # Anything that is not a phmm model is scored as pmi.
        scores = (apply_phmm(dataset, *model) if algorithm == 'phmm'
                  else apply_pmi(dataset, model))

        clusters = cluster(dataset, scores, method=args.cluster_method)
        if not is_cldf:
            write_clusters(clusters, args.output, args.dialect_output)
        else:
            write_cldf_clusters(clusters, args.output, dataset.dataset)

        if args.time:
            elapsed = time.time() - started_at
            print('running time: {:.2f} sec'.format(elapsed))

        if args.evaluate:
            f_score = calc_f_score(dataset.get_clusters(), clusters)
            print('f-score: {:.4f}'.format(f_score))
예제 #3
0
    def test_save_and_load_model_phmm(self):
        # Round-trip a PHMM model (four numpy arrays) through save_model() and
        # load_model() and check that each parameter comes back as an
        # np.ndarray with the same contents.
        em = np.array([0.1, 0.2])
        gx = np.array([0.3, 0.4])
        gy = np.array([0.5, 0.6])
        trans = np.array([0.7, 0.8])

        # Kept as a tuple so the expected values cannot be altered by the code
        # under test; save_model() receives its own list, as before.
        params = (em, gx, gy, trans)

        with tempfile.TemporaryDirectory() as temp_dir:
            path = os.path.join(temp_dir, 'model')
            save_model(path, 'phmm', list(params))
            model = load_model(path)

        self.assertEqual(len(model), 2)
        self.assertEqual(model[0], 'phmm')

        for index, param in enumerate(params):
            # assertIs keeps the exact-type check but, unlike
            # assertTrue(... is ...), reports the offending type on failure.
            self.assertIs(type(model[1][index]), np.ndarray)
            np.testing.assert_array_equal(model[1][index], param)
예제 #4
0
    def test_load_model_with_bad_file(self):
        # This source file exists but is not a saved model, so load_model()
        # is expected to raise ModelError with a 'Could not read model...'
        # message. assertRaisesRegex shows the actual message on a mismatch;
        # the '^' anchor keeps the original "starts with" requirement.
        with self.assertRaisesRegex(ModelError, '^Could not read model'):
            load_model(os.path.abspath(__file__))
예제 #5
0
    def test_load_model_with_bad_path(self):
        # An empty path cannot be opened, so load_model() is expected to raise
        # ModelError with a 'Could not open model...' message.
        # assertRaisesRegex shows the actual message on a mismatch; the '^'
        # anchor keeps the original "starts with" requirement.
        with self.assertRaisesRegex(ModelError, '^Could not open model'):
            load_model('')