Ejemplo n.º 1
0
    def test_load_and_categorise(self):
        """Evaluate the pickled release categoriser against stored releases.

        Loads a classifier from ``release_categoriser.pkl``, classifies every
        ``Release`` (joined to its ``Group``) and compares the first two
        characters of the guess with the first two characters of the stored
        ``category_id``. Progress is printed every 500 releases, followed by
        one line per mismatch and the overall accuracy.

        If the query yields no releases, a short notice is printed instead of
        an accuracy figure.
        """
        from pynab.db import db_session, Release, Group, windowed_query
        from pickle import load

        # NOTE(review): unpickling can execute arbitrary code; only run this
        # against a categoriser file that comes from a trusted source.
        with open('release_categoriser.pkl', 'rb') as cat_file:
            categoriser = load(cat_file)

        errors = []
        i = 0
        with db_session() as db:
            query = db.query(Release).join(Group)
            count = query.count()
            for result in windowed_query(query, Release.id, 500):
                features = extract_features(result.name)
                features['group'] = result.group.name
                features['name'] = result.name

                guess = categoriser.classify(features)
                # Only the two-character prefixes are compared, not the full
                # category id (presumably the parent category -- confirm).
                if guess[:2] != str(result.category_id)[:2]:
                    errors.append((result.category_id, guess, features))

                i += 1
                if i % 500 == 0:
                    # First value: percentage processed; second: running accuracy.
                    print('{} - {:.3f}%'.format((i/count)*100, (1 - (len(errors) / i)) * 100))

        if i == 0:
            # Nothing was classified; the accuracy ratio below would divide by zero.
            print('no releases found to categorise')
            return

        for tag, guess, features in errors:
            print('correct={} guess={} name={}'.format(tag, guess, features['name'].encode('utf-8')))

        print('accuracy={}'.format(1 - (len(errors)/i)))
Ejemplo n.º 2
0
        def load_data(filename):
            """Load labelled rows from a UTF-8 CSV file.

            The first line of the file is discarded (presumably a header row).
            For every remaining non-empty row, column 1 is passed to
            ``extract_features`` and also stored under ``'name'``, column 2 is
            stored under ``'group'``, and column 3 is used as the label.

            Returns:
                A list of ``(features, label)`` tuples in shuffled order.

            Raises:
                IndexError: if a non-empty row has fewer than four columns.
            """
            # newline='' lets the csv module handle line endings itself, so
            # quoted fields that contain newlines are parsed correctly.
            with open(filename, encoding='utf-8', newline='') as f:
                f.readline()  # drop the first line
                data = []
                for row in csv.reader(f, delimiter=',', quotechar='"'):
                    if not row:
                        # csv.reader yields [] for blank lines; skip them
                        # rather than failing on row[1].
                        continue
                    name = row[1]
                    features = extract_features(name)
                    features['group'] = row[2]
                    features['name'] = name
                    data.append((features, row[3]))

            random.shuffle(data)
            return data