Esempio n. 1
0
def main():
    FORMAT = '%(asctime)s %(levelname)s %(message)s'
    logging.basicConfig(format=FORMAT)
    logging.getLogger().setLevel(logging.INFO)
    args = parse_args()
    lang_map = {i: fn for i, fn in enumerate(sorted(listdir(args.lang_map)))}
    if args.train.endswith('.mtx'):
        mtx = mmread(args.train).todense()
        t_mtx = mmread(args.test).todense()
    else:
        with open(args.train) as stream:
            mtx = np.loadtxt(stream, np.float64)
        with open(args.test) as stream:
            t_mtx = np.loadtxt(stream, np.float64)
    labels = np.ravel(mtx[:, 0])
    test_labels = t_mtx[:, 0]
    test_mtx = t_mtx[:, 1:]
    if args.scale:
        train = scale(mtx[:, 1:], with_mean=False)
    else:
        train = mtx[:, 1:]
    kwargs = {}
    for a in args.params:
        k, v = a.split('=')
        try:
            v = int(v)
        except:
            pass
        kwargs[k] = v
    r = Representation(args.encoder, args.classifier, **kwargs)
    r.encode(train)
    logging.info('Matrix encoded')
    r.train_classifier(labels)
    logging.info('Model trained')
    acc = 0
    N = 0
    for vec_ in test_mtx:
        vec = np.ravel(vec_)
        cl = r.classify_vector(vec, with_probs=args.with_probs)
        try:
            lab = test_labels[N, 0]
        except IndexError:
            lab = test_labels[N]
        N += 1
        if args.with_probs:
            guess = max(enumerate(cl[0, :]), key=lambda x: x[1])[0]
            print('{0}\t{1}\t{2}'.format('\t'.join(map(str, cl[0, :])), lang_map[guess], lang_map[int(lab)]))
        else:
            try:
                guess = int(cl[0, 0])
            except IndexError:
                guess = int(cl + 0.5)
            print('{0}\t{1}'.format(lang_map[guess], lang_map[int(lab)]))
        if int(guess) == int(lab):
            acc += 1
Esempio n. 2
0
def main():
    r = Representation('pca',  'naive_bayes', dimension=3)
    raw_mtx = numpy.array([[1, 1, 1, 0, 0, 0], [1, 0, 1, 0, 0, 0], [1, 1, 1, 0, 0, 0], [0, 0, 1, 1, 1, 0],  [0, 0, 1, 1, 0, 0], [0, 0, 1, 1, 1, 0]])
    r.encode(raw_mtx)
    r.train_classifier([0, 0, 0, 1, 1, 1])
    print r.classify_vector([1, 2, 1, 0, 1, 0])