Beispiel #1
0
def main():
    """Score a saved model on the input dataset and optionally dump predictions.

    Options come from ``getargs()``.  The model stored at ``args.model`` is
    loaded into a ``SOL`` instance, and the test accuracy and elapsed time are
    logged.  The timer starts before the model is constructed and loaded, so
    the reported time includes loading.

    When ``args.output`` is set, one tab-separated line per sample is written
    to that path: ``label``, ``prediction``, then either a single score (when
    ``m.n_classes == 2``) or every entry of that sample's score row.

    Any exception is caught and reported on stdout rather than propagated.
    """
    args = getargs()

    try:
        dt_name = os.path.basename(args.input)
        dt = DataSet(dt_name, args.input, args.data_type)

        start_time = time.time()
        m = SOL(batch_size=args.batch_size, buf_size=args.buf_size)
        m.load(args.model)

        algo = m.name
        logging.info("testing algorithm %s ..." % (algo))
        if args.output is None:
            # No prediction dump requested: only the accuracy is needed.
            accu = m.score(dt.data_path, dt.dtype)
        else:
            # Scores and labels are needed for the dump below, so accuracy is
            # derived here from the returned predictions.
            scores, predicts, labels = m.decision_function(
                dt.data_path, dt.dtype, get_labels=True)
            accu = (np.sum(predicts == labels, dtype=np.float64) /
                    predicts.shape[0])
        logging.info("test accuracy of %s: %.4f" % (algo, accu))
        logging.info("test time of %s: %.4f seconds" %
                     (algo, time.time() - start_time))

        if args.output is not None:
            logging.info("write prediction results to %s" % (args.output))
            with open(args.output, 'w') as fh:
                if m.n_classes == 2:
                    for i in xrange(scores.shape[0]):
                        fh.write('%d\t%d\t%f\n' %
                                 (int(labels[i]), int(predicts[i]), scores[i]))
                else:
                    for i in xrange(scores.shape[0]):
                        row = '\t'.join([str(v) for v in scores[i, :]])
                        fh.write('%d\t%d\t%s\n' %
                                 (int(labels[i]), int(predicts[i]), row))
    except Exception as err:
        # str(err) instead of err.message: ``message`` is deprecated and is
        # empty for multi-argument exceptions such as IOError, which would
        # hide the actual failure reason.
        print('test failed %s' % (str(err)))
Beispiel #2
0
def main():
    """Dispatch to ``train`` or ``finetune`` based on the parsed arguments.

    Options come from ``getargs()``.  Exactly one of ``args.algo`` (train a
    new model) or ``args.model`` (finetune an existing one) must be given.

    ``args.params`` and ``args.cv`` are iterables of ``key=value`` strings.
    Model parameter values are stored as the raw strings; cross-validation
    values are parsed with ``ast.literal_eval``.

    Raises:
        Exception: if both or neither of ``args.model`` / ``args.algo`` are
            specified.
    """
    args = getargs()
    dt_name = osp.basename(args.input)
    dt = DataSet(dt_name, args.input, args.data_type, args.passes)

    model_params = {'verbose': args.verbose,
                    'batch_size': args.batch_size,
                    'buf_size': args.buf_size,
                    'norm': args.norm}

    if args.model is None and args.algo is not None:
        if args.params is not None:
            for item in args.params:
                # Split on the first '=' only, so a value that itself contains
                # '=' is kept whole instead of being silently truncated.
                parts = item.split('=', 1)
                model_params[parts[0]] = parts[1]
        cv_params = None
        if args.cv is not None:
            cv_params = {}
            for item in args.cv:
                parts = item.split('=', 1)
                cv_params[parts[0]] = ast.literal_eval(parts[1])

        train(dt, model_name=args.algo,
              model_params=model_params,
              output_path=args.output,
              fold_num=args.fold_num,
              cv_params=cv_params,
              retrain=args.retrain)
    elif args.model is not None and args.algo is None:
        finetune(dt, model_path=args.model, model_params=model_params,
                 output_path=args.output)
    else:
        raise Exception("either model or algo should be specified")
Beispiel #3
0
def main():
    """Train a SOL model, optionally selecting parameters by cross validation.

    Options come from ``getargs()``.  Model parameters start with ``verbose``
    and are extended with the ``key=value`` strings in ``args.params``.  When
    ``args.cv`` is given, the best cross-validation parameters are appended as
    well: they are loaded from ``cv-<algo>.txt`` in the dataset's work
    directory if that file exists and ``args.retrain`` is False, otherwise
    they are computed and saved there.

    The training accuracy and elapsed time are logged, and the model is saved
    to ``args.output`` when that is set.

    Any exception is caught and reported on stdout rather than propagated.
    """
    args = getargs()
    try:
        dt_name = os.path.basename(args.input)
        dt = DataSet(dt_name, args.input, args.data_type)
        model_params = [('verbose', args.verbose)]
        if args.params is not None:
            # Parse args.params (the original read args.cv here by mistake)
            # and extend rather than replace, so 'verbose' is kept.
            model_params.extend(
                tuple(item.split('=', 1)) for item in args.params)

        if args.cv is not None:
            cv_output_path = os.path.join(dt.work_dir,
                                          'cv-%s.txt' % (args.algo))
            if os.path.exists(cv_output_path) and args.retrain == False:
                # Reuse the cached cross-validation result.
                best_params = CV.load_results(cv_output_path)
            else:
                # Run cross validation and cache the outcome.
                cv_params = [item.split('=', 1) for item in args.cv]
                cv = CV(dt, args.fold_num, cv_params, model_params)
                cv.train_val(args.algo)
                best_params = cv.get_best_param()[0]
                cv.save_results(cv_output_path)
            logging.info('cross validation parameters: %s' %
                         (str(best_params)))
            for k, v in best_params:
                model_params.append((k, v))

        # Later entries win, so cross-validated values override user params.
        model_params = dict(model_params)

        start_time = time.time()
        m = SOL(args.algo,
                dt.class_num,
                batch_size=args.batch_size,
                buf_size=args.buf_size,
                **model_params)
        logging.info("learn model with %s algorithm..." % (args.algo))
        accu = m.fit(dt.data_path, dt.dtype, args.passes)
        logging.info("training accuracy of %s: %.4f" % (args.algo, accu))
        logging.info("training time of %s: %.4f seconds" %
                     (args.algo, time.time() - start_time))

        if args.output is not None:
            logging.info("save model of %s to %s" % (args.algo, args.output))
            m.save(args.output)

    except Exception as err:
        # str(err) instead of err.message: ``message`` is deprecated and is
        # empty for multi-argument exceptions such as IOError, which would
        # hide the actual failure reason.
        print('train failed: %s' % (str(err)))
Beispiel #4
0
    args = getargs()

    try:
        dt_opts = importlib.import_module(args.dtname)
    except ImportError as e:
        print e
        print 'make sure that your have <dtname>.py, refer to rcv1.py for an example'
        sys.exit()

    assert 'fs_opts' in dt_opts.__dict__

    passes = 1
    if 'passes' in dt_opts.__dict__:
        passes = dt_opts.passes

    dt_train = DataSet(args.dtname, args.train_file, args.dtype, passes)
    dt_test = DataSet(args.dtname, args.test_file, args.dtype)

    if args.output == None:
        args.output = osp.join(dt_train.work_dir,
                               args.dtname + '-fs-cache.pkl')

    if 'draw_opts' not in dt_opts.__dict__:
        draw_opts = {'accu': {}, 'time': {}}
    else:
        draw_opts = dt_opts.draw_opts

    #remove liblinear if not svm format
    if 'fs_opts' in dt_opts.__dict__:
        fs_task = 'fs'
        exp_fs(dt_train,
Beispiel #5
0
    if len(clf.coef_.shape) == 2:
        feat_num = np.count_nonzero(clf.coef_) / float(clf.coef_.shape[0])
    else:
        feat_num = np.count_nonzero(clf.coef_)

    logging.info("test accuracy: %.4f" % (test_accu))
    logging.info("test time: %.4f sec" % (test_time))

    return feat_num, test_accu, test_time, train_accu, train_time


if __name__ == '__main__':
    # Script entry point: expects exactly three arguments after the script
    # name (dataset name, training file, test file).
    if len(sys.argv) != 4:
        print('Usage dt_name train_file test_file')
        sys.exit()

    from sol.dataset import DataSet

    dataset_name, train_path, test_path = sys.argv[1:4]
    train_set = DataSet(dataset_name, train_path, 'svm')
    test_set = DataSet(dataset_name, test_path, 'svm')

    # Values of C to try for each model.
    c_values = [0.001, 0.01, 0.1, 1]

    print('train test l1-svm')
    for c_value in c_values:
        l1_result = train_test_l1(train_set, test_set, C=c_value)
        print(l1_result)

    print('train test l2-svm')
    for c_value in c_values:
        l2_result = train_test_l2(train_set, test_set, C=c_value)
        print(l2_result)