Example #1
def test(model, test_file_path):
    total = 0
    correct = 0
    decoder = NaiveDecoder(model)
    outfile = open("predicted.dat", 'w')
    _logger.info("Testing %s" % test_file_path)
    with open(test_file_path) as test_file:
        processed = 1
        for line in test_file:
            line = line.strip().decode('utf-8')
            if not line:
                continue
            total += 1
            sentence, tag = line.split('\t')

            #sentence = extract(sentence)

            result = decoder.decode(sentence)
            predicted, _ = conv.argmax(result.items())
            outfile.write("%s\t%s\t%s\n" % (sentence.encode('utf-8'), predicted.encode('utf-8'), tag.encode('utf-8')))
            if predicted == tag:
                correct += 1
            if processed % 1000 == 0:
                _logger.debug("%d lines processed" % processed)
            processed += 1
    outfile.close()
    _logger.info("accuracy: %f" % (float(correct) / total))
Example #2
 def load_data(self):
     _logger.info("Loading training data from %s" % self.train_path)
     self.X = []
     self.y = []
     with open(self.train_path) as train_file:
         for line in train_file:
             line = line.strip().decode('utf-8')
             if not line:
                 continue
             terms, domain = line.split('\t')
             self.X.append(terms)
             self.y.append(domain)
Example #3
def load_data(train_path):
    _logger.info("Loading data from %s" % train_path)
    X = []
    y = []
    with open(train_path) as train_file:
        for line in train_file:
            line = line.strip().decode("utf-8")
            if not line:
                continue
            terms, domain = line.split("\t")
            X.append(terms)
            y.append(domain)
    return np.array(X), np.array(y)
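A minimal, hypothetical call of load_data() above; it assumes a UTF-8 training file laid out as sentence<TAB>domain per line, and the "train.dat" name is illustrative.

# Hypothetical usage sketch for Example #3's load_data(); "train.dat" is assumed.
X, y = load_data("train.dat")
_logger.info("loaded %d sentences covering %d domains" % (len(y), len(set(y))))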
Example #4
def vectorize(tfidf=False, binary=False):

    _logger.info("Loading...")
    
    trainX = [r[0] for r in tsv.reader(conv.redirect('train.tokenized.dat'))]
    testX = [r[0] for r in tsv.reader(conv.redirect('test.tokenized.dat'))]
    
    vectorizer = None
    if tfidf:
        vectorizer = TfidfVectorizer
    else:
        vectorizer = CountVectorizer
    
    _logger.info("Fitting and transforming...")
    vectorizer = vectorizer(token_pattern=u'(?u)\\b\\w+\\b', binary=binary, ngram_range = (1, 3))
    trainX = vectorizer.fit_transform(trainX)
    testX = vectorizer.transform(testX)
    
    _logger.info("Dumping binaries...")
    pickle.dump(vectorizer,open("vectorizer.bin",'w'))
    pickle.dump(trainX,open("train.vectorized.mat",'w'))
    pickle.dump(testX,open("test.vectorized.mat",'w'))
    
    schema = vectorizer.get_feature_names()
    codecs.open("schema.dat",'w',encoding='utf-8').write('\n'.join(schema))

    # debug
#    _logger.info("Dumping inversered...")
#    codecs.open("test.vectorized.dat",'w',encoding='utf-8').write( '\n'.join( [(' '.join(i)) for i in vectorizer.inverse_transform(testX)] ) )
#    codecs.open("train.vectorized.dat",'w',encoding='utf-8').write( '\n'.join( [(' '.join(i)) for i in vectorizer.inverse_transform(trainX)] ) )

    trainX = trainX.tocoo(False)
    testX = testX.tocoo(False)
    
    _logger.info("Dumping test.vectorized.dat...")
    with codecs.open("test.vectorized.dat",'w',encoding='utf-8') as fl:
        dc = defaultdict(list)
        for r,c,v in zip(testX.row,testX.col,testX.data):
            dc[r].append( "%s(%s)=%s"%(schema[c],c,v) )
        for i in sorted(dc.keys()):
            fl.write("%s\t%s\n" % (i, " , ".join(list(dc[i])) ))
    
    
    _logger.info("Dumping train.vectorized.dat...")
    with codecs.open("train.vectorized.dat",'w',encoding='utf-8') as fl:
        dc = defaultdict(list)
        for r,c,v in zip(trainX.row,trainX.col,trainX.data):
            dc[r].append( "%s(%s)=%s"%(schema[c],c,v) )
        for i in sorted(dc.keys()):
            fl.write("%s\t%s\n" % (i, " , ".join(list(dc[i])) ))
Example #5
def test(X, y):
    by_domain = defaultdict(list)
    sz = len(y)
    for i in xrange(sz):
        by_domain[y[i]].append(X[i])

    domains = ['alarm', 'calendar', 'communication', 'note', 'places',
               'reminder', 'weather', 'web']
    for p in domains:
        for q in domains:
            if p < q:
                clf = svms[p, q]
                p_len = len(by_domain[p])
                q_len = len(by_domain[q])
                X = list(by_domain[p])
                X.extend(by_domain[q])
                y = [p] * p_len
                y.extend([q] * q_len)
                _logger.info("%.4f, %s - %s" % (clf.score(X, y), p, q))
Example #6
    def cv(self, fold):
        size = len(self.y)
        kf = cross_validation.KFold(size, fold, shuffle=True)
        iteration = 0
        scores = list()
        for train_idx, test_idx in kf:
            X = [self.X[idx] for idx in train_idx]
            y = [self.y[idx] for idx in train_idx]
            X_test = [self.X[idx] for idx in test_idx]
            y_test = [self.y[idx] for idx in test_idx]
            _logger.debug("Training...")
            self.fit(X, y)
            _logger.debug("Testing...")
            score = self.get_test_accuracy(X_test, y_test)
            scores.append(score)
            iteration += 1
            _logger.info("CV iteration %d: CV accuracy: %f" % \
                             (iteration, score))

        scores = np.array(scores)
        return scores.mean(), scores.std()
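A hypothetical call of the cv() method above, reported the same way Example #14 logs its cross-validation result; the 10-fold choice and the "model" instance name are assumptions.

# Hypothetical usage sketch for Example #6's cv(); model is an instance of the class.
mean_acc, std_acc = model.cv(10)
_logger.info("CV accuracy: %f +/- %f" % (mean_acc, std_acc))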
Example #7
    def train(self):
        _logger.info("reading posterior probabilities from naive bayes model")
        self.words = list()
        self.words_seen = set()
        X = np.array([])
        for term in g_term_count:
            term = term_category(term)
            if term in self.words_seen:
                continue
            self.words_seen.add(term)
            self.words.append(term)
            x = list()
            for domain in self.naive.model.domains:
                val = self.naive.posterior_prob(term, domain)
                x.append(val)
            X = np.append(X, x)
        _logger.info("%d terms need to be clustered" % len(self.words))

        X = np.reshape(X, (len(self.words), len(self.naive.model.domains)))
        kmeans = KMeans(n_clusters = len(self.words) / 10)
        y = kmeans.fit_predict(X)

        with open(OUTFILE_PATH, "w") as outfile:
            for i in xrange(len(y)):
                outfile.write("%s\t%d\n" % (self.words[i].encode('utf-8'), y[i]))
        _logger.info("clustering result wrote to %s" % OUTFILE_PATH)
Example #8
def clean(X, y, k=10):
    _logger.info("cleaning base on %d-fold cross validation" % k)

    size = len(y)
    kf = KFold(size, n_folds=k, shuffle=True)
    fold = 1
    for train_idx, test_idx in kf:
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]
        pipeline = Pipeline([
                ("vert", TfidfVectorizer(min_df = 1, binary = True, ngram_range = (1, 3),
                                         tokenizer = Tokenizer())),
                ("clf", LinearSVC(loss='l1',
                                  penalty="l2",
                                  multi_class="ovr",
                                  class_weight="auto")),
                ])
        _logger.debug("Training fold %d" % fold)
        pipeline.fit(X_train, y_train)
        _logger.debug("Predicting for fold %d" % fold)
        y_pred = pipeline.predict(X_test)
        _logger.info("fold %d got accuracy: %f" % (fold, accuracy_score(y_test, y_pred)))

        right_f = open("fold%d.right.dat" % fold, "w")
        wrong_f = open("fold%d.wrong.dat" % fold, "w")

        size = len(y_test)
        for i in xrange(size):
            sent, pred, gold = X_test[i].encode('utf-8'), y_pred[i].encode('utf-8'), y_test[i].encode('utf-8')
            if pred != gold:
                wrong_f.write("%s\t%s\t%s\n" % (pred, gold, sent))
            else:
                right_f.write("%s\t%s\n" % (sent, gold))

        right_f.close()
        wrong_f.close()

        fold += 1
Example #9
def test(test_file_path, clf):
    X, y = load_data(test_file_path)
    size = len(y)

    scores = clf.decision_function(X)
    # y_pred = []
    # for i in xrange(size):
    #     score = scores[i]
    #     detail = sorted(zip(clf.named_steps['clf'].classes_,
    #                         score),
    #                     key = lambda x: -x[1])
    #     if detail[0][1] >= 1.1:
    #         y_pred.append(detail[0][0])
    #     else:
    #         y_pred.append(u'web')

    y_pred = clf.predict(X)
    outfile = open("predicted.dat", 'w')
    for i in range(len(y)):
        sentence, pred, gold = X[i], y_pred[i], y[i]
        outfile.write("%s\t%s\t%s\n" % (sentence.encode('utf-8'), pred.encode('utf-8'), gold.encode('utf-8')))
    _logger.info("accuracy: %f, %d records" % (accuracy_score(y, y_pred),
                                               len(y)))
Example #10
def test(X, y):
    _logger.info("Fisrt stage accuracy: %f" % front.score(X, y))
    import decode_svm
    outfile = open("predicted.dat", "w")
    discfile = open("discriminated.dat", "w")
    y_pred = list()
    sz = len(y)
    domains = front.named_steps["clf"].classes_
    for i in xrange(sz):
        sent = X[i]
        gold = y[i]
        
        front_result = sorted(zip(domains, front.decision_function([sent])[0]),
                              key = lambda x: -x[1])
        
        pred = front_result[0][0]
        assert pred == front.predict([sent])[0]

        if front_result[0][1]  < 0.0 or front_result[1][1] > 0.0:
            p = front_result[0][0]
            q = front_result[1][0]
            svm_pred = decode_svm.discriminate(p, q, sent)[0]
            discfile.write("%s\t%s\t%s\t%s\t%s\n" % \
                               (sent.encode('utf-8'),
                                p.encode('utf-8'),
                                q.encode('utf-8'),
                                svm_pred.encode('utf-8'), gold.encode('utf-8')))
            pred = svm_pred

        y_pred.append(pred)

        outfile.write("%s\t%s\t%s\n" % (sent.encode('utf-8'), pred.encode('utf-8'), gold.encode('utf-8')))

    _logger.info("ensembled accuracy: %f" % accuracy_score(y, y_pred))

    outfile.close()
    discfile.close()
Example #11
    def train_pair(self, p, q):
        if p > q:
            p, q = q, p

        p_len = len(self.by_domain_data[p])
        q_len = len(self.by_domain_data[q])

        _logger.info("Training SVM for %s V.S. %s, %d + %d = %d recored" % \
                         (p, q, p_len, q_len, p_len + q_len))

        X = list(self.by_domain_data[p])
        X.extend(self.by_domain_data[q])
        y = [p] * p_len
        y.extend([q] * q_len)

        pipeline = Pipeline([
                ("vert", TfidfVectorizer(min_df = 1, binary = False, ngram_range = (1, 1),
                                         tokenizer = Tokenizer())),
                ("svm", LinearSVC(loss='l2', penalty="l1",
                                  dual=False, tol=1e-3)),
                ])

        if self.cv > 0:
            _logger.info("Doing grid search on %d fold CV" % self.cv)
            params = {
                "svm__C": [1, 10, 50, 100, 500, 1000],
                }
            grid = GridSearchCV(pipeline, params, cv=self.cv, verbose=50)
            grid.fit(X, y)
            pipeline = grid.best_estimator_
            _logger.info("Grid search got best score:%f" % grid.best_score_)
            pipeline.accur = grid.best_score_
        else:
            pipeline.fit(X, y)
            _logger.debug("Testing on training data")
            accur = accuracy_score(y, pipeline.predict(X))
            pipeline.accur = accur
            _logger.info("Trainig accuracy (%s - %s): %f" % (p, q, accur))
        self.svms[p,q] = pipeline
        return pipeline
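A hypothetical all-pairs driver for train_pair() above; the domain list is copied from Example #5 and the "svms.model" name from Example #19, while gp is assumed to be an SVMGroup already populated via collect_by_domain (see Example #16).

# Hypothetical loop training every 1-vs-1 SVM with Example #11's train_pair().
import pickle
domains = ['alarm', 'calendar', 'communication', 'note', 'places',
           'reminder', 'weather', 'web']
for p in domains:
    for q in domains:
        if p < q:
            gp.train_pair(p, q)
pickle.dump(gp.svms, open("svms.model", "w"))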
Example #12
    #     else:
    #         y_pred.append(u'web')

    y_pred = clf.predict(X)
    outfile = open("predicted.dat", 'w')
    for i in range(len(y)):
        sentence, pred, gold = X[i], y_pred[i], y[i]
        outfile.write("%s\t%s\t%s\n" % (sentence.encode('utf-8'), pred.encode('utf-8'), gold.encode('utf-8')))
    _logger.info("accuracy: %f, %d records" % (accuracy_score(y, y_pred),
                                               len(y)))


if __name__ == "__main__":

    cmd = argparse.ArgumentParser()
    cmd.add_argument("--path", help = "path to the test data", default=TEST_FILE_PATH)
    cmd.add_argument("--serv", help = "run as server", dest="as_server", action='store_true')
    cmd.add_argument("--model", help = "path to the pickled model", required=True,
                     choices = ["%s.model" % algo for algo in CLFs.keys()])
    args = cmd.parse_args()

    _logger.info("loading model from %s" % args.model)
    clf = pickle.load(open(args.model))

    if args.as_server:
        serv(clf)
        sys.exit(0)

    test(args.path, clf)

Example #13
                self.terms.add(term)
                self.domains.add(domain)

        v = len(self.terms)
        for term in self.terms:
            p = dict()
            for domain in self.domains:
                p[domain] = (1.0 + self.count[term, domain]) / (v + self.count[domain])
            wcp = dict()
            s = sum(p.values())
            for domain in self.domains:
                wcp[domain] = p[domain] / s
            self.gini[term] = sum([v ** 2 for v in wcp.values()])

    def dump(self, out_path):
        with open(out_path, 'w') as outfile:
            for k, v in self.gini.items():
                outfile.write("%s %f\n" % (k, v))

if __name__ == "__main__":
    cmd = argparse.ArgumentParser()
    cmd.add_argument("--input", help="path of the count data")
    cmd.add_argument("--output", help="path to dump the model", default=MODEL_PATH)
    args = cmd.parse_args()

    gini = GiniCoe(args.input)
    _logger.info("Training Gini coefficient from count file: %s" % args.input)
    gini.train()
    _logger.info("Dumping model to %s" % args.output)
    gini.dump(args.output)
Example #14
from util import *
from util.log import _logger
from model.naive.train import NaiveBayes
from feat.terms.term_categorize import term_category, g_term_count
import rep.word_clustering.decode as word_clustering

class ClusteredNaiveBayes(NaiveBayes):
    def get_category(self, term):
        term = term_category(term)
        return word_clustering.get_cluster(term)


if __name__ == "__main__":
    cmd = argparse.ArgumentParser()
    cmd.add_argument("--input", help="path of the training data")
    cmd.add_argument("--terms", help="path of the terms file")
    cmd.add_argument("--alpha", help="alpha of discounting", type=float, default=0.5)
    cmd.add_argument("--cv", help="enable cross validation", type=int, default=0)

    args = cmd.parse_args()

    naive = ClusteredNaiveBayes(args.input, args.terms, args.alpha)
    if args.cv > 0:
        _logger.info("CV accuracy: %f +/- %f" % naive.cv(args.cv))
    else:
        _logger.info("Start training");
        naive.train()
        with open("naive.clustered.model", "w") as outfile:
            pickle.dump(naive, outfile)
            _logger.info("Model dumped to naive.clustered.model")
Example #15
        l = np.array(l)
        l.shape = len(l), 1
        ret = sparse.hstack([ret, l])
        _logger.debug("vectorization transform done")

        return ret


if __name__ == "__main__":
    cmd = argparse.ArgumentParser()
    cmd.add_argument("--input", help="path of the training data", default = TRAIN_FILE_PATH)
    cmd.add_argument("--algo", help="alogrithm to use", required=True, choices = CLFs.keys())
    args = cmd.parse_args()

    X, y = load_data(args.input)
    _logger.info("training using %s" % args.algo)

    pipeline = Pipeline([
            ("vert", TfidfVectorizer(min_df = 1, binary = True, ngram_range = (1, 3),
                                     tokenizer = Tokenizer())),
            #("vert", Vectorizer()),
            ("clf", CLFs[args.algo]),
            ])

    pipeline.fit(X, y)
    from decode import test
    test(TEST_FILE_PATH, pipeline)

    outpath = "%s.model" % args.algo
    with open(outpath, "w") as outfile:
        pickle.dump(pipeline, outfile)
Example #16
            accur = accuracy_score(y, pipeline.predict(X))
            pipeline.accur = accur
            _logger.info("Trainig accuracy (%s - %s): %f" % (p, q, accur))
        self.svms[p,q] = pipeline
        return pipeline
                    

if __name__ == "__main__":
    cmd = argparse.ArgumentParser()
    cmd.add_argument("--input", help="path of the training data", default=TRAIN_FILE_PATH)
    cmd.add_argument("--classes", help="the pair of classes need to train, train all combination if not specified",
                     nargs=2, default=None)
    cmd.add_argument("--cv", help="fold of cross validation 0 for not doing", default=0, type=int)
    args = cmd.parse_args()

    _logger.info("Loading training data from %s" % args.input)
    X, y = load_data(args.input)

    if args.classes:
        _logger.info("Will train 1v1 SVM between %s and %s" % (args.classes[0], args.classes[1]))
        gp = SVMGroup(cv=args.cv)
        if os.path.isfile("svms.model"):
            gp.svms = pickle.load(open("svms.model"))

        gp.collect_by_domain(X, y)
        gp.train_pair(args.classes[0], args.classes[1])

    else:
        gp = SVMGroup()
        _logger.info("Start training")
        gp.train(X, y)
Example #17
        ])

params = {
    "nb__alpha": [0.001, 0.01, 0.1, 0.5],
    }

if __name__ == "__main__":
    cmd = argparse.ArgumentParser()
    cmd.add_argument("--input", help="path of the training data", default=TRAIN_FILE_PATH)
    cmd.add_argument("--cv", help="enable cross validation", type=int, default=0)
    args = cmd.parse_args()

    X, y = load_data(args.input)

    if args.cv > 0:
        _logger.info("Doing %d fold cross validation" % args.cv)
        gs = GridSearchCV(pipeline, params, cv = args.cv, verbose=5)
        gs.fit(X, y)

        with open("sk_naive.model", "w") as outfile:
            pickle.dump(gs.best_estimator_, outfile)
            _logger.info("Model dumped to sk_naive.model")        
        print gs.best_estimator_
        print gs.best_score_
    else:
        _logger.info("Start training")
        pipeline.fit(X, y)
        with open("sk_naive.model", "w") as outfile:
            pickle.dump(pipeline, outfile)
            _logger.info("Model dumped to sk_naive.model")
Example #18
from train import Vectorizer


def gen(path, clf):
    X, y = load_data(path)
    scores = clf.decision_function(X)
    sz = len(y)
    with open("web_split.dat", "w") as outfile:
        for i in xrange(sz):
            assert y[i] == "web"
            score = scores[i]
            detail = sorted(zip(clf.named_steps["clf"].classes_, score), key=lambda x: -x[1])
            outfile.write("%s %f\n" % (detail[0][0], detail[0][1]))


if __name__ == "__main__":

    cmd = argparse.ArgumentParser()
    cmd.add_argument("--path", help="path to only-web training data")
    cmd.add_argument("--serv", help="run as server", dest="as_server", action="store_true")
    cmd.add_argument("--gen", help="generate training data", dest="generate", action="store_true")

    args = cmd.parse_args()

    _logger.info("loading model from %s" % "svm_ovr.model")
    clf = pickle.load(open("svm_ovr.model"))

    if args.generate:
        gen(args.path, clf)
Example #19
    domains = ['alarm', 'calendar', 'communication', 'note', 'places',
               'reminder', 'weather', 'web']
    for p in domains:
        for q in domains:
            if p < q:
                clf = svms[p, q]
                p_len = len(by_domain[p])
                q_len = len(by_domain[q])
                X = list(by_domain[p])
                X.extend(by_domain[q])
                y = [p] * p_len
                y.extend([q] * q_len)
                _logger.info("%.4f, %s - %s" % (clf.score(X, y), p, q))


_logger.info("loading model from svms.model")
svms = pickle.load(open('svms.model'))
        
if __name__ == "__main__":

    cmd = argparse.ArgumentParser()
    cmd.add_argument("--path", help = "path to the test data", default=TEST_FILE_PATH)
    cmd.add_argument("--serv", help = "run as server", dest="as_server", action='store_true')
    args = cmd.parse_args()
    X, y = load_data(args.path)

    if args.as_server:
        serv()
    else:
        test(X, y)
Example #20
if __name__ == "__main__":
    cmd = argparse.ArgumentParser()
<<<<<<< HEAD
    cmd.add_argument("--serv", help = "run as server", dest="as_server", action='store_true')
    cmd.add_argument("--serv-prob", help = "run as server compare posterior probability of terms under every domain", dest="as_server_prob", action='store_true')
    cmd.add_argument("--path", help = "path to the test data", default=TEST_FILE_PATH)
    cmd.add_argument("--model-path", help = "path to the naive bayes model file")
=======
    cmd.add_argument("--serv", help = "run as server", default=False, dest="as_server", action='store_true')
    cmd.add_argument("--path", help = "path to the test data", default='test.dat')
>>>>>>> bf1b826a908169fa2340477f367736f63a5f7875
    args = cmd.parse_args()
    print args

    _logger.info("Loading model")
<<<<<<< HEAD
    model = pickle.load(open(args.model_path))
=======
    model = pickle.load(open(conv.redirect('naive.model')))
>>>>>>> bf1b826a908169fa2340477f367736f63a5f7875

    if args.as_server:
        serv(model)
    elif args.as_server_prob:
        serv_prob(model)
    else:
<<<<<<< HEAD
        test(model, args.path)

=======
Example #21
                terms, domain = line.split('\t')
                term_set = set()
                for term in terms.split(' '):
                    term = term_category(term)
                    if term not in term_set:
                        term_set.add(term)
                        self.count[(term, domain)] += 1
                c += 1
                if c % 10000 == 0:
                    _logger.debug("%d records processed" % c)

    def dump(self, path):
        with open(path, 'w') as outfile:
            for key, val in self.count.items():
                term, domain = key
                outfile.write("%s %s %d\n" % (term.encode('utf-8'), domain.encode('utf-8'), val))
                


if __name__ == "__main__":
    cmd = argparse.ArgumentParser()
    cmd.add_argument("--input", help="path of the training data")
    cmd.add_argument("--output", help="path to dump the model", default=DEFAULT_OUTPATH)
    args = cmd.parse_args()

    counter = Counter(args.input)
    _logger.info("training from %s" % args.input)
    counter.train()
    _logger.info("dumping model to %s" % args.output)
    counter.dump(args.output)
Example #22
 def __init__(self, naive_model_path):
     _logger.info("loading naive bayes model from %s" % naive_model_path)
     model = pickle.load(open(naive_model_path))
     self.naive = NaiveDecoder(model)
     self.words = dict()
Example #23
    vert = clf.named_steps['vert']
    terms = list(set(sentence.split()))
    terms = sorted([(term, sel.scores_[get_vert_idx(vert, term_category(term))]) for term in terms], 
                   key = lambda x: -x[1])[:7]
    return ' '.join([term[0] for term in terms])


def extract(X, clf):
    ret = []
    for sentence in X:
        ret.append(slim(sentence, clf))
    return ret
            

if __name__ == "__main__":
    _logger.info("loading model")
    clf = pickle.load(open('sk_naive.model'))
    cmd = argparse.ArgumentParser()
    cmd.add_argument("--path", help = "path to the test data", default=TEST_FILE_PATH)
    cmd.add_argument("--serv", help = "run as server", dest="as_server", action='store_true')
    args = cmd.parse_args()

    if args.as_server:
        serv(clf)

    X, y = load_data(args.path)

    # _logger.debug("Extracting merites for long sentences")
    # X = extract(X, clf)
    
    y_pred = clf.predict(X)
Example #24
            q = front_result[1][0]
            svm_pred = decode_svm.discriminate(p, q, sent)[0]
            discfile.write("%s\t%s\t%s\t%s\t%s\n" % \
                               (sent.encode('utf-8'),
                                p.encode('utf-8'),
                                q.encode('utf-8'),
                                svm_pred.encode('utf-8'), gold.encode('utf-8')))
            pred = svm_pred

        y_pred.append(pred)

        outfile.write("%s\t%s\t%s\n" % (sent.encode('utf-8'), pred.encode('utf-8'), gold.encode('utf-8')))

    _logger.info("ensembled accuracy: %f" % accuracy_score(y, y_pred))

    outfile.close()
    discfile.close()

if __name__ == "__main__":
    cmd = argparse.ArgumentParser()
    cmd.add_argument("--serv", help = "run as server", dest="as_server", action='store_true')
    cmd.add_argument("--path", help = "path to the test data", default=TEST_FILE_PATH)
    cmd.add_argument("--front-model-path", help = "path to the first stage model")
    args = cmd.parse_args()

    _logger.info("Loading naive bayes model from %s" % args.front_model_path)
    front = pickle.load(open(args.front_model_path))
    X, y = load_data(args.path)
    test(X, y)