Esempio n. 1
0
def train(train_filename, param_filename, **kwparams):
    if "penalty" in kwparams:
        penalty = kwparams["penalty"]
    else:
        penalty = "l2"

    if "dual" in kwparams:
        dual =  kwparams["dual"].upper() in ['true', '1', 't', 'y']
    else:
        dual = False

    if "inv_reg" in kwparams:
        C = float(kwparams["inv_reg"])
    else:
        C = 1.0

    if "fit_intercept" in kwparams:
        fit_intercept =  kwparams["fit_intercept"].upper() in ['true', '1', 't', 'y']
    else:
        fit_intercept = True

    if "intercept_scaling" in kwparams:
        intercept_scaling = float(kwparams["intercept_scaling"])
    else:
        intercept_scaling = 1.0

    if "class_weight" in kwparams and kwparams["class_weight"]:
        class_weight = kwparams["class_weight"]
    else:
        class_weight = None

    if "random_state" in kwparams and kwparams["random_state"]:
        random_state = int(kwparams["random_state"])
    else:
        random_state = None

    if "tol" in kwparams:
        tol = float(kwparams["tol"])
    else:
        tol = 0.0001

    # Separating target from inputs
    X, y = design_matrix(train_filename=train_filename)

    print "Training Logistic Regression Classifier..."

    # Initializing LR classifier
    clf = linear_model.LogisticRegression(penalty=penalty, dual=dual, C=C,
                                          fit_intercept=fit_intercept, intercept_scaling=intercept_scaling,
                                          class_weight=class_weight, random_state=random_state,
                                          tol=tol)

    # Fitting LR classifier
    clf.fit(X, y)

    # Pickle and save
    f = open(param_filename, 'wb')
    pickle.dump(clf, f)

    print "Done."
Esempio n. 2
0
def miss(test_filename, train_filename, param_filename):
    fn = test_filename.replace("-data", "-index")
    meta = None
    if os.path.exists(fn):
        with open(fn, "r") as idxfile:
            meta = idxfile.readlines()

    X, y, df = design_matrix(test_filename, train_filename, get_df=True)
    predictor = gen_predictor(param_filename)
    probs = predictor(X)
    indices = get_misses(probs, y)
    for i in indices:
        print "----------------"
        if meta: print "META:", ",".join(meta[i].split(",")).strip()
        print df.ix[i]
    return indices
Esempio n. 3
0
def miss(test_filename, train_filename, param_filename):
    fn = test_filename.replace("-data", "-index")
    meta = None
    if os.path.exists(fn):
        with open(fn, "r") as idxfile:
            meta = idxfile.readlines()

    X, y, df = design_matrix(test_filename, train_filename, get_df=True)
    predictor = gen_predictor(param_filename)
    probs = predictor(X)
    indices = get_misses(probs, y)
    for i in indices:
        print "----------------"
        if meta: print "META:",",".join(meta[i].split(",")).strip()
        print df.ix[i]
    return indices
Esempio n. 4
0
def eval(test_filename, train_filename, param_filename, method, **kwparams):
    """Score the pickled model on the test set using the given eval method.

    NOTE(review): shadows the builtin eval; name kept for caller
    compatibility.
    """
    features, labels = design_matrix(test_filename, train_filename)
    scores = gen_predictor(param_filename)(features)
    return run_eval(scores, labels, method, **kwparams)
Esempio n. 5
0
def pred(test_filename, train_filename, param_filename):
    """Return (predicted probabilities, true labels) for the test set."""
    features, labels = design_matrix(test_filename, train_filename)
    probabilities = gen_predictor(param_filename)(features)
    return probabilities, labels
Esempio n. 6
0
def train(train_filename, param_filename, **kwparams):
    if "error" in kwparams:
        C = float(kwparams["error"])
    else:
        C = 1.0
        
    if "kernel" in kwparams:
        kernel = kwparams["kernel"]
    else:
        kernel = "rbf"

    if "degree" in kwparams:
        degree = int(kwparams["degree"])
    else:
        degree = 3

    if "gamma" in kwparams:
        gamma = float(kwparams["gamma"])
    else:
        gamma = 0.0

    if "coef0" in kwparams:
        coef0 = float(kwparams["coef0"])
    else:
        coef0 = 0.0

    if "shrinking" in kwparams:
        shrinking =  kwparams["shrinking"].upper() in ['true', '1', 't', 'y']
    else:
        shrinking = True
        
    if "tol" in kwparams:
        tol = float(kwparams["tol"])
    else:
        tol = 0.0001

    if "cache_size" in kwparams:
        cache_size = float(kwparams["cache_size"])
    else:
        cache_size = 200

    if "class_weight" in kwparams and kwparams["class_weight"]:
        class_weight = kwparams["class_weight"]
    else:
        class_weight = None

    if "max_iter" in kwparams:
        max_iter = int(kwparams["max_iter"])
    else:
        max_iter = -1

    if "random_state" in kwparams and kwparams["random_state"]:
        random_state = int(kwparams["random_state"])
    else:
        random_state = None

    # Separating target from inputs
    X, y = design_matrix(train_filename=train_filename)

    print "Training Support Vector Machine Classifier..."

    # Initializing SVM classifier
    clf = svm.SVC(probability=True,
                  C=C, kernel=kernel, degree=degree, gamma=gamma,
                  coef0=coef0, shrinking=shrinking, tol=tol, cache_size=cache_size,
                  class_weight=class_weight, max_iter=max_iter, random_state=random_state)

    # Fitting LR classifier
    clf.fit(X, y)

    # Pickle and save
    f = open(param_filename, 'wb')
    pickle.dump(clf, f)

    print "Done."
Esempio n. 7
0
def train(train_filename, param_filename, **kwparams):
    if "n_estimators" in kwparams:
        n_estimators = int(kwparams["n_estimators"])
    else:
        n_estimators = 10

    if "criterion" in kwparams:
        criterion = kwparams["criterion"]
    else:
        criterion = "gini"

    max_features = None
    if "max_features" in kwparams:
        temp = kwparams["max_features"]
        if temp in ["auto", "sqrt", "log2"]:
            max_features = temp
        elif temp:
            try:
                max_features = int(temp)
            except ValueError:
                try:
                    max_features = float(temp)
                except ValueError:
                    pass

    max_depth = None
    if "max_depth" in kwparams:
        temp = kwparams["max_depth"]
        if temp: max_depth = int(temp)

    if "min_samples_split" in kwparams:
        min_samples_split = int(kwparams["min_samples_split"])
    else:
        min_samples_split = 2

    if "min_samples_leaf" in kwparams:
        min_samples_leaf = int(kwparams["min_samples_leaf"])
    else:
        min_samples_leaf = 1

    if "min_weight_fraction_leaf" in kwparams:
        min_weight_fraction_leaf = float(kwparams["min_weight_fraction_leaf"])
    else:
        min_weight_fraction_leaf = 0

    max_leaf_nodes = None
    if "max_leaf_nodes" in kwparams:
        temp = kwparams["max_leaf_nodes"]
        if temp: max_leaf_nodes = int(temp)

    if "bootstrap" in kwparams:
        bootstrap = kwparams["bootstrap"].upper() in ['true', '1', 't', 'y']
    else:
        bootstrap = True

    if "oob_score" in kwparams:
        oob_score = kwparams["oob_score"].upper() in ['true', '1', 't', 'y']
    else:
        oob_score = False

    if "n_jobs" in kwparams:
        n_jobs = int(kwparams["n_jobs"])
    else:
        n_jobs = 1

    if "random_state" in kwparams and kwparams["random_state"]:
        random_state = int(kwparams["random_state"])
    else:
        random_state = None

    if "class_weight" in kwparams and kwparams["class_weight"]:
        class_weight = kwparams["class_weight"]
    else:
        class_weight = None

    # Separating target from inputs
    X, y = design_matrix(train_filename=train_filename)

    print "Training Random Forest Classifier..."
    clf = ensemble.RandomForestClassifier(
        n_estimators=n_estimators,
        criterion=criterion,
        max_features=max_features,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        #                                           min_weight_fraction_leaf=min_weight_fraction_leaf,
        max_leaf_nodes=max_leaf_nodes,
        bootstrap=bootstrap,
        oob_score=oob_score,
        n_jobs=n_jobs,
        random_state=random_state)
    #                                           class_weight=class_weight)

    # Fitting LR classifier
    clf.fit(X, y)

    # Pickle and save
    f = open(param_filename, 'wb')
    pickle.dump(clf, f)

    print "Done."
Esempio n. 8
0
def eval(test_filename, train_filename, param_filename, method, **kwparams):
    """Run the chosen evaluation method on the pickled model's test-set
    probabilities and return its result.

    NOTE(review): shadows the builtin eval; name kept for callers.
    """
    X, y = design_matrix(test_filename, train_filename)
    predict = gen_predictor(param_filename)
    return run_eval(predict(X), y, method, **kwparams)
Esempio n. 9
0
def pred(test_filename, train_filename, param_filename):
    """Load the pickled model and return (probabilities, labels) for the
    test set."""
    X, y = design_matrix(test_filename, train_filename)
    predict = gen_predictor(param_filename)
    return predict(X), y
Esempio n. 10
0
def train(train_filename, param_filename, **kwparams):
    if "criterion" in kwparams:
        criterion = kwparams["criterion"]
    else:
        criterion = "gini"

    if "splitter" in kwparams:
        splitter = kwparams["splitter"]
    else:
        splitter = "best"

    max_features = None
    if "max_features" in kwparams:
        temp = kwparams["max_features"]
        if temp in ["auto", "sqrt", "log2"]:
            max_features = temp
        elif temp:
            try:
                max_features = int(temp)
            except ValueError:
                try:
                    max_features = float(temp)
                except ValueError:
                    pass

    max_depth = None
    if "max_depth" in kwparams:
        temp = kwparams["max_depth"]
        if temp: max_depth = int(temp)

    if "min_samples_split" in kwparams:
        min_samples_split = int(kwparams["min_samples_split"])
    else:
        min_samples_split = 2

    if "min_samples_leaf" in kwparams:
        min_samples_leaf = int(kwparams["min_samples_leaf"])
    else:
        min_samples_leaf = 1

    max_leaf_nodes = None
    if "max_leaf_nodes" in kwparams:
        temp = kwparams["max_leaf_nodes"]
        if temp: max_leaf_nodes = int(temp)

    # Separating target from inputs
    X, y = design_matrix(train_filename=train_filename)

    print "Training Decision Tree..."

    # Initializing DT classifier
    clf = tree.DecisionTreeClassifier(criterion=criterion, splitter=splitter,
                                      max_features=max_features,
                                      max_depth=max_depth,
                                      min_samples_split=min_samples_split,
                                      min_samples_leaf=min_samples_leaf,
                                      max_leaf_nodes=max_leaf_nodes)

    # Fitting DT classifier
    clf.fit(X, y)

    # Pickle and save
    f = open(param_filename, 'wb')
    pickle.dump(clf, f)

    print "Done."
Esempio n. 11
0
def train(train_filename, param_filename, **kwparams):
    if "penalty" in kwparams:
        penalty = kwparams["penalty"]
    else:
        penalty = "l2"

    if "dual" in kwparams:
        dual = kwparams["dual"].upper() in ['true', '1', 't', 'y']
    else:
        dual = False

    if "inv_reg" in kwparams:
        C = float(kwparams["inv_reg"])
    else:
        C = 1.0

    if "fit_intercept" in kwparams:
        fit_intercept = kwparams["fit_intercept"].upper() in [
            'true', '1', 't', 'y'
        ]
    else:
        fit_intercept = True

    if "intercept_scaling" in kwparams:
        intercept_scaling = float(kwparams["intercept_scaling"])
    else:
        intercept_scaling = 1.0

    if "class_weight" in kwparams and kwparams["class_weight"]:
        class_weight = kwparams["class_weight"]
    else:
        class_weight = None

    if "random_state" in kwparams and kwparams["random_state"]:
        random_state = int(kwparams["random_state"])
    else:
        random_state = None

    if "tol" in kwparams:
        tol = float(kwparams["tol"])
    else:
        tol = 0.0001

    # Separating target from inputs
    X, y = design_matrix(train_filename=train_filename)

    print "Training Logistic Regression Classifier..."

    # Initializing LR classifier
    clf = linear_model.LogisticRegression(penalty=penalty,
                                          dual=dual,
                                          C=C,
                                          fit_intercept=fit_intercept,
                                          intercept_scaling=intercept_scaling,
                                          class_weight=class_weight,
                                          random_state=random_state,
                                          tol=tol)

    # Fitting LR classifier
    clf.fit(X, y)

    # Pickle and save
    f = open(param_filename, 'wb')
    pickle.dump(clf, f)

    print "Done."