def train(train_filename, param_filename, **kwparams):
    if "penalty" in kwparams:
        penalty = kwparams["penalty"]
    else:
        penalty = "l2"

    if "dual" in kwparams:
        # Accept common truthy strings, case-insensitively.
        dual = kwparams["dual"].lower() in ['true', '1', 't', 'y']
    else:
        dual = False

    if "inv_reg" in kwparams:
        C = float(kwparams["inv_reg"])
    else:
        C = 1.0

    if "fit_intercept" in kwparams:
        fit_intercept = kwparams["fit_intercept"].lower() in ['true', '1', 't', 'y']
    else:
        fit_intercept = True

    if "intercept_scaling" in kwparams:
        intercept_scaling = float(kwparams["intercept_scaling"])
    else:
        intercept_scaling = 1.0

    if "class_weight" in kwparams and kwparams["class_weight"]:
        class_weight = kwparams["class_weight"]
    else:
        class_weight = None

    if "random_state" in kwparams and kwparams["random_state"]:
        random_state = int(kwparams["random_state"])
    else:
        random_state = None

    if "tol" in kwparams:
        tol = float(kwparams["tol"])
    else:
        tol = 0.0001

    # Separating target from inputs
    X, y = design_matrix(train_filename=train_filename)

    print "Training Logistic Regression Classifier..."

    # Initializing LR classifier
    clf = linear_model.LogisticRegression(penalty=penalty, dual=dual, C=C,
                                          fit_intercept=fit_intercept,
                                          intercept_scaling=intercept_scaling,
                                          class_weight=class_weight,
                                          random_state=random_state, tol=tol)

    # Fitting LR classifier
    clf.fit(X, y)

    # Pickle and save
    with open(param_filename, 'wb') as f:
        pickle.dump(clf, f)

    print "Done."
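# Usage sketch (an assumption, not part of the original code): kwparams values
# are expected to arrive as strings, e.g. from a command-line front end, and the
# parsing above maps them onto LogisticRegression's arguments. The file names
# below are hypothetical.
#
#   train("example-train-data.csv", "lr-params.pkl",
#         penalty="l1", inv_reg="0.5", fit_intercept="true", tol="1e-4")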
def miss(test_filename, train_filename, param_filename):
    # Look for an optional index file with per-row metadata.
    fn = test_filename.replace("-data", "-index")
    meta = None
    if os.path.exists(fn):
        with open(fn, "r") as idxfile:
            meta = idxfile.readlines()

    X, y, df = design_matrix(test_filename, train_filename, get_df=True)
    predictor = gen_predictor(param_filename)
    probs = predictor(X)
    indices = get_misses(probs, y)
    for i in indices:
        print "----------------"
        if meta:
            print "META:", ",".join(meta[i].split(",")).strip()
        print df.ix[i]
    return indices
def eval(test_filename, train_filename, param_filename, method, **kwparams):
    X, y = design_matrix(test_filename, train_filename)
    predictor = gen_predictor(param_filename)
    probs = predictor(X)
    return run_eval(probs, y, method, **kwparams)
def pred(test_filename, train_filename, param_filename):
    X, y = design_matrix(test_filename, train_filename)
    predictor = gen_predictor(param_filename)
    probs = predictor(X)
    return probs, y
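# For reference only: a minimal sketch of what the predictor produced from the
# pickled parameter file could look like. This is an assumption for
# illustration, not the package's actual gen_predictor; it simply unpickles the
# classifier saved by train() above and wraps predict_proba.
import pickle

def example_gen_predictor(param_filename):
    with open(param_filename, 'rb') as f:
        clf = pickle.load(f)

    def predictor(X):
        # predict_proba returns one column per class; for a binary target the
        # second column is the probability of the positive class.
        return clf.predict_proba(X)[:, 1]

    return predictor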
def train(train_filename, param_filename, **kwparams):
    if "error" in kwparams:
        C = float(kwparams["error"])
    else:
        C = 1.0

    if "kernel" in kwparams:
        kernel = kwparams["kernel"]
    else:
        kernel = "rbf"

    if "degree" in kwparams:
        degree = int(kwparams["degree"])
    else:
        degree = 3

    if "gamma" in kwparams:
        gamma = float(kwparams["gamma"])
    else:
        # gamma=0.0 makes older scikit-learn releases fall back to 1/n_features.
        gamma = 0.0

    if "coef0" in kwparams:
        coef0 = float(kwparams["coef0"])
    else:
        coef0 = 0.0

    if "shrinking" in kwparams:
        shrinking = kwparams["shrinking"].lower() in ['true', '1', 't', 'y']
    else:
        shrinking = True

    if "tol" in kwparams:
        tol = float(kwparams["tol"])
    else:
        tol = 0.0001

    if "cache_size" in kwparams:
        cache_size = float(kwparams["cache_size"])
    else:
        cache_size = 200

    if "class_weight" in kwparams and kwparams["class_weight"]:
        class_weight = kwparams["class_weight"]
    else:
        class_weight = None

    if "max_iter" in kwparams:
        max_iter = int(kwparams["max_iter"])
    else:
        max_iter = -1

    if "random_state" in kwparams and kwparams["random_state"]:
        random_state = int(kwparams["random_state"])
    else:
        random_state = None

    # Separating target from inputs
    X, y = design_matrix(train_filename=train_filename)

    print "Training Support Vector Machine Classifier..."

    # Initializing SVM classifier
    clf = svm.SVC(probability=True, C=C, kernel=kernel, degree=degree,
                  gamma=gamma, coef0=coef0, shrinking=shrinking, tol=tol,
                  cache_size=cache_size, class_weight=class_weight,
                  max_iter=max_iter, random_state=random_state)

    # Fitting SVM classifier
    clf.fit(X, y)

    # Pickle and save
    with open(param_filename, 'wb') as f:
        pickle.dump(clf, f)

    print "Done."
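# Usage sketch (hypothetical file names, not part of the original code): the
# SVM's C penalty is exposed under the kwparams name "error", and boolean-like
# options are passed as strings.
#
#   train("example-train-data.csv", "svm-params.pkl",
#         kernel="rbf", error="10", gamma="0.01", shrinking="true")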
def train(train_filename, param_filename, **kwparams):
    if "n_estimators" in kwparams:
        n_estimators = int(kwparams["n_estimators"])
    else:
        n_estimators = 10

    if "criterion" in kwparams:
        criterion = kwparams["criterion"]
    else:
        criterion = "gini"

    max_features = None
    if "max_features" in kwparams:
        temp = kwparams["max_features"]
        if temp in ["auto", "sqrt", "log2"]:
            max_features = temp
        elif temp:
            # Accept an integer count or a float fraction of features.
            try:
                max_features = int(temp)
            except ValueError:
                try:
                    max_features = float(temp)
                except ValueError:
                    pass

    max_depth = None
    if "max_depth" in kwparams:
        temp = kwparams["max_depth"]
        if temp:
            max_depth = int(temp)

    if "min_samples_split" in kwparams:
        min_samples_split = int(kwparams["min_samples_split"])
    else:
        min_samples_split = 2

    if "min_samples_leaf" in kwparams:
        min_samples_leaf = int(kwparams["min_samples_leaf"])
    else:
        min_samples_leaf = 1

    if "min_weight_fraction_leaf" in kwparams:
        min_weight_fraction_leaf = float(kwparams["min_weight_fraction_leaf"])
    else:
        min_weight_fraction_leaf = 0

    max_leaf_nodes = None
    if "max_leaf_nodes" in kwparams:
        temp = kwparams["max_leaf_nodes"]
        if temp:
            max_leaf_nodes = int(temp)

    if "bootstrap" in kwparams:
        bootstrap = kwparams["bootstrap"].lower() in ['true', '1', 't', 'y']
    else:
        bootstrap = True

    if "oob_score" in kwparams:
        oob_score = kwparams["oob_score"].lower() in ['true', '1', 't', 'y']
    else:
        oob_score = False

    if "n_jobs" in kwparams:
        n_jobs = int(kwparams["n_jobs"])
    else:
        n_jobs = 1

    if "random_state" in kwparams and kwparams["random_state"]:
        random_state = int(kwparams["random_state"])
    else:
        random_state = None

    if "class_weight" in kwparams and kwparams["class_weight"]:
        class_weight = kwparams["class_weight"]
    else:
        class_weight = None

    # Separating target from inputs
    X, y = design_matrix(train_filename=train_filename)

    print "Training Random Forest Classifier..."

    # Initializing RF classifier. min_weight_fraction_leaf and class_weight are
    # parsed above but left commented out below, presumably because the
    # scikit-learn version in use does not accept them.
    clf = ensemble.RandomForestClassifier(n_estimators=n_estimators,
                                          criterion=criterion,
                                          max_features=max_features,
                                          max_depth=max_depth,
                                          min_samples_split=min_samples_split,
                                          min_samples_leaf=min_samples_leaf,
                                          # min_weight_fraction_leaf=min_weight_fraction_leaf,
                                          max_leaf_nodes=max_leaf_nodes,
                                          bootstrap=bootstrap,
                                          oob_score=oob_score,
                                          n_jobs=n_jobs,
                                          random_state=random_state)
                                          # class_weight=class_weight)

    # Fitting RF classifier
    clf.fit(X, y)

    # Pickle and save
    with open(param_filename, 'wb') as f:
        pickle.dump(clf, f)

    print "Done."
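# Usage sketch (hypothetical file names, not part of the original code):
# max_features may be given as one of the named options ("auto", "sqrt",
# "log2"), an integer count, or a float fraction of the feature count;
# max_depth and max_leaf_nodes stay unlimited when their kwparams are empty.
#
#   train("example-train-data.csv", "rf-params.pkl",
#         n_estimators="200", max_features="sqrt", max_depth="8",
#         bootstrap="true", n_jobs="4")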
def train(train_filename, param_filename, **kwparams):
    if "criterion" in kwparams:
        criterion = kwparams["criterion"]
    else:
        criterion = "gini"

    if "splitter" in kwparams:
        splitter = kwparams["splitter"]
    else:
        splitter = "best"

    max_features = None
    if "max_features" in kwparams:
        temp = kwparams["max_features"]
        if temp in ["auto", "sqrt", "log2"]:
            max_features = temp
        elif temp:
            # Accept an integer count or a float fraction of features.
            try:
                max_features = int(temp)
            except ValueError:
                try:
                    max_features = float(temp)
                except ValueError:
                    pass

    max_depth = None
    if "max_depth" in kwparams:
        temp = kwparams["max_depth"]
        if temp:
            max_depth = int(temp)

    if "min_samples_split" in kwparams:
        min_samples_split = int(kwparams["min_samples_split"])
    else:
        min_samples_split = 2

    if "min_samples_leaf" in kwparams:
        min_samples_leaf = int(kwparams["min_samples_leaf"])
    else:
        min_samples_leaf = 1

    max_leaf_nodes = None
    if "max_leaf_nodes" in kwparams:
        temp = kwparams["max_leaf_nodes"]
        if temp:
            max_leaf_nodes = int(temp)

    # Separating target from inputs
    X, y = design_matrix(train_filename=train_filename)

    print "Training Decision Tree..."

    # Initializing DT classifier
    clf = tree.DecisionTreeClassifier(criterion=criterion,
                                      splitter=splitter,
                                      max_features=max_features,
                                      max_depth=max_depth,
                                      min_samples_split=min_samples_split,
                                      min_samples_leaf=min_samples_leaf,
                                      max_leaf_nodes=max_leaf_nodes)

    # Fitting DT classifier
    clf.fit(X, y)

    # Pickle and save
    with open(param_filename, 'wb') as f:
        pickle.dump(clf, f)

    print "Done."
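# Usage sketch (hypothetical file names, not part of the original code): a
# shallow, easily inspected tree.
#
#   train("example-train-data.csv", "dt-params.pkl",
#         criterion="entropy", max_depth="4", min_samples_leaf="5")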