def __init__(self):
    """Set up a scale-then-classify pipeline, an accuracy metric, and a sample counter."""
    self.model = compose.Pipeline(
        preprocessing.StandardScaler(),
        linear_model.LogisticRegression(optim.SGD(0.1)),
    )
    self.metric = metrics.Accuracy()
    self.count = 0
def test_clone_idempotent():
    """A clone retrained from scratch on the same stream reproduces the original's predictions."""
    pipeline = preprocessing.StandardScaler() | linear_model.LogisticRegression(
        optimizer=optim.Adam(), l2=0.1
    )

    # First pass: record every pre-update prediction while training.
    predictions = []
    for features, label in datasets.Phishing():
        predictions.append(pipeline.predict_proba_one(features))
        pipeline.learn_one(features, label)

    # Second pass: a fresh clone must emit the exact same sequence.
    fresh = pipeline.clone()
    for expected, (features, label) in zip(predictions, datasets.Phishing()):
        assert fresh.predict_proba_one(features) == expected
        fresh.learn_one(features, label)
def test_learn_one_warm_up_mode():
    """The scaler's statistics refresh during warm-up learn_one, but not during a plain learn_one."""
    pipeline = compose.Pipeline(
        ("scale", preprocessing.StandardScaler()),
        ("log_reg", linear_model.LogisticRegression()),
    )
    stream = [({"a": v, "b": v}, bool(v % 2)) for v in range(100)]

    for features, label in stream:
        before = dict(pipeline["scale"].counts)

        with utils.warm_up_mode():
            pipeline.learn_one(features, label)
        after_warm_up = dict(pipeline["scale"].counts)

        pipeline.learn_one(features, label)
        after_plain = dict(pipeline["scale"].counts)

        # Warm-up updates the unsupervised step; a plain learn_one leaves it alone.
        assert before != after_warm_up
        assert after_warm_up == after_plain
def test_learn_many_warm_up_mode():
    """The scaler's statistics refresh during warm-up learn_many, but not during a plain learn_many."""
    pipeline = compose.Pipeline(
        ("scale", preprocessing.StandardScaler()),
        ("log_reg", linear_model.LogisticRegression()),
    )
    dataset = [({"a": v, "b": v}, v) for v in range(100)]
    features = [x for x, _ in dataset]
    targets = [bool(t % 2) for _, t in dataset]

    # Feed the stream in mini-batches of 5.
    for start in range(0, len(dataset), 5):
        X = pd.DataFrame(features[start:start + 5])
        y = pd.Series(targets[start:start + 5])

        before = dict(pipeline["scale"].counts)

        with utils.warm_up_mode():
            pipeline.learn_many(X, y)
        after_warm_up = dict(pipeline["scale"].counts)

        pipeline.learn_many(X, y)
        after_plain = dict(pipeline["scale"].counts)

        # Warm-up updates the unsupervised step; a plain learn_many leaves it alone.
        assert before != after_warm_up
        assert after_warm_up == after_plain
# Two-branch feature pipeline feeding a logistic regression:
#   - pipe1 drops the raw-text fields and standard-scales the remaining
#     numeric columns.
#   - pipe2 drops everything except the cleaned title and turns it into
#     TF-IDF features.
# NOTE(review): the step name 'drop_non_featuress' looks like a typo of
# 'drop_non_features'; it is kept verbatim because step names may be
# referenced elsewhere (e.g. Logistic_model['drop_non_featuress']).
Logistic_model = compose.Pipeline(
    (
        'features',
        compose.TransformerUnion(
            (
                'pipe1',
                compose.Pipeline(
                    (
                        'drop_non_features',
                        compose.Discard(
                            'body', 'date', 'subject', 'text', 'title', 'title_clean'
                        ),
                    ),
                    ('scale', preprocessing.StandardScaler()),
                ),
            ),
            (
                'pipe2',
                compose.Pipeline(
                    (
                        'drop_non_featuress',
                        compose.Discard(
                            'body', 'body_len', 'body_num', 'date', 'punct%',
                            'subject', 'text', 'title', 'title_len', 'title_num',
                        ),
                    ),
                    ('tfidf', feature_extraction.TFIDF(on='title_clean')),
                ),
            ),
        ),
    ),
    ('modeling', linear_model.LogisticRegression()),
)

# Progressive-validation metric for this binary classifier.
metric = metrics.ROCAUC()

# Shallow copy so downstream evaluation can consume/mutate the list without
# touching `train` (presumably a list of (x, y) pairs defined earlier in the
# file — confirm upstream).
train1 = train[:]
def _unit_test_params(cls):
    """Constructor arguments used by the generic unit tests to instantiate this class."""
    params = {
        "classifier": linear_model.LogisticRegression(),
        "code_size": 6,
    }
    return params