def test_stateful_transform():
    """Transforming new data must reuse statistics learned at fit time.

    ``center(x1)`` memorizes the *training* mean; transforming the test
    data must subtract that training mean, not the test mean.
    """
    train = patsy.demo_data("x1", "x2", "y")
    train["x1"][:] = 1  # training mean of x1 is exactly 1
    test = patsy.demo_data("x1", "x2", "y")
    test["x1"][:] = 0

    transformer = PatsyTransformer("center(x1) + x2")
    transformer.fit(train)
    transformed = transformer.transform(test)

    # 0 - (training mean of 1) == -1 everywhere in the centered column.
    assert_array_equal(transformed[:, 0], -1)
def test_error_on_y_transform():
    """A formula with an outcome (left-hand side) must be rejected.

    A transformer produces only a design matrix, so ``y ~ ...`` formulas
    should raise a ``PatsyError`` from both ``fit`` and ``fit_transform``.
    """
    data = patsy.demo_data("x1", "x2", "x3", "y")
    transformer = PatsyTransformer("y ~ x1 + x2")
    expected = ("encountered outcome variables for a model"
                " that does not expect them")
    # Both entry points must fail with the same message.
    assert_raise_message(patsy.PatsyError, expected, transformer.fit, data)
    assert_raise_message(patsy.PatsyError, expected,
                         transformer.fit_transform, data)
def test_stateful_transform_dataframe():
    """DataFrame in, DataFrame out, with training-time centering applied.

    With ``return_type='dataframe'`` the transform must return a pandas
    DataFrame, and ``center(x1)`` must subtract the *training* mean.

    NOTE(review): a function with this exact name is defined again later
    in this file and shadows this one at import time.
    """
    data_train = pd.DataFrame(patsy.demo_data("x1", "x2", "y"))
    # Fix: use .loc instead of chained indexing (data_train['x1'][:] = 1).
    # Chained assignment is a SettingWithCopy hazard and is a silent no-op
    # under pandas 2.x copy-on-write, which would break this test.
    data_train.loc[:, "x1"] = 1  # mean of x1 is 1
    data_test = pd.DataFrame(patsy.demo_data("x1", "x2", "y"))
    data_test.loc[:, "x1"] = 0

    est = PatsyTransformer("center(x1) + x2", return_type='dataframe')
    est.fit(data_train)
    data_trans = est.transform(data_test)

    # make sure result is pandas dataframe
    assert type(data_trans) is pd.DataFrame

    # make sure that mean of training, not test data was removed:
    # 0 - (training mean of 1) == -1.
    assert_array_equal(data_trans['center(x1)'][:], -1)
def test_stateful_transform_dataframe():
    """DataFrame in, DataFrame out, with training-time centering applied.

    With ``return_type='dataframe'`` the transform must return a pandas
    DataFrame, and ``center(x1)`` must subtract the *training* mean.

    NOTE(review): this duplicates an earlier function of the same name
    (the duplicate should probably be deleted or renamed).
    """
    data_train = pd.DataFrame(patsy.demo_data("x1", "x2", "y"))
    # Fix: use .loc instead of chained indexing (data_train['x1'][:] = 1).
    # Chained assignment is a SettingWithCopy hazard and is a silent no-op
    # under pandas 2.x copy-on-write, which would break this test.
    data_train.loc[:, "x1"] = 1  # mean of x1 is 1
    data_test = pd.DataFrame(patsy.demo_data("x1", "x2", "y"))
    data_test.loc[:, "x1"] = 0

    est = PatsyTransformer("center(x1) + x2", return_type='dataframe')
    est.fit(data_train)
    data_trans = est.transform(data_test)

    # make sure result is pandas dataframe
    assert type(data_trans) is pd.DataFrame

    # make sure that mean of training, not test data was removed:
    # 0 - (training mean of 1) == -1.
    assert_array_equal(data_trans['center(x1)'][:], -1)
def test_intercept_transformer():
    """No intercept column by default; ``add_intercept=True`` prepends one.

    The default design matrix holds only the two requested features;
    with an intercept the first column is all ones and there are three
    columns total.
    """
    data = patsy.demo_data("x1", "x2", "x3", "y")

    # Default: only the two requested features, no intercept column.
    plain = PatsyTransformer("x1 + x2")
    plain.fit(data)
    assert_equal(plain.transform(data).shape[1], 2)

    # With an intercept: constant column of ones in front, 3 columns total.
    with_intercept = PatsyTransformer("x1 + x2", add_intercept=True)
    with_intercept.fit(data)
    transformed = with_intercept.transform(data)
    assert_array_equal(transformed[:, 0], 1)
    assert_equal(with_intercept.transform(data).shape[1], 3)
def test_scope_transformer():
    """Formulas may call functions defined in the enclosing Python scope."""
    data = patsy.demo_data("x1", "x2", "x3", "y")

    # Constant-valued feature so the output column is trivial to assert on.
    # The name must stay `myfunc`: patsy resolves it from the formula text.
    def myfunc(x):
        out = np.ones_like(x)
        out.fill(42)
        return out

    est = PatsyTransformer("x1 + myfunc(x2)")
    est.fit(data)
    assert_array_equal(est.transform(data)[:, 1], 42)

    # Same result via the fused fit_transform path.
    est = PatsyTransformer("x1 + myfunc(x2)")
    assert_array_equal(est.fit_transform(data)[:, 1], 42)

    # Feature names mirror the formula terms, including the call expression.
    assert_equal(est.feature_names_, ["x1", "myfunc(x2)"])
def test_coxph_model():
    """End-to-end check: a patsy design matrix feeding a CoxPH survival
    model inside a sklearn pipeline yields a plausible lifetime estimate."""
    data = lifelines.datasets.load_dd()

    # Pipeline: formula -> DataFrame design matrix -> CoxPH fitter.
    pipeline = make_pipeline(
        PatsyTransformer('un_continent_name + regime + start_year -1',
                         return_type='dataframe'),
        CoxPHFitterModel(duration_column='duration', event_col='observed'),
    )

    # Hold out a test split; y carries the duration/event columns.
    data_train, data_test = train_test_split(data)
    pipeline.fit(data_train, y=data_train)

    # Expected lifetime for a single held-out row should exceed 4.
    exp_lifetime = pipeline.predict(data_test[0:1])
    assert exp_lifetime > 4
def test_scope_transformer():
    """Formulas may call functions defined in the enclosing Python scope.

    NOTE(review): this redefines an earlier test of the same name and
    shadows it at import time. The earlier version also asserted on
    ``feature_names_``; that check is restored here so the shadowing does
    not silently drop coverage.
    """
    data = patsy.demo_data("x1", "x2", "x3", "y")

    # Constant-valued feature; the name must stay `myfunc` because patsy
    # resolves it from the formula text.
    def myfunc(x):
        tmp = np.ones_like(x)
        tmp.fill(42)
        return tmp

    est = PatsyTransformer("x1 + myfunc(x2)")
    est.fit(data)
    data_trans = est.transform(data)
    assert_array_equal(data_trans[:, 1], 42)

    # Same result via the fused fit_transform path.
    est = PatsyTransformer("x1 + myfunc(x2)")
    data_trans = est.fit_transform(data)
    assert_array_equal(data_trans[:, 1], 42)

    # Restored from the shadowed duplicate: names mirror the formula terms.
    assert_equal(est.feature_names_, ["x1", "myfunc(x2)"])
# --- Feature extraction: sample cells, embed with VGG16, pool via LLC ---
feature_extraction_model.load_weights(cfg.VGG16_pretrained_model_path,
                                      by_name=True)

# Randomly sample 2000 cell indices so the encoding cost stays bounded.
sampled_cell_Indexs = random_sample_index(
    cells_lists, 2000)  # random sample 2000 cell features

# Embed each sampled cell with the pretrained network.
# (comprehension replaces the original append loop — same order, same data)
feature_lists = np.array([
    feature_extraction_model.predict(cells_lists[index])
    for index in sampled_cell_Indexs
])

C = llc(feature_lists)  # the codes
f = np.sum(C, axis=1)  # the single vector of the patient.
# f=np.max(C,axis=1)  # alternative pooling: the single vector of the patient.

# --- Survival analysis demo: CoxPH on the lifelines 'dd' dataset ---
data = lifelines.datasets.load_dd()

# create sklearn pipeline: patsy design matrix -> CoxPH fitter
coxph_surv_ppl = make_pipeline(
    PatsyTransformer('un_continent_name + regime + start_year -1',
                     return_type='dataframe'),
    CoxPHFitterModel(duration_column='duration', event_col='observed'))

# split data to train and test
data_train, data_test = train_test_split(data)

# fit CoxPH model (y carries the duration/event columns)
coxph_surv_ppl.fit(data_train, y=data_train)

# use pipeline to predict expected lifetime for a single row
exp_lifetime = coxph_surv_ppl.predict(data_test[0:1])
print('expected lifetime: ' + str(exp_lifetime))

# or you can extract the model from the pipeline to access more methods
coxmodel = coxph_surv_ppl.named_steps['coxphfittermodel'].estimator
coxmodel.print_summary()
# Schedule delivery time during the entire working hours i.e. between 8 am to 6 pm data["Is_delivery_working_hours"] = (data["time_slot_from"].str[:2].astype( int) >= 9) & (data["time_slot_to"].str[:2].astype(int) <= 18) # Whether committed delivery day is friday (weekend) or not data["Is_committed_delivery_friday"] = ( data["commitment_date_weekday"] == "Friday").astype(int) # dataframe of predictor variables feature_df = data[[ "Schedule_loc_type", "schedule_channel", "Is_committed_delivery_friday", "Is_delivery_working_hours" ]] y_df = data["accurate"] interaction_transformer = PatsyTransformer( "C(Schedule_loc_type) + C(schedule_channel)+ Is_committed_delivery_friday + Is_delivery_working_hours + C(Schedule_loc_type):C(schedule_channel)" ) non_interaction_transformer = PatsyTransformer( "C(Schedule_loc_type) + C(schedule_channel)+ Is_committed_delivery_friday + Is_delivery_working_hours" ) naive_bayes_clf = Pipeline( [("encoding", non_interaction_transformer), ("nb_clf", GaussianNB())] ) # Because we assume conditional independence among diffrent predictor variables in Naive Bayes, interaction terms are not necessary logistic_clf = Pipeline([('int_feature', interaction_transformer), ('log_clf', LogisticRegression(C=10.0, fit_intercept=True, random_state=100))]) supportvector_clf = Pipeline([('int_feature', interaction_transformer), ('svm_clf',