def setUp(self):
    """Run a 3-fold grid search over an SVC and keep the first synced event.

    Builds a ``SyncerTest`` for the "grid search test" project, fits a
    ``GridSearchCV`` on randomly generated integer data through
    ``fit_sync`` and stores the first event returned by ``sync()`` on
    ``self.grid_search_event``.
    """
    project = NewOrExistingProject(
        "grid search test",
        "srinidhi",
        "Grid search cross validation - 3 folds",
    )
    syncer_obj = SyncerTest(
        project,
        DefaultExperiment(),
        NewExperimentRun("Abc"),
        ThriftConfig(None, None),
    )

    # Features are drawn before targets so the random stream is consumed
    # in the same order on every run of this fixture.
    n_rows = 2000
    X = pd.DataFrame(
        np.random.randint(0, 100, size=(n_rows, 4)), columns=list('ABCD'))
    targets = pd.DataFrame(
        np.random.randint(0, 100, size=(n_rows, 1)), columns=['output'])

    # Add tag for dataframe
    syncer_obj.add_tag(X, "digits-dataset")

    # NOTE(review): presumably drops events buffered during setup so that
    # only the fit below is synced -- confirm against SyncerTest.
    syncer_obj.clear_buffer()

    param_grid = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4], 'C': [10, 100]}]
    searcher = GridSearchCV(SVC(), param_grid, cv=3)

    # The target frame is flattened to a 1-D array before fitting.
    searcher.fit_sync(X, targets.values.ravel())

    self.grid_search_event = syncer_obj.sync()[0]
def setUp(self):
    """Fit a PCA + linear-regression pipeline and keep the first synced event.

    Builds a ``SyncerTest`` for the "logistic-test" project, tags the
    dataframe, the pipeline and both of its steps, fits the pipeline via
    ``fit_sync`` on seeded random integer data, and stores the first
    event returned by ``sync()`` on ``self.pipeline_event``.
    """
    name = "logistic-test"
    author = "srinidhi"
    description = "income-level logistic regression"
    syncer_obj = SyncerTest(
        NewOrExistingProject(name, author, description),
        DefaultExperiment(),
        NewExperimentRun("Abc"),
        ThriftConfig(None, None))

    # Creating the pipeline. The second step is registered under the
    # name 'logistic' although the estimator is a LinearRegression.
    pca = decomposition.PCA()
    lr = linear_model.LinearRegression()
    pipe = Pipeline(steps=[('pca', pca), ('logistic', lr)])

    # Seed NumPy so the generated data is the same on every run.
    np.random.seed(0)
    X = pd.DataFrame(np.random.randint(0, 100, size=(100, 2)),
                     columns=list('AB'))
    y = pd.DataFrame(np.random.randint(0, 100, size=(100, 1)),
                     columns=['output'])

    # Add tags for models / dataframes
    syncer_obj.add_tag(X, "digits-dataset")
    syncer_obj.add_tag(pipe, "pipeline with pca + logistic")
    syncer_obj.add_tag(pca, "decomposition PCA")
    syncer_obj.add_tag(lr, "basic linear reg")

    # NOTE(review): presumably drops events buffered during setup so that
    # only the pipeline fit is synced -- confirm against SyncerTest.
    syncer_obj.clear_buffer()
    pipe.fit_sync(X, y)
    events = syncer_obj.sync()
    self.pipeline_event = events[0]
def setUp(self):
    """Perform a synced 70/30 train/test split and keep the first event.

    Builds a ``SyncerTest`` for the "random split test" project, splits
    randomly generated integer data with ``train_test_split_sync`` using
    ``train_size=0.7``, and stores the first event returned by ``sync()``
    on ``self.random_split_event``.
    """
    name = "random split test"
    author = "srinidhi"
    description = "70/30 split"
    syncer_obj = SyncerTest(
        NewOrExistingProject(name, author, description),
        DefaultExperiment(),
        NewExperimentRun("Abc"),
        ThriftConfig(None, None))

    X = pd.DataFrame(
        np.random.randint(0, 100, size=(100, 4)), columns=list('ABCD'))
    y = pd.DataFrame(
        np.random.randint(0, 100, size=(100, 1)), columns=['output'])

    # Add tag for dataframe
    syncer_obj.add_tag(X, "digits-dataset")

    # NOTE(review): presumably drops events buffered during setup so that
    # only the split is synced -- confirm against SyncerTest.
    syncer_obj.clear_buffer()

    # Only the event emitted by the split is inspected afterwards, so the
    # returned train/test partitions are intentionally not bound.
    cross_validation.train_test_split_sync(X, y, train_size=0.7)

    events = syncer_obj.sync()
    self.random_split_event = events[0]
def setUpClass(self):
    """Sync a freshly configured syncer and keep its first three events.

    The syncer is created with a new-or-existing project, a
    new-or-existing experiment and a new experiment run. The first three
    events returned by ``sync()`` are stored as ``project_event``,
    ``experiment_event`` and ``experiment_run_event`` respectively.
    """
    # NOTE(review): the first parameter is named ``self``; presumably this
    # is decorated with @classmethod outside the visible span -- confirm.
    project = NewOrExistingProject("name", "author", "desc")
    experiment = NewOrExistingExperiment("expName", "expDesc")
    experiment_run = NewExperimentRun("expRunDesc")
    syncer_obj = SyncerTest(
        project, experiment, experiment_run, ThriftConfig(None, None))

    synced = syncer_obj.sync()
    self.project_event = synced[0]
    self.experiment_event = synced[1]
    self.experiment_run_event = synced[2]
def setUpClass(self):
    """Sync a syncer that uses the default experiment; keep three events.

    The syncer is created for the "logistic-test" project with
    ``DefaultExperiment()`` and a new experiment run. The first three
    events returned by ``sync()`` are stored as ``project_event``,
    ``experiment_event`` and ``experiment_run_event`` respectively.
    """
    # NOTE(review): the first parameter is named ``self``; presumably this
    # is decorated with @classmethod outside the visible span -- confirm.
    project = NewOrExistingProject(
        "logistic-test",
        "srinidhi",
        "income-level logistic regression",
    )
    syncer_obj = SyncerTest(
        project,
        DefaultExperiment(),
        NewExperimentRun("Abc"),
        ThriftConfig(None, None),
    )

    synced = syncer_obj.sync()
    self.project_event = synced[0]
    self.experiment_event = synced[1]
    self.experiment_run_event = synced[2]
def setUpClass(self):
    """Run a synced ``fit_transform`` on a label encoder; keep two events.

    Builds a ``SyncerTest`` for the "logistic-test" project, tags a
    ``LabelEncoder``, calls ``fit_transform_sync`` on 100 randomly chosen
    letters and stores the first two events returned by ``sync()`` as
    ``fit_event`` and ``transform_event``.
    """
    # NOTE(review): the first parameter is named ``self``; presumably this
    # is decorated with @classmethod outside the visible span -- confirm.
    project = NewOrExistingProject(
        "logistic-test",
        "srinidhi",
        "income-level logistic regression",
    )
    syncer_obj = SyncerTest(
        project,
        DefaultExperiment(),
        NewExperimentRun("Abc"),
        ThriftConfig(None, None),
    )

    # Draw a (100, 1) array of letters and flatten it to one dimension.
    alphabet = ['A', 'B', 'C', 'D']
    samples = np.random.choice(alphabet, size=(100, 1)).ravel()
    encoder = preprocessing.LabelEncoder()

    # Add tag for model
    syncer_obj.add_tag(encoder, "label encoder")

    # NOTE(review): presumably drops events buffered during setup so that
    # only the fit/transform is synced -- confirm against SyncerTest.
    syncer_obj.clear_buffer()
    encoder.fit_transform_sync(samples)

    synced = syncer_obj.sync()
    self.fit_event = synced[0]
    self.transform_event = synced[1]
def setUp(self):
    """Fit a linear regression through ``fit_sync``; keep the first event.

    Builds a ``SyncerTest`` for the "logistic-test" project, tags the
    dataframe and the model, fits the model on seeded random integer
    data and stores the first event returned by ``sync()`` on
    ``self.fit_event``.
    """
    project = NewOrExistingProject(
        "logistic-test",
        "srinidhi",
        "income-level logistic regression",
    )
    syncer_obj = SyncerTest(
        project,
        DefaultExperiment(),
        NewExperimentRun("Abc"),
        ThriftConfig(None, None),
    )

    regressor = linear_model.LinearRegression()

    # Seed NumPy so the generated data is the same on every run; features
    # are drawn before targets.
    np.random.seed(0)
    n_rows = 100
    X = pd.DataFrame(
        np.random.randint(0, 100, size=(n_rows, 4)), columns=list('ABCD'))
    y = pd.DataFrame(
        np.random.randint(0, 100, size=(n_rows, 1)), columns=['output'])

    # Add tags for models / dataframes
    syncer_obj.add_tag(X, "digits-dataset")
    syncer_obj.add_tag(regressor, "linear reg")

    # NOTE(review): presumably drops events buffered during setup so that
    # only the fit is synced -- confirm against SyncerTest.
    syncer_obj.clear_buffer()
    regressor.fit_sync(X, y)

    self.fit_event = syncer_obj.sync()[0]