예제 #1
0
    def setUp(self):
        """Fit a grid-searched SVC on random data and keep the first event.

        Builds a SyncerTest for the "grid search test" project, tags the
        feature frame, runs a 3-fold GridSearchCV over an RBF SVC through
        ``fit_sync`` and stores the first event returned by ``sync()`` on
        ``self.grid_search_event``.
        """
        syncer = SyncerTest(
            NewOrExistingProject(
                "grid search test",
                "srinidhi",
                "Grid search cross validation - 3 folds"),
            DefaultExperiment(),
            NewExperimentRun("Abc"),
            ThriftConfig(None, None))

        # 2000 rows of random integers in [0, 100): four feature columns
        # followed by a single target column (drawn in that order).
        features = pd.DataFrame(
            np.random.randint(0, 100, size=(2000, 4)),
            columns=list('ABCD'))
        target_frame = pd.DataFrame(
            np.random.randint(0, 100, size=(2000, 1)),
            columns=['output'])
        # The estimator is fitted on a flat 1-D label array.
        labels = target_frame.values.ravel()

        # Tag the dataframe before the buffer is cleared.
        syncer.add_tag(features, "digits-dataset")
        # NOTE(review): presumably drops everything buffered so far, so that
        # events[0] below is the grid-search event -- confirm in SyncerTest.
        syncer.clear_buffer()

        param_grid = [{'kernel': ['rbf'],
                       'gamma': [1e-3, 1e-4],
                       'C': [10, 100]}]
        search = GridSearchCV(SVC(), param_grid, cv=3)
        search.fit_sync(features, labels)

        self.grid_search_event = syncer.sync()[0]
예제 #2
0
    def setUp(self):
        """Fit a PCA + linear-regression pipeline and keep the first event.

        Builds a SyncerTest for the "logistic-test" project, tags the data
        frame, the pipeline and both of its steps, fits the pipeline through
        ``fit_sync`` and stores the first event returned by ``sync()`` on
        ``self.pipeline_event``.
        """
        name = "logistic-test"
        author = "srinidhi"
        description = "income-level logistic regression"
        syncer_obj = SyncerTest(
            NewOrExistingProject(name, author, description),
            DefaultExperiment(), NewExperimentRun("Abc"),
            ThriftConfig(None, None))

        # Creating the pipeline.
        # NOTE(review): the second step is labelled 'logistic' although the
        # estimator is LinearRegression; the label is a runtime string and is
        # deliberately left unchanged.
        pca = decomposition.PCA()
        lr = linear_model.LinearRegression()
        pipe = Pipeline(steps=[('pca', pca), ('logistic', lr)])

        # Fixed seed so the generated data is identical on every run.
        np.random.seed(0)
        X = pd.DataFrame(np.random.randint(0, 100, size=(100, 2)),
                         columns=list('AB'))
        y = pd.DataFrame(np.random.randint(0, 100, size=(100, 1)),
                         columns=['output'])

        # Add tags for models / dataframes
        syncer_obj.add_tag(X, "digits-dataset")
        syncer_obj.add_tag(pipe, "pipeline with pca + logistic")
        syncer_obj.add_tag(pca, "decomposition PCA")
        syncer_obj.add_tag(lr, "basic linear reg")

        # NOTE(review): presumably drops everything buffered so far, so that
        # events[0] below is the pipeline event -- confirm in SyncerTest.
        syncer_obj.clear_buffer()
        pipe.fit_sync(X, y)
        events = syncer_obj.sync()
        self.pipeline_event = events[0]
예제 #3
0
    def setUp(self):
        """Run a synced train/test split and keep the first event.

        Builds a SyncerTest for the "random split test" project, tags the
        feature frame, calls ``train_test_split_sync`` with
        ``train_size=0.7`` and stores the first event returned by ``sync()``
        on ``self.random_split_event``.
        """
        name = "random split test"
        author = "srinidhi"
        description = "70/30 split"
        syncer_obj = SyncerTest(
            NewOrExistingProject(name, author, description),
            DefaultExperiment(),
            NewExperimentRun("Abc"),
            ThriftConfig(None, None))
        X = pd.DataFrame(np.random.randint(
            0, 100, size=(100, 4)), columns=list('ABCD'))
        y = pd.DataFrame(np.random.randint(
            0, 100, size=(100, 1)), columns=['output'])

        # Add tag for dataframe
        syncer_obj.add_tag(X, "digits-dataset")

        # NOTE(review): presumably drops everything buffered so far, so that
        # events[0] below is the split event -- confirm in SyncerTest.
        syncer_obj.clear_buffer()
        # The split frames themselves are not inspected by this fixture;
        # the call is made for the event it records.
        cross_validation.train_test_split_sync(X, y, train_size=0.7)
        events = syncer_obj.sync()
        self.random_split_event = events[0]
예제 #4
0
 @classmethod
 def setUpClass(cls):
     """Create a SyncerTest once per class and keep its first three events.

     unittest invokes ``setUpClass`` on the class itself, so the hook has to
     be a classmethod. The events returned by ``sync()`` are stored, in
     order, as ``project_event``, ``experiment_event`` and
     ``experiment_run_event`` on the class, where test methods can read them
     through ``self``.
     """
     syncer_obj = SyncerTest(NewOrExistingProject("name", "author", "desc"),
                             NewOrExistingExperiment("expName", "expDesc"),
                             NewExperimentRun("expRunDesc"),
                             ThriftConfig(None, None))
     events = syncer_obj.sync()
     cls.project_event = events[0]
     cls.experiment_event = events[1]
     cls.experiment_run_event = events[2]
예제 #5
0
 @classmethod
 def setUpClass(cls):
     """Create a SyncerTest once per class and keep its first three events.

     unittest invokes ``setUpClass`` on the class itself, so the hook has to
     be a classmethod. The syncer is built with a ``DefaultExperiment``; the
     events returned by ``sync()`` are stored, in order, as
     ``project_event``, ``experiment_event`` and ``experiment_run_event`` on
     the class, where test methods can read them through ``self``.
     """
     name = "logistic-test"
     author = "srinidhi"
     description = "income-level logistic regression"
     syncer_obj = SyncerTest(
         NewOrExistingProject(name, author, description),
         DefaultExperiment(),
         NewExperimentRun("Abc"),
         ThriftConfig(None, None),
     )
     events = syncer_obj.sync()
     cls.project_event = events[0]
     cls.experiment_event = events[1]
     cls.experiment_run_event = events[2]
예제 #6
0
    @classmethod
    def setUpClass(cls):
        """Run a synced LabelEncoder fit/transform once per class.

        unittest invokes ``setUpClass`` on the class itself, so the hook has
        to be a classmethod. A LabelEncoder is tagged, run through
        ``fit_transform_sync`` on 100 random letters, and the first two
        events returned by ``sync()`` are stored as ``fit_event`` and
        ``transform_event`` on the class, where test methods can read them
        through ``self``.
        """
        name = "logistic-test"
        author = "srinidhi"
        description = "income-level logistic regression"
        syncer_obj = SyncerTest(
            NewOrExistingProject(name, author, description),
            DefaultExperiment(),
            NewExperimentRun("Abc"),
            ThriftConfig(None, None))
        letters = ['A', 'B', 'C', 'D']
        # Draw a (100, 1) sample of letters and flatten it to 1-D.
        X = np.random.choice(letters, size=(100, 1)).ravel()
        model = preprocessing.LabelEncoder()

        # Add tag for model
        syncer_obj.add_tag(model, "label encoder")

        # NOTE(review): presumably drops everything buffered so far, so that
        # events[0] / events[1] are the fit and transform events -- confirm
        # in SyncerTest.
        syncer_obj.clear_buffer()
        model.fit_transform_sync(X)
        events = syncer_obj.sync()
        cls.fit_event = events[0]
        cls.transform_event = events[1]
예제 #7
0
    def setUp(self):
        """Fit a linear regression on seeded random data and keep one event.

        Builds a SyncerTest for the "logistic-test" project, tags the feature
        frame and the model, fits the model through ``fit_sync`` and stores
        the first event returned by ``sync()`` on ``self.fit_event``.
        """
        syncer = SyncerTest(
            NewOrExistingProject(
                "logistic-test",
                "srinidhi",
                "income-level logistic regression"),
            DefaultExperiment(),
            NewExperimentRun("Abc"),
            ThriftConfig(None, None))

        # Fixed seed so the generated data is identical on every run;
        # features are drawn before the target.
        np.random.seed(0)
        features = pd.DataFrame(
            np.random.randint(0, 100, size=(100, 4)),
            columns=list('ABCD'))
        target = pd.DataFrame(
            np.random.randint(0, 100, size=(100, 1)),
            columns=['output'])
        regressor = linear_model.LinearRegression()

        # Tag the dataframe first, then the model.
        syncer.add_tag(features, "digits-dataset")
        syncer.add_tag(regressor, "linear reg")

        # NOTE(review): presumably drops everything buffered so far, so that
        # events[0] below is the fit event -- confirm in SyncerTest.
        syncer.clear_buffer()
        regressor.fit_sync(features, target)

        self.fit_event = syncer.sync()[0]