Example #1
0
 def test_series_with_index(self):
     """Push a Series that has an explicit integer index, then pull it back.

     The pulled Series must have the same index dtype as the pushed one
     and must compare equal to it.
     """
     stack = "test/pandas/series_with_index"
     values = np.array(['a', 'b', 'c', 'd'])
     original = pd.Series(values, index=[100, 101, 102, 103])

     push(stack, original)
     restored = pull(stack)

     self.assertEqual(original.index.dtype, restored.index.dtype)
     self.assertTrue(original.equals(restored))
Example #2
0
 def test_nullable_types(self):
     """Push a frame with nullable column dtypes and check they are kept.

     The frame is cast to ``Int64`` / ``pd.BooleanDtype()`` so that the
     ``None`` entries are held in nullable extension dtypes, pushed with
     ``DataFrameEncoder(index=False)`` and pulled back. The column dtypes
     of the pulled frame must match those of the pushed frame.
     """
     df = pd.DataFrame({"tag1": [10, None], "tag2": [True, None]})
     df1 = df.astype({"tag1": "Int64", "tag2": pd.BooleanDtype()})
     push("test/pandas/nullable_types",
          df1,
          encoder=DataFrameEncoder(index=False))
     df2 = pull("test/pandas/nullable_types")
     # Compare materialised lists: a bare ``map(...)`` is lazy in Python 3
     # and would never run the assertions. Comparing whole lists also
     # catches a differing number of columns, which ``zip`` would hide.
     self.assertEqual(list(df2.dtypes), list(df1.dtypes))
Example #3
0
 def test_df_with_non_int_index(self):
     """Push a frame indexed by dates, then pull it back.

     The pulled frame must have the same index dtype and the same index
     values as the pushed one.
     """
     stack = "test/pandas/df_with_index_non_int"
     timestamps = pd.date_range('20130101', periods=6)
     values = np.random.randn(6, 4)
     original = pd.DataFrame(values, index=timestamps, columns=list('ABCD'))

     push(stack, original)
     restored = pull(stack)

     self.assertEqual(original.index.dtype, restored.index.dtype)
     expected_index = original.index.to_series()
     actual_index = restored.index.to_series()
     self.assertTrue(expected_index.equals(actual_index))
Example #4
0
 def test_with_schema(self):
     """Push a frame with mixed column types and check the schema is kept.

     The single-row frame holds a float, an int, a timestamp and a string
     column. It is pushed with ``DataFrameEncoder(index=False)`` and
     pulled back. The column dtypes and the timestamp value of the pulled
     frame must match the pushed frame.
     """
     df = pd.DataFrame({
         "float": [1.0],
         "int": [1],
         "datetime": [pd.Timestamp("20180310")],
         "string": ["foo"]
     })
     push("test/pandas/df_with_schema",
          df,
          encoder=DataFrameEncoder(index=False))
     df1 = pull("test/pandas/df_with_schema")
     # Compare materialised lists: a bare ``map(...)`` is lazy in Python 3
     # and would never run the assertions. Comparing whole lists also
     # catches a differing number of columns, which ``zip`` would hide.
     self.assertEqual(list(df.dtypes), list(df1.dtypes))
     self.assertEqual(df["datetime"][0], df1["datetime"][0])
Example #5
0
 def test_df_with_index(self):
     """Push a frame that uses the default index, then pull it back.

     The index of the pulled frame must equal the index of the pushed one.
     """
     stack = "test/pandas/df_with_index"
     people = {
         "first_name": ["John", "Donald", "Maryam", "Don", "Andrey"],
         "last_name": ["Milnor", "Knuth", "Mirzakhani", "Zagier", "Okunkov"],
         "birth_year": [1931, 1938, 1977, 1951, 1969],
         "school": ["Princeton", "Stanford", "Stanford", "MPIM", "Princeton"],
     }
     column_order = ["first_name", "last_name", "birth_year", "school"]
     original = pd.DataFrame(people, columns=column_order)

     push(stack, original)
     restored = pull(stack)

     expected_index = original.index.to_series()
     actual_index = restored.index.to_series()
     self.assertTrue(expected_index.equals(actual_index))
Example #6
0
    def test_push_pull_linear_model(self):
        """Fit a linear regression, push it, pull it back and compare.

        The pulled estimator must carry the same fitted coefficients, the
        same intercept and the same constructor parameters as the pushed
        one.
        """
        # generate regression dataset
        x, y = make_regression(n_samples=20, n_features=1, noise=0.75)

        # create the training dataset (the held-out part is not needed here)
        from sklearn.model_selection import train_test_split
        x_train, _, y_train, _ = \
            train_test_split(x, y, test_size=0.3, random_state=1234)

        # train the simple Linear regression
        std_reg = LinearRegression()
        std_reg.fit(x_train, y_train)

        push("test/sklearn/my_linear_model", std_reg, "My first linear model")
        my_model: LinearRegression = pull("test/sklearn/my_linear_model")

        # Compare coefficients as plain lists: ``assertEqual`` on numpy
        # arrays only works while the array has exactly one element.
        self.assertEqual(std_reg.coef_.tolist(), my_model.coef_.tolist())
        self.assertEqual(std_reg.intercept_, my_model.intercept_)
        # Compare every constructor parameter instead of the single
        # ``normalize`` attribute, which newer scikit-learn releases no
        # longer provide.
        self.assertEqual(std_reg.get_params(), my_model.get_params())
Example #7
0
    def test_simple_logistic_regression(self):
        """Train a one-layer Keras classifier, push it and pull it back.

        The model is a single sigmoid ``Dense`` unit trained on the
        standard-scaled breast-cancer dataset. After the round trip the
        pulled object must be a ``tf.keras.models.Sequential`` instance.
        """
        data = load_breast_cancer()
        # Local imports mirror the original layout of this test.
        from sklearn.model_selection import train_test_split
        from sklearn.preprocessing import StandardScaler

        # Split into train and test sets so the model can be evaluated on
        # data it has not been fitted on.
        x_train, x_test, y_train, y_test = train_test_split(
            data.data, data.target, test_size=0.33)
        n_features = x_train.shape[1]

        # Fit the scaler on the training data only and reuse it for the
        # test data.
        scaler = StandardScaler()
        x_train = scaler.fit_transform(x_train)
        x_test = scaler.transform(x_test)

        # Build the model: logistic regression as a single sigmoid unit.
        model = tf.keras.models.Sequential([
            tf.keras.layers.Input(shape=(n_features,)),
            tf.keras.layers.Dense(1, activation="sigmoid")
        ])

        model.compile(optimizer="adam",
                      loss="binary_crossentropy",
                      metrics=["accuracy"])

        # Train the model; the returned training history is not needed.
        model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=100)

        # Evaluate the model - evaluate() returns loss and accuracy
        print("Train score:", model.evaluate(x_train, y_train))
        print("Test score:", model.evaluate(x_test, y_test))

        push("my_tf_model", model, "My first TF model")
        model1 = pull("my_tf_model")
        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(model1, tf.keras.models.Sequential)
Example #8
0
    def test_geo_df(self):
        """Push a GeoDataFrame of city points, then pull it back.

        The first attachment reported by ``self.get_data`` for the stack
        must have the "application/zip" content type, and the pulled frame
        must equal the pushed one.
        """
        stack = "my_first_geo"
        cities = pd.DataFrame({
            'City':
            ['Buenos Aires', 'Brasilia', 'Santiago', 'Bogota', 'Caracas'],
            'Country':
            ['Argentina', 'Brazil', 'Chile', 'Colombia', 'Venezuela'],
            'Latitude': [-34.58, -15.78, -33.45, 4.60, 10.48],
            'Longitude': [-58.66, -47.91, -70.66, -74.08, -66.86]
        })
        points = geopandas.points_from_xy(cities.Longitude, cities.Latitude)
        gdf = geopandas.GeoDataFrame(cities, geometry=points)

        push(stack, gdf)
        first_attachment = self.get_data(stack)["attachments"][0]
        self.assertEqual("application/zip", first_attachment["content_type"])

        restored = pull(stack)
        self.assertTrue(gdf.equals(restored))
Example #9
0
    def test_linear_regression_weights(self):
        """Push only the weights of a PyTorch model and pull them back.

        A one-feature linear model is fitted to ``y = 2x + 1``. It is
        pushed with ``TorchModelEncoder.STORE_WHOLE_MODEL`` switched off
        and pulled through ``TorchModelWeightsDecoder`` into a fresh model
        instance. The decoder must return that very instance, and its
        state dict must match the trained model entry by entry.
        """
        # Dummy training data: y = 2x + 1 for x = 0..10, as column vectors.
        x_train = np.arange(11, dtype=np.float32).reshape(-1, 1)
        y_train = 2 * x_train + 1

        class LinearRegression(torch.nn.Module):
            def __init__(self, input_size, output_size):
                super().__init__()
                self.linear = torch.nn.Linear(input_size, output_size)

            def forward(self, x):
                return self.linear(x)

        input_dim = 1  # takes variable 'x'
        output_dim = 1  # takes variable 'y'
        learning_rate = 0.01
        epochs = 100

        model = LinearRegression(input_dim, output_dim)
        criterion = torch.nn.MSELoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

        # The model is never moved off the CPU, so the data stays there too:
        # putting only the inputs on the GPU fails with a device mismatch.
        # The tensors do not change between epochs, so build them once.
        inputs = torch.from_numpy(x_train)
        labels = torch.from_numpy(y_train)

        for epoch in range(epochs):
            # Clear gradient buffers so gradients from the previous epoch
            # are not accumulated.
            optimizer.zero_grad()

            # forward pass and loss for the predicted output
            loss = criterion(model(inputs), labels)

            # gradients w.r.t. the parameters, then the parameter update
            loss.backward()
            optimizer.step()

            print('epoch {}, loss {}'.format(epoch, loss.item()))

        from dstack.torch.handlers import TorchModelEncoder
        from dstack.torch.handlers import TorchModelWeightsDecoder

        # STORE_WHOLE_MODEL is a class-level switch; restore it afterwards
        # so this test does not change how other tests encode models.
        store_whole_model = TorchModelEncoder.STORE_WHOLE_MODEL
        TorchModelEncoder.STORE_WHOLE_MODEL = False
        try:
            push("my_torch_model", model, "My first PyTorch model")
            model1 = LinearRegression(input_dim, output_dim)
            my_model: LinearRegression = pull(
                "my_torch_model", decoder=TorchModelWeightsDecoder(model1))
        finally:
            TorchModelEncoder.STORE_WHOLE_MODEL = store_whole_model

        # The decoder is expected to hand back the instance it was given.
        self.assertIs(model1, my_model)

        # Compare state dicts entry by entry with torch.equal: ``==`` on
        # dicts of tensors only works for single-element tensors.
        expected_state = model.state_dict()
        actual_state = my_model.state_dict()
        self.assertEqual(list(expected_state.keys()),
                         list(actual_state.keys()))
        for key, expected in expected_state.items():
            self.assertTrue(torch.equal(expected, actual_state[key]),
                            "state dict entry {} differs".format(key))
Example #10
0
 def test_series(self):
     """Push a plain Series with ``SeriesEncoder(index=False)`` and pull it.

     The pulled Series must compare equal to the pushed one.
     """
     stack = "test/pandas/series"
     original = pd.Series(np.array(['a', 'b', 'c', 'd']))

     push(stack, original, encoder=SeriesEncoder(index=False))
     restored = pull(stack)

     self.assertTrue(original.equals(restored))
Example #11
0
def get_model():
    """Return whatever ``ds.pull`` yields for the "sklearn_model" stack."""
    model = ds.pull("sklearn_model")
    return model