Exemple #1
0
 def test_series_with_index(self):
     """Round-trip a Series with an explicit integer index through push/pull.

     The pulled Series must keep the index dtype and be equal to the
     original one.
     """
     stack = "test/pandas/series_with_index"
     original = pd.Series(np.array(['a', 'b', 'c', 'd']),
                          index=[100, 101, 102, 103])
     push(stack, original)
     restored = pull(stack)
     self.assertEqual(original.index.dtype, restored.index.dtype)
     self.assertTrue(original.equals(restored))
Exemple #2
0
 def test_with_schema(self):
     """Push a Markdown object and verify the pulled frame's metadata and text."""
     stack = "test/md"
     markdown = Markdown("Test *markdown*")
     ds.push(stack, markdown)

     context = ds.create_context(stack)
     pulled = ds.pull_data(context)

     self.assertEqual("text/markdown", pulled.content_type)
     self.assertEqual("markdown", pulled.application)
     # The payload comes back as bytes; decode it before comparing the text.
     decoded_text = pulled.data.value().decode("utf-8")
     self.assertEqual(markdown.text, decoded_text)
Exemple #3
0
 def test_nullable_types(self):
     """Round-trip a frame with nullable ``Int64``/boolean columns.

     The frame is pushed without its index and the pulled frame must report
     the same dtype for every column.
     """
     df = pd.DataFrame({"tag1": [10, None], "tag2": [True, None]})
     df1 = df.astype({"tag1": "Int64", "tag2": pd.BooleanDtype()})
     push("test/pandas/nullable_types",
          df1,
          encoder=DataFrameEncoder(index=False))
     df2 = pull("test/pandas/nullable_types")
     # The former ``map(lambda x, y: ..., zip(...))`` was never consumed
     # (``map`` is lazy in Python 3), so no dtype was ever compared. Compare
     # the column -> dtype mappings eagerly; this also catches missing or
     # extra columns and names the offending column on failure.
     self.assertEqual(df1.dtypes.to_dict(), df2.dtypes.to_dict())
Exemple #4
0
 def test_tab(self):
     """Push a figure with a ``my_tab`` parameter and check how it is stored.

     The parameter is expected on the first attachment with type ``"tab"``
     and the title that was given.
     """
     stack = "test/my_plot"
     ds.push(stack, self.get_figure(), my_tab=ds.tab("My brand new tab"))

     first_attachment = self.get_data(stack)["attachments"][0]
     tab_param = first_attachment["params"]["my_tab"]

     self.assertIsNotNone(tab_param)
     self.assertEqual("tab", tab_param["type"])
     self.assertEqual("My brand new tab", tab_param["title"])
Exemple #5
0
 def prepare_server_stack(self, version: str) -> str:
     """Push a fake server jar to the installer stack.

     :param version: value passed as the ``version`` push parameter.
     :return: file name of the fake jar that was pushed.
     """
     fake_jar = self.create_fake_file("fake-server.jar")
     push_params = dict(profile=Installer._PROFILE,
                        version=version,
                        jdk_version="8",
                        jdk_compatible_versions=self.java_version)
     push(Installer._STACK, fake_jar, **push_params)
     return fake_jar.name
Exemple #6
0
 def test_df_with_non_int_index(self):
     """Round-trip a frame indexed by dates and compare the pulled index.

     Both the index dtype and the index values must match the original.
     """
     stack = "test/pandas/df_with_index_non_int"
     date_index = pd.date_range('20130101', periods=6)
     values = np.random.randn(6, 4)
     original = pd.DataFrame(values, index=date_index, columns=list('ABCD'))

     push(stack, original)
     restored = pull(stack)

     self.assertEqual(original.index.dtype, restored.index.dtype)
     expected_index = original.index.to_series()
     self.assertTrue(expected_index.equals(restored.index.to_series()))
Exemple #7
0
 def test_push_params(self):
     """Check where ``params`` and ``meta`` values end up in the pushed frame.

     ``params`` is expected on the first attachment, while the ``FrameMeta``
     fields are expected in the frame-level ``params``.
     """
     stack = "test/my_plot"
     ds.push(stack,
             self.get_figure(),
             params={"z": 30},
             meta=ds.FrameMeta(text="hello", x=10, y=20))
     frame = self.get_data(stack)

     # Attachment-level parameters: only "z".
     attachment_params = frame["attachments"][0]["params"]
     self.assertEqual(1, len(attachment_params))
     self.assertEqual(30, attachment_params["z"])

     # Frame-level parameters: the three FrameMeta fields.
     frame_params = frame["params"]
     self.assertEqual(3, len(frame_params))
     self.assertEqual({"x": 10, "y": 20, "text": "hello"}, frame_params)
Exemple #8
0
 def test_with_schema(self):
     """Round-trip a frame with float, int, datetime and string columns.

     The frame is pushed without its index; the pulled frame must report
     the same dtype per column and the same first ``datetime`` value.
     """
     df = pd.DataFrame({
         "float": [1.0],
         "int": [1],
         "datetime": [pd.Timestamp("20180310")],
         "string": ["foo"]
     })
     push("test/pandas/df_with_schema",
          df,
          encoder=DataFrameEncoder(index=False))
     df1 = pull("test/pandas/df_with_schema")
     # The former ``map(lambda x, y: ..., zip(...))`` was never consumed
     # (``map`` is lazy in Python 3), so the schema was never checked.
     # Compare the column -> dtype mappings eagerly instead.
     self.assertEqual(df.dtypes.to_dict(), df1.dtypes.to_dict())
     self.assertEqual(df["datetime"][0], df1["datetime"][0])
Exemple #9
0
 def test_df_with_index(self):
     """Round-trip a frame that uses the default index and compare the index."""
     stack = "test/pandas/df_with_index"
     column_order = ["first_name", "last_name", "birth_year", "school"]
     people = {
         "first_name": ["John", "Donald", "Maryam", "Don", "Andrey"],
         "last_name": ["Milnor", "Knuth", "Mirzakhani", "Zagier", "Okunkov"],
         "birth_year": [1931, 1938, 1977, 1951, 1969],
         "school": ["Princeton", "Stanford", "Stanford", "MPIM", "Princeton"],
     }
     original = pd.DataFrame(people, columns=column_order)

     push(stack, original)
     restored = pull(stack)

     expected_index = original.index.to_series()
     self.assertTrue(expected_index.equals(restored.index.to_series()))
Exemple #10
0
    def test_download_jdk(self):
        """Push a fake JDK archive and verify the installer downloads it.

        After ``_download_jdk("8")`` the installer's JDK path must be a
        directory that contains ``file1.txt``.
        """
        archive = self.create_fake_archive("OpenJDK-1.8.0.121-x86_64-bin")
        # The fixture must be a regular file, not a directory.
        self.assertTrue(archive.exists())
        self.assertFalse(archive.is_dir())

        jdk_stack = f"{Installer._JDK_STACK_BASE}/8"
        push(jdk_stack,
             archive,
             profile=Installer._PROFILE,
             os=self.installer.get_os())

        self.installer._download_jdk("8")

        self.assertTrue(self.installer._jdk_path().exists())
        self.assertTrue(self.installer._jdk_path().is_dir())
        extracted_names = [
            entry.name for entry in self.installer._jdk_path().iterdir()
        ]
        self.assertIn("file1.txt", extracted_names)
Exemple #11
0
    def test_push_pull_linear_model(self):
        """Push a fitted ``LinearRegression`` and check the pulled copy.

        The pulled model must have the same coefficients, intercept and
        ``normalize`` setting as the model that was pushed.
        """
        # generate regression dataset
        x, y = make_regression(n_samples=20, n_features=1, noise=0.75)

        # create the training dataset (the held-out part is not needed here)
        from sklearn.model_selection import train_test_split
        x_train, _, y_train, _ = \
            train_test_split(x, y, test_size=0.3, random_state=1234)

        # train the simple Linear regression
        std_reg = LinearRegression()
        std_reg.fit(x_train, y_train)

        push("test/sklearn/my_linear_model", std_reg, "My first linear model")
        my_model: LinearRegression = pull("test/sklearn/my_linear_model")

        # ``coef_`` is a numpy array: ``assertEqual`` on arrays only works for
        # a single element (the truth value of a longer array is ambiguous),
        # so compare plain lists to stay correct for any number of features.
        self.assertEqual(std_reg.coef_.tolist(), my_model.coef_.tolist())
        self.assertEqual(std_reg.intercept_, my_model.intercept_)
        # NOTE(review): ``normalize`` may not exist on newer scikit-learn
        # releases -- confirm against the pinned version.
        self.assertEqual(std_reg.normalize, my_model.normalize)
Exemple #12
0
    def test_simple_logistic_regression(self):
        """Train a one-layer Keras classifier, push it and pull it back.

        The pulled object must be a ``tf.keras.models.Sequential`` instance.
        """
        data = load_breast_cancer()
        # normally we would put all of our imports at the top
        # but this lets us tell a story
        from sklearn.model_selection import train_test_split

        # split the data into train and test sets
        # this lets us simulate how our model will perform in the future
        x_train, x_test, y_train, y_test = train_test_split(
            data.data, data.target, test_size=0.33)
        # only the number of features is needed to size the input layer
        d = x_train.shape[1]

        # Scale the data
        # you'll learn why scaling is needed in a later course
        from sklearn.preprocessing import StandardScaler

        scaler = StandardScaler()
        x_train = scaler.fit_transform(x_train)
        x_test = scaler.transform(x_test)

        # Now all the fun Tensorflow stuff
        # Build the model
        model = tf.keras.models.Sequential([
            tf.keras.layers.Input(shape=(d,)),
            tf.keras.layers.Dense(1, activation="sigmoid")
        ])

        # Alternatively, you can do:
        # model = tf.keras.models.Sequential()
        # model.add(tf.keras.layers.Dense(1, input_shape=(d,), activation="sigmoid"))

        model.compile(optimizer="adam",
                      loss="binary_crossentropy",
                      metrics=["accuracy"])

        # Train the model (the returned training history is not used here)
        model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=100)

        # Evaluate the model - evaluate() returns loss and accuracy
        print("Train score:", model.evaluate(x_train, y_train))
        print("Test score:", model.evaluate(x_test, y_test))

        push("my_tf_model", model, "My first TF model")
        model1 = pull("my_tf_model")
        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(model1, tf.keras.models.Sequential)
Exemple #13
0
    def test_geo_df(self):
        """Push a GeoDataFrame and verify its content type and round trip.

        The first attachment must be stored as ``application/zip`` and the
        pulled frame must equal the original.
        """
        stack = "my_first_geo"
        cities = pd.DataFrame({
            'City':
            ['Buenos Aires', 'Brasilia', 'Santiago', 'Bogota', 'Caracas'],
            'Country':
            ['Argentina', 'Brazil', 'Chile', 'Colombia', 'Venezuela'],
            'Latitude': [-34.58, -15.78, -33.45, 4.60, 10.48],
            'Longitude': [-58.66, -47.91, -70.66, -74.08, -66.86]
        })
        points = geopandas.points_from_xy(cities.Longitude, cities.Latitude)
        geo_frame = geopandas.GeoDataFrame(cities, geometry=points)

        push(stack, geo_frame)
        first_attachment = self.get_data(stack)["attachments"][0]
        self.assertEqual("application/zip", first_attachment["content_type"])

        restored = pull(stack)
        self.assertTrue(geo_frame.equals(restored))
Exemple #14
0
 def test_per_frame_settings(self):
     """Check the Python and OS settings recorded with a pushed frame.

     The frame's ``settings["python"]`` entry must carry the version string
     plus each component of ``python_version_info``, and ``settings`` must
     contain an ``"os"`` key.
     """
     stack = "test/my_plot"
     ds.push(stack, self.get_figure())

     self.assertEqual(
         python_version,
         self.get_data(stack)["settings"]["python"]["version"])

     # Each component is stored under a key named like the attribute.
     for field in ("major", "minor", "micro", "releaselevel", "serial"):
         expected = getattr(python_version_info, field)
         reported = self.get_data(stack)["settings"]["python"][field]
         self.assertEqual(expected, reported)

     self.assertIn("os", self.get_data(stack)["settings"])
Exemple #15
0
    def test_stack_access(self):
        """Check how the ``access`` push argument is reflected in the frame.

        Without the argument the frame has no ``"access"`` key; otherwise
        the key holds the value that was passed.
        """
        ds.push("test/my_plot", self.get_figure())
        self.assertNotIn("access", self.get_data("test/my_plot"))

        explicit_cases = (("test/my_plot_1", "public"),
                          ("test/my_plot_2", "private"))
        for stack, access_level in explicit_cases:
            ds.push(stack, self.get_figure(), access=access_level)
            self.assertEqual(access_level, self.get_data(stack)["access"])
Exemple #16
0
import dstack.controls as ctrl
import dstack as ds
import plotly.express as px


@ds.cache()
def get_data():
    """Return the Plotly Express sample stocks dataset."""
    stocks = px.data.stocks()
    return stocks


def symbols_handler(self: ctrl.ComboBox):
    """Fill the combo box with every dataset column except the first one."""
    print("Calling symbols_handler")
    all_columns = get_data().columns
    self.items = all_columns[1:].tolist()


def output_handler(self, ticker):
    """Set the output to a line chart of the selected column over ``date``."""
    print("Calling output_handler")
    stocks = get_data()
    selected_column = ticker.value()
    self.data = px.line(stocks, x='date', y=selected_column)


# Build the application: one combo box filled by its handler and one output
# rendered by its handler.
app = ds.app(
    controls=[ctrl.ComboBox(handler=symbols_handler)],
    outputs=[ctrl.Output(handler=output_handler)],
)

# Publish the application to the "logs" stack and show where it lives.
result = ds.push("logs", app)
print(result.url)
import dstack as ds
import dstack.controls as ctrl
import pandas as pd


def app_handler(self: ctrl.Output, uploader: ctrl.FileUploader):
    """Show the first 100 rows of the first uploaded CSV file.

    When nothing has been uploaded, a Markdown placeholder is shown instead.
    """
    if len(uploader.uploads) == 0:
        self.data = ds.md("No file selected")
        return

    with uploader.uploads[0].open() as csv_file:
        self.data = pd.read_csv(csv_file).head(100)


# Build the application: a file uploader feeding a single output.
app = ds.app(
    controls=[ctrl.FileUploader(label="Select a CSV file")],
    outputs=[ctrl.Output(handler=app_handler)],
)

# Publish the application and print what ``ds.push`` returns.
url = ds.push("controls/file_uploader", app)
print(url)
Exemple #18
0
import dstack.controls as ctrl
import dstack as ds
import plotly.express as px


@ds.cache()
def get_data():
    """Return the Plotly Express sample stocks dataset."""
    stocks = px.data.stocks()
    return stocks


def output_handler(self, ticker):
    """Set the output to a line chart of the selected column over ``date``."""
    stocks = get_data()
    selected_column = ticker.value()
    self.data = px.line(stocks, x='date', y=selected_column)


# Build the application: a combo box listing every dataset column except the
# first, a static Markdown output, and a chart output driven by the combo box.
app = ds.app(
    controls=[ctrl.ComboBox(items=get_data().columns[1:].tolist())],
    outputs=[
        ctrl.Output(
            data=ds.md(
                "Here's a simple application with **Markdown** and a chart."
            )
        ),
        ctrl.Output(handler=output_handler),
    ],
)

# Publish the application to the "markdown" stack and show where it lives.
result = ds.push("markdown", app)
print(result.url)
from datetime import datetime, timedelta

import dstack.controls as ctrl
import dstack as ds
import plotly.express as px
import pandas_datareader as pdr


def output_handler(self: ctrl.Output, ticker: ctrl.TextField):
    """Chart the last 30 days of ``High`` prices for the typed ticker.

    A Markdown placeholder is shown while the text field is empty. A missing
    (``None``) or whitespace-only value is treated as empty as well, instead
    of raising on ``len(None)`` or sending a blank symbol to the data reader.
    """
    symbol = (ticker.text or "").strip()
    if not symbol:
        self.data = ds.md("No ticker selected")
        return

    # Read the clock once so the window is exactly 30 days wide.
    end = datetime.today()
    start = end - timedelta(days=30)
    df = pdr.data.DataReader(symbol, 'yahoo', start, end)
    self.data = px.line(df, x=df.index, y=df['High'])


# Build the application: a text field feeding a single chart output.
app = ds.app(
    controls=[ctrl.TextField(label="Select ticker")],
    outputs=[ctrl.Output(handler=output_handler)],
)

# Publish the application and show where it lives.
result = ds.push('controls/text_field', app)
print(result.url)
Exemple #20
0
        X[col] = X[col] / X[col].max()
    for c in countries:
        X[c] = X["Country"].apply(lambda x: 1 if x == c else 0)
    for s in sectors:
        if s:
            X[s] = X["Sector"].apply(lambda x: 1 if x == s else 0)

    X = X.drop(["Country", "Sector"], axis=1)
    return X


# Load the dataset, using its first column as the index.
df = pd.read_csv(
    "https://www.dropbox.com/s/cat8vm6lchlu5tp/data.csv?dl=1", index_col=0)

# Distinct category values handed to ``transform``.
countries = df["Country"].unique()
sectors = df["Sector"].unique()

# Keep the rows with RenewalMonth below 10; the target column is taken
# before the features are transformed.
X = df[df["RenewalMonth"] < 10].copy()
y = X["Churn"]
X = transform(X, countries, sectors)

# Hold out 30% of the rows with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=99)

model = LogisticRegression()
model.fit(X_train, y_train)

# Publish the fitted model and print what ``ds.push`` returns.
url = ds.push("sklearn_model", model)
print(url)
from datetime import datetime, timedelta

import dstack.controls as ctrl
import dstack as ds
import plotly.express as px
import pandas_datareader as pdr


def ticker_handler(self: ctrl.ComboBox):
    """Fill the combo box with a fixed list of ticker symbols."""
    symbols = ['FB', 'AMZN', 'AAPL', 'NFLX', 'GOOG']
    self.items = symbols


def output_handler(self: ctrl.Output, ticker: ctrl.ComboBox):
    """Chart the last 30 days of ``High`` prices for the selected ticker.

    A Markdown placeholder is shown when ``ticker.selected`` is not greater
    than -1.
    """
    if not ticker.selected > -1:
        self.data = ds.md("No ticker selected")
        return

    start = datetime.today() - timedelta(days=30)
    end = datetime.today()
    symbol = ticker.items[ticker.selected]
    history = pdr.data.DataReader(symbol, 'yahoo', start, end)
    self.data = px.line(history, x=history.index, y=history['High'])


# Build the application: a handler-filled combo box feeding one chart output.
ticker_box = ctrl.ComboBox(label="Select ticker", handler=ticker_handler)
app = ds.app(controls=[ticker_box],
             outputs=[ctrl.Output(handler=output_handler)])
del ticker_box  # keep the module namespace as it was

# Publish the application and show where it lives.
result = ds.push('controls/combo_box', app)
print(result.url)
Exemple #22
0
import dstack.controls as ctrl
import dstack as ds
import plotly.express as px


@ds.cache()
def get_data():
    """Return the Plotly Express sample stocks dataset."""
    stocks = px.data.stocks()
    return stocks


def output_handler(self, ticker):
    """Set the output to a line chart of the selected column over ``date``."""
    stocks = get_data()
    selected_column = ticker.value()
    self.data = px.line(stocks, x='date', y=selected_column)


# Build the application: a combo box listing every dataset column except the
# first, feeding a single chart output.
app = ds.app(
    controls=[ctrl.ComboBox(items=get_data().columns[1:].tolist())],
    outputs=[ctrl.Output(handler=output_handler)],
)

# Publish the application to the "stocks" stack and show where it lives.
result = ds.push("stocks", app)
print(result.url)
Exemple #23
0

def get_regions():
    """Return the distinct values of the ``Region`` column as a list."""
    region_column = get_data()["Region"]
    return region_column.unique().tolist()


def countries_handler(self: ctrl.ComboBox, regions: ctrl.ComboBox):
    """Fill the combo box with the distinct countries of the chosen region."""
    df = get_data()
    in_region = df["Region"] == regions.value()
    self.items = df[in_region]["Country"].unique().tolist()


# Region selector; the ``get_regions`` callable itself is passed as ``items``.
regions = ctrl.ComboBox(
    items=get_regions,
    label="Region",
)

# Multi-select country list, declared as depending on the region selector and
# filled by ``countries_handler``.
countries = ctrl.ComboBox(
    handler=countries_handler,
    label="Country",
    multiple=True,
    depends=[regions],
)


def output_handler(self: ctrl.Output, countries: ctrl.ComboBox):
    """Set the output to the rows whose ``Country`` is among the selection."""
    df = get_data()
    is_selected = df["Country"].isin(countries.value())
    self.data = df[is_selected]


# Build the application from the two combo boxes and one output that is
# declared as depending on the country selection.
# NOTE(review): the output is created via ``ds.Output`` here, whereas the
# handler is annotated with ``ctrl.Output`` -- confirm both names refer to
# the same class.
app = ds.app(
    controls=[regions, countries],
    outputs=[ds.Output(handler=output_handler, depends=[countries])],
)

# Publish the application and show where it lives.
result = ds.push('combo_box', app)
print(result.url)
Exemple #24
0
# Region selector listing the distinct ``Region`` values of ``x1``.
regions_ctrl = ctrl.ComboBox(
    x1["Region"].unique().tolist(),
    label="Region",
)
# Month selector with three fixed month names.
months_ctrl = ctrl.ComboBox(
    ['Oct', 'Nov', 'Dec'],
    label="Month",
)
# Churn toggle, initially selected, created with ``require_apply=False``.
churn_ctrl = ctrl.CheckBox(
    label="Churn",
    selected=True,
    require_apply=False,
)


def app_handler(self: ctrl.Output, regions_ctrl: ctrl.ComboBox,
                months_ctrl: ctrl.ComboBox, churn_ctrl: ctrl.CheckBox):
    """Show the predicted-churn table filtered by the three controls.

    Predictions from ``get_model()`` are added as a "Yes"/"No" column,
    ``RenewalMonth`` is replaced via the module-level ``months`` lookup, the
    yearly and ``Churn`` columns are dropped, and the rows are then filtered
    by churn flag, region and month.
    """
    features, model_input = get_data()
    predictions = get_model().predict(model_input)

    table = features.copy()
    table["Predicted Churn"] = predictions
    table["Predicted Churn"] = table["Predicted Churn"].apply(
        lambda flag: "Yes" if flag == 1.0 else "No")
    # Month numbers are 1-based, hence the ``- 1`` when indexing ``months``.
    table["RenewalMonth"] = table["RenewalMonth"].apply(
        lambda number: months[number - 1])

    dropped_columns = ["y2015", "y2016", "y2017", "y2018", "y2019", "Churn"]
    table = table.drop(dropped_columns, axis=1)

    wanted_churn = "Yes" if churn_ctrl.selected else "No"
    table = table[table["Predicted Churn"] == wanted_churn]
    table = table[table["Region"] == regions_ctrl.value()]
    table = table[table["RenewalMonth"] == months_ctrl.value()]
    self.data = table


# Build the application: three controls feeding one table output.
app = ds.app(
    controls=[regions_ctrl, months_ctrl, churn_ctrl],
    outputs=[ctrl.Output(handler=app_handler)],
)

# Publish the application and print what ``ds.push`` returns.
url = ds.push("sklearn", app)
print(url)
Exemple #25
0
    def test_linear_regression_weights(self):
        """Train a tiny PyTorch linear model and round-trip only its weights.

        The model is pushed with ``TorchModelEncoder.STORE_WHOLE_MODEL`` set
        to ``False`` and pulled into a fresh instance through
        ``TorchModelWeightsDecoder``. The pulled object must be that very
        instance and carry the same state dict as the trained model.
        """
        # Dummy training data: y = 2x + 1 for x = 0..10, as float32 columns.
        x_train = np.arange(11, dtype=np.float32).reshape(-1, 1)
        y_train = 2 * x_train + 1

        class LinearRegression(torch.nn.Module):
            def __init__(self, input_size, output_size):
                super().__init__()
                self.linear = torch.nn.Linear(input_size, output_size)

            def forward(self, x):
                return self.linear(x)

        input_dim = 1  # takes variable 'x'
        output_dim = 1  # takes variable 'y'
        learning_rate = 0.01
        epochs = 100

        model = LinearRegression(input_dim, output_dim)
        criterion = torch.nn.MSELoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

        # Keep the data on the device the model lives on. The previous code
        # moved the tensors to CUDA whenever it was available while the model
        # stayed on the CPU, which fails with a device mismatch on GPU hosts.
        # The conversion is also loop-invariant, so it is done once.
        device = next(model.parameters()).device
        inputs = torch.from_numpy(x_train).to(device)
        labels = torch.from_numpy(y_train).to(device)

        for epoch in range(epochs):
            # Clear gradient buffers so gradients from the previous epoch
            # do not accumulate.
            optimizer.zero_grad()

            # get output from the model, given the inputs
            outputs = model(inputs)

            # get loss for the predicted output
            loss = criterion(outputs, labels)

            # get gradients w.r.t. the parameters and update them
            loss.backward()
            optimizer.step()

            print('epoch {}, loss {}'.format(epoch, loss.item()))

        from dstack.torch.handlers import TorchModelEncoder
        from dstack.torch.handlers import TorchModelWeightsDecoder

        # NOTE(review): this class-level flag is not restored afterwards and
        # may leak into other tests -- confirm whether that is intended.
        TorchModelEncoder.STORE_WHOLE_MODEL = False
        push("my_torch_model", model, "My first PyTorch model")

        model1 = LinearRegression(input_dim, output_dim)
        my_model: LinearRegression = pull(
            "my_torch_model", decoder=TorchModelWeightsDecoder(model1))

        # Modules compare by identity, so state the expectation explicitly.
        self.assertIs(model1, my_model)

        # Compare tensors with torch.equal: ``==`` on a state dict only works
        # while every tensor has a single element.
        expected_state = model.state_dict()
        actual_state = my_model.state_dict()
        self.assertEqual(list(expected_state), list(actual_state))
        for name, tensor in expected_state.items():
            self.assertTrue(torch.equal(tensor, actual_state[name]), name)
Exemple #26
0
import dstack as ds
import dstack.controls as ctrl

from handlers import fake_handler

# Build the application: a single output, with the local "handlers" and
# "utils" modules listed as dependencies and packages read from
# requirements.txt.
app = ds.app(
    outputs=[ctrl.Output(handler=fake_handler)],
    depends=["handlers", "utils"],
    requirements="requirements.txt",
)

# An equivalent alternative lists the packages directly in ``depends``:
# ds.app(outputs=[ctrl.Output(handler=fake_handler)], depends=["numpy", "pandas", "faker==5.5.0", "handlers", "utils"])

# Publish the application and print what ``ds.push`` returns.
url = ds.push("depends", app)
print(url)
Exemple #27
0
 def test_series(self):
     """Round-trip a Series pushed without its index and compare the result."""
     stack = "test/pandas/series"
     original = pd.Series(np.array(['a', 'b', 'c', 'd']))
     push(stack, original, encoder=SeriesEncoder(index=False))
     restored = pull(stack)
     self.assertTrue(original.equals(restored))
Exemple #28
0
import dstack.controls as ctrl
import dstack as ds
import plotly.express as px


@ds.cache()
def get_data():
    """Return the Plotly Express sample gapminder dataset."""
    gapminder = px.data.gapminder()
    return gapminder


def output_handler(self: ctrl.Output, year: ctrl.Slider):
    """Set the output to a GDP-versus-life-expectancy scatter for one year.

    The year is the slider value at the slider's selected position.
    """
    selected_year = year.values[year.selected]
    rows_for_year = get_data().query("year==" + str(selected_year))
    chart_options = dict(x="gdpPercap",
                         y="lifeExp",
                         size="pop",
                         color="country",
                         hover_name="country",
                         log_x=True,
                         size_max=60)
    self.data = px.scatter(rows_for_year, **chart_options)


# Build the application: a slider over the distinct years of the dataset,
# created with ``require_apply=False``, feeding one chart output.
app = ds.app(
    controls=[
        ctrl.Slider(
            values=get_data()["year"].unique().tolist(),
            require_apply=False,
        )
    ],
    outputs=[ctrl.Output(handler=output_handler)],
)

# Publish the application and show where it lives.
result = ds.push('controls/slider', app)
print(result.url)
Exemple #29
0
 def test_per_frame_settings(self):
     """Check the Python version and OS entry recorded with a pushed frame."""
     stack = "test/my_plot"
     ds.push(stack, self.get_figure())

     reported_python = self.get_data(stack)["settings"]["python"]
     self.assertEqual(python_version, reported_python)

     self.assertIn("os", self.get_data(stack)["settings"])