def write_prediction_to_table(dolt_context: DoltDT, reviews, labels, predictions, table_name):
    """Join reviews, labels and predictions column-wise and write them to a table.

    Args:
        dolt_context: Object whose ``write_table(table_name=, df=, pks=)``
            method receives the combined frame.
        reviews: pandas object holding the reviews; its index is the row
            index of the output. The write uses ``pks=['review']``, so the
            combined frame is expected to contain a ``review`` column.
        labels: pandas object concatenated as a column next to ``reviews``.
        predictions: Model output. A ``pd.Series`` is used as-is (renamed to
            ``'predictions'``); any other sequence/array is treated as
            positional and is given ``reviews``' index.
        table_name: Name of the destination table.
    """
    if isinstance(predictions, pd.Series):
        predictions = predictions.rename('predictions')
    else:
        # pd.concat(axis=1) aligns rows on index labels. A bare array would
        # otherwise get a fresh 0..n-1 index and be misaligned (NaN-padded)
        # whenever `reviews` carries any other index.
        predictions = pd.Series(predictions, index=reviews.index, name='predictions')

    result = pd.concat([reviews, labels, predictions], axis=1)
    dolt_context.write_table(table_name=table_name, df=result, pks=['review'])
def stats(self):
    """Score the model on train and test data and write predictions to Dolt.

    Prints the train and test scores rounded to two decimals, then writes one
    table of (review, label, prediction) rows per split: ``train_results``
    and ``test_results``. Finally transitions to the ``end`` step.
    """
    train_score = self.model.score(self.train_bigram, self.train_labels)
    print(f"Train Score {round(train_score, 2)}")
    train_predictions = self.model.predict(self.train_bigram)

    test_score = self.model.score(self.test_bigram, self.test_labels)
    print(f"Test Score {round(test_score, 2)}")
    test_predictions = self.model.predict(self.test_bigram)

    # Output the predictions to a result table
    def write_prediction_to_table(dolt_context: DoltDT, reviews, labels, predictions, table_name):
        if isinstance(predictions, pd.Series):
            predictions = predictions.rename('predictions')
        else:
            # pd.concat(axis=1) aligns on index labels; give the positional
            # predictions the reviews' index so rows cannot be misaligned
            # when that index is not the default 0..n-1 range.
            predictions = pd.Series(predictions, index=reviews.index, name='predictions')
        result = pd.concat([reviews, labels, predictions], axis=1)
        dolt_context.write_table(table_name=table_name, df=result, pks=['review'])

    with DoltDT(run=self, doltdb_path=self.doltdb_path, branch='vinai/add-rotten-data') as dolt:
        write_prediction_to_table(dolt, self.train_reviews, self.train_labels, train_predictions, "train_results")
        write_prediction_to_table(dolt, self.test_reviews, self.test_labels, test_predictions, "test_results")

    self.next(self.end)
def middle(self):
    """Write ``self.inp1 + self.inp2`` to table 'baz', then go to ``end``."""
    with DoltDT(run=self) as dolt_ctx:
        summed = self.inp1 + self.inp2
        dolt_ctx.write_table(table_name='baz', df=summed, pks=['index'])
    self.next(self.end)
def middle(self):
    """Double column "B" of ``self.df`` in place and write it to table 'baz'."""
    with DoltDT(run=self, database='foo', branch="master") as dolt_ctx:
        # `frame` aliases self.df, so the column update is visible on self.df too.
        frame = self.df
        doubled = frame["B"].map(lambda value: value * 2)
        frame["B"] = doubled
        dolt_ctx.write_table(table_name='baz', df=frame, pks=['index'])
    self.next(self.end)
def add_random(self):
    """Shift the 'gross' column by one random integer and write table 'movies'.

    A single offset drawn from ``random.randint(1, 1000000)`` is added to
    every row of ``self.df['gross']``.
    """
    import random

    with DoltDT(run=self, doltdb_path='metaflow_movies') as dolt_ctx:
        offset = random.randint(1, 1000000)
        self.df['gross'] = self.df['gross'] + offset
        dolt_ctx.write_table(table_name='movies', df=self.df, pks=['movie_title'])
    self.next(self.end)
def start(self):
    """Re-read the first table read and first table written by another run.

    ``self.flow_dep`` is split on "/" into a flow name and run id, which
    identify the ``DoltRun`` whose first read and first write are loaded, at
    their recorded commits, into ``self.inp1`` and ``self.inp2``.
    """
    flow_name, run_id = self.flow_dep.split("/")
    upstream = DoltRun(flow_name=flow_name, run_id=run_id)
    first_read, first_write = upstream.reads[0], upstream.writes[0]

    with DoltDT(run=self) as dolt_ctx:
        self.inp1 = dolt_ctx.read_table(first_read.table_name, commit=first_read.commit)
        self.inp2 = dolt_ctx.read_table(first_write.table_name, commit=first_write.commit)
    self.next(self.middle)
def start(self):
    """Load table 'bar' and require a recent successful ``SucceedsFirstDemo`` run.

    Raises:
        Exception: if ``SucceedsFirstDemo`` has no successful run, or its
            latest successful run finished more than one minute ago.
    """
    with DoltDT(run=self, database='foo', branch="master") as dolt:
        self.df = dolt.read_table('bar')

    first_run = Flow("SucceedsFirstDemo").latest_successful_run
    # No successful run at all counts as "not run recently" rather than
    # failing with an AttributeError on None.
    stale = first_run is None
    if not stale:
        # The trailing 'Z' in the format marks the timestamp as UTC, so compare
        # against the current UTC time instead of local time plus a fixed
        # 8-hour offset (which is only right in a UTC-8 zone without DST).
        first_run_ts = datetime.datetime.strptime(
            first_run.finished_at, "%Y-%m-%dT%H:%M:%SZ"
        ).replace(tzinfo=datetime.timezone.utc)
        one_minute_ago = (
            datetime.datetime.now(datetime.timezone.utc)
            - datetime.timedelta(minutes=1)
        )
        stale = first_run_ts < one_minute_ago

    if stale:
        raise Exception(
            "Run `FirstDemo` within one minute of `SecondDemo`")
    self.next(self.middle)
def start(self):
    """Read the train/test review tables and split out reviews and labels.

    Stores the raw tables on ``self.train_table`` / ``self.test_table`` and
    their 'review' and 'sentiment' columns on the matching ``*_reviews`` /
    ``*_labels`` attributes, then transitions to ``bigram_representation``.
    """
    with DoltDT(run=self, doltdb_path=self.doltdb_path, branch='vinai/add-rotten-data') as dolt_ctx:
        train = dolt_ctx.read_table('reviews_train')
        test = dolt_ctx.read_table('reviews_test')
        self.train_table, self.test_table = train, test

    # Split the train and test into matrices and labels
    self.train_reviews, self.train_labels = train['review'], train['sentiment']
    self.test_reviews, self.test_labels = test['review'], test['sentiment']

    self.next(self.bigram_representation)
def predict(self):
    """Load the pickled model, predict on ``self.test_set`` and write 'result'.

    The result frame has the 'sample' column, the 'species' column renamed to
    'labels', and the model output as 'predictions'. It is stored on
    ``self.result`` and written with ``pks=['sample']``.
    """
    with DoltDT(run=self, doltdb_path='iris-model-results') as dolt:
        # NOTE(review): pickle.load can execute arbitrary code from the file;
        # only load a 'model.p' that comes from a trusted source.
        # The context manager closes the file handle, which was previously leaked.
        with open('model.p', 'rb') as model_file:
            self.model = pickle.load(model_file)
        self.model_type = 'Decision Tree'

        samples = self.test_set['sample']
        y_true = self.test_set['species'].rename('labels')

        features = self.test_set.drop(columns=['species', 'sample'])
        # pd.concat(axis=1) aligns on index labels, so give the predictions the
        # test set's index instead of a fresh 0..n-1 range.
        predictions = pd.Series(
            self.model.predict(features),
            index=self.test_set.index,
            name='predictions',
        )

        self.result = pd.concat([samples, y_true, predictions], axis=1)
        dolt.write_table(table_name='result', df=self.result, pks=['sample'])

    self.next(self.end)
def start(self):
    """Read table 'bar' into ``self.df`` and move on to ``middle``."""
    with DoltDT(run=self) as dolt_ctx:
        bar_table = dolt_ctx.read_table('bar')
        self.df = bar_table
    self.next(self.middle)
def start(self):
    """Read table 'iris-test' into ``self.test_set`` and move on to ``predict``."""
    # Start by getting original dataset
    with DoltDT(run=self, database='iris-test') as dolt_ctx:
        iris_table = dolt_ctx.read_table('iris-test')
        self.test_set = iris_table
    self.next(self.predict)
def start(self):
    """Read table 'bar' at commit ``self.bar_version`` into ``self.df``."""
    with DoltDT(run=self, database='foo', branch="master") as dolt_ctx:
        pinned_commit = self.bar_version
        self.df = dolt_ctx.read_table('bar', commit=pinned_commit)
    self.next(self.middle)
def end(self):
    """Call ``commit_table_writes()`` on the 'iris-model-results' database."""
    results_db = DoltDT(run=self, doltdb_path='iris-model-results')
    with results_db as dolt_ctx:
        dolt_ctx.commit_table_writes()
def end(self):
    """Call ``commit_table_writes()`` on the 'metaflow_demo' database."""
    demo_db = DoltDT(run=self, doltdb_path='metaflow_demo')
    with demo_db as dolt_ctx:
        dolt_ctx.commit_table_writes()