def start(self):
    """
    Use the Metaflow client to retrieve the latest successful run from our
    MovieStatsFlow and assign its artifacts as data artifacts in this flow.

    This step uses 'conda' to isolate the environment. This step will always
    use pandas==1.3.3 regardless of what is installed on the system.
    """
    from metaflow import Flow, get_metadata

    # Print metadata provider
    print("Using metadata provider: %s" % get_metadata())

    # Load the analysis from the MovieStatsFlow.
    run = Flow("MovieStatsFlow").latest_successful_run
    print("Using analysis from '%s'" % str(run))

    # Get the dataframe from the start step before we sliced it into
    # genre-specific dataframes.
    self.dataframe = run["start"].task.data.dataframe

    # Also grab the summary statistics.
    self.genre_stats = run.data.genre_stats

    # Compute our two recommendation types in parallel.
    self.next(self.bonus_movie, self.genre_movies)
def start(self):
    """
    Use the Metaflow client to retrieve the latest successful run from our
    MovieStatsFlow and assign them as data artifacts in this flow.

    This step uses 'conda' to isolate the environment. This step will always
    use pandas==0.24.2 regardless of what is installed on the system.
    """
    from metaflow import get_metadata

    print("Using metadata provider: %s" % get_metadata())

    self.next(self.extract)
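# Both start steps above mention a conda-isolated step that pins pandas, but
# the decorators themselves are not shown. A minimal sketch of how such a
# step is typically declared in Metaflow follows; the flow name, Python
# version, and step bodies are illustrative assumptions, not taken from the
# source.
from metaflow import FlowSpec, step, conda, conda_base


@conda_base(python='3.9.10')
class CondaExampleFlow(FlowSpec):

    @conda(libraries={'pandas': '1.3.3'})
    @step
    def start(self):
        # pandas==1.3.3 is installed into an isolated conda environment for
        # this step, regardless of what is available on the host.
        import pandas as pd
        print("pandas version inside the step: %s" % pd.__version__)
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == '__main__':
    CondaExampleFlow()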
def start(self):
    """
    The 'start' step is a regular step, so it runs locally on the machine
    from which the flow is executed.
    """
    from metaflow import get_metadata

    print("HelloAWS is starting.")
    print("")
    print("Using metadata provider: %s" % get_metadata())
    print("")
    print("The start step is running locally. Next, the ")
    print("'hello' step will run remotely on AWS Batch. ")
    print("If you are running in the Netflix sandbox, ")
    print("it may take some time to acquire a compute resource.")

    self.next(self.hello)
def start(self):
    """
    Use the Metaflow client to retrieve the latest successful run from our
    MovieStatsFlow and assign them as data artifacts in this flow.
    """
    from metaflow import Flow, get_metadata

    # Print metadata provider
    print("Using metadata provider: %s" % get_metadata())

    # Load the analysis from the MovieStatsFlow.
    run = Flow("MovieStatsFlow").latest_successful_run
    print("Using analysis from '%s'" % str(run))

    self.genre_stats = run.data.genre_stats

    # Compute our two recommendation types in parallel.
    self.next(self.bonus_movie, self.genre_movies)
from metaflow import Flow, get_metadata

# Print metadata provider
print("Using metadata provider: %s" % get_metadata())

# Load the analysis from the GenreStatsFlow.
run = Flow('GenreStatsFlow').latest_successful_run
print("Using analysis from '%s'" % str(run))

genre_stats = run.data.genre_stats
print(genre_stats)
from metaflow import Flow, get_metadata, Run

print("Metadata", get_metadata())

from metaflow_train import FinalData
from typing import List
import chart_studio.plotly as py
import plotly.graph_objects as go
import plotly.express as ps
from plotly.subplots import make_subplots
import math
import os
import datetime
import itertools
import seaborn as sns
from scipy.stats import norm


def get_key_map(arr):
    # Return the cartesian product of the input iterables as a list of tuples.
    finalmap = []
    for i in itertools.product(*arr):
        finalmap.append(i)
    return finalmap


def plot_and_save_grad_figures(run: Run):
    """
    Plots gradients directly. Will fail if the gradients are too large.
    """
    final_data_arr = run.data.final_data
    run_dir_path = "RunAnalytics/Run-" + run.id

    # Gradients are collected for the whole flow, so if the first entry has
    # no gradients, none of them do.
    if not hasattr(final_data_arr[0], 'gradients'):
        return None
    if len(final_data_arr[0].gradients['avg']) == 0:
from metaflow import Flow, get_metadata
from metaflow.datatools.dolt import DoltDT
from doltpy.core import Dolt


def print_data_map(data_map):
    for run_step in data_map.keys():
        for table in data_map[run_step]:
            print('{}, {}'.format(run_step, table))
            # print(data_map[run_step][table])


print("Current metadata provider: %s" % get_metadata())

doltdb_path = './imdb-reviews'

flow = Flow('IMDBSentimentsFlow')
run = flow.latest_successful_run
print("Using run: %s" % str(run))

'''
Ex 1: Get all the inputs used by a specific run of a flow
'''
# doltdt = DoltDT(run, doltdb_path, 'master')
# data_map_for_run = doltdt.get_reads(steps=['start'])
# print_data_map(data_map_for_run)

'''
Ex 2: Get all the inputs used by a specific step of a run of a flow
'''
# doltdt = DoltDT(run, doltdb_path, 'vinai/add-rotten-data')
# data_map_for_run = doltdt.get_reads(steps=['start'])
# print_data_map(data_map_for_run)

'''
Ex 3: Outputs are handled identically
# Module-level imports needed by the handler below. The names `dynamodb`,
# `EVENTS_RECORD_STORE`, and `_parse_tags` are referenced but not defined in
# this snippet; they are assumed to exist at module scope (a hypothetical
# sketch follows after the handler).
import json
import os
from datetime import datetime


def lambda_handler(event, context):
    print(event)

    for record in event['Records']:
        key = record['s3']['object']['key']
        bucket_name = record['s3']['bucket']['name']

        # Point Metaflow at a writable config location inside the Lambda sandbox.
        os.environ['METAFLOW_HOME'] = '/tmp'
        os.environ['USERNAME'] = "******"

        obj = {
            'METAFLOW_DEFAULT_METADATA': 'service',
            'METAFLOW_DEFAULT_DATASTORE': 's3',
            'METAFLOW_DATASTORE_SYSROOT_S3': f"s3://{bucket_name}",
            'METAFLOW_SERVICE_AUTH_KEY': "yvhNDfEzcRa5fxKq2ZELda1zk8wNXxMs17Jt4OGs",
            'METAFLOW_SERVICE_URL': "https://5sqcgnuyte.execute-api.eu-west-1.amazonaws.com/api/"
        }
        with open('/tmp/config.json', 'w', encoding='utf-8') as f:
            json.dump(obj, f, ensure_ascii=False, indent=4)

        # Import Metaflow only after the config file is in place so that it
        # picks up the settings written above.
        from metaflow import Run, get_metadata, namespace

        namespace(None)
        print(get_metadata())

        # Object keys are laid out as <flow>/<run_id>/<step>/...
        step = key.split("/")[2]
        flow = key.split("/")[0]
        run_id = key.split("/")[1]

        run = Run(f"{flow}/{run_id}")

        dynamo_object = {
            "created_at": int(
                datetime.strptime(
                    run.created_at.split(".")[0],
                    '%Y-%m-%dT%H:%M:%S').timestamp()),
            "flow_name": flow,
            "run_id": int(run_id),
            "success": run.successful,
            "finished": run.finished,
            "finished_at": 0 if run.finished_at is None else int(
                datetime.strptime(
                    run.finished_at.split(".")[0],
                    '%Y-%m-%dT%H:%M:%S').timestamp()),
            "current_step": step,
            "user": _parse_tags(run.tags, "user"),
            "tags": run.tags,
            "bucket": bucket_name
        }
        print(dynamo_object)

        table = dynamodb.Table(EVENTS_RECORD_STORE)
        table.put_item(Item=dynamo_object)

    return
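# The handler above relies on `dynamodb`, `EVENTS_RECORD_STORE`, and
# `_parse_tags`, which are not part of the snippet. Below is a minimal,
# hypothetical sketch of that module-level scaffolding: it assumes the table
# name comes from an environment variable and that Metaflow tags are strings
# of the form "key:value" (e.g. the system tag "user:<name>"). None of this
# is confirmed by the source.
import os

import boto3

dynamodb = boto3.resource('dynamodb')
EVENTS_RECORD_STORE = os.environ.get('EVENTS_RECORD_STORE', 'metaflow-run-events')


def _parse_tags(tags, prefix):
    # Return the value of the first tag matching "<prefix>:<value>", or None.
    for tag in tags:
        if tag.startswith(prefix + ":"):
            return tag.split(":", 1)[1]
    return None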