def _metadata_to_json(self, run_id, step_name, task_id, metadata):
    user = get_username()
    return [{
        'flow_id': self._flow_name,
        'run_number': run_id,
        'step_name': step_name,
        'task_id': task_id,
        'field_name': datum.field,
        'type': datum.type,
        'value': datum.value,
        'tags': datum.tags,
        'user_name': user,
        'ts_epoch': int(round(time.time() * 1000))
    } for datum in metadata]
def _metadata_to_json(self, run_id, step_name, task_id, metadata):
    user = get_username()
    return [
        {
            "flow_id": self._flow_name,
            "run_number": run_id,
            "step_name": step_name,
            "task_id": task_id,
            "field_name": datum.field,
            "type": datum.type,
            "value": datum.value,
            "tags": list(set(datum.tags)) if datum.tags else [],
            "user_name": user,
            "ts_epoch": int(round(time.time() * 1000)),
        }
        for datum in metadata
    ]
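A minimal sketch of the difference between the two variants above, using a hypothetical MetaDatum namedtuple as a stand-in for Metaflow's metadata datum type: the second variant de-duplicates tags (losing their order), while the first passes them through unchanged.

from collections import namedtuple

# Hypothetical stand-in for the datum objects iterated above.
MetaDatum = namedtuple('MetaDatum', ['field', 'type', 'value', 'tags'])

datum = MetaDatum(field='attempt', type='attempt', value='0',
                  tags=['attempt_id:0', 'attempt_id:0'])

# First variant keeps duplicates: ['attempt_id:0', 'attempt_id:0']
# Second variant de-duplicates (order not guaranteed): ['attempt_id:0']
tags = list(set(datum.tags)) if datum.tags else []
assert tags == ['attempt_id:0']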
def get_runtime_environment(self, runtime_name):
    '''
    Returns a dictionary of environment variables to be set

    Parameters
    ----------
    runtime_name : string
        Name of the runtime for which to get the environment

    Returns
    -------
    dict[string] -> string
        Environment variables from this metadata provider
    '''
    return {'METAFLOW_RUNTIME_NAME': runtime_name,
            'USER': get_username()}
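A hedged usage sketch (illustrative, not Metaflow's actual call site): the returned mapping is meant to be merged into the environment of processes the runtime spawns. The provider method is stubbed here as a plain function with get_username() replaced by a literal.

import os
import subprocess

def get_runtime_environment(runtime_name):
    # Stand-in for the provider method above; 'alice' replaces get_username().
    return {'METAFLOW_RUNTIME_NAME': runtime_name, 'USER': 'alice'}

# Merge the provider's variables into a child-process environment.
env = dict(os.environ)
env.update(get_runtime_environment('cli'))
subprocess.run(['env'], env=env)  # child process sees the merged variables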
def __init__(self, name, flow, graph, code_package, code_package_url,
             metadata, datastore, environment, event_logger, monitor,
             image, image_pull_secrets, env, env_from, labels, annotations,
             max_workers, volumes, workflow_timeout):
    self.name = name
    self.flow = flow
    self.graph = graph
    self.code_package = code_package
    self.code_package_url = code_package_url
    self.metadata = metadata
    self.datastore = datastore
    self.environment = environment
    self.event_logger = event_logger
    self.monitor = monitor
    self.image = image
    self.image_pull_secrets = image_pull_secrets
    self.env = env
    self.env_from = env_from
    self.labels = labels
    self.annotations = annotations
    self.volumes = volumes
    self.attributes = {
        'labels': {
            'app': 'metaflow',
            'metaflow/workflow_template': name,
            'app.kubernetes.io/created-by': get_username(),
        },
        # TODO: Add annotations based on
        # https://kubernetes.io/blog/2021/04/20/annotating-k8s-for-humans/
        'annotations': {
            'metaflow/flow_name': self.flow.name,
        },
    }
    # Add Metaflow system tags as labels
    self.system_tags = {
        "metaflow/%s" % sys_tag[:sys_tag.index(":")]:
            sanitize_label_value(sys_tag[sys_tag.index(":") + 1:])
        for sys_tag in self.metadata.sticky_sys_tags
    }
    self.attributes['labels'].update(self.system_tags)
    self.max_workers = max_workers
    self.workflow_timeout = workflow_timeout
    self._flow_attributes = self._parse_flow_decorator()
    self._workflow = remove_empty_elements(self._compile())
    self._cron = self._cron()
def _prepare_environment(self, attr, env_decorator):
    default = {
        'METAFLOW_USER': get_username(),
        'METAFLOW_DATASTORE_SYSROOT_S3': DATASTORE_SYSROOT_S3,
    }
    if DEFAULT_METADATA:
        default['METAFLOW_DEFAULT_METADATA'] = DEFAULT_METADATA
    if METADATA_SERVICE_URL:
        default['METAFLOW_SERVICE_URL'] = METADATA_SERVICE_URL
    if METADATA_SERVICE_HEADERS:
        default['METADATA_SERVICE_HEADERS'] = METADATA_SERVICE_HEADERS
    # add env vars from the @environment decorator, if any exist
    default.update(env_decorator.get('vars', {}))
    default_env = [{'name': k, 'value': v} for k, v in default.items()]
    env = default_env + self._flow_attributes.get(
        'env', []) + self.env + attr.get('env', [])
    env_from = self._flow_attributes.get(
        'envFrom', []) + self.env_from + attr.get('envFrom', [])
    return env, env_from
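A small sketch of the dict-to-env-list conversion this helper performs, with made-up values; the concatenation order (defaults, flow-level attributes, constructor env, step-level attributes) determines where each entry lands in the container spec.

# Minimal sketch: convert a plain mapping into the list-of-dicts shape
# that the Kubernetes/Argo API expects for container env entries.
default = {'METAFLOW_USER': 'alice', 'METAFLOW_DEFAULT_METADATA': 'service'}
default_env = [{'name': k, 'value': v} for k, v in default.items()]
# -> [{'name': 'METAFLOW_USER', 'value': 'alice'},
#     {'name': 'METAFLOW_DEFAULT_METADATA', 'value': 'service'}]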
def flow_init(self, flow, graph, environment, datastore, logger, echo,
              options):
    self._option_values = options
    project_name = self.attributes.get('name')
    project_flow_name, branch_name = format_name(flow.name,
                                                 project_name,
                                                 options['production'],
                                                 options['branch'],
                                                 get_username())
    is_user_branch = options['branch'] is None and not options['production']
    echo("Project: *%s*, Branch: *%s*" % (project_name, branch_name),
         fg='magenta', highlight='green')
    current._update_env({
        'project_name': project_name,
        'branch_name': branch_name,
        'is_user_branch': is_user_branch,
        'is_production': options['production'],
        'project_flow_name': project_flow_name
    })
def make_flow(obj, token, name, tags, namespace, max_workers,
              workflow_timeout, is_project):
    datastore = obj.datastore(obj.flow.name,
                              mode='w',
                              metadata=obj.metadata,
                              event_logger=obj.event_logger,
                              monitor=obj.monitor)
    if datastore.TYPE != 's3':
        raise MetaflowException("AWS Step Functions requires --datastore=s3.")

    # Attach AWS Batch decorator to the flow
    decorators._attach_decorators(obj.flow, [BatchDecorator.name])
    decorators._init_step_decorators(
        obj.flow, obj.graph, obj.environment, obj.datastore, obj.logger)

    obj.package = MetaflowPackage(
        obj.flow, obj.environment, obj.echo, obj.package_suffixes)
    package_url = datastore.save_data(
        obj.package.sha, TransformableObject(obj.package.blob))

    return StepFunctions(name,
                         obj.graph,
                         obj.flow,
                         obj.package,
                         package_url,
                         token,
                         obj.metadata,
                         obj.datastore,
                         obj.environment,
                         obj.event_logger,
                         obj.monitor,
                         tags=tags,
                         namespace=namespace,
                         max_workers=max_workers,
                         username=get_username(),
                         workflow_timeout=workflow_timeout,
                         is_project=is_project)
def make_flow(obj, token, name, tags, namespace, max_workers,
              workflow_timeout, workflow_priority):
    # TODO: Make this check less specific to Amazon S3 as we introduce
    #       support for more cloud object stores.
    if obj.flow_datastore.TYPE != "s3":
        raise MetaflowException("Argo Workflows requires --datastore=s3.")

    # Attach @kubernetes and @environment decorator to the flow to
    # ensure that the related decorator hooks are invoked.
    decorators._attach_decorators(
        obj.flow, [KubernetesDecorator.name, EnvironmentDecorator.name])
    decorators._init_step_decorators(obj.flow, obj.graph, obj.environment,
                                     obj.flow_datastore, obj.logger)

    # Save the code package in the flow datastore so that both user code and
    # metaflow package can be retrieved during workflow execution.
    obj.package = MetaflowPackage(obj.flow, obj.environment, obj.echo,
                                  obj.package_suffixes)
    package_url, package_sha = obj.flow_datastore.save_data(
        [obj.package.blob], len_hint=1)[0]

    return ArgoWorkflows(
        name,
        obj.graph,
        obj.flow,
        package_sha,
        package_url,
        token,
        obj.metadata,
        obj.flow_datastore,
        obj.environment,
        obj.event_logger,
        obj.monitor,
        tags=tags,
        namespace=namespace,
        max_workers=max_workers,
        username=get_username(),
        workflow_timeout=workflow_timeout,
        workflow_priority=workflow_priority,
    )
def _execute_cmd(func, flow_name, run_id, user, my_runs, echo):
    if user and my_runs:
        raise CommandException("--user and --my-runs are mutually exclusive.")

    if run_id and my_runs:
        raise CommandException("--run-id and --my-runs are mutually exclusive.")

    if my_runs:
        user = util.get_username()

    latest_run = True

    if user and not run_id:
        latest_run = False

    if not run_id and latest_run:
        run_id = util.get_latest_run_id(echo, flow_name)
        if run_id is None:
            raise CommandException(
                "A previous run id was not found. Specify --run-id.")

    func(flow_name, run_id, user, echo)
def make_flow(
    obj, token, name, tags, namespace, max_workers, workflow_timeout, is_project
):
    if obj.flow_datastore.TYPE != "s3":
        raise MetaflowException("AWS Step Functions requires --datastore=s3.")

    # Attach AWS Batch decorator to the flow
    decorators._attach_decorators(obj.flow, [BatchDecorator.name])
    decorators._init_step_decorators(
        obj.flow, obj.graph, obj.environment, obj.flow_datastore, obj.logger
    )

    obj.package = MetaflowPackage(
        obj.flow, obj.environment, obj.echo, obj.package_suffixes
    )
    package_url, package_sha = obj.flow_datastore.save_data(
        [obj.package.blob], len_hint=1
    )[0]

    return StepFunctions(
        name,
        obj.graph,
        obj.flow,
        package_sha,
        package_url,
        token,
        obj.metadata,
        obj.flow_datastore,
        obj.environment,
        obj.event_logger,
        obj.monitor,
        tags=tags,
        namespace=namespace,
        max_workers=max_workers,
        username=get_username(),
        workflow_timeout=workflow_timeout,
        is_project=is_project,
    )
def step(ctx,
         step_name,
         code_package_sha,
         code_package_url,
         executable=None,
         image=None,
         iam_role=None,
         execution_role=None,
         cpu=None,
         gpu=None,
         memory=None,
         queue=None,
         run_time_limit=None,
         shared_memory=None,
         max_swap=None,
         swappiness=None,
         **kwargs):

    def echo(batch_id, msg, stream=sys.stdout):
        ctx.obj.echo_always("[%s] %s" % (batch_id, msg))

    if ctx.obj.datastore.datastore_root is None:
        ctx.obj.datastore.datastore_root = \
            ctx.obj.datastore.get_datastore_root_from_config(echo)

    if R.use_r():
        entrypoint = R.entrypoint()
    else:
        if executable is None:
            executable = ctx.obj.environment.executable(step_name)
        entrypoint = '%s -u %s' % (executable, os.path.basename(sys.argv[0]))

    top_args = " ".join(util.dict_to_cli_options(ctx.parent.parent.params))

    input_paths = kwargs.get("input_paths")
    split_vars = None
    if input_paths:
        max_size = 30 * 1024
        split_vars = {
            "METAFLOW_INPUT_PATHS_%d" % (i // max_size):
                input_paths[i:i + max_size]
            for i in range(0, len(input_paths), max_size)
        }
        kwargs["input_paths"] = "".join("${%s}" % s for s in split_vars.keys())

    step_args = " ".join(util.dict_to_cli_options(kwargs))
    step_cli = u"{entrypoint} {top_args} step {step} {step_args}".format(
        entrypoint=entrypoint,
        top_args=top_args,
        step=step_name,
        step_args=step_args)

    node = ctx.obj.graph[step_name]

    # Get retry information
    retry_count = kwargs.get("retry_count", 0)
    retry_deco = [deco for deco in node.decorators if deco.name == "retry"]
    minutes_between_retries = None
    if retry_deco:
        minutes_between_retries = int(
            retry_deco[0].attributes.get("minutes_between_retries", 1))

    # Set batch attributes
    attrs = {
        "metaflow.user": util.get_username(),
        "metaflow.flow_name": ctx.obj.flow.name,
        "metaflow.step_name": step_name,
        "metaflow.run_id": kwargs["run_id"],
        "metaflow.task_id": kwargs["task_id"],
        "metaflow.retry_count": str(retry_count),
        "metaflow.version":
            ctx.obj.environment.get_environment_info()["metaflow_version"],
    }

    env_deco = [deco for deco in node.decorators if deco.name == "environment"]
    if env_deco:
        env = env_deco[0].attributes["vars"]
    else:
        env = {}

    datastore_root = os.path.join(
        ctx.obj.datastore.make_path(ctx.obj.flow.name, kwargs['run_id'],
                                    step_name, kwargs['task_id']))

    # Add the environment variables related to the input-paths argument
    if split_vars:
        env.update(split_vars)

    if retry_count:
        ctx.obj.echo_always(
            "Sleeping %d minutes before the next AWS Batch retry"
            % minutes_between_retries)
        time.sleep(minutes_between_retries * 60)

    batch = Batch(ctx.obj.metadata, ctx.obj.environment)
    try:
        with ctx.obj.monitor.measure("metaflow.batch.launch"):
            batch.launch_job(step_name,
                             step_cli,
                             code_package_sha,
                             code_package_url,
                             ctx.obj.datastore.TYPE,
                             image=image,
                             queue=queue,
                             iam_role=iam_role,
                             execution_role=execution_role,
                             cpu=cpu,
                             gpu=gpu,
                             memory=memory,
                             run_time_limit=run_time_limit,
                             shared_memory=shared_memory,
                             max_swap=max_swap,
                             swappiness=swappiness,
                             env=env,
                             attrs=attrs)
    except Exception as e:
        print(e)
        _sync_metadata(echo, ctx.obj.metadata, datastore_root, retry_count)
        sys.exit(METAFLOW_EXIT_DISALLOW_RETRY)
    try:
        batch.wait(echo=echo)
    except BatchKilledException:
        # don't retry killed tasks
        traceback.print_exc()
        _sync_metadata(echo, ctx.obj.metadata, datastore_root, retry_count)
        sys.exit(METAFLOW_EXIT_DISALLOW_RETRY)
    _sync_metadata(echo, ctx.obj.metadata, datastore_root, retry_count)
def step(
    ctx,
    step_name,
    code_package_sha,
    code_package_url,
    executable=None,
    image=None,
    iam_role=None,
    execution_role=None,
    cpu=None,
    gpu=None,
    memory=None,
    queue=None,
    run_time_limit=None,
    shared_memory=None,
    max_swap=None,
    swappiness=None,
    host_volumes=None,
    **kwargs
):
    def echo(msg, stream="stderr", batch_id=None):
        msg = util.to_unicode(msg)
        if batch_id:
            msg = "[%s] %s" % (batch_id, msg)
        ctx.obj.echo_always(msg, err=(stream == sys.stderr))

    if R.use_r():
        entrypoint = R.entrypoint()
    else:
        if executable is None:
            executable = ctx.obj.environment.executable(step_name)
        entrypoint = "%s -u %s" % (executable, os.path.basename(sys.argv[0]))

    top_args = " ".join(util.dict_to_cli_options(ctx.parent.parent.params))

    input_paths = kwargs.get("input_paths")
    split_vars = None
    if input_paths:
        max_size = 30 * 1024
        split_vars = {
            "METAFLOW_INPUT_PATHS_%d" % (i // max_size): input_paths[i : i + max_size]
            for i in range(0, len(input_paths), max_size)
        }
        kwargs["input_paths"] = "".join("${%s}" % s for s in split_vars.keys())

    step_args = " ".join(util.dict_to_cli_options(kwargs))
    step_cli = u"{entrypoint} {top_args} step {step} {step_args}".format(
        entrypoint=entrypoint,
        top_args=top_args,
        step=step_name,
        step_args=step_args,
    )
    node = ctx.obj.graph[step_name]

    # Get retry information
    retry_count = kwargs.get("retry_count", 0)
    retry_deco = [deco for deco in node.decorators if deco.name == "retry"]
    minutes_between_retries = None
    if retry_deco:
        minutes_between_retries = int(
            retry_deco[0].attributes.get("minutes_between_retries", 1)
        )

    # Set batch attributes
    task_spec = {
        "flow_name": ctx.obj.flow.name,
        "step_name": step_name,
        "run_id": kwargs["run_id"],
        "task_id": kwargs["task_id"],
        "retry_count": str(retry_count),
    }
    attrs = {"metaflow.%s" % k: v for k, v in task_spec.items()}
    attrs["metaflow.user"] = util.get_username()
    attrs["metaflow.version"] = ctx.obj.environment.get_environment_info()[
        "metaflow_version"
    ]

    env_deco = [deco for deco in node.decorators if deco.name == "environment"]
    if env_deco:
        env = env_deco[0].attributes["vars"]
    else:
        env = {}

    # Add the environment variables related to the input-paths argument
    if split_vars:
        env.update(split_vars)

    if retry_count:
        ctx.obj.echo_always(
            "Sleeping %d minutes before the next AWS Batch retry"
            % minutes_between_retries
        )
        time.sleep(minutes_between_retries * 60)

    # this information is needed for log tailing
    ds = ctx.obj.flow_datastore.get_task_datastore(
        mode="w",
        run_id=kwargs["run_id"],
        step_name=step_name,
        task_id=kwargs["task_id"],
        attempt=int(retry_count),
    )
    stdout_location = ds.get_log_location(TASK_LOG_SOURCE, "stdout")
    stderr_location = ds.get_log_location(TASK_LOG_SOURCE, "stderr")

    def _sync_metadata():
        if ctx.obj.metadata.TYPE == "local":
            sync_local_metadata_from_datastore(
                DATASTORE_LOCAL_DIR,
                ctx.obj.flow_datastore.get_task_datastore(
                    kwargs["run_id"], step_name, kwargs["task_id"]
                ),
            )

    batch = Batch(ctx.obj.metadata, ctx.obj.environment)
    try:
        with ctx.obj.monitor.measure("metaflow.aws.batch.launch_job"):
            batch.launch_job(
                step_name,
                step_cli,
                task_spec,
                code_package_sha,
                code_package_url,
                ctx.obj.flow_datastore.TYPE,
                image=image,
                queue=queue,
                iam_role=iam_role,
                execution_role=execution_role,
                cpu=cpu,
                gpu=gpu,
                memory=memory,
                run_time_limit=run_time_limit,
                shared_memory=shared_memory,
                max_swap=max_swap,
                swappiness=swappiness,
                env=env,
                attrs=attrs,
                host_volumes=host_volumes,
            )
    except Exception as e:
        traceback.print_exc()
        _sync_metadata()
        sys.exit(METAFLOW_EXIT_DISALLOW_RETRY)
    try:
        batch.wait(stdout_location, stderr_location, echo=echo)
    except BatchKilledException:
        # don't retry killed tasks
        traceback.print_exc()
        sys.exit(METAFLOW_EXIT_DISALLOW_RETRY)
    finally:
        _sync_metadata()
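The 30 KB chunking of input_paths, shared by the Batch and Kubernetes step commands, works around per-variable environment size limits. A standalone sketch of the round trip; in the real job the container's shell expands the ${...} placeholders, but the reassembly is done in Python here for illustration.

# Standalone sketch of the input-paths chunking used above.
input_paths = "x" * 70000          # pretend this is a long list of task paths
max_size = 30 * 1024

split_vars = {
    "METAFLOW_INPUT_PATHS_%d" % (i // max_size): input_paths[i:i + max_size]
    for i in range(0, len(input_paths), max_size)
}
placeholder = "".join("${%s}" % s for s in split_vars)
# -> "${METAFLOW_INPUT_PATHS_0}${METAFLOW_INPUT_PATHS_1}${METAFLOW_INPUT_PATHS_2}"

# In the container, the shell expands the placeholders back into the full
# string; the same reassembly in Python:
reassembled = "".join(
    split_vars["METAFLOW_INPUT_PATHS_%d" % i] for i in range(len(split_vars))
)
assert reassembled == input_paths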
def resolve_token(
    name, token_prefix, obj, authorize, given_token, generate_new_token, is_project
):
    # 1) retrieve the previous deployment, if one exists
    workflow = StepFunctions.get_existing_deployment(name)
    if workflow is None:
        obj.echo(
            "It seems this is the first time you are deploying *%s* to "
            "AWS Step Functions." % name
        )
        prev_token = None
    else:
        prev_user, prev_token = workflow

    # 2) authorize this deployment
    if prev_token is not None:
        if authorize is None:
            authorize = load_token(token_prefix)
        elif authorize.startswith("production:"):
            authorize = authorize[11:]

        # we allow the user who deployed the previous version to re-deploy,
        # even if they don't have the token
        if prev_user != get_username() and authorize != prev_token:
            obj.echo(
                "There is an existing version of *%s* on AWS Step "
                "Functions which was deployed by the user "
                "*%s*." % (name, prev_user)
            )
            obj.echo(
                "To deploy a new version of this flow, you need to use "
                "the same production token that they used. "
            )
            obj.echo(
                "Please reach out to them to get the token. Once you "
                "have it, call this command:"
            )
            obj.echo("    step-functions create --authorize MY_TOKEN", fg="green")
            obj.echo(
                'See "Organizing Results" at docs.metaflow.org for more '
                "information about production tokens."
            )
            raise IncorrectProductionToken(
                "Try again with the correct production token."
            )

    # 3) do we need a new token or should we use the existing token?
    if given_token:
        if is_project:
            # we rely on a known prefix for @project tokens, so we can't
            # allow the user to specify a custom token with an arbitrary prefix
            raise MetaflowException(
                "--new-token is not supported for "
                "@projects. Use --generate-new-token to "
                "create a new token."
            )
        if given_token.startswith("production:"):
            given_token = given_token[11:]
        token = given_token
        obj.echo("")
        obj.echo("Using the given token, *%s*." % token)
    elif prev_token is None or generate_new_token:
        token = new_token(token_prefix, prev_token)
        if token is None:
            if prev_token is None:
                raise MetaflowInternalError(
                    "We could not generate a new token. This is unexpected."
                )
            else:
                raise MetaflowException(
                    "--generate-new-token option is not "
                    "supported after using --new-token. "
                    "Use --new-token to make a new "
                    "namespace."
                )
        obj.echo("")
        obj.echo("A new production token generated.")
    else:
        token = prev_token

    obj.echo("")
    obj.echo("The namespace of this production flow is")
    obj.echo("    production:%s" % token, fg="green")
    obj.echo(
        "To analyze results of this production flow "
        "add this line in your notebooks:"
    )
    obj.echo('    namespace("production:%s")' % token, fg="green")
    obj.echo(
        "If you want to authorize other people to deploy new versions "
        "of this flow to AWS Step Functions, they need to call"
    )
    obj.echo("    step-functions create --authorize %s" % token, fg="green")
    obj.echo("when deploying this flow to AWS Step Functions for the first time.")
    obj.echo(
        'See "Organizing Results" at https://docs.metaflow.org/ for more '
        "information about production tokens."
    )
    obj.echo("")
    store_token(token_prefix, token)
    return token
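The 11 in the authorize[11:] and given_token[11:] slices above is len("production:"). A hedged sketch of a helper (hypothetical, not part of Metaflow) that makes the prefix strip explicit:

_PRODUCTION_PREFIX = "production:"

def strip_production_prefix(token):
    # Hypothetical helper: equivalent to the token[11:] slices above,
    # without the magic number.
    if token.startswith(_PRODUCTION_PREFIX):
        return token[len(_PRODUCTION_PREFIX):]
    return token

assert strip_production_prefix("production:abc123") == "abc123"
assert strip_production_prefix("abc123") == "abc123"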
def container_template(self, node):
    """
    Returns an Argo container template spec to execute a step
    """
    attr = parse_step_decorator(node, ArgoStepDecorator)
    env_decorator = parse_step_decorator(node, EnvironmentDecorator)
    retry_decorator = parse_step_decorator(node, RetryDecorator)
    catch_decorator = parse_step_decorator(node, CatchDecorator)
    res_decorator = parse_step_decorator(node, ResourcesDecorator)
    k8s_decorator = parse_step_decorator(node, KubernetesDecorator)

    resources = merge_resources(
        res_decorator,
        {k: v for k, v in k8s_decorator.items()
         if k in ResourcesDecorator.defaults})

    image = attr.get('image') or k8s_decorator.get(
        'image') or self._default_image()
    env, env_from = self._prepare_environment(attr, env_decorator)
    res = self._resources(resources)
    volume_mounts = attr.get('volumeMounts', [])
    volume_mounts.append(self._shared_memory(resources))

    user_code_retries = retry_decorator.get('times', 0)
    total_retries = user_code_retries + 1 if catch_decorator else user_code_retries
    retry_count = '{{retries}}' if total_retries else '0'
    cmd = self._commands(node, retry_count, user_code_retries)

    metadata = {
        'labels': {
            **attr.get('labels', {}),
            **self.attributes['labels'],
            'metaflow/step_name': sanitize_label_value(dns_name(node.name)),
            'app.kubernetes.io/name': 'metaflow-task',
            'app.kubernetes.io/part-of': 'metaflow',
            'app.kubernetes.io/created-by': get_username(),
        },
        'annotations': {
            **attr.get('annotations', {}),
            **self.attributes['annotations'],
            # should be a label but cannot sanitize argo variables
            'metaflow/attempt': retry_count,
        },
    }
    metadata['labels'].update(self.system_tags)

    template = {
        'name': dns_name(node.name),
        'metadata': metadata,
        'activeDeadlineSeconds': get_run_time_limit_for_task(node.decorators),
        'inputs': {
            'parameters': [{'name': 'input-paths'}],
            'artifacts': attr.get('input_artifacts'),
        },
        'outputs': {
            'parameters': [{
                'name': 'task-id',
                'value': '{{pod.name}}'
            }],
            'artifacts': attr.get('output_artifacts')
        },
        'nodeSelector': attr.get('nodeSelector'),
        'container': {
            'image': image,
            'volumeMounts': volume_mounts,
            'command': [cmd[0]],
            'args': cmd[1:],
            'env': env,
            'envFrom': env_from,
            'resources': {
                'requests': res,
                'limits': res
            }
        },
    }
    if total_retries:
        template['retryStrategy'] = {
            'retryPolicy': 'Always',
            # fallback_step for @catch is only executed if
            # retry_count > user_code_retries
            'limit': str(total_retries),
            'backoff': {
                'duration': '%sm' % str(
                    retry_decorator['minutes_between_retries']
                    if user_code_retries else 0),
            }
        }
    if self._is_foreach_first_child(node):
        template['inputs']['parameters'].append({'name': 'split-index'})
    if node.type == 'foreach':
        template['outputs']['parameters'].append({
            'name': 'num-splits',
            'valueFrom': {
                'path': ArgoInternalStepDecorator.splits_file_path
            }
        })
    return template
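For concreteness, a sketch of the retryStrategy this produces for a hypothetical step decorated with @retry(times=3, minutes_between_retries=2) plus @catch; total_retries is 3 + 1 because the @catch fallback consumes one extra attempt.

# Illustrative result for @retry(times=3, minutes_between_retries=2) + @catch:
retry_strategy = {
    'retryPolicy': 'Always',
    'limit': '4',            # 3 user-code retries + 1 @catch fallback attempt
    'backoff': {'duration': '2m'},
}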
def step(
    ctx,
    step_name,
    code_package_sha,
    code_package_url,
    executable=None,
    image=None,
    service_account=None,
    secrets=None,
    node_selector=None,
    k8s_namespace=None,
    cpu=None,
    disk=None,
    memory=None,
    gpu=None,
    gpu_vendor=None,
    run_time_limit=None,
    **kwargs
):
    def echo(msg, stream="stderr", job_id=None):
        msg = util.to_unicode(msg)
        if job_id:
            msg = "[%s] %s" % (job_id, msg)
        ctx.obj.echo_always(msg, err=(stream == sys.stderr))

    node = ctx.obj.graph[step_name]

    # Construct entrypoint CLI
    if executable is None:
        executable = ctx.obj.environment.executable(step_name)

    # Set environment
    env = {}
    env_deco = [deco for deco in node.decorators if deco.name == "environment"]
    if env_deco:
        env = env_deco[0].attributes["vars"]

    # Set input paths.
    input_paths = kwargs.get("input_paths")
    split_vars = None
    if input_paths:
        max_size = 30 * 1024
        split_vars = {
            "METAFLOW_INPUT_PATHS_%d" % (i // max_size): input_paths[i : i + max_size]
            for i in range(0, len(input_paths), max_size)
        }
        kwargs["input_paths"] = "".join("${%s}" % s for s in split_vars.keys())
        env.update(split_vars)

    # Set retry policy.
    retry_count = int(kwargs.get("retry_count", 0))
    retry_deco = [deco for deco in node.decorators if deco.name == "retry"]
    minutes_between_retries = None
    if retry_deco:
        minutes_between_retries = int(
            retry_deco[0].attributes.get("minutes_between_retries", 2)
        )
    if retry_count:
        ctx.obj.echo_always(
            "Sleeping %d minutes before the next retry" % minutes_between_retries
        )
        time.sleep(minutes_between_retries * 60)

    step_cli = "{entrypoint} {top_args} step {step} {step_args}".format(
        entrypoint="%s -u %s" % (executable, os.path.basename(sys.argv[0])),
        top_args=" ".join(util.dict_to_cli_options(ctx.parent.parent.params)),
        step=step_name,
        step_args=" ".join(util.dict_to_cli_options(kwargs)),
    )

    # Set log tailing.
    ds = ctx.obj.flow_datastore.get_task_datastore(
        mode="w",
        run_id=kwargs["run_id"],
        step_name=step_name,
        task_id=kwargs["task_id"],
        attempt=int(retry_count),
    )
    stdout_location = ds.get_log_location(TASK_LOG_SOURCE, "stdout")
    stderr_location = ds.get_log_location(TASK_LOG_SOURCE, "stderr")

    def _sync_metadata():
        if ctx.obj.metadata.TYPE == "local":
            sync_local_metadata_from_datastore(
                DATASTORE_LOCAL_DIR,
                ctx.obj.flow_datastore.get_task_datastore(
                    kwargs["run_id"], step_name, kwargs["task_id"]
                ),
            )

    try:
        kubernetes = Kubernetes(
            datastore=ctx.obj.flow_datastore,
            metadata=ctx.obj.metadata,
            environment=ctx.obj.environment,
        )
        # Configure and launch Kubernetes job.
        with ctx.obj.monitor.measure("metaflow.kubernetes.launch_job"):
            kubernetes.launch_job(
                flow_name=ctx.obj.flow.name,
                run_id=kwargs["run_id"],
                step_name=step_name,
                task_id=kwargs["task_id"],
                attempt=str(retry_count),
                user=util.get_username(),
                code_package_sha=code_package_sha,
                code_package_url=code_package_url,
                code_package_ds=ctx.obj.flow_datastore.TYPE,
                step_cli=step_cli,
                docker_image=image,
                service_account=service_account,
                secrets=secrets,
                node_selector=node_selector,
                namespace=k8s_namespace,
                cpu=cpu,
                disk=disk,
                memory=memory,
                gpu=gpu,
                gpu_vendor=gpu_vendor,
                run_time_limit=run_time_limit,
                env=env,
            )
    except Exception as e:
        traceback.print_exc(chain=False)
        _sync_metadata()
        sys.exit(METAFLOW_EXIT_DISALLOW_RETRY)
    try:
        kubernetes.wait(stdout_location, stderr_location, echo=echo)
    except KubernetesKilledException:
        # don't retry killed tasks
        traceback.print_exc()
        sys.exit(METAFLOW_EXIT_DISALLOW_RETRY)
    finally:
        _sync_metadata()