Example 1
# figured out to use find_packages() via:
# https://stackoverflow.com/questions/10924885/is-it-possible-to-include-subdirectories-using-dist-utils-setup-py-as-part-of

from json import dump
from pathlib import Path

from setuptools import setup, find_packages

# NOTE: the import path for the git helpers below is an assumption; adjust to
# wherever is_repo, git_sha, git_patch_tracked, and git_patch_untracked live
from ravenml.utils.git import is_repo, git_sha, git_patch_tracked, git_patch_untracked

pkg_name = 'rmldatatfrecord'

# attempt to write git data to file
# NOTE: does NOT work in the GitHub tarball installation case
# this will work in 3 of the 4 install cases:
#   1. PyPI
#   2. GitHub clone
#   3. Local (editable); NOTE that in this case the file is not needed,
#       since ravenml finds git information at runtime in order to
#       include patch data
plugin_dir = Path(__file__).resolve().parent
repo_root = is_repo(plugin_dir)
if repo_root:
    info = {
        'plugin_git_sha': git_sha(repo_root),
        'plugin_tracked_git_patch': git_patch_tracked(repo_root),
        'plugin_untracked_git_patch': git_patch_untracked(repo_root)
    }
    with open(plugin_dir / pkg_name / 'git_info.json', 'w') as f:
        dump(info, f, indent=2)

setup(
    name=pkg_name,
    version='0.1',
    description='Dataset creation plugin for ravenML',
    packages=find_packages(),
    package_data={pkg_name: ['git_info.json']},
)
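
# The runtime fallback used in the later examples (git.retrieve_from_pkg)
# reads this git_info.json back out of the installed package. Below is a
# minimal sketch of what such a helper might look like; the name matches its
# usage in Examples 2 and 3, but the actual ravenml implementation is not
# shown in the source and may differ.
from json import load

def retrieve_from_pkg(pkg_dir: Path) -> dict:
    """Load the git info written at install time, if present.

    Returns an empty dict when git_info.json is absent, e.g. in the
    GitHub tarball case noted above, where setup.py could not write it.
    """
    info_path = pkg_dir / 'git_info.json'
    if not info_path.exists():
        return {}
    with open(info_path) as f:
        return load(f)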
Example 2
    def write_metadata(self):
        """Method writes out metadata in JSON format in file 'metadata.json',
            in root directory of dataset.

            If overridden, there are no expectations.

        Variables Needed:
            dataset_name (str): the name of the dataset (provided by 'create' input)
            created_by (str): name of who made the dataset (provided by 'create' input)
            comments (str): comments or notes supplied by the user regarding the
                dataset produced by this tool ((provided by 'create' input))
            training_type (str): the training type selected by the user (provided by 'create' input)
            image_ids (list): a list of image IDs that ended up in the final
                dataset (either dev or test) (provided by 'create' input)
            filters (dict): a dictionary representing filter metadata (provided by filtering methods)
            dataset_path (Path): where metadata will be written (provided by 'create' input)
        """
        dataset_path = self.dataset_path / self.dataset_name
        metadata_filepath = dataset_path / 'metadata.json'

        metadata = {}
        metadata["name"] = self.dataset_name
        metadata["date_created"] = datetime.utcnow().isoformat() + "Z"
        metadata["created_by"] = self.created_by
        metadata["comments"] = self.comments
        metadata["training_type"] = self.plugin_name
        metadata["image_ids"] = [(image_id[0].name, image_id[1])
                                 for image_id in self.image_ids]
        metadata["filters"] = self.filter_metadata

        # find ravenml directory
        rml_dir = Path(__file__).resolve().parent
        repo_root = git.is_repo(rml_dir)
        git_info = {}
        if repo_root:
            git_info["ravenml_git_sha"] = git.git_sha(repo_root)
            git_info["ravenml_tracked_git_patch"] = git.git_patch_tracked(
                repo_root)
            git_info["ravenml_untracked_git_patch"] = git.git_patch_untracked(
                repo_root)
        else:
            git_info = git.retrieve_from_pkg(rml_dir)
        metadata.update(git_info)

        # locate the file whose code called write_metadata, which must be
        # somewhere in the plugin; stack()[3] is the caller's frame three
        # levels up the call stack
        plugin_dir = Path(inspect.getmodule(
            inspect.stack()[3][0]).__file__).resolve().parent
        repo_root = git.is_repo(plugin_dir)
        git_info = {}
        if repo_root:
            git_info["plugin_git_sha"] = git.git_sha(repo_root)
            # note running the patch commands in the repo root will include patches for other plugins
            git_info["plugin_tracked_git_patch"] = git.git_patch_tracked(
                repo_root)
            git_info["plugin_untracked_git_patch"] = git.git_patch_untracked(
                repo_root)
        else:
            git_info = git.retrieve_from_pkg(plugin_dir)
        metadata.update(git_info)

        with open(metadata_filepath, 'w') as outfile:
            json.dump(metadata, outfile, indent=2)
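
# The git helper module used above (git.is_repo, git.git_sha,
# git.git_patch_tracked, git.git_patch_untracked) is not shown in the source.
# A minimal subprocess-based sketch, assuming the same names and return
# conventions (is_repo returns a falsy value outside a repository):
import subprocess
from pathlib import Path

def _run_git(repo_root, *args):
    # run a git command rooted at repo_root and return its stdout
    return subprocess.check_output(
        ['git', '-C', str(repo_root)] + list(args),
        stderr=subprocess.DEVNULL, text=True)

def is_repo(path):
    """Return the repo root containing `path`, or None if not in a repo."""
    try:
        return Path(_run_git(path, 'rev-parse', '--show-toplevel').strip())
    except subprocess.CalledProcessError:
        return None

def git_sha(repo_root):
    return _run_git(repo_root, 'rev-parse', 'HEAD').strip()

def git_patch_tracked(repo_root):
    # diff of all tracked files against HEAD
    return _run_git(repo_root, 'diff', 'HEAD')

def git_patch_untracked(repo_root):
    # build /dev/null diffs for untracked files; 'git diff --no-index'
    # exits 1 when the files differ, so check_output cannot be used here
    files = _run_git(repo_root, 'ls-files',
                     '--others', '--exclude-standard').splitlines()
    patches = []
    for rel in files:
        proc = subprocess.run(
            ['git', '-C', str(repo_root), 'diff', '--no-index',
             '--', '/dev/null', rel],
            capture_output=True, text=True)
        patches.append(proc.stdout)
    return '\n'.join(patches)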
Example 3
def process_result(ctx: click.Context, result: TrainOutput, config: str):
    """Processes the result of a training by analyzing the given TrainOutput object.
    This callback is called after ANY command originating from the train command 
    group, hence the check to see if a result was actually returned - plugins
    simply do not return a TrainOutput from non-training commands.

    Args:
        ctx (Context): click context object
        ti (TrainInput): TrainInput object stored at ctx.obj, created at start of training.
            This object contains the metadata from the run and the path to the
            training artifacts (at ti.artifact_path) which is printed to the user
            after a local training.
        result (TrainOutput): training output object returned by training plugin
        config (str): config option from train command. Click requires that command
            callbacks accept the options from the original command.
    """
    if result is not None:
        # only plugin training commands that return a TrainOutput will activate this block
        # thus ctx.obj will always be a TrainInput object
        # NOTE: you cannot use the @pass_train decorator on process_result, otherwise on
        # non-training plugin commands, the TrainInput __init__ will be called by Click
        # when process_result runs and no TrainInput is at ctx.obj
        ti = ctx.obj    
        
        # store git info for plugin
        # NOTE: this will fail for plugins not installed via source
        plugin_repo_root = git.is_repo(result.plugin_dir)
        git_info = {}
        if plugin_repo_root:
            git_info["plugin_git_sha"] = git.git_sha(plugin_repo_root)
            # note running the patch commands in repo root will include patches for all plugins
            git_info["plugin_tracked_git_patch"] = git.git_patch_tracked(plugin_repo_root)
            git_info["plugin_untracked_git_patch"] = git.git_patch_untracked(plugin_repo_root)
        else:
            git_info = git.retrieve_from_pkg(result.plugin_dir)
        ti.metadata.update(git_info)

        # upload if not in local mode, determined by the user-defined artifact_path field in config
        if not ti.config.get('artifact_path'):
            uuid = cli_spinner('Uploading artifacts...', _upload_result, result, ti.metadata, ti.plugin_metadata)
            click.echo(f'Artifact UUID: {uuid}')
        else:
            with open(ti.artifact_path / 'metadata.json', 'w') as f:
                json.dump(ti.metadata, f, indent=2)
            click.echo(f'LOCAL MODE: Not uploading model to S3. Model is located at: {ti.artifact_path}')
            
        # stop, terminate, or do nothing to ec2 based on policy
        ec2_policy = ti.config.get('ec2_policy')
        # check if the policy is to stop or terminate
        if ec2_policy is None or ec2_policy == 'stop' or ec2_policy == 'terminate':
            policy_str = ec2_policy if ec2_policy else 'default'
            click.echo(f'Checking for EC2 instance and applying policy "{policy_str}"...')
            try:
                # grab ec2 id
                with urlopen(EC2_INSTANCE_ID_URL, timeout=5) as url:
                    ec2_instance_id = url.read().decode('utf-8')
                click.echo('EC2 runtime detected.')
                client = boto3.client('ec2')
                # default is stop
                if ec2_policy is None or ec2_policy == 'stop':
                    click.echo("Stopping...")
                    client.stop_instances(InstanceIds=[ec2_instance_id], DryRun=False)
                else:
                    click.echo("Terminating...")
                    client.terminate_instances(InstanceIds=[ec2_instance_id], DryRun=False)
            except URLError:
                click.echo('No EC2 runtime detected. Doing nothing.')
        else:
            click.echo('Not checking for EC2 runtime since policy is to keep running.')
    return result
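
# How a result callback like this gets attached is not shown in the source.
# Below is a minimal sketch using click's result-callback mechanism, with a
# hypothetical 'train' group and stand-in command names; the --config option
# mirrors the docstring above. On click < 8.0 the decorator is spelled
# resultcallback() instead of result_callback().
import click

@click.group()
@click.option('--config', default=None, help='training configuration file')
@click.pass_context
def train(ctx, config):
    # options declared on the group are forwarded to the result callback
    pass

@train.command()
@click.pass_context
def tf_train(ctx):
    # a stand-in plugin command; its return value flows to handle_result
    return 'dummy TrainOutput'

@train.result_callback()
@click.pass_context
def handle_result(ctx, result, config):
    # receives the invoked subcommand's return value plus the group's options
    click.echo(f'got result: {result!r} (config={config})')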
Example 4
from json import dump
from pathlib import Path

from setuptools import setup

# NOTE: the import path for the git helpers below is an assumption; adjust to
# wherever is_repo, git_sha, git_patch_tracked, and git_patch_untracked live
from ravenml.utils.git import is_repo, git_sha, git_patch_tracked, git_patch_untracked

pkg_name = 'ravenml'

rml_dir = Path(__file__).resolve().parent
with open(rml_dir / 'README.md', encoding='utf-8') as f:
    long_description = f.read()

# attempt to write git data to file
# NOTE: does NOT work in the GitHub tarball installation case
# this will work in 3 of the 4 install cases:
#   1. PyPI
#   2. GitHub clone
#   3. Local (editable); NOTE that in this case the file is not needed,
#       since ravenml finds git information at runtime in order to
#       include patch data
repo_root = is_repo(rml_dir)
if repo_root:
    info = {
        'ravenml_git_sha': git_sha(repo_root),
        'ravenml_tracked_git_patch': git_patch_tracked(repo_root),
        'ravenml_untracked_git_patch': git_patch_untracked(repo_root)
    }
    with open(rml_dir / pkg_name / 'git_info.json', 'w') as f:
        dump(info, f, indent=2)

setup(name=pkg_name,
      version='1.2',
      description='ML Training CLI Tool',
      long_description=long_description,
      long_description_content_type='text/markdown',
      license='MIT',