Code example #1
    def load_workflow_config(cls, template_dir):
        """
        Given a workflow 'template' directory, load the config from
        ``workflow.yaml`` (including injected defaults).
        
        Args:
            template_dir:
                A template directory containing workflow.yaml
        
        Returns:
            (workflow_cls, config_data)
            A tuple of the workflow class (a type) and the config data (a dict)
        """
        config_path = f'{template_dir}/workflow.yaml'

        if not os.path.exists(config_path):
            raise RuntimeError(
                f"Error: workflow.yaml not found in {template_dir}")

        # Determine workflow type and load config
        _cfg = load_config(config_path, {})
        if "workflow-name" not in _cfg:
            raise RuntimeError(
                f"Workflow config at {config_path} does not specify a workflow-name."
            )

        workflow_cls = Workflow.get_workflow_cls(_cfg['workflow-name'])
        config_data = load_config(config_path, workflow_cls.schema())
        return workflow_cls, config_data
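
A hedged usage sketch for the example above, assuming load_workflow_config is exposed as a classmethod on Workflow (as its cls parameter suggests); the template directory name here is hypothetical:

# Hypothetical call: 'my-template/' is a directory containing workflow.yaml
workflow_cls, config_data = Workflow.load_workflow_config('my-template')
print(workflow_cls.__name__)           # the class selected by 'workflow-name'
print(config_data['workflow-name'])    # config dict, with schema defaults injected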
Code example #2
def test_failed_validate():
    schema = {'properties': {'mystring': {'type': 'string'}}}

    data = {"mystring": 123}

    f = StringIO()
    yaml.dump(data, f)
    f.seek(0)

    with pytest.raises(ValidationError):
        load_config(f, schema)
Code example #3
File: launchflow.py  Project: aplbrain/flyemflows
def _load_and_overwrite_dask_config(execution_dir, cluster_type):
    # Load dask config, inject defaults for (selected) missing entries, and overwrite in-place.
    dask_config_path = os.path.abspath(f'{execution_dir}/dask-config.yaml')
    if os.path.exists(dask_config_path):
        # Check for completely empty dask config file
        from ruamel.yaml import YAML
        yaml = YAML()
        with open(dask_config_path, 'r') as f:
            config = yaml.load(f)
        if not config:
            dask_config = {}
            validate(dask_config, DaskConfigSchema, inject_defaults=True)
        else:
            dask_config = load_config(dask_config_path, DaskConfigSchema)
    else:
        dask_config = {}
        validate(dask_config, DaskConfigSchema, inject_defaults=True)

    # Don't pollute the config file with extra jobqueue parameters we aren't using
    if "jobqueue" in dask_config:
        for key in list(dask_config["jobqueue"].keys()):
            if key != cluster_type:
                del dask_config["jobqueue"][key]

        if len(dask_config["jobqueue"]) == 0:
            del dask_config["jobqueue"]

    dump_config(dask_config, dask_config_path)

    # This environment variable is recognized by dask itself
    os.environ["DASK_CONFIG"] = dask_config_path
    dask.config.paths.append(dask_config_path)
    dask.config.refresh()
Code example #4
def test_load_from_path():
    d = tempfile.mkdtemp()
    config = {'mynumber': 99}
    path = f'{d}/test_load_from_path.yaml'
    with open(path, 'w') as f:
        yaml.dump(config, f)

    loaded = load_config(path, TEST_SCHEMA, True)
    assert loaded['mynumber'] == 99
    assert loaded['mystring'] == "DEFAULT"
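
Several tests in this listing reference a shared TEST_SCHEMA fixture that is not shown. The following is a plausible reconstruction, inferred from the assertions in this test and in code example #7, not copied from the confiddler test suite:

TEST_SCHEMA = {
    'type': 'object',
    'default': {},
    'properties': {
        'mystring': {'type': 'string', 'default': 'DEFAULT'},
        'mynumber': {'type': 'number', 'default': 0},
        'myobject': {
            'type': 'object',
            'default': {},
            'properties': {
                'inner-string': {'type': 'string', 'default': 'INNER_DEFAULT'}
            }
        }
    }
}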
Code example #5
def test_validate():
    schema = {'properties': {'mystring': {'type': 'string'}}}

    data = {"mystring": "Test"}

    f = StringIO()
    yaml.dump(data, f)
    f.seek(0)

    cfg = load_config(f, schema)
    assert cfg['mystring'] == 'Test'
Code example #6
def test_missing_required_property_no_default():
    schema = {
        'required': ['mystring'],
        'properties': {
            'mystring': {
                'type': 'string',

                # NO DEFAULT -- really required
                #'default': 'DEFAULT'
            }
        }
    }

    data = {}

    f = StringIO()
    yaml.dump(data, f)
    f.seek(0)

    with pytest.raises(ValidationError):
        load_config(f, schema)
Code example #7
def test_inject_default():
    schema = copy.deepcopy(TEST_SCHEMA)
    data = {'mynumber': 10}

    f = StringIO()
    yaml.dump(data, f)
    f.seek(0)

    cfg = load_config(f, schema)
    assert cfg['mystring'] == 'DEFAULT'
    assert cfg['myobject']['inner-string'] == 'INNER_DEFAULT'
    assert cfg['myobject'].from_default == True
    validate(cfg, schema)
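
A related sketch: besides injecting defaults at load time, confiddler can emit a schema's defaults as a YAML template, as the main() examples later in this listing do with dump_default_config. This sketch uses the reconstructed TEST_SCHEMA shown after code example #4:

import sys
from confiddler import dump_default_config

# Print a commented YAML template containing the schema's default values.
dump_default_config(TEST_SCHEMA, sys.stdout, 'yaml-with-comments')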
Code example #8
def load_and_overwrite_dask_config(cluster_type,
                                   dask_config_path=None,
                                   overwrite=False):
    """
    Load dask config, inject defaults for (selected) missing entries,
    and optionally overwrite in-place.

    Note: Also re-initializes the distributed logging configuration.
    """
    if dask_config_path is None and 'DASK_CONFIG' in os.environ:
        dask_config_path = os.environ["DASK_CONFIG"]
    dask_config_path = dask_config_path or 'dask-config.yaml'

    dask_config_path = os.path.abspath(dask_config_path)
    if os.path.exists(dask_config_path):
        # Check for completely empty dask config file
        from ruamel.yaml import YAML
        yaml = YAML()
        with open(dask_config_path, 'r') as f:
            config = yaml.load(f)
        if not config:
            dask_config = {}
            validate(dask_config, DaskConfigSchema, inject_defaults=True)
        else:
            dask_config = load_config(dask_config_path, DaskConfigSchema)
    else:
        dask_config = {}
        validate(dask_config, DaskConfigSchema, inject_defaults=True)

    # Don't pollute the config file with extra jobqueue parameters we aren't using
    if "jobqueue" in dask_config:
        for key in list(dask_config["jobqueue"].keys()):
            if key != cluster_type:
                del dask_config["jobqueue"][key]

        if len(dask_config["jobqueue"]) == 0:
            del dask_config["jobqueue"]

    if overwrite:
        dump_config(dask_config, dask_config_path)

    # This environment variable is recognized by dask itself
    os.environ["DASK_CONFIG"] = dask_config_path
    dask.config.paths.append(dask_config_path)
    dask.config.refresh()

    # Must be imported this way due to aliased name 'config' in distributed.__init__
    from distributed.config import initialize_logging
    initialize_logging(dask.config.config)
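
An illustrative call of the function above; the cluster type and file name are examples only, chosen to match the signature shown:

# Prune jobqueue settings down to 'lsf', rewrite dask-config.yaml in place,
# then point dask's global config at the file and refresh it.
load_and_overwrite_dask_config('lsf', 'dask-config.yaml', overwrite=True)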
Code example #9
def main():
    # Early exit if we're dumping the config
    # (Parse it ourselves to allow omission of otherwise required parameters.)
    if ({'--dump-config-template', '-d'} & {*sys.argv}):
        from confiddler import dump_default_config
        dump_default_config(config_schema(), sys.stdout)
        sys.exit(0)

    parser = argparse.ArgumentParser()
    parser.add_argument('--processes', '-p', type=int, default=0)
    parser.add_argument('--threads', '-t', type=int, default=0)
    parser.add_argument('--check-scale', '-s', type=int, default=0)
    parser.add_argument('--dump-config-template', '-d', action='store_true')
    parser.add_argument('--verify', '-v', action='store_true')
    parser.add_argument('config')
    parser.add_argument(
        'stats_df_pkl',
        help='Mito statistics table, as produced by the MitoStats workflow. '
             'Note: The coordinates must be provided in scale-0 units, '
             'regardless of the check-scale you want to use!')
    args = parser.parse_args()

    if args.threads == 0 and args.processes == 0:
        args.threads = 1
    elif (args.threads != 0) and (args.processes != 0):
        raise RuntimeError(
            "Can't use multi-threading and multi-processing.  Pick one.")

    from neuclease import configure_default_logging
    configure_default_logging()

    from confiddler import load_config

    config = load_config(args.config, config_schema())

    with open(args.stats_df_pkl, 'rb') as f:
        stats_df = pickle.load(f)

    stats_df = correct_centroids(config,
                                 stats_df,
                                 check_scale=args.check_scale,
                                 verify=args.verify,
                                 threads=args.threads,
                                 processes=args.processes)

    with open('corrected_stats_df.pkl', 'wb') as f:
        pickle.dump(stats_df, f, protocol=pickle.HIGHEST_PROTOCOL)
Code example #10
def test_missing_required_property_with_default():
    schema = {
        'required': ['mystring'],
        'properties': {
            'mystring': {
                'type': 'string',
                'default': 'DEFAULT'
            }
        }
    }

    data = {}

    f = StringIO()
    yaml.dump(data, f)
    f.seek(0)

    cfg = load_config(f, schema)
    assert cfg['mystring'] == "DEFAULT"
Code example #11
def test_load_list():
    """
    Make sure lists can be loaded properly
    (e.g. that they aren't overwritten with the default, etc.)
    """
    schema = copy.deepcopy(TEST_SCHEMA)
    schema['properties']['mylist'] = {
        'type': 'array',
        'items': {
            'type': 'string'
        },
        'default': []
    }

    data = {'mylist': ['a', 'b', 'c']}

    f = StringIO()
    yaml.dump(data, f)
    f.seek(0)

    cfg = load_config(f, schema)
    assert cfg['mylist'] == list('abc')
Code example #12
def main():
    # Early exit if we're dumping the config
    # (Parse it ourselves to allow omission of otherwise required parameters.)
    if ({'--dump-config-template', '-d'} & {*sys.argv}):
        dump_default_config(ConfigSchema, sys.stdout, "yaml-with-comments")
        sys.exit(0)

    parser = argparse.ArgumentParser()
    parser.add_argument('--dump-config-template', '-d', action='store_true',
                        help='Dump out a template yaml config file and exit.')
    parser.add_argument('--processes', '-p', type=int, default=1,
                        help="Size of the process pool to use")
    parser.add_argument('config')
    parser.add_argument('body_id', type=int)
    args = parser.parse_args()

    import numpy as np
    from neuclease import configure_default_logging
    configure_default_logging()

    config = load_config(args.config, ConfigSchema)
    seg_src = [*config["segmentation"].values()]
    mito_cc_src = [*config["mito-objects"].values()]
    mito_class_src = [*config["mito-masks"].values()]

    stats_df = neuron_mito_stats(seg_src, mito_cc_src, mito_class_src, args.body_id,
                                 config["scale"], config["min-size"], config["centroid-adjustment-radius"],
                                 args.processes)

    csv_path = f"mito-stats-{args.body_id}-scale-{config['scale']}.csv"
    logger.info(f"Writing {csv_path}")
    stats_df.to_csv(csv_path, index=True, header=True)

    npy_path = f"mito-stats-{args.body_id}-scale-{config['scale']}.npy"
    logger.info(f"Writing {npy_path}")
    np.save(npy_path, stats_df.to_records(index=True))

    logger.info("DONE")
Code example #13
def test_inject_default_array_item_objects():
    """
    Users can specify that items of an array should be objects,
    with a particular schema.  If that item schema specifies default properties,
    then those properties will be injected into any objects in the list (if the user omitted them).
    
    The NUMBER of items must be chosen by the user,
    but the contents of the items are determined by the default schema.
    """
    schema = {
        'type': 'array',
        'items': {
            'type': 'object',
            'default': {},
            'properties': {
                'foo': {
                    'default': 'bar'
                }
            }
        }
    }

    # The first object in this array is completely specified
    # by the user, but the remaining two will be "filled in"
    # with the defaults from the item schema.
    data = [{'foo': 'MYFOO'}, {}, {}]

    f = StringIO()
    yaml.dump(data, f)
    f.seek(0)

    cfg = load_config(f, schema)
    assert cfg == [{'foo': 'MYFOO'}, {'foo': 'bar'}, {'foo': 'bar'}]
    assert not cfg[0].from_default
    assert cfg[1].from_default
    assert cfg[2].from_default
Code example #14
def test_load_empty():
    f = StringIO('{}')
    cfg = load_config(f, {})
    assert cfg == {}
Code example #15
def main():
    # Early exit if we're dumping the config
    # (Parse it ourselves to allow omission of otherwise required parameters.)
    if ({'--dump-config-template', '-d'} & {*sys.argv}):
        dump_default_config(ConfigSchema, sys.stdout, "yaml-with-comments")
        sys.exit(0)

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--dump-config-template',
                        '-d',
                        action='store_true',
                        help='Dump out a template yaml config file and exit.')
    parser.add_argument('--count',
                        '-c',
                        type=int,
                        help='How many points to generate.')
    parser.add_argument('--roi', '-r', help='Limit points to the given ROI.')
    parser.add_argument('--body',
                        '-b',
                        type=int,
                        help='Limit points to the given body.')
    parser.add_argument(
        '--tbars',
        '-t',
        action='store_true',
        help=
        'If given, limit points to the tbars of the given body, from the "synapses" instance in the input UUID.'
    )
    parser.add_argument(
        '--skeleton',
        '-s',
        action='store_true',
        help=
        'If given, choose the points from the nodes of the skeleton for the given body.'
    )
    parser.add_argument(
        '--generate-points-only',
        '-g',
        action='store_true',
        help=
        "If given, generate the points list, but don't write neighborhood segmentations"
    )
    parser.add_argument(
        '--points',
        '-p',
        help=
        'A CSV file containing the points to use instead of automatically generating them.'
    )
    parser.add_argument(
        '--ng-links',
        '-n',
        action='store_true',
        help='If given, include neuroglancer links in the output CSV. '
             'Your config should specify the basic neuroglancer view settings; '
             'only the "position" will be overwritten in each link.'
    )
    parser.add_argument('config')
    args = parser.parse_args()

    configure_default_logging()

    config = load_config(args.config, ConfigSchema)
    update_ng_settings(config)
    input_seg = [*config["input"].values()]
    output_seg = [*config["output"].values()]
    radius = config["radius"]
    random_seed = config["random-seed"]

    if config["enforce-minimum-distance"]:
        minimum_distance = 2 * radius
    else:
        minimum_distance = 0

    if args.points and any(
        [args.count, args.roi, args.body, args.tbars, args.skeleton]):
        msg = ("If you're providing your own list of points, you shouldn't"
               " specify any of the auto-generation arguments, such as"
               " --count --roi --body --tbars")
        sys.exit(msg)

    if not args.points and not any(
        [args.count, args.roi, args.body, args.tbars, args.skeleton]):
        msg = "You must provide a list of points or specify how to auto-generate them."
        sys.exit(msg)

    if args.points:
        assert args.points.endswith('.csv')
        name, _ = os.path.splitext(args.points)
        output_path = name + '-neighborhoods.csv'
        points = pd.read_csv(args.points)
    else:
        points = autogen_points(input_seg, args.count, args.roi, args.body,
                                args.tbars, args.skeleton, random_seed,
                                minimum_distance)

        uuid = input_seg[1]
        output_path = f'neighborhoods-from-{uuid[:6]}'

        if not any([args.roi, args.body, args.tbars, args.skeleton]):
            output_path += input_seg[2]
        else:
            if args.roi:
                output_path += f'-{args.roi}'
            if args.body:
                output_path += f'-{args.body}'
            if args.tbars:
                output_path += '-tbars'
            if args.skeleton:
                output_path += '-skeleton'

        assignment_path = output_path + '.json'
        csv_path = output_path + '.csv'

    kd = scipy.spatial.cKDTree(points[[*'zyx']].values)
    if len(kd.query_pairs(2 * radius)) > 0:
        msg = (
            "Some of the chosen points are closer to each other than 2x the "
            f"configured radius ({radius}). Their neighborhood segments may "
            "be mangled in the output.")
        logger.warning(msg)

    cols = [*'xyz'] + list({*points.columns} - {*'xyz'})
    points = points[cols]

    if args.generate_points_only:
        add_link_col(points, config)
        export_as_html(points, csv_path)
        if not args.ng_links:
            del points['link']
            points.to_csv(csv_path,
                          index=False,
                          header=True,
                          quoting=csv.QUOTE_NONE)
        sys.exit(0)

    try:
        input_info = fetch_instance_info(*input_seg)
    except Exception:
        sys.exit(
            f"Couldn't find input segmentation instance: {' / '.join(input_seg)}"
        )

    try:
        fetch_instance_info(*output_seg)
    except Exception:
        logger.info(
            f"Output labelmap not found. Creating new label instance: {' / '.join(output_seg)}"
        )

        # Copy details from input instance.
        # But only provide a single value for each, even though the info provides three.
        # Otherwise, DVID kicks back errors like this:
        # Setting for 'VoxelUnits' was not a string: [nanometers nanometers nanometers]
        settings = {
            'block_size': input_info['Extended']['BlockSize'][0],
            'voxel_size': input_info['Extended']['VoxelSize'][0],
            'voxel_units': input_info['Extended']['VoxelUnits'][0],
            'max_scale': input_info['Extended']['MaxDownresLevel']
        }
        create_labelmap_instance(*output_seg, **settings)

        # Also create keyvalue for meshes
        create_instance(*output_seg[:2], output_seg[2] + '_meshes', 'keyvalue')

    results_df = write_point_neighborhoods(input_seg, output_seg, points,
                                           radius, args.body)

    add_link_col(results_df, config)
    export_as_html(results_df, csv_path)
    write_assignment_file(output_seg, results_df, assignment_path, config)
    if not args.ng_links:
        del results_df['link']
    results_df.to_csv(csv_path,
                      index=False,
                      header=True,
                      quoting=csv.QUOTE_NONE)
Code example #16
def convert_grayscale(config_path, client=None):
    """
    Simple example showing how to:
     - create an input service (agnostic to data format)
     - read it into a distributed array (BrickWall)
     - realign it to an output array
     - write the realigned data (agnostic to format)
    
    The input will be accessed according to its preferred access pattern,
    and the output will be written according to its preferred access pattern
    (e.g. entire slices if using a PNG stack, or blocks if using N5).

    Caveats:
     
     - This does not implement a Workflow subclass
       (though there isn't much more to it).
     
     - For simplicity, this code assumes that the entire volume can be loaded
       into your cluster's RAM.  For large volumes, that won't work.
       A more robust solution would split the input volume into large
       "slabs" and process them each in turn.

    Example:

        # Set up some input data
        from flyemflows.util.n5 import export_to_multiscale_n5
        volume = np.random.randint(255, size=(500,500,500), dtype=np.uint8)
        export_to_multiscale_n5(volume, '/tmp/test-vol.n5')

        # Write the config file (/tmp/test-config.yaml) with these contents:
        input:
          n5:
            path: /tmp/test-vol.n5
            dataset: 's0'

        output:
          slice-files:
            slice-path-format: '/tmp/test-slices/z{:04}.png'

        # Run this script:
        python convert_grayscale.py /tmp/test-config.yaml

    """
    # Define the config file schema
    schema = {
        "properties": {
            "input": GrayscaleVolumeSchema,
            "output": GrayscaleVolumeSchema
        }
    }
    
    # Load config (injects defaults for missing values)
    config = load_config(config_path, schema)

    # Create input service and input 'bricks'
    input_svc = VolumeService.create_from_config(config["input"])
    input_wall = BrickWall.from_volume_service(input_svc, client=client)

    # Copy bounding box from input to output
    config["output"]["geometry"]["bounding-box"] = config["input"]["geometry"]["bounding-box"]

    # Create output service and redistribute
    # data using the output's preferred grid
    output_svc = VolumeService.create_from_config(config["output"])
    output_grid = Grid(output_svc.preferred_message_shape)
    output_wall = input_wall.realign_to_new_grid(output_grid)

    # Write the data: one task per output brick
    # (e.g. output slices, if exporting to PNGs)
    def write_brick(brick):
        output_svc.write_subvolume(brick.volume, brick.physical_box[0])
    output_wall.bricks.map(write_brick).compute()

    print(f"DONE exporting")