Example #1
def run_from_cmdline():
    parser = argparse.ArgumentParser(
        description='NOTICE: You probably want run.py rather than this script.')
    parser.add_argument('--dataset',
                        choices=DATASETS.keys(),
                        help='Dataset to benchmark on.',
                        required=True)
    parser.add_argument('--algorithm',
                        help='Name of algorithm for saving the results.',
                        required=True)
    parser.add_argument(
        '--module',
        help=
        'Python module containing algorithm. E.g. "ann_benchmarks.algorithms.annoy"',
        required=True)
    parser.add_argument('--constructor',
                        help='Constructor to load from module. E.g. "Annoy"',
                        required=True)
    parser.add_argument(
        '--count',
        help='K: Number of nearest neighbours for the algorithm to return.',
        required=True,
        type=int)
    parser.add_argument(
        '--runs',
        help='Number of times to run the algorithm. '
        'The fastest run-time across runs is used.',
        required=True,
        type=int)
    parser.add_argument(
        '--batch',
        help=
        'If flag included, algorithms will be run in batch mode, rather than "individual query" mode.',
        action='store_true')
    parser.add_argument(
        'build',
        help=
        'JSON of arguments to pass to the constructor. E.g. ["angular", 100]')
    parser.add_argument(
        'queries',
        help='JSON of arguments to pass to the queries. E.g. [100]',
        nargs='*',
        default=[])
    args = parser.parse_args()
    algo_args = json.loads(args.build)
    print(algo_args)
    query_args = [json.loads(q) for q in args.queries]

    definition = Definition(
        algorithm=args.algorithm,
        docker_tag=None,  # not needed
        module=args.module,
        constructor=args.constructor,
        arguments=algo_args,
        query_argument_groups=query_args,
        disabled=False)
    run(definition, args.dataset, args.count, args.runs, args.batch)
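
The two positionals are JSON strings: build becomes the constructor arguments and each queries entry becomes one query-argument group. A small self-contained illustration of that parsing, with made-up values:

import json

# Hypothetical command line:
#   ... --count 10 --runs 3 '["angular", 100]' '[100]' '[200]'
build = '["angular", 100]'           # constructor arguments as JSON
queries = ['[100]', '[200]']         # one JSON string per query-argument group

algo_args = json.loads(build)                  # ['angular', 100]
query_args = [json.loads(q) for q in queries]  # [[100], [200]]
print(algo_args, query_args)
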
Example #2
def run_from_cmdline():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--dataset',
        choices=DATASETS.keys(),
        required=True)
    parser.add_argument(
        '--algorithm',
        required=True)
    parser.add_argument(
        '--module',
        required=True)
    parser.add_argument(
        '--constructor',
        required=True)
    parser.add_argument(
        '--count',
        required=True,
        type=int)
    parser.add_argument(
        '--runs',
        required=True,
        type=int)
    parser.add_argument(
        '--batch',
        action='store_true')
    parser.add_argument(
        'build')
    parser.add_argument(
        'queries',
        nargs='*',
        default=[])
    args = parser.parse_args()
    algo_args = json.loads(args.build)
    query_args = [json.loads(q) for q in args.queries]

    definition = Definition(
        algorithm=args.algorithm,
        docker_tag=None,  # not needed
        module=args.module,
        constructor=args.constructor,
        arguments=algo_args,
        query_argument_groups=query_args,
        disabled=False
    )
    run(definition, args.dataset, args.count, args.runs, args.batch)
Example #3
def run_from_cmdline():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--dataset',
        choices=DATASETS.keys(),
        required=True)
    parser.add_argument(
        '--algorithm',
        required=True)
    parser.add_argument(
        '--module',
        required=True)
    parser.add_argument(
        '--constructor',
        required=True)
    parser.add_argument(
        '--count',
        required=True,
        type=int)
    parser.add_argument(
        '--runs',
        required=True,
        type=int)
    parser.add_argument(
        '--batch',
        action='store_true')
    parser.add_argument(
        'build')
    parser.add_argument(
        'queries',
        nargs='*',
        default=[])
    args = parser.parse_args()
    algo_args = json.loads(args.build)
    query_args = [json.loads(q) for q in args.queries]

    definition = Definition(
        algorithm=args.algorithm,
        docker_tag=None, # not needed
        module=args.module,
        constructor=args.constructor,
        arguments=algo_args,
        query_argument_groups=query_args,
        disabled=False
    )
    run(definition, args.dataset, args.count, args.runs, args.batch)
Example #4
def run_from_cmdline():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', choices=DATASETS.keys(), required=True)
    parser.add_argument('--algorithm', required=True)
    parser.add_argument('--module', required=True)
    parser.add_argument('--constructor', required=True)
    parser.add_argument('--count', required=True, type=int)
    parser.add_argument('--json-args', action='store_true')
    parser.add_argument('-a', '--arg', dest='args', action='append')
    args = parser.parse_args()
    if args.json_args:
        algo_args = [json.loads(arg) for arg in args.args]
    else:
        algo_args = args.args

    definition = Definition(
        algorithm=args.algorithm,
        docker_tag=None,  # not needed
        module=args.module,
        constructor=args.constructor,
        arguments=algo_args)
    run(definition, args.dataset, args.count)
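
Whether --json-args is passed decides if each repeated -a value is decoded as JSON or kept as a raw string, which matters for numbers and lists. A minimal illustration with made-up values:

import json

raw_args = ['100', '"angular"', '[1, 2, 3]']     # values as they would arrive via repeated -a flags

without_flag = raw_args                          # ['100', '"angular"', '[1, 2, 3]'] (strings)
with_flag = [json.loads(a) for a in raw_args]    # [100, 'angular', [1, 2, 3]]
print(without_flag)
print(with_flag)
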
Example #5
def run_from_cmdline():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--dataset',
        choices=DATASETS.keys(),
        required=True)
    parser.add_argument(
        '--algorithm',
        required=True)
    parser.add_argument(
        '--module',
        required=True)
    parser.add_argument(
        '--constructor',
        required=True)
    parser.add_argument(
        '--count',
        required=True,
        type=int)
    parser.add_argument(
        '--json-args',
        action='store_true')
    parser.add_argument(
        '-a', '--arg',
        dest='args', action='append')
    args = parser.parse_args()
    if args.json_args:
        algo_args = [json.loads(arg) for arg in args.args]
    else:
        algo_args = args.args

    definition = Definition(
        algorithm=args.algorithm,
        docker_tag=None, # not needed
        module=args.module,
        constructor=args.constructor,
        arguments=algo_args
    )
    run(definition, args.dataset, args.count)
Example #6
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--dataset',
                        metavar='NAME',
                        help='the dataset to load training points from',
                        default='glove-100-angular',
                        choices=DATASETS.keys())
    parser.add_argument("-k",
                        "--count",
                        default=10,
                        type=positive_int,
                        help="the number of near neighbours to search for")
    parser.add_argument('--definitions',
                        metavar='FILE',
                        help='load algorithm definitions from FILE',
                        default='algos.yaml')
    parser.add_argument('--algorithm',
                        metavar='NAME',
                        help='run only the named algorithm',
                        default=None)
    parser.add_argument(
        '--docker-tag',
        metavar='NAME',
        help='run only algorithms in a particular docker image',
        default=None)
    parser.add_argument(
        '--list-algorithms',
        help='print the names of all known algorithms and exit',
        action='store_true')
    parser.add_argument(
        '--force',
        help='''re-run algorithms even if their results already exist''',
        action='store_true')
    parser.add_argument(
        '--runs',
        metavar='COUNT',
        type=positive_int,
        help=
        'run each algorithm instance %(metavar)s times and use only the best result',
        default=2)
    parser.add_argument(
        '--timeout',
        type=int,
        help=
        'Timeout (in seconds) for each individual algorithm run, or -1 if no timeout should be set',
        default=-1)
    parser.add_argument(
        '--local',
        action='store_true',
        help=
        'If set, then will run everything locally (inside the same process) rather than using Docker'
    )
    parser.add_argument(
        '--max-n-algorithms',
        type=int,
        help='Max number of algorithms to run (just used for testing)',
        default=-1)
    parser.add_argument('--run-disabled',
                        help='run algorithms that are disabled in algos.yml',
                        action='store_true')

    args = parser.parse_args()
    if args.timeout == -1:
        args.timeout = None

    if args.list_algorithms:
        list_algorithms(args.definitions)
        sys.exit(0)

    # Nmslib specific code
    # Remove old indices stored on disk
    if os.path.exists(INDEX_DIR):
        shutil.rmtree(INDEX_DIR)

    dataset = get_dataset(args.dataset)
    dimension = len(dataset['train'][0])  # TODO(erikbern): ugly
    point_type = 'float'  # TODO(erikbern): should look at the type of X_train
    distance = dataset.attrs['distance']
    definitions = get_definitions(args.definitions, dimension, point_type,
                                  distance, args.count)

    # Filter out, from the loaded definitions, all those query argument groups
    # that correspond to experiments that have already been run. (This might
    # mean removing a definition altogether, so we can't just use a list
    # comprehension.)
    filtered_definitions = []
    for definition in definitions:
        query_argument_groups = definition.query_argument_groups
        if not query_argument_groups:
            query_argument_groups = [[]]
        not_yet_run = []
        for query_arguments in query_argument_groups:
            fn = get_result_filename(args.dataset, args.count, definition,
                                     query_arguments)
            if not os.path.exists(fn):
                not_yet_run.append(query_arguments)
        if not_yet_run:
            if definition.query_argument_groups:
                definition = definition._replace(
                    query_argument_groups=not_yet_run)
            filtered_definitions.append(definition)
    definitions = filtered_definitions

    random.shuffle(definitions)

    if args.algorithm:
        print('running only', args.algorithm)
        definitions = [d for d in definitions if d.algorithm == args.algorithm]

    if not args.local:
        # See which Docker images we have available
        docker_client = docker.from_env()
        docker_tags = set()
        for image in docker_client.images.list():
            for tag in image.tags:
                tag = tag.split(':')[0]
                docker_tags.add(tag)

        if args.docker_tag:
            print('running only', args.docker_tag)
            definitions = [
                d for d in definitions if d.docker_tag == args.docker_tag
            ]

        if set(d.docker_tag for d in definitions).difference(docker_tags):
            print('not all docker images available, only:', set(docker_tags))
            print(
                'missing docker images:',
                set(d.docker_tag for d in definitions).difference(docker_tags))
            definitions = [
                d for d in definitions if d.docker_tag in docker_tags
            ]
    else:

        def _test(df):
            status = algorithm_status(df)
            # If the module was loaded but doesn't actually have a constructor of
            # the right name, then the definition is broken
            assert status != InstantiationStatus.NO_CONSTRUCTOR, """\
%s.%s(%s): error: the module '%s' does not expose the named constructor""" % (
                df.module, df.constructor, df.arguments, df.module)
            if status == InstantiationStatus.NO_MODULE:
                # If the module couldn't be loaded (presumably because of a missing
                # dependency), print a warning and remove this definition from the
                # list of things to be run
                print("""\
%s.%s(%s): warning: the module '%s' could not be loaded; skipping""" %
                      (df.module, df.constructor, df.arguments, df.module))
                return False
            else:
                return True

        definitions = [d for d in definitions if _test(d)]

    if not args.run_disabled:
        if len([d for d in definitions if d.disabled]):
            print('Not running disabled algorithms:',
                  [d for d in definitions if d.disabled])
        definitions = [d for d in definitions if not d.disabled]

    if args.max_n_algorithms >= 0:
        definitions = definitions[:args.max_n_algorithms]

    if len(definitions) == 0:
        raise Exception('Nothing to run')
    else:
        print('Order:', definitions)

    for definition in definitions:
        print(definition, '...')

        try:
            if args.local:
                run(definition, args.dataset, args.count, args.runs)
            else:
                run_docker(definition, args.dataset, args.count, args.runs)
        except KeyboardInterrupt:
            break
        except Exception:
            traceback.print_exc()
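
positive_int, used as the argparse type for -k/--count and --runs in Examples 6 through 9, is not included in these snippets. A minimal sketch of what such a type function typically looks like (an assumption about its behavior, not the project's actual implementation):

import argparse

def positive_int(value):
    # argparse passes the raw string; reject anything that is not a positive integer.
    i = int(value)
    if i < 1:
        raise argparse.ArgumentTypeError(f'{value} is not a positive integer')
    return i
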
Example #7
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--dataset',
        metavar='NAME',
        help='the dataset to load training points from',
        default='glove-100-angular',
        choices=DATASETS.keys())
    parser.add_argument(
        "-k", "--count",
        default=10,
        type=positive_int,
        help="the number of near neighbours to search for")
    parser.add_argument(
        '--definitions',
        metavar='FILE',
        help='load algorithm definitions from FILE',
        default='algos.yaml')
    parser.add_argument(
        '--algorithm',
        metavar='NAME',
        help='run only the named algorithm',
        default=None)
    parser.add_argument(
        '--docker-tag',
        metavar='NAME',
        help='run only algorithms in a particular docker image',
        default=None)
    parser.add_argument(
        '--list-algorithms',
        help='print the names of all known algorithms and exit',
        action='store_true',
        default=argparse.SUPPRESS)
    parser.add_argument(
        '--force',
        help='''re-run algorithms even if their results already exist''',
        action='store_true')
    parser.add_argument(
        '--runs',
        metavar='COUNT',
        type=positive_int,
        help='run each algorithm instance %(metavar)s times and use only the best result',
        default=3)
    parser.add_argument(
        '--timeout',
        type=int,
        help='Timeout (in seconds) for each individual algorithm run, or -1 if no timeout should be set',
        default=-1)
    parser.add_argument(
        '--local',
        action='store_true',
        help='If set, then will run everything locally (inside the same process) rather than using Docker')
    parser.add_argument(
        '--max-n-algorithms',
        type=int,
        help='Max number of algorithms to run (just used for testing)',
        default=-1)

    args = parser.parse_args()
    if args.timeout == -1:
        args.timeout = None

    if hasattr(args, "list_algorithms"):
        list_algorithms(args.definitions)
        sys.exit(0)

    # See which Docker images we have available
    docker_client = docker.from_env()
    docker_tags = set()
    for image in docker_client.images.list():
        for tag in image.tags:
            tag = tag.split(':')[0]
            docker_tags.add(tag)

    # Nmslib specific code
    # Remove old indices stored on disk
    if os.path.exists(INDEX_DIR):
        shutil.rmtree(INDEX_DIR)

    dataset = get_dataset(args.dataset)
    dimension = len(dataset['train'][0]) # TODO(erikbern): ugly
    point_type = 'float' # TODO(erikbern): should look at the type of X_train
    distance = dataset.attrs['distance']
    definitions = get_definitions(args.definitions, dimension, point_type, distance, args.count)

    # TODO(erikbern): should make this a helper function somewhere
    definitions = [
        definition for definition in definitions
        if not os.path.exists(
            get_result_filename(args.dataset, args.count, definition))
    ]

    random.shuffle(definitions)
    
    if args.algorithm:
        print('running only', args.algorithm)
        definitions = [d for d in definitions if d.algorithm == args.algorithm]

    if args.docker_tag:
        print('running only', args.docker_tag)
        definitions = [d for d in definitions if d.docker_tag == args.docker_tag]

    if set(d.docker_tag for d in definitions).difference(docker_tags):
        print('not all docker images available, only:', set(docker_tags))
        print('missing docker images:', set(d.docker_tag for d in definitions).difference(docker_tags))
        definitions = [d for d in definitions if d.docker_tag in docker_tags]

    if args.max_n_algorithms >= 0:
        definitions = definitions[:args.max_n_algorithms]

    print('order:', definitions)

    for definition in definitions:
        print(definition, '...')

        try:
            if args.local:
                run(definition, args.dataset, args.count, args.runs)
            else:
                run_docker(definition, args.dataset, args.count, args.runs)
        except KeyboardInterrupt:
            break
        except Exception:
            traceback.print_exc()
Example #8
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--dataset',
                        metavar='NAME',
                        help='the dataset to load training points from',
                        default='glove-100-angular',
                        choices=DATASETS.keys())
    parser.add_argument("-k",
                        "--count",
                        default=10,
                        type=positive_int,
                        help="the number of near neighbours to search for")
    parser.add_argument('--definitions',
                        metavar='FILE',
                        help='load algorithm definitions from FILE',
                        default='algos.yaml')
    parser.add_argument('--algorithm',
                        metavar='NAME',
                        help='run only the named algorithm',
                        default=None)
    parser.add_argument(
        '--docker-tag',
        metavar='NAME',
        help='run only algorithms in a particular docker image',
        default=None)
    parser.add_argument(
        '--list-algorithms',
        help='print the names of all known algorithms and exit',
        action='store_true')
    parser.add_argument(
        '--force',
        help='re-run algorithms even if their results already exist',
        action='store_true')
    parser.add_argument(
        '--runs',
        metavar='COUNT',
        type=positive_int,
        help='run each algorithm instance %(metavar)s times and use only'
        ' the best result',
        default=5)
    parser.add_argument(
        '--timeout',
        type=int,
        help='Timeout (in seconds) for each individual algorithm run, or -1 '
        'if no timeout should be set',
        default=2 * 3600)
    parser.add_argument(
        '--local',
        action='store_true',
        help='If set, then will run everything locally (inside the same '
        'process) rather than using Docker')
    parser.add_argument('--batch',
                        action='store_true',
                        help='If set, algorithms get all queries at once')
    parser.add_argument(
        '--max-n-algorithms',
        type=int,
        help='Max number of algorithms to run (just used for testing)',
        default=-1)
    parser.add_argument('--run-disabled',
                        help='run algorithms that are disabled in algos.yml',
                        action='store_true')
    parser.add_argument('--parallelism',
                        type=positive_int,
                        help='Number of Docker containers in parallel',
                        default=1)

    args = parser.parse_args()
    if args.timeout == -1:
        args.timeout = None

    if args.list_algorithms:
        list_algorithms(args.definitions)
        sys.exit(0)

    logging.config.fileConfig("logging.conf")
    logger = logging.getLogger("annb")

    # Nmslib specific code
    # Remove old indices stored on disk
    if os.path.exists(INDEX_DIR):
        shutil.rmtree(INDEX_DIR)

    dataset, dimension = get_dataset(args.dataset)
    point_type = dataset.attrs.get('point_type', 'float')
    distance = dataset.attrs['distance']
    definitions = get_definitions(args.definitions, dimension, point_type,
                                  distance, args.count)

    # Filter out, from the loaded definitions, all those query argument groups
    # that correspond to experiments that have already been run. (This might
    # mean removing a definition altogether, so we can't just use a list
    # comprehension.)
    filtered_definitions = []
    for definition in definitions:
        query_argument_groups = definition.query_argument_groups
        if not query_argument_groups:
            query_argument_groups = [[]]
        not_yet_run = []
        for query_arguments in query_argument_groups:
            fn = get_result_filename(args.dataset, args.count, definition,
                                     query_arguments, args.batch)
            if args.force or not os.path.exists(fn):
                not_yet_run.append(query_arguments)
        if not_yet_run:
            if definition.query_argument_groups:
                definition = definition._replace(
                    query_argument_groups=not_yet_run)
            filtered_definitions.append(definition)
    definitions = filtered_definitions

    random.shuffle(definitions)

    if args.algorithm:
        logger.info(f'running only {args.algorithm}')
        definitions = [d for d in definitions if d.algorithm == args.algorithm]

    if not args.local:
        # See which Docker images we have available
        docker_client = docker.from_env()
        docker_tags = set()
        for image in docker_client.images.list():
            for tag in image.tags:
                tag = tag.split(':')[0]
                docker_tags.add(tag)

        if args.docker_tag:
            logger.info(f'running only {args.docker_tag}')
            definitions = [
                d for d in definitions if d.docker_tag == args.docker_tag
            ]

        if set(d.docker_tag for d in definitions).difference(docker_tags):
            logger.info(
                f'not all docker images available, only: {set(docker_tags)}')
            logger.info(
                f'missing docker images: '
                f'{str(set(d.docker_tag for d in definitions).difference(docker_tags))}'
            )
            definitions = [
                d for d in definitions if d.docker_tag in docker_tags
            ]
    else:

        def _test(df):
            status = algorithm_status(df)
            # If the module was loaded but doesn't actually have a constructor
            # of the right name, then the definition is broken
            if status == InstantiationStatus.NO_CONSTRUCTOR:
                raise Exception(
                    "%s.%s(%s): error: the module '%s' does not"
                    " expose the named constructor" %
                    (df.module, df.constructor, df.arguments, df.module))

            if status == InstantiationStatus.NO_MODULE:
                # If the module couldn't be loaded (presumably because
                # of a missing dependency), print a warning and remove
                # this definition from the list of things to be run
                logger.warning(
                    "%s.%s(%s): the module '%s' could not be "
                    "loaded; skipping" %
                    (df.module, df.constructor, df.arguments, df.module))
                return False
            else:
                return True

        definitions = [d for d in definitions if _test(d)]

    if not args.run_disabled:
        if len([d for d in definitions if d.disabled]):
            logger.info(
                f'Not running disabled algorithms {[d for d in definitions if d.disabled]}'
            )
        definitions = [d for d in definitions if not d.disabled]

    if args.max_n_algorithms >= 0:
        definitions = definitions[:args.max_n_algorithms]

    if len(definitions) == 0:
        raise Exception('Nothing to run')
    else:
        logger.info(f'Order: {definitions}')

    if args.parallelism > multiprocessing.cpu_count() - 1:
        raise Exception('Parallelism larger than %d! (CPU count minus one)' %
                        (multiprocessing.cpu_count() - 1))

    # Multiprocessing magic to farm this out to all CPUs
    queue = multiprocessing.Queue()
    for definition in definitions:
        queue.put(definition)
    if args.batch and args.parallelism > 1:
        raise Exception(
            f"Batch mode uses all available CPU resources, --parallelism should be set to 1. (Was: {args.parallelism})"
        )
    workers = [
        multiprocessing.Process(target=run_worker, args=(i + 1, args, queue))
        for i in range(args.parallelism)
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
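
Example 8 fans work out to parallel processes through a multiprocessing.Queue, but run_worker itself is outside the snippet. A rough sketch of such a worker, assuming it simply drains the queue and dispatches to the run/run_docker functions used in the other examples (the exact signatures here are assumptions):

import queue

def run_worker(cpu, args, task_queue):
    # Keep pulling definitions until the queue stays empty, then let the process exit.
    while True:
        try:
            definition = task_queue.get(timeout=1)
        except queue.Empty:
            break
        if args.local:
            run(definition, args.dataset, args.count, args.runs, args.batch)
        else:
            run_docker(definition, args.dataset, args.count, args.runs, args.batch)
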
Example #9
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--dataset',
        metavar='NAME',
        help='the dataset to load training points from',
        default='glove-100-angular',
        choices=DATASETS.keys())
    parser.add_argument(
        "-k", "--count",
        default=10,
        type=positive_int,
        help="the number of near neighbours to search for")
    parser.add_argument(
        '--definitions',
        metavar='FILE',
        help='load algorithm definitions from FILE',
        default='algos.yaml')
    parser.add_argument(
        '--algorithm',
        metavar='NAME',
        help='run only the named algorithm',
        default=None)
    parser.add_argument(
        '--docker-tag',
        metavar='NAME',
        help='run only algorithms in a particular docker image',
        default=None)
    parser.add_argument(
        '--list-algorithms',
        help='print the names of all known algorithms and exit',
        action='store_true')
    parser.add_argument(
        '--force',
        help='''re-run algorithms even if their results already exist''',
        action='store_true')
    parser.add_argument(
        '--runs',
        metavar='COUNT',
        type=positive_int,
        help='run each algorithm instance %(metavar)s times and use only the best result',
        default=3)
    parser.add_argument(
        '--timeout',
        type=int,
        help='Timeout (in seconds) for each individual algorithm run, or -1 if no timeout should be set',
        default=-1)
    parser.add_argument(
        '--local',
        action='store_true',
        help='If set, then will run everything locally (inside the same process) rather than using Docker')
    parser.add_argument(
        '--max-n-algorithms',
        type=int,
        help='Max number of algorithms to run (just used for testing)',
        default=-1)
    parser.add_argument(
        '--run-disabled',
        help='run algorithms that are disabled in algos.yml',
        action='store_true')

    args = parser.parse_args()
    if args.timeout == -1:
        args.timeout = None

    if args.list_algorithms:
        list_algorithms(args.definitions)
        sys.exit(0)

    # Nmslib specific code
    # Remove old indices stored on disk
    if os.path.exists(INDEX_DIR):
        shutil.rmtree(INDEX_DIR)

    dataset = get_dataset(args.dataset)
    dimension = len(dataset['train'][0]) # TODO(erikbern): ugly
    point_type = 'float' # TODO(erikbern): should look at the type of X_train
    distance = dataset.attrs['distance']
    definitions = get_definitions(args.definitions, dimension, point_type, distance, args.count)

    # Filter out, from the loaded definitions, all those query argument groups
    # that correspond to experiments that have already been run. (This might
    # mean removing a definition altogether, so we can't just use a list
    # comprehension.)
    filtered_definitions = []
    for definition in definitions:
        query_argument_groups = definition.query_argument_groups
        if not query_argument_groups:
            query_argument_groups = [[]]
        not_yet_run = []
        for query_arguments in query_argument_groups:
            fn = get_result_filename(args.dataset, args.count, definition,
                                     query_arguments)
            if not os.path.exists(fn):
                not_yet_run.append(query_arguments)
        if not_yet_run:
            if definition.query_argument_groups:
                definition = definition._replace(
                    query_argument_groups=not_yet_run)
            filtered_definitions.append(definition)
    definitions = filtered_definitions

    random.shuffle(definitions)
    
    if args.algorithm:
        print('running only', args.algorithm)
        definitions = [d for d in definitions if d.algorithm == args.algorithm]

    if not args.local:
        # See which Docker images we have available
        docker_client = docker.from_env()
        docker_tags = set()
        for image in docker_client.images.list():
            for tag in image.tags:
                tag = tag.split(':')[0]
                docker_tags.add(tag)

        if args.docker_tag:
            print('running only', args.docker_tag)
            definitions = [d for d in definitions if d.docker_tag == args.docker_tag]

        if set(d.docker_tag for d in definitions).difference(docker_tags):
            print('not all docker images available, only:', set(docker_tags))
            print('missing docker images:', set(d.docker_tag for d in definitions).difference(docker_tags))
            definitions = [d for d in definitions if d.docker_tag in docker_tags]
    else:
        def _test(df):
            status = algorithm_status(df)
            # If the module was loaded but doesn't actually have a constructor of
            # the right name, then the definition is broken
            assert status != InstantiationStatus.NO_CONSTRUCTOR, """\
%s.%s(%s): error: the module '%s' does not expose the named constructor""" % (df.module, df.constructor, df.arguments, df.module)
            if status == InstantiationStatus.NO_MODULE:
                # If the module couldn't be loaded (presumably because of a missing
                # dependency), print a warning and remove this definition from the
                # list of things to be run
                print("""\
%s.%s(%s): warning: the module '%s' could not be loaded; skipping""" % (df.module, df.constructor, df.arguments, df.module))
                return False
            else:
                return True
        definitions = [d for d in definitions if _test(d)]

    if not args.run_disabled:
        if len([d for d in definitions if d.disabled]):
            print('Not running disabled algorithms:', [d for d in definitions if d.disabled])
        definitions = [d for d in definitions if not d.disabled]

    if args.max_n_algorithms >= 0:
        definitions = definitions[:args.max_n_algorithms]

    if len(definitions) == 0:
        raise Exception('Nothing to run')
    else:
        print('Order:', definitions)

    for definition in definitions:
        print(definition, '...')

        try:
            if args.local:
                run(definition, args.dataset, args.count, args.runs)
            else:
                run_docker(definition, args.dataset, args.count, args.runs)
        except KeyboardInterrupt:
            break
        except Exception:
            traceback.print_exc()
Example #10
import argparse
from ann_benchmarks.datasets import DATASETS, get_dataset_fn

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', choices=DATASETS.keys(), required=True)
    args = parser.parse_args()
    fn = get_dataset_fn(args.dataset)
    DATASETS[args.dataset](fn)
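
Each DATASETS entry maps a dataset name to a callable that writes the HDF5 file at the path returned by get_dataset_fn. A toy sketch of that contract, with purely illustrative names rather than the project's real dataset builders:

def _build_toy_dataset(out_fn):
    # A real entry would download or generate train/test vectors and write them to out_fn as HDF5.
    print('would write dataset to', out_fn)

DATASETS = {'toy-random': _build_toy_dataset}

def get_dataset_fn(name):
    # Illustrative only; the real helper also ensures the data directory exists.
    return f'data/{name}.hdf5'

DATASETS['toy-random'](get_dataset_fn('toy-random'))
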