コード例 #1
0
ファイル: build.py プロジェクト: bitslab/dxr
def deploy_tree(tree, es, index_name):
    """Make a newly built tree live and record it in the catalog index.

    Three steps, in order:

    1. Swap the tree's ES alias over to ``index_name`` via ``swap_alias()``.
       (Retiring whatever index the alias pointed at before is left to that
       helper; it is not done in this function.)
    2. Create the catalog index, tolerating the case where it already exists.
    3. Insert or overwrite the catalog document describing this tree.

    :arg tree: The tree that was just built; its config, name, description,
        and enabled plugins are read
    :arg es: The ES client to issue requests through
    :arg index_name: Name of the freshly built index to put behind the alias

    """
    config = tree.config

    # Make new index live:
    alias = config.es_alias.format(format=FORMAT, tree=tree.name)
    swap_alias(alias, index_name, es)

    catalog_index = config.es_catalog_index

    index_settings = {
        # Fewer should be faster:
        'number_of_shards': 1,
        # This should be cranked up until it's on all nodes, so it's always
        # a fast read:
        'number_of_replicas': config.es_catalog_replicas,
    }
    tree_properties = {
        'name': UNANALYZED_STRING,
        'format': UNANALYZED_STRING,
        # In case es_alias changes in the conf file:
        'es_alias': UNINDEXED_STRING,
        # Needed so new trees or edited descriptions can show up without a
        # WSGI restart:
        'description': UNINDEXED_STRING,
        # ["clang", "pygmentize"]:
        'enabled_plugins': UNINDEXED_STRING,
        'generated_date': UNINDEXED_STRING,
        # We may someday also need to serialize some plugin configuration
        # here.
    }
    catalog_settings = {
        'settings': {'index': index_settings},
        'mappings': {
            TREE: {
                '_all': {'enabled': False},
                'properties': tree_properties,
            },
        },
    }

    # Create catalog index if it doesn't exist. An already-existing catalog
    # is the normal case after the first deploy, so that error is ignored.
    try:
        create_index_and_wait(es, catalog_index, settings=catalog_settings)
    except IndexAlreadyExistsError:
        pass

    # Insert or update the doc representing this tree. There'll be a little
    # race between this and the alias swap. We'll live.
    tree_doc = {
        'name': tree.name,
        'format': FORMAT,
        'es_alias': alias,
        'description': tree.description,
        'enabled_plugins': [plugin.name for plugin in tree.enabled_plugins],
        'generated_date': config.generated_date,
    }
    es.index(catalog_index,
             doc_type=TREE,
             doc=tree_doc,
             id='%s/%s' % (FORMAT, tree.name))
コード例 #2
0
ファイル: build.py プロジェクト: vck/dxr
def deploy_tree(tree, es, index_name):
    """Switch a tree over to its newly built index and update the catalog.

    The tree's ES alias is repointed at ``index_name`` with ``swap_alias()``
    (any handling of the previously aliased index happens inside that helper,
    not here). Afterwards the catalog index is created if it is missing, and
    the catalog document for this tree is inserted or replaced.

    :arg tree: The newly built tree; supplies the config, name, description,
        and list of enabled plugins
    :arg es: The ES client through which all requests are made
    :arg index_name: The name of the new index that should go live

    """
    config = tree.config

    # Make new index live:
    alias = config.es_alias.format(format=FORMAT, tree=tree.name)
    swap_alias(alias, index_name, es)

    catalog_name = config.es_catalog_index

    # Field mappings for a catalog document. We may someday also need to
    # serialize some plugin configuration here.
    catalog_fields = {}
    catalog_fields['name'] = UNANALYZED_STRING
    catalog_fields['format'] = UNANALYZED_STRING
    # In case es_alias changes in the conf file:
    catalog_fields['es_alias'] = UNINDEXED_STRING
    # Needed so new trees or edited descriptions can show up without a WSGI
    # restart:
    catalog_fields['description'] = UNINDEXED_STRING
    # ["clang", "pygmentize"]:
    catalog_fields['enabled_plugins'] = UNINDEXED_STRING
    catalog_fields['generated_date'] = UNINDEXED_STRING

    creation_body = {
        'settings': {
            'index': {
                # Fewer should be faster:
                'number_of_shards': 1,
                # This should be cranked up until it's on all nodes, so
                # it's always a fast read:
                'number_of_replicas': config.es_catalog_replicas,
            },
        },
        'mappings': {
            TREE: {
                '_all': {'enabled': False},
                'properties': catalog_fields,
            },
        },
    }

    # Create catalog index if it doesn't exist; finding it already there is
    # expected on every deploy but the first.
    try:
        create_index_and_wait(es, catalog_name, settings=creation_body)
    except IndexAlreadyExistsError:
        pass

    # Insert or update the doc representing this tree. There'll be a little
    # race between this and the alias swap. We'll live.
    plugin_names = [plugin.name for plugin in tree.enabled_plugins]
    catalog_doc = dict(name=tree.name,
                       format=FORMAT,
                       es_alias=alias,
                       description=tree.description,
                       enabled_plugins=plugin_names,
                       generated_date=config.generated_date)
    doc_id = '%s/%s' % (FORMAT, tree.name)
    es.index(catalog_name, doc_type=TREE, doc=catalog_doc, id=doc_id)
コード例 #3
0
ファイル: build.py プロジェクト: bitslab/dxr
def index_tree(tree, es, verbose=False):
    """Index a single tree into ES and the filesystem, and return the
    name of the new ES index.

    Return None instead if 'index' is listed in ``config.skip_stages``, since
    no index is created in that case.

    If anything raises along the way, make a best-effort attempt to delete
    the new index (if one was named) and re-raise the original exception.

    :arg tree: The tree to index; its config, folders, and enabled plugins
        are read
    :arg es: The ES client used to create, refresh, and reconfigure the index
    :arg verbose: Passed through to ``build_tree()``

    """
    config = tree.config

    def new_pool():
        return ProcessPoolExecutor(max_workers=config.workers)

    def farm_out(method_name):
        """Farm out a call to all tree indexers across a process pool.

        Return the tree indexers, including any mutations the method call
        might have made.

        Show progress while doing it.

        """
        if not config.workers:
            # No workers configured: run everything in this process.
            return [save_scribbles(ti, method_name) for ti in tree_indexers]
        else:
            # ``pool`` and ``tree_indexers`` are looked up in index_tree's
            # scope at call time, so this must be called inside one of the
            # ``with new_pool() as pool`` blocks below.
            futures = [pool.submit(full_traceback, save_scribbles, ti, method_name)
                       for ti in tree_indexers]
            return [future.result() for future in
                    show_progress(futures, 'Running %s' % method_name)]

    def delete_index_quietly(es, index):
        """Delete an index, and ignore any error.

        This cannot be done inline in the except clause below, because, even
        if we catch this exception, it spoils the exception info in that
        scope, making the bare ``raise`` raise the not-found error rather than
        whatever went wrong earlier.

        """
        try:
            es.delete_index(index)
        except Exception:
            # Deliberately best-effort: the caller is already propagating a
            # more interesting error.
            pass

    print("Starting tree '%s'." % tree.name)

    # Note starting time
    start_time = datetime.now()

    skip_indexing = 'index' in config.skip_stages
    skip_build = 'build' in config.skip_stages
    # Skipping either earlier stage also skips cleanup.
    skip_cleanup = skip_indexing or skip_build or 'clean' in config.skip_stages

    # Create and/or clear out folders:
    ensure_folder(tree.object_folder, tree.source_folder != tree.object_folder)
    ensure_folder(tree.temp_folder, not skip_cleanup)
    ensure_folder(tree.log_folder, not skip_cleanup)
    ensure_folder(join(tree.temp_folder, 'plugins'), not skip_cleanup)
    for plugin in tree.enabled_plugins:
        ensure_folder(join(tree.temp_folder, 'plugins', plugin.name),
                      not skip_cleanup)

    vcs_cache = VcsCache(tree)
    tree_indexers = [p.tree_to_index(p.name, tree, vcs_cache) for p in
                     tree.enabled_plugins if p.tree_to_index]

    # Bind the name before entering the ``try`` so the error handler below
    # can always test it, even if computing the index name itself fails.
    # It stays None when indexing is skipped.
    index = None
    try:
        if not skip_indexing:
            # Substitute the format, tree name, and uuid into the index identifier.
            index = tree.es_index.format(format=FORMAT,
                                         tree=tree.name,
                                         unique=uuid1())
            create_index_and_wait(
                es,
                index,
                settings={
                    'settings': {
                        'index': {
                            'number_of_shards': tree.es_shards,  # Fewer should be faster, assuming enough RAM.
                            'number_of_replicas': 0  # for speed
                        },
                        # Default analyzers and mappings are in the core plugin.
                        'analysis': reduce(
                                deep_update,
                                (p.analyzers for p in tree.enabled_plugins),
                                {}),

                        # DXR indices are immutable once built. Turn the
                        # refresh interval down to keep the segment count low
                        # while indexing. It will make for less merging later.
                        # We could also simply call "optimize" after we're
                        # done indexing, but it is unthrottled; we'd have to
                        # use shard allocation to do the indexing on one box
                        # and then move it elsewhere for actual use.
                        'refresh_interval':
                            '%is' % config.es_refresh_interval
                    },
                    'mappings': reduce(deep_update,
                                       (p.mappings for p in
                                            tree.enabled_plugins),
                                       {})
                })
        else:
            print("Skipping indexing (due to 'index' in 'skip_stages')")

        # Run pre-build hooks:
        with new_pool() as pool:
            tree_indexers = farm_out('pre_build')
            # Tear down pool to let the build process use more RAM.

        if not skip_build:
            # Set up env vars, and build:
            build_tree(tree, tree_indexers, verbose)
        else:
            print("Skipping rebuild (due to 'build' in 'skip_stages')")

        # Post-build, and index files:
        if not skip_indexing:
            with new_pool() as pool:
                tree_indexers = farm_out('post_build')
                index_files(tree, tree_indexers, index, pool, es)

            # refresh() times out in prod. Wait until it doesn't. That
            # probably means things are ready to rock again.
            with aligned_progressbar(repeat(None), label='Refreshing index') as bar:
                for _ in bar:
                    try:
                        es.refresh(index=index)
                    except (ConnectionError, Timeout):
                        # Keep retrying; repeat(None) never runs out.
                        pass
                    else:
                        break

            # The index was created with 0 replicas for indexing speed; add
            # one now that it is built.
            es.update_settings(
                index,
                {
                    'settings': {
                        'index': {
                            'number_of_replicas': 1  # fairly arbitrary
                        }
                    }
                })
    except Exception:
        # If anything went wrong, delete the index, because we're not
        # going to have a way of returning its name if we raise an
        # exception. ``index`` is None if indexing was skipped or if we
        # failed before even choosing a name; there is nothing to delete
        # then.
        if index is not None:
            delete_index_quietly(es, index)
        raise

    print("Finished '%s' in %s." % (tree.name, datetime.now() - start_time))
    if not skip_cleanup:
        # By default, we remove the temp files, because they're huge.
        rmtree(tree.temp_folder)
    return index
コード例 #4
0
ファイル: build.py プロジェクト: vck/dxr
def index_tree(tree, es, verbose=False):
    """Index a single tree into ES and the filesystem, and return the
    name of the new ES index.

    Return None instead if 'index' is listed in ``config.skip_stages``, since
    no index is created in that case.

    If anything raises along the way, make a best-effort attempt to delete
    the new index (if one was named) and re-raise the original exception.

    :arg tree: The tree to index; its config, worker count, folders, and
        enabled plugins are read
    :arg es: The ES client used to create, refresh, and reconfigure the index
    :arg verbose: Passed through to ``build_tree()``

    """
    def new_pool():
        return ProcessPoolExecutor(max_workers=tree.workers)

    def farm_out(method_name):
        """Farm out a call to all tree indexers across a process pool.

        Return the tree indexers, including any mutations the method call
        might have made.

        Show progress while doing it.

        """
        if not tree.workers:
            # No workers configured: run everything in this process.
            return [save_scribbles(ti, method_name) for ti in tree_indexers]
        else:
            # ``pool`` and ``tree_indexers`` are looked up in index_tree's
            # scope at call time, so this must be called inside one of the
            # ``with new_pool() as pool`` blocks below.
            futures = [
                pool.submit(full_traceback, save_scribbles, ti, method_name)
                for ti in tree_indexers
            ]
            return [
                future.result()
                for future in show_progress(futures, 'Running %s' %
                                            method_name)
            ]

    def delete_index_quietly(es, index):
        """Delete an index, and ignore any error.

        This cannot be done inline in the except clause below, because, even
        if we catch this exception, it spoils the exception info in that
        scope, making the bare ``raise`` raise the not-found error rather than
        whatever went wrong earlier.

        """
        try:
            es.delete_index(index)
        except Exception:
            # Deliberately best-effort: the caller is already propagating a
            # more interesting error.
            pass

    print("Starting tree '%s'." % tree.name)

    # Note starting time
    start_time = datetime.now()

    config = tree.config
    skip_indexing = 'index' in config.skip_stages
    skip_build = 'build' in config.skip_stages
    # Skipping either earlier stage also skips cleanup.
    skip_cleanup = skip_indexing or skip_build or 'clean' in config.skip_stages

    # Create and/or clear out folders:
    ensure_folder(tree.object_folder, tree.source_folder != tree.object_folder)
    ensure_folder(tree.temp_folder, not skip_cleanup)
    ensure_folder(tree.log_folder, not skip_cleanup)
    ensure_folder(join(tree.temp_folder, 'plugins'), not skip_cleanup)
    for plugin in tree.enabled_plugins:
        ensure_folder(join(tree.temp_folder, 'plugins', plugin.name),
                      not skip_cleanup)

    vcs_cache = VcsCache(tree)
    tree_indexers = [
        p.tree_to_index(p.name, tree, vcs_cache) for p in tree.enabled_plugins
        if p.tree_to_index
    ]

    # Bind the name before entering the ``try`` so the error handler below
    # can always test it, even if computing the index name itself fails.
    # It stays None when indexing is skipped.
    index = None
    try:
        if not skip_indexing:
            # Substitute the format, tree name, and uuid into the index identifier.
            index = tree.es_index.format(format=FORMAT,
                                         tree=tree.name,
                                         unique=uuid1())
            create_index_and_wait(
                es,
                index,
                settings={
                    'settings': {
                        'index': {
                            # Fewer should be faster, assuming enough RAM.
                            'number_of_shards': tree.es_shards,
                            'number_of_replicas': 0  # for speed
                        },
                        # Default analyzers and mappings are in the core plugin.
                        'analysis':
                        reduce(deep_update,
                               (p.analyzers for p in tree.enabled_plugins),
                               {}),

                        # DXR indices are immutable once built. Turn the
                        # refresh interval down to keep the segment count low
                        # while indexing. It will make for less merging later.
                        # We could also simply call "optimize" after we're
                        # done indexing, but it is unthrottled; we'd have to
                        # use shard allocation to do the indexing on one box
                        # and then move it elsewhere for actual use.
                        'refresh_interval':
                        '%is' % config.es_refresh_interval
                    },
                    'mappings':
                    reduce(deep_update,
                           (p.mappings for p in tree.enabled_plugins), {})
                })
        else:
            print("Skipping indexing (due to 'index' in 'skip_stages')")

        # Run pre-build hooks:
        with new_pool() as pool:
            tree_indexers = farm_out('pre_build')
            # Tear down pool to let the build process use more RAM.

        if not skip_build:
            # Set up env vars, and build:
            build_tree(tree, tree_indexers, verbose)
        else:
            print("Skipping rebuild (due to 'build' in 'skip_stages')")

        # Post-build, and index files:
        if not skip_indexing:
            with new_pool() as pool:
                tree_indexers = farm_out('post_build')
                index_files(tree, tree_indexers, index, pool, es)

            # refresh() times out in prod. Wait until it doesn't. That
            # probably means things are ready to rock again.
            with aligned_progressbar(repeat(None),
                                     label='Refreshing index') as bar:
                for _ in bar:
                    try:
                        es.refresh(index=index)
                    except (ConnectionError, Timeout):
                        # Keep retrying; repeat(None) never runs out.
                        pass
                    else:
                        break

            # The index was created with 0 replicas for indexing speed; add
            # one now that it is built.
            es.update_settings(
                index,
                {
                    'settings': {
                        'index': {
                            'number_of_replicas': 1  # fairly arbitrary
                        }
                    }
                })
    except Exception:
        # If anything went wrong, delete the index, because we're not
        # going to have a way of returning its name if we raise an
        # exception. ``index`` is None if indexing was skipped or if we
        # failed before even choosing a name; there is nothing to delete
        # then.
        if index is not None:
            delete_index_quietly(es, index)
        raise

    print("Finished '%s' in %s." % (tree.name, datetime.now() - start_time))
    if not skip_cleanup:
        # By default, we remove the temp files, because they're huge.
        rmtree(tree.temp_folder)
    return index