Beispiel #1
0
    def execute(cls, options):
        """Reindex projects into solr, selected by the CLI options.

        Builds a mongo query from --nbhd / --project / --project-regex,
        walks matching projects in chunks, and posts (or runs inline,
        depending on options.tasks) add_projects over id-batches of
        options.max_chunk.  --dry-run only logs.  Returns an error
        string for an unknown neighborhood url prefix.
        """
        q_project = {}
        if options.nbhd:
            nbhd = M.Neighborhood.query.get(url_prefix=options.nbhd)
            if not nbhd:
                return "Invalid neighborhood url prefix."
            q_project['neighborhood_id'] = nbhd._id
        if options.project:
            q_project['shortname'] = options.project
        elif options.project_regex:
            q_project['shortname'] = {'$regex': options.project_regex}

        for chunk in chunked_find(M.Project, q_project):
            project_ids = []
            for p in chunk:
                log.info('Reindex project %s', p.shortname)
                if options.dry_run:
                    continue
                c.project = p
                project_ids.append(p._id)

            try:
                # distinct name so we don't clobber the outer `chunk`
                for id_chunk in chunked_list(project_ids, options.max_chunk):
                    if options.tasks:
                        cls._post_add_projects(id_chunk)
                    else:
                        add_projects(id_chunk)
            except CompoundError as err:  # py2.6+/py3-compatible syntax
                log.exception('Error indexing projects:\n%r', err)
                log.error('%s', err.format_error())
            # flush/clear per chunk to keep the ORM identity map bounded
            M.main_orm_session.flush()
            M.main_orm_session.clear()
Beispiel #2
0
    def execute(cls, options):
        """Reindex projects into solr, selected by the CLI options.

        Builds a mongo query from --nbhd / --project / --project-regex,
        walks matching projects in chunks, and posts (or runs inline,
        depending on options.tasks) add_projects over id-batches of
        options.max_chunk.  --dry-run only logs.  Returns an error
        string for an unknown neighborhood url prefix.
        """
        q_project = {}
        if options.nbhd:
            nbhd = M.Neighborhood.query.get(url_prefix=options.nbhd)
            if not nbhd:
                return "Invalid neighborhood url prefix."
            q_project['neighborhood_id'] = nbhd._id
        if options.project:
            q_project['shortname'] = options.project
        elif options.project_regex:
            q_project['shortname'] = {'$regex': options.project_regex}

        for chunk in chunked_find(M.Project, q_project):
            project_ids = []
            for p in chunk:
                log.info('Reindex project %s', p.shortname)
                if options.dry_run:
                    continue
                c.project = p
                project_ids.append(p._id)

            try:
                # distinct name so we don't clobber the outer `chunk`
                for id_chunk in chunked_list(project_ids, options.max_chunk):
                    if options.tasks:
                        cls._post_add_projects(id_chunk)
                    else:
                        add_projects(id_chunk)
            except CompoundError as err:  # py2.6+/py3-compatible syntax
                log.exception('Error indexing projects:\n%r', err)
                log.error('%s', err.format_error())
            # flush/clear per chunk to keep the ORM identity map bounded
            M.main_orm_session.flush()
            M.main_orm_session.clear()
Beispiel #3
0
 def _chunked_add_artifacts(self, ref_ids):
     """Index artifact refs in bounded batches (post tasks or run inline)."""
     # Solr index ids commonly exceed 100 bytes; max_chunk budgets
     # ~160 bytes apiece plus other document overhead per batch.
     opts = self.options
     for batch in utils.chunked_list(ref_ids, opts.max_chunk):
         if opts.tasks:
             self._post_add_artifacts(batch)
         else:
             add_artifacts(batch,
                           update_solr=opts.solr,
                           update_refs=opts.refs,
                           **self.add_artifact_kwargs)
Beispiel #4
0
 def _chunked_add_artifacts(self, ref_ids):
     """Index artifact refs in bounded batches (post tasks or run inline)."""
     # Batch the refs: each solr index id can run well past 100 bytes,
     # so max_chunk assumes ~160 bytes average plus per-doc overhead.
     for batch in utils.chunked_list(ref_ids, self.options.max_chunk):
         if not self.options.tasks:
             add_artifacts(
                 batch,
                 update_solr=self.options.solr,
                 update_refs=self.options.refs,
                 **self.add_artifact_kwargs)
         else:
             self._post_add_artifacts(batch)
Beispiel #5
0
    def flush(cls):
        """
        Post indexing tasks for everything cached in to_add / to_delete,
        then reset both caches.

        .. warning:: This is NOT triggered when the parent session is
           flushed — callers must invoke it explicitly.
        """
        # Chunk sizes keep each created Monq task document under the
        # max BSON size:
        #   to_delete holds solr index ids, easily over 100 bytes each,
        #     so budget ~160 bytes avg plus other document overhead;
        #   to_add holds 12-byte BSON ObjectIds, so ~1m fit comfortably.
        if cls.to_delete:
            for batch in chunked_list(list(cls.to_delete), 100 * 1000):
                cls._post(index_tasks.del_artifacts, batch)
        if cls.to_add:
            for batch in chunked_list(list(cls.to_add), 1000 * 1000):
                cls._post(index_tasks.add_artifacts, batch)
        cls.to_delete = set()
        cls.to_add = set()
Beispiel #6
0
    def flush(cls):
        """
        Create the queued indexing tasks (adds and deletes) and clear
        the pending caches.

        .. warning:: Not run automatically on parent-session flush; it
           MUST be called explicitly.
        """
        # Post in chunks so the Monq task document stays below the max
        # BSON size: deletes carry solr index ids (~160 bytes each with
        # overhead), adds carry 12-byte ObjectIds (1m fits with room).
        pending_deletes = list(cls.to_delete)
        if pending_deletes:
            for piece in chunked_list(pending_deletes, 100 * 1000):
                cls._post(index_tasks.del_artifacts, piece)
        pending_adds = list(cls.to_add)
        if pending_adds:
            for piece in chunked_list(pending_adds, 1000 * 1000):
                cls._post(index_tasks.add_artifacts, piece)
        cls.to_delete = set()
        cls.to_add = set()
Beispiel #7
0
 def execute(cls, options):
     """Reindex all users into solr.

     Walks every user in chunks; per chunk, posts (or runs inline,
     depending on options.tasks) add_users over id-batches of
     options.max_chunk.  --dry-run only logs.  CompoundError from the
     indexer is logged, not re-raised.
     """
     for chunk in chunked_find(M.User, {}):
         user_ids = []
         for u in chunk:
             log.info('Reindex user %s', u.username)
             if options.dry_run:
                 continue
             user_ids.append(u._id)
         try:
             # distinct name so we don't clobber the outer `chunk`
             for id_chunk in chunked_list(user_ids, options.max_chunk):
                 if options.tasks:
                     cls._post_add_users(id_chunk)
                 else:
                     add_users(id_chunk)
         except CompoundError as err:  # py2.6+/py3-compatible syntax
             log.exception('Error indexing users:\n%r', err)
             log.error('%s', err.format_error())
         # flush/clear per chunk to keep the ORM identity map bounded
         M.main_orm_session.flush()
         M.main_orm_session.clear()
Beispiel #8
0
 def execute(cls, options):
     """Reindex all users into solr.

     Walks every user in chunks; per chunk, posts (or runs inline,
     depending on options.tasks) add_users over id-batches of
     options.max_chunk.  --dry-run only logs.  CompoundError from the
     indexer is logged, not re-raised.
     """
     for chunk in chunked_find(M.User, {}):
         user_ids = []
         for u in chunk:
             log.info('Reindex user %s', u.username)
             if options.dry_run:
                 continue
             user_ids.append(u._id)
         try:
             # distinct name so we don't clobber the outer `chunk`
             for id_chunk in chunked_list(user_ids, options.max_chunk):
                 if options.tasks:
                     cls._post_add_users(id_chunk)
                 else:
                     add_users(id_chunk)
         except CompoundError as err:  # py2.6+/py3-compatible syntax
             log.exception('Error indexing users:\n%r', err)
             log.error('%s', err.format_error())
         # flush/clear per chunk to keep the ORM identity map bounded
         M.main_orm_session.flush()
         M.main_orm_session.clear()
Beispiel #9
0
 def test_chunked_list(self):
     """chunked_list over 10 items with size 3 yields 3+3+3+1, order kept."""
     data = list(range(10))  # explicit list: identical on py2, needed on py3
     chunks = list(utils.chunked_list(data, 3))
     self.assertEqual(len(chunks), 4)
     self.assertEqual(len(chunks[0]), 3)
     # flattening the chunks must reproduce the original sequence
     self.assertEqual([el for sublist in chunks for el in sublist], data)
Beispiel #10
0
 def test_chunked_list(self):
     """chunked_list over 10 items with size 3 yields 3+3+3+1, order kept."""
     data = list(range(10))  # explicit list: identical on py2, needed on py3
     chunks = list(utils.chunked_list(data, 3))
     self.assertEqual(len(chunks), 4)
     self.assertEqual(len(chunks[0]), 3)
     # flattening the chunks must reproduce the original sequence
     self.assertEqual([el for sublist in chunks for el in sublist], data)
Beispiel #11
0
 def _chunked_add_artifacts(self, ref_ids, chunk_size=100 * 1000):
     """Post add-artifact tasks for ref_ids in batches of chunk_size.

     ref_ids contains solr index ids which can easily be over 100
     bytes; the default chunk_size allows for 160 bytes avg, plus room
     for other document overhead, keeping each task under the BSON cap.
     """
     for chunk in utils.chunked_list(ref_ids, chunk_size):
         self._post_add_artifacts(chunk)
Beispiel #12
0
    def execute(cls, options):
        """Refresh repository data for projects selected by the CLI options.

        Optionally deletes cached mongo commit docs first (--clean wipes
        all commits of a repo; --clean-after wipes commits newer than a
        date), then re-runs each matching repo's refresh.  --dry-run only
        logs.  Returns an error string for an unknown neighborhood prefix.
        """
        q_project = {}
        if options.nbhd:
            nbhd = M.Neighborhood.query.get(url_prefix=options.nbhd)
            if not nbhd:
                return "Invalid neighborhood url prefix."
            q_project['neighborhood_id'] = nbhd._id
        if options.project:
            q_project['shortname'] = options.project
        elif options.project_regex:
            q_project['shortname'] = {'$regex': options.project_regex}

        log.info('Refreshing repositories')
        for chunk in chunked_find(M.Project, q_project):
            for p in chunk:
                log.info("Refreshing repos for project '%s'." % p.shortname)
                if options.dry_run:
                    continue
                c.project = p
                if options.mount_point:
                    mount_points = [options.mount_point]
                else:
                    mount_points = [ac.options.mount_point for ac in
                                    M.AppConfig.query.find(dict(project_id=p._id))]
                for app in (p.app_instance(mp) for mp in mount_points):
                    c.app = app
                    # only repository-type apps carry a .repo attribute
                    if not hasattr(app, 'repo'):
                        continue
                    if c.app.repo.tool.lower() not in options.repo_types:
                        log.info("Skipping %r: wrong type (%s)", c.app.repo,
                                 c.app.repo.tool.lower())
                        continue

                    # commit ids whose cached mongo docs should be removed
                    ci_ids = []
                    if options.clean:
                        ci_ids = list(c.app.repo.all_commit_ids())
                    elif options.clean_after:
                        for ci in M.repository.CommitDoc.m.find({'repo_ids': c.app.repo._id,
                                                                 'committed.date': {'$gt': options.clean_after}}):
                            ci_ids.append(ci._id)

                    if ci_ids:
                        log.info("Deleting mongo data for %i commits...",
                                 len(ci_ids))
                        # delete these in chunks, otherwise the query doc can
                        # exceed the max BSON size limit (16MB at the moment)
                        for ci_ids_chunk in chunked_list(ci_ids, 3000):
                            i = M.repository.CommitDoc.m.find(
                                {"_id": {"$in": ci_ids_chunk}}).count()
                            if i:
                                log.info("Deleting %i CommitDoc docs...", i)
                                M.repository.CommitDoc.m.remove(
                                    {"_id": {"$in": ci_ids_chunk}})

                        # we used to have a TreesDoc (plural) collection to provide a mapping of commit_id to tree_id
                        # so that we could clear the relevant TreeDoc records
                        # its ok though, since they are created in refresh_tree_info() and overwrite existing records

                        for ci_ids_chunk in chunked_list(ci_ids, 3000):
                            # delete LastCommitDocs
                            i = M.repository.LastCommitDoc.m.find(
                                dict(commit_id={'$in': ci_ids_chunk})).count()
                            if i:
                                log.info(
                                    "Deleting %i LastCommitDoc docs...", i)
                                M.repository.LastCommitDoc.m.remove(
                                    dict(commit_id={'$in': ci_ids_chunk}))

                        # free the (possibly large) id list before the refresh
                        del ci_ids

                    try:
                        if options.all:
                            log.info('Refreshing ALL commits in %r',
                                     c.app.repo)
                        else:
                            log.info('Refreshing NEW commits in %r',
                                     c.app.repo)
                        if options.profile:
                            # profile the refresh run into ./refresh.profile
                            import cProfile
                            cProfile.runctx(
                                'c.app.repo.refresh(options.all, notify=options.notify, '
                                '   commits_are_new=options.commits_are_new)',
                                globals(), locals(), 'refresh.profile')
                        else:
                            c.app.repo.refresh(
                                options.all, notify=options.notify, commits_are_new=options.commits_are_new)
                    except Exception:
                        log.exception('Error refreshing %r', c.app.repo)
            ThreadLocalORMSession.flush_all()
Beispiel #13
0
def main(options):
    """Refresh repositories for projects matched by the CLI options.

    Optionally wipes all cached repo docs (--clean-all) or a single
    repo's cached docs (--clean) before re-running each matching repo's
    refresh.  --dry-run only logs.  Returns an error string for an
    unknown neighborhood url prefix.
    """
    q_project = {}
    if options.nbhd:
        nbhd = M.Neighborhood.query.get(url_prefix=options.nbhd)
        if not nbhd:
            return "Invalid neighborhood url prefix."
        q_project['neighborhood_id'] = nbhd._id
    if options.project:
        q_project['shortname'] = options.project
    elif options.project_regex:
        q_project['shortname'] = {'$regex': options.project_regex}

    log.info('Refreshing repositories')
    if options.clean_all:
        log.info('Removing all repository objects')
        M.repo.CommitDoc.m.remove({})
        M.repo.TreeDoc.m.remove({})
        M.repo.TreesDoc.m.remove({})
        M.repo.DiffInfoDoc.m.remove({})
        M.repo.CommitRunDoc.m.remove({})

    for chunk in chunked_find(M.Project, q_project):
        for p in chunk:
            log.info("Refreshing repos for project '%s'." % p.shortname)
            if options.dry_run:
                continue
            c.project = p
            if options.mount_point:
                mount_points = [options.mount_point]
            else:
                mount_points = [
                    ac.options.mount_point
                    for ac in M.AppConfig.query.find(dict(project_id=p._id))
                ]
            for app in (p.app_instance(mp) for mp in mount_points):
                c.app = app
                # only repository-type apps carry a .repo attribute
                if not hasattr(app, 'repo'):
                    continue
                if c.app.repo.tool.lower() not in options.repo_types:
                    log.info("Skipping %r: wrong type (%s)", c.app.repo,
                             c.app.repo.tool.lower())
                    continue
                try:
                    c.app.repo._impl._setup_hooks()
                except Exception:  # was bare except: don't swallow SystemExit/KeyboardInterrupt
                    log.exception('Error setting up hooks for %r', c.app.repo)

                if options.clean:
                    ci_ids = list(c.app.repo.all_commit_ids())
                    log.info("Deleting mongo data for %i commits...",
                             len(ci_ids))
                    tree_ids = [
                        tree_id for doc in M.repo.TreesDoc.m.find(
                            {"_id": {
                                "$in": ci_ids
                            }}, {"tree_ids": 1})
                        for tree_id in doc.get("tree_ids", [])
                    ]

                    i = M.repo.CommitDoc.m.find({
                        "_id": {
                            "$in": ci_ids
                        }
                    }).count()
                    log.info("Deleting %i CommitDoc docs...", i)
                    M.repo.CommitDoc.m.remove({"_id": {"$in": ci_ids}})

                    # delete these in chunks, otherwise the query doc can
                    # exceed the max BSON size limit (16MB at the moment)
                    for tree_ids_chunk in chunked_list(tree_ids, 300000):
                        i = M.repo.TreeDoc.m.find({
                            "_id": {
                                "$in": tree_ids_chunk
                            }
                        }).count()
                        log.info("Deleting %i TreeDoc docs...", i)
                        M.repo.TreeDoc.m.remove(
                            {"_id": {
                                "$in": tree_ids_chunk
                            }})
                        i = M.repo.LastCommitDoc.m.find({
                            "object_id": {
                                "$in": tree_ids_chunk
                            }
                        }).count()
                        log.info("Deleting %i LastCommitDoc docs...", i)
                        M.repo.LastCommitDoc.m.remove(
                            {"object_id": {
                                "$in": tree_ids_chunk
                            }})
                    del tree_ids

                    # delete these after TreeDoc and LastCommitDoc so that if
                    # we crash, we don't lose the ability to delete those
                    i = M.repo.TreesDoc.m.find({
                        "_id": {
                            "$in": ci_ids
                        }
                    }).count()
                    log.info("Deleting %i TreesDoc docs...", i)
                    M.repo.TreesDoc.m.remove({"_id": {"$in": ci_ids}})

                    # delete LastCommitDocs for non-trees
                    repo_lastcommit_re = re.compile("^{}:".format(
                        c.app.repo._id))
                    i = M.repo.LastCommitDoc.m.find(
                        dict(_id=repo_lastcommit_re)).count()
                    log.info(
                        "Deleting %i remaining LastCommitDoc docs, by repo id...",
                        i)
                    M.repo.LastCommitDoc.m.remove(dict(_id=repo_lastcommit_re))

                    i = M.repo.DiffInfoDoc.m.find({
                        "_id": {
                            "$in": ci_ids
                        }
                    }).count()
                    log.info("Deleting %i DiffInfoDoc docs...", i)
                    M.repo.DiffInfoDoc.m.remove({"_id": {"$in": ci_ids}})

                    i = M.repo.CommitRunDoc.m.find({
                        "commit_ids": {
                            "$in": ci_ids
                        }
                    }).count()
                    log.info("Deleting %i CommitRunDoc docs...", i)
                    M.repo.CommitRunDoc.m.remove(
                        {"commit_ids": {
                            "$in": ci_ids
                        }})
                    del ci_ids

                try:
                    if options.all:
                        log.info('Refreshing ALL commits in %r', c.app.repo)
                    else:
                        log.info('Refreshing NEW commits in %r', c.app.repo)
                    if options.profile:
                        # profile the refresh run into ./refresh.profile
                        import cProfile
                        cProfile.runctx(
                            'c.app.repo.refresh(options.all, notify=options.notify)',
                            globals(), locals(), 'refresh.profile')
                    else:
                        c.app.repo.refresh(options.all, notify=options.notify)
                except Exception:  # was bare except: don't swallow SystemExit/KeyboardInterrupt
                    log.exception('Error refreshing %r', c.app.repo)
        ThreadLocalORMSession.flush_all()
        ThreadLocalORMSession.close_all()
    def execute(cls, options):
        """Refresh repository data for projects selected by the CLI options.

        With --clean, deletes a repo's cached mongo docs (commits, trees,
        last-commits, diffs, commit runs) in BSON-safe chunks before
        re-running the refresh.  --dry-run only logs.  Returns an error
        string for an unknown neighborhood url prefix.
        """
        q_project = {}
        if options.nbhd:
            nbhd = M.Neighborhood.query.get(url_prefix=options.nbhd)
            if not nbhd:
                return "Invalid neighborhood url prefix."
            q_project['neighborhood_id'] = nbhd._id
        if options.project:
            q_project['shortname'] = options.project
        elif options.project_regex:
            q_project['shortname'] = {'$regex': options.project_regex}

        log.info('Refreshing repositories')
        for chunk in chunked_find(M.Project, q_project):
            for p in chunk:
                log.info("Refreshing repos for project '%s'." % p.shortname)
                if options.dry_run:
                    continue
                c.project = p
                if options.mount_point:
                    mount_points = [options.mount_point]
                else:
                    mount_points = [ac.options.mount_point for ac in
                                    M.AppConfig.query.find(dict(project_id=p._id))]
                for app in (p.app_instance(mp) for mp in mount_points):
                    c.app = app
                    # only repository-type apps carry a .repo attribute
                    if not hasattr(app, 'repo'):
                        continue
                    if c.app.repo.tool.lower() not in options.repo_types:
                        log.info("Skipping %r: wrong type (%s)", c.app.repo,
                                 c.app.repo.tool.lower())
                        continue

                    if options.clean:
                        ci_ids = list(c.app.repo.all_commit_ids())
                        log.info("Deleting mongo data for %i commits...",
                                 len(ci_ids))
                        # like the tree_ids themselves below, we need to process these in
                        # chunks to avoid hitting the BSON max size limit
                        tree_ids = []
                        for ci_ids_chunk in chunked_list(ci_ids, 3000):
                            tree_ids.extend([
                                tree_id for doc in
                                M.repo.TreesDoc.m.find(
                                    {"_id": {"$in": ci_ids_chunk}},
                                    {"tree_ids": 1})
                                for tree_id in doc.get("tree_ids", [])])

                            i = M.repo.CommitDoc.m.find(
                                {"_id": {"$in": ci_ids_chunk}}).count()
                            if i:
                                log.info("Deleting %i CommitDoc docs...", i)
                                M.repo.CommitDoc.m.remove(
                                    {"_id": {"$in": ci_ids_chunk}})

                        # delete these in chunks, otherwise the query doc can
                        # exceed the max BSON size limit (16MB at the moment)
                        for tree_ids_chunk in chunked_list(tree_ids, 300000):
                            i = M.repo.TreeDoc.m.find(
                                {"_id": {"$in": tree_ids_chunk}}).count()
                            if i:
                                log.info("Deleting %i TreeDoc docs...", i)
                                M.repo.TreeDoc.m.remove(
                                    {"_id": {"$in": tree_ids_chunk}})
                        del tree_ids

                        # delete these after TreeDoc and LastCommitDoc so that if
                        # we crash, we don't lose the ability to delete those
                        for ci_ids_chunk in chunked_list(ci_ids, 3000):
                            # delete TreesDocs
                            i = M.repo.TreesDoc.m.find(
                                {"_id": {"$in": ci_ids_chunk}}).count()
                            if i:
                                log.info("Deleting %i TreesDoc docs...", i)
                                M.repo.TreesDoc.m.remove(
                                    {"_id": {"$in": ci_ids_chunk}})

                            # delete LastCommitDocs
                            i = M.repo.LastCommitDoc.m.find(
                                dict(commit_ids={'$in': ci_ids_chunk})).count()
                            if i:
                                log.info(
                                    "Deleting %i remaining LastCommitDoc docs, by repo id...", i)
                                M.repo.LastCommitDoc.m.remove(
                                    dict(commit_ids={'$in': ci_ids_chunk}))

                            i = M.repo.DiffInfoDoc.m.find(
                                {"_id": {"$in": ci_ids_chunk}}).count()
                            if i:
                                log.info("Deleting %i DiffInfoDoc docs...", i)
                                M.repo.DiffInfoDoc.m.remove(
                                    {"_id": {"$in": ci_ids_chunk}})

                            i = M.repo.CommitRunDoc.m.find(
                                {"commit_ids": {"$in": ci_ids_chunk}}).count()
                            if i:
                                log.info("Deleting %i CommitRunDoc docs...", i)
                                M.repo.CommitRunDoc.m.remove(
                                    {"commit_ids": {"$in": ci_ids_chunk}})
                        del ci_ids

                    try:
                        if options.all:
                            log.info('Refreshing ALL commits in %r',
                                     c.app.repo)
                        else:
                            log.info('Refreshing NEW commits in %r',
                                     c.app.repo)
                        if options.profile:
                            # profile the refresh run into ./refresh.profile
                            import cProfile
                            cProfile.runctx(
                                'c.app.repo.refresh(options.all, notify=options.notify)',
                                globals(), locals(), 'refresh.profile')
                        else:
                            c.app.repo.refresh(
                                options.all, notify=options.notify)
                    except Exception:  # was bare except: don't swallow SystemExit/KeyboardInterrupt
                        log.exception('Error refreshing %r', c.app.repo)
            ThreadLocalORMSession.flush_all()
Beispiel #15
0
    def execute(cls, options):
        """Refresh repository data for projects selected by the CLI options.

        Optionally deletes cached mongo commit docs first (--clean wipes
        all commits of a repo; --clean-after wipes commits newer than a
        date), then re-runs each matching repo's refresh.  --dry-run only
        logs.  Returns an error string for an unknown neighborhood prefix.
        """
        q_project = {}
        if options.nbhd:
            nbhd = M.Neighborhood.query.get(url_prefix=options.nbhd)
            if not nbhd:
                return "Invalid neighborhood url prefix."
            q_project['neighborhood_id'] = nbhd._id
        if options.project:
            q_project['shortname'] = options.project
        elif options.project_regex:
            q_project['shortname'] = {'$regex': options.project_regex}

        log.info('Refreshing repositories')
        for chunk in chunked_find(M.Project, q_project):
            for p in chunk:
                log.info("Refreshing repos for project '%s'." % p.shortname)
                if options.dry_run:
                    continue
                c.project = p
                if options.mount_point:
                    mount_points = [options.mount_point]
                else:
                    mount_points = [ac.options.mount_point for ac in
                                    M.AppConfig.query.find(dict(project_id=p._id))]
                for app in (p.app_instance(mp) for mp in mount_points):
                    c.app = app
                    # only repository-type apps carry a .repo attribute
                    if not hasattr(app, 'repo'):
                        continue
                    if c.app.repo.tool.lower() not in options.repo_types:
                        log.info("Skipping %r: wrong type (%s)", c.app.repo,
                                 c.app.repo.tool.lower())
                        continue

                    # commit ids whose cached mongo docs should be removed
                    ci_ids = []
                    if options.clean:
                        ci_ids = list(c.app.repo.all_commit_ids())
                    elif options.clean_after:
                        for ci in M.repository.CommitDoc.m.find({'repo_ids': c.app.repo._id,
                                                                 'committed.date': {'$gt': options.clean_after}}):
                            ci_ids.append(ci._id)

                    if ci_ids:
                        log.info("Deleting mongo data for %i commits...",
                                 len(ci_ids))
                        # delete these in chunks, otherwise the query doc can
                        # exceed the max BSON size limit (16MB at the moment)
                        for ci_ids_chunk in chunked_list(ci_ids, 3000):
                            i = M.repository.CommitDoc.m.find(
                                {"_id": {"$in": ci_ids_chunk}}).count()
                            if i:
                                log.info("Deleting %i CommitDoc docs...", i)
                                M.repository.CommitDoc.m.remove(
                                    {"_id": {"$in": ci_ids_chunk}})

                        # we used to have a TreesDoc (plural) collection to provide a mapping of commit_id to tree_id
                        # so that we could clear the relevant TreeDoc records
                        # its ok though, since they are created in refresh_tree_info() and overwrite existing records

                        for ci_ids_chunk in chunked_list(ci_ids, 3000):
                            # delete LastCommitDocs
                            i = M.repository.LastCommitDoc.m.find(
                                dict(commit_id={'$in': ci_ids_chunk})).count()
                            if i:
                                log.info(
                                    "Deleting %i LastCommitDoc docs...", i)
                                M.repository.LastCommitDoc.m.remove(
                                    dict(commit_id={'$in': ci_ids_chunk}))

                            i = M.repository.CommitRunDoc.m.find(
                                {"commit_ids": {"$in": ci_ids_chunk}}).count()
                            if i:
                                log.info("Deleting %i CommitRunDoc docs...", i)
                                M.repository.CommitRunDoc.m.remove(
                                    {"commit_ids": {"$in": ci_ids_chunk}})
                        del ci_ids

                    try:
                        if options.all:
                            log.info('Refreshing ALL commits in %r',
                                     c.app.repo)
                        else:
                            log.info('Refreshing NEW commits in %r',
                                     c.app.repo)
                        if options.profile:
                            # profile the refresh run into ./refresh.profile
                            import cProfile
                            cProfile.runctx(
                                'c.app.repo.refresh(options.all, notify=options.notify, '
                                '   commits_are_new=options.commits_are_new)',
                                globals(), locals(), 'refresh.profile')
                        else:
                            c.app.repo.refresh(
                                options.all, notify=options.notify, commits_are_new=options.commits_are_new)
                    except Exception:  # was bare except: don't swallow SystemExit/KeyboardInterrupt
                        log.exception('Error refreshing %r', c.app.repo)
            ThreadLocalORMSession.flush_all()