Example #1
def search(request, pagename):
    from ductus.index import get_indexing_mongo_database
    indexing_db = get_indexing_mongo_database()
    if indexing_db is None:
        raise Http404("indexing database is not available")
    collection = indexing_db.urn_index

    # figure out target language (if given).
    # fixme: this probably doesn't belong here
    target_language_tags = [tag for tag in request.GET.getlist('tag')
                            if tag.startswith('target-language:')]
    target_language_code = None
    target_language_description = None
    if target_language_tags:
        target_language_code = target_language_tags[0].partition(':')[2]
        from ductus.utils.bcp47 import language_tag_to_description
        try:
            target_language_description = language_tag_to_description(target_language_code)
        except KeyError:
            pass

    # return results to the user
    return render_to_response('special/search.html', {
        'target_language_code': target_language_code,
        'target_language_description': target_language_description,
    }, RequestContext(request))
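
The view above assigns collection but the snippet stops before any query is issued; it also assumes the usual Django imports (Http404, render_to_response, RequestContext) at module level. A minimal sketch of how the tag filter could be run against urn_index, borrowing the query shape from Examples #2 and #3 (the field names and the $all filter are assumptions for illustration, not code from the source):

# Hypothetical continuation of search(): fetch pages carrying every requested tag.
# Field names mirror the find() calls in Examples #2 and #3; variable names are illustrative.
requested_tags = request.GET.getlist('tag')
query = {"current_wikipages": {"$not": {"$size": 0}}}  # only pages still reachable from the wiki
if requested_tags:
    query["tags"] = {"$all": requested_tags}           # document must carry all requested tags
matching_pages = collection.find(query, {"tags": 1, "current_wikipages": 1})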
Example #2
def otics_front_page(request, pagename=None):
    from ductus.index import get_indexing_mongo_database
    indexing_db = get_indexing_mongo_database()

    languages = {}
    if indexing_db is not None:
        collection = indexing_db.urn_index
        relevant_pages = collection.find(
            {
                "tags": {
                    "$regex": "^target-language:"
                },
                "current_wikipages": {
                    "$not": {
                        "$size": 0
                    }
                },
            }, {"tags": 1})
        for page in relevant_pages:
            for tag in page["tags"]:
                if tag.startswith("target-language:"):
                    lang_code = tag[len("target-language:"):]
                    languages[lang_code] = languages.get(lang_code, 0) + 1

    total_lesson_count = sum(languages.values())
    language_tag_cloud = []
    for lang_code, count in sorted(six.iteritems(languages)):
        if count < 2:
            # XXX: until the tag cloud is fixed, don't display languages with
            # only one lesson
            continue
        try:
            descr = language_tag_to_description(lang_code)
        except KeyError:
            pass
        else:
            # XXX: temporary overrides
            if lang_code == 'el':
                descr = u'Greek'
            elif lang_code == 'km':
                descr = u'Khmer'
            language_tag_cloud.append(
                TagCloudElement(
                    count,
                    label=descr,
                    href=(u"/special/search?tag=target-language:%s" %
                          lang_code),
                    data=lang_code))
    prepare_tag_cloud(language_tag_cloud, min_percent=70, max_percent=150)
    return render_to_response(
        'otics/front_page.html', {
            'language_tag_cloud': language_tag_cloud,
            'total_lesson_count': total_lesson_count,
            'total_language_count': len(languages),
        }, RequestContext(request))
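
The counting loop above builds a plain dict by hand; collections.Counter from the standard library gives an equivalent, slightly more compact form. A sketch, not taken from the source; relevant_pages is the cursor returned by collection.find() above, and six, language_tag_to_description, TagCloudElement and prepare_tag_cloud are assumed to be imported elsewhere in the module:

from collections import Counter

# Equivalent aggregation of per-language lesson counts using Counter.
languages = Counter(
    tag[len("target-language:"):]
    for page in relevant_pages
    for tag in page["tags"]
    if tag.startswith("target-language:")
)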
Example #3
def otics_front_page(request, pagename=None):
    from ductus.index import get_indexing_mongo_database
    indexing_db = get_indexing_mongo_database()

    languages = {}
    if indexing_db is not None:
        collection = indexing_db.urn_index
        relevant_pages = collection.find({
            "tags": {"$regex": "^target-language:"},
            "current_wikipages": {"$not": {"$size": 0}},
        }, {"tags": 1})
        for page in relevant_pages:
            for tag in page["tags"]:
                if tag.startswith("target-language:"):
                    lang_code = tag[len("target-language:"):]
                    languages[lang_code] = languages.get(lang_code, 0) + 1

    total_lesson_count = sum(languages.values())
    language_tag_cloud = []
    for lang_code, count in sorted(six.iteritems(languages)):
        if count < 2:
            # XXX: until the tag cloud is fixed, don't display languages with
            # only one lesson
            continue
        try:
            descr = language_tag_to_description(lang_code)
        except KeyError:
            pass
        else:
            # XXX: temporary overrides
            if lang_code == 'el':
                descr = u'Greek'
            elif lang_code == 'km':
                descr = u'Khmer'
            language_tag_cloud.append(TagCloudElement(count, label=descr, href=(u"/special/search?tag=target-language:%s" % lang_code), data=lang_code))
    prepare_tag_cloud(language_tag_cloud, min_percent=70, max_percent=150)
    return render_to_response('otics/front_page.html', {
        'language_tag_cloud': language_tag_cloud,
        'total_lesson_count': total_lesson_count,
        'total_language_count': len(languages),
    }, RequestContext(request))
Example #4
    def handle_noargs(self, **options):
        from ductus.index import get_indexing_mongo_database
        indexing_db = get_indexing_mongo_database()
        if indexing_db is None:
            raise Exception("indexing database is not available")
        collection = indexing_db.urn_index

        def perform_upsert(urn, obj, ignore=None):
            # REMEMBER that dictionary order matters in mongodb; we just ignore
            # it

            # fixme: first inspect element to see if things might already be
            # right.  also check to make sure there aren't any unexpected
            # attributes on the toplevel element.  and do the same thing for
            # blobs too.

            obj = dict(obj)
            obj["urn"] = urn
            collection.update({"urn": urn}, obj, upsert=True, safe=True)
            verified_urns.add(urn)

        logging.basicConfig(level=logging.INFO) # FIXME

        # create the mongodb indexes
        collection.ensure_index("urn", unique=True, drop_dups=True)
        collection.ensure_index("parents", sparse=True)
        collection.ensure_index("tags", sparse=True)
        collection.ensure_index("links")
        collection.ensure_index("recursive_links")

        # Begin actual code

        from lxml import etree

        from ductus.resource import get_resource_database, UnexpectedHeader, hash_name
        from ductus.wiki.models import WikiPage

        resource_database = get_resource_database()

        verified_urns = set()
        current_wikipages_map = {}

        operations = {None: 0}

        def verify(urn):
            """Updates a urn's indexing info and returns the set of its recursive links
            """
            operations[None] += 1
            logger.info("operation %d: processing %s", operations[None], urn)

            if urn in verified_urns:
                q = collection.find_one({"urn": urn}, {"recursive_links": 1})
                try:
                    return set(q["recursive_links"])
                except KeyError:
                    return set()

            try:
                tree = resource_database.get_xml_tree(urn)
            except UnexpectedHeader:
                # it must be a blob
                perform_upsert(urn, {"fqn": None})
                return set()

            links = set()
            for event, element in etree.iterwalk(tree):
                if '{http://www.w3.org/1999/xlink}href' in element.attrib and element.getparent().tag != '{http://ductus.us/ns/2009/ductus}parents':
                    link = element.attrib['{http://www.w3.org/1999/xlink}href']
                    if link.startswith('urn:%s:' % hash_name):
                        links.add(link)

            recursive_links = set(links)
            for link in links:
                additional_links = verify(link)
                recursive_links.update(additional_links)

            resource = resource_database.get_resource_object(urn)

            assert resource.fqn is not None
            obj = {
                "fqn": resource.fqn,
                "links": list(links),
                "recursive_links": sorted(recursive_links),
                "current_wikipages": sorted(current_wikipages_map.get(urn, ())),
            }
            try:
                obj["parents"] = sorted([parent.href for parent in resource.common.parents])
                obj["tags"] = sorted([tag.value for tag in resource.tags])
            except AttributeError:
                pass
            perform_upsert(urn, obj)

            return recursive_links

        for wikipage in WikiPage.objects.all():
            revision = wikipage.get_latest_revision()
            if revision is not None and revision.urn:
                urn = 'urn:' + revision.urn
                current_wikipages_map.setdefault(urn, set()).add(wikipage.name)

        n_attempted = n_successful = 0
        for key in resource_database:
            n_attempted += 1
            try:
                verify(key)
            except Exception:
                logger.warning("Key failed: %s", key)
            else:
                n_successful += 1

        logger.info("Successfully processed %d of %d keys", n_successful, n_attempted)
Example #5
    def handle_noargs(self, **options):
        from ductus.index import get_indexing_mongo_database
        indexing_db = get_indexing_mongo_database()
        if indexing_db is None:
            raise Exception("indexing database is not available")
        collection = indexing_db.urn_index

        def perform_upsert(urn, obj, ignore=None):
            # REMEMBER that dictionary order matters in mongodb; we just ignore
            # it

            # fixme: first inspect element to see if things might already be
            # right.  also check to make sure there aren't any unexpected
            # attributes on the toplevel element.  and do the same thing for
            # blobs too.

            obj = dict(obj)
            obj["urn"] = urn
            collection.update({"urn": urn}, obj, upsert=True, safe=True)
            verified_urns.add(urn)

        logging.basicConfig(level=logging.INFO)  # FIXME

        # create the mongodb indexes
        collection.ensure_index("urn", unique=True, drop_dups=True)
        collection.ensure_index("parents", sparse=True)
        collection.ensure_index("tags", sparse=True)
        collection.ensure_index("links")
        collection.ensure_index("recursive_links")

        # Begin actual code

        from lxml import etree

        from ductus.resource import get_resource_database, UnexpectedHeader, hash_name
        from ductus.wiki.models import WikiPage

        resource_database = get_resource_database()

        verified_urns = set()
        current_wikipages_map = {}

        operations = {None: 0}

        def verify(urn):
            """Updates a urn's indexing info and returns the set of its recursive links
            """
            operations[None] += 1
            logger.info("operation %d: processing %s", operations[None], urn)

            if urn in verified_urns:
                q = collection.find_one({"urn": urn}, {"recursive_links": 1})
                try:
                    return set(q["recursive_links"])
                except KeyError:
                    return set()

            try:
                tree = resource_database.get_xml_tree(urn)
            except UnexpectedHeader:
                # it must be a blob
                perform_upsert(urn, {"fqn": None})
                return set()

            links = set()
            for event, element in etree.iterwalk(tree):
                if ('{http://www.w3.org/1999/xlink}href' in element.attrib
                        and element.getparent().tag !=
                        '{http://ductus.us/ns/2009/ductus}parents'):
                    link = element.attrib['{http://www.w3.org/1999/xlink}href']
                    if link.startswith('urn:%s:' % hash_name):
                        links.add(link)

            recursive_links = set(links)
            for link in links:
                additional_links = verify(link)
                recursive_links.update(additional_links)

            resource = resource_database.get_resource_object(urn)

            assert resource.fqn is not None
            obj = {
                "fqn": resource.fqn,
                "links": list(links),
                "recursive_links": sorted(recursive_links),
                "current_wikipages": sorted(current_wikipages_map.get(urn,
                                                                      ())),
            }
            try:
                obj["parents"] = sorted(
                    [parent.href for parent in resource.common.parents])
                obj["tags"] = sorted([tag.value for tag in resource.tags])
            except AttributeError:
                pass
            perform_upsert(urn, obj)

            return recursive_links

        for wikipage in WikiPage.objects.all():
            revision = wikipage.get_latest_revision()
            if revision is not None and revision.urn:
                urn = 'urn:' + revision.urn
                current_wikipages_map.setdefault(urn, set()).add(wikipage.name)

        n_attempted = n_successful = 0
        for key in resource_database:
            n_attempted += 1
            try:
                verify(key)
            except Exception:
                logger.warning("Key failed: %s", key)
            else:
                n_successful += 1

        logger.info("Successfully processed %d of %d keys", n_successful,
                    n_attempted)
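
Both versions of this command rely on PyMongo calls that have since been deprecated or removed: Collection.update(..., safe=True) and ensure_index(). If the command were ported to PyMongo 3.x or later, the equivalent calls would look roughly like this sketch (a hedged translation, not part of the original project):

# replace_one() swaps in the whole document for the matching filter, as update()
# with a full replacement document did; acknowledged writes are the default, so
# safe=True has no modern counterpart and is simply dropped.
collection.replace_one({"urn": urn}, obj, upsert=True)

# create_index() supersedes ensure_index(); drop_dups is no longer supported,
# so duplicate urns would need to be cleaned up before creating the unique index.
collection.create_index("urn", unique=True)
collection.create_index("parents", sparse=True)
collection.create_index("tags", sparse=True)
collection.create_index("links")
collection.create_index("recursive_links")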