def process_check_urls(limit=10, timeout=30):
    """Check the URL of every course, library and community item and file a CSV report.

    Every item is handed to ``process_item`` through a ``Processor``; the CSV
    writer, the total item count, a counter and an ``updated_statuses``
    mapping are attached to the processor as attributes.  After
    ``processor.finish()`` the collected statuses are written back to the
    database with one bulk ``update()`` per status/type pair, and the CSV
    file is stored through ``Report.file.save``.

    :param limit: passed to ``Processor(limit=...)`` (presumably the maximum
        number of parallel workers -- confirm against ``Processor``).
    :param timeout: forwarded unchanged as the last argument of every
        ``process_item`` call.
    """
    # Maps the type label given to process_item back to the model to update.
    models_by_type = {
        'course': Course,
        'library': Library,
        'community item': CommunityItem,
    }

    f = NamedTemporaryFile()
    try:
        writer = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
        writer.writerow(BASE_HEADER)

        processor = Processor(limit=limit, reuse=1)
        processor.writer = writer
        processor.total_items = Course.objects.all().count() + \
                                Library.objects.all().count() + \
                                CommunityItem.objects.all().count()
        processor.cnt = 0
        processor.updated_statuses = {}

        process = processor.manage(pprocess.MakeReusable(process_item))

        fields = ("id", "http_status", "slug", "title", "collection", "url")

        for item_id, status, slug, title, collection, url in \
                Course.objects.values_list(*fields):
            process("course", item_id, status, slug, title,
                    get_name_from_id(Collection, collection), url, timeout)

        for item_id, status, slug, title, collection, url in \
                Library.objects.values_list(*fields):
            process("library", item_id, status, slug, title,
                    get_name_from_id(Collection, collection), url, timeout)

        # Community items are queried without a collection; an empty name is sent.
        for item_id, status, slug, title, url in CommunityItem.objects.values_list(
                "id", "http_status", "slug", "title", "url"):
            process("community item", item_id, status, slug, title, "", url, timeout)

        processor.finish()

        # updated_statuses is read as {status: {type label: ids}}.
        for status, items in processor.updated_statuses.items():
            for _type, ids in items.items():
                model = models_by_type.get(_type)
                if model is None:
                    continue
                # Report the number of ids being updated, not the number of
                # item types recorded under this status.
                print("Set %s status for %i %s" % (
                    str(status), len(ids), unicode(model._meta.verbose_name_plural)))
                model.objects.filter(id__in=ids).update(http_status=status)

        filename = '%s-%s.csv' % (CHECK_URLS, datetime.datetime.now().isoformat())
        report = Report(type=CHECK_URLS)
        report.file.save(filename, File(f))
    finally:
        # Closing the temporary file also removes it; do so even on failure.
        f.close()

    print("Done!")
    def build_metadata(self, item, site):
        """Render the ``oai/oer/oer_recommender.xml`` template for ``item``.

        ``item`` may be a ``SearchResult`` (in which case the wrapped object
        is taken from ``.object`` and authors/keywords come from the search
        result) or the object itself (authors/keywords are then queried from
        its relations).  ``site.domain`` is used to build the absolute URL.

        NOTE: the template context is ``locals()``, so every local variable
        name in this method is part of the template contract.  ``date`` is
        only defined when ``item.published_on`` is truthy, and ``keywords``
        is not defined for a search result without general subjects.
        """
        if isinstance(item, SearchResult):
            search_result = item
            item = search_result.object
        else:
            search_result = None

        title = item.title
        abstract = item.abstract
        # Left undefined (absent from the context) when there is no date.
        if item.published_on:
            date = item.published_on
        url = item.url
        oer_url = "http://%s%s" % (site.domain, item.get_absolute_url())
        identifier = self.repository.build_header(item).identifier


        if search_result:
            authors = search_result.authors
            if search_result.general_subjects:
                # NOTE(review): the loop variable is called ``slug`` but is
                # passed to get_name_from_id -- presumably these are ids.
                keywords = [get_name_from_id(GeneralSubject, slug) for slug in search_result.general_subjects]

        else:
            authors = item.authors.all().values_list("name", flat=True)
            keywords = item.general_subjects.all().values_list("name", flat=True)

        # All locals above are exposed to the template by name.
        return render_to_string("oai/oer/oer_recommender.xml", locals())
Exemple #3
0
    def build_metadata(self, item, site):
        """Render the ``oai/oer/oai_dc.xml`` template for ``item``.

        ``item`` may be a ``SearchResult`` or the object itself.  For a
        search result the creator/subject/language/coverage values are
        resolved from the ids stored on the result; otherwise they are
        queried from the object's relations.

        NOTE: the template context is ``locals()``, so the local variable
        names (``title``, ``description``, ``date``, ``creator``,
        ``subject``, ``content_type``, ``language``, ``coverage``,
        ``rights``, ``identifier``, ...) are part of the template contract.
        Several of them are only assigned conditionally and are then simply
        absent from the context.
        """
        if isinstance(item, SearchResult):
            search_result = item
            item = search_result.object
        else:
            search_result = None

        title = item.title
        description = item.abstract
        # Left undefined (absent from the context) when there is no date.
        if item.published_on:
            date = item.published_on

        content_type = []
        if search_result:
            creator = search_result.authors
            if search_result.general_subjects:
                subject = [get_name_from_id(GeneralSubject, int(id)) for id in search_result.general_subjects]

            if search_result.media_formats:
                # Only media formats present in the mapping contribute a type.
                for id in search_result.media_formats:
                    id = int(id)
                    slug = get_slug_from_id(MediaFormat, id)
                    if slug in MEDIA_FORMAT_TO_DC_TYPE_MAPPING:
                        content_type.append(MEDIA_FORMAT_TO_DC_TYPE_MAPPING[slug])
            if search_result.languages:
                language = [get_slug_from_id(Language, int(id)) for id in search_result.languages]
            if search_result.geographic_relevance:
                coverage = [get_name_from_id(GeographicRelevance, int(id)) for id in search_result.geographic_relevance]

        else:
            creator = item.authors.all().values_list("name", flat=True)
            subject = item.general_subjects.all().values_list("name", flat=True)
            # media_formats is only read for courses and libraries.
            if isinstance(item, (Course, Library)):
                for slug in item.media_formats.all().values_list("slug", flat=True):
                    if slug in MEDIA_FORMAT_TO_DC_TYPE_MAPPING:
                        content_type.append(MEDIA_FORMAT_TO_DC_TYPE_MAPPING[slug])
            language = item.languages.all().values_list("slug", flat=True)
            coverage = item.geographic_relevance.all().values_list("name", flat=True)

        # Libraries always get the extra "Collection" type.
        if isinstance(item, Library):
            content_type.append("Collection")

        rights = item.license.name
        identifier = "http://%s%s" % (site.domain, item.get_absolute_url())

        # All locals above are exposed to the template by name.
        return render_to_string("oai/oer/oai_dc.xml", locals())
Exemple #4
0
def populate_item_from_search_result(result):
    """Build the display dict for one search result.

    Starts from ``result.get_stored_fields()`` and then:

    * adds ``identifier`` as ``"<app_label>.<model_name>.<pk>"``;
    * replaces a truthy ``collection`` id with a ``{"name", "slug"}`` dict;
    * replaces truthy ``general_subjects`` / ``grade_levels`` id lists with
      lists of names;
    * replaces a truthy ``topics`` id list with the objects returned by
      ``get_object`` (falsy lookups are dropped);
    * adds ``is_evaluated`` and the item URLs.

    When the result's model has no truthy ``namespace`` attribute only
    ``get_absolute_url`` is added, taken from the loaded object.
    """
    object_key = (result.app_label, result.model_name, result.pk)

    fields = result.get_stored_fields()
    fields["identifier"] = "%s.%s.%s" % object_key

    collection_pk = fields.get("collection")
    if collection_pk:
        fields["collection"] = {
            "name": get_name_from_id(Collection, collection_pk),
            "slug": get_slug_from_id(Collection, collection_pk),
        }

    subject_pks = fields.get("general_subjects")
    if subject_pks:
        fields["general_subjects"] = [get_name_from_id(GeneralSubject, pk) for pk in subject_pks]

    grade_pks = fields.get("grade_levels")
    if grade_pks:
        fields["grade_levels"] = [get_name_from_id(GradeLevel, pk) for pk in grade_pks]

    topic_pks = fields.get("topics")
    if topic_pks:
        looked_up = (get_object(Topic, pk=pk) for pk in topic_pks)
        # Skip ids for which no topic could be found.
        fields["topics"] = [topic for topic in looked_up if topic]

    fields["is_evaluated"] = bool(result.evaluated_rubrics)

    namespace = getattr(result.model, "namespace", None)
    if not namespace:
        # No URL namespace on the model: fall back to the object's own URL.
        fields["get_absolute_url"] = result.object.get_absolute_url()
        return fields

    slug = fields["slug"]
    fields["get_absolute_url"] = reverse("materials:%s:view_item" % namespace, kwargs={"slug": slug})
    fields["save_item_url"] = reverse("materials:%s:save_item" % namespace, kwargs={"slug": slug})
    fields["unsave_item_url"] = reverse("materials:%s:unsave_item" % namespace, kwargs={"slug": slug})
    fields["add_tags_url"] = reverse("tags:add_tags", args=object_key)
    fields["toolbar_view_url"] = reverse("materials:%s:toolbar_view_item" % namespace, kwargs={"slug": slug})
    fields["align_url"] = reverse("curriculum:align", args=object_key)
    return fields
Exemple #5
0
    def build_metadata(self, item, site):
        """Render the ``oai/oer/oai_oer2.xml`` template for ``item``.

        ``item`` may be a ``SearchResult`` or the object itself.  For a
        search result the vocabulary values (languages, media formats,
        material types, grade levels, subjects, ...) are resolved from the
        ids stored on the result; otherwise they are queried from the
        object's relations.  ``site.domain`` is used to build absolute URLs.

        The method assembles the ``general``, ``life_cycle``,
        ``meta_metadata``, ``technical``, ``educational``, ``rights``,
        ``relation``, ``annotation`` and ``classification`` values.

        NOTE: the template context is ``locals()``, so every local variable
        name in this method is part of the template contract -- do not
        rename locals without checking the template.  ``published_on`` is
        only defined when ``item.published_on`` is truthy.
        """
        if isinstance(item, SearchResult):
            search_result = item
            item = search_result.object
        else:
            search_result = None

        languages = []
        geographic_relevance = []
        authors = []
        identifier = self.repository.build_header(item).identifier
        media_formats = []
        course_material_types = []
        library_material_types = []
        community_types = []
        grade_levels = []
        # Must have a default: ``classification`` below reads it even when a
        # search result carries no general subjects.
        general_subjects = []
        license = item.license
        collection = None

        if search_result:
            if search_result.languages:
                languages = [get_slug_from_id(Language, int(id)) for id in search_result.languages]
            if search_result.geographic_relevance:
                geographic_relevance = [get_name_from_id(GeographicRelevance, int(id)) for id in search_result.geographic_relevance]
            authors = search_result.authors
            if search_result.media_formats:
                media_formats = [get_slug_from_id(MediaFormat, int(id)) for id in search_result.media_formats]
            if search_result.course_material_types:
                course_material_types = [get_name_from_id(CourseMaterialType, int(id)) for id in search_result.course_material_types]
            if search_result.library_material_types:
                library_material_types = [get_name_from_id(LibraryMaterialType, int(id)) for id in search_result.library_material_types]
            if search_result.community_types:
                community_types = [get_name_from_id(CommunityType, int(id)) for id in search_result.community_types]
            if search_result.grade_levels:
                grade_levels = [get_name_from_id(GradeLevel, int(id)) for id in search_result.grade_levels]
            if search_result.collection:
                collection = Collection.objects.get(pk=int(search_result.collection))
            if search_result.general_subjects:
                general_subjects = [get_name_from_id(GeneralSubject, int(id)) for id in search_result.general_subjects]

        else:
            languages = item.languages.values_list("slug", flat=True)
            geographic_relevance = item.geographic_relevance.all().values_list("name", flat=True)
            authors = item.authors.all().values_list("name", flat=True)
            # Media formats and collection are only read for courses/libraries.
            if isinstance(item, (Course, Library)):
                media_formats = item.media_formats.values_list("slug", flat=True)
                collection = item.collection
            if isinstance(item, Course):
                course_material_types = item.material_types.all().values_list("name", flat=True)
            if isinstance(item, Library):
                library_material_types = item.material_types.all().values_list("name", flat=True)
            if isinstance(item, CommunityItem):
                community_types = item.community_types.all().values_list("name", flat=True)
            grade_levels = item.grade_levels.all().values_list("name", flat=True)
            general_subjects = item.general_subjects.all().values_list("name", flat=True)

        general = {}
        general["identifier"] = [{'catalog':self.repository.identifier, 'entry': identifier}]
        if isinstance(item, (Course, Library)):
            # Add the provider's own identifier under the collection's name.
            if item.collection and item.provider_id:
                general['identifier'].append({'catalog':item.collection.name, 'entry':item.provider_id})

        general["title"] = item.title
        general["description"] = item.abstract
        general["keyword"] = item.keywords.all().values_list("name", flat=True)

        # Fall back to English when no language is known.
        if languages:
            general["language"] = languages
        else:
            general["language"] = ["en"]

        general["coverage"] = geographic_relevance

        life_cycle = {}
        life_cycle["contribute"] = []
        if authors:
            for name in authors:
                author = dict(role="author", entity=build_vcard([('N', name), ('FN', name)]))
                if item.content_creation_date:
                    author['date'] = item.content_creation_date.strftime(DATETIME_FORMAT_LRE3)
                life_cycle['contribute'].append(author)

        if isinstance(item, (Course, Library)):
            if item.institution:
                institution = {'role':'publisher'}
                institution['entity'] = build_vcard([('N', item.institution.name),
                                                    ('FN', item.institution.name),
                                                    ('ORG', item.institution.name)])
                if item.content_creation_date:
                    institution['date'] = item.content_creation_date.strftime(DATETIME_FORMAT_LRE3)
                life_cycle['contribute'].append(institution)

        meta_metadata = {}
        # The entry is the identifier with the repository prefix cut off.
        meta_metadata["identifier"] = dict(catalog=self.repository.identifier,
                                           entry=identifier[len(self.repository.identifier_prefix):])
        meta_metadata["language"] = general["language"][0]

        technical = {}
        technical["format"] = media_formats
        technical["location"] = "http://%s%s" % (site.domain, item.get_absolute_url())
        technical["installationRemarks"] = item.tech_requirements

        educational = {}
        # The first non-empty material type list wins.
        educational["learningResourceType"] = course_material_types or library_material_types or community_types or []
        educational["context"] = grade_levels
        educational["typicalagerange"] = grade_level_to_age_range(grade_levels)
        if isinstance(item, (Course, Library)):
            educational["description"] = item.curriculum_standards

        rights = {}
        if not license.name or 'public domain' in license.name.lower() or license.name.lower() == 'no license':
            rights['copyrightAndOtherRestrictions'] = 'no'
        else:
            rights['copyrightAndOtherRestrictions'] = 'yes'

        _description = u""
        if license.copyright_holder:
            _description += u'Copyright Holder: %s' % license.copyright_holder
        if license.description or license.name:
            _description += '\n\n%s' % (license.description or license.name)
        if license.url:
            _description += '\n\n%s' % license.url
        rights['description'] = {"en": _description.strip()}

        if license.type.startswith("cc-"):
            rights['description']['x-t-cc-url'] = license.url

        relation = {}
        if collection:
            relation["description"] = collection.name
            relation["identifier"] = "http://%s%s" % (site.domain,
                      reverse("materials:%s:collection_index" % item.namespace,
                              kwargs=dict(collection=collection.slug)))

        annotation = []
        for review in item.reviews.all().select_related():
            _name = u"%s %s" % (review.user.first_name, review.user.last_name)
            _name = _name.strip()
            annotation.append({
                 'entity': build_vcard([('N', _name), ('FN', _name)]),
                 # NOTE(review): ``review`` is used as an object everywhere
                 # else here; subscripting it looks like it should be an
                 # attribute access -- confirm against the review model.
                 'description': review['text'],
                 'date': review.timestamp.strftime(DATETIME_FORMAT_LRE3)
            })

        classification = general_subjects
        # Left undefined (absent from the context) when there is no date.
        if item.published_on:
            published_on = item.published_on.strftime(DATETIME_FORMAT_LRE3)

        # All locals above are exposed to the template by name.
        return render_to_string("oai/oer/oai_oer2.xml", locals())
Exemple #6
0
def index(
    request,
    general_subjects=None,
    grade_levels=None,
    course_material_types=None,
    library_material_types=None,
    collection=None,
    keywords=None,
    license=None,
    course_or_module=None,
    community_types=None,
    community_topics=None,
    microsite=None,
    model=None,
    search=False,
    tags=None,
    subjects=None,
    format=None,
    topics=None,
    alignment=None,
    facet_fields=None,
):
    """Browse/search view: build a search query from filters and render it.

    Processing steps, in order:

    1. ``tags`` / ``subjects`` trigger a permanent redirect to the keyword
       index; ``keywords`` that are not already slugified are redirected to
       their slugified form (404 when the slug is empty).
    2. When ``format`` is not given it defaults to ``"html"``; the request
       parameter ``feed=yes`` selects ``"rss"`` and ``csv=yes`` selects
       ``"csv"`` (404 unless the user is authenticated staff).
    3. The query starts from ``SearchQuerySet`` narrowed to
       ``is_displayed:true``, optionally restricted to ``model``.
    4. Filters named in ``PATH_FILTERS`` take their value from the argument
       of the same name; all other ``FILTERS`` extract their value from the
       request.
    5. With ``search=True`` and no search term the request is redirected.
    6. The result is rendered according to ``format``: ``html`` (JSON for
       AJAX requests), ``rss``, ``json``, ``xml`` or ``csv``.  Any other
       format falls off the end of the function and returns ``None``.

    NOTE: this function reads its own arguments through ``locals()`` and
    passes ``locals()`` to templates as context, so parameter and local
    variable names are part of its behaviour.
    """

    # Default facets, used when the caller does not pass any.
    if not facet_fields:
        facet_fields = [
            "general_subjects",
            "grade_levels",
            "keywords",
            "course_material_types",
            "media_formats",
            "cou_bucket",
            "indexed_topics",
        ]
    if model:
        index_namespace = model.namespace
    else:
        index_namespace = None

    if tags or subjects:
        # Tags and subjects are old path filters which are combined to
        # keywords filter now.

        # Redirect to keyword index.
        keywords = tags or subjects
        if index_namespace:
            url = reverse("materials:%s:keyword_index" % index_namespace, kwargs=dict(keywords=keywords))
        else:
            url = reverse("materials:keyword_index", kwargs=dict(keywords=keywords))
        return HttpResponsePermanentRedirect(url)

    if keywords:
        slugified_keywords = slugify(keywords)
        if not slugified_keywords:
            raise Http404()
        if slugified_keywords != keywords:
            # Keywords should be slugified.
            # Redirect to keyword index with slugified keyword.
            if index_namespace:
                url = reverse("materials:%s:keyword_index" % index_namespace, kwargs=dict(keywords=slugified_keywords))
            else:
                url = reverse("materials:keyword_index", kwargs=dict(keywords=slugified_keywords))
            return HttpResponsePermanentRedirect(url)

    query_string_params = {}
    filter_values = {}
    page_title = u"Browse"
    page_subtitle = u""
    breadcrumbs = [{"url": reverse("materials:browse"), "title": u"OER Materials"}]

    # Pick the output format from the request when the caller gave none.
    if not format:
        format = "html"
        if request.REQUEST.get("feed", None) == "yes":
            format = "rss"
        elif request.REQUEST.get("csv", None) == "yes":
            # CSV export is restricted to authenticated staff users.
            if not request.user.is_authenticated() or not request.user.is_staff:
                raise Http404()
            format = "csv"

    query = SearchQuerySet().narrow("is_displayed:true")

    if model:
        query = query.models(model)

    path_filter = None

    hidden_filters = {}

    # Path filters take their value from the function argument of the same
    # name, looked up through locals(); the last one applied is remembered
    # in path_filter.
    for filter_name in PATH_FILTERS:
        value = locals()[filter_name]
        if value is not None:
            filter = FILTERS[filter_name]
            query = filter.update_query(query, value)
            path_filter = filter_name
            if page_subtitle:
                page_subtitle = u"%s → %s" % (page_subtitle, filter.page_subtitle(value))
            else:
                page_subtitle = filter.page_subtitle(value)
            filter_values[filter_name] = value

    visible_filters = [
        "search",
        "general_subjects",
        "grade_levels",
        "course_material_types",
        "media_formats",
        "cou_bucket",
    ]

    if microsite:
        # The slug argument is replaced by the Microsite object itself.
        microsite = Microsite.objects.get(slug=microsite)
        visible_filters.append("topics")

    search_query = u""

    # Remaining filters take their value from the request; the filter that
    # was last applied from the path is skipped.
    for filter_name, filter in FILTERS.items():
        if filter_name == path_filter:
            continue
        value = filter.extract_value(request)
        if value is not None:
            query = filter.update_query(query, value)
            query_string_params = filter.update_query_string_params(query_string_params, value)
            filter_values[filter_name] = value
            if filter_name not in visible_filters:
                hidden_filters[filter.request_name] = value
            if filter_name == "search":
                search_query = value

    if search:
        if not search_query:
            # No search term: go to the plain index when other filters are
            # set, otherwise back to the advanced search form with a warning.
            if filter_values:
                return HttpResponsePermanentRedirect(
                    reverse("materials:index") + serialize_query_string_params(query_string_params)
                )
            else:
                messages.warning(request, u"You should specify the search term")
                return HttpResponsePermanentRedirect(reverse("materials:advanced_search"))

        page_title = u"Search Results"
        page_subtitle = search_query
        breadcrumbs = [{"url": reverse("materials:search"), "title": page_title}]

    elif model == CommunityItem:
        breadcrumbs = [{"url": reverse("materials:community"), "title": u"OER Community"}]

    # A microsite overrides whatever breadcrumbs were chosen above.
    if microsite:
        breadcrumbs = [
            {
                "url": reverse("materials:microsite", kwargs=dict(microsite=microsite.slug)),
                "title": u"%s Home" % microsite.name,
            }
        ]

    # Fallback subtitle: the model's plural name, else one of the active filters.
    if not page_subtitle and model:
        page_subtitle = u"Content Type: %s" % model._meta.verbose_name_plural
    elif not page_subtitle and filter_values:
        filter_name = filter_values.keys()[0]
        filter = FILTERS[filter_name]
        page_subtitle = filter.page_subtitle(filter_values[filter_name])

    index_params = IndexParams(request, format, search_query)
    query_string_params = index_params.update_query_string_params(query_string_params)

    index_url = request.path + serialize_query_string_params(query_string_params, ignore_params=["batch_start"])
    if page_subtitle:
        index_title = u"%s: %s" % (page_title, page_subtitle)
    else:
        index_title = page_title

    # Same URL as the current index, with feed=yes / csv=yes added.
    feed_url = request.path + serialize_query_string_params(
        dict(query_string_params.items() + [("feed", "yes")]), ignore_params=["batch_start"]
    )
    csv_url = request.path + serialize_query_string_params(
        dict(query_string_params.items() + [("csv", "yes")]), ignore_params=["batch_start"]
    )

    batch_end = index_params.batch_start + index_params.batch_size

    # Ordering: a lone "featured" or "evaluated_rubrics" filter dictates the
    # order; otherwise the order requested through index_params is used.
    if len(filter_values) == 1 and "featured" in filter_values:
        query = query.order_by("-featured_on")
    elif len(filter_values) == 1 and "evaluated_rubrics" in filter_values:
        query = query.order_by("-evaluation_score_rubric_%i" % filter_values["evaluated_rubrics"][0])
    elif index_params.query_order_by is not None:
        query = query.order_by(index_params.query_order_by)

    # Unfiltered "visits" sorting only shows items with at least one visit.
    if index_params.sort_by == "visits" and not filter_values:
        query = query.narrow("visits:[1 TO *]")

    items = []

    if format == "html":

        for facet_field in facet_fields:
            query = query.facet(facet_field)

        total_items = len(query)

        # Requested batch lies past the last result: restart from the first page.
        if total_items and index_params.batch_start >= total_items:
            return HttpResponsePermanentRedirect(index_url)

        results = query[index_params.batch_start : batch_end]
        for result in results:
            if result is None:
                continue
            items.append(populate_item_from_search_result(result))

        pagination = Pagination(
            request.path, query_string_params, index_params.batch_start, index_params.batch_size, total_items
        )

        facets = query.facet_counts().get("fields", {})

        index_filters = build_index_filters(visible_filters, facets, filter_values, path_filter, microsite)

        # Keyword facets are only used when the query has results.
        all_keywords = query.count() and facets.get("keywords", []) or []
        # With more than MAX_TOP_KEYWORDS keywords both a top cloud and a
        # full cloud are built; otherwise only the top cloud.
        if len(all_keywords) > MAX_TOP_KEYWORDS:
            top_keywords = get_tag_cloud(dict(all_keywords[:MAX_TOP_KEYWORDS]), 3, 0, 0)
            all_keywords = get_tag_cloud(dict(all_keywords), 3, 0, 0)
        else:
            top_keywords = get_tag_cloud(dict(all_keywords), 3, 0, 0)
            all_keywords = []

        # Display name: Keyword name, else Tag name, else the slug itself.
        for keyword in top_keywords:
            name = (
                get_name_from_slug(Keyword, keyword["slug"])
                or get_name_from_slug(Tag, keyword["slug"])
                or keyword["slug"]
            )
            keyword["name"] = name
        for keyword in all_keywords:
            name = (
                get_name_from_slug(Keyword, keyword["slug"])
                or get_name_from_slug(Tag, keyword["slug"])
                or keyword["slug"]
            )
            keyword["name"] = name

        if request.is_ajax():
            # AJAX requests get the rendered item list plus paging data as JSON.
            output = render_to_string("materials/include/index-items.html", RequestContext(request, locals()))
            data = dict(
                items=output,
                first_item_number=pagination.first_item_number,
                last_item_number=pagination.last_item_number,
                total_items=pagination.total_items,
                page_title=unicode(page_title),
                page_subtitle=page_subtitle and unicode(page_subtitle or u""),
            )
            return JsonResponse(data)
        # All locals are exposed to the template by name.
        return direct_to_template(request, "materials/index.html", locals())

    elif format == "rss":
        # The feed always contains the first 20 results.
        results = query[0:20]
        for result in results:
            if result is None:
                continue
            item = result.get_stored_fields()
            if item.get("general_subjects"):
                item["general_subjects"] = [get_name_from_id(GeneralSubject, id) for id in item["general_subjects"]]

            namespace = getattr(result.model, "namespace", None)
            if namespace:
                item["get_absolute_url"] = reverse("materials:%s:view_item" % namespace, kwargs=dict(slug=item["slug"]))
            else:
                item["get_absolute_url"] = result.object.get_absolute_url()

            item["model_verbose_name"] = result.model._meta.verbose_name_plural

            items.append(item)

        return direct_to_template(request, "materials/index-rss.xml", locals(), "text/xml")

    elif format == "json":
        results = query[index_params.batch_start : batch_end]

        for result in results:
            if result is None:
                continue
            data = result.get_stored_fields()
            item = {
                "id": result.id,
                "title": data["title"],
                "abstract": data["abstract"],
                "url": data["url"],
                "keywords": data["keywords_names"],
                "subject": [get_slug_from_id(GeneralSubject, id) for id in (data["general_subjects"] or [])],
                "grade_level": [get_slug_from_id(GradeLevel, id) for id in (data["grade_levels"] or [])],
                "collection": data["collection"] and get_name_from_id(Collection, data["collection"]) or None,
            }
            items.append(item)

        return JsonResponse(items)

    elif format == "xml":
        query = query.load_all()
        results = query[index_params.batch_start : batch_end]

        for result in results:
            if result is None:
                continue
            object = result.object
            data = result.get_stored_fields()
            item = {"url": data["url"], "title": data["title"]}
            # Only the first author is exported.
            if data.get("authors"):
                item["author"] = data["authors"][0]
            if data.get("institution"):
                item["institution"] = get_name_from_id(Institution, data["institution"])
            item["abstract"] = data["abstract"]

            license = object.license
            item["copyright_holder"] = license.copyright_holder
            item["license_url"] = license.url
            item["license_name"] = license.name
            item["license_description"] = license.description
            item["license_type"] = license.type
            item["cou_bucket"] = license.bucket

            if data["rating"]:
                item["rating"] = "%.1f" % data["rating"]

            # Each entry describes one field: its title, the request
            # parameter of the matching filter, comma-joined slugs ("value")
            # and comma-joined names ("content").
            item["fields"] = []
            grade_levels = data.get("grade_levels")
            if grade_levels:
                item["fields"].append(
                    dict(
                        title=u"Grade Level",
                        param=FILTERS["grade_levels"].request_name,
                        value=u",".join([get_slug_from_id(GradeLevel, id) for id in grade_levels]),
                        content=u",".join([get_name_from_id(GradeLevel, id) for id in grade_levels]),
                    )
                )
            general_subjects = data.get("general_subjects")
            if general_subjects:
                item["fields"].append(
                    dict(
                        title=u"Subject",
                        param=FILTERS["general_subjects"].request_name,
                        value=u",".join([get_slug_from_id(GeneralSubject, id) for id in general_subjects]),
                        content=u",".join([get_name_from_id(GeneralSubject, id) for id in general_subjects]),
                    )
                )
            collection = data.get("collection")
            if collection:
                item["fields"].append(
                    dict(
                        title=u"Collection",
                        param=FILTERS["collection"].request_name,
                        value=get_slug_from_id(Collection, collection),
                        content=get_name_from_id(Collection, collection),
                    )
                )
            geographic_relevance = data.get("geographic_relevance")
            if geographic_relevance:
                item["fields"].append(
                    dict(
                        title=u"Geographic Regional Relevance",
                        param=FILTERS["geographic_relevance"].request_name,
                        value=u",".join([get_slug_from_id(GeographicRelevance, id) for id in geographic_relevance]),
                        content=u",".join([get_name_from_id(GeographicRelevance, id) for id in geographic_relevance]),
                    )
                )

            keywords = object.keywords.values("slug", "name")
            if keywords:
                item["fields"].append(
                    dict(
                        title=u"Keywords",
                        param=FILTERS["keywords"].request_name,
                        value=u",".join([k["slug"] for k in keywords]),
                        content=u",".join([k["name"] for k in keywords]),
                    )
                )

            # Tags are exported under the keywords filter's request parameter.
            tags = object.tags.values("slug", "name").order_by("slug").distinct()
            if tags:
                item["fields"].append(
                    dict(
                        title=u"Tags",
                        param=FILTERS["keywords"].request_name,
                        value=u",".join([k["slug"] for k in tags]),
                        content=u",".join([k["name"] for k in tags]),
                    )
                )

            items.append(item)

        return direct_to_template(request, "materials/index-xml.xml", locals(), "text/xml")

    elif format == "csv":
        return csv_export(query, index_title)