def process_check_urls(limit=10, timeout=30): f = NamedTemporaryFile() writer = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC) writer.writerow(BASE_HEADER) processor = Processor(limit=limit, reuse=1) processor.writer = writer processor.total_items = Course.objects.all().count() + \ Library.objects.all().count() + \ CommunityItem.objects.all().count() processor.cnt = 0 processor.updated_statuses = {} process = processor.manage(pprocess.MakeReusable(process_item)) for id, status, slug, title, collection, url in Course.objects.values_list( "id", "http_status", "slug", "title", "collection", "url"): process("course", id, status, slug, title, get_name_from_id(Collection, collection), url, timeout) for id, status, slug, title, collection, url in Library.objects.values_list( "id", "http_status", "slug", "title", "collection", "url"): process("library", id, status, slug, title, get_name_from_id(Collection, collection), url, timeout) for id, status, slug, title, url in CommunityItem.objects.values_list( "id", "http_status", "slug", "title", "url"): process("community item", id, status, slug, title, "", url, timeout) processor.finish() for status, items in processor.updated_statuses.items(): for _type, ids in items.items(): if _type == 'course': model = Course elif _type == 'library': model = Library elif _type == 'community item': model = CommunityItem else: continue print "Set %s status for %i %s" % (str(status), len(items), unicode(model._meta.verbose_name_plural)) model.objects.filter(id__in=ids).update(http_status=status) filename = '%s-%s.csv' % (CHECK_URLS, datetime.datetime.now().isoformat()) report = Report(type=CHECK_URLS) report.file.save(filename, File(f)) f.close() print "Done!"
def build_metadata(self, item, site):
    """Render the OER Recommender metadata XML for one item.

    ``item`` is either a model instance or a haystack ``SearchResult``
    wrapping one; ``site`` supplies the domain for absolute URLs.
    The template is rendered with ``locals()`` as its context.
    """
    if isinstance(item, SearchResult):
        search_result = item
        item = search_result.object
    else:
        search_result = None

    title = item.title
    abstract = item.abstract
    if item.published_on:
        # `date` is only added to the context when the item is published.
        date = item.published_on
    url = item.url
    oer_url = "http://%s%s" % (site.domain, item.get_absolute_url())
    identifier = self.repository.build_header(item).identifier

    # BUGFIX: default keywords so the template context always contains it;
    # previously it was unbound when a search result had no general subjects.
    keywords = []
    if search_result:
        authors = search_result.authors
        if search_result.general_subjects:
            # NOTE(review): the loop variable holds stored ids despite the
            # name `slug` in the original — get_name_from_id takes an id.
            keywords = [get_name_from_id(GeneralSubject, slug) for slug in
                        search_result.general_subjects]
    else:
        authors = item.authors.all().values_list("name", flat=True)
        keywords = item.general_subjects.all().values_list("name", flat=True)

    return render_to_string("oai/oer/oer_recommender.xml", locals())
def build_metadata(self, item, site):
    """Render the Dublin Core (oai_dc) metadata XML for one item.

    ``item`` is either a model instance or a haystack ``SearchResult``
    wrapping one. The template is rendered with ``locals()``.
    """
    if isinstance(item, SearchResult):
        search_result = item
        item = search_result.object
    else:
        search_result = None

    title = item.title
    description = item.abstract
    if item.published_on:
        date = item.published_on
    content_type = []

    # BUGFIX: default these so the template context is consistent across
    # branches; previously subject/language/coverage could be unbound
    # (e.g. for a CommunityItem, or a search result with empty facets).
    subject = []
    language = []
    coverage = []

    if search_result:
        creator = search_result.authors
        if search_result.general_subjects:
            subject = [get_name_from_id(GeneralSubject, int(id)) for id in
                       search_result.general_subjects]
        if search_result.media_formats:
            for id in search_result.media_formats:
                id = int(id)
                slug = get_slug_from_id(MediaFormat, id)
                if slug in MEDIA_FORMAT_TO_DC_TYPE_MAPPING:
                    content_type.append(MEDIA_FORMAT_TO_DC_TYPE_MAPPING[slug])
        if search_result.languages:
            language = [get_slug_from_id(Language, int(id)) for id in
                        search_result.languages]
        if search_result.geographic_relevance:
            coverage = [get_name_from_id(GeographicRelevance, int(id)) for id in
                        search_result.geographic_relevance]
    else:
        creator = item.authors.all().values_list("name", flat=True)
        subject = item.general_subjects.all().values_list("name", flat=True)
        if isinstance(item, (Course, Library)):
            for slug in item.media_formats.all().values_list("slug", flat=True):
                if slug in MEDIA_FORMAT_TO_DC_TYPE_MAPPING:
                    content_type.append(MEDIA_FORMAT_TO_DC_TYPE_MAPPING[slug])
            language = item.languages.all().values_list("slug", flat=True)
            coverage = item.geographic_relevance.all().values_list("name", flat=True)

    if isinstance(item, Library):
        # Libraries are collections in DC terms.
        content_type.append("Collection")

    rights = item.license.name
    identifier = "http://%s%s" % (site.domain, item.get_absolute_url())
    return render_to_string("oai/oer/oai_dc.xml", locals())
def populate_item_from_search_result(result):
    """Build a template-ready item dict from a haystack search result.

    Resolves stored vocabulary ids to names/slugs and attaches the URLs
    used by the index templates (view, save, tag, align, ...).
    """
    item = result.get_stored_fields()
    item["identifier"] = "%s.%s.%s" % (result.app_label,
                                       result.model_name, result.pk)

    collection_id = item.get("collection")
    if collection_id:
        item["collection"] = dict(
            name=get_name_from_id(Collection, collection_id),
            slug=get_slug_from_id(Collection, collection_id),
        )

    # Replace stored vocabulary ids with human-readable names.
    if item.get("general_subjects"):
        item["general_subjects"] = [get_name_from_id(GeneralSubject, pk)
                                    for pk in item["general_subjects"]]
    if item.get("grade_levels"):
        item["grade_levels"] = [get_name_from_id(GradeLevel, pk)
                                for pk in item["grade_levels"]]
    if item.get("topics"):
        # Drop topics that no longer exist (get_object returns None).
        item["topics"] = filter(None, [get_object(Topic, pk=pk)
                                       for pk in item["topics"]])

    item["is_evaluated"] = bool(result.evaluated_rubrics)

    namespace = getattr(result.model, "namespace", None)
    if namespace:
        slug_kwargs = dict(slug=item["slug"])
        object_args = (result.app_label, result.model_name, result.pk)
        item["get_absolute_url"] = reverse(
            "materials:%s:view_item" % namespace, kwargs=slug_kwargs)
        item["save_item_url"] = reverse(
            "materials:%s:save_item" % namespace, kwargs=slug_kwargs)
        item["unsave_item_url"] = reverse(
            "materials:%s:unsave_item" % namespace, kwargs=slug_kwargs)
        item["add_tags_url"] = reverse("tags:add_tags", args=object_args)
        item["toolbar_view_url"] = reverse(
            "materials:%s:toolbar_view_item" % namespace, kwargs=slug_kwargs)
        item["align_url"] = reverse("curriculum:align", args=object_args)
    else:
        # No namespaced URLs for this model; fall back to the object URL.
        item["get_absolute_url"] = result.object.get_absolute_url()

    return item
def build_metadata(self, item, site):
    """Render the oai_oer2 (LRE/LOM-style) metadata XML for one item.

    ``item`` is either a model instance or a haystack ``SearchResult``
    wrapping one; ``site`` supplies the domain for absolute URLs.
    Builds `general`, `life_cycle`, `meta_metadata`, `technical`,
    `educational`, `rights`, `relation`, `annotation` and
    `classification` sections, then renders the template with
    ``locals()``.
    """
    if isinstance(item, SearchResult):
        search_result = item
        item = search_result.object
    else:
        search_result = None

    # Defaults so every name below is bound regardless of branch/type.
    languages = []
    geographic_relevance = []
    authors = []
    identifier = self.repository.build_header(item).identifier
    media_formats = []
    course_material_types = []
    library_material_types = []
    community_types = []
    grade_levels = []
    # BUGFIX: previously unbound when a search result had no general
    # subjects, raising NameError at `classification = general_subjects`.
    general_subjects = []
    license = item.license
    collection = None

    if search_result:
        # Stored facet values are string ids; resolve each to slug/name.
        if search_result.languages:
            languages = [get_slug_from_id(Language, int(id)) for id in
                         search_result.languages]
        if search_result.geographic_relevance:
            geographic_relevance = [get_name_from_id(GeographicRelevance, int(id))
                                    for id in search_result.geographic_relevance]
        authors = search_result.authors
        if search_result.media_formats:
            media_formats = [get_slug_from_id(MediaFormat, int(id)) for id in
                             search_result.media_formats]
        if search_result.course_material_types:
            course_material_types = [get_name_from_id(CourseMaterialType, int(id))
                                     for id in search_result.course_material_types]
        if search_result.library_material_types:
            library_material_types = [get_name_from_id(LibraryMaterialType, int(id))
                                      for id in search_result.library_material_types]
        if search_result.community_types:
            community_types = [get_name_from_id(CommunityType, int(id)) for id in
                               search_result.community_types]
        if search_result.grade_levels:
            grade_levels = [get_name_from_id(GradeLevel, int(id)) for id in
                            search_result.grade_levels]
        if search_result.collection:
            collection = Collection.objects.get(pk=int(search_result.collection))
        if search_result.general_subjects:
            general_subjects = [get_name_from_id(GeneralSubject, int(id)) for id in
                                search_result.general_subjects]
    else:
        languages = item.languages.values_list("slug", flat=True)
        geographic_relevance = item.geographic_relevance.all().values_list("name", flat=True)
        authors = item.authors.all().values_list("name", flat=True)
        if isinstance(item, (Course, Library)):
            media_formats = item.media_formats.values_list("slug", flat=True)
            collection = item.collection
        if isinstance(item, Course):
            course_material_types = item.material_types.all().values_list("name", flat=True)
        if isinstance(item, Library):
            library_material_types = item.material_types.all().values_list("name", flat=True)
        if isinstance(item, CommunityItem):
            community_types = item.community_types.all().values_list("name", flat=True)
        grade_levels = item.grade_levels.all().values_list("name", flat=True)
        general_subjects = item.general_subjects.all().values_list("name", flat=True)

    general = {}
    general["identifier"] = [{'catalog': self.repository.identifier,
                              'entry': identifier}]
    if isinstance(item, (Course, Library)):
        # Add the provider's own catalog identifier when available.
        if item.collection and item.provider_id:
            general['identifier'].append({'catalog': item.collection.name,
                                          'entry': item.provider_id})
    general["title"] = item.title
    general["description"] = item.abstract
    general["keyword"] = item.keywords.all().values_list("name", flat=True)
    if languages:
        general["language"] = languages
    else:
        # Default to English when no language is recorded.
        general["language"] = ["en"]
    general["coverage"] = geographic_relevance

    life_cycle = {}
    life_cycle["contribute"] = []
    if authors:
        for name in authors:
            author = dict(role="author",
                          entity=build_vcard([('N', name), ('FN', name)]))
            if item.content_creation_date:
                author['date'] = item.content_creation_date.strftime(DATETIME_FORMAT_LRE3)
            life_cycle['contribute'].append(author)
    if isinstance(item, (Course, Library)):
        if item.institution:
            institution = {'role': 'publisher'}
            institution['entity'] = build_vcard([('N', item.institution.name),
                                                 ('FN', item.institution.name),
                                                 ('ORG', item.institution.name)])
            if item.content_creation_date:
                institution['date'] = item.content_creation_date.strftime(DATETIME_FORMAT_LRE3)
            life_cycle['contribute'].append(institution)

    meta_metadata = {}
    # Strip the repository prefix to get the bare entry id.
    meta_metadata["identifier"] = dict(
        catalog=self.repository.identifier,
        entry=identifier[len(self.repository.identifier_prefix):])
    meta_metadata["language"] = general["language"][0]

    technical = {}
    technical["format"] = media_formats
    technical["location"] = "http://%s%s" % (site.domain, item.get_absolute_url())
    technical["installationRemarks"] = item.tech_requirements

    educational = {}
    # Only one of the three type lists is non-empty for a given model.
    educational["learningResourceType"] = (course_material_types or
                                           library_material_types or
                                           community_types or [])
    educational["context"] = grade_levels
    educational["typicalagerange"] = grade_level_to_age_range(grade_levels)
    if isinstance(item, (Course, Library)):
        educational["description"] = item.curriculum_standards

    rights = {}
    if not license.name or 'public domain' in license.name.lower() \
            or license.name.lower() == 'no license':
        rights['copyrightAndOtherRestrictions'] = 'no'
    else:
        rights['copyrightAndOtherRestrictions'] = 'yes'
    _description = u""
    if license.copyright_holder:
        _description += u'Copyright Holder: %s' % license.copyright_holder
    if license.description or license.name:
        _description += '\n\n%s' % (license.description or license.name)
    if license.url:
        _description += '\n\n%s' % license.url
    rights['description'] = {"en": _description.strip()}
    if license.type.startswith("cc-"):
        # Creative Commons licenses also expose their deed URL.
        rights['description']['x-t-cc-url'] = license.url

    relation = {}
    if collection:
        relation["description"] = collection.name
        relation["identifier"] = "http://%s%s" % (
            site.domain,
            reverse("materials:%s:collection_index" % item.namespace,
                    kwargs=dict(collection=collection.slug)))

    annotation = []
    for review in item.reviews.all().select_related():
        _name = u"%s %s" % (review.user.first_name, review.user.last_name)
        _name = _name.strip()
        annotation.append({
            'entity': build_vcard([('N', _name), ('FN', _name)]),
            # BUGFIX: review is a model instance, not a dict; subscripting
            # (review['text']) raised TypeError.
            'description': review.text,
            'date': review.timestamp.strftime(DATETIME_FORMAT_LRE3)
        })

    classification = general_subjects
    if item.published_on:
        published_on = item.published_on.strftime(DATETIME_FORMAT_LRE3)
    return render_to_string("oai/oer/oai_oer2.xml", locals())
def index(
    request,
    general_subjects=None,
    grade_levels=None,
    course_material_types=None,
    library_material_types=None,
    collection=None,
    keywords=None,
    license=None,
    course_or_module=None,
    community_types=None,
    community_topics=None,
    microsite=None,
    model=None,
    search=False,
    tags=None,
    subjects=None,
    format=None,
    topics=None,
    alignment=None,
    facet_fields=None,
):
    """Main materials browse/search view.

    Path-derived filters arrive as keyword arguments; query-string
    filters are extracted from the request. Renders the result set as
    html, rss, json, xml or csv depending on `format` and request flags.
    NOTE(review): the template context is passed via locals(), so most
    local names here are part of the template contract — do not rename.
    """
    if not facet_fields:
        facet_fields = [
            "general_subjects",
            "grade_levels",
            "keywords",
            "course_material_types",
            "media_formats",
            "cou_bucket",
            "indexed_topics",
        ]
    if model:
        index_namespace = model.namespace
    else:
        index_namespace = None
    if tags or subjects:
        # Tags and subjects are old path filters which are combined to
        # keywords filter now.
        # Redirect to keyword index.
        keywords = tags or subjects
        if index_namespace:
            url = reverse("materials:%s:keyword_index" % index_namespace,
                          kwargs=dict(keywords=keywords))
        else:
            url = reverse("materials:keyword_index",
                          kwargs=dict(keywords=keywords))
        return HttpResponsePermanentRedirect(url)
    if keywords:
        slugified_keywords = slugify(keywords)
        if not slugified_keywords:
            raise Http404()
        if slugified_keywords != keywords:
            # Keywords should be slugified.
            # Redirect to keyword index with slugified keyword.
            if index_namespace:
                url = reverse("materials:%s:keyword_index" % index_namespace,
                              kwargs=dict(keywords=slugified_keywords))
            else:
                url = reverse("materials:keyword_index",
                              kwargs=dict(keywords=slugified_keywords))
            return HttpResponsePermanentRedirect(url)

    query_string_params = {}
    filter_values = {}
    page_title = u"Browse"
    page_subtitle = u""
    breadcrumbs = [{"url": reverse("materials:browse"),
                    "title": u"OER Materials"}]

    # Output format: explicit arg, or ?feed=yes / ?csv=yes flags.
    if not format:
        format = "html"
    if request.REQUEST.get("feed", None) == "yes":
        format = "rss"
    elif request.REQUEST.get("csv", None) == "yes":
        # CSV export is staff-only.
        if not request.user.is_authenticated() or not request.user.is_staff:
            raise Http404()
        format = "csv"

    query = SearchQuerySet().narrow("is_displayed:true")
    if model:
        query = query.models(model)

    path_filter = None
    hidden_filters = {}
    # Apply filters passed in through the URL path. NOTE(review): this
    # reads the view's own keyword arguments by name via locals().
    for filter_name in PATH_FILTERS:
        value = locals()[filter_name]
        if value is not None:
            filter = FILTERS[filter_name]
            query = filter.update_query(query, value)
            path_filter = filter_name
            if page_subtitle:
                page_subtitle = u"%s → %s" % (page_subtitle,
                                              filter.page_subtitle(value))
            else:
                page_subtitle = filter.page_subtitle(value)
            filter_values[filter_name] = value

    visible_filters = [
        "search",
        "general_subjects",
        "grade_levels",
        "course_material_types",
        "media_formats",
        "cou_bucket",
    ]
    if microsite:
        microsite = Microsite.objects.get(slug=microsite)
        visible_filters.append("topics")

    # Apply query-string filters; filters not shown in the sidebar are
    # kept as hidden form fields so they survive re-submission.
    search_query = u""
    for filter_name, filter in FILTERS.items():
        if filter_name == path_filter:
            continue
        value = filter.extract_value(request)
        if value is not None:
            query = filter.update_query(query, value)
            query_string_params = filter.update_query_string_params(
                query_string_params, value)
            filter_values[filter_name] = value
            if filter_name not in visible_filters:
                hidden_filters[filter.request_name] = value
            if filter_name == "search":
                search_query = value

    if search:
        if not search_query:
            if filter_values:
                # Filters but no search term: redirect to plain browse.
                return HttpResponsePermanentRedirect(
                    reverse("materials:index")
                    + serialize_query_string_params(query_string_params)
                )
            else:
                messages.warning(request,
                                 u"You should specify the search term")
                return HttpResponsePermanentRedirect(
                    reverse("materials:advanced_search"))
        page_title = u"Search Results"
        page_subtitle = search_query
        breadcrumbs = [{"url": reverse("materials:search"),
                        "title": page_title}]
    elif model == CommunityItem:
        breadcrumbs = [{"url": reverse("materials:community"),
                        "title": u"OER Community"}]
    if microsite:
        breadcrumbs = [
            {
                "url": reverse("materials:microsite",
                               kwargs=dict(microsite=microsite.slug)),
                "title": u"%s Home" % microsite.name,
            }
        ]

    # Fallback subtitles when no path filter produced one.
    if not page_subtitle and model:
        page_subtitle = u"Content Type: %s" % model._meta.verbose_name_plural
    elif not page_subtitle and filter_values:
        filter_name = filter_values.keys()[0]
        filter = FILTERS[filter_name]
        page_subtitle = filter.page_subtitle(filter_values[filter_name])

    index_params = IndexParams(request, format, search_query)
    query_string_params = index_params.update_query_string_params(
        query_string_params)
    index_url = request.path + serialize_query_string_params(
        query_string_params, ignore_params=["batch_start"])
    if page_subtitle:
        index_title = u"%s: %s" % (page_title, page_subtitle)
    else:
        index_title = page_title
    feed_url = request.path + serialize_query_string_params(
        dict(query_string_params.items() + [("feed", "yes")]),
        ignore_params=["batch_start"]
    )
    csv_url = request.path + serialize_query_string_params(
        dict(query_string_params.items() + [("csv", "yes")]),
        ignore_params=["batch_start"]
    )
    batch_end = index_params.batch_start + index_params.batch_size

    # Special-case ordering for single-filter featured/evaluated views.
    if len(filter_values) == 1 and "featured" in filter_values:
        query = query.order_by("-featured_on")
    elif len(filter_values) == 1 and "evaluated_rubrics" in filter_values:
        query = query.order_by(
            "-evaluation_score_rubric_%i"
            % filter_values["evaluated_rubrics"][0])
    elif index_params.query_order_by is not None:
        query = query.order_by(index_params.query_order_by)
    if index_params.sort_by == "visits" and not filter_values:
        # Sorting the full index by visits: hide never-visited items.
        query = query.narrow("visits:[1 TO *]")

    items = []
    if format == "html":
        for facet_field in facet_fields:
            query = query.facet(facet_field)
        total_items = len(query)
        if total_items and index_params.batch_start >= total_items:
            # Page is past the end: redirect to the first page.
            return HttpResponsePermanentRedirect(index_url)
        results = query[index_params.batch_start:batch_end]
        for result in results:
            if result is None:
                continue
            items.append(populate_item_from_search_result(result))
        pagination = Pagination(
            request.path, query_string_params, index_params.batch_start,
            index_params.batch_size, total_items
        )
        facets = query.facet_counts().get("fields", {})
        index_filters = build_index_filters(
            visible_filters, facets, filter_values, path_filter, microsite)

        # Build keyword tag clouds: the top N plus (optionally) the rest.
        all_keywords = query.count() and facets.get("keywords", []) or []
        if len(all_keywords) > MAX_TOP_KEYWORDS:
            top_keywords = get_tag_cloud(
                dict(all_keywords[:MAX_TOP_KEYWORDS]), 3, 0, 0)
            all_keywords = get_tag_cloud(dict(all_keywords), 3, 0, 0)
        else:
            top_keywords = get_tag_cloud(dict(all_keywords), 3, 0, 0)
            all_keywords = []
        for keyword in top_keywords:
            # Resolve slug to a display name from Keyword, then Tag,
            # falling back to the raw slug.
            name = (
                get_name_from_slug(Keyword, keyword["slug"])
                or get_name_from_slug(Tag, keyword["slug"])
                or keyword["slug"]
            )
            keyword["name"] = name
        for keyword in all_keywords:
            name = (
                get_name_from_slug(Keyword, keyword["slug"])
                or get_name_from_slug(Tag, keyword["slug"])
                or keyword["slug"]
            )
            keyword["name"] = name

        if request.is_ajax():
            # AJAX pagination: return rendered items + counters as JSON.
            output = render_to_string(
                "materials/include/index-items.html",
                RequestContext(request, locals()))
            data = dict(
                items=output,
                first_item_number=pagination.first_item_number,
                last_item_number=pagination.last_item_number,
                total_items=pagination.total_items,
                page_title=unicode(page_title),
                page_subtitle=page_subtitle and unicode(page_subtitle or u""),
            )
            return JsonResponse(data)
        return direct_to_template(request, "materials/index.html", locals())

    elif format == "rss":
        # RSS feed: first 20 results only.
        results = query[0:20]
        for result in results:
            if result is None:
                continue
            item = result.get_stored_fields()
            if item.get("general_subjects"):
                item["general_subjects"] = [
                    get_name_from_id(GeneralSubject, id)
                    for id in item["general_subjects"]]
            namespace = getattr(result.model, "namespace", None)
            if namespace:
                item["get_absolute_url"] = reverse(
                    "materials:%s:view_item" % namespace,
                    kwargs=dict(slug=item["slug"]))
            else:
                item["get_absolute_url"] = result.object.get_absolute_url()
            item["model_verbose_name"] = result.model._meta.verbose_name_plural
            items.append(item)
        return direct_to_template(
            request, "materials/index-rss.xml", locals(), "text/xml")

    elif format == "json":
        results = query[index_params.batch_start:batch_end]
        for result in results:
            if result is None:
                continue
            data = result.get_stored_fields()
            item = {
                "id": result.id,
                "title": data["title"],
                "abstract": data["abstract"],
                "url": data["url"],
                "keywords": data["keywords_names"],
                "subject": [get_slug_from_id(GeneralSubject, id)
                            for id in (data["general_subjects"] or [])],
                "grade_level": [get_slug_from_id(GradeLevel, id)
                                for id in (data["grade_levels"] or [])],
                "collection": data["collection"]
                and get_name_from_id(Collection, data["collection"]) or None,
            }
            items.append(item)
        return JsonResponse(items)

    elif format == "xml":
        # XML export needs the DB objects (license, keywords, tags).
        query = query.load_all()
        results = query[index_params.batch_start:batch_end]
        for result in results:
            if result is None:
                continue
            object = result.object
            data = result.get_stored_fields()
            item = {"url": data["url"], "title": data["title"]}
            if data.get("authors"):
                item["author"] = data["authors"][0]
            if data.get("institution"):
                item["institution"] = get_name_from_id(
                    Institution, data["institution"])
            item["abstract"] = data["abstract"]
            license = object.license
            item["copyright_holder"] = license.copyright_holder
            item["license_url"] = license.url
            item["license_name"] = license.name
            item["license_description"] = license.description
            item["license_type"] = license.type
            item["cou_bucket"] = license.bucket
            if data["rating"]:
                item["rating"] = "%.1f" % data["rating"]
            # `fields` carries filterable facets as (param, value, content)
            # triples for the XML consumer.
            item["fields"] = []
            grade_levels = data.get("grade_levels")
            if grade_levels:
                item["fields"].append(
                    dict(
                        title=u"Grade Level",
                        param=FILTERS["grade_levels"].request_name,
                        value=u",".join([get_slug_from_id(GradeLevel, id)
                                         for id in grade_levels]),
                        content=u",".join([get_name_from_id(GradeLevel, id)
                                           for id in grade_levels]),
                    )
                )
            general_subjects = data.get("general_subjects")
            if general_subjects:
                item["fields"].append(
                    dict(
                        title=u"Subject",
                        param=FILTERS["general_subjects"].request_name,
                        value=u",".join([get_slug_from_id(GeneralSubject, id)
                                         for id in general_subjects]),
                        content=u",".join([get_name_from_id(GeneralSubject, id)
                                           for id in general_subjects]),
                    )
                )
            collection = data.get("collection")
            if collection:
                item["fields"].append(
                    dict(
                        title=u"Collection",
                        param=FILTERS["collection"].request_name,
                        value=get_slug_from_id(Collection, collection),
                        content=get_name_from_id(Collection, collection),
                    )
                )
            geographic_relevance = data.get("geographic_relevance")
            if geographic_relevance:
                item["fields"].append(
                    dict(
                        title=u"Geographic Regional Relevance",
                        param=FILTERS["geographic_relevance"].request_name,
                        value=u",".join(
                            [get_slug_from_id(GeographicRelevance, id)
                             for id in geographic_relevance]),
                        content=u",".join(
                            [get_name_from_id(GeographicRelevance, id)
                             for id in geographic_relevance]),
                    )
                )
            keywords = object.keywords.values("slug", "name")
            if keywords:
                item["fields"].append(
                    dict(
                        title=u"Keywords",
                        param=FILTERS["keywords"].request_name,
                        value=u",".join([k["slug"] for k in keywords]),
                        content=u",".join([k["name"] for k in keywords]),
                    )
                )
            tags = object.tags.values("slug", "name").order_by("slug").distinct()
            if tags:
                item["fields"].append(
                    dict(
                        title=u"Tags",
                        # NOTE(review): tags reuse the keywords filter param.
                        param=FILTERS["keywords"].request_name,
                        value=u",".join([k["slug"] for k in tags]),
                        content=u",".join([k["name"] for k in tags]),
                    )
                )
            items.append(item)
        return direct_to_template(
            request, "materials/index-xml.xml", locals(), "text/xml")

    elif format == "csv":
        return csv_export(query, index_title)