def bulktext_api(request, refs):
    """
    Used by the linker.

    For a GET request, resolves every ref in the pipe-delimited string `refs`
    and responds with one JSON object keyed by the ref strings as requested.

    :param request: the incoming request. Reads `request.method` and the query
        parameters `callback` (forwarded to `jsonResponse`) and `useTextFamily`
        (any non-empty value builds the entry from `model.TextFamily`, which
        additionally yields `primary_category`).
    :param refs: textual refs separated by "|"; duplicates are collapsed.
    :return: the result of `jsonResponse`. Each ref maps to a dict with `he`,
        `en`, `lang`, `ref`, `heRef` and `url`, or to `{"error": 1}` when the
        lookup raised InputError, ValueError, AttributeError or KeyError.
        Any method other than GET gets `{"error": "Unsupported HTTP method."}`.
    """
    cb = request.GET.get("callback", None)
    if request.method != "GET":
        # Without this guard a non-GET request fell through without ever
        # building a response for the caller.
        return jsonResponse({"error": "Unsupported HTTP method."}, cb)

    useTextFamily = request.GET.get("useTextFamily", None)
    # Matches byte and unicode strings on Python 2 and plain `str` on
    # Python 3, so the check does not depend on the Python-2-only `basestring`.
    text_types = (str, type(u""))
    refs = set(refs.split("|"))
    res = {}
    for tref in refs:
        try:
            oref = model.Ref(tref)
            lang = "he" if is_hebrew(tref) else "en"
            if useTextFamily:
                text_fam = model.TextFamily(oref, commentary=0, context=0, pad=False)
                he = text_fam.he
                en = text_fam.text
                res[tref] = {
                    'he': he,
                    'en': en,
                    'lang': lang,
                    'ref': oref.normal(),
                    'primary_category': text_fam.contents()['primary_category'],
                    'heRef': oref.he_normal(),
                    'url': oref.url()
                }
            else:
                he = model.TextChunk(oref, "he").text
                en = model.TextChunk(oref, "en").text
                res[tref] = {
                    # these could be flattened on the client, if need be.
                    'he': he if isinstance(he, text_types) else JaggedTextArray(he).flatten_to_string(),
                    'en': en if isinstance(en, text_types) else JaggedTextArray(en).flatten_to_string(),
                    'lang': lang,
                    'ref': oref.normal(),
                    'heRef': oref.he_normal(),
                    'url': oref.url()
                }
        except (InputError, ValueError, AttributeError, KeyError):
            # Deliberately not logged: these failures are frequent enough to
            # fill up the logs. The ref is simply flagged as an error.
            res[tref] = {"error": 1}
    return jsonResponse(res, cb)
def bundle_many_texts(refs, useTextFamily=False, as_sized_string=False, min_char=None, max_char=None):
    """
    Build a dictionary of Hebrew/English text and ref metadata for many refs.

    :param refs: iterable of textual refs
    :param useTextFamily: when truthy, take the text from `model.TextFamily`
        and include `primary_category` in each entry
    :param as_sized_string: when truthy (and `useTextFamily` is not), take the
        text from `TextChunk.as_sized_string` instead of flattening it
    :param min_char: passed to `as_sized_string` as `min_char` when truthy
    :param max_char: passed to `as_sized_string` as `max_char` when truthy
    :return: dict mapping each ref to a dict with `he`, `en`, `lang`, `ref`,
        `heRef` and `url`, or to `{"error": 1}` when the lookup raised
        InputError, ValueError, AttributeError or KeyError
    """
    bundled = {}
    for tref in refs:
        try:
            oref = model.Ref(tref)
            lang = "he" if is_hebrew(tref) else "en"
            if useTextFamily:
                family = model.TextFamily(oref, commentary=0, context=0, pad=False)
                entry = {
                    'he': family.he,
                    'en': family.text,
                    'lang': lang,
                    'ref': oref.normal(),
                }
                entry['primary_category'] = family.contents()['primary_category']
            else:
                he_chunk = model.TextChunk(oref, "he")
                en_chunk = model.TextChunk(oref, "en")
                if as_sized_string:
                    # Only forward the limits the caller actually supplied.
                    limits = (('min_char', min_char), ('max_char', max_char))
                    size_kwargs = {name: value for name, value in limits if value}
                    he_text = he_chunk.as_sized_string(**size_kwargs)
                    en_text = en_chunk.as_sized_string(**size_kwargs)
                else:
                    he_raw, en_raw = he_chunk.text, en_chunk.text
                    # these could be flattened on the client, if need be.
                    if isinstance(he_raw, str):
                        he_text = he_raw
                    else:
                        he_text = JaggedTextArray(he_raw).flatten_to_string()
                    if isinstance(en_raw, str):
                        en_text = en_raw
                    else:
                        en_text = JaggedTextArray(en_raw).flatten_to_string()
                entry = {
                    'he': he_text,
                    'en': en_text,
                    'lang': lang,
                    'ref': oref.normal(),
                }
            entry['heRef'] = oref.he_normal()
            entry['url'] = oref.url()
            bundled[tref] = entry
        except (InputError, ValueError, AttributeError, KeyError):
            # Not logged on purpose: this chatter fills up the logs.
            bundled[tref] = {"error": 1}
    return bundled
def section_data(oref, defaultVersions):
    """
    Returns a dictionary with all the data we care about for section level `oref`.

    NOTE(review): this module defines `section_data` again further down; that
    later definition replaces this one at import time. Confirm this copy is
    still needed.

    :param oref: section-level Ref whose text, neighbouring sections and
        per-segment links are collected
    :param defaultVersions dict: {'en': Version, 'he': Version}
    :return: dict with `ref`, `heRef`, `indexTitle`, `heTitle`, `sectionRef`,
        `next`, `prev` (normalized neighbouring section refs or None) and
        `content`, a list with one dict per segment holding `segmentNumber`
        (1-based, as a string) and, when available, `links`, `text` and `he`.
        `versionTitle`/`versionNotes`/`license`/`versionSource` (and their
        `he`-prefixed counterparts) are added only when non-empty.
    """
    tf = model.TextFamily(oref, version=None, lang=None, commentary=0, context=0, pad=0, alts=False)
    text = tf.contents()
    data = {
        "ref": text["ref"],
        "heRef": text["heRef"],
        "indexTitle": text["indexTitle"],
        "heTitle": text["heTitle"],
        "sectionRef": text["sectionRef"],
    }
    # Resolve each neighbour once instead of once for the test and once for the value.
    next_oref = oref.next_section_ref()
    data["next"] = next_oref.normal() if next_oref else None
    prev_oref = oref.prev_section_ref()
    data["prev"] = prev_oref.normal() if prev_oref else None
    data["content"] = []

    def get_version_title(chunk):
        """
        Return (versionTitle, versionNotes, license, versionSource) for `chunk`.

        All four are None when the chunk shows the default version; a merged
        chunk only gets a synthesized title listing its sources.
        """
        if chunk.is_merged:
            all_versions = set(chunk.sources)
            merged_version = u'Merged from {}'.format(u', '.join(all_versions))
            return merged_version, None, None, None

        version = chunk.version()
        if (version and version.language in defaultVersions
                and version.versionTitle != defaultVersions[version.language].versionTitle):
            # These attributes are optional on a version; missing ones become None.
            vnotes = getattr(version, "versionNotes", None)
            vlicense = getattr(version, "license", None)
            vsource = getattr(version, "versionSource", None)
            return version.versionTitle, vnotes, vlicense, vsource
        return None, None, None, None  # default version

    en_vtitle, en_vnotes, en_vlicense, en_vsource = get_version_title(tf._chunks['en'])
    he_vtitle, he_vnotes, he_vlicense, he_vsource = get_version_title(tf._chunks['he'])

    if en_vtitle:
        data['versionTitle'] = en_vtitle
    if he_vtitle:
        data['heVersionTitle'] = he_vtitle
    if en_vnotes:
        data['versionNotes'] = en_vnotes
    if he_vnotes:
        data['heVersionNotes'] = he_vnotes
    if en_vlicense:
        data['license'] = en_vlicense
    if he_vlicense:
        data['heLicense'] = he_vlicense
    if en_vsource:
        data['versionSource'] = en_vsource
    if he_vsource:
        data['heVersionSource'] = he_vsource

    en_len = len(text["text"])
    he_len = len(text["he"])
    # The two languages may have different segment counts; walk the longer one.
    for x in range(max(en_len, he_len)):
        curContent = {"segmentNumber": str(x + 1)}
        # One get_links call is issued per segment ref ("<section ref>:<n>").
        links = get_links(text["ref"] + ":" + curContent["segmentNumber"], False)
        if len(links) > 0:
            curContent["links"] = [simple_link(link) for link in links]
        if x < en_len:
            curContent["text"] = text["text"][x]
        if x < he_len:
            curContent["he"] = text["he"][x]
        data["content"].append(curContent)

    return data
def section_data(oref, defaultVersions):
    """
    Collect everything we serve for the section-level ref `oref` in one dictionary.

    :param oref: the section-level Ref to describe
    :param defaultVersions dict: {'en': Version, 'he': Version}
    :return: dict with `ref`, `heRef`, `indexTitle`, `heTitle`, `sectionRef`,
        `next`, `prev` and `content` (one dict per segment with a 1-based
        string `segmentNumber` and, when present, `links`, `text`, `he`).
        Version title/notes/license/source keys, their Hebrew-language
        variants and the `he`-prefixed counterparts are added only when
        non-empty.
    """
    tf = model.TextFamily(
        oref, version=None, lang=None, commentary=0, context=0, pad=0, alts=False, stripItags=True
    )
    text = tf.contents()

    copied_keys = ("ref", "heRef", "indexTitle", "heTitle", "sectionRef")
    data = {key: text[key] for key in copied_keys}
    data["next"] = oref.next_section_ref().normal() if oref.next_section_ref() else None
    data["prev"] = oref.prev_section_ref().normal() if oref.prev_section_ref() else None
    data["content"] = []

    def get_version_title(chunk):
        """
        Describe the version behind `chunk` as a 6-tuple:
        (title, notes, license, source, title in Hebrew, notes in Hebrew).
        """
        if chunk.is_merged:
            source_titles = set(chunk.sources)
            merged_title = 'Merged from {}'.format(', '.join(source_titles))
            return merged_title, None, None, None, None, None

        version = chunk.version()
        shows_default = not (
            version
            and version.language in defaultVersions
            and version.versionTitle != defaultVersions[version.language].versionTitle
        )
        if shows_default:
            return (None,) * 6  # default version

        # Each of these attributes is optional on a version; absent ones become None.
        notes = getattr(version, "versionNotes", None)
        license_name = getattr(version, "license", None)
        source = getattr(version, "versionSource", None)
        notes_in_hebrew = getattr(version, "versionNotesInHebrew", None)
        title_in_hebrew = getattr(version, "versionTitleInHebrew", None)
        return version.versionTitle, notes, license_name, source, title_in_hebrew, notes_in_hebrew

    (en_vtitle, en_vnotes, en_vlicense,
     en_vsource, en_vtitle_he, en_vnotes_he) = get_version_title(tf._chunks['en'])
    (he_vtitle, he_vnotes, he_vlicense,
     he_vsource, he_vtitle_he, he_vnotes_he) = get_version_title(tf._chunks['he'])

    # Only non-empty version details make it into the payload.
    optional_fields = (
        ('versionTitle', en_vtitle),
        ('heVersionTitle', he_vtitle),
        ('versionNotes', en_vnotes),
        ('heVersionNotes', he_vnotes),
        ('license', en_vlicense),
        ('heLicense', he_vlicense),
        ('versionSource', en_vsource),
        ('heVersionSource', he_vsource),
        ('versionTitleInHebrew', en_vtitle_he),
        ('heVersionTitleInHebrew', he_vtitle_he),
        ('versionNotesInHebrew', en_vnotes_he),
        ('heVersionNotesInHebrew', he_vnotes_he),
    )
    for field_name, field_value in optional_fields:
        if field_value:
            data[field_name] = field_value

    en_len = len(text["text"])
    he_len = len(text["he"])

    # Fetch the links for the whole section once, then bucket them by the
    # segment numbers their anchor ref covers.
    section_links = get_links(text["ref"], False)
    anchor_ref_dict = defaultdict(list)
    for link in section_links:
        anchor_oref = model.Ref(link["anchorRef"])
        if anchor_oref.is_segment_level() and len(anchor_oref.sections) != 0:
            first_segment = anchor_oref.sections[-1]
            # make sure sections are the same in range
            # TODO doesn't deal with links that span sections
            if anchor_oref.sections[0] == anchor_oref.toSections[0]:
                last_segment = anchor_oref.toSections[-1]
            else:
                last_segment = max(en_len, he_len)
            for segment_number in range(first_segment, last_segment + 1):
                anchor_ref_dict[segment_number].append(simple_link(link))
        # anything else is a section level link: don't bother with it

    # The two languages may differ in segment count; walk the longer one.
    for segment_number in range(1, max(en_len, he_len) + 1):
        index = segment_number - 1
        segment = {"segmentNumber": str(segment_number)}
        segment_links = anchor_ref_dict[segment_number]
        if len(segment_links) > 0:
            segment["links"] = segment_links
        if index < en_len:
            segment["text"] = text["text"][index]
        if index < he_len:
            segment["he"] = text["he"][index]
        data["content"].append(segment)

    return data