def _create_parasha_topic_story(parasha_obj, mustHave=None, **kwargs):
    """
    Generate and save a "Topics in <parasha>" story for one calendar entry.

    `parasha_obj` must support `parasha_obj["parasha"]`; it is also handed
    unchanged to `make_parashah_response_from_calendar_entry`.
    `mustHave` is forwarded to `cls.generate_story` (an empty list is sent
    when it is falsy), and any extra keyword arguments are forwarded as-is.

    Returns None.  Nothing is generated when the current page of related
    topics yields fewer than `k` entries after filtering.

    NOTE(review): `iteration`, `k`, `cls` and `text` are not defined in this
    function; they are expected to come from the enclosing scope.
    """
    from sefaria.model.topic import get_topics
    from sefaria.utils.util import titlecase
    from sefaria.utils.calendars import make_parashah_response_from_calendar_entry

    page_index = iteration - 1
    all_topics = get_topics()
    parasha_name = text.Term.normalize(titlecase(parasha_obj["parasha"]))
    parasha_topic = all_topics.get(parasha_name)

    # Take the page_index-th window of k (topic, value) pairs and keep only
    # the topics whose paired value is greater than 1.
    related_pairs = parasha_topic.related_topics
    window_start = page_index * k
    related_topics = []
    for related, value in related_pairs[window_start:window_start + k]:
        if value > 1:
            related_topics.append(related)

    # Only a completely full page is turned into a story.
    if len(related_topics) < k:
        return

    calendar_entry = make_parashah_response_from_calendar_entry(parasha_obj)[0]
    story_title = {
        "en": "Topics in " + calendar_entry["displayValue"]["en"],
        "he": u"נושאים ב" + calendar_entry["displayValue"]["he"],
    }
    story_lead = {
        "en": "Weekly Torah Portion",
        "he": u'פרשת השבוע',
    }
    story = cls.generate_story(
        topics=related_topics,
        title=story_title,
        lead=story_lead,
        mustHave=mustHave or [],
        **kwargs
    )
    story.save()
def normalize_new_topic_title(title):
    """
    Return the normalized form of a new topic title.

    Every '#' is removed unless the whole title is one of the explicitly
    allowed hashtags; the result is then title-cased and every '|' is
    replaced with '-'.
    """
    # The trailing comma makes this a one-element tuple.  Without it the value
    # is the plain string "#MeToo" and `in` becomes a substring test, which
    # wrongly treated titles such as "#" or "#Me" as allowed hashtags.
    ALLOWED_HASHTAGS = ("#MeToo",)
    if title not in ALLOWED_HASHTAGS:
        title = title.replace("#", "")
    # replace | with - b/c | is a reserved char for search sheet queries when filtering on tags
    title = titlecase(title).replace('|', '-')
    return title
def update_sheet_tags(sheet_id, tags):
    """
    Sets the tag list for sheet_id to those listed in list 'tags'.

    Each tag is title-cased and has '|' replaced with '-' before storing.
    Duplicates are dropped after normalization, so tags that differ only in
    case are stored once; the first occurrence decides the position.

    Returns {"status": "ok"}.
    """
    normalizedTags = []
    seen = set()
    for tag in tags:
        # replace | with - b/c | is a reserved char for search sheet queries when filtering on tags
        normalized = titlecase(tag).replace('|', '-')
        # Uniqueness must be checked on the normalized form: deduplicating the
        # raw input would let "foo" and "Foo" both through as "Foo".
        if normalized not in seen:
            seen.add(normalized)
            normalizedTags.append(normalized)
    db.sheets.update({"id": sheet_id}, {"$set": {"tags": normalizedTags}})
    return {"status": "ok"}
def update_sheet_tags(sheet_id, tags):
    """
    Sets the tag list for sheet_id to those listed in list 'tags'.

    Each tag is title-cased and has '|' replaced with '-' before storing.
    Duplicates are dropped after normalization, so tags that differ only in
    case are stored once; the first occurrence decides the position.

    Returns {"status": "ok"}.
    """
    normalizedTags = []
    seen = set()
    for tag in tags:
        # replace | with - b/c | is a reserved char for search sheet queries when filtering on tags
        normalized = titlecase(tag).replace('|', '-')
        # Uniqueness must be checked on the normalized form: deduplicating the
        # raw input would let "foo" and "Foo" both through as "Foo".
        if normalized not in seen:
            seen.add(normalized)
            normalizedTags.append(normalized)
    db.sheets.update({"id": sheet_id}, {"$set": {"tags": normalizedTags}})
    return {"status": "ok"}
def update_sheet_tags(sheet_id, tags):
    """
    Sets the tag list for sheet_id to those listed in list 'tags'.

    Each tag is title-cased and has '|' replaced with '-' before storing.
    Duplicates are dropped after normalization, so tags that differ only in
    case are stored once; the first occurrence decides the position.

    Returns {"status": "ok"}.
    """
    normalizedTags = []
    seen = set()
    for tag in tags:
        # replace | with - b/c | is a reserved char for search sheet queries when filtering on tags
        normalized = titlecase(tag).replace('|', '-')
        # Uniqueness must be checked on the normalized form: deduplicating the
        # raw input would let "foo" and "Foo" both through as "Foo".
        if normalized not in seen:
            seen.add(normalized)
            normalizedTags.append(normalized)
    db.sheets.update({"id": sheet_id}, {"$set": {"tags": normalizedTags}})
    return {"status": "ok"}
def update_sheet_tags(sheet_id, tags):
    """
    Sets the tag list for sheet_id to those listed in list 'tags'.

    Each tag is title-cased and has '|' replaced with '-' before storing.
    Duplicates are dropped after normalization, so tags that differ only in
    case are stored once; the first occurrence decides the position.

    Returns {"status": "ok"}.
    """
    normalizedTags = []
    seen = set()
    for tag in tags:
        # replace | with - b/c | is a reserved char for search sheet queries when filtering on tags
        normalized = titlecase(tag).replace('|', '-')
        # Uniqueness must be checked on the normalized form: deduplicating the
        # raw input would let "foo" and "Foo" both through as "Foo".
        if normalized not in seen:
            seen.add(normalized)
            normalizedTags.append(normalized)
    db.sheets.update({"id": sheet_id}, {"$set": {"tags": normalizedTags}})
    return {"status": "ok"}
def normalize_tag(tag):
    """
    Returns an array of `tags` that `tag` normalizes to, which may be empty or multiple

    - A tag found in FIXED_SUBS maps to its single fixed substitution.
    - "#<digits>" loses its hash; any other "#" acts as a separator, like ",".
    - Tags starting with "http" and tags that look like an email address
      normalize to nothing.
    - Remaining comma-separated pieces are stripped, title-cased and
      de-duplicated; pieces that are empty after stripping are dropped.
    """
    if tag in FIXED_SUBS:
        return [FIXED_SUBS[tag]]

    # Raw strings keep the regex escapes intact without invalid-escape warnings.
    tag = re.sub(r'#(\d+)', r"\g<1>", tag)  # replace hash'd numbers ("#1" with just numbers)
    tag = tag.replace("#", ",")  # remove # and treat as separator

    if tag.startswith("http"):  # scrub URLS
        return []
    if re.search(r'^\w+([\.-]?\w+)*@\w+([\.-]?\w+)*(\.\w{2,3})+$', tag):  # scrub email addresses
        return []

    tags = []
    seen = set()
    for piece in tag.split(","):
        # Strip before testing for emptiness so that whitespace-only pieces
        # (e.g. the middle of "a, ,b") do not turn into an empty tag.
        piece = piece.strip()
        if not piece:
            continue
        normalized = titlecase(piece)
        if normalized not in seen:
            seen.add(normalized)
            tags.append(normalized)
    return tags
en_term = min(he_terms[main_he]) for secondary_term in [r.decode("utf-8") for r in row[1:] if r]: he_terms[secondary_term].add(en_term) """ # Load proto-terms from English variants sheet # For each # - If any of these terms, or their Hebrew synonyms, exist - add to existing term # - else create new proto-term with open(en_file, 'rb') as tfile: next(tfile) rows = csv.reader(tfile) for row in rows: (he_name, count, en_primary, en_names) = (h.unescape(row[0].decode("utf-8")), int(row[1]), titlecase(row[2]), list(set([titlecase(x) for x in row[3:] if x]))) if replacement_dict.get(en_primary): if isinstance(replacement_dict.get(en_primary), list): continue en_primary = replacement_dict.get(en_primary) all_names = [he_name] + [en_primary] + en_names + he_synonyms.get( he_name, []) already_used = [ name_to_term_map.get(x) for x in all_names if name_to_term_map.get(x) ] if len(set(already_used)) >= 2: print("Bridged Terms: {} {}".format( en_primary, [t.get_primary_title("en") for t in already_used]))
from sefaria.system.database import db

# Report, for every sheet that has a "tags" field, what its tag list looks
# like after normalization: comma-separated tags are split apart, each piece
# is stripped and title-cased, and duplicates are removed.
sheets = db.sheets.find({"tags": {"$exists": "true"}})
for sheet in sheets:
    olddoc = sheet
    oldTags = olddoc["tags"]
    normTags = []
    for tag in oldTags:
        # split(',') returns the tag itself when it contains no comma, so one
        # loop covers both the single-tag and the comma-separated case.
        for commaSeparatedTag in tag.split(','):
            commaSeparatedTag = commaSeparatedTag.strip()
            if commaSeparatedTag:  # skip empty pieces such as in "a,,b"
                normTags.append(titlecase(commaSeparatedTag))
    normTags = list(set(normTags))  # tags list should be unique

    # newdoc is the same dict object as olddoc, so olddoc["tags"] is replaced
    # too; the untouched original list stays available as oldTags.
    newdoc = olddoc
    newdoc["tags"] = normTags
    print(newdoc["id"])
    print(oldTags)
    print(newdoc["tags"])
    print("-------")
    # print newdoc
""" en_term = min(he_terms[main_he]) for secondary_term in [r.decode("utf-8") for r in row[1:] if r]: he_terms[secondary_term].add(en_term) """ # Load proto-terms from English variants sheet # For each # - If any of these terms, or their Hebrew synonyms, exist - add to existing term # - else create new proto-term with open(en_file, 'rb') as tfile: next(tfile) rows = csv.reader(tfile) for row in rows: (he_name, count, en_primary, en_names) = (h.unescape(row[0].decode("utf-8")), int(row[1]), titlecase(row[2]), list(set([titlecase(x) for x in row[3:] if x]))) if replacement_dict.get(en_primary): if isinstance(replacement_dict.get(en_primary), list): continue en_primary = replacement_dict.get(en_primary) all_names = [he_name] + [en_primary] + en_names + he_synonyms.get(he_name, []) already_used = [name_to_term_map.get(x) for x in all_names if name_to_term_map.get(x)] if len(set(already_used)) >= 2: print u"Bridged Terms: {} {}".format(en_primary, [t.get_primary_title("en") for t in already_used]) elif len(set(already_used)) == 1: existing_term = already_used[0] assert isinstance(existing_term, Term) if not existing_term.has_title(he_name, "he"): existing_term.add_title(he_name, "he")
from sefaria.system.database import db

# Report, for every sheet that has a "tags" field, what its tag list looks
# like after normalization: comma-separated tags are split apart, each piece
# is stripped and title-cased, and duplicates are removed.
sheets = db.sheets.find({"tags": {"$exists": "true"}})
for sheet in sheets:
    olddoc = sheet
    oldTags = olddoc["tags"]
    normTags = []
    for tag in oldTags:
        # split(',') returns the tag itself when it contains no comma, so one
        # loop covers both cases and every piece is stripped the same way.
        for commaSeparatedTag in tag.split(','):
            commaSeparatedTag = commaSeparatedTag.strip()
            if commaSeparatedTag:  # skip empty pieces such as in "a,,b"
                normTags.append(titlecase(commaSeparatedTag))
    normTags = list(set(normTags))  # tags list should be unique

    # newdoc is the same dict object as olddoc, so olddoc["tags"] is replaced
    # too; the untouched original list stays available as oldTags.
    newdoc = olddoc
    newdoc["tags"] = normTags
    # Single-argument print(...) behaves identically on Python 2 and Python 3.
    print(newdoc["id"])
    print(oldTags)
    print(newdoc["tags"])
    print("-------")