def body(self):
    """UI test: Talmud text shows no cantillation and no text-setting toggles;
    Prophets show only the vocalization toggle; Torah shows both toggles."""

    def open_settings_and_check(expect_aliyot, expect_vocalization):
        # Open the text settings panel and verify toggle visibility,
        # preserving the original call/assert interleaving.
        self.toggle_on_text_settings()
        shown = self.is_aliyot_toggleSet_displayed()
        assert shown if expect_aliyot else not shown
        shown = self.is_vocalization_toggleSet_displayed()
        assert shown if expect_vocalization else not shown

    self.browse_to_ref("Shabbat 2b")
    assert not has_cantillation(self.get_nth_section_hebrew(1).text)
    assert not has_cantillation(self.get_nth_section_hebrew(1).text, False)
    open_settings_and_check(False, False)
    self.toggle_language_bilingual()
    self.browse_to_ref("Joshua 2")
    open_settings_and_check(False, True)
    self.browse_to_ref("Genesis 1")
    open_settings_and_check(True, True)
def _single_lookup(cls, input_word, lookup_key='form', **kwargs):
    """
    Find word forms matching ``input_word`` and return a list of headword
    query dicts (``[{'headword': ...}, ...]``), or ``[]`` if nothing matches.

    :param input_word: word to look up; Hebrew input is stripped of cantillation.
    :param lookup_key: word-form field to query ('form' by default; switched to
        'c_form' when the stripped input is purely consonantal).
    :param kwargs: may contain ``lookup_ref``, a ref string used to restrict
        the search to forms attested in that ref.
    """
    from sefaria.utils.hebrew import is_hebrew, strip_cantillation, has_cantillation
    from sefaria.model import Ref
    lookup_ref = kwargs.get("lookup_ref", None)
    wform_pkey = lookup_key
    if is_hebrew(input_word):
        input_word = strip_cantillation(input_word)
        # A fully consonantal word should be matched against the consonantal
        # form field rather than the vocalized one.
        if not has_cantillation(input_word, detect_vowels=True):
            wform_pkey = 'c_form'
    query_obj = {wform_pkey: input_word}
    if lookup_ref:
        nref = Ref(lookup_ref).normal()
        query_obj["refs"] = {'$regex': '^{}'.format(nref)}
    forms = WordFormSet(query_obj)
    # The ref restriction may have been too narrow; retry globally before
    # reporting no results.
    if lookup_ref and forms.count() == 0:
        del query_obj["refs"]
        forms = WordFormSet(query_obj)
    if forms.count() > 0:
        # TODO: if we want the 'lookups' in wf to be a dict we can pass as is to the lexiconentry, we need to change the key 'lexicon' to 'parent_lexicon' in word forms
        return [{'headword': lookup['headword']}
                for form in forms for lookup in form.lookups]
    else:
        return []
def body(self):
    """UI test: a modern commentary text has no cantillation and no toggles;
    Prophets expose only vocalization; Torah exposes aliyot and vocalization."""

    def open_settings_and_check(expect_aliyot, expect_vocalization):
        # Open the text settings panel and verify toggle visibility,
        # preserving the original call/assert interleaving.
        self.toggle_on_text_settings()
        shown = self.is_aliyot_toggleSet_displayed()
        assert shown if expect_aliyot else not shown
        shown = self.is_vocalization_toggleSet_displayed()
        assert shown if expect_vocalization else not shown

    # changed to a book that should NEVER get cantillation
    self.browse_to_ref("Introductions to the Babylonian Talmud, Berakhot, Introduction to Berakhot")
    assert not has_cantillation(self.get_nth_section_hebrew(1).text)
    assert not has_cantillation(self.get_nth_section_hebrew(1).text, False)
    open_settings_and_check(False, False)
    self.toggle_language_bilingual()
    self.browse_to_ref("Joshua 2")
    open_settings_and_check(False, True)
    self.browse_to_ref("Genesis 1")
    open_settings_and_check(True, True)
def body(self):
    """UI test across genres: Darashos HaRan shows no cantillation and no
    toggles; Talmud and Prophets show only vocalization; Torah shows both."""

    def open_settings_and_check(expect_aliyot, expect_vocalization):
        # Open the text settings panel and verify toggle visibility,
        # preserving the original call/assert interleaving.
        self.toggle_on_text_settings()
        shown = self.is_aliyot_toggleSet_displayed()
        assert shown if expect_aliyot else not shown
        shown = self.is_vocalization_toggleSet_displayed()
        assert shown if expect_vocalization else not shown

    self.browse_to_ref("Darashos HaRan 1")
    assert not has_cantillation(self.get_nth_section_hebrew(1).text)
    assert not has_cantillation(self.get_nth_section_hebrew(1).text, False)
    open_settings_and_check(False, False)
    self.browse_to_ref("Berakhot 2b")
    open_settings_and_check(False, True)
    self.browse_to_ref("Joshua 2")
    open_settings_and_check(False, True)
    self.browse_to_ref("Genesis 1")
    open_settings_and_check(True, True)
def get_word_form_objects(cls, input_word, lookup_key='form', **kwargs):
    """
    Return a WordFormSet matching ``input_word``.

    :param input_word: word to look up; Hebrew input has cantillation stripped.
    :param lookup_key: field queried ('form' by default; 'c_form' when the
        stripped input turns out to be purely consonantal).
    :param kwargs: may contain ``lookup_ref`` to restrict results to forms
        attested within that ref; the restriction is dropped if it yields
        no results.
    """
    from sefaria.utils.hebrew import is_hebrew, strip_cantillation, has_cantillation
    from sefaria.model import Ref
    lookup_ref = kwargs.get("lookup_ref", None)
    form_field = lookup_key
    if is_hebrew(input_word):
        input_word = strip_cantillation(input_word)
        # Consonantal-only input matches the consonantal form field.
        if not has_cantillation(input_word, detect_vowels=True):
            form_field = 'c_form'
    query_obj = {form_field: input_word}
    if lookup_ref:
        normal_ref = Ref(lookup_ref).normal()
        query_obj["refs"] = {'$regex': '^{}'.format(normal_ref)}
    forms = WordFormSet(query_obj)
    if lookup_ref and len(forms) == 0:
        # Nothing found within the requested ref -- search globally instead.
        query_obj.pop("refs")
        forms = WordFormSet(query_obj)
    return forms
def get_word_form_objects(cls, input_word, lookup_key='form', **kwargs):
    """
    Build and run a word-form query for ``input_word``, returning a WordFormSet.

    Hebrew input is stripped of cantillation up front; historically this
    stripping happened in the main ``lexicon_lookup`` method only after an
    empty first result, but doing it here saves a query when the caller
    already supplied a consonantal form.

    :param kwargs: may contain ``lookup_ref``, a ref string limiting matches
        to forms attested in that ref (dropped on an empty result).
    """
    from sefaria.model import Ref
    lookup_ref = kwargs.get("lookup_ref", None)
    key = lookup_key
    if is_hebrew(input_word):
        input_word = strip_cantillation(input_word)
        # If nothing vowel-like remains, query the consonantal form field.
        if not has_cantillation(input_word, detect_vowels=True):
            key = 'c_form'
    query = {key: input_word}
    if lookup_ref:
        query["refs"] = {'$regex': '^{}'.format(Ref(lookup_ref).normal())}
    matches = WordFormSet(query)
    if lookup_ref and len(matches) == 0:
        # Ref-restricted search came up empty; fall back to an unrestricted one.
        del query["refs"]
        matches = WordFormSet(query)
    return matches
def get_midrashic_text(text):
    """
    Given text, removes commentary text and returns midrashic text.
    Returns a ``(midrashic_text, commentary_text)`` pair; if it determines the
    text is all commentary, the midrashic part is the empty string (and vice
    versa when no commentary spans are found).

    Heuristic: words with cantillation/vowels (or known midrashic filler
    words) are treated as midrash; runs of other Hebrew words are collected
    as candidate "non-midrash" (commentary) spans.

    :param text: raw text to split.
    :return: tuple of (midrashic text, commentary text).
    """
    dash = "—"
    words = text.split()
    if len(words) == 0:
        return "", ""
    midrash_words = 0.0
    filler_words = 0.0
    curr_non_midrash_start = None      # start index of the commentary run in progress
    curr_midrash_start = None          # start index of the midrash run in progress
    in_paren = False                   # inside a parenthesized aside
    potential_non_midrash_span = None  # short midrash run that may get reclassified (dash heuristic)
    non_midrash_spans = []             # finalized (start, end) commentary word spans
    for i, w in enumerate(words):
        if "(" in w:
            in_paren = True
        if in_paren or re.match(r"^[^\u05d0-\u05ea]+$", w):
            # Parenthetical or fully non-Hebrew token: counted as filler.
            # A dash right after a midrash run marks that run as possibly
            # being a quote lead-in rather than real midrash.
            if w.strip() == dash and curr_midrash_start is not None:
                potential_non_midrash_span = (curr_midrash_start, i + 1)
            filler_words += 1
        elif has_cantillation(w, detect_vowels=True) or is_filler_midrash_word(w):
            # Midrash-looking word: close any open commentary span.
            if curr_non_midrash_start is not None:
                non_midrash_spans += [(curr_non_midrash_start, i)]
                curr_non_midrash_start = None
                potential_non_midrash_span = None
            if curr_midrash_start is None:
                curr_midrash_start = i
            midrash_words += 1
        else:
            # Plain Hebrew word: commentary. May also absorb a short
            # preceding "potential" span (<= 8 words) flagged by the dash.
            curr_midrash_start = None
            if curr_non_midrash_start is None:
                if potential_non_midrash_span is not None:
                    potential_non_midrash_span_len = potential_non_midrash_span[
                        1] - potential_non_midrash_span[0]
                    if potential_non_midrash_span_len <= 8:
                        non_midrash_spans += [potential_non_midrash_span]
                    potential_non_midrash_span = None
                curr_non_midrash_start = i
        if ")" in w:
            in_paren = False
        if re.search(r"\.\s*$", w) and not re.search(r"\.\.\.\s*$", w):
            # period means end of potential non midrash span
            curr_midrash_start = None
    # Close a commentary span that runs to the end of the text.
    if curr_non_midrash_start is not None:
        non_midrash_spans += [(curr_non_midrash_start, len(words))]
    actual_len = len(words) - filler_words
    non_midrash_words = reduce(lambda a, b: a + (b[1] - b[0]), non_midrash_spans, 0)
    # Shorter texts get a more lenient commentary-ratio cutoff.
    cutoff = 0.7 if actual_len < 20 else 0.8
    if actual_len <= 0:
        return "", text
    if (non_midrash_words / actual_len) > cutoff:
        # Mostly commentary: treat the whole text as commentary.
        return "", text
    if len(non_midrash_spans) == 0:
        return text, ""
    # Stitch together the two output texts from the span boundaries.
    midrashic_text = ""
    commentary_text = ""
    last_end = 0
    for s, e in non_midrash_spans:
        midrashic_text += " ".join(words[last_end:s])
        commentary_text += " ".join(words[s:e])
        last_end = e
    midrashic_text += " ".join(words[last_end:])
    return midrashic_text, commentary_text
def create_node_set():
    """
    Build the full topic-graph NodeSet by merging many CSV/JSON sources:
    upper-level ontology nodes, Aspaklaria topics, Tanakh/Talmud people
    (matched and unmatched against Wikidata / person records), Rambam halachic
    hierarchy, Sefer HaAgada topics, source-sheet tags, and halachic edges.
    Mutates module-level ``edge_types_dict`` / ``inverse_edge_set`` as a side
    effect, then adds inverse edges, validates, and returns the NodeSet.
    """
    aspaklaria_nodes = read_csv("aspaklaria_nodes.csv")  # DONE
    final_topic_names = read_csv("final_topic_names.csv")  # DONE
    new_topics_edges = read_csv("new_topics_edges.csv")  # DONE
    upper_level_nodes = read_csv("upper_level_nodes.csv")  # DONE
    tanakh_matched = read_csv("tanakh_matched.csv")  # DONE
    tanakh_unmatched = read_csv("tanakh_unmatched.csv")  # DONE
    tanakh_edges = read_csv("tanakh_edges.csv")  # DONE
    edge_types = read_csv("edge_types.csv")  # DONE
    sefer_haagada = read_csv("sefer_haagada.csv")  # DONE
    talmud_matched = read_csv("talmud_matched.csv")  # DONE
    talmud_unmatched = read_csv("talmud_unmatched.csv")  # DONE
    talmud_edges = read_csv("talmud_edges.csv")  # DONE
    source_sheets = read_csv("source_sheets.csv")  # DONE
    source_sheets_dedup = read_csv("source_sheets_dedup.csv")  # DONE
    halachic_edges = read_csv("halachic_edges.csv")  # DONE
    node_set = NodeSet()
    Node.node_set = node_set
    print("START UPPER LEVEL")
    # UPPER LEVEL
    for row in upper_level_nodes:
        nid = row["Node"].lower()
        try:
            # already exists. just add isa edge
            n = node_set[nid]
        except KeyError:
            n = Node(nid, row["Node"], bfo_id=row["BFO ID"])
        if len(row["isa"]) > 0:
            n.add_edge("is a", row["isa"].lower())
        node_set[nid] = n
    print("START EDGE TYPES")
    # EDGE TYPES -- populate the module-level forward/inverse edge-name maps.
    for row in edge_types:
        if len(row["Edge Inverse"]) > 0:
            edge_types_dict[row["Edge"]] = row["Edge Inverse"]
            edge_types_dict[row["Edge Inverse"]] = row["Edge"]
            if row["Edge"] != row["Edge Inverse"]:
                inverse_edge_set.add(row["Edge Inverse"])
    print("START ASPAKLARIA")
    # ASPAKLARIA
    overwritten_cats = set()
    for row in aspaklaria_nodes:
        n = Node(row["Topic"], according_to=(row["According to"] if len(row["According to"]) else None))
        is_cat = row['Is Category']
        if len(is_cat) > 0:
            if is_cat in overwritten_cats:
                print("Already overwrote {} to {}. now overwriting to {}".format(
                    is_cat, node_set[is_cat].id, n.id))
            node_set[is_cat] = n  # reroute to n
            overwritten_cats.add(is_cat)
        else:
            # Map the is-a target through the upper-level rename table if present.
            isa = row["Is A Type Of"] if row[
                "Is A Type Of"] not in upper_level_mapping else upper_level_mapping[
                row["Is A Type Of"]]
            n.add_edge("is a", isa)
            if len(row["Is a Type Of (2)"]) > 0:
                isa2 = row["Is a Type Of (2)"] if row[
                    "Is a Type Of (2)"] not in upper_level_mapping else upper_level_mapping[
                    row["Is a Type Of (2)"]]
                n.add_edge("is a", isa2)
        node_set[row["Topic"]] = n
    print("START EDGES")
    # EDGES
    for row in new_topics_edges:
        if len(row["Topic"]) == 0 or len(row["Has Edge"]) == 0 or len(
                row["To Topic (Actual)"]) == 0:
            continue
        try:
            n = node_set[row["Topic"]]
        except KeyError:
            print("KeyError: {}".format(row["Topic"]))
            continue
        temp_edge = row["Has Edge"]
        if temp_edge == "alternate spelling of":
            # merge both nodes. usually `to topic` is the main topic
            try:
                m = node_set[row["To Topic (Actual)"]]
                m.alt_spell_id = n.id
                node_set[row["Topic"]] = m  # reroute to m from now on
            except KeyError:
                print("Alt Spelling Key Error", row["To Topic (Actual)"])
        else:
            n.add_edge(row["Has Edge"], row["To Topic (Actual)"])
    print("START TANAKH UNMATCHED")
    # TANAKH UNMATCHED -- biblical people keyed by trailing Wikidata Q-id in the URL.
    for row in tanakh_unmatched:
        wid = re.findall(r"Q\d+$", row["URL"])[0]
        n = Node(wid, row["English Name"], row["Hebrew Name"], wikidata_id=wid)
        n.add_edge("is a", "biblical person")
        node_set[wid] = n
    print("START TALMUD UNMATCHED")
    # TALMUD UNMATCHED
    for row in talmud_unmatched:
        jeLink = row["jeLink"] if len(row["jeLink"]) > 0 else None
        heWikiLink = row["heWikiLink"] if len(row["heWikiLink"]) > 0 else None
        enWikiLink = row["enWikiLink"] if len(row["enWikiLink"]) > 0 else None
        n = Node(row["English Name"], row["English Name"], row["Hebrew Name"],
                 generation=row["generation"], jeLink=jeLink,
                 heWikiLink=heWikiLink, enWikiLink=enWikiLink)
        n.add_edge("is a", "mishnaic person" if row["Time Period"] == 'mishnah' else "talmudic person")
        node_set[row["English Name"]] = n
        node_set.items_by_talmud_name[row["English Name"]] = n
    print("START RAMBAM")
    # RAMBAM -- halachic topic hierarchy; roots hang off the "halacha" node.
    with codecs.open(u"{}/../rambam/rambam_topic_hierarchy.json".format(ROOT), "rb", encoding="utf8") as fin:
        rambam = json.load(fin)
    for row in rambam:
        rid = "RAMBAM|{}".format(row["en"])
        n = Node(rid, row["en"])
        for p in row["parents"]:
            n.add_edge("is a", "RAMBAM|{}".format(p))
        if len(row["parents"]) == 0:
            n.add_edge("is a", "halacha")
        node_set[rid] = n
    print("START SEFER HAAGADA MATCHED")
    # SEFER HAAGADA MATCHED
    for row in sefer_haagada:
        if len(row["Aspaklaria Topic"].strip()) > 0:
            n = node_set[row["Aspaklaria Topic"]]
            if len(row['synonym']) > 0:
                n.alt_he.add(row["Topic Name"])
            n.sefer_haagada_name = row["Topic Name"]
            node_set.items_by_sefer_haagada_name[row["Topic Name"]] = n
    print("START TOPIC NAMES")
    # TOPIC NAMES -- rows past RAMBAM_ROW_INDEX are Rambam / Sefer HaAgada rows;
    # earlier rows refer to existing Aspaklaria topics.
    for irow, row in enumerate(final_topic_names):
        if irow >= RAMBAM_ROW_INDEX and len(
                row["English description"].strip()) > 0:
            n = node_set["RAMBAM|{}".format(row["English description"].strip())]
        elif irow >= RAMBAM_ROW_INDEX and has_cantillation(row["Topic"], detect_vowels=True):
            # Sefer Haagada
            n = Node(row["Topic"])
            n.sefer_haagada_name = row["Topic"]
            n.add_edge("is a", row["Is A Type Of"])
            if len(row["Is a Type Of (2)"].strip()) > 0:
                n.add_edge("is a", row["Is a Type Of (2)"])
            node_set.items_by_sefer_haagada_name[row["Topic"]] = n
            node_set[row["Topic"]] = n
        else:
            try:
                n = node_set[row["Topic"]]
            except KeyError:
                continue
        description = u""
        final_english = row["Final English Translation"]
        if len(row["Is Paren Good Description"]) > 0:
            # Trailing "(...)" holds the description; strip it off the name.
            match = re.search(r"^(.*)\(([^)]+)\)\s*$", final_english)
            final_english = match.group(1).strip()
            description = match.group(2)
        if len(row["According to:"]) > 0:
            if len(description) > 0:
                description += u". "
            description += u"Translated according to {}".format(
                row["According to:"])
        n.en_name = final_english
        n.description = description
        n.en_transliteration = row["Final English Transliteration"] if len(
            row["Final English Transliteration"]) else None
        temp_he = row["Final Topic Name"].strip()
        if len(n.he_name) == 0:
            n.he_name = temp_he
        elif temp_he != n.he_name and len(temp_he) > 0:
            # alt title
            print("Adding alt he {} to {}".format(temp_he, n.he_name))
            n.alt_he.add(temp_he)
    print("START TANAKH MATCHED")
    # TANAKH MATCHED -- attach Wikidata names/ids to existing nodes.
    for row in tanakh_matched:
        if len(row["Match Name"]) > 0:
            n = node_set[row["Name"]]
            alt_he = row["Match Name"]
            alt_en = row["Match En Name"]
            if len(n.en_name) == 0:
                n.en_name = alt_en
            else:
                n.alt_en.add(alt_en)
            if len(n.he_name) == 0:
                n.he_name = alt_he
            else:
                n.alt_he.add(alt_he)
            n.wikidata_id = row["Match ID"]
            node_set.items_by_wid[n.wikidata_id] = n
    print("START TALMUD MATCHED")
    # TALMUD MATCHED
    for row in talmud_matched:
        if len(row["Match Name En"]) > 0:
            n = node_set[row["Name"]]
            alt_he = row["Match Name 1"]
            alt_en = row["Match Name En"]
            if len(n.en_name) == 0 and alt_en != n.en_transliteration:
                n.en_name = alt_en
            elif alt_en != n.en_name and alt_en != n.en_transliteration:
                n.alt_en.add(alt_en)
            if len(n.he_name) == 0:
                n.he_name = alt_he
            elif alt_he != n.he_name:
                n.alt_he.add(alt_he)
            try:
                # NOTE(review): `yo` is unused -- this lookup only detects a
                # pre-existing node under the matched name (a collision).
                yo = node_set[row["Match Name En"]]
                print("{} EXISTS!!".format(row["Match Name En"]))
            except KeyError:
                pass
            node_set.items_by_talmud_name[row["Match Name En"]] = n
    print("START TANAKH EDGES")
    # TANAKH EDGES
    male_female_dict = {"female": u"נקבה", "male": u"זכר"}
    # manually add king of israel / judah which are relevant to tanakh edges
    n = Node(u"מלך יהודה", "King of Judah", u"מלך יהודה")
    n.add_edge("is a", u"מלך מלכות")
    node_set[u"מלך יהודה"] = n
    n = Node(u"מלך ישראל", "King of Israel", u"מלך ישראל")
    n.add_edge("is a", u"מלך מלכות")
    node_set[u"מלך ישראל"] = n
    # First two passes create any missing subject / object nodes so the third
    # pass can safely add edges between them.
    for row in tanakh_edges:
        try:
            n = node_set.get_by_wid(row["ID"])
        except KeyError:
            # for some reason doesn't exist yet. create it
            n = Node(row["ID"], row["Name"], row["He Name"], wikidata_id=row["ID"])
            node_set[row["ID"]] = n
    for row in tanakh_edges:
        try:
            n = node_set.get_by_wid(row["Value ID"])
        except KeyError:
            # for some reason doesn't exist yet. create it
            n = Node(row["Value ID"], row["Value"], wikidata_id=row["Value ID"])
            node_set[row["Value ID"]] = n
            print("Created Value {}".format(row["Value ID"]))
    for row in tanakh_edges:
        n = node_set.get_by_wid(row["ID"])
        value = row["Value"]
        if row["Edge"] == "alternate spelling of":
            # just add the alt title
            if len(n.he_name) == 0:
                n.he_name = value
            else:
                n.alt_he.add(value)
        elif row["Edge"] == "has transliteration":
            if len(n.en_name) == 0:
                n.en_name = value
            else:
                n.alt_en.add(value)
        else:
            if value in male_female_dict:
                to_node_id = male_female_dict[value]
            else:
                if len(row["Value ID"]) == 0:
                    to_node_id = value
                else:
                    try:
                        to_node_id = node_set.get_by_wid(row["Value ID"]).id
                    except KeyError:
                        print(row["Value ID"])
                        continue
            n.add_edge(row["Edge"], to_node_id)
    print("START TALMUD EDGES")
    # TALMUD EDGES -- first pass creates missing person nodes (from Person
    # records), second pass adds the edges.
    for row in talmud_edges:
        try:
            n = node_set.get_by_talmud_name(row["Name"])
        except KeyError:
            person = Person().load({"key": row["Name"]})
            n = Node(row["Name"], row["Name"], person.primary_name('he'),
                     jeLink=getattr(person, 'jeLink', None),
                     heWikiLink=getattr(person, 'heWikiLink', None),
                     enWikiLink=getattr(person, 'enWikiLink', None))
            node_set[row["Name"]] = n
            node_set.items_by_talmud_name[row["Name"]] = n
        try:
            n = node_set.get_by_talmud_name(row["Value"])
        except KeyError:
            person = Person().load({"key": row["Value"]})
            n = Node(row["Value"], row["Value"], person.primary_name('he'),
                     jeLink=getattr(person, 'jeLink', None),
                     heWikiLink=getattr(person, 'heWikiLink', None),
                     enWikiLink=getattr(person, 'enWikiLink', None))
            node_set[row["Value"]] = n
            node_set.items_by_talmud_name[row["Value"]] = n
    for row in talmud_edges:
        try:
            n = node_set.get_by_talmud_name(row["Name"])
        except KeyError:
            print(row["Name"])
            print("NAME")
            continue
        try:
            to_node = node_set.get_by_talmud_name(row["Value"])
        except KeyError:
            print(row["Value"])
            print("VALUE")
            continue
        n.add_edge(row["Edge"], to_node.id)
    print("START SOURCE SHEET DEDUP")
    # Collapse chains of "same as" links so every tag maps to its earliest
    # canonical tag, e.g.:
    """
    B -> A
    C -> B
    ===>
    C -> A
    A -> [B]
    B -> [C]
    """
    source_sheet_dedup_map = {}
    source_sheets_dedup_index_map = {}
    source_sheets_dedup_list = []
    for irow, row in enumerate(source_sheets_dedup):
        source_sheets_dedup_list += [row]
        source_sheets_dedup_index_map[row['tag']] = irow
    for row in reversed(source_sheets_dedup_list):
        if len(row['same as']) == 0:
            continue
        if row['same as'] in source_sheet_dedup_map:
            alt_same_as = source_sheet_dedup_map[row['same as']]
            if source_sheets_dedup_index_map[
                    alt_same_as] < source_sheets_dedup_index_map[
                    row['same as']]:
                same_as = alt_same_as
            else:
                same_as = row['same as']
                # rewrite
                # NOTE(review): deleting from source_sheet_dedup_map while
                # iterating .items() raises RuntimeError on Python 3 -- confirm
                # this script targets Python 2 or iterate over a list copy.
                for k, v in source_sheet_dedup_map.items():
                    if v == alt_same_as:
                        if k == alt_same_as:
                            del source_sheet_dedup_map[k]
                        else:
                            source_sheet_dedup_map[k] = row['same as']
                source_sheet_dedup_map[alt_same_as] = row['same as']
        else:
            same_as = row['same as']
        if same_as == row['tag']:
            continue
        source_sheet_dedup_map[row['tag']] = same_as
    print("START SOURCE SHEETS")
    # SOURCE SHEETS
    for row in source_sheets:
        # if aspak -> if not synon -> else -> match it
        # else, if not isCat, -> is a "is a type of" else
        he = row["hebrew tag"]
        en = row["tag"]
        if len(row["aspaklaria topic"]) > 0 and len(row["not synonym"]) == 0:
            n = node_set[row["aspaklaria topic"]]
            if len(n.en_name) == 0 and en != n.en_transliteration:
                n.en_name = en
            elif len(en) > 0 and en != n.en_name:
                n.alt_en.add(en)
            if len(n.he_name) == 0:
                n.he_name = he
            elif len(he) > 0 and he != n.he_name:
                n.alt_he.add(he)
            n.source_sheet_tags.add(en)
        elif len(row["is a type of"]) > 0 or len(row["is category"]) > 0:
            isa = row["is a type of"] if row[
                "is a type of"] not in upper_level_mapping else upper_level_mapping[
                row["is a type of"]]
            n = node_set[isa]
            if row["is category"]:
                # Tag names an existing category node directly.
                if len(n.en_name) == 0 and en != n.en_transliteration:
                    n.en_name = en
                elif len(en) > 0 and en != n.en_name:
                    n.alt_en.add(en)
                if len(n.he_name) == 0:
                    n.he_name = he
                elif len(he) > 0 and he != n.he_name:
                    n.alt_he.add(he)
                n.source_sheet_tags.add(en)
            else:
                if en in source_sheet_dedup_map:
                    # Duplicate tag: fold names into the canonical sheet node.
                    old_sheet_node = node_set[u"SHEET|{}".format(
                        source_sheet_dedup_map[en])]
                    if len(old_sheet_node.en_name
                           ) == 0 and en != old_sheet_node.en_transliteration:
                        old_sheet_node.en_name = en
                    elif len(en) > 0 and en != old_sheet_node.en_name:
                        old_sheet_node.alt_en.add(en)
                    if len(old_sheet_node.he_name) == 0:
                        old_sheet_node.he_name = he
                    elif len(he) > 0 and he != old_sheet_node.he_name:
                        old_sheet_node.alt_he.add(he)
                    old_sheet_node.source_sheet_tags.add(en)
                else:
                    # completely new topic
                    _id = u"SHEET|{}".format(en)
                    m = Node(_id, en, he)
                    m.source_sheet_tags.add(en)
                    m.add_edge("is a", n.id)
                    node_set[_id] = m
    print("START HALACHIC EDGES")
    # HALACHIC EDGES
    for row in halachic_edges:
        if len(row["rambam topic"]) > 0:
            n = node_set[row["topic"]]
            edge_type = "applies halacha" if "halachic process" in n.get_types(
            ) else "related to"
            n.add_edge(edge_type, "RAMBAM|{}".format(row["rambam topic"].strip()))
    # CLEAN UP
    node_set.add_edge_inverses()
    node_set.validate()
    return node_set