def process_version_title_change_in_search(ver, **kwargs):
    from sefaria.local_settings import SEARCH_INDEX_ON_SAVE
    if SEARCH_INDEX_ON_SAVE:
        from sefaria.model import library
        from sefaria.search import delete_version, TextIndexer, get_new_and_current_index_names
        search_index_name = get_new_and_current_index_names()['current']
        search_index_name_merged = get_new_and_current_index_names(merged=True)['current']
        text_index = library.get_index(ver.title)
        # drop the documents indexed under the old version title...
        delete_version(text_index, kwargs.get("old"), ver.language)
        for ref in text_index.all_segment_refs():
            # ...then re-index every segment under the new title, and refresh the merged (no specific version) document as well
            TextIndexer.index_ref(search_index_name, ref, kwargs.get("new"), ver.language, False)
            TextIndexer.index_ref(search_index_name_merged, ref, None, ver.language, True)
Example #2
def process_version_title_change_in_search(ver, **kwargs):
    from sefaria.local_settings import SEARCH_INDEX_ON_SAVE
    if SEARCH_INDEX_ON_SAVE:
        from sefaria.model import library
        from sefaria.search import delete_version, TextIndexer, get_new_and_current_index_names
        search_index_name = get_new_and_current_index_names("text")['current']
        # no reason to deal with the merged index, since individual versions don't exist there; leaving this here in case it becomes necessary
        # search_index_name_merged = get_new_and_current_index_names("merged")['current']
        text_index = library.get_index(ver.title)
        # drop the documents indexed under the old version title, then re-index every segment under the new one
        delete_version(text_index, kwargs.get("old"), ver.language)
        for ref in text_index.all_segment_refs():
            TextIndexer.index_ref(search_index_name, ref, kwargs.get("new"), ver.language, False)
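
A minimal usage sketch, not taken from the Sefaria codebase: it only illustrates the keyword arguments the handler reads via kwargs.get("old") and kwargs.get("new"); the Version lookup and the title strings below are placeholders.

# hypothetical invocation; the version being renamed and the title strings are illustrative only
from sefaria.model import Version

ver = Version().load({"title": "Genesis", "versionTitle": "New Edition 2020", "language": "en"})
process_version_title_change_in_search(ver, old="Old Edition 1998", new="New Edition 2020")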
Example #3
    @classmethod
    def parse_lib_to_json(cls, start, end):
        print('parse_lib', start, end)
        cls.ref_num_min_N_title = [] # min ref_num of each book.title [[min_ref_num, book_title], ...]
        cls.ref_num = 0 # absolute index num for all refs
        cls.curr_title = None
        cls.curr_section = None
        cls.curr_section_text = u""
        # only used for debugging
        cls.ref_num_2_full_name = []

        # the list index is the ref_num (implicitly): ["intro to bookA", 1, 2, 3, "Intro to bookB", ...]
        cls.ref_num_2_part = []

        # dict mapping each word to the set of all ref_nums in which that word appears
        cls.words_2_ref_nums = defaultdict(set)

        # walk the whole library, calling index_segment on each segment
        TextIndexer.index_all("", True, for_es=False, action=OfflineTextIndexer.index_segment)
        # after it's done there's likely an extra section that hasn't been indexed
        cls.index_section(cls.curr_title, cls.section_ref, cls.curr_section_text)
        """
        indexes = library.all_index_records()
        indexes = indexes[start:end]
        print("Running on {} indexes".format(len(indexes)))
        last_time = time.time()
        for i, index in enumerate(indexes):
            title = index.title
            print(i, str(dt.now().time()), index.title, time.time() - last_time)
            last_time = time.time()
            sys.stdout.flush()
            ref_num_min_N_title.append((ref_num, title,))
            try:
                section_refs = index.all_section_refs()
                for section_ref in section_refs:
                    # remove the title from the section_ref
                    ref_part = re.sub(ur'^{}'.format(re.escape(title)), u'', section_ref.normal())
                    ref_num_2_full_name.append(section_ref.normal())
                    ref_num_2_part.append(ref_part)
                    add_words(section_ref, words_2_ref_nums, ref_num)
                    ref_num += 1
            except InputError as e:
                print('ERROR', e)
        print('saving to json...')
        save(REF_NUM_MIN_N_TITLE, ref_num_min_N_title)
        save(REF_NUM_2_PART, ref_num_2_part)
        """

        # convert sets to lists for json
        words_2_ref_nums = {key: sorted(value) for key, value in words_2_ref_nums.iteritems()}
        save(WORDS_2_REF_NUMS, words_2_ref_nums)
        save(_ONLY_WORDS_LIST, words_2_ref_nums.keys())
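
A minimal sketch, separate from the class above, of how the saved words_2_ref_nums mapping can be queried as an inverted index; the JSON filename, the assumption that save() wrote plain JSON, and the query words are all illustrative, not part of the original module.

import json

def lookup_refs(words, path="words_2_ref_nums.json"):
    # load the word -> sorted list of ref_nums mapping produced above
    with open(path) as fp:
        words_2_ref_nums = json.load(fp)
    # intersect the ref_num lists so only refs containing every query word remain
    result = None
    for word in words:
        ref_nums = set(words_2_ref_nums.get(word, []))
        result = ref_nums if result is None else result & ref_nums
    return sorted(result) if result else []

# example query: refs whose text contains both words
print(lookup_refs(["shabbat", "candles"]))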