def get_or_create_index(path, schema, src):
    """Return the index at ``path``, creating it with ``schema`` if needed.

    After the index is opened (or created), the titles it already holds
    and the documents found in the corpus at ``src`` are handed to
    ``update_index`` together with a fresh writer for the index.
    """
    if exists_in(path):
        index = open_dir(path)
    else:
        index = create_in(path, schema)

    # Titles currently stored in the index, as reported by
    # gen_indexed_fields.
    known_titles = {fields['title'] for fields in gen_indexed_fields(index)}

    # Every document the corpus yields, de-duplicated.
    corpus_docs = set(Corpus(src).gen_documents())

    update_index(index.writer(), known_titles, corpus_docs)
    return index
def _resolve_html_name(ref):
    """Follow ``get_canonical_name`` from ``ref`` to a name ending in 'html'.

    Returns the first name in the chain that ends with ``'html'``, or
    ``None`` when a lookup raises ``KeyError`` or the chain starts
    repeating itself without ever reaching such a name.
    """
    visited = set()
    name = ref
    try:
        while True:
            name = get_canonical_name(name)
            if name.endswith('html'):
                return name
            if name in visited:
                # The lookups loop back on themselves; without this guard
                # the resolution would never terminate.
                return None
            visited.add(name)
    except KeyError:
        # Unknown reference: treated as "no link" by the caller.
        return None


def get_out_links(src='src'):
    """Map each corpus document to the set of names it links out to.

    Every document yielded by ``Corpus(src).gen_documents()`` has its
    ``content`` passed to ``get_references``; each distinct reference is
    then resolved through ``get_canonical_name`` until the resulting name
    ends with ``'html'``.

    Args:
        src: Value forwarded to ``Corpus``. Defaults to ``'src'``, the
            location that used to be hard-coded.

    Returns:
        dict: ``doc.filename`` mapped to a ``set`` of resolved names.
        Documents whose references cannot be extracted are left out of
        the mapping; references that cannot be resolved are left out of
        the set.
    """
    links = {}
    for doc in Corpus(src).gen_documents():
        text = doc.content
        try:
            refs = get_references(text)
        except Exception:
            # Best effort: skip documents whose references cannot be
            # extracted, but let KeyboardInterrupt/SystemExit propagate.
            continue

        resolved = (_resolve_html_name(ref) for ref in set(refs))
        links[doc.filename] = {name for name in resolved if name is not None}
    return links