Exemple #1
0
def get_or_create_index(path, schema, src):
    """Return the index at ``path``, creating it from ``schema`` if needed.

    After the index is opened (or created), the titles already stored in it
    and the documents found in the corpus at ``src`` are gathered and handed,
    together with a fresh writer, to ``update_index``.

    Args:
        path: Location checked with ``exists_in`` and passed to ``open_dir``
            or ``create_in``.
        schema: Schema passed to ``create_in``; unused when the index
            already exists.
        src: Argument forwarded to ``Corpus``.

    Returns:
        The opened or newly created index object.
    """
    if exists_in(path):
        index = open_dir(path)
    else:
        index = create_in(path, schema)

    # Titles currently present in the index.
    indexed_titles = {entry['title'] for entry in gen_indexed_fields(index)}
    # Distinct documents produced by the corpus.
    documents = set(Corpus(src).gen_documents())

    # NOTE(review): whether update_index commits the writer is not visible
    # from this function -- confirm against its definition.
    writer = index.writer()
    update_index(writer, indexed_titles, documents)
    return index
Exemple #2
0
def get_or_create_index(path, schema, src):
    """Return the index at ``path``, creating it from ``schema`` if needed.

    After the index is opened (or created), the titles already stored in it
    and the documents found in the corpus at ``src`` are gathered and handed,
    together with a fresh writer, to ``update_index``.

    Args:
        path: Location checked with ``exists_in`` and passed to ``open_dir``
            or ``create_in``.
        schema: Schema passed to ``create_in``; unused when the index
            already exists.
        src: Argument forwarded to ``Corpus``.

    Returns:
        The opened or newly created index object.
    """
    if exists_in(path):
        index = open_dir(path)
    else:
        index = create_in(path, schema)

    # Titles currently present in the index.
    indexed_titles = {entry['title'] for entry in gen_indexed_fields(index)}
    # Distinct documents produced by the corpus.
    documents = set(Corpus(src).gen_documents())

    # NOTE(review): whether update_index commits the writer is not visible
    # from this function -- confirm against its definition.
    writer = index.writer()
    update_index(writer, indexed_titles, documents)
    return index
def get_out_links(src='src'):
    """Map every corpus document to the set of targets it links to.

    The ``content`` of each document yielded by
    ``Corpus(src).gen_documents()`` is passed to ``get_references``. Every
    distinct reference is then resolved with ``get_canonical_name``, applied
    repeatedly until the resulting name ends in ``'html'``.

    Args:
        src: Argument forwarded to ``Corpus``. Defaults to ``'src'``, the
            value this function used before it became a parameter.

    Returns:
        A dict keyed by ``doc.filename`` whose values are sets of resolved
        canonical names. Documents for which ``get_references`` fails are
        omitted; references whose resolution raises ``KeyError`` or runs
        into a cycle are dropped.
    """
    links = {}
    for doc in Corpus(src).gen_documents():
        text = doc.content
        try:
            refs = get_references(text)
        except Exception:
            # Best effort: skip documents whose references cannot be
            # extracted. Catching Exception (not a bare except) lets
            # KeyboardInterrupt and SystemExit propagate.
            continue

        out_links = set()
        for ref in set(refs):
            # Names already visited while resolving this reference; guards
            # against an endless loop when canonical names form a cycle.
            seen = set()
            try:
                canonical_name = get_canonical_name(ref)
                while not canonical_name.endswith('html'):
                    if canonical_name in seen:
                        canonical_name = None
                        break
                    seen.add(canonical_name)
                    canonical_name = get_canonical_name(canonical_name)
            except KeyError:
                # Unresolvable reference: ignore it and keep going.
                continue
            if canonical_name is not None:
                out_links.add(canonical_name)
        links[doc.filename] = out_links
    return links