예제 #1
0
def addOrBuildBOWToIndex(writer, guid, index_data, full_corpus=False):
    """
        Adds the BOWs of one document to the index, NOT filtering for anything.

        The BOWs are loaded from the cached JSON identified by
        ("bow", guid, index_data). If loading raises or yields None, they are
        rebuilt with prebuildMulti() and the list stored under
        index_data["parameter"] is selected.

        Args:
            writer: index writer, passed through to addLoadedBOWsToIndex()
            guid: identifier of the document whose BOWs are added
            index_data: dict; the keys read here are "method", "parameters",
                "function_name" and "parameter"
            full_corpus: not used by this function

        Raises:
            AssertionError: if the loaded or rebuilt BOWs are not a list
    """
    bow_filename = cp.Corpus.cachedDataIDString("bow", guid, index_data)
    try:
        bows = cp.Corpus.loadCachedJson(bow_filename)
    except Exception:
        # Any failure to load the cache means "rebuild". Catching Exception
        # rather than using a bare except lets KeyboardInterrupt and
        # SystemExit propagate.
        bows = None

    if bows is None:
        print("BOW not found, rebuilding")
        rebuilt = prebuildMulti(index_data["method"],
                                index_data["parameters"],
                                index_data["function_name"],
                                None,
                                None,
                                guid,
                                False,
                                [])  # !TODO rhetorical_annotations here?
        # Note: prebuildMulti will return a dict[param]=list of bows
        bows = rebuilt[index_data["parameter"]]

    assert isinstance(bows, list), (
        "BOWs for guid %r are not a list (type: %s, index_data: %r)"
        % (guid, type(bows), index_data))
    addLoadedBOWsToIndex(writer, guid, bows, index_data)
예제 #2
0
def addOrBuildBOWToIndexExcludingCurrent(writer, guid, exclude_list, max_year, index_data, full_corpus=False):
    """
        Adds the BOWs of one document to the index after filtering them with
        doc_representation.filterInlinkContext().

        The BOWs are loaded from the cached JSON identified by
        ("bow", guid, index_data). If loading raises or yields a falsy value
        (None or an empty container), they are rebuilt with prebuildMulti()
        and the list stored under index_data["parameter"] is selected.

        Args:
            writer: index writer, passed through to addLoadedBOWsToIndex()
            guid: identifier of the document whose BOWs are added
            exclude_list: passed to filterInlinkContext(); per the original
                description, the items whose inlink context is excluded
            max_year: passed to filterInlinkContext(); per the original
                description, used to exclude posterior years
            index_data: dict; the keys read here are "method", "parameters",
                "function_name" and "parameter"
            full_corpus: forwarded to filterInlinkContext()

        Raises:
            AssertionError: if the BOWs are not a list before or after
                filtering
    """
    bow_filename = cp.Corpus.cachedDataIDString("bow", guid, index_data)
    try:
        bows = cp.Corpus.loadCachedJson(bow_filename)
    except Exception:
        # Any failure to load the cache means "rebuild". Catching Exception
        # rather than using a bare except lets KeyboardInterrupt and
        # SystemExit propagate.
        bows = None

    if not bows:
        rebuilt = prebuildMulti(index_data["method"],
                                index_data["parameters"],
                                index_data["function_name"],
                                None,
                                None,
                                guid,
                                False,
                                [])  # !TODO rhetorical_annotations here?
        # prebuildMulti returns a dict[param]=list of bows (see
        # addOrBuildBOWToIndex), so pick the list for the requested parameter.
        # Without this the isinstance assertion below fails on every rebuild.
        bows = rebuilt[index_data["parameter"]]

    assert isinstance(bows, list), (
        "BOWs for guid %r are not a list (type: %s)" % (guid, type(bows)))

    # joinTogetherContext?
    bows = doc_representation.filterInlinkContext(bows, exclude_list, max_year, full_corpus=full_corpus)

    assert isinstance(bows, list), (
        "filtered BOWs for guid %r are not a list (type: %s)" % (guid, type(bows)))
    # Only "method" and "parameter" are forwarded to the indexing step.
    addLoadedBOWsToIndex(writer, guid, bows, {"method": index_data["method"],
                                              "parameter": index_data["parameter"]})