def addOrBuildBOWToIndex(writer, guid, index_data, full_corpus=False):
    """
    Add the BOWs of document `guid` to the index, without any filtering.

    The BOWs are loaded from the cached JSON file identified by
    (guid, index_data). If the cache cannot be loaded, or holds nothing,
    they are rebuilt with prebuildMulti().

    Args:
        writer: index writer, passed straight to addLoadedBOWsToIndex()
        guid: identifier of the document whose BOWs are added
        index_data: dict; this function reads the keys "method",
            "parameters", "function_name" and "parameter"
        full_corpus: not used by this function; kept so the signature
            parallels addOrBuildBOWToIndexExcludingCurrent()

    Raises:
        AssertionError: if the loaded or rebuilt BOWs are not a list
    """
    bow_filename = cp.Corpus.cachedDataIDString("bow", guid, index_data)

    # A failed cache load is treated as a cache miss. Only Exception is
    # caught so that KeyboardInterrupt / SystemExit still propagate.
    try:
        bows = cp.Corpus.loadCachedJson(bow_filename)
    except Exception:
        bows = None

    if bows is None:
        print("BOW not found, rebuilding")
        bows = prebuildMulti(
            index_data["method"],
            index_data["parameters"],
            index_data["function_name"],
            None,
            None,
            guid,
            False,
            [],  # !TODO rhetorical_annotations here?
        )
        # Note: prebuildMulti will return a dict[param]=list of bows,
        # so pick out the list for the parameter being indexed
        bows = bows[index_data["parameter"]]

    # The message carries the diagnostics needed to track down a bad cache
    # entry or an unexpected prebuildMulti() result.
    assert isinstance(bows, list), (
        "BOWs for guid %r are %s, expected list (index_data: %r)"
        % (guid, type(bows).__name__, index_data)
    )
    addLoadedBOWsToIndex(writer, guid, bows, index_data)
def addOrBuildBOWToIndexExcludingCurrent(writer, guid, exclude_list, max_year, index_data, full_corpus=False):
    """
    Add the BOWs of document `guid` to the index, filtering the inlink
    context: excludes what bits came from the current exclude_list,
    posterior year, same author, etc.

    The BOWs are loaded from the cached JSON file identified by
    (guid, index_data). If the cache cannot be loaded, or holds nothing,
    they are rebuilt with prebuildMulti().

    Args:
        writer: index writer, passed straight to addLoadedBOWsToIndex()
        guid: identifier of the document whose BOWs are added
        exclude_list: passed to doc_representation.filterInlinkContext()
        max_year: passed to doc_representation.filterInlinkContext()
        index_data: dict; this function reads the keys "method",
            "parameters", "function_name" and "parameter"
        full_corpus: forwarded to doc_representation.filterInlinkContext()

    Raises:
        AssertionError: if the BOWs are not a list, before or after
            filtering
    """
    bow_filename = cp.Corpus.cachedDataIDString("bow", guid, index_data)

    # A failed cache load is treated as a cache miss. Only Exception is
    # caught so that KeyboardInterrupt / SystemExit still propagate.
    try:
        bows = cp.Corpus.loadCachedJson(bow_filename)
    except Exception:
        bows = None

    # Unlike addOrBuildBOWToIndex(), an empty cached value also triggers a
    # rebuild here.
    if not bows:
        bows = prebuildMulti(
            index_data["method"],
            index_data["parameters"],
            index_data["function_name"],
            None,
            None,
            guid,
            False,
            [],  # !TODO rhetorical_annotations here?
        )
        # Per the note in addOrBuildBOWToIndex(), prebuildMulti returns a
        # dict[param]=list of bows. Select this parameter's list, otherwise
        # the list check below fails on every rebuild.
        if isinstance(bows, dict):
            bows = bows[index_data["parameter"]]

    assert isinstance(bows, list), (
        "BOWs for guid %r are %s, expected list (index_data: %r)"
        % (guid, type(bows).__name__, index_data)
    )

    # joinTogetherContext?
    bows = doc_representation.filterInlinkContext(
        bows, exclude_list, max_year, full_corpus=full_corpus
    )

    assert isinstance(bows, list), (
        "filtered BOWs for guid %r are %s, expected list"
        % (guid, type(bows).__name__)
    )

    # Only "method" and "parameter" are handed on to the indexing step.
    addLoadedBOWsToIndex(
        writer,
        guid,
        bows,
        {"method": index_data["method"], "parameter": index_data["parameter"]},
    )