import math
import pickle


def query(pl_list, tokens, isRankedBased):
    if not pl_list and not isRankedBased:
        return []

    results = intersect(pl_list)  # in ranked mode, pl_list holds only the postings lists of NOT and phrase inputs

    if isRankedBased:
        for docID in results:
            # only documents containing at least one query term receive the +0.5 bonus
            if getNewsList()[docID].hasQueryTerm:
                getNewsList()[docID].score += 0.5
        finalizeResults()  # keep only the top K documents by score
        results = getResults()  # getResults() simply returns the finalized top-K list

    return [getNewsList()[docID].structuredFormatResultsPage(tokens) for docID in results]
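
# intersect() is defined elsewhere in the repo. Below is a minimal sketch of
# the standard pairwise merge over sorted docID lists, assuming every element
# of pl_list is an ascending list of docIDs; the empty-input behavior (return
# every document) is an assumption inferred from the ranked-mode call above.
def intersect_sketch(pl_list):
    if not pl_list:
        return list(range(len(getNewsList())))  # no constraints: every document qualifies
    result = pl_list[0]
    for pl in pl_list[1:]:
        merged, i, j = [], 0, 0
        while i < len(result) and j < len(pl):
            if result[i] == pl[j]:
                merged.append(result[i])  # docID present in both lists
                i += 1
                j += 1
            elif result[i] < pl[j]:
                i += 1
            else:
                j += 1
        result = merged
    return result
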
def NOT(term_pl):
    # Complement of a sorted postings list: every docID not in term_pl,
    # computed with a single linear merge over the whole collection.
    all_docs = []
    pointer = 0
    for d in range(len(getNewsList())):
        if pointer < len(term_pl) and d == term_pl[pointer]:
            pointer += 1  # document contains the term; skip it
        else:
            all_docs.append(d)
    return all_docs
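
# Example (hypothetical collection of 5 documents with IDs 0..4):
#   NOT([1, 3])  ->  [0, 2, 4]
#   NOT([])      ->  [0, 1, 2, 3, 4]
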
def constructInvertedIndex():
    global dictionary
    dictionary = BTree(Node("سسسسسس", 1, []))  # sentinel root term for the term tree
    nodesList = []
    for docCounter, news in enumerate(getNewsList(), start=1):
        nodes = {}  # dict used as an ordered set of the term nodes seen in this document
        for position, term in enumerate(tokenize(normalize(news.content), check_finglish)):
            if term != invalidToken:
                nodes[dictionary.addOccurrence(term, news.id, position)] = True
        nodesList.append(nodes)
        for node in nodes:
            node.cal_tf(news.id)
        if docCounter % 20 == 0:
            Laws.heap(getDictionary())  # sample vocabulary growth for the Heaps' law data set
    calAllIdf(dictionary.root)

    # calculate each document's normalization factor under the three scoring schemes
    for news, nodes in zip(getNewsList(), nodesList):
        sum_of_squares_1 = 0
        sum_of_squares_2 = 0
        sum_of_squares_3 = 0
        for node in nodes:
            sum_of_squares_1 += math.pow((getTf(news.id, node.postingsList) - 1) * node.idf, 2)
            sum_of_squares_2 += math.pow(getTf(news.id, node.postingsList), 2)
            sum_of_squares_3 += math.pow(getTf(news.id, node.postingsList) * node.idf, 2)
        normalizationFactorsScheme1.append(math.sqrt(sum_of_squares_1))
        normalizationFactorsScheme2.append(math.sqrt(sum_of_squares_2))
        normalizationFactorsScheme3.append(math.sqrt(sum_of_squares_3))

    Laws.storeHeapDataSet()
    storeDictionary(dictionary)
    storeNormFactors()
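
# Hedged sketch (not part of this file): how the precomputed factors would be
# used at query time to turn accumulated dot-product scores into cosine
# similarities. `scores` (docID -> raw score) and `scheme` are hypothetical
# names introduced here for illustration only.
def normalize_scores_sketch(scores, scheme=3):
    factors = {1: normalizationFactorsScheme1,
               2: normalizationFactorsScheme2,
               3: normalizationFactorsScheme3}[scheme]
    for docID in scores:
        if factors[docID] != 0:
            scores[docID] /= factors[docID]
    return scores
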
def cal_idf(node):
    # idf = log10(N / df): N is the collection size,
    # node.frequency the term's document frequency.
    N = len(getNewsList())
    node.idf = math.log10(N / node.frequency)
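
# calAllIdf (called from constructInvertedIndex) is defined elsewhere; a
# minimal sketch of the traversal it is assumed to perform, under the
# assumption that each tree node exposes `left` and `right` children:
def calAllIdf_sketch(node):
    if node is None:
        return
    cal_idf(node)
    calAllIdf_sketch(node.left)
    calAllIdf_sketch(node.right)
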
def storeHeapDataSet():
    path = 'Laws/heap' + str(len(getNewsList())) + '.pickle'
    with open(path, 'wb') as handle:
        pickle.dump(heapDataSet, handle, protocol=pickle.HIGHEST_PROTOCOL)
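
# Heaps' law relates vocabulary size M to the number of tokens T as
# M = k * T**b. A minimal sketch of fitting k and b from the sampled points;
# it assumes heapDataSet is a list of (num_tokens, num_terms) pairs, which is
# an assumption about its layout, not confirmed by this file.
def fit_heaps_law(samples):
    # least-squares line through (log10 T, log10 M): slope = b, intercept = log10 k
    xs = [math.log10(t) for t, m in samples]
    ys = [math.log10(m) for t, m in samples]
    n = len(xs)
    mean_x = sum(xs) / n
    mean_y = sum(ys) / n
    b = sum((x - mean_x) * (y - mean_y) for x, y in zip(xs, ys)) / \
        sum((x - mean_x) ** 2 for x in xs)
    k = 10 ** (mean_y - b * mean_x)
    return k, b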