Ejemplo n.º 1
0
def _doc_positions(term_dic, docid):
    """Return the set of integer positions of a term in ``docid``, or None.

    ``term_dic`` is one term's index entry as returned by ``read_index``.
    None is returned when ``docid`` is absent or when the value stored under
    that key is not a posting (e.g. the ``'_id'`` / ``'df'`` metadata keys
    shown in the index samples map to plain values, not dicts).
    """
    entry = term_dic.get(docid)
    if not isinstance(entry, dict) or 'pos' not in entry:
        return None
    # Positions are normalised with int() exactly as the original code did.
    return {int(p) for p in entry['pos']}


def phrase_search(search_phrase, mode):
    """Find the documents that contain the query terms as an exact phrase.

    The query is preprocessed with ``preprocess_squery`` (presumably yielding
    the ordered list of query terms -- confirm against its definition) and
    every term is looked up once with ``read_index``.  A document matches
    when there is a start position ``p`` such that the k-th query term
    occurs at position ``p + k`` for every k.

    Args:
        search_phrase: The raw phrase query.
        mode: Passed through unchanged to ``preprocess_squery`` and
            ``read_index``.

    Returns:
        A set of matching document ids, or the string ``'None'`` when the
        query is empty, any term is missing from the index, or no document
        contains the phrase.  The string sentinel is kept for backward
        compatibility with existing callers.
    """
    terms = preprocess_squery(search_phrase, mode)
    if not terms:
        # Nothing to search for; previously this raised IndexError.
        return 'None'

    # Load every term's postings exactly once.  If any term is not indexed
    # the phrase cannot occur anywhere.
    postings = []
    for term in terms:
        term_dic = read_index(term, mode)
        if term_dic is None:
            return 'None'
        postings.append(term_dic)

    phrase_docid = set()
    # Only documents containing the first term can match, so they are the
    # candidate set; metadata keys are filtered out by _doc_positions.
    for docid in postings[0]:
        per_term = []
        for term_dic in postings:
            positions = _doc_positions(term_dic, docid)
            if positions is None:
                break
            per_term.append(positions)
        else:
            # Every term occurs in this document: look for one start
            # position at which all following terms line up consecutively.
            following = per_term[1:]
            for start in per_term[0]:
                if all(start + offset in positions
                       for offset, positions in enumerate(following, 1)):
                    phrase_docid.add(docid)
                    break

    if not phrase_docid:
        return 'None'
    return phrase_docid
Ejemplo n.º 2
0
def term_psearch(term, mode):
    """Return the ids of the documents whose index entry contains ``term``.

    Args:
        term: The term to look up.
        mode: Passed through unchanged to ``read_index``.

    Returns:
        A list of document ids, i.e. every key of the term's index entry
        except the ``'df'`` and ``'_id'`` metadata keys, or the string
        ``'0'`` when ``read_index`` returns None (term not in the index).
    """
    # Sample entry: {'_id': 'constant', 'df': 2277,
    #                '1901-00002': {'tf': 2, 'pos': [28, 40]}, ...}
    term_dic = read_index(term, mode)
    if term_dic is None:  # term is not in index
        return '0'
    return [key for key in term_dic if key != 'df' and key != '_id']
Ejemplo n.º 3
0
def term_psearch(term, mode):
    """Return the ids of the documents whose index entry contains ``term``.

    Args:
        term: The term to look up.
        mode: Passed through unchanged to ``read_index``.

    Returns:
        A list of every key of the term's index entry except ``'_id'``, or
        the string ``'0'`` when the term is treated as not indexed:
        ``read_index`` returned None, or the entry holds a single key
        (presumably just ``'_id'`` -- confirm against the index schema).
    """
    term_dic = read_index(term, mode)
    if term_dic is None or len(term_dic) == 1:  # term is not in index
        return '0'
    return [key for key in term_dic if key != '_id']
Ejemplo n.º 4
0
def term_search(term, mode):
    """Return the ids of the documents whose index entry contains ``term``.

    Args:
        term: The term to look up.
        mode: Passed through unchanged to ``read_index``.

    Returns:
        A list of document ids, i.e. every key of the term's index entry
        except the ``'df'`` and ``'_id'`` metadata keys, or the string
        ``'0'`` when ``read_index`` returns None (term not in the index).
    """
    term_dic = read_index(term, mode)
    if term_dic is None:  # term is not in index
        return '0'
    return [key for key in term_dic if key != 'df' and key != '_id']