Exemple #1
0
# One-time NLTK resource downloads, left disabled.
# nltk.download("punkt")
# nltk.download("maxent_treebank_pos_tagger")
# Add the local directory holding the downloaded NLTK resources to the
# search path (presumably `data` is `nltk.data` -- confirm against imports).
data.path.append(r"nltk_data")

# Name of the corpus directory; appended to the project path below.
DIRECTNAME = 'Reuters'

# Index construction, left disabled; the index is only loaded below.
# print("establishing the INDEX...")
# establishIndex.createIndex(DIRECTNAME)

print("getting word list...")
WORDLIST = getIndex.getWordList()
print("getting index...")
INDEX = getIndex.getIndex()
print("loading the wordnet...")
# The result is discarded; presumably a warm-up call so WordNet is loaded
# before the first real query -- TODO confirm.
stemming.lemmatize_sentence("a", False)

# Plain string concatenation: relies on tools.projectpath already ending with
# a path separator -- NOTE(review): confirm.
PATH = tools.projectpath + DIRECTNAME
FILES = os.listdir(tools.reuterspath)
# Number of entries found in the Reuters directory.
FILENUM = len(FILES)

# Loop flag for the interactive menu that follows.
LOOP = True
print("=================Searching System=================")

while LOOP:
    print("searching operation: ")
    print(
        "[1] Overall [2]TOP K [3]BOOL [4]Phrase [5]wildcard [6]synonyms [7]exit"
    )
    print("your choice(int):")
    try:
Exemple #2
0
def preProcess(filename):
    """Read a text file and return its lemmatized content.

    Args:
        filename: Path of the file to read (opened in text mode).

    Returns:
        Whatever ``stemming.lemmatize_sentence(content, False)`` produces
        for the file's full text.
    """
    # The context manager closes the handle even if read() raises; the
    # previous version never closed the file.
    with open(filename, 'r') as source:
        content = source.read()
    return stemming.lemmatize_sentence(content, False)
# Upper bound on the number of hits printed per query.
K = 10

# Interactive boolean-search loop: one query per iteration until the user
# types EXIT (or LOOP is falsy on entry).
while LOOP:
    print("---------> Start search No. {0}".format(SEARCH_NUM))

    # Synonym retrieval is switched on only by an exact upper-case "Y".
    print("Enable Synonym Retrieval?[Y]/[N] ", end='')
    choose = input()
    syn_FLAG = (choose == "Y")

    print("input the query statement(EXIT to quit): ", end='')
    STATEMENT = input()
    if STATEMENT == "EXIT":
        break

    print("stemming...")
    INPUT_WORDS = stemming.lemmatize_sentence(STATEMENT, True)

    DOC_LIST = BoolSearchDel.bool_search(DOC_NUM, INPUT_WORDS, INDEX, syn_FLAG)

    # Report the total number of hits, then list at most K of them.
    hit_total = len(DOC_LIST)
    print("Found {0} document(s)  that matched query".format(hit_total))
    for i in range(min(K, hit_total)):
        hit = DOC_LIST[i]
        # Each hit is indexed as [0] for the document name and [1] for the score.
        print("doc name:{0}.html, score = {1}".format(hit[0], hit[1]))

    SEARCH_NUM += 1

print("ByeBye!")
def preProcess(filename):
    """Read a text file, lemmatize its content and print the resulting words.

    Args:
        filename: Path of the file to read (opened in text mode).
    """
    # The context manager closes the handle even if read() raises; the
    # previous version never closed the file.
    with open(filename, 'r') as source:
        content = source.read()
    words = stemming.lemmatize_sentence(content)
    print(words)