Beispiel #1
0
# Query processing
# Parse the topics file. The context manager closes the handle as soon as the
# markup has been read instead of leaving it open for the rest of the run.
with open('topics.xml', 'r') as tIndexFile:
    soup = BeautifulSoup(tIndexFile.read(), 'html.parser')

queries = soup.findAll('query')

# A single stemmer instance is reused for every token rather than constructing
# a new PorterStemmer per word.
queryStemmer = PorterStemmer()

# fnewstoplis[k] holds the processed tokens of queries[k]: the query text is
# split on whitespace, tokens found in dictStopword are dropped, and each
# remaining token is stemmed and stripped of apostrophes.
# NOTE(review): dictStopword is not defined in this section; it must already
# be in scope and support the `in` operator.
fnewstoplis = []
for query in queries:
    lisq = str(query.get_text()).split()
    newstoplis = [
        queryStemmer.stem(word).replace('\'', '')
        for word in lisq
        if word not in dictStopword
    ]
    fnewstoplis.append(newstoplis)

# Collect the 'number' attribute of every <topic> element, in document order.
topics = soup.findAll('topic')
topIds = [topic.get('number') for topic in topics]

# Pair each topic id with the token list at the same position. Indexing is
# kept on purpose (instead of zip) so that having more topics than queries
# still fails loudly with IndexError rather than silently dropping topics.
querydict = {topId: fnewstoplis[pos] for pos, topId in enumerate(topIds)}

#  Real relevance
# Read the relevance judgements file into a list with one entry per line
# (readlines() keeps the trailing newline on each entry). The context manager
# closes the file once it has been read instead of leaking the handle.
with open('relevance judgements.qrel', 'r') as realRelfile:
    realRel = realRelfile.readlines()