def process_query(query):
    """Turn a free-text query into a string of stemmed terms.

    The query is lower-cased and written to the scratch file ``tokin.dat``,
    whose path is handed to ``tokenize_and_remove_stopword``. Every
    whitespace-separated token of that call's result is then passed
    through ``stem``.

    Args:
        query: Raw query string.

    Returns:
        The stemmed tokens, each followed by a single space (so a non-empty
        result ends with a trailing space), or ``""`` when the query has no
        whitespace-separated terms. In that case ``tokin.dat`` is not
        touched.
    """
    if not query.split():
        return ""

    # The tokenizer is given a file path, so the query is staged on disk.
    # The context manager closes the handle even if the write fails.
    with open("tokin.dat", "w") as tokin:
        tokin.write(query.lower())

    tokens = tokenize_and_remove_stopword("tokin.dat").split()
    return "".join(stem(token) + " " for token in tokens)
# Start writing patin.xml: one <file> record per <file> element under `root`.
# Each record gets its index copied verbatim into <I>, and, when an <Author>
# element exists, the author text run through tokenize_and_remove_stopword()
# and stem() into <A>.
# NOTE(review): `fo`, the <data> root and the <C> element are all still open
# at the end of this section; the code that completes them is not visible
# here, so `fo` is deliberately not wrapped in a `with` block.
fo = open("patin.xml", "w")
stopword('stopwords.dat')
fo.write("<data>")
for file in root.findall('file'):
    index = file.find('index').text
    fo.write("<file>\n")
    fo.write("<I>" + index + "</I>\n")

    author = file.find('Author')
    if author is not None:
        fo.write("<A>")
        # The tokenizer is given a file path, so the text is staged in a
        # scratch file. An empty <Author/> element has text None; treat it
        # as an empty string instead of crashing on .lower().
        with open("authin.dat", "w") as authin:
            authin.write((author.text or "").lower())
        tok = tokenize_and_remove_stopword('authin.dat')
        tok = tok.lower().split()
        for w in tok:
            fo.write(stem(w) + " ")
        fo.write("</A>\n")

    # Look the element up before touching .text so that a record without a
    # <Title> is handled the same way as a missing <Author> or <Content>.
    title_elem = file.find('Title')
    title = title_elem.text if title_elem is not None else None
    content = file.find('Content')
    with open("tokin.dat", "w") as tokin:
        if title is not None:
            tokin.write(title)
            # Keep the last title word and the first content word from
            # being fused into a single token.
            tokin.write(" ")
        # An empty <Content/> element has text None, which write() rejects.
        if content is not None and content.text is not None:
            tokin.write(content.text)
    tok = tokenize_and_remove_stopword('tokin.dat')
    fo.write("<C>")
# Start writing patin.xml: one <file> record per <file> element under `root`.
# Each record gets its index copied verbatim into <I>, and, when an <Author>
# element exists, the author text run through tokenize_and_remove_stopword()
# and stem() into <A>.
# NOTE(review): `fo`, the <data> root and the <C> element are all still open
# at the end of this section; the code that completes them is not visible
# here, so `fo` is deliberately not wrapped in a `with` block.
fo = open("patin.xml", "w")
stopword('stopwords.dat')
fo.write("<data>")
for file in root.findall('file'):
    index = file.find('index').text
    fo.write("<file>\n")
    fo.write("<I>" + index + "</I>\n")

    author = file.find('Author')
    if author is not None:
        fo.write("<A>")
        # The tokenizer is given a file path, so the text is staged in a
        # scratch file. An empty <Author/> element has text None; treat it
        # as an empty string instead of crashing on .lower().
        with open("authin.dat", "w") as authin:
            authin.write((author.text or "").lower())
        tok = tokenize_and_remove_stopword('authin.dat')
        tok = tok.lower().split()
        for w in tok:
            fo.write(stem(w) + " ")
        fo.write("</A>\n")

    # Look the element up before touching .text so that a record without a
    # <Title> is handled the same way as a missing <Author> or <Content>.
    title_elem = file.find('Title')
    title = title_elem.text if title_elem is not None else None
    content = file.find('Content')
    with open("tokin.dat", "w") as tokin:
        if title is not None:
            tokin.write(title)
            # Keep the last title word and the first content word from
            # being fused into a single token.
            tokin.write(" ")
        # An empty <Content/> element has text None, which write() rejects.
        if content is not None and content.text is not None:
            tokin.write(content.text)
    tok = tokenize_and_remove_stopword('tokin.dat')
    fo.write("<C>")