Example #1
def get_queries(self):
    # Read the raw queries and tokenize them on whitespace.
    queries, queries_1 = self.read_queries()
    tokenize = Tokenizer.Tokenize(" ")
    self.modified_queries = tokenize.process_data(queries)
    self.modified_queries_1 = queries_1
    # Write one query per line for Lucene to consume; the context manager
    # closes the file, which the original code never did.
    with open("queries for lucene.txt", 'w') as f:
        for q in self.modified_queries:
            q = q.strip("\n").replace("\n", ' ')
            f.write(str(q))
            f.write("\n")
Example #2
def test(text):
    # Initialize the tokenizer with the input string, echo it, then tokenize.
    # The parameter is renamed from str to text to avoid shadowing the builtin.
    Tokenizer.Initialize(text)
    print(text)
    Tokenizer.Tokenize()
Example #3
def start_tokenizing(self):
    # Tokenize the corpus located at self.source_path; start_processing()
    # returns the path of the tokenized corpus (e.g. "tokenized_corpus")
    # and the number of documents N (e.g. 3204).
    tokenize = Tokenizer.Tokenize(self.source_path)
    self.new_source_path, self.N = tokenize.start_processing()
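These snippets call a project-specific Tokenizer module rather than a published library, and its API differs between them: Example #2 uses module-level Initialize/Tokenize functions, while Examples #1 and #3 construct a Tokenize object. As a rough, hypothetical sketch only, a Tokenizer.py consistent with the object-style calls above might look like the following; the splitting logic, the "_tokenized" suffix, and the returned document count are assumptions, not part of the original examples.

# Tokenizer.py -- hypothetical sketch; only the class name, the constructor
# argument, and the method names process_data()/start_processing() are taken
# from the examples above.
import os

class Tokenize:
    def __init__(self, source):
        # Example #1 passes a delimiter (" "); Example #3 passes a corpus path.
        self.source = source

    def process_data(self, queries):
        # Split each query on the delimiter given to the constructor (Example #1).
        return [q.split(self.source) for q in queries]

    def start_processing(self):
        # Tokenize the corpus at self.source and report where the result is
        # written plus how many documents were seen (Example #3).
        tokenized_path = self.source + "_tokenized"
        n_documents = len(os.listdir(self.source)) if os.path.isdir(self.source) else 0
        # ... actual per-file tokenization would go here ...
        return tokenized_path, n_documents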