def exec_second(self, parole):
    """Time the edit-distance and n-gram searches for every word in ``parole``.

    For each word two scans are performed and timed with ``timer()``:

    * ``60000_parole_italiane.txt`` is read line by line; an entry is kept
      when the cost returned by ``EditDistance.op_sequence`` is below
      ``self.sogliaCosto``.
    * ``<self.numberOfGrams>_grams.txt`` is read line by line; each line is
      split on ``' -> '`` and an entry is kept when the value returned by
      ``Ngram.jaccard`` is greater than ``self.sogliaJaccard``.

    Args:
        parole: iterable of the words to look up.

    Returns:
        ``[tempi, n_vicine_trovate]``. For the i-th word, ``tempi[i]`` is
        ``[edit_time, ngram_time]`` and ``n_vicine_trovate[i]`` is
        ``[edit_matches, ngram_matches]``.
    """
    distance = EditDistance()
    ngrams = Ngram()
    tempi = []
    n_vicine_trovate = []

    for parola in parole:
        # --- edit distance: only the scan itself is timed, not the open().
        edit_matches = []
        with open('60000_parole_italiane.txt', 'r') as words_file:
            edit_start = timer()
            for raw_line in words_file:
                candidate = raw_line.rstrip()
                _, operations = distance.edit_distance(parola, candidate)
                cost = distance.op_sequence(
                    operations, len(parola) - 1, len(candidate) - 1, [])
                if cost < self.sogliaCosto:
                    edit_matches.append((candidate, cost))
            edit_elapsed = timer() - edit_start

        # --- n-grams: the query's grams are built before the clock starts.
        gram_matches = []
        query_grams = ngrams.ngram(parola, self.numberOfGrams)
        with open("%s_grams.txt" % self.numberOfGrams, 'r') as grams_file:
            gram_start = timer()
            for raw_line in grams_file:
                fields = raw_line.split(' -> ')
                candidate, candidate_grams = fields[0], fields[1]
                score = ngrams.jaccard(query_grams, candidate_grams)
                if score > self.sogliaJaccard:
                    gram_matches.append((candidate, score))
            gram_elapsed = timer() - gram_start

        tempi.append([edit_elapsed, gram_elapsed])
        n_vicine_trovate.append([len(edit_matches), len(gram_matches)])

    return [tempi, n_vicine_trovate]
def exec_fifth(self): e = EditDistance() a = Ngram() originale = raw_input("**** Inserisci parola --> ") parola = self.storpia(originale) print '**** Parola storpiata -->', parola # edit distance print '----- EDIT DISTANCE' # costi: 1, 2, 3, 4, 5 for c in range(1, 6): with open('60000_parole_italiane.txt', 'r') as f: e_results = [] for line in f: p = line.rstrip() _, op = e.edit_distance(parola, p) costo = e.op_sequence(op, len(parola) - 1, len(p) - 1, []) if costo < c: e_results.append((p, costo)) if any(originale in a for a in e_results): w = 'parola originale trovata!' else: w = 'parola originale non trovata!' print w, '(soglia costo %s, %s risultati)' % ( c, len(e_results)), '-->', sorted(e_results, key=get(1)) # ngram print '----- NGRAM' b = a.ngram(parola, self.numberOfGrams) # coefficienti: 0.5, 0.6, 0.7, 0.8, 0.9 for j in np.arange(0.5, 1.0, 0.1): with open("%s_grams.txt" % self.numberOfGrams, 'r') as f: g_results = [] for line in f: s = line.split(' -> ') p, g = s[0], s[1] f = a.jaccard(b, g) if f > j: g_results.append((p, f)) if any(originale in a for a in g_results): w = 'parola originale trovata!' else: w = 'parola originale non trovata!' print w, '(jaccard %s, %s risultati)' % ( j, len(g_results)), '-->', sorted(g_results, key=get(1), reverse=True)
def exec_third(self):
    """Count how many matches each search finds at increasing thresholds.

    Prompts for a word, then:

    * edit distance, cost thresholds 1..5: counts the entries of
      ``60000_parole_italiane.txt`` whose cost (as returned by
      ``EditDistance.op_sequence``) is below the threshold;
    * n-grams, thresholds from ``np.arange(0.5, 1.0, 0.1)``: counts the
      entries of ``<self.numberOfGrams>_grams.txt`` whose ``Ngram.jaccard``
      value is greater than the threshold.

    Returns:
        ``[costi, coefficienti, risultati_edit, risultati_gram]``: the cost
        thresholds, the n-gram thresholds, and the match counts for each of
        them (same order as the thresholds).
    """
    e = EditDistance()
    a = Ngram()
    parola = raw_input("**** Inserisci parola --> ")

    # edit distance
    # A word's cost is independent of the threshold: scan the word list once
    # and count per threshold instead of recomputing every distance 5 times.
    word_costs = []
    with open('60000_parole_italiane.txt', 'r') as words_file:
        for line in words_file:
            p = line.rstrip()
            _, op = e.edit_distance(parola, p)
            word_costs.append(
                e.op_sequence(op, len(parola) - 1, len(p) - 1, []))
    # cost thresholds: 1, 2, 3, 4, 5
    costi = list(range(1, 6))
    risultati_edit = [
        sum(1 for costo in word_costs if costo < c) for c in costi]

    # ngram
    b = a.ngram(parola, self.numberOfGrams)
    # Likewise, score every entry once; only the grams field is needed here.
    word_scores = []
    with open("%s_grams.txt" % self.numberOfGrams, 'r') as grams_file:
        for line in grams_file:
            g = line.split(' -> ')[1]
            word_scores.append(a.jaccard(b, g))
    # thresholds: 0.5, 0.6, 0.7, 0.8, 0.9
    coefficienti = list(np.arange(0.5, 1.0, 0.1))
    risultati_gram = [
        sum(1 for score in word_scores if score > j) for j in coefficienti]

    return [costi, coefficienti, risultati_edit, risultati_gram]
def exec_first(self): with open('60000_parole_italiane.txt', 'r') as f: e = EditDistance() a = Ngram() lines = f.readlines() rand = random.randint(0, len(lines)) word = lines[rand].rstrip() print 'random word -->', word # test edit distance start = timer() for line in lines: p = line.rstrip() if p == word: break _, op = e.edit_distance(word, p) _ = e.op_sequence(op, len(word) - 1, len(p) - 1, []) end = timer() time_edit = end - start # print 'tempo trascorso edit distance -->', time_edit # test ngrams b = a.ngram(word, self.numberOfGrams) with open("%s_grams.txt" % self.numberOfGrams, 'r') as r: start = timer() for line in r: s = line.split(' -> ') p, g = s[0], s[1] if p == word: break _ = a.jaccard(b, g) end = timer() time_ngram = end - start # print 'tempo trascorso ngrams -->', time_ngram return [word, time_edit, time_ngram]