def dist_txt(self, x, y):
     """
     Return the Jaccard distance between two texts.

     Each text is passed through text_to_words and reduced to the set of
     its distinct words (X and Y); the result is 1 - |X & Y| / |X | Y|.

     :param x: first text, in whatever form text_to_words accepts.
     :param y: second text, in whatever form text_to_words accepts.
     :return: float between 0.0 and 1.0 -- 0.0 when both texts yield the
              same word set (including when neither yields any word),
              1.0 when they share no word at all.
     """
     x_s = set(text_to_words(x))
     y_s = set(text_to_words(y))

     union = x_s | y_s
     if not union:
         # Neither text produced a word: treat them as identical rather
         # than dividing by zero.
         return 0.0

     # float() keeps this a true division even where "/" on ints floors.
     similarity = float(len(x_s & y_s)) / len(union)
     # The method reports a distance, so invert the similarity ratio.
     return 1.0 - similarity
 def dist_txt(self, x, y):
     """
     Return the Jaccard distance between two texts.

     Each text is passed through text_to_words and reduced to the set of
     its distinct words (X and Y); the result is 1 - |X & Y| / |X | Y|.

     :param x: first text, in whatever form text_to_words accepts.
     :param y: second text, in whatever form text_to_words accepts.
     :return: float between 0.0 and 1.0 -- 0.0 when both texts yield the
              same word set (including when neither yields any word),
              1.0 when they share no word at all.
     """
     x_s = set(text_to_words(x))
     y_s = set(text_to_words(y))

     union_size = len(x_s | y_s)
     if union_size == 0:
         # Neither text produced a word: treat them as identical rather
         # than dividing by zero.
         return 0.0

     common_size = len(x_s & y_s)
     # float() keeps this a true division even where "/" on ints floors;
     # subtracting from 1 turns the similarity ratio into a distance.
     return 1.0 - float(common_size) / union_size
 def dist_txt(self, x, y):
     """
     Return a repetition-aware distance between two texts.

     Both texts are passed through text_to_words and every word found in
     self.stopwords is dropped.  The remaining words are compared as
     multisets: occurrences are paired one-to-one between the two texts,
     and the result is the number of occurrences left unpaired divided by
     the total number of remaining words in both texts.  It is close to
     1 - |X & Y| / |X | Y|, except that repeated words count too.

     :param x: first text, in whatever form text_to_words accepts.
     :param y: second text, in whatever form text_to_words accepts.
     :return: float between 0.0 and 1.0 -- 0.0 when both texts keep exactly
              the same words with the same multiplicities (including when
              neither keeps any word), 1.0 when they have no word in common.
     """
     # Function-scope import so the method does not depend on the
     # file-level import block.
     from collections import Counter

     stopwords = self.stopwords
     x_counts = Counter(w for w in text_to_words(x) if w not in stopwords)
     y_counts = Counter(w for w in text_to_words(y) if w not in stopwords)

     total = sum(x_counts.values()) + sum(y_counts.values())
     if total == 0:
         # Nothing left to compare after stopword removal: treat the texts
         # as identical rather than dividing by zero.
         return 0.0

     # Counter subtraction keeps only positive counts, so each term is the
     # number of occurrences one text has in excess of the other; together
     # they are what a merge walk over the two sorted word lists would
     # count as unmatched.
     unmatched = (sum((x_counts - y_counts).values())
                  + sum((y_counts - x_counts).values()))

     # float() keeps this a true division even where "/" on ints floors.
     return float(unmatched) / total