def process_table(self, table, overlap_attr, q_val, rem_stop_words):
    """Tokenize every value in one column of ``table``.

    Each value is passed through ``helper.remove_non_ascii`` and
    ``self.rem_punctuations``, lower-cased, split on whitespace and
    de-duplicated.  Stop words are then optionally removed and the
    remaining tokens optionally re-joined and converted to q-grams.

    Args:
        table: Object indexable by ``overlap_attr`` that yields an iterable
            of values (presumably a pandas DataFrame -- confirm against
            callers).
        overlap_attr: Key of the column whose values are tokenized.
        q_val: When not ``None``, the surviving tokens are joined with a
            single space and handed to ``qgram(joined, q_val)``; the result
            of that call replaces the token list.  When ``None`` the word
            tokens themselves are returned.
        rem_stop_words: When truthy, tokens are filtered through
            ``self.rem_stopwords``.

    Returns:
        list: One entry per column value, in column order.  Each entry is
        either the list of unique word tokens or whatever ``qgram``
        returned for that value.
    """
    processed = []
    for raw in table[overlap_attr]:
        cleaned = self.rem_punctuations(helper.remove_non_ascii(raw)).lower()

        # De-duplicate while keeping first-seen order.  A plain
        # list(set(...)) has hash-dependent ordering, which would make the
        # joined string -- and therefore the q-grams built from it -- vary
        # between interpreter runs for identical input.
        seen = set()
        tokens = []
        for tok in cleaned.split():
            if tok not in seen:
                seen.add(tok)
                tokens.append(tok)

        if rem_stop_words:
            tokens = self.rem_stopwords(tokens)

        if q_val is not None:
            tokens = qgram(' '.join(tokens), q_val)

        processed.append(tokens)
    return processed
def process_val(self, val, overlap_attr, q_val, rem_stop_words):
    """Tokenize a single value and return its unique tokens.

    The value is passed through ``helper.remove_non_ascii`` and
    ``self.rem_punctuations``, lower-cased and split on whitespace.  Stop
    words are optionally removed, the tokens are optionally re-joined and
    converted to q-grams, and duplicates are dropped from the final result.

    Args:
        val: The value to tokenize.
        overlap_attr: Unused by this method; kept so the signature stays
            unchanged for existing callers.
        q_val: When not ``None``, the tokens are joined with a single space
            and handed to ``qgram(joined, q_val)``; the items it returns
            become the tokens.
        rem_stop_words: When truthy, tokens are filtered through
            ``self.rem_stopwords``.

    Returns:
        list: The distinct tokens (words or q-grams) in first-seen order.
    """
    cleaned = self.rem_punctuations(helper.remove_non_ascii(val)).lower()
    tokens = cleaned.split()

    if rem_stop_words:
        tokens = self.rem_stopwords(tokens)

    # Identity test: ``!= None`` dispatches to __ne__ and can misbehave for
    # objects that overload comparison.
    if q_val is not None:
        tokens = qgram(' '.join(tokens), q_val)

    # Drop duplicates but keep first-seen order so the result does not
    # depend on set iteration order (which varies with hash randomization).
    seen = set()
    unique_tokens = []
    for tok in tokens:
        if tok not in seen:
            seen.add(tok)
            unique_tokens.append(tok)
    return unique_tokens