Esempio n. 1
0
 def gen_replace_all(self):
     """Yield CSV rows with phone and URL replacements applied to one field.

     Rows are read from ``Utils.iter_csv(self.filename)``. For each row the
     field at index ``self.nth`` is checked with ``self.__is_valid``; rows
     that fail the check are skipped and never yielded. For the remaining
     rows the field is passed through ``Replacer.replace_phone`` and then
     ``Replacer.replace_url``, the result is stored back into the row, and
     the row is yielded.

     When the rewritten field matches the regex built from
     ``self.PHONE_REPLACEMENT`` and ``self.URL_REPLACEMENT``, the original
     and rewritten values are logged at INFO level.

     NOTE(review): the ``.decode('utf-8')`` calls imply the field is a byte
     string (Python 2 style) -- confirm against ``Utils.iter_csv``.
     """
     # Alternation of the two replacement strings, used as a regex below.
     token_regex = '%s|%s' % (self.PHONE_REPLACEMENT, self.URL_REPLACEMENT)
     for record in Utils.iter_csv(self.filename):
         original = record[self.nth]
         if self.__is_valid(original):
             # Phone replacement runs first; URL replacement sees its output.
             without_phone = Replacer.replace_phone(original)
             cleaned = Replacer.replace_url(without_phone)
             record[self.nth] = cleaned
             if re.search(token_regex, record[self.nth]) is not None:
                 self.logger.info(
                     '%s -> %s' % (original.decode('utf-8'), cleaned.decode('utf-8'))
                 )
             yield record
Esempio n. 2
0
 def set_labels(self, sentences_csv_fname):
     """Populate ``self.labels`` from a CSV file and return ``True``.

     The value returned by ``Utils.iter_csv(sentences_csv_fname, 2)`` is
     split on whitespace; the first element of every resulting piece is
     converted with ``int`` and the integers are stored, in order, as a
     list on ``self.labels``.

     NOTE(review): this relies on ``Utils.iter_csv`` returning an object
     with a ``split()`` method when called with ``2`` -- confirm.
     """
     pieces = Utils.iter_csv(sentences_csv_fname, 2).split()
     collected = []
     for piece in pieces:
         # Only the leading element of each piece carries the label.
         collected.append(int(piece[0]))
     self.labels = collected
     return True
Esempio n. 3
0
 def set_labels(self, sentences_csv_fname):
     """Load integer labels into ``self.labels``; always returns ``True``.

     Calls ``Utils.iter_csv(sentences_csv_fname, 2)``, splits the result on
     whitespace, and keeps ``int(token[0])`` for each token as a list.

     NOTE(review): assumes the ``Utils.iter_csv`` result supports
     ``split()`` -- verify against the helper's implementation.
     """
     def leading_int(token):
         # The label is the first element of the token.
         return int(token[0])

     raw = Utils.iter_csv(sentences_csv_fname, 2)
     self.labels = list(map(leading_int, raw.split()))
     return True