def gen_replace_all(self):
    """Lazily yield CSV rows with phone numbers and URLs masked in one column.

    Rows are read via ``Utils.iter_csv(self.filename)``. For each row the
    value at index ``self.nth`` is checked with ``self.__is_valid``; rows
    that fail the check are skipped. The remaining rows have that value
    passed through ``Replacer.replace_phone`` and then
    ``Replacer.replace_url``, and the result is written back into the row.

    When the written-back value matches either ``self.PHONE_REPLACEMENT``
    or ``self.URL_REPLACEMENT`` (the two are joined with ``|`` and used as
    a regular expression), the before/after pair is logged at INFO level.

    Yields:
        The row object produced by ``Utils.iter_csv``, mutated in place.
    """
    # The two replacement markers are used verbatim as regex alternatives.
    # NOTE(review): they are not escaped -- confirm they contain no regex
    # metacharacters, or that this is intentional.
    marker_regex = '%s|%s' % (self.PHONE_REPLACEMENT, self.URL_REPLACEMENT)

    for row in Utils.iter_csv(self.filename):
        sentence = row[self.nth]
        if not self.__is_valid(sentence):
            continue

        # Phone numbers are masked first, URLs second.
        phone_masked = Replacer.replace_phone(sentence)
        replaced = Replacer.replace_url(phone_masked)
        row[self.nth] = replaced

        # Only log rows in which a replacement marker is present.
        if re.search(marker_regex, row[self.nth]):
            # .decode() implies byte strings here (Python 2 style str).
            message = '%s -> %s' % (
                sentence.decode('utf-8'),
                replaced.decode('utf-8'),
            )
            self.logger.info(message)

        # NOTE(review): the source formatting was lost; the yield is placed
        # at loop level so every valid row is emitted -- confirm.
        yield row
def set_labels(self, sentences_csv_fname):
    """Load integer labels from a CSV file into ``self.labels``.

    Calls ``Utils.iter_csv(sentences_csv_fname, 2)``, invokes ``.split()``
    on the result, and converts the first element of every resulting item
    to ``int``.

    NOTE(review): the meaning of the second argument (``2``) and the type
    returned by ``Utils.iter_csv`` are not visible here -- confirm that the
    returned object actually provides ``.split()``.

    Args:
        sentences_csv_fname: File name handed to ``Utils.iter_csv``.

    Returns:
        Always ``True``.
    """
    fields = Utils.iter_csv(sentences_csv_fname, 2).split()
    # Only the first element of each item is interpreted as the label.
    self.labels = [int(field[0]) for field in fields]
    return True
def set_labels(self, sentences_csv_fname):
    """Populate ``self.labels`` with integer labels read from a CSV file.

    The items come from ``Utils.iter_csv(sentences_csv_fname, 2).split()``;
    the first element of each item is converted with ``int``.

    NOTE(review): what ``Utils.iter_csv`` returns when given ``2`` as its
    second argument is not visible here -- verify that the result supports
    ``.split()``.

    Args:
        sentences_csv_fname: File name handed to ``Utils.iter_csv``.

    Returns:
        Always ``True``.
    """
    parsed = []
    for item in Utils.iter_csv(sentences_csv_fname, 2).split():
        # The label is taken from the first element of the item.
        parsed.append(int(item[0]))
    # Assign only once the whole list has been built successfully.
    self.labels = parsed
    return True