def retrieveText(self, pn, terms):
    """Collect texts from the vote-remain CSV that mention none of ``terms``.

    Changes the working directory to ``pn + '/output'`` and reads
    ``output_vote_remain_Dark_Age.csv`` from there ('*'-delimited,
    '|'-quoted).  The first row is skipped as a header and rows with fewer
    than two columns are ignored.  The second column of a row is kept when

    * no term occurs in it, either verbatim or case-insensitively, and
    * it is not a substring of a text that was already kept.

    Args:
        pn: Directory that contains the ``output`` sub-directory.
        terms: Strings to filter on; a text containing any of them is
            dropped.  Iterated once per row, so pass a re-iterable
            collection (list, tuple, set), not a generator.

    Returns:
        dict mapping ``'0'``, ``'1'``, ... (in order of acceptance) to the
        kept texts.

    Raises:
        SystemExit: if the csv module reports a parse error; the exit
            message carries the reader's current line number.
    """
    result = {}
    # NOTE(review): chdir is a process-wide side effect.  It is kept because
    # callers may rely on the working directory after this call.
    os.chdir(pn + '/output')
    # The 'U' flag was removed from open() in Python 3.11; Python 3 text mode
    # already performs universal-newline translation, so plain 'r' is the
    # equivalent there.  Python 2 keeps the original 'rU'.
    mode = 'rU' if sys.version_info[0] < 3 else 'r'
    with open('output_vote_remain_Dark_Age.csv', mode) as csvfile:
        reader = csv.reader(csvfile, delimiter='*', quotechar='|')
        try:
            # Skip the header row (inside the try: reading may raise csv.Error).
            next(reader, None)
            for row in reader:
                if len(row) < 2:
                    continue
                text = row[1]
                lowered = text.lower()
                # Drop texts mentioning any term (verbatim or ignoring case).
                if any(term in text or term.lower() in lowered
                       for term in terms):
                    continue
                # Drop texts already covered by a previously kept text.
                if any(text in kept for kept in result.values()):
                    continue
                # NOTE(review): keys count accepted texts; this assumes the
                # original counter advanced only on acceptance - confirm.
                result[str(len(result))] = text
        except csv.Error as e:
            sys.exit('line %d: %s' % (reader.line_num, e))
    return result
def parse_text(text):
    """Normalise ``text`` and split it into a GracefulList of tokens.

    Steps, in order: trim space characters (only ``" "``) from both ends,
    lower-case, delete every backslash, turn newlines into spaces, then
    split on single spaces.  Because the split is on exactly one space,
    consecutive spaces produce empty-string tokens.
    """
    trimmed = text.strip(" ")
    lowered = trimmed.lower()
    without_backslashes = lowered.replace("\\", "")
    single_line = without_backslashes.replace("\n", " ")
    tokens = single_line.split(" ")
    return GracefulList(tokens)
def __tokenize(text):
    """Split lower-cased ``text`` into tokens of ``a-z``, ``0-9`` and ``-``.

    Every run of other characters acts as one separator.  When the text
    starts or ends with a separator (or is empty), the result contains an
    empty string at that end.
    """
    lowered = text.lower()
    separator = re.compile(r'[^a-z0-9\-]+')
    return separator.split(lowered)