Exemple #1
0
    def retrieveText(self, pn, terms):
        """Collect texts from the vote CSV that mention none of ``terms``.

        Reads ``output_vote_remain_Dark_Age.csv`` from ``<pn>/output``
        (fields separated by ``*``, quoted with ``|``), skips the first row
        and keeps the second column of every row that

        * has at least two columns,
        * contains none of ``terms`` (exact or case-insensitive match), and
        * is not a substring of a text that has already been kept.

        Side effect: the process working directory is changed to
        ``<pn>/output`` and is not restored.

        Args:
            pn: Directory that contains the ``output`` folder.
            terms: Re-iterable collection of strings (it is walked once per
                row); a row whose text contains any of them is dropped.

        Returns:
            dict mapping ``str(index)`` to the kept text. ``index`` counts the
            rows after the first one that have at least two columns, starting
            at 0, whether or not the row was kept.

        Raises:
            SystemExit: if the csv module reports a parsing error; the
                message carries the reader's line number.
        """
        result = {}
        # NOTE(review): process-wide side effect, kept because callers may
        # rely on the working directory after this call.
        os.chdir(pn + '/output')

        # Python 2 needs 'U' for universal newlines; Python 3 translates
        # newlines by default and rejects the 'U' flag from 3.11 onwards.
        mode = 'rU' if sys.version_info[0] < 3 else 'r'
        with open('output_vote_remain_Dark_Age.csv', mode) as csvfile:
            reader = csv.reader(csvfile, delimiter='*', quotechar='|')
            try:
                # Skip the first row unconditionally (presumably a header).
                next(reader, None)
                index = 0
                for row in reader:
                    if len(row) < 2:
                        # Short or blank rows do not consume an index.
                        continue
                    text = row[1]
                    lowered = text.lower()

                    mentions_term = any(
                        term in text or term.lower() in lowered
                        for term in terms
                    )
                    # Only scan the kept texts when the row is still a
                    # candidate; the outcome is the same either way.
                    if not mentions_term and not any(
                            text in kept for kept in result.values()):
                        result[str(index)] = text
                    index += 1
            except csv.Error as e:
                sys.exit('line %d: %s' % (reader.line_num, e))

        return result
def parse_text(text):
    """Normalise ``text`` and split it into tokens on single spaces.

    Leading and trailing spaces are trimmed, the text is lower-cased,
    backslashes are removed and newlines are turned into spaces. The result
    is split on the space character, so consecutive spaces produce empty
    tokens.

    Returns:
        A ``GracefulList`` built from the resulting list of tokens.
    """
    trimmed = text.strip(" ")
    cleaned = trimmed.lower().replace("\\", "")
    cleaned = cleaned.replace("\n", " ")
    tokens = cleaned.split(" ")
    return GracefulList(tokens)
Exemple #3
0
def __tokenize(text):
    """Lower-case ``text`` and split it into alphanumeric/hyphen tokens.

    Every run of characters other than ``a-z``, ``0-9`` and ``-`` acts as a
    separator. Separators at the start or end of the text yield empty
    strings in the returned list.
    """
    lowered = text.lower()
    separator = re.compile(r'[^a-z0-9\-]+')
    return separator.split(lowered)