class AnnotatedTweetReader(object):
    """Reads in tweets annotated for LID and codeswitching.

    The input is parsed as tab-delimited rows by ``UnicodeDictReader``.
    Instances are their own iterators; each step yields a
    ``(tokens, lidcs, goldlid)`` tuple (see ``next``).
    """

    def __init__(self, infile):
        """Wrap ``infile`` in a tab-delimited ``UnicodeDictReader``."""
        self._csv = UnicodeDictReader(infile, delimiter='\t')

    def __iter__(self):
        """Return self, since the reader is its own iterator."""
        return self

    def next(self):
        """Return relevant fields from the next row of the CSV.

        Returns:
            A tuple ``(tokens, lidcs, goldlid)`` where ``tokens`` is the
            ``FIELD_TWEET`` value split on whitespace, ``lidcs`` is the
            unmodified ``FIELD_PREDLID`` value, and ``goldlid`` is the
            ``GOLDLID_ABBREVIATIONS`` entry for the ``FIELD_GOLDLID`` value,
            or None when that value is empty.

        Raises:
            StopIteration: propagated from the underlying reader when it
                has no more rows.

        A non-empty gold label that is missing from
        ``GOLDLID_ABBREVIATIONS`` is not handled here; the lookup error
        (presumably KeyError) propagates to the caller.
        """
        # The builtin next() dispatches to .next() on Python 2 and to
        # .__next__() on Python 3, so either style of reader works.
        row = next(self._csv)

        gold_raw = row[FIELD_GOLDLID]
        goldlid = GOLDLID_ABBREVIATIONS[gold_raw] if gold_raw else None
        tokens = row[FIELD_TWEET].split()
        lidcs = row[FIELD_PREDLID]
        return tokens, lidcs, goldlid

    # Python 3 iterator protocol; ``next`` stays for Python 2 callers.
    __next__ = next
def __init__(self, infile):
    """Store a tab-delimited ``UnicodeDictReader`` over ``infile`` on ``self._csv``."""
    reader = UnicodeDictReader(infile, delimiter='\t')
    self._csv = reader