def process_many_mutation(ifiles, ofiles):
    """Process sentence files in batch.

    Useful when many files have to be processed in one large batch: the
    mutation finder is built once from 'regex.txt' and reused for every file.

    Args:
        ifiles: Paths of tab-separated input files, read as two columns
            named 'ParNum' and 'Text' (no header row is expected).
        ofiles: Output paths, paired positionally with ``ifiles`` via
            ``zip``; surplus entries in the longer sequence are ignored.

    Each output file is tab-separated with the header
    ParNum / SentNum / Mutation / Text and one row per mutation reported by
    the finder.
    """
    finder = mutfinder_gen('regex.txt')
    # Both field tuples are loop-invariant, so build them once.
    ifields = ('ParNum', 'Text')
    ofields = ('ParNum', 'SentNum', 'Mutation', 'Text')

    for ifile, ofile in zip(ifiles, ofiles):
        # Parenthesised form: prints the same text on Python 2 and is valid
        # syntax on Python 3 (the bare print statement is not).
        print(ifile)

        # Read every row up front so the input is closed before writing.
        with open(ifile) as handle:
            rows = list(csv.DictReader(handle, delimiter='\t',
                                       fieldnames=ifields))

        with open(ofile, 'w') as handle:
            writer = csv.DictWriter(handle, ofields, delimiter='\t')
            writer.writeheader()
            for row in rows:
                if not row['Text']:
                    # Missing or empty text: nothing to tokenize.
                    continue
                # Pad with '' on both ends; real sentences are numbered from 1.
                sent_list = ([''] +
                             list(sent_tokenize(row['Text'].replace('\n', ''))) +
                             [''])
                for sentnum, sent in enumerate(sent_list):
                    for mut, _ in finder(sent).items():
                        # Context is the previous entry plus the current one.
                        # NOTE(review): the trailing '' pad is never part of
                        # this slice -- confirm the next sentence was not
                        # meant to be included as well.
                        text = ' '.join(sent_list[sentnum - 1:sentnum + 1])
                        writer.writerow({
                            'Text': text,
                            'ParNum': row['ParNum'],
                            'SentNum': sentnum,
                            'Mutation': mut,
                        })
def _iter_mutation_rows(row, finder):
    """Yield one output-row dict per mutation the finder reports in ``row``."""
    # Pad with '' on both ends; real sentences are therefore numbered from 1.
    sentences = [''] + list(
        sent_tokenize(row['Text'].replace('\n', ''))) + ['']
    for sentnum, sent in enumerate(sentences):
        for mut, _ in finder(sent).items():
            yield {
                # Context is the previous entry joined with the current one.
                'Text': ' '.join(sentences[sentnum - 1:sentnum + 1]),
                'ParNum': row['ParNum'],
                'SentNum': sentnum,
                'Mutation': mut,
            }


def process_many_mutation(ifiles, ofiles):
    """Process sentence files in batch.

    This function is useful when you need to process lots of files in one
    large batch; the finder from ``mutfinder_gen('regex.txt')`` is created
    once and shared by all files.

    Args:
        ifiles: Input paths; each file is read as tab-separated text with the
            columns 'ParNum' and 'Text'.
        ofiles: Output paths, matched to ``ifiles`` by position with ``zip``
            (extra entries on either side are silently dropped).

    For every input row with non-empty 'Text', one tab-separated output row
    (ParNum, SentNum, Mutation, Text) is written per mutation found, below a
    header line.
    """
    finder = mutfinder_gen('regex.txt')
    ofields = ('ParNum', 'SentNum', 'Mutation', 'Text')

    for ifile, ofile in zip(ifiles, ofiles):
        # print(...) with one argument behaves the same on Python 2 and is
        # the only form Python 3 accepts.
        print(ifile)

        with open(ifile) as handle:
            reader = csv.DictReader(handle, delimiter='\t',
                                    fieldnames=('ParNum', 'Text'))
            rows = list(reader)

        with open(ofile, 'w') as handle:
            writer = csv.DictWriter(handle, ofields, delimiter='\t')
            writer.writeheader()
            for row in rows:
                if row['Text']:
                    for nrow in _iter_mutation_rows(row, finder):
                        writer.writerow(nrow)
def process_mutation(ifile, ofile, finder=None):
    """Scan one tab-separated paragraph file and write the mutations found.

    Args:
        ifile: Path of the input file, read as tab-separated rows with the
            columns 'ParNum' and 'Text'.
        ofile: Path of the tab-separated output file; it gets a header line
            (ParNum, SentNum, Mutation, Text) followed by one row per hit.
        finder: Callable applied to each sentence; its result must offer
            ``.items()``, and the first element of every pair is written as
            the mutation.  Defaults to ``mutfinder_gen('regex.txt')``.
    """
    in_columns = ('ParNum', 'Text')
    with open(ifile) as source:
        paragraphs = list(
            csv.DictReader(source, delimiter='\t', fieldnames=in_columns))

    if finder is None:
        finder = mutfinder_gen('regex.txt')

    out_columns = ('ParNum', 'SentNum', 'Mutation', 'Text')
    with open(ofile, 'w') as sink:
        out = csv.DictWriter(sink, out_columns, delimiter='\t')
        # Header row: every column name mapped to itself.
        out.writerow(dict((name, name) for name in out_columns))

        for paragraph in paragraphs:
            body = paragraph['Text']
            if not body:
                # Rows without text are skipped entirely.
                continue

            # '' padding on both ends makes the first real sentence index 1.
            padded = [''] + list(sent_tokenize(body.replace('\n', ''))) + ['']
            for position, sentence in enumerate(padded):
                for mutation, _ in finder(sentence).items():
                    out.writerow({
                        'ParNum': paragraph['ParNum'],
                        'SentNum': position,
                        'Mutation': mutation,
                        # Previous entry and current sentence, space-joined.
                        'Text': ' '.join(padded[position - 1:position + 1]),
                    })
def process_mutation(ifile, ofile, finder=None):
    """Extract mutation mentions from a single paragraph file.

    The input at ``ifile`` is read as tab-separated rows with the columns
    'ParNum' and 'Text'.  Each non-empty 'Text' has its newlines removed and
    is split with ``sent_tokenize``; ``finder`` is called on every resulting
    sentence, and each key/value pair it returns via ``.items()`` produces
    one output row (only the key is written, as 'Mutation').

    The output at ``ofile`` is tab-separated with the header
    ParNum / SentNum / Mutation / Text.  When ``finder`` is None it is
    created with ``mutfinder_gen('regex.txt')``.
    """
    with open(ifile) as infile:
        records = list(csv.DictReader(infile,
                                      delimiter='\t',
                                      fieldnames=('ParNum', 'Text')))

    if finder is None:
        finder = mutfinder_gen('regex.txt')

    header = ('ParNum', 'SentNum', 'Mutation', 'Text')
    with open(ofile, 'w') as outfile:
        tsv = csv.DictWriter(outfile, header, delimiter='\t')
        tsv.writeheader()

        for record in records:
            if record['Text']:
                flat = record['Text'].replace('\n', '')
                # Build ['', sentence..., '']: the sentinels shift real
                # sentences to indices starting at 1.
                window = ['']
                window.extend(sent_tokenize(flat))
                window.append('')

                for num, sentence in enumerate(window):
                    hits = finder(sentence)
                    for mutation, _ in hits.items():
                        # Two-entry context: the entry before this sentence
                        # and the sentence itself.
                        context = window[num - 1:num + 1]
                        tsv.writerow(dict(ParNum=record['ParNum'],
                                          SentNum=num,
                                          Mutation=mutation,
                                          Text=' '.join(context)))