Esempio n. 1
0
def process_many_mutation(ifiles, ofiles):
    """Extract mutation mentions from many paragraph files in one batch.

    Each input file is read as tab-delimited text whose two columns are
    taken to be ``ParNum`` and ``Text``; the first line is treated as data,
    not as a header. For every row with non-empty ``Text`` the text is split
    into sentences with ``sent_tokenize`` and each sentence is passed to a
    finder built once from ``regex.txt`` and shared by all files. Every
    mutation reported by the finder becomes one tab-delimited output row
    (``ParNum``, ``SentNum``, ``Mutation``, ``Text``), written after a
    header row.

    Args:
        ifiles: Iterable of input file paths.
        ofiles: Iterable of output file paths, paired positionally with
            ``ifiles``; pairing stops at the shorter of the two.
    """
    finder = mutfinder_gen('regex.txt')
    ofields = ('ParNum', 'SentNum', 'Mutation', 'Text')

    for ifile, ofile in zip(ifiles, ofiles):
        # Progress output. The parenthesised form prints the same text on
        # Python 2 and is also valid syntax on Python 3.
        print(ifile)

        # Read the whole input before the output is opened.
        with open(ifile) as handle:
            reader = csv.DictReader(handle, delimiter='\t',
                                    fieldnames=('ParNum', 'Text'))
            rows = list(reader)

        with open(ofile, 'w') as handle:
            writer = csv.DictWriter(handle, ofields, delimiter='\t')
            writer.writeheader()
            for row in rows:
                if not row['Text']:
                    continue

                # Newlines are removed (not replaced by spaces) before
                # tokenizing. The list is padded with an empty string on
                # each side, so real sentences are numbered from 1.
                sentences = sent_tokenize(row['Text'].replace('\n', ''))
                sent_list = [''] + list(sentences) + ['']

                for sentnum, sent in enumerate(sent_list):
                    # Context written with each hit: the preceding entry and
                    # the current sentence. For the first real sentence the
                    # preceding entry is the empty pad.
                    # NOTE(review): the trailing pad only matters if the
                    # window was meant to include the following sentence
                    # (slice ending at sentnum + 2) -- confirm intent.
                    text = ' '.join(sent_list[sentnum - 1:sentnum + 1])
                    for mut, _ in finder(sent).items():
                        writer.writerow({'Text': text,
                                         'ParNum': row['ParNum'],
                                         'SentNum': sentnum,
                                         'Mutation': mut})
Esempio n. 2
0
def process_many_mutation(ifiles, ofiles):
    """Run mutation extraction over a batch of paragraph files.

    A single finder is created from ``regex.txt`` and reused for every
    file. Input files are parsed as tab-delimited rows whose columns are
    named ``ParNum`` and ``Text`` (no header line is consumed). The text of
    each non-empty row is tokenized into sentences with ``sent_tokenize``;
    each mutation the finder reports for a sentence is written to the
    paired output file as a tab-delimited row with the columns ``ParNum``,
    ``SentNum``, ``Mutation`` and ``Text``, preceded by a header row.

    Args:
        ifiles: Iterable of input file paths.
        ofiles: Iterable of output file paths, matched to ``ifiles`` by
            position; extra entries in the longer iterable are ignored.
    """
    finder = mutfinder_gen('regex.txt')
    ofields = ('ParNum', 'SentNum', 'Mutation', 'Text')

    for ifile, ofile in zip(ifiles, ofiles):
        # Report progress; print(...) with one argument works identically
        # on Python 2 and Python 3, unlike the bare print statement.
        print(ifile)

        with open(ifile) as handle:
            reader = csv.DictReader(handle,
                                    delimiter='\t',
                                    fieldnames=('ParNum', 'Text'))
            rows = list(reader)

        with open(ofile, 'w') as handle:
            writer = csv.DictWriter(handle, ofields, delimiter='\t')
            writer.writeheader()
            for row in rows:
                if not row['Text']:
                    # Nothing to tokenize for this paragraph.
                    continue

                # Strip newlines, tokenize, then pad both ends with '' so
                # the first real sentence has index 1.
                sent_list = [''] + list(
                    sent_tokenize(row['Text'].replace('\n', ''))) + ['']

                for sentnum, sent in enumerate(sent_list):
                    # The reported text is the previous entry joined to the
                    # current sentence with a single space.
                    # NOTE(review): the next sentence is not part of this
                    # window even though the list is padded at the end --
                    # confirm that is intended.
                    text = ' '.join(sent_list[sentnum - 1:sentnum + 1])
                    for mut, _ in finder(sent).items():
                        nrow = {
                            'Text': text,
                            'ParNum': row['ParNum'],
                            'SentNum': sentnum,
                            'Mutation': mut,
                        }
                        writer.writerow(nrow)
Esempio n. 3
0
def process_mutation(ifile, ofile, finder = None):
    """Write the mutations found in one tab-delimited paragraph file.

    The input is read as tab-delimited rows whose columns are named
    ``ParNum`` and ``Text`` (the first line is data, not a header). Rows
    with empty ``Text`` are skipped. For the others, newlines are removed,
    the text is split with ``sent_tokenize`` and every sentence is handed
    to ``finder``. One output row is written per key of the finder's
    result, with the columns ``ParNum``, ``SentNum``, ``Mutation`` and
    ``Text``, after a header row.

    Args:
        ifile: Path of the input file.
        ofile: Path of the output file (opened for writing).
        finder: Callable applied to each sentence; its result is iterated
            with ``.items()`` and only the keys are used. When ``None``, a
            finder is built with ``mutfinder_gen('regex.txt')``.
    """
    in_columns = ('ParNum', 'Text')
    with open(ifile) as source:
        records = list(csv.DictReader(source, delimiter='\t',
                                      fieldnames=in_columns))

    if finder is None:
        finder = mutfinder_gen('regex.txt')

    out_columns = ('ParNum', 'SentNum', 'Mutation', 'Text')
    with open(ofile, 'w') as sink:
        out = csv.DictWriter(sink, out_columns, delimiter='\t')
        out.writeheader()

        for record in records:
            paragraph = record['Text']
            if not paragraph:
                continue

            # Empty strings pad both ends, so real sentences start at 1.
            tokens = sent_tokenize(paragraph.replace('\n', ''))
            padded = [''] + list(tokens) + ['']

            for position, sentence in enumerate(padded):
                for mutation, _ignored in finder(sentence).items():
                    # Previous entry plus current sentence, space-joined.
                    window = padded[position - 1:position + 1]
                    out.writerow({'ParNum': record['ParNum'],
                                  'SentNum': position,
                                  'Mutation': mutation,
                                  'Text': ' '.join(window)})
Esempio n. 4
0
def process_mutation(ifile, ofile, finder=None):
    """Scan one paragraph file for mutations and write them to ``ofile``.

    ``ifile`` is parsed as tab-delimited text with the column names
    ``ParNum`` and ``Text`` supplied explicitly, so no header line is
    consumed. Each row with non-empty ``Text`` has its newlines removed and
    is split into sentences by ``sent_tokenize``. ``finder`` is called on
    every sentence, and each key of its result produces one tab-delimited
    output row (``ParNum``, ``SentNum``, ``Mutation``, ``Text``). A header
    row is written first.

    Args:
        ifile: Path of the input file.
        ofile: Path of the output file (opened for writing).
        finder: Callable taking a sentence and returning an object with
            ``.items()``. Defaults to ``mutfinder_gen('regex.txt')`` when
            ``None``.
    """
    with open(ifile) as infile:
        parsed = csv.DictReader(infile,
                                delimiter='\t',
                                fieldnames=('ParNum', 'Text'))
        paragraphs = list(parsed)

    if finder is None:
        finder = mutfinder_gen('regex.txt')

    header = ('ParNum', 'SentNum', 'Mutation', 'Text')
    with open(ofile, 'w') as outfile:
        table = csv.DictWriter(outfile, header, delimiter='\t')
        table.writeheader()

        for entry in paragraphs:
            if not entry['Text']:
                continue

            flattened = entry['Text'].replace('\n', '')
            # Pad with '' on each side; indices of real sentences begin at 1.
            chain = [''] + list(sent_tokenize(flattened)) + ['']

            for idx, current in enumerate(chain):
                hits = finder(current)
                for found, _value in hits.items():
                    # Context is the preceding entry and the current one.
                    context = ' '.join(chain[idx - 1:idx + 1])
                    table.writerow({
                        'Text': context,
                        'ParNum': entry['ParNum'],
                        'SentNum': idx,
                        'Mutation': found,
                    })