Exemple #1
0
    def batch_insert(self, filename, table, preview=False):
        '''
        Batch insert column values from filename into table.

        If *preview* is True, print the sql statement that will be used 
        for inserting and the table of values from *filename*
        
        *filename* should be a TSV file with a column header line with 
        column names corresponding to the column names in the target 
        database table.

        For example, suppose we have a file ``insert.tsv``:

            id  name  age
            1   John  20
            2   Mary  24

        We'd like to insert this table of values into the "people" table of 
        our company database.  To preview the insertion:

            >>> from datastore import sqlite
            >>> ds = sqlite('company.db') 
            >>> ds.batch_insert('insert.tsv', table="people", preview=True)
            INSERT INTO people (id, name, age) VALUES (:id, :name, :age);
            id  name    age
            1   John    20
            2   Mary    24
        
        '''
        data = Reader(filename)
        self.valid_table(table)
        for col in data.columns: 
            self.valid_column(col, table)
        insert_st = data.insert_sql(table)
        if preview:
            print insert_st
            print data
        else:
            try:
                self.cur.executemany(insert_st, data.rows)
            except sqlite3.IntegrityError as err:
                print insert_st
                print err
                for row in data.rows:
                    print row
                    self.insert(table, **row) ## try inserting row by row
                                              ##  to find problematic row
                raise SystemExit
Exemple #2
0
    def batch_update(self, filename, table, pk='id', preview=False):
        '''
        Batch update column values from filename into table.

        If *preview* is True, print the sql statement that will be used 
        for updating and the table of values from *filename*
        
        *filename* should be a TSV file with a header line having 
        column names corresponding to the column names in the target 
        database table.

        The *pk* argument should reference the primary key to use for each
        update in the WHERE clause ("WHERE id = :id"). For example, consider 
        the following update file:

        For example, suppose we have a file ``update.tsv``:

            id  name  age
            1   John  21
            2   Mary  25
        
        We'd like to update the existing values in the "people" table of 
        our company database with these new values.  To preview the update:

            >>> from datastore import sqlite
            >>> ds = sqlite('company.db') 
            >>> ds.batch_update('update.tsv', table="people", preview=True)
            UPDATE people SET name = :name, age = :age WHERE id = :id;
            id  name    age
            1   John    21
            2   Mary    25

        '''
        data = Reader(filename)
        self.valid_table(table)
        for col in data.columns: 
            self.valid_column(col, table)
        update_st = data.update_sql(table)
        if preview:
            print update_st
            print data
        else:
            self.cur.executemany(update_st, data.rows)
Exemple #3
0
#!/usr/bin/env python 

'''
cut.py -- cut specified columns from a TSV-formatted file.

Assumes the file contains a header line containing column names, 
of which the columns to be cut are subset.

Modify the columns variable below.

'''
import sys
from datastore.table import Reader

transcripts = sys.argv[1:]
columns = '''id subject session row time line key 
             p_utts_orig p_utts p_form p_lrb p_obj p_gloss p_orient p_mspd 
             c_utts_orig c_utts c_form c_lrb c_obj c_gloss c_orient c_mspd 
             context'''.split()

def pprint(values): print "\t".join(values)

pprint(columns)

for t in transcripts:
    T = Reader(t)
    for row in T.values(*columns): 
        pprint(row)
Exemple #4
0
from datastore.table import Reader
from collections import defaultdict


ses = Reader('ses.xls')
subjects = defaultdict(dict)
ses_cols = 'SUBJ SEX EDU INC RACE ETHN'.split()

for row in ses:
    subjects[row['SUBJ']] = row


outcomes = Reader('outcomes.tsv')
out = defaultdict(dict)
columns = 'SUBJ SESS VOCAB READ_WJ READ_GM'.split()

for subj, sess, voc, rwj, rgm in outcomes.values(*columns):
    if not out.has_key(subj):
        out[subj] = {
                'SUBJ': subj,
                'VOCB1': '',
                'VOCB2': '', 
                'VOCB3': '', 
                'VOCB4': '', 
                'READ1': '', 
                'READ2': '', 
                'READ3': '', 
                'READ4': '', 
                'READ5': ''
                }
    if sess == "5" and voc:
Exemple #5
0
from datastore.table import Reader
from collections import defaultdict


sess_map = dict([(1,14), (2,18), (3,22), (4,26), (5,30), (6,34), 
                 (7,38), (8,42), (9,46), (10,50), (11,54), (12,58), 
                 (13,62), (14,66), (15,70), (16,74), (17,78), (18,82), 
                 (19,86), (20,90)])

speech = Reader('speech.xls')
ses = Reader('ses.xls')

subjects = defaultdict(dict)
ses_cols = 'SUBJ SEX EDU INC RACE ETHN'.split()

for row in ses:
    subjects[row['SUBJ']] = row


visits = defaultdict(dict)
columns = 'subject session speaker word_types'.split()

for subj, sess, spkr, wt in speech.values(*columns):
    age = sess_map[int(sess)]
    if not visits.has_key((subj, sess)):
        visits[subj, sess] = {'SUBJ': subj, 'SESS': sess, 'AGE': age,
                              'CWT': '', 'PWT': ''}
    if spkr == "child":
        visits[subj, sess]['CWT'] = wt
    else:
        visits[subj, sess]['PWT'] = wt
Exemple #6
0
def test_batch_update():
    '''Testing batch_update_sql method'''
    r = Reader(tsv_dir + 'init.tsv')
    assert r.batch_update_sql(table='people') == batch_update_string
    q = Reader(tsv_dir + 'quotes.tsv')
    assert q.batch_update_sql(table='utterances') == batch_update_qstring
Exemple #7
0
def test_batch_insert():
    '''Testing batch_insert_sql method'''
    assert r.batch_insert_sql(table='people') == batch_insert_string
    q = Reader(tsv_dir + 'quotes.tsv')
    assert q.batch_insert_sql(table='utterances') == batch_insert_qstring
Exemple #8
0
from datastore.table import Reader

r = Reader('ses.tsv')

def pprint(args):
    print "\t".join(args)

pprint('SUBJ SEX EDU INC RACE ETHN'.split())
for v in r.values('id', 'sex', 'edu', 'income', 'race', 'ethn'):
    pprint(v)