def batch_insert(self, filename, table, preview=False): ''' Batch insert column values from filename into table. If *preview* is True, print the sql statement that will be used for inserting and the table of values from *filename* *filename* should be a TSV file with a column header line with column names corresponding to the column names in the target database table. For example, suppose we have a file ``insert.tsv``: id name age 1 John 20 2 Mary 24 We'd like to insert this table of values into the "people" table of our company database. To preview the insertion: >>> from datastore import sqlite >>> ds = sqlite('company.db') >>> ds.batch_insert('insert.tsv', table="people", preview=True) INSERT INTO people (id, name, age) VALUES (:id, :name, :age); id name age 1 John 20 2 Mary 24 ''' data = Reader(filename) self.valid_table(table) for col in data.columns: self.valid_column(col, table) insert_st = data.insert_sql(table) if preview: print insert_st print data else: try: self.cur.executemany(insert_st, data.rows) except sqlite3.IntegrityError as err: print insert_st print err for row in data.rows: print row self.insert(table, **row) ## try inserting row by row ## to find problematic row raise SystemExit
def batch_update(self, filename, table, pk='id', preview=False): ''' Batch update column values from filename into table. If *preview* is True, print the sql statement that will be used for updating and the table of values from *filename* *filename* should be a TSV file with a header line having column names corresponding to the column names in the target database table. The *pk* argument should reference the primary key to use for each update in the WHERE clause ("WHERE id = :id"). For example, consider the following update file: For example, suppose we have a file ``update.tsv``: id name age 1 John 21 2 Mary 25 We'd like to update the existing values in the "people" table of our company database with these new values. To preview the update: >>> from datastore import sqlite >>> ds = sqlite('company.db') >>> ds.batch_update('update.tsv', table="people", preview=True) UPDATE people SET name = :name, age = :age WHERE id = :id; id name age 1 John 21 2 Mary 25 ''' data = Reader(filename) self.valid_table(table) for col in data.columns: self.valid_column(col, table) update_st = data.update_sql(table) if preview: print update_st print data else: self.cur.executemany(update_st, data.rows)
#!/usr/bin/env python ''' cut.py -- cut specified columns from a TSV-formatted file. Assumes the file contains a header line containing column names, of which the columns to be cut are subset. Modify the columns variable below. ''' import sys from datastore.table import Reader transcripts = sys.argv[1:] columns = '''id subject session row time line key p_utts_orig p_utts p_form p_lrb p_obj p_gloss p_orient p_mspd c_utts_orig c_utts c_form c_lrb c_obj c_gloss c_orient c_mspd context'''.split() def pprint(values): print "\t".join(values) pprint(columns) for t in transcripts: T = Reader(t) for row in T.values(*columns): pprint(row)
from datastore.table import Reader from collections import defaultdict ses = Reader('ses.xls') subjects = defaultdict(dict) ses_cols = 'SUBJ SEX EDU INC RACE ETHN'.split() for row in ses: subjects[row['SUBJ']] = row outcomes = Reader('outcomes.tsv') out = defaultdict(dict) columns = 'SUBJ SESS VOCAB READ_WJ READ_GM'.split() for subj, sess, voc, rwj, rgm in outcomes.values(*columns): if not out.has_key(subj): out[subj] = { 'SUBJ': subj, 'VOCB1': '', 'VOCB2': '', 'VOCB3': '', 'VOCB4': '', 'READ1': '', 'READ2': '', 'READ3': '', 'READ4': '', 'READ5': '' } if sess == "5" and voc:
from datastore.table import Reader from collections import defaultdict sess_map = dict([(1,14), (2,18), (3,22), (4,26), (5,30), (6,34), (7,38), (8,42), (9,46), (10,50), (11,54), (12,58), (13,62), (14,66), (15,70), (16,74), (17,78), (18,82), (19,86), (20,90)]) speech = Reader('speech.xls') ses = Reader('ses.xls') subjects = defaultdict(dict) ses_cols = 'SUBJ SEX EDU INC RACE ETHN'.split() for row in ses: subjects[row['SUBJ']] = row visits = defaultdict(dict) columns = 'subject session speaker word_types'.split() for subj, sess, spkr, wt in speech.values(*columns): age = sess_map[int(sess)] if not visits.has_key((subj, sess)): visits[subj, sess] = {'SUBJ': subj, 'SESS': sess, 'AGE': age, 'CWT': '', 'PWT': ''} if spkr == "child": visits[subj, sess]['CWT'] = wt else: visits[subj, sess]['PWT'] = wt
def test_batch_update(): '''Testing batch_update_sql method''' r = Reader(tsv_dir + 'init.tsv') assert r.batch_update_sql(table='people') == batch_update_string q = Reader(tsv_dir + 'quotes.tsv') assert q.batch_update_sql(table='utterances') == batch_update_qstring
def test_batch_insert(): '''Testing batch_insert_sql method''' assert r.batch_insert_sql(table='people') == batch_insert_string q = Reader(tsv_dir + 'quotes.tsv') assert q.batch_insert_sql(table='utterances') == batch_insert_qstring
from datastore.table import Reader r = Reader('ses.tsv') def pprint(args): print "\t".join(args) pprint('SUBJ SEX EDU INC RACE ETHN'.split()) for v in r.values('id', 'sex', 'edu', 'income', 'race', 'ethn'): pprint(v)