def update(self, table, primary_keys, field_values):
    """Update one row of db.table.

    str/unicode/None values are bound as query parameters (%s
    placeholders); numeric values are spliced into the SQL directly.

    :param table: table name (trusted — spliced into the SQL string)
    :param primary_keys: {column: value} used to build the WHERE clause
    :param field_values: {column: value} columns to SET
    :return: True (failures surface as exceptions from mysql.MYSQL;
             the original's trailing 'return False' was unreachable)
    """
    params = []
    upd = """UPDATE %s.%s\nSET """ % (self.db, table)
    for f in field_values:
        v = field_values[f]
        if isinstance(v, (str, unicode)) or v is None:
            # bind as a placeholder so quoting/escaping is the driver's job
            upd += "%s = " % f
            upd += "%s,\n"
            params.append(v)
        else:
            upd += """%s = %s,\n""" % (f, v)
    upd = upd[:-2]  # seek back and eat the last ,\n
    where = """\nWHERE """  # build this as k1 = v1 and k2 = v2
    for k in primary_keys:
        v = primary_keys[k]
        if isinstance(v, (str, unicode)) or v is None:
            where += "%s = " % k
            where += "%s and "
            params.append(v)
        else:
            where += """%s = %s and """ % (k, primary_keys[k])
    where = where[:-4] + ";"  # eat the trailing 'and '
    upd += where
    with mysql.MYSQL(self.srv, self.db, self.uid, self.pwd) as ms:
        ms.query(upd, params, False)
    return True
def get_stage_ids_names(self):
    """Return all rows of db.stages as a list of {stage_id, name} dicts.

    The original ended with an unreachable 'return None' after
    'return res'; removed.
    """
    sql = """SELECT stage_id,name FROM %s.stages;""" % (self.db)
    v, res = (), {}
    with mysql.MYSQL(self.srv, self.db, self.uid, self.pwd) as ms:
        res = ms.query(sql, v, True)
        return res
def select_fields_row(self, table, primary_keys, fields):
    """SELECT the given fields from db.table for rows matching primary_keys,
    decompressing any bytearray (blob) column via self.blob_to_obj.

    :param table: table name (trusted — spliced into the SQL)
    :param primary_keys: {column: value} for the WHERE clause;
        str/unicode/None values are bound as parameters
    :param fields: iterable of column names to select
    :return: non-empty list of row dicts
    :raises IndexError: when no rows match
    """
    ss, data = [], []
    # join the field list instead of append-then-truncate
    select = 'SELECT ' + ', '.join(fields)
    select += ' FROM %s.%s' % (self.db, table)
    where = """\nWHERE """  # build this as k1 = v1 and k2 = v2
    for k in primary_keys:
        v = primary_keys[k]
        if isinstance(v, (str, unicode)) or v is None:
            where += "%s = " % k
            where += "%s and "
            ss.append(v)
        else:
            where += """%s = %s and """ % (k, primary_keys[k])
    where = where[:-4] + ";"  # eat the trailing 'and '
    select += where
    with mysql.MYSQL(self.srv, self.db, self.uid, self.pwd) as ms:
        res = ms.query(select, ss, True)
        for row in res:  # each row is a dict keyed by column name
            for f in row:
                if type(row[f]) is bytearray:  # (long)blob columns only?
                    row[f] = self.blob_to_obj(row[f])
            data.append(row)  # uncompressed rows
    if len(data) >= 1:
        return data
    else:
        raise IndexError
def get_ref_info(self, ref_name):
    """Return the single refs row whose name matches ref_name (SQL LIKE).

    ref_name is bound as a query parameter; the original quoted it
    inline ("like '%s'"), which was open to SQL injection.

    :raises IndexError: unless exactly one row matches
    """
    data = []
    # keep the schema name interpolated, bind the user-supplied value
    sql = """ SELECT * FROM %s.refs WHERE name like """ % self.db + "%s;"
    with mysql.MYSQL(self.srv, self.db, self.uid, self.pwd) as ms:
        res = ms.query(sql, (ref_name,), True)
        for r in res:
            data.append(r)
    if len(data) == 1:
        return data[0]
    else:
        raise IndexError
def get_max_key(self, table):
    """Return the largest primary-key value in db.table, 0 for an empty
    table, or None when table is absent from the cached schema.

    Fixes: dict.has_key is Python-2-only ('in' works everywhere), and
    MAX() of an empty table yields SQL NULL -> None, which max(0, None)
    would reject on Python 3; handled explicitly instead.
    """
    if table in self.schema:
        pk = self.schema[table]['pk'][0]  # assume a single-column key
        sql = """SELECT max(%s) AS %s FROM %s.%s;""" % (pk, pk, self.db, table)
        with mysql.MYSQL(self.srv, self.db, self.uid, self.pwd) as ms:
            res = ms.query(sql, (), True)
            top = res[0][pk]  # assume one max of course
            return 0 if top is None else max(0, top)
    return None
def get_run_info(self, run_id):
    """Fetch ref_id, mut_mag, mut_len and mut_type for one run.

    :param run_id: run identifier spliced into the query
    :return: the single matching row dict
    :raises IndexError: unless exactly one row comes back
    """
    run_info_sql = """ SELECT ref_id,mut_mag,mut_len,mut_type FROM %s.runs WHERE run_id = %s""" % (self.db, run_id)
    rows = []
    with mysql.MYSQL(self.srv, self.db, self.uid, self.pwd) as ms:
        for record in ms.query(run_info_sql, (), True):
            rows.append(record)
    if len(rows) != 1:
        raise IndexError
    return rows[0]
def select_tables(self):
    """List every table in the current schema.

    :return: {'names': [table_name, ...]} from INFORMATION_SCHEMA
    """
    tables = """ SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = '%s';"""
    result = {'names': []}
    with mysql.MYSQL(self.srv, self.db, self.uid, self.pwd) as ms:
        for row in ms.query(tables % (self.db), (), True):
            result['names'].append(str(row[u'TABLE_NAME']))
    return result
def get_ref_name(self, run_id):
    """Look up the reference name tied to run_id via the refs/runs link.

    :return: the 'name' column of the single matching refs row
    :raises IndexError: unless exactly one row comes back
    """
    ref_name_sql = """ SELECT name FROM %s.refs WHERE ref_id IN ( SELECT ref_id FROM %s.runs WHERE run_id = %s );""" % ( self.db, self.db, run_id)
    rows = []
    with mysql.MYSQL(self.srv, self.db, self.uid, self.pwd) as ms:
        for record in ms.query(ref_name_sql, (), True):
            rows.append(record)
    if len(rows) != 1:
        raise IndexError
    return rows[0]['name']
def get_run_vc(self, run_id):
    """Return the staged_runs rows (joined with stages) for run_id whose
    stage output extension LIKE-matches '%.vcf%'.

    Bug fix: the original spliced a literal '%.vcf%' into a %-formatted
    string, which raises ValueError ('unsupported format character');
    the LIKE wildcards must be escaped as '%%' before %-formatting.
    """
    data = []
    sql = """ SELECT SR.stage_id, SR.run_id, S.name, SR.results, SR.done FROM %s.staged_runs as SR JOIN %s.stages as S ON (SR.stage_id = S.stage_id) WHERE SR.run_id = %s and S.out_ext like '%%.vcf%%';""" % ( self.db, self.db, run_id)
    with mysql.MYSQL(self.srv, self.db, self.uid, self.pwd) as ms:
        res = ms.query(sql, (), True)
        for r in res:
            data.append(r)
    return data
def select_all(self, table):
    """Return every row of db.table, decompressing bytearray (blob)
    columns via self.blob_to_obj.

    Removed the no-op 'else: row[f] = row[f]' branch and the
    range(len(res)) index loop in favour of direct iteration.

    :param table: table name (trusted — spliced into the SQL)
    :return: list of row dicts (possibly empty)
    """
    data = []  # final uncompressed rows
    select = 'SELECT * FROM %s.%s;' % (self.db, table)
    with mysql.MYSQL(self.srv, self.db, self.uid, self.pwd) as ms:
        res = ms.query(select, (), True)
        for row in res:  # each row is a dict keyed by column name
            for f in row:
                if type(row[f]) is bytearray:  # (long)blob columns only?
                    row[f] = self.blob_to_obj(row[f])
            data.append(row)  # insert uncompressed objects into data
    return data
def get_run_true_vc(self, run_id):
    """Fetch and decompress the mut_true_vc blob for run_id.

    Best-effort: if anything goes wrong (no matching row, decompression
    failure) the raw row list is returned instead of raising.
    """
    run_info_sql = """ SELECT mut_true_vc FROM %s.runs WHERE run_id = %s""" % (self.db, run_id)
    rows = []
    with mysql.MYSQL(self.srv, self.db, self.uid, self.pwd) as ms:
        for record in ms.query(run_info_sql, (), True):
            rows.append(record)
    try:
        return self.blob_to_obj(rows[0]['mut_true_vc'], status=True)
    except Exception:
        # deliberate catch-all fallback: hand back whatever we fetched
        return rows
def get_stage_id(self, wrapper):
    """Return the stage_id of the stages row whose wrapper column equals
    wrapper.

    wrapper is bound as a query parameter; the original spliced it into
    the SQL inside double quotes, which was open to SQL injection.
    Also removed the no-op 'else: row[f] = row[f]' branch.

    :raises IndexError: unless exactly one stage matches
    """
    data, table = [], 'stages'
    select = 'SELECT stage_id FROM %s.%s' % (self.db, table)
    select += """\nWHERE wrapper = %s;"""
    with mysql.MYSQL(self.srv, self.db, self.uid, self.pwd) as ms:
        res = ms.query(select, [wrapper], True)
        for row in res:  # each row is a dict keyed by column name
            for f in row:
                if type(row[f]) is bytearray:  # stage_id is an int; kept for safety
                    row[f] = self.blob_to_obj(row[f])
            data.append(row)
    if len(data) == 1:
        return data[0]['stage_id']
    else:
        raise IndexError
def select_fields(self, table):
    """Describe db.table from INFORMATION_SCHEMA.

    :return: {column_name: {'pos': ordinal, 'type': column_type}, ...,
              'pk': [primary key column names]}
    """
    fields = """ SELECT COLUMN_NAME, COLUMN_TYPE, ORDINAL_POSITION, COLUMN_KEY FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '%s' AND TABLE_NAME = '%s';"""
    described = {}
    key_cols = []
    with mysql.MYSQL(self.srv, self.db, self.uid, self.pwd) as ms:
        for row in ms.query(fields % (self.db, table), (), True):
            col = str(row[u'COLUMN_NAME'])
            described[col] = {
                'pos': row[u'ORDINAL_POSITION'],
                'type': str(row[u'COLUMN_TYPE'])
            }
            if row[u'COLUMN_KEY'] == u'PRI':
                key_cols.append(col)
    described['pk'] = key_cols
    return described
def get_staged_run_vc(self, run_id, stage_id):
    """Return [params, results] for one staged run, each decompressed via
    self.blob_to_obj.

    Bug fix: the original returned blob_to_obj(data[0]) and
    blob_to_obj(data[1]) — but data holds exactly one row dict here, so
    data[1] always raised IndexError and the row's columns were never
    extracted. The row's 'params' and 'results' columns (the two columns
    selected) are decompressed instead.

    :raises IndexError: unless exactly one row matches
    """
    data = []  # don't worry about errors for now
    staged_run_info_sql = """ SELECT params,results FROM %s.staged_runs WHERE run_id = %s AND stage_id = %s; """ % (self.db, run_id, stage_id)
    with mysql.MYSQL(self.srv, self.db, self.uid, self.pwd) as ms:
        res = ms.query(staged_run_info_sql, (), True)
        for r in res:
            data.append(r)
    if len(data) == 1:
        row = data[0]
        return [
            self.blob_to_obj(row['params'], status=True),
            self.blob_to_obj(row['results'], status=True)
        ]
    else:
        raise IndexError
def insert(self, table, field_values):
    """INSERT one row into db.table.

    str/unicode/None values are bound as parameters (%s placeholders);
    everything else is spliced into the SQL as a literal. Both loops
    iterate the same unmodified dict, so column order and value order
    stay aligned.

    :return: True (failures surface as exceptions from mysql.MYSQL;
             the original's trailing 'return False' was unreachable)
    """
    ss = []  # use tuple for mysql.connector compatibilty
    ins = """INSERT INTO %s.%s (""" % (self.db, table)
    for f in field_values:
        ins += f + ", "
    ins = ins[:-2] + ") VALUES("  # eat the trailing ', '
    for f in field_values:
        v = field_values[f]
        if isinstance(v, (str, unicode)) or v is None:
            ins += "%s,"
            ss.append(v)
        else:
            ins += str(v) + ","
    ins = ins[:-1] + ");"  # eat the trailing ','
    with mysql.MYSQL(self.srv, self.db, self.uid, self.pwd) as ms:
        ms.query(ins, tuple(ss), False)
    return True
import time
import csv

# Flat script: loads camera-inventory rows from a CSV into MySQL.
# NOTE(review): depends on 'con' (the MySQL wrapper module) being imported
# elsewhere — not visible in this chunk.

# read cfg for host and database to connect to
with open("../db.cfg") as f:
    host = f.readline().rstrip('\n')
    database = f.readline().rstrip('\n')
# read cfg for credentials (username and password to DB)
# TODO: decrypt using a key found in another file
with open("../cred.cfg") as f:
    usr = f.readline().rstrip('\n')
    pwd = f.readline().rstrip('\n')
# connect
db = con.MYSQL(host, database, usr, pwd)
# time query
start = time.perf_counter()
# variable to be injected
id = []  # NOTE(review): shadows the builtin id() — consider renaming
# query that injects ID into %s
query_base = """insert into inventory (CameraModel, SerialNumber, CameraNumber, DateReceived, LastLocation, StillInUse, Comments) values """
query = ''
with open('../Data/InventoryDataClean.csv', mode='r') as infile:
    reader = csv.reader(infile)
    # NOTE(review): this chunk appears truncated — the loop that consumes
    # 'reader' and builds/executes 'query' is not visible in this view
"CompressedBitsPerPixel", "ShutterSpeedValue", "MaxApertureValue"] # files that may be moved to out dir or quarantine dir # depends on if they are successful in insert files_pending = [] # files to be moved to d_dir deletes = [] # files to be moved to quarantine quarantines = [] # performance timing start = perf_counter() # epoch timing for log l_start = time.time() # connect and query with con.MYSQL(host, database, port, usr, pwd) as db: # for every row of exif for row in values: # row of exif data, this is what will be inserted # made of converted values fRow = [] try: # for every exif tag that is in the returned meta dict for c in cols: # do conversions if row.get(c) is None or row[c].strip(' ') == '': row[c] = None elif c in timestamps: d = datetime.strptime(row[c], "%Y:%m:%d %H:%M:%S") ts = int(time.mktime(d.timetuple())) row[c] = ts
print('processing file=%s' % file) uploadDate = datetime.today().strftime('%m%d%Y_%H%M%S_') fpath_base = file.rsplit('\\', 3)[0] fpath_in = file fpath_err = fpath_base + '\\ErrRpts\\' + uploadDate + file.rsplit( '\\')[-1] + 'QcRpt.txt' fpath_out = fpath_base + '\\UploadedRpts\\' + uploadDate + file.rsplit( '\\')[-1] delim = '\t' raw = readXlsx(fpath_in, db_err) header = raw[0] # could use to check header names in the excel file raw = raw[1:] os.rename(fpath_in, fpath_out) if raw is not None and header == headerList: with msc.MYSQL('localhost', db_scm, 3306, config_uid, config_pw) as dbo: insDate = datetime.today().strftime('%Y-%m-%d %H:%M:%S') # Insert into the database line by line. Append DB error if not caught by qc checks. for i in range(len(raw)): # Any blank values change to None type for j in range(len(raw[i])): if len(str(raw[i][j])) < 1: raw[i][j] = None else: raw[i][j] = raw[i][j] # generate auto populated fields for fish char_name = 'Count' result_cd = 'count' result_tp = 'Actual' bio_int = 'Frequency Class'
def new(self):
    """(Re)create the four sve schema tables: refs, runs, stages and
    staged_runs.

    DESTRUCTIVE: each table is dropped first, so all existing rows in
    those tables are lost.
    """
    drop = "DROP TABLE IF EXISTS %s;"
    create = """ CREATE TABLE %s (%s) ROW_FORMAT=DYNAMIC; """
    # new connection is safe and closes automatically
    with mysql.MYSQL(self.srv, self.db, self.uid, self.pwd) as ms:
        # DB workflow is to get the max id for each table and increment if a unique key
        # is obtainible from the checks instead of a new table for each simulator and aligner
        # Generated and Real References
        # ref_len is the total bp such as human ~3E7bp
        # seq is the fa.gz compressed string
        ms.query(drop % 'refs')
        fields = """ ref_id int primary key not null, name varchar(255) not null, ref_len bigint, seq_names text, seq_lens text, seqs longblob, url text """
        ms.query(create % ('refs', fields))
        print('Created sve.refs')
        # Runs-----------------------------
        # a run is a set of simulations on parameters from a given ref-seq
        # the ref-seq is generated first and then a run is generated which is
        # a unique set of parameters (that induces randomized edits)
        # variant caller stages can be re-run to optimize parameters for a given platform and alignment
        ms.query(drop % 'runs')
        fields = """ run_id int primary key not null, platform_id varchar(50) not null, node_id varchar(50) not null, ref_id int not null, calibrating bit(1) not null, mut_mag int, mut_len bigint, mut_type char(3), mut_true_vc longblob, mut_ens_vc longblob, stage_id_list blob, stage_depth int, curr_stage_id int, debug bit(1), start datetime, stop datetime """
        ms.query(create % ('runs', fields))
        print('Created sve.runs')
        # Stages
        # stage_id a unique identifier for a new stage (a run is a (||)sequence of stages)
        # name is the text for the bound name in the enc, IE 'bwa'
        # param_ranges is a JSON string that gives {param:{start:1,stop:10,step:1}} for fixed step or
        # {param:{v1,v2,...,v3}} for fixed value iterations
        ms.query(drop % 'stages')
        fields = """ stage_id int primary key not null, type varchar(25) not null, name varchar(50) not null, version varchar(50), exe_path varchar(255), wrapper varchar(50), in_ext 
varchar(255), out_ext varchar(255), param_map blob """
        ms.query(create % ('stages', fields))
        print('Created sve.stages')
        # Staged_Runs links the stage+parameters to the run_id
        # so the stage_list and progress can be observed on a large
        # scale with full persistance and the possibility of multiple
        # distpatching centers building out unique runs into the DB
        # results can be used to store the output of each stage 4GB max compressed size...
        ms.query(drop % 'staged_runs')
        fields = """ run_id int not null, stage_id int not null, in_files varchar(255) not null, in_files_size varchar(255), params blob, command text, results longblob, errors text, start datetime, stop datetime, done bit(1), primary key(run_id,stage_id,in_files) """
        ms.query(create % ('staged_runs', fields))
        print('Created sve.staged_runs')