def select(self, table, columns='*', where=None, sql='', toDict=True, toArray=False, distinct=False, limit=None, offset=None):
    """
    Build and run a SELECT statement against *table*, returning the result set.

    ============== ================================================================
    **Arguments:**
    table          Name of the table to read from.
    columns        (list or str) Columns to read. The default '*' reads every
                   column. A string is pasted verbatim into the SQL; a list is
                   converted to a comma-separated string of quoted names.
    where          Optional dict of {column: value} pairs restricting the results
                   to rows where column=value.
    distinct       (bool) If True, omit redundant results.
    limit          (int) Maximum number of results to return (best combined with
                   *offset*).
    offset         (int) Number of results to skip from the start of the list.
    sql            Optional string appended to the query (inserted before the
                   limit/offset clauses).
    toDict         If True, return a list-of-dicts (default).
    toArray        If True, return a numpy record array.
    ============== ================================================================
    """
    profiler = debug.Profiler("SqliteDatabase.select", disabled=True)

    ## A list of column names must be quoted; a plain string (or '*') is
    ## passed through to the SQL untouched.
    if columns != '*' and not isinstance(columns, basestring):
        columns = ','.join([col if col == '*' else '"' + col + '"' for col in columns])

    whereStr = self._buildWhereClause(where, table)

    ## Render each optional clause as either its SQL fragment or "".
    if distinct is True:
        distinct = "distinct"
    else:
        distinct = ""
    if limit is None:
        limit = ""
    else:
        limit = "limit %d" % limit
    if offset is None:
        offset = ""
    else:
        offset = "offset %d" % offset

    cmd = "SELECT %s %s FROM %s %s %s %s %s" % (distinct, columns, table, whereStr, sql, limit, offset)
    profiler.mark("generated command")

    result = self.exe(cmd, toDict=toDict, toArray=toArray)
    profiler.finish()
    return result
def storeDBScan(self, scan, storeEvents=True):
    """Store all data for a scan, using cached values if possible.

    Collects events and stats from every spot in *scan*, then writes them to
    the currently-selected database inside a single transaction. A progress
    dialog is shown during data preparation; canceling it aborts the store.

    Arguments:
        scan        -- the scan object whose spot data will be stored
        storeEvents -- if True (default), previously-stored event data for this
                       scan is cleared first and new event data is written;
                       if False, only the per-spot stats are stored.

    Raises:
        HelpfulException -- if the user cancels the progress dialog.
    """
    p = debug.Profiler("Photostim.storeDBScan", disabled=True)
    if storeEvents:
        ## remove any event data previously stored for this scan before re-writing
        self.clearDBScan(scan)
    with pg.BusyCursor():
        #dh = scan.source()
        #print "Store scan:", scan.source().name()
        events = []
        stats = []
        spots = scan.spots()
        with pg.ProgressDialog("Preparing data for %s" % scan.name(), 0, len(spots) + 1) as dlg:
            ## collect events and stats from all spots in the scan
            for i in xrange(len(spots)):
                s = spots[i]
                fh = self.dataModel.getClampFile(s.data())
                try:
                    ev = scan.getEvents(fh)['events']
                    events.append(ev)
                except:
                    ## dump the offending file/result for debugging, then re-raise
                    print fh, scan.getEvents(fh)
                    raise
                st = scan.getStats(s.data())
                stats.append(st)
                dlg.setValue(i)
                if dlg.wasCanceled():
                    raise HelpfulException("Scan store canceled by user.", msgType='status')
        p.mark("Prepared data")

        dbui = self.getElement('Database')
        db = dbui.getDb()
        ## write events and stats atomically
        with db.transaction():
            ## Store all events for this scan
            if storeEvents:
                ## drop empty event arrays before concatenating
                events = [x for x in events if len(x) > 0]
                if len(events) > 0:
                    ev = np.concatenate(events)
                    p.mark("concatenate events")
                    self.detector.storeToDB(ev)
                    p.mark("stored all events")
            ## Store spot data
            self.storeStats(stats)
            p.mark("stored all stats")
    p.finish()
def _readRecord(self, rec):
    """Convert a single query-result row into an OrderedDict.

    Values stored as byte arrays are unpickled back into their original
    objects (they are expected to have been stored as pickled data).
    """
    profiler = debug.Profiler("_readRecord", disabled=True)
    out = collections.OrderedDict()
    for i, name in enumerate(rec.keys()):
        value = rec[i]
        ## Unpickle byte arrays into their original objects.
        ## (Hopefully they were stored as pickled data in the first place!)
        if isinstance(value, buffer):
            value = pickle.loads(str(value))
        out[name] = value
    profiler.finish()
    return out
def _queryToArray(self, q):
    """Convert the results of query *q* into a numpy record array.

    Returns None when the query produced no rows — building an empty array
    with the correct columns would require the table schema, which is not
    available here. The dtype is inferred from the first record.
    """
    profiler = debug.Profiler("_queryToArray", disabled=True)
    records = self._queryToDict(q)
    profiler.mark("got records")
    if not records:
        ## need to return empty array *with correct columns*, but this is
        ## very difficult, so just return None
        return None
    dtype = functions.suggestRecordDType(records[0], singleRecord=True)
    arr = np.empty(len(records), dtype=dtype)
    for i in xrange(len(records)):
        arr[i] = tuple(records[i].values())
    profiler.mark('converted to array')
    profiler.finish()
    return arr
def select(self, table, columns='*', where=None, sql='', toDict=True, toArray=False, distinct=False, limit=None, offset=None):
    """Extends select to convert directory/file columns back into Dir/FileHandles.
    If the file doesn't exist, you will still get a handle, but it may not be the correct type.

    All arguments are forwarded to SqliteDatabase.select; see that method for
    their meaning. The base query always runs with toDict=True/toArray=False,
    and the handle-converted data is re-shaped afterwards according to the
    toArray flag. Columns whose configured Type starts with 'directory' are
    mapped through getDir() using the configured 'Link' table; columns of
    Type 'file' are resolved against baseDir().
    """
    prof = debug.Profiler("AnalysisDatabase.select()", disabled=True)
    ## fetch raw records first; handle conversion happens on the dict form
    data = SqliteDatabase.select(self, table, columns, where=where, sql=sql, distinct=distinct, limit=limit, offset=offset, toDict=True, toArray=False)
    data = TableData(data)
    prof.mark("got data from SQliteDatabase")

    config = self.getColumnConfig(table)

    ## convert file/dir handles
    for column, conf in config.iteritems():
        if column not in data.columnNames():
            continue

        if conf.get('Type', '').startswith('directory'):
            ## build a row-id -> DirHandle map once, then apply it to the column
            rids = set([d[column] for d in data])
            linkTable = conf['Link']
            handles = dict([(rid, self.getDir(linkTable, rid)) for rid in rids if rid is not None])
            handles[None] = None
            data[column] = map(handles.get, data[column])

        elif conf.get('Type', None) == 'file':
            def getHandle(name):
                ## resolve a stored relative path to a FileHandle under baseDir()
                if name is None:
                    return None
                else:
                    if os.sep == '/':
                        sep = '\\'
                    else:
                        sep = '/'
                    ## make sure file handles have an operating-system-appropriate separator (/ for Unix, \ for Windows)
                    name = name.replace(sep, os.sep)
                    return self.baseDir()[name]
            data[column] = map(getHandle, data[column])

    prof.mark("converted file/dir handles")

    ret = data.originalData()
    if toArray:
        ret = data.toArray()
        prof.mark("converted data to array")
    prof.finish()
    return ret
def exe(self, cmd, data=None, batch=False, toDict=True, toArray=False):
    """Execute an SQL query.

    If data is provided, it should be a list of dicts and each will be bound
    to the query and executed sequentially. Returns the query object.

    Arguments:
        cmd     - The SQL query to execute
        data    - List of dicts, one per record to be processed.
                  For each record, data is bound to the query by key name:
                  {"key1": "value1"} => ":key1"="value1"
        batch   - If True, all input data is processed in a single execution.
                  In this case, data must be provided as a dict-of-lists or
                  record array.
        toDict  - If True, return a list-of-dicts representation of the query
                  results
        toArray - If True, return a record array representation of the query
                  results
    """
    p = debug.Profiler('SqliteDatabase.exe', disabled=True)
    p.mark('Command: %s' % cmd)
    if data is None:
        cur = self.db.execute(cmd)
        p.mark("Executed with no data")
    else:
        data = TableData(data)
        if batch:
            cur = self.db.executemany(cmd, data.__iter__())
        else:
            for d in data:
                p.mark("bound values for record")
                ## BUG FIX: the cursor was previously discarded here, leaving
                ## `cur` unbound and causing a NameError below whenever results
                ## were requested. Keep the cursor from the last execution.
                cur = self.db.execute(cmd, d)
                p.mark("executed with data")

    if cmd is not None:
        ## a CREATE statement invalidates the cached table list
        if str(cmd)[:6].lower() == 'create':
            self.tables = None

    if toArray:
        ret = self._queryToArray(cur)
    elif toDict:
        ret = self._queryToDict(cur)
    else:
        ret = cur
    p.finish()
    return ret
def storeToDB(self, data=None):
    """Store detected event data to the database.

    If *data* is None, the current flowchart output ('events') is used.
    When *data* is empty, any previously stored events for the current
    protocol directory are deleted instead. Otherwise the target table is
    (re)created/extended as needed and all records are inserted inside one
    transaction, replacing prior records for the same source files.

    Raises:
        Exception        -- if no database is currently selected.
        HelpfulException -- if the user cancels the progress dialog.
    """
    p = debug.Profiler("EventDetector.storeToDB", disabled=True)

    if data is None:
        data = self.flowchart.output()['events']

    dbui = self.getElement('Database')
    table = dbui.getTableName(self.dbIdentity)
    db = dbui.getDb()
    if db is None:
        raise Exception("No DB selected")

    p.mark("DB prep done")

    if len(data) == 0:
        ## if there is no event data, then we need to delete previous event data
        dh = self.currentFile.name(relativeTo=db.baseDir())
        ## strip a trailing clamp-file name so dh refers to the protocol dir
        if dh[-10:] == '/Clamp1.ma' or dh[-10:] == '/Clamp2.ma':
            dh = dh[:-10]
        ## NOTE(review): dh is interpolated directly into the SQL string here;
        ## presumably safe because dh comes from internal file naming, but a
        ## parameterized query would be safer — confirm.
        protocolID = db('Select rowid, Dir from DirTable_Protocol where Dir="%s"' % dh)
        if len(protocolID) > 0:
            protocolID = protocolID[0]['rowid']
        else:
            ## no matching protocol row; nothing to delete
            return
        db('Delete from %s where ProtocolDir=%i' % (table, protocolID))
        return

    ## determine the set of fields we expect to find in the table
    columns = db.describeData(data)
    columns.update({
        'ProtocolSequenceDir': 'directory:ProtocolSequence',
        'ProtocolDir': 'directory:Protocol',
    })
    p.mark("field list done")

    with db.transaction():
        ## Make sure target table exists and has correct columns, links to input file
        db.checkTable(table, owner=self.dbIdentity, columns=columns, create=True, addUnknownColumns=True, indexes=[['SourceFile'], ['ProtocolSequenceDir']])
        p.mark("data prepared")

        ## collect all protocol/Sequence dirs
        prots = {}
        seqs = {}
        for fh in set(data['SourceFile']):
            prots[fh] = fh.parent()
            seqs[fh] = self.dataModel.getParent(fh, 'ProtocolSequence')

        ## delete all records from table for current input files
        for fh in set(data['SourceFile']):
            db.delete(table, where={'SourceFile': fh})
        p.mark("previous records deleted")

        ## assemble final list of records
        records = {}
        for col in data.dtype.names:
            records[col] = data[col]
        records['ProtocolSequenceDir'] = map(seqs.get, data['SourceFile'])
        records['ProtocolDir'] = map(prots.get, data['SourceFile'])
        p.mark("record list assembled")

        ## insert all data to DB
        with pg.ProgressDialog("Storing events...", 0, 100) as dlg:
            for n, nmax in db.iterInsert(table, records, chunkSize=50):
                dlg.setMaximum(nmax)
                dlg.setValue(n)
                if dlg.wasCanceled():
                    raise HelpfulException("Scan store canceled by user.", msgType='status')
        p.mark("records inserted")

    p.finish()
def _queryToDict(self, q):
    """Return the results of query *q* as a list of dicts.

    Each row is converted by _readRecord, which unpickles any byte-array
    values back into their original objects.
    """
    prof = debug.Profiler("_queryToDict", disabled=True)
    res = [self._readRecord(rec) for rec in q]
    ## BUG FIX: prof.finish() was never called here, unlike every other
    ## profiled method in this class; close the profiler for consistency.
    prof.finish()
    return res
def iterInsert(self, table, records=None, replaceOnConflict=False, ignoreExtraColumns=False, chunkSize=500, chunkAll=False, **args):
    """
    Iteratively insert chunks of data into a table while yielding a tuple
    (n, max) indicating progress. This *must* be used inside a for loop::

        for n,nmax in db.iterInsert(table, data):
            print "Insert %d%% complete" % (100. * n / nmax)

    Use the chunkSize argument to determine how many records are inserted
    per iteration (chunkAll=True inserts everything in a single execution).
    See insert() for a description of all other options.
    """
    p = debug.Profiler("SqliteDatabase.insert", disabled=True)
    if records is None:
        ## allow single-record insertion via keyword arguments
        records = [args]
    if len(records) == 0:
        return
    with self.transaction():
        ## Remember that _prepareData may change the number of columns!
        records = TableData(self._prepareData(table, records, ignoreUnknownColumns=ignoreExtraColumns, batch=True))
        p.mark("prepared data")
        columns = records.keys()
        insert = "INSERT"
        if replaceOnConflict:
            insert += " OR REPLACE"
        cmd = "%s INTO %s (%s) VALUES (%s)" % (insert, table, quoteList(columns), ','.join([':' + f for f in columns]))

        numRecs = len(records)
        if chunkAll:
            ## insert all records in one go.
            self.exe(cmd, records, batch=True)
            yield (numRecs, numRecs)
            return
        chunkSize = int(chunkSize)  ## just make sure
        offset = 0
        while offset < len(records):
            chunk = records[offset:offset + chunkSize]
            self.exe(cmd, chunk, batch=True)
            offset += len(chunk)
            yield (offset, numRecs)
        p.mark("Transaction done")
    p.finish()