def execQuery(self, qry):
    '''Executes a SQL statement, returning the results as a list of tuples.

    Empty/whitespace-only statements return [] without touching the DB.
    Statements that produce no result object (e.g. commands) also return
    [] -- previously this fell through and returned None, which crashed
    callers that do len() on the result (see Describe()).
    '''
    self.connect()
    logger.debug('Executing query: ' + qry)
    qry = qry.strip()
    if not qry:
        return []
    # Check to see if the connection is valid; reset it if it has gone bad.
    if self.dbconn.status != 1:
        logger.info("DB Connection bad, attempting reset")
        self.dbconn.reset()
    response = self.dbconn.query(qry)
    if response:
        return response.getresult()
    # No result object: give callers a consistent empty list, not None.
    return []
def LoadByRegEx(coriedir="/home/workspace/ccalmr/forecasts/forecasts_ref/2004/",
                regexp='2004-004'):
    '''Harvest and bulk-load every run directory under coriedir whose
    full path matches the pattern coriedir + regexp.

    Fix: paths are now built with os.path.join; the original 'root + d'
    concatenation only produced a valid path when root happened to end
    in '/', so candidates under deeper roots silently failed the
    os.path.exists() check.
    '''
    qlog.info("Starting load job on %s/%s" % (coriedir, regexp))
    t1 = time.time()
    # a regexp for catching only real run dirs
    e = re.compile(coriedir + regexp)
    paths = []
    for root, dirs, files in os.walk(coriedir):
        exists = [os.path.join(root, d) for d in dirs
                  if os.path.exists(os.path.join(root, d))]
        paths += [p for p in exists if e.match(p)]
    db = qdb.quarrydb()
    h = harvest.Harvester()
    b = bulkloader.BulkLoader(db)
    b.truncateFiles()
    sm = sigmanager.SignatureManager(db)
    for p in paths:
        h.harvestDir(p, b)
    b.closeFiles()
    b.loadharvest()
    sm.ReapHarvest()
    qlog.info("Load job finished in %s seconds." % (time.time() - t1, ))
def harvestDir(self, rundir, loader=bulkloader.BulkLoader()): qlog.info("Harvesting %s" % (rundir, )) t = time.time() if not os.path.exists(rundir): print "%s does not exist." % (rundir, ) qlog.error("run %s does not exist. (broken link?)") return for root, dirs, files in os.walk(rundir): for fname in files: fullpath = "%s/%s" % (root, fname) try: ds = self.harvest(fullpath) except: (e, v, t) = sys.exc_info() print "Error processing %s/%s: %s: %s, %s" % (root, fname, e, v, t) sys.exit(1) return if ds: loader.addresource(fullpath) for d in ds: loader.adddescriptor(d) qlog.info("...harvested in %s seconds." % (time.time() - t, ))
def LoadByRegEx(coriedir="/home/workspace/ccalmr/forecasts/forecasts_ref/2004/",
                regexp='2004-004'):
    '''Harvest every run directory matching coriedir + regexp, then
    bulk-load the harvest and reap signatures.'''
    qlog.info("Starting load job on %s/%s" % (coriedir, regexp))
    started = time.time()
    # only directories whose full path matches this pattern are real runs
    run_pattern = re.compile(coriedir + regexp)
    run_dirs = []
    for root, dirs, files in os.walk(coriedir):
        for d in dirs:
            candidate = root + d
            if os.path.exists(candidate) and run_pattern.match(candidate):
                run_dirs.append(candidate)
    db = qdb.quarrydb()
    harvester = harvest.Harvester()
    loader = bulkloader.BulkLoader(db)
    loader.truncateFiles()
    sigs = sigmanager.SignatureManager(db)
    for rundir in run_dirs:
        harvester.harvestDir(rundir, loader)
    loader.closeFiles()
    loader.loadharvest()
    sigs.ReapHarvest()
    qlog.info("Load job finished in %s seconds." % (time.time() - started, ))
def loadharvest(self, dir=config.datadir):
    '''Uses psql to stage harvested data in the quarry database.

    Writes a transaction-wrapped SQL script to a temp file, then runs
    psql on it and blocks until the load finishes.
    '''
    qlog.info("Loading harvest from %s" % (dir, ))
    t = time.time()
    sql = "%s;\n %s;\n %s;\n %s;" % ("begin transaction",
                                     queries.load_resources(dir),
                                     queries.load_descriptors(dir),
                                     "commit")
    sqlfile = config.tmpdir + "/temp.sql"
    f = file(sqlfile, "w")
    try:
        f.write(sql)
    finally:
        # close even if the write fails, so the handle is not leaked
        f.close()
    self.closeFiles()
    cmd = '''%s -h %s -f "%s" %s''' % (config.psqlpath, self.qdb.hostname,
                                       sqlfile, self.qdb.dbname)
    flusher = popen2.Popen3(cmd)
    output = flusher.fromchild
    # don't quit till this process finishes: reading stdout blocks until
    # psql is done
    qlog.debug('psql response:\n' + output.read())
    flusher.wait()
    qlog.info("...bulk loaded in %s seconds." % (time.time() - t, ))
def loadharvest(self, dir=config.datadir):
    '''Stage harvested data into the quarry database by running a
    generated SQL script through psql.'''
    qlog.info("Loading harvest from %s" % (dir, ))
    start = time.time()
    # one transaction around both bulk COPY statements
    statements = ["begin transaction",
                  queries.load_resources(dir),
                  queries.load_descriptors(dir),
                  "commit"]
    sql = ";\n ".join(statements) + ";"
    sqlfile = config.tmpdir + "/temp.sql"
    f = file(sqlfile, "w")
    f.write(sql)
    f.close()
    self.closeFiles()
    cmd = '''%s -h %s -f "%s" %s''' % (config.psqlpath, self.qdb.hostname,
                                       sqlfile, self.qdb.dbname)
    proc = popen2.Popen3(cmd)
    # block on psql's stdout so we don't return before the load completes
    qlog.debug('psql response:\n' + proc.fromchild.read())
    proc.wait()
    qlog.info("...bulk loaded in %s seconds." % (time.time() - start, ))
def DispatchNewResources(self):
    '''Refresh the unique-signature set, compute each signature's
    extent, then insert the new resources.'''
    qlog.debug("Dispatch New Resources")
    self._RefreshUniqueSignatures()
    for sig in self.UniqueSignatures():
        qlog.info("Computing extent for signature %s" % (sig, ))
        sig.ComputeExtent()
    # resource table is deprecated
    self._InsertNewResources()
def DispatchNewResources(self):
    # Rebuild the unique-signature table, compute the extent of each
    # signature, then insert the newly-dispatched resources.
    qlog.debug("Dispatch New Resources")
    self._RefreshUniqueSignatures()
    for s in self.UniqueSignatures():
        qlog.info("Computing extent for signature %s" % (s, ))
        s.ComputeExtent()
    # resource table is deprecated
    self._InsertNewResources()
def execCommand(self, qry):
    '''Executes a SQL statement, ignoring the results.

    Resets the connection first if it reports a bad status.
    '''
    logger.debug('Executing command: ' + qry)
    # Check to see if the connection is valid
    if self.dbconn.status != 1:
        logger.info("DB Connection bad, attempting reset")
        self.dbconn.reset()
    # result is deliberately discarded (previously bound to an unused
    # local variable)
    self.dbconn.query(qry)
def Describe(key):
    '''Return (column, value) pairs describing the resource with the
    given userkey.

    Raises ValueError if no signature yields a matching resource.
    Fix: rs and s are initialised before the loop -- previously an
    empty UniqueSignatures() left them unbound and raised NameError
    instead of the intended ValueError.
    '''
    qlog.info("Describe(%s)" % (key, ))
    rs = []
    s = None
    for s in sm.UniqueSignatures():
        q = s.ConjunctiveQuery(conditions=[('userkey', key)], wildcard=False)
        rs = db.execQuery(q)
        if len(rs) != 0:
            break
    if len(rs) == 0:
        raise ValueError("no resource '%s' was found" % (key, ))
    qlog.debug("columns: %s" % (s.rawcolumns(), ))
    return zip(s.rawcolumns(), [str(x) for x in rs[0]])
def Describe(key):
    '''Look up a resource by userkey across all unique signatures and
    return its (column, value) pairs.'''
    qlog.info("Describe(%s)" % (key,))
    for sig in sm.UniqueSignatures():
        query = sig.ConjunctiveQuery(conditions=[('userkey', key)],
                                     wildcard=False)
        rows = db.execQuery(query)
        if len(rows) != 0:
            # first signature with a hit wins
            break
    if len(rows) == 0:
        raise ValueError("no resource '%s' was found" % (key,))
    qlog.debug("columns: %s" % (sig.rawcolumns(),))
    return zip(sig.rawcolumns(), [str(x) for x in rows[0]])
def PathValues(path, conditions, property, offset=0, limit='all', sorted=False):
    '''Retrieve values of property for resources that 1) satisfy conditions and 2) are avilable in the path context provided.'''
    # NOTE(review): `property` and `sorted` shadow builtins but are part
    # of the public signature, so they are left unchanged.
    raw = "PathValues(%s, %s, %s)[%s,%s]%s" % (path, conditions, property,
                                               offset, limit, sorted)
    qlog.info(raw)
    try:
        # cache key: the raw call string with unsafe characters replaced
        callstr = pattre.sub('_', raw)
        start_time = time.time()
        if iscached(callstr):
            # cache hit: refresh the entry and unwrap the stored rows
            tuples = getCachedResults(callstr)
            updateCache(callstr)
            results = [t[0] for t in tuples]
        else:
            # ----------------
            if not path:
                # no path context: plain value lookup
                results = ValidValues(conditions, property, offset, limit,
                                      sorted)
            else:
                # extend the path with this step and run a paged
                # traversal query
                newpath = path + [(conditions, property)]
                q = TraverseQuery(newpath)
                q = PagedQuery(q, offset, limit, sorted)
                tuples = db.execQuery(q)
                tuples = asList(tuples)
                results = [t[0] for t in tuples]
            # ----------------
        t = time.time() - start_time
        # slow, uncached calls get cached for next time.
        # NOTE(review): cacheQuery stores `results` (already-unwrapped
        # values) while the cache-hit branch re-unwraps rows with t[0]
        # -- confirm the cache layer re-wraps rows on store.
        if t > INTERACTIVE_SPEED and not iscached(callstr):
            cacheQuery(callstr, results)
        qlog.debug(str(results))
        qlog.info("----- finished in: %f secs" % (t, ))
        return results
    except Exception, e:
        (et, v, t) = sys.exc_info()
        qlog.ExceptionMessage(et, v, t)
        raise
def LoadTriples(self, fname): '''Load triples from an ASCII file. Use config.py to configure delimiters''' # TODO: support RDF and N-triples format, at least t = time.time() qlog.info("Loading triples from %s" % (fname, )) cmd = '''%s -h %s -c "%s" %s''' % (config.psqlpath, self.qdb.hostname, queries.copy_triples( fname, csv=""), self.qdb.dbname) flusher = popen2.Popen3(cmd) output = flusher.fromchild # don't quit till this process finishes qlog.debug('psql response:\n' + output.read()) flusher.wait() qlog.info("...triples loaded in %s seconds." % (time.time() - t, ))
def LoadTriples(self, fname):
    '''Bulk-load triples from an ASCII file via a psql COPY command.
    Delimiters are configured in config.py.'''
    # TODO: support RDF and N-triples format, at least
    started = time.time()
    qlog.info("Loading triples from %s" % (fname,))
    copy_sql = queries.copy_triples(fname, csv="")
    cmd = '''%s -h %s -c "%s" %s''' % (config.psqlpath, self.qdb.hostname,
                                       copy_sql, self.qdb.dbname)
    proc = popen2.Popen3(cmd)
    # reading psql's stdout blocks until the COPY has completed
    qlog.debug('psql response:\n' + proc.fromchild.read())
    proc.wait()
    qlog.info("...triples loaded in %s seconds." % (time.time() - started,))
def export_LoadASCIITriples(self, triples, delim):
    '''XML-RPC handler: persist `triples` to a temp file, clear the
    triples table, and COPY the file into the database.

    Bug fix: the original reused one local (`path`) for both the temp
    data file and config.psqlpath, so the COPY command was handed the
    psql binary's path instead of the data file.
    '''
    db = config.dbname
    triplefile = '/tmp/triples.quarry'
    f = file(triplefile, 'w')
    f.write(triples)
    f.close()
    psql = config.psqlpath
    cmd = '%s -d %s -c "%s"' % (psql, db, queries.clear_triples)
    qlog.info('shell command: "%s"' % (cmd, ))
    os.system(cmd)
    # COPY from the data file, not the psql binary
    cmd = '%s -d %s -c "%s"' % (psql, db,
                                queries.copy_triples(triplefile, delim))
    qlog.info('shell command: "%s"' % (cmd, ))
    os.system(cmd)
    s = sigmanager.SignatureManager()
    #s.ProcessTriples()
    return True
def export_LoadASCIITriples(self, triples, delim):
    '''Write `triples` to /tmp, wipe the triples table, then COPY the
    new file into the database.  Returns True.

    Bug fix: `path` was rebound from the temp-file name to
    config.psqlpath before building the COPY statement, so
    copy_triples() received the psql binary's path rather than the
    data file it was supposed to load.
    '''
    db = config.dbname
    datafile = '/tmp/triples.quarry'
    f = file(datafile, 'w')
    f.write(triples)
    f.close()
    psql = config.psqlpath
    cmd = '%s -d %s -c "%s"' % (psql, db, queries.clear_triples)
    qlog.info('shell command: "%s"' % (cmd,))
    os.system(cmd)
    cmd = '%s -d %s -c "%s"' % (psql, db, queries.copy_triples(datafile, delim))
    qlog.info('shell command: "%s"' % (cmd,))
    os.system(cmd)
    s = sigmanager.SignatureManager()
    #s.ProcessTriples()
    return True
def ReapHarvest(self): qlog.info("Reaping harvest (computing signatures)") t = time.time() self.qdb.execCommand('begin transaction;') try: #self.DeleteExistingResources() self.ComputeResourceSignatures() self.DispatchNewResources() self.ClearStagingArea() self.ClearCache() except: (e, v, t) = sys.exc_info() qlog.critical(str(e) + ": " + str(v)) self.qdb.execCommand('rollback;') # re-raise the error; it's probably fatal raise e, v, t return False else: self.qdb.execCommand('commit;') qlog.info("...reaped in %s seconds." % (time.time() - t,)) return True
def PathValues(path, conditions, property, offset=0, limit='all', sorted=False): '''Retrieve values of property for resources that 1) satisfy conditions and 2) are avilable in the path context provided.''' raw = "PathValues(%s, %s, %s)[%s,%s]%s" % (path, conditions,property,offset,limit,sorted) qlog.info(raw) try: callstr = pattre.sub('_', raw) start_time = time.time() if iscached(callstr): tuples = getCachedResults(callstr) updateCache(callstr) results = [t[0] for t in tuples] else: # ---------------- if not path: results = ValidValues(conditions, property, offset, limit, sorted) else: newpath = path + [(conditions, property)] q = TraverseQuery(newpath) q = PagedQuery(q, offset, limit, sorted) tuples = db.execQuery(q) tuples = asList(tuples) results = [t[0] for t in tuples] # ---------------- t = time.time() - start_time if t > INTERACTIVE_SPEED and not iscached(callstr): cacheQuery(callstr, results) qlog.debug(str(results)) qlog.info("----- finished in: %f secs" % (t,)) return results except Exception, e: (et,v,t) = sys.exc_info() qlog.ExceptionMessage(et,v,t) raise
def ReapHarvest(self): qlog.info("Reaping harvest (computing signatures)") t = time.time() self.qdb.execCommand('begin transaction;') try: #self.DeleteExistingResources() self.ComputeResourceSignatures() self.DispatchNewResources() self.ClearStagingArea() self.ClearCache() except: (e, v, t) = sys.exc_info() qlog.critical(str(e) + ": " + str(v)) self.qdb.execCommand('rollback;') # re-raise the error; it's probably fatal raise e, v, t return False else: self.qdb.execCommand('commit;') qlog.info("...reaped in %s seconds." % (time.time() - t, )) return True
def PathProperties(path, conds):
    '''Retrieve unique properties for resources that 1) satisfy conditions and 2) are avilable in the path context provided.'''
    raw = "PathProperties(%s, %s)" % (path, conds)
    qlog.info(raw)
    try:
        # normalized cache key for this call
        callstr = pattre.sub('_', raw)
        start_time = time.time()
        if iscached(callstr):
            # cache hit: refresh the entry; results is ('', [props])
            tuples = getCachedResults(callstr)
            updateCache(callstr)
            results = ('', [t[0] for t in tuples])
        else:
            # ----------------
            if not path:
                # no path context: all valid properties for conds
                props = ValidProps(conds)
                results = '', props
            elif not conds:
                # path only: properties reachable via the traversal
                results = PropertiesOf(TraverseQuery(path), True)
            else:
                # both: extend the path with a userkey step and traverse
                newpath = path + [(conds, 'userkey')]
                results = Traverse(newpath)
            # ----------------
        t = time.time() - start_time
        # only the property list (results[1]) is cached for slow calls
        if t > INTERACTIVE_SPEED and not iscached(callstr):
            cacheQuery(callstr, results[1])
        qlog.debug(str(results))
        qlog.info("----- finished in: %f secs" % (t, ))
        return results
    except Exception, e:
        (et, v, t) = sys.exc_info()
        qlog.ExceptionMessage(et, v, t)
        raise
def PathProperties(path, conds): '''Retrieve unique properties for resources that 1) satisfy conditions and 2) are avilable in the path context provided.''' raw = "PathProperties(%s, %s)" % (path, conds) qlog.info(raw) try: callstr = pattre.sub('_', raw) start_time = time.time() if iscached(callstr): tuples = getCachedResults(callstr) updateCache(callstr) results = ('', [t[0] for t in tuples]) else: # ---------------- if not path: props = ValidProps(conds) results = '', props elif not conds: results = PropertiesOf(TraverseQuery(path), True) else: newpath = path + [(conds, 'userkey')] results = Traverse(newpath) # ---------------- t = time.time() - start_time if t > INTERACTIVE_SPEED and not iscached(callstr): cacheQuery(callstr, results[1]) qlog.debug(str(results)) qlog.info("----- finished in: %f secs" % (t,)) return results except Exception, e: (et,v,t) = sys.exc_info() qlog.ExceptionMessage(et,v,t) raise
def harvestDir(self, rundir, loader=bulkloader.BulkLoader()): qlog.info("Harvesting %s" % (rundir,)) t = time.time() if not os.path.exists(rundir): print "%s does not exist." %(rundir,) qlog.error("run %s does not exist. (broken link?)") return for root, dirs, files in os.walk(rundir): for fname in files: fullpath = "%s/%s" % (root, fname) try: ds = self.harvest(fullpath) except: (e, v, t) = sys.exc_info() print "Error processing %s/%s: %s: %s, %s" % (root, fname, e,v,t) sys.exit(1) return if ds: loader.addresource(fullpath) for d in ds: loader.adddescriptor(d) qlog.info("...harvested in %s seconds." % (time.time() - t,))
def ProcessTriples(self):
    '''Extract resources and descriptors from raw triples into the
    staging area.'''
    # some data may have been loaded as raw triples;
    # load these into the staging area
    started = time.time()
    qlog.info("Processing Triples...")
    self.qdb.execCommand(queries.resources_from_triples)
    qlog.info("...extracted resources in %s seconds."
              % (time.time() - started,))
    started = time.time()
    descriptor_sql = queries.descriptors_from_triples(
        config.db_multivalue_delimiter)
    self.qdb.execCommand(descriptor_sql)
    qlog.info("...extracted descriptors in %s seconds."
              % (time.time() - started,))
def ProcessTriples(self):
    # some data may have been loaded as raw triples
    # load these into the staging area
    t = time.time()
    qlog.info("Processing Triples...")
    # first pass: derive resource rows from the raw triples
    qr = queries.resources_from_triples
    self.qdb.execCommand(qr)
    qlog.info("...extracted resources in %s seconds." % (time.time() - t, ))
    t = time.time()
    # second pass: derive descriptor rows, splitting multi-valued
    # fields on the configured delimiter
    qd = queries.descriptors_from_triples(config.db_multivalue_delimiter)
    self.qdb.execCommand(qd)
    qlog.info("...extracted descriptors in %s seconds." % (time.time() - t, ))
# NOTE(review): this chunk begins mid-method -- the first statements are
# the tail of export_LoadASCIITriples (its `def` is outside this chunk).
        qlog.info('shell command: "%s"' % (cmd, ))
        os.system(cmd)
        s = sigmanager.SignatureManager()
        #s.ProcessTriples()
        return True

    def export_Test(self, xs):
        # echo handler: returns its argument unchanged (connectivity check)
        return xs

    def serve_forever(self):
        # loop on handle_request() until export_kill() flips self.quit
        self.quit = 0
        while not self.quit:
            self.handle_request()

    def export_kill(self):
        # remote shutdown: stop the serve loop and close the socket
        self.quit = 1
        self.server_close()
        return 1


if __name__ == "__main__":
    # Start the quarry server bound to this host's address and the
    # configured port; Ctrl-C shuts it down cleanly.
    try:
        qlog.info("Starting quarry server....")
        server = QuarryServer(
            (socket.gethostbyname(socket.gethostname()), config.port))
        #server.register_introspection_functions()
        server.serve_forever()
    except KeyboardInterrupt:
        server.export_kill()
def _RefreshUniqueSignatures(self):
    '''Insert newly-seen signatures into the signature table and log
    the resulting total.'''
    self.qdb.Insert("signature", queries.new_signatures)
    count = self.qdb.execQuery("SELECT count(*) from signature")[0][0]
    qlog.info("Found %s unique signatures" % (count,))
def _RefreshUniqueSignatures(self):
    # Stage any new signatures into the signature table, then report
    # how many unique signatures exist in total.
    qry = queries.new_signatures
    self.qdb.Insert("signature", qry)
    sql = "SELECT count(*) from signature"
    S = self.qdb.execQuery(sql)[0][0]
    qlog.info("Found %s unique signatures" % (S, ))
def log_message(self, format, *args):
    '''Send request-handler log lines to qlog instead of the default
    destination.  (`format` keeps its inherited name; it shadows no
    caller-visible state.)'''
    message = format % args
    qlog.info(message)
# NOTE(review): this chunk begins mid-method -- the first statements are
# the tail of export_LoadASCIITriples (its `def` is outside this chunk).
        qlog.info('shell command: "%s"' % (cmd,))
        os.system(cmd)
        s = sigmanager.SignatureManager()
        #s.ProcessTriples()
        return True

    def export_Test(self, xs):
        # echo handler: returns its argument unchanged (connectivity check)
        return xs

    def serve_forever(self):
        # loop on handle_request() until export_kill() flips self.quit
        self.quit = 0
        while not self.quit:
            self.handle_request()

    def export_kill(self):
        # remote shutdown: stop the serve loop and close the socket
        self.quit = 1
        self.server_close()
        return 1


if __name__ == "__main__":
    # Start the quarry server bound to this host's address and the
    # configured port; Ctrl-C shuts it down cleanly.
    try:
        qlog.info("Starting quarry server....")
        server = QuarryServer((socket.gethostbyname(socket.gethostname()),
                               config.port))
        #server.register_introspection_functions()
        server.serve_forever()
    except KeyboardInterrupt:
        server.export_kill()