コード例 #1
0
ファイル: qdb.py プロジェクト: billhowe/quarry
    def execQuery(self, qry):
      '''Execute a SQL statement and return its result rows.

      qry -- SQL text; surrounding whitespace is stripped before it is run.

      self.connect() is called first, and the connection is reset when its
      status is not 1.

      Returns whatever getresult() yields on the query response, or an
      empty list when qry is blank or the response has no getresult().
      '''
      self.connect()
      logger.debug('Executing query: ' + qry)
      qry = qry.strip()
      if not qry:
        return []

      # Check to see if the connection is valid
      if self.dbconn.status != 1:
        logger.info("DB Connection bad, attempting reset")
        self.dbconn.reset()

      response = self.dbconn.query(qry)
      # Look for getresult() instead of testing truthiness: a falsy response
      # used to fall through and return None, which breaks callers that
      # take len() of, or index into, the result.
      getresult = getattr(response, 'getresult', None)
      if getresult is None:
        return []
      return getresult()
コード例 #2
0
ファイル: quarryLOAD.py プロジェクト: billhowe/quarry
def LoadByRegEx(coriedir="/home/workspace/ccalmr/forecasts/forecasts_ref/2004/", regexp='2004-004'):
  '''Harvest and bulk load the run directories found directly under coriedir.

  coriedir -- directory whose immediate subdirectories are candidate runs
  regexp   -- pattern applied with re.match (anchored at the start) to
              each subdirectory name

  Every matching directory is passed to Harvester.harvestDir() with a
  shared BulkLoader; the harvest is then loaded and
  SignatureManager.ReapHarvest() is run.
  '''
  qlog.info("Starting load job on %s/%s" % (coriedir, regexp))
  t1 = time.time()

  # a regexp for catching only real run dirs
  e = re.compile(regexp)

  # Only the first os.walk() entry (coriedir itself) is inspected, since each
  # selected directory is handed to harvestDir() as a whole.  os.path.join
  # builds valid paths whether or not coriedir ends with a slash; plain
  # string concatenation only did so when it ended with one.
  paths = []
  for root, dirs, files in os.walk(coriedir):
    paths = [os.path.join(root, d) for d in dirs if e.match(d)]
    break

  db = qdb.quarrydb()

  h = harvest.Harvester()
  b = bulkloader.BulkLoader(db)
  b.truncateFiles()
  sm = sigmanager.SignatureManager(db)

  for p in paths:
    h.harvestDir(p, b)

  b.closeFiles()
  b.loadharvest()

  sm.ReapHarvest()

  qlog.info("Load job finished in %s seconds." % (time.time() - t1,))
コード例 #3
0
ファイル: harvest.py プロジェクト: billhowe/quarry
    def harvestDir(self, rundir, loader=bulkloader.BulkLoader()):
        '''Harvest every file under rundir and feed the results to loader.

        rundir -- directory to walk recursively
        loader -- receives addresource(path) for each file that yields
                  descriptors, followed by adddescriptor(d) for each one

        Returns None.  A missing rundir is reported and skipped.  Any
        Exception raised by self.harvest() is printed and the process
        exits with status 1.
        '''
        # NOTE(review): the default loader is created once, when this def
        # is executed, and is shared by every call that omits the
        # argument -- confirm that this is intended.
        qlog.info("Harvesting %s" % (rundir, ))
        started = time.time()

        if not os.path.exists(rundir):
            print("%s does not exist." % (rundir, ))
            # Fill in the placeholder so the log names the missing run.
            qlog.error("run %s does not exist. (broken link?)" % (rundir, ))
            return
        for root, dirs, files in os.walk(rundir):
            for fname in files:
                fullpath = "%s/%s" % (root, fname)
                try:
                    ds = self.harvest(fullpath)
                except Exception:
                    # Distinct names keep the start time above intact.
                    exc_type, exc_value, exc_tb = sys.exc_info()
                    print("Error processing %s/%s: %s: %s, %s" %
                          (root, fname, exc_type, exc_value, exc_tb))
                    sys.exit(1)
                if ds:
                    loader.addresource(fullpath)
                    for d in ds:
                        loader.adddescriptor(d)

        qlog.info("...harvested in %s seconds." % (time.time() - started, ))
コード例 #4
0
ファイル: quarryLOAD.py プロジェクト: billhowe/quarry
def LoadByRegEx(
        coriedir="/home/workspace/ccalmr/forecasts/forecasts_ref/2004/",
        regexp='2004-004'):
    '''Harvest and bulk load the run directories found directly under coriedir.

    coriedir -- directory whose immediate subdirectories are candidate runs
    regexp   -- pattern applied with re.match (anchored at the start) to
                each subdirectory name

    Every matching directory is passed to Harvester.harvestDir() with a
    shared BulkLoader; the harvest is then loaded and
    SignatureManager.ReapHarvest() is run.
    '''
    qlog.info("Starting load job on %s/%s" % (coriedir, regexp))
    t1 = time.time()

    # a regexp for catching only real run dirs
    e = re.compile(regexp)

    # Only the first os.walk() entry (coriedir itself) is inspected, since
    # each selected directory is handed to harvestDir() as a whole.
    # os.path.join builds valid paths whether or not coriedir ends with a
    # slash; plain string concatenation only did so when it ended with one.
    paths = []
    for root, dirs, files in os.walk(coriedir):
        paths = [os.path.join(root, d) for d in dirs if e.match(d)]
        break

    db = qdb.quarrydb()

    h = harvest.Harvester()
    b = bulkloader.BulkLoader(db)
    b.truncateFiles()
    sm = sigmanager.SignatureManager(db)

    for p in paths:
        h.harvestDir(p, b)

    b.closeFiles()
    b.loadharvest()

    sm.ReapHarvest()

    qlog.info("Load job finished in %s seconds." % (time.time() - t1, ))
コード例 #5
0
    def execQuery(self, qry):
        '''Execute a SQL statement and return its result rows.

        qry -- SQL text; surrounding whitespace is stripped before it is run.

        self.connect() is called first, and the connection is reset when
        its status is not 1.

        Returns whatever getresult() yields on the query response, or an
        empty list when qry is blank or the response has no getresult().
        '''
        self.connect()
        logger.debug('Executing query: ' + qry)
        qry = qry.strip()
        if not qry:
            return []

        # Check to see if the connection is valid
        if self.dbconn.status != 1:
            logger.info("DB Connection bad, attempting reset")
            self.dbconn.reset()

        response = self.dbconn.query(qry)
        # Look for getresult() instead of testing truthiness: a falsy
        # response used to fall through and return None, which breaks
        # callers that take len() of, or index into, the result.
        getresult = getattr(response, 'getresult', None)
        if getresult is None:
            return []
        return getresult()
コード例 #6
0
ファイル: bulkloader.py プロジェクト: billhowe/quarry
    def loadharvest(self, dir=config.datadir):
      '''Uses psql to stage harvested data in the quarry database.

      dir -- directory passed to queries.load_resources() and
             queries.load_descriptors(); defaults to config.datadir.

      The two load statements are wrapped in one transaction, written to
      config.tmpdir + "/temp.sql" and executed with psql -f.  psql output
      is logged at debug level and a non-zero exit status is logged as an
      error.  Returns None.
      '''
      qlog.info("Loading harvest from %s" % (dir,))
      t = time.time()

      sql = "%s;\n %s;\n %s;\n %s;" % ("begin transaction",
                                       queries.load_resources(dir),
                                       queries.load_descriptors(dir),
                                       "commit")
      sqlfile = config.tmpdir + "/temp.sql"

      # try/finally releases the handle even when the write fails.
      f = open(sqlfile, "w")
      try:
        f.write(sql)
      finally:
        f.close()

      self.closeFiles()
      cmd = '''%s -h %s -f "%s" %s''' % (config.psqlpath,
                                         self.qdb.hostname,
                                         sqlfile,
                                         self.qdb.dbname)

      flusher = popen2.Popen3(cmd)
      output = flusher.fromchild
      # don't quit till this process finishes
      qlog.debug('psql response:\n' + output.read())
      status = flusher.wait()
      if status:
        # Surface a failed psql run instead of discarding its status.
        qlog.error("psql exited with status %s while running %s" % (status, sqlfile))

      qlog.info("...bulk loaded in %s seconds." % (time.time() - t,))
コード例 #7
0
ファイル: bulkloader.py プロジェクト: billhowe/quarry
    def loadharvest(self, dir=config.datadir):
        '''Uses psql to stage harvested data in the quarry database.

        dir -- directory passed to queries.load_resources() and
               queries.load_descriptors(); defaults to config.datadir.

        The two load statements are wrapped in one transaction, written to
        config.tmpdir + "/temp.sql" and executed with psql -f.  psql output
        is logged at debug level and a non-zero exit status is logged as an
        error.  Returns None.
        '''
        qlog.info("Loading harvest from %s" % (dir, ))
        t = time.time()

        sql = "%s;\n %s;\n %s;\n %s;" % (
            "begin transaction", queries.load_resources(dir),
            queries.load_descriptors(dir), "commit")
        sqlfile = config.tmpdir + "/temp.sql"

        # try/finally releases the handle even when the write fails.
        f = open(sqlfile, "w")
        try:
            f.write(sql)
        finally:
            f.close()

        self.closeFiles()
        cmd = '''%s -h %s -f "%s" %s''' % (config.psqlpath, self.qdb.hostname,
                                           sqlfile, self.qdb.dbname)

        flusher = popen2.Popen3(cmd)
        output = flusher.fromchild
        # don't quit till this process finishes
        qlog.debug('psql response:\n' + output.read())
        status = flusher.wait()
        if status:
            # Surface a failed psql run instead of discarding its status.
            qlog.error("psql exited with status %s while running %s" %
                       (status, sqlfile))

        qlog.info("...bulk loaded in %s seconds." % (time.time() - t, ))
コード例 #8
0
ファイル: sigmanager.py プロジェクト: billhowe/quarry
  def DispatchNewResources(self):
    '''Refresh the signatures, compute each one's extent, then insert new resources.'''
    qlog.debug("Dispatch New Resources")
    self._RefreshUniqueSignatures()

    signatures = self.UniqueSignatures()
    for signature in signatures:
      message = "Computing extent for signature %s" % (signature,)
      qlog.info(message)
      signature.ComputeExtent()

    # The resource table is flagged as deprecated but is still filled here.
    self._InsertNewResources()
コード例 #9
0
ファイル: sigmanager.py プロジェクト: billhowe/quarry
    def DispatchNewResources(self):
        '''Refresh the signatures, compute each one's extent, then insert new resources.'''
        qlog.debug("Dispatch New Resources")
        self._RefreshUniqueSignatures()

        signatures = self.UniqueSignatures()
        for signature in signatures:
            message = "Computing extent for signature %s" % (signature, )
            qlog.info(message)
            signature.ComputeExtent()

        # The resource table is flagged as deprecated but is still filled here.
        self._InsertNewResources()
コード例 #10
0
    def execCommand(self, qry):
        '''Run a SQL statement for its effect only; nothing is returned.

        The existing self.dbconn is used as-is (connect() is not called
        here) and is reset first when its status is not 1.
        '''
        logger.debug('Executing command: ' + qry)

        # A status other than 1 is treated as a broken connection.
        needs_reset = self.dbconn.status != 1
        if needs_reset:
            logger.info("DB Connection bad, attempting reset")
            self.dbconn.reset()

        # The response is deliberately discarded.
        self.dbconn.query(qry)
コード例 #11
0
ファイル: qdb.py プロジェクト: billhowe/quarry
    def execCommand(self, qry):
      '''Run a SQL statement for its effect only; nothing is returned.

      The existing self.dbconn is used as-is (connect() is not called
      here) and is reset first when its status is not 1.
      '''
      logger.debug('Executing command: ' + qry)

      # A status other than 1 is treated as a broken connection.
      needs_reset = self.dbconn.status != 1
      if needs_reset:
        logger.info("DB Connection bad, attempting reset")
        self.dbconn.reset()

      # The response is deliberately discarded.
      self.dbconn.query(qry)
コード例 #12
0
def Describe(key):
    '''Return (column, value) pairs for the resource whose userkey is key.

    Each signature from sm.UniqueSignatures() is queried in turn.  The
    first one that yields rows supplies the column names (rawcolumns())
    and the first row, with every value converted via str().

    Raises ValueError when no signature yields a row, including the case
    where there are no signatures at all.
    '''
    qlog.info("Describe(%s)" % (key, ))
    for s in sm.UniqueSignatures():
        q = s.ConjunctiveQuery(conditions=[('userkey', key)], wildcard=False)
        rs = db.execQuery(q)
        # Returning from inside the loop means nothing below depends on
        # loop variables that are unbound when there are no signatures.
        if rs:
            qlog.debug("columns: %s" % (s.rawcolumns(), ))
            return zip(s.rawcolumns(), [str(x) for x in rs[0]])

    raise ValueError("no resource '%s' was found" % (key, ))
コード例 #13
0
ファイル: quarry.py プロジェクト: billhowe/quarry
def Describe(key):
  '''Return (column, value) pairs for the resource whose userkey is key.

  Each signature from sm.UniqueSignatures() is queried in turn.  The first
  one that yields rows supplies the column names (rawcolumns()) and the
  first row, with every value converted via str().

  Raises ValueError when no signature yields a row, including the case
  where there are no signatures at all.
  '''
  qlog.info("Describe(%s)" % (key,))
  for s in sm.UniqueSignatures():
    q = s.ConjunctiveQuery(conditions=[('userkey', key)], wildcard=False)
    rs = db.execQuery(q)
    # Returning from inside the loop means nothing below depends on loop
    # variables that are unbound when there are no signatures.
    if rs:
      qlog.debug("columns: %s" % (s.rawcolumns(),))
      return zip(s.rawcolumns(), [str(x) for x in rs[0]])

  raise ValueError("no resource '%s' was found" % (key,))
コード例 #14
0
def PathValues(path,
               conditions,
               property,
               offset=0,
               limit='all',
               sorted=False):
    '''Retrieve values of property for resources that 1) satisfy conditions
    and 2) are available in the path context provided.

    Cached calls are answered from the cache.  Otherwise the values are
    computed, and the call is cached when it took longer than
    INTERACTIVE_SPEED.  Any exception is reported through
    qlog.ExceptionMessage and re-raised.
    '''
    raw = "PathValues(%s, %s, %s)[%s,%s]%s" % (path, conditions, property,
                                               offset, limit, sorted)
    qlog.info(raw)
    try:
        callstr = pattre.sub('_', raw)
        start_time = time.time()

        # Fast path: serve the call straight from the cache.
        if iscached(callstr):
            cached_rows = getCachedResults(callstr)
            updateCache(callstr)
            return [row[0] for row in cached_rows]

        if path:
            # Extend the path with this step and run it as a paged query.
            query = TraverseQuery(path + [(conditions, property)])
            query = PagedQuery(query, offset, limit, sorted)
            rows = asList(db.execQuery(query))
            results = [row[0] for row in rows]
        else:
            results = ValidValues(conditions, property, offset, limit,
                                  sorted)

        elapsed = time.time() - start_time
        if elapsed > INTERACTIVE_SPEED and not iscached(callstr):
            cacheQuery(callstr, results)
        qlog.debug(str(results))
        qlog.info("----- finished in: %f secs" % (elapsed, ))
        return results

    except Exception:
        exc_type, exc_value, exc_tb = sys.exc_info()
        qlog.ExceptionMessage(exc_type, exc_value, exc_tb)
        raise
コード例 #15
0
ファイル: bulkloader.py プロジェクト: billhowe/quarry
    def LoadTriples(self, fname):
        '''Load triples from the ASCII file fname by running psql.

        Delimiters are configured in config.py.  psql output is logged at
        debug level.  Returns None.
        '''
        # TODO: support RDF and N-triples format, at least
        started = time.time()

        qlog.info("Loading triples from %s" % (fname, ))
        psql = config.psqlpath
        host = self.qdb.hostname
        copy_sql = queries.copy_triples(fname, csv="")
        dbname = self.qdb.dbname
        cmd = '''%s -h %s -c "%s" %s''' % (psql, host, copy_sql, dbname)

        proc = popen2.Popen3(cmd)
        # Reading to EOF and then waiting keeps us here until psql is done.
        response = proc.fromchild.read()
        qlog.debug('psql response:\n' + response)
        proc.wait()

        qlog.info("...triples loaded in %s seconds." %
                  (time.time() - started, ))
コード例 #16
0
ファイル: bulkloader.py プロジェクト: billhowe/quarry
    def LoadTriples(self, fname):
      '''Load triples from the ASCII file fname by running psql.

      Delimiters are configured in config.py.  psql output is logged at
      debug level.  Returns None.
      '''
      # TODO: support RDF and N-triples format, at least
      started = time.time()

      qlog.info("Loading triples from %s" % (fname,))
      psql = config.psqlpath
      host = self.qdb.hostname
      copy_sql = queries.copy_triples(fname, csv="")
      dbname = self.qdb.dbname
      cmd = '''%s -h %s -c "%s" %s''' % (psql, host, copy_sql, dbname)

      proc = popen2.Popen3(cmd)
      # Reading to EOF and then waiting keeps us here until psql is done.
      response = proc.fromchild.read()
      qlog.debug('psql response:\n' + response)
      proc.wait()

      qlog.info("...triples loaded in %s seconds." % (time.time() - started,))
コード例 #17
0
ファイル: quarryWS.py プロジェクト: billhowe/quarry
    def export_LoadASCIITriples(self, triples, delim):
        '''Write triples to a scratch file and load that file through psql.

        triples -- text written verbatim to /tmp/triples.quarry
        delim   -- delimiter handed to queries.copy_triples()

        Runs two psql commands with os.system: queries.clear_triples, then
        the copy statement for the file just written.  Always returns
        True; the psql exit codes are not inspected.
        '''
        db = config.dbname
        triples_path = '/tmp/triples.quarry'
        f = open(triples_path, 'w')
        try:
            f.write(triples)
        finally:
            # Release the handle even when the write fails.
            f.close()

        psql = config.psqlpath

        # NOTE(review): delim is interpolated into a shell command line
        # without escaping; confirm callers are trusted or quote it.
        cmd = '%s -d %s -c "%s"' % (psql, db, queries.clear_triples)
        qlog.info('shell command: "%s"' % (cmd, ))
        os.system(cmd)
        # Copy from the triples file.  Keeping the psql executable and the
        # data file in separate variables prevents the executable's path
        # from being passed as the file to load.
        cmd = '%s -d %s -c "%s"' % (
            psql, db, queries.copy_triples(triples_path, delim))
        qlog.info('shell command: "%s"' % (cmd, ))
        os.system(cmd)

        # The ProcessTriples() step is currently disabled; the manager is
        # still constructed as before.
        sigmanager.SignatureManager()
        return True
コード例 #18
0
ファイル: quarryWS.py プロジェクト: billhowe/quarry
 def export_LoadASCIITriples(self, triples, delim):
   '''Write triples to a scratch file and load that file through psql.

   triples -- text written verbatim to /tmp/triples.quarry
   delim   -- delimiter handed to queries.copy_triples()

   Runs two psql commands with os.system: queries.clear_triples, then the
   copy statement for the file just written.  Always returns True; the
   psql exit codes are not inspected.
   '''
   db = config.dbname
   triples_path = '/tmp/triples.quarry'
   f = open(triples_path, 'w')
   try:
     f.write(triples)
   finally:
     # Release the handle even when the write fails.
     f.close()

   psql = config.psqlpath

   # NOTE(review): delim is interpolated into a shell command line without
   # escaping; confirm callers are trusted or quote it.
   cmd = '%s -d %s -c "%s"' % (psql, db, queries.clear_triples)
   qlog.info('shell command: "%s"' % (cmd,))
   os.system(cmd)
   # Copy from the triples file.  Keeping the psql executable and the data
   # file in separate variables prevents the executable's path from being
   # passed as the file to load.
   cmd = '%s -d %s -c "%s"' % (psql, db, queries.copy_triples(triples_path, delim))
   qlog.info('shell command: "%s"' % (cmd,))
   os.system(cmd)

   # The ProcessTriples() step is currently disabled; the manager is still
   # constructed as before.
   sigmanager.SignatureManager()
   return True
コード例 #19
0
ファイル: sigmanager.py プロジェクト: billhowe/quarry
 def ReapHarvest(self):
   '''Compute signatures for the staged harvest inside one transaction.

   Runs ComputeResourceSignatures, DispatchNewResources, ClearStagingArea
   and ClearCache between 'begin transaction;' and 'commit;'.

   Returns True after a successful commit.  On any error the failure is
   logged as critical, 'rollback;' is issued and the error is re-raised.
   '''
   qlog.info("Reaping harvest (computing signatures)")
   started = time.time()

   self.qdb.execCommand('begin transaction;')
   try:
     self.ComputeResourceSignatures()
     self.DispatchNewResources()
     self.ClearStagingArea()
     self.ClearCache()
   except:
     # Bare except on purpose: roll back whatever interrupted the work,
     # then let it propagate unchanged.
     exc_type, exc_value = sys.exc_info()[:2]
     qlog.critical(str(exc_type) + ": " + str(exc_value))
     self.qdb.execCommand('rollback;')
     # re-raise the error; it's probably fatal
     raise
   else:
     self.qdb.execCommand('commit;')
     qlog.info("...reaped in %s seconds." % (time.time() - started,))
     return True
コード例 #20
0
ファイル: quarry.py プロジェクト: billhowe/quarry
def PathValues(path, conditions, property, offset=0, limit='all', sorted=False):
  '''Retrieve values of property for resources that 1) satisfy conditions
  and 2) are available in the path context provided.

  Cached calls are answered from the cache.  Otherwise the values are
  computed, and the call is cached when it took longer than
  INTERACTIVE_SPEED.  Any exception is reported through
  qlog.ExceptionMessage and re-raised.
  '''
  raw = "PathValues(%s, %s, %s)[%s,%s]%s" % (path, conditions, property,
                                             offset, limit, sorted)
  qlog.info(raw)
  try:
    callstr = pattre.sub('_', raw)
    start_time = time.time()

    # Fast path: serve the call straight from the cache.
    if iscached(callstr):
      cached_rows = getCachedResults(callstr)
      updateCache(callstr)
      return [row[0] for row in cached_rows]

    if path:
      # Extend the path with this step and run it as a paged query.
      query = TraverseQuery(path + [(conditions, property)])
      query = PagedQuery(query, offset, limit, sorted)
      rows = asList(db.execQuery(query))
      results = [row[0] for row in rows]
    else:
      results = ValidValues(conditions, property, offset, limit, sorted)

    elapsed = time.time() - start_time
    if elapsed > INTERACTIVE_SPEED and not iscached(callstr):
      cacheQuery(callstr, results)
    qlog.debug(str(results))
    qlog.info("----- finished in: %f secs" % (elapsed,))
    return results

  except Exception:
    exc_type, exc_value, exc_tb = sys.exc_info()
    qlog.ExceptionMessage(exc_type, exc_value, exc_tb)
    raise
コード例 #21
0
ファイル: sigmanager.py プロジェクト: billhowe/quarry
    def ReapHarvest(self):
        '''Compute signatures for the staged harvest inside one transaction.

        Runs ComputeResourceSignatures, DispatchNewResources,
        ClearStagingArea and ClearCache between 'begin transaction;' and
        'commit;'.

        Returns True after a successful commit.  On any error the failure
        is logged as critical, 'rollback;' is issued and the error is
        re-raised.
        '''
        qlog.info("Reaping harvest (computing signatures)")
        started = time.time()

        self.qdb.execCommand('begin transaction;')
        try:
            self.ComputeResourceSignatures()
            self.DispatchNewResources()
            self.ClearStagingArea()
            self.ClearCache()
        except:
            # Bare except on purpose: roll back whatever interrupted the
            # work, then let it propagate unchanged.
            exc_type, exc_value = sys.exc_info()[:2]
            qlog.critical(str(exc_type) + ": " + str(exc_value))
            self.qdb.execCommand('rollback;')
            # re-raise the error; it's probably fatal
            raise
        else:
            self.qdb.execCommand('commit;')
            qlog.info("...reaped in %s seconds." % (time.time() - started, ))
            return True
コード例 #22
0
def PathProperties(path, conds):
    '''Retrieve unique properties for resources that 1) satisfy conditions
    and 2) are available in the path context provided.

    Cached calls are answered from the cache as ('', properties).
    Otherwise the result is computed, and its second element is cached
    when the call took longer than INTERACTIVE_SPEED.  Any exception is
    reported through qlog.ExceptionMessage and re-raised.
    '''
    raw = "PathProperties(%s, %s)" % (path, conds)
    qlog.info(raw)
    try:
        callstr = pattre.sub('_', raw)
        start_time = time.time()

        # Fast path: serve the call straight from the cache.
        if iscached(callstr):
            cached_rows = getCachedResults(callstr)
            updateCache(callstr)
            return ('', [row[0] for row in cached_rows])

        if path:
            if conds:
                results = Traverse(path + [(conds, 'userkey')])
            else:
                results = PropertiesOf(TraverseQuery(path), True)
        else:
            results = ('', ValidProps(conds))

        elapsed = time.time() - start_time
        if elapsed > INTERACTIVE_SPEED and not iscached(callstr):
            cacheQuery(callstr, results[1])
        qlog.debug(str(results))
        qlog.info("----- finished in: %f secs" % (elapsed, ))
        return results

    except Exception:
        exc_type, exc_value, exc_tb = sys.exc_info()
        qlog.ExceptionMessage(exc_type, exc_value, exc_tb)
        raise
コード例 #23
0
ファイル: quarry.py プロジェクト: billhowe/quarry
def PathProperties(path, conds):
  '''Retrieve unique properties for resources that 1) satisfy conditions
  and 2) are available in the path context provided.

  Cached calls are answered from the cache as ('', properties).  Otherwise
  the result is computed, and its second element is cached when the call
  took longer than INTERACTIVE_SPEED.  Any exception is reported through
  qlog.ExceptionMessage and re-raised.
  '''
  raw = "PathProperties(%s, %s)" % (path, conds)
  qlog.info(raw)
  try:
    callstr = pattre.sub('_', raw)
    start_time = time.time()

    # Fast path: serve the call straight from the cache.
    if iscached(callstr):
      cached_rows = getCachedResults(callstr)
      updateCache(callstr)
      return ('', [row[0] for row in cached_rows])

    if path:
      if conds:
        results = Traverse(path + [(conds, 'userkey')])
      else:
        results = PropertiesOf(TraverseQuery(path), True)
    else:
      results = ('', ValidProps(conds))

    elapsed = time.time() - start_time
    if elapsed > INTERACTIVE_SPEED and not iscached(callstr):
      cacheQuery(callstr, results[1])
    qlog.debug(str(results))
    qlog.info("----- finished in: %f secs" % (elapsed,))
    return results

  except Exception:
    exc_type, exc_value, exc_tb = sys.exc_info()
    qlog.ExceptionMessage(exc_type, exc_value, exc_tb)
    raise
コード例 #24
0
ファイル: harvest.py プロジェクト: billhowe/quarry
 def harvestDir(self, rundir, loader=bulkloader.BulkLoader()):
   '''Harvest every file under rundir and feed the results to loader.

   rundir -- directory to walk recursively
   loader -- receives addresource(path) for each file that yields
             descriptors, followed by adddescriptor(d) for each one

   Returns None.  A missing rundir is reported and skipped.  Any Exception
   raised by self.harvest() is printed and the process exits with
   status 1.
   '''
   # NOTE(review): the default loader is created once, when this def is
   # executed, and is shared by every call that omits the argument --
   # confirm that this is intended.
   qlog.info("Harvesting %s" % (rundir,))
   started = time.time()

   if not os.path.exists(rundir):
     print("%s does not exist." % (rundir,))
     # Fill in the placeholder so the log names the missing run.
     qlog.error("run %s does not exist. (broken link?)" % (rundir,))
     return
   for root, dirs, files in os.walk(rundir):
     for fname in files:
       fullpath = "%s/%s" % (root, fname)
       try:
         ds = self.harvest(fullpath)
       except Exception:
         # Distinct names keep the start time above intact.
         exc_type, exc_value, exc_tb = sys.exc_info()
         print("Error processing %s/%s: %s: %s, %s" % (root, fname, exc_type, exc_value, exc_tb))
         sys.exit(1)
       if ds:
         loader.addresource(fullpath)
         for d in ds:
           loader.adddescriptor(d)

   qlog.info("...harvested in %s seconds." % (time.time() - started,))
コード例 #25
0
ファイル: sigmanager.py プロジェクト: billhowe/quarry
 def ProcessTriples(self):
   '''Move data that was loaded as raw triples into the staging area.

   Runs queries.resources_from_triples, then the descriptors query built
   with config.db_multivalue_delimiter, logging how long each step took.
   '''
   started = time.time()
   qlog.info("Processing Triples...")
   resources_sql = queries.resources_from_triples
   self.qdb.execCommand(resources_sql)
   elapsed = time.time() - started
   qlog.info("...extracted resources in %s seconds." % (elapsed,))

   # Restart the clock so the second figure covers descriptors only.
   started = time.time()
   descriptors_sql = queries.descriptors_from_triples(config.db_multivalue_delimiter)
   self.qdb.execCommand(descriptors_sql)
   elapsed = time.time() - started
   qlog.info("...extracted descriptors in %s seconds." % (elapsed,))
コード例 #26
0
ファイル: sigmanager.py プロジェクト: billhowe/quarry
    def ProcessTriples(self):
        '''Move data that was loaded as raw triples into the staging area.

        Runs queries.resources_from_triples, then the descriptors query
        built with config.db_multivalue_delimiter, logging how long each
        step took.
        '''
        started = time.time()
        qlog.info("Processing Triples...")
        resources_sql = queries.resources_from_triples
        self.qdb.execCommand(resources_sql)
        elapsed = time.time() - started
        qlog.info("...extracted resources in %s seconds." % (elapsed, ))

        # Restart the clock so the second figure covers descriptors only.
        started = time.time()
        descriptors_sql = queries.descriptors_from_triples(
            config.db_multivalue_delimiter)
        self.qdb.execCommand(descriptors_sql)
        elapsed = time.time() - started
        qlog.info("...extracted descriptors in %s seconds." % (elapsed, ))
コード例 #27
0
ファイル: quarryWS.py プロジェクト: billhowe/quarry
        qlog.info('shell command: "%s"' % (cmd, ))
        os.system(cmd)

        s = sigmanager.SignatureManager()
        #s.ProcessTriples()
        return True

    def export_Test(self, xs):
        '''Return the argument unchanged.'''
        return xs

    def serve_forever(self):
        '''Handle requests one at a time until self.quit becomes true.'''
        self.quit = 0
        while True:
            # The flag is checked before every request, including the first.
            if self.quit:
                break
            self.handle_request()

    def export_kill(self):
        '''Set the quit flag read by serve_forever(), call server_close(), return 1.'''
        self.quit = 1
        self.server_close()
        return 1


if __name__ == "__main__":
    # Script entry point: serve on this host's resolved address and
    # config.port until interrupted with Ctrl-C.
    server = None
    try:
        qlog.info("Starting quarry server....")
        server = QuarryServer(
            (socket.gethostbyname(socket.gethostname()), config.port))
        #server.register_introspection_functions()
        server.serve_forever()
    except KeyboardInterrupt:
        # The interrupt may arrive before the server object exists.
        if server is not None:
            server.export_kill()
コード例 #28
0
ファイル: sigmanager.py プロジェクト: billhowe/quarry
 def _RefreshUniqueSignatures(self):
   '''Insert queries.new_signatures into "signature" and log the row count.'''
   self.qdb.Insert("signature", queries.new_signatures)
   rows = self.qdb.execQuery("SELECT count(*) from signature")
   # First column of the first row holds the count.
   total = rows[0][0]
   qlog.info("Found %s unique signatures" % (total,))
コード例 #29
0
ファイル: sigmanager.py プロジェクト: billhowe/quarry
 def _RefreshUniqueSignatures(self):
     '''Insert queries.new_signatures into "signature" and log the row count.'''
     self.qdb.Insert("signature", queries.new_signatures)
     rows = self.qdb.execQuery("SELECT count(*) from signature")
     # First column of the first row holds the count.
     total = rows[0][0]
     qlog.info("Found %s unique signatures" % (total, ))
コード例 #30
0
ファイル: quarryWS.py プロジェクト: billhowe/quarry
 def log_message(self, format, *args):
     '''Format the message with args and log it through qlog.info.'''
     message = format % args
     qlog.info(message)
コード例 #31
0
ファイル: quarryWS.py プロジェクト: billhowe/quarry
    qlog.info('shell command: "%s"' % (cmd,))
    os.system(cmd)
                                                                                      
    s = sigmanager.SignatureManager()
    #s.ProcessTriples()
    return True

  def export_Test(self, xs):
    '''Return the argument unchanged.'''
    return xs

  def serve_forever(self):
    '''Handle requests one at a time until self.quit becomes true.'''
    self.quit = 0
    while True:
      # The flag is checked before every request, including the first.
      if self.quit:
        break
      self.handle_request()

  def export_kill(self):
    '''Set the quit flag read by serve_forever(), call server_close(), return 1.'''
    self.quit = 1
    self.server_close()
    return 1

if __name__ == "__main__":
  # Script entry point: serve on this host's resolved address and
  # config.port until interrupted with Ctrl-C.
  server = None
  try:
    qlog.info("Starting quarry server....")
    server = QuarryServer((socket.gethostbyname(socket.gethostname()), config.port))
    #server.register_introspection_functions()
    server.serve_forever()
  except KeyboardInterrupt:
    # The interrupt may arrive before the server object exists.
    if server is not None:
      server.export_kill()

                    
コード例 #32
0
ファイル: quarryWS.py プロジェクト: billhowe/quarry
 def log_message(self, format, *args):
   '''Format the message with args and log it through qlog.info.'''
   message = format % args
   qlog.info(message)