Beispiel #1
0
  def __init__(self, server, database, username, password, viewName, dbRecordName, limit=False, lyonpath = '/sps/edelweis/kdata/data/raw/'):
    '''
      server - couchdb server, including credentials.
      database - name of the couchdb database
      username - username of the account on the Lyon CC.
      password - password for 'username'. this will use sftp.
      viewName - the database viewname in couchdbkit format. That is, for view nameed 'proc0' in the document '_design/proc',
          viewName would be 'proc/proc0'
      dbRecordName - this is the key name that is given to the database docs, such as 'proc0' or 'metaproc0'
      limit - if set to true, the method 'sendAvailableDocs' will only send one document, at maximum
      lyonpath - by default set to '/sps/edelweis/kdata/data/raw/'



    '''

    self.server = server
    self.database = database
    self.username = username
    self.password = password
    self.viewName = viewName  #something like 'proc/proc0'
    self.dbRecordName = dbRecordName  #something like 'proc0'
    self.limit = limit
    self.path = lyonpath
    self.testDocFunction = None

    #create a DBProcess instance, which will assist in uploading the
    #document to the database
    self.myProc = DBProcess( self.server, self.database, self._sendToLyon)
Beispiel #2
0
def process(couchServer, couchDbName, runList, **processKwargs):
  '''
  runList is a list of document _id's for each run that will be processed.

  the following kwargs affect the behavior
  'useProc0' -- if set to True, then this rootifies the data using the samba file found in the location doc['proc0']['file'], where the 'doc' is the CouchDB database document for that run. If set to False, or not set at all, then the file located in doc['file'] is rootified. This is used when the rootification process (proc1) is done "locally" (in production, this means on a computer in Modane -- S7)

  'ftp' -- if set to True, then this process will attempt to send the rootified data file to the ccage.in2p3.fr system. 
  
  If 'ftp' is set to True, you must supply a 'username' and 'password' in the kwargs, which will be used to gain access to ccage.in2p3.fr and transfer the data to /sps/edelweis/kdata/data/raw

  '''
  
  global myProc
  myProc = DBProcess(couchServer, couchDbName, _rootify)

  #create a list of remaining docs that will be dealt with if the job
  #is killed by the SIGXCPU sent by the Lyon CC batch system

  myRemainingDocs = []
  for anId in runList:
    myRemainingDocs.append(anId)

  #the SIGXCPU signal is sent by the Lyon Batch system - set up handler function
  signal.signal( getattr(signal, 'SIGXCPU') ,_sigxcpuHandler)

 
  #start the data processing loop
  for anId in runList:

    if caught_sigxcpu:
      _cleanUp(myRemainingDocs, couchServer, couchDbName)
      break  

    doc = myProc.get(anId)
    (doc, result) = _processOne(doc, **processKwargs)
    myProc.upload(doc)

    myRemainingDocs.remove(anId)
Beispiel #3
0
class ManagedSendToLyon:
  '''
    This class manages sending local data files to Lyon. This class is used both by runProc0 and runMetaProc0. The runProc1
    script does NOT use this class because it would be more effort to go back and recode this right now.

    To use this class, when you instantiate it, you must provide the name of the database map-reduce "view" that provides the list
    of database documents that correspond to local data files that will be transferred to Lyon. Also, you must provide the name of
    the key of the record that will be attached to the database document to record the details of this action. 

    Here's a description of what happens when you use this class.

    1. Instantiate with object (see __init__ for details of needed values)
      1b - set the testDocFunction attribute if desired (see Advanced use below)
    2. Call the method ManagedSendToLyon.sendAvailableDocs.
    3. In this method, the database mapReduce view results are checked. 
    4. For each document (doc) returned by the view...
      5. It sets the doc['status'] to 'dbRecordName in progress' and saves the doc to the database (where dbRecordName = 'proc0' or 'metaproc0' for example)
      6. Sends the local file found in doc['file'] to Lyon via sftp.
      7. Upon successful transfer of the data, sets doc['status'] to 'good' (or to 'dbRecordName failed', if unsuccessful)
      8. Information about the transfer and the file location in Lyon is stored in the doc[dbRecordName] key 
      9. The doc is saved back to the datbase.

    This class requires that the documents returned by the database View results has the key 'file', which points
    to a valid local file. That is, it must have doc['file'] for step #6 to work.

    Advanced use:
          There is an uninitiated attribute to this class called testDocFunction. The user of this class can set this
          attribute to point to an external function that takes a database document as the input and returns True/False.
          This lets the user decide if action will be taken for each particular database document. 

          Example: runProc0.py (and runMetaProc0.py) uses this to test for the existence of the _log file in the S7 data directory
          If the _log file is found, then it assumes that Jules has copied over all of the data for this run and it returns true.
          When it returns True, then this class proceeds to copy over the data (performing steps 4-9 above)



  '''

  def __init__(self, server, database, username, password, viewName, dbRecordName, limit=False, lyonpath = '/sps/edelweis/kdata/data/raw/'):
    '''
      server - couchdb server, including credentials.
      database - name of the couchdb database
      username - username of the account on the Lyon CC.
      password - password for 'username'. this will use sftp.
      viewName - the database viewname in couchdbkit format. That is, for view nameed 'proc0' in the document '_design/proc',
          viewName would be 'proc/proc0'
      dbRecordName - this is the key name that is given to the database docs, such as 'proc0' or 'metaproc0'
      limit - if set to true, the method 'sendAvailableDocs' will only send one document, at maximum
      lyonpath - by default set to '/sps/edelweis/kdata/data/raw/'



    '''

    self.server = server
    self.database = database
    self.username = username
    self.password = password
    self.viewName = viewName  #something like 'proc/proc0'
    self.dbRecordName = dbRecordName  #something like 'proc0'
    self.limit = limit
    self.path = lyonpath
    self.testDocFunction = None

    #create a DBProcess instance, which will assist in uploading the
    #document to the database
    self.myProc = DBProcess( self.server, self.database, self._sendToLyon)
    

  def _sendToLyon(self, fileName, **kwargs):
    
    print 'calling sftpToSps.send'
    
    try:
      sftpRet = sftp.send(self.username, self.password, fileName, self.path)
    except Exception as e:
      raise KDataTransferError('KDataTransferError. ManagedSendToLyon._sendToLyon \n' + str(type(e)) + ' : ' + str(e))

    return sftpRet
    
    
  def sendAvailableDocs(self):
    
    
    print '\n', str(datetime.datetime.utcnow()), ': starting ManagedSendToLyon.sendAvailableDocs \n'
    print self.viewName, self.dbRecordName

    
    if self.limit: 
      vr = self.myProc.view(self.viewName, reduce=False, limit=1)
    else:
      vr = self.myProc.view(self.viewName, reduce=False)

    successfulDocs = []
    failedDocs = []

    for row in vr:
      print str(datetime.datetime.utcnow()), 'ManagedSendToLyon.py has doc', row['id']
      
      try:
        doc = self.myProc.get(row['id'])

        if self.testDocFunction is not None:
          if self.testDocFunction(doc) is False: 
            print str(datetime.datetime.utcnow()), 'ManagedSendToLyon.py testDocFunction returned false'
            continue

        doc['status'] = self.dbRecordName + ' in progress'
        self.myProc.upload(doc)

        print '  sending %s to lyon' % doc['file']
        procDict = self.myProc.doprocess(doc['file']) #this step calls _sendToLyon


        if len(procDict) > 0:
          print 'appending database document'

          #add a few more items to the document
          procDict['date'] = str(datetime.datetime.utcnow())
          procDict['date_unixtime'] = time.time()
          procDict['processname'] = 'ManagedSendToLyon'
        
          #if len(procDict['sftpErrs']) > 0:
          #  doc['status'] = self.dbRecordName + ' failed'
          #else:
          
          doc['status'] = 'good'
          #this step will add the procDict dictionary to the 
          #database document and then upload it to the DB
          if doc.has_key(self.dbRecordName) == False:
            doc[self.dbRecordName] = {}
          
          doc[self.dbRecordName].update(procDict)
          self.myProc.upload(doc)
          successfulDocs.append(doc['_id'])

        else:
          doc['status'] = self.dbRecordName + ' failed'
          self.myProc.upload(doc)
          print 'send to lyon returned an empty dictionary!'
          failedDocs.append(doc['_id'])

      except Exception as e:
        print 'an exception has occurred'
        print e
        if doc.has_key(self.dbRecordName) == False:
            doc[self.dbRecordName] = {}
          
        doc[self.dbRecordName]['exception'] = str(type(e)) + ': ' + str(e)
        doc['status'] = self.dbRecordName + ' failed'
        #if doc.has_key(self.dbRecordName): del doc[self.dbRecordName]
        self.myProc.upload(doc)
        failedDocs.append(doc['_id'])

    return (successfulDocs, failedDocs)    
Beispiel #4
0
def setupProc(server, database, function):
    global myProc
    myProc = DBProcess(server, database, function)
    return myProc
def main(*argv):
  '''
  argv[0] is the server (http://127.0.0.1:5984)
  argv[1] is the database (datadb)
  '''
  
  print '\n', str(datetime.datetime.now()), ': starting runProc0.py \n'

  #create a DBProcess instance, which will assist in uploading the proc
  #document to the database
  myProc = DBProcess(argv[0], argv[1], scpToLyon)
  
  vr = myProc.view('proc/proc0', reduce=False)
  
  for row in vr:
    print row['id']
    doc = myProc.get(row['id'])
    print 'have doc', row['id']
    doc['status'] = 'proc0 in progress'
    myProc.upload(doc)
    try:
      procDict = myProc.doprocess(doc['file']) #this step calls rootfiyAndScp
      print 'called process'

      if len(procDict) > 0:
        #add a few more items to the document
        procDict['date'] = str(datetime.datetime.utcnow())
        procDict['processname'] = 'copySambaFileToSps'
      
        if len(procDict['scpErrs']) > 0:
          doc['status'] = 'proc0 failed'
        else:
          doc['status'] = 'good'
        #this step will add the procDict dictionary to the 
        #database document and then upload it to the DB
        if doc.has_key('proc0') == False:
          doc['proc0'] = {}
        
        doc['proc0'].update(procDict)
        myProc.upload(doc)
      
      else:
        doc['status'] = 'proc0 failed'
        myProc.upload(doc)
        print 'the process returned an empty dictionary!'
        sys.exit(-1)

    except Exception as e:
      print e
      doc['exception'] = str(type(e)) + ': ' + str(e)
      doc['status'] = 'proc0 failed'
      if doc.has_key('proc0'): del doc['proc0']
      myProc.upload(doc)
      sys.exit(-1)
def main(*argv):
    '''
  argv[0] is the server (http://127.0.0.1:5984)
  argv[1] is the database (datadb)
  '''

    print '\n', str(datetime.datetime.now()), ': starting runProc0.py \n'

    #create a DBProcess instance, which will assist in uploading the proc
    #document to the database
    myProc = DBProcess(argv[0], argv[1], scpToLyon)

    vr = myProc.view('proc/proc0', reduce=False)

    for row in vr:
        print row['id']
        doc = myProc.get(row['id'])
        print 'have doc', row['id']
        doc['status'] = 'proc0 in progress'
        myProc.upload(doc)
        try:
            procDict = myProc.doprocess(
                doc['file'])  #this step calls rootfiyAndScp
            print 'called process'

            if len(procDict) > 0:
                #add a few more items to the document
                procDict['date'] = str(datetime.datetime.utcnow())
                procDict['processname'] = 'copySambaFileToSps'

                if len(procDict['scpErrs']) > 0:
                    doc['status'] = 'proc0 failed'
                else:
                    doc['status'] = 'good'
                #this step will add the procDict dictionary to the
                #database document and then upload it to the DB
                if doc.has_key('proc0') == False:
                    doc['proc0'] = {}

                doc['proc0'].update(procDict)
                myProc.upload(doc)

            else:
                doc['status'] = 'proc0 failed'
                myProc.upload(doc)
                print 'the process returned an empty dictionary!'
                sys.exit(-1)

        except Exception as e:
            print e
            doc['exception'] = str(type(e)) + ': ' + str(e)
            doc['status'] = 'proc0 failed'
            if doc.has_key('proc0'): del doc['proc0']
            myProc.upload(doc)
            sys.exit(-1)