Example 1
    def createFileDocinFilesDB(self, doc_id='', state='new', publication_state='not_published'):
        """
        Creates a test document in files_db
        """
        doc = {}
        lfn = random.choice(self.lfn) + doc_id
        doc['_id'] = getHashLfn(lfn)
        doc['dn'] = "/C=IT/O=INFN/OU=Personal Certificate/L=Perugia/CN=Hassen Riahi"
        doc['workflow'] = 'someWorkflow'
        doc['jobid'] = '1'
        doc['lfn'] = lfn
        doc['retry_count'] = []
        doc['source'] = random.choice(self.sites)
        doc['destination'] = random.choice(self.sites)
        doc['user'] = random.choice(self.users)
        doc['group'] = 'someGroup'
        doc['role'] = 'someRole'
        doc['state'] = state
        doc['checksums'] = 'someChecksums'
        doc['start_time'] = str(datetime.datetime.now())
        doc['dbSource_url'] = 'someUrl'
        doc['size'] = 1000
        doc['end_time'] = 10000
        doc['last_update'] = 10000
        doc['job_end_time'] = 10000
        doc['publication_state'] = publication_state
        doc['publication_retry_count'] = []
        doc['publish_dbs_url'] = 'https://cmsdbsprod.cern.ch:8443/cms_dbs_ph_analysis_02_writer/servlet/DBSServlet'
        doc['inputdataset'] = '/RelValProdTTbar/JobRobot-MC_3XY_V24_JobRobot-v1/GEN-SIM-DIGI-RECO'
        doc['dbs_url'] = 'http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet'
        self.db.queue(doc, True)
        self.db.commit()

        return doc
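A typical call from a test, assuming the helper above and its defaults (state='new', publication_state='not_published'); the doc_id argument only disambiguates the randomly chosen LFN. Hypothetical usage:

    doc = self.createFileDocinFilesDB(doc_id='42', state='acquired')
    assert doc['state'] == 'acquired'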
Example 2
        def pull_value(row):
            now = str(datetime.datetime.now())
            last_update = int(time.time())

            # Prepare file documents
            value = row["value"]
            value["lfn"] = value["_id"]
            value["user"] = value["_id"].split("/")[4]
            value["_id"] = getHashLfn(value["_id"])
            value["size"] = value["size"]
            value["retry_count"] = []
            value["state"] = "new"
            value["start_time"] = now
            value["last_update"] = last_update

            # Attributes required for publication
            value["job_end_time"] = row["key"]
            value["publication_state"] = "not_published"
            value["publication_retry_count"] = []
            try:
                value["dbSource_url"] = self.config.data_source.replace(
                    ((self.config.data_source).split("@")[0]).split("//")[1] + "@", ""
                )
            except IndexError:
                # data_source has no parseable credential part; use it as-is
                value["dbSource_url"] = self.config.data_source

            return value
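The try/except above strips embedded credentials from a CouchDB-style URL before storing it. For a hypothetical data_source it behaves like this:

    # Hypothetical URL, for illustration only
    data_source = "http://user:secret@couch.example.com:5984/asynctransfer"
    creds = data_source.split("@")[0].split("//")[1]    # 'user:secret'
    data_source.replace(creds + "@", "")                # 'http://couch.example.com:5984/asynctransfer'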
Example 3
    def mark_acquired(self, files=[]):
        """
        Mark the list of files as acquired
        """
        lfn_in_transfer = []
        dash_rep = ()
        if self.config.isOracle:
            toUpdate = list()
            for lfn in files:
                if lfn['value'][0].find('temp') == 7:
                    docId = lfn['key'][5]
                    toUpdate.append(docId)
                    try:
                        docbyId = self.oracleDB.get(self.config.oracleFileTrans.replace('filetransfers','fileusertransfers'),
                                                    data=encodeRequest({'subresource': 'getById', 'id': docId}))
                        document = oracleOutputMapping(docbyId, None)[0]
                        dash_rep = (document['jobid'], document['job_retry_count'], document['taskname'])
                        lfn_in_transfer.append(lfn)
                    except Exception as ex:
                        self.logger.error("Error during dashboard report update: %s" %ex)
                        return [],()

            return lfn_in_transfer, dash_rep
        else:
            for lfn in files:
                if lfn['value'][0].find('temp') == 7:
                    docId = getHashLfn(lfn['value'][0])
                    self.logger.debug("Marking acquired %s" % docId)
                    # Load document to get the retry_count
                    try:
                        document = self.db.document(docId)
                    except Exception as ex:
                        msg = "Error loading document from couch"
                        msg += str(ex)
                        msg += str(traceback.format_exc())
                        self.logger.error(msg)
                        continue
                    if document['state'] == 'new' or document['state'] == 'retry':
                        data = dict()
                        data['state'] = 'acquired'
                        data['last_update'] = time.time()
                        updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + docId
                        updateUri += "?" + urllib.urlencode(data)
                        try:
                            self.db.makeRequest(uri=updateUri, type="PUT", decode=False)
                        except Exception as ex:
                            msg = "Error updating document in couch"
                            msg += str(ex)
                            msg += str(traceback.format_exc())
                            self.logger.error(msg)
                            continue
                        self.logger.debug("Marked acquired %s of %s" % (docId, lfn))
                        lfn_in_transfer.append(lfn)
                        dash_rep = (document['jobid'], document['job_retry_count'], document['workflow'])
                    else:
                        continue
                else:
                    good_lfn = lfn['value'][0].replace('store', 'store/temp', 1)
                    self.mark_good([good_lfn])
            return lfn_in_transfer, dash_rep
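A note on the lfn['value'][0].find('temp') == 7 test above: temp-area LFNs begin with '/store/temp/', where the substring 'temp' starts at character index 7, so the check separates temp LFNs from permanent '/store/...' ones:

    '/store/temp/user/jdoe/file.root'.find('temp')   # -> 7
    '/store/user/jdoe/file.root'.find('temp')        # -> -1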
Example 4
    def createTestFileFinishedYesterdayinFilesDB(self):
        """
        Creates a test document in files_db
        """
        doc = {}
        doc['_id'] = getHashLfn("/this/is/a/lfnA")
        doc['dn'] = "/C=IT/O=INFN/OU=Personal Certificate/L=Perugia/CN=Hassen Riahi"
        doc['workflow'] = 'someWorkflow'
        doc['size'] = 999999
        doc['jobid'] = '1'
        doc['lfn'] = '/this/is/a/lfnA'
        doc['retry_count'] = []
        doc['source'] = random.choice(self.sites)
        doc['destination'] = random.choice(self.sites)
        doc['user'] = random.choice(self.users)
        doc['state'] = 'done'
        # Start three days ago and end two days ago; timedelta arithmetic
        # stays correct across month boundaries.
        doc['start_time'] = str(datetime.datetime.now() - datetime.timedelta(days=3))
        doc['end_time'] = str(datetime.datetime.now() - datetime.timedelta(days=2))
        doc['job_end_time'] = str(time.time())
        doc['dbSource_url'] = 'someUrl'
        self.db.queue(doc, True)
        self.db.commit()

        return doc
Example 5
        def pull_value(row):
            now = str(datetime.datetime.now())
            last_update = int(time.time())

            # Prepare file documents
            value = row['value']
            value['lfn'] = value["_id"]
            value['user'] = value["_id"].split('/')[4]
            value['_id'] = getHashLfn(value["_id"])
            value['retry_count'] = []
            value['state'] = 'new'
            value['start_time'] = now
            value['last_update'] = last_update

            # Attributes required for publication
            value['job_end_time'] = row['key']
            value['publication_state'] = 'not_published'
            value['publication_retry_count'] = []
            try:
                value['dbSource_url'] = self.config.data_source.replace(
                    ((self.config.data_source).split("@")[0]).split("//")[1] +
                    "@", "")
            except IndexError:
                # data_source has no parseable credential part; use it as-is
                value['dbSource_url'] = self.config.data_source

            return value
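pull_value consumes rows from a CouchDB view in which the key is the job end time and the value's _id is the temp-area LFN; since such LFNs look like /store/temp/user/&lt;username&gt;/..., split('/')[4] extracts the username. A hypothetical row, for illustration:

    row = {'key': 1700000000,
           'value': {'_id': '/store/temp/user/jdoe/ttbar/file.root', 'size': 1024}}
    row['value']['_id'].split('/')[4]   # -> 'jdoe'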
Example 7
    def mark_failed(self, files=[]):
        """
        Something failed for these files, so increment the publication retry count
        """
        now = str(datetime.datetime.now())
        last_update = int(time.time())
        for lfn in files:
            data = {}
            docId = getHashLfn(lfn)
            # Load document to get the retry_count
            try:
                document = self.db.document(docId)
            except Exception as ex:
                msg = "Error loading document from couch"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.error(msg)
                continue
            # Prepare data to update the document in couch
            if len(document['publication_retry_count']) + 1 > self.max_retry:
                data['publication_state'] = 'publication_failed'
            else:
                data['publication_state'] = 'publishing'
            data['last_update'] = last_update
            data['retry'] = now
            # Update the document in couch
            try:
                updateUri = "/" + self.db.name + "/_design/DBSPublisher/_update/updateFile/" + docId
                updateUri += "?" + urllib.urlencode(data)
                self.db.makeRequest(uri=updateUri, type="PUT", decode=False)
            except Exception as ex:
                msg = "Error in updating document in couch"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.error(msg)
    def mark_good(self, files=[]):
        """
        Mark the list of files as transferred
        """
        now = str(datetime.datetime.now())
        last_update = int(time.time())

        for lfn in files:

            try:
                document = self.db.document(getHashLfn(lfn))
            except Exception as ex:
                msg = "Error loading document from couch"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.error(msg)
                continue

            outputLfn = document['lfn'].replace('store/temp', 'store', 1)

            try:
                data = {}
                data['end_time'] = now
                data['state'] = 'done'
                data['lfn'] = outputLfn
                data['last_update'] = last_update
                updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + getHashLfn(lfn)
                updateUri += "?" + urllib.urlencode(data)
                self.db.makeRequest(uri=updateUri, type="PUT", decode=False)
            except Exception as ex:
                msg = "Error updating document in couch"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.error(msg)
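The updateUri built in these methods targets a CouchDB design-document update handler: a PUT to /&lt;db&gt;/_design/&lt;ddoc&gt;/_update/&lt;handler&gt;/&lt;docid&gt;, with the fields to change passed as query parameters. A sketch with hypothetical database and document names:

    data = {'state': 'acquired', 'last_update': 1700000000}
    updateUri = "/asynctransfer/_design/AsyncTransfer/_update/updateJobs/abc123"
    updateUri += "?" + urllib.urlencode(data)
    # PUT /asynctransfer/_design/AsyncTransfer/_update/updateJobs/abc123?state=acquired&last_update=1700000000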
Example 10
    def testF_TestIfgetHashLfnHashCorrectlyLFNs(self):
        """
        _testF_TestIfgetHashLfnHashCorrectlyLFNs
        Tests that the getHashLfn function of the AsyncStageOut module hashes LFNs correctly.
        """
        lfn = "/My/lfn/path"
        hashedLfn = getHashLfn(lfn)
        assert hashlib.sha224(lfn).hexdigest() == hashedLfn
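The assertion above implies that getHashLfn is essentially a SHA-224 hex digest of the LFN. A minimal sketch consistent with this test (the real AsyncStageOut helper may normalize the LFN first, e.g. mapping 'store/temp' to 'store'):

    import hashlib

    def getHashLfn(lfn):
        """Return the SHA-224 hex digest of an LFN (sketch)."""
        return hashlib.sha224(lfn).hexdigest()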
Example 12
    def mark_good(self, files=[]):
        """
        Mark the list of files as transferred
        """
        updated_lfn = []
        for lfn in files:
            hash_lfn = getHashLfn(lfn)
            self.logger.info("Marking good %s" % hash_lfn)
            self.logger.debug("Marking good %s" % lfn)
            try:
                document = self.db.document(hash_lfn)
            except Exception as ex:
                msg = "Error loading document from couch"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.error(msg)
                continue
            self.logger.info("Doc %s Loaded" % hash_lfn)
            if document['state'] not in ('killed', 'done', 'failed'):
                outputLfn = document['lfn'].replace('store/temp', 'store', 1)
                try:
                    now = str(datetime.datetime.now())
                    last_update = time.time()
                    data = {}
                    data['end_time'] = now
                    data['state'] = 'done'
                    data['lfn'] = outputLfn
                    data['last_update'] = last_update
                    updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + getHashLfn(lfn)
                    updateUri += "?" + urllib.urlencode(data)
                    self.db.makeRequest(uri=updateUri, type="PUT", decode=False)
                    updated_lfn.append(lfn)
                    self.logger.debug("Marked good %s" % lfn)
                except Exception as ex:
                    msg = "Error updating document in couch"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue
                try:
                    self.db.commit()
                except Exception as ex:
                    msg = "Error committing documents in couch"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue
Example 13
    def mark_acquired(self, files=[]):
        """
        Mark the list of files as acquired
        """
        lfn_in_transfer = []
        dash_rep = ()
        for lfn in files:
            if lfn['value'].find('temp') > 1:
                docId = getHashLfn(lfn['value'])
                self.logger.debug("Marking acquired %s" % docId)
                # Load document to get the retry_count
                try:
                    document = self.db.document(docId)
                except Exception as ex:
                    msg = "Error loading document from couch"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue
                if document['state'] == 'new' or document['state'] == 'retry':
                    data = {}
                    data['state'] = 'acquired'
                    data['last_update'] = time.time()
                    updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + docId
                    updateUri += "?" + urllib.urlencode(data)
                    try:
                        self.db.makeRequest(uri=updateUri, type="PUT", decode=False)
                    except Exception as ex:
                        msg = "Error updating document in couch"
                        msg += str(ex)
                        msg += str(traceback.format_exc())
                        self.logger.error(msg)
                        continue
                    self.logger.debug("Marked acquired %s of %s" % (docId, lfn))
                    lfn_in_transfer.append(lfn)
                    dash_rep = (document['jobid'], document['job_retry_count'],
                                document['workflow'])
                else:
                    continue
        return lfn_in_transfer, dash_rep
Example 14
    def mark_acquired(self, files=[]):
        """
        Mark the list of files as acquired
        """
        lfn_in_transfer = []
        dash_rep = ()
        for lfn in files:
            if lfn['value'][0].find('temp') == 7:
                docId = getHashLfn(lfn['value'][0])
                self.logger.debug("Marking acquired %s" % docId)
                # Load document to get the retry_count
                try:
                    document = self.db.document(docId)
                except Exception as ex:
                    msg = "Error loading document from couch"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue
                if document['state'] == 'new' or document['state'] == 'retry':
                    data = {}
                    data['state'] = 'acquired'
                    data['last_update'] = time.time()
                    updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + docId
                    updateUri += "?" + urllib.urlencode(data)
                    try:
                        self.db.makeRequest(uri=updateUri, type="PUT", decode=False)
                    except Exception as ex:
                        msg = "Error updating document in couch"
                        msg += str(ex)
                        msg += str(traceback.format_exc())
                        self.logger.error(msg)
                        continue
                    self.logger.debug("Marked acquired %s of %s" % (docId, lfn))
                    lfn_in_transfer.append(lfn)
                    dash_rep = (document['jobid'], document['job_retry_count'], document['workflow'])
                else:
                    continue
            else:
                good_lfn = lfn['value'][0].replace('store', 'store/temp', 1)
                self.mark_good([good_lfn])
        return lfn_in_transfer, dash_rep
Example 15
        def pull_value(row):
            now = str(datetime.datetime.now())

            # Prepare the files_db document
            value = row['value']
            value['lfn'] = value["_id"]
            value['user'] = value["_id"].split('/')[4]
            value['_id'] = getHashLfn(value["_id"])
            value['retry_count'] = []
            value['state'] = 'new'
            value['start_time'] = now
            value['dbSource_update'] = row['key']
            try:
                value['dbSource_url'] = self.config.data_source.replace(
                    ((self.config.data_source).split("@")[0]).split("//")[1] +
                    "@", "")
            except IndexError:
                # data_source has no parseable credential part; use it as-is
                value['dbSource_url'] = self.config.data_source

            return value
Example 16
    def update_FTSJobID(self, jobReport):
        """
        Mark the files of each submitted FTS job as SUBMITTED and record
        their FTS server and job id.
        """
        for job in jobReport:
            try:
                fileDoc = dict()
                fileDoc['asoworker'] = self.config.asoworker
                fileDoc['subresource'] = 'updateTransfers'
                fileDoc['list_of_ids'] = [getHashLfn(x) for x in job['LFNs']]
                fileDoc['list_of_transfer_state'] = ["SUBMITTED" for x in job['LFNs']]
                fileDoc['list_of_fts_instance'] = [self.fts_server_for_transfer for x in job['LFNs']]
                fileDoc['list_of_fts_id'] = [job['FTSJobid'] for x in job['LFNs']]

                self.logger.debug("Marking submitted %s files" % (len(fileDoc['list_of_ids'])))
                result = self.oracleDB.post(self.config.oracleFileTrans,
                                            data=encodeRequest(fileDoc))
                self.logger.debug("Marked submitted %s" % (fileDoc['list_of_ids']))
            except Exception as ex:
                self.logger.error("Error during status update: %s" % ex)
                time.sleep(10)
                return False
        return True
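The four list_of_* fields are parallel arrays, one entry per LFN of the job. For a hypothetical report entry with two LFNs the payload would look like:

    # Hypothetical values, for illustration only
    job = {'LFNs': ['/store/temp/user/jdoe/a.root', '/store/temp/user/jdoe/b.root'],
           'FTSJobid': 'abc-123'}
    fileDoc = {'asoworker': 'asoworker1',                   # assumed config value
               'subresource': 'updateTransfers',
               'list_of_ids': [getHashLfn(x) for x in job['LFNs']],
               'list_of_transfer_state': ['SUBMITTED', 'SUBMITTED'],
               'list_of_fts_instance': ['https://fts3.example.cern.ch:8446'] * 2,  # assumed server
               'list_of_fts_id': ['abc-123', 'abc-123']}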
Example 17
        ##    for out_lfn in job_doc['files']:
        ##        if out_lfn not in status: status[out_lfn] = 'done'
        message['transferStatus'] = status
        if len(done_files) != number_ended_files:
            try:
                failed_files = self.monitoring_db.loadView('UserMonitoring', 'LFNFailedByJobId', query)['rows']
            except Exception as e:
                self.logger.exception('A problem occurred when contacting UserMonitoring - LFNFailedByJobId: %s' % e)
                return
            self.logger.info("the job %s has %s failed files %s" % (job, len(failed_files), failed_files))
            transferError = ""
            for file in failed_files:
                if file['value'].find('temp') > 1:
                    status[file['value']] = 'failed'
                    lfn = file['value']
                    docId = getHashLfn(lfn)
                    # Load document to get the failure reason from the output file
                    try:
                        document = self.monitoring_db.document(docId)
                        #if (document['file_type'] == "output" and 'failure_reason' in document):
                        if 'failure_reason' in document:
                            if transferError:
                                transferError = transferError + "," + document['failure_reason']
                            else:
                                transferError = document['failure_reason']
                        if 'publication_state' in document:
                            if document['publication_state'] == 'publication_failed':
                                if transferError:
                                    transferError = transferError + "," + 'Publication Failure'
                                else:
                                    transferError = 'Publication Failure'
Example 18
    def mark_failed(self, files=[], failures_reasons=[], force_fail=False):
        """
        Something failed for these files so increment the retry count
        """
        updated_lfn = []
        for lfn in files:
            data = {}
            self.logger.debug("Document: %s" % lfn)
            if not isinstance(lfn, dict):
                if 'temp' not in lfn:
                    temp_lfn = lfn.replace('store', 'store/temp', 1)
                else:
                    temp_lfn = lfn
            else:
                if 'temp' not in lfn['value']:
                    temp_lfn = lfn['value'].replace('store', 'store/temp', 1)
                else:
                    temp_lfn = lfn['value']
            docId = getHashLfn(temp_lfn)
            # Load document to get the retry_count
            if self.config.isOracle:
                try:
                    self.logger.debug("Document: %s" %docId)
                    docbyId = self.oracleDB.get(self.config.oracleFileTrans.replace('filetransfers',
                                                                                    'fileusertransfers'),
                                                data=encodeRequest({'subresource': 'getById', 'id': docId}))
                    document = oracleOutputMapping(docbyId)[0]
                    data = dict()
                    data['asoworker'] = self.config.asoworker
                    data['subresource'] = 'updateTransfers'
                    data['list_of_ids'] = docId

                    if force_fail or document['transfer_retry_count'] + 1 > self.max_retry:
                        data['list_of_transfer_state'] = 'FAILED'
                    else:
                        data['list_of_transfer_state'] = 'RETRY'
                        fatal_error = self.determine_fatal_error(failures_reasons[files.index(lfn)])
                        if fatal_error:
                            data['list_of_transfer_state'] = 'FAILED'

                    data['list_of_failure_reason'] = failures_reasons[files.index(lfn)]
                    data['list_of_retry_value'] = 0

                    self.logger.debug("update: %s" % data)
                    result = self.oracleDB.post(self.config.oracleFileTrans,
                                                data=encodeRequest(data))
                    if data['list_of_transfer_state'] != 'RETRY':
                        updated_lfn.append(lfn)
                    self.logger.debug("Marked failed %s" % lfn)
                except Exception as ex:
                    self.logger.error("Error updating document status: %s" % ex)
                    continue
            else:
                try:
                    document = self.db.document( docId )
                except Exception as ex:
                    msg = "Error loading document from couch"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue
                if document['state'] not in ('killed', 'done', 'failed'):
                    now = str(datetime.datetime.now())
                    last_update = time.time()
                    # Prepare data to update the document in couch
                    if force_fail or len(document['retry_count']) + 1 > self.max_retry:
                        data['state'] = 'failed'
                        data['end_time'] = now
                    else:
                        data['state'] = 'retry'
                        fatal_error = self.determine_fatal_error(failures_reasons[files.index(lfn)])
                        if fatal_error:
                            data['state'] = 'failed'
                            data['end_time'] = now

                    self.logger.debug("Failure list: %s" % failures_reasons)
                    self.logger.debug("Files: %s" % files)
                    self.logger.debug("LFN %s" % lfn)

                    data['failure_reason'] = failures_reasons[files.index(lfn)]
                    data['last_update'] = last_update
                    data['retry'] = now
                    # Update the document in couch
                    self.logger.debug("Marking failed %s" % docId)
                    try:
                        updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + docId
                        updateUri += "?" + urllib.urlencode(data)
                        self.db.makeRequest(uri = updateUri, type = "PUT", decode = False)
                        updated_lfn.append(docId)
                        self.logger.debug("Marked failed %s" % docId)
                    except Exception as ex:
                        msg = "Error in updating document in couch"
                        msg += str(ex)
                        msg += str(traceback.format_exc())
                        self.logger.error(msg)
                        continue
                    try:
                        self.db.commit()
                    except Exception as ex:
                        msg = "Error commiting documents in couch"
                        msg += str(ex)
                        msg += str(traceback.format_exc())
                        self.logger.error(msg)
                        continue
                else:
                    updated_lfn.append(docId)
        self.logger.debug("failed file updated")
        return updated_lfn
Example 19
    def mark_good(self, files):
        """
        Mark the list of files as tranferred
        """
        updated_lfn = []
        good_ids = []
        if len(files) == 0:
            return updated_lfn
        for it, lfn in enumerate(files):
            hash_lfn = getHashLfn(lfn)
            self.logger.info("Marking good %s" % hash_lfn)
            self.logger.debug("Marking good %s" % lfn)
            if not self.config.isOracle:
                try:
                    document = self.db.document(hash_lfn)
                except Exception as ex:
                    msg = "Error loading document from couch"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue
            self.logger.info("Doc %s Loaded" % hash_lfn)
            try:
                now = str(datetime.datetime.now())
                last_update = time.time()
                if self.config.isOracle:
                    docId = getHashLfn(lfn)
                    good_ids.append(docId)
                    updated_lfn.append(lfn)
                else:
                    if document['state'] not in ('killed', 'done', 'failed'):
                        outputLfn = document['lfn'].replace('store/temp', 'store', 1)
                        data = dict()
                        data['end_time'] = now
                        data['state'] = 'done'
                        data['lfn'] = outputLfn
                        data['last_update'] = last_update
                        updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + getHashLfn(lfn)
                        updateUri += "?" + urllib.urlencode(data)
                        self.db.makeRequest(uri = updateUri, type = "PUT", decode = False)
                        updated_lfn.append(lfn)
                        self.logger.debug("Marked good %s" % lfn)
                    else: 
                        updated_lfn.append(lfn)
                    try:
                        self.db.commit()
                    except Exception as ex:
                        msg = "Error commiting documents in couch"
                        msg += str(ex)
                        msg += str(traceback.format_exc())
                        self.logger.error(msg)
                        continue   
            except Exception as ex:
                msg = "Error updating document"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.error(msg)
                continue
        if self.config.isOracle:
            try:
                data = dict()
                data['asoworker'] = self.config.asoworker
                data['subresource'] = 'updateTransfers'
                data['list_of_ids'] = good_ids
                data['list_of_transfer_state'] = ["DONE" for x in good_ids]
                result = self.oracleDB.post(self.config.oracleFileTrans,
                                            data=encodeRequest(data))
                self.logger.debug("Marked good %s" % good_ids)
            except Exception:
                self.logger.exception('Error updating document')
                return {}
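        # Note: docId and lfn below hold the values from the last file
        # processed in the loop above, so only that source replica is removed.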
        
        self.logger.info("Transferred file %s updated, removing now source file" %docId)
        try:
            docbyId = self.oracleDB.get(self.config.oracleFileTrans.replace('filetransfers','fileusertransfers'),
                                        data=encodeRequest({'subresource': 'getById', 'id': docId}))
            document = oracleOutputMapping(docbyId, None)[0]
        except Exception:
            msg = "Error getting file from source"
            self.logger.exception(msg)
            return {}

        if document["source"] not in self.site_tfc_map:
            self.logger.debug("site not found... gathering info from phedex")
            self.site_tfc_map[document["source"]] = self.get_tfc_rules(document["source"])
        pfn = self.apply_tfc_to_lfn('%s:%s' % (document["source"], lfn))
        self.logger.debug("File has to be removed now from source site: %s" % pfn)
        self.remove_files(self.userProxy, pfn)
        self.logger.debug("Transferred file removed from source")
        return updated_lfn
Example 20
    def killThread(self, thread_id, transfers):
        """This is the worker thread function for kill command.
        """
        while True:
            transfer_list = transfers.get()
            self.logger.info("Starting thread %s" % (thread_id))
            user = transfer_list[0]['username']
            group = transfer_list[0]['user_group']
            role = transfer_list[0]['user_role']

            uiSetupScript = getattr(self.config, 'UISetupScript', None)

            self.logger.debug("Trying to get DN for %s %s %s %s" % (user, self.logger, self.config.opsProxy, self.config.opsProxy))
            try:
                userDN = getDNFromUserName(user, self.logger, ckey=self.config.opsProxy, cert=self.config.opsProxy)
            except Exception as ex:
                msg = "Error retrieving the user DN"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.error(msg)
                continue
            if not userDN:
                transfers.task_done()
                time.sleep(1)
                continue
            self.logger.debug("user DN: %s" % userDN)

            try:
                defaultDelegation = {'logger': self.logger,
                                     'credServerPath': self.config.credentialDir,
                                     'myProxySvr': 'myproxy.cern.ch',
                                     'min_time_left': getattr(self.config, 'minTimeLeft', 36000),
                                     'serverDN': self.config.serverDN,
                                     'uisource': uiSetupScript,
                                     'cleanEnvironment': getattr(self.config, 'cleanEnvironment', False)}
                if hasattr(self.config, "cache_area"):
                    cache_area = self.config.cache_area
                    defaultDelegation['myproxyAccount'] = re.compile('https?://([^/]*)/.*').findall(cache_area)[0]
            except IndexError:
                self.logger.error('MyproxyAccount parameter cannot be retrieved from %s . ' % self.config.cache_area)
                transfers.task_done()
                time.sleep(1)
                continue
            if getattr(self.config, 'serviceCert', None):
                defaultDelegation['server_cert'] = self.config.serviceCert
            if getattr(self.config, 'serviceKey', None):
                defaultDelegation['server_key'] = self.config.serviceKey
            try:
                defaultDelegation['userDN'] = userDN
                defaultDelegation['group'] = group if group else ''
                defaultDelegation['role'] = role if group else ''
                self.logger.debug('delegation: %s' % defaultDelegation)
                valid_proxy, user_proxy = getProxy(defaultDelegation, self.logger)
            except Exception as ex:
                msg = "Error getting the user proxy"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.error(msg)
                transfers.task_done()
                time.sleep(1)
                continue

            # TODO: take server from db, right now, take only the first of the list and assuming it valid for all
            try:
                # TODO: debug u added during info upload. To be fixed soon! For now worked around
                fts_server = transfer_list[0]['fts_instance'].split('u')[1]
                self.logger.info("Delegating proxy to %s" % fts_server)
                context = fts3.Context(fts_server, user_proxy, user_proxy, verify=True)
                self.logger.debug(fts3.delegate(context, lifetime=timedelta(hours=48), force=False))

                self.logger.info("Proxy delegated. Grouping files by jobId")
                jobs = {}
                for fileToKill in transfer_list:
                    # TODO: debug u added during info upload. To be fixed soon! For now worked around
                    jid = str(fileToKill['fts_id']).split('u')[1]
                    if jid not in jobs:
                        jobs[jid] = []
                    jobs[jid].append(fileToKill)

                self.logger.info("Found %s jobIds", len(jobs.keys()))
                self.logger.debug("jobIds: %s", jobs.keys)

                # lists of files killed, and of files it was too late to kill
                killed = []
                too_late = []

                for ftsJobId, files in jobs.iteritems():
                    self.logger.info("Cancelling tranfers in %s" % ftsJobId)

                    ref_lfns = [str(x['destination_lfn'].split('/store/')[1]) for x in files]
                    source_lfns = [x['source_lfn'] for x in files]

                    job_list = fts3.get_job_status(context, ftsJobId, list_files=True)
                    tx = job_list['files']

                    # TODO: this workaround is needed to get FTS file id, we may want to add a column in the db?
                    idListToKill = [x['file_id'] for x in tx
                                    if x['dest_surl'].split('/cms/store/')[1] in ref_lfns]

                    # needed for the state update
                    lfnListToKill = [ref_lfns.index(str(x['dest_surl'].split('/cms/store/')[1])) for x in tx
                                       if x['dest_surl'].split('/cms/store/')[1] in ref_lfns]

                    self.logger.debug("List of ids to cancel for job %s: %s" % (ftsJobId, idListToKill))
                    res = fts3.cancel(context, ftsJobId, idListToKill)
                    self.logger.debug('Kill command result: %s' % json.dumps(res))

                    if not isinstance(res, list):
                        res = [res]

                    # Verify if the kill command succeeded
                    for k, kill_res in enumerate(res):
                        indexToUpdate = lfnListToKill[k]
                        if kill_res in ("FINISHEDDIRTY", "FINISHED", "FAILED"):
                            self.logger.debug(source_lfns[indexToUpdate])
                            too_late.append(getHashLfn(source_lfns[indexToUpdate]))
                        else:
                            killed.append(getHashLfn(source_lfns[indexToUpdate]))

                # TODO: decide how to update status for too_late files
                killed += too_late
                self.logger.debug('Updating status of killed files: %s' % killed)

                if len(killed) > 0:
                    data = dict()
                    data['asoworker'] = self.config.asoworker
                    data['subresource'] = 'updateTransfers'
                    data['list_of_ids'] = killed
                    data['list_of_transfer_state'] = ["KILLED" for _ in killed]
                    self.oracleDB.post(self.config.oracleFileTrans,
                                       data=encodeRequest(data))
                    self.logger.debug("Marked killed %s" % killed)
            except:
                # TODO: split and improve try/except
                self.logger.exception('Kill command failed')

            transfers.task_done()
Example 21
    def mark_failed(self, files=[], failures_reasons=[], force_fail=False):
        """
        Something failed for these files so increment the retry count
        """
        updated_lfn = []
        for lfn in files:
            data = {}
            if not isinstance(lfn, dict):
                if 'temp' not in lfn:
                    temp_lfn = lfn.replace('store', 'store/temp', 1)
                else:
                    temp_lfn = lfn
                perm_lfn = lfn
            else:
                if 'temp' not in lfn['value']:
                    temp_lfn = lfn['value'].replace('store', 'store/temp', 1)
                else:
                    temp_lfn = lfn['value']
                perm_lfn = lfn['value']
            docId = getHashLfn(temp_lfn)
            # Load document to get the retry_count
            try:
                document = self.db.document(docId)
            except Exception as ex:
                msg = "Error loading document from couch"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.error(msg)
                continue
            if document['state'] not in ('killed', 'done', 'failed'):
                now = str(datetime.datetime.now())
                last_update = time.time()
                # Prepare data to update the document in couch
                if force_fail or len(document['retry_count']) + 1 > self.max_retry:
                    data['state'] = 'failed'
                    data['end_time'] = now
                else:
                    data['state'] = 'retry'

                self.logger.debug("Failure list: %s" % failures_reasons)
                self.logger.debug("Files: %s" % files)
                self.logger.debug("LFN %s" % lfn)

                data['failure_reason'] = failures_reasons[files.index(lfn)]
                data['last_update'] = last_update
                data['retry'] = now
                # Update the document in couch
                self.logger.debug("Marking failed %s" % docId)
                try:
                    updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + docId
                    updateUri += "?" + urllib.urlencode(data)
                    self.db.makeRequest(uri=updateUri,
                                        type="PUT",
                                        decode=False)
                    updated_lfn.append(docId)
                    self.logger.debug("Marked failed %s" % docId)
                except Exception as ex:
                    msg = "Error in updating document in couch"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue
                try:
                    self.db.commit()
                except Exception as ex:
                    msg = "Error committing documents in couch"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue
        return updated_lfn
Example 22
    def command(self, jobs, jobs_lfn, jobs_pfn, jobs_report):
        """
        For each job the worker has to complete:
        Delete files that have failed previously
        Create a temporary copyjob file
        Submit the copyjob to the appropriate FTS server
        Parse the output of the FTS transfer and return complete and failed files for recording
        """
        # Output: {"userProxyPath":"/path/to/proxy","LFNs":["lfn1","lfn2","lfn3"],"PFNs":["pfn1","pfn2","pfn3"],"FTSJobid":'id-of-fts-job', "username": '******'}
        #Loop through all the jobs for the links we have
        failure_reasons = []
        for link, copyjob in jobs.items():
            submission_error = False
            status_error = False
            fts_job = {}
            # Validate copyjob file before doing anything
            self.logger.debug("Valid %s" % self.validate_copyjob(copyjob))
            if not self.validate_copyjob(copyjob): continue

            rest_copyjob = {
                "params": {
                    "bring_online": None,
                    "verify_checksum": False,
                    "copy_pin_lifetime": -1,
                    "max_time_in_queue": self.config.max_h_in_queue,
                    "job_metadata": {"issuer": "ASO"},
                    "spacetoken": None,
                    "source_spacetoken": None,
                    "fail_nearline": False,
                    "overwrite": True,
                    "gridftp": None
                },
                "files": []
            }

            for SrcDest in copyjob:
                tempDict = {"sources": [], "metadata": None, "destinations": []}

                tempDict["sources"].append(SrcDest.split(" ")[0])
                tempDict["destinations"].append(SrcDest.split(" ")[1])
                rest_copyjob["files"].append(tempDict)


            self.logger.debug("Subbmitting this REST copyjob %s" % rest_copyjob)
            url = self.fts_server_for_transfer + '/jobs'
            self.logger.debug("Running FTS submission command")
            self.logger.debug("FTS server: %s" % self.fts_server_for_transfer)
            self.logger.debug("link: %s -> %s" % link)
            heade = {"Content-Type ":"application/json"}
            buf = StringIO.StringIO()
            try:
                connection = RequestHandler(config={'timeout': 300, 'connecttimeout' : 300})
            except Exception as ex:
                msg = str(ex)
                msg += str(traceback.format_exc())
                self.logger.debug(msg)
            try:
                response, datares = connection.request(url, rest_copyjob, heade, verb='POST', doseq=True, ckey=self.user_proxy, \
                                                       cert=self.user_proxy, capath='/etc/grid-security/certificates', \
                                                       cainfo=self.user_proxy, verbose=True)
                self.logger.debug("Submission done")
                self.logger.debug('Submission header status: %s' % response.status)
                self.logger.debug('Submission header reason: %s' % response.reason)
                self.logger.debug('Submission result %s' %  datares)
            except Exception as ex:
                msg = "Error submitting to FTS: %s " % url
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.debug(msg)
                failure_reasons.append(msg)
                submission_error = True
            buf.close()
            if not submission_error:
                res = {}
                try:
                    res = json.loads(datares)
                except Exception as ex:
                    msg = "Couldn't load submission acknowledgment from FTS"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.debug(msg)
                    submission_error = True
                    failure_reasons.append(msg)
                if 'job_id' in res:
                    fileId_list = []
                    files_res = []
                    files_ = {}
                    job_id = res['job_id']
                    file_url = self.fts_server_for_transfer + '/jobs/' + job_id +'/files'
                    self.logger.debug("Submitting to %s" % file_url)
                    file_buf = StringIO.StringIO()
                    try:
                        response, files_ = connection.request(file_url, {}, heade, doseq=True, ckey=self.user_proxy, \
                                                              cert=self.user_proxy, capath='/etc/grid-security/certificates', \
                                                              cainfo=self.user_proxy, verbose=True)
                        files_res = json.loads(files_)
                    except Exception as ex:
                        msg = "Error contacting FTS to retrieve file: %s " % file_url
                        msg += str(ex)
                        msg += str(traceback.format_exc())
                        self.logger.debug(msg)
                        submission_error = True
                        failure_reasons.append(msg)
                    self.logger.debug("List files in job %s" % files_)
                    file_buf.close()
                    for file_in_job in files_res:
                        if 'file_id' in file_in_job:
                            fileId_list.append(file_in_job['file_id'])
                        else:
                            msg = "Could not load submitted file %s from FTS" % file_url
                            self.logger.debug(msg)
                            submission_error = True
                            failure_reasons.append(msg)
                    self.logger.debug("File id list %s" % fileId_list)
            if submission_error:
                self.logger.debug("Submission failed")
                self.logger.info("Mark failed %s files" % len(jobs_lfn[link]))
                self.logger.debug("Mark failed %s files" % jobs_lfn[link])
                failed_files = self.mark_failed(jobs_lfn[link], force_fail=False, submission_error=True, failure_reasons=failure_reasons)
                self.logger.info("Marked failed %s" % len(failed_files))
                continue
            fts_job['userProxyPath'] = self.user_proxy
            fts_job['LFNs'] = jobs_lfn[link]
            fts_job['PFNs'] = jobs_pfn[link]
            fts_job['FTSJobid'] = job_id
            fts_job['files_id'] = fileId_list
            fts_job['username'] = self.user
            self.logger.debug("Creating json file %s in %s" % (fts_job, self.dropbox_dir))
            with open('%s/Monitor.%s.json' % (self.dropbox_dir, fts_job['FTSJobid']), 'w') as ftsjob_file:
                ftsjob_file.write(json.dumps(fts_job))
            self.logger.debug("%s ready." % fts_job)
            # Prepare Dashboard report
            for lfn in fts_job['LFNs']:
                lfn_report = {}
                lfn_report['FTSJobid'] = fts_job['FTSJobid']
                index = fts_job['LFNs'].index(lfn)
                lfn_report['PFN'] = fts_job['PFNs'][index]
                lfn_report['FTSFileid'] = fts_job['files_id'][index]
                lfn_report['Workflow'] = jobs_report[link][index][2]
                lfn_report['JobVersion'] = jobs_report[link][index][1]
                job_id = '%d_https://glidein.cern.ch/%d/%s_%s' % (int(jobs_report[link][index][0]), int(jobs_report[link][index][0]), lfn_report['Workflow'].replace("_", ":"), lfn_report['JobVersion'])
                lfn_report['JobId'] = job_id
                lfn_report['URL'] = self.fts_server_for_transfer
                self.logger.debug("Creating json file %s in %s for FTS3 Dashboard" % (lfn_report, self.dropbox_dir))
                with open('/tmp/Dashboard.%s.json' % getHashLfn(lfn_report['PFN']), 'w') as dash_job_file:
                    dash_job_file.write(json.dumps(lfn_report))
                self.logger.debug("%s ready for FTS Dashboard report." % lfn_report)
        return
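For a single source/destination pair the rest_copyjob above serializes to JSON along these lines (hypothetical PFNs and queue time):

    {"params": {"bring_online": null, "verify_checksum": false,
                "copy_pin_lifetime": -1, "max_time_in_queue": 6,
                "job_metadata": {"issuer": "ASO"}, "spacetoken": null,
                "source_spacetoken": null, "fail_nearline": false,
                "overwrite": true, "gridftp": null},
     "files": [{"sources": ["gsiftp://se-src.example.com/store/temp/user/jdoe/a.root"],
                "metadata": null,
                "destinations": ["gsiftp://se-dst.example.com/store/user/jdoe/a.root"]}]}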
Example 23
    def mark_failed(self, files=[], force_fail=False, submission_error=False):
        """
        Something failed for these files so increment the retry count
        """
        updated_lfn = []
        for lfn in files:
            data = {}
            if not isinstance(lfn, dict):
                if 'temp' not in lfn:
                    temp_lfn = lfn.replace('store', 'store/temp', 1)
                else:
                    temp_lfn = lfn
            else:
                if 'temp' not in lfn['value'][0]:
                    temp_lfn = lfn['value'][0].replace('store', 'store/temp', 1)
                else:
                    temp_lfn = lfn['value'][0]

            # Load document and get the retry_count
            if self.config.isOracle:
                docId = getHashLfn(temp_lfn)
                self.logger.debug("Marking failed %s" % docId)
                try:
                    docbyId = self.oracleDB.get(self.config.oracleFileTrans.replace('filetransfers',
                                                                                    'fileusertransfers'),
                                                data=encodeRequest({'subresource': 'getById', 'id': docId}))
                except Exception as ex:
                    self.logger.error("Error updating failed docs: %s" %ex)
                    continue
                document = oracleOutputMapping(docbyId, None)[0]
                self.logger.debug("Document: %s" % document)

                fileDoc = dict()
                fileDoc['asoworker'] = self.config.asoworker
                fileDoc['subresource'] = 'updateTransfers'
                fileDoc['list_of_ids'] = docId 

                if force_fail or document['transfer_retry_count'] + 1 > self.max_retry:
                    fileDoc['list_of_transfer_state'] = 'FAILED'
                    fileDoc['list_of_retry_value'] = 1
                else:
                    fileDoc['list_of_transfer_state'] = 'RETRY'
                if submission_error:
                    fileDoc['list_of_failure_reason'] = "Job could not be submitted to FTS: temporary problem of FTS"
                    fileDoc['list_of_retry_value'] = 1
                elif not self.valid_proxy:
                    fileDoc['list_of_failure_reason'] = "Job could not be submitted to FTS: user's proxy expired"
                    fileDoc['list_of_retry_value'] = 1
                else:
                    fileDoc['list_of_failure_reason'] = "Site config problem."
                    fileDoc['list_of_retry_value'] = 1

                self.logger.debug("update: %s" % fileDoc)
                try:
                    updated_lfn.append(docId)
                    result = self.oracleDB.post(self.config.oracleFileTrans,
                                         data=encodeRequest(fileDoc))
                except Exception as ex:
                    msg = "Error updating document"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue

            else:
                docId = getHashLfn(temp_lfn)
                try:
                    document = self.db.document(docId)
                except Exception as ex:
                    msg = "Error loading document from couch"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue
                if document['state'] != 'killed' and document['state'] != 'done' and document['state'] != 'failed':
                    now = str(datetime.datetime.now())
                    last_update = time.time()
                    # Prepare data to update the document in couch
                    if force_fail or len(document['retry_count']) + 1 > self.max_retry:
                        data['state'] = 'failed'
                    else:
                        data['state'] = 'retry'
                    if submission_error:
                        data['failure_reason'] = "Job could not be submitted to FTS: temporary problem of FTS"
                    elif not self.valid_proxy:
                        data['failure_reason'] = "Job could not be submitted to FTS: user's proxy expired"
                    else:
                        data['failure_reason'] = "Site config problem."
                    data['last_update'] = last_update
                    data['retry'] = now

                    # Update the document in couch
                    self.logger.debug("Marking failed %s" % docId)
                    try:
                        updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + docId
                        updateUri += "?" + urllib.urlencode(data)
                        self.db.makeRequest(uri=updateUri, type="PUT", decode=False)
                        updated_lfn.append(docId)
                        self.logger.debug("Marked failed %s" % docId)
                    except Exception as ex:
                        msg = "Error in updating document in couch"
                        msg += str(ex)
                        msg += str(traceback.format_exc())
                        self.logger.error(msg)
                        continue
                    try:
                        self.db.commit()
                    except Exception as ex:
                        msg = "Error commiting documents in couch"
                        msg += str(ex)
                        msg += str(traceback.format_exc())
                        self.logger.error(msg)
                        continue
            self.logger.debug("failed file updated")
            return updated_lfn
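
The couch branch above drives every state change through a CouchDB update handler: the new fields are urlencoded into the query string of a PUT against _design/AsyncTransfer/_update/updateJobs/<docId>. A minimal sketch of that URI construction; the database name and document id are made up for illustration, and only the handler path and urllib.urlencode come from the code above:

import urllib

def build_update_uri(db_name, doc_id, data):
    # same pattern as mark_failed: handler path plus urlencoded fields
    uri = "/" + db_name + "/_design/AsyncTransfer/_update/updateJobs/" + doc_id
    return uri + "?" + urllib.urlencode(data)

# field order in the query string may vary with dict ordering
print build_update_uri("asynctransfer", "someDocId",
                       {"state": "retry", "failure_reason": "Site config problem."})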
Esempio n. 24
    def mark_good(self, files=[]):
        """
        Mark the list of files as published
        """
        last_update = int(time.time())
        for lfn in files:
            try:
                data = {}
                data['publication_state'] = 'published'
                data['last_update'] = last_update
                updateUri = "/" + self.db.name + "/_design/DBSPublisher/_update/updateFile/" + getHashLfn(lfn.replace('store', 'store/temp', 1))
                updateUri += "?" + urllib.urlencode(data)
                self.logger.info(updateUri)
                self.db.makeRequest(uri=updateUri, type="PUT", decode=False)
            except Exception as ex:
                msg = "Error updating document in couch"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.error(msg)
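
mark_good receives permanent LFNs, while the publisher documents were keyed on the hash of the temp LFN, hence the replace('store', 'store/temp', 1) before hashing. A short sketch of that round trip; the sha224-based body of getHashLfn is an assumption about the helper, not taken from this page:

import hashlib

def getHashLfn(lfn):
    # assumed implementation: hex digest of the LFN
    return hashlib.sha224(lfn).hexdigest()

perm_lfn = '/store/user/someuser/dataset/file-1-1.root'
temp_lfn = perm_lfn.replace('store', 'store/temp', 1)
print temp_lfn              # /store/temp/user/someuser/dataset/file-1-1.root
print getHashLfn(temp_lfn)  # the doc _id written at stageout time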
Esempio n. 25
    def mark_failed(self, files=[], force_fail=False, submission_error=False):
        """
        Something failed for these files so increment the retry count
        """
        updated_lfn = []
        for lfn in files:
            data = {}
            if not isinstance(lfn, dict):
                if 'temp' not in lfn:
                    temp_lfn = lfn.replace('store', 'store/temp', 1)
                else:
                    temp_lfn = lfn
            else:
                if 'temp' not in lfn['value']:
                    temp_lfn = lfn['value'].replace('store', 'store/temp', 1)
                else:
                    temp_lfn = lfn['value']

            docId = getHashLfn(temp_lfn)

            # Load document to get the retry_count
            try:
                document = self.db.document(docId)
            except Exception as ex:
                msg = "Error loading document from couch"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.error(msg)
                continue
            if document['state'] not in ('killed', 'done', 'failed'):
                now = str(datetime.datetime.now())
                last_update = time.time()
                # Prepare data to update the document in couch
                if force_fail or len(document['retry_count']) + 1 > self.max_retry:
                    data['state'] = 'failed'
                else:
                    data['state'] = 'retry'
                if submission_error:
                    data['failure_reason'] = "Job could not be submitted to FTS"
                else:
                    data['failure_reason'] = "Site config problem."
                data['last_update'] = last_update
                data['retry'] = now

                # Update the document in couch
                self.logger.debug("Marking failed %s" % docId)
                try:
                    updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + docId
                    updateUri += "?" + urllib.urlencode(data)
                    self.db.makeRequest(uri=updateUri, type="PUT", decode=False)
                    updated_lfn.append(docId)
                    self.logger.debug("Marked failed %s" % docId)
                except Exception as ex:
                    msg = "Error in updating document in couch"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue
                try:
                    self.db.commit()
                except Exception as ex:
                    msg = "Error committing documents in couch"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue
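
The retry policy is the piece worth isolating: retry_count is a list with one entry per previous attempt, so the next attempt number is len(retry_count) + 1. A standalone sketch of the same decision; max_retry=3 is an arbitrary value for illustration, not one taken from this code:

def next_state(retry_count, force_fail=False, max_retry=3):
    # mirrors the branch above: retries exhausted (or forced) -> failed
    if force_fail or len(retry_count) + 1 > max_retry:
        return 'failed'
    return 'retry'

print next_state([])                  # retry  (first failure)
print next_state(['t1', 't2', 't3'])  # failed (4th attempt exceeds max_retry)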
Esempio n. 26
    def command(self, jobs, jobs_lfn, jobs_pfn, jobs_report, retry=False):
        """
        For each job the worker has to complete:
        Delete files that have failed previously
        Create a temporary copyjob file
        Submit the copyjob to the appropriate FTS server
        Parse the output of the FTS transfer and return complete and failed files for recording
        """
        # Output: {"userProxyPath":"/path/to/proxy","LFNs":["lfn1","lfn2","lfn3"],"PFNs":["pfn1","pfn2","pfn3"],"FTSJobid":'id-of-fts-job', "username": '******'}
        tmp_file_pool = []
        #command2 = 'export X509_USER_PROXY=%s ; source %s ;' % (self.userProxy, self.uiSetupScript)
        #self.logger.debug("executing command: %s" % (command2))
        #stdout2, rc2 = execute_command(command2, self.logger, self.commandTimeout)
        for link, copyjob in jobs.items():
            submission_error = False
            status_error = False
            fts_job = {}
            # Validate copyjob file before doing anything
            self.logger.debug("Copyjob: %s" % copyjob)
            self.logger.debug("Valid %s" % self.validate_copyjob(copyjob))
            if not self.validate_copyjob(copyjob): continue

            tmp_copyjob_file = tempfile.NamedTemporaryFile(delete=False)
            tmp_copyjob_file.write('\n'.join(copyjob))
            tmp_copyjob_file.close()
            #self.logger.debug("Temp Copyjob: %s" % tmp_copyjob_file)
            tmp_file_pool.append(tmp_copyjob_file.name)

            #self.logger.debug("Temp Copyjob: %s" % tmp_copyjob_file)
            self.logger.debug("Running FTS submission command")
            self.logger.debug("FTS server: %s" % self.fts_server_for_transfer)
            self.logger.debug("link: %s -> %s" % link)
            self.logger.debug("copyjob file: %s" % tmp_copyjob_file.name)

            command = '%s/../../../apps/asyncstageout/Monitor/PHEDEX/Testbed/FakeFTS.pl %s -s %s -f %s' % (
                self.config.componentDir, self.submission_command,
                self.fts_server_for_transfer, tmp_copyjob_file.name)
            # command = 'export X509_USER_PROXY=%s ; source %s ; /data/ASO/async_install_103pre3/current/apps/asyncstageout/Monitor/PHEDEX/Testbed/FakeFTS.pl %s -s %s -f %s' % (self.userProxy, self.uiSetupScript,
            #                                         self.submission_command, self.fts_server_for_transfer,
            #                                        tmp_copyjob_file.name)

            init_time = str(strftime("%a, %d %b %Y %H:%M:%S",
                                     time.localtime()))
            self.logger.debug("executing command: %s at: %s for: %s" %
                              (command, init_time, self.userDN))
            stdout, rc = execute_command(command, self.logger,
                                         self.commandTimeout)
            self.logger.debug("Submission result %s" % rc)
            self.logger.debug("Sending %s %s %s" %
                              (jobs_lfn[link], jobs_pfn[link], stdout.strip()))
            if not rc:
                # Updating files to acquired in the database
                #self.logger.info("Mark acquired %s files" % len(jobs_lfn[link]))
                #self.logger.debug("Mark acquired %s files" % jobs_lfn[link])
                #acquired_files = self.mark_acquired(jobs_lfn[link])
                #self.logger.info("Marked acquired %s" % len(acquired_files))
                #if not acquired_files:
                #    continue
                fts_job['userProxyPath'] = self.userProxy
                fts_job['LFNs'] = jobs_lfn[link]
                fts_job['PFNs'] = jobs_pfn[link]
                fts_job['FTSJobid'] = stdout.strip()
                fts_job['username'] = self.user
                self.logger.debug("Creating json file %s in %s" %
                                  (fts_job, self.dropbox_dir))
                ftsjob_file = open(
                    '%s/Monitor.%s.json' %
                    (self.dropbox_dir, fts_job['FTSJobid']), 'w')
                jsondata = json.dumps(fts_job)
                ftsjob_file.write(jsondata)
                ftsjob_file.close()
                self.logger.debug("%s ready." % fts_job)
                # Prepare Dashboard report
                for lfn in fts_job['LFNs']:
                    lfn_report = {}
                    lfn_report['FTSJobid'] = fts_job['FTSJobid']
                    index = fts_job['LFNs'].index(lfn)
                    lfn_report['PFN'] = fts_job['PFNs'][index]
                    lfn_report['Workflow'] = jobs_report[link][index][2]
                    lfn_report['JobVersion'] = jobs_report[link][index][1]
                    job_id = '%d_https://glidein.cern.ch/%d/%s_%s' % (
                        int(jobs_report[link][index][0]),
                        int(jobs_report[link][index][0]),
                        lfn_report['Workflow'].replace(
                            "_", ":"), lfn_report['JobVersion'])
                    lfn_report['JobId'] = job_id
                    self.logger.debug(
                        "Creating json file %s in %s for FTS3 Dashboard" %
                        (lfn_report, self.dropbox_dir))
                    dash_job_file = open(
                        '/tmp/Dashboard.%s.json' %
                        getHashLfn(lfn_report['PFN']), 'w')
                    jsondata = json.dumps(lfn_report)
                    dash_job_file.write(jsondata)
                    dash_job_file.close()
                    self.logger.info("%s ready for FTS Dashboard report." %
                                     lfn_report)
            elif len(jobs_lfn[link]):
                self.logger.debug("Submission failed")
                self.logger.info("Mark failed %s files" % len(jobs_lfn[link]))
                self.logger.debug("Mark failed %s files" % jobs_lfn[link])
                failed_files = self.mark_failed(jobs_lfn[link],
                                                bad_logfile=None,
                                                force_fail=False,
                                                submission_error=True)
                self.logger.info("Marked failed %s" % len(failed_files))
                continue
            else:
                continue
        # Generate the json output
        self.logger.debug("Jobs submission Done. Removing copy_job files")
        #for tmp in tmp_file_pool:
        #   os.unlink( tmp )
        return
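
The copyjob handed to FTS via -f is just newline-joined source/destination PFN pairs, which is what the NamedTemporaryFile above receives. A hedged sketch of preparing one; the PFNs and the space-separated pair layout are assumptions for illustration:

import tempfile

# hypothetical source/destination PFN pairs, one transfer per line
copyjob = [
    'srm://src.example.org/store/temp/user/u/f1.root srm://dst.example.org/store/user/u/f1.root',
    'srm://src.example.org/store/temp/user/u/f2.root srm://dst.example.org/store/user/u/f2.root',
]

tmp = tempfile.NamedTemporaryFile(delete=False)
tmp.write('\n'.join(copyjob))
tmp.close()
print "copyjob file ready: %s" % tmp.name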
Esempio n. 28
# lfn_base has store/temp in it twice to make sure that
# the temp->permanent LFN change is correct (see the short check after this example).
lfn_base = '/store/temp/user/%s/my_cool_dataset/file-%s-%s.root'

now = str(datetime.datetime.now())
job_end_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
last_update = int(time.time())

print "Script starts at %s" %now

while i <= size:
    id = random.randint(1000, 1999)
    user = random.choice(users)
    dest = users_dest[user]
    _id = getHashLfn(lfn_base % (user, id, i))
    state = 'new'
    file_doc = {'_id': '%s' % _id,
                'lfn': lfn_base % (user, id, i),
                'dn': 'UserDN',
                #'_attachments': '',
                'checksums': {'adler32': 'ad:b2378cab'},
                'failure_reason': [],
                'group': '',
                'publish': 1,
                'timestamp': now,
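
The count argument to str.replace is what makes the temp->permanent promotion safe when 'store/temp' occurs more than once in a path: only the first occurrence is rewritten. A quick check with a path that contains it twice; the promotion direction shown is an assumption about the consumer of these docs:

lfn = '/store/temp/user/alice/my_cool_dataset/store/temp/file-1-1.root'
perm = lfn.replace('/store/temp/', '/store/', 1)
print perm  # /store/user/alice/my_cool_dataset/store/temp/file-1-1.root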
Esempio n. 29
    def __call__(self):
        """
        _getViewResults_
        Get the result of the view.
        """
        sites = [
            'T2_IT_Rome', 'T2_CH_CAF', 'T2_DE_DESY', 'T2_BR_UERJ',
            'T2_CH_CSCS', 'T2_CN_Beijing', 'T2_DE_DESY', 'T2_DE_RWTH',
            'T2_EE_Estonia', 'T2_ES_CIEMAT', 'T2_ES_IFCA', 'T2_FI_HIP',
            'T2_FR_CCIN2P3', 'T2_FR_GRIF_IRFU', 'T2_FR_GRIF_LLR', 'T2_FR_IPHC',
            'T2_HU_Budapest', 'T2_IN_TIFR', 'T2_IT_Bari', 'T2_IT_Legnaro',
            'T2_IT_Pisa', 'T2_IT_Rome', 'T2_KR_KNU', 'T2_PK_NCP',
            'T2_PL_Cracow', 'T2_PL_Warsaw', 'T2_PT_LIP_Lisbon',
            'T2_PT_NCG_Lisbon', 'T2_RU_IHEP', 'T2_RU_INR', 'T2_RU_ITEP',
            'T2_RU_JINR', 'T2_RU_PNPI', 'T2_RU_RRC_KI', 'T2_RU_SINP',
            'T2_TR_METU', 'T2_TW_Taiwan', 'T2_UA_KIPT', 'T2_UK_London_Brunel',
            'T2_UK_London_IC', 'T2_UK_SGrid_Bristol', 'T2_UK_SGrid_RALPP',
            'T2_US_Caltech', 'T2_US_Florida', 'T2_US_MIT', 'T2_US_Nebraska',
            'T2_US_Purdue', 'T2_US_UCSD', 'T2_US_Wisconsin'
        ]

        numberUsers = 5
        j = 1

        users = []
        while j <= numberUsers:

            users.append('user' + str(random.randint(1, 1000)))
            j += 1

        size = 3

        i = 1

        lfn_base = '/store/temp/riahi/user/%s/store/temp/file-duplic-%s-%s.root'
        results = []

        while i <= size:

            last_update = int(time.time())
            user = random.choice(users)
            lfn = lfn_base % (user, random.randint(1000, 9999), i)
            id = getHashLfn(lfn)
            workflow = 'workflow-%s-%s' % (user, random.randint(1, 100))
            results.append({
                '_id': id,
                'source': random.choice(sites),
                'destination': random.choice(sites),
                'task': workflow,
                'workflow': workflow,
                'lfn': lfn,
                'jobid': random.randint(1000, 9999),
                'state': 'new',
                'last_update': last_update,
                'dbSource_update': last_update,
                'retry_count': [],
                'checksums': 'checksum',
                'size': random.randint(1000, 9999),
                'dn': '/UserDN',
                'group': '',
                'role': '',
                'user': user
            })

            i += 1

        logging.debug("Dummy docs queued %s" % results)
        return results
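
Rows like these are typically regrouped into per-link batches before submission (the jobs dict in the command example above is keyed by a source/destination pair). A sketch of that grouping over the dummy rows; the helper name and the batching itself are illustrative assumptions:

from collections import defaultdict

def group_by_link(rows):
    # one batch of LFNs per (source, destination) pair
    jobs = defaultdict(list)
    for row in rows:
        jobs[(row['source'], row['destination'])].append(row['lfn'])
    return jobs

# usage with the dummy view results generated above:
# for link, lfns in group_by_link(results).items():
#     print "%s -> %s : %s files" % (link[0], link[1], len(lfns))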