def createFileDocinFilesDB(self, doc_id='', state='new', publication_state='not_published'):
    """
    Creates a test document in files_db
    """
    doc = {}
    lfn = random.choice(self.lfn) + doc_id
    doc['_id'] = getHashLfn(lfn)
    doc['dn'] = "/C=IT/O=INFN/OU=Personal Certificate/L=Perugia/CN=Hassen Riahi"
    doc['workflow'] = 'someWorkflow'
    doc['jobid'] = '1'
    doc['lfn'] = lfn
    doc['retry_count'] = []
    doc['source'] = random.choice(self.sites)
    doc['destination'] = random.choice(self.sites)
    doc['user'] = random.choice(self.users)
    doc['group'] = 'someGroup'
    doc['role'] = 'someRole'
    doc['state'] = state
    doc['checksums'] = 'someChecksums'
    doc['start_time'] = str(datetime.datetime.now())
    doc['end_time'] = str(datetime.datetime.now())
    doc['dbSource_url'] = 'someUrl'
    doc['size'] = 1000
    doc['end_time'] = 10000
    doc['last_update'] = 10000
    doc['job_end_time'] = 10000
    doc['publication_state'] = publication_state
    doc['publication_retry_count'] = []
    doc['publish_dbs_url'] = 'https://cmsdbsprod.cern.ch:8443/cms_dbs_ph_analysis_02_writer/servlet/DBSServlet'
    doc['inputdataset'] = '/RelValProdTTbar/JobRobot-MC_3XY_V24_JobRobot-v1/GEN-SIM-DIGI-RECO'
    doc['dbs_url'] = 'http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet'
    self.db.queue(doc, True)
    self.db.commit()
    return doc
def pull_value(row):
    now = str(datetime.datetime.now())
    last_update = int(time.time())
    # Prepare file documents
    value = row["value"]
    value["lfn"] = value["_id"]
    value["user"] = value["_id"].split("/")[4]
    value["_id"] = getHashLfn(value["_id"])
    value["size"] = value["size"]
    value["retry_count"] = []
    value["state"] = "new"
    value["start_time"] = now
    value["last_update"] = last_update
    # Attributes required for publication
    value["job_end_time"] = row["key"]
    value["publication_state"] = "not_published"
    value["publication_retry_count"] = []
    try:
        value["dbSource_url"] = self.config.data_source.replace(
            ((self.config.data_source).split("@")[0]).split("//")[1] + "@", ""
        )
    except:
        value["dbSource_url"] = self.config.data_source
    return value
def mark_acquired(self, files=[]):
    """
    Mark the list of files as transferred
    """
    lfn_in_transfer = []
    dash_rep = ()
    if self.config.isOracle:
        toUpdate = list()
        for lfn in files:
            if lfn['value'][0].find('temp') == 7:
                docId = lfn['key'][5]
                toUpdate.append(docId)
                try:
                    docbyId = self.oracleDB.get(self.config.oracleFileTrans.replace('filetransfers',
                                                                                    'fileusertransfers'),
                                                data=encodeRequest({'subresource': 'getById', 'id': docId}))
                    document = oracleOutputMapping(docbyId, None)[0]
                    dash_rep = (document['jobid'], document['job_retry_count'], document['taskname'])
                    lfn_in_transfer.append(lfn)
                except Exception as ex:
                    self.logger.error("Error during dashboard report update: %s" % ex)
                    return [], ()
        return lfn_in_transfer, dash_rep
    else:
        for lfn in files:
            if lfn['value'][0].find('temp') == 7:
                docId = getHashLfn(lfn['value'][0])
                self.logger.debug("Marking acquired %s" % docId)
                # Load document to get the retry_count
                try:
                    document = self.db.document(docId)
                except Exception as ex:
                    msg = "Error loading document from couch"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue
                if document['state'] == 'new' or document['state'] == 'retry':
                    data = dict()
                    data['state'] = 'acquired'
                    data['last_update'] = time.time()
                    updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + docId
                    updateUri += "?" + urllib.urlencode(data)
                    try:
                        self.db.makeRequest(uri=updateUri, type="PUT", decode=False)
                    except Exception as ex:
                        msg = "Error updating document in couch"
                        msg += str(ex)
                        msg += str(traceback.format_exc())
                        self.logger.error(msg)
                        continue
                    self.logger.debug("Marked acquired %s of %s" % (docId, lfn))
                    lfn_in_transfer.append(lfn)
                    dash_rep = (document['jobid'], document['job_retry_count'], document['workflow'])
                else:
                    continue
            else:
                good_lfn = lfn['value'][0].replace('store', 'store/temp', 1)
                self.mark_good([good_lfn])
        return lfn_in_transfer, dash_rep
def createTestFileFinishedYesterdayinFilesDB(self):
    """
    Creates a test document in files_db
    """
    doc = {}
    doc['_id'] = getHashLfn("/this/is/a/lfnA")
    doc['dn'] = "/C=IT/O=INFN/OU=Personal Certificate/L=Perugia/CN=Hassen Riahi"
    doc['workflow'] = 'someWorkflow'
    doc['size'] = 999999
    doc['jobid'] = '1'
    doc['lfn'] = '/this/is/a/lfnA'
    doc['retry_count'] = []
    doc['source'] = random.choice(self.sites)
    doc['destination'] = random.choice(self.sites)
    doc['user'] = random.choice(self.users)
    doc['state'] = 'done'
    doc['start_time'] = str(datetime.datetime.now()).\
        replace(str(datetime.datetime.now()).split(" ")[0].split("-")[2],
                str(int(str(datetime.datetime.now()).split(" ")[0].split("-")[2]) - 3))
    doc['end_time'] = str(datetime.datetime.now()).\
        replace(str(datetime.datetime.now()).split(" ")[0].split("-")[2],
                str(int(str(datetime.datetime.now()).split(" ")[0].split("-")[2]) - 2))
    doc['job_end_time'] = str(time.time())
    doc['dbSource_url'] = 'someUrl'
    self.db.queue(doc, True)
    self.db.commit()
    return doc
def pull_value(row):
    now = str(datetime.datetime.now())
    last_update = int(time.time())
    # Prepare file documents
    value = row['value']
    value['lfn'] = value["_id"]
    value['user'] = value["_id"].split('/')[4]
    value['_id'] = getHashLfn(value["_id"])
    value['size'] = value['size']
    value['retry_count'] = []
    value['state'] = 'new'
    value['start_time'] = now
    value['last_update'] = last_update
    # Attributes required for publication
    value['job_end_time'] = row['key']
    value['publication_state'] = 'not_published'
    value['publication_retry_count'] = []
    try:
        value['dbSource_url'] = self.config.data_source.replace(
            ((self.config.data_source).split("@")[0]).split("//")[1] + "@", "")
    except:
        value['dbSource_url'] = self.config.data_source
    return value
def mark_failed(self, files=[]):
    """
    Something failed for these files so increment the retry count
    """
    now = str(datetime.datetime.now())
    last_update = int(time.time())
    for lfn in files:
        data = {}
        docId = getHashLfn(lfn)
        # Load document to get the retry_count
        try:
            document = self.db.document(docId)
        except Exception, ex:
            msg = "Error loading document from couch"
            msg += str(ex)
            msg += str(traceback.format_exc())
            self.logger.error(msg)
        # Prepare data to update the document in couch
        if len(document['publication_retry_count']) + 1 > self.max_retry:
            data['publication_state'] = 'publication_failed'
        else:
            data['publication_state'] = 'publishing'
        data['last_update'] = last_update
        data['retry'] = now
        # Update the document in couch
        try:
            updateUri = "/" + self.db.name + "/_design/DBSPublisher/_update/updateFile/" + docId
            updateUri += "?" + urllib.urlencode(data)
            self.db.makeRequest(uri=updateUri, type="PUT", decode=False)
        except Exception, ex:
            msg = "Error in updating document in couch"
            msg += str(ex)
            msg += str(traceback.format_exc())
            self.logger.error(msg)
def mark_good(self, files=[]):
    """
    Mark the list of files as transferred
    """
    now = str(datetime.datetime.now())
    last_update = int(time.time())
    for lfn in files:
        try:
            document = self.db.document(getHashLfn(lfn))
        except Exception, ex:
            msg = "Error loading document from couch"
            msg += str(ex)
            msg += str(traceback.format_exc())
            self.logger.error(msg)
        outputLfn = document['lfn'].replace('store/temp', 'store', 1)
        try:
            data = {}
            data['end_time'] = now
            data['state'] = 'done'
            data['lfn'] = outputLfn
            data['last_update'] = last_update
            updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + getHashLfn(lfn)
            updateUri += "?" + urllib.urlencode(data)
            self.db.makeRequest(uri=updateUri, type="PUT", decode=False)
        except Exception, ex:
            msg = "Error updating document in couch"
            msg += str(ex)
            msg += str(traceback.format_exc())
            self.logger.error(msg)
def testF_TestIfgetHashLfnHashCorrectlyLFNs(self):
    """
    _testF_TestIfgetHashLfnHashCorrectlyLFNs_

    Tests if the getHashLfn function of the AsyncStageOut module
    hashes LFNs correctly.
    """
    lfn = "/My/lfn/path"
    hashedLfn = getHashLfn(lfn)
    assert hashlib.sha224(lfn).hexdigest() == hashedLfn
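# A minimal sketch of getHashLfn itself, for reference: the assertion above
# implies it returns the SHA-224 hex digest of the LFN string. This is an
# assumption based only on that test, not necessarily the exact helper
# shipped with AsyncStageOut (under Python 3 the LFN would need encoding first).
import hashlib

def getHashLfn(lfn):
    """Hash an LFN into a fixed-length id usable as a document _id (sketch)."""
    return hashlib.sha224(lfn).hexdigest()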
def mark_good(self, files=[]):
    """
    Mark the list of files as transferred
    """
    updated_lfn = []
    for lfn in files:
        hash_lfn = getHashLfn(lfn)
        self.logger.info("Marking good %s" % hash_lfn)
        self.logger.debug("Marking good %s" % lfn)
        try:
            document = self.db.document(hash_lfn)
        except Exception, ex:
            msg = "Error loading document from couch"
            msg += str(ex)
            msg += str(traceback.format_exc())
            self.logger.error(msg)
            continue
        self.logger.info("Doc %s Loaded" % hash_lfn)
        if document['state'] != 'killed' and document['state'] != 'done' and document['state'] != 'failed':
            outputLfn = document['lfn'].replace('store/temp', 'store', 1)
            try:
                now = str(datetime.datetime.now())
                last_update = time.time()
                data = {}
                data['end_time'] = now
                data['state'] = 'done'
                data['lfn'] = outputLfn
                data['last_update'] = last_update
                updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + getHashLfn(lfn)
                updateUri += "?" + urllib.urlencode(data)
                self.db.makeRequest(uri=updateUri, type="PUT", decode=False)
                updated_lfn.append(lfn)
                self.logger.debug("Marked good %s" % lfn)
            except Exception, ex:
                msg = "Error updating document in couch"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.error(msg)
                continue
            try:
                self.db.commit()
            except Exception, ex:
                msg = "Error committing documents in couch"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.error(msg)
                continue
def mark_acquired(self, files=[]):
    """
    Mark the list of files as transferred
    """
    lfn_in_transfer = []
    dash_rep = ()
    for lfn in files:
        if lfn['value'].find('temp') > 1:
            docId = getHashLfn(lfn['value'])
            self.logger.debug("Marking acquired %s" % docId)
            # Load document to get the retry_count
            try:
                document = self.db.document(docId)
            except Exception, ex:
                msg = "Error loading document from couch"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.error(msg)
                continue
            if (document['state'] == 'new' or document['state'] == 'retry'):
                data = {}
                data['state'] = 'acquired'
                data['last_update'] = time.time()
                updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + docId
                updateUri += "?" + urllib.urlencode(data)
                try:
                    self.db.makeRequest(uri=updateUri, type="PUT", decode=False)
                except Exception, ex:
                    msg = "Error updating document in couch"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue
                self.logger.debug("Marked acquired %s of %s" % (docId, lfn))
                lfn_in_transfer.append(lfn)
                dash_rep = (document['jobid'], document['job_retry_count'], document['workflow'])
            else:
                continue
def mark_acquired(self, files=[]):
    """
    Mark the list of files as transferred
    """
    lfn_in_transfer = []
    dash_rep = ()
    for lfn in files:
        if lfn['value'][0].find('temp') == 7:
            docId = getHashLfn(lfn['value'][0])
            self.logger.debug("Marking acquired %s" % docId)
            # Load document to get the retry_count
            try:
                document = self.db.document(docId)
            except Exception as ex:
                msg = "Error loading document from couch"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.error(msg)
                continue
            if (document['state'] == 'new' or document['state'] == 'retry'):
                data = {}
                data['state'] = 'acquired'
                data['last_update'] = time.time()
                updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + docId
                updateUri += "?" + urllib.urlencode(data)
                try:
                    self.db.makeRequest(uri=updateUri, type="PUT", decode=False)
                except Exception as ex:
                    msg = "Error updating document in couch"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue
                self.logger.debug("Marked acquired %s of %s" % (docId, lfn))
                lfn_in_transfer.append(lfn)
                dash_rep = (document['jobid'], document['job_retry_count'], document['workflow'])
            else:
                continue
        else:
            good_lfn = lfn['value'][0].replace('store', 'store/temp', 1)
            self.mark_good([good_lfn])
    return lfn_in_transfer, dash_rep
def pull_value(row):
    now = str(datetime.datetime.now())
    # Prepare the files_db document
    value = row['value']
    value['lfn'] = value["_id"]
    value['user'] = value["_id"].split('/')[4]
    value['_id'] = getHashLfn(value["_id"])
    value['size'] = value['size']
    value['retry_count'] = []
    value['state'] = 'new'
    value['start_time'] = now
    value['dbSource_update'] = row['key']
    try:
        value['dbSource_url'] = self.config.data_source.replace(
            ((self.config.data_source).split("@")[0]).split("//")[1] + "@", "")
    except:
        value['dbSource_url'] = self.config.data_source
    return value
def update_FTSJobID(self, jobReport):
    """
    """
    for job in jobReport:
        try:
            fileDoc = dict()
            fileDoc['asoworker'] = self.config.asoworker
            fileDoc['subresource'] = 'updateTransfers'
            fileDoc['list_of_ids'] = [getHashLfn(x) for x in job['LFNs']]
            fileDoc['list_of_transfer_state'] = ["SUBMITTED" for x in job['LFNs']]
            fileDoc['list_of_fts_instance'] = [self.fts_server_for_transfer for x in job['LFNs']]
            fileDoc['list_of_fts_id'] = [job['FTSJobid'] for x in job['LFNs']]
            self.logger.debug("Marking submitted %s files " % (len(fileDoc['list_of_ids'])))
            result = self.oracleDB.post(self.config.oracleFileTrans,
                                        data=encodeRequest(fileDoc))
            self.logger.debug("Marked submitted %s" % (fileDoc['list_of_ids']))
        except Exception as ex:
            self.logger.error("Error during status update: %s" % ex)
            time.sleep(10)
            return False
    return True
## for out_lfn in job_doc['files']:
##     if not status.has_key(out_lfn): status[out_lfn] = 'done'
message['transferStatus'] = status
if len(done_files) != number_ended_files:
    try:
        failed_files = self.monitoring_db.loadView('UserMonitoring', 'LFNFailedByJobId', query)['rows']
    except Exception, e:
        self.logger.exception('A problem occurred when contacting UserMonitoring - LFNFailedByJobId: %s' % e)
        return
    self.logger.info("the job %s has %s failed files %s" % (job, len(failed_files), failed_files))
    transferError = ""
    for file in failed_files:
        if file['value'].find('temp') > 1:
            status[file['value']] = 'failed'
            lfn = file['value']
            docId = getHashLfn(lfn)
            # Load document to get the failure reason from the output file
            try:
                document = self.monitoring_db.document(docId)
                #if (document['file_type'] == "output" and document.has_key('failure_reason')):
                if document.has_key('failure_reason'):
                    if transferError:
                        transferError = transferError + "," + document['failure_reason']
                    else:
                        transferError = document['failure_reason']
                if document.has_key('publication_state'):
                    if document['publication_state'] == 'publication_failed':
                        if transferError:
                            transferError = transferError + "," + 'Publication Failure'
                        else:
                            transferError = 'Publication Failure'
def mark_failed(self, files=[], failures_reasons=[], force_fail=False):
    """
    Something failed for these files so increment the retry count
    """
    updated_lfn = []
    for lfn in files:
        data = {}
        self.logger.debug("Document: %s" % lfn)
        if not isinstance(lfn, dict):
            if 'temp' not in lfn:
                temp_lfn = lfn.replace('store', 'store/temp', 1)
            else:
                temp_lfn = lfn
        else:
            if 'temp' not in lfn['value']:
                temp_lfn = lfn['value'].replace('store', 'store/temp', 1)
            else:
                temp_lfn = lfn['value']
        docId = getHashLfn(temp_lfn)
        # Load document to get the retry_count
        if self.config.isOracle:
            try:
                self.logger.debug("Document: %s" % docId)
                docbyId = self.oracleDB.get(self.config.oracleFileTrans.replace('filetransfers',
                                                                                'fileusertransfers'),
                                            data=encodeRequest({'subresource': 'getById', 'id': docId}))
                document = oracleOutputMapping(docbyId)[0]
                data = dict()
                data['asoworker'] = self.config.asoworker
                data['subresource'] = 'updateTransfers'
                data['list_of_ids'] = docId
                if force_fail or document['transfer_retry_count'] + 1 > self.max_retry:
                    data['list_of_transfer_state'] = 'FAILED'
                    data['list_of_retry_value'] = 0
                else:
                    data['list_of_transfer_state'] = 'RETRY'
                    fatal_error = self.determine_fatal_error(failures_reasons[files.index(lfn)])
                    if fatal_error:
                        data['list_of_transfer_state'] = 'FAILED'
                data['list_of_failure_reason'] = failures_reasons[files.index(lfn)]
                data['list_of_retry_value'] = 0
                self.logger.debug("update: %s" % data)
                result = self.oracleDB.post(self.config.oracleFileTrans,
                                            data=encodeRequest(data))
                if not data['list_of_transfer_state'] == 'RETRY':
                    updated_lfn.append(lfn)
                self.logger.debug("Marked failed %s" % lfn)
            except Exception as ex:
                self.logger.error("Error updating document status: %s" % ex)
                continue
        else:
            try:
                document = self.db.document(docId)
            except Exception as ex:
                msg = "Error loading document from couch"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.error(msg)
                continue
            if document['state'] != 'killed' and document['state'] != 'done' and document['state'] != 'failed':
                now = str(datetime.datetime.now())
                last_update = time.time()
                # Prepare data to update the document in couch
                if force_fail or len(document['retry_count']) + 1 > self.max_retry:
                    data['state'] = 'failed'
                    data['end_time'] = now
                else:
                    data['state'] = 'retry'
                    fatal_error = self.determine_fatal_error(failures_reasons[files.index(lfn)])
                    if fatal_error:
                        data['state'] = 'failed'
                        data['end_time'] = now
                self.logger.debug("Failure list: %s" % failures_reasons)
                self.logger.debug("Files: %s" % files)
                self.logger.debug("LFN %s" % lfn)
                data['failure_reason'] = failures_reasons[files.index(lfn)]
                data['last_update'] = last_update
                data['retry'] = now
                # Update the document in couch
                self.logger.debug("Marking failed %s" % docId)
                try:
                    updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + docId
                    updateUri += "?" + urllib.urlencode(data)
                    self.db.makeRequest(uri=updateUri, type="PUT", decode=False)
                    updated_lfn.append(docId)
                    self.logger.debug("Marked failed %s" % docId)
                except Exception as ex:
                    msg = "Error in updating document in couch"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue
                try:
                    self.db.commit()
                except Exception as ex:
                    msg = "Error committing documents in couch"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue
            else:
                updated_lfn.append(docId)
                self.logger.debug("failed file updated")
    return updated_lfn
def mark_good(self, files):
    """
    Mark the list of files as transferred
    """
    updated_lfn = []
    good_ids = []
    if len(files) == 0:
        return updated_lfn
    for it, lfn in enumerate(files):
        hash_lfn = getHashLfn(lfn)
        self.logger.info("Marking good %s" % hash_lfn)
        self.logger.debug("Marking good %s" % lfn)
        if not self.config.isOracle:
            try:
                document = self.db.document(hash_lfn)
            except Exception as ex:
                msg = "Error loading document from couch"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.error(msg)
                continue
            self.logger.info("Doc %s Loaded" % hash_lfn)
        try:
            now = str(datetime.datetime.now())
            last_update = time.time()
            if self.config.isOracle:
                docId = getHashLfn(lfn)
                good_ids.append(docId)
                updated_lfn.append(lfn)
            else:
                if document['state'] != 'killed' and document['state'] != 'done' and document['state'] != 'failed':
                    outputLfn = document['lfn'].replace('store/temp', 'store', 1)
                    data = dict()
                    data['end_time'] = now
                    data['state'] = 'done'
                    data['lfn'] = outputLfn
                    data['last_update'] = last_update
                    updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + getHashLfn(lfn)
                    updateUri += "?" + urllib.urlencode(data)
                    self.db.makeRequest(uri=updateUri, type="PUT", decode=False)
                    updated_lfn.append(lfn)
                    self.logger.debug("Marked good %s" % lfn)
                else:
                    updated_lfn.append(lfn)
                try:
                    self.db.commit()
                except Exception as ex:
                    msg = "Error committing documents in couch"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue
        except Exception as ex:
            msg = "Error updating document"
            msg += str(ex)
            msg += str(traceback.format_exc())
            self.logger.error(msg)
            continue
    if self.config.isOracle:
        try:
            data = dict()
            data['asoworker'] = self.config.asoworker
            data['subresource'] = 'updateTransfers'
            data['list_of_ids'] = good_ids
            data['list_of_transfer_state'] = ["DONE" for x in good_ids]
            result = self.oracleDB.post(self.config.oracleFileTrans,
                                        data=encodeRequest(data))
            self.logger.debug("Marked good %s" % good_ids)
        except Exception:
            self.logger.exception('Error updating document')
            return {}
        self.logger.info("Transferred file %s updated, removing now source file" % docId)
        try:
            docbyId = self.oracleDB.get(self.config.oracleFileTrans.replace('filetransfers', 'fileusertransfers'),
                                        data=encodeRequest({'subresource': 'getById', 'id': docId}))
            document = oracleOutputMapping(docbyId, None)[0]
        except Exception:
            msg = "Error getting file from source"
            self.logger.exception(msg)
            return {}
        if document["source"] not in self.site_tfc_map:
            self.logger.debug("site not found... gathering info from phedex")
            self.site_tfc_map[document["source"]] = self.get_tfc_rules(document["source"])
        pfn = self.apply_tfc_to_lfn('%s:%s' % (document["source"], lfn))
        self.logger.debug("File has to be removed now from source site: %s" % pfn)
        self.remove_files(self.userProxy, pfn)
        self.logger.debug("Transferred file removed from source")
    return updated_lfn
def killThread(self, thread_id, transfers):
    """This is the worker thread function for the kill command.
    """
    while True:
        transfer_list = transfers.get()
        self.logger.info("Starting thread %s" % (thread_id))
        user = transfer_list[0]['username']
        group = transfer_list[0]['user_group']
        role = transfer_list[0]['user_role']
        uiSetupScript = getattr(self.config, 'UISetupScript', None)
        self.logger.debug("Trying to get DN for %s %s %s %s" % (user, self.logger, self.config.opsProxy, self.config.opsProxy))
        try:
            userDN = getDNFromUserName(user, self.logger, ckey=self.config.opsProxy, cert=self.config.opsProxy)
        except Exception as ex:
            msg = "Error retrieving the user DN"
            msg += str(ex)
            msg += str(traceback.format_exc())
            self.logger.error(msg)
            continue
        if not userDN:
            transfers.task_done()
            time.sleep(1)
            continue
        self.logger.debug("user DN: %s" % userDN)

        try:
            defaultDelegation = {'logger': self.logger,
                                 'credServerPath': self.config.credentialDir,
                                 'myProxySvr': 'myproxy.cern.ch',
                                 'min_time_left': getattr(self.config, 'minTimeLeft', 36000),
                                 'serverDN': self.config.serverDN,
                                 'uisource': uiSetupScript,
                                 'cleanEnvironment': getattr(self.config, 'cleanEnvironment', False)}
            if hasattr(self.config, "cache_area"):
                cache_area = self.config.cache_area
                defaultDelegation['myproxyAccount'] = re.compile('https?://([^/]*)/.*').findall(cache_area)[0]
        except IndexError:
            self.logger.error('MyproxyAccount parameter cannot be retrieved from %s . ' % self.config.cache_area)
            transfers.task_done()
            time.sleep(1)
            continue
        if getattr(self.config, 'serviceCert', None):
            defaultDelegation['server_cert'] = self.config.serviceCert
        if getattr(self.config, 'serviceKey', None):
            defaultDelegation['server_key'] = self.config.serviceKey
        try:
            defaultDelegation['userDN'] = userDN
            defaultDelegation['group'] = group if group else ''
            defaultDelegation['role'] = role if group else ''
            self.logger.debug('delegation: %s' % defaultDelegation)
            valid_proxy, user_proxy = getProxy(defaultDelegation, self.logger)
        except Exception as ex:
            msg = "Error getting the user proxy"
            msg += str(ex)
            msg += str(traceback.format_exc())
            self.logger.error(msg)
            transfers.task_done()
            time.sleep(1)
            continue

        # TODO: take the server from the db; right now take only the first of the list and assume it is valid for all
        try:
            # TODO: debug u added during info upload. To be fixed soon! For now worked around
            fts_server = transfer_list[0]['fts_instance'].split('u')[1]
            self.logger.info("Delegating proxy to %s" % fts_server)
            context = fts3.Context(fts_server, user_proxy, user_proxy, verify=True)
            self.logger.debug(fts3.delegate(context, lifetime=timedelta(hours=48), force=False))

            self.logger.info("Proxy delegated. Grouping files by jobId")
            jobs = {}
            for fileToKill in transfer_list:
                # TODO: debug u added during info upload. To be fixed soon! For now worked around
                jid = str(fileToKill['fts_id']).split('u')[1]
                if jid not in jobs:
                    jobs[jid] = []
                jobs[jid].append(fileToKill)

            self.logger.info("Found %s jobIds", len(jobs.keys()))
            self.logger.debug("jobIds: %s", jobs.keys())

            # lists for files killed or failed to be killed
            killed = []
            too_late = []

            for ftsJobId, files in jobs.iteritems():
                self.logger.info("Cancelling transfers in %s" % ftsJobId)

                ref_lfns = [str(x['destination_lfn'].split('/store/')[1]) for x in files]
                source_lfns = [x['source_lfn'] for x in files]

                job_list = fts3.get_job_status(context, ftsJobId, list_files=True)
                tx = job_list['files']

                # TODO: this workaround is needed to get the FTS file id; we may want to add a column in the db
                idListToKill = [x['file_id'] for x in tx
                                if x['dest_surl'].split('/cms/store/')[1] in ref_lfns]

                # needed for the state update
                lfnListToKill = [ref_lfns.index(str(x['dest_surl'].split('/cms/store/')[1])) for x in tx
                                 if x['dest_surl'].split('/cms/store/')[1] in ref_lfns]

                self.logger.debug("List of ids to cancel for job %s: %s" % (ftsJobId, idListToKill))
                res = fts3.cancel(context, ftsJobId, idListToKill)
                self.logger.debug('Kill command result: %s' % json.dumps(res))

                if not isinstance(res, list):
                    res = [res]

                # Verify if the kill command succeeded
                for k, kill_res in enumerate(res):
                    indexToUpdate = lfnListToKill[k]
                    if kill_res in ("FINISHEDDIRTY", "FINISHED", "FAILED"):
                        self.logger.debug(source_lfns[indexToUpdate])
                        too_late.append(getHashLfn(source_lfns[indexToUpdate]))
                    else:
                        killed.append(getHashLfn(source_lfns[indexToUpdate]))

            # TODO: decide how to update the status for too_late files
            killed += too_late
            self.logger.debug('Updating status of killed files: %s' % killed)

            if len(killed) > 0:
                data = dict()
                data['asoworker'] = self.config.asoworker
                data['subresource'] = 'updateTransfers'
                data['list_of_ids'] = killed
                data['list_of_transfer_state'] = ["KILLED" for _ in killed]
                self.oracleDB.post(self.config.oracleFileTrans,
                                   data=encodeRequest(data))
                self.logger.debug("Marked killed %s" % killed)
        except:
            # TODO: split and improve try/except
            self.logger.exception('Kill command failed')

        transfers.task_done()
def mark_failed(self, files=[], failures_reasons=[], force_fail=False):
    """
    Something failed for these files so increment the retry count
    """
    updated_lfn = []
    for lfn in files:
        data = {}
        if not isinstance(lfn, dict):
            if 'temp' not in lfn:
                temp_lfn = lfn.replace('store', 'store/temp', 1)
            else:
                temp_lfn = lfn
            perm_lfn = lfn
        else:
            if 'temp' not in lfn['value']:
                temp_lfn = lfn['value'].replace('store', 'store/temp', 1)
            else:
                temp_lfn = lfn['value']
            perm_lfn = lfn['value']
        docId = getHashLfn(temp_lfn)
        # Load document to get the retry_count
        try:
            document = self.db.document(docId)
        except Exception, ex:
            msg = "Error loading document from couch"
            msg += str(ex)
            msg += str(traceback.format_exc())
            self.logger.error(msg)
            continue
        if document['state'] != 'killed' and document['state'] != 'done' and document['state'] != 'failed':
            now = str(datetime.datetime.now())
            last_update = time.time()
            # Prepare data to update the document in couch
            if force_fail or len(document['retry_count']) + 1 > self.max_retry:
                data['state'] = 'failed'
                data['end_time'] = now
            else:
                data['state'] = 'retry'
            self.logger.debug("Failure list: %s" % failures_reasons)
            self.logger.debug("Files: %s" % files)
            self.logger.debug("LFN %s" % lfn)
            data['failure_reason'] = failures_reasons[files.index(lfn)]
            data['last_update'] = last_update
            data['retry'] = now
            # Update the document in couch
            self.logger.debug("Marking failed %s" % docId)
            try:
                updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + docId
                updateUri += "?" + urllib.urlencode(data)
                self.db.makeRequest(uri=updateUri, type="PUT", decode=False)
                updated_lfn.append(docId)
                self.logger.debug("Marked failed %s" % docId)
            except Exception, ex:
                msg = "Error in updating document in couch"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.error(msg)
                continue
            try:
                self.db.commit()
            except Exception, ex:
                msg = "Error committing documents in couch"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.error(msg)
                continue
def command(self, jobs, jobs_lfn, jobs_pfn, jobs_report):
    """
    For each job the worker has to complete:
        Delete files that have failed previously
        Create a temporary copyjob file
        Submit the copyjob to the appropriate FTS server
        Parse the output of the FTS transfer and return complete and failed files for recording
    """
    # Output: {"userProxyPath":"/path/to/proxy","LFNs":["lfn1","lfn2","lfn3"],"PFNs":["pfn1","pfn2","pfn3"],"FTSJobid":'id-of-fts-job', "username": '******'}
    # Loop through all the jobs for the links we have
    failure_reasons = []
    for link, copyjob in jobs.items():
        submission_error = False
        status_error = False
        fts_job = {}
        # Validate copyjob file before doing anything
        self.logger.debug("Valid %s" % self.validate_copyjob(copyjob))
        if not self.validate_copyjob(copyjob):
            continue
        rest_copyjob = {
            "params": {
                "bring_online": None,
                "verify_checksum": False,
                "copy_pin_lifetime": -1,
                "max_time_in_queue": self.config.max_h_in_queue,
                "job_metadata": {"issuer": "ASO"},
                "spacetoken": None,
                "source_spacetoken": None,
                "fail_nearline": False,
                "overwrite": True,
                "gridftp": None
            },
            "files": []
        }
        pairs = []
        for SrcDest in copyjob:
            tempDict = {"sources": [], "metadata": None, "destinations": []}
            tempDict["sources"].append(SrcDest.split(" ")[0])
            tempDict["destinations"].append(SrcDest.split(" ")[1])
            rest_copyjob["files"].append(tempDict)
        self.logger.debug("Submitting this REST copyjob %s" % rest_copyjob)
        url = self.fts_server_for_transfer + '/jobs'
        self.logger.debug("Running FTS submission command")
        self.logger.debug("FTS server: %s" % self.fts_server_for_transfer)
        self.logger.debug("link: %s -> %s" % link)
        heade = {"Content-Type ": "application/json"}
        buf = StringIO.StringIO()
        try:
            connection = RequestHandler(config={'timeout': 300, 'connecttimeout': 300})
        except Exception as ex:
            msg = str(ex)
            msg += str(traceback.format_exc())
            self.logger.debug(msg)
        try:
            response, datares = connection.request(url, rest_copyjob, heade, verb='POST',
                                                   doseq=True, ckey=self.user_proxy,
                                                   cert=self.user_proxy,
                                                   capath='/etc/grid-security/certificates',
                                                   cainfo=self.user_proxy, verbose=True)
            self.logger.debug("Submission done")
            self.logger.debug('Submission header status: %s' % response.status)
            self.logger.debug('Submission header reason: %s' % response.reason)
            self.logger.debug('Submission result %s' % datares)
        except Exception as ex:
            msg = "Error submitting to FTS: %s " % url
            msg += str(ex)
            msg += str(traceback.format_exc())
            self.logger.debug(msg)
            failure_reasons.append(msg)
            submission_error = True
        buf.close()
        if not submission_error:
            res = {}
            try:
                res = json.loads(datares)
            except Exception as ex:
                msg = "Couldn't load submission acknowledgment from FTS"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.debug(msg)
                submission_error = True
                failure_reasons.append(msg)
            if 'job_id' in res:
                fileId_list = []
                files_res = []
                files_ = {}
                job_id = res['job_id']
                file_url = self.fts_server_for_transfer + '/jobs/' + job_id + '/files'
                self.logger.debug("Submitting to %s" % file_url)
                file_buf = StringIO.StringIO()
                try:
                    response, files_ = connection.request(file_url, {}, heade, doseq=True,
                                                          ckey=self.user_proxy,
                                                          cert=self.user_proxy,
                                                          capath='/etc/grid-security/certificates',
                                                          cainfo=self.user_proxy, verbose=True)
                    files_res = json.loads(files_)
                except Exception as ex:
                    msg = "Error contacting FTS to retrieve file: %s " % file_url
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.debug(msg)
                    submission_error = True
                    failure_reasons.append(msg)
                self.logger.debug("List files in job %s" % files_)
                file_buf.close()
                for file_in_job in files_res:
                    if 'file_id' in file_in_job:
                        fileId_list.append(file_in_job['file_id'])
                    else:
                        msg = "Could not load submitted file %s from FTS" % file_url
                        self.logger.debug(msg)
                        submission_error = True
                        failure_reasons.append(msg)
                self.logger.debug("File id list %s" % fileId_list)
        if submission_error:
            self.logger.debug("Submission failed")
            self.logger.info("Mark failed %s files" % len(jobs_lfn[link]))
            self.logger.debug("Mark failed %s files" % jobs_lfn[link])
            failed_files = self.mark_failed(jobs_lfn[link], force_fail=False,
                                            submission_error=True,
                                            failure_reasons=failure_reasons)
            self.logger.info("Marked failed %s" % len(failed_files))
            continue
        fts_job['userProxyPath'] = self.user_proxy
        fts_job['LFNs'] = jobs_lfn[link]
        fts_job['PFNs'] = jobs_pfn[link]
        fts_job['FTSJobid'] = job_id
        fts_job['files_id'] = fileId_list
        fts_job['username'] = self.user
        self.logger.debug("Creating json file %s in %s" % (fts_job, self.dropbox_dir))
        ftsjob_file = open('%s/Monitor.%s.json' % (self.dropbox_dir, fts_job['FTSJobid']), 'w')
        jsondata = json.dumps(fts_job)
        ftsjob_file.write(jsondata)
        ftsjob_file.close()
        self.logger.debug("%s ready." % fts_job)
        # Prepare Dashboard report
        for lfn in fts_job['LFNs']:
            lfn_report = {}
            lfn_report['FTSJobid'] = fts_job['FTSJobid']
            index = fts_job['LFNs'].index(lfn)
            lfn_report['PFN'] = fts_job['PFNs'][index]
            lfn_report['FTSFileid'] = fts_job['files_id'][index]
            lfn_report['Workflow'] = jobs_report[link][index][2]
            lfn_report['JobVersion'] = jobs_report[link][index][1]
            job_id = '%d_https://glidein.cern.ch/%d/%s_%s' % (int(jobs_report[link][index][0]),
                                                              int(jobs_report[link][index][0]),
                                                              lfn_report['Workflow'].replace("_", ":"),
                                                              lfn_report['JobVersion'])
            lfn_report['JobId'] = job_id
            lfn_report['URL'] = self.fts_server_for_transfer
            self.logger.debug("Creating json file %s in %s for FTS3 Dashboard" % (lfn_report, self.dropbox_dir))
            dash_job_file = open('/tmp/Dashboard.%s.json' % getHashLfn(lfn_report['PFN']), 'w')
            jsondata = json.dumps(lfn_report)
            dash_job_file.write(jsondata)
            dash_job_file.close()
            self.logger.debug("%s ready for FTS Dashboard report." % lfn_report)
    return
def mark_failed(self, files=[], force_fail=False, submission_error=False):
    """
    Something failed for these files so increment the retry count
    """
    updated_lfn = []
    for lfn in files:
        data = {}
        if not isinstance(lfn, dict):
            if 'temp' not in lfn:
                temp_lfn = lfn.replace('store', 'store/temp', 1)
            else:
                temp_lfn = lfn
        else:
            if 'temp' not in lfn['value'][0]:
                temp_lfn = lfn['value'][0].replace('store', 'store/temp', 1)
            else:
                temp_lfn = lfn['value'][0]
        # Load document and get the retry_count
        if self.config.isOracle:
            docId = getHashLfn(temp_lfn)
            self.logger.debug("Marking failed %s" % docId)
            try:
                docbyId = self.oracleDB.get(self.config.oracleFileTrans.replace('filetransfers',
                                                                                'fileusertransfers'),
                                            data=encodeRequest({'subresource': 'getById', 'id': docId}))
            except Exception as ex:
                self.logger.error("Error updating failed docs: %s" % ex)
                continue
            document = oracleOutputMapping(docbyId, None)[0]
            self.logger.debug("Document: %s" % document)

            fileDoc = dict()
            fileDoc['asoworker'] = self.config.asoworker
            fileDoc['subresource'] = 'updateTransfers'
            fileDoc['list_of_ids'] = docId

            if force_fail or document['transfer_retry_count'] + 1 > self.max_retry:
                fileDoc['list_of_transfer_state'] = 'FAILED'
                fileDoc['list_of_retry_value'] = 1
            else:
                fileDoc['list_of_transfer_state'] = 'RETRY'

            if submission_error:
                fileDoc['list_of_failure_reason'] = "Job could not be submitted to FTS: temporary problem of FTS"
                fileDoc['list_of_retry_value'] = 1
            elif not self.valid_proxy:
                fileDoc['list_of_failure_reason'] = "Job could not be submitted to FTS: user's proxy expired"
                fileDoc['list_of_retry_value'] = 1
            else:
                fileDoc['list_of_failure_reason'] = "Site config problem."
                fileDoc['list_of_retry_value'] = 1

            self.logger.debug("update: %s" % fileDoc)
            try:
                updated_lfn.append(docId)
                result = self.oracleDB.post(self.config.oracleFileTrans,
                                            data=encodeRequest(fileDoc))
            except Exception as ex:
                msg = "Error updating document"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.error(msg)
                continue
        else:
            docId = getHashLfn(temp_lfn)
            try:
                document = self.db.document(docId)
            except Exception as ex:
                msg = "Error loading document from couch"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.error(msg)
                continue
            if document['state'] != 'killed' and document['state'] != 'done' and document['state'] != 'failed':
                now = str(datetime.datetime.now())
                last_update = time.time()
                # Prepare data to update the document in couch
                if force_fail or len(document['retry_count']) + 1 > self.max_retry:
                    data['state'] = 'failed'
                else:
                    data['state'] = 'retry'
                if submission_error:
                    data['failure_reason'] = "Job could not be submitted to FTS: temporary problem of FTS"
                elif not self.valid_proxy:
                    data['failure_reason'] = "Job could not be submitted to FTS: user's proxy expired"
                else:
                    data['failure_reason'] = "Site config problem."
                data['last_update'] = last_update
                data['retry'] = now
                # Update the document in couch
                self.logger.debug("Marking failed %s" % docId)
                try:
                    updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + docId
                    updateUri += "?" + urllib.urlencode(data)
                    self.db.makeRequest(uri=updateUri, type="PUT", decode=False)
                    updated_lfn.append(docId)
                    self.logger.debug("Marked failed %s" % docId)
                except Exception as ex:
                    msg = "Error in updating document in couch"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue
                try:
                    self.db.commit()
                except Exception as ex:
                    msg = "Error committing documents in couch"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue
        self.logger.debug("failed file updated")
    return updated_lfn
def mark_good(self, files=[]):
    """
    Mark the list of files as transferred
    """
    last_update = int(time.time())
    for lfn in files:
        try:
            data = {}
            data['publication_state'] = 'published'
            data['last_update'] = last_update
            updateUri = "/" + self.db.name + "/_design/DBSPublisher/_update/updateFile/" + getHashLfn(lfn.replace('store', 'store/temp', 1))
            updateUri += "?" + urllib.urlencode(data)
            self.logger.info(updateUri)
            self.db.makeRequest(uri=updateUri, type="PUT", decode=False)
        except Exception, ex:
            msg = "Error updating document in couch"
            msg += str(ex)
            msg += str(traceback.format_exc())
            self.logger.error(msg)
def mark_failed(self, files=[], force_fail=False, submission_error=False):
    """
    Something failed for these files so increment the retry count
    """
    updated_lfn = []
    for lfn in files:
        data = {}
        if not isinstance(lfn, dict):
            if 'temp' not in lfn:
                temp_lfn = lfn.replace('store', 'store/temp', 1)
            else:
                temp_lfn = lfn
        else:
            if 'temp' not in lfn['value']:
                temp_lfn = lfn['value'].replace('store', 'store/temp', 1)
            else:
                temp_lfn = lfn['value']
        docId = getHashLfn(temp_lfn)
        # Load document to get the retry_count
        try:
            document = self.db.document(docId)
        except Exception, ex:
            msg = "Error loading document from couch"
            msg += str(ex)
            msg += str(traceback.format_exc())
            self.logger.error(msg)
            continue
        if document['state'] != 'killed' and document['state'] != 'done' and document['state'] != 'failed':
            now = str(datetime.datetime.now())
            last_update = time.time()
            # Prepare data to update the document in couch
            if force_fail or len(document['retry_count']) + 1 > self.max_retry:
                data['state'] = 'failed'
            else:
                data['state'] = 'retry'
            if submission_error:
                data['failure_reason'] = "Job could not be submitted to FTS"
            else:
                data['failure_reason'] = "Site config problem."
            data['last_update'] = last_update
            data['retry'] = now
            # Update the document in couch
            self.logger.debug("Marking failed %s" % docId)
            try:
                updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + docId
                updateUri += "?" + urllib.urlencode(data)
                self.db.makeRequest(uri=updateUri, type="PUT", decode=False)
                updated_lfn.append(docId)
                self.logger.debug("Marked failed %s" % docId)
            except Exception, ex:
                msg = "Error in updating document in couch"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.error(msg)
                continue
            try:
                self.db.commit()
            except Exception, ex:
                msg = "Error committing documents in couch"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.error(msg)
                continue
def command(self, jobs, jobs_lfn, jobs_pfn, jobs_report, retry=False):
    """
    For each job the worker has to complete:
        Delete files that have failed previously
        Create a temporary copyjob file
        Submit the copyjob to the appropriate FTS server
        Parse the output of the FTS transfer and return complete and failed files for recording
    """
    # Output: {"userProxyPath":"/path/to/proxy","LFNs":["lfn1","lfn2","lfn3"],"PFNs":["pfn1","pfn2","pfn3"],"FTSJobid":'id-of-fts-job', "username": '******'}
    tmp_file_pool = []
    #command2 = 'export X509_USER_PROXY=%s ; source %s ;' % (self.userProxy, self.uiSetupScript)
    #self.logger.debug("executing command: %s" % (command2))
    #stdout2, rc2 = execute_command(command2, self.logger, self.commandTimeout)
    for link, copyjob in jobs.items():
        submission_error = False
        status_error = False
        fts_job = {}
        # Validate copyjob file before doing anything
        self.logger.debug("Copyjob: %s" % copyjob)
        self.logger.debug("Valid %s" % self.validate_copyjob(copyjob))
        if not self.validate_copyjob(copyjob):
            continue
        tmp_copyjob_file = tempfile.NamedTemporaryFile(delete=False)
        tmp_copyjob_file.write('\n'.join(copyjob))
        tmp_copyjob_file.close()
        #self.logger.debug("Temp Copyjob: %s" % tmp_copyjob_file)
        tmp_file_pool.append(tmp_copyjob_file.name)
        #self.logger.debug("Temp Copyjob: %s" % tmp_copyjob_file)
        self.logger.debug("Running FTS submission command")
        self.logger.debug("FTS server: %s" % self.fts_server_for_transfer)
        self.logger.debug("link: %s -> %s" % link)
        self.logger.debug("copyjob file: %s" % tmp_copyjob_file.name)
        command = '%s/../../../apps/asyncstageout/Monitor/PHEDEX/Testbed/FakeFTS.pl %s -s %s -f %s' % (
            self.config.componentDir, self.submission_command,
            self.fts_server_for_transfer, tmp_copyjob_file.name)
        # command = 'export X509_USER_PROXY=%s ; source %s ; /data/ASO/async_install_103pre3/current/apps/asyncstageout/Monitor/PHEDEX/Testbed/FakeFTS.pl %s -s %s -f %s' % (self.userProxy, self.uiSetupScript,
        #     self.submission_command, self.fts_server_for_transfer,
        #     tmp_copyjob_file.name)
        init_time = str(strftime("%a, %d %b %Y %H:%M:%S", time.localtime()))
        self.logger.debug("executing command: %s at: %s for: %s" % (command, init_time, self.userDN))
        stdout, rc = execute_command(command, self.logger, self.commandTimeout)
        self.logger.debug("Submission result %s" % rc)
        self.logger.debug("Sending %s %s %s" % (jobs_lfn[link], jobs_pfn[link], stdout.strip()))
        if not rc:
            # Updating files to acquired in the database
            #self.logger.info("Mark acquired %s files" % len(jobs_lfn[link]))
            #self.logger.debug("Mark acquired %s files" % jobs_lfn[link])
            #acquired_files = self.mark_acquired(jobs_lfn[link])
            #self.logger.info("Marked acquired %s" % len(acquired_files))
            #if not acquired_files:
            #    continue
            fts_job['userProxyPath'] = self.userProxy
            fts_job['LFNs'] = jobs_lfn[link]
            fts_job['PFNs'] = jobs_pfn[link]
            fts_job['FTSJobid'] = stdout.strip()
            fts_job['username'] = self.user
            self.logger.debug("Creating json file %s in %s" % (fts_job, self.dropbox_dir))
            ftsjob_file = open('%s/Monitor.%s.json' % (self.dropbox_dir, fts_job['FTSJobid']), 'w')
            jsondata = json.dumps(fts_job)
            ftsjob_file.write(jsondata)
            ftsjob_file.close()
            self.logger.debug("%s ready." % fts_job)
            # Prepare Dashboard report
            for lfn in fts_job['LFNs']:
                lfn_report = {}
                lfn_report['FTSJobid'] = fts_job['FTSJobid']
                index = fts_job['LFNs'].index(lfn)
                lfn_report['PFN'] = fts_job['PFNs'][index]
                lfn_report['Workflow'] = jobs_report[link][index][2]
                lfn_report['JobVersion'] = jobs_report[link][index][1]
                job_id = '%d_https://glidein.cern.ch/%d/%s_%s' % (int(jobs_report[link][index][0]),
                                                                  int(jobs_report[link][index][0]),
                                                                  lfn_report['Workflow'].replace("_", ":"),
                                                                  lfn_report['JobVersion'])
                lfn_report['JobId'] = job_id
                self.logger.debug("Creating json file %s in %s for FTS3 Dashboard" % (lfn_report, self.dropbox_dir))
                dash_job_file = open('/tmp/Dashboard.%s.json' % getHashLfn(lfn_report['PFN']), 'w')
                jsondata = json.dumps(lfn_report)
                dash_job_file.write(jsondata)
                dash_job_file.close()
                self.logger.info("%s ready for FTS Dashboard report." % lfn_report)
        elif len(jobs_lfn[link]):
            self.logger.debug("Submission failed")
            self.logger.info("Mark failed %s files" % len(jobs_lfn[link]))
            self.logger.debug("Mark failed %s files" % jobs_lfn[link])
            failed_files = self.mark_failed(jobs_lfn[link], bad_logfile=None, force_fail=False, submission_error=True)
            self.logger.info("Marked failed %s" % len(failed_files))
            continue
        else:
            continue
    # Generate the json output
    self.logger.debug("Jobs submission Done. Removing copy_job files")
    #for tmp in tmp_file_pool:
    #    os.unlink( tmp )
    return
def command(self, jobs, jobs_lfn, jobs_pfn, jobs_report, retry=False):
    """
    For each job the worker has to complete:
        Delete files that have failed previously
        Create a temporary copyjob file
        Submit the copyjob to the appropriate FTS server
        Parse the output of the FTS transfer and return complete and failed files for recording
    """
    # Output: {"userProxyPath":"/path/to/proxy","LFNs":["lfn1","lfn2","lfn3"],"PFNs":["pfn1","pfn2","pfn3"],"FTSJobid":'id-of-fts-job', "username": '******'}
    tmp_file_pool = []
    #command2 = 'export X509_USER_PROXY=%s ; source %s ;' % (self.userProxy, self.uiSetupScript)
    #self.logger.debug("executing command: %s" % (command2))
    #stdout2, rc2 = execute_command(command2, self.logger, self.commandTimeout)
    for link, copyjob in jobs.items():
        submission_error = False
        status_error = False
        fts_job = {}
        # Validate copyjob file before doing anything
        self.logger.debug("Copyjob: %s" % copyjob)
        self.logger.debug("Valid %s" % self.validate_copyjob(copyjob))
        if not self.validate_copyjob(copyjob):
            continue
        tmp_copyjob_file = tempfile.NamedTemporaryFile(delete=False)
        tmp_copyjob_file.write('\n'.join(copyjob))
        tmp_copyjob_file.close()
        #self.logger.debug("Temp Copyjob: %s" % tmp_copyjob_file)
        tmp_file_pool.append(tmp_copyjob_file.name)
        #self.logger.debug("Temp Copyjob: %s" % tmp_copyjob_file)
        self.logger.debug("Running FTS submission command")
        self.logger.debug("FTS server: %s" % self.fts_server_for_transfer)
        self.logger.debug("link: %s -> %s" % link)
        self.logger.debug("copyjob file: %s" % tmp_copyjob_file.name)
        command = '%s/../../../apps/asyncstageout/Monitor/PHEDEX/Testbed/FakeFTS.pl %s -s %s -f %s' % (
            self.config.componentDir, self.submission_command,
            self.fts_server_for_transfer, tmp_copyjob_file.name)
        # command = 'export X509_USER_PROXY=%s ; source %s ; /data/ASO/async_install_103pre3/current/apps/asyncstageout/Monitor/PHEDEX/Testbed/FakeFTS.pl %s -s %s -f %s' % (self.userProxy, self.uiSetupScript,
        #     self.submission_command, self.fts_server_for_transfer,
        #     tmp_copyjob_file.name)
        init_time = str(strftime("%a, %d %b %Y %H:%M:%S", time.localtime()))
        self.logger.debug("executing command: %s at: %s for: %s" % (command, init_time, self.userDN))
        stdout, rc = execute_command(command, self.logger, self.commandTimeout)
        self.logger.debug("Submission result %s" % rc)
        self.logger.debug("Sending %s %s %s" % (jobs_lfn[link], jobs_pfn[link], stdout.strip()))
        if not rc:
            # Updating files to acquired in the database
            #self.logger.info("Mark acquired %s files" % len(jobs_lfn[link]))
            #self.logger.debug("Mark acquired %s files" % jobs_lfn[link])
            #acquired_files = self.mark_acquired(jobs_lfn[link])
            #self.logger.info("Marked acquired %s" % len(acquired_files))
            #if not acquired_files:
            #    continue
            fts_job['userProxyPath'] = self.userProxy
            fts_job['LFNs'] = jobs_lfn[link]
            fts_job['PFNs'] = jobs_pfn[link]
            fts_job['FTSJobid'] = stdout.strip()
            fts_job['username'] = self.user
            self.logger.debug("Creating json file %s in %s" % (fts_job, self.dropbox_dir))
            ftsjob_file = open('%s/Monitor.%s.json' % (self.dropbox_dir, fts_job['FTSJobid']), 'w')
            jsondata = json.dumps(fts_job)
            ftsjob_file.write(jsondata)
            ftsjob_file.close()
            self.logger.debug("%s ready." % fts_job)
            # Prepare Dashboard report
            for lfn in fts_job['LFNs']:
                lfn_report = {}
                lfn_report['FTSJobid'] = fts_job['FTSJobid']
                index = fts_job['LFNs'].index(lfn)
                lfn_report['PFN'] = fts_job['PFNs'][index]
                lfn_report['Workflow'] = jobs_report[link][index][2]
                lfn_report['JobVersion'] = jobs_report[link][index][1]
                job_id = '%d_https://glidein.cern.ch/%d/%s_%s' % (int(jobs_report[link][index][0]),
                                                                  int(jobs_report[link][index][0]),
                                                                  lfn_report['Workflow'].replace("_", ":"),
                                                                  lfn_report['JobVersion'])
                lfn_report['JobId'] = job_id
                self.logger.debug("Creating json file %s in %s for FTS3 Dashboard" % (lfn_report, self.dropbox_dir))
                dash_job_file = open('/tmp/Dashboard.%s.json' % getHashLfn(lfn_report['PFN']), 'w')
                jsondata = json.dumps(lfn_report)
                dash_job_file.write(jsondata)
                dash_job_file.close()
                self.logger.info("%s ready for FTS Dashboard report." % lfn_report)
        elif len(jobs_lfn[link]):
            self.logger.debug("Submission failed")
            self.logger.info("Mark failed %s files" % len(jobs_lfn[link]))
            self.logger.debug("Mark failed %s files" % jobs_lfn[link])
            failed_files = self.mark_failed(jobs_lfn[link], bad_logfile=None, force_fail=False, submission_error=True)
            self.logger.info("Marked failed %s" % len(failed_files))
            continue
        else:
            continue
    # Generate the json output
    self.logger.debug("Jobs submission Done. Removing copy_job files")
    #for tmp in tmp_file_pool:
    #    os.unlink( tmp )
    return
# lfn_base has store/temp in it twice to make sure that
# the temp->permanent lfn change is correct.
lfn_base = '/store/temp/user/%s/my_cool_dataset/file-%s-%s.root'
now = str(datetime.datetime.now())
job_end_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
last_update = int(time.time())
print "Script starts at %s" % now
while i <= size:
    id = random.randint(1000, 1999)
    user = random.choice(users)
    dest = users_dest[user]
    _id = getHashLfn(lfn_base % (user, id, i))
    state = 'new'
    file_doc = {'_id': '%s' % (_id),
                'lfn': lfn_base % (user, id, i),
                'dn': 'UserDN',
                #'_attachments': '',
                'checksums': {"adler32": "ad:b2378cab"},
                "failure_reason": [],
                'group': '',
                'publish': 1,
                'timestamp': now,
def __call__(self):
    """
    _getViewResults_

    Get the result of the view.
    """
    sites = ['T2_IT_Rome', 'T2_CH_CAF', 'T2_DE_DESY', 'T2_BR_UERJ', 'T2_CH_CSCS',
             'T2_CN_Beijing', 'T2_DE_DESY', 'T2_DE_RWTH', 'T2_EE_Estonia', 'T2_ES_CIEMAT',
             'T2_ES_IFCA', 'T2_FI_HIP', 'T2_FR_CCIN2P3', 'T2_FR_GRIF_IRFU', 'T2_FR_GRIF_LLR',
             'T2_FR_IPHC', 'T2_HU_Budapest', 'T2_IN_TIFR', 'T2_IT_Bari', 'T2_IT_Legnaro',
             'T2_IT_Pisa', 'T2_IT_Rome', 'T2_KR_KNU', 'T2_PK_NCP', 'T2_PL_Cracow',
             'T2_PL_Warsaw', 'T2_PT_LIP_Lisbon', 'T2_PT_NCG_Lisbon', 'T2_RU_IHEP', 'T2_RU_INR',
             'T2_RU_ITEP', 'T2_RU_JINR', 'T2_RU_PNPI', 'T2_RU_RRC_KI', 'T2_RU_SINP',
             'T2_TR_METU', 'T2_TW_Taiwan', 'T2_UA_KIPT', 'T2_UK_London_Brunel', 'T2_UK_London_IC',
             'T2_UK_SGrid_Bristol', 'T2_UK_SGrid_RALPP', 'T2_US_Caltech', 'T2_US_Florida', 'T2_US_MIT',
             'T2_US_Nebraska', 'T2_US_Purdue', 'T2_US_UCSD', 'T2_US_Wisconsin']
    numberUsers = 5
    j = 1
    users = []
    while j <= numberUsers:
        users.append('user' + str(random.randint(1, 1000)))
        j += 1
    size = 3
    i = 1
    lfn_base = '/store/temp/riahi/user/%s/store/temp/file-duplic-%s-%s.root'
    results = []
    while i <= size:
        last_update = int(time.time())
        user = random.choice(users)
        lfn = lfn_base % (user, random.randint(1000, 9999), i)
        id = getHashLfn(lfn)
        workflow = 'workflow-%s-%s' % (user, random.randint(1, 100))
        results.append({'_id': id,
                        'source': random.choice(sites),
                        'destination': random.choice(sites),
                        'task': workflow,
                        'workflow': workflow,
                        'lfn': lfn,
                        'jobid': random.randint(1000, 9999),
                        'state': 'new',
                        'last_update': last_update,
                        'dbSource_update': last_update,
                        'retry_count': [],
                        'checksums': 'checksum',
                        'size': random.randint(1000, 9999),
                        'dn': '/UserDN',
                        'group': '',
                        'role': '',
                        'user': user})
        i += 1
    logging.debug("Dummy docs queued %s" % results)
    return results
def __call__(self):
    """
    _getViewResults_

    Get the result of the view.
    """
    sites = ['T2_IT_Rome', 'T2_CH_CAF', 'T2_DE_DESY', 'T2_BR_UERJ', 'T2_CH_CSCS',
             'T2_CN_Beijing', 'T2_DE_DESY', 'T2_DE_RWTH', 'T2_EE_Estonia', 'T2_ES_CIEMAT',
             'T2_ES_IFCA', 'T2_FI_HIP', 'T2_FR_CCIN2P3', 'T2_FR_GRIF_IRFU', 'T2_FR_GRIF_LLR',
             'T2_FR_IPHC', 'T2_HU_Budapest', 'T2_IN_TIFR', 'T2_IT_Bari', 'T2_IT_Legnaro',
             'T2_IT_Pisa', 'T2_IT_Rome', 'T2_KR_KNU', 'T2_PK_NCP', 'T2_PL_Cracow',
             'T2_PL_Warsaw', 'T2_PT_LIP_Lisbon', 'T2_PT_NCG_Lisbon', 'T2_RU_IHEP', 'T2_RU_INR',
             'T2_RU_ITEP', 'T2_RU_JINR', 'T2_RU_PNPI', 'T2_RU_RRC_KI', 'T2_RU_SINP',
             'T2_TR_METU', 'T2_TW_Taiwan', 'T2_UA_KIPT', 'T2_UK_London_Brunel', 'T2_UK_London_IC',
             'T2_UK_SGrid_Bristol', 'T2_UK_SGrid_RALPP', 'T2_US_Caltech', 'T2_US_Florida', 'T2_US_MIT',
             'T2_US_Nebraska', 'T2_US_Purdue', 'T2_US_UCSD', 'T2_US_Wisconsin']
    numberUsers = 5
    j = 1
    users = []
    while j <= numberUsers:
        users.append('user' + str(random.randint(1, 1000)))
        j += 1
    size = 3
    i = 1
    lfn_base = '/store/temp/riahi/user/%s/store/temp/file-duplic-%s-%s.root'
    results = []
    while i <= size:
        last_update = int(time.time())
        user = random.choice(users)
        lfn = lfn_base % (user, random.randint(1000, 9999), i)
        id = getHashLfn(lfn)
        workflow = 'workflow-%s-%s' % (user, random.randint(1, 100))
        results.append({'_id': id,
                        'source': random.choice(sites),
                        'destination': random.choice(sites),
                        'task': workflow,
                        'workflow': workflow,
                        'lfn': lfn,
                        'jobid': random.randint(1000, 9999),
                        'state': 'new',
                        'last_update': last_update,
                        'dbSource_update': last_update,
                        'retry_count': [],
                        'checksums': 'checksum',
                        'size': random.randint(1000, 9999),
                        'dn': '/UserDN',
                        'group': '',
                        'role': '',
                        'user': user})
        i += 1
    logging.debug("Dummy docs queued %s" % results)
    return results