def run(selections, args):
    """Run the mono-selection 'install' processing on each selection.

    'upgrade' is a multi-selections subcommand which does the same as the
    mono-selection 'install' subcommand, but for many selections. The
    'upgrade' subcommand is replaced with 'install' so that the existing
    mono-selection func can be reused for each selection.

    Args:
        selections: iterable of selection objects (each exposes 'filename').
        args: parsed arguments; modified in place ('subcommand' is forced to
            'install' and 'yes' to True).

    Selections listed by get_exclude(args) are skipped. Errors raised while
    processing one selection are logged and do not stop the loop.
    """
    # BEWARE: tricky statement (see docstring)
    args.subcommand = 'install'

    # force non-interactive mode
    args.yes = True

    exclude_selection_files = get_exclude(args)

    for selection in selections:
        if selection.filename in exclude_selection_files:
            continue

        try:
            sdlog.info("SDUPGRAD-003", "Process %s.." % selection.filename, stdout=True)
            install(args, selection)
        except sdexception.IncorrectParameterException as e:
            sdlog.error("SDUPGRAD-004", "Error occurs while processing %s (%s)" % (selection.filename, str(e)), stderr=True)
        except sdexception.SDException as e:
            sdlog.error("SDUPGRAD-008", "Error occurs while processing %s (%s)" % (selection.filename, str(e)), stderr=True)
def run(files):
    """Split 'files' with filter() and return the kept ones.

    The number of rejected items is logged when it is not zero.

    NOTE(review): 'filter' is called with a single argument and unpacked as
    a pair, so it is presumably a module-level function shadowing the
    builtin -- confirm.
    """
    kept, rejected = filter(files)

    rejected_count = len(rejected)
    if rejected_count > 0:
        sdlog.info("SDPOSXPC-001", "%i anomalies found" % rejected_count)

    return kept
def qualitycheck_ok(dataset_versions, d):
    """
    Accept or deny 'latest' promotion for the dataset 'd', based on some
    statistics.

    Returns:
        False if 'd' does not seem ready to be promoted to 'latest'
        (its variable count is below half the one of the current latest
        version), True otherwise.

    Raises:
        SDException: if 'd' is the dataset currently flagged as latest.
    """
    # stats for the version currently flagged 'latest'
    flagged_latest = dataset_versions.get_dataset_with_latest_flag_set()
    latest_stats = flagged_latest.statistics

    # stats for the candidate version
    stats_of_candidate = sddatasetquery.get_dataset_stats(d)

    # sanity check: the candidate must not already be the latest version
    if flagged_latest.dataset_id == d.dataset_id:
        raise SDException("SYDDFLAG-140", "fatal error (%i)" % d.dataset_id)

    # variable number quality check (reject if variable number drops)
    if stats_of_candidate['variable_count'] < (latest_stats['variable_count'] * 0.5):
        sdlog.info("SYDDFLAG-730", "%s" % d.get_full_local_path())
        return False

    # total file number quality check (disabled in the original code):
    #
    #   if candidate_stats.getFilesCount() < current_version_stats.getFilesCount(): # if file number decrease
    #       sdlog.info("SYDDFLAG-734","%s"%d.get_full_local_path())
    #       return False

    return True
def use_file_timestamp_if_dataset_timestamp_is_missing(d):
    """Fill d['timestamp'] from one of the dataset's files when it is missing.

    Nothing is done when 'timestamp' is already a key of 'd'. Otherwise one
    file of the dataset is searched and its timestamp is copied into 'd'.

    Raises:
        SDException: SDTIMEST-011 if the search returns no file,
            SDTIMEST-008 if the returned file has no timestamp either.
    """
    if 'timestamp' in d:
        return

    # timestamp doesn't exist in ESGF for this dataset

    # hack
    #
    # Use one (arbitrary) file timestamp as the dataset timestamp, as the
    # dataset's timestamp is missing in ESGF (files of one dataset do not
    # always share the same timestamp, so we just take one).

    # Note
    #     We do not filter replica in the query below in case the master host is not up
    search_parameters = [
        'limit=1',
        'fields=instance_id,timestamp,type',
        'type=File',
        'dataset_id=%s' % d['instance_id'],
    ]
    result = sdquicksearch.run(parameter=search_parameters, post_pipeline_mode=None)

    if len(result.files) < 1:
        raise SDException("SDTIMEST-011", "Dataset exist in ESGF, but is empty (%s)" % d['instance_id'])

    sample_file = result.files[0]

    if 'timestamp' not in sample_file:
        raise SDException("SDTIMEST-008", "Timestamp missing in both dataset and dataset's file(s) (%s)" % d['instance_id'])

    d['timestamp'] = sample_file['timestamp']
    sdlog.info("SDTIMEST-001", "Dataset timestamp set from one dataset's file's timestamp (dataset_functional_id=%s,file_functional_id=%s)" % (d['instance_id'], sample_file['instance_id']))
def set_timestamp_when_empty__BATCH_MODE_1():
    """
    Retrieve *all* datasets from ESGF, then update local timestamp.

    For each retrieved dataset having both an 'instance_id' and a
    'timestamp', the matching local dataset (if any) gets that timestamp.
    Updates are not committed one by one: a single commit is done at the end.

    Not used.
    """
    esgf_datasets = sddump.dump_ESGF(
        parameter=['searchapi_host=esgf-data.dkrz.de'],
        fields=sdfields.get_timestamp_fields())
    total = len(esgf_datasets)

    sdlog.info("SDREBUIL-008", "%i dataset(s) retrieved from ESGF." % total)
    sdlog.info("SDREBUIL-012", "Start updating timestamp in local database.")

    for position, esgf_dataset in enumerate(esgf_datasets):

        # some datasets have no instance_id in ESGF !
        if 'instance_id' in esgf_dataset:
            local_dataset = sddatasetdao.get_dataset(
                dataset_functional_id=esgf_dataset['instance_id'])

            # some datasets have no timestamp in ESGF !
            if local_dataset is not None and 'timestamp' in esgf_dataset:
                local_dataset.timestamp = esgf_dataset['timestamp']
                sddatasetdao.update_dataset(local_dataset, commit=False, keys=['timestamp'])

        SDProgressBar.print_progress_bar(total, position, title="Updating dataset's timestamp.. ")

    SDProgressBar.progress_complete()

    # one transaction for all updates
    sddb.conn.commit()
def set_timestamp_when_empty__BATCH_MODE_1():
    """
    Retrieve *all* datasets from ESGF, then update local timestamp.

    Only ESGF records carrying both 'instance_id' and 'timestamp', and
    matching a dataset known locally, trigger an update. All updates are
    committed together once the loop is over.

    Not used.
    """
    remote_datasets = sddump.dump_ESGF(['type=Dataset', 'searchapi_host=esgf-data.dkrz.de'], 'timestamp')
    remote_count = len(remote_datasets)

    sdlog.info("SDREBUIL-008", "%i dataset(s) retrieved from ESGF." % remote_count)
    sdlog.info("SDREBUIL-012", "Start updating timestamp in local database.")

    for index, remote in enumerate(remote_datasets):

        if 'instance_id' in remote:  # some datasets have no instance_id in ESGF !
            local = sddatasetdao.get_dataset(dataset_functional_id=remote['instance_id'])

            if local is not None and 'timestamp' in remote:  # some datasets have no timestamp in ESGF !
                local.timestamp = remote['timestamp']
                sddatasetdao.update_dataset(local, commit=False, keys=['timestamp'])

        SDProgressBar.print_progress_bar(remote_count, index, title="Updating dataset's timestamp.. ")

    SDProgressBar.progress_complete()

    sddb.conn.commit()
def call_web_service(self, request):
    """Run one Search-API call for 'request' and return its response.

    The call is logged when it starts and when it completes. If the
    underlying sdnetutils call raises, 'request.offset' is reset to 0 and
    the exception is re-raised unchanged.
    """
    sdlog.debug("SYDPROXY-100", "Search-API call started (%s)." % request.get_url())

    try:
        response = sdnetutils.call_web_service(
            request.get_url(),
            timeout=sdconst.SEARCH_API_HTTP_TIMEOUT)  # returns Response object
    except:
        # if exception occurs in sdnetutils.call_web_service() method, all
        # previous calls to this method inside this paginated call are also
        # cancelled

        # we reset the offset so the paginated call can be restarted from the
        # beginning the next time (maybe overkill as offset is reinitialized
        # when entering 'call_web_service__PAGINATION()' func)
        request.offset = 0

        raise
    else:
        completion_message = "Search-API call completed (returned-files-count=%i,match-count=%i,url=%s)." % (
            response.count(), response.num_found, request.get_url())
        sdlog.info("SYDPROXY-100", completion_message)

        return response
def update_datasets_status_HELPER(datasets):
    """
    Refresh datasets status flag

    Each dataset gets a freshly computed status and is written back through
    the dataset DAO; status changes are logged individually, and the number
    of changed datasets is logged at the end.

    Note
        This func doesn't handle the 'latest' flag
    """
    changed = 0

    for dataset in datasets:
        # remember the status before recomputation
        previous_status = dataset.status

        # compute new 'status' flag and store it
        dataset.status = compute_dataset_status(dataset)
        sddatasetdao.update_dataset(dataset)

        # nothing more to do if the status is unchanged
        if previous_status == dataset.status:
            continue

        sdlog.info(
            "SYDDFLAG-188",
            "Dataset status has been changed from %s to %s (%s)" % (previous_status, dataset.status, dataset.dataset_functional_id))
        changed += 1

    sdlog.info("SYDDFLAG-192", "Modified datasets: %i" % changed)
def submit(
        order_name,
        project,
        model,
        dataset,
        variable='',
        filename='',
        commit=True
):
    """Create an event from the given order and store it with the event DAO.

    The event name is 'order_name'; the dataset pattern is 'dataset' passed
    through sdproduct.replace_output12_product_with_wildcard(); the filename
    pattern is 'filename' unchanged. 'commit' is forwarded to the DAO.
    """
    # TODO: replace single quote with None and move 'None2SingleQuote' processing inside Event object (and add comment about why we use single quote instead of None in event table !!!)

    pattern_of_dataset = sdproduct.replace_output12_product_with_wildcard(dataset)

    sdlog.info(
        "SDPPORDE-001",
        "'%s' triggered (%s,%s)" % (order_name, pattern_of_dataset, variable))

    new_event = Event(name=order_name)

    new_event.project = project
    new_event.model = model
    new_event.dataset_pattern = pattern_of_dataset
    new_event.variable = variable
    new_event.filename_pattern = filename
    new_event.crea_date = sdtime.now()
    new_event.priority = sdconst.DEFAULT_PRIORITY

    sdeventdao.add_event(new_event, commit=commit)
def immediate_md_delete(tr):
    """Delete file (metadata only).

    The deletion is not committed here (commit=False). Any exception raised
    by the DAO is logged as an error and not propagated (best effort).
    """
    sdlog.info("SDDELETE-080", "Delete metadata (%s)" % tr.get_full_local_path())
    try:
        sdfiledao.delete_file(tr, commit=False)
    except Exception as e:
        sdlog.error("SDDELETE-128", "Error occurs during file metadata suppression (%s,%s)" % (tr.get_full_local_path(), str(e)))
def disconnect():
    """Close the database connection, then make the db file group writable.

    The module-level 'conn' is reset to None. The permission fix is only
    attempted when the db file exists and is not already group writable;
    its outcome is logged.
    """
    global conn

    if is_connected():
        conn.close()

    conn = None

    # hack
    #
    # force sqlite db file to be group writable
    #
    # It should be done with umask when creating the db, but seems not working due to a bug.
    #
    # more info
    #   http://www.mail-archive.com/[email protected]/msg59080.html
    #   https://code.djangoproject.com/ticket/19292
    #
    if not os.path.exists(sdconfig.db_file):
        return

    if sdtools.is_group_writable(sdconfig.db_file):
        return

    if sdtools.set_file_permission(sdconfig.db_file):
        sdlog.info("SDDATABA-003", "File permissions have been modified ('%s')" % sdconfig.db_file)
    else:
        # we come here when user has not enough privilege to set file permission
        sdlog.info("SDDATABA-004", "Missing privilege to modify file permissions ('%s')" % sdconfig.db_file)
def qualitycheck_ok(dataset_versions, d):
    """
    Decide, from some statistics, whether dataset 'd' can be promoted to
    'latest'.

    Returns:
        True if 'd' seems ready to be promoted to 'latest', False if its
        variable count is less than half the variable count of the version
        currently flagged 'latest'.

    Raises:
        SDException: if 'd' and the currently flagged dataset are the same.
    """
    # current 'latest' version and its stats
    reference_dataset = dataset_versions.get_dataset_with_latest_flag_set()
    reference_stats = reference_dataset.statistics

    # candidate version stats
    new_stats = sddatasetquery.get_dataset_stats(d)

    # assert
    same_dataset = (reference_dataset.dataset_id == d.dataset_id)
    if same_dataset:
        raise SDException("SYDDFLAG-140", "fatal error (%i)" % d.dataset_id)

    # variable number quality check
    variable_number_drops = new_stats['variable_count'] < (reference_stats['variable_count'] * 0.5)
    if variable_number_drops:
        sdlog.info("SYDDFLAG-730", "%s" % d.get_full_local_path())
        return False

    # The total file number quality check below is disabled:
    #
    #   if candidate_stats.getFilesCount() < current_version_stats.getFilesCount(): # if file number decrease
    #       sdlog.info("SYDDFLAG-734","%s"%d.get_full_local_path())
    #       return False

    return True
def run(args, metadata=None):
    """Check the daemon, then retrieve metadata when none is supplied.

    Args:
        args: parsed arguments ('incremental' and 'selection_file' are read).
        metadata: already retrieved metadata, or None to run the search here.

    Returns:
        (1, 0) when 'incremental' is set without a selection file,
        (0, 0) when the selection is empty.
        NOTE(review): the pair is presumably (status, installed files
        count) -- confirm against callers. Only the start of the function is
        visible here; the visible part returns nothing on the normal path.

    Raises:
        sdexception.SDException: re-raised (after logging) when the search fails.
    """
    import syndautils

    syndautils.check_daemon()

    if metadata is None:

        # retrieve metadata

        if args.incremental and not args.selection_file:
            print_stderr(
                "ERROR: 'selection_file' option is not set (a selection file must be used when 'incremental' option is set)"
            )
            return (1, 0)

        if args.selection_file is not None:
            sdlog.info("SYNDINST-006", "Process '%s'" % args.selection_file)

        try:
            metadata = syndautils.file_full_search(args)
        except sdexception.EmptySelectionException:
            print_stderr('No dataset will be installed, upgraded, or removed.')
            return (0, 0)
        except sdexception.SDException as e:
            sdlog.info("SYNDINST-006", "Exception occured during installation ('%s')" % str(e))
            raise
def get_RTT(remote_host):
    """Return the RTT of 'remote_host', computing it only on first request.

    Values are kept in sdgc.RTT_cache, keyed by host.
    """
    if remote_host in sdgc.RTT_cache:
        return sdgc.RTT_cache[remote_host]

    sdlog.info("SDNEARES-012", "Compute RTT for '%s' host." % remote_host)
    sdgc.RTT_cache[remote_host] = compute_RTT(remote_host)

    return sdgc.RTT_cache[remote_host]
def run(metadata):
    """
    Set files status to "delete"

    Files whose status is 'new' or 'delete' are filtered out first; the
    remaining ones go through the 'delete' pipeline step, then one commit is
    done, a history line is added and the count is logged.

    Returns:
        Number of deleted items.

    Note
        - the func only change the status (i.e. data and metadata will be removed later by the daemon)
    """
    if metadata.count() < 1:
        return 0

    sample = metadata.get_one_file()
    # note that if no files are found at all for this selection (no matter the status), then the filename will be blank
    selection_filename = sdpostpipelineutils.get_attached_parameter__global([sample], 'selection_filename')

    # TODO: merge both to improve perf
    for excluded_status in (sdconst.TRANSFER_STATUS_NEW, sdconst.TRANSFER_STATUS_DELETE):
        metadata = sdsimplefilter.run(metadata, 'status', excluded_status, 'remove')

    count = metadata.count()

    if count < 1:
        return count

    po = sdpipelineprocessing.ProcessingObject(delete)
    metadata = sdpipelineprocessing.run_pipeline(metadata, po)
    sddb.conn.commit()  # final commit (we do all update in one transaction).

    sdhistorydao.add_history_line(sdconst.ACTION_DELETE, selection_filename)

    sdlog.info("SDDELETE-929", "%i files marked for deletion (selection=%s)" % (count, selection_filename))

    return count
def get_datasets_timestamps(squeries, parallel):
    """Run the queries against their dataset timestamp url and index the result.

    Each query has its 'url' saved under 'url_tmp' and replaced with its
    'dataset_timestamp_url'. The queries are then run and a dict mapping
    instance_id to timestamp is built; datasets without timestamp are
    logged and left out.

    Raises:
        MissingDatasetTimestampUrlException: if a query has no
            'dataset_timestamp_url' key.

    NOTE(review): in the visible code the original 'url' is never restored
    and the dict is never returned -- the end of the function is probably
    not shown here; confirm before relying on the return value.
    """
    # switch url
    for q in squeries:
        q['url_tmp'] = q['url']

        if 'dataset_timestamp_url' not in q:
            sdlog.info("SYNDABTI-300", "dataset_timestamp_url not found in query")
            raise MissingDatasetTimestampUrlException()  # just in case (should be always set for 'install' action)

        q['url'] = q['dataset_timestamp_url']

    # run
    datasets = sdrun.run(squeries, parallel)

    # transform to dict for quick random access
    di = {}
    for d in datasets:
        instance_id = d['instance_id']

        try:
            timestamp = get_timestamp(instance_id, d)
        except MissingTimestampException:
            sdlog.info("SYNDABTI-500", "dataset found but dataset timestamp is missing (%s)" % instance_id)
        else:
            di[instance_id] = timestamp
def change_replica(file_functional_id, new_replica, conn=sddb.conn):
    """Point a file to another replica and commit.

    Args:
        file_functional_id: identifier of the file row to update.
        new_replica: (url, data_node) pair stored into the file row.
        conn: database connection (default bound at definition time).
    """
    (url, data_node) = new_replica

    sdlog.info("SDMODIFQ-001", "Set new replica for %s file (new_url=%s,new_dn=%s)" % (file_functional_id, url, data_node))

    c = conn.cursor()
    try:
        c.execute("update file set url=?,data_node=? where file_functional_id=?", (url, data_node, file_functional_id))
        conn.commit()
    finally:
        # release the cursor even when the update or the commit fails
        c.close()
def run(files):
    """Return the files kept by filter(); log how many were rejected.

    NOTE(review): 'filter' is used with one argument and returns a pair, so
    it is presumably a module-level function shadowing the builtin --
    confirm.
    """
    accepted, malformed = filter(files)

    if len(malformed) > 0:
        message = "%i malformed file(s) found" % len(malformed)
        sdlog.info("SDPOSXPC-001", message)

    return accepted
def uniq(metadata):
    """Remove duplicates from 'metadata', keeping replicas or not.

    The choice is driven by the 'keep_replica' attached parameter read from
    one file of 'metadata'. Empty metadata is returned untouched.
    """
    if metadata.count() < 1:
        return metadata

    # retrieve global flag
    sample = metadata.get_one_file()
    keep_replica = sdpostpipelineutils.get_attached_parameter__global([sample], 'keep_replica')
    functional_id_keyname = sdpostpipelineutils.get_functional_identifier_name(sample)

    if keep_replica != 'true':
        # Do not keep replica.
        #
        # In this case, we remove type-A and type-B duplicates by randomly keeping one candidate

        # uniq key => instance_id (i.e. excluding datanode)

        sdlog.info("SSHRINKU-002", "Remove duplicate and replicate..")
        return sdrmduprep.run(metadata, functional_id_keyname)

    # Keep replica.
    #
    # In this case, we remove type-A duplicates, but we keep type-B duplicates (i.e. replicas)

    # uniq key => id (i.e. including datanode)

    sdlog.info("SSHRINKU-001", "Remove duplicate..")
    return sdrmdup.run(metadata, functional_id_keyname)
def get_urls(file_functional_id):
    """Return the url attributes of the file matching 'file_functional_id'.

    One file is searched with the fields listed in 'url_fields'; its
    'attached_parameters' entry (a non url attribute) is removed before the
    file is returned.

    Raises:
        sdexception.FileNotFoundException: if the search returns no file.
    """
    result = sdquicksearch.run(parameter=[
        'limit=1',
        'fields=%s' % url_fields,
        'type=File',
        'instance_id=%s' % file_functional_id
    ], post_pipeline_mode=None)

    li = result.get_files()

    if len(li) > 0:
        file_ = li[0]

        # remove non url attributes (best effort: nothing to do if missing)
        try:
            del file_['attached_parameters']
        except Exception:
            pass

        urls = file_
    else:
        # the identifier comes from the argument (no 'tr' object exists here)
        sdlog.info(
            "SDNEXTUR-090",
            "File not found (file_functional_id=%s)" % (file_functional_id, ))
        raise sdexception.FileNotFoundException()

    return urls
def call_web_service(url, timeout=sdconst.SEARCH_API_HTTP_TIMEOUT, lowmem=False):
    """Fetch 'url' with HTTP GET and parse the body as search-API metadata.

    Args:
        url: search-API url to call.
        timeout: forwarded to HTTP_GET.
        lowmem: not used in the visible code. Default is to load the list
            resulting from the HTTP call in memory (should work on lowmem
            machine as response should not exceed SEARCH_API_CHUNKSIZE).

    Raises:
        SDException: SDNETUTI-008 when the response cannot be parsed.

    NOTE(review): the visible code neither uses 'elapsed_time' nor returns
    the parsed result -- the end of the function is probably not shown
    here; confirm.
    """
    start_time = SDTimer.get_time()
    buf = HTTP_GET(url, timeout)
    elapsed_time = SDTimer.get_elapsed_time(start_time)

    buf = fix_encoding(buf)

    try:
        di = search_api_parser.parse_metadata(buf)
    except Exception as e:

        # If we are here, it's likely that there is a problem with the internet connection
        # (e.g. we are behind an HTTP proxy and have no authorization to use it)

        sdlog.info('SDNETUTI-001', 'XML parsing error (exception=%s). Most of the time, this error is due to a network error.' % str(e))

        # debug
        #
        # TODO: maybe always enable this
        #
        # sdtrace.log_exception()

        # debug
        #
        # (if the error is not due to a network error (e.g. internet connection
        # problem), raise the original exception below and set the debug mode
        # to see the stacktrace.
        #
        #raise

        # we raise a new exception 'network error' here, because most of the
        # time, 'xml parsing error' is due to a 'network error'.
        raise SDException('SDNETUTI-008', 'Network error (see log for details)')
def process_async_event():
    """Push pending events to post-processing and flag them as processed.

    'async' is because events are waiting in 'event' table before being
    processed. Up to 200 new events are handled per call.

    A RemoteException is non-fatal: the transaction is rolled back and the
    error logged. Any other exception is rolled back, logged, and re-raised.
    """
    events = sdeventdao.get_events(status=sdconst.EVENT_STATUS_NEW, limit=200)  # process 200 events at a time (arbitrary)

    if len(events) > 0:
        try:
            sdppproxy.event(events)

            for e in events:
                e.status = sdconst.EVENT_STATUS_OLD

            sdeventdao.update_events(events, commit=False)
            sddb.conn.commit()
            sdlog.info("SYNDTASK-001", "Events status succesfully updated")
        except RemoteException as e:  # non-fatal
            sddb.conn.rollback()
            sdlog.info("SYNDTASK-002", "Error occurs during event processing (%s)" % str(e))
        except Exception as e:  # fatal
            sddb.conn.rollback()
            sdlog.error("SYNDTASK-018", "Fatal error occurs during event processing (%s)" % str(e))

            # debug
            #traceback.print_exc(file=open(sdconfig.stacktrace_log_file,"a"))

            raise
def terminate(signal,frame): global quit import sdlog print # this print is just not to display the msg below on the same line as ^C sdlog.info("SDTSCHED-004","Shutdown in progress..",stderr=True) if scheduler_state!=1: # we can only stop the scheduler if it is running sdlog.info("SDTSCHED-009","The daemon is not running (scheduler_state=%s)"%scheduler_state) return sdwatchdog.quit=1 quit=1 # kill all childs (i.e. abort running transfer(s) if any) import psutil parent = psutil.Process(os.getpid()) # NEW WAY # see TAG54353543DFDSFD for info # if hasattr(parent, 'get_children'): for child in parent.get_children(True): if child.is_running(): child.terminate() else: for child in parent.children(True): if child.is_running(): child.terminate() # OLD """
def process_async_event():
    """Send new events to post-processing, then mark them as old.

    'async' is because events are waiting in 'event' table before being
    processed. At most 200 events are fetched per call.

    On RemoteException (non-fatal) the transaction is rolled back and the
    error is logged; on any other exception it is rolled back, logged, and
    the exception is re-raised.
    """
    events = sdeventdao.get_events(status=sdconst.EVENT_STATUS_NEW, limit=200)  # process 200 events at a time (arbitrary)

    if len(events) > 0:
        try:
            sdppproxy.event(events)

            for e in events:
                e.status = sdconst.EVENT_STATUS_OLD

            sdeventdao.update_events(events, commit=False)
            sddb.conn.commit()
            sdlog.info("SYNDTASK-001", "Events status succesfully updated")
        except RemoteException as e:  # non-fatal
            sddb.conn.rollback()
            sdlog.info("SYNDTASK-002", "Error occurs during event processing (%s)" % str(e))
        except Exception as e:  # fatal
            sddb.conn.rollback()
            sdlog.error("SYNDTASK-018", "Fatal error occurs during event processing (%s)" % str(e))

            # debug
            #sdtrace.log_exception()

            raise
def disconnect():
    """Close the connection (if any) and force the db file to be group writable.

    The module-level 'conn' ends up set to None. The outcome of the
    permission change, when one is attempted, is logged.
    """
    global conn

    if is_connected():
        conn.close()

    conn = None

    # hack
    #
    # force sqlite db file to be group writable
    #
    # It should be done with umask when creating the db, but seems not working due to a bug.
    #
    # more info
    #   http://www.mail-archive.com/[email protected]/msg59080.html
    #   https://code.djangoproject.com/ticket/19292
    #
    needs_fix = os.path.exists(sdconfig.db_file) and not sdtools.is_group_writable(sdconfig.db_file)

    if needs_fix:
        permission_changed = sdtools.set_file_permission(sdconfig.db_file)

        if permission_changed:
            sdlog.info(
                "SDDATABA-003",
                "File permissions have been modified ('%s')" % sdconfig.db_file)
        else:
            # we come here when user has not enough privilege to set file permission
            sdlog.info(
                "SDDATABA-004",
                "Missing privilege to modify file permissions ('%s')" % sdconfig.db_file)
def load_table_in_memory(tablename, indexname):
    """Copy one table of the persistent DB into the in-memory database.

    The in-memory connection is created on first use. The persistent DB is
    attached, the table is (re)created in memory from it, an index on the
    'file' column is created, and the persistent DB is detached again.

    NOTE: 'tablename' and 'indexname' are interpolated into the SQL text, so
    they must come from trusted code.
    """
    global _in_memory_conn

    sdlog.info("SDDATABA-001", "loading '%s' table" % tablename)

    # create a database in memory
    if _in_memory_conn is None:
        _in_memory_conn = sqlite3.connect(":memory:")

    statements = (
        # attach persistent DB
        "ATTACH '%s' AS persistentdb" % get_db_name(),
        # drop table if already exists in memory
        "drop table if exists main.'%s'" % tablename,
        # copy table from persistent DB to memory
        "create table main.'%s' as select * from persistentdb.[%s]" % (tablename, tablename),
        # create index
        "create index if not exists main.'%s' on '%s' (file)" % (indexname, tablename),
    )
    for statement in statements:
        _in_memory_conn.execute(statement)

    # commit
    _in_memory_conn.commit()

    # detach persistent DB
    _in_memory_conn.execute("detach persistentdb")

    _in_memory_conn.row_factory = sqlite3.Row

    sdlog.info("SDDATABA-002", "table loaded")
def build_path(name, product, project):
    """Build the path of 'name' from its project and product.

    - CMIP5 / GeoMIP: "<project>/<product>/<name>", or "<project>/<name>"
      when 'product' is None (logged).
    - CORDEX: "<project>/<name>" when 'name' already contains 'output/',
      else "<project>/output/<name>".
    - anything else: "<project>/<name>".

    The result is passed through fix_project_name() before being returned.
    """
    if project in ('CMIP5', 'GeoMIP'):
        if product is None:
            sdlog.info("SD28TO29-011", "Product is None (name=%s)" % name)

            # BEWARE
            # this case is for the 24 GeoMIP without product (select local_image from transfert where product_xml_tag is NULL and project_id=2;)
            path = "{0}/{1}".format(project, name)
        else:
            path = "{0}/{1}/{2}".format(project, product, name)
    elif project == 'CORDEX':
        # exact comparison: "project in ('CORDEX')" was a substring test on a
        # string, so an empty project matched and None raised TypeError
        if 'output/' in name:
            # add project
            path = "{0}/{1}".format(project, name)
        else:
            # for some datasets of CORDEX project, project and product are missing, so we add it (it's always 'output' for this project)

            # add output and project
            path = "{0}/{1}/{2}".format(project, 'output', name)
    else:
        # project is missing
        path = "{0}/{1}".format(project, name)

    path = fix_project_name(path)  # project name in 28 have been transformed in many columns (in 29, only local_path should have transformed fields)

    return path
def get_RTT(remote_host):
    """Return the cached RTT for 'remote_host', computing it when absent.

    The cache is sdgc.RTT_cache; a computation is logged before it starts.
    """
    rtt_cache = sdgc.RTT_cache
    already_known = remote_host in rtt_cache

    if not already_known:
        sdlog.info("SDNEARES-012", "Compute RTT for '%s' host." % remote_host)
        rtt_cache[remote_host] = compute_RTT(remote_host)

    return rtt_cache[remote_host]
def load_table_in_memory(tablename, indexname):
    """Load 'tablename' from the persistent DB into the in-memory database.

    Steps: create the in-memory connection if needed, attach the persistent
    DB, replace the in-memory copy of the table, create the index on the
    'file' column, commit, detach.

    NOTE: both arguments are interpolated into the SQL text, so they must
    come from trusted code.
    """
    global _in_memory_conn

    sdlog.info("SDDATABA-001", "loading '%s' table" % tablename)

    # create a database in memory
    if _in_memory_conn is None:
        _in_memory_conn = sqlite3.connect(":memory:")

    # attach persistent DB
    attach_sql = "ATTACH '%s' AS persistentdb" % get_db_name()
    _in_memory_conn.execute(attach_sql)

    # drop table if already exists in memory
    drop_sql = "drop table if exists main.'%s'" % tablename
    _in_memory_conn.execute(drop_sql)

    # copy table from persistent DB to memory
    copy_sql = "create table main.'%s' as select * from persistentdb.[%s]" % (tablename, tablename)
    _in_memory_conn.execute(copy_sql)

    # create index
    index_sql = "create index if not exists main.'%s' on '%s' (file)" % (indexname, tablename)
    _in_memory_conn.execute(index_sql)

    # commit
    _in_memory_conn.commit()

    # detach persistent DB
    _in_memory_conn.execute("detach persistentdb")

    _in_memory_conn.row_factory = sqlite3.Row

    sdlog.info("SDDATABA-002", "table loaded")
def variable_complete_event(project, model, dataset, variable):
    """Handle a 'variable complete' event and cascade to dependent events.

    Cascade 1: trigger the 'dataset complete' event when the dataset status
    is complete.
    Cascade 2 (CMIP5 only): trigger the cross dataset 'output12' event,
    either when both output1 and output2 datasets exist and have the
    variable complete, or when they do not both exist.
    """
    sdlog.info("SYDEVENT-002", "'variable_complete_event' triggered (%s,%s)" % (dataset.dataset_functional_id, variable))

    # cascade 1
    if dataset.status == sdconst.DATASET_STATUS_COMPLETE:
        dataset_complete_event(project, model, dataset)  # trigger 'dataset complete' event

    # cascade 2
    if project != 'CMIP5':
        return

    assert '/output/' not in dataset.path

    (ds_path_output1, ds_path_output2) = sdproduct.get_output12_dataset_paths(dataset.path)

    both_products_exist = sddatasetdao.exists_dataset(path=ds_path_output1) and sddatasetdao.exists_dataset(path=ds_path_output2)

    if both_products_exist:
        d1 = sddatasetdao.get_dataset(path=ds_path_output1)
        d2 = sddatasetdao.get_dataset(path=ds_path_output2)

        if not (sdvariable.is_variable_complete(d1.dataset_id, variable) and sdvariable.is_variable_complete(d2.dataset_id, variable)):
            return

    # When both products exist, we get here only if the variable is complete
    # in both. We also trigger the 'variable_complete_output12_event' event
    # if the variable is over one product only (because if only one product,
    # then output12 event is also true).
    dataset_pattern = sdproduct.build_output12_dataset_pattern(dataset.local_path)
    variable_complete_output12_event(project, model, dataset_pattern, variable)  # trigger event (cross dataset event)
def run_helper(queries):
    """
    Distribute the queries, wait for all threads, and collect the output.

    Returns:
        (metadata, errors): the merged results read from the result queue,
        and the list of items read from the error queue.

    notes
      - "queries" is non-threadsafe (i.e. not a Queue), but doesn't matter as threads do not use it
    """
    total_query_to_process = len(queries)

    sdlog.debug("SDPROXMT-003", "%d search-API queries to process (max_thread_per_host=%d,timeout=%d)" % (total_query_to_process, max_thread_per_host, sdconst.SEARCH_API_HTTP_TIMEOUT))

    while True:
        if sdconfig.proxymt_progress_stat:
            sdlog.info("SDPROXMT-033", "threads per host: %s" % ",".join(['%s=%s' % (host, len(searchAPIServices[host]['threadlist'])) for host in searchAPIServices.keys()]))

        if len(queries) > 0:
            distribute_queries(queries)
        elif all_threads_completed():
            # leave the loop only if all threads completed
            break

        # remove completed threads from list
        for host in searchAPIServices.keys():
            searchAPIServices[host]['threadlist'] = [
                t for t in searchAPIServices[host]['threadlist'] if t.is_alive()
            ]

        # log
        total_query_already_processed = total_query_to_process - len(queries)
        # display progress only when there are queries at all, and only
        # while some of them are still waiting
        if total_query_to_process > 0 and len(queries) > 0:
            sdlog.info("SDPROXMT-004", "total_queries=%d, running_or_done_queries=%d, waiting_queries=%d" % (total_query_to_process, total_query_already_processed, len(queries)))

        # if all services are busy, we sleep to limit loop speed
        # (note that all the code around the "sleep" call is to detect system overload)
        sleep_time = 10
        warning_threshold = 5  # threshold not to emit warning for every small load exceedance
        befo = time.time()
        time.sleep(sleep_time)
        afte = time.time()
        diff = afte - befo
        if diff > sleep_time + warning_threshold:
            sdlog.warning("SDPROXMT-005", "WARNING: system overload detected (sleep takes %d second to complete)." % diff)

    # retrieve result from output queue
    metadata = sdtypes.Metadata()
    while not __result_queue.empty():
        success = __result_queue.get(False)  # retrieve result from ONE successful search-API call
        success.connect()  # TAGKLK434L3K34K
        metadata.slurp(success)  # warning: success is modified here

    # retrieve error from output queue and insert them into a list
    errors = []
    while not __error_queue.empty():
        errors.append(__error_queue.get(False))

    return (metadata, errors)
def add_selection(us):
    """Register selection 'us' in database and in the selections list.

    A selection missing from the database is inserted with status 'new' and
    fullscan enabled. For a known selection, the file checksum is compared
    with the stored one: when equal, the stored status is reused (fullscan
    is enabled for 'modified' and 'new'); when different, the selection is
    flagged 'modified' and updated in database.

    Raises:
        SDException: if the stored status is not one of normal/modified/new.
    """
    # compute selection checksum from scratch
    l__file_checksum = sdutils.compute_checksum(us.get_selection_file_full_path())

    if exists_selection(us):
        # selection already in database

        from_db_us = fetch_selection(us.get_filename())  # retrieve us from DB
        us.set_selection_id(from_db_us.get_selection_id())  # copy DB id

        # check if same checksums
        if l__file_checksum != from_db_us.get_checksum():
            # checksum differ

            sdlog.info("SYNDASEL-197", "%s selection has been modified (marked for fullscan)" % us.get_filename())

            us.set_checksum(l__file_checksum)  # update checksum
            us.set_status(sdconst.SELECTION_STATUS_MODIFIED)  # update status

            update_selection(us)
        else:
            # same checksum

            # retrieve status
            us.set_status(from_db_us.get_status())
            us.set_checksum(from_db_us.get_checksum())

            if us.get_status() == sdconst.SELECTION_STATUS_NORMAL:
                # nothing to do here (let (a) and (b) decide if we need fullscan)
                pass
            elif us.get_status() == sdconst.SELECTION_STATUS_MODIFIED:
                us.set_fullscan(True)
            elif us.get_status() == sdconst.SELECTION_STATUS_NEW:
                us.set_fullscan(True)
            else:
                raise SDException("SYNDATSEL-071", "unknown status")
    else:
        # add selection in database if missing

        us.set_checksum(l__file_checksum)
        us.set_status(sdconst.SELECTION_STATUS_NEW)
        us.set_fullscan(True)

        insertSelection(us)  # warning: this modify us object (set PK)

    # add selection in selection list
    # TODO
    _selections[us.get_filename()] = us
def change_priority(new_priority, conn=sddb.conn):
    """Change priority value for already existing transfer.

    Every file linked (through selection__file) to the selection 'u_s' gets
    'new_priority'; the change is committed.

    NOTE(review): 'u_s' is neither a parameter nor a local of this function;
    it is presumably a module-level selection object -- confirm, as the
    call fails with NameError otherwise.
    """
    c = conn.cursor()
    try:
        sdlog.info("SDMODIFQ-002", "updating %s selection (new priority=%s)" % (u_s.filename, new_priority))
        c.execute("UPDATE file SET priority = ? WHERE EXISTS (SELECT 1 FROM selection__file WHERE file.file_id = selection__file.file_id AND selection__file.selection_id = ?)", (new_priority, u_s.get_selection_id(),))
        conn.commit()
    finally:
        # release the cursor even when the update or the commit fails
        c.close()
def reset():
    """Purge transfers in error or waiting status, then orphan datasets.

    Returns:
        The value returned by the transfer purge (logged as the number of
        removed transfers).
    """
    import sddeletedataset

    removed_transfers = sddeletequery.purge_error_and_waiting_transfer()
    sddeletedataset.purge_orphan_datasets()

    sdlog.info("SDDELETE-931", "%i transfer(s) removed" % removed_transfers)

    return removed_transfers
def start_watchdog():
    """Start the download processes watchdog.

    The frozen download checker is started as a daemon thread.
    """
    sdlog.info("SDTSCHED-993", "Starting watchdog..")

    watchdog_thread = sdwatchdog.FrozenDownloadCheckerThread()
    watchdog_thread.setDaemon(True)
    watchdog_thread.start()
def add_file(f):
    """Enqueue file 'f' as a new transfer.

    The file is attached to its dataset (through add_dataset), flagged as
    waiting, timestamped, and stored without committing.
    """
    creation_message = "Create transfer (local_path=%s,url=%s)" % (f.get_full_local_path(), f.url)
    sdlog.info("SDENQUEU-003", creation_message)

    f.dataset_id = add_dataset(f)
    f.status = sdconst.TRANSFER_STATUS_WAITING
    f.crea_date = sdtime.now()

    sdfiledao.add_file(f, commit=False)
def start_watchdog():
    """Start the download processes watchdog (as a daemon thread)."""
    sdlog.info("SDTSCHED-993", "Starting watchdog..")

    checker = sdwatchdog.FrozenDownloadCheckerThread()

    # flag the thread as daemon before starting it
    checker.setDaemon(True)
    checker.start()
def reset():
    """Remove error and waiting transfers, then purge orphan datasets.

    Returns:
        The value returned by purge_error_and_waiting_transfer(), which is
        also logged as the number of removed transfers.
    """
    import sddeletedataset

    purged = sddeletequery.purge_error_and_waiting_transfer()

    sddeletedataset.purge_orphan_datasets()

    message = "%i transfer(s) removed" % purged
    sdlog.info("SDDELETE-931", message)

    return purged
def sequential_exec(queries):
    """Run the queries one after the other and merge their results.

    Each query must have a 'url' key; its optional 'attached_parameters'
    entry is forwarded to the search.

    Returns:
        A Metadata object into which every result has been slurped.
    """
    proxy = sdproxy.SearchAPIProxy()
    merged = sdtypes.Metadata()

    for position, query in enumerate(queries):
        sdlog.info("SYNDARUN-001", "Process query %d" % position)

        partial_result = proxy.run(
            url=query['url'],
            attached_parameters=query.get('attached_parameters'))
        merged.slurp(partial_result)

    return merged
def renew_certificate(host, port, username, password):
    """Log on to a MyProxy server and store the returned credentials on disk.

    The security directory is created if missing, the ESGF/X509 environment
    variables are set from sdconfig (and X509_USER_PROXY is removed), then
    the credentials returned by the logon are written to
    sdconfig.esgf_x509_proxy.
    """
    sdlog.info("SDMYPROX-002", "Renew certificate..")

    # we need a mkdir here to prevent 'No such file or directory' myproxyclient error (see TAGFERE5435 for more info)
    sd = sdconfig.get_security_dir()
    if not os.path.isdir(sd):
        os.makedirs(sd)

    # currently, we set bootstrap option everytime
    #
    # TODO: change this to set only the first time (i.e. if .esg/certificates is empty)
    #
    bootstrap = True

    # currently, we set trustroots option everytime
    updateTrustRoots = True
    authnGetTrustRootsCall = False

    # TODO: maybe add option in 'synda certificate' to use specify another path for cadir (for debugging purpose)
    #ROOT_TRUSTROOT_DIR = '/etc/grid-security/certificates'
    #USER_TRUSTROOT_DIR = '~/.globus/certificates'

    # set env.

    os.environ['ESGF_CREDENTIAL'] = sdconfig.esgf_x509_proxy
    os.environ['ESGF_CERT_DIR'] = sdconfig.esgf_x509_cert_dir
    os.environ['X509_CERT_DIR'] = sdconfig.esgf_x509_cert_dir

    if 'X509_USER_PROXY' in os.environ:
        del os.environ['X509_USER_PROXY']
    #if 'GLOBUS_LOCATION' in os.environ:
    #    del os.environ['GLOBUS_LOCATION']

    # main

    myproxy_clnt = MyProxyClient(hostname=host,
                                 port=port,
                                 caCertDir=sdconfig.esgf_x509_cert_dir,
                                 proxyCertLifetime=43200)  # 12 hours

    # credname=credname
    creds = myproxy_clnt.logon(username, password,
                               bootstrap=bootstrap,
                               updateTrustRoots=updateTrustRoots,
                               authnGetTrustRootsCall=authnGetTrustRootsCall)

    # store cert on disk ('with' closes the file even if a write fails)
    with open(sdconfig.esgf_x509_proxy, 'w') as fout:
        for cred in creds:
            fout.write(cred)
def sequential_exec(queries):
    """Process every query in order and return the accumulated metadata.

    For each query the search proxy is run with the query 'url' and its
    'attached_parameters' (None when the key is absent), and the result is
    slurped into a single Metadata object.
    """
    search_proxy = sdproxy.SearchAPIProxy()
    accumulated = sdtypes.Metadata()

    query_index = 0
    for current_query in queries:
        sdlog.info("SYNDARUN-001", "Process query %d" % query_index)

        attached = current_query.get('attached_parameters')
        accumulated.slurp(search_proxy.run(url=current_query['url'], attached_parameters=attached))

        query_index += 1

    return accumulated
def inner(*args, **kwargs):
    """Call 'func' with the given arguments and log how long it ran.

    'func' comes from the enclosing scope; its return value is returned
    unchanged.
    """
    started_at = SDTimer.get_time()
    outcome = func(*args, **kwargs)
    duration = SDTimer.get_elapsed_time(started_at)

    sdlog.info('SDPROFIL-001', '%s ran in %2.9f sec' % (func.__name__, duration))

    return outcome
def install(args, selection):
    """Retrieve the metadata of 'selection' from ESGF and install its files.

    Nothing is done when 'args.dry_run' is set.
    """
    # TODO: maybe force type=file here, in case the selection file have 'type=Dataset'

    if args.dry_run:
        return

    sdlog.info("SDUPGRAD-001", "Retrieve metadata from ESGF..")
    metadata = sdsearch.run(selection=selection)

    sdlog.info("SDUPGRAD-002", "Install files..")
    # the result pair is unpacked but not used afterwards
    (status, newly_installed_files_count) = sdinstall.run(args, metadata)
def event(events):
    """Push 'events' to the postprocessing service.

    Events are sent as a list of dicts (custom class instances cannot be
    serialized to JSON).

    Raises:
        RemoteException: when a urllib2.URLError occurs (logged first).

    NOTE(review): 'url' and 'port' used in the error message are not defined
    in this function; they are presumably module-level settings -- confirm.
    """
    try:
        sdlog.info("SDPPPROX-001", "Push events to postprocessing")
        serialized_events = [e.__dict__ for e in events]  # transform list of event to list of dict (needed, because custom class cannot be serialized to JSON)
        get_service().event(serialized_events)  # send events
        sdlog.info("SDPPPROX-002", "%i events successfully transmitted to postprocessing" % len(serialized_events))
    except urllib2.URLError as e:
        sdlog.error("SDPPPROX-010", "Network error occured (url=%s,port=%s,%s)" % (url, port, str(e)))
        raise RemoteException("SDPPPROX-100", "Network error occured")
def inner(*args, **kwargs):
    """Run 'func' (from the enclosing scope), log its duration, return its result."""
    t0 = SDTimer.get_time()

    returned = func(*args, **kwargs)

    elapsed = SDTimer.get_elapsed_time(t0)
    timing_message = '%s ran in %2.9f sec' % (func.__name__, elapsed)
    sdlog.info('SDPROFIL-001', timing_message)

    return returned
def add_file(f):
    """Create a transfer for file 'f' (not committed).

    Before being stored, 'f' receives its dataset id (from add_dataset), the
    waiting status and its creation date.
    """
    local_path = f.get_full_local_path()
    sdlog.info("SDENQUEU-003", "Create transfer (local_path=%s,url=%s)" % (local_path, f.url))

    # fill the fields of the new transfer
    f.dataset_id = add_dataset(f)
    f.status = sdconst.TRANSFER_STATUS_WAITING
    f.crea_date = sdtime.now()

    sdfiledao.add_file(f, commit=False)
def immediate_md_delete(tr):
    """Delete file (metadata only).

    The deletion is done without commit. Errors are logged and swallowed, so
    the caller is never interrupted by a failed deletion.
    """
    sdlog.info("SDDELETE-080", "Delete metadata (%s)" % tr.get_full_local_path())

    try:
        sdfiledao.delete_file(tr, commit=False)
    except Exception as e:
        sdlog.error(
            "SDDELETE-128",
            "Error occurs during file metadata suppression (%s,%s)" % (tr.get_full_local_path(), str(e)))
def update_datasets_status():
    """
    Update status flag for all datasets.

    All datasets returned by the dataset DAO are handed over to
    update_datasets_status_HELPER().

    Notes
        - This func is used to fix inconsistencies.
        - This func doesn't handle the 'latest' flag
    """
    sdlog.info("SYDDFLAG-186", "Update status for all datasets")

    all_datasets = sddatasetdao.get_datasets()

    update_datasets_status_HELPER(all_datasets)
def renew_certificate(host, port, username, password):
    """Retrieve credentials from a MyProxy server and write them to disk.

    Steps: create the security directory when missing, export the ESGF/X509
    environment variables taken from sdconfig (X509_USER_PROXY is removed),
    log on to the server, and write the returned credentials to
    sdconfig.esgf_x509_proxy.
    """
    sdlog.info("SDMYPROX-002", "Renew certificate..")

    # we need a mkdir here to prevent 'No such file or directory' myproxyclient error (see TAGFERE5435 for more info)
    sd = sdconfig.get_security_dir()
    if not os.path.isdir(sd):
        os.makedirs(sd)

    # currently, we set bootstrap option everytime
    #
    # TODO: change this to set only the first time (i.e. if .esg/certificates is empty)
    #
    bootstrap = True

    # currently, we set trustroots option everytime
    updateTrustRoots = True
    authnGetTrustRootsCall = False

    # TODO: maybe add option in 'synda certificate' to use specify another path for cadir (for debugging purpose)
    #ROOT_TRUSTROOT_DIR = '/etc/grid-security/certificates'
    #USER_TRUSTROOT_DIR = '~/.globus/certificates'

    # set env.

    os.environ['ESGF_CREDENTIAL'] = sdconfig.esgf_x509_proxy
    os.environ['ESGF_CERT_DIR'] = sdconfig.esgf_x509_cert_dir
    os.environ['X509_CERT_DIR'] = sdconfig.esgf_x509_cert_dir

    if 'X509_USER_PROXY' in os.environ:
        del os.environ['X509_USER_PROXY']
    #if 'GLOBUS_LOCATION' in os.environ:
    #    del os.environ['GLOBUS_LOCATION']

    # main

    myproxy_clnt = MyProxyClient(
        hostname=host,
        port=port,
        caCertDir=sdconfig.esgf_x509_cert_dir,
        proxyCertLifetime=43200)  # 12 hours

    # credname=credname
    creds = myproxy_clnt.logon(
        username,
        password,
        bootstrap=bootstrap,
        updateTrustRoots=updateTrustRoots,
        authnGetTrustRootsCall=authnGetTrustRootsCall)

    # store cert on disk; the context manager closes the file on error too
    with open(sdconfig.esgf_x509_proxy, 'w') as fout:
        for cred in creds:
            fout.write(cred)
def upgrade_db(conn, current_db_version, new_db_version):
    """Apply, in order, the upgrade procedures leading to 'new_db_version'.

    The versions to go through are given by
    sddbversionutils.version_range(); the first one is skipped as the
    database is already at that version. Each applied upgrade is logged.

    Raises:
        SDException: if a version of the range has no upgrade procedure.
    """
    known_versions = upgrade_procs.keys()

    version_path = sddbversionutils.version_range(
        known_versions, current_db_version, new_db_version)

    # remove the first value (i.e. no upgrade needed there as db is already at this version)
    pending_versions = version_path[1:]

    for version in pending_versions:
        if version not in upgrade_procs:
            raise SDException("SDDBVERS-318", "Incorrect database version: cannot upgrade database (version=%s)." % (version,))

        upgrade_procs[version](conn)
        sdlog.info("SDDBVERS-319", "Database updated to version %s" % (version,))
def reset_datasets_flags():
    """Reset dataset status and latest flag from scratch for all datasets.

    All flags are wiped out first, then update_datasets__status_and_latest()
    is called repeatedly until it returns a count that is not positive.
    """
    sdlog.info("SYDDFLAG-933",
               "recalculate status and latest flag for all dataset..", True)

    # we reset all flags before starting the main processing (we clean
    # everything to start from scratch)
    sdmodifyquery.wipeout_datasets_flags()

    # iterate until a pass reports nothing more
    while update_datasets__status_and_latest() > 0:
        pass
def update_complete_datasets_status():
    """
    Update status flag for datasets with complete status.

    Only the datasets returned by the DAO for the 'complete' status are
    handed over to update_datasets_status_HELPER().

    Notes
        - This func is used to fix inconsistencies, when dataset have complete status, but some of its files are not 'done' yet.
        - This func doesn't handle the 'latest' flag
    """
    sdlog.info("SYDDFLAG-184", "Update complete datasets status")

    datasets_flagged_complete = sddatasetdao.get_datasets(status=sdconst.DATASET_STATUS_COMPLETE)

    update_datasets_status_HELPER(datasets_flagged_complete)