def pre_transfer_check_list(tr):
    """
    Returns:
        Check list status
            True: check list OK
            False: check list NOK
    """
    if lfae_mode == "keep":
        # useful mode if
        #  - metadata needs to be regenerated without retransferring the data
        #  - synda files are mixed with files from other sources

        if os.path.isfile(tr.get_full_local_path()):
            # file already here, mark the file as done
            sdlog.info("SYNDTASK-197", "Local file already exists: keep it (lfae_mode=keep,local_file=%s)" % tr.get_full_local_path())
            tr.status = sdconst.TRANSFER_STATUS_DONE
            tr.error_msg = "Local file already exists: keep it (lfae_mode=keep)"
            tr.end_date = sdtime.now()

            # Note: it is important not to set a 'running' status here, else a local
            # file unrelated to synda may be removed by synda (because of the
            # cleanup_running_transfer() func). See mail from Hans Ramthun at
            # 20150331 for more details.
            sdfiledao.update_file(tr)

            return False
        else:
            # file not here, start the download
            return True
    elif lfae_mode == "replace":
        if os.path.isfile(tr.get_full_local_path()):
            sdlog.info("SYNDTASK-187", "Local file already exists: remove it (lfae_mode=replace,local_file=%s)" % tr.get_full_local_path())
            os.remove(tr.get_full_local_path())
        return True
    elif lfae_mode == "abort":
        if os.path.isfile(tr.get_full_local_path()):
            sdlog.info("SYNDTASK-188", "Local file already exists: transfer aborted (lfae_mode=abort,local_file=%s)" % tr.get_full_local_path())
            tr.status = sdconst.TRANSFER_STATUS_ERROR
            tr.priority -= 1
            tr.error_msg = "Local file already exists: transfer aborted (lfae_mode=abort)"
            tr.end_date = sdtime.now()
            sdfiledao.update_file(tr)
            return False
        else:
            return True
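# --- usage sketch (illustration only) ---
# A minimal way to exercise pre_transfer_check_list(). The stub below is a
# hypothetical stand-in for synda's real transfer object, and 'lfae_mode' is
# assumed to be a module-level setting normally loaded from configuration.
class _StubTransfer(object):
    def get_full_local_path(self):
        return "/tmp/nonexistent-file.nc"  # a path that does not exist

lfae_mode = "keep"  # illustrative; would normally come from configuration
assert pre_transfer_check_list(_StubTransfer()) is True  # file absent => download may start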
def start_transfer(tr):
    """Start the given transfer, honouring the 'lfae' (local file already exists) policy.

    Note:
        The caller retrieves the next waiting transfer via get_transfer(), which
        raises NoTransferWaitingException when no more transfers are waiting.
    """
    def start_transfer_thread(tr):
        sdfiledao.update_file(tr)
        th = WorkerThread(tr, eot_queue, Download)
        th.setDaemon(True)  # if the main thread quits, we kill running threads (note though that forked child processes are NOT killed and continue running after that!)
        th.start()

    # we reset values from a previous try, if any
    tr.end_date = None
    tr.error_msg = None
    tr.status = sdconst.TRANSFER_STATUS_RUNNING
    tr.start_date = sdtime.now()

    if lfae_mode == "keep":
        # useful mode if
        #  - metadata needs to be regenerated without retransferring the data
        #  - synda files are mixed with files from other sources

        if os.path.isfile(tr.get_full_local_path()):
            # file already here, mark the file as done
            sdlog.info("SYNDTASK-197", "Local file already exists: keep it (lfae_mode=keep,local_file=%s)" % tr.get_full_local_path())
            tr.status = sdconst.TRANSFER_STATUS_DONE
            tr.error_msg = "Local file already exists: keep it (lfae_mode=keep)"
            tr.end_date = sdtime.now()

            # Note: it is important not to set a 'running' status here, else a local
            # file unrelated to synda may be removed by synda (because of the
            # cleanup_running_transfer() func). See mail from Hans Ramthun at
            # 20150331 for more details.
            sdfiledao.update_file(tr)
        else:
            # file not here, start the download
            start_transfer_thread(tr)
    elif lfae_mode == "replace":
        if os.path.isfile(tr.get_full_local_path()):
            sdlog.info("SYNDTASK-187", "Local file already exists: remove it (lfae_mode=replace,local_file=%s)" % tr.get_full_local_path())
            os.remove(tr.get_full_local_path())
        start_transfer_thread(tr)
    elif lfae_mode == "abort":
        if os.path.isfile(tr.get_full_local_path()):
            tr.status = sdconst.TRANSFER_STATUS_ERROR
            tr.error_msg = "Local file already exists: transfer aborted (lfae_mode=abort)"
            tr.end_date = sdtime.now()
            sdfiledao.update_file(tr)
        else:
            start_transfer_thread(tr)
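# --- pattern sketch (illustration only) ---
# A minimal standard-library version of the worker-thread pattern used by
# start_transfer_thread() above: a daemon thread runs one job, then reports
# completion on an 'end of transfer' queue that the main loop drains.
# WorkerThread, eot_queue and Download are synda internals; everything below
# is a hypothetical stand-in.
import threading
import Queue  # Python 2; use 'queue' on Python 3

_eot_queue = Queue.Queue()

def _worker(job):
    try:
        job()  # e.g. perform the download
        _eot_queue.put(('done', job))
    except Exception as e:
        _eot_queue.put(('error', e))

def _start_worker(job):
    th = threading.Thread(target=_worker, args=(job,))
    th.setDaemon(True)  # daemon: killed when the main thread exits
    th.start()
    return th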
def keep_recent_datasets(datasets):
    """Keep only datasets modified during the last 24 hours (this func is a hack)."""
    li = []

    # Note that we use last_mod_date instead of crea_date, so as to also
    # try to retrieve the timestamp for previously inserted datasets
    # (i.e. datasets which have been modified during this discovery
    # (i.e. new files have been added to the dataset), but which have
    # been created in a previous discovery).
    #
    # We only try to retrieve the timestamp for recent datasets (-24H).
    # This is to prevent retrieving timestamps for datasets not related
    # to the current discovery: for example, there are 20 000 datasets
    # without timestamp on VESG4, and we don't want to trigger 20 000
    # search-API requests each time we install a new file!
    for d in datasets:
        interval = sdtime.compute_time_delta(d.last_mod_date, sdtime.now())
        if interval > (24 * 3600):
            # this dataset has not been modified in the last 24 hours,
            # so it is not related to the current discovery
            pass
        else:
            li.append(d)

    return li
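# --- helper sketch (illustration only) ---
# sdtime.compute_time_delta() is a synda internal; a minimal equivalent is
# sketched below, assuming timestamps are strings in the
# "YYYY-MM-DD HH:MM:SS.ffffff" format (the exact format used by sdtime is an
# assumption here).
from datetime import datetime

def _compute_time_delta_seconds(start, end, fmt="%Y-%m-%d %H:%M:%S.%f"):
    """Return the number of seconds between two timestamp strings."""
    delta = datetime.strptime(end, fmt) - datetime.strptime(start, fmt)
    return delta.total_seconds()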
def update_latest_flag(d, force_latest=False):
    """
    Args:
        force_latest: if True, force 'latest' to True no matter what the
            compute_latest_flag() func says

    Notes
        - warning: this func updates the dataset in the database (and in some
          cases also all the other versions of this dataset)
        - warning: this func modifies the 'd' object
    """
    assert not d.latest  # this func must NOT be called if the dataset is already 'latest'

    dataset_versions = sddatasetquery.get_dataset_versions(d, True)  # retrieve all versions of this dataset

    # set the *new* value of the 'latest' flag
    d.latest = True if force_latest else compute_latest_flag(dataset_versions, d)

    if d.latest:
        # if we are here, it means 'latest' switched from False to True

        d.latest_date = sdtime.now()  # 'latest_date' is set when the dataset's 'latest' flag switches from False to True
        switch_off_latest_flag_for_all_other_versions(d.version, dataset_versions)  # MOD_A
    else:
        # 'latest' stays False, do nothing
        pass

    sddatasetdao.update_dataset(d, False, sddb.conn)  # MOD_B

    sddb.conn.commit()  # commit all dataset modifications together (MOD_A, if any, and MOD_B)
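# --- helper sketch (illustration only) ---
# compute_latest_flag() and switch_off_latest_flag_for_all_other_versions()
# are synda internals. As a rough mental model (an assumption, not synda's
# actual rule), a version can only become 'latest' if no other version of the
# same dataset is more recent:
def _is_most_recent_version(dataset_versions, d):
    """Hypothetical sketch: True if 'd' has the highest version string."""
    return d.version == max(v.version for v in dataset_versions)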
def latest_dataset_complete_event(project, model, dataset_pattern, commit=True):
    # this event means the latest dataset has been completed (beware: no 'latest switch' event here: it was latest before and still is)

    sdlog.log("SYDEVENT-045", "'latest_dataset_complete_event' triggered (%s)" % dataset_pattern, event_triggered_log_level)

    if project == 'CMIP5':
        # CMIP5 uses the special output12 event
        return

    if project in sdconst.PROJECT_WITH_ONE_VARIABLE_PER_DATASET:
        # CORDEX and CMIP6 use only variable-level events
        return

    event = Event(name=sdconst.EVENT_LATEST_DATASET_COMPLETE)
    event.project = project
    event.model = model
    event.dataset_pattern = dataset_pattern
    event.variable = ''
    event.filename_pattern = ''
    event.crea_date = sdtime.now()
    event.priority = sdconst.DEFAULT_PRIORITY
    sdeventdao.add_event(event, commit=commit)
def file_complete_event(tr):
    """
    Note
        When a variable is complete, we know for sure that all of the variable's
        files have been fetched, because a variable is atomic, i.e. it is not
        possible to retrieve only a subset of a variable's files (this is true
        because the search-API does not allow selecting a subset of a variable's
        files: its temporal and spatial filters operate at the variable level).
        A dataset, however, can be marked as complete even if it contains only a
        subset of the variables included in this dataset (but all variables that
        have been discovered for this dataset must be complete).
    """
    sdlog.log("SYDEVENT-001", "'file_complete_event' triggered (%s)" % tr.file_functional_id, event_triggered_log_level)

    if sdconfig.is_event_enabled(sdconst.EVENT_FILE_COMPLETE, tr.project):
        event = Event(name=sdconst.EVENT_FILE_COMPLETE)
        event.project = tr.project
        event.model = tr.model
        event.dataset_pattern = tr.dataset.local_path
        event.variable = tr.variable
        event.filename_pattern = tr.filename
        event.crea_date = sdtime.now()
        event.priority = sdconst.DEFAULT_PRIORITY
        sdeventdao.add_event(event, commit=True)

    # update the dataset (everything except the 'latest' flag)
    tr.dataset.status = sddatasetflag.compute_dataset_status(tr.dataset)
    tr.dataset.last_done_transfer_date = tr.end_date
    sddatasetdao.update_dataset(tr.dataset)

    if sdvariable.is_variable_complete(tr.dataset.dataset_id, tr.variable):
        variable_complete_event(tr.project, tr.model, tr.dataset, tr.variable)  # trigger the 'variable complete' event
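# --- helper sketch (illustration only) ---
# sdvariable.is_variable_complete() is a synda internal; the query below is a
# hypothetical sketch of the idea (the table and column names are assumptions):
# a variable is complete when none of its files remain in a non-'done' status.
def _is_variable_complete_sketch(conn, dataset_id, variable):
    c = conn.cursor()
    c.execute("select count(*) from file where dataset_id=? and variable=? and status!=?",
              (dataset_id, variable, sdconst.TRANSFER_STATUS_DONE))
    (pending,) = c.fetchone()
    c.close()
    return pending == 0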
def prepare_transfer(tr):
    # reset values from a previous try, if any
    tr.end_date = None
    tr.error_msg = None
    tr.status = sdconst.TRANSFER_STATUS_RUNNING
    tr.start_date = sdtime.now()
def submit(order_name, project, model, dataset, variable='', filename='', commit=True):
    # TODO: replace single quote with None and move 'None2SingleQuote' processing inside Event object (and add comment about why we use single quote instead of None in event table !!!)

    event_name = order_name
    dataset_pattern = sdproduct.replace_output12_product_with_wildcard(dataset)
    filename_pattern = filename

    sdlog.info("SDPPORDE-001", "'%s' triggered (%s,%s)" % (event_name, dataset_pattern, variable))

    event = Event(name=event_name)
    event.project = project
    event.model = model
    event.dataset_pattern = dataset_pattern
    event.variable = variable
    event.filename_pattern = filename_pattern
    event.crea_date = sdtime.now()
    event.priority = sdconst.DEFAULT_PRIORITY
    sdeventdao.add_event(event, commit=commit)
def add_history_line(action, selection_filename=None, insertion_group_id=None, crea_date=None, selection_file_checksum=None, conn=sddb.conn):
    crea_date = sdtime.now() if crea_date is None else crea_date

    c = conn.cursor()
    c.execute("insert into history (action, selection_filename, crea_date, insertion_group_id, selection_file_checksum) values (?,?,?,?,?)",
              (action, selection_filename, crea_date, insertion_group_id, selection_file_checksum))
    c.close()
    conn.commit()
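# --- usage sketch (illustration only) ---
# A self-contained, in-memory demonstration of the insert above; the schema is
# reduced to the columns used here (the real 'history' table is defined
# elsewhere in synda and may contain more columns).
import sqlite3

def _demo_history_insert():
    conn = sqlite3.connect(":memory:")
    conn.execute("create table history (action text, selection_filename text, crea_date text, insertion_group_id int, selection_file_checksum text)")
    add_history_line('install', selection_filename='cmip5.txt', crea_date='2015-03-31 12:00:00.000000', conn=conn)
    print(conn.execute("select action, selection_filename from history").fetchall())
    conn.close()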
def run(cls, tr):
    cls.start_transfer_script(tr)
    tr.end_date = sdtime.now()

    # compute metrics
    if tr.status == sdconst.TRANSFER_STATUS_DONE:
        tr.duration = sdtime.compute_duration(tr.start_date, tr.end_date)
        tr.rate = sdtools.compute_rate(tr.size, tr.duration)
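# --- helper sketch (illustration only) ---
# sdtools.compute_rate() is a synda internal; a minimal equivalent with a
# zero-duration guard is sketched below (the guard behaviour is an assumption).
def _compute_rate_sketch(size_bytes, duration_seconds):
    """Return the transfer rate in bytes per second."""
    if duration_seconds <= 0:
        return 0  # avoid division by zero on sub-second transfers
    return int(size_bytes) / float(duration_seconds)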
def add_file(f):
    sdlog.info("SDENQUEU-003", "Create transfer (local_path=%s,url=%s)" % (f.get_full_local_path(), f.url))

    f.dataset_id = add_dataset(f)
    f.status = sdconst.TRANSFER_STATUS_WAITING
    f.crea_date = sdtime.now()

    sdfiledao.add_file(f, commit=False)
def store_dataset_export_event(d, conn=sddb.conn):
    c = conn.cursor()
    c.execute("insert into export (dataset_id,export_date) values (?,?)", (d.dataset_id, sdtime.now()))
    conn.commit()
    c.close()
def variable_complete_event(project, model, dataset, variable, commit=True):
    sdlog.log("SYDEVENT-002", "'variable_complete_event' triggered (%s,%s)" % (dataset.dataset_functional_id, variable), event_triggered_log_level)

    if sdconfig.is_event_enabled(sdconst.EVENT_VARIABLE_COMPLETE, project):
        event = Event(name=sdconst.EVENT_VARIABLE_COMPLETE)
        event.project = project
        event.model = model
        event.dataset_pattern = dataset.local_path
        event.variable = variable
        event.filename_pattern = ''
        event.crea_date = sdtime.now()
        event.priority = sdconst.DEFAULT_PRIORITY
        sdeventdao.add_event(event, commit=commit)

    # cascade 1 (trigger the dataset-level event)
    if dataset.status == sdconst.DATASET_STATUS_COMPLETE:
        dataset_complete_event(project, model, dataset)  # trigger the 'dataset complete' event

    # cascade 2 (trigger the variable output12 event)
    if project == 'CMIP5':
        if '/output/' in dataset.path:
            return

        (ds_path_output1, ds_path_output2) = sdproduct.get_output12_dataset_paths(dataset.path)

        if sddatasetdao.exists_dataset(path=ds_path_output1) and sddatasetdao.exists_dataset(path=ds_path_output2):
            d1 = sddatasetdao.get_dataset(path=ds_path_output1)
            d2 = sddatasetdao.get_dataset(path=ds_path_output2)

            if sdvariable.is_variable_complete(d1.dataset_id, variable) and sdvariable.is_variable_complete(d2.dataset_id, variable):
                dataset_pattern = sdproduct.replace_output12_product_with_wildcard(dataset.local_path)
                variable_complete_output12_event(project, model, dataset_pattern, variable)  # trigger event (cross-dataset event)
        else:
            # we also trigger the 'variable_complete_output12_event' event if the
            # variable exists in one product only (if there is only one product,
            # then the output12 event is also true)
            dataset_pattern = sdproduct.replace_output12_product_with_wildcard(dataset.local_path)
            variable_complete_output12_event(project, model, dataset_pattern, variable)  # trigger event (cross-dataset event)
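# --- helper sketch (illustration only) ---
# sdproduct.replace_output12_product_with_wildcard() is a synda internal. The
# idea (sketched here as an assumption, not synda's exact implementation) is
# that CMIP5 'output1' and 'output2' products are folded into one pattern, so
# that a single event can match datasets from both products.
import re

def _replace_output12_with_wildcard_sketch(path):
    return re.sub(r'/output[12]/', '/output*/', path)

# e.g. 'cmip5/output1/CNRM-CERFACS/...' -> 'cmip5/output*/CNRM-CERFACS/...'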
def variable_complete_output12_event(project, model, dataset_pattern, variable, commit=True):
    sdlog.log("SYDEVENT-003", "'variable_complete_output12_event' triggered (%s,%s)" % (dataset_pattern, variable), event_triggered_log_level)

    event = Event(name=sdconst.EVENT_OUTPUT12_VARIABLE_COMPLETE)
    event.project = project
    event.model = model
    event.dataset_pattern = dataset_pattern
    event.variable = variable
    event.filename_pattern = ''
    event.crea_date = sdtime.now()
    event.priority = sdconst.DEFAULT_PRIORITY
    sdeventdao.add_event(event, commit=commit)
def latest_dataset_complete_output12_event(project, model, dataset_pattern, commit=True):
    # this event means one latest output12 dataset has been completed (beware: no 'latest switch' event here: it was latest before and still is)

    sdlog.log("SYDEVENT-006", "'latest_dataset_complete_output12_event' triggered (%s)" % dataset_pattern, event_triggered_log_level)

    event = Event(name=sdconst.EVENT_OUTPUT12_LATEST_DATASET_COMPLETE)
    event.project = project
    event.model = model
    event.dataset_pattern = dataset_pattern
    event.variable = ''
    event.filename_pattern = ''
    event.crea_date = sdtime.now()
    event.priority = sdconst.DEFAULT_PRIORITY
    sdeventdao.add_event(event, commit=commit)
def start_transfer_script(cls, tr):
    sdlog.info("JFPDMDEF-001", "Will download url=%s" % (tr.url,))

    if sdconfig.fake_download:
        tr.status = sdconst.TRANSFER_STATUS_DONE
        tr.error_msg = ""
        tr.sdget_error_msg = ""
        return

    # main
    (tr.sdget_status, killed, tr.sdget_error_msg) = sdget.download(tr.url,
                                                                   tr.get_full_local_path(),
                                                                   debug=False,
                                                                   http_client=sdconst.HTTP_CLIENT_WGET,
                                                                   timeout=sdconst.ASYNC_DOWNLOAD_HTTP_TIMEOUT,
                                                                   verbosity=0,
                                                                   buffered=True,
                                                                   hpss=hpss)

    # check
    assert tr.size is not None

    # compute metrics
    tr.end_date = sdtime.now()
    tr.duration = sdtime.compute_duration(tr.start_date, tr.end_date)
    tr.rate = sdtools.compute_rate(tr.size, tr.duration)

    # post-processing
    if tr.sdget_status == 0:
        if int(tr.size) != os.path.getsize(tr.get_full_local_path()):
            sdlog.error("SDDMDEFA-002", "Size doesn't match (remote_size=%i,local_size=%i,local_path=%s)" % (int(tr.size), os.path.getsize(tr.get_full_local_path()), tr.get_full_local_path()))

        # retrieve remote checksum
        remote_checksum = tr.checksum

        if remote_checksum is not None:
            # remote checksum exists

            # compute local checksum
            checksum_type = tr.checksum_type if tr.checksum_type is not None else sdconst.CHECKSUM_TYPE_MD5  # fall back to 'md5' (arbitrary)
            local_checksum = sdutils.compute_checksum(tr.get_full_local_path(), checksum_type)

            # compare local and remote checksums
            if remote_checksum == local_checksum:
                # checksum is ok
                tr.status = sdconst.TRANSFER_STATUS_DONE
                tr.error_msg = ""
            else:
                # checksum is not ok
                if incorrect_checksum_action == "remove":
                    tr.status = sdconst.TRANSFER_STATUS_ERROR
                    tr.error_msg = "File corruption detected: local checksum doesn't match remote checksum"

                    # remove the file from the local repository
                    sdlog.error("SDDMDEFA-155", "Checksums don't match: remove local file (local_checksum=%s,remote_checksum=%s,local_path=%s)" % (local_checksum, remote_checksum, tr.get_full_local_path()))
                    try:
                        os.remove(tr.get_full_local_path())
                    except Exception as e:
                        sdlog.error("SDDMDEFA-158", "Error occurred while removing local file (%s)" % tr.get_full_local_path())
                elif incorrect_checksum_action == "keep":
                    sdlog.info("SDDMDEFA-157", "Local checksum doesn't match remote checksum (%s)" % tr.get_full_local_path())
                    tr.status = sdconst.TRANSFER_STATUS_DONE
                    tr.error_msg = ""
                else:
                    raise sdexception.FatalException("SDDMDEFA-507", "Incorrect value (%s)" % incorrect_checksum_action)
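# --- helper sketch (illustration only) ---
# sdutils.compute_checksum() is a synda internal; a minimal equivalent using
# hashlib is sketched below (chunked reading so large NetCDF files are not
# loaded into memory at once). Supporting exactly 'md5' and 'sha256' here is
# an assumption.
import hashlib

def _compute_checksum_sketch(path, checksum_type='md5', chunk_size=1 << 20):
    h = hashlib.new(checksum_type)  # e.g. 'md5' or 'sha256'
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            h.update(chunk)
    return h.hexdigest()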
def add_dataset(f):
    """
    Returns:
        dataset_id
    """
    d = sddatasetdao.get_dataset(dataset_functional_id=f.dataset_functional_id)
    if d is not None:
        # check the dataset local path format
        #
        # (once a dataset has been created using one local_path format, it
        # cannot be changed anymore without removing the whole dataset /
        # restarting the dataset from scratch)
        #
        if d.local_path != f.dataset_local_path:
            raise SDException("SDENQUEU-008", "Incorrect local path format (existing_format=%s,new_format=%s)" % (d.local_path, f.dataset_local_path))

        # compute the new dataset status
        if d.status == sdconst.DATASET_STATUS_IN_PROGRESS:
            d.status = sdconst.DATASET_STATUS_IN_PROGRESS
        elif d.status == sdconst.DATASET_STATUS_EMPTY:
            d.status = sdconst.DATASET_STATUS_EMPTY
        elif d.status == sdconst.DATASET_STATUS_COMPLETE:
            d.status = sdconst.DATASET_STATUS_IN_PROGRESS  # this means a dataset may be 'in-progress' and also 'latest'

        # Note related to the dataset 'latest' column
        #
        # Adding new files to a dataset may change its status, but doesn't
        # change the dataset's 'latest' flag. This is because a dataset can
        # only be downgraded here ('complete' => 'in-progress') or stay the
        # same, and when a dataset is downgraded, the 'latest' flag stays as
        # is, whether it is true or false.

        # 'last_mod_date' is only modified here (i.e. it is not modified when
        # the status of the dataset's files changes). In other words, it
        # changes only when new files are added to the dataset by this script.
        #
        d.last_mod_date = sdtime.now()

        sddatasetdao.update_dataset(d, commit=False)

        return d.dataset_id
    else:
        sdlog.info("SDENQUEU-002", "Create dataset (dataset_path=%s)" % f.dataset_path)

        d = Dataset()
        d.local_path = f.dataset_local_path
        d.path = f.dataset_path
        d.path_without_version = f.dataset_path_without_version
        d.dataset_functional_id = f.dataset_functional_id
        d.template = f.dataset_template
        d.version = f.dataset_version
        d.project = f.project
        d.status = sdconst.DATASET_STATUS_EMPTY
        d.latest = False
        d.crea_date = sdtime.now()
        d.last_mod_date = sdtime.now()

        # non-mandatory attributes
        d.timestamp = f.dataset_timestamp if hasattr(f, 'dataset_timestamp') else None
        d.model = f.model if hasattr(f, 'model') else None

        return sddatasetdao.add_dataset(d, commit=False)
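# --- transition sketch (illustration only) ---
# The status branches above collapse to one rule: adding a file leaves 'empty'
# and 'in-progress' unchanged and demotes 'complete' to 'in-progress'. A
# table-driven equivalent (hypothetical, not synda's code) would be:
_STATUS_ON_NEW_FILE = {
    sdconst.DATASET_STATUS_EMPTY: sdconst.DATASET_STATUS_EMPTY,
    sdconst.DATASET_STATUS_IN_PROGRESS: sdconst.DATASET_STATUS_IN_PROGRESS,
    sdconst.DATASET_STATUS_COMPLETE: sdconst.DATASET_STATUS_IN_PROGRESS,  # new file => no longer complete
}
# usage: d.status = _STATUS_ON_NEW_FILE[d.status]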
def transfers_end():
    _, _, access_token = api_client.goauth.get_access_token(username=globus_username, password=globus_password)
    api = api_client.TransferAPIClient(username=globus_username, goauth=access_token)

    for task_id in globus_tasks:
        code, reason, data = api.task(task_id, fields="status")
        status = data['status']

        sdlog.debug("SDDMGLOB-016", "Checking the status of Globus transfer tasks, id: %s, status: %s" % (task_id, status))

        for item in globus_tasks[task_id]['items']:
            tr = item['tr']

            if status == "SUCCEEDED":
                assert tr.size is not None

                if int(tr.size) != os.path.getsize(tr.get_full_local_path()):
                    sdlog.error("SDDMGLOB-002", "Size doesn't match (remote_size=%i,local_size=%i,local_path=%s)" % (int(tr.size), os.path.getsize(tr.get_full_local_path()), tr.get_full_local_path()))

                # retrieve local and remote checksums
                checksum_type = tr.checksum_type if tr.checksum_type is not None else sdconst.CHECKSUM_TYPE_MD5
                local_checksum = sdutils.compute_checksum(tr.get_full_local_path(), checksum_type)
                remote_checksum = tr.checksum

                if remote_checksum is not None:
                    # remote checksum exists

                    # compare local and remote checksums
                    if remote_checksum == local_checksum:
                        # checksum is ok
                        tr.status = sdconst.TRANSFER_STATUS_DONE
                    else:
                        # checksum is not ok
                        if incorrect_checksum_action == "remove":
                            tr.status = sdconst.TRANSFER_STATUS_ERROR
                            tr.priority -= 1
                            tr.error_msg = "File corruption detected: local checksum doesn't match remote checksum"

                            # remove the file from the local repository
                            sdlog.error("SDDMGLOB-155", "Checksums don't match: remove local file (local_checksum=%s,remote_checksum=%s,local_path=%s)" % (local_checksum, remote_checksum, tr.get_full_local_path()))
                            try:
                                os.remove(tr.get_full_local_path())
                            except Exception as e:
                                sdlog.error("SDDMGLOB-158", "Error occurred while removing local file (%s)" % tr.get_full_local_path())
                        elif incorrect_checksum_action == "keep":
                            sdlog.info("SDDMGLOB-157", "Local checksum doesn't match remote checksum (%s)" % tr.get_full_local_path())
                            tr.status = sdconst.TRANSFER_STATUS_DONE
                        else:
                            raise FatalException("SDDMGLOB-507", "Incorrect value (%s)" % incorrect_checksum_action)
                else:
                    # remote checksum is missing
                    # NOTE: we DON'T store the local checksum (the 'file' table contains only the REMOTE checksum)
                    tr.status = sdconst.TRANSFER_STATUS_DONE

                if tr.status == sdconst.TRANSFER_STATUS_DONE:
                    tr.end_date = sdtime.now()  # WARNING: this is not the real end-of-transfer date, but the date when we asked the Globus scheduler whether the transfer was done
                    tr.error_msg = ""
                    sdlog.info("SDDMGLOB-101", "Transfer done (%s)" % str(tr))
            elif status == "FAILED":
                tr.status = sdconst.TRANSFER_STATUS_ERROR
                tr.priority -= 1
                tr.error_msg = "Error occurred during download."
                sdlog.info("SDDMGLOB-101", "Transfer failed (%s)" % str(tr))

                # remove the local file if it exists
                if os.path.isfile(tr.get_full_local_path()):
                    try:
                        os.remove(tr.get_full_local_path())
                    except Exception as e:
                        sdlog.error("SDDMGLOB-528", "Error occurred during file suppression (%s,%s)" % (tr.get_full_local_path(), str(e)))