def download_file(url, full_local_path, checksum_type, credentials = "~/.esg/credentials.pem"):
    """Download ``url`` to ``full_local_path`` over HTTPS and checksum the result.

    Args:
        url: remote location, opened through a urllib2 opener.
        full_local_path: destination file (overwritten if it already exists).
        checksum_type: forwarded as-is to ``sdutils.compute_checksum``.
        credentials: path of the PEM file ("~" is expanded). It is passed twice
            to ``HTTPSClientAuthHandler`` (presumably as both key and
            certificate -- TODO confirm).

    Returns:
        ``(0, local_checksum)`` on success, ``(1, None)`` if any exception is
        raised while downloading or computing the checksum.
    """
    import shutil
    import sdutils

    try:
        # setup HTTP handler
        certFile = expanduser(credentials)
        opener = urllib2.build_opener(HTTPSClientAuthHandler(certFile, certFile))
        opener.add_handler(urllib2.HTTPCookieProcessor())

        # download file
        webFile = opener.open(url)
        try:
            # binary mode: the payload must be written byte-for-byte
            localFile = open(full_local_path, 'wb')
            try:
                # stream by chunks so that files bigger than memory are supported
                shutil.copyfileobj(webFile, localFile)
            finally:
                localFile.close()
        finally:
            # cleanup (also performed when the transfer fails half-way)
            webFile.close()
            opener.close()

        local_checksum = sdutils.compute_checksum(full_local_path, checksum_type)
    except Exception:
        # TODO: log error msg
        return (1, None)

    return (0, local_checksum)
def add_selection(us):
    """Synchronise selection ``us`` with the database, then register it in ``_selections``.

    The checksum of the selection file is always recomputed from the file.

    - Selection not in database: it is inserted with status NEW and fullscan
      enabled.
    - Selection in database with the same checksum: status and checksum are
      copied from the database; fullscan is enabled when the status is
      MODIFIED or NEW, left untouched when it is NORMAL.
    - Selection in database with a different checksum: the new checksum and
      the MODIFIED status are stored in the database.

    Raises:
        SDException: if the status read from the database is not one of
            NORMAL, MODIFIED or NEW.
    """
    current_checksum = sdutils.compute_checksum(us.get_selection_file_full_path())

    if exists_selection(us):
        # selection already in database: reuse its id
        stored = fetch_selection(us.get_filename())
        us.set_selection_id(stored.get_selection_id())

        if current_checksum == stored.get_checksum():
            # file untouched since it was recorded: take status and checksum from DB
            us.set_status(stored.get_status())
            us.set_checksum(stored.get_checksum())

            status = us.get_status()
            if status == sdconst.SELECTION_STATUS_NORMAL:
                # nothing to do here (let (a) and (b) decide if we need fullscan)
                pass
            elif status in (sdconst.SELECTION_STATUS_MODIFIED, sdconst.SELECTION_STATUS_NEW):
                us.set_fullscan(True)
            else:
                raise SDException("SYNDATSEL-071","unknown status")
        else:
            # checksums differ: record the new checksum and flag the selection
            sdlog.info("SYNDASEL-197","%s selection has been modified (marked for fullscan)"%us.get_filename())
            us.set_checksum(current_checksum)
            us.set_status(sdconst.SELECTION_STATUS_MODIFIED)
            update_selection(us)
    else:
        # selection missing from database: create it
        us.set_checksum(current_checksum)
        us.set_status(sdconst.SELECTION_STATUS_NEW)
        us.set_fullscan(True)
        insertSelection(us)  # warning: this modifies the us object (sets PK)

    # add selection in selection list
    # TODO
    _selections[us.get_filename()] = us
def reset():
    """Flag every selection as NORMAL and refresh its stored checksum.

    For each name returned by ``get_selection_file_list()``, a ``Selection``
    is built, given the NORMAL status and the checksum computed from its
    selection file, then saved with ``update_selection``.
    """
    for selection_filename in get_selection_file_list():
        selection = Selection(filename=selection_filename, logger=get_logger())
        selection.set_status(sdconst.SELECTION_STATUS_NORMAL)
        selection.set_checksum(
            sdutils.compute_checksum(selection.get_selection_file_full_path()))
        update_selection(selection)
def download_file(url, local_path, checksum_type):
    """Transfer ``url`` to ``local_path`` and checksum the downloaded file.

    Returns:
        ``(status, local_checksum)`` where ``status`` is the value returned by
        ``file_transfer_synchronous_wrapper``. The checksum is computed only
        when ``status`` is 0; otherwise it is ``None``.
    """
    status = file_transfer_synchronous_wrapper(url, local_path)

    if status != 0:
        # transfer failed: there is nothing reliable to checksum
        return (status, None)

    return (status, sdutils.compute_checksum(local_path, checksum_type))
def download_file(url,local_path,checksum_type):
    """Run a synchronous transfer of ``url`` into ``local_path``.

    Returns:
        A ``(status, local_checksum)`` tuple. ``status`` comes straight from
        ``file_transfer_synchronous_wrapper``; ``local_checksum`` is the
        checksum of ``local_path`` when ``status`` is 0 and ``None`` otherwise.
    """
    transfer_status = file_transfer_synchronous_wrapper(url, local_path)

    # only a successful transfer (status 0) is worth checksumming
    checksum = (sdutils.compute_checksum(local_path, checksum_type)
                if transfer_status == 0 else None)

    return (transfer_status, checksum)
def download_file(url,full_local_path,checksum_type):
    """Download ``url`` with ``sdnetutils.download_file`` and checksum the result.

    Returns:
        ``(status, local_checksum)``: ``status`` is the value returned by
        ``sdnetutils.download_file``; ``local_checksum`` is computed from
        ``full_local_path`` when ``status`` is 0 and is ``None`` otherwise.
    """
    status = sdnetutils.download_file(url, full_local_path)

    if status != 0:
        # download failed: no checksum
        return (status, None)

    return (status, sdutils.compute_checksum(full_local_path, checksum_type))
def file_changed_since_last_run(selection_file, action):
    """Tell whether ``selection_file`` changed since the previous run of ``action``.

    The current checksum of the file is compared with the
    'selection_file_checksum' value of the record returned by
    ``get_previous_run`` for the file's base name and ``action``.

    Returns:
        True when the two checksums differ (the file changed), False when
        they are equal.
    """
    # retrieve current checksum
    current_checksum = sdutils.compute_checksum(selection_file)

    # retrieve previous run checksum
    selection_filename = os.path.basename(selection_file)
    previous_run = get_previous_run(selection_filename, action)
    previous_checksum = previous_run['selection_file_checksum']

    # "changed" means the checksums are different (comparing with '==' here
    # would report exactly the opposite of what the function name promises)
    return previous_checksum != current_checksum
def file_changed_since_last_run(selection_file,action):
    """Tell whether ``selection_file`` changed since the previous run of ``action``.

    The current checksum of the file is compared with the
    'selection_file_checksum' value of the record returned by
    ``get_previous_run`` for the file's base name and ``action``.

    Returns:
        True when the two checksums differ (the file changed), False when
        they are equal.
    """
    # retrieve current checksum
    current_checksum = sdutils.compute_checksum(selection_file)

    # retrieve previous run checksum
    selection_filename = os.path.basename(selection_file)
    previous_run = get_previous_run(selection_filename, action)
    previous_checksum = previous_run['selection_file_checksum']

    # "changed" means the checksums are different (comparing with '==' here
    # would report exactly the opposite of what the function name promises)
    return previous_checksum != current_checksum
def set_checksum_when_empty(file):
    """Compute and store an MD5 checksum for every file that has none yet.

    The files are those returned by
    ``sdrebuildquery.get_files_without_checksum()``; each one is checksummed
    from its full local path and saved with ``sdfiledao.update``.

    Args:
        file: not used by this function.
    """
    for file_without_checksum in sdrebuildquery.get_files_without_checksum():
        md5_type = sdconst.CHECKSUM_TYPE_MD5
        file_without_checksum.checksum = sdutils.compute_checksum(
            file_without_checksum.get_full_local_path(), md5_type)
        file_without_checksum.checksum_type = md5_type
        sdfiledao.update(file_without_checksum)

    # NOTE(review): the text below was attached to this function but appears
    # to describe a different procedure -- confirm where it belongs:
    #   Set variable column when NULL (some rows have variable column NULL for
    #   historical reasons, this procedure fix that).
def add_history_line(action=None,selection_file=None,insertion_group_id=None,crea_date=None):
    """Record one history entry for ``action`` applied to ``selection_file``.

    Args:
        action: mandatory (asserted not None).
        selection_file: mandatory (asserted not None). Only its base name is
            stored. When it equals ``sdconst.SELECTION_FROM_CMDLINE`` no
            checksum is computed and None is stored instead.
        insertion_group_id: forwarded unchanged to the DAO.
        crea_date: forwarded unchanged to the DAO.
    """
    # check
    assert action is not None
    assert selection_file is not None

    # compute checksum (skipped for the SELECTION_FROM_CMDLINE marker)
    if selection_file != sdconst.SELECTION_FROM_CMDLINE:
        checksum = sdutils.compute_checksum(selection_file)
    else:
        checksum = None

    # main
    sdhistorydao.add_history_line(
        action,
        selection_filename=os.path.basename(selection_file),
        insertion_group_id=insertion_group_id,
        crea_date=crea_date,
        selection_file_checksum=checksum)
def set_checksum_when_empty(file):
    """Fill in the missing checksum of all files that do not have one yet.

    Every file returned by ``sdrebuildquery.get_files_without_checksum()``
    gets an MD5 checksum computed from its full local path, and is then
    saved through ``sdfiledao.update``.

    Args:
        file: not used by this function.
    """
    for entry in sdrebuildquery.get_files_without_checksum():
        kind = sdconst.CHECKSUM_TYPE_MD5
        entry.checksum = sdutils.compute_checksum(entry.get_full_local_path(), kind)
        entry.checksum_type = kind
        sdfiledao.update(entry)

    # NOTE(review): the text below was attached to this function but appears
    # to describe a different procedure -- confirm where it belongs:
    #   Set variable column when NULL (some rows have variable column NULL for
    #   historical reasons, this procedure fix that).
def add_history_line(action=None, selection_file=None, insertion_group_id=None, crea_date=None):
    """Append a line to the history for ``action`` and ``selection_file``.

    ``action`` and ``selection_file`` are both asserted to be not None. The
    stored file name is the base name of ``selection_file``. The stored
    checksum is computed from the file, unless ``selection_file`` equals
    ``sdconst.SELECTION_FROM_CMDLINE``, in which case None is stored.
    ``insertion_group_id`` and ``crea_date`` are forwarded unchanged.
    """
    # check
    assert action is not None
    assert selection_file is not None

    # compute checksum
    from_cmdline = (selection_file == sdconst.SELECTION_FROM_CMDLINE)
    cs = None if from_cmdline else sdutils.compute_checksum(selection_file)

    # main
    history_fields = dict(
        selection_filename=os.path.basename(selection_file),
        insertion_group_id=insertion_group_id,
        crea_date=crea_date,
        selection_file_checksum=cs,
    )
    sdhistorydao.add_history_line(action, **history_fields)
def transfers_end():
    """Poll Globus for each tracked task and update the transfers it carries.

    ``globus_tasks`` is indexed by task id; each entry holds an 'items' list
    whose elements expose the transfer object under the 'tr' key.

    - Task "SUCCEEDED": the local size is compared with ``tr.size`` (a
      mismatch is only logged), then the local checksum is compared with
      ``tr.checksum``. A missing or matching remote checksum marks the
      transfer DONE. On mismatch, ``incorrect_checksum_action`` decides:
      "remove" flags the transfer in error, lowers its priority and deletes
      the local file; "keep" marks it DONE anyway.
    - Task "FAILED": the transfer is flagged in error, its priority is
      lowered and the local file is deleted if present.
    - Any other task status leaves the transfers untouched.

    Raises:
        FatalException: if ``incorrect_checksum_action`` is neither "remove"
            nor "keep" when a checksum mismatch is detected.
    """
    _, _, access_token = api_client.goauth.get_access_token(
        username=globus_username, password=globus_password)
    api = api_client.TransferAPIClient(username=globus_username, goauth=access_token)

    for task_id in globus_tasks:
        code, reason, data = api.task(task_id, fields="status")
        status = data['status']
        sdlog.debug("SDDMGLOB-016", "Checking the status of Globus transfer tasks, id: %s, status: %s" % (task_id, status))

        for item in globus_tasks[task_id]['items']:
            tr = item['tr']

            if status == "SUCCEEDED":
                assert tr.size is not None

                local_path = tr.get_full_local_path()

                # a size mismatch is reported but does not stop the processing
                remote_size = int(tr.size)
                local_size = os.path.getsize(local_path)
                if remote_size != local_size:
                    sdlog.error("SDDMGLOB-002", "size don't match (remote_size=%i,local_size=%i,local_path=%s)" % (remote_size, local_size, local_path))

                # retrieve local and remote checksum
                if tr.checksum_type is not None:
                    checksum_type = tr.checksum_type
                else:
                    checksum_type = sdconst.CHECKSUM_TYPE_MD5
                local_checksum = sdutils.compute_checksum(local_path, checksum_type)
                remote_checksum = tr.checksum

                if remote_checksum is None:
                    # remote checksum is missing
                    # NOTE: we DON'T store the local checksum ('file' table contains only the REMOTE checksum)
                    tr.status = sdconst.TRANSFER_STATUS_DONE
                elif remote_checksum == local_checksum:
                    # checksum is ok
                    tr.status = sdconst.TRANSFER_STATUS_DONE
                elif incorrect_checksum_action == "remove":
                    tr.status = sdconst.TRANSFER_STATUS_ERROR
                    tr.priority -= 1
                    tr.error_msg = "File corruption detected: local checksum doesn't match remote checksum"

                    # remove file from local repository
                    sdlog.error("SDDMGLOB-155", "checksum don't match: remove local file (local_checksum=%s,remote_checksum=%s,local_path=%s)" % (local_checksum, remote_checksum, local_path))
                    try:
                        os.remove(local_path)
                    except Exception:
                        sdlog.error("SDDMGLOB-158", "error occurs while removing local file (%s)" % local_path)
                elif incorrect_checksum_action == "keep":
                    sdlog.info("SDDMGLOB-157", "local checksum doesn't match remote checksum (%s)" % local_path)
                    tr.status = sdconst.TRANSFER_STATUS_DONE
                else:
                    raise FatalException("SDDMGLOB-507", "incorrect value (%s)" % incorrect_checksum_action)

                if tr.status == sdconst.TRANSFER_STATUS_DONE:
                    # WARNING: this is not the real end of transfer date but the
                    # date when we ask the globus scheduler if the transfer is done.
                    tr.end_date = sdtime.now()
                    tr.error_msg = ""
                    sdlog.info("SDDMGLOB-101", "Transfer done (%s)" % str(tr))

            elif status == "FAILED":
                tr.status = sdconst.TRANSFER_STATUS_ERROR
                tr.priority -= 1
                tr.error_msg = "Error occurs during download."
                sdlog.info("SDDMGLOB-101", "Transfer failed (%s)" % str(tr))

                # Remove local file if exists
                local_path = tr.get_full_local_path()
                if os.path.isfile(local_path):
                    try:
                        os.remove(local_path)
                    except Exception as e:
                        sdlog.error("SDDMGLOB-528", "Error occurs during file suppression (%s,%s)" % (local_path, str(e)))
def transfers_end():
    """Poll Globus for each tracked task and update the transfers it carries.

    ``globus_tasks`` is indexed by task id; each entry holds an 'items' list
    whose elements expose the transfer object under the 'tr' key.

    - Task "SUCCEEDED": the local size is compared with ``tr.size`` (a
      mismatch is only logged), then the local checksum ('md5' when the
      transfer has no checksum type) is compared with ``tr.checksum``. A
      missing or matching remote checksum marks the transfer DONE. On
      mismatch, ``incorrect_checksum_action`` decides: "remove" flags the
      transfer in error and deletes the local file; "keep" marks it DONE.
    - Task "FAILED": the transfer is flagged in error and the local file is
      deleted if present.
    - Any other task status leaves the transfers untouched.

    Raises:
        FatalException: if ``incorrect_checksum_action`` is neither "remove"
            nor "keep" when a checksum mismatch is detected.
    """
    _, _, access_token = api_client.goauth.get_access_token(username=globus_username, password=globus_password)
    api = api_client.TransferAPIClient(username=globus_username, goauth=access_token)

    for task_id in globus_tasks:
        code, reason, data = api.task(task_id, fields="status")
        status = data['status']
        sdlog.debug("SDDMGLOB-016", "Checking the status of Globus transfer tasks, id: %s, status: %s" % (task_id, status))

        for item in globus_tasks[task_id]['items']:
            tr = item['tr']

            if status == "SUCCEEDED":
                assert tr.size is not None

                target = tr.get_full_local_path()

                # a size mismatch is reported but does not stop the processing
                expected_size = int(tr.size)
                actual_size = os.path.getsize(target)
                if expected_size != actual_size:
                    sdlog.error("SDDMGLOB-002","size don't match (remote_size=%i,local_size=%i,local_path=%s)"%(expected_size,actual_size,target))

                # retrieve local and remote checksum
                checksum_type = 'md5' if tr.checksum_type is None else tr.checksum_type
                local_checksum = sdutils.compute_checksum(target, checksum_type)
                remote_checksum = tr.checksum

                if remote_checksum is None:
                    # remote checksum is missing
                    # NOTE: we DON'T store the local checksum ('file' table contains only the REMOTE checksum)
                    tr.status = sdconst.TRANSFER_STATUS_DONE
                elif remote_checksum == local_checksum:
                    # checksum is ok
                    tr.status = sdconst.TRANSFER_STATUS_DONE
                elif incorrect_checksum_action == "remove":
                    tr.status = sdconst.TRANSFER_STATUS_ERROR
                    tr.error_msg = "File corruption detected: local checksum doesn't match remote checksum"

                    # remove file from local repository
                    sdlog.error("SDDMGLOB-155","checksum don't match: remove local file (local_checksum=%s,remote_checksum=%s,local_path=%s)"%(local_checksum,remote_checksum,target))
                    try:
                        os.remove(target)
                    except Exception:
                        sdlog.error("SDDMGLOB-158","error occurs while removing local file (%s)"%target)
                elif incorrect_checksum_action == "keep":
                    sdlog.info("SDDMGLOB-157","local checksum doesn't match remote checksum (%s)"%target)
                    tr.status = sdconst.TRANSFER_STATUS_DONE
                else:
                    raise FatalException("SDDMGLOB-507","incorrect value (%s)"%incorrect_checksum_action)

                if tr.status == sdconst.TRANSFER_STATUS_DONE:
                    # WARNING: this is not the real end of transfer date but the
                    # date when we ask the globus scheduler if the transfer is done.
                    tr.end_date = sdtime.now()
                    tr.error_msg = ""
                    sdlog.info("SDDMGLOB-101", "Transfer done (%s)" % str(tr))

            elif status == "FAILED":
                tr.status = sdconst.TRANSFER_STATUS_ERROR
                tr.error_msg = "Error occurs during download."
                sdlog.info("SDDMGLOB-101", "Transfer failed (%s)" % str(tr))

                # Remove local file if exists
                target = tr.get_full_local_path()
                if os.path.isfile(target):
                    try:
                        os.remove(target)
                    except Exception as e:
                        sdlog.error("SDDMGLOB-528","Error occurs during file suppression (%s,%s)"%(target,str(e)))
def start_transfer_script(cls,tr):
    """Download ``tr.url`` with sdget and validate the resulting local file.

    When ``sdconfig.fake_download`` is set, nothing is downloaded and the
    transfer is marked DONE straight away.

    Otherwise the sdget status and error message are stored on ``tr``. When
    the status is 0, the local size is compared with ``tr.size`` (a mismatch
    is only logged) and, if ``tr.checksum`` is set, the local checksum is
    compared with it: a match marks the transfer DONE; on mismatch,
    ``incorrect_checksum_action`` decides between "remove" (transfer in error,
    local file deleted) and "keep" (transfer marked DONE).

    Raises:
        sdexception.FatalException: if ``incorrect_checksum_action`` is
            neither "remove" nor "keep" when a checksum mismatch is detected.
    """
    if sdconfig.fake_download:
        tr.status = sdconst.TRANSFER_STATUS_DONE
        tr.error_msg = ""
        tr.sdget_error_msg = ""
        return

    local_path = tr.get_full_local_path()

    (tr.sdget_status, killed, tr.sdget_error_msg) = sdget.download(
        tr.url,
        local_path,
        debug=False,
        http_client=sdconst.HTTP_CLIENT_WGET,
        timeout=sdconst.ASYNC_DOWNLOAD_HTTP_TIMEOUT,
        verbosity=0,
        buffered=True,
        hpss=hpss)

    if tr.sdget_status == 0:
        assert tr.size is not None

        # a size mismatch is reported but does not stop the processing
        remote_size = int(tr.size)
        local_size = os.path.getsize(local_path)
        if remote_size != local_size:
            sdlog.error("SDDMDEFA-002","size don't match (remote_size=%i,local_size=%i,local_path=%s)"%(remote_size,local_size,local_path))

        # retrieve remote checksum
        remote_checksum = tr.checksum

        if remote_checksum is not None:
            # remote checksum exists: compute the local one, fallback to 'md5' (arbitrary)
            if tr.checksum_type is not None:
                checksum_type = tr.checksum_type
            else:
                checksum_type = sdconst.CHECKSUM_TYPE_MD5
            local_checksum = sdutils.compute_checksum(local_path, checksum_type)

            # compare local and remote checksum
            if remote_checksum == local_checksum:
                # checksum is ok
                tr.status = sdconst.TRANSFER_STATUS_DONE
                tr.error_msg = ""
            elif incorrect_checksum_action == "remove":
                tr.status = sdconst.TRANSFER_STATUS_ERROR
                tr.error_msg = "File corruption detected: local checksum doesn't match remote checksum"

                # remove file from local repository
                sdlog.error("SDDMDEFA-155","checksum don't match: remove local file (local_checksum=%s,remote_checksum=%s,local_path=%s)"%(local_checksum,remote_checksum,local_path))
                try:
                    os.remove(local_path)
                except Exception:
                    sdlog.error("SDDMDEFA-158","error occurs while removing local file (%s)"%local_path)
            elif incorrect_checksum_action == "keep":
                sdlog.info("SDDMDEFA-157","local checksum doesn't match remote checksum (%s)"%local_path)
                tr.status = sdconst.TRANSFER_STATUS_DONE
                tr.error_msg = ""
            else:
                raise sdexception.FatalException("SDDMDEFA-507","incorrect value (%s)"%incorrect_checksum_action)

    # NOTE(review): nothing here sets tr.status when the remote checksum is
    # missing or when sdget returns a non-zero status, and 'killed' is never
    # read -- confirm whether those cases are handled elsewhere.
def start_transfer_script(cls,tr):
    """Download ``tr.url`` with sdget, record metrics and validate the local file.

    When ``sdconfig.fake_download`` is set, nothing is downloaded and the
    transfer is marked DONE straight away.

    Otherwise the sdget status and error message are stored on ``tr``, then
    ``tr.end_date``, ``tr.duration`` and ``tr.rate`` are filled in. When the
    sdget status is 0, the local size is compared with ``tr.size`` (a mismatch
    is only logged) and, if ``tr.checksum`` is set, the local checksum is
    compared with it: a match marks the transfer DONE; on mismatch,
    ``incorrect_checksum_action`` decides between "remove" (transfer in error,
    local file deleted) and "keep" (transfer marked DONE).

    Raises:
        sdexception.FatalException: if ``incorrect_checksum_action`` is
            neither "remove" nor "keep" when a checksum mismatch is detected.
    """
    sdlog.info("JFPDMDEF-001","Will download url=%s"%(tr.url,))

    if sdconfig.fake_download:
        tr.status = sdconst.TRANSFER_STATUS_DONE
        tr.error_msg = ""
        tr.sdget_error_msg = ""
        return

    # main
    local_path = tr.get_full_local_path()
    (tr.sdget_status, killed, tr.sdget_error_msg) = sdget.download(
        tr.url,
        local_path,
        debug=False,
        http_client=sdconst.HTTP_CLIENT_WGET,
        timeout=sdconst.ASYNC_DOWNLOAD_HTTP_TIMEOUT,
        verbosity=0,
        buffered=True,
        hpss=hpss)

    # check
    assert tr.size is not None

    # compute metrics
    tr.end_date = sdtime.now()
    tr.duration = sdtime.compute_duration(tr.start_date, tr.end_date)
    tr.rate = sdtools.compute_rate(tr.size, tr.duration)

    # post-processing
    if tr.sdget_status == 0:
        # a size mismatch is reported but does not stop the processing
        remote_size = int(tr.size)
        local_size = os.path.getsize(local_path)
        if remote_size != local_size:
            sdlog.error("SDDMDEFA-002","size don't match (remote_size=%i,local_size=%i,local_path=%s)"%(remote_size,local_size,local_path))

        # retrieve remote checksum
        remote_checksum = tr.checksum

        if remote_checksum is not None:
            # remote checksum exists: compute the local one, fallback to 'md5' (arbitrary)
            if tr.checksum_type is not None:
                checksum_type = tr.checksum_type
            else:
                checksum_type = sdconst.CHECKSUM_TYPE_MD5
            local_checksum = sdutils.compute_checksum(local_path, checksum_type)

            # compare local and remote checksum
            if remote_checksum == local_checksum:
                # checksum is ok
                tr.status = sdconst.TRANSFER_STATUS_DONE
                tr.error_msg = ""
            elif incorrect_checksum_action == "remove":
                tr.status = sdconst.TRANSFER_STATUS_ERROR
                tr.error_msg = "File corruption detected: local checksum doesn't match remote checksum"

                # remove file from local repository
                sdlog.error("SDDMDEFA-155","checksum don't match: remove local file (local_checksum=%s,remote_checksum=%s,local_path=%s)"%(local_checksum,remote_checksum,local_path))
                try:
                    os.remove(local_path)
                except Exception:
                    sdlog.error("SDDMDEFA-158","error occurs while removing local file (%s)"%local_path)
            elif incorrect_checksum_action == "keep":
                sdlog.info("SDDMDEFA-157","local checksum doesn't match remote checksum (%s)"%local_path)
                tr.status = sdconst.TRANSFER_STATUS_DONE
                tr.error_msg = ""
            else:
                raise sdexception.FatalException("SDDMDEFA-507","incorrect value (%s)"%incorrect_checksum_action)

    # NOTE(review): nothing here sets tr.status when the remote checksum is
    # missing or when sdget returns a non-zero status, and 'killed' is never
    # read -- confirm whether those cases are handled elsewhere.
def run(files,
        timeout=sdconst.DIRECT_DOWNLOAD_HTTP_TIMEOUT,
        force=False,
        http_client=sdconst.HTTP_CLIENT_WGET,
        local_path_prefix=sdconfig.sandbox_folder,
        verify_checksum=False,
        network_bandwidth_test=False,
        debug=True,
        verbosity=0,
        buffered=False,
        hpss=False):
    """Download ``files`` one after the other and report the overall outcome.

    If protocol is set to 'globus' in attached_parameters, download all files
    with url scheme globus:, or gridftp: provided that there is a gridftp to
    globus mapping defined in /esg/config/esgf_endpoints.xml. Remaining files
    will be downloaded using globus-url-copy or wget.

    Args:
        files: list of dicts; each one must contain 'url' and 'local_path'
            and is turned into a ``File``.
        timeout, http_client, debug, verbosity, buffered, hpss: forwarded to
            ``sdget.download``.
        force: when True, an existing local file is removed and downloaded
            again; when False it is reported as a failure and skipped.
        local_path_prefix: prefix used to build each local path.
        verify_checksum: when True, the local checksum is compared with the
            remote one after each successful download.
        network_bandwidth_test: when True, data are written to /dev/null and
            the function returns as soon as one download succeeds.

    Returns:
        0 if all transfers complete successfully
        1 if one or more transfer(s) didn't complete successfully
        In network bandwidth test mode, 0 as soon as one download succeeds.
    """
    files = sdglobus.direct(files, force, local_path_prefix, verify_checksum,
                            network_bandwidth_test, debug, verbosity)

    failed_count = 0

    for file_ in files:

        # check
        assert 'url' in file_
        assert 'local_path' in file_

        if verify_checksum:
            missing_remote_checksum_attrs = not checksum_attrs_ok(file_)

        # cast
        f = File(**file_)

        # prepare attributes
        local_path = f.get_full_local_path(prefix=local_path_prefix)

        if network_bandwidth_test:
            # special case: downloaded data are discarded
            local_path = '/dev/null'
        elif os.path.isfile(local_path):
            if force:
                os.remove(local_path)
            else:
                print_stderr('Warning: download cancelled as local file already exists (%s)' % local_path)
                failed_count += 1
                continue

        # transfer
        (status, killed, script_stderr) = sdget.download(
            f.url, local_path, debug, http_client, timeout, verbosity, buffered, hpss)

        # post-transfer
        if status != 0:
            failed_count += 1
            print_stderr('Download failed (%s)' % f.url)

            # in non-buffered mode, stderr is already displayed (because child
            # stderr is bound to parent stderr); currently we don't come here,
            # but we may need it in the future so we keep this block
            if buffered and script_stderr is not None:
                print_stderr(script_stderr)
            continue

        if network_bandwidth_test:
            # explicit success code (a bare 'return' would hand None back to
            # callers expecting the documented 0/1 status)
            return 0

        if not verify_checksum:
            print_stderr('File successfully downloaded, no checksum verification (%s)' % local_path)
        elif missing_remote_checksum_attrs:
            failed_count += 1
            print_stderr('Warning: missing remote checksum attributes prevented checksum verification (%s)' % local_path)
        else:
            remote_checksum = f.checksum
            local_checksum = sdutils.compute_checksum(local_path, f.checksum_type)

            if local_checksum == remote_checksum:
                print_stderr('File successfully downloaded, checksum OK (%s)' % local_path)
            else:
                failed_count += 1
                print_stderr("Error: local checksum don't match remote checksum (%s)" % local_path)

    return 1 if failed_count > 0 else 0
def run(files,
        timeout=sdconst.DIRECT_DOWNLOAD_HTTP_TIMEOUT,
        force=False,
        http_client=sdconst.HTTP_CLIENT_WGET,
        local_path_prefix=sdconfig.sandbox_folder,
        verify_checksum=False,
        network_bandwidth_test=False,
        debug=True,
        verbosity=0,
        buffered=False,
        hpss=False):
    """Download ``files`` one after the other and report the overall outcome.

    Args:
        files: iterable of dicts; each one must contain 'url' and
            'local_path' and is turned into a ``File``.
        timeout, http_client, debug, verbosity, buffered, hpss: forwarded to
            ``sdget.download``.
        force: when True, an existing local file is removed and downloaded
            again; when False it is reported as a failure and skipped.
        local_path_prefix: prefix used to build each local path.
        verify_checksum: when True, the local checksum is compared with the
            remote one after each successful download.
        network_bandwidth_test: when True, data are written to /dev/null and
            the function returns as soon as one download succeeds.

    Returns:
        0 if all transfers complete successfully
        1 if one or more transfer(s) didn't complete successfully
        In network bandwidth test mode, 0 as soon as one download succeeds.
    """
    failed_count = 0

    for file_ in files:

        # check
        assert 'url' in file_
        assert 'local_path' in file_

        if verify_checksum:
            missing_remote_checksum_attrs = not checksum_attrs_ok(file_)

        # cast
        f = File(**file_)

        # prepare attributes
        local_path = f.get_full_local_path(prefix=local_path_prefix)

        if network_bandwidth_test:
            # special case: downloaded data are discarded
            local_path = '/dev/null'
        elif os.path.isfile(local_path):
            if force:
                os.remove(local_path)
            else:
                print_stderr('Warning: download cancelled as local file already exists (%s)'%local_path)
                failed_count += 1
                continue

        # transfer
        (status, killed, script_stderr) = sdget.download(
            f.url, local_path, debug, http_client, timeout, verbosity, buffered, hpss)

        # post-transfer
        if status != 0:
            failed_count += 1
            print_stderr('Download failed (%s)'%f.url)

            # in non-buffered mode, stderr is already displayed (because child
            # stderr is bound to parent stderr); currently we don't come here,
            # but we may need it in the future so we keep this block
            if buffered and script_stderr is not None:
                print_stderr(script_stderr)
            continue

        if network_bandwidth_test:
            # explicit success code (a bare 'return' would hand None back to
            # callers expecting the documented 0/1 status)
            return 0

        if not verify_checksum:
            print_stderr('File successfully downloaded, no checksum verification (%s)'%local_path)
        elif missing_remote_checksum_attrs:
            failed_count += 1
            print_stderr('Warning: missing remote checksum attributes prevented checksum verification (%s)'%local_path)
        else:
            remote_checksum = f.checksum
            local_checksum = sdutils.compute_checksum(local_path, f.checksum_type)

            if local_checksum == remote_checksum:
                print_stderr('File successfully downloaded, checksum OK (%s)'%local_path)
            else:
                failed_count += 1
                print_stderr("Error: local checksum don't match remote checksum (%s)"%local_path)

    return 1 if failed_count > 0 else 0