def display_status(self, percent_complete, task_id, num_completed, num_total):
    """
    Push or refresh a progress line for a transfer in the event list.

    The line holds the completed/total counts, a 20 cell progress bar and
    the percentage, prefixed with the current time. If an event whose data
    equals task_id is already in the list its message is replaced,
    otherwise a new event is pushed with task_id as its data.

    Parameters:
        percent_complete (number): progress of the transfer in percent
        task_id: value compared against each event's data attribute
        num_completed (int): count shown on the left of the ratio
        num_total (int): count shown on the right of the ratio
    """
    # single digit counts get one space of padding before the ratio
    padding = ' ' if num_completed < 10 else ''
    header = 'Transfer in progress {spacer}({completed}/{total}) ['.format(
        completed=num_completed,
        spacer=padding,
        total=num_total)
    # one cell per 5 percent, thresholds are 1, 6, ..., 96
    bar = ''.join(
        '*' if threshold < percent_complete else '_'
        for threshold in range(1, 100, 5))
    message = header + bar + '] {percent:.2f}%'.format(percent=percent_complete)

    from lib.util import print_debug

    def stamped():
        # prefix the line with the current 12-hour clock time
        return '{time} {msg}'.format(time=time.strftime("%I:%M"), msg=message)

    try:
        for position, event in enumerate(self.event_list.list):
            if task_id == event.data:
                # this task already has a line, refresh it in place
                self.event_list.replace(index=position, message=stamped())
                break
        else:
            # no existing line for this task
            self.event_list.push(message=stamped(), data=task_id)
    except Exception as e:
        print_debug(e)
def check_globus(src_uuid, dst_uuid, src_path, dst_path):
    """
    Check that both globus endpoints will list the paths we care about.

    The endpoints are assumed to be activated already. Each endpoint is
    asked for a directory listing of its path, and on success the hostname
    of its first server is printed.

    Parameters:
        src_uuid (str): UUID of the source endpoint
        dst_uuid (str): UUID of the destination endpoint
        src_path (str): path to list on the source endpoint
        dst_path (str): path to list on the destination endpoint
    Returns:
        (True, None) if both endpoints answered,
        (False, endpoint) otherwise, where endpoint is the dict
        ('type', 'id', 'path') describing the endpoint that failed
    """
    endpoints = [{
        'type': 'source',
        'id': src_uuid,
        'path': src_path
    }, {
        'type': 'destination',
        'id': dst_uuid,
        'path': dst_path
    }]

    client = get_client()
    endpoint = None
    try:
        for endpoint in endpoints:
            # only the side effect matters: this raises if the path is unreadable
            get_ls(client, endpoint['path'], endpoint['id'], False, 0, False)
            # the server list is looked up by endpoint id, and 'DATA' holds
            # a list of server documents
            servers = client.endpoint_server_list(endpoint['id'])['DATA']
            hostname = endpoint['id']
            for server in servers:
                hostname = server['hostname']
                break
            print("Access confirmed for {}".format(hostname))
    except Exception as e:
        print_debug(e)
        return False, endpoint
    return True, None
def setup_globus(endpoints, event_list):
    """
    Check globus login status, then activate every endpoint in the list.

    If the user is not logged in, a notice is printed and the process
    exits with status 1. Endpoints that cannot be auto-activated are
    reported with their activation URL, and the user is prompted to
    activate them by hand; the loop repeats until no endpoint reports
    AutoActivationFailed.

    Parameters:
        endpoints: list of strings containing globus endpoint UUIDs,
            a single string is treated as a one element list
        event_list: the event list to push user notifications into
    Returns:
        True once all endpoints are active,
        False if an endpoint lookup fails with ClientError.NotFound
    """
    # First go through the globus login process
    if not check_logged_in():
        message = 'Globus login required. Please run {cmd}\n\n'.format(
            cmd='"globus login"')
        print_line(message, event_list)
        print('================================================')
        sys.exit(1)

    if isinstance(endpoints, str):
        endpoints = [endpoints]

    activated = False
    client = get_client()
    while not activated:
        activated = True
        message = ''
        for endpoint in endpoints:
            msg = 'activating endpoint {}'.format(endpoint)
            logging.info(msg)
            try:
                r = client.endpoint_autoactivate(endpoint, if_expires_in=3600)
                logging.info(r['code'])
            except Exception as e:
                print_debug(e)
                # not every exception carries a 'code' attribute
                if getattr(e, 'code', None) == 'ClientError.NotFound':
                    return False
                continue

            if r["code"] == "AutoActivationFailed":
                activated = False
                logging.info('endpoint autoactivation failed')
                server_document = client.endpoint_server_list(endpoint)
                # fall back to the endpoint id when no server is listed
                hostname = endpoint
                for server in server_document['DATA']:
                    hostname = server["hostname"]
                    break
                message += """
Data transfer server {server} requires manual activation.
Please open the following URL in a browser to activate the endpoint:
https://www.globus.org/app/endpoints/{endpoint}/activate

""".format(endpoint=endpoint, server=hostname)

        if not activated:
            print(message)
            raw_input('Press ENTER once endpoints have been activated\n')

    return True
def update_local_status(self):
    """
    Refresh the database rows of files currently marked as not existing
    locally.

    For each such DataFile whose local_path exists on disk, the size on
    disk is compared with the recorded sizes: a match with remote_size
    marks the file as EXISTS, and any change of size is written back to
    local_size. The whole pass runs while holding self.mutex.
    """
    self.mutex.acquire()
    try:
        pending = DataFile.select().where(
            DataFile.local_status == filestatus['NOT_EXIST'])
        for record in pending:
            if not os.path.exists(record.local_path):
                continue
            size_on_disk = os.path.getsize(record.local_path)
            complete = size_on_disk == record.remote_size
            if complete:
                record.local_status = filestatus['EXISTS']
            # save when the file completed or when its size moved
            if complete or size_on_disk != record.local_size:
                record.local_size = size_on_disk
                record.save()
    except Exception as e:
        print_debug(e)
    finally:
        if self.mutex.locked():
            self.mutex.release()
def get_ls(client, path, endpoint):
    """
    Try up to ten times to list path on endpoint, backing off between tries.

    After a failed attempt the function sleeps for as many seconds as the
    zero based attempt number. The error of the tenth failure is handed to
    print_debug and None is returned implicitly.

    NOTE(review): the inner call targets the name get_ls with six
    arguments. If that name resolves to this three argument wrapper the
    call can only raise TypeError - confirm which get_ls is in scope here.
    """
    attempts = 10
    for attempt in xrange(attempts):
        try:
            listing = get_ls(client, path, endpoint, False, 0, False)
        except Exception as e:
            sleep(attempt)
            if attempt >= attempts - 1:
                print_debug(e)
            continue
        return listing
def get_ls(client, path, endpoint):
    """
    Call globus_ls for path on endpoint, retrying up to ten times.

    Every failure sleeps for as many seconds as the zero based attempt
    number, prints a retry notice and passes the error to print_debug.
    Returns the result of the first successful call, or None (implicitly)
    when all ten attempts fail.
    """
    for attempt in xrange(10):
        try:
            listing = globus_ls(client, path, endpoint, False, 0, False)
        except Exception as e:
            sleep(attempt)
            print_message("Globus server error, retrying")
            print_debug(e)
            continue
        return listing
def write_database(self):
    """
    Dump a human readable view of the DataFile table for debugging.

    The report is written to <project_path>/output/file_list.txt and is
    grouped by case, then by datatype, with one entry per file.
    """
    report_path = os.path.join(
        self._config['global']['project_path'],
        'output',
        'file_list.txt')
    case_rule = '+++++++++++++++++++++++++++++++++++++++++++++'
    # keys of the simulations section that are settings, not cases
    settings_keys = ['start_year', 'end_year', 'comparisons']
    status_labels = {0: ' present', 1: ' missing'}

    def label(status):
        # anything other than 0 or 1 is reported as in transit
        return status_labels.get(status, ' in transit')

    with open(report_path, 'w') as fp:
        try:
            for case in self._config['simulations']:
                if case in settings_keys:
                    continue
                fp.write(case_rule)
                fp.write('\n\t{case}\t\n'.format(case=case))
                fp.write(case_rule + '\n')
                type_query = (DataFile
                              .select(DataFile.datatype)
                              .where(DataFile.case == case)
                              .distinct())
                for type_row in type_query.execute():
                    _type = type_row.datatype
                    fp.write('===================================\n')
                    fp.write('\t' + _type + ':\n')
                    file_query = (DataFile
                                  .select()
                                  .where(
                                      (DataFile.datatype == _type) &
                                      (DataFile.case == case)))
                    for datafile in file_query.execute():
                        entry = ''.join([
                            '-------------------------------------',
                            '\n\t name: ' + datafile.name,
                            '\n\t local_status: ',
                            label(datafile.local_status) + ', ',
                            '\n\t remote_status: ',
                            label(datafile.remote_status),
                            '\n\t local_size: ' + str(datafile.local_size),
                            '\n\t local_path: ' + datafile.local_path,
                            '\n\t remote_path: ' + datafile.remote_path,
                            '\n\t year: ' + str(datafile.year),
                            '\n\t month: ' + str(datafile.month) + '\n',
                        ])
                        fp.write(entry)
        except Exception as e:
            print_debug(e)
def all_data_remote(self):
    """
    Return False if any DataFile has a remote_status other than EXISTS,
    True otherwise.

    The table is scanned while holding self.mutex. A failure while
    querying is passed to print_debug and the method then returns True.
    """
    self.mutex.acquire()
    try:
        some_missing = any(
            record.remote_status != filestatus['EXISTS']
            for record in DataFile.select())
        if some_missing:
            return False
    except Exception as e:
        print_debug(e)
    finally:
        if self.mutex.locked():
            self.mutex.release()
    return True
def _get_ls(self, client, path):
    """
    List path on self.remote_endpoint, retrying up to ten times.

    A failed attempt sleeps for as many seconds as the zero based attempt
    number. When the tenth attempt fails the error is passed to
    print_debug and the process exits through sys.exit().

    Returns the result of the first successful get_ls call.
    """
    attempts = 10
    for attempt in xrange(attempts):
        try:
            listing = get_ls(client, path, self.remote_endpoint, False, 0, False)
        except Exception as e:
            sleep(attempt)
            if attempt >= attempts - 1:
                print_debug(e)
                sys.exit()
            continue
        return listing
def get_ls(client, remote_path):
    """
    Run 'ls' on remote_path through the client's exec_command and return
    the output split on newlines.

    Returns None if running the command raises; the error is passed to
    print_debug.
    """
    try:
        _, stdout, _ = client.exec_command('ls {}'.format(remote_path))
    except Exception as e:
        print_debug(e)
        return None
    listing = stdout.read()
    return listing.split('\n')
def finalize(config, event_list, status, runmanager):
    """
    Wrap up a run: optional cleanup, a final summary and a notification
    email.

    Parameters:
        config (dict): the run configuration; the 'global' section is read
            for native_grid_cleanup, email and log_path
        event_list: accepted for interface compatibility, not used here
        status (int): 1 when every job completed, anything else is
            reported as a failure
        runmanager: object whose cases list holds dicts with 'case' and
            'jobs' entries
    """
    if status == 1 and config['global'].get('native_grid_cleanup') in [
            1, '1', 'true', 'True']:
        cleanup_note = 'Performing post run cleanup'
        native_cleanup(config)
    else:
        cleanup_note = 'Leaving native grid files in place'
    print_message(cleanup_note, 'ok')

    if status == 1:
        summary, code = 'All processing complete', 'ok'
    else:
        summary = 'The following jobs encountered an error and were marked as failed:'
        code = 'error'
        for case in runmanager.cases:
            for job in case['jobs']:
                if job.status != JobStatus.COMPLETED:
                    summary += '\n {}'.format(job.msg_prefix())
    print_message(summary, code)

    emailaddr = config['global'].get('email')
    if emailaddr:
        print_message(
            'Sending notification email to {}'.format(emailaddr), 'ok')
        try:
            if status == 1:
                subject = 'Your processflow run has completed successfully\n'
            else:
                subject = 'One or more processflow jobs failed\n'
            # the mail body starts with the subject line
            body = subject + 'See log for additional details\n{}\n'.format(
                config['global']['log_path'])
            for case in runmanager.cases:
                rule = '==' + '=' * len(case['case']) + '==\n'
                body += rule
                body += ' # ' + case['case'] + ' #\n'
                body += rule + '\n'
                for job in case['jobs']:
                    body += '\t > ' + job.get_report_string() + '\n'
                body += '\n'
            mailer = Mailer(src='*****@*****.**', dst=emailaddr)
            mailer.send(status=subject, msg=body)
        except Exception as e:
            print_debug(e)
    logging.info("All processes complete")
def setup_local_hosting(self, job, img_src):
    """
    Set up the local directory for hosting diagnostic output.

    The images under img_src are copied to the job's web_dir. If that
    directory already exists, a timestamp suffix is appended to both the
    target directory and the job's host_url. The copied tree and its two
    parent directories are then made world readable.

    Parameters:
        job: the job being hosted; its type and its config entries
            web_dir and host_url are read, host_url may be updated
        img_src (str): directory holding the images to publish
    """
    msg = 'Setting up local hosting for {}'.format(job.type)
    self.event_list.push(message=msg, data=job)
    logging.info(msg)

    host_dir = job.config.get('web_dir')
    url = job.config.get('host_url')
    if os.path.exists(job.config.get('web_dir')):
        # never overwrite earlier output, publish next to it instead
        new_id = time.strftime("%Y-%m-%d-%I-%M")
        host_dir += '_' + new_id
        url += '_' + new_id
        job.config['host_url'] = url

    if not os.path.exists(img_src):
        msg = '{job} hosting failed, no image source at {path}'.format(
            job=job.type, path=img_src)
        logging.error(msg)
        return

    try:
        msg = 'copying images from {src} to {dst}'.format(src=img_src, dst=host_dir)
        logging.info(msg)
        copytree(src=img_src, dst=host_dir)

        # keep retrying until the chmod process can be spawned; only
        # ordinary errors are retried so Ctrl-C and sys.exit still work
        while True:
            try:
                p = Popen(['chmod', '-R', '0755', host_dir])
            except Exception:
                sleep(1)
            else:
                break
        p.communicate()

        # open up the two directories above the hosted one as well
        head, _ = os.path.split(host_dir)
        os.chmod(head, 0o755)
        head, _ = os.path.split(head)
        os.chmod(head, 0o755)
    except Exception as e:
        from lib.util import print_debug
        print_debug(e)
        msg = 'Error copying {0} to host directory {1}'.format(
            job.type, host_dir)
        self.event_list.push(message=msg, data=job)
        return
def all_data_local(self):
    """
    Report whether every DataFile is present locally.

    Returns False when at least one file is marked NOT_PRESENT or
    IN_TRANSIT, True otherwise. A failure while querying is passed to
    print_debug and the method then returns True.
    """
    try:
        not_present = DataFile.local_status == FileStatus.NOT_PRESENT.value
        in_transit = DataFile.local_status == FileStatus.IN_TRANSIT.value
        missing_data = DataFile.select().where(not_present | in_transit).execute()
        # a single missing file is enough to answer no
        if missing_data:
            logging.debug('All data is not local, missing the following')
            logging.debug([x.name for x in missing_data])
            return False
    except Exception as e:
        print_debug(e)
    logging.debug('All data is local')
    return True
def add_files(self, data_type, file_list):
    """
    Insert a row into the database for each described file.

    Parameters:
        data_type (str): the data_type stored for all of the new files
        file_list (list): a list of dictionaries with the keys
            local_path (str): path to the file
            case (str): the case the file belongs to
            name (str): the filename
            local_status (int): initial local status, optional,
                defaults to NOT_PRESENT
            remote_path (str): the remote path of the file, optional
            transfer_type (str): the transfer type, optional,
                defaults to 'local'
            year (int): the year of the file, optional
            month (int): the month of the file, optional
            remote_uuid (str): remote globus endpoint id, optional
            remote_hostname (str): remote hostname for sftp transfer, optional

    Any error, including a missing required key, is passed to print_debug.
    """
    try:
        rows = [{
            'name': entry['name'],
            'local_path': entry['local_path'],
            'local_status': entry.get('local_status', FileStatus.NOT_PRESENT.value),
            'datatype': data_type,
            'case': entry['case'],
            'year': entry.get('year', 0),
            'month': entry.get('month', 0),
            'remote_uuid': entry.get('remote_uuid', ''),
            'remote_hostname': entry.get('remote_hostname', ''),
            'remote_path': entry.get('remote_path', ''),
            'remote_status': FileStatus.NOT_PRESENT.value,
            'local_size': 0,
            'transfer_type': entry.get('transfer_type', 'local')
        } for entry in file_list]
        # insert in batches of 50 rows
        batch = 50
        for start in range(0, len(rows), batch):
            DataFile.insert_many(rows[start:start + batch]).execute()
    except Exception as e:
        print_debug(e)
def get_file_paths_by_year(self, start_year, end_year, _type):
    """
    Return the local paths of all files of the given datatype.

    For the datatypes listed below every file is returned; for any other
    datatype only files whose year lies between start_year and end_year
    (both inclusive) are returned. The query runs while holding
    self.mutex, and an empty list is returned if it fails.
    """
    # these datatypes are selected without looking at the year
    unfiltered_types = [
        'rest', 'streams.ocean', 'streams.cice', 'mpas-cice_in',
        'mpas-o_in', 'meridionalHeatTransport'
    ]
    self.mutex.acquire()
    try:
        selection = DataFile.datatype == _type
        if _type not in unfiltered_types:
            selection = (
                selection &
                (DataFile.year >= start_year) &
                (DataFile.year <= end_year))
        files = [row.local_path for row in DataFile.select().where(selection)]
    except Exception as e:
        print_debug(e)
        files = []
    finally:
        if self.mutex.locked():
            self.mutex.release()
    return files
def years_ready(self, start_year, end_year):
    """
    Check whether the atm files from start_year through end_year are
    local.

    Parameters:
        start_year (int): the first year to check
        end_year (int): the last year to check
    Returns:
        1 if no file in the range is marked NOT_EXIST or IN_TRANSIT
        0 if some files are local and some are not
        -1 if files are missing and none are local
    """
    waiting_states = [filestatus['NOT_EXIST'], filestatus['IN_TRANSIT']]
    all_local = True
    any_local = False
    self.mutex.acquire()
    try:
        in_range = DataFile.select().where(
            (DataFile.datatype == 'atm') &
            (DataFile.year >= start_year) &
            (DataFile.year <= end_year))
        for record in in_range:
            if record.local_status in waiting_states:
                all_local = False
            else:
                any_local = True
    except Exception as e:
        print_debug(e)
    finally:
        if self.mutex.locked():
            self.mutex.release()

    if all_local:
        return 1
    return 0 if any_local else -1
def transfer(sftp_client, file):
    """
    Fetch a single file through an sftp client.

    Parameters:
        sftp_client (paramiko.SFTPClient): the client to use for transport
        file (dict): a dict with the keys remote_path and local_path
    Returns:
        True if the download succeeded, False if it raised; the outcome
        is logged under the remote file's name
    """
    f_name = os.path.basename(file['remote_path'])
    try:
        sftp_client.get(file['remote_path'], file['local_path'])
    except Exception as e:
        print_debug(e)
        logging.error('{} transfer failed'.format(f_name))
        return False
    logging.info('{} transfer successful'.format(f_name))
    return True
def update_local_status(self):
    """
    Bring the database in line with the files found on disk.

    Every DataFile marked NOT_PRESENT or IN_TRANSIT is checked: files that
    now exist are marked PRESENT. Missing files whose transfer_type is
    'local' are logged as errors, and the first such message is also sent
    to the event list.

    Returns True if at least one file was newly marked PRESENT, False
    otherwise.
    """
    change = False
    try:
        absent = FileStatus.NOT_PRESENT.value
        moving = FileStatus.IN_TRANSIT.value
        query = (DataFile
                 .select()
                 .where(
                     (DataFile.local_status == absent) |
                     (DataFile.local_status == moving)))
        printed = False
        for record in query.execute():
            dirty = False
            if os.path.exists(record.local_path):
                if record.local_status in (absent, moving):
                    record.local_status = FileStatus.PRESENT.value
                    dirty = True
                    change = True
            else:
                if record.transfer_type == 'local':
                    msg = '{case} transfer_type is local, but {filename} is not present'.format(
                        case=record.case, filename=record.name)
                    logging.error(msg)
                    # only the first missing local file reaches the event list
                    if not printed:
                        print_line(msg, self._event_list)
                        printed = True
                if record.local_status == FileStatus.PRESENT.value:
                    record.local_status = absent
                    dirty = True
            if dirty:
                record.save()
    except Exception as e:
        print_debug(e)
    return change
def years_ready(self, data_type, start_year, end_year):
    """
    Check whether the data_type files from start_year through end_year
    are present locally.

    Parameters:
        data_type (str): the datatype of the files to check
        start_year (int): the first year to check
        end_year (int): the last year to check
    Returns:
        1 if every file in the range is present (or the range is empty)
        0 if some files are present and some are not
        -1 if files are expected but none are present
    """
    data_ready = True
    non_zero_data = False

    self._mutex.acquire()
    try:
        query = (DataFile
                 .select()
                 .where(
                     (DataFile.datatype == data_type) &
                     (DataFile.year >= start_year) &
                     (DataFile.year <= end_year)))
        for datafile in query.execute():
            # a file that is missing or still in transit blocks readiness;
            # only a present file counts as data on hand
            if datafile.local_status != FileStatus.PRESENT.value:
                data_ready = False
            else:
                non_zero_data = True
    except Exception as e:
        print_debug(e)
    finally:
        if self._mutex.locked():
            self._mutex.release()

    if data_ready:
        return 1
    return 0 if non_zero_data else -1
def get_ll(client, remote_path):
    """
    Run 'ls -la' on remote_path through the client's exec_command and
    parse the output.

    Returns a list with one dict per entry holding the keys permissions,
    num_links, owner, group, size, creation and name, all as strings.
    Lines with fewer than nine fields, the 'total' line and the '.' and
    '..' entries are skipped. Returns None if running the command raises.
    """
    try:
        _, stdout, _ = client.exec_command('ls -la {}'.format(remote_path))
    except Exception as e:
        print_debug(e)
        return None

    entries = []
    for line in stdout.read().split('\n'):
        # columns are padded with runs of spaces, drop the empty pieces
        fields = [piece for piece in line.split(' ') if piece != '']
        if len(fields) < 9 or fields[0] == 'total' or fields[-1] in ['.', '..']:
            continue
        entries.append({
            'permissions': fields[0],
            'num_links': fields[1],
            'owner': fields[2],
            'group': fields[3],
            'size': fields[4],
            'creation': ' '.join(fields[5:7]),
            # names with spaces span several fields
            'name': ' '.join(fields[8:])
        })
    return entries
def populate_file_list(self, simstart, simend, experiment):
    """
    Fill the database with the DataFile entries the run expects.

    Restart, mpas and heat transport types are delegated to their own
    populate helpers. Every other type in file_type_map gets one entry per
    month for each year from simstart through simend.

    Parameters:
        simstart (int): the start year of the simulation
        simend (int): the end year of the simulation
        experiment (str): the name of the experiment
            ex: 20170915.beta2.A_WCYCL1850S.ne30_oECv3_ICG.edison
    """
    print('Creating file table')
    print('Using short term archive' if self.sta
          else 'Short term archive turned off')
    if not self.start_year:
        self.start_year = simstart

    mpas_types = ['streams.ocean', 'streams.cice', 'mpas-o_in', 'mpas-cice_in']
    newfiles = []
    with DataFile._meta.database.atomic():
        for _type in self.types:
            if _type not in file_type_map:
                continue
            if _type == 'rest':
                self.populate_handle_rest(simstart, newfiles)
                continue
            if _type in mpas_types:
                self.populate_handle_mpas(_type, newfiles)
                continue
            if _type == 'meridionalHeatTransport':
                self.populate_heat_transport(newfiles)
                continue

            local_base = os.path.join(self.local_path, _type)
            if not os.path.exists(local_base):
                os.makedirs(local_base)
            for year in xrange(simstart, simend + 1):
                for month in xrange(1, 13):
                    name = file_type_map[_type]
                    # only the atm pattern carries the experiment name
                    if _type == 'atm':
                        name = name.replace('EXPERIMENT', experiment)
                    name = name.replace('YEAR', '{0:04d}'.format(year))
                    name = name.replace('MONTH', '{0:02d}'.format(month))
                    local_path = os.path.join(local_base, name)
                    if self.sta:
                        remote_path = os.path.join(
                            self.remote_path, 'archive', _type, 'hist', name)
                    else:
                        remote_path = os.path.join(self.remote_path, name)
                    newfiles = self._add_file(
                        newfiles=newfiles,
                        name=name,
                        local_path=local_path,
                        remote_path=remote_path,
                        _type=_type,
                        year=year,
                        month=month)

        print('Inserting file data into the table')
        self.mutex.acquire()
        try:
            # insert in batches of 50 rows
            batch = 50
            for start in range(0, len(newfiles), batch):
                DataFile.insert_many(newfiles[start:start + batch]).execute()
        except Exception as e:
            print_debug(e)
        finally:
            if self.mutex.locked():
                self.mutex.release()
        print('Database update complete')
def transfer_needed(self, event_list, event):
    """
    Start a transfer thread for every case that has files which are not
    present locally.

    For each such case the missing non-local files are marked IN_TRANSIT
    and handed to a globus or sftp transfer thread, chosen by the
    transfer_type of the first missing file. A globus user must already
    be logged in.

    Parameters:
        event_list: accepted for interface compatibility, messages go to
            self._event_list
        event: accepted for interface compatibility, threads receive
            self.kill_event
    Returns:
        False if the remote files of a case could not be verified or an
        error occurred, None otherwise
    """
    try:
        q = (DataFile
             .select(DataFile.case)
             .where(
                 DataFile.local_status == FileStatus.NOT_PRESENT.value))
        # distinct cases, in the order they were first seen
        cases = list()
        for row in q.execute():
            if row.case not in cases:
                cases.append(row.case)
        if not cases:
            return

        for case in cases:
            q = (DataFile
                 .select()
                 .where(
                     (DataFile.case == case) &
                     (DataFile.local_status == FileStatus.NOT_PRESENT.value)))
            # build a filtered list rather than removing while iterating,
            # which skips the element after each removal
            required_files = [
                x for x in q.execute() if x.transfer_type != 'local']
            if not required_files:
                msg = 'ERROR: all missing files are marked as local'
                print_line(msg, self._event_list)
                return

            # mark files as in-transit so we dont double-copy; the column
            # stores the enum's value, as every status comparison does
            q = (DataFile
                 .update({DataFile.local_status: FileStatus.IN_TRANSIT.value})
                 .where(DataFile.name << [x.name for x in required_files]))
            q.execute()

            # one list per case, so a running thread never sees the files
            # of a case handled later
            target_files = [{
                'local_path': x.local_path,
                'remote_path': x.remote_path,
            } for x in required_files]

            first = required_files[0]
            if first.transfer_type == 'globus':
                msg = 'Starting globus file transfer of {} files'.format(
                    len(required_files))
                print_line(msg, self._event_list)
                msg = 'See https://www.globus.org/app/activity for transfer details'
                print_line(msg, self._event_list)

                client = get_client()
                if not self.verify_remote_files(client=client, case=case):
                    return False
                remote_uuid = first.remote_uuid
                local_uuid = self._config['global']['local_globus_uuid']
                thread_name = '{}_globus_transfer'.format(first.case)
                _args = (client, remote_uuid, local_uuid,
                         target_files, self.kill_event)
                thread = Thread(
                    target=globus_transfer,
                    name=thread_name,
                    args=_args)
                self.thread_list.append(thread)
                thread.start()
            elif first.transfer_type == 'sftp':
                msg = 'Starting sftp file transfer of {} files'.format(
                    len(required_files))
                print_line(msg, self._event_list)

                client = get_ssh_client(first.remote_hostname)
                if not self.verify_remote_files(client=client, case=case):
                    return False
                thread_name = '{}_sftp_transfer'.format(first.case)
                _args = (target_files, client, self.kill_event)
                thread = Thread(
                    target=self._ssh_transfer,
                    name=thread_name,
                    args=_args)
                self.thread_list.append(thread)
                thread.start()
    except Exception as e:
        print_debug(e)
        return False
.where( (DataFile.case == case) & (DataFile.datatype == datatype))) datafiles = q.execute() for df in datafiles: if not os.path.exists(df.local_path) and df.local_status == FileStatus.PRESENT.value: df.local_status = FileStatus.NOT_PRESENT.value df.save() elif os.path.exists(df.local_path) and df.local_status == FileStatus.NOT_PRESENT.value: df.local_status = FileStatus.PRESENT.value df.save() if df.local_status != FileStatus.PRESENT.value: return False return True except Exception as e: print_debug(e) def render_file_string(self, data_type, data_type_option, case, year=None, month=None): """ Takes strings from the data_types dict and replaces the keywords with the appropriate values """ # setup the replacement dict start_year = int(self._config['simulations']['start_year']) end_year = int(self._config['simulations']['end_year']) replace = { 'PROJECT_PATH': self._config['global']['project_path'], 'REMOTE_PATH': self._config['simulations'][case].get('remote_path', ''), 'CASEID': case, 'REST_YR': '{:04d}'.format(start_year + 1), 'START_YR': '{:04d}'.format(start_year),
def main(test=False, **kwargs):
    """
    Processflow main

    Initializes the run, then loops: refresh the local file status, start
    transfers for missing data, start and monitor jobs, and write the job
    state to disk, until the run manager reports that all jobs are done.

    Parameters:
        test (bool): when True, a test banner is printed, the arguments
            are read from kwargs['testargs'] instead of sys.argv, and
            initialize is called with testing=True
        kwargs (dict): only 'testargs' is read, and only in test mode
    Returns:
        -1 if initialize returned an error code,
        0 once the run has finished and been finalized,
        None if the loop was ended by an interrupt or an exception
    """
    # The master configuration object
    config = {}

    # An event to kill the threads on terminal exception
    thread_kill_event = threading.Event()
    # lock handed to initialize together with the kill event
    mutex = threading.Lock()

    # A flag to tell if we have all the data locally
    # NOTE(review): all_data and all_data_remote are never read below
    all_data = False
    all_data_remote = False

    # get a globus client
    # NOTE(review): the returned client is not used in this function
    client = get_client()

    # Read in parameters from config
    if test:
        print '=========================================='
        print '---- Processflow running in test mode ----'
        print '=========================================='
        _args = kwargs['testargs']
        config, filemanager, runmanager = initialize(
            argv=_args,
            version=__version__,
            branch=__branch__,
            event_list=event_list,
            kill_event=thread_kill_event,
            mutex=mutex,
            testing=True)
    else:
        config, filemanager, runmanager = initialize(
            argv=sys.argv[1:],
            version=__version__,
            branch=__branch__,
            event_list=event_list,
            kill_event=thread_kill_event,
            mutex=mutex)
    # setup returned an error code
    if isinstance(config, int):
        print "Error in setup, exiting"
        return -1
    logging.info('Config setup complete')
    debug = True if config['global'].get('debug') else False

    msg = "Updating local file status"
    print_line(line=msg, event_list=event_list)
    filemanager.update_local_status()
    all_data_local = filemanager.all_data_local()
    if not all_data_local:
        filemanager.transfer_needed(event_list=event_list, event=thread_kill_event)

    # The case_scripts transfer is currently disabled, so this branch is a
    # no-op. The code that used to live here started a 'transfer_directory'
    # thread that copied <source_path>/case_scripts from the source
    # endpoint into <input_path>/case_scripts when it was missing locally.
    if config['global'].get('get_scripts'):
        pass

    # Main loop
    printed = False
    loop_delay = 10
    state_path = os.path.join(config['global']['project_path'], 'output', 'state.txt')
    try:
        print "--------------------------"
        print " Entering Main Loop "
        print " Status file: {}".format(state_path)
        print "--------------------------"
        while True:
            # stop polling the disk once everything has arrived
            if not all_data_local:
                if debug:
                    print_line(' -- Updating local status --', event_list)
                if filemanager.update_local_status():
                    msg = filemanager.report_files_local()
                    print_line(msg, event_list)
                    filemanager.write_database()
                all_data_local = filemanager.all_data_local()
            if not all_data_local:
                if debug:
                    print_line(' -- Additional data needed --', event_list)
                filemanager.transfer_needed(event_list, thread_kill_event)

            if debug:
                print_line(' -- checking data -- ', event_list)
            runmanager.check_data_ready()
            if debug:
                print_line(' -- starting ready jobs --', event_list)
            runmanager.start_ready_jobs()
            if debug:
                print_line(' -- monitoring running jobs --', event_list)
            runmanager.monitor_running_jobs()

            if debug:
                print_line(' -- writing out state -- ', event_list)
            runmanager.write_job_sets(state_path)

            status = runmanager.is_all_done()
            # return -1 if still running
            # return 0 if a jobset failed
            # return 1 if all complete
            if status >= 0:
                msg = "Finishing up run"
                print_line(msg, event_list)

                # wait for outstanding transfers before wrapping up
                printed = False
                while not filemanager.all_data_local():
                    if not printed:
                        printed = True
                        msg = 'Jobs are complete, but additional data is being transfered'
                        print_line(msg, event_list)
                    filemanager.update_local_status()
                    if not filemanager.all_data_local():
                        filemanager.transfer_needed(event_list=event_list, event=thread_kill_event)
                    sleep(10)
                filemanager.write_database()
                # NOTE(review): kill_event is passed to finalize here -
                # confirm the finalize in scope accepts that keyword
                finalize(config=config, event_list=event_list, status=status, kill_event=thread_kill_event, runmanager=runmanager)
                # SUCCESS EXIT
                return 0
            if debug:
                print_line(' -- sleeping', event_list)
            sleep(loop_delay)
    except KeyboardInterrupt as e:
        # save the job state and stop transfers before leaving
        print_message('\n----- KEYBOARD INTERRUPT -----')
        runmanager.write_job_sets(state_path)
        filemanager.terminate_transfers()
        print_message('----- cleanup complete -----', 'ok')
    except Exception as e:
        print_message('----- AN UNEXPECTED EXCEPTION OCCURED -----')
        print_debug(e)
        runmanager.write_job_sets(state_path)
        filemanager.terminate_transfers()
def transfer_needed(self, event_list, event, remote_endpoint, ui, display_event, emailaddr, thread_list):
    """
    Start a transfer thread for files that exist remotely but are missing
    or of the wrong size locally.

    At most two transfers may be active at once, and a single transfer
    takes files only until their combined remote size would reach 100 GB.
    The chosen files are marked IN_TRANSIT before the thread is started.
    A globus user must already be logged in.

    Parameters:
        event_list (EventList): the list to push information into
        event (threading.Event): the thread event to trigger a cancel
        remote_endpoint: accepted for interface compatibility, the
            endpoint is taken from self.remote_endpoint
        ui: passed through to the transfer configuration
        display_event: passed through to the transfer configuration
        emailaddr (str): stored as source_email in the transfer
            configuration
        thread_list (list): the new transfer thread is appended to it
    Returns:
        True if a transfer thread was started, False otherwise
    """
    if self.active_transfers >= 2:
        return False

    # required files dont exist locally, do exist remotely
    # or if they do exist locally have a different local and remote size
    self.mutex.acquire()
    try:
        wanted = DataFile.select().where(
            (DataFile.remote_status == filestatus['EXISTS']) &
            (DataFile.local_status != filestatus['IN_TRANSIT']) &
            ((DataFile.local_status == filestatus['NOT_EXIST']) |
             (DataFile.local_size != DataFile.remote_size)))
        required_files = [x for x in wanted]
        if len(required_files) == 0:
            return False

        size_cap = 1e11  # 100 GB
        total_size = 0
        target_files = []
        for candidate in required_files:
            # stop at the first file that would reach the cap
            if total_size + candidate.remote_size >= size_cap:
                break
            target_files.append({
                'name': candidate.name,
                'local_size': candidate.local_size,
                'local_path': candidate.local_path,
                'local_status': candidate.local_status,
                'remote_size': candidate.remote_size,
                'remote_path': candidate.remote_path,
                'remote_status': candidate.remote_status
            })
            total_size += candidate.remote_size
    except Exception as e:
        print_debug(e)
        return False
    finally:
        if self.mutex.locked():
            self.mutex.release()

    logging.info('Transfering required files')
    print('total transfer size {size} gigabytes for {nfiles} files'.format(
        size=(total_size / 1e9),
        nfiles=len(target_files)))
    transfer_config = {
        'file_list': target_files,
        'source_endpoint': self.remote_endpoint,
        'destination_endpoint': self.local_endpoint,
        'source_path': self.remote_path,
        'destination_path': self.local_path,
        'source_email': emailaddr,
        'display_event': display_event,
        'ui': ui,
    }
    transfer = Transfer(config=transfer_config, event_list=event_list)

    print('starting transfer for:')
    transfer_names = [x['name'] for x in transfer.file_list]
    for entry in transfer.file_list:
        print(' ' + entry['name'])
        logging.info(entry['name'])

    self.mutex.acquire()
    try:
        (DataFile
         .update(local_status=filestatus['IN_TRANSIT'])
         .where(DataFile.name << transfer_names)
         .execute())
        print('following files are in transit')
        for record in DataFile.select():
            if record.local_status == filestatus['IN_TRANSIT']:
                print(' ' + record.name)
    except Exception as e:
        print_debug(e)
        return False
    finally:
        if self.mutex.locked():
            self.mutex.release()

    worker = threading.Thread(
        target=self._handle_transfer,
        name='filemanager_transfer',
        args=(transfer, event, event_list))
    thread_list.append(worker)
    worker.start()
    return True
def main(test=False, **kwargs):
    """
    Processflow main

    Initializes the run, then loops: refresh the local file status, start
    transfers for missing data, start and monitor jobs, and write the job
    state to disk, until the run manager reports that all jobs are done.

    Parameters:
        test (bool): turns on test mode; a banner is printed, the
            arguments come from kwargs and initialize is called with
            testing=True
        kwargs (dict): when running in test mode the arguments are read
            from kwargs['testargs'], which bypasses sys.argv
    Returns:
        -1 if initialize returned an error code,
        0 once the run has finished and been finalized,
        None if the loop was ended by an interrupt or an exception
    """
    # An event to kill the threads on terminal exception
    thread_kill_event = threading.Event()

    # get a globus client; the object itself is not used below
    client = get_client()

    # Read in parameters from config
    if test:
        print('==========================================')
        print('---- Processflow running in test mode ----')
        print('==========================================')
        argv = kwargs['testargs']
        mode = {'testing': True}
    else:
        argv = sys.argv[1:]
        mode = {}
    config, filemanager, runmanager = initialize(
        argv=argv,
        version=__version__,
        branch=__branch__,
        event_list=event_list,
        kill_event=thread_kill_event,
        **mode)

    # setup returned an error code
    if isinstance(config, int):
        print("Error in setup, exiting")
        return -1
    logging.info('Config setup complete')
    debug = bool(config['global'].get('debug'))

    def trace(text):
        # progress chatter, only shown when debug is configured
        if debug:
            print_line(text, event_list)

    print_line(line="Updating local file status", event_list=event_list)
    filemanager.update_local_status()
    all_data_local = filemanager.all_data_local()
    if not all_data_local:
        filemanager.transfer_needed(
            event_list=event_list,
            event=thread_kill_event)

    # Main loop
    loop_delay = 10
    state_path = os.path.join(
        config['global']['project_path'], 'output', 'state.txt')
    try:
        print("--------------------------")
        print(" Entering Main Loop ")
        print(" Status file: {}".format(state_path))
        print("--------------------------")
        while True:
            # stop polling the disk once everything has arrived
            if not all_data_local:
                trace(' -- Updating local status --')
                if filemanager.update_local_status():
                    print_line(filemanager.report_files_local(), event_list)
                    filemanager.write_database()
                all_data_local = filemanager.all_data_local()
            if not all_data_local:
                trace(' -- Additional data needed --')
                filemanager.transfer_needed(event_list, thread_kill_event)

            trace(' -- checking data -- ')
            runmanager.check_data_ready()
            trace(' -- starting ready jobs --')
            runmanager.start_ready_jobs()
            trace(' -- monitoring running jobs --')
            runmanager.monitor_running_jobs()

            trace(' -- writing out state -- ')
            runmanager.write_job_sets(state_path)

            # -1 if still running, 0 if a jobset failed, 1 if all complete
            status = runmanager.is_all_done()
            if status < 0:
                trace(' -- sleeping')
                sleep(loop_delay)
                continue

            print_line("Finishing up run", event_list)
            # wait for outstanding transfers before wrapping up
            announced = False
            while not filemanager.all_data_local():
                if not announced:
                    announced = True
                    print_line(
                        'Jobs are complete, but additional data is being transfered',
                        event_list)
                filemanager.update_local_status()
                if not filemanager.all_data_local():
                    filemanager.transfer_needed(
                        event_list=event_list,
                        event=thread_kill_event)
                sleep(10)
            filemanager.write_database()
            finalize(
                config=config,
                event_list=event_list,
                status=status,
                runmanager=runmanager)
            # SUCCESS EXIT
            return 0
    except KeyboardInterrupt as e:
        # save the job state and stop transfers before leaving
        print_message('\n----- KEYBOARD INTERRUPT -----')
        runmanager.write_job_sets(state_path)
        filemanager.terminate_transfers()
        print_message('----- cleanup complete -----', 'ok')
    except Exception as e:
        print_message('----- AN UNEXPECTED EXCEPTION OCCURED -----')
        print_debug(e)
        runmanager.write_job_sets(state_path)
        filemanager.terminate_transfers()
def update_remote_status(self, client):
    """
    Check the remote location for the files on our list and record the
    ones that exist, together with their size, in the database.

    With the short term archive enabled each datatype is looked up in its
    own remote directory, and the restart file is resolved once through
    update_remote_rest_sta_path. Otherwise a single listing of
    self.remote_path is matched against every datatype.

    Parameters:
        client (globus_sdk.client): the globus client to use for the
            remote query
    Returns:
        False if the remote endpoint could not be auto-activated,
        None otherwise
    """
    result = client.endpoint_autoactivate(self.remote_endpoint, if_expires_in=2880)
    if result['code'] == "AutoActivationFailed":
        return False

    def record_listing(_type, listing):
        """
        Mark every listed file of this datatype as existing remotely and
        store its own size; returns the number of rows updated.
        """
        known = set(
            x.name for x in DataFile.select().where(
                DataFile.datatype == _type))
        updated = 0
        for entry in listing:
            if entry['name'] not in known:
                continue
            # one update per file so that each row gets the size of its
            # own listing entry
            updated += (DataFile
                        .update(
                            remote_status=filestatus['EXISTS'],
                            remote_size=entry['size'])
                        .where(
                            (DataFile.name == entry['name']) &
                            (DataFile.datatype == _type))
                        .execute())
        return updated

    if self.sta:
        for _type in self.types:
            if _type == 'rest':
                if not self.updated_rest:
                    self.mutex.acquire()
                    try:
                        name, path, size = self.update_remote_rest_sta_path(
                            client)
                        DataFile.update(
                            remote_status=filestatus['EXISTS'],
                            remote_size=size,
                            remote_path=path,
                            name=name).where(
                                DataFile.datatype == 'rest').execute()
                    finally:
                        # never leave the lock held if the lookup fails
                        if self.mutex.locked():
                            self.mutex.release()
                    self.updated_rest = True
                continue
            elif _type in [
                    'streams.ocean', 'streams.cice',
                    'mpas-o_in', 'mpas-cice_in']:
                remote_path = os.path.join(self.remote_path, 'run')
            elif _type == 'meridionalHeatTransport':
                remote_path = os.path.join(
                    self.remote_path, 'archive', 'ocn', 'hist')
            else:
                remote_path = os.path.join(
                    self.remote_path, 'archive', _type, 'hist')

            print('Querying globus for {}'.format(_type))
            res = self._get_ls(client=client, path=remote_path)

            self.mutex.acquire()
            try:
                if not record_listing(_type, res):
                    # nothing in the listing matched an expected file
                    print("Do you have the correct start and end dates?")
            except Exception as e:
                print_debug(e)
                print("Do you have the correct start and end dates?")
            finally:
                if self.mutex.locked():
                    self.mutex.release()
    else:
        remote_path = self.remote_path
        res = self._get_ls(client=client, path=remote_path)
        self.mutex.acquire()
        try:
            for _type in self.types:
                n = record_listing(_type, res)
                print('updated {} records'.format(n))
        except Exception as e:
            print_debug(e)
        finally:
            if self.mutex.locked():
                self.mutex.release()