def wait_for_file_to_be_written_to(fpath, total_timeout=100000, time_frame=0.05):
    '''
    fpath: str
        path to file to check
    total_timeout: number
        total number of seconds before aborting the wait command
    time_frame: number
        number of seconds to wait between each check of file size.

    Purpose: Wait until a file exists and its filesize remains constant
    across one time_frame interval. The size will not be constant if the
    file is currently being written to. A size of 0 is treated as done
    (same as the original behavior).
    '''
    start_time = time_utils.gtime()

    def _raise_if_out_of_time():
        # Single place for the deadline check; the original duplicated it.
        if time_utils.gtime() - start_time > total_timeout:
            raise Exception(
                'file ' + fpath +
                ' still not done being written to after a total of ' +
                str(total_timeout) + ' seconds')

    # Phase 1: wait for the file to exist at all.
    while True:
        try:
            fsize = os.path.getsize(fpath)
            break
        except FileNotFoundError:
            pass
        _raise_if_out_of_time()
        time_utils.sleep(time_frame)

    # Phase 2: wait until two size reads separated by time_frame agree.
    while fsize != 0:
        time_utils.sleep(time_frame)
        try:
            new_size = os.path.getsize(fpath)
        except FileNotFoundError:
            # File vanished between checks (e.g. moved after writing);
            # the original would have crashed here. Keep waiting.
            _raise_if_out_of_time()
            continue
        if new_size == fsize:
            break  # size stable over one interval: writer appears done
        fsize = new_size
        _raise_if_out_of_time()
def safe_np_load(npy_fpath, total_timeout=10000, time_frame=0.05, verbose=False, check_file_done_being_written_to=True):
    '''
    npy_fpath: str
        Path to file that is loadable by np.load()
    total_timeout: number
        total number of seconds before aborting the wait command
    time_frame: number
        number of seconds to wait between each check of file size.
    verbose: bool
        Whether to print some log info
    check_file_done_being_written_to: bool
        Whether to check file size to determine if the file is being
        written to and thus unsafe to load.

    Return: np.array
        The contents of npy_fpath as loaded by np.load()

    Purpose: Check to make sure file exists before loading it. If DNE,
    wait until it does exist or your timeout is reached.
    '''
    start_time = time_utils.gtime()
    if check_file_done_being_written_to:
        wait_for_file_to_exist_and_written_to(npy_fpath, total_timeout=total_timeout, time_frame=time_frame)
    else:
        wait_for_file_to_exist(npy_fpath, total_timeout=total_timeout, time_frame=time_frame)
    if verbose:
        print(
            'took {} seconds to wait for file to exist and written to according to the function wait_for_file_to_exist_and_written_to'
            .format(time_utils.gtime() - start_time))
    start_time_load = time_utils.gtime()
    while time_utils.gtime() - start_time < total_timeout:
        try:
            npy = np.load(npy_fpath)
            if verbose:
                print('took {} seconds after file {} exists to load it'.format(
                    time_utils.gtime() - start_time_load, npy_fpath))
            return npy
        except ValueError:
            # np.load raises ValueError on a truncated / partially
            # written .npy header; wait and retry.
            time_utils.sleep(time_frame)
    # Bug fix: the message previously named a nonexistent 'save_np_load'.
    raise TimeoutError('total_timeout was reached in safe_np_load')
def read_fragile_csv(fpath):
    '''
    fpath: str
        Path to a csv file that another process may still be writing.

    Return: pd.DataFrame
        The contents of fpath as parsed by pd.read_csv().

    Purpose: Read a csv that may be mid-write by another process. First
    wait for its size to stabilize, then retry the parse until it
    succeeds, for up to 1000 seconds.
    '''
    wait_for_file_to_be_written_to(fpath, total_timeout=1000, time_frame=0.1)
    start_time = time_utils.gtime()
    while True:
        try:
            # Success path returns immediately.
            return pd.read_csv(fpath)
        except Exception:
            # File may be empty or partially written; wait and retry.
            # (Was a bare except:, which also swallowed KeyboardInterrupt.)
            time_utils.sleep(0.1)
        if time_utils.gtime() - start_time > 1000:
            # Bug fix: the original referenced an undefined name
            # 'tasks_fpath' here, raising NameError instead of this.
            raise Exception(
                'Took more than 1000 seconds to try to read', fpath,
                '\nExpected the file to be existant and non-empty.')
def wait_for_file_to_exist(fpath, total_timeout=100000, time_frame=0.05):
    '''
    fpath: str
        path to file to check
    total_timeout: number
        total number of seconds before aborting the wait command
    time_frame: number
        number of seconds to wait between each check of file size.

    Purpose: Block until fpath exists, polling every time_frame seconds,
    and give up after total_timeout seconds.
    '''
    started = time_utils.gtime()
    while not os.path.exists(fpath):
        elapsed = time_utils.gtime() - started
        if elapsed > total_timeout:
            raise Exception('file ' + fpath + ' still DNE after a total of ' +
                            str(total_timeout) + ' seconds')
        time_utils.sleep(time_frame)
def wait_for_file_to_vanish(fpath, total_timeout=100000, time_frame=0.05, go_ahead_if_out_of_time=False):
    '''
    fpath: str
        path to the file expected to be removed by some other process
    total_timeout: number
        total number of seconds before aborting the wait
    time_frame: number
        seconds to wait between existence checks; 0 means busy-wait
    go_ahead_if_out_of_time: bool
        If True, return (proceed) once total_timeout elapses instead of
        raising.

    Purpose: Wait until a file is removed by some other process.

    Bug fix: when go_ahead_if_out_of_time was True and the deadline
    passed, the original kept looping forever instead of going ahead.
    The two near-identical loops (time_frame == 0 vs not) are also merged.
    '''
    start_time = time_utils.gtime()
    # Wait until a file is removed by some other process.
    while os.path.exists(fpath):
        if time_utils.gtime() - start_time > total_timeout:
            if go_ahead_if_out_of_time:
                return  # caller asked to proceed once out of time
            raise Exception('file ' + fpath +
                            ' still exists after a total of ' +
                            str(total_timeout) + ' seconds')
        if time_frame != 0:
            # Sleep a random amount of time to help prevent clashing
            # (if multiple ranks).
            time_utils.sleep(random.uniform(time_frame, 1.1 * time_frame))
def lock_file(fpath, lockfile_message='locked', total_timeout=100000, time_frame=0.05, go_ahead_if_out_of_time=False):
    '''
    fpath: str
        path of the lock file to create
    lockfile_message: str
        message written into the lock file; the lock is considered
        acquired once reading the file back returns this exact message
    total_timeout: number
        total number of seconds before aborting
    time_frame: number
        seconds between checks while waiting for an existing lock file
    go_ahead_if_out_of_time: bool
        If True, proceed once total_timeout elapses instead of raising.

    Purpose: Crude cooperative lock shared via the filesystem. NOTE: this
    write-then-read-back scheme is inherently racy between processes; it
    only reduces, not eliminates, clashes.

    Bug fix: when go_ahead_if_out_of_time was True and the deadline
    passed, the original looped forever instead of proceeding.
    '''
    start_time = time_utils.gtime()
    # First wait for any existing lock file to be released.
    wait_for_file_to_vanish(fpath, total_timeout=total_timeout,
                            time_frame=time_frame,
                            go_ahead_if_out_of_time=go_ahead_if_out_of_time)
    read_lockfile_message = None
    while read_lockfile_message != lockfile_message:
        if time_utils.gtime() - start_time > total_timeout:
            if go_ahead_if_out_of_time:
                break
            # Fixed: was Exception('msg', total_timeout, 'msg'), which
            # produced a tuple-args exception instead of one message.
            raise Exception(
                'Took longer than total_timeout = {} seconds to acquire lock file.'
                .format(total_timeout))
        with open(fpath, 'w') as f:
            f.write(lockfile_message)
        time_utils.sleep(0.05)
        try:
            with open(fpath) as f:
                read_lockfile_message = f.read()
        except OSError:
            # Another rank may have just removed/replaced the file; retry.
            pass
def get_new_task(lockfile_fpath, incomplete_tasks_fpath):
    '''
    lockfile_fpath: str
        path of the lock file guarding the incomplete-tasks csv
    incomplete_tasks_fpath: str
        path of the csv holding the remaining task ids (one per row,
        task id in the first column)

    Return:
        The task id popped from the last row of the csv, or None when no
        tasks remain.

    Purpose: Atomically (via the cooperative lock file) pop one task off
    the end of the shared incomplete-tasks csv.

    Bug fix: an empty csv made the original crash with IndexError on
    tasks_df.values[len(tasks_df) - 1] before its emptiness test ran.
    '''
    # Unique-ish message identifies this rank's claim on the lock.
    lockfile_message = str(int(time_utils.gtime() * 10000))
    lock_file(lockfile_fpath, lockfile_message=lockfile_message,
              total_timeout=1000, time_frame=0.1,
              go_ahead_if_out_of_time=False)
    tasks_df = read_fragile_csv(incomplete_tasks_fpath)
    if len(tasks_df) == 0:
        # No tasks remain: release the lock and signal "done".
        rm(lockfile_fpath)
        return None
    # Pop the last row's first column as the task id.
    task_id = tasks_df.values[len(tasks_df) - 1][0]
    # Assumes a default RangeIndex so the last row's label is len-1
    # (true for freshly read csvs) — TODO confirm if indices ever differ.
    tasks_df.drop(index=len(tasks_df) - 1, inplace=True)
    tasks_df.to_csv(incomplete_tasks_fpath, index=False)
    del tasks_df
    rm_file_with_message(lockfile_fpath, lockfile_message)
    return task_id
def add_completed_task(lockfile_fpath, complete_tasks_fpath, task_id, intermediate_func=None, intermediate_args=()):
    '''
    lockfile_fpath: str
        path of the lock file guarding the completed-tasks csv
    complete_tasks_fpath: str
        path of the csv recording completed task ids (column 'task_id')
    task_id:
        id of the task that just finished
    intermediate_func: callable or None
        Optional function run while holding the lock, before the csv is
        written.
    intermediate_args: sequence
        Positional args for intermediate_func. (Was a mutable default
        [], now an immutable () — same behavior, no shared-state risk.)

    Purpose: Append task_id to the shared completed-tasks csv under the
    cooperative lock file.
    '''
    # Use lockfile for complete tasks to let me know this task_id was complete.
    lockfile_message = str(int(time_utils.gtime() * 10000))
    lock_file(lockfile_fpath, lockfile_message=lockfile_message,
              total_timeout=1000, time_frame=0.1,
              go_ahead_if_out_of_time=False)
    new_row = pd.DataFrame({'task_id': [task_id]})
    if os.path.exists(complete_tasks_fpath):
        tasks_df = read_fragile_csv(complete_tasks_fpath)
        # DataFrame.append was removed in pandas 2.0; concat is the
        # supported equivalent.
        tasks_df = pd.concat([tasks_df, new_row], ignore_index=True)
    else:
        tasks_df = new_row
    if intermediate_func is not None:
        intermediate_func(*intermediate_args)
    # Write to complete_tasks_fpath that this task is complete.
    tasks_df.to_csv(complete_tasks_fpath, index=False)
    del tasks_df
    rm_file_with_message(lockfile_fpath, lockfile_message)