Example #1
0
def wait_for_file_to_be_written_to(fpath,
                                   total_timeout=100000,
                                   time_frame=0.05):
    '''
    fpath: str
        path to file to check
    total_timeout: number
        total number of seconds, counted from the start of this call, before
        aborting the wait command
    time_frame: number
        number of seconds to wait between each check of file size.

    Raises: Exception
        If more than total_timeout seconds pass while the file is still
        missing or its size is still changing.

    Purpose: Wait until the file exists and its file size remains constant
        across one time_frame. The size will not be constant if the file is
        currently being written to. A file whose recorded size is 0 is not
        waited on any further.
    '''
    # NOTE(review): time_utils.gtime() is assumed to return a time in seconds.
    start_time = time_utils.gtime()

    def raise_if_timed_out():
        if time_utils.gtime() - start_time > total_timeout:
            raise Exception(
                'file ' + fpath +
                ' still not done being written to after a total of ' +
                str(total_timeout) + ' seconds')

    # Phase 1: wait for the file to appear and record its initial size.
    while True:
        try:
            fsize = os.path.getsize(fpath)
            break
        except FileNotFoundError:
            pass
        raise_if_timed_out()
        # Sleep between polls so a missing file does not cause a busy spin.
        time_utils.sleep(time_frame)

    # Phase 2: poll until two consecutive size readings agree.
    time_utils.sleep(time_frame)
    while True:
        current_size = os.path.getsize(fpath)
        if current_size == fsize or fsize == 0:
            return
        # Reuse the reading just taken so no size change can slip in between
        # the comparison and the update.
        fsize = current_size
        time_utils.sleep(time_frame)
        raise_if_timed_out()
Example #2
0
def safe_np_load(npy_fpath,
                 total_timeout=10000,
                 time_frame=0.05,
                 verbose=False,
                 check_file_done_being_written_to=True):
    '''
    npy_fpath: str
        Path to file that is loadable by np.load()

    total_timeout: number
        total number of seconds, counted from the start of this call, before
        aborting the wait command

    time_frame: number
        number of seconds to wait between each check of file size and between
        failed load attempts.

    verbose: bool
        Whether to print some log info

    check_file_done_being_written_to: bool
        Whether to check file size to determine if the file is being written to
        and thus unsafe to load.

    Return: np.array
        The contents of npy_fpath as loaded by np.load()

    Raises: TimeoutError
        If the file could not be loaded before total_timeout was reached.
        The wait helpers called first may raise their own exceptions.

    Purpose: Check to make sure file exists before loading it. If DNE, wait until
        it does exist or your timeout is reached. Loading is retried while
        np.load() fails on an incomplete file.
    '''
    start_time = time_utils.gtime()
    if check_file_done_being_written_to:
        wait_for_file_to_exist_and_written_to(npy_fpath,
                                              total_timeout=total_timeout,
                                              time_frame=time_frame)
    else:
        wait_for_file_to_exist(npy_fpath,
                               total_timeout=total_timeout,
                               time_frame=time_frame)
    if verbose:
        print(
            'took {} seconds to wait for file to exist and written to according to the function wait_for_file_to_exist_and_written_to'
            .format(time_utils.gtime() - start_time))
    # Always defined, so the verbose report below never depends on an earlier
    # branch having run.
    start_time_load = time_utils.gtime()
    while time_utils.gtime() - start_time < total_timeout:
        try:
            npy = np.load(npy_fpath)
        except (ValueError, EOFError):
            # ValueError: truncated header/data. EOFError: the file exists but
            # is still empty. Both mean the writer is not finished yet.
            time_utils.sleep(time_frame)
            continue
        if verbose:
            print('took {} seconds after file {} exists to load it'.format(
                time_utils.gtime() - start_time_load, npy_fpath))
        return npy
    raise TimeoutError('total_timeout was reached in safe_np_load')
Example #3
0
def read_fragile_csv(fpath):
    '''
    fpath: str
        Path to a csv file that may still be in the process of being written.

    Return: pd.DataFrame
        The contents of fpath as loaded by pd.read_csv()

    Raises: Exception
        If reading has kept failing for more than 1000 seconds.
        wait_for_file_to_be_written_to may raise its own timeout exception.

    Purpose: Read a csv file that other processes may be rewriting. First wait
        for the file size to settle, then retry pd.read_csv() every 0.1
        seconds until it succeeds.
    '''
    wait_for_file_to_be_written_to(fpath, total_timeout=1000, time_frame=0.1)
    start_time = time_utils.gtime()
    while True:
        try:
            return pd.read_csv(fpath)
        except Exception:
            # Any read/parse failure (e.g. an empty or half-written file) is
            # treated as "not ready yet". KeyboardInterrupt and SystemExit
            # still propagate.
            time_utils.sleep(0.1)
        if time_utils.gtime() - start_time > 1000:
            raise Exception(
                'Took more than 1000 seconds to try to read ' + str(fpath) +
                '\nExpected the file to be existant and non-empty.')
Example #4
0
def wait_for_file_to_exist(fpath, total_timeout=100000, time_frame=0.05):
    '''
    fpath: str
        path to the file whose existence is polled
    total_timeout: number
        number of seconds, counted from the start of this call, after which
        the wait is aborted with an Exception
    time_frame: number
        number of seconds to sleep between two existence checks.
    Purpose: Block until fpath exists, giving up after total_timeout seconds.
    '''
    began_at = time_utils.gtime()
    while True:
        if os.path.exists(fpath):
            return
        waited = time_utils.gtime() - began_at
        if waited > total_timeout:
            message = ''.join([
                'file ', fpath, ' still DNE after a total of ',
                str(total_timeout), ' seconds'
            ])
            raise Exception(message)
        time_utils.sleep(time_frame)
Example #5
0
def wait_for_file_to_vanish(fpath,
                            total_timeout=100000,
                            time_frame=0.05,
                            go_ahead_if_out_of_time=False):
    '''
    fpath: str
        path to the file that is expected to be removed by some other process
    total_timeout: number
        total number of seconds, counted from the start of this call, to wait
        for the file to disappear
    time_frame: number
        base number of seconds to sleep between existence checks. The actual
        sleep is drawn uniformly from [time_frame, 1.1 * time_frame]. If 0,
        the file is polled continuously without sleeping.
    go_ahead_if_out_of_time: bool
        If True, return normally once total_timeout is exceeded even though
        the file still exists. If False, raise instead.

    Raises: Exception
        If the file still exists after total_timeout seconds and
        go_ahead_if_out_of_time is False.

    Purpose: Wait until fpath no longer exists.
    '''
    start_time = time_utils.gtime()
    # wait until a file is removed by some other process
    while os.path.exists(fpath):
        if time_frame != 0:
            # sleep a random amount of time to help prevent clashing (if multiple ranks)
            time_utils.sleep(random.uniform(time_frame, 1.1 * time_frame))
        if time_utils.gtime() - start_time > total_timeout:
            if go_ahead_if_out_of_time:
                # Out of time: stop waiting and let the caller proceed.
                return
            raise Exception('file ' + fpath +
                            ' still exists after a total of ' +
                            str(total_timeout) + ' seconds')
Example #6
0
def lock_file(fpath,
              lockfile_message='locked',
              total_timeout=100000,
              time_frame=0.05,
              go_ahead_if_out_of_time=False):
    '''
    fpath: str
        path of the lock file to create
    lockfile_message: str
        text written into the lock file; the lock counts as acquired once this
        exact text is read back from fpath
    total_timeout: number
        total number of seconds, counted from the start of this call, allowed
        for acquiring the lock
    time_frame: number
        polling interval forwarded to wait_for_file_to_vanish
    go_ahead_if_out_of_time: bool
        Forwarded to wait_for_file_to_vanish. In this function, when True, no
        timeout exception is raised and writing is retried until the message
        is read back.

    Raises: Exception
        If the lock was not acquired within total_timeout seconds and
        go_ahead_if_out_of_time is False.

    Purpose: Acquire a file-based lock. Wait for any existing lock file to
        vanish, then write lockfile_message to fpath and read it back until
        the file contains this caller's message.
    '''
    start_time = time_utils.gtime()
    wait_for_file_to_vanish(fpath,
                            total_timeout=total_timeout,
                            time_frame=time_frame,
                            go_ahead_if_out_of_time=go_ahead_if_out_of_time)
    while True:
        with open(fpath, 'w') as f:
            f.write(lockfile_message)
        # Give competing writers a moment to overwrite the file before the
        # read-back decides who owns the lock.
        time_utils.sleep(0.05)
        try:
            with open(fpath) as f:
                if f.read() == lockfile_message:
                    return
        except (OSError, UnicodeDecodeError):
            # The file may have been removed or be mid-rewrite by another
            # process; treat this as "lock not acquired yet" and retry.
            pass
        if time_utils.gtime(
        ) - start_time > total_timeout and not go_ahead_if_out_of_time:
            raise Exception(
                'Took longer than total_timeout = {} seconds to acquire lock file.'
                .format(total_timeout))
Example #7
0
def get_new_task(lockfile_fpath, incomplete_tasks_fpath):
    '''
    lockfile_fpath: str
        path of the lock file guarding incomplete_tasks_fpath
    incomplete_tasks_fpath: str
        path to a csv file of incomplete tasks; the task id is read from the
        first column of the last row

    Return: task id taken from the last row, or None if the table holds no
        tasks

    Purpose: Under the file lock, take the last task out of the incomplete
        tasks csv, write the shortened table back, and return the task's id.
    '''
    # Time-derived message so this caller can recognise its own lock file.
    lockfile_message = str(int(time_utils.gtime() * 10000))
    lock_file(lockfile_fpath,
              lockfile_message=lockfile_message,
              total_timeout=1000,
              time_frame=0.1,
              go_ahead_if_out_of_time=False)
    tasks_df = read_fragile_csv(incomplete_tasks_fpath)
    if tasks_df.empty:
        # No rows (or no columns) left: nothing to hand out. Release the lock
        # instead of failing on an out-of-range row lookup.
        rm(lockfile_fpath)
        return None
    last_row_label = len(tasks_df) - 1
    task_id = tasks_df.values[last_row_label][0]
    tasks_df.drop(index=last_row_label, inplace=True)
    tasks_df.to_csv(incomplete_tasks_fpath, index=False)
    # NOTE(review): presumably removes the lock file only if it still holds
    # this caller's message - confirm against rm_file_with_message.
    rm_file_with_message(lockfile_fpath, lockfile_message)
    return task_id
Example #8
0
def add_completed_task(lockfile_fpath,
                       complete_tasks_fpath,
                       task_id,
                       intermediate_func=None,
                       intermediate_args=()):
    '''
    lockfile_fpath: str
        path of the lock file guarding complete_tasks_fpath
    complete_tasks_fpath: str
        path to the csv file listing completed tasks; created if it does not
        exist yet
    task_id:
        identifier stored in the 'task_id' column for the finished task
    intermediate_func: callable or None
        optional function called while the lock is held, after the existing
        table has been read and before the updated table is written
    intermediate_args: sequence
        positional arguments passed to intermediate_func

    Purpose: Under the file lock, append task_id to the completed tasks csv.
    '''
    # Use lockfile for complete tasks to let me know this task_id was complete.
    lockfile_message = str(int(time_utils.gtime() * 10000))
    lock_file(lockfile_fpath,
              lockfile_message=lockfile_message,
              total_timeout=1000,
              time_frame=0.1,
              go_ahead_if_out_of_time=False)
    new_row = pd.DataFrame({'task_id': [task_id]})
    if os.path.exists(complete_tasks_fpath):
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        tasks_df = pd.concat([read_fragile_csv(complete_tasks_fpath), new_row])
    else:
        tasks_df = new_row
    if intermediate_func is not None:
        intermediate_func(*intermediate_args)
    # Write to complete_tasks_fpath that this task is complete
    tasks_df.to_csv(complete_tasks_fpath, index=False)
    # NOTE(review): presumably removes the lock file only if it still holds
    # this caller's message - confirm against rm_file_with_message.
    rm_file_with_message(lockfile_fpath, lockfile_message)