def test_update_missingfile(self):
    # update_json is pointed at a timestamped path under the fixture dir;
    # the test passes only if the file is absent before the call and present after it
    target_json = os.path.join(fixture_dir, 'foo2_{0}.json'.format(t.timestamp()))
    payload = {'a': 1, 'b': 2}

    present_before = t.item_exists(target_json)
    t.update_json(data=payload, input_file=target_json)
    present_after = t.item_exists(target_json)

    newly_created = present_after and not present_before
    self.assertTrue(
        newly_created,
        'File was not created correctly by the update JSON function')
def submit(verbose = False, log_dir = None, monitor = False, validate = False, *args, **kwargs):
    """
    Submit a shell command as a `qsub` compute job and wrap the result in a `Job` object.

    The actual submission is delegated to `submit_job`, which receives `args` and `kwargs`.
    The stdout captured from that call is parsed with `get_job_ID_name` to obtain the job ID
    and job name used to build the returned `Job`.

    Parameters
    ----------
    verbose: bool
        forwarded to `submit_job` as its `verbose` argument
    log_dir: str
        directory for the qsub job log output files; when given, it is created if needed and,
        if it exists afterwards, its resolved absolute path is passed on as both
        `stdout_log_dir` and `stderr_log_dir`
    monitor: bool
        whether the job should be immediately monitored until completion
    validate: bool
        whether the job's completion should be validated before returning
    *args: list
        positional arguments to pass on to `submit_job`
    **kwargs: dict
        keyword arguments to pass on to `submit_job`; the same dictionary is also passed to
        `monitor_jobs` when `monitor` is set

    Returns
    -------
    Job
        a `Job` object built from the submitted job's ID and name, and `log_dir`

    Notes
    -----
    If `log_dir` is given but does not exist as a directory after the attempt to create it,
    a warning is logged, no log directory entries are added to `kwargs`, and the `Job` is
    still created with the `log_dir` value as it was passed in.

    Examples
    --------
    Example usage::

        job = submit(command = 'echo foo')
        job = submit(command = 'echo foo', log_dir = "logs", verbose = True, monitor = True, validate = True)

    """
    if log_dir:
        # make sure the requested directory is present before it is handed to qsub
        tools.mkdirs(log_dir)
        if tools.item_exists(item = log_dir, item_type = 'dir'):
            # bad log_dir paths break job submissions easily, so use the
            # full, expanded, absolute, real path from here on
            log_dir = os.path.realpath(os.path.expanduser(log_dir))
            kwargs['stdout_log_dir'] = log_dir
            kwargs['stderr_log_dir'] = log_dir
        else:
            logger.warning('log_dir does not exist and will not be used for qsub job submission; {0}'.format(log_dir))

    # submit the job, then recover its ID and name from the captured stdout
    qsub_stdout = submit_job(*args, return_stdout = True, verbose = verbose, **kwargs)
    job_id, job_name = get_job_ID_name(qsub_stdout)
    submitted_job = Job(id = job_id, name = job_name, log_dir = log_dir)

    # optional follow-up steps requested by the caller
    if monitor:
        monitor_jobs(jobs = [submitted_job], **kwargs)
    if validate:
        submitted_job.validate_completion()

    return submitted_job
def validate(input_file):
    """
    Check whether a file is suitable for annotation.

    Parameters
    ----------
    input_file: str
        the path to the file to be validated

    Returns
    -------
    bool
        ``False`` if the file is missing or has no lines; otherwise the value returned by
        `filetype_validation` for the file

    Notes
    -----
    Validation criteria, checked in this order:

    - file must exist
    - file must have >0 lines
    - file must pass `filetype_validation`

    A warning is logged when either of the first two criteria fails.
    """
    if tools.item_exists(item = input_file, item_type = 'file'):
        line_count = tools.num_lines(input_file)
        if line_count > 0:
            # basic checks passed; defer to the filetype specific validations
            return filetype_validation(input_file)

        # the file is present but has nothing in it
        logger.warning('File has {0} lines and will not be annotated: {1}'.format(line_count, input_file))
        return False

    # the path is not an existing file
    logger.warning('File does not exist and will not be annotated: {0}'.format(input_file))
    return False
def get_log_file(self, _type = 'stdout'):
    """
    Build the expected path to one of the job's log files.

    Parameters
    ----------
    _type: str
        either 'stdout' or 'stderr', selecting which log path to generate

    Returns
    -------
    str or None
        the log file path inside `self.log_dir`, or `None` (with a warning logged) when
        `self.log_dir` is not set

    Raises
    ------
    KeyError
        if `_type` is neither 'stdout' nor 'stderr'

    Notes
    -----
    The basename is the job name, then `.o` (stdout) or `.e` (stderr), then the job ID.
    For a job with an ID of `4088513` and a name of `python`, the stdout log basename is
    `python.o4088513` and the stderr log basename is `python.e4088513`.

    The path is returned even if no such file exists; in that case a warning is logged.
    """
    if not self.log_dir:
        logger.warning('log_dir attribute is not set for this qsub job: {0}'.format((self.id, self.name)))
        return None

    # map the requested stream to the marker placed between job name and job ID
    stream_marker = {'stdout': '.o', 'stderr': '.e'}[_type]
    basename = ''.join([str(self.name), stream_marker, str(self.id)])
    expected_path = os.path.join(str(self.log_dir), basename)

    # a missing file is only reported, the path is still handed back to the caller
    if not tools.item_exists(expected_path):
        logger.warning('Log file does not appear to exist: {0}'.format(expected_path))
    return expected_path
def test_item_wrong_type(self):
    # checking `scriptfile` with item_type 'dir' is expected to report False
    self.assertFalse(t.item_exists(item=scriptfile, item_type='dir'))
def test_item_should_not_exist_file(self):
    # a made-up filename is expected to be reported as not existing
    bogus_name = "foobarbaznotarealfilenamefakefilegoeshere"
    self.assertFalse(t.item_exists(item=bogus_name, item_type='file'))
def test_item_should_exist_dir(self):
    # checking `scriptdir` with item_type 'dir' is expected to report True
    self.assertTrue(t.item_exists(item=scriptdir, item_type='dir'))
def test_item_should_exist_file(self):
    # checking `scriptfile` with item_type 'file' is expected to report True
    self.assertTrue(t.item_exists(item=scriptfile, item_type='file'))