def _init_local(self):
    """Create a per-task temp dir under the shared dir, pickle this task
    into it, and (unless ``no_tarball`` is set) tarball its code
    dependencies for the cluster node.

    Side effects: sets ``self.tmp_dir``, creates that directory, and
    writes a pickle (via ``self._dump``) and possibly ``packages.tar``
    inside it.
    """
    # Set up temp folder in shared directory (trim to max filename length)
    base_tmp_dir = self.shared_tmp_dir
    random_id = '%016x' % random.getrandbits(64)
    folder_name = self.task_id + '-' + random_id
    # f_namemax bounds a single path *component*, so trim the folder name
    # before joining -- trimming the joined path could mangle base_tmp_dir.
    max_filename_length = os.fstatvfs(0).f_namemax
    self.tmp_dir = os.path.join(base_tmp_dir, folder_name[:max_filename_length])
    logger.info("Tmp dir: %s", self.tmp_dir)
    os.makedirs(self.tmp_dir)

    # Dump the code to be run into a pickle file
    # (module logger, not the root logger, consistent with logger.info above)
    logger.debug("Dumping pickled class")
    self._dump(self.tmp_dir)

    if not self.no_tarball:
        # Make sure that all the class's dependencies are tarred and available
        # This is not necessary if luigi is importable from the cluster node
        logger.debug("Tarballing dependencies")
        # Grab luigi and the module containing the code to be run
        packages = [luigi] + [__import__(self.__module__, None, None, 'dummy')]
        create_packages_archive(packages, os.path.join(self.tmp_dir, "packages.tar"))
def _init_local(self):
    """Stage this task into a fresh temp dir on the shared filesystem:
    pickle the task, tarball its code dependencies, then hand off to the
    subclass hook ``init_local()``.
    """
    shared_root = self.shared_tmp_dir
    nonce = '%016x' % random.getrandbits(64)
    # If any parameters are directories, "/" in the task id would create a
    # weird nested directory on *nix -- swap the separators out first.
    safe_name = (nonce + self.task_id).replace("/", "::")
    # Filesystem's per-component name limit
    name_limit = os.fstatvfs(0).f_namemax
    self.tmp_dir = os.path.join(shared_root, safe_name[:name_limit])
    LOGGER.info("Tmp dir: %s", self.tmp_dir)
    os.makedirs(self.tmp_dir)

    # Dump the code to be run into a pickle file
    LOGGER.debug("Dumping pickled class")
    self._dump(self.tmp_dir)

    # Tar up luigi plus the module holding the code to be run, so the
    # cluster node has every dependency available.
    LOGGER.debug("Tarballing dependencies")
    archive_members = [luigi, __import__(self.__module__, None, None, 'dummy')]
    create_packages_archive(archive_members, os.path.join(self.tmp_dir, "packages.tar"))

    # Now, pass onto the class's specified init_local() method.
    self.init_local()
def _init_local(self):
    """Prepare a unique temp dir in the shared directory, pickle the task
    there, and optionally tarball its dependencies.

    Sets ``self.tmp_dir`` and creates it; writes the pickle via
    ``self._dump`` and, when ``no_tarball`` is false, ``packages.tar``.
    """
    # Set up temp folder in shared directory (trim to max filename length)
    base_tmp_dir = self.shared_tmp_dir
    random_id = '%016x' % random.getrandbits(64)
    folder_name = self.task_id + '-' + random_id
    # f_namemax limits one path component; trim only the folder name --
    # slicing the full joined path could truncate base_tmp_dir itself.
    max_filename_length = os.fstatvfs(0).f_namemax
    self.tmp_dir = os.path.join(base_tmp_dir, folder_name[:max_filename_length])
    logger.info("Tmp dir: %s", self.tmp_dir)
    os.makedirs(self.tmp_dir)

    # Dump the code to be run into a pickle file
    # (use the module logger rather than the root logger, matching
    # the logger.info call above)
    logger.debug("Dumping pickled class")
    self._dump(self.tmp_dir)

    if not self.no_tarball:
        # Make sure that all the class's dependencies are tarred and available
        # This is not necessary if luigi is importable from the cluster node
        logger.debug("Tarballing dependencies")
        # Grab luigi and the module containing the code to be run
        packages = [luigi] + [__import__(self.__module__, None, None, 'dummy')]
        create_packages_archive(packages, os.path.join(self.tmp_dir, "packages.tar"))
def run(self):
    """Execute the task: directly via ``self.work()`` when running
    locally, otherwise stage the working tree and pickled task into a
    shared temp dir, submit a synchronous qsub job that runs it through
    ``sge_runner.py``, and verify the job produced ``self.output()``.

    Raises:
        Exception: if the qsub job finishes without producing the output.
    """
    # NOTE(review): compares with == 1, so only int 1 (or True) triggers
    # the local path -- confirm whether other truthy values should count.
    if self.run_locally == 1:
        return self.work()

    # Set up temp folder in shared directory (trim to max filename length)
    base_tmp_dir = self.shared_tmp_dir
    random_id = '%016x' % random.getrandbits(64)
    folder_name = self.task_id + '-' + random_id
    # f_namemax limits a single path component, so trim the folder name
    # itself; slicing the joined path could truncate base_tmp_dir.
    max_filename_length = os.fstatvfs(0).f_namemax
    self.tmp_dir = os.path.join(base_tmp_dir, folder_name[:max_filename_length])
    logger.info("Tmp dir: %s", self.tmp_dir)

    # Mirror the current working tree (minus .git) into the temp dir so
    # the cluster node sees the same files.
    to_copy = [d for d in os.listdir() if d != ".git"]
    if not os.path.exists(self.tmp_dir):
        os.makedirs(self.tmp_dir)
    for f in to_copy:
        if os.path.isfile(f):
            copyfile(f, os.path.join(self.tmp_dir, f))
        else:
            copytree(f, os.path.join(self.tmp_dir, f))

    # Dump the code to be run into a pickle file
    # (module logger, not the root logger, consistent with logger.info above)
    logger.debug("Dumping pickled class")
    self._dump(self.tmp_dir)

    if not self.no_tarball:
        # Make sure that all the class's dependencies are tarred and available
        # This is not necessary if luigi is importable from the cluster node
        logger.debug("Tarballing dependencies")
        # Grab luigi and the module containing the code to be run
        packages = [luigi] + [__import__(self.__module__, None, None, 'dummy')]
        create_packages_archive(
            packages, os.path.join(self.tmp_dir, "packages.tar"))

    def get_commit_property(fmt):
        # One `git log` format field for HEAD. shell=True is acceptable
        # here because the format string is a hard-coded constant.
        return subprocess.check_output(
            "git --no-pager log -1 --format=%{}".format(fmt),
            shell=True).decode("utf-8").strip()

    # Make a stamp indicator in the folder: a unique descriptive stamp
    # for the current commit (time-author-hash).
    commit_time = get_commit_property("ci")[:16] \
        .replace(" ", "-") \
        .replace(":", "-")
    commit_author = get_commit_property("ce").split("@")[0]
    commit_hash = get_commit_property("h")
    stamp = "{}-{}-{}".format(commit_time, commit_author, commit_hash)

    # write out stamp to temp folder
    with open(os.path.join(self.tmp_dir, "stamp"), "w") as stamp_file:
        stamp_file.write(stamp)

    # Build a qsub argument that will run sge_runner.py on the directory
    # we've specified
    runner_path = os.path.join("utility", "sge_runner.py")
    if runner_path.endswith("pyc"):
        runner_path = runner_path[:-3] + "py"
    # enclose tmp_dir in quotes to protect from special escape chars
    job_str = 'python {0} "{1}" "{2}"'.format(
        runner_path, self.tmp_dir, os.getcwd())
    if self.no_tarball:
        job_str += ' "--no-tarball"'

    qsub_template = """echo {cmd} | {qsub_command} -V -r y -pe {pe} {n_cpu} -N {job_name} -l m_mem_free={mem_free} -sync y"""
    submit_cmd = qsub_template.format(
        cmd=job_str, job_name=self.job_name, pe=self.parallel_env,
        n_cpu=self.n_cpu, mem_free=self.mem_free,
        qsub_command=self.qsub_command)
    logger.debug('qsub command: \n%s', submit_cmd)

    # Submit the job and grab job ID. A failed submission is logged
    # (including job.err when present) but not fatal -- the output check
    # below decides success.
    try:
        output = subprocess.check_output(
            submit_cmd, shell=True, stderr=subprocess.STDOUT)
        logger.debug("qsub job complete with output:\n%s",
                     output.decode('utf-8'))
    except subprocess.CalledProcessError as e:
        logger.error("qsub submission failed with output:\n%s",
                     e.output.decode('utf-8'))
        if os.path.exists(os.path.join(self.tmp_dir, "job.err")):
            with open(os.path.join(self.tmp_dir, "job.err"), "r") as err:
                logger.error(err.read())

    # wait a beat, to give things a chance to settle
    time.sleep(2)

    # check whether the output file exists
    if not self.output().exists():
        raise Exception("qsub failed to produce output")

    # delete the temporaries, if they're there.
    if self.tmp_dir and os.path.exists(self.tmp_dir):
        logger.info('Removing temporary directory %s', self.tmp_dir)
        subprocess.call(["rm", "-rf", self.tmp_dir])