def _init_local(self):

        # Set up temp folder in shared directory (trim to max filename length)
        base_tmp_dir = self.shared_tmp_dir
        random_id = '%016x' % random.getrandbits(64)
        folder_name = self.task_id + '-' + random_id
        self.tmp_dir = os.path.join(base_tmp_dir, folder_name)
        max_filename_length = os.fstatvfs(0).f_namemax
        self.tmp_dir = self.tmp_dir[:max_filename_length]
        logger.info("Tmp dir: %s", self.tmp_dir)
        os.makedirs(self.tmp_dir)

        # Dump the code to be run into a pickle file
        logging.debug("Dumping pickled class")
        self._dump(self.tmp_dir)

        if not self.no_tarball:
            # Make sure that all the class's dependencies are tarred and available
            # This is not necessary if luigi is importable from the cluster node
            logging.debug("Tarballing dependencies")
            # Grab luigi and the module containing the code to be run
            packages = [luigi
                        ] + [__import__(self.__module__, None, None, 'dummy')]
            create_packages_archive(packages,
                                    os.path.join(self.tmp_dir, "packages.tar"))
# Example #2
    def _init_local(self):
        """Create the shared temp directory, pickle the task into it, archive
        its dependencies, then hand off to the subclass's ``init_local`` hook.
        """
        # Unique folder name; without the replace, '/' characters coming from
        # directory-valued parameters would create weird nested directories.
        unique_suffix = '%016x' % random.getrandbits(64)
        dir_name = (unique_suffix + self.task_id).replace("/", "::")

        # Trim the single path component to the filesystem's per-name limit.
        name_limit = os.fstatvfs(0).f_namemax
        self.tmp_dir = os.path.join(self.shared_tmp_dir, dir_name[:name_limit])

        LOGGER.info("Tmp dir: %s", self.tmp_dir)
        os.makedirs(self.tmp_dir)

        # Serialize the task so the remote runner can unpickle and execute it.
        LOGGER.debug("Dumping pickled class")
        self._dump(self.tmp_dir)

        # Archive luigi plus the module defining this task so the cluster node
        # can import both.
        LOGGER.debug("Tarballing dependencies")
        to_archive = [luigi, __import__(self.__module__, None, None, 'dummy')]
        create_packages_archive(to_archive,
                                os.path.join(self.tmp_dir, "packages.tar"))

        # Now, pass onto the class's specified init_local() method.
        self.init_local()
# Example #3
    def _init_local(self):
        """Stage the job's working files into a fresh temp directory under the
        shared filesystem: the pickled task, plus (unless ``no_tarball``) a
        tarball of luigi and the task's defining module.
        """
        # Set up temp folder in shared directory (trim to max filename length)
        base_tmp_dir = self.shared_tmp_dir
        random_id = '%016x' % random.getrandbits(64)
        folder_name = self.task_id + '-' + random_id
        self.tmp_dir = os.path.join(base_tmp_dir, folder_name)
        # NOTE(review): f_namemax limits a single path *component*, not the
        # full path — truncating the joined path may clip base_tmp_dir; confirm.
        max_filename_length = os.fstatvfs(0).f_namemax
        self.tmp_dir = self.tmp_dir[:max_filename_length]
        logger.info("Tmp dir: %s", self.tmp_dir)
        os.makedirs(self.tmp_dir)

        # Dump the code to be run into a pickle file
        # (fix: use the module logger instead of the root `logging` module for
        # consistency with logger.info above and the configured handlers)
        logger.debug("Dumping pickled class")
        self._dump(self.tmp_dir)

        if not self.no_tarball:
            # Make sure that all the class's dependencies are tarred and available
            # This is not necessary if luigi is importable from the cluster node
            logger.debug("Tarballing dependencies")
            # Grab luigi and the module containing the code to be run
            packages = [luigi] + [__import__(self.__module__, None, None, 'dummy')]
            create_packages_archive(packages, os.path.join(self.tmp_dir, "packages.tar"))
# Example #4
    def run(self):
        """Execute the task: in-process when ``run_locally`` is set, otherwise
        stage a temp dir on the shared filesystem, stamp it with the current
        git commit, and submit the work to the cluster via qsub.

        Raises:
            Exception: if the qsub job did not produce the expected output.
        """
        if self.run_locally == 1:
            return self.work()
        else:

            # Set up temp folder in shared directory (trim to max filename length)
            base_tmp_dir = self.shared_tmp_dir
            random_id = '%016x' % random.getrandbits(64)
            folder_name = self.task_id + '-' + random_id
            self.tmp_dir = os.path.join(base_tmp_dir, folder_name)
            # NOTE(review): f_namemax limits a single path component, not the
            # whole path — truncating the joined path may clip base_tmp_dir.
            max_filename_length = os.fstatvfs(0).f_namemax
            self.tmp_dir = self.tmp_dir[:max_filename_length]
            logger.info("Tmp dir: %s", self.tmp_dir)

            # Mirror the current working tree (minus .git) into the temp dir
            # so the cluster job sees the same code/data layout.
            to_copy = [d for d in os.listdir() if d != ".git"]
            if not os.path.exists(self.tmp_dir):
                os.makedirs(self.tmp_dir)

            for f in to_copy:
                if os.path.isfile(f):
                    copyfile(f, os.path.join(self.tmp_dir, f))
                else:
                    copytree(f, os.path.join(self.tmp_dir, f))

            # Dump the code to be run into a pickle file
            # (fix: module logger, not the root `logging` module, for
            # consistency with the logger.info/debug calls in this method)
            logger.debug("Dumping pickled class")
            self._dump(self.tmp_dir)

            if not self.no_tarball:
                # Make sure that all the class's dependencies are tarred and available
                # This is not necessary if luigi is importable from the cluster node
                logger.debug("Tarballing dependencies")
                # Grab luigi and the module containing the code to be run
                packages = [luigi] + [
                    __import__(self.__module__, None, None, 'dummy')
                ]
                create_packages_archive(
                    packages, os.path.join(self.tmp_dir, "packages.tar"))

            # make a stamp indicator in the folder
            # generate unique descriptive stamp for current commit
            # (fix: def instead of a lambda bound to a name, PEP 8 E731)
            def get_commit_property(s):
                # Query one git log --format placeholder for HEAD.
                return subprocess.check_output(
                    "git --no-pager log -1 --format=%{}".format(
                        s), shell=True).decode("utf-8").strip()

            commit_time   = get_commit_property("ci")[:16] \
                .replace(" ", "-") \
                .replace(":", "-")

            commit_author = get_commit_property("ce").split("@")[0]
            commit_hash = get_commit_property("h")
            stamp = "{}-{}-{}".format(commit_time, commit_author, commit_hash)

            # write out stamp to temp folder
            with open(os.path.join(self.tmp_dir, "stamp"), "w") as stamp_file:
                stamp_file.write(stamp)

            # Build a qsub argument that will run sge_runner.py on the directory
            # we've specified
            runner_path = os.path.join("utility", "sge_runner.py")
            if runner_path.endswith("pyc"):
                runner_path = runner_path[:-3] + "py"
            # enclose tmp_dir in quotes to protect from special escape chars
            job_str = 'python {0} "{1}" "{2}"'.format(runner_path,
                                                      self.tmp_dir,
                                                      os.getcwd())
            if self.no_tarball:
                job_str += ' "--no-tarball"'

            qsub_template = """echo {cmd} | {qsub_command} -V -r y -pe {pe} {n_cpu} -N {job_name} -l m_mem_free={mem_free} -sync y"""

            submit_cmd = qsub_template.format(cmd=job_str,
                                              job_name=self.job_name,
                                              pe=self.parallel_env,
                                              n_cpu=self.n_cpu,
                                              mem_free=self.mem_free,
                                              qsub_command=self.qsub_command)

            logger.debug('qsub command: \n' + submit_cmd)

            # Submit the job and grab job ID.
            # NOTE(review): shell=True with an interpolated command string —
            # tmp_dir/job_name values containing shell metacharacters would be
            # interpreted by the shell; the values here are locally generated,
            # but verify none come from untrusted input.
            try:
                output = subprocess.check_output(submit_cmd,
                                                 shell=True,
                                                 stderr=subprocess.STDOUT)

                logger.debug("qsub job complete with output:\n" +
                             output.decode('utf-8'))
            except subprocess.CalledProcessError as e:
                # Best-effort diagnostics: surface qsub's output and the job's
                # stderr file, then fall through to the output-existence check.
                logger.error("qsub submission failed with output:\n" +
                             e.output.decode('utf-8'))
                if os.path.exists(os.path.join(self.tmp_dir, "job.err")):
                    with open(os.path.join(self.tmp_dir, "job.err"),
                              "r") as err:
                        logger.error(err.read())

            # wait a beat, to give things a chance to settle
            time.sleep(2)

            # check whether the file exists
            if not self.output().exists():
                raise Exception("qsub failed to produce output")
            else:
                # delete the temporaries, if they're there.
                if self.tmp_dir and os.path.exists(self.tmp_dir):
                    logger.info('Removing temporary directory %s',
                                self.tmp_dir)
                    subprocess.call(["rm", "-rf", self.tmp_dir])