Example #1
0
    def build_many(self, outfile_infiles_changed_context):
        """Run one 'npm install' and confirm it produced every outfile.

        A single npm invocation covers all of the requested outputs.
        The install is best-effort (the user might be offline, say),
        so its exit code is only consulted afterwards, to decide how
        to describe an output file that turned out to be missing.

        Arguments:
            outfile_infiles_changed_context: iterable of
                (outfile_name, infile_names, changed, context) tuples.

        Raises:
            compile_rule.CompileFailure: if any outfile does not exist
                once 'npm install' has been attempted.
        """
        # Installing only the modules we need would be quicker, but
        # there is no easy way to map a binary back to its module, so
        # we install everything.
        #
        # Two simultaneous 'npm install' runs are bad news, so we
        # serialize them with an exclusive lock on a file in the temp
        # directory.  (Locking on the package-json file instead would
        # let independent installs proceed in parallel.)
        npm_lock_path = os.path.join(tempfile.gettempdir(),
                                     'npm_install.lock')
        npm_command = ['npm', 'install', '--no-save']
        with open(npm_lock_path, 'w') as lock_handle:
            fcntl.lockf(lock_handle, fcntl.LOCK_EX)
            exit_status, _out, _err = self.try_call_with_output(
                npm_command, stderr=None, stdout=None)

        # Sanity-check that everything we were asked for now exists.
        for build_spec in outfile_infiles_changed_context:
            (target, sources, _changed, _context) = build_spec
            if os.path.exists(self.abspath(target)):
                continue
            if exit_status == 0:
                # npm ran fine, so the module must not be listed.
                reason = 'Need to add %s to %s' % (target, sources[0])
            else:
                reason = 'Could not run "npm install"'
            raise compile_rule.CompileFailure(reason)
Example #2
0
    def build(self, outfile_name, infile_names, changed, context):
        """Download .index and .chunk files from prod.

        CompilePOFile takes a long time to compute.  So when not on jenkins we
        call this rule instead to fetch from prod what is there.

        Arguments:
            context: dict whose '{lang}' entry names the locale to fetch.

        Raises:
            NoSuchLocaleCompileFailure: if the locale has no directory
                in the gs://ka_translations listing.
            compile_rule.CompileFailure: if the 'gsutil ls' call fails.
        """
        # _init_locale_paths() stores the bucket listing on
        # self.locale_paths (the attribute read below), so that is the
        # attribute that says whether the listing is already cached.
        # Testing self._locale_paths instead would re-list the bucket
        # on every call, since nothing visible here ever assigns it a
        # non-None value.
        if getattr(self, 'locale_paths', None) is None:
            self._init_locale_paths()

        locale = context['{lang}']
        log.v2("Determining latest prod translation files for %s" % locale)

        locale_path = 'gs://ka_translations/%s/' % locale
        if locale_path not in self.locale_paths:
            raise NoSuchLocaleCompileFailure(locale)

        try:
            stdout = self.call_with_output(['gsutil', 'ls', locale_path])
        except compile_rule.CompileFailure as e:
            # TODO(james): make sure we download gcloud and gsutil as part
            # of the khan-dotfiles setup.
            raise compile_rule.CompileFailure(
                "%s.\nFailed to download translations from gcs. Make sure "
                "that you have gsutil installed via gcloud." % e)
Example #3
0
 def _init_locale_paths(self):
     """List gs://ka_translations and cache the resulting paths on self.

     The whitespace-split output of 'gsutil ls gs://ka_translations'
     is stored on both self.locale_paths (which build() searches) and
     self._locale_paths (which build() tests against None to decide
     whether this method still needs to run).  Setting only the former
     would leave the latter None forever, so the bucket would be
     re-listed on every build() call.

     Raises:
         compile_rule.CompileFailure: if the gsutil call fails.
     """
     try:
         listing = self.call_with_output(
             ['gsutil', 'ls', 'gs://ka_translations'])
     except compile_rule.CompileFailure as e:
         raise compile_rule.CompileFailure(
             "%s.\nFailed to download translations from gcs. Make sure "
             "that you have gsutil installed via gcloud." % e)
     self.locale_paths = self._locale_paths = listing.split()
Example #4
0
 def _munge_sys_path():
     """Modify sys.path so we can load the git-bigfile library.

     Looks through each directory on $PATH for an entry named
     'git-bigfile' and appends the parent of the first such directory
     to sys.path.

     Raises:
         compile_rule.CompileFailure: if no directory on $PATH holds
             git-bigfile (including when $PATH is not set at all).
     """
     # First, find out where git-bigfile lives.  It lives on the
     # path, so we can just look for that.
     search_path = os.environ.get('PATH', '')
     for pathdir in search_path.split(os.pathsep):
         if os.path.exists(os.path.join(pathdir, 'git-bigfile')):
             # normpath() strips any trailing slash first; without it,
             # dirname('/x/bin/') is '/x/bin' rather than the parent
             # directory '/x' that we actually want.
             sys.path.append(os.path.dirname(os.path.normpath(pathdir)))
             return
     raise compile_rule.CompileFailure(
         "Can't find git-bigfile in %s" % search_path)
Example #5
0
class DownloadIndex(compile_rule.CompileBase):
    """Compile rule that fetches a locale's translation files from GCS.

    Instead of computing the files locally, build() copies the most
    recently listed directory under gs://ka_translations/<lang>/ into
    the output file's directory.
    """

    def __init__(self):
        super(DownloadIndex, self).__init__()
        # Retained so any existing reader of this attribute keeps working.
        self._locale_paths = None
        # The bucket listing that build() searches.  build() calls
        # self._init_locale_paths() while this is still None.
        self.locale_paths = None

    def version(self):
        """Update every time build() changes in a way that affects output."""
        import datetime
        # Force redownloading once a month.
        return datetime.datetime.now().strftime("%Y-%m")

    def build(self, outfile_name, infile_names, changed, context):
        """Download .index and .chunk files from prod.

        CompilePOFile takes a long time to compute.  So when not on jenkins we
        call this rule instead to fetch from prod what is there.

        Arguments:
            outfile_name: path of the output file; the download is
                copied into its directory.
            context: dict whose '{lang}' entry names the locale to fetch.

        Raises:
            NoSuchLocaleCompileFailure: if the locale has no directory
                in the gs://ka_translations listing.
            compile_rule.CompileFailure: if 'gsutil ls' fails, or if the
                locale's directory listing comes back empty.
        """
        # Test the attribute that is actually read below.  Checking
        # self._locale_paths here would re-list the bucket on every
        # call unless _init_locale_paths() happened to set that name.
        if self.locale_paths is None:
            self._init_locale_paths()

        locale = context['{lang}']
        log.v2("Determining latest prod translation files for %s" % locale)

        locale_path = 'gs://ka_translations/%s/' % locale
        if locale_path not in self.locale_paths:
            raise NoSuchLocaleCompileFailure(locale)

        try:
            stdout = self.call_with_output(['gsutil', 'ls', locale_path])
        except compile_rule.CompileFailure as e:
            # TODO(james): make sure we download gcloud and gsutil as part
            # of the khan-dotfiles setup.
            raise compile_rule.CompileFailure(
                "%s.\nFailed to download translations from gcs. Make sure "
                "that you have gsutil installed via gcloud." % e)
        dirs = stdout.split()

        if not dirs:
            # No translation files found on gcs ... lets complain
            raise compile_rule.CompileFailure(
                "Failed to find translation files for %s on gcs" % locale)

        # The last entry of the listing is taken to be the newest.
        most_recent_dir = dirs[-1]
        log.v2("Downloading latest prod files from %s" % most_recent_dir)
        self.call(
            ['gsutil', '-m', 'cp', '-r', "%s*" % most_recent_dir,
             os.path.dirname(outfile_name)])
Example #6
0
    def build_many(self, outfile_infiles_changed_context):
        """Produce each translation outfile, fetching from S3 if needed.

        Each input file either holds real contents, which are copied
        to the outfile directly, or holds nothing but hex digits, in
        which case it is taken to be a sha naming contents that must
        be downloaded through git-bigfile.  Every distinct sha is
        downloaded once, to a file named after the sha, and then
        hard-linked to each outfile that needs it.

        Arguments:
            outfile_infiles_changed_context: iterable of
                (outfile, infiles, changed, context) tuples.  Each
                infiles list must hold exactly one path, and it must
                start with 'intl/translations/'.

        Raises:
            compile_rule.CompileFailure: if the download step raises
                RuntimeError.
            OSError: if an existing outfile cannot be removed, or the
                hard-link cannot be created.
        """
        import errno
        from shared.testutil import fake_datetime

        sha_to_files = {}            # for the files we need to get from S3
        for (outfile, infiles, _, _) in outfile_infiles_changed_context:
            assert len(infiles) == 1, infiles
            assert infiles[0].startswith('intl/translations/')

            with open(self.abspath(infiles[0])) as f:
                head = f.read(64).strip()

            # Does the head look like a sha1?  (sha1's are only 40 bytes.)
            # If so, store it for later.  If not, take care of it now.
            # An empty file trivially consists "only of hex digits", but
            # it is not a sha, so it is copied like any other content.
            if head and head.strip('0123456789abcdefABCDEF') == '':
                sha_to_files.setdefault(head, []).append(outfile)
            else:
                # Nope, not a sha1.  NOTE: We could also use a hard-link,
                # but that could fail if genfiles is on a different
                # filesystem from the source.  Copying is more expensive
                # but safer.  Symlinks are right out.
                shutil.copyfile(self.abspath(infiles[0]),
                                self.abspath(outfile))

        if not sha_to_files:
            return

        # We could just call 'git bigfile pull' but we purposefully
        # don't so as to leave untouched the file-contents in
        # intl/translations.  This works better with kake, which
        # doesn't like it when input contents change as part of a kake
        # rule.
        self._munge_sys_path()     # so the following import succeeds
        import gitbigfile.command

        # Download all our files from S3 in parallel.  We store these
        # files under a 'permanent' name based on the sha1.  (Later
        # we'll copy these files to outfile_name.)  That way even if
        # you check out a different branch and come back to this one
        # again, you can get the old contents without needing to
        # revisit S3.
        # GitBigfile() (in _download_from_s3) runs 'git' commands in a
        # subprocess, so we need to be in the right repository for that.
        old_cwd = os.getcwd()
        os.chdir(self.abspath('intl/translations'))
        try:
            # This will actually try to download translation files via
            # bigfile.  This requires a real datetime for making the
            # api requests to S3 (S3 complains about weird dates).
            with fake_datetime.suspend_fake_datetime():
                arglists = []
                for (sha, outfiles) in sha_to_files.items():
                    # Typically a given sha will have only one outfile,
                    # but for some shas (an empty po-file, e.g.), many
                    # outfiles may share the same sha!
                    log.v1('Fetching %s from S3' % ' '.join(outfiles))
                    # We just need to put this in a directory we know we
                    # can write to: take one of the outfile dirs arbitrarily.
                    sha_name = os.path.join(os.path.dirname(outfiles[0]), sha)
                    arglists.append(
                        (gitbigfile.command, self.abspath(sha_name), sha))
                shared.util.thread.run_many_threads(
                    self._download_from_s3, arglists)
        except RuntimeError as why:
            log.error(why)    # probably misleading, but maybe helpful
            # We can't tell here which download failed, so name every
            # file we were fetching.  (The loop variable left over from
            # the first loop above is just the last input processed,
            # which may not even have needed a download.)
            wanted = sorted(name
                            for names in sha_to_files.values()
                            for name in names)
            # TODO(csilvers): check whether git-bigfile *is* set up
            # correctly, and give a more precise failure message if so.
            raise compile_rule.CompileFailure(
                "Failed to download translation file for %s from S3. "
                "Make sure you have git-bigfile set up as per the "
                "configs in the khan-dotfiles repo: namely, the "
                "'bigfile' section in .gitconfig.khan, and the "
                "update_credentials() section in setup.sh."
                % ', '.join(wanted))
        finally:
            os.chdir(old_cwd)

        # Now copy from the sha-name to the actual output filename.
        for (sha, outfiles) in sha_to_files.items():
            sha_name = os.path.join(os.path.dirname(outfiles[0]), sha)
            for outfile in outfiles:
                log.v2('Copying from %s to %s' % (sha_name, outfile))
                try:
                    os.unlink(self.abspath(outfile))
                except OSError as e:
                    # A missing outfile is expected; anything else
                    # would make the os.link() below fail anyway, so
                    # report the real cause instead.
                    if e.errno != errno.ENOENT:
                        raise
                os.link(self.abspath(sha_name), self.abspath(outfile))