def build_many(self, outfile_infiles_changed_context):
    """We can build all the npm files with just one npm command!"""
    # It would be faster to load just the input node modules,
    # instead of all of them, but it's too difficult to know
    # what module to load to get a given binary.  Ah well.
    #
    # We run 'npm install' in the directory holding node_modules/.
    #
    # We just do this on a best-effort basis, since the user might
    # not even be connected to the internet.  If it fails, we
    # depend on the sanity-checking below to make sure we actually
    # built what we needed to.
    #
    # It's bad if two 'npm install' calls happen at the same time,
    # so I use file-locking to prevent that.  If I were cool I
    # could lock on the package.json file, to allow independent
    # npm installs to happen at the same time.
    lockfile = os.path.join(tempfile.gettempdir(), 'npm_install.lock')
    with open(lockfile, 'w') as f:
        # Hold the exclusive lock while npm runs; it is released
        # when the file is closed at the end of this block.
        fcntl.lockf(f, fcntl.LOCK_EX)
        (rc, _, _) = self.try_call_with_output(
            ['npm', 'install', '--no-save'],
            stderr=None, stdout=None)

    # This is just sanity-checking that we built what we needed to.
    for (outfile_name, infile_names, _, context) in \
            outfile_infiles_changed_context:
        if not os.path.exists(self.abspath(outfile_name)):
            if rc != 0:
                msg = 'Could not run "npm install"'
            else:
                msg = 'Need to add %s to %s' % (outfile_name,
                                                infile_names[0])
            raise compile_rule.CompileFailure(msg)
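
# A minimal sketch (hypothetical, not used above) of the finer-grained
# locking the comment alludes to: key the lockfile off the package.json
# being installed, so npm installs for unrelated packages can proceed in
# parallel.  The helper name and lock-naming scheme are assumptions.
import contextlib
import fcntl
import hashlib
import os
import tempfile


@contextlib.contextmanager
def _locked_for(package_json_path):
    lockname = 'npm_install.%s.lock' % (
        hashlib.sha1(package_json_path).hexdigest())
    lockfile = os.path.join(tempfile.gettempdir(), lockname)
    with open(lockfile, 'w') as f:
        fcntl.lockf(f, fcntl.LOCK_EX)    # released when f is closed
        yield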
def _init_locale_paths(self):
    try:
        self._locale_paths = self.call_with_output(
            ['gsutil', 'ls', 'gs://ka_translations']).split()
    except compile_rule.CompileFailure as e:
        raise compile_rule.CompileFailure(
            "%s.\nFailed to download translations from gcs. Make sure "
            "that you have gsutil installed via gcloud." % e)
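
# For reference, 'gsutil ls gs://ka_translations' prints one path per
# line (the exact locales shown here are made up), e.g.:
#     gs://ka_translations/es/
#     gs://ka_translations/pt/
# so split() yields the list of locale paths that build() checks
# membership against below.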
def _munge_sys_path():
    """Modify sys.path so we can load the git-bigfile library."""
    # First, find out where git-bigfile lives.  It lives on the
    # path, so we can just look for that.
    for pathdir in os.environ['PATH'].split(':'):
        if os.path.exists(os.path.join(pathdir, 'git-bigfile')):
            # The importable library lives in the parent directory
            # of the bin-dir that holds the git-bigfile script.
            sys.path.append(os.path.dirname(pathdir))
            return
    raise compile_rule.CompileFailure(
        "Can't find git-bigfile in %s" % os.environ['PATH'])
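
# For example (paths hypothetical): if $PATH contains
#     /home/alice/devtools/git-bigfile/bin
# and that directory holds the 'git-bigfile' script, we append
#     /home/alice/devtools/git-bigfile
# to sys.path, which is where the importable 'gitbigfile' package
# used by build_many() below is assumed to live.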
class DownloadIndex(compile_rule.CompileBase):
    def __init__(self):
        super(DownloadIndex, self).__init__()
        self._locale_paths = None

    def version(self):
        """Update every time build() changes in a way that affects output."""
        import datetime
        # Force redownloading once a month.
        return datetime.datetime.now().strftime("%Y-%m")

    def build(self, outfile_name, infile_names, changed, context):
        """Download .index and .chunk files from prod.

        CompilePOFile takes a long time to compute.  So when not on
        jenkins we call this rule instead to fetch from prod what is
        there.
        """
        if self._locale_paths is None:
            self._init_locale_paths()

        log.v2("Determining latest prod translation files for %s" %
               context['{lang}'])
        locale = context['{lang}']
        locale_path = 'gs://ka_translations/%s/' % locale
        if locale_path not in self._locale_paths:
            raise NoSuchLocaleCompileFailure(locale)
        try:
            stdout = self.call_with_output(['gsutil', 'ls', locale_path])
        except compile_rule.CompileFailure as e:
            # TODO(james): make sure we download gcloud and gsutil as
            # part of the khan-dotfiles setup.
            raise compile_rule.CompileFailure(
                "%s.\nFailed to download translations from gcs. Make sure "
                "that you have gsutil installed via gcloud." % e)

        dirs = stdout.split()
        if dirs:
            most_recent_dir = dirs[-1]
            log.v2("Downloading latest prod files from %s" %
                   most_recent_dir)
            self.call(
                ['gsutil', '-m', 'cp', '-r', "%s*" % most_recent_dir,
                 os.path.dirname(outfile_name)])
            return

        # No translation files found on gcs ... let's complain.
        raise compile_rule.CompileFailure(
            "Failed to find translation files for %s on gcs" %
            context['{lang}'])
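
# NoSuchLocaleCompileFailure is defined elsewhere in this module; this
# is only a minimal sketch of the shape build() assumes (the message
# text and the stored 'locale' attribute are assumptions):
class NoSuchLocaleCompileFailure(compile_rule.CompileFailure):
    """Raised when gcs has no translation directory for a locale."""
    def __init__(self, locale):
        super(NoSuchLocaleCompileFailure, self).__init__(
            "No translation files for locale '%s' on gcs" % locale)
        self.locale = locale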
def build_many(self, outfile_infiles_changed_context):
    from shared.testutil import fake_datetime

    sha_to_files = {}           # for the files we need to get from S3
    for (outfile, infiles, _, context) in outfile_infiles_changed_context:
        assert len(infiles) == 1, infiles
        assert infiles[0].startswith('intl/translations/')

        with open(self.abspath(infiles[0])) as f:
            head = f.read(64).strip()

        # Does the head look like a sha1?  (sha1's are only 40 bytes.)
        # If so, store it for later.  If not, take care of it now.
        if head.strip('0123456789abcdefABCDEF') == '':
            sha_to_files.setdefault(head, []).append(outfile)
        else:
            # Nope, not a sha1.  NOTE: We could also use a hard-link,
            # but that could fail if genfiles is on a different
            # filesystem from the source.  Copying is more expensive
            # but safer.  Symlinks are right out.
            shutil.copyfile(self.abspath(infiles[0]),
                            self.abspath(outfile))

    if not sha_to_files:
        return

    # We could just call 'git bigfile pull' but we purposefully
    # don't so as to leave untouched the file-contents in
    # intl/translations.  This works better with kake, which
    # doesn't like it when input contents change as part of a kake
    # rule.
    self._munge_sys_path()      # so the following import succeeds
    import gitbigfile.command

    # Download all our files from S3 in parallel.  We store these
    # files under a 'permanent' name based on the sha1.  (Later
    # we'll copy these files to outfile_name.)  That way even if
    # you check out a different branch and come back to this one
    # again, you can get the old contents without needing to
    # revisit S3.
    # GitBigfile() (in _download_from_s3) runs 'git' commands in a
    # subprocess, so we need to be in the right repository for that.
    old_cwd = os.getcwd()
    os.chdir(self.abspath('intl/translations'))
    try:
        # This will actually try to download translation files via
        # bigfile.  This requires a real datetime for making the
        # api requests to S3 (S3 complains about weird dates).
        with fake_datetime.suspend_fake_datetime():
            arglists = []
            for (sha, outfiles) in sha_to_files.iteritems():
                # Typically a given sha will have only one outfile,
                # but for some shas (an empty po-file, e.g.), many
                # outfiles may share the same sha!
                log.v1('Fetching %s from S3' % ' '.join(outfiles))
                # We just need to put this in a directory we know we
                # can write to: take one of the outfile dirs arbitrarily.
                sha_name = os.path.join(os.path.dirname(outfiles[0]), sha)
                arglists.append(
                    (gitbigfile.command, self.abspath(sha_name), sha))
            shared.util.thread.run_many_threads(
                self._download_from_s3, arglists)
    except RuntimeError as why:
        log.error(why)          # probably misleading, but maybe helpful
        # TODO(csilvers): check whether git-bigfile *is* set up
        # correctly, and give a more precise failure message if so.
        raise compile_rule.CompileFailure(
            "Failed to download translation file for %s from S3. "
            "Make sure you have git-bigfile set up as per the "
            "configs in the khan-dotfiles repo: namely, the "
            "'bigfile' section in .gitconfig.khan, and the "
            "update_credentials() section in setup.sh." % outfile)
    finally:
        os.chdir(old_cwd)

    # Now copy from the sha-name to the actual output filename.
    for (sha, outfiles) in sha_to_files.iteritems():
        sha_name = os.path.join(os.path.dirname(outfiles[0]), sha)
        for outfile in outfiles:
            log.v2('Copying from %s to %s' % (sha_name, outfile))
            try:
                os.unlink(self.abspath(outfile))
            except OSError:
                pass            # probably file not found
            os.link(self.abspath(sha_name), self.abspath(outfile))
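
# A rough sketch of the run_many_threads() contract assumed above: run
# fn(*args) once per arglist, each on its own thread, and wait for all
# of them to finish.  (The real shared.util.thread helper may differ,
# e.g. by capping concurrency or propagating worker exceptions.)
import threading


def run_many_threads(fn, arglists):
    threads = [threading.Thread(target=fn, args=tuple(args))
               for args in arglists]
    for t in threads:
        t.start()
    for t in threads:
        t.join()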