def freeze_script(script_path, temp_path='_hadoopy_temp'):
    """Freeze a script, put it on HDFS, and return the resulting path.

    The returned 'frozen_tar_path' can be given to launch_frozen, which will
    use it instead of making its own; this is useful for repeated calls.  If
    a file with the same md5 already exists in temp_path, it is used instead
    of putting a new copy there, which avoids the file transfer.  The tar is
    first put at a timestamp-based temporary name and then moved to a name
    that is only a function of its md5, to prevent partial files.

    Args:
        script_path: Path to a hadoopy script
        temp_path: HDFS temporary path (default is '_hadoopy_temp')

    Returns:
        {'cmds': commands_ran, 'frozen_tar_path': frozen_tar_path}

    Raises:
        IOError: The move to the md5-based path failed and that path still
            does not exist afterwards.
    """
    # The context manager closes the local temporary tar deterministically,
    # including when freezing or uploading raises.
    with tempfile.NamedTemporaryFile(suffix='.tar') as freeze_fp:
        cmds = hadoopy._freeze.freeze_to_tar(os.path.abspath(script_path),
                                             freeze_fp.name)
        md5 = _md5_file(freeze_fp.name)
        frozen_tar_path = temp_path + '/%s.tar' % md5
        if not hadoopy.exists(frozen_tar_path):
            # Upload under a timestamp-based name first, then move it to the
            # md5-based name so a partially written file is never visible
            # under the final name.
            tmp_frozen_tar_path = temp_path + '/%f.tar' % time.time()
            hadoopy.put(freeze_fp.name, tmp_frozen_tar_path)
            try:
                hadoopy.mv(tmp_frozen_tar_path, frozen_tar_path)
            except IOError:
                # Check again: if the md5-based file exists now (presumably
                # put there by a concurrent caller), the failed move is
                # harmless; otherwise re-raise with the original traceback.
                if not hadoopy.exists(frozen_tar_path):
                    raise
    # Every successful path returns the documented dict, including the one
    # where the move succeeded.
    return {'cmds': cmds, 'frozen_tar_path': frozen_tar_path}
def freeze_script(script_path, cache=True, temp_path='_hadoopy_temp'):
    """Freeze a script, put it on HDFS, and return the resulting path.

    The returned 'frozen_tar_path' can be given to launch_frozen, which will
    use it instead of making its own; this is useful for repeated calls.  If
    a file with the same md5 already exists in temp_path, it is used instead
    of putting a new copy there, which avoids the file transfer.  The tar is
    first put at a timestamp-based temporary name and then moved to a name
    that is only a function of its md5, to prevent partial files.

    Args:
        script_path: Path to a hadoopy script
        cache: If True (default) then use previously frozen scripts.  Cache
            is stored in memory (not persistent) and is keyed by the
            absolute script path.
        temp_path: HDFS temporary path (default is '_hadoopy_temp')

    Returns:
        {'cmds': commands_ran, 'frozen_tar_path': frozen_tar_path}

    Raises:
        ValueError: Script cannot be found
        IOError: The move to the md5-based path failed and that path still
            does not exist afterwards.
    """
    script_abspath = os.path.abspath(script_path)
    if not os.path.exists(script_abspath):
        raise ValueError('Script [%s] does not exist.' % script_abspath)

    if cache:
        try:
            cmds, frozen_tar_path = FREEZE_CACHE[script_abspath]
        except KeyError:
            pass  # Not frozen yet in this process; fall through and freeze.
        else:
            return {'cmds': cmds, 'frozen_tar_path': frozen_tar_path}

    # The context manager closes the local temporary tar deterministically,
    # including when freezing or uploading raises.
    with tempfile.NamedTemporaryFile(suffix='.tar') as freeze_fp:
        cmds = hadoopy._freeze.freeze_to_tar(script_abspath, freeze_fp.name)
        md5 = _md5_file(freeze_fp.name)
        frozen_tar_path = temp_path + '/%s.tar' % md5
        if not hadoopy.exists(frozen_tar_path):
            if not hadoopy.exists(temp_path):  # CDH4 Fix
                hadoopy.mkdir(temp_path)
            # Upload under a timestamp-based name first, then move it to the
            # md5-based name so a partially written file is never visible
            # under the final name.
            tmp_frozen_tar_path = temp_path + '/%f.tar' % time.time()
            hadoopy.put(freeze_fp.name, tmp_frozen_tar_path)
            try:
                hadoopy.mv(tmp_frozen_tar_path, frozen_tar_path)
            except IOError:
                # Check again: if the md5-based file exists now (presumably
                # put there by a concurrent caller), the failed move is
                # harmless; otherwise re-raise with the original traceback.
                if not hadoopy.exists(frozen_tar_path):
                    raise

    # The cache is refreshed even when cache=False, so later cached calls
    # see the newest result.
    FREEZE_CACHE[script_abspath] = cmds, frozen_tar_path
    return {'cmds': cmds, 'frozen_tar_path': frozen_tar_path}