def archive_and_unarchive(self, extension, archive_template,
                          added_files=None):
    """Archive the contents of ``tmp_dir/a`` using *archive_template*,
    unarchive the result into ``tmp_dir/b``, and verify the contents.

    :param extension: file extension for the archive (e.g. ``'tar.gz'``)
    :param archive_template: list of command-line args containing
        ``%(archive_name)s`` / ``%(files_to_archive)s`` placeholders
    :param added_files: extra files expected in the unarchived output
        (default: none). Was a mutable default (``[]``); now ``None``
        to avoid the shared-mutable-default pitfall.
    """
    if added_files is None:
        added_files = []

    join = os.path.join

    # archive it up
    archive_name = 'a.' + extension
    variables = dict(archive_name=join('..', archive_name),
                     files_to_archive='.')
    archive_command = [arg % variables for arg in archive_template]

    # sometimes the relevant command isn't available or doesn't work;
    # if so, skip the test
    try:
        proc = Popen(archive_command, cwd=join(self.tmp_dir, 'a'),
                     stdout=PIPE, stderr=PIPE)
    except OSError as e:
        if e.errno == 2:  # ENOENT: the archiving command doesn't exist
            self.skipTest("No %s command" % archive_command[0])
        else:
            raise
    proc.communicate()  # discard output
    if proc.returncode != 0:
        self.skipTest("Can't run `%s` to create archive." %
                      cmd_line(archive_command))

    # unarchive it into b/
    unarchive(join(self.tmp_dir, archive_name), join(self.tmp_dir, 'b'))

    self.ensure_expected_results(added_files=added_files)
def archive_and_unarchive(self, extension, archive_template,
                          added_files=None):
    """Archive the contents of ``tmp_dir/a`` using *archive_template*,
    unarchive the result into ``tmp_dir/b``, and verify the contents.

    :param extension: file extension for the archive (e.g. ``'tar.gz'``)
    :param archive_template: list of command-line args containing
        ``%(archive_name)s`` / ``%(files_to_archive)s`` placeholders
    :param added_files: extra files expected in the unarchived output
        (default: none). Was a mutable default (``[]``); now ``None``
        to avoid the shared-mutable-default pitfall.
    """
    if added_files is None:
        added_files = []

    join = os.path.join

    # archive it up
    archive_name = 'a.' + extension
    variables = dict(archive_name=join('..', archive_name),
                     files_to_archive='.')
    archive_command = [arg % variables for arg in archive_template]

    # sometimes the relevant command isn't available or doesn't work;
    # if so, skip the test
    proc = Popen(archive_command, cwd=join(self.tmp_dir, 'a'),
                 stdout=PIPE, stderr=PIPE)
    proc.communicate()  # discard output
    if proc.returncode != 0:
        self.skipTest("Can't run command to create archive.")

    # unarchive it into b/
    unarchive(join(self.tmp_dir, archive_name), join(self.tmp_dir, 'b'))

    self.ensure_expected_results(added_files=added_files)
def _setup_working_dir(self):
    """Make a working directory with symlinks to our script and
    external files. Return name of the script"""
    # mark the script (and wrapper script, if any) for upload alongside
    # the other files
    if self._script:
        self._script['upload'] = 'file'
    if self._wrapper_script:
        self._wrapper_script['upload'] = 'file'

    # create the working directory
    self._working_dir = os.path.join(
        self._get_local_tmp_dir(), 'working_dir')
    self.mkdir(self._working_dir)

    # assign every file a name, then symlink or unarchive each one into
    # the working directory
    self._name_files()
    for info in self._files:
        src = info['path']
        dest = os.path.join(self._working_dir, info['name'])
        upload = info.get('upload')
        if upload == 'file':
            self._symlink_to_file_or_copy(src, dest)
        elif upload == 'archive':
            log.debug('unarchiving %s -> %s' % (src, dest))
            unarchive(src, dest)
def _setup_working_dir(self):
    """Make a working directory with symlinks to our script and
    external files. Return name of the script"""
    # mark the script (and wrapper script, if any) for upload alongside
    # the other files
    if self._script:
        self._script['upload'] = 'file'
    if self._wrapper_script:
        self._wrapper_script['upload'] = 'file'

    # create the working directory (only once)
    if not self._working_dir:
        self._working_dir = os.path.join(
            self._get_local_tmp_dir(), 'working_dir')
        self.mkdir(self._working_dir)

    # assign every file a name, then symlink or unarchive each one into
    # the working directory
    self._name_files()
    for info in self._files:
        src = info['path']
        dest = os.path.join(self._working_dir, info['name'])
        upload = info.get('upload')
        if upload == 'file':
            self._symlink_to_file_or_copy(src, dest)
        elif upload == 'archive':
            log.debug('unarchiving %s -> %s' % (src, dest))
            unarchive(src, dest)
def archive_and_unarchive(self, extension, archive_template,
                          added_files=None):
    """Archive the contents of ``tmp_dir/a`` using *archive_template*,
    unarchive the result into ``tmp_dir/b``, and verify the contents.

    :param extension: file extension for the archive (e.g. ``'tar.gz'``)
    :param archive_template: list of command-line args containing
        ``%(archive_name)s`` / ``%(files_to_archive)s`` placeholders
    :param added_files: extra files expected in the unarchived output
        (default: none). Was a mutable default (``[]``); now ``None``
        to avoid the shared-mutable-default pitfall.
    """
    if added_files is None:
        added_files = []

    join = os.path.join

    # archive it up
    archive_name = 'a.' + extension
    variables = dict(archive_name=join('..', archive_name),
                     files_to_archive='.')
    archive_command = [arg % variables for arg in archive_template]

    # check_call() raises CalledProcessError if archiving fails
    check_call(archive_command, cwd=join(self.tmp_dir, 'a'))

    # unarchive it into b/
    unarchive(join(self.tmp_dir, archive_name), join(self.tmp_dir, 'b'))

    self.ensure_expected_results(added_files=added_files)
def _setup_working_dir(self):
    """Make a working directory with symlinks to our script and
    external files. Return name of the script"""
    # create the working directory (only once)
    if not self._working_dir:
        self._working_dir = os.path.join(
            self._get_local_tmp_dir(), 'working_dir')
        self.mkdir(self._working_dir)

    name_to_path = self._working_dir_mgr.name_to_path

    # symlink (or copy) plain files into the working dir
    for name, path in name_to_path('file').iteritems():
        self._symlink_to_file_or_copy(
            path, os.path.join(self._working_dir, name))

    # unpack archives into the working dir
    for name, path in name_to_path('archive').iteritems():
        dest = os.path.join(self._working_dir, name)
        log.debug('unarchiving %s -> %s' % (path, dest))
        unarchive(path, dest)
def _setup_working_dir(self, working_dir):
    """Make a working directory with symlinks to our script and
    external files. Return name of the script"""
    log.debug('setting up working dir in %s' % working_dir)

    # create the working directory
    self.fs.mkdir(working_dir)

    name_to_path = self._working_dir_mgr.name_to_path

    # symlink (or copy) plain files into the working dir
    for name, path in name_to_path('file').items():
        self._symlink_to_file_or_copy(
            path, os.path.join(working_dir, name))

    # unpack archives into the working dir
    for name, path in name_to_path('archive').items():
        dest = os.path.join(working_dir, name)
        log.debug('unarchiving %s -> %s' % (path, dest))
        unarchive(path, dest)
def _setup_working_dir(self, working_dir):
    """Make a working directory with symlinks to our script and
    external files. Return name of the script"""
    log.debug('setting up working dir in %s' % working_dir)

    # create the working directory
    self.mkdir(working_dir)

    name_to_path = self._working_dir_mgr.name_to_path

    # symlink (or copy) plain files into the working dir
    for name, path in name_to_path('file').iteritems():
        self._symlink_to_file_or_copy(
            path, os.path.join(working_dir, name))

    # unpack archives into the working dir
    for name, path in name_to_path('archive').iteritems():
        dest = os.path.join(working_dir, name)
        log.debug('unarchiving %s -> %s' % (path, dest))
        unarchive(path, dest)
def archive_and_unarchive(self, extension, archive_template,
                          added_files=None):
    """Archive the contents of ``tmp_dir/a`` using *archive_template*,
    unarchive the result into ``tmp_dir/b``, and verify the contents.

    :param extension: file extension for the archive (e.g. ``"tar.gz"``)
    :param archive_template: list of command-line args containing
        ``%(archive_name)s`` / ``%(files_to_archive)s`` placeholders
    :param added_files: extra files expected in the unarchived output
        (default: none). Was a mutable default (``[]``); now ``None``
        to avoid the shared-mutable-default pitfall.
    """
    if added_files is None:
        added_files = []

    join = os.path.join

    # archive it up
    archive_name = "a." + extension
    variables = dict(archive_name=join("..", archive_name),
                     files_to_archive=".")
    archive_command = [arg % variables for arg in archive_template]

    # sometimes the relevant command isn't available or doesn't work;
    # if so, skip the test
    proc = Popen(archive_command, cwd=join(self.tmp_dir, "a"),
                 stdout=PIPE, stderr=PIPE)
    proc.communicate()  # discard output
    if proc.returncode != 0:
        self.skipTest("Can't run command to create archive.")

    # unarchive it into b/
    unarchive(join(self.tmp_dir, archive_name), join(self.tmp_dir, "b"))

    self.ensure_expected_results(added_files=added_files)
def _setup_working_dir(self):
    """Make a working directory with symlinks to our script and
    external files. Return name of the script"""
    # create the working directory (only once)
    if not self._working_dir:
        self._working_dir = os.path.join(
            self._get_local_tmp_dir(), 'working_dir')
        self.mkdir(self._working_dir)

    name_to_path = self._working_dir_mgr.name_to_path

    # symlink (or copy) plain files into the working dir
    for name, path in name_to_path('file').iteritems():
        self._symlink_to_file_or_copy(
            path, os.path.join(self._working_dir, name))

    # unpack archives into the working dir
    for name, path in name_to_path('archive').iteritems():
        dest = os.path.join(self._working_dir, name)
        log.debug('unarchiving %s -> %s' % (path, dest))
        unarchive(path, dest)
def _create_dist_cache_dir(self, step_num):
    """Copy working directory files into a shared directory,
    simulating the way Hadoop's Distributed Cache works on nodes."""
    cache_dir = self._dist_cache_dir(step_num)
    log.debug('creating simulated Distributed Cache dir: %s' % cache_dir)
    self.fs.mkdir(cache_dir)

    name_to_path = self._working_dir_mgr.name_to_path

    # copy plain files into the cache dir and make them readable/executable
    for name, path in name_to_path('file').items():
        dest = self._path_in_dist_cache_dir(name, step_num)
        log.debug('copying %s -> %s' % (path, dest))
        shutil.copy(path, dest)
        _chmod_u_rx(dest)

    # unpack archives into the cache dir, fixing permissions recursively
    for name, path in name_to_path('archive').items():
        dest = self._path_in_dist_cache_dir(name, step_num)
        log.debug('unarchiving %s -> %s' % (path, dest))
        unarchive(path, dest)
        _chmod_u_rx(dest, recursive=True)
def mapper(self, key, value):
    """Compute the BIC score for one pair of GMMs.

    :param key: ``(index1, index2)`` identifying the GMM pair
    :param value: ``(didx1, didx2, em_iters)`` — data indices for each
        GMM plus the number of EM iterations
    Yields ``(1, (score, index1, index2))`` so all scores reduce to one key.
    """
    overall = time.time()
    index1, index2 = key
    didx1, didx2, em_iters = value

    # load the two data subsets and stack them
    t = time.time()
    d1 = tools.get_data_from_file_from_indices('self_X', didx1)
    d2 = tools.get_data_from_file_from_indices('self_X', didx2)
    sys.stderr.write(
        "get_data_from_file_from_indices: {0}\n".format(time.time() - t))
    data = np.concatenate((d1, d2))

    # unpack the pickled GMMs; open in binary mode (required for pickle
    # data on Python 3/Windows; identical to 'r' on POSIX Python 2) and
    # close the files promptly instead of leaking the handles
    t = time.time()
    util.unarchive('gmm.tgz', 'gmm')
    with open('gmm/' + str(index1), 'rb') as f:
        g1 = pickle.load(f)
    with open('gmm/' + str(index2), 'rb') as f:
        g2 = pickle.load(f)
    sys.stderr.write("read iter_gmm_list: {0}\n".format(time.time() - t))

    # score the pair; any exception propagates as before (the original
    # try/except simply re-raised). The merged GMM isn't yielded, so it
    # is discarded here.
    t = time.time()
    _, score = compute_distance_BIC(g1, g2, data, em_iters)
    data_to_yield = (score, index1, index2)
    sys.stderr.write("compute_distance_BIC: {0}\n".format(time.time() - t))

    sys.stderr.write("total BIC time: {0}\n".format(time.time() - overall))
    yield 1, data_to_yield