예제 #1
0
    def archive_and_unarchive(self, extension, archive_template,
                              added_files=[]):
        """Archive ``a/`` with an external command, unpack it into ``b/``
        with ``unarchive()``, then check the extracted contents.

        ``extension`` is appended to ``'a.'`` to name the archive.
        ``archive_template`` is a list of command arguments; each one is
        %-formatted with the keys ``archive_name`` and ``files_to_archive``.
        ``added_files`` is forwarded unchanged to
        ``ensure_expected_results()``.

        The test is skipped when the command cannot be found or exits with
        a non-zero status.
        """
        archive_name = 'a.' + extension

        # the command runs from inside a/, so the archive is written one
        # level up, directly into the temp dir
        substitutions = {
            'archive_name': os.path.join('..', archive_name),
            'files_to_archive': '.',
        }
        archive_command = [part % substitutions for part in archive_template]

        try:
            proc = Popen(archive_command,
                         cwd=os.path.join(self.tmp_dir, 'a'),
                         stdout=PIPE,
                         stderr=PIPE)
        except OSError as e:
            # errno 2: the executable does not exist; anything else is a
            # genuine failure
            if e.errno != 2:
                raise
            self.skipTest("No %s command" % archive_command[0])

        # we only care about the exit status, not the output
        proc.communicate()
        if proc.returncode != 0:
            self.skipTest("Can't run `%s` to create archive." %
                          cmd_line(archive_command))

        archive_path = os.path.join(self.tmp_dir, archive_name)
        extract_dir = os.path.join(self.tmp_dir, 'b')
        unarchive(archive_path, extract_dir)

        self.ensure_expected_results(added_files=added_files)
예제 #2
0
파일: test_util.py 프로젝트: mikekap/mrjob
    def archive_and_unarchive(self,
                              extension,
                              archive_template,
                              added_files=[]):
        """Archive ``a/`` with an external command, unpack it into ``b/``
        with ``unarchive()``, then check the extracted contents.

        ``extension`` is appended to ``'a.'`` to name the archive.
        ``archive_template`` is a list of command arguments; each one is
        %-formatted with the keys ``archive_name`` and ``files_to_archive``.
        ``added_files`` is forwarded unchanged to
        ``ensure_expected_results()`` (it is never mutated here).

        The test is skipped when the archiving command is not installed or
        exits with a non-zero status.
        """
        # local import so this method does not depend on the module header
        import errno

        join = os.path.join

        # archive it up
        archive_name = 'a.' + extension
        variables = dict(archive_name=join('..', archive_name),
                         files_to_archive='.')
        archive_command = [arg % variables for arg in archive_template]

        # sometimes the relevant command isn't available or doesn't work;
        # if so, skip the test
        try:
            proc = Popen(archive_command,
                         cwd=join(self.tmp_dir, 'a'),
                         stdout=PIPE,
                         stderr=PIPE)
        except OSError as e:
            # Popen raises OSError with ENOENT when the executable cannot
            # be found; without this the test errors out instead of skipping
            if e.errno != errno.ENOENT:
                raise
            self.skipTest("No %s command" % archive_command[0])
        proc.communicate()  # discard output
        if proc.returncode != 0:
            self.skipTest("Can't run command to create archive.")

        # unarchive it into b/
        unarchive(join(self.tmp_dir, archive_name), join(self.tmp_dir, 'b'))

        self.ensure_expected_results(added_files=added_files)
예제 #3
0
    def _setup_working_dir(self):
        """Create the working directory and fill it from ``self._files``.

        The script and wrapper script (when set) are first flagged for
        upload as plain files. Each entry in ``self._files`` whose
        ``'upload'`` value is ``'file'`` is passed to
        ``_symlink_to_file_or_copy()``; each ``'archive'`` entry is unpacked
        with ``unarchive()``. Other entries are ignored.

        Nothing is returned; the directory path is stored in
        ``self._working_dir``.
        """
        # the script and its wrapper travel with the other uploaded files
        if self._script:
            self._script['upload'] = 'file'
        if self._wrapper_script:
            self._wrapper_script['upload'] = 'file'

        tmp_root = self._get_local_tmp_dir()
        self._working_dir = os.path.join(tmp_root, 'working_dir')
        self.mkdir(self._working_dir)

        # every entry needs a name before it can be placed in the directory
        self._name_files()
        for entry in self._files:
            src = entry['path']
            target = os.path.join(self._working_dir, entry['name'])
            upload_kind = entry.get('upload')

            if upload_kind == 'archive':
                log.debug('unarchiving %s -> %s' % (src, target))
                unarchive(src, target)
            elif upload_kind == 'file':
                self._symlink_to_file_or_copy(src, target)
예제 #4
0
    def _setup_working_dir(self):
        """Populate the working directory from ``self._files``, creating it
        first if ``self._working_dir`` is not set yet.

        The script and wrapper script (when set) are flagged for upload as
        plain files. ``'file'`` entries go through
        ``_symlink_to_file_or_copy()`` and ``'archive'`` entries are
        unpacked with ``unarchive()``; any other entry is skipped.

        Nothing is returned.
        """
        # the script and its wrapper travel with the other uploaded files
        if self._script:
            self._script['upload'] = 'file'
        if self._wrapper_script:
            self._wrapper_script['upload'] = 'file'

        # only build the directory once; later calls reuse it
        if not self._working_dir:
            tmp_root = self._get_local_tmp_dir()
            self._working_dir = os.path.join(tmp_root, 'working_dir')
            self.mkdir(self._working_dir)

        self._name_files()
        for entry in self._files:
            src = entry['path']
            entry_name = entry['name']
            target = os.path.join(self._working_dir, entry_name)
            upload_kind = entry.get('upload')

            if upload_kind == 'file':
                self._symlink_to_file_or_copy(src, target)
                continue

            if upload_kind == 'archive':
                log.debug('unarchiving %s -> %s' % (src, target))
                unarchive(src, target)
예제 #5
0
파일: util_test.py 프로젝트: gimlids/LTPM
    def archive_and_unarchive(self, extension, archive_template,
                              added_files=[]):
        """Archive ``a/`` with an external command, unpack it into ``b/``
        with ``unarchive()``, then check the extracted contents.

        ``archive_template`` is a list of command arguments; each one is
        %-formatted with the keys ``archive_name`` and ``files_to_archive``.
        The command is run with ``check_call`` from inside ``<tmp_dir>/a``.
        ``added_files`` is forwarded unchanged to
        ``ensure_expected_results()``.
        """
        archive_name = 'a.' + extension

        # the command runs from inside a/, so the archive is written one
        # level up, directly into the temp dir
        substitutions = {
            'archive_name': os.path.join('..', archive_name),
            'files_to_archive': '.',
        }
        archive_command = [part % substitutions for part in archive_template]
        check_call(archive_command, cwd=os.path.join(self.tmp_dir, 'a'))

        archive_path = os.path.join(self.tmp_dir, archive_name)
        extract_dir = os.path.join(self.tmp_dir, 'b')
        unarchive(archive_path, extract_dir)

        self.ensure_expected_results(added_files=added_files)
예제 #6
0
    def archive_and_unarchive(self,
                              extension,
                              archive_template,
                              added_files=[]):
        """Create an archive of ``a/`` via an external command, extract it
        into ``b/`` with ``unarchive()`` and verify what came out.

        Every element of ``archive_template`` is %-formatted with
        ``archive_name`` and ``files_to_archive`` to build the command,
        which ``check_call`` runs from inside ``<tmp_dir>/a``.
        ``added_files`` is handed on to ``ensure_expected_results()``.
        """
        tmp_dir = self.tmp_dir
        archive_name = 'a.' + extension

        # relative to a/ (the command's cwd), the archive lives in ../
        format_args = {
            'archive_name': os.path.join('..', archive_name),
            'files_to_archive': '.',
        }
        archive_command = []
        for template_arg in archive_template:
            archive_command.append(template_arg % format_args)

        check_call(archive_command, cwd=os.path.join(tmp_dir, 'a'))

        # extract into b/ and compare against the expected file list
        unarchive(os.path.join(tmp_dir, archive_name),
                  os.path.join(tmp_dir, 'b'))
        self.ensure_expected_results(added_files=added_files)
예제 #7
0
파일: local.py 프로젝트: Infolaber/mrjob
    def _setup_working_dir(self):
        """Populate the working directory, creating it first if
        ``self._working_dir`` is not set yet.

        Every ``'file'`` entry reported by ``self._working_dir_mgr`` is
        passed to ``_symlink_to_file_or_copy()`` and every ``'archive'``
        entry is unpacked with ``unarchive()``, each under its assigned
        name inside the working directory.

        Nothing is returned.
        """
        # create the working directory
        if not self._working_dir:
            self._working_dir = os.path.join(
                self._get_local_tmp_dir(), 'working_dir')
            self.mkdir(self._working_dir)

        # .items() rather than .iteritems(): the latter does not exist on
        # Python 3 dicts, while .items() works on both major versions
        files = self._working_dir_mgr.name_to_path('file')
        for name, path in files.items():
            dest = os.path.join(self._working_dir, name)
            self._symlink_to_file_or_copy(path, dest)

        archives = self._working_dir_mgr.name_to_path('archive')
        for name, path in archives.items():
            dest = os.path.join(self._working_dir, name)
            log.debug('unarchiving %s -> %s' % (path, dest))
            unarchive(path, dest)
예제 #8
0
파일: sim.py 프로젝트: wanglt311/mrjob
    def _setup_working_dir(self, working_dir):
        """Create ``working_dir`` and populate it.

        Every ``'file'`` entry reported by ``self._working_dir_mgr`` is
        passed to ``_symlink_to_file_or_copy()`` and every ``'archive'``
        entry is unpacked with ``unarchive()``, each under its assigned
        name inside ``working_dir``.

        Nothing is returned.
        """
        log.debug('setting up working dir in %s' % working_dir)
        self.fs.mkdir(working_dir)

        # plain files first ...
        file_paths = self._working_dir_mgr.name_to_path('file')
        for file_name, src in file_paths.items():
            target = os.path.join(working_dir, file_name)
            self._symlink_to_file_or_copy(src, target)

        # ... then archives, which are expanded into a path of their own
        archive_paths = self._working_dir_mgr.name_to_path('archive')
        for archive_name, src in archive_paths.items():
            target = os.path.join(working_dir, archive_name)
            log.debug('unarchiving %s -> %s' % (src, target))
            unarchive(src, target)
예제 #9
0
파일: sim.py 프로젝트: PythonCharmers/mrjob
    def _setup_working_dir(self, working_dir):
        """Create ``working_dir`` and populate it.

        Every ``'file'`` entry reported by ``self._working_dir_mgr`` is
        passed to ``_symlink_to_file_or_copy()`` and every ``'archive'``
        entry is unpacked with ``unarchive()``, each under its assigned
        name inside ``working_dir``.

        Nothing is returned.
        """
        log.debug('setting up working dir in %s' % working_dir)

        # create the working directory
        self.mkdir(working_dir)

        # .items() rather than .iteritems(): the latter does not exist on
        # Python 3 dicts, while .items() works on both major versions
        files = self._working_dir_mgr.name_to_path('file').items()
        for name, path in files:
            dest = os.path.join(working_dir, name)
            self._symlink_to_file_or_copy(path, dest)

        archives = self._working_dir_mgr.name_to_path('archive').items()
        for name, path in archives:
            dest = os.path.join(working_dir, name)
            log.debug('unarchiving %s -> %s' % (path, dest))
            unarchive(path, dest)
예제 #10
0
파일: test_util.py 프로젝트: bchess/mrjob
    def archive_and_unarchive(self, extension, archive_template, added_files=[]):
        """Archive ``a/`` with an external command, unpack it into ``b/``
        with ``unarchive()``, then check the extracted contents.

        ``extension`` is appended to ``"a."`` to name the archive.
        ``archive_template`` is a list of command arguments; each one is
        %-formatted with the keys ``archive_name`` and ``files_to_archive``.
        ``added_files`` is forwarded unchanged to
        ``ensure_expected_results()`` (it is never mutated here).

        The test is skipped when the archiving command is not installed or
        exits with a non-zero status.
        """
        # local import so this method does not depend on the module header
        import errno

        join = os.path.join

        # archive it up
        archive_name = "a." + extension
        variables = dict(archive_name=join("..", archive_name), files_to_archive=".")
        archive_command = [arg % variables for arg in archive_template]

        # sometimes the relevant command isn't available or doesn't work;
        # if so, skip the test
        try:
            proc = Popen(archive_command, cwd=join(self.tmp_dir, "a"), stdout=PIPE, stderr=PIPE)
        except OSError as e:
            # Popen raises OSError with ENOENT when the executable cannot
            # be found; without this the test errors out instead of skipping
            if e.errno != errno.ENOENT:
                raise
            self.skipTest("No %s command" % archive_command[0])
        proc.communicate()  # discard output
        if proc.returncode != 0:
            self.skipTest("Can't run command to create archive.")

        # unarchive it into b/
        unarchive(join(self.tmp_dir, archive_name), join(self.tmp_dir, "b"))

        self.ensure_expected_results(added_files=added_files)
예제 #11
0
    def _setup_working_dir(self):
        """Populate the working directory, creating it first if
        ``self._working_dir`` is not set yet.

        Every ``'file'`` entry reported by ``self._working_dir_mgr`` is
        passed to ``_symlink_to_file_or_copy()`` and every ``'archive'``
        entry is unpacked with ``unarchive()``, each under its assigned
        name inside the working directory.

        Nothing is returned.
        """
        # create the working directory
        if not self._working_dir:
            self._working_dir = os.path.join(self._get_local_tmp_dir(),
                                             'working_dir')
            self.mkdir(self._working_dir)

        # .items() rather than .iteritems(): the latter does not exist on
        # Python 3 dicts, while .items() works on both major versions
        files = self._working_dir_mgr.name_to_path('file').items()
        for name, path in files:
            dest = os.path.join(self._working_dir, name)
            self._symlink_to_file_or_copy(path, dest)

        archives = self._working_dir_mgr.name_to_path('archive').items()
        for name, path in archives:
            dest = os.path.join(self._working_dir, name)
            log.debug('unarchiving %s -> %s' % (path, dest))
            unarchive(path, dest)
예제 #12
0
파일: sim.py 프로젝트: Affirm/mrjob
    def _create_dist_cache_dir(self, step_num):
        """Build the simulated Distributed Cache directory for a step.

        Files known to ``self._working_dir_mgr`` are copied into the cache
        directory and archives are unpacked into it. Each result is then
        passed to ``_chmod_u_rx`` (recursively for unpacked archives).
        """
        dist_cache_dir = self._dist_cache_dir(step_num)

        log.debug(
            'creating simulated Distributed Cache dir: %s' % dist_cache_dir)
        self.fs.mkdir(dist_cache_dir)

        # plain files are copied as-is
        files_by_name = self._working_dir_mgr.name_to_path('file')
        for file_name, src in files_by_name.items():
            target = self._path_in_dist_cache_dir(file_name, step_num)
            log.debug('copying %s -> %s' % (src, target))
            shutil.copy(src, target)
            _chmod_u_rx(target)

        # archives are expanded, and the whole extracted tree is chmod-ed
        archives_by_name = self._working_dir_mgr.name_to_path('archive')
        for archive_name, src in archives_by_name.items():
            target = self._path_in_dist_cache_dir(archive_name, step_num)
            log.debug('unarchiving %s -> %s' % (src, target))
            unarchive(src, target)
            _chmod_u_rx(target, recursive=True)
예제 #13
0
    def _create_dist_cache_dir(self, step_num):
        """Set up the directory that stands in for Hadoop's Distributed
        Cache during step ``step_num``.

        Working-directory files are copied in with ``shutil.copy`` and
        archives are expanded with ``unarchive()``; ``_chmod_u_rx`` is
        applied to every copied file and, recursively, to every expanded
        archive.
        """
        shared_dir = self._dist_cache_dir(step_num)
        log.debug('creating simulated Distributed Cache dir: %s' % shared_dir)
        self.fs.mkdir(shared_dir)

        mgr = self._working_dir_mgr

        for entry_name, origin in mgr.name_to_path('file').items():
            cached_path = self._path_in_dist_cache_dir(entry_name, step_num)
            log.debug('copying %s -> %s' % (origin, cached_path))
            shutil.copy(origin, cached_path)
            _chmod_u_rx(cached_path)

        for entry_name, origin in mgr.name_to_path('archive').items():
            cached_path = self._path_in_dist_cache_dir(entry_name, step_num)
            log.debug('unarchiving %s -> %s' % (origin, cached_path))
            unarchive(origin, cached_path)
            _chmod_u_rx(cached_path, recursive=True)
예제 #14
0
    def mapper(self, key, value):
        """Compute the BIC score for one pair of GMMs.

        ``key`` unpacks to the two GMM indices ``(index1, index2)`` and
        ``value`` to ``(didx1, didx2, em_iters)``. The data rows selected
        by ``didx1`` and ``didx2`` are read from ``'self_X'`` and
        concatenated, the two pickled GMMs are loaded from the unpacked
        ``gmm.tgz`` archive, and ``compute_distance_BIC`` is run on them.

        Yields a single ``(1, (score, index1, index2))`` pair. Timing for
        each phase is written to stderr.
        """
        overall = time.time()

        index1, index2 = key
        didx1, didx2, em_iters = value

        t = time.time()
        d1 = tools.get_data_from_file_from_indices('self_X', didx1)
        d2 = tools.get_data_from_file_from_indices('self_X', didx2)
        sys.stderr.write("get_data_from_file_from_indices: {0}\n".format(time.time()-t))
        data = np.concatenate((d1, d2))

        t = time.time()
        util.unarchive('gmm.tgz', 'gmm')
        # NOTE(review): pickle.load can execute arbitrary code; this is only
        # safe if gmm.tgz is produced by a trusted earlier step -- confirm.
        # Binary mode is what pickle expects, and the with-blocks make sure
        # both file handles are closed.
        with open('gmm/' + str(index1), 'rb') as gmm_file:
            g1 = pickle.load(gmm_file)
        with open('gmm/' + str(index2), 'rb') as gmm_file:
            g2 = pickle.load(gmm_file)
        sys.stderr.write("read iter_gmm_list: {0}\n".format(time.time()-t))

        t = time.time()
        # the merged GMM is not emitted, only its score; errors propagate
        _, score = compute_distance_BIC(g1, g2, data, em_iters)
        data_to_yield = (score, index1, index2)
        sys.stderr.write("compute_distance_BIC: {0}\n".format(time.time()-t))
        sys.stderr.write("total BIC time: {0}\n".format(time.time()-overall))
        yield 1, data_to_yield