def is_done(self):
        """
        Returns `True` if the map-reduce job is done, and `False`
        otherwise.

        For all process-step output files that have not been checked
        yet, gets the first chunk of the output file, and checks whether
        its contents begin with the string 'FinalRank'.

        An iteration is only recorded as checked (in
        `self._last_process_step_iter_no`) after its output file has
        been read. If the bucket lookup or the read raises, the
        exception propagates and the same iteration is examined again on
        the next call instead of being silently skipped.

        If reading the first chunk raises `StopIteration` (no data to
        read), that iteration is treated as "not done".

        Special notes:
            WARNING! The usage of this method in your code requires
            that you used the default output directories in all calls
            to do_iter().
        """

        # Cache the result so we can return immediately without hitting
        # any of the Amazon APIs
        if self._is_done:
            return True

        iter_no = self._get_last_process_step_iter_no()
        if iter_no < 0:
            return False

        bucket = None
        while self._last_process_step_iter_no < iter_no:
            # Work on a local value; the instance counter is committed
            # only once this iteration's output has actually been read.
            i = self._last_process_step_iter_no + 1

            outdir = self._get_default_outdir('process', iter_no=i)
            keyname = self._get_keyname(outdir, 'part-00000')

            # The bucket does not depend on the iteration: look it up at
            # most once per call, and not at all if there is nothing new.
            if bucket is None:
                bucket = self._s3_conn.get_bucket(self._s3_bucket)
            key = Key(bucket=bucket, name=keyname)
            try:
                contents = key.next()  # get first chunk of the output file
            except StopIteration:
                # Presumably an empty output file -- TODO confirm against
                # the Key implementation. Either way there is no data that
                # could start with the marker, and letting StopIteration
                # escape could silently end a caller's loop.
                contents = ''

            self._last_process_step_iter_no = i

            if contents.startswith('FinalRank'):
                self._is_done = True  # cache result
                break

        return self._is_done
Esempio n. 2
0
    def is_done(self):
        """
        Returns `True` if the map-reduce job is done, and `False`
        otherwise.

        For all process-step output files that have not been checked
        yet, gets the first chunk of the output file, and checks whether
        its contents begin with the string 'FinalRank'.

        An iteration is only recorded as checked (in
        `self._last_process_step_iter_no`) after its output file has
        been read. If the bucket lookup or the read raises, the
        exception propagates and the same iteration is examined again on
        the next call instead of being silently skipped.

        If reading the first chunk raises `StopIteration` (no data to
        read), that iteration is treated as "not done".

        Special notes:
            WARNING! The usage of this method in your code requires
            that you used the default output directories in all calls
            to do_iter().
        """

        # Cache the result so we can return immediately without hitting
        # any of the Amazon APIs
        if self._is_done:
            return True

        iter_no = self._get_last_process_step_iter_no()
        if iter_no < 0:
            return False

        bucket = None
        while self._last_process_step_iter_no < iter_no:
            # Work on a local value; the instance counter is committed
            # only once this iteration's output has actually been read.
            i = self._last_process_step_iter_no + 1

            outdir = self._get_default_outdir('process', iter_no=i)
            keyname = self._get_keyname(outdir, 'part-00000')

            # The bucket does not depend on the iteration: look it up at
            # most once per call, and not at all if there is nothing new.
            if bucket is None:
                bucket = self._s3_conn.get_bucket(self._s3_bucket)
            key = Key(bucket=bucket, name=keyname)
            try:
                contents = key.next()  # get first chunk of the output file
            except StopIteration:
                # Presumably an empty output file -- TODO confirm against
                # the Key implementation. Either way there is no data that
                # could start with the marker, and letting StopIteration
                # escape could silently end a caller's loop.
                contents = ''

            self._last_process_step_iter_no = i

            if contents.startswith('FinalRank'):
                self._is_done = True  # cache result
                break

        return self._is_done