Example #1
0
    def get_corpus_status(self):
        '''
        Return the number of corpus files per last-modified date.

        Takes no arguments: keys are listed from self.bucket under the
        prefix self.remote_path_corpus. Keys whose name ends with "/"
        (folder placeholders) are skipped.

        @rtype: dict
        @return: Dictionary mapping a "year-month-day" date string (month
                 and day zero-padded, taken from each key's last_modified
                 timestamp) to the number of corpus files last modified on
                 that date. Empty when no files are found.
        '''
        status_data = {}

        # Consume the listing directly rather than copying every key into a
        # temporary list first; only the per-date counters are kept.
        for remote_key in self.bucket.list(self.remote_path_corpus):
            # Ignore any folders
            if remote_key.name.endswith("/"):
                continue

            dt = boto_parse_ts(remote_key.last_modified)
            date_str = "%s-%02d-%02d" % (dt.year, dt.month, dt.day)

            # First occurrence of a date starts from zero.
            status_data[date_str] = status_data.get(date_str, 0) + 1

        return status_data
Example #2
0
    def get_corpus_status(self):
        '''
        Return the number of corpus files per last-modified date.

        Takes no arguments: keys are listed from self.bucket under the
        prefix self.remote_path_corpus. Keys whose name ends with "/"
        (folder placeholders) are skipped.

        @rtype: dict
        @return: Dictionary mapping a "year-month-day" date string (month
                 and day zero-padded, taken from each key's last_modified
                 timestamp) to the number of corpus files last modified on
                 that date. Empty when no files are found.
        '''
        status_data = {}

        # Consume the listing directly rather than copying every key into a
        # temporary list first; only the per-date counters are kept.
        for remote_key in self.bucket.list(self.remote_path_corpus):
            # Ignore any folders
            if remote_key.name.endswith("/"):
                continue

            dt = boto_parse_ts(remote_key.last_modified)
            date_str = "%s-%02d-%02d" % (dt.year, dt.month, dt.day)

            # First occurrence of a date starts from zero.
            status_data[date_str] = status_data.get(date_str, 0) + 1

        return status_data