def get_corpus_status(self):
    '''
    Return per-day counts of the keys stored under the corpus path.

    Lists every key found under C{self.remote_path_corpus} in
    C{self.bucket} and groups them by the calendar date of their
    C{last_modified} timestamp. Keys whose name ends with "/" are
    treated as folder placeholders and are not counted.

    @rtype: dict
    @return: Dictionary mapping a "YYYY-MM-DD" date string to the number
             of corpus keys last modified on that date
    '''
    status_data = {}

    # Iterate the listing directly instead of copying it into a list first:
    # each key is only needed once, so there is no reason to hold them all.
    for remote_key in self.bucket.list(self.remote_path_corpus):
        # Ignore any folders
        if remote_key.name.endswith("/"):
            continue

        # NOTE(review): the date is taken from the parsed timestamp as-is;
        # presumably UTC as reported by S3 -- confirm if local dates matter.
        dt = boto_parse_ts(remote_key.last_modified)
        date_str = "%s-%02d-%02d" % (dt.year, dt.month, dt.day)

        status_data[date_str] = status_data.get(date_str, 0) + 1

    return status_data