def split(filenames, format_string, shards):
    """Build the FileFormatRoot list used to shard the given input files.

    The format string is parsed first, then every input file is stat'ed to
    learn its size. The total size divided by ``shards`` gives the target
    number of bytes per shard, which is handed to the splitting helper.
    The number of roots returned is less than or equal to ``shards``.

    Args:
      filenames: input filenames.
      format_string: format string from user.
      shards: number of shards to split inputs.

    Returns:
      A list of FileFormatRoot, or None when the combined size of the input
      files is zero (all files are empty, or there are no files).
    """
    formats = parser.parse(format_string)

    file_sizes = []
    for name in filenames:
        file_sizes.append(files.stat(name).st_size)

    # TODO(user): add min shard size protection if needed.
    bytes_per_shard = float(sum(file_sizes)) / shards
    if not bytes_per_shard:
        # Nothing to read, so there is nothing to split.
        return None

    # The outermost format decides the strategy: if it cannot be split,
    # fall back to the shallow split, which also needs the file sizes.
    if not formats[0].can_split():
        return _shallow_split(filenames, bytes_per_shard, formats, file_sizes)
    return _deep_split(filenames, bytes_per_shard, formats)
def assertResultEquals(self, expected, format_string):
    """Assert that parsing format_string renders to the expected text.

    The format string is parsed with parser.parse, each parsed element is
    converted with unicode(), and the pieces are joined by single spaces
    before being compared with ``expected``.

    Args:
      expected: the expected space-joined rendering of the parsed formats.
      format_string: the format string to parse.
    """
    rendered = ' '.join(unicode(f) for f in parser.parse(format_string))
    # assertEqual is the canonical name; the assertEquals alias is
    # deprecated and no longer exists in recent unittest versions.
    self.assertEqual(expected, rendered)
def assertResultEquals(self, expected, format_string):
    """Check the space-joined rendering of the parsed format string.

    Parses ``format_string`` via parser.parse, converts every resulting
    element to unicode, joins them with a single space and asserts that
    the outcome equals ``expected``.

    Args:
      expected: the expected space-joined rendering of the parsed formats.
      format_string: the format string to parse.
    """
    parsed = parser.parse(format_string)
    actual = ' '.join(unicode(fmt) for fmt in parsed)
    # Use assertEqual rather than the deprecated assertEquals alias, which
    # has been removed from newer unittest releases.
    self.assertEqual(expected, actual)