def fill_from_path(spec, partition_path, reuse):
    """Fill partition data from a ``name=value/name=value`` style path.

    Args:
        spec: partition spec; ``spec.fields`` provides the expected fields in
            order, and each field's ``name`` must match the corresponding
            path segment.
        partition_path: "/"-separated segments, one ``name=value`` pair per
            field in ``spec.fields``.
        reuse: partition data object to fill in place, or ``None`` to create
            a fresh one with ``DataFiles.new_partition_data(spec)``.

    Returns:
        The populated partition data (``reuse`` itself when one was passed).

    Raises:
        RuntimeError: if the number of segments differs from the number of
            spec fields, or if a segment is not ``<field name>=<value>``.
    """
    data = reuse
    if data is None:
        data = DataFiles.new_partition_data(spec)

    partitions = partition_path.split("/")
    expected = len(spec.fields)
    if len(partitions) > expected:
        raise RuntimeError(
            "Invalid partition data, too many fields (expecting %s): %s"
            % (expected, partition_path))
    if len(partitions) < expected:
        raise RuntimeError(
            "Invalid partition data, not enough fields(expecting %s): %s"
            % (expected, partition_path))

    for i, part in enumerate(partitions):
        field = spec.fields[i]
        # Split on the FIRST "=" only: everything after it is the value, so a
        # value that itself contains "=" is no longer rejected as malformed.
        name, separator, value = part.partition("=")
        if not separator or name != field.name:
            raise RuntimeError("Invalid partition: %s" % part)

        data.set(
            i, Conversions.from_partition_string(data.get_type(i), value))

    return data
def strict_file():
    """Build the ``file.avro`` ``TestDataFile`` with bounds on ids 1 and 7.

    The positional arguments after the empty row and the literal ``50`` are
    four dicts keyed by integer ids. NOTE(review): presumably these are value
    counts, null value counts, lower bounds and upper bounds, in that order,
    and ``50`` is presumably the record count -- confirm against
    ``TestDataFile``.
    """

    def int_buffer(value):
        # Serialize an integer with the integer type, one type lookup per call.
        return Conversions.to_byte_buffer(IntegerType.get(), value)

    row = TestHelpers.Row.of()
    counts = {4: 50, 5: 50, 6: 50}
    null_counts = {4: 50, 5: 10, 6: 0}
    # Id 7 has identical lower and upper bounds (5).
    lower = {key: int_buffer(bound) for key, bound in ((1, 30), (7, 5))}
    upper = {key: int_buffer(bound) for key, bound in ((1, 79), (7, 5))}

    return TestDataFile(
        "file.avro", row, 50, counts, null_counts, lower, upper)
def file():
    """Build the ``file.avro`` ``TestDataFile`` with bounds on id 1 only.

    Passes an empty row, the literal ``50``, and four dicts keyed by integer
    ids: value counts, null value counts, lower bounds and upper bounds.
    NOTE(review): ``50`` is presumably the record count -- confirm against
    ``TestDataFile``.
    """
    row = TestHelpers.Row.of()

    value_counts = {4: 50, 5: 50, 6: 50}
    null_value_counts = {4: 50, 5: 10, 6: 0}

    # Bounds are stored as serialized integers: 30 <= id 1 <= 79.
    lower_bounds = {1: Conversions.to_byte_buffer(IntegerType.get(), 30)}
    upper_bounds = {1: Conversions.to_byte_buffer(IntegerType.get(), 79)}

    return TestDataFile(
        "file.avro",
        row,
        50,
        value_counts,
        null_value_counts,
        lower_bounds,
        upper_bounds,
    )
def inc_man_file():
    """Build the ``manifest-list.avro`` ``TestManifestFile`` fixture.

    The manifest carries the current time in epoch milliseconds as its fourth
    argument and a tuple of four ``TestFieldSummary`` entries: integer bounds
    30..79, an entry with no bounds, and two string entries bounded 'a'..'z'.
    NOTE(review): the leading boolean of each summary is presumably a
    "contains null" flag, and the remaining integers (1024, 0, 5, 10, 0) are
    presumably length / spec id / file counts -- confirm against
    ``TestManifestFile`` and ``TestFieldSummary``.
    """
    now_millis = int(time.time() * 1000)

    def bounded(flag, value_type, low, high):
        # Serialize both bounds with the same type; the type is looked up
        # once per bound.
        return TestFieldSummary(
            flag,
            Conversions.to_byte_buffer(value_type.get(), low),
            Conversions.to_byte_buffer(value_type.get(), high),
        )

    summaries = (
        bounded(False, IntegerType, 30, 79),
        TestFieldSummary(True, None, None),
        bounded(True, StringType, "a", "z"),
        bounded(False, StringType, "a", "z"),
    )

    return TestManifestFile(
        "manifest-list.avro", 1024, 0, now_millis, 5, 10, 0, summaries)