Exemple #1
0
def writeData(writer, filename, data):
    """Append a single record to ``writer``.

    The record key is a ``Text`` set to ``filename`` and the record value
    is a ``BytesWritable`` set to ``data``.
    """
    record_key = Text()
    record_key.set(filename)

    record_value = BytesWritable()
    record_value.set(data)

    writer.append(record_key, record_value)
Exemple #2
0
def convert_to_sequencefiles(cpp_encrypted_data):
    """Rewrite every ``data/cpp-part*`` file as a one-record SequenceFile.

    Each matching partition file is read fully into memory and stored as a
    single record (``IntWritable`` key set to 0, ``BytesWritable`` value set
    to the raw file bytes) in a new SequenceFile written next to it. The new
    file has the same name with the leading ``cpp-`` removed. The partition
    file is deleted after its SequenceFile has been written and closed.

    Args:
        cpp_encrypted_data: Directory whose ``data`` subdirectory holds the
            ``cpp-part*`` files.
    """
    temp_prefix = "cpp-"
    partition_pattern = os.path.join(cpp_encrypted_data, "data/cpp-part*")

    for partition_file in glob.glob(partition_pattern):
        # NOTE: the whole partition is held in memory because it is stored
        # as one BytesWritable value; large partitions need matching RAM.
        with open(partition_file, "rb") as partition:
            partition_data = partition.read()

        # Strip the prefix from the file name only. A plain
        # str.replace("cpp-", "") on the full path would also rewrite any
        # directory component that happens to contain "cpp-". The glob
        # pattern guarantees the file name starts with the prefix.
        directory, base_name = os.path.split(partition_file)
        output_partition_file = os.path.join(
            directory, base_name[len(temp_prefix):])

        sequence_file_writer = SequenceFile.createWriter(
            output_partition_file, IntWritable, BytesWritable)
        try:
            key = IntWritable()
            value = BytesWritable()
            key.set(0)
            value.set(partition_data)
            sequence_file_writer.append(key, value)
        finally:
            # Close the writer even when the append fails so the handle is
            # not leaked; the partition file is then left in place.
            sequence_file_writer.close()

        # Remove the temporary file only after a successful conversion.
        os.remove(partition_file)
Exemple #3
0
def sequence(file_out, s3_files_in, make_key, tempvaluefile="/tmp/temp.nc"):
    """Write one SequenceFile record per input file.

    For every entry of ``s3_files_in`` the entry is handed to
    ``get_s3_file`` together with ``tempvaluefile`` (presumably the local
    download target - confirm against ``get_s3_file``), the result is loaded
    with ``iris.load_cube``, and one record is appended to the output: a
    ``Text`` key set to ``make_key(cube)`` and a ``BytesWritable`` value set
    to the bytes read from ``tempvaluefile``.

    A warning is issued when a later input has the same
    ``str(cube.metadata)`` as an earlier one; the record is appended anyway.

    Args:
        file_out: Path of the SequenceFile to write.
        s3_files_in: Iterable of file paths to read from.
        make_key: Callable taking a cube and returning the record key.
        tempvaluefile: Local path passed to ``get_s3_file`` and read back
            as the record value.
    """
    # Only membership is tested, so a set gives O(1) lookups.
    seen_metadata = set()

    writer = SequenceFile.createWriter(file_out, Text, BytesWritable)
    try:
        for s3_file_in in s3_files_in:
            local_file = get_s3_file(s3_file_in, tempvaluefile)
            cube = iris.load_cube(local_file)

            metadata_id = str(cube.metadata)
            if metadata_id in seen_metadata:
                # NOTE(review): duplicates are detected on the metadata
                # string, not on make_key(cube), and the record is appended
                # rather than overwritten - confirm the intended wording.
                warnings.warn("Key for file " + local_file +
                              " already present - overwriting")
            key_writer = Text()
            key_writer.set(make_key(cube))
            seen_metadata.add(metadata_id)

            value_writer = BytesWritable()
            # A distinct name keeps the fetched file reference from being
            # shadowed by the open file handle.
            with open(tempvaluefile, "rb") as value_file:
                print(s3_file_in)
                value_writer.set(value_file.read())
            writer.append(key_writer, value_writer)
    finally:
        # Always release the writer, even if a fetch/load/append fails.
        writer.close()
def sequence(file_out, s3_files_in, make_key, tempvaluefile="/tmp/temp.nc"):
    """Write one SequenceFile record per input file.

    For every entry of ``s3_files_in`` the entry is handed to
    ``get_s3_file`` together with ``tempvaluefile`` (presumably the local
    download target - confirm against ``get_s3_file``), the result is loaded
    with ``iris.load_cube``, and one record is appended to the output: a
    ``Text`` key set to ``make_key(cube)`` and a ``BytesWritable`` value set
    to the bytes read from ``tempvaluefile``.

    A warning is issued when a later input has the same
    ``str(cube.metadata)`` as an earlier one; the record is appended anyway.

    Args:
        file_out: Path of the SequenceFile to write.
        s3_files_in: Iterable of file paths to read from.
        make_key: Callable taking a cube and returning the record key.
        tempvaluefile: Local path passed to ``get_s3_file`` and read back
            as the record value.
    """
    # Only membership is tested, so a set gives O(1) lookups.
    seen_metadata = set()

    writer = SequenceFile.createWriter(file_out, Text, BytesWritable)
    try:
        for s3_file_in in s3_files_in:
            local_file = get_s3_file(s3_file_in, tempvaluefile)
            cube = iris.load_cube(local_file)

            metadata_id = str(cube.metadata)
            if metadata_id in seen_metadata:
                # NOTE(review): duplicates are detected on the metadata
                # string, not on make_key(cube), and the record is appended
                # rather than overwritten - confirm the intended wording.
                warnings.warn("Key for file " + local_file +
                              " already present - overwriting")
            key_writer = Text()
            key_writer.set(make_key(cube))
            seen_metadata.add(metadata_id)

            value_writer = BytesWritable()
            # A distinct name keeps the fetched file reference from being
            # shadowed by the open file handle.
            with open(tempvaluefile, "rb") as value_file:
                print(s3_file_in)
                value_writer.set(value_file.read())
            writer.append(key_writer, value_writer)
    finally:
        # Always release the writer, even if a fetch/load/append fails.
        writer.close()
def writeData(writer):
    """Append one fixed sample record (key "A", value "B") to ``writer``.

    Before appending, prints ``writer.getLength()`` followed by the string
    forms of the key and the value.
    """
    sample_key = BytesWritable()
    sample_key.set("A")
    sample_value = BytesWritable()
    sample_value.set("B")

    status_line = '[%d] %s %s' % (
        writer.getLength(), sample_key.toString(), sample_value.toString())
    print(status_line)
    writer.append(sample_key, sample_value)
Exemple #6
0
 def writeData(self, key, value):
     """Append ``key``/``value`` to the current writer as one record.

     If the local day-of-month or hour differs from ``self.file_time``,
     ``self.writer`` is first replaced by the result of
     ``self.create_writer(self.writer)``. The key is wrapped in a ``Text``
     and the value in a ``BytesWritable``.
     """
     now = time.localtime()
     opened = self.file_time
     same_slot = (now.tm_mday == opened.tm_mday
                  and now.tm_hour == opened.tm_hour)
     if not same_slot:
         self.writer = self.create_writer(self.writer)

     record_key = Text()
     record_key.set(key)
     record_value = BytesWritable()
     record_value.set(value)

     self.writer.append(record_key, record_value)
def write_seq_file(file_name, data_dict):
    writer = SequenceFile.createWriter(file_name, Text, BytesWritable)
    for key, value in data_dict.iteritems():
        print key, ", " ,
        key_writer = Text()
        key_writer.set(key)
        
        value_writer = BytesWritable()
        iris.save(value, "temp.nc")
        with open("temp.nc", "rb") as f:
            value_writer.set(f.read())
        writer.append(key_writer, value_writer)
    writer.close()
def write_seq_file(file_name, data_dict):
    writer = SequenceFile.createWriter(file_name, Text, BytesWritable)
    for key, value in data_dict.iteritems():
        print key, ", ",
        key_writer = Text()
        key_writer.set(key)

        value_writer = BytesWritable()
        iris.save(value, "temp.nc")
        with open("temp.nc", "rb") as f:
            value_writer.set(f.read())
        writer.append(key_writer, value_writer)
    writer.close()
def writeData(writer):
    """Append one fixed sample record (key "A", value "B") to ``writer``.

    Before appending, prints ``writer.getLength()`` followed by the string
    forms of the key and the value.
    """
    sample_key = BytesWritable()
    sample_key.set("A")
    sample_value = BytesWritable()
    sample_value.set("B")

    status_line = '[%d] %s %s' % (
        writer.getLength(), sample_key.toString(), sample_value.toString())
    print(status_line)
    writer.append(sample_key, sample_value)
Exemple #10
0
def importSGY(sgyFilename, rddFilename):

    # os.remove(rddFilename)
    fp = open(sgyFilename, 'rb')
    writer = SequenceFile.createWriter(rddFilename, IntWritable, BytesWritable)

    SH = segypy.getSegyHeader(sgyFilename, 3600, segypy.endian)
    bps = segypy.getBytePerSample(SH)

    filesize = os.path.getsize(sgyFilename)
    samp_count = SH['ns']
    data_len = samp_count * bps
    trace_size = data_len + 240
    ntraces = (filesize - 3600) / trace_size

    data = fp.read(3600)
    for trace_num in range(ntraces):
        SegyTraceHeader = fp.read(240)
        SegyTraceData = fp.read(data_len)
        # FIXME: segypy.getValue is not correct
        SegyTraceData = segypy.getValue(
            SegyTraceData, 0, 'float', segypy.endian, samp_count)
        writer.append(IntWritable(trace_num), BytesWritable(
            str(SegyTraceHeader) + str(SegyTraceData)))