Esempio n. 1
0
class Avro_Merger(object):
    """Merge every ``.avro`` file found in a directory into one output file.

    Each input file is treated as: first record = meta data, last record =
    stats, everything in between = data.  The merged output keeps the meta
    data record of the first file only, the data records of every file, and
    a single stats record (the one from the last file read) at the very end.
    """

    # True once at least one record has been handed to the writer; from then
    # on the leading meta data record of each further input file is skipped.
    _merge_started = False
    _avro_extention = '.avro'
    # Most recent "last record" seen in an input file; appended by merge().
    _avro_stats_record = None

    def __init__(self, path, new_filename):
        """Collect the input files and open the output writer.

        Args:
            path: Directory that is scanned for files ending in ``.avro``.
            new_filename: Path of the merged avro file to create.

        Raises:
            avro.schema.AvroException: If listing the directory, reading the
                schema or opening the output file fails.
        """
        # Remembered so merge() resolves file names against the same
        # directory that was listed here.
        self._path = path
        try:
            # os.listdir() order is arbitrary; sort so that the merge order
            # (and therefore which stats record ends up last) is repeatable.
            self._avro_files = sorted(
                name for name in os.listdir(path)
                if name.endswith(self._avro_extention)
            )
            # NOTE(review): `schema_file` is not defined in this class; it is
            # presumably a module-level name set elsewhere in the file --
            # confirm.
            with open(schema_file) as schema_handle:
                schema = avro.schema.parse(schema_handle.read())
            # Avro container files are binary, hence 'wb'.
            output = open(new_filename, 'wb')
            try:
                self._writter = DataFileWriter(
                    output, DatumWriter(), schema, 'deflate')
            except Exception:
                output.close()
                raise
        except Exception as e:
            raise avro.schema.AvroException(e)

    def flog_metadata_handler(func):
        """Decorator handling the meta data and stats records of one file.

        The wrapped method receives an iterator over the data records only:
        the leading meta data record is dropped for every file after the
        first, and the final record of the file is withheld and stored in
        ``self._avro_stats_record`` instead of being passed on.
        """
        from functools import wraps

        @wraps(func)
        def wrapper(self, avro_records):
            """Strip meta data / stats records, then call the wrapped method."""
            records = iter(avro_records)

            # Handle meta data: only the first merged file keeps its leading
            # record.  This uses the _merge_started flag rather than the
            # writer position check flagged by the original TODO.
            if self._merge_started:
                next(records, None)

            # Handle stats line: stream the records with a one-record
            # look-behind so the last one can be withheld without consuming
            # the reader up front.
            data_records = self._hold_back_stats(records)
            result = func(self, data_records)

            # Make sure the stats record is captured even if the wrapped
            # method stopped iterating early.
            for _ in data_records:
                pass
            return result

        return wrapper

    def _hold_back_stats(self, records):
        """Yield all records but the last; keep the last as the stats record."""
        has_pending = False
        pending = None
        for record in records:
            if has_pending:
                self._merge_started = True
                yield pending
            pending = record
            has_pending = True
        # An input without records leaves the previous stats record in place.
        if has_pending:
            self._avro_stats_record = pending

    @flog_metadata_handler
    def consume_avro(self, avro_records):
        """Write the avro data records from the buffer to the output file.

        Args:
            avro_records: Iterable of records read from one input file.
        """
        # Explicit loop: map() is lazy on Python 3 and would write nothing.
        for record in avro_records:
            self._writter.append(record)

    def merge(self):
        """Loop through the avro files, merge each one and close the output.

        Raises:
            avro.schema.AvroException: If an input file cannot be opened as
                an avro container file.
        """
        try:
            for file_ in self._avro_files:
                try:
                    source = open(os.path.join(self._path, file_), 'rb')
                    try:
                        avro_records = DataFileReader(source, DatumReader())
                    except Exception:
                        source.close()
                        raise
                except Exception as e:
                    raise avro.schema.AvroException(e)

                # Consume the records!
                try:
                    self.consume_avro(avro_records)
                finally:
                    avro_records.close()

            # Write stats data to the last of the file (if any file had one).
            if self._avro_stats_record is not None:
                self._writter.append(self._avro_stats_record)
        finally:
            self._writter.close()