Ejemplo n.º 1
0
    def flush_data(self, dumper):
        """Write every buffered row of this table to ``dumper`` in sorted order.

        The in-memory buffer is sorted with ``_lines_compare``. When
        ``self._chunks`` is non-empty, the buffer is merged with those chunks
        through ``heapq.merge`` (which requires each chunk to be sorted
        already) and a new output named
        ``{counter}_{table_name}_{sequence}.sql`` is started each time the
        current one grows beyond ``self._max_chunk_size``. Every run of rows
        is framed by ``self._start_line`` and ``_END_COPY_LINE``.

        Afterwards all chunks are closed, the buffers are reset and the table
        is marked as flushed.

        :param dumper: output sink providing ``append``, ``add_lines``,
            ``flush`` and ``new_output``.
        :raises ValueError: if the table has already been flushed.
        """
        if self._flushed:
            raise ValueError(
                "Can't add table data, table interaction already flushed")

        # Local import: keeps this method self-contained.
        import functools

        importer.verbose("Storing %s data, %d lines in memory, size %d" %
                         (self._table_name, len(self._buf), self._buf_size))
        # ``list.sort(cmp=...)`` only exists on Python 2; ``cmp_to_key``
        # yields the same ordering on Python 2.7 and Python 3.
        self._buf.sort(key=functools.cmp_to_key(_lines_compare))

        # Both paths open the first run of rows with the start line.
        dumper.append(self._start_line)

        if self._chunks:
            # Multiple chunks: lazily merge the memory buffer with them.
            sequence = 1
            output_size = 0
            sorted_memory = ((key(line), line) for line in self._buf)
            sorted_chunks = (((key(line), line) for line in chunk)
                             for chunk in self._chunks)
            for _key, _line in heapq.merge(sorted_memory, *sorted_chunks):
                dumper.append(_line)
                output_size += len(_line)
                if output_size > self._max_chunk_size:
                    # Current output is full: terminate it and continue in
                    # a freshly named one.
                    dumper.append(_END_COPY_LINE)
                    dumper.flush()

                    dumper.new_output(
                        '{counter}_{table_name}_{sequence}.sql'.format(
                            counter=self._counter,
                            table_name=self._table_name,
                            sequence=importer.str_in_base(sequence,
                                                          min_with=4)))
                    output_size = 0
                    sequence += 1

                    dumper.append(self._start_line)
        else:
            # Everything is in memory: emit the sorted buffer in one go.
            dumper.add_lines(self._buf)

        dumper.append(_END_COPY_LINE)
        dumper.flush()

        # Release the chunks and drop all buffered state.
        for chunk in self._chunks:
            chunk.close()
        self._chunks = []
        self._buf_size = 0
        self._buf = []

        self._flushed = True
Ejemplo n.º 2
0
    def add_line(self, line):
        """Buffer one data line, spilling the buffer to disk once it is full.

        When the accumulated length of the buffered lines exceeds
        ``self._max_chunk_size``, the buffer is sorted with
        ``_lines_compare``, written to a temporary file that is rewound and
        appended to ``self._chunks``, and the in-memory buffer is emptied.

        :param line: the line to store; ``len(line)`` counts towards the
            buffer size.
        :raises ValueError: if the table has already been flushed.
        """
        if self._flushed:
            raise ValueError("Can't add table data, table interaction already flushed")
        self._buf.append(line)
        self._buf_size += len(line)
        if self._buf_size <= self._max_chunk_size:
            return

        # Local import on the spill path only, so the per-line fast path
        # above stays free of extra work.
        import functools

        importer.verbose("Splitting %s temporary data, %d-th part, %d lines, size %d" %
                         (self._table_name, len(self._chunks) + 1, len(self._buf), self._buf_size))

        # ``list.sort(cmp=...)`` only exists on Python 2; ``cmp_to_key``
        # yields the same ordering on Python 2.7 and Python 3.
        self._buf.sort(key=functools.cmp_to_key(_lines_compare))
        # "r+w" is not a valid mode string (Python 3 rejects it with
        # ValueError); "w+" opens the new file for both writing and reading.
        chunk = tempfile.TemporaryFile("w+")
        chunk.writelines(self._buf)
        # Rewind so the chunk can later be read back from its first line.
        chunk.seek(0)

        self._chunks.append(chunk)
        self._buf_size = 0
        self._buf = []
Ejemplo n.º 3
0
    def flush_data(self, dumper):
        """Write every buffered row of this table to ``dumper`` in sorted order.

        The in-memory buffer is sorted with ``_lines_compare``. When
        ``self._chunks`` is non-empty, the buffer is merged with those chunks
        through ``heapq.merge`` (which requires each chunk to be sorted
        already) and a new output named
        ``{counter}_{table_name}_{sequence}.sql`` is started each time the
        current one grows beyond ``self._max_chunk_size``. Every run of rows
        is framed by ``self._start_line`` and ``_END_COPY_LINE``.

        Afterwards all chunks are closed, the buffers are reset and the table
        is marked as flushed.

        :param dumper: output sink providing ``append``, ``add_lines``,
            ``flush`` and ``new_output``.
        :raises ValueError: if the table has already been flushed.
        """
        if self._flushed:
            raise ValueError("Can't add table data, table interaction already flushed")

        # Local import: keeps this method self-contained.
        import functools

        importer.verbose("Storing %s data, %d lines in memory, size %d" %
                         (self._table_name, len(self._buf), self._buf_size))
        # ``list.sort(cmp=...)`` only exists on Python 2; ``cmp_to_key``
        # yields the same ordering on Python 2.7 and Python 3.
        self._buf.sort(key=functools.cmp_to_key(_lines_compare))

        if self._chunks:
            # Multiple chunks: merge them with the memory buffer.
            sequence = 1
            dumper.append(self._start_line)
            output_size = 0
            # Generators instead of lists: ``heapq.merge`` consumes its
            # inputs lazily, so each chunk is streamed line by line rather
            # than being loaded back into memory in full.
            sorted_memory = ((key(line), line) for line in self._buf)
            sorted_chunks = (((key(line), line) for line in chunk)
                             for chunk in self._chunks)
            for _key, _line in heapq.merge(sorted_memory, *sorted_chunks):
                dumper.append(_line)
                output_size += len(_line)
                if output_size > self._max_chunk_size:
                    # Current output is full: terminate it and continue in
                    # a freshly named one.
                    dumper.append(_END_COPY_LINE)
                    dumper.flush()

                    dumper.new_output('{counter}_{table_name}_{sequence}.sql'.format(
                        counter=self._counter, table_name=self._table_name,
                        sequence=importer.str_in_base(sequence, min_with=4)))
                    output_size = 0
                    sequence += 1

                    dumper.append(self._start_line)
        else:
            # Everything is in memory: emit the sorted buffer in one go.
            dumper.append(self._start_line)
            dumper.add_lines(self._buf)

        dumper.append(_END_COPY_LINE)
        dumper.flush()

        # Release the chunks and drop all buffered state.
        for chunk in self._chunks:
            chunk.close()
        self._chunks = []
        self._buf_size = 0
        self._buf = []

        self._flushed = True
Ejemplo n.º 4
0
    def flush_data(self, dumper):
        """Write every buffered row of this table to ``dumper`` as INSERT rows.

        The in-memory buffer is sorted with ``_lines_compare`` and merged with
        the chunks in ``self._chunks`` through ``heapq.merge`` (which requires
        each chunk to be sorted already). Each row is emitted as
        ``(<row>)`` followed by ``,`` or, when it closes a statement, ``;``.
        A statement is closed once its size reaches the per-statement limit
        or once the current output reaches ``self._max_chunk_size``; in the
        latter case the next row goes to a new output named
        ``{counter}_{table_name}_{sequence}.sql``.

        Afterwards all chunks are closed, the buffers are reset and the table
        is marked as flushed.

        :param dumper: output sink providing ``append``, ``flush``,
            ``new_output`` and ``pop_last_lines``.
        :raises ValueError: if the table has already been flushed.
        """
        if self._flushed:
            raise ValueError("Can't add table data, table interaction already flushed")

        # Local import: keeps this method self-contained.
        import functools

        importer.verbose("Storing %s data, %d lines in memory, size %d" %
                         (self._table_name, len(self._buf), self._buf_size))
        # ``list.sort(cmp=...)`` only exists on Python 2; ``cmp_to_key``
        # yields the same ordering on Python 2.7 and Python 3.
        self._buf.sort(key=functools.cmp_to_key(_lines_compare))

        # Size at which a single statement is closed.
        max_insert_size = 5000
        # Budget for the characters wrapped around each row: "(", ")",
        # the "," / ";" separator and the newline.
        row_overhead = 4

        _end_chunk = False
        # Starts True so the first row is preceded by the start line.
        _end_insert = True
        sequence = 1
        output_size = 0
        insert_size = 0
        memory_chunk = ((key(line), line) for line in self._buf)
        temp_chunks = (((key(line), line) for line in chunk)
                       for chunk in self._chunks)
        for _key, _line in heapq.merge(memory_chunk, *temp_chunks):
            if _end_chunk:
                # The previous row filled the output: continue in a new one.
                dumper.new_output('{counter}_{table_name}_{sequence}.sql'.format(
                    counter=self._counter, table_name=self._table_name,
                    sequence=importer.str_in_base(sequence, min_with=4)))
                output_size = 0

            if _end_chunk or _end_insert:
                # The previous statement was closed: open a new one.
                dumper.append(self._start_line)
                output_size += len(self._start_line)
                insert_size = len(self._start_line)

            row_size = len(_line) + row_overhead
            _end_chunk = output_size + row_size >= self._max_chunk_size
            _end_insert = insert_size + row_size >= max_insert_size
            closes_statement = _end_chunk or _end_insert

            # ``_line[:-1]`` drops the row's last character (presumably its
            # trailing newline) before wrapping it in parentheses.
            dumper.append('(' + _line[:-1] + ')' + (';' if closes_statement else ',') + '\n')
            output_size += row_size
            insert_size += row_size

            if closes_statement:
                dumper.flush()

            if _end_chunk:
                sequence += 1

        # If a line is still pending, swap its final two characters for
        # ";\n" so a row that ended with "," terminates the statement.
        last_lines = dumper.pop_last_lines(1)
        if last_lines:
            dumper.append(last_lines[0][:-2] + ";\n")
        dumper.flush()

        # Release the chunks and drop all buffered state.
        for chunk in self._chunks:
            chunk.close()
        self._chunks = []
        self._buf_size = 0
        self._buf = []

        self._flushed = True
Ejemplo n.º 5
0
    def flush_data(self, dumper):
        """Write every buffered row of this table to ``dumper`` as INSERT rows.

        The in-memory buffer is sorted with ``_lines_compare`` and merged with
        the chunks in ``self._chunks`` through ``heapq.merge`` (which requires
        each chunk to be sorted already). Each row is emitted as
        ``(<row>)`` followed by ``,`` or, when it closes a statement, ``;``.
        A statement is closed once its size reaches the per-statement limit
        or once the current output reaches ``self._max_chunk_size``; in the
        latter case the next row goes to a new output named
        ``{counter}_{table_name}_{sequence}.sql``.

        Afterwards all chunks are closed, the buffers are reset and the table
        is marked as flushed.

        :param dumper: output sink providing ``append``, ``flush``,
            ``new_output`` and ``pop_last_lines``.
        :raises ValueError: if the table has already been flushed.
        """
        if self._flushed:
            raise ValueError(
                "Can't add table data, table interaction already flushed")

        # Local import: keeps this method self-contained.
        import functools

        importer.verbose("Storing %s data, %d lines in memory, size %d" %
                         (self._table_name, len(self._buf), self._buf_size))
        # ``list.sort(cmp=...)`` only exists on Python 2; ``cmp_to_key``
        # yields the same ordering on Python 2.7 and Python 3.
        self._buf.sort(key=functools.cmp_to_key(_lines_compare))

        # Size at which a single statement is closed.
        max_insert_size = 5000
        # Budget for the characters wrapped around each row: "(", ")",
        # the "," / ";" separator and the newline.
        row_overhead = 4

        _end_chunk = False
        # Starts True so the first row is preceded by the start line.
        _end_insert = True
        sequence = 1
        output_size = 0
        insert_size = 0
        memory_chunk = ((key(line), line) for line in self._buf)
        temp_chunks = (((key(line), line) for line in chunk)
                       for chunk in self._chunks)
        for _key, _line in heapq.merge(memory_chunk, *temp_chunks):
            if _end_chunk:
                # The previous row filled the output: continue in a new one.
                dumper.new_output(
                    '{counter}_{table_name}_{sequence}.sql'.format(
                        counter=self._counter,
                        table_name=self._table_name,
                        sequence=importer.str_in_base(sequence, min_with=4)))
                output_size = 0

            if _end_chunk or _end_insert:
                # The previous statement was closed: open a new one.
                dumper.append(self._start_line)
                output_size += len(self._start_line)
                insert_size = len(self._start_line)

            row_size = len(_line) + row_overhead
            _end_chunk = output_size + row_size >= self._max_chunk_size
            _end_insert = insert_size + row_size >= max_insert_size
            closes_statement = _end_chunk or _end_insert

            # ``_line[:-1]`` drops the row's last character (presumably its
            # trailing newline) before wrapping it in parentheses.
            dumper.append('(' + _line[:-1] + ')' +
                          (';' if closes_statement else ',') + '\n')
            output_size += row_size
            insert_size += row_size

            if closes_statement:
                dumper.flush()

            if _end_chunk:
                sequence += 1

        # If a line is still pending, swap its final two characters for
        # ";\n" so a row that ended with "," terminates the statement.
        last_lines = dumper.pop_last_lines(1)
        if last_lines:
            dumper.append(last_lines[0][:-2] + ";\n")
        dumper.flush()

        # Release the chunks and drop all buffered state.
        for chunk in self._chunks:
            chunk.close()
        self._chunks = []
        self._buf_size = 0
        self._buf = []

        self._flushed = True