예제 #1
0
def _allocate_output_value_files(connection, repository_path, refs):
    """
    Plan and open the output value files needed to hold the data of *refs*.

    Greedily packs each ref's ``data_size`` into per-collection buckets,
    starting a new bucket whenever adding a ref would push the current
    bucket past ``_max_value_file_size``, then opens one ``OutputValueFile``
    per bucket.  All files are placed on the "storage" space selected by
    ``find_least_volume_space_id``.

    Returns a ``defaultdict(list)`` mapping collection_id to the list of
    opened ``OutputValueFile`` instances.
    """
    output_value_file_sizes = defaultdict(list)

    file_space_info = load_file_space_info(connection)
    file_space_sanity_check(file_space_info, repository_path)
    # one space is chosen up front for every file in this batch
    space_id = find_least_volume_space_id("storage", file_space_info)

    for ref in refs:
        sizes = output_value_file_sizes[ref.collection_id]
        if not sizes:
            # first ref for this collection: open its first bucket
            sizes.append(0)
        elif sizes[-1] + ref.data_size > _max_value_file_size:
            # current bucket would overflow: start a new one.  (elif --
            # rather than a second independent check -- avoids planning a
            # useless zero-size file when the very first ref of a
            # collection already exceeds the maximum by itself.)
            sizes.append(0)
        sizes[-1] += ref.data_size

    output_value_files = defaultdict(list)
    for collection_id, sizes in output_value_file_sizes.items():
        for expected_size in sizes:
            output_value_files[collection_id].append(
                OutputValueFile(connection,
                                space_id,
                                repository_path,
                                expected_size=expected_size))

    return output_value_files
예제 #2
0
def _allocate_output_value_files(connection, repository_path, refs):
    """
    Open the output value files required to store the data of *refs*,
    grouped by collection.

    First pass: accumulate each ref's ``data_size`` into the newest
    planned file for its collection, rolling over to a new planned file
    whenever the running size would exceed ``_max_value_file_size``.
    Second pass: open one ``OutputValueFile`` per planned size.

    Returns a ``defaultdict(list)`` mapping collection_id to the list of
    opened ``OutputValueFile`` instances.
    """
    planned_sizes = defaultdict(list)

    file_space_info = load_file_space_info(connection)
    file_space_sanity_check(file_space_info, repository_path)
    space_id = find_least_volume_space_id("storage", file_space_info)

    # planning pass: compute the expected size of every file
    for ref in refs:
        bucket = planned_sizes[ref.collection_id]
        if len(bucket) == 0:
            bucket.append(0)
        if bucket[-1] + ref.data_size > _max_value_file_size:
            bucket.append(0)
        bucket[-1] += ref.data_size

    # allocation pass: open a value file for each planned size
    output_value_files = defaultdict(list)
    for collection_id, bucket in planned_sizes.items():
        output_value_files[collection_id] = [
            OutputValueFile(connection,
                            space_id,
                            repository_path,
                            expected_size=size)
            for size in bucket]

    return output_value_files
예제 #3
0
    def store_sequence(
        self, 
        collection_id, 
        key, 
        unified_id,
        timestamp_repr, 
        conjoined_part,
        segment_num, 
        segment_size,
        zfec_padding_size,
        segment_md5_digest,
        segment_adler32,
        sequence_num, 
        data,
        user_request_id
    ):
        """
        Store one piece (sequence) of segment data.

        Appends *data* to the writer's current output value file and
        inserts a matching segment_sequence row into the database.  The
        segment identified by (unified_id, conjoined_part, segment_num)
        must already be present in ``self._active_segments``; a missing
        entry raises KeyError.
        """
        # lookup key into self._active_segments for this piece's segment
        segment_key = (unified_id, conjoined_part, segment_num, )
        self._log.info("request {0}: " \
                       "store_sequence {1} {2} {3} {4} {5}: {6} ({7})".format(
                       user_request_id,
                       collection_id, 
                       key, 
                       unified_id,
                       timestamp_repr, 
                       segment_num, 
                       sequence_num,
                       segment_size))
        segment_entry = self._active_segments[segment_key]

        # if this write would put us over the max size,
        # start a new output value file
        if self._value_file.size + segment_size > _max_value_file_size:
            self._value_file.close()
            space_id = find_least_volume_space_id("journal",
                                                  self._file_space_info)
            self._value_file = OutputValueFile(self._connection, 
                                               space_id,
                                               self._repository_path)

        # NOTE: value_file_offset is read AFTER any rollover above and
        # BEFORE the write below, so it is the offset at which this
        # sequence's data will begin in the (possibly new) value file.
        segment_sequence_row = segment_sequence_template(
            collection_id=collection_id,
            segment_id=segment_entry["segment-id"],
            zfec_padding_size=zfec_padding_size,
            value_file_id=self._value_file.value_file_id,
            sequence_num=sequence_num,
            value_file_offset=self._value_file.size,
            size=segment_size,
            hash=psycopg2.Binary(segment_md5_digest),
            adler32=segment_adler32,
        )

        # write the data first, then record the row describing it
        self._value_file.write_data_for_one_sequence(
            collection_id, segment_entry["segment-id"], data
        )

        _insert_segment_sequence_row(self._connection, segment_sequence_row)
예제 #4
0
    def __init__(self,
                 connection,
                 file_space_info,
                 repository_path,
                 active_segments,
                 completions):
        """
        Create a Writer and open its initial output value file on the
        "journal" file space.
        """
        self._log = logging.getLogger("Writer")

        # stash collaborators for use by the write methods
        self._connection = connection
        self._file_space_info = file_space_info
        self._repository_path = repository_path
        self._active_segments = active_segments
        self._completions = completions

        # open a new value file at startup, placed on the journal space
        # selected by find_least_volume_space_id
        initial_space_id = find_least_volume_space_id(
            "journal", self._file_space_info)
        self._value_file = OutputValueFile(
            self._connection, initial_space_id, self._repository_path)
예제 #5
0
def _generate_work(connection, file_space_info, value_file_rows):
    """
    Generate (reference, output_value_file) pairs for rewriting the
    contents of *value_file_rows*.

    Iterates the segment-sequence references of the given value file rows
    and yields each reference together with the output value file its
    data should be copied into.  A new output file is started:

     * for each distinct handoff node,
     * for each distinct collection_id (for non-handoff references), and
     * whenever the current file would exceed ``_max_value_file_size``.

    The previous file is closed before each rollover, and the last open
    file (if any) is closed after the references are exhausted.
    """
    log = logging.getLogger("_generate_work")

    def _open_output_value_file():
        # every new file goes to the "storage" space selected by
        # find_least_volume_space_id at the moment of creation
        space_id = find_least_volume_space_id("storage",
                                              file_space_info)
        return OutputValueFile(connection, space_id, _repository_path)

    prev_handoff_node_id = None
    prev_collection_id = None
    output_value_file = None
    for reference in _query_value_file_references(
        connection, [row.id for row in value_file_rows]
    ):
        if reference.handoff_node_id is not None:
            # at least one distinct value file per handoff node
            if reference.handoff_node_id != prev_handoff_node_id:
                if prev_handoff_node_id is not None:
                    log.debug(
                        "closing output value file handoff node {0}".format(
                            prev_handoff_node_id
                        )
                    )
                    assert output_value_file is not None
                    output_value_file.close()
                    output_value_file = None
                log.debug(
                    "opening value file for handoff node {0}".format(
                        reference.handoff_node_id
                    )
                )
                assert output_value_file is None
                output_value_file = _open_output_value_file()
                prev_handoff_node_id = reference.handoff_node_id
        elif reference.collection_id != prev_collection_id:
            # we just crossed from handoff references to normal ones:
            # close the handoff file before opening a collection file
            if prev_handoff_node_id is not None:
                log.debug(
                    "closing value file for handoff node {0}".format(
                        prev_handoff_node_id
                    )
                )
                assert output_value_file is not None
                output_value_file.close()
                output_value_file = None
                prev_handoff_node_id = None

            # at least one distinct value file per collection_id
            if prev_collection_id is not None:
                log.debug(
                    "closing value file for collection {0}".format(
                        prev_collection_id
                    )
                )
                assert output_value_file is not None
                output_value_file.close()
                output_value_file = None

            log.debug(
                "opening value file for collection {0}".format(
                    reference.collection_id
                )
            )
            assert output_value_file is None
            output_value_file = _open_output_value_file()
            prev_collection_id = reference.collection_id

        assert output_value_file is not None

        # if this write would put us over the max size,
        # start a new output value file
        expected_size = output_value_file.size + reference.sequence_size
        if expected_size > _max_value_file_size:
            log.debug("closing value_file and opening new one due to size")
            output_value_file.close()
            output_value_file = _open_output_value_file()

        yield reference, output_value_file

    if prev_handoff_node_id is not None:
        log.debug(
            "closing final value file for handoff node {0}".format(
                prev_handoff_node_id
            )
        )

    if prev_collection_id is not None:
        log.debug(
            "closing final value file for collection {0}".format(
                prev_collection_id
            )
        )

    # bug fix: guard the final close -- when the query yields no
    # references at all, output_value_file is still None and an
    # unconditional .close() would raise AttributeError
    if output_value_file is not None:
        output_value_file.close()
예제 #6
0
def _generate_work(connection, file_space_info, value_file_rows):
    """
    Generate (reference, output_value_file) pairs for rewriting the
    contents of *value_file_rows*.

    Yields each segment-sequence reference with the output value file its
    data should be copied into, starting a new file per handoff node, per
    collection_id, and whenever the current file would exceed
    ``_max_value_file_size``.  The last open file (if any) is closed when
    the references are exhausted.
    """
    log = logging.getLogger("_generate_work")
    prev_handoff_node_id = None
    prev_collection_id = None
    output_value_file = None
    for reference in _query_value_file_references(
            connection, [row.id for row in value_file_rows]):
        if reference.handoff_node_id is not None:
            # at least one distinct value file per handoff node
            if reference.handoff_node_id != prev_handoff_node_id:
                if prev_handoff_node_id is not None:
                    log.debug(
                        "closing output value file handoff node {0}".format(
                            prev_handoff_node_id))
                    assert output_value_file is not None
                    output_value_file.close()
                    output_value_file = None
                log.debug("opening value file for handoff node {0}".format(
                    reference.handoff_node_id))
                assert output_value_file is None
                space_id = find_least_volume_space_id("storage",
                                                      file_space_info)
                output_value_file = OutputValueFile(connection, space_id,
                                                    _repository_path)
                prev_handoff_node_id = reference.handoff_node_id
        elif reference.collection_id != prev_collection_id:
            # crossing from handoff references to normal ones: close the
            # handoff file before opening a per-collection file
            if prev_handoff_node_id is not None:
                log.debug("closing value file for handoff node {0}".format(
                    prev_handoff_node_id))
                assert output_value_file is not None
                output_value_file.close()
                output_value_file = None
                prev_handoff_node_id = None

            # at least one distinct value file per collection_id
            if prev_collection_id is not None:
                log.debug("closing value file for collection {0}".format(
                    prev_collection_id))
                assert output_value_file is not None
                output_value_file.close()
                output_value_file = None

            log.debug("opening value file for collection {0}".format(
                reference.collection_id))
            assert output_value_file is None
            space_id = find_least_volume_space_id("storage", file_space_info)
            output_value_file = OutputValueFile(connection, space_id,
                                                _repository_path)
            prev_collection_id = reference.collection_id

        assert output_value_file is not None

        # if this write would put us over the max size,
        # start a new output value file
        expected_size = output_value_file.size + reference.sequence_size
        if expected_size > _max_value_file_size:
            log.debug("closing value_file and opening new one due to size")
            output_value_file.close()
            space_id = find_least_volume_space_id("storage", file_space_info)
            output_value_file = OutputValueFile(connection, space_id,
                                                _repository_path)

        yield reference, output_value_file

    if prev_handoff_node_id is not None:
        log.debug("closing final value file for handoff node {0}".format(
            prev_handoff_node_id))

    if prev_collection_id is not None:
        log.debug("closing final value file for collection {0}".format(
            prev_collection_id))

    # bug fix: with zero references output_value_file is still None, so
    # an unconditional .close() would raise AttributeError
    if output_value_file is not None:
        output_value_file.close()