Example #1
File: storage.py Project: kokizzu/redpanda
    def __init__(self, index, header, records):
        self.index = index
        self.header = header
        self.term = None
        self.records = records

        header_crc_bytes = struct.pack(
            "<" + HDR_FMT_RP_PREFIX_NO_CRC + HDR_FMT_CRC, *self.header[1:])
        header_crc = crc32c.crc32c(header_crc_bytes)
        if self.header.header_crc != header_crc:
            raise CorruptBatchError(self)
        crc = crc32c.crc32c(self._crc_header_be_bytes())
        crc = crc32c.crc32c(records, crc)
        if self.header.crc != crc:
            raise CorruptBatchError(self)
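
The chained call crc32c.crc32c(records, crc) above is the library's incremental interface: passing a previous CRC as the second argument continues the checksum across chunks. A minimal standalone sketch (not part of redpanda) showing that chunked and one-shot results agree:

import crc32c

payload = b"header-bytes" + b"record-bytes"

one_shot = crc32c.crc32c(payload)

running = crc32c.crc32c(b"header-bytes")           # CRC of the first chunk
running = crc32c.crc32c(b"record-bytes", running)  # continue from the prior CRC

assert one_shot == running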
Example #2
    def getindex(self, i):
        index = self.index()
        N = index.shape[0]
        offset = index[i, 1]
        if i < N - 1:
            next_offset = index[i + 1, 1]
            value = self.buffer[offset:next_offset]
        else:
            value = self.buffer[offset:]

        if self.format_version == 1:
            stored_check_value = int.from_bytes(value[-4:], byteorder='little')
            value = value[:-4]
            if self.check_crc:
                retrieved_check_value = crc32c.crc32c(value)
                if retrieved_check_value != stored_check_value:
                    raise ValidationError(
                        f"Label {i} failed its crc32c check. Stored: {stored_check_value} Computed: {retrieved_check_value}"
                    )

        encoding = self.compress
        if encoding:
            value = compression.decompress(value, encoding, str(index[i, 0]))

        if self.frombytesfn:
            value = self.frombytesfn(value)

        return value
Example #3
File: hdfs.py Project: rgvanwesep/dvc
def md5md5crc32c(path):
    # https://github.com/colinmarc/hdfs/blob/f2f512db170db82ad41590c4ba3b7718b13317d2/file_reader.go#L76
    import hashlib

    from crc32c import crc32c  # pylint: disable=no-name-in-module

    # dfs.bytes-per-checksum = 512, default on hadoop 2.7
    bytes_per_checksum = 512
    padded = 32
    total = 0

    md5md5 = hashlib.md5()

    with open(path, "rb") as fobj:
        while True:
            block = fobj.read(bytes_per_checksum)
            if not block:
                break

            crc_int = crc32c(block)

            # NOTE: hdfs is big-endian
            crc_bytes = crc_int.to_bytes((crc_int.bit_length() + 7) // 8,
                                         "big")

            md5 = hashlib.md5(crc_bytes).digest()

            total += len(md5)
            if padded < total:
                padded *= 2

            md5md5.update(md5)

    md5md5.update(b"\0" * (padded - total))
    return "000002000000000000000000" + md5md5.hexdigest()
Example #4
File: test_dm.py Project: ICRAR/daliuge
 def _test_runGraphOneDOPerDOM(self, repeats=1):
     g1 = [memory("A")]
     g2 = [
         {"oid": "B", "type": "app", "app": "dlg.apps.crc.CRCApp"},
         memory("C", producers=["B"]),
     ]
     rels = [DROPRel("B", DROPLinkType.CONSUMER, "A")]
     a_data = os.urandom(32)
     c_data = str(crc32c(a_data, 0)).encode("utf8")
     node_managers = [self._start_dm(threads=self.nm_threads) for _ in range(2)]
     ids = [0] * repeats
     for n in range(repeats):
         choice = 0
         while choice in ids:
             choice = random.randint(0, 1000)
         ids[n] = choice
         sessionId = f"s{choice}"
         self._test_runGraphInTwoNMs(
             copy.deepcopy(g1),
             copy.deepcopy(g2),
             rels,
             a_data,
             c_data,
             sessionId=sessionId,
             node_managers=node_managers,
         )
Example #5
File: test_dm.py Project: ICRAR/daliuge
    def test_run_streaming_consumer_remotely2(self):
        """
        Like above, but C is hosted by DM #2.
        """

        g1 = [
            memory("A"),
            {
                "oid": "B",
                "type": "app",
                "app": "dlg.apps.simple.CopyApp",
                "inputs": ["A"],
            },
        ]
        g2 = [
            memory("C"),
            {
                "oid": "D",
                "type": "app",
                "app": "dlg.apps.crc.CRCStreamApp",
                "streamingInputs": ["C"],
                "outputs": ["E"],
            },
            memory("E"),
        ]
        rels = [DROPRel("C", DROPLinkType.OUTPUT, "B")]
        a_data = os.urandom(32)
        e_data = str(crc32c(a_data, 0)).encode("utf8")
        self._test_runGraphInTwoNMs(g1, g2, rels, a_data, e_data, leaf_oid="E")
Example #6
File: test_drop.py Project: ICRAR/daliuge
    def _test_dynamic_write_withDropType(self, dropType):
        """
        Test an AbstractDROP and a simple AppDROP (for checksum calculation)
        without an expected drop size (for app compatibility and not
        recommended in production)
        """
        # NOTE: use_staging required for multiple writes to plasma drops
        a = dropType("oid:A", "uid:A", expectedSize=-1, use_staging=True)
        b = SumupContainerChecksum("oid:B", "uid:B")
        c = InMemoryDROP("oid:C", "uid:C")
        b.addInput(a)
        b.addOutput(c)

        test_crc = 0
        with DROPWaiterCtx(self, c):
            for _ in range(self._test_num_blocks):
                a.write(self._test_block)
                test_crc = crc32c(self._test_block, test_crc)
            a.setCompleted()

        # Read the checksum from c
        cChecksum = int(droputils.allDropContents(c))

        self.assertNotEqual(a.checksum, 0)
        self.assertEqual(a.checksum, test_crc)
        self.assertEqual(cChecksum, test_crc)
Example #7
def bep42_prefix(
        ip, crc32_salt,
        first_node_bits):  # first_node_bits determines the last 3 bits
    from crc32c import crc32c
    ip_asint = decode_uint32(encode_ip(ip))
    value = crc32c(
        bytearray(
            encode_uint32((ip_asint & 0x030f3fff)
                          | ((crc32_salt & 0x7) << 29))))
    return (value & 0xfffff800) | ((first_node_bits << 8) & 0x00000700)
Example #8
def crc32c_file_checksum(filepath, ftype):
    """
    Calculates the CRC32C checksum of a file locally
    :param ftype: 'dir' or 'file'
    :param filepath: local absolute filepath
    :return:
    """
    if ftype == 'dir':
        return None
    with open(filepath, 'rb') as fobj:
        buf = fobj.read()
    ret = "%08x" % (crc32c.crc32c(buf) & 0xFFFFFFFF)
    return ret
Example #9
def get_source_hashes_CRC32C(what):
    blocksize = 128 * 256
    for i in what:
        with Path(i[0]).open("rb") as file:
            crcvalue = 0
            while True:
                buf = file.read(blocksize)
                if not buf:
                    break
                crcvalue = (crc32c(buf, crcvalue) & 0xffffffff)
            hashstring = f'{crcvalue:x}'
        i[6] = hashstring
Example #10
 def _read_png_section(self, f):
     section_length = int.from_bytes(f.read(4), byteorder='big', signed=False)
     section_type = f.read(4)
     if section_type == b'':
         raise EOF()
     section_content = f.read(section_length)
     section_crc = int.from_bytes(f.read(4), byteorder='big')
     if crc32c(section_content) != section_crc:
         # Check disabled: PNG chunks store a standard CRC-32 computed over
         # the chunk type plus the data, so this CRC-32C value will not match.
         # raise InvalidCRCError(f"{self._source_file_path} section {section_type.decode('latin-1')} has invalid CRC -> data is probably corrupted")
         pass

     return (section_type.decode('latin-1'), section_content)
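
Even if enforced, the check above would fail as written: PNG chunks store a standard CRC-32 (the zlib polynomial) computed over the 4-byte chunk type plus the data, not a CRC-32C of the data alone, which is presumably why the raise is commented out. A hedged sketch of the conventional PNG check:

import zlib

def png_section_crc_ok(section_type, section_content, stored_crc):
    # PNG stores a big-endian CRC-32 over chunk type + chunk data;
    # stored_crc is the integer decoded from the 4 bytes after the data.
    return (zlib.crc32(section_type + section_content) & 0xffffffff) == stored_crc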
Example #11
def generate_flash(app_eui: int, dev_eui: int, app_key: bytes, board_id: int,
                   board_version: int) -> bytes:
    flash_before_crc = EepromContents(
        crc=0,
        board_id=board_id,
        board_version=board_version,
        app_eui=app_eui,
        dev_eui=dev_eui,
        app_key=app_key,
    )

    # Calculate CRC over all but the CRC bytes
    binary_before_crc = struct.pack(BLOCK_FORMAT, *flash_before_crc)
    flash = flash_before_crc._replace(crc=crc32c.crc32c(
        binary_before_crc[CRC_SIZE:]), )

    # And pack again with the right CRC set
    return struct.pack(BLOCK_FORMAT, *flash)
Example #12
    def dict2buf(self, data, compress=None, tobytesfn=None):
        """Structure [ index length, sorted index, data ]"""
        labels = np.array([int(lbl) for lbl in data.keys()], dtype=self.dtype)
        labels.sort()

        out = np.zeros((len(labels), ), dtype=np.uint64)
        eytzinger_sort(labels, out)
        labels = out

        N = len(labels)
        N_region = N.to_bytes(4, byteorder="little", signed=False)

        compress = compression.normalize_encoding(compress)
        compress_header = nvl(compress, "none")

        header = (MAGIC_NUMBERS + bytes([FORMAT_VERSION]) +
                  compress_header.zfill(4).encode("ascii") + N_region)

        if N == 0:
            return header

        index_length = 2 * N
        index = np.zeros((index_length, ), dtype=self.dtype)
        index[::2] = labels

        noop = lambda x: x
        tobytesfn = nvl(tobytesfn, self.tobytesfn, noop)

        bytes_data = {
            label: compression.compress(tobytesfn(val), method=compress)
            for label, val in data.items()
        }
        for label in bytes_data:
            bytes_data[label] += crc32c.crc32c(bytes_data[label]).to_bytes(
                4, byteorder='little')

        data_region = b"".join((bytes_data[label] for label in labels))
        index[1] = HEADER_LENGTH + index_length * 8
        for i in range(1, N):
            index[i * 2 + 1] = (index[(i - 1) * 2 + 1] +
                                len(bytes_data[labels[i - 1]]))

        return b"".join([header, index.tobytes(), data_region])
Example #13
def calc_checksum(update: Message) -> int:
    # BOLT #7: The checksum of a `channel_update` is the CRC32C checksum as
    # specified in [RFC3720](https://tools.ietf.org/html/rfc3720#appendix-B.4)
    # of this `channel_update` without its `signature` and `timestamp` fields.
    bufio = io.BytesIO()
    update.write(bufio)
    buf = bufio.getvalue()

    # BOLT #7:
    # 1. type: 258 (`channel_update`)
    # 2. data:
    #     * [`signature`:`signature`]
    #     * [`chain_hash`:`chain_hash`]
    #     * [`short_channel_id`:`short_channel_id`]
    #     * [`u32`:`timestamp`]
    #     * [`byte`:`message_flags`]

    # Note: 2 bytes for `type` field
    return crc32c.crc32c(buf[2 + 64:2 + 64 + 32 + 8] +
                         buf[2 + 64 + 32 + 8 + 4:])
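
The two slices spell out the BOLT #7 layout: skip the 2-byte type and 64-byte signature, keep chain_hash (32 bytes) and short_channel_id (8 bytes), skip the 4-byte timestamp, then keep everything from message_flags onward. A small bookkeeping sketch of the same offsets:

TYPE_LEN, SIG_LEN, CHAIN_HASH_LEN, SCID_LEN, TS_LEN = 2, 64, 32, 8, 4

start = TYPE_LEN + SIG_LEN                                   # == 2 + 64
keep_head = slice(start, start + CHAIN_HASH_LEN + SCID_LEN)  # chain_hash + scid
keep_tail = slice(start + CHAIN_HASH_LEN + SCID_LEN + TS_LEN, None)
# buf[keep_head] + buf[keep_tail] is exactly what calc_checksum feeds to crc32c.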
Example #14
 def put(self, source, target):
     last_ex = None
     for _repeat in range(6):
         try:
             key = self.handle.blob(target, chunk_size=self.CHUNK_SIZE)
             with open(source, "rb") as blob_file:
                 crc32 = crc32c.crc32c(blob_file.read())
             key.crc32c = self.crc32c_hash_b64encode(crc32)
             key.upload_from_filename(source)
             break
         except (IOError, BadStatusLine, exceptions.GCloudError,
                 exceptions.BadRequest) as ex:
             sleep(_repeat * 2 + 1)
             self._reconnect(self.name)
             last_ex = ex
         except Exception as ex:
             last_ex = ex
     else:
         raise Exception(
             "Object {} cannot put into the bucket {}: {}!".format(
                 source, self.handle.id, str(last_ex)))
Example #15
File: test_drop.py Project: ICRAR/daliuge
    def _test_write_withDropType(self, dropType):
        """
        Test an AbstractDROP and a simple AppDROP (for checksum calculation)
        """
        a = dropType("oid:A", "uid:A", expectedSize=self._test_drop_sz * ONE_MB)
        b = SumupContainerChecksum("oid:B", "uid:B")
        c = InMemoryDROP("oid:C", "uid:C")
        b.addInput(a)
        b.addOutput(c)

        test_crc = 0
        with DROPWaiterCtx(self, c):
            for _ in range(self._test_num_blocks):
                a.write(self._test_block)
                test_crc = crc32c(self._test_block, test_crc)

        # Read the checksum from c
        cChecksum = int(droputils.allDropContents(c))

        self.assertNotEqual(a.checksum, 0)
        self.assertEqual(a.checksum, test_crc)
        self.assertEqual(cChecksum, test_crc)
Example #16
File: test_dm.py Project: ICRAR/daliuge
    def test_run_streaming_consumer_remotely(self):
        """
        A test that checks that a streaming consumer works correctly across
        node managers when its input is in a different node, like this:

        DM #1                 DM #2
        ==================    ==============
        | A --> B --> C -|----|--> D --> E |
        ==================    ==============

        Here B is a normal application and D is a streaming consumer of C.
        We use A and E to compare that all data flows correctly.
        """

        g1 = [
            memory("A"),
            {
                "oid": "B",
                "type": "app",
                "app": "dlg.apps.simple.CopyApp",
                "inputs": ["A"],
                "outputs": ["C"],
            },
            memory("C"),
        ]
        g2 = [
            {
                "oid": "D",
                "type": "app",
                "app": "dlg.apps.crc.CRCStreamApp",
                "outputs": ["E"],
            },
            memory("E"),
        ]
        rels = [DROPRel("C", DROPLinkType.STREAMING_INPUT, "D")]
        a_data = os.urandom(32)
        e_data = str(crc32c(a_data, 0)).encode("utf8")
        self._test_runGraphInTwoNMs(g1, g2, rels, a_data, e_data, leaf_oid="E")
Example #17
    def _test_socket_listener(self, **kwargs):
        """
        A simple test to check that SocketListenerApps are indeed working as
        expected; that is, they write the data they receive into their output,
        and finish when the connection is closed from the client side

        The data flow diagram looks like this:

        A --> B --> C --> D
        """

        host = "127.0.0.1"
        port = 9933
        data = os.urandom(1025)

        a = SocketListenerApp("oid:A", "uid:A", host=host, port=port, **kwargs)
        b = InMemoryDROP("oid:B", "uid:B")
        c = SumupContainerChecksum("oid:C", "uid:C")
        d = InMemoryDROP("oid:D", "uid:D")
        a.addOutput(b)
        b.addConsumer(c)
        c.addOutput(d)

        # Create the socket, write, and close the connection, allowing
        # A to move to COMPLETED
        with DROPWaiterCtx(self, d, 3):  # That's plenty of time
            a.async_execute()
            utils.write_to(host, port, data, 1)

        for drop in [a, b, c, d]:
            self.assertEqual(DROPStates.COMPLETED, drop.status)

        # Our expectations are fulfilled!
        bContents = droputils.allDropContents(b)
        dContents = int(droputils.allDropContents(d))
        self.assertEqual(data, bContents)
        self.assertEqual(crc32c(data, 0), dContents)
Example #18
File: dht.py Project: FredStober/tinyBT
def bep42_prefix(ip, crc32_salt, first_node_bits): # first_node_bits determines the last 3 bits
	from crc32c import crc32c
	ip_asint = decode_uint32(encode_ip(ip))
	value = crc32c(bytearray(encode_uint32((ip_asint & 0x030f3fff) | ((crc32_salt & 0x7) << 29))))
	return (value & 0xfffff800) | ((first_node_bits << 8) & 0x00000700)
Example #19
def masked_crc32c(data):
    x = u32(crc32c(data))
    return u32(((x >> 15) | u32(x << 17)) + 0xa282ead8)
Example #20
def _masked_crc32c(data):
    x = _u32(crc32c(data))
    return _u32(((x >> 15) | _u32(x << 17)) + 0xa282ead8)
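
Both functions above implement the masking TensorFlow applies to CRC-32C values in TFRecord files; the u32/_u32 helper is not shown in these snippets, but a common definition (assumed here) simply truncates to 32 bits. A hedged sketch of the helper plus the record framing these masked CRCs protect:

import struct
from crc32c import crc32c

def u32(x):
    # Assumed helper: truncate to an unsigned 32-bit value.
    return x & 0xffffffff

def masked_crc32c(data):
    x = u32(crc32c(data))
    return u32(((x >> 15) | u32(x << 17)) + 0xa282ead8)

def tfrecord_frame(payload):
    # TFRecord layout: uint64 length, masked CRC of the length bytes,
    # the payload, masked CRC of the payload bytes (all little-endian).
    length = struct.pack("<Q", len(payload))
    return (length + struct.pack("<I", masked_crc32c(length)) +
            payload + struct.pack("<I", masked_crc32c(payload)))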
Example #21
 def init(cls, request, metadata, media, bucket, is_destination, context):
     if context is None:
         instruction = request.headers.get("x-goog-testbench-instructions")
         if instruction == "inject-upload-data-error":
             media = utils.common.corrupt_media(media)
     timestamp = datetime.datetime.now(datetime.timezone.utc)
     metadata.bucket = bucket.name
     metadata.generation = random.getrandbits(63)
     metadata.metageneration = 1
     metadata.id = "%s/o/%s#%d" % (
         metadata.bucket,
         metadata.name,
         metadata.generation,
     )
     metadata.size = len(media)
     actual_md5Hash = base64.b64encode(
         hashlib.md5(media).digest()).decode("utf-8")
     if metadata.md5_hash != "" and actual_md5Hash != metadata.md5_hash:
         utils.error.mismatch("md5Hash", metadata.md5_hash, actual_md5Hash,
                              context)
     actual_crc32c = crc32c.crc32c(media)
     if metadata.HasField(
             "crc32c") and actual_crc32c != metadata.crc32c.value:
         utils.error.mismatch("crc32c", metadata.crc32c.value,
                              actual_crc32c, context)
     metadata.md5_hash = actual_md5Hash
     metadata.crc32c.value = actual_crc32c
     metadata.time_created.FromDatetime(timestamp)
     metadata.updated.FromDatetime(timestamp)
     metadata.owner.entity = utils.acl.get_object_entity("OWNER", context)
     metadata.owner.entity_id = hashlib.md5(
         metadata.owner.entity.encode("utf-8")).hexdigest()
     algorithm, key_b64, key_sha256_b64 = utils.csek.extract(
         request, False, context)
     if algorithm != "":
         utils.csek.check(algorithm, key_b64, key_sha256_b64, context)
         metadata.customer_encryption.encryption_algorithm = algorithm
         metadata.customer_encryption.key_sha256 = key_sha256_b64
     default_projection = CommonEnums.Projection.NO_ACL
     is_uniform = bucket.iam_configuration.uniform_bucket_level_access.enabled
     bucket.iam_configuration.uniform_bucket_level_access.enabled = False
     if len(metadata.acl) != 0:
         default_projection = CommonEnums.Projection.FULL
     else:
         predefined_acl = utils.acl.extract_predefined_acl(
             request, is_destination, context)
         if (predefined_acl == CommonEnums.PredefinedObjectAcl.
                 PREDEFINED_OBJECT_ACL_UNSPECIFIED):
             predefined_acl = (
                 CommonEnums.PredefinedObjectAcl.OBJECT_ACL_PROJECT_PRIVATE)
         elif predefined_acl == "":
             predefined_acl = "projectPrivate"
         elif is_uniform:
             utils.error.invalid(
                 "Predefined ACL with uniform bucket level access enabled",
                 context)
         cls.__insert_predefined_acl(metadata, bucket, predefined_acl,
                                     context)
     bucket.iam_configuration.uniform_bucket_level_access.enabled = is_uniform
     return (
         cls(metadata, media, bucket),
         utils.common.extract_projection(request, default_projection,
                                         context),
     )
Example #22
#!/usr/bin/env python2
import crc32c
import struct

STAGE1_SECTORS = 28

with open("build/stage1/stage1.bin", "r") as f:
    data = f.read()
padded_size = STAGE1_SECTORS * 512 - 4
assert len(data) <= padded_size
data += chr(0) * (padded_size - len(data))
data += struct.pack("<I", crc32c.crc32c(data))
with open("build/stage1.bin", "w") as f:
    f.write(data)
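
A hedged check of the image written above: the CRC32C of everything except the trailing four bytes should equal the little-endian value stored in them.

import struct

import crc32c

with open("build/stage1.bin", "rb") as f:
    blob = f.read()
payload, stored = blob[:-4], struct.unpack("<I", blob[-4:])[0]
assert crc32c.crc32c(payload) == stored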
Example #23
def get_modulo_value(experiment, user_id):
    # type: (str, Union[str, int]) -> int
    return crc32c(str(user_id).encode(), crc32c(experiment.encode())) % 100
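
Seeding the user CRC with the experiment CRC makes the 0-99 bucket stable per (experiment, user) pair, so a fixed threshold yields a deterministic percentage rollout. A hedged usage sketch (the experiment name and enable_feature are hypothetical):

if get_modulo_value("new-checkout-flow", user_id) < 10:  # 10% of users
    enable_feature(user_id)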
Example #24
def masked_crc32c(data):
    # mask function defined in: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/lib/hash/crc32c.h#L40
    kMaskDelta = 0xa282ead8
    x = u32(crc32c(data))
    return u32(((x >> 15) | u32(x << 17)) + kMaskDelta)
Example #25
def _masked_crc32c(data):
    x = _u32(crc32c(data))
    return _u32(((x >> 15) | _u32(x << 17)) + 0xA282EAD8)
Example #26
def wfp_for_contents(file: str, contents: bytes):
    file_md5 = hashlib.md5(contents).hexdigest()
    # Print file line
    wfp = 'file={0},{1},{2}\n'.format(file_md5, len(contents), file)
    # We don't process snippets for binaries.
    if skip_snippets(contents.decode('utf-8', 'ignore'), file):
        return wfp
    # Initialize variables
    gram = ""
    window = []
    normalized = 0
    line = 1
    min_hash = MAX_CRC32
    last_hash = MAX_CRC32
    last_line = 0
    output = ""

    # Otherwise recurse src_content and calculate Winnowing hashes
    for byte in contents:

        if byte == ASCII_LF:
            line += 1
            normalized = 0
        else:
            normalized = normalize(byte)

        # Is it a useful byte?
        if normalized:

            # Add byte to gram
            gram += chr(normalized)

            # Do we have a full gram?
            if len(gram) >= GRAM:
                gram_crc32 = crc32c(gram.encode('ascii'))
                window.append(gram_crc32)

                # Do we have a full window?
                if len(window) >= WINDOW:

                    # Select minimum hash for the current window
                    min_hash = min(window)

                    # Is the minimum hash a new one?
                    if min_hash != last_hash:

                        # Hashing the hash will result in a better balanced resulting data set
                        # as it will counter the winnowing effect which selects the "minimum"
                        # hash in each window
                        crc = crc32c((min_hash).to_bytes(4,
                                                         byteorder='little'))
                        crc_hex = '{:08x}'.format(crc)
                        if last_line != line:
                            if output:
                                wfp += output + '\n'
                            output = "%d=%s" % (line, crc_hex)
                        else:
                            output += ',' + crc_hex

                        last_line = line
                        last_hash = min_hash

                    # Shift window
                    window.pop(0)

                # Shift gram
                gram = gram[1:]

    if output:
        wfp += output + '\n'

    return wfp
Example #27
def wfp_for_file(file: str, path: str) -> str:
    """ Returns the WFP for a file by executing the winnowing algorithm over its contents.

  Parameters
  ----------
  file: str
    The name of the file
  path : str
    The path of the file to read and fingerprint.
  """
    contents = None
    binary = False

    with open(path, 'rb') as f:
        contents = f.read()

    file_md5 = hashlib.md5(contents).hexdigest()
    # Print file line
    wfp = 'file={0},{1},{2}\n'.format(file_md5, len(contents), file)
    # We don't process snippets for binaries.
    if is_binary(path) or skip_snippets(contents.decode(), file):
        return wfp
    # Initialize variables
    gram = ""
    window = []
    normalized = 0
    line = 1
    min_hash = MAX_CRC32
    last_hash = MAX_CRC32
    last_line = 0
    output = ""

    # Otherwise recurse src_content and calculate Winnowing hashes
    for byte in contents:

        if byte == ASCII_LF:
            line += 1
            normalized = 0
        else:
            normalized = normalize(byte)

        # Is it a useful byte?
        if normalized:

            # Add byte to gram
            gram += chr(normalized)

            # Do we have a full gram?
            if len(gram) >= GRAM:
                gram_crc32 = crc32c(gram.encode('ascii'))
                window.append(gram_crc32)

                # Do we have a full window?
                if len(window) >= WINDOW:

                    # Select minimum hash for the current window
                    min_hash = min(window)

                    # Is the minimum hash a new one?
                    if min_hash != last_hash:

                        # Hashing the hash will result in a better balanced resulting data set
                        # as it will counter the winnowing effect which selects the "minimum"
                        # hash in each window
                        crc = crc32c((min_hash).to_bytes(4,
                                                         byteorder='little'))
                        crc_hex = '{:08x}'.format(crc)
                        if last_line != line:
                            if output:
                                wfp += output + '\n'
                            output = "%d=%s" % (line, crc_hex)
                        else:
                            output += ',' + crc_hex

                        last_line = line
                        last_hash = min_hash

                    # Shift window
                    window.pop(0)

                # Shift gram
                gram = gram[1:]

    if output:
        wfp += output + '\n'

    return wfp
Example #28
 def write(self, chunk):
     self._fileobj.write(chunk)
     self.crc32 = crc32c.crc32c(chunk, self.crc32)