Esempio n. 1
0
 def mask_to_register(mask):
     """Return a mask register loaded with ``mask``, reusing a cached one if any.

     The mask is first normalised with ``Mask.as_immediate`` and that value is
     used as the key into the module-level ``maskcache``.

     * Cache hit: the entry is moved to the end of ``maskcache`` and its
       register is returned unchanged.
     * Cache miss: a new 64-bit ``MaskRegister`` is requested. If that raises
       ``AllocationError``, the entry at the front of ``maskcache`` is removed
       and its register is taken over instead. In both cases the register is
       loaded with the mask via ``x86.mov`` and recorded in the cache.

     NOTE(review): ``maskcache`` is presumably a ``collections.OrderedDict``
     (it is used through ``move_to_end`` and ``popitem(False)``); if so, the
     eviction order is least-recently-used first, and ``popitem`` raises
     ``KeyError`` when the cache is empty.
     """
     immediate = Mask.as_immediate(mask)

     if immediate not in maskcache:
         try:
             register = MaskRegister(64, immediate)
         except AllocationError:
             # No register available: recycle the one belonging to the entry
             # at the front of the cache. Its previous contents are replaced
             # by the mov below.
             register = maskcache.popitem(False)[1]
         x86.mov(register, immediate)
         maskcache[immediate] = register
         return register

     # Already loaded: refresh its position so it is evicted last.
     maskcache.move_to_end(immediate)
     return maskcache[immediate]
Esempio n. 2
0
def square_350_701(dst, src):
    """Issue the x86 vector operations implementing ``square_350_701``.

    Requires source and destination registers to be disjunct.

    Args:
        dst: Indexable of three registers (``dst[0]`` .. ``dst[2]``) that
            receive the result.
        src: Indexable of three registers (``src[0]`` .. ``src[2]``) holding
            the input. They are used as working storage and are overwritten.

    The sequence has four stages: (1) set aside one masked bit of ``src[0]``,
    (2) shift the three-register value left by two bits with carries,
    (3) rearrange the bits inside every byte, (4) shuffle bytes and 64-bit
    lanes into ``dst``.

    NOTE(review): judging by the name this is presumably the bit permutation
    for 350 repeated squarings in a 701-bit binary ring -- confirm against
    the caller. The ``x86.*`` helpers are called destination-first.
    """
    r = src
    r_out = dst

    # --- Stage 1: set aside a single bit of r[0] -------------------------
    # The mask string has 255 '0' characters followed by one '1'; presumably
    # this selects the least-significant bit of the 256-bit register.
    maskreg = MaskRegister()
    lowbitmask = Mask('0' * 255 + '1')
    x86.vmovdqa(maskreg, lowbitmask)

    # lowbitreg = r[0] & mask (the isolated bit); r[0] = r[0] & ~mask (bit
    # cleared). The saved bit is merged back into r[2] after stage 3.
    lowbitreg = Register()
    x86.vpand(lowbitreg, maskreg, r[0])
    x86.vpandn(r[0], maskreg, r[0])

    # --- Stage 2: shift the 3-register value left by two bits ------------
    rest = Register()
    twobits = Register()
    nexttwobits = Register()
    # maskreg is reused; presumably '0001' marks one of the four 64-bit
    # lanes (the one that receives the wrapped-around lane below).
    mask0001 = Mask('0001')
    x86.vmovdqa(maskreg, mask0001)

    # Walk from the highest register down so that r[i + 1] has already been
    # shifted when the carry out of r[i] is XORed into it.
    for i in range(2, -1, -1):
        # Per 64-bit lane: everything except the top two bits, moved up by 2.
        x86.vpsllq(rest, r[i], 2)
        # Per 64-bit lane: the top two bits, moved down to the bottom.
        x86.vpsrlq(twobits, r[i], 62)
        # Rotate the lanes by one position so each lane's carry lines up with
        # its neighbour (assuming the string is the vpermq imm8 in binary).
        x86.vpermq(twobits, twobits, '10010011')
        # Split off the lane selected by maskreg: that part is the carry out
        # of this register and belongs to the next one.
        x86.vpand(nexttwobits, maskreg, twobits)
        x86.vpandn(twobits, maskreg, twobits)
        x86.vpxor(r[i], rest, twobits)
        # The carry out of r[2] has no destination and is dropped.
        if i + 1 < 3:
            x86.vpxor(r[i + 1], r[i + 1], nexttwobits)

    # --- Stage 3: rearrange the bits inside every byte --------------------
    # Mask j repeats, 32 times, an 8-entry pattern with a single ONE at
    # position j, i.e. it selects the same bit position of all 32 bytes.
    mask_bit_in_byte = [
        Mask(32 * ([ZERO] * i + [ONE] + [ZERO] * (7 - i))) for i in range(8)
    ]
    bits = Register()
    accum = Register()

    for i in range(2, -1, -1):
        for j in range(8):
            x86.vpand(bits, r[i], mask_bit_in_byte[j])
            # The shift amount 7 - 2 * j runs 7, 5, 3, 1, -1, -3, -5, -7, so
            # position j moves to position 7 - j. NOTE(review): x86.vpshlq is
            # defined elsewhere; presumably a negative amount shifts the
            # other way, which makes this a bit reversal within each byte.
            if j == 0:
                # First piece initialises the accumulator.
                x86.vpshlq(accum, bits, 7 - 2 * j)
            else:
                x86.vpshlq(bits, bits, 7 - 2 * j)
                if j == 7:
                    # Last piece: write the combined result back into r[i].
                    x86.vpxor(r[i], accum, bits)
                else:
                    x86.vpxor(accum, accum, bits)

    # Merge the bit saved in stage 1 into r[2]: move it to another lane,
    # shift it by 56 within that lane, then XOR it in.
    x86.vpermq(lowbitreg, lowbitreg, '11001111')
    x86.vpshlq(lowbitreg, lowbitreg, 56)
    x86.vpxor(r[2], lowbitreg, r[2])

    # --- Stage 4: byte shuffles and lane permutations into dst ------------
    # Each IndicesMask below has 32 entries, one per byte of a 256-bit
    # register. NOTE(review): None presumably produces a zero byte -- confirm
    # against IndicesMask.
    # r_out[2] is built from r[0] alone.
    indices = IndicesMask(
        list(range(15, -1, -1)) + [None] * 8 + list(range(7, -1, -1)))
    x86.vpshufb(r_out[2], r[0], indices)
    x86.vpermq(r_out[2], r_out[2], '10010011')

    t1 = Register()

    # r_out[1] combines r[0] with r[1]; r_out[0] combines r[1] with r[2].
    for i in range(2):
        # Eight bytes taken from r[i]; the other 24 entries are None.
        indices = IndicesMask([None] * 24 + list(range(15, 7, -1)))
        x86.vpshufb(r_out[1 - i], r[i], indices)
        # Twenty-four bytes taken from r[i + 1]; the last 8 entries are None.
        indices = IndicesMask(
            list(range(15, -1, -1)) + list(range(7, -1, -1)) + [None] * 8)
        x86.vpshufb(t1, r[i + 1], indices)
        # Combine both contributions, then put the 64-bit lanes in order.
        x86.vpxor(r_out[1 - i], t1, r_out[1 - i])
        x86.vpermq(r_out[1 - i], r_out[1 - i], '11010010')