def do_it(o):
    """Generate the 'mul8x2_zen' assembly and pass it to cook_asm.

    The instruction text is g_mul01 followed by g_mul01_tail (comments cut
    off), with symbolic names substituted through g_var_map and every
    resulting line passed through replace_ymm_by_xmm.

    o: output target, forwarded unchanged to cook_asm.
    """
    lines = P.cutoff_comments(g_mul01) + P.cutoff_comments(g_mul01_tail)
    # the substitution is applied to one joined string, then split back
    text = P.replace_symbolic_names_wr('\n'.join(lines), g_var_map)
    code = [replace_ymm_by_xmm(line) for line in text.split('\n')]

    cook_asm(o, code, 'mul8x2_zen')
def alignment_code(shift):
    """Return the concatenated muladd_code output for steps 0, 2, 3 and 4.

    Every step uses its own template (comments cut off), the list 0..11 as
    the permutation argument and the caller's shift.
    """
    order = list(range(12))
    result = muladd_code(0, P.cutoff_comments(g_mul_01), order, shift)
    result += muladd_code(2, P.cutoff_comments(g_mul_2), order, shift)
    result += muladd_code(3, P.cutoff_comments(g_mul_3), order, shift)
    result += muladd_code(4, P.cutoff_comments(g_mul_4), order, shift)
    return result
def alignment_code(shift):
    """Return mul_code output for step 0 followed by steps 2..5.

    Step 0 uses g_mul01; steps 2..5 share the g_mul2 template.  The
    permutation starts as the list 0..11 and is composed with g_perm
    (space-separated hex digits) after each of the steps 2..5.
    """
    perm = list(range(12))
    out = mul_code(0, P.cutoff_comments(g_mul01), perm, shift)
    step_perm = [int(digit, 16) for digit in g_perm.split(' ')]
    template = P.cutoff_comments(g_mul2)
    for step in (2, 3, 4, 5):
        out += mul_code(step, template, perm, shift)
        # advance the permutation only after the step has been emitted
        perm = P.composition(perm, step_perm)
    return out
Example #4
def do_it(o):
    """Emit g_mul0, five mul1_code steps (1..5) built from g_muladd, then g_tail.

    o: output target, forwarded to P.cook_asm together with g_var_map.
    """
    body = P.cutoff_comments(g_mul0)
    step_template = P.cutoff_comments(g_muladd)
    ending = P.cutoff_comments(g_tail)

    step = 1
    while step < 6:
        body += mul1_code(step, step_template)
        step += 1
    body += ending
    P.cook_asm(o, body, g_var_map, True)
def do_it(o):
    """Emit both alignment variants of the routine into one assembly listing.

    Layout: g_preamble, alignment_code(8), P.g_std_end, 'retq', the
    'align0:' label, g_load0, alignment_code(0).

    o: output target, forwarded to P.cook_asm together with E.g_var_map.
    """
    code = P.cutoff_comments(g_preamble)
    code += alignment_code(8)
    # end of the first variant; the second one starts at the align0 label
    code += P.g_std_end + ['retq', 'align0:']
    code += P.cutoff_comments(g_load0)
    code += alignment_code(0)

    P.cook_asm(o, code, E.g_var_map, True)
Example #6
def do_it(name, o):
    """Emit g_mul_01 followed by mul1_code steps 2..7 built from g_muladd_2.

    Step 2 uses the list 0..11 as permutation; before each of the steps
    3..7 the permutation is composed with g_perm (space-separated hex
    digits).  The result goes to cook_asm(name, o, ...).
    """
    body = P.cutoff_comments(g_mul_01)
    perm = list(range(12))
    body += mul1_code(2, P.cutoff_comments(g_muladd_2), perm)
    step_perm = [int(digit, 16) for digit in g_perm.split(' ')]
    for step in (3, 4, 5, 6, 7):
        perm = P.composition(perm, step_perm)
        # the template is re-read for every step, as each call gets its own list
        body += mul1_code(step, P.cutoff_comments(g_muladd_2), perm)

    cook_asm(name, o, body)
Example #7
def do_it(o, mul_01, muladd, tail, perm, var_map):
    """Assemble steps 0 and 2..6 with mul_add_code and pass them to P.cook_asm.

    mul_01, muladd, tail: templates; comments are cut off before use.
    perm: space-separated hex digits, composed into the running permutation
        before each of the steps 3..6.
    var_map: forwarded to P.cook_asm.

    Step 6 uses the muladd template truncated at its first line containing
    'mulx 48(' and continued with tail.  Raises IndexError when no such
    line exists.
    """
    current = list(range(12))
    code = mul_add_code(0, P.cutoff_comments(mul_01), current)
    template = P.cutoff_comments(muladd)
    code += mul_add_code(2, template, current)
    step_perm = [int(digit, 16) for digit in perm.split(' ')]
    for step in (3, 4, 5):
        current = P.composition(current, step_perm)
        code += mul_add_code(step, template, current)
    # position of the first line mentioning 'mulx 48('
    hits = [pos for pos, line in enumerate(template)
            if line.find('mulx 48(') != -1]
    cut = hits[0]
    last_template = template[:cut] + P.cutoff_comments(tail)
    current = P.composition(current, step_perm)
    code += mul_add_code(6, last_template, current)
    P.cook_asm(o, code, var_map, True)
def form_m9(src, amd):
    """Build the step-9 template from src.

    g_mul_9_bwl_piece0 is put in front of src, everything after a marker
    line is replaced by a tail piece, and g_mul_9_bwl_piece1 is injected at
    'mulx sp[3]'.

    src: list of template lines.
    amd: truthy -> cut after the line containing 'adc3(rr[i+4], sC, s8)' and
        continue with g_mul_9_zen_piece2; falsy -> cut after the line
        containing 'movq s3, rr[i+4]' and continue with g_mul_9_bwl_piece2.
    Returns whatever inject() returns.
    Raises IndexError when the marker line is not present.
    """
    # The two branches had lost their 'else:' and the closing ']' of the list;
    # both only differ in marker and tail piece.
    if amd:
        marker, tail_piece = 'adc3(rr[i+4], sC, s8)', g_mul_9_zen_piece2
    else:
        marker, tail_piece = 'movq s3, rr[i+4]', g_mul_9_bwl_piece2

    rez = P.cutoff_comments(g_mul_9_bwl_piece0) + src
    hits = [n for n, line in enumerate(rez) if line.find(marker) != -1]
    cut = hits[0] + 1
    rez = rez[:cut] + P.cutoff_comments(tail_piece)
    return inject(rez, 'mulx sp[3]', g_mul_9_bwl_piece1)
def mul_code(i, jj_arg, p, align):
    """Produce the code lines of step i from the template lines jj_arg.

    Every collected line is passed through E.apply_s_permutation with p, and
    the symbolic offsets 'i(' / 'i+x(' are replaced by 8*i and 8*(i+x).
    align is forwarded to extract_v.
    """
    # NOTE(review): this block does not parse as shown.  Lines appear to be
    # missing: presumably an 'else:' between each pair of assignments to the
    # same name, the right-hand side of 'u, v =', and the body of 'if k:'.
    # Restore them from the original script before use.
    if i:
        # header comment naming the step
        rr = ['# mul_add %s' % i]
        rr = []

    if i == 5:
        # step 5: template without its last line, followed by g_tail
        jj = jj_arg[:-1] + P.cutoff_comments(g_tail)
        jj = jj_arg

    for j in jj:
        # lines matching g_patt are handed to extract_v
        m = g_patt.match(j)
        if m:
            u, v =,
            if v == 'i+1':
                k = extract_v(i + 1, u, align)
                k = extract_v(int(v), u, align)
            if k:

    for y in range(len(rr)):
        src = E.apply_s_permutation(rr[y], p)
        for x in range(1, 9):
            ' replace i+x with 8*(i+x) '
            src = src.replace('i+%s(' % x, '%s(' % (8 * (i + x)))
        ' replace i with 8*i '
        src = src.replace('i(', '%s(' % (8 * i))
        # drop trailing whitespace
        rr[y] = src.rstrip()

    return rr
def chew_code(src, amd, i, aligned, p):
    """Expand the template src for step i and optionally permute registers.

    src: list of template lines, or anything else, which is first converted
        with P.cutoff_comments.
    amd, aligned: forwarded to evaluate_row for every template line.
    i: step number; a truthy value adds a '# mul_add <i>' header line.
    p: permutation or a falsy value.  When truthy (and i is truthy) a comment
        line listing the mapping 's<x>->W<p[x]>' is added, and every
        non-comment line is passed through E.apply_s_permutation.

    Returns the list of generated lines.
    """
    if not isinstance(src, list):
        src = P.cutoff_comments(src)

    # Start from an empty list so that step 0 (falsy i) no longer hits an
    # unbound 'rr'; header lines are kept instead of being overwritten.
    rr = []
    if i:
        rr.append('# mul_add %s' % i)
        if p:
            mapping = ''.join('s%X->W%X ' % (x, w) for x, w in enumerate(p))
            rr.append('# ' + mapping)

    for j in src:
        k = evaluate_row(j, i, amd, aligned)
        # skip rows that evaluate to nothing
        if k and (k != [None]) and (k != ['']):
            rr += k

    if not p:
        return rr
    # comment lines stay as they are, instruction lines get permuted
    # NOTE(review): reconstructed from a listing that had lost its 'else:'
    # branches - confirm against the original script.
    return [
        x if x.startswith('#') else E.apply_s_permutation(x, p)
        for x in rr
    ]
Example #11
def cook_tail(cc):
    """Return a list built from the lines cc, followed by g_tail (comments cut off)."""
    # NOTE(review): this block does not parse as shown - the body of the 'if'
    # and presumably the statement that fills rr are missing.  What happens to
    # lines containing ', t1' cannot be told from here.
    rr = []
    for c in cc:
        if c.find(', t1') != -1:
    return rr + P.cutoff_comments(g_tail)
def chew_code(src, i, aligned, p):
    """Expand the template src for step i via evaluate_row.

    src is converted with P.cutoff_comments unless it already is a list.
    When p is truthy, lines are passed through E.apply_s_permutation.
    """
    # NOTE(review): this block does not parse as shown.  Missing, presumably:
    # an 'else:' before 'rr = []', the body of "if k == 'jmp tail':" together
    # with the statement that stores k, and the branch for non-comment lines
    # in the final loop.  Restore from the original script before use.
    if not isinstance(src, list):
        src = P.cutoff_comments(src)

    if i:
        # header comment naming the step
        rr = ['# mul_add %s' % i]
        rr = []

    for j in src:
        # evaluate_row yields zero or more output lines per template line
        for k in evaluate_row(j, i, aligned):
            if k:
                if k == 'jmp tail':

    if not p:
        return rr
    re = []
    for x in rr:
        # lines starting with '#' are comments
        if x[0] == '#':
            re.append(E.apply_s_permutation(x, p))
    return re
def alignment_code(alignment):
    """Concatenate chew_code output for steps 0, 2 and 3..10 for one alignment.

    Steps 3..10 share the g_mul_3 template; the permutation starts as the
    list 0..0xC and is composed with g_perm (space-separated hex digits)
    inside the loop.  A '# tail' section built from g_tail is appended when
    alignment is falsy.
    """
    # NOTE(review): this block does not parse as shown.  Missing, presumably:
    # an 'else:' between the two assignments to code, and the bodies of
    # 'if alignment and (i == 10):' and 'if i == 10:'.  Restore from the
    # original script before use.
    if alignment:
        code = []
        code = chew_code(g_load_0, None, 0, None)

    code += chew_code(g_mul_01, 0, alignment, None)
    code += chew_code(g_mul_2, 2, alignment, None)
    p = list(range(0xC + 1))
    q = [int(x, 16) for x in g_perm.split(' ')]
    m3 = P.cutoff_comments(g_mul_3)
    for i in range(3, 11):
        if alignment and (i == 10):
        fresh = chew_code(m3, i, alignment, p)
        if i == 9:
            # step 9 is trimmed by F.remove_after_jmp
            fresh = F.remove_after_jmp(fresh)
        code += fresh
        if i == 10:
        p = P.composition(p, q)
    if not alignment:
        code.append('# tail')
        # [1:] drops the first line chew_code returns for the tail
        code += chew_code(g_tail, 10, alignment, p)[1:]
    return code
def do_it(o, code, var_map):
    """Write one assembly procedure built from code to o.

    code: template text; comments are cut off and symbolic names are
        substituted through var_map.
    The output consists of the auto-generated notice (with '//' turned into
    '#'), the procedure header named after sys.argv[1], and the code
    followed by 'retq'.
    """
    stripped = P.cutoff_comments(code)
    body = P.replace_symbolic_names_wr('\n'.join(stripped), var_map)

    script_name = os.path.basename(sys.argv[0])
    notice = (P.g_autogenerated_patt % script_name).replace('//', '#')
    o.write(notice)

    subroutine = P.guess_subroutine_name(sys.argv[1])
    P.write_asm_procedure_header(o, subroutine)
    P.write_asm_inside(o, body + '\nretq')
def mul1_code(i, jj, p):
    """Return the code lines of mul_add step i built from the template jj.

    jj: template lines.  'extract v[i+1]' is replaced by extract_code(i + 1);
        'shift v47' becomes a vperm2i128 instruction for i == 4 and is
        dropped otherwise; every other line is copied.
    p: sequence of at least 12 integers; register s<X> (hex digit X) is
        renamed to w<p[X]>.
    For i == 7 the lines of g_tail are appended before renaming.

    In every line 'i+x(' (x = 1..8) becomes the offset 8*(i+x) and 'i('
    becomes 8*i; trailing whitespace is stripped.
    """
    rr = ['# mul_add %s' % i]
    for j in jj:
        if j == 'extract v[i+1]':
            rr.append(extract_code(i + 1))
        elif j == 'shift v47':
            # only needed once for i = 4
            if i == 4:
                rr.append('vperm2i128 $0x81,v47,v47,v47')
        else:
            # NOTE(review): ordinary template lines were not copied in the
            # listing this was taken from, which left the step without its
            # instructions - confirm against the original script.
            rr.append(j)

    # for i=7, append tail code
    if i == 7:
        rr += P.cutoff_comments(g_tail)

    # s -> w renaming cannot chain, because a renamed register no longer
    # starts with 's'
    renames = [(re.compile(r'\bs%X\b' % x), 'w%X' % p[x]) for x in range(12)]
    result = []
    for src in rr:
        for patt, target in renames:
            src = patt.sub(target, src)
        # replace i+x with 8*(i+x) first, so that plain 'i(' is handled last
        for x in range(1, 9):
            src = src.replace('i+%s(' % x, '%s(' % (8 * (i + x)))
        src = src.replace('i(', '%s(' % (8 * i))
        result.append(src.rstrip())

    return result
def mul1_code(i, jj, p):
    """Return the code lines of mul_add step i built from the template jj.

    Registers s<X> are renamed to w<p[X]> for X = 0..B (hex), and the
    symbolic offsets 'i(' / 'i+x(' are replaced by 8*i and 8*(i+x).
    """
    # NOTE(review): this block does not parse as shown - the body of the
    # "(i == 7) and 'dd'" test is missing, and presumably also the statement
    # that copies ordinary lines into rr.  Restore from the original script.
    rr = ['# mul_add %s' % i]
    for j in jj:
        if j == 'extract v[i+1]':
            rr.append(extract_code(i + 1))
        j = replace_rz(j)
        if (i == 7) and (j.find('dd') != -1):
            # no need to update dd

    # for i=7, append tail code
    if i == 7:
        rr += P.cutoff_comments(g_tail)

    # apply permutation p, replace i(rp)
    for y in range(len(rr)):
        src = rr[y]
        for x in range(12):
            # whole-word match, so s1 does not touch s10-style names
            a = '%X' % x
            b = '%X' % p[x]
            src = re.sub(r'\bs%s\b' % a, 'w' + b, src)
        src += ' '
        for x in range(1, 9):
            ' replace i+x with 8*(i+x) '
            src = src.replace('i+%s(' % x, '%s(' % (8 * (i + x)))
        ' replace i with 8*i '
        src = src.replace('i(', '%s(' % (8 * i)) + ' '
        # the padding added above is removed again here
        rr[y] = src.rstrip()

    return rr
Example #17
def mul1_code(i, jj, p):
    """Return the code lines of mul_add step i built from the template jj.

    Lines containing ':=v[i+1]' are replaced by extract_v(i + 1, <first two
    characters of the line>), 'shift v47' by shift_v(i + 1).  Registers
    s<X> are renamed to w<p[X]> for X = 0..B (hex), and the symbolic offsets
    'i(' / 'i+x(' are replaced by 8*i and 8*(i+x).
    """
    # NOTE(review): this block does not parse as shown - the body of
    # 'if not j:' is missing, and presumably also the statement that stores j
    # in rr.  Restore from the original script before use.
    rr = ['# mul_add %s' % i]
    for j in jj:
        if j.find(':=v[i+1]') != -1:
            j = extract_v(i + 1, j[:2])
        elif j == 'shift v47':
            j = shift_v(i + 1)
        if not j:

    # step 7 is followed by the tail code
    if i == 7:
        rr += P.cutoff_comments(g_tail)

    # apply permutation p, replace i(rp)
    for y in range(len(rr)):
        src = rr[y]
        for x in range(12):
            # whole-word match on the hex-numbered register name
            a = '%X' % x
            b = '%X' % p[x]
            src = re.sub(r'\bs%s\b' % a, 'w' + b, src)
        src += ' '
        for x in range(1, 9):
            ' replace i+x with 8*(i+x) '
            src = src.replace('i+%s(' % x, '%s(' % (8 * (i + x)))
        ' replace i with 8*i '
        src = src.replace('i(', '%s(' % (8 * i)) + ' '
        # the padding added above is removed again here
        rr[y] = src.rstrip()

    return rr
def do_it(o):
    """Emit both alignment variants of the routine into one assembly listing.

    Layout: g_preamble, the step-0/1 code for alignment 8, alignment_code(8),
    P.g_std_end, 'retq', the 'align0:' label, g_load0, the step-0/1 code for
    alignment 0, alignment_code(0).  The result goes to P.cook_asm.
    """
    # NOTE(review): this block does not parse as shown - the call on the
    # replace_in_string_array line is never closed; its remaining argument(s)
    # are missing and cannot be told from here.
    mul_01 = P.cutoff_comments(E.g_mul_01)
    # keep the template from its 'mulx' line onwards
    mul_01 = P.starting_from(mul_01, 'mulx')
    mul_01 = P.replace_in_string_array(mul_01, 'pextrq $0x1, t0, w8',
    code = P.cutoff_comments(g_preamble)
    code += replace_extract_v(mul_01, 8)
    code += alignment_code(8)
    # TODO: is it possible to shorten binary code by jmp to common part?
    code += P.g_std_end + ['retq', 'align0:']

    code += P.cutoff_comments(g_load0)
    code += replace_extract_v(mul_01, 0)
    code += alignment_code(0)

    P.cook_asm(o, code, E.g_var_map, True)
def do_it(o):
    """Emit mul0_code(g_mul_012) followed by mul1_code steps 3..7.

    Step 3 uses g_muladd_3; steps 4 and 6 use g_muladd_4, steps 5 and 7 its
    P.swap_adox_adcx variant.  Steps 3 and 4 run with the list 0..11 as
    permutation; before each of the steps 5..7 it is composed with g_perm
    (space-separated hex digits).
    """
    body = mul0_code(P.cutoff_comments(g_mul_012))

    perm = list(range(12))
    body += mul1_code(3, P.cutoff_comments(g_muladd_3), perm)
    plain = P.cutoff_comments(g_muladd_4)
    body += mul1_code(4, plain, perm)
    swapped = P.swap_adox_adcx(plain)
    step_perm = [int(digit, 16) for digit in g_perm.split(' ')]
    # the remaining steps alternate between the swapped and the plain template
    for step, template in ((5, swapped), (6, plain), (7, swapped)):
        perm = P.composition(perm, step_perm)
        body += mul1_code(step, template, perm)

    cook_asm(o, body)
Example #20
def do_it(o):
    """Emit g_mul_01 followed by mul1_code steps 2..6.

    Steps 2 and 4 use g_muladd_2, steps 3 and 5 its P.swap_adox_adcx
    variant, step 6 the template returned by cook_tail.  The permutation
    starts as the list 0..11 and is composed with g_perm (space-separated
    hex digits) before each of the steps 3..6.

    o: output target, forwarded to P.cook_asm together with g_var_map.
    """
    meat = P.cutoff_comments(g_mul_01)
    p = list(range(12))
    m2 = P.cutoff_comments(g_muladd_2)
    m3 = P.swap_adox_adcx(m2)
    meat += mul1_code(2, m2, p)
    q = [int(x, 16) for x in g_perm.split(' ')]
    for i in range(3, 6):
        p = P.composition(p, q)
        # Odd steps take the swapped template, even steps the plain one.
        # Without this either/or, steps 3 and 5 were emitted twice and
        # step 4 never.
        meat += mul1_code(i, m3 if i & 1 else m2, p)

    tail = cook_tail(m2)
    p = P.composition(p, q)
    meat += mul1_code(6, tail, p)

    P.cook_asm(o, meat, g_var_map, True)
Example #21
def do_4arg(o):
    """Write the procedure g_subroutine, built from g_code, to o.

    The auto-generated notice (with '//' turned into '#') and the procedure
    header are written first.  g_vars_map is a space-separated list of
    'register,name' pairs; each name is substituted by '%register' in the
    code.  Raises ValueError when an entry is not exactly one such pair.
    """
    script_name = os.path.basename(sys.argv[0])
    notice = P.g_autogenerated_patt % script_name
    o.write(notice.replace('//', '#'))
    P.write_asm_procedure_header(o, g_subroutine)

    text = '\n'.join(P.cutoff_comments(g_code))
    symbols = {}
    for entry in g_vars_map.split(' '):
        register, name = entry.split(',')
        symbols[name] = '%' + register
    text = P.replace_symbolic_vars_name(text, symbols)
    P.write_asm_inside(o, text)
def v_alignment_code(amd, v_alignment):
    """Concatenate the chew_code output of steps 0..10 for one alignment.

    amd: truthy selects the *_zen templates and g_perm_zen, falsy the *_bwl
        templates and g_perm_bwl.
    v_alignment: falsy means aligned; only then g_v_load_0 is emitted first.

    The permutation starts as the list 0..0xD and is composed with the
    selected g_perm_* (space-separated hex digits) before each of the steps
    3..9.  Returns the list of generated lines.
    """
    aligned = not v_alignment

    # The two either/or choices below had lost their 'else:'.  The second
    # assignment then overwrote the first, and the names stayed unbound on
    # the other path.
    if aligned:
        code = chew_code(g_v_load_0, amd, 0, aligned, None)
    else:
        code = []
    code += chew_code(g_mul_0, amd, 0, aligned, None)

    if amd:
        templates = (g_mul_1_zen, g_mul_2_zen, g_mul_10_zen)
        perm_text = g_perm_zen
    else:
        templates = (g_mul_1_bwl, g_mul_2_bwl, g_mul_10_bwl)
        perm_text = g_perm_bwl
    m_1, m_2, m_A = [P.cutoff_comments(t) for t in templates]
    q = [int(i, 16) for i in perm_text.split(' ')]
    m_9 = form_m9(m_2, amd)

    p = list(range(0xD + 1))

    code += chew_code(m_1, amd, 1, aligned, p)
    code += chew_code(m_2, amd, 2, aligned, p)
    for i in range(3, 9):
        p = P.composition(p, q)
        code += chew_code(m_2, amd, i, aligned, p)
    p = P.composition(p, q)
    code += chew_code(m_9, amd, 9, aligned, p)

    # NOTE(review): kept exactly as found - both step-10 variants are emitted
    # when aligned and none otherwise.  This may be another lost 'else:';
    # confirm against the original script.
    if aligned:
        code += chew_code(m_A, amd, 10, False, None)
        code += chew_code(m_A, amd, 10, True, None)
    return code
def do_it(o):
    """Emit mul0_code(g_mul_01) followed by mul1_code steps 2..7.

    Each step k has its own template g_muladd_<k>; all steps use the list
    0..11 as permutation.  The result goes to cook_asm.
    """
    body = mul0_code(P.cutoff_comments(g_mul_01))
    order = list(range(12))
    templates = (g_muladd_2, g_muladd_3, g_muladd_4,
                 g_muladd_5, g_muladd_6, g_muladd_7)
    for step, template in enumerate(templates, 2):
        body += mul1_code(step, P.cutoff_comments(template), order)

    cook_asm(o, body)
def do_it(out):
    """Emit g_mul1, mul1_code steps 1..6 from g_muladd and the g_tail step 7.

    mul1_code returns text here; it is split into lines before being
    appended.  The permutation starts as the list 0..10 and is composed with
    g_permutation (space-separated decimal numbers) after each of the steps
    1..6.  '!restore <reg>' markers in the tail are turned into the matching
    restore instruction for every register in the map returned by
    P.save_registers.

    out: output target, forwarded to cook_asm.
    """
    meat = P.cutoff_comments(g_mul1)
    muladd = P.cutoff_comments(g_muladd)
    tail = P.cutoff_comments(g_tail)

    xmm_save = P.save_registers(meat)

    permutation = list(range(11))
    s = [int(y) for y in g_permutation.split(' ')]
    for i in range(1, 7):
        meat += mul1_code(i, muladd, permutation).split('\n')
        # composition of permutation and s: new(i) == s(permutation(i))
        permutation = [s[j] for j in permutation]
    tail = mul1_code(7, tail, permutation)
    for k, v in xmm_save.items():
        # v is None -> restore with pop; otherwise v names where the value
        # was kept.  The 'else:' was missing, so registers with a non-None
        # v were never restored.
        if v is None:
            tail = tail.replace('!restore ' + k, 'pop %s      | restore' % k)
        else:
            tail = tail.replace('!restore ' + k, 'movq %s, %s | restore' % (v, k))
    # any marker left over is downgraded to '|restore'
    tail = tail.replace('!restore', '|restore')
    meat += tail.split('\n')
    cook_asm(out, '\n'.join(meat), xmm_save)
Example #25
def do_it(o):
    """Emit mul_code steps 0..5 and pass them to cook_asm.

    Step 0 uses g_preamble, step 1 g_mul_1, steps 2..4 g_mul_2.  Step 5 uses
    g_mul_2 without its last line, with 'movq rp, s6' inserted after its
    first line and g_tail appended.  The permutation starts as the list
    0..13 and is composed with g_perm (space-separated hex digits) after
    each of the steps 2..4.

    Unless g_writing_macro is set, registers are saved via
    P.save_registers_in_xmm / P.save_in_xmm and restored via
    P.insert_restore.
    """
    preamble = P.cutoff_comments(g_preamble)
    saved = {} if g_writing_macro else P.save_registers_in_xmm(preamble, 11)

    perm = list(range(14))
    code = mul_code(0, preamble, perm)
    first = P.cutoff_comments(g_mul_1)
    repeated = P.cutoff_comments(g_mul_2)
    code += mul_code(1, first, perm)
    step_perm = [int(digit, 16) for digit in g_perm.split(' ')]
    for step in (2, 3, 4):
        code += mul_code(step, repeated, perm)
        perm = P.composition(perm, step_perm)

    trimmed = repeated[:-1]
    # extract rp as soon as s6 becomes useless
    last = [trimmed[0], 'movq rp, s6']
    last = last + trimmed[1:] + P.cutoff_comments(g_tail)
    code += mul_code(5, last, perm)

    if not g_writing_macro:
        P.save_in_xmm(code, saved)
        P.insert_restore(code, saved)
    cook_asm(o, code, saved, g_var_map)
Example #26
def alignment_code(alignment, extra):
    """Concatenate the chew_code output for one alignment variant.

    alignment: truthy starts with g_preamble, falsy with g_load_0; the
        flag passed to chew_code for the steps 0..6 is 'not alignment'.
    extra: forwarded to chew_code and form_tail.

    Steps 2..6 share the g_mul_2 template; the permutation starts as the
    list 0..0xB and is composed with g_perm (space-separated hex digits)
    after each of them.  Step 7, built by form_tail, is only emitted when
    alignment is falsy.
    """
    # The 'else:' was missing here.  g_load_0 then always replaced the
    # preamble, and a falsy alignment left 'code' unbound, so the step-7
    # branch below could never run.
    if alignment:
        code = chew_code(g_preamble, 0, extra, None, None)
    else:
        code = chew_code(g_load_0, None, extra, True, None)

    code += chew_code(g_mul_01, 0, extra, not alignment, None)
    m2 = P.cutoff_comments(g_mul_2)
    m7 = form_tail(m2, extra)
    p = list(range(0xB + 1))
    q = [int(i, 16) for i in g_perm.split(' ')]
    for i in range(2, 7):
        code += chew_code(m2, i, extra, not alignment, p)
        p = P.composition(p, q)
    if not alignment:
        code += chew_code(m7, 7, extra, None, p)
    return code
def alignment_code(alignment):
    """Concatenate the chew_code output for one alignment variant.

    alignment: truthy starts with g_preamble, falsy with g_load_0; it is
        also forwarded to chew_code for the steps 0..9.

    Steps 2..9 share the g_mul_2 template; the permutation starts as the
    list 0..0xC and is composed with g_perm (space-separated hex digits)
    after each of them.  Step 10, built by form_tail, is only emitted when
    alignment is falsy.
    """
    # The 'else:' was missing here.  g_load_0 then always replaced the
    # preamble, and a falsy alignment left 'code' unbound, so the step-10
    # branch below could never run.
    if alignment:
        code = chew_code(g_preamble, 0, None, None)
    else:
        code = chew_code(g_load_0, None, True, None)

    code += chew_code(g_mul_01, 0, alignment, None)
    m2 = P.cutoff_comments(g_mul_2)
    tt = form_tail(m2)
    p = list(range(0xC + 1))
    q = [int(i, 16) for i in g_perm.split(' ')]
    for i in range(2, 10):
        code += chew_code(m2, i, alignment, p)
        p = P.composition(p, q)
    if not alignment:
        code += chew_code(tt, 10, None, p)
    return code
def alignment_code(shift):
    """Return the mul1_code output of steps 2..6 for the given shift.

    Steps 2 and 4 use E.g_muladd_2, steps 3 and 5 its P.swap_adox_adcx
    variant, step 6 the template returned by E.cook_tail.  The permutation
    starts as the list 0..11 and is composed with E.g_perm (space-separated
    hex digits) before each of the steps 3..6.
    """
    p = list(range(12))
    m2 = P.cutoff_comments(E.g_muladd_2)
    m3 = P.swap_adox_adcx(m2)
    code = mul1_code(2, m2, p, shift)
    q = [int(x, 16) for x in E.g_perm.split(' ')]
    for i in range(3, 6):
        p = P.composition(p, q)
        # Odd steps take the swapped template, even steps the plain one.
        # Without this either/or, steps 3 and 5 were emitted twice and
        # step 4 never.
        code += mul1_code(i, m3 if i & 1 else m2, p, shift)

    tail = E.cook_tail(m2)
    p = P.composition(p, q)
    code += mul1_code(6, tail, p, shift)

    return code
Example #29
def do_it(tgt, code, b_ofs):
    """Write code as an inline-assembly macro via P.write_cpp_code.

    tgt: output target, forwarded to P.write_cpp_code.
    code: template text; comments are cut off and every line is passed
        through chew_line together with b_ofs.
    b_ofs: forwarded to chew_line.

    Every scratch variable name, plus 'ap' and 'rp', is rewritten to the
    '%[name]' operand form (whole-word matches only).
    """
    # NOTE(review): the dict literal was never closed in the listing this was
    # taken from; further keys may have been lost - confirm against the
    # original script.
    data = {
        'macro_name': P.guess_subroutine_name(sys.argv[1]),
        'scratch': ['w%s s%s' % (i, i) for i in range(4)],
        'vars_type': {'s%s' % i: 0 for i in range(4)},
        'default_type': 'mp_limb_t',
        'input': ['rp r r_p', 'ap r a_p'],
        'clobber': 'memory cc',
        'source': os.path.basename(sys.argv[0]),
        'code_language': 'asm',
        'macro_parameters': 'r_p a_p',
    }

    all_vars = P.extract_int_vars_name(data['scratch']) + ['ap', 'rp']
    code = '\n'.join(chew_line(x, b_ofs) for x in P.cutoff_comments(code))
    for v in all_vars:
        code = re.sub(r'\b%s\b' % v, '%%[%s]' % v, code)

    P.write_cpp_code(tgt, code, data)
Example #30
def do_it(o, i_name):
    """Write two procedures to o: g_hooligan and a wrapper that calls it.

    The first procedure's body is chew_code applied to the file i_name
    (opened in binary mode).  The second is built from g_code: registers are
    saved via P.save_registers_in_xmm / P.save_in_xmm, '@call' becomes a
    call to g_hooligan, each '!restore <reg>' becomes a movq from its save
    location, and symbolic names are substituted through g_var_map.  It is
    named after sys.argv[2] and ends with 'retq'.
    """
    with open(i_name, 'rb') as source:
        inner = '\n'.join(chew_code(source))
    script_name = os.path.basename(sys.argv[0])
    notice = P.g_autogenerated_patt % script_name
    o.write(notice.replace('//', '#'))
    P.write_asm_procedure_header(o, g_hooligan)
    P.write_asm_inside(o, inner)

    lines = P.cutoff_comments(g_code)
    saved = P.save_registers_in_xmm(lines, 5)
    # called for its effect on 'lines'; its return value is not used
    P.save_in_xmm(lines, saved)

    wrapper = '\n'.join(lines)
    wrapper = wrapper.replace('@call', 'call ' + g_hooligan)
    for register, slot in saved.items():
        wrapper = wrapper.replace('!restore ' + register,
                                  'movq %s, %s' % (slot, register))
    wrapper = P.replace_symbolic_names_wr(wrapper, g_var_map)

    P.write_asm_procedure_header(o, P.guess_subroutine_name(sys.argv[2]))
    P.write_asm_inside(o, wrapper + '\nretq')