def v_alignment_code(amd, v_alignment):
    """Concatenate the ``chew_code`` pieces for steps 0 through 10.

    ``amd`` chooses between the ``*_zen`` templates/permutation (truthy) and
    the ``*_bwl`` ones (falsy).  The flag handed to ``chew_code`` for steps
    0..9 is ``not v_alignment``.

    Returns the accumulated result of the ``chew_code`` calls.
    """
    aligned = not v_alignment
    code = chew_code(g_v_load_0, amd, 0, aligned, None) if aligned else []
    code += chew_code(g_mul_0, amd, 0, aligned, None)

    if amd:
        sources = (g_mul_1_zen, g_mul_2_zen, g_mul_10_zen)
        perm_text = g_perm_zen
    else:
        sources = (g_mul_1_bwl, g_mul_2_bwl, g_mul_10_bwl)
        perm_text = g_perm_bwl
    mul_1, mul_2, mul_10 = [P.cutoff_comments(src) for src in sources]
    step_perm = [int(digit, 16) for digit in perm_text.split(' ')]
    mul_9 = form_m9(mul_2, amd)

    # Steps 1 and 2 use the identity permutation.
    current = list(range(0xD + 1))
    code += chew_code(mul_1, amd, 1, aligned, current)
    code += chew_code(mul_2, amd, 2, aligned, current)

    # From step 3 on the permutation is advanced before each piece; step 9
    # uses the template built by form_m9 instead of the plain mul_2 one.
    for step in range(3, 10):
        current = P.composition(current, step_perm)
        template = mul_9 if step == 9 else mul_2
        code += chew_code(template, amd, step, aligned, current)

    if aligned:
        # NOTE(review): the source's indentation was lost; both step-10
        # pieces are assumed to be emitted only in this branch -- confirm.
        for flag in (False, True):
            code += chew_code(mul_10, amd, 10, flag, None)
    return code
def _print_postcondition_trace(title, perm_text, pre, post, post_9):
    """Print ``title`` and then the permuted patterns for i = 2..9.

    ``perm_text`` is a space-separated list of hexadecimal indices.  Starting
    from the identity on 14 elements, the permutation is composed with it
    once per iteration.  For each ``i`` the ``pre`` and ``post`` patterns are
    printed after ``E.apply_s_permutation``; at ``i == 9`` the ``post_9``
    pattern is printed between them.
    """
    step_perm = [int(x, 16) for x in perm_text.split(' ')]
    current = list(range(0xD + 1))
    print(title)
    for i in range(2, 10):
        # Single-argument print() yields the same text under Python 2 and 3.
        print('i=%X pre %s' % (i, E.apply_s_permutation(pre, current)))
        if i == 9:
            print(E.apply_s_permutation(post_9, current))
        print('i=%X pst %s' % (i, E.apply_s_permutation(post, current)))
        current = P.composition(current, step_perm)


def show_postcondition():
    """Print the pre/post patterns for the Broadwell and Zen permutations."""
    _print_postcondition_trace(
        'Broadwell',
        g_perm_bwl,
        '''s7 sC+s9+s5 s8+sB+s1" s3+sA s2+s0' s4 == s6 sD''',
        '''sD s7+sA+s4 s8+s5+s1" s6+s2 s9+s3' s0 == sC sB''',
        '''sD sA+s4+s2 s8+s5+sB" s6+s1 s9+s7' sC''',
    )
    _print_postcondition_trace(
        '\nZen',
        g_perm_zen,
        '''sD sC+s9+sB sA+s4+s0 s7+s5+s2" s1| s3|' == s6 s8''',
        '''s3 sD+s8+s7 s9+s5+s6 s0+sA+sC" s2| sB|' == s1 s4''',
        '''sD s9 s0+sA s2+s7+s1 sB+s3|" s6|' == s4=v[10]''',
    )
def do_it(o):
    """Prepare the ``g_mul01`` + ``g_mul01_tail`` code and pass it to ``cook_asm``.

    Both templates are stripped of comments and concatenated, symbolic names
    are substituted using ``g_var_map``, and each resulting line goes through
    ``replace_ymm_by_xmm``.  The result is handed to ``cook_asm`` together
    with ``o`` and the name ``'mul8x2_zen'``.

    The former ``if 0:`` branch was unreachable and referred to ``meat``,
    which was never assigned (all of its assignments were commented out), so
    it has been dropped along with the commented-out experiments.
    """
    lines = P.cutoff_comments(g_mul01) + P.cutoff_comments(g_mul01_tail)
    # The substitution is applied to the text as a whole, so the lines are
    # joined first and split again afterwards.
    text = P.replace_symbolic_names_wr('\n'.join(lines), g_var_map)
    code = [replace_ymm_by_xmm(line) for line in text.split('\n')]
    cook_asm(o, code, 'mul8x2_zen')
def do_it(o, mul_01, muladd, tail, perm, var_map):
    """Build the code for steps 0 and 2..6 and pass it to ``P.cook_asm``.

    ``mul_01``, ``muladd`` and ``tail`` are templates that are stripped of
    comments before use; ``perm`` is a space-separated list of hexadecimal
    indices.  Step 6 uses the ``muladd`` lines up to (not including) the
    first one containing ``'mulx 48('``, followed by the ``tail`` lines.
    """
    current = list(range(12))
    code = mul_add_code(0, P.cutoff_comments(mul_01), current)
    body = P.cutoff_comments(muladd)
    code += mul_add_code(2, body, current)

    step_perm = [int(digit, 16) for digit in perm.split(' ')]
    for step in (3, 4, 5):
        current = P.composition(current, step_perm)
        code += mul_add_code(step, body, current)

    # Index of the first line mentioning 'mulx 48('; raises IndexError when
    # there is none.
    cut = [at for at, line in enumerate(body) if 'mulx 48(' in line][0]
    last = body[:cut] + P.cutoff_comments(tail)
    current = P.composition(current, step_perm)
    code += mul_add_code(6, last, current)
    P.cook_asm(o, code, var_map, True)
def alignment_code(alignment):
    """Concatenate the ``chew_code`` pieces for one value of ``alignment``.

    When ``alignment`` is falsy the result starts with the ``g_load_0`` piece
    and also contains step 10 and the tail; when it is truthy the loop stops
    after step 9.  The step-9 piece is passed through
    ``F.remove_after_jmp``.
    """
    code = [] if alignment else chew_code(g_load_0, None, 0, None)
    code += chew_code(g_mul_01, 0, alignment, None)
    code += chew_code(g_mul_2, 2, alignment, None)

    current = list(range(0xC + 1))
    step_perm = [int(digit, 16) for digit in g_perm.split(' ')]
    mul_3 = P.cutoff_comments(g_mul_3)

    last_step = 9 if alignment else 10
    for step in range(3, last_step + 1):
        piece = chew_code(mul_3, step, alignment, current)
        if step == 9:
            piece = F.remove_after_jmp(piece)
        code += piece
        # The permutation is not advanced after step 10.
        if step < 10:
            current = P.composition(current, step_perm)

    if not alignment:
        # NOTE(review): the source's indentation was lost; the tail is
        # assumed to be emitted only in this branch -- confirm.
        code.append('# tail')
        # The first element of the tail piece is dropped.
        code += chew_code(g_tail, 10, alignment, current)[1:]
    return code
def do_it(o):
    """Build the code for ``g_mul_012`` plus steps 3..7 and pass it to ``cook_asm``.

    Steps 4 and 6 use the ``g_muladd_4`` template; steps 5 and 7 use its
    ``P.swap_adox_adcx`` variant.  The permutation is advanced before each
    of steps 5, 6 and 7.
    """
    meat = mul0_code(P.cutoff_comments(g_mul_012))
    current = list(range(12))
    meat += mul1_code(3, P.cutoff_comments(g_muladd_3), current)

    muladd_4 = P.cutoff_comments(g_muladd_4)
    meat += mul1_code(4, muladd_4, current)
    swapped = P.swap_adox_adcx(muladd_4)

    step_perm = [int(digit, 16) for digit in g_perm.split(' ')]
    for step, template in ((5, swapped), (6, muladd_4), (7, swapped)):
        current = P.composition(current, step_perm)
        meat += mul1_code(step, template, current)

    cook_asm(o, meat)
def alignment_code(shift):
    """Return the concatenated ``mul1_code`` pieces for steps 2..6.

    Even steps use the ``E.g_muladd_2`` template and odd steps its
    ``P.swap_adox_adcx`` variant; step 6 uses ``E.cook_tail`` of the plain
    template.  ``shift`` is forwarded unchanged to every ``mul1_code`` call.
    """
    current = list(range(12))
    plain = P.cutoff_comments(E.g_muladd_2)
    swapped = P.swap_adox_adcx(plain)
    code = mul1_code(2, plain, current, shift)

    step_perm = [int(digit, 16) for digit in E.g_perm.split(' ')]
    for step in (3, 4, 5):
        current = P.composition(current, step_perm)
        template = swapped if step & 1 else plain
        code += mul1_code(step, template, current, shift)

    tail = E.cook_tail(plain)
    current = P.composition(current, step_perm)
    code += mul1_code(6, tail, current, shift)
    return code
def alignment_code(shift):
    """Return the concatenated ``mul_code`` pieces for steps 0 and 2..5.

    Step 0 uses the ``g_mul01`` template, the others ``g_mul2``.  The
    permutation starts as the identity on 12 elements and is advanced after
    each of steps 2..5.  ``shift`` is forwarded to every ``mul_code`` call.
    """
    current = list(range(12))
    result = mul_code(0, P.cutoff_comments(g_mul01), current, shift)
    step_perm = [int(digit, 16) for digit in g_perm.split(' ')]
    template = P.cutoff_comments(g_mul2)
    for step in (2, 3, 4, 5):
        result += mul_code(step, template, current, shift)
        current = P.composition(current, step_perm)
    return result
def do_it(o):
    """Build the code for ``g_mul_01`` plus steps 2..6 and pass it to ``P.cook_asm``.

    Even steps use the ``g_muladd_2`` template and odd steps its
    ``P.swap_adox_adcx`` variant; step 6 uses ``cook_tail`` of the plain
    template.
    """
    meat = P.cutoff_comments(g_mul_01)
    current = list(range(12))
    plain = P.cutoff_comments(g_muladd_2)
    swapped = P.swap_adox_adcx(plain)
    meat += mul1_code(2, plain, current)

    step_perm = [int(digit, 16) for digit in g_perm.split(' ')]
    for step in (3, 4, 5):
        current = P.composition(current, step_perm)
        template = swapped if step & 1 else plain
        meat += mul1_code(step, template, current)

    tail = cook_tail(plain)
    current = P.composition(current, step_perm)
    meat += mul1_code(6, tail, current)
    P.cook_asm(o, meat, g_var_map, True)
def do_it(name, o):
    """Build the code for ``g_mul_01`` plus steps 2..7 and pass it to ``cook_asm``.

    Every step from 2 to 7 uses a freshly stripped copy of ``g_muladd_2``;
    the permutation is advanced before each of steps 3..7.
    """
    meat = P.cutoff_comments(g_mul_01)
    current = list(range(12))
    step_perm = None
    for step in range(2, 8):
        if step == 3:
            # The permutation text is parsed after step 2 has been emitted.
            step_perm = [int(digit, 16) for digit in g_perm.split(' ')]
        if step > 2:
            current = P.composition(current, step_perm)
        # The template is re-read on every step rather than shared.
        meat += mul1_code(step, P.cutoff_comments(g_muladd_2), current)
    cook_asm(name, o, meat)
def show_postcondition():
    """Print the permuted pre/post patterns for i = 2..7.

    The permutation starts as the identity on 12 elements and is composed
    with the one parsed from ``g_perm`` after every iteration.  For i < 7 the
    ``pst`` pattern is printed; at i == 7 the names ``s0``..``sB`` are
    printed instead, followed on the next line by their permuted form.
    """
    current = list(range(0xB + 1))
    step_perm = [int(x, 16) for x in g_perm.split(' ')]
    pre = '''s7 ^4+sB+s5 ^3+s8+sA" s6+s3 s2+s1' s0'''
    post = '''s6 ^5+sB+s0 ^4+s9+s2" s7+s8 sA+s4' s1'''
    for i in range(2, 8):
        # Single-argument print() yields the same text under Python 2 and 3.
        print('i=%X pre %s' % (i, E.apply_s_permutation(pre, current)))
        if i < 7:
            print('i=%X pst %s' % (i, E.apply_s_permutation(post, current)))
        else:
            names = ' '.join(['s%X' % j for j in range(0xB + 1)])
            print('%s %s' % (names, '\n' + E.apply_s_permutation(names, current)))
        current = P.composition(current, step_perm)
def show_postcondition():
    """Print the permuted pre/post patterns for i = 3..10.

    The permutation starts as the identity on 13 elements and is composed
    with the one parsed from ``g_perm`` after every iteration.  At i == 10
    only the ``pre`` pattern is printed.  For the other steps the ``pst``
    pattern is printed twice: as permuted, and again after substituting
    names from ``g_var_map`` and removing ``%`` characters.
    """
    current = list(range(0xC + 1))
    step_perm = [int(x, 16) for x in g_perm.split(' ')]
    assert len(current) == len(step_perm)
    pre = '''sA sB+s2+s8 s7+sC+s0' s4+s3 s1+s5"'''
    post = '''sB s9+sC+s4 s2+s8+s5' s0+s7 s3+s6"'''
    for i in range(3, 11):
        # Single-argument print() yields the same text under Python 2 and 3.
        print('i=%X pre %s' % (i, A.apply_s_permutation(pre, current)))
        if i == 10:
            break
        pst = A.apply_s_permutation(post, current)
        print('i=%X pst %s' % (i, pst))
        pst = P.replace_symbolic_names_wr(pst, g_var_map).replace('%', '')
        print('pst again %s' % (pst,))
        current = P.composition(current, step_perm)
def alignment_code(alignment, extra):
    """Concatenate the ``chew_code`` pieces for one value of ``alignment``.

    A truthy ``alignment`` starts with the ``g_preamble`` piece; a falsy one
    starts with ``g_load_0`` and additionally ends with the step-7 tail built
    by ``form_tail``.  ``extra`` is forwarded to every ``chew_code`` call and
    to ``form_tail``.
    """
    negated = not alignment
    if negated:
        code = chew_code(g_load_0, None, extra, True, None)
    else:
        code = chew_code(g_preamble, 0, extra, None, None)
    code += chew_code(g_mul_01, 0, extra, negated, None)

    mul_2 = P.cutoff_comments(g_mul_2)
    tail = form_tail(mul_2, extra)
    current = list(range(0xB + 1))
    step_perm = [int(digit, 16) for digit in g_perm.split(' ')]
    for step in range(2, 7):
        code += chew_code(mul_2, step, extra, negated, current)
        current = P.composition(current, step_perm)

    if negated:
        code += chew_code(tail, 7, extra, None, current)
    return code
def alignment_code(alignment):
    """Concatenate the ``chew_code`` pieces for one value of ``alignment``.

    A truthy ``alignment`` starts with the ``g_preamble`` piece; a falsy one
    starts with ``g_load_0`` and additionally ends with the step-10 tail
    built by ``form_tail``.
    """
    if not alignment:
        code = chew_code(g_load_0, None, True, None)
    else:
        code = chew_code(g_preamble, 0, None, None)
    code += chew_code(g_mul_01, 0, alignment, None)

    mul_2 = P.cutoff_comments(g_mul_2)
    tail = form_tail(mul_2)
    current = list(range(0xC + 1))
    step_perm = [int(digit, 16) for digit in g_perm.split(' ')]
    for step in range(2, 10):
        code += chew_code(mul_2, step, alignment, current)
        current = P.composition(current, step_perm)

    if alignment:
        return code
    code += chew_code(tail, 10, None, current)
    return code
def do_it(o):
    """Build the code for steps 0..5 and pass it to ``cook_asm``.

    Step 0 uses ``g_preamble``, step 1 ``g_mul_1`` and steps 2..4 ``g_mul_2``.
    Step 5 uses ``g_mul_2`` without its last line, with ``'movq rp, s6'``
    inserted after the first line and the ``g_tail`` lines appended.  Unless
    ``g_writing_macro`` is set, ``P.save_registers_in_xmm`` supplies the
    ``xmm_save`` mapping that is later applied to the code.
    """
    preamble = P.cutoff_comments(g_preamble)
    if g_writing_macro:
        xmm_save = {}
    else:
        xmm_save = P.save_registers_in_xmm(preamble, 11)

    current = list(range(14))
    code = mul_code(0, preamble, current)
    mul_1 = P.cutoff_comments(g_mul_1)
    mul_2 = P.cutoff_comments(g_mul_2)
    code += mul_code(1, mul_1, current)

    step_perm = [int(digit, 16) for digit in g_perm.split(' ')]
    for step in (2, 3, 4):
        code += mul_code(step, mul_2, current)
        current = P.composition(current, step_perm)

    trimmed = mul_2[:-1]
    # extract rp as soon as s6 becomes useless
    tail = [trimmed[0], 'movq rp, s6'] + trimmed[1:]
    tail = tail + P.cutoff_comments(g_tail)
    code += mul_code(5, tail, current)

    if not g_writing_macro:
        # NOTE(review): the source's indentation was lost; insert_restore is
        # assumed to belong to this branch together with save_in_xmm --
        # confirm.
        P.save_in_xmm(code, xmm_save)
        P.insert_restore(code, xmm_save)
    cook_asm(o, code, xmm_save, g_var_map)