def do_it(o):
    """Generate the ``mul8x2_zen`` routine and pass it to ``cook_asm``.

    The comment-stripped ``g_mul01`` and ``g_mul01_tail`` templates are
    concatenated, symbolic names are substituted through ``g_var_map`` and
    every resulting line is run through ``replace_ymm_by_xmm``.

    Args:
        o: output target, forwarded unchanged to ``cook_asm``.
    """
    # NOTE: a disabled ``if 0:`` experiment used to sit here.  It appended to
    # a ``meat`` list whose every assignment was commented out, so switching
    # it on would have raised NameError; it never contributed to the output.
    lines = P.cutoff_comments(g_mul01) + P.cutoff_comments(g_mul01_tail)
    text = P.replace_symbolic_names_wr('\n'.join(lines), g_var_map)
    converted = [replace_ymm_by_xmm(line) for line in text.split('\n')]
    cook_asm(o, converted, 'mul8x2_zen')
def alignment_code(shift):
    """Concatenate the ``muladd_code`` output for steps 0, 2, 3 and 4.

    Each step uses its own comment-stripped template (``g_mul_01``,
    ``g_mul_2``, ``g_mul_3``, ``g_mul_4``), the same 12-entry identity
    permutation and the caller's ``shift``.

    Args:
        shift: forwarded unchanged to every ``muladd_code`` call.

    Returns:
        The combined result of the four ``muladd_code`` calls.
    """
    identity = list(range(12))
    result = muladd_code(0, P.cutoff_comments(g_mul_01), identity, shift)
    for index, template in ((2, g_mul_2), (3, g_mul_3), (4, g_mul_4)):
        result += muladd_code(index, P.cutoff_comments(template), identity,
                              shift)
    return result
def alignment_code(shift):
    """Build the code for step 0 followed by steps 2..5.

    Step 0 is produced from ``g_mul01``; steps 2 to 5 reuse the
    comment-stripped ``g_mul2`` template.  The permutation starts as the
    identity on 12 entries and is advanced with ``P.composition`` after every
    repeated step, using the hexadecimal entries of ``g_perm``.

    Args:
        shift: forwarded unchanged to every ``mul_code`` call.

    Returns:
        The combined result of all ``mul_code`` calls.
    """
    perm = list(range(12))
    out = mul_code(0, P.cutoff_comments(g_mul01), perm, shift)
    advance = [int(token, 16) for token in g_perm.split(' ')]
    body = P.cutoff_comments(g_mul2)
    index = 2
    while index < 6:
        out += mul_code(index, body, perm, shift)
        perm = P.composition(perm, advance)
        index += 1
    return out
def do_it(o):
    """Assemble the routine from ``g_mul0``, five mul-add steps and the tail.

    Args:
        o: output target, forwarded to ``P.cook_asm`` together with the
            collected lines and ``g_var_map``.
    """
    lines = P.cutoff_comments(g_mul0)
    body = P.cutoff_comments(g_muladd)
    ending = P.cutoff_comments(g_tail)
    for step in (1, 2, 3, 4, 5):
        lines += mul1_code(step, body)
    lines += ending
    P.cook_asm(o, lines, g_var_map, True)
def do_it(o):
    """Emit the routine as two ``alignment_code`` variants in sequence.

    Layout of the generated line list: the preamble, ``alignment_code(8)``,
    the standard ending from ``P.g_std_end`` followed by ``retq`` and the
    ``align0:`` label, the ``g_load0`` block and finally
    ``alignment_code(0)``.

    Args:
        o: output target, forwarded to ``P.cook_asm`` with ``E.g_var_map``.
    """
    # The original also computed P.cutoff_comments(g_mul01) into a local that
    # was never read; that dead assignment has been dropped.
    code = P.cutoff_comments(g_preamble)
    code += alignment_code(8)
    code += P.g_std_end + ['retq', 'align0:']
    code += P.cutoff_comments(g_load0)
    code += alignment_code(0)
    P.cook_asm(o, code, E.g_var_map, True)
def do_it(name, o):
    """Assemble ``g_mul_01`` plus mul-add steps 2..7 and cook the result.

    Step 2 uses the identity permutation on 12 entries; before each of the
    steps 3..7 the permutation is advanced with ``P.composition`` using the
    hexadecimal entries of ``g_perm``.

    Args:
        name: forwarded as the first argument of ``cook_asm``.
        o: forwarded as the second argument of ``cook_asm``.
    """
    def step_code(index, perm):
        # The template is re-read for every step, exactly as before.
        return mul1_code(index, P.cutoff_comments(g_muladd_2), perm)

    lines = P.cutoff_comments(g_mul_01)
    current = list(range(12))
    lines += step_code(2, current)
    advance = [int(token, 16) for token in g_perm.split(' ')]
    for index in (3, 4, 5, 6, 7):
        current = P.composition(current, advance)
        lines += step_code(index, current)
    cook_asm(name, o, lines)
def do_it(o, mul_01, muladd, tail, perm, var_map):
    """Generate a routine from caller-supplied templates.

    Steps 0 and 2 use the identity permutation on 12 entries; steps 3..6
    advance it with ``P.composition`` first.  For the last step the mul-add
    template is cut just before its first line containing ``'mulx 48('`` and
    the comment-stripped ``tail`` is appended instead.

    Args:
        o: output target for ``P.cook_asm``.
        mul_01: template for step 0.
        muladd: template for the repeated mul-add steps.
        tail: template replacing the end of the final step.
        perm: space-separated hexadecimal permutation entries.
        var_map: forwarded to ``P.cook_asm``.

    Raises:
        IndexError: if no mul-add line contains ``'mulx 48('``.
    """
    current = list(range(12))
    code = mul_add_code(0, P.cutoff_comments(mul_01), current)
    body = P.cutoff_comments(muladd)
    code += mul_add_code(2, body, current)
    advance = [int(token, 16) for token in perm.split(' ')]
    for index in (3, 4, 5):
        current = P.composition(current, advance)
        code += mul_add_code(index, body, current)
    # Position of the first row mentioning 'mulx 48('; [0] keeps IndexError
    # as the failure mode when there is none.
    cut = [pos for pos, row in enumerate(body) if 'mulx 48(' in row][0]
    last_body = body[:cut] + P.cutoff_comments(tail)
    current = P.composition(current, advance)
    code += mul_add_code(6, last_body, current)
    P.cook_asm(o, code, var_map, True)
def form_m9(src, amd):
    """Build the template for step 9 out of ``src`` and the piece templates.

    Both variants start from ``g_mul_9_bwl_piece0`` followed by ``src``, keep
    everything up to and including a marker line, and append a variant
    specific ending.

    Args:
        src: list of template lines placed after piece 0.
        amd: truthy selects the ``zen`` ending, falsy the ``bwl`` ending.

    Returns:
        The list returned by ``inject``.

    Raises:
        IndexError: if the marker line of the chosen variant is missing.
    """
    if amd:
        rez = P.cutoff_comments(g_mul_9_bwl_piece0) + src
        # marker: everything after this line is replaced by the zen ending
        p = 'adc3(rr[i+4], sC, s8)'
        i = [i for i in range(len(rez)) if rez[i].find(p) != -1][0] + 1
        rez = rez[:i] + P.cutoff_comments(g_mul_9_zen_piece2)
    else:
        rez = P.cutoff_comments(g_mul_9_bwl_piece0) + src
        # marker: everything after this line is replaced by the bwl ending
        i = [ i for i in range(len(rez)) if rez[i].find('movq s3, rr[i+4]') != -1 ]
        i = i[0] + 1
        rez = rez[:i] + P.cutoff_comments(g_mul_9_bwl_piece2)
    # NOTE(review): applied to both variants here even though the piece is
    # named *bwl* -- confirm it was not meant for the else-branch only.
    rez = inject(rez, 'mulx sp[3]', g_mul_9_bwl_piece1)
    return rez
def mul_code(i, jj_arg, p, align):
    """Expand one template into concrete lines for step ``i``.

    Rows matching ``g_patt`` are replaced by the result of ``extract_v`` (and
    dropped when that result is falsy); all other rows are kept.  Every line
    then goes through ``E.apply_s_permutation`` and has its ``i(`` / ``i+x(``
    offsets turned into byte offsets (multiples of 8).

    Args:
        i: step number; 0 suppresses the ``# mul_add`` header line and 5
            swaps the last template row for the comment-stripped ``g_tail``.
        jj_arg: list of template rows.
        p: permutation handed to ``E.apply_s_permutation``.
        align: forwarded to ``extract_v``.

    Returns:
        List of generated lines with trailing whitespace removed.
    """
    out = ['# mul_add %s' % i] if i else []
    rows = (jj_arg[:-1] + P.cutoff_comments(g_tail)) if i == 5 else jj_arg
    for row in rows:
        found = g_patt.match(row)
        if not found:
            out.append(row)
            continue
        target, which = found.group(1), found.group(2)
        number = i + 1 if which == 'i+1' else int(which)
        extracted = extract_v(number, target, align)
        if extracted:
            out.append(extracted)
    if not out:
        return out
    # 'i+x(' -> 8*(i+x), for x = 1..8, then bare 'i(' -> 8*i
    offsets = [('i+%s(' % x, '%s(' % (8 * (i + x))) for x in range(1, 9)]
    offsets.append(('i(', '%s(' % (8 * i)))
    result = []
    for row in out:
        text = E.apply_s_permutation(row, p)
        for old, new in offsets:
            text = text.replace(old, new)
        result.append(text.rstrip())
    return result
def chew_code(src, amd, i, aligned, p):
    """Evaluate every template row and optionally permute the result.

    Args:
        src: list of rows, or anything else, which is first passed through
            ``P.cutoff_comments``.
        amd: forwarded to ``evaluate_row``.
        i: step number; a truthy value adds a ``# mul_add`` header and, when
            ``p`` is truthy as well, a comment line listing the permutation.
        aligned: forwarded to ``evaluate_row``.
        p: permutation, or a falsy value to skip the permutation pass.

    Returns:
        List of lines; with a truthy ``p`` every line not starting with
        ``#`` has been passed through ``E.apply_s_permutation``.
    """
    rows = src if isinstance(src, list) else P.cutoff_comments(src)
    out = []
    if i:
        out.append('# mul_add %s' % i)
        if p:
            mapping = ''.join('s%X->W%X ' % (n, w) for n, w in enumerate(p))
            out.append('# ' + mapping)
    for row in rows:
        produced = evaluate_row(row, i, amd, aligned)
        # skip empty results as well as the placeholders [None] and ['']
        if produced and (produced != [None]) and (produced != ['']):
            out += produced
    if not p:
        return out
    return [
        line if line[0] == '#' else E.apply_s_permutation(line, p)
        for line in out
    ]
def cook_tail(cc):
    """Cut ``cc`` at its first line containing ``', t1'`` and add the tail.

    Args:
        cc: sequence of code lines.

    Returns:
        A new list: the lines of ``cc`` before the first one containing
        ``', t1'`` (all of them if there is none) followed by the
        comment-stripped ``g_tail``.
    """
    kept = list(cc)
    for position, line in enumerate(kept):
        if ', t1' in line:
            del kept[position:]
            break
    return kept + P.cutoff_comments(g_tail)
def chew_code(src, i, aligned, p):
    """Evaluate every template row and optionally permute the result.

    Args:
        src: list of rows, or anything else, which is first passed through
            ``P.cutoff_comments``.
        i: step number; a truthy value adds a ``# mul_add`` header line.
        aligned: forwarded to ``evaluate_row``.
        p: permutation, or a falsy value to return the lines unpermuted.

    Returns:
        List of lines; with a truthy ``p`` every line not starting with
        ``#`` has been passed through ``E.apply_s_permutation``.
    """
    if not isinstance(src, list):
        src = P.cutoff_comments(src)
    if i:
        rr = ['# mul_add %s' % i]
    else:
        rr = []
    for j in src:
        for k in evaluate_row(j, i, aligned):
            if k:
                rr.append(k)
                # NOTE(review): this only abandons the rest of the current
                # row's output; following rows are still processed -- confirm
                # that is the intended scope of the break.
                if k == 'jmp tail':
                    break
    if not p:
        return rr
    # NOTE: the local name below hides any module called ``re`` inside this
    # function.
    re = []
    for x in rr:
        if x[0] == '#':
            # comment lines are copied verbatim
            re.append(x)
        else:
            re.append(E.apply_s_permutation(x, p))
    return re
def alignment_code(alignment):
    """Generate the line list for one alignment variant.

    With a falsy ``alignment`` the list starts with the ``g_load_0`` block,
    covers steps 0, 2 and 3..10 and ends with a ``# tail`` marker plus the
    ``g_tail`` code (minus its first line).  With a truthy ``alignment`` the
    load block, step 10 and the tail are left out.  The output of step 9 is
    always passed through ``F.remove_after_jmp``.

    Args:
        alignment: selects the variant; also forwarded to ``chew_code``.

    Returns:
        List of generated lines.
    """
    code = [] if alignment else chew_code(g_load_0, None, 0, None)
    code += chew_code(g_mul_01, 0, alignment, None)
    code += chew_code(g_mul_2, 2, alignment, None)
    perm = list(range(0xC + 1))
    advance = [int(token, 16) for token in g_perm.split(' ')]
    body = P.cutoff_comments(g_mul_3)
    last_step = 9 if alignment else 10
    for index in range(3, last_step + 1):
        piece = chew_code(body, index, alignment, perm)
        if index == 9:
            piece = F.remove_after_jmp(piece)
        code += piece
        # the permutation is not advanced after step 10
        if index != 10:
            perm = P.composition(perm, advance)
    if not alignment:
        code.append('# tail')
        # drop the first generated line; the '# tail' marker stands in for it
        code += chew_code(g_tail, 10, alignment, perm)[1:]
    return code
def do_it(o, code, var_map):
    """Write one assembly procedure built from ``code`` to ``o``.

    The output consists of the auto-generated banner (with ``//`` turned into
    ``#``), a procedure header whose name is guessed from ``sys.argv[1]`` and
    the body followed by ``retq``.

    Args:
        o: writable output object.
        code: template source; comments are stripped before use.
        var_map: mapping handed to ``P.replace_symbolic_names_wr``.
    """
    stripped = P.cutoff_comments(code)
    body = P.replace_symbolic_names_wr('\n'.join(stripped), var_map)
    script_name = os.path.basename(sys.argv[0])
    banner = P.g_autogenerated_patt % script_name
    o.write(banner.replace('//', '#'))
    routine = P.guess_subroutine_name(sys.argv[1])
    P.write_asm_procedure_header(o, routine)
    P.write_asm_inside(o, body + '\nretq')
def mul1_code(i, jj, p):
    """Expand the mul-add template ``jj`` into concrete lines for step ``i``.

    Args:
        i: step number.  Step 4 is the only one that emits the ``vperm2i128``
            line for a ``shift v47`` row; step 7 appends the comment-stripped
            ``g_tail``.
        jj: iterable of template rows.
        p: sequence of at least 12 integers; ``sX`` becomes ``w<p[X]>`` with
            ``X`` and the result written as upper-case hex.

    Returns:
        List of lines starting with a ``# mul_add`` header, with ``i(`` and
        ``i+x(`` offsets replaced by multiples of 8 and trailing whitespace
        removed.
    """
    out = ['# mul_add %s' % i]
    for row in jj:
        if row == 'extract v[i+1]':
            out.append(extract_code(i + 1))
        elif row == 'shift v47':
            # the shift is only needed once, at step 4
            if i == 4:
                out.append('vperm2i128 $0x81,v47,v47,v47')
        else:
            out.append(row)
    if i == 7:
        out += P.cutoff_comments(g_tail)

    # s0..sB -> w<p[...]>; done in one pass because the replacements start
    # with 'w' and can never match again.
    renames = dict(('%X' % x, 'w%X' % p[x]) for x in range(12))
    # 'i+x(' -> 8*(i+x), for x = 1..8, then bare 'i(' -> 8*i
    offsets = [('i+%s(' % x, '%s(' % (8 * (i + x))) for x in range(1, 9)]
    offsets.append(('i(', '%s(' % (8 * i)))

    result = []
    for line in out:
        text = re.sub(r'\bs([0-9A-B])\b',
                      lambda hit: renames[hit.group(1)], line)
        for old, new in offsets:
            text = text.replace(old, new)
        result.append(text.rstrip())
    return result
def mul1_code(i, jj, p):
    """Expand the mul-add template ``jj`` into concrete lines for step ``i``.

    Args:
        i: step number.  At step 7 every row containing ``dd`` (after
            ``replace_rz``) is skipped and the comment-stripped ``g_tail`` is
            appended.
        jj: iterable of template rows.
        p: sequence of at least 12 integers; ``sX`` becomes ``w<p[X]>`` with
            ``X`` and the result written as upper-case hex.

    Returns:
        List of lines starting with a ``# mul_add`` header, with ``i(`` and
        ``i+x(`` offsets replaced by multiples of 8 and trailing whitespace
        removed.
    """
    final_step = (i == 7)
    out = ['# mul_add %s' % i]
    for row in jj:
        if row == 'extract v[i+1]':
            out.append(extract_code(i + 1))
            continue
        row = replace_rz(row)
        # no need to update dd during the final step
        if final_step and 'dd' in row:
            continue
        out.append(row)
    if final_step:
        out += P.cutoff_comments(g_tail)

    # s0..sB -> w<p[...]>; done in one pass because the replacements start
    # with 'w' and can never match again.
    renames = dict(('%X' % x, 'w%X' % p[x]) for x in range(12))
    # 'i+x(' -> 8*(i+x), for x = 1..8, then bare 'i(' -> 8*i
    offsets = [('i+%s(' % x, '%s(' % (8 * (i + x))) for x in range(1, 9)]
    offsets.append(('i(', '%s(' % (8 * i)))

    result = []
    for line in out:
        text = re.sub(r'\bs([0-9A-B])\b',
                      lambda hit: renames[hit.group(1)], line)
        for old, new in offsets:
            text = text.replace(old, new)
        result.append(text.rstrip())
    return result
def mul1_code(i, jj, p):
    """Expand the mul-add template ``jj`` into concrete lines for step ``i``.

    Rows containing ``:=v[i+1]`` are replaced by ``extract_v`` (called with
    the row's first two characters), ``shift v47`` rows by ``shift_v``; a row
    that ends up falsy is dropped.

    Args:
        i: step number; step 7 appends the comment-stripped ``g_tail``.
        jj: iterable of template rows.
        p: sequence of at least 12 integers; ``sX`` becomes ``w<p[X]>`` with
            ``X`` and the result written as upper-case hex.

    Returns:
        List of lines starting with a ``# mul_add`` header, with ``i(`` and
        ``i+x(`` offsets replaced by multiples of 8 and trailing whitespace
        removed.
    """
    out = ['# mul_add %s' % i]
    for row in jj:
        if ':=v[i+1]' in row:
            row = extract_v(i + 1, row[:2])
        elif row == 'shift v47':
            row = shift_v(i + 1)
        if row:
            out.append(row)
    if i == 7:
        out += P.cutoff_comments(g_tail)

    # s0..sB -> w<p[...]>; done in one pass because the replacements start
    # with 'w' and can never match again.
    renames = dict(('%X' % x, 'w%X' % p[x]) for x in range(12))
    # 'i+x(' -> 8*(i+x), for x = 1..8, then bare 'i(' -> 8*i
    offsets = [('i+%s(' % x, '%s(' % (8 * (i + x))) for x in range(1, 9)]
    offsets.append(('i(', '%s(' % (8 * i)))

    result = []
    for line in out:
        text = re.sub(r'\bs([0-9A-B])\b',
                      lambda hit: renames[hit.group(1)], line)
        for old, new in offsets:
            text = text.replace(old, new)
        result.append(text.rstrip())
    return result
def do_it(o):
    """Emit the routine as two alignment variants sharing one step-0 body.

    The step-0 body is ``E.g_mul_01`` with comments removed, trimmed by
    ``P.starting_from(..., 'mulx')`` and with the ``pextrq`` line swapped for
    the ``w8:=v[2]`` placeholder.  It is expanded through
    ``replace_extract_v`` once with 8 and once with 0, each time followed by
    the matching ``alignment_code`` output.

    Args:
        o: output target, forwarded to ``P.cook_asm`` with ``E.g_var_map``.
    """
    first_step = P.replace_in_string_array(
        P.starting_from(P.cutoff_comments(E.g_mul_01), 'mulx'),
        'pextrq $0x1, t0, w8',
        'w8:=v[2]',
    )
    lines = P.cutoff_comments(g_preamble)
    lines += replace_extract_v(first_step, 8)
    lines += alignment_code(8)
    # TODO: is it possible to shorten binary code by jmp to common part?
    lines += P.g_std_end + ['retq', 'align0:']
    lines += P.cutoff_comments(g_load0)
    lines += replace_extract_v(first_step, 0)
    lines += alignment_code(0)
    P.cook_asm(o, lines, E.g_var_map, True)
def do_it(o):
    """Assemble ``g_mul_012`` plus mul-add steps 3..7 and cook the result.

    Steps 3 and 4 use the identity permutation on 12 entries.  Steps 5..7
    advance it with ``P.composition`` first and alternate between the
    ``g_muladd_4`` template and its ``P.swap_adox_adcx`` counterpart.

    Args:
        o: output target, forwarded to ``cook_asm``.
    """
    lines = mul0_code(P.cutoff_comments(g_mul_012))
    perm = list(range(12))
    lines += mul1_code(3, P.cutoff_comments(g_muladd_3), perm)
    plain = P.cutoff_comments(g_muladd_4)
    lines += mul1_code(4, plain, perm)
    swapped = P.swap_adox_adcx(plain)
    advance = [int(token, 16) for token in g_perm.split(' ')]
    for index, body in ((5, swapped), (6, plain), (7, swapped)):
        perm = P.composition(perm, advance)
        lines += mul1_code(index, body, perm)
    cook_asm(o, lines)
def do_it(o):
    """Assemble ``g_mul_01`` plus mul-add steps 2..6 and cook the result.

    Odd steps use the ``P.swap_adox_adcx`` version of the ``g_muladd_2``
    template, even steps the plain one.  Step 6 uses ``cook_tail`` of the
    plain template.  The permutation starts as the identity on 12 entries
    and is advanced with ``P.composition`` before each of the steps 3..6.

    Args:
        o: output target, forwarded to ``P.cook_asm`` with ``g_var_map``.
    """
    lines = P.cutoff_comments(g_mul_01)
    perm = list(range(12))
    plain = P.cutoff_comments(g_muladd_2)
    swapped = P.swap_adox_adcx(plain)
    lines += mul1_code(2, plain, perm)
    advance = [int(token, 16) for token in g_perm.split(' ')]
    for index in (3, 4, 5):
        perm = P.composition(perm, advance)
        body = swapped if index & 1 else plain
        lines += mul1_code(index, body, perm)
    ending = cook_tail(plain)
    perm = P.composition(perm, advance)
    lines += mul1_code(6, ending, perm)
    P.cook_asm(o, lines, g_var_map, True)
def do_4arg(o):
    """Write the ``g_subroutine`` procedure generated from ``g_code``.

    ``g_vars_map`` is a space-separated list of ``value,key`` pairs; each key
    is mapped to ``'%' + value`` and the mapping is applied with
    ``P.replace_symbolic_vars_name``.

    Args:
        o: writable output object.

    Raises:
        ValueError: if an entry of ``g_vars_map`` does not split into exactly
            two comma-separated parts.
    """
    banner = P.g_autogenerated_patt % os.path.basename(sys.argv[0])
    o.write(banner.replace('//', '#'))
    P.write_asm_procedure_header(o, g_subroutine)
    text = '\n'.join(P.cutoff_comments(g_code))
    pairs = (entry.split(',') for entry in g_vars_map.split(' '))
    symbols = dict((key, '%' + value) for value, key in pairs)
    P.write_asm_inside(o, P.replace_symbolic_vars_name(text, symbols))
def v_alignment_code(amd, v_alignment):
    """Generate the line list for one alignment variant of the routine.

    Args:
        amd: truthy selects the ``*_zen`` templates and permutation, falsy
            the ``*_bwl`` ones; also forwarded to ``chew_code`` and
            ``form_m9``.
        v_alignment: falsy selects the aligned variant, which additionally
            gets the ``g_v_load_0`` block up front and both step-10 bodies
            at the end.

    Returns:
        List of generated lines covering steps 0..9 (and step 10 for the
        aligned variant).
    """
    aligned = not v_alignment
    code = chew_code(g_v_load_0, amd, 0, aligned, None) if aligned else []
    code += chew_code(g_mul_0, amd, 0, aligned, None)
    if amd:
        templates = (g_mul_1_zen, g_mul_2_zen, g_mul_10_zen)
        perm_text = g_perm_zen
    else:
        templates = (g_mul_1_bwl, g_mul_2_bwl, g_mul_10_bwl)
        perm_text = g_perm_bwl
    first, repeated, last = [P.cutoff_comments(t) for t in templates]
    advance = [int(token, 16) for token in perm_text.split(' ')]
    ninth = form_m9(repeated, amd)
    perm = list(range(0xD + 1))
    code += chew_code(first, amd, 1, aligned, perm)
    code += chew_code(repeated, amd, 2, aligned, perm)
    for index in range(3, 10):
        perm = P.composition(perm, advance)
        body = ninth if index == 9 else repeated
        code += chew_code(body, amd, index, aligned, perm)
    if aligned:
        # step 10 is emitted twice: first with aligned=False, then True
        for flag in (False, True):
            code += chew_code(last, amd, 10, flag, None)
    return code
def do_it(o):
    """Assemble ``g_mul_01`` plus six dedicated mul-add steps and cook it.

    Steps 2..7 each have their own template (``g_muladd_2`` ..
    ``g_muladd_7``) and all use the identity permutation on 12 entries.

    Args:
        o: output target, forwarded to ``cook_asm``.
    """
    lines = mul0_code(P.cutoff_comments(g_mul_01))
    identity = list(range(12))
    templates = (g_muladd_2, g_muladd_3, g_muladd_4,
                 g_muladd_5, g_muladd_6, g_muladd_7)
    for index, template in enumerate(templates, 2):
        lines += mul1_code(index, P.cutoff_comments(template), identity)
    cook_asm(o, lines)
def do_it(out):
    """Assemble ``g_mul1``, six mul-add steps and the tail, then cook it.

    ``mul1_code`` returns text here, so its result is split into lines.  After
    every mul-add step the permutation is replaced by its composition with
    the decimal entries of ``g_permutation``.  In the tail every
    ``!restore <reg>`` marker is turned into a ``pop`` or ``movq`` depending
    on the entry ``P.save_registers`` reported for that register.

    Args:
        out: output target, forwarded to ``cook_asm``.
    """
    lines = P.cutoff_comments(g_mul1)
    body = P.cutoff_comments(g_muladd)
    ending = P.cutoff_comments(g_tail)
    xmm_save = P.save_registers(lines)
    perm = list(range(11))
    advance = [int(token) for token in g_permutation.split(' ')]
    for index in range(1, 7):
        lines += mul1_code(index, body, perm).split('\n')
        # new perm(n) == advance(perm(n))
        perm = [advance[entry] for entry in perm]
    ending = mul1_code(7, ending, perm)
    for register, saved_in in xmm_save.items():
        if saved_in is None:
            substitute = 'pop %s | restore' % register
        else:
            substitute = 'movq %s, %s | restore' % (saved_in, register)
        ending = ending.replace('!restore ' + register, substitute)
    # markers without a matching register are left as plain annotations
    ending = ending.replace('!restore', '|restore')
    lines += ending.split('\n')
    cook_asm(out, '\n'.join(lines), xmm_save)
def do_it(o):
    """Assemble the preamble and steps 1..5, then cook the result.

    Unless ``g_writing_macro`` is set, registers are saved with
    ``P.save_registers_in_xmm`` / ``P.save_in_xmm`` and restored with
    ``P.insert_restore``.  Step 5 uses the ``g_mul_2`` template without its
    last row, with ``movq rp, s6`` inserted after the first row and the
    comment-stripped ``g_tail`` appended.

    Args:
        o: output target, forwarded to ``cook_asm``.
    """
    use_xmm = not g_writing_macro
    preamble = P.cutoff_comments(g_preamble)
    xmm_save = P.save_registers_in_xmm(preamble, 11) if use_xmm else {}
    perm = list(range(14))
    code = mul_code(0, preamble, perm)
    first = P.cutoff_comments(g_mul_1)
    repeated = P.cutoff_comments(g_mul_2)
    code += mul_code(1, first, perm)
    advance = [int(token, 16) for token in g_perm.split(' ')]
    for index in (2, 3, 4):
        code += mul_code(index, repeated, perm)
        perm = P.composition(perm, advance)
    trimmed = repeated[:-1]
    # extract rp as soon as s6 becomes useless
    last_body = [trimmed[0], 'movq rp, s6'] + trimmed[1:]
    last_body = last_body + P.cutoff_comments(g_tail)
    code += mul_code(5, last_body, perm)
    if use_xmm:
        P.save_in_xmm(code, xmm_save)
        # NOTE(review): assumed to belong to the non-macro path together
        # with save_in_xmm -- confirm.
        P.insert_restore(code, xmm_save)
    cook_asm(o, code, xmm_save, g_var_map)
def alignment_code(alignment, extra):
    """Generate the line list for one alignment variant.

    A truthy ``alignment`` starts from ``g_preamble``; a falsy one starts
    from ``g_load_0`` and additionally ends with step 7, built from
    ``form_tail``.  Steps 2..6 reuse the ``g_mul_2`` template, advancing the
    12-entry permutation with ``P.composition`` after each step.

    Args:
        alignment: selects the variant.
        extra: forwarded to ``chew_code`` and ``form_tail``.

    Returns:
        List of generated lines.
    """
    aligned = not alignment
    if aligned:
        code = chew_code(g_load_0, None, extra, True, None)
    else:
        code = chew_code(g_preamble, 0, extra, None, None)
    code += chew_code(g_mul_01, 0, extra, aligned, None)
    body = P.cutoff_comments(g_mul_2)
    ending = form_tail(body, extra)
    perm = list(range(0xB + 1))
    advance = [int(token, 16) for token in g_perm.split(' ')]
    for index in range(2, 7):
        code += chew_code(body, index, extra, aligned, perm)
        perm = P.composition(perm, advance)
    if aligned:
        code += chew_code(ending, 7, extra, None, perm)
    return code
def alignment_code(alignment):
    """Generate the line list for one alignment variant.

    A truthy ``alignment`` starts from ``g_preamble``; a falsy one starts
    from ``g_load_0`` and additionally ends with step 10, built from
    ``form_tail``.  Steps 2..9 reuse the ``g_mul_2`` template, advancing the
    13-entry permutation with ``P.composition`` after each step.

    Args:
        alignment: selects the variant; also forwarded to ``chew_code``.

    Returns:
        List of generated lines.
    """
    if not alignment:
        code = chew_code(g_load_0, None, True, None)
    else:
        code = chew_code(g_preamble, 0, None, None)
    code += chew_code(g_mul_01, 0, alignment, None)
    body = P.cutoff_comments(g_mul_2)
    ending = form_tail(body)
    perm = list(range(0xC + 1))
    advance = [int(token, 16) for token in g_perm.split(' ')]
    step = 2
    while step < 10:
        code += chew_code(body, step, alignment, perm)
        perm = P.composition(perm, advance)
        step += 1
    if not alignment:
        code += chew_code(ending, 10, None, perm)
    return code
def alignment_code(shift):
    """Build mul-add steps 2..6 for the given ``shift``.

    Odd steps use the ``P.swap_adox_adcx`` version of ``E.g_muladd_2``, even
    steps the plain one; step 6 uses ``E.cook_tail`` of the plain template.
    The permutation starts as the identity on 12 entries and is advanced with
    ``P.composition`` before each of the steps 3..6.

    Args:
        shift: forwarded unchanged to every ``mul1_code`` call.

    Returns:
        The combined result of all ``mul1_code`` calls.
    """
    perm = list(range(12))
    plain = P.cutoff_comments(E.g_muladd_2)
    swapped = P.swap_adox_adcx(plain)
    result = mul1_code(2, plain, perm, shift)
    advance = [int(token, 16) for token in E.g_perm.split(' ')]
    for index in (3, 4, 5):
        perm = P.composition(perm, advance)
        body = swapped if index & 1 else plain
        result += mul1_code(index, body, perm, shift)
    ending = E.cook_tail(plain)
    perm = P.composition(perm, advance)
    result += mul1_code(6, ending, perm, shift)
    return result
def do_it(tgt, code, b_ofs):
    """Turn an assembly template into inline-asm C++ code written to ``tgt``.

    Every comment-stripped line goes through ``chew_line``; afterwards each
    scratch variable name as well as ``ap`` and ``rp`` is rewritten to the
    ``%[name]`` operand form.

    Args:
        tgt: output target, forwarded to ``P.write_cpp_code``.
        code: template source.
        b_ofs: forwarded to ``chew_line`` for every line.
    """
    slots = range(4)
    data = {
        'macro_name': P.guess_subroutine_name(sys.argv[1]),
        'scratch': ['w%s s%s' % (n, n) for n in slots],
        'vars_type': dict(('s%s' % n, 0) for n in slots),
        'default_type': 'mp_limb_t',
        'input': ['rp r r_p', 'ap r a_p'],
        'clobber': 'memory cc',
        'source': os.path.basename(sys.argv[0]),
        'code_language': 'asm',
        'macro_parameters': 'r_p a_p',
    }
    operand_names = P.extract_int_vars_name(data['scratch']) + ['ap', 'rp']
    chewed = (chew_line(row, b_ofs) for row in P.cutoff_comments(code))
    text = '\n'.join(chewed)
    for name in operand_names:
        # whole-word match only, so 'rp' inside a longer word is untouched
        text = re.sub(r'\b%s\b' % name, '%%[%s]' % name, text)
    P.write_cpp_code(tgt, text, data)
def do_it(o, i_name):
    """Write two procedures to ``o``: ``g_hooligan`` and a wrapper around it.

    The first procedure is the ``chew_code`` output for the file ``i_name``.
    The second one comes from ``g_code``: registers are saved in xmm
    registers, ``@call`` becomes a call to ``g_hooligan``, ``!restore``
    markers become ``movq`` instructions and symbolic names are substituted.
    Its name is guessed from ``sys.argv[2]``.

    Args:
        o: writable output object.
        i_name: path of the input file, opened in binary mode.
    """
    with open(i_name, 'rb') as source:
        inner = '\n'.join(chew_code(source))
    banner = P.g_autogenerated_patt % os.path.basename(sys.argv[0])
    o.write(banner.replace('//', '#'))
    P.write_asm_procedure_header(o, g_hooligan)
    P.write_asm_inside(o, inner)

    lines = P.cutoff_comments(g_code)
    xmm_save = P.save_registers_in_xmm(lines, 5)
    P.save_in_xmm(lines, xmm_save)
    wrapper = '\n'.join(lines).replace('@call', 'call ' + g_hooligan)
    for register, saved_in in xmm_save.items():
        wrapper = wrapper.replace('!restore ' + register,
                                  'movq %s, %s' % (saved_in, register))
    wrapper = P.replace_symbolic_names_wr(wrapper, g_var_map)
    o.write('\n')
    P.write_asm_procedure_header(o, P.guess_subroutine_name(sys.argv[2]))
    P.write_asm_inside(o, wrapper + '\nretq')