Beispiel #1
0
def conv_3x64_7x32_acc_i (i) :
    """Print the code that adds limbs 0-2 of accumulator set i into r2-r4.

    Output order:
      1. a comment line listing read_V(acc_r(i, j)) for j = 0, 1, 2;
      2. three print_ldr calls targeting r10, r12 and r14 for limbs 0-2
         (print_ldr is defined outside this view; presumably it prints
         an ldr instruction -- confirm);
      3. "add" lines folding r10/r12/r14 into r2/r3/r4;
      4. an "asr" line setting r8 to r11 shifted right by 6.

    Every print call passes exactly one string, so the printed text is
    the same whether print is the Python 2 statement or the Python 3
    function.
    """
    # Format the list into the string so the line is a single print argument.
    print("	// accumulate r2-r4 to %s" % ([read_V(acc_r(i,j)) for j in range(3)],))
    print_ldr("r10", acc_r(i,0), "limb 0")
    print_ldr("r12", acc_r(i,1), "limb 1")
    print_ldr("r14", acc_r(i,2), "limb 2")
    print("	add	r2, r2, r10")
    print("	add	r3, r3, r12")
    print("	add	r4, r4, r14")
    # Unlike the adds above, r8 is overwritten here rather than accumulated.
    print("	asr	r8, r11, #6")
Beispiel #2
0
def conv_3x64_7x32_store_end (i) : # store 4 accumulators at end of thread
    """Print the end-of-thread store sequence for accumulator set i.

    Two phases are printed, in this order:

    * r6, r7 and r8 are handed to print_str for limbs 4, 5 and 6 of
      accumulator set i (print_str is defined outside this view;
      presumably it prints a str instruction -- confirm).
    * r2-r5 are "compressed": r6, r7 and r8 are reloaded via print_ldr
      from "hh", "q" and "q32", two br_32x2 lines are printed for the
      pairs (r2, r3) and (r4, r5), r2 and r4 are stored through r6 with
      a "#4" offset each, and r6 is handed back to print_str for "hh".
      NOTE(review): br_32x2 is a macro defined elsewhere; it looks like
      a reduction modulo q that packs each pair into one register --
      confirm.

    r6-r8 must be stored first because the second phase reuses them as
    cursor, q and q32.

    Every print call passes exactly one string, so the printed text is
    the same whether print is the Python 2 statement or the Python 3
    function.
    """
    # Format the list into the string so the line is a single print argument.
    print("	// store r6-r8 to %s" % ([read_V(acc_r(i,j)) for j in range(4,7)],))
    for j in range(4,7) :
        # Limb j lives in register r(j+2): limbs 4..6 -> r6..r8.
        print_str("r"+str(j+2), acc_r(i,j), "limb %d" % (j))
    print("	// compress and store r2-r5")
    print_ldr("r6","hh","reload cursor")
    print_ldr("r7","q","load q")
    print_ldr("r8","q32","load round(-2^32/q)")
    print("	br_32x2	r2, r3, r7, r8, r9")
    print("	br_32x2	r4, r5, r7, r8, r9")
    print("	str	r2, [r6], #4")
    print("	str	r4, [r6], #4")
    print_str("r6","hh","store cursor")
Beispiel #3
0
def conv_3x64_7x32_acc (i) :
    """Print the code that adds all seven limbs of accumulator set i into r2-r8.

    Output order:
      1. a comment line listing read_V(acc_r(i, j)) for j = 0..6;
      2. print_ldr calls for limbs 0-3 into r10, r12, r14 and r9, then
         "add" lines folding them into r2-r5;
      3. print_ldr calls for limbs 4-6 into r10, r12 and r8, then "add"
         lines for r6, r7 and r8.

    Limb 6 is treated differently from the others: it is loaded straight
    into r8, and the final line adds r11 shifted right by 6 to it.

    print_ldr is defined outside this view; presumably it prints an ldr
    instruction -- confirm.

    Every print call passes exactly one string, so the printed text is
    the same whether print is the Python 2 statement or the Python 3
    function.
    """
    # Format the list into the string so the line is a single print argument.
    print("	// accumulate to %s" % ([read_V(acc_r(i,j)) for j in range(7)],))
    print_ldr("r10", acc_r(i,0), "limb 0")
    print_ldr("r12", acc_r(i,1), "limb 1")
    print_ldr("r14", acc_r(i,2), "limb 2")
    print_ldr("r9", acc_r(i,3), "limb 3")
    print("	add	r2, r2, r10")
    print("	add	r3, r3, r12")
    print("	add	r4, r4, r14")
    print("	add	r5, r5, r9")
    # r10 and r12 are reused as scratch for the second batch of limbs.
    print_ldr("r10", acc_r(i,4), "limb 4")
    print_ldr("r12", acc_r(i,5), "limb 5")
    print_ldr("r8", acc_r(i,6), "limb 6")
    print("	add	r6, r6, r10")
    print("	add	r7, r7, r12")
    print("	add	r8, r8, r11, asr #6")
Beispiel #4
0
def v (s) :
    """Short alias for read_V: pass s through unchanged and return the result."""
    looked_up = read_V(s)
    return looked_up
Beispiel #5
0
def conv_3x64_7x32_store (i) :
    """Print the code that stores r2-r8 to the seven limbs of accumulator set i.

    A comment line listing read_V(acc_r(i, j)) for j = 0..6 is printed
    first. Then register r(j+2) is handed to print_str for limb j, for
    j = 0..6 (print_str is defined outside this view; presumably it
    prints a str instruction -- confirm).

    The print call passes exactly one string, so the printed text is the
    same whether print is the Python 2 statement or the Python 3
    function.
    """
    # Format the list into the string so the line is a single print argument.
    print("	// store r2-r8 to %s" % ([read_V(acc_r(i,j)) for j in range(7)],))
    for j in range(7) :
        # Limb j lives in register r(j+2): limbs 0..6 -> r2..r8.
        print_str("r"+str(j+2), acc_r(i,j), "limb %d" % (j))