def kernel(trainData, testData, itemMem, idMem, rdv1, rdv2):
    def train_encoding(m, preTrainData):
        train_temp = hcl.compute((trainData.shape[1], dim),
                                 lambda x, y: itemMem[trainData[m][x]][y] ^ idMem[x][y],
                                 name="train_temp")
        k1 = hcl.reduce_axis(0, trainData.shape[1], 'k1')
        train_result = hcl.compute((dim,),
                                   lambda x: hcl.sum(train_temp[k1, x], axis=k1, dtype=hcl.Int()),
                                   name="train_result")
        with hcl.for_(0, dim) as n:
            preTrainData[m][n] = train_result[n]
        with hcl.if_((m + 1) % 1000 == 0):
            hcl.print((m + 1), "Finish encoding %d training data\n")

    def test_encoding(m, preTestData):
        test_temp = hcl.compute((testData.shape[1], dim),
                                lambda x, y: itemMem[testData[m][x]][y] ^ idMem[x][y],
                                name="test_temp")
        k2 = hcl.reduce_axis(0, testData.shape[1], 'k2')
        test_result = hcl.compute((dim,),
                                  lambda x: hcl.sum(test_temp[k2, x], axis=k2, dtype=hcl.Int()),
                                  name="test_result")
        with hcl.for_(0, dim) as n:
            preTestData[m][n] = test_result[n]
        with hcl.if_((m + 1) % 100 == 0):
            hcl.print((m + 1), "Finish encoding %d testing data\n")

    # Encoding
    hcl.print((), "Encoding the training data into HDVs.\n")
    preTrainData = hcl.compute((trainData.shape[0], dim), lambda x, y: 0, "preTrainData")
    hcl.mutate((trainData.shape[0],), lambda x: train_encoding(x, preTrainData))

    hdTrainData = hcl.compute((trainData.shape[0], dim), lambda x, y: 0,
                              "hdTrainData", dtype=hcl.UInt(1))
    with hcl.Stage("S1"):
        with hcl.if_(trainData.shape[1] % 2 == 0):
            hcl.print((), "Use the random vector\n")
            hcl.update(hdTrainData, lambda x, y: hcl.select(
                preTrainData[x][y] + rdv1[x][y] - trainData.shape[1] / 2 > 0, 1, 0))
        with hcl.else_():
            hcl.update(hdTrainData, lambda x, y: hcl.select(
                preTrainData[x][y] - trainData.shape[1] / 2 > 0, 1, 0))

    hcl.print((), "Encoding the testing data into HDVs.\n")
    preTestData = hcl.compute((testData.shape[0], dim), lambda x, y: 0, "preTestData")
    hcl.mutate((testData.shape[0],), lambda x: test_encoding(x, preTestData))

    hdTestData = hcl.compute((testData.shape[0], dim), lambda x, y: 0,
                             "hdTestData", dtype=hcl.UInt(1))
    with hcl.Stage("S2"):
        with hcl.if_(testData.shape[1] % 2 == 0):
            hcl.print((), "Use the random vector\n")
            hcl.update(hdTestData, lambda x, y: hcl.select(
                preTestData[x][y] + rdv2[x][y] - testData.shape[1] / 2 > 0, 1, 0))
        with hcl.else_():
            hcl.update(hdTestData, lambda x, y: hcl.select(
                preTestData[x][y] - testData.shape[1] / 2 > 0, 1, 0))

    ### data packing
    pack_train = hcl.pack(hdTrainData, axis=1, dtype=hcl.UInt(bw), name="pack_train")
    pack_test = hcl.pack(hdTestData, axis=1, dtype=hcl.UInt(bw), name="pack_test")
    return pack_train, pack_test
def pack(A):
    pack = hcl.pack(A, axis=1, factor=32, dtype=hcl.UInt(32), bitorder="big")
    return pack
def SgdLR(data, label, theta, lut):
    label_local = hcl.unpack(label, name="label_local")
    theta_local = hcl.unpack(theta, name="theta_local")
    data_local = hcl.unpack(data, name="data_local")

    FTYPE = theta_local.dtype

    def Sigmoid(exponent):
        ret = hcl.scalar(0.0, "sigmoid", FTYPE)
        with hcl.if_(exponent > hcl.cast(FTYPE, 4.0)):
            ret[0] = 1.0
        with hcl.elif_(exponent < hcl.cast(FTYPE, -4.0)):
            ret[0] = 0.0
        with hcl.else_():
            with hcl.if_(exponent < hcl.cast(FTYPE, 0.0)):
                num = hcl.scalar(0, dtype=hcl.UFixed(18, 8))
                num[0][18:0] = exponent[29:11]
                num[0] = ~(num[0] << 8) + 1
                index = 2047.0 - num[0]
                ret[0] = lut[hcl.cast(hcl.Int(32), index)]
            with hcl.else_():
                index = exponent[21:11]
                ret[0] = lut[hcl.cast(hcl.Int(32), index)]
        return ret[0]

    with hcl.stage("M"):
        with hcl.for_(0, NUM_TRAINING) as train_id:
            training_instance = hcl.compute(
                (NUM_FEATURES,),
                lambda x: data_local[train_id * NUM_FEATURES + x],
                "training_instance", data_local.dtype)

            # Main Computation
            k = hcl.reduce_axis(0, NUM_FEATURES, "k")
            dot = hcl.compute(
                (1,),
                lambda x: hcl.sum(theta_local[k] * training_instance[k],
                                  axis=k, dtype=FTYPE),
                "dot", dtype=FTYPE)
            gradient = hcl.compute(
                (NUM_FEATURES,),
                lambda x: (Sigmoid(dot[0]) - label_local[train_id]) * training_instance[x],
                "gradient", dtype=FTYPE)
            update = hcl.update(
                theta_local,
                lambda x: theta_local[x] - 2565.0 * gradient[x],
                name="update")

    theta_pack = hcl.pack(theta_local, name="theta_pack", dtype=theta.dtype)
    stream_out = hcl.update(theta, lambda x: theta_pack[x], name="stream_out")
    return stream_out
def build_packed_bnn(input_image, w_conv1, bn_t1, w_conv2, bn_t2,
                     w_fc1, b_fc1, w_fc2, b_fc2):  # 1*16*16
    if PACK_CONV:
        conv1 = bnn.packed_conv2d_nchw(input_image, w_conv1, padding=[1, 1],
                                       name="conv1", out_dtype=qtype_int)  # 16*16*16
        bn1 = bnn.packed_batch_norm_threshold(conv1, bn_t1, name="bn1")
        # bn1 = bnn.packed_conv2d_nchw(input_image, w_conv1, threshold=bn_t1,
        #                              padding=[1, 1], name="conv1",
        #                              out_dtype=qtype_int)  # 16*16*16
    else:
        conv1 = bnn.conv2d_nchw(input_image, w_conv1, padding=[1, 1],
                                name="conv1", out_dtype=qtype_int)  # 16*16*16
        bn1 = bnn.batch_norm_threshold(conv1, bn_t1, name="bn1")

    maxpool1 = bnn.packed_max_pool2d_nchw(bn1, [2, 2], [2, 2], name="maxpool1",
                                          unpack=not PACK_CONV)  # 16*8*8
    # maxpool1 = bnn.packed_max_pool2d_LB(bn1, [2, 2], [2, 2], name="maxpool1")  # 16*8*8

    if PACK_CONV:
        conv2 = bnn.packed_conv2d_nchw(maxpool1, w_conv2, padding=[1, 1],
                                       name="conv2", out_dtype=qtype_int)  # 32*8*8
        bn2 = bnn.packed_batch_norm_threshold(conv2, bn_t2, name="bn2")
        # bn2 = bnn.packed_conv2d_nchw(maxpool1, w_conv2, threshold=bn_t2,
        #                              padding=[1, 1], name="conv2",
        #                              out_dtype=qtype_int)  # 32*8*8
    else:
        conv2 = bnn.conv2d_nchw(maxpool1, w_conv2, padding=[1, 1],
                                name="conv2", out_dtype=qtype_int)  # 32*8*8
        bn2 = bnn.batch_norm_threshold(conv2, bn_t2, name="bn2")

    maxpool2 = bnn.packed_max_pool2d_nchw(bn2, [2, 2], [2, 2], name="maxpool2",
                                          unpack=not PACK_CONV)  # 32*4*4=512
    # maxpool2 = bnn.packed_max_pool2d_LB(bn2, [2, 2], [2, 2], name="maxpool2")  # 32*4*4=512

    if PACK_CONV:
        pack = bnn.packed_flatten(maxpool2, name="packed_flatten")
    else:
        flat = bnn.flatten(maxpool2, name="flatten")
        pack = hcl.pack(flat, axis=1, factor=32, dtype=qtype_packed, name="pack")  # 512/32=16

    fc1 = bnn.packed_dense(pack, w_fc1, b_fc1, True, name="fc1")   # 512/32 -> 256/32
    fc2 = bnn.packed_dense(fc1, w_fc2, b_fc2, False, name="fc2")   # 256/32 -> 10
    return fc2
def test_hdc_accu(proto, pack_data, labels, type):
    # pack the prototype
    pack_proto = hcl.pack(proto, axis=1, dtype=hcl.UInt(bw), name="pack_proto")

    ### data preparation
    distance1 = hcl.compute((pack_data.shape[1],), lambda x: 0, 'distance1', dtype=hcl.UInt(bw))
    pre_hamming = hcl.compute((pack_data.shape[1],), lambda x: 0, "pre_hamming")
    hamming_dist1 = hcl.compute((numClasses,), lambda x: 0, "hamming_dist1")
    m1 = hcl.reduce_axis(0, pack_data.shape[1], "m1")
    correct1 = hcl.scalar(0, 'correct1')
    ###

    with hcl.for_(0, pack_data.shape[0]) as i:
        hcl.print((i), "%d suc\n")
        with hcl.for_(0, numClasses) as n:
            # Do HDC multiplication (XOR) between sample[i]'s HDV and prototype[n]'s HDV
            # (element-wise on the packed, high-bitwidth data)
            hcl.update(distance1, lambda x: pack_data[i][x] ^ pack_proto[n][x])
            # Compute the Hamming distance of the two vectors by counting the 1s
            hcl.update(pre_hamming, lambda x: popcount(distance1[x]))
            hcl.print((), "sum of 1s suc")
            ########################### seg fault
            hamming_dist1[n] = hcl.sum(pre_hamming[m1], axis=m1)

        # Find the prototype with the smallest Hamming distance and use its label as the prediction
        pred1 = hcl.scalar(0, 'pred1')
        with hcl.for_(0, hamming_dist1.shape[0]) as j:
            with hcl.if_(hamming_dist1[j] < hamming_dist1[pred1]):
                pred1.v = j
        with hcl.if_(pred1.v == labels[i]):
            correct1.v += 1

    # Print the accuracy
    all1 = hcl.scalar(pack_data.shape[0], "all1", dtype=hcl.Float(32))
    accuracy1 = hcl.compute((1,), lambda x: correct1.v / all1.v * 100,
                            "accuracy1", dtype=hcl.Float(32))
    with hcl.if_(type == 1):
        hcl.print((correct1, pack_data.shape[0], accuracy1[0]),
                  "Training accu: %d/%d (%.2f%%)\n")
    with hcl.else_():
        hcl.print((correct1, pack_data.shape[0], accuracy1[0]),
                  "Testing accu: %d/%d (%.2f%%)\n")
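# Note: the accuracy routine above calls a popcount helper that is not defined in
# this snippet. A minimal sketch of what it could look like (mirroring the
# bit-clearing loop used in the full kernel further below, and assuming `bw` is the
# packed word width) is:
def popcount(value):
    # Count the set bits of one packed word by repeatedly clearing the lowest 1-bit
    count = hcl.scalar(0, "count")
    numb = hcl.scalar(value, "numb", dtype=hcl.UInt(bw))
    with hcl.while_(numb.v > 0):
        count.v += 1
        numb.v &= numb.v - 1
    return count.v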
def pack(A):
    return hcl.pack(A, factor=4)
def pack_unpack(A):
    C = hcl.pack(A, factor=4)
    return hcl.unpack(C, factor=4)
def pack(A):
    return hcl.pack(A, axis=1, factor=4)
def pack(A):
    return hcl.pack(A, dtype=hcl.UInt(A.type.bits * 4))
def kernel():
    A = hcl.compute((128,), lambda x: x, dtype="uint1")
    B = hcl.pack(A, dtype=hcl.UInt(32))
    return B
def pack(A):
    pack = hcl.pack(A, axis=1, factor=32, dtype=hcl.UInt(32))
    return pack
def pack(A):
    return hcl.pack(A, axis=1, dtype=hcl.UInt(4))
def pack(A):
    pack = hcl.pack(A, axis=1, factor=8, dtype=hcl.UInt(8))
    return pack
def pack(A):
    return hcl.pack(A, axis=1, factor=16, dtype=hcl.UInt(8), bitorder="big")
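# Usage sketch (hypothetical shapes and data, assuming the standard HeteroCL flow of
# hcl.init / hcl.placeholder / hcl.create_schedule / hcl.build): one of the bit-packing
# kernels above could be built and exercised roughly as follows.
import heterocl as hcl
import numpy as np

def pack(A):
    # pack 32 one-bit elements along axis 1 into a single 32-bit word
    return hcl.pack(A, axis=1, factor=32, dtype=hcl.UInt(32))

hcl.init(hcl.UInt(1))                      # default element type: 1-bit unsigned
A = hcl.placeholder((64, 64), "A")         # hypothetical input shape
s = hcl.create_schedule([A], pack)
f = hcl.build(s)                           # CPU simulation target by default

np_A = np.random.randint(0, 2, size=(64, 64))
hcl_A = hcl.asarray(np_A, dtype=hcl.UInt(1))
hcl_B = hcl.asarray(np.zeros((64, 2)), dtype=hcl.UInt(32))  # 64 bits / 32 = 2 words per row
f(hcl_A, hcl_B)
print(hcl_B.asnumpy())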
def kernel(in_train, trainLabels, in_test, testLabels, rdv3, epoch):
    def popcount(value):
        # Count the number of ones in a binary number
        count = hcl.scalar(0, "count")
        numb = hcl.scalar(value, "numb", dtype=hcl.UInt(in_bw))
        with hcl.while_(numb.v > 0):
            count.v += 1
            numb.v &= numb.v - 1
        return count.v

    def learn(k, in_train, prototype, prototypeCounter):
        # Find samples that have the label k
        match = hcl.compute(in_train.shape,
                            lambda x, y: hcl.select(trainLabels[x] == k, in_train[x][y], 0),
                            "match", dtype=hcl.UInt(in_bw))
        # Record the number of these samples
        with hcl.for_(0, in_train.shape[0]) as a:
            with hcl.if_(trainLabels[a] == k):
                max[k] += 1
        # Do the HDC sum over these samples' HDVs
        r = hcl.reduce_axis(0, in_train.shape[0], 'r')
        with hcl.for_(0, in_bw) as bit:
            bit_sum = hcl.compute((in_train.shape[1],),
                                  lambda y: hcl.sum(match[r][y][bit], axis=r), "result")
            # Do the binary voting
            sum1 = hcl.compute((in_train.shape[1],), lambda x: 0, "sum1", dtype=hcl.UInt(1))
            with hcl.if_(max[k] % 2 == 0):
                hcl.update(sum1, lambda x: hcl.select(
                    bit_sum[x] + pack_rdv3[k][x][bit] - max[k] / 2 > 0, 1, 0))
            with hcl.else_():
                hcl.update(sum1, lambda x: hcl.select(bit_sum[x] - max[k] / 2 > 0, 1, 0))
            # Push the binary sum to prototype and the original sum to prototypeCounter
            with hcl.for_(0, in_train.shape[1]) as t:
                prototype[k][t][bit] = sum1[t]
                prototypeCounter[k][t * in_bw + bit] = bit_sum[t]

    def test_hdc_accu(proto, pack_data, labels, type):
        # pack the prototype
        # pack_proto = hcl.pack(proto, axis=1, dtype=hcl.UInt(in_bw), name="pack_proto")

        ### data preparation
        distance1 = hcl.compute((pack_data.shape[1],), lambda x: 0, 'distance1', dtype=hcl.UInt(in_bw))
        pre_hamming = hcl.compute((pack_data.shape[1],), lambda x: 0, "pre_hamming")
        hamming_dist1 = hcl.compute((numClasses,), lambda x: 0, "hamming_dist1")
        m1 = hcl.reduce_axis(0, pack_data.shape[1], "m1")
        correct1 = hcl.scalar(0, 'correct1')
        ###

        with hcl.for_(0, pack_data.shape[0]) as i:
            hcl.print((i), "%d suc\n")
            with hcl.for_(0, numClasses) as n:
                # Do HDC multiplication (XOR) between sample[i]'s HDV and prototype[n]'s HDV
                # (element-wise on the packed, high-bitwidth data)
                hcl.update(distance1, lambda x: pack_data[i][x] ^ proto[n][x])
                # Compute the Hamming distance of the two vectors by counting the 1s
                hcl.update(pre_hamming, lambda x: popcount(distance1[x]))
                hcl.print((), "sum of 1s suc")
                ########################### seg fault
                hamming_dist1[n] = hcl.sum(pre_hamming[m1], axis=m1)

            # Find the prototype with the smallest Hamming distance and use its label as the prediction
            pred1 = hcl.scalar(0, 'pred1')
            with hcl.for_(0, hamming_dist1.shape[0]) as j:
                with hcl.if_(hamming_dist1[j] < hamming_dist1[pred1]):
                    pred1.v = j
            with hcl.if_(pred1.v == labels[i]):
                correct1.v += 1

        # Print the accuracy
        all1 = hcl.scalar(pack_data.shape[0], "all1", dtype=hcl.Float(32))
        accuracy1 = hcl.compute((1,), lambda x: correct1.v / all1.v * 100,
                                "accuracy1", dtype=hcl.Float(32))
        with hcl.if_(type == 1):
            hcl.print((correct1, pack_data.shape[0], accuracy1[0]),
                      "Training accu: %d/%d (%.2f%%)\n")
        with hcl.else_():
            hcl.print((correct1, pack_data.shape[0], accuracy1[0]),
                      "Testing accu: %d/%d (%.2f%%)\n")

    def update(l, prototype, prototypeCounter, max):
        hcl.print((l + 1), "%d:Use hard examples to update the prototype counters.\n")

        ### data preparation
        distance = hcl.compute((in_train.shape[1],), lambda x: 0, 'distance', dtype=hcl.UInt(in_bw))
        pre_dist = hcl.compute((in_train.shape[1],), lambda x: 0, "pre_dist")
        hamming_dist = hcl.compute((numClasses,), lambda x: 0, "hamming_dist")
        m = hcl.reduce_axis(0, in_train.shape[1], "m")
        ###

        with hcl.for_(0, in_train.shape[0]) as i:
            hcl.print((i), "%d suc\n")
            # pack_proto = hcl.pack(prototype, axis=1, dtype=hcl.UInt(in_bw), name="pack_proto")
            with hcl.for_(0, numClasses) as n:
                # Do HDC multiplication (XOR) between sample[i]'s HDV and prototype[n]'s HDV
                # (element-wise on the packed, high-bitwidth data)
                hcl.update(distance, lambda x: in_train[i][x] ^ prototype[n][x])
                # Compute the Hamming distance of the two vectors by counting the 1s
                hcl.update(pre_dist, lambda x: popcount(distance[x]))
                hcl.print((), "sum of 1s suc")
                hamming_dist[n] = hcl.sum(pre_dist[m], axis=m)

            # Find the prototype with the smallest Hamming distance and use its label as the prediction
            pred = hcl.scalar(0, 'pred')
            with hcl.for_(0, hamming_dist.shape[0]) as j:
                with hcl.if_(hamming_dist[j] < hamming_dist[pred]):
                    pred.v = j

            # Adjust the prototype vectors: add the sample vector to its label's prototype HDV
            # and subtract it from the predicted prototype HDV
            with hcl.if_(pred.v != trainLabels[i]):
                max[trainLabels[i]] += 1
                max[pred] -= 1
                with hcl.for_(0, in_train.shape[1]) as m:
                    with hcl.for_(0, in_bw) as bit:
                        # with hcl.if_(in_train[i][m][bit] == 1):
                        #     ###########
                        #     prototypeCounter[trainLabels[i]][m*in_bw+bit] += 1
                        #     prototypeCounter[pred][m*in_bw+bit] -= 1
                        prototypeCounter[trainLabels[i]][m * in_bw + bit] += in_train[i][m][bit]
                        prototypeCounter[pred][m * in_bw + bit] -= in_train[i][m][bit]

                        with hcl.if_(max[trainLabels[i]] % 2 == 0):
                            with hcl.if_(prototypeCounter[trainLabels[i]][m * in_bw + bit]
                                         - max[trainLabels[i]] / 2 == 0):
                                prototype[trainLabels[i]][m][bit] &= 1
                            with hcl.else_():
                                prototype[trainLabels[i]][m][bit] = hcl.select(
                                    prototypeCounter[trainLabels[i]][m * in_bw + bit]
                                    - max[trainLabels[i]] / 2 > 0, 1, 0)

                        with hcl.if_(max[pred] % 2 == 0):
                            with hcl.if_(prototypeCounter[pred][m * in_bw + bit]
                                         - max[pred] / 2 == 0):
                                prototype[pred][m][bit] &= 1
                            with hcl.else_():
                                prototype[pred][m][bit] = hcl.select(
                                    prototypeCounter[pred][m * in_bw + bit] - max[pred] / 2 > 0, 1, 0)

        # print the accuracy
        hcl.mutate((1,), lambda x: test_hdc_accu(prototype, in_train, trainLabels, 1),
                   'training_update')
        hcl.mutate((1,), lambda x: test_hdc_accu(prototype, in_test, testLabels, 2),
                   'testing_update')

    pack_rdv3 = hcl.pack(rdv3, axis=1, dtype=hcl.UInt(in_bw), name="pack_rdv3")

    ### learn
    hcl.print((), "Learning the prototype HDVs.\n")
    # prototype is the vector used to represent a label
    prototype = hcl.compute((numClasses, in_train.shape[1]), lambda x, y: 0,
                            "prototype", dtype=hcl.UInt(in_bw))
    prototypeCounter = hcl.compute((numClasses, in_train.shape[1] * in_bw),
                                   lambda x, y: 0, "prototypeCounter")
    # Every dimension is the sum of the targeted data
    # max records the number of added vectors, used later for binary voting
    max = hcl.compute((numClasses,), lambda x: 0)
    hcl.mutate((numClasses,), lambda k: learn(k, in_train, prototype, prototypeCounter), "learn")

    # Test the accuracy after learning
    hcl.mutate((1,), lambda x: test_hdc_accu(prototype, in_train, trainLabels, 1), "test_train_accu")
    hcl.mutate((1,), lambda x: test_hdc_accu(prototype, in_test, testLabels, 2), "test_test_accu")

    ### update
    hcl.mutate((epoch[0],), lambda x: update(x, prototype, prototypeCounter, max), "update")
def update(l, prototype, prototypeCounter, max):
    hcl.print((l + 1), "%d:Use hard examples to update the prototype counters.\n")

    ### data preparation
    distance = hcl.compute((pack_train.shape[1],), lambda x: 0, 'distance', dtype=hcl.UInt(bw))
    pre_dist = hcl.compute((pack_train.shape[1],), lambda x: 0, "pre_dist")
    hamming_dist = hcl.compute((numClasses,), lambda x: 0, "hamming_dist")
    m = hcl.reduce_axis(0, pack_train.shape[1], "m")
    ###

    with hcl.for_(0, pack_train.shape[0]) as i:
        hcl.print((i), "%d suc\n")
        pack_proto = hcl.pack(prototype, axis=1, dtype=hcl.UInt(bw), name="pack_proto")
        with hcl.for_(0, numClasses) as n:
            # Do HDC multiplication (XOR) between sample[i]'s HDV and prototype[n]'s HDV
            # (element-wise on the packed, high-bitwidth data)
            hcl.update(distance, lambda x: pack_train[i][x] ^ pack_proto[n][x])
            # Compute the Hamming distance of the two vectors by counting the 1s
            hcl.update(pre_dist, lambda x: popcount(distance[x]))
            hcl.print((), "sum of 1s suc")
            hamming_dist[n] = hcl.sum(pre_dist[m], axis=m)

        # Find the prototype with the smallest Hamming distance and use its label as the prediction
        pred = hcl.scalar(0, 'pred')
        with hcl.for_(0, hamming_dist.shape[0]) as j:
            with hcl.if_(hamming_dist[j] < hamming_dist[pred]):
                pred.v = j

        # Adjust the prototype vectors: add the sample vector to its label's prototype HDV
        # and subtract it from the predicted prototype HDV
        with hcl.if_(pred.v != trainLabels[i]):
            max[trainLabels[i]] += 1
            max[pred] -= 1
            with hcl.for_(0, dim) as m:
                with hcl.if_(hdTrainData[i][m] == 1):
                    ###########
                    prototypeCounter[trainLabels[i]][m] += 1
                    prototypeCounter[pred][m] -= 1
                    # prototypeCounter[trainLabels[i]][m] += hdTrainData[i][m]
                    # prototypeCounter[pred][m] -= hdTrainData[i][m]

                with hcl.if_(max[trainLabels[i]] % 2 == 0):
                    with hcl.if_(prototypeCounter[trainLabels[i]][m] - max[trainLabels[i]] / 2 == 0):
                        prototype[trainLabels[i]][m] &= 1
                    with hcl.else_():
                        prototype[trainLabels[i]][m] = hcl.select(
                            prototypeCounter[trainLabels[i]][m] - max[trainLabels[i]] / 2 > 0, 1, 0)

                with hcl.if_(max[pred] % 2 == 0):
                    with hcl.if_(prototypeCounter[pred][m] - max[pred] / 2 == 0):
                        prototype[pred][m] &= 1
                    with hcl.else_():
                        prototype[pred][m] = hcl.select(
                            prototypeCounter[pred][m] - max[pred] / 2 > 0, 1, 0)

    # print the accuracy
    hcl.mutate((1,), lambda x: test_hdc_accu(prototype, pack_train, trainLabels, 1),
               'training_update')
    hcl.mutate((1,), lambda x: test_hdc_accu(prototype, pack_test, testLabels, 2),
               'testing_update')