Example #1
 def __str__(self):
     strbuf = 'NSWG: \n'
     strbuf += '{0:>8} {1:>8}\n'.format('MatrixH', 'MatrixW')
     for i in range(len(self.net.layers)):
         if lb.isMatrixLayer(self.net.layers[i]) or lfpga.isFPGAMatrixLayer(
                 self.net.layers[i]):
             strbuf += '{0:>8} {1:>8}\n'.format(str(self.matrixH[i]),
                                                str(self.matrixW[i]))
     strbuf += '\n'
     strbuf += '{0:>8} {1:>8}\n'.format('Synaptic', 'Neuron')
     for i in range(len(self.net.layers)):
         if lb.isMatrixLayer(self.net.layers[i]) or lfpga.isFPGAMatrixLayer(
                 self.net.layers[i]):
             strbuf += '{0:>8} {1:>8}\n'.format(self.synapse_fold[i],
                                                self.neuron_fold[i])
     strbuf += '\n'
     strbuf += '{0:>16} {1:>20} {2:>20} {3:>20} {4:>20}\n'.format(
         'Initial Buffer', 'Write Block Cycles', 'Read Block Cycles',
         'Total Cycles', 'Input Multiplier')
     for i in range(len(self.net.layers)):
         if lb.isConvLayer(self.net.layers[i]) or lfpga.isFPGAMatrixLayer(
                 self.net.layers[i]):
             strbuf += '{0:>16} {1:>20} {2:>20} {3:>20} {4:>20}\n'.format(
                 self.initial_buffer[i], self.write_block_cycles[i],
                 self.read_block_cycles[i], self.total_cycles[i],
                 self.input_multiplier[i])
     return strbuf
Example #2
 def print_topology(self):
     print "\nNetwork Topology: "
     print '{0:>35} {1:>10} {2:>10} {3:>10} {4:>10} {5:>8} {6:>8} {7:>8}'.format('NAME', 'idx', 'out_dim', 'filter_dim', 'in_chan', 'out_chan', 'stride', 'in_dim')
     for i in range(len(self.net.layers)):
         if lb.isMatrixLayer(self.net.layers[i]) or lfpga.isFPGAMatrixLayer(self.net.layers[i]):
             print '{0:>35} {1:>10} {2:>10} {3:>10} {4:>10} {5:>8} {6:>8} {7:>8}'.format(self.net.layers[i].get_type(), i, self.net.layers[i].get_out_dim(), self.net.layers[i].get_filter_dim(), self.net.layers[i].getInputSize(), self.net.layers[i].getOutputSize(), self.net.layers[i].get_stride(), self.net.layers[i].get_in_dim())
     print ""
Example #3
 def ops_per_cycle(self, layer_idx):
     if lb.isMatrixLayer(
             self.net.layers[layer_idx]) or lfpga.isFPGAMatrixLayer(
                 self.net.layers[layer_idx]):
         # factor of 2: each MAC counts as two ops (multiply + accumulate)
         return self.SIMD[layer_idx] * self.PE[layer_idx] * self.MMV[
             layer_idx] * 2
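
For intuition, plug illustrative folding factors into the formula (the values below are made up, not from the source): with SIMD = 16, PE = 32 and MMV = 1 the engine performs 512 multiply-accumulates per cycle, which counts as 1024 operations.

    # Hypothetical folding factors, for illustration only
    SIMD, PE, MMV = 16, 32, 1
    ops_per_cycle = SIMD * PE * MMV * 2  # x2: each MAC is a multiply plus an add
    assert ops_per_cycle == 1024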
Example #4
 def print_folding_factors(self):
     print "\nFolding factors: "
     print '{0:>35} {1:>8} {2:>5} {3:>5} {4:>5}'.format('NAME', 'idx', 'SIMD', 'PE', 'MMV')
     for i in range(len(self.net.layers)):
         if lb.isMatrixLayer(self.net.layers[i]) or lfpga.isFPGAMatrixLayer(self.net.layers[i]):
             print '{0:>35} {1:>8} {2:>5} {3:>5} {4:>5} '.format(self.net.layers[i].get_type(), i, self.SIMD[i], self.PE[i], self.MMV[i])
     print ""
Example #5
def passFwdPropagateLinear(pipeline):
    "Move linear layers past matrix and pooling layers."
    inStages = list(pipeline)  # copy so the caller's list is not consumed
    inStages.reverse()
    numChanges = 0
    ret = []
    while len(inStages) > 1:
        layerA = inStages.pop()
        layerB = inStages.pop()
        if lb.isLinearLayer(layerA) and lb.isMatrixLayer(layerB):
            # move the scalar ax+b to after the matrix layer Wx
            # originally we have W(ax+b) = Wax + Wb
            # desired: Mx+N = a(Wx) + Wb
            # repeat a and b to make appropriately-sized vectors
            a = layerA.A
            b = layerA.B
            W = layerB.W
            matrixLayerOutSize = W.shape[0]
            scaleNew = a * np.ones(matrixLayerOutSize)
            shiftNew = np.dot(W, b * np.ones(W.shape[1]))
            ret += [layerB, lb.LinearLayer(scaleNew, shiftNew)]
            numChanges += 1
        elif lb.isLinearLayer(layerA) and lb.isPoolingLayer(layerB):
            # TODO do we need to check layerA.A < 0 and maxpooling here?
            ret += [layerB, layerA]
            numChanges += 1
        else:
            ret += [layerA]
            inStages.append(layerB)
    # pop final element, if any left
    if len(inStages) == 1:
        ret += [inStages.pop()]

    return (ret, numChanges)
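
The algebra behind the swap is easy to check numerically; here is a standalone sketch using plain numpy (the matrix and scalars are made up for the check):

    import numpy as np

    W = np.array([[1., 2.], [3., 4.]])  # matrix layer weights
    a, b = 0.5, 3.0                     # scalar linear layer: a*x + b
    x = np.array([1., -2.])
    before = np.dot(W, a * x + b)                  # original order: W(ax + b)
    shiftNew = np.dot(W, b * np.ones(W.shape[1]))  # new shift: W.b
    after = a * np.dot(W, x) + shiftNew            # swapped order: a(Wx) + W.b
    assert np.allclose(before, after)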
Example #6
 def print_hardware_cost(self):
     print "\nHardware Cost:"
     print '{0:>35} {1:>20} {2:>20} {3:>20} {4:>20} {5:>20}'.format(
         'Layer', 'idx', 'Input BRAMs', 'Weight BRAMs', 'Total LUTs',
         'Total BRAMs')
     total_input_brams = 0
     total_weights_brams = 0
     total_luts = 0
     total_brams = 0
     for i in range(len(self.net.layers)):
         if lb.isMatrixLayer(self.net.layers[i]) or lfpga.isFPGAMatrixLayer(
                 self.net.layers[i]):
             brams = self.bram_cost(i)
             luts = self.lut_cost(i)
             print '{0:>35} {1:>20} {2:>20} {3:>20} {4:>20} {5:>20}'.format(
                 self.net.layers[i].get_type(), i, brams[0], brams[1],
                 luts, sum(brams))
             total_input_brams += brams[0]
             total_weights_brams += brams[1]
             total_luts += luts
             total_brams += sum(brams)
     print '{0:>35} {1:>20} {2:>20} {3:>20} {4:>20} {5:>20}'.format(
         "Totals", "ALL", total_input_brams, total_weights_brams,
         total_luts, total_brams)
     print ""
Example #7
 def print_cycles(self):
     print "\nCycles per layer: "
     layer_cycles = self.calculate_layer_cycles()  # Same as est MVC
     print '{0:>35} {1:>8}  {2:>10} {3:>10}'.format('NAME', 'idx', 'ops/layer', 'MVC')
     for i in range(len(self.net.layers)):
         if lb.isMatrixLayer(self.net.layers[i]) or lfpga.isFPGAMatrixLayer(self.net.layers[i]):
             print '{0:>35} {1:>8}  {2:>10} {3:>10}'.format(self.net.layers[i].get_type(), i, self.net.ops_per_layer(self.net.layers[i]), layer_cycles[i])
     print ""
Example #8
 def find_slowest_layer(self):
     """Find worst case layer as index into layers"""
     slowest_layer = self.find_first_matrix_layer()
     cycles = self.calculate_layer_cycles()
     for idx, cycle in enumerate(cycles):
         if cycle > cycles[slowest_layer] and (lb.isMatrixLayer(self.net.layers[idx]) or lfpga.isFPGAMatrixLayer(self.net.layers[idx])):
             slowest_layer = idx
     return slowest_layer
Example #9
 def find_first_matrix_layer(self):
     first = -1
     for idx, layer in enumerate(self.net.layers):
         if lb.isMatrixLayer(layer) or lfpga.isFPGAMatrixLayer(layer):
             first = idx
             break
     assert (first != -1)
     return first
Example #10
def directlyQuantizeLayer(layer, bits):
    "Apply direct quantization to given layer, returns [quantized layer, scaling layer]"
    assert (lb.isMatrixLayer(layer))
    qlayer = copy.deepcopy(layer)
    (Wint, alpha) = qnt.quantize_matrix(qlayer.W, bits)
    qlayer.W = Wint
    qlayer.wbits = bits
    slayer = lb.LinearLayer(A=alpha, B=np.zeros(alpha.shape))
    return [qlayer, slayer]
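
This pass relies on quantize_matrix returning an integer matrix Wint and a scale vector alpha such that alpha * Wint approximates W; the appended LinearLayer then restores the scale after the integer matrix multiply. Below is a sketch of one way that contract could be satisfied (symmetric uniform quantization with one scale per output row; this is an assumption, not necessarily what qnt implements):

    import numpy as np

    def quantize_matrix_sketch(W, bits):
        # hypothetical stand-in for qnt.quantize_matrix
        maxval = np.abs(W).max(axis=1, keepdims=True)
        scale = maxval / (2 ** (bits - 1) - 1)  # per-row scale
        Wint = np.round(W / scale)              # integer weights
        return (Wint, scale.ravel())

    W = np.random.randn(4, 8)
    (Wint, alpha) = quantize_matrix_sketch(W, 8)
    # rescaling the integer matrix recovers W up to quantization error
    assert np.allclose(alpha[:, None] * Wint, W, atol=alpha.max())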
Example #11
 def calculate_matrix_cycles(self):
     layers = []
     for idx, layer in enumerate(self.net.layers):
         if lb.isMatrixLayer(layer) or lfpga.isFPGAMatrixLayer(layer):
             layers.append(self.net.ops_per_layer(layer) /
                           (self.ops_per_cycle(idx) * layer.get_parallel()))
         else:
             layers.append(0)
     return layers
Example #12
	def ops_per_layer(self, layer):
		""" If the layer is a pooling layer:
				ops = out_dim * out_dim * filter_dim * filter_dim
			if the layer is a conv or fully-connected layer:
				ops = parallel * 2 * out_dim * out_dim * filter_dim * filter_dim * in_channels * out_channels
		"""
		if layers.isMatrixLayer(layer) or lfpga.isFPGAMatrixLayer(layer):
			return layer.getNumOps()
		return 0
Example #13
    def calculate_neural_folding(self):
        self.synapse_fold = self._zeros()
        self.neuron_fold = self._zeros()

        for i in range(len(self.net.layers)):
            layer = self.net.layers[i]
            if lb.isMatrixLayer(layer) or lfpga.isFPGAMatrixLayer(layer):
                self.synapse_fold[i] = self.matrixH[i] / self.perf.SIMD[i]
                self.neuron_fold[i] = self.matrixW[i] / self.perf.PE[i]
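
The folds only come out as whole numbers when SIMD divides matrixH and PE divides matrixW; a worked example with illustrative sizes:

    # Illustrative: 64 input channels, 3x3 filter, 64 output channels
    matrixH = 64 * 3 * 3   # = 576 (inputs per neuron)
    matrixW = 64           # neurons
    SIMD, PE = 16, 32
    assert matrixH % SIMD == 0 and matrixW % PE == 0
    synapse_fold = matrixH // SIMD  # = 36
    neuron_fold = matrixW // PE     # = 2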
Example #14
 def calculate_layer_cycles(self):  # Same as est MVC
     """ For each layer, calculate cycles required
     Formula is ops_per_layer() / ops_per_cycle()"""
     layer_cycles = []
     for idx, layer in enumerate(self.net.layers):
         if lb.isMatrixLayer(layer) or lfpga.isFPGAMatrixLayer(layer):
             layer_cycles.append(self.net.ops_per_layer(
                 layer) / (self.ops_per_cycle(idx) * self.net.parallel_per_layer(layer)))
         else:
             layer_cycles.append(0)
     return layer_cycles
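
Plugging illustrative numbers into the formula (a 3x3 convolution with a 32x32 output over 64 -> 64 channels, the folding factors from earlier, and no extra parallelism):

    # Illustrative numbers only
    ops_per_layer = 2 * 32 * 32 * 3 * 3 * 64 * 64  # 75,497,472 ops
    ops_per_cycle = 16 * 32 * 1 * 2                # SIMD * PE * MMV * 2 = 1024
    parallel = 1
    cycles = ops_per_layer // (ops_per_cycle * parallel)
    assert cycles == 73728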
Example #15
    def calculate_matrix_sizes(self):
        self.matrixH = self._zeros()
        self.matrixW = self._zeros()

        for i in range(len(self.net.layers)):
            layer = self.net.layers[i]
            if lb.isMatrixLayer(layer) or lfpga.isFPGAMatrixLayer(layer):
                self.matrixW[i] = layer.getOutputSize()
                self.matrixH[i] = (layer.getInputSize() *
                                   layer.get_filter_dim() *
                                   layer.get_filter_dim())
Example #16
def directlyQuantizeAllFloatWeights(pipeline, bits):
    "Quantize all float weights in network to given number of bits."
    ret = []
    pipeline_copy = copy.deepcopy(pipeline)
    for L in pipeline_copy:
        if lb.isMatrixLayer(L):
            if L.wbits == 32:
                ret += directlyQuantizeLayer(L, bits)
            else:
                ret += [L]
        else:
            ret += [L]
    return ret
Example #17
def summarizePipeline(pipeline):
    totalParams = 0
    totalParamBits = 0
    totalOps = 0
    totalComputeLayers = 0
    output = ""
    output+=  "Per-layer details\n"
    output+=  "==================\n"
    for l in pipeline:
        print l.__class__.__name__
        if layers.isMatrixLayer(l):
            np = l.getParamSize()
            op = l.getNumOps()
            ins = l.getInputSize()
            outs = l.getOutputSize()
            npbits = l.getTotalParamBits()
            totalParamBits += npbits
            totalParams += np
            totalOps += op
            totalComputeLayers += 1
            output += "Type: %s, params: %d, ops: %d, in = %s, out = %s\n" % (l.__class__.__name__, np, op, str(ins), str(outs))
            output += "Bitwidths: input %d, weight %d, output %d\n" % (l.ibits, l.wbits, l.obits)
            inbits = l.getTotalInputBits()
            outbits = l.getTotalOutputBits()
            output += "Total in bits: %d, total weight bits: %d, total out bits: %d\n" % (inbits, npbits, outbits)
            # arithmetic intensity with some components on-chip
            # TODO include output activations once threshold fusion is in place
            #ai_none = float(op) / float(inbits + outbits + npbits)
            #ai_w = float(op) / float(inbits + outbits)
            #ai_wi = float(op) / float(outbits)
            #ai_i = float(op) / float(npbits + outbits)
            ai_wo = float(op) / float(inbits)
            ai_io = float(op) / float(npbits)
            ai_o = float(op) / float(inbits + npbits)
            #output+=  "AI none: %f, w: %f, wi: %f, wo: %f, io: %f, i: %f, o: %f" % (ai_none, ai_w, ai_wi, ai_wo, ai_io, ai_i, ai_o)
            output += "AI on-chip wo: %f, io: %f, o: %f\n" % (ai_wo, ai_io, ai_o)
            output += "-----\n"
    output+=  "Neural network pipeline summary\n"
    output+=  "================================\n"
    output+=  "Pipeline contains %d layers, %d of which are matrix layers\n" % (len(pipeline), totalComputeLayers)
    output+=  "Number of parameters: %f million\n" % (float(totalParams) / 1000000.0)
    output+=  "Total parameter volume: %f MB\n" % (float(totalParamBits) / (8*1024*1024))
    output+=  "Operations per inference: %f million\n" % (float(totalOps) / 1000000.0)
    return output
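
The AI (arithmetic intensity) figures divide the layer's operations by the bits that still have to cross the chip boundary; 'wo' assumes weights and outputs stay on-chip so only input bits count, and so on. A worked example with made-up sizes:

    op = 75497472                 # ops for one layer (illustrative)
    inbits = 32 * 32 * 64 * 8     # 8-bit input activations
    npbits = 3 * 3 * 64 * 64 * 8  # 8-bit weights
    ai_wo = float(op) / inbits            # = 144.0 ops per input bit moved
    ai_io = float(op) / npbits            # = 256.0 ops per weight bit moved
    ai_o = float(op) / (inbits + npbits)  # ~ 92.2 ops per bit moved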
Example #18
def passFuseActivations(pipeline):
    "Replace (Matrix, Threshold) layer pairs with fused equivalents."
    inStages = list(pipeline)  # copy so the caller's list is not consumed
    inStages.reverse()
    numChanges = 0
    ret = []
    while len(inStages) > 1:
        layerA = inStages.pop()
        layerB = inStages.pop()
        if lb.isMatrixLayer(layerA) and lb.isThresholdLayer(layerB):
            ret += [lb.MatrixThresholdLayer("", layerA, layerB)]
            numChanges += 1
        else:
            ret += [layerA]
            inStages.append(layerB)
    # pop final element, if any left
    if len(inStages) == 1:
        ret += [inStages.pop()]
    return (ret, numChanges)
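
Passes of this shape return (new_pipeline, numChanges), which suggests running them to a fixed point. A possible driver loop (a sketch; the source does not show how the passes are actually invoked):

    def run_to_fixpoint(pipeline, pass_fn):
        # re-run a rewrite pass until it reports no further changes
        changes = 1
        while changes > 0:
            (pipeline, changes) = pass_fn(pipeline)
        return pipeline

    # e.g. pipeline = run_to_fixpoint(pipeline, passFuseActivations)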
Example #19
	def calculate_activation_counts(self):
		for layer in self.layers:
			if layers.isMatrixLayer(layer) or lfpga.isFPGAMatrixLayer(layer):
				self.num_activations.append(layer.get_out_dim() * layer.get_out_dim() * layer.getOutputSize())
Example #20
	def calculate_weight_counts(self):
		for layer in self.layers:
			if layers.isMatrixLayer(layer):
				self.num_weights.append(layer.getParamSize())
Example #21
	def filter_relevant_layers(self):
		# a local named `layers` would shadow the layers module used below
		relevant = []
		for l in self.layers:
			if layers.isMatrixLayer(l):
				relevant.append(l)
		self.layers = relevant
Example #22
	def count_matrix_layers(self):
		count = 0
		for l in self.layers:
			if layers.isMatrixLayer(l):
				count += 1
		return count
Example #23
	def parallel_per_layer(self, layer):
		parallel = 1
		if layers.isMatrixLayer(layer):
			if hasattr(layer, 'parallel'):
				parallel = layer.parallel
		return parallel