def create_fc_layers(opts, batch_shape, random_gen):
    """Build the model's two fully-connected layers.

    Returns a dict with keys 'fc1' (input -> 320 hidden units) and 'fc2'
    (320 -> 10 classifier outputs). Each layer is dense when its requested
    density is >= 1, otherwise a sparse FC layer whose values come from a
    Glorot-uniform generator and whose index pattern is drawn from a seeded
    NumPy generator.

    NOTE(review): a later definition of `create_fc_layers` in this file
    shadows this one — this variant looks like dead code; confirm before
    relying on it. Relies on module-level `layers`, `tf`, `np`, `partial`.
    """
    hidden_units = 320
    num_classes = 10
    batch_size = batch_shape[0]
    density1, density2 = opts.densities
    sparse_factory = layers.SparseFcLayer.from_random_generator
    dtype = tf.float32 if opts.data_type != 'fp16' else tf.float16

    result = {}

    # --- First layer: input -> hidden ---
    if density1 >= 1:
        result['fc1'] = layers.DenseFcLayer(
            hidden_units, name='dense_fc', dtype=dtype, bias=True, relu=True)
    else:
        # Glorot-uniform bound, widened to account for layer sparsity.
        bound = np.sqrt(6 / ((batch_shape[1] + hidden_units) * density1))
        value_gen = partial(random_gen.uniform, -bound, bound)
        index_gen = np.random.default_rng(seed=opts.seed)
        result['fc1'] = sparse_factory(
            hidden_units, batch_shape, density1, value_gen, index_gen,
            matmul_options={"metaInfoBucketOversizeProportion": 0.2},
            name='sparse_fc', dtype=dtype, bias=True, relu=True)

    # --- Second layer: hidden -> classifier ---
    if density2 >= 1:
        result['fc2'] = layers.DenseFcLayer(
            num_classes, name='dense_classifier', dtype=dtype, bias=True,
            relu=False)
    else:
        bound = np.sqrt(6 / ((hidden_units + num_classes) * density2))
        value_gen = partial(random_gen.uniform, -bound, bound)
        index_gen = np.random.default_rng(seed=opts.seed)
        result['fc2'] = sparse_factory(
            num_classes, [batch_size, hidden_units], density2, value_gen,
            index_gen,
            matmul_options={"metaInfoBucketOversizeProportion": 0.1},
            name='sparse_classifier', dtype=dtype, bias=True, relu=False)

    return result
def create_fc_layers(opts, batch_shape, random_gen):
    """Build the model's two fully-connected layers.

    Returns a dict with keys 'fc1' (input -> opts.hidden_size units) and
    'fc2' (hidden -> 10 classifier outputs). A layer is dense when its
    requested density is >= 1; otherwise it is a sparse FC layer whose
    values come from a Glorot-uniform generator (bound widened by the
    layer's sparsity) and whose index pattern is drawn from a seeded
    NumPy generator.

    NOTE(review): relies on module-level `layers`, `tf`, `np`, `partial`
    and `logger` being in scope.
    """
    hidden_units = opts.hidden_size
    num_classes = 10
    batch_size = batch_shape[0]
    density1, density2 = opts.densities
    sparse_factory = layers.SparseFcLayer.from_random_generator
    dtype = tf.float32 if opts.data_type != 'fp16' else tf.float16
    partialsType = 'half' if opts.partials_type == 'fp16' else 'float'
    logger.info(f"Partials type: {partialsType}")

    result = {}

    # --- First layer: input -> hidden ---
    if density1 >= 1:
        result['fc1'] = layers.DenseFcLayer(
            hidden_units, name='dense_fc', dtype=dtype, use_bias=True,
            relu=True)
    else:
        bound = np.sqrt(6 / ((batch_shape[1] + hidden_units) * density1))
        value_gen = partial(random_gen.uniform, -bound, bound)
        index_gen = np.random.default_rng(seed=opts.seed)
        matmul_opts = {
            "metaInfoBucketOversizeProportion": 0.5,
            "partialsType": partialsType,
            "sharedBuckets": not opts.disable_shared_buckets,
        }
        result['fc1'] = sparse_factory(
            hidden_units, batch_shape, density1,
            block_size=opts.block_size,
            values_initialiser_gen=value_gen,
            indices_initialiser_gen=index_gen,
            matmul_options=matmul_opts,
            name='sparse_fc', dtype=dtype, use_bias=True, relu=True,
            pooling_type=opts.pooling_type)

    # --- Second layer: hidden -> classifier ---
    if density2 >= 1:
        result['fc2'] = layers.DenseFcLayer(
            num_classes, name='dense_classifier', dtype=dtype, use_bias=True,
            relu=False)
    else:
        bound = np.sqrt(6 / ((hidden_units + num_classes) * density2))
        value_gen = partial(random_gen.uniform, -bound, bound)
        index_gen = np.random.default_rng(seed=opts.seed)
        matmul_opts = {
            "metaInfoBucketOversizeProportion": 0.3,
            "sharedBuckets": not opts.disable_shared_buckets,
        }
        result['fc2'] = sparse_factory(
            num_classes, [batch_size, hidden_units], density2,
            block_size=1,  # Layer is too small to use larger blocks
            values_initialiser_gen=value_gen,
            indices_initialiser_gen=index_gen,
            matmul_options=matmul_opts,
            name='sparse_classifier', dtype=dtype, use_bias=True, relu=False,
            pooling_type="NONE")

    return result