def model(self):
    # Three parallel convolution branches with filter widths 3, 4 and 5,
    # each followed by ReLU, max-over-time pooling and flattening.
    conv0 = layers.conv1d(
        inputs=self.embed,
        filters=100,
        k_size=3,
        stride=1,
        padding="SAME",
        scope_name="conv0",
    )
    relu0 = layers.relu(inputs=conv0, scope_name="relu0")
    pool0 = layers.one_maxpool(inputs=relu0, padding="VALID", scope_name="pool0")
    flatten0 = layers.flatten(inputs=pool0, scope_name="flatten0")

    conv1 = layers.conv1d(
        inputs=self.embed,
        filters=100,
        k_size=4,
        stride=1,
        padding="SAME",
        scope_name="conv1",
    )
    relu1 = layers.relu(inputs=conv1, scope_name="relu1")
    pool1 = layers.one_maxpool(inputs=relu1, padding="VALID", scope_name="pool1")
    flatten1 = layers.flatten(inputs=pool1, scope_name="flatten1")

    conv2 = layers.conv1d(
        inputs=self.embed,
        filters=100,
        k_size=5,
        stride=1,
        padding="SAME",
        scope_name="conv2",
    )
    relu2 = layers.relu(inputs=conv2, scope_name="relu2")
    pool2 = layers.one_maxpool(inputs=relu2, padding="VALID", scope_name="pool2")
    flatten2 = layers.flatten(inputs=pool2, scope_name="flatten2")

    # Merge the three branches, apply dropout and project to class logits.
    concat0 = layers.concatinate([flatten0, flatten1, flatten2], scope_name="concat0")
    dropout0 = layers.Dropout(inputs=concat0, rate=1 - self.keep_prob, scope_name="dropout0")
    self.logits = layers.fully_connected(inputs=dropout0, out_dim=self.n_classes, scope_name="fc0")
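# Hypothetical standalone sketch (not part of the repo's `layers` module): the
# same multi-width convolution + max-over-time pooling classifier expressed
# with the stock tf.keras API, mirroring model() above with filter widths
# 3/4/5 and 100 filters each. The function name and default shapes below are
# assumptions for illustration only.
import tensorflow as tf

def text_cnn_sketch(seq_len=50, embed_dim=128, n_classes=2, keep_prob=0.5):
    embed = tf.keras.Input(shape=(seq_len, embed_dim))
    pooled = []
    for k in (3, 4, 5):
        conv = tf.keras.layers.Conv1D(100, k, strides=1, padding="same",
                                      activation="relu")(embed)
        pooled.append(tf.keras.layers.GlobalMaxPooling1D()(conv))  # max over time
    merged = tf.keras.layers.Concatenate()(pooled)
    dropped = tf.keras.layers.Dropout(rate=1 - keep_prob)(merged)
    logits = tf.keras.layers.Dense(n_classes)(dropped)
    return tf.keras.Model(embed, logits)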
def __init__(self,
             num_time_samples,
             num_channels=1,
             num_classes=256,
             num_blocks=2,
             num_layers=14,
             num_hidden=128,
             gpu_fraction=1.0):
    self.num_time_samples = num_time_samples
    self.num_channels = num_channels
    self.num_classes = num_classes
    self.num_blocks = num_blocks
    self.num_layers = num_layers
    self.num_hidden = num_hidden
    self.gpu_fraction = gpu_fraction

    inputs = tf.compat.v1.placeholder(
        tf.float32, shape=(None, num_time_samples, num_channels))
    targets = tf.compat.v1.placeholder(
        tf.int32, shape=(None, num_time_samples))

    # Stack of dilated causal convolutions: num_blocks blocks, each with
    # num_layers layers whose dilation doubles at every layer.
    h = inputs
    hs = []
    for b in range(num_blocks):
        for i in range(num_layers):
            rate = 2**i
            name = 'b{}-l{}'.format(b, i)
            h = dilated_conv1d(h, num_hidden, rate=rate, name=name)
            hs.append(h)

    # 1x1 convolution projects the hidden state to per-sample class logits.
    outputs = conv1d(h,
                     num_classes,
                     filter_width=1,
                     gain=1.0,
                     activation=None,
                     bias=True)

    costs = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=outputs, labels=targets)
    cost = tf.compat.v1.reduce_mean(costs)
    train_step = tf.compat.v1.train.AdamOptimizer(
        learning_rate=0.001).minimize(cost)

    # gpu_options = tf.compat.v1.GPUOptions(
    #     per_process_gpu_memory_fraction=gpu_fraction)
    # sess = tf.compat.v1.Session(
    #     config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))
    sess = tf.compat.v1.Session()
    sess.run(tf.compat.v1.global_variables_initializer())

    self.inputs = inputs
    self.targets = targets
    self.outputs = outputs
    self.hs = hs
    self.costs = costs
    self.cost = cost
    self.train_step = train_step
    self.sess = sess
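# Hypothetical usage sketch: how a class built around the constructor above
# might be trained. The class name and the batch shapes are assumptions; only
# the attributes set in __init__ are relied on.
# inputs_batch: float32 of shape (batch, num_time_samples, num_channels),
# targets_batch: int32 of shape (batch, num_time_samples).
def train_sketch(model, inputs_batch, targets_batch, num_steps=1000):
    for step in range(num_steps):
        feed = {model.inputs: inputs_batch, model.targets: targets_batch}
        loss, _ = model.sess.run([model.cost, model.train_step], feed_dict=feed)
        if step % 100 == 0:
            print('step {:d}  cross-entropy {:.4f}'.format(step, loss))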
def __init__(self, num_time_samples, num_channels, gpu_fraction):
    inputs = tf.placeholder(tf.float32,
                            shape=(None, num_time_samples, num_channels))
    targets = tf.placeholder(tf.int32, shape=(None, num_time_samples))

    # Two blocks of 14 dilated convolutions (dilations 1, 2, 4, ..., 8192).
    h = inputs
    for b in range(2):
        for i in range(14):
            rate = 2**i
            name = 'b{}-l{}'.format(b, i)
            h = dilated_conv1d(h, 128, rate=rate, name=name)

    outputs = conv1d(h,
                     256,
                     filter_width=1,
                     gain=1.0,
                     activation=None,
                     bias=True)

    # sparse_softmax_cross_entropy_with_logits must be called with keyword
    # arguments; positional logits/labels raise an error in current TF 1.x.
    cost = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=outputs,
                                                       labels=targets))
    train_step = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)

    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=gpu_fraction)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    # initialize_all_variables() is deprecated; use global_variables_initializer().
    sess.run(tf.global_variables_initializer())

    self.inputs = inputs
    self.targets = targets
    self.outputs = outputs
    self.cost = cost
    self.train_step = train_step
    self.sess = sess
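# Hypothetical arithmetic check: the receptive field of the stack above
# (2 blocks x 14 layers, dilation doubling at every layer). The filter width
# used inside dilated_conv1d is not shown in this snippet; width 2 is assumed
# here, as in the original WaveNet.
def receptive_field_sketch(num_blocks=2, num_layers=14, filter_width=2):
    per_block = sum((filter_width - 1) * 2**i for i in range(num_layers))
    return num_blocks * per_block + 1

# receptive_field_sketch() == 2 * (2**14 - 1) + 1 == 32767 samples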
def audioencoder(input_tensor, dropout_rate, num_hidden_layers):
    # Three 1x1 causal convolutions project the input to the hidden width.
    L1 = conv1d(input_tensor=input_tensor, filters=num_hidden_layers,
                kernel_size=1, strides=1, padding="CAUSAL", dilation_rate=1,
                activation=tf.nn.relu, dropout_rate=dropout_rate)
    L2 = conv1d(input_tensor=L1, filters=None, kernel_size=1, strides=1,
                padding="CAUSAL", dilation_rate=1, activation=tf.nn.relu,
                dropout_rate=dropout_rate)
    L3 = conv1d(input_tensor=L2, filters=None, kernel_size=1, strides=1,
                padding="CAUSAL", dilation_rate=1, activation=None,
                dropout_rate=dropout_rate)

    # Ten causal highway-convolution blocks: two stacks with dilations
    # 1, 3, 9, 27 followed by two blocks with dilation 3.
    tensor = L3
    for block, dilation in enumerate([1, 3, 9, 27, 1, 3, 9, 27, 3, 3], start=1):
        tensor = highwaynet(input_tensor=tensor, filters=None, kernel_size=3,
                            strides=1, padding="CAUSAL", dilation_rate=dilation,
                            activation=None, dropout_rate=dropout_rate,
                            scope_name="audioencoder_highwaynet_Block" + str(block))
    return tensor
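# Hypothetical sketch of what "CAUSAL" padding means for the conv1d and
# highwaynet helpers above (their internals are not shown here): the time
# axis is padded on the left by (kernel_size - 1) * dilation_rate frames so
# that output frame t never depends on inputs later than t. Plain TF ops
# only; the helper name below is an assumption.
import tensorflow as tf

def causal_pad_sketch(x, kernel_size, dilation_rate):
    pad = (kernel_size - 1) * dilation_rate
    # x: (batch, time, channels); pad only the time axis, only on the left
    return tf.pad(x, [[0, 0], [pad, 0], [0, 0]])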
def textencoder(embeding_tensor, dropout_rate, num_hidden_layers):
    # Two 1x1 convolutions expand the embedding to twice the hidden width
    # so the result can later be split into keys and values.
    L1 = conv1d(input_tensor=embeding_tensor, filters=num_hidden_layers * 2,
                kernel_size=1, strides=1, padding="SAME", dilation_rate=1,
                activation=tf.nn.relu, dropout_rate=dropout_rate)
    L2 = conv1d(input_tensor=L1, filters=None, kernel_size=1, strides=1,
                padding="SAME", dilation_rate=1, activation=None,
                dropout_rate=dropout_rate)

    # Twelve highway-convolution blocks: two stacks with kernel 3 and
    # dilations 1, 3, 9, 27, then two blocks with kernel 3 and dilation 1,
    # then two blocks with kernel 1 and dilation 1.
    tensor = L2
    kernels_and_dilations = [(3, 1), (3, 3), (3, 9), (3, 27),
                             (3, 1), (3, 3), (3, 9), (3, 27),
                             (3, 1), (3, 1), (1, 1), (1, 1)]
    for block, (kernel, dilation) in enumerate(kernels_and_dilations, start=1):
        tensor = highwaynet(input_tensor=tensor, filters=None, kernel_size=kernel,
                            strides=1, padding="SAME", dilation_rate=dilation,
                            activation=None, dropout_rate=dropout_rate,
                            scope_name="textencoder_highwaynet_Block" + str(block))

    # Split the channel dimension into attention keys K and values V.
    K, V = tf.split(tensor, 2, axis=-1)
    return K, V
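# Hypothetical sketch of how the (K, V) pair returned above is typically
# combined with the audioencoder output Q: scaled dot-product attention
# producing a context R that is concatenated with Q before decoding. The
# helper name and shapes are assumptions, not this repo's API; d stands for
# the hidden width (num_hidden_layers).
import tensorflow as tf

def attention_sketch(Q, K, V, d):
    # Q: (batch, T_audio, d), K and V: (batch, T_text, d)
    A = tf.nn.softmax(
        tf.matmul(Q, K, transpose_b=True) / tf.sqrt(tf.cast(d, tf.float32)))
    R = tf.matmul(A, V)                    # (batch, T_audio, d)
    return tf.concat([R, Q], axis=-1), A   # decoder input and alignment matrix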
def __init__(self,
             # num_time_samples,
             num_channels=1,
             num_classes=256,
             num_blocks=2,
             num_layers=14,
             num_hidden=128,
             gpu_fraction=1.0,
             prob_model_type='softmax'):
    # self.num_time_samples = num_time_samples
    self.num_channels = num_channels
    self.num_classes = num_classes
    self.num_blocks = num_blocks
    self.num_layers = num_layers
    self.num_hidden = num_hidden
    self.gpu_fraction = gpu_fraction
    self.prob_model_type = prob_model_type

    # The time dimension is left unspecified so sequences of any length can be fed.
    inputs = tf.placeholder(tf.float32, shape=(None, None, num_channels))
    # targets = tf.placeholder(tf.int32, shape=(None, num_time_samples))

    h = inputs
    hs = []
    for b in range(num_blocks):
        for i in range(num_layers):
            rate = 2**i
            name = 'b{}-l{}'.format(b, i)
            h = dilated_conv1d(h, num_hidden, rate=rate, name=name)
            hs.append(h)

    outputs = conv1d(h,
                     num_classes,
                     filter_width=1,
                     gain=1.0,
                     activation=None,
                     bias=True)

    # Flatten (batch, time, num_classes) to (batch * time, num_classes) so the
    # probability model sees one row of logits per output sample.
    raw_outputs_shape = tf.shape(outputs)
    outputs = tf.reshape(outputs, [-1, num_classes])

    if prob_model_type == 'softmax':
        self.prob_model = MultinomialLayer(outputs, num_classes, num_classes,
                                           one_hot=False)
    elif prob_model_type == 'sdp':
        self.prob_model = LocallySmoothedMultiscaleLayer(outputs, num_classes,
                                                         num_classes,
                                                         one_hot=False, k=1,
                                                         lam=0)

    train_step = tf.train.AdamOptimizer(learning_rate=0.001).minimize(
        self.prob_model.train_loss)

    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=gpu_fraction)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    sess.run(tf.global_variables_initializer())

    self.inputs = inputs
    # self.targets = targets
    self.outputs = outputs
    self.hs = hs
    # self.costs = costs
    self.cost = self.prob_model.test_loss
    self.train_step = train_step
    self.sess = sess
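# Hypothetical preprocessing sketch: with num_classes=256, WaveNet-style
# models usually quantize the waveform with the mu-law companding transform
# to obtain integer targets in [0, 255]. This repo's actual preprocessing is
# not shown; the helper below is an assumption for illustration.
import numpy as np

def mu_law_quantize_sketch(audio, num_classes=256):
    # audio: float array scaled to [-1, 1]
    mu = num_classes - 1
    compressed = np.sign(audio) * np.log1p(mu * np.abs(audio)) / np.log1p(mu)
    return ((compressed + 1) / 2 * mu + 0.5).astype(np.int32)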