else: for i in range(config.n_layers): # out = tf.keras.layers.Dropout(0.1)(out) out = tf.keras.layers.Dense(config.n_dim)(out) out = tf.keras.layers.Activation('sigmoid')(out) * out out = tf.keras.layers.Dense(config.n_classes, activation='relu')(out) model = tf.keras.models.Model(inputs=model.input, outputs=out) specs = None for name in config.name.split(','): NAME = name if name.endswith('.h5') else name + '.h5' model.load_weights(NAME) if specs is None: specs = model.predict(wavs, verbose=True) else: specs += model.predict(wavs, verbose=True) specs /= len(config.name.split(',')) specs = specs / config.multiplier specs = tf.reshape(specs, [*specs.shape[:2], 3, 10]) angles = tf.cast(tf.round(tf.reduce_sum(specs, axis=(1, 2))), tf.int32) classes = tf.cast(tf.round(tf.reduce_sum(specs, axis=(1, 3))), tf.int32) d_dir = D_direction(tf.cast(gt_angle, tf.float32), tf.cast(angles, tf.float32)) d_cls = D_class(tf.cast(gt_class, tf.float32), tf.cast(classes, tf.float32))
opt = SGD(config.lr, momentum=0.9) else: opt = RMSprop(config.lr, momentum=0.9) if config.l2 > 0: model = apply_kernel_regularizer( model, tf.keras.regularizers.l2(config.l2)) model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['AUC']) model.summary() model.load_weights(NAME) print('loaded pretrained model') """ DATA """ # wavs = glob.glob('/codes/2020_track3/t3_audio/*.wav') wavs = glob.glob( '/media/data1/datasets/ai_challenge/2020_track3/t3_audio/*.wav') wavs.sort() to_mel = magphase_to_mel(config.n_mels) for wav in wavs: sample = load_wav(wav)[None, :] # [1, freq, time, chan2] sample = complex_to_magphase(sample) sample = to_mel(sample) sample = minmax_log_on_mel(sample) # PREDICT output = model.predict(sample)[0] # [time', 30] plt.imshow(output) plt.savefig(os.path.split(wav)[-1].replace('.wav', '.png'))
pdb.set_trace() wavs = wavs.numpy().transpose((2, 0, 1)) wavs = np.concatenate([np.array(list(map(_filt, wavs[:2]))), wavs[2:]]) return tf.convert_to_tensor(wavs) wavs = tf.map_fn(filt, wavs) wavs = complex_to_magphase(wavs) wavs = magphase_to_mel(config.n_mels)(wavs) wavs = minmax_log_on_mel(wavs) wavs = tf.concat([wavs, tf.reverse(wavs, axis=[-1])], axis=0) gt_angle = tf.concat([gt_angle, tf.reverse(gt_angle, axis=[-1])], axis=0) gt_class = tf.concat([gt_class, gt_class], axis=0) wavs = model.predict(wavs) wavs = wavs / config.multiplier wavs = tf.reshape(wavs, [*wavs.shape[:2], 3, 10]) angles = tf.round(tf.reduce_sum(wavs, axis=(1, 2))) classes = tf.round(tf.reduce_sum(wavs, axis=(1, 3))) d_dir = D_direction(tf.cast(gt_angle, tf.float32), tf.cast(angles, tf.float32)) d_cls = D_class(tf.cast(gt_class, tf.float32), tf.cast(classes, tf.float32)) d_total = (d_dir * 0.8 + d_cls * 0.2).numpy() print('total') print(d_total, d_total.mean())
], axis=0) eval_y = np.concatenate([ np.load(os.path.join(PATH, 'test_y.npy')), np.load(os.path.join(PATH, 'noise_test_y.npy')) ], axis=0) n_chan = eval_x.shape[-1] // 2 if config.norm: eval_x = minmax_norm_magphase(eval_x) eval_x = log_magphase(eval_x) eval_y = degree_to_class(eval_y, one_hot=False) # 3. predict pred_y = model.predict(eval_x) if config.verbose: print(pred_y[:5]) print(np.max(pred_y, axis=1)) n_classes = pred_y.shape[-1] pred_y = np.argmax(pred_y, axis=-1) print("GROUND TRUTH\n", eval_y) print("PREDICTIONS\n", pred_y) print("Accuracy:", Accuracy()(eval_y, pred_y).numpy()) print("SCORE:", score(class_to_degree(eval_y), class_to_degree(pred_y)).numpy()) print(confusion_matrix(eval_y, pred_y))
# Sanity-check the dataset split shapes before building the model.
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)
print('끝')

# 2. Model
# Without its classification top, EfficientNetB0 emits a 4-D feature map.
# pooling='avg' applies global average pooling so the backbone returns one
# feature vector per sample; the softmax head then yields a single 20-way
# distribution per sample instead of one per spatial location.
# NOTE(review): assumes y_train holds one integer class id per sample, as
# sparse_categorical_crossentropy expects -- confirm against the data loader.
model = Sequential()
model.add(EfficientNetB0(include_top=False, pooling='avg'))
model.add(Dense(20, activation='softmax', name='s1'))
model.summary()

# 3. Training
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)
model.fit(x_train, y_train, batch_size=12, epochs=2, validation_split=0.1)

# 4. Evaluation and prediction
# Score on the held-out split; evaluating on the training data (as before)
# reports how well the model fits what it has already seen.
loss, acc = model.evaluate(x_test, y_test)
y_predict = model.predict(x_test)

print("loss : ", loss)
print("acc : ", acc)
print(y_predict)
print('진짜 끝')
def main(config):
    """Run the trained network on the loaded audio and export the results.

    Builds the backbone named by ``config.model`` from ``efficientnet.model``,
    optionally stacks a GRU or transformer head on top, restores the weights
    stored in ``model.h5``, predicts on the output of ``load_wav(config)`` and
    hands a ``{'track3_results': [...]}`` dictionary to ``tojson``.

    Args:
        config: Settings object. This function reads ``n_mels``, ``model``,
            ``n_layers``, ``mode``, ``n_dim``, ``n_heads``, ``n_classes`` and
            ``multiplier`` from it.
    """
    inputs = load_wav(config)

    import tensorflow as tf
    import efficientnet.model as efn_models

    keras = tf.keras
    layers = keras.layers

    # Backbone: the constructor is looked up by name on the efficientnet
    # module and wired to tf.keras explicitly.
    net_input = layers.Input(shape=(config.n_mels, None, 2))
    build_backbone = getattr(efn_models, config.model)
    backbone = build_backbone(
        include_top=False,
        weights=None,
        input_tensor=net_input,
        backend=keras.backend,
        layers=layers,
        models=keras.models,
        utils=keras.utils,
    )

    # Swap axes 1 and 2, then merge the two trailing axes into one feature
    # axis so the result is a 3-D sequence tensor.
    out = tf.transpose(backbone.output, perm=[0, 2, 1, 3])
    merged_features = out.shape[-1] * out.shape[-2]
    out = layers.Reshape([-1, merged_features])(out)

    if config.n_layers > 0:
        # Both supported heads start with the same linear projection.
        if config.mode in ('GRU', 'transformer'):
            out = layers.Dense(config.n_dim)(out)

        if config.mode == 'GRU':
            for _ in range(config.n_layers):
                forward_gru = layers.GRU(config.n_dim, return_sequences=True)
                backward_gru = layers.GRU(config.n_dim,
                                          return_sequences=True,
                                          go_backwards=True)
                out = layers.Bidirectional(
                    forward_gru, backward_layer=backward_gru)(out)
        elif config.mode == 'transformer':
            out = encoder(config.n_layers, config.n_dim, config.n_heads)(out)
            # NOTE(review): the original indentation was lost; Flatten/ReLU
            # are assumed to belong to the transformer branch -- confirm.
            out = layers.Flatten()(out)
            out = layers.ReLU()(out)

    out = layers.Dense(config.n_classes, activation='relu')(out)
    network = keras.models.Model(inputs=backbone.input, outputs=out)
    network.load_weights('model.h5')

    # Undo the multiplier, then view the last axis as a 3 x 10 grid.
    scores = network.predict(inputs) / config.multiplier
    scores = tf.reshape(scores, [*scores.shape[:2], 3, 10])

    def rounded_int8(tensor):
        """Round to the nearest integer and cast to int8."""
        return tf.cast(tf.round(tensor), tf.int8)

    # Summing out axes (1, 2) leaves 10 values per sample; summing out
    # axes (1, 3) leaves 3 values per sample.
    angles = rounded_int8(tf.reduce_sum(scores, axis=(1, 2)))
    classes = rounded_int8(tf.reduce_sum(scores, axis=(1, 3)))

    # Disabled alternative, kept for reference: constant predictions sized
    # by the number of wav files on disk.
    # import numpy as np
    # from glob import glob
    # num = len(sorted(glob('dataset/3rd_track3/*.wav')))
    # angles = tf.convert_to_tensor(np.zeros((num, 10), dtype=np.int))
    # classes = tf.convert_to_tensor(np.ones((num, 3), dtype=np.int) * 2)

    results = [
        {
            'id': sample_id,
            'angle': angle_row.numpy().tolist(),
            'class': class_row.numpy().tolist(),
        }
        for sample_id, (angle_row, class_row)
        in enumerate(zip(angles, classes))
    ]
    tojson({'track3_results': results})