def train():
    # prepare data
    tr_X, tr_y, _, te_X, te_y, te_na_list = pp_dev_data.GetAllData(
        fe_fd, agg_num, hop, fold)
    [batch_num, n_time, n_freq] = tr_X.shape
    print tr_X.shape, tr_y.shape
    print te_X.shape, te_y.shape

    # build model
    seq = Sequential()
    seq.add(InputLayer((n_time, n_freq)))
    seq.add(Flatten())      # flatten 2d (n_time, n_freq) to 1d (n_time*n_freq)
    seq.add(Dropout(0.1))
    seq.add(Dense(n_hid, act=act))
    seq.add(Dropout(0.1))
    seq.add(Dense(n_hid, act=act))
    seq.add(Dropout(0.1))
    seq.add(Dense(n_hid, act=act))
    seq.add(Dropout(0.1))
    seq.add(Dense(n_out, act='sigmoid'))
    md = seq.compile()
    md.summary()

    # optimizer
    optimizer = Adam(1e-4)

    # callbacks
    # tr_err, te_err are frame based. To get event based err, run recognize.py
    validation = Validation(tr_x=tr_X, tr_y=tr_y, va_x=None, va_y=None,
                            te_x=te_X, te_y=te_y, batch_size=2000,
                            metrics=['binary_crossentropy'], call_freq=1,
                            dump_path=None)

    # save model
    pp_dev_data.CreateFolder(cfg.dev_md_fd)
    save_model = SaveModel(dump_fd=cfg.dev_md_fd, call_freq=10)

    # callbacks
    callbacks = [validation, save_model]

    # fit model
    md.fit(x=tr_X, y=tr_y, batch_size=2000, n_epochs=100,
           loss_func='binary_crossentropy', optimizer=optimizer,
           callbacks=callbacks, verbose=1)
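# Illustrative sketch (not part of the original script): why train() pairs a
# sigmoid output layer with the binary_crossentropy loss. Each of the n_out
# tags is an independent yes/no decision per audio chunk, so the loss averages
# per-label binary cross-entropies. numpy is already imported as np in this
# script; the commented vectors are made-up values for illustration only.
def binary_crossentropy_sketch(p_pred, y_true, eps=1e-7):
    p_pred = np.clip(p_pred, eps, 1. - eps)
    return -np.mean(y_true * np.log(p_pred) + (1. - y_true) * np.log(1. - p_pred))

# y_true = np.array([1., 0., 0., 1., 0., 0., 0.])     # multi-hot tag vector
# p_pred = np.array([0.9, 0.2, 0.1, 0.6, 0.3, 0.05, 0.1])
# binary_crossentropy_sketch(p_pred, y_true)          # small value -> good fit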
def recognize():
    # prepare data
    _, _, _, te_X, te_y, _ = pp_dev_data.GetAllData(fe_fd, agg_num, hop, fold)

    # do recognize and evaluation
    thres = 0.4     # thres, tune to prec=recall
    n_labels = len(cfg.labels)

    gt_roll = []
    pred_roll = []
    with open(cfg.dev_cv_csv_path, 'rb') as f:
        reader = csv.reader(f)
        lis = list(reader)

    # read one line
    for li in lis:
        na = li[1]
        curr_fold = int(li[2])

        if fold == curr_fold:
            # get features, tags
            fe_path = fe_fd + '/' + na + '.f'
            info_path = cfg.dev_wav_fd + '/' + na + '.csv'
            tags = pp_dev_data.GetTags(info_path)
            y = pp_dev_data.TagsToCategory(tags)
            X = cPickle.load(open(fe_path, 'rb'))

            # aggregate data
            X3d = mat_2d_to_3d(X, agg_num, hop)

            # md is the trained model (assumed: loaded from a saved model in
            # cfg.dev_md_fd before this point; the loading code is not shown)
            p_y_pred = md.predict(X3d)
            p_y_pred = np.mean(p_y_pred, axis=0)    # shape: (n_labels,)
            pred = np.zeros(n_labels)
            pred[np.where(p_y_pred > thres)] = 1
            pred_roll.append(pred)
            gt_roll.append(y)

    pred_roll = np.array(pred_roll)
    gt_roll = np.array(gt_roll)

    # calculate prec, recall, fvalue
    prec, recall, fvalue = prec_recall_fvalue(pred_roll, gt_roll, thres)
    print prec, recall, fvalue
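# Illustrative sketch (not the hat library code): mat_2d_to_3d above is assumed
# to cut a 2-D feature matrix of shape (n_frames, n_freq) into overlapping
# context windows of agg_num frames taken every `hop` frames, producing a 3-D
# array of shape (n_windows, agg_num, n_freq) that matches the network input.
# numpy is already imported as np in this script.
def mat_2d_to_3d_sketch(X, agg_num, hop):
    n_frames, n_freq = X.shape
    # assumed: clips shorter than one window are zero-padded to a full window
    if n_frames < agg_num:
        X = np.concatenate([X, np.zeros((agg_num - n_frames, n_freq))], axis=0)
        n_frames = agg_num
    windows = []
    i = 0
    while i + agg_num <= n_frames:
        windows.append(X[i:i + agg_num])
        i += hop
    return np.array(windows)

# Example: a 400-frame clip with agg_num=11, hop=5 gives (400-11)//5 + 1 = 78
# windows, so md.predict(X3d) returns 78 per-window tag probabilities, which
# recognize() then averages over axis 0 before thresholding.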
from hat.optimizers import SGD, Rmsprop
import hat.backend as K
import config as cfg
import prepare_dev_data as pp_dev_data

# hyper-params
agg_num = 11        # concatenate frames
hop = 5             # step_len
act = 'relu'
n_hid = 500
fold = 1
n_out = len(cfg.labels)

# prepare data
tr_X, tr_y, _ = pp_dev_data.GetAllData(cfg.dev_fe_mel_fd, agg_num, hop, fold=None)
[batch_num, n_time, n_freq] = tr_X.shape
print tr_X.shape, tr_y.shape

# build model
seq = Sequential()
seq.add(InputLayer((n_time, n_freq)))
seq.add(Flatten())      # flatten 2d (n_time, n_freq) to 1d (n_time*n_freq)
seq.add(Dropout(0.1))
seq.add(Dense(n_hid, act=act))
seq.add(Dropout(0.1))
seq.add(Dense(n_hid, act=act))
seq.add(Dropout(0.1))
seq.add(Dense(n_hid, act=act))
seq.add(Dropout(0.1))
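# Illustrative sketch (assumed behaviour of GetAllData's fold argument, based on
# how recognize() above reads the cross-validation CSV: clip name in column 1,
# fold index in column 2). With fold=1 the clips of fold 1 would be held out as
# test data; with fold=None, as in this script, every clip is used for training.
def split_by_fold_sketch(csv_rows, fold):
    tr_names, te_names = [], []
    for row in csv_rows:
        na, curr_fold = row[1], int(row[2])
        if fold is not None and curr_fold == fold:
            te_names.append(na)
        else:
            tr_names.append(na)
    return tr_names, te_names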
from hat.layers.core import InputLayer, Dense, Dropout, Lambda, Flatten
from hat.layers.cnn import Convolution2D, Convolution1D
from hat.layers.pool import Pool2D, GlobalMaxPool
from hat.callbacks import Validation, SaveModel
from hat.preprocessing import pad_trunc_seqs, sparse_to_categorical, reshape_3d_to_4d
from hat.optimizers import Rmsprop
import prepare_dev_data as pp_dev_data
import config as cfg

# hyper-params
tr_fe_fd = cfg.dev_tr_fe_mel_fd
max_len = 100       # pad or truncate each clip to this many feature frames
n_out = len(cfg.labels)

# prepare data
tr_X, tr_y = pp_dev_data.GetAllData(tr_fe_fd, max_len)
tr_y = sparse_to_categorical(tr_y, n_out)
print tr_X.shape, tr_y.shape
(_, n_time, n_freq) = tr_X.shape

# build model
seq = Sequential()
seq.add(InputLayer((n_time, n_freq)))
seq.add(Flatten())
seq.add(Dropout(0.1))
seq.add(Dense(500, 'relu'))
seq.add(Dropout(0.1))
seq.add(Dense(500, 'relu'))
seq.add(Dropout(0.1))
seq.add(Dense(500, 'relu'))
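# Illustrative sketch (not hat's implementation): sparse_to_categorical above is
# assumed to map integer class labels to one-hot rows of length n_out, i.e. the
# target format used when each clip carries exactly one label.
import numpy as np

def sparse_to_categorical_sketch(y, n_out):
    y = np.asarray(y, dtype=int)
    one_hot = np.zeros((len(y), n_out))
    one_hot[np.arange(len(y)), y] = 1.
    return one_hot

# Example:
# sparse_to_categorical_sketch([0, 2, 1], 3)
# -> array([[1., 0., 0.],
#           [0., 0., 1.],
#           [0., 1., 0.]])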