def get_matrix_format_data(fe_fd, csv_file, n_concat, hop, scaler):
    """Get training data and ground truth in matrix format.

    Args:
      fe_fd: string. Feature folder.
      csv_file: string. Path of csv file.
      n_concat: integer. Number of frames to concatenate.
      hop: integer. Number of hop frames.
      scaler: None | object. When truthy, its `transform` method is applied
          to every loaded feature matrix before it is cut into blocks.

    Returns:
      x3d_all: the per-file outputs of `mat_2d_to_3d`, concatenated with
          `np.concatenate`. (n_samples, n_concat, n_freq)
      y_all: result of `sparse_to_categorical` on the per-block label ids.
          (n_samples, n_labels)
    """
    # Binary mode is what the Python 2 csv module expects.
    with open(csv_file, 'rb') as f:
        rows = list(csv.reader(f))

    x3d_all = []
    y_all = []
    for row in rows:
        # Only the first csv field is used; it is split on a tab into
        # exactly two parts: a path-like name and a label.
        [na, lb] = row[0].split('\t')
        # Keep the second '/'-separated component and drop its last four
        # characters (presumably a file extension -- TODO confirm).
        na = na.split('/')[1][0:-4]
        path = fe_fd + '/' + na + '.f'

        # Use a context manager so the feature file is closed right after it
        # is read instead of leaking one handle per csv row.
        # NOTE(review): unpickling can execute arbitrary code; only load
        # feature files from a trusted source.
        with open(path, 'rb') as fe_file:
            x = cPickle.load(fe_file)

        if scaler:
            x = scaler.transform(x)

        x3d = mat_2d_to_3d(x, n_concat, hop)   # (n_blocks, n_concat, n_freq)
        x3d_all.append(x3d)
        # Every block cut from this file gets the file's label id.
        y_all += [cfg.lb_to_id[lb]] * len(x3d)

    x3d_all = np.concatenate(x3d_all)   # (n_samples, n_concat, n_freq)
    y_all = np.array(y_all)
    y_all = sparse_to_categorical(y_all, len(cfg.labels))   # (n_samples, n_labels)
    return x3d_all, y_all
def train_cv_model():
    """Train a fully connected model on the training split of one fold.

    Takes no arguments. The names `type`, `fold`, `agg_num`, `hop` and
    `n_hid` are not assigned here and are resolved from the enclosing scope.
    `type` selects which feature folder, label set and fold file are used.

    The training data is loaded through `pp_dev_data.LoadAllData`, a
    `Sequential` model with three hidden `Dense` layers is built, and it is
    fitted with `Adam(1e-3)` and the 'binary_crossentropy' loss. Two
    callbacks are attached: `Validation` (call_freq=1) and `SaveModel`
    (call_freq=5, dumping into `cfg.dev_md_fd`).

    Raises:
      ValueError: if `type` is neither 'home' nor 'resi'. Previously this
          case surfaced later as an unbound-local error on `labels`.
    """
    # init path
    if type == 'home':
        fe_fd = cfg.dev_fe_mel_home_fd
        labels = cfg.labels_home
        lb_to_id = cfg.lb_to_id_home
        tr_txt = cfg.dev_evaluation_fd + '/home_fold' + str(fold) + '_train.txt'
    elif type == 'resi':
        fe_fd = cfg.dev_fe_mel_resi_fd
        labels = cfg.labels_resi
        lb_to_id = cfg.lb_to_id_resi
        tr_txt = cfg.dev_evaluation_fd + '/residential_area_fold' + str(fold) + '_train.txt'
    else:
        # Fail early with a clear message instead of falling through.
        raise ValueError("type must be 'home' or 'resi', got %r" % (type,))

    n_out = len(labels)

    # load data to list
    tr_X, tr_y = pp_dev_data.LoadAllData(fe_fd, tr_txt, lb_to_id, agg_num, hop)
    tr_y = sparse_to_categorical(tr_y, n_out)
    # Single-argument print(...) produces the same output under Python 2.
    print(tr_X.shape)
    print(tr_y.shape)
    n_freq = tr_X.shape[2]

    # build model
    seq = Sequential()
    seq.add(InputLayer((agg_num, n_freq)))
    seq.add(Flatten())
    # three identical hidden blocks: Dense(relu) followed by Dropout(0.1)
    for _ in range(3):
        seq.add(Dense(n_hid, act='relu'))
        seq.add(Dropout(0.1))
    seq.add(Dense(n_out, 'sigmoid'))
    md = seq.combine()

    # print summary info of model
    md.summary()

    # optimization method
    optimizer = Adam(1e-3)

    # callbacks (optional)
    # save model every n epoch
    pp_dev_data.CreateFolder(cfg.dev_md_fd)
    save_model = SaveModel(dump_fd=cfg.dev_md_fd, call_freq=5)

    # validate model every n epoch; only training data is passed in
    validation = Validation(tr_x=tr_X, tr_y=tr_y,
                            va_x=None, va_y=None,
                            te_x=None, te_y=None,
                            metrics=['binary_crossentropy'],
                            call_freq=1, dump_path=None)

    # callbacks function
    callbacks = [validation, save_model]

    # train model
    md.fit(x=tr_X, y=tr_y, batch_size=20, n_epochs=100,
           loss_func='binary_crossentropy', optimizer=optimizer,
           callbacks=callbacks)
def reshapeX(X):
    """Return X reshaped to (len(X), 1, 28, 28)."""
    n_samples = len(X)
    return X.reshape((n_samples, 1, 28, 28))


### load & prepare data
tr_X, tr_y, va_X, va_y, te_X, te_y = load_data()
# Reshape all three input arrays before any of the names is rebound.
tr_X, va_X, te_X = map(reshapeX, (tr_X, va_X, te_X))

# init params
n_in = 784
n_hid = 500
n_out = 10

# sparse label to 1 of K categorical label
tr_y = sparse_to_categorical(tr_y, n_out)
va_y = sparse_to_categorical(va_y, n_out)
te_y = sparse_to_categorical(te_y, n_out)

### Build model
act = 'relu'
seq = Sequential()
seq.add(InputLayer(in_shape=(1, 28, 28)))

# first convolution + pooling stage
seq.add(Convolution2D(n_outfmaps=32, n_row=3, n_col=3, act='relu'))
seq.add(MaxPool2D(pool_size=(2, 2)))

# second convolution + pooling stage, followed by dropout
seq.add(Convolution2D(n_outfmaps=32, n_row=3, n_col=3, act='relu'))
seq.add(MaxPool2D(pool_size=(2, 2)))
seq.add(Dropout(0.2))

# flatten and feed a dense hidden layer
seq.add(Flatten())
seq.add(Dense(n_hid, act='relu'))
seq.add(Dropout(0.5))
from hat.layers.pool import Pool2D, GlobalMaxPool
from hat.callbacks import Validation, SaveModel
from hat.preprocessing import pad_trunc_seqs, sparse_to_categorical, reshape_3d_to_4d
from hat.optimizers import Rmsprop
import prepare_dev_data as pp_dev_data
import config as cfg

# hyper-params
tr_fe_fd = cfg.dev_tr_fe_mel_fd
max_len = 100
n_out = len(cfg.labels)

# prepare data
tr_X, tr_y = pp_dev_data.GetAllData(tr_fe_fd, max_len)
tr_y = sparse_to_categorical(tr_y, n_out)
# Prints both shapes on one line separated by a single space, exactly like
# the Python 2 statement `print a, b`.
print('%s %s' % (tr_X.shape, tr_y.shape))
# tr_X must be 3-dimensional; the first dimension is not used afterwards.
_, n_time, n_freq = tr_X.shape

# build model
seq = Sequential()
seq.add(InputLayer((n_time, n_freq)))
seq.add(Flatten())
seq.add(Dropout(0.1))

# hidden layer 1
seq.add(Dense(500, 'relu'))
seq.add(Dropout(0.1))

# hidden layer 2
seq.add(Dense(500, 'relu'))
seq.add(Dropout(0.1))

# hidden layer 3
seq.add(Dense(500, 'relu'))
seq.add(Dropout(0.1))
pred_all = np.concatenate(pred_all, axis=0) y_all = np.concatenate(y_all, axis=0) err = metrics.categorical_error(pred_all, y_all) return err if __name__ == '__main__': # Load & prepare data tr_x, tr_y, va_x, va_y, te_x, te_y = load_mnist() # Init params n_in = 784 n_hid = 500 n_out = 10 # Sparse label to 1-of-K categorical label tr_y = sparse_to_categorical(tr_y, n_out) va_y = sparse_to_categorical(va_y, n_out) te_y = sparse_to_categorical(te_y, n_out) # Build model lay_in = InputLayer(in_shape=(n_in,)) a = Dense(n_out=n_hid, act='relu')(lay_in) a = Dropout(p_drop=0.2)(a) a = Dense(n_out=n_hid, act='relu')(a) a = Dropout(p_drop=0.2)(a) lay_out = Dense(n_out=n_out, act='softmax')(a) md = Model(in_layers=[lay_in], out_layers=[lay_out]) md.compile() md.summary()
def train_cv_model():
    """Fit the dense classifier on the training list of the selected fold.

    The function takes no parameters; `type`, `fold`, `agg_num`, `hop` and
    `n_hid` are read from the enclosing scope. Depending on `type` ('home'
    or 'resi') the matching feature folder, labels, label-to-id mapping and
    fold training list are picked from `cfg`.

    Steps: load data via `pp_dev_data.LoadAllData`, convert the targets with
    `sparse_to_categorical`, build a `Sequential` model (Flatten, three
    Dense/Dropout pairs, sigmoid output), then call `fit` with `Adam(1e-3)`,
    the 'binary_crossentropy' loss and the `Validation` / `SaveModel`
    callbacks.

    Raises:
      ValueError: if `type` is not one of 'home' / 'resi' (this used to fail
          later with an unbound-local error on `labels`).
    """
    # init path
    if type == 'home':
        fe_fd = cfg.dev_fe_mel_home_fd
        labels = cfg.labels_home
        lb_to_id = cfg.lb_to_id_home
        fold_prefix = '/home_fold'
    elif type == 'resi':
        fe_fd = cfg.dev_fe_mel_resi_fd
        labels = cfg.labels_resi
        lb_to_id = cfg.lb_to_id_resi
        fold_prefix = '/residential_area_fold'
    else:
        # Reject unknown values up front with an explicit message.
        raise ValueError("type must be 'home' or 'resi', got %r" % (type,))

    # The evaluation list path was computed but never used, so only the
    # training list path is built.
    tr_txt = cfg.dev_evaluation_fd + fold_prefix + str(fold) + '_train.txt'
    n_out = len(labels)

    # load data to list
    tr_X, tr_y = pp_dev_data.LoadAllData(fe_fd, tr_txt, lb_to_id, agg_num, hop)
    tr_y = sparse_to_categorical(tr_y, n_out)
    # print(x) with one argument behaves the same under Python 2.
    print(tr_X.shape)
    print(tr_y.shape)
    n_freq = tr_X.shape[2]

    # build model
    seq = Sequential()
    seq.add(InputLayer((agg_num, n_freq)))
    seq.add(Flatten())
    # three hidden blocks of Dense(relu) + Dropout(0.1)
    for _ in range(3):
        seq.add(Dense(n_hid, act='relu'))
        seq.add(Dropout(0.1))
    seq.add(Dense(n_out, 'sigmoid'))
    md = seq.combine()

    # print summary info of model
    md.summary()

    # optimization method
    optimizer = Adam(1e-3)

    # callbacks (optional)
    # save model every n epoch
    pp_dev_data.CreateFolder(cfg.dev_md_fd)
    save_model = SaveModel(dump_fd=cfg.dev_md_fd, call_freq=5)

    # validate model every n epoch; validation and test sets are left as None
    validation = Validation(tr_x=tr_X,
                            tr_y=tr_y,
                            va_x=None,
                            va_y=None,
                            te_x=None,
                            te_y=None,
                            metrics=['binary_crossentropy'],
                            call_freq=1,
                            dump_path=None)

    # callbacks function
    callbacks = [validation, save_model]

    # train model
    md.fit(x=tr_X,
           y=tr_y,
           batch_size=20,
           n_epochs=100,
           loss_func='binary_crossentropy',
           optimizer=optimizer,
           callbacks=callbacks)
from hat.layers.cnn import Convolution2D, Convolution1D
from hat.layers.pool import Pool2D, GlobalMaxPool
from hat.callbacks import Validation, SaveModel
from hat.preprocessing import pad_trunc_seqs, sparse_to_categorical, reshape_3d_to_4d
from hat.optimizers import Rmsprop
import prepare_dev_data as pp_dev_data
import config as cfg

# hyper-params
tr_fe_fd = cfg.dev_tr_fe_mel_fd
max_len = 100
n_out = len(cfg.labels)

# prepare data
tr_X, tr_y = pp_dev_data.GetAllData(tr_fe_fd, max_len)
tr_y = sparse_to_categorical(tr_y, n_out)
# Same one-line, space-separated output as the Python 2 statement
# `print a, b`.
print('%s %s' % (tr_X.shape, tr_y.shape))
# Unpacking requires tr_X to have exactly three dimensions.
_, n_time, n_freq = tr_X.shape

# build model: flatten the input, then stack Dense(500, relu) layers with
# Dropout(0.1) before and after each of them
seq = Sequential()
seq.add(InputLayer((n_time, n_freq)))
seq.add(Flatten())
seq.add(Dropout(0.1))

seq.add(Dense(500, 'relu'))
seq.add(Dropout(0.1))

seq.add(Dense(500, 'relu'))
seq.add(Dropout(0.1))

seq.add(Dense(500, 'relu'))
seq.add(Dropout(0.1))