def eval(md, x, y, out_dir, out_probs_dir, iter_):
    """Evaluate model `md` on (x, y) and dump per-class stats.

    Predicts clip-level probabilities, optionally pickles the raw
    probabilities (for later averaging across checkpoints), then computes
    per-class precision/recall and ROC curves plus AP/AUC, and pickles the
    subsampled stats.

    Args:
      md: model object exposing .predict(x) -- presumably the project's
          Model class; confirm against caller.
      x: 3-D array, shape (n_clips, n_time, n_freq).
      y: binary label matrix, shape (n_clips, n_out).
      out_dir: directory for the per-class stats pickle; skipped if falsy.
      out_probs_dir: directory for the raw probability pickle; skipped if falsy.
      iter_: training iteration number, embedded in dump file names.
    """
    # Predict
    t1 = time.time()
    (n_clips, n_time, n_freq) = x.shape
    (x, y) = pp_data.transform_data(x, y)
    prob = md.predict(x).astype(np.float32)

    if out_dir:
        pp_data.create_folder(out_dir)

    # Dump predicted probabilities for future averaging
    if out_probs_dir:
        pp_data.create_folder(out_probs_dir)
        out_prob_path = os.path.join(out_probs_dir, "prob_%d_iters.p" % iter_)
        # `with` closes the handle deterministically (the original leaked it)
        with open(out_prob_path, 'wb') as f:
            cPickle.dump(prob, f, protocol=cPickle.HIGHEST_PROTOCOL)

    # Compute per-class stats
    n_out = y.shape[1]
    stats = []
    t1 = time.time()
    skip = 1000  # subsample the long PR/ROC curves before pickling
    for k in range(n_out):
        (precisions, recalls, thresholds) = metrics.precision_recall_curve(
            y[:, k], prob[:, k])
        avg_precision = metrics.average_precision_score(
            y[:, k], prob[:, k], average=None)
        (fpr, tpr, thresholds) = metrics.roc_curve(y[:, k], prob[:, k])
        auc = metrics.roc_auc_score(y[:, k], prob[:, k], average=None)
        # Renamed from `dict` -- the original shadowed the builtin
        stat = {
            'precisions': precisions[0::skip],
            'recalls': recalls[0::skip],
            'AP': avg_precision,
            'fpr': fpr[0::skip],
            'fnr': 1. - tpr[0::skip],
            'auc': auc,
        }
        stats.append(stat)
    logging.info("Callback time: %s" % (time.time() - t1, ))

    # Dump stats only when a target directory was given; the `if out_dir:`
    # guard above implies out_dir may be falsy, but the original then used
    # it unconditionally here and would have crashed.
    if out_dir:
        dump_path = os.path.join(out_dir, "md%d_iters.p" % iter_)
        with open(dump_path, 'wb') as f:
            cPickle.dump(stats, f, protocol=cPickle.HIGHEST_PROTOCOL)

    logging.info("mAP: %f" % np.mean([e['AP'] for e in stats]))
def eval(model, x, y, out_dir, out_probs_dir, md_iter):
    """Evaluate `model` on (x, y); dump per-class stats and return mAP.

    Args:
      model: model exposing .predict(x) -- presumably a Keras model;
          confirm against caller.
      x: 3-D array, shape (n_clips, n_time, n_freq).
      y: binary label matrix, shape (n_clips, n_out).
      out_dir: directory for the per-class stats pickle (always created).
      out_probs_dir: directory for raw probability dumps; only the folder is
          created -- the actual dump is currently disabled.
      md_iter: training iteration number, embedded in dump file names.

    Returns:
      mAP (float): mean of per-class average precisions.
    """
    pp_data.create_folder(out_dir)

    # Predict
    t1 = time.time()
    (n_clips, n_time_, n_freq) = x.shape
    (x, y) = pp_data.transform_data(x, y)
    prob = model.predict(x).astype(np.float32)
    print("The %d time into evalution." % md_iter)

    if out_probs_dir:
        pp_data.create_folder(out_probs_dir)
        out_prob_path = os.path.join(out_probs_dir, "prob_%d_iters.p" % md_iter)
        #cPickle.dump(prob, open(out_prob_path, 'wb'))

    # Dump predicted probabilities for future average
    n_out = y.shape[1]
    stats = []
    t1 = time.time()
    skip = 1000  # subsample the long PR/ROC curves before pickling
    for k in range(n_out):
        (precisions, recalls, thresholds) = metrics.precision_recall_curve(
            y[:, k], prob[:, k])
        avg_precision = metrics.average_precision_score(
            y[:, k], prob[:, k], average=None)
        (fpr, tpr, thresholds) = metrics.roc_curve(y[:, k], prob[:, k])
        auc = metrics.roc_auc_score(y[:, k], prob[:, k], average=None)
        # NOTE(review): `eer` is computed but never stored or returned --
        # kept in case pp_data.eer has wanted side effects; confirm and
        # either record it in `stat` or drop the call.
        eer = pp_data.eer(prob[:, k], y[:, k])
        # Renamed from `dict` -- the original shadowed the builtin
        stat = {
            'precisions': precisions[0::skip],
            'recalls': recalls[0::skip],
            'AP': avg_precision,
            'fpr': fpr[0::skip],
            'fnr': 1. - tpr[0::skip],
            'auc': auc,
        }
        stats.append(stat)
    logging.info("Callback time: %s" % (time.time() - t1, ))

    dump_path = os.path.join(out_dir, "model_%d_iters.p" % (md_iter, ))
    # `with` closes the handle deterministically (the original leaked it)
    with open(dump_path, 'wb') as f:
        cPickle.dump(stats, f)

    mAP = np.mean([e['AP'] for e in stats])
    logging.info("mAP of %d iteration: %f" % (md_iter, mAP))
    return mAP
def train(args):
    """Train a 3-hidden-layer attention MLP on AudioSet-style features.

    Loads balanced + unbalanced training sets and the eval set from hdf5,
    builds the model, then trains with a ratio-balanced batch generator,
    evaluating and checkpointing every `call_freq` iterations.

    Args:
      args: namespace with .cpickle_dir, .workspace and .lr.
    """
    cpickle_dir = args.cpickle_dir
    workspace = args.workspace

    # Paths of hdf5 data
    bal_train_hdf5_path = os.path.join(cpickle_dir, "bal_train.h5")
    unbal_train_hdf5_path = os.path.join(cpickle_dir, "unbal_train.h5")
    eval_hdf5_path = os.path.join(cpickle_dir, "eval.h5")

    # Load data
    t1 = time.time()
    (tr_x1, tr_y1, tr_id_list1) = pp_data.load_data(bal_train_hdf5_path)
    (tr_x2, tr_y2, tr_id_list2) = pp_data.load_data(unbal_train_hdf5_path)
    tr_x = np.concatenate((tr_x1, tr_x2))
    tr_y = np.concatenate((tr_y1, tr_y2))
    tr_id_list = tr_id_list1 + tr_id_list2
    (te_x, te_y, te_id_list) = pp_data.load_data(eval_hdf5_path)
    logging.info("Loading data time: %s s" % (time.time() - t1))
    # Fix: logging.info(a, b) treats `a` as the format string -- passing two
    # shape tuples raised a formatting error inside the logging machinery.
    logging.info("tr_x1.shape: %s, tr_x2.shape: %s"
                 % (tr_x1.shape, tr_x2.shape))
    logging.info("tr_x.shape: %s" % (tr_x.shape, ))
    (_, n_time, n_freq) = tr_x.shape

    # Build model
    n_hid = 500
    n_out = tr_y.shape[1]
    lay_in = InputLayer(in_shape=(n_time, n_freq))
    a = Dense(n_out=n_hid, act='relu')(lay_in)
    a = Dropout(p_drop=0.2)(a)
    a = Dense(n_out=n_hid, act='relu')(a)
    a = Dropout(p_drop=0.2)(a)
    a = Dense(n_out=n_hid, act='relu')(a)
    a = Dropout(p_drop=0.2)(a)
    cla = Dense(n_out=n_out, act='sigmoid', name='cla')(a)
    att = Dense(n_out=n_out, act='softmax', name='att')(a)

    # Attention-weighted pooling over the classifier output
    lay_out = Lambda(_attention)([cla, att])

    # Compile model
    md = Model(in_layers=[lay_in], out_layers=[lay_out])
    md.compile()
    md.summary(is_logging=True)

    # Save model every several iterations
    call_freq = 1000
    dump_fd = os.path.join(workspace, "models", pp_data.get_filename(__file__))
    pp_data.create_folder(dump_fd)
    save_model = SaveModel(dump_fd=dump_fd, call_freq=call_freq,
                           type='iter', is_logging=True)

    # Callbacks function
    callbacks = [save_model]

    batch_size = 500
    tr_gen = RatioDataGenerator(batch_size=batch_size, type='train')

    # Optimization method
    optimizer = Adam(lr=args.lr)

    # Train
    stat_dir = os.path.join(workspace, "stats", pp_data.get_filename(__file__))
    pp_data.create_folder(stat_dir)
    prob_dir = os.path.join(workspace, "probs", pp_data.get_filename(__file__))
    pp_data.create_folder(prob_dir)

    tr_time = time.time()
    for (tr_batch_x, tr_batch_y) in tr_gen.generate(xs=[tr_x], ys=[tr_y]):
        # Compute stats every several iterations
        if md.iter_ % call_freq == 0:
            # Stats of evaluation dataset.
            # Fix: eval() requires the iteration number for its dump file
            # names; the original calls omitted it and raised TypeError.
            t1 = time.time()
            te_err = eval(md=md, x=te_x, y=te_y,
                          out_dir=os.path.join(stat_dir, "test"),
                          out_probs_dir=os.path.join(prob_dir, "test"),
                          iter_=md.iter_)
            logging.info("Evaluate test time: %s" % (time.time() - t1, ))

            # Stats of training dataset (balanced subset only)
            t1 = time.time()
            tr_bal_err = eval(md=md, x=tr_x1, y=tr_y1,
                              out_dir=os.path.join(stat_dir, "train_bal"),
                              out_probs_dir=None,
                              iter_=md.iter_)
            logging.info("Evaluate tr_bal time: %s" % (time.time() - t1, ))

        # Update params
        (tr_batch_x, tr_batch_y) = pp_data.transform_data(tr_batch_x, tr_batch_y)
        md.train_on_batch(batch_x=tr_batch_x, batch_y=tr_batch_y,
                          loss_func='binary_crossentropy',
                          optimizer=optimizer,
                          callbacks=callbacks)

        # Stop training when maximum iteration achieves
        if md.iter_ == call_freq * 31:
            break
def train(args):
    """Train the multi-level attention model with mAP-based early stopping.

    Loads balanced + unbalanced training sets and the eval set from hdf5,
    builds the model via get_ml_attention, trains on ratio-balanced batches,
    evaluates every `call_freq` iterations, saves the best model by eval
    mAP, and stops after 10 evaluations without improvement (or at the
    iteration cap).

    Args:
      args: namespace with .data_dir, .workspace, .tag, .levels and .lr.
    """
    EVAL_MAP = -1000.  # best eval mAP seen so far
    PATIENCE = 0       # consecutive evaluations without improvement

    data_dir = args.data_dir
    workspace = args.workspace
    tag = args.tag
    levels = args.levels

    # Paths for the hdf5 data
    bal_train_path = os.path.join(data_dir, "bal_train.h5")
    unbal_train_path = os.path.join(data_dir, "unbal_train.h5")
    eval_path = os.path.join(data_dir, "eval.h5")

    # Load data
    t1 = time.time()
    (tr_x1, tr_y1, tr_id_list1) = pp_data.load_data(bal_train_path)
    (tr_x2, tr_y2, tr_id_list2) = pp_data.load_data(unbal_train_path)
    (eval_x, eval_y, eval_id_list) = pp_data.load_data(eval_path)
    tr_x = np.concatenate((tr_x1, tr_x2))
    tr_y = np.concatenate((tr_y1, tr_y2))
    tr_id_list = tr_id_list1 + tr_id_list2
    logging.info("Loading data time: %s s" % (time.time() - t1))
    # Fix: logging.info(a, b) treats `a` as the format string -- passing two
    # shape tuples raised a formatting error inside the logging machinery.
    logging.info("tr_x1.shape: %s, tr_x2.shape: %s"
                 % (tr_x1.shape, tr_x2.shape))
    logging.info("tr_x.shape: %s" % (tr_x.shape, ))
    (_, n_time, n_freq) = tr_x.shape

    # Build model
    model = get_ml_attention(levels)
    logging.info(model.to_json())

    # Optimization method.
    # Fix: the Adam instance was constructed but compile() was given the
    # string 'adam', so args.lr was silently ignored.
    optimizer = Adam(lr=args.lr)
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['binary_accuracy'])

    batch_size = 500
    tr_gen = RatioDataGenerator(batch_size=batch_size, type='train')

    # Evaluate / checkpoint every call_freq iterations
    model_iter = 0
    call_freq = 1000
    dump_fd = os.path.join(workspace, "models", pp_data.get_filename(__file__))
    pp_data.create_folder(dump_fd)

    # Train
    stat_dir = os.path.join(workspace, "stats", pp_data.get_filename(__file__))
    pp_data.create_folder(stat_dir)
    prob_dir = os.path.join(workspace, "probs", pp_data.get_filename(__file__))
    pp_data.create_folder(prob_dir)

    tr_time = time.time()
    for (tr_batch_x, tr_batch_y) in tr_gen.generate(xs=[tr_x], ys=[tr_y]):
        # Compute stats every several iterations
        print(model_iter)
        if model_iter % call_freq == 0:
            # Stats of evaluation dataset
            t1 = time.time()
            eval_MAP = eval(model=model, x=eval_x, y=eval_y,
                            out_dir=os.path.join(stat_dir, "eval"),
                            out_probs_dir=os.path.join(prob_dir, "eval"),
                            md_iter=model_iter)
            logging.info("Evaluate evaluation-set time: %s"
                         % (time.time() - t1, ))

            if eval_MAP >= EVAL_MAP:
                # New best mAP: checkpoint and reset patience
                md_name = tag + "_.h5"
                model.save(md_name)
                EVAL_MAP = eval_MAP
                PATIENCE = 0
            else:
                PATIENCE += 1
                logging.info("Patience now: %d" % (PATIENCE, ))
                if PATIENCE >= 10:
                    # Early stop: 10 evaluations without improvement
                    break

        # Update params
        (tr_batch_x, tr_batch_y) = pp_data.transform_data(tr_batch_x, tr_batch_y)
        model.train_on_batch(tr_batch_x, tr_batch_y)
        model_iter += 1

        # Stop training when maximum iteration achieves
        if model_iter == call_freq * 151:
            break
def train(args):
    """Train a two-level attention MLP (Keras functional API) on AudioSet.

    Builds three BN+ReLU+Dropout hidden layers, attaches attention-pooled
    classifier heads to layers 2 and 3, concatenates them, and trains with
    binary cross-entropy, evaluating every `call_freq` iterations.

    Args:
      args: namespace with .cpickle_dir, .workspace and .lr.
    """
    cpickle_dir = args.cpickle_dir
    workspace = args.workspace

    # Paths of hdf5 data
    bal_train_hdf5_path = os.path.join(cpickle_dir, "bal_train.h5")
    unbal_train_hdf5_path = os.path.join(cpickle_dir, "unbal_train.h5")
    eval_hdf5_path = os.path.join(cpickle_dir, "eval.h5")

    # Load data
    t1 = time.time()
    (tr_x1, tr_y1, tr_id_list1) = pp_data.load_data(bal_train_hdf5_path)
    (tr_x2, tr_y2, tr_id_list2) = pp_data.load_data(unbal_train_hdf5_path)
    print(tr_x1.shape)
    print(tr_x2.shape)
    tr_x = np.concatenate((tr_x1, tr_x2))
    tr_y = np.concatenate((tr_y1, tr_y2))
    tr_id_list = tr_id_list1 + tr_id_list2
    (te_x, te_y, te_id_list) = pp_data.load_data(eval_hdf5_path)
    logging.info("Loading data time: %s s" % (time.time() - t1))
    # Fix: logging.info(a, b) treats `a` as the format string -- passing two
    # shape tuples raised a formatting error inside the logging machinery.
    logging.info("tr_x1.shape: %s, tr_x2.shape: %s"
                 % (tr_x1.shape, tr_x2.shape))
    logging.info("tr_x.shape: %s" % (tr_x.shape, ))
    (_, n_time, n_freq) = tr_x.shape

    # Build model
    n_hid = 600
    n_out = tr_y.shape[1]
    lay_in = Input(shape=(n_time, n_freq))
    a_0 = BatchNormalization()(lay_in)
    a_1 = Dense(n_hid, kernel_regularizer=regularizers.l2(0.001))(a_0)
    a_1 = BatchNormalization()(a_1)
    a_1 = Activation('relu')(a_1)
    a_1 = Dropout(rate=0.4)(a_1)
    a_2 = Dense(n_hid, kernel_regularizer=regularizers.l2(0.001))(a_1)
    a_2 = BatchNormalization()(a_2)
    a_2 = Activation('relu')(a_2)
    a_2 = Dropout(rate=0.4)(a_2)
    a_3 = Dense(n_hid, kernel_regularizer=regularizers.l2(0.001))(a_2)
    a_3 = BatchNormalization()(a_3)
    a_3 = Activation('relu')(a_3)
    a_3 = Dropout(rate=0.4)(a_3)

    # Attention head on the third hidden layer
    cla_1 = Dense(n_out, name='cla_1')(a_3)
    cla_1 = BatchNormalization()(cla_1)
    cla_1 = Activation('sigmoid')(cla_1)
    att_1 = Dense(n_out, name='att_1')(a_3)
    att_1 = BatchNormalization()(att_1)
    att_1 = Activation('softmax')(att_1)
    lay_out_a = Lambda(_attention,
                       output_shape=_att_output_shape)([cla_1, att_1])

    # Attention head on the second hidden layer
    cla_2 = Dense(n_out, name='cla_2')(a_2)
    cla_2 = BatchNormalization()(cla_2)
    cla_2 = Activation('sigmoid')(cla_2)
    att_2 = Dense(n_out, name='att2')(a_2)
    att_2 = BatchNormalization()(att_2)
    att_2 = Activation('softmax')(att_2)
    lay_out_b = Lambda(_attention,
                       output_shape=_att_output_shape)([cla_2, att_2])

    # Merge the two heads and map back to n_out sigmoid outputs
    lay_out_c = Concatenate(axis=1)([lay_out_a, lay_out_b])
    lay_out = Dense(n_out, name='output')(lay_out_c)
    lay_out = BatchNormalization()(lay_out)
    lay_out = Activation('sigmoid')(lay_out)

    # Compile model
    md = Model(inputs=lay_in, outputs=lay_out)
    md.summary()

    # Evaluate every call_freq iterations
    call_freq = 1000
    dump_fd = os.path.join(workspace, "models", pp_data.get_filename(__file__))
    pp_data.create_folder(dump_fd)

    batch_size = 500
    tr_gen = RatioDataGenerator(batch_size=batch_size, type='train')

    # Optimization method
    optimizer = Adam(lr=args.lr)
    md.compile(loss='binary_crossentropy', optimizer=optimizer)

    # Train
    stat_dir = os.path.join(workspace, "stats", pp_data.get_filename(__file__))
    pp_data.create_folder(stat_dir)
    prob_dir = os.path.join(workspace, "probs", pp_data.get_filename(__file__))
    pp_data.create_folder(prob_dir)

    tr_time = time.time()
    iter_ = 1
    for (tr_batch_x, tr_batch_y) in tr_gen.generate(xs=[tr_x], ys=[tr_y]):
        # Compute stats every several iterations
        if iter_ % call_freq == 0:
            # Stats of evaluation dataset
            t1 = time.time()
            te_err = eval(md=md, x=te_x, y=te_y,
                          out_dir=os.path.join(stat_dir, "test"),
                          out_probs_dir=os.path.join(prob_dir, "test"),
                          iter_=iter_)
            logging.info("Evaluate test time: %s" % (time.time() - t1, ))

            # Stats of training dataset (balanced subset only)
            t1 = time.time()
            tr_bal_err = eval(md=md, x=tr_x1, y=tr_y1,
                              out_dir=os.path.join(stat_dir, "train_bal"),
                              out_probs_dir=None,
                              iter_=iter_)
            logging.info("Evaluate tr_bal time: %s" % (time.time() - t1, ))

        # NOTE(review): placed at loop level -- if this increment sat inside
        # the `if` above (the flattened source is ambiguous), iter_ would
        # stay at 1 forever and the loop would never evaluate or terminate.
        iter_ += 1

        # Update params
        (tr_batch_x, tr_batch_y) = pp_data.transform_data(tr_batch_x, tr_batch_y)
        md.train_on_batch(x=tr_batch_x, y=tr_batch_y)

        # Stop training when maximum iteration achieves
        if iter_ == call_freq * 151:
            break