def predict_main(list_posts, data_folder_path, saved_model_path, temp_save_path):
    """Predict category labels for ``list_posts`` with a saved 'hier_fuse' model.

    The posts are loaded with ``load_data``, featurized with ELMo and GloVe
    word features plus pre-trained BERT sentence features, fed through the
    model(s) stored under ``saved_model_path`` and the resulting predictions
    are translated through ``data_dict['FOR_LMAP']``.

    Args:
        list_posts: Posts to classify; handed to ``load_data`` unchanged.
        data_folder_path: Prefix of the data folder. It is concatenated
            directly with ``'esp_class_maps.txt'``, so it must already end
            with a path separator.
        saved_model_path: Location of the saved model(s), forwarded to
            ``gen_raw_output``.
        temp_save_path: Save folder forwarded to both featurizers.

    Returns:
        A list with one inner list per entry of the ``evaluate_model``
        output; every predicted value is replaced by
        ``data_dict['FOR_LMAP'][value]``.
    """
    model_type = 'hier_fuse'
    word_feats_raw = [
        {'emb': 'elmo', 's_enc': 'rnn', 'm_id': '11'},
        {'emb': 'glove', 's_enc': 'rnn', 'm_id': '21'},
    ]
    sent_enc_feats_raw = [{'emb': 'bert_pre', 'm_id': '1'}]

    # Embedding name -> size tables handed to the featurizers.
    word_emb_dims = {'glove': 300, 'ling': 33, 'elmo': 3072}
    sent_emb_dims = {'bert_pre': 768}

    # Two independent label groups, each listing the labels 0..13.
    classi_probs_label_info = [list(range(14)) for _ in range(2)]

    # NOTE(review): 16, 35 and 198 are presumably sentence/word/post length
    # limits expected by load_data -- confirm against its signature.
    class_map_file = data_folder_path + 'esp_class_maps.txt'
    data_dict = load_data(list_posts, 16, 35, 198, class_map_file)

    # NOTE(review): the trailing (False, False, ..., True) arguments appear to
    # be "use saved feats", "save feats" and "test mode" -- confirm against
    # the featurizer signatures.
    word_feats, _ = word_featurize(
        word_feats_raw, model_type, data_dict, word_emb_dims,
        False, False, data_folder_path, temp_save_path, True)
    sent_enc_feats, _ = sent_enc_featurize(
        sent_enc_feats_raw, model_type, data_dict, sent_emb_dims,
        False, False, data_folder_path, temp_save_path, True)

    # Iterate over the index range [test_st_ind, test_en_ind).
    test_indices = np.arange(data_dict['test_st_ind'], data_dict['test_en_ind'])
    # NOTE(review): 64 is presumably the batch size -- confirm in TestGenerator.
    test_generator = TestGenerator(
        test_indices, word_feats, sent_enc_feats, data_dict, 64)

    num_label_groups = len(classi_probs_label_info)
    mod_op_list = gen_raw_output(
        num_label_groups, saved_model_path, test_generator,
        data_dict['NUM_CLASSES'])
    pred_vals = evaluate_model(mod_op_list, data_dict, classi_probs_label_info)

    # Translate every predicted value through the FOR_LMAP lookup.
    return [
        [data_dict['FOR_LMAP'][pred] for pred in pred_val_set]
        for pred_val_set in pred_vals
    ]
conf_dict_com["save_folder_name"], conf_dict_com['use_conf_scores'], conf_dict_com['multi_task_tl'], conf_dict_com['classi_loss_weight'], prime_filename, aux_filename_str, conf_dict_com['augment_data'], conf_dict_com['single_inp_tasks_list'], conf_dict_com['sep_inp_tasks_list'], conf_dict_com['uncorr_c_pairs_filename'], conf_dict_com['beta'], conf_dict_com['label_corr_setting'], conf_dict_com['train_ratio']) for model_type in conf_dict["model_types"]: for word_feats_raw in conf_dict["word_feats_l"]: word_feats, word_feat_str = word_featurize( word_feats_raw, model_type, data_dict, conf_dict_com['poss_word_feats_emb_dict'], conf_dict_com['use_saved_word_feats'], conf_dict_com['save_word_feats'], conf_dict_com["data_folder_name"], conf_dict_com["save_folder_name"], conf_dict_com["test_mode"], prime_filename) for sep_task_tup in conf_dict_com['sep_inp_tasks_list']: sep_task_name, sep_task_dict = sep_task_tup sep_task_dict['word_feats'], sep_task_dict[ 'word_feat_str'] = word_featurize( word_feats_raw, model_type, sep_task_dict, conf_dict_com['poss_word_feats_emb_dict'], conf_dict_com['use_saved_word_feats'], conf_dict_com['save_word_feats'], conf_dict_com["data_folder_name"], conf_dict_com["save_folder_name"], conf_dict_com["test_mode"], get_filename(sep_task_dict['filename']))
f_tsv.write("model\tword feats\tsent feats\ttrans\tclass imb\tcnn fils\tcnn kernerls\tthresh\trnn dim\tatt dim\tpool k\tstack RNN\tf-We\tf-Ma\tf-Mi\tacc\tp-We\tp_Ma\tp_Mi\tr_We\tr_Ma\tr_Mi\tstd_f_we\trnn type\tl rate\tb size\tdr1\tdr2\ttest mode\tclassi probs labels\n") elif data_dict['prob_type'] == 'binary': f_tsv.write("model\tword feats\tsent feats\ttrans\tclass imb\tcnn fils\tcnn kernerls\tthresh\trnn dim\tatt dim\tpool k\tstack RNN\tf\tp\tr\tacc\tstd_f\trnn type\tl rate\tb size\tdr1\tdr2\ttest mode\tclassi probs labels\n") print("max # sentences: %d, max # words per sentence: %d, max # words per post: %d" % (data_dict['max_num_sent'], data_dict['max_words_sent'], data_dict['max_post_length'])) metr_dict = init_metr_dict(data_dict['prob_type']) classi_probs_label_str = str(conf_dict_com['classi_probs_label_info'])[2:-2].replace('], [', '+').replace(', ','_') for conf_dict in conf_dict_list: for prob_trans_type in conf_dict["prob_trans_types"]: trainY_list, trainY_noncat_list, num_classes_list, bac_map = transform_labels(data_dict['lab'][:data_dict['train_en_ind']], prob_trans_type, conf_dict_com['test_mode'], conf_dict_com["save_folder_name"], data_dict['NUM_CLASSES'], data_dict['prob_type'], conf_dict_com['classi_probs_label_info'], classi_probs_label_str, conf_dict_com['use_saved_data_stuff'], conf_dict_com['save_data_stuff']) for class_imb_flag in conf_dict["class_imb_flags"]: loss_func_list, nonlin, out_vec_size_list, cw_list = class_imb_loss_nonlin(trainY_noncat_list, class_imb_flag, num_classes_list, prob_trans_type, conf_dict_com['test_mode'], conf_dict_com["save_folder_name"], conf_dict_com['use_saved_data_stuff'], conf_dict_com['save_data_stuff'], classi_probs_label_str) for model_type in conf_dict["model_types"]: for word_feats_raw in conf_dict["word_feats_l"]: word_feats, word_feat_str = word_featurize(word_feats_raw, model_type, data_dict, conf_dict_com['poss_word_feats_emb_dict'], conf_dict_com['use_saved_word_feats'], 
conf_dict_com['save_word_feats'], conf_dict_com["data_folder_name"], conf_dict_com["save_folder_name"], conf_dict_com["test_mode"]) for sent_enc_feats_raw in conf_dict["sent_enc_feats_l"]: sent_enc_feats, sent_enc_feat_str = sent_enc_featurize(sent_enc_feats_raw, model_type, data_dict, conf_dict_com['poss_sent_enc_feats_emb_dict'], conf_dict_com['use_saved_sent_enc_feats'], conf_dict_com['save_sent_enc_feats'], conf_dict_com["data_folder_name"], conf_dict_com["save_folder_name"], conf_dict_com["test_mode"]) for num_cnn_filters in conf_dict["num_cnn_filters"]: for max_pool_k_val in conf_dict["max_pool_k_vals"]: for cnn_kernel_set in conf_dict["cnn_kernel_sets"]: cnn_kernel_set_str = str(cnn_kernel_set)[1:-1].replace(', ','_') for rnn_type in conf_dict["rnn_types"]: for rnn_dim in conf_dict["rnn_dims"]: for att_dim in conf_dict["att_dims"]: for stack_rnn_flag in conf_dict["stack_rnn_flags"]: mod_op_list_save_list = [] for thresh in conf_dict["threshes"]: startTime = time.time() info_str = "model: %s, word_feats = %s, sent_enc_feats = %s, classi_probs_label_info = %s, prob_trans_type = %s, class_imb_flag = %s, num_cnn_filters = %s, cnn_kernel_set = %s, rnn_type = %s, rnn_dim = %s, att_dim = %s, max_pool_k_val = %s, stack_rnn_flag = %s, thresh = %s, test mode = %s" % (model_type,word_feat_str,sent_enc_feat_str,classi_probs_label_str,prob_trans_type,class_imb_flag,num_cnn_filters,cnn_kernel_set,rnn_type,rnn_dim,att_dim,max_pool_k_val,stack_rnn_flag, thresh, conf_dict_com["test_mode"]) fname_part = ("%s~%s~%s~%s~%s~%s~%s~%s~%s~%s~%s~%s~%s~%s" % (model_type,word_feat_str,sent_enc_feat_str,classi_probs_label_str,prob_trans_type,class_imb_flag,num_cnn_filters,cnn_kernel_set_str,rnn_type,rnn_dim,att_dim,max_pool_k_val,stack_rnn_flag, conf_dict_com["test_mode"]))