Example #1
0
def verify():
    """Run the log-load pipeline end to end, bracketed by log markers.

    Downloads mail, extracts the model, and loads the data, writing
    start/sent/end entries to the log around the work.
    """
    write_log('$$ Log Load Started $$')
    # The three pipeline stages run in this fixed order.
    for stage in (mail_downloader, model_extract, data_load):
        stage()
    write_log('Log Sent')
    write_log('$$ Log Load Ended $$')
Example #2
0
def run():
    """Build each missing pipeline artifact, then compute TF-IDF.

    Creates the ./res output directory if it does not exist, runs each
    preprocessing stage (load, clean, stop-word filter, stemming, term
    list) only when its output file is absent, and finally builds the
    cached IDF and TF-IDF tables.

    :return: None
    """
    if not os.path.exists('./res'):
        os.makedirs('res')
    config = get_config()
    # BUG FIX: the original nested the second `or` inside the exists()
    # call -- os.path.exists(config['title'] or not os.path.exists(...))
    # -- which tested the truthiness of config['title'] and never
    # actually checked the 'content' path. All three paths are now
    # checked independently.
    if (not os.path.exists(config['url'])
            or not os.path.exists(config['title'])
            or not os.path.exists(config['content'])):
        data_load(config)
    if not os.path.exists(config['content_clean']):
        data_clean_content(config)
    if not os.path.exists(config['content_filter']):
        filter_stop_word(config)
    if not os.path.exists(config['content_stemming']):
        stemming(config)
    if not os.path.exists(config['term_list']):
        create_term_list(config)

    # TF is recomputed on every run; IDF and TF-IDF are cached on disk
    # and only rebuilt when their files are missing.
    documents = get_content(config)
    tf_documents = get_tf(documents)
    if not os.path.exists(config['idf']):
        create_idf(config, documents)
    idf_documents = get_idf(config)
    if not os.path.exists(config['tf_idf']):
        create_tf_idf(config, tf_documents, idf_documents, documents)
Example #3
0
def main(epochs=1):
    """Train and evaluate the estimator on an 80/20 split of the input.

    Loads the CSV addressed by the module-level ``args`` (file_path,
    nrows), uses the first 80% of rows for training and the remainder
    for validation, then runs the train/evaluate cycle.

    :param epochs: number of train/evaluate rounds. Defaults to 1,
        matching the original single pass (which used a hard-coded
        ``for _ in range(1)`` loop).
    """
    df = data_load(args.file_path, args.nrows)
    # Positional 80/20 split; integer floor division fixes the boundary.
    num = df.shape[0] * 4 // 5
    train_df = df[:num]
    valid_df = df[num:]
    estimator = get_estimator()
    for _ in range(epochs):
        logging.info('==== Start to train ===>')
        estimator.train(input_fn=lambda: read_csv_data(train_df.values))
        logging.info('==== Start to evaluate ===>')
        estimator.evaluate(input_fn=lambda: read_csv_data(valid_df.values))
Example #4
0
def main():
    """Driver for the ttW-vs-ttbar neural-network workflow.

    Dispatches on the module-level ``process_type``:
      - 'plot'  : plot input variables for each sample.
      - 'apply' : score each sample with a previously trained model.
      - 'read'  : load the saved model and evaluate it on a fresh split.
      - 'train' : fit a new model, save it, then evaluate it.

    Relies on module-level globals (``sample_list``, ``doclean``,
    ``process_type``, ``debug``, ``samples``) and helper modules
    ``dl``/``hp``/``md``/``pl`` defined elsewhere in the file.
    """
    print("load data")

    # One DataFrame per sample; `doclean` presumably toggles cleaning in
    # the loader -- TODO confirm against dl.data_load.
    dfs = dl.data_load(sample_list, doclean)
    print(dfs['ttW'].columns)
    if process_type == 'plot':
        do_plot(dfs, sample_list)

    elif process_type == 'apply':
        print("apply mode")
        # Restore the trained network plus the scaler and the variable
        # list that were pickled at training time.
        model = load_model('Outputs/training/model_nn_v0.h5')
        with open('Outputs/training/scaler.pickle', 'rb') as f:
            sc = pickle.load(f)
        with open('Outputs/training/vl.pickle', 'rb') as f:
            vl = pickle.load(f)

            #def model_create_feature():
        var_list = dl.sel_vars()
        cat_list = ['l0_id', 'l1_id', 'dileptype']  #,'mjjctag'
        for s in sample_list:
            #print(dfs[s].columns)
            # NOTE(review): this assignment is dead -- df_trans is
            # overwritten on the next line without ever being read.
            df_trans = dfs[s][var_list]
            df_trans = hp.val_to_cat(dfs[s], cat_list)
            print("df_trans columns: ", df_trans.columns)
            #df_trans = dfs[s][var_list]
            #print("before:\n",df_trans.head())
            # Reorder/select columns to match the training-time variable
            # list, then apply the training-time scaling.
            df_trans = pd.DataFrame(df_trans, columns=vl)
            df_trans = sc.transform(df_trans)
            print("after transformation:\n", df_trans[:5])
            predictScore = model.predict(df_trans)
            # NOTE(review): the 'Njets' copy is immediately overwritten
            # by predictScore -- it only serves to pre-create the column.
            dfs[s].loc[:, 'score'] = dfs[s].loc[:, 'Njets']
            dfs[s].loc[:, 'score'] = predictScore
            #print("with score:",dfs[s].head())

        # Plot one variable at score thresholds 0.0, 0.1, ..., 0.9.
        for i in range(0, 10):
            print(i / 10)
            #pl.plot_var(dfs,sample_list,'mjj',True,i/10)
            #pl.plot_var(dfs,sample_list,'Njets',True,i/10)
            #pl.plot_var(dfs,sample_list,'score',True,i/10)
            pl.plot_var(dfs, sample_list, 'ctaglj0', False, i / 10)
            #pl.plot_var(dfs,sample_list,'mjj',False,i/10)

    elif process_type == 'read' or process_type == 'train':

        if dfs:

            print("prepare for training:")
            print(" - transform input :")
            # Categorical vs. continuous feature split used downstream.
            cat_list = ['l0_id', 'l1_id', 'dileptype']  #,'mjjctag'
            noncat_list = list(set(dfs['ttW'].columns) - set(cat_list))
            #for s in sample_list:
            #dfs[s]=hp.val_to_cat(dfs[s],cat_list)
            #    dfs[s][noncat_list]=hp.norm_gev(dfs[s][noncat_list])

            #var_list= list(dfs['ttW'].columns)
            #print(var_list)
            #print(dfs['ttW'].head())
            print(" - split samples to train/test features/targets :")
            #X_train, X_test, y_train, y_test, w_train, w_test  = hp.pred_ds(dfs)
            X_train, X_test, y_train, y_test, cl_weight, var_list = hp.pred_ds(
                dfs, cat_list, noncat_list)
            #var_list= list(X_train.columns)
            print(type(X_train), " <- type X_train")
            print(type(y_train), " <- type y_train")
            #fl = md.create_feat(noncat_list)
            #print(X_test[:3])
            # Training hyperparameters (used only on the 'train' path,
            # but also written to the summary file below).
            learning_rate = 0.001
            nepochs = 500
            batch_size = 512
            validation_split = 0.2

            # Debug mode: stop after data preparation, before any fit.
            if debug:
                return 0

            if process_type == 'train':

                # Build and fit a fresh network, then plot the learning
                # curves for loss and accuracy.
                model = md.create_model(learning_rate, var_list)
                epochs, hist = md.train_model(
                    model,
                    X_train,
                    y_train,  # w_train,
                    cl_weight,
                    nepochs,
                    batch_size,
                    validation_split)

                list_of_metrics_to_plot = ['loss', 'val_loss']
                hp.plot_curve(epochs, hist, list_of_metrics_to_plot)
                list_of_metrics_to_plot = ['acc', 'val_acc']
                hp.plot_curve(epochs, hist, list_of_metrics_to_plot)

                print("\n Train set:")
                score_tr = model.evaluate(X_train,
                                          y_train,
                                          batch_size=batch_size)
                print(score_tr)
                print("\n Evaluate the new model against the test set:")
                score = model.evaluate(X_test, y_test, batch_size=batch_size)
                print(score)

                model.save('Outputs/training/model_nn_v0.h5')
            elif process_type == 'read':
                model = load_model('Outputs/training/model_nn_v0.h5')

            # Shared evaluation for both 'train' and 'read'.
            if model and (process_type != 'apply'):
                # X_test=X_test.to_numpy()
                # y_test=y_test.to_numpy()
                # X_train=X_train.to_numpy()
                # y_train=y_train.to_numpy()

                # Score signal/background separately for test and train
                # so the response histograms can be overlaid.
                testPredict = model.predict(X_test)
                xt_p = {}
                print(X_test, y_test)
                x_sig, x_bkg = hp.sig_bkg_ds_separate(X_test, y_test)
                print("predict xsig")
                xt_p['ttW'] = model.predict(x_sig)
                xt_p['ttbar'] = model.predict(x_bkg)
                x_p = {}
                x_sig, x_bkg = hp.sig_bkg_ds_separate(X_train, y_train)
                x_p['ttW'] = model.predict(x_sig)
                x_p['ttbar'] = model.predict(x_bkg)

                # 80 uniform bins on [0, 1] for the classifier response.
                bins = [i / 80 for i in range(80)]
                bins.append(1.)

                # Overlay test (filled) vs. train (step) response per sample.
                plt.figure("response")
                for i in sample_list:
                    plt.hist(xt_p[i],
                             bins,
                             alpha=0.5,
                             label=i + ' Predict',
                             density=True,
                             color=samples[i]['color'])
                    plt.hist(x_p[i],
                             bins,
                             alpha=1,
                             label=i + ' Train',
                             density=True,
                             color=samples[i]['color'],
                             histtype='step')
                plt.legend(loc="upper right")
                plt.savefig("Outputs/training/classPred_NN_ttw_ttbar.png",
                            transparent=True)
                plt.close("response")

                # Classification metrics on the test split.
                print(
                    classification_report(y_test,
                                          testPredict.round(),
                                          target_names=["ttbar", "ttW"]))  #
                auc = roc_auc_score(y_test, testPredict)
                print("Area under ROC curve: %.4f" % (auc))
                hp.get_roc(y_test, testPredict)

                # Persist a human-readable run summary next to the model.
                print_summary = True
                if print_summary:
                    with open("Outputs/training/summary.txt", "w") as f:
                        f.write("Parameters:\n")
                        #f.write("         classifier_model: {}\n".format(model.get_config()))
                        f.write(
                            "LR {}, epochs {}, batch_size {}, VS {} \n".format(
                                learning_rate, nepochs, batch_size,
                                validation_split))
                        f.write(": {}\n".format(
                            classification_report(
                                y_test,
                                testPredict.round(),
                                target_names=["signal", "background"])))
                        f.write("\nAUC:{}\n".format(auc))
                        f.write("model.summary() :{}\n".format(
                            model.summary()))
Example #5
0
#  else:
#    q = np.arccos(q[0]) * q[1:] / np.linalg.norm(q[1:])
#  return q

# Per-sequence visual-odometry alignment stats: identity rotation, zero
# translation, unit scale (i.e. no alignment applied).
# NOTE(review): `seq`, `seq_num`, `data_dir`, `img_width`, `img_height`
# are not defined in this fragment -- presumably set earlier; verify.
vo_stats = {}
#seq_num = [1, 2, 3, 4]
vo_stats[seq] = {'R': np.eye(3), 't': np.zeros(3), 's': 1}

mean_t = np.zeros(3)  # optionally, use the ps dictionary to calc stats
std_t = np.ones(3)

# Load images and poses with the (no-op) normalization and alignment
# parameters defined above.
train_img, train_pose = data_load(data_dir,
                                  img_width,
                                  img_height,
                                  seq_num,
                                  mean_t=mean_t,
                                  std_t=std_t,
                                  align_R=vo_stats[seq]['R'],
                                  align_t=vo_stats[seq]['t'],
                                  align_s=vo_stats[seq]['s'])

# NOTE(review): `a` is built (2 x 6) but never used in the visible code
# -- looks like scratch/demo code; confirm before removing.
a = np.empty([0, 6])
b = [1, 2, 3, 4, 5, 6]
a = np.append(a, np.array([b]), axis=0)
a = np.append(a, np.array([b]), axis=0)

# Convert loaded data to float32 arrays for the model.
train_img = np.array(train_img).astype('float32')
train_pose = np.array(train_pose).astype('float32')

print(train_img.shape)
Example #6
0
import numpy as np
from data_loader import data_load
from model import Model

# Load the raw (image, label) pairs and prepare flat, scaled arrays.
train, test = data_load()

m = train.shape[0]
pix = train[1][0].shape[1]

# Split each (image, label) pair into feature and target arrays.
train_x = np.asarray([pair[0] for pair in train])
train_y = np.asarray([pair[1] for pair in train])
test_x = np.asarray([pair[0] for pair in test])
test_y = np.asarray([pair[1] for pair in test])

# Flatten every sample and place examples in columns (features x m).
train_x = train_x.reshape(train_x.shape[0], -1).T
test_x = test_x.reshape(test_x.shape[0], -1).T

# Scale pixel values from [0, 255] down to [0, 1].
train_x = train_x / 255
test_x = test_x / 255