Example #1
def main():
    #print("hello world") for luck
    
    in_arg = train_input_args()
    
    print("   data directory =",in_arg.data_dir, "\n   save directory =", in_arg.save_dir, "\n   model architecture =", in_arg.arch,
          "\n   hidden units =", in_arg.hidden_units, "\n   learning rate =", in_arg.learning_rate,
          "\n   epochs =", in_arg.epochs, "\n   device =", in_arg.device)
    
    image_data, data_loader = load_images()
    
    model, optimizer, criterion = build_model(in_arg.arch, in_arg.hidden_units, in_arg.learning_rate)
    
    model = train_model(model, data_loader, in_arg.epochs, criterion, optimizer, in_arg.device)
    
    save_model(model, optimizer, in_arg.arch, in_arg.data_dir, in_arg.save_dir, image_data)
    
    print("-"*40)      
Example #2
def main(args):

    # Load in data from file
    df = functions.data_load()

    MODEL = 0
    # Check whether the user asked to rebuild the model
    if args.model is not None:
        MODEL = int(args.model[0])

    # If you want to generate a new model
    if MODEL:
        start = time.time()
        # Process data from data_frame
        tagged_recipes = functions.data_process(df)
        end = time.time()

        print("Data process execution: " + str(round((end - start), 2)) +
              " seconds")
        print("Model training and generation started for 20 epochs")

        start = time.time()
        # Build the model for predictions of recipes
        model = functions.build_model(tagged_recipes)
        end = time.time()

        print("Model build execution: " + str(round((end - start), 2)) +
              " seconds")
        print("")

    # Convert list of list to flat list
    ingredients_list = [
        ingredient for sublist in args.ingredient for ingredient in sublist
    ]

    # Generate predictions
    predictions = functions.build_predictions('Models/NLP.model',
                                              ingredients_list)

    # Build output
    functions.output(predictions, df)
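
# --- Hedged sketch (not part of the original): main(args) expects an args object with
# `.model` (optional, a one-element list) and `.ingredient` (a list of lists) attributes.
# An argparse entry point along these lines (flag names assumed) would supply them.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Suggest recipes from a list of ingredients")
    parser.add_argument('--model', nargs=1, metavar='FLAG',
                        help="pass 1 to retrain and regenerate the model before predicting")
    parser.add_argument('--ingredient', action='append', nargs='+', required=True,
                        help="one or more ingredients; the flag may be repeated")
    main(parser.parse_args())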
Example #3
train_labels_num = le.transform(np.squeeze(train_meta))
train_labels = to_categorical(train_labels_num, num_classes=15)

test = windowize(test_meta)
test_meta = replicate_func(test_meta, 10)

test_labels_num = le.transform(np.squeeze(test_meta))
test_labels = to_categorical(test_labels_num, num_classes=15)

del metadata
del train_meta
del test_meta

batch_size = 10
(n_samples, timesteps, features) = train.shape
print(train.shape)
model = build_model(batch_size, timesteps, features)
model.summary()

tensorboard = TensorBoard(log_dir='./logs',
                          histogram_freq=10,
                          write_graph=True,
                          write_images=True)

history = model.fit(train, train_labels, epochs=50, batch_size=10, verbose=2)
sio.savemat('history.mat', {'history': history.history})
scores = model.evaluate(test, test_labels)
print "Accuracy: %.2f" % (scores[1] * 100.0)

model.save_weights('run1.h5')
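
# --- Hedged sketch (not part of the original): build_model(batch_size, timesteps, features)
# is defined elsewhere. Given the 15-class one-hot labels above, a plausible (assumed)
# Keras version is a small LSTM classifier; the real architecture may differ.
from keras.models import Sequential
from keras.layers import LSTM, Dense

def build_model(batch_size, timesteps, features, num_classes=15):
    # batch_size is accepted to mirror the call above; a plain (non-stateful)
    # LSTM does not need it, so it is unused in this sketch.
    model = Sequential()
    model.add(LSTM(64, input_shape=(timesteps, features)))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model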
Example #4
from functions import get_data, build_model
import pandas as pd
import numpy as np

# First target column: action_taken_name

ColName = 'action_taken_name'
data, mapping = get_data(ColName, nr=500000)
s1 = data[data[ColName] == 0].sample(n=20000)
s2 = data[data[ColName] == 1].sample(n=20000)
d = pd.concat([s1, s2])
y = d[ColName]
X = d.drop(columns=[ColName])
build_model(X, y, cross=10, models=['RandomForest', 'SVM', 'xgb', 'Logistic'])

ColName = 'denial_reason_name_1'
data, mapping = get_data(ColName, nr=500000)
s1 = data[data[ColName] == 0].sample(n=np.minimum(
    data[data[ColName] == 0].shape[0], data[data[ColName] == 1].shape[0]))
s2 = data[data[ColName] == 1].sample(n=np.minimum(
    data[data[ColName] == 0].shape[0], data[data[ColName] == 1].shape[0]))
d = pd.concat([s1, s2])
y = d[ColName]
X = d.drop(columns=[ColName])
build_model(X, y, cross=10, models=['RandomForest', 'SVM', 'xgb', 'Logistic'])
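
# --- Hedged sketch (not part of the original): the balanced-sampling pattern above is
# repeated for every target column; a small hypothetical helper could factor it out,
# e.g. X, y = balanced_sample(data, ColName) followed by build_model(X, y, ...).
def balanced_sample(data, col, neg_label=0, pos_label=1, n=None):
    """Return X, y with an equal number of rows from the two classes of `col`
    (defaults to the size of the smaller class)."""
    neg = data[data[col] == neg_label]
    pos = data[data[col] == pos_label]
    n = n or min(len(neg), len(pos))
    d = pd.concat([neg.sample(n=n), pos.sample(n=n)])
    return d.drop(columns=[col]), d[col]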

ColName = 'denial_reason_name_2'
data, mapping = get_data(ColName, nr=500000)
s1 = data[data[ColName] == 0].sample(n=np.minimum(
    data[data[ColName] == 0].shape[0], data[data[ColName] == 2].shape[0]))
s2 = data[data[ColName] == 2].sample(n=np.minimum(
    data[data[ColName] == 0].shape[0], data[data[ColName] == 2].shape[0]))
d = pd.concat([s1, s2])
y = d[ColName]
X = d.drop(columns=[ColName])
build_model(X, y, cross=10, models=['RandomForest', 'SVM', 'xgb', 'Logistic'])
Example #5
    additive_score = weighted_add_score(eval_ncs, gensim_w2v_model)
    write_score(eval_ncs, additive_score, args.p2out + 'additive_scores.csv')

    logging.info('Reading train set')
    ncs = read_ncs(args.p2tc)

    logging.info('Creating vector for training instances')
    X, Y = get_vectors(ncs, gensim_w2v_model)
    if model_config.poly_degree > 1:
        X = get_poly_features(X, model_config.poly_degree)

    logging.info('Creating batches')
    in_batches, tar_batches = create_batch(X, Y, model_config.batch_size)

    logging.info('Creating the regression model')
    model, optimizer, criterion = build_model(X, Y)

    logging.info('Training')
    train(in_batches, tar_batches, model, model_config.nb_epochs, optimizer,
          criterion)

    logging.info('Calculating regression-based scores')
    reg_score = regression_score(eval_ncs, gensim_w2v_model, model)
    write_score(eval_ncs, reg_score, args.p2out + 'reg_scores.csv')

    print('Spearman rho between inv human score and regression',
          scipy.stats.spearmanr(reg_score, eval_scores_inv))
    print('Spearman rho between inv human score and additive score',
          scipy.stats.spearmanr(additive_score, eval_scores_inv))

    if args.rank == 'true':
Example #6
def run():
    # Path to the raw data file
    raw_path_140004_130001 = 'raw_140004_130001.txt'
    # Path for the filtered data file
    data_path_140004_130001 = 'data_140004_130001.txt'
    # Column names
    # ['blockcnt', 'blocktime', 'b.status', 'alltime', 'contime', 'pinggoogle', 'pingtime', 'b.dnstime', 'tlstime']
    columns_name_140004_130001 = fun.init_column(raw_path_140004_130001)
    print(columns_name_140004_130001)

    # Filter the raw data and write it to the filtered data file
    data_path_140004_130001 = fun.filter_data(raw_path_140004_130001, data_path_140004_130001, columns_name_140004_130001)
    data_set_140004_130001 = fun.pd_read_csv(data_path_140004_130001, columns_name_140004_130001)
    # print(data_set_140004_130001)

    # Separate the label to predict from the other columns
    # data_set_140004_130001.pop('blockcnt')
    # data_set_140004_130001.pop('blocktime')

    # Use 10% of the data as the test set and the rest for training (target: blockcnt)
    train_dataset_140004_130001 = data_set_140004_130001.sample(frac=0.9, random_state=0)
    # print(train_dataset_140004_130001)
    test_dataset_140004_130001 = data_set_140004_130001.drop(train_dataset_140004_130001.index)

    train_dataset_140004_130001.pop('blocktime')
    test_dataset_140004_130001.pop('blocktime')

    train_labels_140004_130001 = train_dataset_140004_130001.pop('blockcnt')
    test_labels_140004_130001 = test_dataset_140004_130001.pop('blockcnt')

    # Inspect the training data; diag_kind="kde" sets the plot type for the pairplot diagonal
    # print(train_dataset_140004_130001[['blocktime', 'alltime', 'b.dnstime', 'tlstime']])
    # sns.pairplot(train_dataset_140004_130001[['blocktime', 'alltime', 'b.dnstime', 'tlstime']])
    # plt.show()

    # Generate descriptive statistics summarizing the central tendency, dispersion and shape of the dataset's distribution (NaN values excluded)
    # train_stats_140004_130001 = train_dataset_140004_130001.describe()
    # print(train_stats_140004_130001)
    # Swap rows and columns (transpose)
    # train_stats_140004_130001 = train_stats_140004_130001.transpose()
    # print(train_stats_140004_130001)

    # normed_train_data_140004_130001 = norm(train_dataset_140004_130001,train_stats_140004_130001)
    # normed_test_data_140004_130001 = norm(test_dataset_140004_130001)

    # Create the model
    model = fun.build_model(train_dataset_140004_130001)
    # Inspect the model
    # model.summary()

    # Try the model on a small example batch
    # example_batch = train_dataset_140004_130001[:10]
    # print(example_batch)
    # example_result = model.predict(example_batch)
    # print(example_result)

    # Train the model
    history = fun.fit_model(model, train_dataset_140004_130001, train_labels_140004_130001)

    # Plot the training history
    # fun.plot_history(history)
    # hist = pd.DataFrame(history.history)
    # hist['epoch'] = history.epoch
    # hist.tail()
    # print(hist)
    # print(test_labels_140004_130001)
    # test_dataset_140004_130001 = test_dataset_140004_130001[5:10]
    # print('test_dataset_140004_130001 =')
    # print(test_dataset_140004_130001)
    #
    # test_labels_140004_130001 = test_labels_140004_130001[5:10]
    # print('test_labels_140004_130001 =')
    # print(test_labels_140004_130001)

    # loss, mae, mse = model.evaluate(test_dataset_140004_130001, test_labels_140004_130001, verbose=0)
    # print("Testing set loss {} blokcnt".format(loss))
    # print("Testing set mae {} blokcnt".format(mae))
    # print("Testing set mse {} blokcnt".format(mse))
    # print("Testing set Mean Abs Error: {:5.2f} blokcnt".format(mae))

    # Predict on the test set
    fun.predict(model, test_dataset_140004_130001, test_labels_140004_130001)
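
# --- Hedged sketch (not part of the original): fun.build_model(train_dataset) lives in the
# project's helper module. Given the regression setup above (predicting blockcnt and
# evaluating loss/mae/mse), a plausible (assumed) Keras version mirrors the TensorFlow
# regression tutorial; the real helper may differ.
import tensorflow as tf
from tensorflow.keras import layers

def build_model(train_dataset):
    model = tf.keras.Sequential([
        layers.Dense(64, activation='relu', input_shape=[len(train_dataset.keys())]),
        layers.Dense(64, activation='relu'),
        layers.Dense(1)  # single regression output (blockcnt)
    ])
    model.compile(loss='mse',
                  optimizer=tf.keras.optimizers.RMSprop(0.001),
                  metrics=['mae', 'mse'])
    return model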
Example #7
print("Use GPU: {}".format(args.gpu))
print("PyTorch Architecture: {}".format(args.arch))
print()
print("<" + "-" * 10, "Hyperparameters", "-" * 10 + ">")
print("Hidden Units: {}".format(args.hidden_units))
print("Learning Rate: {}".format(args.learning_rate))
print("Epochs: {}".format(args.epochs))
print("Batch Size: {}".format(args.batch_size))
print()

# Confirm with the user that the settings above are correct before continuing
while True:
    inp = input("Do the settings above look correct? [y/n]: ")
    if inp.lower() == 'y':
        break
    elif inp.lower() == 'n':
        exit("Adjust the settings and retry, exiting.")

# Get the class-->name mappings
cat_names = functions.get_category_names('cat_to_name.json')

# Build a transfer learning model and optimizer with user defined settings
model, optimizer = functions.build_model(args.arch, args.hidden_units,
                                         args.learning_rate)

# Train the model
model = functions.train(model, train_data, val_data, optimizer, args.epochs,
                        args.gpu)

# Save the model
functions.save(model, args.arch, class_to_idx, cat_names, args.save_dir)
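
# --- Hedged sketch (not part of the original): functions.get_category_names() only needs
# to return the category-to-name mapping stored in cat_to_name.json; assuming the file
# holds a flat JSON object, a minimal version could be:
import json

def get_category_names(path):
    with open(path, 'r') as f:
        return json.load(f)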
Example #8
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

print(dataset)

'''
Building the model.
'''


# Length of the vocabulary in chars
vocab_size = len(vocab)
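
# --- Hedged sketch (not part of the original): build_model is not defined in this snippet.
# In the TensorFlow text-generation tutorial that this code follows, it is an
# Embedding -> GRU -> Dense stack along these lines (details assumed here).
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    return tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, embedding_dim,
                                  batch_input_shape=[batch_size, None]),
        tf.keras.layers.GRU(rnn_units, return_sequences=True,
                            stateful=True, recurrent_initializer='glorot_uniform'),
        tf.keras.layers.Dense(vocab_size)
    ])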

# Build the model with the given parameters
model = build_model(
    vocab_size=len(vocab),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
    batch_size=BATCH_SIZE)

# Run model on an example to check expected output
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")
    
model.summary()

sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()

print("Input: \n", repr("".join(idx2char[input_example_batch[0]])))
print()
print(text[:250])

# The unique characters in the file
vocab = sorted(set(text))

# Creating a mapping from unique characters to indices
char2idx = {u: i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)

# Length of the vocabulary in chars
vocab_size = len(vocab)

checkpoint_dir = './training_checkpoints'
tf.train.latest_checkpoint(checkpoint_dir)

model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
model.build(tf.TensorShape([1, None]))

model.summary()


def generate_text(model, start_string):
    # Evaluation step (generating text using the learned model)

    # Number of characters to generate
    num_generate = 1000

    # Converting our start string to numbers (vectorizing)
    input_eval = [char2idx[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)