Ejemplo n.º 1
0
# Adam optimizer with per-update learning-rate decay for the ESMM model.
opt = optimizers.Adam(lr=0.003, decay=0.0001)
# Two output heads (CTR and CTCVR), each trained with binary cross-entropy
# and weighted equally; AUC is tracked as the evaluation metric.
ctcvr_model.compile(optimizer=opt,
                    loss=["binary_crossentropy", "binary_crossentropy"],
                    loss_weights=[1.0, 1.0],
                    metrics=[tf.keras.metrics.AUC()])

# Keras model save path
filepath = "esmm_best.h5"

# Callbacks: keep only the weights with the lowest validation loss.
checkpoint = ModelCheckpoint(filepath,
                             monitor='val_loss',
                             verbose=1,
                             save_best_only=True,
                             mode='min')
# Shrink the learning rate by 20% after 2 stagnant epochs, floored at 1e-4.
reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                              factor=0.8,
                              patience=2,
                              min_lr=0.0001,
                              verbose=1)
# Stop after 8 epochs without a val_loss improvement of at least 1e-4.
earlystopping = EarlyStopping(monitor='val_loss',
                              min_delta=0.0001,
                              patience=8,
                              verbose=1,
                              mode='auto')
callbacks = [checkpoint, reduce_lr, earlystopping]

# Train the model.
# NOTE(review): `callbacks` is built above but not passed to train_model
# here — presumably consumed via module state or unused; verify.
train_model(cate_feature_dict, user_cate_feature_dict, item_cate_feature_dict,
            train_data, val_data)
Ejemplo n.º 2
0
def Main():
    """Train an image classifier from the command line.

    Parses CLI options, loads the train/valid/test image folders, builds a
    pretrained network with a fresh classifier head, trains it, and saves a
    checkpoint. Relies on the project-local `preprocess` and `model_train`
    modules plus torchvision's `datasets` being importable at module level.
    """
    parser = argparse.ArgumentParser(
        description='This is a image identifier training program.')
    # Fix: the concatenated help fragments previously ran together without
    # separating spaces ("...the model.Please notice...given,named...").
    parser.add_argument(
        'data_dir',
        help='Please specify your directory of images you want to use for '
        'training the model. Please notice that there must be three '
        'sub-directories within the directory given, '
        'named train, test, validation respectively',
        type=str)
    parser.add_argument(
        '--save_dir',
        help=
        'Please specify the directory that you want to store your model checkpoint file',
        type=str,
        action='store',
        dest='save_directory',
        default='checkpoint.pth')

    parser.add_argument('--arch',
                        help='Please specify the architecture you want to use',
                        type=str,
                        action='store',
                        default='vgg16')
    parser.add_argument('--gpu', help='enter GPU mode', action='store_true')
    parser.add_argument('--hidden_units',
                        help='number of hidden_units',
                        type=int,
                        default=8192)
    parser.add_argument('--epochs',
                        help='number of epochs',
                        type=int,
                        default=5)
    parser.add_argument('--learning_rate',
                        help='learning rate',
                        default=0.0001,
                        type=float)

    args = parser.parse_args()

    # Directory setup: data_dir must contain train/, valid/ and test/.
    data_dir = args.data_dir
    train_dir = data_dir + '/train'
    valid_dir = data_dir + '/valid'
    test_dir = data_dir + '/test'

    # Device setup. Fix: always produce a torch.device so downstream
    # .to(device) calls get a consistent type (the CPU branch was a str).
    if args.gpu:
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    else:
        device = torch.device('cpu')

    # Load train/validation/test datasets and wrap them in loaders.
    print('Loading images...\n')

    train_datasets = datasets.ImageFolder(train_dir,
                                          preprocess.train_transforms())
    valid_datasets = datasets.ImageFolder(
        valid_dir, preprocess.test_validation_transforms())
    test_datasets = datasets.ImageFolder(
        test_dir, preprocess.test_validation_transforms())

    trainloader = torch.utils.data.DataLoader(train_datasets,
                                              batch_size=32,
                                              shuffle=True)
    # NOTE(review): testloader is built but never used in this function.
    testloader = torch.utils.data.DataLoader(test_datasets,
                                             batch_size=32,
                                             shuffle=True)
    validloader = torch.utils.data.DataLoader(valid_datasets,
                                              batch_size=32,
                                              shuffle=True)

    print('Initializing Architecture...\n')

    # Freeze the pretrained feature extractor; only the new classifier
    # head attached below will be trained.
    model = model_train.get_pretrained_model(arch=args.arch)
    for params in model.parameters():
        params.requires_grad = False

    classifier = model_train.def_classifier(args.hidden_units)
    model.classifier = classifier
    model.to(device)

    criterion = model_train.criterion()
    optimizer = model_train.optimizer(learning_rate=args.learning_rate,
                                      params=model.classifier.parameters())
    steps = 0
    print_every = 40

    print('Training...\n')

    model_train.train_model(model, criterion, optimizer, trainloader,
                            validloader, device, steps, print_every,
                            args.epochs)

    # Move back to CPU before checkpointing so the saved weights load on
    # machines without CUDA.
    model.to('cpu')

    model_train.save_checkpoint(model,
                                optimizer,
                                train_datasets,
                                arch=args.arch,
                                save_directory=args.save_directory)
Ejemplo n.º 3
0
from CSV_play import create_training
from model_train import train_model
import os
import argparse


def get_args():
    """Parse the trainer's command line: a required model id and a csv filename."""
    arg_parser = argparse.ArgumentParser(
        description="Tarvitsen Tyota Application's trainer.")
    arg_parser.add_argument('-id',
                            required=True,
                            type=str,
                            help='Unique Id for Model')
    arg_parser.add_argument('-f',
                            required=True,
                            type=str,
                            help='Filename of csv')
    return arg_parser.parse_args()


# Parse CLI args (-id, -f) and locate the training CSV under ML/csv/.
args = get_args()
file_location = 'ML/csv/{}'.format(args.f)
models = os.listdir('ML/model')

# Build the training inputs/targets from the CSV file.
x_train, y_train = create_training(file_location)

# Tell train_model whether a saved model with this id already exists
# (presumably so it can update rather than start fresh — verify).
exists = '{}.hd5'.format(args.id) in models
train_model(x_train, y_train, args.id, exists)
if __name__=='__main__':
    # Fetch the raw data to local storage
    data_to_local()
    # Clean / process the data
    process_data()
    # Extract features: inputs plus the prediction-target name
    gen_feature_file('oper_rev')
    # Split the dataset by date ranges (train / validation / test windows)
    split_data('2011-03-31', 
               '2016-12-31', 
              '2017-03-31', 
               '2017-12-31',
               '2017-12-31',
               '2018-03-31', 'oper_rev')
    # Train the model and save it
    train_model('train.csv', 'valid.csv', '2018-03-31', 'oper_rev')
    # Predict
    pred_model('2018-03-31', 'oper_rev')
    # Evaluate the results
    evaluation_result('2018-03-31', 'oper_rev')
    
        
        
        
        
        
        
        
        
        
        
Ejemplo n.º 5
0
    n_hidden_layers = arguments.hidden_units

    layers = [int(n_inputs)]
    layers.extend([int(layer.strip()) for layer in n_hidden_layers.split(',')])
    layers.append(int(layers_output_n))

    model, save_data = build_model(model_name,
                                   layers,
                                   layer_name,
                                   dropout=dropout_percent,
                                   pretrained=True)

# Fall back to deriving the idx_to_class mapping from class_to_idx when the
# checkpoint payload does not already carry one.
if not save_data.get('idx_to_class'):
    save_data["idx_to_class"] = convert_class_to_idx(class_to_idx)

# Train model
timer = Timer.Timer('Start Training')

# NLLLoss — presumably the model's final layer emits log-probabilities
# (log-softmax); verify against build_model. Only the replaced head layer's
# parameters are handed to the optimizer.
criterion = nn.NLLLoss()
optimizer = optim.Adam(getattr(model, layer_name).parameters(), learning_rate)

train_model(model,
            criterion,
            optimizer,
            train_images_dataloader,
            test_images_dataloader,
            save_data=save_data,
            epochs=epochs,
            device=device)
timer.stop('end time')
Ejemplo n.º 6
0
#                     metrics=[tf.keras.metrics.AUC()])

# keras model save path
filepath = "esmm_best.h5"

# Callbacks (left disabled; training is delegated to train_model below)
# checkpoint = ModelCheckpoint(
#     filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
# reduce_lr = ReduceLROnPlateau(
#     monitor='val_loss', factor=0.8, patience=2, min_lr=0.0001, verbose=1)
# earlystopping = EarlyStopping(
#     monitor='val_loss', min_delta=0.0001, patience=8, verbose=1, mode='auto')
# callbacks = [checkpoint, reduce_lr, earlystopping]

# Train the model
ctcvr_model = train_model(cate_feature_dict, user_cate_feature_dict, item_cate_feature_dict, train_data, val_data, filepath)

# Alternatively, load a previously saved model
# ctcvr_model = tf.keras.models.load_model('esmm_best.h5')

# Model predict: the two heads return CTR and CTCVR probabilities
[ctr_pred, ctcvr_pred] = ctcvr_model.predict(pred_data)

# Derive the CVR prediction: p(CTCVR) = p(CTR) * p(CVR), so p(CVR) = p(CTCVR)/p(CTR)
cvr_pred = ctcvr_pred/ctr_pred


# Reference: https://github.com/busesese/ESMM
# Question: when implementing this model, how do we train it, how is the loss
# written, and how is the training data constructed?
# The main task here is CVR and the auxiliary task is CTR. Production data
# consists of impressions, clicks and conversions,
# so impression+click data builds the auxiliary CTR model (positives:
# impression & click; negatives: impression & no click),
Ejemplo n.º 7
0
def train_model():
    """Thin wrapper: run model_train.train_model() and hand back its metrics."""
    return model_train.train_model()