Example #1
def train_vgg19(train_data, test_data):
    print("begin train vgg19")
    ctx = utils.try_all_gpus()
    net = vgg19()
    net.initialize(ctx=ctx, init=init.Xavier())

    soft_max_cross = gluon.loss.SoftmaxCrossEntropyLoss()
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.05})
    utils.train(train_data, test_data, net, soft_max_cross, trainer, ctx, num_epochs=10)
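Every example on this page relies on a utils.try_all_gpus() helper that is not shown. Below is a minimal sketch of such a helper, assuming the MXNet NDArray API and a CPU fallback when no GPU is usable; the real utils module may differ (Example #7, for instance, passes a device count).

import mxnet as mx

def try_all_gpus():
    """Return all usable GPU contexts, or [mx.cpu()] if none is available."""
    ctxes = []
    try:
        for i in range(16):  # probe a reasonable number of devices
            ctx = mx.gpu(i)
            _ = mx.nd.zeros((1,), ctx=ctx)  # raises if the device is absent
            ctxes.append(ctx)
    except mx.base.MXNetError:
        pass
    return ctxes if ctxes else [mx.cpu()]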
Example #2
def train(train_augs, test_augs, learning_rate=.1):
    batch_size = 128
    num_epochs = 10
    ctx = utils.try_all_gpus()
    loss = gluon.loss.SoftmaxCrossEntropyLoss()
    train_data, test_data = get_data(batch_size, train_augs, test_augs)
    net = utils.resnet18(10)
    net.initialize(ctx=ctx, init=init.Xavier())
    net.hybridize()
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': learning_rate})
    utils.train(train_data, test_data, net, loss, trainer, ctx, num_epochs)
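get_data and the augmentation objects are defined elsewhere. As a hypothetical illustration only, the pipelines could be built with Gluon's vision transforms before calling train; the original get_data is not shown, so the exact augmenter type it expects is an assumption here.

from mxnet.gluon.data.vision import transforms

# Illustrative augmentation pipelines; the original project may use a different API.
train_augs = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomFlipLeftRight(),
    transforms.ToTensor()])
test_augs = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor()])

train(train_augs, test_augs, learning_rate=0.1)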
Example #3
def main(column, DIM_NUM):
    '''
    :param column: which part of the dataset's corpus to use
    :param DIM_NUM: dimensionality of the word vectors to use
    '''
    Params = CNNParameter(column, DIM_NUM)
    Params.load_dir()
    ctx = utils.try_all_gpus()
    csvfile = Params.train_file
    vocabfile = Params.vocab_file
    vocab = utils.read_vocab(vocabfile)
    glove_embedding = text.embedding.CustomEmbedding(
        pretrained_file_path=Params.embedding_file, vocabulary=vocab)
    net = utils.TextCNN(vocab, DIM_NUM, Params.ngram_kernel_sizes,
                        Params.nums_channels, Params.num_outputs)
    net.initialize(init.Xavier(), ctx=ctx)
    # Both embedding_static and embedding_non_static use the pre-trained word vectors.
    net.embedding_static.weight.set_data(glove_embedding.idx_to_vec)
    #net.embedding_non_static.weight.set_data(glove_embedding.idx_to_vec)
    # Do not update the embedding_static word vectors during training, i.e. freeze its parameters.
    net.embedding_static.collect_params().setattr('grad_req', 'null')
    trainer = gluon.Trainer(net.collect_params(), 'adam',
                            {'learning_rate': Params.lr})
    loss = gloss.SoftmaxCrossEntropyLoss()
    trainSet, valSet = utils.select_sample_by_class(csvfile, ratio=0.85)
    train_features, test_features, train_labels, test_labels = utils.read_dg_data(
        trainSet, valSet, vocab, column, MAX_LEN=2500)
    train_set = gdata.ArrayDataset(train_features, train_labels)  # training set
    test_set = gdata.ArrayDataset(test_features, test_labels)  # test set
    train_loader = gdata.DataLoader(train_set,
                                    batch_size=Params.batch_size,
                                    shuffle=True)
    test_loader = gdata.DataLoader(test_set,
                                   batch_size=Params.batch_size,
                                   shuffle=False)
    logging.info("开始训练cnn {} 文本分类模型...".format(column))
    best_acc = utils.train(train_loader, test_loader, net, loss, trainer, ctx,
                           Params.num_epochs, column, Params.best_param_file)
    logging.info("模型训练完成,最佳的acc:{:.4f} 开始测试...".format(best_acc))
    net.load_parameters(Params.best_param_file, ctx=ctx)
    f1 = utils.evaluate_valset(net, valSet, vocab, column)
    best_file = os.path.join(Params.result_dir,
                             "rnn_{}_{:.4f}.csv".format(column, f1))
    best_prob_file = os.path.join(Params.result_dir,
                                  "rnn_{}_{:.4f}_prob.csv".format(column, f1))
    logging.info("cnn网络在验证集最佳的f1_score:{:.4f}".format(f1))
    logging.info("对数据进行测试")
    textSet = pd.read_csv(Params.test_file)
    y_probs = utils.predict_test_result(net, vocab, textSet, column, best_file)
    logging.info("保存概率数据...")
    utils.save_prob_file(y_probs, best_prob_file)
    logging.info("保存完毕,请查看目录result.")
Example #4
def main(column, DIM_NUM):
    Params = RNNParameter(column, DIM_NUM)
    Params.load_dir()
    num_outputs = Params.num_outputs
    lr = Params.lr
    num_epochs = Params.num_epochs
    batch_size = Params.batch_size
    embed_size = DIM_NUM
    num_hiddens = Params.num_hiddens
    num_layers = Params.num_layers
    bidirectional = Params.bidirectional
    ctx = utils.try_all_gpus()
    csvfile = Params.train_file
    vocab = utils.read_vocab(Params.vocab_file)
    glove_embedding = text.embedding.CustomEmbedding(
        pretrained_file_path=Params.embedding_file, vocabulary=vocab)
    net = utils.BiRNN(vocab, embed_size, num_hiddens, num_layers, bidirectional,
                      num_outputs)
    net.initialize(init.Xavier(), ctx=ctx)
    # Set the weight of the embedding layer to the pre-trained word vectors.
    net.embedding.weight.set_data(glove_embedding.idx_to_vec)
    # Do not update the word vectors (the parameters of net.embedding) during training.
    net.embedding.collect_params().setattr('grad_req', 'null')
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
    loss = gloss.SoftmaxCrossEntropyLoss()
    trainSet, valSet = utils.select_sample_by_class(csvfile, ratio=0.85)
    train_features, test_features, train_labels, test_labels = utils.read_dg_data(
        trainSet, valSet, vocab, column, MAX_LEN=2500)
    train_set = gdata.ArrayDataset(train_features, train_labels)  # training set
    test_set = gdata.ArrayDataset(test_features, test_labels)  # test set
    train_loader = gdata.DataLoader(train_set, batch_size=batch_size, shuffle=True)
    test_loader = gdata.DataLoader(test_set, batch_size=batch_size, shuffle=False)
    logging.info("Start training the RNN {} text classification model".format(column))
    best_acc = utils.train(train_loader, test_loader, net, loss, trainer, ctx,
                           num_epochs, column, Params.best_param_file)
    logging.info("Training finished, best model acc: {:.4f}, start testing.".format(best_acc))
    net.load_parameters(Params.best_param_file, ctx=ctx)
    f1 = utils.evaluate_valset(net, valSet, vocab, column)
    logging.info("f1_score of the RNN network on the validation set: {:.4f}".format(f1))
    logging.info("Running prediction on the test data")
    textSet = pd.read_csv(os.path.join(Params.data_dir, 'test_set.csv'))
    y_probs = utils.predict_test_result(net, vocab, textSet, column,
                                        'result/rnn_{}_{:.4f}.csv'.format(column, f1))
    logging.info("Saving probability data")
    utils.save_prob_file(y_probs, 'result/rnn_{}_{:.4f}_prob.csv'.format(column, f1))
    logging.info("Saved, please check the result directory.")
Example #5
                        action='store_true',
                        help='Use synchronized BN across devices.')
    parser.add_argument(
        '--no-wd',
        action='store_true',
        help='Whether to remove weight decay on bias and on beta/gamma of '
             'batchnorm layers.')
    args = parser.parse_args()
    return args


data_path = '../traffic-matrices/'
num_epochs = 4096
batchsize = 4096
ctx = utils.try_all_gpus()

tms = utils.buildAllTMToATensor(xmlPath=data_path)
train_data = utils.train_dataloader(tms=tms)
test_data = utils.test_dataloader(tms=tms)
data_train = gdata.ArrayDataset(train_data[0], train_data[1])
data_test = gdata.ArrayDataset(test_data[0], test_data[1])
data_train_iter = gdata.DataLoader(dataset=data_train,
                                   batch_size=batchsize,
                                   shuffle=True)
data_test_iter = gdata.DataLoader(dataset=data_test,
                                  batch_size=batchsize,
                                  shuffle=False)
# for x, y  in data_test_iter:
#     print(x,y)
#     break
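The loaders above feed a multi-device training loop defined elsewhere. As a sketch of how a batch from data_train_iter might be spread across the ctx list returned by utils.try_all_gpus() (net, trainer and loss_fn are assumed to exist already):

from mxnet import autograd, gluon

def train_one_epoch(net, trainer, loss_fn, data_iter, ctx):
    for X, y in data_iter:
        # split the batch evenly across all available devices
        Xs = gluon.utils.split_and_load(X, ctx)
        ys = gluon.utils.split_and_load(y, ctx)
        with autograd.record():
            losses = [loss_fn(net(x), y_) for x, y_ in zip(Xs, ys)]
        for l in losses:
            l.backward()
        trainer.step(X.shape[0])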
Example #6
import os

import get_net
import utils
import cv2
import numpy as np
from mxnet import nd, image

MODEL_NAME = 'res34_cbam_parallel'
CTX = utils.try_all_gpus()


def load_model(
        model,
        arp_path='../param/res34_bcam_parallel_625_0.2043_0.945_9.74.params'):
    net = model
    import_path = arp_path

    net.load_parameters(import_path)
    net.collect_params().reset_ctx(CTX)
    print('load {} finished on {}'.format(MODEL_NAME, CTX))

    return net


def predict(img, net):
    # X = image.imread(img)
    X = nd.array(img)
    X = normalize_image(X).as_in_context(CTX[0])
    # X = (X.astype('float32') / 255).as_in_context(CTX[0])
    X = X.transpose((2, 0, 1)).expand_dims(axis=0)
    pred = nd.argmin(net(X), axis=1)
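predict() calls a normalize_image helper that is not part of this snippet. A plausible sketch, assuming standard ImageNet channel statistics applied to an image scaled to [0, 1] (the real helper may use different constants):

from mxnet import nd

# Assumed helper: scale to [0, 1] and apply ImageNet channel statistics.
rgb_mean = nd.array([0.485, 0.456, 0.406])
rgb_std = nd.array([0.229, 0.224, 0.225])

def normalize_image(data):
    return (data.astype('float32') / 255 - rgb_mean) / rgb_std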
Example #7
            utils.save_kvstore(vars(args), os.path.join(path, 'config.json')))
        start_epoch = 0
        trainingfile = os.path.join(path, 'training.json')
        utils.save_kvstore({'epoch': 0, 'lr': args.lr}, trainingfile)

    logging.basicConfig(level=logging.INFO,
                        handlers=[
                            logging.StreamHandler(),
                            logging.FileHandler(os.path.join(path, "log.log"))
                        ])

    # Configure the computation resources
    if args.cpu:
        ctxs = [mxnet.cpu()]
    else:
        ctxs = utils.try_all_gpus(args.num_gpus)
    assert args.batch_size % len(ctxs) == 0, \
        'Total batch size must be a multiple of the number of devices'
    m = args.batch_size // len(ctxs)
    logging.info(
        "Splitting each batch (batch size={}) across {}; each device gets {} samples"
        .format(args.batch_size, ctxs, m))

    # Set the random seed manually for reproducibility.
    if args.seed:
        np.random.seed(args.seed)
        mxnet.random.seed(args.seed)

    ###############################################################################
    # Load data
    ###############################################################################