Example #1
# parse the raw config for evaluation mode (ConfigParser and models are project-local modules)
config = ConfigParser().parse_config(config=config, mode='evaluate')

# build the model selected by config['model']
if config['model'] == 'linear':
    model = models.LinearModel(input_shape=(config['history_length'], ),
                               nb_output_units=1,
                               nb_hidden_units=config['nb_hidden_units'])
elif config['model'] == 'mlp':
    model = models.MLPModel(input_shape=(config['history_length'], ),
                            nb_output_units=1,
                            nb_hidden_units=config['nb_hidden_units'],
                            nb_layers=config['nb_layers'])
elif config['model'] == 'gru':
    model = models.GRUModel(input_shape=(config['history_length'], 1),
                            nb_output_units=1,
                            nb_hidden_units=config['nb_hidden_units'],
                            nb_layers=config['nb_layers'],
                            dropout=config['dropout'],
                            recurrent_dropout=config['recurrent_dropout'])
elif config['model'] == 'lstm':
    model = models.LSTMModel(input_shape=(config['history_length'], 1),
                             nb_output_units=1,
                             nb_hidden_units=config['nb_hidden_units'],
                             nb_layers=config['nb_layers'],
                             dropout=config['dropout'],
                             recurrent_dropout=config['recurrent_dropout'])
elif config['model'] == 'lstm_attention':
    model = models.LSTMAttentionModel(
        input_shape=(config['history_length'], 1),
        nb_output_units=1,
        nb_hidden_units=config['nb_hidden_units'],
        dropout=config['dropout'],
        # the snippet is truncated here; the call is closed by analogy with the LSTM branch above
        recurrent_dropout=config['recurrent_dropout'])
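
For reference, a minimal sketch of the kind of config dict this dispatch expects. The key names are taken from the lookups above; the values are illustrative, not from the source:

config = {
    'model': 'lstm',           # one of: linear, mlp, gru, lstm, lstm_attention
    'history_length': 48,      # length of the input window
    'nb_hidden_units': 64,
    'nb_layers': 2,
    'dropout': 0.2,
    'recurrent_dropout': 0.2,
}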
Example #2
import sys
import time

# models, loadParquet, createSample and buildOptimizer are project-local helpers
def train(spark, args):
    """Train the selected model on the Spark cluster; optionally save it and its timing."""

    sc = spark.sparkContext
    # derive the cluster size from the Spark conf (sc._conf is a private attribute)
    numExecutors = int(sc._conf.get('spark.executor.instances'))
    exeCores = int(sc._conf.get('spark.executor.cores'))

    labelCol = 'encoded_label'
    if args.model == 'gru':
        featureCol = ['GRU_input']
        model = models.GRUModel()
    elif args.model == 'hlf':
        featureCol = ['HLF_input']
        model = models.HLFmodel()
    elif args.model == 'inclusive':
        featureCol = ['GRU_input', 'HLF_input']
        model = models.InclusiveModel()
    else:
        sys.exit("Error: unknown model '{}'; choose 'gru', 'hlf' or 'inclusive'.".format(args.model))

    ## Load the Parquet dataset, subsampling only when a fraction is requested
    sampleDF = args.frac != 1

    trainDF = loadParquet(spark,
                          args.dataset,
                          featureCol,
                          labelCol,
                          sample=sampleDF,
                          frac=args.frac)
    ## Convert the DataFrame into an RDD of Sample objects
    trainRDD = createSample(trainDF, featureCol, labelCol)

    # args.validation holds either a validation-set path or the literal string 'False'
    if args.validation != 'False':
        testDF = loadParquet(spark,
                             args.validation,
                             featureCol,
                             labelCol,
                             sample=False,
                             frac=args.frac)
        testRDD = createSample(testDF, featureCol, labelCol)
    else:
        testRDD = False

    batchSize = args.batchMultiplier * numExecutors * exeCores
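    # The global batch size scales with the cluster; BigDL expects it to be a
    # multiple of numExecutors * exeCores, which this construction guarantees.
    # e.g. batchMultiplier=4 on 8 executors with 4 cores each gives 4 * 8 * 4 = 128
    # (illustrative numbers, not from the source).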
    appName = args.jobName + "_" + args.model + "_{}exe_{}cores".format(
        numExecutors, exeCores)

    optimizer = buildOptimizer(model=model,
                               trainRDD=trainRDD,
                               valRDD=testRDD,
                               batchSize=batchSize,
                               numEpochs=args.numEpochs,
                               appName=appName,
                               logDir=args.logDir)

    ## Start training
    start = time.time()
    optimizer.optimize()
    stop = time.time()

    print("\n\n Elapsed time: {:.2f}s\n\n".format(stop - start))

    if args.saveModel:
        model.saveModel(modelPath=args.modelDir + '/' + appName + '.bigdl',
                        weightPath=args.modelDir + '/' + appName + '.bin',
                        over_write=True)

    if args.saveTime:
        # append one CSV row per run, e.g. "4,128,4,8,10,356.21"
        with open(args.model + 'Times.csv', 'a') as file:
            file.write("{},{},{},{},{},{:.2f}\n".format(
                args.batchMultiplier, batchSize, exeCores, numExecutors,
                args.numEpochs, stop - start))
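
To make the snippet self-contained, here is a minimal sketch of a driver that could invoke train(). The flag names mirror the args attributes read above, but the parser itself and its defaults are assumptions, not part of the source:

import argparse
from pyspark.sql import SparkSession

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=['gru', 'hlf', 'inclusive'], required=True)
    parser.add_argument('--dataset', required=True)          # training Parquet path
    parser.add_argument('--validation', default='False')     # validation Parquet path, or 'False'
    parser.add_argument('--frac', type=float, default=1.0)   # fraction of the training set to sample
    parser.add_argument('--batchMultiplier', type=int, default=4)
    parser.add_argument('--numEpochs', type=int, default=10)
    parser.add_argument('--jobName', default='train')
    parser.add_argument('--logDir', default='/tmp/logs')
    parser.add_argument('--modelDir', default='/tmp/models')
    parser.add_argument('--saveModel', action='store_true')
    parser.add_argument('--saveTime', action='store_true')
    args = parser.parse_args()

    spark = SparkSession.builder.appName(args.jobName).getOrCreate()
    train(spark, args)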