コード例 #1
0
ファイル: optimization.py プロジェクト: zzge1984/BigDL
 def to_bigdl_criterion(kloss):
     """Translate a Keras loss specification into a BigDL criterion.

     :param kloss: either a loss name (matched case-insensitively) or one of
         the loss objects this module references by name (``mse``, ``hinge``,
         ``poisson``, ...).
     :return: a newly constructed ``bcriterion`` criterion for that loss.
     :raises Exception: if ``kloss`` matches none of the supported losses.
     """
     # Only string names are normalised; any other value is compared as-is.
     if isinstance(kloss, six.string_types):
         kloss = kloss.lower()

     if kloss in ("categorical_crossentropy", categorical_crossentropy):
         return bcriterion.CategoricalCrossEntropy()
     if kloss in ("mse", "mean_squared_error", mse):
         return bcriterion.MSECriterion()
     if kloss in ("binary_crossentropy", binary_crossentropy):
         return bcriterion.BCECriterion()
     if kloss in ("mae", "mean_absolute_error", mae):
         return bcriterion.AbsCriterion()
     if kloss in ("hinge", hinge):
         return bcriterion.MarginCriterion()
     if kloss in ("mean_absolute_percentage_error", "mape",
                  mean_absolute_percentage_error):
         return bcriterion.MeanAbsolutePercentageCriterion()
     if kloss in ("mean_squared_logarithmic_error", "msle",
                  mean_squared_logarithmic_error):
         return bcriterion.MeanSquaredLogarithmicCriterion()
     if kloss in ("squared_hinge", squared_hinge):
         return bcriterion.MarginCriterion(squared=True)
     if kloss in ("sparse_categorical_crossentropy",
                  sparse_categorical_crossentropy):
         return bcriterion.ClassNLLCriterion(logProbAsInput=False)
     if kloss in ("kullback_leibler_divergence", "kld",
                  kullback_leibler_divergence):
         return bcriterion.KullbackLeiblerDivergenceCriterion()
     if kloss in ("poisson", poisson):
         return bcriterion.PoissonCriterion()
     if kloss in ("cosine_proximity", "cosine", cosine_proximity):
         return bcriterion.CosineProximityCriterion()

     # Wrap the operand in a 1-tuple: "%" would otherwise unpack a tuple-valued
     # kloss and fail with a TypeError instead of reporting the bad loss.
     raise Exception("Not supported loss: %s" % (kloss,))
コード例 #2
0
 def to_bigdl_criterion(kloss):
     """Translate a Keras loss name into the matching BigDL criterion.

     :param kloss: loss name; only the exact lowercase strings tested below
         are recognised.
     :return: a newly constructed ``bcriterion`` criterion for that loss.
     :raises Exception: if ``kloss`` is not one of the supported names.
     """
     # TODO: it may pass in an object and with parameters
     if kloss == "categorical_crossentropy":
         # NOTE(review): presumably ClassNLLCriterion expects class-index
         # targets while Keras categorical_crossentropy uses one-hot targets;
         # confirm the caller converts the labels accordingly.
         return bcriterion.ClassNLLCriterion()
     if kloss in ("mse", "mean_squared_error"):
         return bcriterion.MSECriterion()
     if kloss == "binary_crossentropy":
         return bcriterion.BCECriterion()
     if kloss in ("mae", "mean_absolute_error"):
         return bcriterion.AbsCriterion()

     # Wrap the operand in a 1-tuple: "%" would otherwise unpack a tuple-valued
     # kloss and fail with a TypeError instead of reporting the bad value.
     raise Exception("Not supported type: %s" % (kloss,))
コード例 #3
0
def main():
    """Train, save and evaluate a BigDL model on PNG images with Spark.

    Reads the command-line arguments from ``get_parser()``, builds a Spark
    context, trains the network returned by ``build_model(10)`` on the images
    under ``$DATA_DIR/train``, saves it to ``args.output_dir`` and reports the
    top-1 accuracy measured on ``$DATA_DIR/test`` through ``client``.

    :raises RuntimeError: if the batch size is not a multiple of the executor
        core count, or if the ``DATA_DIR`` environment variable is not set.
    """
    parser = get_parser()
    args = parser.parse_args()

    cores = args.executor_cores
    batch_size = args.batch_size

    # BATCH_SIZE must be multiple of <executor.cores>:
    # in this case multiple of 3: 3,6,9,12 etc.
    if batch_size % cores != 0:
        raise RuntimeError(
            'batch size must be multiple of <executor-cores> parameter!'
        )

    # Fail fast with a clear message instead of a "None + str" TypeError
    # after the Spark context has already been started.
    data_dir = os.environ.get('DATA_DIR')
    if data_dir is None:
        raise RuntimeError('DATA_DIR environment variable must be set!')

    conf = (
        common.create_spark_conf()
            .setAppName('pyspark-mnist')
            .setMaster(args.master)
    )
    conf = conf.set('spark.executor.cores', cores)
    conf = conf.set('spark.cores.max', cores)
    # NOTE(review): BIGDL_JARS is passed through even when unset (None);
    # confirm the deployment always defines it.
    conf.set("spark.jars", os.environ.get('BIGDL_JARS'))

    LOG.info('initialize with spark conf:')
    sc = pyspark.SparkContext(conf=conf)
    try:
        common.init_engine()

        LOG.info('initialize training RDD:')

        def mapper(x):
            # The label is the number between the last '-' and the 4-char
            # extension of the file name, shifted by one.
            # NOTE(review): the +1 presumably makes labels 1-based for
            # ClassNLLCriterion; confirm.
            label = int(x.split('/')[-1].split('-')[-1][:-4]) + 1
            # Single-channel 28x28 float image scaled by 1/255.
            image = imageio.imread('file://' + x).astype(np.float32)
            image = image.reshape(1, 28, 28) / 255
            return common.Sample.from_ndarray(image, label)

        # Files from kuberlab dataset
        train_files = glob.glob(data_dir + '/train/*.png')
        LOG.info('Train size: %d', len(train_files))
        train_rdd = sc.parallelize(train_files).map(mapper)

        opt = optimizer.Optimizer(
            model=build_model(10),
            training_rdd=train_rdd,
            criterion=criterion.ClassNLLCriterion(),
            optim_method=optimizer.SGD(
                learningrate=0.01, learningrate_decay=0.0002
            ),
            end_trigger=optimizer.MaxEpoch(args.epoch),
            batch_size=batch_size
        )
        trained_model = opt.optimize()
        LOG.info("training finished")

        LOG.info('saving model...')
        path = args.output_dir
        if not os.path.exists(path):
            os.makedirs(path)
        trained_model.saveModel(
            path + '/model.pb',
            path + '/model.bin',
            over_write=True
        )
        client.update_task_info({'checkpoint_path': path, 'model_path': path})
        LOG.info('successfully saved!')

        test_files = glob.glob(data_dir + '/test/*.png')
        LOG.info('Validation size: %d', len(test_files))
        test_rdd = sc.parallelize(test_files).map(mapper)
        results = trained_model.evaluate(
            test_rdd, batch_size, [optimizer.Top1Accuracy()]
        )
        accuracy = results[0].result
        client.update_task_info({'test_accuracy': float(accuracy)})
    finally:
        # Always release the Spark context, even when training, saving or
        # evaluation fails part-way through.
        sc.stop()