Example #1
def main():
    args = getArgs()

    if args.seed is None:
        # No seed supplied: draw one at random and record it so the run can
        # be reproduced later from the logged value.
        seed = random.randrange(sys.maxsize)
        args.seed = seed
        print(f"generating new random seed: {seed}")
    else:
        print(f"setting random seed to: {args.seed}")

    random.seed(args.seed)

    datasetlist = args.datasets

    print(f"doing experiment with {datasetlist} in that order")

    k = args.kfold
    results = {}
    runlist = args.methods

    set_keras_growth(args.gpu)

    prefix = "runner"
    if args.prefix is not None:
        prefix = args.prefix
    rootpath = prefix + datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
    createdir(rootpath)

    min_retain_losses = list()
    min_losses = list()

    writejson(f"{rootpath}/settings.json", sys.argv[1:])

    if args.callbacks is not None:
        callbacks = args.callbacks
    else:
        callbacks = list()
    alphalist = [0.8]  # single fixed alpha; this runner does not sweep alpha
    nresults = list()
    for i in range(args.n):
        nresults.append(
            runalldatasets(args,
                           callbacks,
                           datasetlist,
                           rootpath,
                           runlist,
                           alphalist=alphalist,
                           n=i,
                           printcvresults=args.cvsummary,
                           printcv=args.printcv,
                           doevaluation=args.doevaluation,
                           learning_rate=args.learning_rate))
        # Checkpoint the accumulated results after every repetition.
        writejson(f"{rootpath}/data.json", nresults)

    resdf = pd.DataFrame(nresults)
    resdf.to_csv(
        f"{rootpath}/results_{args.kfold}kfold_{args.epochs}epochs_{args.onehot}onehot.csv"
    )
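These runners depend on a few small project helpers (createdir, writejson, set_keras_growth) that are not shown on this page. A minimal sketch of what they might look like, assuming the standard library and the TensorFlow 2 config API; the behavior is inferred from the call sites above, not taken from the project:

import json
import os

import tensorflow as tf


def createdir(path):
    # Create the run's output directory; no error if it already exists.
    os.makedirs(path, exist_ok=True)


def writejson(filename, data):
    # Dump any JSON-serializable object (argv lists, result dicts) to disk.
    with open(filename, "w") as f:
        json.dump(data, f, indent=2)


def set_keras_growth(gpu):
    # Make only the chosen GPU visible and enable on-demand memory growth,
    # so several processes can share a machine without exhausting its memory.
    gpus = tf.config.list_physical_devices("GPU")
    if gpus:
        tf.config.set_visible_devices(gpus[int(gpu)], "GPU")
        tf.config.experimental.set_memory_growth(gpus[int(gpu)], True)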
Example #2
def main(mpirank, mpisize, mpicomm):
    args = getArgs()

    if args.seed is None:
        seed = random.randrange(sys.maxsize)
        args.seed = seed
        print(f"generating new random seed: {seed}")
    random.seed(args.seed)
    datasetlist = args.datasets

    k = args.kfold
    results = {}
    runlist = args.methods

    if "," in args.gpu:
        gpus = args.gpu.split(",")
        mygpu = gpus[mpirank % 2]
        set_keras_growth(int(mygpu))
    else:
        set_keras_growth(args.gpu)

    dataset_results = dict()
    prefix = "runner"
    if args.prefix is not None:
        prefix = args.prefix
    rootpath = prefix + datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
    if mpirank == 0:
        createdir(rootpath)
        writejson(f"{rootpath}/settings.json", sys.argv[1:])

    min_retain_losses = list()
    min_losses = list()

    datasetsdone = list()
    if args.callbacks is not None:
        callbacks = args.callbacks
    else:
        callbacks = list()
    nresults = list()
    alphalist = [
        0.8
    ]  # this code does not iterate over alpha, see mpi_deesweighting.py
    for i in range(args.n):
        dataset_results = runalldatasetsMPI(args,
                                            callbacks,
                                            datasetlist,
                                            mpicomm,
                                            mpirank,
                                            rootpath,
                                            runlist,
                                            alphalist,
                                            n=i,
                                            printcvresults=args.cvsummary,
                                            printcv=args.printcv,
                                            doevaluation=args.doevaluation)
        nresults.append(dataset_results)

        if mpirank == 0:
            writejson(f"{rootpath}/data.json", nresults)
            resdf = pd.DataFrame(nresults)
            resdf.to_csv(
                f"{rootpath}/results_{args.kfold}kfold_{args.epochs}epochs_{args.onehot}onehot.csv"
            )
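This variant expects an MPI launcher to drive it. A hypothetical entry point, assuming mpi4py (not shown in the original), that hands main() its rank, the world size, and the communicator:

from mpi4py import MPI

if __name__ == "__main__":
    comm = MPI.COMM_WORLD
    # Every rank runs the same experiment loop; rank 0 does all file I/O.
    main(comm.Get_rank(), comm.Get_size(), comm)

It would then be launched as, e.g., mpirun -np 4 python runner_mpi.py ... (the script name here is illustrative).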
Example #3
def main():
    parser = argparse.ArgumentParser(
        description='Plot ROC curves for the trained models.')
    parser.add_argument(
        '--resultdir',
        metavar='resultdir',
        type=str,
        help='Directory containing the experiment result files to load',
        required=True)
    parser.add_argument('--savedir',
                        metavar='savedir',
                        type=str,
                        help='Directory to write the pdf figure to',
                        required=True)
    parser.add_argument('--filename',
                        metavar='filename',
                        type=str,
                        help='Filename for pdf figure',
                        required=True)
    parser.add_argument(
        '-c',
        '--classes',
        metavar="classes",
        help='which classes should be plotted in the clustering visualization.',
        type=lambda s: s.split(','),
        dest="classes")
    parser.add_argument('--addgabelresults',
                        metavar='addgabelresults',
                        type=str2bool,
                        help='whether to add the Gabel results to the plot',
                        default=False)
    parser.add_argument('--removeoptimizer',
                        metavar='removeoptimizer',
                        type=str2bool,
                        help='remove the optimizer name from the method label',
                        default=False)
    parser.add_argument('--doeval',
                        metavar='doeval',
                        type=str2bool,
                        help='Do eval of the model',
                        default=False)
    parser.add_argument('--font_scale',
                        metavar='font_scale',
                        type=float,
                        help='UI scale for the figure.',
                        required=True)
    parser.add_argument(
        '--split',
        metavar='split',
        type=int,
        help='Number of splits; e.g. 5 means 20%% of the data is used for '
        'the clustering visualization.',
        required=True)
    parser.add_argument('--seed',
                        metavar='seed',
                        type=int,
                        help='random seed',
                        required=True)
    parser.add_argument('--maxdatapoints',
                        metavar='maxdatapoints',
                        type=int,
                        help='maximum number of datapoints to plot',
                        required=True)
    parser.add_argument(
        '--hue_order',
        metavar='hue_order',
        type=str,
        help='Ordering of the different hues in the plot '
        'so that each method gets the same hue color in every plot',
        required=True)
    # parser.add_argument('--modelfile', metavar='modelfile', type=str,
    #                     help='File name for the saved model', required=True)
    parser.add_argument(
        '-modelfiles',
        '--modelfiles',
        metavar="modelfiles",
        help='the model files of the different similarity measurement methods.',
        type=lambda s: s.split(','),
        dest="modelfiles")
    # parser.add_argument('--method', metavar='method', type=str,
    #                     help='method to use', required=True)
    if len(sys.argv) <= 1:
        print("not enough arguments")
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()

    # Re-parse the settings that produced the results in resultdir.
    with open(f"{args.resultdir}/settings.json") as f:
        oldargv = jsonlib.load(f)
    oldargs = getArgs(myargs=oldargv)

    np.random.seed(args.seed)
    oldmethods = {}
    for method in oldargs.methods:
        # Map each method's name to its parsed configuration.
        runner, oldmethods[method.split(":")[0]] = parseMethod(method)

    dataset = Dataset(oldargs.datasets[0])
    dsl, colmap, \
        stratified_fold_generator = fromDataSetToSKLearn(dataset,
                                                         oldargs.onehot,
                                                         n_splits=args.split)
    train, test = next(stratified_fold_generator)

    features = dsl.getFeatures()
    targets = dsl.getTargets()
    resultfile = args.resultdir
    save_directory = args.savedir
    filename = args.filename
    classes = [int(c) for c in args.classes]
    for c, method in enumerate(oldmethods):
        modelmaker = oldmethods[method]["runnerdict"]["makeModel"]
        evalfunc = oldmethods[method]["runnerdict"]["eval"]
        model, embeddingmodel = modelmaker(features,
                                           targets,
                                           oldargs.hiddenlayers,
                                           regression=False)
        # Load the trained weights that correspond to this method.
        model.load_weights(args.modelfiles[c])

        randommodel = keras.models.clone_model(model)
        shuffle_weights(randommodel)
        plotROC(save_directory, model, randommodel, filename + method,
                args.font_scale, features, targets, train, test)
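Two helpers used above but not defined on this page: str2bool, which argparse needs because type=bool would treat any non-empty string as True, and shuffle_weights, which turns the cloned model into an untrained baseline for the ROC comparison. Plausible sketches (assumptions, not necessarily the project's exact code):

import argparse

import numpy as np


def str2bool(v):
    # Map common textual spellings onto booleans for argparse flags.
    if isinstance(v, bool):
        return v
    if v.lower() in ("yes", "true", "t", "y", "1"):
        return True
    if v.lower() in ("no", "false", "f", "n", "0"):
        return False
    raise argparse.ArgumentTypeError(f"Expected a boolean, got {v!r}")


def shuffle_weights(model):
    # Permute every weight tensor in place: the clone keeps the original
    # weight distribution but loses all learned structure.
    weights = model.get_weights()
    model.set_weights(
        [np.random.permutation(w.flat).reshape(w.shape) for w in weights])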
Example #4
def main(mpirank, mpisize, mpicomm):
    args = getArgs()
    if args.seed is not None:
        random.seed(args.seed)
    datasetlist = args.datasets
    print(f"doing experiment with {datasetlist} in that order")

    k = args.kfold
    results = {}
    runlist = args.methods

    if "," in args.gpu:
        gpus = args.gpu.split(",")
        mygpu = gpus[mpirank % 2]
        set_keras_growth(int(mygpu))
    else:
        set_keras_growth(args.gpu)

    dataset_results = dict()
    prefix = "runner"
    if args.prefix is not None:
        prefix = args.prefix
    rootpath = prefix + datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
    if mpirank == 0:
        createdir(rootpath)
        writejson(f"{rootpath}/settings.json", vars(args))

    min_retain_losses = list()
    min_losses = list()
    if args.alpharange is not None:
        # Parse a user-supplied "start:stop" range for the alpha sweep.
        splits = args.alpharange.split(":")
        alphastart = float(splits[0])
        alphastop = float(splits[1])
        alpharange = np.linspace(alphastart, alphastop, args.alphacount)
    else:
        alpharange = np.linspace(0.000001, 1.00001, args.alphacount)

    datasetsdone = list()
    if args.callbacks is not None:
        callbacks = args.callbacks
    else:
        callbacks = list()
    nresults = list()
    for i in range(args.n):
        dataset_results = runalldatasetsMPI(args,
                                            callbacks,
                                            datasetlist,
                                            mpicomm,
                                            mpirank,
                                            rootpath,
                                            runlist,
                                            alpharange,
                                            n=i,
                                            printcvresults=args.cvsummary,
                                            printcv=args.printcv)
        nresults.append(dataset_results)

        if mpirank == 0:
            # plotNAlphaResults(datasetlist, nresults, rootpath)
            writejson(f"{rootpath}/data.json", nresults)
            resdf = pd.DataFrame(nresults)
            resdf.to_csv(
                f"{rootpath}/results_{args.kfold}kfold_{args.epochs}epochs_{args.onehot}onehot.csv"
            )
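For reference, the alpha sweep is just an evenly spaced grid, so the two branches above differ only in their endpoints:

import numpy as np

# --alpharange 0.1:0.9 --alphacount 5 yields five evenly spaced alphas:
print(np.linspace(0.1, 0.9, 5))  # [0.1 0.3 0.5 0.7 0.9]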