Example #1
    def email_certificates():
        if not Controller.certificates_generated:
            messagebox.showerror(
                "Operational Error",
                "Certificates not generated yet."
            )
            Controller.logger.info("email_certificates() called before generate_certificates()")
            return

        target_dir = View.target_dir_entry.get()
        email_config = View.email_config_entry.get()
        mailing_list = View.mailing_list_entry.get()

        email_config = format_config(email_config)
        try:
            mailer = Mailer(target_dir, email_config, mailing_list)
            mailer.read_id_email()
            mailer.send_all_emails()

            messagebox.showinfo(
                "Mailer Info",
                "Emails dispatched successfully. Check your sent box."
            )
            Controller.logger.info("Emails dispatched successfully")
        except Exception as exp:
            # Positional arguments keep this compatible with Python 3.10+,
            # where format_exception() dropped the etype keyword.
            exception = ''.join(traceback.format_exception(type(exp), exp, exp.__traceback__))
            Controller.logger.error("mailer failed due to the following reason")
            Controller.logger.error(exception)
            messagebox.showerror("Unknown exception in mailer", exception)
Example #2
def load_patient(subj, tmpl):
    df = pd.read_csv(format_config(tmpl, {
        "subject": subj,
    }),
                     sep="\t",
                     header=0)
    df = df.apply(lambda x: pd.to_numeric(x, errors='coerce'))

    ROIs = [
        "#" + str(y) for y in sorted([int(x[1:]) for x in df.keys().tolist()])
    ]

    # df.as_matrix() was removed in pandas 1.0; to_numpy() is the replacement.
    functional = np.nan_to_num(df[ROIs].to_numpy().T)
    functional = preprocessing.scale(functional, axis=1)
    functional = compute_connectivity(functional)
    functional = functional.astype(np.float32)

    return subj, functional.tolist()
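compute_connectivity is likewise not shown. In comparable ABIDE pipelines the feature vector is commonly the flattened triangle of the ROI-by-ROI correlation matrix; a sketch under that assumption:

import numpy as np

# Hypothetical sketch of compute_connectivity -- the real function is not
# shown. Assumes features are the strictly lower triangle of the ROI
# correlation matrix, computed over each ROI's time series.
def compute_connectivity(functional):
    corr = np.nan_to_num(np.corrcoef(functional))   # ROI x ROI correlations
    rows, cols = np.tril_indices_from(corr, k=-1)   # strictly lower triangle
    return corr[rows, cols]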
Example #3
def prepare_folds(hdf5, folds, pheno, derivatives, experiment):
    exps = hdf5.require_group("experiments")
    ids = pheno["FILE_ID"]

    for derivative in derivatives:
        exp = exps.require_group(
            format_config(experiment, {
                "derivative": derivative,
            }))

        exp.attrs["derivative"] = derivative

        # Stratified folds preserve the class balance of pheno["STRAT"].
        skf = StratifiedKFold(n_splits=folds, shuffle=True)
        for i, (train_index,
                test_index) in enumerate(skf.split(ids, pheno["STRAT"])):
            train_index, valid_index = train_test_split(train_index,
                                                        test_size=0.33)
            fold = exp.require_group(str(i))
            fold["train"] = ids[train_index].tolist()
            fold["valid"] = ids[valid_index].tolist()
            fold["test"] = ids[test_index].tolist()
Example #4
def nn_results(hdf5, experiment, code_size_1, code_size_2, code_size_3):

    # NOTE: the storage group and name are hard-coded here, so the
    # experiment argument is effectively ignored.
    exp_storage = hdf5["experiments"]["cc200_whole"]

    experiment = "cc200_whole"

    print(exp_storage)

    n_classes = 2

    results = []

    # Per-fold artifacts for the spreadsheet dump below; renamed so the
    # built-in `list` is not shadowed.
    rows = ['']

    outputs = []

    for fold in exp_storage:

        experiment_cv = format_config("{experiment}_{fold}", {
            "experiment": experiment,
            "fold": fold,
        })

        print "experiment_cv"

        print fold

        X_train, y_train, \
        X_valid, y_valid, \
        X_test, y_test,test_pid = load_fold(hdf5["patients"], exp_storage, fold)

        list.append(test_pid)

        print "X_train"

        print X_train.shape

        y_test = np.array([to_softmax(n_classes, y) for y in y_test])

        ae1_model_path = format_config(
            "./data/cc200_tichu_2500_1250_625/{experiment}_autoencoder-1.ckpt",
            {
                "experiment": experiment_cv,
            })
        ae2_model_path = format_config(
            "./data/cc200_tichu_2500_1250_625/{experiment}_autoencoder-2.ckpt",
            {
                "experiment": experiment_cv,
            })

        ae3_model_path = format_config(
            "./data/cc200_tichu_2500_1250_625/{experiment}_autoencoder-3.ckpt",
            {
                "experiment": experiment_cv,
            })

        nn_model_path = format_config(
            "./data/cc200_tichu_2500_1250_625/{experiment}_mlp.ckpt", {
                "experiment": experiment_cv,
            })

        try:

            model = nn(X_test.shape[1], n_classes, [
                {
                    "size": 2500,
                    "actv": tf.nn.tanh
                },
                {
                    "size": 1250,
                    "actv": tf.nn.tanh
                },
                {
                    "size": 625,
                    "actv": tf.nn.tanh
                },
            ])

            init = tf.global_variables_initializer()
            with tf.Session() as sess:

                sess.run(init)

                saver = tf.train.Saver(model["params"])

                print "savernn_model_path"

                print nn_model_path

                saver.restore(sess, nn_model_path)

                output = sess.run(model["output"],
                                  feed_dict={
                                      model["input"]: X_test,
                                      model["dropouts"][0]: 1.0,
                                      model["dropouts"][1]: 1.0,
                                      model["dropouts"][2]: 1.0,
                                  })

                np.set_printoptions(suppress=True)

                y_score = output[:, 1]
                print("y_score:", y_score)

                y_pred = np.argmax(output, axis=1)
                print("y_pred:", y_pred)

                print("output shape:", output.shape)

                # Round the raw network outputs for reporting. (The original
                # element-wise round/str loop was a no-op round-trip on a
                # float array.)
                output = np.round(output, 4)

                outputs.append(output)
                rows.append(y_pred)

                print("-------------------------------------")

                y_true = np.argmax(y_test, axis=1)
                rows.append(y_true)
                print("y_true:", y_true)

                auc_score = roc_auc_score(y_true, y_score)
                print("AUC:", auc_score)

                [[TN, FP], [FN,
                            TP]] = confusion_matrix(y_true,
                                                    y_pred,
                                                    labels=[0,
                                                            1]).astype(float)
                accuracy = (TP + TN) / (TP + TN + FP + FN)

                print("TP, TN, FP, FN:", TP, TN, FP, FN)
                specificity = TN / (FP + TN)
                precision = TP / (TP + FP)
                sensitivity = recall = TP / (TP + FN)
                fscore = 2 * TP / (2 * TP + FP + FN)

                results.append([
                    accuracy, precision, recall, fscore, sensitivity,
                    specificity, auc_score
                ])
        finally:
            reset()

    # Dump the collected per-fold vectors to a spreadsheet: one column per
    # entry (note the transposed write, booksheet.write(row, col, value)).
    workbook = xlwt.Workbook(encoding='utf-8')

    booksheet = workbook.add_sheet('Sheet 1', cell_overwrite_ok=True)

    DATA = rows

    print(outputs)

    for i, row in enumerate(DATA):
        for j, col in enumerate(row):
            booksheet.write(j, i, col)
    # workbook.save('./data/dos_tichu_2500_1250_625_xlst.xls')

    return [experiment] + np.mean(results, axis=0).tolist()
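to_softmax converts an integer class label into the n_classes-wide target vector that np.argmax later inverts. It is not shown in these examples; a one-hot sketch consistent with that usage:

import numpy as np

# Hypothetical sketch of to_softmax -- assumed to be plain one-hot encoding,
# which is consistent with the np.argmax(y_test, axis=1) round-trip above.
def to_softmax(n_classes, label):
    target = np.zeros(n_classes, dtype=np.float32)
    target[int(label)] = 1.0
    return target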
Example #5
    hdf5 = hdf5_handler("./data/abide_cc200_tichu.hdf5", "a")

    valid_derivatives = ["cc200", "aal", "ez", "ho", "tt", "dosenbach160"]
    derivatives = [
        derivative for derivative in arguments["<derivative>"]
        if derivative in valid_derivatives
    ]

    experiments = []

    for derivative in derivatives:

        config = {"derivative": derivative}

        if arguments["--whole"]:
            experiments += [format_config("{derivative}_whole", config)]

        if arguments["--male"]:
            experiments += [format_config("{derivative}_male", config)]

        if arguments["--threshold"]:
            experiments += [format_config("{derivative}_threshold", config)]

        if arguments["--leave-site-out"]:
            for site in pheno["SITE_ID"].unique():
                site_config = {"site": site}
                experiments += [
                    format_config("{derivative}_leavesiteout-{site}", config,
                                  site_config)
                ]
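The arguments mapping is docopt-style: flags such as "--whole" and the positional "<derivative>" list are looked up by their usage-string spelling. A sketch of the interface these keys imply (the program name and exact usage line are assumptions):

"""Hypothetical interface implied by the argument keys above.

Usage:
  prepare.py [--whole] [--male] [--threshold] [--leave-site-out] <derivative>...
"""
from docopt import docopt

arguments = docopt(__doc__)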
Example #6
def run_nn(hdf5, experiment, code_size_1, code_size_2):
    # tf.disable_v2_behavior()

    exp_storage = hdf5["experiments"][experiment]

    for fold in exp_storage:

        experiment_cv = format_config("{experiment}_{fold}", {
            "experiment": experiment,
            "fold": fold,
        })

        X_train, y_train, \
        X_valid, y_valid, \
        X_test, y_test = load_fold(hdf5["patients"], exp_storage, fold)

        ae1_model_path = format_config(
            "./data/models/{experiment}_autoencoder-1.ckpt", {
                "experiment": experiment_cv,
            })
        ae2_model_path = format_config(
            "./data/models/{experiment}_autoencoder-2.ckpt", {
                "experiment": experiment_cv,
            })
        nn_model_path = format_config("./data/models/{experiment}_mlp.ckpt", {
            "experiment": experiment_cv,
        })

        reset()

        # Run first autoencoder
        run_autoencoder1(experiment_cv,
                         X_train,
                         y_train,
                         X_valid,
                         y_valid,
                         X_test,
                         y_test,
                         model_path=ae1_model_path,
                         code_size=code_size_1)

        reset()

        # Run second autoencoder
        run_autoencoder2(experiment_cv,
                         X_train,
                         y_train,
                         X_valid,
                         y_valid,
                         X_test,
                         y_test,
                         model_path=ae2_model_path,
                         prev_model_path=ae1_model_path,
                         prev_code_size=code_size_1,
                         code_size=code_size_2)

        reset()

        # Run multilayer NN with pre-trained autoencoders
        run_finetuning(experiment_cv,
                       X_train,
                       y_train,
                       X_valid,
                       y_valid,
                       X_test,
                       y_test,
                       model_path=nn_model_path,
                       prev_model_1_path=ae1_model_path,
                       prev_model_2_path=ae2_model_path,
                       code_size_1=code_size_1,
                       code_size_2=code_size_2)
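A usage sketch for this two-autoencoder variant; the HDF5 path and code sizes are borrowed from the other examples on this page, so the exact combination is an assumption:

# Hypothetical driver -- file name and sizes taken from neighboring examples.
hdf5 = hdf5_handler("./data/abide_cc200_tichu.hdf5", "a")
run_nn(hdf5, "cc200_whole", code_size_1=1000, code_size_2=600)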
Example #7
        pheno_male = pheno[pheno["SEX"] == "M"]
        prepare_folds(hdf5,
                      folds,
                      pheno_male,
                      derivatives,
                      experiment="{derivative}_male")

    if arguments["--threshold"]:
        print
        print "Preparing thresholded dataset"
        pheno_thresh = pheno[pheno["MEAN_FD"] <= 0.2]
        prepare_folds(hdf5,
                      folds,
                      pheno_thresh,
                      derivatives,
                      experiment="{derivative}_threshold")

    if arguments["--leave-site-out"]:
        print
        print "Preparing leave-site-out dataset"
        for site in pheno["SITE_ID"].unique():
            pheno_without_site = pheno[pheno["SITE_ID"] != site]
            prepare_folds(hdf5,
                          folds,
                          pheno_without_site,
                          derivatives,
                          experiment=format_config(
                              "{derivative}_leavesiteout-{site}", {
                                  "site": site,
                              }))
Example #8
def run_autoencoder2(experiment,
                     X_train,
                     y_train,
                     X_valid,
                     y_valid,
                     X_test,
                     y_test,
                     model_path,
                     prev_model_path,
                     code_size=1250,
                     prev_code_size=2500):
    if os.path.isfile(model_path) or \
       os.path.isfile(model_path + ".meta"):
        return
    prev_model = ae(
        X_train.shape[1],
        prev_code_size,
        corruption=0.0,  # Disable corruption for conversion
        enc=tf.nn.tanh,
        dec=None)

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        saver = tf.train.Saver(prev_model["params"],
                               write_version=tf.train.SaverDef.V2)
        if os.path.isfile(prev_model_path):
            saver.restore(sess, prev_model_path)
        X_train = sess.run(prev_model["encode"],
                           feed_dict={prev_model["input"]: X_train})
        X_valid = sess.run(prev_model["encode"],
                           feed_dict={prev_model["input"]: X_valid})
        X_test = sess.run(prev_model["encode"],
                          feed_dict={prev_model["input"]: X_test})
    del prev_model

    reset()

    learning_rate = 0.0001
    corruption = 0.9
    ae_enc = tf.nn.tanh
    ae_dec = None

    training_iters = 2000
    batch_size = 10
    n_classes = 2

    model = ae(prev_code_size,
               code_size,
               corruption=corruption,
               enc=ae_enc,
               dec=ae_dec)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        model["cost"])
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        saver = tf.train.Saver(model["params"],
                               write_version=tf.train.SaverDef.V2)
        prev_costs = np.array([9999999999] * 3)
        for epoch in range(training_iters):
            # Integer division keeps the batch count an int under Python 3.
            batches = range(len(X_train) // batch_size)
            costs = np.zeros((len(batches), 3))

            for ib in batches:
                from_i = ib * batch_size
                to_i = (ib + 1) * batch_size
                batch_xs, batch_ys = X_train[from_i:to_i], y_train[from_i:to_i]
                _, cost_train = sess.run([optimizer, model["cost"]],
                                         feed_dict={model["input"]: batch_xs})
                cost_valid = sess.run(model["cost"],
                                      feed_dict={model["input"]: X_valid})
                cost_test = sess.run(model["cost"],
                                     feed_dict={model["input"]: X_test})
                costs[ib] = [cost_train, cost_valid, cost_test]
            costs = costs.mean(axis=0)
            cost_train, cost_valid, cost_test = costs
            print(format_config(
                "Exp={experiment}, Model=ae2, Iter={epoch:5d}, Cost={cost_train:.6f} {cost_valid:.6f} {cost_test:.6f}",
                {
                    "experiment": experiment,
                    "epoch": epoch,
                    "cost_train": cost_train,
                    "cost_valid": cost_valid,
                    "cost_test": cost_test,
                }), end=" ")
            if cost_valid < prev_costs[1]:
                print("Saving better model")
                saver.save(sess, model_path)
                prev_costs = costs
            else:
                print()
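reset() is called between every training stage but never defined here. In TF1-style code like this it usually just clears the default graph so stale variables do not collide; a sketch under that assumption:

import tensorflow as tf

# Hypothetical sketch of reset() -- assumed to clear the TF1 default graph
# between training stages.
def reset():
    tf.reset_default_graph()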
Example #9
def run_nn(hdf5, experiment, code_size_1, code_size_2, code_size_3):

    # Storage is hard-coded to the cc200 atlas here, so the experiment
    # argument only affects checkpoint names (alternates left commented out).
    exp_storage = hdf5["experiments"]["cc200_whole"]
    #exp_storage = hdf5["experiments"]["aal_whole"]
    #exp_storage = hdf5["experiments"]["dosenbach160_whole"]

    for fold in exp_storage:

        experiment_cv = format_config("{experiment}_{fold}", {
            "experiment": experiment,
            "fold": fold,
        })

        X_train, y_train, \
        X_valid, y_valid, \
        X_test, y_test,test_pid = load_fold(hdf5["patients"], exp_storage, fold)

        ae1_model_path = format_config(
            "./data/cc200_tichu_2500_1250_625/{experiment}_autoencoder-1.ckpt",
            {
                "experiment": experiment_cv,
            })
        ae2_model_path = format_config(
            "./data/cc200_tichu_2500_1250_625/{experiment}_autoencoder-2.ckpt",
            {
                "experiment": experiment_cv,
            })

        ae3_model_path = format_config(
            "./data/cc200_tichu_2500_1250_625/{experiment}_autoencoder-3.ckpt",
            {
                "experiment": experiment_cv,
            })
        nn_model_path = format_config(
            "./data/cc200_tichu_2500_1250_625/{experiment}_mlp.ckpt", {
                "experiment": experiment_cv,
            })

        #         ae1_model_path = format_config("./data/aal_tichu_2500_1250_625/{experiment}_autoencoder-1.ckpt", {
        #             "experiment": experiment_cv,
        #         })
        #         ae2_model_path = format_config("./data/aal_tichu_2500_1250_625/{experiment}_autoencoder-2.ckpt", {
        #             "experiment": experiment_cv,
        #         })

        #         ae3_model_path = format_config("./data/aal_tichu_2500_1250_625/{experiment}_autoencoder-3.ckpt", {
        #             "experiment": experiment_cv,
        #         })
        #         nn_model_path = format_config("./data/aal_tichu_2500_1250_625/{experiment}_mlp.ckpt", {
        #             "experiment": experiment_cv,
        #         })

        #         ae1_model_path = format_config("./data/dosenbach160_tichu_2500_1250_625/{experiment}_autoencoder-1.ckpt", {
        #             "experiment": experiment_cv,
        #         })
        #         ae2_model_path = format_config("./data/dosenbach160_tichu_2500_1250_625/{experiment}_autoencoder-2.ckpt", {
        #             "experiment": experiment_cv,
        #         })

        #         ae3_model_path = format_config("./data/dosenbach160_tichu_2500_1250_625/{experiment}_autoencoder-3.ckpt", {
        #             "experiment": experiment_cv,
        #         })
        #         nn_model_path = format_config("./data/dosenbach160_tichu_2500_1250_625/{experiment}_mlp.ckpt", {
        #             "experiment": experiment_cv,
        #         })

        reset()

        # Run first autoencoder
        run_autoencoder1(experiment_cv,
                         X_train,
                         y_train,
                         X_valid,
                         y_valid,
                         X_test,
                         y_test,
                         model_path=ae1_model_path,
                         code_size=code_size_1)

        reset()

        # Run second autoencoder
        run_autoencoder2(experiment_cv,
                         X_train,
                         y_train,
                         X_valid,
                         y_valid,
                         X_test,
                         y_test,
                         model_path=ae2_model_path,
                         prev_model_path=ae1_model_path,
                         prev_code_size=code_size_1,
                         code_size=code_size_2)

        reset()

        # Run third autoencoder
        run_autoencoder3(experiment_cv,
                         X_train,
                         y_train,
                         X_valid,
                         y_valid,
                         X_test,
                         y_test,
                         model_path=ae3_model_path,
                         prev_model_path=ae2_model_path,
                         prev_code_size=code_size_2,
                         code_size=code_size_3)

        reset()

        # Run multilayer NN with pre-trained autoencoders
        run_finetuning(experiment_cv,
                       X_train,
                       y_train,
                       X_valid,
                       y_valid,
                       X_test,
                       y_test,
                       model_path=nn_model_path,
                       prev_model_1_path=ae1_model_path,
                       prev_model_2_path=ae2_model_path,
                       prev_model_3_path=ae3_model_path,
                       code_size_1=code_size_1,
                       code_size_2=code_size_2,
                       code_size_3=code_size_3)
Example #10
def run_finetuning(experiment,
                   X_train,
                   y_train,
                   X_valid,
                   y_valid,
                   X_test,
                   y_test,
                   model_path,
                   prev_model_1_path,
                   prev_model_2_path,
                   prev_model_3_path,
                   code_size_1=2500,
                   code_size_2=1250,
                   code_size_3=625):
    learning_rate = 0.0005
    dropout_1 = 0.6
    dropout_2 = 0.8
    dropout_3 = 0.6
    initial_momentum = 0.1
    final_momentum = 0.9  # Increase momentum along epochs to avoid fluctuations
    saturate_momentum = 100

    training_iters = 100
    start_saving_at = 20
    batch_size = 10
    n_classes = 2

    if os.path.isfile(model_path) or \
       os.path.isfile(model_path + ".meta"):
        return

    y_train = np.array([to_softmax(n_classes, y) for y in y_train])
    y_valid = np.array([to_softmax(n_classes, y) for y in y_valid])
    y_test = np.array([to_softmax(n_classes, y) for y in y_test])

    ae1 = load_ae_encoder(X_train.shape[1], code_size_1, prev_model_1_path)
    ae2 = load_ae_encoder(code_size_1, code_size_2, prev_model_2_path)
    ae3 = load_ae_encoder(code_size_2, code_size_3, prev_model_3_path)

    model = nn(X_train.shape[1], n_classes, [
        {
            "size": code_size_1,
            "actv": tf.nn.tanh
        },
        {
            "size": code_size_2,
            "actv": tf.nn.tanh
        },
        {
            "size": code_size_3,
            "actv": tf.nn.tanh
        },
    ], [
        {
            "W": ae1["W_enc"],
            "b": ae1["b_enc"]
        },
        {
            "W": ae2["W_enc"],
            "b": ae2["b_enc"]
        },
        {
            "W": ae3["W_enc"],
            "b": ae3["b_enc"]
        },
    ])

    model["momentum"] = tf.placeholder("float32")
    optimizer = tf.train.MomentumOptimizer(
        learning_rate, model["momentum"]).minimize(model["cost"])

    # Compute accuracies
    correct_prediction = tf.equal(tf.argmax(model["output"], 1),
                                  tf.argmax(model["expected"], 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)

        # Define model saver
        saver = tf.train.Saver(model["params"],
                               write_version=tf.train.SaverDef.V2)
        prev_costs = np.array([9999999999] * 3)
        prev_accs = np.array([0.0] * 3)

        # Iterate Epochs
        for epoch in range(training_iters):

            batches = range(len(X_train) // batch_size)
            costs = np.zeros((len(batches), 3))
            accs = np.zeros((len(batches), 3))

            # Compute momentum saturation
            alpha = float(epoch) / float(saturate_momentum)
            if alpha < 0.:
                alpha = 0.
            if alpha > 1.:
                alpha = 1.
            momentum = initial_momentum * (1 - alpha) + alpha * final_momentum

            for ib in batches:

                from_i = ib * batch_size
                to_i = (ib + 1) * batch_size
                batch_xs, batch_ys = X_train[from_i:to_i], y_train[from_i:to_i]
                _, cost_train, acc_train = sess.run(
                    [optimizer, model["cost"], accuracy],
                    feed_dict={
                        model["input"]: batch_xs,
                        model["expected"]: batch_ys,
                        model["dropouts"][0]: dropout_1,
                        model["dropouts"][1]: dropout_2,
                        model["dropouts"][2]: dropout_3,
                        model["momentum"]: momentum,
                    })
                cost_valid, acc_valid = sess.run(
                    [model["cost"], accuracy],
                    feed_dict={
                        model["input"]: X_valid,
                        model["expected"]: y_valid,
                        model["dropouts"][0]: 1.0,
                        model["dropouts"][1]: 1.0,
                        model["dropouts"][2]: 1.0,
                    })
                cost_test, acc_test = sess.run(
                    [model["cost"], accuracy],
                    feed_dict={
                        model["input"]: X_test,
                        model["expected"]: y_test,
                        model["dropouts"][0]: 1.0,
                        model["dropouts"][1]: 1.0,
                        model["dropouts"][2]: 1.0,
                    })

                costs[ib] = [cost_train, cost_valid, cost_test]
                accs[ib] = [acc_train, acc_valid, acc_test]
            costs = costs.mean(axis=0)
            cost_train, cost_valid, cost_test = costs

            accs = accs.mean(axis=0)
            acc_train, acc_valid, acc_test = accs
            print(format_config(
                "Exp={experiment}, Model=mlp, Iter={epoch:5d}, Acc={acc_train:.6f} {acc_valid:.6f} {acc_test:.6f}, Momentum={momentum:.6f}",
                {
                    "experiment": experiment,
                    "epoch": epoch,
                    "acc_train": acc_train,
                    "acc_valid": acc_valid,
                    "acc_test": acc_test,
                    "momentum": momentum,
                }), end=" ")
            if acc_valid > prev_accs[1] and epoch > start_saving_at:
                print("Saving better model")
                saver.save(sess, model_path)
                prev_accs = accs
                prev_costs = costs
            else:
                print()
Example #11
def run_autoencoder1(experiment,
                     X_train,
                     y_train,
                     X_valid,
                     y_valid,
                     X_test,
                     y_test,
                     model_path,
                     code_size=2500):
    learning_rate = 0.0001
    sparse = True  # Add sparsity penalty
    sparse_p = 0.2
    sparse_coeff = 0.5
    corruption = 0.3  # Data corruption ratio for denoising
    ae_enc = tf.nn.tanh  # Tangent hyperbolic
    ae_dec = None  # Linear activation

    training_iters = 700
    batch_size = 100
    n_classes = 2

    if os.path.isfile(model_path) or \
       os.path.isfile(model_path + ".meta"):
        return

    model = ae(X_train.shape[1],
               code_size,
               corruption=corruption,
               enc=ae_enc,
               dec=ae_dec)
    if sparse:
        model["cost"] += sparsity_penalty(model["encode"], sparse_p,
                                          sparse_coeff)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        model["cost"])

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        saver = tf.train.Saver(model["params"],
                               write_version=tf.train.SaverDef.V2)
        prev_costs = np.array([9999999999] * 3)

        for epoch in range(training_iters):

            batches = range(len(X_train) // batch_size)
            costs = np.zeros((len(batches), 3))

            for ib in batches:
                from_i = ib * batch_size
                to_i = (ib + 1) * batch_size
                batch_xs, batch_ys = X_train[from_i:to_i], y_train[from_i:to_i]
                _, cost_train = sess.run([optimizer, model["cost"]],
                                         feed_dict={model["input"]: batch_xs})
                cost_valid = sess.run(model["cost"],
                                      feed_dict={model["input"]: X_valid})
                cost_test = sess.run(model["cost"],
                                     feed_dict={model["input"]: X_test})
                costs[ib] = [cost_train, cost_valid, cost_test]
            costs = costs.mean(axis=0)
            cost_train, cost_valid, cost_test = costs
            print(format_config(
                "Exp={experiment}, Model=ae1, Iter={epoch:5d}, Cost={cost_train:.6f} {cost_valid:.6f} {cost_test:.6f}",
                {
                    "experiment": experiment,
                    "epoch": epoch,
                    "cost_train": cost_train,
                    "cost_valid": cost_valid,
                    "cost_test": cost_test,
                }), end=" ")
            if cost_valid < prev_costs[1]:
                print("Saving better model")
                saver.save(sess, model_path)
                prev_costs = costs
            else:
                print()
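The ae() constructor itself is not shown in these examples. Judging from the call sites, it returns a dict exposing at least "input" (the batch placeholder), "encode" (the hidden code of size code_size), "cost" (the reconstruction loss being minimized), and "params" (the variable list handed to tf.train.Saver); the corruption ratio and the enc/dec activations make it a denoising autoencoder, with a linear decoder when dec=None.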
Example #12
def nn_results(hdf5, experiment, code_size_1, code_size_2):

    exp_storage = hdf5["experiments"][experiment]

    n_classes = 2

    results = []

    for fold in exp_storage:

        experiment_cv = format_config("{experiment}_{fold}", {
            "experiment": experiment,
            "fold": fold,
        })

        X_train, y_train, \
        X_valid, y_valid, \
        X_test, y_test = load_fold(hdf5["patients"], exp_storage, fold)

        y_test = np.array([to_softmax(n_classes, y) for y in y_test])

        ae1_model_path = format_config(
            "./data/models/{experiment}_autoencoder-1.ckpt", {
                "experiment": experiment_cv,
            })
        ae2_model_path = format_config(
            "./data/models/{experiment}_autoencoder-2.ckpt", {
                "experiment": experiment_cv,
            })
        nn_model_path = format_config("./data/models/{experiment}_mlp.ckpt", {
            "experiment": experiment_cv,
        })

        try:

            model = nn(X_test.shape[1], n_classes, [
                {
                    "size": 1000,
                    "actv": tf.nn.tanh
                },
                {
                    "size": 600,
                    "actv": tf.nn.tanh
                },
            ])

            init = tf.global_variables_initializer()
            with tf.Session() as sess:

                sess.run(init)

                saver = tf.train.Saver(model["params"])
                saver.restore(sess, nn_model_path)

                output = sess.run(model["output"],
                                  feed_dict={
                                      model["input"]: X_test,
                                      model["dropouts"][0]: 1.0,
                                      model["dropouts"][1]: 1.0,
                                  })

                print(output)

                y_pred = np.argmax(output, axis=1)
                y_true = np.argmax(y_test, axis=1)

                [[TN, FP], [FN,
                            TP]] = confusion_matrix(y_true,
                                                    y_pred,
                                                    labels=[0,
                                                            1]).astype(float)
                accuracy = (TP + TN) / (TP + TN + FP + FN)
                specificity = TN / (FP + TN)
                precision = TP / (TP + FP)
                sensitivity = recall = TP / (TP + FN)
                fscore = 2 * TP / (2 * TP + FP + FN)

                results.append([
                    accuracy, precision, recall, fscore, sensitivity, specificity
                ])
        finally:
            reset()

    return [experiment] + np.mean(results, axis=0).tolist()
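As a sanity check of the formulas above, take a hypothetical fold with TN = 40, FP = 10, FN = 5, TP = 45: accuracy = 85 / 100 = 0.85, specificity = 40 / 50 = 0.80, precision = 45 / 55 ≈ 0.818, recall = 45 / 50 = 0.90, and F-score = 90 / 105 ≈ 0.857.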
Example #13
def reduce(config, folds, model_path, data_path, id_path):

    n_classes = 2
    fold_data = []

    for fold in range(1, folds + 1):

        config = config.copy()
        config["fold"] = fold

        fold_model_path = format_config(model_path, config)

        train_path = format_config(data_path, config, {"datatype": "train"})
        valid_path = format_config(data_path, config, {"datatype": "valid"})
        test_path = format_config(data_path, config, {"datatype": "test"})

        train_id_path = format_config(id_path, config, {"datatype": "train"})
        valid_id_path = format_config(id_path, config, {"datatype": "valid"})
        test_id_path = format_config(id_path, config, {"datatype": "test"})

        train_data = np.loadtxt(train_path, delimiter=",")
        train_X, train_y = train_data[:, 1:], train_data[:, 0]

        valid_data = np.loadtxt(valid_path, delimiter=",")
        valid_X, valid_y = valid_data[:, 1:], valid_data[:, 0]

        train_ids = np.genfromtxt(train_id_path, dtype="str")
        valid_ids = np.genfromtxt(valid_id_path, dtype="str")

        train_X = np.concatenate([train_X, valid_X])
        train_y = np.concatenate([train_y, valid_y])
        train_ids = np.concatenate([train_ids, valid_ids])

        test_data = np.loadtxt(test_path, delimiter=",")
        test_X, test_y = test_data[:, 1:], test_data[:, 0]
        test_ids = np.genfromtxt(test_id_path, dtype="str")

        model = nn(test_X.shape[1], n_classes, [
            {
                "size": 1000,
                "actv": tf.nn.tanh
            },
            {
                "size": 600,
                "actv": tf.nn.tanh
            },
        ])

        init = tf.global_variables_initializer()
        with tf.Session() as sess:

            sess.run(init)

            saver = tf.train.Saver(model["params"])
            saver.restore(sess, fold_model_path)

            train_X = sess.run(model["actvs"][1],
                               feed_dict={
                                   model["input"]: train_X,
                                   model["dropouts"][0]: 1.0,
                                   model["dropouts"][1]: 1.0,
                               })

            test_X = sess.run(model["actvs"][1],
                              feed_dict={
                                  model["input"]: test_X,
                                  model["dropouts"][0]: 1.0,
                                  model["dropouts"][1]: 1.0,
                              })

        # Stack the train and test codes; datatype marks provenance
        # (1 = train, 0 = test), matching dt = ["Test", "Train"] below.
        X = np.concatenate([train_X, test_X])
        y = np.concatenate([train_y, test_y]).astype(int)
        ids = np.concatenate([train_ids, test_ids])
        datatype = np.concatenate(
            [np.ones(train_y.shape),
             np.zeros(test_y.shape)]).astype(int)

        fold_data.append({
            "X": X,
            "y": y,
            "ids": ids,
            "datatype": datatype,
            "config": config
        })

    LOG_DIR = './tensorboard/'

    glob_sess = tf.InteractiveSession()
    summary_writer = tf.summary.FileWriter(LOG_DIR)
    projector_config = projector.ProjectorConfig()

    embeddings = []

    for data in fold_data:

        X = data["X"]
        y = data["y"]
        ids = data["ids"]
        datatype = data["datatype"]
        config = data["config"]

        embedding_tensor = format_config("embedding_{fold}", config)
        embedding_var = tf.Variable(X, trainable=False, name=embedding_tensor)
        embedding_var.initializer.run()
        embeddings.append(embedding_var)

        embedding = projector_config.embeddings.add()
        embedding.tensor_name = embedding_tensor
        embedding.metadata_path = os.path.join(
            LOG_DIR, format_config("metadata_{fold}.tsv", config))

        names = ["ASD", "TC"]
        dt = ["Test", "Train"]
        with open(embedding.metadata_path, "w") as metadata_file:
            metadata_file.write("ID\tSite\tClass\tDatatype\n")
            for i, subject in enumerate(ids):
                site = "_".join(subject.split("_")[:-1])
                metadata_file.write(
                    "%s\t%s\t%s\t%s\n" %
                    (subject, site, names[y[i]], dt[datatype[i]]))

    saver = tf.train.Saver(embeddings)
    projector.visualize_embeddings(summary_writer, projector_config)
    saver.save(glob_sess, os.path.join(LOG_DIR, "embeddings.ckpt"))
    glob_sess.close()
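With the checkpoint, metadata files, and projector config written, the embeddings can be browsed in TensorBoard's Projector tab by pointing it at the log directory used above: tensorboard --logdir ./tensorboard/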