コード例 #1
0
def plot_nn_loss_against_epoch(X,
                               Y,
                               layers_dim,
                               activation,
                               epochs,
                               image_name,
                               loss='binary_crossentropy',
                               optimizer='adam'):
    """Train a simple NN and plot loss/accuracy curves against epochs.

    Builds a model via ``build_simplenn_model``, fits it on (X, Y) with a
    20% validation split, saves the loss/accuracy-vs-epoch figure to
    ``image_name``, and returns the history series.

    :param X: training inputs (array-like accepted by Keras ``fit``)
    :param Y: training targets
    :param layers_dim: layer sizes forwarded to ``build_simplenn_model``
    :param activation: activation forwarded to ``build_simplenn_model``
    :param epochs: (int) number of training epochs
    :param image_name: (string) path the figure is written to
    :param loss: Keras loss function name
    :param optimizer: Keras optimizer name
    :return: (training_acc, training_loss, validation_acc, validation_loss)
    """
    import matplotlib.pyplot as plt
    import numpy as np
    from functions import general_functions as general

    # Make sure the output directory for the figure exists.
    general.check_path_exists(image_name)

    model = build_simplenn_model(layers_dim=layers_dim,
                                 activation=activation,
                                 loss=loss,
                                 optimizer=optimizer)

    print()
    print("Number of epochs:", epochs)
    print("Loss function:", loss)
    print("Optimizer function:", optimizer)
    print()

    H = model.fit(X,
                  Y,
                  epochs=epochs,
                  batch_size=16,
                  verbose=0,
                  validation_split=0.2,
                  shuffle=True)

    history = H.history
    training_loss = history['loss']
    validation_loss = history['val_loss']
    # BUG FIX: Keras renamed the history keys 'acc'/'val_acc' to
    # 'accuracy'/'val_accuracy' in 2.3; support both spellings so this
    # does not raise KeyError on modern Keras.
    acc_key = 'acc' if 'acc' in history else 'accuracy'
    training_acc = history[acc_key]
    validation_acc = history['val_' + acc_key]

    # x-axis derived from the recorded history length (robust even if it
    # differs from the requested epoch count).
    x_axis = np.arange(0, len(training_loss))

    plt.switch_backend('agg')  # headless backend: no display required
    plt.figure()
    plt.plot(x_axis,
             training_loss,
             marker='o',
             label="train_loss")
    plt.plot(x_axis,
             validation_loss,
             marker='o',
             label="val_loss")
    plt.plot(x_axis, training_acc, marker='o', label="train_acc")
    plt.plot(x_axis, validation_acc, marker='o', label="val_acc")
    # plt.title("Loss and Accuracy against Epochs")
    plt.xlabel("Number of Epochs")
    plt.ylabel("Loss / Accuracy")
    plt.legend(loc="best")
    plt.savefig(image_name)

    return training_acc, training_loss, validation_acc, validation_loss
コード例 #2
0
def plot_roc_curve(fpr, tpr, aucs, tprs, image_name):
    """Plot per-fold ROC curves with the mean curve and a +/- 1 std band.

    :param fpr: per-fold false-positive-rate arrays
    :param tpr: per-fold true-positive-rate arrays
    :param aucs: per-fold AUC values
    :param tprs: per-fold TPRs interpolated onto a common 100-point grid
    :param image_name: (string) path the figure is written to
    """
    import matplotlib.pyplot as plt
    import numpy as np
    from sklearn.metrics import auc
    from functions import general_functions as general

    # Ensure the output directory exists; use a display-free backend.
    general.check_path_exists(image_name)
    plt.switch_backend('agg')

    # Diagonal reference line for a random ("chance") classifier.
    plt.plot([0, 1], [0, 1],
             linestyle='--',
             lw=2,
             color='r',
             label='Chance',
             alpha=.8)

    # Faint line per cross-validation fold (per-fold labels disabled).
    for fold_fpr, fold_tpr, fold_auc in zip(fpr, tpr, aucs):
        plt.plot(
            fold_fpr, fold_tpr, lw=1,
            alpha=0.3)  #,label='ROC fold %d (AUC = %0.2f)' % (i, roc_auc))

    # Mean ROC over folds, pinned to end exactly at TPR = 1.
    mean_tpr = np.mean(tprs, axis=0)
    mean_tpr[-1] = 1.0
    mean_fpr = np.linspace(0, 1, 100)
    mean_auc = auc(mean_fpr, mean_tpr)
    std_auc = np.std(aucs)
    plt.plot(mean_fpr,
             mean_tpr,
             color='b',
             label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc),
             lw=2,
             alpha=.8)

    # Shaded band: one standard deviation around the mean, clipped to [0, 1].
    std_tpr = np.std(tprs, axis=0)
    band_hi = np.minimum(mean_tpr + std_tpr, 1)
    band_lo = np.maximum(mean_tpr - std_tpr, 0)
    plt.fill_between(mean_fpr,
                     band_lo,
                     band_hi,
                     color='grey',
                     alpha=.2,
                     label=r'$\pm$ 1 std. dev.')

    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    # Title derived from the path segment between the last '/' and "dataset".
    plt.title(
        'ROC of %s' %
        image_name[image_name.rfind('/') + 1:image_name.rfind("dataset") - 1])
    plt.legend(loc="lower right")
    plt.savefig(image_name)
コード例 #3
0
def save_model(model, filename, neural_network):
    """
    Save model to disk
    :param model: model to be saved
    :param filename: (string) filename to save the model to
    :param neural_network: (boolean) whether the model is a neural network model (keras) or conventional machine learning model (scikit-learn).
    :return: None
    """
    from functions import general_functions as general
    general.check_path_exists(filename)

    print("\nSaving model to '%s' ..." % filename)
    if neural_network:
        model.save(filename)
    else:
        # BUG FIX: sklearn.externals.joblib was deprecated in
        # scikit-learn 0.21 and removed in 0.23. Prefer the standalone
        # joblib package, falling back to the legacy location for old
        # environments.
        try:
            import joblib
        except ImportError:
            from sklearn.externals import joblib
        joblib.dump(model, filename)
    print("Saving model done.")
コード例 #4
0
                    help='seed number for random shuffling.')
args = parser.parse_args()

# Validate CLI choices up front, before any file I/O.
if args.featurizer not in ['rdk', 'ecfp']:
    raise Exception("Descriptor %s not available. Choose from rdk or ecfp." % args.featurizer)
if args.kernel_regularizer not in ['l1', 'l2', 'l1_l2', 'None']:
    # BUG FIX: the message previously interpolated args.regularizer_param,
    # so an invalid kernel regularizer was reported with the wrong value.
    raise Exception("Kernel regularizer %s not available. Choose from l1, l2, l1_l2 or None" % args.kernel_regularizer)

lr = 0.0001

# Encode the hyperparameter configuration into the output file names.
filename = "NN_training_history/%s/%s/batchsize%s/%s_epochs%s_dropout%s_lr%s_random_2" % (args.featurizer, args.kernel_regularizer, args.batch_size, args.regularizer_param, args.epochs, args.dropout_rate, lr)
logfile = "./logfile/" + filename + ".log"
# NOTE(review): the '.h' extension looks like a typo for '.h5' (Keras
# model format) — confirm before relying on this path.
save_model_path = "./saved_model/" + filename + ".h"

# Redirect all subsequent prints into the log file.
general.check_path_exists(logfile)
sys.stdout = open(logfile, 'wt')

# Import data
temp_path = os.path.join(os.getcwd(), 'data/ft_train_random.pkl')
if os.path.exists(temp_path):
    data = general.import_pandas_dataframe(temp_path)
    print("Shape of data:", data.shape)
else:
    raise Exception("%s does not exist." % temp_path)

# Input and target: stack per-row fingerprints into a 2-D array and
# binarize the 'agrochemical' labels.
X = np.stack(data[args.featurizer])
Y = LabelBinarizer().fit_transform((data['agrochemical']))


コード例 #5
0
# Validate the model path; in test mode, cross-validation splits are unused.
if not os.path.exists(args.model_path):
    raise Exception("Pathway to model %s does not exist" % args.model_path)
if args.test:
    args.num_split = None

# Infer the model family from the path: GB/RF/KNN are scikit-learn
# models, anything else is treated as a (Keras) neural network.
ml = ['GB', 'RF', 'KNN']
if any(x in args.model_path for x in ml):
    nn = False
else:
    nn = True

# Redirect prints into a per-model log file.
# NOTE(review): the slice [find('/'):-2] appears to strip a 2-character
# suffix from the model path when building the log name — confirm it
# matches the saved-model naming convention.
logfile = os.path.join(
    os.getcwd(),
    "best_models/%s.log" % args.model_path[args.model_path.find('/'):-2])
general.check_path_exists(logfile)
sys.stdout = open(logfile, 'wt')

print("Loading model from %s..." % args.model_path)
model = model_func.load_model(args.model_path, nn)
print("Finished loading model.")

# Test mode: evaluate on the held-out test set for the chosen split type.
if args.test:
    test_data_path = os.path.join(os.getcwd(),
                                  'data/ft_test_%s.pkl' % args.split_type)
    test_data = general.import_pandas_dataframe(test_data_path)
    print("\nPrediction on testing data set of shape", test_data.shape, ": ")

    # Recover the featurizer from the model path (block continues below).
    ft = None
    if 'ecfp' in args.model_path:
        ft = 'ecfp'
コード例 #6
0
parser.add_argument('--optimizer', type=str, default='adam', help='type of optimizer function')
args = parser.parse_args()

# Check if the parameters are available for this file
if args.featurizer not in ['daylight', 'ecfp']:
    raise Exception("Descriptor %s not available. Choose from 'daylight' or 'ecfp'."
                    % args.featurizer)
if args.num_layers not in [3, 4, 5]:
    raise Exception("Number of layers not available. Choose from 3, 4 or 5, or add them below. ")


# Log file name encodes featurizer, dataset (extension stripped) and depth.
filename = 'simplenn_epoch_image/%s_%s_%slayers.log' % \
           (args.featurizer, args.dataset[:args.dataset.rfind('.')], args.num_layers)

# check if directory exists
general.check_path_exists(filename)

# Redirect all subsequent prints into the log file.
sys.stdout = open(filename,'wt')

# Import data
temp_path = general.file_pathway(args.dataset)
if os.path.exists(temp_path):
    data = general.import_pandas_dataframe(temp_path)
    print("Shape of data:", data.shape)
else:
    raise Exception("%s does not exist." % args.dataset)

# Featurization (block continues past this excerpt)
if args.featurizer == 'daylight':
    print("Calculating Daylight fingerprint...")
    data['fingerprint'] = data['mol'].apply(ft.daylight_fingerprint)
コード例 #7
0
        'simplenn', 'gradientboosting', 'randomforest', 'knearest'
]:
    # args.method was not one of the supported model types.
    raise Exception(
        "%s not available. Choose from simplenn, gradientboosting, randomforest or knearest."
        % args.method)
if args.featurizer not in ['daylight', 'ecfp']:
    raise Exception(
        "Descriptor %s not available. Choose from daylight or ecfp." %
        args.featurizer)

# Log file name encodes featurizer, method, dataset (extension stripped)
# and an optional user-supplied suffix.
filename = "%s/%s_%s_%s" % (args.featurizer, args.method,
                            args.dataset[:args.dataset.rfind('.')],
                            args.filename_append)
logfile = "./logfile/" + filename + ".log"

# Redirect all subsequent prints into the log file.
general.check_path_exists(logfile)
sys.stdout = open(logfile, 'wt')

# Import data
temp_path = general.file_pathway(args.dataset)
if os.path.exists(temp_path):
    data = general.import_pandas_dataframe(temp_path)
    print("Shape of data:", data.shape)
else:
    raise Exception("%s does not exist." % args.dataset)

# Compute fingerprints only if the dataset does not already carry them.
if not 'fingerprint' in data.columns:
    if args.featurizer == 'daylight':
        # NOTE(review): this uses ft.get_rdk for "daylight" while a sibling
        # script uses ft.daylight_fingerprint — confirm they are equivalent.
        print("Daylight Fingerprinting...")
        data['fingerprint'] = data['mol'].apply(ft.get_rdk)
        print("Daylight Fingerprinting done.")