Beispiel #1
0
    default=0,
    help=
    'Input parameter for model used [IMPORTANT: ORDERING OF INPUT PARAMETER '
    'MATTERS](see function define_model for better understanding).')
# Optional string that is appended to output file names so separate runs can
# be told apart; defaults to the empty string.
parser.add_argument(
    '--filename_append',
    default='',
    type=str,
    help='Filename extension to distinct runs from each other.',
)
# Integer option with a default of 3 (no help text is registered for it).
parser.add_argument(
    '--layer_dim',
    default=3,
    type=int,
)
# Parse the command line once every option has been registered.
args = parser.parse_args()

# Import data
# Resolve the dataset argument to a path through the project helper and stop
# early when nothing exists at that location.
temp_path = general.file_pathway(args.dataset)
if not os.path.exists(temp_path):
    # FileNotFoundError is a subclass of Exception, so any handler written
    # against the previous generic Exception still catches it.
    raise FileNotFoundError("%s does not exist." % args.dataset)
data = general.import_pandas_dataframe(temp_path)
print("Shape of data:", data.shape)

# Only compute the 'ecfp' column when the loaded frame does not already
# carry one; it is derived row-wise from the 'mol' column.
# NOTE(review): presumably ft.get_ecfp returns one fingerprint vector per
# molecule -- confirm against the features module.
if 'ecfp' not in data.columns:
    print("Calculating ECFP...")
    data['ecfp'] = data['mol'].apply(ft.get_ecfp)
    print("ECFP calculation done.")

# Input (x) and class (y)
# np.stack joins the per-row 'ecfp' entries along a new leading axis, which
# requires every entry to have the same shape.
X = np.stack(data['ecfp'])
Y = np.array(data['agrochemical'])
Beispiel #2
0
    nn = True

# Build the log file name from the model path: the slice starts at the first
# '/' reported by str.find and drops the final two characters.
# NOTE(review): when the path contains no '/', find() returns -1 and the
# slice is empty, so the log file becomes "best_models/.log" -- confirm this
# is acceptable for the callers of this script.
_model_stem = args.model_path[args.model_path.find('/'):-2]
logfile = os.path.join(os.getcwd(), "best_models/%s.log" % _model_stem)
# NOTE(review): presumably prepares the target location for the log file --
# confirm against general.check_path_exists.
general.check_path_exists(logfile)
# From here on everything written via print() goes to the log file instead of
# the console. NOTE(review): the handle is never closed explicitly in the
# visible part of the script.
sys.stdout = open(logfile, 'wt')

print("Loading model from %s..." % args.model_path)
# `nn` is set earlier in the script and is forwarded to the loader unchanged.
model = model_func.load_model(args.model_path, nn)
print("Finished loading model.")

# Evaluate the loaded model on the held-out test split when --test is set.
if args.test:
    # The test frame is looked up relative to the current working directory
    # and is selected by the split type given on the command line.
    test_data_path = os.path.join(os.getcwd(),
                                  'data/ft_test_%s.pkl' % args.split_type)
    test_data = general.import_pandas_dataframe(test_data_path)
    print("\nPrediction on testing data set of shape", test_data.shape, ": ")

    # The feature column is inferred from a substring of the model path.
    # Fail with an explicit message when neither marker is present instead of
    # indexing the frame with None, which only produced an opaque KeyError.
    if 'ecfp' in args.model_path:
        ft = 'ecfp'
    elif 'rdk' in args.model_path:
        ft = 'rdk'
    else:
        raise ValueError(
            "Cannot infer feature type ('ecfp' or 'rdk') from model path %s."
            % args.model_path)
    x_test = np.stack(test_data[ft])
    # NOTE(review): LabelBinarizer output shape depends on the number of
    # classes; confirm it matches the shape of the model predictions below.
    y_test = LabelBinarizer().fit_transform(test_data['agrochemical'])

    pred_test = model.predict(x_test)
    # Turn each prediction row into a 0/1 indicator of its maximum entry
    # (every entry tied for the row maximum is marked). Assumes predict()
    # returns a 2-D array, since the maximum is taken along axis 1.
    pred_test = (pred_test == pred_test.max(axis=1,
                                            keepdims=True)).astype(float)

    # The return value is discarded. NOTE(review): presumably the helper
    # reports the metrics itself -- confirm against metrics.performance_metrics.
    _ = metrics.performance_metrics(y_test, pred_test)