default=0, help= 'Input parameter for model used [IMPORTANT: ORDERING OF INPUT PARAMETER ' 'MATTERS](see function define_model for better understanding).') parser.add_argument( '--filename_append', type=str, default='', help='Filename extension to distinct runs from each other.') parser.add_argument('--layer_dim', type=int, default=3) args = parser.parse_args() # Import data temp_path = general.file_pathway(args.dataset) if os.path.exists(temp_path): data = general.import_pandas_dataframe(temp_path) print("Shape of data:", data.shape) else: raise Exception("%s does not exist." % args.dataset) if not 'ecfp' in data.columns: print("Calculating ECFP...") data['ecfp'] = data['mol'].apply(ft.get_ecfp) print("ECFP calculation done.") # Input (x) and class (y) X = data['ecfp'] Y = data['agrochemical'] X = np.stack(X) Y = np.array(Y)
# Load a saved model and, when requested via args.test, score it on the
# held-out test set. Everything printed from here on goes to a log file.
nn = True  # forwarded to model_func.load_model as its second argument

# The log name is cut out of the model path: from its first '/' up to (but
# excluding) the last two characters.
# NOTE(review): if the path contains no '/', find() returns -1 and the slice
# is empty, giving "best_models/.log" -- confirm model paths always include a
# directory component.
logfile = os.path.join(
    os.getcwd(),
    "best_models/%s.log" % args.model_path[args.model_path.find('/'):-2])
# NOTE(review): presumably ensures the log's parent directory exists --
# confirm against general.check_path_exists.
general.check_path_exists(logfile)

# Redirect print() output to the log file. The handle is not closed in this
# block, so the redirection stays in effect for the code that follows.
sys.stdout = open(logfile, 'wt')

print("Loading model from %s..." % args.model_path)
model = model_func.load_model(args.model_path, nn)
print("Finished loading model.")

if args.test:
    test_data_path = os.path.join(
        os.getcwd(), 'data/ft_test_%s.pkl' % args.split_type)
    test_data = general.import_pandas_dataframe(test_data_path)
    print("\nPrediction on testing data set of shape", test_data.shape, ": ")

    # The feature column to read is inferred from the model path. Fail with
    # an explicit message when neither known feature name occurs in it,
    # instead of indexing the frame with None further down.
    if 'ecfp' in args.model_path:
        ft = 'ecfp'
    elif 'rdk' in args.model_path:
        ft = 'rdk'
    else:
        raise ValueError(
            "Cannot infer feature type ('ecfp' or 'rdk') from model path %r."
            % args.model_path)

    x_test = np.stack(test_data[ft])
    y_test = LabelBinarizer().fit_transform(test_data['agrochemical'])

    # Turn the raw predictions into 0/1 indicators: every entry equal to its
    # row maximum becomes 1.0, all others 0.0 (tied maxima are all marked).
    pred_test = model.predict(x_test)
    pred_test = (
        pred_test == pred_test.max(axis=1, keepdims=True)).astype(float)

    metrics.performance_metrics(y_test, pred_test)