# Normalize pixel values to float32 in [0, 1].
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
num_classes = 10

# Build the model, restore pre-trained weights, and optionally retrain.
model = create_keras_model(num_classes)
model.load_weights(args.model)
if args.train:
    train(model, x_train, y_train, x_test, y_test)

# Run inference on CPU and on the selected FPGA engine.
cpu_out = mlp_common.predict_cpu(model, x_test)
if args.engine == 'fcn':
    fpga_out = mlp_common.predict_fpga(
        model, x_test, xclbin_prop,
        g_in_scale, g_wgt_scale, g_bias_scale, g_post_scale)
else:
    fpga_out = mlp_common.predict_uspmv_fpga(model, x_test, xclbin_prop)

# Compare predicted class labels: ground truth vs CPU, ground truth vs FPGA,
# and CPU vs FPGA.
cpu_labels = np.argmax(cpu_out, axis=1)
fpga_labels = np.argmax(fpga_out, axis=1)
print("compare real data with cpu:")
mlp_common.compare_real_results(y_test, cpu_labels)
print("compare real data with fpga:")
mlp_common.compare_real_results(y_test, fpga_labels)
print("compare cpu with fpga:")
mlp_common.compare_real_results(cpu_labels, fpga_labels)
# NOTE(review): the head of this handle-creation conditional was truncated in
# the source; reconstructed from the visible `else:` and the engine dispatch
# below — confirm the exact condition against the original file.
if args.engine == 'uspmv':
    gemx.createUSPMVHandle(args, xclbin_prop)
else:
    gemx.createSPMVHandle(args, xclbin_prop)

train_fd = pd.read_csv(args.data)  # Load training data.
IDcol = 'Run'     # A column used to identify the run for data collection; not an independent variable.
target = 'Class'  # The column name for our dependent variable.
# Column names to use as independent variables.
predictors = [x for x in train_fd.columns if x not in [target, IDcol]]

# Encode class values as integers.
encoder = LabelEncoder()
encoder.fit(train_fd[target])
encoded_Y = encoder.transform(train_fd[target])
# Convert integers to dummy variables (i.e. one hot encoded).
train_y = np_utils.to_categorical(encoded_Y)

num_classes = len(train_fd[target].unique())
model = create_keras_model(train_fd[predictors].values.shape[1], num_classes)
model.load_weights(args.model)
if args.train:
    train(train_fd, predictors, train_y, len(train_fd[target].unique()))

if args.engine == 'fcn':
    # BUG FIX: the original passed g_wgt_scale twice; the parallel predict_fpga
    # call in the MNIST driver passes (g_in_scale, g_wgt_scale, g_bias_scale,
    # g_post_scale), so the bias-scale slot was silently given the weight scale.
    fpga_out = mlp_common.predict_fpga(
        model, train_fd[predictors].values, xclbin_prop,
        g_in_scale, g_wgt_scale, g_bias_scale, g_post_scale)
elif args.engine == 'uspmv':
    fpga_out = mlp_common.predict_uspmv_fpga(
        model, train_fd[predictors].values, xclbin_prop)
else:
    fpga_out = mlp_common.predict_spmv_fpga(
        model, train_fd[predictors].values, xclbin_prop)

cpu_out = mlp_common.predict_cpu(model, train_fd[predictors].values)
compare_results(cpu_out, fpga_out)
# Optionally retrain before running inference.
if args.train:
    train(train_fd, predictors, train_y, len(train_fd[target].unique()))

fpga_rt = []   # one FPGA runtime handle per kernel
fpga_out = []  # one result matrix per kernel
# NOTE(review): init_fpga takes `xclbin_opts` here while sibling scripts use
# `xclbin_prop` — confirm this name is defined in this script.
for k in range(numKernels):
    fpga_rt.append(mlp_common.init_fpga(
        model, xclbin_opts, g_wgt_scale, g_bias_scale, g_post_scale,
        None, k, 0))

inp = train_fd[predictors].values
if not args.run_async:
    # for larger batch size, run multi-kernels in parallel will bring up to
    # 4x better performance
    for k in range(numKernels):
        fpga_out.append(mlp_common.predict_fpga(fpga_rt[k], inp, g_in_scale))
else:
    # Kick off all kernels, then collect and post-process each result:
    # cast to float32 and apply a row-wise softmax.
    for k in range(numKernels):
        fpga_rt[k].send_matrices(inp, None)
    xfblas.executeAsync(numKernels, 0)
    for k in range(numKernels):
        raw = fpga_rt[k].get_result().astype(np.float32)
        for row in range(raw.shape[0]):
            raw[row, :] = mlp_common.softmax(raw[row, :])
        fpga_out.append(raw)

cpu_out = mlp_common.predict_cpu(model, train_fd[predictors].values)