import pickle

import pandas as pd
from treeinterpreter import treeinterpreter as ti


def predict(row):
    df = pd.DataFrame.from_dict([row], orient='columns')
    # load the pickled random-forest model
    with open('website/model.pkl', 'rb') as f:
        model = pickle.load(f)
    df1 = transform_test(df)
    # predict the probability of fraud with the model
    prediction = model.predict_proba(df1.values.reshape(1, -1))[0][1]
    row['prediction'] = prediction
    # determine each feature's contribution to the prediction using treeinterpreter
    prediction, bias, contributions = ti.predict(model, df1.values.reshape(1, -1))
    # features that contributed most to the prediction
    important_features = []
    # feature names, in the order the model expects
    column_features = [
        'name_length', 'num_payouts', 'user_age', 'org_facebook',
        'org_twitter', 'body_length', 'gts', 'sale_duration', 'tickets_sold'
    ]
    # take the three features with the largest absolute contribution
    for feature, key in sorted(
            zip(abs(contributions[0][:, 1]), column_features))[::-1][:3]:
        important_features.append(key)
    row['contributions'] = important_features
    # return the row with the fraud probability, to be classified later
    # against the chosen threshold
    return row
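# A minimal usage sketch for the predict helper above; the field values and
# the 0.5 threshold are illustrative assumptions, not values from the source.
row = {
    'name_length': 24, 'num_payouts': 0, 'user_age': 12,
    'org_facebook': 0, 'org_twitter': 0, 'body_length': 580,
    'gts': 0.0, 'sale_duration': 14, 'tickets_sold': 3,
}
result = predict(row)
is_fraud = result['prediction'] >= 0.5  # assumed classification threshold
print(is_fraud, result['contributions'])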
    return src, tgt, office, visda, noe


src, tgt, office, visda, noe = get_datasetname(args)
batch_size = {"train": 36, "val": 36, "test": 4}
for i in range(10):
    batch_size["val" + str(i)] = 4
if not visda:
    data_transforms = {
        'train': tran.transform_train(resize_size=28, crop_size=28),
        'val': tran.transform_train(resize_size=28, crop_size=28),
    }
    data_transforms = tran.transform_test(data_transforms=data_transforms,
                                          resize_size=28, crop_size=28)
    dsets = {
        "train": ImageList(open(src).readlines(), transform=data_transforms["train"]),
        "val": ImageList(open(tgt).readlines(), transform=data_transforms["val"]),
        "test": ImageList(open(tgt).readlines(), transform=data_transforms["val"])
    }
    dset_loaders = {
        x: torch.utils.data.DataLoader(dsets[x], batch_size=batch_size[x],
                                       shuffle=True, num_workers=4)
        for x in ['train', 'val']
    }
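# The tran.transform_test helper itself isn't shown above. A plausible sketch,
# assuming it simply extends the dict with a deterministic resize-and-centre-crop
# test pipeline; the ImageNet normalisation constants and the whole body are
# assumptions. The "val0".."val9" batch-size keys suggest the real helper also
# adds ten cropped validation variants, which this sketch omits.
import torchvision.transforms as transforms

def transform_test(data_transforms, resize_size=28, crop_size=28):
    data_transforms['test'] = transforms.Compose([
        transforms.Resize(resize_size),
        transforms.CenterCrop(crop_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    return data_transforms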
                    metavar='S', help='method: l2 or l2+bss')
parser.add_argument('--lr', type=float, default=0.01, help='init learning rate')
args = parser.parse_args()
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id

data_transforms = {
    'train': trans.transform_train(resize_size=256, crop_size=224),
    'val': trans.transform_train(resize_size=256, crop_size=224),
}
data_transforms = trans.transform_test(data_transforms=data_transforms,
                                       resize_size=256, crop_size=224)

# set up the datasets
batch_size = {"train": 48, "val": 100, "test": 100}
for i in range(10):
    batch_size["val" + str(i)] = 4
trainpath = args.trainpath
testpath = args.testpath
dsets = {
    "train": datasets.ImageFolder(root=trainpath, transform=data_transforms["train"]),
    "val": datasets.ImageFolder(root=testpath, transform=data_transforms["val"]),
}
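# The fragment stops before any loaders are built; a sketch of the likely
# continuation, mirroring the loader construction in the previous snippet.
# The shuffle and worker settings are assumptions.
dset_loaders = {
    x: torch.utils.data.DataLoader(dsets[x], batch_size=batch_size[x],
                                   shuffle=(x == 'train'), num_workers=4)
    for x in ['train', 'val']
}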
    global precisions
    precision = getPrecision(self.max_depth, self.min_impurity_decrease)
    ax.scatter(self.max_depth, self.min_impurity_decrease, precision)
    precisions[self.i] = precision
    print(self.max_depth, "; ", self.min_impurity_decrease, " done")


input, output, input_names, output_names, column_ignored = csv_parsing.parse(
    "census-income.names", "census-income.data",
    {"ignore": [24], "ignoreColumnThresold": 0.1})
transformer, formated_input = transform.transform(input, input_names)
test, test_output = csv_test.parse_test("census-income.test", output_names,
                                        column_ignored)
formated_test = transform.transform_test(test, input_names, transformer)
print("Initialization done")

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
i = 0
for max_depth in np.arange(1, 10):
    for min_impurity_decrease in np.arange(0, 0.0001, 0.00001):
        precisions.append(0)
        threads.append(Computation(i, max_depth, min_impurity_decrease))
        threads[-1].start()
        i += 1
        if i % 4 == 0:
            for n in range(i - 4, i):
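# getPrecision isn't shown above. A plausible sketch, assuming it fits a
# decision tree with the given hyper-parameters on the transformed training
# data and scores precision on the transformed test set; the choice of
# DecisionTreeClassifier and micro-averaged precision is an assumption.
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import precision_score

def getPrecision(max_depth, min_impurity_decrease):
    clf = DecisionTreeClassifier(max_depth=max_depth,
                                 min_impurity_decrease=min_impurity_decrease)
    clf.fit(formated_input, output)
    predictions = clf.predict(formated_test)
    return precision_score(test_output, predictions, average='micro')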