Example #1
import pickle

import pandas as pd
from treeinterpreter import treeinterpreter as ti


def predict(row):
    df = pd.DataFrame.from_dict([row], orient='columns')

    # load the pickled random forest model (reloaded on every call)
    with open('website/model.pkl', 'rb') as f:
        model = pickle.load(f)

    # transform_test is the project's feature-engineering step
    df1 = transform_test(df)

    # predicted probability of fraud (class 1)
    prediction = model.predict_proba(df1.values.reshape(1, -1))[0][1]

    row['prediction'] = prediction

    # decompose the prediction into per-feature contributions with treeinterpreter
    prediction, bias, contributions = ti.predict(model,
                                                 df1.values.reshape(1, -1))

    # feature names, in the same order as the columns of df1
    column_features = [
        'name_length', 'num_payouts', 'user_age', 'org_facebook',
        'org_twitter', 'body_length', 'gts', 'sale_duration', 'tickets_sold'
    ]

    # keep the three features with the largest absolute contribution to class 1
    important_features = [
        name for _, name in sorted(zip(abs(contributions[0][:, 1]),
                                       column_features),
                                   reverse=True)[:3]
    ]

    row['contributions'] = important_features

    # return the row with the fraud probability; the caller applies the chosen threshold
    return row
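Not in the original snippet, but worth knowing when reading it: treeinterpreter's decomposition is exactly additive, so each predicted probability equals the bias term plus the sum of the per-feature contributions. A minimal sanity-check sketch, assuming scikit-learn and the treeinterpreter package (the toy dataset is a stand-in for the fraud data):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from treeinterpreter import treeinterpreter as ti

# toy stand-in for the fraud model: 9 features, binary target
X, y = make_classification(n_samples=200, n_features=9, random_state=0)
model = RandomForestClassifier(n_estimators=50, random_state=0).fit(X, y)

prediction, bias, contributions = ti.predict(model, X[:1])

# shapes: prediction (1, n_classes), bias (1, n_classes),
# contributions (1, n_features, n_classes)
assert np.allclose(prediction, bias + contributions.sum(axis=1))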
Example #2
import pickle

import pandas as pd


def predict(row):
    df = pd.DataFrame.from_dict([row], orient='columns')

    # load the pickled random forest model (reloaded on every call)
    with open('website/model.pkl', 'rb') as f:
        model = pickle.load(f)

    # transform_test is the project's feature-engineering step
    df1 = transform_test(df)

    # predicted probability of fraud (class 1)
    prediction = model.predict_proba(df1.values.reshape(1, -1))[0][1]

    row['prediction'] = prediction

    # return the row with the fraud probability; the caller applies the chosen threshold
    return row
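A minimal usage sketch (not from the original source; every field value and the 0.5 cutoff are placeholders, and the project chooses its own threshold elsewhere):

# hypothetical input row; field values are made up for illustration
row = {'name_length': 12, 'num_payouts': 0, 'user_age': 3,
       'org_facebook': 8, 'org_twitter': 15, 'body_length': 2400,
       'gts': 0.0, 'sale_duration': 14, 'tickets_sold': 0}

scored = predict(row)
is_fraud = scored['prediction'] >= 0.5  # placeholder threshold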
Example #3
    return src, tgt, office, visda, noe


src, tgt, office, visda, noe = get_datasetname(args)

batch_size = {"train": 36, "val": 36, "test": 4}
for i in range(10):
    batch_size["val" + str(i)] = 4

if not visda:
    data_transforms = {
        'train': tran.transform_train(resize_size=28, crop_size=28),
        'val': tran.transform_train(resize_size=28, crop_size=28),
    }
    data_transforms = tran.transform_test(data_transforms=data_transforms,
                                          resize_size=28,
                                          crop_size=28)
    dsets = {
        "train":
        ImageList(open(src).readlines(), transform=data_transforms["train"]),
        "val":
        ImageList(open(tgt).readlines(), transform=data_transforms["val"]),
        "test":
        ImageList(open(tgt).readlines(), transform=data_transforms["val"])
    }
    dset_loaders = {
        x: torch.utils.data.DataLoader(dsets[x],
                                       batch_size=batch_size[x],
                                       shuffle=True,
                                       num_workers=4)
        for x in ['train', 'val']
    }
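The snippet cuts off here; a test loader would plausibly follow the same pattern, with shuffling disabled for deterministic evaluation (a sketch, not from the original source):

    # hypothetical continuation, not in the original snippet
    dset_loaders["test"] = torch.utils.data.DataLoader(dsets["test"],
                                                       batch_size=batch_size["test"],
                                                       shuffle=False,
                                                       num_workers=4)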
Example #4
                    metavar='S',
                    help='method: l2 or l2+bss')
parser.add_argument('--lr',
                    type=float,
                    default=0.01,
                    help='init learning rate')
args = parser.parse_args()

os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id

data_transforms = {
    'train': trans.transform_train(resize_size=256, crop_size=224),
    'val': trans.transform_train(resize_size=256, crop_size=224),
}
data_transforms = trans.transform_test(data_transforms=data_transforms,
                                       resize_size=256,
                                       crop_size=224)

# set dataset
batch_size = {"train": 48, "val": 100, "test": 100}
for i in range(10):
    batch_size["val" + str(i)] = 4

trainpath = args.trainpath
testpath = args.testpath

dsets = {
    "train":
    datasets.ImageFolder(root=trainpath, transform=data_transforms["train"]),
    "val":
    datasets.ImageFolder(root=testpath, transform=data_transforms["val"]),
}
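The listing ends before any loaders are built from dsets; mirroring the pattern in Example #3, a plausible continuation looks like this (the shuffle choice and num_workers=4 are assumptions):

import torch

# hypothetical continuation, not in the original snippet
dset_loaders = {
    x: torch.utils.data.DataLoader(dsets[x],
                                   batch_size=batch_size[x],
                                   shuffle=(x == "train"),
                                   num_workers=4)
    for x in ["train", "val"]
}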
Example #5
        global precisions
        precision = getPrecision(self.max_depth, self.min_impurity_decrease)
        ax.scatter(self.max_depth, self.min_impurity_decrease, precision)
        precisions[self.i] = precision
        print(f"{self.max_depth}; {self.min_impurity_decrease} done")


input, output, input_names, output_names, column_ignored = csv_parsing.parse(
    "census-income.names", "census-income.data", {
        "ignore": [24],
        "ignoreColumnThresold": 0.1
    })
transformer, formated_input = transform.transform(input, input_names)
test, test_output = csv_test.parse_test("census-income.test", output_names,
                                        column_ignored)
formated_test = transform.transform_test(test, input_names, transformer)
print("Initialization done")

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# shared result and worker lists used by the loop below
precisions = []
threads = []

i = 0
for max_depth in np.arange(1, 10):
    for min_impurity_decrease in np.arange(0, 0.0001, 0.00001):
        precisions.append(0)
        threads.append(Computation(i, max_depth, min_impurity_decrease))
        threads[-1].start()
        i += 1

        if i % 4 == 0:
            # wait for the current batch of four worker threads to finish
            for n in range(i - 4, i):
                threads[n].join()
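Only the tail of Computation.run appears at the top of this example; a minimal reconstruction, assuming a threading.Thread subclass whose constructor matches the Computation(i, max_depth, min_impurity_decrease) call site above (getPrecision and ax come from the surrounding script):

import threading

class Computation(threading.Thread):
    # hypothetical reconstruction; only part of run() appears in the original
    def __init__(self, i, max_depth, min_impurity_decrease):
        super().__init__()
        self.i = i
        self.max_depth = max_depth
        self.min_impurity_decrease = min_impurity_decrease

    def run(self):
        global precisions
        precision = getPrecision(self.max_depth, self.min_impurity_decrease)
        ax.scatter(self.max_depth, self.min_impurity_decrease, precision)
        precisions[self.i] = precision
        print(f"{self.max_depth}; {self.min_impurity_decrease} done")

Note that ax.scatter is called from worker threads here; matplotlib is not thread-safe, so collecting results in precisions and plotting once from the main thread would be more robust.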