Example #1
0
    def save_images(self, path, relative=True, bar=True):
        """Download, crop and save the .jpg/.png images referenced by self.posts.

        Each image is written to ``{base}{path}{index:04}{ext}``, where ``base``
        is the directory containing this module when ``relative`` is true and
        the empty string otherwise, and ``index`` is the post's position in
        ``self.posts``. The pieces are concatenated verbatim, so ``path`` has
        to carry its own separators (e.g. ``"/images/"``).

        :param path: the path to store the images in
        :param relative: whether the path is relative to this module's
            directory, defaults to True
        :param bar: forwarded to ``log_progress`` as ``disable=not bar``,
            defaults to True
        :returns: the number of images found and saved
        :raises requests.HTTPError: if a download answers with an error status
        """
        saved = 0
        dir_path = os.path.dirname(os.path.realpath(__file__)) if relative else ""
        for n, post in enumerate(log_progress(self.posts, total=self.posts_len, disable=not bar)):
            url = post.url

            # Look at the URL path only, so "pic.jpg?width=640" is still
            # recognised as a .jpg and the query never leaks into the name.
            url_path = url.partition("?")[0].partition("#")[0]
            extension = os.path.splitext(url_path.rsplit("/", 1)[-1])[1]
            if extension not in (".jpg", ".png"):
                continue

            file_name = f"{dir_path}{path}{n:04}{extension}"

            r = requests.get(url)
            # Fail here rather than writing an error page to disk and
            # crashing later when it is opened as an image.
            r.raise_for_status()
            with open(file_name, "wb") as f:
                f.write(r.content)

            # Re-open the downloaded file, crop it and overwrite it in place.
            image = Image.open(file_name)
            image = crop(image)
            image.save(file_name)

            # Count the image only once the cropped file is on disk.
            saved += 1

        return saved
def predict_n_draw(n, alpha, lambdas):
    """Collect per-message predictions over 10 held-out parts and draw them.

    For each index 0..9 a deep copy of the module-level ``ds`` is taken, the
    part at that index is popped off as the test set, and a
    ``SpamClassificator`` is built from the remaining parts. Every test
    message contributes one ``(prediction[True], message.is_spam)`` pair;
    the full list of pairs is handed to ``draw``.

    :param n: forwarded to ``SpamClassificator``
    :param alpha: forwarded to ``SpamClassificator``
    :param lambdas: forwarded to ``SpamClassificator``
    """
    scored = []
    for fold in log_progress(list(range(10))):
        # Work on a copy so popping the held-out part leaves ``ds`` intact.
        remaining = copy.deepcopy(ds)
        held_out = remaining.pop(fold)

        model = SpamClassificator(remaining, lambdas, n, alpha)

        scored.extend(
            (model.predict_stupid(message)[True], message.is_spam)
            for message in held_out
        )
    draw(scored)
def count_true(n, alpha, lambdas, raising):
    """Count correct predictions over 10 held-out parts of ``ds``.

    For each index 0..9 a deep copy of the module-level ``ds`` is taken, the
    part at that index is popped off as the test set, and a
    ``SpamClassificator`` is built from the remaining parts. The predicted
    label of a message is the key with the largest value in the mapping
    returned by ``predict_stupid``.

    :param n: forwarded to ``SpamClassificator``
    :param alpha: forwarded to ``SpamClassificator``
    :param lambdas: forwarded to ``SpamClassificator``
    :param raising: if true, raise as soon as a message whose ``is_spam`` is
        falsy gets a truthy predicted label; otherwise only record that it
        happened
    :returns: ``(true, erroring)`` - the number of messages whose predicted
        label equals ``is_spam``, and whether any such misclassification
        was seen
    :raises LambdaIncreaseException: on the first such misclassification
        when ``raising`` is true
    """
    true = 0
    erroring = False
    for num in log_progress(list(range(10))):
        # Work on a copy so popping the held-out part leaves ``ds`` intact.
        train = copy.deepcopy(ds)
        test = train.pop(num)

        classificator = SpamClassificator(train, lambdas, n, alpha)

        for test_message in test:
            prediction = classificator.predict_stupid(test_message)

            # Label with the highest value wins.
            result = max(prediction, key=prediction.get)
            target = test_message.is_spam

            if result == target:
                true += 1
            if not target and result:
                if raising:
                    raise LambdaIncreaseException
                else:
                    erroring = True
    return true, erroring

# Candidate alpha values for each n (1, 2 and 3).
alphas = {
    1: [0.1, 0.01, 0.005, 0.001, 0.0007, 0.0001, 0.00001],
    2: [0.1, 0.01, 0.005],
    3: [0.1, 0.01],
}

# Flatten the table into (n, alpha) pairs, ordered by n and then by the
# position of alpha in its list.
params = []
for n in alphas:
    for alpha in alphas[n]:
        params += [(n, alpha)]
# Load the dataset that the loop below deep-copies and pops one part from.
ds = read_actual_ds()

# Sum of the lengths of all parts of ds.
total = sum([len(x) for x in ds])
for (n, alpha) in log_progress(params):
    # NOTE(review): `results` is not assigned anywhere above this loop in the
    # visible code - presumably it is carried over from an earlier cell/run;
    # TODO confirm.
    # Combinations already present in `results` are printed and skipped.
    if (n, alpha) in results:
        print(n, alpha, results[(n, alpha)])
        continue
    true = 0
    for num in range(10):
        # Copy first so popping the held-out part leaves ds itself untouched.
        train = copy.deepcopy(ds)
        test = train.pop(num)
        
        classificator = SpamClassificator(train, {False: 1e100, True: 1e100}, n, alpha)
        
        for test_message in test:
            prediction = classificator.predict_stupid(test_message)
            
            # Key with the largest value in the prediction mapping.
            result = max(prediction, key=prediction.get)
            target = test_message.is_spam
            # NOTE(review): `result` and `target` are never compared and `true`
            # is never updated or stored - the loop body looks truncated here.
    # NOTE(review): the statements below index ds by a 'class' column and plot
    # `xn`/`yn`, which are never assigned in the visible code. They look like
    # the tail of a different plotting snippet spliced in - TODO confirm.
    xp, yp, _ = ds[ds['class'] == 'P'].T.values
    
    plt.scatter(xn, yn, color='#0000CD', s=40)
    plt.scatter(xp, yp, color='#FF0000', s=40)
    
    plt.show()

# Score cache keyed by (k, p, c); entries already present are not recomputed.
results = {}

# Every (k, p, c) triple to evaluate: each k from `kernels`, each of its
# parameters from `parameters[k]`, each value from `C`.
params_sets = []
for k in kernels:
    params_sets.extend((k, p, c) for p in parameters[k] for c in C)

ds = pd.read_csv('chips.csv')
for params in log_progress(params_sets):
    k, p, c = params
    if params not in results:
        # k is called with p to produce the argument count_accuracy_lov expects.
        results[params] = count_accuracy_lov(ds, k(p), c)

# Order the (params, score) pairs from the highest score to the lowest.
results_list = list(results.items())
results_sorted = sorted(results_list, key=lambda entry: -entry[1])
best = results_sorted[0]
best

draw(ds, best[0], 100)

# First (i.e. best-scoring) entry whose k is `polynomial`.
best_poly = [entry for entry in results_sorted if entry[0][0] == polynomial][0]
    total = tX.shape[0]
    predict_Y = clf.predict(tX)
    for predict, target in zip(predict_Y, tY.values.T[0]):
        if predict == target:
            true_count += 1
    return true_count / total


# All (height, criterion, splitter) triples to try: heights 3..24 combined
# with both criteria and both splitter strategies.
params = []
for h in range(3, 25):
    for c in ('gini', 'entropy'):
        for s in ('best', 'random'):
            params += [(h, c, s)]

# One independent, initially empty result table per dataset number 1..21.
results = {i: dict() for i in range(1, 22)}

# For each dataset number 1..21, load its train/test CSVs and evaluate every
# parameter triple that has no cached entry in results[i] yet.
for i in log_progress(range(1, 22)):
    prefix = 'data/{:02d}'.format(i)
    X, Y = load_dataset(prefix + '_train.csv')
    tX, tY = load_dataset(prefix + '_test.csv')
    for ps in params:
        if ps not in results[i]:
            height, cr, spl = ps
            clf = build_and_fit(X, Y, cr, spl, height)
            acc = count_accuracy(clf, tX, tY)

            # Store the accuracy together with the depth the fitted model reports.
            results[i][ps] = (acc, clf.get_depth())

# Per dataset: its (params, (acc, depth)) entries ordered by accuracy, highest first.
sorted_results = [
    (index, sorted(scores.items(), key=lambda e: -e[1][0]))
    for index, scores in results.items()
]