Example no. 1
def run(X, y, n_samples_for_initial, n_queries, n_committee_members, estimator):
    # start timer
    start_time = time.time()

    # init list of different learners 
    learners = []

    X_train, y_train, X_pool, y_pool = create_random_pool_and_initial_sets(X, y, n_samples_for_initial)

    for member_idx in range(n_committee_members):
        learners.append(ActiveLearner(estimator=estimator, X_training=X_train, y_training=y_train))
        
    # init committee
    committee = Committee(learner_list=learners, query_strategy=max_disagreement_sampling)

    unqueried_score = committee.score(X, y)
    print('Score over unqueried samples {:0.4f}'.format(unqueried_score))

    performance_history = []

    f1_score = 0
    index = 0
    while f1_score < 0.65:
        index += 1

        # get sample from pool
        query_idx, query_instance = committee.query(X_pool)

        # retrain the committee on the newly queried sample
        committee.teach(
            X=X_pool[query_idx].reshape(1, -1),
            y=y_pool[query_idx].reshape(1, )
        )

        # remove queried instance from pool
        X_pool = delete_rows_csr(X_pool, query_idx)
        y_pool = np.delete(y_pool, query_idx)

        y_pred = committee.predict(X)
        f1_score = metrics.f1_score(y, y_pred, average='micro')

        if index % 100 == 0:
            print('F1 score after {n} training samples: {f1:0.4f}'.format(n=index, f1=f1_score))

        # save F1 score
        performance_history.append(f1_score)
    print("--- %s seconds ---" % (time.time() - start_time))

    print(performance_history)
    return index
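
# Neither create_random_pool_and_initial_sets nor delete_rows_csr is shown in this listing.
# Hypothetical sketches, assuming the first draws a random initial training set and keeps the
# rest as the unlabeled pool, and the second drops rows from a scipy CSR matrix by index:
import numpy as np
from scipy.sparse import csr_matrix

def create_random_pool_and_initial_sets(X, y, n_initial):
    # pick n_initial random rows as the seed training set
    initial_idx = np.random.choice(range(X.shape[0]), size=n_initial, replace=False)
    X_train, y_train = X[initial_idx], y[initial_idx]
    # everything else stays in the pool
    pool_mask = np.ones(X.shape[0], dtype=bool)
    pool_mask[initial_idx] = False
    return X_train, y_train, X[pool_mask], y[pool_mask]

def delete_rows_csr(mat, indices):
    # remove the queried rows from a CSR matrix by boolean masking
    if not isinstance(mat, csr_matrix):
        raise ValueError("delete_rows_csr expects a CSR matrix")
    keep_mask = np.ones(mat.shape[0], dtype=bool)
    keep_mask[indices] = False
    return mat[keep_mask]
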
def active_learn(df1, first_item_index_of_each_category):
    train_idx = first_item_index_of_each_category
    # X_train = iris['data'][train_idx]
    # y_train = iris['target'][train_idx]

    # initial training data
    data = df1.values[:, 1:]
    target = df1['label'].values

    X_full = df1.values[:, 1:]
    y_full = df1['label'].values

    X_train = df1.values[:, 1:][train_idx]  # features start at the second column; the first column is the label
    y_train = df1['label'].values[train_idx]

    # X_pool = np.delete(data, train_idx, axis=0)
    # y_pool = np.delete(target, train_idx)

    X_pool = deepcopy(X_full)
    y_pool = deepcopy(y_full)

    # initializing Committee members
    n_members = 2
    learner_list = list()

    # creating a reduced copy of the data with the known instances removed
    # (done once, since train_idx does not change between members)
    X_pool = np.delete(X_pool, train_idx, axis=0)
    y_pool = np.delete(y_pool, train_idx)

    for member_idx in range(n_members):
        # initial training data
        # n_initial = 5
        # train_idx = np.random.choice(range(X_pool.shape[0]), size=n_initial, replace=False)
        # X_train = X_pool[train_idx]
        # y_train = y_pool[train_idx]

        # initializing learner
        learner = ActiveLearner(estimator=RandomForestClassifier(),
                                X_training=X_train,
                                y_training=y_train)
        learner_list.append(learner)
    # assembling the committee
    committee = Committee(learner_list=learner_list)

    # print('Committee initial predictions, accuracy = %1.3f' % committee.score(data, target))
    print('%1.3f' % committee.score(data, target))

    performance_array = []
    n_queries = 505
    for idx in range(n_queries):
        query_idx, query_instance = committee.query(X_pool)
        committee.teach(X=X_pool[query_idx].reshape(1, -1),
                        y=y_pool[query_idx].reshape(1, ))
        # remove queried instance from pool
        X_pool = np.delete(X_pool, query_idx, axis=0)
        y_pool = np.delete(y_pool, query_idx)
        learner_score = committee.score(data, target)
        # print('Committee %d th query predictions, accuracy = %1.3f' % (idx , learner_score))
        print('%1.3f' % (learner_score))
        if (idx % 100 == 0):
            performance_array.append(learner_score)
    percentage_increase(performance_array)
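
# percentage_increase is not shown in this listing either; a hypothetical sketch, assuming it
# reports the relative gain between consecutive recorded committee scores:
def percentage_increase(scores):
    for previous, current in zip(scores, scores[1:]):
        if previous:
            print('{:+.2f}%'.format((current - previous) / previous * 100))
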

# visualizing each learner's initial predictions
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(n_members * 7, 7))
    for learner_idx, learner in enumerate(committee):
        plt.subplot(1, n_members, learner_idx + 1)
        plt.scatter(x=pca[:, 0],
                    y=pca[:, 1],
                    c=learner.predict(iris['data']),
                    cmap='viridis',
                    s=50)
        plt.title('Learner no. %d initial predictions' % (learner_idx + 1))
    plt.show()
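
# pca is not defined in the fragment above; it is presumably a 2-D PCA projection of the data,
# used only as plotting coordinates. A minimal sketch under that assumption:
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA

iris = load_iris()
pca = PCA(n_components=2).fit_transform(iris['data'])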

# visualizing the initial predictions
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(7, 7))
    prediction = committee.predict(iris['data'])
    plt.scatter(x=pca[:, 0], y=pca[:, 1], c=prediction, cmap='viridis', s=50)
    plt.title('Committee initial predictions, accuracy = %1.3f' %
              committee.score(iris['data'], iris['target']))
    plt.show()

    # query by committee
    n_queries = 10
    for idx in range(n_queries):
        query_idx, query_instance = committee.query(X_pool)
        committee.teach(X=X_pool[query_idx].reshape(1, -1),
                        y=y_pool[query_idx].reshape(1, ))
        # remove queried instance from pool
        X_pool = np.delete(X_pool, query_idx, axis=0)
        y_pool = np.delete(y_pool, query_idx)

# visualizing the final predictions per learner
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(n_members * 7, 7))
#     for learner_idx, learner in enumerate(committee):
#         plt.subplot(1, n_members, learner_idx + 1)
#         plt.scatter(x=pca[:, 0], y=pca[:, 1], c=learner.predict(iris['data']), cmap='viridis', s=5)
#         plt.title('Learner no. %d initial predictions' % (learner_idx + 1))
#     plt.show()

# visualizing the Committee's predictions per learner
# with plt.style.context('seaborn-white'):
#     plt.figure(figsize=(7, 7))
#     prediction = committee.predict(iris['data'])
#     plt.scatter(x=pca[:, 0], y=pca[:, 1], c=prediction, cmap='viridis', s=5)
#     plt.title('Committee initial predictions')
#     plt.show()

# query by committee
unqueried_score = committee.score(train_features, train_labels)
performance_history = [unqueried_score]
n_queries = 100
for _ in range(n_queries):
    query_idx, query_instance = committee.query(X_pool)  # -> Here
    print(query_instance, " ", query_idx)
    committee.teach(X=X_pool[query_idx].reshape(1, -1),
                    y=y_pool[query_idx].reshape(1, ))
    performance_history.append(committee.score(train_features, train_labels))
    # remove queried instance from pool
    X_pool = np.delete(X_pool, query_idx, axis=0)
    y_pool = np.delete(y_pool, query_idx)

print(performance_history)
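
# performance_history can also be plotted as a learning curve; a minimal matplotlib sketch:
import matplotlib.pyplot as plt

plt.plot(range(len(performance_history)), performance_history)
plt.xlabel('Number of queries')
plt.ylabel('Committee accuracy')
plt.title('Query-by-committee learning curve')
plt.show()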

# visualizing the final predictions per learner
Example no. 5
def query():
    # n_initial = 100
    # X, y = load_digits(return_X_y=True)
    # X_train, X_test, y_train, y_test = train_test_split(X, y)
    #
    # initial_idx = np.random.choice(range(len(X_train)), size=n_initial, replace=False)
    #
    # X_initial, y_initial = X_train[initial_idx], y_train[initial_idx]
    # X_pool, y_pool = np.delete(X_train, initial_idx, axis=0), np.delete(y_train, initial_idx, axis=0)
    strategy = None
    classifier = None

    file = request.files['file']
    # if the user does not select a file, the browser may
    # submit an empty part without a filename
    filename = secure_filename(file.filename)

    # shutil.rmtree(os.path.join(app.config['UPLOAD_FOLDER'],filename.split(".")[0]))
    if file and allowed_file(file.filename):
        filename = secure_filename(file.filename)
        file.save(os.path.join(UPLOAD_FOLDER, filename))
        if(filename.split(".")[1]=="rar"):
            patoolib.extract_archive(os.path.join(UPLOAD_FOLDER, filename), outdir=os.path.join(UPLOAD_FOLDER))
        else:
            zip_ref = zipfile.ZipFile(os.path.join(UPLOAD_FOLDER, filename), 'r')
            zip_ref.extractall(UPLOAD_FOLDER)
            zip_ref.close()
            print("Succesfull")

    st = request.form.get('strategy_select')
    cl = request.form.get('classifier_select')
    option = int(request.form.get('structure_select'))
    print(cl)
    if(str(cl)=='Random Forest'):
        classifier = RandomForestClassifier()
    elif(str(cl)=='KNN'):
        classifier = KNeighborsClassifier()
    else:
        classifier = DecisionTreeClassifier()

    n_queries = request.form['queries']

    print(st)
    classlist =[]
    classes = {}
    data = {}
    data['image'] = []
    data['label'] = []
    filename = secure_filename(file.filename)
    print(filename)
    if option == 0:
        for dirname, _, filenames in os.walk(os.path.join(UPLOAD_FOLDER,filename.split(".")[0])):
            print(filenames)
            for filename in filenames:
                if('.jpg' in filename or 'jpeg' in filename or 'png' in filename):
                    image = Image.open(os.path.join(dirname, filename))
                    image = image.resize((200,200), Image.ANTIALIAS)
                    size = np.array(image).size
                    if(len(classes)==0):
                        data['image'] = np.array(numpy.array(image)).reshape((1,size))
                    else:
                        try:
                            x = numpy.array(image).reshape((1,size))
                            data['image'] = np.append(data['image'],x,axis=0)
                        except:
                            continue
                    if(dirname.split('\\')[-1] not in classes.keys()):
                        classlist.append({'name':dirname.split('\\')[-1],'number':len(classes)})
                        classes[dirname.split('\\')[-1]] = len(classes)
                            #print(os.path.join(dirname, filename))
                            #print(dirname)

                    data['label'].append(classes[dirname.split('\\')[-1]])
                    print(classes)
    else:
        for imfile in os.listdir(os.path.join(UPLOAD_FOLDER,filename.split(".")[0])):
            if imfile.endswith(".jpg") or imfile.endswith(".jpeg") or imfile.endswith("png"):
                image = Image.open(os.path.join(os.path.join(UPLOAD_FOLDER,filename.split(".")[0]), imfile))
                image = image.resize((200,200), Image.ANTIALIAS)
                size = np.array(image).size

                if(len(classes)==0):
                    data['image'] = np.array(numpy.array(image)).reshape((1,size))
                else:
                    try:
                        x = numpy.array(image).reshape((1,size))
                        data['image'] = np.append(data['image'],x,axis=0)
                    except:
                        continue
                if(("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0]))) not in classes.keys()):
                    classlist.append({'name':("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0]))),'number':len(classes)})
                    classes[("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0])))] = len(classes)
                data['label'].append(classes[("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0])))])
                print(classes)
            else:
                continue

    X = data['image']
    y = data['label']
    n_initial = 100
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    initial_idx = np.random.choice(range(len(X_train)), size=n_initial, replace=False)
    X_initial=[]
    y_initial = []
    print(type(X_initial))
    for i in range(n_initial):

        v = np.array(X_train[initial_idx[i]]).reshape((1,size))

        #print(v.shape)
        y_initial.append(y_train[initial_idx[i]])
        if(i==0):
            X_initial = np.array(X_train[initial_idx[i]]).reshape((1,size))

            print(X_initial.shape)
        else:
            X_initial = np.append(X_initial,v,axis=0)
        #print("X Shape",X_initial.shape)
        #     X_initial = X_initial.append(X_train[initial_idx[i]])
    X_pool, y_pool = np.delete(X_train, initial_idx, axis=0), np.delete(y_train, initial_idx, axis=0)
    print(X.shape)
    print(X[0].shape)
    print(X_initial.shape)

    params = {}
    params["X_test"] = X_test
    params["y_test"] = y_test
    params["counter"] = n_queries
    params["X_pool"] = X_pool
    params["y_pool"] = y_pool
    if(str(st)=='Uncertainty Sampling'):

        print(classifier)
        print(cl)
        learner = ActiveLearner(
            estimator=classifier,
            query_strategy=uncertainty_sampling,
            X_training=X_initial, y_training=y_initial
        )

        params["learner"] = learner
        accuracy_scores = learner.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        print(accuracy_scores)
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,learner,None,accuracy,X_test,y_test,classlist,n_queries)
        print("Calling Helper")
        return helper()
    elif(str(st)=='Entropy Sampling'):

        print(classifier)
        print(cl)
        learner = ActiveLearner(
            estimator=classifier,
            query_strategy=entropy_sampling,
            X_training=X_initial, y_training=y_initial
        )

        params["learner"] = learner
        accuracy_scores = learner.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        print(accuracy_scores)
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,learner,None,accuracy,X_test,y_test,classlist,n_queries)
        return helper()
    elif(str(st)=='Random Sampling'):
        learner = ActiveLearner(
            estimator=classifier,
            query_strategy=random_sampling,
            X_training=X_train, y_training=y_train
        )
        accuracy_scores = learner.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        print(accuracy_scores)
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,learner,None,accuracy,X_test,y_test,classlist,n_queries)
        return helper()
    elif(str(st)=='Query By Committee(Vote Entropy Sampling)'):
        learner1 = ActiveLearner(
            estimator = RandomForestClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner2 = ActiveLearner(
            estimator=KNeighborsClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner3 = ActiveLearner(
            estimator=DecisionTreeClassifier(),
            X_training=X_train,y_training=y_train
        )
        committee = Committee(
            learner_list=[learner1,learner2,learner3],
            query_strategy=vote_entropy_sampling
        )
        params["committee"] = committee
        accuracy_scores = committee.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        print(accuracy_scores)
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,None,committee,accuracy,X_test,y_test,classlist,n_queries)
        return helper()

    elif(str(st)=='Query By Committee(Uncertainty Sampling)'):
        learner1 = ActiveLearner(
            estimator = RandomForestClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner2 = ActiveLearner(
            estimator=KNeighborsClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner3 = ActiveLearner(
            estimator=DecisionTreeClassifier(),
            X_training=X_train,y_training=y_train
        )
        committee = Committee(
            learner_list=[learner1,learner2,learner3],
            query_strategy=uncertainty_sampling
        )
        params["committee"] = committee
        accuracy_scores = committee.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        print(accuracy_scores)
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,None,committee,accuracy,X_test,y_test,classlist,n_queries)
        return helper()

    elif(str(st)=='Query By Committee(Max Disagreement Sampling)'):
        learner1 = ActiveLearner(
            estimator = RandomForestClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner2 = ActiveLearner(
            estimator=KNeighborsClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner3 = ActiveLearner(
            estimator=DecisionTreeClassifier(),
            X_training=X_train,y_training=y_train
        )
        committee = Committee(
            learner_list=[learner1,learner2,learner3],
            query_strategy=max_disagreement_sampling
        )
        params["committee"] = committee
        accuracy_scores = committee.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        print(accuracy_scores)
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,None,committee,accuracy,X_test,y_test,classlist,n_queries)
        return helper()

    elif(str(st)=='Query By Committee(Max STD Sampling)'):
        learner1 = ActiveLearner(
            estimator = RandomForestClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner2 = ActiveLearner(
            estimator=KNeighborsClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner3 = ActiveLearner(
            estimator=DecisionTreeClassifier(),
            X_training=X_train,y_training=y_train
        )
        committee = Committee(
            learner_list=[learner1,learner2,learner3],
            query_strategy=max_std_sampling
        )
        params["committee"] = committee
        accuracy_scores = committee.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        print(accuracy_scores)
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,None,committee,accuracy,X_test,y_test,classlist,n_queries)
        return helper()

    elif(str(st)=='Query By Committee(Consensus Entropy Sampling)'):
        learner1 = ActiveLearner(
            estimator = RandomForestClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner2 = ActiveLearner(
            estimator=KNeighborsClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner3 = ActiveLearner(
            estimator=DecisionTreeClassifier(),
            X_training=X_train,y_training=y_train
        )
        committee = Committee(
            learner_list=[learner1,learner2,learner3],
            query_strategy=consensus_entropy_sampling
        )
        params["committee"] = committee
        accuracy_scores = committee.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        print(accuracy_scores)
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,None,committee,accuracy,X_test,y_test,classlist,n_queries)
        return helper()
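
# random_sampling (used in the 'Random Sampling' branch above) is not a modAL built-in; a
# hypothetical sketch, assuming the (classifier, X_pool) -> (query_idx, instances) query-strategy
# interface used elsewhere in these examples:
import numpy as np

def random_sampling(classifier, X_pool):
    # ignore the model and pick one pool instance uniformly at random
    query_idx = np.random.choice(range(X_pool.shape[0]), size=1, replace=False)
    return query_idx, X_pool[query_idx]
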
# In[66]:

version_space_length = version_space.shape[0]
ind = np.argsort(version_space[:, 1])

print('Order of points to label:')

for i in range(version_space_length):
    print('#{size:2d}'.format(size=i) + ' point:' +
          str(version_space_points[ind[i]]) + ' label:' +
          str(version_space_labels[ind[i]]))

# In[67]:

print("Initial accuracy =", committee.score(X, Y))

# In[68]:

x = 40

queries = int((x / 100) * 150)
accuracy_list = []
accuracy_list.append(committee.score(X, Y))

# In[69]:

iter = 0
print("Accuracy after", 0, "iterations :", committee.score(X, Y))

for i in range(0, queries):
Example no. 7
# visualizing each learner's initial predictions
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(n_members * 7, 7))
    for learner_idx, learner in enumerate(committee):
        plt.subplot(1, n_members, learner_idx + 1)
        plt.scatter(x=pca[:, 0],
                    y=pca[:, 1],
                    c=learner.predict(data),
                    cmap='viridis',
                    s=50)
        plt.title('Learner no. %d initial predictions' % (learner_idx + 1))
    plt.show()

# visualizing the initial predictions
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(7, 7))
    prediction = committee.predict(data)
    plt.scatter(x=pca[:, 0], y=pca[:, 1], c=prediction, cmap='viridis', s=50)
    plt.title('Committee initial predictions, accuracy = %1.3f' %
              committee.score(data, target))
    plt.show()

# query by committee
n_queries = 10
for idx in range(n_queries):
    query_idx, query_instance = committee.query(X_pool)
    committee.teach(X=X_pool[query_idx].reshape(1, -1),
                    y=y_pool[query_idx].reshape(1, ))
    # remove queried instance from pool
    X_pool = np.delete(X_pool, query_idx, axis=0)
    y_pool = np.delete(y_pool, query_idx)

# visualizing the final predictions per learner
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(n_members * 7, 7))
Example no. 8
    learner_list.append(learner)

# assembling the committee
committee = Committee(learner_list=learner_list,
                      query_strategy=vote_entropy_sampling)
'''
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(n_members*7, 7))
    for learner_idx, learner in enumerate(committee):
        plt.subplot(1, n_members, learner_idx + 1)
        plt.scatter(x=pca[:, 0], y=pca[:, 1], c=learner.predict(iris['data']), cmap='viridis', s=50)
        plt.title('Learner no. %d initial predictions' % (learner_idx + 1))
    plt.show()
'''

unqueried_score = committee.score(iris['data'], iris['target'])
'''
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(7, 7))
    prediction = committee.predict(iris['data'])
    plt.scatter(x=pca[:, 0], y=pca[:, 1], c=prediction, cmap='viridis', s=50)
    plt.title('Committee initial predictions, accuracy = %1.3f' % unqueried_score)
    plt.show()
'''

performance_history = [unqueried_score]

# query by committee
n_queries = 20
for idx in range(n_queries):
    query_idx, query_instance = committee.query(X_pool)
Example no. 9
def query():
    strategy = None
    classifier = None

    file = request.files['file']
    test = request.files['test_file']
    filename = secure_filename(file.filename)
    test_filename = secure_filename(test.filename)

    if file and allowed_file(file.filename):
        filename = secure_filename(file.filename)
        file.save(os.path.join(UPLOAD_FOLDER, filename))
        zip_ref = zipfile.ZipFile(os.path.join(UPLOAD_FOLDER, filename), 'r')
        zip_ref.extractall(UPLOAD_FOLDER)
        zip_ref.close()

    
    if test and allowed_file(test.filename):
        filename = secure_filename(test.filename)
        test.save(os.path.join(UPLOAD_FOLDER, filename))
        zip_ref = zipfile.ZipFile(os.path.join(UPLOAD_FOLDER, filename), 'r')
        zip_ref.extractall(UPLOAD_FOLDER)
        zip_ref.close()
    

    st = request.form.get('strategy_select')
    cl = request.form.get('classifier_select')
    option = int(request.form.get('structure_select'))
    if(str(cl)=='Random Forest'):
        classifier = RandomForestClassifier()
    elif(str(cl)=='KNN'):
        classifier = KNeighborsClassifier()
    else:
        classifier = DecisionTreeClassifier()

    n_queries = request.form['queries']

    classlist =[]
    classes = {}
    data = {}
    data['image'] = []
    data['label'] = []
    data['image_name'] = []
    image_data = {}
    filename = secure_filename(file.filename)
    if option == 0:
        for root, dirs, filenames in os.walk(os.path.join(UPLOAD_FOLDER, filename.split(".")[0])):
            for name in filenames:
                if name.endswith(".jpg") or name.endswith(".jpeg") or name.endswith(".png"):
                    image_file_name = os.path.join(root,name)
                    image = Image.open(image_file_name)
                    image = image.resize((200,200), Image.ANTIALIAS)
                    size = np.array(image).size

                    if(len(classes)==0):
                        data['image'] = np.array(numpy.array(image)).reshape((1,size))
                        image_data[(numpy.array(image).reshape((1,size))).tobytes()] = image_file_name
                    else:
                        try:
                            x = numpy.array(image).reshape((1,size))
                            image_data[(numpy.array(image).reshape((1,size))).tobytes()] = image_file_name
                            data['image'] = np.append(data['image'],x,axis=0)
                        except:
                            continue
                    if root.split("\\")[-1] not in classes.keys():
                        classlist.append({'name':root.split('\\')[-1],'number':len(classes)})
                        classes[root.split('\\')[-1]] = len(classes)

                    data['label'].append(classes[root.split('\\')[-1]])
                    data['image_name'].append(image_file_name)
    else:
        for imfile in os.listdir(os.path.join(UPLOAD_FOLDER,filename.split(".")[0])):

            if imfile.endswith(".jpg") or imfile.endswith(".jpeg") or imfile.endswith(".png"):
                image_file_name = os.path.join(os.path.join(UPLOAD_FOLDER,filename.split(".")[0]), imfile)
                image = Image.open(os.path.join(os.path.join(UPLOAD_FOLDER,filename.split(".")[0]), imfile))
                image = image.resize((200,200), Image.ANTIALIAS)
                size = np.array(image).size

                if(len(classes)==0):
                    data['image'] = np.array(numpy.array(image)).reshape((1,size))
                    image_data[(numpy.array(image).reshape((1,size))).tobytes()] = image_file_name
                else:
                    try:
                        x = numpy.array(image).reshape((1,size))
                        image_data[(numpy.array(image).reshape((1,size))).tobytes()] = image_file_name
                        data['image'] = np.append(data['image'],x,axis=0)
                    except:
                        continue
                if(("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0]))) not in classes.keys()):
                    classlist.append({'name':("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0]))),'number':len(classes)})
                    classes[("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0])))] = len(classes)
                data['label'].append(classes[("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0])))])
                data['image_name'].append(imfile)
            else:
                continue

    test_classlist =[]
    test_classes = {}
    test_data = {}
    test_data['image'] = []     
    test_data['label'] = []
    test_data['image_name'] = []
    if option == 0:
        for dirname, _, filenames in os.walk(os.path.join(UPLOAD_FOLDER,test_filename.split(".")[0])):
            for filename in filenames:
                if('.jpg' in filename or 'jpeg' in filename or 'png' in filename):
                    image = Image.open(os.path.join(dirname, filename))
                    image = image.resize((200,200), Image.ANTIALIAS)
                    size = np.array(image).size
                    if(len(test_classes)==0):
                        test_data['image'] = np.array(numpy.array(image)).reshape((1,size))
                    else:
                        try:
                            x = numpy.array(image).reshape((1,size))
                            test_data['image'] = np.append(test_data['image'],x,axis=0)
                        except:
                            continue
                    if(dirname.split('\\')[-1] not in test_classes.keys()):
                        test_classlist.append({'name':dirname.split('\\')[-1],'number':len(test_classes)})
                        test_classes[dirname.split('\\')[-1]] = len(test_classes)
    
                    test_data['label'].append(test_classes[dirname.split('\\')[-1]])
                    test_data['image_name'].append(filename)

    else:
        for imfile in os.listdir(os.path.join(UPLOAD_FOLDER,test_filename.split(".")[0])):
            if imfile.endswith(".jpg") or imfile.endswith(".jpeg") or imfile.endswith("png"):
                image = Image.open(os.path.join(os.path.join(UPLOAD_FOLDER,test_filename.split(".")[0]), imfile))
                image = image.resize((200,200), Image.ANTIALIAS)
                size = np.array(image).size

                if(len(test_classes)==0):
                    test_data['image'] = np.array(numpy.array(image)).reshape((1,size))
                else:
                    try:
                        x = numpy.array(image).reshape((1,size))
                        test_data['image'] = np.append(test_data['image'],x,axis=0)
                    except:
                        continue
                if(("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0]))) not in test_classes.keys()):
                    test_classlist.append({'name':("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0]))),'number':len(test_classes)})
                    test_classes[("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0])))] = len(test_classes)
                test_data['label'].append(test_classes[("".join(re.split("[^a-zA-Z]*",imfile.split(".")[0])))])
                test_data['image_name'].append(imfile)

            else:
                continue
    X_train = data['image']
    y_train = data['label']
    X_test = test_data['image']
    y_test = test_data['label']
    n_initial = 100

    initial_idx = np.random.choice(range(len(X_train)), size=n_initial, replace=False)
    X_initial=[]
    y_initial = []
    for i in range(n_initial):

        v = np.array(X_train[initial_idx[i]]).reshape((1,size))

        y_initial.append(y_train[initial_idx[i]])
        if(i==0):
            X_initial = np.array(X_train[initial_idx[i]]).reshape((1,size))


        else:
            X_initial = np.append(X_initial,v,axis=0)

    X_pool, y_pool = np.delete(X_train, initial_idx, axis=0), np.delete(y_train, initial_idx, axis=0)

    params = {}
    params["X_test"] = X_test
    params["y_test"] = y_test
    params["counter"] = n_queries
    params["X_pool"] = X_pool
    params["y_pool"] = y_pool
    if(str(st)=='Uncertainty Sampling'):

        learner = ActiveLearner(
            estimator=classifier,
            query_strategy=uncertainty_sampling,
            X_training=X_initial, y_training=y_initial
        )

        params["learner"] = learner
        accuracy_scores = learner.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,learner,None,accuracy,X_test,y_test,classlist,n_queries,image_data)
        return helper()
    elif(str(st)=='Entropy Sampling'):

        learner = ActiveLearner(
            estimator=classifier,
            query_strategy=entropy_sampling,
            X_training=X_initial, y_training=y_initial
        )

        params["learner"] = learner
        accuracy_scores = learner.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,learner,None,accuracy,X_test,y_test,classlist,n_queries,image_data)
        return helper()
    elif(str(st)=='Random Sampling'):
        learner = ActiveLearner(
            estimator=classifier,
            query_strategy=random_sampling,
            X_training=X_train, y_training=y_train
        )
        accuracy_scores = learner.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,learner,None,accuracy,X_test,y_test,classlist,n_queries,image_data)
        return helper()
    elif(str(st)=='Query By Committee(Vote Entropy Sampling)'):
        learner1 = ActiveLearner(
            estimator = RandomForestClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner2 = ActiveLearner(
            estimator=KNeighborsClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner3 = ActiveLearner(
            estimator=DecisionTreeClassifier(),
            X_training=X_train,y_training=y_train
        )
        committee = Committee(
            learner_list=[learner1,learner2,learner3],
            query_strategy=vote_entropy_sampling
        )
        params["committee"] = committee
        accuracy_scores = committee.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,None,committee,accuracy,X_test,y_test,classlist,n_queries,image_data)
        return helper()

    elif(str(st)=='Query By Committee(Uncertainty Sampling)'):
        learner1 = ActiveLearner(
            estimator = RandomForestClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner2 = ActiveLearner(
            estimator=KNeighborsClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner3 = ActiveLearner(
            estimator=DecisionTreeClassifier(),
            X_training=X_train,y_training=y_train
        )
        committee = Committee(
            learner_list=[learner1,learner2,learner3],
            query_strategy=uncertainty_sampling
        )
        params["committee"] = committee
        accuracy_scores = committee.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,None,committee,accuracy,X_test,y_test,classlist,n_queries,image_data)
        return helper()

    elif(str(st)=='Query By Committee(Max Disagreement Sampling)'):
        learner1 = ActiveLearner(
            estimator = RandomForestClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner2 = ActiveLearner(
            estimator=KNeighborsClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner3 = ActiveLearner(
            estimator=DecisionTreeClassifier(),
            X_training=X_train,y_training=y_train
        )
        committee = Committee(
            learner_list=[learner1,learner2,learner3],
            query_strategy=max_disagreement_sampling
        )
        params["committee"] = committee
        accuracy_scores = committee.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,None,committee,accuracy,X_test,y_test,classlist,n_queries,image_data)
        return helper()


    elif(str(st)=='Query By Committee(Consensus Entropy Sampling)'):
        learner1 = ActiveLearner(
            estimator = RandomForestClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner2 = ActiveLearner(
            estimator=KNeighborsClassifier(),
            X_training=X_train,y_training=y_train
        )
        learner3 = ActiveLearner(
            estimator=DecisionTreeClassifier(),
            X_training=X_train,y_training=y_train
        )
        committee = Committee(
            learner_list=[learner1,learner2,learner3],
            query_strategy=consensus_entropy_sampling
        )
        params["committee"] = committee
        accuracy_scores = committee.score(X_test, y_test)
        params["accuracy"] = accuracy_scores
        accuracy = []
        accuracy.append(accuracy_scores)
        data = Data(n_queries,X_pool,y_pool,None,committee,accuracy,X_test,y_test,classlist,n_queries,image_data)
        return helper()
                        query_strategy=strategy)
member4 = ActiveLearner(X_training=X_train,
                        y_training=Y_train,
                        estimator=KNeighborsClassifier(n_neighbors=8),
                        query_strategy=strategy)
member5 = ActiveLearner(X_training=X_train,
                        y_training=Y_train,
                        estimator=KNeighborsClassifier(n_neighbors=10),
                        query_strategy=strategy)

committee = Committee(
    learner_list=[member1, member2, member3, member4, member5])

# In[8]:

print("Initial accuracy =", committee.score(X, Y))

# In[9]:

member1r = ActiveLearner(X_training=X_train,
                         y_training=Y_train,
                         estimator=RandomForestClassifier(n_estimators=8),
                         query_strategy=strategy)
member2r = ActiveLearner(X_training=X_train,
                         y_training=Y_train,
                         estimator=RandomForestClassifier(n_estimators=1),
                         query_strategy=strategy)
member3r = ActiveLearner(X_training=X_train,
                         y_training=Y_train,
                         estimator=RandomForestClassifier(n_estimators=10),
                         query_strategy=strategy)
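
# Presumably these RandomForest members are assembled into a second committee and scored the
# same way as the KNN committee above; a minimal sketch under that assumption:
committee_r = Committee(
    learner_list=[member1r, member2r, member3r])

print("Initial accuracy =", committee_r.score(X, Y))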