Example #1
    def __init__ (self, 
                  retina_size, 
                  num_bits_addr, 
                  bleaching, 
                  confidence_threshold, 
                  ignore_zero_addr,
                  set_of_classes,
                  ss_confidence):

        self.__retina_size = retina_size
        self.__num_bits_addr = num_bits_addr
        self.__is_bleaching = bleaching
        self.__confidence_threshold = confidence_threshold
        self.__is_ignoring_zero_addr = ignore_zero_addr
        self.__set_of_classes = set_of_classes
        self.__ss_confidence = ss_confidence
        self.__num_of_used_Xun = 0

        self.__main_wisard =  WiSARD(retina_size = self.__retina_size,
                                     num_bits_addr = self.__num_bits_addr,
                                     bleaching = self.__is_bleaching,
                                     confidence_threshold = self.__confidence_threshold,
                                     ignore_zero_addr = self.__is_ignoring_zero_addr)
        self.__setup()
Example #2
class SemiSupervisedWiSARD(): # Input: labeled examples and unlabeled examples - Output: classifier
    def __init__ (self, 
                  retina_size, 
                  num_bits_addr, 
                  bleaching, 
                  confidence_threshold, 
                  ignore_zero_addr,
                  set_of_classes,
                  ss_confidence):

        self.__retina_size = retina_size
        self.__num_bits_addr = num_bits_addr
        self.__is_bleaching = bleaching
        self.__confidence_threshold = confidence_threshold
        self.__is_ignoring_zero_addr = ignore_zero_addr
        self.__set_of_classes = set_of_classes
        self.__ss_confidence = ss_confidence
        self.__num_of_used_Xun = 0

        self.__main_wisard =  WiSARD(retina_size = self.__retina_size,
                                     num_bits_addr = self.__num_bits_addr,
                                     bleaching = self.__is_bleaching,
                                     confidence_threshold = self.__confidence_threshold,
                                     ignore_zero_addr = self.__is_ignoring_zero_addr)
        self.__setup()
        

    def fit(self, X = [], y = [], unlabeled_X = []):

        if(type(X) is not list):
            raise Exception("Type of X must be a list of examples")
        if(type(unlabeled_X) is not list):
            raise Exception("Type of unlabeled_X must be a list of examples")
        if(len(X) != len(y)):
            raise Exception("Number of Examples must match number of labels")
        if(len(X[0]) != self.__retina_size):
            raise Exception("Each example must have the same size as the retina:\n"+
                                "Size of example = %d, Size of Retina = %d"%(len(X[0]), self.__retina_size))
        if(unlabeled_X != []):
            if(len(unlabeled_X[0]) != self.__retina_size):
                raise Exception("Each example must have the same size as the retina:\n"+
                                "Size of example = %d, Size of Retina = %d"%(len(unlabeled_X[0]), self.__retina_size))
        #here the fit begins
        if(X != []):
            for position in xrange(len(X)):
                class_name = y[position]
                example = X[position]
                self.__main_wisard.add_training(class_name, example)

        if(unlabeled_X != []):
            for position in xrange(len(unlabeled_X)): #here comes the unsupervised part. Learning with unlabeled data.
                possible_classes = self.__main_wisard.classify(unlabeled_X[position]) #classification result: a dictionary of class -> score
                values = possible_classes.values() #list of results
                index = possible_classes.keys() #list of classes
                list_possible_classes = []

                for i in xrange(len(values)): #pairing results and classes
                    list_possible_classes.append([index[i], values[i]])
                list_possible_classes = sorted(list_possible_classes, key=lambda a_entry: a_entry[1]) #sorting list by the values
                if(list_possible_classes[-1][1] != 0): #best result must be different from zero
                    ss_confidence = 1 - float(list_possible_classes[-2][1])/float(list_possible_classes[-1][1]) #confidence: 1 - (second best / best)
                else:
                    ss_confidence = 0.0 #if the best result is 0, the confidence is zero
                if(ss_confidence >= self.__ss_confidence): #if the result is confident enough, train this example as the selected class
                    class_name = list_possible_classes[-1][0]
                    self.__main_wisard.add_training(class_name, unlabeled_X[position])
                    self.__num_of_used_Xun += 1         

    def __setup(self):
        for class_name in self.__set_of_classes:
            self.__main_wisard.create_discriminator(class_name)

    def predict(self, testing_corpus):
        result = []
        for position in xrange(len(testing_corpus)):
            result.append(self.__main_wisard.classify(testing_corpus[position]))
        return result

    def get_status(self):
        return self.__num_of_used_Xun
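
A minimal usage sketch for the class above. The data, parameter values, and class names are purely illustrative, and the WiSARD class used by the constructor (e.g. from PyWANN) is assumed to be importable in the same module.

# hypothetical usage of SemiSupervisedWiSARD; all values below are illustrative
ss_wisard = SemiSupervisedWiSARD(retina_size=4,
                                 num_bits_addr=2,
                                 bleaching=True,
                                 confidence_threshold=0.1,
                                 ignore_zero_addr=False,
                                 set_of_classes=["A", "B"],
                                 ss_confidence=0.5)

labeled_X = [[1, 1, 0, 0], [0, 0, 1, 1]]
labels = ["A", "B"]
unlabeled_X = [[1, 1, 0, 1]]  # used for training only if the classification is confident enough

ss_wisard.fit(X=labeled_X, y=labels, unlabeled_X=unlabeled_X)
print(ss_wisard.predict([[1, 0, 0, 0]]))  # one result dictionary per test example
print(ss_wisard.get_status())             # number of unlabeled examples actually used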
Example #3
print("Finished reading and preprocessing data.")
# retinaLength = numberRowsTrainImages*numberColumnsTrainImages (in the updated PyWANN, passing this value as an argument to the WiSARD function isn't needed anymore)

probFile = open("predict_proba.txt",
                'w')  # file for the probabilities of the wrong classifications
difFile = open(
    "difference_proba.txt", 'w'
)  # file for the difference between the probability of the guessed class and the expected one

numberBits = 32
# bleaching = False
correct = 0
numberClasses = 10

wisard = WiSARD.WiSARD(numberBits)
# wisard = WiSARD.WiSARD(numberBits, bleaching)

print("Started training.")
wisard.fit(trainImages, trainLabels)
print("Finished training.")

print("Started classifying.")
result = wisard.predict(testImages)
print("Finished classifying.")

# gets the probabilities for each class
prob = wisard.predict_proba(testImages)

# to count the frequency of wrong classifications in each expected class
wrongLabelsFreq = [0] * numberClasses
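
The excerpt stops before the evaluation step. Below is a sketch of how result, prob, and wrongLabelsFreq might be combined to fill the two files; it assumes a testLabels list with the expected integer digits, that predict returns one label per image, and that each entry of predict_proba can be indexed by class label.

# hypothetical evaluation loop; testLabels and the indexing of prob are assumptions
for i in range(len(testImages)):
    expected = testLabels[i]
    guessed = result[i]
    if guessed == expected:
        correct += 1
    else:
        wrongLabelsFreq[expected] += 1
        probFile.write(str(prob[i]) + "\n")
        difFile.write(str(prob[i][guessed] - prob[i][expected]) + "\n")

print("Accuracy: %.2f%%" % (100.0 * correct / len(testImages)))
probFile.close()
difFile.close()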
Example #4
mndata = MNIST('.')
training_images, training_labels = mndata.load_training()
testing_images, testing_labels = mndata.load_testing()

actv_threshold = 128
tTraining_images = transform(training_images, actv_threshold)
tTesting_images = transform(testing_images, actv_threshold)

print("images loaded!")

bleaching = True
b_bleaching = 1

start_time = time.time()
w = WiSARD.WiSARD(num_bits_addr,
                  bleaching,
                  seed=int(time.time()),
                  defaul_b_bleaching=b_bleaching)

print("beginning training...")

# training discriminators
w.fit(tTraining_images, training_labels)

print("training complete!")
print("beginning tests...")

# Predicting class
# Result will be a dictionary using the classes as key and the WiSARD result as values
result = np.array(w.predict(tTesting_images))
# Return how many items present in both lists are equal
correct_items = np.sum(np.array(testing_labels) == result)
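
The transform helper called above and the final accuracy report are not part of this excerpt. A minimal sketch of what they might look like follows; the helper name and threshold semantics come from the calls above, everything else is an assumption.

# hypothetical version of the transform() helper: binarize each pixel against the threshold
def transform(images, threshold):
    return [[1 if pixel >= threshold else 0 for pixel in image] for image in images]

# overall accuracy on the MNIST test set
accuracy = float(correct_items) / len(testing_labels)
print("accuracy: %.4f" % accuracy)
print("elapsed time: %.1f s" % (time.time() - start_time))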
Example #5
def wisard_training(register_list, sw):

    # Get training set
    global vectorizer

    # Import wisard from file if the name is set
    if configs.wisard_name:
        with open(configs.wisard_name, 'rb') as input:
            wisard = pickle.load(input)
        with open('vectorizer_' + configs.wisard_name, 'rb') as input:
            vectorizer = pickle.load(input)

        #print vectorizer.vocabulary
        return wisard

    train_set = []
    labels = []
    count_dict = {}

    # Set SKL vectorizer
    #vectorizer =  TfidfVectorizer(use_idf = True, lowercase=True, norm = 'l2', ngram_range=(1,1), min_df=0.0, max_df = 0.8)
    vectorizer = CountVectorizer(lowercase=True,
                                 ngram_range=(1, 1),
                                 min_df=0.0,
                                 max_df=0.8,
                                 binary=True)

    # Building training set
    for register in register_list:
        if register[configs.complaint_type_index] == '1' and register[
                configs.key_index] in configs.classificationList:
            report_classification = register[configs.key_index]
            report_description = register[configs.description_index]

            if (report_classification not in count_dict):
                count_dict[report_classification] = 0

            if (count_dict[report_classification] >= 10):
                continue

            count_dict[report_classification] += 1
            labels.append(report_classification)
            train_set.append(register[configs.description_index])

    print "Vectorizing sets..."

    V = vectorizer.fit_transform(train_set).toarray()

    print "Training Wisard"

    labels = np.array(labels)  # labels
    wisard = WiSARD.WiSARD(
        3, True,
        ignore_zero_addr=True)  # build the WiSARD classifier
    v_1 = np.array(V)  # training set
    wisard.fit(v_1,
               labels)  # associate each label with its training example

    # Saving wisard to file
    file_name = 'wisard' + datetime.datetime.now().strftime(
        "%Y-%m-%d-%H-%M-%S") + '.pkl'

    with open(file_name, 'wb') as output:
        pickle.dump(wisard, output, pickle.HIGHEST_PROTOCOL)

    with open('vectorizer_' + file_name, 'wb') as output:
        pickle.dump(vectorizer, output, pickle.HIGHEST_PROTOCOL)

    return wisard
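
A sketch of how the classifier returned by wisard_training and the global vectorizer might be used on a new complaint description. The function name is hypothetical and only reuses calls already shown in the examples above.

# hypothetical helper: classify a single new description with the trained model
def wisard_classify(description, wisard):
    # vectorize with the same CountVectorizer fitted (or unpickled) in wisard_training()
    features = vectorizer.transform([description]).toarray()
    return wisard.predict(features)[0]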