def readdata(sourcex_matrix=None, sourcey_matrix=None, targetx_matrix=None, targety_matrix=None,
             src_path='datasets/syndata_002_normalized_no_novel_class_source_stream.csv',
             tgt_path='datasets/syndata_002_normalized_no_novel_class_target_stream.csv',
             src_size=None, tgt_size=None):
        """
        Read the source dataset (features plus labels y) and the target dataset
        (features plus predicted labels yhat), each a list of lists. If the
        matrices are not passed in, they are loaded from the CSV paths. Labels
        from both streams are re-encoded as indices into a shared label list.
        tgt_size is accepted for symmetry but is currently unused.
        """
        if sourcex_matrix is None:
            sourcex_matrix_, sourcey_matrix = Classification.read_csv(src_path, None)
        else:
            sourcex_matrix_ = sourcex_matrix
        if src_size is not None:
            # Truncate features and labels together so they stay aligned.
            sourcex_matrix_ = sourcex_matrix_[:src_size, :]
            sourcey_matrix = sourcey_matrix[:src_size]

        if targetx_matrix is None:
            targetx_, targety_ = Classification.read_csv(tgt_path, size=None)
        else:
            targetx_ = targetx_matrix
            targety_ = targety_matrix

        # Build one label list over both streams, so source labels that never
        # occur in the target stream can still be indexed.
        labellist = []
        for label in list(sourcey_matrix) + list(targety_):
            if label not in labellist:
                labellist.append(label)

        sourcey_label = [labellist.index(y) for y in sourcey_matrix]
        targety_label = [labellist.index(y) for y in targety_]
        return sourcex_matrix_, sourcey_label, targetx_, targety_label
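
A minimal usage sketch (hypothetical call; it assumes Classification.read_csv is
importable and returns a (features, labels) pair, as the function above relies on):

# Load both streams from the default CSV paths, keeping the first 1000 source
# rows; the returned labels are integer-encoded against a shared label list.
src_x, src_y, tgt_x, tgt_y = readdata(src_size=1000)
print(len(src_y), len(tgt_y))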
Example #2
	def __init__(self, Lx, Ly, Ux, Uy, Tx, Ty, method = "svm", budget = 1000):
		self.Lx0 = Lx[:]
		self.Ly0 = Ly[:]
		
		self.Lx = Lx
		self.Ly = Ly
		self.Ux = Ux # TODO should not be here
		self.Uy = Uy # TODO should not be here
		self.Tx = Tx # TODO should not be here
		self.Ty = Ty # TODO should not be here
		
		self.th = 0.9
		self.queried = 0
		self.queries = []
		
		self.ths = []
		self.infos = []
		self.accuracys = []
		self.clf = Classification( self.Lx, self.Ly, method = method, Vx = Lx+Ux, Vy = Ly+Uy ); self.clf.train()
		
		self.sup_infos = [] # TODO should not be here
		self.sup_accuracys = [] # TODO should not be here
		self.sup_clf = Classification( self.Lx, self.Ly, method = method, Vx = Lx+Ux, Vy = Ly+Uy ); self.sup_clf.train() # TODO should not be here
		
		# self.mab = Bandit( algos = np.arange(0., 1.1, 0.1), method = "UCB", alpha = 1 )
		self.mab = Bandit( algos = np.arange(0., 1.1, 0.1), method = "reinforcement", alpha = 1 )
Example #3
    def __init__(self, size, features):
        self.size = size
        self.features = features

        self.test_data, t = make_blobs(n_samples=self.size,
                                       n_features=self.features)
        self.test_tensor_data = np.random.random(
            (self.size, self.features, self.features))

        self.kmeans = Clustering(10)
        self.svm = Classification()
        self.gauss = MultivariateGauss()
        self.tensor = TensorDecomposition()
Example #4
	def query_balanced_disag2(self, weighted = True, op=1):
		ids, _ = self.query_margin()
		scores = []
		scores_B = []
		
		commitee = []
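		# For each top-ranked candidate, retrain a copy of the classifier with the
		# candidate's predicted label added, and collect the copies as a committee.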
		for idp, dp in enumerate(self.Ux):
			if idp in ids[:self.optimize*op]:
				# true_y = self.Uy[idp]
				true_y = self.clf.predict_label(dp)
				
				temp_clf = Classification(self.Lx + [dp], self.Ly + [true_y], method = self.clf.method)
				temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train()
				commitee.append( (temp_clf, 1) )
		
		for ix, x in enumerate(self.Ux):
			if ix in ids[:self.optimize*op]:
				preds = Counter()
				
				if weighted: # weight using proba distrib of commitee
					for (clf,_) in commitee:
						if self.clf.predict_label(x) != clf.predict_label(x):
							YP = zip( clf.h.classes_, clf.h.predict_proba( x )[0] )
							for (y,p) in YP: preds[y] += p
					
					preds = preds.most_common()
					diff = 0. if preds == [] else preds[0][1]
					# diff = 0. if preds == [] else ( preds[0][1] - preds[1][1] if len(preds)>1 else preds[0][1] )
					
				else:
					# confis = [ clf.getPredictProba(1,x) for (clf,_) in commitee if self.clf.predict_label(x) != clf.predict_label(x) ]
					labels = [ clf.predict_label(x) for (clf,_) in commitee if self.clf.predict_label(x) != clf.predict_label(x) ]
					preds = Counter(labels)
					preds = preds.most_common()
					diff = 0. if preds == [] else sum( [pred[1] for pred in preds] )
				
				balance = self.get_balance(x)
				informativeness = diff
				
			else:
				informativeness = 0.
				balance = 0
				
			scores.append( informativeness )
			scores_B.append( balance )
			
		# scores_B = Util.normalize(scores_B)
		scores = [scr*scores_B[iscr] for iscr,scr in enumerate(scores)]
		
		return self.sort_scores(scores)
Example #5
def get_max_classification_occurrences(examples):
    classification = Classification()
    classifications = dict()

    for c in classification.get_values():
        classifications[c] = 0

    for example in examples:
        classifications[example.get_classification()] += 1

    v = list(classifications.values())

    k = list(classifications.keys())
    return k[v.index(max(v))]
Example #6
	def get_balance(self, x):
		# y = self.Uy[ self.Ux.index(x) ]
		y = self.clf.predict_label(x)
		
		temp_clf = Classification(self.Lx + [x], self.Ly + [y], method = self.clf.method)
		temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train()
		
		cnt = Counter()
		for dp in self.Ux: cnt[ temp_clf.predict_label(dp) ] += 1. / len(self.Ux)
		P = [ cnt[key] for key in cnt ]
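		# Normalized entropy of the predicted-label distribution over Ux
		# (log base len(P) scales the maximum, a uniform spread, to 1).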
		
		informativeness = -1.0 * sum( [ p * math.log(p, len(P)) for p in P if p > 0 ] )
		
		return informativeness
Example #7
 def __init__(self, webiscorpus, train_samples=1000, noQueryTerms=15, use_ner=True, useTFIDF=True, use_noun=True, use_verb=True, use_adj=True, useHandwrittenAsGold=False, useContext=False):
     self.train_samples = train_samples
     self.model_name = 'clf-model-ner' + str(use_ner) + '-use-handwritten-as-gold' + str(useHandwrittenAsGold) + '-useContext' + str(useContext) + '-noun' + str(use_noun) + '-verb' + str(use_verb) + '-adj' + str(use_adj) + '-' + str(self.train_samples) + \
                       '-QueryTerms' + str(noQueryTerms) + '5context'
     self.classification = Classification(use_ner=use_ner, train_samples=train_samples, use_noun=use_noun, use_verb=use_verb, use_adj=use_adj, useTFIDF=useTFIDF, useContext=useContext)
     self.silver_dict = Utils.load_from_pickle(
         # 'id-terms-in-common-no-stopwords-and-common-words-automatic-doc-lucene-dict.p')
         'queries-silver.p')
     self.noQueryTerms = noQueryTerms
     self.training_item_generator_func = webiscorpus.corpus_gen_non_white_listed
     if useHandwrittenAsGold:
         self.training_item_generator_func = webiscorpus.corpus_gen_white_listed
         self.silver_dict = Utils.load_from_pickle('queries-handwritten.p')
     super(ClassifierExperminet, self).__init__(self.model_name, webiscorpus, mini_index=False)
Example #8
 def __init__(self):
     cl = Classification()
     self.x_train_std = cl.x_train_std
     self.x_test_std = cl.x_test_std
     self.y_train = cl.y_train
     self.y_test = cl.y_test
Example #9
def get_entropy(examples):
    classifications = list()
    classification_occurrences = list()
    classif = Classification()
    entropy = 0
    if len(examples) != 0:
        for example in examples:
            classifications.append(example.get_classification())
        for classification in classif.get_values():
            classification_occurrences.append(classifications.count(classification))
        # print(str(classification_occurrences))
        for class_occur in classification_occurrences:
            probability = class_occur / len(examples)
            if probability != 0:
                entropy = entropy + (probability * math.log2(probability))
    return -entropy
Example #10
    def OpenFile(self):

        self.CurrentFile = filedialog.askopenfilename(
            initialdir=r'C:\Users\user\Desktop\CV',
            title="Select file",
            filetypes=(("jpeg files", "*.jpg"), ("all files", "*.*")))
        if self.CurrentFile:
            x = os.path.basename(self.CurrentFile)
            self.main.after(5, lambda: self.Filelabel.config(text='檔案 :' + x))
            self.ic = ImgClass(x)
            im = Image.open(x)
            if (im.size[0] > 600):
                im = im.resize((600, im.size[1]))
            if (im.size[1] > 450):
                im = im.resize((im.size[0], 450))

            self.sizeX = im.size[0]
            self.sizeY = im.size[1]
            self.CanvaLabel.configure(text=str(self.sizeX) + 'X' +
                                      str(self.sizeY))
            img = ImageTk.PhotoImage(image=im)
            self.ImgCanva.create_image(0, 0, image=img, anchor=NW)
            self.ImgCanva.image = img

            self.classfy = Classification(self.ic, self)
            self.program = Mainsys(self.ic, self)
Example #11
def main(MalDir, BenDir, FeatureCombination='111'):
    '''
    Extract flow features from the malware and benign APK directories in
    parallel, then run the classifier on the resulting feature files.

    :param String MalDir: directory containing malware .apk files
    :param String BenDir: directory containing benign .apk files
    :param String FeatureCombination: '0'/'1' flags selecting which feature
                                      groups to extract (default '111')
    '''

    # 1 - Initializing
    createdir('Metadata' + os.path.sep + 'Malware')
    createdir('Metadata' + os.path.sep + 'Benign')
    createdir('Data' + os.path.sep + 'Malware')
    createdir('Data' + os.path.sep + 'Benign')

    # 2 - Extracting flow features from apps
    pool = mp.Pool(NumFlowProcesses)
    for file in walk(MalDir, '.apk'):
        pool.apply_async(ExtractFlowFeatures,
                         args=(file, 'Malware', FeatureCombination))
    for file in walk(BenDir, '.apk'):
        pool.apply_async(ExtractFlowFeatures,
                         args=(file, 'Benign', FeatureCombination))
    pool.close()
    pool.join()

    # 3 - Classifying
    Classification('Data' + os.path.sep + 'Malware',
                   'Data' + os.path.sep + 'Benign', 0.3)

    # 4. Remove feature files after classification
    for file in walk('Data', '.Features'):
        os.system('rm -rf ' + file)
Example #12
def run():
    logging.getLogger().setLevel(logging.WARNING)
    d = Dataset()
    #d.use_images_in_folder("/home/simon/Datasets/ImageNet_Natural/images/")
    #d.use_images_in_folder("/home/simon/Datasets/ICAO_german/")
    d.use_images_in_folder("/home/simon/Datasets/desko_ids/images_unique/")
    #d.use_images_in_folder("/home/simon/Datasets/croatianFishDataset-final/")
    #d.use_images_in_folder("/home/jaeger/data/croatianFishDataset1-5Dir/")
    d.create_labels_from_path()
    d.fill_split_assignments(1)

    #d.read_from_file("/home/simon/Datasets/CUB_200_2011/cropped_scaled_alex.txt","imagepaths","string")
    #d.read_from_file("/home/simon/Datasets/CUB_200_2011/tr_ID.txt","split_assignments","int")
    #d.read_from_file("/home/simon/Datasets/CUB_200_2011/labels.txt","labels","int")

    c = Classification()
    c.add_algorithm(Resize(512, 320))
    # #c.add_algorithm(Noise('saltpepper',0.1))
    p = ParallelAlgorithm()
    #
    p1 = AlgorithmPipeline()
    p1.add_algorithm(HOG())
    p1.add_algorithm(SpatialPyramid())
    # #p1.add_algorithm(MinMaxNormalize())
    p1.add_algorithm(NormNormalize())
    p.add_pipeline(p1)

    p2 = AlgorithmPipeline()
    p2.add_algorithm(Resize(64, 32))
    p2.add_algorithm(Colorname())
    p2.add_algorithm(SpatialPyramid())
    p2.add_algorithm(NormNormalize())
    # #p2.add_algorithm(MinMaxNormalize())
    p.add_pipeline(p2)

    c.add_algorithm(p)
    # #c.add_algorithm(MinMaxNormalize())
    #c.add_algorithm(NormNormalize())
    # c.add_algorithm(MeanCalculator())
    #c.add_algorithm(Resize(32,24))
    c.add_algorithm(MulticlassSVM())
    # #c.train(d)
    # #for path, gt_label in zip(d.imagepaths, d.labels):
    # #    logging.info("Predicted class for " + path + " is " + str(c.predict(path).data[0]) + " (GT: " + str(gt_label) + ")")

    ## Caffe features
    #c.add_algorithm(Caffe("","","fc7"))
    #c.add_algorithm(MulticlassSVM())

    #with open('run_evaluation.py', 'r') as fin:
    #    print(fin.read())

    mean_acc, mean_mAP = Evaluation.random_split_eval(
        d, c, absolute_train_per_class=1, runs=1)
    #mean_acc,mean_mAP = Evaluation.fixed_split_eval(d,c)
    logging.warning("Total accuracy is " + str(mean_acc))
    logging.warning("Total mAP is " + str(mean_mAP))
Example #13
def test_Classification_dtype():
    """
    Test that the initialization of a Classification class throws a type error for 
    things that are not pandas dataframes
    """
    some = "A wrong data type of type string"
    with pytest.raises(TypeError):
        Classification(some)
Example #14
class ClassifierExperminet(Experiment):

    def __init__(self, webiscorpus, train_samples=1000, noQueryTerms=15, use_ner=True, useTFIDF=True, use_noun=True, use_verb=True, use_adj=True, useHandwrittenAsGold=False, useContext=False):
        self.train_samples = train_samples
        self.model_name = 'clf-model-ner' + str(use_ner) + '-use-handwritten-as-gold' + str(useHandwrittenAsGold) + '-useContext' + str(useContext) + '-noun' + str(use_noun) + '-verb' + str(use_verb) + '-adj' + str(use_adj) + '-' + str(self.train_samples) + \
                          '-QueryTerms' + str(noQueryTerms) + '5context'
        self.classification = Classification(use_ner=use_ner, train_samples=train_samples, use_noun=use_noun, use_verb=use_verb, use_adj=use_adj, useTFIDF=useTFIDF, useContext=useContext)
        self.silver_dict = Utils.load_from_pickle(
            # 'id-terms-in-common-no-stopwords-and-common-words-automatic-doc-lucene-dict.p')
            'queries-silver.p')
        self.noQueryTerms = noQueryTerms
        self.training_item_generator_func = webiscorpus.corpus_gen_non_white_listed
        if useHandwrittenAsGold:
            self.training_item_generator_func = webiscorpus.corpus_gen_white_listed
            self.silver_dict = Utils.load_from_pickle('queries-handwritten.p')
        super(ClassifierExperminet, self).__init__(self.model_name, webiscorpus, mini_index=False)

    @timing_decorator
    def train_model(self):
        """training classifier"""
        for i, item in enumerate(self.training_item_generator_func()):
            # if i > 0 and i % 100 == 0:
            #     print("accuracy at {} is {}".format(i, self.classification.getAccuracy()))
            if i > self.train_samples:
                break
            self.classification.process_query(item['Subject'], item['Content'], self.silver_dict[item['Id']])
        self.classification.train()
        self.classification.save_model(self.model_name)

    def get_query_per_item(self, item):
        """building dictionaries for item"""
        full_terms = self.classification.process_query(item['Subject'], item['Content'], self.silver_dict[item['Id']])
        result = self.classification.predict().tolist()
        query_terms = []
        while len(query_terms) < self.noQueryTerms and len(result):
            picked_word_index = result.index(max(result))
            picked_word = full_terms[picked_word_index]
            result.pop(picked_word_index)
            full_terms.pop(picked_word_index)
            if picked_word not in query_terms:
                query_terms.append(picked_word)
        query_terms = ' '.join(set([term for term in query_terms if term not in anserini.stopwords_temp]))
        # print('classifier:', query_terms)
        # print('silver:', self.silver_dict[item['Id']])
        return query_terms

    def run(self):
        # try:
        #     self.classification.load_model(self.model_name)
        # except:
        self.train_model()
        self.build_queries()
        self.search_queries()
        _, mrr, _, _ = calculate_mrr(self.result_pickle_name, self.white_list)
        self.mrr = float(mrr)
Example #15
def main(Maldir, Gooddir, NumofProcesses, FeatureCombination='1111'):
    '''
    Extract features from the malware and benign APK directories, optionally
    add FlowDroid flow features, then classify.

    :param String Maldir: directory containing malware .apk files
    :param String Gooddir: directory containing benign .apk files
    :param String NumofProcesses: number of worker processes for extraction
    :param String FeatureCombination: combination of CAPI, IA, PAPI and FLOW
                                      features, 4 chars of '0'/'1'
                                      (exclude/include); default is '1111'
    '''

    # 1. get features of API, IA, PAPI using Apktool.
    createdir('Metadata' + os.path.sep + 'Malware')
    createdir('Metadata' + os.path.sep + 'Benign')
    createdir('Data' + os.path.sep + 'Malware')
    createdir('Data' + os.path.sep + 'Benign')

    pool = mp.Pool(int(NumofProcesses))
    for file in walk(Maldir, '.apk'):
        pool.apply_async(FeatureExtraction,
                         args=(file, 'Malware', FeatureCombination))
    for file in walk(Gooddir, '.apk'):
        pool.apply_async(FeatureExtraction,
                         args=(file, 'Benign', FeatureCombination))
    pool.close()
    pool.join()

    # 2. get features of FLOW, using FlowDroid
    if FeatureCombination[1] == '1':
        pool = mp.Pool(int(NumFlowProcesses))
        for file in walk(Maldir, '.apk'):
            pool.apply_async(ExtractFlowFeatures, args=(file, 'Malware'))
        for file in walk(Gooddir, '.apk'):
            pool.apply_async(ExtractFlowFeatures, args=(file, 'Benign'))
        pool.close()
        pool.join()

        for file in walk('Data', '.Features'):
            with open(file, 'a') as f_a:
                if os.path.exists(file.replace('.Features',
                                               'FlowFeature.json')):
                    with open(file.replace('.Features', 'FlowFeature.json'),
                              'r') as f:
                        for line in f.readlines():
                            f_a.write(line)
                else:
                    os.system('rm -rf ' + file)

    # 3. classification
    Classification('Data' + os.path.sep + 'Malware',
                   'Data' + os.path.sep + 'Benign', 0.3)

    # 4. remove feature files
    for file in walk('Data', '.Features'):
        os.system('rm -rf ' + file)
Example #16
 def start_an(self, data):
     cls = Classification(data, ty='datacleaning')
     worker_cl = WorkerLong(cls.classify, 'datacleaning', int(self.monte),
                            self.new_path_model)
     worker_cl.signals.result.connect(self.print_output)
     worker_cl.signals.progress.connect(self.progress_fn)
     worker_cl.signals.progress.connect(self.prog_monte.setValue)
     worker_cl.signals.finished.connect(self.thread_complete)
     self.threadPool.start(worker_cl)
Example #17
 def Url2ic(self):
     if self.url_src.get():
         url = self.url_src.get()
         cap = cv2.VideoCapture(url)
         if (cap.isOpened()):
             ret, img = cap.read()
             if ret:
                 self.ic = ImgClass('')
                 self.ic.FromUrlSrc(img)
                 self.classfy = Classification(self.ic, self)
                 self.program = Mainsys(self.ic, self)
                 ip.CvShow('N', img)
Example #18
def start_from_terminal(app):
    """
    Parse command line options and start the server.
    """
    parser = optparse.OptionParser()
    parser.add_option(
        '-d', '--debug',
        help="enable debug mode",
        action="store_true", default=False)
    parser.add_option(
        '-p', '--port',
        help="which port to serve content on",
        type='int', default=5000)
    parser.add_option(
        '-g', '--gpu',
        help="use gpu mode",
        action='store_true', default=False)

    opts, args = parser.parse_args()
    app.clf = Classification()
    app.face = FaceVerification()
    app.od = ObjectDetection()
    app.caption = ImgCaption()



    # cv2.imshow("luke",img)
    # cv2.waitKey(0)
    # img1 = "luke1.jpg"
    # img2 = "luke2.jpg"
    # Same,drawImg1,drawImg2=app.face.verification(img1,img2)
    # if Same:
    #     print "same"
    # else:
    #     print "different"
    # drawImg = np.concatenate((drawImg1,drawImg2),axis=1)
    # cv2.imshow("face",drawImg)
    # cv2.waitKey(3000)
    # Initialize classifier + warm start by forward for allocation
    # with Manager() as manager:
    #     ret = manager.dict()
    #     p = Process(target=app.clf.classify_image,args=("test.jpg","jpg",ret))
    #     p.start()
    #     p.join()
    #     print ret
    #warm up
    # for i in range(2):
    #     app.clf.classify_image("test.jpg",'jpg')

    if opts.debug:
        app.run(debug=True, host='0.0.0.0', port=opts.port)
    else:
        start_tornado(app, opts.port)
Example #19
	def query_eer(self, limit_Y = 20):
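		# Expected error reduction: for each top-ranked candidate, retrain with each
		# likely label and score by the inverse of the expected entropy that remains
		# over the rest of the unlabeled pool.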
		ids, _ = self.query_margin()
		scores = []
		
		for ix, x in enumerate(self.Ux):
			if ix in ids[:self.optimize]:
				YP = self.clf.predict(x, all = True)
				YP.sort(key=operator.itemgetter(1), reverse=True)
				sums = 0.
				for ir, (yy, proba) in enumerate(YP):
					if ir == limit_Y: break
					temp_clf = Classification(self.Lx + [x], self.Ly + [yy], method = self.clf.method); temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C # TODO FIXME: do it in general not specifically for svm
					temp_clf.train()
					e_h1 = sum( [ temp_clf.uncertainty_entropy(dp) for dp in self.Ux if dp != x ] )
					sums += (proba) * e_h1
					
				informativeness = 1. / sums
			else: informativeness = 0.
			scores.append( informativeness )
			
		return self.sort_scores(scores)
Example #20
	def query_balanced_disag1(self, weighted = True, op=1):
		ids, _ = self.query_margin()
		scores = []
		scores_B = []
		
		for ix, x in enumerate(self.Ux):
			if ix in ids[:self.optimize*op]:
				# true_y = self.Uy[ix]
				true_y = self.clf.predict_label(x)
				
				temp_clf = Classification(self.Lx + [x], self.Ly + [true_y], method = self.clf.method)
				temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train()
				
				if not weighted:
					diff = sum([ 1. if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ])
				else:
					diff = sum([ abs(temp_clf.getPredictProba(1,dp) - self.clf.getPredictProba(1,dp)) if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ])
					# diff = sum([ Util.dist(temp_clf.h.predict_proba(dp)[0], self.clf.h.predict_proba(dp)[0]) if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ])
				
				balance = self.get_balance(x)
				informativeness = diff
			else:
				informativeness = 0.
				balance = 0.
				
			scores.append( informativeness )
			scores_B.append( balance )
			
		# scores_B = Util.normalize(scores_B)
		scores = [scr*scores_B[iscr] for iscr,scr in enumerate(scores)]
		
		return self.sort_scores(scores)
Example #21
	def get_change(self, x, y = None):
		if y is None:
			y = self.Uy[ self.Ux.index(x) ]
			# y = self.clf.predict_label(x)
		
		temp_clf = Classification(self.Lx + [x], self.Ly + [y], method = self.clf.method)
		temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train()
		
		v1 = [ self.clf.getPredictProba(1, dp) for dp in self.Ux if x != dp ]
		v2 = [ temp_clf.getPredictProba(1, dp) for dp in self.Ux if x != dp ]
		
		# informativeness = Util.dist(v1, v2)
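		# Angular distance between the two classifiers' probability vectors over Ux,
		# scaled by pi so the result lies in [0, 1].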
		informativeness = math.acos( cosine_similarity(v1, v2) ) / math.pi
		
		# v1 = []; v2 = []
		# for dp in self.Ux:
			# if x != dp:
				# v1 += [ v for v in self.clf.h.predict_proba( dp )[0] ]
				# v2 += [ v for v in temp_clf.h.predict_proba( dp )[0] ]
		# informativeness = distance.cosine(v1, v2)
		
		return informativeness
Example #22
    def __init__(self,
                 classifier_pattern=config.CLASSIFIER_PATTERN,
                 img_content=''):
        if classifier_pattern == 'static':
            self.classifier = Classification(config.IMG_PATH)
        else:
            self.classifier = Classification(config.IMG_PATH,
                                             img_content=img_content)

        self.search_engine = Search()

        self.camera = Camera()

        self.speaker = None

        self.led_device = None

        self.rotate = None

        self.classify_result = None

        self.garbage_tag = None

        self.try_time = 0
Example #23
File: Main.py  Project: asaadam/TextMining
    def main():
        array_data = {}
        path_data_uji = './Data/Data latih'
        array_kelas = []
        for root, dirs, files in os.walk(path_data_uji, topdown=False):
            for name in files:
                if name.endswith('.txt'):
                    get_root = root.split('/')
                    if (len(get_root) == 4):
                        array_kelas.append(get_root[3])
                        key = root.replace("./", "")
                        path_file = os.path.join(key, name)
                        data_docs = open(path_file, 'r', encoding="ISO-8859-1").read()
                        if key in array_data:
                            temp = array_data[key]
                            temp.append(data_docs)
                            array_data[key] = temp
                        else:
                            array_data[key] = [data_docs]
        array_document = [data for value in array_data.values() for data in value]

        path_data_uji = './Data/Data uji'
        print(len(array_document))
        array_data_testing = {}
        array_kelas_testing = []
        for root, dirs, files in os.walk(path_data_uji, topdown=False):
            for name in files:
                if name.endswith('.txt'):
                    get_root = root.split('/')
                    if (len(get_root) == 4):
                        array_kelas_testing.append(get_root[3])
                        key = root.replace("./", "")
                        path_file = os.path.join(key, name)
                        data_docs = open(path_file, 'r', encoding="ISO-8859-1").read()
                        if key in array_data_testing:
                            temp = array_data_testing[key]
                            temp.append(data_docs)
                            array_data_testing[key] = temp
                        else:
                            array_data_testing[key] = [data_docs]
        array_document_testing = [data for value in array_data_testing.values() for data in value]

        document =['Sekarang saya sedang suka memasak. Masakan kesukaan saya sekarang adalah nasi goreng. Cara memasak nasi goreng adalah nasi digoreng','Ukuran nasi sangatlah kecil, namun saya selalu makan nasi','Nasi berasal dari beras yang ditanam di sawah. Sawah berukuran kecil hanya bisa ditanami sedikit beras','Mobil dan bus dapat mengangkut banyak penumpang. Namun, bus berukuran jauh lebih besar dari mobil, apalagi mobil-mobilan','Bus pada umumnya berukuran besar dan berpenumpang banyak, sehingga bus tidak bisa melewati pemukiman','mobil formula satu melaju kencang di dalam balapan, max verstapen memenangkan gp brazil kemarin, namun sayang ke 2 kuda merah terpaksa gagal finish karena bertberakan satus sama lain','piere gasly memenangkan ajang perlombaan balap yang digelar di brazil kemarin.Mobil yang dikendarainya melaju kencang, namun sayang ke 2 ferari gagal finish karena bertaberakan satu sama lain.mobil melaju cepat, licah gesti dan tak terkalahkan','terjadi kelangkaan beras di dalam Indonesia sehingga harus mengimpor beras dari thailand.Padahal lahan sawah di Indonesia banyak',' bus buatan scania sukses dipasar menjadi bus terbesar di jagat raya.Bus ini dapat dinaiki oleh banyak penumpan.Dan lebih besar dari pada mobil']
        kelas =['A', 'A', 'C', 'B', 'B','D','D','C','B']
        klasifikasi= Classification()
        document_uji = ['nasi goreng pedas','nasi goreng enak sekali','mobil gasly memang sudah cepat','bus ini diluncurkan di Indonesia']
        print(array_document_testing)
        kelas_uji=['A','A','D','C']
        klasifikasi.train(array_document, array_kelas)
        hasil = klasifikasi.testing(array_document_testing)
        klasifikasi.hitung_akurasi(hasil,array_kelas_testing)
Example #24
class Execute:
    def __init__(self):
        self.classification = Classification()
        self.extraction = Extraction()
        self.elastic = Elastic()
        self.z = Database()

    def execute(self, query):
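        # Classify the query type, extract its fields, then route the result to
        # the Elastic handler that matches the predicted type.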
        try:
            json_data = json.loads(query)
            prediction = self.classification.predict(json_data['query'])
            results = self.extraction.processQuery(prediction[0][0],
                                                   json_data['query'])
            if results[0] == 0.0:
                status = self.elastic.process0(results)
                return status
            elif results[0] == 1.0:
                houses = self.elastic.process1(results)
                return houses
            elif results[0] == 2.0:
                houses = self.elastic.process2(results)
                return houses
            elif results[0] == 3.0:
                houses = self.elastic.process3(results)
                return houses
            elif results[0] == 4.0:
                houses = self.elastic.process4(results)
                return houses
            elif results[0] == 5.0:
                houses = self.elastic.process5(results)
                return houses
            elif results[0] == 6.0:
                houses = self.elastic.process6(results)
                return houses
            elif results[0] == 7.0:
                houses = self.elastic.process7(results)
                return houses
            elif results[0] == 8.0:
                status = self.elastic.process8(results)
                return status
            elif results[0] == 9.0:
                status = self.elastic.process9(results)
                return status
            else:
                return "query type " + str(results[0]) + "not supported"
        except Exception as ex:
            print(str(ex))
Example #25
	def __init__(self, Lx, Ly, Ux, Uy, Tx, Ty, method = "svm", budget = 1000):
		self.Lx = Lx
		self.Ly = Ly
		self.Ux = Ux
		self.Uy = Uy # TODO should not be here
		self.Tx = Tx # TODO should not be here
		self.Ty = Ty # TODO should not be here
		
		self.optimization_limit = 20
		self.optimization_method = "margin" # margin proba entropy random weight expectedErrorReduction etc
		
		self.budget = budget
		self.accuracys = []
		
		self.clf = Classification( self.Lx, self.Ly, method = method, Vx = Lx+Ux, Vy = Ly+Uy )
		self.clf.train()
		
		self.viz_A = []; self.viz_B = []; self.viz_C = []; self.viz_D = []; self.viz_E = []; self.viz_F = []
Example #26
	def get_disag1(self, x, weighted = False):
		true_y = self.Uy[ self.Ux.index(x) ]
		# true_y = self.clf.predict_label(x)
		
		temp_clf = Classification(self.Lx + [x], self.Ly + [true_y], method = self.clf.method)
		temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train()
		
		if not weighted:
			diff = sum([ 1. if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ])
		else:
			diff = sum([ 1.-abs(temp_clf.getPredictProba(1,dp) - self.clf.getPredictProba(1,dp)) if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ])
		
		informativeness = diff
		return informativeness
Example #27
class CPUImpl:
    def __init__(self, size, features):
        self.size = size
        self.features = features

        self.test_data, t = make_blobs(n_samples=self.size,
                                       n_features=self.features)
        self.test_tensor_data = np.random.random(
            (self.size, self.features, self.features))

        self.kmeans = Clustering(10)
        self.svm = Classification()
        self.gauss = MultivariateGauss()
        self.tensor = TensorDecomposition()

    def evaluate(self):
        print("Data set: %s samples" % self.size)
        print("Features: %s" % self.features)
        print("======")
        print("KMeans: %s s" % self.kmeans.evaluate(self.test_data))
        print("OneClassSVM: %s s" % self.svm.evaluate(self.test_data, "svm"))
        print("Gauss: %s s" % self.gauss.evaluate(self.test_data))
        print("Parafac: %s s" % self.tensor.evaluate(self.test_tensor_data))
Example #28
    def start_an(self):
        if not os.path.exists(self.res_path):
            cls = Classification(self.path_work, ty='analysis')
            self.progress(title='Analysis')
            worker_cl = WorkerLong(cls.classify, self.type_an, self.monte_c, self.model_name)
            worker_cl.signals.progress.connect(self.progress_fn)
            worker_cl.signals.progress.connect(self.ui.onCountChanged)
            worker_cl.signals.finished.connect(self.thread_cl_complete)
            self.threadPool.start(worker_cl)
        else:
            self.view('Pred_class', 'result')
            print('process an already done!!!')

        self.v_no_overlayAct.setEnabled(True)
        self.v_all_classAct.setEnabled(True)

        self.v_acAct.setEnabled(True)
        self.v_adAct.setEnabled(True)
        self.v_hAct.setEnabled(True)

        self.v_tot_uAct.setEnabled(True)
        self.v_a_uAct.setEnabled(True)
        self.v_e_uAct.setEnabled(True)
Example #29
    def generateNewModelRULSIF(self, trgx_matrix, srcx_matrix, srcy_matrix,
                               alpha, sigma_list, lambda_list, b, fold,
                               subsize):
        model = Model()

        if len(srcx_matrix) == 0 or len(trgx_matrix) == 0:
            raise Exception(
                'Source or Target stream should have some elements')

        # Create new model
        print('Target model creation')
        model.model = Classification.get_model(trgx_matrix, srcx_matrix,
                                               srcy_matrix, alpha, sigma_list,
                                               lambda_list, b, fold, subsize)

        # compute source and target weight
        print('Computing model weights')
        model.weight = model.computeModelWeightRULSIF(trgx_matrix)

        # update ensemble
        index = self.__addModelRULSIF(model, trgx_matrix)
        if index != -1:
            print('Ensemble updated at ' + str(index))
Example #30
	def __init__(self, Lx, Ly, Ux, Uy, Tx, Ty, method = "svm", budget = 251, optimize = 50, datasetname="dataset"):
		self.datasetname = datasetname
		
		self.Lx = Lx
		self.Ly = Ly
		self.Ux = Ux
		self.Uy = Uy # TODO should not be here
		self.Tx = Tx # TODO should not be here
		self.Ty = Ty # TODO should not be here
		
		self.optimize = optimize
		
		self.budget = budget
		self.accuracys = []
		
		self.clf = Classification( self.Lx, self.Ly, method = method, Vx = Lx+Ux, Vy = Ly+Uy )
		self.clf.train()
		
		self.mab = Bandit( algos = np.arange(0., 1.1, 0.1), method = "boltzmann" )
		# self.mab = Bandit( algos = np.arange(0., 1.1, 0.1), method = "UCB" )
		
		# self.mab2 = Bandit( algos = ["disag1", "disag2"], method = "boltzmann" )
		self.mab2 = Bandit( algos = ["disag1", "disag2"], method = "reinforcement" )
Example #31
    def start(self, cata, item_i):

        for i in range(1, 101, 1):
            self.progress['value'] = i
            self.update_idletasks()
            self.label1.config(text=str(i) + "%")
            time.sleep(0.015)
        self.progress['value'] = 100

        item_info = ""
        if cata == "plants":
            if item_i == "Apple":
                item_info = "icon_image/trree_ap.png@Detect Apple plant disease@models/apple_plant_Model.p@#E40000"
            elif item_i == "Corn":
                item_info = "icon_image/corn_plant2.png@Detect Corn plant disease@models/corn_plant_Model.p@#22A61E"
            elif item_i == "Grape":
                item_info = "icon_image/grape_plant.JPG@Detect Grape plant disease@models/grape_plant_Model.p@#678623"
            elif item_i == "Cherry":
                item_info = "icon_image/cherry_plant.jpg@Detect Cherry Plant disease@models/cherry_plant_Model.p@#265909"
            elif item_i == "Pepper":
                item_info = "icon_image/peeper_plant.jpg@Detect Pepper plant disease@models/pepper_plant_Model.p@#b70000"
            elif item_i == "Potato":
                item_info = "icon_image/potato_plant1.png@Detect Potato plant disease@models/potato_plant_Model.p@#a67d00"
            elif item_i == "Tomato":
                item_info = "icon_image/tomato_plant.jpg@Detect Tomato plant disease@models/tomato_plant_Model.p@#620D0D"

        elif cata == 'fruits':
            if item_i == "Apple":
                item_info = "icon_image/apple.jpg@Detect Apple Fruit disease@models/apple_fruit_Model.p@#E40000"
            elif item_i == "Banana":
                item_info = "icon_image/banana.jpg@Detect Banana Fruit disease@models/banana_fruit_Model.p@#ffef00"
            elif item_i == 'Orange':
                item_info = "icon_image/orange.jpg@Detect Orange Fruit disease@models/orange_fruit_Model.p@#f76a04"

        classification_page = Classification(item_info)

        self.destroy()
Example #32
	def query_disagreement_test(self):
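		# Diagnostic routine: for each candidate, compare six informativeness measures
		# against the test accuracy obtained when the candidate is added with its true
		# label, and dump the comparisons as scatter plots.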
		ids, _ = self.query_margin()
		scores = []
		plots_Y = []; plots_X0 = []; plots_X1 = []; plots_X2 = []; plots_X3 = []; plots_X4 = []; plots_X5 = []; plots_X6 = []; viz = Visualize()
		
		commitee = []
		for idp, dp in enumerate(self.Ux):
			if idp in ids[:self.optimize]:
				true_y = self.Uy[idp]
				# true_y = self.clf.predict_label(dp)
				
				temp_clf = Classification(self.Lx + [dp], self.Ly + [true_y], method = self.clf.method)
				temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train()
				commitee.append( (temp_clf, 1) )
				
		# ===========================
		# sampled = random.sample(ids, 100)
		
		for ix, x in enumerate(self.Ux):
			# if ix in sampled:
			if ix in ids[:self.optimize*9999999]:
				informativeness1 = self.get_disag1(x, weighted = False)
				informativeness2 = self.get_disag2(x, commitee, weighted = False)
				informativeness3 = self.get_disag1(x, weighted = True)
				informativeness4 = self.get_disag2(x, commitee, weighted = True)
				informativeness5 = self.clf.uncertainty_prediction(x)
				informativeness6 = self.get_balance(x)
				
				temp_clf = Classification(self.Lx + [x], self.Ly + [self.Uy[ix]], method = self.clf.method)
				temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train()
				acc = temp_clf.getTestAccuracy( self.Tx, self.Ty )
				
				plots_X0.append( acc )
				plots_X1.append( informativeness1 )
				plots_X2.append( informativeness2 )
				plots_X3.append( informativeness3 )
				plots_X4.append( informativeness4 )
				plots_X5.append( informativeness5 )
				plots_X6.append( informativeness6 )
				plots_Y.append( 'r' if self.Uy[ix] != self.clf.predict_label(x) else 'b' )
				
				# One scatter plot per pair: measures against each other, and each
				# measure against the retrained-model accuracy.
				for xs, ys, tag in [ (plots_X1, plots_X2, '1-2'), (plots_X3, plots_X4, '3-4'),
									 (plots_X1, plots_X0, '1-acc'), (plots_X2, plots_X0, '2-acc'),
									 (plots_X3, plots_X0, '3-acc'), (plots_X4, plots_X0, '4-acc'),
									 (plots_X5, plots_X0, '5-acc'), (plots_X6, plots_X0, '6-acc') ]:
					fig, axs = plt.subplots( 1, 1, sharex=True )
					axs.scatter( xs, ys, c = plots_Y, marker = "o", cmap = plt.copper() )
					plt.savefig(str(len(self.Lx)) + self.datasetname + '.' + tag + '.png'); plt.close()
				
				# plots = [ plots_X1, plots_X2, plots_X3, plots_X4, plots_X5, plots_X6 ]
				# fig, axs = plt.subplots( 5, 1, sharex=True )
				# axs[0].scatter( Util.normalize(plots_X1), plots_X0, c = plots_Y, marker = "o", cmap = plt.copper() )
				# axs[1].scatter( Util.normalize(plots_X2), plots_X0, c = plots_Y, marker = "o", cmap = plt.copper() )
				# axs[2].scatter( Util.normalize(plots_X3), plots_X0, c = plots_Y, marker = "o", cmap = plt.copper() )
				# axs[3].scatter( Util.normalize(plots_X4), plots_X0, c = plots_Y, marker = "o", cmap = plt.copper() )
				# axs[4].scatter( Util.normalize(plots_X5), plots_X0, c = plots_Y, marker = "o", cmap = plt.copper() )
				# axs[5].scatter( Util.normalize(plots_X6), plots_X0, c = plots_Y, marker = "o", cmap = plt.copper() )
				# plt.savefig(str(len(self.Lx)) + self.datasetname+'.png')
				# plt.close()

				informativeness = acc
			else: informativeness = 0.
			
			scores.append( informativeness )
		
		return self.sort_scores(scores)
Example #33
def main():

    # Global control parameters, used for debugging, documentation etc...
    showAndSaveImagesFlag = False  # However, the classified feature plot and final classification are still shown...
    normalization = True # Showing normalization data
    vizualize = True
    saveImagePath = "/home/christian/workspace_python/MasterThesis/FinalProject/writefiles/"

    # Initialize the Input component with cameraIndex = 0 (webcamera inbuilt in PC)
    # Input: Plug and play webcamera
    # Output: RGB image, training data and testing data
    i = Input(0)

    # Initialize the Preprocessing component with the training data1, 2, 3
    p1 = Preprocessing(i.trainingData1, 1, saveImagePath)
    p2 = Preprocessing(i.trainingData2, 2, saveImagePath)
    p3 = Preprocessing(i.trainingData3, 3, saveImagePath)

    # Initializing the Segmentation component with 3 classes.
    # Using global HSV setting
    s1 = Segmentation(i.trainingData1, p1.imgFrontGround, p1.imgSeedandSproutRepaired, p1.imgSproutRepaired, 1, saveImagePath)
    s2 = Segmentation(i.trainingData2, p2.imgFrontGround, p2.imgSeedandSproutRepaired, p2.imgSproutRepaired, 2, saveImagePath)
    s3 = Segmentation(i.trainingData3, p3.imgFrontGround, p3.imgSeedandSproutRepaired, p3.imgSproutRepaired, 3, saveImagePath)

    # Choose which feature to use:
    # featureCenterOfMassList,                   # feature 0
    # featureLengthList,                         # feature 1
    # featureWidthList,                          # feature 2
    # featureRatioList,                          # feature 3
    # featureNumberOfSproutPixelsList,           # feature 4
    # featureHueMeanList,                        # feature 5
    # featureHueStdList,                         # feature 6
    # featureClassStampList                      # feature 7
    featureIndexX = 3
    featureIndexY = 4

    # Initialize the classification component for the training data
    c = Classification(s1.listOfFeatures, s2.listOfFeatures, s3.listOfFeatures, featureIndexX, featureIndexY, vizualize, saveImagePath, normalization)

    # Initialize the Output component
    o = Output()

    # At this point, the whole system has been taught with supervised learning.
    # Training data has been loaded, preprocessed, segmented, feature extracted and classified.
    # From here, the testing data is loaded using the webcam; each seed is preprocessed, segmented and classified
    # based on where the line of separation lies.

    userCloseDown = False
    TrackBarInit(i)

    if False:
        ShowAndSaveTrainingFigures(i, p1, p2, p3, s1, s2, s3, saveImagePath)

    # while i.cameraIsOpen: # To avoid being dependent on whether the camera is open, we just assume it always is.
    # We use still images anyway at the moment...
    while True:
        # print "Camera is open..."
        # If the user has not pushed the start button.
        TrackBarStart(i)

        # If the user wants to close down the program, we do it.
        if userCloseDown:
            break

        #############################################################
        # After the training we run in this while loop...
        #############################################################

        # Clear the trackbar setting window, since we only want to look at the final classification image
        DestroyWindows()

        # Input from webcamera - Testing data
        # imgInput = i.getCroppedImg()
        imgInput = i.testingData # Using a still test image, when the real USB camera is not available

        # The input image is processed through each component as follows, with class 0, since it is unknown which class the
        # test image belongs to...
        p = Preprocessing(imgInput, 0, saveImagePath)

        # The FrontGround image and SeedAndSprout image is used in the segmentation component
        # s = Segmentation(imgInput, p.imgFrontGround, p.imgSeedAndSprout, p.imgSprout, 0)
        s = Segmentation(imgInput, p.imgFrontGround, p.imgSeedandSproutRepaired, p.imgSproutRepaired, 0, saveImagePath)

        if vizualize:
            ShowFeaturePlotClass0(featureIndexX, featureIndexY, s, c, saveImagePath, normalization)

        featureClass1ListX, \
        featureClass1ListY, \
        centerClass1List, \
        featureClass2ListX, \
        featureClass2ListY, \
        centerClass2List, \
        featureClass3ListX, \
        featureClass3ListY, \
        centerClass3List = c.getClassifiedLists3classes(s.listOfFeatures[featureIndexX], s.listOfFeatures[featureIndexY], s.listOfFeatures[0], imgInput)

        if vizualize:
            ShowFeaturePlotClass0Classified(featureClass1ListX, featureClass2ListX, featureClass3ListX, featureClass1ListY, featureClass2ListY, featureClass3ListY,c, saveImagePath)

        #############################################
        # Finally we show the result
        ############################################

        cv2.imshow("The final classification", c.imgClassified)
        cv2.imwrite(saveImagePath + "imgClassified.png", c.imgClassified)

        # If the user presses "ESC", the program closes down.
        k = cv2.waitKey(30) & 0xff
        if k == 27:
            userCloseDown = True
            # i.closeDown()
            break
    if userCloseDown:
        print("User closed the program...")
    else:
        print("The camera is not open.... ")
Example #34
	def train(self, mtd = "margin", backupfile = "backupfile.txt"):
		for i, x in enumerate(self.Ux):
			y1 = self.clf.predict_label(x)
			
			if mtd == "supervised": informativeness = sys.float_info.max
			if mtd == "margin": informativeness = self.clf.uncertainty_margin(x)
			
			# ===============================
			id_th = self.mab.choose()
			self.th = self.mab.algos[ id_th ]
			print "Choosen =", self.th, "nb_choices =", self.mab.nb_choices, ("avg rwd=", [ np.mean(L) for L in self.mab.rewards ] if self.mab.rewards[0]!=[] else " "), "expected=", sum([ a*l for a,l in zip(self.mab.algos,self.mab.nb_choices) ]) / sum(self.mab.nb_choices)
			prev_clf = Classification(self.Lx, self.Ly, method = self.clf.method)
			prev_clf.GAMMA, prev_clf.C = self.clf.GAMMA, self.clf.C; prev_clf.train()
			# ===============================
			# avg_rewards = [ np.mean(L[:-20]) if len(L)>0 else 1. for L in self.mab.rewards ]
			# self.th = sum([ a*l for a,l in zip(self.mab.algos,avg_rewards) ]) / sum(avg_rewards)
			# print "Choosen =", self.th, "avg rwd=", avg_rewards
			# ===============================
			
			if informativeness > self.th:
				qx = x
				qy = self.Uy[i]
				
				self.Lx.append(qx)
				self.Ly.append(qy)
				self.queried += 1
			
				self.clf.X = self.Lx; self.clf.Y = self.Ly; self.clf.train()
			
			# ===============================
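			# Reward is highest when the fraction of queried labels stays close to the
			# 10% target query rate.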
			reward = 1. - abs( 0.1 - self.queried / (i+1.) )
			self.mab.update(id_th, reward)
			# ===============================
			# for idt in range(len(self.mab.algos)):
				# reward = 1. - abs( 0.3 - (self.queried-1+1) / (i+1.) ) if informativeness > self.mab.algos[idt] else 1. - abs( 0.4 - (self.queried-1) / (i+1.) )
				# self.mab.update(idt, reward)
			# ===============================
			
			
			self.ths.append( self.th )
			self.infos.append( informativeness )
			self.accuracys.append( self.clf.getTestAccuracy( self.Tx, self.Ty ) )
			self.queries.append( self.queried )
			
			self.sup_infos.append( self.sup_clf.uncertainty_margin(x) ) # TODO should not be here
			self.sup_clf.X = self.Lx0+self.Ux[:i+1]; self.sup_clf.Y = self.Ly0+self.Uy[:i+1]; self.sup_clf.train() # TODO should not be here
			self.sup_accuracys.append( self.sup_clf.getTestAccuracy( self.Tx, self.Ty ) ) # TODO should not be here
			
			
			'''
			if i>10:
				# last_infos = self.infos[-100:] if len(self.infos) > 100 else self.infos[:]
				# self.th = np.mean( last_infos )
				
				if informativeness > self.th: # queried
					if y1 == qy: # but was correctly predicted
						self.th = self.th + 0.1 * (informativeness - self.th)
				else:
					if y1 != qy:
						self.th = self.th - 0.1 * (self.th - informativeness )
			'''
			
			
			print "i=", i+1, self.queried, self.queried / (i+1.), "-- acc=%.4f"%(self.accuracys[-1]*100), "%.4f"%(self.sup_accuracys[-1]*100), "-- %.4f"%(np.mean(self.accuracys)*100), "%.4f"%(np.average(self.accuracys, weights = range(1,1+len(self.accuracys)))*100), "--", informativeness
			
			if (i+1)%10 == 0:
				Util.pickleSave(backupfile, self); viz = Visualize()
				
				viz.do_plot( [range(len(self.infos)), self.ths], color = 'b', marker = '-' )
				viz.do_plot( [range(len(self.infos)), self.infos], color = 'r', marker = '-' )
				viz.do_plot( [range(len(self.sup_infos)), self.sup_infos], color = 'y', marker = '-' )
				viz.end_plot( fig = backupfile+"_stream_inf.png" )
				
				viz.do_plot( [range(len(self.accuracys)), self.accuracys], color = 'r', marker = '-' )
				viz.do_plot( [range(len(self.sup_accuracys)), self.sup_accuracys], color = 'y', marker = '-' )
				viz.end_plot( fig = backupfile+"_stream_acc.png" )
				
				viz.do_plot( [range(len(self.queries)), self.queries], color = 'r', marker = '-' )
				viz.do_plot( [range(len(self.queries)), range(len(self.queries))], color = 'y', marker = '-' )
				viz.end_plot( fig = backupfile+"_stream_lab.png" )
Example #35
from Classification import Classification

'''
    Author: Rowland DePree              Test.py

    A program designed to test the classification program.

    To make this work, change the first param of Classification to the location of the training data and the second param to
    the training label data. Then change the param of classify_new_senetence to the location of the test data.
'''

c = Classification(r'C:\Users\depre\PycharmProjects\Classification_Assignment\traindata',
                   r'C:\Users\depre\PycharmProjects\Classification_Assignment\trainlabels')
c.classify_new_senetence(r'C:\Users\depre\PycharmProjects\Classification_Assignment\testdataforclass')
c.print_to_document(r'C:\Users\depre\PycharmProjects\Classification_Assignment\resultlabelsforclass')
Example #36
class OnlineActiveLearning:
	def __init__(self, Lx, Ly, Ux, Uy, Tx, Ty, method = "svm", budget = 1000):
		self.Lx0 = Lx[:]
		self.Ly0 = Ly[:]
		
		self.Lx = Lx
		self.Ly = Ly
		self.Ux = Ux # TODO should not be here
		self.Uy = Uy # TODO should not be here
		self.Tx = Tx # TODO should not be here
		self.Ty = Ty # TODO should not be here
		
		self.th = 0.9
		self.queried = 0
		self.queries = []
		
		self.ths = []
		self.infos = []
		self.accuracys = []
		self.clf = Classification( self.Lx, self.Ly, method = method, Vx = Lx+Ux, Vy = Ly+Uy ); self.clf.train()
		
		self.sup_infos = [] # TODO should not be here
		self.sup_accuracys = [] # TODO should not be here
		self.sup_clf = Classification( self.Lx, self.Ly, method = method, Vx = Lx+Ux, Vy = Ly+Uy ); self.sup_clf.train() # TODO should not be here
		
		# self.mab = Bandit( algos = np.arange(0., 1.1, 0.1), method = "UCB", alpha = 1 )
		self.mab = Bandit( algos = np.arange(0., 1.1, 0.1), method = "reinforcement", alpha = 1 )
		
	#---------------------------------------
	def train(self, mtd = "margin", backupfile = "backupfile.txt"):
		for i, x in enumerate(self.Ux):
			y1 = self.clf.predict_label(x)
			
			if mtd == "supervised": informativeness = sys.float_info.max
			if mtd == "margin": informativeness = self.clf.uncertainty_margin(x)
			
			# ===============================
			id_th = self.mab.choose()
			self.th = self.mab.algos[ id_th ]
			print "Choosen =", self.th, "nb_choices =", self.mab.nb_choices, ("avg rwd=", [ np.mean(L) for L in self.mab.rewards ] if self.mab.rewards[0]!=[] else " "), "expected=", sum([ a*l for a,l in zip(self.mab.algos,self.mab.nb_choices) ]) / sum(self.mab.nb_choices)
			prev_clf = Classification(self.Lx, self.Ly, method = self.clf.method)
			prev_clf.GAMMA, prev_clf.C = self.clf.GAMMA, self.clf.C; prev_clf.train()
			# ===============================
			# avg_rewards = [ np.mean(L[:-20]) if len(L)>0 else 1. for L in self.mab.rewards ]
			# self.th = sum([ a*l for a,l in zip(self.mab.algos,avg_rewards) ]) / sum(avg_rewards)
			# print "Choosen =", self.th, "avg rwd=", avg_rewards
			# ===============================
			
			if informativeness > self.th:
				qx = x
				qy = self.Uy[i]
				
				self.Lx.append(qx)
				self.Ly.append(qy)
				self.queried += 1
			
				self.clf.X = self.Lx; self.clf.Y = self.Ly; self.clf.train()
			
			# ===============================
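			# Reward is highest when the fraction of queried labels stays close to the
			# 10% target query rate.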
			reward = 1. - abs( 0.1 - self.queried / (i+1.) )
			self.mab.update(id_th, reward)
			# ===============================
			# for idt in range(len(self.mab.algos)):
				# reward = 1. - abs( 0.3 - (self.queried-1+1) / (i+1.) ) if informativeness > self.mab.algos[idt] else 1. - abs( 0.4 - (self.queried-1) / (i+1.) )
				# self.mab.update(idt, reward)
			# ===============================
			
			
			self.ths.append( self.th )
			self.infos.append( informativeness )
			self.accuracys.append( self.clf.getTestAccuracy( self.Tx, self.Ty ) )
			self.queries.append( self.queried )
			
			self.sup_infos.append( self.sup_clf.uncertainty_margin(x) ) # TODO should not be here
			self.sup_clf.X = self.Lx0+self.Ux[:i+1]; self.sup_clf.Y = self.Ly0+self.Uy[:i+1]; self.sup_clf.train() # TODO should not be here
			self.sup_accuracys.append( self.sup_clf.getTestAccuracy( self.Tx, self.Ty ) ) # TODO should not be here
			
			
			'''
			if i>10:
				# last_infos = self.infos[-100:] if len(self.infos) > 100 else self.infos[:]
				# self.th = np.mean( last_infos )
				
				if informativeness > self.th: # queried
					if y1 == qy: # but was correctly predicted
						self.th = self.th + 0.1 * (informativeness - self.th)
				else:
					if y1 != qy:
						self.th = self.th - 0.1 * (self.th - informativeness )
			'''
			
			
			print "i=", i+1, self.queried, self.queried / (i+1.), "-- acc=%.4f"%(self.accuracys[-1]*100), "%.4f"%(self.sup_accuracys[-1]*100), "-- %.4f"%(np.mean(self.accuracys)*100), "%.4f"%(np.average(self.accuracys, weights = range(1,1+len(self.accuracys)))*100), "--", informativeness
			
			if (i+1)%10 == 0:
				Util.pickleSave(backupfile, self); viz = Visualize()
				
				viz.do_plot( [range(len(self.infos)), self.ths], color = 'b', marker = '-' )
				viz.do_plot( [range(len(self.infos)), self.infos], color = 'r', marker = '-' )
				viz.do_plot( [range(len(self.sup_infos)), self.sup_infos], color = 'y', marker = '-' )
				viz.end_plot( fig = backupfile+"_stream_inf.png" )
				
				viz.do_plot( [range(len(self.accuracys)), self.accuracys], color = 'r', marker = '-' )
				viz.do_plot( [range(len(self.sup_accuracys)), self.sup_accuracys], color = 'y', marker = '-' )
				viz.end_plot( fig = backupfile+"_stream_acc.png" )
				
				viz.do_plot( [range(len(self.queries)), self.queries], color = 'r', marker = '-' )
				viz.do_plot( [range(len(self.queries)), range(len(self.queries))], color = 'y', marker = '-' )
				viz.end_plot( fig = backupfile+"_stream_lab.png" )
				'''
				colors = ['r', 'b', 'g', 'k', 'm', 'c', '0.10', '0.35', '0.60', '0.90']
				viz.plot( zip(*self.Lx+self.Ux), fig = backupfile+"__.png", color = [colors[int(l)] for l in self.Ly+self.Uy], marker = 'o' )
				viz.do_plot( zip(*self.Ux), color = ['y']*len(self.Ux), marker = '.' )
				viz.do_plot( zip(*self.Lx), color = [colors[int(l)] for l in self.Ly], marker = 'o' )
				viz.end_plot( fig = backupfile+"_.png" )
				'''
	
	#---------------------------------------
	def get_change(self, prev_clf, curr_clf, U):
		v1 = [ prev_clf.getPredictProba(1, dp) for dp in U ]
		v2 = [ curr_clf.getPredictProba(1, dp) for dp in U ]
		if v1 == v2: return 0.
		
		# Assuming cosine_similarity is sklearn.metrics.pairwise.cosine_similarity,
		# it expects 2D arrays and returns a matrix; the clip guards acos against
		# floating-point values slightly outside [-1, 1].
		cos = min(1., max(-1., cosine_similarity([v1], [v2])[0][0]))
		return math.acos(cos) / math.pi
예제 #37
import sys  # needed for sys.argv below; logger, WebGME and Classification are assumed to be defined by the surrounding script
PORT = sys.argv[1]
COMMIT_HASH = sys.argv[2].strip('"')
BRANCH_NAME = sys.argv[3].strip('"')
ACTIVE_NODE_PATH = sys.argv[4].strip('"')
ACTIVE_SELECTION_PATHS = []

if sys.argv[5] != '""':
    ACTIVE_SELECTION_PATHS = sys.argv[5].strip('"').split(',')
    if ACTIVE_SELECTION_PATHS[0] == '':
        ACTIVE_SELECTION_PATHS.pop(0)

NAMESPACE = sys.argv[6].strip('"')

logger.debug('commit-hash: {0}'.format(COMMIT_HASH))
logger.debug('branch-name: {0}'.format(BRANCH_NAME))
logger.debug('active-node-path: {0}'.format(ACTIVE_NODE_PATH))
logger.debug('active-selection-paths: {0}'.format(ACTIVE_SELECTION_PATHS))
logger.debug('name-space: {0}'.format(NAMESPACE))

# Create an instance of WebGME and the plugin
webgme = WebGME(PORT, logger)
plugin = Classification(webgme, COMMIT_HASH, BRANCH_NAME, ACTIVE_NODE_PATH,
                        ACTIVE_SELECTION_PATHS, NAMESPACE)

# Do the work
plugin.main()

# Finally disconnect from the zmq-server
webgme.disconnect()
예제 #39
    def Trainning(self, Classify: Classification):
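        # Interactive training: each detected character contour is shown in turn and
        # the user presses the matching key ('0'-'9' or 'a'-'z', per `keys` below);
        # the key's ASCII code labels the 10x10 binarized patch, and the accumulated
        # samples/labels are saved to samples.data and res.data.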

        Main = self.Main
        Main.SetConsole('Train!')
        Target = self.ic
        pic, ClassContours = Classify.Predict(Target)
        process = Target.BinaryProcess()

        #area = ip.MappingCnt(contours)
        if os.path.isfile('samples.data') and os.path.isfile('res.data'):
            print('LoadData')
            Main.SetConsole('LoadData!')
            samples = np.loadtxt('samples.data', np.float32)
            responses = np.loadtxt('res.data', np.float32)

        else:
            print('NewData')
            Main.SetConsole('NewData!')
            samples = np.empty((0, 100))
            responses = []

        keys = [i for i in range(48, 58)] + [j for j in range(97, 123)]
        close = 0
        for cnt in ClassContours:
            original = Target.img.copy()
            [x, y, w, h] = cv2.boundingRect(cnt)
            cv2.rectangle(original, (x, y), (x + w, y + h), (0, 0, 255), 2)
            TargetPxs = process[y:y + h, x:x + w]
            TargetPxsSmall = cv2.resize(TargetPxs, (10, 10))

            cv2.imshow('CarLicense', original)
            while close == 0:
                key = cv2.waitKey(100)
                if key != -1:
                    print(key)
                if cv2.getWindowProperty('CarLicense', 0) == -1:
                    print('Close')
                    Main.SetConsole('Close!')
                    close = 1

                if key == 27:  # (escape to quit)
                    cv2.destroyAllWindows()
                    close = 1
                    print('Esc')
                    Main.SetConsole('Esc!')

                elif key == 32:
                    print('Skip')
                    Main.SetConsole('Skip!')

                elif key in keys:
                    responses = np.append(responses, key)
                    sample = TargetPxsSmall.reshape((1, 100))
                    samples = np.append(samples, sample, 0)
                    Main.SetConsole('Add Sample!')
                    break

            if close == 1:
                return 0

        cv2.destroyAllWindows()
        responses = np.array(responses, np.float32)
        responses = responses.reshape((responses.size, 1))
        print('training complete')
        Main.SetConsole('Training complete, saving!')

        np.savetxt('samples.data', samples)
        np.savetxt('res.data', responses)
        return 1
예제 #40
class ActiveLearning:
	def __init__(self, Lx, Ly, Ux, Uy, Tx, Ty, method = "svm", budget = 1000):
		self.Lx = Lx
		self.Ly = Ly
		self.Ux = Ux
		self.Uy = Uy # TODO should not be here
		self.Tx = Tx # TODO should not be here
		self.Ty = Ty # TODO should not be here
		
		self.optimization_limit = 20
		self.optimization_method = "margin" # margin proba entropy random weight expectedErrorReduction etc
		
		self.budget = budget
		self.accuracys = []
		
		self.clf = Classification( self.Lx, self.Ly, method = method, Vx = Lx+Ux, Vy = Ly+Uy )
		self.clf.train()
		
		self.viz_A = []; self.viz_B = []; self.viz_C = []; self.viz_D = []; self.viz_E = []; self.viz_F = []
		
	#---------------------------------------
	def train(self, mtd = "margin", backupfile = "backupfile"): # TODO implement sample_weight + make method to shuffle and return sublist with data_limit
		backupfile += ".opt-"+str(self.optimization_limit)+"-"+self.optimization_method+".txt"
		for i in range(self.budget):
			if len(self.Ux) <= 1: break
			# self.viz_A = []; self.viz_B = []; self.viz_C = []; self.viz_D = []; self.viz_E = []; self.viz_F = []
			
			ids, scores = self.sortForInformativeness(mtd)
			id = ids[0]
			
			qx = self.Ux[id]
			qy = self.Uy[id]
			
			self.Lx.append(qx)
			self.Ly.append(qy)
			self.Ux.pop(id)
			self.Uy.pop(id)
			
			self.clf.X = self.Lx; self.clf.Y = self.Ly
			self.clf.train()
			
			test_accuracy = self.clf.getTestAccuracy( self.Tx, self.Ty )
			self.accuracys.append( test_accuracy )
			
			print("i=", i+1, "; acc=%.4f"%(test_accuracy*100), "%.4f"%(np.mean(self.accuracys)*100), "%.4f"%(np.average(self.accuracys, weights = range(1,1+len(self.accuracys)))*100), scores[0])
			
			if (i+1)%10 == 0:
				Util.pickleSave(backupfile, self)
				viz = Visualize(); viz.plot( [range(len(self.accuracys)), self.accuracys], fig = backupfile+".png", color = 'r', marker = '-' )
			
	#---------------------------------------
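	# Ranks the unlabeled points by the chosen informativeness criterion. Expensive
	# criteria (etc, expectedErrorReduction, ...) are first narrowed down to the
	# top optimization_limit candidates under the cheaper optimization_method.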
	def sortForInformativeness(self, mtd):
		if mtd in ["etc", "etc_", "expectedErrorReduction", "weight", "optimal", "test", "intuition", "intuition2", "intuitionM"]: # "intuition2"/"intuitionM" added so `ids` is defined for those branches below
			ids, scores = self.sortForInformativeness(self.optimization_method)
			
		scores = []
		for ix, x in enumerate(self.Ux):
			y1, y2, p1, p2 = self.clf.getMarginInfo(x)
			
			if mtd == "intuitionM":
				if ix in ids[:self.optimization_limit]:
					informativeness = self.clf.uncertainty_margin(x)
				else:
					informativeness = 0.
			#---------------------------------------------------------
			elif mtd == "margin": # elif (was `if`) keeps this a single dispatch chain with "intuitionM" above
				informativeness = self.clf.uncertainty_margin(x)
			
			#---------------------------------------------------------
			elif mtd == "proba":
				informativeness = self.clf.uncertainty_prediction(x)
			
			#---------------------------------------------------------
			elif mtd == "entropy":
				informativeness = self.clf.uncertainty_entropy(x)
			
			#---------------------------------------------------------
			elif mtd == "random":
				informativeness = random.uniform(0., 1.)
			
			#---------------------------------------------------------
			elif mtd == "weight":
				if ix in ids[:self.optimization_limit]:
					informativeness = self.clf.uncertainty_weight(x, self.Lx, self.Ly)
				else: informativeness = 0.
			
			#---------------------------------------------------------
			elif mtd == "expectedErrorReduction":
				if ix in ids[:self.optimization_limit]:
					sums = 0.
					YP = self.clf.predict(x, all = True)
					YP.sort(key=operator.itemgetter(1), reverse=True)
					for ir, (yy, proba) in enumerate(YP):
						if ir == 5: break
						temp_clf = Classification(self.Lx + [x], self.Ly + [yy], method = self.clf.method); temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C # TODO FIXME: do it in general not specifically for svm
						temp_clf.train()
						e_h1 = sum( [ temp_clf.uncertainty_entropy(dp) for dp in self.Ux if dp != x ] )
						
						sums += (proba) * e_h1
					informativeness = 1. / sums
				else:
					informativeness = 0.
			
			#---------------------------------------------------------
			elif mtd == "etc":
				if ix in ids[:self.optimization_limit]:
					temp_clf1 = Classification(self.Lx + [x], self.Ly + [y1], method = self.clf.method); temp_clf1.GAMMA, temp_clf1.C = self.clf.GAMMA, self.clf.C
					temp_clf1.train()
					diff1 = sum( [ abs(temp_clf1.getPredictProba(1,dp) - self.clf.getPredictProba(1,dp)) if temp_clf1.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ] ) / (len(self.Ux) - 1.)
					
					temp_clf2 = Classification(self.Lx + [x], self.Ly + [y2], method = self.clf.method); temp_clf2.GAMMA, temp_clf2.C = self.clf.GAMMA, self.clf.C
					temp_clf2.train()
					diff2 = sum( [ abs(temp_clf2.getPredictProba(1,dp) - self.clf.getPredictProba(1,dp)) if temp_clf2.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ] ) / (len(self.Ux) - 1.)
					
					informativeness = diff1 # this one is particularly good for rejection (to be confirmed)
					informativeness = diff1 if p1/(1+diff1) >= p2/(1+diff2) else diff2
					informativeness = p1*diff1 + p2*diff2 + 1.
				else:
					informativeness = 0.
				
			#---------------------------------------------------------
			elif mtd == "etc_":
				if ix in ids[:self.optimization_limit]:
					temp_clf1 = Classification(self.Lx + [x], self.Ly + [y1], method = self.clf.method); temp_clf1.GAMMA, temp_clf1.C = self.clf.GAMMA, self.clf.C
					temp_clf1.train()
					diff1 = sum( [ 1. if temp_clf1.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ] ) / (len(self.Ux) - 1.)
					
					temp_clf2 = Classification(self.Lx + [x], self.Ly + [y2], method = self.clf.method); temp_clf2.GAMMA, temp_clf2.C = self.clf.GAMMA, self.clf.C
					temp_clf2.train()
					diff2 = sum( [ 1. if temp_clf2.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ] ) / (len(self.Ux) - 1.)
					
					informativeness = diff1 # this one is particularly good for rejection (to be confirmed)
					informativeness = diff1 if p1/(1+diff1) >= p2/(1+diff2) else diff2
					informativeness = p1*diff1 + p2*diff2 + 1.
				else:
					informativeness = 0.
				
			#---------------------------------------------------------
			elif mtd == "test":
				if ix in ids[:self.optimization_limit]:
					temp_clf1 = Classification(self.Lx + [x], self.Ly + [y1], method = self.clf.method); temp_clf1.GAMMA, temp_clf1.C = self.clf.GAMMA, self.clf.C
					temp_clf1.train()
					diff1 = np.mean( [0.]+[ abs(temp_clf1.getPredictProba(1,dp) - self.clf.getPredictProba(1,dp)) for dp in self.Ux if temp_clf1.predict_label(dp) != self.clf.predict_label(dp) and dp != x ] )
					
					temp_clf2 = Classification(self.Lx + [x], self.Ly + [y2], method = self.clf.method); temp_clf2.GAMMA, temp_clf2.C = self.clf.GAMMA, self.clf.C
					temp_clf2.train()
					diff2 = np.mean( [0.]+[ abs(temp_clf2.getPredictProba(1,dp) - self.clf.getPredictProba(1,dp)) for dp in self.Ux if temp_clf2.predict_label(dp) != self.clf.predict_label(dp) and dp != x ] )
					
					informativeness = diff1 # this one is particularly good for rejection (to be confirmed)
					informativeness = diff1 if p1/(1+diff1) >= p2/(1+diff2) else diff2
					informativeness = p1*diff1 + p2*diff2 + 1.
				else:
					informativeness = 0.
				
			#---------------------------------------------------------
			elif mtd == "intuition":
				if ix in ids[:self.optimization_limit]:
					true_y = self.Uy[ self.Ux.index(x) ]
					
					temp_clf = Classification(self.Lx + [x], self.Ly + [true_y], method = self.clf.method)
					temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train()
					
					ucts = [ temp_clf.getPredictProba(1,dp) - self.clf.getPredictProba(1,dp) for dp in self.Tx ]
					ids_ucts = (-np.array(ucts)).argsort()[:50]
					
					# diff = np.mean( [ 1. if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Tx ] )
					diff = np.mean([1. if temp_clf.predict_label(dp) != self.clf.predict_label(dp) and idp in ids_ucts else 0. for idp,dp in enumerate(self.Tx) ])
					
					informativeness = diff
				else:
					informativeness = 0.
			#---------------------------------------------------------
			elif mtd == "intuition2": # NOTE: originally a second "intuition" branch, unreachable behind the first; renamed on the assumption that a distinct method name was intended
				if ix in ids[:self.optimization_limit]:
					true_y = self.Uy[ self.Ux.index(x) ]
					
					temp_clf = Classification(self.Lx + [x], self.Ly + [true_y], method = self.clf.method)
					temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train()
					
					# ---------------------
					imp_x = [ xdp for xdp in self.Tx if temp_clf.predict_label(xdp) != self.clf.predict_label(xdp) ]
					imp_y_hh = [ temp_clf.predict_label(xdp) for xdp in self.Tx if temp_clf.predict_label(xdp) != self.clf.predict_label(xdp) ]
					
					if len( set(imp_y_hh) ) > 1: 
						# hh = Classification(imp_x, imp_y_hh, method = self.clf.method)
						hh = Classification(imp_x + [x], imp_y_hh + [true_y], method = self.clf.method, tuning = False)
						hh.GAMMA, hh.C = self.clf.GAMMA, self.clf.C; hh.train()
					else:
						hh = self.clf
					# ---------------------
					
					h_inconsistant_truth = 0; hh_inconsistant_truth = 0; hh_inconsistant_h = 0; h_consistency = []; hh_consistency = []
					for ilx, lx in enumerate(self.Lx):
						h_consistency.append( self.clf.getProbaOf( self.Ly[ilx], lx ) )
						# hh_consistency.append( hh.getProbaOf( self.Ly[ilx], lx ) )
						hh_consistency.append( hh.getProbaOf( self.Ly[ilx], lx ) if hh.predict_label(lx) == self.Ly[ilx] else 0. )
						
						if self.clf.predict_label(lx) != self.Ly[ilx]: h_inconsistant_truth += 1.
						if hh.predict_label(lx) != self.Ly[ilx]: hh_inconsistant_truth += 1.
						if hh.predict_label(lx) != self.clf.predict_label(lx): hh_inconsistant_h += 1.
					h_consistency = np.mean(h_consistency)
					hh_consistency = np.mean(hh_consistency) if len( set(imp_y_hh) ) > 1 else 0.
					
					consistency_dif = hh_consistency - h_consistency
					
					# ---------------------
					diff = []; errors = 0.; trues = 0.; impacted = 0; impacted_probs = [];
					for idp, dp in enumerate(self.Tx):
						if temp_clf.predict_label(dp) != self.clf.predict_label(dp): ##################
							impacted += 1.
							impacted_probs.append( abs( temp_clf.getPredictProba(1,dp) - self.clf.getPredictProba(1,dp) ) )
							if self.Ty[idp]!=temp_clf.predict_label(dp): errors += 1.
							else: trues += 1.
						
						# if temp_clf.predict_label(dp) != self.clf.predict_label(dp) and self.Ty[idp]==temp_clf.predict_label(dp): diff.append( 1. )
						# if temp_clf.predict_label(dp) != self.clf.predict_label(dp) and trues - errors > 0: diff.append( 1. )
						# if temp_clf.predict_label(dp) != self.clf.predict_label(dp): diff.append( 1. )
						
						if temp_clf.predict_label(dp) != self.clf.predict_label(dp): diff.append( 1. )
						
						else: diff.append( 0. )
					diff = np.mean( diff )
					
					# diff = diff * np.mean(impacted_probs) # seems to be working ...
					
					# ---------------------
					# self.viz_A.append( consistency_dif )
					self.viz_A.append( hh_consistency )
					self.viz_B.append( errors )
					self.viz_C.append( trues )
					self.viz_D.append( trues - errors ); posI = [inb for inb,nbD in enumerate(self.viz_D) if nbD >= 0.]
					self.viz_E.append( impacted )
					self.viz_F.append( np.mean(impacted_probs) )
					viz = Visualize(); viz.plot( [self.viz_A, self.viz_B], fig = "test_errors.png", color = 'r', marker = 'o' )
					vizu = Visualize(); vizu.plot( [self.viz_A, self.viz_C], fig = "test_trues.png", color = 'r', marker = 'o' )
					vizuu = Visualize(); vizuu.plot( [self.viz_A, self.viz_D], fig = "test_trues_errors.png", color = 'r', marker = 'o' )
					
					vizuuu = Visualize(); vizuuu.do_plot( [self.viz_A, self.viz_E], color = 'r', marker = 'o' )
					vizuuu.do_plot( [[self.viz_A[inb] for inb in posI], [self.viz_E[inb] for inb in posI]], color = 'b', marker = 'o' )
					vizuuu.end_plot(fig = "impacted.png")
					
					print(hh_consistency, hh_inconsistant_truth, "---", len(imp_x), len( set(imp_y_hh) ), "============>", impacted, trues - errors)
					
					informativeness = diff
				else:
					informativeness = 0.
				
			else:
				informativeness = 0. # fallback for methods with no scoring branch above (e.g. "optimal")
			#---------------------------------------------------------
			
			scores.append( informativeness )
		
		ids = (-np.array(scores)).argsort()
		sorted_scores = [ scores[id] for id in ids ]	
		# sorted_scores = [ 1.*scores[id] / sum(scores) for id in ids ]	
		
		return ids, sorted_scores
예제 #41
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--select', action='store_true')
    parser.add_argument('-t', '--test', action='store_true')
    parser.add_argument('-c', '--count', action='store_true')
    parser.add_argument('-m', '--merge', '--combine', action='store_true')
    parser.add_argument('-e', '--extract', action='store_true')
    parser.add_argument('-d', '--debuginput', action='store_true')
    parser.add_argument('-r', '--review', action='store_true')
    parser.add_argument('--replace-debug', action='store_true')
    parser.add_argument('-p', '--plot-data', action='store_true')
    parser.add_argument('--base-data', action='store_true')
    parser.add_argument('--features', default='original')
    parser.add_argument('-f', '--homology-filter', action='store_true')
    parser.add_argument('-y', '--classify', action='store_true')
    parser.add_argument('--grid-search', action='store_true')
    parser.add_argument('--plot', action='store_true')
    parser.add_argument('--fit', action='store_true')
    parser.add_argument('--count-total-number-of-genes', action='store_true')

    args = parser.parse_args()

    if args.select:
        if args.test:
            selector = Selector("config/Test/selection_config.json")
        else:
            selector = Selector("config/selection_config.json")
        selector.select()
        selector.selected_to_folder()

    if args.count:
        if args.test:
            counter = Counter("config/Test/counter_config.json")
        else:
            counter = Counter("config/counter_config.json")
        counter.count_all_viruses()

    if args.merge:
        if args.test:
            combiner = Combiner("config/Test/combiner_config.json")
        else:
            combiner = Combiner("config/combiner_config.json")
        combiner.combine_all_viruses()

    if args.debuginput:
        debug_input_collector = DebugInfoCollector("config/debug_info_collector_config.json")
        if args.replace_debug:
            debug_input_collector.collect(True)
        else:
            debug_input_collector.collect()

    if args.review:
        import Review
        Review.run()

    if args.plot_data:
        data_plotter = DataPlotter("config/data_plotter_config.json")
        data_plotter.plot()

    if args.base_data:
        base_data = BaseData("config/base_data_config.json")
        base_data.create_data()

    if args.homology_filter:
        homology_filter = HomologyFilter('config/homology_filter.json')
        homology_filter.filter()

    if args.extract:
        feature_extractor = FeatureExtraction("config/feature_extraction_config.json")
        feature_extractor.extract(args.features)

    if args.count_total_number_of_genes:
        combiner = Combiner("config/combiner_config.json")
        combiner.print_number_of_genes()

    if args.classify:
        if args.grid_search:
            MLgrid = [
                {
                    "booster": ["gblinear"],
                    # "lambda": [0, 0.0001, 0.001],
                    "lambda": [0],
                    # "updater": ["shotgun", "coord_descent"],
                    "updater": ["coord_descent", "shotgun"],
                    # "feature_selector": ["cyclic", "shuffle", "random", "greedy", "thrifty"]
                    "feature_selector": ["shuffle"]
                }
                # {
                #     "booster": ["gbtree"],
                #     # "max_depth": range(3, 10, 2),
                #     # "min_child_weight": range(1, 6, 2)
                # }
            ]
            _1vsAgrid = [
                {
                    "estimator__booster": ["gblinear"],
                    "estimator__lambda": [0.1],
                    "estimator__updater": ["coord_descent"],
                    "estimator__feature_selector": ["shuffle"]
                },
                # {
                #     "estimator__booster": ["gbtree"],
                #     "estimator__max_depth": range(3, 10, 2),
                #     "estimator__min_child_weight": range(1, 6, 2)
                # }

            ]
            RRgrid = [
                {
                    "estimator__booster": ["gblinear"],
                    "estimator__lambda": [0.1],
                    "estimator__updater": ["coord_descent"],
                    "estimator__feature_selector": ["shuffle"]
                },
                # {
                #     "estimator__booster": ["gbtree"]
                # #     "estimator__max_depth": range(3, 10, 2),
                # #     "estimator__min_child_weight": range(1, 6, 2)
                # }

            ]
            classification = Classification('config/classification_config.json', args.features)
            classification.grid_search('ML', 'XGBoost', MLgrid, 200, 'no-pca')
        else:
            if args.fit:
                classification = Classification('config/classification_config.json', args.features)
                classification.fit_all()

            if args.plot:
                cp = ClassificationPlotter('config/classification_config.json', args.features)
                cp.plot_all()
예제 #42
def numOfCorrectlyClassified(listOfCountedParams, listOfDecisionsInTST,
                             textFile):
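    # For each test object, picks the class whose counted parameter is highest
    # (ties broken randomly) and tallies, per class, how many objects were
    # classified and how many were classified correctly against the hidden
    # expert decision.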
    listOfClassifications = []
    i = 1
    listOfParamsInLoop = []
    properlyClassified = 0
    classified = 0

    for uDecision in unique(listOfDecisionsInTST):
        classification = Classification()
        classification.setCObject(uDecision)
        classification.setListOfClassified(0)
        classification.setListOfClassifiedCorrectly(0)
        listOfClassifications.append(classification)

    enum = 0
    for countedParam in listOfCountedParams:
        xObject = "x" + str(i)
        if xObject == countedParam.getTestObject():
            listOfParamsInLoop.append(countedParam)

        if xObject != listOfCountedParams[enum + 1].getTestObject() or \
                len(listOfDecisionsInTST) == 1 and len(listOfParamsInLoop) == 2:
            cObject = ""
            param = 0
            highestX = ""
            listOfParamsInLoopIterator = 0
            for elem in listOfParamsInLoop:
                if param < elem.getParam():
                    param = elem.getParam()
                    highestX = elem.getTestObject()
                    cObject = elem.getCObject()
                    listOfParamsInLoopIterator += 1

            if listOfParamsInLoopIterator > 1:
                textFile.write("Param c==" +
                               listOfParamsInLoop[0].getCObject() + "<" +
                               "Param C==" +
                               listOfParamsInLoop[len(listOfParamsInLoop) -
                                                  1].getCObject() +
                               " dla obiektu " + highestX)
            if listOfParamsInLoopIterator <= 1:
                textFile.write("Param c==" +
                               listOfParamsInLoop[0].getCObject() + ">" +
                               "Param C==" +
                               listOfParamsInLoop[len(listOfParamsInLoop) -
                                                  1].getCObject() +
                               " dla obiektu " + highestX)

            # textFile.write("For "+highestX+" param c=="+cObject+" is the largest\n")
            if areParamsInLoopEqual(listOfParamsInLoop):
                randomParam = random.choice(listOfParamsInLoop)
                if randomParam.getCObject() == listOfDecisionsInTST[i - 1]:
                    textFile.write(
                        " ta decyzja jest zgodna z ukryta decyzja eksperta (decyzja eksperta == "
                        + listOfDecisionsInTST[i - 1] + ")\n")
                    for element in listOfClassifications:
                        if element.getCObject() == randomParam.getCObject():
                            element.setListOfClassifiedCorrectly(
                                element.getListOfClassifiedCorrectly() + 1)
                            element.setListOfClassified(
                                element.getListOfClassified() + 1)

                else:
                    textFile.write(
                        " ta decyzja jest nie zgodna z ukryta decyzja eksperta (decyzja eksperta == "
                        + listOfDecisionsInTST[i - 1] + ")\n")
                    for element in listOfClassifications:
                        if element.getCObject() == randomParam.getCObject():
                            element.setListOfClassified(
                                element.getListOfClassified() + 1)
            else:
                if cObject == listOfDecisionsInTST[i - 1]:
                    textFile.write(
                        " ta decyzja jest zgodna z ukryta decyzja eksperta (decyzja eksperta == "
                        + listOfDecisionsInTST[i - 1] + ")\n")
                    for element in listOfClassifications:
                        if element.getCObject() == cObject:
                            element.setListOfClassifiedCorrectly(
                                element.getListOfClassifiedCorrectly() + 1)
                            element.setListOfClassified(
                                element.getListOfClassified() + 1)
                else:
                    textFile.write(
                        " ta decyzja jest nie zgodna z ukryta decyzja eksperta (decyzja eksperta == "
                        + listOfDecisionsInTST[i - 1] + ")\n")
                    for element in listOfClassifications:
                        if element.getCObject() == cObject:
                            element.setListOfClassified(
                                element.getListOfClassified() + 1)
            i += 1
            listOfParamsInLoop = []
        enum += 1
        if enum == len(listOfCountedParams) - 1:
            enum = 0

    return listOfClassifications
예제 #43
    def Process(self, sourcex,sourcey, targetx,targety,subsize):
        # fixed size windows for source stream and target stream

        sourceIndex = 0
        targetIndex = 0
        src_count = 0
        tgtchange_count = 0
        threshold = 1.0
        src_size, _ = sourcex.shape
        tgt_size, _ = targetx.shape
        #true_label = []
        #for i in range(len(np.array(targety))):
            #if np.array(targety)[i] == 'class1':
                #true_label.append(1)
            #if np.array(targety)[i] == 'class2':
                #true_label.append(2)
            #if np.array(targety)[i] == 'class3':
                #true_label.append(3)
            #if np.array(targety)[i] == 'class4':
                #true_label.append(4)
            #if np.array(targety)[i] == 'class5':
                #true_label.append(5)
            #if np.array(targety)[i] == 'class6':
                #true_label.append(6)
            #if np.array(targety)[i] == 'class7':
                #true_label.append(7)

        windowsize = 1000
        sourcewindowstart = 0
        sourcewindowend = sourcewindowstart + windowsize -1
        targetwindowstart = 0
        targetwindowend = targetwindowstart + windowsize - 1
        sourcexwindow = sourcex[sourcewindowstart:sourcewindowend]
        sourceywindow = sourcey[sourcewindowstart:sourcewindowend]
        targetxwindow = targetx[targetwindowstart:targetwindowend]
        targetywindow = targety[targetwindowstart:targetwindowend]

        ### get the initial model by using the first source and target windows
        alpha = 0.05
        b = targetxwindow.T.shape[1]
        fold = 5
        sigma_list = Classification.sigma_list(np.array(targetxwindow.T),
                                               np.array(sourcexwindow.T))
        lambda_list = Classification.lambda_list()
        srcx_array = np.array(sourcexwindow.T)
        trgx_array = np.array(targetxwindow.T)
        (thetah_old, w, sce_old, sigma_old) = Classification.R_ULSIF(trgx_array, srcx_array, alpha, sigma_list, lambda_list, b, fold)

        self.Ensemble.generateNewModelRULSIF(targetxwindow, sourcexwindow, sourceywindow, alpha, sigma_list,
                                             lambda_list, b, fold,subsize)
        # print "update model", src_size, source.shape
        truelablecount = 0.0
        totalcount = 0.0


        #tmpsrccount = 0
        tmptrgcount = 0
        changeindex = -1
        updatestartindex = 0
        while True:
            if sourcewindowend >= src_size or targetwindowend >= tgt_size:
                break

            data_type = randint(1, 10)
            if data_type < 2:
                print("get data from source")
                sourcewindowstart+=1
                sourcewindowend+=1
                sourcexwindow = sourcex[sourcewindowstart:sourcewindowend]
                sourceywindow = sourcey[sourcewindowstart:sourcewindowend]
                sourceIndex += 1
                #src_count += 1
                #tmpsrccount += 1
                print("sourceIndex", sourceIndex)
            else:
                print("get data from target")
                targetwindowstart+=1
                targetwindowend+=1
                targetxwindow = targetx[targetwindowstart:targetwindowend]
                targetywindow = targety[targetwindowstart:targetwindowend]
                targetIndex += 1
                tgtchange_count += 1
                tmptrgcount += 1
                print("targetIndex", targetIndex)
            if tgtchange_count>=1000:
                changeindex = 1
                tgtchange_count = 0
                confidencelist = []
                for i in range(targetwindowstart, targetwindowend+1):
                    instanceresult = self.Ensemble.evaluateEnsembleRULSIF(targetx[i])
                    confidencelist.append(instanceresult[1])
                confvar = np.var(confidencelist)
                changetestresult = pelt(normal_mean(confidencelist, confvar), len(confidencelist))
                if len(changetestresult)>1:
                    alpha = 0.05
                    b = targetxwindow.T.shape[1]
                    fold = 5
                    sigma_list = Classification.sigma_list(np.array(targetxwindow.T),
                                                           np.array(sourcexwindow.T))
                    lambda_list = Classification.lambda_list()
                    self.Ensemble.generateNewModelRULSIF(targetxwindow, sourcexwindow, sourceywindow, alpha, sigma_list,
                                                         lambda_list, b, fold, subsize)

                #x_nu = np.array(targetxwindow.T);
                #(thetah_new, w, sce_new, sigma_new) = Classification.R_ULSIF(trgx_array, srcx_array, alpha, sigma_list,
                                                                             #lambda_list, b, fold)
                #targetweight_old = Classification.compute_target_weight(thetah_old, sce_old, sigma_old, x_nu)
                #targetweight_new = Classification.compute_target_weight(thetah_new, sce_new, sigma_new, x_nu)
                #l_ratios = targetweight_new / targetweight_old

                #lnWeightTrgData = np.log(l_ratios, dtype='float64')
                #changeScore = np.sum(lnWeightTrgData, dtype='float64')
                #tgtchange_count=0
                #print "changeScore", changeScore
                #if changeScore > threshold:
                    #alpha = 0.05
                    #b = targetxwindow.T.shape[1];
                    #fold = 5
                    #sigma_list = Classification.sigma_list(np.array(targetxwindow.T),
                                                           #np.array(sourcexwindow.T));
                    #lambda_list = Classification.lambda_list();
                    #self.Ensemble.generateNewModelRULSIF(targetxwindow, sourcexwindow, sourceywindow, alpha, sigma_list,
                                                         #lambda_list, b, fold, subsize)



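            # Forced update: after 2000 target instances, score all predictions made
            # since the last update, append the running accuracy to the CSV log, and
            # rebuild the RULSIF model on the current windows.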
            if tmptrgcount>=2000:
                # force update model
                tmptrgcount=0
                #update predictions for updatestartindex to targetIndex
                for i in range(updatestartindex,targetIndex+1):
                    print("targetx[i]", targetx[i])
                    instanceresult = self.Ensemble.evaluateEnsembleRULSIF(targetx[i])
                    print("instanceresult", instanceresult)
                    print("instanceresult[0]", instanceresult[0])
                    print("truelabel[i]", targety[i])
                    if instanceresult[0] == targety[i]:
                        truelablecount +=1.0
                    totalcount +=1.0
                print("truelablecount",truelablecount)
                print("totalcount", totalcount)
                with open('errorsyn002405.csv', 'a+') as f:
                    writer = csv.writer(f)
                    writer.writerow([targetIndex, truelablecount,totalcount,truelablecount/totalcount ])
                updatestartindex = targetIndex+1
                alpha = 0.05
                b = targetxwindow.T.shape[1]
                fold = 5
                sigma_list = Classification.sigma_list(np.array(targetxwindow.T),
                                                       np.array(sourcexwindow.T))
                lambda_list = Classification.lambda_list()
                self.Ensemble.generateNewModelRULSIF(targetxwindow, sourcexwindow, sourceywindow, alpha, sigma_list,
                                                     lambda_list, b, fold,subsize)
예제 #44
class ActiveLearning:
	def __init__(self, Lx, Ly, Ux, Uy, Tx, Ty, method = "svm", budget = 251, optimize = 50, datasetname="dataset"):
		self.datasetname = datasetname
		
		self.Lx = Lx
		self.Ly = Ly
		self.Ux = Ux
		self.Uy = Uy # TODO should not be here
		self.Tx = Tx # TODO should not be here
		self.Ty = Ty # TODO should not be here
		
		self.optimize = optimize
		
		self.budget = budget
		self.accuracys = []
		
		self.clf = Classification( self.Lx, self.Ly, method = method, Vx = Lx+Ux, Vy = Ly+Uy )
		self.clf.train()
		
		self.mab = Bandit( algos = np.arange(0., 1.1, 0.1), method = "boltzmann" )
		# self.mab = Bandit( algos = np.arange(0., 1.1, 0.1), method = "UCB" )
		
		# self.mab2 = Bandit( algos = ["disag1", "disag2"], method = "boltzmann" )
		self.mab2 = Bandit( algos = ["disag1", "disag2"], method = "reinforcement" )
		# self.mab2 = Bandit( algos = ["disag1", "disag2"], method = "EXP3" )
		
	#---------------------------------------
	def train(self, mtd = "margin", backupfile = "backupfile.txt"): # TODO implement sample_weight + make method to shuffle and return sublist with data_limit
		for i in range(self.budget):
			if len(self.Ux) <= 1: break
			
			
			if mtd == "margin": ids, scores = self.query_margin()
			if mtd == "proba": ids, scores = self.query_proba()
			if mtd == "entropy": ids, scores = self.query_entropy()
			if mtd == "random": ids, scores = self.query_random()
			if mtd == "weight": ids, scores = self.query_sufficient_weight()
			if mtd == "eer": ids, scores = self.query_eer()
			if mtd == "dist": ids, scores = self.query_sufficient_distance()
			if mtd == "disag1": ids, scores = self.query_disagreement1()
			if mtd == "disag2": ids, scores = self.query_disagreement2()
			if mtd == "disag3": ids, scores = self.query_disagreement3()
			if mtd == "balance": ids, scores = self.query_balance()
			if mtd == "balanced_disag1": ids, scores = self.query_balanced_disag1()
			if mtd == "balanced_disag2": ids, scores = self.query_balanced_disag2()
			if mtd == "disag1_balanced": ids, scores = self.query_disag1_balanced()
			if mtd == "disag2_balanced": ids, scores = self.query_disag2_balanced()
			if mtd == "exp": ids, scores = self.query_explote_explore()
			if mtd == "test": ids, scores = self.query_disagreement_test()
			
			id = ids[0]
			
			qx = self.Ux[id]
			qy = self.Uy[id]
			
			self.Lx.append(qx)
			self.Ly.append(qy)
			self.Ux.pop(id)
			self.Uy.pop(id)
			
			self.clf.X = self.Lx; self.clf.Y = self.Ly
			self.clf.train()
			
			test_accuracy = self.clf.getTestAccuracy( self.Tx, self.Ty )
			self.accuracys.append( test_accuracy )
			
			print("i=", i+1, "-- acc=%.4f"%(test_accuracy*100), "-- %.4f"%(np.mean(self.accuracys)*100), "%.4f"%(np.average(self.accuracys, weights = range(1,1+len(self.accuracys)))*100), "--", scores[0], scores[1])
			
			if (i+1)%10 == 0:
				Util.pickleSave(backupfile, self)
				# viz = Visualize()
				# viz.plot( [range(len(self.accuracys)), self.accuracys], fig = backupfile+".png", color = 'r', marker = '-' )
				'''
				colors = ['r', 'b', 'g', 'k', 'm', 'c', '0.10', '0.35', '0.60', '0.90']
				viz.plot( zip(*self.Lx+self.Ux), fig = backupfile+"__.png", color = [colors[int(l)] for l in self.Ly+self.Uy], marker = 'o' )
				viz.do_plot( zip(*self.Ux), color = ['y']*len(self.Ux), marker = '.' )
				viz.do_plot( zip(*self.Lx), color = [colors[int(l)] for l in self.Ly], marker = 'o' )
				viz.end_plot( fig = backupfile+"_.png" )
				'''
	#---------------------------------------
	def sort_scores(self, scores):
		if sum(scores) == 0.: scores = [ self.clf.uncertainty_margin(x) for x in self.Ux ]
		
		ids = (-np.array(scores)).argsort()
		sorted_scores = [ scores[id] for id in ids ]	
		return ids, sorted_scores
		
	#---------------------------------------
	def query_margin(self):
		return self.sort_scores( [ self.clf.uncertainty_margin(x) for x in self.Ux ] )
	#---------------------------------------
	def query_proba(self):
		return self.sort_scores( [ self.clf.uncertainty_prediction(x) for x in self.Ux ] )
	#---------------------------------------
	def query_entropy(self):
		return self.sort_scores( [ self.clf.uncertainty_entropy(x) for x in self.Ux ] )
	#---------------------------------------
	def query_random(self):
		return self.sort_scores( [ random.uniform(0., 1.) for x in self.Ux ] )
	#---------------------------------------
	def query_sufficient_weight(self):
		ids, _ = self.query_margin()
		return self.sort_scores( [ self.clf.uncertainty_weight(x, self.Lx, self.Ly) if ix in ids[:self.optimize] else 0. for ix, x in enumerate(self.Ux) ] )
	#---------------------------------------
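	# Expected error reduction: for the top candidates, retrain with each of the
	# limit_Y most probable labels and score 1 / (probability-weighted entropy left
	# on U); a smaller expected residual entropy means a more informative query.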
	def query_eer(self, limit_Y = 20):
		ids, _ = self.query_margin()
		scores = []
		
		for ix, x in enumerate(self.Ux):
			if ix in ids[:self.optimize]:
				YP = self.clf.predict(x, all = True)
				YP.sort(key=operator.itemgetter(1), reverse=True)
				sums = 0.
				for ir, (yy, proba) in enumerate(YP):
					if ir == limit_Y: break
					temp_clf = Classification(self.Lx + [x], self.Ly + [yy], method = self.clf.method); temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C # TODO FIXME: do it in general not specifically for svm
					temp_clf.train()
					e_h1 = sum( [ temp_clf.uncertainty_entropy(dp) for dp in self.Ux if dp != x ] )
					sums += (proba) * e_h1
					
				informativeness = 1. / sums
			else: informativeness = 0.
			scores.append( informativeness )
			
		return self.sort_scores(scores)
	#---------------------------------------
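	# Estimates distance to the decision boundary: binary-search along the segment
	# from x toward the medoid of the nearest runner-up-class neighbors until the
	# predicted label flips; points closer to the boundary score higher.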
	def query_sufficient_distance(self):
		ids, _ = self.query_margin()
		scores = []
		
		for ix, x in enumerate(self.Ux):
			if ix in ids[:self.optimize]:
				y1, y2, p1, p2 = self.clf.getMarginInfo(x)
				C = [dp for idp, dp in enumerate(self.Lx) if self.Ly[idp] == y2 ]
				CDx = [Util.dist(dp, x) for idp, dp in enumerate(self.Lx) if self.Ly[idp] == y2 ]
				idsC = (np.array(CDx)).argsort(); xx = Util.medoid( [ C[idp] for idp in idsC[:1] ] )
				
				step = 0.01; lower = 0.; upper = 1.
				while (upper - lower > step):
					w = (upper + lower) / 2.
					px = np.array(x) + w * ( np.array(xx) - np.array(x) )
					
					if self.clf.predict_label(px) != y1: upper = w
					else: lower = w
					
				informativeness = 1. - w
			else: informativeness = 0.
			scores.append( informativeness )
		
		return self.sort_scores(scores)
	#---------------------------------------
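	# Self-disagreement: retrain with x and its *predicted* label and count (or
	# probability-weight) how many predictions on U flip; a large induced change
	# means x is informative.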
	def query_disagreement1(self, weighted = False, op = 1):
		ids, _ = self.query_margin()
		scores = []
		
		for ix, x in enumerate(self.Ux):
			if ix in ids[:self.optimize*op]:
				# true_y = self.Uy[ix]
				true_y = self.clf.predict_label(x)
				
				temp_clf = Classification(self.Lx + [x], self.Ly + [true_y], method = self.clf.method)
				temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train()
				
				if not weighted:
					diff = sum([ 1. if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ])
				else:
					diff = sum([ abs(temp_clf.getPredictProba(1,dp) - self.clf.getPredictProba(1,dp)) if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ])
					# diff = sum([ Util.dist(temp_clf.h.predict_proba(dp)[0], self.clf.h.predict_proba(dp)[0]) if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ])
				
				informativeness = diff
			else: informativeness = 0.
			scores.append( informativeness )
		
		return self.sort_scores(scores)
	#---------------------------------------
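	# Committee disagreement: build one committee member per top candidate (each
	# trained with that point's predicted label) and score x by how strongly the
	# committee disagrees with the current model on it.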
	def query_disagreement2(self, weighted = False, op = 1):
		ids, _ = self.query_margin()
		scores = []
		
		commitee = []
		for idp, dp in enumerate(self.Ux):
			if idp in ids[:self.optimize*op]:
				# true_y = self.Uy[idp]
				true_y = self.clf.predict_label(dp)
				
				temp_clf = Classification(self.Lx + [dp], self.Ly + [true_y], method = self.clf.method)
				temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train()
				commitee.append( (temp_clf, 1) )
		
		for ix, x in enumerate(self.Ux):
			if ix in ids[:self.optimize*op]:
				preds = Counter()
				
				if weighted: # weight using proba distrib of commitee
					for (clf,_) in commitee:
						if self.clf.predict_label(x) != clf.predict_label(x):
							YP = zip( clf.h.classes_, clf.h.predict_proba( x )[0] )
							for (y,p) in YP: preds[y] += p
					
					preds = preds.most_common()
					diff = 0. if preds == [] else preds[0][1]
					# diff = 0. if preds == [] else ( preds[0][1] - preds[1][1] if len(preds)>1 else preds[0][1] )
					
				else:
					# confis = [ clf.getPredictProba(1,x) for (clf,_) in commitee if self.clf.predict_label(x) != clf.predict_label(x) ]
					labels = [ clf.predict_label(x) for (clf,_) in commitee if self.clf.predict_label(x) != clf.predict_label(x) ]
					preds = Counter(labels)
					preds = preds.most_common()
					diff = 0. if preds == [] else sum( [pred[1] for pred in preds] )
				
				informativeness = diff
				
			else: informativeness = 0.
			scores.append( informativeness )
		
		return self.sort_scores(scores)
	
	#---------------------------------------
	def query_disagreement3(self):
		id_algo = self.mab2.choose()
		algo = self.mab2.algos[ id_algo ]
		print("Chosen =", algo, "nb_choices =", self.mab2.nb_choices, "mean rew=", [ np.mean(L) for L in self.mab2.rewards ])
		
		if algo == "disag1": ids, scores = self.query_disagreement1(weighted = True)
		if algo == "disag2": ids, scores = self.query_disagreement2()
		
		reward = self.get_change( self.Ux[ids[0]], self.Uy[ids[0]] )
		self.mab2.update(id_algo, reward)
		
		return ids, scores
		
	#---------------------------------------
	def query_balanced_disag1(self, weighted = True, op=1):
		ids, _ = self.query_margin()
		scores = []
		scores_B = []
		
		for ix, x in enumerate(self.Ux):
			if ix in ids[:self.optimize*op]:
				# true_y = self.Uy[ix]
				true_y = self.clf.predict_label(x)
				
				temp_clf = Classification(self.Lx + [x], self.Ly + [true_y], method = self.clf.method)
				temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train()
				
				if not weighted:
					diff = sum([ 1. if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ])
				else:
					diff = sum([ abs(temp_clf.getPredictProba(1,dp) - self.clf.getPredictProba(1,dp)) if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ])
					# diff = sum([ Util.dist(temp_clf.h.predict_proba(dp)[0], self.clf.h.predict_proba(dp)[0]) if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ])
				
				balance = self.get_balance(x)
				informativeness = diff
			else:
				informativeness = 0.
				balance = 0.
				
			scores.append( informativeness )
			scores_B.append( balance )
			
		# scores_B = Util.normalize(scores_B)
		scores = [scr*scores_B[iscr] for iscr,scr in enumerate(scores)]
		
		return self.sort_scores(scores)

	#---------------------------------------
	def query_balanced_disag2(self, weighted = True, op=1):
		ids, _ = self.query_margin()
		scores = []
		scores_B = []
		
		commitee = []
		for idp, dp in enumerate(self.Ux):
			if idp in ids[:self.optimize*op]:
				# true_y = self.Uy[idp]
				true_y = self.clf.predict_label(dp)
				
				temp_clf = Classification(self.Lx + [dp], self.Ly + [true_y], method = self.clf.method)
				temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train()
				commitee.append( (temp_clf, 1) )
		
		for ix, x in enumerate(self.Ux):
			if ix in ids[:self.optimize*op]:
				preds = Counter()
				
				if weighted: # weight using proba distrib of commitee
					for (clf,_) in commitee:
						if self.clf.predict_label(x) != clf.predict_label(x):
							YP = zip( clf.h.classes_, clf.h.predict_proba( x )[0] )
							for (y,p) in YP: preds[y] += p
					
					preds = preds.most_common()
					diff = 0. if preds == [] else preds[0][1]
					# diff = 0. if preds == [] else ( preds[0][1] - preds[1][1] if len(preds)>1 else preds[0][1] )
					
				else:
					# confis = [ clf.getPredictProba(1,x) for (clf,_) in commitee if self.clf.predict_label(x) != clf.predict_label(x) ]
					labels = [ clf.predict_label(x) for (clf,_) in commitee if self.clf.predict_label(x) != clf.predict_label(x) ]
					preds = Counter(labels)
					preds = preds.most_common()
					diff = 0. if preds == [] else sum( [pred[1] for pred in preds] )
				
				balance = self.get_balance(x)
				informativeness = diff
				
			else:
				informativeness = 0.
				balance = 0
				
			scores.append( informativeness )
			scores_B.append( balance )
			
		# scores_B = Util.normalize(scores_B)
		scores = [scr*scores_B[iscr] for iscr,scr in enumerate(scores)]
		
		return self.sort_scores(scores)

	#---------------------------------------
	def query_balance(self):
		ids, _ = self.query_margin()
		scores = []
		
		for ix, x in enumerate(self.Ux):
			if ix in ids[:self.optimize*4]:
				informativeness = self.get_balance(x)
				
			else: informativeness = 0.
			scores.append( informativeness )
			
		return self.sort_scores(scores)
		
	#---------------------------------------
	def query_disag1_balanced(self, weighted = True):
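		# Pre-filter with the retrain-and-count disagreement, then rank the
		# survivors by class balance.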
		ids, _ = self.query_disagreement1(weighted=weighted, op=2)
		# ids, _ = self.query_margin()
		scores = []
		
		for ix, x in enumerate(self.Ux):
			if ix in ids[:self.optimize/2]:
				informativeness = self.get_balance(x)
				
			else: informativeness = 0.
			scores.append( informativeness )
			
		return self.sort_scores(scores)
		
	#---------------------------------------
	def query_disag2_balanced(self, weighted = True):
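		# Same as above, but pre-filters with the committee-based disagreement.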
		ids, _ = self.query_disagreement2(weighted=weighted, op=2)
		# ids, _ = self.query_margin()
		scores = []
		
		for ix, x in enumerate(self.Ux):
			if ix in ids[:self.optimize/2]:
				informativeness = self.get_balance(x)
				
			else: informativeness = 0.
			scores.append( informativeness )
			
		return self.sort_scores(scores)
		
	#---------------------------------------
	def query_explote_explore(self):
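		# Epsilon-greedy exploit/explore: the bandit picks eps; with probability
		# 1-eps exploit (disagreement query), otherwise explore (random query).
		# The bandit is then rewarded with the induced model change.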
		id_eps = self.mab.choose()
		eps = self.mab.algos[ id_eps ]
		# print "Choosen = ", eps, "Expected = ", sum([ a*l for a,l in zip(self.mab.algos,self.mab.nb_choices) ]) / sum(self.mab.nb_choices)
		
		
		rnd = random.uniform(0., 1.)
		# if rnd > eps: ids, scores = self.query_disagreement1(weighted = False)
		# if rnd > eps: ids, scores = self.query_disagreement1(weighted = True)
		if rnd > eps: ids, scores = self.query_disagreement2()
		
		# else: ids, scores = self.query_balance()
		else: ids, scores = self.query_random()
		
		reward = self.get_change( self.Ux[ids[0]], self.Uy[ids[0]] )
		self.mab.update(id_eps, reward)
		
		return ids, scores

	#---------------------------------------
	def get_disag1(self, x, weighted = False):
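		# Disagreement of a single hypothetical update: retrain with the true
		# label of x and count (or probability-weight) the unlabeled points
		# whose predicted label flips.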
		true_y = self.Uy[ self.Ux.index(x) ]
		# true_y = self.clf.predict_label(x)
		
		temp_clf = Classification(self.Lx + [x], self.Ly + [true_y], method = self.clf.method)
		temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train()
		
		if not weighted:
			diff = sum([ 1. if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ])
		else:
			diff = sum([ 1.-abs(temp_clf.getPredictProba(1,dp) - self.clf.getPredictProba(1,dp)) if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ])
		
		informativeness = diff
		return informativeness
	#
	def get_disag2(self, x, commitee, weighted = False):
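		# Committee disagreement on x: among members that contradict the current
		# classifier, take a vote (probability-weighted or simple counts).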
		preds = Counter()
		
		if weighted: # weight using the committee's probability distributions
			for (clf,_) in commitee:
				if self.clf.predict_label(x) != clf.predict_label(x):
					YP = zip( clf.h.classes_, clf.h.predict_proba( x )[0] )
					for (y,p) in YP: preds[y] += p
					
			preds = preds.most_common()
			diff = 0. if preds == [] else preds[0][1]
		else:
			# confis = [ clf.getPredictProba(1,x) for (clf,_) in commitee if self.clf.predict_label(x) != clf.predict_label(x) ]
			labels = [ clf.predict_label(x) for (clf,_) in commitee if self.clf.predict_label(x) != clf.predict_label(x) ]
			preds = Counter(labels)
			
			preds = preds.most_common()
			diff = 0. if preds == [] else sum( [pred[1] for pred in preds] )
		
		informativeness = diff
		return informativeness
	#
	def query_disagreement_test(self):
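		# Diagnostic variant: compute every informativeness measure for each
		# candidate and scatter-plot each measure against the test accuracy of
		# the hypothetically retrained model, to compare their usefulness.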
		ids, _ = self.query_margin()
		scores = []
		plots_Y = []
		plots_X0 = []; plots_X1 = []; plots_X2 = []; plots_X3 = []
		plots_X4 = []; plots_X5 = []; plots_X6 = []
		viz = Visualize()
		
		commitee = []
		for idp, dp in enumerate(self.Ux):
			if idp in ids[:self.optimize]:
				true_y = self.Uy[idp]
				# true_y = self.clf.predict_label(dp)
				
				temp_clf = Classification(self.Lx + [dp], self.Ly + [true_y], method = self.clf.method)
				temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train()
				commitee.append( (temp_clf, 1) )
				
		# ===========================
		# sampled = random.sample(ids, 100)
		
		for ix, x in enumerate(self.Ux):
			# if ix in sampled:
			if ix in ids[:self.optimize*9999999]: # effectively all margin-ranked candidates
				informativeness1 = self.get_disag1(x, weighted = False)
				informativeness2 = self.get_disag2(x, commitee, weighted = False)
				informativeness3 = self.get_disag1(x, weighted = True)
				informativeness4 = self.get_disag2(x, commitee, weighted = True)
				informativeness5 = self.clf.uncertainty_prediction(x)
				informativeness6 = self.get_balance(x)
				
				temp_clf = Classification(self.Lx + [x], self.Ly + [self.Uy[ix]], method = self.clf.method)
				temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train()
				acc = temp_clf.getTestAccuracy( self.Tx, self.Ty )
				
				plots_X0.append( acc )
				plots_X1.append( informativeness1 )
				plots_X2.append( informativeness2 )
				plots_X3.append( informativeness3 )
				plots_X4.append( informativeness4 )
				plots_X5.append( informativeness5 )
				plots_X6.append( informativeness6 )
				plots_Y.append( 'r' if self.Uy[ix] != self.clf.predict_label(x) else 'b' )
				
				# Plot each informativeness measure against the others and against
				# the post-retraining test accuracy.
				pairs = [ (plots_X1, plots_X2, '1-2'), (plots_X3, plots_X4, '3-4'),
						  (plots_X1, plots_X0, '1-acc'), (plots_X2, plots_X0, '2-acc'),
						  (plots_X3, plots_X0, '3-acc'), (plots_X4, plots_X0, '4-acc'),
						  (plots_X5, plots_X0, '5-acc'), (plots_X6, plots_X0, '6-acc') ]
				for xs, ys, suffix in pairs:
					fig, axs = plt.subplots( 1, 1, sharex=True )
					axs.scatter( xs, ys, c = plots_Y, marker = "o", cmap = plt.copper() )
					plt.savefig( str(len(self.Lx)) + self.datasetname + '.' + suffix + '.png' ); plt.close()
				
				# plots = [ plots_X1, plots_X2, plots_X3, plots_X4, plots_X5, plots_X6 ]
				# fig, axs = plt.subplots( 5, 1, sharex=True )
				# axs[0].scatter( Util.normalize(plots_X1), plots_X0, c = plots_Y, marker = "o", cmap = plt.copper() )
				# axs[1].scatter( Util.normalize(plots_X2), plots_X0, c = plots_Y, marker = "o", cmap = plt.copper() )
				# axs[2].scatter( Util.normalize(plots_X3), plots_X0, c = plots_Y, marker = "o", cmap = plt.copper() )
				# axs[3].scatter( Util.normalize(plots_X4), plots_X0, c = plots_Y, marker = "o", cmap = plt.copper() )
				# axs[4].scatter( Util.normalize(plots_X5), plots_X0, c = plots_Y, marker = "o", cmap = plt.copper() )
				# axs[5].scatter( Util.normalize(plots_X6), plots_X0, c = plots_Y, marker = "o", cmap = plt.copper() )
				# plt.savefig(str(len(self.Lx)) + self.datasetname+'.png')
				# plt.close()

				informativeness = acc
			else: informativeness = 0.
			
			scores.append( informativeness )
		
		return self.sort_scores(scores)
	
	#
	def get_balance(self, x):
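		# Class-balance score: retrain with the predicted label for x, then take
		# the normalized entropy (log base = number of predicted classes) of the
		# class distribution predicted over U; 1.0 is perfectly balanced.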
		# y = self.Uy[ self.Ux.index(x) ]
		y = self.clf.predict_label(x)
		
		temp_clf = Classification(self.Lx + [x], self.Ly + [y], method = self.clf.method)
		temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train()
		
		cnt = Counter()
		for dp in self.Ux: cnt[ temp_clf.predict_label(dp) ] += 1. / len(self.Ux)
		P = [ cnt[key] for key in cnt ]
		
		informativeness = 0. if len(P) < 2 else -1.0 * sum( [ p * math.log(p, len(P)) for p in P if p > 0 ] )
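		# e.g. a 60/40 split over two classes gives
		# -(0.6*log2(0.6) + 0.4*log2(0.4)) ~= 0.971, i.e. nearly balanced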
		
		return informativeness
	#
	def get_change(self, x, y = None):
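		# Model change as the angle, normalized to [0, 1], between the vectors
		# of positive-class probabilities over U before and after a hypothetical
		# retraining with (x, y).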
		if y is None:
			y = self.Uy[ self.Ux.index(x) ]
			# y = self.clf.predict_label(x)
		
		temp_clf = Classification(self.Lx + [x], self.Ly + [y], method = self.clf.method)
		temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train()
		
		v1 = [ self.clf.getPredictProba(1, dp) for dp in self.Ux if x != dp ]
		v2 = [ temp_clf.getPredictProba(1, dp) for dp in self.Ux if x != dp ]
		
		# informativeness = Util.dist(v1, v2)
		sim = cosine_similarity( [v1], [v2] )[0][0] # sklearn expects 2D inputs
		informativeness = math.acos( max(-1., min(1., sim)) ) / math.pi
		
		# v1 = []; v2 = []
		# for dp in self.Ux:
			# if x != dp:
				# v1 += [ v for v in self.clf.h.predict_proba( dp )[0] ]
				# v2 += [ v for v in temp_clf.h.predict_proba( dp )[0] ]
		# informativeness = distance.cosine(v1, v2)
		
		return informativeness
Example #45
                target_path = os.path.join(target_root_path, pred)
                if not os.path.exists(target_path):
                    os.makedirs(target_path)
                target_file_path = os.path.join(target_path, e)
                ext = os.path.splitext(subdir)[1]
                if ext in ('.jpg', '.bmp', '.png'):
                    cv2.imencode(ext, image)[1].tofile(target_file_path)
        elif os.path.isdir(subdir):  # if it is a directory
            batch_clfmove(clf, subdir, target_root_path)


if __name__ == '__main__':

    # test case
    pb_path = r'F:\models-master\cell\分类\pb/frozen_inception_resnet_v2_inf_graph.pb'
    img_root_path = r'F:\models-master\cell\分类\test_set'

    clf = Classification(pb_path)

    batch_test(clf,
               img_root_path,
               labels_list=[
                   'danhe', 'linba', 'shijian', 'shisuan', 'zhongxing',
                   'broken', 'background'
               ])

    batch_clfmove(clf, img_root_path, r'F:\models-master\cell\分类\1')
Example #46
    COREZMQ_SERVER_FILE = os.path.join(os.getcwd(), 'bin', 'corezmq_server.js')

# Start the server (see bin/corezmq_server.js for more options e.g. for how to pass a pluginConfig)
node_process = subprocess.Popen([
    'node', COREZMQ_SERVER_FILE, PROJECT_NAME, '-p', PORT, '-m', METADATA_PATH
],
                                stdout=sys.stdout,
                                stderr=sys.stderr)

logger.info('Node-process running at PID {0}'.format(node_process.pid))
# Create an instance of WebGME and the plugin
webgme = WebGME(PORT, logger)


def exit_handler():
    logger.info('Cleaning up!')
    webgme.disconnect()
    node_process.send_signal(signal.SIGTERM)


atexit.register(exit_handler)

commit_hash = webgme.project.get_branch_hash(BRANCH_NAME)
plugin = Classification(webgme, commit_hash, BRANCH_NAME, ACTIVE_NODE_PATH,
                        ACTIVE_SELECTION_PATHS, NAMESPACE)

# Do the work
plugin.main()

# The exit_handler will be invoked after this line