def readdata(sourcex_matrix=None, sourcey_matrix=None, targetx_matrix=None,
             targety_matrix=None,
             src_path='datasets/syndata_002_normalized_no_novel_class_source_stream.csv',
             tgt_path='datasets/syndata_002_normalized_no_novel_class_target_stream.csv',
             src_size=None, tgt_size=None):
    """Load source/target streams and encode their labels as integer ids.

    When ``sourcex_matrix`` is None the source features and labels are read
    with ``Classification.read_csv(src_path, None)``; otherwise the passed
    ``sourcex_matrix`` / ``sourcey_matrix`` are used. The target side works
    the same way with ``targetx_matrix`` / ``targety_matrix`` / ``tgt_path``.

    Label ids are assigned in order of first appearance, target labels
    first and then any source label not seen in the target. A source-only
    label used to raise ``ValueError``; it now gets the next free id.

    ``src_size`` and ``tgt_size`` are accepted for interface compatibility
    but do not affect the returned data (the original only computed an
    unused slice from ``src_size``).

    Returns:
        (source_x, source_label_ids, target_x, target_label_ids)
    """
    if sourcex_matrix is None:
        sourcex_matrix_, sourcey_matrix = Classification.read_csv(src_path, None)
    else:
        sourcex_matrix_ = sourcex_matrix

    if targetx_matrix is None:
        targetx_, targety_ = Classification.read_csv(tgt_path, size=None)
    else:
        targetx_ = targetx_matrix
        targety_ = targety_matrix

    # Target labels are registered first so their ids match what the
    # previous implementation produced.
    labellist = []
    for labels in (targety_, sourcey_matrix):
        for label in labels:
            if label not in labellist:
                labellist.append(label)

    sourcey_label = [labellist.index(label) for label in sourcey_matrix]
    targety_label = [labellist.index(label) for label in targety_]
    return sourcex_matrix_, sourcey_label, targetx_, targety_label
def __init__(self, Lx, Ly, Ux, Uy, Tx, Ty, method="svm", budget=1000):
    """Store the data pools and train the active and supervised classifiers.

    Lx/Ly are the labelled pool (a shallow copy is kept in Lx0/Ly0),
    Ux/Uy the unlabelled pool and Tx/Ty the test data. ``budget`` is
    accepted but not used in this constructor.
    """
    # Snapshot of the initial labelled pool, then the live references.
    self.Lx0, self.Ly0 = Lx[:], Ly[:]
    self.Lx, self.Ly = Lx, Ly
    self.Ux, self.Uy = Ux, Uy  # TODO should not be here
    self.Tx, self.Ty = Tx, Ty  # TODO should not be here

    self.th = 0.9
    self.queried = 0
    self.queries, self.ths = [], []
    self.infos, self.accuracys = [], []

    self.clf = Classification(self.Lx, self.Ly, method=method,
                              Vx=Lx + Ux, Vy=Ly + Uy)
    self.clf.train()

    self.sup_infos, self.sup_accuracys = [], []  # TODO should not be here
    self.sup_clf = Classification(self.Lx, self.Ly, method=method,
                                  Vx=Lx + Ux, Vy=Ly + Uy)
    self.sup_clf.train()  # TODO should not be here

    # Alternative kept from the original: method = "UCB".
    self.mab = Bandit(algos=np.arange(0., 1.1, 0.1),
                      method="reinforcement", alpha=1)
def __init__(self, size, features):
    """Generate random test data and instantiate the four algorithm wrappers."""
    self.size = size
    self.features = features

    # make_blobs also returns labels, which are not kept.
    blobs, _labels = make_blobs(n_samples=self.size,
                                n_features=self.features)
    self.test_data = blobs
    tensor_shape = (self.size, self.features, self.features)
    self.test_tensor_data = np.random.random(tensor_shape)

    self.kmeans = Clustering(10)
    self.svm = Classification()
    self.gauss = MultivariateGauss()
    self.tensor = TensorDecomposition()
def query_balanced_disag2(self, weighted=True, op=1):
    """Score unlabelled points by committee disagreement times balance.

    The first ``self.optimize * op`` ids from ``query_margin`` are the
    candidates. One committee member is trained per candidate, on the
    labelled pool plus that candidate with the label the current
    classifier predicts for it. A candidate's score is the disagreement
    of the committee with the current classifier multiplied by
    ``get_balance``; non-candidates score 0.

    Returns whatever ``self.sort_scores`` returns for the score list.
    """
    ids, _ = self.query_margin()
    candidates = ids[:self.optimize * op]

    commitee = []
    for idp, dp in enumerate(self.Ux):
        if idp not in candidates:
            continue
        guessed_y = self.clf.predict_label(dp)
        member = Classification(self.Lx + [dp], self.Ly + [guessed_y],
                                method=self.clf.method)
        member.GAMMA, member.C = self.clf.GAMMA, self.clf.C
        member.train()
        commitee.append((member, 1))

    scores = []
    scores_B = []
    for ix, x in enumerate(self.Ux):
        if ix not in candidates:
            scores.append(0.)
            scores_B.append(0)
            continue

        if weighted:
            # Accumulate class probabilities of the disagreeing members.
            votes = Counter()
            for (clf, _) in commitee:
                if self.clf.predict_label(x) != clf.predict_label(x):
                    for (y, p) in zip(clf.h.classes_, clf.h.predict_proba(x)[0]):
                        votes[y] += p
            ranked = votes.most_common()
            diff = ranked[0][1] if ranked else 0.
        else:
            # Count the disagreeing members.
            labels = [clf.predict_label(x) for (clf, _) in commitee
                      if self.clf.predict_label(x) != clf.predict_label(x)]
            ranked = Counter(labels).most_common()
            diff = sum([pred[1] for pred in ranked]) if ranked else 0.

        balance = self.get_balance(x)
        scores.append(diff)
        scores_B.append(balance)

    combined = [score * weight for score, weight in zip(scores, scores_B)]
    return self.sort_scores(combined)
def get_max_classification_occurrences(examples):
    """Return the classification value that occurs most often in *examples*.

    Every value from ``Classification().get_values()`` starts at zero; on
    a tie the value listed first wins.
    """
    tally = {value: 0 for value in Classification().get_values()}
    for example in examples:
        tally[example.get_classification()] += 1
    return max(tally, key=tally.get)
def get_balance(self, x):
    """Return the normalised entropy of predicted classes over ``self.Ux``.

    A temporary classifier is trained on the labelled pool plus ``x``,
    labelled with the current classifier's prediction. The class
    frequencies it predicts on ``self.Ux`` are turned into an entropy with
    the number of predicted classes as logarithm base.

    Returns 0.0 when fewer than two classes are predicted. The original
    raised ``ZeroDivisionError`` there, because the logarithm base was 1.
    """
    # y = self.Uy[ self.Ux.index(x) ]
    y = self.clf.predict_label(x)
    temp_clf = Classification(self.Lx + [x], self.Ly + [y],
                              method=self.clf.method)
    temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C
    temp_clf.train()

    cnt = Counter()
    for dp in self.Ux:
        cnt[temp_clf.predict_label(dp)] += 1. / len(self.Ux)

    P = list(cnt.values())
    if len(P) < 2:
        # A single predicted class (or an empty pool) has no spread.
        return 0.0
    return -1.0 * sum(p * math.log(p, len(P)) for p in P if p > 0)
def __init__(self, webiscorpus, train_samples=1000, noQueryTerms=15, use_ner=True, useTFIDF=True, use_noun=True,
             use_verb=True, use_adj=True, useHandwrittenAsGold=False, useContext=False):
    """Configure the classifier experiment and initialise the base class.

    The model name encodes the feature switches, the number of training
    samples and the number of query terms.
    """
    self.train_samples = train_samples
    name_parts = [
        'clf-model-ner', str(use_ner),
        '-use-handwritten-as-gold', str(useHandwrittenAsGold),
        '-useContext', str(useContext),
        '-noun', str(use_noun),
        '-verb', str(use_verb),
        '-adj', str(use_adj),
        '-', str(self.train_samples),
        '-QueryTerms', str(noQueryTerms),
        '5context',
    ]
    self.model_name = ''.join(name_parts)

    self.classification = Classification(
        use_ner=use_ner, train_samples=train_samples, use_noun=use_noun,
        use_verb=use_verb, use_adj=use_adj, useTFIDF=useTFIDF,
        useContext=useContext)

    # The silver queries are always loaded first; the handwritten ones
    # replace them only when requested.
    self.silver_dict = Utils.load_from_pickle('queries-silver.p')
    self.noQueryTerms = noQueryTerms
    self.training_item_generator_func = webiscorpus.corpus_gen_non_white_listed
    if useHandwrittenAsGold:
        self.training_item_generator_func = webiscorpus.corpus_gen_white_listed
        self.silver_dict = Utils.load_from_pickle('queries-handwritten.p')

    super(ClassifierExperminet, self).__init__(self.model_name, webiscorpus, mini_index=False)
def __init__(self):
    """Copy the train/test splits from a freshly built Classification."""
    source = Classification()
    self.x_train_std, self.x_test_std = source.x_train_std, source.x_test_std
    self.y_train, self.y_test = source.y_train, source.y_test
def get_entropy(examples):
    """Return the base-2 entropy of the classifications in *examples*.

    Only values listed by ``Classification().get_values()`` contribute.
    An empty *examples* yields 0.
    """
    classif = Classification()
    if len(examples) == 0:
        return 0

    observed = [example.get_classification() for example in examples]
    total = len(examples)
    entropy = 0
    for value in classif.get_values():
        probability = observed.count(value) / total
        if probability != 0:
            entropy = entropy + (probability * math.log2(probability))
    return -entropy
def OpenFile(self):
    """Ask for an image file, show it on the canvas and create the helpers.

    Does nothing when the dialog is cancelled. The image is shrunk to at
    most 600 px wide and 450 px high, each axis independently.
    """
    self.CurrentFile = filedialog.askopenfilename(
        initialdir=r'C:\Users\user\Desktop\CV', title="Select file",
        filetypes=(("jpeg files", "*.jpg"), ("all files", "*.*")))
    if not self.CurrentFile:
        return

    x = os.path.basename(self.CurrentFile)
    label_text = '檔案 :' + x  # label prefix means "File :"
    # after() needs a callable. The original passed the result of
    # config(), so nothing was actually scheduled.
    self.main.after(5, lambda: self.Filelabel.config(text=label_text))

    # NOTE(review): ImgClass still receives the bare file name, as before;
    # confirm whether it should get the full path as well.
    self.ic = ImgClass(x)
    # Open the selected path itself, not its basename, so files outside
    # the working directory load too.
    im = Image.open(self.CurrentFile)
    if im.size[0] > 600:
        im = im.resize((600, im.size[1]))
    if im.size[1] > 450:
        im = im.resize((im.size[0], 450))
    self.sizeX = im.size[0]
    self.sizeY = im.size[1]
    self.CanvaLabel.configure(text=str(self.sizeX) + 'X' + str(self.sizeY))

    img = ImageTk.PhotoImage(image=im)
    self.ImgCanva.create_image(0, 0, image=img, anchor=NW)
    self.ImgCanva.image = img  # keep a reference on the widget
    self.classfy = Classification(self.ic, self)
    self.program = Mainsys(self.ic, self)
def main(MalDir, BenDir, FeatureCombination='111'):
    '''
    Extract flow features from the apps in both directories, classify
    them, then delete the generated feature files.

    :param String MalDir: directory searched for malware ``.apk`` files
    :param String BenDir: directory searched for benign ``.apk`` files
    :param String FeatureCombination: passed through to ExtractFlowFeatures
    '''
    # 1 - Initializing
    createdir('Metadata' + os.path.sep + 'Malware')
    createdir('Metadata' + os.path.sep + 'Benign')
    createdir('Data' + os.path.sep + 'Malware')
    createdir('Data' + os.path.sep + 'Benign')

    # 2 - Extracting flow features from apps
    pool = mp.Pool(NumFlowProcesses)
    for file in walk(MalDir, '.apk'):
        pool.apply_async(ExtractFlowFeatures, args=(file, 'Malware', FeatureCombination))
    for file in walk(BenDir, '.apk'):
        pool.apply_async(ExtractFlowFeatures, args=(file, 'Benign', FeatureCombination))
    pool.close()
    pool.join()

    # 3 - Classifying
    Classification('Data' + os.path.sep + 'Malware', 'Data' + os.path.sep + 'Benign', 0.3)

    # 4. Remove feature files after classification.
    # os.remove replaces the shell "rm -rf": file names come from the
    # analysed apps and must not be interpreted by a shell.
    for file in walk('Data', '.Features'):
        if os.path.isfile(file):
            os.remove(file)
def run():
    """Build a dataset and a feature/SVM pipeline, evaluate it and log the scores.

    The dataset is loaded from a hard-coded image folder with labels
    derived from the paths. The classifier resizes the input, runs two
    parallel feature pipelines (HOG and Colorname, each followed by a
    spatial pyramid and norm normalisation) and ends in a multiclass SVM.
    Accuracy and mAP from one random split are logged at WARNING level.
    """
    logging.getLogger().setLevel(logging.WARNING)
    d = Dataset()
    # Alternative datasets kept for reference:
    #d.use_images_in_folder("/home/simon/Datasets/ImageNet_Natural/images/")
    #d.use_images_in_folder("/home/simon/Datasets/ICAO_german/")
    d.use_images_in_folder("/home/simon/Datasets/desko_ids/images_unique/")
    #d.use_images_in_folder("/home/simon/Datasets/croatianFishDataset-final/")
    #d.use_images_in_folder("/home/jaeger/data/croatianFishDataset1-5Dir/")
    d.create_labels_from_path()
    d.fill_split_assignments(1)
    #d.read_from_file("/home/simon/Datasets/CUB_200_2011/cropped_scaled_alex.txt","imagepaths","string")
    #d.read_from_file("/home/simon/Datasets/CUB_200_2011/tr_ID.txt","split_assignments","int")
    #d.read_from_file("/home/simon/Datasets/CUB_200_2011/labels.txt","labels","int")
    c = Classification()
    c.add_algorithm(Resize(512, 320))
    #
    #c.add_algorithm(Noise('saltpepper',0.1))
    p = ParallelAlgorithm()
    #
    # First parallel branch: HOG features.
    p1 = AlgorithmPipeline()
    p1.add_algorithm(HOG())
    p1.add_algorithm(SpatialPyramid())
    #
    #p1.add_algorithm(MinMaxNormalize())
    p1.add_algorithm(NormNormalize())
    p.add_pipeline(p1)
    # Second parallel branch: Colorname features on a smaller resize.
    p2 = AlgorithmPipeline()
    p2.add_algorithm(Resize(64, 32))
    p2.add_algorithm(Colorname())
    p2.add_algorithm(SpatialPyramid())
    p2.add_algorithm(NormNormalize())
    #
    #p2.add_algorithm(MinMaxNormalize())
    p.add_pipeline(p2)
    c.add_algorithm(p)
    #
    #c.add_algorithm(MinMaxNormalize())
    #c.add_algorithm(NormNormalize())
    #
    # NOTE(review): in the collapsed source this call follows a bare "#";
    # confirm it is meant to be active rather than commented out.
    c.add_algorithm(MeanCalculator())
    #c.add_algorithm(Resize(32,24))
    c.add_algorithm(MulticlassSVM())
    #
    #c.train(d)
    #
    #for path, gt_label in zip(d.imagepaths, d.labels):
    #
    #    logging.info("Predicted class for " + path + " is " + str(c.predict(path).data[0]) + " (GT: " + str(gt_label) + ")")
    ## Caffe features
    #c.add_algorithm(Caffe("","","fc7"))
    #c.add_algorithm(MulticlassSVM())
    #with open('run_evaluation.py', 'r') as fin:
    #    print(fin.read())
    mean_acc, mean_mAP = Evaluation.random_split_eval( d, c, absolute_train_per_class=1, runs=1)
    #mean_acc,mean_mAP = Evaluation.fixed_split_eval(d,c)
    logging.warning("Total accuracy is " + str(mean_acc))
    logging.warning("Total mAP is " + str(mean_mAP))
def test_Classification_dtype():
    """Constructing Classification from a plain string must raise TypeError.

    The original docstring states that anything other than a pandas
    DataFrame is expected to be rejected.
    """
    not_a_dataframe = "A wrong data type of type string"
    with pytest.raises(TypeError):
        Classification(not_a_dataframe)
class ClassifierExperminet(Experiment):
    """Experiment that builds search queries from a trained term classifier."""

    def __init__(self, webiscorpus, train_samples=1000, noQueryTerms=15, use_ner=True, useTFIDF=True, use_noun=True,
                 use_verb=True, use_adj=True, useHandwrittenAsGold=False, useContext=False):
        """Configure the classifier, the gold queries and the base experiment.

        The model name encodes the feature switches, the number of
        training samples and the number of query terms.
        """
        self.train_samples = train_samples
        self.model_name = 'clf-model-ner' + str(use_ner) + '-use-handwritten-as-gold' + str(useHandwrittenAsGold) + \
                          '-useContext' + str(useContext) + '-noun' + str(use_noun) + '-verb' + str(use_verb) + \
                          '-adj' + str(use_adj) + '-' + str(self.train_samples) + \
                          '-QueryTerms' + str(noQueryTerms) + '5context'
        self.classification = Classification(use_ner=use_ner, train_samples=train_samples, use_noun=use_noun,
                                             use_verb=use_verb, use_adj=use_adj, useTFIDF=useTFIDF,
                                             useContext=useContext)
        self.silver_dict = Utils.load_from_pickle(
            # 'id-terms-in-common-no-stopwords-and-common-words-automatic-doc-lucene-dict.p')
            'queries-silver.p')
        self.noQueryTerms = noQueryTerms
        self.training_item_generator_func = webiscorpus.corpus_gen_non_white_listed
        if useHandwrittenAsGold:
            self.training_item_generator_func = webiscorpus.corpus_gen_white_listed
            self.silver_dict = Utils.load_from_pickle('queries-handwritten.p')
        super(ClassifierExperminet, self).__init__(self.model_name, webiscorpus, mini_index=False)

    @timing_decorator
    def train_model(self):
        """Feed up to ``train_samples`` items to the classifier, train and save it."""
        for i, item in enumerate(self.training_item_generator_func()):
            # i is zero-based: ">=" stops after exactly train_samples items
            # (the previous ">" consumed one extra item).
            if i >= self.train_samples:
                break
            self.classification.process_query(item['Subject'], item['Content'], self.silver_dict[item['Id']])
        self.classification.train()
        self.classification.save_model(self.model_name)

    def get_query_per_item(self, item):
        """Return a space-separated query built from the top-scoring terms.

        Terms are picked by descending prediction score until
        ``noQueryTerms`` distinct terms are collected, then stopwords are
        dropped. Terms stay in score order, so the result is deterministic
        (the previous ``set`` round-trip made the order arbitrary).
        """
        full_terms = self.classification.process_query(item['Subject'], item['Content'], self.silver_dict[item['Id']])
        result = self.classification.predict().tolist()
        query_terms = []
        while len(query_terms) < self.noQueryTerms and len(result):
            picked_word_index = result.index(max(result))
            picked_word = full_terms[picked_word_index]
            result.pop(picked_word_index)
            full_terms.pop(picked_word_index)
            if picked_word not in query_terms:
                query_terms.append(picked_word)
        # query_terms is already duplicate-free, so no set() is needed.
        return ' '.join(term for term in query_terms if term not in anserini.stopwords_temp)

    def run(self):
        """Train the model, build and run the queries, then store the MRR."""
        # Loading a previously saved model was disabled in the original
        # (self.classification.load_model(self.model_name)).
        self.train_model()
        self.build_queries()
        self.search_queries()
        _, mrr, _, _ = calculate_mrr(self.result_pickle_name, self.white_list)
        self.mrr = float(mrr)
def main(Maldir, Gooddir, NumofProcesses, FeatureCombination='1111'):
    '''
    Extract features from all apps, merge FlowDroid output into the
    feature files, classify, then delete the feature files.

    :param String Maldir: directory searched for malware ``.apk`` files
    :param String Gooddir: directory searched for benign ``.apk`` files
    :param String NumofProcesses: pool size for the Apktool extraction
    :param String FeatureCombination: Combination options of CAPI, IA, PAPI and FLOW,
        4 chars of '0'/'1'(ex/include). Default option is '1111'
    '''
    # 1. get features of API, IA, PAPI using Apktool.
    createdir('Metadata' + os.path.sep + 'Malware')
    createdir('Metadata' + os.path.sep + 'Benign')
    createdir('Data' + os.path.sep + 'Malware')
    createdir('Data' + os.path.sep + 'Benign')
    pool = mp.Pool(int(NumofProcesses))
    for file in walk(Maldir, '.apk'):
        pool.apply_async(FeatureExtraction, args=(file, 'Malware', FeatureCombination))
    for file in walk(Gooddir, '.apk'):
        pool.apply_async(FeatureExtraction, args=(file, 'Benign', FeatureCombination))
    pool.close()
    pool.join()

    # 2. get features of FLOW, using FlowDroid
    # NOTE(review): the docstring lists FLOW as the fourth flag, yet index 1
    # is tested here; confirm which position is intended.
    if FeatureCombination[1] == '1':
        pool = mp.Pool(int(NumFlowProcesses))
        for file in walk(Maldir, '.apk'):
            pool.apply_async(ExtractFlowFeatures, args=(file, 'Malware'))
        for file in walk(Gooddir, '.apk'):
            pool.apply_async(ExtractFlowFeatures, args=(file, 'Benign'))
        pool.close()
        pool.join()

        for file in walk('Data', '.Features'):
            flow_file = file.replace('.Features', 'FlowFeature.json')
            if os.path.exists(flow_file):
                with open(flow_file, 'r') as f:
                    FlowdroidLog = f.readlines()
                with open(file, 'a') as f_a:
                    for line in FlowdroidLog:
                        # Same output as the former "print >> f_a, line":
                        # the line followed by one extra newline.
                        f_a.write(line + '\n')
            else:
                # No flow output for this app: drop its feature file.
                os.remove(file)

    # 3. classification
    Classification('Data' + os.path.sep + 'Malware', 'Data' + os.path.sep + 'Benign', 0.3)

    # 4. remove feature files (no shell involved, so odd file names are safe)
    for file in walk('Data', '.Features'):
        if os.path.isfile(file):
            os.remove(file)
def start_an(self, data):
    """Start the 'datacleaning' classification of *data* on the thread pool."""
    classifier = Classification(data, ty='datacleaning')
    worker = WorkerLong(classifier.classify, 'datacleaning',
                        int(self.monte), self.new_path_model)

    signals = worker.signals
    signals.result.connect(self.print_output)
    # Progress drives both the generic handler and the progress widget.
    signals.progress.connect(self.progress_fn)
    signals.progress.connect(self.prog_monte.setValue)
    signals.finished.connect(self.thread_complete)

    self.threadPool.start(worker)
def Url2ic(self):
    """Grab one frame from the URL in ``self.url_src`` and load it.

    Does nothing when the URL field is empty, the capture cannot be
    opened or no frame can be read. The capture handle is always released
    (the original leaked it).
    """
    url = self.url_src.get()
    if not url:
        return

    cap = cv2.VideoCapture(url)
    try:
        if not cap.isOpened():
            return
        ret, img = cap.read()
    finally:
        cap.release()

    if ret:
        self.ic = ImgClass('')
        self.ic.FromUrlSrc(img)
        self.classfy = Classification(self.ic, self)
        self.program = Mainsys(self.ic, self)
        ip.CvShow('N', img)
def start_from_terminal(app):
    """
    Parse command line options and start the server.

    The models are attached to *app* before serving. With ``--debug`` the
    app's own ``run`` is used, otherwise ``start_tornado``. The ``--gpu``
    flag is parsed but not read here.
    """
    option_specs = (
        (('-d', '--debug'),
         dict(help="enable debug mode", action="store_true", default=False)),
        (('-p', '--port'),
         dict(help="which port to serve content on", type='int', default=5000)),
        (('-g', '--gpu'),
         dict(help="use gpu mode", action='store_true', default=False)),
    )
    parser = optparse.OptionParser()
    for flags, settings in option_specs:
        parser.add_option(*flags, **settings)
    opts, _ = parser.parse_args()

    app.clf = Classification()
    app.face = FaceVerification()
    app.od = ObjectDetection()
    app.caption = ImgCaption()

    if not opts.debug:
        start_tornado(app, opts.port)
        return
    app.run(debug=True, host='0.0.0.0', port=opts.port)
def query_eer(self, limit_Y=20):
    """Score unlabelled points by expected error (entropy) reduction.

    For each of the first ``self.optimize`` ids from ``query_margin``, the
    classifier is retrained once per candidate label (at most ``limit_Y``
    labels, most probable first). The entropies it yields on the other
    unlabelled points are summed, weighted by the label's probability. The
    score is the inverse of that sum; non-candidates score 0.

    A zero sum (for instance when ``x`` is the only point in the pool)
    now scores ``inf`` rather than raising ``ZeroDivisionError``.
    """
    ids, _ = self.query_margin()
    candidates = ids[:self.optimize]
    scores = []
    for ix, x in enumerate(self.Ux):
        if ix not in candidates:
            scores.append(0.)
            continue

        YP = self.clf.predict(x, all=True)
        YP.sort(key=operator.itemgetter(1), reverse=True)
        sums = 0.
        for ir, (yy, proba) in enumerate(YP):
            if ir == limit_Y:
                break
            temp_clf = Classification(self.Lx + [x], self.Ly + [yy], method=self.clf.method)
            # TODO FIXME: do it in general not specifically for svm
            temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C
            temp_clf.train()
            e_h1 = sum(temp_clf.uncertainty_entropy(dp) for dp in self.Ux if dp != x)
            sums += proba * e_h1

        # No expected entropy left means maximal informativeness.
        informativeness = 1. / sums if sums != 0 else float("inf")
        scores.append(informativeness)
    return self.sort_scores(scores)
def query_balanced_disag1(self, weighted=True, op=1):
    """Score unlabelled points by prediction change times balance.

    Each of the first ``self.optimize * op`` ids from ``query_margin`` is
    added to the labelled pool with its predicted label and the classifier
    is retrained. The score measures how much the predictions on the other
    unlabelled points change, multiplied by ``get_balance``. Non-candidates
    score 0.
    """
    ids, _ = self.query_margin()
    candidates = ids[:self.optimize * op]
    scores, scores_B = [], []

    for ix, x in enumerate(self.Ux):
        if ix not in candidates:
            scores.append(0.)
            scores_B.append(0.)
            continue

        guessed_y = self.clf.predict_label(x)
        temp_clf = Classification(self.Lx + [x], self.Ly + [guessed_y],
                                  method=self.clf.method)
        temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C
        temp_clf.train()

        others = [dp for dp in self.Ux if dp != x]
        if weighted:
            # Probability shift on points whose label flips.
            diff = sum([
                abs(temp_clf.getPredictProba(1, dp) - self.clf.getPredictProba(1, dp))
                if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0.
                for dp in others
            ])
        else:
            # Plain count of points whose label flips.
            diff = sum([
                1. if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0.
                for dp in others
            ])

        balance = self.get_balance(x)
        scores.append(diff)
        scores_B.append(balance)

    combined = [score * weight for score, weight in zip(scores, scores_B)]
    return self.sort_scores(combined)
def get_change(self, x, y=None):
    """Return the angular distance between predictions before and after adding ``x``.

    When ``y`` is None the label stored in ``self.Uy`` for ``x`` is used.
    A temporary classifier is trained with ``(x, y)`` added. The
    ``getPredictProba(1, dp)`` values of both classifiers over the other
    unlabelled points are compared via ``acos(cosine_similarity) / pi``.
    """
    if y is None:
        y = self.Uy[self.Ux.index(x)]

    temp_clf = Classification(self.Lx + [x], self.Ly + [y],
                              method=self.clf.method)
    temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C
    temp_clf.train()

    others = [dp for dp in self.Ux if x != dp]
    before = [self.clf.getPredictProba(1, dp) for dp in others]
    after = [temp_clf.getPredictProba(1, dp) for dp in others]

    # NOTE(review): acos raises ValueError if the similarity drifts
    # outside [-1, 1]; confirm cosine_similarity cannot return that.
    return math.acos(cosine_similarity(before, after)) / math.pi
def __init__(self, classifier_pattern=config.CLASSIFIER_PATTERN, img_content=''):
    """Create the classifier, search engine and camera; reset device state.

    With ``classifier_pattern == 'static'`` the classifier is built from
    ``config.IMG_PATH`` alone, otherwise ``img_content`` is passed too.
    """
    extra = {} if classifier_pattern == 'static' else {'img_content': img_content}
    self.classifier = Classification(config.IMG_PATH, **extra)
    self.search_engine = Search()
    self.camera = Camera()

    # Devices and results are filled in later.
    self.speaker = self.led_device = self.rotate = None
    self.classify_result = self.garbage_tag = None
    self.try_time = 0
def _load_labeled_documents(base_path):
    """Read every ``.txt`` file under *base_path*; return (documents, labels).

    Documents are grouped per directory in walk order. A label is recorded
    only for files whose directory splits into exactly four '/'-separated
    parts, and it is the fourth part.
    """
    grouped = {}
    labels = []
    for root, dirs, files in os.walk(base_path, topdown=False):
        for name in files:
            if not name.endswith('.txt'):
                continue
            get_root = root.split('/')
            if len(get_root) == 4:
                labels.append(get_root[3])
            key = root.replace("./", "")
            path_file = os.path.join(key, name)
            # "with" closes the handle; the original leaked one per file.
            with open(path_file, 'r', encoding="ISO-8859-1") as handle:
                grouped.setdefault(key, []).append(handle.read())
    documents = [data for value in grouped.values() for data in value]
    return documents, labels


def main():
    """Train on './Data/Data latih', test on './Data/Data uji', report accuracy."""
    array_document, array_kelas = _load_labeled_documents('./Data/Data latih')
    print(len(array_document))
    array_document_testing, array_kelas_testing = _load_labeled_documents('./Data/Data uji')

    klasifikasi = Classification()
    print(array_document_testing)
    klasifikasi.train(array_document, array_kelas)
    hasil = klasifikasi.testing(array_document_testing)
    klasifikasi.hitung_akurasi(hasil, array_kelas_testing)
class Execute:
    """Classify a JSON query, extract its parameters and run the matching search."""

    def __init__(self):
        self.classification = Classification()
        self.extraction = Extraction()
        self.elastic = Elastic()
        self.z = Database()

    def execute(self, query):
        """Run *query* (a JSON string with a ``"query"`` key).

        The first element of the extraction result selects
        ``self.elastic.process0`` .. ``process9``, whose return value is
        passed through. Any other value yields a "not supported" message.
        On any exception the error is printed and None is returned.
        """
        try:
            json_data = json.loads(query)
            prediction = self.classification.predict(json_data['query'])
            results = self.extraction.processQuery(prediction[0][0], json_data['query'])
            # Same equality tests as the former if/elif chain, in order.
            for code in range(10):
                if results[0] == float(code):
                    handler = getattr(self.elastic, 'process%d' % code)
                    return handler(results)
            # A space was missing before "not" in the original message.
            return "query type " + str(results[0]) + " not supported"
        except Exception as ex:
            print(str(ex))
            return None
def __init__(self, Lx, Ly, Ux, Uy, Tx, Ty, method="svm", budget=1000):
    """Store the data pools, train the initial classifier, reset the buffers.

    Lx/Ly are the labelled pool, Ux/Uy the unlabelled pool and Tx/Ty the
    test data.
    """
    self.Lx, self.Ly = Lx, Ly
    self.Ux = Ux
    self.Uy = Uy  # TODO should not be here
    self.Tx, self.Ty = Tx, Ty  # TODO should not be here

    self.optimization_limit = 20
    # margin proba entropy random weight expectedErrorReduction etc
    self.optimization_method = "margin"
    self.budget = budget
    self.accuracys = []

    self.clf = Classification(self.Lx, self.Ly, method=method,
                              Vx=Lx + Ux, Vy=Ly + Uy)
    self.clf.train()

    # Six independent visualisation buffers.
    self.viz_A, self.viz_B, self.viz_C = [], [], []
    self.viz_D, self.viz_E, self.viz_F = [], [], []
def get_disag1(self, x, weighted=False):
    """Measure how much adding ``x`` with its stored label changes predictions.

    A temporary classifier is trained on the labelled pool plus ``x``,
    labelled from ``self.Uy``. Unweighted, the result counts the other
    unlabelled points whose predicted label flips. Weighted, each flip
    contributes ``1 - |difference of getPredictProba(1, dp)|``.
    """
    stored_y = self.Uy[self.Ux.index(x)]
    temp_clf = Classification(self.Lx + [x], self.Ly + [stored_y],
                              method=self.clf.method)
    temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C
    temp_clf.train()

    others = [dp for dp in self.Ux if dp != x]
    if weighted:
        return sum([
            1. - abs(temp_clf.getPredictProba(1, dp) - self.clf.getPredictProba(1, dp))
            if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0.
            for dp in others
        ])
    return sum([
        1. if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0.
        for dp in others
    ])
class CPUImpl:
    """Runs four algorithm wrappers on random data and prints their results."""

    def __init__(self, size, features):
        """Generate the test data and instantiate the algorithm wrappers."""
        self.size = size
        self.features = features
        blobs, _labels = make_blobs(n_samples=self.size,
                                    n_features=self.features)
        self.test_data = blobs
        tensor_shape = (self.size, self.features, self.features)
        self.test_tensor_data = np.random.random(tensor_shape)
        self.kmeans = Clustering(10)
        self.svm = Classification()
        self.gauss = MultivariateGauss()
        self.tensor = TensorDecomposition()

    def evaluate(self):
        """Print the data set size, then each wrapper's ``evaluate`` result."""
        print("Data set: %s samples" % self.size)
        print("Features: %s" % self.features)
        print("======")
        # Each evaluation runs right before its own line is printed.
        benchmarks = (
            ("KMeans: %s s", lambda: self.kmeans.evaluate(self.test_data)),
            ("OneClassSVM: %s s", lambda: self.svm.evaluate(self.test_data, "svm")),
            ("Gauss: %s s", lambda: self.gauss.evaluate(self.test_data)),
            ("Parafac: %s s", lambda: self.tensor.evaluate(self.test_tensor_data)),
        )
        for template, measure in benchmarks:
            print(template % measure())
def start_an(self):
    """Start the analysis, or show the stored result when one already exists.

    If ``self.res_path`` exists, the stored result is displayed and the
    view actions are enabled. Otherwise a classification worker is started
    on the thread pool.
    """
    if os.path.exists(self.res_path):
        self.view('Pred_class', 'result')
        print('process an already done!!!')
        view_actions = (
            self.v_no_overlayAct, self.v_all_classAct, self.v_acAct,
            self.v_adAct, self.v_hAct, self.v_tot_uAct,
            self.v_a_uAct, self.v_e_uAct,
        )
        for action in view_actions:
            action.setEnabled(True)
        return

    classifier = Classification(self.path_work, ty='analysis')
    self.progress(title='Analysis')
    worker = WorkerLong(classifier.classify, self.type_an,
                        self.monte_c, self.model_name)
    signals = worker.signals
    signals.progress.connect(self.progress_fn)
    signals.progress.connect(self.ui.onCountChanged)
    signals.finished.connect(self.thread_cl_complete)
    self.threadPool.start(worker)
def generateNewModelRULSIF(self, trgx_matrix, srcx_matrix, srcy_matrix, alpha, sigma_list, lambda_list, b, fold, subsize):
    """Build a new model from the given streams and add it to the ensemble.

    Raises:
        Exception: if the source or the target matrix is empty.
    """
    model = Model()
    both_non_empty = len(srcx_matrix) != 0 and len(trgx_matrix) != 0
    if not both_non_empty:
        raise Exception(
            'Source or Target stream should have some elements')

    # Create new model
    print('Target model creation')
    model.model = Classification.get_model(
        trgx_matrix, srcx_matrix, srcy_matrix, alpha,
        sigma_list, lambda_list, b, fold, subsize)

    # compute source and target weight
    print('Computing model weights')
    model.weight = model.computeModelWeightRULSIF(trgx_matrix)

    # update ensemble; -1 is not reported
    index = self.__addModelRULSIF(model, trgx_matrix)
    if index == -1:
        return
    print('Ensemble updated at ' + str(index))
def __init__(self, Lx, Ly, Ux, Uy, Tx, Ty, method="svm", budget=251, optimize=50, datasetname="dataset"):
    """Store the data pools, train the initial classifier, create both bandits.

    Lx/Ly are the labelled pool, Ux/Uy the unlabelled pool and Tx/Ty the
    test data.
    """
    self.datasetname = datasetname
    self.Lx, self.Ly = Lx, Ly
    self.Ux = Ux
    self.Uy = Uy  # TODO should not be here
    self.Tx, self.Ty = Tx, Ty  # TODO should not be here
    self.optimize = optimize
    self.budget = budget
    self.accuracys = []

    self.clf = Classification(self.Lx, self.Ly, method=method,
                              Vx=Lx + Ux, Vy=Ly + Uy)
    self.clf.train()

    # Alternatives kept from the original: "UCB" for mab, "boltzmann" for mab2.
    self.mab = Bandit(algos=np.arange(0., 1.1, 0.1), method="boltzmann")
    self.mab2 = Bandit(algos=["disag1", "disag2"], method="reinforcement")
def start(self, cata, item_i):
    """Animate the progress bar, then open the page for the chosen item.

    The item description is an '@'-separated string looked up by category
    (``cata``) and item name (``item_i``). Unknown combinations pass an
    empty string to ``Classification``. This window is destroyed afterwards.
    """
    for percent in range(1, 101):
        self.progress['value'] = percent
        self.update_idletasks()
        self.label1.config(text=str(percent) + "%")
        time.sleep(0.015)
    self.progress['value'] = 100

    catalogue = {
        "plants": {
            "Apple": "icon_image/trree_ap.png@Detect Apple plant disease@models/apple_plant_Model.p@#E40000",
            "Corn": "icon_image/corn_plant2.png@Detect Corn plant disease@models/corn_plant_Model.p@#22A61E",
            "Grape": "icon_image/grape_plant.JPG@Detect Grape plant disease@models/grape_plant_Model.p@#678623",
            "Cherry": "icon_image/cherry_plant.jpg@Detect Cherry Plant disease@models/cherry_plant_Model.p@#265909",
            "Pepper": "icon_image/peeper_plant.jpg@Detect Pepper plant disease@models/pepper_plant_Model.p@#b70000",
            "Potato": "icon_image/potato_plant1.png@Detect Potato plant disease@models/potato_plant_Model.p@#a67d00",
            "Tomato": "icon_image/tomato_plant.jpg@Detect Tomato plant disease@models/tomato_plant_Model.p@#620D0D",
        },
        'fruits': {
            "Apple": "icon_image/apple.jpg@Detect Apple Fruit disease@models/apple_fruit_Model.p@#E40000",
            "Banana": "icon_image/banana.jpg@Detect Banana Fruit disease@models/banana_fruit_Model.p@#ffef00",
            'Orange': "icon_image/orange.jpg@Detect Orange Fruit disease@models/orange_fruit_Model.p@#f76a04",
        },
    }
    item_info = catalogue.get(cata, {}).get(item_i, "")

    # The instance is not used further here.
    classification_page = Classification(item_info)
    self.destroy()
def query_disagreement_test(self):
    """Diagnostic query strategy: score candidates by resulting test accuracy and plot the measures.

    Candidates come from ``self.query_margin()``.  A committee is built from
    the first ``self.optimize`` ids, each member trained on the labelled set
    plus one candidate with its true label.  Then, for every candidate:

      * six informativeness measures are computed (disag1/disag2, unweighted
        and weighted; prediction uncertainty; balance);
      * a trial classifier is trained on the labelled set plus the candidate
        with its true label, and its accuracy on (Tx, Ty) becomes the score;
      * eight scatter plots are (re)written to
        ``<len(Lx)><datasetname><suffix>``; points are red where the current
        classifier mislabels the candidate, blue otherwise.

    Pool items outside the candidate slice score 0.  Returns
    ``self.sort_scores(scores)``.
    """
    ids, _ = self.query_margin()
    committee_ids = ids[:self.optimize]
    candidate_ids = ids[:self.optimize*9999999]   # in practice: every id

    # series[0] holds accuracies, series[1..6] the six measures.
    series = [[], [], [], [], [], [], []]
    colours = []
    viz = Visualize()

    commitee = []
    for idp, dp in enumerate(self.Ux):
        if idp not in committee_ids:
            continue
        member = Classification(self.Lx + [dp], self.Ly + [self.Uy[idp]], method = self.clf.method)
        member.GAMMA, member.C = self.clf.GAMMA, self.clf.C
        member.train()
        commitee.append( (member, 1) )

    # (x-series, y-series, file suffix) for each scatter plot, in output order.
    plot_specs = (
        (1, 2, '.1-2.png'),
        (3, 4, '.3-4.png'),
        (1, 0, '.1-acc.png'),
        (2, 0, '.2-acc.png'),
        (3, 0, '.3-acc.png'),
        (4, 0, '.4-acc.png'),
        (5, 0, '.5-acc.png'),
        (6, 0, '.6-acc.png'),
    )

    scores = []
    for ix, x in enumerate(self.Ux):
        if ix not in candidate_ids:
            scores.append( 0. )
            continue

        readings = [
            self.get_disag1(x, weighted = False),
            self.get_disag2(x, commitee, weighted = False),
            self.get_disag1(x, weighted = True),
            self.get_disag2(x, commitee, weighted = True),
            self.clf.uncertainty_prediction(x),
            self.get_balance(x),
        ]

        trial = Classification(self.Lx + [x], self.Ly + [self.Uy[ix]], method = self.clf.method)
        trial.GAMMA, trial.C = self.clf.GAMMA, self.clf.C
        trial.train()
        acc = trial.getTestAccuracy( self.Tx, self.Ty )

        series[0].append( acc )
        for slot, reading in enumerate(readings, 1):
            series[slot].append( reading )
        colours.append( 'r' if self.Uy[ix] != self.clf.predict_label(x) else 'b' )

        # The plots are rewritten after every candidate, so the files on disk
        # always reflect the candidates processed so far.
        stem = str(len(self.Lx)) + self.datasetname
        for x_slot, y_slot, suffix in plot_specs:
            fig, axs = plt.subplots( 1, 1, sharex=True )
            axs.scatter( series[x_slot], series[y_slot], c = colours, marker = "o", cmap = plt.copper() )
            plt.savefig(stem + suffix)
            plt.close()

        scores.append( acc )

    return self.sort_scores(scores)
def main():
    """Train the three-class seed classifier, then classify the test image in a loop.

    Pipeline: Input -> Preprocessing -> Segmentation -> Classification for the
    three training sets, then the same preprocessing/segmentation for
    ``i.testingData``, whose features are classified with the trained
    ``Classification`` object.  The classified image is shown and written to
    ``saveImagePath + "imgClassified.png"`` on every pass; pressing ESC (key
    code 27) ends the loop.

    NOTE(review): the block nesting below was reconstructed from a flattened
    copy of the source; confirm the extent of each ``if`` body against the
    original file.
    """
    # Global control parameters, used for debugging, documentation etc...
    showAndSaveImagesFlag = False   # Not read anywhere in this function. The classified feature plot and final classification are still shown.
    normalization = True            # Forwarded to Classification and the feature-plot helper.
    vizualize = True                # Enables the two feature-plot calls in the loop.
    saveImagePath = "/home/christian/workspace_python/MasterThesis/FinalProject/writefiles/"

    # Initialize the Input component with cameraIndex = 0 (webcamera inbuilt in PC)
    # Input: Plug and play webcamera
    # Output: RGB image, training data and testing data
    i = Input(0)

    # Initialize the Preprocessing component with the training data 1, 2, 3
    p1 = Preprocessing(i.trainingData1, 1, saveImagePath)
    p2 = Preprocessing(i.trainingData2, 2, saveImagePath)
    p3 = Preprocessing(i.trainingData3, 3, saveImagePath)

    # Initialize the Segmentation component for the 3 classes.
    # Using global HSV setting
    s1 = Segmentation(i.trainingData1, p1.imgFrontGround, p1.imgSeedandSproutRepaired, p1.imgSproutRepaired, 1, saveImagePath)
    s2 = Segmentation(i.trainingData2, p2.imgFrontGround, p2.imgSeedandSproutRepaired, p2.imgSproutRepaired, 2, saveImagePath)
    s3 = Segmentation(i.trainingData3, p3.imgFrontGround, p3.imgSeedandSproutRepaired, p3.imgSproutRepaired, 3, saveImagePath)

    # Choice of which features to use (indices into listOfFeatures):
    # featureCenterOfMassList,              # feature 0
    # featureLengthList,                    # feature 1
    # featureWidthList,                     # feature 2
    # featureRatioList,                     # feature 3
    # featureNumberOfSproutPixelsList,      # feature 4
    # featureHueMeanList,                   # feature 5
    # featureHueStdList,                    # feature 6
    # featureClassStampList                 # feature 7
    featureIndexX = 3
    featureIndexY = 4

    # Initialize the classification component for the training data
    c = Classification(s1.listOfFeatures, s2.listOfFeatures, s3.listOfFeatures, featureIndexX, featureIndexY, vizualize, saveImagePath, normalization)

    # Initialize the Output component (the instance is not used again in this function)
    o = Output()

    # At this point, the whole system has been taught with supervised learning.
    # Training data has been loaded, preprocessed, segmented, feature extracted and classified.
    # From here, the testing data is loaded, where each seed will be preprocessed, segmented and classified
    # based on where the line of separation lies.
    userCloseDown = False
    TrackBarInit(i)

    # Disabled on purpose: flip to True to show and save the training figures.
    if False:
        ShowAndSaveTrainingFigures(i, p1, p2, p3, s1, s2, s3, saveImagePath)

    # while i.cameraIsOpen:
    # To avoid being dependent on the camera, we just say the camera is always open.
    # We use still images anyway at the moment...
    while True:
        # print "Camera is open..."

        # If the user has not pushed the start button.
        TrackBarStart(i)

        # If the user wants to close down the program, we do it..
        if userCloseDown:
            break

        #############################################################
        # After the training we run in this while loop...
        #############################################################

        # Clear the trackbar setting window, since we only want to look at the final classification image
        DestroyWindows()

        # Input from webcamera - Testing data
        # imgInput = i.getCroppedImg()
        imgInput = i.testingData   # Using a still test image, when the real USB camera is not available

        # The input image is processed through each component as follows, with class 0, since it is unknown which class the
        # test image belongs to...
        p = Preprocessing(imgInput, 0, saveImagePath)

        # The FrontGround image and SeedAndSprout image are used in the segmentation component
        # s = Segmentation(imgInput, p.imgFrontGround, p.imgSeedAndSprout, p.imgSprout, 0)
        s = Segmentation(imgInput, p.imgFrontGround, p.imgSeedandSproutRepaired, p.imgSproutRepaired, 0, saveImagePath)

        if vizualize:
            ShowFeaturePlotClass0(featureIndexX, featureIndexY, s, c, saveImagePath, normalization)

        # Split the test features into the three predicted classes
        # (X feature, Y feature and center list per class).
        featureClass1ListX, \
        featureClass1ListY, \
        centerClass1List, \
        featureClass2ListX, \
        featureClass2ListY, \
        centerClass2List, \
        featureClass3ListX, \
        featureClass3ListY, \
        centerClass3List = c.getClassifiedLists3classes(s.listOfFeatures[featureIndexX], s.listOfFeatures[featureIndexY], s.listOfFeatures[0], imgInput)

        if vizualize:
            ShowFeaturePlotClass0Classified(featureClass1ListX, featureClass2ListX, featureClass3ListX, featureClass1ListY, featureClass2ListY, featureClass3ListY,c, saveImagePath)

        #############################################
        # Finally we show the result
        ############################################
        cv2.imshow("The final classification", c.imgClassified)
        cv2.imwrite(saveImagePath + "imgClassified.png", c.imgClassified)

        # If the user pushes "ESC" (key code 27) the program closes down.
        k = cv2.waitKey(30) & 0xff
        if k == 27:
            userCloseDown = True
            # i.closeDown()
            break

    # NOTE(review): as laid out here the loop only exits with userCloseDown
    # set to True, so the else branch looks unreachable -- confirm.
    if userCloseDown:
        print "User closed the program..."
    else:
        print "The camera is not open.... "
def train(self, mtd = "margin", backupfile = "backupfile.txt"): for i, x in enumerate(self.Ux): y1 = self.clf.predict_label(x) if mtd == "supervised": informativeness = sys.float_info.max if mtd == "margin": informativeness = self.clf.uncertainty_margin(x) # =============================== id_th = self.mab.choose() self.th = self.mab.algos[ id_th ] print "Choosen =", self.th, "nb_choices =", self.mab.nb_choices, ("avg rwd=", [ np.mean(L) for L in self.mab.rewards ] if self.mab.rewards[0]!=[] else " "), "expected=", sum([ a*l for a,l in zip(self.mab.algos,self.mab.nb_choices) ]) / sum(self.mab.nb_choices) prev_clf = Classification(self.Lx, self.Ly, method = self.clf.method) prev_clf.GAMMA, prev_clf.C = self.clf.GAMMA, self.clf.C; prev_clf.train() # =============================== # avg_rewards = [ np.mean(L[:-20]) if len(L)>0 else 1. for L in self.mab.rewards ] # self.th = sum([ a*l for a,l in zip(self.mab.algos,avg_rewards) ]) / sum(avg_rewards) # print "Choosen =", self.th, "avg rwd=", avg_rewards # =============================== if informativeness > self.th: qx = x qy = self.Uy[i] self.Lx.append(qx) self.Ly.append(qy) self.queried += 1 self.clf.X = self.Lx; self.clf.Y = self.Ly; self.clf.train() # =============================== reward = 1. - abs( 0.1 - self.queried / (i+1.) ) self.mab.update(id_th, reward) # =============================== # for idt in range(len(self.mab.algos)): # reward = 1. - abs( 0.3 - (self.queried-1+1) / (i+1.) ) if informativeness > self.mab.algos[idt] else 1. - abs( 0.4 - (self.queried-1) / (i+1.) 
) # self.mab.update(idt, reward) # =============================== self.ths.append( self.th ) self.infos.append( informativeness ) self.accuracys.append( self.clf.getTestAccuracy( self.Tx, self.Ty ) ) self.queries.append( self.queried ) self.sup_infos.append( self.sup_clf.uncertainty_margin(x) ) # TODO should not be here self.sup_clf.X = self.Lx0+self.Ux[:i+1]; self.sup_clf.Y = self.Ly0+self.Uy[:i+1]; self.sup_clf.train() # TODO should not be here self.sup_accuracys.append( self.sup_clf.getTestAccuracy( self.Tx, self.Ty ) ) # TODO should not be here ''' if i>10: # last_infos = self.infos[-100:] if len(self.infos) > 100 else self.infos[:] # self.th = np.mean( last_infos ) if informativeness > self.th: # queried if y1 == qy: # but was correctly predicted self.th = self.th + 0.1 * (informativeness - self.th) else: if y1 != qy: self.th = self.th - 0.1 * (self.th - informativeness ) ''' print "i=", i+1, self.queried, self.queried / (i+1.), "-- acc=%.4f"%(self.accuracys[-1]*100), "%.4f"%(self.sup_accuracys[-1]*100), "-- %.4f"%(np.mean(self.accuracys)*100), "%.4f"%(np.average(self.accuracys, weights = range(1,1+len(self.accuracys)))*100), "--", informativeness if (i+1)%10 == 0: Util.pickleSave(backupfile, self); viz = Visualize() viz.do_plot( [range(len(self.infos)), self.ths], color = 'b', marker = '-' ) viz.do_plot( [range(len(self.infos)), self.infos], color = 'r', marker = '-' ) viz.do_plot( [range(len(self.sup_infos)), self.sup_infos], color = 'y', marker = '-' ) viz.end_plot( fig = backupfile+"_stream_inf.png" ) viz.do_plot( [range(len(self.accuracys)), self.accuracys], color = 'r', marker = '-' ) viz.do_plot( [range(len(self.sup_accuracys)), self.sup_accuracys], color = 'y', marker = '-' ) viz.end_plot( fig = backupfile+"_stream_acc.png" ) viz.do_plot( [range(len(self.queries)), self.queries], color = 'r', marker = '-' ) viz.do_plot( [range(len(self.queries)), range(len(self.queries))], color = 'y', marker = '-' ) viz.end_plot( fig = backupfile+"_stream_lab.png" ) 
'''
from Classification import Classification

'''
Author: Rowland DePree            Test.py

Driver script for the classification program.

To run it on another machine, point the four paths below at your own files:
the training data, the training labels, the test data to classify, and the
file the resulting labels are written to.
'''

TRAIN_DATA_PATH = r'C:\Users\depre\PycharmProjects\Classification_Assignment\traindata'
TRAIN_LABELS_PATH = r'C:\Users\depre\PycharmProjects\Classification_Assignment\trainlabels'
TEST_DATA_PATH = r'C:\Users\depre\PycharmProjects\Classification_Assignment\testdataforclass'
RESULT_LABELS_PATH = r'C:\Users\depre\PycharmProjects\Classification_Assignment\resultlabelsforclass'

# Build the classifier from the training files, classify the test file,
# then write the results out.
c = Classification(TRAIN_DATA_PATH, TRAIN_LABELS_PATH)
c.classify_new_senetence(TEST_DATA_PATH)
c.print_to_document(RESULT_LABELS_PATH)
class OnlineActiveLearning: def __init__(self, Lx, Ly, Ux, Uy, Tx, Ty, method = "svm", budget = 1000): self.Lx0 = Lx[:] self.Ly0 = Ly[:] self.Lx = Lx self.Ly = Ly self.Ux = Ux # TODO should not be here self.Uy = Uy # TODO should not be here self.Tx = Tx # TODO should not be here self.Ty = Ty # TODO should not be here self.th = 0.9 self.queried = 0 self.queries = [] self.ths = [] self.infos = [] self.accuracys = [] self.clf = Classification( self.Lx, self.Ly, method = method, Vx = Lx+Ux, Vy = Ly+Uy ); self.clf.train() self.sup_infos = [] # TODO should not be here self.sup_accuracys = [] # TODO should not be here self.sup_clf = Classification( self.Lx, self.Ly, method = method, Vx = Lx+Ux, Vy = Ly+Uy ); self.sup_clf.train() # TODO should not be here # self.mab = Bandit( algos = np.arange(0., 1.1, 0.1), method = "UCB", alpha = 1 ) self.mab = Bandit( algos = np.arange(0., 1.1, 0.1), method = "reinforcement", alpha = 1 ) #--------------------------------------- def train(self, mtd = "margin", backupfile = "backupfile.txt"): for i, x in enumerate(self.Ux): y1 = self.clf.predict_label(x) if mtd == "supervised": informativeness = sys.float_info.max if mtd == "margin": informativeness = self.clf.uncertainty_margin(x) # =============================== id_th = self.mab.choose() self.th = self.mab.algos[ id_th ] print "Choosen =", self.th, "nb_choices =", self.mab.nb_choices, ("avg rwd=", [ np.mean(L) for L in self.mab.rewards ] if self.mab.rewards[0]!=[] else " "), "expected=", sum([ a*l for a,l in zip(self.mab.algos,self.mab.nb_choices) ]) / sum(self.mab.nb_choices) prev_clf = Classification(self.Lx, self.Ly, method = self.clf.method) prev_clf.GAMMA, prev_clf.C = self.clf.GAMMA, self.clf.C; prev_clf.train() # =============================== # avg_rewards = [ np.mean(L[:-20]) if len(L)>0 else 1. 
for L in self.mab.rewards ] # self.th = sum([ a*l for a,l in zip(self.mab.algos,avg_rewards) ]) / sum(avg_rewards) # print "Choosen =", self.th, "avg rwd=", avg_rewards # =============================== if informativeness > self.th: qx = x qy = self.Uy[i] self.Lx.append(qx) self.Ly.append(qy) self.queried += 1 self.clf.X = self.Lx; self.clf.Y = self.Ly; self.clf.train() # =============================== reward = 1. - abs( 0.1 - self.queried / (i+1.) ) self.mab.update(id_th, reward) # =============================== # for idt in range(len(self.mab.algos)): # reward = 1. - abs( 0.3 - (self.queried-1+1) / (i+1.) ) if informativeness > self.mab.algos[idt] else 1. - abs( 0.4 - (self.queried-1) / (i+1.) ) # self.mab.update(idt, reward) # =============================== self.ths.append( self.th ) self.infos.append( informativeness ) self.accuracys.append( self.clf.getTestAccuracy( self.Tx, self.Ty ) ) self.queries.append( self.queried ) self.sup_infos.append( self.sup_clf.uncertainty_margin(x) ) # TODO should not be here self.sup_clf.X = self.Lx0+self.Ux[:i+1]; self.sup_clf.Y = self.Ly0+self.Uy[:i+1]; self.sup_clf.train() # TODO should not be here self.sup_accuracys.append( self.sup_clf.getTestAccuracy( self.Tx, self.Ty ) ) # TODO should not be here ''' if i>10: # last_infos = self.infos[-100:] if len(self.infos) > 100 else self.infos[:] # self.th = np.mean( last_infos ) if informativeness > self.th: # queried if y1 == qy: # but was correctly predicted self.th = self.th + 0.1 * (informativeness - self.th) else: if y1 != qy: self.th = self.th - 0.1 * (self.th - informativeness ) ''' print "i=", i+1, self.queried, self.queried / (i+1.), "-- acc=%.4f"%(self.accuracys[-1]*100), "%.4f"%(self.sup_accuracys[-1]*100), "-- %.4f"%(np.mean(self.accuracys)*100), "%.4f"%(np.average(self.accuracys, weights = range(1,1+len(self.accuracys)))*100), "--", informativeness if (i+1)%10 == 0: Util.pickleSave(backupfile, self); viz = Visualize() viz.do_plot( [range(len(self.infos)), self.ths], 
color = 'b', marker = '-' ) viz.do_plot( [range(len(self.infos)), self.infos], color = 'r', marker = '-' ) viz.do_plot( [range(len(self.sup_infos)), self.sup_infos], color = 'y', marker = '-' ) viz.end_plot( fig = backupfile+"_stream_inf.png" ) viz.do_plot( [range(len(self.accuracys)), self.accuracys], color = 'r', marker = '-' ) viz.do_plot( [range(len(self.sup_accuracys)), self.sup_accuracys], color = 'y', marker = '-' ) viz.end_plot( fig = backupfile+"_stream_acc.png" ) viz.do_plot( [range(len(self.queries)), self.queries], color = 'r', marker = '-' ) viz.do_plot( [range(len(self.queries)), range(len(self.queries))], color = 'y', marker = '-' ) viz.end_plot( fig = backupfile+"_stream_lab.png" ) ''' colors = ['r', 'b', 'g', 'k', 'm', 'c', '0.10', '0.35', '0.60', '0.90'] viz.plot( zip(*self.Lx+self.Ux), fig = backupfile+"__.png", color = [colors[int(l)] for l in self.Ly+self.Uy], marker = 'o' ) viz.do_plot( zip(*self.Ux), color = ['y']*len(self.Ux), marker = '.' ) viz.do_plot( zip(*self.Lx), color = [colors[int(l)] for l in self.Ly], marker = 'o' ) viz.end_plot( fig = backupfile+"_.png" ) ''' #--------------------------------------- def get_change(self, prev_clf, curr_clf, U): v1 = [ prev_clf.getPredictProba(1, dp) for dp in U ] v2 = [ curr_clf.getPredictProba(1, dp) for dp in U ] if v1 == v2: return 0. return math.acos( cosine_similarity(v1, v2) ) / math.pi
# Command-line contract: six positional arguments.  All but the port may
# arrive wrapped in double quotes, which are stripped here.
PORT = sys.argv[1]
COMMIT_HASH = sys.argv[2].strip('"')
BRANCH_NAME = sys.argv[3].strip('"')
ACTIVE_NODE_PATH = sys.argv[4].strip('"')

# Argument 5 is a comma-separated list of paths; the literal '""' means
# "no selection".  A leading empty entry is discarded.
ACTIVE_SELECTION_PATHS = []
if sys.argv[5] != '""':
    ACTIVE_SELECTION_PATHS = sys.argv[5].strip('"').split(',')
    if not ACTIVE_SELECTION_PATHS[0]:
        del ACTIVE_SELECTION_PATHS[0]

NAMESPACE = sys.argv[6].strip('"')

logger.debug('commit-hash: {0}'.format(COMMIT_HASH))
logger.debug('branch-name: {0}'.format(BRANCH_NAME))
logger.debug('active-node-path: {0}'.format(ACTIVE_NODE_PATH))
logger.debug('active-selection-paths: {0}'.format(ACTIVE_SELECTION_PATHS))
logger.debug('name-space: {0}'.format(NAMESPACE))

# Connect to WebGME and instantiate the plugin with the parsed context.
webgme = WebGME(PORT, logger)
plugin = Classification(webgme, COMMIT_HASH, BRANCH_NAME, ACTIVE_NODE_PATH, ACTIVE_SELECTION_PATHS, NAMESPACE)

# Run the plugin, then release the zmq-server connection.
plugin.main()
webgme.disconnect()
def sortForInformativeness(self, mtd):
    """Rank the unlabelled pool ``self.Ux`` by informativeness under *mtd*.

    Cheap methods ("margin", "proba", "entropy", "random") score every pool
    item directly.  Costly methods ("intuitionM", "weight",
    "expectedErrorReduction", "etc", "etc_", "test", "intuition") first rank
    the pool with ``self.optimization_method`` and only score the first
    ``self.optimization_limit`` items of that ranking; every other item gets
    a score of 0.

    Returns:
        (ids, sorted_scores): ``ids`` is the array of pool indices ordered
        from most to least informative, ``sorted_scores`` the matching
        scores.

    Raises:
        ValueError: if *mtd* is not one of the methods listed above.
    """
    cheap_methods = ("margin", "proba", "entropy", "random")
    costly_methods = ("intuitionM", "weight", "expectedErrorReduction",
                      "etc", "etc_", "test", "intuition")
    if mtd not in cheap_methods + costly_methods:
        raise ValueError("unknown informativeness method: %r" % (mtd,))

    # Costly methods only look at a shortlist chosen by the cheap pre-ranking.
    # NOTE: self.optimization_method must itself be a cheap method, otherwise
    # this call recurses without end.
    shortlist = frozenset()
    if mtd in costly_methods:
        pre_ids, _ = self.sortForInformativeness(self.optimization_method)
        shortlist = frozenset(int(k) for k in pre_ids[:self.optimization_limit])

    def fit_with(x, label):
        # Classifier trained on the labelled set plus (x, label), reusing the
        # current hyper-parameters.
        # TODO FIXME: do it in general not specifically for svm
        clf = Classification(self.Lx + [x], self.Ly + [label], method = self.clf.method)
        clf.GAMMA, clf.C = self.clf.GAMMA, self.clf.C
        clf.train()
        return clf

    def flipped(clf, dp):
        # True when adding the candidate changed the predicted label of dp.
        return clf.predict_label(dp) != self.clf.predict_label(dp)

    def proba_shift(clf, dp):
        return abs(clf.getPredictProba(1, dp) - self.clf.getPredictProba(1, dp))

    def expected_change(x, label):
        # Impact on the rest of the pool of labelling x as `label`.
        clf = fit_with(x, label)
        others = [dp for dp in self.Ux if dp != x]
        if mtd == "etc":
            return sum([proba_shift(clf, dp) if flipped(clf, dp) else 0. for dp in others]) / (len(self.Ux) - 1.)
        if mtd == "etc_":
            return sum([1. if flipped(clf, dp) else 0. for dp in others]) / (len(self.Ux) - 1.)
        # "test": mean shift over flipped items only (0. when nothing flips).
        return np.mean([0.] + [proba_shift(clf, dp) for dp in others if flipped(clf, dp)])

    scores = []
    for ix, x in enumerate(self.Ux):
        if mtd == "margin":
            informativeness = self.clf.uncertainty_margin(x)
        elif mtd == "proba":
            informativeness = self.clf.uncertainty_prediction(x)
        elif mtd == "entropy":
            informativeness = self.clf.uncertainty_entropy(x)
        elif mtd == "random":
            informativeness = random.uniform(0., 1.)
        #---------------------------------------------------------
        # Everything below is a costly method: skip items off the shortlist.
        elif ix not in shortlist:
            informativeness = 0.
        elif mtd == "intuitionM":
            informativeness = self.clf.uncertainty_margin(x)
        elif mtd == "weight":
            informativeness = self.clf.uncertainty_weight(x, self.Lx, self.Ly)
        #---------------------------------------------------------
        elif mtd == "expectedErrorReduction":
            # Expected pool entropy after labelling x, over its 5 most
            # probable labels; lower expected entropy = more informative.
            label_probas = sorted(self.clf.predict(x, all = True), key=operator.itemgetter(1), reverse=True)
            sums = 0.
            for yy, proba in label_probas[:5]:
                temp_clf = fit_with(x, yy)
                e_h1 = sum( [ temp_clf.uncertainty_entropy(dp) for dp in self.Ux if dp != x ] )
                sums += proba * e_h1
            # Zero expected entropy would divide by zero; treat it as
            # maximally informative instead.
            informativeness = 1. / sums if sums != 0 else float("inf")
        #---------------------------------------------------------
        elif mtd == "intuition":
            # Uses the true label of x (oracle-style diagnostic).
            # NOTE: index() picks the first equal item if Ux has duplicates.
            true_y = self.Uy[ self.Ux.index(x) ]
            temp_clf = fit_with(x, true_y)
            ucts = [ temp_clf.getPredictProba(1,dp) - self.clf.getPredictProba(1,dp) for dp in self.Tx ]
            ids_ucts = (-np.array(ucts)).argsort()[:50]
            # Fraction of test items that flip label AND are among the 50
            # largest probability increases.
            informativeness = np.mean([ 1. if flipped(temp_clf, dp) and idp in ids_ucts else 0. for idp, dp in enumerate(self.Tx) ])
        #---------------------------------------------------------
        else:
            # "etc", "etc_", "test": probability-weighted impact of the two
            # most likely labels, shifted by 1 so shortlisted items always
            # outrank the zero-scored rest.
            y1, y2, p1, p2 = self.clf.getMarginInfo(x)
            diff1 = expected_change(x, y1)
            diff2 = expected_change(x, y2)
            informativeness = p1*diff1 + p2*diff2 + 1.

        scores.append( informativeness )

    ids = (-np.array(scores)).argsort()
    sorted_scores = [ scores[k] for k in ids ]
    return ids, sorted_scores
def Trainning(self, Classify: Classification):
    """Interactively label the contours of the current image and save them as training data.

    Each contour returned by ``Classify.Predict(self.ic)`` is outlined on a
    copy of the image and shown in the 'CarLicense' window.  The matching
    patch of the binarised image is scaled to 10x10 and, when the user
    presses a digit (key codes 48-57) or a lowercase letter (97-122),
    appended to the sample set with the key code as its response.  Space
    skips the contour.  Samples accumulate across runs through the files
    'samples.data' and 'res.data'.

    Returns:
        1 after all contours were handled and both files were written;
        0 if the user pressed Esc or closed the window (nothing is saved).
    """
    Main = self.Main
    Main.SetConsole('Train!')
    Target = self.ic
    _, ClassContours = Classify.Predict(Target)
    process = Target.BinaryProcess()
    #area = ip.MappingCnt(contours)

    if os.path.isfile('samples.data') and os.path.isfile('res.data'):
        print('LoadData')
        Main.SetConsole('LoadData!')
        # loadtxt squeezes a one-row file to a 1-D array; force (n, 100) so
        # the row-wise np.append below keeps working.
        samples = np.loadtxt('samples.data', np.float32).reshape(-1, 100)
        responses = np.loadtxt('res.data', np.float32)
    else:
        print('NewData')
        Main.SetConsole('NewData!')
        samples = np.empty((0, 100))
        responses = []

    # Accepted label keys: '0'-'9' and 'a'-'z'.
    keys = list(range(48, 58)) + list(range(97, 123))
    close = 0
    for cnt in ClassContours:
        original = Target.img.copy()
        x, y, w, h = cv2.boundingRect(cnt)
        cv2.rectangle(original, (x, y), (x + w, y + h), (0, 0, 255), 2)
        TargetPxs = process[y:y + h, x:x + w]
        TargetPxsSmall = cv2.resize(TargetPxs, (10, 10))
        cv2.imshow('CarLicense', original)

        # Poll every 100 ms so a closed window is noticed without a key press.
        while close == 0:
            key = cv2.waitKey(100)
            if key != -1:
                print(key)
            if cv2.getWindowProperty('CarLicense', 0) == -1:
                print('Close')
                Main.SetConsole('Close!')
                close = 1
            if key == 27:  # (escape to quit)
                cv2.destroyAllWindows()
                close = 1
                print('Esc')
                Main.SetConsole('Esc!')
            elif key == 32:  # (space to skip this contour)
                print('Skip')
                Main.SetConsole('Skip!')
                break  # move on to the next contour
            elif key in keys:
                responses = np.append(responses, key)
                sample = TargetPxsSmall.reshape((1, 100))
                samples = np.append(samples, sample, 0)
                Main.SetConsole('Add Sample!')
                break
        if close == 1:
            return 0

    cv2.destroyAllWindows()
    responses = np.array(responses, np.float32)
    responses = responses.reshape((responses.size, 1))
    print('training complete')
    Main.SetConsole('訓練完成,進行存檔!')
    np.savetxt('samples.data', samples)
    np.savetxt('res.data', responses)
    return 1
class ActiveLearning: def __init__(self, Lx, Ly, Ux, Uy, Tx, Ty, method = "svm", budget = 1000): self.Lx = Lx self.Ly = Ly self.Ux = Ux self.Uy = Uy # TODO should not be here self.Tx = Tx # TODO should not be here self.Ty = Ty # TODO should not be here self.optimization_limit = 20 self.optimization_method = "margin" # margin proba entropy random weight expectedErrorReduction etc self.budget = budget self.accuracys = [] self.clf = Classification( self.Lx, self.Ly, method = method, Vx = Lx+Ux, Vy = Ly+Uy ) self.clf.train() self.viz_A = []; self.viz_B = []; self.viz_C = []; self.viz_D = []; self.viz_E = []; self.viz_F = [] #--------------------------------------- def train(self, mtd = "margin", backupfile = "backupfile"): # TODO implement sample_weight + make method to shuffle and return sublist with data_limit backupfile += ".opt-"+str(self.optimization_limit)+"-"+self.optimization_method+".txt" for i in range(self.budget): if len(self.Ux) <= 1: break # self.viz_A = []; self.viz_B = []; self.viz_C = []; self.viz_D = []; self.viz_E = []; self.viz_F = [] ids, scores = self.sortForInformativeness(mtd) id = ids[0] qx = self.Ux[id] qy = self.Uy[id] self.Lx.append(qx) self.Ly.append(qy) self.Ux.pop(id) self.Uy.pop(id) self.clf.X = self.Lx; self.clf.Y = self.Ly self.clf.train() test_accuracy = self.clf.getTestAccuracy( self.Tx, self.Ty ) self.accuracys.append( test_accuracy ) print "i=", i+1, "; acc=%.4f"%(test_accuracy*100), "%.4f"%(np.mean(self.accuracys)*100), "%.4f"%(np.average(self.accuracys, weights = range(1,1+len(self.accuracys)))*100), scores[0] if (i+1)%10 == 0: Util.pickleSave(backupfile, self) viz = Visualize(); viz.plot( [range(len(self.accuracys)), self.accuracys], fig = backupfile+".png", color = 'r', marker = '-' ) #--------------------------------------- def sortForInformativeness(self, mtd): if mtd in ["etc", "etc_", "expectedErrorReduction", "weight", "optimal", "test", "intuition"] : ids, scores = self.sortForInformativeness(self.optimization_method) 
scores = [] for ix, x in enumerate(self.Ux): y1, y2, p1, p2 = self.clf.getMarginInfo(x) if mtd == "intuitionM": if ix in ids[:self.optimization_limit]: informativeness = self.clf.uncertainty_margin(x) else: informativeness = 0. #--------------------------------------------------------- if mtd == "margin": informativeness = self.clf.uncertainty_margin(x) #--------------------------------------------------------- elif mtd == "proba": informativeness = self.clf.uncertainty_prediction(x) #--------------------------------------------------------- elif mtd == "entropy": informativeness = self.clf.uncertainty_entropy(x) #--------------------------------------------------------- elif mtd == "random": informativeness = random.uniform(0., 1.) #--------------------------------------------------------- elif mtd == "weight": if ix in ids[:self.optimization_limit]: informativeness = self.clf.uncertainty_weight(x, self.Lx, self.Ly) else: informativeness = 0. #--------------------------------------------------------- elif mtd == "expectedErrorReduction": if ix in ids[:self.optimization_limit]: sums = 0. YP = self.clf.predict(x, all = True) YP.sort(key=operator.itemgetter(1), reverse=True) for ir, (yy, proba) in enumerate(YP): if ir == 5: break temp_clf = Classification(self.Lx + [x], self.Ly + [yy], method = self.clf.method); temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C # TODO FIXME: do it in general not specifically for svm temp_clf.train() e_h1 = sum( [ temp_clf.uncertainty_entropy(dp) for dp in self.Ux if dp != x ] ) sums += (proba) * e_h1 informativeness = 1. / sums else: informativeness = 0. 
#--------------------------------------------------------- elif mtd == "etc": if ix in ids[:self.optimization_limit]: temp_clf1 = Classification(self.Lx + [x], self.Ly + [y1], method = self.clf.method); temp_clf1.GAMMA, temp_clf1.C = self.clf.GAMMA, self.clf.C temp_clf1.train() diff1 = sum( [ abs(temp_clf1.getPredictProba(1,dp) - self.clf.getPredictProba(1,dp)) if temp_clf1.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ] ) / (len(self.Ux) - 1.) temp_clf2 = Classification(self.Lx + [x], self.Ly + [y2], method = self.clf.method); temp_clf2.GAMMA, temp_clf2.C = self.clf.GAMMA, self.clf.C temp_clf2.train() diff2 = sum( [ abs(temp_clf2.getPredictProba(1,dp) - self.clf.getPredictProba(1,dp)) if temp_clf2.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ] ) / (len(self.Ux) - 1.) informativeness = diff1 # this one is particularly good for rejection (to be confirmed) informativeness = diff1 if p1/(1+diff1) >= p2/(1+diff2) else diff2 informativeness = p1*diff1 + p2*diff2 + 1. else: informativeness = 0. #--------------------------------------------------------- elif mtd == "etc_": if ix in ids[:self.optimization_limit]: temp_clf1 = Classification(self.Lx + [x], self.Ly + [y1], method = self.clf.method); temp_clf1.GAMMA, temp_clf1.C = self.clf.GAMMA, self.clf.C temp_clf1.train() diff1 = sum( [ 1. if temp_clf1.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ] ) / (len(self.Ux) - 1.) temp_clf2 = Classification(self.Lx + [x], self.Ly + [y2], method = self.clf.method); temp_clf2.GAMMA, temp_clf2.C = self.clf.GAMMA, self.clf.C temp_clf2.train() diff2 = sum( [ 1. if temp_clf2.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ] ) / (len(self.Ux) - 1.) 
informativeness = diff1 # this one is particularly good for rejection (to be confirmed) informativeness = diff1 if p1/(1+diff1) >= p2/(1+diff2) else diff2 informativeness = p1*diff1 + p2*diff2 + 1. else: informativeness = 0. #--------------------------------------------------------- elif mtd == "test": if ix in ids[:self.optimization_limit]: temp_clf1 = Classification(self.Lx + [x], self.Ly + [y1], method = self.clf.method); temp_clf1.GAMMA, temp_clf1.C = self.clf.GAMMA, self.clf.C temp_clf1.train() diff1 = np.mean( [0.]+[ abs(temp_clf1.getPredictProba(1,dp) - self.clf.getPredictProba(1,dp)) for dp in self.Ux if temp_clf1.predict_label(dp) != self.clf.predict_label(dp) and dp != x ] ) temp_clf2 = Classification(self.Lx + [x], self.Ly + [y2], method = self.clf.method); temp_clf2.GAMMA, temp_clf2.C = self.clf.GAMMA, self.clf.C temp_clf2.train() diff2 = np.mean( [0.]+[ abs(temp_clf2.getPredictProba(1,dp) - self.clf.getPredictProba(1,dp)) for dp in self.Ux if temp_clf2.predict_label(dp) != self.clf.predict_label(dp) and dp != x ] ) informativeness = diff1 # this one is particularly good for rejection (to be confirmed) informativeness = diff1 if p1/(1+diff1) >= p2/(1+diff2) else diff2 informativeness = p1*diff1 + p2*diff2 + 1. else: informativeness = 0. #--------------------------------------------------------- elif mtd == "intuition": if ix in ids[:self.optimization_limit]: true_y = self.Uy[ self.Ux.index(x) ] temp_clf = Classification(self.Lx + [x], self.Ly + [true_y], method = self.clf.method) temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train() ucts = [ temp_clf.getPredictProba(1,dp) - self.clf.getPredictProba(1,dp) for dp in self.Tx ] ids_ucts = (-np.array(ucts)).argsort()[:50] # diff = np.mean( [ 1. if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Tx ] ) diff = np.mean([1. if temp_clf.predict_label(dp) != self.clf.predict_label(dp) and idp in ids_ucts else 0. 
for idp,dp in enumerate(self.Tx) ]) informativeness = diff else: informativeness = 0. #--------------------------------------------------------- elif mtd == "intuition": if ix in ids[:self.optimization_limit]: true_y = self.Uy[ self.Ux.index(x) ] temp_clf = Classification(self.Lx + [x], self.Ly + [true_y], method = self.clf.method) temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train() # --------------------- imp_x = [ xdp for xdp in self.Tx if temp_clf.predict_label(xdp) != self.clf.predict_label(xdp) ] imp_y_hh = [ temp_clf.predict_label(xdp) for xdp in self.Tx if temp_clf.predict_label(xdp) != self.clf.predict_label(xdp) ] if len( set(imp_y_hh) ) > 1: # hh = Classification(imp_x, imp_y_hh, method = self.clf.method) hh = Classification(imp_x + [x], imp_y_hh + [true_y], method = self.clf.method, tuning = False) hh.GAMMA, hh.C = self.clf.GAMMA, self.clf.C; hh.train() else: hh = self.clf # --------------------- h_inconsistant_truth = 0; hh_inconsistant_truth = 0; hh_inconsistant_h = 0; h_consistency = []; hh_consistency = [] for ilx, lx in enumerate(self.Lx): h_consistency.append( self.clf.getProbaOf( self.Ly[ilx], lx ) ) # hh_consistency.append( hh.getProbaOf( self.Ly[ilx], lx ) ) hh_consistency.append( hh.getProbaOf( self.Ly[ilx], lx ) if hh.predict_label(lx) == self.Ly[ilx] else 0. ) if self.clf.predict_label(lx) != self.Ly[ilx]: h_inconsistant_truth += 1. if hh.predict_label(lx) != self.Ly[ilx]: hh_inconsistant_truth += 1. if hh.predict_label(lx) != self.clf.predict_label(lx): hh_inconsistant_h += 1. h_consistency = np.mean(h_consistency) hh_consistency = np.mean(hh_consistency) if len( set(imp_y_hh) ) > 1 else 0. consistency_dif = hh_consistency - h_consistency # --------------------- diff = []; errors = 0.; trues = 0.; impacted = 0; impacted_probs = []; for idp, dp in enumerate(self.Tx): if temp_clf.predict_label(dp) != self.clf.predict_label(dp): ################## impacted += 1. 
impacted_probs.append( abs( temp_clf.getPredictProba(1,dp) - self.clf.getPredictProba(1,dp) ) ) if self.Ty[idp]!=temp_clf.predict_label(dp): errors += 1. else: trues += 1. # if temp_clf.predict_label(dp) != self.clf.predict_label(dp) and self.Ty[idp]==temp_clf.predict_label(dp): diff.append( 1. ) # if temp_clf.predict_label(dp) != self.clf.predict_label(dp) and trues - errors > 0: diff.append( 1. ) # if temp_clf.predict_label(dp) != self.clf.predict_label(dp): diff.append( 1. ) if temp_clf.predict_label(dp) != self.clf.predict_label(dp): diff.append( 1. ) else: diff.append( 0. ) diff = np.mean( diff ) # diff = diff * np.mean(impacted_probs) # seems to be working ... # --------------------- # self.viz_A.append( consistency_dif ) self.viz_A.append( hh_consistency ) self.viz_B.append( errors ) self.viz_C.append( trues ) self.viz_D.append( trues - errors ); posI = [inb for inb,nbD in enumerate(self.viz_D) if nbD >= 0.] self.viz_E.append( impacted ) self.viz_F.append( np.mean(impacted_probs) ) viz = Visualize(); viz.plot( [self.viz_A, self.viz_B], fig = "test_errors.png", color = 'r', marker = 'o' ) vizu = Visualize(); vizu.plot( [self.viz_A, self.viz_C], fig = "test_trues.png", color = 'r', marker = 'o' ) vizuu = Visualize(); vizuu.plot( [self.viz_A, self.viz_D], fig = "test_trues_errors.png", color = 'r', marker = 'o' ) vizuuu = Visualize(); vizuuu.do_plot( [self.viz_A, self.viz_E], color = 'r', marker = 'o' ) vizuuu.do_plot( [[self.viz_A[inb] for inb in posI], [self.viz_E[inb] for inb in posI]], color = 'b', marker = 'o' ) vizuuu.end_plot(fig = "impacted.png") print hh_consistency, hh_inconsistant_truth, "---", len(imp_x), len( set(imp_y_hh) ), "============>", impacted, trues - errors informativeness = diff else: informativeness = 0. 
#--------------------------------------------------------- scores.append( informativeness ) ids = (-np.array(scores)).argsort() sorted_scores = [ scores[id] for id in ids ] # sorted_scores = [ 1.*scores[id] / sum(scores) for id in ids ] return ids, sorted_scores
def main():
    """Command-line entry point: run each pipeline stage whose switch was given.

    Stages are independent and run in a fixed order (select, count, merge,
    debug-info collection, review, data plotting, base data, homology
    filter, feature extraction, gene counting, classification). ``--test``
    switches select/count/merge to the configs under ``config/Test``.
    ``--classify`` runs a grid search when ``--grid-search`` is given,
    otherwise the stages requested via ``--fit`` and ``--plot``.
    """
    parser = argparse.ArgumentParser()

    # Boolean switches, registered in the original order so --help is unchanged;
    # --features is the only option that takes a value and sits between the groups.
    switches_before_features = (
        ('-s', '--select'),
        ('-t', '--test'),
        ('-c', '--count'),
        ('-m', '--merge', '--combine'),
        ('-e', '--extract'),
        ('-d', '--debuginput'),
        ('-r', '--review'),
        ('--replace-debug',),
        ('-p', '--plot-data'),
        ('--base-data',),
    )
    switches_after_features = (
        ('-f', '--homology-filter'),
        ('-y', '--classify'),
        ('--grid-search',),
        ('--plot',),
        ('--fit',),
        ('--count-total-number-of-genes',),
    )
    for option_names in switches_before_features:
        parser.add_argument(*option_names, action='store_true')
    parser.add_argument('--features', default='original')
    for option_names in switches_after_features:
        parser.add_argument(*option_names, action='store_true')

    args = parser.parse_args()

    if args.select:
        selector = Selector("config/Test/selection_config.json" if args.test
                            else "config/selection_config.json")
        selector.select()
        selector.selected_to_folder()

    if args.count:
        counter = Counter("config/Test/counter_config.json" if args.test
                          else "config/counter_config.json")
        counter.count_all_viruses()

    if args.merge:
        combiner = Combiner("config/Test/combiner_config.json" if args.test
                            else "config/combiner_config.json")
        combiner.combine_all_viruses()

    if args.debuginput:
        debug_input_collector = DebugInfoCollector("config/debug_info_collector_config.json")
        # collect() is called without an argument when --replace-debug is absent,
        # so the collector's own default applies.
        if args.replace_debug:
            debug_input_collector.collect(True)
        else:
            debug_input_collector.collect()

    if args.review:
        import Review
        Review.run()

    if args.plot_data:
        DataPlotter("config/data_plotter_config.json").plot()

    if args.base_data:
        BaseData("config/base_data_config.json").create_data()

    if args.homology_filter:
        HomologyFilter('config/homology_filter.json').filter()

    if args.extract:
        FeatureExtraction("config/feature_extraction_config.json").extract(args.features)

    if args.count_total_number_of_genes:
        Combiner("config/combiner_config.json").print_number_of_genes()

    if not args.classify:
        return

    if args.grid_search:
        # Values previously tried (left disabled in the original):
        #   lambda [0, 0.0001, 0.001]; updater ["shotgun", "coord_descent"];
        #   feature_selector ["cyclic", "shuffle", "random", "greedy", "thrifty"];
        #   a gbtree grid over max_depth range(3, 10, 2) and
        #   min_child_weight range(1, 6, 2).
        MLgrid = [
            {
                "booster": ["gblinear"],
                "lambda": [0],
                "updater": ["coord_descent", "shotgun"],
                "feature_selector": ["shuffle"]
            }
        ]
        # The two grids below are defined but only MLgrid is passed on.
        _1vsAgrid = [
            {
                "estimator__booster": ["gblinear"],
                "estimator__lambda": [0.1],
                "estimator__updater": ["coord_descent"],
                "estimator__feature_selector": ["shuffle"]
            },
        ]
        RRgrid = [
            {
                "estimator__booster": ["gblinear"],
                "estimator__lambda": [0.1],
                "estimator__updater": ["coord_descent"],
                "estimator__feature_selector": ["shuffle"]
            },
        ]
        classification = Classification('config/classification_config.json', args.features)
        classification.grid_search('ML', 'XGBoost', MLgrid, 200, 'no-pca')
        return

    # NOTE(review): --fit and --plot are both treated as sub-options of --classify
    # (without --grid-search); nesting was inferred from flattened source.
    if args.fit:
        classification = Classification('config/classification_config.json', args.features)
        classification.fit_all()
    if args.plot:
        cp = ClassificationPlotter('config/classification_config.json', args.features)
        cp.plot_all()
def numOfCorrectlyClassified(listOfCountedParams, listOfDecisionsInTST, textFile):
    """Tally, per decision class, how many test objects were assigned to it and how many correctly.

    ``listOfCountedParams`` is walked in order; consecutive entries whose
    ``getTestObject()`` equals "x<i>" form the group for the i-th test
    object. When a group ends, the entry with the highest ``getParam()``
    gives the predicted class (a random entry is used when
    ``areParamsInLoopEqual`` reports a tie). The prediction is compared with
    ``listOfDecisionsInTST[i - 1]`` and a line describing the outcome is
    written to ``textFile``.

    Returns:
        A list with one ``Classification`` per unique decision in
        ``listOfDecisionsInTST``, carrying the "classified" and
        "classified correctly" counters.

    NOTE(review): the nesting below was reconstructed from flattened source;
    in particular, confirm that the group-end check belongs inside the
    ``xObject == ...`` test.
    """
    listOfClassifications = []
    i = 1  # 1-based number of the test object currently being grouped
    listOfParamsInLoop = []  # params collected for test object "x<i>"
    properlyClassified = 0  # not used below
    classified = 0  # not used below
    # One zeroed counter object per distinct expert decision.
    for uDecision in unique(listOfDecisionsInTST):
        classification = Classification()
        classification.setCObject(uDecision)
        classification.setListOfClassified(0)
        classification.setListOfClassifiedCorrectly(0)
        listOfClassifications.append(classification)
    enum = 0  # look-ahead cursor into listOfCountedParams (wrapped at the bottom of the loop)
    for countedParam in listOfCountedParams:
        xObject = "x" + str(i)
        if xObject == countedParam.getTestObject():
            listOfParamsInLoop.append(countedParam)
            # The group is complete when the next entry belongs to another object.
            # `and` binds tighter than `or`: A or (B and C).
            if xObject != listOfCountedParams[enum + 1].getTestObject() or \
                    len(listOfDecisionsInTST) == 1 and len(listOfParamsInLoop) == 2:
                cObject = ""
                param = 0
                highestX = ""
                listOfParamsInLoopIterator = 0  # how many times the running maximum was raised
                for elem in listOfParamsInLoop:
                    if param < elem.getParam():
                        param = elem.getParam()
                        highestX = elem.getTestObject()
                        cObject = elem.getCObject()
                        listOfParamsInLoopIterator += 1
                # The maximum was raised more than once, so a later entry beat the first.
                # "dla obiektu" = "for object".
                if listOfParamsInLoopIterator > 1:
                    textFile.write("Param c==" + listOfParamsInLoop[0].getCObject() + "<" + "Param C==" +
                                   listOfParamsInLoop[len(listOfParamsInLoop) - 1].getCObject() + " dla obiektu " + highestX)
                if listOfParamsInLoopIterator <= 1:
                    textFile.write("Param c==" + listOfParamsInLoop[0].getCObject() + ">" + "Param C==" +
                                   listOfParamsInLoop[len(listOfParamsInLoop) - 1].getCObject() + " dla obiektu " + highestX)
                # textFile.write("Dla "+highestX+" param c=="+cObject+" jest największe\n")
                # (disabled message: "For <x> param c==<c> is the largest")
                if areParamsInLoopEqual(listOfParamsInLoop):
                    # Tie: pick one of the candidates at random.
                    randomParam = random.choice(listOfParamsInLoop)
                    if randomParam.getCObject() == listOfDecisionsInTST[i - 1]:
                        # Message: "this decision agrees with the hidden expert decision".
                        textFile.write(
                            " ta decyzja jest zgodna z ukryta decyzja eksperta (decyzja eksperta == " +
                            listOfDecisionsInTST[i - 1] + ")\n")
                        for element in listOfClassifications:
                            if element.getCObject() == randomParam.getCObject():
                                element.setListOfClassifiedCorrectly(
                                    element.getListOfClassifiedCorrectly() + 1)
                                element.setListOfClassified(
                                    element.getListOfClassified() + 1)
                    else:
                        # Message: "this decision does not agree with the hidden expert decision".
                        textFile.write(
                            " ta decyzja jest nie zgodna z ukryta decyzja eksperta (decyzja eksperta == " +
                            listOfDecisionsInTST[i - 1] + ")\n")
                        for element in listOfClassifications:
                            if element.getCObject() == randomParam.getCObject():
                                element.setListOfClassified(
                                    element.getListOfClassified() + 1)
                else:
                    if cObject == listOfDecisionsInTST[i - 1]:
                        textFile.write(
                            " ta decyzja jest zgodna z ukryta decyzja eksperta (decyzja eksperta == " +
                            listOfDecisionsInTST[i - 1] + ")\n")
                        for element in listOfClassifications:
                            if element.getCObject() == cObject:
                                element.setListOfClassifiedCorrectly(
                                    element.getListOfClassifiedCorrectly() + 1)
                                element.setListOfClassified(
                                    element.getListOfClassified() + 1)
                    else:
                        textFile.write(
                            " ta decyzja jest nie zgodna z ukryta decyzja eksperta (decyzja eksperta == " +
                            listOfDecisionsInTST[i - 1] + ")\n")
                        for element in listOfClassifications:
                            if element.getCObject() == cObject:
                                element.setListOfClassified(
                                    element.getListOfClassified() + 1)
                # Start collecting the next test object.
                i += 1
                listOfParamsInLoop = []
        enum += 1
        # Wrap the cursor so that `enum + 1` above never indexes past the end; on the
        # last entry the look-ahead therefore compares against listOfCountedParams[1].
        # NOTE(review): a single-element list still indexes [1] -- confirm inputs
        # always hold at least two entries.
        if enum == len(listOfCountedParams) - 1:
            enum = 0
    return listOfClassifications
def Process(self, sourcex,sourcey, targetx,targety,subsize):
    """Replay a source and a target stream through sliding windows, refreshing the ensemble as the target advances.

    An initial model is built from the first window of each stream via
    ``self.Ensemble.generateNewModelRULSIF``. On every step one of the two
    windows is advanced by one instance, chosen at random. After every 1000
    target steps the ensemble confidences over the current target window
    are tested for a change point (``pelt``); a detected change triggers a
    new model. After every 2000 target steps the not-yet-evaluated target
    instances are scored, one row is appended to 'errorsyn002405.csv', and
    a new model is generated unconditionally. The loop ends when either
    window reaches the end of its stream.

    Args:
        sourcex, targetx: 2-D arrays (``.shape`` is unpacked into two values);
            presumably one row per instance -- TODO confirm.
        sourcey, targety: labels, indexed and sliced in step with the rows.
        subsize: forwarded unchanged to ``generateNewModelRULSIF``.

    Returns:
        None.

    NOTE(review): the nesting of the two periodic ``if`` blocks and of the
    CSV write was reconstructed from flattened source -- confirm.
    """
    # fixed size windows for source stream and target stream
    sourceIndex = 0
    targetIndex = 0
    src_count = 0  # not used below
    tgtchange_count = 0  # target steps since the last change test
    threshold = 1.0  # only referenced by the disabled change-score code
    src_size, _ = sourcex.shape
    tgt_size, _ = targetx.shape
    #true_label = []
    #for i in range(len(np.array(targety))):
        #if np.array(targety)[i] == 'class1':
            #true_label.append(1)
        #if np.array(targety)[i] == 'class2':
            #true_label.append(2)
        #if np.array(targety)[i] == 'class3':
            #true_label.append(3)
        #if np.array(targety)[i] == 'class4':
            #true_label.append(4)
        #if np.array(targety)[i] == 'class5':
            #true_label.append(5)
        #if np.array(targety)[i] == 'class6':
            #true_label.append(6)
        #if np.array(targety)[i] == 'class7':
            #true_label.append(7)
    windowsize = 1000
    # NOTE(review): the window ends are used as exclusive slice bounds here (giving
    # windowsize - 1 rows) but as inclusive bounds in the confidence loop below.
    sourcewindowstart = 0
    sourcewindowend = sourcewindowstart + windowsize -1
    targetwindowstart = 0
    targetwindowend = targetwindowstart + windowsize - 1
    sourcexwindow = sourcex[sourcewindowstart:sourcewindowend]
    sourceywindow = sourcey[sourcewindowstart:sourcewindowend]
    targetxwindow = targetx[targetwindowstart:targetwindowend]
    targetywindow = targety[targetwindowstart:targetwindowend]
    ### get the initial model by using the first source and target windows
    alpha = 0.05
    b = targetxwindow.T.shape[1];
    fold = 5
    sigma_list = Classification.sigma_list(np.array(targetxwindow.T), np.array(sourcexwindow.T));
    lambda_list = Classification.lambda_list();
    srcx_array = np.array(sourcexwindow.T);
    trgx_array = np.array(targetxwindow.T);
    # The results of this call are only read by the disabled change-score code further down.
    (thetah_old, w, sce_old, sigma_old) = Classification.R_ULSIF(trgx_array, srcx_array, alpha, sigma_list, lambda_list, b, fold)
    self.Ensemble.generateNewModelRULSIF(targetxwindow, sourcexwindow, sourceywindow, alpha, sigma_list,
                                         lambda_list, b, fold,subsize)
    # print "update model", src_size, source.shape
    truelablecount = 0.0  # running number of correct target predictions
    totalcount = 0.0  # running number of evaluated target instances
    #tmpsrccount = 0
    tmptrgcount = 0  # target steps since the last forced model update
    changeindex = -1  # set below but never read
    updatestartindex = 0  # first target index not yet evaluated
    while True:
        if sourcewindowend >= src_size or targetwindowend >= tgt_size:
            break
        # data_type == 1 selects the source stream, anything else the target stream.
        # NOTE(review): the exact ratio depends on which randint is imported
        # (inclusive vs. exclusive upper bound) -- confirm.
        data_type = randint(1, 10)
        if data_type < 2:
            print("get data from source")
            sourcewindowstart+=1
            sourcewindowend+=1
            sourcexwindow = sourcex[sourcewindowstart:sourcewindowend]
            sourceywindow = sourcey[sourcewindowstart:sourcewindowend]
            sourceIndex += 1
            #src_count += 1
            #tmpsrccount += 1
            print("sourceIndex", sourceIndex)
        else:
            print("get data from target")
            targetwindowstart+=1
            targetwindowend+=1
            targetxwindow = targetx[targetwindowstart:targetwindowend]
            targetywindow = targety[targetwindowstart:targetwindowend]
            targetIndex += 1
            tgtchange_count += 1
            tmptrgcount += 1
            print("targetIndex", targetIndex)
            if tgtchange_count>=1000:
                # Periodic change test on the ensemble confidences of the current window.
                changeindex = 1
                tgtchange_count = 0
                confidencelist = []
                # NOTE(review): targetwindowend may equal tgt_size here, which would
                # index one past the end of targetx -- confirm.
                for i in range(targetwindowstart, targetwindowend+1):
                    instanceresult = self.Ensemble.evaluateEnsembleRULSIF(targetx[i])
                    confidencelist.append(instanceresult[1])
                confvar = np.var(confidencelist)
                changetestresult = pelt(normal_mean(confidencelist, confvar), len(confidencelist))
                # More than one entry in the result is treated as "change detected".
                if len(changetestresult)>1:
                    alpha = 0.05
                    b = targetxwindow.T.shape[1];
                    fold = 5
                    sigma_list = Classification.sigma_list(np.array(targetxwindow.T), np.array(sourcexwindow.T));
                    lambda_list = Classification.lambda_list();
                    self.Ensemble.generateNewModelRULSIF(targetxwindow, sourcexwindow, sourceywindow, alpha, sigma_list,
                                                         lambda_list, b, fold, subsize)
                #x_nu = np.array(targetxwindow.T);
                #(thetah_new, w, sce_new, sigma_new) = Classification.R_ULSIF(trgx_array, srcx_array, alpha, sigma_list,
                                                                  #lambda_list, b, fold)
                #targetweight_old = Classification.compute_target_weight(thetah_old, sce_old, sigma_old, x_nu)
                #targetweight_new = Classification.compute_target_weight(thetah_new, sce_new, sigma_new, x_nu)
                #l_ratios = targetweight_new / targetweight_old
                #lnWeightTrgData = np.log(l_ratios, dtype='float64')
                #changeScore = np.sum(lnWeightTrgData, dtype='float64')
                #tgtchange_count=0
                #print "changeScore", changeScore
                #if changeScore > threshold:
                    #alpha = 0.05
                    #b = targetxwindow.T.shape[1];
                    #fold = 5
                    #sigma_list = Classification.sigma_list(np.array(targetxwindow.T),
                                                           #np.array(sourcexwindow.T));
                    #lambda_list = Classification.lambda_list();
                    #self.Ensemble.generateNewModelRULSIF(targetxwindow, sourcexwindow, sourceywindow, alpha, sigma_list,
                                                         #lambda_list, b, fold, subsize)
            if tmptrgcount>=2000:
                # force update model
                tmptrgcount=0
                #update predictions for updatestartindex to targetIndex
                for i in range(updatestartindex,targetIndex+1):
                    print("targetx[i]", targetx[i])
                    instanceresult = self.Ensemble.evaluateEnsembleRULSIF(targetx[i])
                    print("instanceresult", instanceresult)
                    print("instanceresult[0]", instanceresult[0])
                    print("truelabel[i]", targety[i])
                    if instanceresult[0] == targety[i]:
                        truelablecount +=1.0
                    totalcount +=1.0
                    print("truelablecount",truelablecount)
                    print("totalcount", totalcount)
                # Row: target index, correct count, evaluated count, running accuracy.
                with open('errorsyn002405.csv', 'a+') as f:
                    writer = csv.writer(f)
                    writer.writerow([targetIndex, truelablecount,totalcount,truelablecount/totalcount ])
                updatestartindex = targetIndex+1
                alpha = 0.05
                b = targetxwindow.T.shape[1];
                fold = 5
                sigma_list = Classification.sigma_list(np.array(targetxwindow.T), np.array(sourcexwindow.T));
                lambda_list = Classification.lambda_list();
                self.Ensemble.generateNewModelRULSIF(targetxwindow, sourcexwindow, sourceywindow, alpha, sigma_list,
                                                     lambda_list, b, fold,subsize)
class ActiveLearning: def __init__(self, Lx, Ly, Ux, Uy, Tx, Ty, method = "svm", budget = 251, optimize = 50, datasetname="dataset"): self.datasetname = datasetname self.Lx = Lx self.Ly = Ly self.Ux = Ux self.Uy = Uy # TODO should not be here self.Tx = Tx # TODO should not be here self.Ty = Ty # TODO should not be here self.optimize = optimize self.budget = budget self.accuracys = [] self.clf = Classification( self.Lx, self.Ly, method = method, Vx = Lx+Ux, Vy = Ly+Uy ) self.clf.train() self.mab = Bandit( algos = np.arange(0., 1.1, 0.1), method = "boltzmann" ) # self.mab = Bandit( algos = np.arange(0., 1.1, 0.1), method = "UCB" ) # self.mab2 = Bandit( algos = ["disag1", "disag2"], method = "boltzmann" ) self.mab2 = Bandit( algos = ["disag1", "disag2"], method = "reinforcement" ) # self.mab2 = Bandit( algos = ["disag1", "disag2"], method = "EXP3" ) #--------------------------------------- def train(self, mtd = "margin", backupfile = "backupfile.txt"): # TODO implement sample_weight + make method to shuffle and return sublist with data_limit for i in range(self.budget): if len(self.Ux) <= 1: break if mtd == "margin": ids, scores = self.query_margin() if mtd == "proba": ids, scores = self.query_proba() if mtd == "entropy": ids, scores = self.query_entropy() if mtd == "random": ids, scores = self.query_random() if mtd == "weight": ids, scores = self.query_sufficient_weight() if mtd == "eer": ids, scores = self.query_eer() if mtd == "dist": ids, scores = self.query_sufficient_distance() if mtd == "disag1": ids, scores = self.query_disagreement1() if mtd == "disag2": ids, scores = self.query_disagreement2() if mtd == "disag3": ids, scores = self.query_disagreement3() if mtd == "balance": ids, scores = self.query_balance() if mtd == "balanced_disag1": ids, scores = self.query_balanced_disag1() if mtd == "balanced_disag2": ids, scores = self.query_balanced_disag2() if mtd == "disag1_balanced": ids, scores = self.query_disag1_balanced() if mtd == "disag2_balanced": ids, 
scores = self.query_disag2_balanced() if mtd == "exp": ids, scores = self.query_explote_explore() if mtd == "test": ids, scores = self.query_disagreement_test() id = ids[0] qx = self.Ux[id] qy = self.Uy[id] self.Lx.append(qx) self.Ly.append(qy) self.Ux.pop(id) self.Uy.pop(id) self.clf.X = self.Lx; self.clf.Y = self.Ly self.clf.train() test_accuracy = self.clf.getTestAccuracy( self.Tx, self.Ty ) self.accuracys.append( test_accuracy ) print "i=", i+1, "-- acc=%.4f"%(test_accuracy*100), "-- %.4f"%(np.mean(self.accuracys)*100), "%.4f"%(np.average(self.accuracys, weights = range(1,1+len(self.accuracys)))*100), "--", scores[0], scores[1] if (i+1)%10 == 0: Util.pickleSave(backupfile, self) # viz = Visualize() # viz.plot( [range(len(self.accuracys)), self.accuracys], fig = backupfile+".png", color = 'r', marker = '-' ) ''' colors = ['r', 'b', 'g', 'k', 'm', 'c', '0.10', '0.35', '0.60', '0.90'] viz.plot( zip(*self.Lx+self.Ux), fig = backupfile+"__.png", color = [colors[int(l)] for l in self.Ly+self.Uy], marker = 'o' ) viz.do_plot( zip(*self.Ux), color = ['y']*len(self.Ux), marker = '.' 
) viz.do_plot( zip(*self.Lx), color = [colors[int(l)] for l in self.Ly], marker = 'o' ) viz.end_plot( fig = backupfile+"_.png" ) ''' #--------------------------------------- def sort_scores(self, scores): if sum(scores) == 0.: scores = [ self.clf.uncertainty_margin(x) for x in self.Ux ] ids = (-np.array(scores)).argsort() sorted_scores = [ scores[id] for id in ids ] return ids, sorted_scores #--------------------------------------- def query_margin(self): return self.sort_scores( [ self.clf.uncertainty_margin(x) for x in self.Ux ] ) #--------------------------------------- def query_proba(self): return self.sort_scores( [ self.clf.uncertainty_prediction(x) for x in self.Ux ] ) #--------------------------------------- def query_entropy(self): return self.sort_scores( [ self.clf.uncertainty_entropy(x) for x in self.Ux ] ) #--------------------------------------- def query_random(self): return self.sort_scores( [ random.uniform(0., 1.) for x in self.Ux ] ) #--------------------------------------- def query_sufficient_weight(self): ids, _ = self.query_margin() return self.sort_scores( [ self.clf.uncertainty_weight(x, self.Lx, self.Ly) if ix in ids[:self.optimize] else 0. for ix, x in enumerate(self.Ux) ] ) #--------------------------------------- def query_eer(self, limit_Y = 20): ids, _ = self.query_margin() scores = [] for ix, x in enumerate(self.Ux): if ix in ids[:self.optimize]: YP = self.clf.predict(x, all = True) YP.sort(key=operator.itemgetter(1), reverse=True) sums = 0. for ir, (yy, proba) in enumerate(YP): if ir == limit_Y: break temp_clf = Classification(self.Lx + [x], self.Ly + [yy], method = self.clf.method); temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C # TODO FIXME: do it in general not specifically for svm temp_clf.train() e_h1 = sum( [ temp_clf.uncertainty_entropy(dp) for dp in self.Ux if dp != x ] ) sums += (proba) * e_h1 informativeness = 1. / sums else: informativeness = 0. 
scores.append( informativeness ) return self.sort_scores(scores) #--------------------------------------- def query_sufficient_distance(self): ids, _ = self.query_margin() scores = [] for ix, x in enumerate(self.Ux): if ix in ids[:self.optimize]: y1, y2, p1, p2 = self.clf.getMarginInfo(x) C = [dp for idp, dp in enumerate(self.Lx) if self.Ly[idp] == y2 ] CDx = [Util.dist(dp, x) for idp, dp in enumerate(self.Lx) if self.Ly[idp] == y2 ] idsC = (np.array(CDx)).argsort(); xx = Util.medoid( [ C[idp] for idp in idsC[:1] ] ) step = 0.01; lower = 0.; upper = 1. while (upper - lower > step): w = (upper + lower) / 2. px = np.array(x) + w * ( np.array(xx) - np.array(x) ) if self.clf.predict_label(px) != y1: upper = w else: lower = w informativeness = 1. - w else: informativeness = 0. scores.append( informativeness ) return self.sort_scores(scores) #--------------------------------------- def query_disagreement1(self, weighted = False, op = 1): ids, _ = self.query_margin() scores = [] for ix, x in enumerate(self.Ux): if ix in ids[:self.optimize*op]: # true_y = self.Uy[ix] true_y = self.clf.predict_label(x) temp_clf = Classification(self.Lx + [x], self.Ly + [true_y], method = self.clf.method) temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train() if not weighted: diff = sum([ 1. if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ]) else: diff = sum([ abs(temp_clf.getPredictProba(1,dp) - self.clf.getPredictProba(1,dp)) if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ]) # diff = sum([ Util.dist(temp_clf.h.predict_proba(dp)[0], self.clf.h.predict_proba(dp)[0]) if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ]) informativeness = diff else: informativeness = 0. 
scores.append( informativeness ) return self.sort_scores(scores) #--------------------------------------- def query_disagreement2(self, weighted = False, op = 1): ids, _ = self.query_margin() scores = [] commitee = [] for idp, dp in enumerate(self.Ux): if idp in ids[:self.optimize*op]: # true_y = self.Uy[idp] true_y = self.clf.predict_label(dp) temp_clf = Classification(self.Lx + [dp], self.Ly + [true_y], method = self.clf.method) temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train() commitee.append( (temp_clf, 1) ) for ix, x in enumerate(self.Ux): if ix in ids[:self.optimize*op]: preds = Counter() if weighted: # weight using proba distrib of commitee for (clf,_) in commitee: if self.clf.predict_label(x) != clf.predict_label(x): YP = zip( clf.h.classes_, clf.h.predict_proba( x )[0] ) for (y,p) in YP: preds[y] += p preds = preds.most_common() diff = 0. if preds == [] else preds[0][1] # diff = 0. if preds == [] else ( preds[0][1] - preds[1][1] if len(preds)>1 else preds[0][1] ) else: # confis = [ clf.getPredictProba(1,x) for (clf,_) in commitee if self.clf.predict_label(x) != clf.predict_label(x) ] labels = [ clf.predict_label(x) for (clf,_) in commitee if self.clf.predict_label(x) != clf.predict_label(x) ] preds = Counter(labels) preds = preds.most_common() diff = 0. if preds == [] else sum( [pred[1] for pred in preds] ) informativeness = diff else: informativeness = 0. 
scores.append( informativeness ) return self.sort_scores(scores) #--------------------------------------- def query_disagreement3(self): id_algo = self.mab2.choose() algo = self.mab2.algos[ id_algo ] print "Choosen =", algo, "nb_choices =", self.mab2.nb_choices, "mean rew=", [ np.mean(L) for L in self.mab2.rewards ] if algo == "disag1": ids, scores = self.query_disagreement1(weighted = True) if algo == "disag2": ids, scores = self.query_disagreement2() reward = self.get_change( self.Ux[ids[0]], self.Uy[ids[0]] ) self.mab2.update(id_algo, reward) return ids, scores #--------------------------------------- def query_balanced_disag1(self, weighted = True, op=1): ids, _ = self.query_margin() scores = [] scores_B = [] for ix, x in enumerate(self.Ux): if ix in ids[:self.optimize*op]: # true_y = self.Uy[ix] true_y = self.clf.predict_label(x) temp_clf = Classification(self.Lx + [x], self.Ly + [true_y], method = self.clf.method) temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train() if not weighted: diff = sum([ 1. if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ]) else: diff = sum([ abs(temp_clf.getPredictProba(1,dp) - self.clf.getPredictProba(1,dp)) if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ]) # diff = sum([ Util.dist(temp_clf.h.predict_proba(dp)[0], self.clf.h.predict_proba(dp)[0]) if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ]) balance = self.get_balance(x) informativeness = diff else: informativeness = 0. balance = 0. 
scores.append( informativeness ) scores_B.append( balance ) # scores_B = Util.normalize(scores_B) scores = [scr*scores_B[iscr] for iscr,scr in enumerate(scores)] return self.sort_scores(scores) #--------------------------------------- def query_balanced_disag2(self, weighted = True, op=1): ids, _ = self.query_margin() scores = [] scores_B = [] commitee = [] for idp, dp in enumerate(self.Ux): if idp in ids[:self.optimize*op]: # true_y = self.Uy[idp] true_y = self.clf.predict_label(dp) temp_clf = Classification(self.Lx + [dp], self.Ly + [true_y], method = self.clf.method) temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train() commitee.append( (temp_clf, 1) ) for ix, x in enumerate(self.Ux): if ix in ids[:self.optimize*op]: preds = Counter() if weighted: # weight using proba distrib of commitee for (clf,_) in commitee: if self.clf.predict_label(x) != clf.predict_label(x): YP = zip( clf.h.classes_, clf.h.predict_proba( x )[0] ) for (y,p) in YP: preds[y] += p preds = preds.most_common() diff = 0. if preds == [] else preds[0][1] # diff = 0. if preds == [] else ( preds[0][1] - preds[1][1] if len(preds)>1 else preds[0][1] ) else: # confis = [ clf.getPredictProba(1,x) for (clf,_) in commitee if self.clf.predict_label(x) != clf.predict_label(x) ] labels = [ clf.predict_label(x) for (clf,_) in commitee if self.clf.predict_label(x) != clf.predict_label(x) ] preds = Counter(labels) preds = preds.most_common() diff = 0. if preds == [] else sum( [pred[1] for pred in preds] ) balance = self.get_balance(x) informativeness = diff else: informativeness = 0. 
balance = 0 scores.append( informativeness ) scores_B.append( balance ) # scores_B = Util.normalize(scores_B) scores = [scr*scores_B[iscr] for iscr,scr in enumerate(scores)] return self.sort_scores(scores) #--------------------------------------- def query_balance(self): ids, _ = self.query_margin() scores = [] for ix, x in enumerate(self.Ux): if ix in ids[:self.optimize*4]: informativeness = self.get_balance(x) else: informativeness = 0. scores.append( informativeness ) return self.sort_scores(scores) #--------------------------------------- def query_disag1_balanced(self, weighted = True): ids, _ = self.query_disagreement1(weighted=weighted, op=2) # ids, _ = self.query_margin() scores = [] for ix, x in enumerate(self.Ux): if ix in ids[:self.optimize/2]: informativeness = self.get_balance(x) else: informativeness = 0. scores.append( informativeness ) return self.sort_scores(scores) #--------------------------------------- def query_disag2_balanced(self, weighted = True): ids, _ = self.query_disagreement2(weighted=weighted, op=2) # ids, _ = self.query_margin() scores = [] for ix, x in enumerate(self.Ux): if ix in ids[:self.optimize/2]: informativeness = self.get_balance(x) else: informativeness = 0. scores.append( informativeness ) return self.sort_scores(scores) #--------------------------------------- def query_explote_explore(self): id_eps = self.mab.choose() eps = self.mab.algos[ id_eps ] # print "Choosen = ", eps, "Expected = ", sum([ a*l for a,l in zip(self.mab.algos,self.mab.nb_choices) ]) / sum(self.mab.nb_choices) rnd = random.uniform(0., 1.) 
# if rnd > eps: ids, scores = self.query_disagreement1(weighted = False) # if rnd > eps: ids, scores = self.query_disagreement1(weighted = True) if rnd > eps: ids, scores = self.query_disagreement2() # else: ids, scores = self.query_balance() else: ids, scores = self.query_random() reward = self.get_change( self.Ux[ids[0]], self.Uy[ids[0]] ) self.mab.update(id_eps, reward) return ids, scores #--------------------------------------- #--------------------------------------- #--------------------------------------- #--------------------------------------- #--------------------------------------- #--------------------------------------- #--------------------------------------- def get_disag1(self, x, weighted = False): true_y = self.Uy[ self.Ux.index(x) ] # true_y = self.clf.predict_label(x) temp_clf = Classification(self.Lx + [x], self.Ly + [true_y], method = self.clf.method) temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train() if not weighted: diff = sum([ 1. if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ]) else: diff = sum([ 1.-abs(temp_clf.getPredictProba(1,dp) - self.clf.getPredictProba(1,dp)) if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ]) informativeness = diff return informativeness # def get_disag2(self, x, commitee, weighted = False): preds = Counter() if weighted: # weight using proba distrib of commitee for (clf,_) in commitee: if self.clf.predict_label(x) != clf.predict_label(x): YP = zip( clf.h.classes_, clf.h.predict_proba( x )[0] ) for (y,p) in YP: preds[y] += p preds = preds.most_common() diff = 0. if preds == [] else preds[0][1] else: # confis = [ clf.getPredictProba(1,x) for (clf,_) in commitee if self.clf.predict_label(x) != clf.predict_label(x) ] labels = [ clf.predict_label(x) for (clf,_) in commitee if self.clf.predict_label(x) != clf.predict_label(x) ] preds = Counter(labels) preds = preds.most_common() diff = 0. 
if preds == [] else sum( [pred[1] for pred in preds] ) informativeness = diff return informativeness # def query_disagreement_test(self): ids, _ = self.query_margin() scores = [] plots_Y = []; plots_X0 = []; plots_X1 = []; plots_X2 = []; plots_X3 = []; plots_X4 = []; plots_X5 = []; plots_X6 = []; viz = Visualize() commitee = [] for idp, dp in enumerate(self.Ux): if idp in ids[:self.optimize]: true_y = self.Uy[idp] # true_y = self.clf.predict_label(dp) temp_clf = Classification(self.Lx + [dp], self.Ly + [true_y], method = self.clf.method) temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train() commitee.append( (temp_clf, 1) ) # =========================== # sampled = random.sample(ids, 100) for ix, x in enumerate(self.Ux): # if ix in sampled: if ix in ids[:self.optimize*9999999]: informativeness1 = self.get_disag1(x, weighted = False) informativeness2 = self.get_disag2(x, commitee, weighted = False) informativeness3 = self.get_disag1(x, weighted = True) informativeness4 = self.get_disag2(x, commitee, weighted = True) informativeness5 = self.clf.uncertainty_prediction(x) informativeness6 = self.get_balance(x) temp_clf = Classification(self.Lx + [x], self.Ly + [self.Uy[ix]], method = self.clf.method) temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train() acc = temp_clf.getTestAccuracy( self.Tx, self.Ty ) plots_X0.append( acc ) plots_X1.append( informativeness1 ) plots_X2.append( informativeness2 ) plots_X3.append( informativeness3 ) plots_X4.append( informativeness4 ) plots_X5.append( informativeness5 ) plots_X6.append( informativeness6 ) plots_Y.append( 'r' if self.Uy[ix] != self.clf.predict_label(x) else 'b' ) fig, axs = plt.subplots( 1, 1, sharex=True ) axs.scatter( plots_X1, plots_X2, c = plots_Y, marker = "o", cmap = plt.copper() ) plt.savefig(str(len(self.Lx)) + self.datasetname+'.1-2.png'); plt.close() fig, axs = plt.subplots( 1, 1, sharex=True ) axs.scatter( plots_X3, plots_X4, c = plots_Y, marker = "o", cmap = plt.copper() 
) plt.savefig(str(len(self.Lx)) + self.datasetname+'.3-4.png'); plt.close() fig, axs = plt.subplots( 1, 1, sharex=True ) axs.scatter( plots_X1, plots_X0, c = plots_Y, marker = "o", cmap = plt.copper() ) plt.savefig(str(len(self.Lx)) + self.datasetname+'.1-acc.png'); plt.close() fig, axs = plt.subplots( 1, 1, sharex=True ) axs.scatter( plots_X2, plots_X0, c = plots_Y, marker = "o", cmap = plt.copper() ) plt.savefig(str(len(self.Lx)) + self.datasetname+'.2-acc.png'); plt.close() fig, axs = plt.subplots( 1, 1, sharex=True ) axs.scatter( plots_X3, plots_X0, c = plots_Y, marker = "o", cmap = plt.copper() ) plt.savefig(str(len(self.Lx)) + self.datasetname+'.3-acc.png'); plt.close() fig, axs = plt.subplots( 1, 1, sharex=True ) axs.scatter( plots_X4, plots_X0, c = plots_Y, marker = "o", cmap = plt.copper() ) plt.savefig(str(len(self.Lx)) + self.datasetname+'.4-acc.png'); plt.close() fig, axs = plt.subplots( 1, 1, sharex=True ) axs.scatter( plots_X5, plots_X0, c = plots_Y, marker = "o", cmap = plt.copper() ) plt.savefig(str(len(self.Lx)) + self.datasetname+'.5-acc.png'); plt.close() fig, axs = plt.subplots( 1, 1, sharex=True ) axs.scatter( plots_X6, plots_X0, c = plots_Y, marker = "o", cmap = plt.copper() ) plt.savefig(str(len(self.Lx)) + self.datasetname+'.6-acc.png'); plt.close() # plots = [ plots_X1, plots_X2, plots_X3, plots_X4, plots_X5, plots_X6 ] # fig, axs = plt.subplots( 5, 1, sharex=True ) # axs[0].scatter( Util.normalize(plots_X1), plots_X0, c = plots_Y, marker = "o", cmap = plt.copper() ) # axs[1].scatter( Util.normalize(plots_X2), plots_X0, c = plots_Y, marker = "o", cmap = plt.copper() ) # axs[2].scatter( Util.normalize(plots_X3), plots_X0, c = plots_Y, marker = "o", cmap = plt.copper() ) # axs[3].scatter( Util.normalize(plots_X4), plots_X0, c = plots_Y, marker = "o", cmap = plt.copper() ) # axs[4].scatter( Util.normalize(plots_X5), plots_X0, c = plots_Y, marker = "o", cmap = plt.copper() ) # axs[5].scatter( Util.normalize(plots_X6), plots_X0, c = plots_Y, 
marker = "o", cmap = plt.copper() ) # plt.savefig(str(len(self.Lx)) + self.datasetname+'.png') # plt.close() informativeness = acc else: informativeness = 0. scores.append( informativeness ) return self.sort_scores(scores) # def get_balance(self, x): # y = self.Uy[ self.Ux.index(x) ] y = self.clf.predict_label(x) temp_clf = Classification(self.Lx + [x], self.Ly + [y], method = self.clf.method) temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train() cnt = Counter() for dp in self.Ux: cnt[ temp_clf.predict_label(dp) ] += 1. / len(self.Ux) P = [ cnt[key] for key in cnt ] informativeness = -1.0 * sum( [ p * math.log(p, len(P)) for p in P if p > 0 ] ) return informativeness # def get_change(self, x, y = None): if y is None: y = self.Uy[ self.Ux.index(x) ] # y = self.clf.predict_label(x) temp_clf = Classification(self.Lx + [x], self.Ly + [y], method = self.clf.method) temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train() v1 = [ self.clf.getPredictProba(1, dp) for dp in self.Ux if x != dp ] v2 = [ temp_clf.getPredictProba(1, dp) for dp in self.Ux if x != dp ] # informativeness = Util.dist(v1, v2) informativeness = math.acos( cosine_similarity(v1, v2) ) / math.pi # v1 = []; v2 = [] # for dp in self.Ux: # if x != dp: # v1 += [ v for v in self.clf.h.predict_proba( dp )[0] ] # v2 += [ v for v in temp_clf.h.predict_proba( dp )[0] ] # informativeness = distance.cosine(v1, v2) return informativeness
target_path = os.path.join(target_root_path, pred) if not os.path.exists(target_path): os.makedirs(target_path) target_file_path = os.path.join(target_path, e) if os.path.splitext(subdir)[1] == '.jpg': cv2.imencode('.jpg', image)[1].tofile(target_file_path) if os.path.splitext(subdir)[1] == '.bmp': cv2.imencode('.bmp', image)[1].tofile(target_file_path) if os.path.splitext(subdir)[1] == '.png': cv2.imencode('.png', image)[1].tofile(target_file_path) elif os.path.isdir(subdir): # 如果是路径 batch_clfmove(clf, subdir, target_root_path) if __name__ == '__main__': # 测试用例 pb_path = r'F:\models-master\cell\分类\pb/frozen_inception_resnet_v2_inf_graph.pb' img_root_path = r'F:\models-master\cell\分类\test_set' clf = Classification(pb_path) batch_test(clf, img_root_path, labels_list=[ 'danhe', 'linba', 'shijian', 'shisuan', 'zhongxing', 'broken', 'background' ]) batch_clfmove(clf, img_root_path, r'F:\models-master\cell\分类\1')
COREZMQ_SERVER_FILE = os.path.join(os.getcwd(), 'bin', 'corezmq_server.js')

# Start the server (see bin/corezmq_server.js for more options e.g. for how to pass a pluginConfig)
node_process = subprocess.Popen(
    ['node', COREZMQ_SERVER_FILE, PROJECT_NAME, '-p', PORT, '-m', METADATA_PATH],
    stdout=sys.stdout, stderr=sys.stderr)
logger.info('Node-process running at PID {0}'.format(node_process.pid))

# Create an instance of WebGME and the plugin
webgme = WebGME(PORT, logger)


def exit_handler():
    """Disconnect from WebGME and stop the node server at interpreter exit."""
    logger.info('Cleaning up!')
    try:
        webgme.disconnect()
    finally:
        # Always stop the server, even when disconnecting fails, and only
        # signal it while it is still running.
        if node_process.poll() is None:
            node_process.send_signal(signal.SIGTERM)


atexit.register(exit_handler)

commit_hash = webgme.project.get_branch_hash(BRANCH_NAME)
plugin = Classification(webgme, commit_hash, BRANCH_NAME, ACTIVE_NODE_PATH,
                        ACTIVE_SELECTION_PATHS, NAMESPACE)

# Do the work
plugin.main()

# The exit_handler will be invoked after this line