def ValidateFineTuning(dataset, validateall=True, validateat=0):
    datas = ReadValidationData(dataset)
    result = []
    time = 0
    params = GetParams(dataset)
    cnnfilename = params['FineTuning']['cnn_load_file']
    rnnfilename = params['FineTuning']['rnn_load_file']
    ensemblefilename = params['FineTuning']['ensemble_load_file']
    finetuningfilename = params['FineTuning']['ensemble_save_file']
    for data in datas:
        time += 1
        if not validateall:
            if validateat > time:
                continue
            elif validateat < time:
                break
        input = data['input']
        label = data['label']
        params['FineTuning']['cnn_load_file'] = cnnfilename + '_' + str(time)
        params['FineTuning']['rnn_load_file'] = rnnfilename + '_' + str(time)
        params['FineTuning']['ensemble_load_file'] = ensemblefilename + '_' + str(time)
        params['FineTuning']['ensemble_save_file'] = finetuningfilename + '_' + str(time)
        r = Ensemble(params['FineTuning'],
                     input['train']['onehot'], input['train']['biofeat'], label['train'],
                     input['test']['onehot'], input['test']['biofeat'], label['test'],
                     issave=True, withbiofeature=True,
                     rnn_trainable=True, cnn_trainable=True)
        result.append(r['loss'])
    print(result)
def plot_error(l):
    res = []
    for alpha in l:
        res.append(
            test(Ensemble(5, linear_model.Lasso(alpha=alpha),
                          [clf, clf1, clf2, clf3])))
    plt.plot(l, res)
    plt.show()
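# --- Added usage sketch (not from the source): sweep the stacking
# meta-learner's Lasso alpha over a log grid and plot the cross-validated
# error. Assumes the base estimators clf, clf1, clf2, clf3 and the test()
# helper defined elsewhere in this file are in scope.
import numpy as np
plot_error(np.logspace(-4, -1, 20))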
def Pipeline(dataset, pretrainCNN=False, pretrainRNN=False,
             ensemble=False, fineTuning=False):
    data = ReadData(dataset)
    params = GetParams(dataset)
    input = data['input']
    label = data['label']
    r = None
    if pretrainCNN:
        r = CNN(params['CNNParams'],
                input['train']['onehot'], label['train'],
                input['test']['onehot'], label['test'])
    if pretrainRNN:
        r = RNN(params['RNNParams'],
                input['train']['onehot'], label['train'],
                input['test']['onehot'], label['test'])
    if ensemble:
        r = Ensemble(params['EnsembleParams'],
                     input['train']['onehot'], input['train']['biofeat'], label['train'],
                     input['test']['onehot'], input['test']['biofeat'], label['test'],
                     withbiofeature=True, cnn_trainable=False, rnn_trainable=False)
    if fineTuning:
        r = Ensemble(params['FineTuning'],
                     input['train']['onehot'], input['train']['biofeat'], label['train'],
                     input['test']['onehot'], input['test']['biofeat'], label['test'],
                     withbiofeature=True, cnn_trainable=True, rnn_trainable=True,
                     load_weight=True)
    return r
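# --- Added usage sketch (not from the source): a typical full run of the
# pipeline -- pretrain both base networks, stack them, then fine-tune. The
# dataset name is a placeholder.
result = Pipeline('example_dataset', pretrainCNN=True, pretrainRNN=True,
                  ensemble=True, fineTuning=True)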
def main():
    # Load the classes
    data_dir = pathlib.Path('./data/tiny-imagenet-200/train/')
    CLASSES = sorted([item.name for item in data_dir.glob('*')])
    im_height, im_width = 64, 64

    models = load_models()
    data_transforms = transforms.Compose([
        transforms.ToTensor(),
        # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        transforms.Normalize((0, 0, 0), tuple(np.sqrt((255, 255, 255))))
    ])

    evalcsv_path = sys.argv[1]
    print("path to the eval.csv file:", evalcsv_path)

    # Loop through the CSV file and make a prediction for each line
    with open('eval_classified.csv', 'w') as eval_output_file:  # Open the evaluation CSV file for writing
        # eval_dir = pathlib.Path('eval.csv')
        eval_dir = pathlib.Path(evalcsv_path)
        for line in eval_dir.open():  # Open the input CSV file for reading
            # for line in pathlib.Path(sys.argv[1]).open():
            image_id, image_path, image_height, image_width, image_channels = \
                line.strip().split(',')  # Extract CSV info
            print(image_id, image_path, image_height, image_width, image_channels)
            with open(image_path, 'rb') as f:
                img = Image.open(f).convert('RGB')
                img = data_transforms(img)[None, :]
            ensemble_solver = Ensemble(models)
            predicted = ensemble_solver.evaluate_testdata(img)
            print("predicted class:", CLASSES[predicted])
            print()
            # Write the prediction to the output file
            eval_output_file.write('{},{}\n'.format(image_id, CLASSES[predicted]))
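# Standard entry-point guard (an assumption; the original file may invoke
# main() differently).
if __name__ == '__main__':
    main()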
help="Type of estimator (maximum likelihood (ml) or shrinkage") (options, args) = parser.parse_args() if not options.trajectory_filename or not options.topology_filename: parser.error("--trajectory and --topology options must be specified") # Construct reference if available try: reference = MDAnalysis.Universe(options.topology_filename, options.topology_filename) except: reference = None # Construct ensemble ensemble = Ensemble(topology=options.topology_filename, trajectory=options.trajectory_filename, atom_selection_string=options.atom_selection_string, frame_interval=options.frame_interval) # Align ensemble to reference if not options.no_align: ensemble.align(reference) # Select covariance estimator estimator = EstimatorML() if options.covariance_estimator == "shrinkage": estimator = EstimatorShrinkage() # Disable reference unless use_distance_to_reference is set if not options.use_distance_to_reference: reference = None
        write_top_k_ssd_edges(sorted_hyperedges, pow(2, i))
    return sorted_hyperedges


def write_top_k_ssd_edges(sorted_hyperedges, k):
    sorted_hyperedges = sorted_hyperedges[:k]
    with open(results_folder + '/lam/top-' + str(k) + '-hyperedges.tsv', 'w+') as f:
        for hyperedge in sorted_hyperedges:
            f.write(list_to_tab_seperated_string(list(hyperedge)) + '\n')
    print('Updated top %d hyperedges' % k)
    return None


with open(datafile, 'r') as f:
    T = Ensemble()
    for line in f:
        line = line.strip()
        timestamp, v1, v2 = line.split('\t', 2)
        if v1 != v2:
            T.add_edge(timestamp, int(v1), int(v2))
        else:
            T.add_node(int(v1))

with open(results_folder + '/' + datafile + '.summary', 'w+') as f:
    edges = [g.size() for g in T.get_all_static_graphs()]
    summary = '%d\t%d\t%d\t%d\t%d' % (T.get_num_of_timestamps(), T.order(),
                                      min(edges), max(edges), T.size())
    print('Graph Summary: %s' % summary)
    f.write(summary)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from Ensemble import Ensemble
from data import load_dataset
import numpy as np

# =============================================================================
# Create a model with 5 classifiers; they are saved in the clasificadores/
# directory, which must already exist
# =============================================================================
Test = Ensemble(5, 'clasificadores2')

# =============================================================================
# Load data
# =============================================================================
# Full dataset
X, y = load_dataset()
total_size = X.shape[0]
train_size = int(np.floor(total_size * .8))
test_size = total_size - train_size
print('- Dataset: %d samples' % total_size)
print('- Training set: %d samples' % train_size)
print('- Test set: %d samples' % test_size)

np.random.seed(54)
rp = np.random.permutation(total_size)
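# --- Added continuation sketch (not from the source): use the permutation rp
# to carve out the 80/20 train/test split whose sizes were just printed.
train_idx, test_idx = rp[:train_size], rp[train_size:]
X_train, y_train = X[train_idx], y[train_idx]
X_test, y_test = X[test_idx], y[test_idx]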
        item_id = names[i].split('.')[0]
        if y[i] == 0:
            result = 1
        else:
            result = 0
        conf = confidence[i][y[i]]
        f.write("%s,%d,%f\n" % (item_id, result, conf))


ip = ImagesProcessor()
images, y = ip.getImages('../imgs/test/dataset/', size=None, training=False)

# This is what should be used to predict the final result
if True:
    ensemble = Ensemble()
    ensemble.load()
    X_predictions = ensemble.predict_small(images)
    y_hat = ensemble.predict_big(X_predictions)
    confidence = ensemble.ensemble_logistic_regression.predict_proba(X_predictions)
    printResult(y, y_hat, confidence)
    # score(y_hat, y)

# This is what should be used to fit the logistic regression and save it
if False:
    ensemble = Ensemble()
    ensemble.load()
    X_validation_predictions = ensemble.predict_small(images)
    ensemble.fit_big(X_validation_predictions, y)
    f = open("./ensemble_logistic_regression", 'wb')  # open() replaces Python 2's file()
import sys
from Ensemble import Ensemble
import matplotlib.pyplot as plt

__author__ = "adb"

INPUTFILE = sys.argv[1]
SIGMA = float(sys.argv[2])
CONSTRAINT = sys.argv[3]

with open(INPUTFILE, "r") as f:
    T = Ensemble()
    for line in f:
        line = line.strip()
        timestamp, v1, v2 = line.split("\t", 2)
        if v1 != v2:
            T.add_edge(timestamp, v1, v2)
        else:
            T.add_node(v1)

# if CONSTRAINT == 'am':
#     T.generate_antimonotone_hyperedges_report(SIGMA)
# elif CONSTRAINT == 'lam':
#     T.generate_looselyantimonotone_hyperedges_report(SIGMA)
# else:
#     print(T.maximal_sigma_ssd_ucs(0.01))

d = T.compute_am_sigma_hyperedges_dict([0.01, 0.1, 0.2, 0.3])
for i in d:
    print(i, len(d[i]))
    d = compute_subgraph_distribution_for_nodes(ensemble, nodes)
    return tuple([len(v) for k, v in d.items()])


def test_equivalence_partition(iterable_list, relation=lambda x, y: x == y):
    classes, partitions, ids = EquivalenceClass.equivalence_enumeration(
        iterable_list, relation)
    EquivalenceClass.check_equivalence_partition(classes, partitions, relation)
    # for c in classes: print(c)
    # for o, c in partitions.items(): print(o, ':', c)
    return classes, partitions


with open('seprox', 'r') as f:
    T = Ensemble()
    for line in f:
        line = line.strip()
        timestamp, v1, v2 = line.split('\t', 2)
        if v1 != v2:
            T.add_edge(timestamp, int(v1), int(v2))
        else:
            T.add_node(int(v1))

nodes_freq_dist_map = collections.OrderedDict()
ssd_buckets = collections.OrderedDict()

with open('hyperedge-results/seprox/lam/top-1024-hyperedges.tsv', 'r') as f:
    for line in f:
        line = line.strip()
        nodes = [int(n) for n in line.split('\t')[:-1]]
        ssd = float(line.split('\t')[-1])
def RandomForest(X, y, epsilon, num_learners, depth, min_sample):
    dt = DecisionTree(epsilon, min_sample, depth)
    result = Ensemble(dt, num_learners, 0.8)
    result.fit(X, y)
    return result
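# --- Added usage sketch (not from the source), on hypothetical toy data.
# Assumes the Ensemble returned above exposes predict() alongside the fit()
# already called inside RandomForest.
import numpy as np
X_toy = np.random.rand(100, 4)
y_toy = (X_toy[:, 0] > 0.5).astype(int)
forest = RandomForest(X_toy, y_toy, epsilon=0.01, num_learners=10,
                      depth=5, min_sample=2)
preds = forest.predict(X_toy)  # predict() is an assumption about Ensemble's API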
    sorted_hyperedges = sorted(ssd_hyperedges_in_sigma_range, key=lambda x: x[-1])
    for i in range(16):
        write_top_k_ssd_edges(sorted_hyperedges, pow(2, i))
    return sorted_hyperedges


def write_top_k_ssd_edges(sorted_hyperedges, k):
    sorted_hyperedges = sorted_hyperedges[:k]
    with open(results_folder + '/am/top-' + str(k) + '-hyperedges.tsv', 'w+') as f:
        for hyperedge in sorted_hyperedges:
            f.write(list_to_tab_seperated_string(list(hyperedge)) + '\n')
    print('Updated top %d hyperedges' % k)
    return None


with open(datafile, 'r') as f:
    T = Ensemble()
    for line in f:
        line = line.strip()
        timestamp, v1, v2 = line.split('\t', 2)
        if v1 != v2:
            T.add_edge(timestamp, int(v1), int(v2))
        else:
            T.add_node(int(v1))

with open(results_folder + '/' + datafile + '.summary', 'w+') as f:
    edges = [g.size() for g in T.get_all_static_graphs()]
    summary = '%d\t%d\t%d\t%d\t%d' % (T.get_num_of_timestamps(), T.order(),
                                      min(edges), max(edges), T.size())
    print('Graph Summary: %s' % summary)
    f.write(summary)

ssd_hyperedges_in_sigma_range = []
# largest_ligand = ref_kr.find_largest_ligand()
prot = Protein.from_file(join(dirname(largest_ligand), 'protein.mol2'))
bs = Protein.BindingSiteFromMolecule(protein=prot,
                                     molecule=MoleculeReader(largest_ligand)[0],
                                     distance=7.0)

s_paths_file = join(main_dir, "shrunk_hot_paths_{}.json".format(nrot))
if exists(s_paths_file):
    with open(s_paths_file, "r") as f:
        s_paths_dict = json.load(f)
else:
    s_paths_dict = {}

ensemble = Ensemble(root_dir=join(main_dir, e))
ensemble.reference_binding_site = bs
# hot_paths = glob(join(ensemble.root_dir, '*', "fullsize_hotspots_{}".format(nrot), "out.zip"))
s_paths = ensemble.shrink_hotspots(hotspot_paths=hot_paths, padding=2.0)
s_paths_dict[e] = s_paths

for probe in ["donor", "acceptor", "apolar"]:
    paths = [join(t, '{}.ccp4'.format(probe)) for t in s_paths]
    gr = GridEnsemble(paths)
    gr.get_ensemble_array()
    gr.save_gridensemble(join(main_dir, e, '{}_{}.p'.format(probe, nrot)))

spd = json.dumps(s_paths_dict, sort_keys=True, indent=4,
            obj = getattr(self.object, "get_" + self.name)()
            return obj.shape
        return (len(self), )

    shape = property(get_shape)

    def __repr__(self):
        return repr(self[:])

    def __str__(self):
        return str(self[:])


# turn off debugging
##def debug(message): pass

if __name__ == "__main__":
    from pdb import pm
    from logging import debug
    import logging
    logging.basicConfig(level=logging.DEBUG)
    from Ensemble_registers import Ensemble_registers
    from Ensemble import Ensemble, ensemble_driver

    ensemble = Ensemble()
    positions = ArrayWrapper(ensemble, "position")
    command_dial_values = ArrayWrapper(ensemble, "command_dial_values", method="multiple")
    floating_point_variables = \
        ArrayWrapper(ensemble, "floating_point_variables", method="multiple")
clf = linear_model.Ridge(alpha=15, fit_intercept=True)
# the coefficients were found by varying one while keeping the other two fixed
clf2 = XGBRegressor(max_depth=2, learning_rate=0.08, n_estimators=955,
                    subsample=0.96)
# clf4 = neighbors.KNeighborsRegressor(weights='distance')
clf1 = linear_model.Lasso(alpha=0.0005)
clf3 = ensemble.RandomForestRegressor(n_estimators=100, max_depth=15)
clf5 = Ensemble(5, linear_model.Lasso(alpha=0.0005), [clf, clf1, clf2, clf3])
"""
clf = regularization.logReg(l=0.00001)
clf.fit(X1, y)
"""

RMSE_Score = metrics.make_scorer(RMSE)


def test(model):
    return (np.mean(
        cross_validation.cross_val_score(model, X1, y['SalePrice'],
                                         cv=5, scoring=RMSE_Score)))
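# --- Added usage sketch (not from the source): score the stacked model clf5
# with the same 5-fold CV scorer used for the base learners (X1 and y are
# assumed to be loaded earlier in this file).
print(test(clf5))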
import sys
from Ensemble import Ensemble
import matplotlib.pyplot as plt

__author__ = 'adb'

INPUTFILE = sys.argv[1]
SIGMA = float(sys.argv[2])
CONSTRAINT = sys.argv[3]

with open(INPUTFILE, 'r') as f:
    T = Ensemble()
    for line in f:
        line = line.strip()
        timestamp, v1, v2 = line.split('\t', 2)
        if v1 != v2:
            T.add_edge(timestamp, v1, v2)
        else:
            T.add_node(v1)

# if CONSTRAINT == 'am':
#     T.generate_antimonotone_hyperedges_report(SIGMA)
# elif CONSTRAINT == 'lam':
#     T.generate_looselyantimonotone_hyperedges_report(SIGMA)
# else:
#     print(T.maximal_sigma_ssd_ucs(0.01))

d = T.compute_am_sigma_hyperedges_dict([0.01, 0.1, 0.2, 0.3])
for i in d:
    print(i, len(d[i]))
    plot_ucs_size_composition_vs_ssd_cutoff(threeS, fourS, fiveS, ssd_range)
    return size_map_for_ssd_range


with open(graph_summary_file, 'r') as f:
    for line in f:
        line = line.strip()
        summary = line.split('\t', 4)
        num_of_timestamps, num_of_nodes, min_num_of_edges, max_num_of_edges, total_num_of_edges = (
            int(summary[0]), int(summary[1]), int(summary[2]),
            int(summary[3]), int(summary[4]))

with open(datafile, 'r') as f:
    T = Ensemble()
    for line in f:
        line = line.strip()
        timestamp, v1, v2 = line.split('\t', 2)
        if v1 != v2:
            T.add_edge(timestamp, int(v1), int(v2))
        else:
            T.add_node(int(v1))

plot_ssd_vs_rank(128)
# plot_percent_of_nodes_vs_rank(1024, num_of_nodes)
# plot_ssd_of_uncovered_nodes_vs_rank(2048, T)
# print(plot_num_of_k_size_ucs_vs_ssd_cutoff_for_am([0.1, 0.2, 0.3, 0.4, 0.5, 0.6]))
# print(plot_num_of_k_size_ucs_vs_ssd_cutoff_for_lam([0.1, 0.2, 0.3, 0.4, 0.5, 0.6]))
plt.show()
class Arbre_C45(Arbre_ID3):
    """
    A C4.5 tree inherits from an ID3 tree but is built differently
    """

    def __init__(self, chemin_données="", chemin_elagage=""):
        """
        chemin_données is the path of the file containing the data
        chemin_elagage is the path of another set used to prune the tree
        """
        # initialize the ensemble with the file at chemin_données
        self.ensemble = Ensemble(chemin_données)
        # initialize the root node of the tree
        self.arbre = None
        self.chemin_elagage = chemin_elagage

    def construire(self):
        """
        builds the complete tree
        """
        # if the set is corrupted (missing attributes), restore it
        self.ensemble.restaurer_valeurs_manquantes()
        self.arbre = self.__construire_arbre(self.ensemble)

    def __construire_arbre(self, ensemble):
        if not isinstance(ensemble, Ensemble):
            raise TypeError("ensemble must be an Ensemble, not a {}" \
                .format(type(ensemble)))
        # if the list is empty
        if len(ensemble) == 0:
            raise ValueError("the list of examples cannot be empty!")
        # check whether all the examples share the same label
        if ensemble.entropie() == 0:
            # return that label
            return Feuille(ensemble.liste_exemples[0].etiquette)
        # if there are no attributes left to test
        if len(ensemble.liste_attributs) == 0:
            max, etiquette_finale = 0, ""
            # test every possible label of the set
            for etiquette in ensemble.etiquettes_possibles():
                sous_ensemble = ensemble.sous_ensemble_etiquette(etiquette)
                # if it is the most frequent one, choose it
                if len(sous_ensemble) > max:
                    max, etiquette_finale = len(sous_ensemble), etiquette
            # and return it inside a leaf
            return Feuille(etiquette_finale)
        # remember to save the values so they can be restored in case
        # the discretized attribute is not chosen
        sauvegarde_valeurs = ensemble.sauvegarder_valeurs_discretes()
        # for each value to discretize
        for attribut, valeurs in sauvegarde_valeurs:
            # discretize it
            ensemble.discretiser(attribut)
        # retrieve the optimal attribute
        # WARNING: pass ID3=False to use the gain ratio
        a_tester = ensemble.attribut_optimal(ID3=False)
        # for each saved attribute
        for attribut, valeurs in sauvegarde_valeurs:
            # if it is not the chosen attribute
            if attribut != a_tester:
                # restore the old continuous values
                for i in range(len(valeurs)):
                    ensemble.liste_exemples[i].dict_attributs[attribut] = \
                        valeurs[i]
        # if we reach this point, we necessarily return a node, not a leaf
        noeud = Noeud(a_tester)
        # for each value the tested attribute can take
        for valeur in ensemble.valeurs_possibles_attribut(a_tester):
            # create a subset
            sous_ensemble = ensemble.sous_ensemble_attribut(a_tester, valeur)
            # and build a new node from it
            noeud.enfants[valeur] = self.__construire_arbre(sous_ensemble)
        # return the node we just created
        return noeud

    def etiqueter(self, exemple):
        # initialize the current node with the top of the tree
        noeud_actuel = self.arbre
        # while we are on a node and not on a leaf
        while isinstance(noeud_actuel, Noeud):
            # valeur == value of the example being labeled, for the node's attribute
            valeur = exemple.dict_attributs[noeud_actuel.attribut_teste]
            # if the value represents a number
            try:
                valeur = float(valeur)
            # if that fails, all is well: it is a discrete value
            except ValueError:
                pass
            # if it is a continuous value, turn it into an interval
            else:
                for intervalle in noeud_actuel.enfants:
                    if valeur < intervalle[1] and valeur >= intervalle[0]:
                        valeur = intervalle
                        break
            finally:
                # but the node must still advance
                noeud_actuel = noeud_actuel.enfants[valeur]
        # once the traversal is done, label the example
        exemple.etiquette = noeud_actuel.etiquette

    def taux_erreur(self, ensemble):
        """
        returns a number in [0, 1] giving the proportion of examples in
        ensemble that get labeled correctly by the tree as it stands
        """
        compteur_etiquetages_incorrects = 0
        # for each example
        for exemple in ensemble.liste_exemples:
            # keep its label
            etiquette = exemple.etiquette
            self.etiqueter(exemple)
            # and compare it with the label given by the tree
            if etiquette != exemple.etiquette:
                # if they differ, increase the count of incorrect
                # labelings and restore the correct label
                compteur_etiquetages_incorrects += 1
                exemple.etiquette = etiquette
        return compteur_etiquetages_incorrects / len(ensemble)

    def elaguer(self):
        """
        modifies the tree by pruning it against the pruning set given
        in self.chemin_elagage
        """
        # WARNING: if no path was given, we do not prune!
        if self.chemin_elagage != "":
            self.arbre = self.__elaguer_noeud(self.arbre,
                                              Ensemble(self.chemin_elagage))

    def __elaguer_noeud(self, noeud, ensemble_elagage):
        """
        prunes the node passed as a parameter and returns it
        """
        # if we are on a leaf, we go no further
        if isinstance(noeud, Feuille):
            return noeud
        min_erreur, etiquette_gardee = 1.0, ""
        proportion_initiale = self.taux_erreur(ensemble_elagage)
        sauvegarde = self.arbre
        # for each label
        for etiquette in ensemble_elagage.etiquettes_possibles():
            self.arbre = Feuille(etiquette)
            # compute the error rate when the node is replaced by
            # the label in question
            taux_erreur_actuel = self.taux_erreur(ensemble_elagage)
            # keep the best rate
            if taux_erreur_actuel < min_erreur:
                min_erreur, etiquette_gardee = taux_erreur_actuel, etiquette
        # if there is an advantageous rate, prune at this spot
        if min_erreur <= proportion_initiale:
            return Feuille(etiquette_gardee)
        else:
            self.arbre = sauvegarde
        # do not forget to restore the value of the top of the tree!
        sauvegarde, self.arbre = self.arbre, sauvegarde
        # test each child to see whether it can be pruned
        for enfant in noeud.enfants:
            sous_ensemble = ensemble_elagage.sous_ensemble_attribut(
                noeud.attribut_teste, enfant)
            # if it can, prune it
            if len(sous_ensemble) != 0:
                noeud.enfants[enfant] = self.__elaguer_noeud(
                    noeud.enfants[enfant], sous_ensemble)
        # and finally, return the node with its possibly pruned children
        return noeud
    return iarr


if __name__ == "__main__":
    import pandas as pd
    from Ensemble import Ensemble
    from glob import glob
    from os import mkdir
    from os.path import exists, join, dirname
    import tempfile

    tempfile.tempdir = "/home/jin76872/Desktop/Mih/Data/tmp_superstar_ghecom"

    brd1_data = pd.read_csv(
        "/home/jin76872/Desktop/Mih/Data/SIENA/EnsembleAligner/BRD1.csv")
    ref_ID = brd1_data.loc[9].squeeze()
    e = Ensemble(root_dir="", ref_ID=ref_ID)

    hot_paths = glob(
        "/home/jin76872/Desktop/Mih/Data/SIENA/EnsembleAligner/BAZ2B/Hotspots/*/out.zip"
    )[:100]
    hot_paths += glob(
        "/home/jin76872/Desktop/Mih/Data/SIENA/EnsembleAligner/BRD1/Hotspots/*/out.zip"
    )[:100]

    # trunc_paths = e.shrink_hotspots(hot_paths)
    # with open("hot_paths.txt", "w") as f:
    #     for h in trunc_paths:
    #         f.write(h + "\n")

    with open("hot_paths.txt") as f:
        trunc_paths = [line.strip() for line in f.readlines()]

    brd1_hots = [join(t, "acceptor.ccp4") for t in trunc_paths if "BRD1" in t]
def load_model(model_name):
    model = None
    if model_name == 'resnet152_ddn_jpeg':
        print("load model resnet152_ddn_jpeg")
        m = models.resnet152(pretrained=False)
        weight = './weights/jpeg_ddn_resnet152/jpeg_ddn_resnet152.pth'
        image_mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
        image_std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
        pretrained_model = NormalizedModel(model=m, mean=image_mean, std=image_std)
        loaded_state_dict = torch.load(weight)
        pretrained_model.load_state_dict(loaded_state_dict)
        model = pretrained_model
    elif model_name == 'wide_resnet101_2_dnn_jpeg':
        print("load model wide_resnet101_2_dnn_jpeg")
        m = models.wide_resnet101_2(pretrained=False)
        weight = './weights/jpeg_ddn_wide_resnet101/jpeg_ddn_wide_resnet101.pth'
        image_mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
        image_std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
        model = NormalizedModel(model=m, mean=image_mean, std=image_std)
        loaded_state_dict = torch.load(weight)
        model.load_state_dict(loaded_state_dict)
    elif model_name == 'densenet161_ddn_jpeg':
        print("load model densenet161_ddn_jpeg")
        m = models.densenet161(pretrained=False)
        weight = './weights/jpeg_ddn_densenet161/jpeg_ddn_densenet161.pth'
        image_mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
        image_std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
        model = NormalizedModel(model=m, mean=image_mean, std=image_std)
        loaded_state_dict = torch.load(weight)
        model.load_state_dict(loaded_state_dict)
    elif model_name == 'hrnet_w64_ddn_jpeg':
        print("load model: hrnet_w64_ddn_jpeg")
        model = timm.create_model('hrnet_w64', pretrained=False)
        image_mean = torch.tensor([0.5000, 0.5000, 0.5000]).view(1, 3, 1, 1)
        image_std = torch.tensor([0.5000, 0.5000, 0.5000]).view(1, 3, 1, 1)
        model = NormalizedModel(model=model, mean=image_mean, std=image_std)
        weight = './weights/jpeg_ddn_hrnet_w64/jpeg_ddn_hrnet_w64.pth'
        loaded_state_dict = torch.load(weight)
        model.load_state_dict(loaded_state_dict)
    elif model_name == 'Ensemble_dsn161_jpeg_rn162_jpeg_wrn101_jpeg':
        print("load model: Ensemble_dsn161_jpeg_rn162_jpeg_wrn101_jpeg")
        model1 = load_model("densenet161_ddn_jpeg")
        model2 = load_model("resnet152_ddn_jpeg")
        model3 = load_model("wide_resnet101_2_dnn_jpeg")
        model = Ensemble(model1, model2, model3)
    elif model_name == 'Ensemble_dsn161_jpeg_wrn101_jpeg_hrn_jpeg':
        print("load model: Ensemble_dsn161_jpeg_wrn101_jpeg_hrn_jpeg")
        model1 = load_model("densenet161_ddn_jpeg")
        model2 = load_model("wide_resnet101_2_dnn_jpeg")
        model3 = load_model("hrnet_w64_ddn_jpeg")
        model = Ensemble3_hrn(model1, model2, model3)
    elif model_name == 'Ensemble_dsn161_jpeg_wrn101_jpeg':
        print("load model: Ensemble_dsn161_jpeg_wrn101_jpeg")
        model1 = load_model("densenet161_ddn_jpeg")
        model2 = load_model("wide_resnet101_2_dnn_jpeg")
        model = Ensemble2(model1, model2)
    elif model_name == 'Ensemble_dsn161_jpeg_rn162_jpeg_wrn101_jpeg_hrn_jpeg':
        print("load model: Ensemble_dsn161_jpeg_rn162_jpeg_wrn101_jpeg_hrn_jpeg")
        model1 = load_model("densenet161_ddn_jpeg")
        model2 = load_model("resnet152_ddn_jpeg")
        model3 = load_model("wide_resnet101_2_dnn_jpeg")
        model4 = load_model("hrnet_w64_ddn_jpeg")
        model = Ensemble4(model1, model2, model3, model4)
    elif model_name == 'Ensemble_dsn161_jpeg_rn162_jpeg':
        print("load model: Ensemble_dsn161_jpeg_rn162_jpeg")
        model1 = load_model("densenet161_ddn_jpeg")
        model2 = load_model("resnet152_ddn_jpeg")
        model = Ensemble2(model1, model2)
    else:
        print("can not load model")
    return model
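# --- Added usage sketch (not from the source): build the two-model ensemble
# and switch it to inference mode. Assumes the weight files exist at the paths
# hard-coded above and that the Ensemble2 wrapper is an nn.Module.
ens = load_model('Ensemble_dsn161_jpeg_rn162_jpeg')
ens.eval()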
########################
# EXECUTION REGRESSION #
########################
Regression = Task.Regression(fn.ML_cup, num_epoch, dim_output, hidden_units,
                             batch_array, learning_rate_init, type_learning_rate,
                             alfa, v_lambda, fun, weight, early_stopping,
                             num_training)
top_models = Regression.startexecution_k_fold()
num_training = Regression.num_training

########################
# RANDOMIZATION PHASE  #
########################
ensamble = Ensemble(top_models, 8)
random_top_models = []

# take the best models and create a new model from each, with a perturbation
# of all hyperparameters
for model in top_models:
    # save the hyperparameters of the model
    alfa = [copy.deepcopy(model.NN.alfa)]
    v_lambda = [copy.deepcopy(model.NN.v_lambda)]
    learning_rate_init = [copy.deepcopy(model.NN.learning_rate)]
    hidden_units = [copy.deepcopy(model.NN.units)]
    batch_size = [copy.deepcopy(model.NN.batch_size)]

    # perturbation
    hidden_units, batch_size, learning_rate_init, alfa, v_lambda = Function.pertubation(
def print_table(self):
    model_names = ["Resnet152", "VGG19_bn", "DenseNet", "ResAttNet"]
    print("Validation Accuracy Table")
    for i in range(len(self.models)):
        criterion = nn.CrossEntropyLoss()
        ensemble_solver = Ensemble([self.models[i]])
        top1_acc, top5_acc, val_loss = ensemble_solver.evaluate_all(
            criterion, self.dataloaders, self.dataset_sizes)
        fgsm_top1_acc, fgsm_top5_acc, fgsm_val_loss = ensemble_solver.evaluate_all(
            criterion, self.fgsm_dataloader, self.fgsm_dataset_sizes)
        blurred_top1_acc, blurred_top5_acc, blurred_val_loss = ensemble_solver.evaluate_all(
            criterion, self.blurred_dataloader, self.blurred_dataset_sizes)
        print("{} = top1_acc: {}, top5_acc:{}, fgsm_top1_acc:{}, blurred_top1_acc:{}".format(
            model_names[i], top1_acc, top5_acc, fgsm_top1_acc, blurred_top1_acc))
    print()

    resnet_model, vgg_model, dense_model, attention_model = self.models
    combo = [
        [resnet_model, dense_model, vgg_model, attention_model],
        [resnet_model, dense_model, attention_model],
        [resnet_model, vgg_model, attention_model],
        [resnet_model, dense_model, vgg_model],
        [dense_model, vgg_model, attention_model],
    ]
    combo_names = [
        ["Resnet152, VGG19_bn, DenseNet, ResAttNet"],
        ["Resnet152, DenseNet, ResAttNet"],
        ["Resnet152, VGG19_bn, ResAttNet"],
        ["Resnet152, VGG19_bn, DenseNet"],
        ["DenseNet, VGG19_bn, ResAttNet"],
    ]

    print("Ensemble by Averaging logits")
    for i in range(len(combo)):
        criterion = nn.CrossEntropyLoss()
        ensemble_solver = Ensemble(combo[i])
        top1_acc, top5_acc, val_loss = ensemble_solver.evaluate_all(
            criterion, self.dataloaders, self.dataset_sizes)
        fgsm_top1_acc, fgsm_top5_acc, fgsm_val_loss = ensemble_solver.evaluate_all(
            criterion, self.fgsm_dataloader, self.fgsm_dataset_sizes)
        blurred_top1_acc, blurred_top5_acc, blurred_val_loss = ensemble_solver.evaluate_all(
            criterion, self.blurred_dataloader, self.blurred_dataset_sizes)
        print(combo_names[i][0])
        print("Validation top1_acc: {}, top5_acc:{}, fgsm_top1_acc:{}, blurred_top1_acc:{}".format(
            top1_acc, top5_acc, fgsm_top1_acc, blurred_top1_acc))
    print()

    print("Ensemble by Majority Vote")
    for i in range(len(combo)):
        criterion = nn.CrossEntropyLoss()
        ensemble_solver = Ensemble(combo[i])
        top1_acc, top5_acc, val_loss = ensemble_solver.evaluate_all(
            criterion, self.dataloaders, self.dataset_sizes, mode="maj vote")
        fgsm_top1_acc, fgsm_top5_acc, fgsm_val_loss = ensemble_solver.evaluate_all(
            criterion, self.fgsm_dataloader, self.fgsm_dataset_sizes, mode="maj vote")
        blurred_top1_acc, blurred_top5_acc, blurred_val_loss = ensemble_solver.evaluate_all(
            criterion, self.blurred_dataloader, self.blurred_dataset_sizes, mode="maj vote")
        print(combo_names[i][0])
        print("Validation top1_acc: {}, top5_acc:{}, fgsm_top1_acc:{}, blurred_top1_acc:{}".format(
            top1_acc, top5_acc, fgsm_top1_acc, blurred_top1_acc))
    print()
class Update(object):

    def __init__(self):
        self.Ensemble = Ensemble(3)
        super(Update, self).__init__()

    @staticmethod
    def readdata(sourcex_matrix=None, sourcey_matrix=None,
                 targetx_matrix=None, targety_matrix=None,
                 src_path='datasets/syndata_002_normalized_no_novel_class_source_stream.csv',
                 tgt_path='datasets/syndata_002_normalized_no_novel_class_target_stream.csv',
                 src_size=None, tgt_size=None):
        """
        input is:
        source dataset with y; here we assume it is a list of lists, named source,
        target dataset with yhat; here we assume it is a list of lists, named target
        """
        if sourcex_matrix is None:
            sourcex_matrix_, sourcey_matrix = Classification.read_csv(src_path, None)  # matrix_ is source data
        else:
            sourcex_matrix_ = sourcex_matrix
            sourcey_matrix_ = sourcey_matrix
        matrix_ = sourcex_matrix_[:src_size, :]
        if targetx_matrix is None:
            targetx_, targety_ = Classification.read_csv(tgt_path, size=None)
        else:
            targetx_ = targetx_matrix
            targety_ = targety_matrix
        labellist = []
        for i in range(0, len(targety_)):
            if targety_[i] not in labellist:
                labellist.append(targety_[i])
        sourcey_label = []
        for i in range(0, len(sourcey_matrix)):
            sourcey_label.append(labellist.index(sourcey_matrix[i]))
        for i in range(0, len(targety_)):
            if targety_[i] not in labellist:
                labellist.append(targety_[i])
        targety_label = []
        for i in range(0, len(targety_)):
            targety_label.append(labellist.index(targety_[i]))
        return sourcex_matrix_, sourcey_label, targetx_, targety_label

    def Process(self, sourcex, sourcey, targetx, targety, subsize):
        # fixed size windows for source stream and target stream
        sourceIndex = 0
        targetIndex = 0
        src_count = 0
        tgtchange_count = 0
        threshold = 1.0
        src_size, _ = sourcex.shape
        tgt_size, _ = targetx.shape
        # true_label = []
        # for i in range(len(np.array(targety))):
        #     if np.array(targety)[i] == 'class1':
        #         true_label.append(1)
        #     if np.array(targety)[i] == 'class2':
        #         true_label.append(2)
        #     if np.array(targety)[i] == 'class3':
        #         true_label.append(3)
        #     if np.array(targety)[i] == 'class4':
        #         true_label.append(4)
        #     if np.array(targety)[i] == 'class5':
        #         true_label.append(5)
        #     if np.array(targety)[i] == 'class6':
        #         true_label.append(6)
        #     if np.array(targety)[i] == 'class7':
        #         true_label.append(7)
        windowsize = 1000
        sourcewindowstart = 0
        sourcewindowend = sourcewindowstart + windowsize - 1
        targetwindowstart = 0
        targetwindowend = targetwindowstart + windowsize - 1
        sourcexwindow = sourcex[sourcewindowstart:sourcewindowend]
        sourceywindow = sourcey[sourcewindowstart:sourcewindowend]
        targetxwindow = targetx[targetwindowstart:targetwindowend]
        targetywindow = targety[targetwindowstart:targetwindowend]

        ### get the initial model by using the first source and target windows
        alpha = 0.05
        b = targetxwindow.T.shape[1]
        fold = 5
        sigma_list = Classification.sigma_list(np.array(targetxwindow.T),
                                               np.array(sourcexwindow.T))
        lambda_list = Classification.lambda_list()
        srcx_array = np.array(sourcexwindow.T)
        trgx_array = np.array(targetxwindow.T)
        (thetah_old, w, sce_old, sigma_old) = Classification.R_ULSIF(
            trgx_array, srcx_array, alpha, sigma_list, lambda_list, b, fold)
        self.Ensemble.generateNewModelRULSIF(targetxwindow, sourcexwindow,
                                             sourceywindow, alpha, sigma_list,
                                             lambda_list, b, fold, subsize)
        # print "update model", src_size, source.shape
        truelablecount = 0.0
        totalcount = 0.0
        # tmpsrccount = 0
        tmptrgcount = 0
        changeindex = -1
        updatestartindex = 0
        while True:
            if sourcewindowend >= src_size or targetwindowend >= tgt_size:
                break
            data_type = randint(1, 10)
            if data_type < 2:
                print("get data from source")
                sourcewindowstart += 1
                sourcewindowend += 1
                sourcexwindow = sourcex[sourcewindowstart:sourcewindowend]
                sourceywindow = sourcey[sourcewindowstart:sourcewindowend]
                sourceIndex += 1
                # src_count += 1
                # tmpsrccount += 1
                print("sourceIndex", sourceIndex)
            else:
                print("get data from target")
                targetwindowstart += 1
                targetwindowend += 1
                targetxwindow = targetx[targetwindowstart:targetwindowend]
                targetywindow = targety[targetwindowstart:targetwindowend]
                targetIndex += 1
                tgtchange_count += 1
                tmptrgcount += 1
                print("targetIndex", targetIndex)
            if tgtchange_count >= 1000:
                changeindex = 1
                tgtchange_count = 0
                confidencelist = []
                for i in range(targetwindowstart, targetwindowend + 1):
                    instanceresult = self.Ensemble.evaluateEnsembleRULSIF(targetx[i])
                    confidencelist.append(instanceresult[1])
                confvar = np.var(confidencelist)
                changetestresult = pelt(normal_mean(confidencelist, confvar),
                                        len(confidencelist))
                if len(changetestresult) > 1:
                    alpha = 0.05
                    b = targetxwindow.T.shape[1]
                    fold = 5
                    sigma_list = Classification.sigma_list(np.array(targetxwindow.T),
                                                           np.array(sourcexwindow.T))
                    lambda_list = Classification.lambda_list()
                    self.Ensemble.generateNewModelRULSIF(targetxwindow, sourcexwindow,
                                                         sourceywindow, alpha,
                                                         sigma_list, lambda_list,
                                                         b, fold, subsize)
                # x_nu = np.array(targetxwindow.T)
                # (thetah_new, w, sce_new, sigma_new) = Classification.R_ULSIF(
                #     trgx_array, srcx_array, alpha, sigma_list, lambda_list, b, fold)
                # targetweight_old = Classification.compute_target_weight(thetah_old, sce_old, sigma_old, x_nu)
                # targetweight_new = Classification.compute_target_weight(thetah_new, sce_new, sigma_new, x_nu)
                # l_ratios = targetweight_new / targetweight_old
                # lnWeightTrgData = np.log(l_ratios, dtype='float64')
                # changeScore = np.sum(lnWeightTrgData, dtype='float64')
                # tgtchange_count = 0
                # print "changeScore", changeScore
                # if changeScore > threshold:
                #     alpha = 0.05
                #     b = targetxwindow.T.shape[1]
                #     fold = 5
                #     sigma_list = Classification.sigma_list(np.array(targetxwindow.T),
                #                                            np.array(sourcexwindow.T))
                #     lambda_list = Classification.lambda_list()
                #     self.Ensemble.generateNewModelRULSIF(targetxwindow, sourcexwindow, sourceywindow,
                #                                          alpha, sigma_list, lambda_list, b, fold, subsize)
            if tmptrgcount >= 2000:
                # force update model
                tmptrgcount = 0
                # update predictions for updatestartindex to targetIndex
                for i in range(updatestartindex, targetIndex + 1):
                    print("targetx[i]", targetx[i])
                    instanceresult = self.Ensemble.evaluateEnsembleRULSIF(targetx[i])
                    print("instanceresult", instanceresult)
                    print("instanceresult[0]", instanceresult[0])
                    print("truelabel[i]", targety[i])
                    if instanceresult[0] == targety[i]:
                        truelablecount += 1.0
                    totalcount += 1.0
                    print("truelablecount", truelablecount)
                    print("totalcount", totalcount)
                    with open('errorsyn002405.csv', 'a+') as f:
                        writer = csv.writer(f)
                        writer.writerow([targetIndex, truelablecount, totalcount,
                                         truelablecount / totalcount])
                updatestartindex = targetIndex + 1
                alpha = 0.05
                b = targetxwindow.T.shape[1]
                fold = 5
                sigma_list = Classification.sigma_list(np.array(targetxwindow.T),
                                                       np.array(sourcexwindow.T))
                lambda_list = Classification.lambda_list()
                self.Ensemble.generateNewModelRULSIF(targetxwindow, sourcexwindow,
                                                     sourceywindow, alpha, sigma_list,
                                                     lambda_list, b, fold, subsize)
def get_algorithm(algorithm_type, alpha):
    if algorithm_type == AlgorithmType.Random:
        return Random(alpha)
    elif algorithm_type == AlgorithmType.EFirst:
        return EFirst(alpha)
    elif algorithm_type == AlgorithmType.EGreedy:
        return EGreedy(alpha)
    elif algorithm_type == AlgorithmType.EGreedy_Disjoint:
        return EGreedy_Disjoint(alpha)
    elif algorithm_type == AlgorithmType.EGreedy_Hybrid:
        return EGreedy_Hybrid(alpha)
    elif algorithm_type == AlgorithmType.EGreedy_Seg:
        return Combo_Seg(alpha, AlgorithmType.EGreedy)
    elif algorithm_type == AlgorithmType.LinUCB_Disjoint:
        return LinUCB_Disjoint(alpha)
    elif algorithm_type == AlgorithmType.LinUCB_GP:
        return LinUCB_GP(alpha)
    elif algorithm_type == AlgorithmType.LinUCB_GP_All:
        return LinUCB_GP_All(alpha)
    elif algorithm_type == AlgorithmType.LinUCB_Hybrid:
        return LinUCB_Hybrid(alpha)
    elif algorithm_type == AlgorithmType.UCB:
        return UCB(alpha)
    elif algorithm_type == AlgorithmType.UCB_Seg:
        return Combo_Seg(alpha, AlgorithmType.UCB)
    elif algorithm_type == AlgorithmType.EGreedy_Lin:
        return EGreedy_Lin(alpha)
    elif algorithm_type == AlgorithmType.EGreedy_Seg_Lin:
        return Combo_Seg(alpha, AlgorithmType.EGreedy_Lin)
    elif algorithm_type == AlgorithmType.EGreedy_Lin_Hybrid:
        return EGreedy_Lin_Hybrid(alpha)
    elif algorithm_type == AlgorithmType.TS:
        return TS(alpha)
    elif algorithm_type == AlgorithmType.TS_Bootstrap:
        return TS_Bootstrap(alpha)
    elif algorithm_type == AlgorithmType.TS_Lin:
        return TS_Lin(alpha)
    elif algorithm_type == AlgorithmType.TS_Seg:
        return Combo_Seg(alpha, AlgorithmType.TS)
    elif algorithm_type == AlgorithmType.TS_Disjoint:
        return TS_Disjoint(alpha)
    elif algorithm_type == AlgorithmType.TS_Hybrid:
        return TS_Hybrid(alpha)
    elif algorithm_type == AlgorithmType.TS_Truncated:
        return TS_Truncated(alpha)
    elif algorithm_type == AlgorithmType.EGreedy_TS:
        return EGreedy_TS(alpha)
    elif algorithm_type == AlgorithmType.TS_Gibbs:
        return TS_Gibbs(alpha)
    elif algorithm_type == AlgorithmType.TS_Laplace:
        return TS_Laplace(alpha)
    elif algorithm_type == AlgorithmType.EGreedy_Annealing:
        return EGreedy_Annealing(alpha)
    elif algorithm_type == AlgorithmType.NN:
        return NN(alpha)
    elif algorithm_type == AlgorithmType.Ensemble:
        return Ensemble(alpha)
    elif algorithm_type == AlgorithmType.TS_RLR:
        return TS_RLR(alpha)
    else:
        raise NotImplementedError("Non-implemented algorithm: " + algorithm_type.name)
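# --- Added usage sketch (not from the source): alpha here is an illustrative
# exploration parameter, not a value taken from the experiments.
bandit = get_algorithm(AlgorithmType.Ensemble, alpha=0.1)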