Example #1
def ValidateFineTuning(dataset, validateall=True, validateat=0):
    datas = ReadValidationData(dataset)
    result = []
    time = 0
    params = GetParams(dataset)
    cnnfilename = params['FineTuning']['cnn_load_file']
    rnnfilename = params['FineTuning']['rnn_load_file']
    ensemblefilename = params['FineTuning']['ensemble_load_file']
    finetuningfilename = params['FineTuning']['ensemble_save_file']
    for data in datas:
        time += 1
        if not validateall:
            if validateat > time:
                continue
            elif validateat < time:
                break
        input = data['input']
        label = data['label']
        params['FineTuning']['cnn_load_file'] = cnnfilename+'_'+str(time)
        params['FineTuning']['rnn_load_file'] = rnnfilename+'_'+str(time)
        params['FineTuning']['ensemble_load_file'] = ensemblefilename+'_'+str(time)
        params['FineTuning']['ensemble_save_file'] = finetuningfilename+'_'+str(time)
        r = None
        r = Ensemble(params['FineTuning'],input['train']['onehot'],input['train']['biofeat'],label['train'],
             input['test']['onehot'],input['test']['biofeat'],label['test'],issave=True,
             withbiofeature=True,rnn_trainable=True,cnn_trainable=True)
        result.append(r['loss'])
        print(result)
Example #2
def plot_error(l):
    res = []
    for alpha in l:
        res.append(
            test(
                Ensemble(5, linear_model.Lasso(alpha=alpha),
                         [clf, clf1, clf2, clf3])))
    plt.plot(l, res)
    plt.show()
Example #3
def Pipeline(dataset,
             pretrainCNN=False,
             pretrainRNN=False,
             ensemble=False,
             fineTuning=False):
    data = ReadData(dataset)
    params = GetParams(dataset)
    input = data['input']
    label = data['label']
    r = None
    if pretrainCNN:
        r = CNN(params['CNNParams'], input['train']['onehot'], label['train'],
                input['test']['onehot'], label['test'])
    if pretrainRNN:
        r = RNN(params['RNNParams'], input['train']['onehot'], label['train'],
                input['test']['onehot'], label['test'])
    if ensemble:
        input_train_onehot, input_train_biofeat, y_train = input['train'][
            'onehot'], input['train']['biofeat'], label['train']
        r = Ensemble(params['EnsembleParams'],
                     input['train']['onehot'],
                     input['train']['biofeat'],
                     label['train'],
                     input['test']['onehot'],
                     input['test']['biofeat'],
                     label['test'],
                     withbiofeature=True,
                     cnn_trainable=False,
                     rnn_trainable=False)
    if fineTuning:
        r = Ensemble(params['FineTuning'],
                     input['train']['onehot'],
                     input['train']['biofeat'],
                     label['train'],
                     input['test']['onehot'],
                     input['test']['biofeat'],
                     label['test'],
                     withbiofeature=True,
                     cnn_trainable=True,
                     rnn_trainable=True,
                     load_weight=True)
    return r
Example #5
def main():
    # Load the classes
    data_dir = pathlib.Path('./data/tiny-imagenet-200/train/')
    CLASSES = sorted([item.name for item in data_dir.glob('*')])
    im_height, im_width = 64, 64

    models = load_models()

    data_transforms = transforms.Compose([
        transforms.ToTensor(),
        #         transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        transforms.Normalize((0, 0, 0), tuple(np.sqrt((255, 255, 255))))
    ])

    evalcsv_path = sys.argv[1]
    print("path to the eval.csv file:", evalcsv_path)

    # Loop through the CSV file and make a prediction for each line
    with open(
            'eval_classified.csv', 'w'
    ) as eval_output_file:  # Open the evaluation CSV file for writing
        #         eval_dir = pathlib.Path('eval.csv')
        eval_dir = pathlib.Path(evalcsv_path)
        for line in eval_dir.open():
            # for line in pathlib.Path(sys.argv[1]).open():  # Open the input CSV file for reading
            image_id, image_path, image_height, image_width, image_channels = line.strip(
            ).split(',')  # Extract CSV info

            print(image_id, image_path, image_height, image_width,
                  image_channels)
            with open(image_path, 'rb') as f:
                img = Image.open(f).convert('RGB')
            img = data_transforms(img)[None, :]
            ensemble_solver = Ensemble(models)
            predicted = ensemble_solver.evaluate_testdata(img)
            print("predicted class:", CLASSES[predicted])
            print()

            # Write the prediction to the output file
            eval_output_file.write('{},{}\n'.format(image_id,
                                                    CLASSES[predicted]))
Example #6
        help="Type of estimator (maximum likelihood (ml) or shrinkage")
    (options, args) = parser.parse_args()

    if not options.trajectory_filename or not options.topology_filename:
        parser.error("--trajectory and --topology options must be specified")

    # Construct reference if available
    try:
        reference = MDAnalysis.Universe(options.topology_filename,
                                        options.topology_filename)
    except:
        reference = None

    # Construct ensemble
    ensemble = Ensemble(topology=options.topology_filename,
                        trajectory=options.trajectory_filename,
                        atom_selection_string=options.atom_selection_string,
                        frame_interval=options.frame_interval)

    # Align ensemble to reference
    if not options.no_align:
        ensemble.align(reference)

    # Select covariance estimator
    estimator = EstimatorML()
    if options.covariance_estimator == "shrinkage":
        estimator = EstimatorShrinkage()

    # Disable reference unless use_distance_to_reference is set
    if not options.use_distance_to_reference:
        reference = None
Example #7
        write_top_k_ssd_edges(sorted_hyperedges, pow(2, i))
    return sorted_hyperedges


def write_top_k_ssd_edges(sorted_hyperedges, k):
    sorted_hyperedges = sorted_hyperedges[:k]
    with open(results_folder + '/lam/top-' + str(k) + '-hyperedges.tsv',
              'w+') as f:
        for hyperedge in sorted_hyperedges:
            f.write(list_to_tab_seperated_string(list(hyperedge)) + '\n')
    print('Updated top %d hyperedges' % k)
    return None


with open(datafile, 'r') as f:
    T = Ensemble()
    for line in f:
        line = line.strip()
        timestamp, v1, v2 = line.split('\t', 2)
        if v1 != v2:
            T.add_edge(timestamp, int(v1), int(v2))
        else:
            T.add_node(int(v1))

with open(results_folder + '/' + datafile + '.summary', 'w+') as f:
    edges = [g.size() for g in T.get_all_static_graphs()]
    summary = '%d\t%d\t%d\t%d\t%d' % (T.get_num_of_timestamps(), T.order(),
                                      min(edges), max(edges), T.size())
    print('Graph Summary: %s' % summary)
    f.write(summary)
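Ensemble here is the project's own temporal-graph container (one static graph per timestamp, with order(), size(), and get_all_static_graphs()); a rough stand-in built with networkx, assuming the same tab-separated timestamp/v1/v2 input format:

import io
from collections import defaultdict
import networkx as nx

# In-memory stand-in for datafile
data = io.StringIO("1\t0\t1\n1\t1\t2\n2\t0\t2\n2\t3\t3\n")

graphs = defaultdict(nx.Graph)  # one static graph per timestamp
for line in data:
    timestamp, v1, v2 = line.strip().split('\t', 2)
    if v1 != v2:
        graphs[timestamp].add_edge(int(v1), int(v2))
    else:
        graphs[timestamp].add_node(int(v1))

edges = [g.size() for g in graphs.values()]
print('%d timestamps, %d-%d edges per snapshot, %d edges total' %
      (len(graphs), min(edges), max(edges), sum(edges)))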
Example #8
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from Ensemble import Ensemble
from data import load_dataset
import numpy as np

# =============================================================================
# Create a model with 5 classifiers; they are saved in the clasificadores/
# directory, which must already exist
# =============================================================================

Test = Ensemble(5, 'clasificadores2')

# =============================================================================
# Load the data
# =============================================================================

# Full dataset
X, y = load_dataset()

total_size = X.shape[0]
train_size = int(np.floor(total_size * .8))
test_size = total_size - train_size

print('- Dataset: %d samples' % total_size)
print('- Training set: %d samples' % train_size)
print('- Test set: %d samples' % test_size)

np.random.seed(54)
rp = np.random.permutation(total_size)
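The snippet stops right after the permutation is created; a plausible continuation (an assumption, not part of the original file) uses it to split the shuffled indices into training and test sets. X and y below are random stand-ins for the output of load_dataset():

import numpy as np

X = np.random.rand(100, 8)
y = np.random.randint(0, 2, size=100)

total_size = X.shape[0]
train_size = int(np.floor(total_size * .8))

np.random.seed(54)
rp = np.random.permutation(total_size)

# First 80% of the shuffled indices for training, the rest for testing
X_train, y_train = X[rp[:train_size]], y[rp[:train_size]]
X_test, y_test = X[rp[train_size:]], y[rp[train_size:]]
print(X_train.shape, X_test.shape)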
Example #9
            item_id = names[i].split('.')[0]
            result = None
            if (y[i] == 0):
                result = 1
            else:
                result = 0
            conf = confidence[i][y[i]]
            f.write("%s,%d,%f\n" % (item_id, result, conf))


ip = ImagesProcessor()
images, y = ip.getImages('../imgs/test/dataset/', size=None, training=False)

# This is what should be used to predict the final result
if True:
    ensemble = Ensemble()
    ensemble.load()
    X_predictions = ensemble.predict_small(images)
    y_hat = ensemble.predict_big(X_predictions)
    confidence = ensemble.ensemble_logistic_regression.predict_proba(
        X_predictions)
    printResult(y, y_hat, confidence)
    #score(y_hat, y)

# This is what should be used to compute the linear regression and save it
if False:
    ensemble = Ensemble()
    ensemble.load()
    X_validation_predictions = ensemble.predict_small(images)
    ensemble.fit_big(X_validation_predictions, y)
    f = open("./ensemble_logistic_regression", 'wb')
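The example ends as the file for the fitted regression is opened; a self-contained sketch of the save-and-reload pattern it appears to be heading toward, using pickle and a scikit-learn LogisticRegression as stand-ins for the project's ensemble_logistic_regression:

import pickle
import numpy as np
from sklearn.linear_model import LogisticRegression

X = np.random.rand(50, 3)
y = (X[:, 0] > 0.5).astype(int)

model = LogisticRegression().fit(X, y)

# Persist the fitted model, then load it back and reuse it
with open("./ensemble_logistic_regression", "wb") as f:
    pickle.dump(model, f)
with open("./ensemble_logistic_regression", "rb") as f:
    restored = pickle.load(f)
print(restored.predict_proba(X[:2]))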
Example #10
import sys
from Ensemble import Ensemble
import matplotlib.pyplot as plt

__author__ = "adb"


INPUTFILE = sys.argv[1]
SIGMA = float(sys.argv[2])
CONSTRAINT = sys.argv[3]


with open(INPUTFILE, "r") as f:
    T = Ensemble()
    for line in f:
        line = line.strip()
        timestamp, v1, v2 = line.split("\t", 2)
        if v1 != v2:
            T.add_edge(timestamp, v1, v2)
        else:
            T.add_node(v1)

# if CONSTRAINT == 'am':
#     T.generate_antimonotone_hyperedges_report(SIGMA)
# elif CONSTRAINT == 'lam':
#     T.generate_looselyantimonotone_hyperedges_report(SIGMA)
# else:
#     print(T.maximal_sigma_ssd_ucs(0.01))

d = T.compute_am_sigma_hyperedges_dict([0.01, 0.1, 0.2, 0.3])
for i in d:
    print(i, len(d[i]))
Example #11
    d = compute_subgraph_distribution_for_nodes(ensemble, nodes)
    return tuple([len(v) for k, v in d.items()])


def test_equivalence_partition(iterable_list, relation = lambda x, y: x == y):
    classes, partitions, ids = EquivalenceClass.equivalence_enumeration(
        iterable_list,
        relation
    )
    EquivalenceClass.check_equivalence_partition(classes, partitions, relation)
    # for c in classes: print(c)
    # for o, c in partitions.items(): print(o, ':', c)
    return classes, partitions

with open('seprox', 'r') as f:
    T = Ensemble()
    for line in f:
        line = line.strip()
        timestamp, v1, v2 = line.split('\t', 2)
        if v1 != v2:
            T.add_edge(timestamp, int(v1), int(v2))
        else:
            T.add_node(int(v1))

nodes_freq_dist_map = collections.OrderedDict()
ssd_buckets = collections.OrderedDict()
with open('hyperedge-results/seprox/lam/top-1024-hyperedges.tsv', 'r') as f:
    for line in f:
        line = line.strip()
        nodes = [int(n) for n in line.split('\t')[:-1]]
        ssd = float(line.split('\t')[-1])
Example #12
def RandomForest(X, y, epsilon, num_learners, depth, min_sample):
    dt = DecisionTree(epsilon, min_sample, depth)
    result = Ensemble(dt, num_learners, 0.8)
    result.fit(X, y)
    return result
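DecisionTree and Ensemble above are the project's own classes (an ensemble of num_learners trees, each fit on an 80% subsample); a rough, self-contained stand-in for the same bagged-trees idea using scikit-learn's RandomForestRegressor (the regression setting and parameter mapping are assumptions):

import numpy as np
from sklearn.ensemble import RandomForestRegressor

def random_forest(X, y, num_learners=10, depth=5, min_sample=2):
    # n_estimators trees, each trained on an 80% bootstrap sample
    model = RandomForestRegressor(n_estimators=num_learners, max_depth=depth,
                                  min_samples_split=min_sample, max_samples=0.8)
    model.fit(X, y)
    return model

X = np.random.rand(200, 4)
y = X[:, 0] + 0.1 * np.random.randn(200)
print(random_forest(X, y).predict(X[:3]))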
Example #13
    sorted_hyperedges = sorted(ssd_hyperedges_in_sigma_range, key=lambda x: x[-1])
    for i in range(16):
        write_top_k_ssd_edges(sorted_hyperedges, pow(2, i))
    return sorted_hyperedges


def write_top_k_ssd_edges(sorted_hyperedges, k):
    sorted_hyperedges = sorted_hyperedges[:k]
    with open(results_folder + '/am/top-' + str(k) + '-hyperedges.tsv', 'w+') as f:
        for hyperedge in sorted_hyperedges:
            f.write(list_to_tab_seperated_string(list(hyperedge)) + '\n')
    print('Updated top %d hyperedges' % k)
    return None

with open(datafile, 'r') as f:
    T = Ensemble()
    for line in f:
        line = line.strip()
        timestamp, v1, v2 = line.split('\t', 2)
        if v1 != v2:
            T.add_edge(timestamp, int(v1), int(v2))
        else:
            T.add_node(int(v1))

with open(results_folder + '/' + datafile + '.summary', 'w+') as f:
    edges = [g.size() for g in T.get_all_static_graphs()]
    summary = '%d\t%d\t%d\t%d\t%d' % (T.get_num_of_timestamps(), T.order(), min(edges), max(edges), T.size())
    print('Graph Summary: %s' % summary)
    f.write(summary)

ssd_hyperedges_in_sigma_range = []
Example #14
        #
        largest_ligand = ref_kr.find_largest_ligand()
        prot = Protein.from_file(join(dirname(largest_ligand), 'protein.mol2'))
        bs = Protein.BindingSiteFromMolecule(
            protein=prot,
            molecule=MoleculeReader(largest_ligand)[0],
            distance=7.0)

        s_paths_file = join(main_dir, "shrunk_hot_paths_{}.json".format(nrot))
        if exists(s_paths_file):
            with open(s_paths_file, "r") as f:
                s_paths_dict = json.load(f)
        else:
            s_paths_dict = {}

        ensemble = Ensemble(root_dir=join(main_dir, e))
        ensemble.reference_binding_site = bs
        #hot_paths = glob(join(ensemble.root_dir, '*', "fullsize_hotspots_{}".format(nrot), "out.zip"))
        s_paths = ensemble.shrink_hotspots(hotspot_paths=hot_paths,
                                           padding=2.0)
        s_paths_dict[e] = s_paths
        for probe in ["donor", "acceptor", "apolar"]:
            paths = [join(t, '{}.ccp4'.format(probe)) for t in s_paths]
            gr = GridEnsemble(paths)
            gr.get_ensemble_array()
            gr.save_gridensemble(
                join(main_dir, e, '{}_{}.p'.format(probe, nrot)))

    spd = json.dumps(s_paths_dict,
                     sort_keys=True,
                     indent=4,
Example #15
            obj = getattr(self.object, "get_" + self.name)()
            return obj.shape
        return (len(self), )

    shape = property(get_shape)

    def __repr__(self):
        return repr(self[:])

    def __str__(self):
        return str(self[:])


# turn off debugging
##def debug(message): pass

if __name__ == "__main__":
    from pdb import pm
    from logging import debug
    import logging
    logging.basicConfig(level=logging.DEBUG)
    from Ensemble_registers import Ensemble_registers
    from Ensemble import Ensemble, ensemble_driver
    ensemble = Ensemble()
    positions = ArrayWrapper(ensemble, "position")
    command_dial_values = ArrayWrapper(ensemble,
                                       "command_dial_values",
                                       method="multiple")
    floating_point_variables =\
        ArrayWrapper(ensemble,"floating_point_variables",method="multiple")
Example #16
clf = linear_model.Ridge(alpha=15, fit_intercept=True)

#the coefficients were found by varying one of them while keeping the other two fixed
clf2 = XGBRegressor(max_depth=2,
                    learning_rate=0.08,
                    n_estimators=955,
                    subsample=0.96)

#clf4 = neighbors.KNeighborsRegressor(weights = 'distance')

clf1 = linear_model.Lasso(alpha=0.0005)

clf3 = ensemble.RandomForestRegressor(n_estimators=100, max_depth=15)

clf5 = Ensemble(5, linear_model.Lasso(alpha=0.0005), [clf, clf1, clf2, clf3])
"""
clf = regularization.logReg(l=0.00001)
clf.fit(X1,y)
"""

RMSE_Score = metrics.make_scorer(RMSE)


def test(model):
    return (np.mean(
        cross_validation.cross_val_score(model,
                                         X1,
                                         y['SalePrice'],
                                         cv=5,
                                         scoring=RMSE_Score)))
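test() relies on sklearn.cross_validation, which was removed from scikit-learn long ago; an equivalent, self-contained sketch with the current sklearn.model_selection API, using synthetic stand-ins for X1, y['SalePrice'], and the project's RMSE helper:

import numpy as np
from sklearn import linear_model, metrics, model_selection

def rmse(y_true, y_pred):
    return np.sqrt(np.mean((np.asarray(y_true) - np.asarray(y_pred)) ** 2))

# greater_is_better=False so cross_val_score treats RMSE as a loss
rmse_scorer = metrics.make_scorer(rmse, greater_is_better=False)

X1 = np.random.rand(100, 5)
y_sale_price = X1 @ np.array([1.0, 2.0, 0.5, 0.0, -1.0]) + 0.1 * np.random.randn(100)

def test(model):
    return np.mean(model_selection.cross_val_score(model, X1, y_sale_price,
                                                   cv=5, scoring=rmse_scorer))

print(test(linear_model.Lasso(alpha=0.0005)))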
Example #17
import sys
from Ensemble import Ensemble
import matplotlib.pyplot as plt

__author__ = 'adb'

INPUTFILE = sys.argv[1]
SIGMA = float(sys.argv[2])
CONSTRAINT = sys.argv[3]

with open(INPUTFILE, 'r') as f:
    T = Ensemble()
    for line in f:
        line = line.strip()
        timestamp, v1, v2 = line.split('\t', 2)
        if v1 != v2:
            T.add_edge(timestamp, v1, v2)
        else:
            T.add_node(v1)

# if CONSTRAINT == 'am':
#     T.generate_antimonotone_hyperedges_report(SIGMA)
# elif CONSTRAINT == 'lam':
#     T.generate_looselyantimonotone_hyperedges_report(SIGMA)
# else:
#     print(T.maximal_sigma_ssd_ucs(0.01))

d = T.compute_am_sigma_hyperedges_dict([0.01, 0.1, 0.2, 0.3])
for i in d:
    print(i, len(d[i]))
Example #18
    plot_ucs_size_composition_vs_ssd_cutoff(threeS, fourS, fiveS, ssd_range)

    return size_map_for_ssd_range


with open(graph_summary_file, 'r') as f:
    for line in f:
        line = line.strip()
        summary = line.split('\t', 4)
        num_of_timestamps, num_of_nodes, min_num_of_edges, max_num_of_edges, total_num_of_edges = int(
            summary[0]), int(summary[1]), int(summary[2]), int(
                summary[3]), int(summary[4])

with open(datafile, 'r') as f:
    T = Ensemble()
    for line in f:
        line = line.strip()
        timestamp, v1, v2 = line.split('\t', 2)
        if v1 != v2:
            T.add_edge(timestamp, int(v1), int(v2))
        else:
            T.add_node(int(v1))

plot_ssd_vs_rank(128)
# plot_percent_of_nodes_vs_rank(1024, num_of_nodes)
# plot_ssd_of_uncovered_nodes_vs_rank(2048, T)
# print(plot_num_of_k_size_ucs_vs_ssd_cutoff_for_am([0.1, 0.2, 0.3, 0.4, 0.5, 0.6]))
# print(plot_num_of_k_size_ucs_vs_ssd_cutoff_for_lam([0.1, 0.2, 0.3, 0.4,  0.5, 0.6]))

plt.show()
Example #19
class Arbre_C45(Arbre_ID3):
    """
        A C4.5 tree inherits from an ID3 tree but is built differently
    """

    def __init__(self, chemin_données="", chemin_elagage=""):
        """
            chemin_données is the location of the file containing the data
            chemin_elagage is the location of another set used to
            prune the tree
        """
        #initialize the ensemble with the file at chemin_données
        self.ensemble = Ensemble(chemin_données)
        #initialize the root node of the tree
        self.arbre = None
        self.chemin_elagage = chemin_elagage

    def construire(self):
        """
            builds the complete tree
        """
        #if the set is corrupted (missing attributes), restore it
        self.ensemble.restaurer_valeurs_manquantes()
        self.arbre = self.__construire_arbre(self.ensemble)

    def __construire_arbre(self, ensemble):
        if not isinstance(ensemble, Ensemble):
            raise TypeError("ensemble must be an Ensemble, not a {}" \
                            .format(type(ensemble)))
        #if the list is empty
        if len(ensemble) == 0:
            raise ValueError("the list of examples cannot be empty!")
        #check whether all examples share the same label
        if ensemble.entropie() == 0:
            #return that label
            return Feuille(ensemble.liste_exemples[0].etiquette)
        #if there are no attributes left to test
        if len(ensemble.liste_attributs) == 0:
            max, etiquette_finale = 0, ""
            #try every possible label of the set
            for etiquette in ensemble.etiquettes_possibles():
                sous_ensemble = ensemble.sous_ensemble_etiquette(etiquette)
                #if it is the most frequent one, it is the one we keep
                if len(sous_ensemble) > max:
                    max, etiquette_finale = len(sous_ensemble), etiquette
            #and return it in a leaf
            return Feuille(etiquette_finale)

        #remember the original values so they can be restored in case
        #the discretized attribute is not the one chosen
        sauvegarde_valeurs = ensemble.sauvegarder_valeurs_discretes()
        #for each value to discretize
        for attribut, valeurs in sauvegarde_valeurs:
            #discretize it
            ensemble.discretiser(attribut)
        #pick the optimal attribute
        #NOTE: pass ID3=False to use the gain ratio
        a_tester = ensemble.attribut_optimal(ID3=False)
        #for each saved attribute
        for attribut, valeurs in sauvegarde_valeurs:
            #if it is not the chosen attribute
            if attribut != a_tester:
                #restore the old continuous values
                for i in range(len(valeurs)):
                    ensemble.liste_exemples[i].dict_attributs[attribut] = \
                                                                    valeurs[i]
        #at this point we always return a node, never a leaf
        noeud = Noeud(a_tester)
        #for each value the tested attribute can take
        for valeur in ensemble.valeurs_possibles_attribut(a_tester):
            #build a sub-set
            sous_ensemble = ensemble.sous_ensemble_attribut(a_tester, valeur)
            #and create a new child node from it
            noeud.enfants[valeur] = self.__construire_arbre(sous_ensemble)
        #return the node we just created
        return noeud

    def etiqueter(self, exemple):
        #start the current node at the top of the tree
        noeud_actuel = self.arbre
        #while we are on an internal node rather than a leaf
        while isinstance(noeud_actuel, Noeud):
            #valeur == value of the example for the node's tested attribute
            valeur = exemple.dict_attributs[noeud_actuel.attribut_teste]
            #if the value represents a number
            try:
                valeur = float(valeur)
            #if that fails, all is well: it is a discrete value
            except:
                pass
            #if it is a continuous value, turn it into an interval
            else:
                for intervalle in noeud_actuel.enfants:
                    if valeur < intervalle[1] and valeur >= intervalle[0]:
                        valeur = intervalle
                        break
            finally:
                #either way, move down to the child node
                noeud_actuel = noeud_actuel.enfants[valeur]
        #once the walk is over, label the example
        exemple.etiquette = noeud_actuel.etiquette

    def taux_erreur(self, ensemble):
        """
            returns a number in [0, 1] corresponding to the proportion of
            examples in ensemble that are labeled incorrectly by the tree
            as it stands
        """
        compteur_etiquetages_incorrects = 0
        #for each example
        for exemple in ensemble.liste_exemples:
            #keep its label
            etiquette = exemple.etiquette
            self.etiqueter(exemple)
            #and compare it to the label given by the tree
            if etiquette != exemple.etiquette:
                #if they differ, increase the count of incorrect labelings
                #and restore the correct label
                compteur_etiquetages_incorrects += 1
                exemple.etiquette = etiquette
        return compteur_etiquetages_incorrects/len(ensemble)

    def elaguer(self):
        """
            modifies the tree by pruning it against the pruning set
            given in self.chemin_elagage
        """
        #NOTE: if the path was not given, no pruning is done!
        if self.chemin_elagage != "":
            self.arbre = self.__elaguer_noeud(self.arbre,
                                              Ensemble(self.chemin_elagage))

    def __elaguer_noeud(self, noeud, ensemble_elagage):
        """
            prunes the node passed as a parameter and returns it
        """
        #if we are on a leaf, go no further
        if isinstance(noeud, Feuille):
            return noeud
        min_erreur, etiquette_gardee = 1.0, ""
        proportion_initiale = self.taux_erreur(ensemble_elagage)
        sauvegarde = self.arbre
        #for each label
        for etiquette in ensemble_elagage.etiquettes_possibles():
            self.arbre = Feuille(etiquette)
            #compute the error rate obtained by replacing the node with
            #that label
            taux_erreur_actuel = self.taux_erreur(ensemble_elagage)
            #keep the best rate
            if taux_erreur_actuel < min_erreur:
                min_erreur, etiquette_gardee = taux_erreur_actuel, etiquette
        #if some rate is at least as good, prune at this spot
        if min_erreur <= proportion_initiale:
            return Feuille(etiquette_gardee)
        else:
            self.arbre = sauvegarde
        #do not forget to restore the value at the top of the tree!
        sauvegarde, self.arbre = self.arbre, sauvegarde
        #check each child to see whether it can be pruned
        for enfant in noeud.enfants:
            sous_ensemble = ensemble_elagage.sous_ensemble_attribut(
                                                    noeud.attribut_teste,
                                                    enfant)
            #if it can, prune it
            if len(sous_ensemble) != 0:
                noeud.enfants[enfant] = self.__elaguer_noeud(
                                                    noeud.enfants[enfant],
                                                    sous_ensemble)
        #finally, return the node with its (possibly pruned) children
        return noeud
Example #20
        return iarr


if __name__ == "__main__":
    import pandas as pd
    from Ensemble import Ensemble
    from glob import glob
    from os import mkdir
    from os.path import exists, join, dirname
    import tempfile

    tempfile.tempdir = "/home/jin76872/Desktop/Mih/Data/tmp_superstar_ghecom"

    brd1_data = pd.read_csv(
        "/home/jin76872/Desktop/Mih/Data/SIENA/EnsembleAligner/BRD1.csv")
    ref_ID = brd1_data.loc[9].squeeze()
    e = Ensemble(root_dir="", ref_ID=ref_ID)
    hot_paths = glob(
        "/home/jin76872/Desktop/Mih/Data/SIENA/EnsembleAligner/BAZ2B/Hotspots/*/out.zip"
    )[:100]
    hot_paths += glob(
        "/home/jin76872/Desktop/Mih/Data/SIENA/EnsembleAligner/BRD1/Hotspots/*/out.zip"
    )[:100]

    # trunc_paths = e.shrink_hotspots(hot_paths)
    # with open("hot_paths.txt", "w") as f:
    #     for h in trunc_paths:
    #         f.write(h + "\n")
    with open("hot_paths.txt") as f:
        trunc_paths = [line.strip() for line in f.readlines()]

    brd1_hots = [join(t, "acceptor.ccp4") for t in trunc_paths if "BRD1" in t]
Example #21
def load_model(model_name):
    model = None

    if 'resnet152_ddn_jpeg'.__eq__(model_name):
        print("load model resnet152_ddn_jpeg")
        m = models.resnet152(pretrained=False)
        weight = './weights/jpeg_ddn_resnet152/jpeg_ddn_resnet152.pth'
        image_mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
        image_std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
        pretrained_model = NormalizedModel(model=m, mean=image_mean, std=image_std)
        loaded_state_dict = torch.load(weight)
        pretrained_model.load_state_dict(loaded_state_dict)
        model = pretrained_model

    elif 'wide_resnet101_2_dnn_jpeg'.__eq__(model_name):
        print("load model wide_resnet101_2_dnn_jpeg")
        m = models.wide_resnet101_2(pretrained=False)
        weight = './weights/jpeg_ddn_wide_resnet101/jpeg_ddn_wide_resnet101.pth'
        image_mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
        image_std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
        model = NormalizedModel(model=m, mean=image_mean, std=image_std)
        loaded_state_dict = torch.load(weight)
        model.load_state_dict(loaded_state_dict)

    elif 'densenet161_ddn_jpeg'.__eq__(model_name):
        print("load model densenet161_ddn_jpeg")

        m = models.densenet161(pretrained=False)
        weight = './weights/jpeg_ddn_densenet161/jpeg_ddn_densenet161.pth'

        image_mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
        image_std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
        model = NormalizedModel(model=m, mean=image_mean, std=image_std)
        loaded_state_dict = torch.load(weight)
        model.load_state_dict(loaded_state_dict)

    elif 'hrnet_w64_ddn_jpeg'.__eq__(model_name):
        print("load model: hrnet_w64_ddn_jpeg")
        model = timm.create_model('hrnet_w64', pretrained=False)
        image_mean = torch.tensor([0.5000, 0.5000, 0.5000]).view(1, 3, 1, 1)
        image_std = torch.tensor([0.5000, 0.5000, 0.5000]).view(1, 3, 1, 1)
        model = NormalizedModel(model=model, mean=image_mean, std=image_std)
        weight= './weights/jpeg_ddn_hrnet_w64/jpeg_ddn_hrnet_w64.pth'
        loaded_state_dict = torch.load(weight)
        model.load_state_dict(loaded_state_dict)

    elif 'Ensemble_dsn161_jpeg_rn162_jpeg_wrn101_jpeg'.__eq__(model_name):
        print("load model: Ensemble_dsn161_jpeg_rn162_jpeg_wrn101_jpeg")
        model1 = load_model("densenet161_ddn_jpeg")
        model2 = load_model("resnet152_ddn_jpeg")
        model3 = load_model("wide_resnet101_2_dnn_jpeg")
        model = Ensemble(model1, model2, model3)
    elif 'Ensemble_dsn161_jpeg_wrn101_jpeg_hrn_jpeg'.__eq__(model_name):
        print("load model: Ensemble_dsn161_jpeg_wrn101_jpeg_hrn_jpeg")
        model1 = load_model("densenet161_ddn_jpeg")
        model2 = load_model("wide_resnet101_2_dnn_jpeg")
        model3 = load_model("hrnet_w64_ddn_jpeg")
        model = Ensemble3_hrn(model1, model2, model3)
    elif 'Ensemble_dsn161_jpeg_wrn101_jpeg'.__eq__(model_name):
        print("load model: Ensemble_dsn161_jpeg_wrn101_jpeg")
        model1 = load_model("densenet161_ddn_jpeg")
        model2 = load_model("wide_resnet101_2_dnn_jpeg")
        model = Ensemble2(model1, model2)
    elif 'Ensemble_dsn161_jpeg_rn162_jpeg_wrn101_jpeg_hrn_jpeg'.__eq__(model_name):
        print("load model: Ensemble_dsn161_jpeg_rn162_jpeg_wrn101_jpeg_hrn_jpeg")
        model1 = load_model("densenet161_ddn_jpeg")
        model2 = load_model("resnet152_ddn_jpeg")
        model3 = load_model("wide_resnet101_2_dnn_jpeg")
        model4 = load_model("hrnet_w64_ddn_jpeg")
        model = Ensemble4(model1, model2, model3,model4)

    elif 'Ensemble_dsn161_jpeg_rn162_jpeg'.__eq__(model_name):
        print("load model: Ensemble_dsn161_jpeg_rn162_jpeg")
        model1 = load_model("densenet161_ddn_jpeg")
        model2 = load_model("resnet152_ddn_jpeg")
        model = Ensemble2(model1, model2)

    else:
        print("can not load model")

    return model
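NormalizedModel and the Ensemble/Ensemble2/Ensemble3_hrn/Ensemble4 wrappers come from this project; a minimal sketch of the two ideas they combine (normalize inputs inside the model, then average the logits of several members), with the class names below being placeholders rather than the project's API:

import torch
import torch.nn as nn

class NormalizedWrapper(nn.Module):
    """Apply per-channel normalization before the wrapped model (illustrative)."""
    def __init__(self, model, mean, std):
        super().__init__()
        self.model = model
        self.register_buffer("mean", mean)
        self.register_buffer("std", std)

    def forward(self, x):
        return self.model((x - self.mean) / self.std)

class LogitAverageEnsemble(nn.Module):
    def __init__(self, *members):
        super().__init__()
        self.members = nn.ModuleList(members)

    def forward(self, x):
        return torch.stack([m(x) for m in self.members]).mean(dim=0)

mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
members = [NormalizedWrapper(nn.Sequential(nn.Flatten(), nn.Linear(3 * 32 * 32, 10)),
                             mean, std) for _ in range(2)]
print(LogitAverageEnsemble(*members)(torch.randn(4, 3, 32, 32)).shape)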
Example #22
########################
# EXECUTION REGRESSION #
########################

Regression = Task.Regression(fn.ML_cup, num_epoch, dim_output, hidden_units,
                             batch_array, learning_rate_init,
                             type_learning_rate, alfa, v_lambda, fun, weight,
                             early_stopping, num_training)

top_models = Regression.startexecution_k_fold()
num_training = Regression.num_training

########################
#  RANDOMIZATION PHASE #
########################
ensamble = Ensemble(top_models, 8)

random_top_models = []

#take the best model and create a new_model for each with a perturbation of all hyperparameters
for model in top_models:

    #save the hyperparameters of model
    alfa = [copy.deepcopy(model.NN.alfa)]
    v_lambda = [copy.deepcopy(model.NN.v_lambda)]
    learning_rate_init = [copy.deepcopy(model.NN.learning_rate)]
    hidden_units = [copy.deepcopy(model.NN.units)]
    batch_size = [copy.deepcopy(model.NN.batch_size)]

    #perturbation
    hidden_units, batch_size, learning_rate_init, alfa, v_lambda = Function.pertubation(
Example #23
    def print_table(self):
        model_names = ["Resnet152", "VGG19_bn", "DenseNet", "ResAttNet"]
            
        print("Validation Accuracy Table")
        for i in range(len(self.models)):
            criterion = nn.CrossEntropyLoss()
            ensemble_solver = Ensemble([self.models[i]])
            top1_acc, top5_acc, val_loss = ensemble_solver.evaluate_all(criterion, self.dataloaders, self.dataset_sizes)
            fgsm_top1_acc, fgsm_top5_acc, fgsm_val_loss = ensemble_solver.evaluate_all(criterion, self.fgsm_dataloader, self.fgsm_dataset_sizes)
            blurred_top1_acc, blurred_top5_acc, blurred_val_loss = ensemble_solver.evaluate_all(criterion, self.blurred_dataloader, self.blurred_dataset_sizes)
            print("{} = top1_acc: {}, top5_acc:{}, fgsm_top1_acc:{}, blurred_top1_acc:{}".format(model_names[i], top1_acc, top5_acc, fgsm_top1_acc, blurred_top1_acc))
            
        print()
        resnet_model, vgg_model, dense_model, attention_model = self.models
        
        combo = [
            [resnet_model, dense_model, vgg_model, attention_model],
            [resnet_model, dense_model, attention_model],
            [resnet_model, vgg_model, attention_model],
            [resnet_model, dense_model, vgg_model],
            [dense_model, vgg_model, attention_model]
        ]
        combo_names = [
            ["Resnet152, VGG19_bn, DenseNet, ResAttNet"],
            ["Resnet152, DenseNet, ResAttNet"],
            ["Resnet152, VGG19_bn, ResAttNet"],
            ["Resnet152, VGG19_bn, DenseNet"],
            ["DenseNet, VGG19_bn, ResAttNet"]
        ]
            
        print("Ensemble by Averaging logits")
        for i in range(len(combo)):
            criterion = nn.CrossEntropyLoss()
            ensemble_solver = Ensemble(combo[i])
            top1_acc, top5_acc, val_loss = ensemble_solver.evaluate_all(criterion, self.dataloaders, self.dataset_sizes)
            fgsm_top1_acc, fgsm_top5_acc, fgsm_val_loss = ensemble_solver.evaluate_all(criterion, self.fgsm_dataloader, self.fgsm_dataset_sizes)
            blurred_top1_acc, blurred_top5_acc, blurred_val_loss = ensemble_solver.evaluate_all(criterion, self.blurred_dataloader, self.blurred_dataset_sizes)
            print(combo_names[i][0])
            print("Validation top1_acc: {}, top5_acc:{}, fgsm_top1_acc:{}, blurred_top1_acc:{}".format(top1_acc, top5_acc, fgsm_top1_acc, blurred_top1_acc))

        print()
        print("Ensemble by Majority Vote")
        for i in range(len(combo)):
            criterion = nn.CrossEntropyLoss()
            ensemble_solver = Ensemble(combo[i])
            top1_acc, top5_acc, val_loss = ensemble_solver.evaluate_all(criterion, self.dataloaders, self.dataset_sizes, mode="maj vote")
            fgsm_top1_acc, fgsm_top5_acc, fgsm_val_loss = ensemble_solver.evaluate_all(criterion, self.fgsm_dataloader, self.fgsm_dataset_sizes, mode="maj vote")
            blurred_top1_acc, blurred_top5_acc, blurred_val_loss = ensemble_solver.evaluate_all(criterion, self.blurred_dataloader, self.blurred_dataset_sizes, mode="maj vote")
            print(combo_names[i][0])
            print("Validation top1_acc: {}, top5_acc:{}, fgsm_top1_acc:{}, blurred_top1_acc:{}".format(top1_acc, top5_acc, fgsm_top1_acc, blurred_top1_acc))
            print()
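evaluate_all with mode="maj vote" is the project's own method; a minimal sketch of the majority-vote rule itself, applied to per-model argmax predictions (the member models here are random stand-ins):

import torch
import torch.nn as nn

def majority_vote(members, x):
    # Each model votes with its argmax class; torch.mode picks the most frequent vote
    votes = torch.stack([m(x).argmax(dim=1) for m in members])  # (n_models, batch)
    return votes.mode(dim=0).values                             # (batch,)

members = [nn.Sequential(nn.Flatten(), nn.Linear(3 * 8 * 8, 5)) for _ in range(3)]
x = torch.randn(4, 3, 8, 8)
print(majority_vote(members, x))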
Example #24
class Update(object):

    def __init__(self):
        self.Ensemble=Ensemble(3)
        super(Update, self).__init__()

    @staticmethod
    def readdata(sourcex_matrix=None, sourcey_matrix=None,targetx_matrix=None, targety_matrix=None,src_path='datasets/syndata_002_normalized_no_novel_class_source_stream.csv',
                   tgt_path='datasets/syndata_002_normalized_no_novel_class_target_stream.csv', src_size=None, tgt_size=None):
        """ 
        input is: source dataset with y, here we assume it is a list of list, the name is source, target dataset with yhat, 
        here we assume it is a list of list, the name is target 
        """
        if sourcex_matrix is None:
            sourcex_matrix_, sourcey_matrix = Classification.read_csv(src_path, None)   # matrix_ is source data
        else:
            sourcex_matrix_ = sourcex_matrix
            sourcey_matrix_ = sourcey_matrix
        matrix_ = sourcex_matrix_[:src_size, :]

        if targetx_matrix is None:
            targetx_ ,targety_= Classification.read_csv(tgt_path, size=None)
        else:
            targetx_ = targetx_matrix
            targety_ = targety_matrix
        labellist = []
        for i in range(0, len(targety_)):
            if targety_[i] not in labellist:
                labellist.append(targety_[i])
        sourcey_label = []
        for i in range(0, len(sourcey_matrix)):
            sourcey_label.append(labellist.index(sourcey_matrix[i]))

        for i in range(0, len(targety_)):
            if targety_[i] not in labellist:
                labellist.append(targety_[i])
        targety_label = []
        for i in range(0, len(targety_)):
            targety_label.append(labellist.index(targety_[i]))
        return sourcex_matrix_,sourcey_label, targetx_, targety_label

    def Process(self, sourcex,sourcey, targetx,targety,subsize):
        # fixed size windows for source stream and target stream

        sourceIndex = 0
        targetIndex = 0
        src_count = 0
        tgtchange_count = 0
        threshold = 1.0
        src_size, _ = sourcex.shape
        tgt_size, _ = targetx.shape
        #true_label = []
        #for i in range(len(np.array(targety))):
            #if np.array(targety)[i] == 'class1':
                #true_label.append(1)
            #if np.array(targety)[i] == 'class2':
                #true_label.append(2)
            #if np.array(targety)[i] == 'class3':
                #true_label.append(3)
            #if np.array(targety)[i] == 'class4':
                #true_label.append(4)
            #if np.array(targety)[i] == 'class5':
                #true_label.append(5)
            #if np.array(targety)[i] == 'class6':
                #true_label.append(6)
            #if np.array(targety)[i] == 'class7':
                #true_label.append(7)

        windowsize = 1000
        sourcewindowstart = 0
        sourcewindowend = sourcewindowstart + windowsize -1
        targetwindowstart = 0
        targetwindowend = targetwindowstart + windowsize - 1
        sourcexwindow = sourcex[sourcewindowstart:sourcewindowend]
        sourceywindow = sourcey[sourcewindowstart:sourcewindowend]
        targetxwindow = targetx[targetwindowstart:targetwindowend]
        targetywindow = targety[targetwindowstart:targetwindowend]

        ### get the initial model by using the first source and target windows
        alpha = 0.05
        b = targetxwindow.T.shape[1];
        fold = 5
        sigma_list = Classification.sigma_list(np.array(targetxwindow.T),
                                               np.array(sourcexwindow.T));
        lambda_list = Classification.lambda_list();
        srcx_array = np.array(sourcexwindow.T);
        trgx_array = np.array(targetxwindow.T);
        (thetah_old, w, sce_old, sigma_old) = Classification.R_ULSIF(trgx_array, srcx_array, alpha, sigma_list, lambda_list, b, fold)

        self.Ensemble.generateNewModelRULSIF(targetxwindow, sourcexwindow, sourceywindow, alpha, sigma_list,
                                             lambda_list, b, fold,subsize)
        # print "update model", src_size, source.shape
        truelablecount = 0.0
        totalcount = 0.0


        #tmpsrccount = 0
        tmptrgcount = 0
        changeindex = -1
        updatestartindex = 0
        while True:
            if sourcewindowend >= src_size or targetwindowend >= tgt_size:
                break

            data_type = randint(1, 10)
            if data_type < 2:
                print("get data from source")
                sourcewindowstart+=1
                sourcewindowend+=1
                sourcexwindow = sourcex[sourcewindowstart:sourcewindowend]
                sourceywindow = sourcey[sourcewindowstart:sourcewindowend]
                sourceIndex += 1
                #src_count += 1
                #tmpsrccount += 1
                print("sourceIndex", sourceIndex)
            else:
                print("get data from target")
                targetwindowstart+=1
                targetwindowend+=1
                targetxwindow = targetx[targetwindowstart:targetwindowend]
                targetywindow = targety[targetwindowstart:targetwindowend]
                targetIndex += 1
                tgtchange_count += 1
                tmptrgcount += 1
                print("targetIndex", targetIndex)
            if tgtchange_count>=1000:
                changeindex = 1
                tgtchange_count = 0
                confidencelist = []
                for i in range(targetwindowstart, targetwindowend+1):
                    instanceresult = self.Ensemble.evaluateEnsembleRULSIF(targetx[i])
                    confidencelist.append(instanceresult[1])
                confvar = np.var(confidencelist)
                changetestresult = pelt(normal_mean(confidencelist, confvar), len(confidencelist))
                if len(changetestresult)>1:
                    alpha = 0.05
                    b = targetxwindow.T.shape[1];
                    fold = 5
                    sigma_list = Classification.sigma_list(np.array(targetxwindow.T),
                                                           np.array(sourcexwindow.T));
                    lambda_list = Classification.lambda_list();
                    self.Ensemble.generateNewModelRULSIF(targetxwindow, sourcexwindow, sourceywindow, alpha, sigma_list,
                                                         lambda_list, b, fold, subsize)

                #x_nu = np.array(targetxwindow.T);
                #(thetah_new, w, sce_new, sigma_new) = Classification.R_ULSIF(trgx_array, srcx_array, alpha, sigma_list,
                                                                             #lambda_list, b, fold)
                #targetweight_old = Classification.compute_target_weight(thetah_old, sce_old, sigma_old, x_nu)
                #targetweight_new = Classification.compute_target_weight(thetah_new, sce_new, sigma_new, x_nu)
                #l_ratios = targetweight_new / targetweight_old

                #lnWeightTrgData = np.log(l_ratios, dtype='float64')
                #changeScore = np.sum(lnWeightTrgData, dtype='float64')
                #tgtchange_count=0
                #print "changeScore", changeScore
                #if changeScore > threshold:
                    #alpha = 0.05
                    #b = targetxwindow.T.shape[1];
                    #fold = 5
                    #sigma_list = Classification.sigma_list(np.array(targetxwindow.T),
                                                           #np.array(sourcexwindow.T));
                    #lambda_list = Classification.lambda_list();
                    #self.Ensemble.generateNewModelRULSIF(targetxwindow, sourcexwindow, sourceywindow, alpha, sigma_list,
                                                         #lambda_list, b, fold, subsize)



            if tmptrgcount>=2000:
                # force update model
                tmptrgcount=0
                #update predictions for updatestartindex to targetIndex
                for i in range(updatestartindex,targetIndex+1):
                    print("targetx[i]", targetx[i])
                    instanceresult = self.Ensemble.evaluateEnsembleRULSIF(targetx[i])
                    print("instanceresult", instanceresult)
                    print("instanceresult[0]", instanceresult[0])
                    print("truelabel[i]", targety[i])
                    if instanceresult[0] == targety[i]:
                        truelablecount +=1.0
                    totalcount +=1.0
                print("truelablecount",truelablecount)
                print("totalcount", totalcount)
                with open('errorsyn002405.csv', 'a+') as f:
                    writer = csv.writer(f)
                    writer.writerow([targetIndex, truelablecount,totalcount,truelablecount/totalcount ])
                updatestartindex = targetIndex+1
                alpha = 0.05
                b = targetxwindow.T.shape[1];
                fold = 5
                sigma_list = Classification.sigma_list(np.array(targetxwindow.T),
                                                       np.array(sourcexwindow.T));
                lambda_list = Classification.lambda_list();
                self.Ensemble.generateNewModelRULSIF(targetxwindow, sourcexwindow, sourceywindow, alpha, sigma_list,
                                                     lambda_list, b, fold,subsize)
Example #27
    def get_algorithm(algorithm_type, alpha):
        if algorithm_type == AlgorithmType.Random:
            return Random(alpha)

        elif algorithm_type == AlgorithmType.EFirst:
            return EFirst(alpha)

        elif algorithm_type == AlgorithmType.EGreedy:
            return EGreedy(alpha)

        elif algorithm_type == AlgorithmType.EGreedy_Disjoint:
            return EGreedy_Disjoint(alpha)

        elif algorithm_type == AlgorithmType.EGreedy_Hybrid:
            return EGreedy_Hybrid(alpha)

        elif algorithm_type == AlgorithmType.EGreedy_Seg:
            return Combo_Seg(alpha, AlgorithmType.EGreedy)

        elif algorithm_type == AlgorithmType.LinUCB_Disjoint:
            return LinUCB_Disjoint(alpha)

        elif algorithm_type == AlgorithmType.LinUCB_GP:
            return LinUCB_GP(alpha)

        elif algorithm_type == AlgorithmType.LinUCB_GP_All:
            return LinUCB_GP_All(alpha)

        elif algorithm_type == AlgorithmType.LinUCB_Hybrid:
            return LinUCB_Hybrid(alpha)

        elif algorithm_type == AlgorithmType.UCB:
            return UCB(alpha)

        elif algorithm_type == AlgorithmType.UCB_Seg:
            return Combo_Seg(alpha, AlgorithmType.UCB)

        elif algorithm_type == AlgorithmType.EGreedy_Lin:
            return EGreedy_Lin(alpha)

        elif algorithm_type == AlgorithmType.EGreedy_Seg_Lin:
            return Combo_Seg(alpha, AlgorithmType.EGreedy_Lin)

        elif algorithm_type == AlgorithmType.EGreedy_Lin_Hybrid:
            return EGreedy_Lin_Hybrid(alpha)

        elif algorithm_type == AlgorithmType.TS:
            return TS(alpha)

        elif algorithm_type == AlgorithmType.TS_Bootstrap:
            return TS_Bootstrap(alpha)

        elif algorithm_type == AlgorithmType.TS_Lin:
            return TS_Lin(alpha)

        elif algorithm_type == AlgorithmType.TS_Seg:
            return Combo_Seg(alpha, AlgorithmType.TS)

        elif algorithm_type == AlgorithmType.TS_Disjoint:
            return TS_Disjoint(alpha)

        elif algorithm_type == AlgorithmType.TS_Hybrid:
            return TS_Hybrid(alpha)

        elif algorithm_type == AlgorithmType.TS_Truncated:
            return TS_Truncated(alpha)

        elif algorithm_type == AlgorithmType.EGreedy_TS:
            return EGreedy_TS(alpha)

        elif algorithm_type == AlgorithmType.TS_Gibbs:
            return TS_Gibbs(alpha)

        elif algorithm_type == AlgorithmType.TS_Laplace:
            return TS_Laplace(alpha)

        elif algorithm_type == AlgorithmType.EGreedy_Annealing:
            return EGreedy_Annealing(alpha)

        elif algorithm_type == AlgorithmType.NN:
            return NN(alpha)

        elif algorithm_type == AlgorithmType.Ensemble:
            return Ensemble(alpha)

        elif algorithm_type == AlgorithmType.TS_RLR:
            return TS_RLR(alpha)

        else:
            raise NotImplementedError("Non-implemented algorithm." +
                                      algorithm_type.name)
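The long if/elif chain maps an AlgorithmType to a constructor; the same dispatch is often written as a dict lookup. A compressed, self-contained sketch with a hypothetical enum and stand-in classes (the names below are placeholders, not the project's full list):

from enum import Enum, auto

class AlgorithmType(Enum):
    Random = auto()
    EGreedy = auto()
    Ensemble = auto()

class Random:
    def __init__(self, alpha): self.alpha = alpha
class EGreedy:
    def __init__(self, alpha): self.alpha = alpha
class Ensemble:
    def __init__(self, alpha): self.alpha = alpha

# Registry: each enum member maps to its constructor
_ALGORITHMS = {
    AlgorithmType.Random: Random,
    AlgorithmType.EGreedy: EGreedy,
    AlgorithmType.Ensemble: Ensemble,
}

def get_algorithm(algorithm_type, alpha):
    try:
        return _ALGORITHMS[algorithm_type](alpha)
    except KeyError:
        raise NotImplementedError("Non-implemented algorithm." + algorithm_type.name)

print(get_algorithm(AlgorithmType.Ensemble, 0.1).alpha)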