Example 1
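All of the examples on this page come from the same project and rely on a common set of imports. A minimal sketch of what they assume (the MIL classifiers simpleMIL, BOW, CKNN, maxDD, EMDD, MILBoost and MILES ship with the MIL library this project is built on; their exact import paths are not shown in the snippets):

import copy
import random as rand

import numpy as np
from sklearn import metrics
from sklearn.model_selection import StratifiedKFold
from sklearn.utils import shuffle

# The MIL classifiers referenced below (simpleMIL, BOW, CKNN, maxDD, EMDD,
# MILBoost, MILES) and the project helpers mil_cross_val / cla_filter2 must be
# imported from the MIL library and project modules these examples belong to.
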
def cla_filter():
    aux = []
    resul1 = [[], [], [], [], [], [], []]
    resul2 = [[], [], [], [], [], [], []]
    resul3 = [[], [], [], [], [], [], []]
    resul4 = [[], [], [], [], [], [], []]
    resul5 = [[], [], [], [], [], [], []]
    resul6 = [[], [], [], [], [], [], []]
    resul7 = [[], [], [], [], [], [], []]
    resul8 = [[], [], [], [], [], [], []]
    resul9 = [[], [], [], [], [], [], []]
    roc_m_1 = [[], [], [], [], [], [], []]
    roc_m_2 = [[], [], [], [], [], [], []]
    roc_m_3 = [[], [], [], [], [], [], []]
    roc_m_4 = [[], [], [], [], [], [], []]
    roc_m_5 = [[], [], [], [], [], [], []]
    roc_m_6 = [[], [], [], [], [], [], []]
    roc_m_7 = [[], [], [], [], [], [], []]
    roc_m_8 = [[], [], [], [], [], [], []]
    roc_m_9 = [[], [], [], [], [], [], []]
    SMILaMax = [simpleMIL(), {'type': 'max'}, 'MIL max', resul1, roc_m_1]
    SMILaMin = [simpleMIL(), {'type': 'min'}, 'MIL min', resul2, roc_m_2]
    SMILaExt = [
        simpleMIL(), {
            'type': 'extreme'
        }, 'MIL Extreme', resul3, roc_m_3
    ]
    BOW_clas = [
        BOW(), {
            'k': 90,
            'covar_type': 'diag',
            'n_iter': 20
        }, 'BOW', resul4, roc_m_4
    ]
    CKNN_cla = [
        CKNN(), {
            'references': 3,
            'citers': 5
        }, 'CKNN', resul5, roc_m_5
    ]
    maxDD_cl = [maxDD(), {}, 'DIVERSE DENSITY', resul6, roc_m_6]
    EMDD_cla = [EMDD(), {}, 'EM-DD', resul7, roc_m_7]
    MILB_cla = [MILBoost(), {}, 'MILBOOST', resul8, roc_m_8]
    MILES_cl = [MILES(), {}, 'MILES', resul9, roc_m_9]
    aux.append(SMILaMax)
    #    aux.append(SMILaMin)
    #    aux.append(SMILaExt)
    aux.append(BOW_clas)
    #    aux.append(CKNN_cla)
    aux.append(maxDD_cl)
    #    aux.append(EMDD_cla)
    #    aux.append(MILB_cla)
    #    aux.append(MILES_cl)
    return aux
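
Each entry returned by cla_filter() has the form [model, parameters, name, results, roc_results]. A hedged sketch of how an entry can be consumed, following the fit/predict pattern used in the filter functions further down this page (X_train, Y_train, X_test and Y_test are placeholders for an existing train/test split):

for model, params, name, results, roc in cla_filter():
    # Fit with the classifier-specific keyword parameters, if any.
    if len(params) > 0:
        model.fit(X_train, Y_train, **params)
    else:
        model.fit(X_train, Y_train)
    predictions = model.predict(X_test)
    # Some classifiers return (predictions, extra); keep only the predictions.
    if isinstance(predictions, tuple):
        predictions = predictions[0]
    accuracy = np.average(Y_test.T == np.sign(predictions))
    print(name, accuracy)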
Example 2
def cla_filter_ipf():
    aux = []
    resul1 = [[],[],[],[],[],[],[]]
    roc_m_1 = [[],[],[],[],[],[],[]]
    SMILaMax = [simpleMIL(),{'type': 'max'},'MIL max',copy.deepcopy(resul1),copy.deepcopy(roc_m_1)]
    SMILaMin = [simpleMIL(),{'type': 'min'},'MIL min',copy.deepcopy(resul1),copy.deepcopy(roc_m_1)]
    SMILaExt = [simpleMIL(),{'type': 'extreme'},'MIL Extreme',copy.deepcopy(resul1),copy.deepcopy(roc_m_1)]
    BOW_clas = [BOW(),{'k':90,'covar_type':'diag','n_iter':20},'BOW',copy.deepcopy(resul1),copy.deepcopy(roc_m_1)]
    CKNN_cla = [CKNN(),{'references': 3, 'citers': 5},'CKNN',copy.deepcopy(resul1),copy.deepcopy(roc_m_1)]
    maxDD_cl = [maxDD(),{},'DIVERSE DENSITY',copy.deepcopy(resul1),copy.deepcopy(roc_m_1)]
    EMDD_cla = [EMDD(),{},'EM-DD',copy.deepcopy(resul1),copy.deepcopy(roc_m_1)]
    MILB_cla = [MILBoost(),{},'MILBOOST',copy.deepcopy(resul1),copy.deepcopy(roc_m_1)]
#    MILES_cl = [MILES(),{},'MILES',copy.deepcopy(resul1),copy.deepcopy(roc_m_1)]
    aux.append(SMILaMax)
    aux.append(SMILaMin)
    aux.append(SMILaExt)
    aux.append(BOW_clas)
    aux.append(CKNN_cla)
    aux.append(maxDD_cl)
    aux.append(EMDD_cla)
    aux.append(MILB_cla)
#    aux.append(MILES_cl)
    return aux
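
The only difference from Example 1 is that every configuration receives copy.deepcopy(resul1) and copy.deepcopy(roc_m_1), so each classifier accumulates results in its own lists. A minimal illustration of the aliasing that the deep copies avoid:

template = [[], [], [], [], [], [], []]
without_copy = [template, template]        # both entries alias the same lists
without_copy[0][0].append(0.9)
print(without_copy[1][0])                  # [0.9] -- leaked into the other entry

with_copy = [copy.deepcopy(template), copy.deepcopy(template)]
with_copy[0][0].append(0.9)
print(with_copy[1][0])                     # [] -- independent accumulators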
Example 3
def mil_cv_filter_ipf(bags_f,labels_f,folds,votacion,clasificador_):
#    print('\t\t\tFiltering...')
    error = 0.01
    toStop = 3
    stop = True
    countToStop = 0
    if len(labels_f) < folds:
        folds = len(labels_f)
    skf = StratifiedKFold(n_splits=folds)
    totalNoisyLabel = 0
    while stop:
        bags_f,labels_f = shuffle(bags_f, labels_f, random_state=rand.randint(0, len(labels_f)-1))
        isCorrectLabel = np.ones((folds, len(labels_f)), dtype=bool)
        fold = 0        
        for train_index, test_index in skf.split(bags_f, labels_f.reshape(len(labels_f))):
            X_train = [bags_f[i] for i in train_index]        
            Y_train = labels_f[train_index]
#            print('\t\t\t=>FOLD : '+str(fold))
            try:
                if len(clasificador_[1]) > 0:
                    clasificador_[0].fit(X_train, Y_train, **clasificador_[1])
                else:
                    clasificador_[0].fit(X_train, Y_train)
                predictions = clasificador_[0].predict(X_train)
                if (isinstance(predictions, tuple)):
                    predictions = predictions[0]
            except Exception:
                print('Failure, second attempt')
                try:
                    if len(clasificador_[1]) > 0:
                        clasificador_[0].fit(X_train, Y_train, **clasificador_[1])
                    else:
                        clasificador_[0].fit(X_train, Y_train)
                    predictions = clasificador_[0].predict(X_train)
                    
                    if (isinstance(predictions, tuple)):
                        predictions = predictions[0]
                    print('OK')
                except Exception:
                    print('Possible failure in a bag...')
                    try:
                        print('Switching classifier..')
                        Cla_error = simpleMIL()
                        par_error = {'type': 'max'}
                        if len(par_error) > 0:
                            Cla_error.fit(X_train, Y_train, **par_error)
                        else:
                            Cla_error.fit(X_train, Y_train)
                        predictions = Cla_error.predict(X_train)
                        if (isinstance(predictions, tuple)):
                            predictions = predictions[0]
                        print('OK')
                    except Exception:
                        predictions = np.ones((1, len(Y_train)), dtype=int)
                        predictions = predictions[0]
                        print('Failure')
            for l,p in enumerate(train_index):
                try:
                    isCorrectLabel[fold][p] = (Y_train.T[0][l] == np.sign(predictions[l]))
                except IndexError:
                    print("Failure at the last index!")
            fold = fold + 1
        if votacion == 'maxVotos':
            noisyBags = []
            for n in range(0,len(labels_f)):
                aux = 0
                for m in range(0,folds):
                    if not isCorrectLabel[m][n]:
                        aux = aux+1
                if aux > folds/2:
                    noisyBags.append(n)
        if votacion == 'consenso':
            noisyBags = []
            for n in range(0,len(labels_f)):
                aux = True
                for m in range(0,folds):
                    if aux:
                        if isCorrectLabel[m][n]:
                            aux = False
                if aux:
                    noisyBags.append(n)
        nonNoisyBags = [] 
        cont = 0
        if len(noisyBags) == 0:
            for z in range(0,len(bags_f)):
                nonNoisyBags.append(z)
        else:
            for z in range(0,len(bags_f)):
                if cont < len(noisyBags) and noisyBags[cont] == z:
                    cont = cont + 1
                else:
                    nonNoisyBags.append(z)
        if len(noisyBags) < (len(bags_f)*error):
            countToStop = countToStop + 1
        else:
            countToStop = 0
        if countToStop == toStop:
            stop = False
        else:
            bags_f = [bags_f[d] for d in nonNoisyBags] 
            labels_f = labels_f[nonNoisyBags]
        if len(bags_f) < len(labels_f.reshape(len(labels_f))):
            print('Number of bags is smaller than the number of labels; cannot continue')
            stop = False
        # New check 28/10/19
        if len(labels_f) < 1:
            stop = False
        totalNoisyLabel+=len(noisyBags)
    print('\t\t\t=>Elements removed by '+clasificador_[2]+': '+str(totalNoisyLabel))
    X_train_NoNy = bags_f
    Y_train_NoNy = labels_f
    return X_train_NoNy,Y_train_NoNy
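
A hedged driver for the filter above: run it once per classifier configuration produced by cla_filter_ipf(), under majority voting. The names bags and labels stand in for the MIL dataset (a list of bag arrays and a numpy label array) loaded elsewhere in the project:

for clasificador in cla_filter_ipf():
    X_clean, Y_clean = mil_cv_filter_ipf(bags, labels, folds=5,
                                         votacion='maxVotos',
                                         clasificador_=clasificador)
    print(clasificador[2], len(Y_clean), 'bags kept')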
Example 4
def mil_cv_filter_ef(bags_f, labels_f, folds, votacion, num):
    #    print('\t\t\tFiltering...')
    if num == 1:
        Clasificadores = cla_filter()
    else:
        Clasificadores = cla_filter2()
    bags_f, labels_f = shuffle(bags_f,
                               labels_f,
                               random_state=rand.randint(0, 100))
    if len(labels_f) < folds:
        folds = len(labels_f)
    skf = StratifiedKFold(n_splits=folds)
    isCorrectLabel = np.ones((len(Clasificadores), len(labels_f)), dtype=bool)
    for train_index, test_index in skf.split(bags_f,
                                             labels_f.reshape(len(labels_f))):
        X_train = [bags_f[i] for i in train_index]
        Y_train = labels_f[train_index]
        X_test = [bags_f[i] for i in test_index]
        Y_test = labels_f[test_index]
        for s, cl in enumerate(Clasificadores):

            try:
                if len(Clasificadores[s][1]) > 0:
                    Clasificadores[s][0].fit(X_train, Y_train,
                                             **Clasificadores[s][1])
                else:
                    Clasificadores[s][0].fit(X_train, Y_train)
                predictions = Clasificadores[s][0].predict(X_test)
                if (isinstance(predictions, tuple)):
                    predictions = predictions[0]
            except Exception:
                print('Failure, second attempt')
                try:
                    if len(Clasificadores[s][1]) > 0:
                        Clasificadores[s][0].fit(X_train, Y_train,
                                                 **Clasificadores[s][1])
                    else:
                        Clasificadores[s][0].fit(X_train, Y_train)
                    predictions = Clasificadores[s][0].predict(X_test)
                    if (isinstance(predictions, tuple)):
                        predictions = predictions[0]
                    print('OK')
                except Exception:
                    print('Possible failure in a bag...')
                    try:
                        if len(Clasificadores[s][1]) > 0:
                            Clasificadores[s][0].fit(X_train, Y_train,
                                                     **Clasificadores[s][1])
                        else:
                            Clasificadores[s][0].fit(X_train, Y_train)
                        predictions = Clasificadores[s][0].predict(X_test)
                        if (isinstance(predictions, tuple)):
                            predictions = predictions[0]
                        print('OK')
                    except Exception:
                        try:
                            print('Switching classifier..')
                            Cla_error = simpleMIL()
                            par_error = {'type': 'max'}
                            if len(par_error) > 0:
                                Cla_error.fit(X_train, Y_train, **par_error)
                            else:
                                Cla_error.fit(X_train, Y_train)
                            predictions = Cla_error.predict(X_test)
                            if (isinstance(predictions, tuple)):
                                predictions = predictions[0]
                            print('OK')
                        except Exception:
                            predictions = np.ones(len(Y_test), dtype=int)
                            print('Failure')
            for l, p in enumerate(test_index):
                try:
                    isCorrectLabel[s][p] = (Y_test.T[0][l] == np.sign(
                        predictions[l]))
                except IndexError:
                    print("Failure at the last index!")
    if votacion == 'maxVotos':
        noisyBags = []
        for n in range(0, len(labels_f)):
            aux = 0
            for m in range(0, len(Clasificadores)):
                if not isCorrectLabel[m][n]:
                    aux = aux + 1
            if aux > len(Clasificadores) / 2:
                noisyBags.append(n)
    if votacion == 'consenso':
        noisyBags = []
        for n in range(0, len(labels_f)):
            aux = True
            for m in range(0, len(Clasificadores)):
                if aux:
                    if isCorrectLabel[m][n]:
                        aux = False
            if aux:
                noisyBags.append(n)
    nonNoisyBags = []
    cont = 0
    if len(noisyBags) == 0:
        for z in range(0, len(bags_f)):
            nonNoisyBags.append(z)
    else:
        for z in range(0, len(bags_f)):
            if cont < len(noisyBags) and noisyBags[cont] == z:
                cont = cont + 1
            else:
                nonNoisyBags.append(z)
    print('\t\t\t=>Elements removed with Filter ' + str(num + 1) + ': ' +
          str(len(noisyBags)))
    X_train_NoNy = [bags_f[i] for i in nonNoisyBags]
    Y_train_NoNy = labels_f[nonNoisyBags]
    return X_train_NoNy, Y_train_NoNy
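
Usage is a single call per voting scheme; a hedged sketch, again assuming bags and labels hold the MIL dataset. With num=1 the ensemble is the one built by cla_filter() in Example 1 (cla_filter2 is not shown on this page):

X_clean, Y_clean = mil_cv_filter_ef(bags, labels, folds=5,
                                    votacion='maxVotos', num=1)
print(len(X_clean), 'bags kept after ensemble filtering')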
Example 5
bags, labels = shuffle(bags, labels, random_state=rand.randint(0, 100))

#Number of Folds
folds = 5

bow_classifier = BOW()
#parameters_bow = {'k':100,'covar_type':'diag','n_iter':20}
parameters_bow = {'k': 10, 'covar_type': 'diag', 'n_iter': 20}
accuracie, results_accuracie, auc, results_auc = mil_cross_val(
    bags=bags,
    labels=labels,
    model=bow_classifier,
    folds=folds,
    parameters=parameters_bow)

SMILa = simpleMIL()
parameters_smil = {'type': 'max'}
# In this one maxDD worked for me because it has no problem with parameters
accuracie, results_accuracie, auc, results_auc, elapsed = mil_cross_val(
    bags=bags,
    labels=labels,
    model=SMILa,
    folds=folds,
    parameters=parameters_smil,
    timer=True)

parameters_smil = {'type': 'min'}
# In this one maxDD worked for me because it has no problem with parameters
accuracie, results_accuracie, auc, results_auc = mil_cross_val(
    bags=bags,
    labels=labels,
    model=SMILa,
    folds=folds,
    parameters=parameters_smil)
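
The same mil_cross_val call can be driven by the configuration list from Example 1. A hedged sketch, assuming bags, labels and folds as defined above and that mil_cross_val also accepts an empty parameter dict for the classifiers that take no extra parameters:

for model, params, name, results, roc in cla_filter():
    accuracie, results_accuracie, auc, results_auc = mil_cross_val(
        bags=bags, labels=labels, model=model, folds=folds, parameters=params)
    print('%s -> accuracy %.3f, AUC %.3f' % (name, accuracie, auc))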
Example 6
bow_classifier = BOW()
bow_classifier.fit(train_bags,
                   train_labels,
                   k=10,
                   covar_type='diag',
                   n_iter=20)
predictions = bow_classifier.predict(test_bags)
accuracie = np.average(test_labels.T == np.sign(predictions))
print('\n Accuracy: %.2f%%' % (100 * accuracie))
fpr, tpr, thresholds = metrics.roc_curve(test_labels,
                                         predictions,
                                         pos_label=1.)
metrics.auc(fpr, tpr)

#####################
#simpleMIL [average]#
#####################
SMILa = simpleMIL()
SMILa.fit(train_bags, train_labels, type='average')
predictions = SMILa.predict(test_bags)
accuracie = np.average(test_labels.T == np.sign(predictions))
print('\n Accuracy: %.2f%%' % (100 * accuracie))
fpr, tpr, thresholds = metrics.roc_curve(test_labels,
                                         predictions,
                                         pos_label=1.)
metrics.auc(fpr, tpr)
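
The accuracy / ROC-AUC block after each classifier is identical; a small helper (not part of the original script, the name report is a placeholder) that captures the repeated pattern:

def report(name, predictions, test_labels):
    # Same computation as the inline blocks: sign-threshold the predictions
    # for accuracy, then ROC AUC on the raw scores.
    accuracy = np.average(test_labels.T == np.sign(predictions))
    fpr, tpr, thresholds = metrics.roc_curve(test_labels, predictions,
                                             pos_label=1.)
    print('%s accuracy: %.2f%%, AUC: %.3f'
          % (name, 100 * accuracy, metrics.auc(fpr, tpr)))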

#####################
#simpleMIL [extreme]#
#####################
SMILe = simpleMIL()
SMILe.fit(train_bags, train_labels, type='extreme')
predictions = SMILe.predict(test_bags)