def test_compute_class_weight_auto_negative():
    # Test compute_class_weight when labels are negative
    # Test with balanced class labels.
    classes = np.array([-2, -1, 0])
    y = np.asarray([-1, -1, 0, 0, -2, -2])
    cw = assert_warns(DeprecationWarning, compute_class_weight, "auto",
                      classes, y)
    assert_almost_equal(cw.sum(), classes.shape)
    assert_equal(len(cw), len(classes))
    assert_array_almost_equal(cw, np.array([1., 1., 1.]))

    cw = compute_class_weight("balanced", classes, y)
    assert_equal(len(cw), len(classes))
    assert_array_almost_equal(cw, np.array([1., 1., 1.]))

    # Test with unbalanced class labels.
    y = np.asarray([-1, 0, 0, -2, -2, -2])
    cw = assert_warns(DeprecationWarning, compute_class_weight, "auto",
                      classes, y)
    assert_almost_equal(cw.sum(), classes.shape)
    assert_equal(len(cw), len(classes))
    assert_array_almost_equal(cw, np.array([0.545, 1.636, 0.818]), decimal=3)

    cw = compute_class_weight("balanced", classes, y)
    assert_equal(len(cw), len(classes))
    class_counts = np.bincount(y + 2)
    assert_almost_equal(np.dot(cw, class_counts), y.shape[0])
    assert_array_almost_equal(cw, [2. / 3, 2., 1.])
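# A minimal sketch (not part of the original tests) of the arithmetic behind the
# "balanced" heuristic asserted above: weight_c = n_samples / (n_classes * count_c).
# For the unbalanced labels y = [-1, 0, 0, -2, -2, -2] and classes [-2, -1, 0] this
# reproduces [2/3, 2., 1.]; the names y_demo/counts/balanced are introduced here only
# for illustration.
import numpy as np

y_demo = np.asarray([-1, 0, 0, -2, -2, -2])
counts = np.bincount(y_demo + 2)               # per-class counts, shifted so labels are >= 0
balanced = len(y_demo) / (len(counts) * counts)
print(balanced)                                # -> [0.6667 2.     1.    ]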
def test_compute_class_weight():
    # Test (and demo) compute_class_weight.
    y = np.asarray([2, 2, 2, 3, 3, 4])
    classes = np.unique(y)
    cw = compute_class_weight("auto", classes, y)
    assert_almost_equal(cw.sum(), classes.shape)
    assert_true(cw[0] < cw[1] < cw[2])
def test_compute_class_weight_auto_negative():
    # Test compute_class_weight when labels are negative
    # Test with balanced class labels.
    classes = np.array([-2, -1, 0])
    y = np.asarray([-1, -1, 0, 0, -2, -2])
    cw = compute_class_weight("auto", classes, y)
    assert_almost_equal(cw.sum(), classes.shape)
    assert_equal(len(cw), len(classes))
    assert_array_almost_equal(cw, np.array([1., 1., 1.]))

    # Test with unbalanced class labels.
    y = np.asarray([-1, 0, 0, -2, -2, -2])
    cw = compute_class_weight("auto", classes, y)
    assert_almost_equal(cw.sum(), classes.shape)
    assert_equal(len(cw), len(classes))
    assert_array_almost_equal(cw, np.array([0.545, 1.636, 0.818]), decimal=3)
def test_compute_class_weight_auto_unordered():
    # Test compute_class_weight when classes are unordered
    classes = np.array([1, 0, 3])
    y = np.asarray([1, 0, 0, 3, 3, 3])
    cw = compute_class_weight("auto", classes, y)
    assert_almost_equal(cw.sum(), classes.shape)
    assert_equal(len(cw), len(classes))
    assert_array_almost_equal(cw, np.array([1.636, 0.818, 0.545]), decimal=3)
def test_compute_class_weight_not_present():
    """Test compute_class_weight in case y doesn't contain all classes."""
    classes = np.arange(4)
    y = np.asarray([0, 0, 0, 1, 1, 2])
    cw = compute_class_weight("auto", classes, y)
    assert_almost_equal(cw.sum(), classes.shape)
    assert_equal(len(cw), len(classes))
    assert_true(cw[0] < cw[1] < cw[2] <= cw[3])
def test_compute_class_weight_balanced_negative():
    # Test compute_class_weight when labels are negative
    # Test with balanced class labels.
    classes = np.array([-2, -1, 0])
    y = np.asarray([-1, -1, 0, 0, -2, -2])

    cw = compute_class_weight("balanced", classes, y)
    assert_equal(len(cw), len(classes))
    assert_array_almost_equal(cw, np.array([1., 1., 1.]))

    # Test with unbalanced class labels.
    y = np.asarray([-1, 0, 0, -2, -2, -2])

    cw = compute_class_weight("balanced", classes, y)
    assert_equal(len(cw), len(classes))
    class_counts = np.bincount(y + 2)
    assert_almost_equal(np.dot(cw, class_counts), y.shape[0])
    assert_array_almost_equal(cw, [2. / 3, 2., 1.])
def test_compute_class_weight_balanced_unordered():
    # Test compute_class_weight when classes are unordered
    classes = np.array([1, 0, 3])
    y = np.asarray([1, 0, 0, 3, 3, 3])

    cw = compute_class_weight("balanced", classes, y)
    class_counts = np.bincount(y)[classes]
    assert_almost_equal(np.dot(cw, class_counts), y.shape[0])
    assert_array_almost_equal(cw, [2., 1., 2. / 3])
def test_compute_class_weight():
    # Test (and demo) compute_class_weight.
    y = np.asarray([2, 2, 2, 3, 3, 4])
    classes = np.unique(y)

    cw = compute_class_weight("balanced", classes, y)
    # total effect of samples is preserved
    class_counts = np.bincount(y)[2:]
    assert_almost_equal(np.dot(cw, class_counts), y.shape[0])
    assert cw[0] < cw[1] < cw[2]
def test_compute_class_weight_default():
    # Test for the case where no weight is given for a present class.
    # Current behaviour is to assign the unweighted classes a weight of 1.
    y = np.asarray([2, 2, 2, 3, 3, 4])
    classes = np.unique(y)
    classes_len = len(classes)

    # Test for non specified weights
    cw = compute_class_weight(None, classes, y)
    assert_equal(len(cw), classes_len)
    assert_array_almost_equal(cw, np.ones(3))

    # Tests for partly specified weights
    cw = compute_class_weight({2: 1.5}, classes, y)
    assert_equal(len(cw), classes_len)
    assert_array_almost_equal(cw, [1.5, 1., 1.])

    cw = compute_class_weight({2: 1.5, 4: 0.5}, classes, y)
    assert_equal(len(cw), classes_len)
    assert_array_almost_equal(cw, [1.5, 1., 0.5])
Example #10
def test_compute_class_weight_auto_unordered():
    # Test compute_class_weight when classes are unordered
    classes = np.array([1, 0, 3])
    y = np.asarray([1, 0, 0, 3, 3, 3])
    cw = assert_warns(DeprecationWarning, compute_class_weight, "auto", classes, y)
    assert_almost_equal(cw.sum(), classes.shape)
    assert_equal(len(cw), len(classes))
    assert_array_almost_equal(cw, np.array([1.636, 0.818, 0.545]), decimal=3)

    cw = compute_class_weight("balanced", classes, y)
    class_counts = np.bincount(y)[classes]
    assert_almost_equal(np.dot(cw, class_counts), y.shape[0])
    assert_array_almost_equal(cw, [2.0, 1.0, 2.0 / 3])
Example #11
def test_compute_class_weight():
    # Test (and demo) compute_class_weight.
    y = np.asarray([2, 2, 2, 3, 3, 4])
    classes = np.unique(y)
    cw = assert_warns(DeprecationWarning, compute_class_weight, "auto", classes, y)
    assert_almost_equal(cw.sum(), classes.shape)
    assert_true(cw[0] < cw[1] < cw[2])

    cw = compute_class_weight("balanced", classes, y)
    # total effect of samples is preserved
    class_counts = np.bincount(y)[2:]
    assert_almost_equal(np.dot(cw, class_counts), y.shape[0])
    assert_true(cw[0] < cw[1] < cw[2])
Example #12
def test_compute_class_weight_dict():
    classes = np.arange(3)
    class_weights = {0: 1.0, 1: 2.0, 2: 3.0}
    y = np.asarray([0, 0, 1, 2])
    cw = compute_class_weight(class_weights, classes, y)

    # When the user specifies class weights, compute_class_weights should just
    # return them.
    assert_array_almost_equal(np.asarray([1.0, 2.0, 3.0]), cw)

    # When a class weight is specified that isn't in classes, a ValueError
    # should get raised
    msg = "Class label 4 not present."
    class_weights = {0: 1.0, 1: 2.0, 2: 3.0, 4: 1.5}
    assert_raise_message(ValueError, msg, compute_class_weight, class_weights, classes, y)
    msg = "Class label -1 not present."
    class_weights = {-1: 5.0, 0: 1.0, 1: 2.0, 2: 3.0}
    assert_raise_message(ValueError, msg, compute_class_weight, class_weights, classes, y)
Example #13
new_shape = (416, 416)

re_train_images, re_train_masks = resize_images_masks(train_images,
                                                      train_masks, new_shape)
re_val_images, re_val_masks = resize_images_masks(val_images, val_masks,
                                                  new_shape)

train_images = re_train_images
train_masks = re_train_masks

val_images = re_val_images
val_masks = re_val_masks

# Compute the appropriate class weights
weights = class_weight.compute_class_weight('balanced',
                                            np.unique(train_masks > 0.5),
                                            (train_masks > 0.5).flatten())
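# Hedged note (not in the original snippet): with a boolean mask, np.unique returns
# [False, True], so weights[0] is the background weight and weights[1] the foreground
# weight. "weight_dict" below is a name introduced here only to show the dict form
# that Keras' class_weight argument expects.
weight_dict = {0: weights[0], 1: weights[1]}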

# Callbacks
filepath = 'weights.hdf5'
checkpoint = ModelCheckpoint(filepath,
                             monitor='val_loss',
                             verbose=1,
                             save_best_only=True,
                             mode='min')
callbacks = [checkpoint]

# Define the model
BACKBONE = 'resnet101'

model = Unet(BACKBONE, classes=1, activation='sigmoid')
train_generator = datagen.flow_from_directory(train_data_dir,
                                              target_size=(img_width,
                                                           img_height),
                                              batch_size=batch_size,
                                              class_mode="categorical",
                                              shuffle=True)

validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode="categorical",
    shuffle=True)

class_weight = class_weight.compute_class_weight(
    'balanced', np.unique(train_generator.classes), train_generator.classes)

print(class_weight)
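# Hedged sketch (not in the original snippet): Keras expects class_weight as a dict
# keyed by class index, so the array printed above would typically be converted before
# being passed to fit()/fit_generator(). "class_weight_dict" is a name introduced here
# for illustration.
class_weight_dict = dict(zip(np.unique(train_generator.classes), class_weight))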

print("Downloading Base Model.....")

base_model = Xception(include_top=False, weights='imagenet')

for layer in base_model.layers:
    layer.trainable = False

# get layers and add average pooling layer
## set model architecture
x = base_model.output
x = attach_attention_module(x, attention_module)
x = GlobalAveragePooling2D()(x)
    def update_general(self, X, y, w, load_func, reset=False):
        
        log = logging.getLogger(__name__)
        
        data_size = y.shape[0]
          
        #figure out the weights
        if w is None and len(y.shape)==1:
            y_unique = np.unique(y)
            weights = class_weight.compute_class_weight(self.class_weight, y_unique, y)
            self.class_actual_weight_ = {}
            for i, y_val in enumerate(y_unique):
                self.class_actual_weight_[y_val] = weights[i]
                
            w = np.zeros(data_size)
            for i,v in enumerate(y):
                w[i] = self.class_actual_weight_[v]
        elif w is None:
            log.warn('Do not know how to make class weights for multidimensional output. If needed, specify weights directly. Assuming uniform weights.')
            w = np.ones(data_size)
        else:
            assert w.shape[0]==data_size, 'Weight size should match data size.'    

        if self.background:        
            queue = multiprocessing.Queue()
        else:
            queue = Queue.Queue()
        

        log.info('Starting to fit the NN model.')
        
        if self.callbacks is not None:
            for callback in self.callbacks:
                if callback is not None:
                    callback.on_train_begin(self)
                
        for epoch in xrange(self.nb_epoch):
            
            last_update = time.time()-1000
            start_time = time.time()

            #generate the progress bar
            if self.verbose>0:
                progbar = Progbar(data_size, width=80, verbose=self.verbose)

            #get random permutation
            p = np.random.permutation(range(data_size))            
                        
            #load the first batch
            batch_idx = p[0:self.memory_batch_size];
            self.matrix_load_into_queue(X, batch_idx, queue, load_func, y, w);
            X_batch,y_batch,w_batch = queue.get()
            
            if reset and epoch==0:
                
                n_features = self.get_dimensions(X_batch)
                log.info('Compiling the NN model with {} dimensions.'.format(n_features))
                self.generate_and_compile_model_(n_features)
            
            samples = 0
            for batch, i in enumerate(xrange(0, len(p), self.memory_batch_size)):
                
                #compute indices for next batch
                next_start = i+len(batch_idx)
                next_end = min(len(p), next_start+self.memory_batch_size)
                if next_end>next_start:
                    #spin the thread up                
                    batch_idx_next = p[next_start:next_end];

                    #load data in background
                    thread = 0                   
                    if self.background:
                        thread = multiprocessing.Process(target=self.matrix_load_into_queue, args=(X,batch_idx_next,queue,load_func, y, w))
                        thread.start()
                else:         
                    batch_idx_next = None
                    thread = None
                
                #perform update
                loss = self.batch_update(X_batch, y_batch, w_batch)
                
                #increment the counter
                samples+= len(batch_idx)

                curr_update = time.time()
                if  self.verbose>0 and (curr_update-last_update>=0.5 or (samples)>=len(p)):
                    progbar.update(samples, [('Loss', loss)])
                    last_update = curr_update

                if self.callbacks is not None:
                    for callback in self.callbacks:
                        if callback is not None:
                            r = callback.on_batch_end(self, epoch+1, batch+1)

                #wait for the next load to happen                
                if thread is not None:
                    #if no background, load the data now
                    if not self.background:
                        self.matrix_load_into_queue(X, batch_idx, queue, load_func, y, w)
                    X_batch,y_batch,w_batch = queue.get()
                    
                    #if loading a background process, do a join
                    if self.background:
                        thread.join()
                    
                #now add the next batch
                batch_idx = batch_idx_next

            finish_time = time.time()-start_time
            if self.verbose>0:
                log.info('Finished epoch {}/{}. Time per epoch (s): {:0.2f}, Time per sample (s): {}.'.format(epoch+1, self.nb_epoch,finish_time,finish_time/len(p)))
            
            #process the end of epoch, and see if need to quit out
            quit_now = False
            if self.callbacks is not None:
                for callback in self.callbacks:
                    if callback is not None:
                        r = callback.on_epoch_end(self, epoch+1)
                        if r is not None and r is True:
                            quit_now = True
                        
            
            if quit_now:
                break    
        
        return self   
Example #16
encoder = LabelEncoder()

y_train = encoder.fit_transform(train['sentiment'].values)
y_train = to_categorical(y_train)

y_test = encoder.fit_transform(test['sentiment'].values)
y_test = to_categorical(y_test)

# get an idea of the distribution of the text values
from collections import Counter
ctr = Counter(train['sentiment'].values)
print('Distribution of Classes:', ctr)

# get class weights for the training data; these will be used when fitting the model
y_train_int = np.argmax(y_train, axis=1)
cws = class_weight.compute_class_weight('balanced', np.unique(y_train_int),
                                        y_train_int)
print(cws)

print('Dominant Class: ', ctr.most_common(n=1)[0][0])
print('Baseline Accuracy Dominant Class',
      (ctr.most_common(n=1)[0][0] == test['sentiment'].values).mean())

preds = np.zeros_like(y_test)
preds[:, 0] = 1
preds[0] = 1  #done to suppress warning from numpy for f1 score
print('F1 Score:', f1_score(y_test, preds, average='weighted'))


def threshold_search(y_true, y_proba, average=None):
    best_threshold = 0
    best_score = 0
Example #17
    model = create_lstm(len(X_train[0]))
    model.summary()

    # save checkpoint
    filepath = checkpoint_dir + "/weightsRibo250-improvement-{epoch:02d}-{val_acc:.2f}.hdf5"
    checkpoint = ModelCheckpoint(filepath,
                                 monitor='val_acc',
                                 verbose=0,
                                 save_best_only=True,
                                 mode='max')
    callbacks_list = [checkpoint]

    print('Fitting model...')
    print(np.unique(y_train))
    class_weight = class_weight.compute_class_weight(
        'balanced', np.unique(y_train),
        y_train)  # y_ints = [y.argmax() for y in y_train]
    print("Class Weights")
    print(class_weight)
    history = model.fit(X_train,
                        y_train,
                        batch_size=BATCH_SIZE,
                        class_weight=class_weight,
                        epochs=EPOCHS,
                        callbacks=callbacks_list,
                        validation_split=0.2,
                        verbose=1,
                        shuffle=True)
    # history = model.fit(X_train, y_train, batch_size=BATCH_SIZE, class_weight="auto", epochs=EPOCHS, callbacks=callbacks_list, validation_split = 0.1, verbose = 1)

    # serialize model to JSON
Example #18
train = X_train/X_train.max()      # scaling the data by the training maximum
X_valid = X_valid/X_train.max()


# i. Building the model
model = Sequential()
model.add(InputLayer((7*7*512,)))    # input layer
model.add(Dense(units=1024, activation='relu', input_dim=7*7*512))   # hidden layer
model.add(Dropout(0.5))      # adding dropout
model.add(Dense(units=512, activation='relu'))    # hidden layer
model.add(Dropout(0.5))      # adding dropout
model.add(Dense(units=256, activation='relu'))    # hidden layer
model.add(Dropout(0.5))      # adding dropout
model.add(Dense(3, activation='softmax'))            # output layer

# ii. Compiling the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# iii. Training the model
# model.fit(train, y_train, epochs=100, validation_data=(X_valid, y_valid))

class_weights = compute_class_weight('balanced',np.unique(data.Class), data.Class)  # computing weights of different classes

filepath="weights.best.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]      # model check pointing based on validation loss

model.fit(train, y_train, epochs=100, validation_data=(X_valid, y_valid), class_weight=class_weights, callbacks=callbacks_list)

Example #19
                                               drop_last=True)
    test_loader = torch.utils.data.DataLoader(MyDataset(test_set, test_label),
                                              batch_size=BATCH_SIZE)

    model = CapsuleNet()
    model = nn.DataParallel(model).cuda()
    optimizer = Adam(model.parameters())
    capsule_loss = CapsuleLoss()

    train_loss_curve = []
    test_loss_curve = []
    train_acc_curve = []
    test_acc_curve = []

    classes = [0, 1]
    weight = compute_class_weight('balanced', classes, training_label)

    # train(train_loader, test_loader)
    for epoch in range(NUM_EPOCHS):
        # TRAIN MODE
        model.train()
        train_loss = 0
        train_acc = 0
        for batch_id, (data, target) in tqdm(enumerate(train_loader, 0),
                                             desc='Batch',
                                             total=len(train_loader)):
            target = torch.sparse.torch.eye(NUM_CLASSES).index_select(
                dim=0, index=target)
            weighted_target = [weight] * BATCH_SIZE
            weighted_target = Variable(
                torch.tensor(weighted_target).float() * target).cuda()
Example #20
# Retrieve data from pickle file. 
# From ubs_process.py, will be a 3-item tuple(X, y(categorical matrix), folds)
vec_type = 'mels'
data_path = os.path.join('pickles', 'urbansound_'+ vec_type + '.p')

with open(data_path, 'rb') as handle:
    data = pickle.load(handle)

X, y, folds = data[0], data[1], data[2]


# Pre-specify global variables for model
# num_rows as specified by number of mfccs or mels. 
# Columns expected to be same as <max_pad_len> in ubs_process
if vec_type == 'mfccs':
    num_rows = 40
elif vec_type == 'mels':
    num_rows = 60 

num_columns = 174
num_channels = 1

X = X.reshape(X.shape[0], num_rows, num_columns, num_channels)

num_labels = y.shape[1]
filter_size = 2

### TRAINING

# user specified number of epochs
num_epochs = int(input('Enter number of epochs: '))
# num_epochs = 72
num_batch_size = 8

# start the timer before training. This will include all the fold durations
start = datetime.now()
# print a model summary
tmp = get_conv_model()
tmp.summary()

### Cross validation. Fold indices pre-specified for UrbanSound8k dataset
fold_accuracies = {}

logo = LeaveOneGroupOut()

for train_idx, test_idx in logo.split(X, y, folds):
    ## test_idx groups samples with the same fold, train_idx is all NOT in the test fold
    X_train, X_test, y_train, y_test = X[train_idx], X[test_idx], y[train_idx], y[test_idx]
    
    ### compute class weights
    y_flat = np.argmax(y_train, axis=1)
    class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_flat), y=y_flat)

    fold = folds[test_idx][0] 

    model = get_conv_model()

    #create checkpoint to save best model 
    checkpoint = ModelCheckpoint(filepath=f'models/{vec_type}/cnn_fold{fold}.hdf5', 
                            monitor='val_acc', verbose=1, save_best_only=True,
                            save_weights_only=False)

    # add early stopping checkpoint
    earlystop = EarlyStopping(monitor='val_acc', patience=50, mode='auto')

    # put the different runs into a tensorboard log directory
    log_dir = f"logs/fit/fold{fold}_" + datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard = TensorBoard(log_dir=log_dir, histogram_freq=2, 
                        batch_size=num_batch_size, write_graph=True, 
                        write_grads=True, write_images=True)

    start_fold = datetime.now()

    history = model.fit(X_train, y_train, batch_size=num_batch_size,
            epochs=num_epochs, class_weight=class_weights, validation_data=(X_test, y_test), 
            callbacks=[checkpoint, earlystop, tensorboard], verbose=1)
    
    duration_fold = datetime.now() - start_fold
    print("Fold training completed in time: ", duration_fold)

    score_test = history.history['val_acc'][-1]
    print("Final Testing Accuracy: ", score_test)

    best_score = max(history.history['val_acc'])
    print("Best Testing Accuracy: ", best_score)

    fold_accuracies[fold] = best_score

    clear_session()


### Review results of total training
duration = datetime.now() - start
print("Training completed in time: ", duration)

# compute average accuracy

for k, v in sorted(fold_accuracies.items()):
    print(f'Fold {k}:    accuracy = {v}')

avg_score = np.mean([v for v in fold_accuracies.values()])
print('Average Accuracy: ', avg_score)
Example #21
        im = cv2.resize(cv2.imread(image_file), (wd, ht))
        #im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
        X_train.append(im)
        Y_train.append([0, 1])
        print(image_file)

    return X_train, Y_train


X_train, labels = datasetCreate(TR_0_folder, TR_1_folder)
X_train = np.array(X_train)
labels = np.array(labels)
tr_indices = np.argmax(labels, axis=1)

cw = class_weight.compute_class_weight(class_weight='balanced',
                                       classes=np.unique(tr_indices),
                                       y=tr_indices)
print(cw)

a = optimizers.Adam(lr=0.0005)

base_model = ResNet50(weights=None,
                      include_top=True,
                      input_shape=(wd, ht, 3),
                      classes=2)

for layer in base_model.layers:
    layer.trainable = True

base_model.compile(optimizer=a,
                   loss='categorical_crossentropy',
Example #22
    totalLosses = all[:, 0].sum()
    giniSum = all[:, 0].cumsum().sum() / totalLosses

    giniSum -= (len(actual) + 1) / 2.
    return giniSum / len(actual)


def gini_normalized(a, p):
    return gini(a, p) / gini(a, a)


model.compile(loss='binary_crossentropy', optimizer=OPTIMIZER, metrics=['acc'])

# Adjust class weights
from sklearn.utils import class_weight
class_weight = class_weight.compute_class_weight('balanced', np.unique(y), y)
class_weight_dict = dict(enumerate(class_weight))
# Train
history = model.fit(x_train,
                    y_train,
                    batch_size=BATCH_SIZE,
                    epochs=NB_EPOCH,
                    verbose=VERBOSE,
                    validation_split=VALIDATION_SPLIT,
                    class_weight=class_weight_dict)

score = model.evaluate(x_test, y_test, verbose=0)
print("Test score:", score[0])
print('Test accuracy:', score[1])

# Calculate gini of test
Example #23
def weight_categorical_crossentropy(weights):
    # per-class weights as a backend variable so they can be used inside the loss graph
    weights = K.variable(weights)

    def loss(y_true, y_pred):
        # normalize predictions so each sample's class probabilities sum to 1
        y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
        # clip to avoid log(0)
        y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
        # class-weighted cross-entropy, summed over the class axis
        loss = y_true * K.log(y_pred) * weights
        loss = -K.sum(loss, -1)
        return loss

    return loss


class_weights = class_weight.compute_class_weight('balanced',
                                                  np.unique(y_train.target),
                                                  y_train.target)
print(class_weights)
# Here is our playground for testing with weights

class_weights = [1.5, 0.5, 1, 1]  # TODO use api result instead 05281347


def fit_hydra_head_model(fX_train, fy_train, fX_valid, fy_valid, n_model):

    oy_train = to_categorical(
        fy_train
    )  # https://www.tensorflow.org/api_docs/python/tf/keras/utils/to_categorical
    oy_valid = to_categorical(fy_valid)

    model = Sequential(head_nn_models[n_model])
def train_DNN(X_train, X_test, y_train, y_test, base_X_train):

    #        def focal_loss(y_true, y_pred):
    #            gamma = 2.0
    #            alpha = 0.25
    #            pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
    #            pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
    #    #        pt_1 = K.clip(pt_1, 1e-3, .999)
    #    #        pt_0 = K.clip(pt_0, 1e-3, .999)
    #
    #            return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log( pt_1))-K.sum((1-alpha) * K.pow( pt_0, gamma) * K.log(1. - pt_0 ))

    #bias for predictions
    fl_pi = 0.01
    final_bias = -np.log((1 - fl_pi) / fl_pi)
    num_labels = len(set(y_test))
    from sklearn.utils import class_weight
    class_weights = class_weight.compute_class_weight('balanced',
                                                      np.unique(y_train),
                                                      y_train)
    tf.keras.backend.clear_session()
    fast_NN = Sequential(name='quick')
    #fast_NN.add(GaussianNoise(.5))
    fast_NN.add(Dense(512, activation='sigmoid', name='input'))
    fast_NN.add(Dropout(0.5))
    fast_NN.add(
        Dense(128,
              activation='relu',
              name='first',
              bias_initializer=tf.keras.initializers.Constant(value=0.1)))
    #fast_NN.add(Dropout(0.5))
    fast_NN.add(
        Dense(64,
              activation='relu',
              name='second',
              bias_initializer=tf.keras.initializers.Constant(value=0.1)))
    #fast_NN.add(Dropout(0.5))
    fast_NN.add(
        Dense(16,
              activation='relu',
              name='third',
              bias_initializer=tf.keras.initializers.Constant(value=0.1)))
    #fast_NN.add(Dropout(0.25))
    fast_NN.add(
        Dense(
            num_labels,
            activation='softmax',
            name='predict',
            bias_initializer=tf.keras.initializers.Constant(value=final_bias)))
    fast_NN.compile(loss='categorical_crossentropy',
                    optimizer='adam',
                    metrics=[
                        'categorical_accuracy',
                        tf.keras.metrics.Recall(),
                        tf.keras.metrics.Precision()
                    ])
    fast_NN_model = fast_NN.fit(X_train,
                                to_categorical(y_train),
                                validation_data=(X_test,
                                                 to_categorical(y_test)),
                                epochs=10,
                                batch_size=500,
                                class_weight=class_weights,
                                shuffle=True,
                                verbose=0)
    test_NN_test_preds = fast_NN.predict(X_test)
    train_NN_test_preds = fast_NN.predict(X_train)
    base_train_NN_test_preds = fast_NN.predict(base_X_train)

    return (train_NN_test_preds[:, 1], test_NN_test_preds[:, 1],
            base_train_NN_test_preds[:, 1], fast_NN_model.history)
    labels_gt = labels_gt[labels_gt < 2]
#%%

batch_size = 128
num_classes = 2
epochs = 5
test_fraction = 0.25
augmentation = True
# input image dimensions
img_rows, img_cols = 50, 50


x_train, x_test, y_train, y_test = train_test_split(
    all_masks_gt, labels_gt, test_size=test_fraction)

class_weight = cw.compute_class_weight('balanced', np.unique(y_train), y_train)

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
#x_train /= 255
#x_test /= 255
print('x_train shape:', x_train.shape)
Example #26
    def fit(self,
            x_train: List[List[str]],
            y_train: Union[List[str], List[List[str]], List[Tuple[str]]],
            x_validate: List[List[str]] = None,
            y_validate: Union[List[str], List[List[str]],
                              List[Tuple[str]]] = None,
            batch_size: int = 64,
            epochs: int = 5,
            class_weight: bool = False,
            fit_kwargs: Dict = None,
            **kwargs):
        """

        :param x_train: list of training data.
        :param y_train: list of training target label data.
        :param x_validate: list of validation data.
        :param y_validate: list of validation target label data.
        :param batch_size: batch size used when fitting the model
        :param epochs: Number of epochs to train the model.
        :param class_weight: set class weights for imbalanced classes
        :param fit_kwargs: additional kwargs to be passed to
               :func:`~keras.models.Model.fit`
        :param kwargs:
        :return:
        """
        assert len(x_train) == len(y_train)
        self.build_token2id_label2id_dict(x_train, y_train, x_validate,
                                          y_validate)

        if len(x_train) < batch_size:
            batch_size = len(x_train) // 2

        if not self.model:
            if self.embedding.sequence_length == 0:
                self.embedding.sequence_length = sorted(
                    [len(x) for x in x_train])[int(0.95 * len(x_train))]
                logging.info('sequence length set to {}'.format(
                    self.embedding.sequence_length))
            self.build_model()

        train_generator = self.get_data_generator(
            x_train, y_train, batch_size, is_bert=self.embedding.is_bert)

        if fit_kwargs is None:
            fit_kwargs = {}

        if x_validate:
            validation_generator = self.get_data_generator(
                x_validate,
                y_validate,
                batch_size,
                is_bert=self.embedding.is_bert)
            fit_kwargs['validation_data'] = validation_generator
            fit_kwargs['validation_steps'] = max(
                len(x_validate) // batch_size, 1)

        if class_weight:
            y_list = self.convert_label_to_idx(y_train)
            class_weights = class_weight_calculte.compute_class_weight(
                'balanced', np.unique(y_list), y_list)
        else:
            class_weights = None

        self.model.fit_generator(train_generator,
                                 steps_per_epoch=len(x_train) // batch_size,
                                 epochs=epochs,
                                 class_weight=class_weights,
                                 **fit_kwargs)
    loss = 'mse'
    model_type = 'lstm_time'
    base_path = '/scratch/sk7898/pedbike/window_256'
    out_dir = '/scratch/sk7898/radar_counting/models/'
    batch_size = 64
    scaling = False
    fileloc = os.path.join(base_path, 'downstream_time')

    x_train, x_val, x_test, y_train, y_val, y_test, seqs_train, seqs_val, seqs_test = get_data(
        fileloc)

    n_bins = int(len(seqs_train) / batch_size)
    assert x_train.shape[0] == y_train.shape[0] == seqs_train.shape[0]

    class_weights = class_weight.compute_class_weight('balanced',
                                                      np.unique(list(y_train)),
                                                      y_train)

    n_timesteps, n_features = None, window * 2
    input_shape = (n_timesteps, n_features)

    y_val = np.array(y_val).reshape(-1, 1)
    train_gen = train_generator(n_bins,
                                x_train,
                                y_train,
                                seq_lengths=seqs_train,
                                padding=True,
                                padding_value=0.0)
    val_gen = val_generator(x_val, y_val)

    output_dir = os.path.join(out_dir + loss, model_type)
Example #28
thio_X = X.iloc[24550:24650, :]
thio_y = y_.iloc[24550:24650]
X = X.drop(X.index[24550:24650])
y_ = y_.drop(y_.index[24550:24650])
y_w = y_w.drop(y_w.index[24550:24650])

X_test = pd.concat([metal_X, sulph_X, dis_X, thio_X], axis=0)
y_test = np.asarray(pd.concat([metal_y, sulph_y, dis_y, thio_y], axis=0))
y_train = np.asarray(y_)
X_train = X

y_w = list(y_w.values)
encoder = LabelEncoder()
encoder.fit(y_w)
y_w = encoder.transform(y_w)
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_w),
                                                  y_w)
print(class_weights, np.unique(y_w))

X_train, y_train = shuffle(X_train, y_train, random_state=42)

y_ = pd.DataFrame(y_)
X_train = np.expand_dims(X_train, axis=2)
X_test = np.expand_dims(X_test, axis=2)

# NN (Skip Connections) Model
input_ = Input(shape=(
    len(X.columns),
    1,
))
x = Conv1D(128, (3),
           padding='same',
Example #29
    def computeClassWeight_balanced(cls, lY):
        Y = np.hstack(lY)
        Y_unique = np.unique(Y)
        class_weights = compute_class_weight("balanced", Y_unique, Y)
        del Y, Y_unique
        return class_weights
Example #30
def get_weight(y):
    class_weight_current = class_weight.compute_class_weight(
        'balanced', np.unique(y), y)
    return class_weight_current
    'dim_x': MAX_QRY_LENGTH,
    'dim_y': MAX_DOC_LENGTH,
    'dim_x1': NUM_OF_FEATS,
    'batch_size': batch_size,
    'shuffle': True
}
'''
# Datasets
partition = {'train': ['id-1', 'id-2', 'id-3'], 'validation': ['id-4']}  # IDs
labels = {'id-1': 0, 'id-2': 1, 'id-3': 2, 'id-4': 1}  # Labels
'''
[partition, labels, partition_answer] = input_data_process.genTrainValidSet()
class_weight = class_weight.compute_class_weight(
    'balanced', np.unique(partition_answer['train']),
    partition_answer['train'])
class_weight = {0: class_weight[0], 1: class_weight[1]}

print "Training: ", len(partition['train'])
print "Validation: ", len(partition['validation'])
print "Class Weight:", class_weight

# Generators
training_generator = DataGenerator(**params).generate(labels,
                                                      partition['train'])
validation_generator = DataGenerator(**params).generate(
    labels, partition['validation'])

# Model check point
checkpoint = ModelCheckpoint(exp_path,
Example #32
def Conv2DClassifierIn1(x_train, y_train, x_test, y_test):
    summary = True
    verbose = 1

    # setHyperParams------------------------------------------------------------------------------------------------
    batch_size = {{choice([32, 64, 128, 256, 512])}}
    epoch = {{choice([25, 50, 75, 100, 125, 150, 175, 200])}}

    conv_block = {{choice(['two', 'three', 'four'])}}

    conv1_num = {{choice([8, 16, 32, 64])}}
    conv2_num = {{choice([16, 32, 64, 128])}}
    conv3_num = {{choice([32, 64, 128])}}
    conv4_num = {{choice([32, 64, 128, 256])}}

    dense1_num = {{choice([128, 256, 512])}}
    dense2_num = {{choice([64, 128, 256])}}

    l1_regular_rate = {{uniform(0.00001, 1)}}
    l2_regular_rate = {{uniform(0.000001, 1)}}
    drop1_num = {{uniform(0.1, 1)}}
    drop2_num = {{uniform(0.0001, 1)}}

    activator = {{choice(['elu', 'relu', 'tanh'])}}
    optimizer = {{choice(['adam', 'rmsprop', 'SGD'])}}

    #---------------------------------------------------------------------------------------------------------------
    kernel_size = (3, 3)
    pool_size = (2, 2)
    initializer = 'random_uniform'
    padding_style = 'same'
    loss_type = 'binary_crossentropy'
    metrics = ['accuracy']
    my_callback = None
    # early_stopping = EarlyStopping(monitor='val_loss', patience=4)
    # checkpointer = ModelCheckpoint(filepath='keras_weights.hdf5',
    #                                verbose=1,
    #                                save_best_only=True)
    # my_callback = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2,
    #                                           patience=5, min_lr=0.0001)

    # build --------------------------------------------------------------------------------------------------------
    input_layer = Input(shape=x_train.shape[1:])
    conv = layers.Conv2D(conv1_num,
                         kernel_size,
                         padding=padding_style,
                         kernel_initializer=initializer,
                         activation=activator)(input_layer)
    conv = layers.Conv2D(conv1_num,
                         kernel_size,
                         padding=padding_style,
                         kernel_initializer=initializer,
                         activation=activator)(conv)
    pool = layers.MaxPooling2D(pool_size, padding=padding_style)(conv)
    if conv_block == 'two':
        conv = layers.Conv2D(conv2_num,
                             kernel_size,
                             padding=padding_style,
                             kernel_initializer=initializer,
                             activation=activator)(pool)
        conv = layers.Conv2D(conv2_num,
                             kernel_size,
                             padding=padding_style,
                             kernel_initializer=initializer,
                             activation=activator)(conv)
        BatchNorm = layers.BatchNormalization(axis=-1)(conv)
        pool = layers.MaxPooling2D(pool_size, padding=padding_style)(BatchNorm)
    elif conv_block == 'three':
        conv = layers.Conv2D(conv2_num,
                             kernel_size,
                             padding=padding_style,
                             kernel_initializer=initializer,
                             activation=activator)(pool)
        conv = layers.Conv2D(conv2_num,
                             kernel_size,
                             padding=padding_style,
                             kernel_initializer=initializer,
                             activation=activator)(conv)
        BatchNorm = layers.BatchNormalization(axis=-1)(conv)
        pool = layers.MaxPooling2D(pool_size, padding=padding_style)(BatchNorm)

        conv = layers.Conv2D(conv3_num,
                             kernel_size,
                             padding=padding_style,
                             kernel_initializer=initializer,
                             activation=activator)(pool)
        conv = layers.Conv2D(conv3_num,
                             kernel_size,
                             padding=padding_style,
                             kernel_initializer=initializer,
                             activation=activator)(conv)
        BatchNorm = layers.BatchNormalization(axis=-1)(conv)
        pool = layers.MaxPooling2D(pool_size, padding=padding_style)(BatchNorm)
    elif conv_block == 'four':
        conv = layers.Conv2D(conv2_num,
                             kernel_size,
                             padding=padding_style,
                             kernel_initializer=initializer,
                             activation=activator)(pool)
        conv = layers.Conv2D(conv2_num,
                             kernel_size,
                             padding=padding_style,
                             kernel_initializer=initializer,
                             activation=activator)(conv)
        BatchNorm = layers.BatchNormalization(axis=-1)(conv)
        pool = layers.MaxPooling2D(pool_size, padding=padding_style)(BatchNorm)

        conv = layers.Conv2D(conv3_num,
                             kernel_size,
                             padding=padding_style,
                             kernel_initializer=initializer,
                             activation=activator)(pool)
        conv = layers.Conv2D(conv3_num,
                             kernel_size,
                             padding=padding_style,
                             kernel_initializer=initializer,
                             activation=activator)(conv)
        BatchNorm = layers.BatchNormalization(axis=-1)(conv)
        pool = layers.MaxPooling2D(pool_size, padding=padding_style)(BatchNorm)

        conv = layers.Conv2D(conv4_num,
                             kernel_size,
                             padding=padding_style,
                             kernel_initializer=initializer,
                             activation=activator)(pool)
        conv = layers.Conv2D(conv4_num,
                             kernel_size,
                             padding=padding_style,
                             kernel_initializer=initializer,
                             activation=activator)(conv)
        BatchNorm = layers.BatchNormalization(axis=-1)(conv)
        pool = layers.MaxPooling2D(pool_size, padding=padding_style)(BatchNorm)

    flat = layers.Flatten()(pool)
    drop = layers.Dropout(drop1_num)(flat)

    dense = layers.Dense(dense1_num,
                         activation=activator,
                         kernel_regularizer=regularizers.l1_l2(
                             l1=l1_regular_rate, l2=l2_regular_rate))(drop)
    BatchNorm = layers.BatchNormalization(axis=-1)(dense)
    drop = layers.Dropout(drop2_num)(BatchNorm)

    dense = layers.Dense(dense2_num,
                         activation=activator,
                         kernel_regularizer=regularizers.l1_l2(
                             l1=l1_regular_rate, l2=l2_regular_rate))(drop)

    output_layer = layers.Dense(len(np.unique(y_train)),
                                activation='softmax')(dense)

    model = models.Model(inputs=input_layer, outputs=output_layer)

    if summary:
        model.summary()

# train(self):
    class_weights = class_weight.compute_class_weight('balanced',
                                                      np.unique(y_train),
                                                      y_train.reshape(-1))
    class_weights_dict = dict(enumerate(class_weights))
    model.compile(
        optimizer=optimizer,
        loss=loss_type,
        metrics=metrics  # accuracy
    )

    result = model.fit(x=x_train,
                       y=y_train,
                       batch_size=batch_size,
                       epochs=epoch,
                       verbose=verbose,
                       callbacks=my_callback,
                       validation_data=(x_test, y_test),
                       shuffle=True,
                       class_weight=class_weights_dict)

    validation_acc = np.amax(result.history['val_acc'])
    print('Best validation acc of epoch:', validation_acc)
    return {'loss': -validation_acc, 'status': STATUS_OK, 'model': model}
Example #33
    def train(self, train_params):
        if self.training_data["loaded"]:
            try:
                x = self.training_data["x_data"]
                y = self.model_data["y_labels"]
                epochs = train_params["epochs"]
                batch_size = train_params["batch_size"]
            except Exception as e:
                print("Incorrect training parameters! ", e)
                return False
        else:
            print("Training data not loaded!")
            return False

        model = self.model
        run_threaded = False
        if train_params.get("threaded"):
            model = train_params["model"].model
            run_threaded = True

        if "eval_data" in train_params:
            eval_data = train_params["eval_data"]
            if isinstance(eval_data, float):
                x, xTest, y, yTest = train_test_split(x,
                                                      y,
                                                      test_size=eval_data)
                eval_data = (xTest, yTest)
        else:
            eval_data = None

        y_ints = [d.argmax() for d in y]
        class_weights = class_weight.compute_class_weight(
            'balanced', np.unique(y_ints), y_ints)
        class_weights = dict(enumerate(class_weights))

        cb = []
        plot_cb = train_params.get("plot_cb", None)
        stop_cb = train_params.get("stop_cb", None)
        save_best = train_params.get("save_best", None)
        steps = int(np.ceil(x.shape[0] / batch_size))
        func = TrainCallback(plot_cb=plot_cb,
                             steps_per_epoch=steps,
                             stop_cb=stop_cb,
                             save_best=save_best,
                             parent=self)
        cb.append(func)

        if plot_cb is not None:
            verbose = 0
        else:
            verbose = 1

        if "dropout" in train_params:
            dropout = train_params["dropout"]
            if isinstance(dropout, dict):
                if dropout["monitor"] in [
                        "acc", "val_acc", "loss", "val_loss"
                ]:
                    cb_early_stop = EarlyStopping(
                        monitor=dropout["monitor"],
                        min_delta=dropout["min_delta"],
                        patience=dropout["patience"],
                        verbose=verbose,
                        mode="auto")
                cb.append(cb_early_stop)

        # This will only save the model, nothing else!
        '''
        if train_params.get("save_best"):
            cb_best = ModelCheckpoint(os.path.join(train_params["save_best"], "best_model.npy"),
                                      monitor="val_loss",
                                      verbose=0,
                                      save_best_only=True,
                                      save_weights_only=False,
                                      mode="auto",
                                      period=1
                                      )
            cb.append(cb_best)
        '''

        if run_threaded:
            tf_session = train_params["session"].as_default()
            tf_graph = train_params["graph"].as_default()
        else:
            tf_session = self.tf_session.as_default()
            tf_graph = self.tf_graph.as_default()

        optimizer = None
        if train_params.get("optimizer"):
            optimizer = train_params["optimizer"]

        with tf_session:
            with tf_graph:
                if optimizer is not None:
                    self.set_optimizer(optimizer)
                if "learning_rate" in train_params:
                    K.set_value(model.optimizer.lr,
                                train_params["learning_rate"])
                history = model.fit(
                    x,
                    y,
                    epochs=epochs,
                    batch_size=batch_size,
                    callbacks=cb,
                    validation_data=eval_data,
                    verbose=verbose,
                    class_weight=class_weights,
                )

        if run_threaded:
            return model, self.get_current_graph(), self.get_current_session()
        else:
            return history
Example #34
validation_batches = ImageDataGenerator(
    rescale=1 / 255.0,
    rotation_range=30,
    zoom_range=0.20,
    fill_mode="nearest",
    shear_range=0.20,
    horizontal_flip=True,
    width_shift_range=0.1,
    height_shift_range=0.1).flow_from_dataframe(val_df,
                                                x_col='image_name',
                                                y_col='target',
                                                target_size=(224, 224),
                                                class_mode="categorical",
                                                shuffle=True)

class_weights = class_weight.compute_class_weight(
    'balanced', np.unique(train_batches.classes), train_batches.classes)

epochs = 100
batch_size = 32

#**********************************************************************************

#***********************************************************************************

from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.layers import GlobalAveragePooling2D
learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc',
                                            patience=5,
                                            verbose=1,
                                            factor=0.5,
                                            min_lr=1e-7)
Example #35
    extra_train_ids = [os.path.join(EXTRA_TRAIN_FOLDER,line.rstrip('\n')) for line in open(os.path.join(EXTRA_TRAIN_FOLDER, 'good_jpgs'))]
    extra_train_ids.sort()
    ids_train.extend(extra_train_ids)

    extra_val_ids = glob.glob(os.path.join(EXTRA_VAL_FOLDER,'*/*.jpg'))
    extra_val_ids.sort()
    ids_val.extend(extra_val_ids)

classes = [get_class(idx.split('/')[-2]) for idx in ids_train]

classes_count = np.bincount(classes)
for class_name, class_count in zip(CLASSES, classes_count):
    print('{:>22}: {:5d} ({:04.1f}%)'.format(class_name, class_count, 100. * class_count / len(classes)))

class_weight = class_weight.compute_class_weight('balanced', np.unique(classes), classes)

ids_test = glob.glob(os.path.join(TEST_FOLDER,'*.tif'))

train_loader = gen(ids_train, args.batch_size)
val_loader = gen(ids_val, args.batch_size)
test_loader = gen(ids_test, args.batch_size)

#%%
print(args)
#original_model = models.resnet101(pretrained=True)
original_model = globals()[args.arch](pretrained=args.pretrained)
if args.finetune:
    for param in original_model.parameters():
        param.requires_grad = False
Example #36
def train_wrapper(filename):  # filename='measurements.mat'
    global dataset
    global seed
    global model
    global ss
    global mX
    global nX
    global mY
    global nY

    data = loadmat(filename)  # this is a dict.
    keys = list(data.keys())[3:]  # skip the three loadmat meta keys (__header__, __version__, __globals__)
    values = list(data.values())[3:]

    dataset = pd.DataFrame()
    dataset = dataset.reindex(columns=keys)  # create an empty dataframe

    for ii in np.arange(len(values)):
        v_ = np.array(values[ii])
        dataset[keys[ii]] = pd.Series(
            v_.flatten())  # cannot add the data to this empty df.

    dataset['y'] = 1 * (dataset['BLER'] <= 0.1)  # H-ARQ target.
    dataset = dataset[['RSRP', 'TBSINR_1', 'rank', 'y']]
    dataset.dropna(inplace=True, axis=0)
    if os.path.exists('dataset.csv'):
        dataset.to_csv('dataset.csv', index=False, mode='a',
                       header=False)  # append
    else:
        dataset.to_csv('dataset.csv', index=False)

    #print(dataset.head())

    # Perform a split 30-70
    train, test = train_test_split(dataset, test_size=0.30, random_state=seed)

    X_train = train.drop('y', axis=1)
    X_test = test.drop('y', axis=1)

    y_train = train['y'].values
    y_test = test['y'].values

    mX, nX = X_train.shape
    mY = y_train.shape
    nY = 1

    ss = MinMaxScaler(feature_range=(0, 1))

    # Scale the variables
    X_train_sc = ss.fit_transform(X_train)
    X_test_sc = ss.transform(X_test)

    model = KerasClassifier(build_fn=create_mlp,
                            verbose=0,
                            epochs=10,
                            batch_size=8)

    # The hyperparameters
    width_dims = [3, 5, 10]
    n_hiddens = [3, 5]  # the depth of hidden layers

    hyperparameters = dict(width=width_dims, depth=n_hiddens)
    class_weights = class_weight.compute_class_weight('balanced',
                                                      np.unique(y_train),
                                                      y_train)

    grid = GridSearchCV(estimator=model,
                        param_grid=hyperparameters,
                        n_jobs=1,
                        cv=3)
    gpu_available = tf.test.is_gpu_available()
    if (gpu_available == False):
        print('WARNING: No GPU available.  Will continue with CPU.')

    with tf.device('/gpu:0'):
        grid_result = grid.fit(X_train_sc, y_train, class_weight=class_weights)

    # This is the best model
    best_model_mlp = grid_result.best_params_
    print(best_model_mlp)

    model = grid_result.best_estimator_
    mlp = model

    y_pred = mlp.predict(X_test_sc)
    y_score = mlp.predict_proba(X_test_sc)

    mu = accuracy_score(y_test, y_pred)
    # Compute ROC curve and ROC area
    try:
        roc_auc = roc_auc_score(y_test, y_score[:, 1])
    except:
        print('WARNING: ROC was not computed.  Returning NaN')
        roc_auc = np.nan

    print('ROC for training is: {}'.format(roc_auc))
    print('Misclassification error for training is: {:.3f}'.format(1 - mu))

    return [roc_auc, 1 - mu]  # model is valid
def calculating_class_weights(labels):
    number_dim = np.shape(labels)[1]
    weights = np.empty([number_dim, 2])
    for i in range(number_dim):
        weights[i] = compute_class_weight('balanced', [0., 1.], labels[:, i])
    return weights
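# Hedged usage sketch (not part of the snippet above): the (n_labels, 2) matrix returned
# by calculating_class_weights can be folded into a weighted binary cross-entropy, where
# weights[i, 0] scales the negative term and weights[i, 1] the positive term of label
# column i. "get_weighted_loss" is a name introduced here for illustration and assumes
# the Keras backend import below.
from tensorflow.keras import backend as K

def get_weighted_loss(weights):
    w = K.constant(weights)                      # shape (n_labels, 2)
    def weighted_loss(y_true, y_pred):
        # pick w[:, 1] where the label is 1 and w[:, 0] where it is 0
        per_label_w = y_true * w[:, 1] + (1.0 - y_true) * w[:, 0]
        return K.mean(per_label_w * K.binary_crossentropy(y_true, y_pred), axis=-1)
    return weighted_loss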
def test_compute_class_weight():
    """Test (and demo) compute_class_weight."""
    classes, y = np.unique(np.asarray([2, 2, 2, 3, 3, 4]), return_inverse=True)
    cw = compute_class_weight("auto", classes, y)
    assert_almost_equal(cw.sum(), classes.shape)
    assert_true(cw[0] < cw[1] < cw[2])
Example #39
    n_tag_2 = len(np.unique(tag_2))
    n_tag_3 = len(np.unique(tag_3))
    n_tag_4 = len(np.unique(tag_4))

    print('Creating stratified train/validation/test splits (80%, 10%, 10%)..')
    sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=555)
    train_index, test_valid_index = next(sss.split(allpkts, meta.filename))
    sss = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=555)
    test_index, val_index = next(
        sss.split(allpkts.iloc[test_valid_index],
                  meta.filename.iloc[test_valid_index]))
    test_index = allpkts.iloc[test_valid_index].iloc[test_index].index
    val_index = allpkts.iloc[test_valid_index].iloc[val_index].index

    print('Calculating class weights..')
    tag_1_class_weights = class_weight.compute_class_weight(
        'balanced', np.unique(tag_1), tag_1)
    tag_2_class_weights = class_weight.compute_class_weight(
        'balanced', np.unique(tag_2), tag_2)
    tag_3_class_weights = class_weight.compute_class_weight(
        'balanced', np.unique(tag_3), tag_3)
    tag_4_class_weights = class_weight.compute_class_weight(
        'balanced', np.unique(tag_4), tag_4)

    losses = {
        'tag_1': 'sparse_categorical_crossentropy',
        'tag_2': 'sparse_categorical_crossentropy',
        'tag_3': 'sparse_categorical_crossentropy',
        'tag_4': 'sparse_categorical_crossentropy'
    }

    class_weights = {
Example #40
            idx_to_transfer = [idx for idx in ids_train \
                if get_class(idx.split('/')[-2]) == class_idx][:max_classes_val_count-classes_val_count[class_idx]]

            ids_train = list(set(ids_train).difference(set(idx_to_transfer)))

            ids_val.extend(idx_to_transfer)

    print("Training set distribution:")
    print_distribution(ids_train)

    print("Validation set distribution:")
    print_distribution(ids_val)

    classes_train = [get_class(idx.split('/')[-2]) for idx in ids_train]
    class_weight = class_weight.compute_class_weight('balanced',
                                                     np.unique(classes_train),
                                                     classes_train)
    classes_val = [get_class(idx.split('/')[-2]) for idx in ids_val]

    weights = [class_weight[i_class] for i_class in classes_train]
    weights = torch.DoubleTensor(weights)
    train_sampler = sampler.WeightedRandomSampler(weights, len(weights))

    weights = [class_weight[i_class] for i_class in classes_val]
    weights = torch.DoubleTensor(weights)
    val_sampler = sampler.WeightedRandomSampler(weights, len(weights))

    train_dataset = IEEECameraDataset(ids_train,
                                      crop_size=CROP_SIZE,
                                      training=True)
    val_dataset = IEEECameraDataset(ids_val,
Example #41
cls_train = generator_train.classes
cls_test = generator_test.classes

# Get the class names of the dataset.
class_names = list(generator_train.class_indices.keys())
# class_names # ['forky', 'knifey', 'spoony']

# Get the total number of classes in the dataset.
num_classes = generator_train.num_class
# print(num_classes)

# The dataset is quite imbalanced, so here we use scikit-learn to compute weights that balance it appropriately.
# During training these weights are applied to the gradient of each image in a batch, scaling that image's contribution to the batch's overall gradient.
from sklearn.utils.class_weight import compute_class_weight
class_weight = compute_class_weight(class_weight='balanced',
                                    classes=np.unique(cls_train),
                                    y=cls_train)
# print(class_weight)

# Transfer learning
'''
The pre-trained VGG16 model cannot classify images from the Knifey-Spoony dataset.
The reason is probably that VGG16 was trained on the so-called ImageNet dataset, which may not contain many images of cutlery.
The lower layers of a convolutional neural network can recognize many different shapes or features in an image. It is the last few fully connected layers that combine those features into a classification of the whole image.
So we can try to reroute the output of the last convolutional layer of the VGG16 model into a new fully connected network that we create for classifying the Knifey-Spoony dataset.

First we print a summary of the VGG16 model so we can see the names and types of its layers, as well as the shapes of the tensors flowing between the layers.
This is one of the main reasons we use the VGG16 model in this tutorial: the Inception v3 model has so many layers that printing them all would be confusing.
'''

# model.summary()
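# Hedged sketch (not in the original snippet) of the rerouting described above: take the
# last pooling layer of VGG16 and attach a new fully connected classifier. 'block5_pool'
# is the standard layer name in Keras' VGG16; num_classes comes from the snippet above;
# transfer_layer and new_model are names introduced here for illustration.
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model

vgg = VGG16(include_top=True, weights='imagenet')
transfer_layer = vgg.get_layer('block5_pool')     # output of the last convolutional block
x = Flatten()(transfer_layer.output)
x = Dense(1024, activation='relu')(x)
outputs = Dense(num_classes, activation='softmax')(x)
new_model = Model(inputs=vgg.input, outputs=outputs)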
def get_class_weights(y_train):
    labels = np.unique(y_train)
    weights = compute_class_weight('balanced', labels, y_train)
    class_weight_dict = {}
    for i, w in enumerate(weights):
        class_weight_dict[labels[i]] = w
    return class_weight_dict

############################################################################

opt_DRf = keras.optimizers.SGD(momentum=0.5, lr=0.01)
DRf = Model(input=[inputs], output=[model(inputs), advmodel(inputs)])
DRf.compile(loss=[make_loss_model(c=1.0),
                  make_loss_advmodel(c=-lam)],
            optimizer=opt_DRf)

opt_DfR = keras.optimizers.SGD(momentum=0.5, lr=0.01)

DfR = Model(input=[inputs], output=[advmodel(inputs)])
DfR.compile(loss=[make_loss_advmodel(c=1.0)], optimizer=opt_DfR)

classWeight = class_weight.compute_class_weight('balanced', np.unique(train_y),
                                                train_y[:])

# Pretraining of "model"
model.trainable = True
advmodel.trainable = False

numberOfEpochs = 100
batchSize = 256
earlystop1 = keras.callbacks.EarlyStopping(monitor='val_loss',
                                           min_delta=0.0001,
                                           patience=10)
#model.summary()

# With sample weights
model.fit(
    train_x,
Example #44
    x_fc = Flatten()(x_fc)
    x_fc = Dense(2, activation='softmax', name='fc2')(x_fc)

    model = Model(img_input, x_fc)

    # load weights
    if weights_path:
        model.load_weights(weights_path, by_name=True)

    return model


## transfer learning with resnet-152

map_characters1 = {0: 'No Pneumonia', 1: 'Yes Pneumonia'}
class_weight1 = class_weight.compute_class_weight('balanced',
                                                  np.unique(y_train), y_train)
# weight_path1 = '/home/patrik/jogos_mortais/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
# weight_path2 = '/home/patrik/jogos_mortais/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5'
# weight_path3 = '/home/patrik/jogos_mortais/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
weight_path4 = '/home/patrik/jogos_mortais/resnet152_weights_tf.h5'
# Patrik = also grab the ResNet weights to use here in another pre-training run
# default VGG input size: 224,224,3 (can be varied)
# pretrained_model_1 = VGG16(weights = weight_path1, include_top=False, input_shape=(img_width, img_height, 3))
# default Inception input size: 299,299,3
# pretrained_model_2 = InceptionV3(weights = weight_path2, include_top=False, input_shape=(img_width, img_height, 3))

# default ResNet input size: 224x224x3
# pretrained_model_3 = ResNet50(weights = weight_path3, include_top=False, input_shape=(img_width, img_height, 3))

# resnet-152
pretrained_model_4 = resnet152_model(weight_path4)
# %% pass the pre-trained BERT to our defined architecture
model = BERT_Arch(bert)

# %% push the model to GPU
model = model.to(device)

# optimizer from hugging face transformers

# define the optimizer
optimizer = AdamW(model.parameters(), lr=1e-3)

# %% - Find Class Weights

# compute the class weights
class_wts = compute_class_weight('balanced', np.unique(train_labels),
                                 train_labels)

print(class_wts)

# convert class weights to tensor
weights = torch.tensor(class_wts, dtype=torch.float)
weights = weights.to(device)

# loss function
cross_entropy = nn.NLLLoss(weight=weights)

# number of training epochs
epochs = 10

# %% - Fine-Tune BERT
    all_masks_gt = all_masks_gt[labels_gt < 2]
    labels_gt = labels_gt[labels_gt < 2]
#%%
batch_size = 128
num_classes = 2
epochs = 5
test_fraction = 0.25
augmentation = True
# input image dimensions
img_rows, img_cols = 48, 48


x_train, x_test, y_train, y_test = train_test_split(
    all_masks_gt, labels_gt, test_size=test_fraction)

class_weight = cw.compute_class_weight('balanced', np.unique(y_train), y_train)

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
#x_train /= 255
#x_test /= 255
print('x_train shape:', x_train.shape)
#PCA
#pca_model = PCA(n_components=10, svd_solver='full')
#X_train_test = pca_model.fit_transform(X_train_test, Y_labels)
#X_to_predict = pca_model.transform(X_to_predict)


#Select best features
#selecter = SelectKBest(chi2, k=6)
#X_train_test = selecter.fit_transform(X_train_test, Y_labels)
#X_to_predict = selecter.transform(X_to_predict)


gettingDistributionOfDatas()

#Getting class_weight distribution
class_weight = class_weight.compute_class_weight('balanced', np.unique(Y_labels), Y_labels)
class_weight_dict = {1: class_weight[0], 0: class_weight[1]}
print(class_weight_dict)
print('')

#Percentage train
porcentagem_de_treino = 0.8
tamanho_de_treino = porcentagem_de_treino * len(Y_labels)
treino_dados = X_train_test[:int(tamanho_de_treino)]
treino_marcacoes = Y_labels[:int(tamanho_de_treino)]


#Percentage test
validacao_dados = X_train_test[int(tamanho_de_treino):]
validacao_marcacoes = Y_labels[int(tamanho_de_treino):]