Example #1
# Imports needed to run this snippet (older standalone-Keras API);
# square_early_concatenate_feature is a project-local helper.
import gzip

import numpy as np
from six.moves import cPickle
from keras.models import model_from_json


def label_glitches(image_data,
                   model_adr,
                   image_size=[140, 170],
                   verbose=False):

    # the path where the trained model is saved
    model_adr += '/'

    np.random.seed(1986)  # for reproducibility

    img_rows, img_cols = image_size[0], image_size[1]

    # load a model and weights
    if verbose:
        print('Retrieving the trained ML classifier')
    load_folder = model_adr
    f = gzip.open(load_folder + '/model.pklz', 'rb')
    json_string = cPickle.load(f)
    f.close()
    final_model = model_from_json(json_string)
    final_model.load_weights(load_folder + '/model_weights.h5')

    final_model.compile(loss='categorical_crossentropy',
                        optimizer='adadelta',
                        metrics=['accuracy'])

    if verbose:
        print('Scoring unlabelled glitches')

    # read in the 4 durations; note the unescaped '.' in these regexes
    # matches any character (an exact match would need e.g. r'1\.0\.png')
    test_set_unlabelled_x_1 = image_data.filter(
        regex=("1.0.png")).iloc[0].iloc[0].reshape(-1, 1, img_rows, img_cols)
    test_set_unlabelled_x_2 = image_data.filter(
        regex=("2.0.png")).iloc[0].iloc[0].reshape(-1, 1, img_rows, img_cols)
    test_set_unlabelled_x_3 = image_data.filter(
        regex=("4.0.png")).iloc[0].iloc[0].reshape(-1, 1, img_rows, img_cols)
    test_set_unlabelled_x_4 = image_data.filter(
        regex=("0.5.png")).iloc[0].iloc[0].reshape(-1, 1, img_rows, img_cols)

    concat_test_unlabelled = square_early_concatenate_feature(
        test_set_unlabelled_x_1, test_set_unlabelled_x_2,
        test_set_unlabelled_x_3, test_set_unlabelled_x_4, [img_rows, img_cols])

    score3_unlabelled = final_model.predict_proba(concat_test_unlabelled,
                                                  verbose=0)

    return score3_unlabelled, np.argmax(score3_unlabelled)
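
A minimal usage sketch for the example above (the DataFrame variable and model directory below are placeholders, not part of the original code):

# Hypothetical call: 'unlabelled_df' stands in for a one-row pandas DataFrame
# whose columns are duration-suffixed image filenames holding pixel arrays.
scores, best_class = label_glitches(image_data=unlabelled_df,
                                    model_adr='/data/trained_model',
                                    image_size=[140, 170],
                                    verbose=True)
print('class probabilities:', scores)
print('most probable class index:', best_class)
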
# Imports (same stack as the first example); load_dataset_unlabelled_glitches
# and square_early_concatenate_feature are project-local helpers.
import glob
import gzip
import os

import numpy as np
from six.moves import cPickle
from keras.models import model_from_json


def main(pickle_adr, model_adr, save_adr, verbose):

    # pickles of unlabelled glitches have already been saved at this path
    pickle_adr += '/'

    # the path where the trained model is saved
    model_adr += '/'

    # the path where the .csv result files are saved
    save_adr += '/'

    if not os.path.exists(save_adr):
        if verbose:
            print('making... ' + save_adr)
        os.makedirs(save_adr)

    np.random.seed(1986)  # for reproducibility

    img_rows, img_cols = 47, 57
    nb_classes = 20

    # load a model and weights
    if verbose:
        print('Retrieving the trained ML classifier')
    load_folder = model_adr
    f = gzip.open(load_folder + '/model.pklz', 'rb')
    json_string = cPickle.load(f)
    f.close()
    final_model = model_from_json(json_string)
    final_model.load_weights(load_folder + '/model_weights.h5')

    final_model.compile(loss='categorical_crossentropy',
                        optimizer='adadelta',
                        metrics=['accuracy'])

    if verbose:
        print('Scoring unlabelled glitches')

    # reading all 4 duration pickles
    # pickles for the four image durations (alphabetical order)
    unlabelled_pickles = ['img_1.0*', 'img_2.0*', 'img_4.0*', 'img_5.0*']

    # read duration 1 second
    dataset_test_unlabelled_1 = load_dataset_unlabelled_glitches(
        glob.glob(pickle_adr + unlabelled_pickles[0])[0], verbose)
    [test_set_unlabelled_x_1, test_set_unlabelled_y_1,
     test_set_unlabelled_name_1] = dataset_test_unlabelled_1
    test_set_unlabelled_x_1 = test_set_unlabelled_x_1.reshape(-1, 1, img_rows, img_cols)

    # read duration 2 seconds
    dataset_test_unlabelled_2 = load_dataset_unlabelled_glitches(
        glob.glob(pickle_adr + unlabelled_pickles[1])[0], verbose)
    [test_set_unlabelled_x_2, test_set_unlabelled_y_2,
     test_set_unlabelled_name_2] = dataset_test_unlabelled_2
    test_set_unlabelled_x_2 = test_set_unlabelled_x_2.reshape(-1, 1, img_rows, img_cols)

    # read duration 4 seconds
    dataset_test_unlabelled_3 = load_dataset_unlabelled_glitches(
        glob.glob(pickle_adr + unlabelled_pickles[2])[0], verbose)
    [test_set_unlabelled_x_3, test_set_unlabelled_y_3,
     test_set_unlabelled_name_3] = dataset_test_unlabelled_3
    test_set_unlabelled_x_3 = test_set_unlabelled_x_3.reshape(-1, 1, img_rows, img_cols)

    # read duration 5 seconds
    dataset_test_unlabelled_4 = load_dataset_unlabelled_glitches(
        glob.glob(pickle_adr + unlabelled_pickles[3])[0], verbose)
    [test_set_unlabelled_x_4, test_set_unlabelled_y_4,
     test_set_unlabelled_name_4] = dataset_test_unlabelled_4
    test_set_unlabelled_x_4 = test_set_unlabelled_x_4.reshape(-1, 1, img_rows, img_cols)

    if verbose:
        print('The number of unlabelled glitches is: ', test_set_unlabelled_x_1.shape[0])


    concat_test_unlabelled = square_early_concatenate_feature(
        test_set_unlabelled_x_1, test_set_unlabelled_x_2,
        test_set_unlabelled_x_3, test_set_unlabelled_x_4,
        [img_rows, img_cols])

    score3_unlabelled = final_model.predict_proba(concat_test_unlabelled, verbose=0)

    name_array_unlabelled = np.array([test_set_unlabelled_name_1.tolist()])
    name_array_unlabelled = np.transpose(name_array_unlabelled)

    dw = np.concatenate((name_array_unlabelled, score3_unlabelled), axis=1)

    return dw[0], np.argmax(score3_unlabelled[0])
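
A usage sketch for this variant (all three directories are placeholders; the function expects the unlabelled-glitch pickles, the trained model, and a writable results directory at these paths):

# Hypothetical invocation with placeholder directories.
row, best_class = main(pickle_adr='/data/unlabelled_pickles',
                       model_adr='/data/trained_model',
                       save_adr='/data/results',
                       verbose=True)
print('glitch name and class scores:', row)
print('most probable class index:', best_class)
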
Example #5
# Imports for this training example (older standalone-Keras API); build_cnn,
# my_load_dataset and square_early_concatenate_feature are project-local helpers.
import gzip
import os

import numpy as np
from six.moves import cPickle
from keras.callbacks import ModelCheckpoint
from keras.layers import Dense
from keras.models import Sequential
from keras.utils import np_utils


def main(batch_size, nb_epoch, train_flag, pickle_adr, save_address,
         number_of_classes, verbose):

    np.random.seed(1986)  # for reproducibility

    img_rows, img_cols = 47, 57
    nb_classes = number_of_classes

    if verbose:
        print('save address', save_address)
        print('train flag', train_flag)

    if not os.path.exists(save_address):
        if verbose:
            print('making... ' + save_address)
        os.makedirs(save_address)

    dataset_name1 = 'img_5.0'
    ad1 = pickle_adr + dataset_name1 + '_class' + str(
        nb_classes) + '_norm.pkl.gz'

    dataset_name2 = 'img_4.0'
    ad2 = pickle_adr + dataset_name2 + '_class' + str(
        nb_classes) + '_norm.pkl.gz'

    dataset_name3 = 'img_1.0'
    ad3 = pickle_adr + dataset_name3 + '_class' + str(
        nb_classes) + '_norm.pkl.gz'

    dataset_name4 = 'img_2.0'
    ad4 = pickle_adr + dataset_name4 + '_class' + str(
        nb_classes) + '_norm.pkl.gz'

    print('batch size', batch_size)
    print('epochs', nb_epoch)
    print('flag mode', train_flag)
    print('Reading the pickles from: ', pickle_adr)
    print('pickle_1: ', ad1)
    print('pickle_2: ', ad2)
    print('pickle_3: ', ad3)
    print('pickle_4: ', ad4)
    print('saving the trained model in: ', save_address)

    datasets1 = my_load_dataset(ad1)
    test_set_x_1, test_set_y_1, test_set_name_1 = datasets1[2]
    valid_set_x_1, valid_set_y_1, valid_set_name_1 = datasets1[1]
    train_set_x_1, train_set_y_1, train_set_name_1 = datasets1[0]
    test_set_x_1 = test_set_x_1.reshape(-1, 1, img_rows, img_cols)
    train_set_x_1 = train_set_x_1.reshape(-1, 1, img_rows, img_cols)
    valid_set_x_1 = valid_set_x_1.reshape(-1, 1, img_rows, img_cols)

    datasets2 = my_load_dataset(ad2)
    test_set_x_2, test_set_y_2, test_set_name_2 = datasets2[2]
    valid_set_x_2, valid_set_y_2, valid_set_name_2 = datasets2[1]
    train_set_x_2, train_set_y_2, train_set_name_2 = datasets2[0]
    test_set_x_2 = test_set_x_2.reshape(-1, 1, img_rows, img_cols)
    train_set_x_2 = train_set_x_2.reshape(-1, 1, img_rows, img_cols)
    valid_set_x_2 = valid_set_x_2.reshape(-1, 1, img_rows, img_cols)

    datasets3 = my_load_dataset(ad3)
    test_set_x_3, test_set_y_3, test_set_name_3 = datasets3[2]
    valid_set_x_3, valid_set_y_3, valid_set_name_3 = datasets3[1]
    train_set_x_3, train_set_y_3, train_set_name_3 = datasets3[0]
    test_set_x_3 = test_set_x_3.reshape(-1, 1, img_rows, img_cols)
    train_set_x_3 = train_set_x_3.reshape(-1, 1, img_rows, img_cols)
    valid_set_x_3 = valid_set_x_3.reshape(-1, 1, img_rows, img_cols)

    datasets4 = my_load_dataset(ad4)
    test_set_x_4, test_set_y_4, test_set_name_4 = datasets4[2]
    valid_set_x_4, valid_set_y_4, valid_set_name_4 = datasets4[1]
    train_set_x_4, train_set_y_4, train_set_name_4 = datasets4[0]
    test_set_x_4 = test_set_x_4.reshape(-1, 1, img_rows, img_cols)
    train_set_x_4 = train_set_x_4.reshape(-1, 1, img_rows, img_cols)
    valid_set_x_4 = valid_set_x_4.reshape(-1, 1, img_rows, img_cols)

    assert (test_set_y_2 == test_set_y_1).all()
    assert (valid_set_y_2 == valid_set_y_1).all()
    assert (train_set_y_2 == train_set_y_1).all()

    assert (test_set_y_2 == test_set_y_3).all()
    assert (valid_set_y_2 == valid_set_y_3).all()
    assert (train_set_y_2 == train_set_y_3).all()

    assert (test_set_y_3 == test_set_y_4).all()
    assert (valid_set_y_3 == valid_set_y_4).all()
    assert (train_set_y_3 == train_set_y_4).all()

    assert (test_set_name_1 == test_set_name_2).all()
    assert (test_set_name_2 == test_set_name_3).all()
    assert (test_set_name_3 == test_set_name_4).all()

    assert (valid_set_name_1 == valid_set_name_2).all()
    assert (valid_set_name_2 == valid_set_name_3).all()
    assert (valid_set_name_3 == valid_set_name_4).all()

    assert (train_set_name_1 == train_set_name_2).all()
    assert (train_set_name_2 == train_set_name_3).all()
    assert (train_set_name_3 == train_set_name_4).all()

    cat_train_set_y_1 = np_utils.to_categorical(train_set_y_1, nb_classes)
    cat_valid_set_y_1 = np_utils.to_categorical(valid_set_y_1, nb_classes)
    cat_test_set_y_1 = np_utils.to_categorical(test_set_y_1, nb_classes)

    concat_train = square_early_concatenate_feature(train_set_x_1,
                                                    train_set_x_2,
                                                    train_set_x_3,
                                                    train_set_x_4,
                                                    [img_rows, img_cols])
    concat_test = square_early_concatenate_feature(test_set_x_1, test_set_x_2,
                                                   test_set_x_3, test_set_x_4,
                                                   [img_rows, img_cols])
    concat_valid = square_early_concatenate_feature(valid_set_x_1,
                                                    valid_set_x_2,
                                                    valid_set_x_3,
                                                    valid_set_x_4,
                                                    [img_rows, img_cols])

    cnn1 = build_cnn(img_rows * 2, img_cols * 2)
    final_model = Sequential()
    final_model.add(cnn1)
    final_model.add(Dense(nb_classes, activation='softmax'))

    # model_optimizer = RMSprop(lr=0.1)
    final_model.compile(
        loss='categorical_crossentropy',
        optimizer='adadelta',
        # optimizer=model_optimizer,
        metrics=['accuracy'])

    model_adr = save_address + '/models/'
    if not os.path.exists(model_adr):
        print('making models directory ... ' + model_adr)
        os.makedirs(model_adr)

    acc_checker = ModelCheckpoint(model_adr + "/best_weights.h5",
                                  monitor='val_acc',
                                  verbose=1,
                                  save_best_only=True,
                                  mode='max',
                                  save_weights_only=True)

    if train_flag:
        print(concat_train.shape[0], 'train samples')
        print(concat_valid.shape[0], 'validation samples')
        print(concat_test.shape[0], 'test samples')

        final_model.fit(concat_train,
                        cat_train_set_y_1,
                        batch_size=batch_size,
                        epochs=nb_epoch,
                        verbose=1,
                        validation_data=(concat_valid, cat_valid_set_y_1),
                        callbacks=[acc_checker])

        final_model.load_weights(model_adr + "/best_weights.h5")
        score = final_model.evaluate(concat_test, cat_test_set_y_1, verbose=0)
        print('Test score:', score[0])
        print('Test accuracy:', score[1])

        # print(final_model.summary())
        print('done')
    else:
        all_data_for_train = np.append(concat_train, concat_valid, axis=0)
        all_data_for_train = np.append(all_data_for_train, concat_test, axis=0)
        all_label_for_train = np.append(cat_train_set_y_1,
                                        cat_valid_set_y_1,
                                        axis=0)
        all_label_for_train = np.append(all_label_for_train,
                                        cat_test_set_y_1,
                                        axis=0)

        print('Number of training samples:', all_data_for_train.shape[0])
        final_model.fit(all_data_for_train,
                        all_label_for_train,
                        batch_size=batch_size,
                        epochs=nb_epoch,
                        verbose=1,
                        validation_data=(concat_valid, cat_valid_set_y_1),
                        callbacks=[acc_checker])
        final_model.load_weights(model_adr + "/best_weights.h5")

    # save model and weights
    json_string = final_model.to_json()
    f = gzip.open(save_address + '/model.pklz', 'wb')
    cPickle.dump(json_string, f, protocol=cPickle.HIGHEST_PROTOCOL)
    f.close()
    final_model.save_weights(save_address + '/model_weights.h5',
                             overwrite=True)
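
A sketch of driving the trainer (argument values and directories are placeholders; note that pickle_adr needs a trailing slash, since the function concatenates dataset filenames onto it directly):

if __name__ == '__main__':
    # Placeholder values; adjust to the local dataset layout.
    main(batch_size=30,
         nb_epoch=100,
         train_flag=True,
         pickle_adr='/data/pickles/',
         save_address='/data/trained_model',
         number_of_classes=20,
         verbose=True)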