Esempio n. 1
0
def predictionPooling(p):
    """Pool a stack of prediction rows into a single score vector.

    For a 2-D array, each row is thresholded at 1.5x its own median, the
    surviving scores are doubled and squared, averaged over the rows
    (axis 0), shifted so the minimum is zero and, if the maximum exceeds
    1.0, scaled so the maximum is 1.0. Presumably rows are per-spectrogram
    predictions and columns are classes -- TODO confirm against the caller.

    Args:
        p: numpy array of prediction scores.

    Returns:
        For 2-D input, a 1-D array of length ``p.shape[1]``. For any other
        dimensionality, ``p`` itself, unchanged. The caller's array is never
        modified.
    """

    # You can test different prediction pooling strategies here
    if p.ndim != 2:
        return p

    try:

        # Median filtered pooling for monophonic recordings.
        # np.where builds a new array, so the input is left untouched
        # (the previous in-place assignment overwrote the caller's data).
        row_median = np.median(p, axis=1, keepdims=True)
        filtered = np.where(p < row_median * 1.5, 0.0, p)
        p_pool = np.mean((filtered * 2)**2, axis=0)
        p_pool -= p_pool.min()
        peak = p_pool.max()
        if peak > 1.0:
            p_pool /= peak

        # Mean exponential pooling for monophonic recordings
        #p_pool = np.mean((p * 2) ** 2, axis=0)
        #p_pool[p_pool > 1.0] = 1.0

        # Simple average pooling
        #p_pool = np.mean(p, axis=0)
        #p_pool = sigmoid(p_pool)

    except Exception:
        # Best-effort fallback: random scores of the right length.
        # Only ordinary errors are caught, so Ctrl-C and SystemExit
        # still propagate.
        p_pool = cfg.getRandomState().normal(0.0, 1.0, (p.shape[1]))

    return p_pool
Esempio n. 2
0
def build_pi_model():
    """Assemble the strided-convolution network used for the Raspberry Pi.

    The layout is read from ``cfg``: one batch-normalised stride-2
    convolution per entry of ``cfg.FILTERS``, then two dense + dropout
    stages and a final dense layer with ``len(cfg.CLASSES)`` units and a
    softmax nonlinearity.

    Returns:
        The last layer of the assembled network.
    """

    log.i('BUILDING RASBPERRY PI MODEL...')

    # Random seed for the layer initialisers
    lasagne_random.set_rng(cfg.getRandomState())

    # Input layer for images
    input_shape = (None, cfg.IM_DIM, cfg.IM_SIZE[1], cfg.IM_SIZE[0])
    net = l.InputLayer(input_shape)

    # Convolutional layer groups: convolution + stride, then batch norm
    for group_idx, n_filters in enumerate(cfg.FILTERS):
        conv = l.Conv2DLayer(net,
                             num_filters=n_filters,
                             filter_size=cfg.KERNEL_SIZES[group_idx],
                             num_groups=cfg.NUM_OF_GROUPS[group_idx],
                             pad='same',
                             stride=2,
                             W=initialization(cfg.NONLINEARITY),
                             nonlinearity=nonlinearity(cfg.NONLINEARITY))
        net = batch_norm(conv)

        group_shape = l.get_output_shape(net)
        log.i(('\tGROUP', group_idx + 1, 'OUT SHAPE:', group_shape))

    # Two identical fully connected stages, each followed by dropout
    for _ in range(2):
        net = l.DenseLayer(net,
                           cfg.DENSE_UNITS,
                           nonlinearity=nonlinearity(cfg.NONLINEARITY),
                           W=initialization(cfg.NONLINEARITY))
        net = l.DropoutLayer(net, p=cfg.DROPOUT)

    # Classification layer (softmax)
    net = l.DenseLayer(net,
                       len(cfg.CLASSES),
                       nonlinearity=nonlinearity('softmax'),
                       W=initialization('softmax'))

    log.i(("\tFINAL NET OUT SHAPE:", l.get_output_shape(net)))
    log.i("...DONE!")

    # Model stats: count the layers that carry a weight attribute
    weighted_layers = 0
    for layer in l.get_all_layers(net):
        if hasattr(layer, 'W'):
            weighted_layers += 1
    log.i(("MODEL HAS", weighted_layers, "WEIGHTED LAYERS"))
    log.i(("MODEL HAS", l.count_params(net), "PARAMS"))

    return net
Esempio n. 3
0
def getSpecBatches(split):
    """Yield one stacked batch of prepared spectrograms per entry of ``split``.

    Each entry is indexed as ``entry[0]`` (path handed to
    ``audio.specsFromFile``) and ``entry[1]`` (passed through unchanged).
    Spectrograms are resized, normalised and prepared via the ``image``
    helpers and stacked with ``np.vstack`` until
    ``cfg.MAX_SPECS_PER_FILE`` rows are reached. When a file produces no
    spectrogram at all, a single random-noise spectrogram is used instead.

    Yields:
        ``(spec_batch, entry[1], filename)`` where ``filename`` is the last
        ``os.sep``-separated component of ``entry[0]``.
    """

    # Random seed
    rng = cfg.getRandomState()

    # One batch per test file
    for entry in split:

        batch = []

        spec_source = audio.specsFromFile(entry[0],
                                          cfg.SAMPLE_RATE,
                                          cfg.SPEC_LENGTH,
                                          cfg.SPEC_OVERLAP,
                                          cfg.SPEC_MINLEN,
                                          shape=(cfg.IM_SIZE[1],
                                                 cfg.IM_SIZE[0]),
                                          fmin=cfg.SPEC_FMIN,
                                          fmax=cfg.SPEC_FMAX,
                                          spec_type=cfg.SPEC_TYPE)

        for raw_spec in spec_source:

            # Resize -> normalize -> prepare as network input
            resized = image.resize(raw_spec,
                                   cfg.IM_SIZE[0],
                                   cfg.IM_SIZE[1],
                                   mode=cfg.RESIZE_MODE)
            normalized = image.normalize(resized,
                                         cfg.ZERO_CENTERED_NORMALIZATION)
            prepared = image.prepare(normalized)

            # The first spec starts the batch, later ones are stacked on
            if len(batch) == 0:
                batch = prepared
            else:
                batch = np.vstack((batch, prepared))

            # Batch too large?
            if batch.shape[0] >= cfg.MAX_SPECS_PER_FILE:
                break

        # No specs? Fall back to a random-noise spectrogram
        if len(batch) == 0:
            noise = rng.normal(0.0, 1.0, (cfg.IM_SIZE[1], cfg.IM_SIZE[0]))
            batch = image.prepare(noise)

        # Shuffle spec batch
        batch = shuffle(batch, random_state=rng)

        # Yield batch, labels and filename
        yield batch[:cfg.MAX_SPECS_PER_FILE], entry[1], entry[0].split(os.sep)[-1]
Esempio n. 4
0
def sortDataset(mdata):
    """Copy every recording in ``mdata`` into a train or val class folder.

    For each class key the file list is shuffled (and written back into
    ``mdata``). The first ``max(1, 10%)`` entries are copied to
    ``<TRAINSET_PATH>/val/<sci-name>/`` and the rest to
    ``<TRAINSET_PATH>/train/<sci-name>/``. The metadata dict of every file
    is also dumped as JSON into ``<TRAINSET_PATH>/metadata/``.

    Args:
        mdata: dict mapping a class key to a list of metadata dicts; each
            dict is read for its ``'sci-name'`` and ``'filename'`` entries.

    Returns:
        None. The function works through side effects on disk and on
        ``mdata``.
    """

    # Single-argument print(...) behaves the same on Python 2 and 3
    print('PARSING CLASSES...')

    # Parse classes
    for c in mdata:

        print('\t%s' % (c,))

        # Determine size of val split (10% but at least 1 file)
        val = max(1, len(mdata[c]) * 0.1)

        # Shuffle list of files
        mdata[c] = shuffle(mdata[c], random_state=cfg.getRandomState())

        # Parse list of files and copy to destination.
        # enumerate gives the position directly; the former
        # mdata[c].index(f) was a linear search per file and returned the
        # first *equal* entry, which mis-assigns duplicates.
        for position, f in enumerate(mdata[c]):

            # Get class name (we use the sci-name which makes it easier to
            # evaluate with background species). The submission format
            # uses class id only - so we have to figure that out later.
            cname = f['sci-name']

            # Make folders
            m_path = os.path.join(cfg.TRAINSET_PATH, 'metadata')
            t_path = os.path.join(cfg.TRAINSET_PATH, 'train', cname)
            v_path = os.path.join(cfg.TRAINSET_PATH, 'val', cname)
            for folder in (m_path, t_path, v_path):
                if not os.path.exists(folder):
                    os.makedirs(folder)

            # Store metadata; the JSON name is everything before the first
            # '.' of the file name (rsplit without maxsplit splits on all dots)
            json_name = f['filename'].rsplit('.')[0] + '.json'
            with open(os.path.join(m_path, json_name), 'w') as mfile:
                json.dump(f, mfile)

            # Copy audio file into the val or train folder
            target_dir = v_path if position < val else t_path
            copyfile(os.path.join(cfg.TRAINSET_PATH, 'wav', f['filename']),
                     os.path.join(target_dir, f['filename']))

    print('...DONE!')
0
def parseTestSet():
    """Collect test files and their ground-truth species lists.

    Walks ``cfg.TESTSET_PATH`` and keeps files from folders whose name is in
    ``cfg.CLASSES`` (at most ``cfg.MAX_TEST_SAMPLES_PER_CLASS`` per folder
    when that limit is positive). The result is shuffled, truncated to
    ``cfg.MAX_TEST_FILES`` and sorted. For every kept file the matching
    JSON in ``cfg.METADATA_PATH`` is read to obtain the species.

    Returns:
        A list of ``(file_path, species)`` tuples. ``species`` starts with
        the file's ``'sci-name'`` and, if ``cfg.TEST_WITH_BG_SPECIES`` is
        set, is followed by its ``'background'`` entries.
    """

    # Random seed
    rng = cfg.getRandomState()

    # Status
    log.i('PARSING TEST SET...', new_line=False)

    # List of test files
    candidates = []
    for folder, _subdirs, folder_files in os.walk(cfg.TESTSET_PATH):
        if folder.split(os.sep)[-1] not in cfg.CLASSES:
            continue
        for taken, fname in enumerate(folder_files, 1):
            candidates.append(os.path.join(folder, fname))
            if taken >= cfg.MAX_TEST_SAMPLES_PER_CLASS and cfg.MAX_TEST_SAMPLES_PER_CLASS > 0:
                break

    shuffled = shuffle(candidates, random_state=rng)
    selected = sorted(shuffled[:cfg.MAX_TEST_FILES])

    # Get ground truth from metadata
    test_set = []
    for wav_path in selected:

        # Metadata file shares the name up to the first '.'
        json_name = wav_path.split(os.sep)[-1].split('.')[0] + '.json'

        # Load JSON
        with open(os.path.join(cfg.METADATA_PATH, json_name)) as jfile:
            meta = json.load(jfile)

        # Species (+ background species).
        # Only species present in the trained classes are relevant for the
        # metric; everything is added here and sorted out later.
        background = meta['background'] if cfg.TEST_WITH_BG_SPECIES else []
        test_set.append((wav_path, [meta['sci-name']] + background))

    # Status
    log.i('DONE!')
    log.i(('TEST FILES:', len(test_set)))

    return test_set
Esempio n. 6
0
def parseDataset():
    """List the dataset's image paths by class and split them into train/val.

    Sub-folders of ``cfg.DATASET_PATH`` are the class labels, optionally
    restricted by ``cfg.CLASS_WHITELIST`` and truncated to
    ``cfg.MAX_CLASSES``. Each class contributes at most
    ``cfg.MAX_SAMPLES_PER_CLASS`` files accepted by ``isValidClass``.
    Classes with fewer than ``cfg.MIN_SAMPLES_PER_CLASS`` files are padded
    with randomly repeated samples.

    Returns:
        ``(classes, train, val)`` where ``train`` and ``val`` are lists of
        ``(path, label)`` tuples and ``val`` holds the last
        ``int(len(images) * cfg.VAL_SPLIT)`` shuffled samples.
    """

    # Random Seed
    rng = cfg.getRandomState()

    # We use subfolders as class labels
    classes = [folder
               for folder in sorted(os.listdir(cfg.DATASET_PATH))
               if folder in cfg.CLASS_WHITELIST or len(cfg.CLASS_WHITELIST) == 0]
    if not cfg.SORT_CLASSES_ALPHABETICALLY:
        classes = shuffle(classes, random_state=rng)
    classes = classes[:cfg.MAX_CLASSES]

    # Now we enlist all image paths for each class
    images = []
    sample_count = {}
    for c in classes:
        class_dir = os.path.join(cfg.DATASET_PATH, c)
        c_images = [os.path.join(class_dir, path)
                    for path in shuffle(os.listdir(class_dir), random_state=rng)
                    if isValidClass(c, path)][:cfg.MAX_SAMPLES_PER_CLASS]

        sample_count[c] = len(c_images)
        images += c_images

        # Do we want to correct class imbalance?
        # This will affect validation scores as we use some samples in
        # TRAIN and VAL.
        # NOTE(review): a class with no valid files makes randint(0, 0)
        # fail here when MIN_SAMPLES_PER_CLASS > 0 -- confirm whether that
        # hard stop is intended.
        while sample_count[c] < cfg.MIN_SAMPLES_PER_CLASS:
            images += [c_images[rng.randint(0, len(c_images))]]
            sample_count[c] += 1

    # Add labels to image paths (label = name of the containing folder)
    images = [(path, path.split(os.sep)[-2]) for path in images]

    # Shuffle image paths
    images = shuffle(images, random_state=rng)

    # Validation split.
    # Use an explicit split index: with vsplit == 0 the former
    # images[:-vsplit] / images[-vsplit:] evaluated to [] / everything,
    # i.e. an empty training set.
    vsplit = int(len(images) * cfg.VAL_SPLIT)
    vsplit = min(max(vsplit, 0), len(images))
    split_at = len(images) - vsplit
    train = images[:split_at]
    val = images[split_at:]

    # Show some stats
    log.i(("CLASSES:", len(classes)))
    log.i(("CLASS LABELS:", sorted(sample_count.items(), key=operator.itemgetter(1))))
    log.i(("TRAINING IMAGES:", len(train)))
    log.i(("VALIDATION IMAGES:", len(val)))

    return classes, train, val
Esempio n. 7
0
# Author: Stefan Kahl, 2018, Chemnitz University of Technology

import os
import time

import numpy as np
import cv2

from sklearn.utils import shuffle

import config as cfg
from utils import audio
from utils import log

######################## CONFIG #########################
# Module-level random state, fetched from the config when this module is
# first imported.
RANDOM = cfg.getRandomState()


######################### SPEC ##########################
def getSpecs(path):

    specs = []
    noise = []

    # Get mel-specs for file
    for spec in audio.specsFromFile(path,
                                    rate=cfg.SAMPLE_RATE,
                                    seconds=cfg.SPEC_LENGTH,
                                    overlap=cfg.SPEC_OVERLAP,
                                    minlen=cfg.SPEC_MINLEN,
                                    fmin=cfg.SPEC_FMIN,
Esempio n. 8
0
def build_baseline_model():
    """Assemble the baseline convolutional classifier described by ``cfg``.

    One batch-normalised convolution group is created per entry of
    ``cfg.FILTERS``. Down-sampling is done either by 2x2 max pooling (when
    ``cfg.MAX_POOLING`` is set) or by a stride of 2 in the convolution.
    Each group is optionally followed by dropout. The groups are followed
    by a 1x1 convolution, global pooling and a softmax dense layer with
    ``len(cfg.CLASSES)`` units.

    Returns:
        The last layer of the assembled network.
    """

    log.i('BUILDING BASELINE MODEL...')

    # Random seed for the layer initialisers
    lasagne_random.set_rng(cfg.getRandomState())

    # Input layer for images
    input_shape = (None, cfg.IM_DIM, cfg.IM_SIZE[1], cfg.IM_SIZE[0])
    net = l.InputLayer(input_shape)

    # Stride size (as an alternative to max pooling)
    conv_stride = 1 if cfg.MAX_POOLING else 2

    # Convolutional layer groups
    for i, n_filters in enumerate(cfg.FILTERS):

        # Convolution + stride, wrapped in batch norm
        conv = l.Conv2DLayer(net,
                             num_filters=n_filters,
                             filter_size=cfg.KERNEL_SIZES[i],
                             num_groups=cfg.NUM_OF_GROUPS[i],
                             pad='same',
                             stride=conv_stride,
                             W=initialization(cfg.NONLINEARITY),
                             nonlinearity=nonlinearity(cfg.NONLINEARITY))
        net = batch_norm(conv)

        # Pooling layer
        if cfg.MAX_POOLING:
            net = l.MaxPool2DLayer(net, pool_size=2)

        # Dropout layer (we support different types of dropout)
        dropout_kind = cfg.DROPOUT_TYPE
        if cfg.DROPOUT > 0.0:
            if dropout_kind == 'channels':
                net = l.dropout_channels(net, p=cfg.DROPOUT)
            elif dropout_kind == 'location':
                net = l.dropout_location(net, p=cfg.DROPOUT)
            else:
                net = l.DropoutLayer(net, p=cfg.DROPOUT)

        log.i(('\tGROUP', i + 1, 'OUT SHAPE:', l.get_output_shape(net)))

    # Final 1x1 convolution; `i` still holds the index of the last group
    final_conv = l.Conv2DLayer(net,
                               num_filters=cfg.FILTERS[i] * 2,
                               filter_size=1,
                               W=initialization('identity'),
                               nonlinearity=nonlinearity('identity'))
    net = batch_norm(final_conv)

    log.i(('\tFINAL CONV OUT SHAPE:', l.get_output_shape(net)))

    # Global pooling layer (GlobalPoolLayer is used with its default mode)
    net = l.GlobalPoolLayer(net)
    log.i(("\tFINAL POOLING SHAPE:", l.get_output_shape(net)))

    # Classification layer (softmax)
    net = l.DenseLayer(net,
                       len(cfg.CLASSES),
                       nonlinearity=nonlinearity('softmax'),
                       W=initialization('softmax'))

    log.i(("\tFINAL NET OUT SHAPE:", l.get_output_shape(net)))
    log.i("...DONE!")

    # Model stats: count the layers that carry a weight attribute
    weighted_layers = 0
    for layer in l.get_all_layers(net):
        if hasattr(layer, 'W'):
            weighted_layers += 1
    log.i(("MODEL HAS", weighted_layers, "WEIGHTED LAYERS"))
    log.i(("MODEL HAS", l.count_params(net), "PARAMS"))

    return net
Esempio n. 9
0
def build_resnet_model():
    """Assemble the residual network described by ``cfg``.

    A first convolution without nonlinearity is followed by one residual
    stack per entry of ``cfg.FILTERS``. Each stack opens with a stride-2
    ``resblock`` and continues with ``cfg.RESNET_N - 1`` further blocks
    created with ``preactivated=False``. The filter count of a stack is
    ``cfg.FILTERS[i] * cfg.RESNET_K``. Batch norm, a nonlinearity, global
    pooling and a dense + softmax classifier with ``len(cfg.CLASSES)``
    units complete the net.

    Returns:
        The last layer of the assembled network.
    """

    log.i('BUILDING RESNET MODEL...')

    # Random seed for the layer initialisers
    lasagne_random.set_rng(cfg.getRandomState())

    # Input layer for images
    input_shape = (None, cfg.IM_DIM, cfg.IM_SIZE[1], cfg.IM_SIZE[0])
    net = l.InputLayer(input_shape)

    # First convolution
    net = l.Conv2DLayer(net,
                        num_filters=cfg.FILTERS[0],
                        filter_size=cfg.KERNEL_SIZES[0],
                        pad='same',
                        W=initialization(cfg.NONLINEARITY),
                        nonlinearity=None)

    log.i(("\tFIRST CONV OUT SHAPE:", l.get_output_shape(net), "LAYER:",
           len(l.get_all_layers(net)) - 1))

    # Residual stacks
    for i, base_filters in enumerate(cfg.FILTERS):

        # Opening block of the stack carries the stride
        net = resblock(net,
                       filters=base_filters * cfg.RESNET_K,
                       kernel_size=cfg.KERNEL_SIZES[i],
                       stride=2,
                       num_groups=cfg.NUM_OF_GROUPS[i])

        # Remaining blocks of the stack
        remaining_blocks = cfg.RESNET_N - 1
        for _ in range(remaining_blocks):
            net = resblock(net,
                           filters=base_filters * cfg.RESNET_K,
                           kernel_size=cfg.KERNEL_SIZES[i],
                           num_groups=cfg.NUM_OF_GROUPS[i],
                           preactivated=False)

        log.i(("\tRES STACK", i + 1, "OUT SHAPE:", l.get_output_shape(net),
               "LAYER:", len(l.get_all_layers(net)) - 1))

    # Post activation
    net = batch_norm(net)
    net = l.NonlinearityLayer(net, nonlinearity=nonlinearity(cfg.NONLINEARITY))

    # Pooling
    net = l.GlobalPoolLayer(net)
    log.i(("\tFINAL POOLING SHAPE:", l.get_output_shape(net), "LAYER:",
           len(l.get_all_layers(net)) - 1))

    # Classification layer: identity dense layer, softmax applied separately
    net = l.DenseLayer(net,
                       len(cfg.CLASSES),
                       nonlinearity=nonlinearity('identity'),
                       W=initialization('identity'))
    net = l.NonlinearityLayer(net, nonlinearity=nonlinearity('softmax'))

    log.i(("\tFINAL NET OUT SHAPE:", l.get_output_shape(net), "LAYER:",
           len(l.get_all_layers(net))))
    log.i("...DONE!")

    # Model stats: count the layers that carry a weight attribute
    weighted_layers = 0
    for layer in l.get_all_layers(net):
        if hasattr(layer, 'W'):
            weighted_layers += 1
    log.i(("MODEL HAS", weighted_layers, "WEIGHTED LAYERS"))
    log.i(("MODEL HAS", l.count_params(net), "PARAMS"))

    return net
Esempio n. 10
0
def getSpecBatches(split):
    """Yield timestamped batches of prepared spectrograms for each file.

    For every entry ``t`` of ``split``, spectrograms from
    ``audio.specsFromFile(t[0], ...)`` are resized, normalised, prepared
    and stacked. As soon as ``cfg.SPECS_PER_PREDICTION`` rows have been
    collected, the batch is yielded together with a timestamp and a new
    batch is started.

    Yields:
        ``(spec_batch, t[1], ts, filename)`` where ``ts`` comes from
        ``getTimestamp(int(pred_start), int(pred_end))`` and ``filename``
        is the last ``os.sep``-separated component of ``t[0]``.

    NOTE(review): in the code visible here, specs still pending when a
    file's loop ends (or when the MAX_SPECS_PER_FILE break fires) are never
    yielded -- confirm whether the function continues beyond this excerpt.
    """

    # Random Seed (not used in the visible part of this function)
    random = cfg.getRandomState()

    # Make predictions for every testfile
    for t in split:

        # Spec batch
        spec_batch = []

        # Keep track of timestamps
        pred_start = 0

        # Get specs for file
        for spec in audio.specsFromFile(t[0],
                                        cfg.SAMPLE_RATE,
                                        cfg.SPEC_LENGTH,
                                        cfg.SPEC_OVERLAP,
                                        cfg.SPEC_MINLEN,
                                        shape=(cfg.IM_SIZE[1], cfg.IM_SIZE[0]),
                                        fmin=cfg.SPEC_FMIN,
                                        fmax=cfg.SPEC_FMAX):

            # Resize spec
            spec = image.resize(spec,
                                cfg.IM_SIZE[0],
                                cfg.IM_SIZE[1],
                                mode=cfg.RESIZE_MODE)

            # Normalize spec
            spec = image.normalize(spec, cfg.ZERO_CENTERED_NORMALIZATION)

            # Prepare as input
            spec = image.prepare(spec)

            # Add to batch (the first spec starts it, later ones are stacked)
            if len(spec_batch) > 0:
                spec_batch = np.vstack((spec_batch, spec))
            else:
                spec_batch = spec

            # Batch too large?
            # This check runs before the yield below, so a batch that hits
            # the limit ends the file without being yielded.
            if spec_batch.shape[0] >= cfg.MAX_SPECS_PER_FILE:
                break

            # Do we have enough specs for a prediction?
            if len(spec_batch) >= cfg.SPECS_PER_PREDICTION:

                # Calculate next timestamp: one full spec length plus one
                # (length - overlap) step for every further spec in the batch
                pred_end = pred_start + cfg.SPEC_LENGTH + (
                    (len(spec_batch) - 1) *
                    (cfg.SPEC_LENGTH - cfg.SPEC_OVERLAP))

                # Store prediction
                ts = getTimestamp(int(pred_start), int(pred_end))

                # Advance to next timestamp (the next window starts one
                # overlap before the end of this one)
                pred_start = pred_end - cfg.SPEC_OVERLAP

                yield spec_batch, t[1], ts, t[0].split(os.sep)[-1]

                # Start a fresh spec batch for the next prediction
                spec_batch = []
Esempio n. 11
0
def resetRandomState():
    """Rebind the module-level ``RANDOM`` to a new state from ``cfg``."""
    global RANDOM
    RANDOM = cfg.getRandomState()
Esempio n. 12
0
def train(NET, TRAIN, VAL):
    """Train ``NET`` on ``TRAIN``, validate on ``VAL`` and save the best net.

    Runs epochs ``cfg.EPOCH_START`` .. ``cfg.EPOCHS`` (inclusive). Each
    epoch shuffles ``TRAIN``, trains on its batches, evaluates on ``VAL``
    and logs loss, accuracy and mean LRAP. Snapshots are written every
    ``cfg.SNAPSHOT_EPOCHS`` epochs. Training ends early when
    ``cfg.DO_BREAK`` is set, on ``KeyboardInterrupt`` (which also sets
    ``cfg.DO_BREAK``), or when the best epoch lies
    ``cfg.EARLY_STOPPING_WAIT`` or more epochs back.

    If ``cfg.TEACHER`` lists teacher models, the training targets of each
    batch are replaced by the mean of the teachers' outputs.

    Args:
        NET: the network to train; replaced by the result of
            ``io.loadParams`` when ``cfg.PRETRAINED_MODEL_NAME`` is set.
        TRAIN: training samples, passed to ``bg.nextBatch``.
        VAL: validation samples, passed to ``bg.nextBatch``.

    Returns:
        Whatever ``io.saveModel`` returns for the best net.
    """

    # Random Seed
    rng = cfg.getRandomState()
    image.resetRandomState()

    # Load pretrained model
    if cfg.PRETRAINED_MODEL_NAME:
        snapshot = io.loadModel(cfg.PRETRAINED_MODEL_NAME)
        NET = io.loadParams(NET, snapshot['params'])

    # Load teacher models (an empty cfg.TEACHER simply yields no teachers)
    teach_funcs = []
    for t in cfg.TEACHER:
        snapshot = io.loadModel(t)
        TEACHER = snapshot['net']
        teach_funcs.append(birdnet.test_function(TEACHER, hasTargets=False))

    # Compile Theano functions
    train_net = birdnet.train_function(NET)
    test_net = birdnet.test_function(NET)

    # Status
    log.i("START TRAINING...")

    # Train for some epochs...
    for epoch in range(cfg.EPOCH_START, cfg.EPOCHS + 1):

        try:

            # Stop?
            if cfg.DO_BREAK:
                break

            # Clear stats for every epoch
            stats.clearStats()
            stats.setValue('sample_count', len(TRAIN) + len(VAL))

            # Start timer
            stats.tic('epoch_time')

            # Shuffle dataset (this way we get "new" batches every epoch)
            TRAIN = shuffle(TRAIN, random_state=rng)

            # Iterate over TRAIN batches of images
            for image_batch, target_batch in bg.nextBatch(TRAIN):

                # Show progress
                stats.showProgress(epoch)

                # If we have a teacher, we use that model to get new targets:
                # collect every teacher's output and average over teachers
                if len(teach_funcs) > 0:
                    target_batch = np.zeros((len(teach_funcs), target_batch.shape[0], target_batch.shape[1]), dtype='float32')
                    for i, teach_func in enumerate(teach_funcs):
                        target_batch[i] = teach_func(image_batch)
                    target_batch = np.mean(target_batch, axis=0)

                # Calling the training functions returns the current loss
                loss = train_net(image_batch, target_batch, lr.dynamicLearningRate(cfg.LR_SCHEDULE, epoch))
                stats.setValue('train loss', loss, 'append')
                stats.setValue('batch_count', 1, 'add')

                # Stop?
                if cfg.DO_BREAK:
                    break

            # Iterate over VAL batches of images
            for image_batch, target_batch in bg.nextBatch(VAL, False, True):

                # Calling the test function returns the net output, loss and accuracy
                prediction_batch, loss, acc = test_net(image_batch, target_batch)
                stats.setValue('val loss', loss, 'append')
                stats.setValue('val acc', acc, 'append')
                stats.setValue('batch_count', 1, 'add')
                stats.setValue('lrap', [metrics.lrap(prediction_batch, target_batch)], 'add')

                # Show progress
                stats.showProgress(epoch)

                # Stop?
                if cfg.DO_BREAK:
                    break

            # Show stats for epoch
            stats.showProgress(epoch, done=True)
            stats.toc('epoch_time')
            log.r(('TRAIN LOSS:', np.mean(stats.getValue('train loss'))), new_line=False)
            log.r(('VAL LOSS:', np.mean(stats.getValue('val loss'))), new_line=False)
            log.r(('VAL ACC:', int(np.mean(stats.getValue('val acc')) * 10000) / 100.0, '%'), new_line=False)
            log.r(('MLRAP:', int(np.mean(stats.getValue('lrap')) * 1000) / 1000.0), new_line=False)
            log.r(('TIME:', stats.getValue('epoch_time'), 's'))

            # Save snapshot? (stray debug print between the two saves removed)
            if not epoch % cfg.SNAPSHOT_EPOCHS:
                io.saveModel(NET, cfg.CLASSES, epoch)
                io.saveParams(NET, cfg.CLASSES, epoch)

            # New best net?
            # NOTE(review): this stores a reference to NET, not a copy. If
            # training updates NET's parameters in place, 'best_net' will
            # hold the latest weights rather than the best ones -- confirm.
            epoch_mlrap = np.mean(stats.getValue('lrap'))
            if epoch_mlrap > stats.getValue('best_mlrap'):
                stats.setValue('best_net', NET, static=True)
                stats.setValue('best_epoch', epoch, static=True)
                stats.setValue('best_mlrap', epoch_mlrap, static=True)

            # Early stopping?
            if epoch - stats.getValue('best_epoch') >= cfg.EARLY_STOPPING_WAIT:
                log.i('EARLY STOPPING!')
                break

            # Stop?
            if cfg.DO_BREAK:
                break

        except KeyboardInterrupt:
            log.i('KeyboardInterrupt')
            cfg.DO_BREAK = True
            break

    # Status
    log.i('TRAINING DONE!')
    log.r(('BEST MLRAP:', stats.getValue('best_mlrap'), 'EPOCH:', stats.getValue('best_epoch')))

    # Save best model and return (stray debug print removed)
    io.saveParams(stats.getValue('best_net'), cfg.CLASSES, stats.getValue('best_epoch'))
    return io.saveModel(stats.getValue('best_net'), cfg.CLASSES, stats.getValue('best_epoch'))