Example #1

acc = []
for i in range(20):
    n_classes = 4
    dataset = OneHotDataset(n_classes)
    a_xs = dataset.x
    a_ys = dataset.y
    v_xs = dataset.x
    v_ys = dataset.y
    # scale audio and visual data to the 0-1 range
    a_xs = MinMaxScaler().fit_transform(a_xs)
    v_xs = MinMaxScaler().fit_transform(v_xs)
    a_dim = len(a_xs[0])
    v_dim = len(v_xs[0])
    som_a = SOM(5,
                5,
                a_dim,
                checkpoint_dir=soma_path,
                n_iterations=100,
                batch_size=4)
    som_v = SOM(5,
                5,
                v_dim,
                checkpoint_dir=somv_path,
                n_iterations=100,
                batch_size=4)
    som_a.train(a_xs)
    som_v.train(v_xs)
    som_a.memorize_examples_by_class(a_xs, a_ys)
    som_v.memorize_examples_by_class(v_xs, v_ys)
    hebbian_model = HebbianModel(som_a,
                                 som_v,
                                 a_dim=a_dim,
                                 # remaining arguments assumed, as in Example #6
                                 v_dim=v_dim,
                                 n_presentations=1,
                                 learning_rate=1,
                                 n_classes=n_classes,
                                 checkpoint_dir=hebbian_path)

Example #2

for i in range(0, 1):
    a_xs_train, a_ys_train, a_xs_test, a_ys_test = get_random_classes(
        a_xs, a_ys, classes, 10, 2)
    v_xs_train, v_ys_train, v_xs_test, v_ys_test = get_random_classes(
        v_xs, v_ys, classes, 10, 2)
    # print('shape audio input train', np.shape(a_xs_train))
    # print('shape audio labels train', np.shape(a_ys_train))
    # print('shape visual input train', np.shape(v_xs_train))
    # print('shape visual labels train', np.shape(v_ys_train))
    a_dim = len(a_xs[0])
    v_dim = len(v_xs[0])
    som_a = SOM(20,
                30,
                a_dim,
                alpha=0.9,
                sigma=30,
                n_iterations=100,
                tau=0.1,
                threshold=0.6,
                batch_size=1)
    type_file = 'visual_' + str(i + 1)
    som_v = SOM(20,
                30,
                v_dim,
                alpha=0.7,
                sigma=15,
                n_iterations=100,
                threshold=0.6,
                batch_size=1,
                data=type_file)
    som_a.train(a_xs_train,
                # remaining arguments assumed, following Example #6's train call
                input_classes=a_ys_train)

Example #3

if __name__ == '__main__':
    a_xs, a_ys = from_csv(audio_data_path)
    v_xs, v_ys = from_csv_visual_10classes(visual_data_path)
    # fix labels to 0-9 range
    a_ys = [int(y) - 1000 for y in a_ys]
    v_ys = [int(y) - 1000 for y in v_ys]
    # scale data to 0-1 range
    a_xs = MinMaxScaler().fit_transform(a_xs)
    v_xs = MinMaxScaler().fit_transform(v_xs)
    # create fake examples for audio
    a_xs, a_ys = create_dummy_audio_examples(a_xs, v_xs, a_ys, v_ys)
    a_dim = len(a_xs[0])
    v_dim = len(v_xs[0])
    som_a = SOM(20, 30, a_dim, checkpoint_dir=soma_path, n_iterations=200)
    som_v = SOM(20, 30, v_dim, checkpoint_dir=somv_path, n_iterations=200)
    som_a.restore_trained()
    som_v.restore_trained()
    for n in range(1, 15):
        hebbian_model = HebbianModel(som_a,
                                     som_v,
                                     a_dim=a_dim,
                                     v_dim=v_dim,
                                     n_presentations=n,
                                     checkpoint_dir=hebbian_path,
                                     tau=0.1,
                                     learning_rate=100)
        # create the folds
        v_ys = np.array(v_ys)
        v_xs = np.array(v_xs)
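        # hedged continuation sketch (not in the original snippet): the fold
        # construction is omitted here; train and evaluate as in Example #6
        hebbian_model.train(a_xs, v_xs)
        accuracy = hebbian_model.evaluate(a_xs, v_xs, a_ys, v_ys, source='a',
                                          prediction_alg='regular')
        print('n={}, accuracy={}'.format(n, accuracy))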

Example #4

from models.som.SOM import SOM
from models.som.SOMTest import showSom
from utils.utils import load_data
from utils.utils import from_csv_with_filenames
from utils.constants import Constants
import joblib  # replaces the deprecated sklearn.externals.joblib
import os
import logging
import numpy as np

csv_path = os.path.join(Constants.DATA_FOLDER, '10classes', 'audio_data.csv')
LOAD = True

if __name__ == '__main__':
    logging.info('Loading data')
    xs, ys, filenames = from_csv_with_filenames(csv_path)
    ys = [int(y) - 1000 for y in ys]
    vect_size = len(xs[0])
    audio_som = SOM(20,
                    30,
                    vect_size,
                    n_iterations=100,
                    checkpoint_dir=os.path.join(Constants.DATA_FOLDER,
                                                '10classes', 'audio_model',
                                                ''))
    if not LOAD:
        logging.info('Training som')
        audio_som.train(xs)
    else:
        logging.info('Restoring trained som')
        audio_som.restore_trained()
    #audio_som.plot_som(xs, ys, plot_name='audio_som.png')
    showSom(audio_som, xs, ys, 1, 'Audio Map', filenames=filenames)

Example #5

from models.som.SOM import SOM
from models.som.SOMTest import showSom
import numpy as np
from utils.constants import Constants
from utils.utils import from_csv_visual
from sklearn.preprocessing import MinMaxScaler
import os
import logging

visual_data_path = os.path.join(Constants.DATA_FOLDER, '10classes',
                                'VisualInputTrainingSet.csv')
N = 1000
lenExample = 2048

if __name__ == '__main__':
    v_xs, v_ys = from_csv_visual(visual_data_path)
    v_xs = MinMaxScaler().fit_transform(v_xs)

    som = SOM(20,
              30,
              lenExample,
              checkpoint_dir=os.path.join(Constants.DATA_FOLDER,
                                          'visual_model_mine', ''),
              n_iterations=100,
              sigma=4.0)

    som.restore_trained()

    showSom(som, v_xs, v_ys, 1, 'Visual map')

Example #6

if __name__ == '__main__':
    acc = []
    for i in range(20):
        n_classes = 4
        dataset = OneHotDataset(n_classes)
        a_xs = dataset.x
        a_ys = dataset.y
        v_xs = dataset.x
        v_ys = dataset.y
        # scale audio and visual data to the 0-1 range
        a_xs = MinMaxScaler().fit_transform(a_xs)
        v_xs = MinMaxScaler().fit_transform(v_xs)
        a_dim = len(a_xs[0])
        v_dim = len(v_xs[0])
        som_a = SOM(5, 5, a_dim, n_iterations=100, batch_size=4)
        som_v = SOM(5, 5, v_dim, n_iterations=100, batch_size=4)
        som_a.train(a_xs, input_classes=a_ys)
        som_v.train(v_xs, input_classes=v_ys)
        som_a.memorize_examples_by_class(a_xs, a_ys)
        som_v.memorize_examples_by_class(v_xs, v_ys)
        hebbian_model = HebbianModel(som_a, som_v, a_dim=a_dim,
                                     v_dim=v_dim, n_presentations=1, learning_rate=1, n_classes=n_classes,
                                     checkpoint_dir=hebbian_path)
        print('Training...')
        hebbian_model.train(a_xs, v_xs)
        print('Evaluating...')
        accuracy = hebbian_model.evaluate(a_xs, v_xs, a_ys, v_ys, source='a', prediction_alg='regular')
        hebbian_model.make_plot(a_xs[0], v_xs[0], v_ys[0], v_xs, source='a')
        acc.append(accuracy)
        print('n={}, accuracy={}'.format(1, accuracy))
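
    # hedged addition (not in the original): summarize the 20 runs collected in
    # acc; assumes numpy is imported as np, as in the other examples
    print('mean accuracy={:.3f}, std={:.3f}'.format(np.mean(acc), np.std(acc)))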

Example #7

from models.som.SOM import SOM
from models.som.HebbianModel import HebbianModel  # import path assumed
from utils.constants import Constants
from utils.utils import from_csv_with_filenames, from_csv_visual
import os

soma_path = os.path.join(Constants.DATA_FOLDER, '10classes', 'audio_model', '')
somv_path = os.path.join(Constants.DATA_FOLDER, '10classes', 'visual_model',
                         '')
hebbian_path = os.path.join(Constants.DATA_FOLDER, '10classes',
                            'hebbian_model', '')
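# the trailing '' arguments make os.path.join return paths ending in a separator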
audio_data_path = os.path.join(Constants.DATA_FOLDER, '10classes',
                               'audio_data.csv')
visual_data_path = os.path.join(Constants.DATA_FOLDER, '10classes',
                                'VisualInputTrainingSet.csv')

if __name__ == '__main__':
    a_xs, a_ys, filenames = from_csv_with_filenames(audio_data_path)
    v_xs, v_ys = from_csv_visual(visual_data_path)
    a_dim = len(a_xs[0])
    v_dim = len(v_xs[0])
    som_a = SOM(20, 30, a_dim, checkpoint_dir=soma_path)
    som_v = SOM(20, 30, v_dim, checkpoint_dir=somv_path)
    som_a.restore_trained()
    som_v.restore_trained()
    hebbian_model = HebbianModel(som_a,
                                 som_v,
                                 a_dim=a_dim,
                                 v_dim=v_dim,
                                 n_presentations=10,
                                 checkpoint_dir=hebbian_path)
    hebbian_model.train(a_xs[:10], v_xs[:10])
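
    # hedged continuation sketch (not in the original snippet): score the
    # trained mapping with the evaluate call used in Example #6
    accuracy = hebbian_model.evaluate(a_xs, v_xs, a_ys, v_ys, source='a',
                                      prediction_alg='regular')
    print('accuracy={}'.format(accuracy))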

Example #8

    parser.add_argument('--alpha', metavar='alpha', type=float, default=0.0001,
                        help='The SOM initial learning rate')
    parser.add_argument('--epochs', type=int, default=10000,
                        help='Number of epochs the SOM will be trained for')

    args = parser.parse_args()

    if args.data == 'audio':
        xs, ys, _ = from_csv_with_filenames(audio_data_path)
    elif args.data == 'video':
        xs, ys = from_csv_visual_100classes(visual_data_path)
    else:
        raise ValueError('--data argument not recognized')

    dim = len(xs[0])

    som = SOM(args.neurons1, args.neurons2, dim, n_iterations=args.epochs,
              alpha=args.alpha, checkpoint_loc=args.path, tau=0.1, threshold=0.6,
              batch_size=args.batch, data=args.data, sigma=args.sigma)

    ys = np.array(ys)
    xs = np.array(xs)

    if args.subsample:
        xs, _, ys, _ = train_test_split(xs, ys, test_size=0.6, stratify=ys, random_state=args.seed)
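
    # split proportions: 20% of the (possibly subsampled) data is held out for
    # test; the remaining 80% is then halved into train and validation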

    xs_train, xs_test, ys_train, ys_test = train_test_split(xs, ys, test_size=0.2, stratify=ys,
                                                            random_state=args.seed)

    xs_train, xs_val, ys_train, ys_val = train_test_split(xs_train, ys_train, test_size=0.5, stratify=ys_train,
                                                          random_state=args.seed)

    # note: transform_data is applied to train and validation; the transformed
    # validation split is what gets assigned to xs_test here
    xs_train, xs_test = transform_data(xs_train, xs_val, rotation=args.rotation)

Example #9

if __name__ == '__main__':
    a_xs, a_ys, _ = from_csv_with_filenames(audio_data_path)
    v_xs, v_ys = from_csv_visual(visual_data_path)
    # fix labels to 0-9 range
    a_ys = [int(y) - 1000 for y in a_ys]
    v_ys = [int(y) - 1000 for y in v_ys]
    # scale data to 0-1 range
    a_xs = MinMaxScaler().fit_transform(a_xs)
    v_xs = MinMaxScaler().fit_transform(v_xs)
    a_dim = len(a_xs[0])
    v_dim = len(v_xs[0])
    som_a = SOM(20,
                30,
                a_dim,
                checkpoint_dir=soma_path,
                n_iterations=200,
                tau=0.1,
                threshold=0.6)
    som_v = SOM(20,
                30,
                v_dim,
                checkpoint_dir=somv_path,
                n_iterations=200,
                tau=0.1,
                threshold=0.6)

    v_ys = np.array(v_ys)
    v_xs = np.array(v_xs)
    a_xs = np.array(a_xs)
    a_ys = np.array(a_ys)
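
    # hedged continuation sketch (not in the original snippet): an 80/20 split
    # and SOM training, mirroring the calls used in Examples #2 and #12;
    # assumes train_test_split is imported
    a_xs_train, a_xs_test, a_ys_train, a_ys_test = train_test_split(
        a_xs, a_ys, test_size=0.2)
    v_xs_train, v_xs_test, v_ys_train, v_ys_test = train_test_split(
        v_xs, v_ys, test_size=0.2)
    som_a.train(a_xs_train)
    som_v.train(v_xs_train)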

Example #10

from models.som.SOM import SOM
from models.som.wordLearningTest import iterativeTraining
from utils.constants import Constants
from utils.utils import from_csv_with_filenames
import os
"""
Train an auditory SOM and test it alongside the visual one.
"""

somv_path = os.path.join(Constants.DATA_FOLDER, '10classes', 'visual_model')

somu_path = os.path.join(Constants.DATA_FOLDER, '10classes', 'audio_model')

audio_data_path = os.path.join(Constants.DATA_FOLDER, '10classes',
                               'audio_data_40t.csv')

if __name__ == '__main__':
    xs, ys, filenames = from_csv_with_filenames(audio_data_path)
    vect_size = len(xs[0])
    audio_som = SOM(20,
                    30,
                    vect_size,
                    n_iterations=100,
                    checkpoint_dir=somu_path)
    audio_som.train(xs)
    iterativeTraining(somv_path, somu_path)

Example #11

    parser.add_argument('--batch', type=int, default=128)

    args = parser.parse_args()

    if args.data == 'audio':
        xs, ys, _ = from_csv_with_filenames(audio_data_path)
    elif args.data == 'video':
        xs, ys = from_csv_visual_100classes(visual_data_path)
    else:
        raise ValueError('--data argument not recognized')

    dim = len(xs[0])

    som = SOM(args.neurons1, args.neurons2, dim, n_iterations=args.epochs,
              alpha=args.alpha, tau=0.1, threshold=0.6, batch_size=args.batch,
              data=args.data, sigma=args.sigma, num_classes=args.classes,
              sigma_decay='constant')

    ys = np.array(ys)
    xs = np.array(xs)

    if args.subsample:
        xs, _, ys, _ = train_test_split(xs, ys, test_size=0.6, stratify=ys, random_state=args.seed)
    print('Training on {} examples.'.format(len(xs)))

    xs_train, xs_test, ys_train, ys_test = train_test_split(xs, ys, test_size=0.2, stratify=ys,
                                                            random_state=args.seed)

    xs_train, xs_val, ys_train, ys_val = train_test_split(xs_train, ys_train, test_size=0.5, stratify=ys_train,
                                                          random_state=args.seed)
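
    # hedged continuation sketch (not in the original snippet): train on the
    # training split, passing labels as in Example #6's train call
    som.train(xs_train, input_classes=ys_train)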

Example #12

    parser.add_argument('--train', action='store_true', default=True)
    args = parser.parse_args()
    exp_description = 'lr' + str(
        args.lr) + '_algo_' + args.algo + '_source_' + args.source

    a_xs, a_ys, _ = from_csv_with_filenames(audio_data_path)
    v_xs, v_ys = from_csv_visual_10classes(visual_data_path)
    # fix labels to 0-9 range
    a_ys = [int(y) - 1000 for y in a_ys]
    v_ys = [int(y) - 1000 for y in v_ys]
    # scale data to 0-1 range
    a_xs = MinMaxScaler().fit_transform(a_xs)
    v_xs = MinMaxScaler().fit_transform(v_xs)
    a_dim = len(a_xs[0])
    v_dim = len(v_xs[0])
    som_a = SOM(20, 30, a_dim, n_iterations=100, tau=0.1, threshold=0.6)
    som_v = SOM(20, 30, v_dim, n_iterations=100, tau=0.1, threshold=0.6)

    v_ys = np.array(v_ys)
    v_xs = np.array(v_xs)
    a_xs = np.array(a_xs)
    a_ys = np.array(a_ys)
    a_xs_train, a_xs_test, a_ys_train, a_ys_test = train_test_split(
        a_xs, a_ys, test_size=0.2)
    v_xs_train, v_xs_test, v_ys_train, v_ys_test = train_test_split(
        v_xs, v_ys, test_size=0.2)
    # carve the dev sets out of the training splits so the test splits stay held out
    a_xs_train, a_xs_dev, a_ys_train, a_ys_dev = train_test_split(
        a_xs_train, a_ys_train, test_size=0.2)
    v_xs_train, v_xs_dev, v_ys_train, v_ys_dev = train_test_split(
        v_xs_train, v_ys_train, test_size=0.2)

Example #13

    nameInputs = list()
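    # each CSV row holds a file path followed by the feature values; the name
    # is taken from the 7th '/'-separated path component (index 6)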
    with open(fInput, 'r') as inp:
        i = 0
        for line in inp:
            if len(line) > 2:
                inputs[i] = (np.array(line.split(',')[1:])).astype(float)
                nameInputs.append((line.split(',')[0]).split('/')[6])
                i = i + 1

    print(nameInputs[0])

    # get the 20x30 SOM or train a new one (if the folder does not contain the model)
    som = SOM(20,
              30,
              lenExample,
              checkpoint_dir=os.path.join(Constants.DATA_FOLDER,
                                          'VisualModel10classes/'),
              n_iterations=20,
              sigma=4.0)

    loaded = som.restore_trained()
    if not loaded:
        logging.info('Training SOM')
        som.train(inputs)

    for k in range(len(nameInputs)):
        nameInputs[k] = nameInputs[k].split('_')[0]

    # show the SOM
    showSom(som, inputs, nameInputs, 1, 'Visual map')

Example #14

    parser.add_argument(
        '--is-audio',
        action='store_true',
        default=False,
        help='Specify whether the csv contains audio representations, '
             'as the loading functions are different.')
    args = parser.parse_args()

    if not args.classes100:
        num_classes = 10
        if not args.is_audio:
            xs, ys = from_csv_visual_10classes(args.csv_path)
        else:
            xs, ys, _ = from_csv_with_filenames(args.csv_path)
        ys = [int(y) - 1000
              for y in ys]  # see comment in average_prototype_distance_matrix
    else:
        num_classes = 100
        if not args.is_audio:
            xs, ys = from_csv_visual_100classes(args.csv_path)
        else:
            xs, ys, _ = from_csv_with_filenames(args.csv_path)

    som = SOM(20, 30, len(xs[0]), checkpoint_dir=args.model_path)
    som.restore_trained()
    measure = class_compactness(som, xs, ys)
    print('Class Compactness: {}.'.format(measure))
    print('Avg Compactness: {}\nVariance: {}'.format(np.mean(measure),
                                                     np.var(measure)))