def populate_table(table_fname, features, labels, camNames, actNames,
                   partiNames):

    n_samples = labels.shape[0]
    pbar = start_progressbar(n_samples, '%i features to Pytable' % (n_samples))

    h5 = ta.openFile(table_fname, mode='a')
    table = h5.root.input_output_data.readout
    pp = table.row

    for i in xrange(n_samples):
        pp['frame_index'] = i
        pp['features'] = features[i, :]
        pp['label'] = labels[i]
        #pp['aviNames']    = aviNames[i][0:-4]
        pp['camNames'] = camNames[i]
        pp['actNames'] = actNames[i]
        pp['partiNames'] = partiNames[i]
        pp.append()
        update_progressbar(pbar, i)

    end_progressbar(pbar)
    # index the query columns so that where() lookups stay fast
    table.cols.camNames.createIndex()
    table.cols.actNames.createIndex()
    table.cols.partiNames.createIndex()
    # save everything in the file and close it
    table.flush()
    h5.close()
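# populate_table assumes that the table at /input_output_data/readout already
# exists. A minimal sketch of how such a file could be created with the same
# PyTables 2.x API used throughout (ta is the tables module); the column
# types and sizes below are assumptions, not taken from the original project.
import tables as ta

N_FEATURES = 512  # assumed feature dimensionality


class Readout(ta.IsDescription):
    frame_index = ta.Int32Col()                     # running frame counter
    features = ta.Float64Col(shape=(N_FEATURES,))   # one descriptor per frame
    label = ta.StringCol(24)                        # action label
    camNames = ta.StringCol(24)                     # camera identifier
    actNames = ta.StringCol(24)                     # activity identifier
    partiNames = ta.StringCol(24)                   # participant identifier


h5 = ta.openFile('readout.h5', mode='w')
group = h5.createGroup('/', 'input_output_data')
h5.createTable(group, 'readout', Readout)
h5.close()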
def get_bfast_splits(table_fname, settings, n_samples=N_SAMPLES,
                     n_features=N_FEATURES, n_lab=N_LAB, contig_labels=True):
    # n_samples and contig_labels are accepted for API compatibility
    # but unused here

    h5_all = ta.openFile(table_fname, mode='r')
    table_all = h5_all.root.input_output_data.readout

    train_p = settings['train_p']
    test_p = settings['test_p']
    cams = settings['cameras']

    labels_train = []
    features_train = []
    labels_test = []
    features_test = []

    pbar = start_progressbar(len(train_p),
                             str(len(train_p)) + ' training participants')
    for jj, pat in enumerate(train_p):
        for cam in cams:
            # read each (participant, camera) block once and reuse it
            rows = table_all.readWhere(
                "(partiNames == '%s') & (camNames == '%s')" % (pat, cam))
            labels_train += [row['label'] for row in rows]
            features_train += [row['features'] for row in rows]
        update_progressbar(pbar, jj)
    end_progressbar(pbar)

    pbar = start_progressbar(len(test_p),
                             str(len(test_p)) + ' testing participants')
    for jj, pat in enumerate(test_p):
        for cam in cams:
            rows = table_all.readWhere(
                "(partiNames == '%s') & (camNames == '%s')" % (pat, cam))
            labels_test += [row['label'] for row in rows]
            features_test += [row['features'] for row in rows]
        update_progressbar(pbar, jj)
    end_progressbar(pbar)

    tic = time.time()
    uniqLabels = np.unique(labels_train)
    # drop the silence label and keep at most n_lab classes
    uniqLabels = uniqLabels[uniqLabels != 'SIL']
    uniqLabels = uniqLabels[:n_lab]
    print 'using', str(len(uniqLabels)), 'labels in total'

    labels_train = np.array(labels_train)
    selector = np.zeros_like(labels_train, dtype='bool')
    for uL in uniqLabels:
        selector = np.squeeze(selector | [labels_train == uL])
    labels_train = labels_train[selector]
    features_train = np.array(features_train)[selector, :n_features]

    labels_test = np.array(labels_test)
    selector = np.zeros_like(labels_test, dtype='bool')
    for uL in uniqLabels:
        selector = np.squeeze(selector | [labels_test == uL])
    labels_test = labels_test[selector]
    features_test = np.array(features_test)[selector, :n_features]
    print "Loaded features converted in", round(time.time() - tic), "seconds"

    table_all.flush()
    h5_all.close()
    return features_train, labels_train, features_test, labels_test
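# get_bfast_splits only reads the three keys below from settings; the
# participant and camera names here are hypothetical placeholders.
settings = {
    'train_p': ['P03', 'P04', 'P05'],  # participants used for training
    'test_p': ['P06'],                 # participants held out for testing
    'cameras': ['cam01', 'cam02'],     # cameras pooled per participant
}
features_train, labels_train, features_test, labels_test = \
    get_bfast_splits('bfast_readout.h5', settings)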
def train_adaboost(features, labels, learning_rate, n_lab, n_runs, n_estim,
                   n_samples):
    uniqLabels = np.unique(labels)
    print 'Taking', str(n_lab), 'labels'
    uniqLabels = uniqLabels[:n_lab]
    used_labels = uniqLabels
    pbar = start_progressbar(len(uniqLabels),
                             'training adaboost for %i labels' % len(uniqLabels))
    allLearners = []
    for yy, targetLab in enumerate(uniqLabels):
        runs = []
        for rrr in xrange(n_runs):
            # one-vs-rest resampled binary problem for this target label
            feats, labs = get_binary_sets(features, labels, targetLab,
                                          n_samples)
            baseClf = DecisionTreeClassifier(max_depth=4, min_samples_leaf=10,
                                             min_samples_split=10)
            baseClf.fit(feats, labs)
            ada_real = AdaBoostClassifier(base_estimator=baseClf,
                                          learning_rate=learning_rate,
                                          n_estimators=n_estim,
                                          algorithm="SAMME.R")
            runs.append(ada_real.fit(feats, labs))
        allLearners.append(runs)
        update_progressbar(pbar, yy)
    end_progressbar(pbar)

    return allLearners, used_labels
def compute_confidence(allLearners, dada, classifier_type):

    if classifier_type == 'adaboost':
        # accumulate the weighted votes of every estimator of every run,
        # one confidence column per label
        lab_confidence = np.zeros([dada.shape[0], len(allLearners)],
                                  dtype='float64')
        pbar = start_progressbar(
            len(allLearners),
            'producing weighted outputs for %i labels' % len(allLearners))
        for ii, thisLab in enumerate(allLearners):
            res = np.zeros([dada.shape[0]], dtype='float64')
            for jj, thisLearner in enumerate(thisLab):
                my_weights = thisLearner.estimator_weights_
                for hh, thisEstimator in enumerate(thisLearner):
                    res = res + thisEstimator.predict(dada) * my_weights[hh]
            lab_confidence[:, ii] = np.float64(res)
            update_progressbar(pbar, ii)
        end_progressbar(pbar)

    if classifier_type == 'randomforest':
        # sum the class probabilities of the independently trained forests
        lab_confidence = np.zeros((dada.shape[0], len(allLearners[0].classes_)),
                                  dtype='float64')
        pbar = start_progressbar(
            len(allLearners),
            'producing weighted outputs for %i classes' % len(allLearners[0].classes_))
        for ii, thisRun in enumerate(allLearners):
            lab_confidence += thisRun.predict_proba(dada)
            update_progressbar(pbar, ii)
        end_progressbar(pbar)

    return lab_confidence
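# usage sketch: the columns of lab_confidence follow the order of the labels
# returned by train_adaboost, so predictions fall out of an argmax; the
# hyperparameter values below are illustrative only.
allLearners, used_labels = train_adaboost(features_train, labels_train,
                                          learning_rate=1.0, n_lab=10,
                                          n_runs=5, n_estim=50, n_samples=200)
confidence = compute_confidence(allLearners, features_test, 'adaboost')
predicted = used_labels[np.argmax(confidence, axis=1)]
print 'accuracy:', np.mean(predicted == labels_test)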
def train_adaboost(features, labels, learning_rate, n_lab, n_runs, n_estim,
                   n_samples):
    uniqLabels = np.unique(labels)
    print 'Taking ', str(n_lab), ' labels'
    uniqLabels = uniqLabels[:n_lab]
    used_labels = uniqLabels
    pbar = start_progressbar(
        len(uniqLabels), 'training adaboost for %i labels' % len(uniqLabels))
    allLearners = []
    for yy, targetLab in enumerate(uniqLabels):
        runs = []
        for rrr in xrange(n_runs):
            feats, labs = get_binary_sets(features, labels, targetLab,
                                          n_samples)
            baseClf = DecisionTreeClassifier(max_depth=1,
                                             min_samples_leaf=4,
                                             min_samples_split=4)
            baseClf.fit(feats, labs)
            ada_real = AdaBoostClassifier(base_estimator=baseClf,
                                          learning_rate=learning_rate,
                                          n_estimators=n_estim,
                                          algorithm="SAMME.R")
            runs.append(ada_real.fit(feats, labs))
        allLearners.append(runs)
        update_progressbar(pbar, yy)
    end_progressbar(pbar)

    return allLearners, used_labels
def getMonkeySplits_lim_old(table_fname,
                            splitNo,
                            n_samples=N_SAMPLES,
                            n_features=N_FEATURES):

    h5_tr = ta.openFile(table_fname + str(splitNo) + '_train.h5', mode='r')
    table_tr = h5_tr.root.input_output_data.readout
    h5_te = ta.openFile(table_fname + str(splitNo) + '_test.h5', mode='r')
    table_te = h5_te.root.input_output_data.readout
    print 'Converting arrays to sp'

    uniqLabels = np.unique(table_tr.cols.label)
    #KILL UNUSED
    uniqLabels = uniqLabels[uniqLabels != 'unused']

    labels_train = []
    features_train = []
    exctCnt = 0
    pbar = start_progressbar(len(uniqLabels),
                             '%i training labels' % len(uniqLabels))
    for i, thisLab in enumerate(uniqLabels):
        tempLabels = [
            row['label'] for row in table_tr.where("label == thisLab")
        ]
        try:
            selInd = random.sample(range(0, len(tempLabels)), n_samples)
        except ValueError:
            selInd = range(0, len(tempLabels))
            exctCnt = exctCnt + 1
        labels_train = labels_train + [tempLabels[gg] for gg in selInd]
        tempFeatures = [
            row['features'][:][:n_features]
            for row in table_tr.where("label == thisLab")
        ]
        features_train = features_train + [tempFeatures[gg] for gg in selInd]
        update_progressbar(pbar, i)

    end_progressbar(pbar)
    print '%d exceptions occurred' % (exctCnt)

    features_train = sp.array(features_train)[:, :n_features]
    labels_train = sp.array(labels_train)

    labels_test = sp.array(table_te.cols.label)
    features_test = sp.array(
        table_te.cols.features)[labels_test != 'unused', :n_features]
    labels_test = labels_test[labels_test != 'unused']

    print 'Converted'
    table_tr.flush()
    table_te.flush()
    h5_tr.close()
    h5_te.close()
    print "feature loading completed"
    return features_train, labels_train, features_test, labels_test
def split_data_from_table(table_fname, n_samples=N_SAMPLES,
                          n_features=N_FEATURES):

    h5 = ta.openFile(table_fname, mode='r')
    table = h5.root.input_output_data.readout

    l_features = table.cols.features
    l_index = table.cols.frame_index
    l_labels = table.cols.label

    n_samples_total = len(l_labels)
    assert (2 * n_samples < n_samples_total)

    import warnings
    warnings.warn("have something that takes a split depending on classes "
                  "to keep it balanced")

    #TODO: have a balanced split on each class
    ind_total = sp.random.permutation(n_samples_total)
    ind_train = ind_total[:n_samples]
    ind_test = ind_total[n_samples:2 * n_samples]
    assert not sp.array([(ind_train == test).any() for test in ind_test]).any()
    print "checked that train and test do not overlap"

    features_train = sp.zeros((n_samples, n_features), dtype='uint8')
    features_test = sp.zeros((n_samples, n_features), dtype='uint8')
    labels_train = []
    labels_test = []

    pbar = start_progressbar(len(ind_train),
                             '%i train features' % (len(ind_train)))
    for i, ind in enumerate(ind_train):
        features_train[i] = l_features[ind][:n_features]
        labels_train.append(l_labels[ind])
        update_progressbar(pbar, i)
    end_progressbar(pbar)

    pbar = start_progressbar(len(ind_test),
                             '%i test features' % (len(ind_test)))
    for i, ind in enumerate(ind_test):
        features_test[i] = l_features[ind][:n_features]
        labels_test.append(l_labels[ind])
        update_progressbar(pbar, i)
    end_progressbar(pbar)

    labels_train = sp.array(labels_train)
    labels_test = sp.array(labels_test)

    table.flush()
    h5.close()

    return features_train, labels_train, features_test, labels_test
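# one way to address the TODO above: a class-balanced split that draws the
# same number of train and test indices from every label (the helper name
# and per_class argument are illustrative, not from the original code).
def balanced_split_indices(labels, per_class):
    ind_train = []
    ind_test = []
    for lab in np.unique(labels):
        # shuffle the indices of this label, then cut off two disjoint blocks
        ind_lab = sp.random.permutation(np.where(labels == lab)[0])
        ind_train.extend(ind_lab[:per_class])
        ind_test.extend(ind_lab[per_class:2 * per_class])
    return np.array(ind_train), np.array(ind_test)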
def populate_table(table_fname, features, labels, names):
    n_samples = labels.shape[0]
    pbar = start_progressbar(n_samples, '%i features to Pytable' % (n_samples))

    h5 = ta.openFile(table_fname, mode='a')
    table = h5.root.input_output_data.readout
    pp = table.row

    for i in xrange(n_samples):
        pp['features'] = features[i, :]
        pp['label'] = labels[i]
        pp.append()
        update_progressbar(pbar, i)

    end_progressbar(pbar)
    # save everything in the file and close it
    table.flush()
    h5.close()
def compute_confidence(allLearners, dada):
    lab_confidence = np.zeros([dada.shape[0], len(allLearners)])
    tic = time.time()
    pbar = start_progressbar(len(allLearners),
                             'producing weighted outputs for %i labels' % len(allLearners))

    for ii, thisLab in enumerate(allLearners):
        res = np.zeros([dada.shape[0]])
        for jj, thisLearner in enumerate(thisLab):
            for hh, thisEstimator in enumerate(thisLearner):
                # multiply the predictions with the weight of the learner
                res = res + thisEstimator.predict(dada) * thisLearner.estimator_weights_[hh]
        lab_confidence[:, ii] = res
        update_progressbar(pbar, ii)
    end_progressbar(pbar)
    print "time taken to produce confidence:", round(time.time() - tic, 2), "seconds"
    return lab_confidence
def euclidien(features1, features2=None):
    """
    Builds a similarity matrix based on the squared Euclidean distance
    """
    if features2 is None:
        features2 = features1

    nfeat1 = len(features1)
    nfeat2 = len(features2)

    # go
    kernelmatrix = sp.empty((nfeat1, nfeat2), dtype="float")

    if features1 is features2:

        # set up progress bar
        n_iter = 0
        niter = nfeat1 * (nfeat2 + 1) / 2
        pbar = start_progressbar(niter, "Kernel Train")

        for ifeat1, feat1 in enumerate(features1):

            a_2 = (feat1**2.).sum()

            for ifeat2, feat2 in enumerate(features2):

                if ifeat1 == ifeat2:
                    kernelmatrix[ifeat1, ifeat2] = 0

                # matrix is symmetric, fill only the top triangle
                elif ifeat1 > ifeat2:

                    a_b = sp.dot(feat1, feat2.T)

                    b_2 = (feat2**2.).sum()
                    dist = (a_2 - 2 * a_b + b_2)

                    # since kernel matrix is symmetric
                    kernelmatrix[ifeat1, ifeat2] = dist
                    kernelmatrix[ifeat2, ifeat1] = dist

                    update_progressbar(pbar, n_iter + 1)
                    n_iter += 1
    else:

        # set up progress bar
        n_iter = 0
        niter = nfeat1 * nfeat2
        pbar = start_progressbar(niter, "Kernel Test")

        for ifeat1, feat1 in enumerate(features1):

            a_2 = (feat1**2.).sum()

            for ifeat2, feat2 in enumerate(features2):

                a_b = sp.dot(feat1, feat2.T)

                b_2 = (feat2**2.).sum()
                dist = (a_2 - 2 * a_b + b_2)

                kernelmatrix[ifeat1, ifeat2] = dist

                update_progressbar(pbar, n_iter)
                n_iter += 1

    end_progressbar(pbar)

    return kernelmatrix
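# usage sketch: euclidien returns squared distances, not similarities, so a
# common way to feed it to a kernel classifier is through a Gaussian map
# K = exp(-gamma * d^2); gamma is a free bandwidth parameter assumed here.
from sklearn.svm import SVC

gamma = 1e-3
K_train = np.exp(-gamma * euclidien(features_train))
K_test = np.exp(-gamma * euclidien(features_test, features_train))

clf = SVC(kernel='precomputed')
clf.fit(K_train, labels_train)
predictions = clf.predict(K_test)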
def get_monkey_splits_lim(table_fname, splitNo, n_samples=N_SAMPLES,
                          n_features=N_FEATURES, n_lab=N_LAB,
                          contig_labels=True):

    h5_tr = ta.openFile(table_fname + str(splitNo) + '_train.h5', mode='r')
    table_tr = h5_tr.root.input_output_data.readout
    h5_te = ta.openFile(table_fname + str(splitNo) + '_test.h5', mode='r')
    table_te = h5_te.root.input_output_data.readout

    # only labels that occur in both the train and the test split are usable
    uniqLabels = np.intersect1d(np.unique(table_te.cols.label),
                                np.unique(table_tr.cols.label))

    #KILL UNUSED
    uniqLabels = uniqLabels[uniqLabels != 'unused']
    uniqLabels = uniqLabels[:n_lab]

    labels_train = []
    features_train = []
    exctCnt = 0
    pbar = start_progressbar(len(uniqLabels),
                             'fetching %i training labels' % len(uniqLabels))

    for i, thisLab in enumerate(uniqLabels):
        tempLabels = [row['label'] for row in table_tr.where("label == thisLab")]
        if contig_labels:
            toThis = min(len(tempLabels), n_samples)
            selInd = range(0, toThis)
        else:
            try:
                selInd = random.sample(range(0, len(tempLabels)), n_samples)
            except ValueError:
                selInd = range(0, len(tempLabels))
                exctCnt = exctCnt + 1
        labels_train = labels_train + [tempLabels[gg] for gg in selInd]
        tempFeatures = [row['features'][:][:n_features]
                        for row in table_tr.where("label == thisLab")]
        features_train = features_train + [tempFeatures[gg] for gg in selInd]

        update_progressbar(pbar, i)

    end_progressbar(pbar)
    print '%d exceptions occurred' % (exctCnt)

    pbar = start_progressbar(len(uniqLabels),
                             'fetching %i testing labels' % len(uniqLabels))
    labels_test = []
    features_test = []
    for i, thisLab in enumerate(uniqLabels):
        tempLabels = [row['label'] for row in table_te.where("label == thisLab")]
        labels_test = labels_test + tempLabels
        tempFeatures = [row['features'][:][:n_features]
                        for row in table_te.where("label == thisLab")]
        features_test = features_test + tempFeatures
        update_progressbar(pbar, i)
    end_progressbar(pbar)

    features_train = sp.array(features_train)[:, :n_features]
    labels_train = sp.array(labels_train)
    features_test = sp.array(features_test)[:, :n_features]
    labels_test = sp.array(labels_test)
    print 'Converted'

    table_tr.flush()
    table_te.flush()
    h5_tr.close()
    h5_te.close()
    print "feature loading completed"
    return features_train, labels_train, features_test, labels_test
def get_HMDB_splits(table_fname, vidName, vidMode, n_samples=N_SAMPLES,
                    n_features=N_FEATURES):

    h5 = ta.openFile(table_fname, mode='r')
    table = h5.root.input_output_data.readout

    l_features = table.cols.features
    l_index = table.cols.frame_index
    l_labels = table.cols.label
    l_aviNames = table.cols.aviNames

    # sort the videos into train ('1'), test ('2') and unused ('0') lists;
    # note that the last entry of vidMode is skipped here
    trVidName = []
    teVidName = []
    noVidName = []
    for j in range(0, len(vidMode) - 1):
        innerVidMode = vidMode[j]
        trVidName = trVidName + [vidName[j][i] for i, x in enumerate(innerVidMode) if x == '1']
        teVidName = teVidName + [vidName[j][i] for i, x in enumerate(innerVidMode) if x == '2']
        noVidName = noVidName + [vidName[j][i] for i, x in enumerate(innerVidMode) if x == '0']

    features_train = []
    labels_train = []

    features_test = []
    labels_test = []

    exctCnt = 0
    pbar = start_progressbar(len(trVidName), '%i train features' % (len(trVidName)))
    for i, vid in enumerate(trVidName):
        tempLabels = [row['label'] for row in table.where("aviNames == vid")]
        try:
            selInd = random.sample(range(0, len(tempLabels)), n_samples)
        except ValueError:
            selInd = range(0, len(tempLabels))
            exctCnt = exctCnt + 1
        labels_train = labels_train + [tempLabels[gg] for gg in selInd]
        tempFeatures = [row['features'][:][:n_features]
                        for row in table.where("aviNames == vid")]
        features_train = features_train + [tempFeatures[gg] for gg in selInd]
        update_progressbar(pbar, i)

    end_progressbar(pbar)

    print 'finished with %i exceptions' % (exctCnt)

    pbar = start_progressbar(len(teVidName), '%i test features' % (len(teVidName)))
    for i, vid in enumerate(teVidName):
        labels_test = labels_test + [row['label'] for row in table.where("aviNames == vid")]
        features_test = features_test + [row['features'][:][:n_features]
                                         for row in table.where("aviNames == vid")]
        update_progressbar(pbar, i)
    end_progressbar(pbar)

    print 'Converting arrays to sp'
    features_train = sp.array(features_train, dtype='uint8')
    features_test = sp.array(features_test, dtype='uint8')
    labels_train = sp.array(labels_train)
    labels_test = sp.array(labels_test)
    print 'Converted'

    table.flush()
    h5.close()
    print "feature loading completed"
    return features_train, labels_train, features_test, labels_test
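# vidName and vidMode mirror HMDB-style split files, where every video is
# flagged '1' (train), '2' (test) or '0' (unused); a hedged sketch of a
# loader for that format (one split file per action class, naming assumed).
def read_hmdb_split(split_files):
    vidName = []
    vidMode = []
    for fname in split_files:
        names = []
        modes = []
        for line in open(fname):
            parts = line.split()
            if len(parts) == 2:      # "<video name> <flag>"
                names.append(parts[0])
                modes.append(parts[1])
        vidName.append(names)
        vidMode.append(modes)
    return vidName, vidMode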
def get_bfast_splits(table_fname, settings, n_samples=N_SAMPLES,
                     n_features=N_FEATURES, n_lab=N_LAB, contig_labels=True):

    h5_all = ta.openFile(table_fname, mode='r')
    table_all = h5_all.root.input_output_data.readout

    train_p = settings['train_p']
    test_p = settings['test_p']
    cams = settings['cameras']
    cur_cam = settings['cur_cam']
    lab_per_cam = {}

    print 'figuring out the shared labels between cams'
    for cam in cams:
        lab_per_pat = []
        for pat in train_p:
            rowz = [row['label'] for row in table_all.readWhere(
                "(partiNames == '%s') & (camNames == '%s')" % (pat, cam))]
            lab_per_pat += list(np.unique(rowz))
        lab_per_cam[cam] = np.unique(lab_per_pat)

    # keep only the labels that exist for all cams; used towards the end
    # to weed out un-shared ones
    aaa = lab_per_cam.values()
    labs_for_all = set(aaa[0]).intersection(*aaa)

    len_data = 0
    print 'figuring out how many training samples we have for the cam', cur_cam
    for pat in train_p:
        rowz = [row['label'] for row in table_all.readWhere(
            "(partiNames == '%s') & (camNames == '%s')" % (pat, cur_cam))]
        len_data += len(rowz)

    features_train = np.empty((len_data, n_features), dtype='float64')
    labels_train = np.empty(len_data, dtype='|S24')

    cnt = 0
    pbar = start_progressbar(len(train_p), str(len(train_p)) +
                             ' training participants loading for cam ' + cur_cam)
    for jj, pat in enumerate(train_p):
        # read each block once and reuse it for features and labels
        rows = table_all.readWhere(
            "(partiNames == '%s') & (camNames == '%s')" % (pat, cur_cam))
        temp = [row['features'][:n_features] for row in rows]
        temp2 = [row['label'] for row in rows]
        if temp:
            features_train[cnt:cnt + len(temp), :] = temp
            labels_train[cnt:cnt + len(temp)] = temp2
            cnt = cnt + len(temp)
        update_progressbar(pbar, jj)
    end_progressbar(pbar)

    len_data = 0
    pbar = start_progressbar(len(test_p),
                             'now figuring out how many test samples we have')
    for jj, pat in enumerate(test_p):
        for cam in cams:
            len_data += len(table_all.readWhere(
                "(partiNames == '%s') & (camNames == '%s')" % (pat, cam)))
        update_progressbar(pbar, jj)
    end_progressbar(pbar)

    features_test = np.empty((len_data, n_features), dtype='float64')
    labels_test = np.empty(len_data, dtype='|S24')

    cnt = 0
    pbar2 = start_progressbar(len(test_p),
                              str(len(test_p)) + ' testing participants loading')
    for jj, pat in enumerate(test_p):
        for cam in cams:
            rows = table_all.readWhere(
                "(partiNames == '%s') & (camNames == '%s')" % (pat, cam))
            temp = [row['features'][:n_features] for row in rows]
            temp2 = [row['label'] for row in rows]
            if temp:
                features_test[cnt:cnt + len(temp), :] = temp
                labels_test[cnt:cnt + len(temp)] = temp2
                cnt = cnt + len(temp)
        update_progressbar(pbar2, jj)
    end_progressbar(pbar2)

    tic = time.time()

    uniqLabels = np.intersect1d(labs_for_all, np.unique(labels_test))
    #KILL UNUSED
    uniqLabels = uniqLabels[uniqLabels != 'SIL']

    uniqLabels = uniqLabels[:n_lab]
    print 'using', str(len(uniqLabels)), 'labels in total'

    labels_train = np.array(labels_train)
    selector = np.zeros_like(labels_train, dtype='bool')
    excpt = 0

    for uL in uniqLabels:
        label_all = labels_train == uL
        label_all_subs = np.where(label_all)[0]
        # take at most n_samples contiguous samples per label
        if label_all_subs.shape[0] >= n_samples:
            label_some_subs = label_all_subs[:n_samples]
        else:
            excpt += 1
            label_some_subs = label_all_subs
        label_lim = np.zeros_like(label_all, dtype='bool')
        label_lim[label_some_subs] = True
        selector = np.squeeze(selector | [label_lim])
    labels_train = labels_train[selector]
    features_train = features_train[selector, :n_features]

    labels_test = np.array(labels_test)
    selector = np.zeros_like(labels_test, dtype='bool')
    for uL in uniqLabels:
        selector = np.squeeze(selector | [labels_test == uL])
    labels_test = labels_test[selector]
    features_test = features_test[selector, :n_features]
    print "Loaded features converted in", round(time.time() - tic), "seconds"
    print "there were", str(excpt), "exceptions"

    table_all.flush()
    h5_all.close()

    labels_train = group_labels(labels_train)
    labels_test = group_labels(labels_test)
    print 'using', str(len(labels_test)), 'test samples in total'
    return features_train, labels_train, features_test, labels_test