Ejemplo n.º 1
0
def train(cfg):
    """Train an IBnet model according to *cfg* and log per-epoch statistics.

    Side effects: mutates cfg['MAX_ELEMENT'] to the largest activation value
    reported by ``model.report``, and prints test accuracy on the epochs
    listed in cfg['EPOCHS'].

    Args:
        cfg: Configuration dictionary with keys 'NAME', 'SGD_BATCHSIZE',
            'SGD_LEARNINGRATE', 'NUM_EPOCHS', 'EPOCHS', 'MAX_ELEMENT'.
    """
    model = IBnet(cfg)
    dataset = IBDataset(cfg['NAME'])
    dataloader = DataLoader(
        dataset,
        batch_size=cfg['SGD_BATCHSIZE'],
        shuffle=False,  # keep batch order deterministic across runs
        num_workers=0,
    )

    optimizer = torch.optim.Adam(model.parameters(), lr=cfg['SGD_LEARNINGRATE'])
    criterion = torch.nn.CrossEntropyLoss()

    # Combined train+test set, used only for activation reporting.
    full = construct_full_dataset(dataset.trn, dataset.tst)

    for epoch in range(cfg['NUM_EPOCHS']):
        for batch_id, (data, label) in enumerate(dataloader):
            output = model(data)
            loss = criterion(output, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Report activations once per epoch (on the first batch) and
            # remember the largest element seen over the whole run.
            if batch_id == 0:
                max_element = model.report(full.X, epoch)
                if max_element > cfg['MAX_ELEMENT']:
                    cfg['MAX_ELEMENT'] = max_element

        # On selected epochs, evaluate and print test accuracy.
        if epoch in cfg['EPOCHS']:
            test_accuracy = model.get_accuracy(dataset.tst.X, dataset.tst.y)
            print('epoch:{}, loss:{}, test_accuracy:{}'.format(
                epoch, loss, test_accuracy))
Ejemplo n.º 2
0
    def __init__(self, train, test, log_dir='./logs', embeddings_freq=10):
        """Set up TensorBoard-style embedding logging state.

        Args:
            train: The training data
            test: The test data (its labels are written out as embedding
                metadata below)
            log_dir: Path to directory used for logging
            embeddings_freq: Defines how often embedding variables will be saved to
                the log directory. If set to 1, this is done every epoch, if it is set to 10 every 10th epoch and so forth.
        """
        super().__init__()

        # Reuse the session Keras is already running in.
        self.sess = K.get_session()

        self.log_dir = log_dir
        self.embeddings_freq = embeddings_freq

        self.writer = tf_summary.FileWriter(self.log_dir)
        # NOTE(review): annotation-only statement -- no tf.train.Saver is
        # actually created here; presumably it is assigned elsewhere in the
        # class. Confirm before relying on self.saver being set.
        self.saver: tf.train.Saver

        # Checkpoint path used when saving embedding variables.
        self.embeddings_ckpt_path = os.path.join(self.log_dir,
                                                 'keras_embedding.ckpt')

        self.train = train
        self.test = test
        # Concatenated train+test dataset for full-dataset reporting.
        self.full = utils.construct_full_dataset(train, test)

        # Save metadata: one integer test label per line, consumed by the
        # TensorBoard embedding projector.
        np.savetxt(f'{log_dir}/metadata.tsv', self.test.y, fmt='%i')
    def __init__(self, cfg, trn, tst, do_save_func=None, *kargs, **kwargs):
        """Reporter that snapshots training state on selected epochs.

        Args:
            cfg: Configuration options dictionary. Mutated: a default for
                'FULL_MI' (False) is inserted when the key is absent.
            trn: Training data.
            tst: Test data.
            do_save_func: Optional callable; ``do_save_func(epoch)`` should
                return True if we should save on that epoch.
        """
        super(LoggingReporter, self).__init__(*kargs, **kwargs)
        self.cfg = cfg
        self.trn = trn
        self.tst = tst

        # Default: compute MI on test data only, not on train+test combined.
        self.cfg.setdefault('FULL_MI', False)

        if self.cfg['FULL_MI']:
            # Pre-build the combined train+test dataset for MI computation.
            self.full = utils.construct_full_dataset(trn, tst)

        self.do_save_func = do_save_func
# Compiled Keras function: lower-bound (Bhattacharyya) entropy estimate of a
# layer's activity under additive Gaussian noise.
entropy_func_lower = K.function(
    [Klayer_activity],
    [kde.entropy_estimator_bd(Klayer_activity, noise_variance)],
)

# Conversion factor: 1 nat = 1/ln(2) bits.
nats2bits = 1.0 / np.log(2)

# Labels come from the test set by default; switch to the combined
# train+test set when measuring MI over all the data.
y = tst.y
Y = tst.Y
if FULL_MI:
    full = utils.construct_full_dataset(trn, tst)
    y = full.y
    Y = full.Y

# Boolean index mask per output class, used to slice activations by label.
saved_labelixs = {i: y == i for i in range(NUM_LABELS)}

# Empirical marginal probability of each label.
labelprobs = np.mean(Y, axis=0)

PLOT_LAYERS = None  # Which layers to plot.  If None, all saved layers are plotted

# Data structure used to store results, keyed by activation name then epoch.
measures = OrderedDict()
measures['tanh'] = {}
Ejemplo n.º 5
0
def computeMI(id):
    """Compute per-layer mutual-information measures from saved epoch files.

    Loads pickled activation snapshots from ``rawdata/<arch>_<activation>/``,
    estimates I(X;M) and I(Y;M) for every layer at every saved epoch (KDE
    upper bound, optional KDE lower bound, optional binning estimator), and
    pickles the resulting ``measures`` dict to ``"MI" + str(id)``.

    Args:
        id: Suffix for the output pickle filename. (Note: shadows the
            builtin ``id``; kept for interface compatibility.)
    """
    train, test = utils.get_IB_data('2017_12_21_16_51_3_275766')

    # For both train and test musr correspond with saving code
    FULL_MI = True

    # MI Measure: 'bin' (binning), 'lower' (KDE lower bound) or other.
    infoplane_measure = 'bin'

    DO_SAVE = True
    DO_LOWER = (infoplane_measure == 'lower')
    DO_BINNED = (infoplane_measure == 'bin')

    MAX_EPOCHS = 5000
    NUM_LABELS = 2

    COLORBAR_MAX_EPOCHS = 5000

    # Directory For Loading saved Data
    ARCH = '10-7-5-4-3'
    DIR_TEMPLATE = '%%s_%s' % ARCH

    noise_variance = 1e-3
    binsize = 0.07
    Klayer_activity = K.placeholder(ndim=2)
    # KL-based upper bound and Bhattacharyya-based lower bound on the
    # entropy of a layer's activity under additive Gaussian noise.
    entropy_func_upper = K.function([
        Klayer_activity,
    ], [
        kde.entropy_estimator_kl(Klayer_activity, noise_variance),
    ])
    entropy_func_lower = K.function([
        Klayer_activity,
    ], [
        kde.entropy_estimator_bd(Klayer_activity, noise_variance),
    ])

    # Nats to bits conversion
    nats2bits = 1.0 / np.log(2)

    # Boolean index masks into the data for each output class.
    saved_labelixs = {}

    y = test.y
    Y = test.Y
    if FULL_MI:
        # Measure over train+test combined rather than test alone.
        full = utils.construct_full_dataset(train, test)
        y = full.y
        Y = full.Y

    for i in range(NUM_LABELS):
        saved_labelixs[i] = (y == i)

    # Empirical marginal probability of each label.
    labelprobs = np.mean(Y, axis=0)

    # Layers to plot, None for all
    PLOT_LAYERS = None

    # Store Results: measures[activation][epoch] -> per-layer MI lists.
    measures = OrderedDict()
    measures['tanh'] = {}
    measures['relu'] = {}

    for activation in measures.keys():
        cur_dir = 'rawdata/' + DIR_TEMPLATE % activation
        if not os.path.exists(cur_dir):
            print("Directory %s not found" % cur_dir)
            continue

        print("******* Loading %s ******" % cur_dir)
        for epochfile in sorted(os.listdir(cur_dir)):
            if not epochfile.startswith('epoch'):
                continue
            fname = cur_dir + '/' + epochfile
            with open(fname, 'rb') as f:
                d = cPickle.load(f)

            epoch = d['epoch']
            # Skip epochs already measured or beyond the cutoff.
            if epoch in measures[activation]:
                continue

            if epoch > MAX_EPOCHS:
                continue

            print("Measureing ", fname)

            num_layers = len(d['data']['activity_tst'])
            if PLOT_LAYERS is None:
                PLOT_LAYERS = list(range(num_layers))

            cepochdata = defaultdict(list)
            for lndx in range(num_layers):
                activity = d['data']['activity_tst'][lndx]

                # Unconditional entropy H(M) estimates (in nats).
                h_upper = entropy_func_upper([
                    activity,
                ])[0]
                if DO_LOWER:
                    # BUG FIX: was entropy_func_lower() with no arguments,
                    # which raised a TypeError and left h_lower undefined.
                    # Call it like entropy_func_upper above / the per-class
                    # calls below.
                    h_lower = entropy_func_lower([
                        activity,
                    ])[0]

                # H(M|X): entropy added purely by the Gaussian noise.
                hM_given_X = kde.kde_condentropy(activity, noise_variance)

                # H(M|Y) upper bound: label-weighted per-class entropies.
                hM_given_Y_upper = 0
                for i in range(NUM_LABELS):
                    hcond_upper = entropy_func_upper([
                        activity[saved_labelixs[i], :],
                    ])[0]
                    hM_given_Y_upper += labelprobs[i] * hcond_upper

                if DO_LOWER:
                    hM_given_Y_lower = 0
                    for i in range(NUM_LABELS):
                        hcond_lower = entropy_func_lower([
                            activity[saved_labelixs[i], :],
                        ])[0]
                        hM_given_Y_lower += labelprobs[i] * hcond_lower

                # I(X;M) = H(M) - H(M|X), I(Y;M) = H(M) - H(M|Y); convert
                # nats -> bits.
                cepochdata['MI_XM_upper'].append(nats2bits *
                                                 (h_upper - hM_given_X))
                cepochdata['MI_YM_upper'].append(nats2bits *
                                                 (h_upper - hM_given_Y_upper))
                cepochdata['H_M_upper'].append(nats2bits * h_upper)

                pstr = 'upper: MI(X;M)=%0.3f, MI(Y;M)=%0.3f' % (
                    cepochdata['MI_XM_upper'][-1],
                    cepochdata['MI_YM_upper'][-1])
                if DO_LOWER:
                    cepochdata['MI_XM_lower'].append(nats2bits *
                                                     (h_lower - hM_given_X))
                    cepochdata['MI_YM_lower'].append(
                        nats2bits * (h_lower - hM_given_Y_lower))
                    cepochdata['H_M_lower'].append(nats2bits * h_lower)
                    pstr += 'lower: MI(X;M)=%0.3f, MI(Y;M)=%0.3f' % (
                        cepochdata['MI_XM_lower'][-1],
                        cepochdata['MI_YM_lower'][-1])

                if DO_BINNED:
                    # Discretization-based MI estimate.
                    binxm, binym = simplebinmi.bin_calc_information2(
                        saved_labelixs, activity, binsize)
                    cepochdata['MI_XM_bin'].append(nats2bits * binxm)
                    cepochdata['MI_YM_bin'].append(nats2bits * binym)
                    pstr += 'bin: MI(X;M)=%0.3f, MI(Y;M)=%0.3f' % (
                        cepochdata['MI_XM_bin'][-1],
                        cepochdata['MI_YM_bin'][-1])
                print('- Layer %d %s' % (lndx, pstr))
            measures[activation][epoch] = cepochdata
    with open("MI" + str(id), 'wb') as f:
        cPickle.dump(measures, f)
Ejemplo n.º 6
0
            output = model(data)
            loss = Loss(output, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Report activations on the first batch only, i.e. once per
            # epoch, and track the largest element seen across training.
            if batch_id == 0:  # every epoch report once
                max_element = model.report(full.X, epoch)
                if max_element > cfg['MAX_ELEMENT']:
                    cfg['MAX_ELEMENT'] = max_element

        # On the epochs selected in cfg, evaluate and print test accuracy.
        if epoch in cfg['EPOCHS']:
            test_accuracy = model.get_accuracy(dataset.tst.X, dataset.tst.y)
            print('epoch:{}, loss:{}, test_accuracy:{}'.format(
                epoch, loss, test_accuracy))


if __name__ == "__main__":
    cfg = get_cfg()

    # Training is disabled here; cfg is assumed to already carry the
    # MAX_ELEMENT recorded during a previous training run.
    # train(cfg)
    print(f"--------max_elements:{cfg['MAX_ELEMENT']}---------------")

    dataset = IBDataset(cfg['NAME'])
    full = construct_full_dataset(dataset.trn, dataset.tst)

    # Compute MI with the HSIC estimator and draw the information plane.
    measures = MI(cfg, infoplane_measure='HSIC', full_dataset=full)
    plot_infoplane(
        cfg,
        infoplane_measure='HSIC',
        measures=measures,
        PLOT_LAYERS=[0, 1, 2, 3, 4],
    )

    # plot_SNR(cfg, PLOT_LAYERS=[0, 1, 2, 3, 4])