optimizer=int(x[6]),
                   verbose=True)
print("loading model 3")
mymodel.load_state_dict(
    torch.load("%s/IN%s_bestmodel3.params" %
               (loc, '_sumO' if mymodel.sum_O else '')))
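
# If this checkpoint was written on a GPU machine, loading it on a CPU-only host
# needs an explicit map_location; a hedged variant of the call above:
#
# mymodel.load_state_dict(
#     torch.load("%s/IN%s_bestmodel3.params" %
#                (loc, '_sumO' if mymodel.sum_O else ''),
#                map_location='cpu'))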

inputTestFiles = glob.glob(
    "/mnt/ccnas2/bdp/mzl20/proj_conda/lhc_jet_tagging_NN/JEDInet_pytorch/150p_dataset/val/jetImage*_%sp*.h5"
    % nParticles)

loss = nn.CrossEntropyLoss(reduction='mean')

test_set = InEventLoader(file_names=inputTestFiles,
                         nP=nParticles,
                         feature_name='jetConstituentList',
                         label_name='jets',
                         verbose=False)
test_loader = torch.utils.data.DataLoader(test_set,
                                          batch_size=batch_size,
                                          shuffle=False)
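
# Hedged sketch (not in the original script): a single evaluation pass over the
# test loader for the reloaded model. It assumes the same dict keys used in the
# training loops below and the global `args_cuda` flag; `run_test` is a name
# introduced here for illustration.
def run_test(model, loader, loss_fn):
    model.eval()  # switch off dropout/batch-norm updates
    total, n_batches = 0.0, 0
    with torch.no_grad():  # no gradients needed for evaluation
        for mydict in loader:
            data = mydict['jetConstituentList']
            target = mydict['jets']
            if args_cuda:
                data, target = data.cuda(), target.cuda()
            total += loss_fn(model(data), target).item()
            n_batches += 1
    return total / max(n_batches, 1)

# e.g. print("test loss:", run_test(mymodel, test_loader, loss))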

# PRINTING DATA

import sys
import numpy as np
"""
for mydict in test_loader:
	data = mydict['jetConstituentList']
	target = mydict['jets']
	np.set_printoptions(suppress=True)
if mymodel.optimizer == 1:
    optimizer = optim.Adadelta(mymodel.parameters(), lr=0.0001)
else:
    optimizer = optim.Adam(mymodel.parameters(), lr=0.0001)
loss_train = np.zeros(n_epochs)
loss_val = np.zeros(n_epochs)
nBatches_per_training_epoch = len(inputTrainFiles)*10000/batch_size
nBatches_per_validation_epoch = len(inputValFiles)*10000/batch_size
print("nBatches_per_training_epoch: %i" %nBatches_per_training_epoch)
print("nBatches_per_validation_epoch: %i" %nBatches_per_validation_epoch)
for i in range(n_epochs):
    if mymodel.verbose: print("Epoch %s" % i)
    # Define the data generators from the training set and validation set.
    random.shuffle(inputTrainFiles)
    random.shuffle(inputValFiles)
    train_set = InEventLoader(file_names=inputTrainFiles, nP=nParticles,
                              feature_name ='jetConstituentList',label_name = 'jets', verbose=False)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=False)
    val_set = InEventLoader(file_names=inputValFiles, nP=nParticles,
                            feature_name ='jetConstituentList',label_name = 'jets', verbose=False)
    val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=False)
    ####
    # train
    for batch_idx, mydict in enumerate(train_loader):
        data = mydict['jetConstituentList']
        target = mydict['jets']
        if args_cuda:
            data, target = data.cuda(), target.cuda()
        # Variables are deprecated no-ops in modern PyTorch; kept from the original code
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        out = mymodel(data)
        l = loss(out, target)
        l.backward()
        optimizer.step()
        loss_train[i] += l.item() / nBatches_per_training_epoch

# Example 3

def model_evaluate(mymodel):
    #loss = nn.CrossEntropyLoss(reduction='sum')
    loss = nn.CrossEntropyLoss(reduction='mean')
    if mymodel.optimizer == 1:
        optimizer = optim.Adadelta(mymodel.parameters(), lr=0.0001)
    else:
        optimizer = optim.Adam(mymodel.parameters(), lr=0.0001)
    loss_train = np.zeros(n_epochs)
    loss_val = np.zeros(n_epochs)

    # Define the data generators for the training and validation sets
    # (batch size taken from the global batch_size).
    import glob
    #inputTrainFiles = glob.glob("/data/ML/mpierini/hls-fml/jetImage*_%sp*.h5" %nParticles)
    #inputValFiles = glob.glob("/data/ML/mpierini/hls-fml/VALIDATION/jetImage*_%sp*.h5" %nParticles)
    import os
    if os.path.isdir('/imdata/NEWDATA'):
        inputTrainFiles = glob.glob("/imdata/NEWDATA/jetImage*_%sp*.h5" %
                                    nParticles)
        inputValFiles = glob.glob(
            "/imdata/NEWDATA/VALIDATION/jetImage*_%sp*.h5" % nParticles)
    elif os.path.isdir('/data/shared/hls-fml/NEWDATA'):
        inputTrainFiles = glob.glob(
            "/data/shared/hls-fml/NEWDATA/jetImage*_%sp*.h5" % nParticles)
        inputValFiles = glob.glob(
            "/data/shared/hls-fml/NEWDATA/VALIDATION/jetImage*_%sp*.h5" %
            nParticles)
    elif os.path.isdir('/home/jduarte/NEWDATA'):
        inputTrainFiles = glob.glob("/home/jduarte/NEWDATA/jetImage*_%sp*.h5" %
                                    nParticles)
        inputValFiles = glob.glob(
            "/home/jduarte/NEWDATA/VALIDATION/jetImage*_%sp*.h5" % nParticles)
    elif os.path.isdir('/bigdata/shared/hls-fml/NEWDATA'):
        inputTrainFiles = glob.glob(
            "/bigdata/shared/hls-fml/NEWDATA/jetImage*_%sp*.h5" % nParticles)
        inputValFiles = glob.glob(
            "/bigdata/shared/hls-fml/NEWDATA/VALIDATION/jetImage*_%sp*.h5" %
            nParticles)
    else:
        # fail loudly instead of hitting a NameError at the shuffle below
        raise IOError("no known NEWDATA directory found on this machine")
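    # A hedged alternative (not in the original): the same search written as a
    # loop over candidate base directories.
    #
    # for base in ['/imdata/NEWDATA', '/data/shared/hls-fml/NEWDATA',
    #              '/home/jduarte/NEWDATA', '/bigdata/shared/hls-fml/NEWDATA']:
    #     if os.path.isdir(base):
    #         inputTrainFiles = glob.glob("%s/jetImage*_%sp*.h5" % (base, nParticles))
    #         inputValFiles = glob.glob(
    #             "%s/VALIDATION/jetImage*_%sp*.h5" % (base, nParticles))
    #         break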

    random.shuffle(inputTrainFiles)
    random.shuffle(inputValFiles)

    print("example file", inputTrainFiles[0])
    nBatches_per_training_epoch = len(inputTrainFiles) * 10000 / batch_size
    nBatches_per_validation_epoch = len(inputValFiles) * 10000 / batch_size

    train_set = InEventLoader(file_names=inputTrainFiles,
                              nP=nParticles,
                              feature_name='jetConstituentList',
                              label_name='jets',
                              verbose=False)
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=batch_size,
                                               shuffle=False)
    val_set = InEventLoader(file_names=inputValFiles,
                            nP=nParticles,
                            feature_name='jetConstituentList',
                            label_name='jets',
                            verbose=False)
    val_loader = torch.utils.data.DataLoader(val_set,
                                             batch_size=batch_size,
                                             shuffle=False)

    for i in range(n_epochs):
        if mymodel.verbose: print("Epoch %s" % i)
        #for j in range(0, xtrain.size()[0], batch_size):
        #for (batch_idx, mydict) in tqdm(enumerate(train_loader),total=nBatches_per_training_epoch):
        for (batch_idx, mydict) in enumerate(train_loader):
            data = mydict['jetConstituentList']
            target = mydict['jets']
            if args_cuda:
                data, target = data.cuda(), target.cuda()
            data, target = Variable(data), Variable(target)
            optimizer.zero_grad()
            out = mymodel(data)
            l = loss(out, target)
            l.backward()
            optimizer.step()
            loss_train[i] += l.item() / nBatches_per_training_epoch
        #loss_train[i] = loss_train[i]/float(xtrain.size()[0])
        #for j in range(0, xval.size()[0], batch_size):
        #for (batch_idx, mydict) in tqdm(enumerate(val_loader),total=nBatches_per_validation_epoch):
        for (batch_idx, mydict) in enumerate(val_loader):
            data = mydict['jetConstituentList']
            target = mydict['jets']
            if args_cuda:
                data, target = data.cuda(), target.cuda()
            # evaluate without tracking gradients; Variable(..., volatile=True)
            # was removed from PyTorch and torch.no_grad() is its replacement
            with torch.no_grad():
                out_val = mymodel(data)
                l_val = loss(out_val, target)
            loss_val[i] += l_val.item() / nBatches_per_validation_epoch
        #loss_val[i] = loss_val[i]/float(xval.size()[0])
        if mymodel.verbose: print("Training   Loss: %f" % loss_train[i])
        if mymodel.verbose: print("Validation Loss: %f" % loss_val[i])
        # NB: this minimum-based criterion alone does not trigger soon enough,
        # so two patience-window checks follow it.
        recent = loss_val[max(0, i - patience):i]
        best_before = min(np.append(loss_val[0:max(0, i - patience)], 200))
        if i > patience and all(recent > best_before):
            print("Early Stopping at", i)
            break
        if i > (2 * patience):
            last_avg = np.mean(loss_val[i - patience:i])
            previous_avg = np.mean(loss_val[i - 2 * patience:i - patience])
            #print ("last",last_avg,"previous",previous_avg)
            if last_avg > previous_avg:
                print("Early Avg Stopping at", i)
                break
        if i > patience:
            last_min = min(loss_val[i - patience:i])
            overall_min = min(loss_val[:i - patience])
            #print ("last",last_min,"overall",overall_min)
            if last_min > overall_min:
                print("Early min Stopping at", i)
                break
    # drop the unfilled (zero) entries and report the last recorded validation loss
    loss_val = loss_val[loss_val > 0]
    return loss_val[-1]
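

# Hedged sketch (not in the original scrape): the three early-stopping rules from
# model_evaluate, factored into a standalone helper so they can be sanity-checked
# on a toy loss history; `_should_stop` is a name introduced here for illustration.
def _should_stop(loss_val, i, patience):
    # rule 1: every loss in the last `patience` epochs exceeds the best seen before
    if i > patience:
        recent = loss_val[max(0, i - patience):i]
        best_before = min(np.append(loss_val[0:max(0, i - patience)], 200))
        if all(recent > best_before):
            return "min"
    # rule 2: the mean over the last window exceeds the mean of the window before it
    if i > 2 * patience:
        if np.mean(loss_val[i - patience:i]) > np.mean(
                loss_val[i - 2 * patience:i - patience]):
            return "avg"
    # rule 3: the best loss in the last window never beats the overall best
    if i > patience and min(loss_val[i - patience:i]) > min(loss_val[:i - patience]):
        return "last-min"
    return None

# e.g. _should_stop(np.array([1.0, 0.9, 0.8, 0.85, 0.9, 0.95, 1.0]), 6, 3) -> "min"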