# Imports assumed by this section (project helpers such as
# tomImageFolderFrameSeriesAllClasses, MYLSTMWithGateoutput, newnet,
# freeze_layer, and etnn are defined elsewhere in the project).
import os

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torchvision import transforms


def LSTMforFixedWidthForget2InputRatio(n_hidden=200,
                                       n_epochs=10,
                                       use_cuda=False,
                                       numOfClasses=4):
    # Parameters
    learning_rate = 0.001
    batch_size = 1
    numOfFrames = 755  # 378 for continuous presentation
    lastLayerSize = 60

    transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize((28, 28)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, ), (0.5, ))
    ])

    train_set = tomImageFolderFrameSeriesAllClasses(
        root='/N/u/aalipour/Carbonate/Downloads/2ndYearproject/2020Wave/TrainTest/revierwersAsked/MIROMidsizeOri/train',
        transform=transform)

    test_set = tomImageFolderFrameSeriesAllClasses(
        root='/N/u/aalipour/Carbonate/Downloads/2ndYearproject/2020Wave/TrainTest/revierwersAsked/MIROMidsizeOri/validation',
        transform=transform)
    #classes=('obj1','obj2','obj3','obj4','obj5','obj6','obj7','obj8','obj9','obj10','obj11','obj12','obj13','obj14','obj15','obj16')
    #classes=('Ori1','Ori2','Ori3','Ori4','Ori5','Ori6','Ori7','Ori8','Ori9','Ori10','Ori11','Ori12','Ori13','Ori14','Ori15','Ori16')

    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=2)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=batch_size,
                                              shuffle=True,
                                              num_workers=2)

    # Select device: use CUDA only if requested and available.
    device = torch.device(
        "cuda" if torch.cuda.is_available() and use_cuda else "cpu")

    # Cortical column model

    class column(nn.Module):
        """
        Cortical column model: a pretrained front end feeding an LSTM whose
        gate activations are exposed, followed by a linear classifier.
        """
        def __init__(self, preTrainedModel):
            super(column, self).__init__()
            self.frontEnd = preTrainedModel
            # MYLSTMWithGateoutput returns (output, forgetGate, inputGate).
            self.lstm = MYLSTMWithGateoutput(lastLayerSize, n_hidden)
            self.outLinear = nn.Linear(n_hidden, numOfClasses, bias=True)

        def forward(self, x, y=None, use_cuda=False):
            # Run the frozen front end frame-by-frame, without tracking gradients.
            with torch.no_grad():
                # NaN-filled buffer for the per-frame front-end features.
                hh = torch.full((batch_size, x.size(1), lastLayerSize),
                                float('nan'))
                for batchNum in range(batch_size):
                    m = x[batchNum, :, :, :].unsqueeze(1)
                    m = self.frontEnd(m)
                    hh[batchNum, :, :] = m.detach()
                x = hh.cuda() if use_cuda else hh
            x, forgetGate, inputGate = self.lstm(x, use_cuda=use_cuda)
            x = self.outLinear(x.to(device))

            return x, forgetGate, inputGate

    # end class definition

    c1 = column(newnet)
    c1.to(device)
    if use_cuda:
        # .to(device) already moves registered submodules; these explicit calls
        # also cover the custom LSTM cell in case it is not a registered module.
        c1.cuda()
        c1.lstm.cuda()
        c1.lstm.lstm_cell.cuda()
    # end if

    # Objective function
    criterion = nn.CrossEntropyLoss()
    # Optimizer: Adam for now
    optimizer = optim.Adam(c1.parameters(), lr=learning_rate)

    # Freeze the pretrained front end so it receives no gradient updates.
    freeze_layer(c1.frontEnd)
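    # freeze_layer is a project helper; a minimal equivalent (an assumption,
    # not the project's actual implementation) would be:
    #     for p in c1.frontEnd.parameters():
    #         p.requires_grad = False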

    # For each epoch
    for epoch in range(n_epochs):
        # Iterate over batches
        for data in train_loader:
            # Inputs and outputs
            inputs, targets = data
            inputs, targets = Variable(inputs), Variable(targets)
            if use_cuda: inputs, targets = inputs.cuda(), targets.cuda()

            # Gradients to zero
            optimizer.zero_grad()

            # Forward pass; keep only the class scores (gates are ignored here)
            out = c1(inputs, use_cuda=use_cuda)[0]

            # CrossEntropyLoss expects (batch, classes, frames), hence the permute.
            loss = criterion(out.permute(0, 2, 1), targets.long())

            loss.backward()

            # Optimize
            optimizer.step()
            # Optionally print error measures:
            # print(u"Train CrossEntropyLoss: {}".format(float(loss.data)))

    # Test performance on the held-out set
    correct = 0
    total = 0
    cumFGateVals = []  # forget-gate activations, one entry per test sequence
    cumIGateVals = []  # input-gate activations, one entry per test sequence
    kk = 0  # counts test sequences (used for indexing the cumulative F/I ratio)
    for images, labels in test_loader:
        with torch.no_grad():  # saves some GPU memory
            images, labels = Variable(images), Variable(labels)
            if use_cuda:
                outputs = c1(images.cuda(), use_cuda=True)
                # Per-frame predicted class for the single sequence in the batch.
                _, predicted = torch.max(outputs[0][0], dim=1)
                # Accumulate gate activations; they are summarized downstream
                # with np.mean (rather than np.max).
                cumFGateVals.append(outputs[1].squeeze())
                cumIGateVals.append(outputs[2].squeeze())

                kk += 1
                correct += (predicted == labels.cuda()).sum()
                total += labels.size(1)
                del labels, outputs
                torch.cuda.empty_cache()

            else:
                outputs = c1(images)
                _, predicted = torch.max(outputs[0][0], dim=1)
                correct += (predicted == labels).sum()
                total += labels.size(1)

                cumFGateVals.append(outputs[1].squeeze())
                cumIGateVals.append(outputs[2].squeeze())

                kk += 1
    correct = correct.cpu().numpy()
    accVec = correct / total  # frame-level accuracy over the test set
    return (accVec, cumFGateVals, cumIGateVals)
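

# Example usage (a minimal sketch; `newnet`, the dataset paths, and the call
# signature come from this file; adjust numOfClasses to match your data):
#
#     acc, f_gates, i_gates = LSTMforFixedWidthForget2InputRatio(
#         n_hidden=200, n_epochs=10, use_cuda=torch.cuda.is_available(),
#         numOfClasses=4)
#     print("frame-level test accuracy:", acc)
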
def ESNFixedWidth(leaky_rate=0.5,
                  n_iterations=140,
                  seed=72347,
                  n_hidden=200,
                  use_cuda=True):
    # Parameters
    spectral_radius = 0.9
    learning_rate = 0.005
    batch_size = 1
    numOfClasses = 6
    numOfFrames = 755
    lastLayerSize = 60
    train_leaky_rate = True

    class column(nn.Module):
        """
        cortical column model
        """
        def __init__(self,
                     preTrainedModel,
                     leaky_rate,
                     spectral_radius=spectral_radius,
                     n_hidden=n_hidden,
                     numOfClasses=numOfClasses,
                     lastLayerSize=lastLayerSize):
            super(column, self).__init__()
            self.frontEnd = preTrainedModel
            self.echo = etnn.LiESNCell(leaky_rate,
                                       train_leaky_rate,
                                       lastLayerSize,
                                       n_hidden,
                                       spectral_radius=spectral_radius,
                                       nonlin_func=torch.nn.functional.relu,
                                       seed=seed)
            self.outLinear = nn.Linear(n_hidden, numOfClasses, bias=True)

        def forward(self,
                    x,
                    y=None,
                    batch_size=1,
                    lastLayerSize=60,
                    numOfFrames=numOfFrames):
            # Run the frozen front end frame-by-frame, without tracking gradients.
            with torch.no_grad():
                # NaN-filled buffer for the per-frame front-end features.
                hh = torch.full((batch_size, numOfFrames, lastLayerSize),
                                float('nan'))
                for batchNum in range(batch_size):
                    m = x[batchNum, :, :, :].unsqueeze(1)
                    m = self.frontEnd(m)
                    hh[batchNum, :, :] = m.detach()
                x = hh.cuda() if use_cuda else hh
                x = x.detach()
            x = self.echo.forward(x)
            x = self.outLinear(x)
            return x
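
    # Note: etnn.LiESNCell (EchoTorch) implements a leaky-integrator reservoir,
    # roughly (a sketch, not the library's exact code):
    #     h_t = (1 - a) * h_{t-1} + a * relu(W_in @ u_t + W @ h_{t-1})
    # with leaky rate a (trainable here) and a fixed random recurrent matrix W
    # rescaled to the requested spectral radius.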

    transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize((28, 28)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, ), (0.5, ))
    ])

    # NOTE: train and test currently point at the same folder.
    train_set = tomImageFolderFrameSeriesAllClasses(
        root=os.path.join('E:\\', 'Abolfazl', '2ndYearproject', 'datasets',
                          'coil-100Width'),
        transform=transform)

    test_set = tomImageFolderFrameSeriesAllClasses(
        root=os.path.join('E:\\', 'Abolfazl', '2ndYearproject', 'datasets',
                          'coil-100Width'),
        transform=transform)
    classes = ('Ori1', 'Ori2', 'Ori3', 'Ori4', 'Ori5', 'Ori6')
    #classes=('Ori1','Ori2','Ori3','Ori4','Ori5','Ori6','Ori7','Ori8','Ori9','Ori10','Ori11','Ori12','Ori13','Ori14','Ori15','Ori16')

    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=2)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=batch_size,
                                              shuffle=True,
                                              num_workers=2)

    # Use CUDA only if requested and available.
    use_cuda = use_cuda and torch.cuda.is_available()

    # Cortical column model

    c1 = column(newnet, leaky_rate)

    if use_cuda:
        c1.cuda()
    # end if

    # Objective function
    criterion = nn.CrossEntropyLoss()
    # Optimizer: Adam
    optimizer = optim.Adam(c1.parameters(), lr=learning_rate)

    # Freeze the pretrained front end so it receives no gradient updates.
    freeze_layer(c1.frontEnd)

    # For each epoch
    for epoch in range(n_iterations):
        # Iterate over batches
        for data in train_loader:
            # Inputs and outputs
            inputs, targets = data
            inputs, targets = Variable(inputs), Variable(targets)
            if use_cuda: inputs, targets = inputs.cuda(), targets.cuda()

            # Gradients to zero
            optimizer.zero_grad()
            # Forward pass
            out = c1(inputs, targets)
            # CrossEntropyLoss expects (batch, classes, frames), hence the permute.
            loss = criterion(out.permute(0, 2, 1), targets.long())

            loss.backward()

            # Optimize
            optimizer.step()
            # Free per-batch tensors to keep GPU memory in check.
            del inputs, targets, out, loss
            torch.cuda.empty_cache()
            # Optional diagnostics:
            # print('leaky rate', c1.echo.leaky_rate)
    # end for
    correct = 0
    total = 0
    for images, labels in test_loader:
        with torch.no_grad():
            images, labels = Variable(images), Variable(labels)
            if use_cuda:
                outputs = c1(images.cuda())
                _, predicted = torch.max(outputs[0], dim=1)
                correct += (predicted == labels.cuda()).sum()
                total += labels.size(1)
                del labels, outputs
                torch.cuda.empty_cache()
            else:
                outputs = c1(images)
                _, predicted = torch.max(outputs[0], dim=1)
                correct += (predicted == labels).sum()
                total += labels.size(1)
    correct = correct.cpu().numpy()
    return correct / total  # frame-level accuracy
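

# Example usage (a minimal sketch; `newnet` and the COIL-100 dataset path are
# assumed to exist; in the original experiments leaky_rate appears to be swept):
#
#     acc = ESNFixedWidth(leaky_rate=0.5, n_iterations=140, seed=72347,
#                         n_hidden=200, use_cuda=True)
#     print("ESN frame-level test accuracy:", acc)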