Example 1
        print(target.cpu().numpy()[:10])  # Target pattern to be reconstructed
        print(
            inputs.cpu().numpy()[-1][0][:10]
        )  # Last input contains the degraded pattern fed to the network at test time
        print(y.data.cpu().numpy()[:10])  # Final output of the network
        previoustime = nowtime
        nowtime = time.time()
        print("Time spent on last", print_every, "iters: ",
              nowtime - previoustime)
        total_loss /= print_every
        all_losses.append(total_loss)
        print("Mean loss over last", print_every, "iters:", total_loss)
        print("")
    if (numiter + 1) % save_every == 0:
        fname = ('loss_binary_lstm_nbiter_' + str(params['nbiter']) +
                 '_nbhneur_' + str(NBHIDDENNEUR) + '_clamp_' + str(CLAMPING) +
                 '_lr_' + str(ADAMLEARNINGRATE) + '_prestime_' + str(PRESTIME) +
                 '_ipd_' + str(INTERPRESDELAY) + '_rngseed_' + str(RNGSEED) +
                 '.txt')
        with open(fname, 'w') as fo:
            for item in all_losses:
                fo.write("%s\n" % item)

        # Uber-only (comment out if not at Uber)
        if checkHdfs():
            print("Transfering to HDFS...")
            transferFileToHdfsDir(fname, '/ailabs/tmiconi/simple/')

        total_loss = 0
Example 2
def train(paramdict):
    #params = dict(click.get_current_context().params)
    print("Starting training...")
    params = {}
    #params.update(defaultParams)
    params.update(paramdict)
    print("Passed params: ", params)
    print(platform.uname())
    #params['nbsteps'] = params['nbshots'] * ((params['prestime'] + params['interpresdelay']) * params['nbclasses']) + params['prestimetest']  # Total number of steps per episode
    suffix = "maze_" + "".join([
        str(x) + "_" if pair[0] is not 'nbsteps' and pair[0] is not 'rngseed'
        and pair[0] is not 'save_every' and pair[0] is not 'test_every' else ''
        for pair in sorted(zip(params.keys(), params.values()),
                           key=lambda x: x[0]) for x in pair
    ])[:-1] + "_rngseed_" + str(
        params['rngseed']
    )  # Turning the parameters into a nice suffix for filenames
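    # For illustration (hypothetical parameter values, not taken from this script):
    # paramdict = {'eplen': 200, 'lr': 0.0001, 'rngseed': 0} would give the suffix
    # "maze_eplen_200_lr_0.0001_rngseed_0".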

    # Initialize random seeds (first two redundant?)
    print("Setting random seeds")
    np.random.seed(params['rngseed'])
    random.seed(params['rngseed'])
    torch.manual_seed(params['rngseed'])

    print("Initializing network")
    net = Network(params)
    print("Shape of all optimized parameters:",
          [x.size() for x in net.parameters()])
    allsizes = [torch.numel(x.data.cpu()) for x in net.parameters()]
    print("Size (numel) of all optimized elements:", allsizes)
    print("Total size (numel) of all optimized elements:", sum(allsizes))

    print("Initializing optimizer")
    optimizer = torch.optim.Adam(net.parameters(),
                                 lr=1.0 * params['lr'],
                                 eps=1e-4)
    #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, gamma=params['gamma'], step_size=params['steplr'])

    LABSIZE = params['labsize']
    lab = np.ones((LABSIZE, LABSIZE))
    CTR = LABSIZE // 2

    # Simple cross maze
    #lab[CTR, 1:LABSIZE-1] = 0
    #lab[1:LABSIZE-1, CTR] = 0

    # Double-T maze
    #lab[CTR, 1:LABSIZE-1] = 0
    #lab[1:LABSIZE-1, 1] = 0
    #lab[1:LABSIZE-1, LABSIZE - 2] = 0

    # Grid maze
    lab[1:LABSIZE - 1, 1:LABSIZE - 1] = 0
    for row in range(1, LABSIZE - 1):
        for col in range(1, LABSIZE - 1):
            if row % 2 == 0 and col % 2 == 0:
                lab[row, col] = 1
    # Not strictly necessary, but it is nicer not to start on a wall, and it may
    # help localization by introducing a detectable irregularity in the center
    lab[CTR, CTR] = 0
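
    # For illustration (an assumed LABSIZE of 9, not a value taken from this
    # script): the construction above yields outer walls, pillars at even
    # interior (row, col) positions, and a cleared center cell:
    #   1 1 1 1 1 1 1 1 1
    #   1 0 0 0 0 0 0 0 1
    #   1 0 1 0 1 0 1 0 1
    #   1 0 0 0 0 0 0 0 1
    #   1 0 1 0 0 0 1 0 1
    #   1 0 0 0 0 0 0 0 1
    #   1 0 1 0 1 0 1 0 1
    #   1 0 0 0 0 0 0 0 1
    #   1 1 1 1 1 1 1 1 1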

    all_losses = []
    all_losses_objective = []
    all_losses_eval = []
    all_losses_v = []
    lossbetweensaves = 0
    nowtime = time.time()

    print("Starting episodes...")
    sys.stdout.flush()

    pos = 0
    hidden = net.initialZeroState()
    hebb = net.initialZeroHebb()

    # Starting episodes!

    for numiter in range(params['nbiter']):

        PRINTTRACE = 0
        if (numiter + 1) % (1 + params['print_every']) == 0:
            PRINTTRACE = 1

        # Note: it doesn't matter if the reward is on the center (reward is only computed after an action is taken). All we need is not to put it on a wall or pillar (lab=1)
        rposr = 0
        rposc = 0
        if params['rp'] == 0:
            # Reward can fall anywhere that is not a wall or pillar
            while lab[rposr, rposc] == 1:
                rposr = np.random.randint(1, LABSIZE - 1)
                rposc = np.random.randint(1, LABSIZE - 1)
        elif params['rp'] == 1:
            # Constrain the reward to fall on the periphery of the maze
            while lab[rposr, rposc] == 1 or (rposr != 1 and rposr != LABSIZE - 2
                                             and rposc != 1
                                             and rposc != LABSIZE - 2):
                rposr = np.random.randint(1, LABSIZE - 1)
                rposc = np.random.randint(1, LABSIZE - 1)
        #print("Reward pos:", rposr, rposc)

        # Agent always starts an episode from the center
        posc = CTR
        posr = CTR

        optimizer.zero_grad()
        loss = 0
        lossv = 0
        hidden = net.initialZeroState()
        hebb = net.initialZeroHebb()

        reward = 0.0
        rewards = []
        vs = []
        logprobs = []
        sumreward = 0.0
        dist = 0

        for numstep in range(params['eplen']):

            inputsN = np.zeros((1, TOTALNBINPUTS), dtype='float32')
            inputsN[0, 0:RFSIZE * RFSIZE] = lab[
                posr - RFSIZE // 2:posr + RFSIZE // 2 + 1,
                posc - RFSIZE // 2:posc + RFSIZE // 2 + 1].flatten()

            inputs = torch.from_numpy(inputsN).cuda()
            # Previous chosen action
            #inputs[0][numactionchosen] = 1
            inputs[0][-1] = 1  # Bias neuron
            inputs[0][-2] = numstep
            inputs[0][-3] = reward
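
            # The input vector thus carries the flattened RFSIZE x RFSIZE receptive
            # field in its first cells, and the latest reward, the current time step
            # and a constant bias in its last three cells; any remaining cells (if
            # TOTALNBINPUTS is larger than RFSIZE * RFSIZE + 3) are left at zero in
            # this version, since the previous-action input is commented out.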

            # Running the network
            y, v, hidden, hebb = net(
                Variable(inputs, requires_grad=False), hidden, hebb
            )  # y should output action probabilities; v is the value prediction

            distrib = torch.distributions.Categorical(y)
            # sample() returns a PyTorch tensor of size 1; this is needed for the backprop below
            actionchosen = distrib.sample()
            numactionchosen = actionchosen.data[0]  # Turn into a scalar

            # Target position, based on the selected action
            tgtposc = posc
            tgtposr = posr
            if numactionchosen == 0:  # Up
                tgtposr -= 1
            elif numactionchosen == 1:  # Down
                tgtposr += 1
            elif numactionchosen == 2:  # Left
                tgtposc -= 1
            elif numactionchosen == 3:  # Right
                tgtposc += 1
            else:
                raise ValueError("Wrong Action")

            reward = 0.0
            if lab[tgtposr][tgtposc] == 1:
                reward = -.1
            else:
                dist += 1
                posc = tgtposc
                posr = tgtposr

            # Did we hit the reward location? Increase reward and teleport!
            # Note that it doesn't matter if we teleport onto the reward, since reward hitting is only evaluated after the (obligatory) move
            if rposr == posr and rposc == posc:
                reward += 10
                if params['randstart'] == 1:
                    posr = np.random.randint(1, LABSIZE - 1)
                    posc = np.random.randint(1, LABSIZE - 1)
                    while lab[posr, posc] == 1:
                        posr = np.random.randint(1, LABSIZE - 1)
                        posc = np.random.randint(1, LABSIZE - 1)
                else:
                    posr = CTR
                    posc = CTR

            # Store the obtained reward, value prediction, and log-probabilities, for this time step
            rewards.append(reward)
            sumreward += reward
            vs.append(v)
            logprobs.append(distrib.log_prob(actionchosen))

            # A3C/A2C has an entropy reward on the output probabilities, to
            # encourage exploration. Our version of PyTorch does not have an
            # entropy() function for Distribution, so we use a penalty on the
            # sum of squares instead, which has the same basic property
            # (discourages concentration). It really does help!
            loss += params['bentropy'] * y.pow(2).sum()
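
            # (Alternative sketch, assuming a PyTorch version in which
            # torch.distributions.Categorical provides an entropy() method: the
            # sum-of-squares penalty above could be replaced by an explicit
            # entropy bonus, e.g.
            #   loss -= params['bentropy'] * distrib.entropy().sum()
            # though the bentropy coefficient would likely need retuning.)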

            #if PRINTTRACE:
            #    print("Probabilities:", y.data.cpu().numpy(), "Picked action:", numactionchosen, ", got reward", reward)

        # Do the A2C! (essentially copied from V. Mnih et al., https://arxiv.org/abs/1602.01783, Algorithm S3)
        R = 0
        gammaR = params['gr']
        for numstepb in reversed(range(params['eplen'])):
            R = gammaR * R + rewards[numstepb]
            lossv += (vs[numstepb][0] - R).pow(2)
            loss -= logprobs[numstepb] * (R - vs[numstepb].data[0][0])
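
        # For reference, the loop above computes the discounted return
        #   R_t = r_t + gamma * R_{t+1}   (gamma = params['gr'], R = 0 past the last step),
        # accumulates the squared value-prediction error (V(s_t) - R_t)^2 into lossv,
        # and adds the policy-gradient term -log pi(a_t | s_t) * (R_t - V(s_t)) to loss,
        # with the advantage computed from the detached value (vs[numstepb].data).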

        if PRINTTRACE:
            print("lossv: ", lossv.data.cpu().numpy()[0])
            print("Total reward for this episode:", sumreward, "Dist:", dist)

        # Do we want to squash rewards for stabilization?
        if params['squash'] == 1:
            if sumreward < 0:
                sumreward = -np.sqrt(-sumreward)
            else:
                sumreward = np.sqrt(sumreward)
        elif params['squash'] == 0:
            pass
        else:
            raise ValueError("Incorrect value for squash parameter")

        # Mixing the reward loss and the value-prediction loss
        loss += params['blossv'] * lossv
        loss /= params['eplen']
        loss.backward()

        #scheduler.step()
        optimizer.step()
        #torch.cuda.empty_cache()

        lossnum = loss.data[0]
        lossbetweensaves += lossnum
        if (numiter + 1) % 10 == 0:
            all_losses_objective.append(lossnum)
            all_losses_eval.append(sumreward)
            all_losses_v.append(lossv.data[0])

        # Algorithm done. Now print statistics and save files.

        if (numiter + 1) % params['print_every'] == 0:

            print(numiter, "====")
            print("Mean loss: ", lossbetweensaves / params['print_every'])
            lossbetweensaves = 0
            previoustime = nowtime
            nowtime = time.time()
            print("Time spent on last", params['print_every'], "iters: ",
                  nowtime - previoustime)
            if params['type'] == 'plastic' or params['type'] == 'lstmplastic':
                print("ETA: ",
                      net.eta.data.cpu().numpy(), "alpha[0,1]: ",
                      net.alpha.data.cpu().numpy()[0, 1], "w[0,1]: ",
                      net.w.data.cpu().numpy()[0, 1])
            elif params['type'] == 'rnn':
                print("w[0,1]: ", net.w.data.cpu().numpy()[0, 1])

        if (numiter + 1) % params['save_every'] == 0:
            print("Saving files...")
            losslast100 = np.mean(all_losses_objective[-100:])
            print("Average loss over the last 100 episodes:", losslast100)
            print("Saving local files...")
            with open('params_' + suffix + '.dat', 'wb') as fo:
                pickle.dump(params, fo)
            with open('lossv_' + suffix + '.txt', 'w') as thefile:
                for item in all_losses_v:
                    thefile.write("%s\n" % item)
            with open('loss_' + suffix + '.txt', 'w') as thefile:
                for item in all_losses_eval:
                    thefile.write("%s\n" % item)
            torch.save(net.state_dict(), 'torchmodel_' + suffix + '.dat')
            # Uber-only
            print("Saving HDFS files...")
            if checkHdfs():
                print("Transfering to HDFS...")
                transferFileToHdfsDir('loss_' + suffix + '.txt',
                                      '/ailabs/tmiconi/gridlab/')
                transferFileToHdfsDir('torchmodel_' + suffix + '.dat',
                                      '/ailabs/tmiconi/gridlab/')
                transferFileToHdfsDir('params_' + suffix + '.dat',
                                      '/ailabs/tmiconi/gridlab/')