Python Unpickler.pop Examples

Programming Language: Python

Namespace/Package Name: pickle

Class/Type: Unpickler

Method/Function: pop

Examples at hotexamples.com: 3

Python Unpickler.pop - 3 examples found. These are the top rated real world Python examples of pickle.Unpickler.pop extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Unpickler(30)

load(30)

find_class(8)

__init__(7)

persistent_load(5)

dump(5)

pop(3)

get_vocab(3)

find_global(3)

append(2)

get_train_batch(1)

extend(1)

items(1)

eta(1)

load_build(1)

memo(1)

momentum(1)

adaptive_k(1)

popleft(1)

predict_proba(1)

Example #1

Show file

File: self_play.py Project: langorou/vamperouge

class SelfPlay:
    """
    Self-play data generation and training loop for the neural network.

    Every iteration plays a number of MCTS-guided self-play episodes, stores
    the produced samples in a bounded history, trains the network on the
    shuffled history and keeps the new weights only if they win often enough
    against the previous weights in an arena match.
    """

    def __init__(self, neural_net, config):
        """
        Parameters
        ----------
        neural_net
            The network being trained.
        config
            Settings object read throughout this class (``num_iters``,
            ``num_eps``, ``checkpoint``, ``load_samples``, ...).
        """
        self.neural_net = neural_net
        # competitor neural network
        self.competitor_nn = vamperouge_net(config)
        self.config = config
        self.mcts = MCTS(neural_net, config)
        self.train_samples_history = []
        # The default must be set BEFORE loading samples: load_train_samples()
        # may switch the flag on, and resetting it afterwards would silently
        # discard that decision.
        self.skip_first_self_play = False
        if self.config.load_samples:
            self.load_train_samples()

    def run_episode(self):
        """
        Runs one episode of self-play, starting with player 1.

        Returns
        -------
        list of (state, policy, value) tuples, one per recorded symmetry of
        every visited state. ``value`` is the final score ``r`` when the
        sample's player equals the player the score was computed for, and
        ``-r`` otherwise.
        """
        train_samples = []
        state = game.get_init_state()
        current_player = 1
        episode_step = 0

        while True:
            episode_step += 1
            canon_state = game.get_canonical_form(state, current_player)
            # temperature is 1 before the threshold step and 0 afterwards
            temp = int(episode_step < self.config.temperature_threshold)

            policy = self.mcts.get_move_probabilities(canon_state, temp=temp)
            for sym_state, sym_policy in game.get_symmetries(canon_state, policy):
                train_samples.append([sym_state, current_player, sym_policy, None])

            move = np.random.choice(len(policy), p=policy)
            state, current_player = game.get_next_state(state, current_player, move)

            r = game.get_state_score(state, current_player)

            # a non-zero score ends the episode
            if r != 0:
                return [
                    (s, pcy, r * ((-1) ** (pyr != current_player)))
                    for s, pyr, pcy, _ in train_samples
                ]

    def learn(self):
        """
        Performs num_iters iterations with num_eps episodes of self-play.

        After each iteration the network is retrained on the accumulated
        samples and pitted against its previous version; the new weights are
        kept only when their share of decisive games reaches
        ``config.update_threshold``.
        """

        for i in range(1, self.config.num_iters + 1):
            print("------iteration " + str(i) + "------")
            if not self.skip_first_self_play or i > 1:
                iteration_train_samples = deque([], maxlen=self.config.max_queue_length)

                episode_time = AverageMeter()
                bar = Bar("Self Play", max=self.config.num_eps)
                end = time.time()

                for episode in range(self.config.num_eps):
                    # reset search tree
                    self.mcts = MCTS(self.neural_net, self.config)
                    iteration_train_samples += self.run_episode()

                    episode_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix = "({ep}/{max_ep}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}".format(
                        ep=episode + 1,
                        max_ep=self.config.num_eps,
                        et=episode_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td,
                    )
                    bar.next()
                bar.finish()

                # save the iteration examples to the history
                self.train_samples_history.append(iteration_train_samples)

            if (
                len(self.train_samples_history)
                > self.config.num_iters_for_train_samples_history
            ):
                print(
                    "len(train_samples_history) =",
                    len(self.train_samples_history),
                    " => remove the oldest train_samples",
                )
                self.train_samples_history.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.save_train_samples(i - 1)

            # shuffle examples before training
            train_samples = []
            for e in self.train_samples_history:
                train_samples.extend(e)
            shuffle(train_samples)

            # training new network, keeping a copy of the old one
            self.neural_net.save_checkpoint(
                folder=self.config.checkpoint, filename="temp.pth.tar"
            )
            self.competitor_nn.load_checkpoint(
                folder=self.config.checkpoint, filename="temp.pth.tar"
            )
            previous_mcts = MCTS(self.competitor_nn, self.config)

            self.neural_net.train_from_samples(train_samples)
            new_mcts = MCTS(self.neural_net, self.config)

            print("battling against previous version")
            arena = Arena(
                lambda x: np.argmax(previous_mcts.get_move_probabilities(x, temp=0)),
                lambda x: np.argmax(new_mcts.get_move_probabilities(x, temp=0)),
            )
            prev_wins, new_wins, draws = arena.play_games(self.config.arena_compare)

            print("new/prev wins : %d / %d ; draws : %d" % (new_wins, prev_wins, draws))
            # reject when no game was decisive or the win ratio is too low
            if (
                prev_wins + new_wins == 0
                or float(new_wins) / (prev_wins + new_wins)
                < self.config.update_threshold
            ):
                print("rejecting new model")
                self.neural_net.load_checkpoint(
                    folder=self.config.checkpoint, filename="temp.pth.tar"
                )
            else:
                print("accepting new model")
                self.neural_net.save_checkpoint(
                    folder=self.config.checkpoint, filename=self.get_checkpoint_file(i)
                )
                self.neural_net.save_checkpoint(
                    folder=self.config.checkpoint, filename="best.pth.tar"
                )

    def get_checkpoint_file(self, iteration):
        """Return the checkpoint file name used for ``iteration``."""
        return "checkpoint_" + str(iteration) + ".pth.tar"

    def delete_train_samples(self, iteration):
        """
        Delete the samples file of ``iteration`` from the checkpoint folder.

        A missing file is ignored; any other OSError is re-raised.
        """
        filename = os.path.join(
            self.config.checkpoint, self.get_checkpoint_file(iteration) + ".samples"
        )
        try:
            os.remove(filename)
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise

    def save_train_samples(self, iteration):
        """
        Pickle the whole samples history into the checkpoint folder.

        The folder is created when missing. When ``config.delete_old_samples``
        is set, the samples file of ``iteration - 1`` is removed afterwards.
        """
        folder = self.config.checkpoint
        # exist_ok avoids the check-then-create race of exists() + makedirs()
        os.makedirs(folder, exist_ok=True)
        filename = os.path.join(
            folder, self.get_checkpoint_file(iteration) + ".samples"
        )
        with open(filename, "wb") as f:
            Pickler(f).dump(self.train_samples_history)
        if self.config.delete_old_samples:
            self.delete_train_samples(iteration - 1)

    def load_train_samples(self):
        """
        Load the samples history from ``config.load_samples_folder_file``.

        If the file does not exist the user is asked whether to continue;
        any answer other than ``"y"`` exits the process. After a successful
        load the oldest entries are dropped until the history fits
        ``config.num_iters_for_train_samples_history`` and
        ``skip_first_self_play`` is taken from the config.
        """
        samples_file = os.path.join(
            self.config.load_samples_folder_file[0],
            self.config.load_samples_folder_file[1],
        )
        if not os.path.isfile(samples_file):
            print(samples_file)
            r = input("File with train samples not found. Continue? [y|n]")
            if r != "y":
                sys.exit()
            return

        print("File with train samples found. Read it.")
        # NOTE(security): unpickling can execute arbitrary code; only load
        # sample files that come from a trusted source.
        with open(samples_file, "rb") as f:
            self.train_samples_history = Unpickler(f).load()
        while (
            len(self.train_samples_history)
            > self.config.num_iters_for_train_samples_history
        ):
            print(
                "len(train_samples_history) =",
                len(self.train_samples_history),
                " => remove the oldest train_samples",
            )
            self.train_samples_history.pop(0)
        # examples based on the model were already collected (loaded)?
        self.skip_first_self_play = self.config.skip_first_self_play

Example #2

Show file

File: AlphaZero.py Project: duvallj/RLAIT

class AlphaZero(Approach):
    def __init__(self, argdict, approach_name="alphazero"):
        """
        Initializes an approach before
        it applies itself to a task

        Parameters
        ----------
        argdict : rlait.util.misc.dotdict
            A dictionary containing all the extra arguments that AlphaZero uses.
            Fully explained in the Notes section
        approach_name : str
            Name of the approach, used for printing

        Notes
        -----
        Full list of possible arguments that can be provided in argdict,
        as well as their default values:

        * lr : float (0.001)
            Backpropagation learning rate
        * dropout : float (0.3)
            Dropout factor to use in the densely connected layers
        * epochs : int (10)
            Number of epochs to train network on past examples each iteration
        * batch_size : int (64)
            Input batch size to use with neural network
        * cuda : bool (True)
            Use CUDA to speed training?
        * num_channels : int (512)
            Number of features to detect in the convolutional layers
        (The number of layers, activation functions are fixed)

        * startFromEp : int (0)
            The episode number to start from. Useful if a previous run got interrupted
        * numEps : int (30)
            The number of playout episodes to run per training iteration
        * tempThreshold : int (15)
            The number of moves to make using weighted plays instead of maximum
            plays when training
        * updateThreshold : float (0.51)
            Fraction of games a challenger network needs to win in order to
            become the new base.
        * maxlenOfQueue : int (200000)
            The maximum number of training examples to train on.
        * numMCTSSims : int (30)
            The number of times to run MCTS per move during self-play and actual play
        * arenaCompare : int (11)
            The number of games to play against the previous best AI at the end
            of a training iteration
        * cpuct : float (1.0)
            A factor that determines how likely the MCTS is to explore.
        * maxDepth : int (500)
            The maximum search depth to explore for one move
        * startWithRandomPlay : bool (False)
            If starting at the first iteration, do we use random play to generate
            some training examples to look at first?
        * numRandomPlayExamples : int (100)
            Number of random game examples to generate

        * load_checkpoint : bool (False)
            Do we load a checkpoint?
        * prevHistory : str (None)
            Previous history to load. Can be set if `load_checkpoint` is set. If
            set, AlphaZero skips the first self-play iteration and jumps straight
            to training a new network on the provided history.
        * checkpoint : str (None)
            Checkpoint to load. Must be set if `load_checkpoint` is set, should
            be a file path relative to the below directory.
        * checkpoint_dir : str ("./checkpoints")
            Folder to store the checkpoints in. Must be an absolute path or a
            path relative to the location of the script running.
        * numItersForTrainExamplesHistory : int (30)
            The number of past iterations to store in a single history file.
        """
        super().__init__(approach_name)

        self.args = dotdict(argdict)

        # (name, default) pairs; a value already present in argdict wins.
        # The order is the order in which missing keys get written.
        defaults = (
            # network / optimizer
            ("lr", 0.001),
            ("dropout", 0.3),
            ("epochs", 10),
            ("batch_size", 64),
            ("cuda", True),
            ("num_channels", 512),
            # self-play and search
            ("startFromEp", 0),
            ("numEps", 30),
            ("tempThreshold", 15),
            ("updateThreshold", 0.51),
            ("maxlenOfQueue", 200000),
            ("numMCTSSims", 30),
            ("arenaCompare", 11),
            ("cpuct", 1.0),
            ("maxDepth", 500),
            ("startWithRandomPlay", False),
            ("numRandomPlayExamples", 100),
            # checkpointing
            ("load_checkpoint", False),
            ("checkpoint", None),
            ("prevHistory", None),
            ("checkpoint_dir", "./checkpoints"),
            ("numItersForTrainExamplesHistory", 30),
        )
        for name, default in defaults:
            setattr(self.args, name, self.args.get(name, default))

        self.trainExamplesHistory = []
        self.skipFirstSelfPlay = False

    def _reset_mcts(self):
        # Reset MCTS variables (clears cache)
        self.Qsa = {}       # stores Q values for s,a (as defined in the paper)
        self.Nsa = {}       # stores #times edge s,a was visited
        self.Ns = {}        # stores #times board s was visited
        self.Ps = {}        # stores initial policy (returned by neural net)

        self.Es = {}        # stores game.getGameEnded ended for board s
        self.Vs = {}        # stores game.getValidMoves for board s

    def init_model(self):
        """
        Initializes AlphaZero with a list of models (one for each task phase)
        in self.models

        Raises
        ------
        TypeError
            If the state shape differs between phases, if the state type is
            "flat", or if the state type is neither "deeprect" nor "rect".
        """
        # Assumes that the input board shape will remain constant between phases
        # Unfortunately, there's no good way to do this without that assumption.

        ###########################################
        # Define network based on Task parameters #
        ###########################################

        empty_state = self.task.empty_state(0)
        # Explicit comparison instead of `assert`, which is stripped under -O.
        for phase in range(1, self.task.num_phases):
            if self.task.empty_state(phase).shape != empty_state.shape:
                raise TypeError("{} cannot be applied to tasks with variant board representations!".format(self.approach_name))

        state_kind = STATE_TYPE_OPTION[empty_state.state_type]

        if 'flat' in state_kind:
            raise TypeError("{} currently does not support tasks with board type \"flat\"".format(self.approach_name))

        self.input = Input(shape=empty_state.shape)

        if state_kind == 'deeprect':
            # fold every axis after the first two into a single trailing axis
            extra_dim = 1
            for dim in empty_state.shape[2:]:
                extra_dim *= dim
            x_image = Reshape((empty_state.shape[0], empty_state.shape[1], extra_dim))(self.input)
        elif state_kind == 'rect':
            x_image = Reshape((empty_state.shape[0], empty_state.shape[1], 1))(self.input)
        else:
            raise TypeError("Unknown state type \"{}\"".format(empty_state.state_type))

        conv_model = SimpleConvNet(self.args, x_image)

        # value head: one tanh output per player
        self.v = Dense(self.task.num_players, activation='tanh', name='v')(conv_model.output)

        self.outputs = [self.v]
        self.output_sizes = [self.task.num_players]

        self.models = []

        for phase in range(self.task.num_phases):
            empty_move = self.task.empty_move(phase)
            # policy head size = number of entries in this phase's move array
            output_size = 1
            for dim in empty_move.shape:
                output_size *= dim
            output = Dense(output_size, activation='softmax', name='pi{}'.format(phase))(conv_model.output)
            self.outputs.append(output)
            self.output_sizes.append(output_size)

            # one two-headed model (value + this phase's policy) per phase
            model = Model(inputs=self.input, outputs=[self.v, output])
            model.compile(loss=['mean_squared_error', 'categorical_crossentropy'], optimizer=Adam(self.args.lr))
            self.models.append(model)

        # Per-phase models are built instead of a single model over all heads
        # because multiple models are needed in order to train correctly.
        # The single-model alternative would have been:
        # self.model = Model(inputs=self.input, outputs=self.outputs)

    def init_to_task(self, task, make_competetor=True):
        """
        Customizes an approach to work on
        a specific task

        Parameters
        ----------
        task : Task
            The Task object to customize to. Should provide
            all the necessary methods for this approach to customize,
            like length of move vectors for different phases.
        make_competetor : bool (True)
            Optional, controls whether we initialize a competetor AI for selfplay.
            When creating a competetor, this is turned to False so we don't
            infinitely recur.

        Returns
        -------
        self
            For daisy-chaining purposes. Other methods return
            self for the same reason

        Notes
        -----
        Although the parameters for the neural network size are passed through
        the __init__ method, the neural networks are actually created in this
        method. This is to handle different sizes of input and output layers
        required for different Tasks.
        """

        self.task = task

        self.init_model()
        self._reset_mcts()

        #######################
        # Load previous model #
        #######################

        self.iteration = self.args.startFromEp

        if self.args.load_checkpoint:
            if self.args.checkpoint is not None:
                self.load_weights(self.args.checkpoint)
            else:
                # Best effort: fall back to the checkpoint of the starting
                # iteration. Only ordinary errors are tolerated so that
                # KeyboardInterrupt / SystemExit still propagate.
                try:
                    self.load_weights(self._get_checkpoint_filename(self.iteration))
                except Exception:
                    log.warn("Tried to load checkpoint from starting iteration, could not find it.")
            # the competitor instance never loads history itself
            if self.args.prevHistory is not None and make_competetor:
                self.load_history(self.args.prevHistory)
                self.skipFirstSelfPlay = True

        if make_competetor:
            self.pnet = self.__class__(self.args).init_to_task(self.task, make_competetor=False)
        else:
            self.pnet = None

        # Returning self so that constructs like
        # `ai = CustomApproach(args).init_to_task(Task(more_args))`
        # become possible
        return self

    def _nn_predict(self, state):
        v, pi = self.models[state.phase].predict(np.reshape(state, (1,)+state.shape))
        return v[0], pi[0]

    def _search(self, board, depth, maxdepth=float('inf')):
        """
        This function performs one iteration of MCTS. It is recursively called
        till a leaf node is found. The action chosen at each node is one that
        has the maximum upper confidence bound as in the paper.

        Once a leaf node is found, the neural network is called to return an
        initial policy P and a value v for the state. This value is propogated
        up the search path. In case the leaf node is a terminal state, the
        outcome is propogated up the search path. The values of Ns, Nsa, Qsa are
        updated.

        Parameters
        ----------
        board : State
            The board to search

        Returns
        -------
            v: the player value vector corresponding to the searched board

        Notes
        -----
        Because of the internal use of canonical boards, every v returned is
        relative to the perspective of the first player.
        """

        if depth > maxdepth: return np.zeros((self.task.num_players,))

        canonicalBoard = self.task.get_canonical_form(board)
        phase = board.phase
        s = self.task.state_string_representation(board)

        if s not in self.Es:
            if self.task.is_terminal_state(board):
                winners = self.task.get_winners(board)
                self.Es[s] = np.array([1 if (i in winners) else -1 for i in range(self.task.num_players)])
            else:
                self.Es[s] = np.zeros((self.task.num_players,))

        if not (self.Es[s] == 0).all():
            # terminal node
            return self.Es[s]

        if s not in self.Ps:
            # leaf node
            # NOTE: this section is the only place canonical boards are introduced
            # the rest of the searching takes place on non-canonical boards
            # for ease of passing hidden information.
            # NOTE: but I shouldn't even be passing around hidden information because it should be hidden even when searching???
            v, self.Ps[s] = self._nn_predict(canonicalBoard)
            # correcting ordering of v
            v = np.concatenate((v[(self.task.num_players-board.next_player):], v[:(self.task.num_players-board.next_player)]))
            self.Ps[s] = np.reshape(self.Ps[s], self.task.empty_move(board.phase).shape)
            # unfortunately, this requires that canonical forms do nothing
            # like flipping the board, which is usually normal...
            valids = self.task.get_legal_mask(board)
            self.Ps[s] = self.Ps[s]*valids      # masking invalid moves
            sum_Ps_s = np.sum(self.Ps[s])
            if sum_Ps_s > 0.0:
                self.Ps[s] /= sum_Ps_s    # renormalize
            else:
                # if all valid moves were masked make all valid moves equally probable

                # NB! All valid moves may be masked if either your NNet architecture is insufficient or you've get overfitting or something else.
                # If you have got dozens or hundreds of these messages you should pay attention to your NNet and/or training process.
                log.warn("All valid moves were masked, do workaround.")
                self.Ps[s] = self.Ps[s] + valids
                self.Ps[s] /= np.sum(self.Ps[s])

            self.Vs[s] = list(self.task.iterate_legal_moves(board))
            self.Ns[s] = 1
            #print("For state {}".format(self.task.state_string_representation(canonicalBoard)))
            #print("Predicted move: {} and value: {}".format(self.Ps[s], v))
            return v

        valid_moves = self.Vs[s]
        cur_best = -float('inf')
        best_act = -1

        # pick the action with the highest upper confidence bound
        for move in valid_moves:
            a = self.task.move_string_representation(move, board)
            if (s,a) in self.Qsa:
                u = self.Qsa[(s,a)][board.next_player] + self.args.cpuct*(move*self.Ps[s]).sum()*math.sqrt(self.Ns[s])/(1+self.Nsa[(s,a)])
            else:
                u = self.args.cpuct*(move*self.Ps[s]).sum()*math.sqrt(self.Ns[s] + EPS)     # Q = 0 ?

            if u > cur_best:
                cur_best = u
                best_act = a

        a = best_act
        #print("For state {}".format(self.task.state_string_representation(board)))
        #print("Best move is {}".format(a))
        next_s = self.task.apply_move(self.task.string_to_move(a, board), board)

        try:
            v = self._search(next_s, depth+1, maxdepth=maxdepth)
        except RecursionError:
            # NOTE: this might cause some issues, but it hits the
            # recursion limit pretty infrequently anyways (most likely due
            # to inadequate tie handling anyways, which should be fixed)
            return 0

        if (s,a) in self.Qsa:
            self.Qsa[(s,a)] = (self.Nsa[(s,a)]*self.Qsa[(s,a)] + v)/(self.Nsa[(s,a)]+1)
            self.Nsa[(s,a)] += 1
        else:
            self.Qsa[(s,a)] = v
            self.Nsa[(s,a)] = 1

        self.Ns[s] += 1
        return v

    def _get_action_prob(self, state, temp=1):
        """
        Returns the probabilities for each legal move given a state.
        Also returns the legal moves themselves

        Parameters
        ----------
            state : State

        Returns
        -------
            probs : list(float)
                A policy vector where the probability of the ith action is
                proportional to Nsa[(s,a)]**(1./temp)
            avail_moves : list(Move)
                A list of available moves where each entry corresponds to the
                same index in probs
        """
        for i in range(self.args.numMCTSSims):
            self._search(state, 0, maxdepth=self.args.maxDepth)

        s = self.task.state_string_representation(state)
        avail_moves = list(map(lambda x: self.task.move_string_representation(x, state),
                     self.task.iterate_legal_moves(state)))
        counts = [self.Nsa[(s,a)] if (s,a) in self.Nsa else 0 \
                for a in avail_moves]

        probs = []
        if temp == 0:
            bestA = np.argmax(counts)
            probs = [0]*len(counts)
            probs[bestA] = 1
        else:
            probs = [x**(1./temp) for x in counts]

        return probs, avail_moves

    def _get_random_action_prob(self, state, temp=1):
        """
        Just like _get_action_prob, only completely random
        """
        s = self.task.state_string_representation(state)
        avail_moves = list(map(lambda x: self.task.move_string_representation(x, state),
                     self.task.iterate_legal_moves(state)))
        probs = [1.0/len(avail_moves)] * len(avail_moves)

        return probs, avail_moves

    def get_move(self, state, temp=0):
        """
        Pick the move the AI wants to play in the given state.

        The move is sampled from the legal moves using the weights computed
        by _get_action_prob and converted back into a move object.

        Parameters
        ----------
        state : State
            The state object containing all the state information and
            next player information. Size and shape varies per Task.
            Ideally should be canonicalized.
        temp : number
            Temperature forwarded to _get_action_prob

        Returns
        -------
        move : Move
            The move object containing the move information. Size and
            shape varies per Task.
        """
        weights, candidates = self._get_action_prob(state, temp)
        (picked,) = random.choices(candidates, weights=weights, k=1)
        return self.task.string_to_move(picked, state)

    def load_weights(self, filename):
        """
        Loads a previous Approach state from a file. Just the
        weights, history is loaded separately.

        The file is looked up first as given and then inside
        ``args.checkpoint_dir``. It is expected to hold a pickled list with
        one weights buffer per model, in the order of ``self.models``.

        Parameters
        ----------
        filename : str
            File to load the weights from. Not having this be a
            list allows for other data encoding schemes.

        Returns
        -------
        self

        Raises
        ------
        FileNotFoundError
            If the file exists in neither location.
        """

        filepath = filename
        if not os.path.exists(filepath):
            filepath = os.path.join(self.args.checkpoint_dir, filename)
            if not os.path.exists(filepath):
                # `raise "<str>"` is itself a TypeError; raise a real exception
                raise FileNotFoundError("No model in local file {} or path {}!".format(filename, filepath))

        # NOTE(security): unpickling can execute arbitrary code; only load
        # checkpoint files that come from a trusted source.
        with open(filepath, "rb") as f:
            all_model_files = Unpickler(f).load()

        for i, buf in enumerate(all_model_files):
            self.models[i].load_weights(buf)

        return self

    def save_weights(self, filename):
        """
        Saves the weights of the current state to a file.

        Every model writes its weights into an in-memory buffer and the list
        of buffers is pickled into ``args.checkpoint_dir/filename``. The
        directory, including missing parents, is created when needed.

        Parameters
        ----------
        filename : str
            File to save the weights to.

        Returns
        -------
        self
        """

        checkpoint_dir = self.args.checkpoint_dir
        filepath = os.path.join(checkpoint_dir, filename)
        if not os.path.exists(checkpoint_dir):
            print("Checkpoint Directory does not exist! Making directory {}".format(checkpoint_dir))
            # makedirs (as save_history uses) also handles nested paths;
            # exist_ok tolerates the directory appearing in the meantime
            os.makedirs(checkpoint_dir, exist_ok=True)

        # This was the best thing I could think of to fit multiple models in one file:
        # Keras's h5py backend supports writing data directly to a BytesIO object
        # So, we just tell it to do that for all the models and write the resulting
        # list to a pickle file.
        # Will most certainly fail for large enough models
        all_model_files = []
        for model in self.models:
            buf = io.BytesIO()
            model.save_weights(buf, overwrite=True)
            all_model_files.append(buf)

        with open(filepath, "wb") as f:
            Pickler(f).dump(all_model_files)

        return self

    def load_history(self, filename):
        """
        Loads a game history from a file and stores it in
        ``self.trainExamplesHistory``, replacing whatever was there.

        The file is looked up first as given and then inside
        ``args.checkpoint_dir``.

        Parameters
        ----------
        filename : str
            File to load history from.

        Returns
        -------
        self

        Raises
        ------
        FileNotFoundError
            If the file exists in neither location.
        """

        filepath = filename
        if not os.path.exists(filepath):
            filepath = os.path.join(self.args.checkpoint_dir, filename)
            if not os.path.exists(filepath):
                # `raise "<str>"` is itself a TypeError; raise a real exception
                raise FileNotFoundError("No checkpoint in local file {} or path {}!".format(filename, filepath))

        # NOTE(security): unpickling can execute arbitrary code; only load
        # history files that come from a trusted source.
        with open(filepath, "rb") as f:
            log.info(f"Loading History from {filepath}")
            self.trainExamplesHistory = Unpickler(f).load()

        return self

    def save_history(self, filename):
        """
        Pickle the current game history into the checkpoint directory.

        The directory is created when it does not exist yet. The target file
        is opened for writing, so existing content is replaced.

        Parameters
        ----------
        filename : str
            Name of the history file inside ``args.checkpoint_dir``.

        Returns
        -------
        self
        """

        target_dir = self.args.checkpoint_dir
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        with open(os.path.join(target_dir, filename), "wb") as out:
            pickler = Pickler(out)
            pickler.dump(self.trainExamplesHistory)

        return self

    def _get_checkpoint_filename(self, iteration):
        return "checkpoint_{}.pth.tar".format(iteration)

    def _run_one_selfplay(self, action_prob_func):
        """
        Plays one game from the empty state and collects training examples.

        Parameters
        ----------
        action_prob_func : callable
            Called as ``action_prob_func(board, temp=temp)``; expected to
            return ``(probs, avail_moves)`` where ``avail_moves[i]`` is the
            move string that ``probs[i]`` belongs to.

        Returns
        -------
        list of (board, pi, r)
            One entry per move played: the board before the move, the
            flattened probability-weighted sum of the move encodings, and
            an array holding 1 for every player index in the winners and
            -1 for every other index.
        """
        task = self.task
        board = task.empty_state(phase=0)
        played = []
        episodeStep = 0

        while not task.is_terminal_state(board):
            episodeStep += 1
            # 1 while the step count is below tempThreshold, 0 afterwards
            temp = int(episodeStep < self.args.tempThreshold)

            probs, avail_moves = action_prob_func(board, temp=temp)

            # weight every move encoding by its probability, then add them up
            weighted = [
                prob * task.string_to_move(avail_moves[idx], board)
                for idx, prob in enumerate(probs)
            ]
            pi = np.sum(np.asarray(weighted), axis=0).flatten()

            played.append((board, board.next_player, pi))

            # sample the move that is actually played from the same distribution
            chosen = random.choices(population=avail_moves, weights=probs, k=1)[0]
            board = task.apply_move(task.string_to_move(chosen, board), board)

        # Game is over
        winners = task.get_winners(board)

        # This might be biased towards or against ties depending on the last
        # player to move, but in sufficiently complex games this should
        # result in a 50/50 split anyways
        return [
            (
                old_board,
                pi,
                np.array([1 if (seat in winners) else -1 for seat in range(task.num_players)]),
            )
            for old_board, _player, pi in played
        ]

    def _run_selfplay(self):
        """
        Collects the training examples for the current iteration.

        * Iteration 0 with ``args.startWithRandomPlay`` set: plays
          ``args.numRandomPlayExamples`` games using
          ``self._get_random_action_prob``.
        * Otherwise, when ``self.iteration > args.startFromEp`` or
          ``self.skipFirstSelfPlay`` is false: plays ``args.numEps`` games
          using ``self._get_action_prob``, resetting the MCTS before each.
        * Otherwise no games are played.

        New examples are appended to ``self.trainExamplesHistory``. When the
        history then exceeds ``args.numItersForTrainExamplesHistory`` entries
        the oldest entry is dropped. The history is always written to
        ``<checkpoint name>.examples`` at the end.
        """

        # bookkeeping
        log.info('------ITER ' + str(self.iteration) + '------')
        # examples of the iteration
        if self.iteration == 0 and self.args.startWithRandomPlay:
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)
            log.info("Running Random Play to start")
            bar = Progbar(self.args.numRandomPlayExamples)
            bar.update(0)

            for _ in range(self.args.numRandomPlayExamples):
                iterationTrainExamples += self._run_one_selfplay(self._get_random_action_prob)
                bar.add(1)

            self.trainExamplesHistory.append(iterationTrainExamples)

        elif self.iteration > self.args.startFromEp or not self.skipFirstSelfPlay:
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)
            bar = Progbar(self.args.numEps)
            bar.update(0)

            for _ in range(self.args.numEps):
                self._reset_mcts()
                iterationTrainExamples += self._run_one_selfplay(self._get_action_prob)
                # bookkeeping + plot progress
                bar.add(1)

            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)

        if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            # Logger methods take a format string plus arguments; extra
            # positional values without placeholders break message formatting.
            log.debug(
                "len(trainExamplesHistory) = %d => remove the oldest trainExamples",
                len(self.trainExamplesHistory),
            )
            self.trainExamplesHistory.pop(0)
        # backup history to a file
        # NB! the examples were collected using the model from the previous iteration
        self.save_history(self._get_checkpoint_filename(self.iteration)+".examples")

    def _match_phase(self, phase):
        """Build a predicate that is true for objects whose ``phase`` attribute equals *phase*."""
        return lambda candidate: candidate.phase == phase

    def _train_nnet(self, examples):
        """
        Fits each phase's model on the examples that belong to that phase.

        Parameters
        ----------
        examples : list
            Each example is of form (board, pi, v); ``board.phase`` decides
            which entry of ``self.models`` is trained on it.

        A phase for which *examples* contains no entry is skipped: unzipping
        an empty selection yields nothing to unpack, and there is nothing to
        fit on.
        """
        # for each phase, filter out all the examples from that phase
        # and train the corresponding model on them
        for phase in range(self.task.num_phases):
            match_phase = self._match_phase(phase)
            phase_examples = [example for example in examples if match_phase(example[0])]
            if not phase_examples:
                # zip(*[]) is empty, so the three-way unpack below would
                # raise ValueError
                continue

            f_input_boards, f_target_pis, f_target_vs = zip(*phase_examples)
            self.models[phase].fit(
                x=np.asarray(f_input_boards),
                # value targets first, policy targets second
                y=[np.asarray(f_target_vs), np.asarray(f_target_pis)],
                batch_size=self.args.batch_size,
                epochs=self.args.epochs
            )

    def _arena_play_once(self, first_player, second_player, verbose=False):
        """
        Plays a single game between two players and scores it for the first.

        Both players have their MCTS reset before the game. Whoever is
        ``next_player`` on the empty board is treated as *first_player*'s
        seat; every other seat is played by *second_player*.

        Returns
        -------
        int
            1 if the first player's seat is among the winners, -1 if there
            are winners but that seat is not among them, 0 if there are no
            winners.
        """
        # TODO: Currently only supports 2-player games, should probably fix that
        task = self.task
        if verbose:
            assert(task.state_string_representation)
        board = task.empty_state(phase=0)

        for contestant in (first_player, second_player):
            contestant._reset_mcts()

        first_player_number = board.next_player
        it = 0

        while not task.is_terminal_state(board):
            it += 1
            if verbose:
                print(f"Turn {it} Player {board.next_player}")
                print(task.state_string_representation(board))

            temp = it < self.args.tempThreshold
            # can't count on consistent player numbers or even consistent
            # player counts, so only the opening seat is told apart
            if board.next_player == first_player_number:
                mover = first_player
            else:
                mover = second_player
            board = task.apply_move(mover.get_move(board, temp), board)

        winners = task.get_winners(board)
        if verbose:
            print(f"Game Over: Turn {it} Winners {winners}")
            print(task.state_string_representation(board))

        if first_player_number in winners:
            return 1
        if len(winners) > 0:
            return -1
        return 0

    def _arena_play(self, num, verbose=True):
        """
        Plays ``2 * int(num/2)`` games between this trainer (player 1) and
        ``self.pnet`` (player 2); each of them opens half of the games.

        Parameters
        ----------
            num : int

        Returns
        -------
            oneWon : int
                games won by player1
            twoWon : int
                games won by player2
            draws:
                games won by nobody
        """
        num = int(num/2)
        eps_time = Progbar(2*num, stateful_metrics=["wins", "draws", "losses"])
        eps_time.update(0, values=[("wins",0),("draws",0),("losses",0)])
        oneWon = twoWon = draws = 0

        for _ in range(num):
            # (opening player, replying player, result value that means
            # "player 1 won" for that seating)
            pairings = (
                (self, self.pnet, 1),
                (self.pnet, self, -1),
            )
            for opener, responder, one_wins in pairings:
                result = self._arena_play_once(opener, responder, verbose=verbose)
                if result == one_wins:
                    oneWon += 1
                elif result == -one_wins:
                    twoWon += 1
                else:
                    draws += 1
                eps_time.add(1, [("wins", oneWon), ("losses", twoWon), ("draws", draws)])

        return oneWon, twoWon, draws

    def train_once(self):
        """
        Runs a single training iteration to fine-tune the weights.

        Steps, in order: collect self-play examples, shuffle the whole
        example history, copy the current weights into ``self.pnet`` via
        'temp.pth.tar', train, then pit the trained network against
        ``self.pnet``. The new weights are rejected (and the previous ones
        reloaded) only when at least one game was decided and the new
        network's share of decided games is below ``args.updateThreshold``;
        otherwise they are saved as this iteration's checkpoint and as
        'best.pth.tar'. ``self.iteration`` is incremented at the end.

        Possible side effects include:

        * Changing the internal weights
        * Adding to the history
        * Printing to console
        * Automatically calling `save_history` and `save_weights`
        """

        self._run_selfplay()

        # flatten the history and shuffle examples before training
        trainExamples = [example for batch in self.trainExamplesHistory for example in batch]
        random.shuffle(trainExamples)

        # training new network, keeping a copy of the old one
        snapshot = 'temp.pth.tar'
        self.save_weights(snapshot)
        self.pnet.load_weights(snapshot)
        self.pnet._reset_mcts()

        self._train_nnet(trainExamples)
        self._reset_mcts()

        log.info('PITTING AGAINST PREVIOUS VERSION')
        nwins, pwins, draws = self._arena_play(self.args.arenaCompare)

        log.info('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
        decided = pwins+nwins
        rejected = decided > 0 and float(nwins)/decided < self.args.updateThreshold
        if not rejected:
            log.info('ACCEPTING NEW MODEL')
            self.save_weights(self._get_checkpoint_filename(self.iteration))
            self.save_weights('best.pth.tar')
        else:
            log.info('REJECTING NEW MODEL')
            self.load_weights(snapshot)

        self.iteration += 1

    def test_once(self):
        """
        Runs a single testing interation. Does not change weights, and usually
        does not change the history either.

        Returns
        -------
        score : float
            ELO, win percentage, or another number where higher is better
        """

        return None

Example #3

Show file

File: SugarSyncShell.py Project: monofox/SugarSync-Python-Client

class SugarSyncShell:
    TYPE_FILE = 1
    TYPE_FOLDER = 2
    TYPE_ALL = 4

    def __init__(self, sugarsync, startdir=None, collection=True):
        """
        Set up the shell state, readline history and completion and the
        command table, then enter the interactive loop.

        The constructor blocks: its last statement calls ``self.cmd()``,
        which only returns once ``self.run`` has become false.

        sugarsync  -- client object the commands delegate to
        startdir   -- element the shell starts in; first entry of self.path
        collection -- accepted but not used by this constructor
        """
        self.sugarsync = sugarsync
        self.virtualfs = startdir # starts with startdir
        self.localPath = os.getcwd()
        self.path = [startdir]
        self.run = True
        # expanduser("~") honours $HOME when it is set but, unlike
        # os.environ["HOME"], does not raise KeyError when it is missing.
        self.histfile = os.path.join(os.path.expanduser("~"), ".ssync.pyhist")
        readline.parse_and_bind("tab: complete")
        try:
            readline.read_history_file(self.histfile)
        except IOError:
            # an unreadable or missing history file is not an error
            pass
        atexit.register(readline.write_history_file, self.histfile)
        readline.set_completer(self.completer)

        # command name -> bound handler; every handler takes the raw
        # parameter string as its single argument
        # NOTE(review): the class also defines lrm(), which is not
        # registered here - confirm whether that is intentional.
        self.cmds = {
                'clear': self.clear,
                'cd': self.cd,
                'cp' : self.cp,
                'file': self.info,
                'history': self.history,
                'get': self.get,
                'put': self.put,
                'rm': self.rm,
                'ls': self.ls,
                'lpwd': self.lpwd,
                'lcd' : self.lcd,
                'lmkdir': self.lmkdir,
                'pwd': self.pwd,
                'help': self.help,
                'refresh': self.refresh,
                'save': self.save,
                'load': self.load,
                'exit': self.exit
                }
        # keep the commands ordered by name (help prints them in this order)
        self.cmds = OrderedDict(sorted(self.cmds.items(), key=lambda t: t[0]))
        self.names = []

        self.cmd()

    def completer(self, text, state):

        filelist = self.path[len(self.path)-1].getChildren()
        if len(filelist) <= 0:
            return None

        # sort ?
        keys = sorted(filelist.keys())
        filtlist = []
        # why so easy? Because we only need it so!
        for tab in keys:
            if tab.lower().startswith(text.strip().lower()):
                filtlist.append(tab)
        
        if state > len(filtlist) \
                and len(filtlist) > 0:
            return None
        elif len(filtlist) > 0:
            return filtlist[state]
        else:
            return None

    def save(self, param):
        # saves the shell with all the supdirs.
        with open('syncdata.bin', 'wb') as f:
            Pickler(f, 3).dump(self.path)

        print('Data saved...')

    def load(self, param):
        if os.path.isfile('syncdata.bin'):
            with open('syncdata.bin', 'rb') as f:
                self.path = Unpickler(f).load()

            self.clear()
            print('Data loaded...')
        else:
            print('File not found. Session saved?')

    def getPath(self, withHeader=True, colorize=True):
        # the beginning:
        if len(self.sugarsync.nickname) > 0:
            h = self.sugarsync.nickname + ' '
        else:
            h = self.sugarsync.username[:self.sugarsync.username.find('@')] + ' '

        if colorize:
            h = Colors.c(h, Colors.YELLOWL)

        p = ''

        for f in self.path:
            p = p + f.getName() + '/'
        
        if colorize:
            p = Colors.c(p, Colors.BLUE)

        if withHeader:
            return h + p
        else:
            return p

    def cmd(self):
        """
        Interactive read-dispatch loop; runs until self.run is false.

        Each input line is split at its first blank into a command name and
        the remaining parameter string (which keeps that leading blank). The
        name is looked up in self.cmds and the handler is called with the
        parameter string. Unknown names print 'Wrong input.'; an exception
        raised by a handler is reported together with its traceback and the
        loop continues.
        """
        while self.run:
            want = input('%s %s ' % (self.getPath(), Colors.c('$', Colors.BLUE)))
            # its always the same: command parameters - there is no &&, ||, |, &, etc. pp.

            index = want.find(' ')
            if index > 0:
                cmd = want[:index]
                param = want[index:]
            else:
                cmd = want
                param = ''

            # Resolve the handler outside the try block so that a KeyError
            # raised *inside* a handler is not mistaken for an unknown command.
            handler = self.cmds.get(cmd)
            if handler is None:
                print('Wrong input.')
                continue

            try:
                handler(param)
            except Exception as e:
                print('Error processing action.', e)
                print("*** print_exception:")
                traceback.print_exc(limit=20, file=sys.stdout)


    def searchRecursivePath(self, parent, path, typ, top=True):
        """
        Resolve *path* below *parent* by matching child names segment by
        segment.

        parent -- element whose getChildren() is searched; None selects the
                  last element of self.path
        path   -- '/'-separated string or an already split list of segments
        typ    -- TYPE_FILE, TYPE_FOLDER or TYPE_ALL; only checked when a
                  single segment is left
        top    -- True for the outermost call; the recursive calls pass False

        With top=False the result is the list of matched elements, or None
        when nothing matched. With top=True the result is a path list: the
        entries of self.path up to (excluding) the first one that is also in
        the match, followed by the matched elements; when nothing matched,
        self.path itself is returned.
        """
        # TODO: implement and '..' correctly ;-)
        if type(path).__name__ == 'str':
            path = path.split('/') # only path[0] is matched at this level

        if parent is None:
            parent = self.path[len(self.path)-1]

        child = parent.getChildren()

        # data: matches collected so far (set to None to mean "no result")
        # i: index into the child keys scanned by the while loop below
        data = []
        i = 0

        # Special segments: a leading '.' or a '..' in second position.
        if path[0].strip() == '.' \
                or (len(path) > 1 and path[1].strip() == '..'):

            # newLen: how many leading segments are skipped before recursing
            newLen = 0
            if len(path) > 1 and path[0].strip() == '.':
                newLen = 1
            elif len(path) > 2 and path[1].strip() == '..':
                newLen = 2
            else:
                # this is the last path element. We will check.
                if typ is not SugarSyncShell.TYPE_FOLDER:
                    data = None

            if newLen > 0:
                # continue with the remaining segments on the same parent
                dataTmp = self.searchRecursivePath(parent, path[newLen:], typ, False)
                if type(dataTmp).__name__ == 'list':
                    for f in dataTmp:
                        data.append(f)
                elif dataTmp is not None:
                    data.append(dataTmp)


        keys = list(child.keys())
        # Scan the children until something has been collected; skipped
        # entirely when data is already None or non-empty.
        while type(data).__name__ == 'list' and len(data) <= 0 and i < len(keys):
            if str(child[keys[i]].getName()).strip() == path[0].strip():
                if len(path) > 1 and isinstance(child[keys[i]], SugarSyncDirectory):
                    # intermediate segment: descend into the directory and
                    # record it followed by whatever was found below it
                    dataTmp = self.searchRecursivePath(child[keys[i]], path[1:], typ, False)
                    data.append(child[keys[i]])
                    if type(dataTmp).__name__ == 'list':
                        for f in dataTmp:
                            if f not in data:
                                data.append(f)
                    elif dataTmp is not None and dataTmp not in data:
                        data.append(dataTmp)
                elif len(path) == 1 and (
                        (isinstance(child[keys[i]], SugarSyncDirectory) and typ == SugarSyncShell.TYPE_FOLDER)
                        or (isinstance(child[keys[i]], SugarSyncFile) and typ == SugarSyncShell.TYPE_FILE)
                        or typ == SugarSyncShell.TYPE_ALL
                        ):
                    # final segment: accept only the requested element type
                    data.append(child[keys[i]])
            i = i + 1

        # an empty result list is reported as None
        if type(data).__name__ == 'list' and len(data) <= 0:
            data = None

        if top and data is not None:
            add = False  # never read afterwards
            # keep the current path up to the first element that the match
            # already contains, then append the match
            newPath = []
            for f in self.path:
                if f in data:
                    break
                newPath.append(f)

            for f in data:
                newPath.append(f)

            data = newPath
        elif top:
            # nothing matched at the outermost level: hand back the current path
            data = self.path

        return data

    def searchPath(self, path, typ):
        data = None
        elm = self.path[len(self.path)-1]
        elm = elm.getChildren()

        if elm is not None:
            keys = list(elm.keys())
            i = 0
            while data is None and i < len(keys):
                if str(elm[keys[i]].getName()).strip() == path.strip() and (
                        (isinstance(elm[keys[i]], SugarSyncDirectory) and typ == SugarSyncShell.TYPE_FOLDER)
                        or   (isinstance(elm[keys[i]], SugarSyncFile) and typ == SugarSyncShell.TYPE_FILE)
                        or   typ == SugarSyncShell.TYPE_ALL
                        ):
                    data = elm[keys[i]]

                i = i+1

        return data

    def clear(self, param):
        """Print one empty line per terminal row, as reported by Console.getTerminalSize(); param is ignored."""
        _columns, rows = Console.getTerminalSize()
        for _ in range(rows):
            print('')

    def help(self, param):
        # this is a method to display the help
        print('Following commands are possible:')
        for cmd in self.cmds:
            print(cmd)

    def cd(self, param):
        # TODO: at this point its very pre-release...
        # we have to trim the param
        param = param.strip()
        if param[len(param)-1:] == '/':
            param = param[:-1]

        # and split on / ?

        if param == '.':
            return True
        elif param == '..':
            self.path.pop()
        else:
            # search
            path = self.searchRecursivePath(None, param, SugarSyncShell.TYPE_FOLDER)
            if path is not None:
                self.path = path
                return True
            else:
                print('Could not change the directory.')
                return False
    

    def ls(self, param):
        """
        List the current remote folder; param is ignored.

        Prints './', then '../' unless the current folder is the only
        entry of self.path, then every child: directories in color with a
        trailing '/', everything else by its plain key.
        """
        current = self.path[-1]

        print(Colors.c('./', Colors.BLUE))
        if len(self.path) > 1:
            print(Colors.c('../', Colors.BLUE))

        for name, child in current.getChildren().items():
            if not isinstance(child, SugarSyncDirectory):
                print(name)
                continue
            print(Colors.c(str(name) + '/', Colors.BLUE))

    def pwd(self, param):
        print(self.getPath(False, False)) # withour header and without color

    def history(self, param):
        param = param.strip()
        
        if param in ['.', './', '..', '../'] or param[len(param)-1:] == '/':
            print('It can be only a file.')
            return False

        elm = self.searchPath(param, SugarSyncShell.TYPE_FILE)
        if elm is not None:
            elm.printVersions()
        else:
            print('Path not found.')

    def info(self, param):
        param = param.strip()
        
        if param in ['.', './']:
            elm = self.path[len(self.path)-1]
        elif param in ['..', '../'] and len(self.path) > 1:
            elm = self.path[len(self.path)-2]
        elif param not in ['.', '..']:
            elm = self.searchPath(param, SugarSyncShell.TYPE_ALL)
        else:
            print('Invalid path.')
            return False

        if elm is not None:
            elm.printInfo()
        else:
            print('Path not found.')

    def get(self, param):
        # at this point it can only be a file
        param = param.strip()

        if param in ['.', '..'] or param[len(param)-1:] == '/':
            print('It can be only a file at this development point.')
            return False
        
        elm = self.searchPath(param, SugarSyncShell.TYPE_FILE)
        if elm is not None:
            # filename ? and check if its exist...
            fname = str(elm.getName())
            if os.path.lexists(self.localPath+'/'+fname):
                # overwrite?
                overwrite = None
                while overwrite is None:
                    ow = input('Overwrite "%s/%s"? (N/y)' % (self.localPath,fname))
                    ow = ow.strip()
                    if ow in ['y', 'Y']:
                        overwrite = True
                    elif ow in ['n', 'N', '']:
                        overwrite = False
                    else:
                        print('I have not understand your answer "%s".' % (ow))

                if overwrite and not os.path.isfile(self.localPath+'/'+fname):
                        print('Can not overwrite something which is not a file.')
                        overwrite = None
            else:
                overwrite = True

            if overwrite and os.path.lexists(self.localPath+'/'+fname):
                os.remove(self.localPath+'/'+fname)
            
            if overwrite:
                self.sugarsync.getFile(elm.getLink(), self.localPath+'/'+fname)

        else:
            print('Could not find the file.')
            return False

    def put(self, param):
        param = param.strip()

        if param in ['.', '..'] or param[len(param)-1:] == '/':
            print('It can be only a file at this development point.')
            return False

        # exist param local?
        if os.path.isfile(self.localPath+'/'+param):
            # exist it in this path online?
            elm = self.searchPath(param, SugarSyncShell.TYPE_FILE)
            if elm is not None:
                # found. Overwrite?
                overwrite = None
                while overwrite is None:
                    ow = input('Such a file exist already online. Overwrite "%s"? (N/y)' % (param))
                    ow = ow.strip()
                    if ow in ['y', 'Y']:
                        overwrite = True
                    elif ow in ['n', 'N', '']:
                        overwrite = False
                    else:
                        print('I have not understand your answer "%s".' % (ow))

                if overwrite and not isinstance(elm, SugarSyncFile):
                    print('Can not overwrite something which is not a file.')
                    overwrite = None
            else:
                overwrite = True
            
            create = overwrite and elm is None
            if not create:
                filename = str(elm.getLink())
            else:
                filename = str(self.path[len(self.path)-1].getLink()) + '/' + param

            self.sugarsync.uploadFile(self.localPath+'/'+param, filename, create)
        else:
            print('Could not find the file.')

    def cp(self, param):
        param = param.strip()
        param = param.split(' ')
        elm = None
        folder = None
        target = None
        
        if param[0] in ['.', '..'] or param[0][len(param)-1:] == '/':    
            print('It can be only a file at this development point.')
            return False

        # exist file?
        elm = self.searchPath(param[0], SugarSyncShell.TYPE_FILE)
        if elm is None:
            print('Could not find the file.') 
            return False

        if len(param) == 3:
            # params are: <file> <folder> <new-name>
            if param[1] == '.':
                folder = self.path[len(self.path)-1]
            elif param[1] == '..':
                if len(self.path) > 1:
                    folder = self.path[len(self.path)-2]
                else:
                    print('You cant go more back than to root.')
                    return False
            else:
                folder = self.searchPath(param[2], SugarSyncShell.TYPE_FOLDER)
        elif len(param) == 2:
            # first: we try to find existent folder
            folder = self.searchPath(param[1], SugarSyncShell.TYPE_FOLDER)
            if folder is None:
                param[1] = param[1].split('/')
                target = param[1][-1]
                param[1] = '/'.join(param[1][:-1])
                folder = self.searchPath(param[1], SugarSyncShell.TYPE_FOLDER)
                if folder is None:
                    print('No Valid folder.')
                    return False
            if target is None or target == '':
                target = elm.getName()

        else:
            print('Nope. You have give me too much or not enough parameters. Syntax: <file> <target>')
            return False
        
        if target is None:
            target = param[2]

        # Now check, whether the file exists already in the target folder.


        ret = self.sugarsync.copyFile(elm.getLink(), folder.getLink(), target)
        if ret:
            # refresh target folder
            folder.refresh()

    def rm(self, param):
        param = param.strip()
        
        if param in ['.', '..'] or param[len(param)-1:] == '/':    
            print('It can be only a file at this development point.')
            return False

        # exist file?
        elm = self.searchPath(param, SugarSyncShell.TYPE_FILE)
        if elm is not None:
            # found.. delete it *hrhrhr* ;-)
            self.sugarsync.deleteFile(elm.getLink())
        else:
            print('Could not find the file.')

    def refresh(self, param):
        param = param.strip()
        elm = None

        if len(param) > 0:
            elm = self.searchPath(param, SugarSyncShell.TYPE_FOLDER)
        else:
            elm = self.path[len(self.path)-1]
    
        if elm is None:
            print('Folder not found.')
        else:
            elm.refresh()

    def lcd(self, param):
        param = param.strip()
        # change local path
        os.chdir(param)
        self.localPath = os.getcwd()

    def lmkdir(self, param):
        param = param.strip()
        dirs = param.split('/')
        if dirs[len(dirs)-1].strip() == '':
            dirs.pop(len(dirs)-1)

        if len(dirs) < 1:
            print('You have to specify a directory to create.')
        else:
            try:
                if len(dirs) == 1:
                    os.mkdir(param)
                    print('Folder was create.')
                else:
                    os.makedirs(param, exist_ok=True)
            except:
                print('Folder could not be created.')

    def lpwd(self, param):
        # we do not accept any param
        print('Local Path: ' + os.getcwd())

    def lrm(self, param):
        param = param.strip()
        
        if os.path.isfile(param):
            try:
                os.remove(param)
                print('File could be deleted.')
            except:
                print('There was an error deleting the file.')
        elif os.path.isdir(param):
            print('I only support files at this point of development.')


    def exit(self, param):
        print('Goodbye ;-)')
        self.run = False