Example #1
File: entry.py Project: onia/pygi
    def __init__(self, view):
        Gtk.EventBox.__init__(self)
        self._view = view

        self.set_visible_window(False)

        hbox = Gtk.Box.new(Gtk.Orientation.HORIZONTAL, 3)
        hbox.show()
        hbox.set_border_width(3)

        # context for the view
        self._entry = Gtk.Entry()
        self._entry.set_has_frame(False)
        self._entry.set_name('gedit-commander-entry')
        self._entry.show()

        css = Gtk.CssProvider()
        css.load_from_data("""
@binding-set terminal-like-bindings {
    unbind "<Control>A";

    bind "<Control>W" { "delete-from-cursor" (word-ends, -1) };
    bind "<Control>A" { "move-cursor" (buffer-ends, -1, 0) };
    bind "<Control>U" { "delete-from-cursor" (display-line-ends, -1) };
    bind "<Control>K" { "delete-from-cursor" (display-line-ends, 1) };
    bind "<Control>E" { "move-cursor" (buffer-ends, 1, 0) };
    bind "Escape" { "delete-from-cursor" (display-lines, 1) };
}

GtkEntry#gedit-commander-entry {
    gtk-key-bindings: terminal-like-bindings;

    /* Override background to anything. This is weird, but doing this we can
       then in code use widget.override_background to set the color dynamically
       to the same color as the gedit view */
    background: transparent;
    border-width: 0;
    box-shadow: 0 0 transparent;
}
""")

        # FIXME: remove hardcopy of 600 (GTK_STYLE_PROVIDER_PRIORITY_APPLICATION)
        # https://bugzilla.gnome.org/show_bug.cgi?id=646860
        self._entry.get_style_context().add_provider(css, 600)

        self._prompt_label = Gtk.Label(label='<b>&gt;&gt;&gt;</b>',
                                       use_markup=True)
        self._prompt_label.show()

        self._entry.connect('focus-out-event', self.on_entry_focus_out)
        self._entry.connect('key-press-event', self.on_entry_key_press)

        self._history = History(
            os.path.join(GLib.get_user_config_dir(),
                         'gedit/commander/history'))
        self._prompt = None

        self._accel_group = None

        hbox.pack_start(self._prompt_label, False, False, 0)
        hbox.pack_start(self._entry, True, True, 0)

        self.copy_style_from_view()
        self.view_style_updated_id = self._view.connect(
            'style-updated', self.on_view_style_updated)

        self.add(hbox)
        self.attach()
        self._entry.grab_focus()

        self._wait_timeout = 0
        self._info_window = None

        self.connect('destroy', self.on_destroy)
        self.connect_after('size-allocate', self.on_size_allocate)
        self.view_draw_id = self._view.connect_after('draw', self.on_draw)

        self._history_prefix = None
        self._suspended = None
        self._handlers = [[0, Gdk.KEY_Up, self.on_history_move, -1],
                          [0, Gdk.KEY_Down, self.on_history_move, 1],
                          [None, Gdk.KEY_Return, self.on_execute, None],
                          [None, Gdk.KEY_KP_Enter, self.on_execute, None],
                          [0, Gdk.KEY_Tab, self.on_complete, None],
                          [0, Gdk.KEY_ISO_Left_Tab, self.on_complete, None]]

        self._re_complete = re.compile(
            '("((?:\\\\"|[^"])*)"?|\'((?:\\\\\'|[^\'])*)\'?|[^\s]+)')
        self._command_state = commands.Commands.State()
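The History class used above (and again in the older pygtk version in Example #18) is only ever constructed with a path to a history file under the user's config directory. As a hedged sketch of what such a file-backed command history could look like, consistent with that constructor alone; the append() method and the one-entry-per-line format are assumptions, not gedit-commander's actual implementation:

import os

class History:
    """Hypothetical sketch: a command history persisted to a text file,
    one entry per line. Only the path-taking constructor is visible in the
    example above; append() and the storage format are assumptions."""

    def __init__(self, path):
        self.path = path
        self.items = []
        if os.path.isfile(path):
            with open(path, 'r', encoding='utf-8') as f:
                self.items = [line.rstrip('\n') for line in f if line.strip()]

    def append(self, command):
        # Record a command and persist it immediately.
        self.items.append(command)
        directory = os.path.dirname(self.path)
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(self.path, 'a', encoding='utf-8') as f:
            f.write(command + '\n')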
Example #2
def main():
    """
    Main procedure.
    """
    # Hyper-parameters.
    gamma = 0.95
    epsilon = 1.0
    epsilon_min = 0.1
    epsilon_step = 0.01
    batch_size = 64
    actions = [1, 2, 3, 7, 8]
    n_action = 5
    n_history = 50000
    n_episode = 5000
    n_observation = 4

    # Initialize instances.
    env = gym.make('Enduro-v0', frameskip=5)
    online = Model(n_action=n_action).cuda()
    target = Model(n_action=n_action).cuda()
    trainer = Trainer(online, target, gamma=gamma)
    history = History('s', 'a', 'r', 's*', 't', maxlen=n_history)

    for episode in range(n_episode):

        # Initialize the environment.
        observation, observations = env.reset(), deque(maxlen=n_observation)
        for _ in range(n_observation):
            state = preprocess(observation, observations)

        # Iterate until the episode is done.
        total, done = 0, False
        while not done:

            # Choose between exploration vs exploitation.
            if np.random.rand() <= epsilon:
                action = np.random.randint(n_action)
            else:
                action = online.predict(state)

            # Interact with the environment.
            observation, reward, done, _ = env.step(actions[action])
            consequence = preprocess(observation, observations)
            total += reward

            # Stack the experience tuple.
            history.append(state, action, reward, consequence, done)

            # Preserve the next state as a current state.
            state = consequence

        # Skip learning phase if it doesn't have enough history.
        if len(history) < n_history:
            continue

        # Checkpoint.
        trainer.save(f'checkpoint/{episode:04d}-{int(total):03d}.pt')

        # Epsilon schedule.
        epsilon = max(epsilon - epsilon_step, epsilon_min)

        # Mini-batch training.
        for replay in history.replay(batch_size):
            trainer.train(*replay)

        # Update target network.
        if episode % 5 == 0:
            trainer.update()
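Here History behaves like a bounded replay buffer: it is created with field names and a maxlen, filled one transition at a time with append(), sized with len(), and sampled in mini-batches with replay(batch_size) whose items are unpacked into trainer.train(*replay). A minimal sketch consistent with that usage (the real class in the source project may well differ) could be:

import random
from collections import deque

class History:
    """Hypothetical replay-buffer sketch matching the usage above:
    named fields, bounded length, per-step append, mini-batch replay."""

    def __init__(self, *fields, maxlen=None):
        self.fields = fields
        self.buffer = deque(maxlen=maxlen)

    def __len__(self):
        return len(self.buffer)

    def append(self, *values):
        # One value per declared field, e.g. (s, a, r, s*, t).
        assert len(values) == len(self.fields)
        self.buffer.append(values)

    def replay(self, batch_size):
        # Shuffle once and yield mini-batches; each batch is a tuple of
        # per-field lists, so trainer.train(*batch) receives them in order.
        samples = random.sample(list(self.buffer), len(self.buffer))
        for i in range(0, len(samples), batch_size):
            batch = samples[i:i + batch_size]
            yield tuple(list(column) for column in zip(*batch))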
Example #3
def main():
    args = build_parser().parse_args()
    config = build_default_config()
    config.merge_from_file(args.config_path)
    config.experiment_path = args.experiment_path
    config.render = not args.no_render
    config.freeze()
    del args

    writer = SummaryWriter(config.experiment_path)

    seed_torch(config.seed)
    env = VecEnv([lambda: build_env(config) for _ in range(config.workers)])
    if config.render:
        env = wrappers.TensorboardBatchMonitor(env, writer, config.log_interval)
    env = wrappers.torch.Torch(env, device=DEVICE)
    env.seed(config.seed)

    policy_model = ModelDQN(config.model, env.observation_space, env.action_space).to(DEVICE)
    target_model = ModelDQN(config.model, env.observation_space, env.action_space).to(DEVICE)
    target_model.load_state_dict(policy_model.state_dict())
    optimizer = build_optimizer(config.opt, policy_model.parameters())
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, config.episodes)

    metrics = {
        "loss": Mean(),
        "lr": Last(),
        "eps": FPS(),
        "ep/length": Mean(),
        "ep/reward": Mean(),
    }

    # ==================================================================================================================
    # training loop
    policy_model.train()
    target_model.eval()
    episode = 0
    s = env.reset()
    e_base = 0.95
    e_step = np.exp(np.log(0.05 / e_base) / config.episodes)

    bar = tqdm(total=config.episodes, desc="training")
    history = History()
    while episode < config.episodes:
        with torch.no_grad():
            for _ in range(config.horizon):
                av = policy_model(s)
                a = sample_action(av, e_base * e_step ** episode)
                s_prime, r, d, meta = env.step(a)
                history.append(
                    state=s.cpu(),
                    action=a.cpu(),
                    reward=r.cpu(),
                    done=d.cpu(),
                    state_prime=s_prime.cpu(),
                )
                # history.append(state=s, action=a, reward=r, done=d, state_prime=s_prime)
                s = s_prime

                (indices,) = torch.where(d)
                for i in indices:
                    metrics["eps"].update(1)
                    metrics["ep/length"].update(meta[i]["episode"]["l"])
                    metrics["ep/reward"].update(meta[i]["episode"]["r"])
                    episode += 1
                    scheduler.step()
                    bar.update(1)

                    if episode % 10 == 0:
                        target_model.load_state_dict(policy_model.state_dict())

                    if episode % config.log_interval == 0 and episode > 0:
                        for k in metrics:
                            writer.add_scalar(
                                k, metrics[k].compute_and_reset(), global_step=episode
                            )
                        writer.add_scalar("e", e_base * e_step ** episode, global_step=episode)
                        writer.add_histogram(
                            "rollout/action", rollout.actions, global_step=episode
                        )
                        writer.add_histogram(
                            "rollout/reward", rollout.rewards, global_step=episode
                        )
                        writer.add_histogram("rollout/return", returns, global_step=episode)
                        writer.add_histogram(
                            "rollout/action_value", action_values, global_step=episode
                        )

        rollout = history.full_rollout()
        action_values = policy_model(rollout.states)
        action_values = action_values * one_hot(rollout.actions, action_values.size(-1))
        action_values = action_values.sum(-1)
        with torch.no_grad():
            action_values_prime = target_model(rollout.states_prime)
            action_values_prime, _ = action_values_prime.detach().max(-1)
        returns = one_step_discounted_return(
            rollout.rewards, action_values_prime, rollout.dones, gamma=config.gamma
        )

        # critic
        errors = returns - action_values
        critic_loss = errors ** 2

        loss = (critic_loss * 0.5).mean(1)

        metrics["loss"].update(loss.data.cpu().numpy())
        metrics["lr"].update(np.squeeze(scheduler.get_lr()))

        # training
        optimizer.zero_grad()
        loss.mean().backward()
        nn.utils.clip_grad_norm_(policy_model.parameters(), 0.5)
        optimizer.step()

    bar.close()
    env.close()
Example #4
File: jlat.py Project: fraoustin/jlat
app.register_blueprint(
    Books(url_prefix="/",
          dir_books=os.path.join(JLAT_DIR, "files", "uploads", "book")))
from note import Notes
app.register_blueprint(Notes(url_prefix="/"))
from review import Reviews
app.register_blueprint(Reviews(url_prefix="/"))
from up import Ups
app.register_blueprint(
    Ups(url_prefix='/',
        dir_uploads=os.path.join(JLAT_DIR, "files", "uploads", "import")))
from synth import Synth
app.register_blueprint(Synth(url_prefix='/'))
from history import History
app.register_blueprint(
    History(url_prefix='/',
            archives=os.path.join(JLAT_DIR, "files", "archives")))
from register import Register
app.register_blueprint(Register(url_prefix='/'))


@app.route("/", methods=["GET", "POST"])
@login_required
def home():
    return render_template('index.html')


if __name__ == "__main__":
    db.init_app(app)
    with app.app_context():
        db.create_all()
    with app.app_context():
Example #5
 def __init__(self):
     self.config = Config()
     self.net_tool = NetTool()
     self.history = History()
Example #6
 def __init__(self, show_training=False):
     self.history = History()
     self.show_training = show_training
     self.grasp = np.array((0, 0, 0.067))  # acquire autonomously
Example #7
File: auction.py Project: mattall/V-Fiber
def sim(config):
    # TODO: Create agents here
    agents = init_agents(config)
    # Uncomment to print agents.
    #for a in agents:
    #    logging.info(a)

    n = len(agents)
    by_id = dict((a.id, a) for a in agents)
    agent_ids = [a.id for a in agents]

    if (config.mechanism.lower() == 'gsp'
            or config.mechanism.lower() == 'switch'):
        mechanism = GSP
    elif config.mechanism.lower() == 'vcg':
        mechanism = VCG
    else:
        raise ValueError("mechanism must be one of 'gsp', 'vcg', or 'switch'")

    reserve = config.reserve

    # Dictionaries : round # -> per_slot_list_of_whatever
    slot_occupants = {}
    slot_clicks = {}
    per_click_payments = {}
    slot_payments = {}
    values = {}
    bids = {}

    history = History(bids, slot_occupants, slot_clicks, per_click_payments,
                      slot_payments, n)

    def total_spent(agent_id, end):
        """
        Compute total amount spent by agent_id through (not including)
        round end.
        """
        s = 0
        for t in range(end):
            slot = agent_slot(slot_occupants, agent_id, t)
            if slot != -1:
                s += slot_payments[t][slot]
        return s

    def run_round(top_slot_clicks, t):
        """ top_slot_clicks is the expected number of clicks in the top slot
            k is the round number
        """
        if t == 0:
            bids[t] = [(a.id, a.initial_bid(reserve)) for a in agents]
        else:
            # Bids from agents with no money get reduced to zero
            have_money = lambda a: total_spent(a.id, t) < config.budget
            still_have_money = filter(have_money, agents)
            current_bids = []
            for a in agents:
                b = a.bid(t, history, reserve)
                if total_spent(a.id, t) < config.budget:
                    current_bids.append((a.id, b))
                else:
                    # Out of money: make bid zero.
                    current_bids.append((a.id, 0))
            bids[t] = current_bids

        ##   Ignore those below reserve price
        active_bidders = len(filter(lambda (i, b): b >= reserve, bids[t]))
        #####################################
        ##   1a.   Define no. of slots  (TO-DO: Check what the # of available slots should be)
        #num_slots = max(1, active_bidders-1)
        num_slots = max(1, n - 1)

        ##   1b.  Calculate clicks/slot
        slot_clicks[t] = [
            iround(top_slot_clicks * pow(config.dropoff, i))
            for i in range(num_slots)
        ]

        ##  2. Run mechanism and allocate slots
        (slot_occupants[t],
         per_click_payments[t]) = (mechanism.compute(slot_clicks[t], reserve,
                                                     bids[t]))

        ##  3. Define payments
        slot_payments[t] = map(lambda (x, y): x * y,
                               zip(slot_clicks[t], per_click_payments[t]))

        ##  4.  Save utility (misnamed as values)
        values[t] = dict(zip(agent_ids, zeros))

        def agent_value(agent_id, clicks, payment):
            if agent_id is not None:
                values[t][agent_id] = by_id[agent_id].value * clicks - payment
            return None

        map(agent_value, slot_occupants[t], slot_clicks[t], slot_payments[t])

        ## Debugging. Set to True to see what's happening.
        log_console = True
        if log_console:
            logging.info("\t=== Round %d ===" % t)
            logging.info("\tnum_slots: %d" % num_slots)
            logging.info("\tbids: %s" % bids[t])
            logging.info("\tslot occupants: %s" % slot_occupants[t])
            logging.info("\tslot_clicks: %s" % slot_clicks[t])
            logging.info("\tper_click_payments: %s" % per_click_payments[t])
            logging.info("\tslot_payments: %s" % slot_payments[t])
            logging.info("\tUtility: %s" % values[t])
            logging.info("\ttotals spent: %s" %
                         [total_spent(a.id, t + 1) for a in agents])

    for t in range(0, config.num_rounds):
        # Over 48 rounds, go from 80 to 20 and back to 80.  Mean 50.
        # Makes sense when 48 rounds, to simulate a day
        top_slot_clicks = iround(30 * math.cos(math.pi * t / 24) + 50)

        if t == config.num_rounds / 2 and config.mechanism == 'switch':
            mechanism = VCG
        ##   0.  Runs one round
        run_round(top_slot_clicks, t)
        for a in agents:
            history.set_agent_spent(a.id, total_spent(a.id, t))

    for a in agents:
        history.set_agent_spent(a.id, total_spent(a.id, config.num_rounds))

    return history
Example #8
 def doOperation(self, operation: Operation):
     operation.execute()
     self.addToHistory(
         History("Do operation: " + operation.__class__.__name__,
                 self.getId()))
Example #9
 def retrieve_history(self):
     if not self.has_history():
         raise HistoryNotFound(self)
     history_db = self.retrieve_object(self.manifest.history_database, 'H')
     return History(history_db)
Example #10
def generate_history():
    data_generation = DataGeneration()
    history = History(data_generation.generate_transactions())
    history.interleave_transaction_schedule()
    return jsonify(history.serialize()), 200
Example #11
 def close_product(self):
     self._history.append(History("Account closed", self.getId()))
     return True
Example #12
 def __init__(self):
     self.name = None
     self.email = None
     self.created_at = str(timestring.Date('today'))
     self.history = History(self)
Example #13
 def create(self):
     table = 'screensaver' if is_screensaver_mode() else 'photoframe'
     return History(table)
Example #14
tr = Tracer('tr')
tr.open('/tmp/', '', '.dat')
tr.start()
robot.after.addSignal('tr.triger')

#tr.add(dyn.name+'.ffposition','ff')
tr.add(taskRF.featureDes.name + '.position', 'refr')
tr.add(taskLF.featureDes.name + '.position', 'refl')
tr.add('dyn.rf', 'r')
tr.add('dyn.lf', 'l')

tr.add('featureComDes.errorIN', 'comref')
tr.add('dyn.com', 'com')
tr.add(taskWaist.gain.name + '.gain', 'gainwaist')

history = History(dyn, 1)

# --- RUN -----------------------------------------------------------------

featurePosture.selec.value = toFlags(range(6, 36))

sot.clear()
for task in [taskWaist, taskRF, taskLF]:
    task.feature.position.recompute(0)
    task.feature.keep()
    task.feature.selec.value = '111111'
    sot.push(task.task.name)

taskWaist.ref = matrixToTuple(eye(4))
taskWaist.feature.selec.value = '111011'
taskWaist.gain.setByPoint(18, 0.1, 0.005, 0.8)
Example #15
Jason Mahr
"""


from constants import *
from cube import Cube
from fitness import *
from history import History
from validate import is_even, is_solved


## For history.py


h = History()
moves = [1, 5, 1, 9, 9, 8, 8, 12, 14, 15, 17, 12, 2, 3, 6, 5, 7, 7, 9, 10]
for move in moves:
    h.add(move)
assert h.get() == ["R'", 'B2', 'F2', 'U', "L'", 'R', 'F', "R'", "B'"]


## For cube.py


c = Cube()
for move in moves:
    c.move(move)
assert c.get_cube() == [[1, 1, 5, 4, 3, 1, 4, 4], [3, 3, 0, 4, 2, 3, 4, 0],
                        [3, 5, 0, 2, 1, 2, 4, 2], [5, 5, 2, 3, 0, 3, 1, 0],
                        [4, 2, 2, 5, 5, 0, 1, 4], [0, 1, 3, 1, 5, 0, 2, 5]]
Example #16
    def run_sim_once(self):
        """Return a history"""
        conf = self.config
        # Keep track of the current round.  Needs to be in scope for helpers.
        round = 0

        def check_pred(pred, msg, Exc, lst):
            """Check if any element of lst matches the predicate.  If it does,
            raise an exception of type Exc, including the msg and the offending
            element."""
            m = list(map(pred, lst))
            if True in m:
                i = m.index(True)
                raise Exc(msg + " Bad element: %s" % lst[i])

        def check_uploads(peer, uploads):
            """Raise an IllegalUpload exception if there is a problem."""
            def check(pred, msg):
                check_pred(pred, msg, IllegalUpload, uploads)

            not_upload = lambda o: not isinstance(o, Upload)
            check(not_upload, "List of Uploads contains non-Upload object.")

            self_upload = lambda upload: upload.to_id == peer.id
            check(self_upload, "Can't upload to yourself.")

            not_from_self = lambda upload: upload.from_id != peer.id
            check(not_from_self, "Upload.from != peer id.")

            check(lambda u: u.bw < 0, "Upload bandwidth must be non-negative!")

            limit = self.up_bw(peer.id)
            print(sum([u.bw for u in uploads]), "   ", limit)
            if sum([u.bw for u in uploads]) > limit:
                raise IllegalUpload("Can't upload more than limit of %d. %s" %
                                    (limit, uploads))

            # If we got here, looks ok.

        def check_requests(peer, requests, peer_pieces, available):
            """Raise an IllegalRequest exception if there is a problem."""
            def check(pred, msg):
                check_pred(pred, msg, IllegalRequest, requests)

            check(lambda o: not isinstance(o, Request),
                  "List of Requests contains non-Request object.")

            bad_piece_id = lambda r: (r.piece_id < 0 or r.piece_id >= self.
                                      config.num_pieces)
            check(bad_piece_id, "Request asks for non-existent piece!")

            bad_peer_id = lambda r: r.peer_id not in self.peer_ids
            check(bad_peer_id, "Request mentions non-existent peer!")

            bad_requester_id = lambda r: r.requester_id != peer.id
            check(bad_requester_id, "Request has wrong peer id!")

            bad_start_block = lambda r: (r.start < 0 or r.start >= self.config.
                                         blocks_per_piece or r.start >
                                         peer_pieces[peer.id][r.piece_id])
            # Must request the _next_ necessary block
            check(bad_start_block, "Request has bad start block!")

            def piece_peer_does_not_have(r):
                other_peer = self.peers_by_id[r.peer_id]
                return r.piece_id not in available[other_peer.id]

            check(piece_peer_does_not_have,
                  "Asking for piece peer does not have!")

            # If we got here, looks ok

        def available_pieces(peer_id, peer_pieces):
            """
            Return a list of piece ids that this peer has available.
            """
            return [
                i for i in range(conf.num_pieces)
                if peer_pieces[peer_id][i] == conf.blocks_per_piece
            ]

        def peer_done(peer_pieces, peer_id):
            # TODO: remove linear pass
            for blocks_so_far in peer_pieces[peer_id]:
                if blocks_so_far < conf.blocks_per_piece:
                    return False
            return True

        def all_done(peer_pieces):
            result = True
            # Check all peers to update done status
            for peer_id in peer_pieces:
                if peer_done(peer_pieces, peer_id):
                    history.peer_is_done(round, peer_id)
                else:
                    result = False
            return result

        def create_peers():
            """Each agent class must be already loaded, and have a
            constructor that takes the config, id,  pieces, and
            up and down bandwidth, in that order."""
            def load(class_name, params):
                agent_class = conf.agent_classes[class_name]
                return agent_class(*params)

            counts = dict()

            def index(name):
                if name in counts:
                    a = counts[name]
                    counts[name] += 1
                else:
                    a = 0
                    counts[name] = 1
                return a

            n = len(conf.agent_class_names)
            ids = ["%s%d" % (n, index(n)) for n in conf.agent_class_names]

            is_seed = lambda id: id.startswith("Seed")

            def get_pieces(id):
                if id.startswith("Seed"):
                    return [conf.blocks_per_piece] * conf.num_pieces
                else:
                    return [0] * conf.num_pieces

            peer_pieces = dict()  # id -> list (blocks / piece)
            peer_pieces = dict((id, get_pieces(id)) for id in ids)
            pieces = [get_pieces(id) for id in ids]
            r = itertools.repeat

            # Re-initialize upload bandwidths at the beginning of each
            # new simulation
            up_bws = [self.up_bw(id, reinit=True) for id in ids]
            params = list(zip(r(conf), ids, pieces, up_bws))

            peers = list(map(load, conf.agent_class_names, params))
            #logging.debug("Peers: \n" + "\n".join(str(p) for p in peers))
            return peers, peer_pieces

        def get_peer_requests(p, peer_info, peer_history, peer_pieces,
                              available):
            def remove_me(info):
                # TODO: Do we need this linear pass?
                return [peer for peer in peer_info if peer.id != p.id]

            pieces = copy.copy(peer_pieces[p.id])
            # Made copy of pieces and the peer info this peer needs to make its
            # decision, so that it can't change the simulation's copies.
            p.update_pieces(pieces)
            rs = p.requests(remove_me(peer_info), peer_history)
            check_requests(p, rs, peer_pieces, available)
            return rs

        def get_peer_uploads(all_requests, p, peer_info, peer_history):
            def remove_me(info):
                # TODO: remove this pass?  Use a set?
                return [peer for peer in peer_info if peer.id != p.id]

            def requests_to(id):
                f = lambda r: r.peer_id == id
                ans = []
                for rs in list(all_requests.values()):
                    ans.extend(list(filter(f, rs)))
                return ans

            requests = requests_to(p.id)

            us = p.uploads(requests, remove_me(peer_info), peer_history)
            check_uploads(p, us)
            return us

        def upload_rate(uploads, uploader_id, requester_id):
            """
            return the uploading rate from uploader to requester
            in blocks per time period, or 0 if not uploading.
            """
            for u in uploads[uploader_id]:
                if u.to_id == requester_id:
                    return u.bw
            return 0

        def update_peer_pieces(peer_pieces, requests, uploads, available):
            """
            Process the uploads: figure out how many blocks of all the requested
            pieces the requesters ended up with.
            Make sure requesting the same thing from lots of peers doesn't
            stack.
            update the sets of available pieces as needed.
            """
            downloads = dict()  # peer_id -> [downloads]
            new_pp = copy.deepcopy(peer_pieces)
            for requester_id in requests:
                downloads[requester_id] = list()
            for requester_id in requests:
                # Keep track of how many blocks of each piece this
                # requester got.  piece -> (blocks, from_who)
                new_blocks_per_piece = dict()

                def update_count(piece_id, blocks, peer_id):
                    if piece_id in new_blocks_per_piece:
                        old = new_blocks_per_piece[piece_id][0]
                        if blocks > old:
                            new_blocks_per_piece[piece_id] = (blocks, peer_id)
                    else:
                        new_blocks_per_piece[piece_id] = (blocks, peer_id)

                # Group the requests by peer that is being asked
                get_peer_id = lambda r: r.peer_id
                rs = sorted(requests[requester_id], key=get_peer_id)
                for peer_id, rs_for_peer in itertools.groupby(rs, get_peer_id):
                    bw = upload_rate(uploads, peer_id, requester_id)
                    if bw == 0:
                        continue
                    # This bandwidth gets applied in order to each piece requested
                    for r in rs_for_peer:
                        needed_blocks = conf.blocks_per_piece - r.start
                        alloced_bw = min(bw, needed_blocks)
                        update_count(r.piece_id, alloced_bw, peer_id)
                        bw -= alloced_bw
                        if bw == 0:
                            break
                for piece_id in new_blocks_per_piece:
                    (blocks, peer_id) = new_blocks_per_piece[piece_id]
                    new_pp[requester_id][piece_id] += blocks
                    if new_pp[requester_id][piece_id] == conf.blocks_per_piece:
                        available[requester_id].add(piece_id)
                    d = Download(peer_id, requester_id, piece_id, blocks)
                    downloads[requester_id].append(d)

            return (new_pp, downloads)

        def completed_pieces(peer_id, available):
            return len(available[peer_id])

        def log_peer_info(peer_pieces, available):
            for p_id in self.peer_ids:
                pieces = peer_pieces[p_id]
                logging.debug("pieces for %s: %s" % (str(p_id), str(pieces)))
            log = ", ".join("%s:%s" % (p_id, completed_pieces(p_id, available))
                            for p_id in self.peer_ids)
            logging.info("Pieces completed: " + log)

        logging.debug("Starting simulation with config: %s" % str(conf))

        peers, peer_pieces = create_peers()
        self.peer_ids = [p.id for p in peers]
        self.peers_by_id = dict((p.id, p) for p in peers)

        upload_rates = dict((id, self.up_bw(id)) for id in self.peer_ids)
        history = History(self.peer_ids, upload_rates)

        # dict : pid -> set(finished / available pieces)
        available = dict((pid, set(available_pieces(pid, peer_pieces)))
                         for pid in self.peer_ids)

        # Begin the event loop
        while True:
            logging.info("======= Round %d ========" % round)

            peer_info = [PeerInfo(p.id, available[p.id]) for p in peers]
            requests = dict()  # peer_id -> list of Requests
            uploads = dict()  # peer_id -> list of Uploads
            h = dict()
            for p in peers:
                h[p.id] = history.peer_history(p.id)
                requests[p.id] = get_peer_requests(p, peer_info, h[p.id],
                                                   peer_pieces, available)

            for p in peers:
                uploads[p.id] = get_peer_uploads(requests, p, peer_info,
                                                 h[p.id])

            (peer_pieces,
             downloads) = update_peer_pieces(peer_pieces, requests, uploads,
                                             available)
            history.update(downloads, uploads)

            logging.debug(history.pretty_for_round(round))

            log_peer_info(peer_pieces, available)

            if all_done(peer_pieces):
                logging.info("All done!")
                break
            round += 1
            if round > conf.max_round:
                logging.info("Out of time.  Stopping.")
                break

        logging.info("Game history:\n%s" % history.pretty())

        logging.info("======== STATS ========")
        logging.info("Uploaded blocks:\n%s" %
                     Stats.uploaded_blocks_str(self.peer_ids, history))
        logging.info("Completion rounds:\n%s" %
                     Stats.completion_rounds_str(self.peer_ids, history))
        logging.info("All done round: %s" %
                     Stats.all_done_round(self.peer_ids, history))

        return history
Example #17
                        loss0=loss0)
 ckpter_auc = CheckPoint(model=model,
                         optimizer=optimizer_model,
                         path=path_ckpt,
                         prefix=run_name,
                         interval=1,
                         save_num=n_save_epoch,
                         loss0=auc_last)
 ckpter_auc_lr = CheckPoint(model=logisticReg,
                            optimizer=optimizer_model,
                            path=path_ckpt,
                            prefix=run_name + '_lr',
                            interval=1,
                            save_num=n_save_epoch,
                            loss0=auc_last)
 train_hist = History(name='train_hist' + run_name)
 validation_hist = History(name='validation_hist' + run_name)
 if start:
     # ---------  Training logs before start training -----------------
     # model.eval()
     # logisticReg.eval()
     with torch.no_grad():
         tot_loss, tot_acc = 0, 0
         n_batches = len(train_loader)
         Ptp01, Ptp05, Ptp1, AUC = np.zeros(
             n_batches // n_batch_verif), np.zeros(
                 n_batches // n_batch_verif), np.zeros(
                     n_batches // n_batch_verif), np.zeros(n_batches //
                                                           n_batch_verif)
         vs, vf, tg = [], [], []
         idx = -1
Example #18
    def __init__(self, view):
        gtk.EventBox.__init__(self)
        self._view = view

        hbox = gtk.HBox(False, 3)
        hbox.show()
        hbox.set_border_width(3)

        self._entry = gtk.Entry()
        self._entry.modify_font(self._view.style.font_desc)
        self._entry.set_has_frame(False)
        self._entry.set_name('command-bar')
        self._entry.modify_text(gtk.STATE_NORMAL,
                                self._view.style.text[gtk.STATE_NORMAL])
        self._entry.set_app_paintable(True)

        self._entry.connect('realize', self.on_realize)
        self._entry.connect('expose-event', self.on_entry_expose)

        self._entry.show()

        self._prompt_label = gtk.Label('<b>&gt;&gt;&gt;</b>')
        self._prompt_label.set_use_markup(True)
        self._prompt_label.modify_font(self._view.style.font_desc)
        self._prompt_label.show()
        self._prompt_label.modify_fg(gtk.STATE_NORMAL,
                                     self._view.style.text[gtk.STATE_NORMAL])

        self.modify_bg(gtk.STATE_NORMAL, self.background_gdk())
        self._entry.modify_base(gtk.STATE_NORMAL, self.background_gdk())

        self._entry.connect('focus-out-event', self.on_entry_focus_out)
        self._entry.connect('key-press-event', self.on_entry_key_press)

        self.connect_after('size-allocate', self.on_size_allocate)
        self.connect_after('expose-event', self.on_expose)
        self.connect_after('realize', self.on_realize)

        self._history = History(
            os.path.expanduser('~/.config/pluma/commander/history'))
        self._prompt = None

        self._accel_group = None

        hbox.pack_start(self._prompt_label, False, False, 0)
        hbox.pack_start(self._entry, True, True, 0)

        self.add(hbox)
        self.attach()

        self._entry.grab_focus()
        self._wait_timeout = 0
        self._info_window = None

        self.connect('destroy', self.on_destroy)

        self._history_prefix = None
        self._suspended = None
        self._handlers = [[0, gtk.keysyms.Up, self.on_history_move, -1],
                          [0, gtk.keysyms.Down, self.on_history_move, 1],
                          [None, gtk.keysyms.Return, self.on_execute, None],
                          [None, gtk.keysyms.KP_Enter, self.on_execute, None],
                          [0, gtk.keysyms.Tab, self.on_complete, None],
                          [
                              0, gtk.keysyms.ISO_Left_Tab, self.on_complete,
                              None
                          ]]

        self._re_complete = re.compile(
            '("((?:\\\\"|[^"])*)"?|\'((?:\\\\\'|[^\'])*)\'?|[^\s]+)')
        self._command_state = commands.Commands.State()
Example #19
    def __init__(self, config):

        #init replay memory
        self.session = tf.Session()
        self.config = config
        #init parameters
        self.timeStep = 0
        self.stateInput = tf.placeholder(tf.int32,
                                         [None, self.config.seq_length])
        self.data = {}
        self.history = [History(), History(), History()]
        self.BATCH_SIZE = 256

        #set config.final_vocab_size manually
        embed = tf.Variable(tf.random_uniform(
            [self.config.final_vocab_size, self.config.embed_dim], -1.0, 1.0),
                            name="embed")

        word_embeds = tf.nn.embedding_lookup(embed, self.stateInput)
        self.initializer = tf.truncated_normal_initializer(stddev=0.02)
        self.cell = tf.nn.rnn_cell.LSTMCell(self.config.rnn_size,
                                            initializer=self.initializer,
                                            state_is_tuple=True)
        initial_state = self.cell.zero_state(self.BATCH_SIZE, tf.float32)
        outputs, _ = tf.nn.rnn(self.cell, [
            tf.reshape(embed_t, [-1, self.config.embed_dim])
            for embed_t in tf.split(1, self.config.seq_length, word_embeds)
        ],
                               dtype=tf.float32,
                               initial_state=initial_state,
                               scope="LSTMN")
        self.output_embed = tf.transpose(tf.pack(outputs), [1, 0, 2])
        self.mean_pool = tf.reduce_mean(self.output_embed, 1)
        linear_output = tf.nn.relu(
            tf.nn.rnn_cell._linear(self.mean_pool,
                                   int(self.output_embed.get_shape()[2]),
                                   1.0,
                                   0.01,
                                   scope="linearN"))

        linear_output_21 = tf.nn.relu(
            tf.nn.rnn_cell._linear(linear_output,
                                   int(self.output_embed.get_shape()[2]),
                                   1.0,
                                   0.01,
                                   scope="linearN21"))
        linear_output_22 = tf.nn.relu(
            tf.nn.rnn_cell._linear(linear_output,
                                   int(self.output_embed.get_shape()[2]),
                                   1.0,
                                   0.01,
                                   scope="linearN22"))
        linear_output_23 = tf.nn.relu(
            tf.nn.rnn_cell._linear(linear_output,
                                   int(self.output_embed.get_shape()[2]),
                                   1.0,
                                   0.01,
                                   scope="linearN23"))

        #we calculate the Q values. For the Student Network
        self.action_value_1 = tf.nn.rnn_cell._linear(linear_output_21,
                                                     self.config.num_actions,
                                                     1.0,
                                                     0.01,
                                                     scope="actionN1")
        self.object_value_1 = tf.nn.rnn_cell._linear(linear_output_21,
                                                     self.config.num_objects,
                                                     1.0,
                                                     0.01,
                                                     scope="objectN1")

        self.action_value_2 = tf.nn.rnn_cell._linear(linear_output_22,
                                                     self.config.num_actions,
                                                     1.0,
                                                     0.01,
                                                     scope="actionN2")
        self.object_value_2 = tf.nn.rnn_cell._linear(linear_output_22,
                                                     self.config.num_objects,
                                                     1.0,
                                                     0.01,
                                                     scope="objectN2")

        self.action_value_3 = tf.nn.rnn_cell._linear(linear_output_23,
                                                     self.config.num_actions,
                                                     1.0,
                                                     0.01,
                                                     scope="actionN3")
        self.object_value_3 = tf.nn.rnn_cell._linear(linear_output_23,
                                                     self.config.num_objects,
                                                     1.0,
                                                     0.01,
                                                     scope="objectN3")

        #here we will input the teachers q value
        self.target_action_value = tf.placeholder(
            tf.float32, [None, self.config.num_actions])
        self.target_object_value = tf.placeholder(
            tf.float32, [None, self.config.num_objects])

        #here we calculate the probabilities for the teacher network
        self.target_action_prob = tf.nn.softmax(
            tf.truediv(self.target_action_value, self.config.temperature))
        self.target_object_prob = tf.nn.softmax(
            tf.truediv(self.target_object_value, self.config.temperature))

        #here we calculate the probabilities for the student network
        self.pred_action_prob_1 = tf.nn.softmax(self.action_value_1)
        self.pred_object_prob_1 = tf.nn.softmax(self.object_value_1)

        self.pred_action_prob_2 = tf.nn.softmax(self.action_value_2)
        self.pred_object_prob_2 = tf.nn.softmax(self.object_value_2)

        self.pred_action_prob_3 = tf.nn.softmax(self.action_value_3)
        self.pred_object_prob_3 = tf.nn.softmax(self.object_value_3)

        entropy_action = -tf.reduce_sum(
            self.target_action_prob * tf.log(self.target_action_prob),
            reduction_indices=[1])
        entropy_object = -tf.reduce_sum(
            self.target_object_prob * tf.log(self.target_object_prob),
            reduction_indices=[1])

        cross_entropy_action_1 = -tf.reduce_sum(
            self.target_action_prob * tf.log(self.pred_action_prob_1),
            reduction_indices=[1])
        cross_entropy_object_1 = -tf.reduce_sum(
            self.target_object_prob * tf.log(self.pred_object_prob_1),
            reduction_indices=[1])

        cross_entropy_action_2 = -tf.reduce_sum(
            self.target_action_prob * tf.log(self.pred_action_prob_2),
            reduction_indices=[1])
        cross_entropy_object_2 = -tf.reduce_sum(
            self.target_object_prob * tf.log(self.pred_object_prob_2),
            reduction_indices=[1])

        cross_entropy_action_3 = -tf.reduce_sum(
            self.target_action_prob * tf.log(self.pred_action_prob_3),
            reduction_indices=[1])
        cross_entropy_object_3 = -tf.reduce_sum(
            self.target_object_prob * tf.log(self.pred_object_prob_3),
            reduction_indices=[1])

        self.kl_divergence_1 = tf.reduce_mean(
            0.5 * (cross_entropy_action_1 - entropy_action +
                   cross_entropy_object_1 - entropy_object))

        self.kl_divergence_2 = tf.reduce_mean(
            0.5 * (cross_entropy_action_2 - entropy_action +
                   cross_entropy_object_2 - entropy_object))

        self.kl_divergence_3 = tf.reduce_mean(
            0.5 * (cross_entropy_action_3 - entropy_action +
                   cross_entropy_object_3 - entropy_object))

        self.optim_1 = tf.train.AdamOptimizer(
            learning_rate=self.config.LEARNING_RATE).minimize(
                self.kl_divergence_1)
        self.optim_2 = tf.train.AdamOptimizer(
            learning_rate=self.config.LEARNING_RATE).minimize(
                self.kl_divergence_2)
        self.optim_3 = tf.train.AdamOptimizer(
            learning_rate=self.config.LEARNING_RATE).minimize(
                self.kl_divergence_3)

        self.summary_placeholders = {}
        self.summary_ops = {}

        tags = [
            'average_reward', 'average_numrewards', 'number_of_episodes',
            'quest1_average_reward_cnt'
        ]
        scalar_summary_tags = []
        for i in range(1, 4):
            scalar_summary_tags.append([tag + str(i) for tag in tags])

        for i in range(3):
            for tag in scalar_summary_tags[i]:
                self.summary_placeholders[tag] = tf.placeholder(
                    'float32', None, name=tag.replace(' ', '_'))
                self.summary_ops[tag] = tf.scalar_summary(
                    'evaluation_data/' + tag, self.summary_placeholders[tag])

        self.saver = tf.train.Saver()
        self.train_writer = tf.train.SummaryWriter(
            self.config.summaries_dir + '/train/' + str(self.config.game_num),
            self.session.graph)
        if not (self.config.LOAD_WEIGHTS and self.load_weights()):
            self.session.run(tf.initialize_all_variables())
Example #20
def test_md_history():
    yield _md, History(':memory:')
Example #21
import utils

# hyperparameters
num_epochs = 10000
batch_size = 128
lr = 1e-4
beta = 4
save_iter = 200

shape = (28, 28)
n_obs = shape[0] * shape[1]

# create DAE and ß-VAE and their training history
dae = DAE(n_obs, num_epochs, batch_size, 1e-3, save_iter, shape)
beta_vae = BetaVAE(n_obs, num_epochs, batch_size, 1e-4, beta, save_iter, shape)
history = History()

# fill autoencoder training history with examples
print('Filling history...', end='', flush=True)

transformation = transforms.Compose([
    transforms.ColorJitter(),
    transforms.ToTensor()
])

dataset = MNIST('data', transform=transformation, download=True)
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

for data in dataloader:
    img, _ = data
    img = img.view(img.size(0), -1).numpy().tolist()
Example #22
    def __init__(self, config):

        self.dic  =  pickle.load(open("embedTeacher"+str(config.game_num)+".p","rb"))
        #init replay memory
        conf = tf.ConfigProto()
        conf.gpu_options.allow_growth=True
        self.session = tf.Session(config=conf)

        self.config = config

        self.memory = self.load_replay_memory(config)
        self.history = History()
        #init parameters
        self.timeStep = 0
        self.epsilon = config.INITIAL_EPSILON

        # self.stateInput = tf.placeholder(tf.int32, [None, self.config.seq_length,self.config.embed_dim])
        # self.stateInputT = tf.placeholder(tf.int32, [None, self.config.seq_length,self.config.embed_dim])

        self.stateInput = tf.placeholder(tf.float32, [None, self.config.seq_length,self.config.embed_dim])
        self.stateInputT = tf.placeholder(tf.float32, [None, self.config.seq_length,self.config.embed_dim])

        # self.stateInput = tf.placeholder(tf.int32, [self.config.seq_length, self.config.BATCH_SIZE, self.config.embed_dim])
        # self.stateInputT = tf.placeholder(tf.int32, [self.config.seq_length, self.config.BATCH_SIZE, self.config.embed_dim])

        self.word_embeds = self.stateInput
        self.word_embedsT = self.stateInputT
        # print '$'*100
        self.initializer = tf.truncated_normal_initializer(stddev = 0.02)
        # self.initializer = tf.random_uniform_initializer(minval=-1.0, maxval=1.0, seed=None, dtype=tf.float32)
        # self.initializer = tf.contrib.layers.xavier_initializer()
        # print '$'*100
        self.cell = tf.nn.rnn_cell.LSTMCell(self.config.rnn_size, initializer = self.initializer, state_is_tuple=True)
        self.cellT = tf.nn.rnn_cell.LSTMCell(self.config.rnn_size, initializer = self.initializer, state_is_tuple=True)
        # print '$'*100
        initial_state = self.cell.zero_state(self.config.BATCH_SIZE, tf.float32)
        initial_stateT = self.cellT.zero_state(self.config.BATCH_SIZE, tf.float32)
        # print '$'*100
        # early_stop = tf.constant(self.config.seq_length, dtype = tf.int32)
        # print '$'*100
        outputs, _ = tf.nn.rnn(self.cell, [tf.reshape(embed_t, [-1, self.config.embed_dim]) for embed_t in tf.split(1, self.config.seq_length, self.word_embeds)], dtype=tf.float32, initial_state = initial_state, scope = "LSTMN")
        outputsT, _ = tf.nn.rnn(self.cellT, [tf.reshape(embed_tT, [-1, self.config.embed_dim]) for embed_tT in tf.split(1, self.config.seq_length, self.word_embedsT)], dtype=tf.float32, initial_state = initial_stateT, scope = "LSTMT")
        # outputs, _ = tf.nn.rnn(self.cell, self.word_embeds, dtype=tf.float32, initial_state = initial_state, scope = "LSTMN")
        # outputsT, _ = tf.nn.rnn(self.cellT, self.word_embedsT, dtype=tf.float32, initial_state = initial_stateT, scope = "LSTMT")
        # print '$'*100
        self.output_embed = tf.transpose(tf.pack(outputs), [1, 0, 2])
        self.output_embedT = tf.transpose(tf.pack(outputsT), [1, 0, 2])
        # print '$'*100
        mean_pool = tf.reduce_mean(self.output_embed, 1)
        mean_poolT = tf.reduce_mean(self.output_embedT, 1)
        # print '$'*100
        linear_output = tf.nn.relu(tf.nn.rnn_cell._linear(mean_pool, int(self.output_embed.get_shape()[2]), 1,0.01, scope="linearN"))
        linear_outputT = tf.nn.relu(tf.nn.rnn_cell._linear(mean_poolT, int(self.output_embedT.get_shape()[2]),1, 0.01, scope="linearT"))
        # print '$'*100

        self.action_value = tf.nn.rnn_cell._linear(linear_output, self.config.num_actions, 1,0.01, scope="actionN")
        self.action_valueT = tf.nn.rnn_cell._linear(linear_outputT, self.config.num_actions, 1,0.01, scope="actionT")

        self.object_value = tf.nn.rnn_cell._linear(linear_output, self.config.num_objects, 1,0.01, scope="objectN")
        self.object_valueT = tf.nn.rnn_cell._linear(linear_outputT, self.config.num_objects, 1,0.01, scope="objectT")

        self.target_action_value = tf.placeholder(tf.float32, [None])
        self.target_object_value = tf.placeholder(tf.float32, [None])

        self.action_indicator = tf.placeholder(tf.float32, [None, self.config.num_actions])
        self.object_indicator = tf.placeholder(tf.float32, [None, self.config.num_objects])

        self.pred_action_value = tf.reduce_sum(tf.mul(self.action_indicator, self.action_value), 1)
        self.pred_object_value = tf.reduce_sum(tf.mul(self.object_indicator, self.object_value), 1)

        self.target_qpred = tf.truediv(tf.add(self.target_action_value,self.target_object_value),2.0)

        # self.qpred = tf.truediv(tf.add(self.pred_action_value,self.pred_object_value),2.0)

        summary_list = []
        with tf.name_scope('delta'):
            # self.delta_a = self.target_action_value - self.pred_action_value
            # self.delta_o = self.target_object_value - self.pred_object_value
            self.delta_a = self.target_qpred - self.pred_action_value
            self.delta_o = self.target_qpred - self.pred_object_value
            self.variable_summaries(self.delta_a, 'delta_a',summary_list)
            self.variable_summaries(self.delta_o, 'delta_o',summary_list)
            # self.delta = self.target_qpred - self.qpred
            # self.variable_summaries(self.delta, 'delta',summary_list)

        if self.config.clipDelta:
                with tf.name_scope('clippeddelta'):
                    # self.delta = tf.clip_by_value(self.delta, self.config.minDelta, self.config.maxDelta, name='clipped_delta')

                    self.quadratic_part_a = tf.minimum(abs(self.delta_a), config.maxDelta)
                    self.linear_part_a = abs(self.delta_a) - self.quadratic_part_a


                    self.quadratic_part_o = tf.minimum(abs(self.delta_o), config.maxDelta)
                    self.linear_part_o = abs(self.delta_o) - self.quadratic_part_o

                    self.quadratic_part = tf.concat(0,[self.quadratic_part_a,self.quadratic_part_o])
                    self.linear_part = tf.concat(0,[self.linear_part_a,self.linear_part_o])

                    # self.quadratic_part = tf.minimum(abs(self.delta), config.maxDelta)
                    # self.linear_part = abs(self.delta) - self.quadratic_part

                    # self.variable_summaries(self.delta, 'clippeddelta',summary_list)

                    # self.variable_summaries(self.linear_part_a, 'linear_part_a',summary_list)
                    # self.variable_summaries(self.quadratic_part_a, 'quadratic_part_a',summary_list)

                    # self.variable_summaries(self.linear_part_o, 'linear_part_o',summary_list)
                    # self.variable_summaries(self.quadratic_part_o, 'quadratic_part_o',summary_list)

                    self.variable_summaries(self.linear_part, 'linear_part',summary_list)
                    self.variable_summaries(self.quadratic_part, 'quadratic_part',summary_list)





        with tf.name_scope('loss'):
            #self.loss = 0.5*tf.reduce_mean(tf.square(self.delta), name='loss')
            # self.loss_a = tf.reduce_mean(0.5*tf.square(self.quadratic_part_a) + config.clipDelta * self.linear_part_a, name='loss_a')
            # self.variable_summaries(self.loss_a, 'loss_a',summary_list)

            # self.loss_o = tf.reduce_mean(0.5*tf.square(self.quadratic_part_o) + config.clipDelta * self.linear_part_o, name='loss_o')
            # self.variable_summaries(self.loss_o, 'loss_o',summary_list)

            self.loss = tf.reduce_mean(0.5*tf.square(self.quadratic_part) + config.clipDelta * self.linear_part, name='loss')
            self.variable_summaries(self.loss, 'loss',summary_list)

        self.W = ["LSTMN", "linearN", "actionN", "objectN"]
        self.target_W = ["LSTMT", "linearT", "actionT", "objectT"]

        # for i in range(len(self.W)):
        #     vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope = self.W[i])
        #     varsT = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope = self.target_W[i])

        #     with tf.name_scope('activationsN'):
        #         summary_list.extend(map(lambda x:tf.histogram_summary('activations/'+str(x.name), x), vars))
        #     with tf.name_scope('activationsT'):
        #         summary_list.extend(map(lambda x:tf.histogram_summary('activations/'+str(x.name), x), varsT))

        self.summary_placeholders = {}
        self.summary_ops = {}
        if self.config.TUTORIAL_WORLD:
            scalar_summary_tags = ['average.q_a','average.q_o','average.q','average_reward','average_numrewards','number_of_episodes','quest1_average_reward_cnt', \
                    'quest2_average_reward_cnt','quest3_average_reward_cnt']
        else:
            scalar_summary_tags = ['average.q_a','average.q_o','average.q','average_reward','average_numrewards','number_of_episodes','quest1_average_reward_cnt']

        for tag in scalar_summary_tags:
            self.summary_placeholders[tag] = tf.placeholder('float32', None, name=tag.replace(' ', '_'))
            self.summary_ops[tag]  = tf.scalar_summary('evaluation_data/'+tag, self.summary_placeholders[tag])

        # Clipping gradients

        # self.optim_ = tf.train.RMSPropOptimizer(learning_rate = self.config.LEARNING_RATE)
        # tvars = tf.trainable_variables()
        # def ClipIfNotNone(grad,var):
        #     if grad is None:
        #         return (grad, var)
        #     return (tf.clip_by_norm(grad,10), var)
        # grads = [ClipIfNotNone(i,var) for i,var in self.optim_.compute_gradients(self.loss, tvars)]

        # self.optim = self.optim_.apply_gradients(grads)
        # self.optim = tf.train.RMSPropOptimizer(learning_rate = self.config.LEARNING_RATE).minimize(self.loss_a + self.loss_o)
        # self.optim = tf.train.RMSPropOptimizer(learning_rate = self.config.LEARNING_RATE).minimize(self.loss)
        # self.optim = tf.train.AdagradOptimizer(learning_rate = self.config.LEARNING_RATE).minimize(self.loss)
        # self.optim_a = tf.train.AdagradOptimizer(learning_rate = self.config.LEARNING_RATE).minimize(self.loss_a)
        # self.optim_o = tf.train.AdagradOptimizer(learning_rate = self.config.LEARNING_RATE).minimize(self.loss_o)
        # self.optim1 = tf.train.AdamOptimizer(learning_rate = self.config.LEARNING_RATE).minimize(self.loss_a)
        # self.optim2 = tf.train.AdamOptimizer(learning_rate = self.config.LEARNING_RATE).minimize(self.loss_o)

        self.optim = tf.train.AdamOptimizer(learning_rate = self.config.LEARNING_RATE).minimize(self.loss)
        self.saver = tf.train.Saver()

        if not(self.config.LOAD_WEIGHTS and self.load_weights()):
            self.session.run(tf.initialize_all_variables())

        # self.merged = tf.merge_all_summaries()
        self.merged = tf.merge_summary(summary_list)
        self.train_writer = tf.train.SummaryWriter(self.config.summaries_dir + '/train/'+str(self.config.game_num),self.session.graph)

        self.copyTargetQNetworkOperation()
Example #23
from torch.utils.tensorboard import SummaryWriter
from albumentations import *
import cv2
import json
from pycocotools.cocoeval import COCOeval

try:
    from apex import amp
    APEX = True
except ModuleNotFoundError:
    APEX = False

if __name__ == '__main__':
    opt = opts().parse()
    logger = TrainingManager(opt.save_dir)
    history = History(opt.save_dir, opt.resume)
    writer = SummaryWriter()
    torch.backends.cudnn.benchmark = True
    print(opt)
    transforms = {
        "train":
        Compose(
            [
                ShiftScaleRotate(rotate_limit=90,
                                 scale_limit=(-0.35, 0.3),
                                 border_mode=cv2.BORDER_CONSTANT),
                PadIfNeeded(min_height=512,
                            min_width=512,
                            border_mode=cv2.BORDER_CONSTANT,
                            always_apply=True),
                RandomCrop(512, 512, always_apply=True),
Example #24
0
import sys; print('%s %s' % (sys.executable or sys.platform, sys.version))
'{0:,}'.format(2**100)
print('hello')
import sys; print('%s %s' % (sys.executable or sys.platform, sys.version))
from history import History
x = History()
x.a = 1
import sys; print('%s %s' % (sys.executable or sys.platform, sys.version))
from history import History
x = History()
x.history_dict
import sys; print('%s %s' % (sys.executable or sys.platform, sys.version))
from bag import Bag
b2 = Bag(['b','d','a','b','d','c','d'])
sorted(b2)
[i for i in b2]
b2.bag
iter
iter(b2)
Bag.__iter__(b2)
import sys; print('%s %s' % (sys.executable or sys.platform, sys.version))
from bag import Bag
b2 = Bag(['a','b','b','c','d','d','d'])
iter(b2)
sorted(b2)
def weighted_dual_averages_method(oracle, prox, primal_dual_oracle,
                                  t_start, max_iter = 1000,
                                  eps = 1e-5, eps_abs = None, stop_crit = 'dual_gap_rel',
                                  verbose_step = 100, verbose = False, save_history = False):
    if stop_crit == 'dual_gap_rel':
        def crit():
            return duality_gap <= eps * duality_gap_init
    elif stop_crit == 'dual_gap':
        def crit():
            return duality_gap <= eps_abs
    elif stop_crit == 'max_iter':
        def crit():
            return it_counter == max_iter
    elif callable(stop_crit):
        crit = stop_crit
    else:
        raise ValueError("stop_crit should be callable or one of the following names: "
                         "'dual_gap', 'dual_gap_rel', 'max_iter'")
    
    A = 0.0
    t = np.copy(t_start)
    grad_sum = np.zeros(len(t_start))
    beta_seq = 1.0
    rho_wda = np.sqrt(2) * np.linalg.norm(t_start)

    flows_weighted = primal_dual_oracle.get_flows(t_start)
    t_weighted = np.copy(t_start)
    primal, dual, duality_gap_init, state_msg = primal_dual_oracle(flows_weighted, t_weighted)
    if save_history:
        history = History('iter', 'primal_func', 'dual_func', 'dual_gap')
        history.update(0, primal, dual, duality_gap_init)
    if verbose:
        print(state_msg)
    
    success = False
    
    for it_counter in range(1, max_iter+1):
        grad_t = oracle.grad(t)
        flows = primal_dual_oracle.get_flows(t) #grad() is called here
        alpha = 1 / np.linalg.norm(grad_t)
        A += alpha
        grad_sum += alpha * grad_t
        
        beta_seq = 1 if it_counter == 1 else beta_seq + 1.0 / beta_seq
        beta = beta_seq / rho_wda
        t = prox(grad_sum / A, t_start, beta / A)

        t_weighted = (t_weighted * (A - alpha) + t * alpha) / A
        flows_weighted = (flows_weighted * (A - alpha) + flows * alpha) / A
        
        primal, dual, duality_gap, state_msg = primal_dual_oracle(flows_weighted, t_weighted)
        if save_history:
            history.update(it_counter, primal, dual, duality_gap)
        if verbose and (it_counter % verbose_step == 0):
            print('\nIterations number: {:d}'.format(it_counter))
            print(state_msg, flush = True)
        if crit():
            success = True
            break
            
    result = {'times': t_weighted, 'flows': flows_weighted,
              'iter_num': it_counter,
              'res_msg': 'success' if success else 'iterations number exceeded'}
    if save_history:
        result['history'] = history.dict
    if verbose:
        print('\nResult: ' + result['res_msg'])
        print('Total iters: ' + str(it_counter))
        print(state_msg)
        print('Oracle elapsed time: {:.0f} sec'.format(oracle.time))
    return result
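The oracle, prox and primal_dual_oracle arguments are project-specific objects that are not shown in this excerpt. The toy problem below is only a hedged sketch of the calling convention (ToyOracle, ToyPrimalDualOracle and euclid_prox are made-up stand-ins), minimizing 0.5 * ||t - 1||^2 with a Euclidean prox step:

import numpy as np

class ToyOracle:
    time = 0.0                                  # only read when verbose=True
    def grad(self, t):
        return t - 1.0                          # gradient of 0.5 * ||t - 1||^2

class ToyPrimalDualOracle:
    def get_flows(self, t):
        return np.copy(t)                       # placeholder "flows"
    def __call__(self, flows, t):
        primal = 0.5 * np.sum((t - 1.0) ** 2)
        dual = -primal
        return primal, dual, primal - dual, 'gap = {:.3e}'.format(primal - dual)

def euclid_prox(g, t_start, beta):
    # argmin over t of <g, t> + (beta / 2) * ||t - t_start||^2
    return t_start - g / beta

result = weighted_dual_averages_method(ToyOracle(), euclid_prox, ToyPrimalDualOracle(),
                                       t_start=np.full(3, 2.0), max_iter=200)
print(result['res_msg'])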
Example #26
0
        t.optim.Adam,
        nn.MSELoss(reduction="sum"),
        actor_learning_rate=1e-5,
        critic_learning_rate=1e-4,
    )

    episode, step, reward_fulfilled = 0, 0, 0
    smoothed_total_reward = 0

    while episode < max_episodes:
        episode += 1
        total_reward = 0
        terminal = False
        step = 0
        state = convert(env.reset())
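        # History keeps a rolling window of the most recent states, each shaped (1, 128), used as the agent's memory input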
        history = History(history_depth, (1, 128))

        tmp_observations = []
        while not terminal:
            step += 1
            with t.no_grad():
                history.append(state)
                # agent model inference
                action = ppo.act({"mem": history.get()})[0]
                state, reward, terminal, _ = env.step(action.item())
                state = convert(state)
                total_reward += reward

                old_history = history.get()
                new_history = history.append(state).get()
                tmp_observations.append({
Example #27
0
def main(**kwargs):
    config = C(
        horizon=32,
        discount=0.99,
        num_episodes=100000,
        num_workers=8,
        e_greedy_eps=0.9,
    )
    for k in kwargs:
        config[k] = kwargs[k]

    writer = SummaryWriter(config.experiment_path)

    env = VecEnv([build_env for _ in range(config.num_workers)])
    env = wrappers.TensorboardBatchMonitor(env,
                                           writer,
                                           log_interval=100,
                                           fps_mul=0.5)
    env = wrappers.Torch(env)

    model = Agent(env.observation_space, env.action_space)
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    1e-4 * config.num_workers)

    episode = 0
    pbar = tqdm(total=config.num_episodes)

    obs = env.reset()
    state = model.zero_state(config.num_workers)

    while episode < config.num_episodes:
        history = History()
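        # detach the recurrent state so gradients from this rollout do not flow into earlier rollouts (truncated backprop)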
        state = tuple(x.detach() for x in state)

        for i in range(config.horizon):
            transition = history.append_transition()

            action_value, state_prime = model(obs, state)
            action = select_action(action_value, eps=config.e_greedy_eps)
            transition.record(
                action_value_i=select_action_value(action_value, action))

            obs_prime, reward, done, info = env.step(action)
            transition.record(reward=reward, done=done)
            state_prime = model.reset_state(state_prime, done)

            obs, state = obs_prime, state_prime

            for env_info in info:
                if "episode" not in env_info:
                    continue
                episode += 1
                writer.add_scalar("episode/return",
                                  env_info["episode"]["r"],
                                  global_step=episode)
                writer.add_scalar("episode/length",
                                  env_info["episode"]["l"],
                                  global_step=episode)
                pbar.update()

        rollout = history.build()

        action_value_prime, _ = model(obs_prime, state_prime)
        action_prime = select_action(action_value_prime,
                                     eps=config.e_greedy_eps)

        return_ = n_step_bootstrapped_return(
            reward_t=rollout.reward,
            value_prime=select_action_value(action_value_prime,
                                            action_prime).detach(),
            done_t=rollout.done,
            discount=config.discount,
        )

        td_error = rollout.action_value_i - return_
        loss = td_error.pow(2)

        optimizer.zero_grad()
        loss.mean().backward()
        optimizer.step()

        writer.add_scalar("rollout/action_value_i",
                          rollout.action_value_i.mean(),
                          global_step=episode)
        writer.add_scalar("rollout/td_error",
                          td_error.mean(),
                          global_step=episode)
        writer.add_scalar("rollout/loss", loss.mean(), global_step=episode)

    env.close()
    writer.close()
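n_step_bootstrapped_return and select_action_value are imported from elsewhere in this project and are not shown here. As a hedged sketch only (the [T, B] tensor layout is an assumption), the n-step return used above could be computed like this:

import torch

def n_step_bootstrapped_return(reward_t, value_prime, done_t, discount):
    # reward_t, done_t: [T, B]; value_prime: [B]; result: [T, B]
    returns = torch.empty_like(reward_t)
    bootstrap = value_prime
    for t in reversed(range(reward_t.size(0))):
        # cut the bootstrap at episode boundaries, then discount and add the reward
        bootstrap = reward_t[t] + discount * (1.0 - done_t[t].float()) * bootstrap
        returns[t] = bootstrap
    return returns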
Example #28
0
    def testName(self):
        history = History("./tmp/test.db")
        builder = BuilderTpp(history)
        builder.build()
Example #29
0
def main():
    parser = argparse.ArgumentParser(description='SM Room Timer')
    parser.add_argument('-f', '--file', dest='filename', default=None)
    parser.add_argument('--rooms', dest='rooms_filename', default='rooms.json')
    parser.add_argument('--doors', dest='doors_filename', default='doors.json')
    parser.add_argument('--debug', dest='debug', action='store_true')
    parser.add_argument('--debug-log', dest='debug_log_filename')
    parser.add_argument('--verbose', dest='verbose', action='store_true')
    parser.add_argument('--usb2snes', action='store_true')
    parser.add_argument('--route', action='store_true')
    parser.add_argument('--rebuild', action='store_true')
    # parser.add_argument('--segment', action='append', required=True)
    args = parser.parse_args()

    rooms = Rooms.read(args.rooms_filename)
    doors = Doors.read(args.doors_filename, rooms)
    route = Route() if args.route else DummyRoute()

    if args.filename and need_rebuild(args.filename):
        if not args.rebuild:
            print(
                "File needs to be rebuilt before it can be used; run rebuild_history.py or pass --rebuild to this script."
            )
            sys.exit(1)

        backup_and_rebuild(rooms, doors, args.filename)

    if args.debug_log_filename:
        debug_log = open(args.debug_log_filename, 'a')
        verbose = True
    elif args.debug:
        debug_log = sys.stdout
        verbose = True
    else:
        debug_log = None
        verbose = args.verbose

    frontend = SegmentTimerTerminalFrontend(verbose=verbose,
                                            debug_log=debug_log)

    if args.filename is not None and os.path.exists(args.filename):
        history = read_transition_log(args.filename, rooms, doors)
    else:
        history = History()

    # feed previously recorded transitions into the route until it is complete
    for tid in history:
        route.record(tid)
        if route.complete: break

    print('Route is %s' % ('complete' if route.complete else 'incomplete'))

    transition_log = FileTransitionLog(
        args.filename) if args.filename is not None else NullTransitionLog()

    tracker = SegmentTimeTracker(history,
                                 transition_log,
                                 route,
                                 on_new_room_time=frontend.new_room_time)

    state_reader = ThreadedStateReader(rooms,
                                       doors,
                                       usb2snes=args.usb2snes,
                                       logger=frontend)
    state_reader.start()

    try:
        timer = SegmentTimer(frontend,
                             state_reader,
                             on_transitioned=tracker.transitioned,
                             on_state_change=frontend.state_changed,
                             on_reset=tracker.room_reset)

        while state_reader.is_alive():
            timer.poll()

    finally:
        state_reader.stop()
Example #30
0
tr.add('dyn.lf','lf')
tr.add('dyn.rf','rf')

tr.start()
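# note: 'triger' (sic) is the signal name actually exposed by the dynamic-graph Tracer entity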
robot.after.addSignal('tr.triger')
robot.after.addSignal(contactLF.task.name+'.error')
robot.after.addSignal('dyn.rf')
robot.after.addSignal('dyn.lf')
robot.after.addSignal('dyn.com')
robot.after.addSignal('sot.forcesNormal')
robot.after.addSignal('dyn.waist')

robot.after.addSignal('taskLim.normalizedPosition')
tr.add('taskLim.normalizedPosition','qn')

history = History(dyn,1,zmp.zmp)

#-----------------------------------------------------------------------------
# --- RUN --------------------------------------------------------------------
#-----------------------------------------------------------------------------

RADIUS = (0.35,-0.2)
#RADIUS = (0.4,-0.42) # WARNING: this version induces a collision of the hips.


q0 = robot.state.value

sot.clear()

contact(contactLF)
contact(contactRF)