Example #1
def query_tab_names():
    """
    Create Tabs from maya-type sTypes
    """
    search_type = 'sthpw/search_object'
    if env.Mode().get == 'standalone':
        filters = [('type', env.Env().get_types_list()), ('namespace', env.Env().get_namespace())]
    else:
        filters = [('type', env.Mode().get), ('namespace', env.Env().get_namespace())]

    assets = server_query(search_type, filters)

    out_tabs = {
        'names': [],
        'codes': [],
        'layouts': [],
        'colors': [],
    }
    if assets:
        for asset in assets:
            asset_get = asset.get
            out_tabs['names'].append(asset_get('title'))
            out_tabs['codes'].append(asset_get('code'))
            out_tabs['layouts'].append(asset_get('layout'))
            out_tabs['colors'].append(asset_get('color'))

    return out_tabs
Example #2
def generate_skey(pipeline_code=None, code=None):
    skey = 'skey://{0}/{1}?project={2}&code={3}'.format(env.Env().get_namespace(),
                                                        pipeline_code,
                                                        env.Env().get_project(),
                                                        code)

    return skey
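A quick sketch of the resulting URI, using purely illustrative values (assuming env.Env().get_namespace() returns 'vfx' and get_project() returns 'my_project'):

    generate_skey('props', 'PROPS00001')
    # -> 'skey://vfx/props?project=my_project&code=PROPS00001'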
Example #3
    def slide_images(self, value):
        image_path_icon = '{0}/{1}/{2}'.format(
            env.Env().get_asset_dir(),
            self.icon_list[value - 1]['relative_dir'],
            self.icon_list[value - 1]['file_name'])
        if not self.playblast:
            image_path_big = '{0}/{1}/{2}'.format(
                env.Env().get_asset_dir(),
                self.main_list[value - 1]['relative_dir'],
                self.main_list[value - 1]['file_name'])
        else:
            # fall back to the icon path so image_path_big is always defined below
            image_path_big = image_path_icon

        self.preview_image = QtGui.QImage(0, 0, QtGui.QImage.Format_ARGB32)
        if not self.external:
            self.preview_image.load(image_path_big)
        else:
            self.preview_image.load(image_path_icon)
        self.preview_pixmap = QtGui.QPixmap.fromImage(
            self.preview_image).scaled(self.size(), QtCore.Qt.KeepAspectRatio,
                                       QtCore.Qt.SmoothTransformation)

        self.scene = QtGui.QGraphicsScene(self)

        self.scene.addPixmap(self.preview_pixmap)

        self.previewGraphicsView.setScene(self.scene)
        self.previewGraphicsView.fitInView(self.scene.sceneRect(),
                                           QtCore.Qt.KeepAspectRatio)
        if self.playblast:
            return image_path_icon
        else:
            return image_path_big
Example #4
def server_auth(host, project, login, password):
    tactic_srv = tactic_client_lib.TacticServerStub.get(setup=False)
    srv = host
    prj = project
    tactic_srv.set_server(srv)
    tactic_srv.set_project(prj)
    log = login
    psw = password
    ticket = env.Env().get_ticket()
    if not ticket:
        ticket = tactic_srv.get_ticket(log, psw)
        env.Env().set_ticket(ticket)
    tactic_srv.set_ticket(ticket)
    return tactic_srv
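A typical call site simply passes in the stored connection settings, exactly as the get_server() wrapper in Example #30 below does:

    server = server_auth(env.Env().get_server(), env.Env().get_project(),
                         env.Env().get_user(), env.Env().get_pass())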
Example #5
def context_query(process):
    """
    Query for Context elements
    Builds one dict of lists to reduce the number of queries to the server
    :param process: list of tab names (e.g. vfx/asset)
    """

    search_type = 'sthpw/pipeline'

    filters = [('search_type', process), ('project_code', env.Env().get_project())]
    assets = server_query(search_type, filters)

    if assets:
        # TODO may be worth it to simplify this
        contexts = collections.OrderedDict()

        for proc in process:
            contexts[proc] = []
        items = contexts.copy()

        for context in contexts:
            for asset in assets:
                if context == asset['search_type']:
                    contexts[context] = Et.fromstring(asset['pipeline'].encode('utf-8'))

        for key, val in contexts.iteritems():
            if len(val):
                for element in val.iter('process'):
                    items[key].append(element.attrib['name'])

        return items
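The return value is an OrderedDict keyed by search_type, where each value is the list of process names parsed from that pipeline's XML. With purely illustrative data it might look like:

    # {'vfx/asset': ['modeling', 'texturing', 'rigging'],
    #  'vfx/shot': ['layout', 'animation', 'compositing']}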
Example #6
def test(actor):
    env = environment.Env()
    state_size = env.state_size
    action_size = env.action_size
    env.reset()
    #  env.R.body.wb = np.array([[1.], [-1.], [1.]])
    #  env.R.body.vs = np.array([[0.], [2.], [1.]])
    for t in range(int(endtime / env.R.dtime)):
        action = actor(np.reshape(env.state, [1, env.state_size]),
                       batch_size=1)
        #  np.zeros((12), dtype=np.float64)
        next_state, reward, done = env.step(action)
        points = np.concatenate([
            np.reshape(env.R.body.Rnow, (4, 3)),
            np.reshape(env.R.joints, (12, 3))
        ],
                                axis=0)
        if (t == 0):
            plt.show()
        if (done == 1):
            print(done)
            break
        plot_robot(points)
        plt.pause(0.01)
        ax.clear()
Example #7
def theta2plot(theta):  # theta: np.array([timeN, 12])
    env = environment.Env()
    state_size = env.state_size
    action_size = env.action_size
    before_theta = np.zeros(12)
    for p in range(env.R.numLeg):
        for i in range(env.R.numsubleg):
            before_theta[p * env.R.numsubleg + i] = env.R.leg[p].sub[i].theta
    omegas = np.zeros([theta.shape[0] - 1, theta.shape[1]])
    for t in range(theta.shape[0] - 1):
        omegas[t] = (theta[t + 1] - theta[t]) / env.R.dtime
    env.reset_theta(theta[0])
    #  env.R.body.wb = np.array([[1.], [-1.], [1.]])
    #  env.R.body.vs = np.array([[0.], [2.], [1.]])
    for t in range(int(endtime / env.R.dtime)):
        print('t = ', t)
        #  np.zeros((12), dtype=np.float64)
        next_state, reward, done = env.step(omegas[t])
        points = np.concatenate([
            np.reshape(env.R.body.Rnow, (4, 3)),
            np.reshape(env.R.joints, (12, 3))
        ],
                                axis=0)
        if (t == 0):
            plt.show()
        if (done == 1):
            print(done)
            break
        plot_robot(points)
        plt.pause(0.01)
        ax.clear()
Example #8
 def readSettings(self):
     """
     Reading Settings
     """
     self.settings.beginGroup(env.Mode().get + '/ui_checkout')
     tab_name = self.objectName().split('/')
     group_path = '{0}/{1}/{2}'.format(tab_name[0],
                                       env.Env().get_project(), tab_name[1])
     self.settings.beginGroup(group_path)
     self.commentsSplitter.restoreState(
         self.settings.value('commentsSplitter'))
     self.descriptionSplitter.restoreState(
         self.settings.value('descriptionSplitter'))
     self.imagesSplitter.restoreState(self.settings.value('imagesSplitter'))
     self.searchLineEdit.setText(
         self.settings.value('searchLineEdit_text', ''))
     self.contextComboBox.setCurrentIndex(
         self.settings.value('contextComboBox', 0))
     self.add_items_to_results(self.searchLineEdit.text())
     try:
         gf.revert_expanded_state(self.resultsTreeWidget,
                                  self.settings.value(
                                      'resultsTreeWidget_isExpanded', None),
                                  expand=True)
         gf.revert_expanded_state(self.resultsTreeWidget,
                                  self.settings.value(
                                      'resultsTreeWidget_isSelected', None),
                                  select=True)
     except:
         pass
     self.settings.endGroup()
     self.settings.endGroup()
Example #9
    def fill_notes(self):
        self.conversationScrollArea.close()
        self.create_scroll_area()
        self.current_user = env.Env().get_user()
        self.task_item.get_notes()
        self.widgets_list = []
        for proc in self.task_item.notes.itervalues():
            for context in proc.contexts.itervalues():
                for note in reversed(list(context.items.itervalues())):
                    if note.info['process'] == self.task_item.info['process']:
                        if note.info['login'] == self.current_user:
                            self.note_widget = Ui_outcomWidget(note, self)
                            self.lay.addWidget(self.note_widget)
                            self.widgets_list.append(self.note_widget)
                        else:
                            self.note_widget = Ui_incomWidget(note, self)
                            self.lay.addWidget(self.note_widget)
                            self.widgets_list.append(self.note_widget)

        # looks like duct tape
        self.conversationScrollArea.show()

        # TODO make scroll_to_widget using line above
        # print(self.widgets_list[-1].height())
        self.conversationScrollArea.verticalScrollBar().setValue(
            self.conversationScrollArea.verticalScrollBar().maximum())
Example #10
 def perform_save(self):
     """
     Scope all Edits for save
     :return:
     """
     if self.projectInfoCodeLabel.text():
         env.Env().set_project(self.projectInfoCodeLabel.text())
         self.restart()
Example #11
    def __init__(self, parent=None):
        super(self.__class__, self).__init__(parent=parent)

        self.settings = QtCore.QSettings('TACTIC Handler', 'TACTIC Handling Tool')

        self.tactic_project = env.Env().get_project()

        self.setupUi(self)

        self.readSettings()

        self.tab_actions()
Example #12
	def play_games(self, agent:agent.AbstractAgent, buffer:replay_buffer.DataContainer):
		for _ in range(self.config.game_count_per_iteration):
			t = 0
			env = environment.Env()
			T = replay_buffer.Trajectory()
			while not env.terminate():
				state = env.get_state()
				action = agent.act(state, t%2)
				env.step(action)
				env.render()
				T.add(state, action)
				t += 1
			T.result = env.result()
			buffer.save_game(T)
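A minimal stand-in agent is enough to exercise play_games; the sketch below relies only on the act(state, t % 2) call used above, and legal_moves() is a hypothetical placeholder rather than part of environment.Env or agent.AbstractAgent:

    import random

    class RandomAgent:
        # hypothetical agent: picks a random legal move for whichever player is to act
        def act(self, state, player):
            moves = legal_moves(state, player)  # placeholder helper, not shown in this example
            return random.choice(moves)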
Example #13
    def query_notes(s_code, process=None):
        """
        Query for Notes
        :param s_code: Code of asset related to note
        :param process: Process code
        :return:
        """
        search_type = 'sthpw/note'
        if process:
            filters = [('search_code', s_code), ('process', process), ('project_code', env.Env().get_project())]
        else:
            filters = [('search_code', s_code), ('project_code', env.Env().get_project())]

        return server_query(search_type, filters)
Example #14
def query_snapshots(process_list=None, s_code=None):
    """
    Query for snapshots belonging to an asset
    :return: list of snapshots
    """
    process_codes = list(process_list)
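    # 'icon', 'attachment' and 'publish' are presumably TACTIC's built-in processes,
    # added here so their snapshots are returned alongside the pipeline processes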
    process_codes.extend(['icon', 'attachment', 'publish'])

    filters_snapshots = [
        ('process', process_codes),
        ('project_code', env.Env().get_project()),
        ('search_code', s_code),
    ]

    return server_start().query_snapshots(filters=filters_snapshots, include_files=True)
Example #15
    def query_snapshots(s_code, process=None, user=None):
        """
        Query for Snapshots
        :param s_code: Code of asset related to snapshot
        :param process: Process code
        :param user: Optional user names
        :return:
        """
        # TODO Per users query

        if process:
            filters = [('search_code', s_code), ('process', process), ('project_code', env.Env().get_project())]
        else:
            filters = [('search_code', s_code), ('project_code', env.Env().get_project())]

        return server_start().query_snapshots(filters=filters, include_files=True)
Example #16
    def query_tasks(s_code, process=None, user=None):
        """
        Query for Task
        :param s_code: Code of asset related to task
        :param process: Process code
        :param user: Optional user names
        :return:
        """
        # TODO Per users query

        search_type = 'sthpw/task'
        if process:
            filters = [('search_code', s_code), ('process', process), ('project_code', env.Env().get_project())]
        else:
            filters = [('search_code', s_code), ('project_code', env.Env().get_project())]

        return server_query(search_type, filters)
Example #17
def enjoyPrius(args):
    prius = simulator.Prius(args)
    env = En.Env(args.road, args.vehicle, args.track) 

    while True:
        print(env._terminal(prius.collisions()))
        #env.testRender(pos)
        #time.sleep(0.5)
    #for i in range(0,90):
    #    pos.position.x = 42 + i/30
    #    pos.position.y = i / 30
    #    pos.orientation.x = i/90 * math.pi/2
    #    print str(pos)
    #    print(env.testRender(pos))
    #    #env.testRender(pos)
    #    time.sleep(0.05)

    #time.sleep(0.5)
    #while True:
    #    if args.mode == 0:
    #        prius.control_prius(args.value,-prius.pose().position.y/80,0)
    #    if args.mode == 1:
    #        prius.control_world()
    #        break
    #    time.sleep(0.1)
    #while True:
        #prius.control_world(False)
        #time.sleep(5)
        #print str(prius.pose())
        #if args.mode == 0:
        #    #prius.control_prius(args.value,-prius.pose().position.y/80,0)
        #    #prius.control_prius(args.value,-prius.pose().position.y/20,0)
        #    prius.control_prius(args.value,0.2,0)
        #if args.mode == 1:
        #    prius.control_world()
        #    break
        #print prius.pose().orientation.x
        #print str(prius.collisions())
        #print prius.collisions().position.x
        #print str(prius.velocity())
        print '===================='
    pass
Example #18
 def writeSettings(self):
     """
     Writing Settings
     """
     self.settings.beginGroup(env.Mode().get + '/ui_checkin')
     tab_name = self.objectName().split('/')
     group_path = '{0}/{1}/{2}'.format(tab_name[0],
                                       env.Env().get_project(), tab_name[1])
     self.settings.beginGroup(group_path)
     self.settings.setValue('commentsSplitter',
                            self.commentsSplitter.saveState())
     self.settings.setValue('descriptionSplitter',
                            self.descriptionSplitter.saveState())
     self.settings.setValue('imagesSplitter',
                            self.imagesSplitter.saveState())
     self.settings.setValue('dropPlateSplitter',
                            self.dropPlateSplitter.saveState())
     self.settings.setValue('searchOptionsSplitter',
                            self.searchOptionsSplitter.saveState())
     self.settings.setValue('searchLineEdit_text',
                            self.searchLineEdit.text())
     self.settings.setValue('contextComboBox',
                            self.contextComboBox.currentIndex())
     self.settings.setValue(
         'searchByCodeRadioButton',
         self.searchOptionsGroupBox.byCodeRadioButton.isChecked())
     self.settings.setValue(
         'searchByNameRadioButton',
         self.searchOptionsGroupBox.byNameRadioButton.isChecked())
     self.settings.setValue(
         'searchAllProcessCheckBox',
         self.searchOptionsGroupBox.showAllProcessCheckBox.isChecked())
     if self.resultsTreeWidget.topLevelItemCount() > 0:
         self.settings.setValue(
             'resultsTreeWidget_isSelected',
             gf.expanded_state(self.resultsTreeWidget, is_selected=True))
         self.settings.setValue(
             'resultsTreeWidget_isExpanded',
             gf.expanded_state(self.resultsTreeWidget, is_expanded=True))
     print('Done ui_checkin_tree ' + self.objectName() + ' settings write')
     self.settings.endGroup()
     self.settings.endGroup()
Example #19
    def readSettings(self):
        """
        Reading Settings
        """
        self.userNameLineEdit.setText(env.Env().get_user())
        self.passwordLineEdit.setText(env.Env().get_pass())
        self.tacticEnvLineEdit.setText(env.Env().get_data_dir())
        self.tacticAssetDirLineEdit.setText(env.Env().get_asset_dir())
        self.tacticInstallDirLineEdit.setText(env.Env().get_install_dir())
        self.tacticServerLineEdit.setText(env.Env().get_server())
        if env.Mode().get == 'maya':
            self.currentWorkdirLineEdit.setText(cmds.workspace(q=True, dir=True))

        self.settings.beginGroup(env.Mode().get + '/ui_conf')
        self.configToolBox.setCurrentIndex(self.settings.value('configToolBox', 0))
        self.settings.endGroup()
Example #20
    def get_current_item_paths(self):
        nested_item = self.resultsTreeWidget.itemWidget(
            self.resultsTreeWidget.currentItem(), 0)
        file_path = None
        dir_path = None
        all_process = None

        modes = env.Mode().mods
        modes.append('main')
        for mode in modes:
            if nested_item.files.get(mode):
                main_file = nested_item.files[mode][0]
                asset_dir = env.Env().get_asset_dir()
                file_path = '{0}/{1}/{2}'.format(asset_dir,
                                                 main_file['relative_dir'],
                                                 main_file['file_name'])
                split_path = main_file['relative_dir'].split('/')
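                # the workspace dir keeps only the first three components of relative_dir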
                dir_path = '{0}/{1}'.format(asset_dir,
                                            '{0}/{1}/{2}'.format(*split_path))
                all_process = nested_item.sobject.all_process

        return file_path, dir_path, all_process
Example #21
def launch(pa, pg_resume=None, render=False, repre='image', end='no_new_job'):

    # ----------------------------
    print("Preparing for workers...")
    # ----------------------------

    pg_learners = []
    envs = []

    nw_len_seqs, nw_size_seqs = job_distribution.generate_sequence_work(
        pa, seed=42)

    ### create sequence of environments for each of the num_ex job sets/sequences
    for ex in xrange(pa.num_ex):

        print "-prepare for env-", ex

        env = environment.Env(pa,
                              nw_len_seqs=nw_len_seqs,
                              nw_size_seqs=nw_size_seqs,
                              render=False,
                              repre=repre,
                              end=end)
        env.seq_no = ex
        envs.append(env)

    ### generate sequence of NNs for each batch, each of which is a policy gradient agent
    for ex in xrange(pa.batch_size +
                     1):  # last worker for updating the parameters

        print "-prepare for worker-", ex

        pg_learner = pg_network.PGLearner(pa)

        if pg_resume is not None:
            net_handle = open(pg_resume, 'rb')
            net_params = cPickle.load(net_handle)
            pg_learner.set_net_params(net_params)

        pg_learners.append(pg_learner)

    accums = init_accums(pg_learners[pa.batch_size])

    # --------------------------------------
    print("Preparing for reference data...")
    # --------------------------------------

    ref_discount_rews, ref_slow_down = slow_down_cdf.launch(pa,
                                                            pg_resume=None,
                                                            render=False,
                                                            plot=False,
                                                            repre=repre,
                                                            end=end)
    mean_rew_lr_curve = []
    max_rew_lr_curve = []
    slow_down_lr_curve = []

    # --------------------------------------
    print("Start training...")
    # --------------------------------------

    timer_start = time.time()

    for iteration in xrange(1, pa.num_epochs):

        ### use a separate process for each example; use a manager to share results across processes
        ps = []  # worker processes
        manager = Manager()  # managing return results
        manager_result = manager.list([])

        ex_indices = range(pa.num_ex)
        np.random.shuffle(ex_indices)

        all_eprews = []
        grads_all = []
        loss_all = []
        eprews = []
        eplens = []
        all_slowdown = []
        all_entropy = []

        ex_counter = 0

        ### for each jobset
        for ex in xrange(pa.num_ex):

            ex_idx = ex_indices[ex]
            ### evaluate several instances of trajectories for set of PG agents
            p = Process(target=get_traj_worker,
                        args=(
                            pg_learners[ex_counter],
                            envs[ex_idx],
                            pa,
                            manager_result,
                        ))
            ps.append(p)

            ex_counter += 1

            ##

            if ex_counter >= pa.batch_size or ex == pa.num_ex - 1:

                print ex, "out of", pa.num_ex

                ex_counter = 0

                for p in ps:
                    p.start()

                for p in ps:
                    p.join()

                result = []  # convert list from shared memory
                for r in manager_result:
                    result.append(r)

                ps = []
                manager_result = manager.list([])

                all_ob = concatenate_all_ob_across_examples(
                    [r["all_ob"] for r in result], pa)
                all_action = np.concatenate([r["all_action"] for r in result])
                all_adv = np.concatenate([r["all_adv"] for r in result])

                # Do policy gradient update step, using the first agent
                # put the new parameter in the last 'worker', then propagate the update at the end
                grads = pg_learners[pa.batch_size].get_grad(
                    all_ob, all_action, all_adv)

                grads_all.append(grads)

                all_eprews.extend([r["all_eprews"] for r in result])

                eprews.extend(np.concatenate([r["all_eprews"] for r in result
                                              ]))  # episode total rewards
                eplens.extend(np.concatenate([r["all_eplens"] for r in result
                                              ]))  # episode lengths

                all_slowdown.extend(
                    np.concatenate([r["all_slowdown"] for r in result]))
                all_entropy.extend(
                    np.concatenate([r["all_entropy"] for r in result]))

        # assemble gradients
        grads = grads_all[0]
        for i in xrange(1, len(grads_all)):
            for j in xrange(len(grads)):
                grads[j] += grads_all[i][j]

        # propagate network parameters to others
        params = pg_learners[pa.batch_size].get_params()

        rmsprop_updates_outside(grads, params, accums, pa.lr_rate, pa.rms_rho,
                                pa.rms_eps)

        for i in xrange(pa.batch_size + 1):
            pg_learners[i].set_net_params(params)

        timer_end = time.time()

        print "-----------------"
        print "Iteration: \t %i" % iteration
        print "NumTrajs: \t %i" % len(eprews)
        print "NumTimesteps: \t %i" % np.sum(eplens)
        # print "Loss:     \t %s" % np.mean(loss_all)
        print "MaxRew: \t %s" % np.average([np.max(rew) for rew in all_eprews])
        print "MeanRew: \t %s +- %s" % (np.mean(eprews), np.std(eprews))
        print "MeanSlowdown: \t %s" % np.mean(all_slowdown)
        print "MeanLen: \t %s +- %s" % (np.mean(eplens), np.std(eplens))
        print "MeanEntropy \t %s" % (np.mean(all_entropy))
        print "Elapsed time\t %s" % (timer_end - timer_start), "seconds"
        print "-----------------"

        timer_start = time.time()

        max_rew_lr_curve.append(np.average([np.max(rew)
                                            for rew in all_eprews]))
        mean_rew_lr_curve.append(np.mean(eprews))
        slow_down_lr_curve.append(np.mean(all_slowdown))

        if iteration % pa.output_freq == 0:
            param_file = open(
                pa.output_filename + '_' + str(iteration) + '.pkl', 'wb')
            cPickle.dump(pg_learners[pa.batch_size].get_params(), param_file,
                         -1)
            param_file.close()

            pa.unseen = True
            slow_down_cdf.launch(pa,
                                 pa.output_filename + '_' + str(iteration) +
                                 '.pkl',
                                 render=False,
                                 plot=True,
                                 repre=repre,
                                 end=end)
            pa.unseen = False
            # test on unseen examples

            plot_lr_curve(pa.output_filename, max_rew_lr_curve,
                          mean_rew_lr_curve, slow_down_lr_curve,
                          ref_discount_rews, ref_slow_down)
Example #22
def launch(pa, pg_resume=None, render=False, plot=False, repre='image', end='no_new_job'):

    # ---- Parameters ----

    test_types = ['Tetris', 'SJF', 'Random']

    if pg_resume is not None:
        test_types = ['PG'] + test_types

    env = environment.Env(pa, render, repre=repre, end=end)

    all_discount_rews = {}
    jobs_slow_down = {}
    work_complete = {}
    work_remain = {}
    job_len_remain = {}
    num_job_remain = {}
    job_remain_delay = {}

    for test_type in test_types:
        all_discount_rews[test_type] = []
        jobs_slow_down[test_type] = []
        work_complete[test_type] = []
        work_remain[test_type] = []
        job_len_remain[test_type] = []
        num_job_remain[test_type] = []
        job_remain_delay[test_type] = []

    for seq_idx in xrange(pa.num_ex):
        print('\n\n')
        print("=============== " + str(seq_idx) + " ===============")

        for test_type in test_types:

            rews, info = get_traj(test_type, pa, env, pa.episode_max_length, pg_resume)

            print "---------- " + test_type + " -----------"

            print "total discount reward : \t %s" % (discount(rews, pa.discount)[0])

            all_discount_rews[test_type].append(
                discount(rews, pa.discount)[0]
            )

            # ------------------------
            # ---- per job stat ----
            # ------------------------

            enter_time = np.array([info.record[i].enter_time for i in xrange(len(info.record))])
            finish_time = np.array([info.record[i].finish_time for i in xrange(len(info.record))])
            job_len = np.array([info.record[i].len for i in xrange(len(info.record))])
            job_total_size = np.array([np.sum(info.record[i].res_vec) for i in xrange(len(info.record))])

            finished_idx = (finish_time >= 0)
            unfinished_idx = (finish_time < 0)

            jobs_slow_down[test_type].append(
                (finish_time[finished_idx] - enter_time[finished_idx]) / job_len[finished_idx]
            )
            work_complete[test_type].append(
                np.sum(job_len[finished_idx] * job_total_size[finished_idx])
            )
            work_remain[test_type].append(
                np.sum(job_len[unfinished_idx] * job_total_size[unfinished_idx])
            )
            job_len_remain[test_type].append(
                np.sum(job_len[unfinished_idx])
            )
            num_job_remain[test_type].append(
                len(job_len[unfinished_idx])
            )
            job_remain_delay[test_type].append(
                np.sum(pa.episode_max_length - enter_time[unfinished_idx])
            )

        env.seq_no = (env.seq_no + 1) % env.pa.num_ex

    # -- matplotlib colormap no overlap --
    if plot:
        num_colors = len(test_types)
        cm = plt.get_cmap('gist_rainbow')
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.set_color_cycle([cm(1. * i / num_colors) for i in range(num_colors)])

        for test_type in test_types:
            slow_down_cdf = np.sort(np.concatenate(jobs_slow_down[test_type]))
            slow_down_yvals = np.arange(len(slow_down_cdf))/float(len(slow_down_cdf))
            ax.plot(slow_down_cdf, slow_down_yvals, linewidth=2, label=test_type)

        plt.legend(loc=4)
        plt.xlabel("job slowdown", fontsize=20)
        plt.ylabel("CDF", fontsize=20)
        # plt.show()
        plt.savefig(pg_resume + "_slowdown_fig" + ".pdf")

    return all_discount_rews, jobs_slow_down
Example #23
import environment
import time

height, width = 30, 30
vehNum = 3

CrossRoad = environment.Env(vehNum, height, width, 4)
CrossRoad.showEnv_init()

for count in range(1):
    collisionFlag = False
    endFlag = False
    tag = 0
    CrossRoad.reStart()
    print()
    while not (collisionFlag or endFlag):
        action = [0] * vehNum
        [state, reward, collisionFlag, endFlag] = CrossRoad.updateEnv(action)
        CrossRoad.showEnv()
        tag += 1
        print(count, "step: ", tag, "collision?: ", collisionFlag, "end?: ",
              endFlag)
        print(state)
        time.sleep(2)
Example #24
def launch(pa, pg_resume=None, render=False, repre='image', end='no_new_job'):

    env = environment.Env(pa, render=False, repre=repre, end=end)

    pg_learner = pg_network.PGLearner(pa)

    if pg_resume is not None:
        net_handle = open(pg_resume, 'rb')
        net_params = cPickle.load(net_handle)
        pg_learner.set_net_params(net_params)

    if pa.evaluate_policy_name == "SJF":
        evaluate_policy = other_agents.get_sjf_action
    elif pa.evaluate_policy_name == "PACKER":
        evaluate_policy = other_agents.get_packer_action
    else:
        print("Panic: no policy known to evaluate.")
        exit(1)

    # ----------------------------
    print("Preparing for data...")
    # ----------------------------

    nw_len_seqs, nw_size_seqs = job_distribution.generate_sequence_work(
        pa, seed=42)

    # print 'nw_time_seqs=', nw_len_seqs
    # print 'nw_size_seqs=', nw_size_seqs

    mem_alloc = 4

    X = np.zeros([
        pa.simu_len * pa.num_ex * mem_alloc, 1, pa.network_input_height,
        pa.network_input_width
    ],
                 dtype=theano.config.floatX)
    y = np.zeros(pa.simu_len * pa.num_ex * mem_alloc, dtype='int32')

    print 'network_input_height=', pa.network_input_height
    print 'network_input_width=', pa.network_input_width

    counter = 0

    for train_ex in range(pa.num_ex):

        env.reset()

        for _ in xrange(pa.episode_max_length):

            # ---- get current state ----
            ob = env.observe()

            a = evaluate_policy(env.machine, env.job_slot)

            if counter < pa.simu_len * pa.num_ex * mem_alloc:

                add_sample(X, y, counter, ob, a)
                counter += 1

            ob, rew, done, info = env.step(a, repeat=True)

            if done:  # hit void action, exit
                break

        # roll to next example
        env.seq_no = (env.seq_no + 1) % env.pa.num_ex

    num_train = int(0.8 * counter)
    num_test = int(0.2 * counter)

    X_train, X_test = X[:num_train], X[num_train:num_train + num_test]
    y_train, y_test = y[:num_train], y[num_train:num_train + num_test]

    # Normalization, make sure nothing becomes NaN

    # X_mean = np.average(X[:num_train + num_test], axis=0)
    # X_std = np.std(X[:num_train + num_test], axis=0)
    #
    # X_train = (X_train - X_mean) / X_std
    # X_test = (X_test - X_mean) / X_std

    # ----------------------------
    print("Start training...")
    # ----------------------------

    for epoch in xrange(pa.num_epochs):

        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_acc = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(X_train,
                                         y_train,
                                         pa.batch_size,
                                         shuffle=True):
            inputs, targets = batch
            err, prob_act = pg_learner.su_train(inputs, targets)
            pg_act = np.argmax(prob_act, axis=1)
            train_err += err
            train_acc += np.sum(pg_act == targets)
            train_batches += 1

        # # And a full pass over the test data:
        test_err = 0
        test_acc = 0
        test_batches = 0
        for batch in iterate_minibatches(X_test,
                                         y_test,
                                         pa.batch_size,
                                         shuffle=False):
            inputs, targets = batch
            err, prob_act = pg_learner.su_test(inputs, targets)
            pg_act = np.argmax(prob_act, axis=1)
            test_err += err
            test_acc += np.sum(pg_act == targets)
            test_batches += 1

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, pa.num_epochs,
                                                   time.time() - start_time))
        print("  training loss:    \t\t{:.6f}".format(train_err /
                                                      train_batches))
        print("  training accuracy:\t\t{:.2f} %".format(
            train_acc / float(num_train) * 100))
        print("  test loss:        \t\t{:.6f}".format(test_err / test_batches))
        print("  test accuracy:    \t\t{:.2f} %".format(test_acc /
                                                        float(num_test) * 100))

        sys.stdout.flush()

        if epoch % pa.output_freq == 0:

            net_file = open(
                pa.output_filename + '_net_file_' + str(epoch) + '.pkl', 'wb')
            cPickle.dump(pg_learner.return_net_params(), net_file, -1)
            net_file.close()

    print("done")
Example #25
def enjoyPrius(args):
    # controlling loop
    global going_on
    signal.signal(signal.SIGINT, handler)
    going_on = True

    # prius and its environment
    prius = simulator.Prius(args)
    thread_update_pos = threading.Thread(target = updatePriusPos, args = (prius,))
    thread_update_collisions = threading.Thread(target = updatePriusCollision, args = (prius,))
    thread_update_pos.start()
    thread_update_collisions.start()
    env = En.Env(args.road, args.vehicle, args.track) 

    # network session and it's parameters
    sess = tf.InteractiveSession()
    inputState, outputQ, h_fc1 = createNetwork()
    
    # action is chosen by policy pi
    action = tf.placeholder("float", [None, ACTIONS])
    # optimal q value of actions taken just now
    target_q = tf.placeholder("float", [None])
    # real q value when these actions are selected and actuated
    action_q = tf.reduce_sum(tf.multiply(outputQ , action), reduction_indices=1)
    # target is q -> *q
    cost = tf.reduce_mean(tf.square(target_q - action_q))
    train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)

    # saving and loading networks
    saver = tf.train.Saver()
    sess.run(tf.initialize_all_variables())
    checkpoint = tf.train.get_checkpoint_state("saved_networks")
    if checkpoint and checkpoint.model_checkpoint_path:
        saver.restore(sess, checkpoint.model_checkpoint_path)
        print("Successfully loaded:", checkpoint.model_checkpoint_path)
    else:
        print("Could not find old network weights")

    x_t, reward, terminal = env.render(prius.collisions(), prius.pose())
    while terminal:
        prius.reset()
        time.sleep(0.2)
        x_t, reward, terminal = env.render(prius.collisions(), prius.pose())
    # control prius by pedal percent 0.2, hand steering is 0, brake pedal is 0
    prius.control_prius(0.2, 0, 0)
    x_t, reward, terminal = env.render(prius.collisions(), prius.pose())
    x_t = cv2.cvtColor(cv2.resize(x_t, (160, 160)), cv2.COLOR_BGR2GRAY)
    ret , x_t = cv2.threshold(x_t,1,255,cv2.THRESH_BINARY)
    state_t = np.stack((x_t, x_t, x_t, x_t, x_t, x_t, x_t, x_t, x_t, x_t), axis=2)
    state_t1 = state_t

    epsilon = INITIAL_EPSILON
    step = 1

    store = deque()
    simTime = time.time()

    while going_on:
        # always run at high frequency
        x_t, reward, terminal = env.render(prius.collisions(), prius.pose())
        # train when step over observation stage
        if step > OBSERVE:
            # sample a minibatch to train
            minibatch = random.sample(store, BATCH)
            # get the batch variables
            state_j_batch = [d[0] for d in minibatch]
            action_batch = [d[1] for d in minibatch]
            reward_batch = [d[2] for d in minibatch]
            state_j1_batch = [d[3] for d in minibatch]
            # qV_batch = []
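            # the 20 Q-value outputs are treated as 10 independent 2-way action groups;
            # the target adds the discounted max of each group to the observed reward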
            target_q_batch = []
            q_action1_batch = outputQ.eval(feed_dict = {inputState: state_j1_batch})
            for i in range(0, len(minibatch)):
                terminal_j = minibatch[i][4]
                if terminal_j:
                    target_q_batch.append(reward_batch[i])
                else:
                    target_q_batch.append(reward_batch[i] + GAMMA * \
                                      ( np.max(q_action1_batch[i][0:2] ) + \
                                        np.max(q_action1_batch[i][2:4] ) + \
                                        np.max(q_action1_batch[i][4:6] ) + \
                                        np.max(q_action1_batch[i][6:8] ) + \
                                        np.max(q_action1_batch[i][8:10] ) + \
                                        np.max(q_action1_batch[i][10:12]) +\
                                        np.max(q_action1_batch[i][12:14]) +\
                                        np.max(q_action1_batch[i][14:16]) +\
                                        np.max(q_action1_batch[i][16:18]) +\
                                        np.max(q_action1_batch[i][18:20])
                                     )  )
            train_step.run(feed_dict = {
                target_q: target_q_batch,
                action: action_batch,
                inputState: state_j_batch }
                )
        # save progress every 10000 iterations
        if step % 10000 == 0:
            saver.save(sess, 'saved_networks/' + GAME + '-dqn', global_step = step)
        # step > OBSERVE end train_step

        # control at a frequency of 10 Hz
        if (time.time() - simTime < 0.098) and (not terminal):
            continue
        simTime = time.time()

        ## current q value
        #readout_t = readout.eval(feed_dict={s : [s_t]})[0]
        qV_t = outputQ.eval(feed_dict={inputState : [state_t]})[0]
        ## epsilon-greedy policy
        action_array_t = np.zeros([ACTIONS])
        action_angle_t = 0
        # scale down epsilon
        if epsilon > FINAL_EPSILON and step > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE
        if random.random() < epsilon:
            action_array_t, action_angle_t = getRandomAction()
        else:
            action_array_t, action_angle_t = getAction(qV_t)

        prius.control_prius(0.2, -1*action_angle_t, 0)
        x_t = cv2.cvtColor(cv2.resize(x_t, (160, 160)), cv2.COLOR_BGR2GRAY)
        ret , x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)
        x_t = np.reshape(x_t, (160, 160, 1))
        state_t1 = np.append(x_t, state_t[:, :, :9], axis=2)
        store.append((state_t, action_array_t, reward, state_t1, terminal))
        if len(store) > REPLAY_MEMORY:
            store.popleft()
        # print on-time reward
        line1 = "step :====================== ",step,"======================== "
        line3 = "reward                       ",reward,"                      "
        if terminal:
            printRed(line1)
            printRed(line3)
        elif reward < 0.04:
            printYellow(line1)
            printYellow(line3)
        elif reward < 0.1:
            printCyan(line1)
            printCyan(line3)
        else:
            printGreen(line1)
            printGreen(line3)
        # if terminal, restart
        if terminal:
            prius.reset()
            time.sleep(0.2)
            x_t, reward, terminal = env.render(prius.collisions(), prius.pose())
            x_t = cv2.cvtColor(cv2.resize(x_t, (160, 160)), cv2.COLOR_BGR2GRAY)
            ret , x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)
            state_t1 = np.stack((x_t, x_t, x_t, x_t, x_t, x_t, x_t, x_t, x_t, x_t), axis=2)
        state_t = state_t1
        step += 1
Example #26
def save_scene(search_key, context, description, all_process):

    # add info about particular scene
    skey_link = 'skey://{0}&context={1}'.format(search_key, context)
    if not cmds.attributeQuery(
            'tacticHandler_skey', node='defaultObjectSet', exists=True):
        cmds.addAttr('defaultObjectSet',
                     longName='tacticHandler_skey',
                     dataType='string')
    cmds.setAttr('defaultObjectSet.tacticHandler_skey',
                 skey_link,
                 type='string')

    # get template names for scene and playblast image
    temp_dir = env.Env().get_temp_dir()
    random_uuid = uuid.uuid4()

    types = {
        'mayaBinary': 'mb',
        'mayaAscii': 'ma',
    }
    temp_file = '{0}/{1}.ma'.format(temp_dir, random_uuid)
    temp_playblast = '{0}/{1}.jpg'.format(temp_dir, random_uuid)

    # rename file, save scene, playblast, get saving format
    cmds.file(rename=temp_file)
    cmds.file(save=True, type='mayaAscii')
    current_frame = cmds.currentTime(query=True)
    cmds.playblast(forceOverwrite=True,
                   format='image',
                   completeFilename=temp_playblast,
                   showOrnaments=False,
                   widthHeight=[960, 540],
                   sequenceTime=False,
                   frame=[current_frame],
                   compression='jpg',
                   offScreen=True,
                   viewer=False,
                   percent=100)

    # check in snapshot
    snapshot = tc.checkin_snapshot(search_key,
                                   context,
                                   temp_file,
                                   file_type='maya',
                                   is_current=True,
                                   description=description)

    # from pprint import pprint
    # pprint(snapshot)
    # retrieve checked in snapshot file info
    asset_dir = env.Env().get_asset_dir()
    file_sobject = snapshot['__file_sobjects__'][0]
    relative_dir = file_sobject['relative_dir']
    file_name = file_sobject['file_name']

    # make proper file path, and dir path to set workspace
    new_file = '{0}/{1}/{2}'.format(asset_dir, relative_dir, file_name)
    split_path = relative_dir.split('/')
    dir_path = '{0}/{1}'.format(asset_dir, '{0}/{1}/{2}'.format(*split_path))
    set_workspace(dir_path, all_process)

    # check in playblast
    tc.checkin_playblast(snapshot['code'], temp_playblast)

    # set proper scene name
    cmds.file(rename=new_file)
Example #27
    def __init__(
            self,
            # ddqn parameters
            connection_label="lonely_worker",
            q_network_type='simple',
            target_q_network_type='simple',
            gamma=0.99,
            target_update_freq=10000,
            train_freq=3,
            num_burn_in=300,
            batch_size=32,
            optimizer='adam',
            loss_func="mse",
            max_ep_length=1000,
            experiment_id="Exp_1",
            model_checkpoint=True,
            opt_metric=None,
            # environment parameters
            net_file="cross.net.xml",
            route_file="cross.rou.xml",
            network_dir="./network",
            demand="nominal",
            state_shape=(1, 11),
            num_actions=2,
            use_gui=False,
            delta_time=10,
            reward="balanced",
            # memory parameters
            max_size=100000,
            # additional parameters
            policy="linDecEpsGreedy",
            eps=0.1,
            num_episodes=2,
            monitoring=False,
            episode_recording=False,
            hparams=None):

        if hparams:
            args_description = locals()
            args_description = str(
                {key: args_description[key]
                 for key in hparams})
        else:
            args_description = "single_worker"

        self.connection_label = connection_label
        self.q_network_type = q_network_type
        self.target_q_network_type = target_q_network_type
        self.gamma = gamma
        self.target_update_freq = target_update_freq
        self.train_freq = train_freq
        self.num_burn_in = num_burn_in
        self.batch_size = batch_size
        self.optimizer = optimizer
        self.loss_func = loss_func
        self.max_ep_length = max_ep_length
        self.experiment_id = experiment_id
        self.model_checkpoint = model_checkpoint
        self.opt_metric = opt_metric

        # additional parameters
        self.policy = policy
        self.eps = eps
        self.num_episodes = num_episodes
        self.monitoring = monitoring
        self.episode_recording = episode_recording
        self.output_dir, self.summary_writer_folder = tools.get_output_folder(
            "./logs", self.experiment_id, args_description)
        self.summary_writer = tf.summary.FileWriter(
            logdir=self.summary_writer_folder)

        # environment parameters
        self.net_file = os.path.join(network_dir, net_file)
        self.route_file = os.path.join(self.output_dir, route_file)
        self.demand = demand
        self.state_shape = state_shape
        self.num_actions = num_actions
        self.use_gui = use_gui
        self.delta_time = delta_time
        self.reward = reward

        # memory parameters
        self.max_size = max_size
        self.state_shape = state_shape

        # Initialize Q-networks (value and target)
        self.q_network = agent.get_model(model_name=self.q_network_type,
                                         input_shape=(self.state_shape[1], ),
                                         num_actions=self.num_actions)

        self.target_q_network = agent.get_model(
            model_name=self.target_q_network_type,
            input_shape=(self.state_shape[1], ),
            num_actions=self.num_actions)

        # Initialize environment
        self.env = environment.Env(connection_label=self.connection_label,
                                   net_file=self.net_file,
                                   route_file=self.route_file,
                                   demand=self.demand,
                                   state_shape=self.state_shape,
                                   num_actions=self.num_actions,
                                   policy=self.policy,
                                   use_gui=self.use_gui,
                                   eps=self.eps,
                                   reward=self.reward)

        # Initialize replay memory
        self.memory = memory.ReplayMemory(max_size=self.max_size,
                                          state_shape=self.state_shape,
                                          num_actions=self.num_actions)

        # Initialize Double DQN algorithm
        self.ddqn = doubledqn.DoubleDQN(
            q_network=self.q_network,
            target_q_network=self.target_q_network,
            memory=self.memory,
            gamma=self.gamma,
            target_update_freq=self.target_update_freq,
            train_freq=self.train_freq,
            num_burn_in=self.num_burn_in,
            batch_size=self.batch_size,
            optimizer=self.optimizer,
            loss_func=self.loss_func,
            max_ep_length=self.max_ep_length,
            env_name=self.env,
            output_dir=self.output_dir,
            monitoring=self.monitoring,
            episode_recording=self.episode_recording,
            experiment_id=self.experiment_id,
            summary_writer=self.summary_writer)

        # Store initialization parameters
        self.store_init(locals())
Example #28
def train(supply_distribution: Tuple[dict, list],
          demand_distribution: Tuple[dict, list],
          model_name: str,
          demand: int,
          max_day: int,
          training_timesteps_list: list,
          tblog: str,
          max_age: int = 35,
          obs_method: int = 1,
          doi: int = 4) -> str:
    """
    Train the agent
    First train without evaluation
    Second train with in-training evaluation

    :param demand_distribution: dict of {blood group : prevalence }, list of antigens included of the demand
    :param supply_distribution: dict of {blood group : prevalence }, list of antigens included of the supply
    :param model_name: str: name of the model to be stored
    :param demand: int: number of blood that is supplied / requested
    :param max_day: int: number of days per episode
    :param training_timesteps_list: list: [number of episodes without evaluation, number of episodes with evaluation]
    :param tblog: str, name of the tensorboard log
    :param max_age: int, max age of the RBCs
    :param obs_method: int, 1 or 2: item requested one-hot-encoded (1) or binary (2)
    :param doi: int, number of days of inventory
    :return: file name: str, name of the model that is stored
    """
    # Initialize parameters
    GAMMA = round(1 - (1 / (35 * demand)), 5)  # 0.993
    state_type = 'custom_category'

    time_string = datetime.now().strftime("%Y_%m_%d_%H_%M")
    file_name = time_string + model_name

    max_reward = max_day * demand * 0.1

    # Create environment
    env = environment.Env(supply_distribution[0],
                          demand_distribution[0],
                          max_age,
                          demand,
                          doi=doi,
                          obs_method=obs_method,
                          state_type=state_type,
                          max_day=max_day,
                          file_name=file_name,
                          verbose=0)
    env = DummyVecEnv([lambda: env])
    model = PPO2(MlpPolicy,
                 env,
                 gamma=GAMMA,
                 verbose=0,
                 tensorboard_log="results/tensorboard_data/" + tblog +
                 "/")  # create model

    # Train the model without evaluation (=faster)
    print('start phase 1, without evaluation')
    model.learn(total_timesteps=training_timesteps_list[0],
                tb_log_name=file_name)
    # TB- run: tensorboard --logdir ./tblog/

    # Export
    model.save('results/model/' + file_name)  # Save for backup

    callback_on_best = StopTrainingOnDecayingRewardThreshold(
        max_reward=max_reward,
        episode_decay=training_timesteps_list[2],
        reward_decay=0.05,
        no_reward_episodes=training_timesteps_list[0],
        verbose=1)

    # Callback for evaluation
    eval_callback = EvalCallback(
        env,  # callback_on_new_best=callback_on_best,
        best_model_save_path='results/model/' + file_name,
        eval_freq=50000,
        verbose=1,
        n_eval_episodes=5)

    # Train the model with eval every 50000 steps
    print('start phase 2 with evaluation')
    model.learn(total_timesteps=training_timesteps_list[1],
                tb_log_name=file_name,
                callback=eval_callback,
                reset_num_timesteps=False
                )  # train the model and run tensorboard 5000000 1500000

    # Export
    model.save('results/model/' + file_name + 'end')  # Save for backup

    # Extract the tensorboard data
    data_extract.extract_tb(file_name)

    return file_name
Example #29
def solve(supply_distribution: Tuple[dict, list],
          demand_distribution: Tuple[dict,
                                     list], model_name: str, export_model: str,
          max_age: int, demand: int, doi: int, n_warm_start_days: int,
          n_days: int, obs_method: int, state_type: str) -> dict:
    """

    :param demand_distribution: Tuple[dict, list] containing a dict with {blood_group : distribution}, list of
    included antigens
    :param supply_distribution: Tuple[dict, list] containing a dict with {blood_group : distribution}, list of
    included antigens
    :param model_name: str, name of the model that is used to store the results
    :param export_model: str, name of the model that is trained
    :param max_age: int, max age of the RBCs
    :param demand: int, number of demand / supply per day
    :param doi: days of inventory, the number of days the inventory is filled before first supply
    :param n_warm_start_days: int, number of days of warm start
    :param n_days: int, number of days for evaluation
    :param obs_method: int, 1 or 2: item requested one-hot-encoded (1) or binary (2)
    :param state_type: type of state that is used 'custom category'
    :return:
    """
    # Get model ready
    env = environment.Env(supply_distribution[0],
                          demand_distribution[0],
                          max_age,
                          demand,
                          doi,
                          obs_method=obs_method,
                          state_type=state_type,
                          file_name=model_name)
    env = DummyVecEnv([lambda: env])
    model = PPO2.load(export_model, env=env)

    # Run model
    obs = env.reset()

    # Warm start
    print('warm start - started')
    env.env_method('set_days', n_warm_start_days)
    done = False
    while not done:
        action, _states = model.predict(obs, deterministic=True)
        obs_next, rewards, done, info = env.step(action)
        obs = obs_next
    print('warm start - ended')

    # Testing
    print('Testing - started')
    env.env_method('set_days', n_days)
    env.env_method('change_eval_boolean', True)

    done = False
    while not done:
        action, _states = model.predict(obs, deterministic=True)
        obs_next, rewards, done, info = env.step(action)
        obs = obs_next

    results = env.env_method('render_blood_specific')  # get evaluation metrics
    print('Testing - ended')

    return results
Example #30
def get_server():
    return server_auth(env.Env().get_server(), env.Env().get_project(), env.Env().get_user(), env.Env().get_pass())