Example #1
        def _run_infer_process():
            # prepare ablation blocks
            logger.info('Prepare model ablation blocks.')
            ablation_blocks = getattr(self.ant_context.params, 'ablation', [])
            if ablation_blocks is None:
                ablation_blocks = []
            for b in ablation_blocks:
                self.ant_context.deactivate_block(b)

            # infer
            logger.info('Running inference process.')
            try:

                def _callback_func(data):
                    sample = demo_dataset.now()
                    sample_id = sample['id'] if 'id' in sample else ''
                    record_content = {
                        'experiment_uuid': experiment_uuid,
                        'data': data,
                        'id': sample_id
                    }
                    for data_group in record_content['data']:
                        for item in data_group:
                            if item['type'] in ['IMAGE', 'VIDEO', 'FILE']:
                                item['data'] = '%s/record/%s' % (
                                    infer_dump_dir, item['data'])

                    self.rpc.response.post(response=json.dumps(record_content))

                self.context.recorder = LocalRecorderNodeV2(_callback_func)
                self.context.call_infer_process(demo_dataset,
                                                dump_dir=infer_dump_dir)
            except Exception as e:
                print(e)
                traceback.print_exc()
Example #2
 def network_fn(*args, **kwargs):
     #
     logger.info('building computing graph')
     res = func(self.is_training, *args, **kwargs)
     tf.train.write_graph(self.sess.graph_def, self.dump_dir,
                          'graph.pbtxt')
     return res
Example #3
    def _sandbox_thread(**kwargs):
        if 'sandbox_time' in kwargs and kwargs['sandbox_time'] is not None:
            launch_time = kwargs['sandbox_launch_time']
            sandbox_running_time = kwargs['sandbox_time']
            # now time
            now_time = time.time()
            if now_time - launch_time > sandbox_running_time:
                # reached the custom max running time
                logger.info(
                    'max running time is arriving, exit antgo running environment'
                )

                # experiment result save process
                if 'sandbox_dump_dir' in kwargs and \
                        'sandbox_experiment' in kwargs and \
                        'sandbox_user_token' in kwargs:
                    # 1.step launch experiment save process
                    logger.info(
                        'launch upload experiment record in the running end')
                    process = multiprocessing.Process(
                        target=experiment_upload_dht,
                        args=(kwargs['sandbox_dump_dir'],
                              kwargs['sandbox_experiment'],
                              kwargs['sandbox_user_token'],
                              kwargs['sandbox_user_proxy'],
                              kwargs['sandbox_user_signature']))
                    process.start()

                    # 2.step waiting until the save process stops
                    process.join()

                # exit globally
                os._exit(0)
Example #4
def qiniu_upload(file_path,
                 bucket='mltalker',
                 out_url_base='http://experiment.mltalker.com',
                 max_size=10):
    access_key = 'ZSC-X2p4HG5uvEtfmn5fsTZ5nqB3h54oKjHt0tU6'
    secret_key = 'Ya8qYwIDXZn6jSJDMz_ottWWOZqlbV8bDTNfCGO0'
    q = Auth(access_key, secret_key)

    if max_size is not None:
        # check file size
        fsize = os.path.getsize(file_path)
        fsize = fsize / float(1024 * 1024)
        if fsize > max_size:
            logger.error('file size is larger than limit (%dMB)' % max_size)
            return None

    key = file_path.split('/')[-1]
    token = q.upload_token(bucket, key, 3600)
    ret, info = put_file(token, key, file_path)
    if ret['key'] == key and ret['hash'] == etag(file_path):
        logger.info('success to upload')
        return 'qiniu:%s/%s' % (out_url_base, key)

    return None
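A minimal usage sketch for qiniu_upload (the file path below is a placeholder; it assumes the qiniu SDK's Auth, put_file and etag are imported as the function above expects, and that the module-level logger used throughout these examples is available):

url = qiniu_upload('./dump/experiment_report.tar.gz', bucket='mltalker', max_size=10)
if url is not None:
    logger.info('record uploaded to %s' % url)
else:
    logger.error('upload failed or file exceeds the size limit')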
Example #5
    def get_bbox(bbox_dir, imglist, train_or_test):
        ret = []
        ret_label = []

        def parse_bbox(fname):
            root = ET.parse(fname).getroot()
            size = root.find('size').getchildren()
            size = map(int, [size[0].text, size[1].text])

            box = root.find('object').find('bndbox').getchildren()
            box = map(lambda x: float(x.text), box)

            label = root.find('object').find('name').text
            return np.asarray(list(box), dtype='float32'), label

        with timed_operation('Loading Bounding Boxes ...'):
            cnt = 0
            import tqdm
            for k in tqdm.trange(len(imglist)):
                fname = imglist[k][0]
                fname = fname[:-4] + 'xml'
                fname = os.path.join(bbox_dir, train_or_test, fname)
                try:
                    box, label = parse_bbox(fname)
                    ret.append(box)
                    ret_label.append(label)
                    cnt += 1
                except KeyboardInterrupt:
                    raise
                except:
                    ret.append(None)
                    ret_label.append(-1)
            logger.info("{}/{} images have bounding box.".format(
                cnt, len(imglist)))
        return ret, ret_label
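parse_bbox above expects Pascal VOC style annotation files (a <size> block plus one <object> holding <bndbox> and <name>). A small self-contained sketch with hypothetical values illustrates the assumed layout:

import xml.etree.ElementTree as ET
import numpy as np

# hypothetical VOC-style annotation content
sample_xml = """<annotation>
  <size><width>500</width><height>375</height></size>
  <object>
    <name>n01440764</name>
    <bndbox><xmin>60</xmin><ymin>35</ymin><xmax>430</xmax><ymax>340</ymax></bndbox>
  </object>
</annotation>"""

root = ET.fromstring(sample_xml)
box = [float(x.text) for x in root.find('object').find('bndbox')]
label = root.find('object').find('name').text
print(np.asarray(box, dtype='float32'), label)  # [ 60.  35. 430. 340.] n01440764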
Example #6
    def deploy(self, model, *args, **kwargs):
        # model context
        self.ctx.model = model

        if self.is_training:
            self.training_deploy(model, *args, **kwargs)
        else:
            self.infer_deploy(model, *args, **kwargs)

            if self.ctx.ant is None:
                logger.info('successfully deploy model')
                exit(0)
Example #7
  def start(self):
    server_port = _pick_idle_port(self.port)

    logger.info('launch train %s server' % ('worker' if self.is_worker else 'master'))
    pid = None
    if not self.is_worker:
      # config train master
      automl_config = {}
      if self.main_param is not None:
        main_config_path = os.path.join(self.main_folder, self.main_param)
        params = yaml.load(open(main_config_path, 'r'))

        if 'automl' in params and 'study' in params['automl']:
          # parse study
          study_name = params['automl']['study'].get('study_name', '')
          automl_config['study_name'] = study_name

          study_goal = params['automl']['study'].get('goal', 'MAXIMIZE')
          assert(study_goal in ['MAXIMIZE', 'MINIMIZE'])
          automl_config['study_goal'] = study_goal

          study_max_trials = params['automl']['study'].get('study_max_trials', 1000)
          automl_config['study_max_trials'] = int(study_max_trials)

          study_max_time = params['automl']['study'].get('study_max_time', '1d')
          automl_config['study_max_time'] = study_max_time

          study_hyperparameter_search = ''
          study_hyperparameters = {}

          study_architecture_search = params['automl']['study'].get('study_architecture_search', 'Evolution')
          automl_config['study_architecture_search'] = study_architecture_search

          study_architecture_parameters = params['automl']['study'].get('study_architecture_parameters', {})
          automl_config['study_architecture_parameters'] = study_architecture_parameters

      # launch command dispatch process
      process = multiprocessing.Process(target=self.listening_and_command_dispatch, kwargs=automl_config)
      process.start()
      pid = process.pid

    train_server_start(self.main_file if self.is_worker else None,
                       self.main_param if self.is_worker else None,
                       self.main_folder,
                       self.app_token if self.is_worker else None,
                       self.task if self.is_worker else None,
                       self.devices if self.is_worker else None,
                       self.max_time,
                       self.is_worker,
                       self.signature,
                       server_port,
                       self.servers if self.is_worker else None,
                       pid)
Example #8
  def snapshot(self, epoch=0, iter=-1):
    logger.info('snapshot at %d in %d epoch' % (self.iter_at, epoch))
    if not os.path.exists(self.dump_dir):
        os.makedirs(self.dump_dir)

    model_filename = "{prefix}_{infix}_{d}_{e}.ckpt".format(prefix=self.snapshot_prefix,
                                                        infix=self.snapshot_infix,
                                                        d=self.iter_at if iter < 0 else iter,
                                                        e=epoch)
    model_filepath = os.path.join(self.dump_dir, model_filename)

    # save checkpoint
    self.saver.save(self.sess, model_filepath)
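For illustration only, with hypothetical values for the snapshot attributes, the filename format above produces:

"{prefix}_{infix}_{d}_{e}.ckpt".format(prefix='model', infix='train', d=1000, e=3)
# -> 'model_train_1000_3.ckpt' inside self.dump_dir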
Example #9
    def deploy(self, model):
        # model context
        self.ctx.model = model

        # deploy model
        if self.is_training:
            self.training_deploy(model)
        else:
            self.infer_deploy(model)

            if self.ctx.ant is None and not self.ctx.debug:
                logger.info('successfully deploy model')
                exit(0)
Example #10
    def run(self, data_generator=None, binds={}):
        # bind data
        with self.graph.as_default():
            feed_dict = {}
            if data_generator is not None:
                for clone in self.clones:
                    # generate data
                    data = next(data_generator)

                    for k, v in binds.items():
                        placeholder_tensor = self.graph.get_tensor_by_name(
                            '{}{}:0'.format(clone.scope, k))
                        feed_dict[placeholder_tensor] = data[v] if (
                            type(data) == tuple
                            or type(data) == list) else data

            # increment
            self.iter_at += 1

            # forward process
            start_time = time.time()
            result = self.sess.run(
                self.val_ops,
                feed_dict=feed_dict if len(feed_dict) > 0 else None)
            elapsed_time = int((time.time() - start_time) * 100) / 100.0

            # push value passively
            if self.ctx.recorder is not None and self.ctx.recorder.model_fn is not None:
                self.ctx.recorder.action(result[-1])
                result = result[:-1]

            # record elapsed time
            self.time_stat.add(elapsed_time)

            if self.is_training:
                loss_val = 0.0
                if type(result) == list:
                    loss_val = result[1]
                else:
                    loss_val = result

                if self.iter_at % self.log_every_n_steps == 0:
                    logger.info(
                        'INFO: loss %f lr %f at iterator %d (%f sec/step)' %
                        (loss_val, self.sess.run(self.lr), self.iter_at,
                         float(self.time_stat.get())))
            else:
                logger.info('INFO: (%f sec/step)' %
                            float(self.time_stat.get()))

            return result
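A hedged usage sketch for run() above, assuming an instance of the enclosing trainer class is available as trainer and that each clone scope contains placeholders named 'image' and 'label' (both names are assumptions); binds maps placeholder names to positions in each generated tuple:

import numpy as np

def my_generator():
    # hypothetical generator: yields one (image_batch, label_batch) tuple per clone per step
    while True:
        yield (np.zeros((8, 224, 224, 3), np.float32), np.zeros((8,), np.int64))

result = trainer.run(my_generator(), binds={'image': 0, 'label': 1})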
Example #11
  def _run_by_feed(self, feed_dict=None, **kwargs):
    with self.graph.as_default():
      if feed_dict is None:
        feed_dict = {}
        for k_name, v_value in kwargs.items():
          if k_name not in self.cache:
            k_tensor = self.graph.get_tensor_by_name('{}:0'.format(k_name))
            self.cache[k_name] = k_tensor

          feed_dict[self.cache[k_name]] = v_value

      # forward process
      start_time = time.time()
      result = self.sess.run(self.val_ops, feed_dict=feed_dict if feed_dict is not None and len(feed_dict) > 0 else None)
      elapsed_time = int((time.time() - start_time) * 100) / 100.0

      if self.ctx.recorder is not None and self.ctx.recorder.model_fn is not None:
        self.ctx.recorder.action(result[-1])
        result = result[:-1]

      self.iter_at += 1

      if self.summary_op is not None:
        summary_op_val = result[0]
        result = result[1:]
        self.train_writer.add_summary(summary_op_val, self.iter_at)

      # record elapsed time
      self.time_stat.add(elapsed_time)

      # print log
      if self.is_training:
        loss_val = 0.0
        if type(result) == list and len(result) >= 2:
          loss_val = result[1]
        elif type(result) == list and len(result) == 1:
          loss_val = result[0]
        else:
          loss_val = result

        self.loss_stat.add(loss_val)
        if self.iter_at % self.log_every_n_steps == 0:
          if not self.is_distribute_training or(self.is_distribute_training and self.rank == 0):
            logger.info('(PID: %s) INFO: loss %f lr %f at iterator %d (%f sec/step)' %
                        (str(os.getpid()), self.loss_stat.get(), self.sess.run(self.lr), self.iter_at, float(self.time_stat.get())))
      else:
        if not self.is_distribute_training or (self.is_distribute_training and self.rank == 0):
          logger.info('(PID: %s) INFO: (%f sec/step)' % (str(os.getpid()), float(self.time_stat.get())))

      return result[0] if type(result) == list and len(result) == 1 else result
Example #12
    def __bing_find_and_download(waiting_process_queue,
                                 search_url,
                                 session,
                                 dir,
                                 max_page_num=50):
        t = 0
        num = 0
        while t < max_page_num:
            Url = search_url.format(t * 35 + 1)

            t = t + 1
            try:
                Result = session.get(Url, timeout=7, allow_redirects=False)
            except BaseException:
                t = t + 60
                continue
            else:
                pic_url = re.findall('src="(.*?)"', Result.text,
                                     re.S)  # first use a regex to find the image urls
                for each in pic_url:
                    logger.info("Downloading(%d) %s." % (num + 1, str(each)))
                    try:
                        if each is not None:
                            pic = requests.get(each, timeout=7)
                        else:
                            continue
                    except BaseException:
                        logger.error("Couldnt download %s." % each)
                        continue
                    else:
                        # assign a unique file identifier
                        file_folder = os.path.join(dir, 'test')
                        if not os.path.exists(file_folder):
                            os.makedirs(file_folder)

                        file_path = os.path.join(
                            file_folder, 'bing_%s.jpg' % str(uuid.uuid4()))
                        with open(file_path, 'wb') as fp:
                            fp.write(pic.content)
                        num += 1
                        logger.info("Finish download %s ." % str(each))

                        # add to the waiting-for-processing queue
                        if waiting_process_queue is not None:
                            waiting_process_queue.put(file_path)

        # end-of-queue marker
        if waiting_process_queue is not None:
            waiting_process_queue.put(None)
Example #13
                def network_fn(*args, **kwargs):
                    res = func(self.is_training, *args, **kwargs)
                    if kwargs['clone'] == 0:
                        # 1.step save graph file
                        tf.train.write_graph(self.sess.graph_def,
                                             self.dump_dir, 'graph.pbtxt')

                        # 2.step transfer to local graph net
                        logger.info('build model graph svg')
                        svg_graph = _convert_to_svg_graph(
                            os.path.join(self.dump_dir, 'graph.pbtxt'),
                            self.dump_dir, ['input'])
                        if svg_graph is not None:
                            self.ctx.job.send({'DATA': {'GRAPH': svg_graph}})
                    return res
Example #14
  def restore_scopy_from(self, model, restore_scope, checkpoint_path):
    # model_variables = slim.get_model_variables() if model.model_variables is None else model.model_variables
    model_variables = tf.global_variables()
    variables_to_restore = {}
    for var in model_variables:
      if var.op.name.startswith(restore_scope):
        variables_to_restore[var.op.name] = var

    if len(variables_to_restore) == 0:
      return

    if tf.gfile.IsDirectory(checkpoint_path):
      checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)

    logger.info('restore %s scope from %s' % (restore_scope, checkpoint_path))
    fn = slim.assign_from_checkpoint_fn(checkpoint_path, variables_to_restore, ignore_missing_vars=True)
    fn(self.sess)
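A hedged call sketch; deployer stands in for an instance of the class this method belongs to, and the scope name and checkpoint directory are placeholders:

deployer.restore_scopy_from(model, 'resnet_v1_50', './pretrained_ckpt/')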
Example #15
    def _sandbox_thread(**kwargs):
        if 'sandbox_time' in kwargs and kwargs['sandbox_time'] is not None:
            launch_time = kwargs['sandbox_launch_time']
            sandbox_running_time = kwargs['sandbox_time']
            # now time
            now_time = time.time()
            if now_time - launch_time > sandbox_running_time:
                # reached the custom max running time
                logger.info('running time is arriving')

                # experiment result save process
                if 'sandbox_dump_dir' in kwargs and \
                        'sandbox_experiment' in kwargs and \
                        'sandbox_user_token' in kwargs:
                    # 1.step launch experiment save process
                    process = multiprocessing.Process(
                        target=experiment_upload_dht,
                        args=(kwargs['sandbox_dump_dir'],
                              kwargs['sandbox_experiment'],
                              kwargs['sandbox_user_token'],
                              kwargs['sandbox_user_token']))
                    process.start()

                    # 2.step waiting until the save process stops
                    process.join()

                # exit globally
                os._exit(0)

        # upload experiment record to dht every hour
        if 'sandbox_dump_dir' in kwargs and \
                'sandbox_experiment' in kwargs and \
                'sandbox_user_token' in kwargs:
            now_time = time.time()
            launch_time = kwargs['sandbox_launch_time']
            sandbox_running_time = int(now_time - launch_time)
            if (sandbox_running_time + 1) % 3600 == 0:
                # launch experiment save process
                process = multiprocessing.Process(
                    target=experiment_upload_dht,
                    args=(kwargs['sandbox_dump_dir'],
                          kwargs['sandbox_experiment'],
                          kwargs['sandbox_user_token'],
                          kwargs['sandbox_user_token']))
                process.start()
Example #16
def tftool_visualize_pb(pb_path):
  if not os.path.exists(pb_path):
    logger.error('pb model file does not exist')
    return

  logger.info('load model pb')
  graph = tf.get_default_graph()
  graphdef = graph.as_graph_def()
  graphdef.ParseFromString(gfile.FastGFile(pb_path, "rb").read())

  _ = tf.import_graph_def(graphdef, name="")

  logger.info('start model FLOPs statistic')
  flops = tf.profiler.profile(graph, options=tf.profiler.ProfileOptionBuilder.float_operation())
  logger.info('model FLOPs: {}'.format(flops.total_float_ops))

  logger.info('generate visualization data')
  summary_write = tf.summary.FileWriter("./", graph)
  logger.info('open tensorboard --logdir=.')
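A minimal usage sketch (the .pb path is a placeholder); once it finishes, the last log line above points at inspecting the dumped summary with TensorBoard:

tftool_visualize_pb('./dump/frozen_model.pb')
# then, in a shell: tensorboard --logdir=.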
Example #17
    def __vcg_img_download(waiting_process_queue, save_dir, img_url, keyword,
                           count):
        try:
            logger.info("Downloading(%d) %s." % (count + 1, img_url))
            pic = requests.get(img_url, timeout=7)
        except BaseException:
            logger.error("Couldnt download %s." % img_url)
            return
        else:
            file_prefix = 'VCG_' + keyword + '_' + str(count)
            file_name = file_prefix + '.jpg' if download_data_type == 'image' else file_prefix + '.mp4'
            file_path = os.path.join(save_dir, file_name)
            with open(file_path, 'wb') as fp:
                fp.write(pic.content)
            logger.info("Finish download %s ." % img_url)

            if waiting_process_queue is not None:
                waiting_process_queue.put(file_path)
Example #18
    def __init__(self, name, dir=None, params=None):
        """
    :param name: 'train', 'test'
    :param data_dir: a directory containing the original {train,test}_32x32.mat
    """
        super(SVHNDigit, self).__init__(name, dir, params)

        # read sample data
        if self.train_or_test == 'sample':
            self.data_samples, self.ids = self.load_samples()
            return

        if name == "val":
            super(SVHNDigit, self).__init__("val", dir)
        else:
            super(SVHNDigit, self).__init__(name, dir)

        filename = ""
        if self.train_or_test == 'val':
            if not os.path.exists(os.path.join(self.dir, 'extra_32x32.mat')):
                self.download(self.dir, ['extra_32x32.mat'],
                              default_url=SVHN_URL)

            filename = os.path.join(self.dir, 'extra' + '_32x32.mat')
        else:
            if not os.path.exists(
                    os.path.join(self.dir, self.train_or_test + '_32x32.mat')):
                self.download(self.dir, [self.train_or_test + '_32x32.mat'],
                              default_url=SVHN_URL)

            filename = os.path.join(self.dir,
                                    self.train_or_test + '_32x32.mat')
        assert os.path.isfile(filename), \
          "File {} not found! Please download it from {}.".format(filename, SVHN_URL)

        logger.info("Loading {} ...".format(filename))
        data = scipy.io.loadmat(filename)
        self.X = data['X'].transpose(3, 0, 1, 2)
        self.Y = data['y'].reshape((-1))
        self.Y[np.where(self.Y == 10)] = 0
        self.Y = self.Y.astype(np.uint8)

        self.ids = list(range(self.Y.shape[0]))
Example #19
    def process_add_command(self):
        task_type = FLAGS.task_type()
        task_measure = FLAGS.task_measure()

        if task_type is None or task_measure is None:
            logger.error('need to set task_type and task_measure simultaneously')
            return

        task_measures = task_measure.split(',')
        task_measures = json.dumps(task_measures)

        remote_api = 'hub/api/terminal/task/type/%s' % task_type
        response = self.remote_api_request(
            remote_api, action='post', data={'task-measures': task_measures})

        if response is None:
            logger.error('fail to add task type')
            return

        logger.info('success to add task type')
        print(response)
Example #20
def debug_training_process(dataset_func, param_config=None):
    # 0.step get global context
    ctx = get_global_context()
    ctx.debug = True

    # 1.step parse params config file
    if param_config is not None:
        logger.info('load param file %s' % param_config)
        params = yaml.load(open(param_config, 'r'))
        ctx.params = params

    # 2.step call training process
    train_time_stamp = datetime.fromtimestamp(
        timestamp()).strftime('%Y%m%d.%H%M%S.%f')
    logger.info('build dump folder %s' % train_time_stamp)
    dump_dir = os.path.join(os.curdir, 'dump', train_time_stamp)
    if not os.path.exists(dump_dir):
        os.makedirs(dump_dir)
    logger.info('start debug training process')

    dataset_obj = None
    if isinstance(dataset_func, FunctionType):
        dataset_obj = RandomDataset('train', '')
        dataset_obj.data_func = dataset_func
    else:
        Config = config.AntConfig
        config_xml = os.path.join(os.environ['HOME'], '.config', 'antgo',
                                  'config.xml')
        Config.parse_xml(config_xml)
        dataset_obj = dataset_func(
            'train', os.path.join(Config.data_factory, dataset_func.__name__))

    ctx.recorder = EvaluationRecorderNode(None, None, '123')
    ctx.call_training_process(dataset_obj, dump_dir=dump_dir)
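A hedged usage sketch; the data function below is hypothetical, and the exact sample contract expected by RandomDataset.data_func is an assumption here:

import numpy as np

def random_samples():
    # hypothetical data function returning one (image, label) sample
    return np.random.rand(32, 32, 3).astype(np.float32), np.random.randint(0, 10)

debug_training_process(random_samples, param_config='./config.yaml')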
Example #21
File: cmd.py Project: zhcm/antgo
  def process_add_command(self):
    ######################################################
    ############  stage.1 add user  ######################
    ######################################################
    if FLAGS.user() is not None and FLAGS.group() is not None:
      users_info = {'users': [FLAGS.user()]}
      res = self.remote_api_request('hub/api/terminal/groups/%s'%str(FLAGS.group()),
                                   json.dumps(users_info),
                                   'patch')
      if res is not None:
        logger.info('add user into group %s'%str(FLAGS.group()))
      else:
        logger.error('fail to add user')
      return

    ######################################################
    ############  stage.2 add new task type   ############
    ############  and task measure            ############
    ######################################################
    task_type = FLAGS.task_type()
    task_measure = FLAGS.task_measure()

    if task_type is None or task_measure is None:
      logger.error('need to set task_type and task_measure simultaneously')
      return

    task_measures = task_measure.split(',')
    task_measures = json.dumps(task_measures)

    remote_api = 'hub/api/terminal/task/type/%s'%task_type
    response = self.remote_api_request(remote_api,
                                       action='post',
                                       data={'task-measures': task_measures})

    if response is None:
      logger.error('fail to add task type')
      return

    logger.info('success to add task type')
    print(response)
Example #22
    def _sandbox_thread(**kwargs):
        if 'sandbox_time' in kwargs and kwargs['sandbox_time'] is not None:
            launch_time = kwargs['sandbox_launch_time']
            sandbox_running_time = kwargs['sandbox_time']
            # now time
            now_time = time.time()
            if now_time - launch_time > sandbox_running_time:
                # reached the custom max running time
                logger.info(
                    'max running time is arriving, exit antgo running environment'
                )

                # experiment result save process
                if 'sandbox_dump_dir' in kwargs and \
                        'sandbox_experiment' in kwargs and \
                        'sandbox_user_token' in kwargs:
                    # TODO: perform a fixed action when the experiment exceeds its running time
                    pass

                # exit globally
                mlogger.exit()
                os._exit(0)
Example #23
    def start(self):
        try:
            # 1.step get template resource folder
            file_folder = os.path.dirname(__file__)
            parent_folder = '/'.join(file_folder.split('/')[:-1])
            template_file_folder = os.path.join(parent_folder, 'resource',
                                                'templates')

            # 2.step copy main_file.py
            main_file = 'task_main_file.py' if self.main_file is None else self.main_file
            shutil.copy(
                os.path.join(template_file_folder, 'task_main_file.py'),
                os.path.join(self.dump_dir, main_file))

            # 3.step copy main_param.yaml
            main_param = 'task_main_param.yaml' if self.main_param is None else self.main_param
            shutil.copy(
                os.path.join(template_file_folder, 'task_main_param.yaml'),
                os.path.join(self.dump_dir, main_param))

            logger.info('execute template command')
        except:
            logger.error('fail execute template command')
            traceback.print_exc()
Example #24
def api_server_start(api_name,
                     api_task,
                     api_dump_dir,
                     server_port,
                     api_input_queue,
                     api_output_queue,
                     input_data_format,
                     output_data_format,
                     parent_id):
  # register sig
  signal.signal(signal.SIGTERM, GracefulExitException.sigterm_handler)

  try:
    # 0.step define http server port
    define('port', default=server_port, help='run on port')

    # 1.step prepare static resource
    api_static_dir = os.path.join(api_dump_dir, 'static')
    if not os.path.exists(api_static_dir):
      os.makedirs(api_static_dir)

    if not os.path.exists(os.path.join(api_static_dir, 'input')):
      os.makedirs(os.path.join(api_static_dir, 'input'))
    if not os.path.exists(os.path.join(api_static_dir, 'output')):
      os.makedirs(os.path.join(api_static_dir, 'output'))

    tornado.options.parse_command_line()
    settings = {
      'api_static_path': api_static_dir,
      'api_port': server_port,
      'api_name': api_name,
      'api_task': api_task,
      'api_input_queue': api_input_queue,
      'api_output_queue': api_output_queue,
      'input_data_format': input_data_format,
      'output_data_format': output_data_format,
    }
    app = tornado.web.Application(handlers=[(r"/api/", APIHandler)],
                                  **settings)
    http_server = tornado.httpserver.HTTPServer(app)
    http_server.listen(options.port)

    logger.info('api server is providing server on port %d' % server_port)
    tornado.ioloop.IOLoop.instance().start()
    logger.info('api stop server')
  except GracefulExitException:
    logger.info('demo server exit')
    sys.exit(0)
  except KeyboardInterrupt:
    os.kill(parent_id, signal.SIGKILL)
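A hedged launch sketch for api_server_start (the name, task, port and data formats below are placeholders; it assumes a separate worker process consumes api_input_queue and fills api_output_queue):

import os
import multiprocessing

input_queue = multiprocessing.Queue()
output_queue = multiprocessing.Queue()
server = multiprocessing.Process(
    target=api_server_start,
    args=('demo-api', 'classification', './dump', 8901,
          input_queue, output_queue, 'json', 'json', os.getpid()))
server.start()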
Example #25
def batch_server_start(batch_dump_dir, server_port, command_queue):
    # register sig
    signal.signal(signal.SIGTERM, GracefulExitException.sigterm_handler)

    # 0.step define http server port
    define('port', default=server_port, help='run on port')

    try:
        # static resource directory
        batch_static_dir = os.path.join(batch_dump_dir, 'batch')

        # 2.step launch web server
        db = {'content': {}, 'command_queue': command_queue}
        settings = {
            'static_path': os.path.join(batch_static_dir, 'static'),
            'port': server_port,
            'cookie_secret': str(uuid.uuid4()),
            'db': db,
        }

        app = tornado.web.Application(handlers=[
            (r"/batch-api/entry/", EntryApiHandler),
            (r"/batch-api/dataset/", EntryApiHandler),
            (r"/batch-api/page/", PageApiHandler),
            (r"/batch-api/config/", ConfigApiHandler),
            (r"/batch-api/ping/", PingApiHandler),
            (r"/batch-api/tag/", TagChangeApiHandler),
            (r"/batch-api/search/", SearchApiHandler),
            (r'/(.*)', tornado.web.StaticFileHandler, {
                "path": batch_static_dir,
                "default_filename": "index.html"
            }),
        ],
                                      **settings)
        http_server = tornado.httpserver.HTTPServer(app)
        http_server.listen(options.port)

        logger.info('demo is providing server on port %d' % server_port)
        tornado.ioloop.IOLoop.instance().start()
        logger.info('demo stop server')
    except GracefulExitException:
        logger.info('demo server exit')
        sys.exit(0)
    except KeyboardInterrupt:
        pass
Example #26
    def test_coco2017(self):
        Config = config.AntConfig
        config_xml = os.path.join(os.environ['HOME'], '.config', 'antgo',
                                  'config.xml')
        Config.parse_xml(config_xml)

        logger.info('test coco2017 train for SEGMENTATION task (stuff)')
        coco2017 = COCO2017('train',
                            os.path.join(Config.data_factory, 'COCO2017'), {
                                'task_type': 'SEGMENTATION',
                                'task_type_subset': 'stuff'
                            })

        file_num = 0
        for data, annotation in coco2017.iterator_value():
            file_num += 1

        print(file_num)

        logger.info('test coco2017 val for SEGMENTATION task (stuff)')
        coco2017 = COCO2017('val',
                            os.path.join(Config.data_factory, 'COCO2017'), {
                                'task_type': 'SEGMENTATION',
                                'task_type_subset': 'stuff'
                            })
        file_num = 0
        for data, annotation in coco2017.iterator_value():
            file_num += 1

        print(file_num)

        logger.info('test coco2017 test for SEGMENTATION task (stuff)')
        coco2017 = COCO2017('test',
                            os.path.join(Config.data_factory, 'COCO2017'), {
                                'task_type': 'SEGMENTATION',
                                'task_type_subset': 'stuff'
                            })
        file_num = 0
        for data, annotation in coco2017.iterator_value():
            file_num += 1

        print(file_num)
Example #27
def ensemble_server_start(dump_dir, server_port, worker_num):
    # register sig
    signal.signal(signal.SIGTERM, GracefulExitException.sigterm_handler)

    # 0.step define http server port
    define('port', default=server_port, help='run on port')

    try:
        # static resource directory
        static_dir = os.path.join(dump_dir, 'ensemble')

        # 2.step launch web server
        db = {}
        settings = {
            'static_path': os.path.join(static_dir, 'static'),
            'port': server_port,
            'cookie_secret': str(uuid.uuid4()),
            'worker_num': worker_num,
            'cond': threading.Condition(),
            'lock': threading.Lock(),
            'db': db
        }

        app = tornado.web.Application(handlers=[
            (r"/ensemble-api/avg/", AverageApiHandler),
            (r"/ensemble-api/live/", LiveApiHandler)
        ],
                                      **settings)
        http_server = tornado.httpserver.HTTPServer(app)
        http_server.listen(options.port)

        logger.info('ensemble is providing server on port %d' % server_port)
        tornado.ioloop.IOLoop.instance().start()
        logger.info('ensemble stop server')
    except GracefulExitException:
        logger.info('ensemble server exit')
        sys.exit(0)
    except KeyboardInterrupt:
        pass
Example #28
    def start(self):
        # 1.step load the challenge task
        running_ant_task = None
        if self.token is not None:
            # 1.1.step fetch the challenge task config from the platform
            response = mlogger.info.challenge.get(command=type(self).__name__)
            if response['status'] == 'ERROR':
                logger.error('Couldnt load challenge task.')
                self.token = None
            elif response['status'] == 'SUSPEND':
                # prohibit submitting challenge tasks too frequently
                # only one submission is allowed per week
                logger.error('Prohibit submit challenge task frequently.')
                exit(-1)
            elif response['status'] == 'OK':
                content = response['content']

                if 'task' in content:
                    challenge_task = create_task_from_json(content)
                    if challenge_task is None:
                        logger.error('Couldnt load challenge task.')
                        exit(-1)
                    running_ant_task = challenge_task
            else:
                # unknown error
                logger.error('Unknown error.')
                exit(-1)

        if running_ant_task is None:
            # 1.2.step load the custom task config
            custom_task = create_task_from_xml(self.ant_task_config,
                                               self.context)
            if custom_task is None:
                logger.error('Couldnt load custom task.')
                exit(-1)
            running_ant_task = custom_task

        assert (running_ant_task is not None)

        # 2.step register the experiment
        experiment_uuid = self.context.experiment_uuid

        # 3.step back up basic experiment info
        # 3.1.step package the code and upload it to the cloud
        self.stage = 'CHALLENGE'
        # - backup in dump_dir
        main_folder = self.main_folder
        if not os.path.exists(os.path.join(self.ant_dump_dir,
                                           experiment_uuid)):
            os.makedirs(os.path.join(self.ant_dump_dir, experiment_uuid))

        # goldcoin = os.path.join(self.ant_dump_dir, experiment_uuid, 'code.tar.gz')

        # if os.path.exists(goldcoin):
        #   os.remove(goldcoin)

        # logger.info('Prepare package model files.')
        # tar = tarfile.open(goldcoin, 'w:gz')

        # # exclude the dump directory and upload everything under the current folder
        # for root, dirs, files in os.walk(main_folder):
        #   if os.path.commonprefix([root, self.ant_dump_dir]) == self.ant_dump_dir:
        #     continue

        #   rel_root = os.path.relpath(root, main_folder)
        #   for f in files:
        #     tar.add(os.path.join(root, f), arcname=os.path.join(rel_root, f))

        # tar.close()
        # logger.info('Finish package process.')

        # TODO: support model file upload in the next version
        # # upload the model code
        # mlogger.getEnv().dashboard.experiment.upload(MODEL=goldcoin,
        #                                              APP_STAGE=self.stage)

        # 3.2.step update the basic config
        if self.app_token is not None or self.user_token is not None:
            for k, v in self.context.params.items():
                setattr(mlogger.tag, k, str(v))

        # 4.step load the test dataset
        if running_ant_task.dataset_name is None or running_ant_task.dataset_name == '':
            logger.info(
                'Missing dataset set in task config, switch to use --dataset=xxx'
            )
            if self.ant_dataset is None or self.ant_dataset == '':
                logger.error('Sorry, --dataset= is empty.')
                return
            running_ant_task.config(dataset_name=self.ant_dataset)

        logger.info('Loading test dataset %s.' % running_ant_task.dataset_name)

        ant_test_dataset = None
        if self.context.register_at('test') is not None:
            ant_test_dataset = ProxyDataset('test')
            ant_test_dataset.register(test=self.context.register_at('test'))
        else:
            ant_test_dataset = running_ant_task.dataset(
                'test',
                os.path.join(self.ant_data_source,
                             running_ant_task.dataset_name),
                running_ant_task.dataset_params)

        with safe_manager(ant_test_dataset):
            self.context.recorder = RecorderNode2()

            self.stage = "CHALLENGE"
            logger.info('Start infer process.')
            infer_dump_dir = os.path.join(self.ant_dump_dir, experiment_uuid,
                                          'inference')
            if not os.path.exists(infer_dump_dir):
                os.makedirs(infer_dump_dir)
            else:
                shutil.rmtree(infer_dump_dir)
                os.makedirs(infer_dump_dir)

            intermediate_dump_dir = os.path.join(self.ant_dump_dir,
                                                 experiment_uuid, 'record')
            with safe_recorder_manager(self.context.recorder):
                self.context.recorder.dump_dir = intermediate_dump_dir
                # from ablation experiment
                ablation_blocks = getattr(self.ant_context.params, 'ablation',
                                          [])
                if ablation_blocks is None:
                    ablation_blocks = []
                for b in ablation_blocks:
                    self.ant_context.deactivate_block(b)

                with performance_statistic_region(self.ant_name):
                    try:
                        self.context.call_infer_process(
                            ant_test_dataset, infer_dump_dir)
                    except Exception as e:
                        if type(e.__cause__) != StopIteration:
                            print(e)
                            traceback.print_exc()

                task_running_statictic = get_performance_statistic(
                    self.ant_name)
                task_running_statictic = {
                    self.ant_name: task_running_statictic
                }
                task_running_elapsed_time = task_running_statictic[
                    self.ant_name]['time']['elapsed_time']
                task_running_statictic[self.ant_name]['time']['elapsed_time_per_sample'] = \
                    task_running_elapsed_time / float(ant_test_dataset.size)

                if not self.context.recorder.is_measure:
                    # no annotation available to continue measuring
                    # update experiment statistics
                    if self.app_token is not None or self.user_token is not None:
                        mlogger.info.experiment.patch(
                            experiment_data=zlib.compress(
                                json.dumps({
                                    'REPORT': task_running_statictic,
                                    'APP_STAGE': self.stage
                                }).encode()))

                    # generate the experiment report
                    logger.info('Save experiment report.')
                    everything_to_html(
                        task_running_statictic,
                        os.path.join(self.ant_dump_dir, experiment_uuid))
                    return

            logger.info('Start evaluation process.')
            evaluation_measure_result = []
            with safe_recorder_manager(
                    RecordReader(intermediate_dump_dir)) as record_reader:
                for measure in running_ant_task.evaluation_measures:
                    if measure.crowdsource:
                        # start crowdsource server
                        measure.dump_dir = os.path.join(
                            infer_dump_dir, measure.name, 'static')
                        if not os.path.exists(measure.dump_dir):
                            os.makedirs(measure.dump_dir)

                        measure.experiment_id = experiment_uuid
                        measure.app_token = self.token
                        logger.info('Launch crowdsource evaluation server.')
                        crowdsource_evaluation_status = measure.crowdsource_server(
                            record_reader)
                        if not crowdsource_evaluation_status:
                            logger.error(
                                'Couldnt finish crowdsource evaluation server.'
                            )
                            continue

                        # using crowdsource evaluation
                        result = measure.eva()
                        # TODO: support bootstrap confidence interval for crowdsource evaluation
                    else:
                        # evaluation
                        record_generator = record_reader.iterate_read(
                            'predict', 'groundtruth')
                        result = measure.eva(record_generator, None)

                        # compute confidence interval
                        if measure.is_support_rank and getattr(
                                running_ant_task, 'confidence_interval',
                                False):
                            confidence_interval = bootstrap_confidence_interval(
                                record_reader, time.time(), measure, 10)
                            result['statistic']['value'][0][
                                'interval'] = confidence_interval
                        elif measure.is_support_rank:
                            result['statistic']['value'][0]['interval'] = (
                                result['statistic']['value'][0]['value'],
                                result['statistic']['value'][0]['value'])

                    evaluation_measure_result.append(result)

                # #########################
                # roc_auc_measure = {'statistic': {'name': 'roc_auc',
                #                                  'value': [{'name': 'ROC', 'value': [[[0, 3], [1, 0], [2, 6]],
                #                                                                      [[0, 8], [2, 3], [3, 7]]],
                #                                             'type': 'CURVE', 'x': 'FP', 'y': 'TP',
                #                                             'legend': ['class-0', 'class-1']},
                #                                            {'name': 'AUC', 'value': [0.1, 0.2], 'type': 'SCALAR', 'x': 'class',
                #                                             'y': 'AUC'}]}}
                #
                # voc_measure = {'statistic': {'name': 'voc',
                #                              'value': [{'name': 'MAP', 'value': [18.0, 9.0, 20.0], 'type': 'SCALAR', 'x': 'class',
                #                                         'y': 'Mean Average Precision'},
                #                                        {'name': 'Mean-MAP', 'value': 0.14, 'type': 'SCALAR'}]}}
                #
                #
                # evaluation_measure_result.append(roc_auc_measure)
                # evaluation_measure_result.append(voc_measure)
                # #########################

                task_running_statictic[
                    self.ant_name]['measure'] = evaluation_measure_result

            # if self.is_non_mltalker_task:
            #   # generate report resource
            #   logger.info('generate model evaluation report')
            #   everything_to_html(task_running_statictic, os.path.join(self.ant_dump_dir, now_time_stamp))
            #   return

            # significance statistic
            logger.info('Significance difference compare and rank.')
            # benchmark record
            benchmark_model_data = {}
            if self.token is not None:
                response = mlogger.info.benchmark.get()
                if response['status'] == 'OK':
                    benchmark_info = response['content']
                    for bmd in benchmark_info:
                        benchmark_name = bmd[
                            'benchmark_name']  # benchmark name (experiment_uuid)
                        benchmark_record = bmd['benchmark_record']  # url
                        benchmark_report = bmd['benchmark_report']  # statistics data

                        # download benchmark record from url
                        logger.info('Download benchmark %s.' % benchmark_name)
                        mlogger.info.experiment.download(
                            file_name=benchmark_record,
                            file_folder=os.path.join(self.ant_dump_dir,
                                                     experiment_uuid,
                                                     'benchmark',
                                                     benchmark_name),
                            experiment_uuid=benchmark_name)

                        if 'record' not in benchmark_model_data:
                            benchmark_model_data['record'] = {}
                        benchmark_model_data['record'][
                            benchmark_name] = os.path.join(
                                self.ant_dump_dir, experiment_uuid,
                                'benchmark', benchmark_name, 'record')

                        if 'report' not in benchmark_model_data:
                            benchmark_model_data['report'] = {}

                        for benchmark_experiment_name, benchmark_experiment_report in benchmark_report.items(
                        ):
                            benchmark_model_data['report'][
                                benchmark_name] = benchmark_experiment_report
            elif self.ant_task_benchmark is not None:
                for experiment in self.ant_task_benchmark.split(','):
                    if os.path.exists(
                            os.path.join(self.ant_dump_dir, experiment)):
                        if 'record' not in benchmark_model_data:
                            benchmark_model_data['record'] = {}
                        benchmark_model_data['record'][
                            experiment] = os.path.join(self.ant_dump_dir,
                                                       experiment, 'record')

            if benchmark_model_data is not None and 'record' in benchmark_model_data:
                benchmark_model_record = benchmark_model_data['record']

                task_running_statictic[self.ant_name]['significant_diff'] = {}
                for meature_index, measure in enumerate(
                        running_ant_task.evaluation_measures):
                    if measure.is_support_rank and not measure.crowdsource:
                        significant_diff_score = []
                        for benchmark_model_name, benchmark_model_address in benchmark_model_record.items(
                        ):
                            if getattr(running_ant_task, 'confidence_interval',
                                       False):
                                with safe_recorder_manager(
                                        RecordReader(intermediate_dump_dir)
                                ) as record_reader:
                                    with safe_recorder_manager(
                                            RecordReader(
                                                benchmark_model_address)
                                    ) as benchmark_record_reader:
                                        s = bootstrap_ab_significance_compare([
                                            record_reader,
                                            benchmark_record_reader
                                        ], time.time(), measure, 10)

                                        significant_diff_score.append({
                                            'name':
                                            benchmark_model_name,
                                            'score':
                                            s
                                        })
                            else:
                                compare_value = \
                                  task_running_statictic[self.ant_name]['measure'][meature_index]['statistic']['value'][0]['value'] - \
                                  benchmark_model_data['report'][benchmark_model_name]['measure'][meature_index]['statistic']['value'][0]['value']
                                if compare_value > 0:
                                    if getattr(measure, 'larger_is_better',
                                               0) == 1:
                                        significant_diff_score.append({
                                            'name':
                                            benchmark_model_name,
                                            'score':
                                            1
                                        })
                                    else:
                                        significant_diff_score.append({
                                            'name':
                                            benchmark_model_name,
                                            'score':
                                            -1
                                        })
                                elif compare_value < 0:
                                    if getattr(measure, 'larger_is_better',
                                               0) == 1:
                                        significant_diff_score.append({
                                            'name':
                                            benchmark_model_name,
                                            'score':
                                            -1
                                        })
                                    else:
                                        significant_diff_score.append({
                                            'name':
                                            benchmark_model_name,
                                            'score':
                                            1
                                        })
                                else:
                                    significant_diff_score.append({
                                        'name': benchmark_model_name,
                                        'score': 0
                                    })

                        task_running_statictic[
                            self.ant_name]['significant_diff'][
                                measure.name] = significant_diff_score
                    elif measure.is_support_rank and measure.crowdsource:
                        # TODO: support model significance compare for crowdsource evaluation
                        pass

            # error analysis
            logger.info('Error analysis.')
            # benchmark report
            benchmark_model_statistic = None
            if benchmark_model_data is not None and 'report' in benchmark_model_data:
                benchmark_model_statistic = benchmark_model_data['report']

            # task_running_statictic={self.ant_name:
            #                           {'measure':[
            #                             {'statistic': {'name': 'MESR',
            #                                            'value': [{'name': 'MESR', 'value': 0.4, 'type':'SCALAR'}]},
            #                                            'info': [{'id':0,'score':0.8,'category':1},
            #                                                     {'id':1,'score':0.3,'category':1},
            #                                                     {'id':2,'score':0.9,'category':1},
            #                                                     {'id':3,'score':0.5,'category':1},
            #                                                     {'id':4,'score':1.0,'category':1}]},
            #                             {'statistic': {'name': "SE",
            #                                            'value': [{'name': 'SE', 'value': 0.5, 'type': 'SCALAR'}]},
            #                                            'info': [{'id':0,'score':0.4,'category':1},
            #                                                     {'id':1,'score':0.2,'category':1},
            #                                                     {'id':2,'score':0.1,'category':1},
            #                                                     {'id':3,'score':0.5,'category':1},
            #                                                     {'id':4,'score':0.23,'category':1}]}]}}

            for measure_result in task_running_statictic[
                    self.ant_name]['measure']:
                if 'info' in measure_result and len(
                        measure_result['info']) > 0:
                    measure_name = measure_result['statistic']['name']
                    measure_data = measure_result['info']

                    # independent analysis per category for classification problem
                    measure_data_list = []
                    if running_ant_task.class_label is not None and len(
                            running_ant_task.class_label) > 1:
                        if running_ant_task.class_label is not None:
                            for cl_i, cl in enumerate(
                                    running_ant_task.class_label):
                                measure_data_list.append([
                                    md for md in measure_data
                                    if md['category'] == cl
                                    or md['category'] == cl_i
                                ])

                    if len(measure_data_list) == 0:
                        measure_data_list.append(measure_data)

                    for category_id, category_measure_data in enumerate(
                            measure_data_list):
                        if len(category_measure_data) == 0:
                            continue

                        if 'analysis' not in task_running_statictic[
                                self.ant_name]:
                            task_running_statictic[
                                self.ant_name]['analysis'] = {}

                        if measure_name not in task_running_statictic[
                                self.ant_name]['analysis']:
                            task_running_statictic[
                                self.ant_name]['analysis'][measure_name] = {}

                        # reorganize as list
                        method_samples_list = [{
                            'name': self.ant_name,
                            'data': category_measure_data
                        }]
                        if benchmark_model_statistic is not None:
                            # extract statistic data from benchmark
                            for benchmark_name, benchmark_statistic_data in benchmark_model_statistic.items(
                            ):
                                # finding corresponding measure
                                for benchmark_measure_result in benchmark_statistic_data[
                                        'measure']:
                                    if benchmark_measure_result['statistic'][
                                            'name'] == measure_name:
                                        benchmark_measure_data = benchmark_measure_result[
                                            'info']

                                        # finding corresponding category
                                        sub_benchmark_measure_data = None
                                        if running_ant_task.class_label is not None and len(
                                                running_ant_task.class_label
                                        ) > 1:
                                            sub_benchmark_measure_data = \
                                              [md for md in benchmark_measure_data if md['category'] == running_ant_task.class_label[category_id] or md['category'] == category_id]
                                        if sub_benchmark_measure_data is None:
                                            sub_benchmark_measure_data = benchmark_measure_data

                                        method_samples_list.append({
                                            'name':
                                            benchmark_name,
                                            'data':
                                            sub_benchmark_measure_data
                                        })

                                        break
                                break

                        # reorganize data as score matrix
                        method_num = len(method_samples_list)
                        # the number of samples is assumed to be the same across methods
                        samples_num = len(method_samples_list[0]['data'])
                        # samples_num = ant_test_dataset.size
                        method_measure_mat = np.zeros(
                            (method_num, samples_num))
                        samples_map = []

                        for method_id, method_measure_data in enumerate(
                                method_samples_list):
                            # reorder data by index
                            order_key = 'id'
                            if 'index' in method_measure_data['data'][0]:
                                order_key = 'index'
                            method_measure_data_order = sorted(
                                method_measure_data['data'],
                                key=lambda x: x[order_key])

                            if method_id == 0:
                                # record sample id
                                for sample_id, sample in enumerate(
                                        method_measure_data_order):
                                    samples_map.append(sample)

                            # scores are stored in the same sample order for every method
                            for sample_id, sample in enumerate(
                                    method_measure_data_order):
                                method_measure_mat[method_id,
                                                   sample_id] = sample['score']

                        is_binary = False
                        # collect all finite scores
                        test_score = [
                            td['score']
                            for td in method_samples_list[0]['data']
                            if td['score'] > -float("inf")
                            and td['score'] < float("inf")
                        ]
                        hist, x_bins = np.histogram(test_score, 100)
                        if len(np.where(hist > 0.0)[0]) <= 2:
                            is_binary = True

                        # score matrix analysis
                        if not is_binary:
                            s, ri, ci, lr_samples, mr_samples, hr_samples = \
                              continuous_multi_model_measure_analysis(method_measure_mat, samples_map, ant_test_dataset)

                            analysis_tag = 'Global'
                            if len(measure_data_list) > 1:
                                analysis_tag = 'Global-Category-' + str(
                                    running_ant_task.class_label[category_id])

                            model_name_ri = [
                                method_samples_list[r]['name'] for r in ri
                            ]
                            task_running_statictic[self.ant_name]['analysis'][measure_name][analysis_tag] = \
                                          {'value': s.tolist() if type(s) != list else s,
                                           'type': 'MATRIX',
                                           'x': ci,
                                           'y': model_name_ri,
                                           'sampling': [{'name': 'High Score Region', 'data': hr_samples},
                                                        {'name': 'Middle Score Region', 'data': mr_samples},
                                                        {'name': 'Low Score Region', 'data': lr_samples}]}

                            # group by tag
                            tags = getattr(ant_test_dataset, 'tag', None)
                            if tags is not None:
                                for tag in tags:
                                    g_s, g_ri, g_ci, g_lr_samples, g_mr_samples, g_hr_samples = \
                                      continuous_multi_model_measure_analysis(method_measure_mat,
                                                                              samples_map,
                                                                              ant_test_dataset,
                                                                              filter_tag=tag)

                                    analysis_tag = 'Group'
                                    if len(measure_data_list) > 1:
                                        analysis_tag = 'Group-Category-' + str(
                                            running_ant_task.
                                            class_label[category_id])

                                    if analysis_tag not in task_running_statictic[
                                            self.ant_name]['analysis'][
                                                measure_name]:
                                        task_running_statictic[self.ant_name][
                                            'analysis'][measure_name][
                                                analysis_tag] = []

                                    model_name_ri = [
                                        method_samples_list[r]['name']
                                        for r in g_ri
                                    ]
                                    tag_data = {
                                        'value': g_s.tolist() if type(g_s) != list else g_s,
                                        'type': 'MATRIX',
                                        'x': g_ci,
                                        'y': model_name_ri,
                                        'sampling': [
                                            {'name': 'High Score Region', 'data': g_hr_samples},
                                            {'name': 'Middle Score Region', 'data': g_mr_samples},
                                            {'name': 'Low Score Region', 'data': g_lr_samples}
                                        ]
                                    }

                                    task_running_statictic[
                                        self.ant_name]['analysis'][
                                            measure_name][analysis_tag].append(
                                                (tag, tag_data))
                        else:
                            s, ri, ci, region_95, region_52, region_42, region_13, region_one, region_zero = \
                              discrete_multi_model_measure_analysis(method_measure_mat,
                                                                    samples_map,
                                                                    ant_test_dataset)

                            analysis_tag = 'Global'
                            if len(measure_data_list) > 1:
                                analysis_tag = 'Global-Category-' + str(
                                    running_ant_task.class_label[category_id])

                            model_name_ri = [
                                method_samples_list[r]['name'] for r in ri
                            ]
                            task_running_statictic[self.ant_name]['analysis'][measure_name][analysis_tag] = \
                                            {'value': s.tolist() if type(s) != list else s,
                                             'type': 'MATRIX',
                                             'x': ci,
                                             'y': model_name_ri,
                                             'sampling': [{'name': '95%', 'data': region_95},
                                                          {'name': '52%', 'data': region_52},
                                                          {'name': '42%', 'data': region_42},
                                                          {'name': '13%', 'data': region_13},
                                                          {'name': 'best', 'data': region_one},
                                                          {'name': 'zero', 'data': region_zero}]}

                            # group by tag
                            tags = getattr(ant_test_dataset, 'tag', None)
                            if tags is not None:
                                for tag in tags:
                                    g_s, g_ri, g_ci, g_region_95, g_region_52, g_region_42, g_region_13, g_region_one, g_region_zero = \
                                      discrete_multi_model_measure_analysis(method_measure_mat,
                                                                              samples_map,
                                                                              ant_test_dataset,
                                                                              filter_tag=tag)
                                    # if 'group' not in task_running_statictic[self.ant_name]['analysis'][measure_name]:
                                    #   task_running_statictic[self.ant_name]['analysis'][measure_name]['group'] = []
                                    #
                                    analysis_tag = 'Group'
                                    if len(measure_data_list) > 1:
                                        analysis_tag = 'Group-Category-' + str(
                                            running_ant_task.
                                            class_label[category_id])

                                    if analysis_tag not in task_running_statictic[
                                            self.ant_name]['analysis'][
                                                measure_name]:
                                        task_running_statictic[self.ant_name][
                                            'analysis'][measure_name][
                                                analysis_tag] = []

                                    model_name_ri = [
                                        method_samples_list[r]['name']
                                        for r in g_ri
                                    ]
                                    # use the per-tag regions (g_region_*) rather than the global ones
                                    tag_data = {
                                        'value': g_s.tolist() if type(g_s) != list else g_s,
                                        'type': 'MATRIX',
                                        'x': g_ci,
                                        'y': model_name_ri,
                                        'sampling': [
                                            {'name': '95%', 'data': g_region_95},
                                            {'name': '52%', 'data': g_region_52},
                                            {'name': '42%', 'data': g_region_42},
                                            {'name': '13%', 'data': g_region_13},
                                            {'name': 'best', 'data': g_region_one},
                                            {'name': 'zero', 'data': g_region_zero}
                                        ]
                                    }

                                    task_running_statictic[
                                        self.ant_name]['analysis'][
                                            measure_name][analysis_tag].append(
                                                (tag, tag_data))

            # update experiment statistics
            if self.app_token is not None or self.user_token is not None:
                mlogger.info.experiment.patch(experiment_data=zlib.compress(
                    json.dumps({
                        'REPORT': task_running_statictic,
                        'APP_STAGE': self.stage
                    }).encode()))

            # generate the experiment report
            logger.info('Save experiment report.')
            everything_to_html(
                task_running_statictic,
                os.path.join(self.ant_dump_dir, experiment_uuid))
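
The analysis above reduces to two steps: building a method-by-sample score matrix from the per-sample measure records, and using a histogram of those scores to decide whether the measure behaves as a binary or a continuous quantity. The following is a minimal, self-contained sketch of just those two steps; the mocked records and the helper names build_score_matrix and is_binary_measure are illustrative and not part of antgo.

import numpy as np

# mocked per-method records, mirroring the
# {'name': ..., 'data': [{'id': ..., 'score': ..., 'category': ...}, ...]} layout above
method_samples_list = [
    {'name': 'my_model',
     'data': [{'id': i, 'score': s, 'category': 1}
              for i, s in enumerate([0.8, 0.3, 0.9, 0.5, 1.0])]},
    {'name': 'benchmark',
     'data': [{'id': i, 'score': s, 'category': 1}
              for i, s in enumerate([0.4, 0.2, 0.1, 0.5, 0.23])]},
]


def build_score_matrix(method_samples_list):
    # one row per method, one column per sample, columns ordered by sample id
    method_num = len(method_samples_list)
    samples_num = len(method_samples_list[0]['data'])
    score_mat = np.zeros((method_num, samples_num))
    samples_map = []
    for method_id, method_measure_data in enumerate(method_samples_list):
        ordered = sorted(method_measure_data['data'], key=lambda x: x['id'])
        if method_id == 0:
            samples_map = ordered
        for sample_id, sample in enumerate(ordered):
            score_mat[method_id, sample_id] = sample['score']
    return score_mat, samples_map


def is_binary_measure(scores, bins=100):
    # treat the measure as binary if its finite scores occupy at most two histogram bins
    finite = [s for s in scores if -float('inf') < s < float('inf')]
    hist, _ = np.histogram(finite, bins)
    return len(np.where(hist > 0)[0]) <= 2


score_mat, samples_map = build_score_matrix(method_samples_list)
print(score_mat.shape)                  # (2, 5)
print(is_binary_measure(score_mat[0]))  # False: these scores spread over many bins

From the matrix and the ordered sample map, routines such as continuous_multi_model_measure_analysis and discrete_multi_model_measure_analysis then group the samples into score regions, as in the example above.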
Beispiel #29
0
    def crowdsource_server(self, record_db):
        # 0.step search for an idle server port
        idle_server_port = _pick_idle_port(from_port=40000,
                                           to_port=50000,
                                           check_count=20)
        if idle_server_port is None:
            logger.error('could not find an idle port for the crowdsource server')
            return False

        # select crowdsource info
        crowdsource_info = {
            'title': self._crowdsource_title,
            'type': self._crowdsource_type,
            'complex': self._complex_degree,
            'time': self._crowdsource_start_time,
            'complete': 0.0,
            'bonus': self._crowdsource_bonum,
            'estimated_time': self._crowdsource_estimated_time
        }

        # 1.step launch crowdsource server (independent process)
        request_queue = Queue()
        response_queue = Queue()

        process = multiprocessing.Process(
            target=crowdsrouce_server_start,
            args=(os.getpid(), self.experiment_id, self.app_token,
                  '/'.join(os.path.normpath(self.dump_dir).split('/')[0:-1]),
                  self.name, self.client_html_template,
                  self.client_keywords_template, idle_server_port,
                  crowdsource_info, request_queue, response_queue))
        process.start()

        # 2.step wait until the crowdsource server is alive
        waiting_time = 10
        is_connected = False
        now_time = time.time()
        while not is_connected and (time.time() - now_time) < 60 * 5:
            # wait up to 5 minutes for the crowdsource server to come up
            try:
                res = requests.get('http://127.0.0.1:%d/heartbeat' %
                                   idle_server_port)
                heartbeat_content = json.loads(res.content)
                if 'ALIVE' in heartbeat_content:
                    is_connected = True
                    break
            except:
                time.sleep(waiting_time)

        if not is_connected:
            logger.error(
                'failed to connect to the local crowdsource server, could not start the crowdsource service'
            )
            return False

        # 3.step listen for client queries until the crowdsource task is finished
        while not self._is_finished:
            client_id = ''
            try:
                # 3.1 step receive request
                client_query = request_queue.get()

                # 3.2 step check whether the query is legal
                if not self._query_is_legal(client_query):
                    if self._client_response_record[client_query['CLIENT_ID']]['QUERY_INDEX'] == \
                            len(self._client_response_record[client_query['CLIENT_ID']]['ID']) - 1:
                        # send stop page
                        response_queue.put(self._prepare_stop_page())
                    else:
                        # send unknown page
                        response_queue.put({})
                    continue

                # client id
                client_id = client_query['CLIENT_ID']

                # 3.3 step respond to the client query
                # QUERY: 'START', 'NEXT'
                ########################################################
                #############            START           ###############
                ########################################################
                if client_query['QUERY'] == 'START':
                    logger.info('response client_id %s %s query' %
                                (client_id, 'START'))
                    response = self._start_click_branch(
                        client_query, record_db)
                    response_queue.put(response)
                    continue

                ########################################################
                #############            NEXT            ###############
                ########################################################
                if client_query['QUERY'] == 'NEXT':
                    logger.info('response client_id %s %s query' %
                                (client_id, 'NEXT'))
                    response = self._next_click_branch(client_query, record_db)
                    response_queue.put(response)
                    continue

                logger.error('client_id %s unknown query' % client_id)
                response_queue.put({})
                continue
            except:
                logger.error('client_id %s unknown error' % client_id)
                response_queue.put({})

        # save crowdsource client response
        with open(os.path.join(self.dump_dir, 'crowdsource_record.txt'),
                  'w') as fp:
            fp.write(json.dumps(self._client_response_record))

        # give the crowdsource server a short grace period (30 seconds) before killing it,
        # in case it aborts while still flushing responses
        time.sleep(30)
        # kill crowdsource server
        os.kill(process.pid, signal.SIGTERM)
        return True
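
The crowdsource example follows a launch-and-dispatch pattern: the web server runs in a child process, forwards every client query to the parent through a request queue, and waits for the parent's answer on a response queue. Below is a minimal sketch of that handshake with a toy _toy_server function standing in for crowdsrouce_server_start; the query and response payloads are simplified and purely illustrative.

import multiprocessing


def _toy_server(request_queue, response_queue):
    # hypothetical stand-in for the web-server child process: it forwards client
    # queries to the parent via request_queue and reads the parent's answer back
    # from response_queue
    for query in ({'CLIENT_ID': 'a', 'QUERY': 'START'},
                  {'CLIENT_ID': 'a', 'QUERY': 'NEXT'}):
        request_queue.put(query)
        print('server received response:', response_queue.get())


if __name__ == '__main__':
    request_queue = multiprocessing.Queue()
    response_queue = multiprocessing.Queue()
    process = multiprocessing.Process(target=_toy_server,
                                      args=(request_queue, response_queue))
    process.start()

    # the parent plays the crowdsource_server role: pop a query, dispatch on its
    # type, push a response back
    for _ in range(2):
        client_query = request_queue.get()
        if client_query['QUERY'] == 'START':
            response_queue.put({'PAGE': 'first sample'})
        elif client_query['QUERY'] == 'NEXT':
            response_queue.put({'PAGE': 'next sample'})
        else:
            response_queue.put({})

    process.join()

One apparent benefit of this split is that all session state (response records, per-sample counters) stays in the parent process, which is consistent with the original code only passing plain dicts through the queues.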
Beispiel #30
0
    def _next_click_branch(self, client_query, record_db, to_next_page=False):
        # continue unfinished session
        client_id = client_query['CLIENT_ID']
        if client_id not in self._client_response_record:
            return {}

        logger.info('enter client id %s "NEXT" response' % client_id)
        now_time = time.time()

        # 1.step client response content
        query_index = client_query['QUERY_INDEX']

        if client_query['QUERY_STATUS'] == 'CLOSE_ECHO' or to_next_page:
            logger.info('prepare next page data')
            # enter next sample
            return self._prepare_next_page(client_id, query_index, record_db)

        # each query index should be answered only once
        # assert(self._client_response_record[client_id]['RESPONSE'][query_index] is None)

        if client_query['CLIENT_RESPONSE'] is not None:
            try:
                logger.info('record client id %s response' % client_id)
                user_response = client_query['CLIENT_RESPONSE']

                # 1.2.step record client response
                self._client_response_record[client_id]['RESPONSE'][
                    query_index] = user_response
                self._client_response_record[client_id]['RESPONSE_TIME'][
                    query_index]['STOP_TIME'] = now_time

                query_data_index = self._client_response_record[client_id][
                    'ID'][query_index]
                if query_data_index not in self._sample_finished_count:
                    self._sample_finished_count[query_data_index] = {
                        'CLIENT': [],
                        'COUNT': 0
                    }

                if client_id not in self._sample_finished_count[
                        query_data_index]['CLIENT']:
                    self._sample_finished_count[query_data_index]['COUNT'] = \
                      self._sample_finished_count[query_data_index]['COUNT'] + 1
                    self._sample_finished_count[query_data_index][
                        'CLIENT'].append(client_id)

                logger.info('finish record client id %s response' % client_id)
            except:
                logger.error('failed to record client id %s response' %
                             client_id)
                self._client_response_record[client_id]['RESPONSE'][
                    query_index] = None
                self._client_response_record[client_id]['RESPONSE_TIME'][
                    query_index]['STOP_TIME'] = -1
                return {}

        # 2.step server response
        # 2.1.step check whether the predefined max time per session has been exceeded
        if self._max_time_in_session > 0:
            # a max session time has been defined
            if now_time - self._client_response_record[client_id][
                    'START_TIME'] > self._max_time_in_session:
                # return stop flag
                logger.info(
                    'client id %s session time exceeds the max time' %
                    client_id)
                return self._prepare_stop_page()

        # 2.2.step check whether the predefined max samples per session has been reached
        if self._max_samples_in_session > 0:
            # a max number of samples per session has been defined
            if (query_index + 1) >= self._max_samples_in_session:
                # return stop flag
                logger.info(
                    'client id %s reached the max samples defined by the task' %
                    client_id)
                return self._prepare_stop_page()

        # 2.3.step whether the first few samples should be skipped (for those, the ground truth is returned)
        if self._skip_sample_num > 0:
            # the task defines a number of warm-up samples to skip
            if query_index < self._skip_sample_num:
                # the user sees the ground truth of their last judgement
                logger.info('prepare ground truth page')
                return self._prepare_ground_truth_page(client_id, query_index,
                                                       record_db)

        # 2.4.step enter next sample
        logger.info('prepare next page')
        return self._prepare_next_page(client_id, query_index, record_db)
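
The routing at the end of _next_click_branch can be summarized as a small decision ladder: stop the session when the time or sample budget is exhausted, serve ground-truth pages for the first few warm-up samples, and otherwise serve the next sample. The sketch below condenses that ladder into a standalone function; route_next_query and its parameters are hypothetical names used only for illustration.

import time


def route_next_query(session, query_index,
                     max_time_in_session=0,
                     max_samples_in_session=0,
                     skip_sample_num=0):
    # hypothetical condensed version of the dispatch order above: session-time
    # limit, then sample-count limit, then warm-up (ground truth) pages, then
    # the next sample page
    now_time = time.time()
    if max_time_in_session > 0 and \
            now_time - session['START_TIME'] > max_time_in_session:
        return 'STOP_PAGE'
    if max_samples_in_session > 0 and \
            (query_index + 1) >= max_samples_in_session:
        return 'STOP_PAGE'
    if query_index < skip_sample_num:
        return 'GROUND_TRUTH_PAGE'
    return 'NEXT_PAGE'


session = {'START_TIME': time.time() - 120}
print(route_next_query(session, query_index=0, max_time_in_session=60))    # STOP_PAGE
print(route_next_query(session, query_index=4, max_samples_in_session=5))  # STOP_PAGE
print(route_next_query(session, query_index=1, skip_sample_num=3))         # GROUND_TRUTH_PAGE
print(route_next_query(session, query_index=5, skip_sample_num=3))         # NEXT_PAGE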