# NOTE: assumed project-local import paths, following the repo's data_loader
# package layout; adjust if these modules live elsewhere.
from data_loader.bert_tokenizer import BertTokenizer4Tagger
from data_loader.mrc_data_loader import MRCNERDataLoader
from data_loader.mrc_data_processor import (ACE2004Processor, ACE2005Processor,
                                            Conll03Processor, GeniaProcessor,
                                            MSRAProcessor, Onto4ZhProcessor,
                                            Onto5EngProcessor, ResumeZhProcessor)


def load_data(config, logger):
    """Build dataloaders for the dataset selected by config.data_sign.

    Returns (train_dataloader, dev_dataloader, test_dataloader,
    num_train_steps, label_list).
    """

    logger.info("-*-" * 10)
    logger.info(f"current data_sign: {config.data_sign}")

    # Map each data_sign to its dataset-specific processor.
    processors = {
        "conll03": Conll03Processor,
        "zh_msra": MSRAProcessor,
        "zh_onto": Onto4ZhProcessor,
        "en_onto": Onto5EngProcessor,
        "genia": GeniaProcessor,
        "ace2004": ACE2004Processor,
        "ace2005": ACE2005Processor,
        "resume": ResumeZhProcessor,
    }
    if config.data_sign not in processors:
        raise ValueError(
            f"Unknown data_sign {config.data_sign!r}; expected one of {sorted(processors)}")
    data_processor = processors[config.data_sign]()

    label_list = data_processor.get_labels()
    tokenizer = BertTokenizer4Tagger.from_pretrained(
        config.bert_model, do_lower_case=config.do_lower_case)
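    # do_lower_case is read from the config rather than hardcoded, so cased
    # BERT checkpoints are not silently lower-cased.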

    dataset_loaders = MRCNERDataLoader(
        config,
        data_processor,
        label_list,
        tokenizer,
        mode="train",
        allow_impossible=True,
    )
    if config.debug:
        # Debug mode trains on the smaller dev split so a full run finishes quickly.
        logger.info("=" * 20)
        logger.info("=" * 10 + " DEBUG MODE " + "=" * 10)
        train_dataloader = dataset_loaders.get_dataloader(
            data_sign="dev",
            num_data_processor=config.num_data_processor,
            logger=logger)
    else:
        train_dataloader = dataset_loaders.get_dataloader(
            data_sign="train",
            num_data_processor=config.num_data_processor,
            logger=logger)
    dev_dataloader = dataset_loaders.get_dataloader(
        data_sign="dev",
        num_data_processor=config.num_data_processor,
        logger=logger)
    test_dataloader = dataset_loaders.get_dataloader(
        data_sign="test",
        num_data_processor=config.num_data_processor,
        logger=logger)
    train_instances = dataset_loaders.get_train_instance()
    # Total optimizer updates: batches per epoch, reduced by gradient
    # accumulation, multiplied by the number of training epochs.
    num_train_steps = (len(train_dataloader)
                       // config.gradient_accumulation_steps) * config.num_train_epochs
    # Assumes config.n_gpu >= 1; the global batch size is split across GPUs.
    per_gpu_train_batch_size = config.train_batch_size // config.n_gpu

    logger.info("****** Running Training ******")
    logger.info(f"Number of Training Data: {train_instances}")
    logger.info(
        f"Train Epoch {config.num_train_epochs}; Total Train Steps: {num_train_steps}; Warmup Train Steps: {config.warmup_steps}"
    )
    logger.info(f"Per GPU Train Batch Size: {per_gpu_train_batch_size}")

    return train_dataloader, dev_dataloader, test_dataloader, num_train_steps, label_list
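

# --- Usage sketch (illustrative, not part of the original module) ---
# Minimal example of wiring load_data into a training entry point. The
# build_config() helper is hypothetical; in practice the config namespace
# comes from the project's own argparse setup and must provide the attributes
# referenced above (data_sign, bert_model, do_lower_case, debug,
# num_data_processor, gradient_accumulation_steps, num_train_epochs,
# warmup_steps, train_batch_size, n_gpu).
if __name__ == "__main__":
    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    config = build_config()  # hypothetical: parse CLI args into a config namespace
    (train_dataloader, dev_dataloader, test_dataloader,
     num_train_steps, label_list) = load_data(config, logger)
    logger.info(f"labels: {label_list}; total train steps: {num_train_steps}")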