Example #1
def run_infer(weights_folder_path, cfg):

    cfg.pretrained = False
    # For a local test, replace the following path with the actual data path.
    cfg.data_folder = cfg.data_dir + "test/"

    to_device_transform = ToDeviced(keys=("input", "target", "mask",
                                          "is_annotated"),
                                    device=cfg.device)

    # Collect every checkpoint saved in the weights folder.
    all_paths = glob.glob(os.path.join(weights_folder_path, "*.pth"))

    nets = []
    for path in all_paths:
        # Load the checkpoint on CPU, then strip the "module." prefix
        # left over from DataParallel/DistributedDataParallel training.
        state_dict = torch.load(path, map_location="cpu")["model"]
        new_state_dict = {}
        for k, v in state_dict.items():
            new_state_dict[k.replace("module.", "")] = v

        net = RanzcrNet(cfg).eval().to(cfg.device)
        net.load_state_dict(new_state_dict)

        # The decoder and segmentation head are only needed for training;
        # drop them to save memory at inference time.
        del net.decoder
        del net.segmentation_head

        nets.append(net)

    test_df = pd.read_csv(cfg.test_df)
    test_dataset = get_test_dataset(test_df, cfg)
    test_dataloader = get_test_dataloader(test_dataset, cfg)

    with torch.no_grad():
        fold_preds = [[] for _ in range(len(nets))]
        for batch in tqdm(test_dataloader):
            batch = to_device_transform(batch)
            for i, net in enumerate(nets):
                if cfg.mixed_precision:
                    with autocast():
                        logits = net(batch)["logits"].cpu().numpy()
                else:
                    logits = net(batch)["logits"].cpu().numpy()
                fold_preds[i].append(logits)
        fold_preds = [np.concatenate(p) for p in fold_preds]

    # Average sigmoid probabilities across the fold models.
    preds = np.stack(fold_preds)
    preds = expit(preds)
    preds = np.mean(preds, axis=0)

    # Write the averaged probabilities into a copy of the test frame.
    sub_df = test_df.copy()
    sub_df[cfg.label_cols] = preds

    submission = pd.read_csv(cfg.test_df)
    submission.loc[sub_df.index, cfg.label_cols] = sub_df[cfg.label_cols]
    submission.to_csv("submission.csv", index=False)
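
A minimal sketch of how run_infer might be called, assuming a namespace-style cfg carrying the attributes the function reads (plus whatever fields get_test_dataset and get_test_dataloader expect); every value below is a placeholder, not the original repo's configuration:

if __name__ == "__main__":
    from types import SimpleNamespace

    # Hypothetical config: field names mirror the attributes used above,
    # values are placeholders.
    cfg = SimpleNamespace(
        data_dir="./data/",
        device="cuda" if torch.cuda.is_available() else "cpu",
        mixed_precision=True,
        test_df="./data/sample_submission.csv",
        label_cols=["label_0", "label_1"],  # placeholder target columns
    )
    run_infer("./weights/", cfg)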
Example #2
    def __init__(self, game, args):
        self.game = game
        self.args = args

        # neural network of current generation
        self.current_agent = create_agent(self.game)
        # neural network of previous generation
        self.previous_agent = create_agent(self.game)

        # training examples from the latest args.numItersForTrainExamplesHistory iterations
        self.trainExamplesHistory = []

        # one signal queue per remote actor; results come back on a shared return queue
        self.remote_actors_signal_queues = []
        self.remote_actors_return_queue = queue.Queue()

        self.test_dataset = get_test_dataset()
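
The source stops at the constructor, but the comment on trainExamplesHistory suggests the usual AlphaZero-style cap on the history length before training; a hedged sketch of that step, using only names from the snippet:

        # Assumed companion logic, not from the source: keep only the latest
        # args.numItersForTrainExamplesHistory iterations of examples.
        if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            self.trainExamplesHistory.pop(0)  # drop the oldest iteration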
Example #3
def print_acc(pab):
    test_data = get_test_dataset()
    labels_test = test_data.labels

    if torch.cuda.is_available():
        labels_test = labels_test.cuda()

    # Predicted class = index of the maximum score in each row.
    predicts = pab.argmax(dim=1)
    # Keep predictions and labels on the same device before comparing.
    predicts = predicts.to(labels_test.device)

    correct = (predicts == labels_test).sum()
    total = len(labels_test)
    print('total', total, 'correct', correct.item())
    p = 1.0 * correct / total
    print('Accuracy: %.4f' % p.item())

    return p.item()
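
A minimal usage sketch, assuming pab is a (num_samples, num_classes) score tensor aligned with the test set's label order; the random tensor is purely illustrative:

import torch

num_samples, num_classes = 10000, 10
pab = torch.randn(num_samples, num_classes)  # placeholder scores
acc = print_acc(pab)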
Example #4
def write_xls():
    test_data = get_test_dataset()
    labels_test = test_data.labels

    e = Excel()
    e.new_worksheet('cifar10iid')

    no = 31

    # Load the saved probability tensors for test run `no`.
    outputs_papb_dir = params.dataset_division_testno + '/papb' + str(
        no) + '.npy'
    papb = torch.load(outputs_papb_dir)

    outputs_pab_dir = params.dataset_division_testno + '/pab' + str(
        no) + '.npy'
    pab = torch.load(outputs_pab_dir)

    outputs_fed_pab_dir = params.dataset_division_testno + '/fed_pab' + str(
        no) + '.npy'
    fed_pab = torch.load(outputs_fed_pab_dir)

    acc_sum = 0  # renamed from `sum` to avoid shadowing the builtin
    l = int(len(papb) / 10000)

    print('fed_pab', fed_pab.shape)

    for j in range(100):
        # Build a one-hot row for the ground-truth label.
        print('labels_test[%d]' % j, labels_test[j])
        label_write_data = torch.zeros(10)
        label_write_data[labels_test[j]] = 1
        e.add_data_papb(label_write_data)
        '''for i in range(l):
            e.add_data_papb(papb[i * 10000 + j])
            acc_sum += papb[i * 10000 + j]'''
        e.add_data_pab(pab[0 + j])

        e.add_data_fed_pab(fed_pab[0 + j])

    e.set_dir(params.dataset_division_testno + '/1.xls')
    e.save()
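
The one-hot row built by hand inside the loop is equivalent to PyTorch's built-in helper; a small self-contained check (the label value is illustrative):

import torch
import torch.nn.functional as F

label = torch.tensor(3)
row_manual = torch.zeros(10)
row_manual[label] = 1
row_builtin = F.one_hot(label, num_classes=10).float()
assert torch.equal(row_manual, row_builtin)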
Example #5
def run():
    parser = ArgumentParser()
    parser.add_argument(
        "--dataset_path",
        type=str,
        default="",
        help="Path or url of the dataset. If empty download from S3.")
    parser.add_argument("--use_adapter",
                        default=False,
                        action='store_true',
                        help="Use adapter or not")
    parser.add_argument("--keyword_module",
                        type=str,
                        default="",
                        help="add, attention, ")
    parser.add_argument(
        "--model",
        type=str,
        default="openai-gpt",
        help="Model type (openai-gpt or gpt2)",
        choices=['openai-gpt',
                 'gpt2'])  # anything besides gpt2 will load openai-gpt
    parser.add_argument("--model_checkpoint",
                        type=str,
                        default="",
                        help="Path, url or short name of the model")
    parser.add_argument("--device",
                        type=str,
                        default="cuda" if torch.cuda.is_available() else "cpu",
                        help="Device (cuda or cpu)")
    parser.add_argument("--bert_model_path",
                        default="./",
                        type=str,
                        help="Bert pre-trained model path")
    parser.add_argument(
        "--vocab_file",
        default="./vocab.korean.rawtext.list",
        type=str,
        help="The vocabulary file that the BERT model was trained on.")
    parser.add_argument("--no_sample",
                        action='store_true',
                        help="Set to use greedy decoding instead of sampling")
    parser.add_argument("--max_length",
                        type=int,
                        default=50,
                        help="Maximum length of the output utterances")
    parser.add_argument("--min_length",
                        type=int,
                        default=1,
                        help="Minimum length of the output utterances")
    parser.add_argument("--seed", type=int, default=0, help="Seed")
    parser.add_argument("--temperature",
                        type=int,
                        default=0.7,
                        help="Sampling softmax temperature")
    parser.add_argument(
        "--top_k",
        type=int,
        default=50,
        help="Filter top-k tokens before sampling (<=0: no filtering)")
    parser.add_argument(
        "--top_p",
        type=float,
        default=0.9,
        help="Nucleus filtering (top-p) before sampling (<=0.0: no filtering)")
    parser.add_argument(
        "--do_lower_case",
        action='store_true',
        help="Set this flag if you are using an uncased model.")

    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__file__)
    logger.info(pformat(args))

    if args.model_checkpoint == "":
        if args.model == 'gpt2':
            raise ValueError(
                "Interacting with GPT2 requires passing a finetuned model_checkpoint"
            )
        else:
            args.model_checkpoint = download_pretrained_model()

    if args.seed != 0:
        random.seed(args.seed)
        torch.random.manual_seed(args.seed)
        torch.cuda.manual_seed(args.seed)

    logger.info("Get pretrained model and tokenizer")

    # Load KoBERT model and tokenizer
    bert_tokenizer = BertTokenizer.from_pretrained(
        args.vocab_file, do_lower_case=args.do_lower_case)
    bert_model = BertModel.from_pretrained(args.bert_model_path)
    bert_model.to(args.device)
    bert_model.eval()

    # Load KoGPT2 model and tokenizer
    tok_path = get_tokenizer()
    gpt_model, gpt_vocab = get_pytorch_conkogpt2_model2(
        use_adapter=args.use_adapter)
    gpt_tokenizer = SentencepieceTokenizer(tok_path)
    gpt_model.to(args.device)
    gpt_model.eval()

    model = Seq2Seq(bert_model, gpt_model, gpt_vocab, args)
    # map_location lets the checkpoint load on CPU-only machines as well.
    model.load_state_dict(
        torch.load(args.model_checkpoint, map_location=args.device),
        strict=False)
    model.to(args.device)
    model.eval()

    logger.info("Load test data")
    sourceList, targetList = get_test_dataset(bert_tokenizer, gpt_tokenizer,
                                              gpt_vocab, args.dataset_path)

    with open(args.model_checkpoint + "_output.txt", 'w') as f1:
        for line in zip(sourceList, targetList):
            out_ids = sample_sequence(line[0], bert_model, bert_tokenizer,
                                      gpt_model, gpt_vocab, args)
            out_texts = gpt_vocab.to_tokens(out_ids)
            for text in out_texts:
                f1.write(text.replace('▁', ' ').replace('</s>', ' '))
            """
            for id in out_ids:
                f1.write(str(id))
                f1.write(' ')
            """
            f1.write("\n")
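
A sketch of the usual entry point for this script; the module filename and example flags in the comment are placeholders, not from the source:

if __name__ == "__main__":
    # e.g. python interact.py --model_checkpoint ./runs/ckpt.pth \
    #          --use_adapter --top_p 0.9 --max_length 50
    run()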
Example #6
    def setUp(self):
        """ Create a new Dataset and add features. """
        self.dataset = get_test_dataset()
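
A minimal unittest sketch around this fixture, assuming get_test_dataset is importable and returns a Dataset object; the test class and assertion are illustrative, not from the source:

import unittest

class DatasetTestCase(unittest.TestCase):
    def setUp(self):
        """ Create a new Dataset and add features. """
        self.dataset = get_test_dataset()

    def test_dataset_created(self):
        # Hypothetical check: the fixture should produce a dataset object.
        self.assertIsNotNone(self.dataset)

if __name__ == "__main__":
    unittest.main()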