def run_infer(weights_folder_path, cfg):
    """Run ensemble inference on the test set and write ``submission.csv``.

    Every ``*.pth`` checkpoint found in ``weights_folder_path`` is loaded into
    its own ``RanzcrNet``. The logits of each network are passed through
    ``expit`` and the results are averaged over the networks.

    Args:
        weights_folder_path: Folder searched (non-recursively) for ``*.pth``
            checkpoint files. Each file must contain a mapping with a
            ``"model"`` entry holding the state dict.
        cfg: Configuration object. ``pretrained`` and ``data_folder`` are
            overwritten here; ``data_dir``, ``device``, ``test_df``,
            ``mixed_precision`` and ``label_cols`` are read.

    Raises:
        ValueError: If no ``*.pth`` file exists in ``weights_folder_path``.
    """
    cfg.pretrained = False
    # for local test, please modify the following path into actual path.
    cfg.data_folder = cfg.data_dir + "test/"

    to_device_transform = ToDeviced(
        keys=("input", "target", "mask", "is_annotated"), device=cfg.device
    )

    # Sorted so the order of the ensemble members is reproducible.
    checkpoint_paths = sorted(glob.glob(os.path.join(weights_folder_path, "*.pth")))
    if not checkpoint_paths:
        # Fail before any data is loaded instead of letting np.stack() raise
        # an opaque error on an empty list after the whole test loop ran.
        raise ValueError(
            "no '*.pth' checkpoint found in {!r}".format(weights_folder_path)
        )

    nets = []
    for path in checkpoint_paths:
        # map_location lets a checkpoint saved on another device (e.g. a GPU)
        # be loaded when cfg.device is different (e.g. CPU-only inference).
        state_dict = torch.load(path, map_location=cfg.device)["model"]
        # Drop every "module." fragment from the parameter names
        # (presumably left by a DataParallel-style wrapper -- TODO confirm).
        new_state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()}

        net = RanzcrNet(cfg).eval().to(cfg.device)
        net.load_state_dict(new_state_dict)
        # Only the "logits" output is read below; these two sub-modules are
        # dropped after the weights have been loaded.
        del net.decoder
        del net.segmentation_head
        nets.append(net)

    test_df = pd.read_csv(cfg.test_df)
    test_dataset = get_test_dataset(test_df, cfg)
    test_dataloader = get_test_dataloader(test_dataset, cfg)

    # One list of per-batch logit arrays for each network.
    fold_preds = [[] for _ in nets]
    with torch.no_grad():
        for batch in tqdm(test_dataloader):
            batch = to_device_transform(batch)
            for net, net_preds in zip(nets, fold_preds):
                if cfg.mixed_precision:
                    with autocast():
                        logits = net(batch)["logits"]
                else:
                    logits = net(batch)["logits"]
                net_preds.append(logits.cpu().numpy())

    fold_preds = [np.concatenate(p) for p in fold_preds]
    preds = np.stack(fold_preds)
    preds = expit(preds)
    # Average the per-network probabilities.
    preds = np.mean(preds, axis=0)

    sub_df = test_df.copy()
    sub_df[cfg.label_cols] = preds

    submission = pd.read_csv(cfg.test_df)
    submission.loc[sub_df.index, cfg.label_cols] = sub_df[cfg.label_cols]
    submission.to_csv("submission.csv", index=False)
def __init__(self, game, args):
    """Initialise agents, example history, actor queues and the test dataset.

    Args:
        game: Game object; stored on the instance and handed to
            ``create_agent`` for both agents.
        args: Configuration object; stored on the instance.
    """
    self.game, self.args = game, args

    # Two networks are built, in this order: the current generation first,
    # then the previous generation.
    self.current_agent = create_agent(game)
    self.previous_agent = create_agent(game)

    # history of examples from args.numItersForTrainExamplesHistory latest iterations
    self.trainExamplesHistory = []

    # Queues used to talk to remote actors: a list of signal queues (empty
    # at construction time) and a single shared return queue.
    self.remote_actors_signal_queues = []
    self.remote_actors_return_queue = queue.Queue()

    self.test_dataset = get_test_dataset()
def print_acc(pab):
    """Print and return the top-1 accuracy of ``pab`` on the test labels.

    Args:
        pab: Tensor of per-sample class scores. The predicted class of each
            row is the index of its maximum along dimension 1; it is compared
            with the ``labels`` of ``get_test_dataset()``.

    Returns:
        float: ``correct / total`` where ``total`` is the number of labels.
    """
    test_data = get_test_dataset()
    # Compare on the device pab already lives on. Moving the labels to CUDA
    # merely because CUDA is available breaks the comparison below whenever
    # pab itself is a CPU tensor.
    labels_test = torch.as_tensor(test_data.labels, device=pab.device)

    # torch.max along a dimension returns a (values, indices) pair.
    max_result = torch.max(pab.data, 1)
    predicts = max_result[1]
    print('torch.max(outputs_test.data, 1)', len(max_result))

    correct = (predicts == labels_test).sum()
    total = len(labels_test)
    print('total', total, 'correct', correct)

    # 1.0 * ... forces a floating-point division of the integer count.
    p = 1.0 * correct / total
    accuracy = p.item()
    print('Accuracy: %.4f' % accuracy)
    return accuracy
def write_xls(no=31, num_rows=100):
    """Export labels and saved predictions of the first test samples to ``1.xls``.

    For each of the first ``num_rows`` test samples three rows are handed to
    the ``Excel`` helper: a one-hot encoding of the label (10 classes), the
    matching row of the saved ``pab`` array and the matching row of the saved
    ``fed_pab`` array. The workbook is saved as
    ``params.dataset_division_testno + '/1.xls'``.

    Args:
        no: Run number used in the input file names (``pab<no>.npy`` and
            ``fed_pab<no>.npy``). Defaults to the previously hard-coded 31.
        num_rows: Number of leading test samples to export. Defaults to the
            previously hard-coded 100.
    """
    test_data = get_test_dataset()
    labels_test = test_data.labels

    e = Excel()
    e.new_worksheet('cifar10iid')

    base_dir = params.dataset_division_testno
    # NOTE: the files carry a ".npy" suffix but are read with torch.load.
    # NOTE: the 'papb<no>.npy' file is no longer loaded; it was only used by
    # an export of per-block papb rows that had already been disabled.
    pab = torch.load(base_dir + '/pab' + str(no) + '.npy')
    fed_pab = torch.load(base_dir + '/fed_pab' + str(no) + '.npy')

    for j in range(num_rows):
        # Build the label row: a one-hot vector over the 10 classes.
        print('labels_test[0]', labels_test[j])
        label_write_data = torch.zeros(10)
        label_write_data[labels_test[j]] = 1
        e.add_data_papb(label_write_data)

        e.add_data_pab(pab[j])
        print('fed_pab', fed_pab.shape)
        e.add_data_fed_pab(fed_pab[j])

    e.set_dir(base_dir + '/1.xls')
    e.save()
def run():
    """Generate one output line per test sample and write them to a text file.

    Parses the command-line options, loads the BERT and GPT models together
    with their tokenizers, restores the ``Seq2Seq`` weights from
    ``--model_checkpoint`` and decodes every source of the test dataset with
    ``sample_sequence``. The decoded tokens of each sample are written as one
    line to ``<model_checkpoint>_output.txt``.

    Raises:
        ValueError: If ``--model gpt2`` is selected without a
            ``--model_checkpoint``.
    """
    parser = ArgumentParser()
    parser.add_argument(
        "--dataset_path", type=str, default="",
        help="Path or url of the dataset. If empty download from S3.")
    parser.add_argument("--use_adapter", default=False, action='store_true',
                        help="Use adapter or not")
    parser.add_argument("--keyword_module", type=str, default="",
                        help="add, attention, ")
    parser.add_argument(
        "--model", type=str, default="openai-gpt",
        help="Model type (openai-gpt or gpt2)",
        choices=['openai-gpt', 'gpt2'])  # anything besides gpt2 will load openai-gpt
    parser.add_argument("--model_checkpoint", type=str, default="",
                        help="Path, url or short name of the model")
    parser.add_argument("--device", type=str,
                        default="cuda" if torch.cuda.is_available() else "cpu",
                        help="Device (cuda or cpu)")
    parser.add_argument("--bert_model_path", default="./", type=str,
                        help="Bert pre-trained model path")
    parser.add_argument(
        "--vocab_file", default="./vocab.korean.rawtext.list", type=str,
        help="The vocabulary file that the BERT model was trained on.")
    parser.add_argument("--no_sample", action='store_true',
                        help="Set to use greedy decoding instead of sampling")
    parser.add_argument("--max_length", type=int, default=50,
                        help="Maximum length of the output utterances")
    parser.add_argument("--min_length", type=int, default=1,
                        help="Minimum length of the output utterances")
    parser.add_argument("--seed", type=int, default=0, help="Seed")
    # float, not int: the default is 0.7 and with type=int a fractional value
    # given on the command line would be rejected by argparse.
    parser.add_argument("--temperature", type=float, default=0.7,
                        help="Sampling softmax temperature")
    parser.add_argument(
        "--top_k", type=int, default=50,
        help="Filter top-k tokens before sampling (<=0: no filtering)")
    parser.add_argument(
        "--top_p", type=float, default=0.9,
        help="Nucleus filtering (top-p) before sampling (<=0.0: no filtering)")
    parser.add_argument(
        "--do_lower_case", action='store_true',
        help="Set this flag if you are using an uncased model.")
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__file__)
    logger.info(pformat(args))

    if args.model_checkpoint == "":
        if args.model == 'gpt2':
            raise ValueError(
                "Interacting with GPT2 requires passing a finetuned model_checkpoint"
            )
        args.model_checkpoint = download_pretrained_model()

    # A seed of 0 (the default) leaves the random generators unseeded.
    if args.seed != 0:
        random.seed(args.seed)
        torch.random.manual_seed(args.seed)
        torch.cuda.manual_seed(args.seed)

    logger.info("Get pretrained model and tokenizer")

    # Load KoBERT model and tokenizer
    bert_tokenizer = BertTokenizer.from_pretrained(
        args.vocab_file, do_lower_case=args.do_lower_case)
    bert_model = BertModel.from_pretrained(args.bert_model_path)
    bert_model.to(args.device)
    bert_model.eval()

    # Load KoGPT2 model and tokenizer
    tok_path = get_tokenizer()
    gpt_model, gpt_vocab = get_pytorch_conkogpt2_model2(
        use_adapter=args.use_adapter)
    gpt_tokenizer = SentencepieceTokenizer(tok_path)
    gpt_model.to(args.device)
    gpt_model.eval()

    model = Seq2Seq(bert_model, gpt_model, gpt_vocab, args)
    # map_location lets a checkpoint saved on a GPU be restored when
    # --device is cpu; strict=False tolerates missing/unexpected keys.
    model.load_state_dict(
        torch.load(args.model_checkpoint, map_location=args.device),
        strict=False)
    model.to(args.device)
    model.eval()

    logger.info("Load test data")
    sourceList, targetList = get_test_dataset(bert_tokenizer, gpt_tokenizer,
                                              gpt_vocab, args.dataset_path)

    output_path = args.model_checkpoint + "_output.txt"
    # The context manager closes the file even if decoding raises; UTF-8 is
    # set explicitly because the output contains non-ASCII text.
    with open(output_path, 'w', encoding="utf-8") as f1:
        # Only the source is decoded; zipping with the targets keeps the
        # number of processed samples bounded by the shorter list.
        for source, _target in zip(sourceList, targetList):
            out_ids = sample_sequence(source, bert_model, bert_tokenizer,
                                      gpt_model, gpt_vocab, args)
            out_texts = gpt_vocab.to_tokens(out_ids)
            for text in out_texts:
                # Replace the '▁' marker and the '</s>' token with a space.
                f1.write(text.replace('▁', ' ').replace('</s>', ' '))
            f1.write("\n")
def setUp(self):
    """Prepare the fixture: store the result of ``get_test_dataset()`` as ``self.dataset``."""
    dataset = get_test_dataset()
    self.dataset = dataset