def main(args):
    """Train or evaluate the SAT captioning model.

    Args:
        args: parsed CLI namespace; uses ``path`` (data directory),
            ``batch_size``, ``epochs``, and the boolean ``test`` flag.

    Side effects: reads pickled vocabulary files from ``args.path``,
    writes the best checkpoint to ``model/best-model.pt`` during training,
    and prints progress to stdout.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(device)

    # Vocabulary size comes from the token->index mapping built at
    # preprocessing time.
    with open(os.path.join(args.path, 'mapping.pkl'), 'rb') as f:
        mapping = pickle.load(f)
    vocab_size = len(mapping)

    sat = SAT(vocab_size=vocab_size)
    sat.to(device)
    # NOTE: the original built an unused nn.CrossEntropyLoss and an Adam
    # optimizer here; SAT.train_batch evidently manages its own loss and
    # optimizer, so the dead locals were removed.

    if not args.test:
        # ---- training ----
        train_dl = get_loader(mode='train', path=args.path, batch_size=args.batch_size)
        val_dl = get_loader(mode='val', path=args.path, batch_size=args.batch_size)

        best_loss = float('inf')
        for epoch in range(args.epochs):
            pbar = tqdm(train_dl)
            sat.train()
            for xb, yb in pbar:
                loss = sat.train_batch(xb, yb)
                pbar.set_description("| epoch: {:3d} | loss: {:.6f} |".format(
                    epoch + 1, loss))

            # ---- validation ----
            sat.eval()
            with torch.no_grad():
                val_losses = []
                for xb, yb in tqdm(val_dl):
                    loss = sat.valid_batch(xb, yb)
                    val_losses.append(loss.detach().cpu().numpy())
                val_loss = np.mean(val_losses)
                print('val loss: {:.6f}'.format(val_loss))
                # Keep only the checkpoint with the lowest validation loss.
                if best_loss > val_loss:
                    best_loss = val_loss
                    sat.save(file_name='model/best-model.pt', num_epoch=epoch)
    # test
    else:
        print('testing...')
        beam_size = 5
        test_dl = get_loader(mode='test', path=args.path, batch_size=args.batch_size)
        sat.load('./model/best-model.pt')
        with open(os.path.join(args.path, 'itow.pkl'), 'rb') as f:
            itow = pickle.load(f)  # index -> word lookup for decoding
        P = []
        for xb, yb in tqdm(test_dl):
            # BUGFIX: pass the configured beam_size instead of a hard-coded 5,
            # so adjusting the variable above actually takes effect.
            pred = sat.inference(xb, beam_size=beam_size, max_len=36)
            P += pred
        test_func(P, itow)
# NOTE(review): the bare "import test" resolves against whatever module named
# "test" is first on sys.path — presumably a local test.py; verify it is not
# shadowed by the stdlib "test" package.
import test

# Call through the module object and read a module-level attribute.
test.test_func()
print(test.myvar)

# Re-import a single name directly into this namespace, then rebind the
# module-level name "myvar" to the call's return value.
from test import test_func

myvar = test_func()
print(myvar)
#!/usr/bin/python #环境变量配置,利用python解析器执行 #模块引入,会自动生成pyc文件(字节码文件),会加快加载速度 import test def hello(): print("\ncall hello fun\n") #this is entry #如果该模块被单独执行,__name__ = "__main__" #如果该模块被引用,__name__ = "__模块名字__" #一条语句结尾有没有;都可以 print("__name__ = " + __name__) if __name__ == "__main__": print("\nhello world\n") hello() test.test_func()
# NOTE(review): this chunk appears truncated — the try: block below has no
# matching except/finally in the visible source, so the reconstruction of
# the trailing nesting is a best guess.
import test
import socket
import tensorflow as tf
import Numpy_test

host = ''      # empty host = bind on all interfaces
port = 50000
backlog = 5    # listen-queue length
size = 1024    # recv buffer size in bytes

# NOTE(review): rebinding the name "socket" to the socket OBJECT shadows the
# socket module from here on — any later socket.<module-attr> call would
# break; consider renaming the variable.
socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
socket.bind((host, port))
socket.listen(backlog)

test.test_func()  # prints "Hello!"
Numpy_test.numpy_func()
# print("Server Started")

# Accept loop: one Unity client at a time.
while True:
    connection = None
    try:
        connection, address = socket.accept()
        print("Unity Connected.")
        # connection.send("First call from Python\n")
        while True:
            data = connection.recv(size).decode()
            if data:
                if data == "Disconnect":
                    print("Connection closed by Unity")
def run_script():
    """Print this module's identity, invoke ``test.test_func()``, then show
    where the imported ``test`` module was loaded from."""
    # Report the current module's name and its file path, in that order.
    for attribute in (__name__, __file__):
        print(attribute)
    # Exercise the imported module, then reveal its on-disk location.
    test.test_func()
    print(test.__file__)
def runner(imgs):
    """Convert ``imgs`` to a float32 torch tensor, run it through the
    module-level ``model`` via ``test_func``, and return the 'preds' entry."""
    tensor_input = torch.Tensor(np.float32(imgs))
    result = test_func(model, imgs=tensor_input)
    return result['preds']
def main():
    """Command-line entry point: train a FAN or HAN text classifier.

    Parses two positional arguments (dataset name, model kind), builds the
    matching datasets and loaders from pre-trained word vectors, trains
    with SGD + plateau LR decay and patience-based early stopping, logs to
    TensorBoard, and saves the final weights under ``MODEL_DIR``.

    Relies on module-level configuration: BATCH_SIZE, WORD_HIDDEN_SIZE,
    SENT_HIDDEN_SIZE, LEARNING_RATE, MOMENTUM, PATIENCE, EPOCHS, PADDING,
    LOG_DIR, MODEL_DIR, DEVICE, and the per-dataset config classes.
    """
    parser = argparse.ArgumentParser(
        description="Train the FAN or the HAN model"
    )
    parser.add_argument(
        "dataset",
        choices=["yelp", "yahoo", "amazon", "synthetic"],
        help="Choose the dataset",
    )
    parser.add_argument(
        "model",
        choices=["fan", "han"],
        help="Choose the model to be trained (flat or hierarchical)",
    )
    args = parser.parse_args()

    # Map the dataset name to its config object (file paths, padding sizes).
    if args.dataset == "yelp":
        dataset_config = Yelp
    elif args.dataset == "yahoo":
        dataset_config = Yahoo
    elif args.dataset == "amazon":
        dataset_config = Amazon
    elif args.dataset == "synthetic":
        dataset_config = Synthetic
    else:
        # should not end there — argparse `choices` already rejects others
        exit()

    # Pre-trained word vectors; wv.vocab indexes the embedding matrix.
    wv = KeyedVectors.load(dataset_config.EMBEDDING_FILE)

    train_df = pd.read_csv(dataset_config.TRAIN_DATASET).fillna("")
    train_documents = train_df.text
    train_labels = train_df.label
    # FAN consumes a flat word sequence; HAN needs the sentence hierarchy.
    if args.model == "fan":
        train_dataset = FlatDataset(
            train_documents,
            train_labels,
            wv.vocab,
            dataset_config.WORDS_PER_DOC[PADDING],
        )
    else:
        train_dataset = HierarchicalDataset(
            train_documents,
            train_labels,
            wv.vocab,
            dataset_config.SENT_PER_DOC[PADDING],
            dataset_config.WORDS_PER_SENT[PADDING],
        )
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2,
    )

    # Validation split, constructed the same way as the training split.
    val_df = pd.read_csv(dataset_config.VAL_DATASET).fillna("")
    val_documents = val_df.text
    val_labels = val_df.label
    if args.model == "fan":
        val_dataset = FlatDataset(
            val_documents,
            val_labels,
            wv.vocab,
            dataset_config.WORDS_PER_DOC[PADDING],
        )
    else:
        val_dataset = HierarchicalDataset(
            val_documents,
            val_labels,
            wv.vocab,
            dataset_config.SENT_PER_DOC[PADDING],
            dataset_config.WORDS_PER_SENT[PADDING],
        )
    # NOTE(review): shuffling the validation loader is harmless for the
    # aggregate metrics computed here, but unusual — confirm it is intended.
    val_data_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=BATCH_SIZE, shuffle=True
    )

    # TensorBoard run directory: <LOG_DIR>/<dataset>/<model>/<PADDING>pad
    logdir = Path(f"{LOG_DIR}/{args.dataset}/{args.model}")
    logdir.mkdir(parents=True, exist_ok=True)
    writer = SummaryWriter(str(logdir / f"{PADDING}pad"))

    if args.model == "fan":
        model = Fan(
            embedding_matrix=wv.vectors,
            word_hidden_size=WORD_HIDDEN_SIZE,
            num_classes=len(train_labels.unique()),
            batch_size=BATCH_SIZE,
        ).to(DEVICE)
    else:
        model = Han(
            embedding_matrix=wv.vectors,
            word_hidden_size=WORD_HIDDEN_SIZE,
            sent_hidden_size=SENT_HIDDEN_SIZE,
            num_classes=len(train_labels.unique()),
            batch_size=BATCH_SIZE,
        ).to(DEVICE)

    # NLLLoss — presumably the models emit log-probabilities; verify the
    # final layer is log-softmax.
    criterion = torch.nn.NLLLoss().to(DEVICE)
    optimizer = torch.optim.SGD(
        (p for p in model.parameters() if p.requires_grad),
        lr=LEARNING_RATE,
        momentum=MOMENTUM,
    )
    # Decay LR x0.1 on val-loss plateau; its patience (PATIENCE - 2) is
    # shorter than the early-stopping patience, so the LR drop fires first.
    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, factor=0.1, patience=PATIENCE - 2, verbose=True,
    )

    train_losses = []
    train_accs = []
    val_losses = []
    val_accs = []
    best_val_loss = 1_000_000  # sentinel "infinity" for best-so-far tracking
    # NOTE(review): state_dict() returns references to the live parameter
    # tensors, not copies — continued training mutates this "best" snapshot
    # in place; a deep copy is usually wanted here. Confirm.
    best_state_dict = model.state_dict()
    actual_patience = 0
    for epoch in range(1, EPOCHS + 1):
        train_loss, train_acc = train_func(
            model, train_data_loader, criterion, optimizer, writer
        )
        train_losses.append(train_loss)
        train_accs.append(train_acc)
        val_loss, val_acc = test_func(model, val_data_loader, criterion)
        val_losses.append(val_loss)
        val_accs.append(val_acc)
        print(f"Epoch {epoch}")
        print(
            f" Train loss: {train_loss:.4}, Train acc: {train_acc * 100:.1f}%"
        )
        print(f" Val loss: {val_loss:.4}, Val acc: {val_acc * 100:.1f}%")
        lr_scheduler.step(val_loss)
        writer.add_scalar("Train/Loss", train_loss, epoch)
        writer.add_scalar("Train/Accuracy", train_acc, epoch)
        writer.add_scalar("Validation/Loss", val_loss, epoch)
        writer.add_scalar("Validation/Accuracy", val_acc, epoch)
        writer.add_scalar(
            "Learning rate", optimizer.param_groups[0]["lr"], epoch
        )
        # Early stopping with patience
        if val_loss < best_val_loss:
            actual_patience = 0
            best_val_loss = val_loss
            best_state_dict = model.state_dict()
        else:
            actual_patience += 1
            if actual_patience == PATIENCE:
                # Restore the best-so-far weights before leaving the loop.
                model.load_state_dict(best_state_dict)
                break

    # NOTE(review): if the loop completes WITHOUT early stopping, the
    # last-epoch weights — not the best ones — are what torch.save below
    # persists; confirm that is intended.
    writer.add_text(
        "Hyperparameters",
        f"BATCH_SIZE = {BATCH_SIZE}; "
        f"MOMENTUM = {MOMENTUM}; "
        f"PATIENCE = {PATIENCE}; "
        f"PADDING = {PADDING}",
    )
    writer.close()

    modeldir = Path(MODEL_DIR)
    modeldir.mkdir(parents=True, exist_ok=True)
    torch.save(
        model.state_dict(),
        f"{modeldir}/{args.dataset}-{args.model}-{PADDING}pad.pth",
    )