Example #1
import os
import pickle

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

# SAT, get_loader, and test_func are project-specific helpers assumed to be
# importable from the surrounding project.


def main(args):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(device)
    with open(os.path.join(args.path, 'mapping.pkl'), 'rb') as f:
        mapping = pickle.load(f)

    vocab_size = len(mapping)
    sat = SAT(vocab_size=vocab_size)
    sat.to(device)
    loss_func = nn.CrossEntropyLoss(ignore_index=2)
    opt = optim.Adam(sat.parameters(), lr=1e-4, betas=(0.9, 0.98), eps=1e-9)

    if not args.test:
        train_dl = get_loader(mode='train',
                              path=args.path,
                              batch_size=args.batch_size)
        val_dl = get_loader(mode='val',
                            path=args.path,
                            batch_size=args.batch_size)
        best_loss = float('inf')
        for epoch in range(args.epochs):
            pbar = tqdm(train_dl)
            sat.train()
            for xb, yb in pbar:
                loss = sat.train_batch(xb, yb)
                pbar.set_description("| epoch: {:3d} | loss: {:.6f} |".format(
                    epoch + 1, loss))

            sat.eval()
            with torch.no_grad():
                val_losses = []
                for xb, yb in tqdm(val_dl):
                    loss = sat.valid_batch(xb, yb)
                    val_losses.append(loss.detach().cpu().numpy())

                val_loss = np.mean(val_losses)
                print('val loss: {:.6f}'.format(val_loss))
                if best_loss > val_loss:
                    best_loss = val_loss
                    sat.save(file_name='model/best-model.pt', num_epoch=epoch)

    # test
    else:
        print('testing...')
        beam_size = 5
        test_dl = get_loader(mode='test',
                             path=args.path,
                             batch_size=args.batch_size)
        sat.load('./model/best-model.pt')
        with open(os.path.join(args.path, 'itow.pkl'), 'rb') as f:
            itow = pickle.load(f)

        P = []
        for xb, yb in tqdm(test_dl):
            pred = sat.inference(xb, beam_size=beam_size, max_len=36)
            P += pred

        test_func(P, itow)
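The loop above leans on a project-specific SAT class. A minimal sketch of the interface it assumes (all bodies hypothetical; only the method names, arguments, and return types are taken from the calls above):

class SAT(nn.Module):
    """Hypothetical skeleton of the SAT model used in Example #1."""

    def train_batch(self, xb, yb):
        """One forward/backward/optimizer step; returns the loss as a float."""
        ...

    def valid_batch(self, xb, yb):
        """Forward pass only; returns the loss as a tensor."""
        ...

    def inference(self, xb, beam_size=5, max_len=36):
        """Beam-search decoding; returns a list of predicted token sequences."""
        ...

    def save(self, file_name, num_epoch):
        """Persists weights (and the epoch number) to file_name."""
        ...

    def load(self, path):
        """Restores weights saved by save()."""
        ...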
Example #2
# Style 1: import the module and access its members through the namespace
import test

test.test_func()
print(test.myvar)

# Style 2: import a name directly into the current namespace
from test import test_func

myvar = test_func()
print(myvar)
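For this and the following examples to run, a test module must be importable. A minimal hypothetical test.py consistent with the calls shown here (test_func prints "Hello!" per the comment in Example #4, and returns a value since this example assigns its result):

# test.py -- hypothetical module assumed by these examples
myvar = 42  # module-level variable read via test.myvar

def test_func():
    print("Hello!")
    return myvar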
Example #3
#!/usr/bin/python
# Environment setup: the shebang above lets the script run via the Python interpreter

# Importing a module automatically generates a .pyc (bytecode) file, which speeds up later loads
import test


def hello():
    print("\ncall hello fun\n")


# This is the entry point.
# If this module is run directly, __name__ == "__main__".
# If this module is imported, __name__ is set to the module's own name.
# A trailing semicolon at the end of a statement is optional.
print("__name__ = " + __name__)
if __name__ == "__main__":
    print("\nhello world\n")
    hello()
    test.test_func()
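A quick way to observe the other branch of the __name__ check (hypothetical companion file, assuming the module above is saved as hello.py):

# demo.py -- hypothetical; importing hello runs its top-level code
import hello   # prints: __name__ = hello  (the guarded block is skipped)

hello.hello()  # prints: call hello fun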
Example #4
import test
import socket
import tensorflow as tf
import Numpy_test

host = ''        # bind to all interfaces
port = 50000
backlog = 5      # max queued connections
size = 1024      # receive buffer size in bytes

# Bind the server socket to its own name to avoid shadowing the socket module
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.bind((host, port))
sock.listen(backlog)

test.test_func()  # prints "Hello!"
Numpy_test.numpy_func()
print("Server Started")

while True:
    connection = None
    try:
        connection, address = sock.accept()
        print("Unity Connected.")
        # connection.send(b"First call from Python\n")

        while True:
            data = connection.recv(size).decode()
            if data:
                if data == "Disconnect":
                    print("Connection closed by Unity")
                    break
            else:
                break  # empty read: the peer closed the socket
    finally:
        if connection:
            connection.close()
Example #5
import test


def run_script():
    print(__name__)
    print(__file__)

    test.test_func()
    print(test.__file__)
Example #6
def runner(imgs):
    return test_func(model, imgs=torch.Tensor(np.float32(imgs)))['preds']
Example #7
import argparse
from pathlib import Path

import pandas as pd
import torch
from gensim.models import KeyedVectors
from torch.utils.tensorboard import SummaryWriter

# Fan, Han, FlatDataset, HierarchicalDataset, train_func, test_func, the
# dataset configs (Yelp, Yahoo, Amazon, Synthetic) and the UPPER_CASE
# constants come from the project this example was taken from.


def main():
    parser = argparse.ArgumentParser(
        description="Train the FAN or the HAN model"
    )
    parser.add_argument(
        "dataset",
        choices=["yelp", "yahoo", "amazon", "synthetic"],
        help="Choose the dataset",
    )
    parser.add_argument(
        "model",
        choices=["fan", "han"],
        help="Choose the model to be trained (flat or hierarchical)",
    )

    args = parser.parse_args()

    if args.dataset == "yelp":
        dataset_config = Yelp
    elif args.dataset == "yahoo":
        dataset_config = Yahoo
    elif args.dataset == "amazon":
        dataset_config = Amazon
    elif args.dataset == "synthetic":
        dataset_config = Synthetic
    else:
        # Unreachable: argparse restricts "dataset" to the choices above
        exit()

    wv = KeyedVectors.load(dataset_config.EMBEDDING_FILE)

    train_df = pd.read_csv(dataset_config.TRAIN_DATASET).fillna("")
    train_documents = train_df.text
    train_labels = train_df.label
    if args.model == "fan":
        train_dataset = FlatDataset(
            train_documents,
            train_labels,
            wv.vocab,
            dataset_config.WORDS_PER_DOC[PADDING],
        )
    else:
        train_dataset = HierarchicalDataset(
            train_documents,
            train_labels,
            wv.vocab,
            dataset_config.SENT_PER_DOC[PADDING],
            dataset_config.WORDS_PER_SENT[PADDING],
        )
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2,
    )

    val_df = pd.read_csv(dataset_config.VAL_DATASET).fillna("")
    val_documents = val_df.text
    val_labels = val_df.label
    if args.model == "fan":
        val_dataset = FlatDataset(
            val_documents,
            val_labels,
            wv.vocab,
            dataset_config.WORDS_PER_DOC[PADDING],
        )
    else:
        val_dataset = HierarchicalDataset(
            val_documents,
            val_labels,
            wv.vocab,
            dataset_config.SENT_PER_DOC[PADDING],
            dataset_config.WORDS_PER_SENT[PADDING],
        )
    val_data_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=BATCH_SIZE, shuffle=True
    )

    logdir = Path(f"{LOG_DIR}/{args.dataset}/{args.model}")
    logdir.mkdir(parents=True, exist_ok=True)
    writer = SummaryWriter(str(logdir / f"{PADDING}pad"))

    if args.model == "fan":
        model = Fan(
            embedding_matrix=wv.vectors,
            word_hidden_size=WORD_HIDDEN_SIZE,
            num_classes=len(train_labels.unique()),
            batch_size=BATCH_SIZE,
        ).to(DEVICE)
    else:
        model = Han(
            embedding_matrix=wv.vectors,
            word_hidden_size=WORD_HIDDEN_SIZE,
            sent_hidden_size=SENT_HIDDEN_SIZE,
            num_classes=len(train_labels.unique()),
            batch_size=BATCH_SIZE,
        ).to(DEVICE)

    criterion = torch.nn.NLLLoss().to(DEVICE)
    optimizer = torch.optim.SGD(
        (p for p in model.parameters() if p.requires_grad),
        lr=LEARNING_RATE,
        momentum=MOMENTUM,
    )
    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, factor=0.1, patience=PATIENCE - 2, verbose=True,
    )

    train_losses = []
    train_accs = []
    val_losses = []
    val_accs = []
    best_val_loss = float("inf")
    best_state_dict = model.state_dict()
    actual_patience = 0
    for epoch in range(1, EPOCHS + 1):
        train_loss, train_acc = train_func(
            model, train_data_loader, criterion, optimizer, writer
        )
        train_losses.append(train_loss)
        train_accs.append(train_acc)

        val_loss, val_acc = test_func(model, val_data_loader, criterion)
        val_losses.append(val_loss)
        val_accs.append(val_acc)

        print(f"Epoch {epoch}")
        print(
            f"  Train loss: {train_loss:.4}, Train acc: {train_acc * 100:.1f}%"
        )
        print(f"  Val loss: {val_loss:.4}, Val acc: {val_acc * 100:.1f}%")

        lr_scheduler.step(val_loss)

        writer.add_scalar("Train/Loss", train_loss, epoch)
        writer.add_scalar("Train/Accuracy", train_acc, epoch)
        writer.add_scalar("Validation/Loss", val_loss, epoch)
        writer.add_scalar("Validation/Accuracy", val_acc, epoch)
        writer.add_scalar(
            "Learning rate", optimizer.param_groups[0]["lr"], epoch
        )

        # Early stopping with patience
        if val_loss < best_val_loss:
            actual_patience = 0
            best_val_loss = val_loss
            best_state_dict = model.state_dict()
        else:
            actual_patience += 1
            if actual_patience == PATIENCE:
                model.load_state_dict(best_state_dict)
                break

    writer.add_text(
        "Hyperparameters",
        f"BATCH_SIZE = {BATCH_SIZE}; "
        f"MOMENTUM = {MOMENTUM}; "
        f"PATIENCE = {PATIENCE}; "
        f"PADDING = {PADDING}",
    )
    writer.close()

    modeldir = Path(MODEL_DIR)
    modeldir.mkdir(parents=True, exist_ok=True)
    torch.save(
        model.state_dict(),
        f"{modeldir}/{args.dataset}-{args.model}-{PADDING}pad.pth",
    )
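The script calls test_func(model, val_data_loader, criterion) and unpacks a (loss, accuracy) pair. A minimal hypothetical implementation consistent with that contract (the model is assumed to return log-probabilities, since NLLLoss is used):

def test_func(model, data_loader, criterion):
    # Hypothetical evaluation helper; only the signature and the
    # (loss, accuracy) return shape are taken from the script above.
    model.eval()
    total_loss, correct, seen = 0.0, 0, 0
    with torch.no_grad():
        for features, labels in data_loader:
            features, labels = features.to(DEVICE), labels.to(DEVICE)
            outputs = model(features)  # log-probabilities (NLLLoss)
            total_loss += criterion(outputs, labels).item() * labels.size(0)
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            seen += labels.size(0)
    return total_loss / seen, correct / seen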