import datetime
import os
import pickle

import torch
import torch.nn as nn
import torch.optim as optim
from dotenv import load_dotenv
from sklearn.model_selection import train_test_split

# NOTE: LSTMClassifier, get_category_index, and category2tensor are
# project-local helpers assumed to be importable from elsewhere in the package.


def main():
    BASEDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    DATADIR = os.path.join(BASEDIR, 'data/reciepe_category/')
    FILENAME = "recipe_category_datasets_preprocessed.pkl"
    assert os.path.exists(os.path.join(DATADIR, FILENAME)), "data file not found"
    with open(os.path.join(DATADIR, FILENAME), 'rb') as f:
        datasets = pickle.load(f)
    traindata, testdata = train_test_split(datasets, train_size=0.9)
    load_dotenv(verbose=True)
    EMBEDDING_DIM = int(os.environ.get("EMBEDDING_DIM"))
    HIDDEN_DIM = int(os.environ.get("HIDDEN_DIM"))
    category_index = get_category_index(datasets)
    tag_size = len(category_index)
    model = LSTMClassifier(EMBEDDING_DIM, HIDDEN_DIM, tag_size)
    loss_function = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(), lr=float(
        os.environ.get("LEARNING_RATE")))

    losses = []
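    # One pass over the training titles per epoch; the summed NLL loss is
    # recorded so the learning curve can be saved alongside the model below.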
    for epoch in range(int(os.environ["NUM_EPOCHS"])):
        all_loss = 0
        for title_embeded, cat in zip(traindata["title_embeded"], traindata["category"]):
            model.zero_grad()
            out = model(title_embeded)
            answer = category2tensor(cat, category_index)
            loss = loss_function(out, answer)
            loss.backward()
            optimizer.step()
            all_loss += loss.item()
        losses.append(all_loss)
        print("epoch", epoch, "\t", "loss", all_loss)
    print("done.")
    RESULTDIR = os.path.join(BASEDIR, os.environ.get("RESULTDIR"))
    if not os.path.exists(RESULTDIR):
        os.makedirs(RESULTDIR)
    now = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    OUTPUTFILENAME = "trainresult-{}.pkl".format(now)
    with open(os.path.join(RESULTDIR, OUTPUTFILENAME), 'wb') as f:
        pickle.dump((losses, category_index, model), f)
        print("model saved")

    # Evaluate on the held-out split: count exact category matches, no gradients needed.
    test_num = len(testdata)
    correct = 0
    with torch.no_grad():
        for title_embeded, cat in zip(testdata["title_embeded"], testdata["category"]):
            out = model(title_embeded)
            _, predict = torch.max(out, 1)
            answer = category2tensor(cat, category_index)
            if predict.item() == answer.item():
                correct += 1
    print("test accuracy:", correct / test_num)
Example #2
    test_acc_ = []
    ### training procedure
    for epoch in range(epochs):
        optimizer = adjust_learning_rate(optimizer, epoch)

        batch_num = len(train_data) // batch_size
        ## training epoch
        total_acc = 0.0
        total_loss = 0.0
        total = 0.0
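        # Iterate over contiguous mini-batches of size batch_size.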
        for i in range(batch_num):
            train_inputs = train_data[i * batch_size:(i + 1) * batch_size]
            # TODO (translated from Hungarian): "this is where I left off".
            # The label batch was never constructed; a sketch, assuming a hypothetical
            # label tensor `train_label` aligned one-to-one with train_data:
            train_labels = train_label[i * batch_size:(i + 1) * batch_size]

            model.zero_grad()
            model.batch_size = len(train_labels)
            model.hidden = model.init_hidden()

            output = model(train_inputs.t())

            loss = loss_function(output, train_labels)
            loss.backward()
            optimizer.step()

            # calc training acc
            _, predicted = torch.max(output.data, 1)
            total_acc += (predicted == train_labels).sum().item()
            total += len(train_labels)
            total_loss += loss.item()