def main():
    """Train an LSTM recipe-category classifier and evaluate on a held-out split.

    Reads a preprocessed pickle dataset, trains for NUM_EPOCHS (hyperparameters
    come from environment variables via dotenv), pickles (losses, category_index,
    model) into RESULTDIR, then prints test accuracy.
    """
    BASEDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    # NOTE(review): 'reciepe' is a typo, but it must match the on-disk
    # directory name -- do not "fix" without renaming the data directory.
    DATADIR = os.path.join(BASEDIR, 'data/reciepe_category/')
    FILENAME = "recipe_category_datasets_preprocessed.pkl"
    datapath = os.path.join(DATADIR, FILENAME)
    # Raise instead of assert: asserts are stripped under `python -O`.
    if not os.path.exists(datapath):
        raise FileNotFoundError("data file not found")
    with open(datapath, 'rb') as f:
        # NOTE(review): pickle.load is only safe on trusted, locally
        # produced files -- this dataset is generated by our own pipeline.
        datasets = pickle.load(f)

    # 90/10 train/test split.
    traindata, testdata = train_test_split(datasets, train_size=0.9)

    # Hyperparameters come from the environment (.env file).
    load_dotenv(verbose=True)
    EMBEDDING_DIM = int(os.environ.get("EMBEDDING_DIM"))
    HIDDEN_DIM = int(os.environ.get("HIDDEN_DIM"))

    category_index = get_category_index(datasets)
    tag_size = len(category_index)
    model = LSTMClassifier(EMBEDDING_DIM, HIDDEN_DIM, tag_size)
    loss_function = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=float(os.environ.get("LEARNING_RATE")))

    losses = []
    for epoch in range(int(os.environ["NUM_EPOCHS"])):
        all_loss = 0
        # One optimizer step per title (batch size 1).
        for title_embeded, cat in zip(traindata["title_embeded"],
                                      traindata["category"]):
            model.zero_grad()
            out = model(title_embeded)
            answer = category2tensor(cat, category_index)
            loss = loss_function(out, answer)
            loss.backward()
            optimizer.step()
            all_loss += loss.item()
        losses.append(all_loss)
        print("epoch", epoch, "\t", "loss", all_loss)
    print("done.")

    RESULTDIR = os.path.join(BASEDIR, os.environ.get("RESULTDIR"))
    # exist_ok avoids the check-then-create race of the old
    # `if not exists: makedirs` pattern.
    os.makedirs(RESULTDIR, exist_ok=True)
    # strftime instead of str(now): the default repr contains spaces and
    # colons, which are invalid in filenames on Windows.
    now = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    OUTPUTFILENAME = "trainresult-{}.pkl".format(now)
    with open(os.path.join(RESULTDIR, OUTPUTFILENAME), 'wb') as f:
        pickle.dump((losses, category_index, model), f)
    print("model saved")

    # Evaluate on the held-out split; count exact category matches.
    test_num = len(testdata)
    a = 0
    with torch.no_grad():
        for title_embeded, cat in zip(testdata["title_embeded"],
                                      testdata["category"]):
            out = model(title_embeded)
            _, predict = torch.max(out, 1)
            answer = category2tensor(cat, category_index)
            if predict == answer:
                a += 1
    print("predict: ", a / test_num)
test_acc_ = []

### training procedure
for epoch in range(epochs):
    optimizer = adjust_learning_rate(optimizer, epoch)
    # Integer division: range() rejects floats on Python 3 (the original
    # `/` raised TypeError). A partial trailing batch is dropped.
    batch_num = len(train_data) // batch_size

    ## training epoch
    total_acc = 0.0
    total_loss = 0.0
    total = 0.0
    for i in range(batch_num):
        train_inputs = train_data[i * batch_size:(i + 1) * batch_size]
        # TODO(review): train_labels is never assigned inside this loop
        # (original comment said "I stopped here") -- it must be sliced
        # from the label source for the same batch, e.g.:
        #   train_labels = labels[i * batch_size:(i + 1) * batch_size]

        model.zero_grad()
        model.batch_size = len(train_labels)
        model.hidden = model.init_hidden()
        # model expects (seq_len, batch), hence the transpose.
        output = model(train_inputs.t())
        # Removed stray `sys.exit(0)` debugging leftover that aborted the
        # whole run after the first forward pass.
        loss = loss_function(output, Variable(train_labels))
        loss.backward()
        optimizer.step()

        # calc training acc
        _, predicted = torch.max(output.data, 1)
        # .item(): comparing/summing returns a 0-dim tensor; accumulating
        # raw tensors leaks graph/GPU memory across iterations.
        total_acc += (predicted == train_labels).sum().item()
        total += len(train_labels)
        # loss.data[0] was removed in PyTorch >= 0.4; loss.item() is the
        # supported scalar accessor.
        total_loss += loss.item()