# Optional CUDA device indices; the default empty list leaves training on the CPU.
parser.add_argument(
    '-gpus',
    type=int,
    nargs='+',
    default=[],
    help="Use CUDA on the listed devices.",
)
# Path of the corpus that is handed to the dataset loader below.
parser.add_argument(
    '-corpus',
    type=str,
    default="../data/small",
    help="corpus to train the model",
)
# Parse the command line once; every later setting is read from `opt`.
opt = parser.parse_args()
# Pick the compute device: with at least one GPU id on the command line, the
# first id becomes the current CUDA device and the training device; with none,
# everything stays on the CPU.
if opt.gpus:
    torch.cuda.set_device(opt.gpus[0])
    device = torch.device("cuda:{}".format(opt.gpus[0]))
else:
    device = torch.device("cpu")

# Load the corpus and report how many training instances it contains.
data_set = LoadData.DataSet(opt.corpus)
num_data = len(data_set)
print("Training instance:", num_data)

# Shuffled mini-batches; num_workers=0 keeps loading in the main process.
dl = torch.utils.data.DataLoader(
    data_set,
    batch_size=opt.batch_size,
    shuffle=True,
    num_workers=0,
)

# Loss function, moved to the selected device.
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)

epochs = opt.epochs

# NOTE(review): 20000 is the hard-coded first constructor argument --
# presumably the vocabulary size; confirm against Net.CBOWNet.
network = Net.CBOWNet(20000, opt.embed_dimension)
network = network.to(device)
print(device)

# Adam with its default hyper-parameters over all model parameters.
optimizer = torch.optim.Adam(network.parameters())

# Cosine similarity computed along dim 0; not used in this setup section.
cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)

# Large starting value for the best loss seen so far; presumably lowered by
# the training loop -- TODO confirm.
best_loss = 1e6

for i in range(epochs):