import numpy as np
import paddle
import pgl
import tqdm
from paddle.optimizer import Adam
from pgl.utils.data import Dataloader
from pgl.utils.logger import log

# SkipGramModel, ShardedDataset, BatchRandWalk, load and load_from_file are
# assumed to be defined elsewhere in this script; `train` is sketched below.


def main(args):
    if not args.use_cuda:
        paddle.set_device("cpu")
    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.init_parallel_env()

    graph = load(args.dataset)

    model = SkipGramModel(
        graph.num_nodes,
        args.embed_size,
        args.neg_num,
        sparse=not args.use_cuda)
    model = paddle.DataParallel(model)

    # One optimizer step per batch; decay the learning rate over all epochs.
    train_steps = int(graph.num_nodes / args.batch_size) * args.epoch
    scheduler = paddle.optimizer.lr.PolynomialDecay(
        learning_rate=args.learning_rate,
        decay_steps=train_steps,
        end_lr=0.0001)
    optim = Adam(learning_rate=scheduler, parameters=model.parameters())

    train_ds = ShardedDataset(graph.nodes)
    collate_fn = BatchRandWalk(graph, args.walk_len, args.win_size,
                               args.neg_num, args.neg_sample_type)
    data_loader = Dataloader(
        train_ds,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.sample_workers,
        collate_fn=collate_fn)

    for epoch in tqdm.tqdm(range(args.epoch)):
        train_loss = train(model, data_loader, optim)
        log.info("Running epoch:%s\t train_loss:%.6f", epoch, train_loss)
    paddle.save(model.state_dict(), "model.pdparams")
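# The `train` helper called above isn't shown in this file. A minimal sketch,
# assuming each batch produced by BatchRandWalk is a (src, dsts) pair that
# paddle.to_tensor accepts and that model(src, dsts) returns the skip-gram
# loss with negative sampling; `log_per_step` is an illustrative parameter.
def train(model, data_loader, optim, log_per_step=100):
    model.train()
    total_loss, total_samples = 0.0, 0
    for step, (src, dsts) in enumerate(data_loader):
        num_samples = len(src)
        src = paddle.to_tensor(src)
        dsts = paddle.to_tensor(dsts)
        loss = model(src, dsts)  # negative-sampling skip-gram loss
        loss.backward()
        optim.step()
        optim.clear_grad()
        total_loss += loss.item() * num_samples
        total_samples += num_samples
        if step % log_per_step == 0:
            log.info("step %s, train_loss %.6f", step, loss.item())
    # Mean loss per sample over this pass of the data loader.
    return total_loss / total_samples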
# Variant of main that can also build the graph from an edge file, and folds
# all epochs into a single pass by repeating the dataset.
def main(args):
    if not args.use_cuda:
        paddle.set_device("cpu")
    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.init_parallel_env()

    if args.edge_file:
        graph = load_from_file(args.edge_file)
    elif args.dataset:
        graph = load(args.dataset)
    else:
        # Fall back to a local numpy edge list; append reversed edges so the
        # graph is treated as undirected.
        edges = np.load("./edges.npy")
        edges = np.concatenate([edges, edges[:, [1, 0]]])
        graph = pgl.Graph(edges)

    model = SkipGramModel(
        graph.num_nodes,
        args.embed_size,
        args.neg_num,
        sparse=not args.use_cuda)
    model = paddle.DataParallel(model)

    # Repeating the dataset args.epoch times turns the whole run into one
    # pass over the data loader, so train() is called only once.
    train_ds = ShardedDataset(graph.nodes, repeat=args.epoch)

    train_steps = int(len(train_ds) // args.batch_size)
    log.info("train_steps: %s" % train_steps)
    scheduler = paddle.optimizer.lr.PolynomialDecay(
        learning_rate=args.learning_rate,
        decay_steps=train_steps,
        end_lr=0.0001)
    optim = Adam(learning_rate=scheduler, parameters=model.parameters())

    collate_fn = BatchRandWalk(graph, args.walk_len, args.win_size,
                               args.neg_num, args.neg_sample_type)
    data_loader = Dataloader(
        train_ds,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.sample_workers,
        collate_fn=collate_fn)

    train_loss = train(model, data_loader, optim)
    paddle.save(model.state_dict(), "model.pdparams")
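# Neither version shows how `args` is constructed. A minimal entry point,
# assuming argparse flags matching the attributes `main` reads; the defaults
# below are illustrative, not taken from the source.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Skip-gram node embedding training")
    parser.add_argument("--dataset", type=str, default="BlogCatalog")
    parser.add_argument("--edge_file", type=str, default=None)
    parser.add_argument("--use_cuda", action="store_true")
    parser.add_argument("--embed_size", type=int, default=128)
    parser.add_argument("--epoch", type=int, default=10)
    parser.add_argument("--batch_size", type=int, default=512)
    parser.add_argument("--learning_rate", type=float, default=0.025)
    parser.add_argument("--neg_num", type=int, default=5)
    parser.add_argument("--neg_sample_type", type=str, default="average")
    parser.add_argument("--walk_len", type=int, default=40)
    parser.add_argument("--win_size", type=int, default=5)
    parser.add_argument("--sample_workers", type=int, default=4)
    main(parser.parse_args())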