Beispiel #1
0
    def generator(self):
        """Re-yield every item produced by the parent ``generator()``.

        Any ``Exception`` raised while iterating is logged through
        ``log.exception`` and ends the stream instead of propagating.
        """
        try:
            parent_stream = super(GraphGenerator, self).generator()
            for item in parent_stream:
                yield item

        except Exception as err:
            log.exception(err)
Beispiel #2
0
    def start(self):
        """Run the training loop for ``self.config.epoch`` epochs.

        Every batch from ``self.model.data_loader()`` is fed to
        ``self.exe.run`` fetching ``self.model.loss``.  On trainer 0
        (``PADDLE_TRAINER_ID``, defaulting to 0) the mean loss is written to
        a ``SummaryWriter`` under ``<output_path>/train_history``, progress
        is logged every ``log_per_step`` batches, and persistables are saved
        to ``<output_path>/<batch>`` every ``save_per_step`` batches.

        A failure inside a single batch is logged and training continues
        with the next batch.  The summary writer is closed when the loop
        ends, including when an error escapes it.
        """
        batch = 0
        start = time.time()
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
        # Only trainer 0 owns a writer; everyone else keeps None.
        writer = None
        if trainer_id == 0:
            writer = SummaryWriter(
                os.path.join(self.config.output_path, "train_history"))

        try:
            for epoch_idx in range(self.config.epoch):
                for batch_feed_dict in self.model.data_loader():
                    try:
                        cpu_time = time.time()
                        batch += 1
                        batch_loss = self.exe.run(self.program,
                                                  feed=batch_feed_dict,
                                                  fetch_list=[self.model.loss])
                        end = time.time()
                        if trainer_id == 0:
                            mean_loss = np.mean(batch_loss)
                            writer.add_scalar("loss", mean_loss, batch)
                            if batch % self.config.log_per_step == 0:
                                # First figure is the average wall time per
                                # batch since start, second is this batch.
                                log.info(
                                    "Epoch %s Batch %s %s-Loss %s \t Speed(per batch) %.5lf/%.5lf sec"
                                    % (epoch_idx, batch, "train", mean_loss,
                                       (end - start) / batch,
                                       (end - cpu_time)))
                                writer.flush()
                            if batch % self.config.save_per_step == 0:
                                self.fleet.save_persistables(
                                    self.exe,
                                    os.path.join(self.config.output_path,
                                                 str(batch)))
                    except Exception as e:
                        # Best effort: a bad batch must not abort training.
                        log.info("Pyreader train error")
                        log.exception(e)
                log.info("epoch %s done." % epoch_idx)
        finally:
            # Scalars are only flushed on logging steps above; closing the
            # writer makes sure the trailing ones reach disk.
            if writer is not None:
                writer.close()
Beispiel #3
0
    def pair_generate(self):
        """Yield ``(src, pos, negs)`` training triples, one per walk batch.

        For each batch from ``self.walk_generator()`` the pairs returned by
        ``skip_gram_gen_pair`` for every walk are flattened, negatives are
        drawn with ``self.negative_sample`` and ``src`` / ``pos`` are
        returned as int64 arrays reshaped to ``(-1, 1, 1)``.  Batches that
        produce no pairs are skipped; a batch that raises is logged and
        skipped.
        """
        for walk_batch in self.walk_generator():
            try:
                pairs = [
                    skip_gram_gen_pair(walk, self.config.win_size)
                    for walk in walk_batch
                ]
                src = [node for centers, _ in pairs for node in centers]
                pos = [node for _, contexts in pairs for node in contexts]

                if not src:
                    continue

                negs = self.negative_sample(
                    src,
                    pos,
                    neg_num=self.config.neg_num,
                    neg_sample_type=self.config.neg_sample_type)

                src_arr = np.array(src, dtype=np.int64).reshape(-1, 1, 1)
                pos_arr = np.array(pos, dtype=np.int64).reshape(-1, 1, 1)

                yield src_arr, pos_arr, negs

            except Exception as err:
                log.exception(err)
Beispiel #4
0
def train_prog(exe, program, model, pyreader, args):
    """Train ``program`` for ``args.num_epoch`` passes over ``pyreader()``.

    Args:
        exe: executor; ``exe.run(program, feed=..., fetch_list=...)`` must
            return a ``(loss, acc)`` pair.
        program: program object handed to ``exe.run`` unchanged.
        model: object exposing ``loss`` and ``acc`` fetch targets.
        pyreader: zero-argument callable returning an iterable of feed
            dicts; it is called once per epoch.
        args: namespace providing ``num_epoch``, ``log_per_step``,
            ``steps_per_save`` and ``save_path``.

    A failure inside a single batch is logged and training continues with
    the next batch.  Only trainer 0 (``PADDLE_TRAINER_ID``, defaulting to 0)
    saves persistables.
    """
    trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
    start = time.time()
    batch = 0  # global batch counter across all epochs
    for _epoch in range(args.num_epoch):
        for step, batch_feed_dict in enumerate(pyreader()):
            try:
                cpu_time = time.time()
                batch += 1
                batch_loss, batch_acc = exe.run(
                    program,
                    feed=batch_feed_dict,
                    fetch_list=[model.loss, model.acc])

                end = time.time()
                if batch % args.log_per_step == 0:
                    # First figure is the average wall time per batch since
                    # start, second is the time spent on this batch.
                    log.info(
                        "Batch %s Loss %s Acc %s \t Speed(per batch) %.5lf/%.5lf sec"
                        % (batch, np.mean(batch_loss), np.mean(batch_acc),
                           (end - start) / batch, (end - cpu_time)))

                # NOTE(review): the checkpoint is keyed on the per-epoch
                # `step`, so step 0 always saves and later epochs write to
                # the same paths as earlier ones -- confirm this is intended.
                if step % args.steps_per_save == 0 and trainer_id == 0:
                    model_path = os.path.join(args.save_path, "%s" % step)
                    fleet.save_persistables(exe, model_path)
            except Exception as e:
                # Best effort: a bad batch must not abort training.
                log.info("Pyreader train error")
                log.exception(e)
Beispiel #5
0
    def generator(self):
        """Re-yield batches from the parent ``generator()``.

        When ``self.use_pyreader`` is truthy each batch is converted to a
        list of its values ordered by ``self.feed_name_list``; otherwise the
        batch is passed through untouched.  Any ``Exception`` raised while
        iterating is logged through ``log.exception`` and ends the stream.
        """
        try:
            parent_stream = super(GraphGenerator, self).generator()
            for batch in parent_stream:
                if not self.use_pyreader:
                    yield batch
                else:
                    yield [batch[key] for key in self.feed_name_list]

        except Exception as err:
            log.exception(err)
Beispiel #6
0
    def __call__(self):
        """Yield ``(walk[s], walk[p])`` for every index pair of every walk.

        Index pairs come from ``skip_gram_gen_pair`` applied to the positions
        ``0..len(walk)-1`` with window ``self.config.win_size``.  Progress is
        logged on rank 0 at a fixed pair interval, and the total is logged
        once the walk generator is exhausted.  A walk batch that raises is
        logged and skipped.
        """
        log_every = 20000000 * 24 // self.config.walk_len
        n_pairs = 0
        for walk_batch in self.walk_generator():
            try:
                for walk in walk_batch:
                    positions = np.arange(0, len(walk), dtype="int64")
                    src_idx, ctx_idx = skip_gram_gen_pair(
                        positions, self.config.win_size)
                    for s, p in zip(src_idx, ctx_idx):
                        yield walk[s], walk[p]
                        n_pairs += 1
                        due = n_pairs % log_every == 0
                        if due and self.rank == 0:
                            message = "[%s] pairs have been loaded in rank [%s]"
                            log.info(message % (n_pairs, self.rank))

            except Exception as err:
                log.exception(err)

        log.info("total [%s] pairs in rank [%s]" % (n_pairs, self.rank))
Beispiel #7
0
    def __call__(self):
        """Yield ``(src, dst)`` skip-gram batches with negative samples.

        For each batch from ``self.walk_generator()`` the pairs returned by
        ``skip_gram_gen_pair`` are flattened into int64 arrays reshaped to
        ``(-1, 1, 1)``.  Negatives are drawn according to
        ``self.neg_sample_type``:

        * ``"average"``   -- uniform over ``[0, graph.num_nodes)``.
        * ``"outdegree"`` -- alias sampling from the out-degree distribution.

        ``dst`` is ``pos`` concatenated with the negatives along axis 1.
        Both outputs are truncated to ``max_len`` rows.  Empty batches are
        skipped; a batch that raises is logged and skipped.

        NOTE(review): ``"inbatch"`` has no sampler in this reader.  The
        previous code fell through and failed with a ``NameError`` on
        ``negs`` for every batch; that is now reported as an explicit
        ``NotImplementedError`` (still logged and skipped, as before).
        """
        # Seed per process so each worker draws different negatives.
        np.random.seed(os.getpid())
        if self.neg_sample_type == "outdegree":
            outdegree = self.graph.outdegree()
            distribution = 1. * outdegree / outdegree.sum()
            alias, events = alias_sample_build_table(distribution)
        max_len = int(self.batch_size * self.walk_len *
                      ((1 + self.win_size) - 0.3))
        for walks in self.walk_generator():
            try:
                src_list, pos_list = [], []
                for walk in walks:
                    s, p = skip_gram_gen_pair(walk, self.win_size)
                    src_list.append(s[:max_len])
                    pos_list.append(p[:max_len])
                src = [s for x in src_list for s in x]
                pos = [s for x in pos_list for s in x]
                # The original had a stray trailing comma here that wrapped
                # `src` in a 1-tuple; reshape happened to undo it.
                src = np.array(src, dtype=np.int64).reshape(-1, 1, 1)
                pos = np.array(pos, dtype=np.int64).reshape(-1, 1, 1)

                if src.shape[0] == 0:
                    continue

                neg_sample_size = [len(pos), self.neg_num, 1]
                if self.neg_sample_type == "average":
                    negs = np.random.randint(low=0,
                                             high=self.graph.num_nodes,
                                             size=neg_sample_size)
                elif self.neg_sample_type == "outdegree":
                    negs = alias_sample(neg_sample_size, alias, events)
                else:
                    raise NotImplementedError(
                        "neg_sample_type %r is not supported by this reader"
                        % (self.neg_sample_type, ))
                dst = np.concatenate([pos, negs], 1)
                # [batch_size, 1, 1] [batch_size, neg_num+1, 1]
                yield src[:max_len], dst[:max_len]
            except Exception as e:
                log.exception(e)