def _worker(reader: DatasetReader,
            input_queue: Queue,
            output_queue: Queue,
            index: int) -> None:
    """
    A worker that pulls filenames off the input queue, uses the dataset reader
    to read them, and places the generated instances on the output queue.
    When there are no filenames left on the input queue, it puts its ``index``
    on the output queue and doesn't do anything else.
    """
    # Keep going until you get a file_path that's None.
    while True:
        file_path = input_queue.get()
        if file_path is None:
            # Put my index on the queue to signify that I'm finished
            output_queue.put(index)
            break

        logger.info(f"reading instances from {file_path}")
        for instance in reader.read(file_path):
            output_queue.put(instance)
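
    # Note: the generator below is a method of the dataset-reader class that owns
    # these workers; it is reproduced here with its original class-level indentation.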
    def _instances(self, file_path: str, manager: Manager, output_queue: Queue) -> Iterator[Instance]:
        """
        A generator that reads instances off the output queue and yields them up
        until none are left (signified by all ``num_workers`` workers putting their
        ids into the queue).
        """
        shards = glob.glob(file_path)
        num_shards = len(shards)

        # If we want multiple epochs per read, put shards in the queue multiple times.
        input_queue = manager.Queue(num_shards * self.epochs_per_read + self.num_workers)
        for _ in range(self.epochs_per_read):
            random.shuffle(shards)
            for shard in shards:
                input_queue.put(shard)

        # Then put a None per worker to signify no more files.
        for _ in range(self.num_workers):
            input_queue.put(None)

        processes: List[Process] = []
        num_finished = 0

        for worker_id in range(self.num_workers):
            process = Process(target=_worker,
                              args=(self.reader, input_queue, output_queue, worker_id))
            logger.info(f"starting worker {worker_id}")
            process.start()
            processes.append(process)

        # Keep going as long as not all the workers have finished.
        while num_finished < self.num_workers:
            item = output_queue.get()
            if isinstance(item, int):
                # Means a worker has finished, so increment the finished count.
                num_finished += 1
                logger.info(f"worker {item} finished ({num_finished}/{self.num_workers})")
            else:
                # Otherwise it's an ``Instance``, so yield it up.
                yield item

        for process in processes:
            process.join()
        processes.clear()
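
A minimal, self-contained sketch of the same sentinel-per-worker queue protocol used above: each worker drains file paths until it sees None, then reports its index so the consumer knows when production has finished. The reader and instances are replaced by plain strings, so all names here are illustrative rather than taken from the library.

from multiprocessing import Process, Queue

def toy_worker(input_queue: Queue, output_queue: Queue, index: int) -> None:
    # Drain file paths until the None sentinel arrives, then report our index.
    while True:
        file_path = input_queue.get()
        if file_path is None:
            output_queue.put(index)
            break
        output_queue.put(f"instance-from-{file_path}")

if __name__ == "__main__":
    num_workers = 2
    input_queue, output_queue = Queue(), Queue()
    for shard in ["a.txt", "b.txt", "c.txt"]:
        input_queue.put(shard)
    for _ in range(num_workers):
        input_queue.put(None)          # one sentinel per worker
    workers = [Process(target=toy_worker, args=(input_queue, output_queue, i))
               for i in range(num_workers)]
    for w in workers:
        w.start()
    finished = 0
    while finished < num_workers:      # ints signal completion, everything else is data
        item = output_queue.get()
        if isinstance(item, int):
            finished += 1
        else:
            print(item)
    for w in workers:
        w.join()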
Example #3
def main(args):
    if args.labels:
        data = []
        with open(args.data_dir, encoding="utf-8") as f:
            for line in csv.reader(f, delimiter="\t"):
                data.append(line)
            text, labels = list(zip(*data[1:]))
    else:
        text = []
        with open(args.data_dir, encoding="utf-8") as f:
            for line in f.readlines():
                text.append(line.strip())
        labels = None

    if isinstance(text, tuple):
        text = list(text)

    if "imdb" in args.data_dir or "IMDB" in args.data_dir:
        text = [clean_for_imdb(t) for t in text]

    logger.info("Do back-translation for {} sentences".format(len(text)))

    if args.gpus is not None and len(args.gpus) > 1:
        logger.info("Use Multiple GPUs: {}".format(", ".join([str(i) for i in args.gpus])))
        split_point = len(text) // len(args.gpus)

        text_splitted = []
        for gpu_id in args.gpus:
            text_splitted.append(text[gpu_id * split_point : (gpu_id + 1) * split_point])
            if gpu_id == len(args.gpus) - 1:
                text_splitted[-1] += text[(gpu_id + 1) * split_point :]
        assert sum(len(s) for s in text_splitted) == len(text)

        set_start_method("spawn")
        q = Queue()

        procs = []
        for i in range(len(args.gpus)):
            proc = Process(target=multi_translate, args=(args, i, text_splitted[i], q))
            procs.append(proc)
            proc.start()

        q_result = []
        for p in procs:
            q_result.append(q.get())

        back_translated_docs = []
        for doc_split in sorted(q_result):
            back_translated_docs += doc_split[1]

        q.close()
        q.join_thread()

        for proc in procs:
            proc.join()
    else:
        if args.gpus is not None:
            gpu = args.gpus[0]
            logger.info("Use only one GPU: {}".format(gpu))
            back_translated_docs = translate(args, text, args.gpus[0])[1]
        else:
            logger.info("Use cpu")
            back_translated_docs = translate(args, text)

    output_file_name = "bt_" + os.path.basename(args.data_dir)
    output_dir = os.path.join(args.output_dir, output_file_name)

    folder_name = os.path.dirname(output_dir)
    if not os.path.isdir(folder_name):
        os.makedirs(folder_name)

    if args.return_sentence_pair:
        # Save original sentence pair
        filename, ext = os.path.splitext(output_dir)
        with open(filename + ".pickle", "wb") as f:
            pickle.dump(back_translated_docs, f)

        # Save back-translated sentences
        bt_doc = [" ".join(list(zip(*d))[1]) for d in back_translated_docs]
        with open(output_dir, "wt") as f:
            if labels is not None:
                tsv_writer = csv.writer(f, delimiter="\t")
                tsv_writer.writerow(data[0])
                for line, label in zip(bt_doc, labels):
                    tsv_writer.writerow([line, label])
            else:
                for line in bt_doc:
                    f.write(line)
                    f.write('\n')

        # Save cross sentences
        new_back_translated_docs = []
        for doc in back_translated_docs:
            new_doc = []
            for j, sent in enumerate(doc):
                if j % 2 == 0:
                    new_doc.append(sent)
                else:
                    new_doc.append(sent[::-1])
            new_back_translated_docs.append(new_doc)
        new_docs1, new_docs2 = [], []
        for doc in new_back_translated_docs:
            n1, n2 = list(zip(*doc))
            new_docs1.append(" ".join(n1))
            new_docs2.append(" ".join(n2))
        
        filename, ext = os.path.splitext(output_dir)
        with open(filename + "_pair1" + ext, "wt") as f:
            if labels is not None:
                tsv_writer = csv.writer(f, delimiter="\t")
                tsv_writer.writerow(data[0])
                for line, label in zip(new_docs1, labels):
                    tsv_writer.writerow([line, label])
            else:
                for line in new_docs1:
                    f.write(line)
                    f.write('\n')
        with open(filename + "_pair2" + ext, "wt") as f:
            if labels is not None:
                tsv_writer = csv.writer(f, delimiter="\t")
                tsv_writer.writerow(data[0])
                for line, label in zip(new_docs2, labels):
                    tsv_writer.writerow([line, label])
            else:
                for line in new_docs2:
                    f.write(line)
                    f.write('\n')
    else:
        with open(output_dir, "wt") as f:
            if labels is not None:
                tsv_writer = csv.writer(f, delimiter="\t")
                tsv_writer.writerow(data[0])
                for line, label in zip(back_translated_docs, labels):
                    tsv_writer.writerow([line, label])
            else:
                for line in back_translated_docs:
                    f.write(line)
                    f.write('\n')

    logger.info("Translated documents are saved in {}".format(output_dir))
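
The multi-GPU branch above splits the text into contiguous chunks (the last chunk absorbing the remainder), runs one process per GPU, and reassembles the outputs by sorting on the worker id that each process returns together with its result. A stripped-down sketch of that split-and-collect pattern, with a dummy worker standing in for multi_translate; names are illustrative:

from multiprocessing import Process, Queue

def dummy_worker(worker_id, chunk, q):
    # Stand-in for multi_translate: tag the result with the worker id so the
    # parent can restore the original order after collection.
    q.put((worker_id, [s.upper() for s in chunk]))

if __name__ == "__main__":
    text = [f"sentence {i}" for i in range(10)]
    num_workers = 3
    split_point = len(text) // num_workers
    chunks = [text[i * split_point:(i + 1) * split_point] for i in range(num_workers)]
    chunks[-1] += text[num_workers * split_point:]   # remainder goes to the last worker
    assert sum(len(c) for c in chunks) == len(text)

    q = Queue()
    procs = [Process(target=dummy_worker, args=(i, chunks[i], q)) for i in range(num_workers)]
    for p in procs:
        p.start()
    results = [q.get() for _ in procs]               # one result per worker, any order
    merged = []
    for _, chunk in sorted(results):                 # sort by worker id to restore order
        merged += chunk
    for p in procs:
        p.join()
    print(len(merged), merged[0])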
Example #4
    parser = argparse.ArgumentParser()
    parser.add_argument('-size', type=int, help='total number of nodes')
    parser.add_argument('-path', help='path of the shared file system')
    args = parser.parse_args()
    print("size:" + str(args.size))
    print("path:" + args.path)

    processes = []

    num_blocks = [2, 2, 2, 2]

    # stop_flag = Value('i', 0)
    e = Event()

    buffer_queues = []
    buffer_queues.append(Queue(400))
    buffer_queues.append(Queue(400))
    buffer_queues.append(Queue(400))
    buffer_queues.append(Queue(400))
    buffer_queues.append(Queue(400))
    buffer_queues.append(Queue(400))

    layers = []
    input_layer = ResInputLayer()
    input_layer.share_memory()

    layers.append(input_layer)

    block1 = ResBlockLayer(BasicBlock, 64, num_blocks[0], 1)
    block1.share_memory()
    layers.append(block1)
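
The fragment above creates bounded queues (Queue(400)) to sit between pipeline stages: a stage that tries to put into a full queue blocks until space frees up, which throttles fast producers. A minimal sketch of that backpressure behaviour, independent of the ResNet layers:

from multiprocessing import Process, Queue

def producer(q: Queue) -> None:
    for i in range(10):
        q.put(i)          # blocks once the queue holds `maxsize` items
    q.put(None)

def consumer(q: Queue) -> None:
    while True:
        item = q.get()
        if item is None:
            break
        print("stage received", item)

if __name__ == "__main__":
    buffer_queue = Queue(4)   # small maxsize so the producer is throttled
    p = Process(target=producer, args=(buffer_queue,))
    c = Process(target=consumer, args=(buffer_queue,))
    p.start()
    c.start()
    p.join()
    c.join()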
Example #5
File: train.py    Project: jason-su/MKD-NET
def train(training_dbs, validation_db, system_config, model, args):
    # reading arguments from command
    start_iter  = args.start_iter
    initialize  = args.initialize
    gpu         = args.gpu

    # reading arguments from json file
    learning_rate    = system_config.learning_rate
    max_iteration    = system_config.max_iter
    pretrained_model = system_config.pretrain
    stepsize         = system_config.stepsize
    snapshot         = system_config.snapshot
    val_iter         = system_config.val_iter
    display          = system_config.display
    decay_rate       = system_config.decay_rate

    print("building model...")
    nnet = NetworkFactory(system_config, model, gpu=gpu)
    if initialize:
        nnet.save_params(0)
        exit(0)

    # queues storing data for training
    training_queue   = Queue(system_config.prefetch_size)
    validation_queue = Queue(5)

    # queues storing pinned data for training
    pinned_training_queue   = queue.Queue(system_config.prefetch_size)
    pinned_validation_queue = queue.Queue(5)

    # allocating resources for parallel reading
    # read training data into the queue in parallel:
    # each worker gets its own training_db; spawn one data-reading process per worker
    training_tasks = init_parallel_jobs(system_config, training_dbs, training_queue, data_sampling_func, True)
    if val_iter:
        validation_tasks = init_parallel_jobs(system_config, [validation_db], validation_queue, data_sampling_func, False)

    # set up the semaphores; pin-memory threads move data from training_queue into pinned_training_queue
    training_pin_semaphore   = threading.Semaphore()
    validation_pin_semaphore = threading.Semaphore()
    training_pin_semaphore.acquire()
    validation_pin_semaphore.acquire()
 
    training_pin_args   = (training_queue, pinned_training_queue, training_pin_semaphore)
    training_pin_thread = threading.Thread(target=pin_memory, args=training_pin_args)
    training_pin_thread.daemon = True
    training_pin_thread.start()
 
    validation_pin_args   = (validation_queue, pinned_validation_queue, validation_pin_semaphore)
    validation_pin_thread = threading.Thread(target=pin_memory, args=validation_pin_args)
    validation_pin_thread.daemon = True
    validation_pin_thread.start()

    if pretrained_model is not None:
        if not os.path.exists(pretrained_model):
            raise ValueError("pretrained model does not exist")
        print("loading from pretrained model")
        nnet.load_pretrained_params(pretrained_model)

    if start_iter:
        nnet.load_params(start_iter)
        learning_rate /= (decay_rate ** (start_iter // stepsize))
        learning_rate = max(5e-5,learning_rate)
        nnet.set_lr(learning_rate)
        print("training starts from iteration {} with learning_rate {}".format(start_iter + 1, learning_rate))
    else:
        nnet.set_lr(learning_rate)

    print("training start, max iteration {}".format(max_iteration))
        
    nnet.cuda()
    nnet.train_mode()
    with stdout_to_tqdm() as save_stdout:
        for iteration in tqdm(range(start_iter + 1, max_iteration + 1), file=save_stdout, ncols=80):
#         for iteration in range(start_iter + 1, max_iteration + 1):
            training = pinned_training_queue.get(block=True)
            training_loss,focal_loss,pull_loss,push_loss,off_loss = nnet.train(training["xs"],training["ys"])
            
#             if display and iteration % display == 0:
#                 print("training loss at iteration {}: {}".format(iteration, training_loss.item()))
#                 
#             print("[log-loss]:{}={}".format(iteration, training_loss.item()))
            
            writer.add_scalar('train_loss', training_loss, global_step=iteration)
            writer.add_scalar('focal_loss', focal_loss, global_step=iteration)
            writer.add_scalar('pull_loss', pull_loss, global_step=iteration)
            writer.add_scalar('push_loss', push_loss, global_step=iteration)
            writer.add_scalar('off_loss', off_loss, global_step=iteration)
            
            del training_loss

            if val_iter and validation_db.db_inds.size and iteration % val_iter == 0:
                nnet.eval_mode()
                validation = pinned_validation_queue.get(block=True)
                validation_loss = nnet.validate(validation["xs"],validation["ys"])
                print("[log-validation-loss]:{}={}".format(iteration, validation_loss.item()))
                writer.add_scalar('validation_loss', validation_loss, global_step=iteration)
                
                nnet.train_mode()

            if iteration % snapshot == 0:
                nnet.save_params(iteration)

            if iteration % stepsize == 0:
                learning_rate /= decay_rate
                learning_rate = max(5e-5,learning_rate)
                nnet.set_lr(learning_rate)
                print("set learning rate {}".format(learning_rate))


    # sending signal to kill the thread
    training_pin_semaphore.release()
    validation_pin_semaphore.release()

    # terminating data fetching processes
    terminate_tasks(training_tasks)
    terminate_tasks(validation_tasks)
    
    writer.close()
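
The comments above describe the pinned-data pattern: reader processes fill training_queue, and a daemon thread drains it into the in-process pinned_training_queue until the semaphore released after training tells it to stop. A hedged sketch of what such a pin_memory loop can look like; the repository's actual pin_memory may differ (for example, it would call .pin_memory() on each tensor):

import queue
import threading
from multiprocessing import Queue

def pin_memory_sketch(data_queue: Queue, pinned_queue: queue.Queue, semaphore: threading.Semaphore) -> None:
    # Move batches from the cross-process queue into the in-process queue until
    # the main thread releases the semaphore to signal shutdown.
    while True:
        data = data_queue.get()
        # A real implementation would pin each tensor here, e.g. t.pin_memory().
        pinned_queue.put(data)
        if semaphore.acquire(blocking=False):
            return

Usage mirrors the training code above: the main thread acquires the semaphore before starting the daemon thread, and releases it once training is done so the loop can exit.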
Example #6
def train(model: nn.Module,
          data: Union[MoleculeDataset, List[MoleculeDataset]],
          loss_func: Callable,
          optimizer: Optimizer,
          scheduler: _LRScheduler,
          args: Namespace,
          n_iter: int = 0,
          logger: logging.Logger = None,
          writer: SummaryWriter = None,
          chunk_names: bool = False,
          val_smiles: List[str] = None,
          test_smiles: List[str] = None) -> int:
    """
    Trains a model for an epoch.

    :param model: Model.
    :param data: A MoleculeDataset (or a list of MoleculeDatasets if using moe).
    :param loss_func: Loss function.
    :param optimizer: An Optimizer.
    :param scheduler: A learning rate scheduler.
    :param args: Arguments.
    :param n_iter: The number of iterations (training examples) trained on so far.
    :param logger: A logger for printing intermediate results.
    :param writer: A tensorboardX SummaryWriter.
    :param chunk_names: Whether to train on the data in chunks. In this case,
    data must be a list of paths to the data chunks.
    :param val_smiles: Validation smiles strings without targets.
    :param test_smiles: Test smiles strings without targets, used for adversarial setting.
    :return: The total number of iterations (training examples) trained on so far.
    """
    debug = logger.debug if logger is not None else print

    model.train()

    if args.dataset_type == 'bert_pretraining':
        features_loss = nn.MSELoss()

    if chunk_names:
        for path, memo_path in tqdm(data, total=len(data)):
            featurization.SMILES_TO_FEATURES = dict()
            if os.path.isfile(memo_path):
                found_memo = True
                with open(memo_path, 'rb') as f:
                    featurization.SMILES_TO_FEATURES = pickle.load(f)
            else:
                found_memo = False
            with open(path, 'rb') as f:
                chunk = pickle.load(f)
            if args.moe:
                for source in chunk:
                    source.shuffle()
            else:
                chunk.shuffle()
            n_iter = train(model=model,
                           data=chunk,
                           loss_func=loss_func,
                           optimizer=optimizer,
                           scheduler=scheduler,
                           args=args,
                           n_iter=n_iter,
                           logger=logger,
                           writer=writer,
                           chunk_names=False,
                           val_smiles=val_smiles,
                           test_smiles=test_smiles)
            if not found_memo:
                with open(memo_path, 'wb') as f:
                    pickle.dump(featurization.SMILES_TO_GRAPH,
                                f,
                                protocol=pickle.HIGHEST_PROTOCOL)
        return n_iter

    if not args.moe:
        data.shuffle()

    loss_sum, iter_count = 0, 0
    if args.adversarial:
        if args.moe:
            train_smiles = []
            for d in data:
                train_smiles += d.smiles()
        else:
            train_smiles = data.smiles()
        train_val_smiles = train_smiles + val_smiles
        d_loss_sum, g_loss_sum, gp_norm_sum = 0, 0, 0

    if args.moe:
        test_smiles = list(test_smiles)
        random.shuffle(test_smiles)
        train_smiles = []
        for d in data:
            d.shuffle()
            train_smiles.append(d.smiles())
        num_iters = min(len(test_smiles), min([len(d) for d in data]))
    elif args.maml:
        num_iters = args.maml_batches_per_epoch * args.maml_batch_size
        model.zero_grad()
        maml_sum_loss = 0
    else:
        num_iters = len(data) if args.last_batch else len(
            data) // args.batch_size * args.batch_size

    if args.parallel_featurization:
        batch_queue = Queue(args.batch_queue_max_size)
        exit_queue = Queue(1)
        batch_process = Process(target=async_mol2graph,
                                args=(batch_queue, data, args, num_iters,
                                      args.batch_size, exit_queue,
                                      args.last_batch))
        batch_process.start()
        currently_loaded_batches = []

    iter_size = 1 if args.maml else args.batch_size

    for i in trange(0, num_iters, iter_size):
        if args.moe:
            if not args.batch_domain_encs:
                model.compute_domain_encs(
                    train_smiles)  # want to recompute every batch
            mol_batch = [
                MoleculeDataset(d[i:i + args.batch_size]) for d in data
            ]
            train_batch, train_targets = [], []
            for b in mol_batch:
                tb, tt = b.smiles(), b.targets()
                train_batch.append(tb)
                train_targets.append(tt)
            test_batch = test_smiles[i:i + args.batch_size]
            loss = model.compute_loss(train_batch, train_targets, test_batch)
            model.zero_grad()

            loss_sum += loss.item()
            iter_count += len(mol_batch)
        elif args.maml:
            task_train_data, task_test_data, task_idx = data.sample_maml_task(
                args)
            mol_batch = task_test_data
            smiles_batch, features_batch, target_batch = task_train_data.smiles(
            ), task_train_data.features(), task_train_data.targets(task_idx)
            # no mask since we only picked data points that have the desired target
            targets = torch.Tensor(target_batch).unsqueeze(1)
            if next(model.parameters()).is_cuda:
                targets = targets.cuda()
            preds = model(smiles_batch, features_batch)
            loss = loss_func(preds, targets)
            loss = loss.sum() / len(smiles_batch)
            grad = torch.autograd.grad(
                loss, [p for p in model.parameters() if p.requires_grad])
            theta = [
                p for p in model.named_parameters() if p[1].requires_grad
            ]  # comes in same order as grad
            theta_prime = {
                p[0]: p[1] - args.maml_lr * grad[i]
                for i, p in enumerate(theta)
            }
            for name, nongrad_param in [
                    p for p in model.named_parameters()
                    if not p[1].requires_grad
            ]:
                theta_prime[name] = nongrad_param + torch.zeros(
                    nongrad_param.size()).to(nongrad_param)
        else:
            # Prepare batch
            if args.parallel_featurization:
                if len(currently_loaded_batches) == 0:
                    currently_loaded_batches = batch_queue.get()
                mol_batch, featurized_mol_batch = currently_loaded_batches.pop(
                )
            else:
                if not args.last_batch and i + args.batch_size > len(data):
                    break
                mol_batch = MoleculeDataset(data[i:i + args.batch_size])
            smiles_batch, features_batch, target_batch = mol_batch.smiles(
            ), mol_batch.features(), mol_batch.targets()

            if args.dataset_type == 'bert_pretraining':
                batch = mol2graph(smiles_batch, args)
                mask = mol_batch.mask()
                batch.bert_mask(mask)
                mask = 1 - torch.FloatTensor(mask)  # num_atoms
                features_targets = torch.FloatTensor(
                    target_batch['features']
                ) if target_batch[
                    'features'] is not None else None  # num_molecules x features_size
                targets = torch.FloatTensor(target_batch['vocab'])  # num_atoms
                if args.bert_vocab_func == 'feature_vector':
                    mask = mask.reshape(-1, 1)
                else:
                    targets = targets.long()
            else:
                batch = smiles_batch
                mask = torch.Tensor([[x is not None for x in tb]
                                     for tb in target_batch])
                targets = torch.Tensor([[0 if x is None else x for x in tb]
                                        for tb in target_batch])

            if next(model.parameters()).is_cuda:
                mask, targets = mask.cuda(), targets.cuda()

                if args.dataset_type == 'bert_pretraining' and features_targets is not None:
                    features_targets = features_targets.cuda()

            if args.class_balance:
                class_weights = []
                for task_num in range(data.num_tasks()):
                    class_weights.append(
                        args.class_weights[task_num][targets[:,
                                                             task_num].long()])
                class_weights = torch.stack(
                    class_weights).t()  # num_molecules x num_tasks
            else:
                class_weights = torch.ones(targets.shape)

            if args.cuda:
                class_weights = class_weights.cuda()

            # Run model
            model.zero_grad()
            if args.parallel_featurization:
                previous_graph_input_mode = model.encoder.graph_input
                model.encoder.graph_input = True  # force model to accept already processed input
                preds = model(featurized_mol_batch, features_batch)
                model.encoder.graph_input = previous_graph_input_mode
            else:
                preds = model(batch, features_batch)
            if args.dataset_type == 'regression_with_binning':
                preds = preds.view(targets.size(0), targets.size(1), -1)
                targets = targets.long()
                loss = 0
                for task in range(targets.size(1)):
                    loss += loss_func(
                        preds[:, task, :], targets[:, task]
                    ) * class_weights[:,
                                      task] * mask[:,
                                                   task]  # for some reason cross entropy doesn't support multi target
                loss = loss.sum() / mask.sum()
            else:
                if args.dataset_type == 'unsupervised':
                    targets = targets.long().reshape(-1)

                if args.dataset_type == 'bert_pretraining':
                    features_preds, preds = preds['features'], preds['vocab']

                if args.dataset_type == 'kernel':
                    preds = preds.view(int(preds.size(0) / 2), 2,
                                       preds.size(1))
                    preds = model.kernel_output_layer(preds)

                loss = loss_func(preds, targets) * class_weights * mask
                if args.predict_features_and_task:
                    loss = (loss.sum() + loss[:, :-args.features_size].sum() * (args.task_weight-1)) \
                                / (mask.sum() + mask[:, :-args.features_size].sum() * (args.task_weight-1))
                else:
                    loss = loss.sum() / mask.sum()

                if args.dataset_type == 'bert_pretraining' and features_targets is not None:
                    loss += features_loss(features_preds, features_targets)

            loss_sum += loss.item()
            iter_count += len(mol_batch)

        if args.maml:
            model_prime = build_model(args=args, params=theta_prime)
            smiles_batch, features_batch, target_batch = task_test_data.smiles(
            ), task_test_data.features(), [
                t[task_idx] for t in task_test_data.targets()
            ]
            # no mask since we only picked data points that have the desired target
            targets = torch.Tensor([[t] for t in target_batch])
            if next(model_prime.parameters()).is_cuda:
                targets = targets.cuda()
            model_prime.zero_grad()
            preds = model_prime(smiles_batch, features_batch)
            loss = loss_func(preds, targets)
            loss = loss.sum() / len(smiles_batch)
            loss_sum += loss.item()
            iter_count += len(
                smiles_batch
            )  # TODO check that this makes sense, but it's just for display
            maml_sum_loss += loss
            if i % args.maml_batch_size == args.maml_batch_size - 1:
                maml_sum_loss.backward()
                optimizer.step()
                model.zero_grad()
                maml_sum_loss = 0
        else:
            loss.backward()
            if args.max_grad_norm is not None:
                clip_grad_norm_(model.parameters(), args.max_grad_norm)
            optimizer.step()

        if args.adjust_weight_decay:
            current_pnorm = compute_pnorm(model)
            if current_pnorm < args.pnorm_target:
                for i in range(len(optimizer.param_groups)):
                    optimizer.param_groups[i]['weight_decay'] = max(
                        0, optimizer.param_groups[i]['weight_decay'] -
                        args.adjust_weight_decay_step)
            else:
                for i in range(len(optimizer.param_groups)):
                    optimizer.param_groups[i][
                        'weight_decay'] += args.adjust_weight_decay_step

        if isinstance(scheduler, NoamLR):
            scheduler.step()

        if args.adversarial:
            for _ in range(args.gan_d_per_g):
                train_val_smiles_batch = random.sample(train_val_smiles,
                                                       args.batch_size)
                test_smiles_batch = random.sample(test_smiles, args.batch_size)
                d_loss, gp_norm = model.train_D(train_val_smiles_batch,
                                                test_smiles_batch)
            train_val_smiles_batch = random.sample(train_val_smiles,
                                                   args.batch_size)
            test_smiles_batch = random.sample(test_smiles, args.batch_size)
            g_loss = model.train_G(train_val_smiles_batch, test_smiles_batch)

            # we probably only care about the g_loss honestly
            d_loss_sum += d_loss * args.batch_size
            gp_norm_sum += gp_norm * args.batch_size
            g_loss_sum += g_loss * args.batch_size

        n_iter += len(mol_batch)

        # Log and/or add to tensorboard
        if (n_iter // args.batch_size) % args.log_frequency == 0:
            lrs = scheduler.get_lr()
            pnorm = compute_pnorm(model)
            gnorm = compute_gnorm(model)
            loss_avg = loss_sum / iter_count
            if args.adversarial:
                d_loss_avg, g_loss_avg, gp_norm_avg = d_loss_sum / iter_count, g_loss_sum / iter_count, gp_norm_sum / iter_count
                d_loss_sum, g_loss_sum, gp_norm_sum = 0, 0, 0
            loss_sum, iter_count = 0, 0

            lrs_str = ', '.join(f'lr_{i} = {lr:.4e}'
                                for i, lr in enumerate(lrs))
            debug(
                f'Loss = {loss_avg:.4e}, PNorm = {pnorm:.4f}, GNorm = {gnorm:.4f}, {lrs_str}'
            )
            if args.adversarial:
                debug(
                    f'D Loss = {d_loss_avg:.4e}, G Loss = {g_loss_avg:.4e}, GP Norm = {gp_norm_avg:.4}'
                )

            if writer is not None:
                writer.add_scalar('train_loss', loss_avg, n_iter)
                writer.add_scalar('param_norm', pnorm, n_iter)
                writer.add_scalar('gradient_norm', gnorm, n_iter)
                for i, lr in enumerate(lrs):
                    writer.add_scalar(f'learning_rate_{i}', lr, n_iter)

    if args.parallel_featurization:
        exit_queue.put(
            0)  # dummy var to get the subprocess to know that we're done
        batch_process.join()

    return n_iter
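
When args.parallel_featurization is set above, a background Process (async_mol2graph) pushes pre-featurized batches onto batch_queue while the training loop consumes them, and the one-slot exit_queue acts as a shutdown signal. A small, generic sketch of that producer/consumer-with-exit-signal arrangement; the producer body here is illustrative, not chemprop's implementation:

from multiprocessing import Process, Queue

def featurize_producer(batch_queue: Queue, num_batches: int, exit_queue: Queue) -> None:
    for i in range(num_batches):
        batch_queue.put([f"featurized-batch-{i}"])   # stand-in for mol2graph output
    exit_queue.get()                                  # block until the consumer says it is done

if __name__ == "__main__":
    batch_queue, exit_queue = Queue(10), Queue(1)
    producer = Process(target=featurize_producer, args=(batch_queue, 5, exit_queue))
    producer.start()
    loaded = []
    for _ in range(5):
        if not loaded:
            loaded = batch_queue.get()
        print("training on", loaded.pop())
    exit_queue.put(0)    # dummy value so the producer knows we are finished
    producer.join()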
Example #7
def train(training_dbs, validation_db, system_config, model, args):
    # reading arguments from command
    start_iter = args.start_iter
    distributed = args.distributed
    world_size = args.world_size
    initialize = args.initialize
    gpu = args.gpu
    rank = args.rank

    # reading arguments from json file
    batch_size = system_config.batch_size
    print(batch_size)
    learning_rate = system_config.learning_rate
    max_iteration = system_config.max_iter
    pretrained_model = system_config.pretrain
    stepsize = system_config.stepsize
    snapshot = system_config.snapshot
    val_iter = system_config.val_iter
    display = system_config.display
    decay_rate = system_config.decay_rate
    stepsize = system_config.stepsize

    print("Process {}: building model...".format(rank))
    nnet = NetworkFactory(system_config,
                          model,
                          distributed=distributed,
                          gpu=gpu)
    if initialize:
        nnet.save_params(0)
        exit(0)

    # queues storing data for training
    training_queue = Queue(system_config.prefetch_size)
    # validation_queue = Queue(5)

    # queues storing pinned data for training
    pinned_training_queue = queue.Queue(system_config.prefetch_size)
    # pinned_validation_queue = queue.Queue(5)

    # allocating resources for parallel reading
    training_tasks = init_parallel_jobs(system_config, training_dbs,
                                        training_queue, data_sampling_func,
                                        True)
    # if val_iter:
    # validation_tasks = init_parallel_jobs(system_config, [validation_db], validation_queue, data_sampling_func, False)

    training_pin_semaphore = threading.Semaphore()
    # validation_pin_semaphore = threading.Semaphore()
    training_pin_semaphore.acquire()
    # validation_pin_semaphore.acquire()

    training_pin_args = (training_queue, pinned_training_queue,
                         training_pin_semaphore)
    training_pin_thread = threading.Thread(target=pin_memory,
                                           args=training_pin_args)
    training_pin_thread.daemon = True
    training_pin_thread.start()

    # validation_pin_args   = (validation_queue, pinned_validation_queue, validation_pin_semaphore)
    # validation_pin_thread = threading.Thread(target=pin_memory, args=validation_pin_args)
    # validation_pin_thread.daemon = True
    # validation_pin_thread.start()

    if pretrained_model is not None:
        if not os.path.exists(pretrained_model):
            raise ValueError("pretrained model does not exist")
        print("Process {}: loading from pretrained model".format(rank))
        nnet.load_pretrained_params(pretrained_model)

    if start_iter:
        nnet.load_params(start_iter)
        learning_rate /= (decay_rate**(start_iter // stepsize))
        nnet.set_lr(learning_rate)
        print(
            "Process {}: training starts from iteration {} with learning_rate {}"
            .format(rank, start_iter + 1, learning_rate))
    else:
        nnet.set_lr(learning_rate)

    if rank == 0:
        print("training start...")
    nnet.cuda()
    nnet.train_mode()
    with stdout_to_tqdm() as save_stdout:
        for iteration in range(start_iter + 1, max_iteration + 1):
            training = pinned_training_queue.get(block=True)
            training_loss = nnet.train(**training)

            if display and iteration % display == 0:
                print("Process {}: training loss at iteration {}: {}".format(
                    rank, iteration, training_loss.item()))
            del training_loss

            # if val_iter and validation_db.db_inds.size and iteration % val_iter == 0:
            #     nnet.eval_mode()
            #     validation = pinned_validation_queue.get(block=True)
            #     validation_loss = nnet.validate(**validation)
            #     print("Process {}: validation loss at iteration {}: {}".format(rank, iteration, validation_loss.item()))
            #     nnet.train_mode()

            if iteration % snapshot == 0 and rank == 0:
                nnet.save_params(iteration)

            if iteration % stepsize == 0:
                learning_rate /= decay_rate
                nnet.set_lr(learning_rate)

    # sending signal to kill the thread
    training_pin_semaphore.release()
    # validation_pin_semaphore.release()

    # terminating data fetching processes
    terminate_tasks(training_tasks)
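
Both training loops above decay the learning rate by dividing it by decay_rate every stepsize iterations, and when resuming from a checkpoint they recompute it in one shot as learning_rate / decay_rate ** (start_iter // stepsize) (the first loop additionally clamps it to a 5e-5 floor). A tiny worked check of that one-shot recomputation, with illustrative numbers:

def resumed_lr(base_lr: float, decay_rate: float, stepsize: int, start_iter: int) -> float:
    # One-shot recomputation used when loading a checkpoint.
    return base_lr / (decay_rate ** (start_iter // stepsize))

# Decaying step by step over the same number of iterations gives the same value.
lr = 2.5e-4
for iteration in range(1, 135001):
    if iteration % 45000 == 0:
        lr /= 10
assert abs(lr - resumed_lr(2.5e-4, 10, 45000, 135000)) < 1e-12
print(lr)  # ~2.5e-07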
Example #8
class SkipGramModel(nn.Module):
    """ Negative sampling based skip-gram """
    def __init__(
        self,
        emb_size,
        emb_dimension,
        batch_size,
        only_cpu,
        only_gpu,
        only_fst,
        only_snd,
        mix,
        neg_weight,
        negative,
        lr,
        lap_norm,
        fast_neg,
        record_loss,
        async_update,
        num_threads,
    ):
        """ initialize embedding on CPU 

        Parameters
        ----------
        emb_size int : number of nodes
        emb_dimension int : embedding dimension
        batch_size int : number of node sequences in each batch
        only_cpu bool : training with CPU
        only_gpu bool : training with GPU
        only_fst bool : only embedding for first-order proximity
        only_snd bool : only embedding for second-order proximity
        mix bool : mixed training with CPU and GPU
        negative int : negative samples for each positive node pair
        neg_weight float : negative weight
        lr float : initial learning rate
        lap_norm float : weight of laplacian normalization
        fast_neg bool : do negative sampling inside a batch
        record_loss bool : print the loss during training
        async_update bool : asynchronous training
        num_threads int : number of threads used by the async update subprocess
        """
        super(SkipGramModel, self).__init__()
        self.emb_size = emb_size
        self.batch_size = batch_size
        self.only_cpu = only_cpu
        self.only_gpu = only_gpu
        if only_fst:
            self.fst = True
            self.snd = False
            self.emb_dimension = emb_dimension
        elif only_snd:
            self.fst = False
            self.snd = True
            self.emb_dimension = emb_dimension
        else:
            self.fst = True
            self.snd = True
            self.emb_dimension = int(emb_dimension / 2)
        self.mixed_train = mix
        self.neg_weight = neg_weight
        self.negative = negative
        self.lr = lr
        self.lap_norm = lap_norm
        self.fast_neg = fast_neg
        self.record_loss = record_loss
        self.async_update = async_update
        self.num_threads = num_threads

        # initialize the device as cpu
        self.device = torch.device("cpu")

        # embedding
        initrange = 1.0 / self.emb_dimension
        if self.fst:
            self.fst_u_embeddings = nn.Embedding(self.emb_size,
                                                 self.emb_dimension,
                                                 sparse=True)
            init.uniform_(self.fst_u_embeddings.weight.data, -initrange,
                          initrange)
        if self.snd:
            self.snd_u_embeddings = nn.Embedding(self.emb_size,
                                                 self.emb_dimension,
                                                 sparse=True)
            init.uniform_(self.snd_u_embeddings.weight.data, -initrange,
                          initrange)
            self.snd_v_embeddings = nn.Embedding(self.emb_size,
                                                 self.emb_dimension,
                                                 sparse=True)
            init.constant_(self.snd_v_embeddings.weight.data, 0)

        # lookup_table is used for fast sigmoid computing
        self.lookup_table = torch.sigmoid(torch.arange(-6.01, 6.01, 0.01))
        self.lookup_table[0] = 0.
        self.lookup_table[-1] = 1.
        if self.record_loss:
            self.logsigmoid_table = torch.log(
                torch.sigmoid(torch.arange(-6.01, 6.01, 0.01)))
            self.loss_fst = []
            self.loss_snd = []

        # indexes to select positive/negative node pairs from batch_walks
        self.index_emb_negu, self.index_emb_negv = init_emb2neg_index(
            self.negative, self.batch_size)

        # adam
        if self.fst:
            self.fst_state_sum_u = torch.zeros(self.emb_size)
        if self.snd:
            self.snd_state_sum_u = torch.zeros(self.emb_size)
            self.snd_state_sum_v = torch.zeros(self.emb_size)

    def create_async_update(self):
        """ Set up the async update subprocess.
        """
        self.async_q = Queue(1)
        self.async_p = mp.Process(target=async_update,
                                  args=(self.num_threads, self, self.async_q))
        self.async_p.start()

    def finish_async_update(self):
        """ Notify the async update subprocess to quit.
        """
        self.async_q.put((None, None, None, None, None))
        self.async_p.join()

    def share_memory(self):
        """ share the parameters across subprocesses """
        if self.fst:
            self.fst_u_embeddings.weight.share_memory_()
            self.fst_state_sum_u.share_memory_()
        if self.snd:
            self.snd_u_embeddings.weight.share_memory_()
            self.snd_v_embeddings.weight.share_memory_()
            self.snd_state_sum_u.share_memory_()
            self.snd_state_sum_v.share_memory_()

    def set_device(self, gpu_id):
        """ set gpu device """
        self.device = torch.device("cuda:%d" % gpu_id)
        print("The device is", self.device)
        self.lookup_table = self.lookup_table.to(self.device)
        if self.record_loss:
            self.logsigmoid_table = self.logsigmoid_table.to(self.device)
        self.index_emb_negu = self.index_emb_negu.to(self.device)
        self.index_emb_negv = self.index_emb_negv.to(self.device)

    def all_to_device(self, gpu_id):
        """ move all of the parameters to a single GPU """
        self.device = torch.device("cuda:%d" % gpu_id)
        self.set_device(gpu_id)
        if self.fst:
            self.fst_u_embeddings = self.fst_u_embeddings.cuda(gpu_id)
            self.fst_state_sum_u = self.fst_state_sum_u.to(self.device)
        if self.snd:
            self.snd_u_embeddings = self.snd_u_embeddings.cuda(gpu_id)
            self.snd_v_embeddings = self.snd_v_embeddings.cuda(gpu_id)
            self.snd_state_sum_u = self.snd_state_sum_u.to(self.device)
            self.snd_state_sum_v = self.snd_state_sum_v.to(self.device)

    def fast_sigmoid(self, score):
        """ do fast sigmoid by looking up in a pre-defined table """
        idx = torch.floor((score + 6.01) / 0.01).long()
        return self.lookup_table[idx]

    def fast_logsigmoid(self, score):
        """ do fast logsigmoid by looking up in a pre-defined table """
        idx = torch.floor((score + 6.01) / 0.01).long()
        return self.logsigmoid_table[idx]

    def fast_pos_bp(self, emb_pos_u, emb_pos_v, first_flag):
        """ get grad for positve samples """
        pos_score = torch.sum(torch.mul(emb_pos_u, emb_pos_v), dim=1)
        pos_score = torch.clamp(pos_score, max=6, min=-6)
        # [batch_size, 1]
        score = (1 - self.fast_sigmoid(pos_score)).unsqueeze(1)
        if self.record_loss:
            if first_flag:
                self.loss_fst.append(
                    torch.mean(self.fast_logsigmoid(pos_score)).item())
            else:
                self.loss_snd.append(
                    torch.mean(self.fast_logsigmoid(pos_score)).item())

        # [batch_size, dim]
        if self.lap_norm > 0:
            grad_u_pos = score * emb_pos_v + self.lap_norm * (emb_pos_v -
                                                              emb_pos_u)
            grad_v_pos = score * emb_pos_u + self.lap_norm * (emb_pos_u -
                                                              emb_pos_v)
        else:
            grad_u_pos = score * emb_pos_v
            grad_v_pos = score * emb_pos_u

        return grad_u_pos, grad_v_pos

    def fast_neg_bp(self, emb_neg_u, emb_neg_v, first_flag):
        """ get grad for negative samples """
        neg_score = torch.sum(torch.mul(emb_neg_u, emb_neg_v), dim=1)
        neg_score = torch.clamp(neg_score, max=6, min=-6)
        # [batch_size * negative, 1]
        score = -self.fast_sigmoid(neg_score).unsqueeze(1)
        if self.record_loss:
            if first_flag:
                self.loss_fst.append(
                    self.negative * self.neg_weight *
                    torch.mean(self.fast_logsigmoid(-neg_score)).item())
            else:
                self.loss_snd.append(
                    self.negative * self.neg_weight *
                    torch.mean(self.fast_logsigmoid(-neg_score)).item())

        grad_u_neg = self.neg_weight * score * emb_neg_v
        grad_v_neg = self.neg_weight * score * emb_neg_u

        return grad_u_neg, grad_v_neg

    def fast_learn(self, batch_edges, neg_nodes=None):
        """ Learn a batch of edges in a fast way. It has the following features:
            1. It calculates the gradients directly, without a separate forward pass.
            2. It computes the sigmoid via a lookup table.

        Specifically, for each positive/negative node pair (i,j), the updating procedure is as follows:
            score = self.fast_sigmoid(u_embedding[i].dot(v_embedding[j]))
            # label = 1 for positive samples; label = 0 for negative samples.
            u_embedding[i] += (label - score) * v_embedding[j]
            v_embedding[j] += (label - score) * u_embedding[i]

        Parameters
        ----------
        batch_edges list : a list of node pairs (edges)
        neg_nodes torch.LongTensor : a long tensor of sampled true negative nodes. If neg_nodes is None,
            negative samples are drawn from the nodes in batch_edges instead.

        Usage example
        -------------
        batch_edges = torch.LongTensor([[1, 2], [3, 4], [5, 6]])
        neg_nodes = None
        """
        lr = self.lr

        # [batch_size, 2]
        nodes = batch_edges
        if self.only_gpu:
            nodes = nodes.to(self.device)
            if neg_nodes is not None:
                neg_nodes = neg_nodes.to(self.device)
        bs = len(nodes)

        if self.fst:
            emb_u = self.fst_u_embeddings(nodes[:, 0]).view(
                -1, self.emb_dimension).to(self.device)
            emb_v = self.fst_u_embeddings(nodes[:, 1]).view(
                -1, self.emb_dimension).to(self.device)

            ## Positive
            emb_pos_u, emb_pos_v = emb_u, emb_v
            grad_u_pos, grad_v_pos = self.fast_pos_bp(emb_pos_u, emb_pos_v,
                                                      True)

            ## Negative
            emb_neg_u = emb_pos_u.repeat((self.negative, 1))

            if bs < self.batch_size:
                index_emb_negu, index_emb_negv = init_emb2neg_index(
                    self.negative, bs)
                index_emb_negu = index_emb_negu.to(self.device)
                index_emb_negv = index_emb_negv.to(self.device)
            else:
                index_emb_negu = self.index_emb_negu
                index_emb_negv = self.index_emb_negv

            if neg_nodes is None:
                emb_neg_v = torch.index_select(emb_v, 0, index_emb_negv)
            else:
                emb_neg_v = self.fst_u_embeddings.weight[neg_nodes].to(
                    self.device)

            grad_u_neg, grad_v_neg = self.fast_neg_bp(emb_neg_u, emb_neg_v,
                                                      True)

            ## Update
            grad_u_pos.index_add_(0, index_emb_negu, grad_u_neg)
            grad_u = grad_u_pos
            if neg_nodes is None:
                grad_v_pos.index_add_(0, index_emb_negv, grad_v_neg)
                grad_v = grad_v_pos
            else:
                grad_v = grad_v_pos

            # use adam optimizer
            grad_u = adam(grad_u, self.fst_state_sum_u, nodes[:, 0], lr,
                          self.device, self.only_gpu)
            grad_v = adam(grad_v, self.fst_state_sum_u, nodes[:, 1], lr,
                          self.device, self.only_gpu)
            if neg_nodes is not None:
                grad_v_neg = adam(grad_v_neg, self.fst_state_sum_u, neg_nodes,
                                  lr, self.device, self.only_gpu)

            if self.mixed_train:
                grad_u = grad_u.cpu()
                grad_v = grad_v.cpu()
                if neg_nodes is not None:
                    grad_v_neg = grad_v_neg.cpu()
                else:
                    grad_v_neg = None

                if self.async_update:
                    grad_u.share_memory_()
                    grad_v.share_memory_()
                    nodes.share_memory_()
                    if neg_nodes is not None:
                        neg_nodes.share_memory_()
                        grad_v_neg.share_memory_()
                    self.async_q.put(
                        (grad_u, grad_v, grad_v_neg, nodes, neg_nodes, True))

            if not self.async_update:
                self.fst_u_embeddings.weight.data.index_add_(
                    0, nodes[:, 0], grad_u)
                self.fst_u_embeddings.weight.data.index_add_(
                    0, nodes[:, 1], grad_v)
                if neg_nodes is not None:
                    self.fst_u_embeddings.weight.data.index_add_(
                        0, neg_nodes, grad_v_neg)

        if self.snd:
            emb_u = self.snd_u_embeddings(nodes[:, 0]).view(
                -1, self.emb_dimension).to(self.device)
            emb_v = self.snd_v_embeddings(nodes[:, 1]).view(
                -1, self.emb_dimension).to(self.device)

            ## Positive
            emb_pos_u, emb_pos_v = emb_u, emb_v
            grad_u_pos, grad_v_pos = self.fast_pos_bp(emb_pos_u, emb_pos_v,
                                                      False)

            ## Negative
            emb_neg_u = emb_pos_u.repeat((self.negative, 1))

            if bs < self.batch_size:
                index_emb_negu, index_emb_negv = init_emb2neg_index(
                    self.negative, bs)
                index_emb_negu = index_emb_negu.to(self.device)
                index_emb_negv = index_emb_negv.to(self.device)
            else:
                index_emb_negu = self.index_emb_negu
                index_emb_negv = self.index_emb_negv

            if neg_nodes is None:
                emb_neg_v = torch.index_select(emb_v, 0, index_emb_negv)
            else:
                emb_neg_v = self.snd_v_embeddings.weight[neg_nodes].to(
                    self.device)

            grad_u_neg, grad_v_neg = self.fast_neg_bp(emb_neg_u, emb_neg_v,
                                                      False)

            ## Update
            grad_u_pos.index_add_(0, index_emb_negu, grad_u_neg)
            grad_u = grad_u_pos
            if neg_nodes is None:
                grad_v_pos.index_add_(0, index_emb_negv, grad_v_neg)
                grad_v = grad_v_pos
            else:
                grad_v = grad_v_pos

            # use adam optimizer
            grad_u = adam(grad_u, self.snd_state_sum_u, nodes[:, 0], lr,
                          self.device, self.only_gpu)
            grad_v = adam(grad_v, self.snd_state_sum_v, nodes[:, 1], lr,
                          self.device, self.only_gpu)
            if neg_nodes is not None:
                grad_v_neg = adam(grad_v_neg, self.snd_state_sum_v, neg_nodes,
                                  lr, self.device, self.only_gpu)

            if self.mixed_train:
                grad_u = grad_u.cpu()
                grad_v = grad_v.cpu()
                if neg_nodes is not None:
                    grad_v_neg = grad_v_neg.cpu()
                else:
                    grad_v_neg = None

                if self.async_update:
                    grad_u.share_memory_()
                    grad_v.share_memory_()
                    nodes.share_memory_()
                    if neg_nodes is not None:
                        neg_nodes.share_memory_()
                        grad_v_neg.share_memory_()
                    self.async_q.put(
                        (grad_u, grad_v, grad_v_neg, nodes, neg_nodes, False))

            if not self.async_update:
                self.snd_u_embeddings.weight.data.index_add_(
                    0, nodes[:, 0], grad_u)
                self.snd_v_embeddings.weight.data.index_add_(
                    0, nodes[:, 1], grad_v)
                if neg_nodes is not None:
                    self.snd_v_embeddings.weight.data.index_add_(
                        0, neg_nodes, grad_v_neg)

        return

    def get_embedding(self):
        if self.fst:
            embedding_fst = self.fst_u_embeddings.weight.cpu().data.numpy()
            embedding_fst /= np.sqrt(np.sum(embedding_fst * embedding_fst,
                                            1)).reshape(-1, 1)
        if self.snd:
            embedding_snd = self.snd_u_embeddings.weight.cpu().data.numpy()
            embedding_snd /= np.sqrt(np.sum(embedding_snd * embedding_snd,
                                            1)).reshape(-1, 1)
        if self.fst and self.snd:
            embedding = np.concatenate((embedding_fst, embedding_snd), 1)
            embedding /= np.sqrt(np.sum(embedding * embedding,
                                        1)).reshape(-1, 1)
        elif self.fst and not self.snd:
            embedding = embedding_fst
        elif self.snd and not self.fst:
            embedding = embedding_snd
        else:
            pass

        return embedding

    def save_embedding(self, dataset, file_name):
        """ Write embedding to local file. Only used when node ids are numbers.

        Parameters
        ----------
        dataset DeepwalkDataset : the dataset
        file_name str : the file name
        """
        embedding = self.get_embedding()
        np.save(file_name, embedding)

    def save_embedding_pt(self, dataset, file_name):
        """ For ogb leaderboard. """
        embedding = torch.Tensor(self.get_embedding()).cpu()
        embedding_empty = torch.zeros_like(embedding.data)
        valid_nodes = torch.LongTensor(dataset.valid_nodes)
        valid_embedding = embedding.data.index_select(0, valid_nodes)
        embedding_empty.index_add_(0, valid_nodes, valid_embedding)

        torch.save(embedding_empty, file_name)
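
fast_sigmoid above replaces torch.sigmoid with a table lookup: fast_learn first clamps scores to [-6, 6], and each score is then shifted and floored into an index over a table precomputed at 0.01 resolution. A standalone check of that approximation (values are illustrative):

import torch

lookup_table = torch.sigmoid(torch.arange(-6.01, 6.01, 0.01))
lookup_table[0] = 0.0
lookup_table[-1] = 1.0

def fast_sigmoid(score: torch.Tensor) -> torch.Tensor:
    # Map a clamped score in [-6, 6] to an index into the precomputed table.
    idx = torch.floor((score + 6.01) / 0.01).long()
    return lookup_table[idx]

scores = torch.clamp(torch.randn(5) * 3, min=-6, max=6)
print(fast_sigmoid(scores))
print(torch.sigmoid(scores))   # agrees to roughly two to three decimal places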
Example #9
def evaluate(config, directories, seeds_per_thread=5, repeats=1, model_workers=None, average_weights=False):
    models = []

    if model_workers is None:
        model_workers = config['training']['num_threads_model_workers']
    threads = config['training']['num_threads_exploring_virtual'] + config['training']['num_threads_exploiting_virtual']

    for model_directory in directories:
        models.append(load_model(model_directory))

    if average_weights:
        average_model = create_model(config['model'])
        average_model.train()
        average_model.to(torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu'))
        #print("start averaging")
        average_update(average_model.actor, [model.actor for model in models])
        average_model = AverageModel([average_model], config, repeats)
    else:
        average_model = AverageModel(models, config, repeats)

    processes = []
    results = Queue()
    observation_queue = Queue()
    action_queue = Queue()
    observation_conns = [mp.Pipe(duplex=False) for _ in
                         range(threads)]
    action_conns = [mp.Pipe(duplex=False) for _ in
                    range(threads)]
    try:
        for p_id in range(model_workers):
            p = mp.Process(
                target=client_model_worker,
                args=(average_model,
                      observation_queue, action_queue)
            )
            p.start()
            processes.append(p)

        in_observation_conns = _get_in_connections(observation_conns)
        out_observation_conns = _get_out_connections(observation_conns)

        p = mp.Process(
            target=client_observation_worker,
            args=(in_observation_conns, observation_queue)
        )
        p.start()
        processes.append(p)

        in_action_conns = _get_in_connections(action_conns)
        out_action_conns = _get_out_connections(action_conns)

        p = mp.Process(
            target=client_action_worker,
            args=(out_action_conns, action_queue)
        )
        p.start()
        processes.append(p)

        for p_id in range(threads):
            p = mp.Process(
                target=evaluate_single_thread,
                args=(
                    p_id,
                    RemoteModel(in_action_conns[p_id], out_observation_conns[p_id]),
                    config,
                    seeds_per_thread,
                    results
                )
            )
            p.start()
            processes.append(p)

        rewards_total = []
        rewards_without_falling = []
        modified_rewards_total = []
        step_counts_total = []
        infos_total = []
        for _ in range(threads):
            rewards, modified_rewards, step_counts, infos = results.get()
            rewards_total += rewards
            modified_rewards_total += modified_rewards
            step_counts_total += step_counts
            infos_total += infos
            for r, s in zip(rewards, step_counts):
                if s > 999:
                    rewards_without_falling.append(r)
    finally:
        for p in processes:
            p.terminate()
    return rewards_total, modified_rewards_total, step_counts_total, infos_total, rewards_without_falling
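
evaluate above funnels observations from many evaluation processes into a shared queue, has model-worker processes compute actions, and routes each action back through the pipe belonging to the process that asked. A much-reduced sketch of that id-tagged request/response routing; all names here are illustrative and the real client_*_worker functions may differ:

from multiprocessing import Process, Queue, Pipe

def model_worker(request_queue: Queue, reply_queue: Queue) -> None:
    # Compute an "action" for each observation; tag the reply with the
    # requester's id so it can be routed back to the right pipe.
    while True:
        msg = request_queue.get()
        if msg is None:
            break
        requester_id, observation = msg
        reply_queue.put((requester_id, observation * 2))

def router(reply_queue: Queue, conns, num_replies: int) -> None:
    # Demultiplex replies onto the pipe belonging to each requester.
    for _ in range(num_replies):
        requester_id, action = reply_queue.get()
        conns[requester_id].send(action)

if __name__ == "__main__":
    num_clients = 3
    request_queue, reply_queue = Queue(), Queue()
    pipes = [Pipe(duplex=False) for _ in range(num_clients)]
    recv_conns = [p[0] for p in pipes]
    send_conns = [p[1] for p in pipes]

    worker = Process(target=model_worker, args=(request_queue, reply_queue))
    route = Process(target=router, args=(reply_queue, send_conns, num_clients))
    worker.start()
    route.start()

    for i in range(num_clients):
        request_queue.put((i, i + 10))      # each "environment" asks once
    for i in range(num_clients):
        print("client", i, "got action", recv_conns[i].recv())

    request_queue.put(None)
    worker.join()
    route.join()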
class QIterable(Iterable[Instance]):
    """
    You can't set attributes on Iterators, so this is just a dumb wrapper
    that exposes the output_queue.
    """
    def __init__(self, output_queue_size, epochs_per_read, num_workers, reader,
                 file_path) -> None:
        self.output_queue = Queue(output_queue_size)
        self.epochs_per_read = epochs_per_read
        self.num_workers = num_workers
        self.reader = reader
        self.file_path = file_path

        # Initialized in start.
        self.input_queue: Optional[Queue] = None
        self.processes: List[Process] = []
        # The num_active_workers and num_inflight_items counts in conjunction
        # determine whether there could be any outstanding instances.
        self.num_active_workers: Optional[Value] = None
        self.num_inflight_items: Optional[Value] = None

    def __iter__(self) -> Iterator[Instance]:
        self.start()

        # Keep going as long as not all the workers have finished or there are items in flight.
        while self.num_active_workers.value > 0 or self.num_inflight_items.value > 0:
            # Inner loop to minimize locking on self.num_active_workers.
            while True:
                try:
                    # Non-blocking to handle the empty-queue case.
                    yield self.output_queue.get(block=False, timeout=1.0)
                    with self.num_inflight_items.get_lock():
                        self.num_inflight_items.value -= 1
                except Empty:
                    # The queue could be empty because the workers are
                    # all finished or because they're busy processing.
                    # The outer loop distinguishes between these two
                    # cases.
                    break

        self.join()

    def start(self) -> None:
        shards = glob.glob(self.file_path)
        # Ensure a consistent order before shuffling for testing.
        shards.sort()
        num_shards = len(shards)

        # If we want multiple epochs per read, put shards in the queue multiple times.
        self.input_queue = Queue(num_shards * self.epochs_per_read +
                                 self.num_workers)
        for _ in range(self.epochs_per_read):
            np.random.shuffle(shards)
            for shard in shards:
                self.input_queue.put(shard)

        # Then put a None per worker to signify no more files.
        for _ in range(self.num_workers):
            self.input_queue.put(None)

        assert (
            not self.processes
        ), "Process list non-empty! You must call QIterable.join() before restarting."
        self.num_active_workers = Value("i", self.num_workers)
        self.num_inflight_items = Value("i", 0)
        for worker_id in range(self.num_workers):
            process = Process(
                target=_worker,
                args=(
                    self.reader,
                    self.input_queue,
                    self.output_queue,
                    self.num_active_workers,
                    self.num_inflight_items,
                    worker_id,
                ),
            )
            logger.info(f"starting worker {worker_id}")
            process.start()
            self.processes.append(process)

    def join(self) -> None:
        for process in self.processes:
            process.join()
        self.processes.clear()

    def __del__(self) -> None:
        """
        Terminate processes if the user hasn't joined. This is necessary as
        leaving stray processes running can corrupt shared state. In brief,
        we've observed shared memory counters being reused (when the memory was
        free from the perspective of the parent process) while the stray
        workers still held a reference to them.

        For a discussion of using destructors in Python in this manner, see
        https://eli.thegreenplace.net/2009/06/12/safely-using-destructors-in-python/.
        """
        for process in self.processes:
            process.terminate()
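
A minimal usage sketch for the class above (the `reader` dataset reader, the shard glob, and the `process` callback are placeholders, not part of the original code):

qiterable = QIterable(output_queue_size=1000,
                      epochs_per_read=1,
                      num_workers=4,
                      reader=reader,                     # a DatasetReader instance
                      file_path='/data/shards/train-*.jsonl')
for instance in qiterable:   # __iter__ calls start() and join() internally
    process(instance)        # placeholder for whatever consumes the instances
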
def train_controller(current_time):
    """
    Train the controller by using the CMA-ES algorithm to improve candidate solutions,
    evaluating them in parallel with multiprocessing.
    """

    current_time = str(current_time)
    number_generations = 1
    games = GAMES
    levels = LEVELS
    current_game = False
    result_queue = Queue()

    vae, lstm, best_controller, solver, checkpoint = init_models(
        current_time,
        sequence=1,
        load_vae=True,
        load_controller=True,
        load_lstm=True)
    if checkpoint:
        current_ctrl_version = checkpoint["version"]
        current_solver_version = checkpoint["solver_version"]
        new_results = solver.result()
        current_best = new_results[1]
    else:
        current_ctrl_version = 1
        current_solver_version = 1
        current_best = 0

    while True:
        solutions = solver.ask()
        fitlist = np.zeros(POPULATION)
        eval_left = 0

        ## Once a level is beaten, remove it from the training set of levels
        if current_best > SCORE_CAP or not current_game:
            if not current_game or len(levels[current_game]) == 0:
                current_game = games[0]
                games.remove(current_game)
                current_best = 0
            current_level = np.random.choice(levels[current_game])
            levels[current_game].remove(current_level)

        print("[CONTROLLER] Current game: %s and level is: %s" %
              (current_game, current_level))
        while eval_left < POPULATION:
            jobs = []
            ## Number of evaluations left in this batch (never exceeds POPULATION)
            todo = min(PARALLEL, POPULATION - eval_left)

            ## Create the child processes to evaluate in parallel
            print("[CONTROLLER] Starting new batch")
            for job in range(todo):
                process_id = eval_left + job

                ## Assign new weights to the controller, given by the CMA
                controller = Controller(PARAMS_CONTROLLER,
                                        ACTION_SPACE).to(DEVICE)
                init_controller(controller, solutions[process_id])

                ## Start the evaluation
                new_game = VAECGame(process_id, vae, lstm, controller,
                                    current_game, current_level, result_queue)
                new_game.start()
                jobs.append(new_game)

            ## Wait for the evaluation to be completed
            for p in jobs:
                p.join()

            eval_left = eval_left + todo
            print("[CONTROLLER] Done with batch")

        ## Get the results back from the processes
        times = create_results(result_queue, fitlist)

        ## For display
        current_score = np.max(fitlist)
        average_score = np.mean(fitlist)

        ## Update solver with results
        max_idx = np.argmax(fitlist)
        fitlist = rankmin(fitlist)
        solver.tell(fitlist)
        new_results = solver.result()

        ## Display
        print("[CONTROLLER] Total duration for generation: %.3f seconds, average duration:"
            " %.3f seconds per process, %.3f seconds per run" % ((np.sum(times), \
                    np.mean(times), np.mean(times) / REPEAT_ROLLOUT)))
        print("[CONTROLLER] Creating generation: {} ...".format(
            number_generations + 1))
        print("[CONTROLLER] Current best score: {}, new run best score: {}".
              format(current_best, current_score))
        print(
            "[CONTROLLER] Best score ever: {}, current number of improvements: {}"
            .format(current_best, current_ctrl_version))
        print(
            "[CONTROLLER] Average score on all of the processes: {}\n".format(
                average_score))

        ## Save the new best controller
        if current_score > current_best:
            init_controller(best_controller, solutions[max_idx])
            state = {
                'version': current_ctrl_version,
                'solver_version': current_solver_version,
                'score': current_score,
                'level': current_level,
                'game': current_game,
                'generation': number_generations
            }
            save_checkpoint(best_controller, "controller", state, current_time)
            current_ctrl_version += 1
            current_best = current_score

        ## Save solver and change level to a random one
        if number_generations % SAVE_SOLVER_TICK == 0:
            dir_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), \
                        'saved_models', current_time, "{}-solver.pkl".format(current_solver_version))
            pickle.dump(solver, open(dir_path, 'wb'))
            current_solver_version += 1
            current_level = np.random.choice(levels[current_game])

        number_generations += 1
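
The generation loop above follows the standard CMA-ES ask/evaluate/tell cycle. A minimal single-process sketch of that cycle, assuming the `cma` package and a placeholder `evaluate` function that returns a score to maximize (both are stand-ins for the project's solver and VAECGame rollouts):

import cma

def evaluate(params):              # placeholder rollout score (to be maximized)
    return -sum(p * p for p in params)

n_params = 100                     # placeholder controller parameter count
es = cma.CMAEvolutionStrategy([0.0] * n_params, 0.5)
while not es.stop():
    solutions = es.ask()                         # candidate parameter vectors
    scores = [evaluate(s) for s in solutions]    # roll out / score each candidate
    es.tell(solutions, [-s for s in scores])     # cma minimizes, so negate the scores
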
Example #12
    frames = torch.cat(frames, dim=0).cuda()
    H, W = frames.size()[2:]
    frames = F.interpolate(frames,
                           size=(768, 768),
                           mode='bilinear',
                           align_corners=False)  # must be divisible by 32
    out = net(frames)[0]
    out = F.interpolate(out, size=(H, W), mode='bilinear',
                        align_corners=False).argmax(dim=1).detach().cpu()
    out_q.put(out)


if __name__ == '__main__':
    torch.multiprocessing.set_start_method('spawn')

    in_q = Queue(1024)
    out_q = Queue(1024)

    in_worker = Process(target=get_func, args=(args.input, in_q))
    out_worker = Process(target=save_func,
                         args=(args.input, args.output, out_q))

    in_worker.start()
    out_worker.start()

    net = get_model()

    frames = []
    while True:
        frame = in_q.get()
        if frame == 'quit': break
Example #13
class KGEmbedding:
    """Sparse Embedding for Knowledge Graph
    It is used to store both entity embeddings and relation embeddings.

    Parameters
    ----------
    num : int
        Number of embeddings.
    dim : int
        Embedding dimension size.
    device : th.device
        Device to store the embedding.
    """
    def __init__(self, device):
        self.emb = None
        self.is_train = False
        self.async_q = None
        self.device = device

    def init(self, emb_init, lr, async_threads, num=-1, dim=-1, init_strat='uniform', optimizer='Adagrad', device=None):
        """Initializing the embeddings for training.

        Parameters
        ----------
        emb_init : float or tuple
            The initial embedding range should be [-emb_init, emb_init].
        """
        self.async_threads = async_threads
        if device is not None:
            self.device = device
        if self.emb is None:
            self.emb = th.empty(num, dim, dtype=th.float32, device=self.device)
            self.num = self.emb.shape[0]
            self.dim = self.emb.shape[1]
        if optimizer == 'Adagrad':
            self.optim = Adagrad(self.emb, device=self.device, lr=lr)
        elif optimizer == 'Adam':
            self.optim = Adam(self.emb, device=self.device, lr=lr)
        else:
            raise NotImplementedError(f'optimizer {optimizer} is not supported by dglke yet.')

        self.trace = []
        self.has_cross_rel = False

        if init_strat == 'uniform':
            INIT.uniform_(self.emb, -emb_init, emb_init)
        elif init_strat == 'normal':
            if type(emb_init) is tuple or type(emb_init) is list:
                if len(emb_init) == 1:
                    mean = emb_init[0]
                    std = 1
                else:
                    mean, std = emb_init
                INIT.normal_(self.emb.data, mean, std)
            else:
                init_size = emb_init
                INIT.normal_(self.emb.data)
                self.emb.data *= init_size
        elif init_strat == 'random':
            if type(emb_init) is tuple:
                x, y = emb_init
                self.emb.data = th.rand(num, dim, dtype=th.float32, device=self.device) * x + y
        elif init_strat == 'xavier':
            INIT.xavier_normal_(self.emb.data)
        elif init_strat == 'constant':
            INIT.constant_(self.emb.data, emb_init)

    def clone(self, device):
        clone_emb = copy.deepcopy(self)
        clone_emb.device = device
        clone_emb.emb = clone_emb.emb.to(device)
        clone_emb.optim = clone_emb.optim.to(device)
        return clone_emb

    def load(self, path, name):
        """Load embeddings.

        Parameters
        ----------
        path : str
            Directory to load the embedding.
        name : str
            Embedding name.
        """
        file_name = os.path.join(path, name)
        self.emb = th.Tensor(np.load(file_name))

    def load_emb(self, emb_array):
        """Load embeddings from numpy array.

        Parameters
        ----------
        emb_array : numpy.array  or torch.tensor
            Embedding array in numpy array or torch.tensor
        """
        if isinstance(emb_array, np.ndarray):
            self.emb = th.Tensor(emb_array)
        else:
            self.emb = emb_array

    def save(self, path, name):
        """Save embeddings.

        Parameters
        ----------
        path : str
            Directory to save the embedding.
        name : str
            Embedding name.
        """
        file_name = os.path.join(path, name)
        np.save(file_name, self.emb.cpu().detach().numpy())

    def train(self):
        self.is_train = True

    def eval(self):
        self.is_train = False

    def setup_cross_rels(self, cross_rels, global_emb):
        cpu_bitmap = th.zeros((self.num,), dtype=th.bool)
        for i, rel in enumerate(cross_rels):
            cpu_bitmap[rel] = 1
        self.cpu_bitmap = cpu_bitmap
        self.has_cross_rel = True
        self.global_emb = global_emb

    def get_noncross_idx(self, idx):
        cpu_mask = self.cpu_bitmap[idx]
        gpu_mask = ~cpu_mask
        return idx[gpu_mask]

    def share_memory(self):
        """Use torch.tensor.share_memory_() to allow cross process tensor access
        """
        self.emb.share_memory_()
        self.optim.share_memory()

    def __call__(self, idx, gpu_id=-1, trace=True):
        """ Return sliced tensor.

        Parameters
        ----------
        idx : th.tensor
            Slicing index
        gpu_id : int
            Which gpu to put sliced data in.
        trace : bool
            If True, trace the computation. This is required in training.
            If False, do not trace the computation.
            Default: True
        """
        # for inference or evaluation
        if self.is_train is False:
            return self.emb[idx].cuda(gpu_id, non_blocking=True)

        if self.has_cross_rel:
            cpu_idx = idx.cpu()
            cpu_mask = self.cpu_bitmap[cpu_idx]
            cpu_idx = cpu_idx[cpu_mask]
            cpu_idx = th.unique(cpu_idx)
            if cpu_idx.shape[0] != 0:
                cpu_emb = self.global_emb.emb[cpu_idx]
                self.emb[cpu_idx] = cpu_emb.cuda(gpu_id, non_blocking=True)
        s = self.emb[idx]
        if gpu_id >= 0:
            s = s.cuda(gpu_id, non_blocking=True)
        # During the training, we need to trace the computation.
        # In this case, we need to record the computation path and compute the gradients.
        if trace:
            data = s.clone().detach().requires_grad_(True)
            self.trace.append((idx, data))
        else:
            data = s
        return data

    def update(self, gpu_id=-1):
        """ Update embeddings in a sparse manner
        Sparse embeddings are updated in mini batches. we maintains gradient states for
        each embedding so they can be updated separately.

        Parameters
        ----------
        gpu_id : int
            Which GPU to use for the update. If -1 is provided, the CPU is used.
        """
        with th.no_grad():
            for idx, data in self.trace:
                grad = data.grad.data
                # the update is non-linear so indices must be unique
                grad_indices = idx
                grad_values = grad
                if self.async_q is not None:
                    grad_indices.share_memory_()
                    grad_values.share_memory_()
                    self.async_q.put((grad_indices, grad_values, gpu_id))
                else:
                    if self.has_cross_rel:
                        cpu_mask = self.cpu_bitmap[grad_indices]
                        cpu_idx = grad_indices[cpu_mask]
                        if cpu_idx.shape[0] > 0:
                            cpu_grad = grad_values[cpu_mask]
                            self.global_emb.optim.step(cpu_idx, self.global_emb.emb, cpu_grad, gpu_id)
                    self.optim.step(grad_indices, self.emb, grad_values, gpu_id)
        self.trace = []

    def create_async_update(self):
        """Set up the async update subprocess.
        """
        self.async_q = Queue(1)
        self.async_p = mp.Process(target=self.async_update)
        self.async_p.start()

    def finish_async_update(self):
        """Notify the async update subprocess to quit.
        """
        self.async_q.put((None, None, None))
        self.async_p.join()

    def async_update(self):
        th.set_num_threads(self.async_threads)
        while True:
            (grad_indices, grad_values, gpu_id) = self.async_q.get()
            if grad_indices is None:
                return
            with th.no_grad():
                if self.has_cross_rel:
                    cpu_mask = self.cpu_bitmap[grad_indices]
                    cpu_idx = grad_indices[cpu_mask]
                    if cpu_idx.shape[0] > 0:
                        cpu_grad = grad_values[cpu_mask]
                        self.global_emb.optim.step(cpu_idx, self.global_emb.emb, cpu_grad, gpu_id)
                self.optim.step(grad_indices, self.emb, grad_values, gpu_id)


    def curr_emb(self):
        """Return embeddings in trace.
        """
        data = [data for _, data in self.trace]
        return th.cat(data, 0)
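
A minimal usage sketch of the trace/update cycle above (assumes `import torch as th` as in the original module; the sizes and the squared-norm loss are placeholders):

entity_emb = KGEmbedding(device=th.device('cpu'))
entity_emb.init(emb_init=0.1, lr=0.01, async_threads=1, num=1000, dim=200)
entity_emb.train()

idx = th.tensor([3, 7, 42])
vecs = entity_emb(idx)          # traced slice, cloned with requires_grad=True
loss = (vecs * vecs).sum()      # placeholder loss
loss.backward()
entity_emb.update()             # sparse optimizer step applied only to rows 3, 7, 42
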
Example #14
 def create_async_update(self):
     """Set up the async update subprocess.
     """
     self.async_q = Queue(1)
     self.async_p = mp.Process(target=self.async_update)
     self.async_p.start()
Example #15
if __name__ == '__main__':

    opt = TestOptions().parse()

    data_info = data.dataset_info()
    datanum = data_info.get_dataset(opt)[0]
    folderlevel = data_info.folder_level[datanum]

    dataloaders = data.create_dataloader_test(opt)

    visualizer = Visualizer(opt)
    iter_counter = IterationCounter(opt,
                                    len(dataloaders[0]) * opt.render_thread)
    # create a webpage that summarizes all the results

    testing_queue = Queue(10)

    ngpus = opt.device_count

    render_gpu_ids = list(range(ngpus - opt.render_thread, ngpus))
    render_layer_list = []
    for gpu in render_gpu_ids:
        opt.gpu_ids = gpu
        render_layer = TestRender(opt)
        render_layer_list.append(render_layer)

    opt.gpu_ids = list(range(0, ngpus - opt.render_thread))
    print('Testing gpu ', opt.gpu_ids)
    if opt.names is None:
        model = TestModel(opt)
        model.eval()
Example #16
def call_mods(args):
    print("[main]call_mods starts..")
    start = time.time()

    model_path = os.path.abspath(args.model_path)
    if not os.path.exists(model_path):
        raise ValueError("--model_path is not set right!")
    input_path = os.path.abspath(args.input_path)
    if not os.path.exists(input_path):
        raise ValueError("--input_path does not exist!")
    success_file = input_path.rstrip("/") + "." + str(
        uuid.uuid1()) + ".success"
    if os.path.exists(success_file):
        os.remove(success_file)

    if os.path.isdir(input_path):
        motif_seqs, chrom2len, fast5s_q, len_fast5s, positions = _extract_preprocess(
            input_path, str2bool(args.recursively), args.motifs,
            str2bool(args.is_dna), args.reference_path, args.f5_batch_size,
            args.positions)
        if use_cuda:
            _call_mods_from_fast5s_gpu(motif_seqs, chrom2len, fast5s_q,
                                       len_fast5s, positions, model_path,
                                       success_file, args)
        else:
            _call_mods_from_fast5s_cpu2(motif_seqs, chrom2len, fast5s_q,
                                        len_fast5s, positions, model_path,
                                        success_file, args)
    else:
        # features_batch_q = mp.Queue()
        features_batch_q = Queue()
        p_rf = mp.Process(target=_read_features_file,
                          args=(input_path, features_batch_q, args.batch_size))
        p_rf.daemon = True
        p_rf.start()

        # pred_str_q = mp.Queue()
        pred_str_q = Queue()

        predstr_procs = []

        if use_cuda:
            nproc_dp = args.nproc_gpu
            if nproc_dp < 1:
                nproc_dp = 1
        else:
            nproc = args.nproc
            if nproc < 3:
                print("--nproc must be >= 3!!")
                nproc = 3
            nproc_dp = nproc - 2
            if nproc_dp > nproc_to_call_mods_in_cpu_mode:
                nproc_dp = nproc_to_call_mods_in_cpu_mode

        for _ in range(nproc_dp):
            p = mp.Process(target=_call_mods_q,
                           args=(model_path, features_batch_q, pred_str_q,
                                 success_file, args))
            p.daemon = True
            p.start()
            predstr_procs.append(p)

        # print("write_process started..")
        p_w = mp.Process(target=_write_predstr_to_file,
                         args=(args.result_file, pred_str_q))
        p_w.daemon = True
        p_w.start()

        for p in predstr_procs:
            p.join()

        # print("finishing the write_process..")
        pred_str_q.put("kill")

        p_rf.join()

        p_w.join()

    if os.path.exists(success_file):
        os.remove(success_file)
    print("[main]call_mods costs %.2f seconds.." % (time.time() - start))
def main(method):
    args = built_parser(method=method)
    env = gym.make(args.env_name)
    state_dim = env.observation_space.shape
    action_dim = env.action_space.shape[0]

    args.state_dim = state_dim
    args.action_dim = action_dim
    action_high = env.action_space.high
    action_low = env.action_space.low
    args.action_high = action_high.tolist()
    args.action_low = action_low.tolist()
    args.seed = np.random.randint(0, 30)
    args.init_time = time.time()

    if args.alpha == 'auto' and args.target_entropy == 'auto':
        delta_a = np.array(args.action_high, dtype=np.float32) - np.array(
            args.action_low, dtype=np.float32)
        args.target_entropy = -1 * args.action_dim + sum(np.log(delta_a / 2))

    Q_net1 = QNet(args)
    Q_net1.train()
    Q_net1.share_memory()
    Q_net1_target = QNet(args)
    Q_net1_target.train()
    Q_net1_target.share_memory()
    Q_net2 = QNet(args)
    Q_net2.train()
    Q_net2.share_memory()
    Q_net2_target = QNet(args)
    Q_net2_target.train()
    Q_net2_target.share_memory()
    actor1 = PolicyNet(args)
    if args.code_model == "eval":
        actor1.load_state_dict(
            torch.load('./' + args.env_name + '/method_' + str(args.method) +
                       '/model/policy_' + str(args.max_train) + '.pkl'))
    actor1.train()
    actor1.share_memory()
    actor1_target = PolicyNet(args)
    actor1_target.train()
    actor1_target.share_memory()
    actor2 = PolicyNet(args)
    actor2.train()
    actor2.share_memory()
    actor2_target = PolicyNet(args)
    actor2_target.train()
    actor2_target.share_memory()

    Q_net1_target.load_state_dict(Q_net1.state_dict())
    Q_net2_target.load_state_dict(Q_net2.state_dict())
    actor1_target.load_state_dict(actor1.state_dict())
    actor2_target.load_state_dict(actor2.state_dict())

    Q_net1_optimizer = my_optim.SharedAdam(Q_net1.parameters(),
                                           lr=args.critic_lr)
    Q_net1_optimizer.share_memory()
    Q_net2_optimizer = my_optim.SharedAdam(Q_net2.parameters(),
                                           lr=args.critic_lr)
    Q_net2_optimizer.share_memory()
    actor1_optimizer = my_optim.SharedAdam(actor1.parameters(),
                                           lr=args.actor_lr)
    actor1_optimizer.share_memory()
    actor2_optimizer = my_optim.SharedAdam(actor2.parameters(),
                                           lr=args.actor_lr)
    actor2_optimizer.share_memory()
    log_alpha = torch.zeros(1, dtype=torch.float32, requires_grad=True)
    log_alpha.share_memory_()
    alpha_optimizer = my_optim.SharedAdam([log_alpha], lr=args.alpha_lr)
    alpha_optimizer.share_memory()

    share_net = [
        Q_net1, Q_net1_target, Q_net2, Q_net2_target, actor1, actor1_target,
        actor2, actor2_target, log_alpha
    ]
    share_optimizer = [
        Q_net1_optimizer, Q_net2_optimizer, actor1_optimizer, actor2_optimizer,
        alpha_optimizer
    ]

    experience_in_queue = []
    experience_out_queue = []
    for i in range(args.num_buffers):
        experience_in_queue.append(Queue(maxsize=10))
        experience_out_queue.append(Queue(maxsize=10))
    shared_queue = [experience_in_queue, experience_out_queue]
    step_counter = mp.Value('i', 0)
    stop_sign = mp.Value('i', 0)
    iteration_counter = mp.Value('i', 0)
    shared_value = [step_counter, stop_sign, iteration_counter]
    lock = mp.Lock()
    procs = []
    if args.code_model == "train":
        for i in range(args.num_actors):
            procs.append(
                Process(target=actor_agent,
                        args=(args, shared_queue, shared_value,
                              [actor1, Q_net1], lock, i)))
        for i in range(args.num_buffers):
            procs.append(
                Process(target=buffer,
                        args=(args, shared_queue, shared_value, i)))
        procs.append(
            Process(target=evaluate_agent,
                    args=(args, shared_value, share_net)))
        for i in range(args.num_learners):
            #device = torch.device("cuda")
            device = torch.device("cpu")
            procs.append(
                Process(target=leaner_agent,
                        args=(args, shared_queue, shared_value, share_net,
                              share_optimizer, device, lock, i)))
    elif args.code_model == "simu":
        procs.append(Process(target=simu_agent, args=(args, shared_value)))

    for p in procs:
        p.start()
    for p in procs:
        p.join()
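
A worked example of the automatic target-entropy line above: for a 2-dimensional action space bounded by [-1, 1] in each dimension, delta_a = [2, 2], so the target entropy is -2 + log(1) + log(1) = -2.0.

import numpy as np

action_high = np.array([1.0, 1.0], dtype=np.float32)
action_low = np.array([-1.0, -1.0], dtype=np.float32)
delta_a = action_high - action_low               # [2., 2.]
target_entropy = -1 * len(delta_a) + np.sum(np.log(delta_a / 2))
print(target_entropy)                            # -2.0
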
Example #18
class Base(object):
    def __init__(self):
        self.epoch = 0
        self.iteration = 0
        self.offset = 0

        # for multiprocessing
        self._epoch = 0

        # Setting for multiprocessing
        self.preloading_process = None
        self.queue = Queue()
        self.queue_size = 0

    def count_vocab_size(self, dict_path):
        vocab_count = 1  # for <blank>
        with codecs.open(dict_path, 'r', 'utf-8') as f:
            for line in f:
                if line.strip() != '':
                    vocab_count += 1
        return vocab_count

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        raise NotImplementedError()

    def __iter__(self):
        """Returns self."""
        return self

    @property
    def epoch_detail(self):
        # Floating point version of epoch
        return self.epoch + (self.offset / len(self))

    def __next__(self, batch_size=None):
        """Generate each mini-batch.

        Args:
            batch_size (int): the size of mini-batch
        Returns:
            batch (tuple):
            is_new_epoch (bool): If true, 1 epoch is finished

        """
        if batch_size is None:
            batch_size = self.batch_size

        if self.nques is None:
            if self.max_epoch is not None and self.epoch >= self.max_epoch:
                raise StopIteration()
            # NOTE: max_epoch == None means infinite loop

            data_indices, is_new_epoch = self.sample_index(batch_size)
            batch = self.make_batch(data_indices)
            self.iteration += len(data_indices)
        else:
            # Clean up multiprocessing
            if self.preloading_process is not None and self.queue_size == 0:
                self.preloading_process.terminate()
                self.preloading_process.join()

            if self.max_epoch is not None and self.epoch >= self.max_epoch:
                # Clean up multiprocessing
                self.preloading_process.terminate()
                self.preloading_process.join()
                raise StopIteration()
            # NOTE: max_epoch == None means infinite loop

            # Enqueue mini-batches
            if self.queue_size == 0:
                self.data_indices_list = []
                self.is_new_epoch_list = []
                for _ in six.moves.range(self.nques):
                    data_indices, is_new_epoch = self.sample_index(batch_size)
                    self.data_indices_list.append(data_indices)
                    self.is_new_epoch_list.append(is_new_epoch)
                self.preloading_process = Process(
                    target=self.preloading_loop,
                    args=(self.queue, self.data_indices_list))
                self.preloading_process.start()
                self.queue_size += self.nques
                time.sleep(3)

            # print(self.queue.qsize())
            # print(self.queue_size)

            self.iteration += len(self.data_indices_list[self.nques -
                                                         self.queue_size])
            self.queue_size -= 1
            batch = self.queue.get()
            is_new_epoch = self.is_new_epoch_list.pop(0)

        if is_new_epoch:
            self.epoch += 1

        return batch, is_new_epoch

    def next(self, batch_size=None):
        # For python2
        return self.__next__(batch_size)

    def sample_index(self, batch_size):
        """Sample data indices of mini-batch.

        Args:
            batch_size (int): the size of mini-batch
        Returns:
            data_indices (np.ndarray):
            is_new_epoch (bool):

        """
        is_new_epoch = False

        if self.sort_by_input_length or not self.shuffle:
            if self.sort_by_input_length:
                # Change batch size dynamically
                min_num_frames_batch = self.df[self.offset:self.offset +
                                               1]['x_len'].values[0]
                _batch_size = self.select_batch_size(batch_size,
                                                     min_num_frames_batch)
            else:
                _batch_size = batch_size

            if len(self.rest) > _batch_size:
                data_indices = list(self.df[self.offset:self.offset +
                                            _batch_size].index)
                self.rest -= set(data_indices)
                # NOTE: rest is in utterance length order when sort_by_input_length == True
                # NOTE: otherwise in name length order when shuffle == False
                self.offset += len(data_indices)
            else:
                # Last mini-batch
                data_indices = list(self.df[self.offset:self.offset +
                                            len(self.rest)].index)
                self._reset()
                is_new_epoch = True
                self._epoch += 1
                if self._epoch == self.sort_stop_epoch:
                    self.sort_by_input_length = False
                    self.shuffle = True

            # Sort in the descending order for pytorch
            data_indices = data_indices[::-1]
        else:
            # Randomly sample utterances
            if len(self.rest) > batch_size:
                data_indices = random.sample(list(self.rest), batch_size)
                self.rest -= set(data_indices)
            else:
                # Last mini-batch
                data_indices = list(self.rest)
                self._reset()
                is_new_epoch = True
                self._epoch += 1

            self.offset += len(data_indices)

        return data_indices, is_new_epoch

    def select_batch_size(self, batch_size, min_num_frames_batch):
        if not self.dynamic_batching:
            return batch_size

        if min_num_frames_batch <= 800:
            pass
        elif min_num_frames_batch <= 1600:
            batch_size = int(batch_size / 2)
        else:
            batch_size = int(batch_size / 4)

        if batch_size < 1:
            batch_size = 1

        return batch_size

    def reset(self):
        self._reset()

        self.queue = Queue()
        self.queue_size = 0

        # Clean up multiprocessing
        if self.preloading_process is not None:
            self.preloading_process.terminate()
            self.preloading_process.join()

    def _reset(self):
        """Reset data counter and offset."""
        self.rest = set(list(self.df.index))
        self.offset = 0

    def preloading_loop(self, queue, data_indices_list):
        """.

        Args:
            queue ():
            data_indices_list (np.ndarray):

        """
        # print("Pre-loading started.")
        for i in six.moves.range(len(data_indices_list)):
            queue.put(self.make_batch(data_indices_list[i]))
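
A minimal usage sketch of the iterator protocol above (assumes a concrete subclass of Base that defines self.df, batch_size, max_epoch, nques, make_batch, etc.; the names below are placeholders):

dataset = MyDataset(...)                # hypothetical concrete subclass of Base
for batch, is_new_epoch in dataset:
    train_step(batch)                   # placeholder training step
    if is_new_epoch:
        print('finished epoch %d' % dataset.epoch)
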
Example #19
File: train.py Project: zhu-del/LSTR
def train(training_dbs, validation_db, start_iter=0, freeze=False):
    learning_rate = system_configs.learning_rate
    max_iteration = system_configs.max_iter
    pretrained_model = system_configs.pretrain
    snapshot = system_configs.snapshot
    val_iter = system_configs.val_iter
    display = system_configs.display
    decay_rate = system_configs.decay_rate
    stepsize = system_configs.stepsize
    batch_size = system_configs.batch_size

    # getting the size of each database
    training_size = len(training_dbs[0].db_inds)
    validation_size = len(validation_db.db_inds)

    # queues storing data for training
    training_queue = Queue(system_configs.prefetch_size)  # 5
    validation_queue = Queue(5)

    # queues storing pinned data for training
    pinned_training_queue = queue.Queue(system_configs.prefetch_size)  # 5
    pinned_validation_queue = queue.Queue(5)

    # load data sampling function
    data_file = "sample.{}".format(training_dbs[0].data)  # "sample.coco"
    sample_data = importlib.import_module(data_file).sample_data
    # print(type(sample_data)) # function

    # allocating resources for parallel reading
    training_tasks = init_parallel_jobs(training_dbs, training_queue,
                                        sample_data)
    if val_iter:
        validation_tasks = init_parallel_jobs([validation_db],
                                              validation_queue, sample_data)

    training_pin_semaphore = threading.Semaphore()
    validation_pin_semaphore = threading.Semaphore()
    training_pin_semaphore.acquire()
    validation_pin_semaphore.acquire()

    training_pin_args = (training_queue, pinned_training_queue,
                         training_pin_semaphore)
    training_pin_thread = threading.Thread(target=pin_memory,
                                           args=training_pin_args)
    training_pin_thread.daemon = True
    training_pin_thread.start()

    validation_pin_args = (validation_queue, pinned_validation_queue,
                           validation_pin_semaphore)
    validation_pin_thread = threading.Thread(target=pin_memory,
                                             args=validation_pin_args)
    validation_pin_thread.daemon = True
    validation_pin_thread.start()

    print("building model...")
    nnet = NetworkFactory(flag=True)

    if pretrained_model is not None:
        if not os.path.exists(pretrained_model):
            raise ValueError("pretrained model does not exist")
        print("loading from pretrained model")
        nnet.load_pretrained_params(pretrained_model)

    if start_iter:
        learning_rate /= (decay_rate**(start_iter // stepsize))

        nnet.load_params(start_iter)
        nnet.set_lr(learning_rate)
        print("training starts from iteration {} with learning_rate {}".format(
            start_iter + 1, learning_rate))
    else:
        nnet.set_lr(learning_rate)

    print("training start...")
    nnet.cuda()
    nnet.train_mode()
    header = None
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter(
        'lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    metric_logger.add_meter(
        'class_error', utils.SmoothedValue(window_size=1, fmt='{value:.2f}'))

    with stdout_to_tqdm() as save_stdout:
        for iteration in metric_logger.log_every(tqdm(range(
                start_iter + 1, max_iteration + 1),
                                                      file=save_stdout,
                                                      ncols=67),
                                                 print_freq=10,
                                                 header=header):

            training = pinned_training_queue.get(block=True)
            viz_split = 'train'
            save = True if (display and iteration % display == 0) else False
            (set_loss, loss_dict) \
                = nnet.train(iteration, save, viz_split, **training)
            (loss_dict_reduced, loss_dict_reduced_unscaled,
             loss_dict_reduced_scaled, loss_value) = loss_dict
            metric_logger.update(loss=loss_value,
                                 **loss_dict_reduced_scaled,
                                 **loss_dict_reduced_unscaled)
            metric_logger.update(class_error=loss_dict_reduced['class_error'])
            metric_logger.update(lr=learning_rate)

            del set_loss

            if val_iter and validation_db.db_inds.size and iteration % val_iter == 0:
                nnet.eval_mode()
                viz_split = 'val'
                save = True
                validation = pinned_validation_queue.get(block=True)
                (val_set_loss, val_loss_dict) \
                    = nnet.validate(iteration, save, viz_split, **validation)
                (loss_dict_reduced, loss_dict_reduced_unscaled,
                 loss_dict_reduced_scaled, loss_value) = val_loss_dict
                print('[VAL LOG]\t[Saving training and evaluating images...]')
                metric_logger.update(loss=loss_value,
                                     **loss_dict_reduced_scaled,
                                     **loss_dict_reduced_unscaled)
                metric_logger.update(
                    class_error=loss_dict_reduced['class_error'])
                metric_logger.update(lr=learning_rate)
                nnet.train_mode()

            if iteration % snapshot == 0:
                nnet.save_params(iteration)

            if iteration % stepsize == 0:
                learning_rate /= decay_rate
                nnet.set_lr(learning_rate)

            if iteration % (training_size // batch_size) == 0:
                metric_logger.synchronize_between_processes()
                print("Averaged stats:", metric_logger)

    # sending signal to kill the thread
    training_pin_semaphore.release()
    validation_pin_semaphore.release()

    # terminating data fetching processes
    for training_task in training_tasks:
        training_task.terminate()
    if val_iter:
        for validation_task in validation_tasks:
            validation_task.terminate()
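
The pin_memory threads started above move batches from the multiprocessing queues into page-locked host memory so that subsequent host-to-GPU copies can be asynchronous; the actual pin_memory function is not shown. A hedged sketch of the typical shape of such a pump thread (the "xs" key is illustrative):

def pin_memory_sketch(data_queue, pinned_data_queue, sema):
    while True:
        data = data_queue.get()
        data["xs"] = [x.pin_memory() for x in data["xs"]]   # pin the host tensors
        pinned_data_queue.put(data)
        if sema.acquire(blocking=False):                    # released by the parent to stop the thread
            return
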
Example #20
class ExternalEmbedding:
    """Sparse Embedding for Knowledge Graph
    It is used to store both entity embeddings and relation embeddings.

    Parameters
    ----------
    args :
        Global configs.
    num : int
        Number of embeddings.
    dim : int
        Embedding dimension size.
    device : th.device
        Device to store the embedding.
    """
    def __init__(self, args, num, dim, device):
        self.gpu = args.gpu
        self.args = args
        self.num = num
        self.trace = []

        self.emb = th.empty(num, dim, dtype=th.float32, device=device)
        self.state_sum = self.emb.new().resize_(self.emb.size(0)).zero_()
        self.state_step = 0
        self.has_cross_rel = False
        # queue used by asynchronous update
        self.async_q = None
        # asynchronous update process
        self.async_p = None

    def init(self, emb_init):
        """Initializing the embeddings.

        Parameters
        ----------
        emb_init : float
            The initial embedding range should be [-emb_init, emb_init].
        """
        INIT.uniform_(self.emb, -emb_init, emb_init)
        INIT.zeros_(self.state_sum)

    def setup_cross_rels(self, cross_rels, global_emb):
        cpu_bitmap = th.zeros((self.num, ), dtype=th.bool)
        for i, rel in enumerate(cross_rels):
            cpu_bitmap[rel] = 1
        self.cpu_bitmap = cpu_bitmap
        self.has_cross_rel = True
        self.global_emb = global_emb

    def get_noncross_idx(self, idx):
        cpu_mask = self.cpu_bitmap[idx]
        gpu_mask = ~cpu_mask
        return idx[gpu_mask]

    def share_memory(self):
        """Use torch.tensor.share_memory_() to allow cross process tensor access
        """
        self.emb.share_memory_()
        self.state_sum.share_memory_()

    def __call__(self, idx, gpu_id=-1, trace=True):
        """ Return sliced tensor.

        Parameters
        ----------
        idx : th.tensor
            Slicing index
        gpu_id : int
            Which gpu to put sliced data in.
        trace : bool
            If True, trace the computation. This is required in training.
            If False, do not trace the computation.
            Default: True
        """
        if self.has_cross_rel:
            cpu_idx = idx.cpu()
            cpu_mask = self.cpu_bitmap[cpu_idx]
            cpu_idx = cpu_idx[cpu_mask]
            cpu_idx = th.unique(cpu_idx)
            if cpu_idx.shape[0] != 0:
                cpu_emb = self.global_emb.emb[cpu_idx]
                self.emb[cpu_idx] = cpu_emb.cuda(gpu_id)
        s = self.emb[idx]
        if gpu_id >= 0:
            s = s.cuda(gpu_id)
        # During the training, we need to trace the computation.
        # In this case, we need to record the computation path and compute the gradients.
        if trace:
            data = s.clone().detach().requires_grad_(True)
            self.trace.append((idx, data))
        else:
            data = s
        return data

    def update(self, gpu_id=-1):
        """ Update embeddings in a sparse manner
        Sparse embeddings are updated in mini batches. we maintains gradient states for 
        each embedding so they can be updated separately.

        Parameters
        ----------
        gpu_id : int
            Which GPU to use for the update. If -1 is provided, the CPU is used.
        """
        self.state_step += 1
        with th.no_grad():
            for idx, data in self.trace:
                grad = data.grad.data

                clr = self.args.lr
                #clr = self.args.lr / (1 + (self.state_step - 1) * group['lr_decay'])

                # the update is non-linear so indices must be unique
                grad_indices = idx
                grad_values = grad
                if self.async_q is not None:
                    grad_indices.share_memory_()
                    grad_values.share_memory_()
                    self.async_q.put((grad_indices, grad_values, gpu_id))
                else:
                    grad_sum = (grad_values * grad_values).mean(1)
                    device = self.state_sum.device
                    if device != grad_indices.device:
                        grad_indices = grad_indices.to(device)
                    if device != grad_sum.device:
                        grad_sum = grad_sum.to(device)

                    if self.has_cross_rel:
                        cpu_mask = self.cpu_bitmap[grad_indices]
                        cpu_idx = grad_indices[cpu_mask]
                        if cpu_idx.shape[0] > 0:
                            cpu_grad = grad_values[cpu_mask]
                            cpu_sum = grad_sum[cpu_mask].cpu()
                            cpu_idx = cpu_idx.cpu()
                            self.global_emb.state_sum.index_add_(
                                0, cpu_idx, cpu_sum)
                            std = self.global_emb.state_sum[cpu_idx]
                            if gpu_id >= 0:
                                std = std.cuda(gpu_id)
                            std_values = std.sqrt_().add_(1e-10).unsqueeze(1)
                            tmp = (-clr * cpu_grad / std_values)
                            tmp = tmp.cpu()
                            self.global_emb.emb.index_add_(0, cpu_idx, tmp)
                    self.state_sum.index_add_(0, grad_indices, grad_sum)
                    std = self.state_sum[grad_indices]  # _sparse_mask
                    if gpu_id >= 0:
                        std = std.cuda(gpu_id)
                    std_values = std.sqrt_().add_(1e-10).unsqueeze(1)
                    tmp = (-clr * grad_values / std_values)
                    if tmp.device != device:
                        tmp = tmp.to(device)
                    # TODO(zhengda) the overhead is here.
                    self.emb.index_add_(0, grad_indices, tmp)
        self.trace = []

    def create_async_update(self):
        """Set up the async update subprocess.
        """
        self.async_q = Queue(1)
        self.async_p = mp.Process(target=async_update,
                                  args=(self.args, self, self.async_q))
        self.async_p.start()

    def finish_async_update(self):
        """Notify the async update subprocess to quit.
        """
        self.async_q.put((None, None, None))
        self.async_p.join()

    def curr_emb(self):
        """Return embeddings in trace.
        """
        data = [data for _, data in self.trace]
        return th.cat(data, 0)

    def save(self, path, name):
        """Save embeddings.

        Parameters
        ----------
        path : str
            Directory to save the embedding.
        name : str
            Embedding name.
        """
        file_name = os.path.join(path, name + '.npy')
        np.save(file_name, self.emb.cpu().detach().numpy())

    def load(self, path, name):
        """Load embeddings.

        Parameters
        ----------
        path : str
            Directory to load the embedding.
        name : str
            Embedding name.
        """
        file_name = os.path.join(path, name + '.npy')
        self.emb = th.Tensor(np.load(file_name))
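
The update method above implements sparse Adagrad by hand: it accumulates the mean squared gradient per touched row and scales each row's step by the square root of that state. An isolated sketch of the same per-row math on toy tensors (sizes and values are illustrative):

import torch as th

lr = 0.1
emb = th.zeros(5, 4)
state_sum = th.zeros(5)

idx = th.tensor([1, 3])                               # rows touched in this mini-batch
grad = th.ones(2, 4)                                  # their gradients

grad_sum = (grad * grad).mean(1)                      # per-row mean of squared gradients
state_sum.index_add_(0, idx, grad_sum)                # accumulate Adagrad state
std = state_sum[idx].sqrt().add_(1e-10).unsqueeze(1)  # per-row denominator
emb.index_add_(0, idx, -lr * grad / std)              # emb[i] -= lr * grad_i / sqrt(state_i)
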
Example #21
    with torch.no_grad():
        r_gen = RolloutGenerator(args.logdir, device, time_limit)

        while e_queue.empty():
            if p_queue.empty():
                sleep(.1)
            else:
                s_id, params = p_queue.get()
                r_queue.put((s_id, r_gen.rollout(params)))


################################################################################
#                Define queues and start workers                               #
################################################################################
p_queue = Queue()
r_queue = Queue()
e_queue = Queue()

for p_index in range(num_workers):
    Process(target=slave_routine,
            args=(p_queue, r_queue, e_queue, p_index)).start()


################################################################################
#                           Evaluation                                         #
################################################################################
def evaluate(solutions, results, rollouts=100):
    """ Give current controller evaluation.

    Evaluation is minus the cumulated reward averaged over rollout runs.
Example #22
class PlmVectorizationPredictor(object):
    def __init__(self, model_info, config=None, vectorization=False):
        if isinstance(model_info, dict):
            self.args = model_info['config']
            self.model = model_info['model']
            self.tokenizer = model_info['tokenizer']
        elif isinstance(model_info, str):
            self.model_path = model_info
            self.args = self.load_config(config)
            model = PlmModel(self.args)
            self.model = self.load_model(model, model_info)
            self.tokenizer = self.model.tokenizer
        else:
            raise ValueError('model_info must be a model dict or a checkpoint path string')
        if self.args.data_type == 'query':
            self.id_field = 'description_id'
        else:
            self.id_field = 'paper_id'

        if vectorization:
            self.dest_filename = self.args.dest_filename
            self.output_queue = Queue(-1)
            self.worker = Process(target=self.np2str)
            self.worker.daemon = True
            self.worker.start()

    def predict(self, src_filename, dest_filename):
        self.model.eval()
        existed_ids = set()
        for item in read_jsonline_lazy(dest_filename, default=[]):
            existed_ids.add(item[self.id_field])

        loader = VectorizationDataLoader(src_filename, self.tokenizer, self.args)
        cos = nn.CosineSimilarity(dim=1)
        tp_count = 0

        total_count = 0
        for batch in loader:
            with torch.no_grad():
                query_embed = self.model(batch, 'query')
                true_embed = self.model(batch, 'true')
                false_embed = self.model(batch, 'false')
                true_scores = cos(query_embed, true_embed)
                false_scores = cos(query_embed, false_embed)
                print(true_scores, false_scores)
                total_count += query_embed.size(0)
                tp_count += (true_scores > false_scores).sum().cpu().numpy().tolist()

        accuracy = tp_count / total_count

        return accuracy

    def np2str(self):
        while True:
            batch = self.output_queue.get(block=True)
            lines = []
            for sent_embed, data_id in zip(batch['vector'], batch['index']):
                vec_str = np.array2string(sent_embed,
                                          separator=' ', floatmode='maxprec')[1:-1]
                vec_str = ' '.join([line.strip() for line in vec_str.splitlines(False)])
                line = data_id + ' ' + vec_str
                lines.append(line)
            # if not getattr(self,'dest_filename',None):
            #     print(lines, batch)
            append_lines(self.dest_filename, lines)

    def vectorize(self, src_filename, dest_filename):
        self.dest_filename = dest_filename
        loader = VectorizationDataLoader(src_filename, self.tokenizer, self.args)

        for batch in loader:
            with torch.no_grad():
                sent_embed_list = self.model(batch, prefix=None).cpu().numpy()
                self.output_queue.put({'vector': sent_embed_list, 'index': batch['data_ids']})

    def load_model(self, model, model_path):
        if torch.cuda.is_available():
            checkpoint = torch.load(model_path)
        else:
            checkpoint = torch.load(model_path, map_location=torch.device('cpu'))
        state_dict = OrderedDict()
        # avoid error when load parallel trained model
        for k, v in checkpoint.items():
            if k.startswith('module.'):
                k = k[7:]
            state_dict[k] = v
        model.load_state_dict(state_dict)
        if torch.cuda.is_available():
            model = model.cuda()
        return model

    def load_config(self, custom_config):
        # default_config = vars(parse_args(parser=self.parser))
        config_path = os.path.splitext(self.model_path)[0] + '.json'
        model_config = read_json(config_path)
        if custom_config:
            config_dict = {**model_config, **custom_config}
        else:
            config_dict = model_config
        config = Munch(config_dict)
        return config
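
A minimal usage sketch of the vectorization path above (the checkpoint path, config keys, and file names are illustrative; vectorization=True starts the background np2str writer before vectorize begins filling the queue):

predictor = PlmVectorizationPredictor(
    'checkpoints/plm_model.bin',
    config={'data_type': 'paper', 'dest_filename': 'output/paper_vectors.txt'},
    vectorization=True)
predictor.vectorize('data/papers.jsonl', 'output/paper_vectors.txt')
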
Example #23
class VariationalAutoEncoder(nn.Module):
    '''
    Implementation of a Variational AutoEncoder in PyTorch. Two encoder/decoder
    units are currently supported: the first is a two-layer dense neural network
    and the second a deep convolutional net.
    The number of latent units can be specified and is usually around 4-12.
    The implementation supports CUDA. If CUDA is not used, the multiprocessing
    framework can be used to run the training in the background, so the
    Jupyter notebook it runs in is not blocked.
    '''
    def __init__(self, n_latent_units, drop_ratio, convolutional=False):
        '''
        Constructor
        :param n_latent_units:
        :param drop_ratio:
        '''
        super(VariationalAutoEncoder, self).__init__()
        self.encoder = Encoder.Encoder(n_latent_units, drop_ratio) if not convolutional \
            else ConvEncoder.Encoder(n_latent_units, drop_ratio)
        self.decoder = Decoder.Decoder(n_latent_units, drop_ratio) if not convolutional \
            else ConvDecoder.Decoder(n_latent_units, drop_ratio)
        self.proc = None

        self.counter_epoch = Counter()
        self.counter_interation = Counter()
        self.loss_queue = Queue()
        self.stop_signal = Signal()
        self.losses = []

    def forward(self, x):
        '''
        The forward method; calls the encoder and then the decoder.
        :param x:
        :return:
        '''
        z, mu, log_std = self.encoder.forward(x)
        self.mu = mu
        self.log_std = log_std
        return self.decoder.forward(z)

    def loss(self, _in, _out, mu, log_std):
        '''
        The loss function, the loss is calculated as the reconstruction error and
        the error given by the deviation of latent variable from the normal distirbution
        :param _in:
        :param _out:
        :param mu:
        :param log_std:
        :return:
        '''
        # img_loss = self.img_loss_func(_in, _out)
        # img_loss = F.mse_loss(_in, _out)
        img_loss = _in.sub(_out).pow(2).sum()
        mean_sq = mu * mu
        # -0.5 * tf.reduce_sum(1.0 + 2.0 * logsd - tf.square(mn) - tf.exp(2.0 * logsd), 1)
        latent_loss = -0.5 * torch.sum(1.0 + 2.0 * log_std - mean_sq -
                                       torch.exp(2.0 * log_std))
        return img_loss + latent_loss, img_loss, latent_loss

    def start(self, train=None):
        '''
        This runs the training in the background. Currently this only works with the
        CPU version (CUDA is not supported at the moment).
        :param train:
        :return:
        '''
        if self.proc is not None:
            raise Exception("Process already started.")
        self.share_memory()
        self.losses = []
        if train is None:
            train = VariationalAutoEncoder._get_training_test_method()
        self.proc = mp.Process(target=train,
                               args=(self, self.train_loader, self.test_loader,
                                     self.counter_epoch,
                                     self.counter_interation, self.loss_queue,
                                     self.stop_signal))
        self.proc.start()

    def restart(self, train=None):
        '''
        Background training can be stopped; use this method to resume the computation
        afterwards. As with start(), this currently does not work with CUDA.
        :param train:
        :return:
        '''
        if self.proc is None:
            raise Exception("Process has not been started before.")
        if self.proc.is_alive():
            raise Exception("Process is still active.")
        self.stop_signal.set_signal(False)
        if train is None:
            train = VariationalAutoEncoder._get_training_test_method()
        self.proc = mp.Process(target=train,
                               args=(self, self.train_loader, self.test_loader,
                                     self.counter_epoch,
                                     self.counter_interation, self.loss_queue,
                                     self.stop_signal))
        self.proc.start()

    def stop(self):
        '''
        This function sends a stop signal to the background process.
        :return:
        '''
        if self.proc is None:
            raise Exception("Process has been started.")
        if not self.proc.is_alive():
            raise Exception("Process is not alive.")
        self.stop_signal.set_signal(True)
        self.proc.join()
        self.stop_signal.set_signal(False)

    def get_progress(self):
        '''
        Returns the progress of the computation running in the background.
        :return:
        '''
        while self.loss_queue.qsize() > 0:
            self.losses.append(self.loss_queue.get())
        return self.losses

    def set_train_loader(self, train_loader, test_loader=None):
        self.train_loader = train_loader
        self.test_loader = test_loader

    def cuda(self):
        super(VariationalAutoEncoder, self).cuda()
        self.decoder.cuda()
        self.encoder.cuda()

    @staticmethod
    def _get_training_test_method():
        def train(model, train_loader, test_loader, counter_epoch,
                  counter_iterations, loss_queue, stop_signal):
            print("started", stop_signal.value)
            train_op = optim.Adam(model.parameters(), lr=0.0005)
            while not stop_signal.value:
                loss_train = []
                loss_test = []
                n_train = []
                n_test = []
                for _, data in enumerate(train_loader):
                    # data = Variable(data.view(-1,784))
                    data = Variable(data)
                    train_op.zero_grad()
                    dec = model(data)
                    loss, loss_1, loss_2 = model.loss(dec, data, model.mu,
                                                      model.log_std)
                    loss_train.append(
                        (loss.data[0], loss_1.data[0], loss_2.data[0]))
                    n_train.append(len(data))
                    loss.backward()
                    train_op.step()
                    counter_iterations.increment()

                for _, data in enumerate(test_loader):
                    # data = Variable(data.view(-1,784))
                    data = Variable(data)
                    dec = model(data)
                    loss, _, _ = model.loss(dec, data, model.mu, model.log_std)
                    loss_test.append(loss.data[0])
                    n_test.append(len(data))

                counter_epoch.increment()

                epoch = counter_epoch.value
                loss_train_mean = numpy.mean(loss_train,
                                             axis=0)  # / numpy.sum(n_train)
                loss_test_mean = numpy.mean(loss_test)  # / numpy.sum(n_test)
                loss_queue.put((epoch, loss_train_mean, loss_test_mean))
                #print("{}: ".format(epoch),  loss_train_mean, loss_test_mean)

        return train

    @staticmethod
    def get_MNIST_train_loader(batch_size=32, keep_classes=False):
        train_loader = torch.utils.data.DataLoader(datasets.MNIST(
            './data/datasets/MNIST',
            train=True,
            download=True,
            transform=transforms.Compose([transforms.ToTensor()])),
                                                   batch_size=batch_size)

        test_loader = torch.utils.data.DataLoader(datasets.MNIST(
            './data/datasets/MNIST',
            train=False,
            transform=transforms.Compose([transforms.ToTensor()])),
                                                  batch_size=batch_size)

        if keep_classes:
            return train_loader, test_loader
        return DataIterator(train_loader), DataIterator(test_loader)

    @staticmethod
    def get_FashionMNIST_train_loader(batch_size=32, keep_classes=False):
        train_loader = torch.utils.data.DataLoader(datasets.FashionMNIST(
            './data/datasets/FMNIST',
            train=True,
            download=True,
            transform=transforms.Compose([transforms.ToTensor()])),
                                                   batch_size=batch_size)

        test_loader = torch.utils.data.DataLoader(datasets.FashionMNIST(
            './data/datasets/FMNIST',
            train=False,
            transform=transforms.Compose([transforms.ToTensor()])),
                                                  batch_size=batch_size)

        if keep_classes:
            return train_loader, test_loader
        return DataIterator(train_loader), DataIterator(test_loader)
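A minimal usage sketch for the background-training API above (start / get_progress / stop). The constructor call is hypothetical, since the __init__ of VariationalAutoEncoder is not shown in this snippet; the loaders come from the helper defined above.

import time

if __name__ == '__main__':
    vae = VariationalAutoEncoder()  # hypothetical constructor call; arguments are not shown above
    train_loader, test_loader = VariationalAutoEncoder.get_MNIST_train_loader(batch_size=32)
    vae.set_train_loader(train_loader, test_loader)

    vae.start()                     # launch the default training loop in a child process
    time.sleep(60)                  # let it train for a while
    for epoch, loss_train, loss_test in vae.get_progress():
        print(epoch, loss_train, loss_test)
    vae.stop()                      # signal the worker and wait for it to exit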
Example #24
0
def controller_train_proc(ctrl_dir,
                          controller,
                          vae,
                          mdrnn,
                          target_return=950,
                          skip_train=False,
                          display=True):
    step_log('4-2. controller_train_proc START!!')
    # define current best and load parameters
    cur_best = None
    if not os.path.exists(ctrl_dir):
        os.mkdir(ctrl_dir)
    ctrl_file = os.path.join(ctrl_dir, 'best.tar')

    p_queue = Queue()
    r_queue = Queue()
    #e_queue = Queue()   # pipaek : not necessary if not multiprocessing

    print("Attempting to load previous best...")
    if os.path.exists(ctrl_file):
        #state = torch.load(ctrl_file, map_location={'cuda:0': 'cpu'})
        state = torch.load(ctrl_file)
        cur_best = -state['reward']
        controller.load_state_dict(state['state_dict'])
        print("Previous best was {}...".format(-cur_best))

    if skip_train:
        return  # pipaek : for when we want to skip improving the model through training..

    def evaluate(solutions,
                 results,
                 rollouts=100):  # pipaek : rollout 100 -> 10 , originally 100
        """ Give current controller evaluation.

        Evaluation is minus the cumulated reward averaged over rollout runs.

        :args solutions: CMA set of solutions
        :args results: corresponding results
        :args rollouts: number of rollouts

        :returns: minus averaged cumulated reward
        """
        index_min = np.argmin(results)
        best_guess = solutions[index_min]
        restimates = []

        for s_id in range(rollouts):
            print('p_queue.put(), s_id=%d' % s_id)
            p_queue.put((s_id, best_guess))
            print('>>>rollout_routine!!')
            rollout_routine()  # pipaek : here too, the item is processed right after p_queue.put..

        print(">>>Evaluating...")
        for _ in tqdm(range(rollouts)):
            #while r_queue.empty():
            #    sleep(.1)   # pipaek : not needed since this is not multi-process
            if not r_queue.empty(
            ):  # pipaek : 20180718 check so we do not get stuck in r_queue.get()!!
                #print('r_queue.get()')
                #restimates.append(r_queue.get()[1])
                r_s_id, r = r_queue.get()
                print(
                    'in evaluate r_queue.get() r_s_id=%d, r_queue remain=%d' %
                    (r_s_id, r_queue.qsize()))
                restimates.append(r)
            else:
                print('r_queue.empty() -> break!!')
                break

        return best_guess, np.mean(restimates), np.std(restimates)

    def rollout_routine():
        """ Thread routine.

        Threads interact with p_queue, the parameters queue, r_queue, the result
        queue and e_queue the end queue. They pull parameters from p_queue, execute
        the corresponding rollout, then place the result in r_queue.

        Each parameter has its own unique id. Parameters are pulled as tuples
        (s_id, params) and results are pushed as (s_id, result).  The same
        parameter can appear multiple times in p_queue, displaying the same id
        each time.

        As soon as e_queue is non-empty, the thread terminates.

        When multiple gpus are involved, the assigned gpu is determined by the
        process index p_index (gpu = p_index % n_gpus).

        :args p_queue: queue containing couples (s_id, parameters) to evaluate
        :args r_queue: where to place results (s_id, results)
        :args e_queue: as soon as not empty, terminate
        :args p_index: the process index
        """
        # init routine
        #gpu = p_index % torch.cuda.device_count()
        #device = torch.device('cuda:{}'.format(gpu) if torch.cuda.is_available() else 'cpu')

        # redirect streams
        #if not os.path.exists(tmp_dir):
        #    os.mkdir(tmp_dir)

        #sys.stdout = open(os.path.join(tmp_dir, 'rollout.out'), 'a')
        #sys.stderr = open(os.path.join(tmp_dir, 'rollout.err'), 'a')

        with torch.no_grad():
            r_gen = RolloutGenerator(vae, mdrnn, controller, device,
                                     rollout_time_limit)

            while not p_queue.empty():
                print('in rollout_routine, p_queue.get()')
                s_id, params = p_queue.get()
                print('r_queue.put() sid=%d' % s_id)
                r_queue.put((s_id, r_gen.rollout(params)))
                print('r_gen.rollout OK, r_queue.put()')
                #r_queue.qsize()

    parameters = controller.parameters()
    es = cma.CMAEvolutionStrategy(flatten_parameters(parameters), 0.1,
                                  {'popsize': C_POP_SIZE})
    print("CMAEvolutionStrategy start OK!!")

    epoch = 0
    log_step = 3
    while not es.stop():
        print("--------------------------------------")
        print("CURRENT EPOCH = %d" % epoch)
        if cur_best is not None and -cur_best > target_return:
            print("Already better than target, breaking...")
            break

        r_list = [0] * C_POP_SIZE  # result list
        solutions = es.ask()
        print("CMAEvolutionStrategy-ask")

        # push parameters to queue
        for s_id, s in enumerate(
                solutions):  # pipaek : this loop runs C_POP_SIZE times.
            #for _ in range(C_POP_SIZE * C_N_SAMPLES):
            for _ in range(C_N_SAMPLES):
                print('in controller_train_proc p_queue.put() s_id : %d' %
                      s_id)
                p_queue.put((s_id, s))
                #print("p_queue.put %d" % s_id)
                rollout_routine(
                )  # pipaek : get right after p_queue.put, do the rollout, then push the result to r_queue.
                print("rollout_routine OK, r_queue size=%d" % r_queue.qsize())

        # retrieve results
        if display:
            pbar = tqdm(total=C_POP_SIZE * C_N_SAMPLES)
        #for idx in range(C_POP_SIZE * C_N_SAMPLES):
        while not r_queue.empty(
        ):  # pipaek : 20180718 changed the for loop to a while loop so we never hang here waiting on r_queue.get.
            #while r_queue.empty():
            #    sleep(.1)
            try:
                r_s_id, r = r_queue.get()
                print(
                    'in controller_train_proc r_queue.get() r_s_id=%d, r_queue remain=%d'
                    % (r_s_id, r_queue.qsize()))
                r_list[r_s_id] += r / C_N_SAMPLES
                if display:
                    pbar.update(1)
            except IndexError as err:
                print('IndexError during r_queue.get()')
                print('cur r_list size:%d, index:%d' % (len(r_list), r_s_id))
        if display:
            pbar.close()

        es.tell(solutions,
                r_list)  # pipaek : update the solution array with the r_list results..
        es.disp()

        # evaluation and saving
        if epoch % log_step == log_step - 1:
            print(">>>> TRYING EVALUATION, CURRENT EPOCH = %d" % epoch)
            best_params, best, std_best = evaluate(
                solutions, r_list, rollouts=100
            )  # pipaek : do only 10 rollouts for evaluation.. originally 100
            print("Current evaluation: {}".format(best))
            if not cur_best or cur_best > best:
                cur_best = best
                print("Saving new best with value {}+-{}...".format(
                    -cur_best, std_best))
                load_parameters(best_params, controller)
                torch.save(
                    {
                        'epoch': epoch,
                        'reward': -cur_best,
                        'state_dict': controller.state_dict()
                    }, os.path.join(ctrl_dir, 'best.tar'))
            if -best > target_return:
                print(
                    "Terminating controller training with value {}...".format(
                        best))
                break

        epoch += 1

    print("es.stop!!")
    es.result_pretty()
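For reference, the ask/tell pattern that drives the loop above, reduced to a toy objective so it runs on its own (CMA-ES minimises, which is why the controller code feeds it negated rewards). Only the cma and numpy packages are assumed.

import numpy as np
import cma

def toy_objective(x):
    # stand-in for "minus the cumulated reward" of a rollout
    return float(np.sum((x - 1.0) ** 2))

es = cma.CMAEvolutionStrategy(np.zeros(8), 0.1, {'popsize': 4})
while not es.stop():
    solutions = es.ask()                           # sample a population of candidate parameters
    fitnesses = [toy_objective(s) for s in solutions]
    es.tell(solutions, fitnesses)                  # update the search distribution
    es.disp()
es.result_pretty()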
class VideoProcessingPipeline(object):
    """
    Manages the acquisition and preprocessing of video frames from the webcam.
    A pipeline with two processes is used: the first process denoises frames and
    queues the result to the second process which calculates the optical flows
    on CPU, and queues back the moving average to the main process. This moving
    average is used as attention prior by the model.
    """
    def __init__(self,
                 img_size,
                 img_cfg,
                 frames_window=13,
                 flows_window=5,
                 skip_frames=2,
                 cam_res=(640, 480),
                 denoising=True):
        """
        :param img_size: the images input size of the neural network.
        :param img_cfg: the config parameters for image processing.
        :param frames_window: the number of webcam frames input at once into
            the neural network to make a prediction step. Best results tend
            to be obtained with slightly less than one second of video.
        :param flows_window: the number of optical flows used to calculate an
            attention prior. Defaults to 5. Change at your own risk.
        :param skip_frames: down-sampling factor of the webcam frames. Defaults
            to 2 in order to roughly obtain 15 FPS with a 30 FPS webcam. This
            down-sampling is basic and could be improved to support ratios such
            as 2/3 to obtain 20 FPS.
        :param cam_res: webcam resolution (width, height). The application was
            only tested at 640x480. Change at your own risk.
        :param denoising: activate the denoising process. Defaults to True.
            Most useful with low-quality webcams.
        """
        if frames_window not in [9, 13, 17, 21]:
            raise ValueError('Invalid window size for webcam frames: `%s`' %
                             str(frames_window))
        if flows_window not in [3, 5, 7, 9]:
            raise ValueError('Invalid window size for optical flows: `%s`' %
                             str(flows_window))
        if flows_window > frames_window:
            raise ValueError(
                'Optical flow window cannot be wider than camera frames window'
            )

        self.img_size = img_size
        # optical flows can be computed in lower resolution w/o harming results
        self.opt_size = img_size // 2
        self.frames_window = frames_window
        self.flows_window = flows_window
        self.skip_frames = skip_frames
        self.total_frames = 0  # total number of frames acquired
        self.cam_res = cam_res
        self.denoising = denoising
        self.img_frames = [
            np.zeros((self.img_size, self.img_size, 3), dtype=np.uint8)
        ] * (self.frames_window // 2)
        self.gray_frames = [
            np.zeros((self.opt_size, self.opt_size), dtype=np.uint8)
        ] * (self.frames_window // 2)
        self.priors = []

        # init multiprocessing
        self.q_parent, self.q_prior = Queue(), Queue()

        # start denoising process
        if self.denoising:
            self.q_denoise = Queue()
            self.p_denoise = Process(
                target=denoise_frame,
                args=(self.q_denoise, self.q_prior, img_cfg.getint('h'),
                      img_cfg.getint('template_window_size'),
                      img_cfg.getint('search_window_size')))
            self.p_denoise.start()
            print('Denoising enabled')
        else:
            print('Denoising disabled')

        # start prior calculation process
        self.p_prior = Process(target=calc_attention_prior,
                               args=(self.opt_size, self.flows_window,
                                     self.q_prior, self.q_parent))
        self.p_prior.start()

        # initialise camera
        self.cap = cv.VideoCapture(0)
        if self.cap.isOpened():
            self.cap_fps = int(round(self.cap.get(cv.CAP_PROP_FPS)))
            self.cap.set(3, self.cam_res[0])
            self.cap.set(4, self.cam_res[1])
            print('Device @%d FPS' % self.cap_fps)
        else:
            raise IOError('Failed to open webcam capture')

        # raw images
        self.last_frame = collections.deque(maxlen=self.cap_fps)
        # cropped region of the raw images
        self.last_cropped_frame = collections.deque(maxlen=self.cap_fps)

        # acquire and preprocess the exact number of frames needed
        # to make the first prior map
        for i in range((frames_window // 2) + 1):
            self.acquire_next_frame(enable_skip=False)

        # now wait for the first prior to be returned
        while len(self.priors) == 0:
            if not self.q_parent.empty():
                # de-queue a prior
                prior, flow = self.q_parent.get(block=False)
                self.priors.append(prior)

            # sleep while the queue is empty
            time.sleep(0.01)

    def _center_crop(self, img, target_shape):
        """
        Returns a center crop of the provided image.

        :param img: the image to crop.
        :param target_shape: the dimensions of the crop.
        :return: the cropped image.
        """
        h, w = target_shape
        y, x = img.shape[:2]
        start_y = max(0, y // 2 - (h // 2))
        start_x = max(0, x // 2 - (w // 2))
        return img[start_y:start_y + h, start_x:start_x + w]

    def acquire_next_frame(self, enable_skip=True):
        """
        Reads the next frame from the webcam and starts the asynchronous
        preprocessing. The video stream is down-sampled as necessary to
        reach the desired FPS.

        :param enable_skip: enables down-sampling of the webcam stream.
            Must be True except during initialisation.
        :return: the last frame acquired or None if that frame was skipped
            due to down-sampling of the webcam stream.
        """
        ret, frame = self.cap.read()
        if not ret:
            self.terminate()
            raise IOError('Failed to read the next frame from webcam')

        self.total_frames += 1
        if not enable_skip:
            return self._preprocess_frame(frame)
        elif (self.total_frames % self.skip_frames) == 0:
            return self._preprocess_frame(frame)
        return None

    def _preprocess_frame(self, frame):
        """
        Crops, converts to grayscale, resizes, and sends the newly acquired
        webcam frame to the preprocessing pipeline.

        :param frame: the last acquired frame.
        :return: the last acquired frame.
        """
        # crop a square at the center of the frame
        rgb = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
        rgb = self._center_crop(rgb, (self.cam_res[1], self.cam_res[1]))
        self.last_frame.append(frame)
        self.last_cropped_frame.append(rgb)
        # convert to gray scale and resize
        gray = cv.cvtColor(rgb, cv.COLOR_RGB2GRAY)
        gray = cv.resize(gray, (self.opt_size, self.opt_size))
        rgb = cv.resize(rgb, (self.img_size, self.img_size))
        # queue to relevant child process
        if self.denoising:
            self.q_denoise.put(gray)
        else:
            self.q_prior.put(gray)
        self.img_frames.append(rgb)
        self.gray_frames.append(gray)
        return frame

    def get_model_input(self, dequeue=True):
        """
        Gets the list of images and the prior needed for the inference
        of the current frame. Use `dequeue` to retrieve the next prior
        from the queue. The caller must first verify that the queue is
        non-empty.

        :param dequeue: must be set to True except during initialisation.
        :return: images ndarray and the corresponding prior
        """
        # de-queue a prior
        if dequeue:
            prior, flow = self.q_parent.get(block=False)
            self.priors.append(prior)

        # ensure enough frames have been preprocessed
        n_frames = self.frames_window
        assert len(self.img_frames) >= n_frames
        assert len(self.gray_frames) >= n_frames
        assert len(self.priors) == 1

        imgs = np.stack(self.img_frames[:self.frames_window], axis=0)
        self.img_frames.pop(0)  # slide window to the right
        self.gray_frames.pop(0)

        return imgs, [self.priors.pop(0)]

    def terminate(self):
        """Terminates processes, closes queues and releases video capture."""
        if self.denoising:
            self.q_denoise.put(None)
            time.sleep(0.2)
            self.p_denoise.terminate()
        else:
            self.q_prior.put(None)
            time.sleep(0.2)
        self.p_prior.terminate()
        time.sleep(0.1)

        if self.denoising:
            self.p_denoise.join(timeout=0.5)
        self.p_prior.join(timeout=0.5)

        if self.denoising:
            self.q_denoise.close()
        self.q_parent.close()
        self.cap.release()
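A hedged sketch of how the pipeline above could be driven from a main loop. It assumes a webcam is available, that denoise_frame and calc_attention_prior are defined elsewhere in this file, and that img_cfg is a configparser section carrying the three denoising keys read in __init__; the model call itself is left as a placeholder.

import configparser

cfg = configparser.ConfigParser()
cfg.read_dict({'image': {'h': '7', 'template_window_size': '7', 'search_window_size': '21'}})

pipeline = VideoProcessingPipeline(img_size=224, img_cfg=cfg['image'])
imgs, priors = pipeline.get_model_input(dequeue=False)  # consume the prior produced during init
try:
    while True:
        frame = pipeline.acquire_next_frame()
        if frame is None:
            continue                          # frame skipped by the down-sampling
        if not pipeline.q_parent.empty():     # a new prior is ready
            imgs, priors = pipeline.get_model_input()
            # run the model on (imgs, priors) here
except KeyboardInterrupt:
    pass
finally:
    pipeline.terminate()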
Example #26
0
def dynamic_power(model, input_shape):
    q = Queue()
    power_return = Queue()
    interval_return = Queue()
    latency_return = Queue()
    input_tensor_queue = Queue()
    model_queue = Queue()

    input_tensor = torch.ones([*input_shape])
    input_tensor_queue.put(input_tensor)

    model.share_memory()

    model_queue.put(model)

    context = torch.multiprocessing.get_context('spawn')

    p_thread = context.Process(target=power_thread,
                               args=(power_return, interval_return, q))
    l_thread = context.Process(target=latency_thread,
                               args=(model_queue, input_tensor_queue,
                                     latency_return, q))

    l_thread.start()
    p_thread.start()

    power_l = list()  # GPU power list
    interval_l = list()  # power interval list
    latency_l = list()  # latency list

    l_thread.join()

    while True:
        if not power_return.empty():
            power_l.append(power_return.get())
        if not interval_return.empty():
            interval_l.append(interval_return.get())
        if not latency_return.empty():
            latency_l.append(latency_return.get())
        if power_return.empty() and interval_return.empty(
        ) and latency_return.empty():
            break

    power_return.close()
    interval_return.close()
    latency_return.close()
    q.close()

    del q
    del power_return
    del latency_return
    del interval_return

    return latency_l, power_l, interval_l
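A hedged call sketch for dynamic_power; it assumes power_thread and latency_thread are defined in the same module, as implied above, and uses an arbitrary small model as the measurement target.

import torch.nn as nn

if __name__ == '__main__':
    model = nn.Sequential(nn.Conv2d(3, 16, 3), nn.ReLU(),
                          nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Linear(16, 10))
    latency_l, power_l, interval_l = dynamic_power(model, input_shape=(1, 3, 224, 224))
    print('%d latency, %d power, %d interval samples collected'
          % (len(latency_l), len(power_l), len(interval_l)))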
Example #28
0
def main():
    q = Queue()
    idx_q = Queue()
    epochs = 3
    learning_rate = 0.001
    batch_size = 32
    test_batch_size = 16
    log_interval = 100
    cpu_pth_path = "/home/yoon/Yoon/pytorch/research/part_train/cpu.pth"
    gpu_pth_path = "/home/yoon/Yoon/pytorch/research/part_train/gpu.pth"

    #print(torch.cuda.get_device_name(0))
    print(torch.cuda.is_available())
    use_cuda = torch.cuda.is_available()
    print("use_cude : ", use_cuda)

    #device = torch.device("cuda" if use_cuda else "cpu")
    device1 = "cpu"
    device2 = "cuda"

    nThreads = 1 if use_cuda else 2
    if platform.system() == 'Windows':
        nThreads = 0  #if you use windows

    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, ), (0.5, ))])

    # datasets
    testset = torchvision.datasets.FashionMNIST('./data',
                                                download=True,
                                                train=False,
                                                transform=transform)

    test_loader = torch.utils.data.DataLoader(testset,
                                              batch_size=test_batch_size,
                                              shuffle=False,
                                              num_workers=nThreads)

    # constant for classes
    classes = ('T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal',
               'Shirt', 'Sneaker', 'Bag', 'Ankle Boot')

    # model
    model1 = Net(q).to(device1)
    model1.share_memory()  # imshow example
    model2 = Net2(q).to(device2)
    model2.share_memory()

    # Freeze model weights
    for param in model1.parameters():  # freeze so the parameters do not change even when every layer is trained
        param.requires_grad = False
    for param in model2.parameters():  # freeze so the parameters do not change even when every layer is trained
        param.requires_grad = False

    proc1 = Process(target=my_run,
                    args=(model1, testset, device1, cpu_pth_path, idx_q))
    proc2 = Process(target=my_run,
                    args=(model2, testset, device2, gpu_pth_path, idx_q))

    num_processes = (proc2, proc1)
    processes = []

    for procs in num_processes:
        procs.start()
        processes.append(procs)

    for proc in processes:
        proc.join()
Example #29
0
def data_runner(queue1: Queue, queue2: Queue):
    queue1.get()
    queue2.put(1)

    queue1.get()
    queue2.put(1)
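A sketch of the two-queue handshake data_runner implements: the parent sends a token on queue1 and waits for the acknowledgement on queue2, twice. Plain multiprocessing is used here for self-containment; the surrounding examples may use torch.multiprocessing instead.

from multiprocessing import Process, Queue

if __name__ == '__main__':
    q1, q2 = Queue(), Queue()
    worker = Process(target=data_runner, args=(q1, q2))
    worker.start()
    for step in range(2):
        q1.put('go')              # unblock the worker's queue1.get()
        ack = q2.get()            # wait for the worker's reply
        print('step %d acknowledged with %s' % (step, ack))
    worker.join()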
Example #30
0
    with torch.no_grad():
        r_gen = RolloutGenerator(args.logdir, device, time_limit)

        while e_queue.empty():
            if p_queue.empty():
                sleep(.1)
            else:
                s_id, params = p_queue.get()
                r_queue.put((s_id, r_gen.rollout(params)))


################################################################################
#                Define queues and start workers                               #
################################################################################
p_queue = Queue()
r_queue = Queue()
e_queue = Queue()

for p_index in range(num_workers):
    Process(target=slave_routine, args=(p_queue, r_queue, e_queue, p_index)).start()


################################################################################
#                           Evaluation                                         #
################################################################################
def evaluate(solutions, results, rollouts=100):
    """ Give current controller evaluation.

    Evaluation is minus the cumulated reward averaged over rollout runs.
Example #31
0
def train(training_dbs, validation_db, start_iter=0):
    learning_rate = system_configs.learning_rate
    max_iteration = system_configs.max_iter
    pretrained_model = system_configs.pretrain
    snapshot = system_configs.snapshot
    val_iter = system_configs.val_iter
    display = system_configs.display
    decay_rate = system_configs.decay_rate
    stepsize = system_configs.stepsize

    # getting the size of each database
    training_size = len(training_dbs[0].db_inds)
    validation_size = len(validation_db.db_inds)

    # queues storing data for training
    training_queue = Queue(system_configs.prefetch_size)
    validation_queue = Queue(5)

    # queues storing pinned data for training
    pinned_training_queue = queue.Queue(system_configs.prefetch_size)
    pinned_validation_queue = queue.Queue(5)

    # load data sampling function
    data_file = "sample.{}".format(training_dbs[0].data)
    sample_data = importlib.import_module(data_file).sample_data

    # allocating resources for parallel reading
    training_tasks = init_parallel_jobs(training_dbs, training_queue,
                                        sample_data, True)
    if val_iter:
        validation_tasks = init_parallel_jobs([validation_db],
                                              validation_queue, sample_data,
                                              False)

    training_pin_semaphore = threading.Semaphore()
    validation_pin_semaphore = threading.Semaphore()
    training_pin_semaphore.acquire()
    validation_pin_semaphore.acquire()

    training_pin_args = (training_queue, pinned_training_queue,
                         training_pin_semaphore)
    training_pin_thread = threading.Thread(target=pin_memory,
                                           args=training_pin_args)
    training_pin_thread.daemon = True
    training_pin_thread.start()

    validation_pin_args = (validation_queue, pinned_validation_queue,
                           validation_pin_semaphore)
    validation_pin_thread = threading.Thread(target=pin_memory,
                                             args=validation_pin_args)
    validation_pin_thread.daemon = True
    validation_pin_thread.start()

    print("building model...")
    nnet = NetworkFactory(training_dbs[0])

    if pretrained_model is not None:
        if not os.path.exists(pretrained_model):
            raise ValueError("pretrained model does not exist")
        print("loading from pretrained model")
        nnet.load_pretrained_params(pretrained_model)

    if start_iter:
        learning_rate /= (decay_rate**(start_iter // stepsize))

        nnet.load_params(start_iter)
        nnet.set_lr(learning_rate)
        print("training starts from iteration {} with learning_rate {}".format(
            start_iter + 1, learning_rate))
    else:
        nnet.set_lr(learning_rate)

    print("training start...")
    nnet.cuda()
    nnet.train_mode()
    with stdout_to_tqdm() as save_stdout:
        for iteration in tqdm(range(start_iter + 1, max_iteration + 1),
                              file=save_stdout,
                              ncols=80):
            training = pinned_training_queue.get(block=True)
            training_loss, focal_loss, pull_loss, push_loss, regr_loss = nnet.train(
                **training)
            #training_loss, focal_loss, pull_loss, push_loss, regr_loss, cls_loss = nnet.train(**training)
            display = 1250
            if display and iteration % display == 0:
                print("training loss at iteration {}: {}".format(
                    iteration, training_loss.item()))
                print("focal loss at iteration {}:    {}".format(
                    iteration, focal_loss.item()))
                print("pull loss at iteration {}:     {}".format(
                    iteration, pull_loss.item()))
                print("push loss at iteration {}:     {}".format(
                    iteration, push_loss.item()))
                print("regr loss at iteration {}:     {}".format(
                    iteration, regr_loss.item()))
                #print("cls loss at iteration {}:      {}\n".format(iteration, cls_loss.item()))

            del training_loss, focal_loss, pull_loss, push_loss, regr_loss  #, cls_loss

            if val_iter and validation_db.db_inds.size and iteration % val_iter == 0:
                nnet.eval_mode()
                validation = pinned_validation_queue.get(block=True)
                validation_loss = nnet.validate(**validation)

                print("validation loss at iteration {}: {}".format(
                    iteration, validation_loss.item()))
                # testing(validation_db, nnet, result_dir, debug=debug)

                nnet.train_mode()

            if iteration % snapshot == 0:
                nnet.save_params(iteration)

            if iteration % stepsize == 0:
                learning_rate /= decay_rate
                nnet.set_lr(learning_rate)

    # sending signal to kill the thread
    training_pin_semaphore.release()
    validation_pin_semaphore.release()

    # terminating data fetching processes
    for training_task in training_tasks:
        training_task.terminate()
    if val_iter:
        for validation_task in validation_tasks:
            validation_task.terminate()
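The pin_memory target used by the two pinning threads above is not shown; a plausible sketch, assuming each queued item is a dict of tensors (or lists of tensors) and that releasing the semaphore signals shutdown, is:

import torch

def pin_memory_sketch(data_queue, pinned_data_queue, semaphore):
    # Hedged sketch only: the real pin_memory implementation is not part of this snippet.
    def _pin(value):
        if isinstance(value, torch.Tensor):
            return value.pin_memory()          # pinned host memory enables async host-to-GPU copies
        if isinstance(value, list):
            return [_pin(v) for v in value]
        return value

    while True:
        data = data_queue.get()
        pinned_data_queue.put({k: _pin(v) for k, v in data.items()})
        # the training loop releases the semaphore when it wants the thread to exit
        if semaphore.acquire(blocking=False):
            return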
class MultiprocessAsyncGameExecutor(AsyncGameExecutor):
    def __init__(self, game_factory: GameExecutorFactory, network: nn.Module,
                 device: torch.device, processes: int, batches_ahead: int,
                 batch_size: int, states_on_device: bool):
        self._states_on_device = states_on_device
        self._device = device
        self._experience_queue = Queue(maxsize=processes + 1)
        block_size = max(1, batches_ahead - processes)
        self.block_buffer = []
        print('* starting %d workers (batch size: %d, block size: %d)' %
              (processes, batch_size, block_size))
        self._processes = []
        self._request_queues = []
        for i in range(processes):
            request_queue = Queue(maxsize=10)
            # Transfer to GPU in the other process does not work.. it does not throw an error, but training does not converge
            p = Process(target=_run_game,
                        args=(
                            i,
                            game_factory,
                            network,
                            device,
                            request_queue,
                            self._experience_queue,
                            batch_size,
                            block_size,
                            False,
                        ))
            p.start()
            self._request_queues.append(request_queue)
            self._processes.append(p)

    def _send_to_all(self, request, block=False):
        for request_queue in self._request_queues:
            request_queue.put(request, block=block)

    def get_experiences(self):
        if len(self.block_buffer) == 0:
            block_buffer = self._experience_queue.get(block=True)
            if self._states_on_device:
                for eps, exps in block_buffer:
                    exps = [e.to_device(self._device) for e in exps]
                    self.block_buffer.append((eps, exps))
            else:
                self.block_buffer.extend(block_buffer)
        return self.block_buffer.pop()

    def update_exploration_rate(self, exploration_rate):
        self._send_to_all(
            _RunGameRequest(set_exploration_rate=exploration_rate), block=True)

    def close(self):
        print('* shutting down workers')
        self._send_to_all(_RunGameRequest(do_terminate=True))
        # wake the workers
        try:
            while not self._experience_queue.empty():
                try:
                    self._experience_queue.get(block=False)
                except queue.Empty:
                    pass
        except ConnectionResetError:
            pass
        except FileNotFoundError:
            pass

        self._experience_queue.close()
        for p in self._processes:
            p.join(1000)
        for q in self._request_queues:
            q.close()
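A hedged sketch of a training loop built around the executor above; game_factory, network and device are passed in as placeholders because their construction is outside this snippet.

def training_loop_sketch(game_factory, network, device, steps=10000):
    executor = MultiprocessAsyncGameExecutor(game_factory, network, device,
                                             processes=2, batches_ahead=4,
                                             batch_size=32, states_on_device=True)
    try:
        for step in range(steps):
            episodes, experiences = executor.get_experiences()  # blocks until a block is ready
            # an optimisation step on `experiences` would go here
            if step % 100 == 0:
                executor.update_exploration_rate(max(0.05, 1.0 - step / steps))
    finally:
        executor.close()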
Example #33
0
def _call_mods_from_fast5s_cpu2(motif_seqs, chrom2len, fast5s_q, len_fast5s,
                                positions, model_path, success_file, args):
    # features_batch_q = mp.Queue()
    # errornum_q = mp.Queue()
    features_batch_q = Queue()
    errornum_q = Queue()

    # pred_str_q = mp.Queue()
    pred_str_q = Queue()

    nproc = args.nproc
    nproc_call_mods = nproc_to_call_mods_in_cpu_mode
    if nproc <= nproc_call_mods + 1:
        nproc = nproc_call_mods + 1 + 1

    fast5s_q.put("kill")
    features_batch_procs = []
    for _ in range(nproc - nproc_call_mods - 1):
        p = mp.Process(target=_read_features_fast5s_q,
                       args=(fast5s_q, features_batch_q, errornum_q,
                             motif_seqs, chrom2len, positions, args))
        p.daemon = True
        p.start()
        features_batch_procs.append(p)

    call_mods_gpu_procs = []
    for _ in range(nproc_call_mods):
        p_call_mods_gpu = mp.Process(target=_call_mods_q,
                                     args=(model_path, features_batch_q,
                                           pred_str_q, success_file, args))
        p_call_mods_gpu.daemon = True
        p_call_mods_gpu.start()
        call_mods_gpu_procs.append(p_call_mods_gpu)

    # print("write_process started..")
    p_w = mp.Process(target=_write_predstr_to_file,
                     args=(args.result_file, pred_str_q))
    p_w.daemon = True
    p_w.start()

    errornum_sum = 0
    while True:
        running = any(p.is_alive() for p in features_batch_procs)
        while not errornum_q.empty():
            errornum_sum += errornum_q.get()
        if not running:
            break

    for p in features_batch_procs:
        p.join()
    features_batch_q.put("kill")

    for p_call_mods_gpu in call_mods_gpu_procs:
        p_call_mods_gpu.join()

    # print("finishing the write_process..")
    pred_str_q.put("kill")

    p_w.join()

    print("%d of %d fast5 files failed.." % (errornum_sum, len_fast5s))