def _new_logger(log_level: Optional[int] = None):
    """Initialise the module-level ``_LOGGER`` the first time it is needed.

    When ``_LOGGER`` is still ``None`` it is bound to ``mp.get_logger()``;
    if a ``log_level`` was supplied, that level is then applied through
    ``get_logger()``.

    NOTE(review): the statement nesting was reconstructed from a
    whitespace-collapsed source line -- confirm against the original file.

    Args:
        log_level: Optional level passed to ``setLevel`` on first
            initialisation. Ignored when ``_LOGGER`` is already set.

    Returns:
        ``True`` if this call created the logger, ``False`` if ``_LOGGER``
        was already set (nothing is changed in that case).
    """
    global _LOGGER

    # Guard clause: an existing logger is left completely untouched.
    if _LOGGER is not None:
        return False

    _LOGGER = mp.get_logger()
    if log_level is not None:
        get_logger().setLevel(log_level)
    return True
def create_logger():
    """Configure and return the shared ``multiprocessing`` logger.

    The logger is set to ``INFO`` and ends up with two handlers:

    * the stream handler installed by ``multiprocessing.log_to_stderr()``;
    * a ``logging.FileHandler`` writing to ``process.log`` (relative to the
      current working directory) using a format that includes timestamp,
      process name, level, function name and message.

    The function is idempotent: a handler is only installed when an
    equivalent one is not already attached. Without that guard every call
    (for example one per worker, or one per module that wants the logger)
    would add another pair of handlers, so each record would be written
    several times and an extra file descriptor would be opened per call.

    Returns:
        logging.Logger: the logger returned by ``multiprocessing.get_logger()``.
    """
    # Local import so the block does not depend on a file-level ``import os``.
    import os

    logger = multiprocessing.get_logger()

    # FileHandler subclasses StreamHandler, so exclude it explicitly when
    # looking for an already-installed plain stream handler.
    # NOTE(review): any plain StreamHandler is treated as "the stderr
    # handler"; confirm nothing else attaches stream handlers to this logger.
    has_stream_handler = any(
        isinstance(handler, logging.StreamHandler)
        and not isinstance(handler, logging.FileHandler)
        for handler in logger.handlers
    )
    if not has_stream_handler:
        multiprocessing.log_to_stderr()

    logger.setLevel(logging.INFO)

    # FileHandler stores the absolute path in ``baseFilename``; compare on
    # that so a change of working directory still yields a new handler.
    log_path = os.path.abspath("process.log")
    has_file_handler = any(
        isinstance(handler, logging.FileHandler)
        and handler.baseFilename == log_path
        for handler in logger.handlers
    )
    if not has_file_handler:
        fmt = '%(asctime)s - %(processName)s - %(levelname)s - %(funcName)s - %(message)s'
        fh = logging.FileHandler("process.log")
        fh.setFormatter(logging.Formatter(fmt))
        logger.addHandler(fh)

    return logger
from allennlp.data.token_indexers import PretrainedBertIndexer from matchmaker.dataloaders.bling_fire_tokenizer import BlingFireTokenizer from typing import Dict, Tuple, List # # Multiprocess input pipeline # ------------------------------- # # single epoch batch generators with multiple subprocesses, each subprocess works on its own file until the file is parsed completely # # - the processes have as little communication as possible (because it is prohibitively expensive in python) # - the finished batches go into shared memory and then the queue to be picked up by the train/validation loops # mp.get_logger().setLevel( logging.WARNING) # ignore useless process start console logs mp.set_sharing_strategy( "file_system" ) # VERY MUCH needed for linux !! makes everything MUCH faster -> from 10 to 30+ batches/s fasttext_vocab_cached_mapping = None fasttext_vocab_cached_data = None # # we need to wrap the individual process queues, because they might be filled in different order # now we make sure to always get the same training samples in the same order for all runs # class DeterministicQueue(): def __init__(self, distributed_queues): self.distributed_queues = distributed_queues
import logging import os from queue import Empty from typing import Iterable, Iterator, List, Optional from torch.multiprocessing import JoinableQueue, Process, Queue, get_logger from allennlp.common.checks import ConfigurationError from allennlp.data.dataset import Batch from allennlp.data.dataset_readers.multiprocess_dataset_reader import QIterable from allennlp.data.instance import Instance from allennlp.data.iterators.data_iterator import DataIterator, TensorDict from allennlp.data.vocabulary import Vocabulary logger = get_logger() logger.setLevel(logging.INFO) def _create_tensor_dicts_from_queue(input_queue: Queue, output_queue: Queue, iterator: DataIterator, shuffle: bool, index: int) -> None: """ Pulls instances from ``input_queue``, converts them into ``TensorDict``s using ``iterator``, and puts them on the ``output_queue``. """ logger.info(f"Iterator worker: {index} PID: {os.getpid()}") def instances() -> Iterator[Instance]: instance = input_queue.get() while instance is not None: yield instance
from typing import Iterable, Iterator, List, Optional import logging from torch.multiprocessing import Manager, Process, Queue, get_logger from allennlp.common.checks import ConfigurationError from allennlp.data.instance import Instance from allennlp.data.iterators.data_iterator import DataIterator, TensorDict from allennlp.data.dataset import Batch from allennlp.data.vocabulary import Vocabulary logger = get_logger() # pylint: disable=invalid-name logger.setLevel(logging.INFO) def _create_tensor_dicts(input_queue: Queue, output_queue: Queue, iterator: DataIterator, shuffle: bool, index: int) -> None: """ Pulls at most ``max_instances_in_memory`` from the input_queue, groups them into batches of size ``batch_size``, converts them to ``TensorDict`` s, and puts them on the ``output_queue``. """ def instances() -> Iterator[Instance]: instance = input_queue.get() while instance is not None: yield instance instance = input_queue.get() for tensor_dict in iterator(instances(), num_epochs=1, shuffle=shuffle):
import numpy as np import logging import torch.multiprocessing as mp from torch.multiprocessing import Process, Queue, Pipe, Lock import time import os import io from spiketag.view import wave_view from spiketag.utils import Timer from vispy import app, keys import torch info = mp.get_logger().info # Create two instances of MyRect, each using canvas.scene as their parent nCh = 160 #32 channel number # Number of cols and rows in the table. # nrows = 16 # ncols = 2 # Number of channels. # m = nrows*ncols # Number of samples per channel. npts = int(20000) # Generate the signals as a (nCh, npts) array. init_data = np.random.randn(npts, nCh).astype(np.float32) init_data[:, 4] = 1 wview = wave_view(data=init_data, fs=25e3, chs=np.arange(160), pagesize=npts) @wview.connect