Example 1
def run(parser, args):
    # TODO: replace the reference path inside the TOML file with the args.mindex path
    # Load the TOML once, keeping the original file path around in args.tomlfile.
    args.tomlfile = args.toml
    args.toml = toml.load(args.toml)
    print(args.toml["conditions"]["reference"])
    print(args)

    # TODO: Move logging config to separate configuration file
    # set up logging to file
    logging.basicConfig(level=logging.DEBUG,
                        format='%(levelname)s::%(asctime)s::%(name)s::%(message)s',
                        filename=args.log_file,
                        filemode='w')

    # define a Handler that writes INFO messages or higher to sys.stderr
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)

    # set a format which is simpler for console use
    formatter = logging.Formatter('%(name)-15s: %(levelname)-8s %(message)s')
    console.setFormatter(formatter)

    # add the handler to the root logger
    logging.getLogger('').addHandler(console)

    # Start by logging sys.argv and the parameters used
    logger = logging.getLogger("Manager")
    logger.info(" ".join(sys.argv))
    print_args(args, logger=logger)

    logger.info("Initialising iterAlign.")

    logger.info("Setting up FastQ monitoring.")

    #### Check if a run is active - if not, wait.

    args.simulation = True
    connection = None

    if args.watch is None:
        args.simulation = False
        logger.info("Creating rpc connection for device {}.".format(args.device))
        try:
            connection, messageport = get_rpc_connection(args.device)
        except ValueError as e:
            print(e)
            sys.exit(1)

        send_message(connection, "Iteralign Connected to MinKNOW.", Severity.WARN)

        logger.info("Loaded RPC")
        while parse_message(connection.acquisition.current_status())['status'] != "PROCESSING":
            time.sleep(1)
        #### Check if we know where data is being written to; if not, wait.
        args.watch = parse_message(connection.acquisition.get_acquisition_info())['config_summary'][
            'reads_directory']

    else:
        messageport = ""

    event_handler = FastqHandler(args, logging, messageport, connection)
    # This block handles the fastq
    observer = Observer()
    observer.schedule(event_handler, path=args.watch, recursive=True)
    observer.daemon = True

    try:

        observer.start()
        logger.info("FastQ Monitoring Running.")
        while True:
            time.sleep(1)

    except KeyboardInterrupt:

        logger.info("Exiting - Will take a few seconds to clean up!")

        observer.stop()
        observer.join()

        if args.keepfiles:
            logger.info("The 'keepfiles' argument was set; files generated by the classifier have been retained.")
        else:
            if os.path.isdir(args.path):
                for path, dirs, files in os.walk(args.path):
                    for f in files:
                        if f.startswith(args.prefix):
                            # Join with the walked directory; a bare filename would
                            # only resolve against the current working directory.
                            os.unlink(os.path.join(path, f))
                            logger.info("file removed: {}".format(f))

            for path, dirs, files in os.walk("./"):
                for f in files:
                    if f.endswith(args.creport):
                        os.unlink(os.path.join(path, f))
                        logger.info("file removed: {}".format(f))

            logger.info("All files generated by the classifier have been removed.")

        os._exit(0)
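
For reference, a minimal sketch of the kind of TOML file run() loads above, assuming only the [conditions] table and reference key that the function actually reads; the path value is purely illustrative.

import toml

# Hypothetical minimal config for run(); only conditions.reference is read above.
EXAMPLE_TOML = """
[conditions]
reference = "/path/to/reference.mmi"  # illustrative path, not a real file
"""

d = toml.loads(EXAMPLE_TOML)
print(d["conditions"]["reference"])  # -> /path/to/reference.mmi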
Example 2
def run(parser, args):
    args.tomlfile = args.toml
    args.toml = toml.load(args.toml)
    print(args)

    # TODO: Move logging config to separate configuration file
    # set up logging to file
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(levelname)s::%(asctime)s::%(name)s::%(message)s',
        filename=args.log_file,
        filemode='w')

    # define a Handler that writes INFO messages or higher to sys.stderr
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)

    # set a format which is simpler for console use
    formatter = logging.Formatter('%(name)-15s: %(levelname)-8s %(message)s')
    console.setFormatter(formatter)

    # add the handler to the root logger
    logging.getLogger('').addHandler(console)

    # Start by logging sys.argv and the parameters used
    logger = logging.getLogger("Manager")
    logger.info(" ".join(sys.argv))
    print_args(args, logger=logger)

    logger.info("Initialising iterAlign.")

    logger.info("Setting up FastQ monitoring.")

    #### Check if a run is active - if not, wait.

    args.simulation = True
    connection = None
    if args.watch is None:
        args.simulation = False
        logger.info("Creating rpc connection for device {}.".format(
            args.device))
        try:
            connection, messageport = get_rpc_connection(args.device)
        except ValueError as e:
            print(e)
            sys.exit(1)

        send_message(connection, "Iteralign Connected to MinKNOW",
                     Severity.WARN)

        logger.info("Loaded RPC")
        while parse_message(connection.acquisition.current_status()
                            )['status'] != "PROCESSING":
            time.sleep(1)
        #### Check if we know where data is being written to; if not, wait.
        args.watch = parse_message(connection.acquisition.get_acquisition_info(
        ))['config_summary']['reads_directory']

    else:
        messageport = ""

    event_handler = FastqHandler(args, logging, messageport, connection)
    # This block handles the fastq
    observer = Observer()
    observer.schedule(event_handler, path=args.watch, recursive=True)
    observer.daemon = True

    try:

        observer.start()
        logger.info("FastQ Monitoring Running.")
        while True:
            time.sleep(1)

    except KeyboardInterrupt:

        logger.info("Exiting - Will take a few seconds to clean up!")

        observer.stop()
        observer.join()

        os._exit(0)
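
FastqHandler is defined elsewhere in the project, so its behaviour is not shown here. As a purely illustrative sketch, assuming only the public watchdog API, a handler reacting to newly written fastq files could look like this (the class name, constructor arguments, and filename checks are hypothetical):

from watchdog.events import FileSystemEventHandler

class ExampleFastqHandler(FileSystemEventHandler):
    # Hypothetical stand-in for FastqHandler; only the watchdog API is real.
    def __init__(self, args, logger):
        self.args = args
        self.logger = logger

    def on_created(self, event):
        # Called by the Observer for every file created under the watched path.
        if not event.is_directory and event.src_path.endswith((".fastq", ".fastq.gz")):
            self.logger.info("New fastq file: {}".format(event.src_path))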
Example 3
    def __init__(
        self,
        mk_host="127.0.0.1",
        mk_port=9501,
        device=None,
        cache_size=512,
        cache_type="ReadCache",
        signal_calibration=False,
        filter_strands=True,
        one_chunk=False,
        pre_filter_classes=None,
        reload_rpc=True,
        log_file=None,
    ):
        """A basic Read Until client.

        This class handles the interactions with the MinKNOW gRPC stream.
        It requires a thread-safe queue/cache to operate. There are two
        provided in `read_cache.py`.

        Parameters
        ----------
        mk_host : str
            The host to connect to MinKNOW on, default: "127.0.0.1"
        mk_port : int
            The insecure channel port for MinKNOW, default: 9501
        device : str
            The device to get the connection for, e.g. MinION: MN18458, GridION:
            GA10000, PromethION: 1-A1-D1
        cache_size : int
            The maximum size of the read cache, default: 512
        cache_type : str or class
            The cache for managing incoming data from the gRPC.
            If a string is provided, that cache will be loaded from read_cache.py.
            Otherwise, if a class is provided it will be used as the cache. See
            read_cache.py for descriptions and requirements of the cache.
        signal_calibration : bool
            Request calibrated or uncalibrated signal from the gRPC, default: False
        filter_strands : bool
            Filter incoming data for only strand like classifications. If True
            strand classes must be provided in pre_filter_classes, default: True
        one_chunk : bool
            If True, only one chunk of data is provided per read, default: False
        pre_filter_classes : set (or iterable)
            Classes to filter reads by. Ignored if `filter_strands` is False,
            default: {'strand', 'adapter'}
        reload_rpc : bool
            Reload the RPC when initiating the client, default: True
        log_file : str
            Filepath to log messages to; if not provided, log to the console, default: None

        Examples
        --------

        To set up and use a client:

        >>> read_until_client = ReadUntilClient()

        This creates an initial connection to a MinKNOW instance in
        preparation for setting up live reads stream. To initiate the stream:

        >>> read_until_client.run()

        The client is now receiving data, and calls to methods of
        `read_until_client` can be made in a separate thread. For
        example, a continually running analysis function can be
        submitted to an executor as:

        >>> def analysis(client, *args, **kwargs):
        ...     while client.is_running:
        ...         for channel, read in client.get_read_chunks():
        ...             raw_data = np.frombuffer(read.raw_data, client.signal_dtype)
        ...             # do something with raw data... and maybe call:
        ...             #    client.stop_receiving_read(channel, read.number)
        ...             #    client.unblock_read(channel, read.number)
        >>> with ThreadPoolExecutor() as executor:
        ...     executor.submit(analysis, read_until_client)

        To stop processing the gRPC read stream:

        >>> read_until_client.reset()

        If an analysis function is set up as above, watching
        `client.is_running`, this call will cause the analysis
        function to return.

        """
        # TODO: infer flowcell size from device we get back for cache size
        #  eg: cache_size becomes "infer" or int
        #  c = self.connection
        #  len(parse_message(c.device.get_channels_layout())['channel_records'])
        if pre_filter_classes is None:
            pre_filter_classes = {"strand", "adapter"}

        self.logger = setup_logger(
            __name__,
            # "ReadUntilClient_v2",
            log_file=log_file,
            log_format="%(asctime)s %(name)s %(message)s",
            level=logging.INFO,
        )
        self.device = device
        self.mk_host = mk_host
        self.mk_port = mk_port
        self.reload_rpc = reload_rpc
        self.cache_size = cache_size

        # Alternatively, check that the cache is a subclass of BaseCache
        if isinstance(cache_type, str):
            current_package = vars(sys.modules[__name__])["__package__"]
            self.CacheType = _import(
                "{}.read_cache.{}".format(current_package, cache_type)
            )
        else:
            self.CacheType = cache_type

        self.filter_strands = filter_strands
        self.one_chunk = one_chunk
        self.pre_filter_classes = pre_filter_classes

        if self.filter_strands and not self.pre_filter_classes:
            raise ValueError("Read filtering set but no filter classes given.")

        self.logger.info(
            "Client type: {} chunk".format("single" if self.one_chunk else "many")
        )
        self.logger.info("Cache type: {}".format(self.CacheType.__name__))

        pre_filter_classes_str = "no filter"
        if self.pre_filter_classes:
            pre_filter_classes_str = nice_join(self.pre_filter_classes, " ", "and")

        self.logger.info("Filter for classes: {}".format(pre_filter_classes_str))

        self.strand_classes = set(
            int(k)
            for k, v in CLASS_MAP["read_classification_map"].items()
            if v in self.pre_filter_classes
        )
        self.logger.debug("Strand-like classes are: {}.".format(self.strand_classes))
        self.logger.info("Creating rpc connection for device {}.".format(self.device))


        self.connection, self.message_port = get_rpc_connection(
            target_device=self.device,
            host=self.mk_host,
            port=self.mk_port,
            reload=self.reload_rpc,
        )

        self.logger.info("Loaded RPC")
        self.msgs = self.connection.data._pb

        log_waiting = True
        while parse_message(self.connection.acquisition.current_status())["status"] != "PROCESSING":
            if log_waiting:
                self.logger.info("Waiting for device to start processing")
                log_waiting = False
            # Sleep between polls so this loop does not busy-wait
            # (assumes `time` is imported at module level)
            time.sleep(1)

        self.mk_run_dir = Path(parse_message(
            self.connection.protocol.get_current_protocol_run()
        )["output_path"])

        # Create the output dir if it doesn't already exist
        # Sometimes we are faster than MinKNOW, this isn't a problem on OS X

        if self.mk_host in ("127.0.0.1", "localhost"):
            # We are running locally, so create the run dir and log unblocks inside it
            self.mk_run_dir.mkdir(parents=True, exist_ok=True)
            self.unblock_logger = setup_logger(
                # Necessary to use a str of the Path for 3.5 compatibility
                "unblocks",
                log_file=str(self.mk_run_dir / "unblocked_read_ids.txt"),
            )
        else:
            # We are running remotely, so log unblocks to the current directory
            self.mk_run_dir = Path(".")
            self.unblock_logger = setup_logger(
                # Necessary to use a str of the Path for 3.5 compatibility
                "unblocks",
                log_file="unblocked_read_ids.txt",
            )

        # Get signal calibrations
        self.calibration, self.calibration_dtype = {
            True: (self.msgs.GetLiveReadsRequest.CALIBRATED, "calibrated_signal"),
            False: (self.msgs.GetLiveReadsRequest.UNCALIBRATED, "uncalibrated_signal"),
        }.get(signal_calibration)

        _data_types = parse_message(self.connection.data.get_data_types())[
            self.calibration_dtype
        ]

        _signal_dtype = {
            "FLOATING_POINT": {2: "float16", 4: "float32"},
            "SIGNED_INTEGER": {2: "int16", 4: "int32"},
            "UNSIGNED_INTEGER": {2: "uint16", 4: "uint32"},
        }.get(_data_types["type"], {}).get(_data_types["size"], None)

        if _signal_dtype is not None:
            self.signal_dtype = np.dtype(_signal_dtype)
        else:
            raise NotImplementedError("Unrecognized signal dtype")

        self.logger.info("Signal data-type: {}".format(self.signal_dtype))
        # setup the queues and running status
        self._process_thread = None
        self.reset()
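
The _signal_dtype lookup in __init__ maps the (type, size) pair reported by MinKNOW onto a numpy dtype. A standalone sketch of that mapping, with the table copied from the code above and made-up sample input:

import numpy as np

def resolve_signal_dtype(type_name, size):
    # Mirrors the two-level lookup in __init__: keyed on type name, then byte size.
    table = {
        "FLOATING_POINT": {2: "float16", 4: "float32"},
        "SIGNED_INTEGER": {2: "int16", 4: "int32"},
        "UNSIGNED_INTEGER": {2: "uint16", 4: "uint32"},
    }
    name = table.get(type_name, {}).get(size)
    if name is None:
        raise NotImplementedError("Unrecognized signal dtype")
    return np.dtype(name)

print(resolve_signal_dtype("FLOATING_POINT", 4))  # -> float32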