Ejemplo n.º 1
0
 def parse(self):
     """Read the file at self.filepath and store its information in self.

     When isGzip reports the file as gzip-compressed it is opened through
     gzip in text mode, otherwise it is opened as a regular text file. The
     open handle is then passed to self._parseFileHandle.
     """
     if isGzip(self.filepath):
         opener, open_mode = gzip.open, "rt"
     else:
         opener, open_mode = open, "r"
     with opener(self.filepath, open_mode) as handle:
         self._parseFileHandle(handle)
Ejemplo n.º 2
0
    def nbSeq(filepath):
        """
        Return the number of sequences in file.

        The number of lines in the file is divided by four, i.e. each
        sequence is assumed to span exactly four lines. Trailing lines that
        do not form a complete group of four are not counted.

        :param filepath: Path to the file. It is read through gzip when isGzip reports it as compressed.
        :type filepath: str
        :return: The number of sequences.
        :rtype: int
        """
        handler = open
        handler_options = "r"
        if isGzip(filepath):
            handler = gzip.open
            handler_options = "rt"
        with handler(filepath, handler_options) as reader:
            nb_lines = sum(1 for _ in reader)
        # Floor division stays in exact integer arithmetic instead of going
        # through a float and truncating it back to int.
        return nb_lines // 4
Ejemplo n.º 3
0
    def nbSeq(filepath):
        """
        Count the sequences contained in file.

        A sequence is counted for every line that starts with ">". The file
        is read through gzip in text mode when isGzip reports it as
        compressed, and as a regular text file otherwise.

        :param filepath: Path to the file.
        :type filepath: str
        :return: The number of sequences.
        :rtype: int
        """
        if isGzip(filepath):
            opener, open_mode = gzip.open, "rt"
        else:
            opener, open_mode = open, "r"
        with opener(filepath, open_mode) as reader:
            return sum(1 for record in reader if record.startswith(">"))
Ejemplo n.º 4
0
    def __init__(self, filepath, mode="r"):
        """
        Build and return an instance of FastqIO.

        The file is opened immediately. For the modes 'w' and 'a' the gzip
        text handle is used when the path ends with '.gz'; for any other
        mode it is used when isGzip reports the file as compressed. In all
        remaining cases the file is opened with the builtin open.

        :param filepath: Path to the file.
        :type filepath: str
        :param mode: Mode to open the file ('r', 'w', 'a').
        :type mode: str
        :return: The new instance.
        :rtype: FastqIO
        """
        self.filepath = filepath
        self.mode = mode
        # When writing, the extension decides; when reading, isGzip decides.
        if mode in ["w", "a"]:
            use_gzip = filepath.endswith('.gz')
        else:
            use_gzip = isGzip(filepath)
        if use_gzip:
            self.file_handle = gzip.open(filepath, mode + "t")
        else:
            self.file_handle = open(filepath, mode)
        self.current_line_nb = 1
Ejemplo n.º 5
0
    def nbSeqAndNt(filepath):
        """
        Count the sequences and the nucleotides contained in file.

        Every line starting with ">" is counted as one sequence. For every
        other line, its length without trailing whitespace is added to the
        nucleotide total.

        :param filepath: Path to the file.
        :type filepath: str
        :return: The number of sequences and the number of nucleotides.
        :rtype: int, int
        """
        if isGzip(filepath):
            opener, open_mode = gzip.open, "rt"
        else:
            opener, open_mode = open, "r"
        seq_count = 0
        nt_count = 0
        with opener(filepath, open_mode) as reader:
            for record in reader:
                if record.startswith(">"):
                    seq_count += 1
                    continue
                nt_count += len(record.rstrip())
        return seq_count, nt_count
Ejemplo n.º 6
0
    # Logger: configure the root handler format, then log the command line
    log_format = (
        '%(asctime)s -- [%(filename)s][pid:%(process)d][%(levelname)s] -- %(message)s'
    )
    logging.basicConfig(format=log_format)
    log = logging.getLogger(os.path.basename(__file__))
    log.setLevel(logging.INFO)
    log.info("Command: " + " ".join(sys.argv))

    # Process: copy every input, in order, into the single output file
    if args.output.endswith('.gz'):
        open_output, output_mode = gzip.open, "wt"
    else:
        open_output, output_mode = open, "w"
    with open_output(args.output, output_mode) as writer:
        previous_line = "\n"
        for input_path in args.inputs:
            # Start a new line when the last written line had no line break
            if not previous_line.endswith("\n"):
                writer.write("\n")
            if isGzip(input_path):
                open_input, input_mode = gzip.open, "rt"
            else:
                open_input, input_mode = open, "r"
            with open_input(input_path, input_mode) as reader:
                for text in reader:
                    writer.write(text)
                    previous_line = text
    log.info("End of job")