コード例 #1
0
def check_fastq_files(filenames, required_offset, allow_empty=False):
    """Check the quality-score encoding of FASTQ files and tally statistics.

    Each ``(file_type, filename)`` pair in ``filenames`` is handed to
    ``_read_sequences`` together with a shared dictionary of counters, and
    the qualities returned are classified by
    ``fastq.classify_quality_strings``.

    Raises NodeError if both offsets were detected, if no offset could be
    determined (unless ``allow_empty`` is set and no qualities were
    returned), or if the detected offset is neither ambiguous nor equal to
    ``required_offset``.

    Returns the dictionary of counters.
    """
    stats = dict.fromkeys(
        ("seq_retained_nts", "seq_retained_reads", "seq_collapsed"), 0)

    for file_type, filename in filenames:
        qualities = _read_sequences(file_type, filename, stats)
        detected = fastq.classify_quality_strings(qualities)

        if detected == fastq.OFFSET_BOTH:
            template = ("FASTQ file contains quality scores with both "
                        "quality offsets (33 and 64); file may be "
                        "unexpected format or corrupt. Please ensure "
                        "that this file contains valid FASTQ reads from a "
                        "single source.\n    Filename = %r")
            raise NodeError(template % (filename, ))

        if detected == fastq.OFFSET_MISSING:
            # Files without any reads are acceptable only on request
            if allow_empty and not qualities:
                continue

            template = ("FASTQ file did not contain quality scores; file "
                        "may be unexpected format or corrupt. Ensure that "
                        "the file is a FASTQ file.\n    Filename = %r")
            raise NodeError(template % (filename, ))

        if detected not in (fastq.OFFSET_AMBIGIOUS, required_offset):
            template = ("FASTQ file contains quality scores with wrong "
                        "quality score offset (%i); expected reads with "
                        "quality score offset %i. Ensure that the "
                        "'QualityOffset' specified in the makefile "
                        "corresponds to the input.\n    Filename = %s")
            raise NodeError(template % (detected, required_offset, filename))

    return stats
コード例 #2
0
ファイル: validation.py プロジェクト: tmancill/paleomix
def _validate_fasta_line(filename, linenum, line):
    """Raise NodeError if a FASTA sequence line contains unexpected characters.

    Characters not found in ``_VALID_CHARS`` are rejected; a line whose only
    invalid character is a carriage-return gets a dedicated message that
    suggests converting the file to unix format.
    """
    unexpected = frozenset(line) - _VALID_CHARS
    if not unexpected:
        return

    if unexpected == frozenset("\r"):
        template = ("FASTA file contains carriage-returns ('\\r')!\n"
                    "Please convert file to unix format, using e.g. "
                    "dos2unix.\n    Filename = %r\n")
        raise NodeError(template % (filename, ))

    template = ("FASTA sequence contains invalid characters\n"
                "    Filename = %r\n    Line = %r\n"
                "    Invalid characters = %r")
    raise NodeError(template % (filename, linenum, "".join(unexpected)))
コード例 #3
0
ファイル: phylip.py プロジェクト: jelber2/paleomix
def _read_sequences(filename):
    """Collects the sequences from a PHYLIP file, and returns the header,
    the names of the sequences, and the sequences themselves. The parser
    supports interleaved sequences (as produced by the pipeline), or simple
    sequential (each paired name and sequence on a single line) as produced
    by RAxML's reduce functionality. PHYLIP files containing multiple entries
    are not supported."""
    line, header = " ", None
    with open(filename) as handle:
        # Find header
        num_sequences = num_bases = 0
        while line:
            line = handle.readline()
            if line.strip():
                header = line
                num_sequences, num_bases = map(int, line.split())
                break

        names = [None for _ in xrange(num_sequences)]
        sequences = [[] for _ in xrange(num_sequences)]

        line_num = 0
        while line:
            line = handle.readline()
            line_strip = line.strip()
            if line_strip:
                # The first N sequences are expected to contain sample names
                index = line_num % num_sequences
                if line_num < num_sequences:
                    name, line_strip = line_strip.split(None, 1)
                    names[index] = name

                sequences[index].extend(line_strip.split())
                line_num += 1

    if len(sequences) != num_sequences:
        message = ("Expected %i sequences, but found %i in PHYLIP file:\n"
                   "    Filename = %r") % (num_sequences, len(sequences),
                                           filename)
        raise NodeError(message)

    for (index, fragments) in enumerate(sequences):
        sequences[index] = "".join(fragments)
        if len(sequences[index]) != num_bases:
            message = ("Expected %ibp sequences, found %ibp sequence for %r\n"
                       " Filename = %r") % (num_bases, len(
                           sequences[index]), names[index], filename)
            raise NodeError(message)

    return header, names, sequences
コード例 #4
0
def _read_sequences(file_type, filename, stats):
    """Read quality strings from 'filename' through an external 'cat' command.

    The output of the command is parsed by ``_collect_qualities`` (which
    also receives ``file_type``, ``filename`` and ``stats``), and the result
    of ``sampling.reservoir_sampling(qualities, 100000)`` is returned.

    Raises NodeError if the command finishes with a non-zero return-code;
    errors raised while reading are re-raised after the process is stopped.
    """
    # Build the command-line used to stream the (possibly compressed) file
    # NOTE(review): presumably 'paleomix cat' handles decompression - confirm
    cat_call = factory.new("cat")
    cat_call.add_multiple_values((filename, ))
    cat_call = cat_call.finalized_call

    cat = None
    try:
        cat = procs.open_proc(cat_call,
                              bufsize=io.DEFAULT_BUFFER_SIZE,
                              stderr=procs.PIPE,
                              stdout=procs.PIPE)
        qualities = _collect_qualities(cat.stdout, file_type, filename, stats)

        return sampling.reservoir_sampling(qualities, 100000)
    except StandardError as error:
        if cat:
            # Stop the process; a failure to kill it is ignored
            try:
                cat.kill()
            except OSError:
                pass
            cat.wait()
            # Cleared so that the 'finally' clause skips the return-code check
            cat = None
        raise error
    finally:
        # Also runs on the successful path, before the value is returned; a
        # non-zero return-code therefore replaces the result with an error
        rc_cat = cat.wait() if cat else 0
        if rc_cat:
            message = "Error running 'paleomix cat':\n" \
                      "  Unicat return-code = %i\n\n%s" \
                      % (rc_cat, cat.stderr.read())
            raise NodeError(message)
コード例 #5
0
 def _teardown(self, config, temp):
     """Validate the alignment written by MAFFT.

     The output file is parsed using ``MSA.from_file``; a NodeError is
     raised if that parser reports an MSAError.
     """
     # NOTE(review): reroot_path presumably maps the output filename into
     # the temporary folder - confirm against its definition
     output_file = reroot_path(temp, self._output_file)
     try:
         MSA.from_file(output_file)
     except MSAError as error:
         raise NodeError("Invalid MSA produced by MAFFT:\n%s" % (error,))
コード例 #6
0
ファイル: sequences.py プロジェクト: jelber2/paleomix
 def _report_failure(cls, bed, fragment):
     """Raise a NodeError describing an extracted fragment of wrong length.

     The message lists the region (contig, start and end of ``bed``), the
     length of ``fragment``, and the length implied by the region.
     """
     expected_length = bed.end - bed.start
     template = ("Failed to extract region from "
                 "reference sequence at %s:%i-%i; got "
                 "%i bp, but expected %i bp.")
     values = (bed.contig, bed.start, bed.end,
               len(fragment), expected_length)

     raise NodeError(template % values)
コード例 #7
0
    def _setup(self, config, temp):
        """Write 'contigs.table' to the temp folder before running the command.

        The table holds one row (ID, Size, Ns, Hits) per contig reported by
        ``pysam.idxstats`` for the input file. Contigs are skipped if their
        PLINK name is None, is neither numeric nor 'X', or is not listed in
        ``self._contigs``.

        Raises NodeError if the size of a contig in the BAM file differs
        from the size recorded in ``self._contigs``.
        """
        table_path = os.path.join(temp, "contigs.table")
        with open(table_path, "w") as handle:
            handle.write("ID\tSize\tNs\tHits\n")

            # Workaround for pysam < 0.9 returning list, >= 0.9 returning str
            for line in "".join(pysam.idxstats(self._input_file)).split('\n'):
                line = line.strip()
                if not line:
                    continue

                name, size, hits, _ = line.split('\t')
                name = contig_name_to_plink_name(name)
                if name is None or not (name.isdigit() or name == 'X'):
                    continue
                elif name not in self._contigs:
                    # Excluding contigs is allowed
                    continue

                contig = self._contigs[name]
                if int(size) != contig['Size']:
                    # The database holds the expected size; the BAM file is
                    # the input being validated
                    raise NodeError(
                        "Size mismatch between database and BAM; "
                        "expected size %i, found %i for contig %r" %
                        (contig['Size'], int(size), name))

                handle.write('{ID}\t{Size}\t{Ns}\t{Hits}\n'.format(
                    ID=name,
                    Size=contig['Size'],
                    Ns=contig['Ns'],
                    Hits=hits))

        CommandNode._setup(self, config, temp)
コード例 #8
0
ファイル: validation.py プロジェクト: tmancill/paleomix
    def _throw_node_error(self, chrom, pos, records, name, seq, qual):
        """Raise a NodeError reporting a read found more than once.

        The message describes the read (position, name, sequence and
        qualities), summarizes the reads per BAM file in ``records``
        (a mapping of filename to reads), and lists the 'touch' commands
        for ``self.output_files`` that mark the check as passed.
        """
        lines = [
            "The same read was found multiple times at position %i on %r!" %
            (pos, chrom),
            "    Name:      %r" % (name, ),
            "    Sequence:  %r" % (seq, ),
            "    Qualities: %r" % (qual, ),
            "",
            "Read was found in these BAM files:",
        ]

        lines.extend("   %s in %r" % (_summarize_reads(reads), filename)
                     for filename, reads in sorted(records.items()))

        explanation = ("This indicates that the same data has been "
                       "included multiple times in the project. This "
                       "can be because multiple copies of the same "
                       "files were used, or because one or more files "
                       "contain multiple copies of the same reads. "
                       "The command 'paleomix dupcheck' may be used "
                       "to review the potentially duplicated data in "
                       "these BAM files.\n\n"
                       "If this error was a false positive, then you "
                       "may execute the following command(s) to mark "
                       "this test as having succeeded:")
        lines.append("")
        lines.append(explanation)

        lines.extend("$ touch '%s'" % (fpath, )
                     for fpath in self.output_files)

        raise NodeError("\n".join(lines))
コード例 #9
0
ファイル: phylip.py プロジェクト: jelber2/paleomix
def _read_partitions(filename):
    """Read a partition file, as produced by the pipeline itself, and
    returns a list of tuples containing the (start, end) coordinates;
    each line is expected to follow the following format:

    DNA, Name = Start-End

    Multiple regions, or skips are not supported."""
    partitions = []
    with open(filename) as handle:
        for (line_num, line) in enumerate(handle):
            result = _RE_PARTITION.match(line.rstrip())
            if result:
                start, end = result.groups()
            else:
                result = _RE_PARTITION_SINGLE.match(line.rstrip())
                if not result:
                    message = ("Line %i in partitions file does not follow "
                               "expected format:\n"
                               "  Expected, either = 'DNA, Name = Start-End'\n"
                               "                or = 'DNA, Name = Start'\n"
                               "  Found = %r") % (line_num, line.rstrip())
                    raise NodeError(message)
                start, = result.groups()
                end = start

            partitions.append((int(start) - 1, int(end)))
    return partitions
コード例 #10
0
ファイル: validation.py プロジェクト: tmancill/paleomix
def _validate_fasta_header(filename, linenum, line, cache):
    """Validate the name in a FASTA header line and record it in 'cache'.

    The name is the text between the first character of the line and the
    first space. A NodeError is raised if the name is empty, if it does not
    match ``_RE_REF_NAME``, or if it is already present in ``cache``.
    Otherwise ``cache[name]`` is set to ``linenum``.
    """
    name = line.partition(" ")[0][1:]

    if not name:
        template = ("FASTA sequence must have non-empty name\n"
                    "    Filename = %r\n    Line = %r\n")
        raise NodeError(template % (filename, linenum))

    if not _RE_REF_NAME.match(name):
        template = ("Invalid name for FASTA sequence: %r\n"
                    "    Filename = %r\n    Line = %r\n")
        raise NodeError(template % (name, filename, linenum))

    if name in cache:
        template = ("FASTA sequences have identical name\n"
                    "    Filename = %r\n    Name = %r\n"
                    "    Line 1 = %r\n    Line 2 = %r\n")
        raise NodeError(template % (filename, name, linenum, cache[name]))

    cache[name] = linenum
コード例 #11
0
    def _read_admixture_log(cls, filename):
        with open(filename) as handle:
            for line in handle:
                if line.startswith("Loglikelihood:"):
                    return float(line.split()[1])

            raise NodeError("Could not find likelihood value in log-file %r; "
                            "looking for line starting with 'Loglikelihood:'" %
                            (filename, ))
コード例 #12
0
ファイル: paml.py プロジェクト: jelber2/paleomix
 def _run(self, config, temp):
     """Run the command, adding detail to the error raised on failure.

     If the command fails with return-codes ``[1, None]`` and the last line
     of 'template.stdout' contains "Giving up.", then that line is appended
     to the message of the NodeError that is raised.
     """
     try:
         CommandNode._run(self, config, temp)
     except NodeError as error:
         if self._command.join() == [1, None]:
             stdout_path = fileutils.reroot_path(temp, "template.stdout")
             with open(stdout_path) as handle:
                 lines = handle.readlines()

             if lines and ("Giving up." in lines[-1]):
                 error = NodeError("%s\n\n%s" % (error, lines[-1]))

         # Either the original error, or the more detailed replacement
         raise error
コード例 #13
0
    def customize(cls,
                  input_file_1,
                  input_file_2,
                  output_file,
                  reference,
                  prefix,
                  threads=2,
                  log_file=None,
                  dependencies=()):
        """Build the parameters for a Bowtie2 alignment node.

        Reads are aligned as single-end ('-U') when only ``input_file_1`` is
        given, and as paired-end ('-1' / '-2') when both inputs are given;
        a NodeError is raised if ``input_file_1`` is missing.

        Returns a dict with the keys "commands", "order", "threads" and
        "dependencies".
        """
        aln = _bowtie2_template(("bowtie2", ),
                                prefix,
                                OUT_STDOUT=AtomicCmd.PIPE,
                                CHECK_VERSION=BOWTIE2_VERSION)
        aln.set_option("-x", prefix)

        if log_file is not None:
            aln.set_kwargs(OUT_STDERR=log_file)

        if not input_file_1:
            raise NodeError("Input 1, OR both input 1 and input 2 must "
                            "be specified for Bowtie2 node")

        if input_file_2:
            inputs = (("-1", input_file_1, "IN_FILE_1_%02i"),
                      ("-2", input_file_2, "IN_FILE_2_%02i"))
        else:
            inputs = (("-U", input_file_1, "IN_FILE_1_%02i"), )

        for flag, input_files, template in inputs:
            aln.add_multiple_options(flag,
                                     safe_coerce_to_tuple(input_files),
                                     template=template)

        max_threads = _get_max_threads(reference, threads)
        aln.set_option("--threads", max_threads)

        order, commands = _process_output(
            aln,
            output_file,
            reference,
            run_fixmate=input_file_1 and input_file_2)
        commands["aln"] = aln

        result = {}
        result["commands"] = commands
        result["order"] = ["aln"] + order
        result["threads"] = max_threads
        result["dependencies"] = dependencies
        return result
コード例 #14
0
def _collect_qualities(handle, file_type, filename, stats):
    header = handle.readline()
    while header:
        sequence = handle.readline()
        seperator = handle.readline()
        qualities = handle.readline()

        if not header.startswith("@"):
            if header.startswith(">"):
                raise NodeError("Input file appears to be in FASTA format "
                                "(header starts with '>', expected '@'), "
                                "but only FASTQ files are supported\n"
                                "Filename = %r" % (filename, ))

            raise NodeError("Input file lacks FASTQ header (expected '@', "
                            "found %r), but only FASTQ files are supported\n"
                            "    Filename = %r" % (header[:1], filename))
        elif not qualities:
            raise NodeError("Partial record found; is not 4 lines long:\n"
                            "Filename = %r\n    Record = '%s'" %
                            (filename, header.rstrip()))
        elif not seperator.startswith("+"):
            raise NodeError("Input file lacks FASTQ seperator (expected '+', "
                            "found %r), but only FASTQ files are supported\n"
                            "    Filename = %r" % (seperator[:1], filename))
        elif len(sequence) != len(qualities):
            raise NodeError("Input file contains malformed FASTQ records; "
                            "length of sequence / qualities are not the "
                            "same.\n    Filename = %r\n    Record = '%s'" %
                            (filename, header.rstrip()))

        stats["seq_retained_nts"] += len(sequence)
        stats["seq_retained_reads"] += 1

        if "Collapsed" in file_type:
            stats["seq_collapsed"] += 1

        yield qualities
        header = handle.readline()
コード例 #15
0
ファイル: mapdamage.py プロジェクト: jelber2/paleomix
 def _run(self, config, temp):
     """Run the command, adding detail to the error raised on failure.

     If the command fails with return-codes ``[1]``, 'pipe_mapDamage.stdout'
     in the temp folder is searched for a line stating that DNA damage
     levels are too low; the first such line is appended to the message of
     the NodeError that is raised, with "Warning:" replaced by "ERROR:".
     """
     try:
         CommandNode._run(self, config, temp)
     except NodeError as error:
         err_message = "DNA damage levels are too low"
         if self._command.join() == [1]:
             fpath = os.path.join(temp, "pipe_mapDamage.stdout")
             with open(fpath) as handle:
                 for line in handle:
                     if err_message in line:
                         line = line.strip().replace("Warning:", "ERROR:")
                         error = NodeError("%s\n\n%s" % (error, line))
                         break

         # Either the original error, or the more detailed replacement
         raise error
コード例 #16
0
ファイル: bwa.py プロジェクト: jelber2/paleomix
def _check_bwa_prefix(prefix):
    """Raise NodeError if index files for 'prefix' do not match the BWA version.

    Indexes written by BWA v0.5.x and by v0.6+ are incompatible, and v0.5.x
    additionally writes a few files that later versions do not. Relying on
    plain input-file dependencies would therefore cause needless
    re-indexing when moving from 0.6+ to 0.5.x, and runtime failures when
    moving from 0.5.x to 0.6+.

    A mismatch between the installed version of BWA and the version implied
    by the files on disk is instead treated as an error that the user must
    resolve. Each prefix is checked once; nothing is done if the version
    of BWA cannot be determined, or if none of the common index files exist.
    """
    if prefix in _PREFIXES_CHECKED:
        return
    _PREFIXES_CHECKED.add(prefix)

    try:
        bwa_version = BWA_VERSION.version
    except versions.VersionRequirementError:
        # Ignored here, reported elsewhere
        return

    # Extensions written only by v0.5.x
    legacy_extensions = (".rbwt", ".rpac", ".rsa")
    # Extensions written by v0.5.x, v0.6.x, and v0.7.x
    shared_extensions = (".amb", ".ann", ".bwt", ".pac", ".sa")

    v05x_files = set(prefix + ext for ext in legacy_extensions)
    common_files = set(prefix + ext for ext in shared_extensions)
    all_files = v05x_files | common_files
    current_files = all_files - set(missing_files(all_files))

    if not (current_files & common_files):
        return

    has_v05x_files = bool(current_files & v05x_files)
    expected_version = None
    if bwa_version >= (0, 6, 0):
        if has_v05x_files:
            expected_version = "v0.5.x"
    elif bwa_version < (0, 6, 0):
        if not has_v05x_files:
            expected_version = "v0.6.x or later"

    if expected_version is None:
        return

    template = ("BWA version is v%s, but prefix appears to be created using %s!\n"
                "  Your copy of BWA may have changed, or you may be using the wrong\n"
                "  prefix. To resolve this issue, either change your prefix, re-install\n"
                "  BWA %s, or remove the prefix files at\n"
                "    $ ls %s.*")
    version_str = ".".join(map(str, bwa_version))

    raise NodeError(template % (version_str, expected_version,
                                expected_version, prefix))
コード例 #17
0
    def _run(self, config, temp):
        """Copy the admixture fileset with the highest likelihood.

        The likelihood of each fileset in ``self._files`` is read from the
        first file in that set ending with ".log"; a NodeError is raised for
        filesets without a log-file. The files in the set that compares
        greatest are copied to their location below ``self._output_root``.
        """
        likelihoods = []
        for fileset in self._files:
            log_filename = next(
                (name for name in fileset if name.endswith(".log")), None)
            if log_filename is None:
                raise NodeError("No log-file found in list of admixture "
                                "output-files: %r" % (fileset, ))

            likelihood = self._read_admixture_log(log_filename)
            likelihoods.append((likelihood, fileset))

        best_fileset = max(likelihoods)[1]
        for src_filename in best_fileset:
            fileutils.copy_file(
                src_filename,
                fileutils.reroot_path(self._output_root, src_filename))
コード例 #18
0
def _check_bwa_prefix(prefix):
    """Checks that a given prefix is compatible with the currently required version of
    BWA. Older index files are incompatible with BWA v0.7.x, but may be identified by
    the presense of a small number of additional files not present when an index is
    produced using BWA v0.7.x.
    """
    if any(
            os.path.exists(prefix + ext)
            for ext in (".rbwt", ".rpac", ".rsa.")):
        filenames = "\n".join("    %s.%s" % (prefix, ext)
                              for ext in ("amb", "ann", "bwt", "pac", "sa",
                                          "rbwt", "rpac", "rsa"))

        raise NodeError(
            "Prefix appears to be created using BWA v0.5.x or older, but PALEOMIX only "
            "supports BWA v0.7.x or later.\nPlease remove the following files to allow "
            "PALEOMIX to re-index the FASTA file:\n%s" % (filenames, ))
コード例 #19
0
ファイル: sequences.py プロジェクト: jelber2/paleomix
    def _setup(self, _config, _temp):
        for filename in self._infiles.itervalues():
            with open(filename + ".fai") as handle:
                sequences = set()
                for line in handle:
                    sequences.add(line.split("\t", 1)[0])

                missing_sequences = list(self._sequences - sequences)
                if missing_sequences:
                    if len(missing_sequences) >= 4:
                        missing_sequences = missing_sequences[:3]
                        missing_sequences.append("...")

                    message = ("FASTA file does not contain expected "
                               "sequences:\n  File =  %r\n  "
                               "Sequences = %s\n") \
                        % (filename, ", ".join(missing_sequences))
                    raise NodeError(message)
コード例 #20
0
ファイル: bedtools.py プロジェクト: jelber2/paleomix
    def _run(self, config, temp):
        """Write a copy of the input BED file with padded records.

        Contig lengths are read from the first two columns of
        ``self._fai_file``; a NodeError is raised if a contig name occurs
        more than once. The start of every record is reduced by
        ``self._amount`` (but not below 0) and the end is increased by the
        same amount (but not beyond the length of the contig).
        """
        contigs = {}
        with open(self._fai_file) as fai_handle:
            for entry in fai_handle:
                name, length, _ = entry.split('\t', 2)
                if name in contigs:
                    raise NodeError('Reference genome contains multiple '
                                    'identically named contigs (%r)!' %
                                    (name, ))

                contigs[name] = int(length)

        with open(reroot_path(temp, self._outfile), 'w') as out_handle:
            for record in read_bed_file(self._infile, contigs=contigs):
                contig_length = contigs[record.contig]
                padded_start = record.start - self._amount
                record.start = max(0, padded_start)
                padded_end = record.end + self._amount
                record.end = min(padded_end, contig_length)

                out_handle.write('%s\n' % (record, ))
コード例 #21
0
    def _read_coverage_tables(cls, key, filenames):
        hits = nts = 0
        for filename in filenames:
            subtable = {}
            read_coverage_table(subtable, filename)
            contigtables = get_in(subtable, key)

            if contigtables is None:
                raise NodeError("Error reading table %r; row not found:"
                                "\n   %s   ...\n\nIf files have been renamed "
                                "during the run, then please remove this file "
                                "in that it may be re-generated.\nHowever, "
                                "note that read-group tags in the BAM files "
                                "may not be correct!" %
                                (filename, "   ".join(key)))

            for contigtable in contigtables.itervalues():
                hits += contigtable["Hits"]
                nts += contigtable["M"]
        return hits, nts
コード例 #22
0
    def _run(self, config, temp):
        """Write a padded and merged copy of the input BED file.

        Contig lengths are read from the first two columns of
        ``self._fai_file``; a NodeError is raised if a contig name occurs
        more than once. All records are loaded, padded in place using
        ``pad_bed_records``, and the records returned by
        ``merge_bed_records`` are written one per line.
        """
        contigs = {}
        with open(self._fai_file) as fai_handle:
            for entry in fai_handle:
                name, length, _ = entry.split("\t", 2)
                if name in contigs:
                    raise NodeError("Reference genome contains multiple "
                                    "identically named contigs (%r)!" %
                                    (name, ))

                contigs[name] = int(length)

        with open(reroot_path(temp, self._outfile), "w") as out_handle:
            records = list(read_bed_file(self._infile, contigs=contigs))
            pad_bed_records(records=records,
                            padding=self._amount,
                            max_sizes=contigs)

            out_handle.writelines("%s\n" % (record, )
                                  for record in merge_bed_records(records))
コード例 #23
0
    node._remove_temp_dir = node_mock._remove_temp_dir

    node.run(cfg_mock)

    node_mock.mock_calls == [
        call._create_temp_dir(cfg_mock),
        call._setup(cfg_mock, _DUMMY_TEMP),
        call._run(cfg_mock, _DUMMY_TEMP),
        call._teardown(cfg_mock, _DUMMY_TEMP),
        call._remove_temp_dir(_DUMMY_TEMP),
    ]


# Pairs of (exception instance raised by a mocked step, exception type), used
# to parametrize test_run__exceptions as "exception, expectation".
# NOTE(review): the second item is presumably the type expected to be raised
# by Node.run - confirm against the full test
_EXCEPTIONS = (
    (TypeError("The castle AAARGH!"), NodeUnhandledException),
    (NodeError("He's a very naughty boy!"), NodeError),
)


# Runs once for every combination of step name and (exception, expectation)
@pytest.mark.parametrize("key", ("_setup", "_run", "_teardown"))
@pytest.mark.parametrize("exception, expectation", _EXCEPTIONS)
def test_run__exceptions(key, exception, expectation):
    # NOTE(review): the visible part of this test only performs the setup
    # below; any remaining statements are not shown here
    mock = Mock()
    node = Node()
    # Creating the temporary folder is mocked and yields a dummy path
    node._create_temp_dir = mock._create_temp_dir
    node._create_temp_dir.return_value = _DUMMY_TEMP

    # The step named by 'key' is replaced by a mock raising 'exception'
    setattr(node, key, getattr(mock, key))
    getattr(node, key).side_effect = exception

    cfg_mock = Mock(temp_root=_DUMMY_TEMP_ROOT)
コード例 #24
0
    def __init__(
            self,
            input_file_1,
            input_file_2,
            output_file,
            reference,
            prefix,
            threads=2,
            log_file=None,
            mapping_options=None,
            cleanup_options=None,
            dependencies=(),
    ):
        """Set up a node running Bowtie2 piped into a cleanup command.

        Reads are aligned as single-end ('-U') when only ``input_file_1`` is
        given, and as paired-end ('-1' / '-2') when both inputs are given.
        ``mapping_options`` and ``cleanup_options`` are applied to the
        Bowtie2 and the cleanup command respectively; None (the default) is
        treated as an empty dict.

        Raises NodeError if ``input_file_1`` is not specified.
        """
        # Fresh dicts per call; default arguments are shared between calls
        if mapping_options is None:
            mapping_options = {}
        if cleanup_options is None:
            cleanup_options = {}

        aln = _bowtie2_template(
            ("bowtie2", ),
            prefix,
            OUT_STDOUT=AtomicCmd.PIPE,
            CHECK_VERSION=BOWTIE2_VERSION,
        )

        aln.set_option("-x", prefix)

        if log_file is not None:
            aln.set_kwargs(OUT_STDERR=log_file)

        if input_file_1 and not input_file_2:
            aln.add_option("-U", input_file_1)
        elif input_file_1 and input_file_2:
            aln.add_option("-1", input_file_1)
            aln.add_option("-2", input_file_2)
        else:
            raise NodeError("Input 1, OR both input 1 and input 2 must "
                            "be specified for Bowtie2 node")

        max_threads = _get_max_threads(reference, threads)
        aln.set_option("--threads", max_threads)

        cleanup = _new_cleanup_command(aln,
                                       output_file,
                                       reference,
                                       paired_end=input_file_1
                                       and input_file_2)

        apply_options(aln, mapping_options)
        apply_options(cleanup, cleanup_options)

        algorithm = "PE" if input_file_2 else "SE"
        description = _get_node_description(
            name="Bowtie2",
            algorithm=algorithm,
            input_files_1=input_file_1,
            input_files_2=input_file_2,
            prefix=prefix,
            threads=threads,
        )

        # NOTE(review): the node is registered with 'threads', whereas the
        # command itself is given 'max_threads' - confirm that this is intended
        CommandNode.__init__(
            self,
            command=ParallelCmds([aln.finalize(),
                                  cleanup.finalize()]),
            description=description,
            threads=threads,
            dependencies=dependencies,
        )
コード例 #25
0
ファイル: validation.py プロジェクト: tmancill/paleomix
def check_fasta_file(filename):
    """Validate the layout of a FASTA file, raising NodeError on problems.

    The file is processed line by line using a small state machine (no
    record seen yet, in a header, in a sequence, or in whitespace following
    a sequence). Headers are checked using ``_validate_fasta_header`` and
    sequence lines using ``_validate_fasta_line``.

    Raises NodeError if the file contains no sequences, if a sequence is
    empty, if sequence data precedes the first header, if empty lines occur
    inside a record, or if any line except the last line of a record
    differs in length from the first line of that record.
    """
    with open(filename) as handle:
        namecache = {}
        state, linelength, linelengthchanged = _NA, None, False
        for linenum, line in enumerate(handle, start=1):
            # Only \n is allowed as not all tools handle \r
            line = line.rstrip("\n")

            if not line:
                if state in (_NA, _IN_WHITESPACE):
                    continue
                elif state == _IN_HEADER:
                    raise NodeError("Expected FASTA sequence, found empty line"
                                    "\n    Filename = %r\n    Line = %r" %
                                    (filename, linenum))
                elif state == _IN_SEQUENCE:
                    state = _IN_WHITESPACE
                else:
                    assert False
            elif line.startswith(">"):
                if state in (_NA, _IN_SEQUENCE, _IN_WHITESPACE):
                    _validate_fasta_header(filename, linenum, line, namecache)
                    state = _IN_HEADER
                    linelength = None
                    linelengthchanged = False
                elif state == _IN_HEADER:
                    # The empty record started on the previous line
                    raise NodeError("Empty sequences not allowed\n"
                                    "    Filename = %r\n    Line = %r" %
                                    (filename, linenum - 1))
                else:
                    assert False
            else:
                if state == _NA:
                    raise NodeError("Expected FASTA header, found %r\n"
                                    "    Filename = %r\n    Line = %r" %
                                    (line, filename, linenum))
                elif state == _IN_HEADER:
                    _validate_fasta_line(filename, linenum, line)
                    linelength = len(line)
                    state = _IN_SEQUENCE
                elif state == _IN_SEQUENCE:
                    _validate_fasta_line(filename, linenum, line)
                    # If the length has changed, then that line must be the
                    # last line in the record, which may be shorter due to the
                    # sequence length. This is because the FAI index format
                    # expects that each line has the same length.
                    if linelengthchanged or (linelength < len(line)):
                        raise NodeError("Lines in FASTA files must be of same "
                                        "length\n    Filename = %r\n"
                                        "    Line = %r" % (filename, linenum))
                    elif linelength != len(line):
                        linelengthchanged = True
                elif state == _IN_WHITESPACE:
                    raise NodeError("Empty lines not allowed in sequences\n"
                                    "    Filename = %r\n    Line = %r" %
                                    (filename, linenum))
                else:
                    assert False

        if state == _NA:
            raise NodeError("File does not contain any sequences:\n"
                            "    Filename = %r" % (filename, ))
        elif state == _IN_HEADER:
            raise NodeError("File ends with an empty sequence:\n"
                            "    Filename = %r" % (filename, ))