Ejemplo n.º 1
0
def main():
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    input_type = sys.argv[3] or 'sanger'  # input type should ordinarily be unnecessary

    num_reads = None
    fastq_read = None
    out = fastaWriter(path=output_filename, format="fasta")
    for num_reads, fastq_read in enumerate(fastqReader(path=input_filename, format=input_type)):
        out.write(fastq_read)
    out.close()
    if num_reads is None:
        print("No valid FASTQ reads could be processed.")
    else:
        print("%i FASTQ reads were converted to FASTA." % (num_reads + 1))
Ejemplo n.º 2
0
                    record.quality = record.quality[:cut]
                    clipped += 1
                    writer.write(record)
                else:
                    short_clipped += 1
            elif keep_negatives:
                if len(record) >= min_len:
                    negs += 1
                    writer.write(record)
                else:
                    short_negs += 1
elif seq_format.lower() == "fasta":
    in_handle = open(in_file, "rU")
    out_handle = open(out_file, "w")
    reader = fastaReader(in_handle)
    writer = fastaWriter(out_handle)
    #Following code is identical to that for FASTQ but without editing qualities
    if forward:
        for record in reader:
            seq = record.sequence.upper()
            result = primer.search(seq)
            if result:
                #Forward primer, take everything after it
                cut = result.end()
                record.sequence = seq[cut:]
                if len(record.sequence) >= min_len:
                    clipped += 1
                    writer.write(record)
                else:
                    short_clipped += 1
            elif keep_negatives:
Ejemplo n.º 3
0
        manifest = ReadRocheXmlManifest(in_handle)
    except ValueError:
        manifest = None
    out_handle = open(out_file, "wb")
    writer = SffWriter(out_handle, xml=manifest)
    in_handle.seek(0) #start again after getting manifest
    count = writer.write_file(rename_seqrecords(SffIterator(in_handle), rename))
    out_handle.close()
    in_handle.close()
else:
    #Use Galaxy for FASTA, QUAL or FASTQ
    if seq_format.lower() in ["fasta", "csfasta"] \
    or seq_format.lower().startswith("qual"):
        from galaxy_utils.sequence.fasta import fastaReader, fastaWriter
        reader = fastaReader(open(in_file, "rU"))
        writer = fastaWriter(open(out_file, "w"))
        marker = ">"
    elif seq_format.lower().startswith("fastq"):
        from galaxy_utils.sequence.fastq import fastqReader, fastqWriter
        reader = fastqReader(open(in_file, "rU"))
        writer = fastqWriter(open(out_file, "w"))
        marker = "@"
    else:
        sys.exit("Unsupported file type %r" % seq_format)
    #Now do the renaming
    count = 0
    renamed = 0
    for record in reader:
        #The [1:] is because the fastaReader leaves the > on the identifier,
        #likewise the fastqReader leaves the @ on the identifier
        try:
Ejemplo n.º 4
0
                    record.quality = record.quality[:cut]
                    clipped += 1
                    writer.write(record)
                else:
                    short_clipped += 1
            elif keep_negatives:
                if len(record) >= min_len:
                    negs += 1
                    writer.write(record)
                else:
                    short_neg += 1
elif seq_format.lower()=="fasta":
    in_handle = open(in_file, "rU")
    out_handle = open(out_file, "w")
    reader = fastaReader(in_handle)
    writer = fastaWriter(out_handle)
    #Following code is identical to that for FASTQ but without editing qualities
    if forward:
        for record in reader:
            seq = record.sequence.upper()
            result = primer.search(seq)
            if result:
                #Forward primer, take everything after it
                cut = result.end()
                record.sequence = seq[cut:]
                if len(record.sequence) >= min_len:
                    clipped += 1
                    writer.write(record)
                else:
                    short_clipped += 1
            elif keep_negatives:
Ejemplo n.º 5
0
        manifest = None
    out_handle = open(out_file, "wb")
    writer = SffWriter(out_handle, xml=manifest)
    in_handle.seek(0)  # start again after getting manifest
    count = writer.write_file(rename_seqrecords(SffIterator(in_handle), rename))
    out_handle.close()
    in_handle.close()
else:
    # Use Galaxy for FASTA, QUAL or FASTQ
    if seq_format.lower() in ["fasta", "csfasta"] or seq_format.lower().startswith(
        "qual"
    ):
        from galaxy_utils.sequence.fasta import fastaReader, fastaWriter

        reader = fastaReader(open(in_file, "rU"))
        writer = fastaWriter(open(out_file, "w"))
        marker = ">"
    elif seq_format.lower().startswith("fastq"):
        from galaxy_utils.sequence.fastq import fastqReader, fastqWriter

        reader = fastqReader(open(in_file, "rU"))
        writer = fastqWriter(open(out_file, "w"))
        marker = "@"
    else:
        sys.exit("Unsupported file type %r" % seq_format)
    # Now do the renaming
    count = 0
    renamed = 0
    for record in reader:
        # The [1:] is because the fastaReader leaves the > on the identifier,
        # likewise the fastqReader leaves the @ on the identifier
Ejemplo n.º 6
0
    print("Using %i IDs from %i columns of tabular file" %
          (len(ids), len(columns)))
else:
    # Single column, special case speed up
    col = columns[0]
    for line in handle:
        if not line.startswith("#"):
            ids.add(line.rstrip("\n").split("\t")[col])
    print("Using %i IDs from tabular file" % (len(ids)))
handle.close()

# Write filtered FASTA file based on IDs from tabular file
reader = fastaReader(open(in_file, "rU"))
if out_positive_file != "-" and out_negative_file != "-":
    print("Generating two FASTA files")
    positive_writer = fastaWriter(open(out_positive_file, "w"))
    negative_writer = fastaWriter(open(out_negative_file, "w"))
    for record in reader:
        # The [1:] is because the fastaReader leaves the > on the identifer.
        if record.identifier and record.identifier.split()[0][1:] in ids:
            positive_writer.write(record)
        else:
            negative_writer.write(record)
    positive_writer.close()
    negative_writer.close()
elif out_positive_file != "-":
    print("Generating matching FASTA file")
    positive_writer = fastaWriter(open(out_positive_file, "w"))
    for record in reader:
        # The [1:] is because the fastaReader leaves the > on the identifer.
        if record.identifier and record.identifier.split()[0][1:] in ids: