def mean_emoji(filename):
    '''Print the per-position mean quality of a FASTQ file as emoji.

    The PHRED scores of every record are summed position by position,
    divided by the total number of records, rounded to the nearest integer
    and rendered through fastq_emoji_map.

    The accumulator starts with 500 slots and grows on demand, so reads of
    any length are accepted. The number of reported positions is the length
    of the longest read seen; positions whose summed quality is zero are
    kept rather than trimmed, so columns never shift.

    Note: every position is divided by the total record count, so positions
    past the end of shorter reads are averaged as if those reads scored 0.

    Arguments:
        filename: FASTQ source handed to SeqIO.parse
    Result:
        None (the emoji string is printed to stdout)
    '''
    totals = np.zeros(500)
    longest = 0
    seq_count = 0
    for r in SeqIO.parse(filename, "fastq"):
        scores = r.letter_annotations["phred_quality"]
        read_len = len(scores)
        if read_len > totals.size:
            # Grow (at least doubling) so long reads no longer overflow
            # the buffer and repeated growth stays cheap.
            extra = max(read_len, 2 * totals.size) - totals.size
            totals = np.concatenate((totals, np.zeros(extra)))
        totals[:read_len] += scores
        longest = max(longest, read_len)
        seq_count += 1

    # With no records there is nothing to average; keep an empty result
    # instead of dividing by zero.
    if seq_count:
        means_fp = totals[:longest] / seq_count
    else:
        means_fp = totals[:0]
    rounded = means_fp.round().astype(int)

    # A placeholder record lets Biopython do the PHRED -> character encoding.
    record = SeqRecord(Seq("a" * len(rounded)),
                       id="test",
                       name="mean scores",
                       description="example with mean fastq socres",
                       letter_annotations={'phred_quality': list(rounded)})
    print("".join(
        emojify(fastq_emoji_map[s])
        for s in QualityIO._get_sanger_quality_str(record)))
def get_vcf_qual(quality):
    '''Map a quality value to an emoji.

    Arguments:
        quality: a value accepted by int(), or None when no quality is
            available
    Result:
        The emoji for ":question:" when quality is None; otherwise the
        emoji that emaps.fastq_emoji_map_binned assigns to the PHRED
        encoding of int(quality) (":heart_eyes:" when the encoded
        character is not in the map).
    '''
    # Hack to do this quickly - use same trick as FASTQE and convert from
    # value to a PHRED encoding then map
    # TODO make this better
    if quality is None:
        # Identity test: never dispatches to a custom __eq__.
        return emojify(":question:")

    # A one-base placeholder record lets Biopython encode the score.
    record_qual = SeqRecord(Seq('N'),
                            id="test",
                            name="lookup",
                            description="example",
                            letter_annotations={'phred_quality': [int(quality)]})
    mapping_dict_qual_use = emaps.fastq_emoji_map_binned
    original_qual = QualityIO._get_sanger_quality_str(record_qual)
    return "".join(
        emojify(mapping_dict_qual_use.get(s, ":heart_eyes:"))
        for s in original_qual)
def map_scores(sequence,
               mapping_dict=emaps.fastq_emoji_map,
               default_value=":heart_eyes:",
               mapping_function=emojify,
               spacer=" "):
    '''Render the quality string of a record through a lookup table.

    :param sequence: object handed to QualityIO._get_sanger_quality_str
    :param mapping_dict: lookup from quality character to the value given
        to mapping_function
    :param default_value: value used for characters missing from
        mapping_dict
    :param mapping_function: callable applied to every looked-up value
    :param spacer: string placed between the mapped values
    :return: the mapped values joined by spacer
    '''
    quality_chars = QualityIO._get_sanger_quality_str(sequence)
    rendered = []
    for char in quality_chars:
        looked_up = mapping_dict.get(char, default_value)
        rendered.append(mapping_function(looked_up))
    return spacer.join(rendered)
def _load_emoji_map(path):
    '''Read an emoji mapping written as a Python dict literal from path.'''
    with open(path) as f:
        return ast.literal_eval(f.read())


def _print_fastq_as_emoji(handle, seq_map, qual_map):
    '''Print each FASTQ record in handle as an id line and two emoji lines.'''
    for seq in SeqIO.parse(handle, "fastq"):
        print(emojify(":arrow_forward:") + " " + seq.id)
        bioemojify = "".join(
            emojify(seq_map.get(s, ":heart_eyes:")) for s in seq.seq)
        bioemojify_qual = "".join(
            emojify(qual_map.get(s, ":heart_eyes:"))
            for s in QualityIO._get_sanger_quality_str(seq))
        print(bioemojify + "\n" + bioemojify_qual)


def convert_fastq(options):
    '''Convert FASTQ file to emoji.

    If no FASTQ files are specified on the command line then
    read from the standard input (stdin). File names ending in ".gz" and
    gzip-compressed stdin (detected by its magic bytes) are decompressed
    on the fly.

    Arguments:
        options: the command line options of the program; reads
            custom, custom_qual, bin and fastq_files
    Result:
        None
    '''
    if options.custom:
        mapping_dict_use = _load_emoji_map(options.custom)
    else:
        mapping_dict_use = local_seq_emoji_map

    if options.custom_qual:
        mapping_dict_qual_use = _load_emoji_map(options.custom_qual)
    elif options.bin:
        mapping_dict_qual_use = emaps.fastq_emoji_map_binned
    else:
        mapping_dict_qual_use = emaps.fastq_emoji_map

    if options.fastq_files:
        for fastq_filename in options.fastq_files:
            # The message previously said "FASTA" although the input is FASTQ.
            logging.info("Processing FASTQ file from %s", fastq_filename)
            try:
                if fastq_filename.endswith(".gz"):
                    fastq_file = gzip.open(fastq_filename, 'rt')
                else:
                    fastq_file = open(fastq_filename)
            except IOError as exception:
                exit_with_error(str(exception), EXIT_FILE_IO_ERROR)
            else:
                with fastq_file:
                    _print_fastq_as_emoji(fastq_file, mapping_dict_use,
                                          mapping_dict_qual_use)
    else:
        logging.info("Processing FASTQ file from stdin")
        # Peek at the gzip magic number without consuming any input.
        if sys.stdin.buffer.peek(2)[:2] == b'\x1f\x8b':
            # Closing the wrapper releases the decompressor; gzip does not
            # close a file object it was handed, so stdin stays open.
            with gzip.open(sys.stdin.buffer, 'rt') as stdin_file:
                _print_fastq_as_emoji(stdin_file, mapping_dict_use,
                                      mapping_dict_qual_use)
        else:
            _print_fastq_as_emoji(sys.stdin, mapping_dict_use,
                                  mapping_dict_qual_use)
def _print_quality_row(stats, source_name, label, scores, emoji_lookup):
    '''Print one tab-separated row: summary, label, space-separated emoji.'''
    summary = stats.pretty(source_name)
    emoji_row = " ".join(
        emojify(emoji_lookup.get(symbol, ':heart_eyes:'))
        for symbol in QualityIO._get_sanger_quality_str(scores))
    print(summary, label, emoji_row, sep='\t')


def process_files(options):
    '''Compute FastaStats for every input file and print them as emoji rows.

    For each file named in options.fasta_files a "mean" row is always
    printed; "max" and "min" rows are added when options.max / options.min
    are set. With options.bin the binned emoji map is used and the row
    labels carry a " (binned)" suffix. When options.scale is set the scale
    is printed first via print_scale.

    When no files are given the statistics are computed from stdin and only
    their pretty() summary is printed.

    Arguments:
        options: the command line options of the program
    Result:
        None
    '''
    if not options.fasta_files:
        logging.info("Processing FASTA file from stdin")
        stats = FastaStats().from_file(sys.stdin, options.minlen)
        print(stats.pretty("stdin"))
        return

    for fasta_filename in options.fasta_files:
        logging.info(
            "Processing FASTA file from {}".format(fasta_filename))
        try:
            fasta_file = open(fasta_filename)
        except IOError as exception:
            exit_with_error(str(exception), EXIT_FILE_IO_ERROR)
        else:
            with fasta_file:
                stats = FastaStats().from_file(fasta_file, options.minlen)
                if options.scale:
                    print_scale(emaps.all_qualities, options.bin)

                # Choose the emoji table and the row labels once per file.
                if options.bin:
                    logging.info("Binned calculations")
                    emoji_lookup = emaps.fastq_emoji_map_binned
                    max_label = "max (binned)"
                    mean_label = "mean (binned)"
                    min_label = "min (binned)"
                else:
                    emoji_lookup = emaps.fastq_emoji_map
                    max_label = "max"
                    mean_label = "mean"
                    min_label = "min"

                if options.max:
                    logging.info("Calculate max quality per position")
                    _print_quality_row(stats, fasta_filename, max_label,
                                       stats.quality_scores_maxs,
                                       emoji_lookup)

                logging.info("Calculate mean quality per position")
                _print_quality_row(stats, fasta_filename, mean_label,
                                   stats.quality_scores_mean, emoji_lookup)

                if options.min:
                    logging.info("Calculate min quality per position")
                    _print_quality_row(stats, fasta_filename, min_label,
                                       stats.quality_scores_mins,
                                       emoji_lookup)
return read totreads = 0 passing_reads = 0 while True: try: read = R1.next() read2 = R2.next() totreads += 1 except StopIteration: break fil1 = filterSeq(read, 0.1, 10, 100) fil2 = filterSeq(read2, 0.1, 10, 100) if (fil1 != None) and (fil2 != None): sys.stdout.write(fil1.id + "\t" + str(fil1.seq) + "\t" + str(fil2.seq) + "\t" + QualityIO._get_sanger_quality_str(fil1) + "\t" + QualityIO._get_sanger_quality_str(fil2) + "\n") passing_reads += 1 elif (fil1 != None): SeqIO.write(fil1, out, "fastq") elif (fil2 != None): SeqIO.write(fil2, out, "fastq") sys.stderr.write("\t" + str(passing_reads) + " out of " + str(totreads) + " fragments passed the filtering" + "\n") data = commands.getstatusoutput('date') sys.stderr.write("2nd step: filtering out duplicated fragments at " + data[1] + "\n")