def extract_kraken_report_bracken_txt(file_path, key, data_dict):
    """Record the unclassified-read count from a Kraken/Bracken report.

    Reads *file_path*, takes the second tab-separated field of the first
    line as an integer, and stores it under
    data_dict["results"][key]["unclassified_count"]. Files with at most
    one line are left untouched. Returns the (mutated) data_dict.
    """
    lines = datahandling.read_buffer(file_path).split("\n")
    if len(lines) > 1:
        first_row_fields = lines[0].split("\t")
        data_dict["results"][key]["unclassified_count"] = int(first_row_fields[1])
    return data_dict
def extract_contig_stats(file_path, key, data_dict):
    """Extract the average insert size from a stats report.

    Searches the file's text for an "insert size average:" line and stores
    the value as a float under both data_dict["results"][key] and
    data_dict["summary"] (key "insert_size_avg"). Returns the mutated
    data_dict.

    Raises AttributeError if the pattern is not found (re.search -> None).
    """
    buffer = datahandling.read_buffer(file_path)
    # Raw string fixes the invalid escape sequence "\s" in the original
    # (DeprecationWarning today, a SyntaxError in future Python versions).
    insert_size_avg = float(
        re.search(r"insert size average:\s*([0-9]+[\.]?[0-9]*)",
                  buffer, re.MULTILINE).group(1))
    data_dict["results"][key]["insert_size_avg"] = insert_size_avg
    data_dict["summary"]["insert_size_avg"] = insert_size_avg
    return data_dict
def extract_kraken_report_txt(file_path, key, data_dict):
    """Parse a Kraken report into a list of stripped, tab-split rows.

    Every newline-delimited line of the file (including any empty trailing
    line) becomes a list of its whitespace-stripped tab-separated fields,
    stored under data_dict["results"][key]["kraken_output"]. Returns the
    mutated data_dict.
    """
    rows = datahandling.read_buffer(file_path).split("\n")
    data_dict["results"][key]["kraken_output"] = [
        [field.strip() for field in row.split("\t")]
        for row in rows
    ]
    return data_dict
def extract_bbuk_log(file_path, key, data_dict):
    """Extract read/base counts from a BBduk log file.

    Pulls the input and result read/base counts out of the log's
    "Input:" and "Result:" lines and stores them as ints under
    data_dict["results"][key]; the filtered read count is mirrored into
    data_dict["summary"]. Returns the mutated data_dict.

    Raises AttributeError if any pattern is not found (re.search -> None).
    """
    buffer = datahandling.read_buffer(file_path)
    results = data_dict["results"][key]
    # Raw strings fix the invalid escape sequence "\s" in the originals
    # (DeprecationWarning today, a SyntaxError in future Python versions).
    results["input_reads_num"] = int(
        re.search(r"Input:\s*([0-9]+)\sreads", buffer, re.MULTILINE).group(1))
    results["filtered_reads_num"] = int(
        re.search(r"Result:\s*([0-9]+)\sreads", buffer, re.MULTILINE).group(1))
    results["input_reads_bases"] = int(
        re.search(r"Input:.*?([0-9]+)\sbases", buffer, re.MULTILINE).group(1))
    results["filtered_reads_bases"] = int(
        re.search(r"Result:.*?([0-9]+)\sbases", buffer, re.MULTILINE).group(1))
    data_dict["summary"]["filtered_reads_num"] = results["filtered_reads_num"]
    return data_dict
def extract_quast_report(file_path, key, data_dict):
    """Extract assembly metrics (GC, N50, N75, L50, L75) from a QUAST report.

    Each metric is read from its tab-separated report line and stored under
    data_dict["results"][key]; GC and N50 are mirrored into
    data_dict["summary"]. Returns the mutated data_dict.

    Raises AttributeError if any metric line is missing (re.search -> None).
    """
    buffer = datahandling.read_buffer(file_path)

    def first_group(pattern):
        # Return the first captured group of `pattern` matched in the report.
        return re.search(pattern, buffer, re.MULTILINE).group(1)

    results = data_dict["results"][key]
    # Raw strings fix the invalid escape sequences ("\(", "\)") in the
    # original (DeprecationWarning today, a SyntaxError in future Python).
    results["GC"] = float(first_group(r"GC \(%\)\t([0-9]+[\.]?[0-9]*)"))
    results["N50"] = int(first_group(r"N50\t([0-9]+)"))
    results["N75"] = int(first_group(r"N75\t([0-9]+)"))
    results["L50"] = int(first_group(r"L50\t([0-9]+)"))
    results["L75"] = int(first_group(r"L75\t([0-9]+)"))
    data_dict["summary"]["GC"] = results["GC"]
    data_dict["summary"]["N50"] = results["N50"]
    return data_dict
def extract_bracken_txt(file_path, key, data_dict):
    """Record per-species rows from a Bracken abundance table.

    Skips the first line (header) and the final split element (presumably
    the empty string left by a trailing newline — NOTE(review): a file
    without a trailing newline would lose its last row; confirm upstream
    format). For each remaining row i, stores the species name, the
    kraken-assigned read count, the added read count, and the total count
    (as int) under "species_<i>_*" keys in data_dict["results"][key].
    Returns the mutated data_dict.
    """
    lines = datahandling.read_buffer(file_path).split("\n")
    if len(lines) > 1:
        results = data_dict["results"][key]
        for row_idx in range(1, len(lines) - 1):
            fields = lines[row_idx].split("\t")
            prefix = "species_" + str(row_idx)
            results[prefix + "_name"] = fields[0]
            results[prefix + "_kraken_assigned_reads"] = fields[3]
            results[prefix + "_added_reads"] = fields[4]
            results[prefix + "_count"] = int(fields[5].strip())
    return data_dict
def extract_contig_sketch(file_path, key, data_dict):
    """Store the sketch file's content as a list of lines.

    Replaces data_dict["results"][key] with the file split on newlines
    (no stripping; a trailing newline yields a final empty string).
    Returns the mutated data_dict.
    """
    content = datahandling.read_buffer(file_path)
    data_dict["results"][key] = content.split("\n")
    return data_dict