def report_demultiplex_stats(config,mockdb,directory,flowcell,machine):
    """
    Print the per-sample rows of the demultiplex stats file as CSV on stdout.

    The directory in the arguments is the base output directory for casava,
    i.e. it contains the Project_PROJECT_ID directories as well as the
    Basecall_Stats_FCID directory.

    Args:
        config: object whose get(section, option) provides
            ('Common_directories', 'basecall_front') and
            ('Filenames', 'demultiplex'); together with the flowcell key
            these locate the stats file under ``directory``.
        mockdb: not used by this function; kept for interface compatibility.
        directory: base casava output directory.
        flowcell: object whose ``key`` attribute is appended to the
            basecall directory prefix and echoed in every row.
        machine: object whose ``key`` attribute is echoed in every row.

    Returns:
        None if the stats file does not exist (nothing is printed),
        otherwise 1 after the header and all rows have been printed.
    """
    flowcell_key = flowcell.key
    machine_key = machine.key
    demultiplex_filename = os.path.join(
        directory,
        config.get('Common_directories','basecall_front') + flowcell_key,
        config.get('Filenames','demultiplex'))
    if not os.path.isfile(demultiplex_filename):
        return
    struct = extract_barcode_lane_stats(demultiplex_filename)
    # The header spelling is emitted exactly as it always has been; anything
    # that matches on these column names would break if it were "corrected".
    # print(<single string>) behaves the same as the print statement on
    # Python 2 and is also valid on Python 3.
    print("sample_key,index,total_reads,pecentage_pf,pecenetage_qbove_q30,flowcell,hiseq_machine,lane_number")
    # Rows are fetched by position because the concrete type returned by
    # extract_barcode_lane_stats is defined outside this function.
    for i in range(len(struct)):
        row = struct[i]
        number = row['Lane']
        sample_id = row['Sample ID']
        # Rows whose translated sample name contains 'lane' are not
        # per-sample entries and are left out of the report.
        if re.search('lane',translate_sample_name(sample_id)):
            continue
        print(",".join([
            sample_id,
            row['Index'],
            row['# Reads'],
            row['% PF'],
            row['% of >= Q30 Bases (PF)'],
            flowcell_key,
            machine_key,
            number,
        ]))
    return 1
def fill_demultiplex_stats(config,mockdb,directory,flowcell,machine):
    """
    Read the demultiplex stats file and store its values on the Lane,
    Sample and Barcode objects obtained from mockdb.

    The directory in the arguments is the base output directory for casava,
    i.e. it contains the Project_PROJECT_ID directories as well as the
    Basecall_Stats_FCID directory.

    For every row of the stats file the row's lane object is fetched via
    mockdb['Lane'].__get__ and given the lane-level totals (total reads,
    weighted % PF, weighted % >= Q30).  A row whose translated sample name
    contains 'lane' supplies the lane's undetermined read count; any other
    row has its Sample and Barcode objects fetched the same way and the
    barcode's read count set.

    Returns None if the stats file does not exist, otherwise 1.
    """
    flowcell_key = flowcell.key
    # machine is not used beyond this attribute read.
    machine_key = machine.key

    # <directory>/<basecall_front><flowcell key>/<demultiplex file name>
    stats_dirname = config.get('Common_directories','basecall_front') + flowcell_key
    stats_basename = config.get('Filenames','demultiplex')
    stats_path = os.path.join(directory, stats_dirname, stats_basename)
    if not os.path.isfile(stats_path):
        return

    rows = extract_barcode_lane_stats(stats_path)
    # Lane-level aggregates; each is indexed below by the row's 'Lane' value.
    reads_by_lane = calculate_lane_total(rows,'# Reads')
    pf_by_lane = calculate_weighted_percent(rows,'% PF')
    q30_by_lane = calculate_weighted_percent(rows,'% of >= Q30 Bases (PF)')

    for position in range(len(rows)):
        row = rows[position]
        lane_number = row['Lane']
        lane_key = flowcell_key + '_lane_' + lane_number
        lane = mockdb['Lane'].__get__(config,key=lane_key,flowcell=flowcell,number=lane_number)
        # Assigned on every row of the lane; the values are per-lane, so
        # repeated assignment stores the same numbers again.
        lane.total_reads = reads_by_lane[lane_number]
        lane.percentage_pf = pf_by_lane[lane_number]
        lane.percentage_above_q30 = q30_by_lane[lane_number]

        sample_key = translate_sample_name(row['Sample ID'])
        if not re.search('lane',sample_key):
            sample = mockdb['Sample'].__get__(config,key=sample_key)
            index = row['Index']
            barcode_key = lane_key + "_" + index
            barcode = mockdb['Barcode'].__get__(config,key=barcode_key,sample=sample,index=index,lane=lane)
            barcode.reads = row['# Reads']
        else:
            # Not a real sample: this row carries the lane's undetermined reads.
            lane.undetermined_reads = row['# Reads']
    return 1