def produce_analysis_piper(ngi_config, project_id):
    """Build a placeholder ``piper_ngi`` analysis tree for one project.

    The tree is rooted at
    ``<base_root>/<sthlm_root>/<top_dir>/ANALYSIS/<project_id>/piper_ngi``,
    where the three leading components are read from
    ``ngi_config["analysis"]``. Every numbered stage folder is created; two
    of them are additionally filled with one empty file per entry found in
    the project's ``DATA`` directory. Finally the ``sbatch``,
    ``setup_xml_files`` and ``logs`` folders are created and
    ``create_version_report`` is called on the ``logs`` folder.

    Args:
        ngi_config: Mapping with an ``"analysis"`` sub-mapping holding the
            ``"base_root"``, ``"sthlm_root"`` and ``"top_dir"`` entries.
        project_id: Name of the project folder below ``ANALYSIS``/``DATA``.
    """
    analysis_cfg = ngi_config["analysis"]
    project_root = os.path.join(
        analysis_cfg["base_root"],
        analysis_cfg["sthlm_root"],
        analysis_cfg["top_dir"],
    )
    analysis_dir = os.path.join(project_root, "ANALYSIS", project_id)
    data_dir = os.path.join(project_root, "DATA", project_id)

    piper_ngi_dir = os.path.join(analysis_dir, "piper_ngi")
    fs.create_folder(piper_ngi_dir)

    # Stage folders that receive one empty file per entry of data_dir,
    # together with the file-name pattern used for that entry.
    per_sample_templates = {
        "05_processed_alignments": "{}.clean.dedup.bam",
        "07_variant_calls": "{}.clean.dedup.recal.bam.raw.indel.vcf.gz",
    }
    # NOTE: "04_merged_aligments" is spelled exactly as in the original code;
    # it is a runtime path and must not be "corrected" here.
    stage_names = (
        "01_raw_alignments",
        "02_preliminary_alignment_qc",
        "03_genotype_concordance",
        "04_merged_aligments",
        "05_processed_alignments",
        "06_final_alignment_qc",
        "07_variant_calls",
        "08_misc",
    )
    for stage_name in stage_names:
        stage_dir = os.path.join(piper_ngi_dir, stage_name)
        fs.create_folder(stage_dir)
        template = per_sample_templates.get(stage_name)
        if template is None:
            continue
        # data_dir is listed anew for each stage that needs it.
        for sample_id in os.listdir(data_dir):
            fs.touch(os.path.join(stage_dir, template.format(sample_id)))

    for extra_name in ("sbatch", "setup_xml_files"):
        fs.create_folder(os.path.join(piper_ngi_dir, extra_name))

    logs_dir = os.path.join(piper_ngi_dir, "logs")
    fs.create_folder(logs_dir)
    # The version report is produced inside the logs folder.
    create_version_report(logs_dir)
def create_FC(incoming_dir, run_name, samplesheet, fastq_1=None, fastq_2=None):
    """Create a placeholder flowcell folder with demultiplexed fastq files.

    The folder ``<incoming_dir>/<run_name>`` (something like
    ``160217_ST-E00201_0063_AHJHNYCCXX``) gets an ``RTAComplete.txt`` marker,
    a ``Demultiplexing`` tree with ``Reports`` and ``Stats`` sub-folders, one
    ``<project>/<sample id>`` folder per samplesheet row holding an R1 and an
    R2 fastq file, and a ``SampleSheet.csv``. If the flowcell folder already
    exists the function returns without touching anything.

    Args:
        incoming_dir: Directory in which the flowcell folder is created.
        run_name: Name of the flowcell folder.
        samplesheet: Non-empty sequence of dict-like rows. Each row needs a
            ``'Lane'`` entry; project, sample id and sample name are looked
            up under two alternative keys each and fall back to ``''``. The
            keys of the first row define the CSV columns, so every row must
            provide all of them.
        fastq_1: Source for the R1 symlinks; when ``None`` both fastq files
            are created with ``fs.touch`` instead.
        fastq_2: Source for the R2 symlinks; only used when ``fastq_1`` is
            not ``None``.
    """
    flowcell_dir = os.path.join(incoming_dir, run_name)
    if os.path.exists(flowcell_dir):
        # This flowcell has been generated before, leave it alone.
        return

    fs.create_folder(flowcell_dir)
    fs.touch(os.path.join(flowcell_dir, 'RTAComplete.txt'))

    demux_dir = os.path.join(flowcell_dir, 'Demultiplexing')
    fs.create_folder(demux_dir)
    for report_folder in ('Reports', 'Stats'):
        fs.create_folder(os.path.join(demux_dir, report_folder))

    # The column names are taken from the first samplesheet row.
    columns = list(samplesheet[0])

    sample_number = 1
    previous_lane = ''
    for row in samplesheet:
        project_name = row.get('Sample_Project', row.get('Project', ''))
        lane = row['Lane']
        # The S<n> counter restarts whenever a row switches to another lane.
        if previous_lane != '' and previous_lane != lane:
            sample_number = 1
        previous_lane = lane
        sample_id = row.get('SampleID', row.get('Sample_ID', ''))
        sample_name = row.get('SampleName', row.get('Sample_Name', ''))

        sample_dir = os.path.join(demux_dir, project_name, sample_id)
        fs.create_folder(sample_dir)

        read_1_path = os.path.join(
            sample_dir,
            '{}_S{}_L00{}_R1_001.fastq.gz'.format(sample_name, sample_number, lane),
        )
        read_2_path = os.path.join(
            sample_dir,
            '{}_S{}_L00{}_R2_001.fastq.gz'.format(sample_name, sample_number, lane),
        )
        sample_number += 1

        if fastq_1 is not None:
            fs.do_symlink(fastq_1, read_1_path)
            fs.do_symlink(fastq_2, read_2_path)
        else:
            fs.touch(read_1_path)
            fs.touch(read_2_path)

    with open(os.path.join(flowcell_dir, 'SampleSheet.csv'), 'w') as sheet:
        preamble = (
            u'[Header]\n',
            u'Date,2016-03-29\n',
            u'Investigator Name,Christian Natanaelsson\n',
            u'[Data]\n',
        )
        for fixed_line in preamble:
            sheet.write(fixed_line)
        # Every cell is followed by a comma, so each line ends with a
        # trailing comma before the newline.
        for column in columns:
            sheet.write(u'{},'.format(column))
        sheet.write(u'\n')
        for row in samplesheet:
            for column in columns:
                sheet.write(u'{},'.format(row[column]))
            sheet.write(u'\n')
def produce_analysis_piper(ngi_config, project_id):
    """Create an empty ``piper_ngi`` result layout for ``project_id``.

    Paths are derived from ``ngi_config['analysis']`` (``base_root``,
    ``sthlm_root`` and ``top_dir``). Below
    ``.../ANALYSIS/<project_id>/piper_ngi`` the numbered stage folders are
    created in order; ``05_processed_alignments`` and ``07_variant_calls``
    also get one empty file for each entry listed in
    ``.../DATA/<project_id>``. Afterwards ``sbatch``, ``setup_xml_files`` and
    ``logs`` are created, and ``create_version_report`` is invoked with the
    ``logs`` path.

    Args:
        ngi_config: Configuration mapping containing an ``'analysis'`` entry.
        project_id: Project folder name used under ``ANALYSIS`` and ``DATA``.
    """
    settings = ngi_config['analysis']
    top_level = os.path.join(settings['base_root'],
                             settings['sthlm_root'],
                             settings['top_dir'])
    data_dir = os.path.join(top_level, 'DATA', project_id)
    piper_ngi_dir = os.path.join(top_level, 'ANALYSIS', project_id, 'piper_ngi')
    fs.create_folder(piper_ngi_dir)

    # (folder name, per-sample file pattern or None when the folder stays
    # empty). '04_merged_aligments' keeps the original on-disk spelling.
    stages = [
        ('01_raw_alignments', None),
        ('02_preliminary_alignment_qc', None),
        ('03_genotype_concordance', None),
        ('04_merged_aligments', None),
        ('05_processed_alignments', '{}.clean.dedup.bam'),
        ('06_final_alignment_qc', None),
        ('07_variant_calls', '{}.clean.dedup.recal.bam.raw.indel.vcf.gz'),
        ('08_misc', None),
    ]
    for folder_name, file_pattern in stages:
        folder_path = os.path.join(piper_ngi_dir, folder_name)
        fs.create_folder(folder_path)
        if file_pattern is not None:
            # One placeholder per entry currently present in the data folder.
            for sample_id in os.listdir(data_dir):
                placeholder = file_pattern.format(sample_id)
                fs.touch(os.path.join(folder_path, placeholder))

    sbatch_dir = os.path.join(piper_ngi_dir, 'sbatch')
    fs.create_folder(sbatch_dir)
    xml_dir = os.path.join(piper_ngi_dir, 'setup_xml_files')
    fs.create_folder(xml_dir)
    logs_dir = os.path.join(piper_ngi_dir, 'logs')
    fs.create_folder(logs_dir)
    # The version report goes into the logs folder created just above.
    create_version_report(logs_dir)
def create_FC(incoming_dir, run_name, samplesheet, fastq_1=None, fastq_2=None):
    """Generate a fake flowcell directory below ``incoming_dir``.

    Produces ``<incoming_dir>/<run_name>`` (for example
    ``160217_ST-E00201_0063_AHJHNYCCXX``) containing ``RTAComplete.txt``,
    ``Demultiplexing/Reports``, ``Demultiplexing/Stats``, a pair of R1/R2
    fastq files per samplesheet row under
    ``Demultiplexing/<project>/<sample id>`` and a ``SampleSheet.csv``.
    Nothing is done when the flowcell directory already exists.

    Args:
        incoming_dir: Parent directory of the flowcell.
        run_name: Flowcell directory name.
        samplesheet: Non-empty sequence of dict-like rows; the first row's
            keys become the CSV columns and every row is indexed with all of
            them. ``"Lane"`` is mandatory in each row.
        fastq_1: File to symlink as every R1 fastq. With ``None`` the fastq
            files are created through ``fs.touch``.
        fastq_2: File to symlink as every R2 fastq; ignored when ``fastq_1``
            is ``None``.
    """
    path_to_fc = os.path.join(incoming_dir, run_name)
    if os.path.exists(path_to_fc):
        # Existing flowcells are skipped entirely.
        return

    fs.create_folder(path_to_fc)
    fs.touch(os.path.join(path_to_fc, "RTAComplete.txt"))
    demultiplexing = os.path.join(path_to_fc, "Demultiplexing")
    fs.create_folder(demultiplexing)
    fs.create_folder(os.path.join(demultiplexing, "Reports"))
    fs.create_folder(os.path.join(demultiplexing, "Stats"))

    # Remember the samplesheet columns (keys of the first row).
    header = [key for key in samplesheet[0]]

    index_in_lane = 1
    lane_seen = ""
    for entry in samplesheet:
        project_name = entry.get("Sample_Project", entry.get("Project", ""))
        lane = entry["Lane"]
        if lane_seen == "":
            lane_seen = lane
        elif lane_seen != lane:
            # A new lane starts: numbering of the S<n> suffix begins again.
            index_in_lane = 1
            lane_seen = lane
        sample_id = entry.get("SampleID", entry.get("Sample_ID", ""))
        sample_name = entry.get("SampleName", entry.get("Sample_Name", ""))

        target_dir = os.path.join(demultiplexing, project_name, sample_id)
        fs.create_folder(target_dir)

        fastq_names = (
            "{}_S{}_L00{}_R1_001.fastq.gz".format(sample_name, index_in_lane, lane),
            "{}_S{}_L00{}_R2_001.fastq.gz".format(sample_name, index_in_lane, lane),
        )
        index_in_lane += 1
        destinations = [os.path.join(target_dir, name) for name in fastq_names]

        if fastq_1 is None:
            for destination in destinations:
                fs.touch(destination)
        else:
            for source, destination in zip((fastq_1, fastq_2), destinations):
                fs.do_symlink(source, destination)

    sheet_path = os.path.join(path_to_fc, "SampleSheet.csv")
    with open(sheet_path, "w") as sheet_file:

        def write_cells(cells):
            # Each cell is written with a trailing comma, then the line ends.
            for cell in cells:
                sheet_file.write("{},".format(cell))
            sheet_file.write("\n")

        sheet_file.write("[Header]\n")
        sheet_file.write("Date,2016-03-29\n")
        sheet_file.write("Investigator Name,Christian Natanaelsson\n")
        sheet_file.write("[Data]\n")
        write_cells(header)
        for entry in samplesheet:
            # Lazy lookup keeps cells written before a missing key is hit.
            write_cells(entry[key] for key in header)
def test_touch(self):
    """Touch a new path under ``self.rootdir`` and check a file exists there."""
    target = os.path.join(self.rootdir, 'empty')
    filesystem.touch(target)
    was_created = os.path.isfile(target)
    self.assertTrue(was_created)