Beispiel #1
0
def produce_analysis_piper(ngi_config, project_id):
    """Lay out the ``piper_ngi`` folder tree for ``project_id``.

    The project's ANALYSIS and DATA directories are both located under
    ``base_root/sthlm_root/top_dir`` as read from ``ngi_config["analysis"]``.
    Inside ``ANALYSIS/<project_id>/piper_ngi`` the numbered stage folders are
    created, and for the two stages listed in ``per_sample_outputs`` one file
    per entry of the DATA directory is touched. Finally the ``sbatch``,
    ``setup_xml_files`` and ``logs`` folders are created and
    ``create_version_report`` is called on the ``logs`` folder.

    Args:
        ngi_config: mapping with an ``"analysis"`` section holding the
            ``"base_root"``, ``"sthlm_root"`` and ``"top_dir"`` entries.
        project_id: name of the project sub-directory.
    """
    analysis_cfg = ngi_config["analysis"]
    project_root = os.path.join(analysis_cfg["base_root"],
                                analysis_cfg["sthlm_root"],
                                analysis_cfg["top_dir"])
    analysis_dir = os.path.join(project_root, "ANALYSIS", project_id)
    data_dir = os.path.join(project_root, "DATA", project_id)

    piper_ngi_dir = os.path.join(analysis_dir, "piper_ngi")
    fs.create_folder(piper_ngi_dir)

    # Stages that receive one file per entry found in the DATA directory,
    # mapped to the file name template used for that entry.
    per_sample_outputs = {
        "05_processed_alignments": "{}.clean.dedup.bam",
        "07_variant_calls": "{}.clean.dedup.recal.bam.raw.indel.vcf.gz",
    }
    stage_names = (
        "01_raw_alignments",
        "02_preliminary_alignment_qc",
        "03_genotype_concordance",
        "04_merged_aligments",
        "05_processed_alignments",
        "06_final_alignment_qc",
        "07_variant_calls",
        "08_misc",
    )
    for stage_name in stage_names:
        stage_path = os.path.join(piper_ngi_dir, stage_name)
        fs.create_folder(stage_path)
        name_template = per_sample_outputs.get(stage_name)
        if name_template is None:
            continue
        # The DATA directory is listed only once the stage folder exists,
        # exactly when the per-sample files are about to be written.
        for sample_id in os.listdir(data_dir):
            fs.touch(os.path.join(stage_path, name_template.format(sample_id)))

    for extra_name in ("sbatch", "setup_xml_files"):
        fs.create_folder(os.path.join(piper_ngi_dir, extra_name))

    logs_dir = os.path.join(piper_ngi_dir, "logs")
    fs.create_folder(logs_dir)
    create_version_report(logs_dir)
Beispiel #2
0
def create_FC(incoming_dir, run_name, samplesheet, fastq_1=None, fastq_2=None):
    """Build a flowcell directory ``run_name`` inside ``incoming_dir``.

    The directory gets an ``RTAComplete.txt`` file, a ``Demultiplexing``
    folder with ``Reports`` and ``Stats`` sub-folders, one
    ``Demultiplexing/<project>/<sample id>`` folder per samplesheet row
    holding an R1 and an R2 fastq entry, and a ``SampleSheet.csv`` that
    repeats the rows of ``samplesheet``.

    If the flowcell directory already exists nothing is done.

    Args:
        incoming_dir: directory in which the flowcell folder is created.
        run_name: flowcell folder name, e.g. 160217_ST-E00201_0063_AHJHNYCCXX.
        samplesheet: sequence of dict rows. Every row needs a ``Lane`` key;
            project, sample id and sample name are looked up under
            ``Sample_Project``/``Project``, ``SampleID``/``Sample_ID`` and
            ``SampleName``/``Sample_Name`` and default to ``''``. An empty
            sequence yields a flowcell without samples.
        fastq_1: when None the fastq entries are created with ``fs.touch``;
            otherwise every R1 entry is made with ``fs.do_symlink`` from
            this path.
        fastq_2: source path for the R2 entries; only used when ``fastq_1``
            is given.

    Raises:
        KeyError: if a row has no ``Lane`` key or lacks one of the columns
            present in the first row.
    """
    # Create something like 160217_ST-E00201_0063_AHJHNYCCXX
    path_to_fc = os.path.join(incoming_dir, run_name)
    if os.path.exists(path_to_fc):
        # This FC exists, skip it
        return

    # Column names come from the first row. They are read before anything is
    # written so that a bad samplesheet cannot leave a half-built flowcell
    # behind (which later calls would skip because the folder exists), and an
    # empty samplesheet is accepted instead of raising IndexError.
    header = list(samplesheet[0]) if samplesheet else []

    demux_dir = os.path.join(path_to_fc, 'Demultiplexing')
    fs.create_folder(path_to_fc)
    fs.touch(os.path.join(path_to_fc, 'RTAComplete.txt'))
    fs.create_folder(demux_dir)
    fs.create_folder(os.path.join(demux_dir, 'Reports'))
    fs.create_folder(os.path.join(demux_dir, 'Stats'))

    # The S<n> index in the fastq names restarts at 1 whenever the lane
    # changes from one row to the next.
    counter = 1
    current_lane = ''
    for line in samplesheet:
        project_name = line.get('Sample_Project', line.get('Project', ''))
        lane = line['Lane']
        if lane != current_lane:
            if current_lane != '':
                counter = 1
            current_lane = lane
        sample_id = line.get('SampleID', line.get('Sample_ID', ''))
        sample_name = line.get('SampleName', line.get('Sample_Name', ''))

        # Create dir structure
        sample_dir = os.path.join(demux_dir, project_name, sample_id)
        fs.create_folder(sample_dir)

        # Now create the data
        fastq_1_dest = os.path.join(
            sample_dir,
            '{}_S{}_L00{}_R1_001.fastq.gz'.format(sample_name, counter, lane))
        fastq_2_dest = os.path.join(
            sample_dir,
            '{}_S{}_L00{}_R2_001.fastq.gz'.format(sample_name, counter, lane))
        counter += 1
        if fastq_1 is None:
            fs.touch(fastq_1_dest)
            fs.touch(fastq_2_dest)
        else:
            fs.do_symlink(fastq_1, fastq_1_dest)
            # NOTE(review): fastq_2 is passed on as-is, even when it is None;
            # confirm how fs.do_symlink treats a None source.
            fs.do_symlink(fastq_2, fastq_2_dest)

    with open(os.path.join(path_to_fc, 'SampleSheet.csv'), 'w') as Samplesheet_file:
        Samplesheet_file.write(u'[Header]\n')
        Samplesheet_file.write(u'Date,2016-03-29\n')
        Samplesheet_file.write(u'Investigator Name,Christian Natanaelsson\n')
        Samplesheet_file.write(u'[Data]\n')
        # Every cell, including the last one of a row, is followed by a comma.
        Samplesheet_file.write(
            u''.join(u'{},'.format(key) for key in header) + u'\n')
        for line in samplesheet:
            Samplesheet_file.write(
                u''.join(u'{},'.format(line[key]) for key in header) + u'\n')
Beispiel #3
0
def produce_analysis_piper(ngi_config, project_id):
    """Create the ``piper_ngi`` analysis folder structure of a project.

    Both ``ANALYSIS/<project_id>`` and ``DATA/<project_id>`` are resolved
    below ``base_root/sthlm_root/top_dir`` taken from
    ``ngi_config['analysis']``. The numbered stage folders are created in
    ``ANALYSIS/<project_id>/piper_ngi``; the processed-alignments and
    variant-calls stages additionally get one touched file per entry of the
    DATA directory. The ``sbatch``, ``setup_xml_files`` and ``logs`` folders
    follow, and ``create_version_report`` is invoked with the ``logs`` folder.

    Args:
        ngi_config: mapping whose ``'analysis'`` entry provides
            ``'base_root'``, ``'sthlm_root'`` and ``'top_dir'``.
        project_id: project directory name.
    """
    paths_cfg = ngi_config['analysis']
    top_level = os.path.join(paths_cfg['base_root'],
                             paths_cfg['sthlm_root'],
                             paths_cfg['top_dir'])
    analysis_dir = os.path.join(top_level, 'ANALYSIS', project_id)
    data_dir = os.path.join(top_level, 'DATA', project_id)

    piper_ngi_dir = os.path.join(analysis_dir, 'piper_ngi')
    fs.create_folder(piper_ngi_dir)

    # (stage folder, per-sample file name template or None) in creation order
    stages = [
        ('01_raw_alignments', None),
        ('02_preliminary_alignment_qc', None),
        ('03_genotype_concordance', None),
        ('04_merged_aligments', None),
        ('05_processed_alignments', '{}.clean.dedup.bam'),
        ('06_final_alignment_qc', None),
        ('07_variant_calls', '{}.clean.dedup.recal.bam.raw.indel.vcf.gz'),
        ('08_misc', None),
    ]
    for folder_name, file_template in stages:
        folder_path = os.path.join(piper_ngi_dir, folder_name)
        fs.create_folder(folder_path)
        if file_template is not None:
            # One file per entry currently present in the DATA directory.
            for sample_id in os.listdir(data_dir):
                sample_file = file_template.format(sample_id)
                fs.touch(os.path.join(folder_path, sample_file))

    fs.create_folder(os.path.join(piper_ngi_dir, 'sbatch'))
    fs.create_folder(os.path.join(piper_ngi_dir, 'setup_xml_files'))
    logs_dir = os.path.join(piper_ngi_dir, 'logs')
    fs.create_folder(logs_dir)
    create_version_report(logs_dir)
Beispiel #4
0
def create_FC(incoming_dir, run_name, samplesheet, fastq_1=None, fastq_2=None):
    """Create the flowcell folder ``run_name`` under ``incoming_dir``.

    Contents written: ``RTAComplete.txt``, ``Demultiplexing`` with its
    ``Reports`` and ``Stats`` sub-folders, a
    ``Demultiplexing/<project>/<sample id>`` folder with an R1 and an R2
    fastq entry for each samplesheet row, and ``SampleSheet.csv`` listing
    the rows of ``samplesheet``.

    Returns immediately, without touching anything, when the flowcell folder
    already exists.

    Args:
        incoming_dir: parent directory of the flowcell folder.
        run_name: flowcell folder name, e.g. 160217_ST-E00201_0063_AHJHNYCCXX.
        samplesheet: sequence of dict rows. ``Lane`` is required in each row;
            project, sample id and sample name are read from
            ``Sample_Project``/``Project``, ``SampleID``/``Sample_ID`` and
            ``SampleName``/``Sample_Name``, defaulting to ``""``. May be
            empty, which gives a flowcell without samples.
        fastq_1: if None, fastq entries are created with ``fs.touch``;
            otherwise R1 entries are created with ``fs.do_symlink`` from
            this path.
        fastq_2: source path for R2 entries, used only when ``fastq_1`` is
            given.

    Raises:
        KeyError: if a row lacks ``Lane`` or any column found in the first
            row.
    """
    # create something like 160217_ST-E00201_0063_AHJHNYCCXX
    path_to_fc = os.path.join(incoming_dir, run_name)
    if os.path.exists(path_to_fc):
        # this FC exists, skip it
        return

    # memorise SampleSheet columns (taken from the first row) before writing
    # anything: an empty samplesheet no longer raises IndexError, and a bad
    # one cannot leave behind a partial flowcell that later calls would skip.
    header = list(samplesheet[0]) if samplesheet else []

    demux_dir = os.path.join(path_to_fc, "Demultiplexing")
    fs.create_folder(path_to_fc)
    fs.touch(os.path.join(path_to_fc, "RTAComplete.txt"))
    fs.create_folder(demux_dir)
    fs.create_folder(os.path.join(demux_dir, "Reports"))
    fs.create_folder(os.path.join(demux_dir, "Stats"))

    # the S<n> index of the fastq names starts over at 1 each time the lane
    # differs from the previous row's lane
    counter = 1
    current_lane = ""
    for line in samplesheet:
        project_name = line.get("Sample_Project", line.get("Project", ""))
        lane = line["Lane"]
        if lane != current_lane:
            if current_lane != "":
                counter = 1
            current_lane = lane
        sample_id = line.get("SampleID", line.get("Sample_ID", ""))
        sample_name = line.get("SampleName", line.get("Sample_Name", ""))

        # create dir structure
        sample_dir = os.path.join(demux_dir, project_name, sample_id)
        fs.create_folder(sample_dir)

        # now create the data
        fastq_1_dest = os.path.join(
            sample_dir,
            "{}_S{}_L00{}_R1_001.fastq.gz".format(sample_name, counter, lane))
        fastq_2_dest = os.path.join(
            sample_dir,
            "{}_S{}_L00{}_R2_001.fastq.gz".format(sample_name, counter, lane))
        counter += 1
        if fastq_1 is None:
            fs.touch(fastq_1_dest)
            fs.touch(fastq_2_dest)
        else:
            fs.do_symlink(fastq_1, fastq_1_dest)
            # NOTE(review): fastq_2 is forwarded unchanged, None included;
            # confirm what fs.do_symlink does with a None source.
            fs.do_symlink(fastq_2, fastq_2_dest)

    with open(os.path.join(path_to_fc, "SampleSheet.csv"), "w") as Samplesheet_file:
        Samplesheet_file.write("[Header]\n")
        Samplesheet_file.write("Date,2016-03-29\n")
        Samplesheet_file.write("Investigator Name,Christian Natanaelsson\n")
        Samplesheet_file.write("[Data]\n")
        # each cell is followed by a comma, the last one of a row included
        Samplesheet_file.write(
            "".join("{},".format(key) for key in header) + "\n")
        for line in samplesheet:
            Samplesheet_file.write(
                "".join("{},".format(line[key]) for key in header) + "\n")
Beispiel #5
0
 def test_touch(self):
     """filesystem.touch leaves a regular file at the requested path."""
     target_path = os.path.join(self.rootdir, 'empty')
     filesystem.touch(target_path)
     file_present = os.path.isfile(target_path)
     self.assertTrue(file_present)