예제 #1
0
    def __init__(self, *args, **kwargs):
        '''Initialize a sequenced sample

        All arguments are forwarded unchanged to the parent constructor.
        Afterwards the 'seq run' and 'adapter' entries are read via self.loc
        and two entries are stored on the instance, both converted to str:
          - 'folder': the sequencing run folder followed by the adapter folder
          - 'seqrun_folder': the sequencing run folder alone
        '''
        super(SampleSeq, self).__init__(*args, **kwargs)

        # Function-level imports, kept as in the rest of this code base
        # (presumably to avoid import cycles -- TODO confirm)
        from hivwholeseq.sequencing.filenames import get_seqrun_foldername
        from hivwholeseq.sequencing.adapter_info import foldername_adapter

        run_name = self.loc['seq run']
        adapter_id = self.loc['adapter']

        # Folder of this sample: run folder + adapter subfolder
        sample_path = (get_seqrun_foldername(run_name) +
                       foldername_adapter(adapter_id))
        self['folder'] = str(sample_path)

        # Folder of the whole sequencing run
        run_path = get_seqrun_foldername(run_name)
        self['seqrun_folder'] = str(run_path)
예제 #2
0
    def __init__(self, *args, **kwargs):
        '''Initialize a sequenced sample

        Positional and keyword arguments go straight to the parent
        constructor. The instance then gains two str entries derived from its
        own 'seq run' and 'adapter' entries:
          - 'folder': run folder concatenated with the adapter folder
          - 'seqrun_folder': run folder only
        '''
        super(SampleSeq, self).__init__(*args, **kwargs)

        from hivwholeseq.sequencing.filenames import get_seqrun_foldername
        from hivwholeseq.sequencing.adapter_info import foldername_adapter

        run_id, adapter = self.loc['seq run'], self.loc['adapter']

        # Sample-specific folder first, then the folder of the whole run;
        # the order of the two assignments is kept on purpose
        folder_of_sample = get_seqrun_foldername(run_id) + foldername_adapter(adapter)
        self['folder'] = str(folder_of_sample)

        folder_of_run = get_seqrun_foldername(run_id)
        self['seqrun_folder'] = str(folder_of_run)
예제 #3
0
    def __init__(self, *args, **kwargs):
        '''Initialize a sequencing run

        All arguments are forwarded to the parent constructor. Two entries are
        then stored on the instance:
          - 'folder': str of the run folder, looked up from self.name
          - 'samples': whatever load_samples_sequenced returns when restricted
            to this run (seq_runs=[self.name])
        '''
        super(SequencingRun, self).__init__(*args, **kwargs)

        from hivwholeseq.sequencing.filenames import get_seqrun_foldername

        # Where the data of this run are stored
        run_folder = get_seqrun_foldername(self.name)
        self['folder'] = str(run_folder)

        # Samples sequenced in this run only
        run_samples = load_samples_sequenced(seq_runs=[self.name])
        self['samples'] = run_samples
예제 #4
0
    def __init__(self, *args, **kwargs):
        '''Initialize a sequencing run

        After delegating construction to the parent class, attach:
          - 'folder': the run folder for self.name, converted to str
          - 'samples': the result of load_samples_sequenced for this run alone
        '''
        super(SequencingRun, self).__init__(*args, **kwargs)

        from hivwholeseq.sequencing.filenames import get_seqrun_foldername

        folder_path = get_seqrun_foldername(self.name)
        self['folder'] = str(folder_path)

        # The run name is passed as a one-element list of runs to load
        samples_of_run = load_samples_sequenced(seq_runs=[self.name])
        self['samples'] = samples_of_run
예제 #5
0
def make_symlinks(dataset, VERBOSE=0):
    '''Make symlinks for fastq.gz from the SRA

    For the unclassified reads and for every sample of the run, the raw
    'L001_R1'/'L001_R2' files are linked as read1.fastq.gz/read2.fastq.gz
    inside the corresponding adapter folder of the run. Each sample folder is
    additionally linked to its adapter folder.

    Parameters:
       dataset: sequencing run record. Read from it are `name` (run name,
         must end with digits), `samples` (table iterated with iterrows) and
         the 'raw data' entry (folder with one subfolder of reads per sample).
       VERBOSE (int): if nonzero, report links made or already present.

    Raises:
       ValueError: if no entry of the raw data folder matches a sample.
       IndexError: if the run name has no trailing digits, or a folder has no
         'L001_R1'/'L001_R2' file.
    '''
    import re

    seq_run = dataset.name
    samples = dataset.samples
    data_folder = get_seqrun_foldername(seq_run)
    raw_root_folder = dataset.loc['raw data']

    # The run number is given by the trailing digits of the run name
    seq_run_int = int(re.findall(r'\d+$', seq_run)[0])
    run_tag = 'RunId{:04d}'.format(seq_run_int)

    # Unclassified reads: start from the raw data path without its last two
    # '/'-separated fields, then descend into the first entry that names both
    # the sequencer and this run. If none matches, the starting folder itself
    # is searched for the read files.
    unclass_fn = '/'.join(raw_root_folder.split('/')[:-2])+'/'
    for fn in os.listdir(unclass_fn):
        if ('illumina_M' in fn) and (run_tag in fn):
            unclass_fn = unclass_fn+fn+'/LaneId1/'
            break

    # NOTE: -1 is the adapter ID passed for the unclassified reads
    _symlink_read_pair(unclass_fn, data_folder+foldername_adapter(-1), VERBOSE)
    if VERBOSE:
        print('Unclassified reads symlinked')

    # Samples
    for sn, sample in samples.iterrows():
        if 'missing SRA' in str(sample['notes']):
            continue

        # Name to look for in the raw folder, by priority: raw name, patient
        # sample, sample name. A cell whose str() is 'nan' counts as empty
        # (presumably a missing value in the table -- TODO confirm).
        if str(sample['raw name']) != 'nan':
            raw_fn = str(sample['raw name'])
        elif str(sample['patient sample']) != 'nan':
            raw_fn = str(sample['patient sample'])
        else:
            raw_fn = str(sample.name)

        # The first entry containing the name wins (substring match)
        raw_listing = os.listdir(raw_root_folder)
        tmp = [fn for fn in raw_listing if raw_fn in fn]
        if not tmp:
            print('FAILED: ' + raw_fn)
            print('LISTDIR: ' + '\n'.join(raw_listing))
            raise ValueError('Folder not found')

        sample_fn = raw_root_folder+tmp[0]+'/'

        adaID = sample['adapter']
        _symlink_read_pair(sample_fn,
                           data_folder+foldername_adapter(adaID),
                           VERBOSE)
        if VERBOSE:
            print(sn+' '+adaID+' reads symlinked')

        # Symlink samples folder to runs folder
        src_folder = data_folder+foldername_adapter(adaID).rstrip('/')
        dst_folder = get_sample_foldername(sample.name).rstrip('/')
        if not os.path.islink(dst_folder):
            os.symlink(src_folder, dst_folder)
        elif VERBOSE:
            print(dst_folder + ' exists already')


def _symlink_read_pair(src_folder, dst_folder, VERBOSE=0):
    '''Link the 'L001_R1'/'L001_R2' files of a folder as read1/read2.fastq.gz

    Both folder paths must end with '/', because file names are appended to
    them directly. A destination that is already a regular file (possibly via
    a valid link) is left alone; a dangling destination link is replaced.

    Raises:
       IndexError: if src_folder has no file name containing one of the tags.
    '''
    fns = os.listdir(src_folder)

    # Resolve both sources before creating anything, so that a missing file
    # aborts before the first link is made
    srcs = [src_folder+[fn for fn in fns if tag in fn][0]
            for tag in ('L001_R1', 'L001_R2')]
    dsts = [dst_folder+'read1.fastq.gz', dst_folder+'read2.fastq.gz']

    for src, dst in zip(srcs, dsts):
        # isfile follows links: a dangling link looks absent, yet os.symlink
        # refuses to overwrite it. Drop such stale links so a rerun repairs
        # them instead of failing.
        if os.path.islink(dst) and not os.path.exists(dst):
            os.remove(dst)

        if not os.path.isfile(dst):
            os.symlink(src, dst)
        elif VERBOSE:
            print(dst + ' exists already')