def __init__(self, *args, **kwargs):
    '''Initialize a sequenced sample

    All positional and keyword arguments are forwarded unchanged to the
    parent constructor. Two entries are then stored on the instance:

    - 'folder': the sequencing run folder name concatenated with the adapter
      folder name, converted to str
    - 'seqrun_folder': the sequencing run folder name alone, converted to str

    The run and the adapter are read from the 'seq run' and 'adapter' entries
    of the instance via .loc.
    '''
    super(SampleSeq, self).__init__(*args, **kwargs)

    # Function-scope imports, kept in the same order as before
    from hivwholeseq.sequencing.filenames import get_seqrun_foldername
    from hivwholeseq.sequencing.adapter_info import foldername_adapter

    run_name = self.loc['seq run']
    adapter_id = self.loc['adapter']

    sample_folder = (get_seqrun_foldername(run_name) +
                     foldername_adapter(adapter_id))
    self['folder'] = str(sample_folder)

    run_folder = get_seqrun_foldername(run_name)
    self['seqrun_folder'] = str(run_folder)
def __init__(self, *args, **kwargs):
    '''Initialize a sequenced sample

    Delegates construction to the parent class with the arguments as given,
    then records where the data of this sample lives:

    - self['folder']: str of (run folder name + adapter folder name)
    - self['seqrun_folder']: str of the run folder name

    The run is taken from self.loc['seq run'] and the adapter from
    self.loc['adapter'].
    '''
    super(SampleSeq, self).__init__(*args, **kwargs)

    # Function-scope imports, kept in the same order as before
    from hivwholeseq.sequencing.filenames import get_seqrun_foldername
    from hivwholeseq.sequencing.adapter_info import foldername_adapter

    run_id = self.loc['seq run']
    ada_id = self.loc['adapter']

    path_parts = (get_seqrun_foldername(run_id), foldername_adapter(ada_id))
    self['folder'] = str(path_parts[0] + path_parts[1])
    self['seqrun_folder'] = str(get_seqrun_foldername(run_id))
def __init__(self, *args, **kwargs):
    '''Initialize a sequencing run

    All positional and keyword arguments are forwarded unchanged to the
    parent constructor. Two entries are then stored on the instance:

    - 'folder': str of get_seqrun_foldername applied to self.name
    - 'samples': the result of load_samples_sequenced called with
      seq_runs=[self.name]
    '''
    super(SequencingRun, self).__init__(*args, **kwargs)

    from hivwholeseq.sequencing.filenames import get_seqrun_foldername

    run_folder = get_seqrun_foldername(self.name)
    self['folder'] = str(run_folder)

    run_samples = load_samples_sequenced(seq_runs=[self.name])
    self['samples'] = run_samples
def __init__(self, *args, **kwargs):
    '''Initialize a sequencing run

    Delegates construction to the parent class with the arguments as given,
    then fills in:

    - self['folder']: the run folder name for self.name, converted to str
    - self['samples']: whatever load_samples_sequenced returns when asked
      for this run only (seq_runs=[self.name])
    '''
    super(SequencingRun, self).__init__(*args, **kwargs)

    from hivwholeseq.sequencing.filenames import get_seqrun_foldername

    folder_name = str(get_seqrun_foldername(self.name))
    self['folder'] = folder_name

    wanted_runs = [self.name]
    self['samples'] = load_samples_sequenced(seq_runs=wanted_runs)
def _symlink_read_pair(src_folder, dst_folder, VERBOSE=0):
    '''Symlink the L001 R1/R2 files of src_folder into dst_folder.

    Parameters:
       src_folder: folder (with trailing '/') holding the raw read files
       dst_folder: folder (with trailing '/') receiving read1.fastq.gz and
                   read2.fastq.gz
       VERBOSE: if true, report destinations that exist already

    Raises:
       IndexError: if src_folder has no file name containing 'L001_R1' or
                   'L001_R2'
    '''
    filenames = os.listdir(src_folder)

    # Resolve both sources before touching the file system, so a missing read
    # file fails before any link is created.
    links = []
    for tag, dst_name in (('L001_R1', 'read1.fastq.gz'),
                          ('L001_R2', 'read2.fastq.gz')):
        matches = [fn for fn in filenames if tag in fn]
        links.append((src_folder + matches[0], dst_folder + dst_name))

    for src_fn, dst_fn in links:
        # isfile() follows symlinks and reports a dangling link as missing,
        # which would make os.symlink fail because the path is taken; islink()
        # covers that case.
        if os.path.isfile(dst_fn) or os.path.islink(dst_fn):
            if VERBOSE:
                print('{} exists already'.format(dst_fn))
        else:
            os.symlink(src_fn, dst_fn)


def make_symlinks(dataset, VERBOSE=0):
    '''Make symlinks for fastq.gz from the SRA

    Links the unclassified reads of the run and the reads of every sample
    into the run data folder as read1.fastq.gz / read2.fastq.gz, then links
    each sample folder to its adapter folder inside the run.

    Parameters:
       dataset: sequencing run; must provide .name (ending in digits),
                .samples (iterated with .iterrows()) and a 'raw data' entry
                reachable via .loc
       VERBOSE: if true, print progress and already existing destinations

    Raises:
       ValueError: if no entry of the raw data folder contains the raw name
                   of a sample
       IndexError: if a source folder lacks an 'L001_R1'/'L001_R2' file, or
                   the run name does not end in digits
    '''
    import re

    seq_run = dataset.name
    samples = dataset.samples
    data_folder = get_seqrun_foldername(seq_run)
    raw_root_folder = dataset.loc['raw data']

    seq_run_int = int(re.findall(r'\d+$', seq_run)[0])

    # Unclassified reads
    # Dropping the last two '/'-separated parts presumably yields the parent
    # of the raw data folder (assumes a trailing '/') - TODO confirm.
    unclass_fn = '/'.join(raw_root_folder.split('/')[:-2])+'/'
    run_tag = 'RunId'+'{:04d}'.format(seq_run_int)
    # NOTE: if no entry matches, unclass_fn stays at the parent folder and the
    # read files are looked up there.
    for fn in os.listdir(unclass_fn):
        if ('illumina_M' in fn) and (run_tag in fn):
            unclass_fn = unclass_fn+fn+'/LaneId1/'
            break

    _symlink_read_pair(unclass_fn,
                       data_folder+foldername_adapter(-1),
                       VERBOSE=VERBOSE)
    if VERBOSE:
        print('Unclassified reads symlinked')

    # Samples
    for sn, sample in samples.iterrows():
        if 'missing SRA' in str(sample['notes']):
            continue

        # Name to look for: raw name, else patient sample, else sample name
        # (missing table entries stringify to 'nan')
        if str(sample['raw name']) != 'nan':
            raw_fn = str(sample['raw name'])
        elif str(sample['patient sample']) != 'nan':
            raw_fn = str(sample['patient sample'])
        else:
            raw_fn = str(sample.name)

        raw_entries = os.listdir(raw_root_folder)
        tmp = [fn for fn in raw_entries if raw_fn in fn]
        if not tmp:
            print('FAILED: {}'.format(raw_fn))
            print('LISTDIR: {}'.format('\n'.join(raw_entries)))
            raise ValueError('Folder not found')

        sample_fn = raw_root_folder+tmp[0]+'/'
        adaID = sample['adapter']
        adapter_folder = data_folder+foldername_adapter(adaID)

        _symlink_read_pair(sample_fn, adapter_folder, VERBOSE=VERBOSE)
        if VERBOSE:
            # format() instead of '+': adapter IDs need not be strings
            print('{} {} reads symlinked'.format(sn, adaID))

        # Symlink samples folder to runs folder
        src_folder = adapter_folder.rstrip('/')
        dst_folder = get_sample_foldername(sample.name).rstrip('/')
        if not os.path.islink(dst_folder):
            os.symlink(src_folder, dst_folder)
        elif VERBOSE:
            print('{} exists already'.format(dst_folder))