def find_ref_files(self,priors):
    '''Locates all reference files based upon gender, organism and annotation.

    For each of the TopHat index, STAR index, RSEM index and chrom sizes
    references, the file name is read from self.REFERENCE_FILES (selected by
    the genome/gender/annotation values in self.psv), prefixed with
    self.psv['refLoc'] and looked up with dxencode.find_file in
    dxencode.REF_PROJECT_DEFAULT.

    priors -- dict updated in place; receives the value returned by
              find_file under 'tophat_index', 'star_index', 'rsem_index'
              and 'chrom_sizes'.

    Calls sys.exit with an error message as soon as find_file returns None
    for any reference.  On success self.psv['ref_files'] is set to the keys
    of self.REFERENCE_FILES.
    '''
    ref_loc = self.psv['refLoc']
    genome = self.psv['genome']
    gender = self.psv['gender']
    annotation = self.psv['annotation']

    # (priors key, label used in the error message, successive lookup keys).
    # The RSEM index is not gender specific and chrom sizes do not depend on
    # the annotation, hence the differing selector tuples.
    wanted = (
        ('tophat_index', 'TopHat index', (genome, gender, annotation)),
        ('star_index', 'STAR index', (genome, gender, annotation)),
        ('rsem_index', 'RSEM index', (genome, annotation)),
        ('chrom_sizes', 'Chrom Sizes', (genome, gender)),
    )
    for ref_key, label, selectors in wanted:
        file_name = self.REFERENCE_FILES[ref_key]
        for selector in selectors:
            file_name = file_name[selector]
        ref_path = ref_loc + file_name
        fid = dxencode.find_file(ref_path, dxencode.REF_PROJECT_DEFAULT)
        if fid is None:
            sys.exit("ERROR: Unable to locate " + label + " file '" + ref_path + "'")
        priors[ref_key] = fid

    # list() keeps this a real list even where keys() returns a view.
    self.psv['ref_files'] = list(self.REFERENCE_FILES.keys())
def find_ref_files(self,priors):
    '''Locates all reference files based upon organism and gender.

    The STAR index and chrom sizes file names are read from
    self.REFERENCE_FILES[<key>][genome][gender], prefixed with
    self.psv['refLoc'] and looked up with dxencode.find_file in
    dxencode.REF_PROJECT_DEFAULT.

    priors -- dict updated in place; receives the value returned by
              find_file under 'star_index' and 'chrom_sizes'.

    Calls sys.exit with an error message as soon as find_file returns None
    for either reference.  On success self.psv['ref_files'] is set to the
    keys of self.REFERENCE_FILES.
    '''
    ref_loc = self.psv['refLoc']
    genome = self.psv['genome']
    gender = self.psv['gender']

    # (priors key, label used in the error message)
    wanted = (
        ('star_index', 'STAR index'),
        ('chrom_sizes', 'Chrom Sizes'),
    )
    for ref_key, label in wanted:
        ref_path = ref_loc + self.REFERENCE_FILES[ref_key][genome][gender]
        fid = dxencode.find_file(ref_path, dxencode.REF_PROJECT_DEFAULT)
        if fid is None:
            sys.exit("ERROR: Unable to locate " + label + " file '" + ref_path + "'")
        priors[ref_key] = fid

    # list() keeps this a real list even where keys() returns a view.
    self.psv['ref_files'] = list(self.REFERENCE_FILES.keys())
def find_control_file(self,rep,default=None):
    '''Attempts to find an appropriate control file.

    Walks rep['controls'] in order.  For each control the file object and
    then its replicate object are fetched with dxencode.enc_lookup_json, and
    the folder "<exp_id>/rep<bio>_<tech>" is derived from the replicate.  A
    file matching self.CONTROL_FILE_GLOB is then searched for in that folder.

    rep     -- replicate dict; its optional 'controls' entry lists the
               control file keys (an entry may itself be a list, in which
               case its first element is used).
    default -- value returned when no control file can be located.

    Returns None when rep has no 'controls' key, otherwise the result of
    dxencode.file_path_from_fid for the first control file found, otherwise
    default when it is not None.  If nothing is found and no default was
    given, a message is printed and the process exits with status 1.
    '''
    # TODO Make more generic and move to dxencode.py when needed.
    # NOTE(review): the unpacked credentials are not used below; the call is
    # kept in case processkey validates the key -- confirm before removing.
    (AUTHID,AUTHPW,SERVER) = dxencode.processkey(self.server_key)
    if 'controls' not in rep:
        return None

    for file_key in rep['controls']:
        if isinstance(file_key,list):
            file_key = file_key[0]
        file_obj = dxencode.enc_lookup_json(file_key,self.server_key,frame='embedded')
        rep_id = file_obj["replicate"]['@id']
        rep_obj = dxencode.enc_lookup_json(rep_id,self.server_key,frame='embedded')
        exp_id = rep_obj['experiment']['@id'].split('/')[2]
        rep_tech = "rep%s_%s" % \
                (rep_obj['biological_replicate_number'], rep_obj['technical_replicate_number'])
        rep_folder = exp_id + '/' + rep_tech

        control_root = self.psv['control_path']
        # Cheating:
        if self.proj_name == "scratchPad" and self.psv['control_path'] == self.CONTROL_ROOT_FOLDER:
            control_root = "/lrna"

        # Fallback location.  The root is joined with exactly one '/', because
        # the "/lrna" override above carries no trailing slash and plain
        # concatenation would yield "/lrna<exp_id>/...".
        path_n_glob = control_root.rstrip('/') + '/' + rep_folder + '/' + self.CONTROL_FILE_GLOB
        target_folder = dxencode.find_folder(rep_folder,self.project,control_root)
        if target_folder is not None:
            path_n_glob = target_folder + '/' + self.CONTROL_FILE_GLOB

        fid = dxencode.find_file(path_n_glob,self.proj_id,multiple=False,recurse=False)
        if fid is not None:
            return dxencode.file_path_from_fid(fid)

    if default is not None:
        return default
    print("Unable to find control in search of %s" % rep['controls'])
    sys.exit(1)
def find_ref_files(self,priors):
    '''Locates the reference index file based upon organism.

    The index file name is read from self.REFERENCE_FILES["dme_ix"][genome]
    and looked up under "/<genome>/dna-me/" with dxencode.find_file in
    dxencode.REF_PROJECT_DEFAULT.  Only self.psv['genome'] is consulted;
    gender and self.psv['refLoc'] play no part in this lookup.

    priors -- dict updated in place; receives the value returned by
              find_file under 'dme_ix'.

    Calls sys.exit with an error message if find_file returns None.  On
    success self.psv['ref_files'] is set to the keys of self.REFERENCE_FILES.
    '''
    genome = self.psv['genome']
    base_dir = '/' + genome + "/dna-me/"
    index_path = base_dir + self.REFERENCE_FILES["dme_ix"][genome]

    fid = dxencode.find_file(index_path, dxencode.REF_PROJECT_DEFAULT)
    if fid is None:
        sys.exit("ERROR: Unable to locate Bismark index file '" + index_path + "'")
    priors['dme_ix'] = fid

    # list() keeps this a real list even where keys() returns a view.
    self.psv['ref_files'] = list(self.REFERENCE_FILES.keys())
def find_ref_files(priors, psv):
    '''Locates all reference files based upon gender, organism and annotation.

    For each of the TopHat index, STAR index, RSEM index and chrom sizes
    references, the file name is read from GENOME_REFERENCES (selected by the
    genome/gender/annotation values in psv), prefixed with psv['refLoc'] and
    looked up with dxencode.find_file in dxencode.REF_PROJECT_DEFAULT.

    priors -- dict updated in place; receives the value returned by
              find_file under 'tophat_index', 'star_index', 'rsem_index'
              and 'chrom_sizes'.
    psv    -- dict supplying 'refLoc', 'genome', 'gender' and 'annotation';
              on success its 'ref_files' entry is set to the keys of
              GENOME_REFERENCES.

    Calls sys.exit with an error message as soon as find_file returns None
    for any reference.
    '''
    ref_loc = psv['refLoc']
    genome = psv['genome']
    gender = psv['gender']
    annotation = psv['annotation']

    # (priors key, label used in the error message, successive lookup keys).
    # The RSEM index is not gender specific and chrom sizes do not depend on
    # the annotation, hence the differing selector tuples.
    wanted = (
        ('tophat_index', 'TopHat index', (genome, gender, annotation)),
        ('star_index', 'STAR index', (genome, gender, annotation)),
        ('rsem_index', 'RSEM index', (genome, annotation)),
        ('chrom_sizes', 'Chrom Sizes', (genome, gender)),
    )
    for ref_key, label, selectors in wanted:
        file_name = GENOME_REFERENCES[ref_key]
        for selector in selectors:
            file_name = file_name[selector]
        ref_path = ref_loc + file_name
        fid = dxencode.find_file(ref_path, dxencode.REF_PROJECT_DEFAULT)
        if fid is None:
            sys.exit("ERROR: Unable to locate " + label + " file '" + ref_path + "'")
        priors[ref_key] = fid

    # list() keeps this a real list even where keys() returns a view.
    psv['ref_files'] = list(GENOME_REFERENCES.keys())
def find_control_file(self,rep,default=None):
    '''Attempts to find an appropriate control file.

    Walks rep['controls'] in order.  For each control the file object and
    then its replicate object are fetched from the server as embedded JSON,
    and the folder "<exp_id>/rep<bio>_<tech>" is derived from the replicate.
    A file matching self.CONTROL_FILE_GLOB is then searched for in that
    folder.

    rep     -- replicate dict whose 'controls' entry lists the control file
               keys.
    default -- value returned when no control file can be located.

    Returns the result of dxencode.file_path_from_fid for the first control
    file found, otherwise default when it is not None.  The process exits
    with status 1 if a server lookup fails, or if nothing is found and no
    default was given.
    '''
    # TODO Make more generic and move to dxencode.py when needed.
    (AUTHID,AUTHPW,SERVER) = dxencode.processkey(self.server_key)

    def fetch_json(obj_path, what):
        '''GETs obj_path from SERVER and returns the decoded JSON; exits on failure.'''
        url = '%s%s/?format=json&frame=embedded' % (SERVER,obj_path)
        # Pre-bound so the error report below cannot hit an unbound (or stale)
        # response when the request itself raises.
        response = None
        try:
            response = dxencode.encoded_get(url, AUTHID, AUTHPW)
            return response.json()
        except Exception:
            # 'except Exception' lets KeyboardInterrupt/SystemExit propagate.
            print("URL to %s [%s] returned ?" % (what, url))
            print(response)
            sys.exit(1)

    for file_key in rep['controls']:
        file_obj = fetch_json(file_key, "control")
        rep_id = file_obj["replicate"]['@id']
        rep_obj = fetch_json(rep_id, "replicate")
        exp_id = rep_obj['experiment'].split('/')[2]
        rep_tech = "rep%s_%s" % \
                (rep_obj['biological_replicate_number'], rep_obj['technical_replicate_number'])
        rep_folder = exp_id + '/' + rep_tech

        # default by cheating
        if self.proj_name == dxencode.PRODUCTION_PROJECT:
            control_root = "/long-RNA-seq/runs/"
        else:
            control_root = self.CONTROL_ROOT_FOLDER

        # Fallback location, replaced when find_folder locates the folder.
        path_n_glob = control_root + rep_folder + '/' + self.CONTROL_FILE_GLOB
        target_folder = dxencode.find_folder(rep_folder,self.project,control_root)
        if target_folder is not None:
            path_n_glob = target_folder + '/' + self.CONTROL_FILE_GLOB

        fid = dxencode.find_file(path_n_glob,self.proj_id,multiple=False,recurse=False)
        if fid is not None:
            return dxencode.file_path_from_fid(fid)

    if default is not None:
        return default
    print("Unable to find control in search of %s" % rep['controls'])
    sys.exit(1)