Ejemplo n.º 1
0
    def find_ref_files(self,priors):
        '''Locates all reference files based upon gender, organism and annotation.

        Each reference path is self.psv['refLoc'] plus the entry of
        self.REFERENCE_FILES selected by the genome/gender/annotation values
        held in self.psv.  The path is resolved with dxencode.find_file() in
        dxencode.REF_PROJECT_DEFAULT and the result is stored in `priors`
        under the reference's key.  Finally self.psv['ref_files'] is set to the
        keys of self.REFERENCE_FILES.

        Exits the process via sys.exit() with an error message as soon as one
        of the reference files cannot be located.
        '''
        genome = self.psv['genome']
        gender = self.psv['gender']
        annotation = self.psv['annotation']

        # (key in REFERENCE_FILES and priors, label for the error message,
        #  successive selectors into the REFERENCE_FILES entry)
        wanted = [
            ('tophat_index', 'TopHat index', (genome, gender, annotation)),
            ('star_index',   'STAR index',   (genome, gender, annotation)),
            ('rsem_index',   'RSEM index',   (genome, annotation)),
            ('chrom_sizes',  'Chrom Sizes',  (genome, gender)),
        ]
        for key, label, selectors in wanted:
            entry = self.REFERENCE_FILES[key]
            for selector in selectors:
                entry = entry[selector]
            path = self.psv['refLoc'] + entry
            fid = dxencode.find_file(path,dxencode.REF_PROJECT_DEFAULT)
            if fid is None:
                sys.exit("ERROR: Unable to locate " + label + " file '" + path + "'")
            priors[key] = fid
        self.psv['ref_files'] = self.REFERENCE_FILES.keys()
Ejemplo n.º 2
0
    def find_ref_files(self,priors):
        '''Locates all reference files based upon organism and gender.

        For the STAR index and the chrom sizes file, the path is
        self.psv['refLoc'] plus the self.REFERENCE_FILES entry selected by
        self.psv['genome'] and self.psv['gender'].  Each path is resolved with
        dxencode.find_file() in dxencode.REF_PROJECT_DEFAULT and the result is
        stored in `priors` under the reference's key.  Finally
        self.psv['ref_files'] is set to the keys of self.REFERENCE_FILES.

        Exits the process via sys.exit() with an error message as soon as one
        of the reference files cannot be located.
        '''
        genome = self.psv['genome']
        gender = self.psv['gender']

        # (key in REFERENCE_FILES and priors, label for the error message)
        wanted = (('star_index', 'STAR index'), ('chrom_sizes', 'Chrom Sizes'))
        for key, label in wanted:
            path = self.psv['refLoc'] + self.REFERENCE_FILES[key][genome][gender]
            fid = dxencode.find_file(path,dxencode.REF_PROJECT_DEFAULT)
            if fid is None:
                sys.exit("ERROR: Unable to locate " + label + " file '" + path + "'")
            priors[key] = fid
        self.psv['ref_files'] = self.REFERENCE_FILES.keys()
 def find_control_file(self,rep,default=None):
     '''Attempts to find an appropriate control file.

     For each entry of rep['controls'] (the first element is used when the
     entry is a list) the file object and its replicate object are looked up
     with dxencode.enc_lookup_json().  The experiment id and the
     "rep<bio>_<tech>" folder name derived from the replicate are then used to
     search the project for a file matching self.CONTROL_FILE_GLOB.

     Returns:
         None when `rep` has no 'controls' key; otherwise the result of
         dxencode.file_path_from_fid() for the first control file found;
         otherwise `default` when it is not None.

     Prints a message and exits the process with status 1 when no control
     file was found and no default was supplied.
     '''
     # TODO Make more generic and move to dxencode.py when needed.

     # NOTE(review): the unpacked values are not used below; the call is kept
     # unchanged in case processkey() has side effects -- confirm.
     (AUTHID,AUTHPW,SERVER) = dxencode.processkey(self.server_key)
     if 'controls' not in rep:
         return None
     for file_key in rep['controls']:
         if isinstance(file_key,list):
             file_key = file_key[0]
         file_obj = dxencode.enc_lookup_json(file_key,self.server_key,frame='embedded')
         rep_id = file_obj["replicate"]['@id']
         rep_obj = dxencode.enc_lookup_json(rep_id,self.server_key,frame='embedded')
         exp_id = rep_obj['experiment']['@id'].split('/')[2]
         rep_tech = "rep%s_%s" % \
                 (rep_obj['biological_replicate_number'], rep_obj['technical_replicate_number'])
         control_root = self.psv['control_path']
         # Cheating:
         if self.proj_name == "scratchPad" and control_root == self.CONTROL_ROOT_FOLDER:
             control_root = "/lrna"
         rep_folder = exp_id + '/' + rep_tech
         target_folder = dxencode.find_folder(rep_folder,self.project,control_root)
         if target_folder is not None:
             path_n_glob = target_folder + '/' + self.CONTROL_FILE_GLOB
         else:
             # Fallback path built directly from the root.  The root is not
             # guaranteed to end in '/' (the "/lrna" override above does not),
             # so add the separator when it is missing instead of gluing the
             # experiment id onto the folder name.
             root = control_root
             if root and not root.endswith('/'):
                 root += '/'
             path_n_glob = root + rep_folder + '/' + self.CONTROL_FILE_GLOB
         fid = dxencode.find_file(path_n_glob,self.proj_id,multiple=False,recurse=False)
         if fid is not None:
             return dxencode.file_path_from_fid(fid)

     if default is not None:
         return default
     # Single-argument print(...) produces the same output under Python 2.
     print("Unable to find control in search of %s" % rep['controls'])
     sys.exit(1)
Ejemplo n.º 4
0
 def find_ref_files(self,priors):
     '''Locates the reference index file for the current genome.

     The path is '/<genome>/dna-me/' plus the self.REFERENCE_FILES["dme_ix"]
     entry for self.psv['genome'].  It is resolved with dxencode.find_file()
     in dxencode.REF_PROJECT_DEFAULT and the result is stored in
     priors['dme_ix'].  Finally self.psv['ref_files'] is set to the keys of
     self.REFERENCE_FILES.

     Exits the process via sys.exit() with an error message when the index
     file cannot be located.
     '''
     genome = self.psv['genome']
     dme_ix = '/' + genome + "/dna-me/" + self.REFERENCE_FILES["dme_ix"][genome]
     dme_ix_fid = dxencode.find_file(dme_ix,dxencode.REF_PROJECT_DEFAULT)
     if dme_ix_fid is None:
         sys.exit("ERROR: Unable to locate Bismark index file '" + dme_ix + "'")
     priors['dme_ix'] = dme_ix_fid
     self.psv['ref_files'] = self.REFERENCE_FILES.keys()
Ejemplo n.º 5
0
def find_ref_files(priors, psv):
    '''Locates all reference files based upon gender, organism and annotation.

    Each reference path is psv['refLoc'] plus the entry of GENOME_REFERENCES
    selected by the genome/gender/annotation values held in `psv`.  The path
    is resolved with dxencode.find_file() in dxencode.REF_PROJECT_DEFAULT and
    the result is stored in `priors` under the reference's key.  Finally
    psv['ref_files'] is set to the keys of GENOME_REFERENCES.

    Both `priors` and `psv` are modified in place; nothing is returned.

    Exits the process via sys.exit() with an error message as soon as one of
    the reference files cannot be located.
    '''
    genome = psv['genome']
    gender = psv['gender']
    annotation = psv['annotation']

    # (key in GENOME_REFERENCES and priors, label for the error message,
    #  successive selectors into the GENOME_REFERENCES entry)
    wanted = [
        ('tophat_index', 'TopHat index', (genome, gender, annotation)),
        ('star_index', 'STAR index', (genome, gender, annotation)),
        ('rsem_index', 'RSEM index', (genome, annotation)),
        ('chrom_sizes', 'Chrom Sizes', (genome, gender)),
    ]
    for key, label, selectors in wanted:
        entry = GENOME_REFERENCES[key]
        for selector in selectors:
            entry = entry[selector]
        path = psv['refLoc'] + entry
        fid = dxencode.find_file(path, dxencode.REF_PROJECT_DEFAULT)
        if fid is None:
            sys.exit("ERROR: Unable to locate " + label + " file '" + path +
                     "'")
        priors[key] = fid
    psv['ref_files'] = GENOME_REFERENCES.keys()
 def find_control_file(self,rep,default=None):
     '''Attempts to find an appropriate control file.

     For each key in rep['controls'] the file object and then its replicate
     object are fetched as JSON through dxencode.encoded_get().  The
     experiment id and the "rep<bio>_<tech>" folder name derived from the
     replicate are then used to search the project for a file matching
     self.CONTROL_FILE_GLOB.

     Returns:
         The result of dxencode.file_path_from_fid() for the first control
         file found; otherwise `default` when it is not None.

     Prints a message and exits the process with status 1 when a request
     fails, or when no control file was found and no default was supplied.
     '''
     # TODO Make more generic and move to dxencode.py when needed.

     (AUTHID,AUTHPW,SERVER) = dxencode.processkey(self.server_key)

     def fetch_json(obj_id, what):
         # GET the embedded-frame JSON for obj_id; report and exit on failure.
         url = '%s%s/?format=json&frame=embedded' % (SERVER,obj_id)
         # Pre-bind so the handler can print it even when the request itself
         # raised before anything was assigned.
         response = None
         try:
             response = dxencode.encoded_get(url, AUTHID, AUTHPW)
             return response.json()
         except Exception:
             # Exception (not a bare except) so SystemExit/KeyboardInterrupt
             # are not swallowed and re-reported as a bad URL.
             print("URL to %s [%s] returned ?" % (what, url))
             print(response)
             sys.exit(1)

     # default by cheating (does not depend on the control being examined)
     if self.proj_name == dxencode.PRODUCTION_PROJECT:
         control_root = "/long-RNA-seq/runs/"
     else:
         control_root = self.CONTROL_ROOT_FOLDER

     for file_key in rep['controls']:
         file_obj = fetch_json(file_key, "control")
         rep_obj = fetch_json(file_obj["replicate"]['@id'], "replicate")
         exp_id = rep_obj['experiment'].split('/')[2]
         rep_tech = "rep%s_%s" % \
                 (rep_obj['biological_replicate_number'], rep_obj['technical_replicate_number'])
         rep_folder = exp_id + '/' + rep_tech
         target_folder = dxencode.find_folder(rep_folder,self.project,control_root)
         if target_folder is not None:
             path_n_glob = target_folder + '/' + self.CONTROL_FILE_GLOB
         else:
             path_n_glob = control_root + rep_folder + '/' + self.CONTROL_FILE_GLOB
         fid = dxencode.find_file(path_n_glob,self.proj_id,multiple=False,recurse=False)
         if fid is not None:
             return dxencode.file_path_from_fid(fid)

     if default is not None:
         return default
     print("Unable to find control in search of %s" % rep['controls'])
     sys.exit(1)