def pipeline_specific_vars(self, args, verbose=False):
    '''Adds pipeline specific variables to a dict, for use building the workflow.

    Starts from the dict built by Launch.pipeline_specific_vars() and adds
    the settings particular to this pipeline.

    args    -- parsed arguments; args.read_length and args.umi are read here.
    verbose -- when true, the finished dict is printed as indented JSON.

    Returns the updated dict.  As a side effect self.multi_rep and
    self.combined_reps are both set to True.
    '''
    psv = Launch.pipeline_specific_vars(self, args)

    # Some specific settings
    psv['nthreads'] = 8
    psv['map_thresh'] = 10
    psv['sample_size'] = 15000000
    psv['read_length'] = args.read_length

    # A single non-paired replicate makes the whole run "se".  On paired-end
    # replicates a barcode of "undetected" is removed.
    layout = "pe"
    for ltr in sorted(psv['reps']):
        rep = psv['reps'][ltr]
        if not rep['paired_end']:
            layout = "se"
        elif rep.get('barcode') == "undetected":
            del rep['barcode']
    psv['pe_or_se'] = layout

    if args.umi:
        psv['umi'] = "yes"
    psv['upper_limit'] = 0

    # Crawford fastqs require trimming
    needs_trim = (not self.template and not psv['paired_end']
                  and "crawford" in psv['lab'])
    if needs_trim:
        print("Detected that fastqs will be trimmed to 20")
    psv["trim_len"] = 20 if needs_trim else 0

    self.multi_rep = True  # For DNase, a single tech_rep moves on to merge/filter.
    self.combined_reps = True

    if verbose:
        print("Pipeline Specific Vars:")
        print(json.dumps(psv, indent=4))
    return psv
def pipeline_specific_vars(self, args, verbose=False):
    '''Adds pipeline specific variables to a dict, for use building the workflow.

    The dict comes from Launch.pipeline_specific_vars(); this method layers
    the DNase-specific values on top of it and returns it.

    args    -- parsed arguments (args.read_length and args.umi are used).
    verbose -- if true, dump the resulting dict as indented JSON.

    Also sets self.multi_rep and self.combined_reps to True.
    '''
    psv = Launch.pipeline_specific_vars(self, args)

    # Some specific settings
    psv['nthreads'] = 8
    psv['map_thresh'] = 10
    psv['sample_size'] = 15000000
    psv['read_length'] = args.read_length

    # Assume paired-end until a replicate says otherwise.
    psv['pe_or_se'] = "pe"
    reps = psv['reps']
    for ltr in sorted(reps):
        rep = reps[ltr]
        if rep['paired_end']:
            # An "undetected" barcode is dropped from paired-end replicates.
            if rep.get('barcode') == "undetected":
                del rep['barcode']
        else:
            psv['pe_or_se'] = "se"

    if args.umi:
        psv['umi'] = "yes"
    psv['upper_limit'] = 0

    # Crawford fastqs require trimming
    if not self.template and not psv['paired_end'] and "crawford" in psv['lab']:
        print("Detected that fastqs will be trimmed to 20")
        psv["trim_len"] = 20
    else:
        psv["trim_len"] = 0

    self.multi_rep = True  # For DNase, a single tech_rep moves on to merge/filter.
    self.combined_reps = True

    if verbose:
        print("Pipeline Specific Vars:")
        print(json.dumps(psv, indent=4))
    return psv
def pipeline_specific_vars(self, args, verbose=False):
    '''Adds pipeline specific variables to a dict, for use building the workflow.

    Extends the dict from Launch.pipeline_specific_vars() with the annotation,
    the assay type ("rampage" or "cage"), thread count, optional control and
    the annotation-aware results locations.

    args    -- parsed arguments; reads args.annotation, args.folder and
               (when not templating) args.control.
    verbose -- if true, print the resulting dict as indented JSON.

    Returns the updated dict.  Calls sys.exit(1) when the annotation is not
    allowed for the genome or the paired-end state does not fit the assay.
    '''
    #args.pe = True # This is necessary to ensure templating does what it must.
    psv = Launch.pipeline_specific_vars(self, args)

    # Could be multiple annotations supported per genome
    annotation = args.annotation
    genome = psv['genome']
    if genome != self.GENOME_DEFAULT and annotation == self.ANNO_DEFAULT:
        annotation = self.ANNO_DEFAULTS[genome]
    psv['annotation'] = annotation
    if annotation not in self.ANNO_ALLOWED[genome]:
        print(genome + " has no " + annotation + " annotation.")
        sys.exit(1)

    # Some specific settings
    is_cage = self.exp["assay_term_name"] == "CAGE"
    psv['assay_type'] = "cage" if is_cage else "rampage"
    psv['nthreads'] = 8
    if not self.template:
        psv['control'] = args.control

    paired = psv['paired_end']
    if paired and psv['assay_type'] == "cage":
        print("ERROR: CAGE is always expected to be single-end but mapping says otherwise.")
        sys.exit(1)
    elif not paired and psv['assay_type'] == "rampage":
        print("Rampage is always expected to be paired-end but mapping says otherwise.")
        sys.exit(1)
    if not psv["stranded"]:
        print("Detected unstranded library")

    # run will either be for combined or single rep.
    # NOTE: 'run' is not referenced again in this method.
    if self.combined_reps:
        run = psv
    else:
        run = psv['reps']['a']  # If not combined then run will be for the first (only) replicate

    # If annotation is not default, then add it to title
    if annotation != self.ANNO_DEFAULTS[genome]:
        psv['title'] += ', ' + annotation
        psv['name'] += '_' + annotation
    if is_cage:
        # Swap the leading characters of name/title for the CAGE equivalents.
        psv['name'] = psv['assay_type'] + psv['name'][4:]
        psv['title'] = "CAGE" + psv['title'][7:]

    # Must override results location because of annotation
    psv['resultsLoc'] = self.umbrella_folder(
        args.folder, self.FOLDER_DEFAULT, self.proj_name, psv['exp_type'],
        genome, annotation)
    results_folder = psv['resultsLoc']
    if not self.template:
        results_folder += psv['experiment'] + '/'
    psv['resultsFolder'] = results_folder
    self.update_rep_result_folders(psv)

    if verbose:
        print("Pipeline Specific Vars:")
        print(json.dumps(psv, indent=4))
    return psv
def pipeline_specific_vars(self, args, verbose=False):
    '''Adds pipeline specific variables to a dict, for use building the workflow.

    Extends the dict from Launch.pipeline_specific_vars() with the annotation,
    thread count, random seed and annotation-aware results locations, and
    records the tophat choice on self.

    args    -- parsed arguments; reads args.annotation, args.no_tophat and
               args.folder.
    verbose -- if true, print the resulting dict as indented JSON.

    Returns the updated dict.  Calls sys.exit(1) when the annotation is not
    allowed for the genome.
    '''
    psv = Launch.pipeline_specific_vars(self, args)

    # Could be multiple annotations supported per genome
    annotation = args.annotation
    genome = psv['genome']
    if genome != self.GENOME_DEFAULT and annotation == self.ANNO_DEFAULT:
        annotation = self.ANNO_DEFAULTS[genome]
    psv['annotation'] = annotation
    if annotation not in self.ANNO_ALLOWED[genome]:
        print(genome + " has no " + annotation + " annotation.")
        sys.exit(1)

    # Some specific settings
    psv['nthreads'] = 8
    psv['rnd_seed'] = 12345

    # If annotation is not default, then add it to title
    if annotation != self.ANNO_DEFAULTS[genome]:
        psv['title'] += ', ' + annotation
        psv['name'] += '_' + annotation

    # When tophat is wanted, the list of steps to prune is emptied.
    self.no_tophat = args.no_tophat
    if not self.no_tophat:
        self.PRUNE_STEPS = []

    # Must override results location because of annotation
    results_loc = dxencode.umbrella_folder(
        args.folder, self.FOLDER_DEFAULT, self.proj_name, psv['exp_type'],
        genome, annotation)
    psv['resultsLoc'] = results_loc
    psv['resultsFolder'] = results_loc + psv['experiment'] + '/'
    self.update_rep_result_folders(psv)

    if verbose:
        print("Pipeline Specific Vars:")
        print(json.dumps(psv, indent=4))
    return psv
def pipeline_specific_vars(self, args, verbose=False):
    '''Adds pipeline specific variables to a dict, for use building the workflow.

    Extends the dict from Launch.pipeline_specific_vars() with the annotation,
    thread count, random seed, the 'read_strand' setting and the
    annotation-aware results locations; records the tophat choice on self.

    args    -- parsed arguments; reads args.annotation, args.tophat_also and
               args.folder.
    verbose -- if true, print the resulting dict as indented JSON.

    Returns the updated dict.  Calls sys.exit(1) when the annotation is not
    allowed for the genome.
    '''
    psv = Launch.pipeline_specific_vars(self, args)

    # Could be multiple annotations supported per genome
    annotation = args.annotation
    genome = psv['genome']
    if genome != self.GENOME_DEFAULT and annotation == self.ANNO_DEFAULT:
        annotation = self.ANNO_DEFAULTS[genome]
    psv['annotation'] = annotation
    if annotation not in self.ANNO_ALLOWED[genome]:
        print(genome + " has no " + annotation + " annotation.")
        sys.exit(1)

    # Some specific settings
    psv['nthreads'] = 8
    psv['rnd_seed'] = 12345

    # If paired-end then read_strand might vary TruSeq or ScriptSeq, but only for quant-rsem
    if psv["paired_end"]:
        if not psv["stranded"]:
            strand = "unstranded"
            print("Detected unstranded library")
        elif psv.get('ScriptSeq', False):
            # file.replicate.library.document contains "/documents/F17c31e10-1542-42c6-8b4c-3afff95564cf%2F"
            strand = "ScriptSeq"  # "ScriptSeq" experiments are rd1+/rd2- (AKA forward)
            print("Detected ScriptSeq")
        else:
            strand = "reverse"  # Usual ENCODE LRNA experiments are rd1-/rd2+ (AKA reverse)
    elif psv["stranded"]:
        # SE: use the recorded direction when there is one.
        strand = psv.get("strand_direction", "unstranded")
    else:
        strand = "unstranded"
    psv["read_strand"] = strand
    if psv["stranded"]:
        print("Strand orientation is '%s'" % (strand))

    # If annotation is not default, then add it to title
    if annotation != self.ANNO_DEFAULTS[genome]:
        psv['title'] += ', ' + annotation
        psv['name'] += '_' + annotation

    self.no_tophat = not args.tophat_also
    if args.tophat_also:
        self.PRUNE_STEPS = []  # This blocks pruning... keeping tophat

    # Must override results location because of annotation
    psv['resultsLoc'] = self.umbrella_folder(
        args.folder, self.FOLDER_DEFAULT, self.proj_name, psv['exp_type'],
        genome, annotation)
    results_folder = psv['resultsLoc']
    if not self.template:
        results_folder += psv['experiment'] + '/'
    psv['resultsFolder'] = results_folder
    self.update_rep_result_folders(psv)

    if verbose:
        print("Pipeline Specific Vars:")
        print(json.dumps(psv, indent=4))
    return psv
def pipeline_specific_vars(self, args, verbose=False):
    '''Adds pipeline specific variables to a dict, for use building the workflow.

    Extends the dict from Launch.pipeline_specific_vars() with the annotation,
    thread count, a per-replicate 'clipping_model' and the annotation-aware
    results locations.

    args    -- parsed arguments; reads args.annotation and args.folder.
    verbose -- if true, print the resulting dict as indented JSON.

    Returns the updated dict.  Calls sys.exit(1) when the annotation is not
    allowed for the genome or when the experiment is paired-end.
    '''
    psv = Launch.pipeline_specific_vars(self, args)

    # Now add pipeline specific variables and tests
    # Could be multiple annotations supported per genome
    psv['annotation'] = args.annotation
    if psv['genome'] != self.GENOME_DEFAULT and psv['annotation'] == self.ANNO_DEFAULT:
        psv['annotation'] = self.ANNO_DEFAULTS[psv['genome']]
    if psv['annotation'] not in self.ANNO_ALLOWED[psv['genome']]:
        print(psv['genome'] + " has no " + psv['annotation'] + " annotation.")
        sys.exit(1)

    # Paired ends?
    if psv['paired_end']:
        print("Small-RNA is always expected to be single-end but mapping says otherwise.")
        sys.exit(1)

    # Some specific settings
    psv['nthreads'] = 8

    # By replicate:
    for ltr in psv['reps'].keys():
        if len(ltr) != 1:  # only simple reps
            continue
        rep = psv['reps'][ltr]
        rep["clipping_model"] = "ENCODE3"  # Default
        if "a_tailing" in rep:
            rep["clipping_model"] = "A_Tailing_" + rep["a_tailing"]
            print("%s detected for %s" % (rep["clipping_model"], rep["rep_tech"]))

    # If annotation is not default, then add it to title
    if psv['annotation'] != self.ANNO_DEFAULTS[psv['genome']]:
        psv['title'] += ', ' + psv['annotation']
        psv['name'] += '_' + psv['annotation']

    # Must override results location because of annotation
    genome = psv['genome']
    if self.no_refs:  # (no_refs is only True when templating)
        genome = None  # If templating with no refs then this will hide genome and annotation
    # Pass the possibly-None local rather than psv['genome'], so the no_refs
    # case above actually reaches umbrella_folder.
    # NOTE(review): assumes umbrella_folder accepts genome=None -- confirm.
    psv['resultsLoc'] = self.umbrella_folder(
        args.folder, self.FOLDER_DEFAULT, self.proj_name, psv['exp_type'],
        genome, psv['annotation'])
    psv['resultsFolder'] = psv['resultsLoc']
    if not self.template:
        psv['resultsFolder'] += psv['experiment'] + '/'
    self.update_rep_result_folders(psv)

    if verbose:
        print("Pipeline Specific Vars:")
        print(json.dumps(psv, indent=4))
    return psv
def pipeline_specific_vars(self, args, verbose=False):
    '''Adds pipeline specific variables to a dict, for use building the workflow.

    Extends the dict from Launch.pipeline_specific_vars() with the annotation,
    thread count, control, workflow labels (description, name, title) and
    the results folders for the experiment and its replicates.

    args    -- parsed arguments; reads args.annotation, args.control and
               args.folder.
    verbose -- if true, print the resulting dict as indented JSON.

    Returns the updated dict.  Calls sys.exit(1) when the annotation is not
    allowed for the genome or when the experiment is not paired-end.
    '''
    psv = Launch.pipeline_specific_vars(self, args)

    # Could be multiple annotations supported per genome
    annotation = args.annotation
    genome = psv['genome']
    if genome != self.GENOME_DEFAULT and annotation == self.ANNO_DEFAULT:
        annotation = self.ANNO_DEFAULTS[genome]
    psv['annotation'] = annotation
    if annotation not in self.ANNO_ALLOWED[genome]:
        print(genome + " has no " + annotation + " annotation.")
        sys.exit(1)

    if not psv['paired_end']:
        print("Rampage is always expected to be paired-end but mapping says otherwise.")
        sys.exit(1)

    # Some specific settings
    psv['nthreads'] = 8
    psv['control'] = args.control

    # run will either be for combined or single rep.
    # If not combined then run will be for the first (only) replicate
    combined = psv['combined']
    run = psv if combined else psv['reps']['a']

    # workflow labeling
    psv['description'] = "The ENCODE Rampage RNA pipeline for long RNAs"
    experiment = psv['experiment']
    gender = psv['gender']

    name = "rampage_" + genome
    if genome == 'mm10':
        name += annotation
    name += "XX" if gender == 'female' else "XY"
    name += "_" + experiment + "_" + run['rep_tech']

    title = "Rampage RNA " + experiment + " - " + run['rep_tech']
    if not combined:
        title += " [library '" + run['library_id'] + "']"
    title += " on " + genome + " - " + gender

    run['name'] = name
    run['title'] = title

    # Must override results location because of annotation
    psv['resultsLoc'] = dxencode.umbrella_folder(
        args.folder, self.FOLDER_DEFAULT, self.proj_name, psv['exp_type'],
        genome, annotation)
    exp_folder = psv['resultsLoc'] + experiment + '/'
    psv['resultsFolder'] = exp_folder
    reps = psv['reps']
    reps['a']['resultsFolder'] = exp_folder + reps['a']['rep_tech'] + '/'
    if combined:
        reps['b']['resultsFolder'] = exp_folder + reps['b']['rep_tech'] + '/'

    if verbose:
        print("Pipeline Specific Vars:")
        print(json.dumps(psv, indent=4))
    return psv
def pipeline_specific_vars(self, args, verbose=False):
    '''Adds pipeline specific variables to a dict, for use building the workflow.

    Extends the dict from Launch.pipeline_specific_vars() with the annotation,
    the assay type ("rampage" or "cage"), thread count, optional control and
    the annotation-aware results locations.

    args    -- parsed arguments; reads args.annotation, args.folder and
               (when not templating) args.control.
    verbose -- if true, print the resulting dict as indented JSON.

    Returns the updated dict.  Calls sys.exit(1) when the annotation is not
    allowed for the genome or the paired-end state does not fit the assay.
    '''
    #args.pe = True # This is necessary to ensure templating does what it must.
    psv = Launch.pipeline_specific_vars(self, args)

    # Could be multiple annotations supported per genome
    annotation = args.annotation
    genome = psv['genome']
    if genome != self.GENOME_DEFAULT and annotation == self.ANNO_DEFAULT:
        annotation = self.ANNO_DEFAULTS[genome]
    psv['annotation'] = annotation
    if annotation not in self.ANNO_ALLOWED[genome]:
        print(genome + " has no " + annotation + " annotation.")
        sys.exit(1)

    # Some specific settings
    is_cage = self.exp["assay_term_name"] == "CAGE"
    assay_type = "cage" if is_cage else "rampage"
    psv['assay_type'] = assay_type
    psv['nthreads'] = 8
    if not self.template:
        psv['control'] = args.control

    if psv['paired_end']:
        if assay_type == "cage":
            print("ERROR: CAGE is always expected to be single-end but mapping says otherwise.")
            sys.exit(1)
    elif assay_type == "rampage":
        print("Rampage is always expected to be paired-end but mapping says otherwise.")
        sys.exit(1)

    # run will either be for combined or single rep.
    # NOTE: 'run' is not referenced again in this method.
    if self.combined_reps:
        run = psv
    else:
        run = psv['reps']['a']  # If not combined then run will be for the first (only) replicate

    # If annotation is not default, then add it to title
    if annotation != self.ANNO_DEFAULTS[genome]:
        psv['title'] += ', ' + annotation
        psv['name'] += '_' + annotation
    if is_cage:
        # Swap the leading characters of name/title for the CAGE equivalents.
        psv['name'] = assay_type + psv['name'][4:]
        psv['title'] = "CAGE" + psv['title'][7:]

    # Must override results location because of annotation
    psv['resultsLoc'] = self.umbrella_folder(
        args.folder, self.FOLDER_DEFAULT, self.proj_name, psv['exp_type'],
        genome, annotation)
    results_folder = psv['resultsLoc']
    if not self.template:
        results_folder += psv['experiment'] + '/'
    psv['resultsFolder'] = results_folder
    self.update_rep_result_folders(psv)

    if verbose:
        print("Pipeline Specific Vars:")
        print(json.dumps(psv, indent=4))
    return psv
def pipeline_specific_vars(self, args, verbose=False):
    '''Adds pipeline specific variables to a dict, for use building the workflow.

    Extends the dict from Launch.pipeline_specific_vars() with the annotation,
    thread count, a per-replicate 'clipping_model' and the annotation-aware
    results locations.

    args    -- parsed arguments; reads args.annotation and args.folder.
    verbose -- if true, print the resulting dict as indented JSON.

    Returns the updated dict.  Calls sys.exit(1) when the annotation is not
    allowed for the genome or when the experiment is paired-end.
    '''
    psv = Launch.pipeline_specific_vars(self, args)

    # Now add pipeline specific variables and tests
    # Could be multiple annotations supported per genome
    psv['annotation'] = args.annotation
    if psv['genome'] != self.GENOME_DEFAULT and psv['annotation'] == self.ANNO_DEFAULT:
        psv['annotation'] = self.ANNO_DEFAULTS[psv['genome']]
    if psv['annotation'] not in self.ANNO_ALLOWED[psv['genome']]:
        print(psv['genome'] + " has no " + psv['annotation'] + " annotation.")
        sys.exit(1)

    # Paired ends?
    if psv['paired_end']:
        print("Small-RNA is always expected to be single-end but mapping says otherwise.")
        sys.exit(1)

    # Some specific settings
    psv['nthreads'] = 8

    # By replicate:
    for ltr in psv['reps'].keys():
        if len(ltr) != 1:  # only simple reps
            continue
        rep = psv['reps'][ltr]
        rep["clipping_model"] = "ENCODE3"  # Default
        if "a_tailing" in rep:
            rep["clipping_model"] = "A_Tailing_" + rep["a_tailing"]
            print("%s detected for %s" % (rep["clipping_model"], rep["rep_tech"]))

    # If annotation is not default, then add it to title
    if psv['annotation'] != self.ANNO_DEFAULTS[psv['genome']]:
        psv['title'] += ', ' + psv['annotation']
        psv['name'] += '_' + psv['annotation']

    # Must override results location because of annotation
    genome = psv['genome']
    if self.no_refs:  # (no_refs is only True when templating)
        genome = None  # If templating with no refs then this will hide genome and annotation
    # Hand umbrella_folder the possibly-None local instead of psv['genome'];
    # otherwise the no_refs adjustment above has no effect.
    # NOTE(review): assumes umbrella_folder accepts genome=None -- confirm.
    psv['resultsLoc'] = self.umbrella_folder(
        args.folder, self.FOLDER_DEFAULT, self.proj_name, psv['exp_type'],
        genome, psv['annotation'])
    psv['resultsFolder'] = psv['resultsLoc']
    if not self.template:
        psv['resultsFolder'] += psv['experiment'] + '/'
    self.update_rep_result_folders(psv)

    if verbose:
        print("Pipeline Specific Vars:")
        print(json.dumps(psv, indent=4))
    return psv
def pipeline_specific_vars(self, args, verbose=False):
    '''Adds pipeline specific variables to a dict, for use building the workflow.

    Extends the dict from Launch.pipeline_specific_vars() with the annotation,
    thread count, random seed, the 'paired_type' setting and the
    annotation-aware results locations; records the tophat choice on self.

    args    -- parsed arguments; reads args.annotation, args.no_tophat and
               args.folder.
    verbose -- if true, print the resulting dict as indented JSON.

    Returns the updated dict.  Calls sys.exit(1) when the annotation is not
    allowed for the genome.
    '''
    psv = Launch.pipeline_specific_vars(self, args)

    # Could be multiple annotations supported per genome
    annotation = args.annotation
    genome = psv['genome']
    if genome != self.GENOME_DEFAULT and annotation == self.ANNO_DEFAULT:
        annotation = self.ANNO_DEFAULTS[genome]
    psv['annotation'] = annotation
    if annotation not in self.ANNO_ALLOWED[genome]:
        print(genome + " has no " + annotation + " annotation.")
        sys.exit(1)

    # Some specific settings
    psv['nthreads'] = 8
    psv['rnd_seed'] = 12345

    # Override paired-end with TruSeq or ScriptSeq, but only for quant-rsem
    if not psv["paired_end"]:
        paired_type = "false"
    elif psv.get('ScriptSeq', False):
        # file.replicate.library.document contains "/documents/F17c31e10-1542-42c6-8b4c-3afff95564cf%2F"
        paired_type = "ScriptSeq"
        print("Detected ScriptSeq")
    else:
        paired_type = "TruSeq"
    psv["paired_type"] = paired_type

    # If annotation is not default, then add it to title
    if annotation != self.ANNO_DEFAULTS[genome]:
        psv['title'] += ', ' + annotation
        psv['name'] += '_' + annotation

    # When tophat is wanted, the list of steps to prune is emptied.
    self.no_tophat = args.no_tophat
    if not self.no_tophat:
        self.PRUNE_STEPS = []

    # Must override results location because of annotation
    psv['resultsLoc'] = self.umbrella_folder(
        args.folder, self.FOLDER_DEFAULT, self.proj_name, psv['exp_type'],
        genome, annotation)
    results_folder = psv['resultsLoc']
    if not self.template:
        results_folder += psv['experiment'] + '/'
    psv['resultsFolder'] = results_folder
    self.update_rep_result_folders(psv)

    if verbose:
        print("Pipeline Specific Vars:")
        print(json.dumps(psv, indent=4))
    return psv
def pipeline_specific_vars(self, args, verbose=False):
    '''Adds pipeline specific variables to a dict, for use building the workflow.

    Takes the dict from Launch.pipeline_specific_vars() and adds the thread
    count and the minimum/maximum insert sizes.

    args    -- parsed arguments, passed through to the Launch method.
    verbose -- if true, print the resulting dict as indented JSON.

    Returns the updated dict.
    '''
    psv = Launch.pipeline_specific_vars(self, args)

    # Some specific settings
    for key, value in (('nthreads', 8), ('min_insert', 0), ('max_insert', 500)):
        psv[key] = value

    if verbose:
        print("Pipeline Specific Vars:")
        print(json.dumps(psv, indent=4))
    return psv
def pipeline_specific_vars(self, args, verbose=False):
    '''Adds pipeline specific variables to a dict, for use building the workflow.

    Extends the dict from Launch.pipeline_specific_vars() with the annotation,
    thread count, random seed, the 'read_strand' setting and the
    annotation-aware results locations; records the tophat choice on self.

    args    -- parsed arguments; reads args.annotation, args.tophat_also and
               args.folder.
    verbose -- if true, print the resulting dict as indented JSON.

    Returns the updated dict.  Calls sys.exit(1) when the annotation is not
    allowed for the genome.
    '''
    psv = Launch.pipeline_specific_vars(self, args)

    # Could be multiple annotations supported per genome
    annotation = args.annotation
    genome = psv['genome']
    if genome != self.GENOME_DEFAULT and annotation == self.ANNO_DEFAULT:
        annotation = self.ANNO_DEFAULTS[genome]
    psv['annotation'] = annotation
    if annotation not in self.ANNO_ALLOWED[genome]:
        print(genome + " has no " + annotation + " annotation.")
        sys.exit(1)

    # Some specific settings
    psv['nthreads'] = 8
    psv['rnd_seed'] = 12345

    # If paired-end then read_strand might vary TruSeq or ScriptSeq, but only for quant-rsem
    strand = "unstranded"  # SE experiments are all unstranded
    if psv["paired_end"]:
        if not psv["stranded"]:
            print("Detected unstranded library")
        elif psv.get('ScriptSeq', False):
            # file.replicate.library.document contains "/documents/F17c31e10-1542-42c6-8b4c-3afff95564cf%2F"
            strand = "ScriptSeq"  # "ScriptSeq" experiments are rd1+/rd2- (AKA forward)
            print("Detected ScriptSeq")
        else:
            strand = "reverse"  # Usual ENCODE LRNA experiments are rd1-/rd2+ (AKA reverse)
    psv["read_strand"] = strand

    # If annotation is not default, then add it to title
    if annotation != self.ANNO_DEFAULTS[genome]:
        psv['title'] += ', ' + annotation
        psv['name'] += '_' + annotation

    self.no_tophat = not args.tophat_also
    if args.tophat_also:
        self.PRUNE_STEPS = []  # This blocks pruning... keeping tophat

    # Must override results location because of annotation
    psv['resultsLoc'] = self.umbrella_folder(
        args.folder, self.FOLDER_DEFAULT, self.proj_name, psv['exp_type'],
        genome, annotation)
    results_folder = psv['resultsLoc']
    if not self.template:
        results_folder += psv['experiment'] + '/'
    psv['resultsFolder'] = results_folder
    self.update_rep_result_folders(psv)

    if verbose:
        print("Pipeline Specific Vars:")
        print(json.dumps(psv, indent=4))
    return psv
def pipeline_specific_vars(self, args, verbose=False):
    """Adds pipeline specific variables to a dict, for use building the workflow.

    Extends the dict from Launch.pipeline_specific_vars() with the annotation,
    thread count and the annotation-aware results locations.

    args    -- parsed arguments; reads args.annotation and args.folder.
    verbose -- if true, print the resulting dict as indented JSON.

    Returns the updated dict.  Calls sys.exit(1) when the annotation is not
    allowed for the genome or when the experiment is paired-end.
    """
    psv = Launch.pipeline_specific_vars(self, args)

    # Now add pipeline specific variables and tests
    # Could be multiple annotations supported per genome
    psv["annotation"] = args.annotation
    if psv["genome"] != self.GENOME_DEFAULT and psv["annotation"] == self.ANNO_DEFAULT:
        psv["annotation"] = self.ANNO_DEFAULTS[psv["genome"]]
    if psv["annotation"] not in self.ANNO_ALLOWED[psv["genome"]]:
        print(psv["genome"] + " has no " + psv["annotation"] + " annotation.")
        sys.exit(1)

    # Paired ends?
    if psv["paired_end"]:
        print("Small-RNA is always expected to be single-end but mapping says otherwise.")
        sys.exit(1)

    # Some specific settings
    psv["nthreads"] = 8

    # If annotation is not default, then add it to title
    if psv["annotation"] != self.ANNO_DEFAULTS[psv["genome"]]:
        psv["title"] += ", " + psv["annotation"]
        psv["name"] += "_" + psv["annotation"]

    # Must override results location because of annotation
    genome = psv["genome"]
    if self.no_refs:  # (no_refs is only True when templating)
        genome = None  # If templating with no refs then this will hide genome and annotation
    # Use the possibly-None local here; passing psv["genome"] would ignore
    # the no_refs adjustment made just above.
    # NOTE(review): assumes umbrella_folder accepts genome=None -- confirm.
    psv["resultsLoc"] = self.umbrella_folder(
        args.folder, self.FOLDER_DEFAULT, self.proj_name, psv["exp_type"],
        genome, psv["annotation"]
    )
    psv["resultsFolder"] = psv["resultsLoc"]
    if not self.template:
        psv["resultsFolder"] += psv["experiment"] + "/"
    self.update_rep_result_folders(psv)

    if verbose:
        print("Pipeline Specific Vars:")
        print(json.dumps(psv, indent=4))
    return psv
def pipeline_specific_vars(self, args, verbose=False):
    '''Adds pipeline specific variables to a dict, for use building the workflow.

    Extends the dict from Launch.pipeline_specific_vars() with the annotation,
    thread count, random seed and annotation-aware results locations, and
    records the tophat choice on self.

    args    -- parsed arguments; reads args.annotation, args.no_tophat and
               args.folder.
    verbose -- if true, print the resulting dict as indented JSON.

    Returns the updated dict.  Calls sys.exit(1) when the annotation is not
    allowed for the genome.
    '''
    psv = Launch.pipeline_specific_vars(self, args)

    # Could be multiple annotations supported per genome
    annotation = args.annotation
    genome = psv['genome']
    if genome != self.GENOME_DEFAULT and annotation == self.ANNO_DEFAULT:
        annotation = self.ANNO_DEFAULTS[genome]
    psv['annotation'] = annotation
    if annotation not in self.ANNO_ALLOWED[genome]:
        print(genome + " has no " + annotation + " annotation.")
        sys.exit(1)

    # Some specific settings
    psv['nthreads'] = 8
    psv['rnd_seed'] = 12345

    # If annotation is not default, then add it to title
    if annotation != self.ANNO_DEFAULTS[genome]:
        psv['title'] += ', ' + annotation
        psv['name'] += '_' + annotation

    # When tophat is wanted, the list of steps to prune is emptied.
    self.no_tophat = args.no_tophat
    if not self.no_tophat:
        self.PRUNE_STEPS = []

    # Must override results location because of annotation
    psv['resultsLoc'] = self.umbrella_folder(
        args.folder, self.FOLDER_DEFAULT, self.proj_name, psv['exp_type'],
        genome, annotation)
    results_folder = psv['resultsLoc']
    if not self.template:
        results_folder += psv['experiment'] + '/'
    psv['resultsFolder'] = results_folder
    self.update_rep_result_folders(psv)

    if verbose:
        print("Pipeline Specific Vars:")
        print(json.dumps(psv, indent=4))
    return psv
def pipeline_specific_vars(self, args, verbose=False):
    '''Adds pipeline specific variables to a dict, for use building the workflow.

    Extends the dict from Launch.pipeline_specific_vars() with the annotation,
    thread count and the annotation-aware results locations.

    args    -- parsed arguments; reads args.annotation and args.folder.
    verbose -- if true, print the resulting dict as indented JSON.

    Returns the updated dict.  Calls sys.exit(1) when the annotation is not
    allowed for the genome or when the experiment is paired-end.
    '''
    psv = Launch.pipeline_specific_vars(self, args)

    # Now add pipeline specific variables and tests
    # Could be multiple annotations supported per genome
    annotation = args.annotation
    genome = psv['genome']
    if genome != self.GENOME_DEFAULT and annotation == self.ANNO_DEFAULT:
        annotation = self.ANNO_DEFAULTS[genome]
    psv['annotation'] = annotation
    if annotation not in self.ANNO_ALLOWED[genome]:
        print(genome + " has no " + annotation + " annotation.")
        sys.exit(1)

    # Paired ends?
    if psv['paired_end']:
        print("Small-RNA is always expected to be single-end but mapping says otherwise.")
        sys.exit(1)

    # Some specific settings
    psv['nthreads'] = 8

    # If annotation is not default, then add it to title
    if annotation != self.ANNO_DEFAULTS[genome]:
        psv['title'] += ', ' + annotation
        psv['name'] += '_' + annotation

    # Must override results location because of annotation
    results_loc = dxencode.umbrella_folder(
        args.folder, self.FOLDER_DEFAULT, self.proj_name, psv['exp_type'],
        genome, annotation)
    psv['resultsLoc'] = results_loc
    psv['resultsFolder'] = results_loc + psv['experiment'] + '/'
    self.update_rep_result_folders(psv)

    if verbose:
        print("Pipeline Specific Vars:")
        print(json.dumps(psv, indent=4))
    return psv
def pipeline_specific_vars(self, args, verbose=False):
    '''Adds pipeline specific variables to a dict, for use building the workflow.

    Takes the dict from Launch.pipeline_specific_vars(), checks it against
    what this pipeline supports and adds the thread count.

    args    -- parsed arguments, passed through to the Launch method.
    verbose -- if true, print the resulting dict as indented JSON.

    Returns the updated dict.  Calls sys.exit(1) for paired-end experiments
    and when self.combined_reps is set.
    '''
    psv = Launch.pipeline_specific_vars(self, args)

    # Now add pipeline specific variables and tests
    # Paired ends?
    paired = psv['paired_end']
    if paired:
        print("Small-RNA is always expected to be single-end but mapping says otherwise.")
        sys.exit(1)

    # Some specific settings
    psv['nthreads'] = 8

    # run will either be for combined or single rep.
    if self.combined_reps:
        print("Small-RNA-seq pipeline currently does not support combined-replicate processing.")
        sys.exit(1)

    if verbose:
        print("Pipeline Specific Vars:")
        print(json.dumps(psv, indent=4))
    return psv
def pipeline_specific_vars(self, args, verbose=False):
    '''Adds pipeline specific variables to a dict, for use building the workflow.

    Takes the dict from Launch.pipeline_specific_vars(), checks it against
    what this pipeline supports, adds the thread count and description, and
    labels replicate 'a' with a workflow title and name.

    args    -- parsed arguments, passed through to the Launch method.
    verbose -- if true, print the resulting dict as indented JSON.

    Returns the updated dict.  Calls sys.exit(1) for paired-end experiments
    and for combined-replicate runs.
    '''
    psv = Launch.pipeline_specific_vars(self, args)

    # Now add pipeline specific variables and tests
    # Paired ends?
    if psv['paired_end']:
        print("Small-RNA is always expected to be single-end but mapping says otherwise.")
        sys.exit(1)

    # Some specific settings
    psv['nthreads'] = 8

    # run will either be for combined or single rep.
    if psv['combined']:
        print("Small-RNA-seq pipeline currently does not support combined-replicate processing.")
        sys.exit(1)
    run = psv['reps']['a']  # Not combined, so run is for the first (only) replicate

    # workflow labeling
    psv['description'] = "The ENCODE RNA Seq pipeline for short RNA"
    gender = psv['gender']
    gender_token = "XX" if gender == 'female' else "XY"
    run['title'] = ("short RNA-seq " + psv['experiment'] + " - " + run['rep_tech']
                    + " (library '" + run['library_id'] + "') on " + psv['genome']
                    + " - " + gender)
    run['name'] = ("srna_" + psv['genome'] + gender_token + "_" + psv['experiment']
                   + "_" + run['rep_tech'])

    if verbose:
        print("Pipeline Specific Vars:")
        print(json.dumps(psv, indent=4))
    return psv