def configFromFile(self, config_file_path):
    """Populate this run from an INI-style configuration file.

    The ``[general]`` section supplies the run-wide settings; every other
    section describes one sample and is named by its run key (for example
    ``1:ACACT``).

    Sets on ``self``: ``run_date``, ``platform``, ``input_dir``,
    ``output_dir``, ``input_files`` (list of stripped names) and
    ``input_file_type``.  For each sample section a ``Sample`` is created,
    its key is appended to ``self.run_keys`` and the object is stored in
    ``self.samples`` under that key.

    Args:
        config_file_path: Path of the configuration file to read.

    Raises:
        configparser.NoSectionError: If the ``[general]`` section is missing.
            ``ConfigParser.read`` silently skips unreadable files, so a wrong
            path also surfaces this way.
        configparser.NoOptionError: If a required option is missing.
    """
    # The module was renamed from ``ConfigParser`` to ``configparser`` in
    # Python 3; support both so the method works on either interpreter.
    try:
        import configparser as config_parser
    except ImportError:  # Python 2
        import ConfigParser as config_parser

    user_config = config_parser.ConfigParser()
    user_config.read(config_file_path)

    def general(option):
        """Return a required option from the [general] section."""
        return user_config.get('general', option)

    def optional_list(section, option):
        """Return a comma-separated option as a list, or [] when unavailable."""
        try:
            raw = user_config.get(section, option)
        except config_parser.Error:
            return []
        # Values may be wrapped in single or double quotes in the file.
        return raw.strip("'").strip('"').split(',')

    # take care of the general section
    self.run_date = general('run_date')
    self.platform = general('platform')
    self.input_dir = general('input_dir')
    self.output_dir = general('output_dir')
    self.input_files = [name.strip() for name in general('input_files').split(',')]
    self.input_file_type = general('input_file_type')

    # populate sample information for every run_key
    for run_key in user_config.sections():
        if run_key == 'general':
            continue

        # run_key looks like: 1:ACACT
        sample = Sample(run_key)

        # has defaults - not required
        sample.proximal_primers = optional_list(run_key, 'forward_primers')
        sample.distal_primers = optional_list(run_key, 'reverse_primers')
        sample.stop_sequences = optional_list(run_key, 'stop_sequences')
        try:
            sample.anchor = user_config.get(run_key, 'anchor')
        except config_parser.Error:
            sample.anchor = ''

        # required
        sample.direction = user_config.get(run_key, 'direction')
        sample.project = user_config.get(run_key, 'project_name')
        sample.dataset = user_config.get(run_key, 'dataset_name')
        sample.dna_region = user_config.get(run_key, 'dna_region')
        sample.taxonomic_domain = user_config.get(run_key, 'taxonomic_domain')

        # Convert ':' to '_' by replacing the second character of the key.
        # NOTE(review): this assumes a one-character prefix such as
        # '1:ACACT'; confirm that longer lane prefixes never occur.
        key = run_key[:1] + '_' + run_key[2:]

        # a list of run_keys
        self.run_keys.append(key)
        # a dictionary of samples
        self.samples[key] = sample
def initializeFromDictionary(self, configDict):
    """Populate this run and its samples from a nested configuration mapping.

    ``configDict['general']`` holds the run-wide settings.  Every other
    top-level key names one lane/run-key combination and maps to the
    settings of that sample.

    Which keys are required depends on ``self.vamps_user_upload`` and on
    the platform (members of ``C.illumina_list`` versus ``'454'``); required
    keys are read with plain indexing and raise ``KeyError`` when missing.

    Side effects: sets many attributes on ``self``, fills
    ``self.input_file_info``, appends to ``self.run_keys``, stores
    ``Sample`` objects in ``self.samples`` and prints the general
    configuration to stdout three times (kept from the existing behaviour).

    Args:
        configDict: Mapping with a ``'general'`` entry plus one entry per
            sample.

    Raises:
        KeyError: If a required key is missing.
        Exception: If the input file format is not in
            ``C.input_file_formats``.
    """

    def optional(mapping, key, default):
        """Return mapping[key], or default when the key is absent."""
        try:
            return mapping[key]
        except KeyError:
            return default

    def optional_list(mapping, key):
        """Return mapping[key] split on commas, or [] when absent/not a string."""
        try:
            return mapping[key].split(',')
        except (KeyError, AttributeError):
            return []

    # ------------------------------------------------------------------
    # general section
    # ------------------------------------------------------------------
    general_config = configDict['general']
    print('General Config0:', general_config)
    # if general_config['gast_data_source'] != 'database':
    self.run = general_config['run']
    self.platform = general_config.get('platform', "unknown")
    self.input_dir = general_config.get('input_dir', None)
    self.require_distal = general_config.get('require_distal', True)
    self.minimumLength = general_config.get('minimumLength', C.minimumLength)
    self.maximumLength = general_config.get('maximumLength', C.maximumLength)
    self.minAvgQual = general_config.get('minAvgQual', C.minAvgQual)
    self.force_runkey = general_config.get('force_runkey', None)
    self.idx_keys = general_config.get('idx_keys', "")

    if self.vamps_user_upload:
        self.site = general_config['site']
        if self.site == 'new_vamps':
            # NOTE(review): confirm that exactly these keys are specific to
            # the 'new_vamps' site and that the keys below apply to every
            # upload site.
            self.project_dir = general_config['project_dir']
            self.node_db = general_config['node_db']
            self.process_dir = general_config['process_dir']
            self.hostname = general_config['hostname']
            self.ref_db_dir = general_config['ref_db_dir']
            self.config_file = general_config['config_file']
            self.project = general_config['project']

        self.env_source_id = general_config['env_source_id']
        self.user = general_config['user']
        # self.datasets = configDict['datasets']
        self.input_files = general_config['input_files']
        self.dna_region = general_config['dna_region']
        self.domain = general_config['domain']
        self.load_vamps_database = general_config['load_vamps_database']

        # Upload configs spell the length limits in snake_case; these
        # deliberately override the camelCase lookups made above.
        # (require_distal was already read above with the same key/default.)
        self.minimumLength = general_config.get('minimum_length', C.minimumLength)
        self.maximumLength = general_config.get('maximum_length', C.maximumLength)
        self.use_cluster = general_config.get('use_cluster', False)
        self.use64bit = general_config.get('use64bit', False)
        self.fasta_file = general_config.get('fasta_file', None)
        self.mobedac = general_config.get('mobedac', False)
        self.use_full_length = general_config.get('use_full_length', False)
        self.classifier = general_config.get('classifier', 'unknown')
    else:
        is_illumina = self.platform in C.illumina_list
        if is_illumina or self.platform == '454':
            # Settings shared by both in-house platforms.
            self.compressed = general_config['compressed']
            self.database_name = general_config['database_name']
            self.database_host = general_config['database_host']
            self.site = general_config['site']
            self.load_vamps_database = general_config['load_vamps_database']
        if is_illumina:
            if "archaea" in general_config:
                self.archaea = general_config['archaea']
            # Fall back to the per-platform defaults only when needed, so a
            # platform without an entry there does not fail unnecessarily.
            if "do_perfect" in general_config:
                self.do_perfect = general_config['do_perfect']
            else:
                self.do_perfect = C.pipeline_run_items[self.platform]['do_perfect']
            if "lane_name" in general_config:
                self.lane_name = general_config['lane_name']
            else:
                self.lane_name = C.pipeline_run_items[self.platform]['lane_name']

    # added gast_input_source for vamps uploads
    # so when users want to gast at a later time they will
    # look in the database and not the files (which may be missing)
    # see /xraid2-2/vampsweb/vampsdev/vamps_trim.py
    if 'gast_input_source' in general_config:
        self.gast_input_source = general_config['gast_input_source']

    print('General Config:', general_config)

    # ------------------------------------------------------------------
    # input files
    # ------------------------------------------------------------------
    if 'files_list' in general_config:
        input_file_names = general_config['files_list']
    else:
        input_file_names = [
            name.strip() for name in general_config['input_files'].split(',')
        ]
    # Join the parsed names, not the raw 'input_files' string: joining a
    # string would put a comma between every character.
    self.input_files = ','.join(input_file_names)
    self.files_list = input_file_names

    self.input_file_info = {}
    print(general_config)

    if "input_file_format" in general_config:
        file_format = general_config['input_file_format']
    else:
        # default
        file_format = 'fasta'
    if "input_file_lane" in general_config:
        file_lane = general_config['input_file_lane']
    else:
        # default
        file_lane = ''

    # The format is only validated when there is at least one input file.
    if input_file_names and file_format not in C.input_file_formats:
        raise Exception("Invalid sequence input file format: " + str(file_format))

    # make up a hash...they are allowed to not put in any input_file_lanes...
    # could be 3 mbl fasta files which would all have lane info encoded on
    # each id/description line of the sequence record
    for input_file in input_file_names:
        self.input_file_info[input_file] = {
            "name": input_file,
            "format": file_format,
            "lane": file_lane,
        }

    # ------------------------------------------------------------------
    # samples: one per lane/run-key entry (everything except 'general')
    # ------------------------------------------------------------------
    optional_text_fields = (
        'run_key', 'lane', 'adaptor', 'barcode', 'seq_operator',
        'amp_operator', 'primer_suite', 'tubelabel', 'dna_region',
    )
    required_fields = (
        'project_title', 'project_description', 'env_sample_source_id',
        'dataset_description', 'project', 'dataset',
    )
    owner_fields = (
        'data_owner', 'first_name', 'last_name', 'email', 'institution',
        'funding',
    )

    for lane_run_key in [k for k in configDict if k != 'general']:
        lane_run_dict = configDict[lane_run_key]
        sample = Sample(lane_run_key)

        # has defaults - not required
        sample.forward_primers = optional_list(lane_run_dict, 'forward_primers')
        sample.reverse_primers = optional_list(lane_run_dict, 'reverse_primers')
        sample.stop_sequences = optional_list(lane_run_dict, 'stop_sequences')
        sample.anchor = optional(lane_run_dict, 'anchor', '')
        # should we try to trim with mbl primers as well as custom ones
        sample.use_mbl_primers = optional(lane_run_dict, 'use_mbl_primers', 1)
        for field in optional_text_fields:
            setattr(sample, field, optional(lane_run_dict, field, ''))

        # The first word of the primer suite is used as the taxonomic
        # domain; an empty or whitespace-only suite yields 'unknown'.
        suite_words = sample.primer_suite.split() if sample.primer_suite else []
        sample.taxonomic_domain = suite_words[0] if suite_words else 'unknown'

        # required
        for field in required_fields:
            setattr(sample, field, lane_run_dict[field])

        if self.vamps_user_upload:
            # required for 454
            sample.direction = lane_run_dict['direction']
            # Convert ':' to '_' by replacing the second character.
            # NOTE(review): assumes a one-character lane prefix.
            key = lane_run_key[:1] + '_' + lane_run_key[2:]
            # a list of run_keys
            self.run_keys.append(key)
            # a dictionary of samples
            self.samples[key] = sample
        elif self.platform in C.illumina_list:
            # required specifically for illumina
            for field in owner_fields:
                setattr(sample, field, lane_run_dict[field])
            sample.barcode_index = lane_run_dict['barcode_index']
            sample.overlap = lane_run_dict['overlap']
            sample.read_length = lane_run_dict['read_length']
            # 'file_prefix' is not read here (its assignment is disabled).
            sample.insert_size = lane_run_dict['insert_size']

            # concatenate: barcode_index and run_key and lane
            key = (lane_run_dict['barcode_index'] + '_'
                   + lane_run_dict['run_key'] + '_'
                   + lane_run_dict['lane'])
            self.run_keys.append(key)
            # NOTE(review): the sample is stored under the raw lane_run_key,
            # not under ``key`` as in the upload branch - confirm intended.
            self.samples[lane_run_key] = sample
        elif self.platform == '454':
            # required for 454
            sample.direction = lane_run_dict['direction']
            for field in owner_fields:
                setattr(sample, field, lane_run_dict[field])

            # Convert ':' to '_' by replacing the second character.
            key = lane_run_key[:1] + '_' + lane_run_key[2:]
            self.run_keys.append(key)
            # NOTE(review): stored under the raw lane_run_key, not ``key``.
            self.samples[lane_run_key] = sample