Python Sample.dataset Examples

Programming Language: Python

Namespace/Package Name: pipeline.sample

Class/Type: Sample

Method/Function: dataset

Examples at hotexamples.com: 4

Python Sample.dataset - 4 examples found. These are the top-rated real-world Python examples of pipeline.sample.Sample.dataset, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

Sample(2)

dataset(2)

direction(2)

dna_region(2)

anchor(2)

assembly(1)

env_sample_source_id(1)

pre_map(1)

institution(1)

insert_size(1)

funding(1)

forward_primers(1)

first_name(1)

file_prefix(1)

env_sample_source(1)

barcode(1)

email(1)

adaptor(1)

distal_primers(1)

amp_operator(1)

dataset_description(1)

data_owner(1)

barcode_index(1)

trim_reads(1)

Example #1

Show file

File: runconfig.py Project: avoorhis/mbl_sequencing_pipeline

    def configFromFile(self, config_file_path):
        """Load the run configuration from an ini-style file.

        The 'general' section provides run_date, platform, input_dir,
        output_dir, input_files (comma separated) and input_file_type.
        Every other section name is treated as a run_key (for example
        '1:ACACT'): a Sample is built for it, stored in self.samples, and
        its converted key is appended to self.run_keys.  Both containers
        must already exist on self; this method only adds to them.

        Args:
            config_file_path: path of the configuration file to read.

        Raises:
            NoSectionError: if there is no 'general' section (the parser's
                read() skips files it cannot open, so a bad path surfaces
                here as well).
            NoOptionError: if a required option is missing from 'general'
                or from a run_key section.
        """
        # The module was renamed between Python 2 and Python 3.
        try:
            import ConfigParser as config_parser
        except ImportError:
            import configparser as config_parser

        user_config = config_parser.ConfigParser()
        user_config.read(config_file_path)

        def optional_list(section, option):
            # Optional comma-separated option.  Only a missing option falls
            # back to the empty list; any other parser error propagates.
            try:
                raw_value = user_config.get(section, option)
            except config_parser.NoOptionError:
                return []
            # surrounding single/double quotes are dropped before splitting
            return raw_value.strip("'").strip('"').split(',')

        # take care of the general section
        self.run_date   = user_config.get('general', 'run_date')
        self.platform   = user_config.get('general', 'platform')
        self.input_dir  = user_config.get('general', 'input_dir')
        self.output_dir = user_config.get('general', 'output_dir')

        raw_input_files = user_config.get('general', 'input_files')
        self.input_files = [name.strip() for name in raw_input_files.split(',')]
        self.input_file_type = user_config.get('general', 'input_file_type')

        # populate sample information for every run_key
        for run_key in user_config.sections():
            if run_key == 'general':
                continue

            sample = Sample(run_key)

            # has defaults - not required
            # (forward/reverse primers are stored as proximal/distal primers)
            sample.proximal_primers = optional_list(run_key, 'forward_primers')
            sample.distal_primers = optional_list(run_key, 'reverse_primers')
            sample.stop_sequences = optional_list(run_key, 'stop_sequences')
            try:
                sample.anchor = user_config.get(run_key, 'anchor')
            except config_parser.NoOptionError:
                sample.anchor = ''

            # required
            sample.direction = user_config.get(run_key, 'direction')
            sample.project = user_config.get(run_key, 'project_name')
            sample.dataset = user_config.get(run_key, 'dataset_name')
            sample.dna_region = user_config.get(run_key, 'dna_region')
            sample.taxonomic_domain = user_config.get(run_key, 'taxonomic_domain')

            # convert '1:ACACT' into '1_ACACT'
            # NOTE(review): this overwrites the second character, so it
            # assumes a one-character lane prefix - confirm that holds.
            key = run_key[:1] + '_' + run_key[2:]
            # a list of run_keys
            self.run_keys.append(key)
            # a dictionary of samples
            self.samples[key] = sample

Example #2

Show file

File: runconfig.py Project: MBL-Woods-Hole/py_mbl_sequencing_pipeline

    def initializeFromDictionary(self, configDict):
        """Populate this run configuration from a parsed configuration dict.

        configDict['general'] holds the run-wide settings.  Every other key
        of configDict is a lane/run_key entry whose value is a dict of
        per-sample settings; each one becomes a Sample that is stored in
        self.samples, and its key is appended to self.run_keys.  Both
        containers must already exist on self.

        Args:
            configDict: mapping with a 'general' entry plus one entry per
                lane/run_key combination.

        Raises:
            KeyError: if a required setting is missing.
            Exception: if the input file format is not listed in
                C.input_file_formats, or if samples are present but the
                platform is neither 'illumina' nor '454' (no sample key can
                be built for it).
        """
        # get the general stuff
        general_config = configDict['general']

        self.run            = general_config['run']
        self.platform       = general_config.get('platform', "unknown")
        self.input_dir      = general_config.get('input_dir', None)
        self.require_distal = general_config.get('require_distal', True)
        self.minimumLength  = general_config.get('minimumLength', C.minimumLength)
        self.maximumLength  = general_config.get('maximumLength', C.maximumLength)
        self.minAvgQual     = general_config.get('minAvgQual',    C.minAvgQual)
        self.force_runkey   = general_config.get('force_runkey', None)
        self.use_cluster    = general_config['use_cluster']
        self.idx_keys       = general_config.get('idx_keys', "")

        if self.platform == 'vamps':
            self.user           = general_config['user']
            self.dna_region     = general_config['dna_region']
            self.input_files    = general_config['input_files']
            self.project        = general_config['project']
            self.dataset        = general_config['dataset']
            self.site           = general_config['site']
            self.env_source_id  = general_config['envsource']
            self.fasta_file     = general_config.get('fasta_file', None)
        if self.platform == 'illumina':
            self.compressed     = general_config['compressed']
            self.database_name  = general_config['database_name']
            self.database_host  = general_config['database_host']

        # added gast_input_source for vamps uploads
        # so when users want to gast at a later time they will
        # look in the database and not the files (which may be missing)
        # see /xraid2-2/vampsweb/vampsdev/vamps_trim.py
        # 'files' is the default, for the regular gast pipeline
        self.gast_input_source = general_config.get('gast_input_source', 'files')

        # the input files arrive either as a ready-made list ('files_list')
        # or as one comma-separated string ('input_files')
        if 'files_list' in general_config:
            input_file_names = general_config['files_list']
            self.input_files = ','.join(input_file_names)
        else:
            self.input_files = general_config['input_files']
            input_file_names = [name.strip() for name in self.input_files.split(',')]

        # a single format and a single lane value apply to every input file
        file_format = general_config.get('input_file_format', 'fasta')
        file_lane = general_config.get('input_file_lane', '')

        self.input_file_info = {}
        for input_file in input_file_names:
            if file_format not in C.input_file_formats:
                # report the offending value itself; the message used to be
                # built from an undefined name and raised NameError instead
                raise Exception("Invalid sequence input file format: " + str(file_format))

            # make up a hash...they are allowed to not put in any input_file_lanes...could be 3 mbl fasta files which would all have lane
            # info encoded on each id/description line of the sequence record
            self.input_file_info[input_file] = {
                "name": input_file,
                "format": file_format,
                "lane": file_lane,
            }

        # per-sample settings, grouped by how a missing value is handled
        list_fields = ('forward_primers', 'reverse_primers', 'stop_sequences')
        blank_default_fields = (
            'anchor', 'run_key', 'lane', 'adaptor', 'barcode', 'seq_operator',
            'amp_operator', 'primer_suite', 'tubelabel', 'dna_region',
        )
        required_fields = (
            'data_owner', 'first_name', 'last_name', 'email', 'institution',
            'project_title', 'project_description', 'funding',
            'env_sample_source', 'dataset_description',
        )
        illumina_fields = (
            'barcode_index', 'overlap', 'read_length', 'file_prefix',
            'insert_size',
        )

        # now deal with each lane_runkey combo (Sample) that is misnamed though
        # populate sample information for every run_key
        for lane_run_key in [s for s in configDict.keys() if s != 'general']:
            lane_run_dict = configDict[lane_run_key]
            sample = Sample(lane_run_key)

            # has defaults - not required
            for field in list_fields:
                try:
                    values = lane_run_dict[field].split(',')
                except (KeyError, AttributeError):
                    # missing, or present but not a comma-separated string
                    values = []
                setattr(sample, field, values)
            for field in blank_default_fields:
                setattr(sample, field, lane_run_dict.get(field, ''))
            # should we try to trim with mbl primers as well as custom ones
            sample.use_mbl_primers = lane_run_dict.get('use_mbl_primers', 1)

            # required for every platform
            for field in required_fields:
                setattr(sample, field, lane_run_dict[field])

            key = None
            if self.platform == 'illumina':
                # req specifically for illumina
                for field in illumina_fields:
                    setattr(sample, field, lane_run_dict[field])
                # concatenate: barcode_index and run_key and lane
                key = '_'.join((lane_run_dict['barcode_index'],
                                lane_run_dict['run_key'],
                                lane_run_dict['lane']))
                self.run_keys.append(key)
            elif self.platform == '454':
                # required for 454
                sample.direction = lane_run_dict['direction']
                sample.taxonomic_domain = lane_run_dict['domain']
                # convert: change ':' to '_'
                # NOTE(review): this overwrites the second character, so it
                # assumes a one-character lane prefix - confirm that holds.
                key = lane_run_key[:1] + '_' + lane_run_key[2:]
                self.run_keys.append(key)

            sample.project = lane_run_dict['project']
            sample.dataset = lane_run_dict['dataset']

            if key is None:
                # No key is built for any other platform; previously this
                # surfaced as an unbound-variable error on the line below.
                raise Exception("Cannot build a sample key for platform: " + str(self.platform))

            # a dictionary of samples
            self.samples[key] = sample

Example #3

Show file

File: runconfig.py Project: avoorhis/py_mbl_sequencing_pipeline

    def initializeFromDictionary(self, configDict):
        """Populate this run configuration from a parsed configuration dict.

        configDict['general'] holds the run-wide settings; which of them are
        read depends on self.vamps_user_upload (which must already be set on
        self) and on the platform.  Every other key of configDict is a
        lane/run_key entry whose value is a dict of per-sample settings;
        each one becomes a Sample.  For user uploads, illumina platforms and
        '454' the Sample is stored in self.samples and a key is appended to
        self.run_keys; for any other platform the Sample is built but not
        stored.

        Args:
            configDict: mapping with a 'general' entry plus one entry per
                lane/run_key combination.

        Raises:
            KeyError: if a required setting is missing.
            Exception: if the input file format is not listed in
                C.input_file_formats.
        """
        # get the general stuff
        general_config = configDict['general']
        print(   'General Config0:',general_config)

        self.run            = general_config['run']
        self.platform       = general_config.get('platform', "unknown")
        self.input_dir      = general_config.get('input_dir', None)
        self.require_distal = general_config.get('require_distal', True)
        self.minimumLength  = general_config.get('minimumLength', C.minimumLength)
        self.maximumLength  = general_config.get('maximumLength', C.maximumLength)
        self.minAvgQual     = general_config.get('minAvgQual',    C.minAvgQual)
        self.force_runkey   = general_config.get('force_runkey', None)
        self.idx_keys       = general_config.get('idx_keys', "")

        if self.vamps_user_upload:
            self.site = general_config['site']
            if self.site == 'new_vamps':
                self.project_dir    = general_config['project_dir']
                self.node_db        = general_config['node_db']
                self.process_dir    = general_config['process_dir']
                self.hostname       = general_config['hostname']
                self.ref_db_dir     = general_config['ref_db_dir']
                self.config_file    = general_config['config_file']
                self.project        = general_config['project']
                self.env_source_id  = general_config['env_source_id']

            self.user           = general_config['user']
            self.input_files    = general_config['input_files']
            self.dna_region     = general_config['dna_region']
            self.domain         = general_config['domain']
            self.load_vamps_database = general_config['load_vamps_database']

            # optional upload settings; note that the length limits are
            # re-read here under their snake_case names and replace the
            # camelCase values picked up above
            self.require_distal  = general_config.get('require_distal', True)
            self.minimumLength   = general_config.get('minimum_length', C.minimumLength)
            self.maximumLength   = general_config.get('maximum_length', C.maximumLength)
            self.use_cluster     = general_config.get('use_cluster', False)
            self.use64bit        = general_config.get('use64bit', False)
            self.fasta_file      = general_config.get('fasta_file', None)
            self.mobedac         = general_config.get('mobedac', False)
            self.use_full_length = general_config.get('use_full_length', False)
            self.classifier      = general_config.get('classifier', 'unknown')
        else:
            is_illumina = self.platform in C.illumina_list
            if is_illumina or self.platform == '454':
                # settings shared by the illumina and 454 pipelines
                self.compressed     = general_config['compressed']
                self.database_name  = general_config['database_name']
                self.database_host  = general_config['database_host']
                self.site           = general_config['site']
                self.load_vamps_database = general_config['load_vamps_database']
            if is_illumina:
                if "archaea" in general_config:
                    self.archaea = general_config['archaea']
                # the per-platform defaults are only looked up when the
                # config does not supply a value
                if "do_perfect" in general_config:
                    self.do_perfect = general_config['do_perfect']
                else:
                    self.do_perfect = C.pipeline_run_items[self.platform]['do_perfect']
                if "lane_name" in general_config:
                    self.lane_name = general_config['lane_name']
                else:
                    self.lane_name = C.pipeline_run_items[self.platform]['lane_name']

        # added gast_input_source for vamps uploads
        # so when users want to gast at a later time they will
        # look in the database and not the files (which may be missing)
        # see /xraid2-2/vampsweb/vampsdev/vamps_trim.py
        if 'gast_input_source' in general_config:
            self.gast_input_source = general_config['gast_input_source']

        print(   'General Config:',general_config)
        # the input files arrive either as a ready-made list ('files_list')
        # or as one comma-separated string ('input_files')
        if 'files_list' in general_config:
            input_file_names = general_config['files_list']
            self.input_files = ','.join(input_file_names)
            self.files_list = input_file_names
        else:
            # 'input_files' is already the comma-separated string; joining
            # it again would insert a comma between every character
            self.input_files = general_config['input_files']
            input_file_names = [name.strip() for name in self.input_files.split(',')]
            # keep files_list a list in both branches
            self.files_list = input_file_names

        # a single format and a single lane value apply to every input file
        file_format = general_config.get('input_file_format', 'fasta')
        file_lane = general_config.get('input_file_lane', '')

        self.input_file_info = {}
        print(general_config)
        for input_file in input_file_names:
            if file_format not in C.input_file_formats:
                raise Exception("Invalid sequence input file format: " + str(file_format))

            # make up a hash...they are allowed to not put in any input_file_lanes...could be 3 mbl fasta files which would all have lane
            # info encoded on each id/description line of the sequence record
            self.input_file_info[input_file] = {
                "name": input_file,
                "format": file_format,
                "lane": file_lane,
            }

        # per-sample settings, grouped by how a missing value is handled
        list_fields = ('forward_primers', 'reverse_primers', 'stop_sequences')
        blank_default_fields = (
            'anchor', 'run_key', 'lane', 'adaptor', 'barcode', 'seq_operator',
            'amp_operator', 'primer_suite', 'tubelabel', 'dna_region',
        )
        required_fields = (
            'project_title', 'project_description', 'env_sample_source_id',
            'dataset_description', 'project', 'dataset',
        )
        owner_fields = (
            'data_owner', 'first_name', 'last_name', 'email', 'institution',
            'funding',
        )
        illumina_fields = (
            'barcode_index', 'overlap', 'read_length', 'insert_size',
        )

        # now deal with each lane_runkey combo (Sample) that is misnamed though
        # populate sample information for every run_key
        for lane_run_key in [s for s in configDict.keys() if s != 'general']:
            lane_run_dict = configDict[lane_run_key]
            sample = Sample(lane_run_key)

            # has defaults - not required
            for field in list_fields:
                try:
                    values = lane_run_dict[field].split(',')
                except (KeyError, AttributeError):
                    # missing, or present but not a comma-separated string
                    values = []
                setattr(sample, field, values)
            for field in blank_default_fields:
                setattr(sample, field, lane_run_dict.get(field, ''))
            # should we try to trim with mbl primers as well as custom ones
            sample.use_mbl_primers = lane_run_dict.get('use_mbl_primers', 1)

            # the taxonomic domain is the first word of the primer suite;
            # an empty or whitespace-only suite yields 'unknown'
            suite_words = sample.primer_suite.split() if sample.primer_suite else []
            sample.taxonomic_domain = suite_words[0] if suite_words else 'unknown'

            # required for every platform
            for field in required_fields:
                setattr(sample, field, lane_run_dict[field])

            if self.vamps_user_upload:
                # required for 454
                sample.direction = lane_run_dict['direction']
                # convert: change ':' to '_'
                # NOTE(review): this overwrites the second character, so it
                # assumes a one-character lane prefix - confirm that holds.
                key = lane_run_key[:1] + '_' + lane_run_key[2:]
                self.run_keys.append(key)
                # uploads store the sample under the converted key
                self.samples[key] = sample
            elif self.platform in C.illumina_list:
                # req specifically for illumina
                for field in owner_fields + illumina_fields:
                    setattr(sample, field, lane_run_dict[field])
                # concatenate: barcode_index and run_key and lane
                key = '_'.join((lane_run_dict['barcode_index'],
                                lane_run_dict['run_key'],
                                lane_run_dict['lane']))
                self.run_keys.append(key)
                # NOTE(review): stored under the raw lane_run_key while
                # run_keys receives the concatenated key - confirm intended.
                self.samples[lane_run_key] = sample
            elif self.platform == '454':
                # required for 454
                sample.direction = lane_run_dict['direction']
                for field in owner_fields:
                    setattr(sample, field, lane_run_dict[field])
                # convert: change ':' to '_' (same one-character assumption
                # as for uploads)
                key = lane_run_key[:1] + '_' + lane_run_key[2:]
                self.run_keys.append(key)
                # NOTE(review): stored under the raw lane_run_key while
                # run_keys receives the converted key - confirm intended.
                self.samples[lane_run_key] = sample

Example #4

Show file

File: runconfig.py Project: icefoxx/py_mbl_sequencing_pipeline

    def initializeFromDictionary(self, configDict):
        # get the general stuff
        general_config = configDict['general']
        #if general_config['gast_data_source'] != 'database':
        self.run       = general_config['run']
        self.platform       = general_config.get('platform', "unknown")
        self.input_dir      = general_config.get('input_dir', None)
        self.require_distal = general_config.get('require_distal', True)
        self.minimumLength  = general_config.get('minimumLength', C.minimumLength)
        self.maximumLength  = general_config.get('maximumLength', C.maximumLength)
        self.minAvgQual     = general_config.get('minAvgQual',    C.minAvgQual)
        self.force_runkey   = general_config.get('force_runkey', None)
        self.use_cluster    = general_config['use_cluster']
        try:
            self.idx_keys           = general_config['idx_keys']
        except:
            self.idx_keys = ""
 
 
        if self.vamps_user_upload:
            self.user           = general_config['user']           
            
            self.input_files    = general_config['input_files'] 
            #self.project        = general_config['project'] 
            #self.dataset        = general_config['dataset']
            self.dna_region     = general_config['dna_region']
            self.domain         = general_config['domain']
            
            
            self.site               = general_config['site']
            
            self.load_vamps_database = general_config['load_vamps_database']
            try:
                self.fasta_file     = general_config['fasta_file'] 
            except:
                self.fasta_file     = None
            try:
                self.mobedac        = general_config['mobedac'] 
            except:
                self.mobedac        = False
            try:
                self.use_full_length= general_config['use_full_length']
            except:
                self.use_full_length= False
            try:
                self.classifier     = general_config['classifier']
            except:
                self.classifier= 'unknown'
        else:
            if self.platform == 'illumina':
                self.compressed     = general_config['compressed']                 
                self.database_name  = general_config['database_name'] 
                self.database_host  = general_config['database_host'] 
                self.site           = general_config['site']
                self.load_vamps_database = general_config['load_vamps_database']
                if general_config.has_key("archaea"):
                    self.archaea    = general_config['archaea'] 
                if general_config.has_key("do_perfect"):
                    self.do_perfect = general_config['do_perfect']
                else:
                    self.do_perfect = C.pipeline_run_items['illumina']['do_perfect']        
                if general_config.has_key("lane_name"):
                    self.lane_name = general_config['lane_name']
                else:
                    self.lane_name = C.pipeline_run_items['illumina']['lane_name']                                    
                    
            elif self.platform == '454':
                self.compressed     = general_config['compressed'] 
                self.database_name  = general_config['database_name'] 
                self.database_host  = general_config['database_host'] 
                self.site           = general_config['site']
                self.load_vamps_database = general_config['load_vamps_database']
            else:
                pass
        # added gast_input_source for vamps uploads
        # so when users want to gast at a later time they will
        # look in the database and not the files (which may be missing)
        # see /xraid2-2/vampsweb/vampsdev/vamps_trim.py
        
        if 'gast_input_source' in general_config: 
            self.gast_input_source = general_config['gast_input_source']
        
        print    'General Config:',general_config
        if 'files_list' in general_config:
            input_file_names = general_config['files_list']
            self.input_files = ','.join(general_config['files_list'])
            self.files_list = general_config['files_list']
        else:
            input_file_names  = [input_str.strip() for input_str in general_config['input_files'].split(',')]
            self.input_files = ','.join(general_config['input_files'])
            self.files_list = general_config['input_files']
        

 
        
        
        self.input_file_info = {}
        print general_config
        for idx,input_file in enumerate(input_file_names):
            
            if "input_file_format" in general_config:
                file_format = general_config['input_file_format']
            else:
                # default
                file_format = 'fasta'
            
            
            if file_format not in C.input_file_formats:
                raise Exception("Invalid sequence input file format: " + general_config['input_file_format'])
                
            if "input_file_lane" in general_config:
                file_lane = general_config['input_file_lane']
            else:
                # default
                file_lane = ''    
                
            # Build a per-file info hash. Users are allowed to omit input_file_lane entirely:
            # e.g. the input could be 3 MBL fasta files that all have their lane info
            # encoded on the id/description line of each sequence record.
            
            self.input_file_info[input_file] =  {  "name" : input_file, 
                                                   "format" : file_format, 
                                                   "lane" : file_lane
                                                }
        
        
        # now deal with each lane_runkey combo (Sample) that is misnamed though
        # populate sample information for every run_key
        
        for lane_run_key in [s for s in configDict.keys() if s != 'general']:
        	# change ':' to '_'
        	# key = lane_run_key[:1]+'_'+lane_run_key[2:]
            
            lane_run_dict = configDict[lane_run_key]
            #print 'CD ',configDict
            
            sample = Sample(lane_run_key)
            #print 'sample',sample
            
            # has defaults -not required
            try:
                sample.forward_primers = lane_run_dict['forward_primers'].split(',')
            except:
                sample.forward_primers = []
            try:
                sample.reverse_primers = lane_run_dict['reverse_primers'].split(',')
            except:
                sample.reverse_primers = []
            try:
                sample.stop_sequences = lane_run_dict['stop_sequences'].split(',')
            except:
                sample.stop_sequences = []
            try:
                sample.anchor = lane_run_dict['anchor']
            except:
                sample.anchor = ''
            # should we try to trim with mbl primers as well as custom ones
            try:
                sample.use_mbl_primers = lane_run_dict['use_mbl_primers']
            except:
                sample.use_mbl_primers = 1
#################################
            try:
                sample.run_key = lane_run_dict['run_key']
            except:
                sample.run_key = ''
            try:
                sample.lane = lane_run_dict['lane']
            except:
                sample.lane = ''
            try:
                sample.adaptor = lane_run_dict['adaptor']
            except:
                sample.adaptor = ''
            try:
                sample.barcode = lane_run_dict['barcode']
            except:
                sample.barcode = ''
            try:
                sample.seq_operator = lane_run_dict['seq_operator']
            except:
                sample.seq_operator = ''
            try:
                sample.amp_operator = lane_run_dict['amp_operator']
            except:
                sample.amp_operator = ''
            try:
                sample.primer_suite = lane_run_dict['primer_suite']
            except:
                sample.primer_suite = ''
            try:
                sample.tubelabel = lane_run_dict['tubelabel']
            except:
                sample.tubelabel = ''
            try:    
                sample.dna_region = lane_run_dict['dna_region'] 
            except:
                sample.dna_region = ''
            
            if sample.primer_suite:
                sample.taxonomic_domain = sample.primer_suite.split()[0]
            else:
                sample.taxonomic_domain = 'unknown'
                
            
            sample.project_title        = lane_run_dict['project_title']
            sample.project_description  = lane_run_dict['project_description']
            
            sample.env_sample_source_id = lane_run_dict['env_sample_source_id']
            sample.dataset_description  = lane_run_dict['dataset_description']
            sample.project              = lane_run_dict['project']
            sample.dataset              = lane_run_dict['dataset']
            print 'lane_run_key '+lane_run_key
            if self.vamps_user_upload:
                # required for 454
                sample.direction = lane_run_dict['direction'] 
                #sample.taxonomic_domain = lane_run_dict['taxonomic_domain']
                # a list of run_keys
                # convert: change ':' to '_'
                #lane_run_key = '_'.join(lane_run_key.split(':'))
                key = lane_run_key[:1]+'_'+lane_run_key[2:]
                #sample.key = key
                self.run_keys.append(key)
                # a dictionary of samples
            	self.samples[key] = sample
            else:
                if self.platform == 'illumina':
                    # req specifically for illumina
                    sample.data_owner           = lane_run_dict['data_owner']
                    sample.first_name           = lane_run_dict['first_name']
                    sample.last_name            = lane_run_dict['last_name']
                    sample.email                = lane_run_dict['email']
                    sample.institution          = lane_run_dict['institution']
                    sample.funding              = lane_run_dict['funding']
                    sample.barcode_index = lane_run_dict['barcode_index'] 
                    sample.overlap = lane_run_dict['overlap'] 
                    sample.read_length = lane_run_dict['read_length'] 
#                    sample.file_prefix = lane_run_dict['file_prefix'] 
                    sample.insert_size = lane_run_dict['insert_size']
                    #sample.taxonomic_domain = lane_run_dict['domain']
                    # concatenate: barcode_index and run_key and lane
                    key = lane_run_dict['barcode_index'] +'_'+ lane_run_dict['run_key'] +'_'+ lane_run_dict['lane'] 
                    #sample.key = key
                    self.run_keys.append(key)  
                    # a dictionary of samples
                    self.samples[lane_run_key] = sample
                    
                elif self.platform == '454':
                    # required for 454
                    sample.direction            = lane_run_dict['direction'] 
                    sample.data_owner           = lane_run_dict['data_owner']
                    sample.first_name           = lane_run_dict['first_name']
                    sample.last_name            = lane_run_dict['last_name']
                    sample.email                = lane_run_dict['email']
                    sample.institution          = lane_run_dict['institution']
                    sample.funding              = lane_run_dict['funding']
                    #sample.taxonomic_domain = lane_run_dict['domain']
                    # a list of run_keys
                    # convert: change ':' to '_'
                    key = lane_run_key[:1]+'_'+lane_run_key[2:]
                    #sample.key = key
                    self.run_keys.append(key)
                    # a dictionary of samples
                    self.samples[lane_run_key] = sample