def check_for_missing_values(self, data):
    """Scan a parsed config dict for missing keys or values.

    ``data`` maps section names to dicts of key/value pairs.  The
    'general' section is checked first and any problem there is only a
    warning; in every other section an empty value is an error unless
    the key is 'barcode' or 'adaptor' (those may legitimately be empty).

    Returns:
        (error, warn): booleans summarizing what was logged.
    """
    error = False
    warn = False
    # 'general' section first: anything missing is only worth a warning.
    if 'general' in data:
        for k, v in data['general'].items():
            if not k:
                logger.warning("(key: general) key for: '" + v + "' is missing or corrupt - Continuing")
                warn = True
            if v == '':
                logger.warning("(key: general) value of: '" + k + "' is missing or corrupt - Continuing")
                warn = True

    # All remaining (per-sample) sections.
    for item in data:
        if item == 'general':
            continue
        for k, v in data[item].items():
            if not k:
                logger.warning("(key: " + item + ") key for: '" + v + "' is missing or corrupt - Continuing")
                warn = True
            if not v:
                if k in ('barcode', 'adaptor'):  # these could be empty
                    logger.warning("(key: " + item + ") value of: '" + k + "' is missing or corrupt - Continuing")
                else:
                    logger.error("(key: " + item + ") value of: '" + k + "' is missing or corrupt - Continuing")
                    error = True
    return (error, warn)
Example #2
0
 def check_for_missing_values(self, data):
     """Check every config section for empty keys/values.

     Problems in the 'general' section are warnings; an empty value in any
     other section is an error unless the key is 'barcode' or 'adaptor'
     (which are allowed to be empty).

     Returns:
         (error, warn) flag tuple.
     """
     error = False
     warn = False
     for item in data:
         if item == 'general':
             # .items() works on both Python 2 and 3; .iteritems() was Py2-only.
             for k, v in data[item].items():
                 if not k:
                     logger.warning("(key: "+item+") key for: '"+v+"' is missing or corrupt - Continuing")
                     warn = True
                 if v == '':
                     logger.warning("(key: "+item+") value of: '"+k+"' is missing or corrupt - Continuing")
                     warn = True

     for item in data:
         if item != 'general':
             for k, v in data[item].items():
                 if not k:
                     logger.warning("(key: "+item+") key for: '"+v+"' is missing or corrupt - Continuing")
                     warn = True
                 if not v:
                     if k in ('barcode', 'adaptor'):  # these could be empty
                         logger.warning("(key: "+item+") value of: '"+k+"' is missing or corrupt - Continuing")
                     else:
                         logger.error("(key: "+item+") value of: '"+k+"' is missing or corrupt - Continuing")
                         error = True
     return (error, warn)
 def check_and_make_dir(self, dir_name):
     """Create ``dir_name`` (and any missing parents), tolerating existence.

     EAFP: creation is attempted unconditionally; if it fails because the
     directory is already present only a warning is logged, while any other
     OSError is re-raised for the caller to handle.  Returns ``dir_name``.
     """
     try:
         os.makedirs(dir_name)
     except OSError:
         if not os.path.isdir(dir_name):
             # Real creation failure (permissions, bad path, ...) - surface it.
             raise
         logger.warning("\nDirectory %s already exists." % (dir_name))
     return dir_name
    def check_and_make_dir(self, dir_name):
        """Ensure that directory ``dir_name`` exists and hand the path back.

        Creation is attempted first; an OSError caused by the directory
        already being present is reduced to a warning, and every other
        OSError propagates to the caller.
        """
        try:
            os.makedirs(dir_name)
        except OSError:
            already_there = os.path.isdir(dir_name)
            if already_there:
                logger.warning("\nDirectory %s already exists." % (dir_name))
            else:
                # Unexpected failure during creation - let the caller know.
                raise
        return dir_name
 def get_input_files(self):
     """Return the basenames of the regular files in the configured input_dir.

     Subdirectories inside input_dir are skipped.  When input_dir does not
     exist (or is unreadable) a warning is logged and an empty list is
     returned.
     """
     files_list = []
     input_dir = self.general_config_dict['input_dir']
     print(input_dir)
     if os.path.isdir(input_dir):
         for infile in glob.glob(os.path.join(input_dir, '*')):
             # Only plain files: nested directories are not input files.
             if not os.path.isdir(infile):
                 files_list.append(os.path.basename(infile))
     else:
         # BUG FIX: the old code referenced an undefined name 'fasta_file'
         # here, raising NameError instead of reaching this warning.
         logger.warning("No input directory or directory permissions problem: " + input_dir)
     return files_list
Example #6
0
    def check_projects_and_datasets(self,data):
        """Warn about project/dataset names in ``data`` that already exist in the DB.

        Opens a connection to the hard-coded host, then for every
        non-'general' config section looks up that section's project and
        dataset names.  Existing names are logged as warnings (at most three
        existing datasets are listed per project before a "possibly more"
        message); new names are logged at debug level.

        Returns:
            (error, warn): note that ``error`` is never set True in this
            method, so only the warn flag is meaningful to callers.

        NOTE(review): project/dataset names are interpolated directly into
        SQL below - presumably they come from a trusted config file, but
        confirm before accepting user-supplied names here.
        """
        # Hard-coded production host; the commented line was a default-host variant.
        self.my_conn     = MyConnection(host='newbpcdb2.jbpc-np.mbl.edu', db="env454")
#         self.my_conn     = MyConnection()      
        project_dataset = {}
        projects = {}
        datasets = {}
        error   =False
        warn    =False
        # Build dataset -> project map and the set of distinct projects
        # from every per-sample section.
        for item in data:
            if item != 'general':
                #project_dataset[data[item]['project']+'--'+data[item]['dataset']] = 1
                datasets[data[item]['dataset']] = data[item]['project']
                projects[data[item]['project']] = 1
        for p in projects:
            #print p 
            my_sql = """SELECT project FROM project WHERE project = '%s'""" % (p)
            res    = self.my_conn.execute_fetch_select(my_sql)
            if res:
                logger.warning("project '"+p+"' already exists in the database - is this okay?")
                warn = True
            else:
                logger.debug("project '"+p+"' is new")
                
            ds_found_count = 0   
            # Check only the datasets belonging to this project.
            for d in datasets:
                if datasets[d] == p:
                    
                    #print "\t%s" % (d)
                    my_sql = """SELECT dataset FROM dataset WHERE dataset = '%s'""" % (d)
                    res    = self.my_conn.execute_fetch_select(my_sql)
                    if res:
                        ds_found_count += 1
                        # Cap the per-project noise at three named datasets.
                        if ds_found_count >3:
                            logger.warning("\t\tPossibly more .... - Exiting after just three")
                            break
                        logger.warning("\tdataset '"+d+"' already exists in the database - is this okay?")
                        warn=True
                    else:
                        logger.debug("\tdataset '"+d+"' is new")
            logger.debug("\tDataset Count: "+str(len(datasets)))
        return (error,warn)      
    def check_projects_and_datasets(self, data):
        """Report project/dataset names that already exist in the database.

        Builds a dataset->project map from every non-'general' config
        section, then queries the database for each project and for each of
        its datasets.  Existing names are logged as warnings (at most three
        datasets per project before a "possibly more" notice); new names are
        logged at debug level.

        Returns:
            (error, warn) flag pair - ``error`` is never set True here.
        """
        self.get_my_conn()
        project_dataset = {}  # kept for parity with the original; unused
        projects = {}
        datasets = {}
        error = False
        warn = False
        for section in data:
            if section == 'general':
                continue
            datasets[data[section]['dataset']] = data[section]['project']
            projects[data[section]['project']] = 1
        for proj in projects:
            my_sql = """SELECT project FROM project WHERE project = '%s'""" % (proj)
            if self.my_conn.execute_fetch_select(my_sql):
                logger.warning("project '"+proj+"' already exists in the database - is this okay?")
                warn = True
            else:
                logger.debug("project '"+proj+"' is new")

            found = 0
            for ds in datasets:
                if datasets[ds] != proj:
                    continue
                my_sql = """SELECT dataset FROM dataset WHERE dataset = '%s'""" % (ds)
                if self.my_conn.execute_fetch_select(my_sql):
                    found += 1
                    if found > 3:
                        logger.warning("\t\tPossibly more .... - Exiting after just three")
                        break
                    logger.warning("\tdataset '"+ds+"' already exists in the database - is this okay?")
                    warn = True
                else:
                    logger.debug("\tdataset '"+ds+"' is new")
            logger.debug("\tDataset Count: "+str(len(datasets)))
        return (error, warn)
 def gather_files_per_key(self, key):
     """Collect the per-dataset GAST file paths and a sequence count.

     ``key`` names a dataset; paths are rooted at ``self.global_gast_dir``.
     The sequence count comes from ``grep -c '>'`` over the original fasta
     when available, otherwise over the uniques file; it is 0 when no
     counting command applies (e.g. the 454 platform) or the count fails.

     Returns:
         (file_collector, dataset_count, out_gast_dir)
     """
     file_collector = {}
     out_gast_dir = os.path.join(self.global_gast_dir, key)  # directory
     file_collector['gast_concat_file'] = os.path.join(out_gast_dir, 'gast_concat')
     file_collector['tagtax_file'] = os.path.join(out_gast_dir, 'tagtax_terse')
     if not os.path.exists(file_collector['gast_concat_file']):
         logger.warning("Could not find gast_concat_file file: "+file_collector['gast_concat_file'])
     if not os.path.exists(file_collector['tagtax_file']):
         logger.warning("Could not find tagtax_file file: "+file_collector['tagtax_file'])

     grep_cmd = None  # BUG FIX: was left unbound on the 454 platform path
     if self.runobj.vamps_user_upload:
         file_collector['unique_file'] = os.path.join(out_gast_dir, 'unique.fa')
         file_collector['original_fa_file'] = os.path.join(out_gast_dir, 'fasta.fa')
         # Prefer counting the original (non-unique) fasta when we have it.
         if self.runobj.fasta_file:
             grep_cmd = ['grep', '-c', '>', self.runobj.fasta_file]
         else:
             grep_cmd = ['grep', '-c', '>', file_collector['unique_file']]
     else:
         if self.runobj.platform == 'illumina':
             reads_dir = dirs.check_dir(dirs.reads_overlap_dir)
             file_prefix = self.runobj.samples[key].file_prefix
             file_collector['unique_file'] = os.path.join(reads_dir, file_prefix+"-PERFECT_reads.fa.unique")
             # ANNA What is the correct file here:
             file_collector['original_fa_file'] = os.path.join(reads_dir, file_prefix+"-PERFECT_reads.fa.unique")
             grep_cmd = ['grep', '-c', '>', file_collector['unique_file']]
         elif self.runobj.platform == '454':
             pass  # no per-dataset fasta to count; dataset_count stays 0
         else:
             sys.exit("no usable platform found")

     # BUG FIX: guard the lookup - on the 454 path 'unique_file' is never
     # set and the old code raised KeyError here.
     if 'unique_file' in file_collector and not os.path.exists(file_collector['unique_file']):
         logger.error("Could not find unique_file: "+file_collector['unique_file'])

     # get dataset_count here from the fasta; it should come from the
     # non-unique file, but if we don't have that we must use uniques.
     dataset_count = 0
     if grep_cmd is not None:
         try:
             dataset_count = subprocess.check_output(grep_cmd).strip()
         except (OSError, subprocess.CalledProcessError):
             # Narrowed from a bare 'except:' that silently hid even
             # programming errors; a failed/missing grep just means 0.
             dataset_count = 0
     print(key, ": Sequence Count", dataset_count)

     # output files to be created:
     file_collector['taxes_file']                = os.path.join(out_gast_dir, 'vamps_data_cube_uploads.txt')
     file_collector['summed_taxes_file']         = os.path.join(out_gast_dir, 'vamps_junk_data_cube_pipe.txt')
     file_collector['distinct_taxes_file']       = os.path.join(out_gast_dir, 'vamps_taxonomy_pipe.txt')
     file_collector['sequences_file']            = os.path.join(out_gast_dir, 'vamps_sequences_pipe.txt')
     file_collector['export_file']               = os.path.join(out_gast_dir, 'vamps_export_pipe.txt')
     file_collector['projects_datasets_file']    = os.path.join(out_gast_dir, 'vamps_projects_datasets_pipe.txt')
     file_collector['project_info_file']         = os.path.join(out_gast_dir, 'vamps_projects_info_pipe.txt')

     return (file_collector, dataset_count, out_gast_dir)
Example #9
0
    def validate_illumina_ini(self, analysis_dir):
        """
        Validate the (possibly csv-converted) ini config for an illumina run.

        Runs every check_* validation pass over the parsed config, then
        verifies that an input directory with files (or explicit input
        files) exists.  Exits the process on fatal errors; otherwise returns
        a warning message string (empty when validation is clean).
        The csv headers are checked earlier.
        """
        print("Validating ini type Config File (may have been converted from csv)")
        new_ini_file = os.path.join(analysis_dir, self.general_config_dict['run'] + '.ini')
        print("New ini file location: " + new_ini_file)
        msg = ''
        error = False
        warn = False
        # configPath here is the new configPath
        self.data_object = self.configDictionaryFromFile_ini(self.general_config_dict['configPath'])

        # Fold the error/warn flags of every validation pass together
        # (replaces six copy-pasted error_code/warn_code blocks).
        for check in (self.check_for_missing_values,
                      self.check_for_datasets,
                      self.check_domain_suite_region,
                      self.check_project_name,
                      self.check_dataset_name,
                      self.check_projects_and_datasets):
            (error_code, warn_code) = check(self.data_object)
            error = error or error_code
            warn = warn or warn_code

        general = self.data_object['general']
        if 'input_dir' not in general and 'input_files' not in general:
            logger.warning("No input directory and no input files")
            warn = True
        elif not os.path.isdir(general['input_dir']):
            logger.error("That is not a directory: " + general['input_dir'])
            error = True
        elif general['input_file_format'] == 'fastq' and general['platform'] == 'illumina':
            # Walk input_dir and stop as soon as one regular file is found.
            file_exists = False
            for dirname, dirnames, filenames in os.walk(general['input_dir']):
                for file_name in filenames:
                    if os.path.isfile(os.path.join(dirname, file_name)):
                        file_exists = True
                        break
                if file_exists:
                    break
            if not file_exists:
                logger.error("There are no files found in the input directory: " + general['input_dir'])
                error = True
        elif 'input_dir' in general and general['input_dir'] and not general.get('input_files'):
            logger.error("There are no files found in the input directory: " + general['input_dir'])
            error = True

        if error:
            sys.exit( """\n\t\033[91mTHERE WERE SEVERE PROBLEMS WITH THE CSV and/or CONFIG FILE - EXITING 
            PLEASE CORRECT THEM AND START OVER.\033[0m\n
            To view the errors add ' --loglevel info' to the command line.\n""")
        elif warn:
            msg = """\n\t\033[93mTHERE WERE NON-FATAL PROBLEMS WITH THE CSV and/or CONFIG FILE THAT MAY OR MAY NOT CAUSE PROBLEMS.\033[0m\n
                To view the warnings add ' --loglevel warning' to the command line.\n"""
            # typo fix: "Vaidation" -> "Validation"
            print("\033[92mCSV File Passed Validation! (with warnings)\033[0m")
        else:
            print("\033[92mCSV File Passed Validation!\033[0m")
        return msg
    def validate_illumina_ini(self, analysis_dir):
        """
        Validate the (possibly csv-converted) ini config for an illumina run.

        Runs every check_* validation pass over the parsed config, then
        verifies that an input directory with files (or explicit input
        files) exists.  Exits the process on fatal errors; otherwise returns
        a warning message string (empty when validation is clean).
        The csv headers are checked earlier.
        """
        print("Validating ini type Config File (may have been converted from csv)")
        new_ini_file = os.path.join(analysis_dir, self.general_config_dict['run'] + '.ini')
        print("New ini file location: " + new_ini_file)
        msg = ''
        error = False
        warn = False
        # configPath here is the new configPath
        self.data_object = self.configDictionaryFromFile_ini(self.general_config_dict['configPath'])

        # Fold the error/warn flags of every validation pass together
        # (replaces six copy-pasted error_code/warn_code blocks).
        for check in (self.check_for_missing_values,
                      self.check_for_datasets,
                      self.check_domain_suite_region,
                      self.check_project_name,
                      self.check_dataset_name,
                      self.check_projects_and_datasets):
            (error_code, warn_code) = check(self.data_object)
            error = error or error_code
            warn = warn or warn_code

        general = self.data_object['general']
        if 'input_dir' not in general and 'input_files' not in general:
            logger.warning("No input directory and no input files")
            warn = True
        elif not os.path.isdir(general['input_dir']):
            logger.error("That is not a directory: " + general['input_dir'])
            error = True
        elif general['input_file_format'] == 'fastq' and general['platform'] in C.illumina_list:
            # Walk input_dir and stop as soon as one regular file is found
            # (the old code kept walking every remaining directory).
            file_exists = False
            for dirname, dirnames, filenames in os.walk(general['input_dir']):
                for file_name in filenames:
                    if os.path.isfile(os.path.join(dirname, file_name)):
                        file_exists = True
                        break
                if file_exists:
                    break
            if not file_exists:
                logger.error("There are no files found in the input directory: " + general['input_dir'])
                error = True
        elif 'input_dir' in general and general['input_dir'] and not general.get('input_files'):
            logger.error("There are no files found in the input directory: " + general['input_dir'])
            error = True

        if error:
            sys.exit( """\n\t\033[91mTHERE WERE SEVERE PROBLEMS WITH THE CSV and/or CONFIG FILE - EXITING
            PLEASE CORRECT THEM AND START OVER.\033[0m\n
            To view the errors add ' --loglevel info' to the command line.\n""")
        elif warn:
            msg = """\n\t\033[93mTHERE WERE NON-FATAL PROBLEMS WITH THE CSV and/or CONFIG FILE THAT MAY OR MAY NOT CAUSE PROBLEMS.\033[0m\n
                To view the warnings add ' --loglevel warning' to the command line.\n"""
            # typo fix: "Vaidation" -> "Validation"
            print("\033[92mCSV File Passed Validation! (with warnings)\033[0m")
        else:
            print("\033[92mCSV File Passed Validation!\033[0m")
        return msg