class SifterAnalysisProject(GitAnalysisProject):
    '''
    This project controller extends GitAnalysisProject, adding
    functionality specific to running a SIFTER analysis.
    '''
    
    implements(pc.IAnalysisProject)
    
    def __init__(self):
        '''
        Initializes an empty query container
        '''
        self.query_collection = SifterQueryCollection()
    
    def parse_hypothetical_pfam_domains(self, pfam_results):
        '''
        Given pfam_results (from PfamHMMERQuery().parse_results()),
        creates domain_architecture entries for the queries.
        '''
        # For each HMMER hit, add a domain entry.
        for result in pfam_results:
            if not hasattr(result, "_annotations"):
                continue
            
            seq_id = result._annotations['seqName']
            domain_id = result._annotations['hmmName']
            new_domain_region = ProteinInformation(
                identifier=domain_id,
                content=result,
                retrieval_method="PFamQuery",
                retrieval_date=datetime.now().strftime("%Y-%m-%d %H:%M"))
            
            q = self.query_collection.get_query(query_id=seq_id)
            p = q.protein_collection.get_protein(protein_id=seq_id)
            
            p.domain_architecture.add_domain_region(domain_region=new_domain_region)
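
    # Illustration of the parsed-hit shape this method relies on: each
    # result is expected to carry an _annotations dict holding the query
    # sequence id and the Pfam HMM name, e.g. (values hypothetical):
    #
    #   result._annotations == {'seqName': 'query_1', 'hmmName': 'Pkinase'}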
        
    def setup_pfam_mysql(self):
        '''
        Establishes a connection to the Pfam MySQL database using settings
        from the settings repository; does nothing if already connected.
        '''
        if hasattr(self, "pfam_mysql"):
            return
        
        self.pfam_mysql = PfamMySQLQuery()
        
        executable_locs = {}
        params = {}
        for s in ['db_username', 'db_password', 'db_address', 'db_name']:
            params[s] = self.settings_repository.get_item_value(('mysql_database_pfam', s))
        
        self.pfam_mysql.setup_caller(executable_locations=executable_locs, params=params)
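
    # Illustration: setup_pfam_mysql expects the settings repository to
    # resolve ('mysql_database_pfam', item) for each of the four connection
    # items. If the backing settings file is INI-style, that would be a
    # section like (values hypothetical):
    #
    #   [mysql_database_pfam]
    #   db_username = pfam_reader
    #   db_password = secret
    #   db_address = localhost
    #   db_name = pfam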
        
    def get_pfam_mysql_for_each_query(self, db_table, db_field, extra_where_str, file_postfix, gzipped=True):
        '''
        For each query, extracts the requested field (e.g. the Pfam MSA
        or tree) for every domain from the Pfam MySQL database and writes
        it to a file in the query directory.
        '''
        self.setup_pfam_mysql()
        
        # For each query, loop over each domain and fetch the requested
        # field from the pre-computed Pfam tables.
        for q in self.query_collection:
            p = q.protein_collection.get_protein(q.query_id)
            
            cache_ids = []
            
            for d in p.domain_architecture:
                outpt_fname = os.path.join(os.path.dirname(q.destination),
                                           d.identifier + file_postfix)
                
                # Skip the MySQL round trip if this Pfam id was already
                # fetched; otherwise get the file from a MySQL query,
                # gunzip it, and save it into the query directory.
                if d.identifier not in cache_ids:
                    mysql_aq = "(select auto_pfamA from pfamA where pfamA_id='" + d.identifier + "')"
                    mysql_q = "select "+db_field+" from "+db_table+" " \
                            + "where auto_pfamA = "+mysql_aq+" " \
                            + extra_where_str + ";"
                    print mysql_q
                    self.pfam_mysql.call({'query': mysql_q})
                    self.pfam_mysql.parse_results()
                    
                    f_output = open(outpt_fname, "w")
                    if gzipped:
                        f_gzipped = StringIO.StringIO(self.pfam_mysql.parsed_results[0][0][0])
                        f = gzip.GzipFile(fileobj=f_gzipped, mode='rb')
                        f_output.write(f.read())
                        f.close()
                        f_gzipped.close()
                    else:
                        f_output.write(self.pfam_mysql.parsed_results[0][0][0])
                    f_output.close()
                        
                    # Cache the id so it is not retrieved multiple times.
                    cache_ids.append(d.identifier)
                
                # Store a reference to the created file on every domain,
                # including repeats that reuse an already-fetched file.
                if d.meta_data is None:
                    d.meta_data = {}
                d.meta_data[db_field] = os.path.basename(outpt_fname)
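
    # For example, with db_table="alignments_and_trees", db_field="alignment"
    # and extra_where_str="and type='full'" (as in make_queries_pfamscan
    # below), a hypothetical domain identifier 'Pkinase' produces the single
    # statement:
    #
    #   select alignment from alignments_and_trees
    #   where auto_pfamA = (select auto_pfamA from pfamA where pfamA_id='Pkinase')
    #   and type='full';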
    
    def make_phylo_placers_for_queries(self):
        '''
        Creates the phylogenetic placement infrastructure for the queries,
        using ThirdPartyWrappers.PplacerWrapper.
        '''
        for q in self.query_collection:
            p = q.protein_collection.get_protein(q.query_id)
            
            for d in p.domain_architecture:
                # TODO: set up a pplacer run for this domain from its Pfam
                # reference alignment and tree (see d.identifier and the
                # file names recorded in d.meta_data); sketched below.
                pass
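                # A minimal sketch of that per-domain placement call, assuming
                # PplacerWrapper follows the same setup_caller()/call() pattern
                # as the other wrappers used here; the parameter names below
                # are hypothetical:
                #
                #   placer = PplacerWrapper()
                #   placer.setup_caller(
                #       executable_locations={'pplacer': pplacer_loc},
                #       params={'reference_alignment': d.meta_data['alignment'],
                #               'reference_tree': d.meta_data['tree']})
                #   placer.call()
                #   placer.parse_results()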
    
    def make_queries_pfamscan(self, query_file):
        '''
        Runs HMMER (hmmscan) against the Pfam HMMs for the sequences in
        query_file, and creates a ProteinCollection with a scaffold
        AnnotatedProtein for each query sequence.
        '''
        
        # Create project infrastructure for queries
        self.query_collection.create_query_scaffolds(query_file=query_file,
                                                     destination_base_dir=self.file_repository.identity)
        
        self.query_collection.export_to_files()
        
        self.file_repository.save_repository(save_description="Query scaffolds created.")
        
        # Call HMMER on Pfam HMMs for the input fasta file
        pfam_hmm_scan = PfamHMMERQuery()
        # Get necessary executable locations from project settings
        executable_locs = {}
        executable_locs['hmmpress'] = self.settings_repository.get_item_value(('executable_locations', 'hmmpress'))
        executable_locs['hmmscan'] = self.settings_repository.get_item_value(('executable_locations', 'hmmscan'))
        # Get necessary parameters from project settings 
        params = {}
        params['pfam_db_loc'] = self.settings_repository.get_item_value(('local_database_locations', 'pfam_data')) 
        params['query_sequences_fasta_file'] = query_file
        # Make call and do initial parsing of results into format for ProteinInformation retrievers.
        pfam_hmm_scan.setup_caller(executable_locations=executable_locs, params=params)
        pfam_hmm_scan.call()
        pfam_hmm_scan.parse_results()
        
        # Create domain regions for each HMM hit
        self.parse_hypothetical_pfam_domains(pfam_results=pfam_hmm_scan.parsed_results)
        self.query_collection.export_to_files()
        self.file_repository.save_repository(save_description="Hypothetical domain architectures from Pfam HMMs written to query files.")
        
        # Get and gunzip MSAs, trees.
        self.get_pfam_mysql_for_each_query(db_table="alignments_and_trees",
                                            db_field="alignment",
                                            extra_where_str="and type='full'",
                                            file_postfix=".sto")
        self.get_pfam_mysql_for_each_query(db_table="alignments_and_trees",
                                            db_field="tree",
                                            extra_where_str="and type='full'",
                                            file_postfix=".tree")
        self.query_collection.export_to_files()
        self.file_repository.save_repository(save_description="Exported alignments and trees into query directories from Pfam MySQL.")
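
    # Typical driver usage, assuming the settings and file repositories are
    # attached by the surrounding project machinery (the fasta path is
    # hypothetical):
    #
    #   project = SifterAnalysisProject()
    #   ...  # attach settings_repository and file_repository
    #   project.make_queries_pfamscan(query_file="my_queries.fasta")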
        
        '''