def generate_referenced_species_database(protein_id, referenced_species):
    '''
        Creates a database for a referenced species and protein, using formatdb.
        The coding exons stored under (protein_id, referenced_species, "ensembl")
        are exported to a FASTA file in the protein's database directory and the
        formatdb command is then run on that file.
        @param protein_id: protein identifier, used to look up the exons and the
                           database directory
        @param referenced_species: species the database is built for; also the
                                   base name of the generated .fa file
        @return: True if the formatdb command produced no output, False otherwise
                 (the output is then logged as a warning on the 'alignment' logger)
    '''
    logger              = Logger.Instance()
    alignment_logger    = logger.get_logger('alignment')

    command_generator   = CommandGenerator()
    crawler             = DirectoryCrawler()

    exon_container      = ExonContainer.Instance()

    input_exons         = exon_container.get((protein_id, referenced_species, "ensembl"))

    input_db_file       = "{0}/{1}.fa".format(crawler.get_database_path(protein_id), referenced_species)
    sequence_type       = "Nucleotide"

    input_exons.export_coding_exons_to_fasta(input_db_file)

    command             = command_generator.generate_formatdb_command(input_db_file, sequence_type)
    process             = Popen(command, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True)
    # communicate() closes stdin, drains stdout (stderr is merged into it) and
    # waits for the child, so no open pipe or zombie process is left behind.
    output              = process.communicate()[0]

    # Any output at all is treated as a failure. Truthiness is used instead of
    # comparing with "" because the pipe may yield bytes, which never equal "".
    if output:
        #LOGGING
        alignment_logger.warning("{0}, {1}, REF SPECIES DB, {2}".format(protein_id, referenced_species.strip(), output.strip()))
        return False
    return True
def generate_blastn_alignments(protein_id, species_list = None, referenced_species = "Homo_sapiens"):
    '''
        Runs the blastn program for a specified protein and list of species
        @param protein_id
        @param species_list: if provided, runs blastn for this list of species,
                             otherwise runs for the targets returned by
                             AlignmentTargetGenerator.get_blastn_targets
                             (originally documented as the species that are
                             missing the blastn output according to the .status
                             file in the blastn folder).
        @param referenced_species: species whose .fa file in the protein's
                                   database directory is passed to blastn as
                                   the database
        @return: True if blastn produced no output for every species, False if
                 at least one species failed (the failed species are passed to
                 AlignmentTargetGenerator.set_failed_blastn_targets)
    '''
    logger              = Logger.Instance()
    alignment_logger    = logger.get_logger('alignment')

    crawler             = DirectoryCrawler()

    command_generator   = CommandGenerator()
    alignment_generator = AlignmentTargetGenerator()

    failed_species_list = []

    # retrieve the blastn targets
    if (not species_list):
        species_list    = alignment_generator.get_blastn_targets(protein_id)

    for species in species_list:
        species_name    = species.strip()

        ############# MOVE TO ANOTHER FNC
        output_file     = "{0}/{1}.blastout".format(crawler.get_blastn_path(protein_id), species_name)
        input_file      = "{0}/{1}.fa".format(crawler.get_expanded_gene_path(protein_id), species_name)
        database        = "{0}/{1}.fa".format(crawler.get_database_path(protein_id), referenced_species)

        command         = command_generator.generate_blastn_command(database, input_file, output_file)
        process         = Popen(command, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True)
        # communicate() closes stdin, drains stdout (stderr is merged into it)
        # and waits for the child, so no open pipe or zombie process is left
        # behind for each species.
        output          = process.communicate()[0]

        # Any output at all is treated as a failure. Truthiness is used instead
        # of comparing with "" because the pipe may yield bytes, which never
        # equal "".
        if output:
            # Discard the output file of the failed run. It may not exist if
            # blastn failed before writing anything; an unguarded remove would
            # raise and abort the remaining species.
            if os.path.exists(output_file):
                os.remove(output_file)
            #LOGGING
            alignment_logger.warning("{0}, {1}, BLASTN, {2}".format(protein_id, species_name, output.strip()))
            failed_species_list.append(species_name)

    if failed_species_list:
        alignment_generator.set_failed_blastn_targets(protein_id, failed_species_list)
        return False
    return True
def generate_SW_gene_alignments(protein_id, species_list = None, referenced_species = "Homo_sapiens"):
    '''
        Runs the SW program for a specified protein and list of species, using the expanded gene region.
        @param protein_id
        @param species_list: if provided, runs SW for this list of species,
                             otherwise runs for the targets returned by
                             AlignmentTargetGenerator.get_SW_gene_targets
                             (originally documented as the species that are
                             missing the SW output according to the .status
                             file in the /SW/gene folder).
        @param referenced_species: species whose .fa file in the protein's
                                   database directory is passed to SW as the
                                   target database
        @return: True if SW produced no output for every species, False if at
                 least one species failed (the failed species are passed to
                 AlignmentTargetGenerator.set_failed_SW_gene_targets)
    '''
    logger                   = Logger.Instance()
    alignment_logger         = logger.get_logger('alignment')

    alignment_generator      = AlignmentTargetGenerator()
    crawler                  = DirectoryCrawler()
    command_generator        = CommandGenerator()

    if (not species_list):
        species_list         = alignment_generator.get_SW_gene_targets(protein_id)

    failed_species_list = []
    for species in species_list:
        species_name         = species.strip()

        ########### MOVE
        output_file          = "{0}/{1}.swout".format(crawler.get_SW_gene_path(protein_id), species_name)
        query_sequence_file  = "{0}/{1}.fa".format(crawler.get_expanded_gene_path(protein_id), species_name)
        target_fasta_db_file = "{0}/{1}.fa".format(crawler.get_database_path(protein_id), referenced_species)

        command              = command_generator.generate_SW_command(query_sequence_file, target_fasta_db_file, output_file)
        process              = Popen(command, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True)
        # communicate() closes stdin, drains stdout (stderr is merged into it)
        # and waits for the child, so no open pipe or zombie process is left
        # behind for each species.
        output               = process.communicate()[0]

        # Any output at all is treated as a failure. Truthiness is used instead
        # of comparing with "" because the pipe may yield bytes, which never
        # equal "".
        if output:
            #LOGGING
            alignment_logger.warning("{0}, {1}, SW GENE, {2}".format(protein_id, species_name, output.strip()))
            failed_species_list.append(species_name)

    # Clean up the scratch file in the working directory (presumably written by
    # the SW command itself -- confirm against CommandGenerator). It is absent
    # when no command was run, e.g. for an empty species list, and an unguarded
    # remove would raise before the failed targets are recorded.
    if os.path.exists(".sw_stdout_supressed"):
        os.remove(".sw_stdout_supressed")

    if failed_species_list:
        alignment_generator.set_failed_SW_gene_targets(protein_id, failed_species_list)
        return False
    return True