Python DirectoryCrawler.get_protein_path Examples

Programming Language: Python

Namespace/Package Name: utilities.DirectoryCrawler

Class/Type: DirectoryCrawler

Method/Function: get_protein_path

Examples at hotexamples.com: 5

Python DirectoryCrawler.get_protein_path - 5 examples found. These are the top-rated real-world Python examples of utilities.DirectoryCrawler.DirectoryCrawler.get_protein_path, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

get_protein_path(5)

get_database_path(5)

get_tblastn_path(4)

get_SW_gene_path(4)

get_assembled_protein_path(4)

get_blastn_path(4)

get_mafft_path(3)

get_mutual_best_status_file_path(3)

generate_directory_tree(3)

get_SW_exon_path(3)

get_exon_genewise_path(3)

get_expanded_gene_path(3)

get_protein_description_file_path(2)

get_exon_ensembl_path(2)

get_root_path(2)

get_gene_path(1)

get_genewise_path(1)

Example #1

Show file

File: FileUtilities.py Project: abulovic/SuperExonRetriver2000

def reset_action(protein_id, key):
    '''
    Flags the pipeline step `key` as FAILED in the status file of
    `protein_id` and clears the directories associated with that step.

    @param protein_id: protein whose status entry and directories are reset
    @param key: name of the pipeline step (e.g. 'GENE_RETRIEVAL'); a key
                that is not listed below only gets its status entry updated
    '''
    update_entry_in_status_file(protein_id, key, 'FAILED')
    crawler = DirectoryCrawler()

    # Step name -> names of the DirectoryCrawler getters whose directories
    # are cleared, listed in the order in which they are cleared.
    getters_by_step = {
        'GENE_RETRIEVAL':          ('get_gene_path',),
        'EXP_GENE_RETRIEVAL':      ('get_expanded_gene_path',),
        'PROTEIN_RETRIEVAL':       ('get_protein_path',),
        'ENSEMBL_EXON_RETRIEVAL':  ('get_exon_ensembl_path',),
        'GENEWISE_EXON_RETRIEVAL': ('get_exon_genewise_path',
                                    'get_genewise_path'),
        'REF_SP_DB_FORMATTING':    ('get_database_path',),
        'BLASTN_ALIGNMENT':        ('get_blastn_path',),
        'TBLASTN_ALIGNMENT':       ('get_tblastn_path',),
        'SW_GENE_ALIGNMENT':       ('get_SW_gene_path',),
        'SW_EXON_ALIGNMENT':       ('get_SW_exon_path',),
    }

    for getter_name in getters_by_step.get(key, ()):
        path_getter = getattr(crawler, getter_name)
        clear_directory(path_getter(protein_id))

Example #2

Show file

File: LocalDbSearchEngine.py Project: abulovic/SuperExonRetriver2000

def populate_sequence_protein (protein_id):
    '''
    Populates the "/PROTEIN_ID/sequence/protein/<species>.fa"
    folder with fasta files containing protein sequence for
    all the species registered by the Reciprocal Best Search.

    If the protein ids cannot be read (IOError), the error is written to
    the 'data_retrieval' logger and the function returns without doing
    anything else.

    @param protein_id: id of the protein whose folder is populated
    '''
    logger                      = Logger.Instance()
    alignment_logger            = logger.get_logger('data_retrieval')

    # NOTE(review): alignment_command_generator, protein_path and the two
    # protein id collections are not used in the visible part of this
    # function; they are kept because the body appears to continue beyond
    # this excerpt - confirm before removing.
    alignment_command_generator = AlignmentCommandGenerator()
    directory_crawler           = DirectoryCrawler()
    protein_path                = directory_crawler.get_protein_path(protein_id)
    try:
        (proteins_known, proteins_abinitio) = DescriptionParser().get_protein_ids(protein_id)
    except IOError as e:
        # Only two arguments are supplied, so the last placeholder must be
        # {1}; the previous {2} raised IndexError inside this handler and
        # hid the original I/O error.
        alignment_logger.error("{0}, PROTEIN, , {1}".format(protein_id, e))
        return

Example #3

Show file

File: Alignments.py Project: abulovic/SuperExonRetriver2000

def generate_tblastn_alignments(protein_id, species_list = None, referenced_species = "Homo_sapiens"):
    '''
    Runs the tblastn program for a specified protein and list of species.

    Any text the command writes to stdout/stderr is treated as a failure:
    the output file for that species is deleted (if it was created), a
    warning is logged and the species is recorded as failed.

    @param protein_id: protein for which the alignments are generated
    @param species_list: if provided, runs tblastn for this list of species,
                         otherwise runs for the species returned by
                         AlignmentTargetGenerator.get_tblastn_targets
    @param referenced_species: name of the fasta file (without extension) in
                               the database directory used as the database
    @return: True if every species ran without output, False otherwise
    '''
    logger              = Logger.Instance()
    alignment_logger    = logger.get_logger('alignment')

    alignment_generator = AlignmentTargetGenerator()
    crawler             = DirectoryCrawler()
    command_generator   = CommandGenerator()

    if not species_list:
        species_list    = alignment_generator.get_tblastn_targets(protein_id)

    failed_species_list = []
    for species in species_list:
        # Species names may carry surrounding whitespace (e.g. a trailing
        # newline); normalise once and reuse.
        species_name    = species.strip()

        output_file     = "{0}/{1}.blastout".format(crawler.get_tblastn_path(protein_id), species_name)
        input_file      = "{0}/{1}.fa".format(crawler.get_protein_path(protein_id), species_name)
        database        = "{0}/{1}.fa".format(crawler.get_database_path(protein_id), referenced_species)

        command         = command_generator.generate_tblastn_command(database, input_file, output_file)
        process         = Popen(command, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True)
        # communicate() closes the child's stdin, drains its output and waits
        # for it to exit, so no zombie process or open pipe is left behind.
        output          = process.communicate()[0]
        if output:
            # The command may have failed before creating the output file;
            # an unconditional remove would abort the whole run with OSError.
            if os.path.isfile(output_file):
                os.remove(output_file)
            alignment_logger.warning("{0}, {1}, TBLASTN, {2}".format(protein_id, species_name, output.strip()))
            failed_species_list.append(species_name)

    if failed_species_list:
        alignment_generator.set_failed_tblastn_targets(protein_id, failed_species_list)
        return False
    return True

Example #4

Show file

File: OrthologPipe.py Project: abulovic/SuperExonRetriver2000

def main():
    '''
    Retrieves the list of all the proteins from reference species.
    For each ref species protein, it tries to find orthologues for all the species (from the species list)
    and generates the description file accordingly. If a non-empty status file already exists,
    the protein is skipped; a MUTUAL_BEST status other than OK is logged as an error and the
    protein is counted as failed.

    The ids of all failed proteins are printed at the end.
    '''

    reference_species = "Homo_sapiens"

    dc = DirectoryCrawler()
    acg = AlignmentCommandGenerator()

    logger = Logger.Instance()
    mutual_best_logger = logger.get_logger('mutual_best')

    protein_list = get_protein_list()
    species_list = get_default_species_list()
    failed_proteins = []

    for (protein_id, num_of_exons) in protein_list:

        # print(x) with a single argument behaves the same as the Python 2
        # print statement, so the output is unchanged.
        print(protein_id)

        # generate all the directories for the protein
        dc.generate_directory_tree(protein_id)

        descr_file_path = dc.get_protein_description_file_path(protein_id)
        status_file_path = dc.get_mutual_best_status_file_path(protein_id)

        # A non-empty status file means this protein was already processed.
        if os.path.isfile(status_file_path) and os.path.getsize(status_file_path):
            print(DescriptionParser().get_protein_ids(protein_id))

            status_dict = read_status_file(protein_id)
            if 'MUTUAL_BEST' in status_dict:
                if status_dict['MUTUAL_BEST'] == 'OK':
                    mutual_best_logger.info('-,%s,mutual_best already exists for this protein - moving to the next one' % protein_id)
                else:
                    mutual_best_logger.error('-,%s,mutual_best has failed for this protein (no orthologs found) - moving on the next one' % protein_id)
                    failed_proteins.append(protein_id)
            continue

        # create the description file; the with-block guarantees it is closed
        # even if fastacmd or one of the ortholog searches raises
        with open(descr_file_path, 'w') as descr_file:
            # reference protein file
            ref_species_pep = dc.get_protein_path(protein_id) + "/" + reference_species + ".fasta"
            fastacmd = acg.generate_fastacmd_protein_command(protein_id, reference_species, "all", ref_species_pep)

            p = Popen(fastacmd, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True)
            # communicate() drains the output and waits for the child to exit
            output = p.communicate()[0]
            if output:
                # any output from fastacmd is reported as an error; processing
                # of the protein continues regardless
                mutual_best_logger.error("%s,fastacmd error" % protein_id)

            # find orthologues for all species
            for species in species_list:
                find_ortholog_by_RBH(reference_species, species, ref_species_pep, protein_id, descr_file, mutual_best_logger)

        mutual_best_logger.info("\n\n")

        # check what we've found out, whether this protein has any orthologs
        (known_dict, abinitio_dict) = DescriptionParser().get_protein_ids(protein_id)
        # the search failed when there are no ab initio hits and the known
        # hits are either empty or consist of the reference species alone
        only_reference_known = (len(known_dict) == 1
                                and list(known_dict.keys())[0] == reference_species)
        if not abinitio_dict and (not known_dict or only_reference_known):
            mutual_best_logger.info("-,%s, mutual best failed for this protein." % protein_id)
            update_entry_in_status_file(protein_id, "MUTUAL_BEST", "FAILED")
            failed_proteins.append(protein_id)
        else:
            update_entry_in_status_file(protein_id, "MUTUAL_BEST", "OK")

    print("Failed proteins: ")
    for failed_protein_id in failed_proteins:
        print(failed_protein_id)

Example #5

Show file

File: OrthologFinder.py Project: abulovic/SuperExonRetriver2000

        else:
            break
        i += 1
    
    pattern = re.compile("lcl\|(.*)\spep::*")
    for title in best_alignments:
        prot_match = re.match(pattern, title)
        if prot_match.groups()[0] == protein_id:
            return title
    return best_alignments[0]
    
    
    
if __name__ == '__main__':
    # Ad-hoc driver: runs the ortholog search for one hard-coded protein
    # against every species in the default species list.
    # NOTE(review): the module-level names below are kept unchanged because
    # functions in this module may read them as globals - confirm before
    # renaming any of them.
    reference_species = "Homo_sapiens"
    protein_id = "ENSP00000311134"
    acg = AlignmentCommandGenerator()
    dc = DirectoryCrawler()

    # make sure the directory layout for the protein exists, then open
    # (and truncate) its description file
    dc.generate_directory_tree(protein_id)
    descr_file_path = dc.get_protein_description_file_path(protein_id)
    descr_file = open(descr_file_path, 'w')

    # fasta file for the reference species protein, written by fastacmd
    output_file_path = dc.get_protein_path(protein_id) + "/" + reference_species + ".fasta"
    fastacmd = acg.generate_fastacmd_protein_command(
        protein_id, reference_species, "all", output_file_path)
    os.system(fastacmd)

    for species in get_default_species_list():
        find_ortholog_by_RBH(reference_species, species, output_file_path, protein_id)

    descr_file.close()