def update_entry_in_status_file (protein_id, status_entry, status_entry_value):
    '''
    Sets a status entry of the protein's .status file to a new value.

    The path of the status file is obtained from
    DirectoryCrawler.get_mutual_best_status_file_path. If the file exists and
    is not empty, its current entries are loaded with read_status_file.
    If the entry is already present with the same value, nothing is written.
    Otherwise the entry is added/updated and the whole file is rewritten,
    one "<entry> <value>" pair per line.

    @param protein_id: ID of the protein whose status file is updated
    @param status_entry: name of the status entry (e.g. MUTUAL_BEST)
    @param status_entry_value: new value of the entry (e.g. OK, FAILED)
    '''

    dc = DirectoryCrawler()
    status_file_path = dc.get_mutual_best_status_file_path(protein_id)
    status_dict = {}

    # An empty file holds no entries, so there is nothing to parse;
    # this is the same guard main() applies before reading the file.
    if os.path.isfile(status_file_path) and os.path.getsize(status_file_path):
        status_dict = read_status_file(protein_id)

    # Entry already has the requested value - leave the file untouched
    if status_entry in status_dict and status_dict[status_entry] == status_entry_value:
        return

    status_dict[status_entry] = status_entry_value

    # Always rewrite the complete file instead of appending: appending to a
    # file that does not end with a newline would glue the new entry onto
    # the last existing line and make the file unreadable.
    with open(status_file_path, 'w') as status_file:
        for entry, value in status_dict.items():
            status_file.write("%s %s\n" % (entry, value))
def read_status_file (protein_id):
    '''
    Reads the .status file of a protein into a dictionary.

    Every non-blank line is expected to have the form "<entry> <value>".
    Blank lines are skipped, so an empty file yields an empty dictionary.
    The value is everything after the first run of whitespace, which matches
    the "%s %s" format used when the file is written.

    Status entries may be:
        MUTUAL_BEST:    OK/FAILED
        DATA_RETRIEVAL: OK/PARTIAL/FAILED

    @param protein_id: ID of the protein whose status file is read
    @return: status_dict dictionary of mapped status entries to their values
    @raise IOError: if the status file cannot be opened
    @raise ValueError: if a non-blank line has an entry name but no value
    '''

    dc = DirectoryCrawler()
    status_file_path = dc.get_mutual_best_status_file_path(protein_id)
    try:
        status_file = open(status_file_path, 'r')
    except IOError:
        raise IOError('No .status file for protein %s' % protein_id)

    status_dict = {}
    # the with-block closes the file even if a malformed line raises
    with status_file:
        for line in status_file:
            fields = line.strip().split(None, 1)
            if not fields:
                # blank line (or completely empty file)
                continue
            if len(fields) != 2:
                raise ValueError('Malformed line %r in .status file for protein %s'
                                 % (line, protein_id))
            status_dict[fields[0]] = fields[1]

    return status_dict
def main():

    '''
    Runs the mutual best (reciprocal best hit) search for all the proteins
    of the reference species.

    For each protein from get_protein_list():
      - the protein directory tree is generated
      - if a non-empty .status file already exists, the MUTUAL_BEST entry
        (if present) is logged, proteins whose entry is not OK are recorded
        as failed, and the protein is skipped
      - otherwise the description file is (re)created, the reference protein
        sequence is extracted with fastacmd, and find_ortholog_by_RBH is
        called for every species from get_default_species_list()
      - the description file is then parsed; if it holds no ab initio
        entries and no known entries other than the reference species,
        MUTUAL_BEST is set to FAILED, otherwise to OK

    At the end the IDs of all failed proteins are printed.
    '''

    reference_species = "Homo_sapiens"

    dc = DirectoryCrawler()
    acg = AlignmentCommandGenerator()

    logger = Logger.Instance()
    mutual_best_logger = logger.get_logger('mutual_best')

    protein_list = get_protein_list()
    species_list = get_default_species_list()
    failed_proteins = []

    for (protein_id, num_of_exons) in protein_list:

        print(protein_id)

        # generate all the directories for the protein
        dc.generate_directory_tree(protein_id)

        descr_file_path = dc.get_protein_description_file_path(protein_id)
        status_file_path = dc.get_mutual_best_status_file_path(protein_id)

        # a non-empty status file means this protein was already processed
        if os.path.isfile(status_file_path) and os.path.getsize(status_file_path):
            print(DescriptionParser().get_protein_ids(protein_id))

            status_dict = read_status_file(protein_id)
            if 'MUTUAL_BEST' in status_dict:
                if status_dict['MUTUAL_BEST'] == 'OK':
                    mutual_best_logger.info('-,%s,mutual_best already exists for this protein - moving to the next one' % protein_id)
                else:
                    mutual_best_logger.error('-,%s,mutual_best has failed for this protein (no orthologs found) - moving on the next one' % protein_id)
                    failed_proteins.append(protein_id)
            continue

        # create the description file; the with-block guarantees it is closed
        # even if fastacmd or the ortholog search raises
        with open(descr_file_path, 'w') as descr_file:
            # reference protein file
            ref_species_pep = dc.get_protein_path(protein_id) + "/" + reference_species + ".fasta"
            fastacmd = acg.generate_fastacmd_protein_command(protein_id, reference_species, "all", ref_species_pep)

            p = Popen(fastacmd, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True)
            # communicate() closes stdin, collects stdout+stderr and waits for
            # the child, so the process is reaped before the next iteration
            output = p.communicate()[0]
            # the code treats any output from the command as an error
            if output:
                mutual_best_logger.error("%s,fastacmd error" % protein_id)

            # find orthologues for all species
            for species in species_list:
                find_ortholog_by_RBH (reference_species, species, ref_species_pep, protein_id, descr_file, mutual_best_logger)

        mutual_best_logger.info("\n\n")

        # check what we've found out, whether this protein has any orthologs
        (known_dict, abinitio_dict) = DescriptionParser().get_protein_ids(protein_id)
        # the reference species alone does not count as a found ortholog
        only_reference_known = (not known_dict) or (list(known_dict.keys()) == [reference_species])
        if not abinitio_dict and only_reference_known:
            mutual_best_logger.info ("-,%s, mutual best failed for this protein." % protein_id)
            update_entry_in_status_file(protein_id, "MUTUAL_BEST", "FAILED")
            failed_proteins.append(protein_id)

        else:
            update_entry_in_status_file(protein_id, "MUTUAL_BEST", "OK")

    print("Failed proteins: ")
    for failed_protein_id in failed_proteins:
        print(failed_protein_id)