def main ():
    '''
    Command-line entry point: runs the populate step(s) selected by the first argument.

    Usage: <script> <blastn | tblastn | SW_gene | SW_exon | all>

    The mode is validated before any work is started. If it is missing or
    unknown, the usage line is printed and the process exits with status 1.
    For a valid mode the containers are filled (without alignments), the
    referenced species databases are populated, and then the matching
    populate_*_alignments function runs. Mode "all" runs the four of them in
    the order blastn, tblastn, SW_gene, SW_exon.
    '''
    valid_modes = ("blastn", "tblastn", "SW_gene", "SW_exon", "all")

    # sys.argv[0] is the script name, so a mode is only present when argv has
    # at least two entries. Checking this first avoids an IndexError on
    # sys.argv[1] and avoids running the expensive steps for a bad invocation.
    if len(sys.argv) < 2 or sys.argv[1] not in valid_modes:
        print("Usage: {0} <blastn | tblastn | SW_gene | SW_exon | all> \n".format(sys.argv[0]))
        # A bare `exit` expression does nothing; sys.exit actually terminates.
        sys.exit(1)
    mode = sys.argv[1]

    referenced_species = "Homo_sapiens"
    # Example exon container key (presumably protein id, species, exon type):
    # 'ENSP00000253108', 'Ailuropoda_melanoleuca', 'ensembl'

    # Only the first element of every protein list entry is passed on.
    protein_list = [protein_tuple[0] for protein_tuple in FileUtilities.get_protein_list()]

    fill_all_containers(False)
    populate_referenced_species_databases(protein_list, referenced_species)

    # Independent checks keep the original execution order for mode "all".
    if mode in ("blastn", "all"):
        populate_blastn_alignments(protein_list)
    if mode in ("tblastn", "all"):
        populate_tblastn_alignments(protein_list)
    if mode in ("SW_gene", "all"):
        populate_SW_gene_alignments(protein_list)
    if mode in ("SW_exon", "all"):
        populate_SW_exon_alignments(protein_list)
def fill_all_containers (load_alignments):
    '''
    Fills all the containers with the corresponding data.
    The containers are: data maps, proteins, genes, transcripts, ensembl exons,
    and (optionally) all the alignment exons.

    A directory tree is generated for every protein id first. If the protein
    configuration batch load returns a false value, nothing else is loaded.

    load_alignments -- when true, the blastn, tblastn, sw_gene and sw_exon exon
                       configurations are loaded as well, followed by frame
                       setting, removal of overlapping blastn/tblastn
                       alignments and annotation of spurious alignments.
    '''
    crawler = DirectoryCrawler()

    raw_entries = FileUtilities.get_protein_list()
    # Flatten the raw entries and keep every second element, which is a protein id.
    # NOTE(review): this relies on every raw entry having exactly two elements.
    protein_list = list(chain.from_iterable(raw_entries))[0::2]
    alignment_types = ["blastn", "tblastn", "sw_gene", "sw_exon"]

    for protein_id in protein_list:
        crawler.generate_directory_tree(protein_id)

    # Nothing more to do when the protein configuration load yields nothing.
    if not load_protein_configuration_batch(protein_list):
        return

    for exon_source in ("ensembl", "genewise"):
        load_exon_configuration_batch(protein_list, exon_source)

    if not load_alignments:
        return

    for alignment_type in alignment_types:
        load_exon_configuration_batch(protein_list, alignment_type)
    set_frames_to_coding_exons_batch(protein_list)
    remove_overlapping_alignments_batch(protein_list, ["blastn", "tblastn"])
    annotate_spurious_alignments_batch(protein_list, alignment_types)
def populate_exon_table():
    '''
    Gathers exons from the exon container and passes them to the database manager.

    For every (protein, species, exon type) combination the ordered exons are
    fetched. Objects whose exact type is Exon are kept only when their
    viability flag is true; objects of any other type are always kept.
    Combinations that raise KeyError are skipped silently. The resulting list
    is handed to both update_exon_table and update_alignment_table.
    '''
    dbm = DBManager.Instance()
    exon_container = ExonContainer.Instance()
    # Not used below; the call is kept in case Instance() has side effects.
    dmc = DataMapContainer.Instance()

    protein_entries = FileUtilities.get_protein_list()
    species_names = FileUtilities.get_default_species_list()
    exon_sources = ["ensembl", "genewise", "blastn", "tblastn", "sw_gene"]

    collected = []
    for protein_entry in protein_entries:
        for species in species_names:
            for exon_source in exon_sources:
                lookup_key = (protein_entry[0], species, exon_source)
                try:
                    for candidate in exon_container.get(lookup_key).get_ordered_exons():
                        # Only plain Exon objects are filtered on viability.
                        if type(candidate) is not Exon or candidate.viability:
                            collected.append(candidate)
                except KeyError:
                    continue

    dbm.update_exon_table(collected)
    dbm.update_alignment_table(collected)
def populate_ortholog_table():
    '''
    Collects the data maps of every (protein, species) pair.

    Pairs whose lookup raises KeyError are skipped silently.

    NOTE(review): the collected list is never passed to the database manager,
    so this function currently has no visible effect beyond the lookups --
    confirm whether a table update call is missing here.
    '''
    dbm = DBManager.Instance()
    data_maps = DataMapContainer.Instance()

    protein_entries = FileUtilities.get_protein_list()
    species_names = FileUtilities.get_default_species_list()

    collected = []
    for protein_entry in protein_entries:
        for species in species_names:
            try:
                collected.append(data_maps.get((protein_entry[0], species)))
            except KeyError:
                continue
def populate_gene_table():
    '''
    Collects the data maps of every (protein, species) pair and passes them
    to the database manager's update_gene_table.

    A pair whose lookup raises KeyError is reported on standard output and
    skipped. The full list of collected data maps is printed before the update.
    '''
    dbm = DBManager.Instance()
    data_maps = DataMapContainer.Instance()

    protein_entries = FileUtilities.get_protein_list()
    species_names = FileUtilities.get_default_species_list()

    collected = []
    for protein_entry in protein_entries:
        for species in species_names:
            try:
                collected.append(data_maps.get((protein_entry[0], species)))
            except KeyError:
                print("PROTEIN_ID %s ERROR" % (protein_entry[0]))

    print(collected)
    dbm.update_gene_table(collected)
def populate_protein_table():
    '''
    Collects the protein of every (protein, species) pair and passes the list
    to the database manager's update_protein_table.

    For each pair the data map is looked up first; its protein_id attribute is
    then used to fetch the protein from the protein container. A pair for
    which either lookup raises KeyError is reported on standard output and
    skipped.
    '''
    dbm = DBManager.Instance()
    protein_container = ProteinContainer.Instance()
    data_maps = DataMapContainer.Instance()

    protein_entries = FileUtilities.get_protein_list()
    species_names = FileUtilities.get_default_species_list()

    collected = []
    for protein_entry in protein_entries:
        for species in species_names:
            try:
                data_map = data_maps.get((protein_entry[0], species))
                collected.append(protein_container.get(data_map.protein_id))
            except KeyError:
                print("PROTEIN_ID %s ERROR" % (protein_entry[0]))

    dbm.update_protein_table(collected)
def populate_exon_alignment_piece_table():
    '''
    Collects [reference exon id, species, alignment piece] entries from the
    best exon alignments.

    For every protein list entry and every default species, the coding exons
    of the reference ('Homo_sapiens', 'ensembl') exon set are looked up. For
    each exon whose best alignment for that species has an sw_gene_alignment,
    the alignment pieces of type 'coding' or 'insertion' are appended to
    exon_aln_list. A KeyError raised inside the try block is printed and the
    current (protein, species) iteration is abandoned.

    NOTE(review): in the lines visible here neither dbm nor exon_aln_list is
    used after the loop -- confirm whether a database update follows or is
    missing.
    '''
    dbm = DBManager.Instance()
    ec = ExonContainer.Instance()
    beac = BestExonAlignmentContainer.Instance()
    
    protein_id_list = FileUtilities.get_protein_list()
    species_list = FileUtilities.get_default_species_list()
    
    exon_aln_list = []
    # Each protein list entry is unpacked as a pair; exon_num is not used below.
    for (ref_protein_id, exon_num) in protein_id_list:
        for species in species_list:
                try:
                    # The reference exon lookup does not depend on the species,
                    # so it is repeated unchanged for every species.
                    ref_exons = ec.get((ref_protein_id, 'Homo_sapiens', 'ensembl'))
                    for ref_exon in ref_exons.get_coding_exons():
                        best_exon_alignment = beac.get(ref_exon.exon_id, species)
                        # Skip exons without a best alignment or without an SW gene alignment.
                        if best_exon_alignment and best_exon_alignment.sw_gene_alignment:
                            for aln_piece in best_exon_alignment.sw_gene_alignment.alignment_pieces:
                                # Only 'coding' and 'insertion' pieces are collected.
                                if aln_piece.type in ('coding', 'insertion'):
                                    exon_aln_list.append([ref_exon.exon_id, species, aln_piece])
                except KeyError, e:
                    print e