コード例 #1
0
def getAnnotations(Species, array_type, reannotate_exon_seq, force):
    """Annotate Affymetrix exon array data using files Ensembl data (sync'ed to genome release)."""
    global species
    species = Species
    global test
    global test_cluster
    test = 'no'
    test_cluster = ['TC0701360']
    data_type = 'mRNA'

    global ensembl_exon_db
    global ensembl_exon_db
    global exon_clusters
    global exon_region_db
    ensembl_exon_db, ensembl_annot_db, exon_clusters, intron_clusters, exon_region_db, intron_retention_db, ucsc_splicing_annot_db, ens_transcript_db = EnsemblImport.getEnsemblAssociations(
        species, data_type, test)
    ensembl_probeset_db = importCriticalExonLocations(
        species, array_type, ensembl_exon_db,
        force)  ###Get Pre-computed genomic locations for critical exons
    ensembl_probeset_db = ExonArrayEnsemblRules.annotateExons(
        ensembl_probeset_db, exon_clusters, ensembl_exon_db, exon_region_db,
        intron_retention_db, intron_clusters, ucsc_splicing_annot_db)
    constitutive_gene_db = {}
    ExonArrayEnsemblRules.exportEnsemblLinkedProbesets(array_type,
                                                       ensembl_probeset_db,
                                                       species)
    print "\nCritical exon data exported coordinates, exon associations and splicing annotations exported..."

    ### Change filenames to reflect junction array type
    export_filename = 'AltDatabase/' + species + '/' + array_type + '/' + species + '_Ensembl_probesets.txt'
    ef = filepath(export_filename)
    export_replacement = string.replace(export_filename, '_probe',
                                        '_' + array_type + '_probe')
    er = filepath(export_replacement)
    shutil.copyfile(ef, er)
    os.remove(ef)  ### Copy file to a new name

    ### Export full exon seqeunce for probesets/critical exons to replace the original incomplete sequence (used for miRNA analyses)
    if reannotate_exon_seq == 'yes':
        JunctionArray.reAnnotateCriticalExonSequences(species, array_type)
コード例 #2
0
ファイル: update.py プロジェクト: cwt1/altanalyze
def buildExonArrayExonAnnotations(species, array_type, force):
    """Build the UCSC annotation files, then the exon array annotation databases.

    species -- species code passed to both build steps.
    array_type -- when 'gene', the source biotype is 'gene'; otherwise 'mRNA'.
    force -- passed through unchanged to buildUCSCAnnoationFiles.

    The databases returned by ExonArrayEnsemblRules.getAnnotations are
    unpacked but not used or returned here.
    """
    ### Get UCSC associations (download databases if necessary)
    ucsc_mrna_type = 'mrna'
    rebuild_from_scratch = 'yes'
    export_everything = 'no'  ### YES only for protein prediction analysis
    buildUCSCAnnoationFiles(species, ucsc_mrna_type, export_everything,
                            rebuild_from_scratch, force)

    from build_scripts import ExonArrayEnsemblRules
    reload(ExonArrayEnsemblRules)

    ### Build the databases and return the variables (not used here)
    process_from_scratch = 'yes'
    constitutive_source = 'default'
    source_biotype = 'gene' if array_type == 'gene' else 'mRNA'
    probeset_db, annotate_db, constitutive_gene_db, splicing_analysis_db = \
        ExonArrayEnsemblRules.getAnnotations(process_from_scratch,
                                             constitutive_source,
                                             source_biotype, species)
コード例 #3
0
ファイル: QC.py プロジェクト: kitavime/altanalyze
def displayExpressionGraph(species,
                           Platform,
                           exp_file,
                           gene,
                           transpose,
                           display=True,
                           showIntrons=False,
                           analysisType='plot'):
    ### Get gene annotations (users can provide an Ensembl or symbol)
    print 'Importing exon-level expression data for visualization (be patient)...'
    from build_scripts import ExonAnalyze_module
    global platform
    platform = Platform
    if platform != 'AltMouse':
        gene_annotation_file = "AltDatabase/ensembl/" + species + "/" + species + "_Ensembl-annotations.txt"
    else:
        gene_annotation_file = "AltDatabase/" + species + "/" + platform + "/" + platform + "_gene_annotations.txt"

    genes = []
    gene = string.replace(gene, '|', ',')
    gene = string.replace(gene, ' ', ',')
    if ',' in gene:
        genes += string.split(gene, ',')
    else:
        genes.append(gene)
    gene_db = {}
    for gene in genes:
        try:
            if 'ENS' in gene:
                try:
                    annotate_db  ### If variable is defined
                except Exception:
                    annotate_db = ExonAnalyze_module.import_annotations(
                        gene_annotation_file, platform,
                        keyBySymbol=False)  ### Make an SQLite call
                gene_symbol = annotate_db[gene].Symbol()
            else:
                try:
                    annotate_db  ### If variable is defined
                except Exception:
                    annotate_db = ExonAnalyze_module.import_annotations(
                        gene_annotation_file, platform, keyBySymbol=True)
                gene_symbol = gene
                gene = annotate_db[gene].GeneID()
            gene_db[gene] = gene_symbol
        except Exception:
            #if len(gene)>0: print gene, 'not in database'
            pass

    if len(gene_db) == 0:
        force_no_gene_found_error
    if 'AltResults' in exp_file:
        root_dir = string.split(exp_file, 'AltResults')[0] + 'ExonPlots/'
    else:
        root_dir = string.split(exp_file, 'ExpressionInput')[0] + 'ExonPlots/'

    from build_scripts import ExonAnalyze_module
    if platform == 'RNASeq': datatype = 'exons'
    else: datatype = 'probesets'
    export_exon_filename = 'AltDatabase/' + species + '/' + platform + '/' + species + '_Ensembl_' + datatype + '.txt'
    if verifyFileLength(export_exon_filename) == 0:
        rootdir = string.replace(root_dir, 'ExonPlots/', '')
        export_exon_filename = rootdir + '/' + export_exon_filename

    from build_scripts import ExonArrayEnsemblRules
    ensembl_exon_db = ExonArrayEnsemblRules.reimportEnsemblProbesetsForSeqExtraction(
        export_exon_filename, 'gene-probesets',
        gene_db)  ### Make an SQLite call

    filter_db = {}
    for gene in ensembl_exon_db:
        ensembl_exon_db[gene].sort()

        for (index, ed, id) in ensembl_exon_db[gene]:
            filter_db[id] = []

    try:
        os.mkdir(root_dir)
    except Exception:
        None  ### dir exists
    print 'Image results being saved to the folder "ExonPlots" in the AltAnalyze results directory.'
    importTableEntries(exp_file,
                       filter_db,
                       ensembl_exon_db,
                       gene_db,
                       root_dir,
                       transpose,
                       display,
                       showIntrons,
                       analysisType=analysisType)  ### Make an SQLite call
コード例 #4
0
def getJunctionComparisonsFromExport(species, array_type):
    type = 'standard'
    gene_junction_db = importEnsemblUCSCAltJunctions(species, type)

    ### Retrieve probesets with exon-junctions associated - these are critical exons
    filename = 'AltDatabase/' + species + '/' + array_type + '/' + species + '_Ensembl_' + array_type + '_probesets.txt'
    gene_probeset_db = ExonArrayEnsemblRules.reimportEnsemblProbesetsForSeqExtraction(
        filename, 'junctions', {})
    left = {}
    right = {}
    gene_db = {}
    gene_exon_db = {}
    nonjunction_aligning = {}
    for gene in gene_probeset_db:
        for (probe_data, ed) in gene_probeset_db[gene]:
            probeset, strand, probeset_start, probeset_stop = probe_data
            region_id = string.replace(ed.RegionNumber(), '-', '.')
            original_region_id = region_id
            region_ids = string.split(region_id, '|')
            gene_db[probeset[:-2]] = gene
            #ed.AssociatedSplicingJunctions()
            r_starts = string.split(ed.ExonStart(), '|')
            r_stops = string.split(ed.ExonStop(), '|')
            for region_id in region_ids:
                if '|5' in probeset:
                    try:
                        left[probeset[:-2]].append(region_id)
                    except Exception:
                        left[probeset[:-2]] = [region_id]
                    if strand == '+':  ### If the junction probesets DO NOT align to the region coordinates, then the probeset maps to a junction outside the database
                        if probeset_stop not in r_stops:
                            nonjunction_aligning[
                                probeset[:
                                         -2]] = original_region_id + '_' + probeset_stop, 'left'
                    elif probeset_start not in r_starts:
                        nonjunction_aligning[
                            probeset[:
                                     -2]] = original_region_id + '_' + probeset_start, 'left'
                elif '|3' in probeset:
                    try:
                        right[probeset[:-2]].append(region_id)
                    except Exception:
                        right[probeset[:-2]] = [region_id]
                    if strand == '+':
                        if probeset_start not in r_starts:
                            nonjunction_aligning[
                                probeset[:
                                         -2]] = original_region_id + '_' + probeset_start, 'right'
                    elif probeset_stop not in r_stops:
                        nonjunction_aligning[
                            probeset[:
                                     -2]] = original_region_id + '_' + probeset_stop, 'right'
                else:
                    if '_' in region_id: print killer
                    try:
                        gene_exon_db[gene, region_id].append(probeset)
                    except Exception:
                        gene_exon_db[gene, region_id] = [probeset]

    print 'len(nonjunction_aligning)', len(nonjunction_aligning)
    gene_exon_db = eliminateRedundant(gene_exon_db)
    junction_db = {}  ### Get the exon-region IDs for an exon-junction
    for probeset in left:
        gene = gene_db[probeset]
        if probeset in right:
            for region1 in left[probeset]:
                for region2 in right[probeset]:
                    junction = region1 + '-' + region2
                    try:
                        junction_db[gene, junction].append(probeset)
                    except Exception:
                        junction_db[gene, junction] = [probeset]

    probeset_junction_export = 'AltDatabase/' + species + '/' + array_type + '/' + species + '_junction_comps.txt'

    fn = filepath(probeset_junction_export)
    data = open(fn, 'w')
    print "Exporting", probeset_junction_export
    title = 'gene' + '\t' + 'critical_exon' + '\t' + 'exclusion_junction_region' + '\t' + 'inclusion_junction_region' + '\t' + 'exclusion_probeset' + '\t' + 'inclusion_probeset' + '\t' + 'data_source' + '\n'
    data.write(title)
    temp_list = []

    for (gene, critical_exon, incl_junction,
         excl_junction) in gene_junction_db:
        if (gene, incl_junction) in junction_db:
            incl_junction_probesets = junction_db[gene, incl_junction]
            if (gene, excl_junction) in junction_db:
                excl_junction_probesets = junction_db[gene, excl_junction]
                for incl_junction_probeset in incl_junction_probesets:
                    for excl_junction_probeset in excl_junction_probesets:
                        try:
                            for incl_exon_probeset in gene_exon_db[
                                    gene, critical_exon]:
                                if incl_junction_probeset in nonjunction_aligning or excl_junction_probeset in nonjunction_aligning:
                                    null = []
                                else:  ### Ensure the probeset DOES map to the annotated junctions
                                    temp_list.append(
                                        string.join([
                                            gene, critical_exon, excl_junction,
                                            critical_exon,
                                            excl_junction_probeset,
                                            incl_exon_probeset, 'AltAnalyze'
                                        ], '\t') + '\n')
                        except Exception:
                            null = []
                        if incl_junction_probeset in nonjunction_aligning:
                            new_region_id, side = nonjunction_aligning[
                                incl_junction_probeset]
                            incl_junction = renameJunction(
                                incl_junction, side, new_region_id)
                        if excl_junction_probeset in nonjunction_aligning:
                            new_region_id, side = nonjunction_aligning[
                                excl_junction_probeset]
                            excl_junction = renameJunction(
                                excl_junction, side, new_region_id)
                        if excl_junction_probeset != incl_junction_probeset:
                            temp_list.append(
                                string.join([
                                    gene, critical_exon, excl_junction,
                                    incl_junction, excl_junction_probeset,
                                    incl_junction_probeset, 'AltAnalyze'
                                ], '\t') + '\n')
    temp_list = unique.unique(temp_list)
    for i in temp_list:
        data.write(i)
    data.close()
    print 'Number of compared junctions exported', len(temp_list)