Ejemplo n.º 1
0
def buildJunctionExonAnnotations(species,array_type,specific_array_type,force,genomic_build):
    """Run the junction/exon annotation build steps for one array.

    The steps below are delegated to the JunctionArray and
    JunctionArrayEnsemblRules modules. As described by the original notes,
    those calls are expected to:
    1) Extract transcript cluster-to-gene annotations
    2) Extract exon sequences for junctions and exon probesets from the
       Affymetrix annotation file (version 2.0)
    3) Map these sequences to Ensembl gene sequences (build specific) plus
       and minus 2KB, upstream and downstream
    4) Obtain AltAnalyze exon region annotations and obtain full-length exon
       sequences for each exon probeset
    5) Consolidate these into an Ensembl_probeset.txt file (rather than
       Ensembl_junction_probeset.txt) with junctions having a single
       probeset identifier
    6) Determine which junctions and junction-exons represent reciprocal
       junctions using:
       a) AltAnalyze identified reciprocal junctions from Ensembl and UCSC
       b) Affymetrix suggested reciprocal junctions based on common exon
          cluster annotations, creating Mm_junction_comps_updated.txt
       c) De novo comparison of all exon-junction region IDs for all
          junctions using the EnsemblImport method compareJunctions()

    Parameters:
        species, array_type, specific_array_type: forwarded unchanged to the
            JunctionArray calls.
        force: accepted for interface compatibility, but overwritten with
            'no' before it is used.
        genomic_build: not referenced anywhere in this function.
    """
    ### Settings for the UCSC association step. That step is disabled (its call
    ### is commented out below), so of these only `force` is consumed later on.
    mRNA_Type = 'mrna'
    run_from_scratch = 'yes'
    export_all_associations = 'no' ### YES only for protein prediction analysis
    force = 'no' ### NOTE: replaces whatever value the caller supplied
    #buildUCSCAnnoationFiles(species,mRNA_Type,export_all_associations,run_from_scratch,force)

    ### Get genomic locations and initial annotations for exon sequences (exon probesets and junctions)
    import JunctionArray
    import JunctionArrayEnsemblRules

    ### The same three identifiers are handed to both JunctionArray entry points
    array_identifiers = (species, array_type, specific_array_type)

    ### Steps 1-3
    JunctionArray.getJunctionExonLocations(*array_identifiers)
    ### Step 4 (the literal 'yes' is passed through as-is; its meaning is defined by getAnnotations)
    JunctionArrayEnsemblRules.getAnnotations(species, array_type, 'yes', force)
    ### Steps 5-6
    JunctionArray.identifyJunctionComps(*array_identifiers)
Ejemplo n.º 2
0
def buildJunctionExonAnnotations(species, array_type, specific_array_type,
                                 force, genomic_build):
    """Run the UCSC association step and the junction/exon annotation build.

    The work is delegated to buildUCSCAnnoationFiles and to the
    JunctionArray and JunctionArrayEnsemblRules modules. As described by the
    original notes, the JunctionArray* calls are expected to:
    1) Extract transcript cluster-to-gene annotations
    2) Extract exon sequences for junctions and exon probesets from the
       Affymetrix annotation file (version 2.0)
    3) Map these sequences to Ensembl gene sequences (build specific) plus
       and minus 2KB, upstream and downstream
    4) Obtain AltAnalyze exon region annotations and obtain full-length exon
       sequences for each exon probeset
    5) Consolidate these into an Ensembl_probeset.txt file (rather than
       Ensembl_junction_probeset.txt) with junctions having a single
       probeset identifier
    6) Determine which junctions and junction-exons represent reciprocal
       junctions using:
       a) AltAnalyze identified reciprocal junctions from Ensembl and UCSC
       b) Affymetrix suggested reciprocal junctions based on common exon
          cluster annotations, creating Mm_junction_comps_updated.txt
       c) De novo comparison of all exon-junction region IDs for all
          junctions using the EnsemblImport method compareJunctions()

    Parameters:
        species, array_type, specific_array_type: forwarded unchanged to the
            JunctionArray calls (species also goes to the UCSC step).
        force: accepted for interface compatibility, but overwritten with
            'no' before it is used.
        genomic_build: not referenced anywhere in this function.
    """
    ### Get UCSC associations (download databases if necessary)
    force = 'no'  ### NOTE: replaces whatever value the caller supplied
    transcript_source = 'mrna'
    rebuild_from_scratch = 'yes'
    export_everything = 'no'  ### YES only for protein prediction analysis
    buildUCSCAnnoationFiles(
        species,
        transcript_source,
        export_everything,
        rebuild_from_scratch,
        force,
    )

    ### Get genomic locations and initial annotations for exon sequences (exon probesets and junctions)
    import JunctionArray
    import JunctionArrayEnsemblRules

    ### The same three identifiers are handed to both JunctionArray entry points
    array_identifiers = (species, array_type, specific_array_type)

    ### Steps 1-3
    JunctionArray.getJunctionExonLocations(*array_identifiers)
    ### Step 4 (the literal 'yes' is passed through as-is; its meaning is defined by getAnnotations)
    JunctionArrayEnsemblRules.getAnnotations(species, array_type, 'yes', force)
    ### Steps 5-6
    JunctionArray.identifyJunctionComps(*array_identifiers)