Example #1
0
        infile = a
    elif o == "-o":
        outfile = a

# Promoter window sizes. Presumably the number of bases taken on each side
# of a gene start -- TODO confirm against the code that consumes them.
promotorUp = 2000
promotorDown = 2000

# Reader over the tab-delimited input file named by `infile`.
# NOTE(review): the file handle is not closed in the visible code.
intervals = csv.reader(open(infile, "r"), delimiter="\t")

# Writer for the tab-delimited output file named by `outfile`.
# NOTE(review): the file handle is not closed in the visible code.
writer = csv.writer(open(outfile, "w"), delimiter="\t")

# CpG island BED file, located under the user's home directory.
cpgIslands = ExtendedBed(
    os.path.expanduser(
        "~/mount/publicdata/hg18/cpgislands/cpgislands-0-index.bed"))

# Genome object built with default arguments.
genome = Genome()

# load gene data
genedata = Ensembl.EnsemblGenes(assembly="hg18", annotation="ncbi36.1")

# First part of the header row; more column titles are appended to this
# list further down.
headerRow = [
    'Ensembl', 'Name', 'chr', 'start', 'stop', 'strand', 'No. Transcripts',
    'Avg. Exons per Transcript', "Unique Exons per Gene", "Start positions",
    "Start positions / No. Transcripts"
]

#"Promotor G-Count",  "Promotor C-Count",  "Promotor A-Count", "Promotor T-Count" ,
headerRow.extend([
    "Promotor region start",
    "Promotor region end",
    "Promotor GC Percent",
Example #2
0
            assembly = a

    assert methdatafile != None

    # if we have an expression file we need fc and expression columns
    if exprfile != None:
        assert fccol != None
        assert exprcols != None
    else:
        exprcols = []

    assert outputfile != None

    genedata = EnsemblGenes(assembly=assembly)

    genome = Genome(genomeBuild=assembly)

    if assembly == "hg18":
        cpgIslands = ExtendedBed(
            os.path.expanduser(
                "~/mount/publicdata/hg18/cpgislands/cpgislands.bed"))
        lINEs = ExtendedBed(
            os.path.expanduser("~/mount/publicdata/hg18/repeats/LINEs-0.bed"))
        sINEs = ExtendedBed(
            os.path.expanduser("~/mount/publicdata/hg18/repeats/SINEs-0.bed"))
    elif assembly == "hg19":
        cpgIslands = ExtendedBed(
            os.path.expanduser(
                "~/mount/publicdata/hg19/CpGIslands/cpgislands.bed"))
        lINEs = ExtendedBed(
            os.path.expanduser(
    exprfile = None
    ensemblidcol = "ensemblid"
    upstreamPromotor = 5000
    downstreamPromotor = 1000
    for o,a in opts:
        if o=="--gene-expression-file":
            exprfile = a
    assert exprfile != None
    
    print len(matrices)
    
    # WARNING: everything we just read in is a PFM (position frequency matrix); we might need position weight matrices (PWMs) instead.
    # We have a choice of JASPAR (downloaded), JASPAR (with motility), or TFD, or of combining them.
    
    genedata = EnsemblGenes(assembly="hg18")
    genome = Genome(genomeBuild = "hg18")
    
    exprCSV = IndexedCSV(exprfile,keyPos=1)
    
    motifnames = []
    for matrix in matrices:
        motifnames.append(matrix)

    output = csv.writer(open(exprfile+".transcriptionfactors","w"),delimiter='\t')
        
    # header
    header = exprCSV.keys[:] # copy keys
    header.extend(motifnames)
    output.writerow(header)
    
    for testid in exprCSV:
        if o == "-o" or o == "--outputFolder":
            outputfolder = a

    assert infile != None
    assert outputfolder != None

    def getBlatLocation(line):
        """Pull the location fields out of one tab-separated result line.

        Returns a 5-tuple built from columns 0, 13, 15, 16 and 8, in that
        order. Columns 0, 15 and 16 are converted to int; columns 13 and 8
        are returned as strings.

        NOTE(review): these indices match the PSL layout (matches, tName,
        tStart, tEnd, strand) -- confirm the input really is PSL.
        """
        columns = line.split("\t")
        matches = int(columns[0])
        target = columns[13]
        start = int(columns[15])
        end = int(columns[16])
        strand = columns[8]
        return matches, target, start, end, strand

    def getBlatQStarts(line):
        """Return the integers listed in column 19 of a tab-separated line.

        Column 19 is split on commas and the final piece is discarded before
        conversion; with a trailing comma that final piece is empty.

        NOTE(review): presumably the PSL qStarts column -- confirm.
        """
        columns = line.split("\t")
        pieces = columns[19].split(",")
        # str.split always yields at least one piece, so pop() cannot fail.
        pieces.pop()
        return list(map(int, pieces))

    genome = Genome(genomeBuild=build)

    makeDirectory(outputfolder)

    with open(outputfolder + "/index.html", "w") as indexFile:

        print >> indexFile, """
            <html><body>
            <head>
            <style type="text/css">
            td{font-size:small;}
            th{font-size:small;}
            </style>
            </head>
        """
Example #5
0
 def __init__(self,build):
     """Set up the genome data for the given build.

     build -- genome build identifier; it is passed to Genome as
              genomeBuild and to ChromosomeEnds.
     """
     self.genome = Genome(genomeBuild = build)
     
     # Initial missing-values setting. Presumably consulted by other
     # methods of this class -- TODO confirm.
     self.valuesBehaviour = missingValuesDontCount
     self.chromosomeEnds = ChromosomeEnds(build)
Example #6
0
        else:
            regions.append(BedIntervalTree(a))
    elif o == "-c":
        controlAffyExpressionData = IndexedCSV(a)
    #Annotated difference file input
    elif o == "-e":
        rnaSeqExpressionData = IndexedCSV(a, key="test_id")
    elif o == "-a":
        assembly = a

# Promoter padding distances. Presumably in bases relative to the
# transcription start site -- TODO confirm.
UPSTREAM_PROMOTOR_DIST = 2000
DOWNSTREAM_PROMOTOR_DIST = 2000

# Writer for the tab-delimited output file named by `outfile`.
# NOTE(review): the file handle is not closed in the visible code.
writer = csv.writer(open(outfile, "w"), delimiter="\t")

# Genome for the selected assembly (passed positionally here).
genome = Genome(assembly)

###

# load data

# Ensembl gene annotation for the selected assembly.
genedata = Ensembl.EnsemblGenes(assembly=assembly)

# Reverse gene mapping built from the gene annotation, with no padding.
genes = Ensembl.ReverseGeneMapping(genedata)

# Same mapping, built with tssPadding set to the upstream promoter distance.
genespluspromotor = Ensembl.ReverseGeneMapping(
    genedata, tssPadding=UPSTREAM_PROMOTOR_DIST)

genepromotors = Ensembl.ReversePromotorMapping(
    genedata,
    upstreamPadding=UPSTREAM_PROMOTOR_DIST,
Example #7
0
            affypcol = a
        elif o == "--outputfile":
            outputfile = a
        elif o == "--promotorsize":
            upstreamPromotor = int(a)
            downstreamPromotor = int(a)

    assert methdatafile != None
    assert affyfile != None
    assert affyfccol != None
    assert affyexprcol != None
    assert outputfile != None

    genedata = EnsemblGenes(assembly="hg18")

    genome = Genome(genomeBuild="hg18")

    affyannotation = NetAffxAnnotation(genome="hg18", cdfname="HG-U133_Plus_2")

    cpgIslands = ExtendedBed(
        os.path.expanduser(
            "~/mount/publicdata/hg18/cpgislands/cpgislands-0-index.bed"))

    affyCSV = IndexedCSV(affyfile)
    affyEnsemblLogFCs = collections.defaultdict(list)
    affyEnsemblExprs = collections.defaultdict(list)
    affyEnsemblPvalues = collections.defaultdict(list)

    for affy in affyCSV:
        ensembls = affyannotation.getValues(affy, "Ensembl")
        if len(ensembls) == 1:
Example #8
0
    assert infile != None
    assert outputfolder != None

    def getBlatLocation(line):
        """Extract five location values from a tab-separated result line.

        The returned tuple is (int of column 0, column 13, int of column 15,
        int of column 16, column 8).

        NOTE(review): in PSL output these columns are matches, tName,
        tStart, tEnd and strand -- confirm the input is PSL.
        """
        fields = line.split("\t")
        score, chrom = int(fields[0]), fields[13]
        begin, finish = int(fields[15]), int(fields[16])
        return score, chrom, begin, finish, fields[8]

    def getBlatQStarts(line):
        """Parse column 19 of a tab-separated line into a list of ints.

        The column is split on commas and every piece except the last is
        converted; with a trailing comma the dropped piece is empty.

        NOTE(review): presumably the PSL qStarts column -- confirm.
        """
        qstart_field = line.split("\t")[19]
        starts = []
        for token in qstart_field.split(",")[:-1]:
            starts.append(int(token))
        return starts

    exonboundaries = False
    blockStarts = []

    genome = Genome(genomeBuild=build)

    makeDirectory(outputfolder)

    shutil.copy("arrow-down.gif", outputfolder + "/arrow-down.gif")
    shutil.copy("arrow-none.gif", outputfolder + "/arrow-none.gif")
    shutil.copy("arrow-up.gif", outputfolder + "/arrow-up.gif")
    shutil.copy("sortable.css", outputfolder + "/sortable.css")
    shutil.copy("sortable.js", outputfolder + "/sortable.js")

    with open(outputfolder + "/index.html", "w") as indexFile:

        print >> indexFile, """
            <html><body>
            <head>
            <style type="text/css">
Example #9
0
###


def distanceHumanReadable(dist):
    """Format a distance as a kilobase label, e.g. ``2.5kb`` for ``2500.0``.

    The value is divided by 1000 with ``/`` and the text ``kb`` is appended.
    Under Python 2 without ``from __future__ import division``, integer input
    is floor-divided, so fractions of a kilobase are dropped.
    """
    kilobases = dist / 1000
    return "".join((str(kilobases), "kb"))


# Distance and window constants. Units are presumably bases -- TODO confirm
# against the code that uses them.
TSS_TTS_Distance = 1000
SURROUNDING_SEQUENCE_Distance = 250  # each side
WINDOW_SIZE = 500
WINDOW_OFFSET = 5

# load data

# Everything below is loaded for the hg18 build.
genome = Genome(genomeBuild="hg18")

chromosomeEnds = ChromosomeEnds("hg18")

# Ensembl gene annotation (ncbi36.1) and the lookups built from it.
genedata = Ensembl.EnsemblGenes(assembly="hg18", annotation="ncbi36.1")

genes = Ensembl.ReverseGeneMapping(genedata)

exons = Ensembl.ReverseExonMapping(genedata)

transcriptionSites = Ensembl.TranscriptionSites(genedata)

# CpG island BED file, located under the user's home directory.
cpgIslands = ExtendedBed(
    os.path.expanduser(
        "~/mount/publicdata/hg18/cpgislands/cpgislands-0-index.bed"))