Example #1
0
# Input files; each one is handed to yield_refs() in the loop below.
filenames = [
    '1134_and_Cho.txt',
    '1134_not_Cho.txt',
    'cho_not_1134.txt',
]

# Map each file name, minus its extension, to the set of items that
# yield_refs() produces for that file.
references = {}
for file in filenames:
    # A parenthesised single-argument print emits the same text as the
    # print statement.
    print('****************** %s ********************' % file)
    refs_in_file = set(yield_refs(file))
    references[os.path.splitext(file)[0]] = refs_in_file

# Pool the HS unigene references from every input file into a single list.
# Each file contributes a set, so an id shared by several files can appear
# more than once here.
cell_cycle_hs_unigene = list(chain.from_iterable(references.values()))

# Translate them to Ensembl HS gene ids using the Synergizer.  The species is
# passed as its plain binomial name; a masked spelling such as 'H**o sapiens'
# does not name a species.
translated = S.translate('ensembl', 'Homo sapiens', 'unigene',
                         'ensembl_gene_id', cell_cycle_hs_unigene)
# Report the count returned by S.how_many_have_translations().
print('Ensembl translated: %d' % S.how_many_have_translations(translated))
ensembl_hs_genes = S.get_translations(translated)


def yield_mouse_orthologs(hs_genes):
    # map into mouse orthologs using biomart
    query = B.new_query()
    dataset = B.add_dataset(query, 'hsapiens_gene_ensembl')
    B.add_attribute(dataset, 'ensembl_gene_id')
    B.add_attribute(dataset, 'mouse_ensembl_gene')
    filter = B.add_filter(dataset, name='ensembl_gene_id', value='')
    filter.set('value', ','.join(ensembl_hs_genes))
    for chunk in B.split_big_list(ensembl_hs_genes, 50):
        #logging.info('Querying Ensembl biomart for chunk of %d genes', len(chunk))
        filter.set('value', ','.join(chunk))
Example #2
0
    '1134_not_Cho.txt',
    'cho_not_1134.txt',
]

# Build a lookup from each file's base name (extension removed) to the set of
# items yield_refs() produces for that file.
references = {}
for file in filenames:
    # Banner so the output for each file is easy to tell apart.
    print('****************** %s ********************' % file)
    refs_in_file = set(yield_refs(file))
    references[os.path.splitext(file)[0]] = refs_in_file


# Pool the HS unigene references from every input file into a single list.
# Each file contributes a set, so an id shared by several files can appear
# more than once here.
cell_cycle_hs_unigene = list(chain.from_iterable(references.values()))


# Translate them to Ensembl HS gene ids using the Synergizer.  The species is
# passed as its plain binomial name; a masked spelling such as 'H**o sapiens'
# does not name a species.
translated = S.translate('ensembl', 'Homo sapiens', 'unigene', 'ensembl_gene_id', cell_cycle_hs_unigene)
# Report the count returned by S.how_many_have_translations().
print('Ensembl translated: %d' % S.how_many_have_translations(translated))
ensembl_hs_genes = S.get_translations(translated)

def yield_mouse_orthologs(hs_genes):
    # map into mouse orthologs using biomart
    query = B.new_query()
    dataset = B.add_dataset(query, 'hsapiens_gene_ensembl')
    B.add_attribute(dataset, 'ensembl_gene_id')
    B.add_attribute(dataset, 'mouse_ensembl_gene')
    filter = B.add_filter(dataset, name='ensembl_gene_id', value='')
    filter.set('value', ','.join(ensembl_hs_genes))
    for chunk in B.split_big_list(ensembl_hs_genes, 50):
        #logging.info('Querying Ensembl biomart for chunk of %d genes', len(chunk))
        filter.set('value', ','.join(chunk))
        for row in B.yield_csv_query_results(query):
Example #3
0
"""

# Transcription factor symbols for the muscle study.  Author annotations
# carried over from the per-element comments: MEF2c -> MEF2,
# MyoD1 -> EBOX (MyoD).
muscle_tfs = ['MEF2c', 'SP1', 'SRF', 'MyoD1', 'TEF']
"""
Muscle study TFBS (based on work by Wasserman and Fickett, 1998)
"""

# Study tag -> list of transcription factor symbols for that study.  The loop
# further down uses the tag to name the per-study output file.
tf_sets = {
    'Haematopoietic': haematopoietic_tfs,
    'Liver': liver_tfs,
    'Muscle': muscle_tfs,
}

import biopsy.identifiers.synergizer as S
import biopsy.identifiers.biomart as B

# For each named TF set: ask the Synergizer to translate the MGI symbols into
# mouse Ensembl gene ids, print the raw result and the translated count, then
# write the ids, newline-separated, to '<tag>-ensembl.txt'.
# items() and the parenthesised single-argument print behave the same on
# Python 2 and also parse on Python 3.
for tag, tfs in tf_sets.items():
    translated = S.translate('ensembl', 'Mus musculus', 'mgi_symbol',
                             'ensembl_gene_id', tfs)
    print(translated)
    print('Ensembl translated: %d/%d' % (
        S.how_many_have_translations(translated), len(tfs)))
    ensembl_genes = S.get_translations(translated)
    # A context manager closes (and therefore flushes) the output file
    # deterministically instead of leaving it to garbage collection.
    with open('%s-ensembl.txt' % tag, 'w') as out:
        out.write('\n'.join(ensembl_genes))
Example #4
0
    "PAX5",
    "SP1",
]
"""
Haematopoietic TFBS
"""


# One symbol per line so each author annotation sits next to the element it
# belongs to.
liver_tfs = [
    "HNF1a",  # HNF1
    "Foxe3",  # HNF3
    "HNF4a",  # HNF4
    "CEBPa",  # CEBP
]
"""
Liver study TFBS (based on work by Krivan and Wasserman, 2001)
"""


muscle_tfs = [
    "MEF2c",  # MEF2
    "SP1",
    "SRF",
    "MyoD1",  # EBOX (MyoD)
    "TEF",
]
"""
Muscle study TFBS (based on work by Wasserman and Fickett, 1998)
"""

# Study tag -> list of transcription factor symbols for that study.  The loop
# further down uses the tag to name the per-study output file.
tf_sets = {
    "Haematopoietic": haematopoietic_tfs,
    "Liver": liver_tfs,
    "Muscle": muscle_tfs,
}

import biopsy.identifiers.synergizer as S
import biopsy.identifiers.biomart as B

# For each named TF set: ask the Synergizer to translate the MGI symbols into
# mouse Ensembl gene ids, print the raw result and the translated count, then
# write the ids, newline-separated, to "<tag>-ensembl.txt".
# items() and the parenthesised single-argument print behave the same on
# Python 2 and also parse on Python 3.
for tag, tfs in tf_sets.items():
    translated = S.translate("ensembl", "Mus musculus", "mgi_symbol", "ensembl_gene_id", tfs)
    print(translated)
    print("Ensembl translated: %d/%d" % (S.how_many_have_translations(translated), len(tfs)))
    ensembl_genes = S.get_translations(translated)
    # A context manager closes (and therefore flushes) the output file
    # deterministically instead of leaving it to garbage collection.
    with open("%s-ensembl.txt" % tag, "w") as out:
        out.write("\n".join(ensembl_genes))
Example #5
0
# Copyright John Reid 2009
#

"""
Targets of liver specific transcription factors in Wasserman's predictive model for liver paper
"""


from utils import *
from itertools import imap
import biopsy.identifiers.synergizer as S


# Gene symbols of the liver targets, one per line so the author's "BF"
# annotation stays attached to CFB.
liver_targets = [
    "G6PC",
    "IGF1",
    "PAH",
    "IGFBP1",
    "CFB",  # BF
    "FABP2",
    "GUCA2B",
    "HOXA4",
    "SLC34A1",
]

# Ask the Synergizer to translate the liver target symbols (MGI symbols) into
# mouse Ensembl gene ids, then print the raw result and the translated count.
translated = S.translate("ensembl", "Mus musculus", "mgi_symbol", "ensembl_gene_id", liver_targets)
# The parenthesised single-argument print matches the Python 2 print statement
# and also parses on Python 3.
print(translated)
print("Ensembl translated: %d/%d" % (S.how_many_have_translations(translated), len(liver_targets)))
ensembl_genes = S.get_translations(translated)
# Write the ids newline-separated.  The context manager closes (and therefore
# flushes) the file deterministically instead of leaving it to garbage
# collection.
with open("liver-targets.txt", "w") as out:
    out.write("\n".join(ensembl_genes))


liver_ensembl_targets = {
    "G6PC": "ENSMUSG00000078650",
    "IGF1": "ENSMUSG00000020053",
    "PAH": "ENSMUSG00000020051",
    "IGFBP1": "ENSMUSG00000020429",
    "CFB": "ENSMUSG00000024371",
    "FABP2": "ENSMUSG00000023057",
    "GUCA2B": "ENSMUSG00000032978",
    "HOXA4": "ENSMUSG00000000942",
Example #6
0
from itertools import imap
import biopsy.identifiers.synergizer as S

# Gene symbols of the liver targets.  CFB keeps its own line so the author's
# "BF" annotation stays attached to it.
liver_targets = [
    'G6PC', 'IGF1', 'PAH', 'IGFBP1',
    'CFB',  # BF
    'FABP2', 'GUCA2B', 'HOXA4', 'SLC34A1',
]

# Ask the Synergizer to translate the liver target symbols (MGI symbols) into
# mouse Ensembl gene ids, then print the raw result and the translated count.
translated = S.translate('ensembl', 'Mus musculus', 'mgi_symbol',
                         'ensembl_gene_id', liver_targets)
# The parenthesised single-argument print matches the Python 2 print statement
# and also parses on Python 3.
print(translated)
print('Ensembl translated: %d/%d' % (S.how_many_have_translations(translated),
                                     len(liver_targets)))
ensembl_genes = S.get_translations(translated)
# Write the ids newline-separated.  The context manager closes (and therefore
# flushes) the file deterministically instead of leaving it to garbage
# collection.
with open('liver-targets.txt', 'w') as out:
    out.write('\n'.join(ensembl_genes))

liver_ensembl_targets = {
    'G6PC': 'ENSMUSG00000078650',
    'IGF1': 'ENSMUSG00000020053',
    'PAH': 'ENSMUSG00000020051',
    'IGFBP1': 'ENSMUSG00000020429',
    'CFB': 'ENSMUSG00000024371',
    'FABP2': 'ENSMUSG00000023057',
    'GUCA2B': 'ENSMUSG00000032978',
    'HOXA4': 'ENSMUSG00000000942',