def get_gmt_url(species, gse, gpl):
    """
    Return the media URL of the GMT file for a GSE/GPL pair.

    The relative path is built as ``here('gmt', '<gse>_<gpl>.gmt')``;
    ``species`` is accepted but does not take part in the path.

    When ``settings.MEDIA_ARE_LOCAL`` is truthy, the file is looked up under
    ``settings.MEDIA_ROOT`` and ``None`` is returned if it does not exist.
    When the flag is falsy or missing, the URL is returned without any
    existence check.

    :type species: str
    :type gse: str
    :type gpl: str
    :rtype: str or None
    """
    relative_path = here('gmt', '{}_{}.gmt'.format(gse, gpl))
    media_are_local = getattr(settings, 'MEDIA_ARE_LOCAL', False)
    # Only local media can be probed on disk; a missing file means "no URL".
    if media_are_local and not os.path.exists(
            here(settings.MEDIA_ROOT, relative_path)):
        return None
    return here(settings.MEDIA_URL, relative_path)
def get_module_heat_map_url(species, gse, gpl, module_number):
    """
    Return the media URL of the heat map image for a single module.

    The relative path is built as
    ``here('heatmaps', species, '<gse>_<gpl>_module_<module_number>.svg')``.

    When ``settings.MEDIA_ARE_LOCAL`` is truthy, the image is looked up under
    ``settings.MEDIA_ROOT`` and ``None`` is returned if it does not exist.
    When the flag is falsy or missing, the URL is returned without any
    existence check.

    :type species: str
    :type gse: str
    :type gpl: str
    :type module_number: int
    :rtype: str or None
    """
    image_name = '{}_{}_module_{}.svg'.format(gse, gpl, module_number)
    relative_path = here('heatmaps', species, image_name)
    if getattr(settings, 'MEDIA_ARE_LOCAL', False):
        local_copy = here(settings.MEDIA_ROOT, relative_path)
        if not os.path.exists(local_copy):
            # Local media is enabled but the image was never generated.
            return None
    return here(settings.MEDIA_URL, relative_path)
def get_gene_to_freq(path_to_file=None, db_version=None, species=None):
    """
    Load a gene -> frequency mapping from a tab-separated text file.

    Every line of the file must hold exactly two tab-separated fields, both
    parseable as integers: the gene id and its frequency.

    The file is either given explicitly through ``path_to_file`` or located
    as ``here(PATH_TO_DB_FILES[db_version], '<species>.freq.entrez.txt')``.
    ``path_to_file`` takes precedence when both are supplied.

    :param path_to_file: explicit path to the frequency file
    :param db_version: key into ``PATH_TO_DB_FILES``
    :param species: species prefix of the frequency file name
    :rtype: dict
    :raises Exception: if neither ``path_to_file`` nor both ``db_version``
        and ``species`` are given
    """
    if not path_to_file:
        if not (db_version and species):
            raise Exception('Specify path to file or db_version and species.')
        path_to_file = here(
            PATH_TO_DB_FILES[db_version],
            '{}.freq.entrez.txt'.format(species))

    # Use a context manager so the handle is closed even if parsing fails.
    with open(path_to_file) as freq_file:
        rows = [line.strip().split('\t') for line in freq_file]
    return {int(gene): int(freq) for gene, freq in rows}
def create_pretty_gmt(outpath):
    """
    Write every stored module as gene symbols into per-dataset GMT files.

    All ``GQModule`` objects are read, their entrez ids are converted to
    symbols through ``ToSymbolConversion``, and the results are grouped by
    ``<gse>_<gpl>``. For each group a file ``<gse>_<gpl>.gmt`` is written
    under ``outpath`` with one line per module:
    ``<module number>\\t<space-separated symbols>``.

    Module numbers of a dataset are expected to be exactly
    ``0 .. n - 1``; a gap in the numbering raises ``KeyError`` while writing.

    :param outpath: directory the ``.gmt`` files are written to
    """
    print('Collecting data')
    gses = {}
    for index, module in enumerate(GQModule.objects.all()):
        if index % 1000 == 0:
            print('%s modules done' % index)
        gse, gpl, num = module.split_full_name()
        symbols = ToSymbolConversion.convert(
            module.species, 'entrez', module.entrez_ids).get_final_symbol_ids()
        # NOTE(review): the original carried a disabled check comparing
        # len(symbols) with len(module.entrez_ids) - presumably the
        # conversion can drop ids; confirm before relying on equal lengths.
        gses.setdefault(gse + '_' + gpl, {})[num] = symbols

    print('Writing results to %s' % (outpath,))
    for key, modules in gses.items():
        # The context manager guarantees the file is flushed and closed.
        with open(here(outpath, key + '.gmt'), 'w') as out:
            for num in range(len(modules)):
                out.write('{}\t{}\n'.format(num, ' '.join(modules[num])))
def read_queries(path): queries = [l.strip().split('\t') for l in open(path).readlines()] return {q[0]: set(map(int, q[-1].split())) for q in queries} def read_modules(species): modules = dict( GQModule.objects.filter(species=species).values_list( 'full_name', 'entrez_ids')) for k in modules: modules[k] = set(modules[k]) return modules queries = read_queries(here(GSEA_SETS_PATH, 'hs.c7.queries.txt')) mm_gses = get_all_gses('mm') hs_gses = get_all_gses('hs') rt_gses = get_all_gses('rt') hs_gene2freq = get_gene_to_freq(db_version=2015, species='hs') mm_gene2freq = get_gene_to_freq(db_version=2015, species='mm') rt_gene2freq = get_gene_to_freq(db_version=2015, species='rt') hs_modules = read_modules('hs') mm_modules = read_modules('mm') rt_modules = read_modules('rt') class QueryResultItem:
import os

from genequery.utils import here

# Any Host header is accepted.
ALLOWED_HOSTS = ['*']

# Root of the deployed web front-end; the paths below are derived from it.
BASE_DIR = '/genequery/front-end/genequery-web/'

DEBUG = True

# --- Static files ---
STATIC_ROOT = here(BASE_DIR, '..', 'static')
STATIC_URL = '/genequery-web/static/'
STATICFILES_DIRS = (
    here(BASE_DIR, 'static'),
)

# --- Media files ---
# BASE_DIR already ends with '/', so the resulting path contains '//media/'.
MEDIA_ROOT = '{}/media/'.format(BASE_DIR)
MEDIA_URL = '/media/'
MEDIA_ARE_LOCAL = False

# Supposed to be used for Bonferroni adjustment.
# MUST BE UPDATED ALONG WITH DB VARIABLES.
MODULES_COUNT = dict(
    hs=117497,
    mm=82371,
    rt=13560,
)

# NOTE(review): these look like log10 of the MODULES_COUNT entries above -
# confirm, and regenerate both tables together when the DB changes.
BONFERRONI_ADJ_LOG_P_VALUE = dict(
    hs=5.070026778096448,
    mm=4.915774338435374,
    rt=4.132259689531044,
)
}, 'require_debug_true': { '()': 'django.utils.log.RequireDebugTrue' } }, 'handlers': { 'null': { 'level': 'DEBUG', 'class': 'logging.NullHandler', }, 'console': { 'level': 'DEBUG', 'class': 'logging.StreamHandler', 'formatter': 'verbose' }, 'common_file': { 'level': 'INFO', 'class': 'logging.handlers.RotatingFileHandler', 'filename': here(LOG_DIR, 'genequery.log'), 'maxBytes': 1024 * 1024 * 30, 'backupCount': 10, 'formatter': 'verbose', }, }, 'loggers': { 'genequery': { 'handlers': ['console', 'common_file'], 'level': 'DEBUG', }, } }