def custom_db_analysis():
    """Build a genewordsearch database from user-uploaded files and run an
    enrichment analysis against it, returning the result as JSON.

    Reads from the Flask ``request``:
      * files 'geneDBs'  -- one or more custom database files
      * form  'headerN' / 'delimiterN' / 'geneColN' / 'desColsN'
              -- per-file parsing options (N = upload index)
      * form  'geneList' -- gene ids separated by whitespace/commas
      * form  'probCut'  -- probability cutoff (float)

    Returns ``jsonify(result=[...])``; aborts with HTTP 400 when the custom
    database does not contain the requested genes (KeyError from
    geneWordSearch). Temporary upload and database directories are removed
    on the success path.
    """
    # Deal with a custom database file
    import os
    import shutil
    from genewordsearch.DBBuilder import geneWordBuilder

    # Scratch space is keyed by client IP so concurrent users don't collide.
    ip = str(request.environ['REMOTE_ADDR'])
    folder = os.path.join(app.config['UPLOAD_FOLDER'], ip)
    os.makedirs(folder, exist_ok=True)

    # Save each upload under its sequential index, keeping the real extension.
    # os.path.splitext handles extensions of any length (the old filename[-4:]
    # slice silently mangled anything that wasn't a 3-character extension).
    # Building fileList directly in save order also fixes the previous
    # glob+lexicographic-sort desync once 10 or more files are uploaded.
    dbFiles = request.files.getlist('geneDBs')
    fileCount = len(dbFiles)
    fileList = []
    for fileNum, db in enumerate(dbFiles):
        ext = os.path.splitext(secure_filename(db.filename))[1]
        path = os.path.join(folder, str(fileNum) + ext)
        db.save(path)
        fileList.append(path)

    # Pull and organize the rest of the database info (one entry per file,
    # index-aligned with fileList).
    delimiters = [str(request.form['delimiter' + str(i)]) for i in range(fileCount)]
    geneCols = [str(request.form['geneCol' + str(i)]) for i in range(fileCount)]
    desCols = [str(request.form['desCols' + str(i)]) for i in range(fileCount)]
    headers = [str(request.form['header' + str(i)]) == 'y' for i in range(fileCount)]

    # Build the per-IP word database, then drop the raw uploads.
    geneWordBuilder(ip, fileList, geneCols, desCols, delimiters, headers)
    shutil.rmtree(folder)

    # Run the enrichment analysis
    genes = str(request.form['geneList'])
    probCutoff = float(request.form['probCut'])
    genes = [g for g in re.split(r'[\r ,\t\n]', genes) if g != '']
    try:
        results = geneWordSearch(genes, ip, minChance=probCutoff)
    except KeyError:
        # The uploaded database had none of the requested genes.
        abort(400)
    ans = WordFreq.to_JSON_array(results[0])
    # Remove the temporary per-IP database now that we have the results.
    shutil.rmtree('genewordsearch/databases/' + ip + '/')
    return jsonify(result=ans)
def custom_db_analysis():
    """Build a genewordsearch database from user-uploaded files and run an
    enrichment analysis against it, returning the result as JSON.

    Reads from the Flask ``request``:
      * files 'geneDBs'  -- one or more custom database files
      * form  'headerN' / 'delimiterN' / 'geneColN' / 'desColsN'
              -- per-file parsing options (N = upload index)
      * form  'geneList' -- gene ids separated by whitespace/commas
      * form  'probCut'  -- probability cutoff (float)

    Returns ``jsonify(result=[...])``; aborts with HTTP 400 when the custom
    database does not contain the requested genes (KeyError from
    geneWordSearch). Temporary upload and database directories are removed
    on the success path.
    """
    # Deal with a custom database file
    # Scratch space is keyed by client IP so concurrent users don't collide.
    ip = str(request.environ['REMOTE_ADDR'])
    folder = os.path.join(app.config['UPLOAD_FOLDER'], ip)
    os.makedirs(folder, exist_ok=True)

    # Save each upload under its sequential index, keeping the real extension.
    # os.path.splitext handles extensions of any length (the old filename[-4:]
    # slice silently mangled anything that wasn't a 3-character extension).
    # Building fileList directly in save order also fixes the previous
    # glob+lexicographic-sort desync once 10 or more files are uploaded.
    dbFiles = request.files.getlist('geneDBs')
    fileCount = len(dbFiles)
    fileList = []
    for fileNum, db in enumerate(dbFiles):
        ext = os.path.splitext(secure_filename(db.filename))[1]
        path = os.path.join(folder, str(fileNum) + ext)
        db.save(path)
        fileList.append(path)

    # Pull and organize the rest of the database info (one entry per file,
    # index-aligned with fileList).
    delimiters = [str(request.form['delimiter' + str(i)]) for i in range(fileCount)]
    geneCols = [str(request.form['geneCol' + str(i)]) for i in range(fileCount)]
    desCols = [str(request.form['desCols' + str(i)]) for i in range(fileCount)]
    headers = [str(request.form['header' + str(i)]) == 'y' for i in range(fileCount)]

    # Build the per-IP word database, then drop the raw uploads.
    geneWordBuilder(ip, fileList, geneCols, desCols, delimiters, headers)
    shutil.rmtree(folder)

    # Run the enrichment analysis
    genes = str(request.form['geneList'])
    probCutoff = float(request.form['probCut'])
    genes = [g for g in re.split(r'[\r ,\t\n]', genes) if g != '']
    try:
        results = geneWordSearch(genes, ip, minChance=probCutoff)
    except KeyError:
        # The uploaded database had none of the requested genes.
        abort(400)
    ans = WordFreq.to_JSON_array(results[0])
    # Remove the temporary per-IP database now that we have the results.
    shutil.rmtree('genewordsearch/databases/' + ip + '/')
    return jsonify(result=ans)
gwas = gwas_data_db[ont].get_data(cob=net) gwas_meta_db[ont][net]['windowSize'] = [] gwas_meta_db[ont][net]['flankLimit'] = [] for x in gwas['WindowSize'].unique(): gwas_meta_db[ont][net]['windowSize'].append(int(x)) for x in gwas['FlankLimit'].unique(): gwas_meta_db[ont][net]['flankLimit'].append(int(x)) # Find any functional annotations we have print('Finding functional annotations...') func_data_db = {} for func in co.available_datasets('RefGenFunc')['Name']: print('Processing annotations for {}...'.format(func)) func_data_db[func] = co.RefGenFunc(func) func_data_db[func].to_csv(os.path.join(conf['scratch'],(func+'.tsv'))) geneWordBuilder(func,[os.path.join(conf['scratch'],(func+'.tsv'))],[1],['2 end'],['tab'],[True]) # Find any GO ontologies we have for the networks we have print('Finding applicable GO Ontologies...') GOnt_db = {} for name in co.available_datasets('GOnt')['Name']: gont = co.GOnt(name) if gont.refgen.name not in GOnt_db: GOnt_db[gont.refgen.name] = gont # Generate in memory term lists print('Finding all available terms...') terms = {} for name,ont in onts.items(): terms[name] = {'data': [(term.id,term.desc,len(term.loci), len(ont.refgen.candidate_genes(term.effective_loci(window_size=50000))))
gwas_meta_db[ont][net]['overlapMethod'].append( str(x).strip().lower()) # Find any functional annotations we have print('Finding functional annotations...') func_data_db = {} for ref in co.Tools.available_datasets('RefGen')['Name']: refgen = co.RefGen(ref) if refgen.has_annotations(): print('Processing annotations for {}...'.format(ref)) func_data_db[ref] = refgen func_data_db[ref].export_annotations( os.path.join(conf['scratch'], (ref + '.tsv'))) if hasGWS: geneWordBuilder(ref, [os.path.join(conf['scratch'], (ref + '.tsv'))], [1], ['2 end'], ['tab'], [True]) # Find any GO ontologies we have for the networks we have print('Finding applicable GO Ontologies...') GOnt_db = {} for name in co.Tools.available_datasets('GOnt')['Name']: gont = co.GOnt(name) if gont.refgen.name not in GOnt_db: GOnt_db[gont.refgen.name] = gont # Generate in memory term lists print('Finding all available terms...') terms = {} for name, ont in onts.items(): terms[name] = []
# Find any functional annotations we have print("Finding functional annotations...") func_data_db = {} for ref in co.Tools.available_datasets("RefGen")["Name"]: refgen = co.RefGen(ref) if refgen.has_annotations(): print("Processing annotations for {}...".format(ref)) func_data_db[ref] = refgen func_data_db[ref].export_annotations( os.path.join(conf["scratch"], (ref + ".tsv")) ) if hasGWS: geneWordBuilder( ref, [os.path.join(conf["scratch"], (ref + ".tsv"))], [1], ["2 end"], ["tab"], [True], ) # Find any GO ontologies we have for the networks we have print("Finding applicable GO Ontologies...") GOnt_db = {} for name in co.Tools.available_datasets("GOnt")["Name"]: gont = co.GOnt(name) if gont.refgen.name not in GOnt_db: GOnt_db[gont.refgen.name] = gont # Generate in memory term lists print("Finding all available terms...") terms = {}
['Name','Description']].itertuples(index=False))} # Find all of the GWAS data we have available print('Finding GWAS Data...') gwas_data_db = {} for gwas in co.available_datasets('GWASData')['Name']: gwas_data_db[gwas] = co.GWASData(gwas) # Find any functional annotations we have print('Finding functional annotations...') func_data_db = {} for func in co.available_datasets('RefGenFunc')['Name']: print('Processing annotations for {}...'.format(func)) func_data_db[func] = co.RefGenFunc(func) func_data_db[func].to_csv(os.path.join(scratch_folder,(func+'.tsv'))) geneWordBuilder(func,[os.path.join(scratch_folder,(func+'.tsv'))],[1],['2 end'],['tab'],[True]) # Find any GO ontologies we have for the networks we have print('Finding applicable GO Ontologies...') GOnt_db = {} for name in co.available_datasets('GOnt')['Name']: gont = co.GOnt(name) if gont.refgen.name not in GOnt_db: GOnt_db[gont.refgen.name] = gont # Generate in memory term lists print('Finding all available terms...') terms = {} for ont in gwas_sets['data']: terms[ont[0]] = {'data': [(term.id,term.desc,len(term.loci), len(co.GWAS(ont[0]).refgen.candidate_genes(term.effective_loci(window_size=50000))))
print('Found gene names') # Find all of the GWAS data we have available print('Finding GWAS Data...') gwas_data_db = {} for gwas in co.available_datasets('GWASData')['Name']: gwas_data_db[gwas] = co.GWASData(gwas) # Find any functional annotations we have print('Finding functional annotations...') func_data_db = {} for func in co.available_datasets('RefGenFunc')['Name']: print('Processing annotations for {}...'.format(func)) func_data_db[func] = co.RefGenFunc(func) func_data_db[func].to_csv(os.path.join(scratch_folder,(func+'.tsv'))) geneWordBuilder(func,[os.path.join(scratch_folder,(func+'.tsv'))],[1],['2 end'],['tab'],[True]) # Find any GO ontologies we have for the networks we have print('Finding applicable GO Ontologies...') GOnt_db = {} for name in co.available_datasets('GOnt')['Name']: gont = co.GOnt(name) if gont.refgen.name not in GOnt_db: GOnt_db[gont.refgen.name] = gont # Generate in memory term lists print('Finding all available terms...') terms = {} for name,ont in onts.items(): terms[name] = {'data': [(term.id,term.desc,len(term.loci), len(ont.refgen.candidate_genes(term.effective_loci(window_size=50000))))