import contextlib import subprocess from scipy.stats import hypergeom from bson.json_util import dumps from operator import itemgetter import math from collections import defaultdict from app.GO_Parser import GOLocusParser import pandas as pd import app import genemania from fdr import fdr from status import Status log = app.get_logger('Gene Set Enrichment') @contextlib.contextmanager def mktemp(*args, **kwargs): d = tempfile.mkdtemp(*args, **kwargs) try: yield d finally: shutil.rmtree(d) def load_go_genes(): info = { 'database': 'go', 'collection': 'genes',
import os import sys import shutil import datetime import threading import app import ingest from util import save_file_metadata, add_id log = app.get_logger('dropbox') class Dropbox(threading.Thread): def __init__(self): threading.Thread.__init__(self) self.event = threading.Event() self.running = False self.files = {} def run(self): log.info('starting dropbox service') self.running = True # make dropbox directories, if necessary if not os.path.isdir(app.dropbox_path): os.makedirs(app.dropbox_path) log.info('created dropbox directory %s', app.dropbox_path) if not os.path.isdir(app.ingested_path): os.makedirs(app.ingested_path)
def __init__(self, table_name: str):
    """Create an accessor bound to a DynamoDB table.

    :param table_name: name of the DynamoDB table to bind to
    """
    self.Logger = app.get_logger(__name__)
    self.dynamoDb = boto3.resource("dynamodb")
    # bug fix: the table_name argument was previously ignored in favor of
    # the global app.AWS_TABLE_NAME; honor the caller-supplied name
    self.table = self.dynamoDb.Table(table_name)
import sys import pymongo import requests import itertools import genemania from itertools import islice from app.util import set_status, create_edges_index, cleanup_edges from app.status import Status import app log = app.get_logger('humannet') def parse(columns, metadata, lines): status = Status('networks', logger=log).n(len(lines)).start() for idx, line in enumerate(lines): status.log(idx) tokens = line.split('\t') if not len(tokens) == len(columns) + 3: continue source = tokens[0] target = tokens[1] # humannet composite score #score = float(tokens[-1]) for column, token in itertools.izip(columns, tokens[2:-1]): try: # individual edge score
import matplotlib.pyplot as plt from scipy.stats import norm from keras.layers import (Input, Conv2D, Conv2DTranspose, LeakyReLU, Flatten, Dense, Dropout, BatchNormalization, Reshape, Activation, Lambda) from keras.models import Model, load_model from keras import backend as K from keras.utils import plot_model from keras.optimizers import Adam from keras.callbacks import ModelCheckpoint, LearningRateScheduler from app.data import load_celeb, load_celeb_attr from app import get_logger log = get_logger(__name__) class VarAutoencoderModel(object): def __init__(self, input_shape, learning_rate, use_batch_norm, use_dropout, r_loss_factor, z_dim): self.__input_shape = input_shape self.__learning_rate = learning_rate self.__z_dim = z_dim self.__shape_before_flattening = None self.__encoder_input_layer = None self.__encoder_output_layer = None self.__encoder = None
import pymongo from app import genemania import threading import multiprocessing from collections import defaultdict from operator import itemgetter import math import requests from app.diffusion.kernel_scipy import SciPYKernel from app import go from app.util import set_status, to_boolean, to_numeric import app log = app.get_logger('network analysis job queue') def run_job(job, event): ''' network analysis job, run as a separate process :param job: job object that contains a snapshot of the project :param event: used to inform parent thread of completion :return: all artifacts are saved to the database The job creates a network artifact, with values defined as follows: sources: { source1_id: source1, # file meta-data object source2_id: source2
from app import get_logger, get_config import math from flask import render_template, redirect, url_for, flash, request from flask_login import login_required, current_user from app import utils from app.models import CfgNotify from app.main.forms import CfgNotifyForm from . import main logger = get_logger(__name__) cfg = get_config() # 通用列表查询 def common_list(DynamicModel, view): # 接收参数 action = request.args.get('action') id = request.args.get('id') page = int(request.args.get('page')) if request.args.get('page') else 1 length = int(request.args.get('length')) if request.args.get('length') else cfg.ITEMS_PER_PAGE # 删除操作 if action == 'del' and id: try: DynamicModel.get(DynamicModel.id == id).delete_instance() flash('删除成功') except: flash('删除失败') # 查询列表 query = DynamicModel.select() total_count = query.count()
bottle.BaseRequest.MEMFILE_MAX = 1024 * 1024 import app from app.util import serialize, get_value from app.dropbox import dropbox api = Bottle() import nav.api api.merge(nav.api.api) import nav.AaronUtils api.merge(nav.AaronUtils.api) log = app.get_logger('api') # default to the network analysis app index page @api.get('/') def index(): redirect('/static/workflow/html/index.html') # generic API to serve any resource in the static directory @api.get('/static/<filepath:path>') def static(filepath): return static_file(filepath, root=app.static_path) # generic API for querying a specific mongo database/collection @api.get('/api/mongo/:database/:collection')
import sys
import pymongo
import requests
import argparse
from itertools import islice
from app.util import set_status, create_edges_index, cleanup_edges
from app.status import Status
import app

log = app.get_logger('genemania')


def lookup_name(id, source='Gene Name'):
    """Resolve an Ensembl ID to its preferred name for a given source.

    :param id: Ensembl ID
    :param source: identifier source (e.g., 'Gene Name')
    :return: the matching name, or None when the ID is unknown
    """
    identifiers = pymongo.MongoClient().identifiers.genemania
    doc = identifiers.find_one({'preferred': id, 'source': source})
    if doc is None:
        return None
    return doc['name']


def lookup_id(name):
    """Resolve a gene id, symbol, or synonym to an Ensembl ID.

    The lookup is case-insensitive (the NAME column stores upper case).

    :param name: gene id, symbol, or synonym
    :return: Ensembl ID, or None when no record matches
    """
    identifiers = pymongo.MongoClient().identifiers.genemania
    doc = identifiers.find_one({'NAME': name.upper()})
    return doc['preferred'] if doc is not None else None
import sys
import pymongo
import requests
import argparse
from itertools import islice
from app.util import set_status, create_edges_index
from app.status import Status
import app

log = app.get_logger('genemania')


def lookup_name(id, source='Gene Name'):
    """Return the preferred name for an Ensembl ID, or None if unknown.

    :param id: Ensembl ID
    :param source: identifier source (e.g., 'Gene Name')
    """
    collection = pymongo.MongoClient().identifiers.genemania
    record = collection.find_one({'preferred': id, 'source': source})
    if record is None:
        return None
    return record['name']


def lookup_id(name):
    """Return the Ensembl ID for a gene id, symbol, or synonym, or None.

    Matching is case-insensitive: the NAME column stores upper case.

    :param name: gene id, symbol, or synonym
    """
    collection = pymongo.MongoClient().identifiers.genemania
    record = collection.find_one({'NAME': name.upper()})
    return record['preferred'] if record is not None else None
import pymongo from bson import ObjectId from bottle import Bottle, request, HTTPError, response import app from app.util import serialize, deserialize, set_status from app import genemania from bson.json_util import dumps log = app.get_logger('nav api') api = Bottle() @api.put('/api/nav/project') def create_project(): client = pymongo.MongoClient(app.mongodb_uri) project = { 'gene_list': [], 'include_neighbors': True, 'n_connected_neighbors': 20, 'n_hottest_neighbors': 20, 'do_heat_diffusion': False } set_status(project, 'created') project['_id'] = str(client.nav.projects.insert(project)) return serialize(project) @api.get('/api/nav/networks') def get_networks():
import os import time import pymongo from bson import ObjectId from bottle import request import app log = app.get_logger('util') def serialize(obj): if type(obj) in [list, set, tuple]: return [serialize(it) for it in obj] elif type(obj) == dict: for k, v in obj.iteritems(): obj[k] = serialize(v) return obj elif type(obj) == ObjectId: return str(obj) else: return obj def deserialize(obj): if type(obj) is list: return [deserialize(it) for it in obj] elif type(obj) == dict: for k, v in obj.iteritems(): if k == '_id': obj[k] = ObjectId(obj[k]) else:
import pymongo
import math
import glob
import os
import argparse
import sys
from itertools import islice
from util import set_status, create_edges_index, cleanup_edges
import genemania
from status import Status
import app

log = app.get_logger('tcga')


# create dictionary of mappings of genes to Ensembl IDs
def build_mapping(file):
    """Build identifier mappings from a two-column mapping file.

    Each line of *file* is "<symbol> <source_id>".

    :param file: path to the mapping file
    :return: (id_to_symbol, id_to_ensembl) dicts; the second only contains
        source ids whose symbol resolved to an Ensembl ID
    """
    id_to_symbol = {}
    with open(file) as fid:
        for line in fid:
            symbol, gene_id = line.split()
            id_to_symbol[gene_id] = symbol
    # resolve symbols to Ensembl IDs in one batch lookup
    symbol_to_id = genemania.id_lookup_table(id_to_symbol.values())
    id_to_ensembl = {
        gene_id: symbol_to_id[symbol]
        for gene_id, symbol in id_to_symbol.iteritems()
        if symbol in symbol_to_id
    }
    return id_to_symbol, id_to_ensembl
import pymongo import math import glob import os import argparse import sys from itertools import islice from util import set_status, create_edges_index, cleanup_edges import genemania from status import Status import app log = app.get_logger('tcga') #create dictionary of mappings of genes to Ensembl IDs def build_mapping(file): id_to_symbol = dict() with open(file) as fid: for line in fid: target, src = line.split() id_to_symbol[src] = target symbol_to_id = genemania.id_lookup_table(id_to_symbol.values()) id_to_ensembl = {k: symbol_to_id[v] for k, v in id_to_symbol.iteritems() if v in symbol_to_id} return id_to_symbol, id_to_ensembl def parse_edges(dir, meta_id, id_to_symbol, id_to_ensembl, threshold): for filename in glob.glob(os.path.join(dir, '*.cor')):
# cap in-memory request body buffering at 1 MiB
bottle.BaseRequest.MEMFILE_MAX = 1024 * 1024
import app
from app.util import serialize, get_value
from app.dropbox import dropbox

# root application; the sub-APIs imported below are merged into it
api = Bottle()
import nav.api
api.merge(nav.api.api)
import nav.SearchRESTLayer
api.merge(nav.SearchRESTLayer.api)
log = app.get_logger('api')


# default to the network analysis app index page
@api.get('/')
def index():
    # issue an HTTP redirect to the static entry point
    redirect('/static/workflow/html/index.html')


# generic API to serve any resource in the static directory
@api.get('/static/<filepath:path>')
def static(filepath):
    # serve files rooted at the app's static path only
    return static_file(filepath, root=app.static_path)

# generic API for querying a specific mongo database/collection
import sys import time import shutil import pymongo import tempfile import itertools import contextlib import subprocess from scipy.stats import hypergeom import app import genemania from fdr import fdr from status import Status log = app.get_logger('Gene Set Enrichment') @contextlib.contextmanager def mktemp(*args, **kwargs): d = tempfile.mkdtemp(*args, **kwargs) try: yield d finally: shutil.rmtree(d) def load_go_genes(): info = { 'database': 'go', 'collection': 'genes', 'url': 'http://geneontology.org/gene-associations/gene_association.goa_human.gz',
import sys import pymongo import requests import itertools import genemania from itertools import islice from app.util import set_status, create_edges_index, cleanup_edges from app.status import Status import app log = app.get_logger('humannet') def parse(columns, metadata, lines): status = Status('networks', logger=log).n(len(lines)).start() for idx, line in enumerate(lines): status.log(idx) tokens = line.split('\t') if not len(tokens) == len(columns) + 3: continue source = tokens[0] target = tokens[1] # humannet composite score #score = float(tokens[-1]) for column, token in itertools.izip(columns, tokens[2:-1]): try:
import sys import pymongo import requests import argparse from itertools import islice from app.util import set_status, create_edges_index from app.status import Status import app log = app.get_logger('icd10') def load_icd10(): client = pymongo.MongoClient() db = client.ontologies # collection ICD 10 codes icd10collection = db.icd10 #url = 'http://ec2-54-148-99-18.us-west-2.compute.amazonaws.com:9200/_plugin/head/DataSets/icd10_codes.txt' #log.info('reading network list from %s', url) #r = requests.get(url) f = open('/home/ec2-user/data/cytoscapenav/app/icd10_codes.txt', 'r') #f = open('/Users/aarongary/Development/DataSets/ICD_10/icd10_codes.txt', 'r') #r = requests.get(url) #lines = list(r.iter_lines())[1:] # ignore header line print('starting...')
import argparse from bson import ObjectId from gevent.pywsgi import WSGIServer from geventwebsocket.handler import WebSocketHandler import bottle from bottle import Bottle, redirect, request, response, static_file, request from bson.json_util import dumps import author_gene_clustering_module bottle.BaseRequest.MEMFILE_MAX = 1024 * 1024 import app api = Bottle() log = app.get_logger('api_alt') # generic API for returning the record count for a specific mongo database/collection @api.get('/ds/getmessage') def ds_getmessage(): return {'message': 'success'} # generic API for returning the record count for a specific mongo database/collection @api.get('/ds/getbpnet/:genes') def ds_get_bp_net(genes): genes_list = genes.split(',') graph_json = author_gene_clustering_module.analyze_AG_bipartite_network( genes_list)
def __init__(self):
    """Initialize the provider: logger, symbol universe, and the identity
    map of supported datapoints."""
    self.stock_list = []
    self.Logger = app.get_logger(__name__)
    self.Symbols = self.get_stocks()
    # each supported datapoint maps to itself (identity lookup table)
    self.Datapoints = {name: name for name in ('logo', 'company')}
from bson import ObjectId import pymongo from app import genemania import threading import multiprocessing from collections import defaultdict from operator import itemgetter import math from app.diffusion.kernel_scipy import SciPYKernel from app import go from app.util import set_status, to_boolean, to_numeric import app log = app.get_logger('network analysis job queue') def run_job(job, event): ''' network analysis job, run as a separate process :param job: job object that contains a snapshot of the project :param event: used to inform parent thread of completion :return: all artifacts are saved to the database The job creates a network artifact, with values defined as follows: sources: { source1_id: source1, # file meta-data object source2_id: source2
def __init__(self, datapoints: List[str] = None):
    """Set up logging, fetch the stock symbols, validate the requested
    datapoints, and load them for all symbols.

    :param datapoints: optional list of datapoint names; None selects
        whatever default _check_datapoints provides
    """
    self.Logger = app.get_logger(__name__)
    # full universe of symbols to load datapoints for
    self.Symbols = self.get_stocks()
    # normalize/validate the caller-supplied datapoint names
    self.datapoints = self._check_datapoints(datapoints)
    self.load_symbols_datapoints()
from app import get_logger, get_config import boto3 from utils import chunk_it, jsonify config = get_config() my_logger = get_logger() def to_log_file(data): if isinstance(data, list): for i in data: my_logger.info(i) else: my_logger.info(data) def to_firehose(data): conn = boto3.client('firehose', region_name="us-east-1", aws_access_key_id=config.AWS_ACCESS_KEY, aws_secret_access_key=config.AWS_SECRET_KEY) if isinstance(data, list): for group in chunk_it(data, 500): conn.put_record_batch( DeliveryStreamName=config.AWS_FIREHOSE_DELIVERY_STREAM_NAME, Records=[{ "Data": jsonify(i) } for i in group]) else: conn.put_record(
import os import sys import shutil import datetime import threading import app import ingest from util import save_file_metadata, add_id log = app.get_logger("dropbox") class Dropbox(threading.Thread): def __init__(self): threading.Thread.__init__(self) self.event = threading.Event() self.running = False self.files = {} def run(self): log.info("starting dropbox service") self.running = True # make dropbox directories, if necessary if not os.path.isdir(app.dropbox_path): os.makedirs(app.dropbox_path) log.info("created dropbox directory %s", app.dropbox_path) if not os.path.isdir(app.ingested_path): os.makedirs(app.ingested_path)
import re
import csv
import pymongo
import itertools
from bioservices import WikiPathways
import app
from util import save_file_metadata, split_id, is_numeric, is_boolean

log = app.get_logger('ingest')


def ingest(filepath):
    """Ingest a dropped file into mongo according to its registered parser.

    :param filepath: path to the file; its name encodes the metadata _id
    :return: updated file metadata (status 'success' plus row count), or
        None when no metadata record exists for the file
    :raises NotImplementedError: if the metadata names an unknown parser
    """
    _id, _ = split_id(filepath)
    client = pymongo.MongoClient()
    meta = client.files.meta.find_one({'_id': _id})
    if meta:
        parser = meta['parser']
        if parser == 'tsv':
            data = ingest_tsv(filepath)
        else:
            # bug fix: '%s'.format(parser) never interpolated the name —
            # the message always read literally "unknown parser %s"
            raise NotImplementedError('unknown parser {}'.format(parser))
        client.files[str(_id)].insert(data)
        return save_file_metadata(filepath, status='success', count=len(data))