def _fetch_pcassay(self, args):
    """Fetch PubChem BioAssay records one id at a time via the web URL.

    PubChem BioAssay cannot be obtained via efetch, so each assay id is
    retrieved individually from the assay.cgi endpoint.

    args: dict; either contains 'id' (comma-separated str or list of ids)
        or WebEnv/query_key parameters resolved via self.esearch().
    Returns: list of XML strings, one per assay id, each wrapped in a
        <dummy_tag> so the attribute-laden root tag parses easily.
    Retries failed requests every 10s, giving up after self._giveup trials.
    """
    if 'id' in args:
        s_id = args['id']
    else:
        # retrieve id list from WebEnv+query_key
        s_id = self.esearch(args)
    if type(s_id) is str:
        s_id = s_id.split(",")
    out = []
    for id in s_id:
        url = "https://pubchem.ncbi.nlm.nih.gov/assay/assay.cgi?aid=%s&version=1.2&q=expdesc_xmldisplay" % str(id)
        trial = 1
        while True:
            self._check_interval()
            r = requests.get(url)
            self._flag_check()
            if not r.ok:
                # BUG FIX: was "trail" (NameError) and the format string
                # referenced the nonexistent r.error attribute instead of
                # the supplied error=r.text keyword.
                if trial == self._giveup:
                    util.error_msg('{r.reason} ({r.status_code}): {error}'.format(r=r, error=r.text))
                else:
                    time.sleep(10)
            else:
                # strip newlines and replace the attribute-carrying root tag
                # with a bare one inside <dummy_tag>
                x = re.sub(r'<PC-AssayDescription\s.*?>', '<dummy_tag><PC-AssayDescription>', re.sub(r'\n', '', r.content))
                out.append(x + '</dummy_tag>')
                break
            trial += 1
    return out
def get_con(s_name, auth_path=None, db=None):
    """Open a database connection for the entry named s_name in db.csv.

    s_name: str, value of the ID column identifying the connection entry.
    auth_path: str or None; path to the credentials CSV (defaults to
        db.csv next to this module).
    db: str or None; for MySQL, overrides the default database to use.
    Returns: a DB-API connection object, or None if the engine is unsupported.
    """
    if auth_path is None:
        auth_path = os.path.dirname(os.path.abspath(__file__)) + '/db.csv'
    t_db = pd.read_csv(util.format_path(auth_path))
    t_db.fillna('', inplace=True)
    t_db = t_db[t_db['ID'] == s_name]
    if len(t_db) == 0:
        util.error_msg('Database %s is not defined!' % s_name)
    one = t_db.iloc[0]
    s_type = one['TYPE']
    if s_type == 'MYSQL':
        import MySQLdb as mysql
        return mysql.connect(one['HOST'], one['USR'], one['PWD'], db or one['DB'])
    if s_type == 'POSTGRES':
        # requires: module load postgresql/9.2.4 and
        # export LD_LIBRARY_PATH=.:/tools/GNU/postgresql/9.2.4/lib/
        import pgdb
        return pgdb.connect(one['CONNECT'])
    if s_type == 'ORACLE':
        import cx_Oracle as oracle
        return oracle.connect(one['CONNECT'])
    util.error_msg('Unsupported database engine: %s' % s_type)
    return None
def _fetch_pccompound(self, args):
    """Fetch PubChem Compound records one id at a time via the PUG-View URL.

    PubChem Compound cannot be obtained via efetch, so each compound id is
    retrieved individually from the pug_view REST endpoint.

    args: dict; either contains 'id' (comma-separated str or list of ids)
        or WebEnv/query_key parameters resolved via self.esearch().
    Returns: list of XML strings, one per compound id, each wrapped in a
        <dummy_tag> so the attribute-laden root tag parses easily.
    Retries failed requests every 10s, giving up after self._giveup trials.
    """
    if 'id' in args:
        s_id = args['id']
    else:
        # retrieve id list from WebEnv+query_key
        s_id = self.esearch(args)
    if type(s_id) is str:
        s_id = s_id.split(",")
    out = []
    for id in s_id:
        url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/%s/XML/?response_type=display" % str(id)
        trial = 1
        while True:
            self._check_interval()
            r = requests.get(url)
            self._flag_check()
            if not r.ok:
                # BUG FIX: was "trail" (NameError) and the format string
                # referenced the nonexistent r.error attribute instead of
                # the supplied error=r.text keyword.
                if trial == self._giveup:
                    util.error_msg('{r.reason} ({r.status_code}): {error}'.format(r=r, error=r.text))
                else:
                    time.sleep(10)
            else:
                # strip newlines and replace the attribute-carrying root tag
                # with a bare one inside <dummy_tag>
                x = re.sub(r'<Record\s.*?>', '<dummy_tag><Record>', re.sub(r'\n', '', r.content))
                out.append(x + '</dummy_tag>')
                break
            trial += 1
    return out
def sql_in_old1(s_sql_left, s_sql_right, S_id, num=1000, con=None, params_before=None, params_after=None):
    """Run an IN-list query in chunks of at most ``num`` ids and concatenate
    the partial results into one DataFrame.

    The '?' placeholders for each chunk are spliced between s_sql_left and
    s_sql_right; params_before/params_after supply any additional bound
    parameters the SQL needs before/after the id list.
    """
    if con is None:
        util.error_msg('Database connection is not provided!')
    ids = list(S_id)
    total = len(ids)
    before = params_before or []
    after = params_after or []
    frames = []
    start = 0
    while start < total:
        stop = min(total, start + num)
        placeholders = ",".join("?" * (stop - start))
        frames.append(from_sql(con, s_sql_left + placeholders + s_sql_right,
                               params=before + ids[start:stop] + after))
        start = stop
    return pd.concat(frames, axis=0, ignore_index=True)
def make_icon(s_png, width=100, height=100, s_out=None):
    """Create a thumbnail of s_png scaled to fit inside width x height,
    centered on a white (height, width, 3) canvas, and save it as PNG.

    s_png: str, path to the source image (expects an RGB array).
    width, height: int or None; at least one must be given.  When one is
        None, the scale comes from the other dimension alone.
        NOTE(review): the canvas below is allocated as [height, width, 3],
        so passing None for either still crashes there -- confirm callers
        always supply both.
    s_out: str or None; output path, defaults to <s_png>_thumbnail.png.
    """
    import scipy.misc  # NOTE(review): imread/imresize/imsave were removed in scipy>=1.2 -- verify pinned scipy version
    if width is None and height is None:
        util.error_msg('At least one must not be None: width and height')
    #M=mahotas.imread(s_png)
    M = scipy.misc.imread(s_png)
    h, w, z = M.shape
    # choose the scale that keeps the image inside the requested box
    if width is None:
        scale = height * 1.0 / h
    elif height is None:
        scale = width * 1.0 / w
    else:
        scale_h = height * 1.0 / h
        scale_w = width * 1.0 / w
        scale = min(scale_h, scale_w)
    # round to nearest pixel
    h = int(h * scale + 0.5)
    w = int(w * scale + 0.5)
    Mr = scipy.misc.imresize(M, (h, w))
    h2, w2, z2 = Mr.shape
    # white canvas; '1u1' is a one-byte unsigned integer dtype
    M2 = np.ones([height, width, 3], dtype='1u1') * 255
    # center the resized image on the canvas
    # NOTE(review): "/ 2" assumes Python 2 integer division; under Python 3
    # these offsets become floats and break the slicing below
    offset0 = (width - w2) / 2
    offset1 = (height - h2) / 2
    M2[offset1:offset1 + h2, offset0:offset0 + w2, 0] = Mr[:, :, 0]
    M2[offset1:offset1 + h2, offset0:offset0 + w2, 1] = Mr[:, :, 1]
    M2[offset1:offset1 + h2, offset0:offset0 + w2, 2] = Mr[:, :, 2]
    if s_out is None:
        s_out, s_ext = os.path.splitext(s_png)
        s_out += "_thumbnail.png"
    #mahotas.imsave(s_out, M2)
    scipy.misc.imsave(s_out, M2)
def __init__(self, BIN=None, TEMPLATE=None):
    """Configure paths to the Circos binary and configuration template.

    BIN, TEMPLATE: optional path overrides; the class-level defaults are
        used when None.  Aborts via util.error_msg if either path is missing.
    """
    self.BIN = Circos.DEFAULT_BIN if BIN is None else BIN
    # refresh the class-level default template location relative to this file
    Circos.DEFAULT_TEMPLATE = os.path.join(os.path.dirname(__file__), "circos", "circos.conf.template")
    self.TEMPLATE = Circos.DEFAULT_TEMPLATE if TEMPLATE is None else TEMPLATE
    for s_path, s_kind in ((self.BIN, "tool"), (self.TEMPLATE, "template")):
        if not os.path.exists(s_path):
            util.error_msg("Circos " + s_kind + " " + s_path + " does not exist!")
def _add_attr(self, objs, property_name, S_value):
    """Used by to_list(): store S_value[i] into objs[i][property_name].

    objs: list of dict, one per record.
    property_name: str, key to set on each dict.
    S_value: list of values, same length as objs; aborts on mismatch.
    """
    n_obj, n_val = len(objs), len(S_value)
    if n_obj != n_val:
        util.error_msg('Attribute: %s, length of Objects and S_value do not match: %d vs %d!' % (property_name, n_obj, n_val))
    for obj, value in zip(objs, S_value):
        obj[property_name] = value
def search(query=None):
    """
    Search Google Play Music for a given query.

    Keyword arguments:
    query=None: The search query.

    Returns: A dict of lists with keys 'songs', 'artists', and 'albums'.
    """
    global content
    if query is None:  # No argument given.
        error_msg(outbar, 'Missing search query.')
        return

    # Fetch as many results as we can display depending on terminal height.
    limit = int((main.getmaxyx()[0] - 3) / 3)
    addstr(outbar, 'Searching for \'%s\'...' % query)
    result = api.search(query, max_results=limit)
    outbar.erase()  # Remove trailing output.
    outbar.refresh()

    # (category name, MusicObject class, hits key in result, per-entry key)
    categories = (
        ('songs', Song, 'song_hits', 'track'),
        ('artists', Artist, 'artist_hits', 'artist'),
        ('albums', Album, 'album_hits', 'album'),
    )
    content = {}
    for name, _cls, hits_key, _key in categories:
        content[name] = []
        result[name] = iter(result[hits_key])

    # Collect up to 'limit' of each type; exhausted categories just stop.
    for _ in range(limit):
        for name, cls, _hits_key, entry_key in categories:
            try:
                content[name].append(cls(next(result[name])[entry_key]))
            except StopIteration:
                pass

    return content
def wrapper(self, args):
    """Internal default wrapper function to be called if start(f=None).

    args: sequence whose first element is either a callable or the name of
        a function in self.registry; the remaining elements are passed to
        it as positional arguments.
    Returns: whatever the resolved function returns.
    """
    target, rest = args[0], args[1:]
    if callable(target):
        return target(*rest)
    if target not in self.registry:
        util.error_msg('Function %s were not registered!' % target)
    return self.registry[target](*rest)
def get_con_info(s_name, auth_path=None):
    """Return the db.csv row (a pandas Series) describing database s_name.

    auth_path defaults to db.csv next to this module.  Reports an error and
    returns None when no entry matches.
    """
    if auth_path is None:
        auth_path = os.path.dirname(os.path.abspath(__file__)) + '/db.csv'
    t_db = pd.read_csv(util.format_path(auth_path))
    t_db.fillna('', inplace=True)
    hits = t_db[t_db['ID'] == s_name]
    if len(hits) == 0:
        util.error_msg('Database %s is not defined!' % s_name)
        return None
    return hits.iloc[0]
def add_column(s_file, R, s_name, l_separator=True):
    """Add an extra column using value R array to an existing heat map.

    s_file: str, file name without extension; modifies <s_file>.cdt and,
        if present, <s_file>.atr.
    R: array(int/float), values to add, one per GENE row (in file order).
    s_name: str, column name for the new column.
    l_separator: bool, default True.  If True, also add a blank separator
        column between the existing columns and the new one.
    """
    # drop a trailing 3-letter extension if one was passed
    if re.search('\.\w{3}$', s_file):
        s_file = s_file[:-4]
    if not os.path.exists(s_file + '.cdt'):
        util.error_msg("File not exist: " + s_file + ".cdt!")
    f = open(s_file + '.cdt')
    S = []
    cnt = 0  # index into R, advanced once per GENE row
    while True:
        line = f.readline()
        if not line:
            break
        SS = line.strip().split("\t")
        if SS[0].startswith('GENE'):
            # data row: append separator (if any) then the new value
            if l_separator:
                SS.append('')
            SS.append('%.2f' % R[cnt])
            cnt += 1
        elif SS[0] == 'GID':
            # column-name header row
            if l_separator:
                SS.append('separator')
            SS.append(s_name)
        elif SS[0] == 'AID':
            # array-id row: mint new ARRY ids one past the current maximum
            X = [int(re.sub(r'\D', '', x)) for x in SS if x.startswith('ARRY')]
            n_array = max(X) + 1
            SS.append('ARRY%dX' % n_array)
            if l_separator:
                SS.append('ARRY%dX' % (n_array + 1))
        elif SS[0] == 'EWEIGHT':
            # new columns get zero experiment weight so clustering ignores them
            if l_separator:
                SS.append('0')
            SS.append('0')
        S.append(SS)
    f.close()
    S = ["\t".join(X) for X in S]
    util.save_list(s_file + '.cdt', S, s_end="\n")
    if os.path.exists(s_file + '.atr'):
        # extend the array tree: join each new ARRY id under a new NODE
        # NOTE(review): n_array is only defined if the .cdt contained an AID
        # row -- confirm that is guaranteed for files with an .atr
        S = util.read_list(s_file + '.atr')
        SS = S[-1].split("\t")
        n_node = int(re.sub(r'\D', '', SS[0])) + 1
        S.append('NODE%dX\tNODE%dX\tARRY%dX\t0' % (n_node, n_node - 1, n_array))
        if l_separator:
            S.append('NODE%dX\tNODE%dX\tARRY%dX\t0' % (n_node + 1, n_node, n_array + 1))
        util.save_list(s_file + '.atr', S, s_end="\n")
def tissue_specific(self):
    """Fetch the TIGER tissue-specific gene table from JHU as a DataFrame.

    Source: http://bioinfo.wilmer.jhu.edu/tiger/download/ref2tissue-Table.txt
    (columns: RefSeq, Tissue(s); see http://www.biomedcentral.com/1471-2105/9/271)
    Returns: pd.DataFrame with the RefSeq id and a space-joined tissue list.
    """
    url = 'http://bioinfo.wilmer.jhu.edu/tiger/download/ref2tissue-Table.txt'
    r = requests.post(url)
    if not r.ok:
        # BUG FIX: r.text was passed as a second positional argument, so the
        # %s placeholder was never interpolated; format explicitly.
        util.error_msg('Cannot fetch tissue specific data from JHU: %s' % r.text)
    S = r.content.split('\n')
    # first tab separates RefSeq from tissues (becomes the CSV comma);
    # remaining tabs separate multiple tissues (joined with spaces)
    for i, s in enumerate(S):
        S[i] = s.replace('\t', ',', 1).replace('\t', ' ')
    import cStringIO
    return pd.read_csv(cStringIO.StringIO("\n".join(S)))
def _fetch(self, path, args={}):
    """return results for a NCBI query, possibly from the cache

    :param: path: relative query path (e.g., 'einfo.fcgi')
    :param: args: dictionary of query args; a str is passed through as-is.
        NOTE: the mutable default {} is never mutated here (it is rebound),
        so it is safe in practice.
    :rtype: xml string

    The args are joined with args required by NCBI (tool and email address)
    and with the default args declared when instantiating the client.
    Requests are throttled to self.request_interval() seconds apart and
    retried every 10s until self._giveup trials have failed.
    """
    url = EUtils.url_base + path
    if type(args) is dict:
        # merge client defaults with per-call args (per-call wins)
        args = dict(self.def_args.items() + args.items())
    trial = 1
    while True:
        if self.debug:
            print "trial %d" % trial
        # else args is str, pass as it is
        # throttle: sleep until the minimum interval since the last request
        req_int = self.request_interval()
        sleep_time = req_int - (time.clock() - self._last_request_clock)
        #print "Sleep: ", sleep_time
        if sleep_time > 0:
            if self.debug:
                print "sleep_time %d" % sleep_time
            time.sleep(sleep_time)
        r = requests.post(url, args)
        self._last_request_clock = time.clock()
        self._request_count += 1
        if self.debug:
            print r.text
        if not r.ok:
            if trial == self._giveup:
                if any(bad_word in r.text for bad_word in ['<error>', '<ERROR>']):
                    # surface NCBI's own error element when present
                    xml = ET.fromstring(r.text.encode('utf-8'))
                    util.error_msg(
                        '{r.reason} ({r.status_code}): {error}'.format(
                            r=r, error=xml.find('ERROR').text))
                else:
                    # NOTE(review): '{r.error}' references a nonexistent
                    # attribute on requests' Response; the supplied
                    # error=r.text keyword is ignored -- this raises
                    # AttributeError instead of printing the message
                    util.error_msg(
                        '{r.reason} ({r.status_code}): {r.error}'.format(
                            r=r, error=r.text))
            else:
                # transient failure: wait and retry
                time.sleep(10)
        else:
            return r.content  #.encode('utf-8')
        trial += 1
def get_date(self): t=self.db.from_sql('select distinct history from statistics order by history desc limit 0,2') print t self.d1=self.d2=None if len(t)==0: util.error_msg('No history data is found') elif len(t)==1: util.warn_msg('Only one history entry') self.d1=str(t.ix[0, 'history']) else: self.d1=str(t.ix[0, 'history']) self.d2=str(t.ix[1, 'history']) print self.d1 print self.d2
def write(fn=None):
    """
    Write the current queue to a file.

    Keyword arguments:
    fn=None: File to be written to.
      File is stored at ~/.local/share/pmcli/playlists/.
    """
    if not queue:  # Can't save an empty queue.
        error_msg(outbar, 'Queue is empty.')
        return

    if fn is None:  # No argument.
        error_msg(outbar, 'Missing argument to write.')
        return

    path = join(expanduser('~'), '.local', 'share', 'pmcli', 'playlists')

    if not exists(path):  # No playlists directory.
        error_msg(outbar, 'Path to playlists does not exist.')

    elif exists(join(path, fn)):
        # BUG FIX: message previously read 'Playist'.
        error_msg(outbar, 'Playlist %s already exists.' % fn)

    else:  # Write the playlist.
        # Append mode is effectively create-new here: the branch above
        # guarantees the file does not yet exist.
        with open(join(path, fn), 'a') as f:
            json.dump(queue, f)
        addstr(outbar, 'Wrote queue to %s.' % fn)
def task_factory(xe): class_name = xe.tag if class_name in globals(): task_class = globals()[class_name] else: module_name = class_name.lower() if os.path.exists( os.path.join(SyncDB.LIB_DIR(), module_name + ".py")): m = __import__('task_class.' + module_name, globals(), locals(), ['*'], -1) task_class = m.__dict__[class_name] globals()[class_name] = task_class else: print util.error_msg('Module not found: ' + class_name) return task_class(xe=xe)
def calc_node_info(self, node, degreeCutoff=None):
    """Compute the MCODE NodeInfo record for one node.

    node: node identifier within this network.
    degreeCutoff: numeric or None; when truthy, the node is scored via
        NodeInfo.score_node(degreeCutoff) before returning.
    Returns: a NodeInfo with density, core level/density and the node's
        neighbor list filled in.  When self.l_cache is set, results are
        memoized by the MD5 of the neighborhood subnetwork.
    """
    k = self.degree(node)
    neighbors = self.neighbors(node)
    s_md5=""
    #print("::::",node, ":::", k, "::::")
    #sw=util.StopWatch()
    if (k < 2):
        # degree 0 or 1: trivial neighborhood, constant scores
        nodeInfo = NodeInfo()
        if (k == 1):
            nodeInfo.coreLevel = 1
            nodeInfo.coreDensity = 1.0
            nodeInfo.density = 1.0
            nodeInfo.numNodeNeighbors = len(neighbors);
            nodeInfo.nodeNeighbors = neighbors;
        # why ignore neighbor when k==1 in the original code???
    else:
        # neighborhood = the node plus all of its neighbors
        gpNodeNeighborhood = self.subnetwork(neighbors+[node])
        #sw.check('subnetwork')
        if (gpNodeNeighborhood.is_empty()):
            util.error_msg("In calc_node_info(): gpNodeNeighborhood was None.")
        #calculate the node information for each node
        if self.l_cache:
            # memoize by neighborhood fingerprint; clone so the caller can
            # mutate its copy without corrupting the cache
            s_md5=gpNodeNeighborhood.node_MD5()
            if s_md5 in self.cache_info:
                #self.hit+=1
                nodeInfo=self.cache_info[s_md5].clone()
                nodeInfo.nodeNeighbors=neighbors
                return nodeInfo
        nodeInfo = NodeInfo()
        #density of the neighborhood subnetwork
        nodeInfo.density = MCODE.calc_density(gpNodeNeighborhood, self.params['includeLoops'])
        #w.check('density')
        nodeInfo.numNodeNeighbors = len(neighbors)
        #calculate the highest k-core of the neighborhood
        c = self.get_highest_KCore(gpNodeNeighborhood)
        #w.check('kcore')
        k = c['k']
        gpCore = c['network']
        nodeInfo.coreLevel = k
        if (gpCore is not None and not gpCore.is_empty()):
            nodeInfo.coreDensity = MCODE.calc_density(gpCore, self.params['includeLoops'])
        #w.check('cacl_density')
        #record neighbor array for later use in cluster detection step
        nodeInfo.nodeNeighbors = neighbors
    if degreeCutoff:
        nodeInfo.score_node(degreeCutoff)
    if self.l_cache:
        # NOTE(review): for k<2 nodes s_md5 is still "" here, so all such
        # nodes share the cache key '' -- confirm this is intended
        self.cache_info[s_md5]=nodeInfo
    return nodeInfo
def parse_uniprot_kinase(self): url = 'http://www.uniprot.org/docs/pkinfam' r = requests.post(url) if not r.ok: util.error_msg('Cannot fetch kinase members from UniProt: %s', r.text) S = r.content.split('\n') #s_file='pkinfam' #f=open(s_file) data = [] n = len(S) i = 0 while i < n: line = S[i] i += 1 if re.search(r"^=+", line): s_grp = S[i] i += 1 s = S[i] i += 1 if not re.search(r"^=+", line): util.error_msg("Parsing error, expecting: ====") s = S[i] i += 1 while re.search('^\W*$', s): s = S[i] i += 1 continue #if re.search('\w', s): # util.error_msg("Parsing error, expecting a blank line") while re.search('\w', s): rslt = re.search('_HUMAN\s+\(<a.+>(\w+)<\/a>\s+\)', s) if rslt is not None: data.append({ 'annotation_field1': rslt.groups()[0], 'content': s_grp }) s = S[i] i += 1 t_kinase = pd.DataFrame(data) t_kinase['gid'] = t_kinase.annotation_field1.apply( lambda x: self.uniprot2gid.get(x, 0)) t_kinase['tax_id'] = '9606' t_kinase = t_kinase[t_kinase['gid'] > 0].copy() t_kinase.to_csv(os.path.join(SyncDB.DOWNLOAD_DIR(), self.fn_dest_kinase), index=False) print "%d Kinase Proteins Fetched" % len(t_kinase)
def _batch_retrieve(self, action, args, count=0, func=None, retmax=10000): """Return dict {WebEnv, query_key}""" # action can be efetch or esummary # according to https://www.ncbi.nlm.nih.gov/books/NBK25499/ # maximumly allowed retmax is 10000 for efetch and esummary retstart = 0 if 'id' in args: count, s_id = self._format_ids(args['id']) args['id'] = s_id args = dict(self.def_args.items() + args.items()) out = self.epost(args) del args['id'] xml = ET.fromstring(out) args['WebEnv'] = xml.find('WebEnv').text args['query_key'] = xml.find('QueryKey').text else: if 'WebEnv' not in args and 'query_key' not in args: util.error_msg('Missing id, WebEnv, query_key!') #if action=='elink' and 'cmd' in args and args['cmd']=='neighbor_history': # # not very meaningful for our use, as we separate id by id=&id=..., # # it will return one query_key per input id # del args['cmd'] out = self._fetch( "/" + action + '.fcgi', dict(args.items() + [('retmax', retmax), ('retstart', 0)])) #if 'cmd' in args and args['cmd']=='neighbor_history': # xml=ET.fromstring(out) # webenv=xml.find('./LinkSet/WebEnv').text # query_key=xml.find('./LinkSet/LinkSetDbHistory/QueryKey').text # return {'WebEnv': webenv, 'query_key': query_key} if func is not None and callable(func): out = func(out) S_xml = [out] # if there are more entries than retmax, we need to make additional trip while (count > 0 and retstart + retmax < count): retstart += retmax print "Fetching batch: %d ..." % (retstart) out = self._fetch( "/" + action + '.fcgi', dict(args.items() + [('retmax', retmax), ('retstart', retstart)])) if func is not None and callable(func): out = func(out) S_xml.append(out) return S_xml
def to_list(self, S_attr=None):
    """Return a list of dicts, one per object in self.data, populated with
    the attributes named in S_attr.

    Generally, this is the only method we need to call to convert XML into
    a list of dict.

    S_attr: list of attribute names implemented on this parser, or None for
        no attributes.  Each attribute call must return either a list (one
        value per object, e.g. gene description) or a dict of such lists
        (expanded into 'attr.key' entries, e.g. pubmed journal).
    Returns: list of dict.
    """
    out = [{} for _ in self.data]
    # BUG FIX: iterating S_attr crashed with TypeError when the default
    # None was used; treat None as "no attributes requested".
    for x in (S_attr or []):
        if getattr(self, x) is not None:
            S_value = getattr(self, x)()
            if type(S_value) is list:
                # one value per record, e.g. gene description
                self._add_attr(out, x, S_value)
            elif type(S_value) is dict:
                # dict of per-record lists, flattened into 'attr.key'
                # columns, e.g. pubmed journal
                for k, v in S_value.items():
                    self._add_attr(out, x + '.' + k, v)
            else:
                util.error_msg('Attribute %s not implemented!' % x)
    return out
def adjust_p(R_p, N=None, method="BH"):
    """Adjust p-values for multiple testing.

    R_p: array-like of p-values.
    N: int, total number of tests run; defaults to len(R_p).
    method: 'BONFERRONI', 'HOLM', or 'BH'/'FDR' (Benjamini & Hochberg).
    Returns: np.array of adjusted p-values in the input order.
    Output has been validated against p.adjust in R.
    """
    # CLEANUP: removed the dead l_old code path, which used the pandas .ix
    # indexer (removed in pandas 1.0) and was never executed.
    n = len(R_p)
    N = n if N is None else N
    m = method.upper()
    if m == "BONFERRONI":
        return np.clip(np.array(R_p) * N, 0.0, 1.0)
    elif m == "HOLM":
        t = pd.DataFrame({'p': R_p, 'q': R_p, 'I': list(range(n))})
        t.sort_values('p', ascending=True, inplace=True)
        t.index = range(n)
        # step-down: q_i = p_i * (N - i), then enforce monotonicity
        q = np.clip(t.p.values * (N - np.arange(n)), 0.0, 1.0)
        t['q'] = np.maximum.accumulate(q)
        t.sort_values('I', inplace=True)  # restore input order
        return t.q.values
    elif m in ("BH", "FDR"):
        t = pd.DataFrame({'p': R_p, 'q': R_p, 'I': list(range(n))})
        t.sort_values('p', ascending=False, inplace=True)
        t.index = range(n)
        # step-up: q_i = p_i * N / rank_i (rank from largest p), then
        # enforce monotonicity
        q = np.clip(t.p.values * N * 1.0 / (n - np.arange(n)), 0.0, 1.0)
        t['q'] = np.minimum.accumulate(q)
        t.sort_values('I', inplace=True)  # restore input order
        return t.q.values
    else:
        util.error_msg('Unsupported method: %s' % method)
def make_input(self, s_file='untitled', options=None):
    """Write self.table as a Cluster 3.0-style .input file.

    Produces a tab-delimited file with a header row (Gene/Description/
    Weight/<data columns>), an experiment-weight row, and one row per
    record, optionally Z-normalizing each gene row.

    s_file: str, output name; a trailing '.input' is stripped and re-added.
    options: dict or None; merged into (and persisted on) self.input_opt.
    Side effects: updates self.input_opt, may add a weight column to
        self.table, and sets self.input to the file stem written.
    """
    if self.table is None:
        util.error_msg('Clustering.make_input: missing Clustering.table!')
    S = self.table.header()
    S_up = [s.upper() for s in S]
    # merge caller options into the persistent input options
    opt = self.input_opt
    opt.update(options or {})
    self.input_opt = opt
    # NOTE(review): list.index raises ValueError for a missing column, so
    # this <0 check can never trigger -- likely util.index was intended
    S_miss = [s for s in opt['DATA_COLS'] if S.index(s) < 0]
    if len(S_miss) > 0:
        util.error_msg('Clustering.make_input: missing data column: ' + ", ".join(S_miss))
    # resolve the ID column, falling back to a column named GENE
    i_id = util.index(opt['ID'], S)
    if (i_id < 0):
        i_id = S_up.index('GENE')
    if i_id < 0:
        util.error_msg('Clustering.make_input: no column is specified as the ID!')
    opt['ID'] = S[i_id]
    if type(opt['DESCRIPTION']) is str:
        opt['DESCRIPTION'] = [opt['DESCRIPTION']]
    # resolve description columns; default to the ID column when none match
    I_des = [util.index(s, S) for s in opt['DESCRIPTION'] if util.index(s, S) >= 0]
    if (len(I_des) == 0):
        I_des = [i_id]
        opt['DESCRIPTION'] = [opt['ID']]
    else:
        # make sure description columns are strings
        for i in I_des:
            self.table.iloc[:, i] = util.sarray2sarray(self.table.iloc[:, i])
    i_w = util.index(opt['WEIGHT_COL'], S)
    opt['DATA_COLS'] = self.get_default_exp_cols(opt['DATA_COLS'])
    n_exp = len(opt['DATA_COLS'])
    if n_exp == 0:
        util.error_msg('Clustering.make_input: no data column is specified!')
    S_out = []
    S_out.append('Gene\tDescription\tWeight\t' + '\t'.join(opt['DATA_COLS']))
    # experiment-weight row: all 1s unless a full EXP_WEIGHT vector is given
    if opt['EXP_WEIGHT'] is None or len(opt['EXP_WEIGHT']) != n_exp:
        S_out.append('Exp\t\t' + '\t1' * n_exp)
    else:
        S_out.append('Exp\t\t\t' + '\t'.join(util.rarray2sarray(opt['EXP_WEIGHT'], s_format='%g', s_null=1.0)))
    #df.fillna('', inplace=True)
    i_cols = [S.index(s) for s in opt['DATA_COLS']]
    # optional per-gene weights become a (possibly new) weight column
    if opt['GENE_WEIGHT'] is not None and len(opt['GENE_WEIGHT']) == len(self.table):
        if opt['WEIGHT_COL'] == '':
            opt['WEIGHT_COL'] = 'WEIGHT'
        self.table[opt['WEIGHT_COL']] = opt['GENE_WEIGHT']
    for i in range(len(self.table)):
        # id, colon-joined description, weight (1 when no weight column)
        s = str(self.table.iloc[i, i_id]) + '\t' + ":".join(self.table.iloc[i, I_des]) + '\t' + str(self.table.iloc[i, i_w] if i_w >= 0 else 1)
        R = np.array([x for x in self.table.iloc[i, i_cols]])
        if opt['GENE_NORMALIZE'] and opt['NORMALIZE_METHOD'] == 'Z':
            # Z-normalize the row using non-NaN values for the mean
            # NOTE(review): std is taken over R (with NaNs) rather than
            # valid -- NaNs would propagate; confirm whether np.std(valid)
            # was intended
            valid = util.no_nan(R)
            if len(valid) > 1:
                R = (R - np.mean(valid)) / np.std(R, ddof=1)
        s += '\t' + '\t'.join(['' if pd.isnull(x) else str(x) for x in R])
        S_out.append(s)
    if re.search(r'\.input$', s_file) is not None:
        s_file = re.sub(r'\.input$', '', s_file)
    util.save_list(s_file + ".input", S_out, s_end='\n')
    self.input = s_file
def sql_in(s_sql_left, s_sql_right, S_id, num=1000, con=None, params_before=None, params_after=None):
    """Run a query whose left part ends in 'col IN (' against a long id list.

    The trailing IN clause is rewritten as multiple OR'ed 'col IN (...)'
    groups of at most ``num`` placeholders so everything executes as one
    SQL statement (GROUP BY / ORDER BY / DISTINCT therefore behave exactly).
    params_before/params_after supply bound parameters appearing before and
    after the id list.
    """
    if con is None:
        util.error_msg('Database connection is not provided!')
    ids = list(S_id)
    # flatten multi-line SQL and drop a leading ')' from the right part
    s_sql_left = re.sub('[\r\n]', ' ', s_sql_left)
    s_sql_right = re.sub('[\r\n]', ' ', s_sql_right)
    s_sql_right = re.sub(r'^\s*\)', '', s_sql_right)
    m = re.search(re.compile(r'\s+([\w.]+)\s+IN\s*\(\s*$', re.I), s_sql_left)
    if m is None:
        util.error_msg('Left SQL does not ends with IN statement: %s' % s_sql_left)
    s_col = m.groups()[0]
    s_sql_left = s_sql_left[:m.start()] + " "
    before = params_before or []
    after = params_after or []
    # one placeholder group per chunk of ids
    chunks = [",".join(["?"] * len(ids[i:i + num])) for i in range(0, len(ids), num)]
    s_id = "(" + s_col + " IN (" + (") OR " + s_col + " IN (").join(chunks) + "))"
    if db_type(con) == 'MYSQL' and not chunks:
        # empty id list on MySQL: emit an IN ('') clause that matches nothing
        s_id = "(" + s_col + " IN (''))"
    return from_sql(con, s_sql_left + s_id + s_sql_right, params=before + ids + after)
def insert_pct(pct_slide, s_img, top=None):
    """My special slide layout, a title and one big picture.

    Scales the image to fit the fixed picture box while preserving aspect
    ratio, centers it within the box, and adds it to the slide.

    pct_slide: a python-pptx slide object.
    s_img: str, path to the image file.
    top: int (EMU) or None; defaults to just below the title area.
    """
    # fixed picture box, in EMUs
    (left, width, height) = (296260, 8551480, 5078313)
    if top is None:
        top = 1347965 + 25 * 914400 // 72  # 25pt below the title
    center_x = left + width // 2
    center_y = top + height // 2
    ratio0 = width * 1.0 / height
    if not os.path.exists(s_img):
        # BUG FIX: message used '$s' (not a placeholder), so the % operator
        # raised "not all arguments converted" instead of reporting the file.
        util.error_msg('File %s not exist!' % s_img)
    M = scipy.misc.imread(s_img)
    h, w, z = M.shape
    ratio = w * 1.0 / h
    if ratio > ratio0:
        # image is wider than the box: pin width, shrink height
        height = int(width / ratio)
        top = center_y - height // 2
    else:
        # image is taller than the box: pin height, shrink width
        width = int(height * ratio)
        left = center_x - width // 2
    pct_slide.shapes.add_picture(s_img, left, top, width, height)
def read_cdt(s_file):
    """Read a Cluster 3.0 .cdt file.

    s_file: str, path with or without the '.cdt' extension.
    Returns: (t, R_exp, R_gene) where t is a DataFrame of the data rows
        (optional GENE/NAME columns followed by the expression columns),
        R_exp is the EWEIGHT row values, and R_gene is the per-row GWEIGHT
        values (falls back to the column before the first data column when
        no GWEIGHT header exists).
    """
    if not s_file.endswith('.cdt'):
        s_file += '.cdt'
    if not os.path.exists(s_file):
        util.error_msg("File not exist: " + s_file + "!")
    f = open(s_file)
    S_header = f.readline().strip().split("\t")
    i_w = util.index("GWEIGHT", S_header)
    i_gene = util.index('GENE', S_header)
    i_name = util.index('NAME', S_header)
    l_start = False  # becomes True once the EWEIGHT row has been seen
    R_exp = []
    R_gene = []
    data = []
    offset = 0  # column index where the numeric data starts
    while True:
        line = f.readline()
        if not line:
            break
        S = line.strip().split("\t")
        if S[0] == 'EWEIGHT':
            # first non-empty cell in the EWEIGHT row marks the data offset
            for i in range(1, len(S)):
                if S[i] != "":
                    offset = i
                    break
            # rebuild the header: optional GENE/NAME then the data columns
            tmp = []
            if i_gene >= 0:
                tmp.append(S_header[i_gene])
            if i_name >= 0:
                tmp.append(S_header[i_name])
            S_header = tmp + S_header[offset:]
            R_exp = util.sarray2rarray(S[offset:])
            if i_w < 0:
                # no GWEIGHT column: use the column just before the data
                i_w = offset - 1
            l_start = True
        elif l_start:
            # data rows only start after the EWEIGHT row
            one = []
            if i_gene >= 0:
                one.append(S[i_gene])
            if i_name >= 0:
                one.append(S[i_name])
            one.extend(util.sarray2rarray(S[offset:]))
            data.append(one)
            R_gene.append(float(S[i_w]))
    f.close()
    t = pd.DataFrame(data, columns=S_header)
    return (t, R_exp, R_gene)
def transition(input):
    """
    Route input to the appropriate function.

    Arguments:
    input: User input.
    """
    commands = {
        'h': help, 'help': help,
        'e': expand, 'expand': expand,
        's': search, 'search': search,
        'p': play, 'play': play,
        'q': enqueue, 'queue': enqueue,
        'w': write, 'write': write,
        'r': restore, 'restore': restore,
    }

    if content is None:
        addstr(infobar, 'Now playing: None')

    # Split off a single argument when one is present.
    try:
        command, arg = input.split(maxsplit=1)
    except ValueError:
        command, arg = input, None

    if command not in commands:
        error_msg(outbar, 'Nonexistent command.')
        return
    commands[command](arg)
    if content is not None:
        display()
def table_exists(con, s_table, s_db=""):
    """Return True if table s_table exists on the given connection.

    s_db: optional schema name, honored for MySQL only.
    Supports MYSQL, ORACLE and SQLITE; aborts on any other engine.
    """
    engine = db_type(con)
    if engine == 'MYSQL':
        s_sql = "select count(*) from information_schema.tables where table_name=?"
        params = [s_table]
        if s_db:
            s_sql += " and table_schema=?"
            params.append(s_db)
        t = from_sql(con, s_sql, params)
    elif engine == 'ORACLE':
        t = from_sql(
            con,
            "select count(*) from user_tables where table_name=upper(?)",
            [s_table])
    elif engine == 'SQLITE':
        t = from_sql(
            con,
            "select count(*) from sqlite_master where type='table' and name=?",
            [s_table])
    else:
        util.error_msg('Unsupported database engine!')
    return t.ix[0, 0] > 0
def sql_in_old2(s_sql_left, s_sql_right, S_id, num=1000, con=None, params_before=None, params_after=None):
    """Legacy variant of sql_in(): rewrite a trailing 'col IN (' clause as
    OR'ed IN groups of at most ``num`` placeholders and run one query.

    params_before/params_after supply bound parameters appearing before and
    after the id list.
    """
    if con is None:
        util.error_msg('Database connection is not provided!')
    ids = list(S_id)
    # flatten multi-line SQL and drop a leading ')' from the right part
    s_sql_left = re.sub('[\r\n]', ' ', s_sql_left)
    s_sql_right = re.sub('[\r\n]', ' ', s_sql_right)
    s_sql_right = re.sub(r'^\s*\)', '', s_sql_right)
    m = re.search(re.compile(r'\s+([\w.]+)\s+IN\s*\(\s*$', re.I), s_sql_left)
    if m is None:
        util.error_msg('Left SQL does not ends with IN statement: %s' % s_sql_left)
    s_col = m.groups()[0]
    s_sql_left = s_sql_left[:m.start()] + " "
    before = params_before or []
    after = params_after or []
    # one placeholder group per chunk of ids
    chunks = [",".join(["?"] * len(ids[i:i + num])) for i in range(0, len(ids), num)]
    s_id = "(" + s_col + " IN (" + (") OR " + s_col + " IN (").join(chunks) + "))"
    return from_sql(con, s_sql_left + s_id + s_sql_right, params=before + ids + after)
def __init__(self, input='', table=None, input_options=None, cluster_options=None, user_hybrid = None):
    """Initialize a Clustering run from a prebuilt .input file or a table.

    input: str, existing .input file name (a trailing '.input' is stripped).
    table: data table to cluster; its ID column is coerced to str.
    input_options / cluster_options: dicts merged over the class defaults
        DEFAULT_INPUT_OPT / DEFAULT_CLUSTER_OPT.
    user_hybrid: optional command line whose last token is the clustering
        binary path; overrides Clustering.BIN_HYB when the path exists.
    At least one of input/table must be provided.
    """
    if user_hybrid is not None:
        user_hybrid_path = user_hybrid.split()[-1]
        if os.path.exists(user_hybrid_path):
            Clustering.BIN_HYB = user_hybrid
        else:
            util.error_msg("Clustering tool " + user_hybrid_path + " does not exist!")
    self.input_opt = {}
    self.cluster_opt = {}
    self.input = ''
    self.table = None
    if input != '':
        if re.search(r'\.input$', input) is not None:
            # BUG FIX: previously referenced the undefined name s_input,
            # raising NameError whenever input ended in '.input'.
            self.input = re.sub(r'\.input$', '', input)
        else:
            self.input = input
    input_options = input_options or {}
    cluster_options = cluster_options or {}
    self.input_opt = Clustering.DEFAULT_INPUT_OPT.copy()
    self.input_opt.update(input_options)
    # these options migrated between dicts; fail loudly on stale callers
    if 'EXP_WEIGHT' in cluster_options:
        util.error_msg('Clustering.__init__: EXP_WEIGHT has been moved from cluster_options into input_options!')
    if 'DATA_COLS' in cluster_options:
        util.error_msg('Clustering.__init__: DATA_COLS should be into input_options, not cluster_options!')
    self.cluster_opt = Clustering.DEFAULT_CLUSTER_OPT.copy()
    self.cluster_opt.update(cluster_options)
    if table is not None:
        self.table = table
        if self.table.col_type(self.input_opt['ID']) != 's':
            self.table[self.input_opt['ID']] = self.table[self.input_opt['ID']].astype(str)
    if input == '' and table is None:
        util.error_msg('Clustering.__init__: Missing both input and table!')
    if type(self.input_opt['DESCRIPTION']) is str:
        self.input_opt['DESCRIPTION'] = [self.input_opt['DESCRIPTION']]
    # normalize weight vectors: cast to np arrays and drop them when all ~1.0
    # NOTE(review): the result is written back into the caller's
    # input_options dict, not self.input_opt, mirroring the original code --
    # confirm downstream reads input_options rather than self.input_opt
    for s_key in ('EXP_WEIGHT', 'GENE_WEIGHT'):
        R_w = self.input_opt[s_key]
        if R_w is not None and len(R_w) > 0:
            R_w = util.sarray2rarray(R_w)  # cast to np array
            if np.allclose(R_w, 1.0, atol=1e-5):
                input_options[s_key] = None
            else:
                input_options[s_key] = R_w
def play(arg=None):
    """
    Play a MusicObject or the current queue.

    Keyword arguments:
    arg=None: A number n to play item n, 's' to play the queue in
      shuffle mode, or None to play the current queue in order.
    """
    global content
    # BUG FIX: previously compared with "arg is 's'", which tests object
    # identity and only works by CPython string-interning accident; use ==.
    if arg is None or arg == 's':
        if not queue:  # Can't play an empty queue.
            error_msg(outbar, 'The queue is empty.')
        else:  # Play the queue.
            if arg == 's':  # Shuffle.
                queue.shuffle()
            content = queue.collect()
            display()
            addstr(outbar, '[spc] pause [q] stop [n] next [9-0] volume')
            queue.play(infobar)
            outbar.erase()  # Remove trailing output.
            outbar.refresh()
    elif content is None:  # Nothing to play.
        error_msg(outbar, 'Wrong context for play.')
    else:
        try:
            num = int(arg)
        except ValueError:  # arg needs to be an int if it isn't 's'.
            error_msg(outbar, 'Invalid argument to play.')
        else:
            opt = get_option(num)
            if opt is not None:  # Valid input.
                addstr(outbar, '[spc] pause [q] stop [n] next [9-0] volume')
                opt.play(infobar)
                addstr(infobar, 'Now playing: None')
                outbar.erase()
                outbar.refresh()
            else:  # num out of range.
                error_msg(outbar, 'Invalid number. Valid between 1-%d' %
                          sum([len(content[k]) for k in content.keys()]))