def getAgent(self):
    log_.info("fetching agent list")
    self.cs.execute("SELECT sname, name, code FROM agent")
    agent_list = []
    for row in self.cs:
        agent_list.append(AGENT(toStr(row[0]), toStr(row[1]), row[2]))
    return agent_list
def guess_sex(ref, data, sex_ratio_threshold=0.75):
    """guess the sex of an individual by comparing coverage on
    heterogametic chromosomes. By convention, all chromosomes are assumed
    to be diploid unless their name starts with an `X` or `Z`
    """
    ref["heterogametic"] = [v[0] in "XZxz" for v in ref.index.get_level_values('chrom')]
    data["heterogametic"] = [v[0] in "XZxz" for v in data.index.get_level_values('chrom')]

    n_sites = ref.groupby(ref.heterogametic).apply(lambda df: len(df))
    n_reads = data.groupby(data.heterogametic).apply(lambda df: np.sum(df.tref + df.talt))
    cov = n_reads / n_sites

    del data["heterogametic"]
    del ref['heterogametic']

    # no heterogametic data
    if True not in cov:
        return 'f'

    if cov[True] / cov[False] < sex_ratio_threshold:
        sex = "m"
        log_.info("guessing sex is male, X/A = %.4f/%.4f" % (cov[True], cov[False]))
    else:
        sex = "f"
        log_.info("guessing sex is female, X/A = %.4f/%.4f" % (cov[True], cov[False]))
    return sex
def guess_sex(data, sex_ratio_threshold=0.8):
    """guess the sex of an individual by comparing coverage on
    heterogametic chromosomes. By convention, all chromosomes are assumed
    to be diploid unless their name starts with an `X` or `Z`
    """
    data["heterogametic"] = [
        v[0] in "XZxz" for v in data.index.get_level_values('chrom')
    ]
    cov = data.groupby(
        data.heterogametic).apply(lambda df: np.sum(df.tref + df.talt))
    cov = cov.astype(float)

    # no heterogametic data
    if True not in cov:
        return 'f'

    cov[True] /= np.sum(data.heterogametic)
    cov[False] /= np.sum(data.heterogametic == False)
    del data["heterogametic"]

    if cov[True] / cov[False] < sex_ratio_threshold:
        sex = "m"
        log_.info("guessing sex is male, X/A = %.4f/%.4f" %
                  (cov[True], cov[False]))
    else:
        sex = "f"
        log_.info("guessing sex is female, X/A = %.4f/%.4f" %
                  (cov[True], cov[False]))
    return sex
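# Usage sketch for the single-table variant above -- hypothetical counts,
# assuming `guess_sex` and the module-level `log_` logger are available:
import pandas as pd

idx = pd.MultiIndex.from_tuples(
    [("1", 100), ("1", 200), ("X", 50), ("X", 80)],
    names=["chrom", "pos"])
data = pd.DataFrame({"tref": [8, 6, 2, 1],
                     "talt": [2, 4, 1, 0]}, index=idx)

# X carries ~20% of the autosomal per-site coverage here (2.0 vs 10.0),
# well below the 0.8 threshold, so this prints "m"
print(guess_sex(data, sex_ratio_threshold=0.8))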
def getUser(self):
    log_.info("fetching user list")
    self.cs.execute("SELECT id, name, open_id, level, edit, address FROM user")
    userlist = []
    for row in self.cs:
        userlist.append(
            USER(row[0], toStr(row[1]), toStr(row[2]), row[3], row[4], row[5]))
    return userlist
def post_url():
    access_token, expires_in = get_token_info()
    print "token expires_in:%s" % expires_in
    # schedule the next refresh shortly before the token expires
    timer = threading.Timer((expires_in - 200), post_url)
    timer.start()
    get_url_token[0] = "%s" % access_token.encode('utf-8')
    print access_token
    post_url_freshing[0] = (
        'https://api.weixin.qq.com/cgi-bin/message/custom/send?access_token=%s'
        % access_token)
    log_.info("token refreshed, %s" % post_url_freshing[0])
    log_.info("refreshed token: %s" % get_url_token[0])
def getTargetUser(self, open_id):
    try:
        log_.info("fetching target user, open_id = " + open_id)
        # parameterized query avoids quoting issues and SQL injection
        sql = "SELECT id, name, level, edit, address FROM user WHERE open_id = ?"
        self.cs.execute(sql, (open_id,))
        res = self.cs.fetchone()
        user = USER(res[0], toStr(res[1]), toStr(open_id), res[2], res[3], res[4])
        return user
    except Exception as res:
        print res
        log_.warning("failed to fetch user")
        return USER(9999, 'unknown user', open_id, 0, 0, 0)
def updateAnswer(self, content):
    try:
        log_.info("updating answer")
        key = content.split('号')[0] + '号'
        if not msg.has_key(key):
            return 'nomember'
        sql = "UPDATE msg SET answer = ? WHERE key = ?"
        self.cs.execute(sql, (content, key))
        self.conn.commit()
        msg[key].answer['answer'] = content
        return 'success'
    except Exception as res:
        print res
        log_.warning("failed to update answer: %s" % res)
        return res
def get_userInfo(open_id):
    url = ("https://api.weixin.qq.com/cgi-bin/user/info?access_token=" +
           get_url_token[0] + "&openid=" + open_id + "&lang=zh_CN")
    log_.info("trying to fetch user info: " + open_id)
    log_.info("url: " + url)
    res = requests.get(url)
    js = res.json()
    if "errmsg" not in js:
        return js[u"nickname"].encode('utf-8')
    else:
        print "Can not get user information"
        print js
        log_.error("failed to fetch user info, %s" % js)
        print "use token: " + get_url_token[0]
        return "unknown user"
def update_one_user(self, user_info):
    try:
        log_.info(
            "updating one user: name: %s open_id: %s level %d edit %d address %d"
            % (user_info.name, user_info.open_id, user_info.level,
               user_info.can_edit, user_info.edit_address))
        sql = ("REPLACE INTO user(name, open_id, level, edit, address) "
               "VALUES(?, ?, ?, ?, ?)")
        self.cs.execute(sql, (user_info.name, user_info.open_id,
                              user_info.level, user_info.can_edit,
                              user_info.edit_address))
        self.conn.commit()
        return True
    except Exception as res:
        print res
        log_.warning("failed to update user: %s" % res)
        return False
def refresh(self):
    log_.info("refreshing caches")
    ul = self.getUser()
    user.clear()
    manager.clear()
    for u in ul:
        user[u.open_id] = u
        if u.level == 9:
            manager[u.name] = u.open_id
    self.getTableName()
    msg.clear()
    ml = self.getMsg()
    for m in ml:
        msg[m.key] = m
    ag = self.getAgent()
    for a in ag:
        agents[a.code] = a
def getTableName(self):
    try:
        log_.info("fetching table structure")
        sql = "pragma table_info('msg')"
        self.cs.execute(sql)
        res = self.cs.fetchall()
        del title[:]
        for rt in res:
            r = toStr(rt[1])
            # skip price columns (l*), default columns and reserved (预留) columns
            if 'l' in r or r in default_list or '预留' in r:
                continue
            else:
                title.append(r)
    except Exception as e:
        print e
        log_.critical("failed to fetch table structure: %s" % e)
def getMsg(self):
    log_.info("fetching message list")
    sql = "SELECT "
    for d in default_list:
        sql += d + ","
    for t in title:
        sql += t + ","
    sql += "l0,l1,l2,l3,l4,l5,l6,l7,l8,l9 FROM msg"
    self.cs.execute(sql)
    msglist = []
    for row in self.cs:
        key = toStr(row[0])
        answer_dict = dict()
        price = Xstr(row[-10:])
        for a in range(1, len(default_list)):
            answer_dict[default_list[a]] = toStr(row[a])
        for i in range(len(title)):
            answer_dict[title[i]] = toStr(row[i + len(default_list)])
        msglist.append(MSG(key, answer_dict, price))
    return msglist
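# Shape of the statement getMsg assembles, with hypothetical registries:
default_list = ["key", "answer"]
title = ["spec", "qty"]
# the loops above then issue:
#   SELECT key,answer,spec,qty,l0,l1,l2,l3,l4,l5,l6,l7,l8,l9 FROM msg
# row[0] is the key, row[1:len(default_list)] the remaining defaults,
# the next len(title) entries the dynamic columns, row[-10:] the prices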
def getAnswer(self, content):
    try:
        log_.info("fetching answer")
        ask = content.split('+')
        sql = "SELECT answer"
        for t in title:
            sql += "," + t
        sql += ",l0,l1,l2,l3,l4,l5,l6,l7,l8,l9 FROM msg WHERE key = ?"
        self.cs.execute(sql, (ask[0],))
        res = self.cs.fetchone()
        answer_dict = {'answer': toStr(res[0])}
        price = Xstr(res[-10:])  # the last ten columns are l0..l9
        for i, t in enumerate(title):
            answer_dict[t] = toStr(res[i + 1])
        return MSG(ask[0], answer_dict, price)
    except Exception as res:
        print res
        log_.warning("failed to fetch answer: %s" % res)
        return res
def filter_ref(ref, states, filter_delta=None, filter_pos=None, filter_map=None):
    n_states = len(states)

    if filter_delta is not None:
        kp = np.zeros(ref.shape[0], bool)
        for i, s1 in enumerate(states):
            for j in range(i + 1, n_states):
                s2 = states[j]
                f1 = np.nan_to_num(
                    ref[s1 + "_alt"] / (ref[s1 + "_alt"] + ref[s1 + "_ref"]))
                f2 = np.nan_to_num(
                    ref[s2 + "_alt"] / (ref[s2 + "_alt"] + ref[s2 + "_ref"]))
                delta = np.abs(f1 - f2)
                kp = np.logical_or(kp, delta >= filter_delta)
        log_.info("filtering %s SNP due to delta", np.sum(1 - kp))
        ref = ref[kp]

    if filter_pos is not None:
        chrom = ref.index.get_level_values('chrom').factorize()[0]
        pos = ref.index.get_level_values('pos').values
        kp = nfp(chrom, pos, ref.shape[0], filter_pos)
        log_.info("filtering %s SNP due to pos filter", np.sum(1 - kp))
        ref = ref[kp]

    if filter_map is not None:
        chrom = ref.index.get_level_values('chrom').factorize()[0]
        pos = ref.index.get_level_values('map').values
        kp = nfp(chrom, pos, ref.shape[0], filter_map)
        log_.info("filtering %s SNP due to map filter", np.sum(1 - kp))
        ref = ref[kp]

    return ref
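# Sketch of the filter_delta path on a hypothetical two-source panel
# (column names follow the `X_alt`/`X_ref` convention; values invented):
import pandas as pd

ref = pd.DataFrame({
    "AFR_alt": [0, 5, 10], "AFR_ref": [10, 5, 0],
    "NEA_alt": [0, 4, 1], "NEA_ref": [10, 6, 9],
})
# allele frequencies are [0, .5, 1] vs [0, .4, .1]; only the last SNP
# differs by >= 0.3, so only that row survives
ref_flt = filter_ref(ref, states=["AFR", "NEA"], filter_delta=0.3)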
def __init__(self):
    log_.info("connecting to database")
    self.connect()
def data2probs(
    df,
    IX,
    state_ids,
    cont_id=None,
    prior=None,
    cont_prior=(1e-8, 1e-8),
    ancestral=None,
):
    """create data structure that holds the genetic data

    creates an object of type `Probs` with the following entries:
    O : array[n_obs]: the number of alternative reads
    N : array[n_obs]: the total number of reads
    P_cont : array[n_obs]: the contaminant allele frequency
    lib[n_obs] : the library / read group of the observation
    alpha[n_snps, n_states] : the alt allele beta-prior
    beta[n_snps, n_states] : the reference allele beta-prior
    """
    alpha_ix = ["%s_alt" % s for s in state_ids]
    beta_ix = ["%s_ref" % s for s in state_ids]
    snp_ix_states = set(alpha_ix + beta_ix)

    if cont_id is not None:
        cont = "%s_alt" % cont_id, "%s_ref" % cont_id
        snp_ix_states.update(cont)
    if ancestral is not None:
        anc = "%s_alt" % ancestral, "%s_ref" % ancestral
        snp_ix_states.update(anc)

    snp_df = df[list(snp_ix_states)]
    snp_df = snp_df[~snp_df.index.get_level_values('snp_id').duplicated()]
    n_snps = len(snp_df.index.get_level_values('snp_id'))
    n_states = len(state_ids)

    if prior is None:  # empirical bayes
        alpha = np.empty((n_snps, n_states))
        beta = np.empty((n_snps, n_states))
        if cont_id is not None:
            ca, cb = empirical_bayes_prior(snp_df[cont[0]], snp_df[cont[1]])
        if ancestral is None:
            for i, (a, b, s) in enumerate(zip(alpha_ix, beta_ix, state_ids)):
                pa, pb = empirical_bayes_prior(snp_df[a], snp_df[b])
                log_.info("[%s]EB prior [a=%.4f, b=%.4f]: " % (s, pa, pb))
                alpha[:, i] = snp_df[a] + pa
                beta[:, i] = snp_df[b] + pb
        else:
            anc_ref, anc_alt = ancestral + "_ref", ancestral + "_alt"
            # which allele is ancestral, derived, or unknown
            ref_is_anc = (snp_df[anc_ref] > 0) & (snp_df[anc_alt] == 0)
            alt_is_anc = (snp_df[anc_alt] > 0) & (snp_df[anc_ref] == 0)
            ref_is_der, alt_is_der = alt_is_anc, ref_is_anc
            anc_is_unknown = (1 - alt_is_anc) * (1 - ref_is_anc) == 1

            for i, (a, b, s) in enumerate(zip(alpha_ix, beta_ix, state_ids)):
                pa, pb = empirical_bayes_prior(snp_df[a], snp_df[b])
                log_.info("[%s]EB prior0 [anc=%.4f, der=%.4f]: " % (s, pa, pb))
                alpha[:, i], beta[:, i] = snp_df[a], snp_df[b]
                alpha[anc_is_unknown, i] += pa
                beta[anc_is_unknown, i] += pb

                m_anc = pd.concat((ref_is_anc, alt_is_anc), axis=1)
                m_der = pd.concat((ref_is_der, alt_is_der), axis=1)
                ANC = np.array(snp_df[[b, a]])[m_anc]
                DER = np.array(snp_df[[b, a]])[m_der]
                pder, panc = empirical_bayes_prior(DER, ANC, True)
                log_.info("[%s]EB prior1 [anc=%.4f, der=%.4f]: " % (s, panc, pder))
                alpha[alt_is_anc, i] += panc
                alpha[alt_is_der, i] += pder
                beta[ref_is_anc, i] += panc
                beta[ref_is_der, i] += pder

        P = Probs2(
            O=np.array(df.talt.values, np.int8),
            N=np.array(df.tref.values + df.talt.values, np.int8),
            P_cont=np.zeros_like(df.talt.values) if cont_id is None else
            np.array((df[cont[0]].values + ca) /
                     (df[cont[0]].values + df[cont[1]].values + ca + cb)),
            alpha=alpha[IX.diploid_snps],
            beta=beta[IX.diploid_snps],
            alpha_hap=alpha[IX.haploid_snps],
            beta_hap=beta[IX.haploid_snps],
            lib=np.array(df.lib),
        )
        return P
    else:
        if ancestral is not None:
            anc_ref, anc_alt = ancestral + "_ref", ancestral + "_alt"
            pa = df[anc_alt] + prior * (1 - 2 * np.sign(df[anc_alt]))
            pb = df[anc_ref] + prior * (1 - 2 * np.sign(df[anc_ref]))

        cont = "%s_alt" % cont_id, "%s_ref" % cont_id
        ca, cb = cont_prior
        alpha = np.array(snp_df[alpha_ix]) + prior
        beta = np.array(snp_df[beta_ix]) + prior
        P = Probs2(
            O=np.array(df.talt.values, np.int8),
            N=np.array(df.tref.values + df.talt.values, np.int8),
            P_cont=None if cont_id is None else np.array(
                (df[cont[0]] + ca) / (df[cont[0]] + df[cont[1]] + ca + cb)),
            alpha=alpha[IX.diploid_snps],
            beta=beta[IX.diploid_snps],
            alpha_hap=alpha[IX.haploid_snps],
            beta_hap=beta[IX.haploid_snps],
            lib=np.array(df.lib),
        )
        return P
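# The P_cont entry is the posterior-mean contaminant allele frequency under
# the empirical Bayes pseudocounts ca, cb -- one-SNP example, invented numbers:
alt, ref, ca, cb = 3.0, 7.0, 0.2, 0.2
P_cont = (alt + ca) / (alt + ref + ca + cb)  # 3.2 / 10.4 ~ 0.308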
def init_pars(state_ids,
              sex=None,
              F0=0.001,
              tau0=1,
              e0=1e-2,
              c0=1e-2,
              est_inbreeding=False,
              init_guess=None,
              do_hap=True,
              **kwargs):
    """initialize parameters

    returns a pars object
    """
    homo = [s for s in state_ids]
    het = []
    hap = ["h%s" % s for s in homo]

    for i, s in enumerate(state_ids):
        for s2 in state_ids[i + 1:]:
            het.append(s + s2)
    gamma_names = homo + het
    if est_inbreeding:
        gamma_names.extend(hap)

    n_states = len(gamma_names)
    n_homo = len(homo)
    n_het = len(het)
    n_hap = len(hap)

    alpha0 = np.array([1 / n_states] * n_states)
    alpha0_hap = np.array([1 / n_hap] * n_hap)
    trans_mat = np.zeros((n_states, n_states)) + 2e-2
    trans_mat_hap = np.zeros((n_hap, n_hap)) + 2e-2
    np.fill_diagonal(trans_mat, 1 - (n_states - 1) * 2e-2)
    np.fill_diagonal(trans_mat_hap, 1 - (n_hap - 1) * 2e-2)
    cont = defaultdict(lambda: c0)
    error = defaultdict(lambda: e0)

    if init_guess is not None:
        guess = [i for i, n in enumerate(gamma_names) if n in init_guess]
        log_.info("starting with guess %s " % guess)
        trans_mat[:, guess] = trans_mat[:, guess] + 1
        trans_mat /= np.sum(trans_mat, 1)[:, np.newaxis]

    try:
        if len(F0) == n_homo:
            F = F0
        elif len(F0) == 1:
            F = F0 * n_homo
        else:
            F = [F0]
    except TypeError:
        F = [F0] * n_homo
    try:
        if len(tau0) == n_homo:
            tau = tau0
        elif len(tau0) == 1:
            tau = tau0 * n_homo
        else:
            tau = [tau0]
    except TypeError:
        tau = [tau0] * n_homo

    if do_hap:
        return ParsHD(
            alpha0,
            alpha0_hap,
            trans_mat,
            trans_mat_hap,
            cont,
            error,
            F,
            tau,
            gamma_names,
            sex=sex,
        )
    return Pars(alpha0, trans_mat, cont, error, F, tau, gamma_names, sex=sex)
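# The hidden-state labels init_pars builds, shown in isolation for a
# hypothetical two-source run:
state_ids = ["AFR", "NEA"]
homo = list(state_ids)                          # ['AFR', 'NEA']
het = [s + s2 for i, s in enumerate(state_ids)
       for s2 in state_ids[i + 1:]]             # ['AFRNEA']
hap = ["h%s" % s for s in homo]                 # ['hAFR', 'hNEA']
# with est_inbreeding=True, gamma_names = homo + het + hap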
def data2probs(
    df,
    IX,
    state_ids,
    cont_id=None,
    prior=None,
    cont_prior=(1e-8, 1e-8),
    ancestral=None,
    ancestral_prior=0,
):
    """create data structure that holds the reference genetic data

    creates an object of type `Probs` with the following entries:
    O : array[n_obs]: the number of alternative reads
    N : array[n_obs]: the total number of reads
    P_cont : array[n_obs]: the contaminant allele frequency
    lib[n_obs] : the library / read group of the observation
    alpha[n_snps, n_states] : the alt allele beta-prior
    beta[n_snps, n_states] : the reference allele beta-prior

    input:
    df: merged reference and SNP data. has columns tref, talt with the read
        counts at each SNP, and "X_alt, X_ref" for each source pop
    IX: index object, with number of snps, number of reads, etc.
    state_ids: the references to keep
    prior: None for empirical bayes prior, otherwise prior to be added
    ancestral: ancestral allele
    ancestral_prior: pseudocount weight for the ancestral allele
    """
    alt_ix = ["%s_alt" % s for s in state_ids]
    ref_ix = ["%s_ref" % s for s in state_ids]
    snp_ix_states = set(alt_ix + ref_ix)

    if cont_id is not None:
        cont = "%s_alt" % cont_id, "%s_ref" % cont_id
        snp_ix_states.update(cont)
    if ancestral is not None:
        anc = "%s_alt" % ancestral, "%s_ref" % ancestral
        snp_ix_states.update(anc)

    snp_df = df[list(snp_ix_states)]
    snp_df = snp_df[~snp_df.index.get_level_values('snp_id').duplicated()]
    n_snps = len(snp_df.index.get_level_values('snp_id'))
    n_states = len(state_ids)

    if prior is None:  # empirical bayes, estimate from data
        alt_prior = np.empty((n_snps, n_states))
        ref_prior = np.empty((n_snps, n_states))
        if cont_id is not None:
            ca, cb = empirical_bayes_prior(snp_df[cont[0]], snp_df[cont[1]])
        if ancestral is None:
            for i, (a, b, s) in enumerate(zip(alt_ix, ref_ix, state_ids)):
                pa, pb = empirical_bayes_prior(snp_df[a], snp_df[b])
                log_.info("[%s]EB prior [a=%.4f, b=%.4f]: " % (s, pa, pb))
                alt_prior[:, i] = snp_df[a] + pa
                ref_prior[:, i] = snp_df[b] + pb
        else:
            anc_ref, anc_alt = f"{ancestral}_ref", f"{ancestral}_alt"

            # set up vectors stating which allele is ancestral
            ref_is_anc = (snp_df[anc_ref] > 0) & (snp_df[anc_alt] == 0)
            alt_is_anc = (snp_df[anc_alt] > 0) & (snp_df[anc_ref] == 0)
            ref_is_der, alt_is_der = alt_is_anc, ref_is_anc
            anc_is_unknown = (1 - alt_is_anc) * (1 - ref_is_anc) == 1

            for i, (alt_col, ref_col, s) in enumerate(zip(alt_ix, ref_ix, state_ids)):
                # 1. set up base entries based on observed counts
                alt_prior[:, i] = snp_df[alt_col]
                ref_prior[:, i] = snp_df[ref_col]

                # 2. where anc is unknown, add symmetric prior estimated from data
                pa, pb = empirical_bayes_prior(snp_df[alt_col], snp_df[ref_col])
                log_.info("[%s]EB prior0 [anc=%.4f, der=%.4f]: " % (s, pa, pb))
                alt_prior[anc_is_unknown, i] += pa
                ref_prior[anc_is_unknown, i] += pb

                # 3. where anc is known, create indices
                m_anc = pd.concat((ref_is_anc, alt_is_anc), axis=1)
                m_der = pd.concat((ref_is_der, alt_is_der), axis=1)
                ANC = np.array(snp_df[[ref_col, alt_col]])[m_anc]
                DER = np.array(snp_df[[ref_col, alt_col]])[m_der]

                pder, panc = empirical_bayes_prior(DER, ANC, known_anc=True)
                panc += ancestral_prior
                log_.info("[%s]EB prior1 [anc=%.4f, der=%.4f]: " % (s, panc, pder))
                alt_prior[alt_is_anc, i] += panc
                alt_prior[alt_is_der, i] += pder
                ref_prior[ref_is_anc, i] += panc
                ref_prior[ref_is_der, i] += pder

        assert np.all(df.tref.values + df.talt.values < 256)
        P = Probs2(
            O=np.array(df.talt.values, np.uint8),
            N=np.array(df.tref.values + df.talt.values, np.uint8),
            P_cont=np.zeros_like(df.talt.values) if cont_id is None else
            np.array((df[cont[0]].values + ca) /
                     (df[cont[0]].values + df[cont[1]].values + ca + cb)),
            alpha=alt_prior[IX.diploid_snps],
            beta=ref_prior[IX.diploid_snps],
            alpha_hap=alt_prior[IX.haploid_snps],
            beta_hap=ref_prior[IX.haploid_snps],
            lib=np.array(df.lib),
        )
        return P
    else:
        # ancestral allele contribution to prior:
        # the ancestral allele adds ancestral_prior pseudocounts to the data
        if ancestral is None:
            prior_anc_alt, prior_anc_ref = np.zeros(1), np.zeros(1)
        else:
            anc_ref, anc_alt = f"{ancestral}_ref", f"{ancestral}_alt"
            prior_anc_alt = snp_df[anc_alt] * ancestral_prior
            prior_anc_ref = snp_df[anc_ref] * ancestral_prior

        cont = "%s_alt" % cont_id, "%s_ref" % cont_id
        ca, cb = cont_prior
        alt_prior = snp_df[alt_ix].to_numpy() + prior_anc_alt[:, np.newaxis] + prior
        ref_prior = snp_df[ref_ix].to_numpy() + prior_anc_ref[:, np.newaxis] + prior

        assert np.all(df.tref.values + df.talt.values < 256)
        P = Probs2(
            O=np.array(df.talt.values, np.uint8),
            N=np.array(df.tref.values + df.talt.values, np.uint8),
            P_cont=0. if cont_id is None else np.array(
                (df[cont[0]] + ca) / (df[cont[0]] + df[cont[1]] + ca + cb)),
            alpha=alt_prior[IX.diploid_snps],
            beta=ref_prior[IX.diploid_snps],
            alpha_hap=alt_prior[IX.haploid_snps],
            beta_hap=ref_prior[IX.haploid_snps],
            lib=np.array(df.lib),
        )
        return P
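# Fixed-prior branch in miniature -- one SNP, one source, invented numbers:
# 3 alt / 7 ref panel alleles, flat prior 0.5, ancestral allele is ref
# (one ancestral ref observation), ancestral_prior 0.8
alt_cnt, ref_cnt, prior = 3.0, 7.0, 0.5
anc_alt, anc_ref, ancestral_prior = 0.0, 1.0, 0.8
alpha = alt_cnt + anc_alt * ancestral_prior + prior  # 3.5
beta = ref_cnt + anc_ref * ancestral_prior + prior   # 8.3
# Beta(alpha, beta) is the prior on this source's alt-allele frequency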
def close(self):
    self.conn.close()
    log_.info("database closed successfully")