def get_factor_info(self, curs, factor_table):
    """
    01-31-06
        Build a {(external accession, tax_id): tf_acc} lookup from factor_table.

        Selects tf_acc, organism and external_database_links for every row
        whose external_database_links is not null. One typical
        external_database_links value looks like (wrapped here for width):
            {"TRANSPATH: MO000024633","EMBL: X12549; DMASCT3",
             "SWISSPROT: P09774; AST3_DROME","PIR: S01165; S01165",
             "FLYBASE: FBgn0002561; l(1)sc"}
        Only EMBL and SWISSPROT links are used. The accession is the text
        between the first and second ':' of the link, cut at the first ';'
        and stripped of surrounding whitespace.

        Links of a wanted database that lack the ':' separator, or whose
        accession is empty, are skipped instead of raising IndexError or
        storing an empty-string key.

    Args:
        curs: database cursor offering execute() and fetchall().
        factor_table: name of the factor table. It is interpolated straight
            into the SQL text, so it must be a trusted identifier.

    Returns:
        dict mapping (upper-cased accession, tax_id) to tf_acc. When the
        same key appears more than once, the row fetched last wins.
    """
    sys.stderr.write("Getting factor info...")
    # A table name cannot be passed as a bound query parameter, hence the
    # string interpolation.
    curs.execute(
        "select tf_acc, organism, external_database_links from %s "
        "where external_database_links is not null" % factor_table)

    acc_tax_id2tf_acc = {}
    organism2tax_id = {}  # cache: resolve each organism name only once
    for tf_acc, organism, external_database_links in curs.fetchall():
        if organism not in organism2tax_id:
            organism2tax_id[organism] = get_tax_id_from_org(curs, organism)
        tax_id = organism2tax_id[organism]
        if not tax_id:
            # some weird organism name that doesn't have a tax_id
            continue

        # Drop the leading '{"' and trailing '"}', then split the quoted items.
        for link in external_database_links[2:-2].split('","'):
            fields = link.split(':')
            if fields[0] not in ('EMBL', 'SWISSPROT'):
                continue
            if len(fields) < 2:
                # malformed link: database name without any accession part
                continue
            xdb_acc = fields[1].split(';')[0].strip()
            if not xdb_acc:
                # nothing usable before the first ';'
                continue
            acc_tax_id2tf_acc[(xdb_acc.upper(), tax_id)] = tf_acc

    sys.stderr.write("Done.\n")
    return acc_tax_id2tf_acc
def get_factor_info(self, curs, factor_table):
    """
    01-31-06
        Collect, for every factor row with external database links, a
        mapping from (accession, tax_id) to the factor's tf_acc.

        one typical external_database_links looks like (wrapped for width):
            {"TRANSPATH: MO000024633","EMBL: X12549; DMASCT3",
             "SWISSPROT: P09774; AST3_DROME","PIR: S01165; S01165",
             "FLYBASE: FBgn0002561; l(1)sc"}
        only focus on EMBL or SWISSPROT; the accession is the first
        ';'-separated field following the database name.

        A wanted link with no ':' separator, or with an empty accession,
        is ignored rather than raising IndexError or producing an
        empty-string key.

    Args:
        curs: database cursor; execute() and fetchall() are called on it,
            and it is also handed to get_tax_id_from_org().
        factor_table: table name, formatted directly into the query string
            (must come from a trusted source).

    Returns:
        dict keyed by (accession in upper case, tax_id) with tf_acc values.
        Rows whose organism yields a false tax_id contribute nothing.
    """
    wanted_xdb_names = ('EMBL', 'SWISSPROT')

    sys.stderr.write("Getting factor info...")
    query = ("select tf_acc, organism, external_database_links from %s "
             "where external_database_links is not null" % factor_table)
    curs.execute(query)
    rows = curs.fetchall()

    acc_tax_id2tf_acc = {}
    organism2tax_id = {}  # remembers the tax_id already resolved per organism
    for tf_acc, organism, raw_links in rows:
        try:
            tax_id = organism2tax_id[organism]
        except KeyError:
            tax_id = get_tax_id_from_org(curs, organism)
            organism2tax_id[organism] = tax_id
        if not tax_id:
            # not some weird name that doesn't have a tax_id
            continue

        # raw_links is expected to look like '{"a","b",...}': strip the outer
        # '{"' / '"}' and split on the '","' between items.
        for link in raw_links[2:-2].split('","'):
            pieces = link.split(':')
            # Need both a wanted database name and something after the ':'.
            if pieces[0] in wanted_xdb_names and len(pieces) > 1:
                xdb_acc = pieces[1].split(';')[0].strip()
                if xdb_acc:
                    acc_tax_id2tf_acc[(xdb_acc.upper(), tax_id)] = tf_acc

    sys.stderr.write("Done.\n")
    return acc_tax_id2tf_acc
def run(self):
    """
    02-01-06
        Top-level driver.

        1. open a database connection/cursor with db_connect()
        2. look up the tax_id of self.organism
        3. load the matrix id mapping from self.matrix_table
        4. load the promoter id -> gene id mapping for self.organism
        5. hand everything to parse_input_fname() together with the input
           file name, p-value cutoff and output table
        6. if self.commit is set, execute "end" on the cursor
           (presumably this ends/commits the open transaction -- confirm
           against the database in use)
    """
    _conn, cursor = db_connect(self.hostname, self.dbname)

    organism_tax_id = get_tax_id_from_org(cursor, self.organism)
    matrix_id2no = get_mt_id2no(cursor, self.matrix_table)
    promoter_id2gene_id = self.get_prom_id2gene_id(
        cursor, self.prom_seq_table, self.organism)

    self.parse_input_fname(
        cursor,
        self.input_fname,
        self.p_value_cut_off,
        promoter_id2gene_id,
        matrix_id2no,
        organism_tax_id,
        self.output_table)

    if not self.commit:
        return
    cursor.execute("end")