예제 #1
0
    def get_factor_info(self, curs, factor_table):
        """
		01-31-06
			one typical external_database_links looks like:
			{"TRANSPATH: MO000024633","EMBL: X12549; DMASCT3","SWISSPROT: P09774; AST3_DROME","PIR: S01165; S01165","FLYBASE: FBgn0002561; l(1)sc"}
			
			only focus on EMBL or SWISSPROT 
		"""
        sys.stderr.write("Getting factor info...")
        curs.execute(
            "select tf_acc, organism, external_database_links from %s \
			where external_database_links is not null" % factor_table)
        rows = curs.fetchall()
        acc_tax_id2tf_acc = {}
        organism2tax_id = {}
        for row in rows:
            tf_acc, organism, external_database_links = row
            if organism in organism2tax_id:
                tax_id = organism2tax_id[organism]
            else:
                tax_id = get_tax_id_from_org(curs, organism)
                organism2tax_id[organism] = tax_id
            if tax_id:  #not some weird name that don't have tax_id
                external_database_links = external_database_links[2:-2].split(
                    '","')
                for external_database_link in external_database_links:
                    xdb_name_acc_ls = external_database_link.split(':')
                    xdb_name = xdb_name_acc_ls[0]
                    if xdb_name == 'EMBL' or xdb_name == 'SWISSPROT':
                        xdb_acc = xdb_name_acc_ls[1]
                        xdb_acc = xdb_acc.split(';')[0].strip()
                        key = (xdb_acc.upper(), tax_id)
                        acc_tax_id2tf_acc[key] = tf_acc
        sys.stderr.write("Done.\n")
        return acc_tax_id2tf_acc
	def get_factor_info(self, curs, factor_table):
		"""
		01-31-06
		Build a lookup from (external accession, tax_id) to tf_acc.

		Reads tf_acc, organism and external_database_links from
		factor_table (rows with a null external_database_links are
		excluded by the query). One typical external_database_links
		value looks like:
		{"TRANSPATH: MO000024633","EMBL: X12549; DMASCT3","SWISSPROT: P09774; AST3_DROME","PIR: S01165; S01165","FLYBASE: FBgn0002561; l(1)sc"}

		Only EMBL and SWISSPROT entries are used, and for each of them
		only the first ';'-separated accession is kept (stripped and
		upper-cased).

		curs: database cursor providing execute() and fetchall().
		factor_table: table name interpolated into the query text.

		Returns a dict keyed by (ACCESSION, tax_id) whose value is the
		tf_acc of the last row that produced that key. Rows whose
		organism resolves to a false tax_id are skipped. Progress
		messages are written to stderr.
		"""
		sys.stderr.write("Getting factor info...")
		query = "select tf_acc, organism, external_database_links from %s \
			where external_database_links is not null"%factor_table
		curs.execute(query)
		tax_id_cache = {}
		tf_acc_by_key = {}
		# fetchall() runs before the loop body reuses curs for tax_id lookups
		for tf_acc, organism, raw_links in curs.fetchall():
			# resolve every distinct organism name only once
			if organism not in tax_id_cache:
				tax_id_cache[organism] = get_tax_id_from_org(curs, organism)
			tax_id = tax_id_cache[organism]
			if not tax_id:
				# some weird organism name that has no tax_id
				continue
			# drop the leading '{"' and trailing '"}', then cut at '","'
			for link in raw_links[2:-2].split('","'):
				fields = link.split(':')
				if fields[0] not in ('EMBL', 'SWISSPROT'):
					continue
				accession = fields[1].split(';')[0].strip()
				tf_acc_by_key[(accession.upper(), tax_id)] = tf_acc
		sys.stderr.write("Done.\n")
		return tf_acc_by_key
	def run(self):
		"""
		02-01-06
		Drive the whole job from the settings stored on the instance.

		Steps, in order:
			1. connect with db_connect(self.hostname, self.dbname)
			2. look up the tax_id of self.organism
			3. load the matrix id mapping from self.matrix_table
			4. load the promoter id -> gene id mapping for self.organism
			5. hand everything to self.parse_input_fname(), together with
			   self.input_fname, self.p_value_cut_off and self.output_table
			6. execute "end" on the cursor when self.commit is true
			   (presumably closes/commits the open transaction -- confirm
			   against the database in use)
		"""
		# the connection object itself is not used below, only the cursor
		_conn, cursor = db_connect(self.hostname, self.dbname)
		organism_tax_id = get_tax_id_from_org(cursor, self.organism)
		matrix_id2no = get_mt_id2no(cursor, self.matrix_table)
		promoter2gene = self.get_prom_id2gene_id(
			cursor, self.prom_seq_table, self.organism)

		self.parse_input_fname(
			cursor,
			self.input_fname,
			self.p_value_cut_off,
			promoter2gene,
			matrix_id2no,
			organism_tax_id,
			self.output_table)

		if not self.commit:
			return
		cursor.execute("end")
예제 #4
0
    def run(self):
        """
        02-01-06
        Drive the whole job from the settings stored on the instance.

        Steps, in order:
            1. connect with db_connect(self.hostname, self.dbname)
            2. look up the tax_id of self.organism
            3. load the matrix id mapping from self.matrix_table
            4. load the promoter id -> gene id mapping for self.organism
            5. hand everything to self.parse_input_fname(), together with
               self.input_fname, self.p_value_cut_off and self.output_table
            6. execute "end" on the cursor when self.commit is true
               (presumably closes/commits the open transaction -- confirm
               against the database in use)
        """
        # the connection object itself is not used below, only the cursor
        _conn, cursor = db_connect(self.hostname, self.dbname)
        organism_tax_id = get_tax_id_from_org(cursor, self.organism)
        matrix_id2no = get_mt_id2no(cursor, self.matrix_table)
        promoter2gene = self.get_prom_id2gene_id(
            cursor, self.prom_seq_table, self.organism)

        self.parse_input_fname(
            cursor,
            self.input_fname,
            self.p_value_cut_off,
            promoter2gene,
            matrix_id2no,
            organism_tax_id,
            self.output_table)

        if not self.commit:
            return
        cursor.execute("end")