Example #1
0
	def __init__(self, inputFnameLs=None, **keywords):
		"""
		Initialise the base job, resolve the taxonomy ID when an organism is given,
		and prepare the regexes and the parser dispatch table.

		Arguments:
			inputFnameLs: forwarded unchanged to AbstractDBInteractingJob.__init__().
			**keywords: forwarded unchanged to AbstractDBInteractingJob.__init__().

		Attributes assigned here:
			FigureOutTaxID_ins: FigureOutTaxID built from self.db_user, self.db_passwd,
				self.hostname and self.dbname.
			tax_id: only assigned when self.organism is not None.
			p_chromosome, p_acc_ver: compiled regular expressions.
			parseFastaDescriptionDict: maps the integers 1-4 to parseFastaDescription* methods.

		NOTE(review): db_user, db_passwd, hostname, dbname and organism are read from self
			right after the base initialiser runs, so they are presumably populated there
			from the keywords -- confirm against AbstractDBInteractingJob.

		2008-07-27
			use option_default_dict
		2008-07-06
			use the firstline (header) of the fasta file to extract which chromosome. using filename is unreliable.
		"""
		AbstractDBInteractingJob.__init__(self, inputFnameLs=inputFnameLs, **keywords)
		#self.connectDB() called within its __init__()
		
		
		self.FigureOutTaxID_ins = FigureOutTaxID(db_user=self.db_user,
								db_passwd=self.db_passwd, hostname=self.hostname, dbname=self.dbname)
		if self.organism is not None:
			from annot.bin.codense.common import org_short2long, org2tax_id
			#look the long name up once and reuse it for both the test and the conversion
			organism_long_name = org_short2long(self.organism)
			if organism_long_name:
				self.tax_id = org2tax_id(organism_long_name)
			else:
				#org_short2long() returned nothing usable: derive the tax id from the raw organism string
				self.tax_id = self.FigureOutTaxID_ins.returnTaxIDGivenSentence(self.organism)
		#when self.organism is None, self.tax_id is not assigned in this method
		
		#self.p_chromosome = re.compile(r'[a-zA-Z]+_chr(\w+).fa')
		#captures the word that follows the literal text "chromosome "
		self.p_chromosome = re.compile(r'chromosome (\w+)[,\n\r]?')	#the last ? means [,\n\r] is optional
		#two groups: word characters before a dot, digits after it (presumably accession and version -- confirm)
		self.p_acc_ver = re.compile(r'(\w+)\.(\d+)')
		
		#dispatch table from an integer code to the matching description parser
		self.parseFastaDescriptionDict = {
			1: self.parseFastaDescriptionForGenBank,
			2: self.parseFastaDescriptionForWUSTLVervetScaffolds,
			3: self.parseFastaDescriptionForFullVervetBACs,
			4: self.parseFastaDescriptionForWUSTLVervetChromosomeGenome}
Example #2
0
	def __init__(self, inputFnameLs=None, **keywords):
		"""
		Initialise the base job, resolve the taxonomy ID when an organism is given,
		and prepare the regexes and the parser dispatch table.

		Arguments:
			inputFnameLs: forwarded unchanged to AbstractDBInteractingJob.__init__().
			**keywords: forwarded unchanged to AbstractDBInteractingJob.__init__().

		Attributes assigned here:
			FigureOutTaxID_ins: FigureOutTaxID built from self.db_user, self.db_passwd,
				self.hostname and self.dbname.
			tax_id: only assigned when self.organism is not None.
			p_chromosome, p_acc_ver: compiled regular expressions.
			parseFastaDescriptionDict: maps the integers 1-4 to parseFastaDescription* methods.

		NOTE(review): db_user, db_passwd, hostname, dbname and organism are read from self
			right after the base initialiser runs, so they are presumably populated there
			from the keywords -- confirm against AbstractDBInteractingJob.

		2008-07-27
			use option_default_dict
		2008-07-06
			use the firstline (header) of the fasta file to extract which chromosome. using filename is unreliable.
		"""
		AbstractDBInteractingJob.__init__(self, inputFnameLs=inputFnameLs, **keywords)
		#self.connectDB() called within its __init__()
		
		
		self.FigureOutTaxID_ins = FigureOutTaxID(db_user=self.db_user,
								db_passwd=self.db_passwd, hostname=self.hostname, dbname=self.dbname)
		if self.organism is not None:
			from annot.bin.codense.common import org_short2long, org2tax_id
			#look the long name up once and reuse it for both the test and the conversion
			organism_long_name = org_short2long(self.organism)
			if organism_long_name:
				self.tax_id = org2tax_id(organism_long_name)
			else:
				#org_short2long() returned nothing usable: derive the tax id from the raw organism string
				self.tax_id = self.FigureOutTaxID_ins.returnTaxIDGivenSentence(self.organism)
		#when self.organism is None, self.tax_id is not assigned in this method
		
		#self.p_chromosome = re.compile(r'[a-zA-Z]+_chr(\w+).fa')
		#captures the word that follows the literal text "chromosome "
		self.p_chromosome = re.compile(r'chromosome (\w+)[,\n\r]?')	#the last ? means [,\n\r] is optional
		#two groups: word characters before a dot, digits after it (presumably accession and version -- confirm)
		self.p_acc_ver = re.compile(r'(\w+)\.(\d+)')
		
		#dispatch table from an integer code to the matching description parser
		self.parseFastaDescriptionDict = {
			1: self.parseFastaDescriptionForGenBank,
			2: self.parseFastaDescriptionForWUSTLVervetScaffolds,
			3: self.parseFastaDescriptionForFullVervetBACs,
			4: self.parseFastaDescriptionForWUSTLVervetChromosomeGenome}
Example #3
0
	def __init__(self, hostname='zhoudb', dbname='mdb', schema='', inputfile=None,
			organism='hs', type=1, debug=0, report=0, commit=0):
		"""
		Keep the settings on the instance, resolve the tax id of the organism and
		build the table of parse methods.

		hostname, dbname, schema, inputfile, organism: stored as given.
		type, debug, report, commit: stored after conversion with int().

		tax_id is org2tax_id(org_short2long(self.organism)).
		parser_dict maps the integers 1-4 to parse methods of this object
		(presumably selected through self.type -- confirm against the caller).
		"""
		#plain settings, kept verbatim
		self.hostname, self.dbname, self.schema = hostname, dbname, schema
		self.inputfile, self.organism = inputfile, organism
		
		#numeric options, coerced in their declared order
		for option_name, raw_value in (('type', type), ('debug', debug),
				('report', report), ('commit', commit)):
			setattr(self, option_name, int(raw_value))
		
		#short organism name -> long name -> tax id
		organism_long_name = org_short2long(self.organism)
		self.tax_id = org2tax_id(organism_long_name)
		
		parser_by_number = {}
		parser_by_number[1] = self.harbison2004_parse
		parser_by_number[2] = self.cisred_parse
		parser_by_number[3] = self.sgd_regulatory_parse
		parser_by_number[4] = self.ucsc_tfbs_conserved_parse
		self.parser_dict = parser_by_number
Example #4
0
	def __init__(self, hostname='dl324b-1', dbname='yhdb', schema='dbsnp', input_fname=None, \
		output_table=None, strain_info_table='strain_info', snp_locus_table='snp_locus', \
		organism='hs', type=1, debug=0, report=0, commit=0):
		"""
		Store the settings on the instance, resolve the tax id of the organism
		and compile the regex kept in snp_acc_category_pattern.

		hostname, dbname, schema, input_fname, output_table, strain_info_table,
		snp_locus_table: stored as given.
		organism: only used to compute tax_id via org2tax_id(org_short2long(organism));
			it is not stored on the instance.
		type, debug, report, commit: stored after conversion with int().
		"""
		self.hostname = hostname
		self.dbname = dbname
		self.schema = schema
		self.input_fname = input_fname
		self.output_table = output_table
		self.strain_info_table = strain_info_table
		self.snp_locus_table = snp_locus_table
		self.tax_id = org2tax_id(org_short2long(organism))
		self.type = int(type)
		self.debug = int(debug)
		self.report = int(report)
		self.commit = int(commit)
		
		#raw string: "\-" and "\w" are not valid escapes in an ordinary string literal.
		#group 1 = letters, optional hyphens, then at least one letter; it is followed by
		#optional hyphen/underscore/space separators and one or more word characters
		#(presumably group 1 is the category prefix of a SNP accession -- confirm).
		self.snp_acc_category_pattern = re.compile(r"([a-zA-Z]*[\-]*[a-zA-Z]+)[\-_ ]*[\w]+")
Example #5
0
    def __init__(self, hostname='dl324b-1', dbname='yhdb', schema='dbsnp', input_fname=None, \
     output_table=None, strain_info_table='strain_info', snp_locus_table='snp_locus', \
     organism='hs', type=1, debug=0, report=0, commit=0):
        """
        Store the settings on the instance, resolve the tax id of the organism
        and compile the regex kept in snp_acc_category_pattern.

        hostname, dbname, schema, input_fname, output_table, strain_info_table,
        snp_locus_table: stored as given.
        organism: only used to compute tax_id via org2tax_id(org_short2long(organism));
            it is not stored on the instance.
        type, debug, report, commit: stored after conversion with int().
        """
        self.hostname = hostname
        self.dbname = dbname
        self.schema = schema
        self.input_fname = input_fname
        self.output_table = output_table
        self.strain_info_table = strain_info_table
        self.snp_locus_table = snp_locus_table
        self.tax_id = org2tax_id(org_short2long(organism))
        self.type = int(type)
        self.debug = int(debug)
        self.report = int(report)
        self.commit = int(commit)

        # Raw string: "\-" and "\w" are not valid escapes in an ordinary string literal.
        # Group 1 = letters, optional hyphens, then at least one letter; it is followed by
        # optional hyphen/underscore/space separators and one or more word characters
        # (presumably group 1 is the category prefix of a SNP accession -- confirm).
        self.snp_acc_category_pattern = re.compile(
            r"([a-zA-Z]*[\-]*[a-zA-Z]+)[\-_ ]*[\w]+")