Esempio n. 1
0
	def __init__(self, inputFnameLs=None, **keywords):
		"""
		Initialise the parent job, resolve the taxonomy id and set up the fasta-header parsers.

		Arguments:
			inputFnameLs: forwarded unchanged to AbstractDBInteractingJob.__init__().
			**keywords: forwarded unchanged to AbstractDBInteractingJob.__init__().

		self.tax_id is assigned here only when self.organism is not None.

		2008-07-27
			use option_default_dict
		2008-07-06
			use the firstline (header) of the fasta file to extract which chromosome. using filename is unreliable.
		"""
		AbstractDBInteractingJob.__init__(self, inputFnameLs=inputFnameLs, **keywords)
		#self.connectDB() called within its __init__()

		self.FigureOutTaxID_ins = FigureOutTaxID(db_user=self.db_user,
								db_passwd=self.db_passwd, hostname=self.hostname, dbname=self.dbname)
		if self.organism is not None:
			from annot.bin.codense.common import org_short2long, org2tax_id
			# look the long name up once and reuse it for both the test and the conversion
			organism_long_name = org_short2long(self.organism)
			if organism_long_name:
				self.tax_id = org2tax_id(organism_long_name)
			else:
				# org_short2long() gave nothing usable: let FigureOutTaxID resolve the raw value
				self.tax_id = self.FigureOutTaxID_ins.returnTaxIDGivenSentence(self.organism)

		# captures the word following the literal text "chromosome " (see 2008-07-06 note: header, not filename)
		self.p_chromosome = re.compile(r'chromosome (\w+)[,\n\r]?')	#the last ? means [,\n\r] is optional
		# captures "<word chars>.<digits>" as two groups (presumably accession and version; verify against the parsers)
		self.p_acc_ver = re.compile(r'(\w+)\.(\d+)')

		# maps an integer code to the parseFastaDescription* method registered for it
		self.parseFastaDescriptionDict = {
			1: self.parseFastaDescriptionForGenBank,
			2: self.parseFastaDescriptionForWUSTLVervetScaffolds,
			3: self.parseFastaDescriptionForFullVervetBACs,
			4: self.parseFastaDescriptionForWUSTLVervetChromosomeGenome,
		}
Esempio n. 2
0
	def __init__(self, inputFnameLs=None, **keywords):
		"""
		Initialise the parent job, resolve the taxonomy id and set up the fasta-header parsers.

		Arguments:
			inputFnameLs: forwarded unchanged to AbstractDBInteractingJob.__init__().
			**keywords: forwarded unchanged to AbstractDBInteractingJob.__init__().

		self.tax_id is assigned here only when self.organism is not None.

		2008-07-27
			use option_default_dict
		2008-07-06
			use the firstline (header) of the fasta file to extract which chromosome. using filename is unreliable.
		"""
		AbstractDBInteractingJob.__init__(self, inputFnameLs=inputFnameLs, **keywords)
		#self.connectDB() called within its __init__()

		self.FigureOutTaxID_ins = FigureOutTaxID(db_user=self.db_user,
								db_passwd=self.db_passwd, hostname=self.hostname, dbname=self.dbname)
		if self.organism is not None:
			from annot.bin.codense.common import org_short2long, org2tax_id
			# look the long name up once and reuse it for both the test and the conversion
			organism_long_name = org_short2long(self.organism)
			if organism_long_name:
				self.tax_id = org2tax_id(organism_long_name)
			else:
				# org_short2long() gave nothing usable: let FigureOutTaxID resolve the raw value
				self.tax_id = self.FigureOutTaxID_ins.returnTaxIDGivenSentence(self.organism)

		# captures the word following the literal text "chromosome " (see 2008-07-06 note: header, not filename)
		self.p_chromosome = re.compile(r'chromosome (\w+)[,\n\r]?')	#the last ? means [,\n\r] is optional
		# captures "<word chars>.<digits>" as two groups (presumably accession and version; verify against the parsers)
		self.p_acc_ver = re.compile(r'(\w+)\.(\d+)')

		# maps an integer code to the parseFastaDescription* method registered for it
		self.parseFastaDescriptionDict = {
			1: self.parseFastaDescriptionForGenBank,
			2: self.parseFastaDescriptionForWUSTLVervetScaffolds,
			3: self.parseFastaDescriptionForFullVervetBACs,
			4: self.parseFastaDescriptionForWUSTLVervetChromosomeGenome,
		}
Esempio n. 3
0
	def __init__(self, hostname='zhoudb', dbname='mdb', schema='', inputfile=None,
		organism='hs', type=1, debug=0, report=0, commit=0):
		"""
		Record the run settings, derive the taxonomy id and build the parser table.

		hostname, dbname, schema, inputfile and organism are stored unchanged.
		type, debug, report and commit are converted with int() before being stored.
		self.tax_id is org2tax_id() applied to org_short2long(organism).
		"""
		self.hostname, self.dbname, self.schema = hostname, dbname, schema
		self.inputfile, self.organism = inputfile, organism

		# converted one at a time, in the same order as the attributes are declared
		for option_name, raw_value in (('type', type), ('debug', debug),
				('report', report), ('commit', commit)):
			setattr(self, option_name, int(raw_value))

		organism_long_name = org_short2long(self.organism)
		self.tax_id = org2tax_id(organism_long_name)

		# integer code -> parse method (presumably selected through self.type; verify against the caller)
		self.parser_dict = {
			1: self.harbison2004_parse,
			2: self.cisred_parse,
			3: self.sgd_regulatory_parse,
			4: self.ucsc_tfbs_conserved_parse,
		}
Esempio n. 4
0
	def __init__(self, hostname='dl324b-1', dbname='yhdb', schema='dbsnp', input_fname=None, \
		output_table=None, strain_info_table='strain_info', snp_locus_table='snp_locus', \
		organism='hs', type=1, debug=0, report=0, commit=0):
		"""
		Record the run settings, derive the taxonomy id and compile the SNP accession pattern.

		hostname, dbname, schema, input_fname, output_table, strain_info_table and
		snp_locus_table are stored unchanged.
		organism is not stored; it is only used to compute self.tax_id via
		org2tax_id(org_short2long(organism)).
		type, debug, report and commit are converted with int() before being stored.
		"""
		self.hostname = hostname
		self.dbname = dbname
		self.schema = schema
		self.input_fname = input_fname
		self.output_table = output_table
		self.strain_info_table = strain_info_table
		self.snp_locus_table = snp_locus_table
		self.tax_id = org2tax_id(org_short2long(organism))
		self.type = int(type)
		self.debug = int(debug)
		self.report = int(report)
		self.commit = int(commit)

		# Raw string: "\-" and "\w" are not valid string escapes, so the non-raw form
		# triggers an invalid-escape warning. The compiled pattern itself is unchanged.
		# Group 1 = optional letters, optional dashes, then one or more letters; it is
		# followed by optional "-", "_" or space separators and one or more word characters.
		self.snp_acc_category_pattern = re.compile(r"([a-zA-Z]*[\-]*[a-zA-Z]+)[\-_ ]*[\w]+")
Esempio n. 5
0
    def __init__(self, hostname='dl324b-1', dbname='yhdb', schema='dbsnp', input_fname=None,
                 output_table=None, strain_info_table='strain_info', snp_locus_table='snp_locus',
                 organism='hs', type=1, debug=0, report=0, commit=0):
        """
        Record the run settings, derive the taxonomy id and compile the SNP accession pattern.

        hostname, dbname, schema, input_fname, output_table, strain_info_table and
        snp_locus_table are stored unchanged.
        organism is not stored; it is only used to compute self.tax_id via
        org2tax_id(org_short2long(organism)).
        type, debug, report and commit are converted with int() before being stored.
        """
        self.hostname = hostname
        self.dbname = dbname
        self.schema = schema
        self.input_fname = input_fname
        self.output_table = output_table
        self.strain_info_table = strain_info_table
        self.snp_locus_table = snp_locus_table
        self.tax_id = org2tax_id(org_short2long(organism))
        self.type = int(type)
        self.debug = int(debug)
        self.report = int(report)
        self.commit = int(commit)

        # Raw string: "\-" and "\w" are not valid string escapes, so the non-raw form
        # triggers an invalid-escape warning. The compiled pattern itself is unchanged.
        # Group 1 = optional letters, optional dashes, then one or more letters; it is
        # followed by optional "-", "_" or space separators and one or more word characters.
        self.snp_acc_category_pattern = re.compile(
            r"([a-zA-Z]*[\-]*[a-zA-Z]+)[\-_ ]*[\w]+")
Esempio n. 6
0
 def __init__(self, hostname='zhoudb', dbname='mdb', schema='transfac', inputfile=None,
              output_table=None, sequence_type=1, organism='hs', type=1, debug=0, report=0, commit=0):
     """
     Store the run settings and build the per-code parser and output-table lookups.

     hostname, dbname, schema, inputfile and output_table are kept as given.
     sequence_type, type, debug, report and commit are converted with int().
     self.organism holds the result of org_short2long(organism), not the raw argument.
     """
     self.hostname, self.dbname, self.schema = hostname, dbname, schema
     self.inputfile, self.output_table = inputfile, output_table
     self.sequence_type = int(sequence_type)
     self.organism = org_short2long(organism)
     # converted one at a time, in the same order as the attributes are declared
     for option_name, raw_value in (('type', type), ('debug', debug),
                                    ('report', report), ('commit', commit)):
         setattr(self, option_name, int(raw_value))

     # one row per integer code: (code, parse method, output table name)
     dispatch_rows = (
         (1, self.prom_seq_parse, 'prom_seq'),
         (2, self.factor_parse, 'factor'),
         (3, self.matrix_parse, 'matrix'),
         (4, self.binding_site_parse, 'binding_site'),
         (5, self.reference_parse, 'reference'),
         (6, self.site_parse, 'site'),
         (7, self.cell_parse, 'cell'),
         (8, self.fragment_parse, 'fragment'),
         (9, self.gene_parse, 'gene'),
         (10, self.class_parse, 'class'),
         # code 11 shares the 'binding_site' table name with code 4
         (11, self.binding_site_easy_parse, 'binding_site'),
     )
     self.parser_dict = {code: parser for code, parser, _ in dispatch_rows}
     self.output_table_dict = {code: table for code, _, table in dispatch_rows}

     # matches two digits followed by two spaces
     self.pwm_line_pattern = re.compile(r'\d\d  ')
Esempio n. 7
0
	def __init__(self, hostname='zhoudb', dbname='mdb', schema='transfac', inputfile=None,
		output_table=None, sequence_type=1, organism='hs', type=1, debug=0, report=0, commit=0):
		"""
		Store the run settings and build the per-code parser and output-table lookups.

		hostname, dbname, schema, inputfile and output_table are kept as given.
		sequence_type, type, debug, report and commit are converted with int().
		self.organism holds the result of org_short2long(organism), not the raw argument.
		"""
		self.hostname, self.dbname, self.schema = hostname, dbname, schema
		self.inputfile, self.output_table = inputfile, output_table
		self.sequence_type = int(sequence_type)
		self.organism = org_short2long(organism)
		# converted one at a time, in the same order as the attributes are declared
		for option_name, raw_value in (('type', type), ('debug', debug),
				('report', report), ('commit', commit)):
			setattr(self, option_name, int(raw_value))

		# integer code -> parse method
		self.parser_dict = {
			1: self.prom_seq_parse,
			2: self.factor_parse,
			3: self.matrix_parse,
			4: self.binding_site_parse,
			5: self.reference_parse,
			6: self.site_parse,
			7: self.cell_parse,
			8: self.fragment_parse,
			9: self.gene_parse,
			10: self.class_parse,
			11: self.binding_site_easy_parse,
		}
		# integer code -> output table name (codes 4 and 11 share 'binding_site')
		self.output_table_dict = {
			1: 'prom_seq',
			2: 'factor',
			3: 'matrix',
			4: 'binding_site',
			5: 'reference',
			6: 'site',
			7: 'cell',
			8: 'fragment',
			9: 'gene',
			10: 'class',
			11: 'binding_site',
		}
		# matches two digits followed by two spaces
		self.pwm_line_pattern = re.compile(r'\d\d  ')