예제 #1
0
	def writeList(self):   
		f = open('tmp/tmp.tr','w')   
		f.writelines(self.list)   
		log.add_log(log.g_logger.info('完成目录树,保存至tmp/tmp.tr文件中'))
		f.close()
		for line in self.list:
			print line,
예제 #2
0
	def find_href(self, tbname, colval):
		'''Look up colval in the 'href' column of the table for tbname.

		tbname is converted with comm.site2db() before being passed to
		find_col(). Returns whatever find_col() returns; if it raises
		ValueError, the error is logged and an empty list is returned.
		'''
		try:
			return self.find_col(comm.site2db(tbname), 'href', colval)
		except ValueError as e:
			# Log the raised instance (with its message), not the ValueError class.
			log.add_log(log.g_logger.error(e))
			return []
예제 #3
0
	def show_tree(self, dbfile):
		'''Walk every table of the "<dbfile without extension>_tr.db" database and write the tree.

		Opens a LocalData on the derived file, calls getDirList() for each
		table starting at parent id 0, closes the database and finally calls
		writeList().
		'''
		log.add_log(log.g_logger.info('正在生成目录树...'))
		stem, _ext = os.path.splitext(dbfile)
		self.ld = LocalData(stem + '_tr.db')
		# Every table is traversed from the root parent id.
		root_id = 0
		tables = self.ld.all_table()[0]
		for table in tables:
			self.getDirList(root_id, table)
		self.ld.close_db()
		self.writeList()
예제 #4
0
	def add_data(self, ctbox, **data):
		'''Store one page record and register its href with ctbox.

		data must contain the keys 'site' and 'href' (both are read here);
		the complete keyword set is forwarded to __insert__()/__update__().
		The table for the site is created first when find_table() reports it
		missing. A row is inserted when find_col() returns an empty result
		for the href, otherwise the existing row is updated. Finally
		ctbox.add_data() is called with the href only.
		'''
		log.add_log(log.g_logger.info('页面数据加入数据库'))
		site = comm.site2db(data['site'])
		if not self.find_table(site):
			self.__create__(site)
		# len() instead of calling the dunder directly; the insert/update
		# return values were never used, so they are no longer bound.
		if len(self.find_col(site, 'href', data['href'])) == 0:
			self.__insert__(**data)
		else:
			self.__update__(**data)
		# Only the href is handed on; no need to rebind the kwargs dict.
		ctbox.add_data(href=data['href'])
예제 #5
0
	def end_data(self):
		'''Save the in-memory database to the local database file when work ends.

		The SQL script obtained from get_men_script() is captured, the
		in-memory cursor and connection are closed, any existing file at
		self.dbfile is removed, and the script is replayed into a freshly
		created SQLite file.
		'''
		log.add_log(log.g_logger.info('将内存中的数据保存到本地数据库中'))
		str_sql = self.get_men_script().getvalue()
		self.cur_mem.close()
		self.__close__(self.conn_mem)
		# Local database: start from a fresh file.
		try:
			os.remove(self.dbfile)
		except OSError:
			# Best effort: typically the file simply does not exist yet.
			# Only filesystem errors are ignored; anything else propagates.
			pass
		conn_file = sqlite3.connect(self.dbfile)
		try:
			cur_file = conn_file.cursor()
			cur_file.executescript(str_sql)
			cur_file.close()
		except Exception:
			# Do not leak the open connection when replaying the script fails.
			conn_file.close()
			raise
		self.__close__(conn_file)
예제 #6
0
	def save_local(self,**select):
		'''Export database content to the local directory.

		Argument format (all three keys must be present; pass None for the
		selectors that do not apply):
		select = {'dbfile':'test.db','tbname':'www_baidu_com','href':'http://www.baidu.com/1'}

		The first non-None selector wins, checked in this order: a single
		href, then a whole table, then every table of the database. Only the
		whole-database path closes the database and logs the completion
		message; the href and table paths return right after saving.
		'''
		log.add_log(log.g_logger.info('正在将数据库中数据转换到本地目录下'))
		if select['href'] is not None:
			self.save_file(self.show_href(select['tbname'], select['href']))
			return
		if select['tbname'] is not None:
			self.save_table(select['tbname'])
			return
		dbfile = select['dbfile']
		if dbfile is not None:
			try:
				for tb in self.all_table()[0]:
					self.save_table(tb)
			except Exception as e:
				# Still best effort, but record the failure instead of hiding it.
				log.add_log(log.g_logger.error(e))
		self.close_db()
		if dbfile is None:
			# No selector given: there is no path to report (previously this
			# crashed with AttributeError on None.split()).
			return
		log.add_log(log.g_logger.info('完成转换:%s' % os.path.abspath('local/'+''.join(dbfile.split('.')[:-1]))))
		return
예제 #7
0
	def add_data(self, **data):
		'''Add the components of data['href'] to the directory tree.

		The netloc of the href is taken as the site and create_tree() is
		called for it. The href is then split with split_data(); for each
		part, a new node is added when its hash is not yet in self.list_data,
		otherwise the id of the existing node is looked up. self.par_id is
		reset to 0 at the first part and afterwards holds the id used as the
		parent of the next part.

		Failures while resolving the site or while processing a part are
		logged and do not abort the call.
		'''
		# Tracks whether `site` was bound. Previously a failure here left the
		# name undefined and every iteration raised a swallowed NameError.
		site_known = False
		try:
			site = self.get_scheme_netloc_path_(data['href']).netloc
			site_known = True
			self.create_tree(site)
		except Exception as e:
			log.add_log(log.g_logger.error(e))
		href = self.split_data(data['href'])
		log.add_log(log.g_logger.debug('分割url '+str(href)))
		for i, part in enumerate(href):
			if i == 0:
				self.par_id = 0
			log.add_log(log.g_logger.debug('加入路径'+str(i)+str(part)))
			d_hash = comm.get_hash(part)
			if site_known:
				try:
					if d_hash not in self.list_data:
						# Path part not added yet.
						self.par_id = self.add_node(site, part, i, self.par_id)
					else:
						self.par_id = self.dbTree.find_id(comm.site2db(site), part)
				except Exception as e:
					# Keep going with the remaining parts, but leave a trace.
					log.add_log(log.g_logger.error(e))
			log.add_log(log.g_logger.debug(str(self.par_id)+'父节点'))
예제 #8
0
	def getDirList(self, row_id, tbname):
		'''Append the tree lines for the children of row_id to self.list, recursing into sub-entries.

		Each line is self.SPACE + a branch marker + the child's 'path' + a
		newline. Children that themselves have entries (non-zero getCount)
		are descended into with self.SPACE temporarily extended by "|   ".
		Returns self.list.
		'''
		children = self.list_dir(row_id, tbname)
		child_total = self.getCount(row_id, tbname)
		leaves_seen = 0
		log.add_log(log.g_logger.debug(children))
		for child in children:
			path = child['path']
			child_id = self.find_id(tbname, path)
			sub_count = self.getCount(child_id, tbname)
			if sub_count:
				self.list.append(str(self.SPACE) + "|--" + path + "\n")
				# Descend one level: widen the prefix, recurse, then restore it.
				self.SPACE += "|   "
				self.getDirList(child_id, tbname)
				self.SPACE = self.SPACE[:-4]
			else:
				leaves_seen += 1
				# The closing marker is used only when the leaf counter
				# reaches the total child count.
				marker = "|--" if leaves_seen != child_total else "`--"
				self.list.append(str(self.SPACE) + marker + path + "\n")
		return self.list
예제 #9
0
	def test(self):
		'''Debug helper: run three statements through __cmd__() and log each result at debug level.

		The table name is hard-coded.
		'''
		tbname = '172_4_16_168'
		statements = (
			# List all table names.
			'''SELECT name FROM sqlite_master WHERE type='table' order by name''',
			# Show all data of this table.
			'''SELECT * FROM '%s' ''' % (tbname),
			# Column information of this table.
			"PRAGMA table_info('%s')" % (tbname),
		)
		for str_sql in statements:
			log.add_log(log.g_logger.debug(self.__cmd__(str_sql)))
예제 #10
0
	def add_node(self,site, data, deep, par_id):
		'''Insert a tree node and remember the hash of its data.

		Calls insert_data() with the given arguments, appends
		comm.get_hash(data) to self.list_data and returns the value that
		insert_data() returned.
		'''
		log.add_log(log.g_logger.info('加入目录树结点'))
		node_id = self.insert_data(site, data, deep, par_id)
		data_hash = comm.get_hash(data)
		self.list_data.append(data_hash)
		return node_id
예제 #11
0
	#import time
	#time.sleep(1)
	db.end_data()
	ctbox.dbTree.end_data()

def test_tree():
	'''Manual check: feed one sample href into a CTBox backed by test_tr.db.'''
	box = CTBox('test_tr.db')
	box.add_data(href='http://172.4.16.168/wp-login.php')

def test_local(dbfile):
	'''Manual check: export the database opened from dbfile via save_local().

	Only the 'dbfile' selector is set; 'href' and 'tbname' are passed as None.
	'''
	local = LocalData(dbfile)
	local.save_local(dbfile=local.dbfile, href=None, tbname=None)

if __name__=='__main__':
	# Manual run: configure logging, then call the three test helpers in turn.
	pass
	# The level string is "<value>:<value>", both halves looked up under key 5
	# in log.log_level.
	level = log.log_level.get(5)+':'+log.log_level.get(5)
	log.set_logger(filename='test.log', level=level)
	test_db()
	test_tree()
	test_local('test.db')
	# NOTE(review): unless `dir` is rebound elsewhere in this file, this is the
	# builtin dir(), which returns a list of names. A list has no show_tree(),
	# so the following call would raise AttributeError. Presumably an instance
	# of the class that defines show_tree() was intended -- TODO confirm.
	d = dir()
	d.show_tree('test')
	#d.read_lines('job.asp',3)