Ejemplo n.º 1
0
	def __init__( self, db_info, id, url_structure ):
		"""Worker thread that walks the paginated torrent listing.

		db_info       -- connection parameters handed to database().connect()
		id            -- numeric identifier for this worker thread
		url_structure -- listing URL template containing the TPAGE placeholder
		                 ( e.g. http://thepiratebay.org/browse/101/TPAGE/3 )
		"""
		Thread.__init__( self )

		self.id = id

		# one connected handle, plus a spare unconnected database object
		self.dbc = database().connect( db_info )
		self.db = database()

		# matches the per-torrent detail link inside a listing page
		self.torrent_url_pattern = re.compile( r'<div class="detName"><a href="([a-zA-Z0-9\_\-\(\)/\.\[\]]+)"', re.I )

		self.url_structure = url_structure

		# begin crawling from page one
		self.first_page()

		# after this many parsed pages the worker is put to sleep
		self.max_parse = 100
		self.num_parsed = 0
Ejemplo n.º 2
0
	def __init__( self, db_info, url ):
		"""Worker thread that resolves the download link on one detail page.

		db_info -- connection parameters handed to database().connect()
		url     -- torrent information page this worker will fetch
		"""
		threading.Thread.__init__( self )

		# one connected handle, plus a spare unconnected database object
		self.dbc = database().connect( db_info )
		self.db = database()

		# matches the ".TPB.torrent" download link on a detail page
		self.download_url_pattern = re.compile( r'<a href="http://([a-zA-Z0-9\_\-\(\)/\.\[\]]+).TPB.torrent" title="', re.I )

		self.url = url
Ejemplo n.º 3
0
import re
Ejemplo n.º 4
0
import random
Ejemplo n.º 5
0
# Bot entry point: load connection/bot settings, then loop forever
# dispatching one crawler thread per pending row in `to_crawl`.
# NOTE(review): Python 2 code (print statement, ConfigParser module).
config = ConfigParser.RawConfigParser()

# successive read() calls layer both files onto the same parser
config.read('./config/database.cfg')
config.read('./config/bot.cfg')

# positional connection parameters consumed by database().connect()
db_info = [
			config.get( 'database', 'host' ),
			config.get( 'database', 'port' ),
			config.get( 'database', 'user' ),
			config.get( 'database', 'passw' ),
			config.get( 'database', 'name' ),
			]

# upper bound on threads dispatched per batch
max_threads = int( config.get( 'bot', 'max_threads' ) )

# connect() returns the live handle; db is rebound to it
db = database()
db = db.connect( db_info )

infinite = 1

while infinite == 1:
	count = 0
	threads = []
	# pending work: rows whose torrent hash has not been filled in yet
	rows = db.to_crawl.find( { "hash": 0 } ).limit( max_threads )
	# count( 1 ) applies the limit; back off until a full batch is available
	if rows.count( 1 ) < max_threads:
		print "sleeping"
		time.sleep( 120 )
	else:
		# NOTE(review): range( count - 1 ) skips the last fetched row —
		# looks like an off-by-one; confirm intent before changing.
		for i in range( rows.count( 1 ) - 1  ):
			# get_site() presumably maps the row's site name to a worker
			# class — verify against its definition elsewhere in the project
			thread = get_site( rows[i]['site'] )( db_info, rows[i]['info_url'] )
			threads.append( thread )