Example #1
	def __init__( self, db_info, id, url_structure ):
		"""
		Store the worker id, open the database handles, compile the torrent-link
		pattern and record the page URL template and parse limits.

		db_info       -- connection settings passed straight to database().connect()
		id            -- value kept on the instance as self.id
		url_structure -- page URL template ( e.g. http://thepiratebay.org/browse/101/TPAGE/3 )
		"""
		# NOTE(review): earlier comments in this method spoke of initiating the thread and
		# of setting the current page to the first page, but no statement does either
		# here -- confirm whether that happens elsewhere in the class.
		self.id = id

		# self.dbc holds whatever connect() returns; self.db is a separate database() object
		connector = database()
		self.dbc = connector.connect( db_info )
		self.db = database()

		# case-insensitive pattern capturing the href of the link inside <div class="detName">
		self.torrent_url_pattern = re.compile(
			r'<div class="detName"><a href="([a-zA-Z0-9\_\-\(\)/\.\[\]]+)"',
			re.IGNORECASE
		)

		self.url_structure = url_structure

		# page budget ( 100 ) and the counter of pages parsed so far ( starts at 0 )
		self.max_parse, self.num_parsed = 100, 0
Example #2
	def __init__( self, db_info, url ):
		"""
		Initialise the thread base class, open the database handles, compile the
		download-link pattern and remember the URL this worker was given.

		db_info -- connection settings passed straight to database().connect()
		url     -- value kept on the instance as self.url
		"""
		threading.Thread.__init__( self )

		# self.dbc holds whatever connect() returns; self.db is a separate database() object
		connector = database()
		self.dbc = connector.connect( db_info )
		self.db = database()

		# case-insensitive pattern capturing the part of an http:// link that precedes
		# the TPB.torrent suffix
		# NOTE(review): the dots in ".TPB.torrent" are unescaped, so each matches any character
		self.download_url_pattern = re.compile(
			r'<a href="http://([a-zA-Z0-9\_\-\(\)/\.\[\]]+).TPB.torrent" title="',
			re.IGNORECASE
		)

		self.url = url
import re
import random
Example #5
# Parser that supplies the bot's settings.
# NOTE(review): no config.read( ... ) call is visible in this part of the file; unless
# the settings file is loaded elsewhere, the config.get() calls below are expected to
# fail with NoSectionError -- confirm.
config = ConfigParser.RawConfigParser()


# Connection settings taken from the [database] section; this list is what gets
# passed to database().connect() below and to each worker.
db_info = [
			config.get( 'database', 'host' ),
			config.get( 'database', 'port' ),
			config.get( 'database', 'user' ),
			config.get( 'database', 'passw' ),
			config.get( 'database', 'name' ),
]  # closing bracket was missing, which made the rest of the file a syntax error

# [bot] max_threads, converted from the config string to an int
max_threads = int( config.get( 'bot', 'max_threads' ) )

# db ends up holding whatever database().connect() returns, not the database() object itself
db = database()
db = db.connect( db_info )

# Loop flag: the loop below keeps running for as long as this equals 1.
infinite = 1

# Polling loop: each pass asks the to_crawl collection for up to max_threads
# documents whose "hash" field is 0 and builds one worker object per row.
while infinite == 1:
	# NOTE(review): count is reset on every pass but not used in the visible part of the loop
	count = 0
	threads = []
	rows = db.to_crawl.find( { "hash": 0 } ).limit( max_threads )
	# presumably a pymongo cursor, where count( 1 ) respects the limit() above -- TODO confirm
	if rows.count( 1 ) < max_threads:
		# fewer rows than max_threads came back: report it and pause for 120 seconds
		print "sleeping"
		time.sleep( 120 )
		# NOTE(review): this loop sits under the "sleeping" branch, so workers are only
		# built when fewer than max_threads rows were found -- an `else:` may be
		# missing; confirm against the full file.
		# NOTE(review): range( n - 1 ) stops one short of the row count, so the last
		# row is never visited -- confirm whether that is intended.
		for i in range( rows.count( 1 ) - 1  ):
			# get_site() is given the row's 'site' value; what it returns is then called
			# with the connection settings and the row's 'info_url'
			thread = get_site( rows[i]['site'] )( db_info, rows[i]['info_url'] )
			threads.append( thread )