Esempio n. 1
0
class SearchFlightPage(object):
    """Page object for the BlazeDemo flight-search page.

    Wraps a Web driver helper (self._web) and exposes the search workflow:
    open the page, choose departure/destination, submit, and read results.
    """

    __url = "http://blazedemo.com/"

    def open(self):
        """Navigate the browser to the flight-search page."""
        self._web.open(self.__url)

    def __init__(self, browser):
        self._web = Web(browser)

    def select_departure_city(self, city):
        """Pick *city* in the departure (fromPort) dropdown."""
        time.sleep(5)
        option_xpath = "//select[@name='fromPort']/option[@value='{}']".format(city)
        self._web.get_web_element_by_xpath(option_xpath).click()

    def select_destination_city(self, city):
        """Pick *city* in the destination (toPort) dropdown."""
        time.sleep(5)
        option_xpath = "//select[@name='toPort']/option[@value='{}']".format(city)
        self._web.get_web_element_by_xpath(option_xpath).click()

    def search_for_flights(self):
        """Submit the search form."""
        time.sleep(5)
        submit = self._web.get_web_element_by_xpath("//input[@type='submit']")
        submit.click()

    def get_found_flights(self):
        """Return the result rows of the flights table."""
        time.sleep(5)
        rows_xpath = "//table[@class='table']/tbody/tr"
        return self._web.get_web_elements_by_xpath(rows_xpath)

    def close(self):
        """Close all browser windows held by the Web helper."""
        self._web.close_all()
def glasshammers_form_test():
    """Submit the Glass Hammers enquiry form with canned test data and a
    randomly chosen inquiry type."""
    rng = Random()
    name = "DJdjango"
    email = "*****@*****.**"
    phone = "1230984756"
    inquiry_types = ["Customer Support", "Commission", "Questions", "Comments"]
    reason = rng.choice(inquiry_types)
    message = "callllvinnnn!!"

    form_web = Web()
    form_web.glass_hammers_form(name, email, phone, reason, message)
Esempio n. 3
0
	def __init__(self, url, debugging=True, location=None):
		"""Set up the ripper for *url*: optional MongoDB handle, Web client,
		working directory and log-file path.

		url       -- gallery URL; may carry a 'pwd=' query (imgsrc password).
		debugging -- forwarded to the Web helper.
		location  -- accepted but unused here -- TODO confirm subclass use.
		"""
		self.DB = None
		# Presumably MongoClient is falsy when pymongo failed to import --
		# only connect when the driver is available. TODO confirm import guard.
		if MongoClient:
			self.DB = MongoClient()
			 
		self.debugging = debugging
		self.web = Web(debugging=self.debugging) # Web object for downloading/parsing
		self.base_dir = RIP_DIRECTORY
		if not os.path.exists(self.base_dir):
			os.mkdir(self.base_dir)
		self.original_url = url
		#self.debug('class: %s' % self.__class__.__name__)
		#self.debug('basesite.__init__init__ url:' + url)
		self.imgsrcpwd = ""
		self.username = ""
		if 'pwd=' in url:
			# Everything after "pwd=" is the gallery password.
			self.imgsrcpwd = url[url.find('pwd')+4:]
			#self.debug('imgsrcpwd:%s'%self.imgsrcpwd)
		self.title=""
		self.url = self.sanitize_url(url)
		# Directory to store images in
		galldir = self.get_gallery_dir(self.url)
		if galldir == '':
			galldir = self.get_dir(self.url)
		self.working_dir  = '%s%s%s' % (self.base_dir, os.sep, galldir)
		self.max_threads  = MAX_THREADS
		self.thread_count = 0
		self.image_count  = 0
		self.max_images   = MAX_IMAGES
		self.logfile      = '%s%s%s' % (self.working_dir, os.sep, self.get_gallery_dir(self.url) + LOG_NAME)
		self.first_log    = True
Esempio n. 4
0
	def __init__(self, url, urls_only=False):
		"""Prepare the ripper: Web client, base/working directories, log path.

		url       -- album URL, normalized by the subclass's sanitize_url().
		urls_only -- if True, record URLs instead of downloading files.
		"""
		self.web = Web() # Web object for downloading/parsing
		self.base_dir = RIP_DIRECTORY
		if not os.path.exists(self.base_dir):
			os.mkdir(self.base_dir)
		self.url = self.sanitize_url(url)
		# Directory to store images in
		self.working_dir  = '%s%s%s' % (self.base_dir, os.sep, self.get_dir(self.url))
		self.max_threads  = MAX_THREADS
		self.thread_count = 0
		self.image_count  = 0
		self.max_images   = MAX_IMAGES
		self.logfile      = '%s%s%s' % (self.working_dir, os.sep, LOG_NAME)
		self.first_log    = True
		self.urls_only    = urls_only
Esempio n. 5
0
from Web import Web

# Listing page to scrape.
url = 'http://www.qqride.com/category/scheduled/page/1/'

# The three strings are presumably CSS class names for the list container,
# entry title and article body -- confirm against Web.__init__.
cnrun = Web(url, 'site-main', 'entry-title', 'single-content')

#print(cnrun.soup)

# Populate the crawler's internal list of entries.
cnrun.getlist()


def p(L):
    """Make a site-relative link absolute on the chinarun.com origin."""
    origin = 'http://www.chinarun.com'
    return origin + L


#cnrun.map(p)


def getGen():
    """Return the generator of processed entries from the module-level crawler."""
    return cnrun.process()


if __name__ == "__main__":
    # Manual smoke test: dump every processed entry.

    for i in getGen():
        print(i)
Esempio n. 6
0
from site_imagefap    import imagefap
from pymongo import MongoClient

# Python 2 script: collect imagefap gallery URLs from a user's news feed,
# skipping any already recorded in MongoDB.
for gall in sys.argv[1:]:
	# Echo CLI arguments; gall is otherwise unused -- TODO confirm intent.
	print gall
	

#whynot 494927
#jimmy 1764181
# Unused AJAX endpoint template (ts= left blank) -- kept from experimentation.
asd = 'http://www.imagefap.com/ajax/newsdata.php?userid=1764181&status=&galleries=&comments=&ts='


mongo = MongoClient()

# Matches gallery links and captures the numeric gallery id.
rGall = re.compile('href="\/gallery\/(\d+)"><b>[^<]+<\/b>')
web = Web()

ts = datetime.now()
ts2 = ts - timedelta(days=5)
# Unix timestamp (UTC) for five days ago; printed for debugging only.
sss = calendar.timegm(ts2.utctimetuple())
print sss
#exit()
ar = set()
feed = web.get('http://www.imagefap.com/newsfeed.php?user=JimmyPerv77')
for a in re.findall(rGall, feed):
	a2 = 'http://www.imagefap.com/gallery/' + str(a)
	print a2
	# Skip galleries already stored in the feed.url collection.
	if mongo.feed.url.find_one({"url": a2}):
		print "1found %s" % a2
		continue
	ar.add(a2)
Esempio n. 7
0
class basesite(object):
	"""
		Constructs object using overriding methods.
		Throws Exception if:
		 * URL is invalid (not appropriate for site class),
		 * Working directory could not be created.
	"""
	def __init__(self, url, urls_only=False, debugging=False):
		self.debugging = debugging
		self.web = Web() # Web object for downloading/parsing
		self.base_dir = RIP_DIRECTORY
		if not os.path.exists(self.base_dir):
			os.mkdir(self.base_dir)
		# sanitize_url / get_dir are abstract -- subclasses must override.
		self.url = self.sanitize_url(url)
		# Directory to store images in
		self.working_dir  = '%s%s%s' % (self.base_dir, os.sep, self.get_dir(self.url))
		self.max_threads  = MAX_THREADS
		self.thread_count = 0
		self.image_count  = 0
		self.max_images   = MAX_IMAGES
		self.logfile      = '%s%s%s' % (self.working_dir, os.sep, LOG_NAME)
		self.first_log    = True
		self.urls_only    = urls_only
	
	""" To be overridden """
	def sanitize_url(self, url):
		raise Exception("Method 'sanitize_url' was not overridden!")

	""" Return directory name to store photos in """
	def get_dir(self, url):
		raise Exception("Method 'get_dir' was not overridden!")
	
	""" Creates working dir if zip does not exist """
	def init_dir(self):
		if not os.path.exists(self.working_dir) and \
		       self.existing_zip_path() == None:
			os.mkdir(self.working_dir)
	
	""" Returns true if we hit the image limit, false otherwise """
	def hit_image_limit(self):
		return self.image_count >= self.max_images
	
	""" To be overridden """
	def download(self):
		raise Exception("Method 'download' was not overridden!")

	""" Checks if album is already being downloaded """
	def is_downloading(self):
		# The log file doubles as the in-progress marker.
		return os.path.exists(self.logfile)
	
	""" Appends line to log file """
	def log(self, text, overwrite=False):
		# First call writes a header line (recursing once with overwrite=True).
		if self.first_log:
			self.first_log = False
			self.log('http://rip.rarchives.com - file log for URL %s' % self.url, overwrite=True)
		sys.stderr.write('%s\n' % text)
		# Escape double quotes so log lines stay parseable downstream.
		text = text.replace('"', '\\"')
		if overwrite:
			f = open(self.logfile, 'w')
		else:
			f = open(self.logfile, 'a')
		f.write("%s\n" % text)
		f.flush()
		f.close()
	
	""" Gets last line(s) from log """
	def get_log(self, tail_lines=1):
		# NOTE(review): returns '' when there is no log, but a list of lines
		# otherwise -- callers must handle both types.
		if not os.path.exists(self.logfile):
			return ''
		f = open(self.logfile, 'r')
		r = f.read().strip()
		f.close()
		while r.endswith('\n'): r = r[:-1]
		lines = r.split('\n')
		return lines[len(lines)-tail_lines:]
	
	""" Starts separate thread to download image from URL """
	def download_image(self, url, index, total='?', subdir='', saveas=None):
		if saveas == None:
			# Default filename: last path segment of the URL.
			saveas = url[url.rfind('/')+1:]
			# Strip extraneous / non FS safe characters
			if '?' in saveas: saveas = saveas[:saveas.find('?')]
			if ':' in saveas: saveas = saveas[:saveas.find(':')]
		# Add a file extension if necessary
		if not '.' in saveas:
			m = self.web.get_meta(url)
			ct = 'image/jpeg' # Default to jpg
			if 'Content-Type' in m: ct = m['Content-Type']
			ext = ct[ct.rfind('/')+1:]
			if ext == 'jpeg': ext = 'jpg'
			saveas = '%s.%s' % (saveas, ext)
		# Setup subdirectory saves
		if subdir != '': subdir = '/%s' % subdir
		savedir = '%s%s' % (self.working_dir, subdir)
		if not os.path.exists(savedir): os.mkdir(savedir)
		
		# Zero-padded index keeps files sorted in download order.
		saveas = '%s/%03d_%s' % (savedir, index, saveas)
		if os.path.exists(saveas):
			self.log('file exists: %s' % saveas)
			self.image_count += 1
		else:
			# Throttle: busy-wait until a thread slot frees up.
			while self.thread_count > self.max_threads:
				time.sleep(0.1)
			self.thread_count += 1
			args = (url, saveas, index, total)
			t = Thread(target=self.download_image_thread, args=args)
			t.start()
	
	""" Multi-threaded download of image """
	def download_image_thread(self, url, saveas, index, total):
		m = self.web.get_meta(url)
		if 'Content-Type' not in m:
			text = 'no Content-Type found at URL %s' % (url)
		elif ('image' not in m['Content-Type'] and \
			 'video' not in m['Content-Type'] and \
			 'octet-stream' not in m['Content-Type']):
			text = 'no "image"/"video"/"octet-stream" in Content-Type (found "%s") for URL %s' % (m['Content-Type'], url)
		else:
			if self.web.download(url, saveas):
				self.image_count += 1
				text = 'downloaded (%d' % index
				if total != '?': text += '/%s' % total
				text += ') (%s) - %s' % (self.get_size(saveas), url)
			else:
				text = 'download failed (%d' % index
				if total != '?': text += '/%s' % total
				text += ') - %s' % url
		self.log(text)
		self.thread_count -= 1
	
	def wait_for_threads(self):
		# Block until every download thread finishes, then delete the working
		# dir if it holds nothing but the log (i.e. nothing was downloaded).
		while self.thread_count > 0:
			time.sleep(0.1)
		if os.path.exists(self.working_dir):
			if not self.urls_only and len(os.listdir(self.working_dir)) <= 1 \
					or self.urls_only and len(os.listdir(self.working_dir)) == 0:
				rmtree(self.working_dir) # Delete everything in working dir
	
	""" Returns human-readable filesize for file """
	def get_size(self, filename):
		try:
			bytes = os.path.getsize(filename)
		except:
			return '?b'
		# Walk g/m/k thresholds from 1 GiB downward.
		b = 1024 * 1024 * 1024
		a = ['g','m','k','']
		for i in a:
			if bytes >= b:
				return '%.2f%sb' % (float(bytes) / float(b), i)
			b /= 1024
		return '0b'

	
	""" Returns path to zip file if it exists, otherwise None. """
	def existing_zip_path(self):
		# urls_only runs produce a .txt instead of a .zip.
		extension = 'zip'
		if self.urls_only:
			extension = 'txt'
		zipfile = '%s.%s' % (self.working_dir, extension)
		if os.path.exists(zipfile) and not os.path.exists(self.working_dir):
			return zipfile
		else:
			return None
	
	""" 
		Zips site's working directory,
		Deletes zipped files after zip is created
		Returns path to zip file
	"""
	def zip(self):
		if self.urls_only:
			# URLs-only mode: convert the log into a sorted .txt of URLs
			# instead of zipping image files.
			if not os.path.exists('%s/log.txt' % self.working_dir):
				raise Exception('no log found')
			url_filename = '%s.txt' % self.working_dir
			f = open('%s/log.txt' % self.working_dir, 'r')
			lines = f.read().split('\n')[1:]
			tuples = []
			for line in lines:
				if line.strip() == '' or ' - ' not in line: continue
				if line.count('|') < 1: continue
				# Log lines look like "... - <index>|<url>" (see add_url).
				line = line[line.find(' - ')+3:]
				splits = line.split('|')
				index  = splits[0]
				url    = '|'.join(splits[1:])
				tuples.append( (index, url) )
			tuples = sorted(tuples, key=lambda tup: int(tup[0]))
			f = open(url_filename, 'w')
			for (index, url) in tuples:
				f.write('%s\n' % url)
			f.close()
			rmtree(self.working_dir) # Delete everything in working dir
			return url_filename
		self.log('zipping album...')
		zip_filename = '%s.zip' % self.working_dir
		z = ZipFile(zip_filename, "w", ZIP_DEFLATED)
		for root, dirs, files in os.walk(self.working_dir):
			# NOTE: ignore empty directories
			for fn in files:
				#if 'log.txt' in fn: continue
				absfn = os.path.join(root, fn)
				zfn = absfn[len(self.working_dir)+len(os.sep):] #XXX: relative path
				z.write(absfn, zfn)
		z.close()
		rmtree(self.working_dir) # Delete everything in working dir
		return zip_filename
		
	def add_url(self, index, url, total=0):
		# URLs-only mode: record the URL in the log instead of downloading it.
		self.image_count += 1
		string = '(%d' % index
		if total > 0:
			string += '/%d' % total
		string += ')'
		self.log('%s - %d|%s' % (string, index, url))

	def debug(self, text):
		# Print to stderr only when debugging is enabled.
		if not self.debugging: return
		sys.stderr.write('%s\n' % text)
Esempio n. 8
0
class EnquiryFormHelper():
    """Page object for the trip-enquiry form on the site's landing page.

    All interaction goes through the Web wrapper (self._web) built from the
    browser handle passed to __init__.
    """

    #__url = "http://blazedemo.com/"
    __url = "https://<url>/front/default/newhome"

    def Open(self):
        """Navigate the browser to the landing page."""
        self._web.open(self.__url)

    def __init__(self, browser):
        self._web = Web(browser)

    def click_customize_button(self):
        """Open the enquiry form via the first 'Customize Your Trip' control."""
        self._web.get_web_element_by_xpath(
            "(//*[contains(text(),'Customize Your Trip')])[1]").click()
        time.sleep(3)

    def enter_name(self, name):
        """Type *name* into the form's name field."""
        self._web.get_web_element_by_xpath("//*[@name='name']").send_keys(name)

    def enter_phone(self, phone):
        """Type *phone* into the form's phone field."""
        self._web.get_web_element_by_xpath("//*[@name='phone']").send_keys(
            phone)

    def enter_email(self, email):
        """Type *email* into the form's email field."""
        self._web.get_web_element_by_xpath("//*[@name='email']").send_keys(
            email)
        # BUG FIX: the original ended with a bare `time.sleep` (no call
        # parentheses) -- a no-op expression that never slept; removed.

    def select_no_of_people(self, noOfPeople):
        """Pick a people-count option; best-effort (errors printed, not raised)."""
        try:
            self._web.get_web_element_by_xpath(
                "//input[@value='No. of People']").click()
            time.sleep(3)
            self._web.get_web_element_by_xpath(
                "//div[contains(@class,'NoOfAdults')]//li/span[contains(text(),'{}')]"
                .format(noOfPeople)).click()
            time.sleep(2)
        except Exception as e:
            # Deliberate best-effort: keep the form flow going even if the
            # dropdown is missing or slow to render.
            print(e)

    def select_trip_begin(self, trip_begin):
        """Pick a trip-start option; best-effort (errors printed, not raised)."""
        try:
            self._web.get_web_element_by_xpath(
                "//input[@value='I will book']").click()
            time.sleep(3)
            self._web.get_web_element_by_xpath(
                "//div[contains(@class,'duration')]//li/span[contains(text(),'{}')]"
                .format(trip_begin)).click()
            time.sleep(2)
        except Exception as e:
            print(e)

    def select_trip_location(self, location):
        """Pick a trip location from the location dropdown (no error guard)."""
        self._web.get_web_element_by_xpath(
            "//input[@value='Trip Location*']").click()
        time.sleep(2)
        self._web.get_web_element_by_xpath(
            "//div[contains(@class,'location1')]//li/span[contains(text(),'{}')]"
            .format(location)).click()
        time.sleep(2)

    def select_random_date(self):
        """Delegate random date selection to the Web wrapper's date picker."""
        self._web.selectDate()

    def Close(self):
        """Close all browser windows held by the Web wrapper."""
        self._web.close_all()
Esempio n. 9
0
class basesite(object):
	"""
		Constructs object using overriding methods.
		Throws Exception if:
		 * URL is invalid (not appropriate for site class),
		 * Working directory could not be created.
	"""
	def __init__(self, url, debugging=True, location=None):
		self.DB = None
		# Presumably MongoClient is falsy when pymongo failed to import --
		# only connect when the driver is available. TODO confirm.
		if MongoClient:
			self.DB = MongoClient()
			 
		self.debugging = debugging
		self.web = Web(debugging=self.debugging) # Web object for downloading/parsing
		self.base_dir = RIP_DIRECTORY
		if not os.path.exists(self.base_dir):
			os.mkdir(self.base_dir)
		self.original_url = url
		#self.debug('class: %s' % self.__class__.__name__)
		#self.debug('basesite.__init__init__ url:' + url)
		self.imgsrcpwd = ""
		self.username = ""
		if 'pwd=' in url:
			# Everything after "pwd=" is the gallery password (imgsrc).
			self.imgsrcpwd = url[url.find('pwd')+4:]
			#self.debug('imgsrcpwd:%s'%self.imgsrcpwd)
		self.title=""
		self.url = self.sanitize_url(url)
		# Directory to store images in
		galldir = self.get_gallery_dir(self.url)
		if galldir == '':
			galldir = self.get_dir(self.url)
		self.working_dir  = '%s%s%s' % (self.base_dir, os.sep, galldir)
		self.max_threads  = MAX_THREADS
		self.thread_count = 0
		self.image_count  = 0
		self.max_images   = MAX_IMAGES
		self.logfile      = '%s%s%s' % (self.working_dir, os.sep, self.get_gallery_dir(self.url) + LOG_NAME)
		self.first_log    = True
		
		
	def get_gallery_dir(self, url, r='', gallno2=''):
		# Default: no gallery-specific directory; subclasses may override.
		return ''

	""" To be overridden """
	def sanitize_url(self, url):
		raise Exception("Method 'sanitize_url' was not overridden!")

	""" Return directory name to store photos in """
	def get_dir(self, url):
		raise Exception("Method 'get_dir' was not overridden!")
	
	""" Creates working dir if zip does not exist """
	def init_dir(self):
		if not os.path.exists(self.working_dir) and \
		       self.existing_zip_path() == None:
			os.mkdir(self.working_dir)
	
	""" Returns true if we hit the image limit, false otherwise """
	def hit_image_limit(self):
		if self.image_count >= self.max_images:
			self.log('hit image limit: %d >= %d' % (self.image_count, self.max_images))
			return True
		return False
	
	""" To be overridden """
	def download(self):
		raise Exception("Method 'download' was not overridden!")

	""" Checks if album is already being downloaded """
	def is_downloading(self):
		# The log file doubles as the in-progress marker.
		return os.path.exists(self.logfile)
	
	""" Appends line to log file """
	def log(self, text, overwrite=False):
		# First call prepends a timestamped header line.
		if self.first_log:
			self.first_log = False
			self.log('URL %s @ %s' % (self.original_url, strftime('%Y-%m-%dT%H:%M:%S PDT')), overwrite=False)
		if self.debugging:
			sys.stderr.write('%s\n' % text)
		# Escape double quotes so log lines stay parseable downstream.
		text = text.replace('"', '\\"')
		if overwrite:
			f = open(self.logfile, 'w')
		else:
			f = open(self.logfile, 'a')
		f.write("%s\n" % text)
		f.flush()
		f.close()
	
	""" Gets last line(s) from log """
	def get_log(self, tail_lines=1):
		# NOTE(review): returns '' when there is no log, but a list otherwise.
		if not os.path.exists(self.logfile):
			return ''
		f = open(self.logfile, 'r')
		r = f.read().strip()
		f.close()
		while r.endswith('\n'): r = r[:-1]
		lines = r.split('\n')
		return lines[len(lines)-tail_lines:]
	
	""" Starts separate thread to download image from URL """
	def download_image(self, url, index, total='?', subdir='', saveas=None, gallname=''):
		# Only for imgSRC: swap to the "o" mirror host !!!!!!
		url = url.replace('http://b', 'http://o')  
		gallname = gallname.replace('/', '_').replace('.', '_')
		unique_saveas = True
		if saveas == None:
			unique_saveas = False
			saveas = url[url.rfind('/')+1:]
			if gallname != '':
				saveas = gallname + '_' + saveas.replace('\/', '_')
			#self.debug('Pic:' + saveas)
			
			# Strip extraneous / non FS safe characters
			saveas = saveas.replace('?:\\', '')
			#if '?' in saveas: saveas = saveas[:saveas.find('?')]
			#if ':' in saveas: saveas = saveas[:saveas.find(':')]
		# Add a file extension if necessary
		if saveas[len(saveas)-4] != '.':
			m = self.web.get_meta(url)
			ct = 'image/jpeg' # Default to jpg
			if 'Content-Type' in m: ct = m['Content-Type']
			ext = ct[ct.rfind('/')+1:]
			if ext == 'jpeg': ext = 'jpg'
			saveas = '%s.%s' % (saveas, ext)
		# Setup subdirectory saves
		savedir = ''
		if subdir != '': 
			subdir = '/%s' % subdir
			savedir = '%s%s' % (self.base_dir, subdir)
		else:
			savedir = '%s%s' % (self.working_dir, subdir)
			
		if not os.path.exists(savedir): os.mkdir(savedir)
		
		if unique_saveas:
			saveas = '%s/%s' % (savedir, saveas)
		else:
			# Zero-padded index keeps files in download order.
			saveas = '%s/%03d_%s' % (savedir, index, saveas)
		if os.path.exists(saveas):
			self.debug('file exists: %s' % saveas)
			self.image_count += 1
		else:
			# Throttle: busy-wait for a free thread slot.
			while self.thread_count > self.max_threads:
				time.sleep(0.1)
			self.thread_count += 1
			args = (url, saveas, index, total)
			t = Thread(target=self.download_image_thread, args=args)
			t.start()
	
	""" Multi-threaded download of image """
	def download_image_thread(self, url, saveas, index, total):
		m = self.web.get_meta(url)
		if 'Content-Type' not in m:
			text = 'no Content-Type found at URL %s' % (url)
			# The "o" mirror may not exist; retry on the original "b" host.
			if url.startswith('http://o'):
				url = url.replace('http://o', 'http://b')
				#self.debug("Switching back from O to B:%s m:%s\n" % (url,m))
				args = (url, saveas, index, total)
				return self.download_image_thread(url, saveas, index, total)
		elif ('image'        not in m['Content-Type'] and \
		      'video'        not in m['Content-Type'] and \
		      'octet-stream' not in m['Content-Type']):
			text = 'no image/video/octet-stream in Content-Type (found "%s") for URL %s' % (m['Content-Type'], url)
		else:
			indextotal = self.get_index_total(index, total)
			hmm = None
			text = ""
			# Skip URLs already recorded in MongoDB (cross-run de-duplication).
			if self.DB:
				hmm = self.DB.imgsrc.imgurls.find_one({"url": str(url)})
				if hmm != None:
					self.debug('(thread) DB: url found: %s'%str(hmm))
					return 0
			
			if hmm == None:		
				if self.web.download(url, saveas):
					self.image_count += 1
					# Create thumbnail
					thumbnail = self.create_thumb(saveas)
					if self.DB:
						imgurls = self.DB.imgsrc.imgurls
						u96 = imgurls.find_one({"url": str(url)})
						if u96 != None:
							text = 'DB u96 %s/%s: (%s)' % (indextotal, self.get_size(saveas), saveas)
						else:
							imgurls.insert({"url": str(url)})
							text = 'vv %s/%s: (%s)' % (indextotal, self.get_size(saveas), saveas)
				else:
					text = 'download failed %s - %s' % (indextotal, url)
		if text != "":
			self.log(text)
		self.thread_count -= 1
		return self.get_size(saveas)

	""" Same-thread downlod/save (does not launch new thread) """
	def save_image(self, url, saveas, index, total='?'):
		indextotal = self.get_index_total(index, total)
		if os.path.exists(saveas):
			self.image_count += 1
			self.log('file exists: %s' % saveas)
		elif self.web.download(url, saveas):
			self.image_count += 1
			thumbnail = self.create_thumb(saveas)
			self.log('Download %s (%s) - source: (%s)' % (indextotal, self.get_size(saveas), url))
		else:
			self.log('Download %s ERROR << %s' % (indextotal, url))

	""" 
		Wait for threads to finish downloading.
		Delete working dir if no images are downloaded
	"""
	def wait_for_threads(self):
		# Cap the wait at ~100 seconds (1000 * 0.1s) in case a thread hangs.
		i = 0
		while (self.thread_count > 0) and (i < 1000):
			time.sleep(0.1)
			i = i + 1
		if os.path.exists(self.working_dir):
			if len(os.listdir(self.working_dir)) <= 1:
				rmtree(self.working_dir) # Delete everything in working dir
	
	""" Returns human-readable filesize for file """
	def get_size(self, filename):
		try:
			bytes = os.path.getsize(filename)
		except:
			return '?b'
		# Walk g/m/k thresholds from 1 GiB downward.
		b = 1024 * 1024 * 1024
		a = ['g','m','k','']
		for i in a:
			if bytes >= b:
				return '%.2f%sb' % (float(bytes) / float(b), i)
			b /= 1024
		return '0b'

	""" 
		Returns path to zip file if it exists, otherwise None.
		Does not return path if zipping is in progress.
	"""
	def existing_zip_path(self):
		zipfile = '%s.zip' % (self.working_dir)
		if os.path.exists(zipfile):
			if not os.path.exists(self.working_dir):
				# No direcotry; only zip exists
				return zipfile
			else:
				if not os.path.exists('%s%szipping.txt' % (self.working_dir, os.sep)):
					# 'zipping' file/flag does not exist
					return zipfile
		return None
	
	""" 
		Zips site's working directory,
		Deletes zipped files after zip is created
		Returns path to zip file
	"""
	def zip(self):
		x = self.working_dir
		x = x + "-" + self.get_dir(self.url) 
		self.log('zipping album... %s ' % x)
		zip_filename = '%s.zip' % x
		z = ZipFile(zip_filename, "w", ZIP_DEFLATED)
		for root, dirs, files in os.walk(self.working_dir):
			if root.endswith('/thumbs'): continue # Do not zip thumbnails
			for fn in files:
				# Ignore files used by service:
				if fn.endswith('zipping.txt'):  continue # Album is currently zipping
				if fn.endswith('complete.txt'): continue # Album download completed
				if fn.endswith('ip.txt'):       continue # IP address of ripper
				if fn.endswith('reports.txt'):  continue # Number of reports, report messages
				absfn = os.path.join(root, fn)
				zfn = absfn[len(self.working_dir)+len(os.sep):] #XXX: relative path
				z.write(absfn, zfn)
		z.close()
		return zip_filename

	"""
		Creates thumbnail based on file path.
		Creates /thumbs/ sub dir & stores thumbnail.
		Returns thumbnail path on success, empty string on failure.
	"""
	def create_thumb(self, inp):
		# NOTE(review): this unconditional return disables thumbnailing --
		# everything below it is unreachable (dead code).
		return
		if inp.lower().endswith('.mp4'):
			return self.create_video_thumb(inp)
		if Image == None:
			sys.stderr.write('Python Image Library (PIL) not installed; unable to create thumbnail for %s\n' % inp)
			sys.stderr.write('Go to http://www.pythonware.com/products/pil/ to install PIL\n')
			sys.stderr.flush()
			return 'rips/nothumb.png'
		fields = inp.split(os.sep)
		fields.insert(-1, 'thumbs')
		saveas = os.sep.join(fields)
		if os.path.exists(saveas): return ''
		thumbpath = os.sep.join(fields[:-1])
		if not os.path.exists(thumbpath):
			try: os.mkdir(thumbpath)
			except: pass
		try:
			im = Image.open(inp)
			(width, height) = im.size
			if width > MAX_THUMB_DIM or height > MAX_THUMB_DIM:
				# Image too large to create thumbnail
				self.log('unable to create thumbnail, %dx%d > %d' % (width, height, MAX_THUMB_DIM))
				return 'rips/nothumb.png'
			if os.path.getsize(inp) > MAX_THUMB_SIZE:
				self.log('unable to create thumbnail, %db > %db' % (os.path.getsize(inp), MAX_THUMB_SIZE))
				return 'rips/nothumb.png'
			if im.mode != 'RGB': im = im.convert('RGB')
			im.thumbnail( (200,200), Image.ANTIALIAS)
			im.save(saveas, 'JPEG')
			return saveas
		except Exception, e:
			self.log('failed to create thumb: %s' % str(e))
			pass
		return 'rips/nothumb.png'
Esempio n. 10
0
from Web import Web

#this file will thow some errors, remember to catch it

# Match-preview page to scrape.
url='http://ps.zestbike.com/match/preview'

# The three strings are presumably CSS class names for the page container,
# list items and article body -- confirm against Web.__init__.
cnrun=Web(url,'pagecontent','commonlist yahei','commonarticle yahei')


#print(cnrun.soup)

# Populate the crawler's internal list of entries.
cnrun.getlist()

def p(L):
    """Make a site-relative link absolute on the ps.zestbike.com origin."""
    origin = 'http://ps.zestbike.com/'
    return origin + L

# Dump the raw scraped list, then absolutize each link.
print(cnrun.list)


cnrun.map(p)

def getGen():
    """Return the generator of processed entries from the module-level crawler."""
    return cnrun.process()

if __name__ == "__main__":
    # Manual smoke test: print each processed entry with a running count.
    # (Indentation normalized to spaces; the original mixed spaces and tabs.)
    count = 0
    for entry in getGen():
        count += 1
        print(entry)
        print(count)
Esempio n. 11
0
from Web import Web

# Scrape the chinarun.com cycling-events listing (URL-encoded Chinese keyword).
# The three strings are presumably element class names for the list container,
# item and detail body -- confirm against Web.__init__.
cnrun = Web(
    'http://www.chinarun.com/html/event_k_%20%E8%87%AA%E8%A1%8C%E8%BD%A6%E8%B5%9B_0_.html#cnt',
    'ulHdList', 'n', 'divCnt divHA')

# Populate the crawler's internal list of entries.
cnrun.getlist()


def p(L):
    """Make a site-relative link absolute on the chinarun.com origin."""
    base = 'http://www.chinarun.com'
    return base + L


# Absolutize all scraped links in place.
cnrun.map(p)


def getGen():
    """Return the generator of processed entries from the module-level crawler."""
    return cnrun.process()


if __name__ == "__main__":
    # Manual smoke test: print the first field of each processed entry.
    for i in getGen():
        print(i[0])
Esempio n. 12
0
import ujson

import WifiConnect
from LedController import LedController
from Scheduler import Scheduler
from Web import Web

# Read this device's hostname, then load its per-host JSON configuration.
hostname = open("config.hostname.txt").read()
print("hostname = {}".format(hostname))
config_file = "config.{0}.json".format(hostname)
config = ujson.loads(open(config_file).read())
print("config = {}".format(config))

# Join the WiFi network using the configured SSID and a password file.
ssid = config.get("ssid")
password = open("wifi-password.txt").read()
WifiConnect.connect(ssid, password, hostname)

# Bring up the LED strip from the configured pin / pixel count / bytes-per-pixel.
led_data_pin = int(config.get("led_data_pin"))
led_count = int(config.get("led_count"))
led_bpp = int(config.get("led_bpp"))
ledController = LedController(led_data_pin, led_count, led_bpp)

ledController.all_off()
# Status pulse on startup -- argument semantics (index/colors?) not visible
# here; confirm against LedController.pulse_status_led.
ledController.pulse_status_led(0, 12, 32, 0)

scheduler = Scheduler(ledController)

# Web is constructed for its side effects -- presumably starts the HTTP
# interface; confirm against Web.__init__.
web = Web(ledController, scheduler)

scheduler.run()
Esempio n. 13
0
def main():
    """Entry point: start the FTP server exposed by the Web helper."""
    Web().run_ftp_server()
Esempio n. 14
0
from Web import Web
import re

# Category listing on zuicool.com to scrape (URL-encoded Chinese keyword).
url = 'http://zuicool.com/news/archives/category/user-submitted/%E8%87%AA%E8%A1%8C%E8%BD%A6'

# The three strings are presumably CSS class names for the list container,
# item and article body -- confirm against Web.__init__.
cnrun = Web(url, 'container', 'zuicool-index-post clearfix', 'entry-content')

# Populate the crawler's internal list of entries.
cnrun.getlist()


def p(L):
    """Make a site-relative link absolute on the zuicool.com origin."""
    origin = 'http://zuicool.com'
    return origin + L


# Absolutize all scraped links in place.
cnrun.map(p)


def getGen():
    """Yield post-processed (title, url) pairs from the module-level crawler."""
    return w(cnrun.process())


def w(process):
    """Strip site boilerplate from each (raw_title, url) pair in *process*.

    The regex pulls the bare event name out of the page-title string; yields
    (clean_title, url) tuples.
    """
    pattern = '报名 | (.*) - 最酷ZUICOOL - 马拉松赛事第一站_最COOL'
    for i in process:
        title = re.findall(pattern, i[0])
        # BUG FIX: original compared with "len(title) is 1" -- identity
        # comparison on an int that only works via CPython small-int caching
        # (SyntaxWarning on 3.8+). Use value equality instead.
        if len(title) == 1:
            title = title[0]
        else:
            # NOTE(review): raises IndexError when findall returns no match;
            # preserved as-is pending confirmation of the intended fallback.
            title = title[1]
        yield (title, i[1])

Esempio n. 15
0
#!/usr/bin/python
# coding=utf-8
from Utils import Utils

__author__ = "Aleksandr Shyshatsky"

from Battle import Battle
from Flash import Flash
from Callbacks import Callbacks
from Events import Events
from Web import Web

# Module-level singletons shared across the package.
callbacks = Callbacks()
flash = Flash()
battle = Battle()
events = Events()
web = Web()
# NOTE(review): Utils is bound as the class itself (not an instance), unlike
# the other entries -- presumably used as a static namespace; confirm.
utils = Utils

__all__ = ['flash', 'battle', 'callbacks', 'events', 'web', 'utils']
Esempio n. 16
0
 def __init__(self, browser):
     """Bind a Web driver wrapper to this page object.

     browser -- handle passed straight through to Web().
     """
     self._web = Web(browser)
Esempio n. 17
0
from Web import Web
# Event listing page on wildto.com to scrape.
url = 'http://www.wildto.com/event/'

# The three strings are presumably CSS class names for the list container,
# item and detail body -- confirm against Web.__init__.
cnrun = Web(url, 'cmptList clearfix', 'pic', 'leftBox')

#print(cnrun.soup)

# Populate the crawler's internal list of entries.
cnrun.getlist()


def p(L):
    """Make a site-relative link absolute on the wildto.com origin."""
    prefix = 'http://www.wildto.com'
    return prefix + L


# Absolutize all scraped links in place.
cnrun.map(p)


def getGen():
    """Return the generator of processed entries from the module-level crawler."""
    return cnrun.process()


if __name__ == "__main__":
    # Manual smoke test: dump every processed entry.

    for i in getGen():
        print(i)
Esempio n. 18
0
class basesite(object):
	"""
		Constructs object using overriding methods.
		Throws Exception if:
		 * URL is invalid (not appropriate for site class),
		 * Working directory could not be created.
	"""
	def __init__(self, url, urls_only=False, debugging=False):
		"""Validate *url* via the subclass hooks and prepare the rip directory.

		Raises (from sanitize_url/get_dir) when the URL is not valid for this
		site class.  Creates RIP_DIRECTORY on first use.  With urls_only=True,
		discovered URLs are recorded to a .txt instead of being downloaded.
		"""
		self.debugging = debugging
		self.web = Web(debugging=self.debugging) # Web object for downloading/parsing
		self.base_dir = RIP_DIRECTORY
		if not os.path.exists(self.base_dir):
			os.mkdir(self.base_dir)
		# Subclass-specific validation/normalisation of the album URL.
		self.url = self.sanitize_url(url)
		# Directory to store images in
		self.working_dir  = '%s%s%s' % (self.base_dir, os.sep, self.get_dir(self.url))
		self.max_threads  = MAX_THREADS
		self.thread_count = 0  # live downloader threads
		self.image_count  = 0  # images downloaded (or URLs recorded) so far
		self.max_images   = MAX_IMAGES
		self.logfile      = '%s%s%s' % (self.working_dir, os.sep, LOG_NAME)
		self.first_log    = True  # next log() call writes the header line first
		self.urls_only    = urls_only
	
	""" To be overridden """
	def sanitize_url(self, url):
		raise Exception("Method 'sanitize_url' was not overridden!")

	""" Return directory name to store photos in """
	def get_dir(self, url):
		raise Exception("Method 'get_dir' was not overridden!")
	
	""" Creates working dir if zip does not exist """
	def init_dir(self):
		if not os.path.exists(self.working_dir) and \
		       self.existing_zip_path() == None:
			os.mkdir(self.working_dir)
	
	""" Returns true if we hit the image limit, false otherwise """
	def hit_image_limit(self):
		return self.image_count >= self.max_images
	
	""" To be overridden """
	def download(self):
		raise Exception("Method 'download' was not overridden!")

	""" Checks if album is already being downloaded """
	def is_downloading(self):
		return os.path.exists(self.logfile)
	
	""" Appends line to log file """
	def log(self, text, overwrite=False):
		"""Append *text* to the album's log file (and stderr when debugging).

		overwrite=True truncates the file instead of appending.  The very
		first call recursively writes a header line with overwrite=True,
		which resets the file.
		"""
		if self.first_log:
			self.first_log = False
			self.log('http://rip.rarchives.com - file log for URL %s' % self.url, overwrite=True)
		if self.debugging:
			sys.stderr.write('%s\n' % text)
		# Escape double quotes -- presumably the log is later embedded in a
		# quoted/JSON context; confirm with the consumer of this file.
		text = text.replace('"', '\\"')
		if overwrite:
			f = open(self.logfile, 'w')
		else:
			f = open(self.logfile, 'a')
		f.write("%s\n" % text)
		f.flush()
		f.close()
	
	""" Gets last line(s) from log """
	def get_log(self, tail_lines=1):
		if not os.path.exists(self.logfile):
			return ''
		f = open(self.logfile, 'r')
		r = f.read().strip()
		f.close()
		while r.endswith('\n'): r = r[:-1]
		lines = r.split('\n')
		return lines[len(lines)-tail_lines:]
	
	""" Starts separate thread to download image from URL """
	def download_image(self, url, index, total='?', subdir='', saveas=None):
		"""Queue a background download of *url* into the working directory.

		index/total feed the "n/total" log progress; *subdir* nests the file
		one directory deeper; *saveas* overrides the derived filename.
		Blocks while the number of active threads exceeds max_threads.
		"""
		unique_saveas = True
		if saveas == None:
			unique_saveas = False
			# Derive a filename from the URL's last path segment.
			saveas = url[url.rfind('/')+1:]
			# Strip extraneous / non FS safe characters
			if '?' in saveas: saveas = saveas[:saveas.find('?')]
			if ':' in saveas: saveas = saveas[:saveas.find(':')]
		# Add a file extension if necessary
		if not '.' in saveas:
			# Ask the server for a Content-Type and map it to an extension.
			m = self.web.get_meta(url)
			ct = 'image/jpeg' # Default to jpg
			if 'Content-Type' in m: ct = m['Content-Type']
			ext = ct[ct.rfind('/')+1:]
			if ext == 'jpeg': ext = 'jpg'
			saveas = '%s.%s' % (saveas, ext)
		# Setup subdirectory saves
		if subdir != '': subdir = '/%s' % subdir
		savedir = '%s%s' % (self.working_dir, subdir)
		if not os.path.exists(savedir): os.mkdir(savedir)
		
		if unique_saveas:
			saveas = '%s/%s' % (savedir, saveas)
		else:
			# Auto-named files get a zero-padded index prefix to keep order.
			saveas = '%s/%03d_%s' % (savedir, index, saveas)
		if os.path.exists(saveas):
			self.log('file exists: %s' % saveas)
			self.image_count += 1
		else:
			# Throttle: wait for a free thread slot before spawning a worker.
			while self.thread_count > self.max_threads:
				time.sleep(0.1)
			self.thread_count += 1
			args = (url, saveas, index, total)
			t = Thread(target=self.download_image_thread, args=args)
			t.start()
	
	""" Multi-threaded download of image """
	def download_image_thread(self, url, saveas, index, total):
		"""Worker body: verify Content-Type, download, thumbnail, and log the result."""
		m = self.web.get_meta(url)
		if 'Content-Type' not in m:
			text = 'no Content-Type found at URL %s' % (url)
		elif ('image'        not in m['Content-Type'] and \
		      'video'        not in m['Content-Type'] and \
		      'octet-stream' not in m['Content-Type']):
			text = 'no "image"/"video"/"octet-stream" in Content-Type (found "%s") for URL %s' % (m['Content-Type'], url)
		else:
			if self.web.download(url, saveas):
				self.image_count += 1
				text = 'downloaded (%d' % index
				if total != '?': text += '/%s' % total
				text += ') (%s) - %s' % (self.get_size(saveas), url)
				# Create thumbnail
				self.create_thumb(saveas)
			else:
				text = 'download failed (%d' % index
				if total != '?': text += '/%s' % total
				text += ') - %s' % url
		self.log(text)
		# NOTE(review): counters are updated from worker threads without a
		# lock; `x += 1` is not atomic even under the GIL -- confirm races
		# are tolerable here.
		self.thread_count -= 1
	
	def wait_for_threads(self):
		"""Block until all downloader threads finish, then prune empty results.

		The working directory is removed when it contains nothing but the
		log file (download mode) or nothing at all (urls_only mode).
		"""
		while self.thread_count > 0:
			time.sleep(0.1)
		if os.path.exists(self.working_dir):
			# 'and' binds tighter than 'or': (download mode and at most the
			# log file) or (urls_only mode and completely empty).
			if not self.urls_only and len(os.listdir(self.working_dir)) <= 1 \
					or self.urls_only and len(os.listdir(self.working_dir)) == 0:
				rmtree(self.working_dir) # Delete everything in working dir
	
	""" Returns human-readable filesize for file """
	def get_size(self, filename):
		try:
			bytes = os.path.getsize(filename)
		except:
			return '?b'
		b = 1024 * 1024 * 1024
		a = ['g','m','k','']
		for i in a:
			if bytes >= b:
				return '%.2f%sb' % (float(bytes) / float(b), i)
			b /= 1024
		return '0b'

	""" Returns path to zip file if it exists, otherwise None. """
	def existing_zip_path(self):
		"""Return the path of a previously produced .zip/.txt, else None.

		urls_only albums live under a txt/ sibling directory; zipped albums
		are only reported once zipping has finished (no zipping.txt flag).
		"""
		if self.urls_only:
			txtfile = '%s.txt' % self.working_dir
			# Insert 'txt' as the parent directory: a/b.txt -> a/txt/b.txt
			f = txtfile.split('/')
			f.insert(-1, 'txt')
			txtfile = '/'.join(f)
			if os.path.exists(txtfile):
				return txtfile
			return None
		zipfile = '%s.zip' % (self.working_dir)
		if os.path.exists(zipfile):
			if not os.path.exists(self.working_dir):
				# No directory; only zip exists
				return zipfile
			else:
				if not os.path.exists('%s%szipping.txt' % (self.working_dir, os.sep)):
					# 'zipping' file/flag does not exist
					return zipfile
		return None
	
	""" 
		Zips site's working directory,
		Deletes zipped files after zip is created
		Returns path to zip file
	"""
	def zip(self):
		"""Package the finished album and return the artifact's path.

		urls_only mode: sort the URLs recorded in log.txt by index, write
		them to txt/<album>.txt and delete the working directory.
		Otherwise: zip the working directory, skipping thumbnails and the
		status/flag text files, and return the zip's path.
		"""
		if self.urls_only:
			# Just URLs, need to store in order & store to a .txt file
			if not os.path.exists('%s/log.txt' % self.working_dir):
				raise Exception('no log found')
			if not os.path.exists('txt/'):
				try: os.mkdir('txt')
				except: pass
			f = self.working_dir.split('/')
			f.insert(-1, 'txt')
			url_filename = '%s.txt' % '/'.join(f)
			f = open('%s/log.txt' % self.working_dir, 'r')
			lines = f.read().split('\n')[1:]
			tuples = []
			# Log lines look like '... - <index>|<url>'; collect (index, url).
			for line in lines:
				if line.strip() == '' or ' - ' not in line: continue
				if line.count('|') < 1: continue
				line = line[line.find(' - ')+3:]
				splits = line.split('|')
				index  = splits[0]
				url    = '|'.join(splits[1:])  # the URL itself may contain '|'
				tuples.append( (index, url) )
			tuples = sorted(tuples, key=lambda tup: int(tup[0]))
			f = open(url_filename, 'w')
			for (index, url) in tuples:
				f.write('%s\n' % url)
			f.close()
			rmtree(self.working_dir) # Delete everything in working dir
			return url_filename
		
		self.log('zipping album...')
		zip_filename = '%s.zip' % self.working_dir
		z = ZipFile(zip_filename, "w", ZIP_DEFLATED)
		for root, dirs, files in os.walk(self.working_dir):
			# NOTE: ignore empty directories & thumbnails
			if root.endswith('/thumbs'): continue
			for fn in files:
				#if 'log.txt' in fn: continue
				if fn.endswith('zipping.txt'): continue
				if fn.endswith('complete.txt'): continue
				if fn.endswith('ip.txt'): continue
				if fn.endswith('reports.txt'): continue
				absfn = os.path.join(root, fn)
				zfn = absfn[len(self.working_dir)+len(os.sep):] #XXX: relative path
				z.write(absfn, zfn)
		z.close()
		#rmtree(self.working_dir) # Delete everything in working dir
		return zip_filename

	"""
		Creates thumbnail based on file path
		Creates /thumbs/ sub dir & stores thumbnail
	"""
	def create_thumb(self, inp):
		"""Create a 200x200 JPEG thumbnail for *inp* under a thumbs/ sub-directory.

		Videos (.mp4) are delegated to create_video_thumb.  Best-effort:
		silently a no-op when PIL is missing or the image cannot be processed.
		"""
		if inp.lower().endswith('.mp4'):
			self.create_video_thumb(inp)
			return
		if Image == None:
			sys.stderr.write('Python Image Library (PIL) not installed; unable to create thumbnail for %s\n' % inp)
			sys.stderr.write('Go to http://www.pythonware.com/products/pil/ to install PIL\n')
			sys.stderr.flush()
			return
		# Thumbnail path mirrors the image path: a/b.jpg -> a/thumbs/b.jpg
		fields = inp.split(os.sep)
		fields.insert(-1, 'thumbs')
		saveas = os.sep.join(fields)
		if os.path.exists(saveas): return
		thumbpath = os.sep.join(fields[:-1])
		if not os.path.exists(thumbpath):
			try: os.mkdir(thumbpath)
			except: pass
		try:
			im = Image.open(inp)
			if im.mode != 'RGB': im = im.convert('RGB')
			im.thumbnail( (200,200), Image.ANTIALIAS)
			im.save(saveas, 'JPEG')
			del im
		except: pass
	
	def create_video_thumb(self, inp):
		"""Render a .png thumbnail for video *inp* via ffmpeg with a play-button
		overlay, saved under the thumbs/ sub-directory (best-effort)."""
		fields = inp.split(os.sep)
		fields.insert(-1, 'thumbs')
		saveas = os.sep.join(fields)
		saveas = saveas[:saveas.rfind('.')] + '.png'
		thumbpath = os.sep.join(fields[:-1])
		if not os.path.exists(thumbpath):
			try: os.mkdir(thumbpath)
			except: pass
		overlay = 'play_overlay.png'

		# Probe the usual ffmpeg install locations (Linux, then MacPorts).
		ffmpeg = '/usr/bin/ffmpeg'
		if not os.path.exists(ffmpeg):
			ffmpeg = '/opt/local/bin/ffmpeg'
			if not os.path.exists(ffmpeg):
				return # Can't get images if we can't find ffmpeg
		# NOTE(review): the command is assembled by string concatenation and
		# executed through a shell; a crafted filename could inject shell
		# commands.  Prefer subprocess.run([...], shell=False) with a list.
		cmd = ffmpeg
		cmd += ' -i "'
		cmd += inp
		cmd += '" -vf \'movie='
		cmd += overlay
		cmd += ' [watermark]; '
		cmd += '[in]scale=200:200 [scale]; '
		cmd += '[scale][watermark] overlay=(main_w-overlay_w)/2:(main_h-overlay_h)/2 [out]\' '
		cmd += saveas
		try:
			(s, o) = getstatusoutput(cmd)
		except:
			pass
			
	"""
		Add url to list of URLs found. For "URLs Only" feature
	"""
	def add_url(self, index, url, total=0):
		self.image_count += 1
		string = '(%d' % index
		if total > 0:
			string += '/%d' % total
		string += ')'
		self.log('%s - %d|%s' % (string, index, url))

	def debug(self, text):
		if not self.debugging: return
		sys.stderr.write('%s\n' % text)