def begin(urlpath):
    """Collect this year's gallery links from the page at *urlpath*.

    The page is fetched through the module-level ``opener`` and split on the
    literal ``<A``.  Every chunk that mentions the gallery prefix for
    ``currentyear`` is split on single quotes; pieces that contain the prefix
    and no backslash are kept.  The sorted result is passed to
    ``folderparse``.
    """
    page = opener.open(urlpath).read()

    anchors = page.split('<A')
    logger.info('There are '+ str(len(anchors)) +' links')

    gallery_prefix = 'http://members.latexlair.com/galleries/'+currentyear
    folderlist = []
    for anchor in anchors:
        if gallery_prefix not in anchor:
            continue
        # The URL is the single-quoted piece of the anchor text.
        for piece in anchor.split('\''):
            if gallery_prefix not in piece:
                continue
            # Pieces containing a backslash are ignored, as in the original.
            if '\\' in piece:
                continue
            folderlist.append(piece)

    # At this point folderlist holds the gallery URLs found on the page.
    logger.info('We have found ' +str(len(folderlist))+' folders')
    folderlist.sort()
    folderparse(folderlist)
def thumbdbbuild():
    """Drop and rebuild the ``thumbs`` table from ``sets`` and ``oldsets``.

    For each (year, title) row of both tables, the directory
    ``herp.ROOTDIR + year + '/' + title`` is printed and passed to
    ``checkdir``; the value it returns is stored as the row's ``path``.
    ``herp.ROOTDIR`` is set to ``'BB/'`` if it is still ``None`` when a row
    is processed.
    """
    def sql_literal(value):
        # Double embedded single quotes so that a title or path containing
        # an apostrophe cannot terminate the SQL string literal early.
        return "'" + value.replace("'", "''") + "'"

    logger.info('Rebuilding Thumbnail Database')
    myDB = db.DBConnection()
    myDB.action("DROP TABLE IF EXISTS thumbs")
    myDB.action("CREATE TABLE thumbs (year text, title text primary key not null, path text)")

    # Same processing for both tables; "sets" goes first so that its rows win
    # when a title appears in both (INSERT or IGNORE keeps the first one).
    for table in ('sets', 'oldsets'):
        rows = myDB.action("SELECT year, title FROM " + table + " ORDER BY year DESC, title ASC")
        for row in rows:
            year = row[0]
            title = row[1]
            if herp.ROOTDIR is None:
                herp.ROOTDIR = 'BB/'
            set_dir = herp.ROOTDIR + year + '/' + title
            print(set_dir)
            path = checkdir(set_dir)
            myDB.action(
                "INSERT or IGNORE INTO thumbs (year, title, path) VALUES ("
                + sql_literal(year) + "," + sql_literal(title) + "," + sql_literal(path) + ")"
            )
def check_setting_int(config, cfg_name, item_name, def_val):
    """Return ``config[cfg_name][item_name]`` as an int, or *def_val*.

    The value that is returned is also written back to
    ``config[cfg_name][item_name]``; if that assignment fails because the
    section is missing or unusable, the section is replaced by an empty
    dict first.

    Args:
        config: mapping of section name to a mapping of settings.
        cfg_name: section name.
        item_name: setting name inside the section.
        def_val: value used when the setting is absent or not convertible
            to int.

    Returns:
        The integer setting, or *def_val* unchanged on fallback.
    """
    try:
        my_val = int(config[cfg_name][item_name])
    except Exception:
        # Was a bare ``except:``, which also swallowed SystemExit and
        # KeyboardInterrupt.  Any ordinary lookup/parse failure still falls
        # back to the default.
        my_val = def_val
        logger.info('Error in Int Function CFG')

    try:
        config[cfg_name][item_name] = my_val
    except (KeyError, TypeError):
        # Section missing (KeyError) or not a mapping (TypeError): recreate it.
        config[cfg_name] = {}
        config[cfg_name][item_name] = my_val
    return my_val
def bbparse(cat=0):
    """Run one full scrape pass.

    Calls ``init``, starts five daemon ``ThreadUrl`` workers on ``queue``,
    scans the members page for galleries, optionally scans the category
    pages, downloads via ``doparse``, optionally compresses, prints the sets
    whose status is not ``'cbz'``, and finally runs smart folder completion
    and the thumbnail database rebuild.

    Args:
        cat: when equal to 1, the bulk category pages are scanned as well.
    """
    init()

    # Five downloader threads share the module-level queue.
    for _ in range(5):
        worker = ThreadUrl(queue)
        worker.setDaemon(True)
        worker.start()

    logger.info('Beginning Scrape Process')

    # Gallery search on the member page; this pulls only the new galleries.
    begin('http://members.latexlair.com/members.html')

    if cat == 1:
        # Bulk category pages, scanned in this fixed order.
        category_pages = (
            'http://members.latexlair.com/galleries-heavyrubber.html',
            'http://members.latexlair.com/galleries-solo.html',
            'http://members.latexlair.com/galleries-catsuits.html',
            'http://members.latexlair.com/galleries-blonde.html',
            'http://members.latexlair.com/galleries-events.html',
            'http://members.latexlair.com/galleries-friends.html',
        )
        for page in category_pages:
            catparse(page)

    # Parse the searches added to the database and pull down the photos.
    doparse()

    # Compress finished sets to CBZ when enabled.  Per the original note this
    # covers the sets table only, not oldsets.
    if herp.CBZ_Compress == 1:
        docompress()

    # Report the sets that are not yet compressed.
    count_row = myDB.action("SELECT COUNT(*) FROM sets WHERE status is not 'cbz' ORDER BY year DESC, title ASC").fetchone()
    if count_row[0] != 0:
        print('--The following are incomplete--')
        incomplete = myDB.action("SELECT * FROM sets WHERE status is not 'cbz' ORDER BY year DESC, title ASC")
        for entry in incomplete:
            print("Status: " +entry[3] +" Year: "+entry[0] + " Title: " + entry[1])
        print('--------------------------------')

    # Rule-based detection of finished sets, then refresh the thumbnails.
    smartfoldercompletion()
    fileutil.thumbdbbuild()
def config_write():
    """Write the current module-level settings to CONFIG_FILE.

    A fresh ConfigObj is filled with a single ``General`` section and
    written out; ``LAUNCH_BROWSER`` and ``CBZ_Compress`` are stored as ints.
    """
    logger.info('Writing Config')

    new_config = ConfigObj()
    new_config.filename = CONFIG_FILE
    new_config['General'] = {}

    # (key, value) pairs in the order they are written to the section.
    general_settings = (
        ('http_port', HTTP_PORT),
        ('http_username', HTTP_USERNAME),
        ('http_password', HTTP_PASSWORD),
        ('site_username', USERNAME),
        ('site_password', PASSWORD),
        ('dldir', ROOTDIR),
        ('launch_browser', int(LAUNCH_BROWSER)),
        ('makecbz', int(CBZ_Compress)),
    )
    for key, value in general_settings:
        new_config['General'][key] = value

    new_config.write()
def initialize():
    """Load settings from ``CFG`` into module globals and start logging/DB.

    Under ``INIT_LOCK`` this reads the ``General`` section (with defaults),
    makes sure the log directory exists, initialises the logger, calls
    ``setupdb`` and marks the module as initialised.  The early-return guard
    on ``__INITIALIZED__`` is disabled in the original, so calling this
    again re-runs the whole sequence.

    Returns:
        True, always.
    """
    # LOG_DIR was missing from this list, so the value computed below was a
    # local that vanished on return instead of updating the module setting.
    global USERNAME, PASSWORD, ROOTDIR, WEBUSER, WEBPASS, HTTP_PORT, \
        HTTP_USERNAME, HTTP_PASSWORD, LAUNCH_BROWSER, CFG, __INITIALIZED__, \
        DATA_DIR, CBZ_Compress, LOG_DIR

    with INIT_LOCK:
        CheckSection('General')

        # Set global variables based on config file or use defaults.
        try:
            HTTP_PORT = check_setting_int(CFG, 'General', 'http_port', 8090)
        except Exception:
            # Was a bare ``except:``; keep the fallback but let SystemExit
            # and KeyboardInterrupt through.
            logger.info('Error Reverting to 8090')
            HTTP_PORT = 8090

        USERNAME = check_setting_str(CFG, 'General', 'site_username', '')
        PASSWORD = check_setting_str(CFG, 'General', 'site_password', '')
        HTTP_USERNAME = check_setting_str(CFG, 'General', 'http_username', '')
        HTTP_PASSWORD = check_setting_str(CFG, 'General', 'http_password', '')
        LAUNCH_BROWSER = bool(check_setting_int(CFG, 'General', 'launch_browser', 1))
        ROOTDIR = check_setting_str(CFG, 'General', 'dldir', 'BB/')
        LOG_DIR = check_setting_str(CFG, 'General', 'log_dir', '')
        CBZ_Compress = bool(check_setting_int(CFG, 'General', 'makecbz', 0))

        # No configured log directory: default to <DATA_DIR>/logs.
        if not LOG_DIR:
            LOG_DIR = os.path.join(DATA_DIR, 'logs')

        if not os.path.exists(LOG_DIR):
            try:
                os.makedirs(LOG_DIR)
            except OSError:
                # Best effort: a missing log directory is not fatal.
                if VERBOSE:
                    logger.info('Unable to create the log directory. Logging to screen only.')

        logger.lldl_log.initLogger(verbose=VERBOSE)
        setupdb()

        __INITIALIZED__ = True
        return True
def run(self):
    """Worker loop: take download jobs from ``self.queue`` forever.

    Each job is a sequence ``(url, foldername, prefix)``.  The file is saved
    as ``rootdownloadfolder + foldername + '/' + prefix + <last URL part>``
    unless that path already exists.  Data is written to ``<name>-temp``
    first and renamed once complete.

    A failed job is logged and the loop continues.  ``task_done()`` is
    called for every job, success or failure, so that ``queue.join()`` in
    the producer cannot block forever on a job whose download raised.
    """
    while True:
        job = self.queue.get()
        try:
            url = job[0]
            foldername = job[1]
            prefix = job[2]

            # The shared download helper cannot be called from here without
            # collisions (per the original note), so every job builds its
            # own authenticated opener.
            password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
            top_level_url = "http://members.latexlair.com"
            password_mgr.add_password(None, top_level_url, herp.USERNAME, herp.PASSWORD)
            handler = urllib2.HTTPBasicAuthHandler(password_mgr)
            opener = urllib2.build_opener(handler)
            # NOTE(review): install_opener is process-wide and looks
            # redundant since opener.open() is used directly; kept so the
            # global side effect is unchanged.
            urllib2.install_opener(opener)

            fname = rootdownloadfolder+foldername+'/'+prefix+url.split('/')[-1]

            # Never overwrite a file that is already there.
            if not os.path.exists(fname):
                tempname = fname+'-temp'
                # Wall-clock time: time.clock() measures CPU time on some
                # platforms, which gives a meaningless (often zero) duration
                # for a network transfer.
                start = time.time()
                webFile = opener.open(url)
                try:
                    with open(tempname, 'wb') as localFile:
                        localFile.write(webFile.read())
                finally:
                    webFile.close()
                os.rename(tempname, fname)
                elapsed = time.time() - start

                kilobytes = os.path.getsize(fname)/1024
                # Guard against a zero interval on coarse clocks.
                if elapsed > 0:
                    rate = kilobytes/elapsed
                else:
                    rate = 0.0
                logger.info(indent(1)+url)
                logger.info(indent(2)+'Downloaded '+str(kilobytes) + 'KB in '+ str(elapsed)+' seconds Rate:'+ str(rate)+'KBps')
        except Exception as err:
            # Keep the worker alive; one bad URL must not stop the pool.
            logger.info('Download failed for '+str(job)+': '+str(err))
        finally:
            # Signal the queue that this job is finished, even on failure.
            self.queue.task_done()
def shutdown(restart=False, update=False):
    """Stop the web engine and scheduler, save the config, end the process.

    Args:
        restart: when true, a new process running ``FULL_PATH`` is started
            with the current interpreter before exiting.
        update: in this function it only suppresses the 'Now Exiting'
            message.

    The process always ends through ``os._exit(0)``; nothing is returned.
    """
    cherrypy.engine.exit()
    # Persist the configuration before tearing down the scheduler.
    config_write()
    SCHED.shutdown(wait=False)

    plain_exit = not (restart or update)
    if plain_exit:
        logger.info('Now Exiting')

    if restart:
        logger.info('lldl is restarting...')
        command = [sys.executable, FULL_PATH]
        logger.info('Restarting lldl with ' + str(command))
        subprocess.Popen(command, cwd=os.getcwd())

    os._exit(0)
def folderparse(folderlist):
    """Register every gallery URL in *folderlist* that is not yet in ``sets``.

    Each URL is split on ``/``; element 4 is taken as the year and element 5
    as the album title.  Albums whose title is not found in the ``sets``
    table are added through ``addset`` with their base gallery URL.

    Args:
        folderlist: iterable of gallery URL strings.
    """
    logger.info('Beginning Folder Parse')
    for lefolder in folderlist:
        explodefolder = lefolder.split('/')
        # A URL without an album component (for example one ending at the
        # year) used to raise IndexError and abort the whole parse; skip it.
        if len(explodefolder) < 6:
            logger.info('Skipping malformed gallery URL: ' + lefolder)
            continue

        year = explodefolder[4]
        currentalbum = explodefolder[5]
        basepath = "http://members.latexlair.com/galleries/"+year+"/"+currentalbum+"/"

        # The title comes from scraped HTML: double any single quote so it
        # cannot break out of the SQL string literal.
        quoted_title = currentalbum.replace("'", "''")
        out = myDB.action("SELECT COUNT(*) FROM sets WHERE title is '"+quoted_title+"'").fetchone()

        if out[0] != 0:
            logger.info(year+ ' '+currentalbum+' Exists in database, doing nothing')
        else:
            logger.info(year+ ' '+currentalbum+' Not yet in database, adding')
            addset(year, currentalbum, basepath)
def dowloadfolder(foldname, prefix=''):
    """Queue every full-size image of one gallery page and wait for them.

    *foldname* is split on ``/``: element 4 is the year, element 5 the album
    and element 6 (with ``?folder=`` removed) the album part.  The page is
    fetched with the module-level ``opener`` and scanned for single-quoted
    pieces that contain ``jpg`` but not ``thumbs``.  One job per image is put
    on ``queue``, and the call blocks on ``queue.join()``.  When more than
    one image was found, ``updateset`` is called for the album.

    Args:
        foldname: URL of the gallery page.
        prefix: string placed in each job for the file name.
    """
    logger.info( 'Now looking in '+foldname)

    parts = foldname.split('/')
    year = parts[4]
    currentalbum = parts[5]
    albumpart = parts[6].replace('?folder=','')

    page = opener.open(foldname).read()
    logger.info('Current Album: '+currentalbum+' Part: ' +albumpart+' Year: '+year)

    # Image names are the quoted pieces mentioning "jpg" but not "thumbs".
    imagelist = [
        piece
        for chunk in page.split('<a href=')
        for piece in chunk.split('\'')
        if 'thumbs' not in piece and 'jpg' in piece
    ]
    numimages = len(imagelist)
    logger.info( 'Found '+ str(numimages) +' images, in '+currentalbum)

    currentimagefolder = 'http://members.latexlair.com/galleries/'+year+'/'+currentalbum+'/'
    foldername = year+'/'+currentalbum+'/'

    # The target directory must exist before the workers write into it.
    ensure_dir(rootdownloadfolder+foldername)

    for image in imagelist:
        queue.put([currentimagefolder+image, foldername, prefix])
    # Block until the worker threads have processed every queued job.
    queue.join()

    logger.info('Do Database update'+ albumpart + ' ' + str(numimages))
    if albumpart == '':
        logger.info( 'Null album - setting variable to 0')
        albumpart = '00'
    if numimages > 1:
        logger.info( 'We downloaded at least 2 photos, increment folder')
        updateset(currentalbum, albumpart)
herp.DATA_DIR = os.path.dirname(os.path.abspath(__file__)) herp.LOG_DIR = os.path.join(herp.DATA_DIR, 'logs') #init herp herp.initialize() webstart.initialize({ 'http_port': herp.HTTP_PORT, 'http_username': herp.HTTP_USERNAME, 'http_password': herp.HTTP_PASSWORD }) logger.info('Starting LLDL on port: %i' % herp.HTTP_PORT) logger.info('Initialization Complete') if herp.LAUNCH_BROWSER == 1: herp.launch_browser('localhost',herp.HTTP_PORT,'') herp.start() while True: if not herp.SIGNAL: time.sleep(1) else: print 'Received signal: ' + herp.SIGNAL if herp.SIGNAL == 'shutdown':