Example #1
0
def begin(urlpath):
    """Scrape the page at *urlpath* for gallery links and hand them to folderparse.

    The page is fetched with the module-level ``opener`` and split on ``<A``.
    Every chunk mentioning the gallery prefix for ``currentyear`` is split on
    single quotes, and each resulting fragment that contains the prefix and
    no backslash is collected.  The collected links are sorted and passed to
    ``folderparse``.

    Duplicate links are kept, exactly as found on the page.
    """
    # Fetch the member's page using the already defined opener.  Close the
    # response even when the read fails so the connection is not leaked.
    response = opener.open(urlpath)
    try:
        html = response.read()
    finally:
        response.close()

    # Split the page into chunks, one per '<A' occurrence (plus the leading text).
    anchors = html.split('<A')
    logger.info('There are '+ str(len(anchors)) +' links')

    # Only links below this prefix point at galleries of the current year.
    gallery_prefix = 'http://members.latexlair.com/galleries/' + currentyear

    folderlist = []
    for anchor in anchors:
        if gallery_prefix not in anchor:
            continue
        # The URL is one of the single-quoted fragments inside the chunk;
        # fragments containing a backslash are rejected.
        for fragment in anchor.split('\''):
            if gallery_prefix in fragment and '\\' not in fragment:
                folderlist.append(fragment)

    # Now we should have a list of current galleries.
    logger.info('We have found ' +str(len(folderlist))+' folders')

    folderlist.sort()
    folderparse(folderlist)
Example #2
0
def thumbdbbuild():
    """Drop and rebuild the ``thumbs`` table from ``sets`` and ``oldsets``.

    For every (year, title) row of both source tables the folder
    ``herp.ROOTDIR + year + '/' + title`` is passed to ``checkdir`` and the
    returned path is stored in ``thumbs``.  Rows from ``sets`` are processed
    first; ``INSERT or IGNORE`` means a title already inserted is not
    replaced by a later row with the same title.
    """
    logger.info('Rebuilding Thumbnail Database')
    myDB = db.DBConnection()
    myDB.action("DROP TABLE IF EXISTS thumbs")
    myDB.action("CREATE TABLE thumbs (year text, title text primary key not null, path text)")

    # Both tables are handled identically, so share one loop instead of two copies.
    for table in ('sets', 'oldsets'):
        rows = myDB.action("SELECT year, title FROM " + table + " ORDER BY year DESC, title ASC")
        for row in rows:
            year = row[0]
            title = row[1]
            # Fall back to the default download folder when none is configured.
            if herp.ROOTDIR is None:
                herp.ROOTDIR = 'BB/'
            dpath = herp.ROOTDIR + year + '/' + title
            print(dpath)
            path = checkdir(dpath)
            # Values are quoted through _sql_literal so that a title or path
            # containing an apostrophe cannot break (or alter) the statement.
            myDB.action("INSERT  or IGNORE INTO thumbs (year, title, path) VALUES ("
                        + _sql_literal(year) + "," + _sql_literal(title) + ","
                        + _sql_literal(path) + ")")


def _sql_literal(value):
    """Return *value* as a single-quoted SQL string literal, doubling embedded quotes."""
    return "'" + value.replace("'", "''") + "'"
Example #3
0
def check_setting_int(config, cfg_name, item_name, def_val):
    """Return ``config[cfg_name][item_name]`` converted to ``int``.

    When the section or item is missing, or the stored value cannot be
    converted, *def_val* is returned instead and written back into *config*
    (creating the section if necessary) so the default ends up in the
    configuration object.

    Args:
        config: mapping of section name -> mapping of item name -> value.
        cfg_name: section name.
        item_name: item name inside the section.
        def_val: value to use (and store) when no valid integer is available.
    """
    try:
        return int(config[cfg_name][item_name])
    except (KeyError, ValueError, TypeError):
        # Missing section/item, or a value int() cannot handle.  Only these
        # are caught so that KeyboardInterrupt/SystemExit are not swallowed.
        logger.info('Error in Int Function CFG')

    # Store the default back so it is present the next time the config is used.
    try:
        config[cfg_name][item_name] = def_val
    except (KeyError, TypeError):
        # The section itself does not exist yet (or is not a mapping): recreate it.
        config[cfg_name] = {}
        config[cfg_name][item_name] = def_val

    return def_val
Example #4
0
def bbparse(cat=0):
    """Run one complete scrape pass.

    Steps, in order: call ``init()``, start five daemon ``ThreadUrl``
    workers on the shared ``queue``, scan the members page via ``begin``,
    optionally (``cat == 1``) scan the category pages via ``catparse``,
    run ``doparse()``, run ``docompress()`` when ``herp.CBZ_Compress == 1``,
    print every row of ``sets`` whose status is not 'cbz' and call
    ``smartfoldercompletion()`` if there are any, and finally rebuild the
    thumbnail database.

    Args:
        cat: pass 1 to also parse the bulk category pages.
    """
    init()

    # Start 5 worker threads for downloading.
    for _ in range(5):
        worker = ThreadUrl(queue)
        worker.setDaemon(True)
        worker.start()

    logger.info('Beginning Scrape Process')

    # Gallery search on the member page.
    begin('http://members.latexlair.com/members.html')

    if cat == 1:
        # Bulk category pages, parsed in this fixed order.
        category_pages = (
            'http://members.latexlair.com/galleries-heavyrubber.html',
            'http://members.latexlair.com/galleries-solo.html',
            'http://members.latexlair.com/galleries-catsuits.html',
            'http://members.latexlair.com/galleries-blonde.html',
            'http://members.latexlair.com/galleries-events.html',
            'http://members.latexlair.com/galleries-friends.html',
        )
        for page in category_pages:
            catparse(page)

    # Parse the searches added to the database and pull down photos.
    doparse()

    # docompress() works on the sets table only; oldsets are not compressed here.
    if herp.CBZ_Compress == 1:
        docompress()

    # Report incomplete sets (status other than 'cbz'), if any.
    pending = myDB.action("SELECT COUNT(*) FROM sets WHERE status is not 'cbz' ORDER BY year DESC, title ASC").fetchone()
    if pending[0] != 0:
        print('--The following are incomplete--')
        unfinished = myDB.action("SELECT * FROM sets WHERE status is not 'cbz' ORDER BY year DESC, title ASC")
        for record in unfinished:
            print("Status: " +record[3] +" Year: "+record[0] + " Title: " + record[1])

        print('--------------------------------')
        # Rule-based completion of the remaining sets.
        smartfoldercompletion()

    fileutil.thumbdbbuild()
Example #5
0
def config_write():
    """Write the current module-level settings to ``CONFIG_FILE``.

    A fresh ``ConfigObj`` is created, its ``General`` section is filled from
    the module globals (the two boolean-like flags are stored as ints) and
    the object is written to disk.
    """
    logger.info('Writing Config')

    new_config = ConfigObj()
    new_config.filename = CONFIG_FILE
    new_config['General'] = {}

    # Options are assigned one by one, in this order, so the section keeps
    # the same key order as before.
    general = new_config['General']
    settings = (
        ('http_port', HTTP_PORT),
        ('http_username', HTTP_USERNAME),
        ('http_password', HTTP_PASSWORD),
        ('site_username', USERNAME),
        ('site_password', PASSWORD),
        ('dldir', ROOTDIR),
        ('launch_browser', int(LAUNCH_BROWSER)),
        ('makecbz', int(CBZ_Compress)),
    )
    for option, value in settings:
        general[option] = value

    # Write Config
    new_config.write()
Example #6
0
def initialize():
    """Load settings from ``CFG`` into the module globals and set up logging and the DB.

    The whole initialization runs while ``INIT_LOCK`` is held.  (Previously,
    mixed tab/space indentation meant the ``with`` block contained only the
    ``global`` statement, so the lock was released before any work was done.)

    Steps: make sure the 'General' section exists, read every setting with
    its default, create the log directory if needed, initialise the logger
    and call ``setupdb()``.

    Returns:
        True once initialization has finished.
    """
    global USERNAME, PASSWORD, ROOTDIR, WEBUSER, WEBPASS, HTTP_PORT, HTTP_USERNAME, HTTP_PASSWORD, LAUNCH_BROWSER, CFG, __INITIALIZED__, DATA_DIR, CBZ_Compress

    with INIT_LOCK:
        # The "already initialized" early return was commented out in the
        # original and is left disabled here:
        #if __INITIALIZED__:
        #    return False
        CheckSection('General')

        # Set global variables based on config file or use defaults
        try:
            HTTP_PORT = check_setting_int(CFG, 'General', 'http_port', 8090)
        except Exception:
            logger.info('Error Reverting to 8090')
            HTTP_PORT = 8090
        USERNAME = check_setting_str(CFG, 'General', 'site_username', '')
        PASSWORD = check_setting_str(CFG, 'General', 'site_password', '')
        HTTP_USERNAME = check_setting_str(CFG, 'General', 'http_username', '')
        HTTP_PASSWORD = check_setting_str(CFG, 'General', 'http_password', '')
        LAUNCH_BROWSER = bool(check_setting_int(CFG, 'General', 'launch_browser', 1))
        ROOTDIR = check_setting_str(CFG, 'General', 'dldir', 'BB/')
        # LOG_DIR is not in the global list above, so this name is local to
        # the function and does not update any module-level LOG_DIR.
        LOG_DIR = check_setting_str(CFG, 'General', 'log_dir', '')
        CBZ_Compress = bool(check_setting_int(CFG, 'General', 'makecbz', 0))

        # Default the log directory to <DATA_DIR>/logs and create it if missing.
        if not LOG_DIR:
            LOG_DIR = os.path.join(DATA_DIR, 'logs')
        if not os.path.exists(LOG_DIR):
            try:
                os.makedirs(LOG_DIR)
            except OSError:
                # Best effort: carry on without a log directory.
                if VERBOSE:
                    logger.info( 'Unable to create the log directory. Logging to screen only.')

        logger.lldl_log.initLogger(verbose=VERBOSE)
        setupdb()

        __INITIALIZED__ = True
        return True
Example #7
0
 def run(self):
     """Worker loop: take download jobs from ``self.queue`` forever.

     Each job is a sequence ``[url, foldername, prefix]``.  The file is
     saved as ``rootdownloadfolder + foldername + '/' + prefix + <last URL
     segment>`` unless that file already exists.  Data is first written to
     ``<name>-temp`` and renamed afterwards, so a final file name only ever
     refers to a completed download.

     A failing job is logged and skipped.  ``task_done()`` is called for
     every job, successful or not, so that ``queue.join()`` in the producer
     cannot block forever because a worker died mid-job.
     """
     while True:
         # Grab the next job from the queue.
         job = self.queue.get()
         try:
             # First element is the url, second the folder name, third the
             # file name prefix.
             url = job[0]
             foldername = job[1]
             prefix = job[2]

             # Cannot call the DL function without collision here, so the
             # authenticated opener is built inline.
             password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
             top_level_url = "http://members.latexlair.com"
             password_mgr.add_password(None, top_level_url, herp.USERNAME, herp.PASSWORD)
             handler = urllib2.HTTPBasicAuthHandler(password_mgr)
             opener = urllib2.build_opener(handler)
             urllib2.install_opener(opener)

             fname = rootdownloadfolder+foldername+'/'+prefix+url.split('/')[-1]
             # Check if the file is there before overwriting it.
             if not os.path.exists(fname):
                 # Wall-clock time: time.clock() reports processor time on
                 # Unix, which is close to zero for a network transfer.
                 start = time.time()
                 webFile = opener.open(url)
                 try:
                     with open(fname+'-temp', 'wb') as localFile:
                         localFile.write(webFile.read())
                 finally:
                     webFile.close()
                 os.rename(fname+'-temp', fname)
                 elapsed = time.time() - start
                 kilobytes = os.path.getsize(fname)/1024
                 # Guard against a zero interval on coarse clocks.
                 if elapsed > 0:
                     rate = kilobytes/elapsed
                 else:
                     rate = 0
                 logger.info(indent(1)+url)
                 logger.info( indent(2)+'Downloaded '+str(kilobytes) + 'KB in '+ str(elapsed)+' seconds Rate:'+   str(rate)+'KBps')
         except Exception as err:
             # Keep the worker alive; one bad URL must not stop the pool.
             logger.info('Download failed for '+repr(job)+': '+repr(err))
         finally:
             # Signal to the queue that this job is done.
             self.queue.task_done()
Example #8
0
def shutdown(restart=False, update=False):
    """Stop the web engine, persist the config, stop the scheduler and exit.

    Args:
        restart: when true, a new process running ``FULL_PATH`` with the
            current interpreter is spawned before this process exits.
        update: only suppresses the 'Now Exiting' log line.

    The function never returns: it ends with ``os._exit(0)``.
    """
    cherrypy.engine.exit()
    # Write the configuration before the scheduler is stopped.
    config_write()
    SCHED.shutdown(wait=False)

    if restart:
        logger.info('lldl is restarting...')
        relaunch_cmd = [sys.executable, FULL_PATH]
        logger.info('Restarting lldl with ' + str(relaunch_cmd))
        subprocess.Popen(relaunch_cmd, cwd=os.getcwd())
    elif not update:
        # Plain shutdown: neither a restart nor an update was requested.
        logger.info('Now Exiting')

    os._exit(0)
Example #9
0
def folderparse(folderlist):
    """Register every gallery URL in *folderlist* as a set.

    Each URL is split on '/'; element 4 is taken as the year and element 5
    as the album name.  Whether the album is already in the ``sets`` table
    is logged, and ``addset(year, album, basepath)`` is then called for it.

    URLs with fewer than six '/'-separated parts (no album segment) are
    logged and skipped instead of raising ``IndexError``.
    """
    logger.info('Beginning Folder Parse')
    # Loop through and add to our download handler.
    for lefolder in folderlist:
        explodefolder = lefolder.split('/')
        if len(explodefolder) < 6:
            logger.info('Skipping malformed gallery link: ' + lefolder)
            continue
        year = explodefolder[4]
        currentalbum = explodefolder[5]
        basepath = "http://members.latexlair.com/galleries/"+year+"/"+currentalbum+"/"

        # The album name comes from scraped HTML; double any apostrophe so it
        # stays inside the SQL string literal.
        quoted_album = currentalbum.replace("'", "''")
        out = myDB.action("SELECT COUNT(*) FROM sets WHERE title is '"+quoted_album+"'").fetchone()
        if out[0] != 0:
            logger.info(year+ ' '+currentalbum+' Exists in database, doing nothing')
        else:
            logger.info(year+ ' '+currentalbum+' Not yet in database, adding')

        # NOTE(review): addset is called in both cases, although the log says
        # "doing nothing" for existing sets; presumably addset ignores
        # duplicates -- confirm.
        addset(year,currentalbum,basepath)
Example #10
0
def dowloadfolder(foldname, prefix=''):
    """Queue every full-size image linked from the gallery page *foldname*.

    *foldname* is split on '/': element 4 is the year, element 5 the album
    and element 6 (with '?folder=' removed) the album part.  The page is
    fetched with the module-level ``opener``, and every single-quoted
    fragment of an ``<a href=`` chunk that contains 'jpg' but not 'thumbs'
    is treated as an image name relative to the album folder.  One job
    ``[url, foldername, prefix]`` per image is put on ``queue``, and the
    function blocks on ``queue.join()`` until the jobs are processed.

    Afterwards ``updateset(album, part)`` is called when more than one image
    was found; an empty part is recorded as '00'.

    Args:
        foldname: URL of the gallery page.
        prefix: string prepended to each downloaded file name.
    """
    logger.info( 'Now looking in '+foldname)
    urlparts = foldname.split('/')
    year, currentalbum = urlparts[4], urlparts[5]
    albumpart = urlparts[6].replace('?folder=','')

    currenthtml = opener.open(foldname).read()

    logger.info('Current Album: '+currentalbum+' Part: ' +albumpart+' Year: '+year)

    # Collect image names: quoted fragments mentioning 'jpg' that are not thumbnails.
    imagelist = [
        fragment
        for chunk in currenthtml.split('<a href=')
        for fragment in chunk.split('\'')
        if 'thumbs' not in fragment and 'jpg' in fragment
    ]

    # Now we have a list of pictures relative to our folder.
    numimages = len(imagelist)
    logger.info( 'Found '+ str(numimages) +' images, in '+currentalbum)

    currentimagefolder = 'http://members.latexlair.com/galleries/'+year+'/'+currentalbum+'/'
    foldername = year+'/'+currentalbum+'/'
    # Make sure the target directory exists before any job is queued.
    ensure_dir(rootdownloadfolder+foldername)

    # Hand one job per image to the queue, then wait until all are processed.
    for imagename in imagelist:
        queue.put([currentimagefolder+imagename, foldername, prefix])
    queue.join()

    logger.info('Do Database update'+ albumpart + ' ' + str(numimages))
    if albumpart=='':
        logger.info( 'Null album - setting variable to 0')
        albumpart = '00'
    if numimages > 1:
        logger.info( 'We downloaded at least 2 photos, increment folder')
        updateset(currentalbum,albumpart)
Example #11
0
# Startup sequence.  Order matters: the data/log directories are set on herp
# first, then herp is initialized, and only afterwards are herp's settings
# (HTTP_PORT, HTTP_USERNAME, ...) read to configure the web server.

# Use the directory containing this script as the data directory and place
# the log directory directly beneath it.
herp.DATA_DIR = os.path.dirname(os.path.abspath(__file__))
herp.LOG_DIR = os.path.join(herp.DATA_DIR, 'logs')




# Initialize herp before any of its settings are used below.
herp.initialize()

# Configure the web front end with the port and credentials held by herp.
webstart.initialize({
                    'http_port': herp.HTTP_PORT,
                    'http_username': herp.HTTP_USERNAME,
                    'http_password': herp.HTTP_PASSWORD
            })

logger.info('Starting LLDL on port: %i' % herp.HTTP_PORT)
logger.info('Initialization Complete')


# Presumably opens the local web UI in a browser when the option is set -- confirm
# against herp.launch_browser.
if herp.LAUNCH_BROWSER == 1:
    herp.launch_browser('localhost',herp.HTTP_PORT,'')

herp.start()


while True:
    if not herp.SIGNAL:
        time.sleep(1)
    else:
        print 'Received signal: ' + herp.SIGNAL
        if herp.SIGNAL == 'shutdown':