예제 #1
0
def SaveCaptcha(url):
    """Download the file at ``url`` into the project's ``tmp`` directory.

    The file is stored under ``opt['prjPath'] + '/tmp/'`` with a name built
    from ``id_generator()`` followed by ``GetFileExtFromURL(url)``.

    Args:
        url: Location of the file to fetch (a captcha image, judging by the
            function name).

    Returns:
        The local path of the saved file, or the string ``'no image'`` when
        the download raises an error.
    """
    ipath = opt['prjPath'] + '/tmp/'
    ext = GetFileExtFromURL(url)
    target = ipath + id_generator() + ext
    g = Grab(connect_timeout=5,
             userpwd='user:pass',
             debug_post='True',
             log_dir='log',
             headers={'Accept-Language': 'ru,en;q=0.8'})
    try:
        g.download(url, target)
    except Exception:
        # Best-effort download: failures are reported through the sentinel
        # string.  Catching Exception instead of using a bare ``except``
        # lets KeyboardInterrupt and SystemExit propagate to the caller.
        return 'no image'
    return target
예제 #2
0
def SaveImage(url):
    """Download the file at ``url`` into the project's ``img`` directory.

    The file is stored under ``opt['prjPath'] + '\\img/'`` with a name built
    from ``id_generator()`` followed by ``GetFileExtFromURL(url)``.

    Args:
        url: Location of the image to fetch.

    Returns:
        ``opt['imgServerPath']`` concatenated with the generated file name on
        success, or the string ``'no image'`` when anything inside the
        download step raises an error.
    """
    # NOTE(review): the directory suffix is a literal backslash followed by
    # "img/" (mixed separators) -- presumably written for Windows; confirm
    # before running elsewhere.  The backslash is escaped explicitly so the
    # value is unchanged but no invalid-escape warning is triggered.
    ipath = opt['prjPath'] + '\\img/'
    ext = GetFileExtFromURL(url)
    filename = id_generator() + ext
    g = Grab(connect_timeout=5,
             userpwd='user:pass',
             debug_post='True',
             log_dir='log',
             headers={'Accept-Language': 'ru,en;q=0.8'})
    try:
        g.download(url, ipath + filename)
        # Parenthesised form prints the same text under Python 2 and is
        # also valid Python 3.
        print(filename + " saved")
        return opt['imgServerPath'] + filename
    except Exception:
        # Best-effort: report any failure through the sentinel string, but
        # let KeyboardInterrupt and SystemExit propagate (a bare ``except``
        # would have swallowed them).
        return 'no image'
예제 #3
0
def SaveCaptcha(url):
    """Fetch ``url`` and store it in ``<opt['prjPath']>/tmp/``.

    The stored file name is ``id_generator()`` plus the extension returned
    by ``GetFileExtFromURL(url)``.

    Args:
        url: Location of the file to download (a captcha image, going by the
            function name).

    Returns:
        Path of the saved file on success; the string ``'no image'`` if the
        download raises an error.
    """
    ipath = opt['prjPath'] + '/tmp/'
    ext = GetFileExtFromURL(url)
    saved_path = ipath + id_generator() + ext
    g = Grab(connect_timeout=5,
             userpwd='user:pass',
             debug_post='True',
             log_dir='log',
             headers={'Accept-Language': 'ru,en;q=0.8'})
    try:
        g.download(url, saved_path)
    except Exception:
        # Any download error maps to the sentinel value.  A bare ``except``
        # would also trap KeyboardInterrupt/SystemExit, so only Exception
        # subclasses are caught here.
        return 'no image'
    return saved_path
예제 #4
0
def SaveImage(url):
    """Fetch ``url`` and store it in the project's ``img`` directory.

    The local target is ``opt['prjPath'] + '\\img/'`` followed by a name made
    of ``id_generator()`` and the extension from ``GetFileExtFromURL(url)``.

    Args:
        url: Location of the image to download.

    Returns:
        ``opt['imgServerPath'] + <generated file name>`` on success, or the
        string ``'no image'`` if the download step raises an error.
    """
    # NOTE(review): literal backslash + "img/" mixes path separators --
    # presumably Windows-specific; verify.  Written as '\\img/' so the
    # runtime value is identical without relying on an invalid escape.
    ipath = opt['prjPath'] + '\\img/'
    ext = GetFileExtFromURL(url)
    filename = id_generator() + ext
    g = Grab(connect_timeout=5,
             userpwd='user:pass',
             debug_post='True',
             log_dir='log',
             headers={'Accept-Language': 'ru,en;q=0.8'})
    try:
        g.download(url, ipath + filename)
        # Function-call form: same output on Python 2, valid on Python 3.
        print(filename + " saved")
        return opt['imgServerPath'] + filename
    except Exception:
        # Keep the best-effort contract, but no longer swallow
        # KeyboardInterrupt/SystemExit as the bare ``except`` did.
        return 'no image'
예제 #5
0
파일: packtdl.py 프로젝트: mbirth/UnixTools
class PacktPub:
    """Small client for packtpub.com built on a single Grab instance.

    ``g`` holds the configured Grab object; ``logged_in`` starts as False and
    is switched to True by a successful ``login``.
    """

    def __init__(self):
        grab = Grab()
        # A body_maxsize=512000 limit was previously considered and is
        # intentionally left disabled.
        grab.setup(
            follow_location=True,
            follow_refresh=True,
            timeout=120,
            connect_timeout=10,
        )
        self.g = grab
        self.logged_in = False

    def login(self, email, password):
        """Fill in and submit the login form on the Packt home page.

        Snapshots of the page before and after the submit are written under
        /tmp.  ``logged_in`` is set only after the post-login page has been
        checked for the text ``"sid":``.
        """
        grab = self.g
        grab.go('https://www.packtpub.com/')
        grab.doc.save('/tmp/packtpub-home.html')
        grab.doc.choose_form(id='packt-user-login-form')
        print("Logging in with account: {}".format(email))
        for field, value in (('email', email), ('password', password)):
            grab.doc.set_input(field, value)
        grab.doc.submit()
        grab.doc.save('/tmp/packpub-home-after-login.html')
        # Presumably raises when the marker is absent, leaving logged_in
        # False -- confirm against the Grab documentation.
        grab.doc.text_assert('"sid":')
        self.logged_in = True

    def get_ebooks_list(self, url="https://www.packtpub.com/account/my-ebooks"):
        """Load the purchased-ebooks page and return a list of PacktBook objects.

        A login is required only when ``url`` starts with "http"; any other
        url skips that check.  Raises LoggedOutException when the check
        fails.
        """
        is_remote = url.startswith("http")
        if is_remote and not self.logged_in:
            raise LoggedOutException("Must be logged in before getting ebooks list!")

        grab = self.g
        grab.go(url)
        grab.doc.save('/tmp/packtpub-my-ebooks.html')
        grab.doc.text_assert('<h1>My eBooks </h1>')

        book_nodes = grab.doc.select('//div[@id="product-account-list"]/div[starts-with(@class, "product-line")][@title]')
        books = []
        for node in book_nodes:
            entry = PacktBook()
            entry.parse_from_xsel(node)
            books.append(entry)
        return books

    def download_book_all(self, book: PacktBook, destination_directory):
        """Download the book's PDF to ``destination_directory/<safe name>.pdf``.

        The destination directory is created when it does not exist yet.
        Only the PDF link (``book.dl_pdf``) is fetched by the code visible
        here.  Raises LoggedOutException when called before ``login``.
        """
        if not self.logged_in:
            raise LoggedOutException("Must be logged in before download!")

        stem = book.get_safe_name()
        directory_missing = not os.path.exists(destination_directory)
        if directory_missing:
            os.makedirs(destination_directory, mode=0o775, exist_ok=True)

        pdf_url = book.dl_pdf
        print("Downloading PDF of {} from {}".format(stem, pdf_url))
        pdf_target = destination_directory + "/" + stem + ".pdf"
        self.g.download(pdf_url, pdf_target)
예제 #6
0
     desc = re.sub('<a>.+?</a>', '', desc)
     desc = re.sub('<[^>]*>', '', desc)
     desc = desc.decode('utf-8')
 else: desc = ''
 image =  'http://kampfer.ru' + ''.join(doc.xpath('//*[@id="img-current_picture"]/@src'))
 image_name = '%s.%s' %(number, image.split('.')[-1])
 category = ''.join(doc.xpath('//li[@class="child current" or @class=" current"]/a/text()')).strip()
 count_tabs = len(doc.xpath('//li[@class="child current" or @class=" current"]/a/img'))
 if count_tabs >= 2 and category[0].upper() in string.ascii_uppercase:
     try:
         category = doc.xpath('//li[@class="child current" or @class=" current"]/preceding-sibling::li/a[count(img)=%s]/text()' %(count_tabs-1))[-1].strip()
     except: 
         category = ''
         print '[ERROR] category'
         
 try: g.download(image, os.path.join('images',  image_name))
 except GrabNetworkError:
     print 'Fake download image'
 except IOError:
     print 'IOError'
 image_counter = count(1)
 image_number = image_counter.next()
 for extimageurl in doc.xpath('//div[@class="dopf"]//img/@src'):
     try: 
         g.download('http://kampfer.ru' + extimageurl, os.path.join('images',  '%s_%s.%s' %(number, image_number, extimageurl.split('.')[-1])))
         image_number = image_counter.next()
     except GrabNetworkError: continue
     except IOError: print 'IOError'
 ws0.write(rownum, 0, number)
 ws0.write(rownum, 1, number)
 ws0.write(rownum, 2, number)