Example 1
def update():
    """Gather new chapters from followed series."""
    pool = concurrent.futures.ThreadPoolExecutor(config.get().download_threads)
    futures = []
    warnings = []
    aliases = {}
    query = db.session.query(db.Series).filter_by(following=True).all()
    output.series('Updating {} series'.format(len(query)))
    for follow in query:
        fut = pool.submit(series_by_url, follow.url)
        futures.append(fut)
        aliases[fut] = follow.alias
    with click.progressbar(length=len(futures), show_pos=True,
                           fill_char='>', empty_char=' ') as bar:
        for future in concurrent.futures.as_completed(futures):
            try:
                series = future.result()
            except requests.exceptions.ConnectionError as e:
                warnings.append('Unable to update {} (connection error)'
                                .format(aliases[future]))
            except exceptions.ScrapingError:
                warnings.append('Unable to update {} (scraping error)'
                                .format(aliases[future]))
            else:
                series.update()
            bar.update(1)
    for w in warnings:
        output.warning(w)
    list_new()
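The bookkeeping in the example above (a dict keyed by each submitted future, so a warning can name the series that failed) is a generic concurrent.futures pattern. A minimal standalone sketch, with a dummy fetch() standing in for series_by_url; all names and URLs below are illustrative only:

import concurrent.futures

def fetch(url):
    # Stand-in for series_by_url; a real task would scrape the page.
    return 'fetched {}'.format(url)

urls = {'alpha': 'http://example.com/a', 'beta': 'http://example.com/b'}
aliases = {}
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as pool:
    for alias, url in urls.items():
        future = pool.submit(fetch, url)
        aliases[future] = alias
    for future in concurrent.futures.as_completed(aliases):
        try:
            result = future.result()
        except Exception as e:
            # Mirror the warning collection above: report the failing alias.
            print('Unable to update {} ({})'.format(aliases[future], e))
        else:
            print('{}: {}'.format(aliases[future], result))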
Example 2
File: db.py Project: kozec/cum
def test_database():
    """Runs a database sanity test."""
    sanity_tester = sanity.DatabaseSanity(Base, engine)
    sanity_tester.test()
    if sanity_tester.errors:
        for error in sanity_tester.errors:
            err_target, err_msg = str(error).split(' ', 1)
            message = ' '.join([click.style(err_target, bold=True), err_msg])
            output.warning(message)
        output.error('Database has failed sanity check; '
                     'run `cum repair-db` to repair database')
        exit(1)
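The error formatting in test_database() can be exercised on its own. A minimal sketch, assuming (as the split above implies) that each sanity error stringifies as '<target> <message>'; the error text here is invented:

import click

error = 'series.alias column is missing'  # hypothetical sanity error text
err_target, err_msg = str(error).split(' ', 1)
message = ' '.join([click.style(err_target, bold=True), err_msg])
click.echo(message)  # prints 'series.alias' in bold, then the rest of the message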
Example 3
    def get(self, use_db=True):
        """Downloads the chapter if it is available.

        Optionally does not attempt to remove the chapter from the database or
        mark the chapter as downloaded if `use_db` is set to False.
        """
        if self.available():
            self.download()
            if use_db:
                self.mark_downloaded()
        elif use_db:
            output.warning('Removing {} {}: missing from remote'
                           .format(self.name, self.chapter))
            self.db_remove()
Example 4
File: cum.py Project: theowhy/cum
def follow(urls, directory, download, ignore):
    """Follow a series."""
    chapters = []
    for url in urls:
        try:
            series = utility.series_by_url(url)
        except exceptions.ScrapingError:
            output.warning('Scraping error ({})'.format(url))
            continue
        except exceptions.LoginError as e:
            output.warning('{} ({})'.format(e.message, url))
            continue
        if not series:
            output.warning('Invalid URL "{}"'.format(url))
            continue
        series.directory = directory
        if ignore:
            series.follow(ignore=True)
            output.chapter('Ignoring {} chapters'.format(len(series.chapters)))
        else:
            series.follow()
            chapters += db.Chapter.find_new(alias=series.alias)

    if download:
        output.chapter('Downloading {} chapters'.format(len(chapters)))
        for chapter in chapters:
            try:
                chapter.get()
            except exceptions.LoginError as e:
                output.warning(
                    'Could not download {c.alias} {c.chapter}: {e}'.format(
                        c=chapter, e=e.message))
Example 5
def follow(urls, directory, download, ignore):
    """Follow a series."""
    chapters = []
    for url in urls:
        try:
            series = series_by_url(url)
        except exceptions.ScrapingError:
            output.warning('Scraping error ({})'.format(url))
            continue
        except exceptions.LoginError as e:
            output.warning('{} ({})'.format(e.message, url))
            continue
        if not series:
            output.warning('Invalid URL "{}"'.format(url))
            continue
        series.directory = directory
        if ignore:
            series.follow(ignore=True)
            output.chapter('Ignoring {} chapters'.format(len(series.chapters)))
        else:
            series.follow()
            chapters += db.Chapter.find_new(alias=series.alias)

    if download:
        output.chapter('Downloading {} chapters'.format(len(chapters)))
        for chapter in chapters:
            try:
                chapter.get()
            except exceptions.LoginError as e:
                output.warning('Could not download {c.alias} {c.chapter}: {e}'
                               .format(c=chapter, e=e.message))
                continue
Example 6
File: cum.py Project: Hamuko/cum
def download(aliases):
    """Download all available chapters.

    If an optional alias is specified, the command will only download new
    chapters for that alias.
    """
    chapters = []
    if not aliases:
        chapters = db.Chapter.find_new()
    for alias in aliases:
        chapters += db.Chapter.find_new(alias=alias)
    output.chapter('Downloading {} chapters'.format(len(chapters)))
    for chapter in chapters:
        try:
            chapter.get()
        except exceptions.LoginError as e:
            output.warning('Could not download {c.alias} {c.chapter}: {e}'
                           .format(c=chapter, e=e.message))
Example 7
File: cum.py Project: theowhy/cum
def update(fast):
    """Gather new chapters from followed series."""
    pool = concurrent.futures.ThreadPoolExecutor(config.get().download_threads)
    futures = []
    warnings = []
    aliases = {}
    query = db.session.query(db.Series).filter_by(following=True).all()
    if fast:
        skip_count = 0
        for series in query.copy():
            if not series.needs_update:
                skip_count += 1
                query.remove(series)
        output.series('Updating {} series ({} skipped)'.format(
            len(query), skip_count))
    else:
        output.series('Updating {} series'.format(len(query)))
    for follow in query:
        fut = pool.submit(utility.series_by_url, follow.url)
        futures.append(fut)
        aliases[fut] = follow.alias
    with click.progressbar(length=len(futures),
                           show_pos=True,
                           fill_char='>',
                           empty_char=' ') as bar:
        for future in concurrent.futures.as_completed(futures):
            try:
                series = future.result()
            except exceptions.ConnectionError:
                warnings.append(
                    'Unable to update {} (connection error)'.format(
                        aliases[future]))
            except exceptions.ScrapingError:
                warnings.append('Unable to update {} (scraping error)'.format(
                    aliases[future]))
            except exceptions.LoginError as e:
                warnings.append('Unable to update {} ({})'.format(
                    aliases[future], e.message))
            else:
                series.update()
            bar.update(1)
    for w in warnings:
        output.warning(w)
    utility.list_new()
Example 8
File: cum.py Project: theowhy/cum
def download(aliases):
    """Download all available chapters.

    If an optional alias is specified, the command will only download new
    chapters for that alias.
    """
    chapters = []
    if not aliases:
        chapters = db.Chapter.find_new()
    for alias in aliases:
        chapters += db.Chapter.find_new(alias=alias)
    output.chapter('Downloading {} chapters'.format(len(chapters)))
    for chapter in chapters:
        try:
            chapter.get()
        except exceptions.LoginError as e:
            output.warning(
                'Could not download {c.alias} {c.chapter}: {e}'.format(
                    c=chapter, e=e.message))
Example 9
    def follow(self, ignore=False):
        """Adds the series details to database and all current chapters."""
        output.series('Adding follow for {s.name} ({s.alias})'.format(s=self))

        try:
            s = db.session.query(db.Series).filter_by(url=self.url).one()
        except NoResultFound:
            s = db.Series(self)
            db.session.add(s)
            db.session.commit()
        else:
            if s.following:
                output.warning('You are already following this series')
            else:
                s.directory = self.directory
                s.following = True
                db.session.commit()

        for chapter in self.chapters:
            chapter.save(s, ignore=ignore)
Example 10
File: cum.py Project: Hamuko/cum
def update(fast):
    """Gather new chapters from followed series."""
    pool = concurrent.futures.ThreadPoolExecutor(config.get().download_threads)
    futures = []
    warnings = []
    aliases = {}
    query = db.session.query(db.Series).filter_by(following=True).all()
    if fast:
        skip_count = 0
        for series in query.copy():
            if not series.needs_update:
                skip_count += 1
                query.remove(series)
        output.series('Updating {} series ({} skipped)'
                      .format(len(query), skip_count))
    else:
        output.series('Updating {} series'.format(len(query)))
    for follow in query:
        fut = pool.submit(utility.series_by_url, follow.url)
        futures.append(fut)
        aliases[fut] = follow.alias
    with click.progressbar(length=len(futures), show_pos=True,
                           fill_char='>', empty_char=' ') as bar:
        for future in concurrent.futures.as_completed(futures):
            try:
                series = future.result()
            except exceptions.ConnectionError:
                warnings.append('Unable to update {} (connection error)'
                                .format(aliases[future]))
            except exceptions.ScrapingError:
                warnings.append('Unable to update {} (scraping error)'
                                .format(aliases[future]))
            except exceptions.LoginError as e:
                warnings.append('Unable to update {} ({})'
                                .format(aliases[future], e.message))
            else:
                series.update()
            bar.update(1)
    for w in warnings:
        output.warning(w)
    utility.list_new()
Example 11
    def follow(self, ignore=False):
        """Adds the series details to database and all current chapters."""

        try:
            s = db.session.query(db.Series).filter_by(url=self.url).one()
        except NoResultFound:
            s = db.Series(self)
            s.check_alias_uniqueness()
            output.series('Adding follow for {s.name} ({s.alias})'.format(s=s))
            db.session.add(s)
            db.session.commit()
        else:
            if s.following:
                output.warning('You are already following {s.name} ({s.alias})'
                               .format(s=s))
            else:
                s.directory = self.directory
                s.following = True
                db.session.commit()

        for chapter in self.chapters:
            chapter.save(s, ignore=ignore)
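Both follow() variants above are built on the same query-or-create flow around NoResultFound. A self-contained sketch of that flow against an in-memory SQLite database; the Series model and its fields here are illustrative, not cum's actual schema, and SQLAlchemy 1.4+ is assumed:

from sqlalchemy import Boolean, Column, Integer, String, create_engine
from sqlalchemy.orm import declarative_base, sessionmaker
from sqlalchemy.orm.exc import NoResultFound

Base = declarative_base()

class Series(Base):
    __tablename__ = 'series'
    id = Column(Integer, primary_key=True)
    url = Column(String, unique=True)
    following = Column(Boolean, default=False)

engine = create_engine('sqlite://')
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()

def follow(url):
    """Mark the series at `url` as followed, creating the row if needed."""
    try:
        s = session.query(Series).filter_by(url=url).one()
    except NoResultFound:
        s = Series(url=url, following=True)
        session.add(s)
    else:
        if s.following:
            print('You are already following this series')
        s.following = True
    session.commit()
    return s

follow('http://example.com/series/1')  # first call creates the row
follow('http://example.com/series/1')  # second call warns and leaves it followed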
Example 12
def get(input, directory):
    """Download chapters by URL or by alias:chapter.

    The command accepts input as either the chapter URL or the
    alias:chapter combination (e.g. 'bakuon:11'), if the chapter is already
    found in the database through a follow. The command will not enter the
    downloads in the database in case of URLs and ignores downloaded status
    in case of alias:chapter, so it can be used to download one-shots that
    don't require follows or for redownloading already downloaded chapters.
    """
    chapter_list = []
    for i in input:
        try:
            series = series_by_url(i)
        except exceptions.ScrapingError:
            output.warning('Scraping error ({})'.format(i))
            continue
        if series:
            chapter_list += series.chapters
        try:
            chapter = chapter_by_url(i)
        except exceptions.ScrapingError:
            output.warning('Scraping error ({})'.format(i))
            continue
        if chapter:
            chapter_list.append(chapter)
        if not (series or chapter):
            try:
                a, c = i.split(':')
            except ValueError:
                output.warning('Invalid selection "{}"'.format(i))
            else:
                chapters = (db.session.query(db.Chapter)
                            .join(db.Series)
                            .filter(db.Series.alias == a,
                                    db.Chapter.chapter == c)
                            .all())
                for chapter in chapters:
                    chapter_list.append(chapter.to_object())
    for chapter in chapter_list:
        chapter.directory = directory
        chapter.get(use_db=False)
Example 13
File: cum.py Project: Hamuko/cum
def get(input, directory):
    """Download chapters by URL or by alias:chapter.

    The command accepts input as either the chapter URL, the alias of a
    followed series, or the alias:chapter combination (e.g. 'bakuon:11'), if
    the chapter is already found in the database through a follow. The command
    will not enter the downloads in the database in case of URLs and ignores
    downloaded status in case of alias:chapter, so it can be used to download
    one-shots that don't require follows or for redownloading already
    downloaded chapters.
    """
    chapter_list = []
    for item in input:
        try:
            series = utility.series_by_url(item)
        except exceptions.ScrapingError:
            output.warning('Scraping error ({})'.format(item))
            continue
        except exceptions.LoginError as e:
            output.warning('{} ({})'.format(e.message, item))
            continue
        if series:
            chapter_list += series.chapters
        try:
            chapter = utility.chapter_by_url(item)
        except exceptions.ScrapingError:
            output.warning('Scraping error ({})'.format(item))
            continue
        except exceptions.LoginError as e:
            output.warning('{} ({})'.format(e.message, item))
            continue
        if chapter:
            chapter_list.append(chapter)
        if not (series or chapter):
            chapters = db.session.query(db.Chapter).join(db.Series)
            try:
                alias, chapter = item.split(':')
                chapters = chapters.filter(db.Series.alias == alias,
                                           db.Chapter.chapter == chapter)
            except ValueError:
                chapters = chapters.filter(db.Series.alias == item)
            chapters = chapters.all()
            if not chapters:
                output.warning('Invalid selection "{}"'.format(item))
            for chapter in chapters:
                chapter_list.append(chapter.to_object())
    for chapter in chapter_list:
        chapter.directory = directory
        try:
            chapter.get(use_db=False)
        except exceptions.LoginError as e:
            output.warning('Could not download {c.alias} {c.chapter}: {e}'
                           .format(c=chapter, e=e.message))
Example 14
File: cum.py Project: theowhy/cum
def get(input, directory):
    """Download chapters by URL or by alias:chapter.

    The command accepts input as either the chapter URL, the alias of a
    followed series, or the alias:chapter combination (e.g. 'bakuon:11'), if
    the chapter is already found in the database through a follow. The command
    will not enter the downloads in the database in case of URLs and ignores
    downloaded status in case of alias:chapter, so it can be used to download
    one-shots that don't require follows or for redownloading already
    downloaded chapters.
    """
    chapter_list = []
    for item in input:
        try:
            series = utility.series_by_url(item)
        except exceptions.ScrapingError:
            output.warning('Scraping error ({})'.format(item))
            continue
        except exceptions.LoginError as e:
            output.warning('{} ({})'.format(e.message, item))
            continue
        if series:
            chapter_list += series.chapters
        try:
            chapter = utility.chapter_by_url(item)
        except exceptions.ScrapingError:
            output.warning('Scraping error ({})'.format(item))
            continue
        except exceptions.LoginError as e:
            output.warning('{} ({})'.format(e.message, item))
            continue
        if chapter:
            chapter_list.append(chapter)
        if not (series or chapter):
            chapters = db.session.query(db.Chapter).join(db.Series)
            try:
                alias, chapter = item.split(':')
                chapters = chapters.filter(db.Series.alias == alias,
                                           db.Chapter.chapter == chapter)
            except ValueError:
                chapters = chapters.filter(db.Series.alias == item)
            chapters = chapters.all()
            if not chapters:
                output.warning('Invalid selection "{}"'.format(item))
            for chapter in chapters:
                chapter_list.append(chapter.to_object())
    for chapter in chapter_list:
        chapter.directory = directory
        try:
            chapter.get(use_db=False)
        except exceptions.LoginError as e:
            output.warning(
                'Could not download {c.alias} {c.chapter}: {e}'.format(
                    c=chapter, e=e.message))
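The alias:chapter handling shared by the get() variants reduces to a split with a ValueError fallback. A tiny standalone sketch (inputs are invented, no database involved); note that in the command above, full URLs never reach this branch because they are consumed by series_by_url or chapter_by_url first:

def parse_selection(item):
    """Return (alias, chapter) for 'alias:chapter' input, else (item, None)."""
    try:
        alias, chapter = item.split(':')
    except ValueError:
        # No colon at all (bare alias) or too many pieces to unpack.
        return item, None
    return alias, chapter

print(parse_selection('bakuon:11'))  # ('bakuon', '11')
print(parse_selection('bakuon'))     # ('bakuon', None)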
Example 15
File: batoto.py Project: Hamuko/cum
    def download(self):
        if getattr(self, 'r', None):
            r = self.r
        else:
            r = self.reader_get(1)
        soup = BeautifulSoup(r.text, config.get().html_parser)
        if soup.find('a', href='#{}_1_t'.format(self.batoto_hash)):
            # The chapter uses webtoon layout, meaning all of the images are on
            # the same page.
            pages = [''.join(i) for i in re.findall(self.img_path_re, r.text)]
        else:
            pages = [x.get('value') for x in
                     soup.find('select', id='page_select').find_all('option')]
            if not pages:
                output.warning('Skipping {s.name} {s.chapter}: '
                               'chapter has no pages'
                               .format(s=self))
                return
            # Replace the first URL with the first image URL to avoid scraping
            # the first page twice.
            pages[0] = soup.find('img', id='comic_page').get('src')
            next_match = re.search(self.next_img_path_re, r.text)
            if next_match:
                pages[1] = next_match.group(1)
        files = [None] * len(pages)
        futures = []
        last_image = None
        with self.progress_bar(pages) as bar:
            for i, page in enumerate(pages):
                try:
                    if guess_type(page)[0]:
                        image = page
                    else:   # Predict the next URL based on the last URL
                        for reg in [self.img_path_re, self.ch_img_path_re]:
                            m = re.match(reg, last_image)
                            if m:
                                break
                        else:
                            raise ValueError
                        mg = list(m.groups())
                        num_digits = len(mg[1])
                        mg[1] = "{0:0>{digits}}".format(int(mg[1]) + 1,
                                                        digits=num_digits)
                        image = "".join(mg)
                    r = requests.get(image, stream=True)
                    if r.status_code == 404:
                        r.close()
                        raise ValueError
                except ValueError:  # If we fail to do prediction, scrape
                    r = self.reader_get(i + 1)
                    soup = BeautifulSoup(r.text, config.get().html_parser)
                    image = soup.find('img', id='comic_page').get('src')
                    image2_match = re.search(self.next_img_path_re, r.text)
                    if image2_match:
                        pages[i + 1] = image2_match.group(1)
                    r = requests.get(image, stream=True)
                fut = download_pool.submit(self.page_download_task, i, r)
                fut.add_done_callback(partial(self.page_download_finish,
                                              bar, files))
                futures.append(fut)
                last_image = image
            concurrent.futures.wait(futures)
            self.create_zip(files)
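The prediction step above increments the numeric part of the previous image URL while keeping its zero padding, falling back to scraping when the guess 404s. A standalone sketch of just the increment; the regex and URL are invented, not Batoto's actual layout:

import re

img_path_re = re.compile(r'(.+/img)(\d+)(\.png)')  # illustrative pattern

last_image = 'http://example.com/img000012.png'
m = img_path_re.match(last_image)
groups = list(m.groups())
num_digits = len(groups[1])
groups[1] = '{0:0>{digits}}'.format(int(groups[1]) + 1, digits=num_digits)
next_image = ''.join(groups)
print(next_image)  # http://example.com/img000013.png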