Example #1
def run(argv):
    """This function is the core-function using all the other components to
       do the expected work.

       This includes parsing the commandline, reading "config.ini", parsing
       the input-file and actual downloading of all the files.

       Called by ```run.py``` in base-dir.

    """
    # Parse arguments
    arg_parser = CLI()
    input_path, verbose = arg_parser.parse(argv[1:])  # argv[0] = script-name

    # Read config-file
    output_path = read_config()

    # Create and use input-parser
    parser = InputParser(input_path, output_path, verbose)

    # Create downloader
    downloader = Downloader(output_path, verbose)

    # Use downloader
    for url, filename in parser.get_url_targetname_pairs():
        downloader.download(url, filename)
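
The loop above only assumes that Downloader exposes download(url, filename). A minimal sketch of such a class, assuming it wraps urllib and raises the DownloaderDownloadError seen in the test examples below; the implementation details are guesses, not the project's actual code:

import shutil
import urllib.error
import urllib.request


class DownloaderDownloadError(Exception):
    """Assumed error type, matching the name used in the tests below."""


class Downloader:
    def __init__(self, output_path, verbose=False):
        self.output_path = output_path
        self.verbose = verbose

    def download(self, url, filename):
        # Stream the response straight to disk; wrap any failure in the
        # project's download error (assumed behavior).
        try:
            with urllib.request.urlopen(url) as response, \
                    open(filename, 'wb') as out_file:
                shutil.copyfileobj(response, out_file)
        except (urllib.error.URLError, OSError) as err:
            raise DownloaderDownloadError(url) from err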
Example #2
class TestDownloader(unittest.TestCase):

    def setUp(self):
        self.d = Downloader()

    def test_download_contents_file(self):
        download_mock_fn = Mock()
        self.d._download = download_mock_fn
        res = self.d.download_contents_file('amd-foo')
        expected = 'Contents-amd-foo'
        expected_ext = expected + '.gz'
        self.assertEqual(res, expected)
        self.d._download.assert_called_with(expected_ext, expected)

    @patch('urllib.request.urlopen')
    @patch('gzip.decompress')
    @patch("__main__.open", new_callable=mock_open, read_data="data")
    def test_download(self, mock_open_handler, mock_gzip, mock_urlopen):
        # patch decorators are applied bottom-up, so the innermost patch
        # ("__main__.open") maps to the first mock argument.
        response = Mock()
        response.read.return_value = b'foo'
        mock_urlopen.return_value = response
        mock_gzip.return_value = b'foo'
        fpath = '/tmp/baz'
        fname = fpath.split('/')[-1]
        self.d._download(fname, fpath)
        mock_open_handler.assert_called_once()
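
The two tests pin down the implied contract: download_contents_file(arch) returns 'Contents-<arch>' and delegates to _download('<name>.gz', '<name>'), while _download fetches a URL, gunzips the payload, and writes it out. A sketch consistent with those assertions; the mirror URL is a placeholder assumption:

import gzip
import urllib.request

MIRROR_URL = 'http://ftp.uk.debian.org/debian/dists/stable/main/'  # assumed


class Downloader:

    def download_contents_file(self, arch_type):
        # Build the canonical Contents file name for the architecture.
        outfile_path = 'Contents-' + arch_type
        self._download(outfile_path + '.gz', outfile_path)
        return outfile_path

    def _download(self, fname, fpath):
        # Fetch the gzipped index, decompress it, and write the result.
        response = urllib.request.urlopen(MIRROR_URL + fname)
        data = gzip.decompress(response.read())
        with open(fpath, 'wb') as f:
            f.write(data)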
Example #3
async def main() -> None:
    new_e3 = NewE3()
    old_e3 = OldE3()

    with open("config.json", "r") as f:
        config = json.loads(f.read())

    username = config.get("studentId", "")
    old_e3_pwd = config.get("oldE3Password", "")
    new_e3_pwd = config.get("newE3Password", "")
    download_path = config.get("downloadPath", "e3")
    gdrive_enable = config.get("gdrive_enable", True)
    download_path = os.path.expanduser(download_path)

    if gdrive_enable:
        store = oauth_file.Storage("token.json")
        creds = store.get()
        if not creds or creds.invalid:
            flow = client.flow_from_clientsecrets("credentials.json", SCOPES)
            creds = tools.run_flow(flow, store)

    while True:
        if username == "":
            username = input("StudentID: ")
        if old_e3_pwd == "":
            old_e3_pwd = getpass("Old E3 Password: ")
        # Login check reconstructed; the original line was redacted.
        if await old_e3.login(username, old_e3_pwd):
            break
        username, old_e3_pwd = "", ""
        print("ID or Old E3 Password Error")

    while True:
        if new_e3_pwd == "":
            new_e3_pwd = getpass("New E3 Password: ")
        # Login check reconstructed; the original line was redacted.
        if await new_e3.login(username, new_e3_pwd):
            break
        new_e3_pwd = ""
        print("New E3 Password Error")

    downloader = Downloader(download_path)
    async with stream.merge(new_e3.all_files(), old_e3.all_files()).stream() as files:
        async for file in files:
            downloader.add_file(file)
    modified_files = await downloader.done()

    if gdrive_enable:
        gdrive_client = GDrive(download_path)
        await gdrive_client.upload()

    print("")

    if modified_files:
        print("The below files are added or modified")
        modified_files.sort(key=lambda x: x.course_name)
        for modified_file in modified_files:
            print(f"{modified_file.course_name} - {modified_file.name}")
    else:
        print("No files are added or modified")
Example #4
    def test_nonexisting_download_valid_outdir(self):
        """Invalid Download-link; valid out-dir.

        """
        target_file = os.path.join(self.valid_out_dir, '1.file')

        downloader = Downloader(self.valid_out_dir)
        with self.assertRaises(DownloaderDownloadError):
            downloader.download(NON_EXISTING_DOWNLOAD_URL, target_file)
Example #5
    def test_existing_download_valid_outdir(self):
        """Valid Download-link; valid out-dir.

        """
        target_file = os.path.join(self.valid_out_dir, '1.file')

        downloader = Downloader(self.valid_out_dir)
        downloader.download(EXISTING_DOWNLOADL_URL, target_file)
        self.assertEqual(get_size(target_file), 35832)
Example #6
    def test_nonexisting_download_invalid_outdir(self):
        """Invalid Download-link; invalid out-dir.

           Directory invalid / non-existing with high-probability.

        """
        target_file = os.path.join(self.valid_out_dir + '1', '1.file')

        downloader = Downloader(self.valid_out_dir)
        with self.assertRaises(DownloaderDownloadError):
            downloader.download(NON_EXISTING_DOWNLOAD_URL, target_file)
Example #7
    def run(self, parser=2, downloader=2):
        self._logger.info('Starting image download job')
        start = time.time()

        # Manager for coordinating the multiprocessing work
        with Manager() as manager:
            # Process list
            processes = []

            # Shared-memory variables
            content_list = manager.list()
            image_list = manager.list()
            count = manager.Value('i', 0)
            lock = manager.Lock()
            feeder_running = manager.Value('i', 1)
            parser_running = manager.Value('i', 1)

            parser_logger = Logger('cybot_parser.log')
            downloader_logger = Logger('cybot_downloader.log')
            main_cookies = self._driver.get_cookies()
            cookie = []

            for c in main_cookies:
                cookie.append({'name': c['name'], 'value': c['value']})

            # Create and start the parser processes
            for idx in range(parser):
                parser_instance = Parser(self._chromedriver, cookie,
                                         parser_logger, self._delay)
                parser_process = Process(
                    target=parser_instance.parse,
                    args=(content_list, image_list, feeder_running,
                          parser_running))
                parser_process.name = 'Parser::' + str(idx)
                parser_process.start()
                processes.append(parser_process)
                self._logger.info('Parser %s process started' % idx)

            # Create and start the downloader processes
            for idx in range(downloader):
                downloader_instance = Downloader(downloader_logger)
                downloader_process = Process(
                    target=downloader_instance.downloader,
                    args=(image_list, count, lock, parser_running))
                downloader_process.name = 'Downloader::' + str(idx)
                downloader_process.start()
                processes.append(downloader_process)
                self._logger.info('Downloader %s process started' % idx)

            # Start the feeder in the main process
            self._logger.info('Feeder started')
            self.feeder(content_list, feeder_running)

            # Wait until the parser and downloader processes have finished
            for p in processes:
                p.join()

            self._logger.info('Elapsed time: {} seconds'.format(
                round(time.time() - start, 2)))
            self._logger.info('Total number of images: {}'.format(count.value))
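
The Manager-based sharing above is the standard multiprocessing recipe: proxies for lists, integer values, and locks that can be handed to worker processes. A minimal self-contained sketch of the same counter-plus-lock pattern; the worker body is a stand-in for the downloader:

from multiprocessing import Manager, Process


def worker(count, lock):
    # Stand-in for Downloader.downloader: bump the shared counter safely.
    for _ in range(100):
        with lock:
            count.value += 1


if __name__ == '__main__':
    with Manager() as manager:
        count = manager.Value('i', 0)
        lock = manager.Lock()
        processes = [Process(target=worker, args=(count, lock))
                     for _ in range(2)]
        for p in processes:
            p.start()
        for p in processes:
            p.join()
        print(count.value)  # 200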
Example #8
class App:
    __FILE_FORMAT = '.mp4'
    __TIMEOUT = config.BLOCKED_TIMEOUT

    def __init__(self, anime_url: str, download_path: str):
        self.__scraper = Scraper(anime_url)
        self.__downloader = Downloader(download_path)

    def download(self, episode: str) -> bool:
        while True:
            try:
                LOGGER.info(f'downloading episode {episode}')

                # acquire list of downloadable video urls
                videos = self.__scraper.get(episode)
                break
            except RequestBlocked:
                LOGGER.error(f'request blocked by anime heaven for episode '
                             f'{episode}, going to try again in '
                             f'{self.__TIMEOUT} seconds')
                time.sleep(self.__TIMEOUT)

        if not videos:
            LOGGER.error(f'url not found for episode {episode}')
            return False

        filename = self.__get_filename(episode)
        # NOTE: use first download url only
        todownload = videos[0]
        self.__downloader.download(filename, todownload)

        LOGGER.info(f'downloaded episode {episode}')
        return True

    def get_downloads(self) -> dict:
        return self.__downloader.get_downloads()

    def __get_filename(self, episode: str) -> str:
        return f'Episode-{episode}{self.__FILE_FORMAT}'
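
A hypothetical invocation of the class above; the URL, path, and episode number are made up for illustration:

app = App('https://animeheaven.example/anime/some-show', 'downloads')
if app.download('1'):
    print(app.get_downloads())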
Example #9
def get_pkg_stats(arch_type):
    # download contents file
    outfile_path = Downloader().download_contents_file(arch_type)
    # parse contents file and print top 10 files
    Parser().parse_contents_file(outfile_path)
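
Assuming arch_type is a Debian architecture string, a single call wires the Downloader tested in Example #2 to the Parser:

get_pkg_stats('amd64')  # hypothetical architecture argument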
Example #11
class Tagesschau(object):
    def __init__(self):
        self._log = logging.getLogger("src.tagesschau")
        self.latest_download_file_handler = LatestDownloadFileHandler(config.LATEST_DOWNLOAD_FILE_PATH)
        self.downloader = Downloader()

    def run(self):
        self.create_download_path_if_not_exists()

        self._log.debug("Fetch latest downloaded show and latest download date")
        self.fetch_latest_downloaded_show_and_date()
        self._log.debug("latest_show: %s - %s" % (self.latest_downloaded_show,
                                                  str(self.latest_date)))

        while self.latest_date <= date.today():
            if self.latest_downloaded_show == "tt":
                self.latest_date += timedelta(days=1)

            self._log.info("Fetch html from website for " + str(self.latest_date))
            url = VIDEO_ARCHIV_URL % self.latest_date.strftime("%Y%m%d")
            html = self.downloader.fetch_html(url)
            self._log.debug("Fetch the main content from html")
            main_content = self.fetch_main_content_from_html(html)
            self.download_shows_for_one_day(main_content)

    def create_download_path_if_not_exists(self):
        if not os.path.exists(config.DOWNLOAD_DIR):
            os.mkdir(config.DOWNLOAD_DIR)

    def fetch_latest_downloaded_show_and_date(self):
        file_content = self.latest_download_file_handler.fetch_content()
        self.latest_downloaded_show, latest_date = file_content.split("_")
        self.latest_date = datetime.strptime(latest_date, "%Y%m%d").date()

    def fetch_main_content_from_html(self, html):
        content = re.search("<!-- START -->(.+?)<!-- section -->", html, re.S)
        if not content:
            raise "Pattern did not match."
        return content.group(1)

    def download_shows_for_one_day(self, html):
        # Deliberately not elif: downloading the tagesschau flips the marker
        # to "ts", so the tagesthemen for the same day is fetched right after.
        if self.latest_downloaded_show == "tt":
            self.download_tagesschau(html)
        if self.latest_downloaded_show == "ts":
            self.download_tagesthemen(html)

    def download_tagesschau(self, html):
        self._log.info("Download tagesschau published on " + str(self.latest_date))
        url_match = re.search(PATTERN_TAGESSCHAU_URL, html, re.S)
        if not url_match:
            raise("Pattern not found. Tagesschau maybe not yet published.")
        self.download_show(url_match.group(1))
        self.latest_downloaded_show = "ts"
        self.update_latest_downloaded_show_file()

    def download_tagesthemen(self, html):
        self._log.info("Download tagesthemen published on " + str(self.latest_date))
        url_match = re.search(PATTERN_TAGESTHEMEN_URL, html)
        if not url_match:
            raise("Pattern not found. Tagesthemen maybe not yet published.")
        self.download_show(url_match.group(1))
        self.latest_downloaded_show = "tt"
        self.update_latest_downloaded_show_file()

    def download_show(self, url):
        url = urllib.parse.urljoin(TAGESSCHAU_URL, url)
        filename = self.create_filename(url)
        self.downloader.download_show(url, filename)

    def create_filename(self, url):
        match = re.search("/((t[st])-\d+?)\.html", url)

        page_id, show = match.groups()
        self._log.debug("page_id: " + page_id)
        self._log.debug("show: " + show)

        filename = "%s_%s.mp4" % (self.latest_date.strftime("%Y_%m_%d"), show)
        return os.path.join(config.DOWNLOAD_DIR, filename)

    def update_latest_downloaded_show_file(self):
        latest_date = self.latest_date.strftime("%Y%m%d")
        new_content = "%s_%s" % (self.latest_downloaded_show, latest_date)
        self.latest_download_file_handler.update(new_content)
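
The state file read by fetch_latest_downloaded_show_and_date() and written by update_latest_downloaded_show_file() holds a single "<show>_<YYYYMMDD>" token. A round-trip with made-up content looks like this:

from datetime import datetime

content = "tt_20210401"  # hypothetical file content
show, raw_date = content.split("_")
latest_date = datetime.strptime(raw_date, "%Y%m%d").date()
print(show, latest_date)  # tt 2021-04-01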
Example #12
logging.basicConfig(level=os.getenv('LOG_LEVEL', logging.INFO))
logger = logging.getLogger(__name__)

config = Config(logger,
                os.getenv('BOT_TOKEN', Helper.list_get(sys.argv, 1, None)),
                os.getenv('BOT_ADMIN', Helper.list_get(sys.argv, 2, None)),
                os.getenv('BOT_DESTINATION',
                          Helper.list_get(sys.argv, 3, None)),
                os.getenv('BOT_PERSISTENCE',
                          Helper.list_get(sys.argv, 6, True)),
                os.getenv('BOT_QUALITY', Helper.list_get(sys.argv, 7, '320')),
                mpd_host=os.getenv('BOT_MPD_HOST',
                                   Helper.list_get(sys.argv, 4, False)),
                mpd_port=os.getenv('BOT_MPD_PORT',
                                   Helper.list_get(sys.argv, 5, False)))
downloader = Downloader(config)

updates = {}

button_list = [
    telegram.InlineKeyboardButton("▶ Play", callback_data='mpd_play'),
    telegram.InlineKeyboardButton("⏸ Pause", callback_data='mpd_pause'),
    telegram.InlineKeyboardButton("🗃 All Playlists",
                                  callback_data='mpd_lists'),
    telegram.InlineKeyboardButton("🔊 Up +5", callback_data='mpd_up'),
    telegram.InlineKeyboardButton("🔇 Mute", callback_data='mpd_mute'),
    telegram.InlineKeyboardButton("🔉 Down -5", callback_data='mpd_down')
]
reply_markup = telegram.InlineKeyboardMarkup(
    Helper.build_menu(button_list, n_cols=3))
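
Helper.build_menu is not shown here; it presumably mirrors the widely circulated python-telegram-bot helper that chunks a flat button list into rows. A sketch under that assumption:

def build_menu(buttons, n_cols, header_buttons=None, footer_buttons=None):
    # Split the flat button list into rows of n_cols for InlineKeyboardMarkup.
    menu = [buttons[i:i + n_cols] for i in range(0, len(buttons), n_cols)]
    if header_buttons:
        menu.insert(0, header_buttons)
    if footer_buttons:
        menu.append(footer_buttons)
    return menu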
Example #14
def get_dates_in_range(start_date: date, end_date: date) -> list:
    date_list = []
    delta = end_date - start_date
    for i in range(delta.days + 1):
        date_list.append(start_date + timedelta(days=i))
    return date_list


if __name__ == "__main__":
    work_dir = args.work_dir
    if not os.path.exists(work_dir):
        os.mkdir(work_dir)

    processing_dates = get_dates_in_range(args.start_date, args.end_date)

    if args.operation == 'download':
        hail_dir = os.path.join(work_dir, 'hail_reports')
        if not os.path.exists(hail_dir):
            os.mkdir(hail_dir)

        downloader = Downloader(processing_dates, hail_dir)
        downloader.download_hail_reports()

    if args.operation == 'preprocess':
        hail_dir = os.path.join(work_dir, 'hail_reports')
        if not os.path.exists(hail_dir):
            raise FileNotFoundError('Missing hail data')

        hail_preprocessor = HailReportPreprocessor(work_dir, hail_dir)
        hail_preprocessor.wrangle_data()
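
The helper is inclusive on both ends, so a two-day span yields three dates:

from datetime import date

print(get_dates_in_range(date(2021, 1, 30), date(2021, 2, 1)))
# [datetime.date(2021, 1, 30), datetime.date(2021, 1, 31), datetime.date(2021, 2, 1)]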
Example #16
def main(argv):
    # Setup logging
    duallog.setup(Path(FLAGS.data_directory) / 'logs')
    logging.set_verbosity(
        FLAGS.logging_verbosity
    )  # Must be called after duallog.setup() to function properly

    # Configure GDAL
    gdal.SetCacheMax(8 * 1000000000)  # cache size in bytes (8 GB)

    # Create absolute paths (either use full path provided as argument or use data dir in the project folder)
    data_dir = Path(FLAGS.data_directory) if os.path.isabs(
        FLAGS.data_directory) else Path.cwd() / FLAGS.data_directory

    # Ensure filename on geojson file
    geojson_path = FLAGS.geojson if FLAGS.geojson.endswith(
        '.geojson') else FLAGS.geojson + '.geojson'

    # If no order_id from previous order is provided, then download the data requested for this order
    order_id = FLAGS.order_id
    if order_id == 'Empty':
        order_id = 'order_' + datetime.datetime.today().strftime(
            '%Y%m%d-%H%M%S')

        logging.info("####################################")
        logging.info("# Initializing Sentinel downloader #")
        logging.info("####################################")
        logging.info("Order id: " + order_id)
        downloader = Downloader(username=FLAGS.username,
                                password=FLAGS.password,
                                satellite=FLAGS.satellite,
                                order_id=order_id,
                                directory=data_dir)

        # Load the geojson file (check whether the filename was included in the provided name)
        if 'denmark_without_bornholm' in str(geojson_path):
            # Load the default geojson (denmark_without_bornholm), which is included in the project code
            footprint = geojson_to_wkt(
                read_geojson(
                    Path('data') / 'geojson' /
                    'denmark_without_bornholm.geojson'))
        else:
            # Load the provided geojson file from the data directory
            footprint = geojson_to_wkt(
                read_geojson(data_dir / 'geojson' /
                             geojson_path))  # Load from data directory

        # Query the data (multiple footprints can be used, but it is recommended to stick to a single footprint)
        downloader.query(footprint, FLAGS.startdate, FLAGS.enddate)

        # Following code can be used if several geojson files are to be queried
        # footprint = geojson_to_wkt(read_geojson('data/geojson/bornholm.geojson'))
        # downloader.query(footprint, FLAGS.startdate, FLAGS.enddate)

        # Print the number of products and size of all products to be downloaded
        downloader.print_num_and_size_of_products()
        # Save a geojson containing all products to be downloaded
        downloader.save_queried_products()
        logging.info("")

        if FLAGS.download:
            logging.info("####################")
            logging.info("# Downloading data #")
            logging.info("####################")
            downloader.download_zipfiles()
            logging.info("")

    if FLAGS.process_tiles:
        # Load products to be processed (always load from file to ensure modularity for the downloader and processor)
        queried_products_path = (data_dir / 'orders' /
                                 order_id).with_suffix('.pkl')
        products_df = pd.read_pickle(queried_products_path)

        logging.info("###################")
        logging.info("# Processing data #")
        logging.info("###################")
        processpipeliner = ProcessPipeliner(products_df=products_df,
                                            directory=data_dir)
        processpipeliner.process_products()
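
read_geojson and geojson_to_wkt come from sentinelsat, so Downloader presumably wraps sentinelsat's SentinelAPI. A sketch of the equivalent direct calls; the credentials, dates, and platform are placeholders:

from sentinelsat import SentinelAPI, geojson_to_wkt, read_geojson

api = SentinelAPI('user', 'password', 'https://scihub.copernicus.eu/dhus')
footprint = geojson_to_wkt(
    read_geojson('data/geojson/denmark_without_bornholm.geojson'))
products = api.query(footprint,
                     date=('20200101', '20200201'),
                     platformname='Sentinel-2')
api.download_all(products)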
Example #17
from src.twitter_image_getter import TwitterImageGetter
from src.downloader import Downloader
from src.uploader import Uploader

if __name__ == '__main__':
    tmp_dir = "tmp/"
    twitter_image_getter = TwitterImageGetter()
    twitter_image_getter.login()
    image_urls = twitter_image_getter.get_image_url_list()
    downloader = Downloader(tmp_dir, image_urls)
    downloader.run()
    uploader = Uploader(tmp_dir, image_urls)
    uploader.login()
    uploader.run()
Example #18
    def run(self) -> None:
        d = Downloader(self.novel)
        if self.mode == "singleVolume":
            logging.info(f"Downloading {self.novel.title} {self.kwargs['name']}")
            d.singleVolume(self.kwargs["vid"], self.kwargs["name"])
            logging.info("Download succeeded!")
        elif self.mode == "cover":
            logging.info("Downloading cover")
            d.cover()
            logging.info("Download succeeded!")
        elif self.mode == "allBook":
            logging.info(f"Downloading all of {self.novel.title}")
            d.allBooks()
            logging.info("Download succeeded!")
        elif self.mode == "volumes":
            for i in self.novel.volumeList:
                for idx, j in enumerate(i["chapters"]):
                    if idx == 0:
                        logging.info(f"Downloading {self.novel.title} {i['name']}")
                        d.singleVolume(j['cid'] - 1, i["name"])
                        logging.info("Download succeeded!")
                    elif j["name"] == "插图":  # chapter named "Illustrations"
                        logging.info(f"Downloading illustrations for "
                                     f"{self.novel.title} - {i['name']}")
                        d.pictures(j['cid'], False, i["name"])
                        logging.info("Download succeeded!")
Example #19
def begin_download(download_link):
    d = Downloader(download_link)
    d.download()
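
A hypothetical call; the URL is made up:

begin_download('https://example.com/files/archive.zip')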