Example #1
    def init_album(self):
        # fetch the album JSON
        js = self.handler.read_link(url_album % self.album_id).json()['album']
        # album name (HTML entities decoded)
        self.album_name = util.decode_html(js['name'])
        # album cover image URL
        self.logo = js['picUrl']
        # artist name
        self.artist_name = js['artists'][0]['name']
        # build a song object for each track
        for jsong in js['songs']:
            song = NeteaseSong(self.handler, song_json=jsong)
            song.group_dir = self.artist_name + u'_' + self.album_name
            song.group_dir = song.group_dir.replace('/', '_')
            song.post_set()
            self.songs.append(song)

        d = path.dirname(self.songs[-1].abs_path)
        # create the album directory
        LOG.debug(msg.head_163 + msg.fmt_create_album_dir % d)
        util.create_dir(d)

        # download the album cover image
        LOG.debug(msg.head_163 + msg.fmt_dl_album_cover % self.album_name)
        downloader.download_url(
            self.logo, path.join(d, 'cover.' + self.logo.split('.')[-1]))
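A note on the snippet above: deriving the cover extension with self.logo.split('.')[-1] breaks when the image URL carries a query string. A minimal, hedged alternative (cover_filename is a made-up helper, not part of the project):

    from os import path
    from urllib.parse import urlparse

    def cover_filename(logo_url, default_ext='jpg'):
        # take the extension from the URL's path component only,
        # falling back to 'jpg' when none can be recovered
        ext = path.splitext(urlparse(logo_url).path)[1].lstrip('.')
        return 'cover.' + (ext or default_ext)

With that helper, the last line would read downloader.download_url(self.logo, path.join(d, cover_filename(self.logo))).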
Example #2
    def init_album(self):
        resp_json = self.handler.read_link(url_album % self.album_id).json()
        j = resp_json['data']['trackList']

        if not j:
            LOG.error(resp_json['message'])
            return
        # scrape the album page for the description and metadata
        html = self.handler.read_link(self.url).text
        soup = BeautifulSoup(html, 'html.parser')
        if soup.find('meta', property="og:title"):
            self.album_desc = soup.find('span', property="v:summary").text
            # name
            self.album_name = soup.find('meta', property="og:title")['content']
            # album logo
            self.logo = soup.find('meta', property="og:image")['content']
            # artist_name
            self.artist_name = soup.find('meta',
                                         property="og:music:artist")['content']
        else:
            aSong = j[0]
            self.album_name = aSong['album_name']
            self.logo = aSong['album_pic']
            self.artist_name = aSong['artistVOs'][0]['artistName']
            self.album_desc = None

        # build a song object for each track
        for jsong in j:
            song = XiamiSong(self.handler, song_json=jsong)
            song.song_name = jsong['name']  # name or songName
            song.group_dir = self.artist_name + u'_' + self.album_name
            song.group_dir = song.group_dir.replace('/', '_')
            song.post_set()
            self.songs.append(song)

        d = path.dirname(self.songs[-1].abs_path)
        # create the album directory
        LOG.debug(msg.head_xm + msg.fmt_create_album_dir % d)
        util.create_dir(d)

        # download the album cover image
        LOG.debug(msg.head_xm + msg.fmt_dl_album_cover % self.album_name)
        if self.logo:
            self.logo = self.handler.add_http_prefix(self.logo)
            downloader.download_url(
                self.logo, path.join(d, 'cover.' + self.logo.split('.')[-1]))

        LOG.debug(msg.head_xm + msg.fmt_save_album_desc % self.album_name)
        if self.album_desc:
            self.album_desc = re.sub(r'<\s*[bB][rR]\s*/>', '\n',
                                     self.album_desc)
            self.album_desc = re.sub(r'<.*?>', '', self.album_desc)
            self.album_desc = util.decode_html(self.album_desc)
            import codecs
            with codecs.open(path.join(d, 'album_description.txt'), 'w',
                             'utf-8') as f:
                f.write(self.album_desc)
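The description clean-up at the end of this example runs three passes: <br/> tags become newlines, remaining tags are stripped, and HTML entities are decoded. A self-contained illustration of the same steps on a made-up snippet, assuming util.decode_html behaves like html.unescape:

    import html
    import re

    raw = 'Debut album<br/>Recorded in 2009 &amp; 2010 <i>live</i>'
    text = re.sub(r'<\s*[bB][rR]\s*/>', '\n', raw)  # <br/> -> newline
    text = re.sub(r'<.*?>', '', text)               # strip remaining tags
    text = html.unescape(text)                      # '&amp;' -> '&'
    # text == 'Debut album\nRecorded in 2009 & 2010 live'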
Example #3
def download_handler(event, context):
    config = _get_configuration()
    wowc = wow.WoWCommunityAPIClient(config['wow_client_id'],
                                     config['wow_client_secret'],
                                     endpoint=config['wow_api_endpoint'])
    for batch in wowc.get_auction_data_status(config['wow_realm'],
                                              config['wow_locale']):
        s3key = _keyname_from_datetime(config, batch.last_modified)
        downloader.download_url(batch.url,
                                s3key,
                                s3bucket=config['s3_bucket_name'],
                                s3region=config['aws_region'])
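The helper _keyname_from_datetime is not shown in this example. One plausible shape, assuming S3 keys are partitioned by realm and the batch's last-modified time (purely an illustrative sketch, not the project's actual code):

    def _keyname_from_datetime(config, dt):
        # e.g. 'auctions/some-realm/2021-07-01T12:00:00Z.json'
        return 'auctions/%s/%s.json' % (config['wow_realm'],
                                        dt.strftime('%Y-%m-%dT%H:%M:%SZ'))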
Example #4
    def download_grib(self, url_base, rel_path, max_retries=3):
        """
        Download a GRIB file from a GRIB service and stream to <rel_path> in ingest_dir.

        :param url_base: the base URL part of the GRIB service
        :param rel_path: the relative path of the file (w.r.t GRIB base url and w.r.t self.ingest_dir)
        :param max_retries: how many times we may retry to download the file
        """
        url = url_base + '/' + rel_path
        grib_path = osp.join(self.ingest_dir, rel_path)
        try:
            download_url(url, grib_path, max_retries)
        except DownloadError as e:
            raise GribError('GribSource: failed to download file %s: %s' % (url, e))
Example #5
    def download_hdf(self, url_base, rel_path, max_retries=3):
        """
        Download an HDF file from an HDF service and stream to <rel_path> in ingest_dir.

        :param url_base: the base URL part of the HDF service
        :param rel_path: the relative path of the file
        :param max_retries: how many times we may retry to download the file
        """
        url = url_base + '/' + rel_path
        hdf_path = osp.join(self.ingest_dir, rel_path)
        try:
            download_url(url, hdf_path, max_retries)
        except DownloadError as e:
            raise HdfError('HDFSource: failed to download file %s: %s' % (url, e))
Example #6
def download_file(ingest_dir, url, rel_path, max_retries=3):
    """
    Download a file and stream to <rel_path> in ingest_dir.

    :param ingest_dir: the directory into which the file is streamed
    :param url: the URL where the file is hosted
    :param rel_path: the relative path of the file
    :param max_retries: how many times we may retry to download the file
    """
    logging.info("Downloading %s from %s" % (rel_path, url))
    path = osp.join(ingest_dir, rel_path)
    try:
        download_url(url, path, max_retries)
    except DownloadError as e:
        raise data_sourceError('data_source: failed to download file %s: %s' % (url, e))
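Examples #4 through #6 repeat one pattern: join a base URL with a relative path, mirror that path under an ingest directory, download, and re-raise DownloadError as a domain-specific error. A hedged consolidation of that pattern (the function name and the error_cls parameter are illustrative, not from any of the projects above):

    import os.path as osp

    def fetch_to_ingest(url_base, rel_path, ingest_dir, error_cls, max_retries=3):
        # download <url_base>/<rel_path>, streaming it to <ingest_dir>/<rel_path>,
        # and wrap any DownloadError in the caller's domain-specific error class
        url = url_base + '/' + rel_path
        local_path = osp.join(ingest_dir, rel_path)
        try:
            download_url(url, local_path, max_retries)
        except DownloadError as e:
            raise error_cls('failed to download file %s: %s' % (url, e))
        return local_path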
Example #7
    def retrieve_rtma(self, cycle):
        """
        Attempts to retrieve the variables passed in during initialization.
        Any files already downloaded are not modified.
        Returns a list of variables that have not been downloaded.

        :param cycle: the cycle (UTC) for which to retrieve the RTMA
        :return: tuple with list of all variables that are not ready yet
                 and dictionary with path to stored files
        """
        ts = cycle.replace(minute=0, second=0, microsecond=0)
        logging.info('RTMA retrieving variables %s for cycle %s.' % (self.var_list, str(ts)))

        # build (var, local path) pairs as lists, not iterators, since they are scanned twice
        vars_paths = [(var, self._local_var_path(ts, var)) for var in self.var_list]
        ready = {var: path for var, path in vars_paths if self._is_var_cached(path)}
        nonlocals = [(var, path) for var, path in vars_paths
                     if not self._is_var_cached(path)]
        if nonlocals:
            nl_vars = [x[0] for x in nonlocals]
            logging.info('RTMA variables %s are not available locally, trying to download.' % nl_vars)

        not_ready = []
        for var, local_path in nonlocals:
            var_ready = False
            for i in range(0, max_retries):
                try:
                    if self._is_var_ready(ts, var):
                        download_url(self._remote_var_url(cycle.hour, var), local_path)
                        num = grib_messages(local_path, print_messages=True,
                                            max_messages=9999)
                        logging.info('file %s contains %s message(s)' % (local_path, num))
                        if num == 0:
                            raise ValueError('file %s contains no GRIB messages' % local_path)
                        var_ready = True
                        break
                except Exception as e:
                    logging.error(str(e))
                time.sleep(sleep_seconds)
            if var_ready:
                ready[var] = local_path
            else:
                not_ready.append(var)

        if not_ready:
            logging.info('RTMA variables %s for hour %d are not ready.' % (not_ready, cycle.hour))
            # unless a file was downloaded, it makes no sense to check the server immediately again
        else:
            # if all files are available, return
            logging.info('RTMA successfully obtained variables %s for hour %d.' % (self.var_list, cycle.hour))

        return not_ready, ready
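The inner loop above bundles download, validation, and sleep into one retry block. The same idea in isolation, as a generic sketch (max_retries and sleep_seconds mirror the module-level settings this example appears to assume):

    import logging
    import time

    def download_with_retries(url, local_path, validate, max_retries=3,
                              sleep_seconds=20):
        # retry the download up to max_retries times, validating each attempt
        for attempt in range(max_retries):
            try:
                download_url(url, local_path)
                if validate(local_path):
                    return True
            except Exception as e:
                logging.error(str(e))
            time.sleep(sleep_seconds)
        return False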
Example #8
    def download_file(self, url_base, rel_path, max_retries=3):
        """
        Download a file and stream to <rel_path> in ingest_dir.

        :param url_base: the base URL where the file is hosted
        :param rel_path: the relative path of the file
        :param max_retries: how many times we may retry to download the file
        """
        url = url_base + '/' + rel_path
        path = osp.join(self.ingest_dir, rel_path)
        try:
            download_url(url, path, max_retries)
        except DownloadError as e:
            raise data_sourceError('data_source: failed to download file %s: %s'
                                   % (url, e))
Example #9
    def download_grib(self, url_base, rel_path):
        """
        Download a GRIB file from a GRIB service and stream to <rel_path> in ingest_dir.

        :param url_base: the base URL part of the GRIB service
        :param rel_path: the relative path of the file (w.r.t GRIB base url and w.r.t self.ingest_dir)
        """
        url = url_base + '/' + rel_path
        logging.info('downloading %s grib from %s' % (self.id, url))
        grib_path = osp.join(self.ingest_dir, rel_path)
        try:
            download_url(url, grib_path)
        except DownloadError as e:
            logging.error('%s cannot download grib file %s' % (self.id, url))
            logging.warning('Please check %s for %s' %
                            (self.info_url, self.info))
            raise GribError('GribSource: failed to download file %s' % url)
Example #10
    def init_album(self):
        j = self.handler.read_link(url_album %
                                   self.album_id).json()['data']['trackList']
        j_first_song = j[0]
        # album name (HTML entities decoded)
        self.album_name = util.decode_html(j_first_song['album_name'])
        # album cover image URL
        self.logo = j_first_song['album_pic']
        # artist name
        self.artist_name = j_first_song['artist']
        self.artist_name = j_first_song['artist']

        # scrape the album page for the description
        html = self.handler.read_link(self.url).text
        soup = BeautifulSoup(html, 'html.parser')
        self.album_desc = soup.find('span', property="v:summary").text

        # build a song object for each track
        for jsong in j:
            song = XiamiSong(self.handler, song_json=jsong)
            song.group_dir = self.artist_name + u'_' + self.album_name
            song.group_dir = song.group_dir.replace('/', '_')  # keep slashes out of the path
            song.post_set()
            self.songs.append(song)

        d = path.dirname(self.songs[-1].abs_path)
        # create the album directory
        LOG.debug(msg.head_xm + msg.fmt_create_album_dir % d)
        util.create_dir(d)

        # download the album cover image
        LOG.debug(msg.head_xm + msg.fmt_dl_album_cover % self.album_name)
        downloader.download_url(
            self.logo, path.join(d, 'cover.' + self.logo.split('.')[-1]))

        LOG.debug(msg.head_xm + msg.fmt_save_album_desc % self.album_name)
        if self.album_desc:
            self.album_desc = re.sub(r'<\s*[bB][rR]\s*/>', '\n',
                                     self.album_desc)
            self.album_desc = re.sub(r'<.*?>', '', self.album_desc)
            self.album_desc = util.decode_html(self.album_desc)
            import codecs
            with codecs.open(path.join(d, 'album_description.txt'), 'w',
                             'utf-8') as f:
                f.write(self.album_desc)
Example #11
def getYarnMappings(version: str):
    DOWNLOAD_LINK = f"https://github.com/FabricMC/yarn/archive/{version}.zip"
    ZIP_PATH = f"stich_yarn_tmp_{version}.zip"

    if os.path.exists(f"yarn-{version}"):
        print("Found existing mappings, will use those")
    else:
        print("Could not find existing mappings, this might take a minute")
        print(f"Getting {DOWNLOAD_LINK}")
        downloader.download_url(DOWNLOAD_LINK, ZIP_PATH)

        print("Extracting mappings from zip (Could take upwards of 2 minutes)")
        with ZipFile(ZIP_PATH, 'r') as archive:
            for zippedFile in archive.namelist():
                # extract only the mappings directory for this version
                if zippedFile.startswith(f"yarn-{version}/mappings/"):
                    archive.extract(zippedFile)

        print("Cleaning up zip")
        os.remove(ZIP_PATH)
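A hypothetical invocation (yarn tags generally combine a Minecraft version with a build suffix; the exact tag here is illustrative):

    getYarnMappings("1.16.5+build.10")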
Example #12
    def retrieve_rtma(self, cycle):
        """
        Attempts to retrieve the variables passed in during initialization.
        Any files already downloaded are not modified.
        Returns a list of variables that have not been downloaded.

        :param cycle: the cycle (UTC) for which to retrieve the RTMA
        :return: tuple with list of all variables that are not ready yet
                 and dictionary with path to stored files
        """
        ts = cycle.replace(minute=0, second=0, microsecond=0)
        logging.info('RTMA retrieving variables %s for cycle %s.' % (self.var_list, str(ts)))

        # build (var, local path) pairs as lists, not iterators, since they are scanned twice
        vars_paths = [(var, self._local_var_path(ts, var)) for var in self.var_list]
        ready = {var: path for var, path in vars_paths if self._is_var_cached(path)}
        nonlocals = [(var, path) for var, path in vars_paths
                     if not self._is_var_cached(path)]
        if nonlocals:
            nl_vars = [x[0] for x in nonlocals]
            logging.info('RTMA variables %s are not available locally, trying to download.' % nl_vars)

        not_ready = []
        for var, local_path in nonlocals:
            if self._is_var_ready(ts, var):
                download_url(self._remote_var_url(cycle.hour, var), local_path)
                ready[var] = local_path
            else:
                not_ready.append(var)

        if not_ready:
            logging.info('RTMA variables %s for hour %d are not ready.' % (not_ready, cycle.hour))
            # unless a file was downloaded, it makes no sense to check the server immediately again
        else:
            # if all files are available, return
            logging.info('RTMA successfully obtained variables %s for hour %d.' % (self.var_list, cycle.hour))

        return not_ready, ready
Example #13
    # today's data is not available yet, so we want yesterday's, which has recently become available
    yesterday = datetime.utcnow() - timedelta(days=1)
    julian_day = (yesterday - datetime(yesterday.year, 1, 1)).days + 1
    year = yesterday.year

    urls = [
        'ftp://*****:*****@nrt3.modaps.eosdis.nasa.gov/FIRMS/c6/USA_contiguous_and_Hawaii/',
        'ftp://*****:*****@nrt3.modaps.eosdis.nasa.gov/FIRMS/c6/Alaska/',
        'ftp://*****:*****@nrt3.modaps.eosdis.nasa.gov/FIRMS/viirs/USA_contiguous_and_Hawaii/',
        'ftp://*****:*****@nrt3.modaps.eosdis.nasa.gov/FIRMS/viirs/Alaska/'
    ]

    filenames = [
        'MODIS_C6_USA_contiguous_and_Hawaii_MCD14DL_NRT_%04d%03d.txt' %
        (year, julian_day),
        'MODIS_C6_Alaska_MCD14DL_NRT_%04d%03d.txt' % (year, julian_day),
        'VIIRS_I_USA_contiguous_and_Hawaii_VNP14IMGTDL_NRT_%04d%03d.txt' %
        (year, julian_day),
        'VIIRS_I_Alaska_VNP14IMGTDL_NRT_%04d%03d.txt' % (year, julian_day)
    ]

    for url, filename in zip(urls, filenames):
        download_url(url + filename, osp.join(ingest_dir, filename))

    logging.info('SUCCESS, the following files are now available:')

    print('')
    for f in filenames:
        print(osp.join(ingest_dir, f))
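The day-of-year arithmetic at the top of this example can be cross-checked with strftime, which encodes the same value directly:

    # equivalent day-of-year computation, for comparison
    julian_day = int(yesterday.strftime('%j'))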
Example #14
    params.filter_by_user("RaidyHD")
    params.set_purity(True, True, True)

    latest_scrape = wallhaven.search(params)

    if os.path.exists("walls.csv"):
        with open("walls.csv", newline="") as f:
            reader = csv.reader(f)
            local_data = np.array(list(reader))
    else:
        local_data = np.array([
            ['id', 'downloaded', 'url'],
        ])

    new_downloads = list()

    for wall in reversed(latest_scrape[:10]):
        if wall["id"] not in local_data[:, 0]:
            download_path = wall["path"]
            download_url(download_path)
            status = True  # download_url's return value is unused; record as downloaded
            local_data = np.vstack(
                (local_data, [wall["id"], status, download_path]))
            new_downloads.append([wall["id"], status, download_path])

    print(new_downloads)

    with open("walls.csv", "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerows(local_data.tolist())
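np.vstack copies the whole array on every append, so repeated stacking grows quadratically with the number of new walls. A small sketch of the cheaper variant that collects plain rows and writes them once:

    # accumulate rows in a list instead of re-stacking the array each time
    rows = local_data.tolist()
    for wall in reversed(latest_scrape[:10]):
        if wall["id"] not in local_data[:, 0]:
            download_url(wall["path"])
            rows.append([wall["id"], True, wall["path"]])

    with open("walls.csv", "w", newline="") as f:
        csv.writer(f).writerows(rows)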