def single_station_rinex_garner_download(save_dir,
                                         minimum_year=None,
                                         station='tela'):
    import htmllistparse
    import requests
    import os
    import logging
    logger = logging.getLogger('rinex_garner')
    savepath = save_dir
    if not os.path.exists(savepath):
        try:
            os.makedirs(savepath)
            logger.info('Creating {} for station {}'.format(savepath, station))
        except OSError:
            logger.error("Creation of the directory %s failed" % savepath)
        else:
            logger.info("Successfully created the directory %s" % savepath)
    else:
        logger.warning('Folder {} already exists.'.format(savepath))
    command = 'http://*****:*****@garner.ucsd.edu/pub/rinex/'
    cwd, listing = htmllistparse.fetch_listing(command, timeout=30)
    dirs = [f.name for f in listing if '/' in f.name]
    if minimum_year is not None:
        years = [int(x.split('/')[0]) for x in dirs]
        years = [x for x in years if x >= minimum_year]
        dirs = [str(x) + '/' for x in years]
        logger.info('starting search from year {}'.format(minimum_year))
    for year in dirs:
        logger.info(year)
        cwd, listing = htmllistparse.fetch_listing(command + year, timeout=30)
        days = [f.name for f in listing if '/' in f.name]
        for day in days:
            cwd, listing = htmllistparse.fetch_listing(command + year + day,
                                                       timeout=30)
            files = [f.name for f in listing if f.size is not None]
            found = [f for f in files if station in f]
            if found:
                filename = found[0]
                saved_filename = savepath / filename
                if saved_filename.is_file():
                    logger.warning(
                        '{} already exists in {}, skipping...'.format(
                            filename, savepath))
                    continue
                logger.info('Downloading {} to {}.'.format(filename, savepath))
                r = requests.get(command + year + day + filename)
                with open(saved_filename, 'wb') as file:
                    file.write(r.content)
    logger.info('Done downloading station {}.'.format(station))
    return
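
Every example on this page is built on htmllistparse.fetch_listing, which parses an Apache/nginx-style directory index and returns the listing path together with FileEntry namedtuples (name, modified, size, description); directory entries carry a trailing "/" and a size of None. A minimal sketch, with a hypothetical placeholder URL:

import htmllistparse

# Hypothetical directory-index URL, used only for illustration.
cwd, listing = htmllistparse.fetch_listing("http://example.com/pub/", timeout=30)
for entry in listing:
    kind = "dir" if entry.name.endswith("/") else "file"
    print(kind, entry.name, entry.size, entry.modified)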
Example #2
def fetch_image_builds(version):
    """
    Fetch ceph container image builds

        1) Search the share path for Ceph images under DEFAULT_OSBS_SERVER
        2) Look for the JSON files of the particular RHCS version
        3) Sort builds by timestamp and return them

    todo: Fix when upgrade scenario needs image from source path
    """
    try:
        cwd, c_list = fetch_listing(DEFAULT_OSBS_SERVER, timeout=60)
        assert c_list, "Container file(s) not found"
        c_list = [i for i in c_list if i.name.endswith("json")]

        builds = dict()
        for comp in c_list:
            if version in comp.name:
                dt = datetime.datetime.fromtimestamp(mktime(
                    comp.modified)).timestamp()
                builds.update({dt: comp})

        builds = [builds[k] for k in sorted(builds)]

        return builds
    except AssertionError as err:
        logging.warning(err)
        raise AssertionError(
            f"Ceph Image builds not found : {DEFAULT_OSBS_SERVER}")
Example #3
def tarball_probe_remote_versions(source=None):
    versions = {}

    if source is None:
        return versions

    # Remove everything after the $ (start of variable)
    if '/$' in source:
        source = source[:source.index('$')]

    # Remove the filename
    else:
        for ext in TARBALL_SUPPORTED_EXTENSIONS:
            if source.endswith(ext):
                filename = source.split('/')[-1]
                source = source.replace(filename, '')
                break

    try:
        cwd, listings = htmllistparse.fetch_listing(source, timeout=30)

        for listing in listings:
            if listing.name.endswith(tuple(TARBALL_SUPPORTED_EXTENSIONS)):
                ver = SEMVER_PATTERN.search(listing.name)
                if ver is not None and ver.group(0) not in versions.keys():
                    versions[ver.group(0)] = listing.name

    except Exception as e:
        logger.warning(e)

    print(versions)

    return versions
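
tarball_probe_remote_versions likewise assumes module-level definitions for logger, TARBALL_SUPPORTED_EXTENSIONS, and SEMVER_PATTERN. One illustrative set of definitions (the real values may differ):

import logging
import re

logger = logging.getLogger(__name__)

# Illustrative extension list; the project's real list may differ.
TARBALL_SUPPORTED_EXTENSIONS = ['.tar.gz', '.tar.bz2', '.tar.xz', '.tgz']

# Loose semantic-version matcher; the project may use a stricter pattern.
SEMVER_PATTERN = re.compile(r'\d+\.\d+(?:\.\d+)?')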
Example #4
    def query(self,
              mgrs,
              sat=None,
              year=None,
              version='v1.4',
              start_date=None,
              end_date=None):
        if sat is None:
            sat = 'L'
        sat = sat.upper()
        assert sat in 'LS'

        if year is None:
            year = datetime.datetime.now().year

        assert isint(year)
        year = int(year)

        zone = mgrs[:2]
        grid = mgrs[2]
        aa_x, aa_y = tuple(mgrs[3:5])

        url = 'https://hls.gsfc.nasa.gov/data/{version}/{sat}30/{year}/{zone}/{grid}/{aa_x}/{aa_y}/'\
              .format(version=version,
                      sat=sat,
                      year=year,
                      zone=zone,
                      grid=grid,
                      aa_x=aa_x, aa_y=aa_y)

        try:
            cwd, listing = htmllistparse.fetch_listing(url)
        except requests.exceptions.HTTPError as e:
            if e.response.status_code == 404:
                listing = []
                warnings.warn(url + ' returned 404')
            else:
                raise

        listing = [item.name for item in listing if item.name.endswith('hdf')]

        if start_date is not None:
            startdate_m, startdate_d = map(int, start_date.split('-'))
            start_jd = (datetime.date(year, startdate_m, startdate_d) -
                        datetime.date(year, 1, 1)).days + 1
            listing = [
                name for name in listing
                if int(name.split('.')[3][4:]) >= start_jd
            ]

        if end_date is not None:
            enddate_m, enddate_d = map(int, end_date.split('-'))
            enddate_jd = (datetime.date(year, enddate_m, enddate_d) -
                          datetime.date(year, 1, 1)).days + 1
            listing = [
                name for name in listing
                if int(name.split('.')[3][4:]) <= enddate_jd
            ]

        return listing
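
The start_date/end_date filters above rely on the HLS v1.4 granule naming convention, in which the fourth dot-separated field is the year followed by the day of year. A quick illustration with a hypothetical granule name:

import datetime

# Hypothetical HLS v1.4 granule name; field [3] is "<year><day-of-year>".
name = "HLS.L30.T36RUU.2019123.v1.4.hdf"
doy = int(name.split('.')[3][4:])                                        # 123
acquired = datetime.date(2019, 1, 1) + datetime.timedelta(days=doy - 1)
print(doy, acquired)                                                     # 123 2019-05-03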
Example #5
def file_names(url_or_local, extension):
    is_local = Path(url_or_local).exists() and Path(url_or_local).is_dir()
    is_url = not is_local

    if is_url:
        try:
            cwd, listing = htmllistparse.fetch_listing(url_or_local)
        except requests.exceptions.HTTPError as err:
            status_code = err.response.status_code
            print(status_code)
            return [], []
        else:
            names = []
            for entry in listing:
                name = entry.name
                if name.endswith(extension):
                    names.append(name)
            return cwd, sorted(names)


    # list files in the local directory and keep the ones with a matching extension
    if is_local:
        cwd = Path(url_or_local)
        names = []
        files = [f for f in listdir(url_or_local) if isfile(join(url_or_local, f))]
        for file in files:
            name = Path(file).name
            if name.endswith(extension):
                names.append(name)
        return cwd, sorted(names)
Example #6
def fetch_html_file_list(baseurl, extension):

    cwd, listing = htmllistparse.fetch_listing(baseurl, timeout=10)
    result = [
        baseurl + "/" + item.name for item in listing
        if item.name.endswith(extension)
    ]
    return result
Example #7
    def fetch(self, version=None):
        _cwd, listing = htmllistparse.fetch_listing(self.url, params=self.url_params)
        for e in listing:
            try:
                semver = self.__parse_entry(e.name)
                if version is None or semver > Version(version, partial=True):
                    self.versions.add(semver)
            except ValueError:
                print('Ignoring invalid version:', e.name, file=sys.stderr)
Example #8
    def query(self,
              mgrs,
              sat='L',
              year=None,
              version='v1.4',
              startdate=None,
              enddate=None):
        sat = sat.upper()
        assert sat in 'LS'

        if year is None:
            year = datetime.datetime.now().year

        assert isint(year)
        year = int(year)

        zone = mgrs[:2]
        grid = mgrs[2]
        aa_x, aa_y = tuple(mgrs[3:5])

        url = 'https://hls.gsfc.nasa.gov/data/{version}/{sat}30/{year}/{zone}/{grid}/{aa_x}/{aa_y}/'\
              .format(version=version,
                      sat=sat,
                      year=year,
                      zone=zone,
                      grid=grid,
                      aa_x=aa_x, aa_y=aa_y)

        cwd, listing = htmllistparse.fetch_listing(url)

        listing = [item.name for item in listing if item.name.endswith('hdf')]

        if startdate is not None:
            startdate_m, startdate_d = map(int, startdate.split('-'))
            start_jd = (datetime.date(year, startdate_m, startdate_d) -
                        datetime.date(year, 1, 1)).days + 1
            listing = [
                name for name in listing
                if int(name.split('.')[3][4:]) >= start_jd
            ]

        if enddate is not None:
            enddate_m, enddate_d = map(int, enddate.split('-'))
            enddate_jd = (datetime.date(year, enddate_m, enddate_d) -
                          datetime.date(year, 1, 1)).days + 1
            listing = [
                name for name in listing
                if int(name.split('.')[3][4:]) <= enddate_jd
            ]

        return listing
Example #9
def print_site(uri, show_all):
    _, l = htmllistparse.fetch_listing(uri)
    l = sorted(l, key=lambda i: i.modified, reverse=True)
    for i in l:

        # TODO: figure out what the latest 'stable' release is and print
        # that instead.. probably can use a regex?

        if not i.name.endswith("/"):
            continue

        print(i.name.strip("/"))
        if not show_all:
            break
Example #11
def scan_url(url, regex, name, recursive):
    time.sleep(1)
    logging.debug(f"scan_url({url}, {regex}, {name}, {recursive}")
    output_array = []
    try:
        cwd, listing = htmllistparse.fetch_listing(url, timeout=15)
        for file_item in listing:
            if not file_item.size and recursive:
                output_array.extend(
                    scan_url(urljoin(url, file_item.name), regex, name, recursive))
            elif regex.search(file_item.name):
                file_url = urljoin(config_item["url"], file_item.name)
                output_array.extend(scan_binary(file_url, file_item, name))
    except Exception:
        logging.error(
            f"htmllistparse.fetch_listing({url}, timeout=15) returned an exception"
        )
    return output_array
Example #12
def preview_remote(api: sly.Api, task_id, context, state, app_logger):
    global listing
    api.task.set_field(task_id, "data.previewError", "")
    try:
        remote_dir = state["remoteDir"]
        parts = urlparse(remote_dir)
        project_name = parts.path.rstrip("/")
        if project_name not in ["", "/"]:
            project_name = sly.fs.get_file_name(project_name) # last directory name from path
        else:
            project_name = ""

        cwd, raw_listing = htmllistparse.fetch_listing(remote_dir, timeout=30)

        listing = []
        listing_flags = []
        meta_json_exists = False
        for file_entry in raw_listing:
            name = file_entry.name
            #name = slugify(name, lowercase=False, save_order=True)
            if name == 'meta.json':
                meta_json_exists = True
                listing.append({"name": name})
                listing_flags.append({"selected": True, "disabled": True})
            elif name.endswith("/"):
                listing.append({"name": name.rstrip("/")})
                listing_flags.append({"selected": True, "disabled": False})
            else:
                app_logger.info("Skip file {!r}".format(urljoin(remote_dir, name)))
                listing.append({"name": name})
                listing_flags.append({"selected": False, "disabled": True})

        if meta_json_exists is False:
            raise FileNotFoundError("meta.json")

        fields = [
            #{"field": "state.projectName", "payload": slugify(project_name, lowercase=False, save_order=True)},
            {"field": "state.projectName", "payload": project_name},
            {"field": "data.listing", "payload": listing},
            {"field": "state.listingFlags", "payload": listing_flags},
        ]
        api.app.set_fields(task_id, fields)
    except Exception as e:
        api.task.set_field(task_id, "data.previewError", repr(e))
Example #13
def download_directory(url, storage_path):
    #Given a url and a local directory, list and download the contents of an entire directory.
    #Change directory to the storage directory.
    os.chdir(storage_path)
    #Get a list of all files in the local directory
    #https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory
    _, _, local_filenames = next(os.walk(storage_path))
    #Get remote directory listing via HTTP (uses 3rd party module htmllistparse)
    cwd, listing = htmllistparse.fetch_listing(url, timeout=30)
    #Filter out Python and shell scripts from the listing (uses a list comprehension)
    listing = [
        i.name for i in listing if not ('.py' in i.name or '.sh' in i.name)
    ]
    print(f"Listing: {listing}")
    #Iterate through all served files/directories to determine what is a file and what is a directory
    for thing in listing:
        #Determine if object is directory with / in name
        if '/' in thing:
            directory = thing
            directorypath = f"{storage_path}/{directory}"
            directory_noslash = directory.replace(os.path.sep, '')
            newurl = f"{url}/{directory_noslash}"
            #Check if directory exists locally
            if not os.path.isdir(directorypath):
                #If not, create directory
                os.mkdir(directorypath)
            #Recursively call this function until no more directories are found.
            download_directory(newurl, directorypath)
            #When complete, change back to original directory to reset the recursive "chdir"s
            os.chdir(storage_path)
        else:
            #Else... item is a file to download
            file = thing
            #Check if we've downloaded a file already.  If so, skip it!
            if file not in local_filenames:
                download_string = f"{url}/{file}"
                #Call download_file to download the single file chunk by chunk.
                download_file(download_string, storage_path)
            else:
                print(f"Already downloaded {file}")
Example #14
def startbattle():
    # get results from API
    cwd, listing = htmllistparse.fetch_listing(website, timeout=10)
    battle = random.choice(listing)
    print(battle.name)

    replayfile = str(website) + str(battle.name)
    replaysave = temp_path + str(battle.name)

    # download replay
    try:
        urllib.request.urlretrieve(replayfile, replaysave)
    except urllib.error.URLError as e:
        return

    # run Observer
    os.system("ExampleObserver.exe --Path \"" + replaysave + "\"")

    # delete temp files
    tempfilelist = glob.glob(os.path.join(temp_path, "*.*"))
    for tempfile in tempfilelist:
        os.remove(tempfile)
Example #15
File: store.py Project: pythseq/scelvis
def glob_data_sets(url):
    """Return list of all data sets behind the given ``url``."""
    result = []
    if url.scheme in data.PYFS_SCHEMES:
        curr_fs = data.make_fs(url)
        for match in curr_fs.glob("*.h5ad"):
            match_path = fs.path.basename(match.path)
            logger.info("Found data set %s at %s" %
                        (match_path, data.redacted_urlunparse(url)))
            result.append(
                url._replace(path=fs.path.join(url.path, match.path[1:])))
    elif url.scheme == "s3":
        anon = url.username is None and url.password is None
        s3 = s3fs.S3FileSystem(anon=anon,
                               key=url.username,
                               secret=url.password)
        if url.path:
            pattern = "%s/%s/*.h5ad" % (url.hostname, url.path)
        else:
            pattern = "%s/*.h5ad" % (url.hostname, )
        for match in s3.glob(pattern):
            result.append(url._replace(path=match.split("/", 1)[1]))
    elif url.scheme.startswith("http"):
        cwd, listing = htmllistparse.fetch_listing(urlunparse(url), timeout=30)
        for entry in listing:
            if entry.name.endswith(".h5ad"):
                result.append(url._replace(path=fs.path.join(cwd, entry.name)))
    elif url.scheme.startswith("irods"):
        with data.create_irods_session(url) as irods_session:
            # Get pointed-to collection.
            collection = irods_session.collections.get(url.path)
            for data_obj in collection.data_objects:
                if data_obj.name.endswith(".h5ad"):
                    result.append(
                        url._replace(
                            path=fs.path.join(url.path, data_obj.name)))
    else:
        raise ScelVisException("Invalid URL scheme: %s" % url.scheme)
    return result
Example #16
import htmllistparse as ftp
from epivizfileserver.parser import BigWig
from joblib import Parallel, delayed
import struct
import pandas
import json
import pickle

url = "https://egg2.wustl.edu/roadmap/data/byFileType/signal/consolidated/macs2signal/foldChange/"
cwd, files = ftp.fetch_listing(url)

print("total files - ", len(files))


def get_file_index(file, baseurl):
    print("processing file - ", file.name)
    bw = BigWig(baseurl + file.name)
    print("\t getting zoom headers")
    bw.getZoomHeader()
    print("\t get tree for full data offset")
    tree = bw.getTree(-2)
    bw.getId("chr1")
    ofile = open("objects/" + file.name + ".pickle", 'wb')
    pickle.dump(bw, ofile)
    # ifile = "trees/" + file.name + ".fulltreeindex"
    # print("\t writing index ", ifile)
    # with open(ifile, "wb") as f:
    #     f.write(tree)


# This will download the index from all the files
Parallel(n_jobs=-1)(delayed(get_file_index)(f, url) for f in files)
Example #17
def all_orbitals_download(save_dir, minimum_year=None, hr_only=None):
    import htmllistparse
    import requests
    import os
    import logging
    logger = logging.getLogger('rinex_garner')
    logger.info('Creating {}/{}'.format(save_dir, 'gipsy_orbitals'))
    savepath = save_dir / 'gipsy_orbitals'
    if not os.path.exists(savepath):
        try:
            os.makedirs(savepath)
        except OSError:
            logger.error("Creation of the directory %s failed" % savepath)
        else:
            logger.info("Successfully created the directory %s" % savepath)
    else:
        logger.warning('Folder {} already exists.'.format(savepath))
    command = 'https://sideshow.jpl.nasa.gov/pub/JPL_GPS_Products/Final/'
    cwd, listing = htmllistparse.fetch_listing(command, timeout=30)
    dirs = [f.name for f in listing if '/' in f.name]
    if minimum_year is not None:
        years = [int(x.split('/')[0]) for x in dirs]
        years = [x for x in years if x >= minimum_year]
        dirs = [str(x) + '/' for x in years]
        logger.info('starting search from year {}'.format(minimum_year))
    for year in dirs:
        logger.info(year)
        cwd, listing = htmllistparse.fetch_listing(command + year, timeout=30)
        files = [f.name for f in listing if f.size is not None]
        #        2017-01-28.eo.gz
        #        2017-01-28.shad.gz
        #        2017-01-28_hr.tdp.gz
        #        2017-01-28.ant.gz
        #        2017-01-28.tdp.gz
        #        2017-01-28.frame.gz
        #        2017-01-28.pos.gz
        #        2017-01-28.wlpb.gz
        if hr_only is None:
            suffixes = ['eo', 'shad', 'ant', 'tdp', 'frame', 'pos', 'wlpb']
            for suff in suffixes:
                found = [
                    f for f in files
                    if suff in f.split('.')[1] and '_' not in f
                ]
                if found:
                    for filename in found:
                        logger.info('Downloading {} to {}.'.format(
                            filename, savepath))
                        r = requests.get(command + year + filename)
                        with open(savepath / filename, 'wb') as file:
                            file.write(r.content)
        else:
            pre_found = [f for f in files if '_' in f]
            if pre_found:
                found = [
                    f for f in pre_found
                    if f.split('.')[0].split('_')[1] == 'hr'
                ]
                if found:
                    for filename in found:
                        logger.info('Downloading {} to {}.'.format(
                            filename, savepath))
                        r = requests.get(command + year + filename)
                        with open(savepath / filename, 'wb') as file:
                            file.write(r.content)
    return
Example #18
def start_import(api: sly.Api, task_id, context, state, app_logger):
    fields = [
        {"field": "data.destinationError", "payload": ""},
        {"field": "data.uploadError", "payload": ""},
        {"field": "data.uploadStarted", "payload": True},
        {"field": "data.uploadedCount", "payload": 0},
        {"field": "data.totalCount", "payload": 0},
        {"field": "data.uploadProgress", "payload": 0},
        {"field": "data.uploadDsName", "payload": ""},
        {"field": "data.uploadedDsCount", "payload": 0},
        {"field": "data.totalDsCount", "payload": 0},
        {"field": "data.uploadDsProgress", "payload": 0},
    ]
    api.app.set_fields(task_id, fields)

    remote_dir = state["remoteDir"]
    listing_flags = state["listingFlags"]

    workspace_name = state["workspaceName"]
    project_name = state["projectName"] #slugify(state["projectName"], lowercase=False, save_order=True)
    if project_name == "":
        _show_error(api, task_id, "data.destinationError", "Project name is not defined", app_logger)
        return

    #@TODO: will be added in future releases
    add_to_existing_project = False #state["addToExisting"]

    existing_meta = None
    try:
        workspace = api.workspace.get_info_by_name(TEAM_ID, workspace_name)
        if workspace is None:
            workspace = api.workspace.create(TEAM_ID, workspace_name)
            app_logger.info("Workspace {!r} is created".format(workspace.name))
        else:
            app_logger.info("Workspace {!r} already exists".format(workspace.name))

        project = api.project.get_info_by_name(workspace.id, project_name)
        if project is None:
            project = api.project.create(workspace.id, project_name)
            app_logger.info("Project {!r} is created".format(project.name))
        else:
            _show_error(api, task_id, "data.destinationError", "Project {!r} already exists".format(project.name), app_logger)
            return

            if add_to_existing_project is False:
                app_logger.warn("Project {!r} already exists. Allow add to existing project or change the name of "
                                "destination project. We recommend to upload to new project. Thus the existing project "
                                "will be safe. New name will be generated".format(project.name))
                project = api.project.create(workspace.id, project_name, change_name_if_conflict=True)
            else:
                existing_meta_json = api.project.get_meta(project.id)
                existing_meta = sly.ProjectMeta.from_json(existing_meta_json)

        update_res_project_icon = None
        fields = [
            {"field": "data.resultProject", "payload": project.name},
            {"field": "data.resultProjectId", "payload": project.id},
            # {"field": "data.resultProjectPreviewUrl", "payload": 0},
        ]
        api.app.set_fields(task_id, fields)

        resp = requests.get(urljoin(remote_dir, 'meta.json'))
        meta_json = resp.json()
        meta = sly.ProjectMeta.from_json(meta_json)
        if existing_meta is not None:
            meta = existing_meta.merge(meta)

        api.project.update_meta(project.id, meta.to_json())

        datasets_to_upload = []
        for ds_info, flags in zip(listing, listing_flags):
            dataset_name = ds_info['name']
            if flags["selected"] is False:
                app_logger.info("Folder {!r} is not selected, it will be skipped".format(dataset_name))
                continue
            if flags["disabled"] is True:
                app_logger.info("File {!r} is skipped".format(dataset_name))
                continue
            datasets_to_upload.append(dataset_name)

        api.task.set_field(task_id, "data.totalDsCount", len(datasets_to_upload))
        for index, dataset_name in enumerate(datasets_to_upload):
            dataset = api.dataset.get_info_by_name(project.id, dataset_name)
            if dataset is None:
                dataset = api.dataset.create(project.id, dataset_name)
                app_logger.info("Dataset {!r} is created".format(dataset.name))
            else:
                app_logger.warn("Dataset {!r} already exists. Uploading is skipped".format(dataset.name))
                _increment_ds_progress(task_id, api, index + 1, len(datasets_to_upload))
                continue

            #img_dir = reduce(urljoin, [remote_dir, dataset_name, 'img'])
            #ann_dir = reduce(urljoin, [remote_dir, dataset_name, 'ann'])
            img_dir = os.path.join(remote_dir, dataset_name, 'img/')
            ann_dir = os.path.join(remote_dir, dataset_name, 'ann/')

            cwd, img_listing = htmllistparse.fetch_listing(img_dir, timeout=30)

            uploaded_to_dataset = 0
            fields = [
                {"field": "data.totalCount", "payload": len(img_listing)},
                {"field": "data.uploadDsName", "payload": dataset.name},
            ]
            api.app.set_fields(task_id, fields)

            task_progress = sly.Progress("Uploading dataset {!r}".format(dataset.name), len(img_listing))
            for batch in sly.batched(img_listing, batch_size=50):
                try:
                    names = []
                    image_urls_batch = []
                    annotations_batch = []

                    for file_entry in batch:
                        name = file_entry.name
                        try:
                            img_url = urljoin(img_dir, name) #'https://i.imgur.com/uFYNj9Z.jpg'
                            ann_url = urljoin(ann_dir, name + sly.ANN_EXT)

                            resp = requests.get(ann_url)
                            if resp.status_code == 404:
                                ann_url = urljoin(ann_dir, sly.fs.get_file_name(name) + sly.ANN_EXT)
                                resp = requests.get(ann_url)

                            resp.raise_for_status()
                            ann_json = resp.json()

                            ann = sly.Annotation.from_json(ann_json, meta)
                        except Exception as e:
                            app_logger.warn("Image {!r} and annotation {!r} are skipped due to error: {}"
                                            .format(img_url, ann_url, repr(e)))
                            continue

                        names.append(name)
                        image_urls_batch.append(img_url)
                        annotations_batch.append(ann)

                    img_infos = api.image.upload_links(dataset.id, names, image_urls_batch)
                    uploaded_ids = [img_info.id for img_info in img_infos]
                    api.annotation.upload_anns(uploaded_ids, annotations_batch)
                    uploaded_to_dataset += len(uploaded_ids)
                except Exception as e:
                    app_logger.warn("Batch ({} items) of images is skipped due to error: {}"
                                    .format(len(batch), repr(e)))
                finally:
                    task_progress.iters_done_report(len(batch))
                    _increment_task_progress(task_id, api, task_progress)

                    #only once + to check the image urls are loaded correctly
                    if update_res_project_icon is None:
                        pinfo = api.project.get_info_by_id(project.id)
                        if pinfo.reference_image_url is None:
                            raise RuntimeError("Preview image is not accessible. Check that image URLs are public.")
                        update_res_project_icon = api.image.preview_url(pinfo.reference_image_url, 100, 100)
                        api.task.set_field(task_id, "data.resultProjectPreviewUrl", update_res_project_icon)

            _increment_ds_progress(task_id, api, index + 1, len(datasets_to_upload))
            app_logger.info("Dataset {!r} is uploaded: {} images with annotations"
                            .format(dataset.name, uploaded_to_dataset))

    except Exception as e:
        app_logger.error(repr(e))
        api.task.set_field(task_id, "data.uploadError", repr(e))

    api.task.set_output_project(task_id, project.id, project.name)
    my_app.stop()