Example #1
    def test_path(self):
        url = FILE_SMALL

        # path=''
        download(url=url, path='')
        f = os.path.join(TEST_DATA_DIR, get_resource_name(url))
        assert os.path.exists(f)
        os.remove(f)

        # path='.'
        download(url=url, path='.')
        f = os.path.join(TEST_DATA_DIR, get_resource_name(url))
        assert os.path.exists(f)
        os.remove(f)

        # path=TEST_DATA_SUBDIR
        download(url=url, path=TEST_DATA_SUBDIR)
        f = os.path.join(TEST_DATA_SUBDIR, get_resource_name(url))
        assert os.path.exists(f)
        os.remove(f)

        # path='foobar'
        download(url=url, path='foobar')
        f = os.path.join(TEST_DATA_DIR, 'foobar')
        assert os.path.exists(f)
        os.remove(f)

        # path='foo/bar'
        with self.assertRaises(IOError):
            download(url=url, path='foo/bar')
        f = os.path.join(TEST_DATA_DIR, 'foo', 'bar')
        assert not os.path.exists(f)
Example #2
def main(cmdargs):
    index_dir = os.path.abspath(cmdargs.index_dir)
    if not os.path.exists(index_dir):
        os.mkdir(index_dir)

    dl_index_fname = "index.csv"
    index_db_fname = "index.db"
    index_db_tname = "landsat"
    temp_index_fname = "tmpindex"
    temp_index_db_fname = "tmpindex.db"

    tmpdir = tempfile.mkdtemp()
    zip_file = os.path.join(tmpdir, "{0:s}.gz".format(temp_index_fname))

    if cmdargs.channel == "google":
        index_url = GOOGLE_LANDSAT_INDEX
        index_dtype = GOOGLE_LANDSAT_DTYPE
        index_parse_dates = ["DATE_ACQUIRED", "SENSING_TIME"]
        unzip_cmd = ["gzip", "-d", zip_file]
    elif cmdargs.channel == "aws":
        index_url = S3_LANDSAT_INDEX
        index_dtype = S3_LANDSAT_DTYPE
        index_parse_dates = ["acquisitionDate"]
        unzip_cmd = ["gzip", "-d", zip_file]
    else:
        raise RuntimeError("Channel is not implemented yet!")

    logger.info("Downloading zipped index file started.")
    download(index_url, path=zip_file)
    logger.info("Downloading zipped index file finished.")

    logger.info("Unzipping index file started.")
    if subprocess.call(unzip_cmd) != 0:
        raise RuntimeError("Failed to unzip the Landsat index file!")
    logger.info("Unzipping index file finished.")

    csv_file = os.path.join(index_dir, dl_index_fname)
    shutil.move(os.path.join(tmpdir, temp_index_fname),
                os.path.join(index_dir, temp_index_fname))
    os.rename(os.path.join(index_dir, temp_index_fname), csv_file)

    tmp_db_file = os.path.join(tmpdir, temp_index_db_fname)
    csv_db = sa.create_engine("sqlite:///{0:s}".format(tmp_db_file))
    chunksize = int(1e5)
    j = 1
    logger.info("Updating SQLite database started.")
    for df in pd.read_csv(csv_file,
                          chunksize=chunksize,
                          iterator=True,
                          dtype=index_dtype,
                          parse_dates=index_parse_dates):
        df = df.rename(columns={c: c.replace(' ', '_') for c in df.columns})
        df.index += j
        df.to_sql(index_db_tname, csv_db, if_exists='append')
        j = df.index[-1] + 1
    csv_db_file = os.path.join(index_dir, index_db_fname)
    shutil.move(tmp_db_file, csv_db_file)
    logger.info("Updating SQLit database finished.")
Example #3
    def test_pass_through_opts(self):
        url = FILE_5MB
        opts_url = FILE_1MB

        download(url=url, pass_through_opts={pycurl.URL: opts_url})
        f = os.path.join(TEST_DATA_DIR, get_resource_name(url))
        opts_f = os.path.join(TEST_DATA_DIR, get_resource_name(opts_url))
        assert os.path.exists(opts_f)
        assert not os.path.exists(f)
        os.remove(opts_f)
Example #5
def download_and_detect(id, url, filename):
    opt = {pycurl.PROXY: config.tor_pool_url,
           pycurl.PROXYPORT: config.tor_pool_port,
           pycurl.PROXYTYPE: pycurl.PROXYTYPE_SOCKS5_HOSTNAME}
    download(url, path=filename, pass_through_opts=opt)
    with open(filename, 'rb') as f:
        tags = exifread.process_file(f)
    set_exif_data(id, {'exif': tags.keys()})
Example #6
def download_all(entries, zipDir, unzipDir):
    for i in range(0, len(entries)):
        entry = entries[i]
        url = entry['link'][0]['href']
        print('downloading product ' + str(i))
        filepath = zipDir + str(i) + '.zip'
        homura.download(url=url, auth=session.auth, path=filepath)
        zip_ref = zipfile.ZipFile(filepath, 'r')
        zip_ref.extractall(unzipDir)
        zip_ref.close()
    print('finished downloads')
Example #7
    def test_auth(self):
        url = "http://httpbin.org/basic-auth/aaa/bbb"
        auth = ("aaa", "bbb")

        download(url=url, auth=auth)
        f = os.path.join(TEST_DATA_DIR, get_resource_name(url))
        with open(f) as handle:
            txt = handle.read()
        assert '"authenticated": true' in txt
        assert '"user": "******"' in txt
        assert os.path.exists(f)
        os.remove(f)
Example #8
    def test_unicode(self):
        url = FILE_UNICODE
        path_ascii = TEST_DATA_ASCII
        path_unicode = TEST_DATA_UNICODE
        path_utf8 = TEST_DATA_UTF8

        # No path
        download(url=url)
        f = os.path.join(TEST_DATA_DIR, get_resource_name(url))
        assert os.path.exists(f)
        os.remove(f)

        # ASCII path
        download(url=url, path=path_ascii)
        f = os.path.join(TEST_DATA_DIR, path_ascii, get_resource_name(url))
        assert os.path.exists(f)
        os.remove(f)

        # Unicode path
        download(url=url, path=path_unicode)
        f = os.path.join(TEST_DATA_DIR, path_unicode, get_resource_name(url))
        assert os.path.exists(f)
        os.remove(f)

        # UTF-8 path
        download(url=url, path=path_utf8)
        f = os.path.join(utf8_encode(TEST_DATA_DIR), path_utf8,
                         utf8_encode(get_resource_name(url)))
        assert os.path.exists(f)
        os.remove(f)
Example #10
    def download(self, scenes, bands):
        """
        Downloads landsat 5 from Google Storage
        :scenes: 
            list of scenes to down
        :bands:
            bands to download
        """

        scene_urls = [build_band_urls(scene, bands) for scene in scenes]
        image_path = os.path.join(self.download_dir, scenes[0])
        os.makedirs(image_path)

        for band_urls in scene_urls:
            for band_url in band_urls:
                download(band_url, image_path)
Example #11
    def read_file(self, changeset_file):
        """Download the replication changeset file or read it directly from the
        filesystem (for test purposes).
        """
        if isfile(changeset_file):
            self.filename = changeset_file
        else:
            self.path = mkdtemp()
            self.filename = join(self.path, basename(changeset_file))
            download(changeset_file, self.path)

        self.xml = ET.fromstring(gzip.open(self.filename).read())

        # delete folder created to download the file
        if not isfile(changeset_file):
            rmtree(self.path)
Example #13
    def downloadImages_l(self):
        self.downloadBar_l.setMinimum(0)
        self.downloadBar_l.setValue(0)
        self.downloadLabel_l.setText(u"Waiting for download")
        idfile = self.savecsv_l.text()
        outputdir = self.savedownload_l.text()
        with open(idfile, 'r') as csvf:
            csv_count = len(csvf.readlines()) - 1
        print(csv_count)
        self.downloadBar_l.setMaximum(csv_count)
        with open(idfile, 'r') as f:
            idcsv = csv.reader(f)
            header = next(idcsv)
            for ck, row in enumerate(idcsv):
                self.downloadBar_l.setValue(ck)
                sid = row[2]
                ourl = row[5]
                urlpaths = self.geturl_l(ourl)
                alldir = os.path.join(outputdir, sid)
                if os.path.exists(outputdir + '/{}'.format(sid)):
                    shutil.rmtree(outputdir + '/{}'.format(sid))
                if not os.path.exists(outputdir + '/{}.zip'.format(sid)):
                    ct = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
                    self.downloadLabel_l.setText(str(ck) + "/" + str(csv_count))
                    print(ct + ' Downloading {}...'.format(sid))
                    for urlpath in urlpaths:
                        if not os.path.exists(alldir):
                            os.makedirs(alldir)
                        ttt = download('{}'.format(urlpath), alldir)
                        # self.downloadLabel.setText(ttt["speed"])
                    dt = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
                    print(dt + ' compressing...')
                    shutil.make_archive(outputdir + '/{}'.format(sid), 'zip', alldir)
                    shutil.rmtree(alldir)
Example #14
    def download(self, obj, path=None, show_progress=True, resume=True,
                 auto_retry=True):
        """
        Download a file

        :param obj: :class:`.File` object
        :param str path: local path
        :param bool show_progress: whether to show download progress
        :param bool resume: whether to resume on unfinished downloads
            identified by filename
        :param bool auto_retry: whether to retry automatically upon closed
            transfer until the file's download is finished
        """
        download(obj.url, path=path, session=self.http.session,
                 show_progress=show_progress, resume=resume,
                 auto_retry=auto_retry)
Example #15
def download_images(scene=SCENE, bands=BANDS, url=URL, path=PATH):
    """
    Download bands on defined PATH for test_process
    """

    downloaded_images = []

    _bands = ["_B{}.TIF".format(i) for i in bands]
    _bands.extend(["_BQA.TIF", "_MTL.txt"])

    scene_bands = [{
        "url": "{url}/{scene}/{scene}{band}".format(url=url, scene=scene, band=band),
        "band": band.split(".")[0],
    } for band in _bands]

    assert len(scene_bands) == len(bands) + 2

    path = check_create_folder(path)

    for band in scene_bands:
        f = os.path.join(path, band["url"].split("/")[-1])
        download(url=band["url"], path=path)
        downloaded_images.append(f)

    return downloaded_images
Example #16
    def install_user_plugin(self, plugin_zip_url):
        """
        Download and install the plugin from an unofficial repository

        Download and extract a zip archive into DeepNLPF's plugins root
        directory, then run the installation script `requirements.sh`.

        Arguments:
            plugin_zip_url: str
                The URL to a plugin's zip archive
        Return: None
        """
        import re
        import shutil
        import zipfile

        import homura

        if not os.path.exists(self.PLUGIN_PATH):
            os.makedirs(self.PLUGIN_PATH)

        zipfile_path = os.path.join(self.PLUGIN_PATH, 'user_zip.zip')
        try:
            homura.download(url=plugin_zip_url, path=zipfile_path)
        except Exception as err:
            log.logger.error(err)
            sys.exit(1)

        plugin_root_dir = ''
        with zipfile.ZipFile(zipfile_path) as zfd:
            plugin_root_dir = zfd.namelist()[0]
            zfd.extractall(self.PLUGIN_PATH)

        extracted_path = os.path.join(self.PLUGIN_PATH, plugin_root_dir)
        installation_dir = re.sub(r'^plugin_', '', plugin_root_dir)
        installation_dir = re.sub(r'-master/?$', '', installation_dir)
        installation_path = os.path.join(self.PLUGIN_PATH, installation_dir)

        if os.path.exists(installation_path):
            shutil.rmtree(installation_path)

        os.rename(extracted_path, installation_path)
        os.remove(zipfile_path)

        install_script_path = os.path.join(installation_path,
                                           'requirements.sh')
        os.system(f'sh {install_script_path}')
Example #17
    def test_redirect(self):
        url = FILE_301_SMALL
        eurl = FILE_SMALL

        # No path
        download(url=url)
        f = os.path.join(TEST_DATA_DIR, get_resource_name(url))
        ef = os.path.join(TEST_DATA_DIR, get_resource_name(eurl))
        assert not os.path.exists(f)
        assert os.path.exists(ef)
        os.remove(ef)

        # path='foobar'
        download(url=url, path='foobar')
        f = os.path.join(TEST_DATA_DIR, 'foobar')
        assert os.path.exists(f)
        os.remove(f)
Example #19
def download_compranet(years):
  """
  Download Compranet data for a list of years, unzip the files, and convert
  the XLS files to CSV.

  :param years:
    The years for which to download data
  :type years:
    List

  """
  
  tmp_folder = os.path.join(settings.folder_full_cache, 'tmp')
  check_create_folder(tmp_folder)

  for year in years:
    file_name = os.path.join(settings.fn_prefix + year + settings.fn_extension)
    src_url = settings.compranet_base_url + file_name

    print "Downloading %s" % file_name
    download(url=src_url, path=tmp_folder) 

    file_path = os.path.join(tmp_folder, file_name)
    with zipfile.ZipFile(file_path, 'r') as myzip:
      myzip.extractall(tmp_folder)

  pattern = os.path.join(tmp_folder, '*.xls*')

  for src_file in list_files(pattern):
    csv_path = os.path.join(settings.folder_full_cache, get_filename(src_file) + '.csv')
    wb = xlrd.open_workbook(src_file)
    sheet = wb.sheet_by_index(0)

    with open(csv_path, 'wb') as csvfile:
      writer = unicodecsv.writer(csvfile, encoding='utf-8')
      for rownum in range(sheet.nrows):
        writer.writerow(sheet.row_values(rownum))

  remove_folder(tmp_folder)
Example #20
    def download(self, id, path='.', **kwargs):
        """Download a product using homura's download function.

        If you don't pass the title of the product, it will use the id as
        filename. Further keyword arguments are passed to the
        homura.download() function.
        """
        product = self.get_product_info(id)
        path = join(path, product['title'] + '.zip')
        kwargs = self._fillin_cainfo(kwargs)

        print('Downloading %s to %s' % (id, path))

        # Check if the file exists and if it is complete
        if exists(path):
            if getsize(path) == product['size']:
                print('%s was already downloaded.' % path)
                return path

        download(product['url'], path=path, session=self.session, **kwargs)
        return path
Example #21
def download_mbtiles(dest_folder, country, **kwargs):
    """Download QA Tiles for the selected country.

    Download a gzipped mbtiles file of all OSM data within a country from S3.
    More details at https://osmlab.github.io/osm-qa-tiles/

    Parameters
    ------------
    dest_folder: str
        Folder to save download into
    country: str
        Country for which to download the OSM QA tiles
    **kwargs: dict
        Other properties from CLI config passed as keywords to other utility functions
    """
    download_file = path.join(dest_folder, '{}.mbtiles'.format(country))
    print('Saving QA tiles to {}'.format(download_file))
    url = 'https://s3.amazonaws.com/mapbox/osm-qa-tiles-production/latest.country/{}.mbtiles.gz'.format(country)
    gz = tempfile.TemporaryDirectory()
    tmp_path = path.join(gz.name, '{}.mbtiles.gz'.format(country))
    download(url=url, path=tmp_path)
    with gzip.open(tmp_path, 'rb') as r:
        with open(download_file, 'wb') as w:
            w.write(r.read())
Example #22
    def downloadProduct(self, id, locale=None, path=None):
        info = None
        while info is None:
            try:
                info = self.getOData(id)
            except APIError as e:
                self.logger.info('API error: %s \n Waiting %d seconds.' %
                                 (str(e), self.sleep))
                time.sleep(self.sleep)
        if path is None:
            outputPath = util.checkFolder('SentinelAPI', Output=True)
            year = datetime.strptime(info['date'], '%Y-%m-%dT%H:%M:%SZ').year
            outputName = info['name'] + '.zip'

            if locale:
                outputPath = util.checkFolder(locale, path=outputPath)
            outputPath = util.checkFolder(year, path=outputPath)
            outputPath = os.path.join(outputPath, outputName)
        else:
            outputPath = path

        if os.path.exists(outputPath) and os.path.getsize(
                outputPath) == info['size']:
            # check if md5 matches with server
            if self.compareMD5(outputPath, info['md5']):
                self.logger.info('%s was already found.' % outputPath)
                return outputPath, info
            else:
                self.logger.info('%s was not downloaded correctly' %
                                 outputPath)
                os.remove(outputPath)

        homura.download(info['url'], path=outputPath, auth=self.sesh.auth)

        return outputPath, info
Example #23
    def download(self, id, path='.', checksum=False, **kwargs):
        """Download a product using homura's download function.

        If you don't pass the title of the product, it will use the id as
        filename. Further keyword arguments are passed to the
        homura.download() function.
        """
        # Check if API is reachable.
        product = None
        while product is None:
            try:
                product = self.get_product_info(id)
            except ValueError:
                print("Invalid API response. Trying again in 3 minutes.")
                sleep(180)

        path = join(path, product['title'] + '.zip')
        kwargs = self._fillin_cainfo(kwargs)

        print('Downloading %s to %s' % (id, path))

        # Check if the file exists and passes md5 test
        if exists(path):
            if md5_compare(path, product['md5'].lower()):
                print('%s was already downloaded.' % path)
                return path
            else:
                remove(path)

        download(product['url'], path=path, session=self.session, **kwargs)

        # Check integrity with MD5 checksum
        if checksum is True:
            if not md5_compare(path, product['md5'].lower()):
                raise ValueError('File corrupt: Checksums do not match')
        return path
Example #24
def download_file(
    url,
    file_path,
    md5_hash=None,
    timeout=10,
    block_size=1024 * 1024,
    show_progress=False,
    proxy=None
):
    """Resumable download.
    Expect the server to support byte ranges.

    Parameters
    ----------
    url: string
         URL
    file_path: string
               Local file path to store the downloaded file
    md5_hash: string
              Expected MD5 string of downloaded file
    timeout: int
             Seconds to wait before terminating request
    block_size: int
                Number of bytes to read per chunk (default: 1024 * 1024 = 1 MB)
    show_progress: bool
                   Show progress bar
    proxy: string
           Optional proxy URL, forwarded to pycurl's PROXY option when homura is used
    """
    if os.path.exists(file_path) and os.path.getsize(file_path):
        return

    if HOMURA:
        if proxy:
            pass_through_opts = {pycurl.PROXY: proxy}
            download(url=url, path=file_path, pass_through_opts=pass_through_opts)
        else:
            download(url=url, path=file_path)
    else:
        tmp_file_path = file_path + ".part"
        first_byte = os.path.getsize(tmp_file_path) if os.path.exists(tmp_file_path) else 0
        file_mode = "ab" if first_byte else "wb"
        file_size = -1
        try:
            file_size = int(requests.head(url).headers["Content-length"])
            headers = {"Range": "bytes=%s-" % first_byte}
            r = requests.get(url, headers=headers, stream=True)
            if show_progress:
                desc = "Downloading {}".format(url.split("/")[-1])
                pbar = tqdm(
                    total=file_size,
                    initial=first_byte,
                    unit="B",
                    unit_scale=True,
                    desc=desc,
                )
            with open(tmp_file_path, file_mode) as f:
                for chunk in r.iter_content(chunk_size=block_size):
                    if chunk:  # filter out keep-alive new chunks
                        f.write(chunk)
                        if show_progress:
                            pbar.update(len(chunk))  # the final chunk may be smaller than block_size
            if show_progress:
                pbar.close()
        except IOError as e:
            sys.stderr.write("IO Error - {}\n".format(e))
        finally:
            # Move the temp file to desired location
            if file_size == os.path.getsize(tmp_file_path):
                # if there's a hash value, validate the file
                if md5_hash and not md5_validate_file(tmp_file_path, md5_hash):
                    raise Exception("Error validating the file against its MD5 hash")
                shutil.move(tmp_file_path, file_path)
            elif file_size == -1:
                raise Exception("Error getting Content-Length from server: %s" % url)
Example #25
def get_metadata(download_dir='.'):
    """Download Landsat 8 metadata file."""
    download(
        'http://landsat.usgs.gov/metadata_service/bulk_metadata_files/LANDSAT_8.csv',
        download_dir,
    )
Example #26
    def test_simple(self):
        download(FILE_1MB)
        f = os.path.join(TEST_DATA_DIR, get_resource_name(FILE_1MB))
        assert os.path.exists(f)
        os.remove(f)
Example #27
# -*- coding: utf-8 -*-

import gzip
import csv
import psycopg2
import uuid
from homura import download

# download the index file to the current directory
landsat_index_url = 'http://storage.googleapis.com/gcp-public-data-landsat/index.csv.gz'
download(landsat_index_url, './')
g = gzip.GzipFile(mode='rb', fileobj=open('./index.csv.gz', 'rb'))
f_csv = open('./index.csv', 'wb')
f_csv.write(g.read())
f_csv.close()

# config
csv_file = './index.csv'
db = 'landsat'
usr = '******'
pw = 'postgres'
host = '127.0.0.1'
port = '5432'


def dbimport(tb):
    # connect to database
    pgiscon = psycopg2.connect(database=db,
                               user=usr,
                               password=pw,
                               host=host,
                               port=port)
Example #28
def get_screenshot(url, filename):
    # Swallow any errors raised by the download call.
    try:
        download(url=get_splash_uri(url), path=get_save_path(filename))
    except Exception:
        pass
Example #29
from homura import download
from ftp import ftp_client

download(
    'https://globalmanifestv2.blob.core.windows.net/manifests/2018-07-07-01-11-07_80-8867.zip?sv=2015-12-11&sr=b&sig=Exl4E3Q3R6N4w8jDcbygZ8aRGLoR3LzfH2VR%2FgMtNSk%3D&st=2018-07-07T06%3A39%3A09Z&se=2018-07-10T06%3A44%3A09Z&sp=rw'
)
Example #30
    sub = script.findAll("script")
    su.append(sub[1].string)
print("done.")
for s in su:
    sa = s[39:-2]
    subsceneLinks.append(baseUrl + sa)

print(videolinks)
print(subsceneLinks)

if not os.path.exists(directory):
    os.makedirs(directory)
if not os.path.exists(directory + "/download.txt"):
    saveFile = open(directory + "/download.txt", 'w')
    saveFile.write("")
    saveFile.close()
for i in range(len(videolinks)):
    print(i)
    if subsceneLinks[i] not in open(directory + "/download.txt").read():
        print("downloading : " + subsceneLinks[i])
        download(url=subsceneLinks[i], path=directory)
        appendFile = open(directory + "/download.txt", 'a')
        appendFile.write(subsceneLinks[i])
        appendFile.close()
    if videolinks[i] not in open(directory + "/download.txt").read():
        print("downloading : " + videolinks[i])
        download(url=videolinks[i], path=directory)
        appendFile = open(directory + "/download.txt", 'a')
        appendFile.write(videolinks[i])
        appendFile.close()
Example #31
def get_screenshot(url, filename):
    download(url=get_splash_uri(url), path=get_save_path(filename))
Example #32
def download_(lines):
    try:
        download(lines)
    except Exception as e:
        print(e)
Example #34
# -*- coding: utf-8 -*-


import csv
import psycopg2
import uuid
import gzip
from homura import download

# download the index file to the current directory
sentinel_index_url = 'http://storage.googleapis.com/gcp-public-data-landsat/index.csv.gz'
download(sentinel_index_url, './')
g = gzip.GzipFile(mode='rb', fileobj=open('./index.csv.gz', 'rb'))
f_csv = open('./index.csv', 'wb')
f_csv.write(g.read())
f_csv.close()
print('unzipped successfully!')

# config
csv_file = './index.csv'
db = 'landsat'
usr = '******'
pw = 'postgres'
host = '127.0.0.1'
port = '5432'

# connect to database
pgiscon = psycopg2.connect(database=db, user=usr, password=pw, host=host, port=port)
pgiscursor = pgiscon.cursor()

# Open the CSV file, read it row by row, and insert the records into the database
Example #35
def main(input_genes, output, disease, cutoff, gene_set):
    u"""
    main function
    """
    is_input_file = None
    if os.path.exists(input_genes):
        is_input_file = input_genes
        with open(input_genes) as r:
            input_genes = [x.strip() for x in r.read().split("\n")]
            input_genes = [x for x in input_genes if x]
    else:
        input_genes = input_genes.split(",")

    print("Input genes: {}".format(len(input_genes)))
    check = valid(input_genes, disease)
    while check is not None:
        input_genes.remove(check)
        print("Input genes: {}; {} is invalid".format(len(input_genes), check))
        time.sleep(0.5)
        check = valid(input_genes, disease)

    if is_input_file is not None:
        with open(is_input_file, "w+") as w:
            for i in sorted(input_genes):
                w.write("{}\n".format(i))

    if not input_genes:
        print("No valid genes")
        exit(0)

    if gene_set:
        data = post(genes=input_genes,
                    disease=disease.split(","),
                    groupcutoff1=cutoff,
                    groupcutoff2=100 - cutoff)

        if data["outdir"] == "fail":
            print("Failed {}".format(data["outdir"]))
            exit(data)

        outdir = os.path.dirname(output)
        if not os.path.exists(outdir):
            os.makedirs(outdir)

        if os.path.exists(output):
            os.remove(output)

        download(url=os.path.join(OUTPUT, data["outdir"]), path=output)

    else:
        if not os.path.exists(output):
            os.makedirs(output)

        for i in input_genes:
            print(i)
            data = post(genes=[i],
                        disease=disease.split(","),
                        groupcutoff1=cutoff,
                        groupcutoff2=100 - cutoff)

            if data["outdir"] == "fail":
                print("Failed {}".format(data["outdir"]))
                continue

            outfile = os.path.join(output, data["outdir"])
            if os.path.exists(outfile):
                os.remove(outfile)

            download(url=os.path.join(OUTPUT, data["outdir"]), path=outfile)
Example #36
    def download(self,
                 id,
                 directory_path='.',
                 checksum=False,
                 check_existing=False,
                 **kwargs):
        """Download a product using homura.

        Uses the filename on the server for the downloaded file, e.g.
        "S1A_EW_GRDH_1SDH_20141003T003840_20141003T003920_002658_002F54_4DD1.zip".

        Incomplete downloads are continued and complete files are skipped.

        Further keyword arguments are passed to the homura.download() function.

        Parameters
        ----------
        id : string
            UUID of the product, e.g. 'a8dd0cfd-613e-45ce-868c-d79177b916ed'
        directory_path : string, optional
            Where the file will be downloaded
        checksum : bool, optional
            If True, verify the downloaded file's integrity by checking its MD5 checksum.
            Throws InvalidChecksumError if the checksum does not match.
            Defaults to False.
        check_existing : bool, optional
            If True and a fully downloaded file with the same name exists on the disk,
            verify its integrity using its MD5 checksum. Re-download in case of non-matching checksums.
            Defaults to False.

        Returns
        -------
        path : string
            Disk path of the downloaded file,
        product_info : dict
            Dictionary containing the product's info from get_product_info().

        Raises
        ------
        InvalidChecksumError
            If the MD5 checksum does not match the checksum on the server.
        """
        # Check if API is reachable.
        product_info = None
        while product_info is None:
            try:
                product_info = self.get_product_info(id)
            except requests.HTTPError:
                print("Invalid API response. Trying again in 1 minute.")
                sleep(60)

        path = join(directory_path, product_info['title'] + '.zip')
        kwargs = self._fillin_cainfo(kwargs)

        print('Downloading %s to %s' % (id, path))

        # Check if the file exists and passes md5 test
        # Homura will by default continue the download if the file exists but is incomplete
        if exists(path) and getsize(path) == product_info['size']:
            if not check_existing or md5_compare(path, product_info['md5']):
                print('%s was already downloaded.' % path)
                return path, product_info
            else:
                print(
                    '%s was already downloaded but is corrupt: checksums do not match. Re-downloading.'
                    % path)
                remove(path)

        if (exists(path) and getsize(path) >= 2**31
                and pycurl.version.split()[0].lower() <= 'pycurl/7.43.0'):
            # Workaround for PycURL's bug when continuing > 2 GB files
            # https://github.com/pycurl/pycurl/issues/405
            remove(path)

        homura.download(product_info['url'],
                        path=path,
                        session=self.session,
                        **kwargs)

        # Check integrity with MD5 checksum
        if checksum is True:
            if not md5_compare(path, product_info['md5']):
                raise InvalidChecksumError(
                    'File corrupt: checksums do not match')
        return path, product_info
Example #37
    def download(self, id, directory_path='.', checksum=False, check_existing=False, **kwargs):
        """Download a product using homura.

        Uses the filename on the server for the downloaded file, e.g.
        "S1A_EW_GRDH_1SDH_20141003T003840_20141003T003920_002658_002F54_4DD1.zip".

        Incomplete downloads are continued and complete files are skipped.

        Further keyword arguments are passed to the homura.download() function.

        Parameters
        ----------
        id : string
            UUID of the product, e.g. 'a8dd0cfd-613e-45ce-868c-d79177b916ed'
        directory_path : string, optional
            Where the file will be downloaded
        checksum : bool, optional
            If True, verify the downloaded file's integrity by checking its MD5 checksum.
            Throws InvalidChecksumError if the checksum does not match.
            Defaults to False.
        check_existing : bool, optional
            If True and a fully downloaded file with the same name exists on the disk,
            verify its integrity using its MD5 checksum. Re-download in case of non-matching checksums.
            Defaults to False.

        Returns
        -------
        path : string
            Disk path of the downloaded file,
        product_info : dict
            Dictionary containing the product's info from get_product_info().

        Raises
        ------
        InvalidChecksumError
            If the MD5 checksum does not match the checksum on the server.
        """
        # Check if API is reachable.
        product_info = None
        while product_info is None:
            try:
                product_info = self.get_product_info(id)
            except SentinelAPIError as e:
                print("Invalid API response:\n{}\nTrying again in 1 minute.".format(str(e)))
                sleep(60)

        path = join(directory_path, product_info['title'] + '.zip')
        kwargs = self._fillin_cainfo(kwargs)

        print('Downloading %s to %s' % (id, path))

        # Check if the file exists and passes md5 test
        # Homura will by default continue the download if the file exists but is incomplete
        if exists(path) and getsize(path) == product_info['size']:
            if not check_existing or md5_compare(path, product_info['md5']):
                print('%s was already downloaded.' % path)
                return path, product_info
            else:
                print('%s was already downloaded but is corrupt: checksums do not match. Re-downloading.' % path)
                remove(path)

        if (exists(path) and getsize(path) >= 2 ** 31 and
                pycurl.version.split()[0].lower() <= 'pycurl/7.43.0'):
            # Workaround for PycURL's bug when continuing > 2 GB files
            # https://github.com/pycurl/pycurl/issues/405
            remove(path)

        homura.download(product_info['url'], path=path, session=self.session, **kwargs)

        # Check integrity with MD5 checksum
        if checksum is True:
            if not md5_compare(path, product_info['md5']):
                raise InvalidChecksumError('File corrupt: checksums do not match')
        return path, product_info
Example #38
    def install(self, plugin_name):
        import zipfile
        from homura import download  # fast download manager.

        # URL for download of plugin.
        # https://github.com/deepnlpf/plugin_stanza/archive/master.zip
        URL = (self.PLUGIN_SERVER + "plugin_" + plugin_name +
               "/archive/master" + self.EXTENSION)

        # Path for save plugin.
        PATH_DOWNLOAD_PLUGIN = (self.PLUGIN_PATH + "plugin_" + plugin_name +
                                "-master" + self.EXTENSION)

        # Check that the plugin folder exists.
        if not os.path.exists(self.PLUGIN_PATH):
            os.makedirs(self.PLUGIN_PATH)

        # Download plugin.
        try:
            print("Downloading plugin", plugin_name, "..")
            # The download fails if the URL does not exist.
            download(url=URL, path=PATH_DOWNLOAD_PLUGIN)
        except Exception as err:
            print("❗️Plugin no found!")
            log.logger.error(err)
            sys.exit(0)

        # Extracting files plugin.
        try:
            fantasy_zip = zipfile.ZipFile(PATH_DOWNLOAD_PLUGIN)
            fantasy_zip.extractall(self.PLUGIN_PATH)
            fantasy_zip.close()
        except Exception as err:
            print("❗️Error extracting files!")
            log.logger.error(err)
            sys.exit(0)

        # Normalize the plugin directory name.
        try:
            os.rename(
                self.PLUGIN_PATH + "plugin_" + plugin_name + "-master",
                self.PLUGIN_PATH + plugin_name,
            )
        except Exception as err:
            print("❗️Error config directory plugin!")
            log.logger.error(err)
            sys.exit(0)

        # Install requirements.
        try:
            # Check whether the plugin ships a requeriments.sh file.
            if os.path.isfile(self.PLUGIN_PATH + plugin_name +
                              "/requeriments.sh"):
                print("Install requirements..")
                os.system(
                    "cd " +
                    str(self.PLUGIN_PATH + plugin_name +
                        " && chmod 777 requeriments.sh && ./requeriments.sh"))
        except Exception as err:
            print("❗Error when executing the requeriments.sh plugin file!")
            log.logger.error(err)
            sys.exit(0)

        os.remove(PATH_DOWNLOAD_PLUGIN)  # remove the downloaded zip.
        print("🎉 Plugin", plugin_name, "installed!")
        log.logger.info("Plugin installed: {}".format(plugin_name))
        print("Path of installed plugins:", self.PLUGIN_PATH)
        sys.exit(0)