Code example #1
File: woim.py Project: Chubby-Chocobo/misc-stuffs
def process_track(root_dir, track_url):
    print("======= Process track: " + track_url)
    f = urlopen(track_url)
    # f = codecs.open("test_chapter.html", "r", "utf-8")
    data = f.read()
    f.close()
    parsed_html = BeautifulSoup(data)
    param = parsed_html.find("param", {"name": "flashvars"})
    songs_urls = re.findall(
        "http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+", param["value"]
    )
    for songs_url in songs_urls:
        f2 = urlopen(songs_url)
        data2 = f2.read()
        parsed_html2 = BeautifulSoup(data2)
        tracks = parsed_html2.findAll("track")
        for track in tracks:
            song_url = track["location"]
            o = urlparse(song_url)
            file_name = basename(song_url)
            file_path = root_dir + "/" + file_name
            if os.path.exists(file_path):
                print("File exists. Skip: " + file_path)
            else:
                print("Download: " + file_name)
                urlretrieve(song_url, file_path)
Code example #2
def _download_reference_files(conn):
    print('Downloading reference files')
    if not exists(reference_base_dir):
        mkdir(reference_base_dir)

    files = {'tree': (get_reference_fp('gg_13_8-97_otus.tree'),
                      'ftp://ftp.microbio.me/greengenes_release/'
                      'gg_13_8_otus/trees/97_otus.tree'),
             'taxonomy': (get_reference_fp('gg_13_8-97_otu_taxonomy.txt'),
                          'ftp://ftp.microbio.me/greengenes_release/'
                          'gg_13_8_otus/taxonomy/97_otu_taxonomy.txt'),
             'sequence': (get_reference_fp('gg_13_8-97_otus.fasta'),
                          'ftp://ftp.microbio.me/greengenes_release/'
                          'gg_13_8_otus/rep_set/97_otus.fasta')}

    for file_type, (local_fp, url) in viewitems(files):
        # Do not download the file if it exists already
        if exists(local_fp):
            print("SKIPPING %s: file already exists at %s. To "
                  "download the file again, erase the existing file first" %
                  (file_type, local_fp))
        else:
            try:
                urlretrieve(url, local_fp)
            except:
                raise IOError("Error: Could not fetch %s file from %s" %
                              (file_type, url))

    ref = Reference.create('Greengenes', '13_8', files['sequence'][0],
                           files['taxonomy'][0], files['tree'][0])

    _insert_processed_params(conn, ref)
Code example #3
File: helper.py Project: Forrest-Z/self-driving-car
def maybe_download_pretrained_vgg(data_dir):
    """
    Download and extract pretrained vgg model if it doesn't exist
    :param data_dir: Directory to download the model to
    """
    vgg_filename = 'vgg.zip'
    vgg_path = os.path.join(data_dir, 'vgg')
    vgg_files = [
        os.path.join(vgg_path, 'variables/variables.data-00000-of-00001'),
        os.path.join(vgg_path, 'variables/variables.index'),
        os.path.join(vgg_path, 'saved_model.pb')]

    missing_vgg_files = [vgg_file for vgg_file in vgg_files if not os.path.exists(vgg_file)]
    if missing_vgg_files:
        # Clean vgg dir
        if os.path.exists(vgg_path):
            shutil.rmtree(vgg_path)
        os.makedirs(vgg_path)

        # Download vgg
        print('Downloading pre-trained vgg model...')
        with DLProgress(unit='B', unit_scale=True, miniters=1) as pbar:
            urlretrieve(
                'https://s3-us-west-1.amazonaws.com/udacity-selfdrivingcar/vgg.zip',
                os.path.join(vgg_path, vgg_filename),
                pbar.hook)

        # Extract vgg
        print('Extracting model...')
        zip_ref = zipfile.ZipFile(os.path.join(vgg_path, vgg_filename), 'r')
        zip_ref.extractall(data_dir)
        zip_ref.close()

        # Remove zip file to save space
        os.remove(os.path.join(vgg_path, vgg_filename))
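Note: DLProgress is used above but not defined in this snippet. In the Udacity helper scripts it is typically a thin wrapper around tqdm whose hook() method matches urlretrieve's reporthook signature; a minimal sketch under that assumption:

from tqdm import tqdm

class DLProgress(tqdm):
    """Progress bar whose hook() matches urlretrieve's reporthook signature (sketch)."""
    last_block = 0

    def hook(self, block_num=1, block_size=1, total_size=None):
        self.total = total_size
        # urlretrieve reports a cumulative block count, so update by the delta
        self.update((block_num - self.last_block) * block_size)
        self.last_block = block_num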
Code example #4
File: buildDNACommands.py Project: mvysotskiy/ePMV
    def retrieveDNAPDBonServer(self,path,name=None,pathTo=None):
        done = False
        cut= 0
        dnafile = None
        print ("http://w3dna.rutgers.edu/"+path[1:-1]+"/s0.pdb")
        if name is None :
            name = "s0.pdb"
        if pathTo is None :
            pathTo = self.vf.rcFolder+os.sep+"pdbcache"+os.sep 
        tmpFileName =  pathTo+name   
        while not done :
            if cut > 100 :        
                break
            try :
#                dnafile = urllib2.urlopen("http://w3dna.rutgers.edu/data/usr/"+path+"/rebuild/s0.pdb")
#                dnafile = urllib2.urlopen("http://w3dna.rutgers.edu/"+path[1:-1]+"/s0.pdb")
                urllib.urlretrieve("http://w3dna.rutgers.edu/"+path[1:-1]+"/s0.pdb", tmpFileName)
                done = True
            except :
                cut+=1        
                continue
        if done :
            #should download in the  rcFolder
#
#            output = open(pathTo+name,'w')
#            output.write(dnafile.read())
#            output.close()
            return name,pathTo
        return None,None
Code example #5
File: mnist.py Project: mano143/theanet
def _load_mnist():
    data_dir = os.path.dirname(os.path.abspath(__file__))
    data_file = os.path.join(data_dir, "mnist.pkl.gz")

    print("Looking for data file: ", data_file)

    if not os.path.isfile(data_file):
        import urllib.request as url

        origin = "http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz"
        print("Downloading data from: ", origin)
        url.urlretrieve(origin, data_file)

    print("Loading MNIST data")
    f = gzip.open(data_file, "rb")
    u = pickle._Unpickler(f)
    u.encoding = "latin1"
    train_set, valid_set, test_set = u.load()
    f.close()

    train_x, train_y = train_set
    valid_x, valid_y = valid_set
    testing_x, testing_y = test_set

    training_x = np.vstack((train_x, valid_x))
    training_y = np.concatenate((train_y, valid_y))

    training_x = training_x.reshape((training_x.shape[0], 1, 28, 28))
    testing_x = testing_x.reshape((testing_x.shape[0], 1, 28, 28))

    return training_x, training_y, testing_x, testing_y
Code example #6
def main(target_dir, target_arch, major_py_version, miniconda_version='3.8.3', install_obvci=True):
    system = platform.system()
    URL = miniconda_url(system, target_arch, major_py_version, miniconda_version)
    basename = URL.rsplit('/', 1)[1]
    if system in ['Linux', 'Darwin']:
        cmd = ['bash', basename, '-b', '-p', target_dir]
        bin_dir = 'bin'
    elif system in ['Windows']:
        cmd = ['powershell', 'Start-Process', '-FilePath', basename, '-ArgumentList',
               '/S,/D=' + target_dir,
               '-Wait', ]  # '-Passthru']
        bin_dir = 'scripts'
    else:
        raise ValueError('Unsupported operating system.')
    
    if not os.path.exists(basename):
        print('Downloading from {}'.format(URL))
        urlretrieve(URL, basename)
    else:
        print('Using cached version of {}'.format(URL))

    # Install with powershell.
    if os.path.exists(target_dir):
        raise IOError('Installation directory already exists')
    subprocess.check_call(cmd)
    
    if not os.path.isdir(target_dir):
        raise RuntimeError('Failed to install miniconda :(')

    if install_obvci:
        conda_path = os.path.join(target_dir, bin_dir, 'conda')
        subprocess.check_call([conda_path, 'install', '--yes', '--quiet', '-c', 'pelson', 'obvious-ci'])
Code example #7
    def getArtwork(self, mp4Path, filename='cover', thumbnail=False):
        # Check for local cover.jpg or cover.png artwork in the same directory as the mp4
        extensions = valid_poster_extensions
        poster = None
        for e in extensions:
            head, tail = os.path.split(os.path.abspath(mp4Path))
            path = os.path.join(head, filename + os.extsep + e)
            if (os.path.exists(path)):
                poster = path
                self.log.info("Local artwork detected, using %s." % path)
                break
        # Pulls down all the poster metadata for the correct season and sorts them into the Poster object
        if poster is None:
            if thumbnail:
                try:
                    poster = urlretrieve(self.episodedata['filename'], os.path.join(tempfile.gettempdir(), "poster-%s.jpg" % self.title))[0]
                except Exception as e:
                    self.log.error("Exception while retrieving poster %s.", str(e))
                    poster = None
            else:
                posters = posterCollection()
                try:
                    for bannerid in self.showdata['_banners']['season']['season'].keys():
                        if str(self.showdata['_banners']['season']['season'][bannerid]['season']) == str(self.season):
                            poster = Poster()
                            poster.ratingcount = int(self.showdata['_banners']['season']['season'][bannerid]['ratingcount'])
                            if poster.ratingcount > 0:
                                poster.rating = float(self.showdata['_banners']['season']['season'][bannerid]['rating'])
                            poster.bannerpath = self.showdata['_banners']['season']['season'][bannerid]['_bannerpath']
                            posters.addPoster(poster)

                    poster = urlretrieve(posters.topPoster().bannerpath, os.path.join(tempfile.gettempdir(), "poster-%s.jpg" % self.title))[0]
                except:
                    poster = None
        return poster
Code example #8
File: cibuild.py Project: sztsian/repo
def get_sources(itemList, output=srcDir, verb=None):
    '''Get source files from local and internet.

    Args:
        itemList: A list of source files.
        output: A string of temp directory.
        verb: A bool of verbose.
    '''

    for item in itemList:
        if not os.path.exists(os.path.join(output, item[0].split('/')[-1])):
            if item[0].split('://')[0] in ['http', 'https', 'ftp']:
                if verb:
                    echo('cyan', 'verb:', ' downloading {} file.'.format(item[0]))
                try:
                    urlretrieve(item[0], '{}/{}'.format(output, item[0].split('/')[-1]))
                    #call(['wget', '-q', '-P', output, item[0]])
                except Exception as e:
                    echo('red', 'erro:', ' downloading error. {}'.format(e))
                    sys.exit(1)
            else:
                for src in find_files(item[0], 'rpms'):
                    if verb:
                        echo('cyan', 'verb:', ' copy {} file to build directory.'.format(src))
                    shutil.copy(src, output)
Code example #9
File: pip_downloader.py Project: aurv/PTVS
def install_from_source(setuptools_source, pip_source):
    setuptools_temp_dir = tempfile.mkdtemp('-setuptools', 'ptvs-')
    pip_temp_dir = tempfile.mkdtemp('-pip', 'ptvs-')
    cwd = os.getcwd()

    try:
        os.chdir(setuptools_temp_dir)
        print('Downloading setuptools from ' + setuptools_source)
        sys.stdout.flush()
        setuptools_package, _ = urlretrieve(setuptools_source, 'setuptools.tar.gz')

        package = tarfile.open(setuptools_package)
        try:
            safe_members = [m for m in package.getmembers() if not m.name.startswith(('..', '\\'))]
            package.extractall(setuptools_temp_dir, members=safe_members)
        finally:
            package.close()

        extracted_dirs = [d for d in os.listdir(setuptools_temp_dir) if os.path.exists(os.path.join(d, 'setup.py'))]
        if not extracted_dirs:
            raise OSError("Failed to find setuptools's setup.py")
        extracted_dir = extracted_dirs[0]

        print('\nInstalling from ' + extracted_dir)
        sys.stdout.flush()
        os.chdir(extracted_dir)
        subprocess.check_call(
            EXECUTABLE + ['setup.py', 'install', '--single-version-externally-managed', '--record', 'setuptools.txt']
        )

        os.chdir(pip_temp_dir)
        print('Downloading pip from ' + pip_source)
        sys.stdout.flush()
        pip_package, _ = urlretrieve(pip_source, 'pip.tar.gz')

        package = tarfile.open(pip_package)
        try:
            safe_members = [m for m in package.getmembers() if not m.name.startswith(('..', '\\'))]
            package.extractall(pip_temp_dir, members=safe_members)
        finally:
            package.close()

        extracted_dirs = [d for d in os.listdir(pip_temp_dir) if os.path.exists(os.path.join(d, 'setup.py'))]
        if not extracted_dirs:
            raise OSError("Failed to find pip's setup.py")
        extracted_dir = extracted_dirs[0]

        print('\nInstalling from ' + extracted_dir)
        sys.stdout.flush()
        os.chdir(extracted_dir)
        subprocess.check_call(
            EXECUTABLE + ['setup.py', 'install', '--single-version-externally-managed', '--record', 'pip.txt']
        )

        print('\nInstallation Complete')
        sys.stdout.flush()
    finally:
        os.chdir(cwd)
        shutil.rmtree(setuptools_temp_dir, ignore_errors=True)
        shutil.rmtree(pip_temp_dir, ignore_errors=True)
Code example #10
def download_feed():
    """
    Grab feed to local file, make sure it actually has some data.
    """
    advertiser = sys.argv[1]
    print("Grabbing Feed For Advertiser: {0}<br>".format(parse.unquote(advertiser)))
    start_time = time.time()
    if os.path.isfile("feed.xml"):
        os.remove("feed.xml")
    try:
        feed_url = "https://api.rewardstyle.com/v1/product_feed?" "oauth_token={0}&advertiser={1}".format(
            TOKEN, advertiser
        )
        request.urlretrieve(feed_url, "feed.xml")
        print("Feed download time: {0}ms<br>".format(str(round((time.time() - start_time) * 1000))))

    except error.HTTPError as e:
        print("Feed most likely does not exist: {0}".format(e))
        sys.exit()

    file = open("feed.xml", "rb")
    file.seek(0, 2)
    size = file.tell()
    file.close()
    if size <= 64:
        print("Feed looks empty")
        sys.exit()
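Side note: the seek(0, 2)/tell() sequence above measures the file size by hand; os.path.getsize expresses the same check more directly (a sketch, not the original code):

import os
import sys

# Same emptiness check as above, without opening the file.
if os.path.getsize("feed.xml") <= 64:
    print("Feed looks empty")
    sys.exit()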
Code example #11
File: opusapi.py Project: michaelaye/planetpy
    def download_results(self, savedir=None, only_raw=True, only_calib=False,
                         index=None):
        """Download the previously found and stored Opus obsids.

        Parameters
        ==========
        savedir: str or pathlib.Path, optional
            If the database root folder as defined by the config.ini should not be used,
            provide a different savedir here. It will be handed to PathManager.
        """
        obsids = self.obsids if index is None else [self.obsids[index]]
        for obsid in obsids:
            pm = io.PathManager(obsid.img_id, savedir=savedir)
            pm.basepath.mkdir(exist_ok=True)
            if only_raw is True:
                to_download = obsid.raw_urls
            elif only_calib is True:
                to_download = obsid.calib_urls
            else:
                to_download = obsid.all_urls
            for url in to_download:
                basename = Path(url).name
                print("Downloading", basename)
                store_path = str(pm.basepath / basename)
                try:
                    urlretrieve(url, store_path)
                except Exception as e:
                    urlretrieve(url.replace('https', 'http'), store_path)
            return str(pm.basepath)
Code example #12
File: install_grocery.py Project: AllanYiin/CNTK
def download_grocery_data():
    base_folder = os.path.dirname(os.path.abspath(__file__))
    dataset_folder = os.path.join(base_folder, "..")
    if not os.path.exists(os.path.join(dataset_folder, "Grocery", "testImages")):
        filename = os.path.join(dataset_folder, "Grocery.zip")
        if not os.path.exists(filename):
            url = "https://www.cntk.ai/DataSets/Grocery/Grocery.zip"
            print('Downloading data from ' + url + '...')
            urlretrieve(url, filename)
            
        try:
            print('Extracting ' + filename + '...')
            with zipfile.ZipFile(filename) as myzip:
                myzip.extractall(dataset_folder)
            if platform != "win32":
                testfile  = os.path.join(dataset_folder, "Grocery", "test.txt")
                unixfile = os.path.join(dataset_folder, "Grocery", "test_unix.txt")
                out = open(unixfile, 'w')
                with open(testfile) as f:
                    for line in f:
                        out.write(line.replace('\\', '/'))
                out.close()
                shutil.move(unixfile, testfile)
        finally:
            os.remove(filename)
        print('Done.')
    else:
        print('Data already available at ' + dataset_folder + '/Grocery')
Code example #13
File: get.py Project: cuda-convnet/WiFi_Kit_series
def get_tool(tool):
    sys_name = platform.system()
    archive_name = tool['archiveFileName']
    local_path = dist_dir + archive_name
    url = tool['url']
    #real_hash = tool['checksum'].split(':')[1]
    if not os.path.isfile(local_path):
        print('Downloading ' + archive_name);
        sys.stdout.flush()
        if 'CYGWIN_NT' in sys_name:
            ctx = ssl.create_default_context()
            ctx.check_hostname = False
            ctx.verify_mode = ssl.CERT_NONE
            urlretrieve(url, local_path, report_progress, context=ctx)
        elif 'Windows' in sys_name:
            r = requests.get(url)
            f = open(local_path, 'wb')
            f.write(r.content)
            f.close()
        else:
            urlretrieve(url, local_path, report_progress)
        sys.stdout.write("\rDone\n")
        sys.stdout.flush()
    else:
        print('Tool {0} already downloaded'.format(archive_name))
        sys.stdout.flush()
    #local_hash = sha256sum(local_path)
    #if local_hash != real_hash:
    #    print('Hash mismatch for {0}, delete the file and try again'.format(local_path))
    #    raise RuntimeError()
    unpack(local_path, '.')
Code example #14
File: github2s3.py Project: aslanbekirov/crate-demo
def run(params):
    bucket_name, prefix_name, key_name = params
    s3_key_name = '{}/{}'.format(prefix_name, key_name)
    git_key_url = 'http://data.githubarchive.org/{}'.format(key_name)
    print('Processing {} to s3...'.format(s3_key_name))

    s3_conn = S3Connection()
    bucket = s3_conn.get_bucket(bucket_name)
    key = bucket.get_key(s3_key_name)

    if key:
        print('{} is already in the bucket'.format(key))
    elif exists_url(git_key_url) is False:
        print('{} does not exist'.format(git_key_url))
    else:
        urlretrieve(git_key_url, key_name)

        # pre-process data
        preprocess.process_file(key_name)

        retry_count = 0
        while not upload_to_s3(key_name, bucket, s3_key_name) and retry_count <= MAX_RETRIES:
            retry_count += 1
            print('Failed to upload {} !'.format(s3_key_name))
        else:
            print('File {} is uploaded to {}/{}!'.format(key_name, bucket_name, prefix_name))
        os.remove(key_name)
Code example #15
File: update.py Project: jdrudolph/mypy
def download(url):
    """ saves url to current folder. returns filename """
    #TODO: check that download was successful
    filename = os.path.basename(url)
    print('downloading', filename)
    urlretrieve(url, filename)
    return filename
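The TODO above notes that the download is never verified. One possible check (a sketch, not part of the original project) is to compare the size on disk with the Content-Length header that urlretrieve returns:

import os
from urllib.request import urlretrieve

def download_checked(url):
    """Like download(), but raise if the on-disk size differs from Content-Length (sketch)."""
    filename = os.path.basename(url)
    print('downloading', filename)
    _, headers = urlretrieve(url, filename)
    expected = headers.get('Content-Length')
    if expected is not None and os.path.getsize(filename) != int(expected):
        raise IOError('incomplete download of {}'.format(url))
    return filename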
Code example #16
File: pyvenvex.py Project: Bilalh/csplib
    def install_script(self, context, name, url):
        _, _, path, _, _, _ = urlparse(url)
        fn = os.path.split(path)[-1]
        binpath = context.bin_path
        distpath = os.path.join(binpath, fn)
        # Download script into the env's binaries folder
        urlretrieve(url, distpath)
        progress = self.progress
        if self.verbose:
            term = '\n'
        else:
            term = ''
        if progress is not None:
            progress('Installing %s ...%s' % (name, term), 'main')
        else:
            sys.stderr.write('Installing %s ...%s' % (name, term))
            sys.stderr.flush()
        # Install in the env
        args = [context.env_exe, fn]
        p = Popen(args, stdout=PIPE, stderr=PIPE, cwd=binpath)
        t1 = Thread(target=self.reader, args=(p.stdout, 'stdout'))
        t1.start()
        t2 = Thread(target=self.reader, args=(p.stderr, 'stderr'))
        t2.start()
        p.wait()
        t1.join()
        t2.join()
        if progress is not None:
            progress('done.', 'main')
        else:
            sys.stderr.write('done.\n')
        # Clean up - no longer needed
        os.unlink(distpath)
Code example #17
File: tiledata.py Project: jmfield2/pyroutelib2
def GetOsmTileData(z,x,y):
  """Download OSM data for the region covering a slippy-map tile"""
  if(x < 0 or y < 0 or z < 0 or z > 25):
    print("Disallowed (%d,%d) at zoom level %d" % (x, y, z))
    return
  
  directory = 'cache/%d/%d/%d' % (z,x,y)
  filename = '%s/data.osm.pkl' % (directory)
  if(not os.path.exists(directory)):
    os.makedirs(directory)

  if(z == DownloadLevel()):
    # Download the data
    s,w,n,e = tileEdges(x,y,z)
    # /api/0.6/map?bbox=left,bottom,right,top
    URL = 'http://api.openstreetmap.org/api/0.6/map?bbox={},{},{},{}'.format(w,s,e,n)

     
    if(not os.path.exists(filename)): # TODO: allow expiry of old data
      urlretrieve(URL, filename)
    return(filename)
    
  elif(z > DownloadLevel()):
    # use larger tile
    while(z > DownloadLevel()):
      z = z - 1
      x = int(x / 2)
      y = int(y / 2)
    return(GetOsmTileData(z,x,y))
  return(None)
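DownloadLevel() and tileEdges() come from elsewhere in pyroutelib2 and are not shown here. For reference, the standard slippy-map tile math that a tileEdges implementation follows (a sketch, not the project's exact code) is:

import math

def tileEdges(x, y, z):
    """Return (south, west, north, east) in degrees for slippy-map tile (x, y) at zoom z."""
    n = 2.0 ** z
    west = x / n * 360.0 - 180.0
    east = (x + 1) / n * 360.0 - 180.0
    # Tile y grows southward, so y gives the north edge and y + 1 the south edge.
    north = math.degrees(math.atan(math.sinh(math.pi * (1 - 2 * y / n))))
    south = math.degrees(math.atan(math.sinh(math.pi * (1 - 2 * (y + 1) / n))))
    return south, west, north, east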
Code example #18
File: custom_search.py Project: umentu/collect_image
    def save_images(self, word, max_count=100, resize=False):
        """
        Download the images.
        """

        dir_name = IMAGE_DIR + "/" + word + "/"

        if not os.path.exists(dir_name):
            os.mkdir(dir_name)

        for start in range(1, max_count+1):
            image_info = self.get_image_info(word, start)
            for image in image_info:
                file_name = self.create_file_name(
                    dir_name, image["link"], image["extension"])
                file_path = dir_name + file_name

                print(image["link"])

                try:
                    connection_test = request.urlopen(image["link"])
                except (HTTPError, OSError) as e:
                    continue

                request.urlretrieve(image["link"], file_path)

                if resize is True:
                    self.resize_image(file_path, image["extension"])


            # Wait 5 seconds between requests because of the API rate limit
            time.sleep(5)
Code example #19
File: VScraper.py Project: Jstillerman/VScraper
def get_files():
    """ Gets files of specified extension through user input
    from a specified full URL path; downloads each file to
    the user's specified local directory.
    """
    
    while True:
        url = input("Enter the URL you want to scrape from: ")

        suffix = input("\nWhat type of file do you want to scrape? \nExamples: .png, .pdf, .doc - ")

        filepath = input("Specify a file path to save to: ")

        if not url.startswith('http://') and not url.startswith('https://'):
            url = 'http://' + url  # prepend the scheme rather than appending it

        response = requests.get(url, stream=True)            
        soup = bs(response.text)

        list_of_links = [link.get('href') for link in soup.find_all('a') if suffix in str(link)]

        for link in list_of_links:
            file_name = link.rpartition('/')[-1]
            urlretrieve(url.rsplit('/', 1)[0] + '/' + link, filepath + '\\' + file_name)
            
        print_message(list_of_links, suffix)
        if not repeat(input("\nScrape from another URL? ")):
            break
Code example #20
def get_remote(url = 'http://www.trainingimages.org/uploads/3/4/7/0/34703305/ti_strebelle.sgems',local_file = 'ti.dat', is_zip=0, filename_in_zip=''):
    #import os    
    
    if (is_zip==1):
        local_file_zip = local_file + '.zip'
    
    if not (os.path.exists(local_file)):
        if (is_zip==1):
            import zipfile
            # download zip file
            print('Beginning download of ' + url + ' to ' + local_file_zip)
            urlretrieve(url, local_file_zip)
            # unzip file
            print('Unziping %s to %s' % (local_file_zip,local_file))
            zip_ref = zipfile.ZipFile(local_file_zip, 'r')
            zip_ref.extractall('.')
            zip_ref.close()
            # rename unzipped file            
            if len(filename_in_zip)>0:
                os.rename(filename_in_zip,local_file)
            
            
        else:
            print('Beginning download of ' + url + ' to ' + local_file)
            urlretrieve(url, local_file)
        
        
    return local_file
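Usage note: called with its defaults, the function above downloads the Strebelle training image once and reuses the cached local copy on later calls; a minimal usage sketch (assuming the default URL is reachable):

# Download (or reuse) the default training image and peek at its first line.
ti_file = get_remote()
with open(ti_file) as f:
    print('first line of', ti_file, ':', f.readline().strip())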
Code example #21
File: import_products.py Project: ThanawatTS/lab9
def main():
    os.makedirs('imgs')
    Product.objects.all().delete()
    with open('./scripts/condoms.csv') as csv_file:
        csv_content = csv.reader(csv_file, delimiter=',')
        counter = 0
        for row in csv_content:
            img_url = row[3]
            img_type = img_url.split('.')[-1]
            img_dest = './media/imgs/product_img{0}.{1}'.format(counter, img_type)
            try:
                request.urlretrieve(img_url, img_dest)
            except error.HTTPError as e:
                print(counter, e)
            product = Product.objects.create(
                name=row[0],
                desc=row[1],
                price=row[2],
                amount=50,
                pic= ('/imgs/product_img{0}.{1}'.format(counter, img_type))
            )
            product.save()
            counter += 1

    print(Product.objects.all())
Code example #22
def main():
    scripts_path = os.path.dirname(os.path.abspath(__file__))
    repo = os.path.dirname(scripts_path)
    cldr_dl_path = os.path.join(repo, 'cldr')
    cldr_path = os.path.join(repo, 'cldr', os.path.splitext(FILENAME)[0])
    zip_path = os.path.join(cldr_dl_path, FILENAME)
    changed = False

    while not is_good_file(zip_path):
        log('Downloading \'%s\'', FILENAME)
        if os.path.isfile(zip_path):
            os.remove(zip_path)
        urlretrieve(URL, zip_path, reporthook)
        changed = True
        print()
    common_path = os.path.join(cldr_path, 'common')

    if changed or not os.path.isdir(common_path):
        if os.path.isdir(common_path):
            log('Deleting old CLDR checkout in \'%s\'', cldr_path)
            shutil.rmtree(common_path)

        log('Extracting CLDR to \'%s\'', cldr_path)
        with contextlib.closing(zipfile.ZipFile(zip_path)) as z:
            z.extractall(cldr_path)

    subprocess.check_call([
        sys.executable,
        os.path.join(scripts_path, 'import_cldr.py'),
        common_path])
Code example #23
File: downloadit2.py Project: CBickel87/downloadit
def navigate_dl():
    browser.get('https://www.urltodlownloadfrom.com/specificaddress')

    while True:
        wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "body > div.course-mainbar.lecture-content > "
                                                                      "div:nth-child(2) > div.video-options > a")))
        dl_url = browser.find_element_by_css_selector("body > div.course-mainbar.lecture-content > "
                                                      "div:nth-child(2) > div.video-options > a").get_attribute("href")
        next_btn = browser.find_element_by_css_selector("#lecture_complete_button > span")

        title = get_title()

        try:
            dl_extras = browser.find_element_by_css_selector("body > div.course-mainbar.lecture-content > "
                                                             "div:nth-child(4) > div:nth-child(3) > a").get_attribute("href")
            print(dl_extras)
            urlretrieve(dl_extras, save_path + title + '.pptx', reporthook)
        except NoSuchElementException:
            pass

        try:
            print(dl_url)
            urlretrieve(dl_url, save_path+title+'.mp4', reporthook)
            next_btn.click()
        except NoSuchElementException:
            break
Code example #24
File: fetch_nxml.py Project: clulab/reach
def retrieve_nxml_abstract(pmid, outfile=None):
    """
    Retrieves nxml file of the abstract associated with the provided pmid
    """
    query = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id={}&rettype=abstract".format(pmid)
    nxml_file = outfile or "{}.nxml".format(pmid)
    urlretrieve(query, nxml_file)
Code example #25
File: fetch_nxml.py Project: clulab/reach
def retrieve_nxml_paper(pmcid, outfile=None):
    """
    Retrieves nxml file for the provided pmcid
    """
    query = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&id={}".format(pmcid)
    nxml_file = outfile or "{}.nxml".format(pmcid)
    urlretrieve(query, nxml_file)
Code example #26
    def _download_episode(self, url, title):
        """Save the video stream to the disk.
        The filename will be sanitized(title).mp4."""
        filename = self._sanitize_filename(title) + '.mp4'
        print('Downloading {}...'.format(title))
        filename = '{}/{}'.format(self.folder, filename)
        urlretrieve(url, filename=filename)
Code example #27
File: module.py Project: containers-tools/cct
    def fetch(self, directory):
        if not os.path.exists(directory):
            os.makedirs(directory)

        self.path = os.path.join(directory, self.filename)

        url = self.artifact
        if self.check_sum():
            logger.info("Using cached artifact for %s" % self.filename)
            return

        logger.info("Fetching %s from  %s." % (self.filename, url))

        try:
            if os.path.basename(url) == url:
                raise CCTError("Artifact is referenced by filename - can't download it.")
            urlrequest.urlretrieve(url, self.path)
        except Exception as ex:
            if self.hint:
                raise CCTError('artifact: "%s" was not found. %s' % (self.path, self.hint))
            else:
                raise CCTError("cannot download artifact from url %s, error: %s" % (url, ex))

        if not self.check_sum():
            if self.hint:
                raise CCTError('hash is not correct for artifact: "%s". %s' % (self.path, self.hint))
            else:
                raise CCTError("artifact from %s doesn't match required chksum" % url)
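check_sum is defined elsewhere in cct and not shown here. A typical implementation hashes the downloaded file and compares it with the expected digest; a rough sketch, assuming a hypothetical self.md5 attribute holding the expected value:

import hashlib
import os

def check_sum(self):
    """Sketch: True if self.path exists and its MD5 matches self.md5 (assumed attribute)."""
    if not os.path.exists(self.path):
        return False
    digest = hashlib.md5()
    with open(self.path, 'rb') as f:
        for chunk in iter(lambda: f.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest() == self.md5  # self.md5 is hypothetical, not from the original snippet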
Code example #28
File: karaoke.py Project: muris2016/ptavi-p3
    def do_local(self):
        attrs_dict = [tag[1] for tag in self.list_tags]
        for attrs in attrs_dict:
            if 'src' in attrs and 'http://' in attrs['src']:
                filename = attrs['src'].split('/')[-1]
                urlretrieve(attrs['src'], filename)
                attrs['src'] = filename
Code example #29
File: base.py Project: daniel-perry/scikit-learn
def _fetch_remote(remote, dirname=None):
    """Helper function to download a remote dataset into path

    Fetch a dataset pointed by remote's url, save into path using remote's
    filename and ensure its integrity based on the SHA256 Checksum of the
    downloaded file.

    Parameters
    ----------
    remote : RemoteFileMetadata
        Named tuple containing remote dataset meta information: url, filename
        and checksum

    dirname : string
        Directory to save the file to.

    Returns
    -------
    file_path: string
        Full path of the created file.
    """

    file_path = (remote.filename if dirname is None
                 else join(dirname, remote.filename))
    urlretrieve(remote.url, file_path)
    checksum = _sha256(file_path)
    if remote.checksum != checksum:
        raise IOError("{} has an SHA256 checksum ({}) "
                      "differing from expected ({}), "
                      "file may be corrupted.".format(file_path, checksum,
                                                      remote.checksum))
    return file_path
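The docstring refers to a RemoteFileMetadata named tuple and a _sha256 helper that are not part of this snippet. A self-contained sketch of both, close to what scikit-learn uses but written here for illustration:

import hashlib
from collections import namedtuple

# url, filename and expected SHA256 checksum, as described in the docstring above.
RemoteFileMetadata = namedtuple('RemoteFileMetadata', ['filename', 'url', 'checksum'])

def _sha256(path):
    """Compute the SHA256 hex digest of a file, reading it in chunks."""
    sha256hash = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(8192), b''):
            sha256hash.update(chunk)
    return sha256hash.hexdigest()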
Code example #30
File: roller.py Project: akerl/roller
    def download(self):
        if 'rc' in self.version:
            base_url = 'https://git.kernel.org/torvalds/t'
            url = '{0}/linux-{1}.tar.gz'.format(base_url, self.version)
        else:
            base_url = 'https://cdn.kernel.org/pub/linux/kernel'
            major = 'v' + self.version[0] + '.x'
            url = '{0}/{1}/linux-{2}.tar.gz'.format(
                base_url,
                major,
                self.version
            )

        destination = '{0}/archives/linux-{1}.tar.gz'.format(
            self.build_dir,
            self.version
        )

        if os.path.isfile(destination):
            self.log('Kernel already downloaded: {0}'.format(self.version))
            return
        self.log('Downloading kernel: {0}'.format(self.version))
        if self.verbose:
            hook = download_progress
        else:
            hook = None
        try:
            urlretrieve(
                url,
                filename=destination,
                reporthook=hook
            )
        except Exception:
            os.remove(destination)
            raise
Code example #31
File: index.py Project: uragirii/FFCS
def download_file(url, filename, folder):
    print("Downloading file : ", filename)
    urlretrieve(url, os.path.join(folder, filename), reporthook)
    print("Download Complete")
    return True
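reporthook is not defined in this snippet. urlretrieve calls the hook as reporthook(block_count, block_size, total_size) after each chunk, so a minimal progress printer (a sketch, not the project's own hook) could be:

import sys

def reporthook(block_count, block_size, total_size):
    """Print a rough percentage; total_size may be -1 when the server sends no size."""
    if total_size > 0:
        percent = min(100, block_count * block_size * 100 // total_size)
        sys.stdout.write('\rDownloading: {}%'.format(percent))
    else:
        sys.stdout.write('\rDownloaded {} bytes'.format(block_count * block_size))
    sys.stdout.flush()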
Code example #32
File: run.py Project: lijz36/CaptcheSpider
def main():
    headers = {
        'Accept':
        '*/*',
        'Accept-Encoding':
        'gzip, deflate',
        'Accept-Language':
        'zh-CN,zh;q=0.9',
        'Cache-Control':
        'no-cache',
        'DNT':
        '1',
        'Host':
        'c.dun.163yun.com',
        'Referer':
        'https://dun.163.com/trial/jigsaw',
        'Pragma':
        'no-cache',
        'Proxy-Connection':
        'keep-alive',
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
    }
    with open('mm.js', 'r', encoding='utf-8') as f:
        content = f.read()
    ctx = execjs.compile(content)
    fp = ctx.call('get_fp')
    callback = ctx.call('get_callback')
    with open('tnp.js', 'r') as f:
        content = f.read()
    ctx = execjs.compile(content)
    cb = ctx.call('cb')
    data = {
        "id": "07e2387ab53a4d6f930b8d9a9be71bdf",
        "fp": fp,
        "https": "true",
        "type": "2",
        "version": "2.13.6",
        "dpr": "1",
        "dev": "1",
        "cb": cb,
        "ipv6": "false",
        "runEnv": "10",
        "group": "",
        "scene": "",
        "width": "320",
        "token": "",
        "referer": "https://dun.163.com/trial/jigsaw",
        "callback": callback
    }
    r = requests.get('https://c.dun.163.com/api/v2/get',
                     params=data,
                     headers=headers)
    data = json.loads(re.findall('.*?\((.*?)\);', r.text)[0])
    token = data['data']['token']
    request.urlretrieve(data['data']['front'][0], 'img/1.png')
    request.urlretrieve(data['data']['bg'][0], 'img/2.jpg')
    distance = get_gap() + 5
    trace = get_track(distance)
    left = trace[-1][0] - 10
    data = ctx.call('get_data', token, trace, left)
    cb = ctx.call('cb')
    get_data = {
        "id": "07e2387ab53a4d6f930b8d9a9be71bdf",
        "token": token,
        "acToken": "",
        "data": data,
        "width": "320",
        "type": "2",
        "version": "2.13.6",
        "cb": cb,
        "extraData": "",
        "runEnv": "10",
        "referer": "https://dun.163.com/trial/jigsaw",
        "callback": "__JSONP_hhjwbon_4"
    }
    r = requests.get('https://c.dun.163.com/api/v2/check',
                     headers=headers,
                     params=get_data)
    print(r.text)
Code example #33
                    sys.stdout.write(
                        "\r  [%i/%i] %s %s..." %
                        (tweet_count, tweet_length,
                         "Copying" if can_be_copied else "Downloading", url))
                    sys.stdout.write("\033[K")  # Clear the end of the line
                    sys.stdout.flush()

                    if can_be_copied:
                        copyfile(earlier_archive_path + local_filename,
                                 local_filename)
                    else:
                        while not downloaded:
                            # Actually download the file!
                            try:
                                urlretrieve(better_url, local_filename)
                            except:
                                download_tries = download_tries - 1
                                if download_tries == 0:
                                    print("")
                                    print(
                                        "Failed to download %s after 3 tries."
                                        % better_url)
                                    print("Please try again later?")
                                    sys.exit()
                                time.sleep(5)  # Wait 5 seconds before retrying
                            else:
                                downloaded = True

                    # Change the URL so that the archive's index.html will now point to the
                    # just-download local file...
Code example #34
if not os.path.isdir(args.directory):
    print('The specified output directory could not be accessed.')
    sys.exit(1)

chromedriver_version = get_chromedriver_version(chrome_version)

system = get_system()

url = 'https://chromedriver.storage.googleapis.com/{}/chromedriver_{}.zip' \
    .format(chromedriver_version, system)

print('Downloading chromedriver v{} for Chrome v{} on {}...'.format(
    chromedriver_version, chrome_version, system))

try:
    file, headers = urlretrieve(url)
except URLError as e:
    print('The chromedriver download URL could not be accessed: {}'.format(e))
    sys.exit(1)

# Unzip chromedriver
print('Extracting chromedriver...')

found = False
fp = open(file, 'rb')
z = zipfile.ZipFile(fp)
for name in z.namelist():
    if (system == 'win32' and name == 'chromedriver.exe') or \
            (system != 'win32' and name == 'chromedriver'):
        z.extract(name, args.directory)
        found = True
Code example #35
if len(sys.argv) > 1 and sys.argv[1] == 'sdist':
    # exclude the weight files in sdist
    weight_files = []
else:
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    # in all other cases, download and decompress weight files
    for weight_file in weight_files:
        weight_path = os.path.join(model_dir, weight_file)
        if not os.path.isfile(weight_path):
            compressed_file = weight_file + '.gz'
            compressed_path = os.path.join(model_dir, compressed_file)
            if not os.path.isfile(compressed_file):
                print('Downloading weight file {} ...'.format(compressed_file))
                urlretrieve(base_url + compressed_file, compressed_path)

            print('Decompressing ...')
            with open(weight_path, 'wb') as target:
                try:
                    with gzip.open(compressed_path, 'rb') as source:
                        target.write(source.read())
                except OSError:
                    # Handle symlinks
                    with open(compressed_path) as symlink:
                        # Github raw stores symlinks as text files, so we need
                        # to read it to check the text
                        real_compressed_file = symlink.read()
                    os.remove(compressed_path)
                    msg = '{} is symlink, downloading {} ...'
                    print(msg.format(compressed_file, real_compressed_file))
Code example #36
File: indicators.py Project: tyhicks/snapcraft
    def download(self):
        urlretrieve(self.uri, self.destination, self._progress_callback)

        if self.progress_bar:
            self.progress_bar.finish()
Code example #37
async def speedtest(event):
    arg_from_event = event.pattern_match.group(1)
    chat = await event.get_chat()
    share_as_pic = True if arg_from_event.lower() == "pic" else False
    if share_as_pic:
        # if speedtest is send to a group and send media is
        # not allowed then skip 'pic' argument
        if (hasattr(chat, "default_banned_rights") and not chat.creator
                and not chat.admin_rights
                and chat.default_banned_rights.send_media):
            share_as_pic = False  # disable
    process = None
    all_test_passed = False
    check_mark = u"\u2705"
    warning = u"\u26A0"
    try:
        process = (f"**Speedtest by Ookla**\n\n"
                   f"- {msgRep.SPD_TEST_SELECT_SERVER}...")
        await event.edit(process)
        s = Speedtest()
        s.get_best_server()
        process = (f"**Speedtest by Ookla**\n\n"
                   f"- {msgRep.SPD_TEST_SELECT_SERVER} {check_mark}\n"
                   f"- {msgRep.SPD_TEST_DOWNLOAD}...")
        await event.edit(process)
        s.download()
        process = (f"**Speedtest by Ookla**\n\n"
                   f"- {msgRep.SPD_TEST_SELECT_SERVER} {check_mark}\n"
                   f"- {msgRep.SPD_TEST_DOWNLOAD} {check_mark}\n"
                   f"- {msgRep.SPD_TEST_UPLOAD}...")
        await event.edit(process)
        s.upload()
        process = (f"**Speedtest by Ookla**\n\n"
                   f"- {msgRep.SPD_TEST_SELECT_SERVER} {check_mark}\n"
                   f"- {msgRep.SPD_TEST_DOWNLOAD} {check_mark}\n"
                   f"- {msgRep.SPD_TEST_UPLOAD} {check_mark}")
        all_test_passed = True
        if share_as_pic:
            s.results.share()
        result = s.results.dict()
        if not result:
            await event.edit(process + "\n\n" +
                             f"`{msgRep.SPD_FAILED}: {msgRep.SPD_NO_RESULT}`")
            return
    except MemoryError as me:
        log.error(me)
        if not all_test_passed:
            process = process[:-3] + f" {warning}"
            await event.edit(process + "\n\n" +
                             f"`{msgRep.SPD_FAILED}: {msgRep.SPD_NO_MEMORY}`")
        else:
            await event.edit(process + "\n\n" +
                             f"`{msgRep.SPD_FAILED}: {msgRep.SPD_NO_MEMORY}`")
        return
    except Exception as e:
        log.error(e)
        if not all_test_passed:
            process = process[:-3] + f" {warning}"
            await event.edit(process + "\n\n" + msgRep.SPD_FAILED)
        else:
            await event.edit(process + "\n\n" + msgRep.SPD_FAILED)
        return

    if share_as_pic:
        try:
            await event.edit(process + "\n\n" + f"{msgRep.SPD_PROCESSING}...")
            png_file = path.join(getConfig("TEMP_DL_DIR"), "speedtest.png")
            urlretrieve(result["share"], png_file)
            await event.client.send_file(chat.id, png_file)
            await event.delete()
            remove(png_file)
        except Exception as e:
            log.error(e)
            await event.edit(msgRep.SPD_FAIL_SEND_RESULT)
    else:
        # Convert speed to Mbit/s
        down_in_mbits = round(result["download"] / 10**6, 2)
        up_in_mbits = round(result["upload"] / 10**6, 2)
        # Convert speed to MB/s (real speed?)
        down_in_mb = round(result["download"] / ((10**6) * 8), 2)
        up_in_mb = round(result["upload"] / ((10**6) * 8), 2)
        time = parse(result["timestamp"])
        ping = result["ping"]
        isp = result["client"]["isp"]
        host = result["server"]["sponsor"]
        host_cc = result["server"]["cc"]

        text = "<b>Speedtest by Ookla</b>\n\n"
        text += (f"<b>{msgRep.SPD_TIME}</b>: "
                 f"<code>{time.strftime('%B %d, %Y')} - "
                 f"{time.strftime('%H:%M:%S')} {time.tzname()}</code>\n")
        text += (f"<b>{msgRep.SPD_DOWNLOAD}</b>: "
                 f"<code>{down_in_mbits}</code> "
                 f"{msgRep.SPD_MEGABITS} (<code>{down_in_mb}</code> "
                 f"{msgRep.SPD_MEGABYTES})\n")
        text += (f"<b>{msgRep.SPD_UPLOAD}</b>: "
                 f"<code>{up_in_mbits}</code> {msgRep.SPD_MEGABITS} "
                 f"(<code>{up_in_mb}</code> {msgRep.SPD_MEGABYTES})\n")
        text += f"<b>{msgRep.SPD_PING}</b>: <code>{ping}</code> ms\n"
        text += f"<b>{msgRep.SPD_ISP}</b>: {isp}\n"
        text += f"<b>{msgRep.SPD_HOSTED_BY}</b>: {host} ({host_cc})\n"
        await event.edit(text, parse_mode="html")
    return
Code example #38
)

# a new folder where everything for today's date will eventually be stored
write_folder = "C:/Users/samihoch/" + today
os.mkdir(write_folder)

arcpy.management.CreateFileGDB(write_folder, "Drought.gdb")
arcpy.env.workspace = os.path.join(write_folder, "Drought.gdb")

# copy a blank map template into the new folder
blank_map = "C:/Users/samihoch/BlankMap/BlankMap.aprx"
shutil.copyfile(blank_map, write_folder + "/DroughtMap.aprx")

# download the raw data into the folder for today
write_filename = write_folder + "/Drought.gdb/most_recent.geojson"
response = request.urlretrieve(url, write_filename)

print("Downloading data from noaa")

json_file = open(write_filename)
data_raw = json.load(json_file)

# create a dictionary so that the json data can be properly read
with open(write_filename) as json_file:
    data_raw = json.load(json_file)

if not os.path.exists('C:\Temp'):
    os.makedirs('C:\Temp')

arcpy.management.CreateFileGDB(r'C:\Temp', 'Live.gdb')
arcpy.env.workspace = os.path.join(r'C:\Temp', 'Live.gdb')
Code example #39
    def get_mkdir(self):
        jsonobj = json.loads(self.get_html().decode('utf-8'))
        # listing page - images
        imgList = jsonpath.jsonpath(jsonobj, '$..img')
        # listing page - prices
        pricelist = jsonpath.jsonpath(jsonobj, '$..price')
        # listing page - product names
        titleList = jsonpath.jsonpath(jsonobj, '$..title')
        # listing page - product id -- skuId
        skuIdList = jsonpath.jsonpath(jsonobj, '$..promotionInfo.skuId')
        # product prices
        priceList = jsonpath.jsonpath(jsonobj, '$..price')
        # product brands
        brandList = jsonpath.jsonpath(jsonobj, '$..brandName')
        # product categories
        categoryList = jsonpath.jsonpath(jsonobj, '$..thirdCatName')
        listdata = zip(titleList, imgList, pricelist, skuIdList, priceList,
                       brandList, categoryList)

        for item in listdata:

            print(item)

            # replace '/' in the product name
            import re
            strinfo = re.compile('/')
            itemdir = strinfo.sub('-', item[0])
            print(itemdir)
            time.sleep(1)
            # directory named after the product
            if not os.path.exists(itemdir):
                os.makedirs(itemdir)
            else:
                print(itemdir + ' -- 目录已存在!')
            self.dataurl = ''
            # local path where the main listing image will be stored
            self.pimg = ''
            # listing page - image

            # folder and file names must not contain these 9 characters: / \ : * " < > | ?

            if os.path.exists(itemdir + '/' + item[1][-20:].replace(
                    '/', '-').replace('\\', '-').replace(':', '-').replace(
                        '*', '-').replace('"', '-').replace('<', '-').replace(
                            '>', '-').replace('|', '-').replace('?', '-') +
                              '.webp'):
                print('文件已存在!')
                # return 0
            else:

                if item[1].startswith('//'):
                    self.dataurl = "http:" + item[1]
                else:
                    self.dataurl = item[1]
                try:
                    req = request.Request(self.dataurl, headers=self.headers)
                    reponse = request.urlopen(req)
                    get_img = reponse.read()
                    self.pimg = '/pimgs/' + itemdir + '/' + self.dataurl[
                        -20:].replace('/', '-').replace('\\', '-').replace(
                            ':', '-').replace('*', '-').replace(
                                '"', '-').replace('<', '-').replace(
                                    '>', '-').replace('|', '-').replace(
                                        '?', '-') + '.webp'
                    with open(
                            itemdir + '/' +
                            self.dataurl[-20:].replace('/', '-').replace(
                                '\\', '-').replace(':', '-').replace(
                                    '*', '-').replace('"', '-').replace(
                                        '<', '-').replace('>', '-').replace(
                                            '|', '-').replace('?', '-') +
                            '.webp', 'wb') as fp:
                        fp.write(get_img)
                except Exception as e:
                    print(e)
            # detail-page directory
            if not os.path.exists(itemdir + '/详情'):
                os.makedirs(itemdir + '/详情')
            else:
                print('详情' + ' -- 目录已存在!')
            driver = webdriver.PhantomJS(
                executable_path='./phantomjs-2.1.1-macosx/bin/phantomjs')
            time.sleep(5)
            driver.get(self.detailurl + str(item[3]))
            time.sleep(5)
            driver.find_element_by_class_name('tipinfo').click()
            time.sleep(5)
            html = etree.HTML(driver.page_source)
            imglist = html.xpath('//img/@src')
            print(self.detailurl + str(item[3]))
            # carousel images
            lunfantu = html.xpath('//img[@class="detail-img"]/@src')
            # "you may also like" (unused)
            # like = html.xpath('//img[@class="J_ItemImage recommend-img"]/@src')
            # product promo images
            xuanchuan = html.xpath(
                '//div[@class="J_descriptionDetail parameter"]//img/@src')
            # specifications
            # parameter names (left column)
            leftspec = html.xpath(
                '//div[@class="left attr_key border-1px border-r border-b"]/text()'
            )
            # parameter values (right column)
            rightspec = html.xpath(
                '//div[@class="left attr_value border-1px border-b"]/span/text()'
            )
            spec = zip(leftspec, rightspec)
            # time.sleep(5)
            # print(driver.page_source)
            print(str(item[3]))
            print(
                "-------------------------- 轮播图 --------------------------------"
            )
            print(lunfantu)
            print(
                "--------------------------- 规格 ---------------------------------"
            )
            print(spec)
            print(
                "-------------------------- 介绍图 ---------------------------------"
            )
            print(xuanchuan)
            print(
                "-------------------------- 主页图 ---------------------------------"
            )
            print(self.dataurl)

            for simple in imglist:
                if not os.path.exists(
                        itemdir + '/详情/' + simple[-20:].replace('/', '-').
                        replace('\\', '-').replace(':', '-').replace('*', '-').
                        replace('"', '-').replace('<', '-').replace('>', '-').
                        replace('|', '-').replace('?', '-') + '.webp'):
                    request.urlretrieve(
                        simple, itemdir + '/详情' +
                        '/' + simple[-20:].replace('/', '-').replace(
                            '\\', '-').replace(':', '-').replace(
                                '*', '-').replace('"', '-').replace(
                                    '<', '-').replace('>', '-').replace(
                                        '|', '-').replace('?', '-') + ".webp")
                    print("正在下载......")
                else:
                    print('文件已存在!')

                #     NOT
                #     NULL
                #     AUTO_INCREMENT, title
                #     VARCHAR(1000), img
                #     VARCHAR(1000), lunfanimg
                #     VARCHAR(1000), spec
                #     VARCHAR(1000), xcimg
                #     VARCHAR(1000),
            # insert into the database
        # check whether this skuId is already in the database; insert only if it is not

            result = self.cur.execute(
                "select skuid from duodian WHERE skuid=" + str(item[3]))
            print(str(result) + '-----------------------')

            if result:
                print("数据库里面存在此数据")
            else:
                # not present yet, so store the data
                lunfantu1 = {}
                specpagram = {}
                xuanchuan1 = {}
                # carousel images
                for index1, item1 in enumerate(lunfantu):
                    lunfantu1[index1] = item1
                # specifications
                speckey = 0
                for itemspec in spec:
                    specvalue = str(itemspec[0]) + '-' + str(itemspec[1])
                    specpagram[str(speckey)] = specvalue
                    speckey += 1
                # promo images
                for index3, item3 in enumerate(xuanchuan):
                    xuanchuan1[index3] = item3
                # local paths for the stored images
                plunfantu = {}
                pxuanchuan = {}
                for pindex1, pitem1 in enumerate(lunfantu):
                    plunfantu[pindex1] = '/pimgs/' + itemdir + '/详情/' + pitem1[
                        -20:].replace('/', '-').replace('\\', '-').replace(
                            ':', '-').replace('*', '-').replace(
                                '"', '-').replace('<', '-').replace(
                                    '>', '-').replace('|', '-').replace(
                                        '?', '-') + '.webp'
                for pindex2, pitem2 in enumerate(xuanchuan):
                    pxuanchuan[
                        pindex2] = '/pimgs/' + itemdir + '/详情/' + pitem2[
                            -20:].replace('/', '-').replace('\\', '-').replace(
                                ':', '-').replace('*', '-').replace(
                                    '"', '-').replace('<', '-').replace(
                                        '>', '-').replace('|', '-').replace(
                                            '?', '-') + '.webp'
                self.cur.execute(
                    'INSERT INTO ' + self.tablename +
                    ' (title, img, lunfanimg, spec, xcimg,skuid,pimg, plunfanimg, pxcimg,categoryid,price,brandname,categoryname) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s,%s, %s, %s,%s)',
                    (itemdir, self.dataurl,
                     json.dumps(lunfantu1, ensure_ascii=False),
                     json.dumps(specpagram, ensure_ascii=False),
                     json.dumps(xuanchuan1, ensure_ascii=False), str(item[3]),
                     self.pimg, json.dumps(plunfantu, ensure_ascii=False),
                     json.dumps(pxuanchuan, ensure_ascii=False), '11386',
                     '%.2f' % (item[4] / 100), str(item[5]), str(item[6])))
                self.cur.connection.commit()
                print(
                    "------------------------  插入成功  ----------------------------------"
                )
Code example #40
def retrieve_to_file(url):
    filename, headers = urlretrieve(url)
    return filename, headers
Code example #41
File: 4-2-hw.py Project: lsm9275/section4
import sys
import io
import os.path
import urllib.request as req
from bs4 import BeautifulSoup

sys.stdout = io.TextIOWrapper(sys.stdout.detach(), encoding='utf-8')
sys.stderr = io.TextIOWrapper(sys.stderr.detach(), encoding='utf-8')

url = 'https://www.weather.go.kr/weather/forecast/mid-term-rss3.jsp?stnId=159'
rawdata = 'D:/6_PWork/5_inflearn/01_Python_Automation_and_GUI/Section4/forecast_hw.xml'

if not os.path.exists(rawdata):
    req.urlretrieve(url, rawdata)

xml = open(rawdata, 'r', encoding='utf-8').read()
soup = BeautifulSoup(xml, 'html.parser')
#print(soup.find_all('city'))

info = {}
for location in soup.find_all('location'):
    city = location.find('city').string
    #print(city)
    weather = location.find_all('tmn')
    #print(tmns)

    if not city in info:
        info[city] = []
        for tmn in weather:
            info[city].append(tmn.string)
Code example #42
    title = title_soup.get_text('title')
    #print(title)
    rename_title = title
    for symbol in cannot_mk_dir_list:
        rename_title = rename_title.replace(symbol, '')

    concert_path = f'{out_dir}/{rename_title}'
    if not os.path.exists(concert_path):
        os.mkdir(concert_path)

    pic_soup = li_soup.find('a', {'class': 'box-img'})
    #print(pic_soup)
    img_soup = pic_soup.find('img')
    #print(img_soup)
    link = img_soup.get('data-src')
    print(link)

    urlretrieve('http://www.thaiticketmajor.com' + link,
                f"{concert_path}/img.jpg")

    date_soup = detail_soup.find('span')
    #print(date_soup)
    date = date_soup.get_text('span')
    print(date)

    detail = {}
    detail['title'] = title
    detail['date'] = date
    # record[] = ....
    with codecs.open(f'{concert_path}/detail.json', 'w', 'utf-8') as outfile:
        json.dump(detail, outfile, ensure_ascii=False)
Code example #43
#Imports
from urllib.request import urlretrieve
import re
import datetime

URL_PATH = 'https://s3.amazonaws.com/tcmg476/http_access_log'
LOCAL_FILE = 'local_copy.log'
total_requests = 0
year_count = 0

local_file, headers = urlretrieve(URL_PATH, LOCAL_FILE)

FILE_NAME = 'path/to/file'

#counts and matches for dates
oct94_count = 0
nov94_count = 0
dec94_count = 0
jan94_count = 0
feb94_count = 0
mar94_count = 0
apr94_count = 0
may94_count = 0
jun94_count = 0
jul94_count = 0
aug94_count = 0
sep94_count = 0
oct95_count = 0
jan_match = 0
feb_match = 0
mar_match = 0
コード例 #44
0
from urllib import request
import datetime
today = datetime.datetime.today()
from bs4 import BeautifulSoup
import ctypes

code = request.urlopen('http://cn.bing.com').read().decode('utf-8')

soup = BeautifulSoup(code, 'html.parser')

imgLink = 'http://cn.bing.com' + soup.find(name='link', attrs={'id': 'bgLink'})['href']
filename = str(today.year) + ',' + str(today.month) + ',' + str(today.day) + '.png'
request.urlretrieve(imgLink, filename)

# 20 == SPI_SETDESKWALLPAPER: set the downloaded image as the Windows desktop wallpaper
ctypes.windll.user32.SystemParametersInfoW(
    20, 0,
    __import__('os').path.abspath('.') + '/' + filename, 0)
コード例 #45
0
ファイル: stickers.py プロジェクト: PAINBOI2008/SheebaQueen
def kang(update: Update, context: CallbackContext):
    msg = update.effective_message
    user = update.effective_user
    args = context.args
    packnum = 0
    packname = "a" + str(user.id) + "_by_" + context.bot.username
    packname_found = 0
    max_stickers = 120
    while packname_found == 0:
        try:
            stickerset = context.bot.get_sticker_set(packname)
            if len(stickerset.stickers) >= max_stickers:
                packnum += 1
                packname = (
                    "a"
                    + str(packnum)
                    + "_"
                    + str(user.id)
                    + "_by_"
                    + context.bot.username
                )
            else:
                packname_found = 1
        except TelegramError as e:
            if e.message == "Stickerset_invalid":
                packname_found = 1
    kangsticker = "kangsticker.png"
    is_animated = False
    file_id = ""

    if msg.reply_to_message:
        if msg.reply_to_message.sticker:
            if msg.reply_to_message.sticker.is_animated:
                is_animated = True
            file_id = msg.reply_to_message.sticker.file_id

        elif msg.reply_to_message.photo:
            file_id = msg.reply_to_message.photo[-1].file_id
        elif msg.reply_to_message.document:
            file_id = msg.reply_to_message.document.file_id
        else:
            msg.reply_text("Yea, I can't kang that.")
            return  # nothing usable was replied to; stop before fetching an empty file_id

        kang_file = context.bot.get_file(file_id)
        if not is_animated:
            kang_file.download("kangsticker.png")
        else:
            kang_file.download("kangsticker.tgs")

        if args:
            sticker_emoji = str(args[0])
        elif msg.reply_to_message.sticker and msg.reply_to_message.sticker.emoji:
            sticker_emoji = msg.reply_to_message.sticker.emoji
        else:
            sticker_emoji = "🤔"

        if not is_animated:
            try:
                im = Image.open(kangsticker)
                maxsize = (512, 512)
                if im.width < 512 and im.height < 512:  # upscale images smaller than 512x512
                    size1 = im.width
                    size2 = im.height
                    if im.width > im.height:
                        scale = 512 / size1
                        size1new = 512
                        size2new = size2 * scale
                    else:
                        scale = 512 / size2
                        size1new = size1 * scale
                        size2new = 512
                    size1new = math.floor(size1new)
                    size2new = math.floor(size2new)
                    sizenew = (size1new, size2new)
                    im = im.resize(sizenew)
                else:
                    im.thumbnail(maxsize)
                if not msg.reply_to_message.sticker:
                    im.save(kangsticker, "PNG")
                context.bot.add_sticker_to_set(
                    user_id=user.id,
                    name=packname,
                    png_sticker=open("kangsticker.png", "rb"),
                    emojis=sticker_emoji,
                )
                msg.reply_text(
                    f"Sticker successfully added to [pack](t.me/addstickers/{packname})"
                    + f"\nEmoji is: {sticker_emoji}",
                    parse_mode=ParseMode.MARKDOWN,
                )

            except OSError as e:
                msg.reply_text("I can only kang images m8.")
                print(e)
                return

            except TelegramError as e:
                if e.message == "Stickerset_invalid":
                    makepack_internal(
                        update,
                        context,
                        msg,
                        user,
                        sticker_emoji,
                        packname,
                        packnum,
                        png_sticker=open("kangsticker.png", "rb"),
                    )
                elif e.message == "Sticker_png_dimensions":
                    im.save(kangsticker, "PNG")
                    context.bot.add_sticker_to_set(
                        user_id=user.id,
                        name=packname,
                        png_sticker=open("kangsticker.png", "rb"),
                        emojis=sticker_emoji,
                    )
                    msg.reply_text(
                        f"Sticker successfully added to [pack](t.me/addstickers/{packname})"
                        + f"\nEmoji is: {sticker_emoji}",
                        parse_mode=ParseMode.MARKDOWN,
                    )
                elif e.message == "Invalid sticker emojis":
                    msg.reply_text("Invalid emoji(s).")
                elif e.message == "Stickers_too_much":
                    msg.reply_text("Max packsize reached. Press F to pay respecc.")
                elif e.message == "Internal Server Error: sticker set not found (500)":
                    msg.reply_text(
                        "Sticker successfully added to [pack](t.me/addstickers/%s)"
                        % packname
                        + "\n"
                        "Emoji is:" + " " + sticker_emoji,
                        parse_mode=ParseMode.MARKDOWN,
                    )
                print(e)

        else:
            packname = "animated" + str(user.id) + "_by_" + context.bot.username
            packname_found = 0
            max_stickers = 50
            while packname_found == 0:
                try:
                    stickerset = context.bot.get_sticker_set(packname)
                    if len(stickerset.stickers) >= max_stickers:
                        packnum += 1
                        packname = (
                            "animated"
                            + str(packnum)
                            + "_"
                            + str(user.id)
                            + "_by_"
                            + context.bot.username
                        )
                    else:
                        packname_found = 1
                except TelegramError as e:
                    if e.message == "Stickerset_invalid":
                        packname_found = 1
            try:
                context.bot.add_sticker_to_set(
                    user_id=user.id,
                    name=packname,
                    tgs_sticker=open("kangsticker.tgs", "rb"),
                    emojis=sticker_emoji,
                )
                msg.reply_text(
                    f"Sticker successfully added to [pack](t.me/addstickers/{packname})"
                    + f"\nEmoji is: {sticker_emoji}",
                    parse_mode=ParseMode.MARKDOWN,
                )
            except TelegramError as e:
                if e.message == "Stickerset_invalid":
                    makepack_internal(
                        update,
                        context,
                        msg,
                        user,
                        sticker_emoji,
                        packname,
                        packnum,
                        tgs_sticker=open("kangsticker.tgs", "rb"),
                    )
                elif e.message == "Invalid sticker emojis":
                    msg.reply_text("Invalid emoji(s).")
                elif e.message == "Internal Server Error: sticker set not found (500)":
                    msg.reply_text(
                        "Sticker successfully added to [pack](t.me/addstickers/%s)"
                        % packname
                        + "\n"
                        "Emoji is:" + " " + sticker_emoji,
                        parse_mode=ParseMode.MARKDOWN,
                    )
                print(e)

    elif args:
        try:
            try:
                urlemoji = msg.text.split(" ")
                png_sticker = urlemoji[1]
                sticker_emoji = urlemoji[2]
            except IndexError:
                sticker_emoji = "🤔"
            urllib.urlretrieve(png_sticker, kangsticker)  # on Python 3 this call is urllib.request.urlretrieve
            im = Image.open(kangsticker)
            maxsize = (512, 512)
            if im.width < 512 and im.height < 512:  # upscale images smaller than 512x512
                size1 = im.width
                size2 = im.height
                if im.width > im.height:
                    scale = 512 / size1
                    size1new = 512
                    size2new = size2 * scale
                else:
                    scale = 512 / size2
                    size1new = size1 * scale
                    size2new = 512
                size1new = math.floor(size1new)
                size2new = math.floor(size2new)
                sizenew = (size1new, size2new)
                im = im.resize(sizenew)
            else:
                im.thumbnail(maxsize)
            im.save(kangsticker, "PNG")
            msg.reply_photo(photo=open("kangsticker.png", "rb"))
            context.bot.add_sticker_to_set(
                user_id=user.id,
                name=packname,
                png_sticker=open("kangsticker.png", "rb"),
                emojis=sticker_emoji,
            )
            msg.reply_text(
                f"Sticker successfully added to [pack](t.me/addstickers/{packname})"
                + f"\nEmoji is: {sticker_emoji}",
                parse_mode=ParseMode.MARKDOWN,
            )
        except OSError as e:
            msg.reply_text("I can only kang images m8.")
            print(e)
            return
        except TelegramError as e:
            if e.message == "Stickerset_invalid":
                makepack_internal(
                    update,
                    context,
                    msg,
                    user,
                    sticker_emoji,
                    packname,
                    packnum,
                    png_sticker=open("kangsticker.png", "rb"),
                )
            elif e.message == "Sticker_png_dimensions":
                im.save(kangsticker, "PNG")
                context.bot.add_sticker_to_set(
                    user_id=user.id,
                    name=packname,
                    png_sticker=open("kangsticker.png", "rb"),
                    emojis=sticker_emoji,
                )
                msg.reply_text(
                    "Sticker successfully added to [pack](t.me/addstickers/%s)"
                    % packname
                    + "\n"
                    + "Emoji is:"
                    + " "
                    + sticker_emoji,
                    parse_mode=ParseMode.MARKDOWN,
                )
            elif e.message == "Invalid sticker emojis":
                msg.reply_text("Invalid emoji(s).")
            elif e.message == "Stickers_too_much":
                msg.reply_text("Max packsize reached. Press F to pay respecc.")
            elif e.message == "Internal Server Error: sticker set not found (500)":
                msg.reply_text(
                    "Sticker successfully added to [pack](t.me/addstickers/%s)"
                    % packname
                    + "\n"
                    "Emoji is:" + " " + sticker_emoji,
                    parse_mode=ParseMode.MARKDOWN,
                )
            print(e)
    else:
        packs = "Please reply to a sticker, or image to kang it!\nOh, by the way. here are your packs:\n"
        if packnum > 0:
            firstpackname = "a" + str(user.id) + "_by_" + context.bot.username
            for i in range(0, packnum + 1):
                if i == 0:
                    packs += f"[pack](t.me/addstickers/{firstpackname})\n"
                else:
                    # rebuild the i-th pack name the same way the packs were created above
                    nth_packname = "a" + str(i) + "_" + str(user.id) + "_by_" + context.bot.username
                    packs += f"[pack{i}](t.me/addstickers/{nth_packname})\n"
        else:
            packs += f"[pack](t.me/addstickers/{packname})"
        msg.reply_text(packs, parse_mode=ParseMode.MARKDOWN)
    try:
        if os.path.isfile("kangsticker.png"):
            os.remove("kangsticker.png")
        elif os.path.isfile("kangsticker.tgs"):
            os.remove("kangsticker.tgs")
    except OSError:
        pass  # best-effort cleanup of the temporary sticker files
コード例 #46
0
ファイル: recipe-576618.py プロジェクト: kaestnja/pystrict3
    def run(self):
        while True:
            link, filename = self.links_filenames.get()
            urlretrieve(link, filename)
            self.links_filenames.task_done()
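For context, the run method above is a fragment of a queue-based download worker; a minimal, self-contained sketch of how such a worker could be wired up (the class name, thread count, and queued URL below are illustrative, not taken from the original recipe):

# Illustrative wiring for the run() fragment above: a Thread subclass that
# drains (link, filename) pairs from a shared queue.
import threading
from queue import Queue
from urllib.request import urlretrieve

class DownloadWorker(threading.Thread):
    def __init__(self, links_filenames):
        super().__init__(daemon=True)
        self.links_filenames = links_filenames

    def run(self):
        while True:
            link, filename = self.links_filenames.get()
            urlretrieve(link, filename)
            self.links_filenames.task_done()

links_filenames = Queue()
for _ in range(4):  # start four concurrent downloaders
    DownloadWorker(links_filenames).start()
links_filenames.put(("https://www.example.com/", "example.html"))  # placeholder job
links_filenames.join()  # block until every queued download has finished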
コード例 #47
0
import os.path
import urllib.request as req

def download(from_url, to_file):
    if not os.path.isfile(to_file):
        req.urlretrieve(from_url, to_file)
コード例 #48
0
import os
from urllib.request import urlretrieve

import open3d as o3d

from config import get_config
from util.pointcloud import combine_point_clouds
from util.misc import get_random_color
# module path assumed from the Deep Global Registration repository layout
from core.deep_global_registration import DeepGlobalRegistration

BASE_URL = "http://node2.chrischoy.org/data/"
DOWNLOAD_LIST = [(BASE_URL + "datasets/registration/", "redkitchen_000.ply"),
                 (BASE_URL + "datasets/registration/", "redkitchen_010.ply"),
                 (BASE_URL + "projects/DGR/",
                  "ResUNetBN2C-feat32-3dmatch-v0.05.pth")]

# Check if the weights and file exist and download
if not os.path.isfile('redkitchen_000.ply'):
    print('Downloading weights and pointcloud files...')
    for f in DOWNLOAD_LIST:
        print(f"Downloading {f}")
        urlretrieve(f[0] + f[1], f[1])

if __name__ == '__main__':
    config = get_config()
    if config.weights is None:
        config.weights = DOWNLOAD_LIST[-1][-1]

    # preprocessing
    pcd0 = o3d.io.read_point_cloud(config.pcd0)
    pcd0.estimate_normals()
    pcd1 = o3d.io.read_point_cloud(config.pcd1)
    pcd1.estimate_normals()

    # registration
    dgr = DeepGlobalRegistration(config)
    T01 = dgr.register(pcd0, pcd1)
コード例 #49
0
from urllib.request import urlretrieve
from urllib.request import urlopen
from bs4 import BeautifulSoup

html = urlopen("http://www.pythonscraping.com")
soup = BeautifulSoup(html, 'html.parser')
imageLocation = soup.find('a', id='logo').find('img')['src']
urlretrieve(imageLocation, 'logo.jpg')
#download the logo.jpg, and put it in the current directory
コード例 #50
0
import json
from urllib.request import urlretrieve

def extract_data(url, file_name):
    urlretrieve(url, file_name)
    with open(file_name) as json_data:
        return json.load(json_data)
コード例 #51
0
opener.addheaders = [('User-agent', UserAgent().ie)]

request.install_opener(opener)

base = "https://search.naver.com/search.naver?where=image&sm=tab_jum&query="
quote = parse.quote_plus("벤츠")  # search keyword: "Benz"

url = base + quote
res = request.urlopen(url)

savePath = BASE_DIR + ('/5-2/')

try:
    if not (os.path.isdir(savePath)):
        os.makedirs(os.path.join(savePath))

except OSError as e:
    print("Folder creation failed!")
else:
    print('Folder creation success!')

soup = BeautifulSoup(res, "html.parser")
imageSources = soup.select("div.img_area > a.thumb._thumb > img")

for i, imageSource in enumerate(imageSources, 1):
    fullFileName = os.path.join(savePath, str(i) + '.png')  # join once; the old code prefixed savePath twice
    print(i, imageSource)
    request.urlretrieve(imageSource['data-source'], fullFileName)

print("download succeeded!")
コード例 #52
0
def download_url(url, filename):
    """Download a file from url to filename, with a progress bar."""
    with TqdmUpTo(unit="B", unit_scale=True, unit_divisor=1024,
                  miniters=1) as t:
        urlretrieve(url, filename, reporthook=t.update_to, data=None)  # nosec
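The TqdmUpTo helper referenced above is not defined in this snippet; its usual definition, adapted from the urlretrieve reporthook recipe in the tqdm documentation, looks roughly like this:

# Assumed definition of TqdmUpTo: converts urlretrieve's (block count,
# block size, total size) reporthook callbacks into tqdm progress updates.
from tqdm import tqdm

class TqdmUpTo(tqdm):
    def update_to(self, b=1, bsize=1, tsize=None):
        if tsize is not None:
            self.total = tsize
        self.update(b * bsize - self.n)  # advance to the absolute position b * bsize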
コード例 #53
0
from bs4 import BeautifulSoup
import urllib.request as req
import os.path

#   Download the XML
url = "http://www.kma.go.kr/weather/forecast/mid-term-rss3.jsp?stnId=108"


#   Save it to a file
savename = "forecast.xml"
if not os.path.exists(savename):
    req.urlretrieve(url, savename)

#   Parse with BeautifulSoup
xml = open(savename, "r", encoding="utf-8").read()
soup = BeautifulSoup(xml, "html.parser")

#   Check each region
info = {}
for location in soup.find_all("location"):
    name = location.find('city').string
    weather = location.find('wf').string
    if weather not in info:
        info[weather] = []
    info[weather].append(name)

#   Print the results
for weather in info.keys():
    print("+", weather)
    for name in info[weather]:
        print("- ", name)
コード例 #54
0
ファイル: cvpr.py プロジェクト: alisure-ml/PaperDownLoad
def _main(i, all_num, url, filename):
    print("[{}/{}] Downloading {} -> {}".format(i, all_num, url, filename))
    request.urlretrieve(url, filename)
    pass
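A hypothetical driver for _main above, looping over a small list of papers (the URLs and file names are placeholders, not taken from the original project):

# Hypothetical usage of _main(): download each (url, filename) pair in turn.
papers = [
    ("https://www.example.com/paper1.pdf", "paper1.pdf"),  # placeholder entries
    ("https://www.example.com/paper2.pdf", "paper2.pdf"),
]
for i, (url, filename) in enumerate(papers, start=1):
    _main(i, len(papers), url, filename)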
コード例 #55
0
    def download_files(self, folder, action=DOWNLOAD_UPDATED_REQUESTS, flat=False):
        run_time = time.strftime('%d%m%y%H%M%S', time.localtime())
        log_file = os.path.join(folder, 'log-%s.txt' % run_time)
        self.log_infos, self.log_errors, self.log_links = '', '', ''
        paths = set()
        name = ""
        counter = 0

        existing_update_time, new_update_time = None, {}
        request_updates_path = os.path.join(folder, 'requests-update-time.json')

        if os.path.isfile(request_updates_path):
            # json.load's parse_int argument expects a callable, so rely on the default int handling
            existing_update_time = json.load(open(request_updates_path, 'r', encoding='utf-8'))
            if action >= self.DOWNLOAD_UPDATED_REQUESTS:
                os.rename(request_updates_path, request_updates_path.replace('.', '-bkp-%s.' % run_time))

        for row in self.data:
            prev_name = name
            request_id, update_time, nom, num, title, file_type, file = row
            name = self.to_filename(title if title else 'No title').replace('  ', ' ')
            nom, file_type = self.to_filename(nom), self.to_filename(file_type)
            download_skipped_by_preprocessor, dir_name, file_name = self.preprocess(int(num), name, file_type)
            display_path = ' | '.join([nom, dir_name, file_name])
            if download_skipped_by_preprocessor:
                self.log_info('SKIP: ' + display_path)
                continue
            dir_path = os.path.join(folder, dir_name) if flat else os.path.join(folder, nom, dir_name)
            try:
                is_img = False
                if not file:
                    self.log_error('No file for %s.' % display_path)
                    continue
                file = json.loads(file)
                new_update_time[request_id] = update_time  # assuming it's the same for all request files
                if 'link' in file.keys():  # External site
                    file_exists = file_name in [name.split('.', 1)[0] for name in os.listdir(dir_path)] \
                                    if os.path.exists(dir_path) else False
                    request_up_to_date = existing_update_time \
                                            and str(request_id) in existing_update_time \
                                            and existing_update_time[str(request_id)] == update_time
                    if file_exists:
                        self.log_info(display_path + ' exists. ', inline=True)
                        if request_up_to_date:
                            self.log_info('And the request did not update. Skipping...', head=False)
                        else:
                            self.log_info('And the request updated. You should update it!', head=False)

                    if not file_exists or (file_exists and not request_up_to_date):
                        link_dir_path = os.path.join(folder, dir_name) if flat else os.path.join(folder, nom, dir_name)
                        if not os.path.exists(dir_path) \
                                and not os.path.exists(link_dir_path):
                            os.makedirs(link_dir_path)
                        successful_download = False
                        if action >= self.DOWNLOAD_UPDATED_REQUESTS:
                            self.log_info(("DL: " + file['link'] + " -> " + display_path))
                            successful_download = CloudDownloader.get(file['link'], os.path.join(dir_path, file_name))
                        if successful_download:
                            self.log_info("[CLOUD OK] " + display_path)
                        else:
                            self.log_info("[CLOUD FAIL] " + display_path)
                            self.log_link("%s -> %s" % (file['link'], display_path))
                    continue
                else:
                    src_filename = file['filename']
                    if 'fileext' in file:
                        file_ext = file['fileext']
                    else:
                        file_ext = '.jpg'
                        is_img = True

                if prev_name == name:
                    counter += 1
                    file_name += '-' + str(counter)
                file_name += file_ext
                path = os.path.join(dir_path, file_name)
                file_url = 'http://' + parse.quote('%s.cosplay2.ru/uploads/%d/%d/%s' % (self.event_name, self.event_id,
                                                                                        request_id, src_filename))

                if is_img:
                    file_url += '.jpg'  # Yes, it works this way
                download_required = True
                if os.path.isfile(path) or os.path.isfile(path + '_'):  # This makes a file invisible for extractor
                    self.log_info(display_path + ' exists. ', inline=True)
                    if action in (self.CHECK_UPDATES_ONLY, self.DOWNLOAD_UPDATED_REQUESTS) and existing_update_time:
                        if str(request_id) in existing_update_time \
                                and existing_update_time[str(request_id)] == update_time:
                            self.log_info('And the request did not update. Skipping...', head=False)
                            download_required = False
                        else:
                            self.log_info('And the request updated. Updating...', head=False)
                    else:
                        self.log_info('Configured not to check or no data on updates. Skipping...', head=False)
                        download_required = False
                if download_required:
                    if path not in paths:
                        paths.add(path)
                    else:
                        self.log_error("!!!! %s was about to overwrite. Check your SQL query!!!" % path)
                        break
                    self.log_info(("DL: " + file_url + " -> " + path), inline=True)
                    if action >= self.DOWNLOAD_UPDATED_REQUESTS:
                        if not os.path.isdir(dir_path):
                            os.makedirs(dir_path)
                        request.urlretrieve(file_url, path)
                        self.log_info(' [OK]', head=False)
                    else:
                        self.log_info(' [READY]', head=False)
            except (TypeError, AttributeError, request.HTTPError) as e:
                print("[FAIL]", name + ":", e)

        if not os.path.isdir(folder):
            os.makedirs(folder)
        if action >= self.DOWNLOAD_UPDATED_REQUESTS:
            json.dump(new_update_time, open(request_updates_path, 'w', encoding='utf-8'), indent=4)

        with open(log_file, 'w', encoding='utf-8') as f:
            f.write("ERRORS:" + os.linesep + self.log_errors + os.linesep)
            f.write("LINKS:" + os.linesep + self.log_links + os.linesep)
            f.write("INFO:" + os.linesep + self.log_infos + os.linesep)
        if self.log_errors:
            print("\n--- ERRORS ---")
            print(self.log_errors)
        if self.log_links:
            print("\n--- LINKS ---")
            print(self.log_links)
コード例 #56
0
ファイル: income.py プロジェクト: ephsmith/pyb
import os
from pathlib import Path
from urllib.request import urlretrieve
from collections import defaultdict
import xml.etree.ElementTree as ET

# import the countries xml file
tmp = Path(os.getenv("TMP", "/tmp"))
countries = tmp / 'countries.xml'

if not countries.exists():
    urlretrieve('https://bites-data.s3.us-east-2.amazonaws.com/countries.xml',
                countries)


def get_income_distribution(xml=countries):
    """
    - Read in the countries xml as stored in countries variable.
    - Parse the XML
    - Return a dict of:
      - keys = incomes (wb:incomeLevel)
      - values = list of country names (wb:name)
    """
    dist = defaultdict(list)
    tree = ET.parse(countries)
    root = tree.getroot()

    for child in root:
        dist[child[4].text].append(child[1].text)

    return dist
コード例 #57
0
ファイル: train.py プロジェクト: brooklyn2016/team-5
    time_values = []
    for i in range(time_slots):
        if i == (time_slots - 1):
            new_data = data[counter:]
            time_values.append(dataFourier(new_data, fourier_slots))
        else:
            new_data = data[counter:counter + data_slots]
            counter += data_slots
            time_values.append(dataFourier(new_data, fourier_slots))
    return time_values

for i in range(len(autisminput)):
    item = autisminput[i]
    dtft_output = dtft_output + item + "|"
    filename = item + ".wav"
    urlretrieve("http://api.voicerss.org/?key=04f49802d32d442ca997d4d2ea76d3d5"
                "&hl=en-us&c=wav&src=" + item, filename)
    rate, data = wav.read(filename)
    realitem = parentinput[i]
    timefingers = dataTimeDivandFourier(data, 300, 10)
    time_str_output = ','.join(str(x) for x in timefingers)
    dtft_output += time_str_output
    dtft_output += "|"
    dtft_output += realitem
    dtft_output += "&"

dtft_output = dtft_output[:len(dtft_output) - 1]



コード例 #58
0
ファイル: DownImg.py プロジェクト: xx025/Zhenai-Data
def urllib_download(IMAGE_URL, ID, imgtype):
    from urllib.request import urlretrieve
    urlretrieve(IMAGE_URL, './image/' + ID + imgtype)
    return True
コード例 #59
0
def checkDependencies():

    #Check git
    retcode = subprocess.Popen(subprocess.list2cmdline(["git", "--version"]),
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               shell=True).wait()
    if retcode != 0:
        logging.error(
            'Lime requires git. Get it from http://git-scm.com/download')
        sys.exit(1)

    #Closure Library
    if not (os.path.exists(closure_dir) and os.path.exists(closure_deps_file)):
        print('Closure Library not found. Downloading to %s' % closure_dir)
        print('Please wait...')

        retcode = subprocess.Popen(subprocess.list2cmdline([
            "git", "clone", "https://github.com/google/closure-library.git",
            closure_dir
        ]),
                                   shell=True).wait()

        if (retcode != 0):
            print('Failed to clone Closure Library via Git. Discontinuing.')
            sys.exit(1)

    #Box2D
    if not os.path.exists(box2d_dir):
        print('Box2DJS not found. Downloading to %s' % box2d_dir)
        print('Please wait...')

        retcode = subprocess.Popen(subprocess.list2cmdline([
            "git", "clone", "https://github.com/thinkpixellab/pl.git",
            box2d_dir
        ]),
                                   shell=True).wait()

        if (retcode != 0):
            logging.error('Error while downloading Box2D. Discontinuing.')
            sys.exit(1)

    #External tools dir
    if not os.path.exists(extdir):
        os.mkdir(extdir)

    #Closure compiler
    if not os.path.exists(compiler_path):
        zip_path = os.path.join(extdir, 'compiler.zip')
        print('Downloading Closure Compiler: ')
        urlretrieve(
            "http://closure-compiler.googlecode.com/files/compiler-20130411.zip",
            zip_path, rephook)
        print('\nUnzipping...')
        zippedFile = zipfile.ZipFile(zip_path)
        zippedFile.extract('compiler.jar', extdir)
        zippedFile.close()
        print('Cleanup')
        os.unlink(zip_path)
        os.rename(os.path.join(extdir, 'compiler.jar'), compiler_path)

    #Closure Templates
    if not os.path.exists(soy_path):
        zip_path = os.path.join(extdir, 'soy.zip')
        print('Downloading Closure Templates(Soy):')
        urlretrieve(
            "http://closure-templates.googlecode.com/files/closure-templates-for-javascript-latest.zip",
            zip_path, rephook)
        print('\nUnzipping...')
        zippedFile = zipfile.ZipFile(zip_path)
        zippedFile.extract('SoyToJsSrcCompiler.jar', extdir)
        zippedFile.close()
        print('Cleanup')
        os.unlink(zip_path)

    if not os.path.exists(projects_path):
        open(projects_path, 'w').close()

    makeProjectPaths('')
コード例 #60
0
def PolaRxS_batchDataDownloadToLocal(input_datetime):

    # start timer
    start_timer = time.time()

    #local path where the script and spreadsheet exists
    local_pwd = '/home/kibrom/kwork/sw-GNSS/fdl18_Frontiers/GNSS_data_derived_products'
    #sub directory to put raw data
    level1 = '/level1/'
    #CHAIN data labels spreadsheet
    data_labels = '/CHAIN_data_labels.xlsx'
    #how long is the downloading time
    download_time = 'data_download_runtimes/'

    # Make the working directories if they don't exist

    if not os.path.exists(local_pwd + level1):
        os.makedirs(local_pwd + level1)

    if not os.path.exists(local_pwd + level1 + download_time):
        os.makedirs(local_pwd + level1 + download_time)

    df_labels_PolaRxS = pd.read_excel(local_pwd + data_labels,
                                      sheet_name='PolaRxS_labels',
                                      header=None,
                                      usecols=[1])

    # input python function to generate daily data here
    print('-------> working on datetime = {0}'.format(input_datetime))

    thisdatetime = input_datetime
    thisdoy = thisdatetime.timetuple().tm_yday
    thisyr = thisdatetime.year
    thisdy = thisdatetime.day
    thismon = thisdatetime.month

    # Initialize the daily dataframe
    df_save = pd.DataFrame()
    #      SET DESIRED DIRECTORY HERE
    save_data = local_pwd + level1
    # File name to save the the full day of data to local disk
    filename_save = save_data + 'PolaRxS_CHAINdata__' + format(
        thisyr, '04') + '_' + format(thisdoy, '03') + '.csv'
    if os.path.isfile(filename_save):
        print('The data for this doy is available at: ')
        print(filename_save)
        sys.exit('We have data for this doy')

    #for h in range(0,1):
    for h in range(0, 24):

        print('this date = {0}'.format(thisdatetime))

        hour_dir = '/gps/ismr/' + '{:04}'.format(
            int(thisyr)) + '/' + '{:03}'.format(
                int(thisdoy)) + '/' + '{:02}'.format(
                    int(thisdatetime.hour + h)) + '/'

        print('this hour directory = {0}'.format(hour_dir))

        #Get files for current hour
        try:
            ftp = ftplib.FTP("chain.physics.unb.ca")
            ftp.login("*****@*****.**", "4Kindahafti4")
            ftp.cwd(hour_dir)
            #List the files in the current directory
            files_thishour = ftp.nlst()
        except Exception as e:
            print(
                '\n-------unable to login, change to directory, or list files {0}'
                .format(hour_dir))
            print('with error {0}--------\n'.format(e))
            continue

        for s in range(len(files_thishour)):
            #			print('this station file = {0}'.format(files_thishour[s]))

            # establish and make, if necessary, a local directory for the data
            local_dir = save_data
            #			local_fn_and_dir = local_dir + files_thishour[s]
            local_fn_and_dir = local_dir + files_thishour[s][-18:]
            # 			print('local_fn_and_dir = {0}'.format(local_fn_and_dir))

            #			if not os.path.exists(local_dir):
            #				os.makedirs(local_dir)

            # clean up the cache that may have been created by previous calls to urlretrieve
            urlcleanup()

            # download the data for the current hour
            if not os.path.isfile(local_fn_and_dir):
                urlretrieve(
                    'ftp://[email protected]:[email protected]/'
                    + hour_dir[1:] + files_thishour[s], local_fn_and_dir)

            try:
                txt_thishour_thisfile = np.genfromtxt(local_fn_and_dir,
                                                      delimiter=",",
                                                      filling_values=99)
    #             df_thishour_thisfile = pd.DataFrame(np.genfromtxt(local_fn_and_dir, delimiter=",", filling_values=99),columns=df_labels_PolaRxS[1].tolist())

    #print np.shape(txt_thishour_thisfile)
            except:
                print('\n\n ***unable to read {} ***\n\n'.format(
                    local_fn_and_dir))
                continue

            thisabbr = local_fn_and_dir[-18:-15]

            # Remove KUG station due to bias
            if thisabbr == 'kug':
                # 				print('\n\n skipping kugc... \n\n')
                os.remove(local_fn_and_dir)
                continue

            if len(txt_thishour_thisfile) == 0:
                print('\n\n ***file is empty, continuing***\n\n')
                os.remove(local_fn_and_dir)
                continue

            os.remove(local_fn_and_dir)

            df_thishour_thisfile = pd.DataFrame(
                data=txt_thishour_thisfile,
                columns=df_labels_PolaRxS[0].tolist())
            df_thishour_thisfile['CHAIN station'] = pd.Series(
                np.full((len(txt_thishour_thisfile[:, 0])), thisabbr))

            # Concatenate the new dataframe to the existing dataframe
            df_save = pd.concat([df_save, df_thishour_thisfile])

            del df_thishour_thisfile

    # Save the full day of data to local disk

    pd.DataFrame.to_csv(df_save, filename_save, na_rep='NaN')
    del filename_save

    # end timer
    end_timer = time.time()
    runtime_thisday = end_timer - start_timer
    np.savetxt(save_data + download_time + 'runtime__' + format(thisyr, '04') +
               '_' + format(thisdoy, '03') + '.txt',
               np.array(runtime_thisday).reshape(1, ),
               fmt='%.2f')