Example #1
import re
import urllib  # Python 2; urlretrieve lives in urllib, not urllib2

def Img(html):
    # match the src attribute of every .jpg image in the page
    photo = r'src="(.*?\.jpg)" width'
    image = re.compile(photo)
    imagelist = re.findall(image, html)
    x = 0
    for j in imagelist:
        # save each image as 0.jpg, 1.jpg, ...
        urllib.urlretrieve(j, '%s.jpg' % x)
        x += 1
Example #2
import sys
from urllib import request as urlrequest  # Python 3

def DownloadFile(url, save_file):
    def reporthook(bnum, bsize, tsize):
        # urlretrieve calls this with (blocks so far, block size, total size)
        progress = bnum * bsize
        sys.stdout.write("\rDownloaded: {n} bytes{blank}".format(
            n=progress,
            blank=" " * (15 - len(str(progress)))))
        sys.stdout.flush()

    try:
        urlrequest.urlretrieve(url, save_file, reporthook)
        print("")
        return True
    except Exception:
        return False
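A minimal driver for DownloadFile might look like this; the URL and file name are placeholders, not part of the original example:

if __name__ == "__main__":
    # hypothetical URL; any file reachable over HTTP(S) works
    ok = DownloadFile("https://example.com/file.bin", "file.bin")
    print("success" if ok else "failed")

Note that the reporthook's third argument is -1 when the server sends no Content-Length, so a percentage display would need a guard for that case.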
Example #3
import re
import urllib  # Python 2; urlretrieve lives in urllib, not urllib2

def getContent(content):
    # use a regular expression to match the image URLs
    pattern = re.compile(r'src="(.*?)" pic_ext=')
    items = re.findall(pattern, content)
    count = 0
    for item in items:
        count += 1
        urllib.urlretrieve(item, "%s.jpg" % count)
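For reference, the same scrape-and-save loop in Python 3, assuming the same `pic_ext` markup as above:

import re
from urllib.request import urlretrieve

def get_content_py3(content):
    # group 1 of the pattern captures the image URL
    for count, item in enumerate(re.findall(r'src="(.*?)" pic_ext=', content), start=1):
        urlretrieve(item, "%d.jpg" % count)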
Example #4
def fetch_arrhythmia(data_home=None, download_if_missing=True):
    """Fetcher for the Arrhythmia dataset.

    Parameters
    ----------
    data_home : optional, default: None
        Specify another download and cache folder for the datasets. By default
        the original datasets for this `data_balance` study are stored in
        `../data/raw/` subfolders.

    download_if_missing : optional, True by default
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    """
    data_home = get_dataset_home(data_home=data_home)
    if not exists(data_home):
        makedirs(data_home)
    print('downloading Arrhythmia data from %s to %s' % (DATA_URL, data_home))
    urlretrieve(DATA_URL, join(data_home, 'data.csv'))
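The names this fetcher leans on (`get_dataset_home`, `DATA_URL`, `exists`, `makedirs`, `join`, `urlretrieve`) are module-level in the original. A sketch of what that preamble plausibly looks like; the URL and default path below are placeholders, not the study's real values:

from os import makedirs
from os.path import exists, join
from urllib.request import urlretrieve

DATA_URL = 'https://example.com/arrhythmia.csv'  # placeholder, not the real source

def get_dataset_home(data_home=None):
    # fall back to the raw-data folder layout described in the docstring
    return data_home or join('..', 'data', 'raw', 'arrhythmia')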
Example #5
    def downloadSources(self):
        
        if( os.path.exists(self.installPath) ):
            return

        trymakedir( self.installPath+'/sources' )
        os.chdir( os.path.dirname(self.installPath) )

        import urllib  # Python 2; urlretrieve lives in urllib, not urllib2

        if self.version == '2005' :
            # cernlib fix from Harald Vogt: http://www-zeuthen.desy.de/~hvogt/
            urllib.urlretrieve( "http://www-zeuthen.desy.de/linear_collider/cernlib/new/cernlib-2005-all-new.tgz", "cernlib-2005-all-new.tgz" )
            urllib.urlretrieve( "http://www-zeuthen.desy.de/linear_collider/cernlib/new/cernlib.2005.corr.2009.06.13.tgz", "cernlib.2005.corr.2009.06.13.tgz" )

            if( os.system( "tar xzf cernlib-2005-all-new.tgz") != 0 ):
                self.abort("failed to extract cernlib sources")

            # use more recent corrections (64 bit compatible)
            os.system( "mv cernlib.2005.corr.tgz cernlib.2005.corr.tgz-old && ln -s cernlib.2005.corr.2009.06.13.tgz cernlib.2005.corr.tgz")

        elif self.version == '2006' :

            # Note: an earlier, commented-out variant downloaded index.html,
            # scraped it for *.tar.gz links and fetched every tarball found;
            # it was replaced with the explicit tarball list below.
            tarballs = [ '2006_src.tar.gz', 'include.tar.gz' ]
            for tarball in tarballs:
                print 'downloading:', tarball
                urllib.urlretrieve( self.download.url + tarball, tarball )
                if os.system( "tar xzf " + tarball ) != 0:
                    self.abort( 'failed to extract '+ tarball )
                os.system( "mv " + tarball + " " + self.installPath+'/sources' )
Example #6
def check_fetch_data(dataset_raw_home=None,
                     base_url='',
                     target_filenames=None,
                     dataset_name='',
                     download_if_missing=True):
    """ Helper function for downloading any missing data of the dataset.

    Parameters
    ----------
    dataset_raw_home : Specify the folder for downloading the data of the
        dataset. The original datasets for this `data_balance` study are
        stored in `../data/raw/` subfolders.

    base_url : string containing the base url for fetching.

    target_filenames : list of the files that need to be downloaded.

    download_if_missing : optional, True by default
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    #TODO: create a test for download_if_missing
    """
    #TODO: assert url directory
    #TODO: assert no empty list

    # avoid a mutable default argument
    if target_filenames is None:
        target_filenames = []
    if not exists(dataset_raw_home):
        makedirs(dataset_raw_home)
    for target in target_filenames:
        path = join(dataset_raw_home, target)
        if not exists(path):
            if download_if_missing:
                # note: this is os.path.join; urljoin (see the sketch below) is more portable
                full_url = join(base_url, target)
                print('downloading %s data from %s to %s' %
                    (RAW_DATA_LABEL, full_url, dataset_raw_home))
                urlretrieve(full_url, path)
            else:
                raise IOError('%s is missing' % path)
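The `join` used to build `full_url` is `os.path.join`, which happens to produce a valid URL on POSIX but yields backslashes on Windows. `urllib.parse.urljoin` is the portable tool; a small sketch with a made-up base URL:

from urllib.parse import urljoin

# note the trailing slash: without it, urljoin replaces the last path segment
base = 'https://example.com/data/'
print(urljoin(base, 'train.csv'))  # https://example.com/data/train.csv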
Example #7
def fetch_coil_2000(data_home=None, download_if_missing=True):
    """Fetcher for the CoIL 2000 dataset.

    Parameters
    ----------
    data_home : optional, default: None
        Specify another download and cache folder for the datasets. By default
        the original datasets for this `data_balance` study are stored at
        `../data/raw/` subfolders.

    download_if_missing : optional, True by default
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    """
    data_home = join(get_data_home(data_home=data_home), 'coil_2000')
    if not exists(data_home):
        makedirs(data_home)
    for target_filename in TARGET_FILENAME_:
        path = join(data_home, target_filename)
        if not exists(path):
            url = join(DATA_URL, target_filename)
            print('downloading Coil 2000 from %s to %s' % (url, data_home))
            urlretrieve(url, path)
Example #8
    sql_query = """
    SELECT DISTINCT *
    FROM photo_data_table

    WHERE datetaken >= '{day}'::date
    AND datetaken < ('{day}'::date + '1 day'::interval)
    AND latitude > {lat_min} AND latitude < {lat_max} 
    AND longitude > {lon_min} AND longitude < {lon_max}
    ORDER BY views;
    """.format(day='10-%02d-11' %date, lon_min=-74.3, lat_min=40.5, lon_max=-73.64,lat_max=40.94)
    photos = pd.read_sql_query(sql_query,con)
    # print 'hour: ', i, photo_data_from_sql.shape[0], 'hits'


    for url in photos['url_s']:
        print url
        # (url, local_path) tuple, unpacked into urlretrieve below
        photo_name = url, '10-%d/%s' % (date, url.split('/')[-1])

        if not os.path.exists(photo_name[1]):
            urllib.urlretrieve(*photo_name)
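Formatting values straight into SQL invites quoting bugs and injection; pandas can bind parameters instead. A sketch of the date filter with a psycopg2-style placeholder (the connection `con` and the value are assumed, not from the original):

import pandas as pd

sql = """
SELECT DISTINCT *
FROM photo_data_table
WHERE datetaken >= %(day)s::date
  AND datetaken < (%(day)s::date + '1 day'::interval)
ORDER BY views;
"""
photos = pd.read_sql_query(sql, con, params={'day': '10-05-11'})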


Example #9
# correction
# 12: 1
# 13: 1
# 14: 2
# 15: 10
# 23: 129
# 30: 87
url01 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp01totals.zip"
url02 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp02totals.zip"
url03 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp03totals.zip"
url04 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp04totals.zip"
url05 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp05totals.zip"
url06 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp06totals.zip"
url07 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp07totals.zip"
url08 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp08totals.zip"
url09 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp09totals.zip"
url10 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp10totals.zip"
url11 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp11totals.zip"
url12 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp12totals.zip"
url13 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp13totals.zip"
url14 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp14totals.zip"

urllib.urlretrieve(url94, "zbp94totals.zip")
urllib.urlretrieve(url95, "zbp95totals.zip")
urllib.urlretrieve(url96, "zbp96totals.zip")
urllib.urlretrieve(url97, "zbp97totals.zip")
urllib.urlretrieve(url98, "zbp98totals.zip")
urllib.urlretrieve(url99, "zbp99totals.zip")
urllib.urlretrieve(url00, "zbp00totals.zip")
urllib.urlretrieve(url01, "zbp01totals.zip")
urllib.urlretrieve(url02, "zbp02totals.zip")
urllib.urlretrieve(url03, "zbp03totals.zip")
urllib.urlretrieve(url04, "zbp04totals.zip")
urllib.urlretrieve(url05, "zbp05totals.zip")
urllib.urlretrieve(url06, "zbp06totals.zip")
urllib.urlretrieve(url07, "zbp07totals.zip")
urllib.urlretrieve(url08, "zbp08totals.zip")
urllib.urlretrieve(url09, "zbp09totals.zip")
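Every assignment and call above follows one zbp{YY}totals.zip pattern, so a loop covers them; url94 through url00 are defined off-screen in the original, and assuming those years share the same base path is a guess on my part:

import urllib  # Python 2

base = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/"
years = ['%02d' % y for y in range(94, 100)] + ['%02d' % y for y in range(15)]
for yy in years:
    name = "zbp%stotals.zip" % yy
    urllib.urlretrieve(base + name, name)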
Example #10
import urllib  # Python 2
from PIL import Image

def load_photo(url):
    # with no target, urlretrieve saves to a temp file and returns (filename, headers)
    file, headers = urllib.urlretrieve(url)
    photo = Image.open(file)
    return photo
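A Python 3 variant that skips the temp file by decoding from memory; the function name is mine, not the original's:

import io
from urllib.request import urlopen
from PIL import Image

def load_photo_py3(url):
    # read the response body into a buffer and let PIL decode it directly
    with urlopen(url) as resp:
        return Image.open(io.BytesIO(resp.read()))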
Example #11
                'en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
                'User-Agent':
                'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko'
            })
        rep = urllib2.urlopen(req)
        cont = rep.read().decode('utf-8')
        pat = re.compile('<div.*?id="title">(.*?)</h1>', re.S)
        fname = re.search(pat, cont).group(1)
        fname = fname.strip().replace('<h1>', '')
        # decode HTML entities, e.g. "J&#39;Ai Deux Amours" -> "J'Ai Deux Amours"
        fname = html.parser.unescape(fname)
        fname = fname.split("<a target")[0]
        fname = str(i + 1) + "_" + fname
        print("Song title: " + fname + ", starting download")
        xi = XiamiDownload(url)
        if xi.url_location == "exception":
            continue
        url_download = xi.get_url()
        url_pic = xi.pic
        url_lyc = xi.lyc

        print('Download URL: ' + url_download)

        try:
            # Python 2: urlretrieve lives in urllib, not urllib2
            urllib.urlretrieve(url_download, fname + '.mp3')
            urllib.urlretrieve(url_pic, fname + '.jpg')
            urllib.urlretrieve(url_lyc, fname + '.lyc')
        except Exception:
            continue

        print("Download complete...")
Example #12
import urllib   # urlretrieve (Python 2)
import urllib2  # urlopen

i = 1  # change it to any start value according to your URL scheme
while True:
    suffix = str(i)
    ret = urllib2.urlopen("your URL" + suffix + ".jpg")  # check the response
    if ret.code == 200:
        # download into the current directory
        urllib.urlretrieve("your url" + suffix + ".jpg", suffix + ".jpg")
    i = i + 1  # infinite loop unless you add a stopping condition
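One caveat with the loop above: urllib2.urlopen raises HTTPError on a 4xx/5xx status instead of returning a response, so the ret.code == 200 test rarely sees a failure. Catching the exception also gives the loop a natural end; a sketch:

import urllib
import urllib2

i = 1
while True:
    name = str(i) + ".jpg"
    try:
        urllib2.urlopen("your URL" + name)
    except urllib2.HTTPError:
        break  # e.g. 404: no more images
    urllib.urlretrieve("your URL" + name, name)
    i += 1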
Example #13
import urllib  # Python 2

def download(links, url):
    for link in links:
        # with no target filename, urlretrieve saves each file to a temp location
        urllib.urlretrieve(url + link)
Example #14
import tweepy
import urllib   # urlretrieve (Python 2)
import urllib2

# Twitter credentials (redacted; use your own keys)
consumer_key = "YOUR_CONSUMER_KEY"
consumer_secret = "YOUR_CONSUMER_SECRET"

access_token = "YOUR_ACCESS_TOKEN"
access_token_secret = "YOUR_ACCESS_TOKEN_SECRET"

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)

auth.set_access_token(access_token, access_token_secret)

api = tweepy.API(auth)

# Get the image: the InspiroBot API answers with the image URL as plain text
req = urllib2.Request('http://inspirobot.me/api?generate=true', headers={ 'User-Agent': 'Mozilla/5.0' })
img_src = urllib2.urlopen(req).read()
urllib.urlretrieve(img_src, "new_image.jpg")

api.update_with_media("new_image.jpg")
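Hard-coded API secrets in source files are how keys end up leaked (the ones above have been replaced with placeholders); reading them from the environment is a cheap improvement. The variable names here are my choice:

import os

consumer_key = os.environ["TWITTER_CONSUMER_KEY"]
consumer_secret = os.environ["TWITTER_CONSUMER_SECRET"]
access_token = os.environ["TWITTER_ACCESS_TOKEN"]
access_token_secret = os.environ["TWITTER_ACCESS_TOKEN_SECRET"]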
Example #15
import os
import shutil
import urllib  # Python 2

def loadFile(url, destination):
    print("Loading file " + url)
    # urlretrieve with no target returns (temp_filename, headers)
    file, headers = urllib.urlretrieve(url)
    shutil.copy(file, destination)
    os.remove(file)
Example #16
import re
import urllib  # Python 2

def getImg(html, i):
    imgre = re.compile(r'(?i)src="(\/U.+?\.jpg)')
    imglist = imgre.findall(html)
    my_str = str(i) + '_'
    for num in range(len(imglist)):
        # URLs in the page are site-relative, so prefix the host
        urllib.urlretrieve("http://www.ulux.cn/" + imglist[num], '%s.jpg' % (my_str + str(num)))
Example #17
import os
import sys
import urllib  # Python 2; urlretrieve lives in urllib, not urllib2

if len(sys.argv) == 3 and sys.argv[1] == '-u':
    response = urllib.urlretrieve(sys.argv[2], 'tmp.pdf')
    filename = 'tmp.pdf'
elif len(sys.argv) == 3:
    print("Invalid option.")
    sys.exit()
else:
    filename = sys.argv[1]

try:
    os.system("pdftotext -raw -enc UTF-8 " + filename + " tmp.txt")
    os.system("python3 process.py tmp.txt")
except IOError:
    sys.exit()
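os.system with string concatenation breaks on file names containing spaces; the subprocess module passes arguments as a list and can raise on failure. A sketch of the same pipeline in Python 3 (check=True raises on a non-zero exit code):

import subprocess

subprocess.run(["pdftotext", "-raw", "-enc", "UTF-8", filename, "tmp.txt"], check=True)
subprocess.run(["python3", "process.py", "tmp.txt"], check=True)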
Example #18
	def urlsave(link, filename):
		# Python 2: urlretrieve lives in urllib, not urllib2
		urllib.urlretrieve(link, filename)
		return "Saved " + link + " as " + filename
Example #20
import os
from urllib import urlretrieve  # Python 2; in Python 3: urllib.request

def download(link, dirname):
    if not os.path.isdir(dirname):
        os.makedirs(dirname)
    # strip any #fragment, keep the last path component as the file name
    urlretrieve(link, dirname + "/" + link.split("#")[0].split("/")[-1])
Example #21
import ssl
from urllib import urlretrieve  # Python 2; in Python 3: from urllib.request import urlretrieve

def download(url, filePath):
    try:
        urlretrieve(url, filePath)
    except IOError:
        # retry with certificate verification disabled (insecure; see note below)
        ssl._create_default_https_context = ssl._create_unverified_context
        urlretrieve(url, filePath)
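Monkey-patching ssl's default context, as above, disables certificate verification for the whole process. In Python 3 a context can be passed to a single request instead; a sketch that keeps verification on:

import shutil
import ssl
from urllib.request import urlopen

def download_verified(url, file_path):
    ctx = ssl.create_default_context()  # verifies certificates by default
    with urlopen(url, context=ctx) as resp, open(file_path, 'wb') as out:
        shutil.copyfileobj(resp, out)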