def DownloadUpdate(self, file):
		self.log('Downloading: %s' % file)
		dirfile = os.path.join(self.UpdateTempDir,file)
		dirname, filename = os.path.split(dirfile)
		if not os.path.isdir(dirname):
			try:
				os.makedirs(dirname)
			except:
				self.log('Error creating directory: '  +dirname)
		url = self.SVNPathAddress+urllib.quote(file)
		try:
			if re.findall(".xbt",url):
				self.totalsize = int(re.findall("File length: ([0-9]*)",urllib2.urlopen(url+"?view=log").read())[0])
				urllib.urlretrieve( url.decode("utf-8"), dirfile.decode("utf-8"))
			else: urllib.urlretrieve( url.decode("utf-8"), dirfile.decode("utf-8") )
			self.DownloadedFiles.append(urllib.unquote(url))
			return 1
		except:
			try:
				time.sleep(2)
				if re.findall(".xbt",url):
					self.totalsize = int(re.findall("File length: ([0-9]*)",urllib2.urlopen(url+"?view=log").read())[0])
					urllib.urlretrieve(url.decode("utf-8"), dirfile.decode("utf-8"))
				else: urllib.urlretrieve(url.decode("utf-8"), dirfile.decode("utf-8") )
				self.DownloadedFiles.append(urllib.unquote(url))
				return 1
			except:
				self.log("Download failed: %s" % url)
				self.DownloadFailedFiles.append(urllib.unquote(url))
				return 0
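The retry above is done by copying the whole download block once; a minimal, generalized sketch of the same idea (the helper name and arguments are assumptions, not part of the original class):

import time
import urllib

def retrieve_with_retry(url, dest, attempts=2, delay=2):
    # Try the download up to `attempts` times, pausing `delay` seconds
    # between tries, instead of duplicating the whole block for one retry.
    for attempt in range(attempts):
        try:
            urllib.urlretrieve(url, dest)
            return True
        except IOError:
            if attempt + 1 < attempts:
                time.sleep(delay)
    return False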
def setup_client(domain, title):
    """
    Download tar ball from github, upack it and set it up.
    """
    if os.path.exists(CLIENT_DIR):
        rmtree(CLIENT_DIR)
    os.makedirs(CLIENT_DIR)
    urllib.urlretrieve("https://github.com/emory-libraries-ecds/OpenTourBuilder-Client/releases/download/1.0.1/OpenTourBuilder-Client.tar.gz", "%sOpenTourBuilder-Client.tar.gz" % CLIENT_DIR)

    tar = tarfile.open('%sOpenTourBuilder-Client.tar.gz' % CLIENT_DIR)
    tar.extractall(CLIENT_DIR)
    tar.close()

    js_file = '%sassets/open-tour-builder-ember.js' % CLIENT_DIR
    tmp_js_file = 'tmp.js'
    move(js_file, tmp_js_file)
    new_js = open(js_file, 'w+')
    for line in open(tmp_js_file, 'r'):
        line = line.replace('$api-host', 'api.%s' % domain)
        new_js.write(line)
    new_js.close()
    os.remove(tmp_js_file)

    index_file = '%sindex.html' % CLIENT_DIR
    tmp_index_file = 'tmp.html'
    move(index_file, tmp_index_file)
    new_index = open(index_file, 'w+')
    for line in open(tmp_index_file, 'r'):
        line = line.replace('$tour-title', title)
        line = line.replace('%24api-host', 'http://api.%s' % domain)
        new_index.write(line)
    new_index.close()
    os.remove(tmp_index_file)
Example #3
def main():
    
    #set up loop
    start_date = datetime.date(2003,01,01)
    end_date = datetime.date(2008,12,31)
    d = start_date
    delta = datetime.timedelta(days=1)
    while d <= end_date:
         
        #set up url
        url = 'http://www.cloud-net.org/quicklooks/data/chilbolton/products/classification/' \
                + str(d.strftime("%Y")) \
                + '/' + str(d.strftime("%Y%m%d")) \
                + '_chilbolton_classification.png'
                
        #check if exists
        code = urllib.urlopen(url).code
        if (code / 100 >= 4):
            print "No data for ", str(d.strftime("%Y%m%d")), ' continuing...'
            d += delta
            continue

        #download the image
        urllib.urlretrieve(url, str(d.strftime("%Y%m%d")) + '.png')
        
        #increment date
        print 'Successfully retrieved ', str(d.strftime("%Y%m%d")), ' moving on...'
        d += delta
def ftp_download():
	ftp_host = 'ftp.uniprot.org'
	ftp_user = '******'
	ftp_pass = ''
	ftp_path = '/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes'

	ftp = FTP(ftp_host)
	ftp.login(ftp_user, ftp_pass)
	ftp.getwelcome()
	ftp.cwd(ftp_path)

	dirs = ftp.nlst()
	# print(dirs)
	p = 0

	# Navigate to the required directory and thereby download data.
	for dir in dirs:
		if re.search(species, dir):
			path = ftp_path + '/' + str(species)
			# print(path)
			ftp.cwd(path)
			types = ftp.nlst()
			for x in types:
				if not re.search('DNA.fasta.gz', x) and re.search('fasta.gz', x):
					final = path + '/' + str(x)
					# print(final)
					fullfilename = os.path.join(store, str(x))
					urllib.urlretrieve('ftp://' + ftp_host + str(final), fullfilename)
					p+=1
				else:
					pass

	print("Number of viruses: " + str(p))

	print(ftp.pwd())
Example #5
def getAndUncompress( libraries ):
	
	for libname, url, inNewDirectory in libraries:
		print '_'*80
		print '--', libname
		parts = url.split('/')
		filename = [p for p in parts if len(getKnowExtensions(p))]
		#if len(filename) == 0:
		#	print '-'*40
		#	print 'No filename with a regognize extension in "'+libname+'" url="'+url+'"'
		#	print '-'*40
		#	continue
		filename = filename[0]
		print url, ' -> ', filename
		ext = getKnowExtensions(filename)[0]
		current_file = filename
		try:
			if os.path.isfile(filename): # if not already downloaded
				print 'Already downloaded: ', filename
			else:
				urllib.urlretrieve(url, filename, dlProgress)
			dirname = filename[:-len(ext)-1]
			#if os.path.isdir(libname) or os.path.islink(libname): # if not already uncompressed
			if os.path.isdir(filename[:-len(ext)-1]) :
				print 'Already uncompressed : ', dirname
			else:
				uncompress( filename, ext, inNewDirectory, libname )
		except Exception, e:
			print 'uncompress error (', str(e), ')'
Example #6
def wget(url,saveto):
    print "get",url,"to",saveto
    import urllib, urllib2
    if url.endswith(".gif"):
        prefix=saveto.rsplit(".",1)[0]
        saveto = prefix+".png"
        txt_name = prefix+".txt"
        import gif2strip
        if not (os.path.exists(txt_name) or os.path.exists(saveto)):
            try:
                gif2strip.go(url,saveto)
            except (urllib2.HTTPError,urllib2.URLError):
                pass
    elif url.endswith(".mp3"):
        prefix=saveto.rsplit(".",1)[0]
        saveto=prefix+".ogg"
        if not os.path.exists(saveto):
            urllib.urlretrieve(url.replace(" ","%20"),"mp3ogg/input.mp3")
            subprocess.call(["mp3ogg\mpg123.exe","-w","mp3ogg\output.wav","mp3ogg\input.mp3"])
            subprocess.call(["mp3ogg\oggenc2.exe","mp3ogg\output.wav","mp3ogg\output.ogg","--resample=44100"])
            f = open("mp3ogg/output.ogg","rb")
            o = f.read()
            f.close()
            f = open(saveto,"wb")
            f.write(o)
            f.close()
    elif not os.path.exists(saveto):
        print "retrieving"
        urllib.urlretrieve(url.replace(" ","%20"),saveto)
    return saveto
Example #7
def FetchRevision(context, rev, filename, quit_event=None, progress_event=None):
  """Downloads and unzips revision |rev|.
  @param context A PathContext instance.
  @param rev The Chromium revision number/tag to download.
  @param filename The destination for the downloaded file.
  @param quit_event A threading.Event which will be set by the master thread to
                    indicate that the download should be aborted.
  @param progress_event A threading.Event which will be set by the master thread
                    to indicate that the progress of the download should be
                    displayed.
  """
  def ReportHook(blocknum, blocksize, totalsize):
    if quit_event and quit_event.isSet():
      raise RuntimeError('Aborting download of revision %s' % str(rev))
    if progress_event and progress_event.isSet():
      size = blocknum * blocksize
      if totalsize == -1:  # Total size not known.
        progress = 'Received %d bytes' % size
      else:
        size = min(totalsize, size)
        progress = 'Received %d of %d bytes, %.2f%%' % (
            size, totalsize, 100.0 * size / totalsize)
      # Send a \r to let all progress messages use just one line of output.
      sys.stdout.write('\r' + progress)
      sys.stdout.flush()
  download_url = context.GetDownloadURL(rev)
  try:
    urllib.urlretrieve(download_url, filename, ReportHook)
    if progress_event and progress_event.isSet():
      print

  except RuntimeError:
    pass
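A minimal sketch of how the two events described in the docstring might be driven from a controlling thread; `context` and `rev` stand in for values the real script builds elsewhere:

import threading

quit_event = threading.Event()
progress_event = threading.Event()
progress_event.set()  # ask ReportHook to print progress lines

worker = threading.Thread(target=FetchRevision,
                          args=(context, rev, 'chrome-rev.zip',
                                quit_event, progress_event))
worker.start()
# quit_event.set()  # setting this makes ReportHook raise and abort the download
worker.join()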
  def test_CalibrationDifference1(self):
    """ Ideally you should have several levels of tests.  At the lowest level
    tests sould exercise the functionality of the logic with different inputs
    (both valid and invalid).  At higher levels your tests should emulate the
    way the user would interact with your code and confirm that it still works
    the way you intended.
    One of the most important features of the tests is that it should alert other
    developers when their changes will have an impact on the behavior of your
    module.  For example, if a developer removes a feature that you depend on,
    your test should break so they know that the feature is needed.
    """

    self.delayDisplay("Starting the test")
    #
    # first, get some data
    #
    import urllib
    downloads = (
        ('http://slicer.kitware.com/midas3/download?items=5767', 'FA.nrrd', slicer.util.loadVolume),
        )

    for url,name,loader in downloads:
      filePath = slicer.app.temporaryPath + '/' + name
      if not os.path.exists(filePath) or os.stat(filePath).st_size == 0:
        print('Requesting download %s from %s...\n' % (name, url))
        urllib.urlretrieve(url, filePath)
      if loader:
        print('Loading %s...\n' % (name,))
        loader(filePath)
    self.delayDisplay('Finished with download and loading\n')

    volumeNode = slicer.util.getNode(pattern="FA")
    logic = CalibrationDifferenceLogic()
    self.assertTrue( logic.hasImageData(volumeNode) )
    self.delayDisplay('Test passed!')
Example #9
def download(origin, destination):
    '''
    download the corresponding atis file
    from http://www-etud.iro.umontreal.ca/~mesnilgr/atis/
    '''
    print('Downloading data from %s' % origin)
    urllib.urlretrieve(origin, destination)
Example #10
def downloadFile(url,dest):
    try:
        urllib.urlretrieve(url,dest)
    except Exception, e:
        dialog = xbmcgui.Dialog()
        main.ErrorReport(e)
        dialog.ok("Mash Up", "Report the error below at " + main.supportsite, str(e), "We will try our best to help you")
Example #11
def downloader_is(url, name, showProgress=True):
	import downloader, extract

	addonsDir = xbmc.translatePath(os.path.join('special://home', 'addons')).decode("utf-8")
	packageFile = os.path.join(addonsDir, 'packages', 'isr.zip')
	'''
	try:
		os.remove(packageFile)
	except:
		pass
	'''	
	if showProgress:
		dp = xbmcgui.DialogProgress()
		dp.create(AddonName, "Downloading", name, "Please Wait")
		downloader.download(url, packageFile, dp)
		dp.update(0, "", "Extracting Zip Please Wait")
		extract.all(packageFile, addonsDir, dp)
	else:
		urllib.urlretrieve(url, packageFile)
		extract.all(packageFile, addonsDir)
		
	try:
		os.remove(packageFile)
	except:
		pass
			
	xbmc.executebuiltin("UpdateLocalAddons")
	xbmc.executebuiltin("UpdateAddonRepos")
Example #12
  def genericRetrieve(self, url, root, name):
    '''Fetch the gzipped tarfile indicated by url and expand it into root
       - All the logic for removing old versions, updating etc. must move'''

    # get the tarball file name from the URL
    filename = os.path.basename(urlparse.urlparse(url)[2])
    localFile = os.path.join(root,'_d_'+filename)
    ext =  os.path.splitext(localFile)[1]
    if ext not in ['.bz2','.tbz','.gz','.tgz','.zip','.ZIP']:
      raise RuntimeError('Unknown compression type in URL: '+ url)
    self.logPrint('Downloading '+url+' to '+localFile)
    if os.path.exists(localFile):
      os.unlink(localFile)

    try:
      urllib.urlretrieve(url, localFile)
    except Exception, e:
      failureMessage = '''\
Unable to download package %s from: %s
* If URL specified manually - perhaps there is a typo?
* If your network is disconnected - please reconnect and rerun ./configure
* Or perhaps you have a firewall blocking the download
* Alternatively, you can download the above URL manually, to /yourselectedlocation/%s
  and use the configure option:
  --download-%s=/yourselectedlocation/%s
''' % (name, url, filename, name.lower(), filename)
      raise RuntimeError(failureMessage)
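The docstring above promises that the tarball is expanded into root, but only the download is shown in this excerpt; a hedged sketch of that expansion step (helper name assumed):

import tarfile
import zipfile

def expandArchive(localFile, root):
    # Expand the downloaded archive into root: zipfile for .zip/.ZIP,
    # tarfile for the gzip/bzip2 tarball extensions accepted above.
    if localFile.lower().endswith('.zip'):
        with zipfile.ZipFile(localFile) as zf:
            zf.extractall(root)
    else:
        with tarfile.open(localFile) as tf:
            tf.extractall(root)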
Example #13
def updateServer():

	stopServer();
	time.sleep(2);

	if actionfile == "core":
		urllib.urlretrieve("http://www.armafiles.info/" + "dayz/dayz_v" + codever + ".rar", "..\\@dayz\\Addons\\" + actionfile + ".zip");
	else:
		urllib.urlretrieve("http://www.armafiles.info/" + "dayz/dayz_"+ actionfile + "_v" + codever + ".rar", "..\\@dayz\\Addons\\" + actionfile + ".zip");
	
	time.sleep(1);

	os.rename("..\\@dayz\\Addons\\" + actionfile + ".rar","..\\@dayz\\Addons\\" + actionfile + ".zip");

	zip = zipfile.ZipFile("..\\@dayz\\Addons\\" + actionfile + ".zip");
	zip.extractall(path="..\\@dayz\\Addons\\");	

	print "Unziped!";

	if actionfile == "code":
		filename = "..\\" + "cfgdayz" + server + "\\server.cfg";
		writestring = 'hostname = "DayZ Zombie RPG - NZ (v' + codever + ') www.dayzmod.com";\n';

		f = open(filename,'r');
		lines = f.readlines();
		f.close();

		f = open(filename,'w');
		f.write(writestring);
		f.write(''.join(lines[1:]));
		f.close();

	    
	startServer();
	def showInfo(self):
		if self.check == "true" and self.menulist:
			m_title = self["menulist"].getCurrent()[0][0]
			m_url = self["menulist"].getCurrent()[0][1]
			if m_url:
				#m_url = re.findall('(.*?)\.', m_url)
				#extra_imdb_convert = "._V1_SX320.jpg"
				#m_url = "http://ia.media-imdb.com/images/%s%s" % (m_url[0], extra_imdb_convert)
				print "EMC iMDB: Download Poster - %s" % m_url
				urllib._urlopener = AppURLopener()
				urllib.urlretrieve(m_url, self.path)
				urllib.urlcleanup()
				if os.path.exists(self.path):
					self.poster_resize(self.path, m_title)

					#ptr = LoadPixmap(self.path)
					#if ptr is None:
					#        ptr = LoadPixmap("/usr/lib/enigma2/python/Plugins/Extensions/EnhancedMovieCenter/img/no_poster.png")
					#        print "EMC iMDB: Load default NO Poster."
					#if ptr is not None:
					#        self["poster"].instance.setPixmap(ptr)
					#        print "EMC iMDB: Load Poster - %s" % m_title
				else:
					print "EMC iMDB: No url found for - %s" % m_title
			else:
				print "EMC iMDB: No url found for - %s" % m_title
Example #15
    def load(self):
        ids_filename = os.path.join(os.getenv("DATA_PATH"), "openml", "ids.pkl")
        if not os.path.exists(ids_filename):
            url = "http://www.openml.org/api_query/?{0}".format(urllib.urlencode({"q": self.query}))
            result = get_result_as_dict(url)
            f = open(ids_filename, "w")
            pickle.dump(result, f)
            f.close()
        else:
            result = pickle.load(open(ids_filename, "r"))

        ds_ids = [int(r[0]) for r in result["data"]]

        for ds_id in ds_ids:
            if self.verbose:
                print("retrieving {0}...".format(ds_id))
            url_desc = "http://www.openml.org/d/{0}/json".format(ds_id)
            u = urllib.urlopen(url_desc)
            desc = json.load(u)
            u.close()
            if "arff" not in desc["url"]:
                if self.verbose:
                    print("skipping {0}...".format(ds_id))
                continue
            filename = os.path.join(os.getenv("DATA_PATH"), "openml", "{0}.arff".format(ds_id))
            if not os.path.exists(filename):
                urllib.urlretrieve(desc["url"], filename)
Example #16
File: network.py Project: SKIRT/PTS
def download_file_no_requests(url, path, overwrite=False):

    """
    This function ...
    :param url:
    :param path:
    :param overwrite:
    :return:
    """

    # Get the name of the file
    filename = fs.name(url)

    # Determine the local path to the file
    filepath = fs.join(path, filename) if fs.is_directory(path) else path

    # Check filepath
    if fs.is_file(filepath):
        if overwrite: fs.remove_file(filepath)
        else: raise IOError("File is already present: " + filepath)

    # Debugging
    log.debug("Downloading '" + filename + "' to '" + path + "' ...")
    log.debug("URL: " + url)

    # Download
    urllib.urlretrieve(url, filepath)

    # Return the file path
    return filepath
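A minimal usage sketch of download_file_no_requests; the URL and target directory are made-up examples:

# Download into an existing directory; the file keeps its remote name.
local_path = download_file_no_requests("http://example.com/archive/data.fits",
                                       "/tmp", overwrite=True)
print(local_path)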
Example #17
    def retrieve_jquery_ui(self):
        if not os.path.exists(self.relative_path(self.JQUERY_UI_FILENAME)):
            urllib.urlretrieve(self.JQUERY_UI_LOCATION, self.relative_path(self.JQUERY_UI_FILENAME))
        if os.path.exists(self.relative_path('jquery-ui')):
            shutil.rmtree(self.relative_path('jquery-ui'))
        zip = zipfile.ZipFile(self.relative_path(self.JQUERY_UI_FILENAME))
        zip.extractall(self.relative_path('jquery-ui'))

        for path in ('js', 'development-bundle/external', 'development-bundle/ui/minified'):
            shutil.rmtree(self.relative_path('jquery-ui', *path.split('/')))

        for path in ('ui', 'themes'):
            os.rename(self.relative_path('jquery-ui', 'development-bundle', path),
                      self.relative_path('jquery-ui', path))

        shutil.rmtree(self.relative_path('jquery-ui', 'development-bundle'))
        os.unlink(self.relative_path('jquery-ui', 'index.html'))

        for path in os.listdir(self.relative_path('jquery-ui', 'ui')):
            if path.startswith('jquery-ui-') and path.endswith('.custom.js'):
                os.symlink(os.path.join('ui', path),
                           self.relative_path('jquery-ui', 'jquery-ui.js'))
        for path in os.listdir(self.relative_path('jquery-ui', 'css', 'smoothness')):
            if path.startswith('jquery-ui-') and path.endswith('.custom.css'):
                os.symlink(os.path.join(path),
                           self.relative_path('jquery-ui', 'css', 'smoothness', 'jquery-ui.css'))

        minify = [self.relative_path('jquery-ui', 'jquery-ui.js'),
                  self.relative_path('jquery-ui', 'css', 'smoothness', 'jquery-ui.css')]

        for path in minify:
            with open(path+'.min', 'w') as stdout:
                subprocess.call(['java', '-jar', self.relative_path('jquery', 'build', 'yuicompressor-2.4.2.jar'),
                                         path],
                                stdout=stdout)
Example #18
  def asLegend( self ):
    """ Return a legend graphic image for this model from the cache directory. If it is empty it makes a wms request and populates the directory. """
    myFileName = str(self.id) + "." + str(self.image_format)

    myLocalPath = os.path.join( settings.LEGEND_IMAGE_ROOT, myFileName )
    myWebPath = os.path.join( settings.LEGEND_IMAGE_URL, myFileName )

    if not os.path.exists( myLocalPath ):
      logging.info('Get Legend Graphic not cached...fetching....')
      # the image is not into the expected dir, get it from WMS request..
      '''
      if (not self.legend_style is None) and (self.legend_style != ''):
         myFetchUrl = """%s?request=GetLegendGraphic&style=%s&version=1.0.0&format=image/png&width=20&height=20&layer=%s""" % ( self.url, self.legend_style, self.layers )
      else:
         myFetchUrl = """%s?request=GetLegendGraphic&version=1.0.0&format=image/png&width=20&height=20&layer=%s""" % ( self.url, self.layers )
      '''
      if re.search('\?', self.url):
        question_mark = '&'
      else:
        question_mark = '?'
      myFetchUrl = """%s%srequest=GetLegendGraphic&version=1.0.0&format=image/png&width=20&height=20&layer=%s""" % ( self.url, question_mark, self.layers )
      if (not self.legend_style is None) and (self.legend_style != ''):
        myFetchUrl += r'&style='+self.legend_style
      # .. and store into filesystem
      logging.info('GET %s -> %s' % (myFetchUrl, myLocalPath))
      urllib.urlretrieve(myFetchUrl, myLocalPath)
    else:
      logging.info('Using cached LegendGraphic %s' % myLocalPath)

    myUrl = """<img src="%s" id="legend-image-%s" class="legend-image"/>""" % ( myWebPath, self.id )
    return myUrl
Example #19
def download_planck():
    from urllib import urlretrieve
    basepath = 'http://irsa.ipac.caltech.edu/data/Planck/release_1/all-sky-maps/maps/'
    file = 'HFI_SkyMap_217_2048_R1.10_nominal.fits'
    url = basepath + file
    savename = datadir + file
    urlretrieve(url, savename)
def getimg(aaa):
    reg = re.compile(r'<img.*?src=".*?" data-lazyload-src="(.*?)".*?>')
    l = re.findall(reg, aaa)
    temp = 101
    for i in l:
        temp += 1
        urllib.urlretrieve(i, '/home/guoweikuang/图片/%s.jpg' %temp)
Example #21
def download_images(img_urls, dest_dir):
    """Given the urls already in the correct order, downloads
    each image into the given directory.
    Gives the images local filenames img0, img1, and so on.
    Creates an index.html in the directory
    with an img tag to show each local image file.
    Creates the directory if necessary.
    """
    # Build directory if it does not currently exist.
    if not os.path.exists(dest_dir):
        os.mkdir(dest_dir)
    index = file(os.path.join(dest_dir, 'index.html'), 'w')
    index.write('<html><body>\n')
    for x, img_url in enumerate(img_urls):
        dest_file = os.path.join(dest_dir, "img%d.jpg" % x) # rename
        try:
            print "Retrieving.....%s" % img_url
            urllib.urlretrieve(img_url, dest_file)  # downloads the url data to the given file path
        except IOError:
            print "Problem reading url: %s" % img_url
        else:
            index.write('<img src="%s">' % os.path.abspath(dest_file))
    index.write('\n</body></html>\n')
    index.close()
    return
Example #22
def fetch(url):
    
    fetchFail = True
    failCount = 0
    while fetchFail:
        try:
            urlretrieve(url, 'myfile')
            data = gzip.open('myfile', 'rb').read()
            
            try:
                data_decoded = data.decode('gbk')
            except Exception as e:
                data_decoded = data
                print("error decoding")
                
            fetchFail = False
        except Exception as e:
            failCount += 1
            print "attempt " + str(failCount) + " failed"
            print "sleeping for 1 seconds before retrying..."
            
            if failCount >= 20:
                return ""
            
            time.sleep(1)
    
    return data_decoded
Example #23
 def imagetitan_save_image(self, imggrp, imgmiddle, imgname):
     # generate just the filename of the image to be locally saved
     savefile = join(self.basedir, imgname)
     # generate the url of the image
     download_url = 'http://' + imggrp + '.imagetitan.com/' + imggrp + imgmiddle + imgname
     # finally save the image on the desidered directory
     urlretrieve(download_url, savefile) 
def update(params):
	# Download the ZIP
	xbmc.output("[updater.py] update")
	xbmc.output("[updater.py] cwd="+os.getcwd())
	remotefilename = REMOTE_FILE+params.get("version")+".zip"
	localfilename = LOCAL_FILE+params.get("version")+".zip"
	xbmc.output("[updater.py] remotefilename=%s" % remotefilename)
	xbmc.output("[updater.py] localfilename=%s" % localfilename)
	xbmc.output("[updater.py] descarga fichero...")
	inicio = time.clock()
	urllib.urlretrieve(remotefilename,localfilename)
	fin = time.clock()
	xbmc.output("[updater.py] Descargado en %d segundos " % (fin-inicio+1))
	
	# Unzip it
	xbmc.output("[updater.py] unzipping file...")
	import ziptools
	unzipper = ziptools.ziptools()
	destpathname = DESTINATION_FOLDER
	xbmc.output("[updater.py] destpathname=%s" % destpathname)
	unzipper.extract(localfilename,destpathname)
	
	# Delete the downloaded zip
	xbmc.output("[updater.py] deleting file...")
	os.remove(localfilename)
Example #25
 def cacheSong( self, songNumber ):
     info = self.songinfo[songNumber]
     if not info["caching"]:
         print "Caching song %s"%info['title']
         info["caching"] = True
         urllib.urlretrieve(str(info['url']), os.path.join(CachePath, "%s.mp3"%info['title']))
         info["localpath"] = os.path.join(CachePath, "%s.mp3"%info['title'])
Example #26
def download_images(img_urls, dest_dir):
  """Given the urls already in the correct order, downloads
  each image into the given directory.
  Gives the images local filenames img0, img1, and so on.
  Creates an index.html in the directory
  with an img tag to show each local image file.
  Creates the directory if necessary.
  """
  
  # prepare output directory - if it is not there already, create it
  if not(os.path.exists(dest_dir)):
      os.mkdir(dest_dir)

  i=0
  imgstr=''
  for slice in img_urls:
      # use a simple naming scheme for new image files and make sure they end up in the given directory
      outfile='img'+str(i)+'.png'
      imgstr+='<img src="'+outfile+'">'
      outfile=os.path.join(dest_dir, outfile)
      print "Fetching "+outfile+' '+ os.path.basename(slice) 
      urllib.urlretrieve('http://code.google.com/'+slice, outfile)
      i+=1
  print "All image strips downloaded"

  f=open(os.path.join(dest_dir, 'index.html'), 'w')
  f.write('<html><body>')
  f.write(imgstr)
  f.write('</body></html>')
  f.close()
def download_images(img_urls, dest_dir):
  """Given the urls already in the correct order, downloads
  each image into the given directory.
  Gives the images local filenames img0, img1, and so on.
  Creates an index.html in the directory
  with an img tag to show each local image file.
  Creates the directory if necessary.
  """
  # Verify Directory Exists.  If not create it.
  print 'Verifying Directory: ' + dest_dir
  if os.path.exists(dest_dir)==False:
    print 'Creating Directory: ' + dest_dir
    os.makedirs(dest_dir)

  # Download Image Files with incremented name and create IMG html line
  f = open(dest_dir + '/index.html', 'a')
  img_html = []
  img_count = len(img_urls)
  i = 0
  for img in img_urls:
    i = i + 1
    print 'Retrieving File ' + str(i) + ' of ' + str(img_count) + '.'
    urllib.urlretrieve(img, dest_dir + '/img' + str(i))
    img_html.append('<img src=\"img' + str(i) + '\">')

  # Create index.html file
  print 'Writing index.html'
  f.write('<verbatim>')
  f.write('<html>')
  f.write('<body>')
  f.write(''.join(img_html))
  f.write('</body>')
  f.write('</html>')
  f.close()
Example #28
def reload_indexp(working_dir, cycle):
    def execute_file(filename):
        contents = " ".join([line for line in open(filename, 'r') if line[0:2] != '--'])
        statements = contents.split(';')[:-1] # split on semi-colon. Last element will be trailing whitespace

        for statement in statements:
            log.info("Executing %s" % statement)
            c.execute(statement)

    try:
        working_dir = os.path.expanduser(working_dir)
        if not os.path.isdir(working_dir):
            os.makedirs(working_dir)

        log = set_up_logger('indexp_importer', working_dir, 'IndExp Importer Fail')
        
        local_file_path = os.path.join(working_dir, LOCAL_FILE)
        log.info("downloading %s to %s..." % (DOWNLOAD_URL.format(cycle), local_file_path))
        urllib.urlretrieve(DOWNLOAD_URL.format(cycle), local_file_path)
    
        log.info("uploading to table %s..." % TABLE_NAME)
        c = connection.cursor()
        c.execute("insert into fec_indexp_out_of_date_cycles (cycle) values ({})".format(cycle))
        c.execute("DELETE FROM %s" % TABLE_NAME)
        c.copy_expert("COPY %s (candidate_id, candidate_name, spender_id, spender_name, election_type, candidate_state, candidate_district, candidate_office, candidate_party, amount, date, aggregate_amount, support_oppose, purpose, payee, filing_number, amendment, transaction_id, image_number, received_date, prev_file_num) FROM STDIN CSV HEADER" % TABLE_NAME, open(local_file_path, 'r'))
        c.execute("update {} set cycle = {}".format(TABLE_NAME, cycle))
        execute_file(SQL_POSTLOAD_FILE)
        c.execute("delete from fec_indexp_out_of_date_cycles")
        
        log.info("Import Succeeded.")
    except Exception as e:
        log.error(e)
        raise
Example #29
def searchcode(url, regex):
    code = urlopen(url).read()
    result = ""
    try:
        regexresults = re.search(regex, str(code))
        if regexresults is not None:
            result = str(regexresults.group(0))
            if args.url is True:
                logger.info("        " + str(url))
            if args.verbose is True:
                logger.info("      [+] Found the following results")
                logger.info("        " + str(result))
            if args.write_file:
                if result == "":
                    pass
                else:
                    f = open(args.write_file, "a")
                    f.write(str(result + "\n"))
                    f.close()

            if args.directory:
                filename = args.directory + "/" + url.replace("/", "-")
                if not os.path.exists(args.directory):
                    os.makedirs(args.directory)
                logger.info("        [+] Downloading " + filename)
                urlretrieve(url, filename)
                fp = open(filename, "wb")
                fp.write(code)
                fp.close()
        else:
            pass
    except:
        pass
Example #30
def get_amalgamation():
    """Download the SQLite amalgamation if it isn't there, already."""
    if os.path.exists(AMALGAMATION_ROOT):
        return
    os.mkdir(AMALGAMATION_ROOT)
    print "Downloading amalgation."

    # XXX upload the amalgamation file to a somewhat more
    # official place
    amalgamation_url = ("http://futeisha.org/sqlcipher/"
                        "amalgamation-sqlcipher-2.1.0.zip")

    # and download it
    print 'amalgamation url: %s' % (amalgamation_url,)
    urllib.urlretrieve(amalgamation_url, "tmp.zip")

    zf = zipfile.ZipFile("tmp.zip")
    files = ["sqlite3.c", "sqlite3.h"]
    directory = zf.namelist()[0]

    for fn in files:
        print "Extracting", fn
        outf = open(AMALGAMATION_ROOT + os.sep + fn, "wb")
        outf.write(zf.read(directory + fn))
        outf.close()
    zf.close()
    os.unlink("tmp.zip")

try:
    FILE = sys.argv[2]
    PORT = sys.argv[1]
except:
    print "Usage:", sys.argv[0], "[comm port] [hex file or url]"
    sys.exit(-1)

# check if file is url
urlRegex = re.compile(r'^(?:http|ftp)s?://', re.IGNORECASE)

if urlRegex.match(FILE): # file is an url
    # download the file
    print "Downloading firmware..."
    FILE,_ = urllib.urlretrieve(FILE)
    print "Firmware downloaded to", FILE

def find_languagepack(filename):
    res = 0
    with open(filename, 'r') as f:
        for l in iter(f.readline, ''): # need to use this to avoid buffering which screws up f.tell()
            l = l.strip()
            if l == HEX_TERMINATOR:
                if res == 0:
                    res = f.tell()
                else:
                    return res # make sure to return res only if another one had been found before
        return None
    
def escapeFilename(filename):
    pass  # body not included in this excerpt
def download(url, filename):
    urllib.urlretrieve(url, filename, reporthook)
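The reporthook passed above is not shown in this excerpt; a minimal sketch of one, using the (block count, block size, total size) callback signature that urllib.urlretrieve expects:

import sys

def reporthook(blocknum, blocksize, totalsize):
    # Called by urllib.urlretrieve after each block: print one-line progress,
    # or just a byte count when the server does not report a total size.
    received = blocknum * blocksize
    if totalsize > 0:
        percent = min(100.0, received * 100.0 / totalsize)
        sys.stdout.write("\r%5.1f%% (%d bytes)" % (percent, min(received, totalsize)))
    else:
        sys.stdout.write("\rReceived %d bytes" % received)
    sys.stdout.flush()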
Example #33
        print(card["name"] + " exists in master.csv. Skipping...")
    else:
        print(card["name"] + " is not in master.csv. Adding...")
        with open('master.csv', 'rU') as csvfile:
            masterreader = csv.reader(csvfile)
            ID = sum(1 for row in masterreader)
        mc = open('master.csv', 'a')
        writer=csv.writer(mc)
        myCsvRow = [ID, unicodedata.normalize('NFKD', card["name"]).encode('ascii', 'ignore'), card["multiverseid"]]
        if card['layout'] == 'split':
            myCsvRow = [ID, unicodedata.normalize('NFKD', card['names'][0] + '//' + card['names'][1]).encode('ascii', 'ignore'), card["multiverseid"]]
        writer.writerow(myCsvRow)
        mc.close()

download_mtg_json()
with open('AllSets-x.json') as data_file:
   data = json.load(data_file)
#Build master.csv: ID/Name/MultiverseID
for set in sets:
    for card in data[set]["cards"]:
        Add_card(card)
#Download images from gatherer for all cards listed in master.csv:
with open('master.csv', 'rU') as csvfile:
    masterreader = csv.reader(csvfile)
    for row in masterreader:
        mvid = row[2]
        localid = row[0]
        urllib.urlretrieve("http://gatherer.wizards.com/Handlers/Image.ashx?multiverseid=" + mvid + "&type=card", "pics/" + localid + ".jpg")




# Starter Code

import os 
import urllib
import csv

url = 'https://raw.githubusercontent.com/fivethirtyeight/data/master/fandango/fandango_scrape.csv'

filename = 'fandango_scrape.csv'

if not os.path.isfile(filename):

    urllib.urlretrieve(url, filename)
    # use the requests library instead if it is installed (see the sketch after this snippet)

else:
    pass

# raw_data = []

with open(filename, 'r') as f:
    reader = csv.reader(f, delimiter=',')
    raw_data = list(reader)


    # for row in reader:
    #     raw_data.append(row)
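As the comment in the snippet suggests, the same download could be done with the requests package when it is installed; a hedged sketch, not part of the original starter code:

import requests

def download_with_requests(url, filename):
    # Stream the response to disk instead of using urllib.urlretrieve.
    resp = requests.get(url, stream=True)
    resp.raise_for_status()
    with open(filename, 'wb') as f:
        for chunk in resp.iter_content(chunk_size=8192):
            f.write(chunk)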
Example #35
    docgdata = docg.getfile().read()

doclinks = re.compile('"r"><a href="' + '(.*?)"' + ' ')
doclinksres = doclinks.findall(docgdata)

i = 0
y = 0
em = 0
print "[*]-Start downloading DOC(s) for domain:", domain_name

for p in doclinksres:
    if DocRes.count(p) == 0:
        DocRes.append(p)
        dlURL = str(p)
        dlDOC = domain_name + str(i) + ".doc"
        urllib.urlretrieve(dlURL, dlDOC)
        i = i + 1

        cmd = command + ' -author ' + dlDOC
        p = subprocess.Popen(cmd,
                             shell=True,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        for line in p.stdout.readlines():
            if Users.count(line) == 0:
                Users.append(line)
                u = line.replace("Author                          :", "")
                print " |-" + dlURL + " (Author: " + u.rstrip('\n') + ")"
                y = y + 1

        doccmd2 = command2 + ' ' + dlDOC + ' ' + '|egrep "\w+([._-]\w)*@"' + domain_name
Example #36
              The timestamp of the last probe, or 'undef' if the mirror was never probed.
            """
            line = line.strip()
            (url, age, status, probe_time, probe_history, state_history,
             last_probe) = line.split()
            if state_history in ("f", "z", "x"):
                bad_mirror_urls.append(url)
            else:
                good_mirror_urls.append(url)

    return good_mirror_urls, bad_mirror_urls


if __name__ == '__main__':

    (sites_path, headers) = urlretrieve(SITES_URL)

    with open(sites_path, "rb") as sites_file:

        state = 0
        state |= JUNK
        saved_line = None

        continents = []
        current_mirror = None

        for line in sites_file:

            line = line.strip()

            # skip empty lines
Example #37
def property_list(request):
    address, city, state = "", "", ""
    if request.method == "POST":
        address = request.POST.get("address",
                                   "").replace(", United States", "").strip()
        city = request.POST.get("city", "").strip()
        state = request.POST.get("stateSelection", "").strip()
        if address and city:
            search_address = address + ", " + city + ", " + state
        elif not address and city:
            search_address = city + ", " + state
        if address and city:
            try:
                property = Property.objects.filter(address__icontains=address,
                                                   city__icontains=city,
                                                   state__icontains=state)
                if not property:
                    zillow_property = zillow_offline_api(address, city, state)
                    if not "Error" in zillow_property:
                        for z_p in zillow_property:
                            zpid = z_p.pop('zpid', None)
                            u = User.objects.get_or_create(
                                username="******")[0]
                            u.set_password('zilLow@321')
                            u.save()
                            z_p['user'] = u
                            property, created = Property.objects.get_or_create(
                                **z_p)
                            if created:
                                zpid_data = zillow_zpid_detail(zpid)
                                if not "Error" in zpid_data:
                                    # TODO save detail and image
                                    try:
                                        property.description = zpid_data[
                                            'description']
                                    except:
                                        pass
                                    try:
                                        photo = Images(
                                        )  # set any other fields, but don't commit to DB (ie. don't save())
                                        name = urlparse(
                                            zpid_data['image']).path.split(
                                                '/')[-1]
                                        content = urllib.urlretrieve(
                                            zpid_data['image'],
                                            settings.MEDIA_ROOT)
                                        photo.image.save(name,
                                                         File(open(
                                                             content[0])),
                                                         save=True)
                                        PropertyImages.objects.create(
                                            property=property, image=photo)
                                    except:
                                        pass
                            property.search_index = property.address.strip() + ", " + property.city.strip() \
                                                    + ", " + property.state.strip() + " " \
                                                    + property.zip_code.strip()
                            property.save()
                            if len(zillow_property) == 1:
                                return HttpResponseRedirect(
                                    "/property/preview/" + str(property.id))
            except:
                return render(
                    request, 'home_page.html', {
                        'error':
                        "please use proper address or use suggestion in dropdown as help."
                    })

    return render(request, 'property/property_list.html', {
        "address": address,
        "city": city,
        "state": state
    })
Example #38
def obtain(dir_path):
    """
    Downloads the dataset to ``dir_path``.
    """

    dir_path = os.path.expanduser(dir_path)
    print 'Downloading the dataset'
    import urllib
    urllib.urlretrieve('http://www.cs.toronto.edu/~larocheh/public/datasets/corrupted_mnist/mnist_corrupted_u.mat',os.path.join(dir_path,'mnist_corrupted_u.mat'))
    urllib.urlretrieve('http://www.cs.toronto.edu/~larocheh/public/datasets/corrupted_mnist/mnist_corrupted_v.mat',os.path.join(dir_path,'mnist_corrupted_v.mat'))
    urllib.urlretrieve('http://www.cs.toronto.edu/~larocheh/public/datasets/corrupted_mnist/mnist_corrupted_valid_u.mat',os.path.join(dir_path,'mnist_corrupted_valid_u.mat'))
    urllib.urlretrieve('http://www.cs.toronto.edu/~larocheh/public/datasets/corrupted_mnist/mnist_corrupted_valid_v.mat',os.path.join(dir_path,'mnist_corrupted_valid_v.mat'))
    urllib.urlretrieve('http://www.cs.toronto.edu/~larocheh/public/datasets/corrupted_mnist/mnist_corrupted_test_u.mat',os.path.join(dir_path,'mnist_corrupted_test_u.mat'))
    urllib.urlretrieve('http://www.cs.toronto.edu/~larocheh/public/datasets/corrupted_mnist/mnist_corrupted_test_v.mat',os.path.join(dir_path,'mnist_corrupted_test_v.mat'))

    # Writing everything into text files, to allow for not loading the data into memory
    def write_to_txt_file(u,v,filename):
        f = open(filename,'w')
        for u_t,v_t in zip(u,v):
            for i in range(len(u_t)):
                f.write(str(int(u_t[i]>127))+' ')
            for i in range(len(v_t)-1):
                f.write(str(int(v_t[i]>127))+' ')
            f.write(str(int(v_t[-1]>127))+'\n')
        f.close()

    import scipy.io
    u = scipy.io.loadmat(os.path.join(dir_path,'mnist_corrupted_u.mat'))['dat']
    v = scipy.io.loadmat(os.path.join(dir_path,'mnist_corrupted_v.mat'))['dat']
    write_to_txt_file(u,v,os.path.join(dir_path,'corrupted_mnist_train.txt'))

    u = scipy.io.loadmat(os.path.join(dir_path,'mnist_corrupted_valid_u.mat'))['dat']
    v = scipy.io.loadmat(os.path.join(dir_path,'mnist_corrupted_valid_v.mat'))['dat']
    write_to_txt_file(u,v,os.path.join(dir_path,'corrupted_mnist_valid.txt'))

    u = scipy.io.loadmat(os.path.join(dir_path,'mnist_corrupted_test_u.mat'))['dat']
    v = scipy.io.loadmat(os.path.join(dir_path,'mnist_corrupted_test_v.mat'))['dat']
    write_to_txt_file(u,v,os.path.join(dir_path,'corrupted_mnist_test.txt'))

    print 'Done                     '
Example #39
def Get_Lesson(src, num):
    content = urllib2.urlopen(src).read()
    l_pattern = re.compile(r"http.*\.m3u8")
    lesson = re.findall(l_pattern, content)
    lesson_src = lesson[0].replace("m3u8", "mp4")
    urllib.urlretrieve(lesson_src, "Obama%s.mp4" % (num))
def download_links_from_page(page, extension="", output_dir="", ltf=False,
                             base_url=""):
    """ Downloads files from some html page. you can specify that you only want
        files with a particular extension, and where to save them.

        NOTE: Currently it does not support downloading of files that are listed
        as relative links.
    
    Args:
        page: (str)
            either a url, or the contents of an html page.
        extension:(str)
            extension of file you want to download, eg "pdf".
            If left blank, it will download ALL links on the page.
        output_dir:(str)
            what directory to save files to
        ltf: (bool)(default=False)
            Link Text as Filename

            If True Use the link text as the file name. Replacing any spaces with
            underscores.
    
            If false, it uses the original remote file name.
        base_url: (str)(default="")
            This is only used if `page` is not a url.
    
            If page is a string containing the HTML content, then it is a good idea
            to specify the base directory that the page come from. Without it, any
            relative file paths to files in links will fail to download.
    """
    # Creates the directory if it does not already exist
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Automatically detect if the content is a HTTP URL, if so, download the
    # html page content. otherwise treat 'page' as an html page.
    # Get the html content from the url specified
    url_was_provided = True if re.search("^(https://|http://)", page) else False
    if url_was_provided:
        html = urllib2.urlopen(page)
        base_url = os.path.split(page)[0]
    else:
        html = page

    soup = BeautifulSoup(html)

    # Returns all links
    if extension != "":
        links = soup.findAll('a',
                             attrs={'href': re.compile(".{}$".format(extension))})
    else:
        links = soup.findAll('a')

    # --------------------------------------------------------------------------
    #                                               DOWNLOAD EACH FILE AT A TIME
    # --------------------------------------------------------------------------
    unprocessed_links = []  # keep a list of files that could not be downloaded
    for link in links:
        link_url = link["href"]

        # Determine if it's an absolute path to a file  using http or https file
        full_url = True if re.search("^(https://|http://)", link_url) else False

        # Use base_url to convert to a full path if link_url is a relative path
        link_url = link_url if full_url else os.path.join(base_url, link_url)

        # ----------------------------------------------------------------------
        # Extract the link test to use as the file name, otherwise use the
        # remote filename
        # ----------------------------------------------------------------------
        if ltf:
            filename = (link.contents[0]).replace(" ", "_") + "." + extension
        else:
            filename = os.path.split(link_url)[1]

        # ----------------------------------------------------------------------
        #                                                      Download the file
        # ----------------------------------------------------------------------
        try:
            print("Downloading " + link_url)
            urllib.urlretrieve(link_url, os.path.join(output_dir, filename))
            print("--done")
        except:
            print("Could NOT download " + link_url)
            unprocessed_links.append(link_url)

    print("Done Downloading Files")
    if len(unprocessed_links) > 0:
        print("The following could NOT be downloaded" + "\n    ".join([""] + a))

# download_links_from_page(url, extension="jpg", output_dir="/tmp/mydir", ltf=False)
Example #41
opencommand = "gnome-open"
if sys.platform == 'darwin':
  opencommand = "open"

# download full PDF
pdfpath = os.path.join('db', idstr, 'paper.pdf')
urls = pub['FullVersionURL']
pdfurls = [u for u in urls if u.endswith('.pdf')]
gotit = False
print "All paper links:"
for u in urls: print u
for u in pdfurls:
  print "trying to retrieve: ", u
  try:
    urllib.urlretrieve(u, pdfpath)
    print "saved pdf at ", pdfpath
    try:
      print "opening the pdf using %s (%s) for your convenience to verify the download..." %(opencommand, sys.platform)
      os.system(opencommand + " " + pdfpath)
    except Exception, e:
      print "%s failed. Make sure the downloaded %s pdf is correct." % (opencommand, pdfpath, )
    isok = raw_input("download good? y/n: ")
    if isok=="y":
      gotit = True
      break
  except Exception, e:
    print "ERROR retrieving: ", e

if not gotit:
  print "Couldn't get the paper pdf. Please download manually and save as %s." % (pdfpath, )
Example #42
Page = urllib2.urlopen(DataURL).read()
Links = re.findall('<a href=(.*?)>.*?</a>', Page)
fast5Links = []
for Link in Links:
    if "fast5" in Link:
        fast5Links.append(Link)
print "Totally %d fast5 files" % len(fast5Links)

####################################Download the files
fileNo = 100  #len(fast5Links) #Max number of files need to be download
TryMax = 5  #Max download attemps
Try = 0  #Current attempt times
fileCount = 1
for fileName in fast5Links[0:fileNo]:
    fileName = fileName[1:-1]  #Get rid of the double quotation
    Try = 0
    while Try < TryMax:
        try:
            urllib.urlretrieve(DataURL + fileName, FileSaveLoc + fileName)
            fileSize = os.path.getsize(FileSaveLoc + fileName)
            print "Download %s (%d/%d) %s" % (fileName, fileCount, fileNo,
                                              convert_bytes(fileSize))
            break
        except IOError:
            print "Download data files,try again (%d/%d)" % (Try, TryMax)
            Try += 1
        except:
            print "Unexpected Error when downloading(%d/%d):" % (
                fileCount, fileNo), sys.exc_info()[0]
            Try += 1
    fileCount += 1
Example #43
 def download(filename, source='http://yann.lecun.com/exdb/mnist/'):
     print("Downloading %s" % filename)
     urlretrieve(source + filename, filename)
Example #44
    print "Downloaded %d of %d bytes (%3.1f%%)\r" \
        % (sizeSoFar, totalSize, 100.0*float(sizeSoFar)/totalSize),
    if sizeSoFar == totalSize:
        sys.stdout.write("\n")
    sys.stdout.flush()


for url, dirName in data.iteritems():
    (baseUrl, fileName) = url.rsplit('/', 1)
    target = os.path.join(installPath, dirName)
    if not os.path.exists(target):
        os.makedirs(target)
    target = os.path.join(target, fileName)
    if not os.path.exists(target):
        print url + " -> " + target
        urllib.urlretrieve(baseUrl + "/" + urllib.quote(fileName), target,
                           progress)

        if fileName.endswith(".tar.gz") or fileName.endswith(".tar.bz2") \
          or fileName.endswith(".tar"):
            raw = tarfile.open(target)
            for m in raw.getmembers():
                raw.extract(m, os.path.dirname(target))
            #os.unlink(target)
        elif fileName.endswith(".gz"):
            raw = gzip.open(target, 'rb').read()
            open(os.path.splitext(target)[0], 'wb').write(raw)
            #os.unlink(target)
        elif fileName.endswith(".zip"):
            raw = zipfile.ZipFile(target, 'r')
            raw.extractall(os.path.dirname(target))
            #os.unlink(target)
Example #45
def cache_distro(build):
    """Cache the OpenDaylight distribution to package as RPM/Deb.

    :param build: Description of an RPM build
    :type build: dict
    :return str distro_tar_path: Path to cached distribution tarball

    """
    # Specialize templates for the given build
    distro = distro_template.substitute(build)

    # Append file extensions to get ODL distro zip/tarball templates
    distro_tar = distro + ".tar.gz"
    distro_zip = distro + ".zip"

    # Prepend cache dir path to get template of full path to cached zip/tarball
    distro_tar_path = os.path.join(cache_dir, distro_tar)
    distro_zip_path = os.path.join(cache_dir, distro_zip)

    # Cache OpenDaylight tarball to be packaged
    if not os.path.isfile(distro_tar_path):
        if build["download_url"].endswith(".tar.gz"):
            print("Downloading: {}".format(build["download_url"]))
            urllib.urlretrieve(build["download_url"], distro_tar_path)
            print("Cached: {}".format(distro_tar))
        # If download_url points at a zip, repackage as a tarball
        elif build["download_url"].endswith(".zip"):
            if not os.path.isfile(distro_zip_path):
                print("URL is to a zip, will download and convert to tar.gz")
                print("Downloading: {}".format(build["download_url"]))
                urllib.urlretrieve(build["download_url"], distro_zip_path)
                print("Downloaded {}".format(distro_zip_path))
            else:
                print("Already cached: {}".format(distro_zip_path))
            # Extract zip archive
            # NB: zipfile.ZipFile.extractall doesn't preserve permissions
            # https://bugs.python.org/issue15795
            subprocess.call(["unzip", "-oq", distro_zip_path, "-d", cache_dir])
            # Get files in cache dir
            cache_dir_ls_all = glob.glob(os.path.join(cache_dir, "*"))
            # Remove pyc files that may be newer than just-extracted zip
            cache_dir_ls = filter(lambda f: '.pyc' not in f, cache_dir_ls_all)
            # Get the most recent file in cache dir, hopefully unzipped archive
            unzipped_distro_path = max(cache_dir_ls, key=os.path.getctime)
            print("Extracted: {}".format(unzipped_distro_path))
            # Remove path from unzipped distro filename, as will cd to dir below
            unzipped_distro = os.path.basename(unzipped_distro_path)
            # Using the full paths here creates those paths in the tarball, which
            # breaks the build. There's a way to change the working dir during a
            # single tar command using the system tar binary, but I don't see a
            # way to do that with Python.
            # TODO: Is there a good way to do this without changing directories?
            # TODO: Try https://goo.gl/XMx5gb
            cwd = os.getcwd()
            os.chdir(cache_dir)
            with tarfile.open(distro_tar, "w:gz") as tb:
                tb.add(unzipped_distro)
                print("Taring {} into {}".format(unzipped_distro, distro_tar))
            os.chdir(cwd)
            print("Cached: {}".format(distro_tar))
    else:
        print("Already cached: {}".format(distro_tar))

    return distro_tar_path
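For the TODO about changing directories: tarfile.add accepts an arcname, so the tree can be stored under its basename without the os.chdir dance; a sketch, not the project's actual fix:

import os
import tarfile

def tar_without_chdir(unzipped_distro_path, distro_tar_path):
    # Store the extracted tree under its basename so the tarball does not
    # embed the absolute cache_dir path.
    with tarfile.open(distro_tar_path, "w:gz") as tb:
        tb.add(unzipped_distro_path,
               arcname=os.path.basename(unzipped_distro_path))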
Example #46
import re
import requests
import json
import fileinput

#getting string in form of 

#getting real string input

lines = split('&', input)
realinput = split(' ', realstring)
autism_arr = []
real_arr = []
fingerprintsarr = []
for line in lines:
		fields = split('|', line)
		autism = fields[0]
		real = fields[2]
		fingerprints = int(split(',', real))
		autism_arr.append(autism)
		real_arr.append(real)
		fingerprintsarr.append(fingerprints);

for realstring in realinput:
	if realstring not in autism_arr:
		filename = realstring + ".wav"
		urlretrieve("http://api.voicerss.org/?key=04f49802d32d442ca997d4d2ea76d3d5"
	        "&hl=en-us&c=wav&src="+realstring, filename)
		rate, data = wav.read(filename)
	
Example #47
def download(url, path=None):
    """Download a file over HTTP"""
    log.debug("Downloading %s.", url)
    name, headers = urllib.urlretrieve(url, path)
    log.debug("Downloaded to %s.", name)
    return name
Example #48
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
##

import os
import urllib2
import urllib

from BeautifulSoup import BeautifulSoup
from django.core.files import File

from montanha.models import PoliticalParty

for pp in PoliticalParty.objects.all():
    if not pp.wikipedia:
        continue
    req = urllib2.Request(pp.wikipedia, headers={"User-Agent": "Mozilla/5.0"})
    html = urllib2.urlopen(req)
    doc = BeautifulSoup(html)
    table = doc.find("table", {"class": "infobox_v2"})
    if table:
        img = table.find("a", {"class": "image"})
        if img:
            logo_url = img.find("img")["src"]
            if "http:" not in logo_url:
                logo_url = "http:%s" % logo_url
            print logo_url
            result = urllib.urlretrieve(logo_url)
            pp.logo.save(os.path.basename(logo_url), File(open(result[0])))
            pp.save()
YellowCab_NYC_Proportion = 146087462 / (
    146087462 + 19233765
)  # 146,087,462 yellow taxi trips for January - June 2015
GreenCab_NYC_Proportion = 1 - YellowCab_NYC_Proportion  #  19,233,765 green taxi trips for 2015

Chicago_Proportion = 2720546 / (8550405 + 2720546)

# COMMAND ----------

# MAGIC %md ## Read in CSV file

# COMMAND ----------

import urllib

urllib.urlretrieve(
    "https://s3-us-west-2.amazonaws.com/nyctlc/yellow_tripdata_2015-01-06.csv.gz"
)

# COMMAND ----------

dbutils.fs.mv("file:/tmp/tmpy1fBlQ.gz",
              "dbfs:/tmp/sample_zip/yellow_tripdata_zip.csv.gz")

# COMMAND ----------

display(dbutils.fs.ls("dbfs:/tmp/sample_zip"))

# COMMAND ----------

Yellow2015DF = spark.read.load(
    "dbfs:/tmp/sample_zip/yellow_tripdata_zip.csv.gz",
Example #50
def load_umontreal_data(dataset):
    ''' Loads the dataset

    :type dataset: string
    :param dataset: the path to the dataset (here MNIST)
    '''

    #############
    # LOAD DATA #
    #############

    # Download the MNIST dataset if it is not present
    data_dir, data_file = os.path.split(dataset)
    if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
        import urllib
        origin = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        print 'Downloading data from %s' % origin
        urllib.urlretrieve(origin, dataset)

    print '... loading data'

    # Load the dataset
    f = gzip.open(dataset, 'rb')
    train_set, valid_set, test_set = cPickle.load(f)
    f.close()

    #train_set, valid_set, test_set format: tuple(input, target)
    #input is an np.ndarray of 2 dimensions (a matrix)
    #in which each row corresponds to an example. target is an
    #np.ndarray of 1 dimension (a vector) that has the same length as
    #the number of rows in the input. It should give the target
    #for the example with the same index in the input.

    def _shared_dataset(data_xy):
        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on GPU).
        Since copying data into the GPU is slow, copying a minibatch everytime
        is needed (the default behaviour if the data is not in a shared
        variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(np.asarray(data_x,
                                            dtype=theano.config.floatX))
        shared_y = theano.shared(np.asarray(data_y,
                                            dtype=theano.config.floatX))
        # When storing data on the GPU it has to be stored as floats
        # therefore we will store the labels as ``floatX`` as well
        # (``shared_y`` does exactly that). But during our computations
        # we need them as ints (we use labels as index, and if they are
        # floats it doesn't make sense) therefore instead of returning
        # ``shared_y`` we will have to cast it to int. This little hack
        # lets us get around this issue (a usage sketch follows
        # load_umontreal_data below).
        return shared_x, T.cast(shared_y, 'int32')

    test_set_x, test_set_y = _shared_dataset(test_set)
    valid_set_x, valid_set_y = _shared_dataset(valid_set)
    train_set_x, train_set_y = _shared_dataset(train_set)

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
    return rval
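
# A minimal usage sketch (hypothetical, not part of the original example),
# assuming Theano is installed and 'mnist.pkl.gz' is available: the shared
# variables returned above are usually sliced by a symbolic minibatch index
# inside a compiled function, which is why the labels are cast back to int32.
import theano
import theano.tensor as T

datasets = load_umontreal_data('mnist.pkl.gz')
(train_set_x, train_set_y), _, _ = datasets

index = T.lscalar('index')     # symbolic minibatch index
batch_size = 500
get_batch = theano.function(
    [index],
    [train_set_x[index * batch_size:(index + 1) * batch_size],
     train_set_y[index * batch_size:(index + 1) * batch_size]])
xs, ys = get_batch(0)          # first minibatch as plain numpy arrays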
Example #51
0
    def FromFiles(self):
        self.name2cid_map = {}
        self.uid2compound_map = {}
        self.uid2reaction_map = {}
        self.uid2pathway_map = {}
        self.enzrxn2regulation_map = {}

        logging.info("Retrieving COMPOUNDS file and parsing it")
        if (not os.path.exists(self.COMPOUND_FILE)):
            urllib.urlretrieve(self.TAR_URL, self.TAR_FILE)
            os.chdir(self.base_dir)
            os.system('tar xvfz ' + self.org + '.tar.gz')   

        entry2fields_map = parse_metacyc_file(self.COMPOUND_FILE)
        
        for uid in sorted(entry2fields_map.keys()):
            field_map = entry2fields_map[uid]
            
            comp = Compound(uid)
            
            if ("COMMON-NAME" in field_map):
                comp.name = re.sub('<.+?>', '', field_map["COMMON-NAME"].strip())
            if ("SYNONYMS" in field_map):
                all_names = field_map["SYNONYMS"].split('\t')
                for name in all_names:
                    name = re.sub('<.+?>', '', name.strip())
                    self.name2cid_map[name] = uid
                comp.all_names = all_names
            if ("MOLECULAR-WEIGHT" in field_map):
                comp.mass = float(field_map["MOLECULAR-WEIGHT"])
            if ("CHEMICAL-FORMULA" in field_map):    
                comp.formula = field_map["CHEMICAL-FORMULA"]
            if ("INCHI" in field_map):    
                comp.inchi = field_map["INCHI"]
            if ("SMILES" in field_map):    
                comp.smiles = field_map["SMILES"]
            if ("DBLINKS" in field_map):
                for sid in re.findall("PUBCHEM \"(\d+)\"", field_map["DBLINKS"]):
                    comp.pubchem_id = int(sid)
                for cas in re.findall("CAS \"([\d\-]+)\"", field_map["DBLINKS"]):
                    comp.cas = cas
            if ("REGULATES" in field_map):
                comp.regulates = field_map["REGULATES"].split('\t')
            if ("TYPES" in field_map):
                comp.types = field_map["TYPES"].split('\t')
            
            self.uid2compound_map[uid] = comp

        logging.info("Retrieving REGULATION file and parsing it")
        if (not os.path.exists(self.REGULATION_FILE)):
            urllib.urlretrieve(self.TAR_URL, self.TAR_FILE)
            os.chdir(self.base_dir)
            os.system('tar xvfz ' + self.org + '.tar.gz')   

        entry2fields_map = parse_metacyc_file(self.REGULATION_FILE)
        
        for uid in sorted(entry2fields_map.keys()):
            field_map = entry2fields_map[uid]
            
            reg = Regulation(uid)
            
            if ("MODE" in field_map):    
                reg.mode = field_map["MODE"]
            if ("REGULATED-ENTITY" in field_map):    
                reg.regulated = field_map["REGULATED-ENTITY"]
            if ("REGULATOR" in field_map):    
                reg.regulator = field_map["REGULATOR"]
            
            if reg.regulated != None:
                self.enzrxn2regulation_map[reg.regulated] = reg
        
        logging.info("Retrieving REACTIONS file and parsing it")
        if (not os.path.exists(self.REACTION_FILE)):
            urllib.urlretrieve(self.TAR_URL, self.TAR_FILE)
            os.chdir(self.base_dir)
            os.system('tar xvfz ' + self.org + '.tar.gz')   

        entry2fields_map = parse_rxns_metacyc_file(self.REACTION_FILE)
        
        for uid in sorted(entry2fields_map.keys()):
            field_map = entry2fields_map[uid]
            
            direction = '<=>'
            if ("REACTION-DIRECTION" in field_map):
                if (re.search('LEFT-TO-RIGHT', field_map['REACTION-DIRECTION'])):
                    direction = '=>'
                elif (re.search('RIGHT-TO-LEFT', field_map['REACTION-DIRECTION'])):
                    direction = '<='

            rxn = Reaction(uid, sparse_reaction=field_map['SPARSE'], direction=direction)

            if ("COMMON-NAME" in field_map):
                rxn.name = field_map["COMMON-NAME"].strip()
            if ("TYPES" in field_map):
                rxn.types = field_map["TYPES"].split('\t')
            if ("EC-NUMBER" in field_map):    
                rxn.ec_number = field_map["EC-NUMBER"]
            if ("ENZYMATIC-REACTION" in field_map):
                rxn.enzrxns_list = field_map["ENZYMATIC-REACTION"].split('\t')
            
            self.uid2reaction_map[uid] = rxn

        os.remove('../res/metacyc_pathways_stats.txt')
        logging.info("Retrieving PATHWAYS file and parsing it")
        if (not os.path.exists(self.PATHWAY_FILE)):
            urllib.urlretrieve(self.TAR_URL, self.TAR_FILE)
            os.chdir(self.base_dir)
            os.system('tar xvfz ' + self.org + '.tar.gz')   

        entry2fields_map = parse_metacyc_file(self.PATHWAY_FILE)
        
        n_super = 0
        n_rxns_dict_prob = 0
        rxn_parse_error = 0
        dup_rxn_layout = 0
        unknown_dir = 0
        no_pred_info = 0
        no_start = 0
        mul_start = 0
        
        for uid in sorted(entry2fields_map.keys()):
            field_map = entry2fields_map[uid]
            rxn_direction_map = {}
            if ('Super-Pathways' in field_map['TYPES']):
                n_super += 1
                continue
            
            pw = Pathway(uid)

            if ("COMMON-NAME" in field_map):
                pw.name = field_map["COMMON-NAME"].strip()
            if ("TYPES" in field_map):
                pw.types = field_map["TYPES"].split('\t')
            if ("PREDECESSORS" in field_map):    
                pw.preds = field_map["PREDECESSORS"].split('\t')
                try:
                    pw.UpdateRxnsDict()
                    if pw.preds == None:
                        no_pred_info += 1
                except MetaCycPathwayWithoutStartException, e:
                    no_start += 1
                    logging.debug(str(e))
                    continue
                except MetaCycManyPathwayStartException, e:
                    mul_start += 1
                    logging.debug(str(e))
                    continue
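
# A hypothetical refactoring sketch (not part of the original class): the
# download-and-extract step above is repeated for every MetaCyc flat file,
# so it could be factored into a single helper that uses the tarfile module
# instead of shelling out to `tar`. The argument names mirror the attributes
# used above (TAR_URL, TAR_FILE, base_dir).
import os
import tarfile
import urllib


def ensure_metacyc_file(flat_file, tar_url, tar_file, base_dir):
    """Download and unpack the MetaCyc tarball if flat_file is missing."""
    if os.path.exists(flat_file):
        return
    urllib.urlretrieve(tar_url, tar_file)
    tar = tarfile.open(tar_file, 'r:gz')
    tar.extractall(base_dir)
    tar.close()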
Example #52
0
        print(
            'Alternatively, you can upload the dac_sample.tar.gz file to your Jupyter root '
            + 'directory')
        return False


if os.path.isfile(fileName):
    print 'File is already available. Nothing to do.'
elif extractTar(check=True):
    print 'tar.gz file was already available.'
elif not url.endswith('dac_sample.tar.gz'):
    print 'Check your download url.  Are you downloading the Sample dataset?'
else:
    # Download the file and store it in the same directory as this notebook
    try:
        urllib.urlretrieve(url, os.path.basename(urlparse.urlsplit(url).path))
    except IOError:
        print 'Unable to download and store: {0}'.format(url)

    extractTar()

# In[69]:

import os.path
baseDir = os.path.join('data')
inputPath = os.path.join('cs190', 'dac_sample.txt')
fileName = os.path.join(baseDir, inputPath)

if os.path.isfile(fileName):
    rawData = (sc.textFile(fileName, 2).map(lambda x: x.replace('\t', ','))
               )  # work with either ',' or '\t' separated data
Example #53
0
def download_mtg_json():
    urllib.urlretrieve("http://www.mtgjson.com/json/AllSets-x.json.zip", "AllSets-x.json.zip")
    zip_ref = zipfile.ZipFile("AllSets-x.json.zip", 'r')
    zip_ref.extractall()
    zip_ref.close()
    os.remove("AllSets-x.json.zip")
Example #54
0
    def build(self, release=False, dev=False, jobs=None, params=None,
              no_package=False, verbose=False, very_verbose=False,
              target=None, android=False, magicleap=False, libsimpleservo=False, uwp=False,
              features=None, win_arm64=False, **kwargs):
        opts = params or []
        features = features or []
        target, android = self.pick_target_triple(target, android, magicleap)

        target_path = base_path = self.get_target_dir()
        if android:
            target_path = path.join(target_path, "android")
            base_path = path.join(target_path, target)
        elif magicleap:
            target_path = path.join(target_path, "magicleap")
            base_path = path.join(target_path, target)
        release_path = path.join(base_path, "release", "servo")
        dev_path = path.join(base_path, "debug", "servo")

        release_exists = path.exists(release_path)
        dev_exists = path.exists(dev_path)

        if not (release or dev):
            if self.config["build"]["mode"] == "dev":
                dev = True
            elif self.config["build"]["mode"] == "release":
                release = True
            elif release_exists and not dev_exists:
                release = True
            elif dev_exists and not release_exists:
                dev = True
            else:
                print("Please specify either --dev (-d) for a development")
                print("  build, or --release (-r) for an optimized build.")
                sys.exit(1)

        if release and dev:
            print("Please specify either --dev or --release.")
            sys.exit(1)

        if release:
            opts += ["--release"]
            servo_path = release_path
        else:
            servo_path = dev_path

        if jobs is not None:
            opts += ["-j", jobs]
        if verbose:
            opts += ["-v"]
        if very_verbose:
            opts += ["-vv"]

        if win_arm64:
            if target:
                print("Can't specify explicit --target value with --win-arm64.")
                sys.exit(1)
            target = "aarch64-pc-windows-msvc"

        if target:
            if self.config["tools"]["use-rustup"]:
                # 'rustup target add' fails if the toolchain is not installed at all.
                self.call_rustup_run(["rustc", "--version"])

                check_call(["rustup" + BIN_SUFFIX, "target", "add",
                            "--toolchain", self.toolchain(), target])

        env = self.build_env(target=target, is_build=True)
        self.ensure_bootstrapped(target=target)
        self.ensure_clobbered()

        build_start = time()
        env["CARGO_TARGET_DIR"] = target_path

        host = host_triple()
        target_triple = target or host_triple()
        if 'apple-darwin' in host and target_triple == host:
            if 'CXXFLAGS' not in env:
                env['CXXFLAGS'] = ''
            env["CXXFLAGS"] += "-mmacosx-version-min=10.10"

        if 'windows' in host:
            vs_dirs = self.vs_dirs()

        if host != target_triple and 'windows' in target_triple:
            if os.environ.get('VisualStudioVersion'):
                print("Can't cross-compile for Windows inside of a Visual Studio shell.\n"
                      "Please run `python mach build [arguments]` to bypass automatic "
                      "Visual Studio shell.")
                sys.exit(1)
            vcinstalldir = vs_dirs['vcdir']
            if not os.path.exists(vcinstalldir):
                print("Can't find Visual C++ %s installation at %s." % (vs_dirs['vs_version'], vcinstalldir))
                sys.exit(1)

            env['PKG_CONFIG_ALLOW_CROSS'] = "1"

        if uwp:
            # Don't try and build a desktop port.
            libsimpleservo = True

            arches = {
                "aarch64": {
                    "angle": "arm64",
                    "gst": "ARM64",
                    "gst_root": "arm64",
                },
                "x86_64": {
                    "angle": "x64",
                    "gst": "X86_64",
                    "gst_root": "x64",
                },
            }
            arch = arches.get(target_triple.split('-')[0])
            if not arch:
                print("Unsupported UWP target.")
                sys.exit(1)

            # Ensure that the NuGet ANGLE package containing libEGL is accessible
            # to the Rust linker.
            append_to_path_env(angle_root(target_triple, env), env, "LIB")

            # Don't want to mix non-UWP libraries with vendored UWP libraries.
            if "gstreamer" in env['LIB']:
                print("Found existing GStreamer library path in LIB. Please remove it.")
                sys.exit(1)

            # Override any existing GStreamer installation with the vendored libraries.
            env["GSTREAMER_1_0_ROOT_" + arch['gst']] = path.join(
                self.msvc_package_dir("gstreamer-uwp"), arch['gst_root']
            )

        # Ensure that GStreamer libraries are accessible when linking.
        if 'windows' in target_triple:
            gst_root = gstreamer_root(target_triple, env)
            if gst_root:
                append_to_path_env(os.path.join(gst_root, "lib"), env, "LIB")

        if android:
            if "ANDROID_NDK" not in env:
                print("Please set the ANDROID_NDK environment variable.")
                sys.exit(1)
            if "ANDROID_SDK" not in env:
                print("Please set the ANDROID_SDK environment variable.")
                sys.exit(1)

            android_platform = self.config["android"]["platform"]
            android_toolchain_name = self.config["android"]["toolchain_name"]
            android_toolchain_prefix = self.config["android"]["toolchain_prefix"]
            android_lib = self.config["android"]["lib"]
            android_arch = self.config["android"]["arch"]

            # Build OpenSSL for android
            env["OPENSSL_VERSION"] = "1.0.2k"
            make_cmd = ["make"]
            if jobs is not None:
                make_cmd += ["-j" + jobs]
            openssl_dir = path.join(target_path, target, "native", "openssl")
            if not path.exists(openssl_dir):
                os.makedirs(openssl_dir)
            shutil.copy(path.join(self.android_support_dir(), "openssl.makefile"), openssl_dir)
            shutil.copy(path.join(self.android_support_dir(), "openssl.sh"), openssl_dir)

            # Check if the NDK version is 15
            if not os.path.isfile(path.join(env["ANDROID_NDK"], 'source.properties')):
                print("ANDROID_NDK should have file `source.properties`.")
                print("The environment variable ANDROID_NDK may be set at a wrong path.")
                sys.exit(1)
            with open(path.join(env["ANDROID_NDK"], 'source.properties')) as ndk_properties:
                lines = ndk_properties.readlines()
                if lines[1].split(' = ')[1].split('.')[0] != '15':
                    print("Currently only support NDK 15. Please re-run `./mach bootstrap-android`.")
                    sys.exit(1)

            env["RUST_TARGET"] = target
            with cd(openssl_dir):
                status = call(
                    make_cmd + ["-f", "openssl.makefile"],
                    env=env,
                    verbose=verbose)
                if status:
                    return status
            openssl_dir = path.join(openssl_dir, "openssl-{}".format(env["OPENSSL_VERSION"]))
            env['OPENSSL_LIB_DIR'] = openssl_dir
            env['OPENSSL_INCLUDE_DIR'] = path.join(openssl_dir, "include")
            env['OPENSSL_STATIC'] = 'TRUE'
            # Android builds also require having the gcc bits on the PATH and various INCLUDE
            # path munging if you do not want to install a standalone NDK. See:
            # https://dxr.mozilla.org/mozilla-central/source/build/autoconf/android.m4#139-161
            os_type = platform.system().lower()
            if os_type not in ["linux", "darwin"]:
                raise Exception("Android cross builds are only supported on Linux and macOS.")
            cpu_type = platform.machine().lower()
            host_suffix = "unknown"
            if cpu_type in ["i386", "i486", "i686", "i768", "x86"]:
                host_suffix = "x86"
            elif cpu_type in ["x86_64", "x86-64", "x64", "amd64"]:
                host_suffix = "x86_64"
            host = os_type + "-" + host_suffix

            host_cc = env.get('HOST_CC') or _get_exec_path(["clang"]) or _get_exec_path(["gcc"])
            host_cxx = env.get('HOST_CXX') or _get_exec_path(["clang++"]) or _get_exec_path(["g++"])

            llvm_toolchain = path.join(env['ANDROID_NDK'], "toolchains", "llvm", "prebuilt", host)
            gcc_toolchain = path.join(env['ANDROID_NDK'], "toolchains",
                                      android_toolchain_prefix + "-4.9", "prebuilt", host)
            gcc_libs = path.join(gcc_toolchain, "lib", "gcc", android_toolchain_name, "4.9.x")

            env['PATH'] = (path.join(llvm_toolchain, "bin") + ':' + env['PATH'])
            env['ANDROID_SYSROOT'] = path.join(env['ANDROID_NDK'], "sysroot")
            support_include = path.join(env['ANDROID_NDK'], "sources", "android", "support", "include")
            cpufeatures_include = path.join(env['ANDROID_NDK'], "sources", "android", "cpufeatures")
            cxx_include = path.join(env['ANDROID_NDK'], "sources", "cxx-stl",
                                    "llvm-libc++", "include")
            clang_include = path.join(llvm_toolchain, "lib64", "clang", "3.8", "include")
            cxxabi_include = path.join(env['ANDROID_NDK'], "sources", "cxx-stl",
                                       "llvm-libc++abi", "include")
            sysroot_include = path.join(env['ANDROID_SYSROOT'], "usr", "include")
            arch_include = path.join(sysroot_include, android_toolchain_name)
            android_platform_dir = path.join(env['ANDROID_NDK'], "platforms", android_platform, "arch-" + android_arch)
            arch_libs = path.join(android_platform_dir, "usr", "lib")
            clang_include = path.join(llvm_toolchain, "lib64", "clang", "5.0", "include")
            android_api = android_platform.replace('android-', '')
            env['HOST_CC'] = host_cc
            env['HOST_CXX'] = host_cxx
            env['HOST_CFLAGS'] = ''
            env['HOST_CXXFLAGS'] = ''
            env['CC'] = path.join(llvm_toolchain, "bin", "clang")
            env['CPP'] = path.join(llvm_toolchain, "bin", "clang") + " -E"
            env['CXX'] = path.join(llvm_toolchain, "bin", "clang++")
            env['ANDROID_TOOLCHAIN'] = gcc_toolchain
            env['ANDROID_TOOLCHAIN_DIR'] = gcc_toolchain
            env['ANDROID_VERSION'] = android_api
            env['ANDROID_PLATFORM_DIR'] = android_platform_dir
            env['GCC_TOOLCHAIN'] = gcc_toolchain
            gcc_toolchain_bin = path.join(gcc_toolchain, android_toolchain_name, "bin")
            env['AR'] = path.join(gcc_toolchain_bin, "ar")
            env['RANLIB'] = path.join(gcc_toolchain_bin, "ranlib")
            env['OBJCOPY'] = path.join(gcc_toolchain_bin, "objcopy")
            env['YASM'] = path.join(env['ANDROID_NDK'], 'prebuilt', host, 'bin', 'yasm')
            # A cheat-sheet for some of the build errors caused by getting the search path wrong...
            #
            # fatal error: 'limits' file not found
            #   -- add -I cxx_include
            # unknown type name '__locale_t' (when running bindgen in mozjs_sys)
            #   -- add -isystem sysroot_include
            # error: use of undeclared identifier 'UINTMAX_C'
            #   -- add -D__STDC_CONSTANT_MACROS
            #
            # Also worth remembering: autoconf uses C for its configuration,
            # even for C++ builds, so the C flags need to line up with the C++ flags.
            env['CFLAGS'] = ' '.join([
                "--target=" + target,
                "--sysroot=" + env['ANDROID_SYSROOT'],
                "--gcc-toolchain=" + gcc_toolchain,
                "-isystem", sysroot_include,
                "-I" + arch_include,
                "-B" + arch_libs,
                "-L" + arch_libs,
                "-D__ANDROID_API__=" + android_api,
            ])
            env['CXXFLAGS'] = ' '.join([
                "--target=" + target,
                "--sysroot=" + env['ANDROID_SYSROOT'],
                "--gcc-toolchain=" + gcc_toolchain,
                "-I" + cpufeatures_include,
                "-I" + cxx_include,
                "-I" + clang_include,
                "-isystem", sysroot_include,
                "-I" + cxxabi_include,
                "-I" + clang_include,
                "-I" + arch_include,
                "-I" + support_include,
                "-L" + gcc_libs,
                "-B" + arch_libs,
                "-L" + arch_libs,
                "-D__ANDROID_API__=" + android_api,
                "-D__STDC_CONSTANT_MACROS",
                "-D__NDK_FPABI__=",
            ])
            env['CPPFLAGS'] = ' '.join([
                "--target=" + target,
                "--sysroot=" + env['ANDROID_SYSROOT'],
                "-I" + arch_include,
            ])
            env["NDK_ANDROID_VERSION"] = android_api
            env["ANDROID_ABI"] = android_lib
            env["ANDROID_PLATFORM"] = android_platform
            env["NDK_CMAKE_TOOLCHAIN_FILE"] = path.join(env['ANDROID_NDK'], "build", "cmake", "android.toolchain.cmake")
            env["CMAKE_TOOLCHAIN_FILE"] = path.join(self.android_support_dir(), "toolchain.cmake")
            # Set output dir for gradle aar files
            aar_out_dir = self.android_aar_dir()
            if not os.path.exists(aar_out_dir):
                os.makedirs(aar_out_dir)
            env["AAR_OUT_DIR"] = aar_out_dir
            # GStreamer and its dependencies use pkg-config and this flag is required
            # to make it work in a cross-compilation context.
            env["PKG_CONFIG_ALLOW_CROSS"] = '1'
            # Build the name of the package containing all GStreamer dependencies
            # according to the build target.
            gst_lib = "gst-build-{}".format(self.config["android"]["lib"])
            gst_lib_zip = "gstreamer-{}-1.16.0-20190517-095630.zip".format(self.config["android"]["lib"])
            gst_dir = os.path.join(target_path, "gstreamer")
            gst_lib_path = os.path.join(gst_dir, gst_lib)
            pkg_config_path = os.path.join(gst_lib_path, "pkgconfig")
            env["PKG_CONFIG_PATH"] = pkg_config_path
            if not os.path.exists(gst_lib_path):
                # Download GStreamer dependencies if they have not already been downloaded
                # This bundle is generated with `libgstreamer_android_gen`
                # Follow these instructions to build and deploy new binaries
                # https://github.com/servo/libgstreamer_android_gen#build
                print("Downloading GStreamer dependencies")
                gst_url = "https://servo-deps.s3.amazonaws.com/gstreamer/%s" % gst_lib_zip
                print(gst_url)
                urllib.urlretrieve(gst_url, gst_lib_zip)
                zip_ref = zipfile.ZipFile(gst_lib_zip, "r")
                zip_ref.extractall(gst_dir)
                os.remove(gst_lib_zip)

                # Change pkgconfig info to make all GStreamer dependencies point
                # to the libgstreamer_android.so bundle.
                for each in os.listdir(pkg_config_path):
                    if each.endswith('.pc'):
                        print("Setting pkgconfig info for %s" % each)
                        pc = os.path.join(pkg_config_path, each)
                        expr = "s#libdir=.*#libdir=%s#g" % gst_lib_path
                        subprocess.call(["perl", "-i", "-pe", expr, pc])

        if magicleap:
            if platform.system() not in ["Darwin"]:
                raise Exception("Magic Leap builds are only supported on macOS. "
                                "If you only wish to test if your code builds, "
                                "run ./mach build -p libmlservo.")

            ml_sdk = env.get("MAGICLEAP_SDK")
            if not ml_sdk:
                raise Exception("Magic Leap builds need the MAGICLEAP_SDK environment variable")
            if not os.path.exists(ml_sdk):
                raise Exception("Path specified by MAGICLEAP_SDK does not exist.")

            ml_support = path.join(self.get_top_dir(), "support", "magicleap")

            # We pretend to be an Android build
            env.setdefault("ANDROID_VERSION", "21")
            env.setdefault("ANDROID_NDK", env["MAGICLEAP_SDK"])
            env.setdefault("ANDROID_NDK_VERSION", "16.0.0")
            env.setdefault("ANDROID_PLATFORM_DIR", path.join(env["MAGICLEAP_SDK"], "lumin"))
            env.setdefault("ANDROID_TOOLCHAIN_DIR", path.join(env["MAGICLEAP_SDK"], "tools", "toolchains"))
            env.setdefault("ANDROID_CLANG", path.join(env["ANDROID_TOOLCHAIN_DIR"], "bin", "clang"))

            # A random collection of search paths
            env.setdefault("STLPORT_LIBS", " ".join([
                "-L" + path.join(env["MAGICLEAP_SDK"], "lumin", "stl", "libc++-lumin", "lib"),
                "-lc++"
            ]))
            env.setdefault("STLPORT_CPPFLAGS", " ".join([
                "-I" + path.join(env["MAGICLEAP_SDK"], "lumin", "stl", "libc++-lumin", "include")
            ]))
            env.setdefault("CPPFLAGS", " ".join([
                "--no-standard-includes",
                "--sysroot=" + env["ANDROID_PLATFORM_DIR"],
                "-I" + path.join(env["ANDROID_PLATFORM_DIR"], "usr", "include"),
                "-isystem" + path.join(env["ANDROID_TOOLCHAIN_DIR"], "lib64", "clang", "3.8", "include"),
            ]))
            env.setdefault("CFLAGS", " ".join([
                env["CPPFLAGS"],
                "-L" + path.join(env["ANDROID_TOOLCHAIN_DIR"], "lib", "gcc", target, "4.9.x"),
            ]))
            env.setdefault("CXXFLAGS", " ".join([
                # Sigh, Angle gets confused if there's another EGL around
                "-I./gfx/angle/checkout/include",
                env["STLPORT_CPPFLAGS"],
                env["CFLAGS"]
            ]))

            # The toolchain commands
            env.setdefault("AR", path.join(env["ANDROID_TOOLCHAIN_DIR"], "bin", "aarch64-linux-android-ar"))
            env.setdefault("AS", path.join(env["ANDROID_TOOLCHAIN_DIR"], "bin", "aarch64-linux-android-clang"))
            env.setdefault("CC", path.join(env["ANDROID_TOOLCHAIN_DIR"], "bin", "aarch64-linux-android-clang"))
            env.setdefault("CPP", path.join(env["ANDROID_TOOLCHAIN_DIR"], "bin", "aarch64-linux-android-clang -E"))
            env.setdefault("CXX", path.join(env["ANDROID_TOOLCHAIN_DIR"], "bin", "aarch64-linux-android-clang++"))
            env.setdefault("LD", path.join(env["ANDROID_TOOLCHAIN_DIR"], "bin", "aarch64-linux-android-ld"))
            env.setdefault("OBJCOPY", path.join(env["ANDROID_TOOLCHAIN_DIR"], "bin", "aarch64-linux-android-objcopy"))
            env.setdefault("OBJDUMP", path.join(env["ANDROID_TOOLCHAIN_DIR"], "bin", "aarch64-linux-android-objdump"))
            env.setdefault("RANLIB", path.join(env["ANDROID_TOOLCHAIN_DIR"], "bin", "aarch64-linux-android-ranlib"))
            env.setdefault("STRIP", path.join(env["ANDROID_TOOLCHAIN_DIR"], "bin", "aarch64-linux-android-strip"))

            # Undo all of that when compiling build tools for the host
            env.setdefault("HOST_CFLAGS", "")
            env.setdefault("HOST_CXXFLAGS", "")
            env.setdefault("HOST_CC", "/usr/local/opt/llvm/bin/clang")
            env.setdefault("HOST_CXX", "/usr/local/opt/llvm/bin/clang++")
            env.setdefault("HOST_LD", "ld")

            # Some random build configurations
            env.setdefault("HARFBUZZ_SYS_NO_PKG_CONFIG", "1")
            env.setdefault("PKG_CONFIG_ALLOW_CROSS", "1")
            env.setdefault("CMAKE_TOOLCHAIN_FILE", path.join(ml_support, "toolchain.cmake"))
            env.setdefault("_LIBCPP_INLINE_VISIBILITY", "__attribute__((__always_inline__))")

            # The Open SSL configuration
            env.setdefault("OPENSSL_DIR", path.join(target_path, target, "native", "openssl"))
            env.setdefault("OPENSSL_VERSION", "1.0.2k")
            env.setdefault("OPENSSL_STATIC", "1")

            # GStreamer configuration
            env.setdefault("GSTREAMER_DIR", path.join(target_path, target, "native", "gstreamer-1.16.0"))
            env.setdefault("GSTREAMER_URL", "https://servo-deps.s3.amazonaws.com/gstreamer/gstreamer-magicleap-1.16.0-20190823-104505.tgz")
            env.setdefault("PKG_CONFIG_PATH", path.join(env["GSTREAMER_DIR"], "system", "lib64", "pkgconfig"))

            # Override the linker set in .cargo/config
            env.setdefault("CARGO_TARGET_AARCH64_LINUX_ANDROID_LINKER", path.join(ml_support, "fake-ld.sh"))

            # Only build libmlservo
            opts += ["--package", "libmlservo"]

            # Download and build OpenSSL if necessary
            status = call(path.join(ml_support, "openssl.sh"), env=env, verbose=verbose)
            if status:
                return status

            # Download prebuilt Gstreamer if necessary
            if not os.path.exists(path.join(env["GSTREAMER_DIR"], "system")):
                if not os.path.exists(env["GSTREAMER_DIR"] + ".tgz"):
                    check_call([
                        'curl',
                        '-L',
                        '-f',
                        '-o', env["GSTREAMER_DIR"] + ".tgz",
                        env["GSTREAMER_URL"],
                    ])
                check_call([
                    'mkdir',
                    '-p',
                    env["GSTREAMER_DIR"],
                ])
                check_call([
                    'tar',
                    'xzf',
                    env["GSTREAMER_DIR"] + ".tgz",
                    '-C', env["GSTREAMER_DIR"],
                ])

        if very_verbose:
            print (["Calling", "cargo", "build"] + opts)
            for key in env:
                print((key, env[key]))

        if sys.platform == "win32":
            env.setdefault("CC", "clang-cl.exe")
            env.setdefault("CXX", "clang-cl.exe")
            if uwp:
                env.setdefault("CFLAGS", "")
                env.setdefault("CXXFLAGS", "")
                env["CFLAGS"] += " -DWINAPI_FAMILY=WINAPI_FAMILY_APP"
                env["CXXFLAGS"] += " -DWINAPI_FAMILY=WINAPI_FAMILY_APP"
        else:
            env.setdefault("CC", "clang")
            env.setdefault("CXX", "clang++")

        status = self.run_cargo_build_like_command(
            "build", opts, env=env, verbose=verbose,
            target=target, android=android, magicleap=magicleap, libsimpleservo=libsimpleservo, uwp=uwp,
            features=features, **kwargs
        )

        elapsed = time() - build_start

        # Do some additional things if the build succeeded
        if status == 0:
            if android and not no_package:
                flavor = None
                if "googlevr" in features:
                    flavor = "googlevr"
                elif "oculusvr" in features:
                    flavor = "oculusvr"
                rv = Registrar.dispatch("package", context=self.context,
                                        release=release, dev=dev, target=target, flavor=flavor)
                if rv:
                    return rv

            if sys.platform == "win32":
                servo_exe_dir = os.path.dirname(
                    self.get_binary_path(release, dev, target=target, simpleservo=libsimpleservo)
                )
                assert os.path.exists(servo_exe_dir)

                # on msvc builds, use editbin to change the subsystem to windows, but only
                # on release builds -- on debug builds, it hides log output
                if not dev and not libsimpleservo:
                    call(["editbin", "/nologo", "/subsystem:windows", path.join(servo_exe_dir, "servo.exe")],
                         verbose=verbose)
                # on msvc, we need to copy in some DLLs in to the servo.exe dir
                for ssl_lib in ["libssl.dll", "libcrypto.dll"]:
                    shutil.copy(path.join(env['OPENSSL_LIB_DIR'], "../bin", ssl_lib),
                                servo_exe_dir)
                # Search for the generated nspr4.dll
                build_path = path.join(servo_exe_dir, "build")
                assert os.path.exists(build_path)

                def package_generated_shared_libraries(libs, build_path, servo_exe_dir):
                    for root, dirs, files in os.walk(build_path):
                        remaining_libs = list(libs)
                        for lib in libs:
                            if lib in files:
                                shutil.copy(path.join(root, lib), servo_exe_dir)
                                remaining_libs.remove(lib)
                                continue
                        libs = remaining_libs
                        if not libs:
                            return
                    for lib in libs:
                        print("WARNING: could not find " + lib)

                # UWP build has its own ANGLE library that it packages.
                if not uwp:
                    package_generated_shared_libraries(["libEGL.dll", "libGLESv2.dll"], build_path, servo_exe_dir)

                # copy needed gstreamer DLLs in to servo.exe dir
                print("Packaging gstreamer DLLs")
                if not package_gstreamer_dlls(env, servo_exe_dir, target_triple, uwp):
                    status = 1

                # UWP app packaging already bundles all required DLLs for us.
                print("Packaging MSVC DLLs")
                if not package_msvc_dlls(servo_exe_dir, target_triple, vs_dirs['vcdir'], vs_dirs['vs_version']):
                    status = 1

            elif sys.platform == "darwin":
                # On the Mac, set a lovely icon. This makes it easier to pick out the Servo binary in tools
                # like Instruments.app.
                try:
                    import Cocoa
                    icon_path = path.join(self.get_top_dir(), "resources", "servo.png")
                    icon = Cocoa.NSImage.alloc().initWithContentsOfFile_(icon_path)
                    if icon is not None:
                        Cocoa.NSWorkspace.sharedWorkspace().setIcon_forFile_options_(icon,
                                                                                     servo_path,
                                                                                     0)
                except ImportError:
                    pass

        # Generate Desktop Notification if elapsed-time > some threshold value
        notify_build_done(self.config, elapsed, status == 0)

        print("Build %s in %s" % ("Completed" if status == 0 else "FAILED", format_duration(elapsed)))
        return status
# Return the JSON triples
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

# Open the file for writing urls (this is for image magick)
listImages = open('bmimagesResized/files.txt', 'w')

# Iterate over the results
for result in results["results"]["bindings"]:
    image = result["image"]["value"]
    if os.path.isfile(os.path.join('bmimages', os.path.basename(image))):
        print "File already exists"
    else:
        path = os.path.join('bmimages', os.path.basename(image))
        urllib.urlretrieve(image, path)
        print "Image " + os.path.basename(image) + " downloaded"

for file in os.listdir('bmimages'):
    if not file.startswith('.'):
        listImages.write(
            os.path.join("bmimagesResized", os.path.basename(file)) + "\n")

# Iterate through files and crop as required
for file in os.listdir('bmimages'):
    # Make sure file is not a hidden one etc
    if not file.startswith('.') and os.path.isfile(
            os.path.join('bmimages', file)):
        # Open the file checking if it is valid or not. It fails otherwise :-(
        try:
            if not os.path.exists(os.path.join('bmimagesResized', file)):
def download_images(urls):
    global downloaded
    for url in urls:
        urllib.urlretrieve (url, "../../datasets/Delhi_imgs/" + url[36:42] + '_' + url[45:51] + ".jpg")
Example #57
0
    def size_for_node(self, node, client):
        '''Given a docutils image node, returns the size the image should have
        in the PDF document, and what 'kind' of size that is.
        That involves lots of guesswork'''

        uri = str(node.get("uri"))
        if uri.split("://")[0].lower() not in ('http', 'ftp', 'https'):
            uri = os.path.join(client.basedir, uri)
        else:
            uri, _ = urllib.urlretrieve(uri)
            client.to_unlink.append(uri)

        srcinfo = client, uri
        # Extract all the information from the URI
        imgname, extension, options = self.split_uri(uri)

        if not os.path.isfile(imgname):
            imgname = missing

        scale = float(node.get('scale', 100)) / 100
        size_known = False

        # Figuring out the size to display of an image is ... annoying.
        # If the user provides a size with a unit, it's simple, adjustUnits
        # will return it in points and we're done.

        # However, often the unit will be "%" (especially if it's meant
        # for HTML originally), in which case we will use a percentage
        # of the containing frame.

        # Find the image size in pixels:
        kind = 'direct'
        xdpi, ydpi = client.styles.def_dpi, client.styles.def_dpi
        extension = imgname.split('.')[-1].lower()
        if extension in ['svg', 'svgz']:
            iw, ih = SVGImage(imgname, srcinfo=srcinfo).wrap(0, 0)
            # These are in pt, so convert to px
            iw = iw * xdpi / 72
            ih = ih * ydpi / 72

        elif extension == 'pdf':
            if VectorPdf is not None:
                xobj = VectorPdf.load_xobj(srcinfo)
                iw, ih = xobj.w, xobj.h
            else:
                reader = pdfinfo.PdfFileReader(open(imgname, 'rb'))
                box = [float(x) for x in reader.getPage(0)['/MediaBox']]
                x1, y1, x2, y2 = box
                iw, ih = x2 - x1, y2 - y1
            # These are in pt, so convert to px
            iw = iw * xdpi / 72.0
            ih = ih * ydpi / 72.0
            size_known = True  # Assume size from original PDF is OK

        else:
            keeptrying = True
            if PILImage:
                try:
                    img = PILImage.open(imgname)
                    img.load()
                    iw, ih = img.size
                    xdpi, ydpi = img.info.get('dpi', (xdpi, ydpi))
                    keeptrying = False
                except IOError:  # PIL throws this when it's a broken/unknown image
                    pass
            if keeptrying:
                if extension not in ['jpg', 'jpeg']:
                    log.error(
                        "The image (%s, %s) is broken or in an unknown format",
                        imgname, nodeid(node))
                    raise ValueError
                else:
                    # Can be handled by reportlab
                    log.warning(
                        "Can't figure out size of the image (%s, %s). Install PIL for better results.",
                        imgname, nodeid(node))
                    iw = 1000
                    ih = 1000

        # Try to get the print resolution from the image itself via PIL.
        # If it fails, assume a DPI of 300, which is pretty much made up,
        # and then a 100% size would be iw*inch/300, so we pass
        # that as the second parameter to adjustUnits
        #
        # Some say the default DPI should be 72. That would mean
        # the largest printable image in A4 paper would be something
        # like 480x640. That would be awful.
        #

        w = node.get('width')
        h = node.get('height')
        if h is None and w is None:  # Nothing specified
            # Guess from iw, ih
            log.debug(
                "Using image %s without specifying size."
                "Calculating based on image size at %ddpi [%s]", imgname, xdpi,
                nodeid(node))
            w = iw * inch / xdpi
            h = ih * inch / ydpi
        elif w is not None:
            # Node specifies only w
            # In this particular case, we want the default unit
            # to be pixels so we work like rst2html
            if w[-1] == '%':
                kind = 'percentage_of_container'
                w = int(w[:-1])
            else:
                # This uses default DPI setting because we
                # are not using the image's "natural size"
                # this is what LaTeX does, according to the
                # docutils mailing list discussion
                w = client.styles.adjustUnits(w,
                                              client.styles.tw,
                                              default_unit='px')

            if h is None:
                # h is set from w with right aspect ratio
                h = w * ih / iw
            else:
                h = client.styles.adjustUnits(h,
                                              ih * inch / ydpi,
                                              default_unit='px')
        elif h is not None and w is None:
            if h[-1] != '%':
                h = client.styles.adjustUnits(h,
                                              ih * inch / ydpi,
                                              default_unit='px')

                # w is set from h with right aspect ratio
                w = h * iw / ih
            else:
                log.error('Setting height as a percentage does **not** work. '\
                          'ignoring height parameter [%s]', nodeid(node))
                # Set both from image data
                w = iw * inch / xdpi
                h = ih * inch / ydpi

        # Apply scale factor
        w = w * scale
        h = h * scale

        # And now we have this probably completely bogus size!
        log.info("Image %s size calculated:  %fcm by %fcm [%s]", imgname,
                 w / cm, h / cm, nodeid(node))

        return w, h, kind
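
# A hypothetical numeric check (not part of rst2pdf) of the px -> pt
# conversion used above, w = iw * inch / dpi, for a 600x400 px image at the
# assumed default of 300 dpi and scale = 100%.
from reportlab.lib.units import inch, cm

iw, ih, xdpi, ydpi, scale = 600, 400, 300, 300, 1.0
w = iw * inch / xdpi * scale   # 144.0 pt, i.e. 2 inches
h = ih * inch / ydpi * scale   # 96.0 pt, i.e. about 1.33 inches
print "%.2fcm by %.2fcm" % (w / cm, h / cm)   # ~5.08cm by ~3.39cm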
Example #58
0
def getPage(url):
    urllib.urlretrieve(url,"test.html")
from bs4 import BeautifulSoup
import urllib2
import os
import urllib
import getxml
#create connection with main page
url = 'http://www.heart.org/HEARTORG/General/State-by-State-NIH-Allocations_UCM_440585_Article.jsp'
page = urllib2.urlopen(url)
soup = BeautifulSoup(page.read(), 'lxml')

#get all the links to countries
div = soup.find('div', class_ = 'content')
table = div.find('table', width = 400)

for row in table.findChildren('tr'):
	for cell in row.findChildren('td'):

		link = cell.find('a').get('href')
		url = 'http://www.heart.org/' + link

		state = cell.text

		urllib.urlretrieve (url, 'pdf/' + state + '.pdf')

		getxml.getxml('pdf/' + state)

		print state + ' done'
	
soup2 = BeautifulSoup(response2.text, 'html.parser')
pp = soup2.find_all("a")

# To download the mcat (galex catalogue).
catalogue_link = []
for link in pp:
    somel = link.get('href')
    try:
        if somel[-12:] == 'mcat.fits.gz':
            catalogue_link.append(somel)
    except:
        pass

if len(catalogue_link) != 0:
    catalogue = catalogue_link[0].split('/')[-1]
    urllib.urlretrieve(catalogue_link[0], catalogue)
else:
    sys.exit(1)

# Reading coordinates from catalogue.
hdu = fits.open(catalogue)
alpha = hdu[1].data['alpha_j2000_merged']
delta = hdu[1].data['delta_j2000_merged']

# NUV
nuv_mag = hdu[1].data['nuv_mag']
refined_set = [(al, de, nm) for al, de, nm in zip(alpha, delta, nuv_mag)
               if int(nm) != -999 and nm <= 22.]

nalpha, ndelta, nuv_mag = zip(*refined_set)