Example #1
 def getfiles(self, *files):
     """Download Rebase files."""
     for file in self.update(*files):
         print('copying %s' % file)
         fn = os.path.basename(file)
         # filename = os.path.join(Rebase, fn)
         filename = os.path.join(os.getcwd(), fn)
         print('to %s' % filename)
         try:
             self.retrieve(file, filename)
             # The following line is a workaround for an urllib bug in
             # Python 2.7.11 - 2.7.xx (?). It does not seem to work on
             # Python 3.xx. Try to remove the line in new Python versions.
             urlcleanup()
         except IOError as e:
             print(e)
             print('This error is probably due to a non-solved ftp bug in '
                   'recent Python versions. Please download the emboss '
                   'files manually from http://rebase.neb.com/rebase/'
                   'rebase.f37.html and then run ranacompiler.py. Find '
                   'more details in the Restriction manual.')
             self.close()
             return
     self.close()
     return
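The comment in this example notes that the explicit urlcleanup() call is only a workaround for older Python 2.7 releases. For reference, a minimal sketch (not taken from the code above; the fetch() wrapper, URL and filename are placeholders) of the same retrieve-then-cleanup pattern on Python 3, where both helpers live in urllib.request:

import os
from urllib.request import urlretrieve, urlcleanup

def fetch(url, filename):
    """Download url to filename, then drop urllib's retrieval state."""
    try:
        urlretrieve(url, filename)   # writes directly to `filename`
    finally:
        urlcleanup()                 # deletes temp files and resets the cached opener
    return os.path.abspath(filename)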
Example #2
def mainProcess(linkList=[]):
    print 'Process {0} is starting to work!'.format(os.getpid())
    st=time.time()
    p=PAIRSeg()
    p._PAIRSeg__ResetSQLVariables()
    log=LogProcessor.LogProcess()
    fNum=os.path.basename(linkList[0]).split('.')[0]
    lNum=os.path.basename(linkList[len(linkList)-1]).split('.')[0]
    numRange=fNum+'-'+lNum
    for link in linkList:
        fileName=os.path.basename(link)
        filePath=p.dirPath+'/'+fileName
        try:
            urllib.urlretrieve(link,filePath)
            #print '[Downloaded .zip File: [{0}]'.format(fileName)
            urllib.urlcleanup()
            if zipfile.is_zipfile(filePath):
                p.ExtractTSV(filePath)
            os.remove(filePath)  # save space on sofus
            log.write(log.logPath_PAIR,fileName+'\t'+link+'\t'+'PAIR\tProcessed')
        except:
            print 'ERROR: time out. {fileName}'.format(fileName=fileName)
            log.write(log.logPath_PAIR_Error,fileName+'\t'+link+'\t'+'PAIR\tProcessed')
    p.writeCSV(numRange)
    print 'Processed range:{range}'.format(range=numRange)
    print '[Process {0} is finished. Populated {1} links. Time:{2}]'.format(os.getpid(),len(linkList),time.time()-st)
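The bare except above reports every failure as a timeout. urlretrieve has no timeout argument, so a global socket timeout is the usual way to bound a hanging download; a minimal sketch (assuming Python 3, with placeholder URL and path arguments):

import socket
from urllib.request import urlretrieve, urlcleanup

socket.setdefaulttimeout(30)             # applies to all subsequent socket I/O

def fetch_with_timeout(link, file_path):
    try:
        urlretrieve(link, file_path)
    except IOError as exc:               # timeouts and HTTP/FTP errors surface as OSError/IOError
        print('ERROR: download failed for {0}: {1}'.format(link, exc))
        return False
    finally:
        urlcleanup()                     # clear urllib's temp files between downloads
    return True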
Example #3
def image_analyzer(url, string, depth, k):
    
    try:
        example_image = download_image(url)
    except (HTTPError,IOError), e:
        urllib.urlcleanup()    
        sys.exit('Unable to download %s.\n' % url + 'Error: %s.\n' % e)
Example #4
def aursearch(keywords, totalnum):
	search = "http://aur.archlinux.org/packages.php?O=0&L=0&C=0&K=" + keywords.replace(" ", "+") + "&SeB=nd&SB=n&SO=a&PP=25&do_Search=Go"
	urllib.urlretrieve(search, "aur.tmp")
	block = ""
	num = 0
	sect = 0
	packages = ""
	aur = open("aur.tmp")
	for aurl in aur:
		if num <= totalnum:
			if aurl[:20] == "  <td class='data1'>" or aurl[:20] == "  <td class='data2'>": print "aursearch", aurl
			
			if aurl[:20] == "  <td class='data1'>" and not block == "data1":
				sect = 0
				block = "data1"
			elif aurl[:20] == "  <td class='data2'>" and not block == "data2":
				sect = 0
				block = "data2"
			elif aurl[:20] == "  <td class='data1'>" or aurl[:20] == "  <td class='data2'>":
				print aurl
				sect += 1
				if sect == 2:
					pacname = aurl.split("<span class='black'>")
					pacname = pacname[1].split("</span>")
					pacname = pacname[0]
					packages = packages + pacname
				elif sect == 4:
					pacdesc = aurl.split("<span class='blue'>")
					pacdesc = pacdesc[1].split("</span>")
					pacdesc = pacdesc[0]
					packages = packages + " - " + pacdesc + "\n"
					num += 1
	aur.close()
	urllib.urlcleanup()
	return packages
Example #5
def download_file(url, destfile):
    """
     download_file: function for download from url to save as destfile
        @url the source file to download.
        @destfile the destination save file for local.
    """
    file_url = url

    try:
        print("--> Downloading file: %s" % file_url)
        filename, msg = urllib.urlretrieve(
                #'http://code.jquery.com/jquery-2.1.1.js',
                file_url,
                reporthook = reporthook)

        print ""
        #print "File:", filename
        print "Header:"
        print msg
        if os.path.exists(filename):
            if os.path.exists(destfile):
                now = currenttime()
                tmpfile = "%s.%s" % (destfile, now)
                shutil.move(destfile, tmpfile)
            shutil.move(filename, destfile)

        #print 'File exists before cleanup:', os.path.exists(filename)
    finally:
        urllib.urlcleanup()
Example #6
def downloadImage( imageURL, subID ) :
    """Download images"""

    # image url
    image_url = 'http://bbs.sjtu.edu.cn' + imageURL

    # create the directory to store images
    # if not os.path.exists( './download' ) :
    try :
        os.makedirs( './download/' + subID )
    except OSError :
        pass
        #print "Failed to create directories"

     
    # get filename of image
    filename = 'download/' + subID + '/' + imageURL.split( '/' )[-1]

    # clear the cache that may have been built up
    # by previous calls to urlretrieve()
    urllib.urlcleanup()
     
    # retrieve the image
    try :
        urllib.urlretrieve( image_url, filename )
    except ContentTooShortError :
        print "The data available was less than that of expected"
        print "Downloading file %s was interrupted" \
                        % os.path.basename( filename )
    else :
        # get the size of file
        size = os.path.getsize( filename ) / 1024
        print ">>>File %s (%s Kb) was done..." % ( filename, size )
Example #7
    def get_wallpgig(self):
        if not self.check_connection():
            return "No internet connection!"

        tags_from_file = self.filemanage.file_get_tags()
        query_from_file = self.filemanage.file_get_query()
        if (set(tags_from_file) == set(self.tags)) & (query_from_file == self.query):
            self.status.set_label("Loading urls...")
            urls = self.filemanage.file_get_urls()
            self.status.set_label("Urls loaded.")
        else:
            self.status.set_label("Downloading urls...")
            urls = self.download_urls()
            self.status.set_label("Urls downloaded.")

        random.shuffle(urls)

        try:
            url = urls[0]
            urllib.urlretrieve(url, tfile)
            urllib.urlcleanup()

            check = 1
        except IndexError:
            check = "No photos for this tags!"
        except urllib2.URLError:
            check = "No internet connection!"

        return check
Example #8
 def run(self):
     try:
         urllib._urlopener = SmartFancyURLopener()
         urllib.urlretrieve(self.url, self.tmpfile, reporthook=self._hook)
         urllib.urlcleanup()
     except Abort: print 'Download Aborted'
     except:
         pass
Example #9
	def populate_sidebar(self, branch = 'master', count = 50):
		self.commits = self.repo.commits(branch, max_count = count)
		for commit in self.commits:
			commit_time = time.strftime("%c", commit.authored_date)
			parts = commit.message.split('\n')
			if len(parts) > 1:
				text = "<b>%s ...</b>" % parts[0]
			else:
				text = "<b>%s</b>" % commit.message

			text += "\n<small>by %s on %s</small>" % (commit.author,
														commit_time)
			
			hashed = hashlib.md5(commit.author.email).hexdigest()
			image_path = "%s/grav_cache/%s.jpg" % (installdir, hashed)
			
			if not os.path.exists(image_path):
				gravatar_url = "http://www.gravatar.com/avatar.php?"			
				gravatar_url += urllib.urlencode({'gravatar_id':hashed, 
													'size':str(30)})
				urllib.urlretrieve(gravatar_url, image_path)
				urllib.urlcleanup()
				
			image = gtk.gdk.pixbuf_new_from_file(image_path)

			self.sidebar.add_item(None,	[text, image])
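The interesting part here is how the Gravatar URL is built: an md5 hex digest of the author's e-mail plus an urlencoded query string. A standalone sketch (assuming Python 3, where md5 needs bytes and urlencode lives in urllib.parse; the e-mail and size are placeholders):

import hashlib
from urllib.parse import urlencode

def gravatar_url(email, size=30):
    hashed = hashlib.md5(email.strip().lower().encode('utf-8')).hexdigest()
    return 'http://www.gravatar.com/avatar.php?' + urlencode(
        {'gravatar_id': hashed, 'size': str(size)})

# gravatar_url('someone@example.com') -> '...avatar.php?gravatar_id=<hash>&size=30'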
Example #10
def download_version(version):

  chromium_file = 'chromium-%s.tar.xz' % version
  path = '%s%s' % (chromium_url, chromium_file)

  if (args.clean):
    remove_file_if_exists(chromium_file)

  # Let's make sure we haven't already downloaded it.
  if os.path.isfile("./%s" % chromium_file):
    print "%s already exists!" % chromium_file
  else:
    print "Downloading %s" % path
    # Perhaps look at using python-progressbar at some point?
    urllib.urlretrieve(path, chromium_file, reporthook=dlProgress)
    urllib.urlcleanup()
    print ""

  if (args.tests):
    chromium_testdata_file = 'chromium-%s-testdata.tar.xz' % version
    path = '%s%s' % (chromium_url, chromium_testdata_file)

    if (args.clean):
      remove_file_if_exists(chromium_testdata_file)

    # Let's make sure we haven't already downloaded it.
    if os.path.isfile("./%s" % chromium_testdata_file):
      print "%s already exists!" % chromium_testdata_file
    else:
      # Perhaps look at using python-progressbar at some point?
      print "Downloading %s" % path
      urllib.urlretrieve(path, chromium_testdata_file, reporthook=dlProgress)
      urllib.urlcleanup()
      print ""
Example #11
 def oai_listIdentifiers(self, src="www.astros-test.bodleian.ox.ac.uk/sandbox", resumptionToken=None):
     self.ids_data_file = '/tmp/%s_ids_data_file'%unicode(uuid.uuid4())
     src_url = None
     if resumptionToken:
         src_url = "%s?verb=ListIdentifiers&resumptionToken=%s"%(src, resumptionToken)
     else:
         src_url = "%s?verb=ListIdentifiers&metadataPrefix=oai_dc"%src_
         for arg, val in self.args.iteritems():
             if val:
                 src_url = "%s&%s=%s"%(src_url, arg, val)
         if 'args' in src:
             src_url = "%s&%s"%(src_url,src['args'])
     tries = 1
     while tries < 11:
         urlretrieve(src_url, self.ids_data_file)
         if os.path.isfile(self.ids_data_file):
             self.logger.info("Downloaded identifiers for %s - %s"%(src, src_url))
             break
         self.logger.warn("Error retreiving identifiers for %s - %s (try # %d)"%(src, src_url, tries))
         tries += 1
     urlcleanup()
     tree = ET.ElementTree(file=self.ids_data_file)
     rt = tree.getroot()
     ids = rt.findall("%(ns)sListIdentifiers/%(ns)sheader/%(ns)sidentifier"%{'ns':self.oai_ns})
     for ID in ids:
         if resumptionToken and 'deletion' in resumptionToken:
             self.delete_identifiers.append(ID.text)
         else:
             self.identifiers.append(ID.text)
     rtoken = rt.findall("%(ns)sListIdentifiers/%(ns)sresumptionToken"%{'ns':self.oai_ns})
     os.remove(self.ids_data_file)
     if rtoken:
         self.oai_listIdentifiers(src, resumptionToken=rtoken[0].text)
Example #12
def dash_R_cleanup(fs, ps, pic):
    import gc, copy_reg
    import _strptime, linecache, dircache
    import urlparse, urllib, urllib2, mimetypes, doctest
    import struct, filecmp
    from distutils.dir_util import _path_created

    # Restore some original values.
    warnings.filters[:] = fs
    copy_reg.dispatch_table.clear()
    copy_reg.dispatch_table.update(ps)
    sys.path_importer_cache.clear()
    sys.path_importer_cache.update(pic)

    # Clear assorted module caches.
    _path_created.clear()
    re.purge()
    _strptime._regex_cache.clear()
    urlparse.clear_cache()
    urllib.urlcleanup()
    urllib2.install_opener(None)
    dircache.reset()
    linecache.clearcache()
    mimetypes._default_mime_types()
    struct._cache.clear()
    filecmp._cache.clear()
    doctest.master = None

    # Collect cyclic trash.
    gc.collect()
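This helper appears to come from the Python 2 test suite and clears a long list of module caches. A sketch of the subset that carries over unchanged to Python 3 (the Python 2-only and private caches are omitted):

import gc
import re
import linecache
import urllib.parse
import urllib.request

def clear_stdlib_caches():
    re.purge()                      # compiled-regex cache
    linecache.clearcache()          # cached source lines
    urllib.parse.clear_cache()      # urlparse/urlsplit result cache
    urllib.request.urlcleanup()     # temp files left behind by urlretrieve()
    gc.collect()                    # collect cyclic garbage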
Example #13
def download_http_content (uri, params=None):
    urlcleanup()

    #proxy = settings.PROXY_SERVER

    #The proxy must not be used with local address
    host = urlparse(uri)[1]

    #manage proxies with authentication (get it from environment)
    proxy=None
    for proxy_name in settings.NOT_PROXY_FOR:
        if host.startswith(proxy_name):
            proxy = urllib2.ProxyHandler({})#no proxy
            break

    if not proxy:
        #Host is not included in the NOT_PROXY_FOR list => proxy is needed!
        proxy = urllib2.ProxyHandler()#proxies from environment

    opener = urllib2.build_opener(proxy)

    if params:
        return opener.open(uri,data=urlencode(params)).read()
    else:
        return opener.open(uri).read()
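The same host-based proxy selection can be written against urllib.request on Python 3; a minimal sketch where no_proxy_hosts stands in for settings.NOT_PROXY_FOR:

from urllib.parse import urlparse, urlencode
from urllib.request import ProxyHandler, build_opener

def download_http_content_py3(uri, params=None, no_proxy_hosts=()):
    host = urlparse(uri).netloc
    if any(host.startswith(name) for name in no_proxy_hosts):
        proxy = ProxyHandler({})         # local address: bypass all proxies
    else:
        proxy = ProxyHandler()           # pick up proxies from the environment
    opener = build_opener(proxy)
    data = urlencode(params).encode('utf-8') if params else None
    return opener.open(uri, data=data).read()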
Example #14
    def run ( self, site=None,flush_lists=True,flush_rpms=True ):
        flushdirs = ['rpms','rdfs','lists']
        if not flush_lists:
            flushdirs.remove('lists')
        if not flush_rpms:
            flushdirs.remove('rpms')
        urllib.urlcleanup()

        if os.path.isdir(self.cachedir):
            for subdir in flushdirs: 
                dir = os.path.join(self.cachedir, subdir)
                if site:
                    dir = os.path.join(dir, site)
                if os.path.isdir(dir):
                    shutil.rmtree(dir)
                if not site:
                    os.mkdir(dir, 0755)
            if not site:
                #
                # Now let's recreate the infrastructure:
                #
                for subdir in ['file:', 'http:', 'https:', 'ftp:']:
                    for type in ['rpms/','rdfs/']:
                        sd = type + subdir
                        dir = os.path.join(self.cachedir, sd)
                        os.mkdir(dir, 0755)
Example #15
 def _download_video( self ):
     try:
         # spam log file
         LOG( ">>> _download_video(title: %s)" % ( repr( self.g_title ), ), heading=True )
         # get filepath and tmp_filepath
         tmppath, self.filepath = get_legal_filepath( self.g_title, self.params[ "download" ], self.settings[ "play_mode" ], self.settings[ "download_path" ], self.settings[ "use_title" ], self.settings[ "use_trailer" ] )
         # only download if the trailer doesn't exist
         if ( not os.path.isfile( self.filepath.encode( "utf-8" ) ) ):
             # only need to retrieve video if not in tmp path
             if ( not os.path.isfile( tmppath.encode( "utf-8" ) ) ):
                 # fetch the video
                 urllib.urlretrieve( self.params[ "download" ], tmppath.encode( "utf-8" ), self._report_hook )
             # create the conf file for xbox and copy to final location
             ok = self._finalize_download( tmppath )
             # if the copy failed raise an error
             if ( not ok ): raise
     except Exception, e:
         # oops, notify user what error occurred
         LOG( str( e ), xbmc.LOGERROR )
         # filepath is not always released immediately, we may need to try more than one attempt, sleeping between
         urllib.urlcleanup()
         remove_tries = 3
         while remove_tries and os.path.isfile( tmppath ):
             try:
                 os.remove( tmppath.encode( "utf-8" ) )
             except:
                 remove_tries -= 1
                 xbmc.sleep( 1000 )
         pDialog.close()
         self.filepath = ""
Example #16
    def firmware_update(self, args):
        if not args.file and not args.url:
            raise Exception("Must provide firmware filename or URL")

        if args.file:
            fp = open(args.file, 'r')
        elif args.url:
            print "Downloading from", args.url
            resp = urllib.urlretrieve(args.url)
            fp = open(resp[0], 'r')
            urllib.urlcleanup() # We still keep file pointer open

        if fp.read(8) == '54525a52':
            print "Converting firmware to binary"

            fp.seek(0)
            fp_old = fp

            fp = tempfile.TemporaryFile()
            fp.write(binascii.unhexlify(fp_old.read()))

            fp_old.close()

        fp.seek(0)
        if fp.read(4) != 'KPKY':
            raise Exception("KeepKey firmware header expected")

        print "Please confirm action on device..."

        fp.seek(0)
        return self.client.firmware_update(fp=fp)
Example #17
    def _get_url(self, url, content=None):
        urllib.urlcleanup()
        headers = {"Accept": "text/plain"}
        rest = self.base_url
        user = self.username
        passwd = self.password
        
        if not rest or not user or not passwd:
            self.logger.info("Could contact RT, bad or missing args (host: %s user: %s or passwd)", rest, user)
            return u""

        cj = cookielib.LWPCookieJar()
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        urllib2.install_opener(opener)
        if content is None:
            data = {'user': user, 'pass': passwd}
        else:
            data = {'user': user, 'pass': passwd, 'content': content}

        #self.log.info("Data to be sent by RT:\n%r", data)
        ldata = urllib.urlencode(data)
        uri = rest + url
        login = urllib2.Request(uri, ldata)

        response_data = ""
        try:
            response = urllib2.urlopen(login)
            response_data = response.read()

            self.logger.info("RT Connection successful: %r", response_data)
        except urllib2.URLError, exc:
            # could not connect to server
            self.logger.info("RT Connection failed: %r", exc)
Example #18
  def refresh_games(self):
    debug.log("Updating games for {}/{}/{}".format(self.month, self.day, self.year))
    urllib.urlcleanup()
    attempts_remaining = 5
    while attempts_remaining > 0:
      try:
        current_day = self.day
        self.set_current_date()

        all_games = mlbgame.day(self.year, self.month, self.day)
        if self.config.rotation_only_preferred:
          self.games = self.__filter_list_of_games(all_games, self.config.preferred_teams)
        else:
          self.games = all_games

        if current_day != self.day:
          self.current_game_index = self.game_index_for_preferred_team()
        self.games_refresh_time = time.time()
        break
      except URLError, e:
        debug.error("URLError: {}".format(e.reason))
        attempts_remaining -= 1
        time.sleep(NETWORK_RETRY_SLEEP_TIME)
      except ValueError:
        debug.error("ValueError: Failed to refresh list of games")
        attempts_remaining -= 1
        time.sleep(NETWORK_RETRY_SLEEP_TIME)
Example #19
 def fetch_preferred_team_overview(self):
   if not self.is_offday_for_preferred_team():
     urllib.urlcleanup()
     game = self.games[self.game_index_for_preferred_team()]
     game_overview = mlbgame.overview(game.game_id)
     debug.log("Preferred Team's Game Status: {}, {} {}".format(game_overview.status, game_overview.inning_state, game_overview.inning))
     return game_overview
Example #20
	def __init__(self):
		dialog=xbmcgui.Dialog()
		try:
			urllib.urlcleanup()
			# urllib.urlretrieve(URLhome+'autoupdate.txt',ScriptPath+'autoupdate.py')
		except: 
			dialog.ok("Wake on Lan Installer","Master Server Connection Problems", "Please try again later.")
		pass
		#dialog.close()


		options=['Start Timothys Computer','Wake On Lan 2','Wake On Lan 3','Wake On Lan 4', 'Wake On Lan 5', 'Change Settings','Exit Wake On Lan']
		choice=dialog.select("PC Network...",options)
		if choice==6:
			xbmcgui.Window.close()
		if choice==5:
			settingsmgr.OpenControlPanel(settingsfile)
			WoL()
		if choice==4:
			dialog = xbmcgui.Dialog()
			confirm=dialog.yesno('Wake On Lan', "Do you want to wake this pc?", "Mac: "+settings['mac5'], "Name: "+settings['mac5name'])
			if confirm:
				wake_on_lan(settings['mac5'])
				WoL()
			else:
				WoL()
			
		if choice==3:
			dialog = xbmcgui.Dialog()
			confirm=dialog.yesno('Wake On Lan', "Do you want to wake this pc?", "Mac: "+settings['mac4'], "Name: "+settings['mac4name'])
			if confirm:
				wake_on_lan(settings['mac4'])
				WoL()
			else:
				WoL()
		if choice==2:
			dialog = xbmcgui.Dialog()
			confirm=dialog.yesno('Wake On Lan', "Do you want to wake this pc?", "Mac: "+settings['mac3'], "Name: "+settings['mac3name'])
			if confirm:
				wake_on_lan(settings['mac3'])
				WoL()
			else:
				WoL()
		if choice==1:
			dialog = xbmcgui.Dialog()
			confirm=dialog.yesno('Wake On Lan', "Do you want to wake this pc?", "Mac: "+settings['mac2'], "Name: "+settings['mac2name'])
			if confirm:
				wake_on_lan(settings['mac2'])
				WoL()
			else:
				WoL()
		if choice==0:
			dialog = xbmcgui.Dialog()
			confirm=dialog.yesno('Wake On Lan', "Do you want to wake this pc?", "Mac: "+settings['mac1'], "Name: "+settings['mac1name'])
			if confirm:
				wake_on_lan(settings['mac1'])
				WoL()
			else:
				WoL()
		dialog.close()
Example #21
def song_download():
       

        song = user_input('Enter the name of song: ')  
        
        try:
            query_string = encode({"search_query" : song})
            content = urlopen("http://www.youtube.com/results?" + query_string)
            
            if version == 3:
                ##I hate RE

                search_results = re.findall(r'href=\"\/watch\?v=(.{11})', content.read().decode())
                
            else:
                ##ok!! if its not going work! I'm gonna kill you!!!
                search_results = re.findall(r'href=\"\/watch\?v=(.{11})', content.read())
                ##finally(Thanks to git)
                
        except:
            print('Something happened!!')
            exit(1)

        # youtube2mp3 API
        downloadLinkOnly = 'http://www.youtubeinmp3.com/fetch/?video=' + 'http://www.youtube.com/watch?v=' + search_results[0]
        try:
            print('Downloading %s' % song)
            urllib.urlretrieve(downloadLinkOnly, filename='%s.mp3' % song)
            urllib.urlcleanup()  
        except:
            print('Error  %s' % song)
            exit(1)
Example #22
def refresh_seeds():
    # Get a new batch of random bits from our friends at Fourmilab
    try:
        os.remove(cache_file)
    except:
        pass
    urllib.urlretrieve("http://www.fourmilab.ch/cgi-bin/uncgi/Hotbits?nbytes=240&fmt=hex", cache_file)
        # We know they're good because they're imported from Switzerland
    urllib.urlcleanup()
    # Load the web page source
    bitsFile = open(cache_file)
    lines = bitsFile.readlines()
    bitsFile.close()
    randomLines = []
    # Filter out the good lines
    for line in lines:
        if re.match(r'[0123456789ABCDEF]+', line):
            randomLines.append(line)
    os.remove(cache_file)
    outLine = ""
    # Stuff them into one big line and write it back out
    for line in randomLines:
        outLine = outLine + string.strip(line)
    bitsFile = open(cache_file, "w")
    bitsFile.write(outLine)
    bitsFile.close()
Example #23
def download_http_content(url, user=None):
    urlcleanup()

    #proxy = settings.PROXY_SERVER

    #The proxy must not be used with local address
    host = urlparse(url)[1]

    #manage proxies with authentication (get it from environment)
    proxy = None
    for proxy_name in settings.NOT_PROXY_FOR:
        if host.startswith(proxy_name):
            proxy = urllib2.ProxyHandler({})  # no proxy
            break

    if not proxy:
        #Host is not included in the NOT_PROXY_FOR list => proxy is needed!
        proxy = urllib2.ProxyHandler()  # proxies from environment

    opener = urllib2.build_opener(proxy)

    headers = {
        'User-Agent': 'Mozilla/5.0 (%(system)s %(machine)s;U) Wirecloud/%(wirecloud_version)s Python-urllib2/%(urllib2_version)s' % VERSIONS,
        'Accept': '*/*',
        'Accept-Language': 'en-gb,en;q=0.8,*;q=0.7',
        'Accept-Charset': 'utf-8;q=1,*;q=0.2',
    }

    if user and not user.is_anonymous():
        headers.update({
            'Remote-User': user.username,
        })

    request = urllib2.Request(url, None, headers)
    return opener.open(request).read()
Example #24
    def _setMoviePicture(self, imageUrl):
        """
        'Private' method that renders the image designated by 'imageUrl'.
        """
        imageFilename = imageUrl.split("/")[-1]
        imagePath = "cache/" + imageFilename

        # Create 'cache' folder if it does not exist.
        if not os.path.exists("./cache/"):
            os.makedirs("./cache/")

        try:
            if not os.path.exists(imagePath):
                # print "Creating '%s'..." % ( imagePath )
                urllib.urlretrieve(imageUrl, imagePath)
            urllib.urlcleanup()

            try:
                # Scaffold image loading. If any exception arises for image
                # parsing, the 'image' file won't be locked.
                with open(imagePath, 'rb') as imageFile:
                    image = Image.open(imageFile)
                    self.photoImage = ImageTk.PhotoImage(image)
                    self.picture.create_image(0, 0, image = self.photoImage, anchor = NW)
                    return
            except IOError:
                print "Unable to load cache image '%s'." % ( imagePath )
                os.remove(imagePath)
        except IOError:
            print "Unable to retrieve the movie image."

        self.clearMoviePicture()
Example #25
 def test03(self):
     import urllib
     r=urllib.urlretrieve("http://www.python.org","webpage.html")
     print r
     with open("webpage.html") as f:
         print f.read()
     urllib.urlcleanup()
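The same smoke test on Python 3 (a sketch, not part of the original snippet): urlretrieve still returns a (local_filename, headers) pair, and urlcleanup still removes its leftovers.

import urllib.request

def test03_py3():
    result = urllib.request.urlretrieve("http://www.python.org", "webpage.html")
    print(result)                        # ('webpage.html', <http.client.HTTPMessage ...>)
    with open("webpage.html", encoding="utf-8") as f:
        print(f.read()[:200])            # a peek at the page is enough here
    urllib.request.urlcleanup()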
Example #26
 def iq_register(self, iq):
     """
     Register to a new VMCast.
     @type iq: xmpp.Protocol.Iq
     @param iq: the sender request IQ
     @rtype: xmpp.Protocol.Iq
     @return: a ready-to-send IQ containing the results
     """
     reply = iq.buildReply("result")
     url = iq.getTag("query").getTag("archipel").getAttr("url")
     try:
         if not url or url == "":
             raise Exception("IncorrectStanza", "Stanza must have url: %s" % str(iq))
         try:
             urllib.urlcleanup()
             f = urllib.urlopen(url)
         except:
             raise Exception("The given url doesn't exist. Can't register.")
         try:
             self.getFeed(f.read())
         except:
             raise Exception("The given url doesn't contains a valid VMCast feed. Can't register.")
         self.cursor.execute("INSERT INTO vmcastsources (url) VALUES ('%s')" % url)
         self.database_connection.commit()
         self.parseRSS()
         self.entity.push_change("vmcasting", "register")
         self.entity.shout("vmcast", "I'm now registred to vmcast %s as asked by %s" % (url, iq.getFrom()))
     except Exception as ex:
         reply = build_error_iq(self, ex, iq, ARCHIPEL_ERROR_CODE_VMCASTS_REGISTER)
     return reply
Example #27
	def showInfo(self):
		if self.check == "true" and self.menulist:
			m_title = self["menulist"].getCurrent()[0][0]
			m_url = self["menulist"].getCurrent()[0][1]
			if m_url:
				#m_url = re.findall('(.*?)\.', m_url)
				#extra_imdb_convert = "._V1_SX320.jpg"
				#m_url = "http://ia.media-imdb.com/images/%s%s" % (m_url[0], extra_imdb_convert)
				print "EMC iMDB: Download Poster - %s" % m_url
				urllib._urlopener = AppURLopener()
				urllib.urlretrieve(m_url, self.path)
				urllib.urlcleanup()
				if os.path.exists(self.path):
					self.poster_resize(self.path, m_title)

					#ptr = LoadPixmap(self.path)
					#if ptr is None:
					#        ptr = LoadPixmap("/usr/lib/enigma2/python/Plugins/Extensions/EnhancedMovieCenter/img/no_poster.png")
					#        print "EMC iMDB: Load default NO Poster."
					#if ptr is not None:
					#        self["poster"].instance.setPixmap(ptr)
					#        print "EMC iMDB: Load Poster - %s" % m_title
				else:
					print "EMC iMDB: No url found for - %s" % m_title
			else:
				print "EMC iMDB: No url found for - %s" % m_title
Example #28
 def download(self, url):
     try:
         urlretrieve(url, self.image_name(url))
         urlcleanup()
     except:
         return False
     return True
Example #29
def fire_url(url, params):
    try:
        urllib.urlcleanup()
        real_url = url+"?"+urllib.urlencode(params)
        #print "Firing url="+real_url
        file = urllib.urlopen(real_url)
    except:
        pass
Example #30
def get_bing_pic():
    # bing url
    url = "http://www.bing.com/"

    urllib.urlcleanup()
    args = urllib.urlencode({"setmkt": COUNTRY, "setlang": "match"})
    
    # open bing url
    page = urllib.urlopen(url, args)
    if None == page:
        print('open %s error' % (url))
        return -1

    # get html souce code
    data = page.read()
    if not data:
        print ('read %s content error' % url)
        return -1
    page.close()

    # parse picture url
    posleft = data.find(b'g_img={url:')
    if -1 == posleft:
        print ('jpg url not found')
        return -1
    posright = data.find(b'\'', posleft + 12)
    if -1 == posright:
        print ('jpg url not found')
        return -1    
    jpgpath = data[posleft + 12 : posright].decode("ascii");
    
    if 0 == cmp('/', jpgpath[0:1]):
        jpgurl = url + jpgpath
    else:
        jpgurl = jpgpath

    # make local file dir
    if 0 == cmp('Windows', get_platform()):
        localpath = TOP_DIR + time.strftime('bing\\%Y\\%m\\')
    else:
        localpath = TOP_DIR + time.strftime('bing/%Y/%m/')

    if not os.path.exists(localpath):
        os.makedirs(localpath)

    # make local file path
    localjpg = localpath + time.strftime('%d.jpg')

    print ("remote file : %s" % jpgurl)
    print ("local  file : %s" % localjpg)

    # download jpg file
    urllib.urlretrieve(jpgurl, localjpg) 

    urllib.urlcleanup()
    
    return 0
Example #31
from win10toast import ToastNotifier
from Adafruit_IO import Client, Data
aio = Client('')

import os, re, urllib, sys
toaster = ToastNotifier()

version = sys.version_info[0]

if version == 2:
    user_input = raw_input
    import urllib2
    urlopen = urllib2.urlopen
    encode = urllib.urlencode
    retrieve = urllib.urlretrieve
    cleanup = urllib.urlcleanup  # bind the function itself, not its return value

else:
    user_input = input
    import urllib.request
    import urllib.parse
    urlopen = urllib.request.urlopen
    encode = urllib.parse.urlencode
    retrieve = urllib.request.urlretrieve
    cleanup = urllib.request.urlcleanup

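# A short usage sketch (not in the original snippet): with the aliases above,
# the same download code runs on Python 2 and Python 3. The URL, query and
# filename below are placeholders.
def _compat_download(url, filename):
    query = encode({'search_query': 'example'})   # urlencode / urllib.parse.urlencode
    page = urlopen(url + '?' + query)             # urllib2.urlopen / urllib.request.urlopen
    page.read()
    retrieve(url, filename)                       # urlretrieve on either version
    cleanup()                                     # urlcleanup (the alias is the function itself)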

def ming(st):
    path = st
    list_ = os.listdir(path)
    for file_ in list_:
Example #32
# * genres.list.gz
# * ratings.list.gz
# 
# _** Note: The total size of files mentioned above is roughly 30M. Running the following code may take a few minutes._

# In[2]:

import gzip

# Obtaining IMDB's text files
imdb_url_prefix = 'ftp://ftp.funet.fi/pub/mirrors/ftp.imdb.com/pub/'
imdb_files_list = ['genres.list.gz', 'ratings.list.gz']
for name in imdb_files_list:
    if not os.path.exists('./data/' + name):
        response = urllib.urlretrieve(imdb_url_prefix + name, './data/' + name)
        urllib.urlcleanup()   # urllib fails to download two files from a ftp source. This fixes the bug!
        with gzip.open('./data/' + name) as comp_file, open('./data/' + name[:-3], 'w') as reg_file:
            file_content = comp_file.read()
            reg_file.write(file_content)


# ### Step 3: downloading the "IMDB Prepared Data"
# During this tutorial, we discuss how the contents of _genres.list.gz_ and _ratings.list.gz_ files can be integrated. However, to make the tutorial more concise, we avoid including the same process for all the files in the "IMDB Plain Text Data". The "IMDB Prepared Data" is the dataset that we obtained by integrating a number of files from the "IMDB Plain Text Data" which we will use during later stages of this tutorial. The following code snippet downloads this dataset.

# In[3]:

imdb_url = 'https://anaconda.org/BigGorilla/datasets/1/download/imdb_dataset.csv'
if not os.path.exists('./data/imdb_dataset.csv'):     # avoid downloading if the file exists
    response = urllib.urlretrieve(imdb_url, './data/imdb_dataset.csv')

Example #33
class CninfoSpider(Spider):  
    name = "cninfo"  
    allowed_domains = ["cninfo.com.cn"]  
    start_urls = ["http://www.cninfo.com.cn/cninfo-new/disclosure/szse_main"]
    allstockjson_url = "http://www.cninfo.com.cn/cninfo-new/js/data/szse_stock.json"
    stockNumsInAllStockJson = 0
    homePage = r"http://www.cninfo.com.cn"  

    socket.setdefaulttimeout(35)
    logger = logging.getLogger('CninfoCninfoSpiderLogger')
    jsonSzse_stocks = ''
    
    def GetJsonStockIndex(self,response):
        if('jsonStockIndex='==response.body[0:len('jsonStockIndex=')]):
            print '==============GetJsonStockIndex:',response.body, response.body[len('jsonStockIndex='):len(response.body)]
            return int(response.body[len('jsonStockIndex='):len(response.body)])        
        else:
            return -1  
        
    def generateUrl(self,url,stock,pageNum,jsonStockIndex):
        return url+'?stock='+stock+'&pageNum='+str(pageNum)+'&jsonStockIndex='+str(jsonStockIndex)
    
    def parse(self, response):
        SzseStockFile = self.downloadAllStockJson(False)
        queryUrl='http://www.cninfo.com.cn/cninfo-new/announcement/query'
        jsonStockIndex = self.GetJsonStockIndex(response)
        
        if self.stockNumsInAllStockJson == 0:
            SzseStockFile = self.downloadAllStockJson(True)
            self.jsonSzse_stocks=json.loads(open(SzseStockFile, 'rb').read())
            for jsonSzse_stock in self.jsonSzse_stocks['stockList']:
                self.stockNumsInAllStockJson=self.stockNumsInAllStockJson+1
            jsonStockIndex=StockStartIndex
        print "start get stock data,jsonStockIndex=",jsonStockIndex,'self.stockNumsInAllStockJson:',self.stockNumsInAllStockJson
                    
        if(StockStartIndex<=jsonStockIndex and jsonStockIndex<min(self.stockNumsInAllStockJson, StockEndIndex)):      
              
            code=self.jsonSzse_stocks['stockList'][jsonStockIndex]['code']
            orgId=self.jsonSzse_stocks['stockList'][jsonStockIndex]['orgId']
            stock=self.jsonSzse_stocks['stockList'][jsonStockIndex]['code']+'%2C'+self.jsonSzse_stocks['stockList'][jsonStockIndex]['orgId']
            pageNum=1
            yield Request(self.generateUrl(queryUrl,stock,pageNum,jsonStockIndex), callback=self.parseDetail,meta={'code':code,'orgId':orgId,'pageNum':pageNum,'jsonStockIndex':jsonStockIndex}) 
        elif (jsonStockIndex==self.stockNumsInAllStockJson):
            print '====================================='
            print 'fetch stock data finished,please check if have fail lists in result/szse_stock_failList.json'
            print '====================================='
        else:
            print '====================================='
            print 'fetch stock data fail,exit!!! jsonStockIndex=',jsonStockIndex,response.url,response.body
            print 'please check fail lists in result/szse_stock_failList.json'
            print '====================================='
    
    def parseDetail(self, response): 
        filename = 'result\szse_stock_failList.json'
        queryUrl='http://www.cninfo.com.cn/cninfo-new/announcement/query'
        startUrl='http://www.cninfo.com.cn/cninfo-new/disclosure/szse_main'
        print "********* enter parseDetail",response.url
        #filename = response.url.split("/")[-2]
        #open(filename, 'wb').write(response.body)
        jsonStockIndex = self.GetJsonStockIndex(response)
        if(-1==jsonStockIndex):
            #enter here only get data successful 
            jsonAnnouncements = json.loads(response.body_as_unicode()) 
            pageNum=jsonAnnouncements['pageNum']
            jsonStockIndex=jsonAnnouncements['jsonStockIndex']
            pageSumNums=0
            pageSize=30
            savedInfo = {"secCode": " ","secName": " ","announcementTitle": " ","adjunctUrl": " ","pdfPath": " ","announcementTime": " "}
            if(0==jsonAnnouncements['totalRecordNum']%pageSize):
                pageSumNums=jsonAnnouncements['totalRecordNum']/pageSize
            else:
                pageSumNums=(jsonAnnouncements['totalRecordNum']/pageSize+1)
            print 'totalRecordNum and current pageNum:',pageSumNums,pageNum
            for announcement in jsonAnnouncements['announcements']:
                code=announcement['secCode']
                orgId=announcement['orgId']
                stock=code+'%2C'+orgId
                
                #Skip not needed pdf
                if not self.isNeededAnnouncementTitle(announcement['announcementTitle']):
                    continue
                
                #Download pdf
                companyFolder = self.createCompanyFolder(announcement['secCode'])
                if announcement["secName"] == None or announcement['announcementTitle'] == None:
                    pdfname = announcement['announcementTitle']
                else:
                    pdfname = announcement["secName"]+announcement['announcementTitle']
                filePath = self.downloadPDF(companyFolder, pdfname,announcement['adjunctUrl'], 5)
                if filePath == False:
                    print "Save download failed file info"
                    errorStr = "code:"+code+",jsonStockIndex:"+str(jsonStockIndex)+",pdfname:"+pdfname
                    errorStr = errorStr + ",adjunctUrl:"+announcement['adjunctUrl']
                    print "ERR:", errorStr
                    self.logger.info(errorStr)
                    outputDownloadPdfFailLists = codecs.open(DownloadPdfFailLists, 'a','utf-8')
                    outputDownloadPdfFailLists.write('\n')
                    outputDownloadPdfFailLists.write(errorStr)
                    outputDownloadPdfFailLists.close()
                    
                #save info in Json
                savedInfo['secCode'] = announcement['secCode']
                savedInfo['secName'] = announcement['secName']
                savedInfo['announcementTitle'] = announcement['announcementTitle']
                savedInfo['adjunctUrl'] = announcement['adjunctUrl']
                savedInfo['pdfPath'] = filePath
                savedInfo['announcementTime'] = announcement['announcementTime']
                #savedInfo['announcementTime'] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(announcement['announcementTime']))
                if not self.isInfoInJson(savedInfo):
                    try:
                        savedInfofileread = codecs.open(SavedInfoFile,'rb','utf-8')
                        readdata = savedInfofileread.read()[:-2]
                        if(20<len(readdata)):
                            readdata=readdata+','
                        writedata = json.dumps(savedInfo,ensure_ascii=False,indent=2)
                        writedata = readdata+writedata+']}'
                        savedInfofilewrite = codecs.open(SavedInfoFile,'w','utf-8')
                        savedInfofilewrite.write(writedata)
                    finally:
                        savedInfofilewrite.close()
                        savedInfofileread.close()
            print "parseDetail: ########################################################"
            if(pageSumNums>pageNum):#go to read next page of current stock code
                pageNum=pageNum+1
                print "parseDetail: get next page:",str(pageNum),"pageSumNums:",str(pageSumNums),"jsonStockIndex:",str(jsonStockIndex)
                yield Request(self.generateUrl(queryUrl,stock,pageNum,jsonStockIndex), callback=self.parseDetail,meta={'code':code,'orgId':orgId,'pageNum':pageNum,'jsonStockIndex':jsonStockIndex}) 
            else:#go to read next stock code
                yield Request(self.generateUrl(startUrl, '', 1, jsonStockIndex+1), callback=self.parse, meta={'jsonStockIndex':jsonStockIndex+1}) 
        else:#fail to get current stock data, go to read next stock code
            
            yield Request(self.generateUrl(startUrl, '', 1, jsonStockIndex), callback=self.parse, meta={'jsonStockIndex':jsonStockIndex}) 

    def createCompanyFolder(self, secCode):
        companyFolder = FinancialFolder + secCode
        if not os.path.exists(companyFolder):
            os.mkdir(companyFolder)
        return companyFolder
            
    def downloadPDF(self, companyFolder, reportName, downloadURL, downloadTime):
        downloadTime -= 1
        if downloadTime == 0:
            return False
        
        suffix = downloadURL[downloadURL.find('.'):].lower()
        print "downloadPDF suffix", suffix
        
        if SysStr == "Windows":
            pdfPath = companyFolder + '\\'+ reportName + '.pdf'
            filePath = companyFolder + '\\' + reportName + suffix
        else:
            pdfPath = companyFolder + '/'+ reportName + '.pdf'
            filePath = companyFolder + '/' + reportName + suffix
                     
        if ".pdf" != suffix and os.path.exists(pdfPath):
            os.remove(pdfPath)
              
        realURL = self.homePage + "/" + downloadURL
        print "Download pdfPath:", filePath, ' realURL:',realURL
        try:
            if not os.path.exists(filePath):
                urllib.urlretrieve(realURL, filePath)
            else:
                print 'WRN: ', filePath, 'already exists'
                return filePath
        except Exception, e:
            urllib.urlcleanup()
            return self.downloadPDF(companyFolder, reportName, downloadURL, downloadTime)
        
        urllib.urlcleanup()
        gc.collect()
        return pdfPath
Example #34
def UpdateOne(dir, toupdate):
    try:
        urllib.urlretrieve(updateserver + toupdate, GetFilePath(dir, toupdate))
        urllib.urlcleanup()
    except:
        pass
Example #35
    def authenticate(self, request):
        user = request.user or None
        access_token = None
        # assume logging in normal way
        params = {}
        params["client_id"] = CONSUMER_KEY
        params["client_secret"] = CONSUMER_SECRET
        params["redirect_uri"] = request.build_absolute_uri(reverse("facebook_login_done"))
        params["code"] = request.GET.get('code', '')

        url = ("https://graph.facebook.com/oauth/access_token?"
               + urllib.urlencode(params))
        from cgi import parse_qs
        userdata = urllib.urlopen(url).read()
        res_parse_qs = parse_qs(userdata)
        # Could be a bot query
        if not ('access_token') in res_parse_qs:
            return None
        access_token = res_parse_qs['access_token'][-1]

        url = "https://graph.facebook.com/me?access_token=" + access_token

        fb_data = json.loads(urllib.urlopen(url).read())
        uid = fb_data["id"]

        if not fb_data:
            return None

        try:
            same_email_user = UserProfile.objects.get(email=fb_data.get('email', None))
        except:
            same_email_user = None

        if user.is_anonymous() and not same_email_user:
            try:
                fb_user = FacebookUserProfile.objects.get(facebook_uid=uid)
                fb_user.accesstoken = access_token
                fb_user.save()
                return fb_user.user
            except FacebookUserProfile.DoesNotExist:
                fb_picture_url = "http://graph.facebook.com/%s/picture?type=large" % uid

                username = fb_data.get('username')
                if not username:
                    username = uid
                userProfile = UserProfile.objects.create(username=username)
                userProfile.first_name = fb_data['first_name']
                userProfile.last_name = fb_data['last_name']
                if fb_data['gender'] == "male":
                    userProfile.gender = 'M'
                else:
                    if fb_data['gender'] == "female":
                        userProfile.gender = 'F'
                userProfile.email = fb_data.get('email', None)
                userProfile.isVerified_email = True
                userProfile.location = fb_data.get('location', fb_data).get('name', None)
                userProfile.save()

                img = urllib.urlretrieve(fb_picture_url)
                userProfile.profile_picture.save("Facebook-profile.jpg", File(open(img[0])))
                urllib.urlcleanup()

                userProfile.facebook_link = fb_data.get('link', None)
                from django.contrib.auth.hashers import make_password
                raw_pass = ''.join(random.choice(string.ascii_uppercase + string.digits) for x in range(12))
                tmp_pass = make_password(raw_pass)
                userProfile.password = tmp_pass
                userProfile.save()

                fb_profile = FacebookUserProfile(facebook_uid=uid, user=userProfile, email=fb_data['email'],
                    url=fb_data['link'], location=userProfile.location, accesstoken=access_token)
                fb_profile.save()

                return userProfile
        else:
            try:
                if same_email_user:
                    user = same_email_user
                user_facebook = FacebookUserProfile.objects.get(user=user)
                if user_facebook.facebook_uid == uid:
                    return user_facebook.user
                else:
                    request.session['fb_accesstoken'] = access_token
                    next = request.session['next'] or ""
                    if next:
                        del request.session['next']
                        return HttpResponseRedirect(next)
                    else:
                        return HttpResponseRedirect(reverse('sync_facebook'))
            except FacebookUserProfile.DoesNotExist:
                try:
                    user_facebook = FacebookUserProfile.objects.get(facebook_uid=uid)
                    request.session['fb_accesstoken'] = access_token
                    next = request.session['next'] or ""
                    if next:
                        del request.session['next']
                        return HttpResponseRedirect(next)
                    else:
                        return HttpResponseRedirect(reverse('sync_facebook'))
                except FacebookUserProfile.DoesNotExist:
                    fb_profile = FacebookUserProfile(facebook_uid=uid, user=UserProfile.objects.get(username=user.username), email=fb_data['email'],
                    url=fb_data['link'], location=fb_data.get('location', fb_data).get('name', None), accesstoken=access_token)
                    fb_profile.save()
                    return fb_profile.user
Example #36
def reporthook(blocks_read, block_size, total_size):
    """total_size is reported in bytes.
    block_size is the amount read each time.
    blocks_read is the number of blocks successfully read.
    """
    if not blocks_read:
        print 'Connection opened'
        return
    if total_size < 0:
        # Unknown size
        print 'Read %d blocks (%d bytes)' % (blocks_read,
                                             blocks_read * block_size)
    else:
        amount_read = blocks_read * block_size
        print 'Read %d blocks, or %d/%d' % (blocks_read, amount_read,
                                            total_size)
    return


filename, msg = urllib.urlretrieve('http://blog.doughellmann.com/',
                                   reporthook=reporthook)
"""
print
print 'File:', filename
print 'Headers:'
print msg
print 'File exists before cleanup:', os.path.exists(filename)


urllib.urlcleanup()
print 'File still exists:', os.path.exists(filename)
"""
Example #37
def dlFile(url, f_path):
    printIfVerbose("Downloading %s" % url)
    urllib.urlretrieve(url, f_path)
    urllib.urlcleanup()
    printIfVerbose("Finished.")
    return True
Example #38
 def clean_up(self):
     '''destroy all temporary files created during the render'''
     for tempfile in self.tempfiles:
         os.remove(tempfile)
     urlcleanup()
Example #39
    def _fetchsinglerecnum(self,
                           newname=None,
                           todir='.',
                           fetch_rn=None,
                           extn_filt=''):
        """ Fetch a single file.  This will have an assigned name or
        maybe names, so you might want to rename it.

        Most users won't use this, but will probably want just
        fetch().

        Args are the recnum, optional new filename (the system adds an
        extension) and optional directory.  If no new filename is
        given a name based on the recnum is used, which is just the
        name that the server delivers.
        
        The routine sets the status message and returns None or the
        filename stem (i.e. without the extensions which distinguish
        multiple files).  That is, there may be multiple files, all
        with this name stem plus various extensions.
        """
        # $$ the logic of some of this may be redundant
        if fetch_rn == None:
            fetch_rn = self.recnum[0]
        if fetch_rn == None:
            self.statmsg = 'No recnum given or obtained by query'
            return None
        if newname == None:
            self.filename = fetch_rn
        else:
            self.filename = newname
        # Is it possibly already in cache?
        if cacheroot != None:  # or else no cache
            fnl = glob.glob(self._getcache() + '/' + fetch_rn + '.*')
            if len(fnl) > 0:  # They seem to be in cache
                for fn in fnl:
                    n, extn = os.path.basename(fn).split('.', 1)
                    if extn_filt == '' or extn_filt == extn:
                        shutil.copy(fn,
                                    todir + '/' + self.filename + '.' + extn)
                self.statmsg = 'File from cache'
                return self.filename  # i.e. either recnum or arg

        # Not in cache, need to get
        urllib.urlcleanup()
        record_set = self.series + '[:#' + fetch_rn + ']'
        message = urllib.urlencode({'rsquery': record_set, 'n': '1'})
        message = urllib.quote(urllib.unquote_plus(message),
                               '&=/')  # urlencode adds not understood +'s
        full_url = 'http://' + netdrmsserver + fetch_url
        try:
            r = urllib.urlretrieve(full_url, data=message)  # file is r[0]
        except:
            self.statmsg = 'No response from server'  # something really odd, not e.g. 404
            return None
        fetchname = _header2fn(r[1])  # filename from mime header

        if fetchname == None:
            self.statmsg = 'Badly formed server header'
            return None
        to_snip = _tarpath(self.series)  # dir in tarfile plus file name
        if fetchname.endswith('.tar'):
            if not tarfile.is_tarfile(r[0]):
                self.statmsg = 'Tar file from server unreadable'
                return None
            tf = tarfile.open(r[0])
            res = False
            for fn in tf.getnames():
                if fn.startswith(to_snip):  # ignore other content
                    tf.extract(fn)
                    rn, extn = fn[len(to_snip):].split('.', 1)
                    if rn != fetch_rn:
                        self.statmsg = 'Wrong data files in tar file'
                        return None
                    if extn_filt == '' or extn_filt == extn:
                        self._placedata(fn, fetch_rn, todir, self.filename,
                                        extn)
                    res = True
            if not res:
                self.statmsg = 'Could not find data files in tar file'
                return None
        else:  # plain file, not a tar file
            to_snip = os.path.basename(to_snip)
            if not fetchname.startswith(to_snip):
                self.statmsg = 'Data file name does not match query'  # name looks v. wrong
                return None
            rn, extn = fetchname[len(to_snip):].split('.', 1)
            if rn != fetch_rn:
                self.statmsg = 'Wrong data file returned'
                return None
            if extn_filt == '' or extn_filt == extn:
                self._placedata(r[0], fetch_rn, todir, self.filename, extn)
        self.statmsg = 'New file from server'
        return self.filename
Example #40
def get_files_from_types(types, base_name, ftp, path, ftp_true=False):
    """
    from https://github.com/ctSkennerton/scriptShed/blob/master/download_ncbi_assembly.py

    Download the genome
    
    input :
        types (list)      : List of NCBI type suffix file
        base_name (string) : Basename of the genome file
        ftp (ftp object)  : ftp session object to the NCBI FTP Server
        path (string)     : output folder
    return:
        out : path of the downloaded genome
    """
    
    out = str()
    if ftp_true:

        #download
        for t in types:
            f = path+base_name+types[0]
            try:
                urllib.urlretrieve(ftp_true+'/'+base_name+t, f)
            except IOError:
                print('No proteomic file available for: {}'.format(base_name))
                return False
                    
            urllib.urlcleanup()
                
            #extract
            with gzip.open(f, 'rb') as f_in:
                out = f.replace('.gz','')
                with open(out, 'wb') as f_out:
                    shutil.copyfileobj(f_in, f_out)
                    
            #remove file
            os.remove(f)
            return out
    
    else:
        
        for g in ftp.nlst():
            for t in types:
                
                if g == base_name + t:

                    #download
                    try:
                        urllib.urlretrieve("ftp://ftp.ncbi.nlm.nih.gov/{}/{}".format(ftp.pwd(), g), path+g)
                    except IOError:
                        print('No proteomic file available for: {}'.format(g))
                        return False
                    urllib.urlcleanup()
                        
                    #extract
                    with gzip.open(path+g, 'rb') as f_in:
                        out = path+g.replace('.gz','')
                        with open(out, 'wb') as f_out:
                            shutil.copyfileobj(f_in, f_out)
                            
                    #remove file
                    os.remove(path+g)
        return out
Example #41
    def get(self, url, update=False):
        """Looks in the cache if the file is there and takes the cached one.
        Otherwise it is downloaded first.

        Knows how to deal with http:// and svn:// URLs.

        :Return:
          file handler
        """
        # look whether it is compressed
        cext = url.split('.')[-1]
        if cext in ['gz', 'bz2']:
            target_url = url[:-1 * len(cext) - 1]
        else:
            # assume not compressed
            target_url = url
            cext = None

        # turn url into a filename -- mimik what APT does for
        # /var/lib/apt/lists/
        tfilename = '_'.join(target_url.split('/')[2:])

        # if we need to download anyway do not search
        if update:
            cfilename = os.path.join(self.cachedir, tfilename)
        else:
            # look for the uncompressed file anywhere in the cache
            cfilename = None
            for cp in [self.cachedir] + self.ro_cachedirs:
                if os.path.exists(os.path.join(cp, tfilename)):
                    cfilename = os.path.join(cp, tfilename)

        # nothing found?
        if cfilename is None:
            # add cache item
            cfilename = os.path.join(self.cachedir, tfilename)
            update = True

        # if updated needed -- download
        if update:
            #print 'Caching file from %s' % url

            if url.startswith('svn://'):
                # export from SVN
                pysvn.Client().export(url, cfilename)
            if url.startswith('http://'):
                # download
                tempfile, ignored = urllib.urlretrieve(url)

                # decompress
                decompressor = None
                if cext == 'gz':
                    decompressor = 'gzip'
                elif cext == 'bz2':
                    decompressor = 'bzip2'
                elif cext == None:
                    decompressor = None
                else:
                    raise ValueError, \
                          "Don't know how to decompress %s files" \
                          % cext

                if not decompressor is None:
                    if subprocess.call(
                        [decompressor, '-d', '-q', '-f', tempfile]) == 1:
                        raise RuntimeError, \
                              "Something went wrong while decompressing '%s'" \
                              % tempfile

                # move decompressed file into cache
                shutil.move(os.path.splitext(tempfile)[0], cfilename)

                # XXX do we need that if explicit filename is provided?
                urllib.urlcleanup()

        # open cached file
        fh = codecs.open(cfilename, 'r', 'utf-8')

        return fh
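Regarding the "XXX do we need that if explicit filename is provided?" comment above: urlcleanup() only deletes the temporary files that urlretrieve() creates when called without a filename (and resets the cached global opener), so with an explicit target path it is harmless but not strictly required. A small sketch (Python 3, placeholder URL) showing what gets removed:

import os
import urllib.request

def demo_urlcleanup(url='http://www.python.org/'):
    tmp_path, headers = urllib.request.urlretrieve(url)   # no filename -> temp file
    print('before cleanup:', os.path.exists(tmp_path))    # True
    urllib.request.urlcleanup()
    print('after cleanup: ', os.path.exists(tmp_path))    # False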
def downloadfiles(maxyears):
    # get parameters for and start constructing filenames
    URLroot = "ftp://ftp.ncdc.noaa.gov/pub/data/gsod/"  # base URL for all files
    filesuffix = ".op.gz"  # suffix for all the raw files
    firstyear = 1928  # this is the first year available for any station
    USAFcode = raw_input("Please enter the USAF code for the station you want " \
        "data for (first column of  " \
        "ftp://ftp.ncdc.noaa.gov/pub/data/inventories/ISH-HISTORY.TXT )\n")
    WBANcode = raw_input("Please enter the WBAN code for the station you want " \
        "data for (second column of " \
        "ftp://ftp.ncdc.noaa.gov/pub/data/inventories/ISH-HISTORY.TXT )\n")
    # e.g. Seattle (SEA) is USAF 727930 WBAN 24233
    # Portland, OR is USAF 726980 WBAN 24229
    # LHR is USAF 037720 WBAN 99999
    stationname = raw_input("What would you like to call this station?\n")
    stationcode = str(USAFcode) + '-' + str(WBANcode)

    yearsdownloaded = 0

    for year in range(datetime.datetime.now().year - 1, firstyear, -1):
        # stopping before the current year because it's necessarily incomplete, and
        #        looping back from last year, on the assumption that more recent years
        #        are of greater interest and have higher quality data.
        # First we assemble the URL for the year of interest
        fullURL = (URLroot + str(year) + '/' + stationcode + '-' + str(year) +
                   filesuffix)
        if verbose:
            sys.stdout.write("Trying " + fullURL + " ... ")
            sys.stdout.flush()

        # Now we try to download the file, with very basic error handling if verbose
        try:
            urllib.urlretrieve(fullURL, str(year) + filesuffix)
            if verbose: sys.stdout.write("retrieved ... ")
            yearsdownloaded += 1
        except IOError as e:
            if verbose: print(" ")
            print(e)
        else:  # if we got the file without any errors, then
            # uncompress the file
            f_in = gzip.open(str(year) + filesuffix)
            if verbose: sys.stdout.write("decompressed ... ")
            # and start writing the output
            if yearsdownloaded == 1:
                # since it's the first year, open the file and write the header row
                firstyear = year
                f_out = open(stationname + '.csv', 'w')
                csv.writer(f_out).writerow(["Station", "Year", "Month", "Day", \
                    "MeanTemp", "NTempObs", "DewPoint", "NDewPointObs", \
                    "SeaLevelPressure", "NSeaLevPressObs", "StationPressure", \
                    "NStatPressObs", "Visibility", "NVisibilityObs", "MeanWindSpeed", \
                    "NWindObs", "MaxSustWindSpeed", "MaxWindGust", "MaxTemp",  \
                    "MaxTempSource", "MinTemp", "MinTempSource", "PrecipAmount", \
                    "NPrecipReportHours", "PrecipFlag", "SnowDepth", "Fog", "Rain", \
                    "Snow", "Hail", "Thunder", "Tornado"])
            # This function does the actual ETL
            parsefile(f_in, f_out, stationname)
            # clean up after ourselves
            f_in.close()
            os.remove(str(year) + filesuffix)
        urllib.urlcleanup()
        if yearsdownloaded == maxyears:
            break  # if we have enough years, then end this loop
        else:
            time.sleep(5)  # slow down here to stop the server locking us out
        time.sleep(1)
    print("Successfully downloaded " + str(yearsdownloaded) +
          " years between " + str(year) + " and " + str(firstyear) +
          " for station " + stationname)
    if yearsdownloaded < maxyears:
        # If we didn't get as many years as requested, alert the user
        print(
            "No more years are available at the NOAA website for this station."
        )
    f_out.close()
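
For reference, the per-year fetch above reduces to building ftp://ftp.ncdc.noaa.gov/pub/data/gsod/<year>/<USAF>-<WBAN>-<year>.op.gz and treating IOError as "no data for that year". A reduced sketch, assuming Python 2 as in the snippet (the station code in the comment is just the Seattle example mentioned above):

import urllib

URL_ROOT = "ftp://ftp.ncdc.noaa.gov/pub/data/gsod/"
SUFFIX = ".op.gz"

def fetch_year(stationcode, year):
    """Download one station-year GSOD archive, or return None if unavailable."""
    url = "{0}{1}/{2}-{1}{3}".format(URL_ROOT, year, stationcode, SUFFIX)
    target = str(year) + SUFFIX
    try:
        urllib.urlretrieve(url, target)
    except IOError:
        return None                     # NOAA has no file for this station/year
    finally:
        urllib.urlcleanup()             # clear urlretrieve's cache either way
    return target

# fetch_year("727930-24233", 2012) tries
# ftp://ftp.ncdc.noaa.gov/pub/data/gsod/2012/727930-24233-2012.op.gz
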
Ejemplo n.º 43
0
    def setCuts(self,
                ra,
                dec,
                roi,
                triggerTime,
                tstart,
                tstop,
                timetype='MET',
                strict=False):
        self.ra = float(ra)
        self.dec = float(dec)
        self.roi = float(roi)
        self.triggerTime = float(triggerTime)
        self.tstart = float(tstart)
        self.tstop = float(tstop)
        self.timetype = timetype

        #Retrieve the HTML page with the input mask, to get the maximum
        #time available in the server (this is needed for BA analysis)
        temporaryFileName = "__temp_query_result.html"
        try:
            os.remove(temporaryFileName)
        except:
            pass
        pass

        urllib.urlcleanup()
        try:
            urllib.urlretrieve(
                "http://fermi.gsfc.nasa.gov/cgi-bin/ssc/LAT/LATDataQuery.cgi",
                temporaryFileName)
        except socket.timeout:
            raise GtBurstException(
                11,
                "Time out when connecting to the server. Check your internet connection, or that you can access http://fermi.gsfc.nasa.gov, then retry"
            )
        except:
            raise GtBurstException(
                1,
                "Problems with the download. Check your connection then retry")
        pass

        htmlFile = open(temporaryFileName)
        maxTimeLimit = ''
        for line in htmlFile.readlines():
            res = re.findall(
                '(.+)The event database currently holds [0-9]+ events, collected between (.+) UTC and (.+) UTC \(Mission Elapsed Time \(MET\) ([0-9]+) to ([0-9]+) seconds\)',
                line)
            if (len(res) != 0):
                #Found
                maxTimeLimit = res[-1][-1]
                break
            pass
        pass
        htmlFile.close()

        os.remove(temporaryFileName)
        if (maxTimeLimit.replace(" ", "") == ''):
            raise GtBurstException(
                12,
                "The LAT data server is probably down for maintenance or loading new data. Check the page http://fermi.gsfc.nasa.gov/cgi-bin/ssc/LAT/LATDataQuery.cgi or retry later."
            )
        else:
            maxTimeLimit = float(maxTimeLimit)
        pass

        if (maxTimeLimit < self.tstop):
            if (strict):
                #Fail
                raise GtBurstException(
                    14,
                    "The requested time limit %s is too large. Data are available up to %s."
                    % (self.tstop, maxTimeLimit - 1))
                return maxTimeLimit - 1
            else:
                print(
                    "\n\nWARNING:The requested time limit %s is too large. Data are available up to %s. Will download up to %s.\n\n"
                    % (self.tstop, maxTimeLimit - 1, maxTimeLimit - 1))
                self.tstop = float(maxTimeLimit) - 1
            pass
        pass
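
The core of setCuts() is the probe for the latest available Mission Elapsed Time: download the query form page and scan it with the regular expression used above. A compact sketch of just that probe, assuming Python 2 and with the error handling of the original left out:

import os
import re
import urllib

def max_available_met(url="http://fermi.gsfc.nasa.gov/cgi-bin/ssc/LAT/LATDataQuery.cgi"):
    """Return the last MET second covered by the LAT event database, or None."""
    urllib.urlcleanup()
    tmp = "__temp_query_result.html"
    urllib.urlretrieve(url, tmp)
    pattern = (r'(.+)The event database currently holds [0-9]+ events, '
               r'collected between (.+) UTC and (.+) UTC \(Mission Elapsed Time '
               r'\(MET\) ([0-9]+) to ([0-9]+) seconds\)')
    lines = open(tmp).readlines()
    os.remove(tmp)
    for line in lines:
        res = re.findall(pattern, line)
        if res:
            return float(res[-1][-1])   # last group of the last match: the upper MET bound
    return None
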
Ejemplo n.º 44
0
#[Out]# ParseResult(scheme='', netloc='', path='hl=en&sa=X&ei=v_3tTaPHGsX10gG2_aWZCA&ved=0CBoQvwUoAQ&q=%22graphs+such+as+call+graphs%22&spell=1&biw=1283&bih=684', params='', query='', fragment='')
import urllib
urllib.unquote(s)
#[Out]# 'https://encrypted.google.com/search?hl=en&sa=X&ei=v_3tTaPHGsX10gG2_aWZCA&ved=0CBoQvwUoAQ&q="graphs+such+as+call+graphs"&spell=1&biw=1283&bih=684'
urllib.urlparse(urllib.unquote(s))
urlparse.urlparse(urllib.unquote(s))
#[Out]# ParseResult(scheme='https', netloc='encrypted.google.com', path='/search', params='', query='hl=en&sa=X&ei=v_3tTaPHGsX10gG2_aWZCA&ved=0CBoQvwUoAQ&q="graphs+such+as+call+graphs"&spell=1&biw=1283&bih=684', fragment='')
urllib.urlparse(urllib.unquote(s))
urlparse.urlparse(urllib.unquote(s))
#[Out]# ParseResult(scheme='https', netloc='encrypted.google.com', path='/search', params='', query='hl=en&sa=X&ei=v_3tTaPHGsX10gG2_aWZCA&ved=0CBoQvwUoAQ&q="graphs+such+as+call+graphs"&spell=1&biw=1283&bih=684', fragment='')
urlparse.urlparse(urllib.unquote(s)).query
#[Out]# 'hl=en&sa=X&ei=v_3tTaPHGsX10gG2_aWZCA&ved=0CBoQvwUoAQ&q="graphs+such+as+call+graphs"&spell=1&biw=1283&bih=684'
urlparse.urlparse(urlparse.urlparse(urllib.unquote(s)).query)
#[Out]# ParseResult(scheme='', netloc='', path='hl=en&sa=X&ei=v_3tTaPHGsX10gG2_aWZCA&ved=0CBoQvwUoAQ&q="graphs+such+as+call+graphs"&spell=1&biw=1283&bih=684', params='', query='', fragment='')
#?urllib.urlcleanup
urllib.urlcleanup(s)
#?urllib.splitquery
urllib.splitquery(s)
#[Out]# ('https://encrypted.google.com/search', 'hl=en&sa=X&ei=v_3tTaPHGsX10gG2_aWZCA&ved=0CBoQvwUoAQ&q=%22graphs+such+as+call+graphs%22&spell=1&biw=1283&bih=684')
url.query
#[Out]# 'hl=en&sa=X&ei=v_3tTaPHGsX10gG2_aWZCA&ved=0CBoQvwUoAQ&q=%22graphs+such+as+call+graphs%22&spell=1&biw=1283&bih=684'
url.query.split('&')
#[Out]# ['hl=en', 'sa=X', 'ei=v_3tTaPHGsX10gG2_aWZCA', 'ved=0CBoQvwUoAQ', 'q=%22graphs+such+as+call+graphs%22', 'spell=1', 'biw=1283', 'bih=684']
url.query.split('&q=')
#[Out]# ['hl=en&sa=X&ei=v_3tTaPHGsX10gG2_aWZCA&ved=0CBoQvwUoAQ', '%22graphs+such+as+call+graphs%22&spell=1&biw=1283&bih=684']
url.query.split('&q=')[1]
#[Out]# '%22graphs+such+as+call+graphs%22&spell=1&biw=1283&bih=684'
urlparse.parse_qs(url.query)
#[Out]# {'ei': ['v_3tTaPHGsX10gG2_aWZCA'], 'bih': ['684'], 'spell': ['1'], 'q': ['"graphs such as call graphs"'], 'ved': ['0CBoQvwUoAQ'], 'hl': ['en'], 'biw': ['1283'], 'sa': ['X']}
urlparse.parse_qs(url.query)['q']
#[Out]# ['"graphs such as call graphs"']
def mapSpecies(mousepeptrackfilename):
    RETRY_TIME = 20.0
    mouseTohumanfilepath = os.path.join(os.getcwd(), 'MouseToHuman.tsv')
    print("Extracting Mouse to Human Map data, job starts",
          str(datetime.datetime.now()))
    #increase the field size of CSV
    csv.field_size_limit(int(ctypes.c_ulong(-1).value // 2))
    try:
        urllib.urlretrieve(
            'http://www.informatics.jax.org/downloads/reports/HOM_MouseHumanSequence.rpt',
            mouseTohumanfilepath)
        urllib.urlcleanup()
    except:
        print("Can't able to download MouseToHuman.tsv file!!")

    colnameMousHu = [
        'HomoloGene ID', 'Common Organism Name', 'NCBI Taxon ID', 'Symbol',
        'EntrezGene ID', 'Mouse MGI ID', 'HGNC ID', 'OMIM Gene ID',
        'Genetic Location', 'Genomic Coordinates (mouse: , human: )',
        'Nucleotide RefSeq IDs', 'Protein RefSeq IDs', 'SWISS_PROT IDs'
    ]

    mouseHumandata = []
    homologID = []
    with open(mouseTohumanfilepath) as mhtsvfile:
        mhreader = csv.DictReader(mhtsvfile, delimiter='\t')
        for mhrow in mhreader:
            mhtemplist = []
            for i in colnameMousHu:
                mhtempdata = str(mhrow[i]).strip()
                mhtemplist.append(mhtempdata)
            if len(mhtemplist[-1].strip()) > 0:
                homologID.append(mhtemplist[0])
                mouseHumandata.append(mhtemplist)
    homologID = list(set(homologID))
    homologID.sort()

    mousehumandic = {}
    for homologidItem in homologID:
        tempHumanHomoUniID = ''
        tempMouseHomoUniID = ''
        for item in mouseHumandata:
            if homologidItem == item[0]:
                if 'mouse' in item[1].strip().lower():
                    tempMouseHomoUniID = item[-1].strip()
                else:
                    tempHumanHomoUniID = item[-1].strip()
        if len(tempMouseHomoUniID.strip()) > 0 and len(
                tempHumanHomoUniID.strip()) > 0 and tempHumanHomoUniID.strip(
                ).upper() != 'NA':
            mousehumandic[tempMouseHomoUniID] = tempHumanHomoUniID

    colname=['UniProtKB Accession','Protein','Gene','Organism','Peptide Sequence','Summary Concentration Range Data','All Concentration Range Data','All Concentration Range Data-Sample LLOQ Based','Peptide ID',\
    'Special Residues','Molecular Weight','GRAVY Score','Transitions','Retention Time','Analytical inofrmation',\
    'Gradients','AAA Concentration','CZE Purity','Panel','Knockout','LLOQ','ULOQ','Sample LLOQ','Protocol','Trypsin','QC. Conc. Data','Human UniProtKB Accession']

    finalresult = []
    finalresult.append(colname)
    humanUniprotID = []
    with open(mousepeptrackfilename) as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        for row in reader:
            templist = []
            for i in colname[:-1]:
                tempdata = str(row[i]).strip()
                templist.append(tempdata)
            if len(str(templist[0]).strip()) > 0:
                if templist[0].split('-')[0] in mousehumandic:
                    humanUniprotID.append(
                        mousehumandic[templist[0].split('-')[0]])
                    templist.append(mousehumandic[templist[0].split('-')[0]])
                else:
                    templist.append('NA')

            finalresult.append(templist)

    with open(mousepeptrackfilename, 'wb') as pf:
        pwriter = csv.writer(pf, delimiter='\t')
        pwriter.writerows(finalresult)

    disGenDataDicName = disGenData()
    #disGenDataDicName='disGen.obj'
    disGenDataDic = cPickle.load(open(disGenDataDicName, 'rb'))
    unqhumanUniprotID = list(set(humanUniprotID))
    humanUniprotfuncinfodic = {}
    countProt = 0
    for subcode in unqhumanUniprotID:
        time.sleep(2)
        drugbanklist = []
        PN = 'NA'
        GN = 'NA'
        OG = 'NA'
        OGID = 'NA'
        dislist = []
        unidislist = []
        unidisURLlist = []
        disgendislist = []
        disgendisURLlist = []
        GoIDList = []
        GoNamList = []
        GoTermList = []
        GOinfo = []
        try:
            countProt += 1
            if countProt % 1000 == 0:
                print str(countProt), "th protein: Protein Name, Gene, Organism Name, DrugBank data, disease data job starts", str(datetime.datetime.now())

            SGrequestURL = "https://www.uniprot.org/uniprot/" + str(
                subcode) + ".xml"
            SGunifile = urllib.urlopen(SGrequestURL)
            SGunidata = SGunifile.read()
            SGunifile.close()

            try:
                SGunidata = minidom.parseString(SGunidata)
                try:
                    drugdata = (SGunidata.getElementsByTagName('dbReference'))
                    for duItem in drugdata:
                        if (duItem.attributes['type'].value
                            ).upper() == 'DRUGBANK':
                            try:
                                drugname = (str(
                                    duItem.getElementsByTagName('property')
                                    [0].attributes['value'].value).strip())
                                drugid = str(
                                    duItem.attributes['id'].value).strip()
                                durl = '<a target="_blank" href="https://www.drugbank.ca/drugs/' + drugid + '">' + drugname + '</a>'
                                drugbanklist.append(durl)
                            except:
                                pass
                        if (duItem.attributes['type'].value
                            ).strip() == 'NCBI Taxonomy':
                            try:
                                OGID = str(
                                    duItem.attributes['id'].value).strip()
                            except:
                                pass
                except IndexError:
                    pass

                try:
                    godata = (SGunidata.getElementsByTagName('dbReference'))
                    for gItem in godata:
                        if (gItem.attributes['type'].value).upper() == 'GO':
                            try:
                                gonamedetails = (str(
                                    gItem.getElementsByTagName('property')
                                    [0].attributes['value'].value).strip()
                                                 ).split(':')[1]
                                gotermdetails = (str(
                                    gItem.getElementsByTagName('property')
                                    [0].attributes['value'].value).strip()
                                                 ).split(':')[0]
                                GoNamList.append(gonamedetails)
                                goid = str(
                                    gItem.attributes['id'].value).strip()
                                GoIDList.append(goid)
                                tempGoTerm = None

                                if gotermdetails.lower() == 'p':
                                    tempGoTerm = 'Biological Process'
                                if gotermdetails.lower() == 'f':
                                    tempGoTerm = 'Molecular Function'
                                if gotermdetails.lower() == 'c':
                                    tempGoTerm = 'Cellular Component'
                                GoTermList.append(tempGoTerm)
                                tempGOData = gonamedetails + ';' + goid + ';' + tempGoTerm
                                GOinfo.append(tempGOData)
                            except:
                                pass

                        if (gItem.attributes['type'].value
                            ).strip() == 'NCBI Taxonomy':
                            try:
                                OGID = str(
                                    gItem.attributes['id'].value).strip()
                            except:
                                pass
                except IndexError:
                    pass

                try:
                    try:
                        PN = (((SGunidata.getElementsByTagName('protein')[0]
                                ).getElementsByTagName('recommendedName')[0]
                               ).getElementsByTagName('fullName')[0]
                              ).firstChild.nodeValue

                    except:
                        PN = (((SGunidata.getElementsByTagName('protein')[0]
                                ).getElementsByTagName('submittedName')[0]
                               ).getElementsByTagName('fullName')[0]
                              ).firstChild.nodeValue

                except IndexError:
                    pass

                try:
                    try:
                        GN = ((
                            SGunidata.getElementsByTagName('gene')[0]
                        ).getElementsByTagName('name')[0]).firstChild.nodeValue
                    except:
                        GN = 'NA'
                except IndexError:
                    pass

                try:
                    try:
                        OG = ((
                            SGunidata.getElementsByTagName('organism')[0]
                        ).getElementsByTagName('name')[0]).firstChild.nodeValue
                    except:
                        OG = 'NA'
                except IndexError:
                    pass

                try:
                    disdata = SGunidata.getElementsByTagName('disease')
                    for dItem in disdata:
                        disname = ''
                        disshort = ''
                        disURL = ''
                        disID = ''
                        try:
                            disname = (dItem.getElementsByTagName('name')[0]
                                       ).firstChild.nodeValue
                            disID = (dItem.attributes['id'].value).upper()
                        except:
                            pass
                        try:
                            disshort = (dItem.getElementsByTagName('acronym')
                                        [0]).firstChild.nodeValue
                        except:
                            pass
                        if len(disname.strip()) > 0:
                            disURL = '<a target="_blank" href="https://www.uniprot.org/diseases/' + disID + '">' + str(
                                disname.strip()) + '(' + str(
                                    disshort) + ')' + '</a>'
                            dislist.append(
                                str(disname.strip()) + '(' + str(disshort) +
                                ')')
                            unidislist.append(
                                str(disname.strip()) + '(' + str(disshort) +
                                ')')
                            unidisURLlist.append(disURL)
                except IndexError:
                    pass

            except ExpatError:
                pass
        except IOError:
            pass
        drugbankdata = 'NA'
        disdata = 'NA'
        uniDisData = 'NA'
        uniDisURLData = 'NA'
        disgenDisData = 'NA'
        disgenDisURLData = 'NA'
        goiddata = 'NA'
        gonamedata = 'NA'
        gotermdata = 'NA'
        goData = 'NA'
        if GN != 'NA' and GN in disGenDataDic:
            disgendislist = disGenDataDic[GN][0]
            disgendisURLlist = disGenDataDic[GN][1]
            if len(dislist) > 0:
                dislist = dislist + disGenDataDic[GN][0]
            else:
                dislist = disGenDataDic[GN][0]

        if len(GoIDList) > 0:
            goiddata = '|'.join(list(set(GoIDList)))
        if len(GoNamList) > 0:
            gonamedata = '|'.join(list(set(GoNamList)))
        if len(GoTermList) > 0:
            gotermdata = '|'.join(list(set(GoTermList)))
        if len(GOinfo) > 0:
            goData = '|'.join(list(set(GOinfo)))
        if len(drugbanklist) > 0:
            drugbankdata = '|'.join(list(set(drugbanklist)))
        if len(dislist) > 0:
            disdata = '|'.join(list(set(dislist)))
        if len(unidislist) > 0:
            uniDisData = '|'.join(list(set(unidislist)))
        if len(unidisURLlist) > 0:
            uniDisURLData = '|'.join(list(set(unidisURLlist)))
        if len(disgendislist) > 0:
            disgenDisData = '|'.join(list(set(disgendislist)))
        if len(disgendisURLlist) > 0:
            disgenDisURLData = '|'.join(list(set(disgendisURLlist)))
        humanUniprotfuncinfodic[subcode] = [
            PN, GN, OG, OGID, disdata, uniDisData, uniDisURLData,
            disgenDisData, disgenDisURLData, drugbankdata, goiddata,
            gonamedata, gotermdata, goData
        ]
    hudicfile = 'humanUniprotfuncinfodic.obj'
    hudicf = open(hudicfile, 'wb')
    pickle.dump(humanUniprotfuncinfodic, hudicf, pickle.HIGHEST_PROTOCOL)
    hudicf.close()

    print("Extracting KEGG pathway name, job starts",
          str(datetime.datetime.now()))
    hkeggdictfile = {}
    huniproturl = 'https://www.uniprot.org/uploadlists/'
    hk = KEGG()
    for hkx in range(0, len(unqhumanUniprotID), 2000):
        countProt += hkx + 2000
        if countProt % 2000 == 0:
            print(str(countProt), "th protein kegg job starts",
                  str(datetime.datetime.now()))

        huniprotcodes = ' '.join(unqhumanUniprotID[hkx:hkx + 2000])
        huniprotparams = {
            'from': 'ACC',
            'to': 'KEGG_ID',
            'format': 'tab',
            'query': huniprotcodes
        }

        while True:
            try:
                hkuniprotdata = urllib.urlencode(huniprotparams)
                hkuniprotrequest = urllib2.Request(huniproturl, hkuniprotdata)
                hkuniprotresponse = urllib2.urlopen(hkuniprotrequest)
                for hkuniprotline in hkuniprotresponse:
                    hkudata = hkuniprotline.strip()
                    if not hkudata.startswith("From"):
                        hkuinfo = hkudata.split("\t")
                        if len(hkuinfo[1].strip()):
                            hkegg = hk.get(hkuinfo[1].strip())
                            hkudict_data = hk.parse(hkegg)
                            try:
                                try:
                                    if len(str(hkuinfo[0]).strip()) > 5:
                                        tempkeggData = '|'.join(
                                            '{};{}'.format(key, value)
                                            for key, value in
                                            hkudict_data['PATHWAY'].items())
                                        hkeggdictfile[hkuinfo[0].strip()] = [
                                            hkudict_data['PATHWAY'].values(),
                                            tempkeggData
                                        ]
                                except TypeError:
                                    pass
                            except KeyError:
                                pass
                break
            except urllib2.HTTPError:
                time.sleep(RETRY_TIME)
                print(
                    'Retrying until the KEGG data request succeeds!',
                    str(datetime.datetime.now()))
                pass

    hkdicfile = 'humankeggdic.obj'
    hkdicf = open(hkdicfile, 'wb')
    pickle.dump(hkeggdictfile, hkdicf, pickle.HIGHEST_PROTOCOL)
    hkdicf.close()
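
The UniProt-to-KEGG step above mixes the HTTP request with the KEGG parsing and the retry loop. A trimmed sketch of just the ID-mapping POST, using the same endpoint and parameters as the snippet (Python 2, retries omitted; whether the uploadlists endpoint still accepts this form is not guaranteed here):

import urllib
import urllib2

def uniprot_to_kegg(accessions):
    """Map UniProt accessions to KEGG IDs via the uploadlists service."""
    params = {
        'from': 'ACC',
        'to': 'KEGG_ID',
        'format': 'tab',
        'query': ' '.join(accessions),
    }
    request = urllib2.Request('https://www.uniprot.org/uploadlists/',
                              urllib.urlencode(params))
    mapping = {}
    for line in urllib2.urlopen(request):
        line = line.strip()
        if line and not line.startswith('From'):    # skip the header row
            acc, kegg_id = line.split('\t')[:2]
            mapping[acc] = kegg_id
    return mapping
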
Ejemplo n.º 46
0
def fill_standard_stations():
    """get the standard station list stuff"""
    GPS_dict = {}
    # If the user has a position file in the current directory, use that file
    # and only that file
    test_file = ".gps_pos_default.snx"
    if(os.path.isfile(test_file)):
        warnings.warn("Using GPS receiver positions only from user file '%s'"%test_file)
        try:
            GPS_dict = fill_GPS_station_dict(GPS_dict, test_file)
        except:
            pass
        return GPS_dict
    #Otherwise, get the standard files
    month = 86400 * 30
    try:
        now_time = time.time()   # in seconds
        # find the system list
        py_path = os.environ["PYTHONPATH"]
        sys_file = None
        for d in py_path.split(os.pathsep):
            test_path = d + "/../libdata/JMA/gps_pos_default.snx"
            if(os.path.isfile(test_path)):
                sys_file = test_path
                break
            test_path = d + "/../../libdata/JMA/gps_pos_default.snx"
            if(os.path.isfile(test_path)):
                sys_file = test_path
                break
        sys_time = 0
        if(sys_file):
            sys_time = os.path.getmtime(sys_file)
            if(now_time - sys_time > 6 * month):
                warnings.warn("System default GPS station file %s is getting old.\nContact your system administrator."%sys_file)
        else:
            warnings.warn("Cannot find default GPS station file %s.\nContact your system administrator."%sys_file)
        user_file = os.environ['HOME'] + "/.ParselTongue/GPS_station_list.txt"
        user_time = 0
        if(os.path.isfile(user_file)):
            user_time = os.path.getmtime(user_file)
        need_new = 0
        write_user = 0
        if(sys_time > 0):  # Read in the system file, if available
            if(user_time > sys_time): # Read system first, then personal file
                GPS_dict = fill_GPS_station_dict(GPS_dict, sys_file)
                GPS_dict = _read_GPS_station_list(GPS_dict, user_file)
            else:
                write_user = 1
                if(user_time > 0):  
                    GPS_dict = _read_GPS_station_list(GPS_dict, user_file)
                GPS_dict = fill_GPS_station_dict(GPS_dict, sys_file)
        else: # Try just the user's personal file
            need_new = 1
            write_user = 1
            if(user_time > 0):  
                GPS_dict = _read_GPS_station_list(GPS_dict, user_file)
        if((now_time - user_time > 3 * month)
           and (now_time - sys_time > 3 * month)):
            need_new = 1
            write_user = 1
        if(need_new):
            # try getting a new file from the web
            try:
                try:
                    temp_file = tempfile.NamedTemporaryFile()
                    webfile = "ftp://igscb.jpl.nasa.gov/pub/station/general/igs_with_former.snx"
                    try:
                        print "Downloading %s"%webfile
                        urllib.urlretrieve(webfile, temp_file.name)
                        urllib.urlcleanup()
                        GPS_dict = fill_GPS_station_dict(GPS_dict, temp_file.name)
                    except IOError:
                        warnings.warn("Could not download new GPS stations list")
                finally:
                    temp_file.close()
            except:
                pass
        if(write_user):
            _write_GPS_station_list(GPS_dict, user_file)
    except:
        pass
    return GPS_dict
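
The download step buried in fill_standard_stations() is the "fetch into a NamedTemporaryFile, parse, always close" pattern. A minimal sketch of that pattern on its own (Python 2; parse stands in for fill_GPS_station_dict and is an assumption of this sketch):

import tempfile
import urllib

def fetch_into_tempfile(url, parse):
    """Download url into a temporary file, hand it to parse(), then clean up."""
    temp_file = tempfile.NamedTemporaryFile()
    try:
        urllib.urlretrieve(url, temp_file.name)
        urllib.urlcleanup()
        return parse(temp_file.name)
    except IOError:
        return None                     # download failed; caller keeps old data
    finally:
        temp_file.close()               # also deletes the temporary file
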
Ejemplo n.º 47
0
def getFileSizeOnServer(url):
    d = urllib.urlopen(url)
    size = int(d.info()['Content-Length'])
    urllib.urlcleanup()
    return size
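
A close variant of the helper above that also closes the connection explicitly and copes with a missing Content-Length header; the URL in the commented usage line is a placeholder (Python 2):

import urllib

def get_remote_size(url):
    """Return the remote file size in bytes, or None if the server omits Content-Length."""
    d = urllib.urlopen(url)
    try:
        length = d.info().getheader('Content-Length')
    finally:
        d.close()
    urllib.urlcleanup()
    return int(length) if length is not None else None

# size = get_remote_size("http://example.org/some/archive.zip")
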
Ejemplo n.º 48
0
 #print(token+year+month+day+hour+minStore)
 for token in tokens:
     if (filename_video.startswith(token + year + month + day + hour +
                                   minStore)
             and filename_video.endswith(".avi")):
         non = True
         for name in listFileName:
             if (filename_video == name):
                 non = False
         if (non == True):
             listFileName.append(filename_video)
             # if(previousVideo_name):
             # 	file = open("video/"+previousVideo_name, 'rb')
             # 	print("previous size : "+file)
             print(filename_video)
             urlcleanup()
             urlretrieve(
                 "ftp://" + ftpUser + ":" + ftpPass + "@" + ftpAddr +
                 "/ipcam/" + year + "" + month + "" + day + "/" + hour +
                 "00/" + str(filename_video),
                 "video/" + str(filename_video))
             previousVideo = "/ipcam/" + year + "" + month + "" + day + "/" + hour + "00/" + str(
                 filename_video)
             previousVideo_name = str(filename_video)
             f = open('downloadList', 'a+')
             f.write(filename_video + '\n')
             f.close()
             time.sleep(1)
             print("Download complete")
         if (minStore != minute):
             minStore = minute
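
The download inside the loop above is a plain FTP fetch with the credentials embedded in the URL, preceded by urlcleanup() so no stale cache entry is reused. Isolated into a helper (Python 2; all the parameter values are placeholders and ./video/ is assumed to exist):

import urllib

def fetch_camera_clip(ftp_user, ftp_pass, ftp_addr, year, month, day, hour, filename_video):
    """Download one .avi clip from the camera's FTP share into ./video/."""
    url = ("ftp://" + ftp_user + ":" + ftp_pass + "@" + ftp_addr +
           "/ipcam/" + year + month + day + "/" + hour + "00/" + filename_video)
    urllib.urlcleanup()                                  # drop any cached copy first
    urllib.urlretrieve(url, "video/" + filename_video)   # target directory must exist
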
Ejemplo n.º 49
0
    def get_poster(self, item):
        """Returns file path to the new poster"""

        from movie import Progress, Retriever

        file_to_copy = tempfile.mktemp(suffix=self.widgets['movie']['number'].get_text(), \
            dir=self.locations['temp'])
        file_to_copy += ".jpg"
        canceled = False
        try:
            progress = Progress(self.widgets['window'], _("Fetching poster"), _("Wait a moment"))
            retriever = Retriever(item.LargeImage.URL, self.widgets['window'], progress, file_to_copy)
            retriever.start()
            while retriever.isAlive():
                progress.pulse()
                if progress.status:
                    canceled = True
                while gtk.events_pending():
                    gtk.main_iteration()
            progress.close()
            urlcleanup()
        except:
            canceled = True
            gutils.warning(_("Sorry. A connection error has occurred."))
            try:
                os.remove(file_to_copy)
            except:
                log.error("no permission for %s" % file_to_copy)

        if not canceled:
            if os.path.isfile(file_to_copy):
                im = None
                try:
                    im = Image.open(file_to_copy)
                except IOError:
                    log.warn("failed to identify %s" % file_to_copy)

                if im and im.size == (1, 1):
                    url = FancyURLopener().open("http://www.amazon.com/gp/product/images/%s" % item.ASIN).read()
                    if url.find('no-img-sm._V47056216_.gif') > 0:
                        log.warn('No image available')
                        gutils.warning(_("Sorry. This movie is listed but has no poster available at Amazon.com."))
                        return False
                    url = gutils.after(url, 'id="imageViewerDiv"><img src="')
                    url = gutils.before(url, '" id="prodImage"')
                    urlretrieve(url, file_to_copy)
                    try:
                        im = Image.open(file_to_copy)
                    except IOError:
                        log.warn("failed to identify %s", file_to_copy)

                if not im:
                    # something wrong with the image, give some feedback to the user
                    log.warn('No image available')
                    gutils.warning(_("Sorry. This movie is listed but has no poster available at Amazon.com."))
                    return False

                if im.mode != 'RGB': # convert GIFs
                    im = im.convert('RGB')
                    im.save(file_to_copy, 'JPEG')
                # set to None because the file is locked otherwise (os.remove throws an exception)
                im = None

                handler = self.widgets['big_poster'].set_from_file(file_to_copy)

                self.widgets['poster_window'].show()
                self.widgets['poster_window'].move(0, 0)
                if gutils.question(_("Do you want to use this poster instead?"), self.widgets['window']):
                    return file_to_copy
                else:
                    log.info("Reverting to previous poster and deleting new one from disk.")
                    try:
                        os.remove(file_to_copy)
                    except:
                        log.error('cannot remove %s', file_to_copy)

                self.widgets['poster_window'].hide()
            else:
                gutils.warning(_("Sorry. This movie is listed but has no poster available at Amazon.com."))
        else:
            # cleanup temporary files after canceling the download
            if os.path.isfile(file_to_copy):
                try:
                    os.remove(file_to_copy)
                except:
                    log.error('cannot remove %s', file_to_copy)
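
The validation part of get_poster() can be read separately from the GUI code: open the downloaded file with PIL, treat Amazon's 1x1 placeholder as "no poster", and convert non-RGB images before saving as JPEG. A condensed sketch of that check (the Pillow-style import is an assumption; the original may use the older plain `import Image`):

from PIL import Image

def validate_poster(path):
    """Return path if it holds a usable poster image, otherwise None."""
    try:
        im = Image.open(path)
    except IOError:
        return None                 # not an image at all
    if im.size == (1, 1):
        return None                 # 1x1 "no image available" placeholder
    if im.mode != 'RGB':            # convert GIFs / palette images
        im = im.convert('RGB')
        im.save(path, 'JPEG')
    return path
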
Ejemplo n.º 50
0
#!/usr/bin/python
#coding=utf-8
#__author__='dahu'
#data=2017-
# image download, using the urllib library
import urllib

# fi=urllib.urlretrieve('https://ss1.baidu.com/6ONXsjip0QIZ8tyhnq/it/u=3129311788,3946097352&fm=173&s=B7F45B9569C0514BDA20966C0300B0F5&w=620&h=308&img.JPEG',filename="/home/dahu/PycharmProjects/SpiderLearning/urllib_lianxi/pic.JPEG")
fi = urllib.urlretrieve(
    'https://ss1.baidu.com/6ONXsjip0QIZ8tyhnq/it/u=3129311788,3946097352&fm=173&s=B7F45B9569C0514BDA20966C0300B0F5&w=620&h=308&img.JPEG',
    filename="t2.pic.JPEG")  #位置支持相对位置和绝对位置
urllib.urlcleanup()  #清除缓存
Ejemplo n.º 51
0
def main():

    year = ''
    try:
        opts, args = getopt.getopt(sys.argv[1:], "ha:n", ["help", "ayear="])
    except getopt.GetoptError:
        print 'noaa2postgresql.py -a <year>'
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print 'noaa2postgresql.py -a <year>'
            sys.exit()
        elif opt in ("-a", "--ayear"):
            year = arg

    if year == '':
        print 'ERROR: specify a year'
        sys.exit()
    print 'Downloading data for year', year
    #quit()

    #nomefile1 = "FRA_31102011.txt"
    percorso_FTP = 'ftp://ftp.ncdc.noaa.gov/pub/data/noaa/isd-lite/'

    # Connect to an existing database
    sys.path.append(os.path.abspath("../"))
    from credenziali import *
    conn = psycopg2.connect(host=ip,
                            dbname=db,
                            user=user,
                            password=pwd,
                            port=port)
    #autocommit
    conn.set_session(autocommit=True)
    cur = conn.cursor()
    cur2 = conn.cursor()

    # read the codes of the stations stored in the DB
    # Open a cursor to perform database operations
    query = "SELECT id_station,descr, country FROM {}.stations_p_t;".format(
        schema)
    cur.execute(query)

    while True:
        row = cur.fetchone()
        if row == None:
            break
        cod = row[0]
        url_filename = '%s%s/%s-99999-%s.gz' % (percorso_FTP, year, cod, year)
        filename = '%s-99999-%s' % (cod, year)
        zipname = '%s.gz' % filename

        print url_filename
        print filename
        print zipname
        print '*****************'
        try:
            urllib.urlretrieve(url_filename, zipname)
            urllib.urlcleanup()

            zip_ref = gzip.open(zipname, 'rb')
            file_content = zip_ref.read()
            f1 = open(filename, 'w')
            f1.write(file_content)
            zip_ref.close()
            f1.close()

            #time.sleep(10)
            f1 = open(filename, 'r')
            print "Reading the file", filename
            # read the three columns where the first one contains the 2
            i = 0
            n = 0
            riga_prima = " "
            line = " "
            yyyy = []
            mm = []
            dd = []
            hh = []
            tt = []
            pp = []
            for riga in file(filename):
                line = riga
                #print i
                #print line
                a = line.split()
                #print a
                yyyy.append(a[0])
                mm.append(a[1])
                dd.append(a[2])
                hh.append(a[3])
                ########################################################################################
                #read temperature (scaling factor NOAA = 10.0)
                if (a[4] == '-9999'):  # null value NOAA
                    tt.append(99999)  # null value DICCA
                else:
                    tt.append(
                        float(a[4]) / 10.0 + 273.15
                    )  # conversion from Celsius to Kelvin: K = °C + 273.15
                ########################################################################################
                #read pressure (scaling factor NOAA = 10.0)
                if (a[6] == '-9999'):  # null value NOAA
                    pp.append(99999)  # null value DICCA
                else:
                    pp.append(float(a[6]) / 10.0)

            #print min(tt)
            #print max(tt)
            #print min(pp)
            #print max(pp)

            # Insert data in the DB
            i = 0
            print "lunghezza file", len(tt)
            while i < len(tt):
                #print id_stazione[i]
                data = '%s/%s/%s %s:00' % (yyyy[i], mm[i], dd[i], hh[i])
                query2 = "INSERT INTO noaa.data_p_t(id_station, time, \"T\", \"P_mare\") VALUES ('%s', '%s',%f , %f);" % (
                    cod, data, tt[i], pp[i])
                #print i,query
                #print i
                try:
                    cur2.execute(query2)
                except:
                    print "violazione chiave primaria", query2
                i += 1

            os.remove(filename)
            os.remove(zipname)
        except:
            print "Non trovato file", filename

    #quit()

    # Make the changes to the database persistent
    #conn.commit()

    # Close communication with the database
    cur.close()
    conn.close()
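
The per-line conversion inside the loop above follows the isd-lite conventions: whitespace-separated columns, -9999 as NOAA's null marker (mapped to 99999 here), temperature and sea-level pressure stored scaled by 10, and temperature converted from Celsius to Kelvin. As a stand-alone helper:

def parse_isd_lite_line(line, null_in='-9999', null_out=99999):
    """Split one isd-lite row into (year, month, day, hour, temp_K, pressure_hPa)."""
    a = line.split()
    year, month, day, hour = a[0], a[1], a[2], a[3]
    temp = null_out if a[4] == null_in else float(a[4]) / 10.0 + 273.15
    pres = null_out if a[6] == null_in else float(a[6]) / 10.0
    return year, month, day, hour, temp, pres
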
Ejemplo n.º 52
0
def update(debug=False):

    print("Searching updates at %s..." % (remoteUrl))

    #Download file_list file
    try:
        os.remove("__file_list")
    except:
        pass

    urllib.urlcleanup()

    try:
        urllib.urlretrieve("%s/__file_list" % remoteUrl, "__file_list")
    except socket.timeout:
        raise GtBurstException(
            11,
            "Time out when connecting to %s. Check your internet connection, then retry"
            % (remoteUrl))
    except:
        raise GtBurstException(
            1,
            "Problems with the download. Check your connection, and that you can reach %s"
            % (remoteUrl))
    pass

    #Read the list of files
    f = open('__file_list')
    files = f.readlines()
    f.close()
    os.remove("__file_list")

    #Get the path of the gtburst installation
    path = GtBurst.__file__
    installationPath = os.path.join(
        os.path.sep.join(path.split(os.path.sep)[0:-3]))

    nUpdates = 0
    for ff in files:
        atoms = ff.split()
        pathname = atoms[-1].replace('*', '')
        if (ff.find("__file_list") >= 0):
            if (debug):
                print("Skipping %s..." % (ff))
        else:
            remoteMD5 = atoms[0]
            if (debug):
                print("File %s has remote MD5 checksum %s" %
                      (pathname, remoteMD5))

            #Get the MD5 of the same file in the GtBurst package path
            pathnameThisSys = pathname.replace("/", os.path.sep)
            localPath = os.path.join(installationPath, pathnameThisSys)
            if (not os.path.exists(localPath)):
                print(
                    "File %s does not exist in the current installation. Creating it..."
                    % (localPath))
                #If the new file is in a new directory, the directory needs to be created
                try:
                    os.makedirs(os.path.dirname(localPath))
                except:
                    #This will fail if the directory already exists
                    pass
                downloadFile(pathname, localPath)
                nUpdates += 1
            else:
                #File exists. Check the MD5 checksum
                localMD5 = md5.md5(open(localPath, 'rb').read()).hexdigest()
                if (localMD5 != remoteMD5):
                    print("Updating %s..." % (localPath))
                    downloadFile(pathname, localPath)
                    nUpdates += 1
                else:
                    if (debug):
                        print(
                            "NOT updating %s (local MD5: %s, remote MD5: %s)..."
                            % (localPath, localMD5, remoteMD5))
                    pass
                pass
                if (debug):
                    print("\n\n")
        pass
    pass

    return nUpdates
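
The decision driving update() is a checksum comparison between the local file and the MD5 listed in __file_list. A small sketch of that comparison, using hashlib instead of the deprecated md5 module (same hex digest):

import hashlib

def needs_update(local_path, remote_md5):
    """True when the local file's MD5 differs from the remote checksum."""
    with open(local_path, 'rb') as fh:
        local_md5 = hashlib.md5(fh.read()).hexdigest()
    return local_md5 != remote_md5
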
Ejemplo n.º 53
0
 def __del__(self):
     urlcleanup()
Ejemplo n.º 54
0
 def getFTP(self,what='Extended'):
   #Re-implementing this
   
   #This will complete automatically the form available at
   #https://fermi.gsfc.nasa.gov/cgi-bin/ssc/LAT/LATDataQuery.cgi
   #After submitting the form, an html page will inform about
   #the identifier assigned to the query and the time which will be
   #needed to process it. After retrieving the query number,
   #this function will wait for the files to be completed on the server,
   #then it will download them
   
   url                         = "https://fermi.gsfc.nasa.gov/cgi-bin/ssc/LAT/LATDataQuery.cgi"
   #Save parameters for the query in a dictionary
   parameters                  = {}
   parameters['coordfield']    = "%s,%s" %(self.ra,self.dec)
   parameters['coordsystem']   = "J2000"
   parameters['shapefield']    = "%s" %(self.roi)
   parameters['timefield']     = "%s,%s" %(self.tstart,self.tstop)
   parameters['timetype']      = "%s" %(self.timetype)
   parameters['energyfield']   = "30,1000000"
   parameters['photonOrExtendedOrNone'] = what
   parameters['destination']   = 'query'
   parameters['spacecraft']    = 'checked'
   
   print("Query parameters:")
   for k,v in parameters.iteritems():
     print("%30s = %s" %(k,v))
   
   #POST encoding    
   postData                    = urllib.urlencode(parameters)
   temporaryFileName           = "__temp_query_result.html"
   try:
     os.remove(temporaryFileName)
   except:
     pass
   pass
      
   urllib.urlcleanup()
   try:
     urllib.urlretrieve(url, 
                      temporaryFileName, 
                      lambda x,y,z:0, postData)
   except socket.timeout:
     raise GtBurstException(11,"Time out when connecting to the server. Check your internet connection, or that you can access https://fermi.gsfc.nasa.gov, then retry")
   except:
     raise GtBurstException(1,"Problems with the download. Check your connection or that you can access https://fermi.gsfc.nasa.gov, then retry.")
   pass
   
   #Now open the file, parse it and get the query ID
   htmlFile                    = open(temporaryFileName)
   lines                       = []
   for line in htmlFile:
     lines.append(line.encode('utf-8'))
   pass
   html                        = " ".join(lines).strip()
   htmlFile.close()
   print("\nAnswer from the LAT data server:\n")
   
   text                        = html2text.html2text(html.encode('utf-8').strip()).split("\n")
   
   if("".join(text).replace(" ","")==""):
     raise GtBurstException(1,"Problems with the download. Empty answer from the LAT server. Normally this means that the server is ingesting new data, please retry in half an hour or so.")
   text                        = filter(lambda x:x.find("[") < 0 and 
                                                 x.find("]") < 0 and 
                                                 x.find("#") < 0 and 
                                                 x.find("* ") < 0 and
                                                 x.find("+") < 0 and
                                                 x.find("Skip navigation")<0,text)
   text                        = filter(lambda x:len(x.replace(" ",""))>1,text)
   print "\n".join(text)
   print("\n\n")
   os.remove(temporaryFileName)
   if(" ".join(text).find("down due to maintenance")>=0):
     raise GtBurstException(12,"LAT Data server looks down due to maintenance.")
   
   parser                      = DivParser("sec-wrapper")
   parser.feed(html)
   
   if(parser.data==[]):
     parser                      = DivParser("right-side")
     parser.feed(html)
   pass
   
   try: 
     estimatedTimeLine           = filter(lambda x:x.find("The estimated time for your query to complete is")==0,parser.data)[0]
     estimatedTimeForTheQuery    = re.findall("The estimated time for your query to complete is ([0-9]+) seconds",estimatedTimeLine)[0]
   except:
     raise GtBurstException(1,"Problems with the download. Empty or wrong answer from the LAT server (see console). Please retry later.")
   pass
   
   try:
   
   	httpAddress                 = filter(lambda x:x.find("http://fermi.gsfc.nasa.gov") >=0,parser.data)[0]
   
   except IndexError:
       
       # Try https
       httpAddress                 = filter(lambda x:x.find("https://fermi.gsfc.nasa.gov") >=0,parser.data)[0]
   
   #Now periodically check if the query is complete
   startTime                   = time.time()
   timeout                     = 1.5*max(5.0,float(estimatedTimeForTheQuery)) #Seconds
   refreshTime                 = 2.0  #Seconds
   #When the query will be completed, the page will contain this string:
   #The state of your query is 2 (Query complete)
   endString                   = "The state of your query is 2 (Query complete)"
   #Url regular expression
   regexpr                     = re.compile("wget (.*.fits)")
   
   #Build the window for the progress
   if(self.parent is None):
     #No graphical output
     root                 = None
   else:
     #make a transient window
     root                 = Toplevel()
     root.transient(self.parent)
     root.grab_set()
     l                    = Label(root,text='Waiting for the server to complete the query (estimated time: %s seconds)...' %(estimatedTimeForTheQuery))
     l.grid(row=0,column=0)
     m1                    = Meter(root, 500,20,'grey','blue',0,None,None,'white',relief='ridge', bd=3)
     m1.grid(row=1,column=0)
     m1.set(0.0,'Waiting...')
   pass
   
   links                       = None
   fakeName                    = "__temp__query__result.html"
   while(time.time() <= startTime+timeout):
     if(root is not None):
       if(estimatedTimeForTheQuery==0):
         m1.set(1)
       else:
         m1.set((time.time()-startTime)/float(max(estimatedTimeForTheQuery,1)))
     sys.stdout.flush()
     #Fetch the html with the results
     try:
       (filename, header)        = urllib.urlretrieve(httpAddress,fakeName)
     except socket.timeout:
       urllib.urlcleanup()
       if(root is not None):
         root.destroy()
       raise GtBurstException(11,"Time out when connecting to the server. Check your internet connection, or that you can access https://fermi.gsfc.nasa.gov, then retry")
     except:
       urllib.urlcleanup()
       if(root is not None):
         root.destroy()
       raise GtBurstException(1,"Problems with the download. Check your connection or that you can access https://fermi.gsfc.nasa.gov, then retry.")
     pass
     
     f                         = open(fakeName)
     html                      = " ".join(f.readlines())
     status                    = re.findall("The state of your query is ([0-9]+)",html)[0]
     #print("Status = %s" % status)
     if(status=='2'):
       #Get the download link
       links                   = regexpr.findall(html)
       break
     f.close()
     os.remove(fakeName)
     urllib.urlcleanup()
     time.sleep(refreshTime)
   pass
   
   if(root is not None):
     root.destroy()
   
   #Download the files
   #if(links is not None):
   #  for link in links:
   #    print("Downloading %s..." %(link))
   #    urllib.urlretrieve(link,link.split("/")[-1])
   #  pass
   #else:
   #  raise RuntimeError("Could not download LAT Standard data")
   #pass    
   remotePath                = "%s/%s/queries/" %(self.dataRepository,self.instrument)
   
   if(links is not None):
     filenames                 = map(lambda x:x.split('/')[-1],links)    
     try:
       self.downloadDirectoryWithFTP(remotePath,filenames=filenames)
     except Exception as e:
       #Try with "wget", if the system has it
       for ff in filenames:
         try:
           self.makeLocalDir()
           dataHandling.runShellCommand("wget %s%s -P %s" %("https://fermi.gsfc.nasa.gov/FTP/fermi/data/lat/queries/",ff,self.localRepository),True)
         except:
           raise e
         pass
       pass
     pass
   else:
     raise GtBurstException(1,"Could not download LAT Standard data")
   pass
   
   #Rename the files to something neater...
   newFilenames              = {}
   for f in filenames:
     #EV or SC?
     suffix                  = f.split("_")[1]
     if(suffix.find("EV")>=0):
       suffix                = 'ft1'
     elif(suffix.find("SC")>=0):
       suffix                = 'ft2'
     else:
       raise GtBurstException(13,"Could not understand the type of a downloaded file (%s)" %(f))
     newfilename             = os.path.join(self.localRepository,"gll_%s_tr_bn%s_v00.fit" %(suffix,self.grbName))
     localPath               = os.path.join(self.localRepository,f)
     
     os.rename(localPath,newfilename)
     newFilenames[suffix]    = newfilename
   pass
   
   ###########################
   if('ft1' in newFilenames.keys() and 'ft2' in newFilenames.keys()):
     dataHandling._makeDatasetsOutOfLATdata(newFilenames['ft1'],newFilenames['ft2'],
                                            self.grbName,self.tstart,self.tstop,
                                            self.ra,self.dec,self.triggerTime,
                                            self.localRepository,
                                            cspecstart=-1000,
                                            cspecstop=1000)
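
The waiting stage of getFTP() is a polling loop: re-download the status page until it reports state 2, then pull the wget-able FITS links out with the same regular expression. A stripped-down sketch of that loop (Python 2; the progress bar and the socket-timeout handling of the original are left out):

import os
import re
import time
import urllib

def poll_for_links(status_url, timeout, refresh=2.0):
    """Poll the LAT query status page until the query completes; return the FITS links."""
    link_re = re.compile("wget (.*.fits)")
    page = "__temp__query__result.html"
    deadline = time.time() + timeout
    while time.time() <= deadline:
        urllib.urlretrieve(status_url, page)
        html = open(page).read()
        os.remove(page)
        urllib.urlcleanup()
        state = re.findall("The state of your query is ([0-9]+)", html)
        if state and state[0] == '2':   # "Query complete"
            return link_re.findall(html)
        time.sleep(refresh)
    return None
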
Ejemplo n.º 55
0
def get_html_source(url, path, save_file=True, overwrite=False):
    """ fetch the html source """
    log("Retrieving HTML Source")
    log("Fetching URL: %s" % url)
    error = False
    htmlsource = "null"
    file_name = ""
    if save_file:
        path += ".json"
        tempxml_folder = __cdam__.path_temp_xml()
        if not xbmcvfs.exists(os.path.join(tempxml_folder, '')):
            xbmcvfs.mkdir(os.path.join(tempxml_folder, ''))
        file_name = os.path.join(tempxml_folder, path)

    class AppURLopener(urllib.FancyURLopener):
        version = __cdam__.user_agent()

    urllib._urlopener = AppURLopener()
    for _ in range(0, 4):
        try:
            if save_file:
                if xbmcvfs.exists(file_name):
                    file_mtime = datetime.datetime.fromtimestamp(os.path.getmtime(file_name))
                    file_age = datetime.datetime.today() - file_mtime
                    # yes i know... but this is temporary and will be configurable in a later release
                    if file_age.days > 14:
                        log("Cached file is %s days old, refreshing" % file_age.days)
                        xbmcvfs.delete(file_name)

                if xbmcvfs.exists(file_name) and not overwrite:
                    log("Retrieving local source")
                    sock = open(file_name, "r")
                else:
                    log("Retrieving online source")
                    urllib.urlcleanup()
                    sock = urllib.urlopen(url)
            else:
                urllib.urlcleanup()
                sock = urllib.urlopen(url)
            htmlsource = sock.read()
            if save_file and htmlsource not in ("null", ""):
                if not xbmcvfs.exists(file_name) or overwrite:
                    file(file_name, "w").write(htmlsource)
            sock.close()
            break
        except IOError as e:
            log("error: %s" % e, xbmc.LOGERROR)
            log("e.errno: %s" % e.errno, xbmc.LOGERROR)
            if not e.errno == "socket error":
                log("errno.errorcode: %s" % errno.errorcode[e.errno], xbmc.LOGERROR)
        except Exception as e:
            log("error: %s" % e, xbmc.LOGERROR)
            traceback.print_exc()
            log("!!Unable to open page %s" % url)
            error = True
    if error:
        return "null"
    else:
        log("HTML Source:\n%s" % htmlsource)
        if htmlsource == "":
            htmlsource = "null"
        return htmlsource
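
The caching rule in get_html_source() is: reuse a saved copy unless it is older than 14 days. A minimal sketch of that rule with plain os calls in place of xbmcvfs (the path is whatever the caller uses for its cache file):

import datetime
import os

def cached_copy_is_fresh(file_name, max_age_days=14):
    """True when a cached file exists and is not older than max_age_days."""
    if not os.path.exists(file_name):
        return False
    mtime = datetime.datetime.fromtimestamp(os.path.getmtime(file_name))
    age = datetime.datetime.today() - mtime
    return age.days <= max_age_days
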
def DownloadRequest(section, url, img, LabelName):
    if (LabelName == '') and (_param['title'] != ''):
        LabelName = _param['title']
    if (LabelName == '') and (_param['showtitle'] != ''):
        LabelName = _param['showtitle']
    LabelFile = clean_filename(LabelName)
    deb('LabelName', LabelName)
    if (LabelName == ''):
        deb('Download Error', 'Missing Filename String.')
        myNote('Download Error', 'Missing Filename String.')
        return
    if (section == ps('section.wallpaper')):
        FolderDest = xbmc.translatePath(addst("download_folder_wallpapers"))
    elif (section == ps('section.tv')):
        FolderDest = xbmc.translatePath(addst("download_folder_tv"))
    elif (section == ps('section.movie')):
        FolderDest = xbmc.translatePath(addst("download_folder_movies"))
    else:
        FolderDest = xbmc.translatePath(addst("download_folder_movies"))
    if os.path.exists(FolderDest) == False: os.mkdir(FolderDest)
    if os.path.exists(FolderDest):
        if (section == ps('section.tv')) or (section == ps('section.movie')):
            ### param >> url:  /link/show/1466546/
            match = re.search(
                '/.+?/.+?/(.+?)/', url
            )  ## Example: http://www.solarmovie.so/link/show/1052387/ ##
            videoId = match.group(1)
            deb('Solar ID', videoId)
            url = BASE_URL + '/link/play/' + videoId + '/'  ## Example: http://www.solarmovie.so/link/play/1052387/ ##
            html = net.http_GET(url).content
            match = re.search('<iframe.+?src="(.+?)"', html,
                              re.IGNORECASE | re.MULTILINE | re.DOTALL)
            link = match.group(1)
            link = link.replace('/embed/', '/file/')
            deb('hoster link', link)
            try:
                stream_url = urlresolver.HostedMediaFile(link).resolve()
            except:
                stream_url = ''
            ext = Download_PrepExt(stream_url, '.flv')
        else:
            stream_url = url
            ext = Download_PrepExt(stream_url, '.jpg')
        t = 1
        c = 1
        if os.path.isfile(
                xbmc.translatePath(os.path.join(FolderDest, LabelFile + ext))):
            t = LabelFile
            while t == LabelFile:
                if os.path.isfile(
                        xbmc.translatePath(
                            os.path.join(FolderDest, LabelFile + '[' + str(c) +
                                         ']' + ext))) == False:
                    LabelFile = LabelFile + '[' + str(c) + ']'
                c = c + 1
        start_time = time.time()
        deb('start_time', str(start_time))
        download_method = addst(
            'download_method')  ### 'Progress|ProgressBG|Hidden'
        urllib.urlcleanup()
        if (download_method == 'Progress'):
            dp = xbmcgui.DialogProgress()
            dialogType = 12  ## For Frodo and earlier.
            dp.create('Downloading', LabelFile + ext)
            urllib.urlretrieve(stream_url,
                               xbmc.translatePath(
                                   os.path.join(FolderDest, LabelFile + ext)),
                               lambda nb, bs, fs: DownloadStatus(
                                   nb, bs, fs, dp, download_method, start_time,
                                   section, url, img, LabelName, ext, LabelFile
                               ))  #urllib.urlretrieve(url, localfilewithpath)
            myNote('Download Complete', LabelFile + ext, 15000)
        elif (download_method == 'ProgressBG'):
            dp = xbmcgui.DialogProgressBG()
            dialogType = 13  ## Only works on daily build of XBMC.
            dp.create('Downloading', LabelFile + ext)
            urllib.urlretrieve(stream_url,
                               xbmc.translatePath(
                                   os.path.join(FolderDest, LabelFile + ext)),
                               lambda nb, bs, fs: DownloadStatus(
                                   nb, bs, fs, dp, download_method, start_time,
                                   section, url, img, LabelName, ext, LabelFile
                               ))  #urllib.urlretrieve(url, localfilewithpath)
            myNote('Download Complete', LabelFile + ext, 15000)
        elif (download_method == 'Test'):
            dp = xbmcgui.DialogProgress()
            myNote('Download Started', LabelFile + ext, 15000)
            urllib.urlretrieve(stream_url,
                               xbmc.translatePath(
                                   os.path.join(FolderDest, LabelFile + ext)),
                               lambda nb, bs, fs: DownloadStatus(
                                   nb, bs, fs, dp, download_method, start_time,
                                   section, url, img, LabelName, ext, LabelFile
                               ))  #urllib.urlretrieve(url, localfilewithpath)
            myNote('Download Complete', LabelFile + ext, 15000)
        elif (download_method == 'Hidden'):
            dp = xbmcgui.DialogProgress()
            myNote('Download Started', LabelFile + ext, 15000)
            urllib.urlretrieve(stream_url,
                               xbmc.translatePath(
                                   os.path.join(FolderDest, LabelFile + ext)),
                               lambda nb, bs, fs: DownloadStatus(
                                   nb, bs, fs, dp, download_method, start_time,
                                   section, url, img, LabelName, ext, LabelFile
                               ))  #urllib.urlretrieve(url, localfilewithpath)
            myNote('Download Complete', LabelFile + ext, 15000)
        elif (download_method == 'jDownloader (StreamURL)'):
            myNote('Download', 'sending to jDownloader plugin', 15000)
            xbmc.executebuiltin(
                "XBMC.RunPlugin(plugin://plugin.program.jdownloader/?action=addlink&url=%s)"
                % stream_url)
            #return
        elif (download_method == 'jDownloader (Link)'):
            myNote('Download', 'sending to jDownloader plugin', 15000)
            xbmc.executebuiltin(
                "XBMC.RunPlugin(plugin://plugin.program.jdownloader/?action=addlink&url=%s)"
                % link)
            #return
        else:
            deb('Download Error', 'Incorrect download method.')
            myNote('Download Error', 'Incorrect download method.')
            return
        ##
        ##urllib.urlretrieve(stream_url, xbmc.translatePath(os.path.join(FolderDest,LabelFile+ext)), lambda nb, bs, fs: DownloadStatus(nb, bs, fs, dp, download_method, start_time, section, url, img, LabelName, ext, LabelFile)) #urllib.urlretrieve(url, localfilewithpath)
        ##
        #myNote('Download Complete',LabelFile+ext,15000)
        ##
        #### xbmc.translatePath(os.path.join(FolderDest,localfilewithpath+ext))
        _addon.resolve_url(url)
        _addon.resolve_url(stream_url)
        #
        #
    else:
        deb('Download Error', 'Unable to create destination path.')
        myNote('Download Error', 'Unable to create destination path.')
        return
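# A minimal, self-contained sketch of the reporthook pattern the urlretrieve
# calls above rely on: urllib invokes the callback with (block count, block
# size, total size) after each chunk. The URL, destination path and the
# report_progress helper below are illustrative placeholders, not the add-on's
# real DownloadStatus/dialog helpers.
import urllib

def report_progress(block_count, block_size, total_size):
    # total_size can be -1 when the server sends no Content-Length header.
    if total_size > 0:
        percent = min(100, block_count * block_size * 100 / total_size)
        print('downloaded %d%%' % percent)

urllib.urlretrieve('http://example.com/video.flv',
                   '/tmp/video.flv',
                   report_progress)
urllib.urlcleanup()  # clear urlretrieve's temporary cache when done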
Ejemplo n.º 57
0
# Python version used: Python 3.6.1+

# import all the libraries used
import re, urllib, os, sys

# determine python version
version = sys.version_info[0]

# set user_input and import modules for correct version of python
if version == 2:  # python 2.x
    user_input = raw_input
    import urllib2
    urlopen = urllib2.urlopen  # open a url
    encode = urllib.urlencode  # encode a search line
    retrieve = urllib.urlretrieve  # retrieve url info
    cleanup = urllib.urlcleanup  # cleanup url cache (bind the function; call cleanup() later)

else:  # python 3.x
    user_input = input
    import urllib.request
    import urllib.parse
    urlopen = urllib.request.urlopen
    encode = urllib.parse.urlencode
    retrieve = urllib.request.urlretrieve
    cleanup = urllib.request.urlcleanup


# clear the terminal screen
def screen_clear():
    if os.name == 'nt':
        os.system('cls')
    else:
        os.system('clear')
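# A small usage sketch for the shim above, assuming the aliases are bound as
# function references as defined; the search URL and query below are
# hypothetical and only illustrate the calling pattern.
query = encode({'q': 'urlcleanup'})
page = urlopen('http://example.com/search?' + query)
print(page.read()[:80])
retrieve('http://example.com/logo.png', 'logo.png')
cleanup()  # clear the cache left behind by retrieve()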
Ejemplo n.º 58
0
def download_tile(tile, url, pid, srtmv3, one, username, password):
    output = tile + '.r.in.srtm.tmp.' + str(pid)
    if srtmv3:
        if one:
            local_tile = str(tile) + '.SRTMGL1.hgt.zip'
        else:
            local_tile = str(tile) + '.SRTMGL3.hgt.zip'
    else:
        local_tile = str(tile) + '.hgt.zip'

    urllib.urlcleanup()

    if srtmv3:
        remote_tile = str(url) + local_tile
        goturl = 1

        try:
            password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
            password_manager.add_password(None, "https://urs.earthdata.nasa.gov", username, password)

            cookie_jar = CookieJar()

            opener = urllib2.build_opener(
                        urllib2.HTTPBasicAuthHandler(password_manager),
                        #urllib2.HTTPHandler(debuglevel=1),    # Uncomment these two lines to see
                        #urllib2.HTTPSHandler(debuglevel=1),   # details of the requests/responses
                        urllib2.HTTPCookieProcessor(cookie_jar))
            urllib2.install_opener(opener)

            request = urllib2.Request(remote_tile)
            response = urllib2.urlopen(request)

            fo = open(local_tile, 'w+b')
            fo.write(response.read())
            fo.close()
            time.sleep(0.5)
        except:
            goturl = 0
            pass
        
        return goturl
        
    # SRTM subdirs: Africa, Australia, Eurasia, Islands, North_America, South_America
    for srtmdir in ('Africa', 'Australia', 'Eurasia', 'Islands', 'North_America', 'South_America'):
        remote_tile = str(url) + str(srtmdir) + '/' + local_tile
        goturl = 1
    
        try:
            request = urllib2.Request(remote_tile)
            response = urllib2.urlopen(request)
            fo = open(local_tile, 'w+b')
            fo.write(response.read())
            fo.close()
            time.sleep(0.5)
            # does not work:
            #urllib.urlretrieve(remote_tile, local_tile, data = None)
        except:
            goturl = 0
            pass
        
        if goturl == 1:
            return 1

    return 0
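# A minimal call sketch for download_tile() above. The tile name, the SRTMv3
# base URL and the Earthdata credentials are placeholders; a real caller
# (e.g. a GRASS r.in.srtm-style script) would loop over all tiles of a region.
import os

got = download_tile('N39W106',
                    'https://e4ftl01.cr.usgs.gov/MEASURES/SRTMGL1.003/2000.02.11/',
                    os.getpid(), srtmv3=True, one=True,
                    username='your_earthdata_user',
                    password='your_earthdata_pass')
if got != 1:
    print('failed to download tile N39W106')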
Ejemplo n.º 59
0
                res_data = urllib2.urlopen(requrl + '?' + test_data_encode)
                break
            except urllib2.URLError, e:
                print e
                time.sleep(20)
                continue
        res = res_data.read()
        mq_str = res.split('\n')
        print mq_str[0]
        ret_str = mq_str[0].rstrip('\r')
        if (ret_str == "UCMQ_HTTP_OK"):
            break
        else:
            time.sleep(1)
        res_data.close()
    urllib.urlcleanup()
    return mq_str[1]
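# The snippet above is cut off at the top; here is a self-contained sketch of
# the same poll-until-OK pattern it implements. The endpoint, the query
# parameters and the read_one_message name are assumptions; only the
# "UCMQ_HTTP_OK" sentinel and the retry/sleep logic come from the fragment.
import time
import urllib
import urllib2

def read_one_message(requrl, params):
    query = urllib.urlencode(params)
    while True:
        try:
            res_data = urllib2.urlopen(requrl + '?' + query)
        except urllib2.URLError, e:
            print e
            time.sleep(20)
            continue
        lines = res_data.read().split('\n')
        res_data.close()
        if lines[0].rstrip('\r') == "UCMQ_HTTP_OK":
            urllib.urlcleanup()
            return lines[1]
        time.sleep(1)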


def get_a_rabbitmq_channel():
    credentials = pika.PlainCredentials("guest", "guest")
    conn_params = pika.ConnectionParameters(host="localhost",
                                            port=5672,
                                            credentials=credentials)
    # conn_params = pika.ConnectionParameters(host= "192.168.7.19",port=5672, credentials=credentials)
    conn_broker = pika.BlockingConnection(conn_params)
    channel = conn_broker.channel()
    channel.exchange_declare(exchange="amq",
                             exchange_type="direct",
                             passive=False,
                             durable=True,
Ejemplo n.º 60
0
    def authenticate(self, request):
        user = request.user or None
        auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
        # Get access token
        verifier = request.GET.get('oauth_verifier')
        auth.set_request_token(request.session['request_token.key'],
                               request.session['request_token.secret'])
        del request.session['request_token.secret']
        del request.session['request_token.key']
        auth.get_access_token(verifier)
        # Construct the API instance
        api = tweepy.API(auth)

        if user.is_anonymous():
            try:
                twitter_user = TwitterUserProfile.objects.get(
                    screen_name=api.me().screen_name)
                return twitter_user.user
            except TwitterUserProfile.DoesNotExist:
                userProfile = UserProfile.objects.create(
                    username=api.me().screen_name)
                userProfile.first_name = api.me().screen_name
                userProfile.location = api.me().location
                userProfile.save()

                img = urllib.urlretrieve(api.me().profile_image_url)
                userProfile.profile_picture.save("Twitter-profile.jpg",
                                                 File(open(img[0])))
                urllib.urlcleanup()

                userProfile.twitter_link = 'https://twitter.com/' + api.me(
                ).screen_name
                from django.contrib.auth.hashers import make_password
                raw_pass = ''.join(
                    random.choice(string.ascii_uppercase + string.digits)
                    for x in range(12))
                tmp_pass = make_password(raw_pass)
                userProfile.password = tmp_pass
                userProfile.save()

                twitter_user = TwitterUserProfile.objects.create(
                    user=userProfile)
                twitter_user.screen_name = api.me().screen_name
                twitter_user.profile_image_url = userProfile.profile_picture
                twitter_user.location = api.me().location
                twitter_user.url = 'https://twitter.com/' + api.me(
                ).screen_name
                twitter_user.access_token = auth.access_token.key
                twitter_user.access_secret = auth.access_token.secret
                twitter_user.save()
                return userProfile
        else:
            try:
                user_twitter = TwitterUserProfile.objects.get(user=user)
                if user_twitter.screen_name == api.me().screen_name:
                    return user_twitter.user
                else:
                    request.session['access_token'] = auth.access_token.key
                    request.session['access_secret'] = auth.access_token.secret
                    next = request.session['next'] or ""
                    if next:
                        del request.session['next']
                        return HttpResponseRedirect(next)
                    else:
                        return HttpResponseRedirect(reverse('sync_twitter'))

            except TwitterUserProfile.DoesNotExist:
                try:
                    user_twitter = TwitterUserProfile.objects.get(
                        screen_name=api.me().screen_name)
                    request.session['access_token'] = auth.access_token.key
                    request.session['access_secret'] = auth.access_token.secret
                    next = request.session['next'] or ""
                    if next:
                        del request.session['next']
                        return HttpResponseRedirect(next)
                    else:
                        return HttpResponseRedirect(reverse('sync_twitter'))
                except TwitterUserProfile.DoesNotExist:
                    twitter_user = TwitterUserProfile.objects.create(
                        user=UserProfile.objects.get(username=user.username))
                    twitter_user.screen_name = api.me().screen_name
                    twitter_user.location = api.me().location
                    twitter_user.url = 'https://twitter.com/' + api.me(
                    ).screen_name
                    twitter_user.access_token = auth.access_token.key
                    twitter_user.access_secret = auth.access_token.secret
                    twitter_user.save()
                    return twitter_user.user
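# The urlcleanup()-relevant part of the example above, reduced to a standalone
# sketch: urlretrieve() with no target filename downloads into a temporary
# cache file and returns (local_path, headers); urlcleanup() then removes that
# cache. save_profile_picture and its arguments are illustrative, not the real
# tweepy/Django objects used above.
import urllib
from django.core.files import File

def save_profile_picture(profile, image_url):
    local_path, headers = urllib.urlretrieve(image_url)
    try:
        with open(local_path, 'rb') as fh:
            profile.profile_picture.save("Twitter-profile.jpg", File(fh))
    finally:
        urllib.urlcleanup()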