def getfiles(self, *files):
    """Download Rebase files."""
    for file in self.update(*files):
        print('copying %s' % file)
        fn = os.path.basename(file)
        # filename = os.path.join(Rebase, fn)
        filename = os.path.join(os.getcwd(), fn)
        print('to %s' % filename)
        try:
            self.retrieve(file, filename)
            # The following line is a workaround for an urllib bug in
            # Python 2.7.11 - 2.7.xx (?). It does not seem to work on
            # Python 3.xx. Try to remove the line in new Python versions.
            urlcleanup()
        except IOError as e:
            print(e)
            print('This error is probably due to a non-solved ftp bug in '
                  'recent Python versions. Please download the emboss '
                  'files manually from http://rebase.neb.com/rebase/'
                  'rebase.f37.html and then run ranacompiler.py. Find '
                  'more details in the Restriction manual.')
            self.close()
            return
    self.close()
    return
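# The workaround above targets Python 2's urllib. A minimal sketch of the same
# retrieve-then-cleanup pattern on Python 3, where both helpers live in
# urllib.request; the fetch() name and todir argument are illustrative, not
# part of the original code:
import os
from urllib.request import urlretrieve, urlcleanup

def fetch(url, todir='.'):
    """Download url into todir, then discard urlretrieve's cached state."""
    filename = os.path.join(todir, os.path.basename(url))
    try:
        urlretrieve(url, filename)
    finally:
        # urlcleanup() deletes any temporary files urlretrieve left behind,
        # which also resets stale connection state between downloads.
        urlcleanup()
    return filename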
def mainProcess(linkList=[]):
    print 'Process {0} is starting to work!'.format(os.getpid())
    st = time.time()
    p = PAIRSeg()
    p._PAIRSeg__ResetSQLVariables()
    log = LogProcessor.LogProcess()
    fNum = os.path.basename(linkList[0]).split('.')[0]
    lNum = os.path.basename(linkList[-1]).split('.')[0]
    numRange = fNum + '-' + lNum
    for link in linkList:
        fileName = os.path.basename(link)
        filePath = p.dirPath + '/' + fileName
        try:
            urllib.urlretrieve(link, filePath)
            #print '[Downloaded .zip File: [{0}]'.format(fileName)
            urllib.urlcleanup()
            zipOrNot = zipfile.is_zipfile(filePath)
            if zipOrNot:
                p.ExtractTSV(filePath)
                os.remove(filePath)  # save space on sofus
            else:
                os.remove(filePath)
            log.write(log.logPath_PAIR, fileName + '\t' + link + '\t' + 'PAIR\tProcessed')
        except:
            print 'ERROR: time out. {fileName}'.format(fileName=fileName)
            log.write(log.logPath_PAIR_Error, fileName + '\t' + link + '\t' + 'PAIR\tProcessed')
    p.writeCSV(numRange)
    print 'Processed range:{range}'.format(range=numRange)
    print '[Process {0} is finished. Populated {1} links. Time:{2}]'.format(os.getpid(), len(linkList), time.time() - st)
def image_analyzer(url, string, depth, k):
    try:
        example_image = download_image(url)
    except (HTTPError, IOError), e:
        urllib.urlcleanup()
        sys.exit('Unable to download %s.\n' % url +
                 'Error: %s.\n' % e)
def aursearch(keywords, totalnum):
    search = ("http://aur.archlinux.org/packages.php?O=0&L=0&C=0&K=" +
              keywords.replace(" ", "+") + "&SeB=nd&SB=n&SO=a&PP=25&do_Search=Go")
    urllib.urlretrieve(search, "aur.tmp")
    block = ""
    num = 0
    sect = 0
    packages = ""
    aur = open("aur.tmp")
    for aurl in aur:
        if num <= totalnum:
            if aurl[:20] == " <td class='data1'>" or aurl[:20] == " <td class='data2'>":
                print "aursearch", aurl
            if aurl[:20] == " <td class='data1'>" and not block == "data1":
                sect = 0
                block = "data1"
            elif aurl[:20] == " <td class='data2'>" and not block == "data2":
                sect = 0
                block = "data2"
            elif aurl[:20] == " <td class='data1'>" or aurl[:20] == " <td class='data2'>":
                print aurl
                sect += 1
            if sect == 2:
                pacname = aurl.split("<span class='black'>")
                pacname = pacname[1].split("</span>")
                pacname = pacname[0]
                packages = packages + pacname
            elif sect == 4:
                pacdesc = aurl.split("<span class='blue'>")
                pacdesc = pacdesc[1].split("</span>")
                pacdesc = pacdesc[0]
                packages = packages + " - " + pacdesc + "\n"
                num += 1
    aur.close()
    urllib.urlcleanup()
    return packages
def download_file(url, destfile):
    """
    download_file: function for download from url to save as destfile
    @url the source file to download.
    @destfile the destination save file for local.
    """
    file_url = url
    try:
        print("--> Downloading file: %s" % file_url)
        filename, msg = urllib.urlretrieve(
            #'http://code.jquery.com/jquery-2.1.1.js',
            file_url,
            reporthook=reporthook)
        print ""
        #print "File:", filename
        print "Header:"
        print msg
        if os.path.exists(filename):
            if os.path.exists(destfile):
                now = currenttime()
                tmpfile = "%s.%s" % (destfile, now)
                shutil.move(destfile, tmpfile)
            shutil.move(filename, destfile)
        #print 'File exists before cleanup:', os.path.exists(filename)
    finally:
        urllib.urlcleanup()
def downloadImage(imageURL, subID):
    """Download images"""
    # image url
    image_url = 'http://bbs.sjtu.edu.cn' + imageURL
    # create the directory to store images
    # if not os.path.exists( './download' ) :
    try:
        os.makedirs('./download/' + subID)
    except OSError:
        pass
        #print "Failed to create directories"
    # get filename of image
    filename = 'download/' + subID + '/' + imageURL.split('/')[-1]
    # clear the cache that may have been built up
    # by previous calls to urlretrieve()
    urllib.urlcleanup()
    # retrieve the image
    try:
        urllib.urlretrieve(image_url, filename)
    except ContentTooShortError:
        print "The data available was less than that of expected"
        print "Downloading file %s was interrupted" % os.path.basename(filename)
    else:
        # get the size of file
        size = os.path.getsize(filename) / 1024
        print ">>>File %s (%s Kb) was done..." % (filename, size)
def get_wallpgig(self):
    if not self.check_connection():
        return "No internet connection!"
    tags_from_file = self.filemanage.file_get_tags()
    query_from_file = self.filemanage.file_get_query()
    if set(tags_from_file) == set(self.tags) and query_from_file == self.query:
        self.status.set_label("Loading urls...")
        urls = self.filemanage.file_get_urls()
        self.status.set_label("Urls loaded.")
    else:
        self.status.set_label("Downloading urls...")
        urls = self.download_urls()
        self.status.set_label("Urls downloaded.")
    random.shuffle(urls)
    try:
        url = urls[0]
        urllib.urlretrieve(url, tfile)
        urllib.urlcleanup()
        check = 1
    except IndexError:
        check = "No photos for these tags!"
    except urllib2.URLError:
        check = "No internet connection!"
    return check
def run(self):
    try:
        urllib._urlopener = SmartFancyURLopener()
        urllib.urlretrieve(self.url, self.tmpfile, reporthook=self._hook)
        urllib.urlcleanup()
    except Abort:
        print 'Download Aborted'
    except:
def populate_sidebar(self, branch='master', count=50):
    self.commits = self.repo.commits(branch, max_count=count)
    for commit in self.commits:
        commit_time = time.strftime("%c", commit.authored_date)
        parts = commit.message.split('\n')
        if len(parts) > 1:
            text = "<b>%s ...</b>" % parts[0]
        else:
            text = "<b>%s</b>" % commit.message
        text += "\n<small>by %s on %s</small>" % (commit.author, commit_time)
        hashed = hashlib.md5(commit.author.email).hexdigest()
        image_path = "%s/grav_cache/%s.jpg" % (installdir, hashed)
        if not os.path.exists(image_path):
            gravatar_url = "http://www.gravatar.com/avatar.php?"
            gravatar_url += urllib.urlencode({'gravatar_id': hashed, 'size': str(30)})
            urllib.urlretrieve(gravatar_url, image_path)
            urllib.urlcleanup()
        image = gtk.gdk.pixbuf_new_from_file(image_path)
        self.sidebar.add_item(None, [text, image])
def download_version(version):
    chromium_file = 'chromium-%s.tar.xz' % version
    path = '%s%s' % (chromium_url, chromium_file)
    if args.clean:
        remove_file_if_exists(chromium_file)
    # Let's make sure we haven't already downloaded it.
    if os.path.isfile("./%s" % chromium_file):
        print "%s already exists!" % chromium_file
    else:
        print "Downloading %s" % path
        # Perhaps look at using python-progressbar at some point?
        urllib.urlretrieve(path, chromium_file, reporthook=dlProgress)
        urllib.urlcleanup()
        print ""
    if args.tests:
        chromium_testdata_file = 'chromium-%s-testdata.tar.xz' % version
        path = '%s%s' % (chromium_url, chromium_testdata_file)
        if args.clean:
            remove_file_if_exists(chromium_testdata_file)
        # Let's make sure we haven't already downloaded it.
        if os.path.isfile("./%s" % chromium_testdata_file):
            print "%s already exists!" % chromium_testdata_file
        else:
            # Perhaps look at using python-progressbar at some point?
            print "Downloading %s" % path
            urllib.urlretrieve(path, chromium_testdata_file, reporthook=dlProgress)
            urllib.urlcleanup()
            print ""
def oai_listIdentifiers(self, src="www.astros-test.bodleian.ox.ac.uk/sandbox", resumptionToken=None):
    self.ids_data_file = '/tmp/%s_ids_data_file' % unicode(uuid.uuid4())
    src_url = None
    if resumptionToken:
        src_url = "%s?verb=ListIdentifiers&resumptionToken=%s" % (src, resumptionToken)
    else:
        src_url = "%s?verb=ListIdentifiers&metadataPrefix=oai_dc" % src
    for arg, val in self.args.iteritems():
        if val:
            src_url = "%s&%s=%s" % (src_url, arg, val)
    if 'args' in src:
        src_url = "%s&%s" % (src_url, src['args'])
    tries = 1
    while tries < 11:
        urlretrieve(src_url, self.ids_data_file)
        if os.path.isfile(self.ids_data_file):
            self.logger.info("Downloaded identifiers for %s - %s" % (src, src_url))
            break
        self.logger.warn("Error retrieving identifiers for %s - %s (try # %d)" % (src, src_url, tries))
        tries += 1
    urlcleanup()
    tree = ET.ElementTree(file=self.ids_data_file)
    rt = tree.getroot()
    ids = rt.findall("%(ns)sListIdentifiers/%(ns)sheader/%(ns)sidentifier" % {'ns': self.oai_ns})
    for ID in ids:
        if resumptionToken and 'deletion' in resumptionToken:
            self.delete_identifiers.append(ID.text)
        else:
            self.identifiers.append(ID.text)
    rtoken = rt.findall("%(ns)sListIdentifiers/%(ns)sresumptionToken" % {'ns': self.oai_ns})
    os.remove(self.ids_data_file)
    if rtoken:
        self.oai_listIdentifiers(src, resumptionToken=rtoken[0].text)
def dash_R_cleanup(fs, ps, pic):
    import gc, copy_reg
    import _strptime, linecache, dircache
    import urlparse, urllib, urllib2, mimetypes, doctest
    import struct, filecmp
    from distutils.dir_util import _path_created

    # Restore some original values.
    warnings.filters[:] = fs
    copy_reg.dispatch_table.clear()
    copy_reg.dispatch_table.update(ps)
    sys.path_importer_cache.clear()
    sys.path_importer_cache.update(pic)

    # Clear assorted module caches.
    _path_created.clear()
    re.purge()
    _strptime._regex_cache.clear()
    urlparse.clear_cache()
    urllib.urlcleanup()
    urllib2.install_opener(None)
    dircache.reset()
    linecache.clearcache()
    mimetypes._default_mime_types()
    struct._cache.clear()
    filecmp._cache.clear()
    doctest.master = None

    # Collect cyclic trash.
    gc.collect()
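# For comparison, a sketch of the same cache-clearing idea on Python 3, where
# the urllib pieces moved into urllib.request and urllib.parse. Only a subset
# of the caches above has a direct equivalent; treat this list as illustrative
# rather than exhaustive (urllib.parse.clear_cache() is an undocumented helper).
import gc
import re
import linecache
import urllib.parse
import urllib.request

def clear_stdlib_caches():
    """Clear a few well-known stdlib caches between test runs."""
    re.purge()                   # compiled regular-expression cache
    linecache.clearcache()       # cached source lines
    urllib.parse.clear_cache()   # quoting/parsing caches
    urllib.request.urlcleanup()  # temp files left by urlretrieve()
    gc.collect()                 # collect cyclic garbage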
def download_http_content(uri, params=None):
    urlcleanup()
    #proxy = settings.PROXY_SERVER
    #The proxy must not be used with local address
    host = urlparse(uri)[1]

    #manage proxies with authentication (get it from environment)
    proxy = None
    for proxy_name in settings.NOT_PROXY_FOR:
        if host.startswith(proxy_name):
            proxy = urllib2.ProxyHandler({})  # no proxy
            break
    if not proxy:
        #Host is not included in the NOT_PROXY_FOR list => proxy is needed!
        proxy = urllib2.ProxyHandler()  # proxies from environment
    opener = urllib2.build_opener(proxy)
    if params:
        return opener.open(uri, data=urlencode(params)).read()
    else:
        return opener.open(uri).read()
def run(self, site=None, flush_lists=True, flush_rpms=True):
    flushdirs = ['rpms', 'rdfs', 'lists']
    if not flush_lists:
        flushdirs.remove('lists')
    if not flush_rpms:
        flushdirs.remove('rpms')
    urllib.urlcleanup()
    if os.path.isdir(self.cachedir):
        for subdir in flushdirs:
            dir = os.path.join(self.cachedir, subdir)
            if site:
                dir = os.path.join(dir, site)
            if os.path.isdir(dir):
                shutil.rmtree(dir)
            if not site:
                os.mkdir(dir, 0755)
    if not site:
        #
        # Now let's recreate the infrastructure:
        #
        for subdir in ['file:', 'http:', 'https:', 'ftp:']:
            for type in ['rpms/', 'rdfs/']:
                sd = type + subdir
                dir = os.path.join(self.cachedir, sd)
                os.mkdir(dir, 0755)
def _download_video(self):
    try:
        # spam log file
        LOG(">>> _download_video(title: %s)" % (repr(self.g_title),), heading=True)
        # get filepath and tmp_filepath
        tmppath, self.filepath = get_legal_filepath(self.g_title, self.params["download"],
                                                    self.settings["play_mode"], self.settings["download_path"],
                                                    self.settings["use_title"], self.settings["use_trailer"])
        # only download if the trailer doesn't exist
        if not os.path.isfile(self.filepath.encode("utf-8")):
            # only need to retrieve video if not in tmp path
            if not os.path.isfile(tmppath.encode("utf-8")):
                # fetch the video
                urllib.urlretrieve(self.params["download"], tmppath.encode("utf-8"), self._report_hook)
            # create the conf file for xbox and copy to final location
            ok = self._finalize_download(tmppath)
            # if the copy failed raise an error
            if not ok:
                raise
    except Exception, e:
        # oops, notify user what error occurred
        LOG(str(e), xbmc.LOGERROR)
        # filepath is not always released immediately, we may need to try
        # more than one attempt, sleeping between
        urllib.urlcleanup()
        remove_tries = 3
        while remove_tries and os.path.isfile(tmppath):
            try:
                os.remove(tmppath.encode("utf-8"))
            except:
                remove_tries -= 1
                xbmc.sleep(1000)
        pDialog.close()
        self.filepath = ""
def firmware_update(self, args):
    if not args.file and not args.url:
        raise Exception("Must provide firmware filename or URL")
    if args.file:
        fp = open(args.file, 'r')
    elif args.url:
        print "Downloading from", args.url
        resp = urllib.urlretrieve(args.url)
        fp = open(resp[0], 'r')
        urllib.urlcleanup()  # We still keep file pointer open
    if fp.read(8) == '54525a52':
        print "Converting firmware to binary"
        fp.seek(0)
        fp_old = fp
        fp = tempfile.TemporaryFile()
        fp.write(binascii.unhexlify(fp_old.read()))
        fp_old.close()
    fp.seek(0)
    if fp.read(4) != 'KPKY':
        raise Exception("KeepKey firmware header expected")
    print "Please confirm action on device..."
    fp.seek(0)
    return self.client.firmware_update(fp=fp)
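# Note on the pattern above: the temp file returned by urlretrieve() is opened
# *before* urlcleanup() runs, so the already-open file object stays readable on
# POSIX systems even after cleanup unlinks the path. A condensed sketch
# (URL illustrative):
import urllib

path, headers = urllib.urlretrieve('http://example.com/firmware.hex')
fp = open(path, 'r')     # grab a handle first
urllib.urlcleanup()      # unlinks the temp file; fp remains usable on POSIX
data = fp.read()
fp.close()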
def _get_url(self, url, content=None):
    urllib.urlcleanup()
    headers = {"Accept": "text/plain"}
    rest = self.base_url
    user = self.username
    passwd = self.password
    if not rest or not user or not passwd:
        self.logger.info("Could not contact RT, bad or missing args (host: %s user: %s or passwd)", rest, user)
        return u""
    cj = cookielib.LWPCookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    urllib2.install_opener(opener)
    if content is None:
        data = {'user': user, 'pass': passwd}
    else:
        data = {'user': user, 'pass': passwd, 'content': content}
    #self.log.info("Data to be sent by RT:\n%r", data)
    ldata = urllib.urlencode(data)
    uri = rest + url
    login = urllib2.Request(uri, ldata)
    response_data = ""
    try:
        response = urllib2.urlopen(login)
        response_data = response.read()
        self.logger.info("RT Connection successful: %r", response_data)
    except urllib2.URLError, exc:
        # could not connect to server
        self.logger.info("RT Connection failed: %r", exc)
def refresh_games(self):
    debug.log("Updating games for {}/{}/{}".format(self.month, self.day, self.year))
    urllib.urlcleanup()
    attempts_remaining = 5
    while attempts_remaining > 0:
        try:
            current_day = self.day
            self.set_current_date()
            all_games = mlbgame.day(self.year, self.month, self.day)
            if self.config.rotation_only_preferred:
                self.games = self.__filter_list_of_games(all_games, self.config.preferred_teams)
            else:
                self.games = all_games
            if current_day != self.day:
                self.current_game_index = self.game_index_for_preferred_team()
            self.games_refresh_time = time.time()
            break
        except URLError, e:
            debug.error("URLError: {}".format(e.reason))
            attempts_remaining -= 1
            time.sleep(NETWORK_RETRY_SLEEP_TIME)
        except ValueError:
            debug.error("ValueError: Failed to refresh list of games")
            attempts_remaining -= 1
            time.sleep(NETWORK_RETRY_SLEEP_TIME)
def fetch_preferred_team_overview(self):
    if not self.is_offday_for_preferred_team():
        urllib.urlcleanup()
        game = self.games[self.game_index_for_preferred_team()]
        game_overview = mlbgame.overview(game.game_id)
        debug.log("Preferred Team's Game Status: {}, {} {}".format(
            game_overview.status, game_overview.inning_state, game_overview.inning))
        return game_overview
def __init__(self):
    dialog = xbmcgui.Dialog()
    try:
        urllib.urlcleanup()
        # urllib.urlretrieve(URLhome+'autoupdate.txt',ScriptPath+'autoupdate.py')
    except:
        dialog.ok("Wake on Lan Installer", "Master Server Connection Problems",
                  "Please try again later.")
        pass
    #dialog.close()
    options = ['Start Timothys Computer', 'Wake On Lan 2', 'Wake On Lan 3',
               'Wake On Lan 4', 'Wake On Lan 5', 'Change Settings', 'Exit Wake On Lan']
    choice = dialog.select("PC Network...", options)
    if choice == 6:
        xbmcgui.Window.close()
    if choice == 5:
        settingsmgr.OpenControlPanel(settingsfile)
        WoL()
    if choice == 4:
        dialog = xbmcgui.Dialog()
        confirm = dialog.yesno('Wake On Lan', "Do you want to wake this pc?",
                               "Mac: " + settings['mac5'], "Name: " + settings['mac5name'])
        if confirm:
            wake_on_lan(settings['mac5'])
            WoL()
        else:
            WoL()
    if choice == 3:
        dialog = xbmcgui.Dialog()
        confirm = dialog.yesno('Wake On Lan', "Do you want to wake this pc?",
                               "Mac: " + settings['mac4'], "Name: " + settings['mac4name'])
        if confirm:
            wake_on_lan(settings['mac4'])
            WoL()
        else:
            WoL()
    if choice == 2:
        dialog = xbmcgui.Dialog()
        confirm = dialog.yesno('Wake On Lan', "Do you want to wake this pc?",
                               "Mac: " + settings['mac3'], "Name: " + settings['mac3name'])
        if confirm:
            wake_on_lan(settings['mac3'])
            WoL()
        else:
            WoL()
    if choice == 1:
        dialog = xbmcgui.Dialog()
        confirm = dialog.yesno('Wake On Lan', "Do you want to wake this pc?",
                               "Mac: " + settings['mac2'], "Name: " + settings['mac2name'])
        if confirm:
            wake_on_lan(settings['mac2'])
            WoL()
        else:
            WoL()
    if choice == 0:
        dialog = xbmcgui.Dialog()
        confirm = dialog.yesno('Wake On Lan', "Do you want to wake this pc?",
                               "Mac: " + settings['mac1'], "Name: " + settings['mac1name'])
        if confirm:
            wake_on_lan(settings['mac1'])
            WoL()
        else:
            WoL()
    dialog.close()
def song_download():
    song = user_input('Enter the name of song: ')
    try:
        query_string = encode({"search_query": song})
        content = urlopen("http://www.youtube.com/results?" + query_string)
        if version == 3:
            ##I hate RE
            search_results = re.findall(r'href=\"\/watch\?v=(.{11})', content.read().decode())
        else:
            ##ok!! if its not going work! I'm gonna kill you!!!
            search_results = re.findall(r'href=\"\/watch\?v=(.{11})', content.read())
            ##finally(Thanks to git)
    except:
        print('Something happened!!')
        exit(1)
    # youtube2mp3 API
    downloadLinkOnly = ('http://www.youtubeinmp3.com/fetch/?video=' +
                        'http://www.youtube.com/watch?v=' + search_results[0])
    try:
        print('Downloading %s' % song)
        urllib.urlretrieve(downloadLinkOnly, filename='%s.mp3' % song)
        urllib.urlcleanup()
    except:
        print('Error %s' % song)
        exit(1)
def refresh_seeds():
    # Get a new batch of random bits from our friends at Fourmilab
    try:
        os.remove(cache_file)
    except:
        pass
    urllib.urlretrieve("http://www.fourmilab.ch/cgi-bin/uncgi/Hotbits?nbytes=240&fmt=hex", cache_file)
    # We know they're good because they're imported from Switzerland
    urllib.urlcleanup()
    # Load the web page source
    bitsFile = open(cache_file)
    lines = bitsFile.readlines()
    bitsFile.close()
    randomLines = []
    # Filter out the good lines
    for line in lines:
        if re.match(r'[0123456789ABCDEF]+', line):
            randomLines.append(line)
    os.remove(cache_file)
    outLine = ""
    # Stuff them into one big line and write it back out
    for line in randomLines:
        outLine = outLine + string.strip(line)
    bitsFile = open(cache_file, "w")
    bitsFile.write(outLine)
    bitsFile.close()
def download_http_content(url, user=None):
    urlcleanup()
    #proxy = settings.PROXY_SERVER
    #The proxy must not be used with local address
    host = urlparse(url)[1]

    #manage proxies with authentication (get it from environment)
    proxy = None
    for proxy_name in settings.NOT_PROXY_FOR:
        if host.startswith(proxy_name):
            proxy = urllib2.ProxyHandler({})  # no proxy
            break
    if not proxy:
        #Host is not included in the NOT_PROXY_FOR list => proxy is needed!
        proxy = urllib2.ProxyHandler()  # proxies from environment
    opener = urllib2.build_opener(proxy)
    headers = {
        'User-Agent': 'Mozilla/5.0 (%(system)s %(machine)s;U) Wirecloud/%(wirecloud_version)s Python-urllib2/%(urllib2_version)s' % VERSIONS,
        'Accept': '*/*',
        'Accept-Language': 'en-gb,en;q=0.8,*;q=0.7',
        'Accept-Charset': 'utf-8;q=1,*;q=0.2',
    }
    if user and not user.is_anonymous():
        headers.update({
            'Remote-User': user.username,
        })
    request = urllib2.Request(url, None, headers)
    return opener.open(request).read()
def _setMoviePicture(self, imageUrl):
    """
    'Private' method that renders the image designated by 'imageUrl'.
    """
    imageFilename = imageUrl.split("/")[-1]
    imagePath = "cache/" + imageFilename
    # Create 'cache' folder if it does not exist.
    if not os.path.exists("./cache/"):
        os.makedirs("./cache/")
    try:
        if not os.path.exists(imagePath):
            # print "Creating '%s'..." % ( imagePath )
            urllib.urlretrieve(imageUrl, imagePath)
            urllib.urlcleanup()
        try:
            # Scaffold image loading. If any exception arises for image
            # parsing, the 'image' file won't be locked.
            with open(imagePath, 'rb') as imageFile:
                image = Image.open(imageFile)
                self.photoImage = ImageTk.PhotoImage(image)
                self.picture.create_image(0, 0, image=self.photoImage, anchor=NW)
                return
        except IOError:
            print "Unable to load cache image '%s'." % (imagePath)
            os.remove(imagePath)
    except IOError:
        print "Unable to retrieve the movie image."
    self.clearMoviePicture()
def test03(self):
    import urllib
    r = urllib.urlretrieve("http://www.python.org", "webpage.html")
    print r
    with open("webpage.html") as f:
        print f.read()
    urllib.urlcleanup()
def iq_register(self, iq):
    """
    Register to a new VMCast.
    @type iq: xmpp.Protocol.Iq
    @param iq: the sender request IQ
    @rtype: xmpp.Protocol.Iq
    @return: a ready-to-send IQ containing the results
    """
    reply = iq.buildReply("result")
    url = iq.getTag("query").getTag("archipel").getAttr("url")
    try:
        if not url or url == "":
            raise Exception("IncorrectStanza", "Stanza must have url: %s" % str(iq))
        try:
            urllib.urlcleanup()
            f = urllib.urlopen(url)
        except:
            raise Exception("The given url doesn't exist. Can't register.")
        try:
            self.getFeed(f.read())
        except:
            raise Exception("The given url doesn't contain a valid VMCast feed. Can't register.")
        self.cursor.execute("INSERT INTO vmcastsources (url) VALUES ('%s')" % url)
        self.database_connection.commit()
        self.parseRSS()
        self.entity.push_change("vmcasting", "register")
        self.entity.shout("vmcast", "I'm now registered to vmcast %s as asked by %s" % (url, iq.getFrom()))
    except Exception as ex:
        reply = build_error_iq(self, ex, iq, ARCHIPEL_ERROR_CODE_VMCASTS_REGISTER)
    return reply
def showInfo(self):
    if self.check == "true" and self.menulist:
        m_title = self["menulist"].getCurrent()[0][0]
        m_url = self["menulist"].getCurrent()[0][1]
        if m_url:
            #m_url = re.findall('(.*?)\.', m_url)
            #extra_imdb_convert = "._V1_SX320.jpg"
            #m_url = "http://ia.media-imdb.com/images/%s%s" % (m_url[0], extra_imdb_convert)
            print "EMC iMDB: Download Poster - %s" % m_url
            urllib._urlopener = AppURLopener()
            urllib.urlretrieve(m_url, self.path)
            urllib.urlcleanup()
            if os.path.exists(self.path):
                self.poster_resize(self.path, m_title)
                #ptr = LoadPixmap(self.path)
                #if ptr is None:
                #    ptr = LoadPixmap("/usr/lib/enigma2/python/Plugins/Extensions/EnhancedMovieCenter/img/no_poster.png")
                #    print "EMC iMDB: Load default NO Poster."
                #if ptr is not None:
                #    self["poster"].instance.setPixmap(ptr)
                #    print "EMC iMDB: Load Poster - %s" % m_title
            else:
                print "EMC iMDB: No url found for - %s" % m_title
        else:
            print "EMC iMDB: No url found for - %s" % m_title
def download(self, url):
    # note: original name "dowload" was a typo; self is needed for image_name()
    try:
        urlretrieve(url, self.image_name(url))
        urlcleanup()
    except:
        return False
    return True
def fire_url(url, params):
    try:
        urllib.urlcleanup()
        real_url = url + "?" + urllib.urlencode(params)
        #print "Firing url="+real_url
        file = urllib.urlopen(real_url)
    except:
        pass
def get_bing_pic():
    # bing url
    url = "http://www.bing.com/"
    urllib.urlcleanup()
    # both query parameters belong in a single dict; passing a second dict
    # would be (mis)read as urlencode's doseq flag
    args = urllib.urlencode({"setmkt": COUNTRY, "setlang": "match"})
    # open bing url
    page = urllib.urlopen(url, args)
    if page is None:
        print('open %s error' % (url))
        return -1
    # get html source code
    data = page.read()
    if not data:
        print('read %s content error' % url)
        return -1
    page.close()
    # parse picture url
    posleft = data.find(b'g_img={url:')
    if -1 == posleft:
        print('jpg url not found')
        return -1
    posright = data.find(b'\'', posleft + 12)
    if -1 == posright:
        print('jpg url not found')
        return -1
    jpgpath = data[posleft + 12:posright].decode("ascii")
    if 0 == cmp('/', jpgpath[0:1]):
        jpgurl = url + jpgpath
    else:
        jpgurl = jpgpath
    # make local file dir
    if 0 == cmp('Windows', get_platform()):
        localpath = TOP_DIR + time.strftime('bing\\%Y\\%m\\')
    else:
        localpath = TOP_DIR + time.strftime('bing/%Y/%m/')
    if not os.path.exists(localpath):
        os.makedirs(localpath)
    # make local file path
    localjpg = localpath + time.strftime('%d.jpg')
    print("remote file : %s" % jpgurl)
    print("local file : %s" % localjpg)
    # download jpg file
    urllib.urlretrieve(jpgurl, localjpg)
    urllib.urlcleanup()
    return 0
from win10toast import ToastNotifier
from Adafruit_IO import Client, Data
aio = Client('')
import os, re, urllib, sys

toaster = ToastNotifier()
version = sys.version_info[0]

if version == 2:
    user_input = raw_input
    import urllib2
    urlopen = urllib2.urlopen
    encode = urllib.urlencode
    retrieve = urllib.urlretrieve
    cleanup = urllib.urlcleanup   # bind the function; don't call it here
else:
    user_input = input
    import urllib.request
    import urllib.parse
    urlopen = urllib.request.urlopen
    encode = urllib.parse.urlencode
    retrieve = urllib.request.urlretrieve
    cleanup = urllib.request.urlcleanup   # bind the function; don't call it here

def ming(st):
    path = st
    list_ = os.listdir(path)
    for file_ in list_:
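# A quick usage sketch for the version-agnostic aliases defined above; because
# they are plain names bound once at import time, the same calls work under
# both interpreters. The URL and filename here are illustrative only.
def fetch_results(query):
    qs = encode({"search_query": query})   # urlencode / urllib.parse.urlencode
    body = urlopen("http://www.youtube.com/results?" + qs).read()
    retrieve("http://example.com/sample.mp3", "sample.mp3")
    cleanup()   # discard urlretrieve's temporary state
    return body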
# * genres.list.gz
# * ratings.list.gz
#
# _** Note: The total size of files mentioned above is roughly 30M. Running the following code may take a few minutes._

# In[2]:

import gzip

# Obtaining IMDB's text files
imdb_url_prefix = 'ftp://ftp.funet.fi/pub/mirrors/ftp.imdb.com/pub/'
imdb_files_list = ['genres.list.gz', 'ratings.list.gz']
for name in imdb_files_list:
    if not os.path.exists('./data/' + name):
        response = urllib.urlretrieve(imdb_url_prefix + name, './data/' + name)
        urllib.urlcleanup()  # urllib fails to download two files from a ftp source. This fixes the bug!
        with gzip.open('./data/' + name) as comp_file, open('./data/' + name[:-3], 'w') as reg_file:
            file_content = comp_file.read()
            reg_file.write(file_content)

# ### Step 3: downloading the "IMDB Prepared Data"
# During this tutorial, we discuss how the contents of the _genres.list.gz_ and _ratings.list.gz_ files can be integrated. However, to keep the tutorial concise, we avoid repeating the same process for all the files in the "IMDB Plain Text Data". The "IMDB Prepared Data" is the dataset that we obtained by integrating a number of files from the "IMDB Plain Text Data", and we will use it during later stages of this tutorial. The following code snippet downloads this dataset.

# In[3]:

imdb_url = 'https://anaconda.org/BigGorilla/datasets/1/download/imdb_dataset.csv'
if not os.path.exists('./data/imdb_dataset.csv'):  # avoid downloading if the file exists
    response = urllib.urlretrieve(imdb_url, './data/imdb_dataset.csv')
class CninfoSpider(Spider):
    name = "cninfo"
    allowed_domains = ["cninfo.com.cn"]
    start_urls = ["http://www.cninfo.com.cn/cninfo-new/disclosure/szse_main"]
    allstockjson_url = "http://www.cninfo.com.cn/cninfo-new/js/data/szse_stock.json"
    stockNumsInAllStockJson = 0
    homePage = r"http://www.cninfo.com.cn"
    socket.setdefaulttimeout(35)
    logger = logging.getLogger('CninfoCninfoSpiderLogger')
    jsonSzse_stocks = ''

    def GetJsonStockIndex(self, response):
        if 'jsonStockIndex=' == response.body[0:len('jsonStockIndex=')]:
            print '==============GetJsonStockIndex:', response.body, response.body[len('jsonStockIndex='):len(response.body)]
            return int(response.body[len('jsonStockIndex='):len(response.body)])
        else:
            return -1

    def generateUrl(self, url, stock, pageNum, jsonStockIndex):
        return url + '?stock=' + stock + '&pageNum=' + str(pageNum) + '&jsonStockIndex=' + str(jsonStockIndex)

    def parse(self, response):
        SzseStockFile = self.downloadAllStockJson(False)
        queryUrl = 'http://www.cninfo.com.cn/cninfo-new/announcement/query'
        jsonStockIndex = self.GetJsonStockIndex(response)
        if self.stockNumsInAllStockJson == 0:
            SzseStockFile = self.downloadAllStockJson(True)
            self.jsonSzse_stocks = json.loads(open(SzseStockFile, 'rb').read())
            for jsonSzse_stock in self.jsonSzse_stocks['stockList']:
                self.stockNumsInAllStockJson = self.stockNumsInAllStockJson + 1
            jsonStockIndex = StockStartIndex
            print "start get stock data,jsonStockIndex=", jsonStockIndex, 'self.stockNumsInAllStockJson:', self.stockNumsInAllStockJson
        if StockStartIndex <= jsonStockIndex and jsonStockIndex < min(self.stockNumsInAllStockJson, StockEndIndex):
            code = self.jsonSzse_stocks['stockList'][jsonStockIndex]['code']
            orgId = self.jsonSzse_stocks['stockList'][jsonStockIndex]['orgId']
            stock = self.jsonSzse_stocks['stockList'][jsonStockIndex]['code'] + '%2C' + self.jsonSzse_stocks['stockList'][jsonStockIndex]['orgId']
            pageNum = 1
            yield Request(self.generateUrl(queryUrl, stock, pageNum, jsonStockIndex),
                          callback=self.parseDetail,
                          meta={'code': code, 'orgId': orgId, 'pageNum': pageNum, 'jsonStockIndex': jsonStockIndex})
        elif jsonStockIndex == self.stockNumsInAllStockJson:
            print '====================================='
            print 'fetch stock data finished,please check if have fail lists in result/szse_stock_failList.json'
            print '====================================='
        else:
            print '====================================='
            print 'fetch stock data fail,exit!!! jsonStockIndex=', jsonStockIndex, response.url, response.body
            print 'please check fail lists in result/szse_stock_failList.json'
            print '====================================='

    def parseDetail(self, response):
        filename = 'result\szse_stock_failList.json'
        queryUrl = 'http://www.cninfo.com.cn/cninfo-new/announcement/query'
        startUrl = 'http://www.cninfo.com.cn/cninfo-new/disclosure/szse_main'
        print "********* enter parseDetail", response.url
        #filename = response.url.split("/")[-2]
        #open(filename, 'wb').write(response.body)
        jsonStockIndex = self.GetJsonStockIndex(response)
        if -1 == jsonStockIndex:
            # enter here only when data was fetched successfully
            jsonAnnouncements = json.loads(response.body_as_unicode())
            pageNum = jsonAnnouncements['pageNum']
            jsonStockIndex = jsonAnnouncements['jsonStockIndex']
            pageSumNums = 0
            pageSize = 30
            savedInfo = {"secCode": " ", "secName": " ", "announcementTitle": " ",
                         "adjunctUrl": " ", "pdfPath": " ", "announcementTime": " "}
            if 0 == jsonAnnouncements['totalRecordNum'] % pageSize:
                pageSumNums = jsonAnnouncements['totalRecordNum'] / pageSize
            else:
                pageSumNums = jsonAnnouncements['totalRecordNum'] / pageSize + 1
            print 'totalRecordNum and current pageNum:', pageSumNums, pageNum
            for announcement in jsonAnnouncements['announcements']:
                code = announcement['secCode']
                orgId = announcement['orgId']
                stock = code + '%2C' + orgId
                # Skip not needed pdf
                if not self.isNeededAnnouncementTitle(announcement['announcementTitle']):
                    continue
                # Download pdf
                companyFolder = self.createCompanyFolder(announcement['secCode'])
                if announcement["secName"] == None or announcement['announcementTitle'] == None:
                    pdfname = announcement['announcementTitle']
                else:
                    pdfname = announcement["secName"] + announcement['announcementTitle']
                filePath = self.downloadPDF(companyFolder, pdfname, announcement['adjunctUrl'], 5)
                if filePath == False:
                    print "Save download failed file info"
                    errorStr = "code:" + code + ",jsonStockIndex:" + str(jsonStockIndex) + ",pdfname:" + pdfname
                    errorStr = errorStr + ",adjunctUrl:" + announcement['adjunctUrl']
                    print "ERR:", errorStr
                    self.logger.info(errorStr)
                    outputDownloadPdfFailLists = codecs.open(DownloadPdfFailLists, 'a', 'utf-8')
                    outputDownloadPdfFailLists.write('\n')
                    outputDownloadPdfFailLists.write(errorStr)
                    outputDownloadPdfFailLists.close()
                # save info in Json
                savedInfo['secCode'] = announcement['secCode']
                savedInfo['secName'] = announcement['secName']
                savedInfo['announcementTitle'] = announcement['announcementTitle']
                savedInfo['adjunctUrl'] = announcement['adjunctUrl']
                savedInfo['pdfPath'] = filePath
                savedInfo['announcementTime'] = announcement['announcementTime']
                #savedInfo['announcementTime'] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(announcement['announcementTime']))
                if not self.isInfoInJson(savedInfo):
                    try:
                        savedInfofileread = codecs.open(SavedInfoFile, 'rb', 'utf-8')
                        readdata = savedInfofileread.read()[:-2]
                        if 20 < len(readdata):
                            readdata = readdata + ','
                        writedata = json.dumps(savedInfo, ensure_ascii=False, indent=2)
                        writedata = readdata + writedata + ']}'
                        savedInfofilewrite = codecs.open(SavedInfoFile, 'w', 'utf-8')
                        savedInfofilewrite.write(writedata)
                    finally:
                        savedInfofilewrite.close()
                        savedInfofileread.close()
            print "parseDetail: ########################################################"
            if pageSumNums > pageNum:
                # go to read next page of current stock code
                pageNum = pageNum + 1
                print "parseDetail: get next page:", str(pageNum), "pageSumNums:", str(pageSumNums), "jsonStockIndex:", str(jsonStockIndex)
                yield Request(self.generateUrl(queryUrl, stock, pageNum, jsonStockIndex),
                              callback=self.parseDetail,
                              meta={'code': code, 'orgId': orgId, 'pageNum': pageNum, 'jsonStockIndex': jsonStockIndex})
            else:
                # go to read next stock code
                yield Request(self.generateUrl(startUrl, '', 1, jsonStockIndex + 1),
                              callback=self.parse, meta={'jsonStockIndex': jsonStockIndex + 1})
        else:
            # fail to get current stock data, go to read next stock code
            yield Request(self.generateUrl(startUrl, '', 1, jsonStockIndex),
                          callback=self.parse, meta={'jsonStockIndex': jsonStockIndex})

    def createCompanyFolder(self, secCode):
        companyFolder = FinancialFolder + secCode
        if not os.path.exists(companyFolder):
            os.mkdir(companyFolder)
        return companyFolder

    def downloadPDF(self, companyFolder, reportName, downloadURL, downloadTime):
        downloadTime -= 1
        if downloadTime == 0:
            return False
        suffix = downloadURL[downloadURL.find('.'):].lower()
        print "downloadPDF suffix", suffix
        if SysStr == "Windows":
            pdfPath = companyFolder + '\\' + reportName + '.pdf'
            filePath = companyFolder + '\\' + reportName + suffix
        else:
            pdfPath = companyFolder + '/' + reportName + '.pdf'
            filePath = companyFolder + '/' + reportName + suffix
        if ".pdf" != suffix and os.path.exists(pdfPath):
            os.remove(pdfPath)
        realURL = self.homePage + "/" + downloadURL
        print "Download pdfPath:", filePath, ' realURL:', realURL
        try:
            if not os.path.exists(filePath):
                urllib.urlretrieve(realURL, filePath)
            else:
                print 'WRN: ', filePath, 'already exists'
            return filePath
        except Exception, e:
            urllib.urlcleanup()
            return self.downloadPDF(companyFolder, reportName, downloadURL, downloadTime)
        urllib.urlcleanup()
        gc.collect()
        return pdfPath
def UpdateOne(dir, toupdate):
    try:
        urllib.urlretrieve(updateserver + toupdate, GetFilePath(dir, toupdate))
        urllib.urlcleanup()
    except:
        pass
def authenticate(self, request):
    user = request.user or None
    access_token = None  # assume logging in normal way
    params = {}
    params["client_id"] = CONSUMER_KEY
    params["client_secret"] = CONSUMER_SECRET
    params["redirect_uri"] = request.build_absolute_uri(reverse("facebook_login_done"))
    params["code"] = request.GET.get('code', '')
    url = "https://graph.facebook.com/oauth/access_token?" + urllib.urlencode(params)
    from cgi import parse_qs
    userdata = urllib.urlopen(url).read()
    res_parse_qs = parse_qs(userdata)
    # Could be a bot query
    if 'access_token' not in res_parse_qs:
        return None
    access_token = res_parse_qs['access_token'][-1]
    url = "https://graph.facebook.com/me?access_token=" + access_token
    fb_data = json.loads(urllib.urlopen(url).read())
    uid = fb_data["id"]
    if not fb_data:
        return None
    try:
        same_email_user = UserProfile.objects.get(email=fb_data.get('email', None))
    except:
        same_email_user = None
    if user.is_anonymous() and not same_email_user:
        try:
            fb_user = FacebookUserProfile.objects.get(facebook_uid=uid)
            fb_user.accesstoken = access_token
            fb_user.save()
            return fb_user.user
        except FacebookUserProfile.DoesNotExist:
            fb_picture_url = "http://graph.facebook.com/%s/picture?type=large" % uid
            username = fb_data.get('username')
            if not username:
                username = uid
            userProfile = UserProfile.objects.create(username=username)
            userProfile.first_name = fb_data['first_name']
            userProfile.last_name = fb_data['last_name']
            if fb_data['gender'] == "male":
                userProfile.gender = 'M'
            else:
                if fb_data['gender'] == "female":
                    userProfile.gender = 'F'
            userProfile.email = fb_data.get('email', None)
            userProfile.isVerified_email = True
            userProfile.location = fb_data.get('location', fb_data).get('name', None)
            userProfile.save()
            img = urllib.urlretrieve(fb_picture_url)
            userProfile.profile_picture.save("Facebook-profile.jpg", File(open(img[0])))
            urllib.urlcleanup()
            userProfile.facebook_link = fb_data.get('link', None)
            from django.contrib.auth.hashers import make_password
            raw_pass = ''.join(random.choice(string.ascii_uppercase + string.digits) for x in range(12))
            tmp_pass = make_password(raw_pass)
            userProfile.password = tmp_pass
            userProfile.save()
            fb_profile = FacebookUserProfile(facebook_uid=uid, user=userProfile,
                                             email=fb_data['email'], url=fb_data['link'],
                                             location=userProfile.location, accesstoken=access_token)
            fb_profile.save()
            return userProfile
    else:
        try:
            if same_email_user:
                user = same_email_user
            user_facebook = FacebookUserProfile.objects.get(user=user)
            if user_facebook.facebook_uid == uid:
                return user_facebook.user
            else:
                request.session['fb_accesstoken'] = access_token
                next = request.session['next'] or ""
                if next:
                    del request.session['next']
                    return HttpResponseRedirect(next)
                else:
                    return HttpResponseRedirect(reverse('sync_facebook'))
        except FacebookUserProfile.DoesNotExist:
            try:
                user_facebook = FacebookUserProfile.objects.get(facebook_uid=uid)
                request.session['fb_accesstoken'] = access_token
                next = request.session['next'] or ""
                if next:
                    del request.session['next']
                    return HttpResponseRedirect(next)
                else:
                    return HttpResponseRedirect(reverse('sync_facebook'))
            except FacebookUserProfile.DoesNotExist:
                fb_profile = FacebookUserProfile(facebook_uid=uid,
                                                 user=UserProfile.objects.get(username=user.username),
                                                 email=fb_data['email'], url=fb_data['link'],
                                                 location=fb_data.get('location', fb_data).get('name', None),
                                                 accesstoken=access_token)
                fb_profile.save()
                return fb_profile.user
"""total_size is reported in bytes. block_size is the amount read each time. blocks_read is the number of blocks successfully read. """ if not blocks_read: print 'Connection opened' return if total_size < 0: # Unknown size print 'Read %d blocks (%d bytes)' % (blocks_read, blocks_read * block_size) else: amount_read = blocks_read * block_size print 'Read %d blocks, or %d/%d' % (blocks_read, amount_read, total_size) return filename, msg = urllib.urlretrieve('http://blog.doughellmann.com/', reporthook=reporthook) """ print print 'File:', filename print 'Headers:' print msg print 'File exists before cleanup:', os.path.exists(filename) urllib.urlcleanup() print 'File still exists:', os.path.exists(filename) """
def dlFile(url, f_path):
    printIfVerbose("Downloading %s" % url)
    urllib.urlretrieve(url, f_path)
    urllib.urlcleanup()
    printIfVerbose("Finished.")
    return True
def clean_up(self):
    '''destroy all temporary files created during the render'''
    for tempfile in self.tempfiles:
        os.remove(tempfile)
    urlcleanup()
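# The final urlcleanup() matters because urlretrieve() called without a
# destination parks the download in a temporary file that stays on disk
# until urlcleanup() runs. A small demonstration (URL illustrative):
import os
import urllib

tmpfile, headers = urllib.urlretrieve('http://www.example.com/')
print os.path.exists(tmpfile)   # True: the temp file is on disk
urllib.urlcleanup()
print os.path.exists(tmpfile)   # False: cleanup removed it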
def _fetchsinglerecnum(self, newname=None, todir='.', fetch_rn=None, extn_filt=''):
    """
    Fetch a single file. This will have an assigned name or maybe names,
    so you might want to rename it. Most users won't use this, but will
    probably want just fetch(). Args are the recnum, optional new filename
    (the system adds an extension) and optional directory. If no new
    filename is given a name based on the recnum is used, which is just
    the name that the server delivers. The routine sets the status message
    and returns None or the filename stem (i.e. without the extensions
    which distinguish multiple files). That is, there may be multiple
    files, all with this name stem plus various extensions.
    """
    # $$ the logic of some of this may be redundant
    if fetch_rn == None:
        fetch_rn = self.recnum[0]
    if fetch_rn == None:
        self.statmsg = 'No recnum given or obtained by query'
        return None
    if newname == None:
        self.filename = fetch_rn
    else:
        self.filename = newname
    # Is it possibly already in cache?
    if cacheroot != None:  # or else no cache
        fnl = glob.glob(self._getcache() + '/' + fetch_rn + '.*')
        if len(fnl) > 0:
            # They seem to be in cache
            for fn in fnl:
                n, extn = os.path.basename(fn).split('.', 1)
                if extn_filt == '' or extn_filt == extn:
                    shutil.copy(fn, todir + '/' + self.filename + '.' + extn)
            self.statmsg = 'File from cache'
            return self.filename  # i.e. either recnum or arg
    # Not in cache, need to get
    urllib.urlcleanup()
    record_set = self.series + '[:#' + fetch_rn + ']'
    message = urllib.urlencode({'rsquery': record_set, 'n': '1'})
    message = urllib.quote(urllib.unquote_plus(message), '&=/')  # urlencode adds not understood +'s
    full_url = 'http://' + netdrmsserver + fetch_url
    try:
        r = urllib.urlretrieve(full_url, data=message)  # file is r[0]
    except:
        self.statmsg = 'No response from server'  # something really odd, not e.g. 404
        return None
    fetchname = _header2fn(r[1])  # filename from mime header
    if fetchname == None:
        self.statmsg = 'Badly formed server header'
        return None
    to_snip = _tarpath(self.series)  # dir in tarfile plus file name
    if fetchname.endswith('.tar'):
        if not tarfile.is_tarfile(r[0]):
            self.statmsg = 'Tar file from server unreadable'
            return None
        tf = tarfile.open(r[0])
        res = False
        for fn in tf.getnames():
            if fn.startswith(to_snip):  # ignore other content
                tf.extract(fn)
                rn, extn = fn[len(to_snip):].split('.', 1)
                if rn != fetch_rn:
                    self.statmsg = 'Wrong data files in tar file'
                    return None
                if extn_filt == '' or extn_filt == extn:
                    self._placedata(fn, fetch_rn, todir, self.filename, extn)
                    res = True
        if not res:
            self.statmsg = 'Could not find data files in tar file'
            return None
    else:
        # plain file, not a tar file
        to_snip = os.path.basename(to_snip)
        if not fetchname.startswith(to_snip):
            self.statmsg = 'Data file name does not match query'  # name looks v. wrong
            return None
        rn, extn = fetchname[len(to_snip):].split('.', 1)
        if rn != fetch_rn:
            self.statmsg = 'Wrong data file returned'
            return None
        if extn_filt == '' or extn_filt == extn:
            self._placedata(r[0], fetch_rn, todir, self.filename, extn)
    self.statmsg = 'New file from server'
    return self.filename
def get_files_from_types(types, base_name, ftp, path, ftp_true=False):
    """
    from https://github.com/ctSkennerton/scriptShed/blob/master/download_ncbi_assembly.py
    Download the genome
    input:
        types (list)       : list of NCBI type suffix files
        base_name (string) : basename of the genome file
        ftp (ftp object)   : ftp session object to the NCBI FTP server
        path (string)      : output folder
        ftp_true           : direct FTP URL of the assembly, or False
    return:
        out : path of the downloaded genome
    """
    out = str()
    if ftp_true:
        # download
        for t in types:
            f = path + base_name + t
            try:
                urllib.urlretrieve(ftp_true + '/' + base_name + t, f)
            except IOError:
                print('No proteomic file available for : {}'.format(base_name))
                return False
            urllib.urlcleanup()
            # extract
            with gzip.open(f, 'rb') as f_in:
                out = f.replace('.gz', '')
                with open(out, 'wb') as f_out:
                    shutil.copyfileobj(f_in, f_out)
            # remove file
            os.remove(f)
        return out
    else:
        for g in ftp.nlst():
            for t in types:
                if g == base_name + t:
                    # download
                    try:
                        urllib.urlretrieve("ftp://ftp.ncbi.nlm.nih.gov/{}/{}".format(ftp.pwd(), g), path + g)
                    except IOError:
                        print('No proteomic file available for : {}'.format(g))
                        return False
                    urllib.urlcleanup()
                    # extract
                    with gzip.open(path + g, 'rb') as f_in:
                        out = path + g.replace('.gz', '')
                        with open(out, 'wb') as f_out:
                            shutil.copyfileobj(f_in, f_out)
                    # remove file
                    os.remove(path + g)
        return out
def get(self, url, update=False):
    """Looks in the cache if the file is there and takes the cached one.
    Otherwise it is downloaded first.

    Knows how to deal with http:// and svn:// URLs.

    :Return:
      file handler
    """
    # look whether it is compressed
    cext = url.split('.')[-1]
    if cext in ['gz', 'bz2']:
        target_url = url[:-1 * len(cext) - 1]
    else:
        # assume not compressed
        target_url = url
        cext = None
    # turn url into a filename -- mimic what APT does for
    # /var/lib/apt/lists/
    tfilename = '_'.join(target_url.split('/')[2:])
    # if we need to download anyway do not search
    if update:
        cfilename = os.path.join(self.cachedir, tfilename)
    else:
        # look for the uncompressed file anywhere in the cache
        cfilename = None
        for cp in [self.cachedir] + self.ro_cachedirs:
            if os.path.exists(os.path.join(cp, tfilename)):
                cfilename = os.path.join(cp, tfilename)
    # nothing found?
    if cfilename is None:
        # add cache item
        cfilename = os.path.join(self.cachedir, tfilename)
        update = True
    # if update needed -- download
    if update:
        #print 'Caching file from %s' % url
        if url.startswith('svn://'):
            # export from SVN
            pysvn.Client().export(url, cfilename)
        if url.startswith('http://'):
            # download
            tempfile, ignored = urllib.urlretrieve(url)
            # decompress
            decompressor = None
            if cext == 'gz':
                decompressor = 'gzip'
            elif cext == 'bz2':
                decompressor = 'bzip2'
            elif cext == None:
                decompressor = None
            else:
                raise ValueError, \
                      "Don't know how to decompress %s files" % cext
            if not decompressor is None:
                if subprocess.call([decompressor, '-d', '-q', '-f', tempfile]) == 1:
                    raise RuntimeError, \
                          "Something went wrong while decompressing '%s'" % tempfile
            # move decompressed file into cache
            shutil.move(os.path.splitext(tempfile)[0], cfilename)
            # XXX do we need that if explicit filename is provided?
            urllib.urlcleanup()
    # open cached file
    fh = codecs.open(cfilename, 'r', 'utf-8')
    return fh
def downloadfiles(maxyears):
    # get parameters for and start constructing filenames
    URLroot = "ftp://ftp.ncdc.noaa.gov/pub/data/gsod/"  # base URL for all files
    filesuffix = ".op.gz"  # suffix for all the raw files
    firstyear = 1928  # this is the first year available for any station
    USAFcode = raw_input("Please enter the USAF code for the station you want "
                         "data for (first column of "
                         "ftp://ftp.ncdc.noaa.gov/pub/data/inventories/ISH-HISTORY.TXT )\n")
    WBANcode = raw_input("Please enter the WBAN code for the station you want "
                         "data for (second column of "
                         "ftp://ftp.ncdc.noaa.gov/pub/data/inventories/ISH-HISTORY.TXT )\n")
    # e.g. Seattle (SEA) is USAF 727930 WBAN 24233
    # Portland, OR is USAF 726980 WBAN 24229
    # LHR is USAF 037720 WBAN 99999
    stationname = raw_input("What would you like to call this station?\n")
    stationcode = str(USAFcode) + '-' + str(WBANcode)
    yearsdownloaded = 0
    for year in range(datetime.datetime.now().year - 1, firstyear, -1):
        # stopping before the current year because it's necessarily incomplete, and
        # looping back from last year, on the assumption that more recent years
        # are of greater interest and have higher quality data.
        # First we assemble the URL for the year of interest
        fullURL = URLroot + str(year) + '/' + stationcode + '-' + str(year) + filesuffix
        if verbose:
            sys.stdout.write("Trying " + fullURL + " ... ")
            sys.stdout.flush()
        # Now we try to download the file, with very basic error handling if verbose
        try:
            urllib.urlretrieve(fullURL, str(year) + filesuffix)
            if verbose:
                sys.stdout.write("retrieved ... ")
            yearsdownloaded += 1
        except IOError as e:
            if verbose:
                print(" ")
                print(e)
        else:
            # if we got the file without any errors, then
            # uncompress the file
            f_in = gzip.open(str(year) + filesuffix)
            if verbose:
                sys.stdout.write("decompressed ... ")
            # and start writing the output
            if yearsdownloaded == 1:
                # since it's the first year, open the file and write the header row
                firstyear = year
                f_out = open(stationname + '.csv', 'w')
                csv.writer(f_out).writerow(["Station", "Year", "Month", "Day",
                    "MeanTemp", "NTempObs", "DewPoint", "NDewPointObs",
                    "SeaLevelPressure", "NSeaLevPressObs", "StationPressure",
                    "NStatPressObs", "Visibility", "NVisibilityObs", "MeanWindSpeed",
                    "NWindObs", "MaxSustWindSpeed", "MaxWindGust", "MaxTemp",
                    "MaxTempSource", "MinTemp", "MinTempSource", "PrecipAmount",
                    "NPrecipReportHours", "PrecipFlag", "SnowDepth", "Fog", "Rain",
                    "Snow", "Hail", "Thunder", "Tornado"])
            # This function does the actual ETL
            parsefile(f_in, f_out, stationname)
            # clean up after ourselves
            f_in.close()
            os.remove(str(year) + filesuffix)
            urllib.urlcleanup()
            if yearsdownloaded == maxyears:
                break  # if we have enough years, then end this loop
            else:
                time.sleep(5)  # slow down here to stop the server locking us out
        time.sleep(1)
    print("Successfully downloaded " + str(yearsdownloaded) + " years between " +
          str(year) + " and " + str(firstyear) + " for station " + stationname)
    if yearsdownloaded < maxyears:
        # If we didn't get as many years as requested, alert the user
        print("No more years are available at the NOAA website for this station.")
    f_out.close()
def setCuts(self, ra, dec, roi, triggerTime, tstart, tstop, timetype='MET', strict=False):
    self.ra = float(ra)
    self.dec = float(dec)
    self.roi = float(roi)
    self.triggerTime = float(triggerTime)
    self.tstart = float(tstart)
    self.tstop = float(tstop)
    self.timetype = timetype
    # Retrieve the HTML page with the input mask, to get the maximum
    # time available in the server (this is needed for BA analysis)
    temporaryFileName = "__temp_query_result.html"
    try:
        os.remove(temporaryFileName)
    except:
        pass
    urllib.urlcleanup()
    try:
        urllib.urlretrieve("http://fermi.gsfc.nasa.gov/cgi-bin/ssc/LAT/LATDataQuery.cgi",
                           temporaryFileName)
    except socket.timeout:
        raise GtBurstException(
            11, "Time out when connecting to the server. Check your internet connection, or that you can access http://fermi.gsfc.nasa.gov, then retry")
    except:
        raise GtBurstException(
            1, "Problems with the download. Check your connection then retry")
    htmlFile = open(temporaryFileName)
    maxTimeLimit = ''
    for line in htmlFile.readlines():
        res = re.findall(
            '(.+)The event database currently holds [0-9]+ events, collected between (.+) UTC and (.+) UTC \(Mission Elapsed Time \(MET\) ([0-9]+) to ([0-9]+) seconds\)',
            line)
        if len(res) != 0:
            # Found
            maxTimeLimit = res[-1][-1]
            break
    htmlFile.close()
    os.remove(temporaryFileName)
    if maxTimeLimit.replace(" ", "") == '':
        raise GtBurstException(
            12, "The LAT data server is probably down for maintenance or loading new data. Check the page http://fermi.gsfc.nasa.gov/cgi-bin/ssc/LAT/LATDataQuery.cgi or retry later.")
    else:
        maxTimeLimit = float(maxTimeLimit)
    if maxTimeLimit < self.tstop:
        if strict:
            # Fail
            raise GtBurstException(
                14, "The requested time limit %s is too large. Data are available up to %s." % (self.tstop, maxTimeLimit - 1))
            return maxTimeLimit - 1
        else:
            print("\n\nWARNING: The requested time limit %s is too large. Data are available up to %s. Will download up to %s.\n\n"
                  % (self.tstop, maxTimeLimit - 1, maxTimeLimit - 1))
            self.tstop = float(maxTimeLimit) - 1
#[Out]# ParseResult(scheme='', netloc='', path='hl=en&sa=X&ei=v_3tTaPHGsX10gG2_aWZCA&ved=0CBoQvwUoAQ&q=%22graphs+such+as+call+graphs%22&spell=1&biw=1283&bih=684', params='', query='', fragment='')
import urllib
urllib.unquote(s)
#[Out]# 'https://encrypted.google.com/search?hl=en&sa=X&ei=v_3tTaPHGsX10gG2_aWZCA&ved=0CBoQvwUoAQ&q="graphs+such+as+call+graphs"&spell=1&biw=1283&bih=684'
urllib.urlparse(urllib.unquote(s))
urlparse.urlparse(urllib.unquote(s))
#[Out]# ParseResult(scheme='https', netloc='encrypted.google.com', path='/search', params='', query='hl=en&sa=X&ei=v_3tTaPHGsX10gG2_aWZCA&ved=0CBoQvwUoAQ&q="graphs+such+as+call+graphs"&spell=1&biw=1283&bih=684', fragment='')
urllib.urlparse(urllib.unquote(s))
urlparse.urlparse(urllib.unquote(s))
#[Out]# ParseResult(scheme='https', netloc='encrypted.google.com', path='/search', params='', query='hl=en&sa=X&ei=v_3tTaPHGsX10gG2_aWZCA&ved=0CBoQvwUoAQ&q="graphs+such+as+call+graphs"&spell=1&biw=1283&bih=684', fragment='')
urlparse.urlparse(urllib.unquote(s)).query
#[Out]# 'hl=en&sa=X&ei=v_3tTaPHGsX10gG2_aWZCA&ved=0CBoQvwUoAQ&q="graphs+such+as+call+graphs"&spell=1&biw=1283&bih=684'
urlparse.urlparse(urlparse.urlparse(urllib.unquote(s)).query)
#[Out]# ParseResult(scheme='', netloc='', path='hl=en&sa=X&ei=v_3tTaPHGsX10gG2_aWZCA&ved=0CBoQvwUoAQ&q="graphs+such+as+call+graphs"&spell=1&biw=1283&bih=684', params='', query='', fragment='')
#?urllib.urlcleanup
urllib.urlcleanup(s)
#?urllib.splitquery
urllib.splitquery(s)
#[Out]# ('https://encrypted.google.com/search', 'hl=en&sa=X&ei=v_3tTaPHGsX10gG2_aWZCA&ved=0CBoQvwUoAQ&q=%22graphs+such+as+call+graphs%22&spell=1&biw=1283&bih=684')
url.query
#[Out]# 'hl=en&sa=X&ei=v_3tTaPHGsX10gG2_aWZCA&ved=0CBoQvwUoAQ&q=%22graphs+such+as+call+graphs%22&spell=1&biw=1283&bih=684'
url.query.split('&')
#[Out]# ['hl=en', 'sa=X', 'ei=v_3tTaPHGsX10gG2_aWZCA', 'ved=0CBoQvwUoAQ', 'q=%22graphs+such+as+call+graphs%22', 'spell=1', 'biw=1283', 'bih=684']
url.query.split('&q=')
#[Out]# ['hl=en&sa=X&ei=v_3tTaPHGsX10gG2_aWZCA&ved=0CBoQvwUoAQ', '%22graphs+such+as+call+graphs%22&spell=1&biw=1283&bih=684']
url.query.split('&q=')[1]
#[Out]# '%22graphs+such+as+call+graphs%22&spell=1&biw=1283&bih=684'
urlparse.parse_qs(url.query)
#[Out]# {'ei': ['v_3tTaPHGsX10gG2_aWZCA'], 'bih': ['684'], 'spell': ['1'], 'q': ['"graphs such as call graphs"'], 'ved': ['0CBoQvwUoAQ'], 'hl': ['en'], 'biw': ['1283'], 'sa': ['X']}
urlparse.parse_qs(url.query)['q']
#[Out]# ['"graphs such as call graphs"']
def mapSpecies(mousepeptrackfilename):
    RETRY_TIME = 20.0
    mouseTohumanfilepath = os.path.join(os.getcwd(), 'MouseToHuman.tsv')
    print("Extracting Mouse to Human Map data, job starts", str(datetime.datetime.now()))
    # increase the csv module's field size limit
    csv.field_size_limit(int(ctypes.c_ulong(-1).value // 2))
    try:
        urllib.urlretrieve(
            'http://www.informatics.jax.org/downloads/reports/HOM_MouseHumanSequence.rpt',
            mouseTohumanfilepath)
        urllib.urlcleanup()
    except:
        print("Unable to download the MouseToHuman.tsv file!")
    colnameMousHu = [
        'HomoloGene ID', 'Common Organism Name', 'NCBI Taxon ID', 'Symbol',
        'EntrezGene ID', 'Mouse MGI ID', 'HGNC ID', 'OMIM Gene ID',
        'Genetic Location', 'Genomic Coordinates (mouse: , human: )',
        'Nucleotide RefSeq IDs', 'Protein RefSeq IDs', 'SWISS_PROT IDs'
    ]
    mouseHumandata = []
    homologID = []
    with open(mouseTohumanfilepath) as mhtsvfile:
        mhreader = csv.DictReader(mhtsvfile, delimiter='\t')
        for mhrow in mhreader:
            mhtemplist = []
            for i in colnameMousHu:
                mhtempdata = str(mhrow[i]).strip()
                mhtemplist.append(mhtempdata)
            if len(mhtemplist[-1].strip()) > 0:
                homologID.append(mhtemplist[0])
                mouseHumandata.append(mhtemplist)
    homologID = list(set(homologID))
    homologID.sort()
    mousehumandic = {}
    for homologidItem in homologID:
        tempHumanHomoUniID = ''
        tempMouseHomoUniID = ''
        for item in mouseHumandata:
            if homologidItem == item[0]:
                if 'mouse' in item[1].strip().lower():
                    tempMouseHomoUniID = item[-1].strip()
                else:
                    tempHumanHomoUniID = item[-1].strip()
        if (len(tempMouseHomoUniID.strip()) > 0
                and len(tempHumanHomoUniID.strip()) > 0
                and tempHumanHomoUniID.strip().upper() != 'NA'):
            mousehumandic[tempMouseHomoUniID] = tempHumanHomoUniID
    colname = [
        'UniProtKB Accession', 'Protein', 'Gene', 'Organism',
        'Peptide Sequence', 'Summary Concentration Range Data',
        'All Concentration Range Data',
        'All Concentration Range Data-Sample LLOQ Based', 'Peptide ID',
        'Special Residues', 'Molecular Weight', 'GRAVY Score', 'Transitions',
        'Retention Time', 'Analytical inofrmation', 'Gradients',
        'AAA Concentration', 'CZE Purity', 'Panel', 'Knockout', 'LLOQ',
        'ULOQ', 'Sample LLOQ', 'Protocol', 'Trypsin', 'QC. Conc. Data',
        'Human UniProtKB Accession'
    ]
    finalresult = []
    finalresult.append(colname)
    humanUniprotID = []
    with open(mousepeptrackfilename) as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        for row in reader:
            templist = []
            for i in colname[:-1]:
                tempdata = str(row[i]).strip()
                templist.append(tempdata)
            if len(str(templist[0]).strip()) > 0:
                if templist[0].split('-')[0] in mousehumandic:
                    humanUniprotID.append(mousehumandic[templist[0].split('-')[0]])
                    templist.append(mousehumandic[templist[0].split('-')[0]])
                else:
                    templist.append('NA')
                finalresult.append(templist)
    with open(mousepeptrackfilename, 'wb') as pf:
        pwriter = csv.writer(pf, delimiter='\t')
        pwriter.writerows(finalresult)
    disGenDataDicName = disGenData()
    #disGenDataDicName = 'disGen.obj'
    disGenDataDic = cPickle.load(open(disGenDataDicName, 'rb'))
    unqhumanUniprotID = list(set(humanUniprotID))
    humanUniprotfuncinfodic = {}
    countProt = 0
    for subcode in unqhumanUniprotID:
        time.sleep(2)
        drugbanklist = []
        PN = 'NA'
        GN = 'NA'
        OG = 'NA'
        OGID = 'NA'
        dislist = []
        unidislist = []
        unidisURLlist = []
        disgendislist = []
        disgendisURLlist = []
        GoIDList = []
        GoNamList = []
        GoTermList = []
        GOinfo = []
        try:
            countProt += 1
            if countProt % 1000 == 0:
                print str(countProt), "th protein Protein Name, Gene, Organism Name, drug bank data, disease data job starts", str(datetime.datetime.now())
            SGrequestURL = "https://www.uniprot.org/uniprot/" + str(subcode) + ".xml"
            SGunifile = urllib.urlopen(SGrequestURL)
            SGunidata = SGunifile.read()
            SGunifile.close()
            try:
                SGunidata = minidom.parseString(SGunidata)
                try:
                    drugdata = SGunidata.getElementsByTagName('dbReference')
                    for duItem in drugdata:
                        if (duItem.attributes['type'].value).upper() == 'DRUGBANK':
                            try:
                                drugname = str(duItem.getElementsByTagName('property')[0].attributes['value'].value).strip()
                                drugid = str(duItem.attributes['id'].value).strip()
                                durl = '<a target="_blank" href="https://www.drugbank.ca/drugs/' + drugid + '">' + drugname + '</a>'
                                drugbanklist.append(durl)
                            except:
                                pass
                        if (duItem.attributes['type'].value).strip() == 'NCBI Taxonomy':
                            try:
                                OGID = str(duItem.attributes['id'].value).strip()
                            except:
                                pass
                except IndexError:
                    pass
                try:
                    godata = SGunidata.getElementsByTagName('dbReference')
                    for gItem in godata:
                        if (gItem.attributes['type'].value).upper() == 'GO':
                            try:
                                gonamedetails = (str(gItem.getElementsByTagName('property')[0].attributes['value'].value).strip()).split(':')[1]
                                gotermdetails = (str(gItem.getElementsByTagName('property')[0].attributes['value'].value).strip()).split(':')[0]
                                GoNamList.append(gonamedetails)
                                goid = str(gItem.attributes['id'].value).strip()
                                GoIDList.append(goid)
                                tempGoTerm = None
                                if gotermdetails.lower() == 'p':
                                    tempGoTerm = 'Biological Process'
                                if gotermdetails.lower() == 'f':
                                    tempGoTerm = 'Molecular Function'
                                if gotermdetails.lower() == 'c':
                                    tempGoTerm = 'Cellular Component'
                                GoTermList.append(tempGoTerm)
                                tempGOData = gonamedetails + ';' + goid + ';' + tempGoTerm
                                GOinfo.append(tempGOData)
                            except:
                                pass
                        if (gItem.attributes['type'].value).strip() == 'NCBI Taxonomy':
                            try:
                                OGID = str(gItem.attributes['id'].value).strip()
                            except:
                                pass
                except IndexError:
                    pass
                try:
                    try:
                        PN = (((SGunidata.getElementsByTagName('protein')[0]).getElementsByTagName('recommendedName')[0]).getElementsByTagName('fullName')[0]).firstChild.nodeValue
                    except:
                        PN = (((SGunidata.getElementsByTagName('protein')[0]).getElementsByTagName('submittedName')[0]).getElementsByTagName('fullName')[0]).firstChild.nodeValue
                except IndexError:
                    pass
                try:
                    try:
                        GN = ((SGunidata.getElementsByTagName('gene')[0]).getElementsByTagName('name')[0]).firstChild.nodeValue
                    except:
                        GN = 'NA'
                except IndexError:
                    pass
                try:
                    try:
                        OG = ((SGunidata.getElementsByTagName('organism')[0]).getElementsByTagName('name')[0]).firstChild.nodeValue
                    except:
                        OG = 'NA'
                except IndexError:
                    pass
                try:
                    disdata = SGunidata.getElementsByTagName('disease')
                    for dItem in disdata:
                        disname = ''
                        disshort = ''
                        disURL = ''
                        disID = ''
                        try:
                            disname = (dItem.getElementsByTagName('name')[0]).firstChild.nodeValue
                            disID = (dItem.attributes['id'].value).upper()
                        except:
                            pass
                        try:
                            disshort = (dItem.getElementsByTagName('acronym')[0]).firstChild.nodeValue
                        except:
                            pass
                        if len(disname.strip()) > 0:
                            disURL = '<a target="_blank" href="https://www.uniprot.org/diseases/' + disID + '">' + str(disname.strip()) + '(' + str(disshort) + ')' + '</a>'
                            dislist.append(str(disname.strip()) + '(' + str(disshort) + ')')
                            unidislist.append(str(disname.strip()) + '(' + str(disshort) + ')')
                            unidisURLlist.append(disURL)
                except IndexError:
                    pass
            except ExpatError:
                pass
        except IOError:
            pass
        drugbankdata = 'NA'
        disdata = 'NA'
        uniDisData = 'NA'
        uniDisURLData = 'NA'
        disgenDisData = 'NA'
        disgenDisURLData = 'NA'
        goiddata = 'NA'
        gonamedata = 'NA'
        gotermdata = 'NA'
        goData = 'NA'
        if GN != 'NA' and GN in disGenDataDic:
            disgendislist = disGenDataDic[GN][0]
            disgendisURLlist = disGenDataDic[GN][1]
            if len(dislist) > 0:
                dislist = dislist + disGenDataDic[GN][0]
            else:
                dislist = disGenDataDic[GN][0]
        if len(GoIDList) > 0:
            goiddata = '|'.join(list(set(GoIDList)))
        if len(GoNamList) > 0:
            gonamedata = '|'.join(list(set(GoNamList)))
        if len(GoTermList) > 0:
            gotermdata = '|'.join(list(set(GoTermList)))
        if len(GOinfo) > 0:
            goData = '|'.join(list(set(GOinfo)))
        if len(drugbanklist) > 0:
            drugbankdata = '|'.join(list(set(drugbanklist)))
        if len(dislist) > 0:
            disdata = '|'.join(list(set(dislist)))
        if len(unidislist) > 0:
            uniDisData = '|'.join(list(set(unidislist)))
        if len(unidisURLlist) > 0:
            uniDisURLData = '|'.join(list(set(unidisURLlist)))
        if len(disgendislist) > 0:
            disgenDisData = '|'.join(list(set(disgendislist)))
        if len(disgendisURLlist) > 0:
            disgenDisURLData = '|'.join(list(set(disgendisURLlist)))
        humanUniprotfuncinfodic[subcode] = [
            PN, GN, OG, OGID, disdata, uniDisData, uniDisURLData,
            disgenDisData, disgenDisURLData, drugbankdata, goiddata,
            gonamedata, gotermdata, goData
        ]
    hudicfile = 'humanUniprotfuncinfodic.obj'
    hudicf = open(hudicfile, 'wb')
    pickle.dump(humanUniprotfuncinfodic, hudicf, pickle.HIGHEST_PROTOCOL)
    hudicf.close()
    print("Extracting KEGG pathway name, job starts", str(datetime.datetime.now()))
    hkeggdictfile = {}
    huniproturl = 'https://www.uniprot.org/uploadlists/'
    hk = KEGG()
    for hkx in range(0, len(unqhumanUniprotID), 2000):
        countProt += hkx + 2000
        if countProt % 2000 == 0:
            print(str(countProt), "th protein kegg job starts", str(datetime.datetime.now()))
        huniprotcodes = ' '.join(unqhumanUniprotID[hkx:hkx + 2000])
        huniprotparams = {
            'from': 'ACC',
            'to': 'KEGG_ID',
            'format': 'tab',
            'query': huniprotcodes
        }
        while True:
            try:
                hkuniprotdata = urllib.urlencode(huniprotparams)
                hkuniprotrequest = urllib2.Request(huniproturl, hkuniprotdata)
                hkuniprotresponse = urllib2.urlopen(hkuniprotrequest)
                for hkuniprotline in hkuniprotresponse:
                    hkudata = hkuniprotline.strip()
                    if not hkudata.startswith("From"):
                        hkuinfo = hkudata.split("\t")
                        if len(hkuinfo[1].strip()):
                            hkegg = hk.get(hkuinfo[1].strip())
                            hkudict_data = hk.parse(hkegg)
                            try:
                                try:
                                    if len(str(hkuinfo[0]).strip()) > 5:
                                        tempkeggData = '|'.join(
                                            '{};{}'.format(key, value)
                                            for key, value in hkudict_data['PATHWAY'].items())
                                        hkeggdictfile[hkuinfo[0].strip()] = [
                                            hkudict_data['PATHWAY'].values(),
                                            tempkeggData
                                        ]
                                except TypeError:
                                    pass
                            except KeyError:
                                pass
                break
            except urllib2.HTTPError:
                time.sleep(RETRY_TIME)
                print('Retrying until we succeed in getting data from KEGG!', str(datetime.datetime.now()))
    hkdicfile = 'humankeggdic.obj'
    hkdicf = open(hkdicfile, 'wb')
    pickle.dump(hkeggdictfile, hkdicf, pickle.HIGHEST_PROTOCOL)
    hkdicf.close()
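# The KEGG step above loops "while True" and sleeps RETRY_TIME seconds after
# every urllib2.HTTPError until the request goes through. A minimal,
# self-contained sketch of that retry-until-success pattern (the function
# name and defaults here are illustrative, not part of the original code):
import time
import urllib2

def fetch_with_retry(url, data=None, retry_time=20.0):
    """Retry a urllib2 request after HTTP errors until it succeeds."""
    while True:
        try:
            return urllib2.urlopen(urllib2.Request(url, data)).read()
        except urllib2.HTTPError:
            time.sleep(retry_time)  # back off, then try again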
def fill_standard_stations():
    """get the standard station list stuff"""
    GPS_dict = {}
    # If the user has a position file in the current directory, use that file
    # and only that file
    test_file = ".gps_pos_default.snx"
    if(os.path.isfile(test_file)):
        warnings.warn("Using GPS receiver positions only from user file '%s'"%test_file)
        try:
            GPS_dict = fill_GPS_station_dict(GPS_dict, test_file)
        except:
            pass
        return GPS_dict
    # Otherwise, get the standard files
    month = 86400 * 30
    try:
        now_time = time.time()  # in seconds
        # find the system list
        py_path = os.environ["PYTHONPATH"]
        sys_file = None
        for d in py_path.split(os.pathsep):
            test_path = d + "/../libdata/JMA/gps_pos_default.snx"
            if(os.path.isfile(test_path)):
                sys_file = test_path
                break
            test_path = d + "/../../libdata/JMA/gps_pos_default.snx"
            if(os.path.isfile(test_path)):
                sys_file = test_path
                break
        sys_time = 0
        if(sys_file):
            sys_time = os.path.getmtime(sys_file)
            if(now_time - sys_time > 6 * month):
                warnings.warn("System default GPS station file %s is getting old.\nContact your system administrator."%sys_file)
        else:
            warnings.warn("Cannot find default GPS station file %s.\nContact your system administrator."%sys_file)
        user_file = os.environ['HOME'] + "/.ParselTongue/GPS_station_list.txt"
        user_time = 0
        if(os.path.isfile(user_file)):
            user_time = os.path.getmtime(user_file)
        need_new = 0
        write_user = 0
        if(sys_time > 0):
            # Read in the system file, if available
            if(user_time > sys_time):
                # Read system first, then personal file
                GPS_dict = fill_GPS_station_dict(GPS_dict, sys_file)
                GPS_dict = _read_GPS_station_list(GPS_dict, user_file)
            else:
                write_user = 1
                if(user_time > 0):
                    GPS_dict = _read_GPS_station_list(GPS_dict, user_file)
                GPS_dict = fill_GPS_station_dict(GPS_dict, sys_file)
        else:
            # Try just the user's personal file
            need_new = 1
            write_user = 1
            if(user_time > 0):
                GPS_dict = _read_GPS_station_list(GPS_dict, user_file)
        if((now_time - user_time > 3 * month) and (now_time - sys_time > 3 * month)):
            need_new = 1
            write_user = 1
        if(need_new):
            # try getting a new file from the web
            try:
                try:
                    temp_file = tempfile.NamedTemporaryFile()
                    webfile = "ftp://igscb.jpl.nasa.gov/pub/station/general/igs_with_former.snx"
                    try:
                        print "Downloading %s"%webfile
                        urllib.urlretrieve(webfile, temp_file.name)
                        urllib.urlcleanup()
                        GPS_dict = fill_GPS_station_dict(GPS_dict, temp_file.name)
                    except IOError:
                        warnings.warn("Could not download new GPS stations list")
                finally:
                    temp_file.close()
            except:
                pass
        if(write_user):
            _write_GPS_station_list(GPS_dict, user_file)
    except:
        pass
    return GPS_dict
def getFileSizeOnServer(url):
    d = urllib.urlopen(url)
    size = int(d.info()['Content-Length'])
    urllib.urlcleanup()
    return size
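# getFileSizeOnServer() issues a full GET just to read Content-Length. A
# lighter sketch (assuming a plain http:// URL and a server that answers
# HEAD requests) transfers no body at all; get_size_via_head is a
# hypothetical helper, not part of the snippet above:
import httplib
import urlparse

def get_size_via_head(url):
    """Read Content-Length from a HEAD response instead of a full GET."""
    parts = urlparse.urlparse(url)
    conn = httplib.HTTPConnection(parts.netloc)
    conn.request("HEAD", parts.path or "/")
    size = int(conn.getresponse().getheader("Content-Length", -1))
    conn.close()
    return size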
#print(token+year+month+day+hour+minStore)
for token in tokens:
    if (filename_video.startswith(token + year + month + day + hour + minStore)
            and filename_video.endswith(".avi")):
        non = True
        for name in listFileName:
            if (filename_video == name):
                non = False
        if (non == True):
            listFileName.append(filename_video)
            # if(previousVideo_name):
            #     file = open("video/"+previousVideo_name, 'rb')
            #     print("previous size : "+file)
            print(filename_video)
            urlcleanup()
            urlretrieve(
                "ftp://" + ftpUser + ":" + ftpPass + "@" + ftpAddr +
                "/ipcam/" + year + "" + month + "" + day + "/" + hour +
                "00/" + str(filename_video),
                "video/" + str(filename_video))
            previousVideo = "/ipcam/" + year + "" + month + "" + day + "/" + hour + "00/" + str(filename_video)
            previousVideo_name = str(filename_video)
            f = open('downloadList', 'a+')
            f.write(filename_video + '\n')
            f.close()
            time.sleep(1)
            print("Download complete")
if (minStore != minute):
    minStore = minute
def get_poster(self, item):
    """Returns file path to the new poster"""
    from movie import Progress, Retriever

    file_to_copy = tempfile.mktemp(suffix=self.widgets['movie']['number'].get_text(),
                                   dir=self.locations['temp'])
    file_to_copy += ".jpg"
    canceled = False
    try:
        progress = Progress(self.widgets['window'], _("Fetching poster"), _("Wait a moment"))
        retriever = Retriever(item.LargeImage.URL, self.widgets['window'], progress, file_to_copy)
        retriever.start()
        while retriever.isAlive():
            progress.pulse()
            if progress.status:
                canceled = True
            while gtk.events_pending():
                gtk.main_iteration()
        progress.close()
        urlcleanup()
    except:
        canceled = True
        gutils.warning(_("Sorry. A connection error has occurred."))
        try:
            os.remove(file_to_copy)
        except:
            log.error("no permission for %s" % file_to_copy)
    if not canceled:
        if os.path.isfile(file_to_copy):
            im = None
            try:
                im = Image.open(file_to_copy)
            except IOError:
                log.warn("failed to identify %s" % file_to_copy)
            if im and im.size == (1, 1):
                url = FancyURLopener().open("http://www.amazon.com/gp/product/images/%s" % item.ASIN).read()
                if url.find('no-img-sm._V47056216_.gif') > 0:
                    log.warn('No image available')
                    gutils.warning(_("Sorry. This movie is listed but has no poster available at Amazon.com."))
                    return False
                url = gutils.after(url, 'id="imageViewerDiv"><img src="')
                url = gutils.before(url, '" id="prodImage"')
                urlretrieve(url, file_to_copy)
                try:
                    im = Image.open(file_to_copy)
                except IOError:
                    log.warn("failed to identify %s", file_to_copy)
            if not im:
                # something wrong with the image, give some feedback to the user
                log.warn('No image available')
                gutils.warning(_("Sorry. This movie is listed but has no poster available at Amazon.com."))
                return False
            if im.mode != 'RGB':  # convert GIFs
                im = im.convert('RGB')
                im.save(file_to_copy, 'JPEG')
            # set to None because the file is locked otherwise (os.remove throws an exception)
            im = None
            handler = self.widgets['big_poster'].set_from_file(file_to_copy)
            self.widgets['poster_window'].show()
            self.widgets['poster_window'].move(0, 0)
            if gutils.question(_("Do you want to use this poster instead?"), self.widgets['window']):
                return file_to_copy
            else:
                log.info("Reverting to previous poster and deleting new one from disk.")
                try:
                    os.remove(file_to_copy)
                except:
                    log.error('cannot remove %s', file_to_copy)
                self.widgets['poster_window'].hide()
        else:
            gutils.warning(_("Sorry. This movie is listed but has no poster available at Amazon.com."))
    else:
        # cleanup temporary files after canceling the download
        if os.path.isfile(file_to_copy):
            try:
                os.remove(file_to_copy)
            except:
                log.error('cannot remove %s', file_to_copy)
#!/usr/bin/python
#coding=utf-8
#__author__='dahu'
#data=2017-
# Image download, using the urllib library
import urllib

# fi=urllib.urlretrieve('https://ss1.baidu.com/6ONXsjip0QIZ8tyhnq/it/u=3129311788,3946097352&fm=173&s=B7F45B9569C0514BDA20966C0300B0F5&w=620&h=308&img.JPEG',filename="/home/dahu/PycharmProjects/SpiderLearning/urllib_lianxi/pic.JPEG")
fi = urllib.urlretrieve(
    'https://ss1.baidu.com/6ONXsjip0QIZ8tyhnq/it/u=3129311788,3946097352&fm=173&s=B7F45B9569C0514BDA20966C0300B0F5&w=620&h=308&img.JPEG',
    filename="t2.pic.JPEG")  # filename accepts both relative and absolute paths
urllib.urlcleanup()  # clear the url cache
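# The snippet above uses the Python 2 urllib API. Under Python 3 the same
# two calls live in urllib.request (a sketch of the equivalent, reusing the
# same URL and target filename):
import urllib.request

urllib.request.urlretrieve(
    'https://ss1.baidu.com/6ONXsjip0QIZ8tyhnq/it/u=3129311788,3946097352&fm=173&s=B7F45B9569C0514BDA20966C0300B0F5&w=620&h=308&img.JPEG',
    filename='t2.pic.JPEG')
urllib.request.urlcleanup()  # clear the cache left behind by urlretrieve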
def main():
    year = ''
    try:
        opts, args = getopt.getopt(sys.argv[1:], "ha:n", ["help", "ayear="])
    except getopt.GetoptError:
        print 'noaa2postgresql.py -a <year>'
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print 'noaa2postgresql.py -a <year>'
            sys.exit()
        elif opt in ("-a", "--ayear"):
            year = arg
    if year == '':
        print 'ERROR: specify a year'
        sys.exit()
    print 'Downloading data for the year', year
    #quit()
    #nomefile1 = "FRA_31102011.txt"
    percorso_FTP = 'ftp://ftp.ncdc.noaa.gov/pub/data/noaa/isd-lite/'
    # Connect to an existing database
    sys.path.append(os.path.abspath("../"))
    from credenziali import *
    conn = psycopg2.connect(host=ip, dbname=db, user=user, password=pwd, port=port)
    # autocommit
    conn.set_session(autocommit=True)
    cur = conn.cursor()
    cur2 = conn.cursor()
    # read the codes of the stations present in the DB
    # Open a cursor to perform database operations
    query = "SELECT id_station,descr, country FROM {}.stations_p_t;".format(schema)
    cur.execute(query)
    while True:
        row = cur.fetchone()
        if row == None:
            break
        cod = row[0]
        url_filename = '%s%s/%s-99999-%s.gz' % (percorso_FTP, year, cod, year)
        filename = '%s-99999-%s' % (cod, year)
        zipname = '%s.gz' % filename
        print url_filename
        print filename
        print zipname
        print '*****************'
        try:
            urllib.urlretrieve(url_filename, zipname)
            urllib.urlcleanup()
            zip_ref = gzip.open(zipname, 'rb')
            file_content = zip_ref.read()
            f1 = open(filename, 'w')
            for line in file_content:
                f1.write(line)
            zip_ref.close()
            f1.close()
            #time.sleep(10)
            f1 = open(filename, 'r')
            print "Reading the file", filename
            # read the three columns where the first one contains the 2
            i = 0
            n = 0
            riga_prima = " "
            line = " "
            yyyy = []
            mm = []
            dd = []
            hh = []
            tt = []
            pp = []
            for riga in file(filename):
                line = riga
                #print i
                #print line
                a = line.split()
                #print a
                yyyy.append(a[0])
                mm.append(a[1])
                dd.append(a[2])
                hh.append(a[3])
                ################################################################
                # read temperature (scaling factor NOAA = 10.0)
                if (a[4] == '-9999'):  # null value NOAA
                    tt.append(99999)   # null value DICCA
                else:
                    tt.append(float(a[4]) / 10.0 + 273.15)  # convert Celsius to Kelvin: K = °C + 273.15
                ################################################################
                # read pressure (scaling factor NOAA = 10.0)
                if (a[6] == '-9999'):  # null value NOAA
                    pp.append(99999)   # null value DICCA
                else:
                    pp.append(float(a[6]) / 10.0)
            #print min(tt)
            #print max(tt)
            #print min(pp)
            #print max(pp)
            # Insert data in the DB
            i = 0
            print "file length", len(tt)
            while i < len(tt):
                #print id_stazione[i]
                data = '%s/%s/%s %s:00' % (yyyy[i], mm[i], dd[i], hh[i])
                query2 = "INSERT INTO noaa.data_p_t(id_station, time, \"T\", \"P_mare\") VALUES ('%s', '%s',%f , %f);" % (cod, data, tt[i], pp[i])
                #print i,query
                #print i
                try:
                    cur2.execute(query2)
                except:
                    print "primary key violation", query2
                i += 1
            os.remove(filename)
            os.remove(zipname)
        except:
            print "File not found", filename
            #quit()
    # Make the changes to the database persistent
    #conn.commit()
    # Close communication with the database
    cur.close()
    conn.close()
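# main() above interpolates values straight into its INSERT statement with
# the % operator. A sketch of the same insert as a parameterized query, so
# psycopg2 handles quoting and escaping (same table and columns; cod, data,
# tt and pp are borrowed from the loop above for illustration):
query2 = ('INSERT INTO noaa.data_p_t(id_station, time, "T", "P_mare") '
          'VALUES (%s, %s, %s, %s);')
cur2.execute(query2, (cod, data, tt[i], pp[i]))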
def update(debug=False):
    print("Searching updates at %s..." % (remoteUrl))
    # Download the file_list file
    try:
        os.remove("__file_list")
    except:
        pass
    urllib.urlcleanup()
    try:
        urllib.urlretrieve("%s/__file_list" % remoteUrl, "__file_list")
    except socket.timeout:
        raise GtBurstException(
            11,
            "Time out when connecting to %s. Check your internet connection, then retry" % (remoteUrl))
    except:
        raise GtBurstException(
            1,
            "Problems with the download. Check your connection, and that you can reach %s" % (remoteUrl))
    # Read the list of files
    f = open('__file_list')
    files = f.readlines()
    f.close()
    os.remove("__file_list")
    # Get the path of the gtburst installation
    path = GtBurst.__file__
    installationPath = os.path.join(os.path.sep.join(path.split(os.path.sep)[0:-3]))
    nUpdates = 0
    for ff in files:
        atoms = ff.split()
        pathname = atoms[-1].replace('*', '')
        if (ff.find("__file_list") >= 0):
            if (debug):
                print("Skipping %s..." % (ff))
        else:
            remoteMD5 = atoms[0]
            if (debug):
                print("File %s has remote MD5 checksum %s" % (pathname, remoteMD5))
            # Get the MD5 of the same file in the GtBurst package path
            pathnameThisSys = pathname.replace("/", os.path.sep)
            localPath = os.path.join(installationPath, pathnameThisSys)
            if (not os.path.exists(localPath)):
                print("File %s does not exist in the current installation. Creating it..." % (localPath))
                # If the new file is in a new directory, the directory needs to be created
                try:
                    os.makedirs(os.path.dirname(localPath))
                except:
                    # This will fail if the directory already exists
                    pass
                downloadFile(pathname, localPath)
                nUpdates += 1
            else:
                # File exists. Check the checksum
                localMD5 = md5.md5(open(localPath, 'rb').read()).hexdigest()
                if (localMD5 != remoteMD5):
                    print("Updating %s..." % (localPath))
                    downloadFile(pathname, localPath)
                    nUpdates += 1
                else:
                    if (debug):
                        print("NOT updating %s (local MD5: %s, remote MD5: %s)..." % (localPath, localMD5, remoteMD5))
    if (debug):
        print("\n\n")
    return nUpdates
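# update() above hashes each local file with the long-deprecated md5 module.
# A sketch of the same checksum with hashlib, its stdlib replacement
# (local_md5 is a hypothetical helper, not part of the function above):
import hashlib

def local_md5(path):
    """Hex MD5 digest of a file, read in one go as update() does."""
    with open(path, 'rb') as f:
        return hashlib.md5(f.read()).hexdigest()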
def __del__(self):
    urlcleanup()
def getFTP(self, what='Extended'):
    # Re-implementing this
    # This will complete automatically the form available at
    # https://fermi.gsfc.nasa.gov/cgi-bin/ssc/LAT/LATDataQuery.cgi
    # After submitting the form, an html page will inform about
    # the identifier assigned to the query and the time which will be
    # needed to process it. After retrieving the query number,
    # this function will wait for the files to be completed on the server,
    # then it will download them
    url = "https://fermi.gsfc.nasa.gov/cgi-bin/ssc/LAT/LATDataQuery.cgi"
    # Save parameters for the query in a dictionary
    parameters = {}
    parameters['coordfield'] = "%s,%s" % (self.ra, self.dec)
    parameters['coordsystem'] = "J2000"
    parameters['shapefield'] = "%s" % (self.roi)
    parameters['timefield'] = "%s,%s" % (self.tstart, self.tstop)
    parameters['timetype'] = "%s" % (self.timetype)
    parameters['energyfield'] = "30,1000000"
    parameters['photonOrExtendedOrNone'] = what
    parameters['destination'] = 'query'
    parameters['spacecraft'] = 'checked'
    print("Query parameters:")
    for k, v in parameters.iteritems():
        print("%30s = %s" % (k, v))
    # POST encoding
    postData = urllib.urlencode(parameters)
    temporaryFileName = "__temp_query_result.html"
    try:
        os.remove(temporaryFileName)
    except:
        pass
    urllib.urlcleanup()
    try:
        urllib.urlretrieve(url, temporaryFileName, lambda x, y, z: 0, postData)
    except socket.timeout:
        raise GtBurstException(11, "Time out when connecting to the server. Check your internet connection, or that you can access https://fermi.gsfc.nasa.gov, then retry")
    except:
        raise GtBurstException(1, "Problems with the download. Check your connection or that you can access https://fermi.gsfc.nasa.gov, then retry.")
    # Now open the file, parse it and get the query ID
    htmlFile = open(temporaryFileName)
    lines = []
    for line in htmlFile:
        lines.append(line.encode('utf-8'))
    html = " ".join(lines).strip()
    htmlFile.close()
    print("\nAnswer from the LAT data server:\n")
    text = html2text.html2text(html.encode('utf-8').strip()).split("\n")
    if ("".join(text).replace(" ", "") == ""):
        raise GtBurstException(1, "Problems with the download. Empty answer from the LAT server. Normally this means that the server is ingesting new data, please retry in half an hour or so.")
    text = filter(lambda x: x.find("[") < 0 and x.find("]") < 0 and x.find("#") < 0 and x.find("* ") < 0 and x.find("+") < 0 and x.find("Skip navigation") < 0, text)
    text = filter(lambda x: len(x.replace(" ", "")) > 1, text)
    print "\n".join(text)
    print("\n\n")
    os.remove(temporaryFileName)
    if (" ".join(text).find("down due to maintenance") >= 0):
        raise GtBurstException(12, "LAT Data server looks down due to maintenance.")
    parser = DivParser("sec-wrapper")
    parser.feed(html)
    if (parser.data == []):
        parser = DivParser("right-side")
        parser.feed(html)
    try:
        estimatedTimeLine = filter(lambda x: x.find("The estimated time for your query to complete is") == 0, parser.data)[0]
        estimatedTimeForTheQuery = re.findall("The estimated time for your query to complete is ([0-9]+) seconds", estimatedTimeLine)[0]
    except:
        raise GtBurstException(1, "Problems with the download. Empty or wrong answer from the LAT server (see console). Please retry later.")
    try:
        httpAddress = filter(lambda x: x.find("http://fermi.gsfc.nasa.gov") >= 0, parser.data)[0]
    except IndexError:
        # Try https
        httpAddress = filter(lambda x: x.find("https://fermi.gsfc.nasa.gov") >= 0, parser.data)[0]
    # Now periodically check if the query is complete
    startTime = time.time()
    timeout = 1.5 * max(5.0, float(estimatedTimeForTheQuery))  # Seconds
    refreshTime = 2.0  # Seconds
    # When the query will be completed, the page will contain this string:
    # The state of your query is 2 (Query complete)
    endString = "The state of your query is 2 (Query complete)"
    # Url regular expression
    regexpr = re.compile("wget (.*.fits)")
    # Build the window for the progress
    if (self.parent is None):
        # No graphical output
        root = None
    else:
        # make a transient window
        root = Toplevel()
        root.transient(self.parent)
        root.grab_set()
        l = Label(root, text='Waiting for the server to complete the query (estimated time: %s seconds)...' % (estimatedTimeForTheQuery))
        l.grid(row=0, column=0)
        m1 = Meter(root, 500, 20, 'grey', 'blue', 0, None, None, 'white', relief='ridge', bd=3)
        m1.grid(row=1, column=0)
        m1.set(0.0, 'Waiting...')
    links = None
    fakeName = "__temp__query__result.html"
    while (time.time() <= startTime + timeout):
        if (root is not None):
            if (estimatedTimeForTheQuery == 0):
                m1.set(1)
            else:
                m1.set((time.time() - startTime) / float(max(estimatedTimeForTheQuery, 1)))
        sys.stdout.flush()
        # Fetch the html with the results
        try:
            (filename, header) = urllib.urlretrieve(httpAddress, fakeName)
        except socket.timeout:
            urllib.urlcleanup()
            if (root is not None):
                root.destroy()
            raise GtBurstException(11, "Time out when connecting to the server. Check your internet connection, or that you can access https://fermi.gsfc.nasa.gov, then retry")
        except:
            urllib.urlcleanup()
            if (root is not None):
                root.destroy()
            raise GtBurstException(1, "Problems with the download. Check your connection or that you can access https://fermi.gsfc.nasa.gov, then retry.")
        f = open(fakeName)
        html = " ".join(f.readlines())
        status = re.findall("The state of your query is ([0-9]+)", html)[0]
        #print("Status = %s" % status)
        if (status == '2'):
            # Get the download link
            links = regexpr.findall(html)
            break
        f.close()
        os.remove(fakeName)
        urllib.urlcleanup()
        time.sleep(refreshTime)
    if (root is not None):
        root.destroy()
    # Download the files
    #if(links is not None):
    #    for link in links:
    #        print("Downloading %s..." %(link))
    #        urllib.urlretrieve(link,link.split("/")[-1])
    #else:
    #    raise RuntimeError("Could not download LAT Standard data")
    remotePath = "%s/%s/queries/" % (self.dataRepository, self.instrument)
    if (links is not None):
        filenames = map(lambda x: x.split('/')[-1], links)
        try:
            self.downloadDirectoryWithFTP(remotePath, filenames=filenames)
        except Exception as e:
            # Try with "wget", if the system has it
            for ff in filenames:
                try:
                    self.makeLocalDir()
                    dataHandling.runShellCommand("wget %s%s -P %s" % ("https://fermi.gsfc.nasa.gov/FTP/fermi/data/lat/queries/", ff, self.localRepository), True)
                except:
                    raise e
    else:
        raise GtBurstException(1, "Could not download LAT Standard data")
    # Rename the files to something neater...
    newFilenames = {}
    for f in filenames:
        # EV or SC?
        suffix = f.split("_")[1]
        if (suffix.find("EV") >= 0):
            suffix = 'ft1'
        elif (suffix.find("SC") >= 0):
            suffix = 'ft2'
        else:
            raise GtBurstException(13, "Could not understand the type of a downloaded file (%s)" % (f))
        newfilename = os.path.join(self.localRepository, "gll_%s_tr_bn%s_v00.fit" % (suffix, self.grbName))
        localPath = os.path.join(self.localRepository, f)
        os.rename(localPath, newfilename)
        newFilenames[suffix] = newfilename
    ###########################
    if ('ft1' in newFilenames.keys() and 'ft2' in newFilenames.keys()):
        dataHandling._makeDatasetsOutOfLATdata(newFilenames['ft1'], newFilenames['ft2'],
                                               self.grbName, self.tstart, self.tstop,
                                               self.ra, self.dec, self.triggerTime,
                                               self.localRepository,
                                               cspecstart=-1000, cspecstop=1000)
def get_html_source(url, path, save_file=True, overwrite=False):
    """ fetch the html source """
    log("Retrieving HTML Source")
    log("Fetching URL: %s" % url)
    error = False
    htmlsource = "null"
    file_name = ""
    if save_file:
        path += ".json"
        tempxml_folder = __cdam__.path_temp_xml()
        if not xbmcvfs.exists(os.path.join(tempxml_folder, '')):
            xbmcvfs.mkdir(os.path.join(tempxml_folder, ''))
        file_name = os.path.join(tempxml_folder, path)

    class AppURLopener(urllib.FancyURLopener):
        version = __cdam__.user_agent()

    urllib._urlopener = AppURLopener()
    for _ in range(0, 4):
        try:
            if save_file:
                if xbmcvfs.exists(file_name):
                    file_mtime = datetime.datetime.fromtimestamp(os.path.getmtime(file_name))
                    file_age = datetime.datetime.today() - file_mtime
                    # yes i know... but this is temporary and will be configurable in a later release
                    if file_age.days > 14:
                        log("Cached file is %s days old, refreshing" % file_age.days)
                        xbmcvfs.delete(file_name)
                if xbmcvfs.exists(file_name) and not overwrite:
                    log("Retrieving local source")
                    sock = open(file_name, "r")
                else:
                    log("Retrieving online source")
                    urllib.urlcleanup()
                    sock = urllib.urlopen(url)
            else:
                urllib.urlcleanup()
                sock = urllib.urlopen(url)
            htmlsource = sock.read()
            if save_file and htmlsource not in ("null", ""):
                if not xbmcvfs.exists(file_name) or overwrite:
                    file(file_name, "w").write(htmlsource)
            sock.close()
            break
        except IOError as e:
            log("error: %s" % e, xbmc.LOGERROR)
            log("e.errno: %s" % e.errno, xbmc.LOGERROR)
            if not e.errno == "socket error":
                log("errno.errorcode: %s" % errno.errorcode[e.errno], xbmc.LOGERROR)
        except Exception as e:
            log("error: %s" % e, xbmc.LOGERROR)
            traceback.print_exc()
            log("!!Unable to open page %s" % url)
            error = True
    if error:
        return "null"
    else:
        log("HTML Source:\n%s" % htmlsource)
        if htmlsource == "":
            htmlsource = "null"
        return htmlsource
def DownloadRequest(section, url, img, LabelName):
    # Fall back to the title/showtitle parameters when no label was passed
    if (LabelName == '') and (_param['title'] != ''):
        LabelName = _param['title']
    if (LabelName == '') and (_param['showtitle'] != ''):
        LabelName = _param['showtitle']
    LabelFile = clean_filename(LabelName)
    deb('LabelName', LabelName)
    if (LabelName == ''):
        deb('Download Error', 'Missing Filename String.')
        myNote('Download Error', 'Missing Filename String.')
        return
    if (section == ps('section.wallpaper')):
        FolderDest = xbmc.translatePath(addst("download_folder_wallpapers"))
    elif (section == ps('section.tv')):
        FolderDest = xbmc.translatePath(addst("download_folder_tv"))
    elif (section == ps('section.movie')):
        FolderDest = xbmc.translatePath(addst("download_folder_movies"))
    else:
        FolderDest = xbmc.translatePath(addst("download_folder_movies"))
    if os.path.exists(FolderDest) == False:
        os.mkdir(FolderDest)
    if os.path.exists(FolderDest):
        if (section == ps('section.tv')) or (section == ps('section.movie')):
            ### param >> url: /link/show/1466546/
            match = re.search('/.+?/.+?/(.+?)/', url)  ## Example: http://www.solarmovie.so/link/show/1052387/ ##
            videoId = match.group(1)
            deb('Solar ID', videoId)
            url = BASE_URL + '/link/play/' + videoId + '/'  ## Example: http://www.solarmovie.so/link/play/1052387/ ##
            html = net.http_GET(url).content
            match = re.search('<iframe.+?src="(.+?)"', html, re.IGNORECASE | re.MULTILINE | re.DOTALL)
            link = match.group(1)
            link = link.replace('/embed/', '/file/')
            deb('hoster link', link)
            try:
                stream_url = urlresolver.HostedMediaFile(link).resolve()
            except:
                stream_url = ''
            ext = Download_PrepExt(stream_url, '.flv')
        else:
            stream_url = url
            ext = Download_PrepExt(stream_url, '.jpg')
        t = 1
        c = 1
        if os.path.isfile(xbmc.translatePath(os.path.join(FolderDest, LabelFile + ext))):
            t = LabelFile
            while t == LabelFile:
                if os.path.isfile(xbmc.translatePath(os.path.join(FolderDest, LabelFile + '[' + str(c) + ']' + ext))) == False:
                    LabelFile = LabelFile + '[' + str(c) + ']'
                c = c + 1
        start_time = time.time()
        deb('start_time', str(start_time))
        download_method = addst('download_method')  ### 'Progress|ProgressBG|Hidden'
        urllib.urlcleanup()
        if (download_method == 'Progress'):
            dp = xbmcgui.DialogProgress()
            dialogType = 12  ## For Frodo and earlier.
            dp.create('Downloading', LabelFile + ext)
            urllib.urlretrieve(stream_url,
                               xbmc.translatePath(os.path.join(FolderDest, LabelFile + ext)),
                               lambda nb, bs, fs: DownloadStatus(nb, bs, fs, dp, download_method, start_time, section, url, img, LabelName, ext, LabelFile))  #urllib.urlretrieve(url, localfilewithpath)
            myNote('Download Complete', LabelFile + ext, 15000)
        elif (download_method == 'ProgressBG'):
            dp = xbmcgui.DialogProgressBG()
            dialogType = 13  ## Only works on daily build of XBMC.
            dp.create('Downloading', LabelFile + ext)
            urllib.urlretrieve(stream_url,
                               xbmc.translatePath(os.path.join(FolderDest, LabelFile + ext)),
                               lambda nb, bs, fs: DownloadStatus(nb, bs, fs, dp, download_method, start_time, section, url, img, LabelName, ext, LabelFile))  #urllib.urlretrieve(url, localfilewithpath)
            myNote('Download Complete', LabelFile + ext, 15000)
        elif (download_method == 'Test'):
            dp = xbmcgui.DialogProgress()
            myNote('Download Started', LabelFile + ext, 15000)
            urllib.urlretrieve(stream_url,
                               xbmc.translatePath(os.path.join(FolderDest, LabelFile + ext)),
                               lambda nb, bs, fs: DownloadStatus(nb, bs, fs, dp, download_method, start_time, section, url, img, LabelName, ext, LabelFile))  #urllib.urlretrieve(url, localfilewithpath)
            myNote('Download Complete', LabelFile + ext, 15000)
        elif (download_method == 'Hidden'):
            dp = xbmcgui.DialogProgress()
            myNote('Download Started', LabelFile + ext, 15000)
            urllib.urlretrieve(stream_url,
                               xbmc.translatePath(os.path.join(FolderDest, LabelFile + ext)),
                               lambda nb, bs, fs: DownloadStatus(nb, bs, fs, dp, download_method, start_time, section, url, img, LabelName, ext, LabelFile))  #urllib.urlretrieve(url, localfilewithpath)
            myNote('Download Complete', LabelFile + ext, 15000)
        elif (download_method == 'jDownloader (StreamURL)'):
            myNote('Download', 'sending to jDownloader plugin', 15000)
            xbmc.executebuiltin("XBMC.RunPlugin(plugin://plugin.program.jdownloader/?action=addlink&url=%s)" % stream_url)
            #return
        elif (download_method == 'jDownloader (Link)'):
            myNote('Download', 'sending to jDownloader plugin', 15000)
            xbmc.executebuiltin("XBMC.RunPlugin(plugin://plugin.program.jdownloader/?action=addlink&url=%s)" % link)
            #return
        else:
            deb('Download Error', 'Incorrect download method.')
            myNote('Download Error', 'Incorrect download method.')
            return
        ##
        ##urllib.urlretrieve(stream_url, xbmc.translatePath(os.path.join(FolderDest,LabelFile+ext)), lambda nb, bs, fs: DownloadStatus(nb, bs, fs, dp, download_method, start_time, section, url, img, LabelName, ext, LabelFile)) #urllib.urlretrieve(url, localfilewithpath)
        ##
        #myNote('Download Complete',LabelFile+ext,15000)
        ##
        #### xbmc.translatePath(os.path.join(FolderDest,localfilewithpath+ext))
        _addon.resolve_url(url)
        _addon.resolve_url(stream_url)
    else:
        deb('Download Error', 'Unable to create destination path.')
        myNote('Download Error', 'Unable to create destination path.')
        return
# Python version used: Python 3.6.1+
# import all the libraries used
import re, urllib, os, sys

# determine python version
version = sys.version_info[0]

# set user_input and import modules for the correct version of python
if version == 2:
    # python 2.x
    user_input = raw_input
    import urllib2
    urlopen = urllib2.urlopen            # open a url
    encode = urllib.urlencode            # encode a search line
    retrieve = urllib.urlretrieve        # retrieve url info
    cleanup = urllib.urlcleanup          # cleanup url cache (called later as cleanup())
else:
    # python 3.x
    user_input = input
    import urllib.request
    import urllib.parse
    urlopen = urllib.request.urlopen
    encode = urllib.parse.urlencode
    retrieve = urllib.request.urlretrieve
    cleanup = urllib.request.urlcleanup  # cleanup url cache (called later as cleanup())

# clear the terminal screen
def screen_clear():
    if os.name == 'nt':
        os.system('cls')
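# With the aliases bound above, one download/cleanup sequence runs unchanged
# on both interpreters (a usage sketch; the URL and filename are placeholders):
# retrieve("http://example.com/file.txt", "file.txt")
# cleanup()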
def download_tile(tile, url, pid, srtmv3, one, username, password):
    output = tile + '.r.in.srtm.tmp.' + str(pid)
    if srtmv3:
        if one:
            local_tile = str(tile) + '.SRTMGL1.hgt.zip'
        else:
            local_tile = str(tile) + '.SRTMGL3.hgt.zip'
    else:
        local_tile = str(tile) + '.hgt.zip'
    urllib.urlcleanup()
    if srtmv3:
        remote_tile = str(url) + local_tile
        goturl = 1
        try:
            password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
            password_manager.add_password(None, "https://urs.earthdata.nasa.gov", username, password)
            cookie_jar = CookieJar()
            opener = urllib2.build_opener(
                urllib2.HTTPBasicAuthHandler(password_manager),
                #urllib2.HTTPHandler(debuglevel=1),   # Uncomment these two lines to see
                #urllib2.HTTPSHandler(debuglevel=1),  # details of the requests/responses
                urllib2.HTTPCookieProcessor(cookie_jar))
            urllib2.install_opener(opener)
            request = urllib2.Request(remote_tile)
            response = urllib2.urlopen(request)
            fo = open(local_tile, 'w+b')
            fo.write(response.read())
            fo.close()
            time.sleep(0.5)
        except:
            goturl = 0
        return goturl
    # SRTM subdirs: Africa, Australia, Eurasia, Islands, North_America, South_America
    for srtmdir in ('Africa', 'Australia', 'Eurasia', 'Islands', 'North_America', 'South_America'):
        remote_tile = str(url) + str(srtmdir) + '/' + local_tile
        goturl = 1
        try:
            # build the request for this mirror before opening it
            request = urllib2.Request(remote_tile)
            response = urllib2.urlopen(request)
            fo = open(local_tile, 'w+b')
            fo.write(response.read())
            fo.close()
            time.sleep(0.5)
            # does not work:
            #urllib.urlretrieve(remote_tile, local_tile, data = None)
        except:
            goturl = 0
        if goturl == 1:
            return 1
    return 0
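# download_tile() above wires urllib2 up for NASA Earthdata logins: the
# password manager supplies the credentials and the CookieJar keeps the
# session cookie across the redirect to urs.earthdata.nasa.gov. The same
# pattern in isolation (a sketch; make_earthdata_opener is a hypothetical
# helper and the credentials are placeholders):
import urllib2
from cookielib import CookieJar

def make_earthdata_opener(username, password):
    """Build a urllib2 opener that can follow Earthdata's auth redirect."""
    pm = urllib2.HTTPPasswordMgrWithDefaultRealm()
    pm.add_password(None, "https://urs.earthdata.nasa.gov", username, password)
    return urllib2.build_opener(
        urllib2.HTTPBasicAuthHandler(pm),
        urllib2.HTTPCookieProcessor(CookieJar()))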
        while True:
            try:
                res_data = urllib2.urlopen(requrl + '?' + test_data_encode)
                break
            except urllib2.URLError, e:
                print e
                time.sleep(20)
                continue
        res = res_data.read()
        mq_str = res.split('\n')
        print mq_str[0]
        ret_str = mq_str[0].rstrip('\r')
        if (ret_str == "UCMQ_HTTP_OK"):
            break
        else:
            time.sleep(1)
    res_data.close()
    urllib.urlcleanup()
    return mq_str[1]


def get_a_rabbitmq_channel():
    credentials = pika.PlainCredentials("guest", "guest")
    conn_params = pika.ConnectionParameters(host="localhost", port=5672, credentials=credentials)
    # conn_params = pika.ConnectionParameters(host="192.168.7.19", port=5672, credentials=credentials)
    conn_broker = pika.BlockingConnection(conn_params)
    channel = conn_broker.channel()
    channel.exchange_declare(exchange="amq",
                             exchange_type="direct",
                             passive=False,
                             durable=True,
def authenticate(self, request):
    user = request.user or None
    auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    # Get access token
    verifier = request.GET.get('oauth_verifier')
    auth.set_request_token(request.session['request_token.key'],
                           request.session['request_token.secret'])
    del request.session['request_token.secret']
    del request.session['request_token.key']
    auth.get_access_token(verifier)
    # Construct the API instance
    api = tweepy.API(auth)
    if user.is_anonymous():
        try:
            twitter_user = TwitterUserProfile.objects.get(screen_name=api.me().screen_name)
            return twitter_user.user
        except TwitterUserProfile.DoesNotExist:
            userProfile = UserProfile.objects.create(username=api.me().screen_name)
            userProfile.first_name = api.me().screen_name
            userProfile.location = api.me().location
            userProfile.save()
            img = urllib.urlretrieve(api.me().profile_image_url)
            userProfile.profile_picture.save("Twitter-profile.jpg", File(open(img[0])))
            urllib.urlcleanup()
            userProfile.twitter_link = 'https://twitter.com/' + api.me().screen_name
            from django.contrib.auth.hashers import make_password
            raw_pass = ''.join(random.choice(string.ascii_uppercase + string.digits) for x in range(12))
            tmp_pass = make_password(raw_pass)
            userProfile.password = tmp_pass
            userProfile.save()
            twitter_user = TwitterUserProfile.objects.create(user=userProfile)
            twitter_user.screen_name = api.me().screen_name
            twitter_user.profile_image_url = userProfile.profile_picture
            twitter_user.location = api.me().location
            twitter_user.url = 'https://twitter.com/' + api.me().screen_name
            twitter_user.access_token = auth.access_token.key
            twitter_user.access_secret = auth.access_token.secret
            twitter_user.save()
            return userProfile
    else:
        try:
            user_twitter = TwitterUserProfile.objects.get(user=user)
            if user_twitter.screen_name == api.me().screen_name:
                return user_twitter.user
            else:
                request.session['access_token'] = auth.access_token.key
                request.session['access_secret'] = auth.access_token.secret
                next = request.session['next'] or ""
                if next:
                    del request.session['next']
                    return HttpResponseRedirect(next)
                else:
                    return HttpResponseRedirect(reverse('sync_twitter'))
        except TwitterUserProfile.DoesNotExist:
            try:
                user_twitter = TwitterUserProfile.objects.get(screen_name=api.me().screen_name)
                request.session['access_token'] = auth.access_token.key
                request.session['access_secret'] = auth.access_token.secret
                next = request.session['next'] or ""
                if next:
                    del request.session['next']
                    return HttpResponseRedirect(next)
                else:
                    return HttpResponseRedirect(reverse('sync_twitter'))
            except TwitterUserProfile.DoesNotExist:
                twitter_user = TwitterUserProfile.objects.create(user=UserProfile.objects.get(username=user.username))
                twitter_user.screen_name = api.me().screen_name
                twitter_user.location = api.me().location
                twitter_user.url = 'https://twitter.com/' + api.me().screen_name
                twitter_user.access_token = auth.access_token.key
                twitter_user.access_secret = auth.access_token.secret
                twitter_user.save()
                return twitter_user.user