def getTeamSeasonHtml(self):
    html = INVALID_STRING
    if cmp(self._htmlWebSite, INVALID_STRING) == 0:
        print "htmlWebSite has not been initialized yet"
    else:
        # If the data has already been synced, read the html from the local copy
        # instead of reloading the website again
        pathNeed2Test = CURRENT_PATH + self._Season0 + self._Season1 + '/' + self._Team_id
        print "Constructing path as ", pathNeed2Test
        self._dataStoredPath = pathNeed2Test
        util.mkdir(pathNeed2Test)
        htmlFile = pathNeed2Test + '/' + self._GameType + '.html'
        self._seasonDataHtmlFile = htmlFile
        print "Check if html file exists or not ", htmlFile
        if os.path.isfile(htmlFile):
            print "html file exists, open the file, read it and return the string"
            html = util.openFile(htmlFile)
            #print html
        else:
            print "html file does not exist, now loading the webpage from network"
            html = util.getHtmlFromUrl(self._htmlWebSite)
            if cmp(html, INVALID_STRING) != 0:
                util.saveFile(htmlFile, html)
        return html
    return html
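
# The snippets in this file all lean on a small util module (mkdir, openFile,
# saveFile, getHtmlFromUrl). A minimal sketch of what those helpers might look
# like is below; this is an assumption for illustration, not the project's
# actual implementation, and INVALID_STRING is a guessed sentinel value.
import os
import urllib2

INVALID_STRING = 'INVALID'  # assumed failure sentinel tested by the callers above

def mkdir(path):
    # create the directory tree if it does not already exist
    if not os.path.isdir(path):
        os.makedirs(path)

def openFile(path):
    # read a file and return its content as a string
    with open(path, 'rb') as f:
        return f.read()

def saveFile(path, content):
    # write content to path, overwriting any existing file
    with open(path, 'wb') as f:
        f.write(content)

def getHtmlFromUrl(url):
    # download a page, returning the sentinel on failure so callers can test for it
    try:
        return urllib2.urlopen(url, timeout=10).read()
    except urllib2.URLError:
        return INVALID_STRING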
def test():
    proxy_support = urllib2.ProxyHandler({'http': 'http://61.187.251.235:80'})
    opener = urllib2.build_opener(proxy_support, urllib2.HTTPHandler)
    urllib2.install_opener(opener)
    content = urllib2.urlopen('http://180.76.247.130:8888/').read()
    util.saveFile("ipcheck.html", content)
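
# A hedged variant of test() above: route requests through an HTTP proxy and save
# the response of a page that echoes the caller's IP, so the proxy can be verified.
# The proxy address and echo URL are placeholders, not known working endpoints.
import urllib2
import util

def check_proxy(proxy_url, echo_url, outfile='ipcheck.html'):
    opener = urllib2.build_opener(urllib2.ProxyHandler({'http': proxy_url}),
                                  urllib2.HTTPHandler)
    urllib2.install_opener(opener)
    util.saveFile(outfile, urllib2.urlopen(echo_url, timeout=10).read())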
def resumableMerge(self, filehash, filename=None):
    # Not thread-safe; intended for single-threaded use only.
    filehash_path = os.path.join(self.downloadpath, filehash)
    tempjson = os.path.join(filehash_path, 'temp.json')
    if not os.path.isfile(tempjson):
        # save the chunk list into temp.json
        self.list(filehash)
        if self.links:
            i = 0
            for link in self.links:
                if 'Hash' in link.keys():
                    self.chunks.update({
                        i: {
                            'filehash': link.get('Hash'),
                            'success': False
                        }
                    })
                    i += 1
            util.saveFile(tempjson, json.dumps(self.chunks))
        else:
            print("no chunks: failed to get the chunk list for {}".format(filehash))
    # download each chunk that has not succeeded yet
    with open(tempjson) as target_file:
        self.chunks = json.load(target_file)
    if self.chunks:
        for chunk, chunk_result in self.chunks.iteritems():
            if not chunk_result.get('success'):
                chunk_result['success'] = self.downloadhash(
                    chunk_result.get('filehash'), filehash_path) or chunk_result.get('success')
                util.saveFile(tempjson, json.dumps(self.chunks))
    # merge chunks into a single local file
    if filename:
        localfile = os.path.join(filehash_path, filename)
    else:
        localfile = os.path.join(filehash_path, filehash)
    with open(localfile, 'wb') as target_file:
        for i in range(len(self.chunks)):
            chunk = os.path.join(
                filehash_path, self.chunks.get(str(i)).get('filehash'))
            with open(chunk, 'rb') as source_file:
                for line in source_file:
                    target_file.write(line)
            try:
                os.remove(chunk)  # delete the merged chunk to save space
            except Exception as e:
                print("{0}:{1} remove failed:{2}".format(
                    chunk, os.path.isfile(chunk), e))
    try:
        os.remove(tempjson)
    except Exception as e:
        print("{0}:{1} remove failed:{2}".format(
            tempjson, os.path.isfile(tempjson), e))
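
# Illustrative only: the temp.json bookkeeping file written by resumableMerge()
# maps a chunk index to its hash and download state, so an interrupted download
# can resume where it left off. The hashes below are made up.
import json

chunks = {
    0: {'filehash': 'QmChunkHash0', 'success': True},
    1: {'filehash': 'QmChunkHash1', 'success': False},
}
print(json.dumps(chunks))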
def getDiv(soup, class_name):
    # look up divs by the class passed in (not the literal string "class_name")
    div_list = soup.find_all('div', {'class': class_name})
    count = len(div_list)
    print count
    if count == 0:
        return []
    else:
        content = str(div_list[0])
        util.saveFile(class_name, content)
        text = getText(class_name)
        util.saveFile(class_name + ".txt", text)
        return div_list[0]
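
# Usage sketch for getDiv() above: parse a page with BeautifulSoup and pull the
# first <div> of a given class. The markup and class name are made up; getDiv()
# also writes the div and its text to disk via util.saveFile, and getText() is
# assumed to be defined alongside it.
from bs4 import BeautifulSoup

html = '<div class="post">hello</div><div class="post">world</div>'
soup = BeautifulSoup(html, 'html.parser')
print getDiv(soup, 'post')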
def savePage(url, rank):
    driver = returnDriver()
    driver.get(url)
    # js = "var q=document.documentElement.scrollTop=10000"
    # driver.execute_script(js)
    time.sleep(30)
    text = driver.page_source
    filename = "weixin" + str(rank)
    file_html = filename + ".html"
    file_pdf = filename + ".pdf"
    util.saveFile(file_html, text)
    pdfkit.from_file(file_html, file_pdf)
    driver.quit()
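
# Usage sketch for savePage(): pdfkit.from_file() shells out to the wkhtmltopdf
# binary, so it must be installed and on PATH. The URL below is a placeholder.
savePage('https://example.com/article', 1)  # writes weixin1.html and weixin1.pdf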
def getSingleGameHtml(self, singleGameLink):
    singleGameUrl = 'http://stat-nba.com/' + singleGameLink
    singleGameHtmlFileName = re.sub('/', '_', singleGameLink)
    print singleGameHtmlFileName
    singleGameHtmlFilePath = self._dataStoredPath + '/' + singleGameHtmlFileName
    print singleGameHtmlFilePath
    singleGameHtml = INVALID_STRING
    if os.path.isfile(singleGameHtmlFilePath):
        # If the html file exists, read it and return its content
        print singleGameHtmlFilePath + " exists, open the file and read it"
        singleGameHtml = util.openFile(singleGameHtmlFilePath)
    else:
        print singleGameHtmlFilePath + " doesn't exist, load the webpage"
        singleGameHtml = util.getHtmlFromUrl(singleGameUrl)
        util.saveFile(singleGameHtmlFilePath, singleGameHtml)
    return singleGameHtml
def getUrlContent(url):
    print "current processing url : %s" % url
    User_Agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36'
    req = urllib2.Request(url)
    req.add_header('User-Agent', User_Agent)
    response = None  # so the finally block is safe even if urlopen() raises
    try:
        response = urllib2.urlopen(req, timeout=5)
        data = response.read()
        filename = genFilename()
        util.saveFile(filename, data)
        print filename
        return filename
    except urllib2.URLError as e:
        print "error"
        if hasattr(e, 'code'):
            print "Error code:", e.code
        elif hasattr(e, 'reason'):
            print "Reason:", e.reason
        return "error"
    finally:
        if response:
            response.close()
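
# Usage sketch: fetch a URL and get back the generated local filename, or "error"
# on failure. genFilename() is assumed to be defined elsewhere in this module.
saved = getUrlContent('http://example.com/')
print saved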
title = '{}'.format(ctype.capitalize())

# adjust limits according to polarization
lim = (-15, 0)
if pol == 'VH':
    lim = (-20, -5)

# complete scatter subplot
scatter(x, y, title=title, xlab=xlab, ylab=ylab, xlim=lim, ylim=lim,
        denscol=True, measures=['mean_x', 'mean_y', 'rmse', 'eq'],
        regline=True, o2o=True, grid=True)
ax.legend(loc='upper left')
idx += 1

# show plot
#plt.tight_layout()

# save file
saveFile(args, 'algorithm')
#plt.show()
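
# The plotting snippets call a different saveFile(args, tag) helper rather than
# util.saveFile. A minimal sketch of such a helper, assuming args carries an
# output directory (the attribute name is a guess) and that the current
# matplotlib figure should be written to disk:
import os
import matplotlib.pyplot as plt

def saveFile(args, tag):
    out = getattr(args, 'output', None) or '.'
    name = tag if tag.endswith('.png') else tag + '.png'
    plt.savefig(os.path.join(out, name), dpi=300, bbox_inches='tight')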
xmin, xmax = min(xt), max(xt)
lnspc = np.linspace(xmin, xmax, len(mean))

# get normal distribution stats
pdf_mean, pdf_sigma = stats.norm.fit(reject_outliers(mean))
pdf_g = stats.norm.pdf(lnspc, pdf_mean, pdf_sigma)
plt.plot(lnspc, pdf_g, label="Gaussian")

# create text with pdf stats
fields = []
fields.append(r'Gaussian PDF')
fields.append(r'$\mu$ = {:.2f}'.format(pdf_mean))
fields.append(r'$\sigma$ = {:.2f}'.format(pdf_sigma))
text = '\n'.join(fields)

# create and draw text box
text_box = AnchoredText(text, frameon=True, loc='upper right')
plt.setp(text_box.patch, facecolor='white')  # , alpha=0.5
plt.gca().add_artist(text_box)

# enable grid
plt.grid()
idx += 1

# save file
saveFile(args, 'error-distribution')

# show plot
plt.show()
if idx <= cols:
    title = '{}'.format(alg.upper())

    # adjust limits according to polarization
    lim = (-15, 0)
    if pol == 'VH':
        lim = (-20, -5)

    # complete scatter subplot
    scatter(x, y, title=title, xlab=xlab, ylab=ylab, xlim=lim, ylim=lim,
            denscol=True, measures=['mean_x', 'mean_y', 'rmse', 'eq'],
            regline=True, o2o=True, grid=True)
    ax.legend(loc='upper left')
    idx += 1

# save file
saveFile(args, 'orbit-' + args.landcover + '.png')

# show plot
#plt.show()
# coding=utf-8
# python2.7
import urllib2
import urllib
import util
from bs4 import BeautifulSoup
import re
from pprint import pprint
import time
import json
from pymongo import MongoClient
import pytesseract
from PIL import Image, ImageEnhance

url = "http://s.weibo.com/ajax/pincode/pin?type=sass&ts=1508411223"
User_Agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36'
req = urllib2.Request(url)
req.add_header('User-Agent', User_Agent)
response = urllib2.urlopen(req, timeout=5)
data = response.read()
util.saveFile('test.jpg', data)

image = Image.open('./test.jpg')
imgry = image.convert('L')                # image enhancement: greyscale before binarization
sharpness = ImageEnhance.Contrast(imgry)  # contrast enhancement
sharp_img = sharpness.enhance(2.0)
vcode = pytesseract.image_to_string(sharp_img)  # OCR the enhanced image rather than the raw download
print(vcode)
schema = 'timeline' + '_' + slope
records = getRecords(plist, schema)

# plot time signature
x, y = getData(records)
plt.plot(x, y, label=slope)

# create text with stats
fields.append(r'{} $\mu$ = {:.2f}'.format(slope, np.mean(y)))

# create and draw text box
text = '\n'.join(fields)
text_box = AnchoredText(text, frameon=True, loc='lower right')
plt.setp(text_box.patch, facecolor='white')  # , alpha=0.5
plt.gca().add_artist(text_box)
ax.legend(loc='upper left')

# next subplot
plt.grid()
idx += 1

# show plot
plt.gcf().autofmt_xdate()

# save file
saveFile(args, 'timeline-rmse')
#plt.show()
title = '{}'.format(ctype.capitalize())
plt.title(title)
plt.xlabel(xlab)
plt.ylabel(ylab)

# for each polarization
for pol in args.polarization:
    plist['pol'] = pol
    records = getRecords(plist)

    # plot time signature
    x, y, yerr = getData(records)
    plt.plot(x, y)

    # alpha fill stddev range
    ymin = y - yerr
    ymax = y + yerr
    plt.fill_between(x, ymax, ymin, alpha=0.3)

# next subplot
plt.grid()
idx += 1

# show plot
plt.gcf().autofmt_xdate()

# save file
saveFile(args, 'timeline')
#plt.show()
def process(args):
    f = util.loadFile(args.input)
    output_dir = './output'
    if args.output:
        output_dir = args.output
    util.writeDir(output_dir)

    # encoding
    print('---encoding')
    e = encoding.encode(f, args.token)
    userID = e.get_userID()
    itemID = e.get_itemID()
    adjlist = e.get_adjlist()
    user_train, item_train, value_train = e.output4FM()

    if args.format == 'FM':
        if args.sampling == True:
            # zero sampling
            print('---Zero sampling')
            zero_user, zero_item, zero_value = sampling.get_zero(
                sampling.zeroSampling(adjlist))
            user_train.extend(zero_user)
            item_train.extend(zero_item)
            value_train.extend(zero_value)

        # testing
        print('---Create Testing Data')
        test_user, test_item, test_value = build.build(len(userID), len(itemID))

    # save
    print('---Save')
    util.saveFile('{0}/userID'.format(output_dir), userID)
    util.saveFile('{0}/itemID'.format(output_dir), itemID)
    if args.format == 'deepwalk-bipartite':
        # deepwalk
        util.saveFile('{0}/adjlist'.format(output_dir), adjlist)
    elif args.format == 'FM':
        # FM
        util.saveFile('{0}/rel-user'.format(output_dir),
                      ['0 {0}:1'.format(i) for i in range(len(userID))])
        util.saveFile('{0}/rel-item'.format(output_dir),
                      ['0 {0}:1'.format(i) for i in range(len(itemID))])
        util.saveFile('{0}/rel-user.train'.format(output_dir), user_train)
        util.saveFile('{0}/rel-item.train'.format(output_dir), item_train)
        util.saveFile('{0}/ans.train'.format(output_dir), value_train)
        util.saveFile('{0}/rel-user.test'.format(output_dir), test_user)
        util.saveFile('{0}/rel-item.test'.format(output_dir), test_item)
        util.saveFile('{0}/ans.test'.format(output_dir), test_value)
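
# Illustrative only: the rel-user / rel-item files written above use a sparse
# "target index:value" layout (as in libFM-style tools), one one-hot row per
# user or item. With a hypothetical three users:
n_users = 3
rel_user_lines = ['0 {0}:1'.format(i) for i in range(n_users)]
print '\n'.join(rel_user_lines)   # -> "0 0:1", "0 1:1", "0 2:1"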