Example #1
    def getTeamSeasonHtml(self):
        html = INVALID_STRING
        if self._htmlWebSite == INVALID_STRING:
            print "htmlWebSite has not been initialized yet"
        else:
            # If the data has already been synced, read the html from the
            # local cache instead of reloading the website.
            pathNeed2Test = CURRENT_PATH + self._Season0 + self._Season1 + '/' + self._Team_id
            print "Constructing path as", pathNeed2Test
            self._dataStoredPath = pathNeed2Test
            util.mkdir(pathNeed2Test)

            htmlFile = pathNeed2Test + '/' + self._GameType + '.html'
            self._seasonDataHtmlFile = htmlFile
            print "Checking whether the html file exists:", htmlFile

            if os.path.isfile(htmlFile):
                print "html file exists; reading it and returning the content"
                html = util.openFile(htmlFile)
            else:
                print "html file does not exist; loading the webpage from the network"
                html = util.getHtmlFromUrl(self._htmlWebSite)

                # Only cache the result if the download succeeded.
                if html != INVALID_STRING:
                    util.saveFile(htmlFile, html)

        return html
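Every snippet on this page calls into a small util module that the listing never shows. Below is a minimal sketch of the helpers the examples rely on; the signatures are inferred from the call sites, so treat the bodies (and the INVALID_STRING sentinel) as assumptions rather than the project's actual code.

import os
import urllib2

INVALID_STRING = ""  # assumed sentinel for "no data"

def mkdir(path):
    # Create the directory tree if it does not exist yet.
    if not os.path.isdir(path):
        os.makedirs(path)

def openFile(path):
    # Read a whole file and return its contents as a string.
    with open(path, 'rb') as f:
        return f.read()

def saveFile(path, content):
    # Write content to path, overwriting any existing file.
    with open(path, 'wb') as f:
        f.write(content)

def getHtmlFromUrl(url):
    # Fetch a URL, returning INVALID_STRING on any network failure.
    try:
        return urllib2.urlopen(url, timeout=10).read()
    except urllib2.URLError:
        return INVALID_STRING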
Example #2
import urllib2
import util

def test():
    # Route http requests through the proxy, then fetch a page that
    # presumably echoes the requesting IP so the proxy can be verified.
    proxy_support = urllib2.ProxyHandler({'http': 'http://61.187.251.235:80'})
    opener = urllib2.build_opener(proxy_support, urllib2.HTTPHandler)
    urllib2.install_opener(opener)
    content = urllib2.urlopen('http://180.76.247.130:8888/').read()

    util.saveFile("ipcheck.html", content)
Example #4
    def resumableMerge(self, filehash, filename=None):
        # Not thread-safe; intended for single-threaded use only.
        filehash_path = os.path.join(self.downloadpath, filehash)
        tempjson = os.path.join(filehash_path, 'temp.json')
        if not os.path.isfile(tempjson):
            # Save the chunk list into temp.json so the merge can resume later.
            self.list(filehash)
            if self.links:
                i = 0
                for link in self.links:
                    if 'Hash' in link:
                        self.chunks.update({
                            i: {
                                'filehash': link.get('Hash'),
                                'success': False
                            }
                        })
                    i += 1
                util.saveFile(tempjson, json.dumps(self.chunks))
            else:
                print("no chunks: error getting the chunk list for {}".format(
                    filehash))
                return
        # Download each chunk that has not succeeded yet.
        with open(tempjson) as target_file:
            self.chunks = json.load(target_file)
        if self.chunks:
            for chunk, chunk_result in self.chunks.iteritems():
                if not chunk_result.get('success'):
                    chunk_result['success'] = self.downloadhash(
                        chunk_result.get('filehash'),
                        filehash_path) or chunk_result.get('success')
                    # Checkpoint progress after every chunk.
                    util.saveFile(tempjson, json.dumps(self.chunks))
            # Merge the chunks into a single file.
            if filename:
                localfile = os.path.join(filehash_path, filename)
            else:
                localfile = os.path.join(filehash_path, filehash)
            with open(localfile, 'wb') as target_file:
                for i in range(len(self.chunks)):
                    chunk = os.path.join(
                        filehash_path,
                        self.chunks.get(str(i)).get('filehash'))
                    with open(chunk, 'rb') as source_file:
                        for line in source_file:
                            target_file.write(line)
                    try:
                        os.remove(chunk)  # remove this chunk to save space
                    except Exception, e:
                        print("{0}:{1} remove failed:{2}".format(
                            chunk, os.path.isfile(chunk), e))
                try:
                    os.remove(tempjson)
                except Exception, e:
                    print("{0}:{1} remove failed:{2}".format(
                        tempjson, os.path.isfile(tempjson), e))
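For reference, the temp.json checkpoint written above maps each chunk index to its hash and download state, so an interrupted merge can resume where it stopped. For a two-chunk file it would look roughly like this (hash values invented):

{"0": {"filehash": "QmHashOfChunk0", "success": true},
 "1": {"filehash": "QmHashOfChunk1", "success": false}}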
Example #5

def getDiv(soup, class_name):
    # Find every div carrying the requested CSS class.
    div_list = soup.find_all('div', {'class': class_name})
    count = len(div_list)
    print count
    if count == 0:
        return []
    else:
        content = str(div_list[0])
        util.saveFile(class_name, content)
        text = getText(class_name)
        util.saveFile(class_name + ".txt", text)
        return div_list[0]
Example #6

def savePage(url, rank):
    driver = returnDriver()
    driver.get(url)
    # js = "var q=document.documentElement.scrollTop=10000"
    # driver.execute_script(js)
    time.sleep(30)  # give the page time to finish rendering
    text = driver.page_source
    filename = "weixin" + str(rank)
    file_html = filename + ".html"
    file_pdf = filename + ".pdf"
    util.saveFile(file_html, text)
    pdfkit.from_file(file_html, file_pdf)
    driver.quit()
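Note that pdfkit is a thin wrapper around the wkhtmltopdf command-line tool; pdfkit.from_file will fail unless the wkhtmltopdf binary is installed and reachable on the PATH.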
Example #7
    def getSingleGameHtml(self, singleGameLink):
        singleGameUrl = 'http://stat-nba.com/' + singleGameLink
        # Flatten the link path into a file name (slashes become underscores).
        singleGameHtmlFileName = re.sub('/', '_', singleGameLink)
        print singleGameHtmlFileName
        singleGameHtmlFilePath = self._dataStoredPath + '/' + singleGameHtmlFileName
        print singleGameHtmlFilePath

        singleGameHtml = INVALID_STRING
        if os.path.isfile(singleGameHtmlFilePath):
            # If the html file exists, read it and return its content.
            print singleGameHtmlFilePath + " exists, opening and reading it"
            singleGameHtml = util.openFile(singleGameHtmlFilePath)
        else:
            print singleGameHtmlFilePath + " doesn't exist, loading the webpage"
            singleGameHtml = util.getHtmlFromUrl(singleGameUrl)
            # Only cache the page when the download actually succeeded.
            if singleGameHtml != INVALID_STRING:
                util.saveFile(singleGameHtmlFilePath, singleGameHtml)
        return singleGameHtml
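A hypothetical call, assuming getTeamSeasonHtml has already run so that self._dataStoredPath points at the season's cache directory (the instance name and the link below are made up for illustration):

gameHtml = crawler.getSingleGameHtml('game/12345.html')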
Example #9

def getUrlContent(url):
    print "currently processing url: %s" % url
    User_Agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36'
    req = urllib2.Request(url)
    req.add_header('User-Agent', User_Agent)
    response = None  # defined up front so the finally block never sees an unbound name
    try:
        response = urllib2.urlopen(req, timeout=5)
        data = response.read()
        filename = genFilename()
        util.saveFile(filename, data)
        print filename
        return filename
    except urllib2.URLError as e:
        print "error"
        if hasattr(e, 'code'):
            print "Error code:", e.code
        elif hasattr(e, 'reason'):
            print "Reason:", e.reason

        return "error"
    finally:
        if response:
            response.close()
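A quick usage sketch; the URL is only an illustration, and genFilename is a project helper that the listing does not show:

saved = getUrlContent('http://stat-nba.com/')
if saved != "error":
    print "page stored in", saved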
Example #10

                title = '{}'.format(ctype.capitalize())

            # adjust limits according to polarization
            lim = (-15, 0)
            if pol == 'VH':
                lim = (-20, -5)

            # complete scatter subplot
            scatter(x,
                    y,
                    title=title,
                    xlab=xlab,
                    ylab=ylab,
                    xlim=lim,
                    ylim=lim,
                    denscol=True,
                    measures=['mean_x', 'mean_y', 'rmse', 'eq'],
                    regline=True,
                    o2o=True,
                    grid=True)

            ax.legend(loc='upper left')
            idx += 1

# show plot
#plt.tight_layout()

# save file
saveFile(args, 'algorithm')
#plt.show()
Example #11
            xmin, xmax = min(xt), max(xt)
            lnspc = np.linspace(xmin, xmax, len(mean))

            # get normal distribution stats
            pdf_mean, pdf_sigma = stats.norm.fit(reject_outliers(mean))
            pdf_g = stats.norm.pdf(lnspc, pdf_mean, pdf_sigma)
            plt.plot(lnspc, pdf_g, label="Gaussian")

            # create text with pdf stats
            fields = []
            fields.append(r'Gaussian PDF')
            fields.append(r'$\mu$ = {:.2f}'.format(pdf_mean))
            fields.append(r'$\sigma$ = {:.2f}'.format(pdf_sigma))
            text = '\n'.join(fields)

            # create and draw text box
            text_box = AnchoredText(text, frameon=True, loc='upper right')
            plt.setp(text_box.patch, facecolor='white')  # , alpha=0.5
            plt.gca().add_artist(text_box)

            # enable grid
            plt.grid()
            idx += 1

# save file
saveFile(args, 'error-distribution')

# show plot
plt.show()

Example #12

        if idx <= cols:
            title = '{}'.format(alg.upper())

        # adjust limits according to polarization
        lim = (-15, 0)
        if pol == 'VH':
            lim = (-20, -5)

        # complete scatter subplot
        scatter(x,
                y,
                title=title,
                xlab=xlab,
                ylab=ylab,
                xlim=lim,
                ylim=lim,
                denscol=True,
                measures=['mean_x', 'mean_y', 'rmse', 'eq'],
                regline=True,
                o2o=True,
                grid=True)

        ax.legend(loc='upper left')
        idx += 1

# save file
saveFile(args, 'orbit-' + args.landcover + '.png')

# show plot
#plt.show()
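The plotting fragments above call a bare saveFile(args, name), which is a different helper from util.saveFile. A plausible sketch, assuming args carries an output directory; both the body and args.output are inferred, not confirmed by the source:

import os
import matplotlib.pyplot as plt

def saveFile(args, name):
    # Save the current figure; add a .png extension unless one is present.
    if not os.path.splitext(name)[1]:
        name += '.png'
    plt.savefig(os.path.join(args.output, name), dpi=300)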
Example #13
# coding=utf-8
# python2.7
import urllib2
import urllib
import util
from bs4 import BeautifulSoup
import re
from pprint import pprint
import time
import json
from pymongo import MongoClient
import pytesseract
from PIL import Image, ImageEnhance

url = "http://s.weibo.com/ajax/pincode/pin?type=sass&ts=1508411223"
User_Agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36'
req = urllib2.Request(url)
req.add_header('User-Agent', User_Agent)

response = urllib2.urlopen(req, timeout=5)
data = response.read()
util.saveFile('test.jpg', data)

image = Image.open('./test.jpg')
imgry = image.convert('L')  # convert to grayscale
sharpness = ImageEnhance.Contrast(imgry)  # boost the contrast
sharp_img = sharpness.enhance(2.0)
vcode = pytesseract.image_to_string(sharp_img)  # OCR the contrast-enhanced captcha

print(vcode)
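pytesseract likewise just shells out to Google's Tesseract-OCR engine, which has to be installed separately; captcha images generally need the grayscale-plus-contrast preprocessing shown above before Tesseract returns anything usable.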
Example #14

                schema = 'timeline' + '_' + slope
                records = getRecords(plist, schema)

                # plot time signature
                x, y = getData(records)
                plt.plot(x, y, label=slope)

                # create text with stats
                fields.append(r'{} $\mu$ = {:.2f}'.format(slope, np.mean(y)))

            # create and draw text box
            text = '\n'.join(fields)
            text_box = AnchoredText(text, frameon=True, loc='lower right')

            plt.setp(text_box.patch, facecolor='white')  # , alpha=0.5
            plt.gca().add_artist(text_box)

            ax.legend(loc='upper left')

        # next subplot
        plt.grid()
        idx += 1

# show plot
plt.gcf().autofmt_xdate()

# save file
saveFile(args, 'timeline-rmse')
#plt.show()
Example #15
                title = '{}'.format(ctype.capitalize())

            plt.title(title)
            plt.xlabel(xlab)
            plt.ylabel(ylab)

            # for each polarization
            for pol in args.polarization:

                plist['pol'] = pol
                records = getRecords(plist)

                # plot time signature
                x, y, yerr = getData(records)
                plt.plot(x, y)

                # alpha fill stddev range
                ymin = y - yerr
                ymax = y + yerr
                plt.fill_between(x, ymax, ymin, alpha=0.3)

            # next subplot
            plt.grid()
            idx += 1

# show plot
plt.gcf().autofmt_xdate()

# save file
saveFile(args, 'timeline')
#plt.show()

Example #16
def process(args):
    f = util.loadFile(args.input)
    output_dir = './output'
    if args.output:
        output_dir = args.output

    util.writeDir(output_dir)

    # encoding
    print('---Encoding')
    e = encoding.encode(f, args.token)
    userID = e.get_userID()
    itemID = e.get_itemID()
    adjlist = e.get_adjlist()
    user_train, item_train, value_train = e.output4FM()

    if args.format == 'FM':
        if args.sampling:
            # zero sampling
            print('---Zero sampling')
            zero_user, zero_item, zero_value = sampling.get_zero(sampling.zeroSampling(adjlist))
            user_train.extend(zero_user)
            item_train.extend(zero_item)
            value_train.extend(zero_value)

        # testing
        print('---Create Testing Data')
        test_user, test_item, test_value = build.build(len(userID), len(itemID))

    # save
    print('---Save')
    util.saveFile('{0}/userID'.format(output_dir), userID)
    util.saveFile('{0}/itemID'.format(output_dir), itemID)

    if args.format == 'deepwalk-bipartite':
        # deepwalk
        util.saveFile('{0}/adjlist'.format(output_dir), adjlist)
    elif args.format == 'FM':
        # FM
        util.saveFile('{0}/rel-user'.format(output_dir), ['0 {0}:1'.format(i) for i in range(len(userID))])
        util.saveFile('{0}/rel-item'.format(output_dir), ['0 {0}:1'.format(i) for i in range(len(itemID))])
        util.saveFile('{0}/rel-user.train'.format(output_dir), user_train)
        util.saveFile('{0}/rel-item.train'.format(output_dir), item_train)
        util.saveFile('{0}/ans.train'.format(output_dir), value_train)
        util.saveFile('{0}/rel-user.test'.format(output_dir), test_user)
        util.saveFile('{0}/rel-item.test'.format(output_dir), test_item)
        util.saveFile('{0}/ans.test'.format(output_dir), test_value)
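A minimal driver for process(), assuming the flags implied by the attribute accesses above (input, output, token, format, sampling); this wrapper is a guess at the missing command line, not part of the original project:

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', required=True, help='path to the raw interaction file')
    parser.add_argument('--output', help='output directory (defaults to ./output)')
    parser.add_argument('--token', help='token option forwarded to encoding.encode')
    parser.add_argument('--format', choices=['FM', 'deepwalk-bipartite'], default='FM')
    parser.add_argument('--sampling', action='store_true', help='enable zero sampling for FM')
    process(parser.parse_args())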