def get_country_news(lat1, lon1, lat2, lon2):
    # Make sure lat1, lon1 is less than lat2, lon2
    if lat1 > lat2:
        lat1, lat2 = lat2, lat1
    if lon1 > lon2:
        lon1, lon2 = lon2, lon1

    # Find all countries within the boundaries
    db = mysql.Mysql()
    db.query("SELECT * FROM Country WHERE lat != '-1' AND lon != '-1'")
    locations = []
    while True:
        country = db.fetch()
        if country == {}:
            break
        if not (lat1 < country['lat'] < lat2 and lon1 < country['lon'] < lon2):
            continue
        country_name = country['countryName']
        country_lat = get_real_latlon(country['lat'])
        country_lon = get_real_latlon(country['lon'])
        locations.append([country_name, country_lat, country_lon])
    return locations
def getNewsList():
    db = mysql.Mysql()
    try:
        index = request.args.get('index')
        pageIndex = int(pageCount) * int(index)
        sql = 'select title,time,url from FreeNews order by time desc limit %s,%s' % (pageIndex, pageCount)
        db.queryData(sql)
        result_mysql = db.cur.fetchall()
        result = []
        for item in result_mysql:
            result.append({"title": item[0], "time": item[1], "url": item[2]})
        db.conn.close()
        if len(result) == 0:
            return getBaseReturnValue(data=result, msg="No more data!", code=False)
        else:
            return getBaseReturnValue(data=result, msg='OK', code=True)
    except KeyError as e:
        return getBaseReturnValue(data=[], msg="Error", code=False)
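# Sketch only: the same pagination query with DB-API parameter binding instead of
# string interpolation. It assumes db.cur is a MySQLdb-style cursor and that
# pageCount is the module-level page size implied above; the helper name is
# illustrative, not part of the original code.
def getNewsListParams(index, pageCount=20):
    db = mysql.Mysql()
    sql = 'select title,time,url from FreeNews order by time desc limit %s,%s'
    db.cur.execute(sql, (int(pageCount) * int(index), int(pageCount)))
    rows = db.cur.fetchall()
    db.conn.close()
    return [{"title": r[0], "time": r[1], "url": r[2]} for r in rows]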
def initialize(self):
    db = self.application.db
    if not db:
        self.application.db = mysql.Mysql()
        db = self.application.db
    assert db
    db.open_session()
def main(self, pageIndex):
    f_html = open('houseUrl.txt', 'w')
    # Process pool: 4 workers running concurrently
    pool = Pool(processes=4)
    # for page in pageIndex:
    #     print pool.apply_async(self.getAllUrl, (pageIndex,))
    poolurl = pool.map(self.getAllUrl, pageIndex)
    urls = []
    # Merge the per-page sub-lists into a single list
    map(urls.extend, poolurl)
    # join() turns the list into a single string
    f_html.write('\n'.join(urls))
    # Close the pool so no new workers are created
    pool.close()
    # Wait for every worker to finish so the main process does not exit first
    pool.join()
    f_html.close()
    # Connect to the database
    self.mysql = mysql.Mysql()
    # Store the collected urls in the database
    for i in range(0, len(urls)):
        url_dict = {
            "url": urls[i],
        }
        self.mysql.insertData('houseurl', url_dict)
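# The map(urls.extend, poolurl) idiom above relies on Python 2's eager map(); under
# Python 3 map() is lazy and the call would silently do nothing. A small sketch of a
# flattening helper with itertools.chain that behaves the same on both versions
# (the helper name is illustrative only).
from itertools import chain

def flatten(list_of_lists):
    # chain.from_iterable concatenates the sub-lists without mutating them
    return list(chain.from_iterable(list_of_lists))

# urls = flatten(poolurl)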
def __init__(self):
    # self.PageNum = 0
    # self.payload = {'start': self.PageNum, 'type': 'T'}
    self.TitleList = ['小说']
    # self.title = None
    self.BaseUrl = 'https://book.douban.com/tag/'
    self.mysql = mysql.Mysql()
def output(self, data):
    time_now = datetime.datetime.now()
    data['datetime'] = '\"' + time_now.strftime('%Y-%m-%d %H:%M:%S') + '\"'
    # to csv
    df = pd.DataFrame(data.items(), columns=['item', 'content'])
    bak_file = './result/output_%s.csv' % time_now.strftime('%Y-%m-%d-%H-%M-%S')
    df.to_csv(bak_file, index=False, header=False, sep='\t', mode='wb', encoding='utf-8')
    # for bak_up
    df.to_csv('./result/output.csv', index=False, header=False, sep='\t', mode='wb', encoding='utf-8')
    logger.info('Bak_file: %s' % bak_file)
    # to mysql
    sql = mysql.Mysql()
    sql.insert_data(table_name, data)
    logger.info('Output to mysql is done.')
def demo01(request):
    db = mysql.Mysql()
    sql = "show databases;"
    params = {}
    params['sql'] = sql
    res = db.exec_query(params)
    jsonStr = json.dumps(res, cls=MyEncoder)
    return HttpResponse(jsonStr)
class ProxyMiddleware(object):
    mysql = mysql.Mysql()

    def process_request(self, request, spider):
        AllPro = self.mysql.selectData()
        length = len(AllPro)
        print length
        print '22222222222222222222'
        n = random.randint(0, length - 1)
        pro = AllPro[n][1]
        print pro
        request.meta['proxy'] = pro
def demo02(request):
    # Initialise the db -- a specific mysql instance cannot be chosen yet; needs improvement
    db = mysql.Mysql()
    # Write your sql statement here
    # sql = "select * from account_bkuser"
    # Define the parameters
    params = {}
    params['sql'] = ''
    # Use the db's query_all() to fetch every result
    res = db.query_all(params)
    # Convert the results to json and return them over http
    jsonStr = json.dumps(res, cls=MyEncoder)
    return HttpResponse(jsonStr)
def load_subcategories_news(country_id, subcategory_id):
    db = mysql.Mysql()
    news_url = news_url_prefix + str(subcategory_id) + news_url_suffix
    json_response = urllib2.urlopen(news_url)
    news = json.loads(json_response.read())
    news = news['articles']
    for article in news:
        title = db.escape_string(article['title'])
        publish_date = db.escape_string(article['publish_date'])
        url = db.escape_string(article['url'])
        query = "INSERT INTO Article (title, minZoom, locId, publishDate, url) \
            VALUES('" + title + "', '2', '" + str(country_id) + "', '" + publish_date + "', '" + url + "')"
        db.query(query)
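# Sketch only: the same Article insert expressed with MySQLdb parameter binding
# rather than concatenating escaped strings. The connection and helper name are
# illustrative; whether the mysql.Mysql wrapper exposes an equivalent call is not
# known from the snippet above.
import MySQLdb

def insert_article(conn, country_id, article):
    cur = conn.cursor()
    cur.execute(
        "INSERT INTO Article (title, minZoom, locId, publishDate, url) "
        "VALUES (%s, '2', %s, %s, %s)",
        (article['title'], str(country_id), article['publish_date'], article['url']))
    conn.commit()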
def get_country_article_list(country_name):
    db = mysql.Mysql()
    country_name = db.escape_string(country_name)
    db.query("SELECT id FROM Country WHERE countryName='" + country_name + "'")
    country = db.fetch()
    country_id = country['id']
    db.query("SELECT * FROM Article WHERE minZoom='2' AND locId='" + str(country_id) + "'")
    article_list = []
    while True:
        article = db.fetch()
        if article == {}:
            break
        article_list.append(article)
    return article_list
def run(self):
    gpioFd = []
    # Create the database object
    gpioDB = mysql.Mysql(cfg.getLocalIp(), int(cfg.getLocalPort()), cfg.getLocalUser(),
                         cfg.getLocalPassword(), cfg.getLocalDatabase(), cfg.getLocalTable())
    # Connect to the database
    gpioDB.connectDatabase()
    epoll = select.epoll()
    # Initialisation: refresh the state of every local gpio once
    for gpioIndex in range(8):
        # Export the gpio
        gpio.gpioExport(gpioTuple[gpioIndex])
        # Configure it as an input
        gpio.setInput(gpioTuple[gpioIndex])
        # Trigger an interrupt on both edges
        gpio.setEdge(gpioTuple[gpioIndex], 'both')
        # Insert the gpio state into the database
        gpioDB.insertInto(gpioIndex, '%d' % (gpioIndex + 1), 'gpio%d' % (gpioIndex + 1),
                          gpio.getInputValue(gpioTuple[gpioIndex]), gpioIndex)
        # Keep the value file open to avoid the cost of repeated open/close calls
        f = gpio.gpioInputFile(gpioTuple[gpioIndex])
        gpioFd.append(f)
        # print ('fileno: %d' % f.fileno())
        # Register the file with epoll
        epoll.register(f, select.EPOLLERR | select.EPOLLPRI)
    while True:
        events = epoll.poll()
        for fileno, event in events:
            # Data is ready to be read
            if event & select.EPOLLPRI:
                # Find which gpio changed and read it
                for i in range(8):
                    if fileno == gpioFd[i].fileno():
                        value = gpioFd[i].read().strip('\n')
                        # print ('f: %s' % value)
                        # Rewind to the beginning of the value file
                        gpioFd[i].seek(0, 0)
                        # Save the new value to mysql (use the loop index i here,
                        # not the stale gpioIndex left over from the init loop)
                        gpioDB.insertInto(1, '%d' % (i + 1), 'gpio%d' % (i + 1), value, i)
                        break
            if event & select.EPOLLERR:
                pass
def ready(self):
    self.mysql = mysql.Mysql()
    AllNews = self.mysql.selectData('new')
    content = ''
    for n in AllNews:
        title = n[1].encode('utf-8')
        url = n[2].encode('utf-8')
        each_line = '<a href="%s">%s</a><br><br>' % (url, title)
        content = content + each_line
    sql = "select value from config where field='laoju'"
    laoju = self.mysql.NoemalSelect(sql)
    str_laoju = laoju[0][0].split(",")
    content = content + "<h5>%s<br><br>%s<br><br>%s</h5>" % (str_laoju[0].encode('utf-8'),
                                                             str_laoju[1].encode('utf-8'),
                                                             str_laoju[2].encode('utf-8'))
    return content
def run(self):
    gpioDB = mysql.Mysql(cfg.getLocalIp(), int(cfg.getLocalPort()), cfg.getLocalUser(),
                         cfg.getLocalPassword(), cfg.getLocalDatabase(), cfg.getLocalTable())
    gpioDB.connectDatabase()
    # sqlserver = cdll.LoadLibrary(os.getcwd() + '/libsqlserver.so')
    sqlserver = cdll.LoadLibrary('/usr/share/gpio/libsqlserver.so')
    while True:
        if self.first_connect or self.upload_result == False:
            # print ('%s:%s' % (cfg.getRemoteIp(), cfg.getRemotePort()))
            if sqlserver.openSqlserver(
                    cfg.getRemoteUser().encode(), cfg.getRemotePassword().encode(),
                    cfg.getRemoteDatabase().encode(),
                    ('%s:%s' % (cfg.getRemoteIp(), cfg.getRemotePort())).encode(),
                    cfg.getRemoteTable().encode()) == 0:
                time.sleep(1)
                continue
            self.first_connect = False
            self.upload_result = True
        # Get the current time
        timeLocal = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
        # Fetch the local rows whose update time is earlier than now
        results = gpioDB.selectByUpdateTime(timeLocal)
        # The tuples come back in reverse order, so iterate over them backwards
        # print('upload time ' + configureInotify.get_section_value('remote', 'interval_uploadData'))
        for res in results[::-1]:
            print(res)
            print(res[0])
            # Convert to a unix timestamp
            t = res[5].timestamp()
            # Upload the row
            mutexLock.acquire()
            if sqlserver.insertInto(res[1], res[2].encode(), res[3].encode(),
                                    res[4].encode(), int(t), res[6]) == 0:
                mutexLock.release()
                # The upload failed: reconnect to sqlserver
                self.upload_result = False
                continue
            mutexLock.release()
            # Delete rows locally once they have been uploaded
            gpioDB.deleteByRealDateLogId(res[0])
        time.sleep(int(cfg.getUploadInterval()))
def getSearchList():
    db = mysql.Mysql()
    try:
        keyword = request.args.get('keyword')
        sql = "select title,url from FreeVideo where title like '%" + keyword + "%' limit 0,20"
        db.queryData(sql)
        result_mysql = db.cur.fetchall()
        result = []
        for item in result_mysql:
            result.append({"title": item[0], "url": item[1]})
        db.conn.close()
        return getBaseReturnValue(data=result, msg='OK', code=True)
    except KeyError as e:
        return getBaseReturnValue(data=[], msg="Error", code=False)
def get_country_categories(url):
    db = mysql.Mysql()
    country_categories = {}
    json_response = urllib2.urlopen(url).read()
    subcategories = json.loads(json_response)
    for subcategory in subcategories:
        subcategory_name = subcategory['english_subcategory_name']
        db.query("SELECT id, countryName FROM Country")
        for country in db.iterate_rows():
            if country['countryName'] in subcategory_name:
                country_categories[country['id']] = subcategory['subcategory_id']
                break
    db.query("SELECT id, countryName FROM Country")
    uncategorized_countries = {}
    for country in db.iterate_rows():
        if country['id'] not in country_categories:
            uncategorized_countries[country['id']] = country['countryName']
    return country_categories, uncategorized_countries
def load_uncategorized_countries_news(country_id, country_name):
    db = mysql.Mysql()
    country_name = country_name.replace('.', '')
    url = uncategorized_news_url + urllib.quote_plus(country_name)
    try:
        json_response = urllib2.urlopen(url).read()
    except Exception as e:
        print e
        print 'Country Name: ' + country_name
        print 'URL: ' + url
        # Bail out here, otherwise json_response is undefined below
        return
    articles = json.loads(json_response)
    articles = articles['articles']
    for article in articles:
        title = db.escape_string(article['title'])
        publish_date = db.escape_string(article['publish_date'])
        url = db.escape_string(article['url'])
        query = "INSERT INTO Article (title, minZoom, locId, publishDate, url) \
            VALUES('" + title + "', '2', '" + str(country_id) + "', '" + publish_date + "', '" + url + "')"
        db.query(query)
class DoubanSpider(scrapy.Spider):
    mysql = mysql.Mysql()
    name = "douban"
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate, sdch, br",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Content-Type": " application/x-www-form-urlencoded; charset=UTF-8",
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36",
        'Host': "www.douban.com",
        "Upgrade-Insecure-Requests": '1',
        'Referer': "https://www.douban.com",
    }

    def start_requests(self):
        # AllPro = self.mysql.selectData()
        # print AllPro[0][1]
        urls = [
            'https://www.douban.com/',
        ]
        url = 'https://www.douban.com/'
        url2 = 'https://www.baidu.com/'
        # for url in urls:
        yield scrapy.Request(url=url, headers=self.headers, callback=self.parse, dont_filter=True)
        yield scrapy.Request(url=url2, headers=self.headers, callback=self.parse, dont_filter=True)
        # Setting the proxy manually works:
        # for url in urls:
        #     yield scrapy.Request(url=url, meta={'proxy': AllPro[0][1]}, headers=self.headers, callback=self.parse)
        # Try the downloader middleware instead

    def parse(self, response):
        print response.body
def query(zoom, query):
    db = mysql.Mysql()
    if zoom not in zooms:
        abort(404)
    zoom = db.escape_string(zoom)
    query = db.escape_string(query)
    depth = zoom_to_depth[zoom]
    # Get the location_ids
    db.query("SELECT id FROM " + depth + " WHERE " + depth.lower() + "Name = '" + query + "'")
    location_id = db.fetch()
    location_id = str(location_id['id'])
    # Get the articles
    db.query("SELECT id, title, minZoom, publishDate, url FROM Article WHERE \
        minZoom='" + zoom + "' AND locId='" + location_id + "'")
    articles = []
    while True:
        article = db.fetch()
        if article == {}:
            break
        articles.append(article)
    return json.dumps(articles)
def getMediaList():
    db = mysql.Mysql()
    try:
        index = request.args.get('index')
        leve1 = request.args.get('leve1')
        leve2 = request.args.get('leve2')
        result = []
        pageIndex = int(pageCount) * int(index)
        if leve2 == "":
            sql = 'select title,url from FreeVideo where leve1="%s" limit %s,%s' % (leve1, pageIndex, pageCount)
        else:
            sql = 'select title,url from FreeVideo where leve2="%s" limit %s,%s' % (leve2, pageIndex, pageCount)
        db.queryData(sql=sql)
        result_mysql = db.cur.fetchall()
        for item in result_mysql:
            ji = {"title": item[0], "url": item[1]}
            result.append(ji)
        db.conn.close()
        if len(result) == 0:
            return getBaseReturnValue(data=result, msg="No more data!", code=False)
        else:
            return getBaseReturnValue(data=result, msg='OK', code=True)
    except KeyError as e:
        print(e)
        return getBaseReturnValue(data=[], msg="Error", code=False)
        elif quarter == 2:
            if soup_table[i].a:
                insert_item = insert_item + "'" + soup_table[i].a.string[7:17] + "'"
            else:
                insert_item = insert_item + "'" + soup_table[i].string[8:18] + "'"
        else:
            insert_item = insert_item + "'" + soup_table[i].string[8:18] + "'"
    else:
        insert_item = insert_item + "'" + soup_table[i].string[8:18] + "'"
    for j in range(1, 8):
        insert_item = insert_item + ", '" + soup_table[i + j].string + "'"
    sql.insert(table_name, insert_columns, insert_item)
    sql.conn.commit()
    sql.close_conn()


if __name__ == '__main__':
    # id_securities = get_id_security(db_host, db_user, db_passwd, db_name)
    id_securities = ['300388']
    sql = mysql.Mysql(db_host, db_user, db_passwd, db_name, charset_type)
    unit_action(id_securities, year_today, quarter_today, sql)
    # unit_test(600053, year_today, quarter_today, sql)

'''
002752
300208
300219
3
Traceback (most recent call last):
  File "dailyprice_create_sql.py", line 160, in <module>
    unit_action(id_securities[2372:], year_today, quarter_today, sql)
  File "dailyprice_create_sql.py", line 128, in unit_action
    year_start = soup_table[0].find_all('option')[-5].string  # Get the year
IndexError: list index out of range
'''
def query():
    db = mysql.Mysql()
    return jsonify(db.queryData())
""" Updates the list of video files on the local computer """ import mysql import os from comm import Comm databaseConnect = mysql.Mysql() databaseConnectDelete = mysql.Mysql() comminstance = Comm() #Get the list of local directories query = "SELECT directory FROM directories WHERE server='localfiles'" databaseConnect.query(query) localDirectory = databaseConnect.fetch() directoryList = [] while localDirectory != {}: directoryList.append(localDirectory['directory']) localDirectory = databaseConnect.fetch() #Get the list of local files and make sure they are in the database for directory in directoryList: fileNameList = [ f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f)) ] fileNameList.sort() for fileName in fileNameList: query = "SELECT * FROM localfiles WHERE filedirectory='" + databaseConnect.escape_string( directory) + "' AND filename='" + databaseConnect.escape_string(
class PoolSpider(scrapy.Spider):
    identity = 0
    tool = Tool()
    mysql = mysql.Mysql()
    name = "pool"
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate, sdch, br",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Content-Type": " application/x-www-form-urlencoded; charset=UTF-8",
        'User-Agent': "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0",
        'Host': "www.xicidaili.com",
        "Upgrade-Insecure-Requests": '1',
        'Referer': "http://www.xicidaili.com/nn/1",
    }

    def start_requests(self):
        urls = 'http://www.xicidaili.com/nn/'
        # urls = 'http://www.kuaidaili.com/proxylist/1/'
        self.mysql.clearnIp()
        for i in range(1, 11):
            print i
            yield scrapy.Request(url=urls + str(i), headers=self.headers, callback=self.parse)
        # return self.test()
        # An earlier interactive entry point:
        # choice = raw_input('(T)est (C)heck (R)un\n')
        # if choice == 'T':
        #     self.mysql.clearnUsefulIp()
        #     return self.test()
        # elif choice == 'R':
        #     return self.choice()
        # elif choice == 'C':
        #     return self.check()
        # else:
        #     print 'Wrong input'

    def parse(self, response):
        # Grab the ip and port cells (2nd and 3rd table columns) from the proxy list page
        ip = response.xpath(
            '//td[(((count(preceding-sibling::*) + 1) = 3) and parent::*)] | //td[(((count(preceding-sibling::*) + 1) = 2) and parent::*)]'
        ).extract()
        ips = []
        for i in ip:
            ips.append(self.tool.replace(i))
        # identity = 0
        if ip:
            while ips != []:
                port = ips.pop()
                Anip = ips.pop()
                if self.mysql.insertData(self.identity, Anip, port):
                    pass
                    # print u"ip saved"
                else:
                    print u"failed to save ip"
                self.identity += 1
        # The block below wrote the scraped ips to a file instead of mysql
        # filename = 'ipPool.txt'
        # with open(filename, 'w') as f:
        #     count = 1
        #     for eachIp in ips:
        #         f.write(eachIp),
        #         if count % 2 == 0:
        #             f.write('\n')
        #         else:
        #             f.write('\t')
        #         count += 1

    # Try every stored proxy against baidu and record the ones that still work
    def test(self):
        self.mysql.clearnUsefulIp()
        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate, sdch, br",
            "Accept-Language": "zh-CN,zh;q=0.8",
            "Cache-Control": "max-age=0",
            "Connection": "keep-alive",
            "Content-Type": " application/x-www-form-urlencoded; charset=UTF-8",
            'User-Agent': "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0",
            'Host': "www.baidu.com",
            "Upgrade-Insecure-Requests": '1',
            # 'Referer': "http://www.xicidaili.com/nn/1",
        }
        url = 'https://www.baidu.com/'
        # fp = open('ipPool.txt', 'r')
        # AnIp = fp.readlines()
        # fp.close()
        AllIp = self.mysql.selectData()
        proxys = []
        for p in AllIp:
            # TheIp = p.strip('\n').split('\t')
            # print TheIp[0], TheIp[1]
            pro = 'http://' + str(p[1]) + ':' + str(p[2])
            try:
                print pro
                yield scrapy.Request(url=url, headers=headers,
                                     meta={'proxy': pro, 'download_timeout': 10},
                                     callback=self.test_parse, dont_filter=True)
            except:
                print 'next'
                # sys.exit(1)

    # Proxies that answer successfully are written back as usable ips
    def test_parse(self, response):
        print '6666666666666666'
        # fp = open('usefulIp.txt', 'a')
        # fp.write(response.meta['proxy'])
        # fp.write('\n')
        # print response.meta['proxy']
        proxy = response.meta['proxy']
        if self.mysql.usefulIp(self.identity, proxy):
            pass
            # print u"ip saved"
        else:
            print u"failed to save ip"
        self.identity += 1

    # @classmethod
    # def from_crawler(cls, crawler, *args, **kwargs):
    #     spider = super(PoolSpider, cls).from_crawler(crawler, *args, **kwargs)
    #     crawler.signals.connect(spider.spider_idle, signal=signals.spider_idle)
    #     return spider
    #
    # def spider_idle(self, spider):
    #     self.test()
    #     print 'here'
    #     self.mysql.clearnUsefulIp()

    def check(self):
        url = 'https://www.baidu.com/'
        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate, sdch, br",
            "Accept-Language": "zh-CN,zh;q=0.8",
            "Cache-Control": "max-age=0",
            "Connection": "keep-alive",
            "Content-Type": " application/x-www-form-urlencoded; charset=UTF-8",
            'User-Agent': "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0",
            'Host': "www.baidu.com",
            "Upgrade-Insecure-Requests": '1',
            # 'Referer': "http://www.xicidaili.com/nn/1",
        }
        UsefulIp = self.mysql.selectip()
        for eachIp in UsefulIp:
            pro = eachIp[1]
            print pro
            try:
                yield scrapy.Request(url=url, headers=headers, meta={'proxy': pro},
                                     callback=self.check_parse, dont_filter=True)
            except:
                pass
            identity = eachIp[0]
            self.mysql.delete(identity)

    def check_parse(self, response):
        print response.meta['proxy']
        print 'OK'
def get_article(article_id):
    db = mysql.Mysql()
    article_id = db.escape_string(article_id)
    db.query("SELECT * FROM Article WHERE id='" + article_id + "';")
    article = db.fetch()
    return article
def main():
    print 'Backup starting on %s' % str(datetime.datetime.now())
    # Start the counter
    start = int(time.time())
    # Create temporary directory
    temp_dir = mkdtemp()

    # MySQL
    if config.mysql['enabled']:
        print '\n[MySQL]'
        i = 0
        for settings in config.mysql['servers']:
            instance = mysql.Mysql(settings)
            instance_dir = os.path.join(temp_dir, 'db/mysql/instance', str(i))
            dump_filepath = os.path.join(instance_dir, 'all_databases.sql')
            os.makedirs(instance_dir)
            print '\tInstance %s' % str(i)
            for db in instance.get_db_list():
                print '\t\t%s' % db
            instance.save(dump_filepath)
            print '\n\tDump : %s (%s MB)' % (os.path.basename(dump_filepath),
                                             str(os.path.getsize(dump_filepath) / 1000000))
            i += 1

    # ... the rest
    print '\n[Archiving]'
    # Create output_dir if it does not exist
    if not os.path.isdir(config.output_dir):
        try:
            os.makedirs(config.output_dir)
        except:
            print 'The backup directory %s does not exist and could not be created. Exiting.' % config.output_dir
            sys.exit(1)
    # Create the tar archive
    print '\tCreating directory'
    tar_file = os.path.join(config.output_dir, config.archive_prefix + str(datetime.date.today()) + ".tar.gz")
    tar = tarfile.open(tar_file, 'w:gz')
    # Add databases into the archive
    if config.mysql['enabled']:
        print '\tAdding database dumps'
        tar.add(temp_dir)
    # Add all the directories from config into the archive
    if len(config.dirs_to_backup) > 0:
        print '\tAdding directories from configuration'
        for directory in config.dirs_to_backup:
            print '\t\t%s' % directory
            tar.add(directory)
    tar.close()
    shutil.rmtree(temp_dir)
    print '\n\tArchive : %s, size %s MB.' % (os.path.basename(tar_file), str(os.path.getsize(tar_file) / 1000000))

    print '\n[Upload]'
    # FTP
    if config.ftp['enabled']:
        for instance in config.ftp['servers']:
            try:
                print '\tUploading to %s' % instance['host']
                upload_time = ftp.upload(instance, tar_file)
                minutes = upload_time / 60
                seconds = upload_time - minutes * 60
                print '\t\tUpload completed in %s min %s s.' % (str(minutes), str(seconds))
            except Exception as e:
                print 'Error during upload : %s\n' % e.message
    # Swift
    if config.swift['enabled']:
        for server in config.swift['servers']:
            try:
                container = swift.SwiftContainer(
                    server['authurl'], server['auth_version'], server['user'], server['key'],
                    server['tenant_name'], server['container_name'], server['autocreate'])
                print '\tUploading to Swift server %s' % server['name']
                swift_upload_start = int(time.time())
                container.upload(tar_file)
                swift_upload_end = int(time.time())
                minutes = (swift_upload_end - swift_upload_start) / 60
                seconds = (swift_upload_end - swift_upload_start) - minutes * 60
                print '\t\tUpload completed in %s min %s s.' % (str(minutes), str(seconds))
            except Exception as e:
                print 'Error during upload : %s\n' % e.message

    print '\n[Delete old backups]'
    archives_deleted = False
    for archive in os.listdir(config.output_dir):
        archive_info = os.stat(os.path.join(config.output_dir, archive))
        # If archive is older than keeptime (in days)...
        if time.mktime(time.gmtime()) - archive_info.st_mtime > (config.keeptime * 24 * 60 * 60):
            print '\tDeleting %s' % archive
            # Remove local copy
            print '\t\tLocal copy'
            try:
                os.remove(config.output_dir + '/' + archive)
            except Exception as e:
                print 'Error during removal : %s\n' % e.message
            # Remove FTP copy
            if config.ftp['enabled']:
                for instance in config.ftp['servers']:
                    try:
                        print '\t\tFrom FTP server %s' % instance['host']
                        ftp.delete_remote(instance, archive)
                    except Exception as e:
                        print 'Error during deletion : %s\n' % e.message
            if config.swift['enabled']:
                for server in config.swift['servers']:
                    try:
                        print '\t\tFrom Swift server %s' % server['name']
                        container = swift.SwiftContainer(
                            server['authurl'], server['auth_version'], server['user'], server['key'],
                            server['tenant_name'], server['container_name'])
                        container.delete(archive)
                    except Exception as e:
                        print 'Error during deletion : %s\n' % e.message
            archives_deleted = True
    if not archives_deleted:
        print '\tNo archives were deleted.'

    # Stop the counter
    end = int(time.time())
    print '\nBackup ending on %s' % str(datetime.datetime.now())
    minutes = (end - start) / 60
    seconds = (end - start) - minutes * 60
    print 'Time elapsed : %s min %s s.' % (str(minutes), str(seconds))
 * ISO 3166 Country Code
 * Country Name
 * Latitude
 * Longitude

e.g.: "AF","Afghanistan",33,65

@author: Albert Wang
@date: 10/24/2010
"""
import csv

import mysql

mysql_connection = mysql.Mysql()

csv_file_location = "./average-latitude-longitude-countries.csv"
csv_file_handle = open(csv_file_location, 'r')
csv_file_reader = csv.reader(csv_file_handle, delimiter=',')

# Read the city data line by line
for line in csv_file_reader:
    country_code = mysql_connection.escape_string(line[0])
    name = mysql_connection.escape_string(line[1])
    latitude = mysql_connection.escape_string(line[2])
    longitude = mysql_connection.escape_string(line[3])

    # Insert data into database
    query = "INSERT INTO countries (code, name, latitude, longitude)\
        VALUES('" + country_code + "', '" + name + "', '" + latitude + "', '" + longitude + "')"
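# Sketch only: loading the same CSV with a single batched executemany() call,
# assuming a plain MySQLdb connection rather than the mysql.Mysql wrapper used
# above; the helper name and the passed-in connection are illustrative.
import csv
import MySQLdb

def load_countries(conn, path="./average-latitude-longitude-countries.csv"):
    with open(path, 'r') as fh:
        rows = [tuple(line[:4]) for line in csv.reader(fh, delimiter=',')]
    cur = conn.cursor()
    cur.executemany(
        "INSERT INTO countries (code, name, latitude, longitude) VALUES (%s, %s, %s, %s)",
        rows)
    conn.commit()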
#!/usr/bin/python
# -*- coding: utf-8 -*-
# import requests, sys, re
import sys

import MySQLdb
import mysql
from bs4 import BeautifulSoup

reload(sys)
sys.setdefaultencoding('utf-8')

mysql = mysql.Mysql()
print 'Scraping data from huodongjia.com......'
headers = {
    'content-type': 'application/json',
    'Host': 'www.huodongjia.com',
    'Referer': 'https://www.huodongjia.com/it/',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36'
}
for page in range(150):
    # url = 'https://www.huodongjia.com/beijing/it/page-9'
    url = 'https://www.huodongjia.com/beijing/it/page-' + str(page + 1) + '/'
    print '---------------------------Scraping page ' + str(page + 1) + '......--------------------------------'
""" This script loads cities into the database """ import csv import pycountry import mysql db = mysql.Mysql() # Empty all the relevant tables so we can start from scratch db.query("TRUNCATE TABLE City") db.query("TRUNCATE TABLE CityAlias") # Load cities # Country,City,AccentCity,Region,Population,Latitude,Longitude csv = csv.reader(open('worldcitiespop.txt', 'rb')) for row in csv: print row[1], " in ", row[0] if row[4] <= 1000000 or row[4] == '': continue try: states = pycountry.subdivisions.get(country_code=row[0].upper()) except: continue for state in states: if state.code[3:] == row[3]: break db.query("SELECT id FROM State WHERE stateName='" + db.escape_string(state.name) + "'") state_id = str(db.fetch()['id'])
def __init__(self):
    self.page_num = 1
    self.total_num = None
    self.page_spider = page.Page()
    self.mysql = mysql.Mysql()