def get_ips(self, numbers=5, flag=0):
    """Return proxies drawn at random from the ``ip`` table.

    Args:
        numbers: How many proxies the caller wants.
        flag: ``1`` to run the drawn rows through ``self.testing_sql_ip``
            and delete whatever it returns before answering; ``0`` to skip
            that check.

    Returns:
        A new list holding the first column of at most ``numbers`` rows.
        Fewer entries come back when the table still has fewer rows after
        the refill through ``self.ip_sql``.
    """
    query = "select proxies from ip order by rand() limit %s"
    with mysql(db='scipy') as sql:
        all_ip = sql.read(query, numbers)
        lenth = len(all_ip)
        if flag == 1:
            print("正在校验代理........")
            while True:
                bad_ip = self.testing_sql_ip(all_ip)
                sql.write(" delete from ip where proxies=%s", bad_ip)
                bad = len(bad_ip)
                if bad == 0:
                    break
                # Draw as many fresh rows as were just deleted and check
                # those on the next pass.
                with mysql(db='scipy') as Sql:
                    all_ip = Sql.read(query, bad)
                lenth = len(all_ip)
                if lenth < numbers:
                    self.ip_sql(numbers)

    if lenth == 0 or lenth < numbers:
        # Not enough rows in hand: top the table up, then draw again.
        self.ip_sql(numbers)
        with mysql(db='scipy') as Sql:
            all_ip = Sql.read(query, numbers)

    # Slice instead of indexing range(numbers) so a short result set yields
    # a short list rather than an IndexError.
    return [row[0] for row in all_ip[:numbers]]
def deal_info(self):
    """Assemble insertable rows from the columns returned by ``deal_html``.

    ``self.deal_html()`` supplies five flat sequences: titles, position
    fields (two per listing), price fields (three per listing, of which the
    first and third are used), ``|``-separated house descriptions and
    ``/``-separated follow descriptions.

    Returns:
        ``[]`` when fewer than five titles were returned. Otherwise a list
        of 14-element rows, one for every title that has no matching
        ``Title`` in ``second_hand`` and whose fields could be converted.
        Listings whose fields cannot be converted are skipped.
    """
    HouseInfo = []
    title, positionInfo, priceInfo, houseInfo, followInfo = self.deal_html()
    info_lenth = len(title)
    if info_lenth < 5:
        return []

    # Split / extract / merge the flat sequences into per-listing records.
    indices = range(info_lenth)
    position = [[positionInfo[i * 2], positionInfo[i * 2 + 1]] for i in indices]
    price = [[priceInfo[i * 3], priceInfo[i * 3 + 2]] for i in indices]
    house = [str(houseInfo[i]).split('|') for i in indices]
    follow = [str(followInfo[i]).split('/') for i in indices]

    for i in indices:
        # Text -> number: keep only the digits / the figure before "平米".
        price[i][1] = ''.join(filter(str.isdigit, price[i][1]))
        house[i][1] = house[i][1].split("平米")[0].split(' ')[1]
        follow[i][0] = ''.join(filter(str.isdigit, follow[i][0]))
        follow[i][1] = self.get_time(follow[i][1])

        with mysql() as sql:
            readinfo = sql.read("select Title from second_hand where Title=%s", title[i])
        if len(readinfo) != 0:
            # Title already stored; nothing to add.
            continue

        try:
            HouseInfo.append([
                title[i],
                position[i][0], position[i][1],
                float(price[i][0]), float(price[i][1]),
                house[i][0], float(house[i][1]), house[i][2], house[i][3],
                house[i][4], house[i][5], house[i][6],
                int(follow[i][0]), follow[i][1],
            ])
        except (IndexError, TypeError, ValueError):
            # Best effort: a listing with missing or non-numeric fields is
            # dropped, but unrelated errors are no longer hidden.
            continue
    return HouseInfo
def run(self):
    """Pass the rows from ``deal_info`` to ``mysql().write`` for ``rent_house``."""
    rows = self.deal_info()
    insert_stmt = (
        "insert IGNORE into rent_house("
        "Title, Rentway, District, Probably_Location, Location, Area, Orientation, Apartment, Floor, Price, Release_time"
        ") values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    )
    with mysql() as db:
        db.write(insert_stmt, rows)
def run(self):
    """Print the rows from ``deal_info`` and write them to ``new_house``."""
    rows = self.deal_info()
    print(rows)
    insert_stmt = (
        "insert IGNORE into new_house("
        "Title,District,Probably_Location,Location,Price"
        ") values(%s,%s,%s,%s,%s)"
    )
    with mysql() as db:
        db.write(insert_stmt, rows)
def main(ips, proxy):
    """Walk through the installation steps in a fixed order.

    For every step name, the action runs and the name is recorded only when
    ``recorder().test(name)`` returns a truthy value.

    Args:
        ips: Passed to every step's action.
        proxy: Passed additionally to the ``iop-web`` and ``swift-config``
            actions.
    """
    tracker = recorder()
    database = mysql()
    broker = rabbit()
    web = iop()
    storage = swift()

    # Lambdas keep attribute lookup and the call itself deferred until the
    # step is actually reached.
    steps = (
        ("mysql-install", lambda: database.install_mysql(ips)),
        ("mysql-config", lambda: database.operator(ips)),
        ("rabbitmq", lambda: broker.operator(ips)),
        ("iop-web", lambda: web.config(ips, proxy)),
        ("swift-install", lambda: storage.install(ips)),
        ("swift-config", lambda: storage.config(ips, proxy)),
    )
    for step_name, action in steps:
        if tracker.test(step_name):
            action()
            tracker.record(step_name)
def run(self):
    """Pass the rows from ``deal_info`` to ``mysql().write`` for ``second_hand``."""
    # Original author's note: SQLAlchemy was less comfortable than pymysql
    # for inserting large amounts of data.
    # deal_info() must be called before entering the ``with`` block, not
    # inside it (author's note).
    rows = self.deal_info()
    insert_stmt = (
        "insert ignore into second_hand("
        "Title,Cellname,Location,Totalprice,UnitPrice,Apartment,Area,Orientation,Renovation,Floor,Construction_time,Characteristics_apartment,Follow,Release_time"
        ") values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    )
    with mysql() as db:
        db.write(insert_stmt, rows)
def Renovation():
    """Render a pie chart of ``Renovation`` values found in ``second_hand``.

    Reads up to 30 ``Renovation`` groups ordered by descending row count,
    turns each count into a percentage of the summed counts, and writes the
    chart to ``./html/renovation_second.html``.
    """
    with mysql() as sql:
        info = sql.read("select Renovation,count(*) from second_hand "
                        "group by Renovation ORDER BY count(*) DESC limit 30")

    total = sum(row[1] for row in info)
    # One [label, percentage] pair per group.
    shares = [[row[0], 100 * row[1] / total] for row in info]

    c = (
        Pie()
        .add("", shares)
        .set_colors(["blue", "green", "red", "pink", "orange", "purple"])
        .set_global_opts(title_opts=opts.TitleOpts(title="成都二手房装修情况"))
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
    )
    # ``add`` is an instance method: calling it on the class passed the chart
    # in as ``self`` and never created a Page. Instantiate the Page first.
    Page().add(c).render(path='./html/renovation_second.html')
def into_sql(
        host='localhost',
        password='******',
        db='scipy',
        port=3306,
):
    """Write the module-level ``headers``/``remakes`` pairs to the ``headers`` table.

    Row ``i`` is ``[headers[i][0], remakes[i][0]]``.

    Args:
        host: Forwarded to ``mysql`` as ``host``.
        password: Forwarded to ``mysql`` as ``password``.
        db: Forwarded to ``mysql`` as ``db``.
        port: Forwarded to ``mysql`` as ``port``.

    Raises:
        IndexError: If ``remakes`` has fewer entries than ``headers``.
    """
    # Index into ``remakes`` (rather than zip) so a length mismatch still
    # fails loudly instead of silently dropping headers.
    rows = [[header[0], remakes[i][0]] for i, header in enumerate(headers)]
    # The connection arguments were previously accepted but ignored in
    # favour of hard-coded literals; pass the caller's values through.
    with mysql(
            host=host,
            password=password,
            db=db,
            port=port,
    ) as sql:
        sql.write("insert into headers(header,Remarks) values (%s,%s)", rows)
def get_headers(
        number=5,
        host='localhost',
        password='******',
        db='scipy',
        port=3306,
):
    """Return the first column of randomly chosen rows from ``headers``.

    Args:
        number: Maximum number of rows to draw.
        host: Forwarded to ``mysql`` as ``host``.
        password: Forwarded to ``mysql`` as ``password``.
        db: Forwarded to ``mysql`` as ``db``.
        port: Forwarded to ``mysql`` as ``port``.

    Returns:
        A list with at most ``number`` values; shorter when the table holds
        fewer rows.
    """
    # The connection arguments were previously accepted but ignored in
    # favour of hard-coded literals; pass the caller's values through.
    with mysql(
            host=host,
            password=password,
            db=db,
            port=port,
    ) as sql:
        rows = sql.read("select * from headers order by rand() limit %s", number)
    # Slice instead of indexing range(number) so a short result set yields a
    # short list rather than an IndexError.
    return [row[0] for row in rows[:number]]
def deal_info(self):
    """Assemble insertable rows from the columns returned by ``deal_html``.

    ``self.deal_html()`` supplies names, a flat location sequence and
    prices. The location sequence is read in groups of 11 items, of which
    items 1, 5 and 9 are kept; the number of complete groups decides how
    many listings are processed.

    Returns:
        A list of ``[name, loc1, loc2, loc3, price]`` rows for every name
        that has no matching ``Title`` in ``new_house``. Any exception
        raised while processing is printed and the rows gathered so far are
        returned.
    """
    info = []
    name, location, price = self.deal_html()
    try:
        lenth = len(location) // 11
        Location = [
            [location[i * 11 + 1], location[i * 11 + 5], location[i * 11 + 9]]
            for i in range(lenth)
        ]
        for i in range(lenth):
            try:
                price[i] = float(price[i])
            except (TypeError, ValueError):
                # Non-numeric price text falls back to 0.0.
                price[i] = 0.0
            with mysql() as sql:
                readinfo = sql.read("select Title from new_house where Title=%s", name[i])
            if len(readinfo) == 0:
                # No stored record with this title yet: queue it for insert.
                info.append([name[i], Location[i][0], Location[i][1], Location[i][2], price[i]])
    except Exception as e:
        print(e)
    return info
def ip_sql(self, number=30):
    """Clean the ``ip`` table, then fill it until it holds ``number`` rows.

    First, every stored row is passed to ``self.testing_sql_ip`` and the
    rows it returns are deleted. Then, while the table has fewer than
    ``number`` rows, ``self.page`` is advanced by one and the result of
    ``self.testing_ip()`` is inserted.

    Args:
        number: Row count at which the table is considered full.
    """
    with mysql(db='scipy') as sql:
        all_ip = sql.read("select proxies from ip")
        if len(all_ip) != 0:
            bad_ip = self.testing_sql_ip(all_ip)
            sql.write(" delete from ip where proxies=%s", bad_ip)
            # ``all_ip`` was read before the delete and still includes the
            # bad rows, so subtract them to report the valid count.
            print("当前数据库有{}有效代理,{}个无效代理".format(
                len(all_ip) - len(bad_ip), len(bad_ip)))

        while True:
            all_ip = sql.read("select proxies from ip")
            ip_lenth = len(all_ip)
            if ip_lenth >= number:
                print("IP池以装满,当前ip池代理数量", ip_lenth)
                break
            # Still short: move on to the next page and store what it yields.
            self.page += 1
            ip_list = self.testing_ip()
            sql.write("insert into ip(proxies) values(%s)", ip_list)
# -*-coding:utf8-*- # create by : JiangYangJie # email : [email protected] # filename : newhouse_map # data : 2019/11/25 import os from Mysql import mysql from pyecharts import options as opts from pyecharts.charts import Map, Page from map import area,city,county if not os.path.exists('./html'): os.makedirs('./html') with mysql() as sql: info=sql.read("select District,round(AVG(Price),2) from new_house " "group by District ORDER BY count(*) DESC limit 28" ) Info=[] for _ in info: location = _[0] a="" if location in area: a=location+"区" elif location in city: a = location+ "市" elif location in county: a = location+ "县" if a!="": Info.append([a,_[1]]) c = (