def _get_new_data(self, soup): commoditys = [] #爬取的前30件商品 commo_nodes = soup.findAll('li', class_=re.compile(r'.*gl-item.*')) for commo_node in commo_nodes: commID = commo_node['data-sku'] commName = commo_node.find('div',class_ = re.compile(r'.*p-name p-name-type-2.*'))\ .find('em').get_text() price = commo_node.find('div', class_=re.compile(r'.*p-price.*'))\ .find('i').get_text() evaluate = commo_node.find('div', class_=re.compile(r'.*p-commit.*'))\ .find('strong').find('a').get_text() #推荐指数有可能不存在 try: purchasingIndex = commo_node.find('div', class_=re.compile(r'.*p-commit.*'))\ .find('span').find('em').get_text() except: purchasingIndex = "-" # 店铺有可能不存在 try: shopName = commo_node.find('div', class_=re.compile(r'.*p-shop.*'))\ .find('a').get_text() except: shopName = "-" #是否为京东自营 if self.isJdOwn(commo_node): print("\t商品id:" + commID + "\t商品名:" + commName + "\t价格:" + price) commodity = Commodity(commID, commName, price, evaluate, purchasingIndex, shopName) commoditys.append(commodity) # 爬取的内容 return commoditys
def _read_cfg_info(cfg_file=_DEFAULT_CFG_FILE): ''' 读取配置信息 ''' try: f = open(cfg_file) except IOError: print 'info: get cfg file failed!' print 'cash_register.set_cfg_file() must be called later!' return global _cfg_info _cfg_info = json.load(f, encoding='GBK') global on_sale_info on_sale_info = _cfg_info["on_sale_info"] global commodities for each in _cfg_info["commodities"]: commodity = Commodity( each['barcode'].encode('GBK'), each['name'].encode('GBK'), each['unit'].encode('GBK'), float(each['unit_price']), each['classification'].encode('GBK') ) commodities[each['barcode']] = commodity
def _int_or_none(text):
    """Return ``int(text)``, or ``None`` when *text* is not a valid integer."""
    try:
        return int(text)
    except ValueError:
        return None


def read_commodities(file):
    """Parse a semicolon-delimited CSV stream into ``Commodity`` objects.

    The first row is treated as a header and skipped.  From every following
    row, column 2 is the name and columns 3, 4, 5 and 7 are the sell, buy,
    demand and supply values (column 6 is not read).  A numeric field that
    cannot be parsed as an integer becomes ``None``.

    :param file: iterable of CSV lines, e.g. an open text file.
    :return: list of ``Commodity`` objects in file order; empty when the
        input has no data rows (including a completely empty input).
    :raises IndexError: if a data row has fewer than eight columns.
    """
    reader = csv.reader(file, dialect=csv.excel, delimiter=';')
    # Skip the header; the default keeps an empty input from raising
    # StopIteration.
    next(reader, None)

    return [
        Commodity(
            line[2],
            _int_or_none(line[3]),
            _int_or_none(line[4]),
            _int_or_none(line[5]),
            _int_or_none(line[7]),
        )
        for line in reader
    ]
def addCom(self):
    '''Front desk: add a commodity to the shopping cart.

    Prompts for a commodity number and a quantity.  The commodity is added
    to ``self.car`` only when it exists, the quantity is a positive integer,
    and the quantity does not exceed ``com.getQuantiy()``.  Every rejected
    input is reported with a message and the method returns without changing
    the cart.
    '''
    com_num = input("请输入商品编号:").strip()
    com_info = Basic.queryOneCommodity(com_num)
    if not com_info:
        print("商品不存在.")
        return

    com = Commodity(com_info)

    # A non-numeric quantity used to abort with an uncaught ValueError;
    # treat it like any other invalid input.
    try:
        com_cnt = int(input("请输入商品数量:").strip())
    except ValueError:
        print("购买数量必须是整数.")
        return

    if com_cnt <= 0:
        print("购买数量必须大于0.")
        return

    if com_cnt > com.getQuantiy():
        print("商品库存不足.")
        return

    self.car.addCommodity(com, com_cnt)
    print("添加进入购物车成功.")
def queryAll(self):
    '''Front desk: print every commodity record as a table.

    Fetches all rows via ``Basic.queryAllCommodity()``, adds each one to the
    table returned by ``Commodity.getTableaHead()``, prints the table and
    then a line with the number of records.
    '''
    info = Basic.queryAllCommodity()
    table = Commodity.getTableaHead()
    for row in info:
        table.add_row(row)
    print(table)
    print("以上共 {} 条记录.".format(len(info)))
def queryOneCahier():
    '''Prompt for a cashier number and print the matching record as a table.

    Prints a "not found" message and returns when
    ``Basic.queryOneCashier`` yields no record.
    '''
    cash_no = input("请输入需要查询的售货员的编号:").strip()
    cash = Basic.queryOneCashier(cash_no)
    # Treat any empty result (empty list, empty tuple, None) as "not found",
    # matching the emptiness check used by the commodity lookup.
    if not cash:
        print("该售货员不存在.")
        return

    # NOTE(review): the table header comes from Commodity although the row is
    # a cashier record -- confirm this is intended.
    table = Commodity.getTableaHead()
    table.add_row(cash)
    print(table)
def queryOne(self):
    '''Front desk: look up one commodity by number and print it as a table.

    Prints a "not found" message when ``Basic.queryOneCommodity`` returns an
    empty result; otherwise prints the record followed by a blank line.
    '''
    # Strip surrounding whitespace like the other prompts do, so a stray
    # space does not turn a valid number into a miss.
    com_num = input("请输入需要查询商品的编号:").strip()
    res = Basic.queryOneCommodity(com_num)
    if not res:  # empty result
        print("没有该商品")
        return

    table = Commodity.getTableaHead()
    table.add_row(res)
    print(table, end="\n\n")
def add_commodity(self, origin: Node, dest: Zip, quantity):
    """
    Register a new commodity on the network.

    A ``Commodity`` is built from the three arguments and appended to
    ``self.commodities``.

    :param origin: origin node of the commodity
    :param dest: destination zip of the commodity
    :param quantity: quantity passed through to ``Commodity``
    :return: None
    """
    self.commodities.append(Commodity(origin, dest, quantity))
def getComments(self, debug=False, saveToMongo=False, saveToFile=False):
    """Fetch the comments of every item in ``self.finalResult``.

    For each item a ``Commodity`` is built from the URL returned by
    ``__prepare4CommentsReview`` and its comments are fetched and appended to
    ``self.finalComments`` (which is reset at the start of the call).

    :param debug: forwarded to ``Commodity.getComments``.
    :param saveToMongo: when true, also store each item's comments in the
        ``review`` collection of database ``jd`` on ``localhost:44444``.
    :param saveToFile: when true, also write each item's comments to
        ``./resultoutput/<n>.txt`` where ``n`` counts items from 1.
    """
    client = None
    collection = None
    if saveToMongo:
        client = MongoClient('localhost', 44444)
        collection = client['jd']['review']

    self.finalComments = list()
    try:
        # The 1-based position doubles as the output file name.
        for i, numItem in enumerate(self.finalResult, start=1):
            baseUrl = self.__prepare4CommentsReview(numItem)
            obj = Commodity(baseUrl)
            obj.getComments(debug)
            self.finalComments.append(obj.comments)
            if saveToMongo:
                self.__saveToMongo(collection, obj.comments)
                print("save to mongo --port 44444 db:jd collection:review")
            if saveToFile:
                self.__saveToFile("./resultoutput/" + str(i) + ".txt", obj.comments)
    finally:
        # Release the connection even when fetching or saving fails.
        if client is not None:
            client.close()
def run(self):
    """Drain ``self.queueGet``, fetching and storing comments per item.

    Each item taken from the queue is turned into a ``Commodity``; its
    comments are fetched, put on ``self.queueSave`` and, depending on
    ``self.saveToMongo`` / ``self.saveToFile``, stored in MongoDB
    (``localhost:44444``, db ``jd``, collection ``review``) and/or written
    to ``./resultoutput/<item>.txt``.  Any exception raised while handling
    an item is printed and the loop moves on; a status line is printed for
    every iteration.
    """
    # One connection for the whole run instead of a new, never-closed
    # client per loop iteration.
    client = None
    collection = None
    if self.saveToMongo:
        client = MongoClient('localhost', 44444)
        collection = client['jd']['review']

    try:
        while not self.queueGet.empty():
            # Reset per iteration so the status line below never reads an
            # unbound name or a value left over from the previous item when
            # get() or Commodity() fails.
            numItem = None
            commodityObj = None
            try:
                numItem = self.queueGet.get(timeout=120)
                baseUrl = self.__prepare4CommentsReview(numItem)
                commodityObj = Commodity(baseUrl)
                commodityObj.getComments()
                self.queueSave.put(commodityObj.comments)
                if self.saveToMongo:
                    self.__saveToMongo(collection, commodityObj.comments)
                    print("save to mongo --port 44444 db:jd collection:review")
                if self.saveToFile:
                    self.__saveToFile("./resultoutput/" + numItem + ".txt", commodityObj.comments)
            except Exception as e:
                print(e)
            finally:
                comment_count = len(getattr(commodityObj, "comments", ()))
                print(threading.current_thread().name + " {0} comments length: {1}".format(numItem, comment_count))
    finally:
        if client is not None:
            client.close()
def add_commodity(self, origin: Node, dest: Zip, quantity=0, scen_num=-1):
    """
    Register a new commodity on the network.

    The commodity is created from its endpoints, then given its scenario
    number and quantity, and finally appended to ``self.commodities``.

    :param origin: origin node of the commodity
    :param dest: destination zip of the commodity
    :param quantity: quantity to set; defaults to 0 when not given
    :param scen_num: scenario number; the default -1 denotes the
        deterministic problem
    :return: None
    """
    new_commodity = Commodity(origin, dest)
    new_commodity.set_scenario(scen_num)
    new_commodity.set_quantity(quantity)
    self.commodities.append(new_commodity)
def getBothTopStatic(left, right, com_no):
    '''Rank commodities by number of sales inside a date range.

    Every sale returned by ``Basic.queryAllSell()`` whose date lies in
    ``[left, right]`` (both ends inclusive) and that carries a commodity
    number is counted.  Sales without a commodity number are ignored.

    :param left: first date of the range (compared with ``sell.getTime().date()``).
    :param right: last date of the range.
    :param com_no: accepted for interface compatibility; its value is not
        used by the ranking.
    :return: list of ``pair(Commodity, cnt)`` sorted by ``cnt`` descending.
    '''
    cnt_dict = {}  # commodity number -> number of sales in range
    for DA in Basic.queryAllSell():
        sell = Sell(DA)
        sold_no = sell.getComNo()
        if sold_no is None:
            continue
        if left <= sell.getTime().date() <= right:
            cnt_dict[sold_no] = cnt_dict.get(sold_no, 0) + 1

    cnt_list = [
        pair(Commodity(Basic.queryOneCommodity(com_num)), cnt)
        for com_num, cnt in cnt_dict.items()
    ]
    cnt_list.sort(key=lambda x: x.cnt, reverse=True)
    return cnt_list
from commodity import Commodity

# Product pages used for manual testing.  Entries that are commented out are
# kept for reference but are not part of the list; only the uncommented URLs
# at the bottom are active.
urls = [
    # "https://www.amazon.com/PlayStation-Slim-500GB-Console-Discontinued-4/dp/B01LRLJV28",
    # "https://www.amazon.com/PlayStation-4-Pro-1TB-Console/dp/B01LOP8EZC?th=1",
    # "https://www.amazon.com/God-War-3-Remastered-PlayStation-4/dp/B00USM22DI",
    # "https://www.amazon.com/dp/B073TS5FSK/ref=dp_sp_detail?psc=1",
    # "https://www.amazon.com/dp/B00T8VQTGQ",
    # "https://www.amazon.com/Philips-AVENT-Double-Electric-Comfort/dp/B00N4R4C3M/ref=sr_1_4_s_it?s=baby-products&ie=UTF8&qid=1509343031&sr=1-4&keywords=AVENT+Double+Electric+Comfort+Breast+Pump",
    # "https://www.amazon.com/Philips-AVENT-Natural-Glass-Bottle/dp/B00PF83R84/ref=sr_1_6_s_it?s=baby-products&ie=UTF8&qid=1508883080&sr=1-6&keywords=philips+avent",
    # "https://www.amazon.com/Philips-AVENT-Natural-Glass-Bottle/dp/B00PF83R0W/ref=sr_1_6_s_it?s=baby-products&ie=UTF8&qid=1508883080&sr=1-6&keywords=philips%2Bavent&th=1",
    # "https://www.amazon.com/Britax-Boulevard-G4-1-Convertible-Domino/dp/B00OLRKNGY/ref=sr_1_1_s_it?s=baby-products&ie=UTF8&qid=1508884406&sr=1-1&refinements=p_89%3ABritax%2BUSA&th=1",
    # "https://www.amazon.com/Bose-QuietComfort-Wireless-Headphones-Cancelling/dp/B01E3SNO1G/ref=sr_1_3?s=electronics&ie=UTF8&qid=1508884685&sr=1-3&keywords=bose",
    # "https://www.amazon.com/dp/B01LWVX2RG",
    # "https://www.amazon.com/dp/B01E3SNO1G",
    # "https://www.amazon.com/dp/B00N4R4C3M",
    # "https://www.amazon.com/JBL-Wireless-Bluetooth-Speaker-Pairing/dp/B00GOF0ZQ4/ref=sr_1_5?ie=UTF8&qid=1508884897&sr=8-5&keywords=jbl+pulse",
    "https://www.amazon.com/dp/B00ZY1J5J2",
    "https://www.amazon.com/dp/B01LWVX2RG",
    "https://www.amazon.com/dp/B06XCM9LJ4/ref=ods_mccc_Rdr",
    "https://www.amazon.com/dp/B0794W1SKP/ref=ods_mccc_lr",
    "https://www.amazon.com/All-new-Echo-Dot-3rd-Gen/dp/B0792K2BK6/ref=sr_1_1?ie=UTF8&qid=1547526586&sr=8-1&keywords=echo+dot",
    "https://www.amazon.com/Bose-QuietComfort-Wireless-Headphones-Cancelling/dp/B0756CYWWD/ref=dp_ob_title_ce",
    "https://www.amazon.com/dp/B078GVDB18/ref=emc_b_5_t",
    "https://www.amazon.com/dp/B00USM22DI?th=1"
]

# Smoke test: build a Commodity for the first active URL, retrieve its
# information and print it.
# NOTE(review): the first argument 0 is presumably a placeholder id -- confirm
# against the Commodity constructor.
commodity = Commodity(0, urls[0])
commodity.retrieve_info()
commodity.print_commodity()
r_date VARCHAR(256) NOT NULL DEFAULT '', CONSTRAINT UC_URL_Date UNIQUE (c_id,r_date) )""") db.execute(""" CREATE TABLE IF NOT EXISTS commodity_url ( c_id MEDIUMINT AUTO_INCREMENT PRIMARY KEY, c_url VARCHAR(256) NOT NULL DEFAULT '', c_title VARCHAR(256) NOT NULL DEFAULT '', UNIQUE (c_url) )""") commodities = [] db.execute("SELECT c_id, c_url FROM commodity_url ORDER BY c_id") for (c_id, c_url) in db: commodity = Commodity(c_id, c_url.decode("UTF-8")) commodity.retrieve_info() commodity.print_commodity() commodities.append(commodity) for commodity in commodities: db.execute( "INSERT INTO commodity_price_record(c_title,c_price,c_id,r_date) VALUES(?,?,?,?) " "ON DUPLICATE KEY UPDATE `c_title`=?,`c_price`=?", [ commodity.title, commodity.price, commodity.c_id, datetime.now().strftime("%Y-%m-%d"), commodity.title, commodity.price ]) db.execute("UPDATE commodity_url SET `c_title`=? WHERE `c_id`=?", [commodity.title, commodity.c_id])
def commodity_builder(self, name, type, url, t_list):
    """Create a ``Commodity`` from *name*, *type* and *url* and append it to *t_list*.

    The list is modified in place; nothing is returned.
    """
    t_list.append(Commodity(name, type, url))