def get_data(ID):
    """Fetch item ``ID`` via ``crawler.fetch_item`` and build its feature row.

    Returns a list ``[title, init_price ** 0.5, seller_point, condition]``.
    ``init_price`` and ``seller_point`` are converted with ``float``;
    ``condition`` is 1.0 when the item's ``'condition'`` field equals
    ``u'new'`` and 0.0 otherwise.
    """
    item = crawler.fetch_item(ID)
    condition_flag = 1.0 if item['condition'] == u'new' else 0.0
    return [
        item['title'],
        float(item['init_price']) ** (0.5),
        float(item['seller_point']),
        condition_flag,
    ]
def predict_KN(ID):
    """Run the per-maker ``dic_KN`` model for item ``ID``.

    The item's ``category_id`` is mapped to a maker with ``search_maker``.
    When that lookup returns 1 the string ``'Error'`` is returned instead
    of a prediction.

    Otherwise returns a 3-tuple: the first element of the model's result,
    followed by the ``current_price`` values at positions 2 and 7 of the
    second element after sorting it by ``current_price``.
    """
    item = crawler.fetch_item(ID)
    category = int(item['category_id'])
    maker = search_maker(category)
    if maker == 1:
        return 'Error'

    prediction = dic_KN[maker].predict(ID)
    # presumably prediction[1] is a pandas DataFrame (old ``sort``/``ix``
    # API) -- TODO confirm against the model's predict().
    by_price = prediction[1].sort('current_price')
    labels = by_price.index
    # NOTE(review): positions 2 and 7 are hard-coded, so this needs at
    # least 8 rows -- confirm the model always returns that many.
    price_at_2 = by_price.ix[labels[2], 'current_price']
    price_at_7 = by_price.ix[labels[7], 'current_price']
    return (prediction[0], price_at_2, price_at_7)
def predict(self, ID, threhold=0.0, rate=2):
    """Predict for item ``ID`` with the model of the item's maker.

    The item is fetched with ``fetch_item``; its ``category_id`` is mapped
    to a maker through ``self.categoryID`` and the maker's entry in
    ``self.Auction_model`` is asked to predict from the item's title and
    description. ``threhold`` and ``rate`` are forwarded unchanged.

    Raises:
        NameError: with message 'No category_id found' whenever a
            ``KeyError`` is raised anywhere in the lookup or the model
            call (the error is printed first).
    """
    item = fetch_item(ID)
    try:
        category = int(item['category_id'])
        maker = self.categoryID[category]
        model = self.Auction_model[maker]
        # NOTE(review): the model call is inside the try, so a KeyError
        # raised by it is also reported as a missing category -- confirm
        # that is intended.
        return model.predict(
            item['title'],
            item['description'],
            threhold=threhold,
            rate=rate,
        )
    except KeyError as err:
        print(err)
        raise NameError('No category_id found')
def predict(self, ID, threhold=0.0, rate=2):
    """Predict for item ``ID`` using ``self.Auction``.

    The item is fetched with ``fetch_item`` and its ``category_id`` is
    mapped through ``self.categoryID``. If the mapped value is not in
    ``self.makers`` a ``NameError`` is raised; otherwise the item's title
    and description are passed to ``self.Auction.predict`` together with
    ``threhold`` and ``rate``.

    Raises:
        NameError: 'No category_id found' when the mapped maker is not
            in ``self.makers``.
    """
    item = fetch_item(ID)
    # NOTE(review): an id missing from self.categoryID surfaces as
    # whatever the subscript raises, not as the NameError below -- verify.
    maker = self.categoryID[int(item['category_id'])]
    if maker not in self.makers:
        raise NameError('No category_id found')
    return self.Auction.predict(
        item['title'],
        item['description'],
        threhold=threhold,
        rate=rate,
    )
def predict(self, ID, threhold=0.0, rate=2):
    """Look up the maker model for item ``ID`` and return its prediction.

    ``fetch_item(ID)`` supplies the item; ``self.categoryID`` maps the
    integer ``category_id`` to a maker, and ``self.Auction_model[maker]``
    predicts from the item's title and description with the given
    ``threhold`` and ``rate``.

    Raises:
        NameError: 'No category_id found' if any step inside the lookup
            or the model call raises ``KeyError``; the original error is
            printed before re-raising.
    """
    fetched = fetch_item(ID)
    try:
        category_key = int(fetched['category_id'])
        maker_name = self.categoryID[category_key]
        maker_model = self.Auction_model[maker_name]
        outcome = maker_model.predict(
            fetched['title'],
            fetched['description'],
            threhold=threhold,
            rate=rate,
        )
        return outcome
    except KeyError as key_error:
        # Every KeyError in the block above is reported as a missing
        # category.
        print(key_error)
        raise NameError('No category_id found')
def get_data(ID):
    """Return the feature list for the item identified by ``ID``.

    The item comes from ``crawler.fetch_item``. The result is
    ``[title, float(init_price) ** 0.5, float(seller_point), condition]``
    with ``condition`` encoded as 1.0 for ``u'new'`` and 0.0 for anything
    else.
    """
    record = crawler.fetch_item(ID)

    # Encode the condition field as a numeric flag.
    new_flag = 1.0 if record['condition'] == u'new' else 0.0

    title = record['title']
    price_root = float(record['init_price']) ** (0.5)
    seller_score = float(record['seller_point'])
    return [title, price_root, seller_score, new_flag]
def predict(self, ID, threhold=0.0, rate=1):
    """Estimate a price range for item ``ID`` from similar stored items.

    The item is fetched with ``crawler.fetch_item`` and its maker is
    resolved with ``self.search_maker``. Title and description
    similarities against the maker's models are combined per stored item
    as ``(rate + 1) * s1 * s2 / (s1 + rate * s2)``; items where either
    similarity is zero are dropped. Prices of items whose combined score
    exceeds ``threhold`` are sorted and sampled.

    Args:
        ID: identifier passed to ``crawler.fetch_item``.
        threhold: minimum combined similarity (exclusive) for an item's
            price to be considered.
        rate: weight used in the combined similarity formula.

    Returns:
        ``(None, [])`` when no item passes the threshold; otherwise a
        3-tuple of the sorted prices at positions ``l // 2``, ``l // 4``
        and ``l * 3 // 4``, where ``l`` is the number of matching prices.

    Raises:
        NameError: 'No category_id found' when ``search_maker`` returns
            ``None``.
    """
    testee = crawler.fetch_item(ID)
    maker = self.search_maker(int(testee['category_id']))
    if maker is None:
        raise NameError('No category_id found')

    model = self.model_dict[maker]
    title_similarity = self.setence_to_similarity(
        testee['title'], model.title_lda)
    # Fix: score the item's *description* against the description model.
    # The original passed testee['title'] here, so the title was compared
    # with both models.
    description_similarity = self.setence_to_similarity(
        testee['description'], model.description_lda)

    sim = [
        (n, (rate + 1) * s1 * s2 / (s1 + rate * s2))
        for n, (s1, s2) in enumerate(
            zip(title_similarity, description_similarity))
        if s1 != 0 and s2 != 0
    ]
    # Debug output kept from the original implementation.
    print(sim)

    p_list = sorted(model.price[n] for (n, x) in sim if x > threhold)
    l = len(p_list)
    if l == 0:
        return (None, [])
    # Floor division keeps the indices integral on Python 2 and 3 alike.
    return (p_list[l // 2], p_list[l // 4], p_list[l * 3 // 4])
def predict_LR2(ID):
    """Run the per-maker ``dic_LR2`` model for item ``ID``.

    The item's ``category_id`` is converted to ``int`` and mapped to a
    maker with ``search_maker``. Returns the string ``'Error'`` when that
    lookup yields 1; otherwise returns ``dic_LR2[maker].predict(ID)``.
    """
    item = crawler.fetch_item(ID)
    category = int(item['category_id'])
    maker = search_maker(category)
    if maker == 1:
        return 'Error'
    model = dic_LR2[maker]
    return model.predict(ID)
def predict(self, ID, threhold=0.0, rate=2):
    """Return ``self.Auction``'s prediction for item ``ID``.

    After fetching the item with ``fetch_item``, its ``category_id`` is
    mapped through ``self.categoryID`` and the result must be a member of
    ``self.makers``. The prediction is made from the item's title and
    description; ``threhold`` and ``rate`` are passed through.

    Raises:
        NameError: "No category_id found" when the mapped maker is not
            in ``self.makers``.
    """
    fetched = fetch_item(ID)
    category_key = int(fetched["category_id"])
    maker_is_known = self.categoryID[category_key] in self.makers
    if not maker_is_known:
        raise NameError("No category_id found")

    title = fetched["title"]
    description = fetched["description"]
    return self.Auction.predict(
        title, description, threhold=threhold, rate=rate
    )
def predict(self, ID, threhold=0.0, rate=2):
    """Predict for item ``ID`` after checking its maker is supported.

    ``fetch_item(ID)`` provides the item. Its integer ``category_id`` is
    looked up in ``self.categoryID``; when the resulting maker is in
    ``self.makers``, ``self.Auction.predict`` is called with the item's
    title, description, ``threhold`` and ``rate`` and its result returned.

    Raises:
        NameError: 'No category_id found' when the maker is not in
            ``self.makers``.
    """
    entry = fetch_item(ID)
    maker = self.categoryID[int(entry['category_id'])]
    if maker in self.makers:
        return self.Auction.predict(
            entry['title'],
            entry['description'],
            threhold=threhold,
            rate=rate,
        )
    # Mapped maker is not one of the supported makers.
    raise NameError('No category_id found')
'2084307168', '2084193602', '2084307178', '2084193594', '2084307171', '2084307189', '2084193603', '2084307177', '2084193585', '2084307180', '2084042163', '2084307169', '2084048237', '2084307192', '2084307167', '2084307187', '2084307172' ] random.shuffle(ctgs) for c in ctgs: print 'start fething items in category %s... ' % c for i in range(3): ids = crawler.fetch_item_list(c, i) for j in ids: result = crawler.fetch_item(j) archiver.add_item(result) print 'page', i, 'count', len(ids) print 'end'