def start(self): while True: print( 'Commands: "show": display all category, "linear <category name>": display linear graph, "exit": exit programme, "coef": Show trening again product stats' ) the_input = raw_input('Enter input:') if the_input == 'exit': break elif the_input == 'show': the_input = raw_input( 'Enter 1 for products, 2 for categories:') hb = Myhbase('crawler') if the_input == '1': print(hb.getAllproducts()) elif the_input == '2': print(hb.getAllcategories()) elif re.compile(r'^linear\s.+$').match(the_input): category = re.compile(r'^linear\s(.+)$').match(the_input) if category is not None: self.lineargraph(category.group(1)) elif the_input == 'coef': self.coef()
def start(self): while True: print('Commands: "show": display all category, "linear <category name>": display linear graph, "exit": exit programme, "coef": Show trening again product stats') the_input = raw_input('Enter input:') if the_input == 'exit': break elif the_input == 'show': the_input = raw_input('Enter 1 for products, 2 for categories:') hb = Myhbase('crawler') if the_input == '1': print(hb.getAllproducts()) elif the_input == '2': print(hb.getAllcategories()) elif re.compile(r'^linear\s.+$').match(the_input): category = re.compile(r'^linear\s(.+)$').match(the_input) if category is not None: self.lineargraph(category.group(1)) elif the_input == 'coef': self.coef()
def coef(self): hb_crawler = Myhbase('crawler') hb_trend = Myhbase('trend') data_hash = collections.defaultdict(dict) categories = hb_crawler.getAllcategories() for category in categories: print(category) #find coffient X = [] Y = [] i = 1 rowkey = category + 'interests' print(rowkey) for key, data in hb_trend.table.scan(row_prefix=rowkey, ): X.append([i]) v = int(data['stats:value']) Y.append([v]) i += 1 if X: mdl = LinearRegression().fit(X, Y) m = mdl.coef_[0] data_hash[category]['coef'] = m min_price = None max_price = None mean_price = 0 total_price = 0 products_in_category = hb_crawler.getCategoryProducts(category) for item in products_in_category: if item['product:price']: item_price = item['product:price'] regex = re.compile('[^0-9\.]') item_price = regex.sub('', item_price) item_price = float(item_price) total_price += item_price if item_price < min_price or min_price is None: min_price = item_price if item_price > max_price or max_price is None: max_price = item_price data_hash[category]['min_price'] = min_price data_hash[category]['max_price'] = max_price data_hash[category]['total_price'] = total_price data_hash[category]['mean_price'] = total_price / len( products_in_category) data_hash[category][ 'range_price'] = max_price - min_price data_hash[category]['total_item'] = len( products_in_category) print(data_hash) x = [] y = [] z = [] for key, item in data_hash.iteritems(): x.append(item['mean_price']) y.append(item['coef'][0]) z.append(item['total_item']) x = np.array(x) y = np.array(y) z = np.array(z) colors = np.random.rand(len(x)) area = np.pi * z plt.scatter(x, y, s=area, c=colors, alpha=0.5) plt.title('Product range, price and trending coefficient', fontsize=20) plt.xlabel('Averge Price', fontsize=15) plt.ylabel('Trend Coefficient', fontsize=15) plt.show()
def coef(self): hb_crawler = Myhbase('crawler') hb_trend = Myhbase('trend') data_hash = collections.defaultdict(dict) categories = hb_crawler.getAllcategories() for category in categories: print(category) #find coffient X = [] Y = [] i = 1 rowkey = category + 'interests' print(rowkey) for key, data in hb_trend.table.scan(row_prefix=rowkey, ): X.append([i]) v = int(data['stats:value']) Y.append([v]) i += 1 if X: mdl = LinearRegression().fit(X, Y) m = mdl.coef_[0] data_hash[category]['coef'] = m min_price = None max_price = None mean_price = 0 total_price = 0 products_in_category = hb_crawler.getCategoryProducts(category) for item in products_in_category: if item['product:price']: item_price = item['product:price'] regex = re.compile('[^0-9\.]') item_price = regex.sub('', item_price) item_price = float(item_price) total_price += item_price if item_price < min_price or min_price is None: min_price = item_price if item_price > max_price or max_price is None: max_price = item_price data_hash[category]['min_price'] = min_price data_hash[category]['max_price'] = max_price data_hash[category]['total_price'] = total_price data_hash[category]['mean_price'] = total_price / len(products_in_category) data_hash[category]['range_price'] = max_price - min_price data_hash[category]['total_item'] = len(products_in_category) print(data_hash) x = [] y = [] z = [] for key, item in data_hash.iteritems(): x.append(item['mean_price']) y.append(item['coef'][0]) z.append(item['total_item']) x = np.array(x) y = np.array(y) z = np.array(z) colors = np.random.rand( len(x) ) area = np.pi * z plt.scatter(x, y, s=area, c=colors, alpha=0.5) plt.title('Product range, price and trending coefficient', fontsize=20) plt.xlabel('Averge Price', fontsize=15) plt.ylabel('Trend Coefficient', fontsize=15) plt.show()