def __init__(self):
    """Initialise the base class, then create the API clients used here."""
    super(Title_filter, self).__init__()

    # Browse-tree, attribute and sub-attribute clients.
    self.us_btgs_api = US_btgs()
    self.amz_attrs_api = AMZ_attrs()
    self.amz_sub_attrs_api = AMZ_sub_attrs()

    # Title dictionary client and product (title) source.
    self.amz_title_dic = AMZ_title_dic()
    self.titles = Products()
def get_title_dic_by_attr_id(self, attr_id):
    """Collect the title-dictionary words of every sub-attribute of an attribute.

    The attribute document is looked up with ``self.amz_attrs.find_one`` and
    each id in its ``sub_attr_ids`` field is paged through
    ``AMZ_title_dic.get_words_by_sub_attr_id`` 50 entries at a time.

    Args:
        attr_id: Value matched against the ``attr_id`` field of the query.

    Returns:
        list: Everything returned for all sub-attributes, page after page.
        Errors are printed rather than raised, so the list may be partial
        (or empty) when a lookup fails.
    """
    title_dic = []
    query = {'attr_id': attr_id}
    api_instance = AMZ_title_dic()
    limit = 50
    try:
        # get sub_attr_id
        r = self.amz_attrs.find_one(query)
        for sub_attr_id in r['sub_attr_ids']:
            try:
                # get title_dic by sub_attr_id
                offset = 0
                while True:
                    res = api_instance.get_words_by_sub_attr_id(
                        sub_attr_id, offset=offset, limit=limit)
                    # Keep every page, including the final short one; the
                    # stop test must come after the results are stored.
                    title_dic.extend(res)
                    if len(res) < limit:
                        break
                    offset += limit
            except Exception as e:
                # Best effort: a failing sub-attribute does not stop the rest.
                print(e)
    except Exception as e:
        print(e)
    return title_dic
def get_dictionary_sub_attrs_words(subAttrIds):
    """Return the title-dictionary entries of the given sub-attributes.

    Args:
        subAttrIds: Iterable of sub-attribute ids to look up.

    Returns:
        tuple: ``(response, status)``. On success the response carries a list
        of ``TitleDic`` objects in ``data``, the message ``'Successful'`` and
        status 200. On any exception the message is the exception text and
        the status is 400.
    """
    res = GetDictionarySubAttrsWordsResponse()
    try:
        dic_client = AMZ_title_dic()
        collected = []
        for subAttrId in subAttrIds:
            entries = dic_client.get_dic_by_sub_attr_id(
                subAttrId, offset=0, limit=1000)
            if not entries:
                # Nothing registered for this sub-attribute.
                continue
            for entry in entries:
                item = TitleDic()
                item.sub_attr_id = entry.get('sub_attr_id')
                item.dic_word = entry.get('sub_attr_dic_word')
                item.count = entry.get('count')
                collected.append(item)
        res.data = collected
        res.message = 'Successful'
        response_status = 200
    except Exception as e:
        res.message = str(e)
        response_status = 400
    return res, response_status
def get_dictionary_products_attrs_sub_attrs_words(nodeId, attrId, subAttrId,
                                                  word):
    """Report whether ``word`` is already in a sub-attribute's dictionary.

    ``nodeId`` and ``attrId`` are accepted but not used by this function.

    Args:
        nodeId: Unused here.
        attrId: Unused here.
        subAttrId: Sub-attribute whose dictionary entries are fetched.
        word: Word compared against each entry's ``sub_attr_dic_word``.

    Returns:
        tuple: ``(response, status)``. Status is 200 with one of the messages
        ``'Word already enrolled'`` (``data`` holds the matching entry),
        ``'Available'`` or ``'SubAttrId not exists'``. On an exception the
        message is the exception text and the status is 400.
    """
    res = GetDictionaryProductsAttrsSubAttrsWordsResponse()
    try:
        dic_client = AMZ_title_dic()
        entries = dic_client.get_dic_by_sub_attr_id(
            sub_attr_id=subAttrId, offset=0, limit=1000)
        if not entries:
            res.message = 'SubAttrId not exists'
        else:
            matched = TitleDic()
            found = False
            for entry in entries:
                if entry.get('sub_attr_dic_word') != word:
                    continue
                # No early exit: a later duplicate overwrites an earlier one.
                matched.sub_attr_id = entry.get('sub_attr_id')
                matched.dic_word = entry.get('sub_attr_dic_word')
                matched.count = entry.get('count')
                found = True
            if found:
                res.data = matched
                res.message = 'Word already enrolled'
            else:
                res.message = 'Available'
        response_status = 200
    except Exception as e:
        res.message = str(e)
        response_status = 400
    return res, response_status
def post_dictionary_sub_attrs_words_count_reset(subAttrIds):
    """Reset the dictionary word counts of each given sub-attribute to zero.

    Args:
        subAttrIds: Sized iterable of sub-attribute ids (``len`` is taken).

    Returns:
        tuple: ``(response, status)``. Status is 200 when every reset call
        returned; on an exception the message is the exception text and the
        status is 400.
    """
    res = PostDictionarySubAttrsWordsCountResetResponse()
    try:
        dic_client = AMZ_title_dic()
        processed = 0
        for sub_attr_id in subAttrIds:
            dic_client.reset_count_to_zero_by_sub_attr_id(
                sub_attr_id=sub_attr_id)
            processed += 1
        # NOTE(review): the reset result is never inspected, so the counter
        # always matches the input length and 'Success partially' looks
        # unreachable -- confirm the intended contract.
        if processed == len(subAttrIds):
            res.message = 'Successful'
        else:
            res.message = 'Success partially'
        response_status = 200
    except Exception as e:
        res.message = str(e)
        response_status = 400
    return res, response_status
from __future__ import print_function from bl_product_amaz.amz_title_dic import AMZ_title_dic from pprint import pprint api_instance = AMZ_title_dic() try: offset = 0 limit = 50 sub_attr_id = "aa0000003" while True: res = api_instance.get_words_by_sub_attr_id(sub_attr_id, offset, limit) if limit > len(res): break else: offset = offset + limit pprint(res) except Exception as e: print("Exception when calling get_words_bt_sub_attr_id %s\n" % e)
class Title_filter(DataBase):
    """Removes known dictionary words from product titles and counts them."""

    def __init__(self):
        """Initialise the base class and create the API clients used here."""
        super(Title_filter, self).__init__()
        self.us_btgs_api = US_btgs()
        self.amz_attrs_api = AMZ_attrs()
        self.amz_sub_attrs_api = AMZ_sub_attrs()
        self.amz_title_dic = AMZ_title_dic()
        self.titles = Products()

    def get_title_word_dic_by_node_id(self, node_ids):
        """Build the lower-cased filter word list for the given browse nodes.

        As a side effect the usage counts of every sub-attribute reachable
        from the nodes are reset to zero before the words are collected.

        Args:
            node_ids: Iterable of browse node ids.

        Returns:
            list: Dictionary words sorted by length (shortest first) and
            then lower-cased.
        """
        # get sub_attrs text and make filter
        amz_attr_list = []
        for node_id in node_ids:
            res = self.us_btgs_api.get_btg_by_node_id(node_id)
            amz_attr_list.extend(res['attr_ids'])

        # reset_count_zero
        for amz_attr_id in amz_attr_list:
            attr = self.amz_attrs_api.get_attr_by_attr_id(amz_attr_id)
            for sub_attr_id in attr['sub_attr_ids']:
                self.amz_title_dic.reset_count_to_zero_by_sub_attr_id(
                    sub_attr_id)

        filter_list = []
        for amz_attr_id in amz_attr_list:
            filter_list.extend(
                self.amz_attrs_api.get_title_dic_by_attr_id(amz_attr_id))

        # Sort on the original spelling first, then normalise the case.
        filter_list = sorted(filter_list, key=len)
        return [word.lower() for word in filter_list]

    def filtering_titles(self, node_ids, filter_list):
        """Strip filter words from product titles and record their counts.

        For each node the first 100 products are fetched. The brand, the
        characters ``(`` and ``)`` are removed and ``,`` becomes a space;
        the title is lower-cased; then every filter word followed by a space
        is removed. Each removal count is sent to
        ``add_count_by_sub_attr_dic_word``.

        Args:
            node_ids: Iterable of browse node ids whose products are read.
            filter_list: Words to remove, applied in the order given.

        Returns:
            tuple: ``(titles, result_words)`` where ``titles`` is a list of
            dicts with keys ``ASIN``, ``brand``, ``title`` and
            ``filtered_title``, and ``result_words`` is a ``Counter`` of the
            space-separated tokens left in the filtered titles.
        """
        titles = []
        offset = 0
        limit = 100
        # get titles
        for node_id in node_ids:
            try:
                r = self.titles.get_products_by_node_id(node_id, offset, limit)
                for product in r:
                    titles.append({
                        'ASIN': product['ASIN'],
                        'brand': product['Brand'],
                        'title': product['Title'],
                    })
            except Exception as e:
                # Best effort: report the failure and go on to the next node.
                print(e)

        # Dictionary words are literal text, so escape them before putting
        # them in a pattern; compile once instead of once per title.
        patterns = [(word, re.compile(r'\b' + re.escape(word) + r' \b'))
                    for word in filter_list]

        count_dic_list = []
        # filtering by title dic
        for title_info in titles:
            title = title_info['title']
            # remove brand (skipped when the product has no brand value)
            brand = title_info['brand']
            if brand:
                title = title.replace(brand, "")
            title = title.lower()
            title = title.replace("(", "").replace(")", "").replace(",", " ")
            # in order of length
            for word, pattern in patterns:
                title, removed = pattern.subn("", title)
                if removed:
                    count_dic_list.append({'word': word, 'count': removed})
            title_info['filtered_title'] = title

        for tmp in count_dic_list:
            self.amz_title_dic.add_count_by_sub_attr_dic_word(
                tmp['word'], tmp['count'])

        # data clouding
        filtered_titles = []
        for title_info in titles:
            filtered_titles.extend(title_info['filtered_title'].split(" "))
        result_words = Counter(filtered_titles)
        return titles, result_words

    def add_sub_attr_in_amz_sub_attrs(self, node_id, attr_id, attr_kr_name,
                                      attr_us_name, sub_attr_id,
                                      sub_attr_kr_name, sub_attr_us_name):
        """Register an attribute for a node and, optionally, a sub-attribute.

        Args:
            node_id: Browse node that receives the attribute id.
            attr_id: Attribute id to add.
            attr_kr_name: Attribute name passed as the ``kr`` name.
            attr_us_name: Attribute name passed as the ``us`` name.
            sub_attr_id: Sub-attribute id, or ``None`` to add only the
                attribute.
            sub_attr_kr_name: Sub-attribute name passed as the ``kr`` name.
            sub_attr_us_name: Sub-attribute name passed as the ``us`` name.
        """
        # add attr_id in amz_attrs DB and us_btg that node_id is input node_id
        self.amz_attrs_api.add_attr(attr_id, attr_kr_name, attr_us_name)
        self.us_btgs_api.update_attr_id_by_node_id(node_id, attr_id)
        if sub_attr_id is not None:
            # add sub_attr in amz_sub_attrs DB
            self.amz_sub_attrs_api.add_sub_attr(sub_attr_id, sub_attr_kr_name,
                                                sub_attr_us_name)
            self.amz_attrs_api.update_sub_attr_ids(attr_id, sub_attr_id)

    def add_sub_attr_word_in_amz_title_dic(self, sub_attr_id, sub_attr_word):
        """Add ``sub_attr_word`` to the title dictionary of ``sub_attr_id``."""
        self.amz_title_dic.add_title_dic_word(sub_attr_id, sub_attr_word)
from __future__ import print_function from bl_product_amaz.amz_title_dic import AMZ_title_dic from pprint import pprint api_instance = AMZ_title_dic() try: sub_attr_id = "aa0000000" sub_attr_dic_word = "test" res = api_instance.add_title_dic_word(sub_attr_id, sub_attr_dic_word) pprint(res) except Exception as e: print("Exception when calling add_title_dic_word %s\n" % e)
from __future__ import print_function from bl_product_amaz.amz_title_dic import AMZ_title_dic from pprint import pprint api_instance = AMZ_title_dic() try: sub_attr_id = "aa0000003" res = api_instance.reset_count_to_zero_by_sub_attr_id(sub_attr_id) pprint(res) except Exception as e: print("Exception when calling add_count_by_sub_attr_code: %s\n" % e)
from __future__ import print_function from bl_product_amaz.amz_title_dic import AMZ_title_dic from pprint import pprint api_instance = AMZ_title_dic() try: sub_attr_id = "3/4 sleeve" res = api_instance.add_count_by_sub_attr_dic_word(sub_attr_id, count_up_num=3) pprint(res) except Exception as e: print("Exception when calling add_count_by_sub_attr_code: %s\n" % e)
def _browse_node_sub_attr(sub_attrs_res, title_dic_api):
    """Build one sub-attribute response entry with its dictionary words."""
    sub_attr = GetDictionaryBrowseNodesAllResponseSubAttrs()

    sub_attr_instance = SubAttr()
    sub_attr_instance.sub_attr_id = sub_attrs_res.get('sub_attr_id')
    sub_attr_instance.sub_attr_us_name = sub_attrs_res.get('sub_attr_us_name')
    sub_attr_instance.sub_attr_kr_name = sub_attrs_res.get('sub_attr_kr_name')
    sub_attr.sub_attr = sub_attr_instance

    title_dic_res = title_dic_api.get_dic_by_sub_attr_id(
        sub_attr_instance.sub_attr_id, offset=0, limit=1000)
    if title_dic_res:
        # title_dics is only assigned when the dictionary has entries.
        title_dics = []
        for a_title_dic in title_dic_res:
            title_dic_instance = TitleDic()
            title_dic_instance.sub_attr_id = a_title_dic.get('sub_attr_id')
            title_dic_instance.dic_word = a_title_dic.get('sub_attr_dic_word')
            title_dic_instance.count = a_title_dic.get('count')
            title_dics.append(title_dic_instance)
        sub_attr.title_dics = title_dics
    return sub_attr


def _browse_node_attr_data(attrs_res, sub_attrs_api, title_dic_api):
    """Build the response entry for one attribute and its sub-attributes."""
    res_data = GetDictionaryBrowseNodesAllResponseData()

    attr_instance = Attr()
    attr_instance.attr_id = attrs_res.get('attr_id')
    attr_instance.attr_us_name = attrs_res.get('attr_us_name')
    attr_instance.attr_kr_name = attrs_res.get('attr_kr_name')

    # Always start from a fresh list so an attribute without sub-attributes
    # yields an empty list instead of an unbound or stale one.
    sub_attr_list = []
    for sub_attr_id in attrs_res.get('sub_attr_ids') or []:
        sub_attrs_res = sub_attrs_api.get_sub_attr_by_sub_attr_id(sub_attr_id)
        if sub_attrs_res:
            sub_attr_list.append(
                _browse_node_sub_attr(sub_attrs_res, title_dic_api))

    res_data.attr = attr_instance
    res_data.sub_attrs = sub_attr_list
    return res_data


def get_dictionary_browse_nodes_all(nodeId):
    """Return every attribute, sub-attribute and dictionary word of a node.

    Args:
        nodeId: Browse node id passed to ``US_btgs.get_attrs_by_node_id``.

    Returns:
        tuple: ``(response, status)``. With attributes found, ``data`` is a
        list with one entry per attribute that could be looked up, the
        message is ``'Successful'`` and the status is 200. With no
        attributes, or on any exception, the status is 400 and the message
        explains why.
    """
    res = GetDictionaryBrowseNodesAllResponse()
    try:
        attr_data_list = []
        us_btgs_api = US_btgs()
        attrs_api = AMZ_attrs()
        sub_attrs_api = AMZ_sub_attrs()
        title_dic_api = AMZ_title_dic()

        us_btgs_res = us_btgs_api.get_attrs_by_node_id(nodeId)
        if us_btgs_res:
            for anAttr in us_btgs_res:
                # Each item is iterated by key; the key is used as attr id.
                for key in anAttr:
                    attrs_res = attrs_api.get_attr_by_attr_id(key)
                    if attrs_res:
                        attr_data_list.append(_browse_node_attr_data(
                            attrs_res, sub_attrs_api, title_dic_api))
            res.message = 'Successful'
            response_status = 200
            res.data = attr_data_list
        else:
            # str() keeps the message intact for non-string node ids.
            res.message = 'No attributes for node_id: ' + str(nodeId)
            response_status = 400
    except Exception as e:
        res.message = str(e)
        response_status = 400
    return res, response_status