Example #1
 def check_case_3(): # No clicks: impressions on each of the last 3 days but zero clicks
     csv_data_list = []
     csv_title = ['shop_id', 'camp_id', 'item_id', 'adg_id', '3_day_impr', '3_day_click']
     local_camp_list = Campaign.objects(Q(online_status = 'online')).order_by('+campaign_id') # query all active campaigns
     for temp_camp_list in genr_sublist(local_camp_list, 10):
         for local_camp in temp_camp_list:
             local_adg_list = Adgroup.objects(Q(online_status = 'online') & Q(campaign_id = local_camp.campaign_id)).sum_reports(rpt_days = 3) # query all active adgroups, with 3-day report sums
             for temp_adg_list in genr_sublist(local_adg_list, 10):
                 for local_adg in temp_adg_list:
                     print 'camp_id=%s   adg_id=%s   qr.impr=%s   qr.click=%s' % (local_camp.campaign_id, local_adg.adgroup_id, local_adg.qr.impressions, local_adg.qr.click)
                     if local_adg.qr.impressions > 0 and local_adg.qr.click == 0:
                         csv_data_list.append([ local_adg.shop_id, local_adg.campaign_id, local_adg.item_id, local_adg.adgroup_id, local_adg.qr.impressions, local_adg.qr.click])
     export_to_file('(check_case_3)', csv_title, csv_data_list)
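Every example below iterates over chunks produced by genr_sublist(seq, size) (imported in Example #4 from apps.common.utils.utils_collection), but the helper itself is never shown. A minimal sketch of what it is assumed to do, yielding consecutive sublists of at most `size` items:

def genr_sublist(seq, size):
    # Assumed behaviour: split seq into consecutive sublists of at most `size` items.
    sublist = []
    for item in seq:
        sublist.append(item)
        if len(sublist) >= size:
            yield sublist
            sublist = []
    if sublist:  # trailing partial chunk
        yield sublist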
Example #2
 def check_case_2(): # Low quality score: average quality score below 6.5 and more than 40% of keywords scoring below 5
     csv_data_list = []
     csv_title = ['shop_id', 'camp_id', 'item_id', 'adg_id', 'kw_avg', 'kw_rate']
     local_camp_list = Campaign.objects(Q(online_status = 'online')).order_by('+campaign_id') # query all active campaigns
     for temp_camp_list in genr_sublist(local_camp_list, 10):
         for local_camp in temp_camp_list:
             local_adg_list = Adgroup.objects(Q(online_status = 'online') & Q(campaign_id = local_camp.campaign_id)) # query all active adgroups
             for temp_adg_list in genr_sublist(local_adg_list, 10):
                 for local_adg in temp_adg_list:
                     avg = get_avg_qscore(local_adg.adgroup_id)
                     rate = get_qscore_rate(local_adg.adgroup_id)
                     print 'camp_id=%s   adg_id=%s   avg=%s   rate=%s' % (local_camp.campaign_id, local_adg.adgroup_id, avg, rate)
                     if avg and rate and avg < 6.5 and rate > 0.4:
                         csv_data_list.append([ local_adg.shop_id, local_adg.campaign_id, local_adg.item_id, local_adg.adgroup_id, avg, rate])
     export_to_file('(check_case_2)', csv_title, csv_data_list)
Example #3
 def check_case_1(): # Too few keywords: fewer than 100 keywords
     csv_data_list = []
     csv_title = ['shop_id', 'camp_id', 'item_id', 'adg_id', 'kw_count']
     local_camp_list = Campaign.objects(online_status = 'online').order_by('+campaign_id') # query all active campaigns
     for temp_camp_list in genr_sublist(local_camp_list, 10):
         for local_camp in temp_camp_list:
             local_adg_list = Adgroup.objects(online_status = 'online', campaign_id = local_camp.campaign_id).order_by('adgroup_id') # query all active adgroups
             for temp_adg_list in genr_sublist(local_adg_list, 10):
                 for local_adg in temp_adg_list:
                     local_kw_list = Keyword.objects.only('word').filter(adgroup_id = local_adg.adgroup_id)
                     kw_count = len(local_kw_list)
                     print 'camp_id=%s   adg_id=%s   kw_count=%s' % (local_camp.campaign_id, local_adg.adgroup_id, kw_count)
                     if kw_count < 100:
                         csv_data_list.append([local_adg.shop_id, local_adg.campaign_id, local_adg.item_id, local_adg.adgroup_id, kw_count])
     export_to_file('(check_case_1)', csv_title, csv_data_list)
Example #4
def repair_longmnt():
    from apps.common.utils.utils_collection import genr_sublist
    from apps.subway.models_account import Account
    from apps.subway.models_adgroup import adg_coll
    shops = Account.objects.only('shop_id').all()
    shop_ids = [s.shop_id for s in shops]
    total_count = len(shop_ids)
    cur_count = 0
    for shop_id_list in genr_sublist(shop_ids, 50):
        adg_coll.update(
            {
                'shop_id': {
                    '$in': shop_id_list
                },
                'use_camp_limit': 0,
                'mnt_type': {
                    '$in': [1, 3]
                }
            }, {'$set': {
                'use_camp_list': 1
            }},
            multi=True)
        cur_count += len(shop_id_list)
        print 'total_count=%s, cur_count=%s, %s%%, last_shop_id=%s' % (
            total_count, cur_count, round(float(cur_count) / total_count, 4) * 100,  # float division, otherwise the percentage is always 0 under Python 2
            shop_id_list[-1])
    print 'ok'
Example #5
    def load_redis_newcat_word_2memcache(cls):
        '''
        Load newly added category words from redis into memcache in real time
        '''
        from apps.kwslt.select_words import MemcacheAdpter
        r = WordCat.r_wckeyword
        for cat_id in r.smembers('new_cat_word_set'):
            count = MemcacheAdpter.get_list_count(str(cat_id), 'kwlib')
            word_list = [word.decode('utf8') for word in r.lrange('%s_new_word' % cat_id, 0, -1)]
            if word_list:
                word_list = WordCat.get_wordcat_data_2memcache(word_list, cat_id)
                if count:
                    cache_word_list = CacheAdpter.get('%s_%s' % (cat_id, count - 1), 'kwlib')
                    if len(cache_word_list) < 4500:
                        word_list = cache_word_list + word_list
                        count = count - 1
                else:
                    count = 0
                for wl in genr_sublist(word_list, 4500):
                    CacheAdpter.set('%s_%s' % (cat_id, count), wl, 'kwlib')
                    count += 1

                CacheAdpter.set(str(cat_id), count, 'kwlib')
            r.delete('%s_new_word' % cat_id)
        r.delete('new_cat_word_set')
Example #6
    def clean_garbage_word(cls, key):
        '''
        Remove keywords that have gone too long without whole-network data
        '''
        word_list = [kw.decode('utf8') for kw in cls.r_keyword.lrange(key, 0, -1)]
        insert_list, delete_list = [], []
        for word in word_list:
            tmp_list = word.split(':')
            wd, sort_word = tmp_list[0], tmp_list[1]
            min_update_time = datetime.datetime.now() - datetime.timedelta(days = 200)
            update_time = cls.r_hkeyword.hget(sort_word, 'upt_tm')
            if update_time is None:
                delete_list.append(sort_word)
                continue
            if datetime.datetime.strptime(update_time, "%Y-%m-%d") < min_update_time:  # not updated within the last 200 days
                delete_list.append(sort_word)
                continue
            insert_list.append(wd + ':' + sort_word)

        if delete_list:  # redis DELETE requires at least one key
            cls.r_hkeyword.delete(*delete_list)
        cls.r_keyword.delete(key)
        for word_list in genr_sublist(insert_list, 10000):
            key_keyword_list = RedisKeyManager.get_keyword_list_key(cls.NEWKEYWORD_ALIAS, cls.NEW_KW_LIST_PREV_KEY)
            RedisKeyManager.redis_lpush(cls.r_nkeyword, key_keyword_list, word_list)
Example #7
 def check_case_6(): # Low CTR: CTR below 70% of the market (category) CTR
     csv_data_list = []
     csv_title = ['shop_id', 'camp_id', 'item_id', 'adg_id', '7_day_ctr', 'cat_avg_ctr']
     local_camp_list = Campaign.objects(Q(online_status = 'online')).order_by('+campaign_id') # query all active campaigns
     for temp_camp_list in genr_sublist(local_camp_list, 10):
         for local_camp in temp_camp_list:
             local_adg_list = Adgroup.objects(Q(online_status = 'online') & Q(campaign_id = local_camp.campaign_id)).sum_reports(rpt_days = 7) # query all active adgroups, with 7-day report sums
             for temp_adg_list in genr_sublist(local_adg_list, 10):
                 for local_adg in temp_adg_list:
                     item = local_adg.item_id
                     cat_data = local_adg.cat_data
                     ctr = local_adg.qr.ctr
                     cat_avg_ctr = get_cat_avg_ctr(item, cat_data)
                     print 'camp_id=%s   adg_id=%s   ctr=%s   cat.avg_ctr=%s' % (local_camp.campaign_id, local_adg.adgroup_id, ctr, cat_avg_ctr)
                     if cat_avg_ctr and local_adg.qr.ctr < cat_avg_ctr * 0.7:
                         csv_data_list.append([ local_adg.shop_id, local_adg.campaign_id, local_adg.item_id, local_adg.adgroup_id, ctr])
     export_to_file('(check_case_6)', csv_title, csv_data_list)
Example #8
 def check_case_7(): # Low conversion rate: conversion rate below 100% of the market (category) conversion rate
     csv_data_list = []
     csv_title = ['shop_id', 'camp_id', 'item_id', 'adg_id', '7_day_conv', 'cat_avg_conv'] # columns match the appended rows (kw_count is not computed in this check)
     local_camp_list = Campaign.objects(Q(online_status = 'online')).order_by('+campaign_id') # query all active campaigns
     for temp_camp_list in genr_sublist(local_camp_list, 10):
         for local_camp in temp_camp_list:
             local_adg_list = Adgroup.objects(Q(online_status = 'online') & Q(campaign_id = local_camp.campaign_id)).sum_reports(rpt_days = 7) # query all active adgroups, with 7-day report sums
             for temp_adg_list in genr_sublist(local_adg_list, 10):
                 for local_adg in temp_adg_list:
                     item = local_adg.item
                     cat_data = local_adg.cat_data
                     conv = local_adg.qr.conv
                     cat_avg_conv = get_cat_avg_conv(item, cat_data)
                     print 'camp_id=%s   adg_id=%s   qr.conv=%s   cat.avg_conv=%s' % (local_camp.campaign_id, local_adg.adgroup_id, conv, cat_avg_conv)
                     if cat_avg_conv and conv < cat_avg_conv * 1.0:
                         csv_data_list.append([ local_adg.shop_id, local_adg.campaign_id, local_adg.item_id, local_adg.adgroup_id, conv, cat_avg_conv])
     export_to_file('(check_case_7)', csv_title, csv_data_list)
Example #9
 def insert_new_word_list(cls, word_list):
     '''
     Insert new keywords
     '''
     insert_list = cls.get_insert_list(word_list)
     if insert_list:
         for word_list in genr_sublist(insert_list, 10000):
             key_keyword_list = RedisKeyManager.get_keyword_list_key(cls.NEWKEYWORD_ALIAS, cls.NEW_KW_LIST_PREV_KEY)
             RedisKeyManager.redis_lpush(cls.r_nkeyword, key_keyword_list, word_list)
Example #10
    def struct_download(cls, shop_id, tapi):
        """初始化creative"""
        try:
            top_creative_list = cls.get_creatives_byadgids(shop_id=shop_id,
                                                           tapi=tapi)
            local_crt_id_list = [
                crt['_id']
                for crt in crt_coll.find({'shop_id': shop_id}, {'_id': 1})
            ]
            upd_crt_dict, insert_crt_list, old_crt_id_list = {}, [], []
            for crt in top_creative_list:
                if crt.creative_id in local_crt_id_list:
                    upd_crt_dict.update({
                        crt.creative_id:
                        cls.Parser.parse(crt, trans_type='inc')
                    })
                    old_crt_id_list.append(crt.creative_id)
                else:
                    insert_crt_list.append(
                        cls.Parser.parse(crt,
                                         trans_type='init',
                                         extra_dict={'shop_id': shop_id}))

            del_crt_id_list = list(
                set(local_crt_id_list) - set(old_crt_id_list))

            for temp_insert_list in genr_sublist(insert_crt_list, 50):
                crt_coll.insert(temp_insert_list)

            update_list = []
            for crt_id, update_info in upd_crt_dict.items():
                update_list.append(({
                    'shop_id': shop_id,
                    '_id': crt_id
                }, {
                    '$set': update_info
                }))
            Creative.bulk_update_crt2db(update_list)

            if del_crt_id_list:
                cls.remove_creative({
                    'shop_id': shop_id,
                    '_id': {
                        '$in': del_crt_id_list
                    }
                })

            log.info('init creatives OK, shop_id = %s, get %s creatives' %
                     (shop_id, len(top_creative_list)))
            return True
        except Exception, e:
            log.error('init creatives FAILED, shop_id = %s, e = %s' %
                      (shop_id, e))
            return False
Example #11
def bulk_update_for_model(obj_list, commit_number = 1000):
    '''Bulk-save the objects in obj_list, committing in groups of commit_number'''
    for temp_list in genr_sublist(obj_list, commit_number):
        try:
            for obj in temp_list:
                obj.save(False, False)
            transaction.commit()
        except Exception, e:
            log.exception("Object save error, e=%s" % (e))
            transaction.rollback()
            raise e
Example #12
 def bulk_update_db(cls, update_list):
     total_updated_num = 0
     for temp_list in genr_sublist(update_list, 1000):  # a bulk op handles at most 1000 operations at a time
         bulk = cls._get_collection().initialize_unordered_bulk_op()
         for update_tuple in temp_list:
             bulk.find(update_tuple[0]).update(update_tuple[1])
         try:
             result = bulk.execute()
             total_updated_num += result['nModified']
         except BulkWriteError, e:
             log.error('bulk_update_kw2db, detail=%s' % e.details)
             total_updated_num += e.details['nModified']
Example #13
 def check_case_5(): # High PPC: above 1.2x the market average PPC and above min(item price * category conversion rate / roi, sum of per-keyword clicks * category ppc / total clicks, daily budget / 50)
     csv_data_list = []
     csv_title = ['shop_id', 'camp_id', 'item_id', 'adg_id', 'ppc', 'cat_avg_ppc', 'roi', 'cat_avg_conv', 'budget']
     local_camp_list = Campaign.objects(Q(online_status = 'online')).order_by('+campaign_id') # query all active campaigns
     for temp_camp_list in genr_sublist(local_camp_list, 10):
         for local_camp in temp_camp_list:
             local_adg_list = Adgroup.objects(Q(online_status = 'online') & Q(campaign_id = local_camp.campaign_id)).sum_reports(rpt_days = 7) # query all active adgroups, with 7-day report sums
             for temp_adg_list in genr_sublist(local_adg_list, 10):
                 for local_adg in temp_adg_list:
                     item = local_adg.item
                     cat_data = local_adg.cat_data
                     ppc = local_adg.qr.cpc
                     roi = local_adg.qr.roi
                     cat_avg_ppc = get_cat_avg_cpc(item, cat_data)
                     cat_avg_conv = get_cat_avg_conv(item, cat_data)
                     result = calc_kw_1(local_adg)
                     if local_adg.item and local_adg.item.price and cat_avg_ppc and cat_avg_conv and result and roi and ppc:
                         print 'camp_id=%s   adg_id=%s   ppc=%s   roi=%s   cat_avg_ppc=%s   cat_avg_conv=%s   budget=%s' % (local_camp.campaign_id, local_adg.adgroup_id, ppc, roi, cat_avg_ppc, cat_avg_conv, local_camp.budget)
                         if ppc > cat_avg_ppc * 1.2 and ppc > min(local_adg.item.price * cat_avg_conv / roi, result, local_camp.budget / 50): # compare ppc against the min(), not just its truthiness
                             csv_data_list.append([ local_adg.shop_id, local_adg.campaign_id, local_adg.item_id, local_adg.adgroup_id, ppc, cat_avg_ppc, roi, cat_avg_conv, local_camp.budget])
     export_to_file('(check_case_5)', csv_title, csv_data_list)
Example #14
 def check_case_4(): # Too few clicks: 7-day average clicks below max(10, a reference value derived from a sample category's clicks)
     example_cat_std_click = 100
     example_cat_click = get_cat_click_8id(150704)
     csv_data_list = []
     csv_title = ['shop_id', 'camp_id', 'item_id', 'adg_id', '7_day_avg_click', 'cat_click', 'example_cat_std_click', 'example_cat_click', 'current_cat_click', 'expression']
     local_camp_list = Campaign.objects(Q(online_status = 'online')).order_by('+campaign_id') # query all active campaigns
     for temp_camp_list in genr_sublist(local_camp_list, 10):
         for local_camp in temp_camp_list:
             local_adg_list = Adgroup.objects(Q(online_status = 'online') & Q(campaign_id = local_camp.campaign_id)).sum_reports(rpt_days = 7) # query all active adgroups, with 7-day report sums
             for temp_adg_list in genr_sublist(local_adg_list, 10):
                 for local_adg in temp_adg_list:
                     item = local_adg.item
                     cat_data = local_adg.cat_data
                     click = local_adg.qr.click / 7.0 # 7-day average; use float division under Python 2
                     cat_click = get_cat_all_click(local_adg)
                     if cat_click and example_cat_std_click and example_cat_click:
                         expression = example_cat_std_click * cat_click / example_cat_click
                         print 'camp_id=%s   adg_id=%s   qr.click=%s   cat.click=%s   example_cat_std_click=%s   example_cat_click=%s   expression=%s' % (local_camp.campaign_id, local_adg.adgroup_id, click, cat_click, example_cat_std_click, example_cat_click, expression)
                         if click < max(10, expression):
                             csv_data_list.append([ local_adg.shop_id, local_adg.campaign_id, local_adg.item_id, local_adg.adgroup_id, click, cat_click, example_cat_std_click, example_cat_click, cat_click, expression])
     export_to_file('(check_case_4)', csv_title, csv_data_list)
Example #15
def get_catinfo_new(select_type,
                    category_id_list=[]
                    ):  # TODO the response carries a lot of data, so whether to keep the category tree (or change its basic structure) is still undecided; this operation is on hold for now
    """
    New interface for fetching category information. It can return all top-level
    categories, the details of the given category ids, or the details of all child
    categories of the given ids, so the whole category tree can be fetched level by level.
    Parameters:
    type: 0 request all top-level categories (the second parameter can be ignored)
          1 request the details of the given category ids
          2 request the details of all child categories of the given ids
    category_id_list: comma-separated category ids, e.g. "16,30"
    Returns:
    parent_cat_id: parent category id
    cat_name: category name
    cat_path_name: category path name
    cat_id: category id
    cat_level: category level
    last_sync_time: last sync time
    cat_path_id: category path id
    """
    def get_cat_data_info(tobj, cat_dict):
        if tobj and hasattr(tobj, "category_info_list"):
            category_info_list = tobj.category_info_list
            if hasattr(category_info_list, "insight_category_info_d_t_o"):
                insight_category_info_d_t_o = category_info_list.insight_category_info_d_t_o
                for cat in insight_category_info_d_t_o:
                    cat_id = cat.cat_id
                    cat_dict[cat_id] = {
                        'cat_id': cat_id,
                        'cat_path_name': cat.cat_path_name,
                        'parent_cat_id': cat.parent_cat_id,
                        'cat_name': cat.cat_name,
                        'cat_level': cat.cat_level,
                        'last_sync_time': cat.last_sync_time,
                        'cat_path_id': cat.cat_path_id
                    }
        return cat_dict

    tobj = None
    cat_dict = {}
    try:
        if category_id_list:
            for tmp_list in genr_sublist(category_id_list, 10):
                tobj = tsapi.simba_insight_catsinfo_get(
                    type=select_type, category_id_list=','.join(tmp_list))
                cat_dict = get_cat_data_info(tobj, cat_dict)
        else:
            tobj = tsapi.simba_insight_catsinfo_get(type=select_type)
            cat_dict = get_cat_data_info(tobj, cat_dict)  # only the single response needs parsing here; the batched branch already parsed each chunk
    except TopError, e:
        log.error('get simba_insight_catsinfo_get error, e=%s' % (e))
        return {}
    return cat_dict
Example #16
def bulk_update_mongodb(
    coll, update_list
):  # update_list looks like [({'_id':1024321654}, {'$set':{'max_price':24}}), ({'_id':1024321651}, {'$set':{'max_price':47}}), ...]
    total_updated_num = 0
    for temp_list in genr_sublist(update_list, 1000):  # a bulk op handles at most 1000 operations at a time
        bulk = coll.initialize_unordered_bulk_op()
        for update_tuple in temp_list:
            bulk.find(update_tuple[0]).update(update_tuple[1])
        try:
            result = bulk.execute()
            total_updated_num += result['nModified']
        except BulkWriteError, e:
            log.error('bulk_update_mongodb, detail=%s' % e.details)
            total_updated_num += e.details['nModified']
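A hedged usage sketch of the function above; the collection handle is hypothetical and only the (filter, update) tuple shape is taken from the comment on update_list:

# Hypothetical usage: kw_coll stands in for any pymongo collection handle.
update_list = [
    ({'_id': 1024321654}, {'$set': {'max_price': 24}}),
    ({'_id': 1024321651}, {'$set': {'max_price': 47}}),
]
bulk_update_mongodb(kw_coll, update_list)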
Example #17
def bulk_update_for_sql(sql, value_list, commit_number = 1000):
    '''Execute the sql in batches, grouping value_list by commit_number and committing per group'''
    rowcount = 0
    for group_value_list in genr_sublist(value_list, commit_number):
        if group_value_list:
            try:
                cursor = connection.cursor()
                for temp_list in group_value_list:
                    rowcount += cursor.executemany(sql, temp_list) # do not put quotes around string %s placeholders in the sql statement!
                    transaction.commit()
            except Exception, e:
                log.exception("SQL update error, e=%s" % (e))
                transaction.rollback()
                raise e
Example #18
    def set_large_list(key,
                       value_list,
                       cache_name='kwlib',
                       timeout=24 * 60 * 60 * 30,
                       count=4500):
        use_cache = get_cache(cache_name)
        # split into groups of `count` items
        value_map = {}
        i = 0
        for temp_sublist in genr_sublist(value_list, count):
            value_map.update({'%s_%s' % (key, i): temp_sublist})
            i += 1

        # store in cache
        use_cache.set_many(value_map, timeout)  # store the chunks
        use_cache.set(key, i, timeout)  # store the number of chunks
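The reader side is not shown in these examples; a minimal sketch of how the chunks written by set_large_list might be read back, assuming the same get_cache helper and the key layout above (the name get_large_list is an assumption):

    def get_large_list(key, cache_name='kwlib'):
        use_cache = get_cache(cache_name)
        group_count = use_cache.get(key) or 0  # number of chunks written by set_large_list
        value_list = []
        for i in range(group_count):
            value_list.extend(use_cache.get('%s_%s' % (key, i)) or [])
        return value_list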
Example #19
def cat_data_list(cat_id_list, start_date,
                  end_date):  # TODO the minimum and maximum allowed spans between start_date and end_date still need testing
    """
    Fetch market-wide data for the given category ids. cpc, ctr, cvr and roi are real
    values; the other metrics are index-normalized. The competition field cannot be
    computed precisely and is only a reference value; the same applies to the other
    insight interfaces provided here.
    cat_id_list format: "16,30", comma-separated
    start_date format: yyyy-MM-dd, start of the range
    end_date format: yyyy-MM-dd, end of the range
    Returns:
    impression: impressions
    click: clicks
    cost: cost, in cents (fen)
    directtransaction: direct transaction amount
    indirecttransaction: indirect transaction amount
    directtransactionshipping: number of direct transactions
    indirecttransactionshipping: number of indirect transactions
    favitemtotal: item favorites
    favshoptotal: shop favorites
    transactionshippingtotal: total number of transactions
    transactiontotal: total transaction amount
    favtotal: total favorites, items plus shops
    competition: competition level
    ctr: click-through rate
    cpc: average cost per click
    roi: return on investment
    coverage: click conversion rate
    cat_id: category id
    cat_name: category name
    """

    cat_id_list = [str(cat_id) for cat_id in cat_id_list]
    top_cat_data_dict = {}
    for cat_list in genr_sublist(cat_id_list, 5):
        try:
            tobj = tsapi.simba_insight_catsdata_get(
                category_id_list=','.join(cat_list),
                start_date=start_date,
                end_date=end_date)
            if hasattr(tobj, "cat_data_list"):
                cat_data_list = tobj.cat_data_list
                if hasattr(cat_data_list, "insight_category_data_d_t_o"):
                    insight_category_data_d_t_o = cat_data_list.insight_category_data_d_t_o
                    for cat_data in insight_category_data_d_t_o:
                        top_cat_data_dict[cat_data.cat_id] = cat_data
        except TopError, e:
            log.error('get simba_insight_catsdata_get error, e=%s' % (e))
            continue
    return top_cat_data_dict
Example #20
    def set_large_list(cls,
                       key,
                       value_list,
                       split_size,
                       cache_name,
                       timeout=0):
        if value_list:
            use_cache = get_cache(cache_name)
            value_map = {}

            for index, temp_sublist in enumerate(
                    genr_sublist(value_list, split_size)):
                value_map.update({'%s_%s' % (key, index): temp_sublist})

            use_cache.set_many(value_map, timeout)  # store the split chunks
            use_cache.set(key, index + 1, timeout)  # store how many chunks the list was split into
        return True
Example #21
 def __get_creative_bycrtids(cls, shop_id, creative_id_list, tapi):
     """
                     通过creative_id获取创意
     """
     creative_list = []
     for temp_crt_id_list in genr_sublist(creative_id_list, 200):
         creative_ids = ','.join(
             str(creative_id) for creative_id in temp_crt_id_list)
         try:
             top_objs = tapi.simba_creatives_get(
                 creative_ids=creative_ids,
                 retry_count=settings.TAPI_RETRY_COUNT * 6,
                 retry_delay=settings.TAPI_RETRY_DELAY)
             if top_objs and hasattr(top_objs, 'creatives') and hasattr(
                     top_objs.creatives,
                     'creative') and top_objs.creatives.creative:
                 creative_list.extend(top_objs.creatives.creative)
         except TopError, e:
             log.error(
                 'simba_creatives_get TopError, shop_id = %s, e = %s' %
                 (shop_id, e))
     return creative_list
Example #22
def get_cats_forecast_new(word_list):
    """
    Forecast which categories fit the given keywords.
    New category forecast interface; the input is plain words joined by commas. Taobao
    does not document a maximum input length, so the previous limit of 200 is assumed.
    Taobao response fields:
    cat_path_name: category path and name
    bidword: keyword
    score: category relevance
    cat_path_id: category path id
    Return value of this function:
    {
    '连衣裙': [1, 2, 3, 4],
    ...
    }
    """
    cat_forecast_dict = {}
    for temp_list in genr_sublist(word_list, 100):
        try:
            tobj = tsapi.simba_insight_catsforecastnew_get(
                bidword_list=",".join(temp_list))
            if hasattr(tobj, "category_forecast_list"):
                category_forecast_list = tobj.category_forecast_list
                if hasattr(category_forecast_list,
                           "insight_category_forcast_d_t_o"):
                    insight_category_forcast_d_t_o = category_forecast_list.insight_category_forcast_d_t_o
                    for catforecast in insight_category_forcast_d_t_o:
                        cat_path_id = catforecast.cat_path_id
                        if cat_path_id == "":
                            continue
                        cat_id = int(catforecast.cat_path_id.split(' ')[-1])
                        word = catforecast.bidword
                        if catforecast.bidword in cat_forecast_dict:
                            cat_forecast_dict[word].append(cat_id)
                        else:
                            cat_forecast_dict[word] = [cat_id]
        except TopError, e:
            log.error('get simba_insight_catsforecastnew_get error, e=%s' %
                      (e))
            continue
    return cat_forecast_dict
Example #23
def get_relatewords_new(word_list, number=10):  # TODO the allowed request size is a serious concern
    """
    Fetch related words for the given words; more relevant words carry higher weight and
    appear earlier, and the result is truncated to `number` entries per word.
    Used to expand the keyword library by rolling from word to word.
    Parameters:
    bidword_list: comma-separated keyword string
    number: number of results to return
    Taobao response fields:
    related_word: related keyword
    weight: relevance
    Return value of this function:
    [
        连衣裙,
        连衣裙夏,
        ...
    ]
    """
    relate_list = []
    for temp_list in genr_sublist(word_list, 100):
        try:
            tobj = tsapi.simba_insight_relatedwords_get(
                bidword_list=','.join(temp_list), number=number)
            if hasattr(tobj, "related_words_result_list"):
                related_words_result_list = tobj.related_words_result_list
                if hasattr(related_words_result_list, "insight_related_words"):
                    insight_related_words = related_words_result_list.insight_related_words
                    for relate_word in insight_related_words:
                        if hasattr(relate_word, "related_word_items_list"):
                            related_word_items_list = relate_word.related_word_items_list
                            if hasattr(related_word_items_list,
                                       "insight_related_word"):
                                insight_related_word = related_word_items_list.insight_related_word
                                for word in insight_related_word:
                                    relate_list.append(
                                        word.related_word.replace(
                                            '\t', '').decode('utf8'))
        except TopError, e:
            log.error('get simba_insight_relatedwords_get error, e=%s' % (e))
    return relate_list
Example #24
    def generate_task(self):
        adg_date_list = collections.defaultdict(list)
        query_dict = {'shop_id': self.shop_id, 'adgroup_id': {'$in': self.adgroup_id_list}}
        result = Adgroup.Report.aggregate_rpt(query_dict= query_dict, group_keys = "adgroup_id,date",
                                              start_date = str(self.earliest_date), end_date = str(self.end_date))
        for adg_rpt in result:
            temp_adg_id = adg_rpt['_id']['adgroup_id']
            temp_datetime = adg_rpt['_id']['date'] # mongodb stores a datetime; strip the time part and convert to a date
            temp_date = datetime.date(temp_datetime.year, temp_datetime.month, temp_datetime.day)

            if temp_date >= self.adg_date_dict[temp_adg_id]: # only keep dates at or after this adgroup's last keyword sync
                adg_date_list[temp_adg_id].append(temp_date)

        for adg_id, date_list in adg_date_list.items(): # avoid shadowing the adg_date_list dict with the per-adgroup list
            date_list.sort()
            for temp_time_scope in genr_sublist(date_list, 5):
                for search_type, source in KeywordRpt.REPORT_CFG:
                    desc = source == "SUMMARY" and "summary" or "detail" # detail and summary data are split into separate tasks
                    start_time = str(temp_time_scope[0])[:10]
                    end_time = str(temp_time_scope[-1])[:10]

                    cache_key = "%s_%s_%s_%s" % (adg_id, start_time, end_time, desc)

                    self.task_list.append({
                        'shop_id': self.shop_id,
                        'campaign_id': self.campaign_id,
                        'token': self.token,
                        'adgroup_id': adg_id,
                        'source': source,
                        'search_type': search_type,
                        'start_time': start_time,
                        'end_time': end_time,
                        'cache_db': self.cache_db,
                        'cache_key': cache_key
                    })

                    self.key_list.append(cache_key)
Example #25
        continue
    if not isinstance(chk['max']['cat_id'], int):
        chk['max']['cat_id'] = 100000000
    if chk['min']['cat_id'] == chk['max']['cat_id']:
        if chk['min']['cat_id'] in shard_dict['shard']:
            continue
        shard_dict['shard'].append(chk['min']['cat_id'])
        continue
    min_max_list = [chk['min']['cat_id'], chk['max']['cat_id']]
    key = chk['shard']
    if key in shard_dict:
        shard_dict[key].append(min_max_list)
    else:
        shard_dict[key] = [min_max_list]
conn.disconnect()
shard_list = genr_sublist(shard_dict['shard'], 4)
del shard_dict['shard']
count = 0
index = 0
for key in shard_dict:
    conn = pymongo.Connection(shard_conn_ip[key])
    kwlib = conn.kwlib
    kwlib.authenticate('PS_kwlibAdmin', 'PS_managerKwlib')
    cat_coll = kwlib.kwlib_catinfo
    cat_list = []
    for max_min_list in shard_dict[key]:
        for cat in cat_coll.find(
            {'cat_id': {
                '$gte': max_min_list[0],
                '$lte': max_min_list[1]
            }}):
Example #26
def get_words_gdata(word_list, time_scope=None):
    """
    Fetch detailed whole-network data for the given keywords.
    Parameters:
    bidword_list: comma-separated string, e.g. "连衣裙,红色连衣裙..."
    start_date: start of the range
    end_date: end of the range
    Taobao response fields:
    impression: impressions
    click: clicks
    cost: cost, in cents (fen)
    directtransaction: direct transaction amount
    indirecttransaction: indirect transaction amount
    directtransactionshipping: number of direct transactions
    indirecttransactionshipping: number of indirect transactions
    favitemtotal: item favorites
    favshoptotal: shop favorites
    transactionshippingtotal: total number of transactions
    transactiontotal: total transaction amount
    favtotal: total favorites, items plus shops
    competition: competition level
    ctr: click-through rate
    cpc: average cost per click
    roi: return on investment (string)
    coverage: click conversion rate (string)
    bidword: keyword
    Return value of this function:
    {
    '连衣裙': {'pv': 1, 'click': 1, 'cpc': 1, 'ctr': 1, 'competition': 1, 'last_update_time': 1}
    ...
    }
    """
    if not time_scope:  # default to yesterday's data when no time range is given
        yst_date = '%s' % (datetime.date.today() - datetime.timedelta(days=1))
        time_scope = (yst_date, yst_date)

    word_dict = {}
    for temp_list in genr_sublist(word_list, 100):
        try:
            tobj = tsapi.simba_insight_wordsdata_get(
                bidword_list=','.join(temp_list),
                start_date=time_scope[0],
                end_date=time_scope[1])
            if hasattr(tobj, "word_data_list"):
                word_data_list = tobj.word_data_list
                if hasattr(word_data_list, "insight_word_data_d_t_o"):
                    insight_word_data_d_t_o = word_data_list.insight_word_data_d_t_o
                    for word_data in insight_word_data_d_t_o:
                        cpc = int(
                            float(
                                getattr(
                                    word_data, "cpc", word_data.click > 0
                                    and word_data.cost / word_data.click
                                    or 0)))
                        ctr = float(getattr(word_data, "ctr", 0))
                        competition = int(
                            float(getattr(word_data, "competition", 0)))
                        roi = float(getattr(word_data, "roi", 0))
                        coverage = float(getattr(word_data, "coverage", 0))
                        favtotal = int(float(getattr(word_data, "favtotal",
                                                     0)))
                        transactionshippingtotal = int(
                            float(
                                getattr(word_data, "transactionshippingtotal",
                                        0)))
                        word_dict[word_data.bidword] = DictWrapper({
                            'pv': int(word_data.impression),
                            'click': int(word_data.click),
                            'avg_price': cpc,
                            'ctr': ctr,
                            'competition': competition,
                            'word': word_data.bidword,
                            'roi': roi,
                            'coverage': coverage,
                            'favtotal': favtotal,
                            'transactionshippingtotal': transactionshippingtotal
                        })
        except TopError, e:
            if "API error response" in str(e):
                log.info("test error for :" + str(datetime.datetime.now()) +
                         ',\t' + ','.join(temp_list))
            log.error('simba_insight_wordsdata_get TopError, e=%s' % (e))
    return word_dict
Example #27
def cleanup_expired(request=None):
    """Clean up expired data, including:
    1. all data of users whose service has been expired for 15 days
    2. some tables, cleaned up by age (usually 30 days)
    """
    def is_outservice(shop_id):
        expire_days = Config.get_value('web.clean_up.OUTSERVICE_EXPIRE_DAYS',
                                       default=15)
        deadline_query_sql = "select deadline from router_articleusersubscribe where nick=(select nick from router_user where shop_id=%s) order by deadline desc limit 1"
        # check whether this shop's data may be deleted
        deadline_list = execute_query_sql(deadline_query_sql % shop_id)
        for tmp_deadline in deadline_list:
            deadline = tmp_deadline["deadline"]
            if time_is_ndays_interval(deadline, expire_days):
                return True
        return False

    def remove_shopdata(shop_id_list):
        """Remove the data of multiple shops"""
        from apps.subway.models import account_coll, camp_coll, adg_coll, crt_coll, ccrt_coll, kw_coll, item_coll, attn_coll, uprcd_coll
        # remove the basic structure (and report) data
        # account_coll.remove({'_id':{'$in':shop_id_list}}) # shop data is not cleaned up
        camp_coll.remove({'shop_id': {'$in': shop_id_list}})  # campaign data
        adg_coll.remove({'shop_id': {'$in': shop_id_list}})  # adgroup data
        crt_coll.remove({'shop_id': {'$in': shop_id_list}})  # creative data
        ccrt_coll.remove({'shop_id': {'$in': shop_id_list}})  # custom creatives
        kw_coll.remove({'shop_id': {'$in': shop_id_list}})  # keyword data
        item_coll.remove({'shop_id': {'$in': shop_id_list}})  # item data

        # remove report data
        from apps.subway.models_report import acctrpt_coll, camprpt_coll, adgrpt_coll, crtrpt_coll, kwrpt_coll
        # acctrpt_coll.remove({'shop_id': {'$in': shop_id_list}}) # account reports are not cleaned up
        camprpt_coll.remove({'shop_id': {'$in': shop_id_list}})
        adgrpt_coll.remove({'shop_id': {'$in': shop_id_list}})
        crtrpt_coll.remove({'shop_id': {'$in': shop_id_list}})
        kwrpt_coll.remove({'shop_id': {'$in': shop_id_list}})

        # remove algorithm / feature related data
        attn_coll.remove({'_id': {'$in': shop_id_list}})  # watch-list data

        # clear rank-grabbing (keyword locker) settings and message history
        from apps.engine.models_channel import MessageChannel
        from apps.engine.models_kwlocker import kw_locker_coll
        kw_cur = kw_locker_coll.find({'shop_id': {
            '$in': shop_id_list
        }}, {'_id': 1})
        kw_id_list = [kw['_id'] for kw in kw_cur]
        MessageChannel.delete_msg_history(kw_id_list)
        kw_locker_coll.remove({'shop_id': {'$in': shop_id_list}})

        from apps.engine.models import shopmng_task_coll
        shopmng_task_coll.remove({'_id': {'$in': shop_id_list}})  # shop-level task data

        from apps.mnt.models import mnt_camp_coll, mnt_task_coll
        mnt_camp_coll.remove({'shop_id': {'$in': shop_id_list}})  # fully automated campaign data
        mnt_task_coll.remove({'shop_id': {'$in': shop_id_list}})  # fully automated task data

        uprcd_coll.remove({'shop_id': {'$in': shop_id_list}})  # operation record data

        from apps.alg.models import optrec_coll
        optrec_coll.remove({'shop_id': {'$in': shop_id_list}})  # automated optimization analysis records

        from apps.subway.download import dler_coll
        dler_coll.remove({'_id': {'$in': shop_id_list}})  # download data

        from apps.crm.models import psmsg_coll
        psmsg_coll.remove({'shop_id': {'$in': shop_id_list}})  # customer message data

        try:
            user_list = User.objects.filter(nick__in=shop_id_list)
            user_id_list, uid_list, username_list = [], [], []
            for user in user_list:
                user_id_list.append(user.id)
                uid_list.append(user.first_name)
                username_list.append(user.nick)

            from apps.router.models import Agent, AccessToken, AdditionalPermission, NickPort, Shop
            Agent.objects.filter(principal__in=user_id_list).delete()  # user agents
            AccessToken.objects.filter(uid__in=uid_list,
                                       platform='web').delete()  # Qianniu sessions
            AdditionalPermission.objects.filter(
                user__in=user_id_list).delete()  # additional permissions
            NickPort.objects.filter(nick__in=username_list).delete()  # server assignment
            Shop.objects.filter(sid__in=shop_id_list).delete()  # shop info
        except Exception:
            pass

    def remove_outdated():
        last_date = datetime.date.today() - datetime.timedelta(30)
        default_deadline = date_2datetime(last_date)

        from django.contrib.sessions.models import Session
        Session.objects.filter(expire_date__lte=default_deadline).delete()

        from apps.subway.models_report import AccountRpt, CampaignRpt, AdgroupRpt, CreativeRpt, KeywordRpt
        AccountRpt.clean_outdated()
        CampaignRpt.clean_outdated()
        AdgroupRpt.clean_outdated()
        CreativeRpt.clean_outdated()
        KeywordRpt.clean_outdated()

        from apps.subway.models_upload import UploadRecord
        UploadRecord.clean_outdated()

        from apps.alg.models import OptimizeRecord
        OptimizeRecord.clean_outdated()

        # remove point data that has been frozen for more than 30 days
        from apps.web.models import PointActivity
        PointActivity.clean_outdated()

    smt_cursor = shopmng_task_coll.find({'status': 0}, {'_id': 1})
    deactived_shop_id_list = [smt['_id'] for smt in smt_cursor]  # shops whose shop tasks can no longer run
    expired_shop_id_list = []

    log.info('need to check %s shops' % len(deactived_shop_id_list))
    for temp_list in genr_sublist(deactived_shop_id_list, 100):
        expired_shop_id_list = []
        for shop_id in temp_list:
            if is_outservice(shop_id):
                expired_shop_id_list.append(shop_id)

        if expired_shop_id_list:
            remove_shopdata(expired_shop_id_list)

    log.info('start remove outdated data')
    remove_outdated()
    log.info('all data cleaned OK')
Example #28
def get_catsworddata(
        cat_id, word_list, start_date,
        end_date):  # TODO still to test: the maximum length of the word string, the allowed span between start_date and end_date, isp errors and connection timeouts
    """
    Fetch keyword data within a category.
    Parameters:
    cat_id: category id, str
    bidword_list: string joined by ^^, at most 200 words by default
    start_date: start of the range
    end_date: end of the range
    Taobao response fields:
    impression: impressions
    click: clicks
    cost: cost, in cents (fen)
    directtransaction: direct transaction amount
    indirecttransaction: indirect transaction amount
    directtransactionshipping: number of direct transactions
    indirecttransactionshipping: number of indirect transactions
    favitemtotal: item favorites
    favshoptotal: shop favorites
    transactionshippingtotal: total number of transactions
    transactiontotal: total transaction amount
    favtotal: total favorites, items plus shops
    competition: competition level
    ctr: click-through rate
    roi: return on investment
    cpc: cost per click
    coverage: click conversion rate
    cat_id: category id
    cat_name: category name
    bidword: keyword

    Return value of this function:
    {
        '连衣裙': {'pv': 1, 'click': 1, 'cpc': 1, 'ctr': 1, 'competition': 1}
        ...
    }
    """
    cat_word_data_dict = {}
    last_update_time = datetime.datetime.now()
    for temp_list in genr_sublist(word_list, 100):
        try:
            tobj = tsapi.simba_insight_catsworddata_get(
                cat_id=cat_id,
                bidword_list=','.join(temp_list),
                start_date=start_date,
                end_date=end_date)
            if hasattr(tobj, "catword_data_list"):
                catword_data_list = tobj.catword_data_list
                if hasattr(catword_data_list,
                           "insight_word_data_under_cat_d_t_o"):
                    insight_word_data_under_cat_d_t_o = catword_data_list.insight_word_data_under_cat_d_t_o
                    for cat_data in insight_word_data_under_cat_d_t_o:
                        cpc = getattr(
                            cat_data, "cpc", cat_data.click > 0 and
                            (cat_data.cost / cat_data.click) or 0)
                        try:
                            cat_word_data_dict[cat_data.bidword] = {
                                'pv': cat_data.impression,
                                'click': cat_data.click,
                                'cpc': cpc,
                                'ctr': cat_data.ctr,
                                'competition': cat_data.competition,
                                'last_update_time': last_update_time,
                                'word': cat_data.bidword
                            }
                        except Exception, e:
                            log.error("get the top error = %s" % e)
                            continue
        except TopError, e:
            log.error('get simba_insight_catsworddata_get error, e=%s' % (e))  # log the API actually called
    return cat_word_data_dict
Example #29
def get_word_subdata(word_list, start_date, end_date):
    """
    Fetch keyword data broken down by traffic source; in the result, network indicates
    the traffic source: 1 -> PC on-site, 2 -> PC off-site, 4 -> wireless on-site, 5 -> wireless off-site
    Parameters:
    bidword_list: comma-separated keyword list
    start_date: start of the range
    end_date: end of the range
    Taobao response fields:
    impression: impressions
    click: clicks
    cost: cost, in cents (fen)
    directtransaction: direct transaction amount
    indirecttransaction: indirect transaction amount
    directtransactionshipping: number of direct transactions
    indirecttransactionshipping: number of indirect transactions
    favitemtotal: item favorites
    favshoptotal: shop favorites
    transactionshippingtotal: total number of transactions
    transactiontotal: total transaction amount
    favtotal: total favorites, items plus shops
    competition: competition level
    ctr: click-through rate
    cpc: average cost per click
    roi: return on investment
    coverage: click conversion rate
    bidword: keyword
    network: traffic source: 1: PC on-site, 2: PC off-site, 4: wireless on-site, 5: wireless off-site
    mechanism: targeting mechanism: 0: keyword promotion, 2: targeted promotion, 3: general targeting
    Return value of this function:
    {
        '连衣裙':{
            "pv":1,
            "click":1,
            "cost":1,
            "directtransaction":1,
            "indirecttransaction":1,
            "directtransactionshipping":1,
            "indirecttransactionshipping":1,
            "favitemtotal":1,
            "favshoptotal":1,
            "transactionshippingtotal":1,
            "transactiontotal":1,
            "favtotal":1,
            "competition":1,
            "ctr":1,
            "cpc":1,
            "roi":1,
            "coverage":1,
            "network":1,
            "mechanism":1,
        },
        '红色连衣裙': {
            ...
            },
        ...
        ..
        .
    }
    """
    word_sub_data_dict = {}
    for temp_list in genr_sublist(word_list, 100):
        try:
            tobj = tsapi.simba_insight_wordssubdata_get(
                bidword_list=','.join(temp_list),
                start_date=start_date,
                end_date=end_date)
            if hasattr(tobj, "word_subdata_list"):
                word_subdata_list = tobj.word_subdata_list
                if hasattr(word_subdata_list, "insight_word_sub_data_d_t_o"):
                    insight_word_sub_data_d_t_o = word_subdata_list.insight_word_sub_data_d_t_o
                    for sub_data in insight_word_sub_data_d_t_o:
                        word = sub_data.bidword
                        temp_dict = sub_data.__dict__
                        if word in word_sub_data_dict:
                            word_sub_data_dict[word].append(temp_dict)
                        else:
                            word_sub_data_dict[word] = [temp_dict]
        except TopError, e:
            log.error('get simba_insight_wordssubdata_get error, e=%s' % (e))
            return {}
    return word_sub_data_dict