Esempio n. 1
0
    def load_in_cache_if_not():
        """Populate the 'web' cache with every synonym-word group, once.

        The entry cached for category ``-1`` is used as the "already loaded"
        marker: the lookup is given ``{'init': ''}`` as its default, so a
        result lacking the ``'init'`` key makes the function return at once.
        (Presumably ``CacheAdpter.get`` hands back the default only on a cache
        miss -- confirm against its implementation.)

        Otherwise all ``SynonymWord`` rows are read ordered by ``cat_id`` and
        one dict per category is cached for seven days.  In each dict every
        word of a group (carriage returns removed from the key only) maps to
        the complete word list of that group.
        """
        flag_dict = CacheAdpter.get(CacheKey.KWLIB_SYNOWORD % -1, 'web',
                                    {'init': ''})
        # ``dict.has_key`` does not exist on Python 3; ``in`` is equivalent
        # and works on every supported version.
        if 'init' not in flag_dict:
            return

        cache_seconds = 60 * 60 * 24 * 7
        temp_cat_id = -1
        word_dict = {}
        # Rows arrive grouped by cat_id, so a category can be flushed as soon
        # as the next row belongs to a different one.
        syno_word_list = SynonymWord.objects.all().order_by('cat_id')
        for sw in syno_word_list:
            if not sw:
                continue
            word_list = sw.word_list.split(',')
            temp_word_dict = {
                word.replace('\r', ''): word_list
                for word in word_list
            }
            if sw.cat_id == temp_cat_id:
                word_dict.update(temp_word_dict)
            else:
                # Category changed: store what was collected for the previous
                # one.  The very first switch stores the empty dict for -1,
                # which replaces the "not loaded" default seen above.
                CacheAdpter.set(CacheKey.KWLIB_SYNOWORD % temp_cat_id,
                                word_dict, 'web', cache_seconds)
                temp_cat_id = sw.cat_id
                word_dict = temp_word_dict
        # Flush the last category (or the empty -1 entry when there are no rows).
        CacheAdpter.set(CacheKey.KWLIB_SYNOWORD % temp_cat_id, word_dict,
                        'web', cache_seconds)
        log.info('init all synoword into memcache')
Esempio n. 2
0
 def run(self):
     """Thread body: keep executing sub-tasks until told to stop.

     While ``self.get_status()`` is truthy the loop fetches the current
     sub-task, executes it and interprets the returned count:

     * ``-1``  -- leave the loop immediately without any clean-up;
     * falsy (``0`` or ``None``) -- nothing was left to process, so the
       thread unregisters itself from the parent task and shuts down;
     * anything else -- report the count to the parent task and clear the
       sub-task slot.
     """
     while self.get_status():
         sub_task = self.get_task()
         if sub_task is None:
             # No sub-task assigned yet: poll again after a short pause.
             time.sleep(1)
             continue
         # Execute the sub-task.
         log.info('sub_task = %s begin to work' % (sub_task))
         finished_count = sub_task.execute()
         log.info('sub_task = %s work finished' % (sub_task))
         if finished_count == -1:
             return
         # A falsy count means the data has been fully traversed and no more
         # work can be fetched, so this thread ends itself.
         if not finished_count:
             self.father_task.unregister_subtask(self)
             self.shut_down()
             return
         # Refresh the parent task's progress.
         self.father_task.update_result(finished_count)
         # Clear the finished sub-task.
         self.set_task(None)
Esempio n. 3
0
 def get_record_list(start_index, prev_index, group_size, manager,
                     query_condition, filter_condition):
     """Read the next batch of keywords from the uploaded file.

     Reading resumes at byte offset ``manager.finished_count`` of
     ``manager.file_path``; the offset reached is written back to
     ``manager.finished_count`` so the next call continues from there.
     Each line is decoded as GBK and stripped of CR/LF characters; lines that
     cannot be decoded are skipped.  At most ``group_size + 1`` lines are
     collected per call (the bound used by the original condition).

     Any error while opening or reading the file is logged and swallowed.

     NOTE(review): the visible code fills ``result_list`` but contains no
     ``return`` statement -- the snippet looks truncated; confirm against the
     full file.
     """
     if start_index is None and prev_index is None:
         start_index = '0'
     start_index = int(start_index)
     result_list = []
     try:
         # ``with`` closes the handle even when reading fails.
         with open(manager.file_path, 'rb') as file_r:
             file_r.seek(manager.finished_count)  # resume at the saved offset
             # Test the size limit BEFORE reading: otherwise one extra line is
             # consumed, thrown away, and skipped for good by the saved offset.
             while len(result_list) <= group_size:
                 word = file_r.readline()
                 if not word:
                     # readline() yields an empty string at end of file (never
                     # None); stop instead of appending empty entries.
                     break
                 try:
                     word = word.decode('gbk').strip('\r\n')
                 except Exception:
                     continue
                 result_list.append(word)
             # Remember where to continue on the next call.
             manager.finished_count = int(file_r.tell())
     except Exception as e:
         log.info('file open failed, e=%s' % e)
Esempio n. 4
0
def scan(atoms):
    """Scan the keywords of every atom word and queue the suspicious ones.

    SIGINT is ignored for the duration of the process.  For each keyword
    returned by ``find_keywords(atom)`` that has an item, a result row is put
    on ``queue`` when the atom is not among the item's label words.  A final
    ``'DONE'`` marker is queued after all atoms have been processed.
    """
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    seen = 0
    reported = 0
    for position, atom in enumerate(atoms):
        progress = (atom, position, len(atoms))
        log.info('Finding keywords contain %s[%d/%d]...' % progress)
        for keyword in find_keywords(atom):
            seen += 1
            item = get_item(keyword)
            if not item:
                continue
            ps, ss, ds = get_psd(item)
            all_labels = ps + ss + ds
            compact_word = keyword['word'].replace(' ', '')
            scorer = get_scorer(keyword)
            remainder = scorer.extract_labels(compact_word)[-1]
            _score, labels = scorer.score_participles_by_item(compact_word)
            link = item_url(keyword['shop_id'], item.item_id)
            deal_count = count_deals(keyword)
            if atom in all_labels:
                # The atom is one of the item's own labels: nothing to report.
                continue
            reported += 1
            row = (
                atom,
                keyword['word'],
                ''.join(labels),
                ','.join(ps),
                ','.join(ss),
                ','.join(ds),
                remainder,
                link,
                deal_count,
            )
            queue.put(row)
            log.info('Processing %d/%d keywords' % (reported, seen))
    queue.put('DONE')
Esempio n. 5
0
    def update_cat_market_data(cls, cat_id_list=None, rpt_date=None):
        """Refresh the market-wide report data stored per category.

        The refresh is currently switched off: the method logs that it is
        skipping and returns ``True`` at once, so every statement after the
        early ``return`` is unreachable and kept only for reference.

        Args:
            cat_id_list: category ids to refresh.  When empty or ``None`` the
                disabled implementation would use every ``_id`` found in
                ``cat_coll``.  The default is ``None`` instead of a shared
                mutable ``[]``.
            rpt_date: report date; the disabled implementation would default
                to yesterday.

        Returns:
            ``True``.
        """
        log.info('skip update cat market_data')
        return True
        # ---- disabled implementation (unreachable) ----
        if not rpt_date:
            rpt_date = datetime.date.today() - datetime.timedelta(days=1)
        if not cat_id_list:
            cat_cur = cat_coll.find({}, {'_id'})
            cat_id_list = [cat['_id'] for cat in cat_cur]

        rpt_date_str = datetime.datetime.strftime(rpt_date, '%Y-%m-%d')
        # The database cannot store plain ``date`` values, so round-trip the
        # date into a ``datetime``.
        new_rpt_date = datetime.datetime.strptime(rpt_date_str, '%Y-%m-%d')
        result_dict = cat_data_list(cat_id_list, rpt_date_str, rpt_date_str)
        insert_list = []

        for cat_id, cat_data in result_dict.iteritems():
            cat_dict = {'cat_id': cat_id, 'rpt_date': new_rpt_date}
            for rpt_field, func in cls.rpt_field_dict.iteritems():
                # ``func`` performs the unit conversion for this field.
                cat_dict[rpt_field] = func(getattr(cat_data, rpt_field, 0))
            insert_list.append(cat_dict)

        # Replace any rows already stored for this date and these categories.
        cat_static_coll.remove({
            'rpt_date': new_rpt_date,
            'cat_id': {
                '$in': cat_id_list
            }
        })
        cat_static_coll.insert(insert_list)
        log.info('update cat market, cat_count=%s' % len(insert_list))
        return
Esempio n. 6
0
 def do_my_work(self):
     """Fetch this worker's keyword list from the cache and group it.

     The list is read from the cache location named by ``from_key`` /
     ``from_db`` in ``self.prj_dict``; an empty result is logged and the read
     is attempted a second time.  Grouping only happens when no "click > 0"
     filter is configured or when the first keyword's third field is
     positive.  Errors raised by ``group_kwlist`` are logged and swallowed.
     """
     prj = self.prj_dict
     log.info('worker start, item_id=%s, key=%s' %
              (prj['item_id'], prj['data_key']))
     kw_list = CacheAdpter.get(prj['from_key'], prj['from_db'], [])
     if not kw_list:
         # Nothing came back: report it and read once more.
         log.error('can not get group from memcache and the group is = %s' %
                   prj['from_key'])
         kw_list = CacheAdpter.get(prj['from_key'], prj['from_db'], [])
     group_dict = {}
     if kw_list:
         click_required = ('click > 0' in self.filter_conf
                           or 'click>0' in self.filter_conf)
         if not click_required or kw_list[0][2] > 0:
             # The category id is the part of the key before the first '_'.
             cat_id = prj['from_key'].split('_')[0]
             try:
                 group_dict = group_kwlist(
                     kw_list, self.item_scorer, int(cat_id), self.cat_cpc,
                     self.cats, self.remove_word_list, self.filter_conf,
                     self.filter_list, self.price_list)
             except Exception as e:
                 log.error('group_kwlist error: cat_id=%s, e=%s' %
                           (cat_id, e))
Esempio n. 7
0
 def update_memcache():
     """Cache the level-1 and level-2 pointless words for seven days.

     The cached value is a two-element list: the words whose ``level`` is 1
     followed by the words whose ``level`` is 2.
     """
     first_level = []
     second_level = []
     for entry in PointlessWord.objects.all():
         level = entry.level
         if level == 1:
             first_level.append(entry.word)
         elif level == 2:
             second_level.append(entry.word)
     one_week = 60 * 60 * 24 * 7
     CacheAdpter.set(CacheKey.KWLIB_POINTLESSWORD,
                     [first_level, second_level], 'web', one_week)
     log.info('update PointlessWord into memcache')
Esempio n. 8
0
    def _run(self):
        """Run the shop task's operations in order, resuming at ``stop_func``.

        Operations listed before ``self.stop_func`` are skipped; when
        ``stop_func`` is not one of the known operations, all of them run.

        Returns:
            ``(True, None)`` when every operation returned a truthy value,
            otherwise ``(False, message)`` where ``message`` is the name of
            the operation that returned a falsy value or the text of the
            exception that was raised.
        """
        operations = ['_sync_data', '_check_mnt']
        if self.stop_func in operations:
            first = operations.index(self.stop_func)
        else:
            first = 0

        try:
            for operation in operations[first:]:
                succeeded = getattr(self, operation)()
                if not succeeded:
                    # Carry the operation name to the handler below.
                    raise Exception(operation)
                log.info('shopmng task %s OK, shop_id=%s' %
                         (operation, self.shop_id))

            log.info('[timer][shop_task_result][ok]: shop_id=%s' %
                     self.shop_id)
            return True, None
        except Exception as e:
            log.error(
                '[timer][shop_task_result][failed]: shop_id=%s, while doing %s'
                % (self.shop_id, e))
            return False, str(e)
Esempio n. 9
0
 def is_runnable(self, is_login=False):
     """Decide from the task status whether the task may run right now.

     Returns ``True`` when ``self.is_today()`` is falsy or the status is one
     of -1, 0, 1, 2; ``False`` for status 3; for status 4 ``False`` only when
     the task already ran more than three times and the call does not come
     from a login.  Any other status falls through and yields ``None``.
     """
     if not self.is_today():
         return True

     status = self.status
     # TODO: wangqi 2014-1-10 -- status 2 is allowed to run again because a
     # cache is used to decide whether the task is really running.  An
     # earlier version refused to run when last_start_time was within the
     # previous 45 minutes.
     if status in (-1, 0, 1, 2):
         return True
     if status == 3:
         log.info('shopmngtask ran today already, shop_id=%s' %
                  self.shop_id)
         return False
     if status == 4:
         if self.run_times > 3 and not is_login:
             log.info(
                 'shopmngtask %s ran %s times today, and not from login'
                 % (self.shop_id, self.run_times))
             return False
         return True
     return None
Esempio n. 10
0
def get_task_list_of_run_mnt_routine_task(need_count):
    """Return the queue of eligible routine-optimisation tasks.

    Asks ``MntTaskMng.get_valid_task`` for up to ``need_count`` tasks of
    ``task_type`` 0 and logs how many were returned.
    """
    valid_tasks = MntTaskMng.get_valid_task(need_count=need_count,
                                            task_type=0)
    found = len(valid_tasks)
    log.info('[timer][get_task_list][mnt_routine_task]: count=%s' % found)
    return valid_tasks
Esempio n. 11
0
    def _check_mnt(self):
        """Check the shop's managed campaigns and download keyword reports.

        Calls ``MntMnger.check_mnt_camps`` and ``MntTaskMng.check_routine_task``
        for ``self.shop_id``, stores the returned priority when it differs
        from the current one, and downloads keyword reports for the returned
        adgroups.  Returns ``True`` when the visible body completes.

        NOTE(review): the handler of the outer ``try`` is not visible in this
        excerpt -- the snippet appears truncated.
        """
        # Imported locally rather than at module level (presumably to avoid a
        # circular import -- confirm).
        from apps.subway.download import Downloader
        from apps.mnt.models import MntMnger, MntTaskMng
        try:
            priority, adg_tuple_list = MntMnger.check_mnt_camps(
                shop_id=self.shop_id)
            MntTaskMng.check_routine_task(shop_id=self.shop_id)
            # Persist the priority only when it actually changed.
            if self.priority != priority:
                self.update_task_status(priority=priority)
            log.info('shopmng task checked mnt status, shop_id=%s' %
                     self.shop_id)

            if adg_tuple_list:
                # A failed download is logged but does not fail the check.
                try:
                    dler = Downloader.objects.get(shop_id=self.shop_id)
                    Keyword.download_kwrpt_byadgs(
                        shop_id=self.shop_id,
                        tapi=dler.tapi,
                        token=dler.token,
                        adg_tuple_list=adg_tuple_list)
                except Exception, e:
                    log.error('download kwrpt error, shop_id=%s, e=%s' %
                              (self.shop_id, e))
            # Logged even when there was nothing to download or the download
            # failed above.
            log.info('shopmng task downloaded keyword rpt, shop_id=%s' %
                     self.shop_id)
            return True
Esempio n. 12
0
    def summary(self, data):
        """Tally the planned keyword changes of an adgroup and log them.

        Two dicts are built from ``data.kw_list``:

        * a count per ``cmd`` value (keywords without a truthy ``cmd`` are
          counted under ``'unchange'``), stored in ``data.cmd_count_dict``;
        * the planned modifications (adds, deletes, price and match-scope
          updates, plus 7-day click totals overall and for price increases
          and decreases), stored in ``data.modify_kw_count_dict['plan']``.
        """
        cmd_counts = {'unchange': 0}
        plan = {
            'add': len(data.adgroup.add_kw_list),
            'del': 0,
            'upd_price': 0,
            'upd_match': 0,
            'click': 0,
            'click_inc': 0,
            'click_dec': 0,
        }
        for kw in data.kw_list:
            command = kw.cmd if hasattr(kw, 'cmd') else None
            bucket = command or 'unchange'
            cmd_counts[bucket] = cmd_counts.get(bucket, 0) + 1

            clicks = kw.rpt7.click
            plan['click'] += clicks
            if kw.is_delete:
                plan['del'] += 1
            if kw.new_price:
                plan['upd_price'] += 1
                # Attribute the clicks to the direction of the price change.
                if kw.new_price > kw.max_price:
                    plan['click_inc'] += clicks
                elif kw.new_price < kw.max_price:
                    plan['click_dec'] += clicks
            if kw.new_match_scope:
                plan['upd_match'] += 1

        data.cmd_count_dict = cmd_counts
        data.modify_kw_count_dict['plan'] = plan
        log.info('Strategy 2, summary: %s, %s, shop_id=%s, adg_id=%s' %
                 (plan, cmd_counts, data.adgroup.shop_id,
                  data.adgroup.adgroup_id))

        return
Esempio n. 13
0
 def refresh_camp_cost(cls, rpt_date):
     """Copy each campaign's cost for ``rpt_date`` into this collection.

     Reads the campaign report rows for the date (``search_type`` and
     ``source`` both -1), builds one ``(filter, {'$set': {'cost': ...}})``
     pair per row and hands the list to ``cls.bulk_update_db``.
     """
     query = {'date': rpt_date, 'search_type': -1, 'source': -1}
     wanted_fields = {'cost': 1, 'campaign_id': 1, 'shop_id': 1}
     update_list = [
         (
             {
                 'rpt_date': rpt_date,
                 'shop_id': row['shop_id'],
                 'campaign_id': row['campaign_id'],
             },
             {'$set': {'cost': row['cost']}},
         )
         for row in camprpt_coll.find(query, wanted_fields)
     ]
     update_count = cls.bulk_update_db(update_list)
     log.info('refresh_camp_status ok, rpt_date=%s, update_count=%s' %
              (rpt_date, update_count))
0
    def get_lottery(cls, user, is_backend=False):
        """Work out the user's lottery state when the home page is entered.

        Determines whether the user should be offered the lottery, whether a
        prize still has to be exchanged, and the award details.  Users on the
        configured blacklist get the all-negative default result.  When the
        lottery is shown to a front-end user, the display is recorded on the
        user's lottery row (created if missing) and an impression is counted.

        Errors are logged and swallowed.

        NOTE(review): apart from the blacklist branch no ``return`` is visible
        in this excerpt -- the snippet appears truncated.
        """
        result = {'need_lottery': False, 'need_exchange': False, 'lottery_detail': None}
        try:
            now = datetime.datetime.now()
            blacklist = Config.get_value('web.LOTTERY_BACKLIST', default=[])
            if user.nick in blacklist:
                return result
            user_lottery = cls.get_user_lottery(user)
            period = cls.date_config_dict
            # Only active between the start time and the exchange deadline.
            if period['start_time'] < now and now < period['exchange_deadline']:
                need_lottery, lottery_desc = cls._need_lottery(user_lottery, user)
                need_exchange, exchange_desc = cls._need_exchange(user_lottery, user)
                result.update({
                    'need_lottery': need_lottery,
                    'need_exchange': need_exchange,
                    'lottery_detail': cls._get_awards_detail(user_lottery, user),
                })
                if need_lottery and not is_backend:
                    if user_lottery:
                        user_lottery.last_show_time = datetime.datetime.today()
                        user_lottery.save()
                    else:
                        UserLottery.objects.create(nick=user.nick,
                                                   lottery_num=cls.lottery_num)
                    LotteryReport.add_impressions(lottery_num=cls.lottery_num)
                log.info('nick=%s, need_lottery=%s,%s; need_exchange=%s,%s' % (
                    user.nick, need_lottery, lottery_desc,
                    need_exchange, exchange_desc))
        except Exception as e:
            log.error('get lottery error, nick=%s, e=%s' % (user.nick, e))
Esempio n. 15
0
    def allot_2_workers(self, sub_prj_list, db_name):
        """Mark every sub-project as working and dispatch it to its worker.

        First each project's ``statu`` is set to ``'working'``, the matching
        ``<data_key>_statu`` cache entries are written (with 180 as the third
        argument of ``set_many`` -- presumably a TTL in seconds) and any old
        result stored under ``data_key`` is deleted.  Then one daemon thread
        per project calls ``worker_work`` on the worker at ``host:port``.
        A project whose thread cannot be started is logged and skipped.
        """
        log.info('start: send msgs to workers')
        # Flag all tasks as working before anything is dispatched.
        working_flags = {}
        result_keys = []
        for prj in sub_prj_list:
            prj['statu'] = 'working'
            working_flags[prj['data_key'] + '_statu'] = 'working'
            result_keys.append(prj['data_key'])
        CacheAdpter.set_many(working_flags, db_name, 180)
        CacheAdpter.delete_many(result_keys, db_name)

        # Hand the tasks out.
        for prj in sub_prj_list:
            try:
                address = '%s:%s' % (prj['host'], prj['port'])
                dispatcher = NewThread(JAPI(host=address).worker_work,
                                       prj_dict=prj,
                                       is_sync=False)
                dispatcher.setDaemon(True)
                dispatcher.start()
            except Exception as e:
                log.error('error=%s,prj=%s' % (e, prj))
Esempio n. 16
0
 def calc_top_keywords(self):
     """Sync the report and, if that succeeds, compute the top keywords.

     ``self.status`` is set to 1 for the duration of the work (presumably an
     "in progress" marker -- confirm) and is always deleted afterwards, even
     when ``sync_report`` or ``get_top_keywords`` raises, so a failure cannot
     leave the marker behind.  Exceptions still propagate to the caller.
     """
     log.info("now calc top_keywords, shop_id=%s" % self.shop_id)
     self.status = 1
     try:
         if self.sync_report():
             self.get_top_keywords()
     finally:
         # Clear the marker on success and on failure alike.
         del self.status
     log.info("calc top_keywords OK, shop_id=%s" % self.shop_id)
Esempio n. 17
0
    def start_prj(self, time_out=30):
        """Run a whole project: split it, dispatch it, wait and aggregate.

        Args:
            time_out: maximum number of seconds to wait for the workers
                before giving up on polling.

        Returns:
            Whatever ``self.sum_prj_result`` produces for the sub-projects,
            including after a timeout.
        """
        db_name = self.get_db_name()  # data store to use
        prj_list = self.get_prjdata_list(db_name)  # split into tasks
        worker_list = self.get_workers()  # fetch a batch of workers
        sub_prj_list = self.allot_2_prjs(prj_list, worker_list)  # assign
        self.allot_2_workers(sub_prj_list, db_name)
        # Hook for the coordinator's own work; subclasses may override it
        # (e.g. a keyword-selection coordinator combining keywords itself).
        self.do_self_work()
        start_time = datetime.datetime.now()
        time_interval = 0.7
        min_interval = 0.3
        # Poll until every sub-project reports completion.
        while not self.is_prj_finished(sub_prj_list):
            time.sleep(time_interval)
            elapsed = datetime.datetime.now() - start_time
            # total_seconds() covers the whole delta; ``.seconds`` wraps at one
            # day and reads 86399 for a slightly negative delta.
            if elapsed.total_seconds() >= time_out:
                log.info('waiting for worker finishing time out!')
                break
            self.get_prj_statu(sub_prj_list, db_name)
            # Halve the polling interval down to the minimum.
            time_interval = max(time_interval / 2, min_interval)

        prj_result = self.sum_prj_result(sub_prj_list, db_name)  # aggregate

        return prj_result
Esempio n. 18
0
 def download_crtrpt_byadgs(cls, shop_id, tapi, token, adg_tuple_list):
     """Download creative reports for the adgroups in ``adg_tuple_list``.

     ``adg_tuple_list`` looks like
     ``[(adgroup_id, campaign_id, last_sync_time), ...]``.

     The download window ends one day ago (two days ago before 06:00).  Each
     adgroup's start date is its last sync date, raised to the earliest
     allowed date or lowered to the window end when it lies outside them.
     Adgroups for which ``time_is_someday(start)`` is truthy are skipped.

     Returns:
         ``(True, '')`` on success, ``(False, exception)`` on any error.
     """
     try:
         earliest = datetime.date.today() - datetime.timedelta(
             days=cls.Report.INIT_DAYS)
         days_back = 2 if datetime.datetime.now().hour < 6 else 1
         end_date = datetime.date.today() - datetime.timedelta(days=days_back)
         for adg in adg_tuple_list:
             start = adg[2].date()
             if start < earliest:
                 start = earliest
             elif start > end_date:
                 start = end_date
             if time_is_someday(start):
                 continue
             Creative.download_crtrpt_byadg(
                 shop_id=shop_id,
                 campaign_id=adg[1],
                 adgroup_id=adg[0],
                 token=token,
                 time_scope=(start, end_date),
                 tapi=tapi)
         log.info('download creative rpt OK, shop_id=%s' % shop_id)
         return True, ''
     except Exception as e:
         log.error('download creative rpt FAILED, shop_id=%s, e=%s' %
                   (shop_id, e))
         return False, e
Esempio n. 19
0
def sub_port(request):
    """Entry view of a sub-port: validate the signed request, then log in.

    The request parameters are checked with ``jl_check_sign_with_secret``
    (timeout of six minutes).  An empty parameter set, a missing permission
    or an expired request each produce their own error response.  Otherwise
    any current session is logged out and ``for_user_login`` is called with
    the visitor's nick, session and origin.
    """
    params = {}
    params.update(request.REQUEST)

    # Validate the URL parameters.
    if not params:
        return render_to_limited(request, '您的登录方式有误,请重新登陆您的管理后台,然后点击开车精灵的图标')
    verdict = jl_check_sign_with_secret(params, timeout=60 * 6)
    if verdict == 'no_permission':
        return render_to_limited(request, '您没有使用权限,请订购后重新登录')
    if verdict == 'timeout':
        return HttpResponse('请求超时,请重新进入')

    log.info("LOGIN sub_port, nick=%s, from=%s" %
             (params['visitor_nick'], params['visitor_from']))
    auth_logout(request)

    try:  # perform the login
        visit_dict = {
            'nick': params['visitor_nick'],
            'session': params['top_session'],
            'visitor_from': params['visitor_from'],
        }
        return for_user_login(request, visit_dict)
    except Exception as e:
        log.exception(
            "sub_port exception, nick=%s, session=%s, visitor_from=%s, error=%s"
            % (params['visitor_nick'], params['top_session'],
               params['visitor_from'], e))
        return render_to_limited(request, '登陆开车精灵发生系统错误')
Esempio n. 20
0
    def run(self):
        """Handle a main-port login and route the user onwards.

        An OAuth callback either synchronises the access token or, when the
        authorisation failed, shows the limited page with the OAuth error.
        A non-OAuth request must pass the parameter, timestamp and signature
        checks and obtain a token.  Authenticated users are redirected to
        their sub-port domain; everybody else is sent to the authorisation
        page.  Any exception ends in the generic "login failed" page.
        """
        try:
            is_oauth_request, is_auth_ok, oauth_error = (
                self.check_is_oauth_and_result())
            is_login_ok = False
            if is_oauth_request:
                if not is_auth_ok:
                    return self.jump_limited_page(oauth_error)
                self.token = AccessToken.sync_access_token(
                    self.request.GET['code'])
            else:
                # Each check runs only when all previous ones passed.
                checks = (self.check_parms_integrity, self.check_timestamp,
                          self.check_sign, self.get_token)
                is_login_ok = all(check() for check in checks)

            log.info("LOGIN main_port, nick=%s, from=%s" %
                     (self.nick, self.visitor_from))
            if not (is_auth_ok or is_login_ok):
                return self.redirect_2top_authorize()

            domain = NickPort.get_port_domain(nick=self.nick,
                                              force_create=True)
            if domain:
                return self.redirect_2subport(domain)
            return self.jump_limited_page('服务器繁忙,请稍候再登录')
        except Exception as e:
            log.error('Login error, e=%s, request=%s' %
                      (e, self.request.get_full_path()))
            return self.jump_limited_page('登录失败,请联系客服')
Esempio n. 21
0
def load_atoms(cat_id):
    """Return the ``word`` of every document in the ChSegement collection.

    NOTE(review): ``cat_id`` only appears in the log message; the query
    itself is unfiltered -- confirm this is intended.
    """
    log.info('Loading atom words in category: {:<10}'.format(cat_id))
    cursor = ChSegement._get_collection().find({}, {'word': 1})
    atoms = [document['word'] for document in cursor]
    log.info('%d atom words loaded.' % len(atoms))
    return atoms
Esempio n. 22
0
def update_prop_status(request, dajax):
    """Switch a managed campaign on or off.

    Two states change together: the campaign's online status (through
    ``update_campaign``) and the ``mnt_status`` of the matching
    ``MntCampaign`` rows.  On success the page is told the new state; on any
    failure the error is logged and an alert naming the attempted action is
    shown.
    """
    # Fallback wording for the failure alert.  Without it a failure that
    # happens before the real description is chosen (for example a missing
    # POST field) would raise a NameError inside the ``except`` handler.
    opt_desc = '操作'
    try:
        campaign_id = request.POST['campaign_id']
        status = bool(int(request.POST['status']))
        shop_id = int(request.user.shop_id)
        # Not used below; kept because the int() conversion still rejects a
        # malformed value.
        mnt_type = int(request.POST.get('mnt_type', 0))
        if status:
            online_status, mnt_status, opt_desc = 'online', 1, '开启自动优化'
        else:
            online_status, mnt_status, opt_desc = 'offline', 0, '暂停自动优化'
        opter, opter_name = analysis_web_opter(request)
        result_list, msg_list = update_campaign(shop_id=shop_id,
                                                campaign_id=campaign_id,
                                                online_status=online_status,
                                                opter=opter,
                                                opter_name=opter_name)
        if 'online_status' not in result_list:
            raise Exception('<br/>'.join(msg_list))
        MntCampaign.objects.filter(
            shop_id=shop_id,
            campaign_id=campaign_id).update(set__mnt_status=mnt_status)
        dajax.script('PTQN.mnt.update_camp_back(%s)' % (1 if status else 0))
    except Exception as e:
        log.info(
            'update mnt campaign prop status error ,e = %s, shop_id = %s' %
            (e, request.user.shop_id))
        dajax.script("PTQN.alert('%s失败,请刷新页面重新操作!');" % (opt_desc))
Esempio n. 23
0
 def get_cat_from_db(cls, cat_id):
     """Look up the category with the given id in the database.

     ``cat_id`` may be an int, a float or a numeric string such as
     ``'50.0'``; it is normalised to an int first.  A failed lookup is
     logged and leaves ``cat`` as ``None``.

     NOTE(review): no ``return`` is visible in this excerpt -- the snippet
     appears truncated.
     """
     lookup = {'cat_id': int(float(cat_id))}
     cat = None
     try:
         cat = cls.objects.get(**lookup)
     except Exception as e:
         log.info('can not get cat info from db and the error is = %s' % e)
Esempio n. 24
0
    def struct_download(cls, shop_id, tapi):
        """Sync the shop's account document, refreshing its balance.

        The balance is fetched with ``tapi.get_account_balance()``; when that
        fails the error is logged and the placeholder value 100 is stored
        instead.  An existing account document is updated in place,
        otherwise a new one is inserted with zeroed category and consultant
        fields.

        NOTE(review): the handler of the outer ``try`` and the final
        ``return`` are not visible in this excerpt -- the snippet appears
        truncated.
        """
        result = False
        try:
            # Placeholder balance, used only when the API call below fails.
            account_dict = {'balance': 100}
            try:
                # tobj_balance = tapi.simba_account_balance_get()
                # if tobj_balance and hasattr(tobj_balance, 'balance'):
                #     account_dict.update({'balance':tobj_balance.balance})
                balance = tapi.get_account_balance()
                account_dict.update({'balance': balance})
            except Exception, e:
                log.error('get balance error, shop_id=%s, error=%s' %
                          (shop_id, e))

            # Update the existing document, or create it with default fields.
            if account_coll.find_one({'_id': shop_id}):
                account_coll.update({'_id': shop_id}, {'$set': account_dict})
            else:
                account_dict.update({
                    '_id': shop_id,
                    'cat_id': 0,
                    'consult_group_id': 0,
                    'consult_id': 0
                })
                account_coll.insert(account_dict)
            log.info('sync account OK, shop_id=%s' % shop_id)
            result = True
Esempio n. 25
0
def update_keyword_tokenizer(key):
    """Refresh the stored data after the keyword tokenizer has changed.

    Every element of the redis list ``key`` has the form
    ``<keyword>:<sort_word>``.  The sort word is recomputed for each keyword;
    where it differs, the list element is rewritten and the hash stored
    under the old sort word is renamed to the new one.  When the rename
    fails, a fresh hash is written under the new sort word instead.
    """
    today = str(datetime.date.today())
    entries = KeywordInfo.r_keyword.lrange(key, 0, -1)
    for position, raw_entry in enumerate(entries):
        parts = raw_entry.decode('utf8').split(':')
        wd, stored_sort_word = parts[0], parts[1]
        fresh_sort_word = RedisKeyManager.get_sort_word(wd)
        if stored_sort_word == fresh_sort_word:
            continue
        KeywordInfo.r_keyword.lset(key, position, wd + ':' + fresh_sort_word)
        try:
            KeywordInfo.r_hkeyword.rename(stored_sort_word, fresh_sort_word)
        except Exception:
            # Rename failed: log it and create the hash from scratch.
            log.info('miss key and word is = %s:%s' % (fresh_sort_word, wd))
            KeywordInfo.r_hkeyword.hmset(fresh_sort_word, {
                'kw': wd,
                'upt_tm': today,
                'cat_list': '',
            })
Esempio n. 26
0
def set_budget(request):
    """Set a campaign's daily budget from the posted form values.

    Reads ``budget``, ``camp_id`` and ``use_smooth`` from the POST data and
    calls ``update_campaign``.  When the budget was accepted, the result
    data echoes the new values; otherwise the returned messages become the
    error text.  Exceptions from the update are logged and replaced by a
    generic error message.

    NOTE(review): no ``return`` is visible in this excerpt -- the snippet
    appears truncated.
    """
    form = request.POST
    budget = int(form['budget'])
    campaign_id = int(form['camp_id'])
    use_smooth = form['use_smooth']
    shop_id = int(request.user.shop_id)
    errMsg = ''
    opter, opter_name = analysis_web_opter(request)
    try:
        result_list, msg_list = update_campaign(
            shop_id=shop_id,
            campaign_id=campaign_id,
            budget=budget,
            use_smooth=use_smooth,
            opter=opter,
            opter_name=opter_name)
        if 'budget' not in result_list:
            json_result_data = {}
            errMsg = '<br/>'.join(msg_list)
        else:
            json_result_data = {
                'camp_id': campaign_id,
                'budget': budget,
                'use_smooth': use_smooth,
            }
    except Exception as e:
        log.info('modify_camp_budget error, shop_id=%s, campaign_id=%s, e=%s' %
                 (shop_id, campaign_id, e))
        errMsg = '修改日限额失败,请联系顾问'
Esempio n. 27
0
    def __init__(self, iterator, start_index, prev_index, group_size, method,
                 manager, query_condition, filter_condition):
        """Fetch one batch of records and persist the paging position.

        The arguments are stored on the instance, then ``get_record_list`` of
        the object named by ``iterator`` is called.  Its two results end up
        in ``self.record_list`` and ``self.next_index``, and the paging state
        is written to the ``task_manager_coll`` document whose ``task_id``
        equals ``manager.task_id``.
        """
        self.iterator = iterator
        self.start_index = start_index
        self.prev_index = prev_index
        self.group_size = group_size
        self.method = method
        self.manager = manager
        self.query_condition = query_condition
        self.filter_condition = filter_condition

        log.info('start get data from database where index = %s-%s' %
                 (str(self.start_index), str(self.group_size)))
        start_time = datetime.datetime.now()
        # NOTE(review): ``iterator`` is a string that is eval()-ed to obtain
        # the object providing get_record_list.  eval() runs arbitrary code
        # and resolves names in this frame's scope; make sure the value never
        # comes from untrusted input (an explicit name-to-class mapping would
        # be safer).
        record_list, next_index = eval(self.iterator).get_record_list(
            self.start_index, self.prev_index, self.group_size, self.manager,
            self.query_condition, self.filter_condition)
        self.next_index = next_index
        # Persist the position: the index just used becomes prev_index and
        # the returned next_index becomes the new start_index.
        task_manager_coll.update({'task_id': self.manager.task_id}, {
            '$set': {
                'prev_index': self.start_index,
                'start_index': self.next_index,
                'query_condition': self.query_condition,
                'filter_condition': self.filter_condition
            }
        })
        log.info(
            'end get data from database where $gt index =%s,next_index = %s,cost time is = %s'
            % (str(self.start_index), str(
                self.next_index), datetime.datetime.now() - start_time))
        self.record_list = record_list
Esempio n. 28
0
    def allot_2_prjs(self, prj_list, worker_list):
        """Assign every project to a randomly chosen worker, by weight.

        Each worker appears ``worker['weight']`` times in the selection pool,
        so the chance of being picked is proportional to its weight.  The
        chosen worker's ``host`` and ``port`` are written into the project
        dict.  The number of projects per host is logged.

        Returns:
            A new list containing the (mutated) project dicts in input order.
        """
        weighted_pool = []
        for worker in worker_list:
            weighted_pool.extend(worker for _ in range(worker['weight']))
        pool_size = len(weighted_pool)

        sub_prj_list = []
        projects_per_host = {}
        for prj in prj_list:
            # Pick a worker according to the weights.
            chosen = weighted_pool[random.randint(0, pool_size - 1)]
            prj['host'], prj['port'] = chosen['host'], chosen['port']
            sub_prj_list.append(prj)

            host = prj['host']
            projects_per_host[host] = projects_per_host.get(host, 0) + 1

        log.info('allot info = %s' % projects_per_host)
        return sub_prj_list
Esempio n. 29
0
def calc_match(prj_dict):
    """Fetch the project's keyword list from the cache and group it.

    The settings are read from ``prj_dict``; the keyword list comes from the
    cache location named by ``from_key`` / ``from_db`` (an empty result is
    logged and the read attempted once more).  Grouping only happens when no
    "click > 0" filter is configured or when the first keyword's third field
    is positive.  Errors raised by ``group_kwlist`` are logged and swallowed.

    NOTE(review): no ``return`` is visible in this excerpt -- the snippet
    appears truncated.
    """
    item_scorer = ItemScorer(prj_dict['label_conf_list'])
    cats = prj_dict['cats']
    filter_conf = prj_dict['filter_conf']
    filter_list = prj_dict['filter_list']
    price_list = prj_dict['price_list']
    raw_remove_words = prj_dict['remove_words']
    remove_word_list = raw_remove_words.split(',') if raw_remove_words else []
    cat_cpc = prj_dict['cat_cpc']

    log.info('worker start, item_id=%s, key=%s' %
             (prj_dict['item_id'], prj_dict['data_key']))
    source_key = prj_dict['from_key']
    kw_list = CacheAdpter.get(source_key, prj_dict['from_db'], [])
    if not kw_list:
        # Nothing came back: report it and read once more.
        log.error('can not get group from memcache and the group is = %s' %
                  prj_dict['from_key'])
        kw_list = CacheAdpter.get(prj_dict['from_key'], prj_dict['from_db'],
                                  [])
    group_dict = {}
    if kw_list:
        click_required = ('click > 0' in filter_conf
                          or 'click>0' in filter_conf)
        if not click_required or kw_list[0][2] > 0:
            # The category id is the part of the key before the first '_'.
            cat_id = prj_dict['from_key'].split('_')[0]
            try:
                group_dict = group_kwlist(
                    kw_list, item_scorer, int(cat_id), cat_cpc, cats,
                    remove_word_list, filter_conf, filter_list, price_list)
            except Exception as e:
                log.error('group_kwlist error: cat_id=%s, e=%s' % (cat_id, e))
Esempio n. 30
0
    def check_and_add_waiting_creative(self):
        """Fill free creative slots and rotate finished creatives.

        For every adgroup in ``self.vaild_adgroup`` the waiting creatives are
        bound, then two passes run over the adgroup's creative list:

        1. one waiting creative is popped for each slot missing from a total
           of four and added; on success its document is removed from
           ``ccrt_coll``;
        2. each existing creative for which ``check_complate`` is truthy is
           rotated out in favour of the next waiting creative.

        Either pass stops as soon as ``self.waiting_creative_list`` is empty.
        """
        for adgroup in self.vaild_adgroup:
            self.bind_waiting_creative(adgroup.adgroup_id)
            creative_list = self.get_creative_list(adgroup.adgroup_id)

            #             if(len(creative_list) <= 3) and self.waiting_creative_list: # with three or fewer creatives, add one directly
            #                 current_waiting_creative = self.waiting_creative_list.pop()
            #                 self.add_creative(current_waiting_creative)

            for i in range(4 - len(creative_list)):  # count the free slots and fill them
                if not self.waiting_creative_list:
                    log.info(
                        '[没有等待创意_setp1]  fun:sync_creative shop_id:%s  adgroup_id:%s'
                        % (self.shop_id, adgroup.adgroup_id))
                    break
                current_waiting_creative = self.waiting_creative_list.pop()
                #                 self.add_creative(current_waiting_creative)
                if self.add_creative(current_waiting_creative):
                    # Remove the waiting creative that has just been added.
                    ccrt_coll.remove({
                        'shop_id': self.shop_id,
                        '_id': current_waiting_creative.id
                    })

            for creative in creative_list:
                if not self.waiting_creative_list:
                    log.info(
                        '[没有等待创意_setp2]  fun:sync_creative shop_id:%s  adgroup_id:%s creative_id:%s'
                        % (self.shop_id, adgroup.adgroup_id,
                           creative.creative_id))
                    break

                # Swap in the next waiting creative for one that check_complate
                # reports as done.
                if self.check_complate(creative):
                    current_waiting_creative = self.waiting_creative_list.pop()
                    self.start_rotate(creative, current_waiting_creative)