Example #1
0
def reset_items(site_id):
    """Clean up the items of a site and reset its item index.

    The site lookup is done with ``use_cache=False``.  When the site is
    found, ``cleanupItems`` is called on the Mongo client and then
    ``reset_item_index`` is called for the same site.

    Raises:
        SiteNotExistsError: if ``siteExists`` reports the site as missing.
    """
    client = getMongoClient()
    if not client.siteExists(site_id, use_cache=False):
        raise SiteNotExistsError()
    client.cleanupItems(site_id)
    reset_item_index(site_id)
def run(site_id):
    #print "This script ignore descript field currently!"
    answer = raw_input(
        "Do you really want to reindex items of site: %s (enter 'yes' to continue)"
        % site_id)
    if answer == "yes":
        reset_item_index(site_id)
        mongo_client = getMongoClient()
        c_items = mongo_client.getSiteDBCollection(site_id, "items")
        total = c_items.count()
        cnt = 0
        for item in c_items.find():
            del item["_id"]
            #if item.has_key("description"):
            #    del item["description"]
            #item["categories"] = []
            es_client.es_index_item(site_id, item)
            cnt += 1
            if (cnt % 50) == 0:
                print "%s/%s" % (cnt, total)

        # also fill whitelisted keywords
        for record in keyword_list.fetchSuggestKeywordList(site_id):
            if record["type"] == keyword_list.WHITE_LIST:
                keyword_list.markKeywordsAsWhiteListed(site_id,
                                                       [record["keyword"]])

    else:
        print "Exit without action."
        sys.exit(0)
Example #3
0
def reset_items(site_id):
    """Wipe a site's items and reset the matching item index.

    Existence of the site is checked with the cache bypassed
    (``use_cache=False``) before anything is modified.

    Raises:
        SiteNotExistsError: when the site does not exist.
    """
    client = getMongoClient()
    site_found = client.siteExists(site_id, use_cache=False)
    if not site_found:
        raise SiteNotExistsError()
    client.cleanupItems(site_id)
    reset_item_index(site_id)
Example #4
0
 def setUp(self):
     """Prepare the fixture: clients, test site credentials and caches.

     Stores a Mongo client and an ES client on the instance, initialises
     the site named by ``TEST_SITE_ID`` through ``initSite`` and keeps its
     ``api_key`` / ``site_token``, then calls ``clearCaches``.
     """
     self.mongo_client = getMongoClient()
     self.es = es_search_functions.getESClient()
     site = self.initSite(self.TEST_SITE_ID)
     self.api_key = site["api_key"]
     self.site_token = site["site_token"]
     # NOTE(review): presumably unittest's maxDiff (None = unlimited diff output) - confirm.
     self.maxDiff = None
     self.clearCaches()
Example #5
0
 def setUp(self):
     """Set up clients, the test site's credentials and empty caches.

     ``initSite(self.TEST_SITE_ID)`` returns the site record from which
     ``api_key`` and ``site_token`` are copied onto the instance.
     """
     self.mongo_client = getMongoClient()
     self.es = es_search_functions.getESClient()
     test_site = self.initSite(self.TEST_SITE_ID)
     self.api_key = test_site["api_key"]
     self.site_token = test_site["site_token"]
     # NOTE(review): presumably unittest's maxDiff (None = unlimited diff output) - confirm.
     self.maxDiff = None
     self.clearCaches()
Example #6
0
def update_keyword_hot_view_list(site_id):
    """Refresh the cached automatic keyword hot-view lists of a site.

    ``calculateKeywordHotViewList`` yields a mapping of category id to a
    top-N list.  Every non-empty list is stored under the
    "AutoKeywordHotView" cache type and the "KeywordHotView" entry of the
    same category is deleted.  Categories with an empty list are skipped.
    """
    hot_view_lists = getMongoClient().calculateKeywordHotViewList(site_id)
    for category_id, topn in hot_view_lists.items():
        if len(topn) == 0:
            continue
        cache_key = (category_id, )
        cached_result.set("AutoKeywordHotView", site_id, cache_key, topn)
        # purge the KeywordHotView
        cached_result.delete("KeywordHotView", site_id, cache_key)
Example #7
0
def process_item_update_queue(item_update_queue):
    """Persist and index every queued item update.

    Args:
        item_update_queue: iterable of ``(site_id, item)`` pairs; each
            item must provide ``"categories"`` and may provide ``"brand"``.

    For each pair, ``updateProperty`` is called for every category and for
    a truthy brand, the item is stored with ``updateItem`` and the value
    returned by ``updateItem`` is handed to ``es_client.es_index_item``.
    """
    client = getMongoClient()
    for site_id, queued_item in item_update_queue:
        for category in queued_item["categories"]:
            client.updateProperty(site_id, category)
        brand = queued_item.get("brand")
        if brand:
            client.updateProperty(site_id, brand)
        stored_item = client.updateItem(site_id, queued_item)
        es_client.es_index_item(site_id, stored_item)
def run(from_site_id, from_datetime, to_datetime, to_site_id, to_site_from_datetime):
    """Replay raw logs of one site into another site, shifted in time.

    Logs of ``from_site_id`` whose ``created_on`` lies in
    ``[from_datetime, to_datetime]`` (both bounds inclusive) are replayed
    one by one as requests against ``/api/v1.6/public/events/``.  Every
    timestamp is shifted by ``to_site_from_datetime - from_datetime``.
    The three datetime arguments are converted with ``as_datetime`` first.

    The user is asked for confirmation on stdin; any answer other than
    ``yes`` prints a message and does nothing.

    NOTE(review): the api_key sent with each request is hard-coded and is
    not derived from ``to_site_id`` - confirm it belongs to the target site.
    """
    from_datetime = as_datetime(from_datetime)
    to_datetime = as_datetime(to_datetime)
    to_site_from_datetime = as_datetime(to_site_from_datetime)
    # Offset added to every replayed log's created_on.
    time_delta = to_site_from_datetime - from_datetime
    print "TIME DELTA:", time_delta
    mongo_client = getMongoClient()
    from_c_raw_logs = mongo_client.getSiteDBCollection(from_site_id, "raw_logs")
    to_c_raw_logs = mongo_client.getSiteDBCollection(to_site_id, "raw_logs")
    result_set = from_c_raw_logs.find({"created_on": {"$gte": from_datetime, "$lte": to_datetime}})
    print "map date range: %s, %s to %s, %s" % (from_datetime, to_datetime, to_site_from_datetime, to_site_from_datetime + (to_datetime - from_datetime))
    print from_c_raw_logs, to_c_raw_logs
    print "Total logs:", result_set.count()
    answer = raw_input("Do you want to load raw_logs from %s to %s ?(enter 'yes' to continue)" % (from_site_id, to_site_id))
    if answer == "yes":
        # NOTE(review): presumably a Django / REST framework test client - confirm.
        client = Client()
        for raw_log in result_set:
            del raw_log["_id"]
            raw_log["created_on"] = raw_log["created_on"] + time_delta
            #to_c_raw_logs.insert(raw_log)
            # Request parameters are rebuilt from the stored log.
            post_data = {"api_key": "5a552549"}
            # These behaviors carry an item id and a user id.
            if raw_log["behavior"] in ("V", "AF", "RF", "UNLIKE", "RI", "ASC", "RSC"):
                post_data["item_id"] = raw_log["item_id"]
                post_data["user_id"] = raw_log["user_id"]
            # "PLO" logs: order content is serialised as
            # "item_id,price,amount" entries joined with "|".
            if raw_log["behavior"] == "PLO":
                post_data["user_id"] = raw_log["user_id"]
                post_data["order_id"] = raw_log.get("order_id", None)
                post_data["order_content"] = "|".join(["%(item_id)s,%(price)s,%(amount)s" % order_item for order_item in raw_log["order_content"]])
            # "RI" logs additionally carry a score.
            if raw_log["behavior"] in ("RI",):
                post_data["score"] = raw_log["score"] 
            # Stored behavior code -> event_type sent to the API.  A behavior
            # missing from this table raises KeyError on the lookup below.
            BH2EventType = {
                "V": "ViewItem",
                "AF": "AddFavorite",
                "RF": "RemoveFavorite",
                "UNLIKE": "Unlike",
                "RI": "RateItem",
                "ASC": "AddOrderItem",
                "RSC": "RemoveOrderItem",
                "PLO": "PlaceOrder"
            }
            post_data["event_type"] = BH2EventType[raw_log["behavior"]]
            # The log's "tjbid" is sent as the "__ptmid" cookie.
            client.cookies["__ptmid"] = raw_log["tjbid"]
            # Document count before the request; used below to detect the insert.
            before_count = to_c_raw_logs.count()
            response = client.get("/api/v1.6/public/events/", post_data)
            if response.status_code != 200 or response.data["code"] != 0:
                # Failed requests are only printed; the loop continues.
                print response, response.data
            else:
                # Poll every 0.1s until the target collection has grown.
                # NOTE(review): there is no timeout, so this loops forever
                # if the document never appears.
                after_count = to_c_raw_logs.count()
                while after_count <= before_count:
                    print "waiting raw_log being inserted. %s,%s" % (before_count, after_count)
                    time.sleep(0.1)
                    after_count = to_c_raw_logs.count()
                # Take the first document in reverse $natural order and
                # overwrite its created_on with the shifted timestamp.
                # NOTE(review): assumes nothing else writes to this
                # collection concurrently - confirm.
                last_raw_log = [rl for rl in to_c_raw_logs.find().sort([("$natural", -1)]).limit(1)][0]
                last_raw_log["created_on"] = raw_log["created_on"]
                to_c_raw_logs.save(last_raw_log)
    else:
        print "Exit without action."
Example #9
0
def run(site_id, site_name, api_prefix):
    answer = raw_input("Do you want to create the site: '%s' with site_name '%s' and api_prefix '%s' ?(enter 'yes' to continue)" % (site_id, site_name, api_prefix))
    if answer == "yes":
        mongo_client = getMongoClient()
        site_record = create_site(mongo_client, site_id, site_name, 3600 * 24, api_prefix=api_prefix)
        print "Site %s created. " % site_id
        print "api_key=%s" % site_record["api_key"]
        print "api_token=%s" % site_record["site_token"]
    else:
        print "Exit without action."
def run(site_id, site_name, api_prefix):
    answer = raw_input(
        "Do you want to create the site: '%s' with site_name '%s' and api_prefix '%s' ?(enter 'yes' to continue)"
        % (site_id, site_name, api_prefix))
    if answer == "yes":
        mongo_client = getMongoClient()
        site_record = create_site(mongo_client,
                                  site_id,
                                  site_name,
                                  3600 * 24,
                                  api_prefix=api_prefix)
        print "Site %s created. " % site_id
        print "api_key=%s" % site_record["api_key"]
        print "api_token=%s" % site_record["site_token"]
    else:
        print "Exit without action."
    def authenticate(self, request):
        """Authenticate a request carrying an ``Authorization: Token <token>`` header.

        Returns:
            ``(site, None)`` for a known token, or ``None`` when the header
            is absent/empty or is not exactly ``Token`` followed by one value.

        Raises:
            exceptions.AuthenticationFailed: when no site matches the token.
            Any other exception is logged at CRITICAL level and re-raised.
        """
        try:
            mongo_client = getMongoClient()
            header = request.META.get('HTTP_AUTHORIZATION')
            if not header:
                return None

            parts = header.split()
            if len(parts) != 2 or parts[0] != "Token":
                return None

            site = mongo_client.getSiteFromToken(site_token=parts[1])
            if site is None:
                raise exceptions.AuthenticationFailed('No such user')
            return (site, None)
        except exceptions.AuthenticationFailed:
            # Expected failure: pass through without logging.
            raise
        except:
            # Unexpected failure: log with traceback, then propagate unchanged.
            import logging
            logging.critical("PocoTokenAuthentication unexpect error", exc_info=True)
            raise
Example #12
0
from elasticutils import S, F

from common.mongo_client import getMongoClient
from common.mongo_client import SimpleRecommendationResultFilter
from common.mongo_client import SameGroupRecommendationResultFilter

from tasks import process_item_update_queue
from tasks import write_log


#logging.basicConfig(format="%(asctime)s|%(levelname)s|%(name)s|%(message)s",
#                    level=logging.WARNING,
#                    datefmt="%Y-%m-%d %I:%M:%S")


# Module-level Mongo client, created once when this module is imported.
mongo_client = getMongoClient()

# Executed at import time as well.
# NOTE(review): presumably refreshes the api_key -> site_id mapping - confirm.
mongo_client.reloadApiKey2SiteID()

class HotViewListCache:
    """Look up hot-view lists in Django's "default" cache.

    Entries are keyed by site id, hot index type, category id and brand.
    """
    # Not referenced by the code shown here.
    # NOTE(review): presumably the cache lifetime in seconds - confirm.
    EXPIRY_TIME = 3600

    def __init__(self, mongo_client):
        """Keep a reference to the given Mongo client."""
        self.mongo_client = mongo_client

    def getHotViewList(self, site_id, hot_index_type, category_id=None, brand=None):
        """Return the cached hot-view list for the given key.

        The cache key is
        ``hot-view-list-<site_id>-<hot_index_type>-<category_id>-<brand>``.
        As written here, a missing or falsy cache entry falls through and
        the method returns ``None``.
        """
        cache_key = "hot-view-list-%s-%s-%s-%s" % (site_id, hot_index_type, category_id, brand)
        django_cache = get_cache("default")
        cache_entry = django_cache.get(cache_key)
        if cache_entry:
            return cache_entry
Example #13
0
def run(from_site_id, from_datetime, to_datetime, to_site_id,
        to_site_from_datetime):
    from_datetime = as_datetime(from_datetime)
    to_datetime = as_datetime(to_datetime)
    to_site_from_datetime = as_datetime(to_site_from_datetime)
    time_delta = to_site_from_datetime - from_datetime
    print "TIME DELTA:", time_delta
    mongo_client = getMongoClient()
    from_c_raw_logs = mongo_client.getSiteDBCollection(from_site_id,
                                                       "raw_logs")
    to_c_raw_logs = mongo_client.getSiteDBCollection(to_site_id, "raw_logs")
    result_set = from_c_raw_logs.find(
        {"created_on": {
            "$gte": from_datetime,
            "$lte": to_datetime
        }})
    print "map date range: %s, %s to %s, %s" % (from_datetime, to_datetime,
                                                to_site_from_datetime,
                                                to_site_from_datetime +
                                                (to_datetime - from_datetime))
    print from_c_raw_logs, to_c_raw_logs
    print "Total logs:", result_set.count()
    answer = raw_input(
        "Do you want to load raw_logs from %s to %s ?(enter 'yes' to continue)"
        % (from_site_id, to_site_id))
    if answer == "yes":
        client = Client()
        for raw_log in result_set:
            del raw_log["_id"]
            raw_log["created_on"] = raw_log["created_on"] + time_delta
            #to_c_raw_logs.insert(raw_log)
            post_data = {"api_key": "5a552549"}
            if raw_log["behavior"] in ("V", "AF", "RF", "UNLIKE", "RI", "ASC",
                                       "RSC"):
                post_data["item_id"] = raw_log["item_id"]
                post_data["user_id"] = raw_log["user_id"]
            if raw_log["behavior"] == "PLO":
                post_data["user_id"] = raw_log["user_id"]
                post_data["order_id"] = raw_log.get("order_id", None)
                post_data["order_content"] = "|".join([
                    "%(item_id)s,%(price)s,%(amount)s" % order_item
                    for order_item in raw_log["order_content"]
                ])
            if raw_log["behavior"] in ("RI", ):
                post_data["score"] = raw_log["score"]
            BH2EventType = {
                "V": "ViewItem",
                "AF": "AddFavorite",
                "RF": "RemoveFavorite",
                "UNLIKE": "Unlike",
                "RI": "RateItem",
                "ASC": "AddOrderItem",
                "RSC": "RemoveOrderItem",
                "PLO": "PlaceOrder"
            }
            post_data["event_type"] = BH2EventType[raw_log["behavior"]]
            client.cookies["__ptmid"] = raw_log["tjbid"]
            before_count = to_c_raw_logs.count()
            response = client.get("/api/v1.6/public/events/", post_data)
            if response.status_code != 200 or response.data["code"] != 0:
                print response, response.data
            else:
                after_count = to_c_raw_logs.count()
                while after_count <= before_count:
                    print "waiting raw_log being inserted. %s,%s" % (
                        before_count, after_count)
                    time.sleep(0.1)
                    after_count = to_c_raw_logs.count()
                last_raw_log = [
                    rl for rl in to_c_raw_logs.find().sort([("$natural",
                                                             -1)]).limit(1)
                ][0]
                last_raw_log["created_on"] = raw_log["created_on"]
                to_c_raw_logs.save(last_raw_log)
    else:
        print "Exit without action."
Example #14
0
def update_hotview_list(site_id):
    """Update the hot-view list of a site for every known hot index type.

    The hot index types are the keys of the Mongo client's
    ``HOT_INDEX_TYPE2INDEX_PREFIX`` mapping; the prefixes are not used.
    """
    client = getMongoClient()
    type_to_prefix = client.HOT_INDEX_TYPE2INDEX_PREFIX
    for index_type, _unused_prefix in type_to_prefix.items():
        client.updateHotViewList(site_id, index_type)
                to_be_in_unidentified_keywords, increase_count=True)


    def _indexKeywordsForCompletion(self, site_id, keywords):
        """Index one completion document per analysed keyword token.

        The keywords are joined with spaces and run through the
        ``mycn_analyzer_whitespace_pinyin_first_n_full`` analyzer of the
        site's item index.  Token *i* is paired with ``keywords[i]``; the
        token text is split on ``"||"`` into the first piece
        (``first_letters``) and the concatenation of the remaining pieces
        (``full_pinyin``).  Each result is indexed with doc_type
        ``keyword``.
        """
        analysis = self.es.indices.analyze(
            index=es_search_functions.getESItemIndexName(site_id),
            text=" ".join(keywords),
            analyzer="mycn_analyzer_whitespace_pinyin_first_n_full")
        for position, token in enumerate(analysis["tokens"]):
            # Tokens are matched to keywords purely by position.
            raw_keyword = keywords[position]
            pieces = token["token"].split("||")
            first_letters = pieces[0]
            full_pinyin = "".join(pieces[1:])
            completion_inputs = [raw_keyword, full_pinyin, first_letters]
            completion_doc = {
                "keyword_completion": {
                    "input": completion_inputs,
                    "output": raw_keyword,
                }
            }
            self.es.index(
                index=es_search_functions.getESItemIndexName(site_id),
                doc_type='keyword',
                body=completion_doc)

    def markKeywordsAsWhiteListed(self, site_id, keywords):
        """Mark ``keywords`` as white-listed for a site and index them for completion.

        The suggest keyword list is updated with ``self.WHITE_LIST`` first,
        then the same keywords go to ``_indexKeywordsForCompletion``.
        """
        #from recommender import es_client
        # also need to search and reindex the white listed keywords. use update api.
        # also update the keyword completion
        self.updateSuggestKeywordList(site_id, self.WHITE_LIST, keywords)
        self._indexKeywordsForCompletion(site_id, keywords)

    def markKeywordsAsBlackListed(self, site_id, keywords):
        """Mark ``keywords`` as black-listed for a site.

        Only the suggest keyword list is updated with ``self.BLACK_LIST``;
        nothing is removed from the completion index here.
        """
        self.updateSuggestKeywordList(site_id, self.BLACK_LIST, keywords)
        # TODO remove keywords for completion

# Module-level KeywordList instance, built at import time from the clients
# returned by es_search_functions.getESClient() and getMongoClient().
keyword_list = KeywordList(es_search_functions.getESClient(), getMongoClient())
def rebuild_suggestion_cache(site_id):
    """Rebuild the suggestion cache of a site.

    Creates a ``SuggestionCacheBuilder`` for the site with a Mongo client
    and calls its ``rebuild`` method.
    """
    client = getMongoClient()
    SuggestionCacheBuilder(site_id, client).rebuild()
Example #17
0
# http://www.aspheute.com/english/20040105.asp
def createRandomPassword(length):
    """Return a random string of ``length`` characters.

    Characters are drawn uniformly from a 58-character alphabet of
    letters and digits that leaves out ``l``, ``I``, ``0`` and ``1``.

    The operating system's CSPRNG (``random.SystemRandom``) is used
    because the result serves as a password / salt.  The previous
    ``random.randint(0, 256) % len(allowedChars)`` drew from 257 values,
    which favoured the first 25 characters of the alphabet, and relied on
    the predictable default generator.

    Args:
        length: number of characters to generate; values <= 0 give "".

    Returns:
        The generated string.
    """
    allowedChars = "abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNOPQRSTUVWXYZ23456789"
    secure_rng = random.SystemRandom()
    return "".join(secure_rng.choice(allowedChars) for _ in range(length))


def createHashedPassword(password):
    """Hash ``password`` with a freshly generated 16-character salt.

    Returns:
        ``(hashed_password, salt)`` where ``hashed_password`` is the hex
        SHA-256 digest of ``password + salt``.
    """
    salt = createRandomPassword(16)
    digest = hashlib.sha256(password + salt)
    return digest.hexdigest(), salt


# Module-level Mongo client and the "users" collection of the database
# returned by getTjbDb(); both are created at import time.
mongo_client = getMongoClient()

c_users = mongo_client.getTjbDb()["users"]

# Seed the `random` module with 10 bytes read from /dev/random.  The
# context manager closes the device handle right after the read instead
# of leaving it to garbage collection.
with open("/dev/random", "rb") as entropy_source:
    random.seed(entropy_source.read(10))


def _inputSites():
    """Prompt for a comma-separated list of sites and return it as a list.

    The input is stripped of surrounding whitespace first; an empty
    answer gives ``[]``.  Individual entries are not stripped.
    """
    entered = raw_input("sites(comma separated):").strip()
    return entered.split(",") if entered != "" else []


def cmd_createNewUser():
Example #18
0
    def _getFullCacheKey(self, cache_type, site_id, cache_key_tuple):
        return "results-cache-%s-%s-%s" % (cache_type, site_id, "|".join(cache_key_tuple))

    def _setDjangoCache(self, cache_key, result):
        """Store ``result`` in Django's "default" cache, passing ``EXPIRY_TIME`` as the timeout argument."""
        get_cache("default").set(cache_key, result, self.EXPIRY_TIME)

    def set(self, cache_type, site_id, cache_key_tuple, result):
        """Store ``result`` in both cache layers.

        The value is written through the Mongo client
        (``updateCachedResults``) first and then to the Django cache, both
        under the same full cache key.
        """
        key = self._getFullCacheKey(cache_type, site_id, cache_key_tuple)
        self.mongo_client.updateCachedResults(site_id, key, result)
        self._setDjangoCache(key, result)

    def get(self, cache_type, site_id, cache_key_tuple):
        """Fetch a cached result, trying the Django cache before Mongo.

        If the Django cache has no entry, the value is read with
        ``getFromCachedResults``; a value found there is copied back into
        the Django cache.  Returns ``None`` when neither layer has it.
        """
        django_cache = get_cache("default")
        key = self._getFullCacheKey(cache_type, site_id, cache_key_tuple)
        hit = django_cache.get(key)
        if hit is not None:
            return hit

        stored = self.mongo_client.getFromCachedResults(site_id, key)
        if stored is not None:
            # Copy the Mongo value into the Django cache for later lookups.
            self._setDjangoCache(key, stored)
        return stored

    def delete(self, cache_type, site_id, cache_key_tuple):
        """Remove an entry from both cache layers.

        ``deleteCachedResults`` is called on the Mongo client first, then
        the key is deleted from the Django cache.
        """
        django_cache = get_cache("default")
        key = self._getFullCacheKey(cache_type, site_id, cache_key_tuple)
        self.mongo_client.deleteCachedResults(site_id, key)
        django_cache.delete(key)

# Module-level CachedResult instance, created at import time with the
# client returned by getMongoClient().
cached_result = CachedResult(getMongoClient())