예제 #1
0
파일: misc.py 프로젝트: rumpelt/PyMathApp
def calculatebmi(listpr):
    """Compute BMI for every time attribute that has both height and weight.

    For each element of ``listpr`` the objects returned by
    ``getalltimeattribute()`` are inspected; when both the ``"HEIGHT"`` and
    ``"WEIGHT"`` attributes are present (not ``None``) the value
    ``WEIGHT / HEIGHT ** 2`` is stored back under the ``"BMI"`` attribute.
    Entries missing either value are left untouched.

    Args:
        listpr: iterable of objects exposing ``getalltimeattribute()``, which
            in turn yields objects exposing ``getattribute(name)`` and
            ``setattribute(name, value)``.

    Returns:
        None. Results are written via ``setattribute("BMI", ...)``.

    Note:
        The arithmetic is done with a ``decimal.Context``, so the stored
        values must be acceptable to ``Context.divide`` / ``Context.power``
        (``Decimal`` or ``int``). A zero height makes the division signal a
        decimal error under the context's traps.
        NOTE(review): the units of HEIGHT/WEIGHT are not visible here; the
        conventional BMI assumes kilograms and metres -- confirm with callers.
    """
    c = Context()
    exponent = Decimal("2")
    for p in listpr:
        for timeattr in p.getalltimeattribute():
            height = timeattr.getattribute("HEIGHT")
            weight = timeattr.getattribute("WEIGHT")
            if height is None or weight is None:
                continue
            # BMI is weight divided by height squared (the previous code had
            # the two operands swapped).
            bmi = c.divide(weight, c.power(height, exponent))
            timeattr.setattribute("BMI", bmi)
예제 #2
0
def solve():
    """Return the first Fibonacci index whose head and tail are both pandigital.

    The last nine digits are tracked exactly by iterating the Fibonacci
    recurrence modulo 10**9. The first nine digits are estimated from the
    closed form F(n) ~ phi**n / sqrt(5) using base-10 logarithms, and are only
    computed once the tail has already been found to be 1-9 pandigital.
    """
    modulus = 10 ** 9
    ctx = Context()
    root5 = Decimal(5).sqrt()
    log_inv_root5 = (Decimal(1) / root5).log10()
    log_phi = ((Decimal(1) + root5) / Decimal(2)).log10()

    def has_digits_1_to_9(value):
        # Nine distinct characters with no '0' means exactly the digits 1..9.
        seen = set(str(value))
        return '0' not in seen and len(seen) == 9

    index = 2
    previous, current = 1, 1
    while True:
        previous, current = current, (previous + current) % modulus
        if has_digits_1_to_9(previous):
            magnitude = log_inv_root5 + index * log_phi
            # Keep only the fractional part and shift by 8 to get 9 leading digits.
            leading = int(ctx.power(10, magnitude - int(magnitude) + 8))
            if has_digits_1_to_9(leading):
                return index
        index += 1
예제 #3
0
def tahara_features(classify="dropout", is_classify="dropout", is_80=False):
    u"""Extract feature words based on the Tahara algorithm and build user vectors.

        Output is printed to stdout (an optional header line, then one
        comma-separated line per profile); the function returns None.

        Args:
            classify: the user class to process. One of "dropout", "normal",
                "dropout_test" or "normal_test"; any other value prints a
                message and returns early.
            is_classify: which user class's characteristic words the vector
                is built against ("dropout" uses the dropout word list, any
                other value uses the normal word list).
            is_80: top-80-word restriction (per the original note). In the
                code, when is_classify is "dropout", words for which
                get_word_point() is 0 are left out.
    """

    # Function-scope imports; load_from_mongo, config and extract_time are
    # expected to be available from the enclosing module.
    import pymongo
    from decimal import Decimal,Context
    import math
    from function_common import get_word_point

    # Decimal context used for the squared terms in the "dropout" branch below.
    context=Context()
    count=0

    # Select the profile collection for the requested user class.
    # The *_test variants additionally print how many profiles were loaded.
    if classify == "dropout":
        profiles=load_from_mongo(config.m_dropout.db,config.m_dropout.profile_coll)
    elif classify=="normal":
        profiles=load_from_mongo(config.m_normal.db,config.m_normal.profile_coll)
    elif classify=="dropout_test":
        profiles=load_from_mongo(config.m_dropout_test.db,config.m_dropout_test.profile_coll)
        print len(profiles)
    elif classify=="normal_test":
        profiles=load_from_mongo(config.m_normal_test.db,config.m_normal_test.profile_coll)
        print len(profiles)
    else:
        print "unknown classify ",classify
        return

    # Header line: only emitted when building against the dropout word list.
    # Collects the ids of up to 1000 words, ordered by descending "rate".
    if is_classify =="dropout":
            dropouts=load_from_mongo(config.m_dropout.db,config.m_dropout.tahara_coll,True)
            names=[]
            for d in dropouts.sort("rate",pymongo.DESCENDING):
                count+=1
                names.append(d["_id"])
                if count >=1000:
                    break
            if(is_80==True):
                # With is_80 only words with a positive word point are listed.
                print "name,dropout,{0}".format(
                ",".join([str(n) for n in names if get_word_point(n) > 0]))
            else:
                print "name,dropout,{0}".format(",".join([str(n) for n in names]))

    for profile in profiles:
        # Per-user word statistics; test classes read from the *_test collection.
        if classify == "dropout" or classify == "normal":
            wui=load_from_mongo("db","users_of_tehara",return_cursor=False,criteria={'_id':profile['_id']})
        elif classify == "dropout_test" or classify == "normal_test":
            wui=load_from_mongo("db","users_of_tehara_test",return_cursor=False,criteria={'_id':profile['_id']})
        else:
            print "unknown classify ",classify
            return

        # Skip users for whom no word statistics were found.
        if (wui is None) or (len(wui)==0):
            continue
        words= wui[0][u'words']
        # Running sums updated in the word loop below.
        # NOTE(review): these three totals are never read again in this function.
        wui_wci=0
        w2ui=0
        w2ci=0

        # The word list is loaded again for every profile and sorted/iterated below.
        if is_classify =="dropout":
            dropouts=load_from_mongo(config.m_dropout.db,config.m_dropout.tahara_coll,True)
        else:
            dropouts=load_from_mongo(config.m_normal.db,config.m_normal.tahara_coll,True)

        # Load this user's tweets between date_of_start and date_of_dropout.
        # NOTE(review): the regex '^[^RT]' matches any text whose first character
        # is neither 'R' nor 'T'; presumably intended to exclude retweets -- confirm.
        # NOTE(review): the tmp_sim_dropouts lists built in these branches are
        # replaced by an empty list before the word loop, so the id/label
        # prefix never reaches the output.
        if classify=="dropout":
            tweets=load_from_mongo(config.m_dropout.db,config.m_dropout.tweets_coll,criteria={'$and':[{"user.screen_name":profile["_id"]},{'text':{'$regex':'^[^RT]'}},{'created_at':{'$gte':profile["date_of_start"],'$lte':profile["date_of_dropout"]}}]})
            if len(tweets)==0:
                # Fall back to a second, hard-coded collection name when nothing was found.
                tweets=load_from_mongo(config.m_dropout.db,"tweets_of_dropout?",criteria={'$and':[{"user.screen_name":profile["_id"]},{'text':{'$regex':'^[^RT]'}},{'created_at':{'$gte':profile["date_of_start"],'$lte':profile["date_of_dropout"]}}]})
            tmp_sim_dropouts=[]
            tmp_sim_dropouts.append(profile["_id"])
            tmp_sim_dropouts.append(-1)
        elif classify == "normal":
            tweets=load_from_mongo(config.m_normal.db,config.m_normal.tweets_coll,criteria={'$and':[{"user.screen_name":profile["_id"]},{'text':{'$regex':'^[^RT]'}},{'created_at':{'$gte':profile["date_of_start"],'$lte':profile["date_of_dropout"]}}]})
            tmp_sim_dropouts=[]
            tmp_sim_dropouts.append(profile["_id"])
            tmp_sim_dropouts.append(1)
        elif classify == "dropout_test":
            tweets=load_from_mongo(config.m_dropout_test.db,config.m_dropout_test.tweets_coll,criteria={'$and':[{"user.screen_name":profile["_id"]},{'text':{'$regex':'^[^RT]'}},{'created_at':{'$gte':profile["date_of_start"],'$lte':profile["date_of_dropout"]}}]})
            if len(tweets)==0:
                tweets=load_from_mongo(config.m_dropout_test.db,"tweets_of_dropout?",criteria={'$and':[{"user.screen_name":profile["_id"]},{'text':{'$regex':'^[^RT]'}},{'created_at':{'$gte':profile["date_of_start"],'$lte':profile["date_of_dropout"]}}]})
            tmp_sim_dropouts=[]
            tmp_sim_dropouts.append(profile["_id"])
            tmp_sim_dropouts.append(-1)
        elif classify == "normal_test":
            tweets=load_from_mongo(config.m_normal_test.db,config.m_normal_test.tweets_coll,criteria={'$and':[{"user.screen_name":profile["_id"]},{'text':{'$regex':'^[^RT]'}},{'created_at':{'$gte':profile["date_of_start"],'$lte':profile["date_of_dropout"]}}]})
            tmp_sim_dropouts=[]
            tmp_sim_dropouts.append(profile["_id"])
            tmp_sim_dropouts.append(-1)

        # Count the number of distinct days on which the user actually tweeted.
        day_counter=set()
        for tweet in tweets:
            y=extract_time(tweet["created_at"],"Y")
            m=extract_time(tweet["created_at"],"M")
            d=extract_time(tweet["created_at"],"D")
            # A 5-element tuple (not a joined string); used as the set key for the day.
            days=y,":",m,":",d
            day_counter.add(days)
        total_days=len(day_counter)

        if total_days == 0:
            print "Error! ",profile["_id"],"has 0 tweets."
            continue

        # Start the output row from scratch and restart the word counter.
        tmp_sim_dropouts=[]
        count=0

        # Walk the characteristic words in descending "rate" order.
        for d in dropouts.sort("rate",pymongo.DESCENDING):

            count+=1
            # Find this user's statistics for the word; default to zeros when absent.
            word=[w for w in words if w["name"] == d["_id"]]
            if len(word)==0:
                word=[{"count":0,"name":d["_id"],"days":0}]
            else:
                pass

            cnt=Decimal(word[0]["count"])
            rate=Decimal(d["rate"])
            days=Decimal(word[0]["days"])
            d_point=Decimal(get_word_point(d["_id"]))

            # Words with a zero word point only affect control flow when
            # is_classify is "dropout" and is_80 is set: stop once more than
            # 1000 words have been seen, otherwise skip the word.
            if d_point == 0:
                if is_classify=="dropout":
                    if count > 1000:
                        if is_80==True:
                            break
                        else:
                            pass
                    else:
                        if is_80==True:
                            continue
                        else:
                            pass
            td=Decimal(total_days)

            if is_classify=="dropout":
                # Per-word value: fraction of the user's active days on which the word appeared.
                tmp_wui_wci=Decimal(days/td)
                wui_wci+=tmp_wui_wci
                tmp_w2ui=context.power(cnt,2)
                w2ui+=tmp_w2ui
                tmp_w2ci=context.power(rate,2)
                w2ci+=tmp_w2ci

            else:
                # Per-word value: the user's count for the word times the word's rate.
                tmp_wui_wci=Decimal(cnt*rate)
                wui_wci+=(cnt*rate)
                tmp_w2ui=Decimal(math.pow((Decimal(cnt)),2))
                w2ui+=math.pow((cnt),2)
                tmp_w2ci=Decimal(math.pow(Decimal(rate),2))
                w2ci+=math.pow(rate,2)

            # NOTE(review): this break happens before the append below, so the
            # word that brings count to 1000 contributes no value to the row,
            # while the header loop above appends before breaking -- confirm
            # this is intended.
            if count >= 1000:
                break

            # Emit 0 when any component (or the user's count) is zero,
            # otherwise the per-word value as a float.
            if tmp_wui_wci == 0 or tmp_w2ci == 0 or tmp_w2ui == 0 or cnt ==0:
                tmp_sim_dropouts.append(0)
            else:
                tmp_sim_dropout=tmp_wui_wci
                tmp_sim_dropouts.append(float(tmp_sim_dropout))

        # One output row per profile; the second column is always the literal "True".
        print profile["_id"],",True,{0}".format(
            ",".join([str(t) for t in tmp_sim_dropouts])
        )