def calculatebmi(listpr):
    """Compute the body-mass index and store it on each time attribute.

    For every element of ``listpr``, each object returned by its
    ``getalltimeattribute()`` is inspected.  When both the ``"HEIGHT"`` and
    ``"WEIGHT"`` attributes are present (not ``None``), the value
    ``WEIGHT / HEIGHT ** 2`` is written back under the ``"BMI"`` attribute.
    Entries missing either value are left untouched.

    Args:
        listpr: Iterable of objects exposing ``getalltimeattribute()``, which
            in turn yields objects exposing ``getattribute(name)`` and
            ``setattribute(name, value)``.

    Returns:
        None.  Results are stored through ``setattribute("BMI", ...)``.

    Note:
        The arithmetic runs in a fresh ``decimal.Context()``; a HEIGHT of
        zero makes the division signal an error under that context's traps.
    """
    # NOTE(review): the conventional kg/m^2 figure assumes HEIGHT is in
    # metres and WEIGHT in kilograms -- confirm the units of the stored data.
    ctx = Context()
    two = Decimal("2")
    for person in listpr:
        for timeattr in person.getalltimeattribute():
            height = timeattr.getattribute("HEIGHT")
            weight = timeattr.getattribute("WEIGHT")
            if height is None or weight is None:
                continue
            # BMI is weight divided by height squared; the operands were
            # previously swapped (height / weight ** 2).
            bmi = ctx.divide(weight, ctx.power(height, two))
            timeattr.setattribute("BMI", bmi)
def solve():
    """Return the first Fibonacci index whose two nine-digit ends are pandigital.

    The last nine digits are tracked exactly by iterating the Fibonacci
    recurrence modulo 10**9.  The first nine digits are only estimated, and
    only for candidates whose tail already qualifies, from the approximation
    F(n) ~= phi**n / sqrt(5) evaluated through base-10 logarithms.

    Returns:
        int: the index ``n`` (with F(1) = F(2) = 1) of the first Fibonacci
        number whose leading nine and trailing nine digits each contain every
        digit 1-9 exactly once.
    """
    modulus = 10 ** 9
    root_five = Decimal(5).sqrt()
    # log10(F(n)) ~= log10(1 / sqrt(5)) + n * log10(phi)
    log_scale = (Decimal(1) / root_five).log10()
    log_phi = ((Decimal(1) + root_five) / Decimal(2)).log10()
    ctx = Context()

    def has_digits_one_to_nine(value):
        # Nine distinct characters with no '0' means exactly the digits 1-9.
        digits = set(str(value))
        return len(digits) == 9 and '0' not in digits

    previous, current = 1, 1
    index = 2
    while True:
        previous, current = current, (previous + current) % modulus
        # `previous` now holds F(index) modulo 10**9.
        if has_digits_one_to_nine(previous):
            exponent = log_scale + index * log_phi
            # Keep the fractional part and shift by 8 to get a 9-digit integer.
            leading = int(ctx.power(10, exponent - int(exponent) + 8))
            if has_digits_one_to_nine(leading):
                return index
        index += 1
# tahara_features: print one CSV-style row of per-word values for every user
# profile of the selected class.  Python 2 source (uses `print` statements).
#
# Parameters
#   classify    -- which profile set to load: "dropout", "normal",
#                  "dropout_test" or "normal_test".  Any other value prints
#                  "unknown classify" and returns.
#   is_classify -- "dropout" reads the word list from config.m_dropout's
#                  tahara collection and prints a header row first; any other
#                  value reads config.m_normal's tahara collection.
#   is_80       -- when True, the header row keeps only words whose
#                  get_word_point() is > 0; the scoring loop also consults it
#                  for words whose get_word_point() is 0.
# Returns None; all results are written to stdout.
#
# Flow, as far as the visible code shows:
#   1. Load the profiles for `classify` (the *_test variants also print how
#      many were loaded).
#   2. If is_classify == "dropout", print "name,dropout,<words>" built from up
#      to 1000 word ids taken in descending "rate" order.
#   3. For each profile: load its word document from "users_of_tehara" or
#      "users_of_tehara_test" (profiles without one are skipped), load its
#      tweets created between date_of_start and date_of_dropout, and collect
#      the distinct extract_time(..., "Y"/"M"/"D") triples into a set
#      (presumably year/month/day -- extract_time is defined elsewhere).
#      Profiles with an empty set are reported and skipped.
#   4. Walk the rate-sorted words; for each one append either 0 or a float
#      (days / total_days when is_classify == "dropout", otherwise
#      count * rate), then print '<id> ,True,<values>'.
#
# NOTE(review): line breaks and indentation are missing from this copy, so the
#   nesting of the else/continue/break branches in the scoring loop cannot be
#   confirmed from this text -- check against the original file.
# NOTE(review): the Mongo filter '^[^RT]' is a character class: it rejects any
#   text whose first character is 'R' or 'T', not only texts starting with
#   "RT".  Confirm that this is intended.
# NOTE(review): tmp_sim_dropouts receives the profile id and a +1/-1 label in
#   the tweet-loading branches but is reset to [] before scoring, so those
#   values never reach the printed row.
# NOTE(review): wui_wci, w2ui and w2ci are accumulated but not read anywhere
#   in the visible code.
def tahara_features(classify="dropout", is_classify="dropout", is_80=False): u"""Extract feature words based on the Tahara algorithm and build user vectors. Args: classify: the user class to process. is_classify: which user class's likeness the vector is built for. is_80: restrict to the top 80 words. """ import pymongo from decimal import Decimal,Context import math from function_common import get_word_point context=Context() count=0 if classify == "dropout": profiles=load_from_mongo(config.m_dropout.db,config.m_dropout.profile_coll) elif classify=="normal": profiles=load_from_mongo(config.m_normal.db,config.m_normal.profile_coll) elif classify=="dropout_test": profiles=load_from_mongo(config.m_dropout_test.db,config.m_dropout_test.profile_coll) print len(profiles) elif classify=="normal_test": profiles=load_from_mongo(config.m_normal_test.db,config.m_normal_test.profile_coll) print len(profiles) else: print "unknown classify ",classify return if is_classify =="dropout": dropouts=load_from_mongo(config.m_dropout.db,config.m_dropout.tahara_coll,True) names=[] for d in dropouts.sort("rate",pymongo.DESCENDING): count+=1 names.append(d["_id"]) if count >=1000: break if(is_80==True): print "name,dropout,{0}".format( ",".join([str(n) for n in names if get_word_point(n) > 0])) else: print "name,dropout,{0}".format(",".join([str(n) for n in names])) for profile in profiles: if classify == "dropout" or classify == "normal": wui=load_from_mongo("db","users_of_tehara",return_cursor=False,criteria={'_id':profile['_id']}) elif classify == "dropout_test" or classify == "normal_test": wui=load_from_mongo("db","users_of_tehara_test",return_cursor=False,criteria={'_id':profile['_id']}) else: print "unknown classify ",classify return if (wui is None) or (len(wui)==0): continue words= wui[0][u'words'] wui_wci=0 w2ui=0 w2ci=0 if is_classify =="dropout": dropouts=load_from_mongo(config.m_dropout.db,config.m_dropout.tahara_coll,True) else: dropouts=load_from_mongo(config.m_normal.db,config.m_normal.tahara_coll,True) if classify=="dropout": 
tweets=load_from_mongo(config.m_dropout.db,config.m_dropout.tweets_coll,criteria={'$and':[{"user.screen_name":profile["_id"]},{'text':{'$regex':'^[^RT]'}},{'created_at':{'$gte':profile["date_of_start"],'$lte':profile["date_of_dropout"]}}]}) if len(tweets)==0: tweets=load_from_mongo(config.m_dropout.db,"tweets_of_dropout?",criteria={'$and':[{"user.screen_name":profile["_id"]},{'text':{'$regex':'^[^RT]'}},{'created_at':{'$gte':profile["date_of_start"],'$lte':profile["date_of_dropout"]}}]}) tmp_sim_dropouts=[] tmp_sim_dropouts.append(profile["_id"]) tmp_sim_dropouts.append(-1) elif classify == "normal": tweets=load_from_mongo(config.m_normal.db,config.m_normal.tweets_coll,criteria={'$and':[{"user.screen_name":profile["_id"]},{'text':{'$regex':'^[^RT]'}},{'created_at':{'$gte':profile["date_of_start"],'$lte':profile["date_of_dropout"]}}]}) tmp_sim_dropouts=[] tmp_sim_dropouts.append(profile["_id"]) tmp_sim_dropouts.append(1) elif classify == "dropout_test": tweets=load_from_mongo(config.m_dropout_test.db,config.m_dropout_test.tweets_coll,criteria={'$and':[{"user.screen_name":profile["_id"]},{'text':{'$regex':'^[^RT]'}},{'created_at':{'$gte':profile["date_of_start"],'$lte':profile["date_of_dropout"]}}]}) if len(tweets)==0: tweets=load_from_mongo(config.m_dropout_test.db,"tweets_of_dropout?",criteria={'$and':[{"user.screen_name":profile["_id"]},{'text':{'$regex':'^[^RT]'}},{'created_at':{'$gte':profile["date_of_start"],'$lte':profile["date_of_dropout"]}}]}) tmp_sim_dropouts=[] tmp_sim_dropouts.append(profile["_id"]) tmp_sim_dropouts.append(-1) elif classify == "normal_test": tweets=load_from_mongo(config.m_normal_test.db,config.m_normal_test.tweets_coll,criteria={'$and':[{"user.screen_name":profile["_id"]},{'text':{'$regex':'^[^RT]'}},{'created_at':{'$gte':profile["date_of_start"],'$lte':profile["date_of_dropout"]}}]}) tmp_sim_dropouts=[] tmp_sim_dropouts.append(profile["_id"]) tmp_sim_dropouts.append(-1) # Count the number of days on which tweets were actually posted day_counter=set() for tweet in tweets: 
y=extract_time(tweet["created_at"],"Y") m=extract_time(tweet["created_at"],"M") d=extract_time(tweet["created_at"],"D") days=y,":",m,":",d day_counter.add(days) total_days=len(day_counter) if total_days == 0: print "Error! ",profile["_id"],"has 0 tweets." continue tmp_sim_dropouts=[] count=0 for d in dropouts.sort("rate",pymongo.DESCENDING): count+=1 word=[w for w in words if w["name"] == d["_id"]] if len(word)==0: word=[{"count":0,"name":d["_id"],"days":0}] else: pass cnt=Decimal(word[0]["count"]) rate=Decimal(d["rate"]) days=Decimal(word[0]["days"]) d_point=Decimal(get_word_point(d["_id"])) if d_point == 0: if is_classify=="dropout": if count > 1000: if is_80==True: break else: pass else: if is_80==True: continue else: pass td=Decimal(total_days) if is_classify=="dropout": tmp_wui_wci=Decimal(days/td) wui_wci+=tmp_wui_wci tmp_w2ui=context.power(cnt,2) w2ui+=tmp_w2ui tmp_w2ci=context.power(rate,2) w2ci+=tmp_w2ci else: tmp_wui_wci=Decimal(cnt*rate) wui_wci+=(cnt*rate) tmp_w2ui=Decimal(math.pow((Decimal(cnt)),2)) w2ui+=math.pow((cnt),2) tmp_w2ci=Decimal(math.pow(Decimal(rate),2)) w2ci+=math.pow(rate,2) if count >= 1000: break if tmp_wui_wci == 0 or tmp_w2ci == 0 or tmp_w2ui == 0 or cnt ==0: tmp_sim_dropouts.append(0) else: tmp_sim_dropout=tmp_wui_wci tmp_sim_dropouts.append(float(tmp_sim_dropout)) print profile["_id"],",True,{0}".format( ",".join([str(t) for t in tmp_sim_dropouts]) )