"「知恵袋」あり": defaultdict(lambda: 0), "「知恵袋」なし": defaultdict(lambda: 0) } img_rexp = re.compile(r'.*%s.*' % "画像") video_rexp = re.compile(r'.*%s.*' % "動画") map_rexp = re.compile(r'.*%s.*' % "地図") chie_rexp = re.compile(r'.*%s.*' % "知恵袋") news_rexp = re.compile(r'.*%s.*' % "ニュース") recipe_rexp = [re.compile(r'.*%s.*' % "クックパッド"), re.compile(r'.*%s.*' % "レシピ")] for fileNum in xrange(2,9): # for line in open('../data/2015110'+str(fileNum)+'_all', 'r'): for line in open('../data/small_data.txt', 'r'): featureArray = line.split("\t") # if featureArray[11] == "": # continue if cm.typical_vertical(featureArray[10]) == "image": if img_rexp.match(featureArray[7]): result["「画像」あり"][featureArray[10]] += 1 result["「画像」あり"]["all"] += 1 else: result["「画像」なし"][featureArray[10]] += 1 result["「画像」なし"]["all"] += 1 elif cm.typical_vertical(featureArray[10]) == "recipe": if recipe_rexp[0].match(featureArray[7]) or recipe_rexp[1].match(featureArray[7]): result["「レシピ」あり"][featureArray[10]] += 1 result["「レシピ」あり"]["all"] += 1 else: result["「レシピ」なし"][featureArray[10]] += 1 result["「レシピ」なし"]["all"] += 1 elif cm.typical_vertical(featureArray[10]) == "video": if video_rexp[0].match(featureArray[7]):
}, } outputs = { "device": {}, "gender": {}, "generation": {} } other_verical_array = [] for fileNum in xrange(2,9): for line in open('../data/2015110'+str(fileNum)+'_all', 'r'): # for line in open('../data/small_data.txt', 'r'): featureArray = line.split("\t") vertical = cm.typical_vertical(featureArray[10]) if vertical != "video": continue device = featureArray[8] if vertical == "other" and featureArray[10] not in other_verical_array: other_verical_array.append(featureArray[10]) birthYear = featureArray[2] gender = featureArray[1] # vertical = "None" allDict["all"]["all"] += 1 if birthYear != "" and gender != "" and device in ["smartphone", "tablet", "pc", "tv", "featurephone"]: generation = cm.generation_distinction(birthYear) generationDict[generation]["all"] += 1 generationDict[generation][vertical] += 1 genderDict[gender]["all"] += 1
if __name__ == "__main__":
    # Count, for rows classified as the "image" or "recipe" vertical, how
    # many have column 7 containing the vertical's keyword ("あり" = present)
    # and how many do not ("なし" = absent). Each bucket is keyed by the raw
    # value of column 10, plus an "all" total.
    result = {
        "「画像」あり": defaultdict(lambda: 0),
        "「画像」なし": defaultdict(lambda: 0),
        "「レシピ」あり": defaultdict(lambda: 0),
        "「レシピ」なし": defaultdict(lambda: 0)
    }
    img_rexp = re.compile(r'.*%s.*' % "画像")
    recipe_rexp = [re.compile(r'.*%s.*' % "クックパッド"),
                   re.compile(r'.*%s.*' % "レシピ")]

    for fileNum in xrange(2, 9):
        # Full data set, one file per day:
        # for line in open('../data/2015110'+str(fileNum)+'_all', 'r'):
        # NOTE(review): while the per-day path above is disabled, the sample
        # file is read once per iteration, so every count is multiplied by
        # the number of iterations. Kept as-is to preserve existing output.
        with open('../data/small_data.txt', 'r') as data_file:
            for line in data_file:
                featureArray = line.split("\t")
                # if featureArray[11] == "":
                #     continue

                # Classify once per row instead of once per branch test
                # (assumes cm.typical_vertical has no side effects).
                vertical = cm.typical_vertical(featureArray[10])
                text = featureArray[7]

                if vertical == "image":
                    if img_rexp.match(text):
                        bucket = "「画像」あり"
                    else:
                        bucket = "「画像」なし"
                elif vertical == "recipe":
                    # Either recipe keyword counts as a hit.
                    if any(rexp.match(text) for rexp in recipe_rexp):
                        bucket = "「レシピ」あり"
                    else:
                        bucket = "「レシピ」なし"
                else:
                    # Other verticals are not part of this tally.
                    continue

                result[bucket][featureArray[10]] += 1
                result[bucket]["all"] += 1

    print_dict_content(result)