def _test():
    """Build a transition model from the bundled pseudo test data.

    Calls ``transition_model`` with the data produced by
    ``testdata.pseudo_shingakunet()`` and a fixed goal URL, then hands
    the result back unchanged.

    Returns:
        Whatever ``transition_model`` returns for that data/goal pair
        (its concrete type is not visible from this function).
    """
    # Kept as a function-scope import so the test fixture module is only
    # required when this helper is actually invoked.
    from testdata import pseudo_shingakunet

    goal_url = 'https://shingakunet.com/net2/shiryoSeikyu/entry/complete'
    return transition_model(data=pseudo_shingakunet(), goal=goal_url)
elif netloc_ratio >= upper_criteria_url and path_ratio >= upper_criteria_url: #クエリが長過ぎて一致条件に当てはまらないものを救う if query_ratio >= lower_criteria_url * 0.50: states[state].append(url) adopted_flag = True accumulated += uu break #以前に採用したstateとマージされなかった場合は新規stateとして採用する if not adopted_flag: states.setdefault(url, []) states[url].append(url) accumulated += uu #条件を満たしたらマージをやめる if len(states) >= max_state + accumulated_states or accumulated >= minimum_volume * total_uu: print 'states:', len(states) - accumulated_states, 'volume:', float(accumulated) / float(total_uu) break for state, url in states.items(): states[state] = list(set(states[state])) #uu_volumeが十分にとれなかった場合には何らかの方法で救済する? return states if __name__ == '__main__': from get_model import make_uu_base_data from testdata import pseudo_shingakunet states = summarize(make_uu_base_data(pseudo_shingakunet()), 'https://shingakunet.com/net2/shiryoSeikyu/entry/complete') for state, sub_states in states.items(): print 'state:', state, 'sub-state:', sub_states