def center(category):
    """Run the ``Center`` calculation for ``category`` on every app id.

    The app ids are read from the database object returned by
    ``DataGetter.get__db()``.  A single ``Center`` instance is reused: for
    each app id it is re-targeted with ``set_property``, asked to
    ``calculate``, and the result is handed straight to ``record_data``.

    :param category: forwarded unchanged to ``Center.set_property``.
    """
    app_ids = DataGetter.get__db().get_app_ids()
    # NOTE(review): this function is itself named ``center``; if it shares a
    # module namespace with the ``center`` name used below, that lookup
    # resolves to this function rather than to a module -- confirm.
    calculator = center.Center()
    for app_id in app_ids:
        calculator.set_property(category, app_id)
        calculator.record_data(calculator.calculate())
def cluster_data(category, Minpts=2, e=0.6):
    """Cluster the data of ``category`` with DBSCAN, once per app id.

    One ``DBSCAN`` instance is reused for all app ids returned by the
    database; each run's clusters are persisted via ``record_cluster``.

    :param category: forwarded unchanged to ``DBSCAN.set_property``.
    :param Minpts: first argument passed to ``DBSCAN.dbscan`` (presumably the
        minimum number of neighbours for a core point -- TODO confirm).
    :param e: second argument passed to ``DBSCAN.dbscan`` (presumably the
        neighbourhood radius -- TODO confirm).
    """
    # Fetch the application ids.
    app_ids = DataGetter.get__db().get_app_ids()
    clusterer = dbscan.DBSCAN()
    for app_id in app_ids:
        # Cluster this app's data with the DBSCAN algorithm.
        clusterer.set_property(category, app_id)
        found_clusters = clusterer.dbscan(Minpts, e)
        # Persist the result.
        clusterer.record_cluster(found_clusters)
def classify_data(lower, quantity):
    """Preprocess and classify a slice of the raw data, then save it.

    Rows are loaded with ``load_specified_quantity_data(lower, quantity)``.
    Each row is replaced in place by
    ``[row[0], processed_sentence, row[2], category]`` and the whole list is
    passed to ``save_data`` once the loop has finished.

    :param lower: first argument forwarded to the loader (presumably the
        starting offset -- TODO confirm).
    :param quantity: second argument forwarded to the loader (presumably the
        number of rows to read -- TODO confirm).
    """
    database = DataGetter.get__db()
    # Read the requested amount of rows from the raw data.
    rows = database.load_specified_quantity_data(lower, quantity)

    # Set up the preprocessor and the classifier; both are reused per row.
    preprocessor = preprocess.Preprocess()
    classifier = bayes.Bayes()

    for index, row in enumerate(rows):
        # Preprocess the sentence stored in column 1.
        preprocessor.set_sentence(row[1])
        cleaned = preprocessor.preprocess()

        if cleaned != '':
            # Classify the text with the naive Bayes classifier.
            classifier.set_property(cleaned, row[2])
            label = classifier.bayes_classifier_improved2()
        else:
            # Nothing survived preprocessing: mark the row as useless.
            label = con.USELESS

        rows[index] = [row[0], cleaned, row[2], label]

    # Persist the classified rows.
    database.save_data(rows)
def __init__(self):
    """Store the cluster helper and database object supplied by ``DataGetter``."""
    # Double-underscore attribute names are name-mangled by Python with the
    # enclosing class name, so they are private to that class.
    self.__clusterHelper = DataGetter.get__clusterHelper()
    self.__db = DataGetter.get__db()
def __init__(self):
    """Store the database object supplied by ``DataGetter.get__db()``."""
    # The double-underscore name is mangled with the enclosing class name.
    self.__database = DataGetter.get__db()
def __init__(self, category, appId):
    """Remember the target category/app and fetch the shared collaborators.

    :param category: stored as-is on the instance.
    :param appId: stored as-is on the instance.
    """
    # Plain state first; the instance starts in the "not aborted" state.
    self.__category = category
    self.__appId = appId
    self.__isAborted = False

    # Collaborators obtained from DataGetter (database first, then helper).
    self.__database = DataGetter.get__db()
    self.__clusterHelper = DataGetter.get__clusterHelper()