def load_data():
    """Load the samples for the code returned by ``MyCode.get()``.

    Returns:
        list: one ``getBolls(index, datas)`` result for every index from 100
        up to (but excluding) ``len(df) - jbs.g_scope_len``. The list is
        empty when the frame has fewer than 100 rows.
    """
    bolls = []
    for code in [MyCode.get()]:
        # The original note says this data comes "from local".
        datas = jbs.getData(code)
        # datas unpacks into five items; only the fourth (df) is used here.
        _upper, _middle, _lower, df, _adx = datas
        if len(df) >= 100:
            stop = len(df) - jbs.g_scope_len
            bolls.extend(getBolls(index, datas) for index in range(100, stop))
    return bolls
def myhclust(datas, indexs):
    """Run hierarchical clustering over a slice of ``datas``.

    Large samples make the clustering too slow, so (per the original note)
    slices as large as 2000 should not be used.

    Args:
        datas: list or np.ndarray holding the samples.
        indexs: ordered list of positions into ``datas``; shuffled indexes
            are not supported. Only ``indexs[0]`` and ``indexs[-1]`` are
            read, so the positions are treated as one contiguous run.

    Returns:
        pd.DataFrame: one row per cluster describing its center point, with
        the columns datas_index, code, dt, tick_period, clust_id, label_id
        and label_desc.
    """
    first = indexs[0]
    print("num=%d, total_num = %d"%(len(indexs), len(datas)))

    # NOTE(review): the slice stops before indexs[-1], so the last requested
    # sample is not clustered -- confirm whether that is intended.
    last = indexs[-1]
    samples = datas[first:last]
    local_ids = range(last - first)
    assert len(local_ids) == len(samples)

    # Original data set: the fourth item of jbs.getData() supplies the index
    # used to look up the "dt" value of each center.
    code = MyCode.get()
    tick_period = jbs.g_scope_len
    df_datas = jbs.getData(code)[3]

    # Each feature is a one-element array wrapping the sample's local position.
    features = [np.array([pos]) for pos in local_ids]

    # With about 100 samples, a 0.2 cut-off ratio works reasonably well.
    tree, distances = hcluster.hcluster(features, samples, distfcn=distfn)
    clusters = tree.extract_clusters(0.2 * tree.distance)
    print('clusters num=', len(clusters))

    agl.removeDir('html/')

    # Collect one center-point row per cluster.
    center_rows = []
    for cluster_id, cluster in enumerate(clusters):
        members = cluster.get_cluster_elements()
        # calc_center works on local positions; shift back into datas' indexing.
        center = calc_center(members, distances) + first
        center_rows.append(
            [center, code, df_datas.index[center], tick_period,
             cluster_id, 0, '']
        )
        print(cluster_id, center, len(members), members)

    columns = ['datas_index', 'code', 'dt', 'tick_period',
               'clust_id', 'label_id', 'label_desc']
    df = pd.DataFrame(center_rows, columns=columns)
    print(df)
    return df
def test():
    """Manual check: build a boll_judge_score for code '000001' and show it."""
    boll_judge_score(getData('000001'))._show()