def bi_pruning(q_id, q, k, index, cache):
    """Build a search region around ``q`` from one Voronoi expansion per sector.

    The space around ``q`` is split into six partitions.  For every partition
    the Voronoi neighbourhood of ``q_id`` is expanded through a min-heap keyed
    on distance to ``q``; only neighbours whose Voronoi cell intersects the
    partition are queued.  Expansion stops once ``k`` points lying inside the
    partition have been collected (or the heap runs dry).  The distance of the
    last collected point becomes the radius of that partition's sector.

    NOTE(review): another function named ``bi_pruning`` is defined later in
    this file; if both stay, the later one replaces this one at import time.

    Args:
        q_id: Identifier of the query point in the Voronoi diagram.
        q: Query point; it is tested against partitions and used as the
            distance target and the sector centre.
        k: Number of points to collect per partition.
        index: Object exposing ``space``, ``points`` and ``voronoi_diagram``.
        cache: Mapping from point id to point.  It is consulted before
            ``index.points`` and filled in place on every miss.

    Returns:
        A ``(region, IO)`` tuple: the union of the buffered sectors, and the
        number of points that had to be read from ``index.points`` because
        they were not in ``cache``.

    Raises:
        TypeError: If no partition produced a sector, because ``reduce`` is
            called on an empty list without an initial value.
    """
    IO = 0
    partitions = list(common.partitions(q, index.space, 6))
    region_list = []
    points = index.points
    vd = index.voronoi_diagram

    for partition in partitions:
        # Use the heap class from ``common``, as the other traversals in this
        # file do, instead of relying on a bare ``MinHeap`` name.
        h = common.MinHeap()
        h.push((0, q_id, q))
        visited = {q_id}
        knn = []

        while len(h) > 0 and len(knn) < k:
            dist, o, p = h.pop()
            # The query point itself seeds the heap but is never a result.
            if o != q_id and partition.intersects(p):
                knn.append((o, p, dist))

            for neighbor in vd.neighbors(o):
                if neighbor in visited:
                    continue
                visited.add(neighbor)

                if neighbor in cache:
                    neighbor_p = cache[neighbor]
                else:
                    # Cache miss: load the point and count one I/O.
                    neighbor_p = points[neighbor]
                    cache[neighbor] = neighbor_p
                    IO += 1

                # Only follow neighbours whose cell reaches into this sector.
                if partition.intersects(vd.cell(neighbor)):
                    h.push((neighbor_p.distance(q), neighbor, neighbor_p))

        if knn:
            # Distance of the last point collected for this partition.
            r = knn[-1][2]
            if r > 0:
                region_list.append(
                    common.sector(q, r, partition.angles).buffer(0.01))

    region = reduce(lambda x, y: x.union(y), region_list)
    return region, IO
def pruning(q_id, q, k, index, partition_num):
    """Traverse the index tree from its root and derive the unpruned area.

    Nodes are taken from a min-heap keyed on ``child.geom.distance(q)``.  A
    node is processed only when ``may_contains_significant_facility`` accepts
    it against the per-partition shaded areas: data nodes are handed to
    ``pruneSpace``, other nodes have their children queued.  Afterwards each
    partition contributes a buffered sector whose radius is the smaller of
    its heap's ``first()`` value and the partition's own ``r``.

    Args:
        q_id: Identifier of the query point, forwarded to ``pruneSpace``.
        q: Query point (distance target and sector centre).
        k: Forwarded to ``pruneSpace``.
        index: Object exposing ``space`` and ``root``.
        partition_num: Number of partitions around ``q``.

    Returns:
        A ``(sigLists, unpruned_area, IO)`` tuple: one list per partition
        (filled by ``pruneSpace``), the union of the buffered sectors, and
        the node counter.

    Raises:
        TypeError: If no partition yields a positive radius, because
            ``reduce`` is called on an empty list without an initial value.
    """
    partitions = common.partitions(q, index.space, partition_num)
    sigLists = [[] for _ in range(partition_num)]
    upper_arc_radius_heaps = [common.MaxHeap() for _ in range(partition_num)]
    shaded_areas = [calculate_shaded_area(part, part.r) for part in partitions]

    queue = common.MinHeap()
    IO = 0
    queue.push((0, index.root))
    while len(queue) > 0:
        _, entry = queue.pop()
        if not may_contains_significant_facility(entry, shaded_areas):
            continue

        if entry.is_data_node:
            pruneSpace(q_id, entry, k, partitions, sigLists,
                       upper_arc_radius_heaps, shaded_areas)
        else:
            for child in entry.children:
                queue.push((child.geom.distance(q), child))
        # NOTE(review): the source was whitespace-collapsed, so the nesting of
        # this counter is reconstructed (one count per accepted node) --
        # confirm against the original layout.
        IO += 1

    unpruned_area_list = []
    for i in range(partition_num):
        r_b = min(upper_arc_radius_heaps[i].first(), partitions[i].r)
        start_angle = 2 * pi / partition_num * i
        end_angle = 2 * pi / partition_num * (i + 1)
        angles = [start_angle, end_angle]
        if r_b > 0:
            wedge = common.sector(q, r_b, angles)
            unpruned_area_list.append(wedge.buffer(0.01))

    unpruned_area = reduce(lambda merged, area: merged.union(area),
                           unpruned_area_list)
    return sigLists, unpruned_area, IO
def calculate_shaded_area(partition, bounding_arc_radius):
    """Return the shaded area of ``partition`` for a bounding-arc radius.

    The area is the union of a sector with twice the radius and two circles
    of that radius centred on the points returned by ``get_M_N``.

    Args:
        partition: Object exposing ``o``, ``r``, ``angles`` and ``origin``.
        bounding_arc_radius: Radius of the bounding arc.  ``0`` yields
            ``partition.origin`` unchanged; infinity is replaced by
            ``partition.r``.

    Returns:
        ``partition.origin`` for a zero radius, otherwise the union geometry.
    """
    # NOTE(review): this branch reads ``partition.origin`` while the rest of
    # the function reads ``partition.o`` -- confirm both attributes exist.
    if bounding_arc_radius == 0:
        return partition.origin

    if bounding_arc_radius == float('inf'):
        radius = partition.r
    else:
        radius = bounding_arc_radius

    area = common.sector(partition.o, radius * 2, partition.angles)
    centre_m, centre_n = get_M_N(partition, radius)
    for centre in (centre_m, centre_n):
        area = area.union(common.circle(centre, radius))
    return area
def bi_pruning(q_id, q, k, index, cache):
    """Build a search region around ``q`` with one shared Voronoi expansion.

    Points are expanded outward from ``q_id`` through Voronoi neighbours,
    ordered by a hop counter that starts at 1 for the direct neighbours of
    ``q_id`` and grows by one per step.  A popped point is kept for each of
    the six partitions it intersects when its hop counter is at most ``k``
    and it is closer to ``q`` than that partition's current bound (infinite
    until the partition holds ``k`` entries, then the heap's ``first()``
    distance).  Only kept points have their neighbours queued.  Each
    partition then contributes a buffered sector whose radius is its k-th
    smallest kept distance, or the largest one if fewer than ``k`` were kept.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source -- confirm against the original layout.

    Args:
        q_id: Identifier of the query point in the Voronoi diagram.
        q: Query point (distance target and sector centre).
        k: Number of points wanted per partition and the hop limit.
        index: Object exposing ``space``, ``points`` and ``voronoi_diagram``.
        cache: Mapping from point id to point.  It is consulted before
            ``index.points`` and filled in place on every miss.

    Returns:
        A ``(region, IO)`` tuple: the union of the buffered sectors, and the
        number of points read from ``index.points`` on cache misses.

    Raises:
        TypeError: If no partition produced a sector, because ``reduce`` is
            called on an empty list without an initial value.
    """
    sector_count = 6
    IO = 0
    partitions = list(common.partitions(q, index.space, sector_count))
    points = index.points
    vd = index.voronoi_diagram

    def load(point_id):
        # Return (point, cost); cost is 1 only when the cache had to be filled.
        if point_id in cache:
            return cache[point_id], 0
        loaded = points[point_id]
        cache[point_id] = loaded
        return loaded, 1

    visited = {q_id}
    frontier = common.MinHeap()
    kept = [common.MaxHeap() for _ in range(sector_count)]

    # Seed the frontier with the direct neighbours of the query point.
    for neighbor_id in vd.neighbors(q_id):
        neighbor_p, cost = load(neighbor_id)
        IO += cost
        frontier.push((1, neighbor_id, neighbor_p))
        visited.add(neighbor_id)

    while len(frontier) > 0:
        gd_p, p_id, p = frontier.pop()
        for sector_no in range(sector_count):
            if not partitions[sector_no].intersects(p):
                continue

            heap = kept[sector_no]
            if len(heap) < k:
                dist_bound = float('inf')
            else:
                dist_bound = heap.first()[0]
            dist_p = p.distance(q)
            if not (gd_p <= k and dist_p < dist_bound):
                continue

            heap.push((dist_p, p_id, p))
            for neighbor_id in vd.neighbors(p_id):
                if neighbor_id in visited:
                    continue
                neighbor_p, cost = load(neighbor_id)
                IO += cost
                visited.add(neighbor_id)
                frontier.push((gd_p + 1, neighbor_id, neighbor_p))

    region_list = []
    for sector_no in range(sector_count):
        ranked = sorted(kept[sector_no])
        if len(ranked) >= k:
            r = ranked[k - 1][0]
        elif len(ranked) > 0:
            r = ranked[-1][0]
        else:
            r = 0
        if r > 0:
            wedge = common.sector(q, r, partitions[sector_no].angles)
            region_list.append(wedge.buffer(0.01))

    region = reduce(lambda merged, area: merged.union(area), region_list)
    return region, IO
def hindenburgomen():
    """Plot the index close and mark dates where the mean R-squared jumps.

    All settings are hard-coded inside the function.  The steps are:

    1. Fetch the trading dates from the start date to 30 days past the end
       date, and the daily closes of every sector constituent seen on those
       dates (codes that newly appear on a day are fetched as well).
    2. Fetch the daily closes of the comparison index.
    3. For every constituent, regress the index close on the constituent
       close over sliding 30-row windows and record each window's score.
    4. Average the scores per window, and flag a date when that mean differs
       by more than 30% from the mean three rows earlier.
    5. Plot the index close with the flagged dates as red dots.

    Side effects: writes ``data.csv`` and ``rsq.csv`` to the working
    directory and opens a matplotlib window.  The function takes no
    arguments, returns ``None``, and relies on the module-level names ``c``
    (the market-data API object), ``pd``, ``linear_model`` and ``plt``.
    """
    index = '000001'  # SSE Composite Index
    sectorcode = "2000032255"  # SSE A-share constituents
    startdate = '20160101'  # start date
    enddate = '20161231'  # end date

    # Extend the range to the date 30 days after the end date.
    Ndate = c.getdate(enddate, 30)
    enddateN30 = Ndate.Dates[0]
    date = c.tradedates(startdate, enddateN30)

    # Fetch the SSE A-share constituents; codes that newly appear on a later
    # day get their close prices fetched too.
    codenumber = []
    for day_no in range(len(date.Dates) - 30):
        members = c.sector(sectorcode, date.Dates[day_no])
        if day_no == 0:
            data = c.csd(members.Codes, "CLOSE", startdate, enddateN30,
                         'Period=1,ispandas=1')
            codenumber = members.Codes
        else:
            addcode = [code for code in members.Codes
                       if code not in previous.Codes]
            if addcode:
                adddata = c.csd(addcode, "CLOSE", startdate, enddateN30,
                                'Period=1,ispandas=1')
                # pd.concat replaces DataFrame.append, which newer pandas
                # releases no longer provide.
                data = pd.concat([data, adddata])
                codenumber = codenumber + addcode
        previous = members

    # Round-trip through CSV so that CODES becomes an ordinary column that
    # can be set as the index.
    data.to_csv('data.csv')
    data = pd.read_csv('data.csv')
    data = data.set_index('CODES')

    # Fetch the close prices of the comparison index.
    indexdata = c.csd(index, "CLOSE", startdate, enddateN30,
                      'Period=1,ispandas=1,rowindex=1')

    # Regress the index on each constituent and score every 30-row window.
    tcode = {}
    for code in codenumber:
        X, Y = [], []
        for x, y in zip(data.loc[code, "CLOSE"], indexdata["CLOSE"]):
            X.append([float(x)])
            Y.append(float(y))

        xcode = []
        for j in range(len(Y)):
            try:
                regr = linear_model.LinearRegression()
                regr = regr.fit(X[j:j + 30], Y[j:j + 30])
                score = regr.score(X[j:j + 30], Y[j:j + 30])
            except Exception:
                # A window that cannot be fitted is recorded as 0
                # (presumably because of missing closes -- confirm).
                score = 0
            xcode.append(score)
            # The stop test runs after the try/except so that a failed fit
            # still ends the scan at the last full window; every code then
            # has the same number of scores, which the DataFrame constructor
            # below requires.
            if j + 30 >= len(Y) - 1:
                break
        tcode[code] = xcode

    Rsq = pd.DataFrame(tcode)
    Rsq.to_csv('rsq.csv', index=None)
    Rsq = pd.read_csv('rsq.csv')

    # Mean of the R-squared scores per window.
    FORMAT = '%d/%02d/%02d'
    Rsq = Rsq.T
    mean = pd.DataFrame(Rsq.mean(), columns=['MEAN'])
    # NOTE(review): every pass rescales the whole MEAN column rather than
    # only row i.  Kept exactly as in the original -- confirm whether a
    # per-row correction for zero scores was intended.
    for i in range(Rsq.columns.size):
        mean['MEAN'] = mean['MEAN'] * len(Rsq) / len(Rsq[Rsq[i] != 0.0])
    mean['DATE'] = date.Dates[30:]

    # Flag the dates where the mean changed by more than 30% over 3 rows.
    # Create the column first so that a run without any flagged date still
    # reaches the plot instead of failing on a missing column.
    indexdata['MARKER'] = 0
    for i in range(len(mean) - 33):
        base = mean.loc[i, 'MEAN']
        x = (base - mean.loc[i + 3, 'MEAN']) / base
        if x > 0.3 or x < -0.3:
            parts = mean.loc[i + 3, 'DATE'].split("/")
            # Zero-pad month and day before comparing with the DATES column.
            stamp = FORMAT % (int(parts[0]), int(parts[1]), int(parts[2]))
            indexdata.loc[indexdata['DATES'] == stamp, 'MARKER'] = 1

    # Plot the index close prices.
    indexdata = indexdata.set_index('DATES')
    indexdata.index = pd.to_datetime(indexdata.index)

    # Mark the dates flagged by the Hindenburg Omen; the selection is done
    # once instead of being recomputed for every point.
    marked = indexdata.loc[indexdata["MARKER"] == 1, "CLOSE"]
    for when, close in zip(marked.index, marked):
        plt.scatter(when, close, color='red', marker='o')

    indexdata['CLOSE'].plot(figsize=(10, 8))
    plt.xlabel("date")
    plt.ylabel("close")
    plt.title(u"000001.SH")
    plt.show()