def save(name='default'):
    """
    Parse the <Image> nodes of the dataset XML and cache the id/data arrays as .npy files.
    :return: None
    """
    from context.resource_manager import Properties
    from pandas import Series
    from xml.dom.minidom import parse

    path = Properties.getXmlLocation() + name + ".xml"
    images = parse(path)
    id = []
    data = []
    for node in images.getElementsByTagName("Image"):
        id_node = node.getElementsByTagName("id")[0].childNodes[0].data
        id.append(id_node)
        data_node = node.getElementsByTagName("data")[0].childNodes[0].data
        # The data payload is stored as "[v1,v2,...]": strip the brackets and split on commas.
        data_node = data_node[1:-1].split(',')
        data.append(data_node)
    id = Series(np.asarray(id))
    # conv (defined elsewhere in this module) converts the string values to numbers.
    data = np.asarray(list(map(conv, data)), dtype=float)
    # Assumes getDefaultDataFold() resolves to getRootPath() + "/data", so the folder
    # created here is the one the np.save calls below write into.
    cache_fold = Properties.getDefaultDataFold() + "/cache/" + name
    if not os.path.exists(cache_fold):
        os.makedirs(cache_fold)
    np.save(Properties.getRootPath() + "/data/cache/" + name + "/id.npy", id)
    np.save(Properties.getRootPath() + "/data/cache/" + name + "/data.npy", data)
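# Hedged sketch of the XML layout the parsers in this module appear to expect, inferred
# from the getElementsByTagName calls above; the root tag name and sample values are
# assumptions for illustration (getXmlData() below uses self.tag instead of "Image").
#
#   <Images>
#       <Image>
#           <id>0001</id>
#           <data>[0.0,1.0,0.5]</data>
#       </Image>
#   </Images>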
def get_threshold(name='default'):
    """
    Compute the information-entropy thresholds for the cached dataset and dump them to CSV.
    :return: None
    """
    from context import resource_manager
    from context.resource_manager import Properties
    from cluster import density_cluster_demo

    id = np.load(Properties.getRootPath() + "/data/cache/" + name + "/id.npy")
    data = np.load(Properties.getRootPath() + "/data/cache/" + name + "/data.npy")
    threshold = density_cluster_demo.cluster(id, data, dataset=name)
    csv_fold = Properties.getDefaultDataFold() + "/csv/" + name + "/" + resource_manager.Properties.name_str_static()
    if not os.path.exists(csv_fold):
        os.makedirs(csv_fold)
    threshold.to_csv(csv_fold + "/threshold.csv")
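# Hedged usage sketch for the two helpers above: build the .npy cache from the XML once,
# then compute the thresholds from it. The dataset name 'demo' (and a matching demo.xml
# under getXmlLocation()) is an assumption for illustration.
save(name='demo')           # parse demo.xml and cache id.npy / data.npy
get_threshold(name='demo')  # cluster the cached data and write threshold.csv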
def get_threshold():
    """
    Multiprocess variant: split the entropy computation across N worker processes.
    """
    from context.resource_manager import Properties
    from cluster import density_cluster
    import multiprocessing

    id = np.load(Properties.getRootPath() + "/data/cache/id.npy")
    data = np.load(Properties.getRootPath() + "/data/cache/data.npy")
    image_size = round(math.sqrt(float(data[0].shape[0])))
    data = density_cluster.binary_array(data)

    threshold = DataFrame([], columns=['H', 'd_c', 'cluster'])
    N = 20
    pool = multiprocessing.Pool(processes=N)
    log.info("init " + str(N) + " workers")
    for i in range(N):
        # NOTE: each worker receives a copy of `threshold`; rows appended inside the
        # workers are not visible in the parent process unless multi_processing_cluster
        # returns them or writes them to shared storage.
        pool.apply_async(density_cluster.multi_processing_cluster, (N, i, threshold, id, data))
    pool.close()
    pool.join()
    log.debug(threshold)
    if not os.path.exists(Properties.getDefaultDataFold() + "/csv"):
        os.makedirs(Properties.getDefaultDataFold() + "/csv")
    threshold.to_csv(Properties.getDefaultDataFold() + "/csv/threshold.csv")
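# Hedged alternative sketch: worker processes cannot mutate the parent's DataFrame in
# place, so one workable pattern is to have each worker return its partial result and
# concatenate in the parent. This assumes multi_processing_cluster could be made to
# return a DataFrame fragment, which is not shown in this snippet.
#
#     results = [pool.apply_async(density_cluster.multi_processing_cluster,
#                                 (N, i, DataFrame([], columns=['H', 'd_c', 'cluster']), id, data))
#                for i in range(N)]
#     pool.close()
#     pool.join()
#     threshold = pandas.concat([r.get() for r in results], ignore_index=True)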
def getXmlData(self, save=False, relative=True):
    """
    Parse the XML file for this dataset and return the (id, data) arrays,
    optionally caching them as .npy files.
    :return: (id, data)
    """
    if relative:
        path = Properties.getRootPath() + Properties.getXmlLocation() + self.name + ".xml"
    else:
        path = self.name
    images = parse(path)
    id = []
    data = []
    for node in images.getElementsByTagName(self.tag):
        id_node = node.getElementsByTagName("id")[0].childNodes[0].data
        id.append(id_node)
        data_node = node.getElementsByTagName("data")[0].childNodes[0].data
        # The payload is stored as "[v1,v2,...]": strip the brackets and split on commas.
        data_node = data_node[1:-1].split(',')
        data.append(data_node)
    id = Series(np.asarray(id))
    data = np.asarray(list(map(conv, data)), dtype=float)
    if save:
        cache_fold = Properties.getRootPath() + Properties.getDefaultDataFold() + "/cache/" + self.name
        if not os.path.exists(cache_fold):
            os.makedirs(cache_fold)
        np.save(Properties.getRootPath() + "/data/cache/" + self.name + "/id.npy", id)
        np.save(Properties.getRootPath() + "/data/cache/" + self.name + "/data.npy", data)
    return id, data
def save_dataframe_csv(threshold=DataFrame(), name="default", relative=True):
    """
    Persist a threshold DataFrame under <data_fold>/csv/<name>/<run_id>/threshold.csv.
    """
    csv_fold = Properties.getDefaultDataFold() + "/csv/" + name + "/" + resource_manager.Properties.name_str_static()
    if not os.path.exists(csv_fold):
        os.makedirs(csv_fold)
    threshold.to_csv(csv_fold + "/threshold.csv")
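# Hedged usage sketch for save_dataframe_csv; the column layout mirrors the threshold
# frame built in get_threshold() above, and the row values are illustrative only.
demo_threshold = DataFrame([[0.35, 0.02, 4]], columns=['H', 'd_c', 'cluster'])
save_dataframe_csv(demo_threshold, name='demo')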
def __init__(self, dataset):
    self.dataset = dataset
    recorder_path = Properties.getDefaultDataFold() + "/csv/recorder_csv_" + self.dataset + ".csv"
    try:
        # Resume from an existing experiment recorder if one was saved previously.
        self.recorder_csv = pandas.read_csv(recorder_path, index_col=0)
    except (IOError, OSError):
        # Otherwise start a fresh, empty recorder.
        self.recorder_csv = DataFrame([], columns=['id', 'start', 'end', 'd_c', 'max_distance_c',
                                                   'dataset', 'pile_size', 'H', 'note'])
def record_expriment(name='path', save_name='default'):
    """
    Collect the temporary images and the threshold CSV of a finished run and copy them
    into the permanent result folder.
    """
    from context import resource_manager
    from context.resource_manager import Properties

    record_img_path = resource_manager.Properties.getDefaultDataFold() + "result/temp/" + name + "/" + save_name + "/"
    record_csv_path = Properties.getDefaultDataFold() + "/csv/" + name + "/" + save_name + "/"
    path = resource_manager.Properties.getDefaultDataFold() + "/result/" + name + "/" + save_name
    if not os.path.exists(record_csv_path):
        os.makedirs(record_csv_path)
    threshold = pandas.read_csv(record_csv_path + "threshold.csv")
    save_plot(name, threshold, save_name)
    log.debug(threshold['cluster'].sort_values(ascending=False))
    shutil.copytree(record_img_path, path)
    shutil.copy(record_csv_path + "threshold.csv", path)
    log.warning("finished")
def save(self):
    """Write the experiment recorder back to its CSV file."""
    self.recorder_csv.to_csv(Properties.getDefaultDataFold() + "/csv/recorder_csv_" + self.dataset + ".csv")
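# Hedged usage sketch for the recorder methods above; `Recorder` stands in for the
# enclosing class, whose real name is not shown in this snippet, and the row values
# are illustrative only.
#
#     recorder = Recorder(dataset='demo')
#     recorder.recorder_csv.loc[len(recorder.recorder_csv)] = [
#         0, '2017-01-01 10:00', '2017-01-01 10:05', 0.02, 1.5, 'demo', 12, 0.35, 'first run']
#     recorder.save()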
def save_show_cluster(index_id, data, distance_c, pile_id, dataset="/", level="INFO", level_info='scatter figure'):
    """
    Plot the clustering result for one cut-off distance d_c and persist both the
    scatter figure and the pile table.
    """
    from view import plot_utils
    from context import resource_manager

    path = resource_manager.Properties.getDefaultDataFold() + "result" + resource_manager.getSeparator() + "temp/" \
        + dataset + "/" + resource_manager.Properties.name_str_static() + "/"
    level_path = resource_manager.Properties.getDefaultDataFold() + "result" + resource_manager.getSeparator() + "temp/" \
        + level + "/" + resource_manager.Properties.name_str_static() + "/" + str(distance_c) + "/"
    if not os.path.exists(path[:path.rfind('/')]):
        os.makedirs(path[:path.rfind('/')])
    if not os.path.exists(level_path[:level_path.rfind('/')]):
        os.makedirs(level_path[:level_path.rfind('/')])

    # Largest piles first so cluster labels are assigned in order of size.
    pile_id = pile_id.sort_values('size', ascending=False)
    x = []
    y = []
    label = []
    i = 1
    for m in range(len(pile_id)):
        l = pile_id.iloc[m]['pile']
        size = pile_id.iloc[m]['size']
        if not pile_id.iloc[m]['outlier']:
            # Regular pile: give every member the next cluster label.
            for node in l:
                index = index_id[node]
                x.append(data[index][0])
                y.append(data[index][1])
                label.append(i)
            i = i + 1
        else:
            # Outlier pile: label 0 so the plot shows it as noise.
            for node in l:
                index = index_id[node]
                x.append(data[index][0])
                y.append(data[index][1])
                label.append(0)

    if level == "SEE":
        plot_utils.plot_scatter_diagram(None, x=x, y=y, x_label='x', y_label='y', title=level_info, label=label)
    if level == "DEBUG":
        plot_utils.save_all_scatter_diagram(None, x=x, y=y, x_label='x', y_label='y', title=level_info, label=label,
                                            path=level_path + resource_manager.Properties.name_str_FULL() + ".png")
    else:
        plot_utils.save_scatter_diagram(None, x=x, y=y, x_label='x', y_label='y', title='scatter figure', label=label,
                                        path=path + str(distance_c) + ".png")
        plot_utils.save_all_scatter_diagram(None, x=x, y=y, x_label='x', y_label='y', title='scatter figure',
                                            label=label, path=path + str(distance_c) + ".png")
    log.debug("\n" + str(pile_id))

    p = Properties.getDefaultDataFold() + "/csv/" + dataset + "/" + resource_manager.Properties.name_str_static() \
        + "/" + str(distance_c) + ".csv"
    try:
        pile_id.to_csv(p)
    except (IOError, OSError):
        # The csv folder may not exist yet; create it and retry.
        if not os.path.exists(p[:p.rfind('/')]):
            os.makedirs(p[:p.rfind('/')])
            os.mknod(p)
        pile_id.to_csv(p)
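# Hedged sketch of the inputs save_show_cluster expects, inferred from how the body
# above indexes them; the concrete points, ids, and d_c value are illustrative only.
from pandas import DataFrame, Series

demo_data = np.array([[0.0, 0.1], [0.2, 0.3], [5.0, 5.1], [9.0, 9.1]])  # one 2-D point per row
demo_index_id = Series([0, 1, 2, 3], index=['a', 'b', 'c', 'd'])        # node id -> row in demo_data
demo_pile_id = DataFrame({'pile': [['a', 'b'], ['c'], ['d']],
                          'size': [2, 1, 1],
                          'outlier': [False, False, True]})
save_show_cluster(demo_index_id, demo_data, 0.5, demo_pile_id, dataset='demo', level='INFO')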
    pool.join()
    log.debug(threshold)
    if not os.path.exists(Properties.getDefaultDataFold() + "/csv"):
        os.makedirs(Properties.getDefaultDataFold() + "/csv")
    threshold.to_csv(Properties.getDefaultDataFold() + "/csv/threshold.csv")


if __name__ == '__main__':
    get_threshold()
    from context.resource_manager import Properties
    from view import shape_view
    from view import plot_utils
    from cluster import density_cluster

    threshold = pandas.read_csv(Properties.getDefaultDataFold() + "/csv/threshold.csv")
    d_c = np.asarray(threshold['d_c'].values)
    log.debug(d_c)
    log.critical(type(d_c))
    # Plot entropy H against the cut-off distance d_c to inspect the threshold curve.
    plot_utils.plot_scatter_diagram(None, x=d_c, y=threshold['H'].values, x_label='delta', y_label='H',
                                    title='threshold scatter figure')
    """
    delta_index=Series(id,index=id,dtype=np.float)
    i=0
    order_id=Series(result[:,0],index=id_index.values)
def save_show_cluster(index_id, data, distance_c, pile_id):
    """
    Plot the clustering result for one cut-off distance d_c and persist both the
    scatter figure and the pile table.
    """
    from view import plot_utils
    from context import resource_manager

    sep = resource_manager.getSeparator()
    temp_fold = resource_manager.Properties.getDefaultDataFold() + "result" + sep + "temp" + sep \
        + resource_manager.Properties.name_str_static() + sep
    path = temp_fold + str(distance_c) + ".png"
    if not os.path.exists(temp_fold):
        os.makedirs(temp_fold)

    # Largest piles first so the most significant clusters get the lowest labels.
    pile_id = pile_id.sort_values('size', ascending=False)
    x = []
    y = []
    label = []
    i = 1
    for m in range(len(pile_id)):
        l = pile_id.iloc[m]['pile']
        size = pile_id.iloc[m]['size']
        if size >= 1 and i < 15:
            # Keep at most 14 labelled clusters; everything else is plotted as noise (label 0).
            for node in l:
                index = index_id[node]
                x.append(data[index][0])
                y.append(data[index][1])
                label.append(i)
            i = i + 1
        else:
            for node in l:
                index = index_id[node]
                x.append(data[index][0])
                y.append(data[index][1])
                label.append(0)

    plot_utils.save_scatter_diagram(None, x=x, y=y, x_label='x', y_label='y', title='scatter figure', label=label,
                                    path=path)
    log.debug(pile_id)

    p = Properties.getDefaultDataFold() + "/csv/" + resource_manager.Properties.name_str_static() + "/" \
        + str(distance_c) + ".csv"
    try:
        pile_id.to_csv(p)
    except (IOError, OSError):
        # The csv folder may not exist yet; create it and retry.
        if not os.path.exists(p):
            pp = p.rfind('/')
            os.makedirs(p[:pp])
            os.mknod(p)
        pile_id.to_csv(p)