def __init__(self, coords, values, wantCL=True, platform_num=None):
    """
    Take the coordinates and values and build a KD tree.

    Keyword arguments:
    coords -- input coordinates (x, y)
    values -- input values

    """
    self.coords = np.asarray(coords, dtype=np.float32)
    self.values = np.asarray(values, dtype=np.int32)
    if self.coords.shape[0] != self.values.shape[0]:
        raise AssertionError('len(coords) does not equal len(values)')
    self.wantCL = wantCL
    self.canCL = False
    if hasCL and self.wantCL:
        try:
            platforms = cl.get_platforms()
            # An out-of-range platform_num (IndexError) propagates to the caller.
            try:
                platform = platforms[platform_num]
                self.devices = platform.get_devices()
                self.context = cl.Context(self.devices)
            except TypeError:
                # platform_num was None -- the user may be asked to select a platform.
                self.context = cl.create_some_context()
                self.devices = self.context.devices
            self.queue = cl.CommandQueue(self.context)
            filestr = open('idt.cl', 'r').read()
            self.program = cl.Program(self.context, filestr).build(devices=self.devices)
            for device in self.devices:
                buildlog = self.program.get_build_info(device, cl.program_build_info.LOG)
                if len(buildlog) > 1:
                    print 'Build log for device', device, ':\n', buildlog
            # Only the first kernel is used.
            self.kernel = self.program.all_kernels()[0]
            # Local and global sizes are device-dependent.
            self.local_size = {}
            self.global_size = {}
            # Groups should be overcommitted.
            # For now, use 3 (48 cores / 16 cores per halfwarp) * 2
            for device in self.devices:
                work_group_size = self.kernel.get_work_group_info(
                    cl.kernel_work_group_info.WORK_GROUP_SIZE, device)
                num_groups_for_1d = device.max_compute_units * 3 * 2
                self.local_size[device] = (work_group_size,)
                self.global_size[device] = (num_groups_for_1d * work_group_size,)
            self.canCL = True
        except cl.RuntimeError:
            print 'warning: unable to use pyopencl, defaulting to cKDTree'
    if self.canCL:
        self.tree = build_tree(coords)
    else:
        self.tree = KDTree(coords)
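# A minimal usage sketch for the constructor above, assuming the owning class
# is named "IDT" (hypothetical) and that KDTree is bound to scipy's cKDTree at
# module level. wantCL=False skips all OpenCL setup and exercises only the
# cKDTree fallback path.
import numpy as np

coords = np.array([(0, 0), (1, 0), (0, 1), (5, 5)], dtype=np.float32)
values = np.array([10, 20, 30, 40], dtype=np.int32)
idt = IDT(coords, values, wantCL=False)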
def regenerate_trees(self, segment, weights, deep=DEFAULT_DEEP):
    from utils import build_tree
    trees = []
    trees_clf = self._trainer.get_classifier(segment)
    for clf in trees_clf.estimators_:
        tree = build_tree(
            clf.tree_,
            # self.weights_calc.get_weights(segment, signed=False),
            weights,
            max_deep=deep
        )
        trees.append(tree)
    return trees
def requires(self):
    taskList = {
        0: 'Liqui_Adjustment_Upload_Check',
        10191: 'Loader_Batch_10191',
        10178: 'Engine_Batch_10178__LiquiRPTAdj_LRCF3',
        172: 'Engine_Batch_172__LiquiRPTAdj_LRCF2'
    }
    result = utils.build_tree(taskList=taskList,
                              businessdate=self.businessdate,
                              filespath=self.filespath,
                              useconfig=False,
                              additionalDependency={
                                  10191: 0,
                                  10178: 10191,
                                  172: 10191
                              })
    return [
        result['Engine_Batch_10178__LiquiRPTAdj_LRCF3'],
        result['Engine_Batch_172__LiquiRPTAdj_LRCF2']
    ]
def requires(self):
    taskList = {
        0: 'Daily_Liqui_Data_Ready_Check',
        10185: 'Extractor_Batch_10185',
        167: 'Loader_Batch_167',
        5: 'Engine_Batch_5__COTR',
        10174: 'Engine_Batch_10174__LCCF_LRCF_ICCF',
        10177: 'Engine_Batch_10177__LMRCF_LiquiRPT_LRCF3',
        10175: 'Engine_Batch_10175__LMRCF_LiquiRPT_LRCF2',
        171: 'Engine_Batch_171__LiquiRPT_LRCF1'
    }
    result = utils.build_tree(taskList=taskList,
                              businessdate=self.businessdate,
                              filespath=self.filespath,
                              useconfig=True,
                              additionalDependency={10185: 0})
    return [
        result['Engine_Batch_171__LiquiRPT_LRCF1'],
        result['Engine_Batch_10175__LMRCF_LiquiRPT_LRCF2'],
        result['Engine_Batch_10177__LMRCF_LiquiRPT_LRCF3']
    ]
def main():
    ################################
    # Module 1: data preparation
    data_ = data.Data(args.data_dir, args.vocab_size)

    # Process the ICD tree.
    parient_children, level2_parients, leafNodes, adj, node2id, hier_dicts = utils.build_tree(
        os.path.join(args.data_dir, 'note_labeled.csv'))
    graph = utils.generate_graph(parient_children, node2id)
    args.node2id = node2id
    args.adj = torch.Tensor(adj).long().to(args.device)
    args.leafNodes = leafNodes
    args.hier_dicts = hier_dicts

    # TODO: details of the GenBatcher object.
    g_batcher = GenBatcher(data_, args)

    #################################
    # Module 2: create the generator (G) and pre-train it.
    # TODO: details of the Generator object.
    gen_model = Generator(args, data_, graph, level2_parients)
    gen_model.to(args.device)

    # TODO: details of the Generated_example object.
    generated = Generated_example(gen_model, data_, g_batcher)

    # Pre-train the G model.
    pre_train_generator(gen_model, g_batcher, 10)

    # Use G to generate some negative samples.
    generated.generator_train_negative_samples()
    generated.generator_test_negative_samples()

    #####################################
    # Module 3: create the discriminator (D) and pre-train it.
    d_model = Discriminator(args, data_)
    d_batcher = DisBatcher(data_, args)

    # Pre-train the D model.
    pre_train_discriminator(d_model, d_batcher, 25)

    ########################################
    # Module 4: alternately train the G and D models.
    for epoch in range(args.num_epochs):
        batches = g_batcher.get_batches(mode='train')
        for step in range(int(len(batches) / 1000)):
            # Train the G model.
            train_generator(gen_model, d_model, g_batcher, d_batcher,
                            batches[step * 1000:(step + 1) * 1000], generated)
            # Generate negative samples for training D.
            generated.generator_samples(
                "train_sample_generated/" + str(epoch) + "epoch_step" +
                str(step) + "_temp_positive",
                "train_sample_generated/" + str(epoch) + "epoch_step" +
                str(step) + "_temp_negative", 1000)
            # Generate test samples.
            generated.generator_test_samples()
            # TODO: evaluate the G model's performance.

            # Create training batches for D (containing both negative and
            # positive samples).
            d_batcher.train_batch = d_batcher.create_batches(mode='train', shuffleis=True)

            # Train the D network.
            train_discriminator(d_model, 5, d_batcher,
                                d_batcher.get_batches(mode="train"))
from math import floor
from random import shuffle
import operator


def split_train_test(data_set, p):
    # Training on the entire set maximizes accuracy, so p == 1 returns the
    # full set for both halves.
    if p == 1:
        return data_set, data_set
    data_set_len = len(data_set)
    train_split = floor(p * data_set_len)
    shuffle(data_set)
    return data_set[:train_split], data_set[train_split:]


if __name__ == '__main__':
    """
    If we use the entire set to train we will get maximum accuracy.
    Splitting the dataset will decrease the accuracy.
    """
    accuracy = float(input("Enter the accuracy of prediction you desire: "))
    data_set = readData('dataset.data')
    train, test = split_train_test(data_set, accuracy)
    tree = build_tree(data_set)
    good_B = 0
    good_R = 0
    good_L = 0
    for row in train:
        prediction = classify(row, tree)
        letter = max(prediction.items(), key=operator.itemgetter(1))[0]
        if row[0] == letter and row[0] == 'L':
            good_L += 1
        if row[0] == letter and row[0] == 'B':
            good_B += 1
        if row[0] == letter and row[0] == 'R':
            good_R += 1
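    # Hedged continuation sketch (continues the __main__ block above): the
    # loop tallies per-class hits but the snippet stops before reporting them.
    # One plausible summary, assuming row[0] holds the true label ('L', 'B',
    # or 'R'):
    total = len(train)
    correct = good_L + good_B + good_R
    print("L: %d  B: %d  R: %d  overall accuracy: %.2f%%" %
          (good_L, good_B, good_R, 100.0 * correct / total))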
output_dir = relative_to_absolute(CONFIG_FILE, OUTPUT_DIRECTORY)
create_dir(output_dir)
copy_file(relative_to_absolute(__file__, "css/default.css"), output_dir)
for filename in config.get("EXTRA_FILES", []):
    copy_file(relative_to_absolute(CONFIG_FILE, filename), output_dir)

for work in config["works"]:
    SRC = relative_to_absolute(CONFIG_FILE, SRC_PATTERN.format(**work))
    DST = relative_to_absolute(CONFIG_FILE, DST_PATTERN.format(**work))
    render(
        "page.html",
        {
            "node": build_tree(SRC, work["title"], work["levels"]),
            "collection": config["collection"],
        },
        DST,
        absolute_directory(CONFIG_FILE))

render(
    "index.html",
    {
        "works": [{
            "link": f"./{work['source']}.html",
            "title": work["title"],
        } for work in config["works"]],
        "collection": config["collection"],
        "subtitle": config["subtitle"],
        "repo_link": config.get("repo_link"),
from utils import (print_tree, print_results, build_tree)

# Adapted from https://github.com/random-forests/tutorials/blob/master/decision_tree.py
training_data = [
    ['Green', 3, 'Apple'],
    ['Yellow', 3, 'Apple'],
    ['Red', 1, 'Grape'],
    ['Red', 1, 'Grape'],
    ['Yellow', 3, 'Lemon'],
]

# Column labels.
# These are used only to print the tree.
header = ["color", "diameter", "label"]

my_tree = build_tree(training_data)
print_tree(my_tree)

# Evaluate
testing_data = [
    ['Green', 3, 'Apple'],
    ['Yellow', 4, 'Apple'],
    ['Red', 2, 'Grape'],
    ['Red', 1, 'Grape'],
    ['Yellow', 3, 'Lemon'],
]

for row in testing_data:
    print("Actual: %s. Predicted: %s" %
          (row[-1], print_results(my_tree.classify(row))))
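# Hedged helper sketch: in the upstream tutorial this is adapted from, a leaf
# stores raw class counts and the printing helper converts them to
# percentages. print_results likely does something similar; a rough
# equivalent (the name as_percentages is hypothetical):
def as_percentages(counts):
    total = float(sum(counts.values()))
    return {label: "%.0f%%" % (count / total * 100)
            for label, count in counts.items()}

# e.g. as_percentages({'Apple': 1, 'Lemon': 1}) -> {'Apple': '50%', 'Lemon': '50%'}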
n_group_codes = 272

training_file = '../data/mimic/mimic.train'
validation_file = '../data/mimic/mimic.valid'
testing_file = '../data/mimic/mimic.test'
model_file = '../models/mimic/camp_{}.model'.format(args.description)
hierarchy_file = '../data/mimic/mimic.forgram'

bceLoss = nn.BCELoss(reduction='none')
device = torch.device("cuda:{}".format(args.device)
                      if torch.cuda.is_available() else "cpu")

types, newfather, corMat, ini_embds = pickle.load(open(hierarchy_file, 'rb'))
KMIds = [types['.{}'.format(i)] for i in range(1, 18)]
n_total_medical_nodes = corMat.shape[0]
leavesList, ancestorList, mapInfo = build_tree(corMat, n_group_codes, device)

new_version = '1.0.' in torch.__version__
print("available device: {}".format(device))

train, valid, test = loadData(training_file, validation_file, testing_file)
n_batches = int(np.ceil(float(len(train[0])) / float(args.batch_size)))
print('n_batches:{}'.format(n_batches))

ini_embds = torch.FloatTensor(ini_embds).to(device)
mapInfo = torch.LongTensor(mapInfo).to(device)

rnn = gruPredictor(n_medical_codes, n_total_medical_nodes, args.embd_size,
                   args.hidden_size, args.atten_size, leavesList,
                   ancestorList, mapInfo, ini_embds, KMIds, args.value_size,
                   args.profile_size, args.profile_embd_size,
                   args.drop_rate).to(device)
import utils
import SplitStan
import json
from csv import reader


# Load a CSV file
def load_csv(filename):
    file = open(filename, "r")
    lines = reader(file)
    datalist = list(lines)
    # Drop the index and id columns.
    for row in datalist:
        row.pop(0)
        row.pop(0)
    cols_name = datalist.pop(0)
    # Convert string attributes to floats.
    for i in range(len(datalist[0])):
        utils.str_column_to_float(datalist, i)
    return utils.DataSet(cols_name, datalist)


DS = load_csv("all.csv")
pro_rec = utils.ProcedureRecorder(len(DS.data))
tree = utils.build_tree(DS.data, SplitStan.gain_ratio)
utils.name_cols(DS.cols_name, tree)  # modified utils lines 108/118/121

js_obj = json.dumps(tree)
file_obj = open('ShowTree//app//C45_decision_tree.json', 'w')
file_obj.write(js_obj)
file_obj.close()
import utils
import SplitStan
import json
from csv import reader


# Load a CSV file
def load_csv(filename):
    file = open(filename, "r")
    lines = reader(file)
    datalist = list(lines)
    # Drop the index and id columns.
    for row in datalist:
        row.pop(0)
        row.pop(0)
    cols_name = datalist.pop(0)
    # Convert string attributes to floats.
    for i in range(len(datalist[0])):
        utils.str_column_to_float(datalist, i)
    return utils.DataSet(cols_name, datalist)


DS = load_csv("all.csv")
pro_rec = utils.ProcedureRecorder(len(DS.data))
tree = utils.build_tree(DS.data, SplitStan.gini_index)
utils.name_cols(DS.cols_name, tree)

js_obj = json.dumps(tree)
file_obj = open('TreeViz//src//gini_decision_tree.json', 'w')
file_obj.write(js_obj)
file_obj.close()
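# Hedged round-trip sketch (continues the script above): json.dumps succeeding
# implies the tree is JSON-native data, so the file written for the TreeViz
# front end can be reloaded for a quick sanity check.
with open('TreeViz//src//gini_decision_tree.json') as f:
    reloaded = json.load(f)
# Equality holds when the tree uses only str keys and JSON-native types.
assert reloaded == tree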
def main():
    ################################
    # Module 1: data preparation
    data_ = data.Data(args.data_dir, args.vocab_size)

    # Process the ICD tree.
    parient_children, level2_parients, leafNodes, adj, node2id, hier_dicts = utils.build_tree(
        os.path.join(args.data_dir, 'note_labeled_v2.csv'))
    graph = utils.generate_graph(parient_children, node2id)
    args.node2id = node2id
    args.id2node = {id: node for node, id in node2id.items()}
    args.adj = torch.Tensor(adj).long().to(args.device)
    # args.leafNodes=leafNodes
    args.hier_dicts = hier_dicts
    # args.level2_parients=level2_parients
    #print('836:',args.id2node.get(836),args.id2node.get(0))

    # TODO: details of the GenBatcher object.
    g_batcher = GenBatcher(data_, args)

    #################################
    # Module 2: create the generator (G) and pre-train it.
    # TODO: details of the Generator object.
    gen_model_eval = Generator(args, data_, graph, level2_parients)
    gen_model_target = Generator(args, data_, graph, level2_parients)
    gen_model_target.eval()
    print(gen_model_eval)

    # for name,param in gen_model_eval.named_parameters():
    #     print(name,param.size(),type(param))

    buffer = ReplayBuffer(capacity=100000)

    gen_model_eval.to(args.device)
    gen_model_target.to(args.device)

    # TODO: details of the Generated_example object.
    # Pre-train the G model.
    #pre_train_generator(gen_model,g_batcher,10)

    #####################################
    # Module 3: create the discriminator (D) and pre-train it.
    d_model = Discriminator(args)
    d_model.to(args.device)

    # Pre-train the D model.
    #pre_train_discriminator(d_model,d_batcher,25)

    ########################################
    # Module 4: alternately train the G and D models.

    # Write evaluation results to a file.
    f = open('valid_result.csv', 'w')
    writer = csv.writer(f)
    writer.writerow([
        'avg_micro_p', 'avg_macro_p', 'avg_micro_r', 'avg_macro_r',
        'avg_micro_f1', 'avg_macro_f1', 'avg_micro_auc_roc',
        'avg_macro_auc_roc'
    ])
    epoch_f = []
    for epoch in range(args.num_epochs):
        batches = g_batcher.get_batches(mode='train')
        print('number of batches:', len(batches))
        for step in range(len(batches)):
            #print('step:',step)
            current_batch = batches[step]
            ehrs = [example.ehr for example in current_batch]
            ehrs = torch.Tensor(ehrs).long().to(args.device)

            hier_labels = [example.hier_labels for example in current_batch]
            true_labels = []

            # Pad hier_labels: i indexes samples, j indexes each path of a
            # sample; every path is zero-padded to length 4.
            for i in range(len(hier_labels)):
                for j in range(len(hier_labels[i])):
                    if len(hier_labels[i][j]) < 4:
                        hier_labels[i][j] = hier_labels[i][j] + [0] * (4 - len(hier_labels[i][j]))
                # if len(hier_labels[i]) < args.k:
                #     for time in range(args.k - len(hier_labels[i])):
                #         hier_labels[i].append([0] * args.hops)

            for sample in hier_labels:
                #print('sample:',sample)
                true_labels.append([row[1] for row in sample])

            predHierLabels, batchStates_n, batchHiddens_n = generator.generated_negative_samples(
                gen_model_eval, d_model, ehrs, hier_labels, buffer)

            #true_labels = [example.labels for example in current_batch]
            _, _, avgJaccard = full_eval.process_labels(predHierLabels,
                                                        true_labels, args)

            # G generates positive samples for training D.
            batchStates_p, batchHiddens_p = generator.generated_positive_samples(
                gen_model_eval, ehrs, hier_labels, buffer)

            # Train the D network.
            #d_loss=train_discriminator(d_model,batchStates_n,batchHiddens_n,batchStates_p,batchHiddens_p,mode=args.mode)

            # Train the G model.
            #for g_epoch in range(10):
            g_loss = train_generator(gen_model_eval, gen_model_target, d_model,
                                     batchStates_n, batchHiddens_n, buffer,
                                     mode=args.mode)

            print('batch_number:{}, avgJaccard:{:.4f}, g_loss:{:.4f}'.format(
                step, avgJaccard, g_loss))

        # After each epoch, evaluate G's (and D's) performance on the
        # validation set.
        avg_micro_f1 = evaluate(g_batcher, gen_model_eval, d_model, buffer,
                                writer, flag='valid')
        epoch_f.append(avg_micro_f1)

    # Plot the results.
    window = int(args.num_epochs / 20)
    print('window:', window)
    fig, (ax1, ax2) = plt.subplots(2, 1, sharey=True, figsize=[9, 9])
    rolling_mean = pd.Series(epoch_f).rolling(window).mean()
    std = pd.Series(epoch_f).rolling(window).std()
    ax1.plot(rolling_mean)
    ax1.fill_between(range(len(epoch_f)),
                     rolling_mean - std,
                     rolling_mean + std,
                     color='orange',
                     alpha=0.2)
    ax1.set_title('F1 Moving Average ({}-epoch window)'.format(window))
    ax1.set_xlabel('Epoch Number')
    ax1.set_ylabel('F1')

    ax2.plot(epoch_f)
    ax2.set_title('Performance on valid set')
    ax2.set_xlabel('Epoch Number')
    ax2.set_ylabel('F1')

    fig.tight_layout(pad=2)
    plt.show()
    fig.savefig('results.png')

    f.close()
'''
Summary:
1. Post-order traversal: visit each child's subtree from left to right,
   then the root.
'''
from typing import List

EXAMPLES = [
    # (
    #     build_tree(*[
    #         1, None, 2, 3, 4, 5, None, None, 6, 7, None, 8, None, 9, 10,
    #         None, None, 11, None, 12, None, 13, None, None, 14,
    #     ]),
    #     [2, 6, 14, 11, 7, 3, 12, 8, 4, 13, 9, 10, 5, 1]
    # ),
    (
        build_tree(*[1, None, 3, 2, 4, None, 5, 6]),
        [5, 6, 3, 2, 4, 1],
    ),
]


class Solution:
    def postorder(self, root: Node) -> List[int]:
        r = []
        if root:
            if root.children:
                r += sum([self.postorder(child) for child in root.children], [])
            r.append(root.val)
        return r
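# Hedged test-driver sketch for the EXAMPLES table above, assuming build_tree
# returns the root Node encoded by the level-order argument list:
if __name__ == '__main__':
    s = Solution()
    for root, expected in EXAMPLES:
        assert s.postorder(root) == expected
    print('all postorder examples pass')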
def regenerate_tree(self, segment, weights, deep=DEFAULT_DEEP):
    from utils import build_tree
    clf = self._trainer.get_classifier(segment)
    return build_tree(clf.tree_, weights, max_deep=deep)