Example 1
File: idt.py Project: Fuhji/TopoMC
    def __init__(self, coords, values, wantCL=True, platform_num=None):
        """
        Take the coordinates and values and build a KD tree.

        Keyword arguments:
        coords -- input coordinates (x, y)
        values -- input values

        """

        self.coords = np.asarray(coords, dtype=np.float32)
        self.values = np.asarray(values, dtype=np.int32)

        if self.coords.shape[0] != self.values.shape[0]:
            raise AssertionError('lencoords does not equal lenvalues')

        self.wantCL = wantCL
        self.canCL = False

        if hasCL and self.wantCL:
            try:
                platforms = cl.get_platforms()
                try:
                    platform = platforms[platform_num]
                    self.devices = platform.get_devices()
                    self.context = cl.Context(self.devices)
                except TypeError:
                    # The user may be asked to select a platform.
                    self.context = cl.create_some_context()
                    self.devices = self.context.devices
                except IndexError:
                    raise
                self.queue = cl.CommandQueue(self.context)
                filestr = ''.join(open('idt.cl', 'r').readlines())
                self.program = cl.Program(self.context, filestr).build(devices=self.devices)
                for device in self.devices:
                    buildlog = self.program.get_build_info(device, cl.program_build_info.LOG)
                    if (len(buildlog) > 1):
                        print 'Build log for device', device, ':\n', buildlog
                # Only the first kernel is used.
                self.kernel = self.program.all_kernels()[0]

                # Local and global sizes are device-dependent.
                self.local_size = {}
                self.global_size = {}
                # Groups should be overcommitted.
                # For now, use 3 (48 cores / 16 cores per halfwarp) * 2
                for device in self.devices:
                    work_group_size = self.kernel.get_work_group_info(cl.kernel_work_group_info.WORK_GROUP_SIZE, device)
                    num_groups_for_1d = device.max_compute_units * 3 * 2
                    self.local_size[device] = (work_group_size,)
                    self.global_size[device] = (num_groups_for_1d * work_group_size,)
                self.canCL = True
            except cl.RuntimeError:
                print 'warning: unable to use pyopencl, defaulting to cKDTree'

        if self.canCL:
            self.tree = build_tree(coords)
        else:
            self.tree = KDTree(coords)
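
A minimal usage sketch for context (assuming the enclosing class is TopoMC's IDT; the coordinates and values below are made-up test data):

    import numpy as np

    coords = np.random.rand(1000, 2) * 512    # (x, y) sample positions
    values = np.random.randint(0, 4, 1000)    # one integer value per point
    idt = IDT(coords, values, wantCL=False)   # skip OpenCL; use the cKDTree path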
Example 2
 def regenerate_trees(self, segment, weights, deep=DEFAULT_DEEP):
     from utils import build_tree
     trees = []
     trees_clf = self._trainer.get_classifier(segment)
     for clf in trees_clf.estimators_:
         tree = build_tree(
             clf.tree_,
             # self.weights_calc.get_weights(segment, signed=False),
             weights,
             max_deep=deep
         )
         trees.append(tree)
     return trees
Example 3
 def requires(self):
     taskList = {
         0: 'Liqui_Adjustment_Upload_Check',
         10191: 'Loader_Batch_10191',
         10178: 'Engine_Batch_10178__LiquiRPTAdj_LRCF3',
         172: 'Engine_Batch_172__LiquiRPTAdj_LRCF2'
     }
     result = utils.build_tree(taskList=taskList,
                               businessdate=self.businessdate,
                               filespath=self.filespath,
                               useconfig=False,
                               additionalDependency={
                                   10191: 0,
                                   10178: 10191,
                                   172: 10191
                               })
     return [
         result['Engine_Batch_10178__LiquiRPTAdj_LRCF3'],
         result['Engine_Batch_172__LiquiRPTAdj_LRCF2']
     ]
Example 4
    def requires(self):
        taskList = {
            0: 'Daily_Liqui_Data_Ready_Check',
            10185: 'Extractor_Batch_10185',
            167: 'Loader_Batch_167',
            5: 'Engine_Batch_5__COTR',
            10174: 'Engine_Batch_10174__LCCF_LRCF_ICCF',
            10177: 'Engine_Batch_10177__LMRCF_LiquiRPT_LRCF3',
            10175: 'Engine_Batch_10175__LMRCF_LiquiRPT_LRCF2',
            171: 'Engine_Batch_171__LiquiRPT_LRCF1'
        }

        result = utils.build_tree(taskList=taskList,
                                  businessdate=self.businessdate,
                                  filespath=self.filespath,
                                  useconfig=True,
                                  additionalDependency={10185: 0})
        return [
            result['Engine_Batch_171__LiquiRPT_LRCF1'],
            result['Engine_Batch_10175__LMRCF_LiquiRPT_LRCF2'],
            result['Engine_Batch_10177__LMRCF_LiquiRPT_LRCF3']
        ]
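
In Examples 3 and 4, additionalDependency reads as a child-to-prerequisite mapping between batch ids. A hypothetical illustration using Example 3's data (utils.build_tree itself is project-internal and not shown in these listings):

    taskList = {
        0: 'Liqui_Adjustment_Upload_Check',
        10191: 'Loader_Batch_10191',
        10178: 'Engine_Batch_10178__LiquiRPTAdj_LRCF3',
        172: 'Engine_Batch_172__LiquiRPTAdj_LRCF2',
    }
    additionalDependency = {10191: 0, 10178: 10191, 172: 10191}
    for child, parent in additionalDependency.items():
        print('%s depends on %s' % (taskList[child], taskList[parent]))
    # Loader_Batch_10191 depends on Liqui_Adjustment_Upload_Check
    # Engine_Batch_10178__LiquiRPTAdj_LRCF3 depends on Loader_Batch_10191
    # Engine_Batch_172__LiquiRPTAdj_LRCF2 depends on Loader_Batch_10191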
Example 5
    def __init__(self, coords, values, wantCL=True, platform_num=None):
        """
        Take the coordinates and values and build a KD tree.

        Keyword arguments:
        coords -- input coordinates (x, y)
        values -- input values

        """

        self.coords = np.asarray(coords, dtype=np.float32)
        self.values = np.asarray(values, dtype=np.int32)

        if self.coords.shape[0] != self.values.shape[0]:
            raise AssertionError('lencoords does not equal lenvalues')

        self.wantCL = wantCL
        self.canCL = False

        if hasCL and self.wantCL:
            try:
                platforms = cl.get_platforms()
                try:
                    platform = platforms[platform_num]
                    self.devices = platform.get_devices()
                    self.context = cl.Context(self.devices)
                except TypeError:
                    # The user may be asked to select a platform.
                    self.context = cl.create_some_context()
                    self.devices = self.context.devices
                except IndexError:
                    raise
                self.queue = cl.CommandQueue(self.context)
                filestr = ''.join(open('idt.cl', 'r').readlines())
                self.program = cl.Program(self.context,
                                          filestr).build(devices=self.devices)
                for device in self.devices:
                    buildlog = self.program.get_build_info(
                        device, cl.program_build_info.LOG)
                    if (len(buildlog) > 1):
                        print 'Build log for device', device, ':\n', buildlog
                # Only the first kernel is used.
                self.kernel = self.program.all_kernels()[0]

                # Local and global sizes are device-dependent.
                self.local_size = {}
                self.global_size = {}
                # Groups should be overcommitted.
                # For now, use 3 (48 cores / 16 cores per halfwarp) * 2
                for device in self.devices:
                    work_group_size = self.kernel.get_work_group_info(
                        cl.kernel_work_group_info.WORK_GROUP_SIZE, device)
                    num_groups_for_1d = device.max_compute_units * 3 * 2
                    self.local_size[device] = (work_group_size, )
                    self.global_size[device] = (num_groups_for_1d *
                                                work_group_size, )
                self.canCL = True
            except cl.RuntimeError:
                print 'warning: unable to use pyopencl, defaulting to cKDTree'

        if self.canCL:
            self.tree = build_tree(coords)
        else:
            self.tree = KDTree(coords)
Example 6
def main():
    ################################
    ## Module 1: data preparation
    data_ = data.Data(args.data_dir, args.vocab_size)

    # Process the ICD tree
    parient_children, level2_parients, leafNodes, adj, node2id, hier_dicts = utils.build_tree(
        os.path.join(args.data_dir, 'note_labeled.csv'))
    graph = utils.generate_graph(parient_children, node2id)
    args.node2id = node2id
    args.adj = torch.Tensor(adj).long().to(args.device)
    args.leafNodes = leafNodes
    args.hier_dicts = hier_dicts

    # TODO: details of the batcher object
    g_batcher = GenBatcher(data_, args)

    #################################
    ## Module 2: create the G model and pre-train it
    # TODO: details of the Generator object
    gen_model = Generator(args, data_, graph, level2_parients)

    gen_model.to(args.device)
    # TODO: details of the generated object
    generated = Generated_example(gen_model, data_, g_batcher)
    # Pre-train the G model
    pre_train_generator(gen_model, g_batcher, 10)

    # Use G to generate some negative samples
    generated.generator_train_negative_samples()
    generated.generator_test_negative_samples()

    #####################################
    ## Module 3: create the D model and pre-train it
    d_model = Discriminator(args, data_)

    d_batcher = DisBatcher(data_, args)

    # Pre-train the D model
    pre_train_discriminator(d_model, d_batcher, 25)

    ########################################
    ## Module 4: alternately train the G and D models
    for epoch in range(args.num_epochs):
        batches = g_batcher.get_batches(mode='train')
        for step in range(int(len(batches) / 1000)):

            # Train the G model
            train_generator(gen_model, d_model, g_batcher, d_batcher,
                            batches[step * 1000:(step + 1) * 1000], generated)

            # Generate negative samples for training D
            generated.generator_samples(
                "train_sample_generated/" + str(epoch) + "epoch_step" +
                str(step) + "_temp_positive", "train_sample_generated/" +
                str(epoch) + "epoch_step" + str(step) + "_temp_negative", 1000)

            # Generate test samples
            generated.generator_test_samples()

            # TODO: evaluate the G model's performance

            # Create batches for training D (containing both negative and positive samples)
            d_batcher.train_batch = d_batcher.create_batches(mode='train',
                                                             shuffleis=True)

            # Train the D network
            train_discriminator(d_model, 5, d_batcher,
                                d_batcher.get_batches(mode="train"))
Example 7
# Reconstructed opening: the listing lost this example's header and
# first lines; only the tail of the split helper survived.
from math import floor
from random import shuffle
import operator


def split_train_test(data_set, p):
    # With p == 1, train on the entire set (maximum accuracy; see the
    # docstring in the __main__ block below).
    if p == 1:
        return data_set, data_set
    data_set_len = len(data_set)
    train_split = floor(p * data_set_len)
    shuffle(data_set)
    return data_set[:train_split], data_set[train_split:]


if __name__ == '__main__':
    """
        If we use the entire set to train we will get maximum accuracy
        Splitting the dataset will decrease the accuracy
    """
    accuracy = float(input("Enter the accuracy of prediction you desire: "))
    data_set = readData('dataset.data')
    train, test = split_train_test(data_set, accuracy)
    tree = build_tree(data_set)
    good_B = 0
    good_R = 0
    good_L = 0
    for row in train:
        prediction = classify(row, tree)
        letter = max(prediction.items(), key=operator.itemgetter(1))[0]

        if row[0] == letter and row[0] == 'L':
            good_L += 1

        if row[0] == letter and row[0] == 'B':
            good_B += 1

        if row[0] == letter and row[0] == 'R':
            good_R += 1
Example 8
output_dir = relative_to_absolute(CONFIG_FILE, OUTPUT_DIRECTORY)

create_dir(output_dir)
copy_file(relative_to_absolute(__file__, "css/default.css"), output_dir)

for filename in config.get("EXTRA_FILES", []):
    copy_file(relative_to_absolute(CONFIG_FILE, filename), output_dir)

for work in config["works"]:
    SRC = relative_to_absolute(CONFIG_FILE, SRC_PATTERN.format(**work))
    DST = relative_to_absolute(CONFIG_FILE, DST_PATTERN.format(**work))

    render(
        "page.html", {
            "node": build_tree(SRC, work["title"], work["levels"]),
            "collection": config["collection"],
        }, DST, absolute_directory(CONFIG_FILE))

render(
    "index.html", {
        "works": [{
            "link": f"./{work['source']}.html",
            "title": work["title"],
        } for work in config["works"]],
        "collection": config["collection"],
        "subtitle": config["subtitle"],
        "repo_link": config.get("repo_link"),
Example 9
from utils import (print_tree, print_results, build_tree)
# Adapted from https://github.com/random-forests/tutorials/blob/master/decision_tree.py
training_data = [
    ['Green', 3, 'Apple'],
    ['Yellow', 3, 'Apple'],
    ['Red', 1, 'Grape'],
    ['Red', 1, 'Grape'],
    ['Yellow', 3, 'Lemon'],
]

# Column labels.
# These are used only to print the tree.
header = ["color", "diameter", "label"]

my_tree = build_tree(training_data)


print_tree(my_tree)

# Evaluate
testing_data = [
    ['Green', 3, 'Apple'],
    ['Yellow', 4, 'Apple'],
    ['Red', 2, 'Grape'],
    ['Red', 1, 'Grape'],
    ['Yellow', 3, 'Lemon'],
]

for row in testing_data:
    print("Actual: %s. Predicted: %s" %
          (row[-1], print_results(my_tree.classify(row))))
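
print_results is not shown in these listings; in the decision_tree.py tutorial this example cites, the corresponding helper (there named print_leaf) turns a leaf's class counts into percentage strings. A sketch under that assumption:

    def print_results(counts):
        # Convert {label: count} into {label: 'NN%'} for display.
        total = sum(counts.values()) * 1.0
        return {label: '%.0f%%' % (counts[label] / total * 100)
                for label in counts}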
Example 10
    n_group_codes = 272

    training_file = '../data/mimic/mimic.train'
    validation_file = '../data/mimic/mimic.valid'
    testing_file = '../data/mimic/mimic.test'
    model_file = '../models/mimic/camp_{}.model'.format(args.description)
    hierarchy_file = '../data/mimic/mimic.forgram'
    bceLoss = nn.BCELoss(reduction='none')
    device = torch.device("cuda:{}".format(args.device)
                          if torch.cuda.is_available() else "cpu")
    types, newfather, corMat, ini_embds = pickle.load(
        open(hierarchy_file, 'rb'))
    KMIds = [types['.{}'.format(i)] for i in range(1, 18)]

    n_total_medical_nodes = corMat.shape[0]
    leavesList, ancestorList, mapInfo = build_tree(corMat, n_group_codes,
                                                   device)
    new_version = False
    if '1.0.' in torch.__version__:
        new_version = True

    print("available device: {}".format(device))
    train, valid, test = loadData(training_file, validation_file, testing_file)
    n_batches = int(np.ceil(float(len(train[0])) / float(args.batch_size)))
    print('n_batches:{}'.format(n_batches))
    ini_embds = torch.FloatTensor(ini_embds).to(device)
    mapInfo = torch.LongTensor(mapInfo).to(device)
    rnn = gruPredictor(n_medical_codes, n_total_medical_nodes, args.embd_size,
                       args.hidden_size, args.atten_size, leavesList,
                       ancestorList, mapInfo, ini_embds, KMIds,
                       args.value_size, args.profile_size,
                       args.profile_embd_size, args.drop_rate).to(device)
Example 11
from csv import reader

import utils
import SplitStan
import json


# Load a CSV file
def load_csv(filename):
    file = open(filename, "r")
    lines = reader(file)
    datalist = list(lines)
    # Drop the index and id columns
    for row in datalist:
        row.pop(0)
        row.pop(0)
    cols_name = datalist.pop(0)
    # convert string attributes to floats
    for i in range(len(datalist[0])):
        utils.str_column_to_float(datalist, i)
    return utils.DataSet(cols_name, datalist)


DS = load_csv("all.csv")
pro_rec = utils.ProcedureRecorder(len(DS.data))
tree = utils.build_tree(DS.data, SplitStan.gain_ratio)
utils.name_cols(DS.cols_name, tree)
# Modify utils lines 108/118/121

js_obj = json.dumps(tree)
file_obj = open('ShowTree//app//C45_decision_tree.json', 'w')
file_obj.write(js_obj)
file_obj.close()
Example 12
from csv import reader

import utils
import SplitStan
import json


# Load a CSV file
def load_csv(filename):
    file = open(filename, "r")
    lines = reader(file)
    datalist = list(lines)
    # Drop the index and id columns
    for row in datalist:
        row.pop(0)
        row.pop(0)
    cols_name = datalist.pop(0)
    # convert string attributes to floats
    for i in range(len(datalist[0])):
        utils.str_column_to_float(datalist, i)
    return utils.DataSet(cols_name, datalist)


DS = load_csv("all.csv")
pro_rec = utils.ProcedureRecorder(len(DS.data))
tree = utils.build_tree(DS.data, SplitStan.gini_index)
utils.name_cols(DS.cols_name, tree)

js_obj = json.dumps(tree)
file_obj = open('TreeViz//src//gini_decision_tree.json', 'w')
file_obj.write(js_obj)
file_obj.close()
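
str_column_to_float (used by Examples 11 and 12) is likewise project-internal. In tutorial code of this style it is usually the small in-place converter below (a sketch, not the project's actual utils):

    def str_column_to_float(dataset, column):
        # Convert one column of string cells to float, in place.
        for row in dataset:
            row[column] = float(row[column].strip())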
Example 13
def main():
    ################################
    ## Module 1: data preparation
    data_ = data.Data(args.data_dir, args.vocab_size)

    # Process the ICD tree
    parient_children, level2_parients, leafNodes, adj, node2id, hier_dicts = utils.build_tree(
        os.path.join(args.data_dir, 'note_labeled_v2.csv'))
    graph = utils.generate_graph(parient_children, node2id)
    args.node2id = node2id
    args.id2node = {id: node for node, id in node2id.items()}
    args.adj = torch.Tensor(adj).long().to(args.device)
    # args.leafNodes=leafNodes
    args.hier_dicts = hier_dicts
    # args.level2_parients=level2_parients
    #print('836:',args.id2node.get(836),args.id2node.get(0))

    # TODO: details of the batcher object
    g_batcher = GenBatcher(data_, args)

    #################################
    ## Module 2: create the G model and pre-train it
    # TODO: details of the Generator object
    gen_model_eval = Generator(args, data_, graph, level2_parients)
    gen_model_target = Generator(args, data_, graph, level2_parients)
    gen_model_target.eval()
    print(gen_model_eval)

    # for name,param in gen_model_eval.named_parameters():
    #     print(name,param.size(),type(param))
    buffer = ReplayBuffer(capacity=100000)
    gen_model_eval.to(args.device)
    gen_model_target.to(args.device)

    # TODO: details of the generated object

    # Pre-train the G model
    #pre_train_generator(gen_model,g_batcher,10)

    #####################################
    ## Module 3: create the D model and pre-train it
    d_model = Discriminator(args)
    d_model.to(args.device)

    # Pre-train the D model
    #pre_train_discriminator(d_model,d_batcher,25)

    ########################################
    ## Module 4: alternately train the G and D models

    # Write evaluation results to a file
    f = open('valid_result.csv', 'w')
    writer = csv.writer(f)
    writer.writerow([
        'avg_micro_p', 'avg_macro_p', 'avg_micro_r', 'avg_macro_r',
        'avg_micro_f1', 'avg_macro_f1', 'avg_micro_auc_roc',
        'avg_macro_auc_roc'
    ])
    epoch_f = []
    for epoch in range(args.num_epochs):
        batches = g_batcher.get_batches(mode='train')
        print('number of batches:', len(batches))
        for step in range(len(batches)):
            #print('step:',step)
            current_batch = batches[step]
            ehrs = [example.ehr for example in current_batch]
            ehrs = torch.Tensor(ehrs).long().to(args.device)

            hier_labels = [example.hier_labels for example in current_batch]

            true_labels = []

            # Pad each path in hier_labels to a fixed length of 4
            for i in range(len(hier_labels)):  # i indexes samples
                for j in range(len(hier_labels[i])):  # j indexes each sample's paths
                    if len(hier_labels[i][j]) < 4:
                        hier_labels[i][j] = hier_labels[i][j] + [0] * (
                            4 - len(hier_labels[i][j]))
                # if len(hier_labels[i]) < args.k:
                #     for time in range(args.k - len(hier_labels[i])):
                #         hier_labels[i].append([0] * args.hops)

            for sample in hier_labels:
                #print('sample:',sample)
                true_labels.append([row[1] for row in sample])

            predHierLabels, batchStates_n, batchHiddens_n = generator.generated_negative_samples(
                gen_model_eval, d_model, ehrs, hier_labels, buffer)

            #true_labels = [example.labels for example in current_batch]

            _, _, avgJaccard = full_eval.process_labels(
                predHierLabels, true_labels, args)

            # G generates positive samples for training D
            batchStates_p, batchHiddens_p = generator.generated_positive_samples(
                gen_model_eval, ehrs, hier_labels, buffer)

            # Train the D network
            #d_loss=train_discriminator(d_model,batchStates_n,batchHiddens_n,batchStates_p,batchHiddens_p,mode=args.mode)

            # Train the G model
            #for g_epoch in range(10):
            g_loss = train_generator(gen_model_eval,
                                     gen_model_target,
                                     d_model,
                                     batchStates_n,
                                     batchHiddens_n,
                                     buffer,
                                     mode=args.mode)

            print('batch_number:{}, avgJaccard:{:.4f}, g_loss:{:.4f}'.format(
                step, avgJaccard, g_loss))

        # After each epoch, evaluate the G and D models on the validation set
        avg_micro_f1 = evaluate(g_batcher,
                                gen_model_eval,
                                d_model,
                                buffer,
                                writer,
                                flag='valid')
        epoch_f.append(avg_micro_f1)

    # Plot results
    window = int(args.num_epochs / 20)
    print('window:', window)
    fig, ((ax1), (ax2)) = plt.subplots(2, 1, sharey=True, figsize=[9, 9])
    rolling_mean = pd.Series(epoch_f).rolling(window).mean()
    std = pd.Series(epoch_f).rolling(window).std()
    ax1.plot(rolling_mean)
    ax1.fill_between(range(len(epoch_f)),
                     rolling_mean - std,
                     rolling_mean + std,
                     color='orange',
                     alpha=0.2)
    ax1.set_title(
        'Episode Length Moving Average ({}-episode window)'.format(window))
    ax1.set_xlabel('Epoch Number')
    ax1.set_ylabel('F1')

    ax2.plot(epoch_f)
    ax2.set_title('Performance on valid set')
    ax2.set_xlabel('Epoch Number')
    ax2.set_ylabel('F1')

    fig.tight_layout(pad=2)
    plt.show()
    fig.savefig('results.png')

    f.close()
Example 14
'''
Summary:

    1. Post-order traversal: visit the subtrees from left to right, then the root.
'''

EXAMPLES = [
    # (
    #     build_tree(*[
    #         1, None, 2, 3, 4, 5, None, None, 6, 7, None, 8, None, 9, 10,
    #         None, None, 11, None, 12, None, 13, None, None, 14,
    #     ]),
    #     [2, 6, 14, 11, 7, 3, 12, 8, 4, 13, 9, 10, 5, 1]
    # ),
    (
        build_tree(*[1, None, 3, 2, 4, None, 5, 6]),
        [5, 6, 3, 2, 4, 1],
    ),
]


class Solution:
    def postorder(self, root: Node) -> List[int]:
        r = []
        if root:
            if root.children:
                r += sum([self.postorder(child) for child in root.children],
                         [])

            r.append(root.val)
        return r
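
The build_tree this example relies on is not part of the snippet. A minimal sketch that matches the level-order encoding used in EXAMPLES, where None separates consecutive sibling groups (a Node with val and children is assumed, as in the LeetCode N-ary tree problems):

    from collections import deque

    class Node:
        def __init__(self, val=None, children=None):
            self.val = val
            self.children = children if children is not None else []

    def build_tree(*values):
        # Decode a level-order N-ary serialization: each None shifts
        # attachment to the next queued node's child group.
        if not values:
            return None
        root = Node(values[0])
        pending = deque([root])  # nodes whose child groups come next
        parent = None
        for val in values[1:]:
            if val is None:
                parent = pending.popleft()
            else:
                child = Node(val)
                parent.children.append(child)
                pending.append(child)
        return root

For instance, build_tree(*[1, None, 3, 2, 4, None, 5, 6]) yields root 1 with children 3, 2, 4, and 5, 6 as children of 3, matching the expected postorder [5, 6, 3, 2, 4, 1].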
Example 15
 def regenerate_tree(self, segment, weights, deep=DEFAULT_DEEP):
     from utils import build_tree
     clf = self._trainer.get_classifier(segment)
     return build_tree(clf.tree_, weights, max_deep=deep)