import json

from graphviz import Digraph, Source


def get_dot_from_json(json_data, out_filename):
    dot = Digraph(name="JSON_to_DOT")

    # Accept either an already-parsed dict or a path to a JSON file.
    if isinstance(json_data, dict):
        data = json_data
    else:
        with open(json_data, "r") as read_file:
            data = json.load(read_file)

    for node in data.get("nodes"):
        dot.node(name=str(node.get('id')),
                 label=node.get('title').lower(),
                 shape=("circle" if (node.get('type') == "circle") else "box"))
    for edge in data.get("edges"):
        dot.edge(tail_name=str(edge.get("source")),
                 head_name=str(edge.get("target")),
                 style=("dashed" if edge.get("type") == "dotted" else "solid"))

    # Source expects DOT source text, so pass the generated source string.
    gv = Source(dot.source)
    gv.save(filename=out_filename + ".dot")
    return out_filename + ".dot"
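A minimal usage sketch (not from the original project) showing how the helper above might be called with an in-memory graph description; the node and edge fields are hypothetical and simply mirror the keys the function reads:

example_graph = {
    "nodes": [
        {"id": 1, "title": "Start", "type": "circle"},
        {"id": 2, "title": "Process", "type": "box"},
    ],
    "edges": [
        {"source": 1, "target": 2, "type": "dotted"},
    ],
}
# Writes example_out.dot and returns its path.
print(get_dot_from_json(example_graph, "example_out"))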
Example #2
    def generate_process_model(self,
                               sub_log,
                               models_path,
                               event_data_original_name,
                               w_count,
                               activity=''):
        # create the folder for saving the process map if it does not exist
        models_path = self.model_type_definitions.get_models_path(
            models_path, event_data_original_name, activity)
        if not os.path.exists(models_path):
            os.makedirs(models_path)

        # mine the petri net (using Pm4Py - Inductive Miner)
        net, initial_marking, final_marking = inductive_miner.apply(sub_log)
        gviz = pn_visualizer.apply(net, initial_marking, final_marking)

        # save the process model
        output_filename = self.model_type_definitions.get_model_filename(
            event_data_original_name, w_count)
        print(f'Saving {models_path} - {output_filename}')
        gviz.save(filename=output_filename, directory=models_path)
        return PNModel(net, initial_marking, final_marking)
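The save call above uses graphviz's standard save(filename, directory) signature on the Digraph returned by the visualizer; a minimal standalone sketch of that pattern (all names here are illustrative, not the project's):

from graphviz import Digraph

g = Digraph(name="demo_model")
g.node("p1", shape="circle")
g.node("t1", shape="box")
g.edge("p1", "t1")
# Saves the DOT source as models/demo_model.gv and returns the path.
print(g.save(filename="demo_model.gv", directory="models"))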
Example #3
    def save(self):
        # Wrap the current DOT source, write it to tmp/graph and render tmp/graph.png
        s = Source(self.dot.source, filename='tmp/graph', format='png')
        s.save()
        s.render()
        return s
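For reference, a standalone sketch of the same save/render pattern on a hand-written DOT string (assuming the graphviz Python package and the Graphviz binaries are installed; the file names are illustrative):

from graphviz import Source

s = Source('digraph demo { a -> b }', filename='demo_graph', format='png')
print(s.save())    # writes the DOT source to demo_graph and returns the path
print(s.render())  # invokes Graphviz and returns the rendered path, demo_graph.png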
Example #4
def train(dataset):
    """Train on dataset for a number of steps."""
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Create a variable to count the number of train() calls. This equals the
        # number of batches processed * FLAGS.num_gpus.
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0), trainable=False)

        # Calculate the learning rate schedule.
        num_batches_per_epoch = (dataset.num_examples_per_epoch() /
                                 FLAGS.batch_size)
        decay_steps = int(num_batches_per_epoch * FLAGS.num_epochs_per_decay)

        # Decay the learning rate exponentially based on the number of steps.
        lr = tf.train.exponential_decay(FLAGS.initial_learning_rate,
                                        global_step,
                                        decay_steps,
                                        FLAGS.learning_rate_decay_factor,
                                        staircase=True)

        # Create an optimizer that performs gradient descent.
        opt = tf.train.RMSPropOptimizer(lr, RMSPROP_DECAY,
                                        momentum=RMSPROP_MOMENTUM,
                                        epsilon=RMSPROP_EPSILON)

        # Get images and labels for ImageNet and split the batch across GPUs.
        assert FLAGS.batch_size % FLAGS.num_gpus == 0, (
            'Batch size must be divisible by number of GPUs')
        split_batch_size = int(FLAGS.batch_size / FLAGS.num_gpus)

        # Override the number of preprocessing threads to account for the increased
        # number of GPU towers.
        num_preprocess_threads = FLAGS.num_preprocess_threads * FLAGS.num_gpus
        images, labels = image_processing.distorted_inputs(
            dataset,
            num_preprocess_threads=num_preprocess_threads)

        input_summaries = copy.copy(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # Number of classes in the Dataset label set plus 1.
        # Label 0 is reserved for an (unused) background class.
        num_classes = dataset.num_classes() + 1

        # Split the batch of images and labels for towers.
        images_splits = tf.split(axis=0, num_or_size_splits=FLAGS.num_gpus, value=images)
        labels_splits = tf.split(axis=0, num_or_size_splits=FLAGS.num_gpus, value=labels)

        # Calculate the gradients for each model tower.
        tower_grads = []
        reuse_variables = None
        for i in range(FLAGS.num_gpus):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s_%d' % (inception.TOWER_NAME, i)) as scope:
                    # Force all Variables to reside on the CPU.
                    with slim.arg_scope([slim.variables.variable], device='/cpu:0'):
                        # Calculate the loss for one tower of the ImageNet model. This
                        # function constructs the entire ImageNet model but shares the
                        # variables across all towers.
                        loss = _tower_loss(images_splits[i], labels_splits[i], num_classes,
                                           scope, reuse_variables)

                    # Reuse variables for the next tower.
                    reuse_variables = True

                    # Retain the summaries from the final tower.
                    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)

                    # Retain the Batch Normalization updates operations only from the
                    # final tower. Ideally, we should grab the updates from all towers
                    # but these stats accumulate extremely fast so we can ignore the
                    # other stats from the other towers without significant detriment.
                    batchnorm_updates = tf.get_collection(slim.ops.UPDATE_OPS_COLLECTION,
                                                          scope)

                    # Calculate the gradients for the batch of data on this ImageNet
                    # tower.
                    grads = opt.compute_gradients(loss)

                    # Keep track of the gradients across all towers.
                    tower_grads.append(grads)

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        grads = _average_gradients(tower_grads)

        # Add summaries for the input processing and global_step.
        summaries.extend(input_summaries)

        # Add a summary to track the learning rate.
        summaries.append(tf.summary.scalar('learning_rate', lr))

        # Add histograms for gradients.
        for grad, var in grads:
            if grad is not None:
                summaries.append(
                    tf.summary.histogram(var.op.name + '/gradients', grad))

        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

        # Add histograms for trainable variables.
        for var in tf.trainable_variables():
            summaries.append(tf.summary.histogram(var.op.name, var))

        # Track the moving averages of all trainable variables.
        # Note that we maintain a "double-average" of the BatchNormalization
        # global statistics. This is more complicated than it needs to be, but we
        # keep it for backward-compatibility with our previous models.
        variable_averages = tf.train.ExponentialMovingAverage(
            inception.MOVING_AVERAGE_DECAY, global_step)

        # Another possibility is to use tf.slim.get_variables().
        variables_to_average = (tf.trainable_variables() +
                                tf.moving_average_variables())
        variables_averages_op = variable_averages.apply(variables_to_average)

        # Group all updates into a single train op.
        batchnorm_updates_op = tf.group(*batchnorm_updates)
        train_op = tf.group(apply_gradient_op, variables_averages_op,
                            batchnorm_updates_op)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())

        # Build the summary operation from the last tower summaries.
        summary_op = tf.summary.merge(summaries)

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU
        # implementations.
        sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement))


        def profile(run_metadata, epoch=0):
            with open('profs/timeline_step' + str(epoch) + '.json', 'w') as f:
                # Create the Timeline object, and write it to a json file
                fetched_timeline = timeline.Timeline(run_metadata.step_stats)
                chrome_trace = fetched_timeline.generate_chrome_trace_format()
                f.write(chrome_trace)

        def graph_to_dot(graph):
            dot = Digraph()
            for n in graph.as_graph_def().node:
                dot.node(n.name, label=n.name)
                for i in n.input:
                    dot.edge(i, n.name)
            return dot

        dot_rep = graph_to_dot(tf.get_default_graph())
        s = Source(dot_rep.source, filename="test.gv", format="png")
        with open('profs/A_dot.dot', 'w') as fwr:
            fwr.write(str(dot_rep))

        options = tf.RunOptions(trace_level=tf.RunOptions.SOFTWARE_TRACE)
        run_metadata = tf.RunMetadata()

        sess.run(init, run_metadata=run_metadata, options=options)

        profile(run_metadata, -1)

        # s.view()
        s.save('inc.PNG')

        if FLAGS.pretrained_model_checkpoint_path:
            assert tf.gfile.Exists(FLAGS.pretrained_model_checkpoint_path)
            variables_to_restore = tf.get_collection(
                slim.variables.VARIABLES_TO_RESTORE)
            restorer = tf.train.Saver(variables_to_restore)
            restorer.restore(sess, FLAGS.pretrained_model_checkpoint_path)
            print('%s: Pre-trained model restored from %s' %
                  (datetime.now(), FLAGS.pretrained_model_checkpoint_path))

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.summary.FileWriter(
            FLAGS.train_dir,
            graph=sess.graph)

        operations_tensors = {}
        operations_names = tf.get_default_graph().get_operations()
        count1 = 0
        count2 = 0

        for operation in operations_names:
            operation_name = operation.name
            operations_info = operation.values()
            if len(operations_info) > 0:
                if not (operations_info[0].shape.ndims is None):
                    operation_shape = operations_info[0].shape.as_list()
                    operation_dtype_size = operations_info[0].dtype.size
                    if not (operation_dtype_size is None):
                        operation_no_of_elements = 1
                        for dim in operation_shape:
                            if not(dim is None):
                                operation_no_of_elements = operation_no_of_elements * dim
                        total_size = operation_no_of_elements * operation_dtype_size
                        operations_tensors[operation_name] = total_size
                    else:
                        count1 = count1 + 1
                else:
                    count1 = count1 + 1
                    operations_tensors[operation_name] = -1

            else:
                count2 = count2 + 1
                operations_tensors[operation_name] = -1
        print(count1)
        print(count2)

        with open('tensors_sz.json', 'w') as f:
            json.dump(operations_tensors, f)

        for step in range(FLAGS.max_steps):
            start_time = time.time()
            if step > 100 and step % 101 == 0:
                _, loss_value = sess.run([train_op, loss],
                                         run_metadata=run_metadata, options=options)
                profile(run_metadata, step)
            else:
                _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                examples_per_sec = FLAGS.batch_size / float(duration)
                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                              'sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, duration))

            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 5000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
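As a follow-up to the graph dump above, a hedged sketch (not part of the original script) of rendering the saved DOT file offline with graphviz, assuming profs/A_dot.dot was written by the run:

from graphviz import Source

with open('profs/A_dot.dot') as f:
    # Writes profs/A_dot (a copy of the DOT source) and profs/A_dot.png next to it.
    Source(f.read(), filename='profs/A_dot', format='png').render()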
Example #5
    def get_heap_graph(self):
        def _heap_trans_list(log_list):
            heap_infos = []
            node_info = []
            for entry in log_list:
                entry = fix_br_format(entry)
                entry_type = entry['type']
                if entry_type in ('malloc', 'calloc'):
                    content = '[%s] %s' % (entry['state_timestamp'],
                                           entry['message'])
                    alloc_info = {'addr': entry['addr'],
                                  'size': entry['size'],
                                  'statestamp': entry['state_timestamp'],
                                  'content': content,
                                  'type': entry['type']}
                    node_info.append(alloc_info)
                    heap_infos.append({'content': content,
                                       'type': entry['type'],
                                       'node': copy.deepcopy(node_info)})

                elif entry_type == 'free':
                    content = '[%s] %s' % (entry['state_timestamp'],
                                           entry['message'])
                    free_info = {'addr': entry['addr'],
                                 'size': entry['size'],
                                 'statestamp': entry['state_timestamp'],
                                 'content': content,
                                 'type': entry['type']}
                    node_info = free_heap_info(node_info, free_info)
                    heap_infos.append({'content': content,
                                       'type': entry['type'],
                                       'node': copy.deepcopy(node_info)})

                elif entry_type == 'heap_overflow':
                    content = '[%s] %s' % (entry['state_timestamp'],
                                           entry['message'])
                    overflow_info = {'addr': entry['target_addr'],
                                     'size': entry['target_size'],
                                     'content': content,
                                     'type': entry['type']}
                    node_info = overflow_heap_info(node_info, overflow_info)
                    # add the memory info by extract_memory()
                    heap_infos.append({'content': content,
                                       'type': entry['type'],
                                       'node': copy.deepcopy(node_info),
                                       'memory': memory_color_htmlformat(entry['memory'])})

                elif entry_type == 'redzone_write':
                    content = '[%s] %s' % (entry['state_timestamp'],
                                           entry['message'])
                    heap_infos.append({'content': content,
                                       'type': entry['type'],
                                       'memory': memory_color_htmlformat(entry['memory']),
                                       'backtrace': entry['backtrace']})
            return heap_infos

        heap_infos = _heap_trans_list(self.heap_log_list_dot)
        head_dot = '''digraph G {n0[shape=record,label="......"]'''
        tail_dot = "}"
        label_dot = ""
        edge_dot = ""
        index = 0
        for heap_info in heap_infos:
            index += 1

            # table format
            content = '''n%s[shape=none, label=<<table border="0" cellborder="1" cellspacing="0" cellpadding="4">''' % (
                index)
            # when the node is empty
            if 'node' in heap_info.keys():
                if len(heap_info['node']) == 0:
                    label_dot += '''n%s[shape=record,label="......"]''' % (
                        index)
                    edge_dot += '''n%s->n%s[label="%s"]''' % (
                        index - 1, index, heap_info['content'])
                    continue

                # construct the heap node graph and highlight the overflow part
                for info in heap_info['node']:
                    if info['type'] == "heap_overflow":
                        content += '''<tr><td bgcolor="lightgrey"><font color="red">%s size:%s</font></td></tr>''' % (
                            info['addr'], info['size'])
                        continue
                    content += '''<tr><td>%s size:%s</td></tr>''' % (
                        info['addr'], info['size'])

            # when the node is the overflow one
            if heap_info['type'] == "heap_overflow":
                label_dot += '''n%s%s[shape=box,label=<%s>]''' % (
                    index, index, heap_info['memory'])
                edge_dot += '''{rank = same; n%s->n%s%s[style=dotted label="%s"]}''' % (
                    index, index, index, "memory content")

            # when the node is the mem write one
            if heap_info['type'] == 'redzone_write':
                label_dot += '''n%s[shape=box,label=<%s>]''' % (
                    index, heap_info['memory'])
                label_dot += '''n%s%s[shape=box,label=<%s>]''' % (
                    index, index, heap_info['backtrace'])
                edge_dot += '''n%s->n%s[label="%s",style=dotted]''' % (
                    index - 1, index, heap_info['content'])
                edge_dot += '''{rank = same; n%s->n%s%s[style=dotted]}''' % (
                    index, index, index)
                continue

            content += '''</table>>]'''
            label_dot += content
            edge_dot += '''n%s->n%s[label="%s"]''' % (index - 1, index,
                                                      heap_info['content'])

        dot = head_dot + label_dot + edge_dot + tail_dot
        t = Source(dot)
        t.save("/tmp/HeapChange.dot")
        os.system("dot /tmp/HeapChange.dot -Tpng -o /tmp/HeapChange.png")
        return "/tmp/HeapChange.png"