Example #1
def train_model(model: SLModel,
                trainset: NpDataset,
                valset: NpDataset,
                epochs=5,
                batch_size=32):

    # Create the generators
    logging.info("Training model for {} epochs and {} batch size".format(
        epochs, batch_size))
    logging.info("Flowing the train and validation sets")
    traingen = trainset.flow(
        batch_size=batch_size, shuffle=True, seed=utils.get_random_seed())
    valgen = valset.flow(batch_size=batch_size, shuffle=False)

    # Create the callbacks
    logging.info("Creating the callbacks")
    callbacks = [
        ModelCheckpoint(
            utils.get_model_path(RUN_ID),
            "val_loss",
            verbose=1,
            save_best_only=True),
        Plotter(
            "loss",
            scale='log',
            plot_during_train=True,
            save_to_file=utils.get_plot_path(RUN_ID),
            block_on_end=False),
        Plotter(
            "accuracy",
            scale='linear',
            plot_during_train=True,
            save_to_file=utils.get_plot_path(RUN_ID + "_acc"),
            block_on_end=False)
    ]

    # Create the optimizer
    logging.info("Creating the optimizer")
    params = [param for param in model.parameters() if param.requires_grad]
    # optimizer = optim.SGD(
    #     params,
    #     lr=0.01,
    #     momentum=0.9,
    #     nesterov=True)
    optimizer = optim.Adam(params)
    logging.info("Optimizer: %r" % optimizer)

    # Train the model
    logs = model.fit_generator(
        traingen,
        traingen.steps_per_epoch,
        epochs=epochs,
        optimizer=optimizer,
        validation_generator=valgen,
        validation_steps=valgen.steps_per_epoch,
        metrics=["accuracy"],
        callbacks=callbacks,
        verbose=1)

    return logs
Example #2
def _classify_thread_body(train_ratio_list):
    ret_list = []
    for train_ratio in train_ratio_list:
        time_start = time.time()
        logger.info('\t train_ratio = {}, evaluating ...'.format(train_ratio))

        X_train, X_test, Y_train, Y_test = train_test_split(
            features_matrix,
            labels_matrix,
            test_size=1.0 - train_ratio,
            random_state=utils.get_random_seed(),
            shuffle=True)
        # find out how many labels should be predicted
        top_k_list = [
            np.sum(Y_test[i]) for i in range(np.size(Y_test, axis=0))
        ]
        clf = TopKRanker(LogisticRegression())
        clf.fit(X_train, Y_train)
        preds = clf.predict(X_test, top_k_list)
        # averages = ["micro", "macro", "samples", "weighted"]
        # results[average] = f1_score(mlb.fit_transform(y_test), mlb.fit_transform(preds), average=average)
        # macro = f1_score(Y_test, preds, average="macro")
        # micro = f1_score(Y_test, preds, average="micro")
        macro, micro = eval_utils.f1_scores_multilabel(Y_test, preds)
        logger.info('\t train_ratio = {}, eval completed in {}s'.format(
            train_ratio,
            time.time() - time_start))
        ret_list.append((train_ratio, macro, micro))
    return ret_list
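
A note on Example #2: TopKRanker is defined elsewhere in the repo. A minimal sketch of the variant commonly used in embedding-evaluation code (an assumption, not necessarily the repo's exact class):

import numpy as np
from sklearn.multiclass import OneVsRestClassifier

class TopKRanker(OneVsRestClassifier):
    # Multilabel classifier that keeps the k highest-scoring labels per
    # sample, where k comes from the matching entry of top_k_list.
    def predict(self, X, top_k_list):
        probs = np.asarray(super().predict_proba(X))
        all_labels = np.zeros_like(probs, dtype=int)
        for i, k in enumerate(top_k_list):
            top_k = probs[i].argsort()[::-1][:int(k)]
            all_labels[i, top_k] = 1
        return all_labels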
Example #3
def _classify_thread_body(train_ratio_list):
    global features_dict, true_edges_list_by_repeat, neg_edges_list_by_repeat
    ret_list = []
    for repeat, op, train_ratio in train_ratio_list:
        time_start = time.time()
        logger.info('\t repeat={}, train_ratio={}, op={}, evaluating ...'.format(
            repeat, train_ratio, op))

        edges_train, edges_test, labels_train, labels_test = train_test_split(
            true_edges_list_by_repeat[repeat] +
            neg_edges_list_by_repeat[repeat],
            [1] * len(true_edges_list_by_repeat[repeat]) +
            [0] * len(neg_edges_list_by_repeat[repeat]),
            test_size=1.0 - train_ratio,
            random_state=utils.get_random_seed(),
            shuffle=True)

        train1 = np.array([features_dict[e[0]] for e in edges_train],
                          dtype=np.float32)
        train2 = np.array([features_dict[e[1]] for e in edges_train],
                          dtype=np.float32)
        test1 = np.array([features_dict[e[0]] for e in edges_test],
                         dtype=np.float32)
        test2 = np.array([features_dict[e[1]] for e in edges_test],
                         dtype=np.float32)

        if op == 'average':
            X_train = (train1 + train2) / 2
            X_test = (test1 + test2) / 2
        elif op == 'hadamard':
            X_train = np.multiply(train1, train2)
            X_test = np.multiply(test1, test2)
        elif op == 'l1':
            X_train = np.absolute(train1 - train2)
            X_test = np.absolute(test1 - test2)
        elif op == 'l2':
            X_train = np.square(train1 - train2)
            X_test = np.square(test1 - test2)
        elif op == 'concat':
            X_train = np.concatenate((train1, train2), axis=1)
            X_test = np.concatenate((test1, test2), axis=1)
        else:
            logger.error("error: invalid feature operator: {}".format(op))
            continue  # X_train/X_test were never assigned; skip this task

        clf = LogisticRegression()
        clf.fit(X_train, np.asarray(labels_train))
        preds = clf.predict(X_test)
        # preds = clf.predict_proba(X_test)[:,1] # better choice!
        auc = roc_auc_score(np.asarray(labels_test), preds)

        logger.info(
            '\t repeat={}, train_ratio={}, op={}, eval completed in {}s.'.
            format(repeat, train_ratio, op,
                   time.time() - time_start))
        ret_list.append((train_ratio, op, auc))
    return ret_list
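
The elif chain in Example #3 maps an operator name to an edge-feature combination. A sketch of the same five operators as a dispatch table, which also fails fast on an unknown op (an alternative formulation, not the repo's code):

import numpy as np

EDGE_OPS = {
    'average': lambda a, b: (a + b) / 2,
    'hadamard': np.multiply,
    'l1': lambda a, b: np.absolute(a - b),
    'l2': lambda a, b: np.square(a - b),
    'concat': lambda a, b: np.concatenate((a, b), axis=1),
}

def combine_edge_features(op, a, b):
    # Raise instead of only logging so X_train/X_test can never be left unset.
    if op not in EDGE_OPS:
        raise ValueError("invalid feature operator: {}".format(op))
    return EDGE_OPS[op](a, b)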
Example #4
def _cluster_thread_body(repeated_times):
    nmi_list = []
    X = features_matrix
    y = labels_matrix
    for _ in range(repeated_times):
        X, y = shuffle(X, y, random_state=utils.get_random_seed())
        # cluster the shuffled features with KMeans
        clr = KMeans(n_clusters=LABEL_SIZE)
        clr.fit(X)  # clustering
        y_pred = clr.labels_  # get clustering labels
        nmi_list.append(evalute_NMI(y, y_pred))
    return nmi_list
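
evalute_NMI is not shown in Example #4; a minimal stand-in built on scikit-learn, keeping the example's spelling and assuming 1-D cluster-label arrays:

from sklearn.metrics import normalized_mutual_info_score

def evalute_NMI(y_true, y_pred):
    # Hypothetical helper; argmax one-hot label matrices first if needed.
    return normalized_mutual_info_score(y_true, y_pred)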
Example #5
def train_model(model: Model,
                train_dataset: ImageDataset,
                val_dataset: ImageDataset,
                augmenters=(),
                epochs=100,
                batch_size=32,
                epoch_size=10000,
                plot=False,
                load_model=False,
                **kwargs):
    logging.info("Training model with run id %s" % model.run_id)
    logging.info("Using: \n\tbatch_size: {batch_size} \
        \n\tepochs: {epochs} \
        \n\tplot: {plot} \
        \n\tload_model: {load_model} \
        \n\tepoch_size: {epoch_size}".format(**locals()))

    if load_model:
        logging.info("Reloading model from weights")
        model.load_weights(utils.get_model_path(model.run_id), by_name=True)
    if model.fine_tune:
        old_run_id = model.run_id[:-len("-fine-tune")]
        logging.info(
            "Fine tuning model with weights from {}".format(old_run_id))
        model.load_weights(utils.get_model_path(old_run_id), by_name=True)

    steps = epoch_size // batch_size
    val_steps = epoch_size // 10 // batch_size
    traingen = train_dataset.flow(batch_size=batch_size,
                                  steps_per_epoch=steps,
                                  shuffle=True,
                                  replace=True,
                                  seed=utils.get_random_seed())
    valgen = val_dataset.flow(batch_size=batch_size,
                              steps_per_epoch=val_steps,
                              shuffle=True,
                              replace=True,
                              seed=utils.get_random_seed())

    # Add the augmenters to the training generator
    for augmenter in augmenters:
        traingen = augmenter(traingen)

    # Create the callbacks
    callbacks = [
        ModelCheckpoint(utils.get_model_path(model.run_id),
                        monitor="val_loss",
                        save_best_only=False,
                        save_weights_only=True,
                        mode="min",
                        verbose=1),
        ModelCheckpoint(utils.get_model_path(model.run_id + "_f1"),
                        monitor="val_f1_loss",
                        save_best_only=True,
                        save_weights_only=True,
                        mode="min",
                        verbose=1),
        Plotter(monitor="loss",
                scale="linear",
                plot_during_train=plot,
                save_to_file=utils.get_plot_path(model.run_id),
                block_on_end=False)
    ]

    # Train the model
    history = model.fit_generator(
        traingen,
        steps_per_epoch=5 if args.debug else traingen.steps_per_epoch,
        epochs=epochs,
        verbose=1,
        callbacks=callbacks,
        validation_data=valgen,
        validation_steps=5 if args.debug else valgen.steps_per_epoch)

    # Log the output
    logs = history.history
    epoch_indices = range(len(logs["val_loss"]))  # don't shadow the epochs arg
    checkpoint = min(epoch_indices, key=lambda i: logs["val_loss"][i])
    best_val_loss, best_val_f1 = logs["val_loss"][checkpoint], logs[
        "val_f1_loss"][checkpoint]
    logging.info("LOSS CHECKPOINTED -- Loss: {} -- F1: {}".format(
        best_val_loss, best_val_f1))

    checkpoint = min(epoch_indices, key=lambda i: logs["val_f1_loss"][i])
    best_val_loss, best_val_f1 = logs["val_loss"][checkpoint], logs[
        "val_f1_loss"][checkpoint]
    logging.info("F1 CHECKPOINTED -- Loss: {} -- F1: {}".format(
        best_val_loss, best_val_f1))
Example #6
import argparse
import logging

import models
import utils
from data import HomeCreditData

# SLModel and NpDataset below are assumed to come from the PyJet library
# used throughout these examples (pyjet.models / pyjet.data).

logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)

parser = argparse.ArgumentParser()
parser.add_argument('--data', default='../input/', help="Path to the data")
parser.add_argument(
    '--train', action='store_true', help="Runs the script in train mode")
parser.add_argument(
    '--test', action='store_true', help="Runs the script in test mode")

MODEL = models.SNNModel
RUN_ID = "snn"
SEED = 42
utils.set_random_seed(SEED)
SPLIT_SEED = utils.get_random_seed()


# TODO: Add ROC AUC stateful metric to pyjet so we don't need the validate
# function and can plot the roc-auc over time
# TODO: add more logging to these functions so we can see what's going on
def train_model(model: SLModel,
                trainset: NpDataset,
                valset: NpDataset,
                epochs=5,
                batch_size=32):

    # Create the generators
    logging.info("Training model for {} epochs and {} batch size".format(
        epochs, batch_size))
    logging.info("Flowing the train and validation sets")
Example #7
    def sample_by_nodes(self,
                        sampled_num,
                        rule="random",
                        keep_consistent_nodes=False):
        """
        sample some nodes to construct a sub-network.
        :param sampled_num:
        :param keep_consistent_nodes: whether making the sampled nodes consistent by re-sorting the nodesID.
        :param rule: sampling rule. random: randomly sample all sampled_nodes;
                                            extend: randomly sample one root-node and then extend to sampled_nodes.
        :return: a sub-network with sampled_nodes and corresponding edges.
        """
        logger.info(
            'Net sampling: sample nodes to construct a sub-network ...')
        logger.info(
            "\t\t sampled_nodes = {}, sample_rule = {}, keep_consistent_nodes = {}"
            .format(sampled_num, rule, keep_consistent_nodes))
        logger.info("\t\t origin_node_size = {}".format(self.get_nodes_size()))
        assert sampled_num <= self.get_nodes_size(), "error, {} > {}".format(
            sampled_num, self.get_nodes_size())

        time_start = time.time()
        origin_nodes_list = list(self.nodes)

        if rule == "random":
            sampled_nodes_set = set(
                shuffle(origin_nodes_list,
                        random_state=utils.get_random_seed())[0:sampled_num])
            # random.shuffle(origin_nodes_list)
            # sampled_nodes_set = set(origin_nodes_list[0:sampled_num])
        elif rule == "extend":
            sampled_nodes_set = set()
            extend_nodes_list = []
            origin_nodes_set = set(origin_nodes_list)
            while len(sampled_nodes_set) < sampled_num:
                if len(extend_nodes_list) == 0:
                    origin_nodes_set = origin_nodes_set - sampled_nodes_set
                    root = random.choice(
                        shuffle(list(origin_nodes_set),
                                random_state=utils.get_random_seed()))
                    sampled_nodes_set.add(root)
                    extend_nodes_list.append(root)
                    if len(sampled_nodes_set) >= sampled_num:
                        break
                root = extend_nodes_list.pop(0)
                for v in self._nodes_adjlist[root]:
                    if v not in sampled_nodes_set:
                        sampled_nodes_set.add(v)
                        extend_nodes_list.append(v)
                        if len(sampled_nodes_set) >= sampled_num:
                            break
        else:
            logger.error(
                "Unknown sampling rule: '%s'. Valid rules: 'random', 'extend'."
                % rule)
            raise ValueError("invalid sampling rule: {}".format(rule))

        sampled_net = Graph(isdirected=self._isdirected,
                            isweighted=self._isweighted,
                            self_looped=self._self_looped)

        for node in sampled_nodes_set:
            sampled_net.add_single_node(node)
            for v in self._nodes_adjlist[node]:
                if v in sampled_nodes_set:
                    sampled_net.add_single_edge(node, v)
        if keep_consistent_nodes:
            sampled_net.make_consistent()

        logger.info("\t\t sampled_net edges_size = {}".format(
            sampled_net.get_edges_size()))
        logger.info(
            'Net sampling: sample nodes completed in {}s'.format(time.time() -
                                                                 time_start))
        return sampled_net
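
Hypothetical usage of sample_by_nodes, assuming a Graph built elsewhere in the repo:

net = Graph(isdirected=False, isweighted=False, self_looped=False)
# ... add nodes and edges ...
sub_rand = net.sample_by_nodes(1000, rule="random")
sub_bfs = net.sample_by_nodes(1000, rule="extend", keep_consistent_nodes=True)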
Example #8
    def split_by_edges(self,
                       train_ratio=0,
                       keep_static_nodes=True,
                       keep_consistent_nodes=False):
        """
        split the network to two parts: one has train_ratio edges, one has 1-train_ratio edges.
        :param train_ratio:
        :param keep_static_nodes: whether the splited two parts keep the same node set as the original network.
        :param keep_consistent_nodes: whether making the splited nodes consistent by re-sorting the nodesID.
        :return: train_netwrok: with train_ratio edges, eval_netwrok: with 1-train_ratio edges.
        """
        logger.info(
            'Net split: splitting edges into train_network and eval_network ...')
        logger.info(
            "\t\t train_ratio = {}, keep_static_nodes = {}, keep_consistent_nodes = {}"
            .format(train_ratio, keep_static_nodes, keep_consistent_nodes))
        logger.info("\t\t origin_edges_size = {}".format(
            self.get_edges_size()))
        time_start = time.time()
        edges_list = self.edges
        if not self._isdirected:
            # deduplicate reciprocal edges in the undirected case
            edges_set = set()
            for source, target in edges_list:
                if (source, target) not in edges_set \
                        and (target, source) not in edges_set:
                    edges_set.add((source, target))
            edges_list = list(edges_set)

        train_edges_list, test_edges_list = train_test_split(
            edges_list,
            test_size=1.0 - train_ratio,
            random_state=utils.get_random_seed(),
            shuffle=True)
        # perm = np.arange(len(edges_list))
        # random.shuffle(perm)
        # edges_list_t = [edges_list[i] for i in perm]
        # edges_list = edges_list_t
        # # split for train:
        # train_edges_size = int(np.ceil(len(edges_list)*train_ratio))
        # assert train_edges_size <= len(edges_list), "error, {} > {}".format(train_edges_size, len(edges_list))
        #
        # train network:
        train_net = Graph(isdirected=self._isdirected,
                          isweighted=self._isweighted,
                          self_looped=self._self_looped)
        # for source, target in edges_list[0:train_edges_size]:
        for source, target in train_edges_list:
            train_net.add_single_edge(source, target)
        if keep_static_nodes:
            for v in self.nodes:
                train_net.add_single_node(v)
        elif keep_consistent_nodes:
            train_net.make_consistent()
        logger.info("\t\t train_edges_size = {}".format(
            train_net.get_edges_size()))

        # eval network:
        eval_net = Graph(isdirected=self._isdirected,
                         isweighted=self._isweighted,
                         self_looped=self._self_looped)
        # for source, target in edges_list[train_edges_size:]:
        for source, target in test_edges_list:
            eval_net.add_single_edge(source, target)
        if keep_static_nodes:
            for v in self.nodes:
                eval_net.add_single_node(v)
        elif keep_consistent_nodes:
            eval_net.make_consistent()
        logger.info("\t\t eval_edges_size = {}".format(
            eval_net.get_edges_size()))
        logger.info(
            'Net split: split edges completed in {}s'.format(time.time() -
                                                             time_start))
        return train_net, eval_net
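
Hypothetical usage of split_by_edges, again assuming net is a Graph built elsewhere: hold out 20% of the edges for link-prediction evaluation.

train_net, eval_net = net.split_by_edges(train_ratio=0.8,
                                         keep_static_nodes=True)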
def validate_arguments(args):
    # Initialize the logging
    if args.verbose:
        logging.basicConfig(level=logging.DEBUG,
                            format="%(levelname)s: %(message)s")
    else:
        logging.basicConfig(level=logging.INFO,
                            format="%(levelname)s: %(message)s")

    errors = []
    # Partitioning
    if not args.assignments:
        if args.nparts is None:
            errors.append("--nparts is required when not using --assignments")
        if not args.tpwgts:
            errors.append("--tpwgts is required when not using --assignments")
        if args.random_assignments:
            if args.nparts is not None and args.nparts <= 0:
                errors.append("The --nparts value must be strictly positive")
        else:
            if args.nparts is not None and args.nparts <= 1:
                errors.append("The --nparts value must be greater than 1")
        if args.ubvec is not None and args.nparts is None:
            errors.append(
                "The --ubvec option is only available with the --nparts option"
            )
        if args.ubvec is not None and args.ubvec <= 1.0:
            errors.append("The --ubvec value must be greater than 1.0")
        if args.tpwgts and not args.nparts:
            errors.append(
                "The --tpwgts option is only available with the --nparts option"
            )
        if args.tpwgts and args.nparts and len(args.tpwgts) != args.nparts:
            errors.append(
                "The --tpwgts option requires a list of {} values (one value per partition)"
                .format(args.nparts))
        if args.tpwgts and not math.isclose(
                sum(args.tpwgts), 1.0, rel_tol=1e-5):
            errors.append(
                "The sum of --tpwgts values must be 1.0 (currently {})".format(
                    sum(args.tpwgts)))
    # Clustering
    if args.scheme == 'communities':
        if args.clustering and args.clustering == 'graphviz' and args.cluster_seed:
            errors.append(
                "The --cluster-seed option is not available with the graphviz clustering method"
            )
        if args.clustering and args.clustering != 'oslom2' and args.infomap_calls:
            errors.append(
                "The --infomap-calls option is only available with the oslom2 clustering method"
            )
        if args.cut_edge_length:
            errors.append(
                "The --cut-edge-length option is only available with the cut-edges scheme"
            )
        if args.cut_edge_node_size:
            errors.append(
                "The --cut-edge-node-size option is only available with the cut-edges scheme"
            )
    # Cut edges
    if args.scheme == 'cut-edges':
        if args.cut_edge_length and (args.cut_edge_length < 0
                                     or args.cut_edge_length > 100):
            errors.append(
                "The --cut-edge-length value must be between 0 and 100")
        if args.clustering:
            errors.append(
                "The --clustering option is only available with the communities scheme"
            )
        if args.cluster_seed:
            errors.append(
                "The --cluster-seed option is only available with the communities scheme"
            )
        if args.infomap_calls:
            errors.append(
                "The --infomap-calls option is only available with the communities scheme"
            )
    # Layout
    if args.layout != 'linlog' and args.force:
        errors.append(
            "The --force option is only available with the linlog layout")
    if not args.video and args.fps:
        errors.append(
            "The --fps option is only available with the --video option")
    if not args.video and args.padding_time:
        errors.append(
            "The --padding-time option is only available with the --video option"
        )
    # Image style
    if args.node_size and args.node_size_mode != 'fixed':
        errors.append(
            "The --node-size option is only available with --node-size-mode fixed"
        )
    if args.min_node_size and args.node_size_mode == 'fixed':
        errors.append(
            "The --min-node-size option is only available with --node-size-mode centrality or highlight-new"
        )
    if args.max_node_size and args.node_size_mode == 'fixed':
        errors.append(
            "The --max-node-size option is only available with --node-size-mode centrality or highlight-new"
        )

    # Print errors and exit if any error found
    if errors:
        for error in errors:
            logging.error(error)
        sys.exit(1)

    # Set default values
    if args.layout == 'springbox':
        if not args.attraction:
            args.attraction = 0.012
        if not args.repulsion:
            args.repulsion = 0.024
    elif args.layout == 'linlog':
        if not args.attraction:
            args.attraction = 0.0
        if not args.repulsion:
            args.repulsion = -1.2
    if not args.fps:
        args.fps = 8
    if not args.padding_time:
        args.padding_time = 2.0
    if not args.node_size:
        args.node_size = 20
    if not args.min_node_size:
        args.min_node_size = 20
    if not args.max_node_size:
        args.max_node_size = 60
    if args.scheme == 'communities':
        if not args.clustering:
            args.clustering = 'oslom2'
        if not args.cluster_seed:
            args.cluster_seed = utils.get_random_seed()
        if not args.infomap_calls:
            args.infomap_calls = 0
    if args.scheme == 'cut-edges':
        if not args.cut_edge_length:
            args.cut_edge_length = 50
        if not args.cut_edge_node_size:
            args.cut_edge_node_size = 5
    if not args.cut_edge_length:
        args.cut_edge_length = 0  # to avoid passing None to Graphstream
    if not args.ubvec:
        args.ubvec = 1.0
def parse_arguments():
    parent_parser = argparse.ArgumentParser(
        description=
        '''Create animation of network partition assignments. First processes
        network file and assignments into DGS file format, then uses
        GraphStream to animate each frame, finally frames are stitched together.'''
    )
    parent_parser.add_argument("-v",
                               "--verbose",
                               action="store_true",
                               help="increase output verbosity")

    # Required arguments
    required_group = parent_parser.add_argument_group('required arguments')
    required_group.add_argument('-g',
                                '--graph',
                                required=True,
                                help='input graph file')
    required_group.add_argument('-f',
                                '--format',
                                choices=['metis', 'edgelist', 'gml'],
                                required=True,
                                help='format of the input graph file')
    required_group.add_argument('-o',
                                '--output_dir',
                                required=True,
                                help='output directory')
    # Input/output files
    io_group = parent_parser.add_argument_group('input/outputs options')
    order_group = io_group.add_mutually_exclusive_group()
    order_group.add_argument('-n', '--order', help='node order list')
    order_group.add_argument('--order-seed',
                             type=int,
                             default=utils.get_random_seed(),
                             metavar='S',
                             help='seed for ordering nodes')
    io_group.add_argument('--filter',
                          help='filter node list (<= 0 to exclude node)')
    io_group.add_argument(
        '--node-weight',
        default='weight',
        metavar='W',
        help=
        'attribute used to determine the weight of each node (default=\'weight\')'
    )
    io_group.add_argument(
        '--edge-weight',
        default='weight',
        metavar='W',
        help=
        'attribute used to determine the weight of each edge (default=\'weight\')'
    )
    # Partitioning
    partitioning_group = parent_parser.add_argument_group(
        'partitioning options')
    partitioning_type_group = partitioning_group.add_mutually_exclusive_group()
    partitioning_type_group.add_argument('-a',
                                         '--assignments',
                                         help='partition assignments list')
    partitioning_type_group.add_argument("--random-assignments",
                                         action="store_true",
                                         help="generate random assignments")
    partitioning_group.add_argument(
        '--partition-seed',
        type=int,
        default=utils.get_random_seed(),
        metavar='S',
        help='seed for random assignments partitioning')
    partitioning_group.add_argument(
        '--nparts',
        type=int,
        metavar='P',
        help='number of partitions to generate with METIS')
    partitioning_group.add_argument(
        '--ubvec',
        type=float,
        metavar='U',
        help=
        'allowed load imbalance among partitions in METIS (default=1.001). The load imbalance must be greater than 1.0; 1.2 indicates a desired maximum load imbalance of 20 percent.'
    )
    partitioning_group.add_argument(
        '--tpwgts',
        nargs='+',
        type=float,
        metavar='T',
        help=
        'desired weight for each partition in METIS. The sum of tpwgts[] must be 1.0'
    )
    partitioning_group.add_argument(
        '--show-partitions',
        nargs='+',
        type=int,
        help=
        'partitions to be displayed (based on nparts or partition values in assignments list)'
    )
    # Layout
    layout_group = parent_parser.add_argument_group('layout options')
    layout_group.add_argument('--layout',
                              '-l',
                              choices=['springbox', 'linlog'],
                              default='springbox',
                              help='graph layout')
    layout_group.add_argument('--layout-seed',
                              type=int,
                              default=utils.get_random_seed(),
                              metavar='S',
                              help='seed for graph layout')
    layout_group.add_argument(
        '--force',
        type=float,
        metavar='F',
        help='force for linlog graph layout (default=3.0)')
    layout_group.add_argument(
        '--attraction',
        type=float,
        metavar='A',
        help=
        'attraction factor for graph layout (default=0.012 for springbox, default=0.0 for linlog)'
    )
    layout_group.add_argument(
        '--repulsion',
        type=float,
        metavar='R',
        help=
        'repulsion factor for graph layout (default=0.024 for springbox, default=-1.2 for linlog)'
    )
    # Coloring
    coloring_group = parent_parser.add_argument_group('coloring options')
    color_mode_group = coloring_group.add_mutually_exclusive_group()
    color_mode_group.add_argument(
        '--color-scheme',
        choices=['pastel', 'primary-colors'],
        default='pastel',
        help='color scheme used by gvmap (default=pastel)')
    color_mode_group.add_argument('--node-color',
                                  metavar='C',
                                  help='single color to use for all nodes')
    coloring_group.add_argument('--color-seed',
                                type=int,
                                default=utils.get_random_seed(),
                                metavar='S',
                                help='seed for coloring with gvmap')
    coloring_group.add_argument(
        '--shadow-color',
        metavar='C',
        help=
        'color of the shadow to use for highlighted nodes. Use with --node-size-mode highlight-new'
    )
    # Image style
    styling_group = parent_parser.add_argument_group('image options')
    styling_group.add_argument(
        '--node-size-mode',
        choices=['fixed', 'centrality', 'highlight-new'],
        default='fixed',
        help='node size mode')
    styling_group.add_argument(
        '--node-size',
        type=int,
        metavar='S',
        help=
        'node size in pixels (default=20). Use with --node-size-mode fixed.')
    styling_group.add_argument(
        '--min-node-size',
        type=int,
        metavar='S',
        help=
        'minimum node size in pixels (default=20). Use with --node-size-mode centrality or highlight-new.'
    )
    styling_group.add_argument(
        '--max-node-size',
        type=int,
        metavar='S',
        help=
        'maximum node size in pixels (default=60). Use with --node-size-mode centrality or highlight-new.'
    )
    styling_group.add_argument('--edge-size',
                               type=int,
                               default=1,
                               metavar='S',
                               help='edge size in pixels (default=1)')
    styling_group.add_argument('--label-size',
                               type=int,
                               default=10,
                               metavar='S',
                               help='label size in points (default=10)')
    styling_group.add_argument(
        '--label-type',
        choices=['id', 'order'],
        default='id',
        metavar='T',
        help='type of node labels (node id or node order)')
    styling_group.add_argument('--border-size',
                               type=int,
                               default=1,
                               metavar='S',
                               help='border size between tiles (default=1)')
    styling_group.add_argument('--width',
                               type=int,
                               default=1280,
                               metavar='W',
                               help='image width (default=1280)')
    styling_group.add_argument('--height',
                               type=int,
                               default=720,
                               metavar='H',
                               help='image height (default=720)')
    # Video
    video_group = parent_parser.add_argument_group('video options')
    video_group.add_argument('--video',
                             help='output video file with tiled frames')
    video_group.add_argument('--fps',
                             type=int,
                             help='frames per second (default=8)')
    video_group.add_argument(
        '--padding-time',
        type=float,
        help=
        'padding time in seconds to add extra frames at the end of the video (default=2.0)'
    )
    # Pdf
    pdf_group = parent_parser.add_argument_group('pdf options')
    pdf_group.add_argument(
        '--pdf',
        type=int,
        default=20,
        metavar='P',
        help='Percentage of frames to convert to pdf (default=20)')

    # Scheme
    scheme_group = parent_parser.add_argument_group('scheme option')
    scheme_group.add_argument(
        '-s',
        '--scheme',
        choices=['communities', 'cut-edges'],
        default='communities',
        help=
        'scheme to highlight either communities or cut edges (default=communities)'
    )

    # Clustering
    clustering_group = parent_parser.add_argument_group(
        'communities options (only for scheme=communities)')
    clustering_group.add_argument('--clustering',
                                  '-c',
                                  choices=['oslom2', 'infomap', 'graphviz'],
                                  help='clustering method (default=oslom2)')
    clustering_group.add_argument('--cluster-seed',
                                  type=int,
                                  metavar='S',
                                  help='seed for clustering')
    clustering_group.add_argument(
        '--infomap-calls',
        type=int,
        metavar='C',
        help=
        'number of times infomap is called within oslom2. Good values are between 1 and 10 (default=0)'
    )

    # Cut edges
    cut_edges_group = parent_parser.add_argument_group(
        'cut-edges options (only for scheme=cut-edges)')
    cut_edges_group.add_argument(
        '--cut-edge-length',
        type=int,
        metavar='L',
        help='length of cut edges as percentage of original length (default=50)'
    )
    cut_edges_group.add_argument(
        '--cut-edge-node-size',
        metavar='S',
        help='size of the nodes attached to cut edges (default=5)')

    return parent_parser.parse_args()
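
A minimal driver tying parse_arguments and validate_arguments together (a sketch; the actual entry point may do more):

if __name__ == '__main__':
    args = parse_arguments()
    validate_arguments(args)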