Example #1
def order_unigrams_and_bigrams(self):
    self.fS = {}
    self.features = {}
    for e, Se in itertools.izip(self.S, self.Se_list):
        e_feat = self.create_ngrams(Se, 1)
        e_feat.update(self.create_ngrams(Se, 2))
        self.features[e] = e_feat
    self.fS = set(rasengan.flatten((
        list(v) for v in self.features.itervalues())))
    return
Example #2
def catpeople_stats():
    ''' Show statistics about the CatPeople dataset.
    '''
    (url_mention, TM, E, cat_folds, cat2url, performance_aggregator,
     _) = setup()
    print 'Total Number of entities', len(E)
    print 'Total Number of Categories', len(cat2url)
    print 'Total Number of URLs', len(
        set(rasengan.flatten(cat2url.itervalues())))
    print 'Total Number of mentions', sum(len(url_mention[e]) for e in E)
    return
Example #3
def process_soup(page_html):
    soup = BeautifulSoup(page_html, 'html.parser')
    data = []
    cur_div = soup.body.div
    assert cur_div is not None
    while cur_div is not None:
        try:
            style = Style(cur_div['style'])
            span_style = Style(cur_div.span['style'])
            all_span_strings = rasengan.flatten(
                list([list(e.stripped_strings) for e in cur_div.children]))
            data.append((style, span_style, all_span_strings))
        except Exception:
            # Siblings without the expected style attributes (e.g. bare
            # NavigableStrings between divs) are skipped.
            pass
        cur_div = cur_div.next_sibling
    return (Style(soup.span['style']), data)
Example #4
def random_perf(g, node_set, train_node_idx, label_list):
    _, test_data = get_train_test_data(g, node_set, train_node_idx)
    # -------------------------------------------------------------------- #
    # Figure out how often a test president/vice-president really has      #
    # multiple parties/labels. Count this amongst the test nodes in the    #
    # 10 folds.                                                            #
    # -------------------------------------------------------------------- #
    # node2label = defaultdict(list)
    # for label, nodes in test_data.iteritems():
    #     for node in nodes:
    #         node2label[node].append(label)
    # print "Test Nodes With Multiple Labels", \
    #     sum((len(labels) > 2) for labels in node2label.values())
    node_rating = {}
    for node in rasengan.flatten(test_data.values()):
        node_rating[node] = {}
    return mad_rating(node_rating, label_list, test_data)
Example #5
def get_cautious_update_f(updates, lr, x, y, cost):
    ''' Sometimes the theano optimizer may lay out the updates improperly,
    so that some parameters get updated in place before their effects on
    other gradients and updates have been computed. This can happen during
    nested scans, for example. One simple strategy to overcome this problem
    is to stage the `troublemaking` variables in their own staging area:
    update the staging area first, then copy the updates over. This way the
    updates won't suffer from a race condition.

    Params
    ------
    updates : A list of (parameter, update expression) tuples; the first
      element of each tuple is the parameter to be updated and the second
      is its update expression.
    lr      : The learning rate.
    x       : The input sentence tv.
    y       : The gold output tv.
    cost    : The cost tv.
    Returns
    -------
    A function for updating the variables.
    '''
    print 'Using Cautious Updates'
    params = [e[0] for e in updates]
    updates = [e[1] for e in updates]
    staging_area = [theano.shared(e.get_value()) for e in params]
    update_stage_1 = theano.function(
        flatten([lr, x, y]),
        cost,
        updates=zip(staging_area, updates),
        name='f_update_stage_1')
    update_stage_2 = theano.function(
        [], [], updates=zip(params, staging_area),
        name='f_update_stage_2')

    # Instead of using the lambda notation one can define a sequential function.
    def f_update(p1, p2, p3):
        update_stage_1(p1, p2, p3)
        return update_stage_2()

    # f_update = (lambda p1, p2, p3:
    #             (lambda _: update_stage_2())(
    #                 update_stage_1(p1, p2, p3)))
    f_update.name = 'f_update'
    return f_update
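For context, a hypothetical call site for the returned function; the argument values are invented for illustration, and the real arrays come from the surrounding training loop.

# Hypothetical usage (x_val, y_val invented): stage 1 computes the cost
# and fills the staging area; stage 2 then copies the staged values into
# the parameters, so no parameter is read after it has been overwritten.
f_update = get_cautious_update_f(updates, lr, x, y, cost)
f_update(0.01, x_val, y_val)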
Example #6
def randomwalk_perf(adj, labels, train_node_idx, test_data, label_list):
    node_rating = {}
    adj_list = []
    adj = np.maximum(adj, adj.T)
    vertex_to_label = []
    min_label = min(label_list)
    for row in labels:
        vertex_to_label.append(list(row.nonzero()[0] + min_label))
    for row in adj:
        adj_list.append(list(row.nonzero()[0]))
    # --------------------------------------------------------------------- #
    # For each query point (test vertex), create a ranked list of potential #
    # labels.                                                               #
    # --------------------------------------------------------------------- #
    for node in rasengan.flatten(test_data.values()):
        node_rating[node] = rw_get_label_rating(node, adj_list,
                                                vertex_to_label,
                                                train_node_idx)
    return mad_rating(node_rating, label_list, test_data)
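The first few lines of randomwalk_perf symmetrize the adjacency matrix and convert it to an adjacency list. A toy sketch of that step, with an invented matrix:

# Toy sketch of the symmetrize-then-extract step (matrix invented).
import numpy as np
adj = np.array([[0, 1], [0, 0]])
adj = np.maximum(adj, adj.T)                        # make edges undirected
adj_list = [list(row.nonzero()[0]) for row in adj]  # -> [[1], [0]]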
Example #7
def get_train_x(featset_name,
                predicate_name,
                predicate_idx,
                train_idx,
                train_y,
                use_only_positive_feat=False,
                create_conjunctive_feat=False):
    if featset_name == 's_features_backoff':
        backoff_feat_idx = backoff_feat_name_to_idx[get_backoff_feature_name(
            predicate_name)]
        train_x, feat_name = get_s_features_backoff(backoff_feat_idx)
    elif featset_name == 's_features_backoff_nodoc':
        backoff_feat_idx = backoff_feat_name_to_idx[get_backoff_feature_name(
            predicate_name)]
        bkoff_part, bkoff_names = get_s_features_backoff(backoff_feat_idx)
        nodoc_part, nodoc_names = get_s_features_nodoc(predicate_idx)
        train_x = scipy.sparse.hstack([bkoff_part, nodoc_part])
        feat_name = (bkoff_names + nodoc_names)
    elif featset_name == 's_features_nodoc':
        train_x, feat_name = get_s_features_nodoc(predicate_idx)
    elif featset_name == 's_features_doc':
        train_x, feat_name = get_s_features_doc(predicate_idx)
    elif featset_name == 'random':
        return None
    # print [feat_name[_] for _ in scipy.sparse.find(train_x[41935])[1]]
    # pdb.set_trace()
    assert train_x.shape[1] == len(feat_name)
    idx_to_use = ([e for e, _ in zip(train_idx, train_y)
                   if _ == 1] if use_only_positive_feat else train_idx)

    non_zero_train_col_set = set(
        rasengan.flatten(scipy.sparse.find(train_x[idx_to_use])[1]))
    non_zero_train_col = list(non_zero_train_col_set)
    train_x = train_x[:, non_zero_train_col]
    # Index feat_name with the same list used to slice the columns so the
    # names stay aligned with train_x (set iteration order is not a contract).
    feat_name = [feat_name[i] for i in non_zero_train_col]
    # print [feat_name[_] for _ in scipy.sparse.find(train_x[41935])[1]]
    # pdb.set_trace()
    assert train_x.shape[1] == len(feat_name)
    if not create_conjunctive_feat:
        return train_x, feat_name
    train_x_2, feat_name_2 = make_conjunctive_feat(train_x, feat_name)
    # print [feat_name[_] for _ in scipy.sparse.find(train_x[41935])[1]]
    # pdb.set_trace()
    return train_x_2, feat_name_2
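The column-filtering step in the middle of get_train_x keeps only the feature columns that fire on the chosen training rows, and re-derives feat_name so the names stay aligned with the surviving columns. A toy version of that step, with invented data:

# Toy version of the column filtering above (data invented).
import scipy.sparse
X = scipy.sparse.csr_matrix([[0, 5, 0], [0, 2, 3]])
names = ['f0', 'f1', 'f2']
rows = [0]                                          # pretend training rows
cols = sorted(set(scipy.sparse.find(X[rows])[1]))   # columns firing on rows
X, names = X[:, cols], [names[i] for i in cols]     # -> one column, ['f1']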
Example #8
def get_cautious_update_f(updates, lr, x, y, cost):
    ''' Sometimes the theano optimizer may lay out the updates improperly,
    so that some parameters get updated in place before their effects on
    other gradients and updates have been computed. This can happen during
    nested scans, for example. One simple strategy to overcome this problem
    is to stage the `troublemaking` variables in their own staging area:
    update the staging area first, then copy the updates over. This way the
    updates won't suffer from a race condition.

    Params
    ------
    updates : A list of (parameter, update expression) tuples; the first
      element of each tuple is the parameter to be updated and the second
      is its update expression.
    lr      : The learning rate.
    x       : The input sentence tv.
    y       : The gold output tv.
    cost    : The cost tv.
    Returns
    -------
    A function for updating the variables.
    '''
    print('Using Cautious Updates')
    params = [e[0] for e in updates]
    updates = [e[1] for e in updates]
    staging_area = [theano.shared(e.get_value()) for e in params]
    update_stage_1 = theano.function(flatten([lr, x, y]),
                                     cost,
                                     updates=zip(staging_area, updates),
                                     name='f_update_stage_1')
    update_stage_2 = theano.function([], [],
                                     updates=zip(params, staging_area),
                                     name='f_update_stage_2')

    # Instead of using the lambda notation one can define a sequential function.
    def f_update(p1, p2, p3):
        update_stage_1(p1, p2, p3)
        return update_stage_2()

    # f_update = (lambda p1, p2, p3:
    #             (lambda _: update_stage_2())(
    #                 update_stage_1(p1, p2, p3)))
    f_update.name = 'f_update'
    return f_update
Example #9
def create_ngrams(self, Se, n, predicate=None, verbose=False):
    # NOTE: We are implementing the Binarized MNB model, where we don't
    # count features multiple times when they occur in a single
    # document/utterance/sentence.
    assert self.binarized_mnb
    features = {}
    max_tok = self.max_tok
    total_iter = 0
    if predicate is None:
        predicate = lambda x: True
    for mention in Se:
        tokens = (rasengan.flatten(mention[0])
                  if self.entire
                  else mention[0][mention[1]])
        if n == 1:
            for t in tokens:
                total_iter += 1
                if predicate(t):
                    features[t] = None
        elif n == 2:
            # pt starts out as the BOS token idx.
            pt = self.BOS
            for ct in tokens:
                total_iter += 1
                bigram_feature = max_tok * (1 + pt) + ct
                pt = ct
                if predicate(bigram_feature):
                    features[bigram_feature] = None
        elif n == 12:
            pt = self.BOS
            for ct in tokens:
                total_iter += 1
                bigram_feature = max_tok * (1 + pt) + ct
                pt = ct
                if predicate(bigram_feature):
                    features[bigram_feature] = None
                if predicate(ct):
                    features[ct] = None
        else:
            raise NotImplementedError
    if verbose:
        print 'Total_iter', total_iter
    return features
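The bigram feature above packs the previous and current token ids into a single integer. A worked sketch of the arithmetic, with invented values; the `1 + pt` offset keeps every bigram id at or above `max_tok` for nonnegative `pt`, so in the `n == 12` case bigram ids cannot collide with unigram ids.

# Worked sketch of the bigram packing (values invented).
max_tok = 50000                     # unigram ids live in [0, max_tok)
pt, ct = 17, 42                     # previous and current token ids
bigram = max_tok * (1 + pt) + ct    # = 900042, always >= max_tok here
assert (bigram // max_tok - 1, bigram % max_tok) == (pt, ct)  # invertible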
Example #10
def compile_update_fn(x, y, lr, cost, updates, stack_config, grads):
    ''' Compile the inputs and outputs to produce an update function.
    Params
    ------
    x       :
    y       :
    lr      :
    cost    :
    updates :
    '''
    if y is not None:
        f_cost = theano.function(flatten([x, y]), cost, name='f_cost')
        f_grad = theano.function(flatten([x, y]), grads, name='f_grad')
    else:
        f_intermediate = theano.function(inputs=flatten([x]),
                                         outputs=stack_config.stack_ns.pred_y,
                                         name='f_intermediate')

        def f_cost(str1, str2):
            '''
            The stack_config['endpoint'] contains a `func` method that takes
            three inputs:
            1. stringA
            2. stringB
            3. a tensor.
            Params
            ------
            *args : The args right now are designed to be just a tuple
                    of two integer sequences. args[0] is the left integer
                    sequence, representing a string.
            Returns
            -------
            '''
            assert str1.ndim == 2
            cols = (str1.shape[1] - 1)
            assert cols % 2 == 0
            assert str2.ndim == 1
            intermediate_tensor = f_intermediate(str1).astype('float64')
            return stack_config['endpoint'].func(str1[1:, int(cols / 2)], str2,
                                                 intermediate_tensor)

        f_grad_intermediate = theano.function(inputs=flatten([x, cost]),
                                              outputs=grads,
                                              name='f_grad_intermediate')

        def f_grad(str1, str2):
            assert str1.ndim == 2
            cols = (str1.shape[1] - 1)
            assert cols % 2 == 0
            assert str2.ndim == 1
            intermediate_tensor = f_intermediate(str1).astype('float64')
            intermediate_grad = numpy.array(stack_config['endpoint'].grad(
                str1[1:, int(cols / 2)], str2, intermediate_tensor),
                                            dtype='float32')
            return f_grad_intermediate(str1, intermediate_grad)

        pass

    if stack_config['cautious_update']:
        f_update = get_cautious_update_f(updates, lr, x, y, cost)
    else:
        on_unused_input = ('ignore' if updates == [] else 'raise')
        if y is not None:
            f_update = theano.function(
                # Input is the learning rate, and supervised example.
                flatten([lr, x, y]),
                # Output is the cost/loss that has to be minimized.
                cost,
                updates=updates,
                name='f_update',
                on_unused_input=on_unused_input)
        else:
            f_update_intermediate = theano.function(
                flatten([lr, x, cost]), [],
                updates=updates,
                name='f_update_intermediate',
                on_unused_input=on_unused_input)

            def f_update(lr, str1, str2):
                ''' f_update in this case receives the learning rate and a
                pair of integer sequences.
                Params
                ------
                *args :
                Returns
                -------
                '''
                assert str1.ndim == 2
                cols = (str1.shape[1] - 1)
                assert cols % 2 == 0
                assert str2.ndim == 1
                intermediate_tensor = f_intermediate(str1).astype('float64')
                intermediate_grad = numpy.array(stack_config['endpoint'].grad(
                    str1[1:, int(cols / 2)], str2, intermediate_tensor),
                                                dtype='float32')
                f_update_intermediate(lr, str1, intermediate_grad)
                return (intermediate_tensor, intermediate_grad)

    if y is not None:
        f_classify = theano.function(inputs=flatten(
            [stack_config.stack_ns.absolute_input_tv]),
                                     outputs=stack_config.stack_ns.pred_y,
                                     name='f_classify')
        f_update.individual_updates = {}
        for (_, u) in updates:
            f_update.individual_updates[u.wrt_name] = theano.function(
                flatten([lr, x, y]), u, name='f_update')
    else:

        def f_classify(in_str):
            assert in_str.ndim == 2
            cols = (in_str.shape[1] - 1)
            assert cols % 2 == 0
            return stack_config['endpoint'].decode(
                in_str[1:, int(cols / 2)],
                f_intermediate(in_str).astype('float64'))

    nsf = Namespace()
    nsf.f_cost = f_cost
    nsf.f_update = f_update
    nsf.f_classify = f_classify
    nsf.f_grad = f_grad
    return nsf
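A hypothetical call site for the compiled namespace (all value names invented); in the supervised branch the members are plain theano functions, so training and decoding reduce to:

# Hypothetical usage of the returned Namespace (x_val, y_val invented).
nsf = compile_update_fn(x, y, lr, cost, updates, stack_config, grads)
loss = nsf.f_update(0.01, x_val, y_val)  # one update step, returns the cost
pred = nsf.f_classify(x_val)             # predictions from the stack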
Example #11
def main():
    import argparse
    arg_parser = argparse.ArgumentParser(description='')
    arg_parser.add_argument('--seed', default=0, type=int, help='Default={0}')
    arg_parser.add_argument('--emb_pkl_fn',
                            default='data/demonstrate_similarity_idea.emb.pkl',
                            type=str)
    arg_parser.add_argument(
        '--feat_file',
        default='data/random/details/89c0c894.American_women_writers',
        type=str)
    arg_parser.add_argument('--ctag', default=None, type=int)
    arg_parser.add_argument('--mode_count', default=5, type=int)
    arg_parser.add_argument('--method',
                            default='fast_relax',
                            type=str,
                            choices=[
                                'brute_force', 'fast_relax', 'annealed_gibbs',
                                'maxproduct-bp', 'variational_inference',
                                'dc_programming'
                            ])
    args = arg_parser.parse_args()
    import random
    random.seed(args.seed)
    numpy.random.seed(args.seed)
    cfg.mode_count = args.mode_count
    tags_to_remove = defaultdict(list)
    with rasengan.tictoc('Loading pkl'):
        embeddings = pkl.load(open(args.emb_pkl_fn))
        if cfg.introduce_NULL_embedding:
            embeddings[cfg.NULL_KEY] = numpy.zeros(
                next(embeddings.itervalues()).shape)
    with rasengan.debug_support():
        for mode_idx in range(cfg.mode_count):
            print 'mode_idx=', mode_idx
            entity_tags = {}
            entities = []
            for row in open(args.feat_file):
                _e, _tags = [e.strip() for e in row.strip().split('|||')]
                entities.append(_e)
                entity_tags[_e] = set([
                    t.lower()
                    for t in (e.strip().split(':')[0] for e in _tags.split())
                    if t.lower() in embeddings
                ])
            total_tags = set(
                rasengan.flatten([list(e) for e in entity_tags.values()]))
            assert all(e in embeddings for e in total_tags)
            print(
                'For each of these people our goal is to select one word.'
                ' That word should be as similar as possible to the words'
                ' picked for the other entities.')

            problem = rasengan.OrderedDict_Indexable_By_StringKey_Or_Index()
            for (a, b) in entity_tags.items():
                b = list(b)
                print 'Entity: ', a, 'tags to remove: ', tags_to_remove[a]
                for ttr in tags_to_remove[a]:
                    tolerant_remove(b, ttr)
                if cfg.introduce_NULL_embedding and cfg.NULL_KEY not in b:
                    b.append(cfg.NULL_KEY)
                # print '%-25s' % a, '|||', ', '.join(b)
                problem[a] = DataFrame(data=numpy.concatenate(
                    [(scale_to_unit(embeddings[e])
                      if cfg.scale_to_unit else embeddings[e])[None, :]
                     for e in b],
                    axis=0),
                                       index=b)
            if args.ctag is None:
                initial_assignment = dict(
                    (__a, 0) for __b, __a in enumerate(entities))
            else:
                ctag = 'war'.split()[args.ctag]
                initial_assignment = dict(
                    (__e,
                     (cfg.NULL_KEY if ctag not in entity_tags[__e] else ctag))
                    for __e in entities)
            print 'Initial chosen tags::', chosen_tags(problem,
                                                       initial_assignment)
            initial_objective = dp_objective_efficient_impl(
                problem, initial_assignment)
            print 'initial_objective=', initial_objective
            assert numpy.isclose(
                dp_objective_naive_impl(problem, initial_assignment),
                initial_objective)
            final_assignment = optimize_assignment(problem,
                                                   initial_assignment,
                                                   method=args.method)
            final_objective = dp_objective_efficient_impl(
                problem, final_assignment)
            for (fa_entity, fa_tag_idx) in final_assignment.iteritems():
                tags_to_remove[fa_entity].append(
                    liloc(problem[fa_entity], fa_tag_idx).name)
            print 'mode_idx=', mode_idx,
            print 'initial_objective=', initial_objective,
            print 'final_objective=', final_objective,
            print 'Final chosen tags=', chosen_tags(problem, final_assignment)
    return
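The parsing loop above expects each --feat_file row to hold an entity and its tags separated by '|||', with each tag token of the form tag:score. A hypothetical line, dissected the same way the loop does it:

# Hypothetical feat_file row (contents invented) parsed as in the loop above.
row = 'Jane_Austen ||| novelist:0.9 writer:0.8 regency:0.3'
_e, _tags = [e.strip() for e in row.strip().split('|||')]
tags = set(t.split(':')[0].lower() for t in _tags.split())
assert _e == 'Jane_Austen'
assert tags == set(['novelist', 'writer', 'regency'])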
Example #12
arg_parser = argparse.ArgumentParser(
    description='Do a basic featurization of the BBN2 data.')
arg_parser.add_argument('--seed', default=0, type=int, help='Default={0}')
arg_parser.add_argument('--in_fn',
                        default="~/data/tackbp2015bbn2/relational_bbn2.pkl",
                        type=str)
arg_parser.add_argument(
    '--out_fn',
    default="~/data/tackbp2015bbn2/basicfeaturization_relational_bbn2.pkl",
    type=str)
args = arg_parser.parse_args()
vertex_dict, edgelist = pkl.load(open(os.path.expanduser(args.in_fn)))
assert all(e.confidence == '"1.0"' for e in vertex_dict.itervalues())
# The full set of potential features.
potential_feat = list(
    set(
        rasengan.flatten(
            [e.keys() for _ in vertex_dict.itervalues() for e in _.featsets])))
STRING_FEAT = [e for e in potential_feat if e.endswith('~name')]
other_feat = ['~crime', '~time', '~document']
'''
print [e for e in potential_feat
 if e.endswith('~name')]

print [e for e in potential_feat
 if e.endswith('~type')]

print [e for e in potential_feat
 if e.endswith('~confidence')]

print [e for e in potential_feat
      if not e.endswith('~name')
      and not e.endswith('~type')
Example #13
labels = s_features[:, predicate_idx]
I = list(scipy.sparse.find(labels)[0])
set_I = set(I)
for featset_name in [
        's_features_nodoc',
        's_features_doc',
]:
    for trials in range(5):
        train_idx = IDX_DATA[predicate_name][trials]['train']
        preamble = 'predicate_name=%s trials=%d featset_name=%s ' % (
            predicate_name, trials, featset_name)
        g = get_igraph(predicate_idx, featset_name)
        counter = Counter()
        for origin in train_idx:
            counter.update((e for e in rasengan.flatten(
                g.random_walk(origin, args.rw_max_step)
                for walk_idx in range(args.rw_walk_num))
                            if e < total_persons))
            # pr = g.personalized_pagerank(damping=0.85, reset_vertices=[0])
            # print len(pr)
        # A large number of vertices are left completely untouched by the
        # random walks because the walks have a low tendency to discover
        # new vertices. Imagine a star-like graph: most of the time you
        # would reach a spoke and then turn back to the hub, so the rate
        # of going from one hub to another is too low.
        _testing_output = [[
            (1 if _i in set_I else 0), _i
        ] for _i in itertools.chain((
            i for i, _ in sorted(
                counter.items(), key=lambda x: x[1], reverse=True)
Example #14
def compile_update_fn(x, y, lr, cost, updates,
                      stack_config, grads):
    ''' Compile the inputs and outputs to produce an update function.
    Params
    ------
    x       :
    y       :
    lr      :
    cost    :
    updates :
    '''
    if y is not None:
        f_cost = theano.function(flatten([x, y]), cost, name='f_cost')
        f_grad = theano.function(flatten([x, y]), grads, name='f_grad')
    else:
        f_intermediate = theano.function(
            inputs=flatten([x]),
            outputs=stack_config.stack_ns.pred_y,
            name='f_intermediate')

        def f_cost(str1, str2):
            '''
            The stack_config['endpoint'] contains a `func` method that takes
            three inputs:
            1. stringA
            2. stringB
            3. a tensor.
            Params
            ------
            *args : The args right now are designed to be just a tuple
                    of two integer sequences. args[0] is the left integer
                    sequence, representing a string.
            Returns
            -------
            '''
            assert str1.ndim == 2
            cols = (str1.shape[1]-1)
            assert cols % 2 == 0
            assert str2.ndim == 1
            intermediate_tensor = f_intermediate(str1).astype('float64')
            return stack_config['endpoint'].func(
                str1[1:, cols/2], str2, intermediate_tensor)

        f_grad_intermediate = theano.function(
            inputs=flatten([x, cost]),
            outputs=grads,
            name='f_grad_intermediate')

        def f_grad(str1, str2):
            assert str1.ndim == 2
            cols = (str1.shape[1]-1)
            assert cols % 2 == 0
            assert str2.ndim == 1
            intermediate_tensor = f_intermediate(str1).astype('float64')
            intermediate_grad = numpy.array(
                stack_config['endpoint'].grad(
                    str1[1:, cols/2], str2, intermediate_tensor),
                dtype='float32')
            return f_grad_intermediate(str1, intermediate_grad)
        pass

    if stack_config['cautious_update']:
        f_update = get_cautious_update_f(updates, lr, x, y, cost)
    else:
        on_unused_input = ('ignore' if updates == [] else 'raise')
        if y is not None:
            f_update = theano.function(
                # Input is the learning rate, and supervised example.
                flatten([lr, x, y]),
                # Output is the cost/loss that has to be minimized.
                cost,
                updates=updates,
                name='f_update',
                on_unused_input=on_unused_input)
        else:
            f_update_intermediate = theano.function(
                flatten([lr, x, cost]),
                [],
                updates=updates,
                name='f_update_intermediate',
                on_unused_input=on_unused_input)

            def f_update(lr, str1, str2):
                ''' f_update in this case receives the learning rate and a
                pair of integer sequences.
                Params
                ------
                *args :
                Returns
                -------
                '''
                assert str1.ndim == 2
                cols = (str1.shape[1]-1)
                assert cols % 2 == 0
                assert str2.ndim == 1
                intermediate_tensor = f_intermediate(str1).astype(
                    'float64')
                intermediate_grad = numpy.array(
                    stack_config['endpoint'].grad(
                        str1[1:, cols/2], str2, intermediate_tensor),
                    dtype='float32')
                f_update_intermediate(lr, str1, intermediate_grad)
                return (intermediate_tensor, intermediate_grad)

    if y is not None:
        f_classify = theano.function(
            inputs=flatten([stack_config.stack_ns.absolute_input_tv]),
            outputs=stack_config.stack_ns.pred_y,
            name='f_classify')
        f_update.individual_updates = {}
        for (_, u) in updates:
            f_update.individual_updates[u.wrt_name] = theano.function(
                flatten([lr, x, y]), u, name='f_update')
    else:
        def f_classify(in_str):
            assert in_str.ndim == 2
            cols = (in_str.shape[1]-1)
            assert cols % 2 == 0
            return stack_config['endpoint'].decode(
                in_str[1:, cols/2], f_intermediate(in_str).astype('float64'))
    nsf = Namespace()
    nsf.f_cost = f_cost
    nsf.f_update = f_update
    nsf.f_classify = f_classify
    nsf.f_grad = f_grad
    return nsf
Example #15
# rasengan.warn('I am deleting Resident_location_to_person')
# del data_dict['Resident_location_to_person']

idx_to_name = dict((idx, entity_to_name[e])
                   for e, idx in entity_to_int_map.iteritems())
int_to_entity_map = dict((a, b) for (b, a) in entity_to_int_map.iteritems())
dv = data_dict.values()


edge_to_label_map = {}
for label, edges in data_dict.iteritems():
    for v1, v2 in edges:
        edge_to_label_map[(v1, v2)] = label
        edge_to_label_map[(v2, v1)] = label + '_inv'

all_nodes = set(rasengan.flatten(dv))
all_edges = reduce(lambda x, y: x + y, dv)
set_all_edges = set(tuple(e) for e in all_edges)
# print '\n'.join([str(e)
#                  for e
#                  in sorted(list(set([(idx_to_name[e[1]],
#                                       g.degree(e[1])) for e in
#                                      data_dict['Role_person_to_role']
#                                      if g.degree(e[1]) == 2])),
#                            key=lambda x: x[1], reverse=True)])


def shortest_path(g, v1, v2):
    d = {}
    v = None
    pv = None