Example #1
def test_dict_importer():
    """Dict Importer."""
    importer = DictImporter()
    exporter = DictExporter()
    refdata = {
        'id': 'root',
        'children': [
            {'id': 'sub0',
             'children': [{'id': 'sub0B'}, {'id': 'sub0A'}]},
            {'id': 'sub1',
             'children': [
                 {'id': 'sub1A'},
                 {'id': 'sub1B'},
                 {'id': 'sub1C', 'children': [{'id': 'sub1Ca'}]},
             ]},
        ],
    }
    data = deepcopy(refdata)
    root = importer.import_(data)
    eq_(data, refdata)
    eq_(exporter.export(root), data)
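
A minimal standalone round trip for reference (a sketch; the data here is illustrative):

from anytree.importer import DictImporter
from anytree.exporter import DictExporter

data = {'id': 'root', 'children': [{'id': 'a'}, {'id': 'b'}]}
root = DictImporter().import_(data)          # builds an AnyNode tree
assert DictExporter().export(root) == data   # exporting restores the dict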
Example #2
    def _prune_leaves(self, root_node) -> dict:
        """Prune all leaves that do not have a 'value' attribute.
        In other words, remove the leaves that the user did not set in the file.

        Parameters
        ----------
        root_node : dict
            Branch to be pruned.

        Returns
        -------
        dict
            Pruned branch.
        """
        importer = DictImporter()
        exporter = DictExporter()

        inptree = importer.import_(root_node)

        # keep checking for leaves without value attribute
        while True:
            all_has_value = all(
                hasattr(leaf, 'value') for leaf in inptree.leaves)

            if all_has_value:
                break

            for leaf in inptree.leaves:
                if not hasattr(leaf, 'value'):
                    # prune leaves without value attribute
                    leaf.parent = None

        return exporter.export(inptree)
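
To illustrate the pruning loop above, a hypothetical input and its result (only leaves carrying a 'value' attribute survive; the loop repeats because pruning can expose new bare leaves):

branch = {
    'name': 'root',
    'children': [
        # 'a1' is pruned first; 'a' then becomes a bare leaf and is pruned too
        {'name': 'a', 'children': [{'name': 'a1'}]},
        {'name': 'b', 'value': 1},  # kept
    ],
}
# _prune_leaves(branch) -> {'name': 'root', 'children': [{'name': 'b', 'value': 1}]}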
Example #3
    def generate_json(self):
        nested_dict = self.build()
        importer = DictImporter()
        tree = importer.import_(nested_dict)

        hr_entries = []
        for node in LevelOrderIter(tree):
            item = {
                'PreferredFirstName': getattr(node, 'PreferredFirstName'),
                'Preferred_Name_-_Last_Name': getattr(node, 'Preferred_Name_-_Last_Name'),
                'businessTitle': getattr(node, 'businessTitle'),
                'EmployeeID': getattr(node, 'EmployeeID'),
                'PrimaryWorkEmail': getattr(node, 'PrimaryWorkEmail'),
                'IsManager': len(node.children) > 0,
            }

            if node.parent:
                manager_first_name = getattr(node.parent, 'PreferredFirstName')
                manager_last_name = getattr(node.parent, 'Preferred_Name_-_Last_Name')
                manager = {
                    'WorkersManager': '{} {}'.format(manager_first_name, manager_last_name),
                    'WorkersManagersEmployeeID': getattr(node.parent, 'EmployeeID')
                }
                item.update(manager)

            hr_entries.append(item)

        hr_data = {
            'Report_Entry': hr_entries
        }

        return json.dumps(hr_data)
Example #4
 def sync_to_restore(self, opt_model):
     self.opt_model = opt_model
     if hasattr(self, 'root_node'):
         root_node_compressed = self.root_node
         importer = DictImporter()
         self.root_node = importer.import_(root_node_compressed)
         sync_part_tree_on_restore(self.opt_model.ele_model,
                                   self.opt_model.seq_model, self.root_node)
Example #5
def load_tree(path):
    importer = DictImporter()
    with gzip.open(path, 'rb') as f:
        dataRead = pickle.load(f)
        logging.debug("dataRead=" + str(dataRead))
        rootNode = importer.import_(dataRead)

    return rootNode
Example #6
def intercept():
    """判断用户是否top_root"""
    # redis-cli
    rc = current_app.redis_cli
    root_data = json.loads(rc.hget(f"drp_relation_member_{g.user_id}", 0).decode())
    importer = DictImporter()
    root = importer.import_(root_data)
    return root.height
Example #7
    def get_path_descendants(path):
        """ Get the set of descendants for a tree like path dict.
        """

        importer = DictImporter()
        root = importer.import_(path)
        descendants = {node.name for node in root.descendants}
        return descendants
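
A small sketch of the expected input and output (hypothetical path dict):

path = {'name': 'root',
        'children': [{'name': 'child', 'children': [{'name': 'grandchild'}]}]}
# get_path_descendants(path) -> {'child', 'grandchild'}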
Example #8
 def load_tree(self, directory=None):
     data_path = (
         directory or '.'
     ) + '/nn_data/' + self.config.experiment_name + '/spectral_tree.dict'
     if os.path.exists(data_path):
         importer = DictImporter()
         with open(data_path, 'rb') as file:
             treedict = pickle.load(file)
             self.root = importer.import_(treedict)
     print(RenderTree(self.root).by_attr('id'))
Example #9
def path_differences(model, paths_labels, type_analysis='production'):
    """

    Parameters
    ----------
    model: PySB model
        Model used to do dominant path analysis
    paths_labels: dict
        Dictionary of pathways generated by dominant path analysis
    type_analysis: str
        Type of analysis used in the dominant path analysis.
        It can either be `production` or `consumption`

    Returns
    -------
    A pandas dataframe whose column names and row indices are the pathway
    labels; each cell contains the edges that are present in the row's
    pathway but not in the column's pathway.
    """
    generate_equations(model)
    importer = DictImporter()
    path_edges = {}

    def find_numbers(dom_r_str):
        n = map(int, re.findall(r'\d+', dom_r_str))
        return n

    def nodenamefunc(node):
        node_idx = list(find_numbers(node.name))[0]
        node_sp = model.species[node_idx]
        node_name = parse_name(node_sp)
        return node_name

    def edgeattrfunc(node, child):
        return 'dir="back"'

    for keys, values in paths_labels.items():
        root = importer.import_(values)
        dot = DotExporter(root, graph='strict digraph', options=["rankdir=RL;"], nodenamefunc=nodenamefunc,
                          edgeattrfunc=edgeattrfunc)
        data = ''
        for line in dot:
            data += line
        pydot_graph = graph_from_dot_data(data)
        graph = from_pydot(pydot_graph[0])
        if type_analysis == 'production':
            graph = graph.reverse()
        edges = set(graph.edges())
        path_edges[keys] = edges

    path_diff = pd.DataFrame(index=paths_labels.keys(), columns=paths_labels.keys())
    for row in path_diff.columns:
        for col in path_diff.columns:
            path_diff.loc[row, col] = path_edges[row].difference(path_edges[col])
    return path_diff
Example #10
 def load_from_file(cls, file_path, game_def):
     """
     Creates a Tree from a file with the tree in json format
     Args:
         file_path: Path to the json file
     """
     with open(file_path) as feedsjson:
         tree_dic = json.load(feedsjson)
     importer = DictImporter()
     root = importer.import_(tree_dic['tree'])
     for n in PreOrderIter(root):
         n = cls.node_class.from_dic(n.name, game_def, tree_dic['main_player'],
                                     parent=n.parent, children=n.children)
     t = cls(root)
     return t
Example #11
    def render(self, reform):
        from anytree.importer import DictImporter
        from anytree import RenderTree

        if isinstance(reform, dict):
            reform = DeppReform.from_data(reform)

        importer = DictImporter()
        # tree_root = importer.import_(r['hierplane_tree']['root'])
        print([(w, h) for w, h in zip(reform.words, reform.heads)])
        tree_root = importer.import_(reform.tree['root'])
        tree = RenderTree(tree_root)
        for pre, fill, node in tree:
            print("%s%s: %s (%s)" % (pre, node.nodeType, node.word, ','.join(
                node.attributes).lower()))
Example #12
    def undo(self):
        importer = DictImporter(nodecls=QAnyTreeItem)
        parent = getIndexFromLocations(self.indexLocations, self.model)
        parentItem = self.model.getItem(parent)

        self.result = self.model.beginInsertRows(parent, self.position,
                                                 self.position + self.rows - 1)
        for row, data in enumerate(self.data):
            print(data)
            item = importer.import_(data)

            # Reconstruct branch
            item.parent = parentItem
            parentItem.moveChild(parentItem.childCount() - 1,
                                 self.position + row)

        self.model.endInsertRows()
Example #13
def load_tree(dirname):
    importer = DictImporter()
    with open(join(dirname, 'tree.yaml'), 'r', encoding='utf-8') as file:
        tree = importer.import_(yaml_load(file))

    with open(join(dirname, 'node_to_classes.yaml'), 'r',
              encoding='utf-8') as file:
        node_to_classes = yaml_load(file)

    with open(join(dirname, 'node_to_class.yaml'), 'r',
              encoding='utf-8') as file:
        node_to_class = yaml_load(file)

    with open(join(dirname, 'class_maps.yaml'), 'r', encoding='utf-8') as file:
        class_maps = yaml_load(file)

    return tree, node_to_class, node_to_classes, class_maps
Example #14
def test_dict_importer_node():
    """Dict Importer."""
    importer = DictImporter(Node)
    exporter = DictExporter()
    refdata = {
        'name': 'root',
        'children': [
            {'name': 'sub0',
             'children': [{'name': 'sub0B'}, {'name': 'sub0A'}]},
            {'name': 'sub1',
             'children': [
                 {'name': 'sub1A'},
                 {'name': 'sub1B'},
                 {'name': 'sub1C', 'children': [{'name': 'sub1Ca'}]},
             ]},
        ],
    }
    data = deepcopy(refdata)
    root = importer.import_(data)
    eq_(data, refdata)
    eq_(exporter.export(root), data)
    r = RenderTree(root)
    expected = u"\n".join([
        u"Node('/root')",
        u"├── Node('/root/sub0')",
        u"│   ├── Node('/root/sub0/sub0B')",
        u"│   └── Node('/root/sub0/sub0A')",
        u"└── Node('/root/sub1')",
        u"    ├── Node('/root/sub1/sub1A')",
        u"    ├── Node('/root/sub1/sub1B')",
        u"    └── Node('/root/sub1/sub1C')",
        u"        └── Node('/root/sub1/sub1C/sub1Ca')",
    ])
    eq_str(str(r), expected)
Example #15
def vis_tree(ds: Dict[Text, Any], lang, trans=False):
    """Visualize domains."""
    from anytree.importer import DictImporter
    from anytree import RenderTree
    from sagas.nlu.translator import get_contrast

    data = treeing(ds)
    importer = DictImporter()
    tree_root = importer.import_(data)
    tree = RenderTree(tree_root)
    for pre, fill, node in tree:
        if node.dependency_relation=='punct':
            addons='_'
        else:
            addons=f"{node.lemma}; {get_contrast(node.text, lang)}" \
                if trans else f"{node.lemma}"
        print(f"{pre}{node.dependency_relation}: "
              f"{node.text}({addons}, {node.upos.lower()}, {node.index})")
Example #16
    def from_file(cls, filename):
        """
        Load parameter space as a ternary tree from pickle file.
        """
        # load dict from pickle
        if not filename.endswith(PKL_EXT):
            filename += PKL_EXT
        with open(filename, "rb") as f:
            loaded_dict = pickle.load(f)

        def _sanitize_dict(raw_dict, keys_to_delete):
            """
            Remove keys from dict - possibly nested.
            """
            sanit_dict = {}
            for key, value in raw_dict.items():
                if key not in keys_to_delete:
                    if isinstance(value, (list, tuple)):
                        sanit_dict[key] = [
                            _sanitize_dict(list_val, keys_to_delete)
                            if isinstance(list_val, dict)
                            else list_val
                            for list_val in value
                        ]
                    elif isinstance(value, dict):
                        sanit_dict[key] = _sanitize_dict(value, keys_to_delete)
                    else:
                        sanit_dict[key] = value
            return sanit_dict

        # sanitise dict for correct init
        keys_to_delete = set(loaded_dict.keys()) - set(cls.INIT_ATTRS)
        sanitised_dict = _sanitize_dict(loaded_dict, keys_to_delete)
        # import as `LeafNode` class
        importer = DictImporter(nodecls=LeafNode)
        root_leaf_node = importer.import_(sanitised_dict)
        # force `ParameterSpace` class which extends `LeafNode` with some
        # useful methods
        root_leaf_node.__class__ = ParameterSpace

        return root_leaf_node
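
The `nodecls` argument used above is anytree's hook for importing into a custom node class; a minimal sketch with the stock Node class (which requires a 'name' key):

from anytree import Node
from anytree.importer import DictImporter

importer = DictImporter(nodecls=Node)
root = importer.import_({'name': 'root', 'children': [{'name': 'child'}]})
print(root.children[0].name)  # -> 'child'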
Example #17
 def get_hierarchy(self, attribute_name):
     """
     Returns the hierarchy for a given attribute, None if there is no hierarchy
     Parameters
     ----------
     attribute_name: str
         The attribute name.
     Returns
     -------
     AnyNode
         The generalization hierarchy.
     """
     attribute = self.attributes[attribute_name]
     if "hierarchy" in attribute:
         importer = DictImporter()
         root = importer.import_(attribute['hierarchy'])
         for node in LevelOrderIter(root):
             node_range = name_to_range(node.name)
             node.range = node_range
         return root
     return None
Example #19
def vis_trees(trees: List[Dict[Text, Any]], word_info=True):
    from anytree.importer import DictImporter
    from anytree import RenderTree
    importer = DictImporter()
    for index, data in enumerate(trees):
        word = data['word']
        if word_info:
            pprint(word)
        tree_root = importer.import_(data['tree'])
        tree = RenderTree(tree_root)
        tc.emp(
            'green',
            f"Display #{index} sememe tree: {word['en_word']}|{word['ch_word']}.{word['No']}"
        )
        for pre, fill, node in tree:
            if node.role and node.role != 'None':
                cl = 'magenta'
                role = node.role
            else:
                cl = 'yellow'
                role = '✔'
            tc.emp(cl, "%s%s: %s" % (pre, role, node.name))
Example #20
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        help='one-time computations')
    parser.add_argument('-p',
                        '--path',
                        default='embs.npy',
                        type=str,
                        help='path to language embeddings')
    args = parser.parse_args()

    embs = np.load(args.path)
    if not os.path.exists('../outputs'):
        os.makedirs('../outputs')

    # only considering languages relatively close to current language (e.g. not cross-continent)
    radius = 500

    # for each k, we return fraction of languages that have a top k embedding distance
    # neighbor that's in the family/tree
    ks = {2, 4, 8, 16, 32}

    metrics = {}  # where all the metrics are added

    lang_list_pth = '../metadata/LangList.txt'
    with open(lang_list_pth, 'r') as inf:
        lines = inf.readlines()
    langlist_lines = [l.strip() for l in lines]

    lang_to_link = {l.split()[0]: l.split()[2] for l in langlist_lines}
    # lang_to_link['ABIWBT'] = 'https://en.wikipedia.org/wiki/Abidji_language'
    lang_to_3 = {l.split()[0]: l.split()[1] for l in langlist_lines}
    # lang_to_3['ABIWBT'] = 'abi'
    lang_to_coord = {
        l.split()[0]: (l.split()[4], l.split()[5])
        for l in langlist_lines
    }  # latitude, longitude
    # lang_to_coord['ABIWBT'] = ('5.65656', '-4.58421')

    with open('../metadata/lang195.txt', 'r') as inf:
        lines = inf.readlines()
    lang195_langs = [l.strip() for l in lines]  # ['ABIWBT', ...]
    lang195_langs_set = set(lang195_langs)

    lons = [float(lang_to_coord[l][1]) for l in lang195_langs]
    lats = [float(lang_to_coord[l][0]) for l in lang195_langs]

    if not os.path.exists('../metadata/dists_km.npy'):
        coords = [(lat, lon) for lat, lon in zip(lats, lons)]
        dists = [[geopy.distance.distance(c1, c2) for c1 in coords]
                 for c2 in coords]
        dists_km = [[d.km for d in ds] for ds in dists]
        # dists_km[i][j] = dist btw lang_i and lang_j
        np.save('../metadata/dists_km.npy', dists_km)
    dists_km = np.load('../metadata/dists_km.npy')

    emb_dists = [[np.linalg.norm(a - b) for a in embs] for b in embs]

    correlation_mean = np.mean(
        [pearsonr(dists_km[i], emb_dists[i])[0] for i in range(len(dists_km))])
    correlation_std = np.std(
        [pearsonr(dists_km[i], emb_dists[i])[0] for i in range(len(dists_km))])
    metrics['correlation_mean'] = correlation_mean
    metrics['correlation_std'] = correlation_std

    idxs_under_threshold = [[i for i, d in enumerate(ds) if d < radius]
                            for ds in dists_km]
    # row i has the languages < 500 km of lang i
    dists_km_subsets = [[ds[i] for i in idxs]
                        for idxs, ds in zip(idxs_under_threshold, dists_km)]
    emb_dists_subsets = [[ds[i] for i in idxs]
                         for idxs, ds in zip(idxs_under_threshold, emb_dists)]
    rad_correlation_mean = np.mean([
        pearsonr(dists_km_subsets[i], emb_dists_subsets[i])[0]
        for i in range(len(dists_km)) if len(dists_km_subsets[i]) > 1
    ])
    rad_correlation_std = np.std([
        pearsonr(dists_km_subsets[i], emb_dists_subsets[i])[0]
        for i in range(len(dists_km)) if len(dists_km_subsets[i]) > 1
    ])
    metrics['rad_correlation_mean'] = rad_correlation_mean
    metrics['rad_correlation_std'] = rad_correlation_std

    ethnologue_tree_path = '../outputs/ethnologue_tree.txt'
    ethnologue_ancestor_mat_path = '../metadata/ethnologue_lang195_ancestor_mat.npy'
    ethnologue_avg_ancestor_mat_path = '../metadata/ethnologue_lang195_avg_ancestor_mat.npy'
    ethnologue_tree_idxs_path = '../metadata/ethnologue_lang195_tree_idxs.npy'

    if not (os.path.exists(ethnologue_tree_path)
            and os.path.exists(ethnologue_ancestor_mat_path)
            and os.path.exists(ethnologue_avg_ancestor_mat_path)
            and os.path.exists(ethnologue_tree_idxs_path)):

        def contains_pattern(p, node):
            return p in node.name

        filename = "../metadata/ethnologue_forest.json"
        with open(filename, 'r') as f:
            ethnologue_forest = json.load(f)

        importer = DictImporter()
        ethnologue_trees = []
        for cur in ethnologue_forest:
            root = importer.import_(cur)
            ethnologue_trees.append(root)

        output_str = ''
        for root in ethnologue_trees:
            for pre, _, node in RenderTree(root):
                output_str += "%s%s\n" % (pre, node.name)
        with open(ethnologue_tree_path, 'w+') as ouf:
            ouf.write(output_str)

        def get_node_ethnologue(lang):
            l1 = lang_to_3[lang]
            f = partial(contains_pattern, l1)
            for ti, t in enumerate(ethnologue_trees):
                nodes = anytree.search.findall(t, filter_=f)
                if len(nodes) > 0:
                    return nodes[0], ti
            return None, -1

        def tree_dist(node0, node1, mode='min'):
            '''assumes nodes are in the same tree

            e.g. if node0 is depth 7 and node1 is depth 5 and their common ancestor is depth 3,
                then we return 5-3 = 2
            '''
            ancestor_depth = len(anytree.util.commonancestors(node0, node1))
            if mode == 'avg':
                return (node0.depth + node1.depth) / 2 - ancestor_depth
            else:
                return min(node0.depth, node1.depth) - ancestor_depth

        def mk_ancestor_mat(languages, mode='min'):
            '''
            Args:
                languages: list of strings (each string is 6-letter language id)

            Returns:
                ancestor_mat: entry [i][j] is the tree_dist between lang_i and lang_j
            '''
            nodes = []
            tree_idxs = []
            for lang in languages:
                n, ti = get_node_ethnologue(lang)
                nodes.append(n)
                tree_idxs.append(ti)
            ancestor_mat = np.zeros((len(languages), len(languages)),
                                    dtype=int)
            for i, lang1 in enumerate(languages):
                for j, lang2 in enumerate(languages):
                    if lang1 == lang2:
                        ancestor_mat[i][j] = 0
                    elif i < j:
                        node0 = nodes[i]
                        node1 = nodes[j]
                        if tree_idxs[i] != tree_idxs[j]:
                            ancestor_mat[i][j] = -1
                        else:
                            ancestor_mat[i][j] = tree_dist(node0,
                                                           node1,
                                                           mode=mode)
                    else:
                        ancestor_mat[i][j] = ancestor_mat[j][i]
            return ancestor_mat

        if not os.path.exists(ethnologue_ancestor_mat_path):
            lang195_ancestor_mat = mk_ancestor_mat(lang195_langs)
            np.save(ethnologue_ancestor_mat_path, lang195_ancestor_mat)
        if not os.path.exists(ethnologue_avg_ancestor_mat_path):
            lang195_ancestor_mat = mk_ancestor_mat(lang195_langs, mode='avg')
            np.save(ethnologue_avg_ancestor_mat_path, lang195_ancestor_mat)
        if not os.path.exists(ethnologue_tree_idxs_path):
            lang195_tree_idxs = []
            for lang in lang195_langs:
                n, ti = get_node_ethnologue(lang)
                lang195_tree_idxs.append(ti)
            np.save(ethnologue_tree_idxs_path, lang195_tree_idxs)

    ethnologue_lang195_ancestor_mat = np.load(ethnologue_ancestor_mat_path)
    ethnologue_lang195_avg_ancestor_mat = np.load(
        ethnologue_avg_ancestor_mat_path)

    lang195_tree_idxs = np.load(ethnologue_tree_idxs_path)

    emb_dists_np = np.array(emb_dists)

    # for each k, we return fraction of languages that have a top k embedding distance
    # neighbor that's in the family/tree

    langs_with_fam = []  # list of (index in lang195, lang)
    for i, lang in enumerate(lang195_langs):
        if np.sum(ethnologue_lang195_ancestor_mat[i] >= 0) > 1:
            langs_with_fam.append((i, lang))
    tot = len(langs_with_fam)

    all_closest_idxs = {
        i: np.argsort(emb_dists_np[i])[1:]
        for i, lang in langs_with_fam
    }

    max_k = max(ks)
    num_outliers = 10
    num_non_outliers = tot - num_outliers

    for ancestor_mat in [ethnologue_lang195_ancestor_mat]:
        k_outlier = -1
        outliers = []  # list of 6-letter language ids
        for k in range(1, tot):
            count = 0
            curr_outliers = []
            for i, lang in langs_with_fam:
                closest_idxs = all_closest_idxs[i][:k]
                ti = lang195_tree_idxs[i]
                ctis = lang195_tree_idxs[closest_idxs]
                has_fam = (ctis == ti).sum() > 0
                if not has_fam:
                    curr_outliers.append(lang)
                count += has_fam
            if count >= num_non_outliers and k > max_k:
                k_outlier = k
                outliers = curr_outliers
                break
            if k in ks:
                metrics['k-%d' % k] = count / tot
    metrics['k_outlier'] = k_outlier
    metrics['outliers'] = outliers

    ti_to_195_idxs = {}
    ti_to_195_idxs_set = {}
    for i, ti in enumerate(lang195_tree_idxs):
        if ti in ti_to_195_idxs:
            ti_to_195_idxs[ti].append(i)
        else:
            ti_to_195_idxs[ti] = [i]
    for ti in ti_to_195_idxs:
        ti_to_195_idxs_set[ti] = set(ti_to_195_idxs[ti])

    # languages on map colored by language family
    if args.verbose:
        import matplotlib
        k = 10
        tis = lang195_tree_idxs
        ti_list = []
        new_tis = []
        for ti in tis:
            if ti in ti_list:
                new_tis.append(ti_list.index(ti))
            else:
                ti_list.append(ti)
                new_tis.append(len(ti_list) - 1)
        tis = new_tis
        plt.figure(figsize=(20, 20))
        ax = plt.axes(projection=ccrs.PlateCarree())
        ax.stock_img()
        plt.scatter(lons, lats, marker='o', c=tis, cmap=plt.cm.jet)
        plt.title('Language Families')
        plt.colorbar(fraction=0.02, pad=0.01)
        plt.savefig('../outputs/lang_fams.png')

    kmeans = KMeans(n_clusters=5, random_state=0).fit(embs)
    cd = ['b', 'c', 'y', 'm', 'r']  # color scheme, but can also use colormap
    plt.figure(figsize=(20, 20))
    ax = plt.axes(projection=ccrs.PlateCarree())
    ax.stock_img()
    plt.scatter(lons, lats, c=[cd[i] for i in kmeans.labels_], marker='o')
    plt.savefig('../outputs/cluster_map.png')

    # tsne plot: same language family has same color
    tis = lang195_tree_idxs
    min_fam_size = 5
    min_fam_ti = -1
    for ti in tis:
        if len(ti_to_195_idxs_set[ti]) < min_fam_size:
            min_fam_ti = ti
            break
    idxs_kept = []
    for i, ti in enumerate(tis):
        if len(ti_to_195_idxs_set[ti]) < min_fam_size:
            tis[i] = min_fam_ti
        else:
            idxs_kept.append(i)
    ti_to_color_id = {min_fam_ti: 0}
    count = 1
    for ti in tis:
        if ti not in ti_to_color_id:
            ti_to_color_id[ti] = count
            count += 1
    tis = np.array([ti_to_color_id[ti] for ti in tis])[idxs_kept]
    perplexity = 15.0
    learning_rate = 3e2
    n_iter = 600
    n_iter_without_progress = 300
    random_state = 0
    X_embedded = TSNE(n_components=2,
                      perplexity=perplexity,
                      learning_rate=learning_rate,
                      n_iter=n_iter,
                      n_iter_without_progress=n_iter_without_progress,
                      random_state=random_state).fit_transform(embs)
    plt.figure(figsize=(10, 10))
    plt.scatter(X_embedded[idxs_kept, 0],
                X_embedded[idxs_kept, 1],
                c=tis,
                cmap=plt.cm.jet)
    plt.savefig('../outputs/tnse.png')

    with open('../outputs/metrics.json', 'w+') as f:
        json.dump(metrics, f, indent=2)
Example #21
        start = timer()
        build_oct(data, "root", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
        end = timer()
    elif tree_choice == 2:
        start = timer()
        build_kd(data, 6, 0, "root", "root")
        end = timer()
    print("The tree has been built successfully.")
    print(end - start)
elif choise_2 == 2:
    importer = DictImporter()
    if tree_choice == 1:
        dict = ast.literal_eval(open("tree_save/oct_export.txt", encoding="utf-8").read())
        nodes_oct = []
        nodes2 = []
        nodes2 = importer.import_(dict)
        nodes_oct.append(nodes2)
    elif tree_choice == 2:
        dict = ast.literal_eval(open("tree_save/kd_export.txt", encoding="utf-8").read())
        nodes = []
        nodes2 = []
        nodes2 = importer.import_(dict)
        nodes.append(nodes2)
    print(" The tree has been imported successfully")

while True:

    print("==================================================")
    print("Type the number of the  action you want to execute")
    print("1. Search")
    print("2. Delete")
Example #22
def load_labels_tree(data_path):
    """Loads a tree from json format."""
    data = load_json_data(data_path)
    importer = DictImporter(nodecls=AnyNode)
    return importer.import_(data)
Example #23
def word_print(word_str, word, verbosity):
    # pretty print
    title_styles = {
        "noun": Fore.LIGHTRED_EX + "Noun" + Style.RESET_ALL,
        "verb": Fore.LIGHTGREEN_EX + "Verb" + Style.RESET_ALL,
        "adjective": Fore.LIGHTYELLOW_EX + "Adjective" + Style.RESET_ALL,
        "adverb": Fore.LIGHTMAGENTA_EX + "Adverb" + Style.RESET_ALL
    }
    index = 0
    if verbosity["audio"]:
        audio_path = (Path("Vocabot") / "audio" / word_str).with_suffix(".mp3")
        if audio_path.exists():
            playsound(str(audio_path))
    if "ordinal" in word:  # a word that is scraped
        # print main text
        if word["short"] is not None and verbosity["short"]:
            print(Fore.LIGHTGREEN_EX + word["short"] + Style.RESET_ALL + "\n")
        if word["long"] is not None and verbosity["long"]:
            print(Fore.GREEN + word["long"] + Style.RESET_ALL + "\n")

        # print definitions
        print(Fore.LIGHTWHITE_EX + "Definitions:" + Style.RESET_ALL)
        for ordinal_index in range(len(word["ordinal"])):
            print(Fore.LIGHTWHITE_EX + ("\n" if ordinal_index != 0 else "") +
                  str(ordinal_index + 1) + "-----" + Style.RESET_ALL)
            for i in range(index, index + word["ordinal"][ordinal_index]):
                print(Fore.LIGHTWHITE_EX + str(i - index + 1) +
                      Style.RESET_ALL)
                meaning = word["meaning"][i]
                if "title" in meaning and verbosity["title"]:
                    print(title_styles[meaning["title"]])
                if "definition" in verbosity and verbosity["definition"]:
                    print(Fore.LIGHTGREEN_EX + meaning["definition"] +
                          Style.RESET_ALL)
                if "example" in meaning:
                    for example in meaning["example"][:verbosity["example"]]:
                        print(Fore.GREEN + '"' + example + '"' +
                              Style.RESET_ALL)
                instances = {
                    instance: dict(
                        list(meaning[instance].items())[-verbosity[instance]:])
                    for instance in
                    ["antonyms", "synonyms", "types", "type of", "examples"]
                    if instance in meaning and meaning[instance] != {}
                }
                for instance in instances:
                    str_list = []
                    for (word_str, definition) in instances[instance].items():
                        if definition is None:
                            str_list.append("|" + word_str)
                        else:
                            str_list.append("|" + word_str + " > " +
                                            definition)
                    print(Fore.LIGHTCYAN_EX + instance + Style.RESET_ALL +
                          "\n" + Fore.CYAN + "\n".join(str_list) +
                          Style.RESET_ALL)
            index += word["ordinal"][ordinal_index]

        # print family
        print(Fore.LIGHTWHITE_EX + "\n\nWord Family:" + Style.RESET_ALL)
        importer = DictImporter()
        family = importer.import_(word["family"])
        family = RenderTree(family, style=ContRoundStyle()).by_attr("freq")
        family = "\n".join(family.split("\n")[1:verbosity["family"] + 1])
        family += "\n╰──────"
        print(Fore.LIGHTGREEN_EX + family + Style.RESET_ALL)

        # print usage
        if word["usage"] is not None:
            print(Fore.LIGHTWHITE_EX + "\n\nUsage:" + Style.RESET_ALL)
            for usage in word["usage"][:verbosity["usage"]]:
                print(Fore.LIGHTCYAN_EX + usage["sentence"] + Style.RESET_ALL +
                      Fore.CYAN + "\n--" + usage["title"] + " " +
                      (usage["date"] if usage["date"] is not None else "") +
                      Style.RESET_ALL)
    else:  # a word from a list
        word = word["meaning"][0]
        if "definition" in word:
            print(Fore.LIGHTGREEN_EX + word["definition"] + Style.RESET_ALL)
        if "example" in word:
            print(Fore.GREEN + word["example"] + Style.RESET_ALL)
        if "examples" in word:
            if "Example" in word["examples"]:
                print(word["examples"]["Example"])
Example #24
    def get_tbs(self):
        tb_url = "{}/api/content/v1/search".format(self.content_search)
        payload = """{
                    "request": {
                        "filters": {
                            "contentType": ["Textbook"],
                            "status": ["Live"]
                        },
                        "sort_by": {"createdOn":"desc"},
                        "limit": 10000
                    }
                }"""
        headers = {
            'content-type': "application/json; charset=utf-8",
            'cache-control': "no-cache"
        }
        retry_count = 0
        while True:
            try:
                response = requests.request("POST",
                                            tb_url,
                                            data=payload,
                                            headers=headers)
                break
            except requests.exceptions.ConnectionError:
                print("Retry {} for textbook list".format(retry_count + 1))
                retry_count += 1
                sleep(10)
                if retry_count == 5:
                    print("Max retries reached...")
                    break

        list_of_textbooks = pd.DataFrame(response.json()['result']['content'])
        list_of_textbooks = list_of_textbooks[[
            'identifier', 'channel', 'board', 'gradeLevel', 'medium', 'name',
            'subject'
        ]]
        tb_list = list(list_of_textbooks.identifier.unique())
        list_of_textbooks.drop_duplicates(subset=['identifier'],
                                          keep='first',
                                          inplace=True)

        dialcode_df = pd.DataFrame()
        tb_count = 0

        for tb_id in tb_list:
            tb_count = tb_count + 1

            print("currently running for textbook number %d(%s)/%d" %
                  (tb_count, tb_id, len(tb_list)))

            retry_count = 0
            url = "{}/api/course/v1/hierarchy/{}".format(
                self.content_hierarchy, tb_id)
            while True:
                try:
                    response = requests.request("GET", url, headers=headers)
                    break
                except requests.exceptions.ConnectionError:
                    print("Retry {} for TOC {}".format(retry_count + 1, tb_id))
                    retry_count += 1
                    sleep(10)
                    if retry_count == 5:
                        print("Max retries reached...")
                        print("Skipping the run for TB ID %s" % (tb_id))
                        failed_tbs.append(tb_id)
                        break

            if response.json()['result'] != {}:

                tb = response.json()['result']['content']

                if 'children' not in tb:
                    continue

                if tb['children'] is None:
                    continue

                if 'index' not in tb['children'][0]:
                    continue

                tree_obj = self.traverse(tb)
                importer = DictImporter()
                root = importer.import_(tree_obj)
                resources = findall(
                    root,
                    # one-element tuples avoid an accidental substring match
                    filter_=lambda node: node.contentType in ("Resource",))
                dialcodes = findall(
                    root, filter_=lambda node: node.dialcode not in ("",))

                dialcodes_with_content = []
                for resource in resources:
                    for ancestor in resource.ancestors:
                        dialcodes_with_content.append(
                            (ancestor.dialcode, ancestor.index))

                dialcodes_with_content = set(
                    [x for x in dialcodes_with_content if (x[0] != '')])

                dialcodes_all = []
                for dialcode in dialcodes:
                    dialcodes_all.append((dialcode.dialcode, dialcode.index))

                dialcodes_all = set([x for x in dialcodes_all if (x != '')])

                no_content = pd.DataFrame(list(dialcodes_all -
                                               dialcodes_with_content),
                                          columns=['QR', 'Index'])
                no_content['TB_ID'] = tb_id
                no_content['status'] = 'no content'

                with_content = pd.DataFrame(list(dialcodes_with_content),
                                            columns=['QR', 'Index'])
                with_content['TB_ID'] = tb_id
                with_content['status'] = 'content linked'

                final_df = with_content.copy()
                final_df = final_df.append(no_content)

                final_df['Index'].fillna(int(0), inplace=True)
                final_df['Index'].loc[final_df['Index'] == ''] = 0
                final_df.Index = final_df.Index.astype('category')
                final_df.Index.cat.reorder_categories(natsorted(
                    set(final_df.Index)),
                                                      inplace=True,
                                                      ordered=True)
                final_df_sorted_by_index = final_df.sort_values('Index')

                ranks_to_be_assigned_for_positions_of_QR = list(
                    range(len(final_df_sorted_by_index.QR) + 1))[1:]

                final_df_ranked_for_QR = final_df_sorted_by_index

                final_df_ranked_for_QR[
                    'Position of QR in a TB'] = ranks_to_be_assigned_for_positions_of_QR
                final_df_ranked_for_QR[
                    'Position of QR in a TB'] = final_df_ranked_for_QR[
                        'Position of QR in a TB'].astype(int)

                dialcode_df = dialcode_df.append(final_df_sorted_by_index,
                                                 ignore_index=True)

        dialcode_state = dialcode_df.merge(list_of_textbooks,
                                           how='left',
                                           left_on='TB_ID',
                                           right_on='identifier')
        dialcode_state_final = dialcode_state[[
            'board', 'gradeLevel', 'QR', 'medium', 'subject', 'TB_ID', 'name',
            'status', 'Index', 'Position of QR in a TB', 'channel'
        ]]

        execution_date_str = datetime.strptime(self.execution_date,
                                               "%d/%m/%Y").strftime('%Y-%m-%d')

        os.makedirs(self.data_store_location.joinpath('tb_metadata',
                                                      execution_date_str),
                    exist_ok=True)
        dialcode_state_final.to_csv(self.data_store_location.joinpath(
            'tb_metadata', execution_date_str, 'qr_code_state.csv'),
                                    index=False,
                                    encoding='UTF-8')
        post_data_to_blob(self.data_store_location.joinpath(
            'tb_metadata', execution_date_str, 'qr_code_state.csv'),
                          backup=True)
Example #25
a = open('7.txt').read().split('\n')[:-1]
a = [x.split(' -> ') for x in a]
a = [x[0].split() + [x[1].split(', ') if len(x) > 1 else []] for x in a]
a = {x[0]: (int(x[1][1:-1]), x[2]) for x in a}

root = (set(x for x in a) - set(chain(*[a[x][1] for x in a]))).pop()


def get_total_weight(node):
    children = a[node][1]
    if len(children) == 0:
        return a[node][0]
    else:
        return a[node][0] + sum([get_total_weight(x) for x in children])


def create_dict(node):
    children = a[node][1]
    if len(children) == 0:
        return {"a": a[node][0]}
    else:
        return {
            "a": (a[node][0], get_total_weight(node)),
            "children": [create_dict(x) for x in children]
        }


importer = DictImporter()
tree = importer.import_(create_dict(root))

print(RenderTree(tree))
Example #26
fNaive = io.open("./runs/TreeTest2.csv", 'a')

fTrees = io.open("./DictStore.pkl", 'r').read().splitlines()

importer = DictImporter()
singleRunResults = RandomDict()

key = "DEFAULT, SHOULD NEVER BE SEEN"

for line in fTrees:
    if line[0] == '.':
        key = line
        singleRunResults[key] = []
    else:
        tDict = eval(line)
        root = importer.import_(tDict)
        singleRunResults[key].append(root)

done = 0

for x in singleRunResults.keys:
    for y in range(0, 10):
        t = time.time()
        z = singleRunResults.random_key()
        while (z == x or z == x[:-5] + '2.txt'):
            z = singleRunResults.random_key()
        comp = treeTester.runDouble(singleRunResults[x], singleRunResults[z])
        t = time.time() - t
        output = x + ", " + z + ", " + str(comp) + ", " + str(t) + "\n"
        print(output)
        done += 1
Example #27
    node_ids = [i for i in node_ids]
    return node_ids

if __name__ == '__main__':
    input_directory = constants.input_data
    output_directory = constants.output_data
    input_files_term_hierarchy = constants.input_files_term_hierarchy
    node_filter = output_directory + 'filtered_nodes/'+'node_filter_1_input.txt' 
    node_filter_output = output_directory + 'filtered_nodes/'+'node_filter_1_output.json' 
    with codecs.open(input_directory + input_files_term_hierarchy, encoding="utf-8-sig") as json_file:
        treedict = json.load(json_file)

    # preprocess data with this pattern ,\n^\s*"children": null

    # create a tree object
    root = importer.import_(treedict)

    #open the nodes file
    main_nodes = open(node_filter).readlines()
    complete_results = []
    for node_name in main_nodes:
        # check whether this is a combination of nodes
        if '+' not in node_name:
            if '[' not in node_name:
                result = find_container_subnodes([node_name.strip()])
                complete_results.append({node_name.strip():result})
            else:
                result = find_nodes_by_name_pattern(node_name.strip().split('[')[0])
                complete_results.append({node_name.strip().split('[')[0]:result})
        else:
            partial_result = []
Example #28
def run_cidm_chain(circuit_folder, conf_template_file, conf_output_file,
                   conf_cell_file, sub_folder, fitting_type):
    conf_template_file = os.path.join(circuit_folder, conf_template_file)
    conf_output_file = os.path.join(circuit_folder, conf_output_file)
    conf_cell_file = os.path.join(circuit_folder, conf_cell_file)
    fitting_type = FittingType(fitting_type)

    with open(conf_cell_file) as json_file:
        data = json.load(json_file)

    # instead of generating a deepcopy for the results dictionary
    with open(conf_cell_file) as json_file:
        data_results = json.load(json_file)

    simulations = data['simulations']
    simulation_results = data_results['simulations']
    replacements = data['replacements']
    dependency_tree_dict = data['dependency_tree']
    importer = DictImporter()
    dependency_tree = importer.import_(dependency_tree_dict)

    # IDM_STAR_FWD and IDM_STAR_BWD must have a dependency tree
    assert (not (fitting_type == FittingType.IDM_STAR_FWD
                 or fitting_type == FittingType.IDM_STAR_BWD)
            or dependency_tree)

    if not dependency_tree:
        dependency_tree = Node("Top")
        for idx, simulation in enumerate(simulations):
            Node(idx, parent=dependency_tree)

    traverse_order = PreOrderIter
    if fitting_type == FittingType.IDM_STAR_BWD:
        traverse_order = PostOrderIter

    counter = 0

    simulate_all = True

    if simulate_all:
        for node in traverse_order(dependency_tree):
            if node.name == 'Top':
                continue

            counter = counter + 1

            print("===========================")
            print("Progress: {counter}/{total}".format(counter=counter,
                                                       total=len(simulations)))
            print("===========================")

            # counter = counter + 1
            # if counter == 4:
            # 	break

            simulation = simulations[node.name]

            print(node.name, simulation)

            perform_simulation(simulation, conf_template_file,
                               conf_output_file, replacements, circuit_folder,
                               sub_folder, fitting_type, simulations,
                               simulation_results, node, conf_cell_file, data)
    else:
        node_ids = [76, 126]
        for node_id in node_ids:
            simulation = simulations[node_id]
            node = None
            for n in traverse_order(dependency_tree):
                if n.name == node_id:
                    node = n
                    break
            print(node.name, simulation)
            perform_simulation(simulation, conf_template_file,
                               conf_output_file, replacements, circuit_folder,
                               sub_folder, fitting_type, simulations,
                               simulation_results, node, conf_cell_file, data)
Example #29
            else:
                obj[new_key] = "Node_type: " + obj[
                    key] + "\n" + "Value: " + obj[
                        'value'] + "\n" + "Line: " + str(obj['line'])
            del obj[key]
    return obj


if __name__ == '__main__':
    arg_parser = argparse.ArgumentParser(description='Tree Generator!')
    arg_parser.add_argument("--i",
                            default="AST.json",
                            type=str,
                            help="Input JSON file with Nodes")
    arg_parser.add_argument("--o",
                            default="tree.png",
                            type=str,
                            help="Output image for tree ")
    args: argparse.Namespace = arg_parser.parse_args()
    input_filename: str = args.i
    output_filename: str = args.o

    with open(input_filename) as json_file:
        data = json.load(json_file)

    new_json = json.loads(json.dumps(data), object_hook=remove_dots)

    importer = DictImporter()
    root = importer.import_(new_json)

    UniqueDotExporter(root).to_picture(output_filename)
Example #30
courses = {}
with open('courses.csv', newline='') as file:
    coursesInFile = csv.DictReader(file, delimiter=',')

    for course in coursesInFile:
        courses[course['course_code']] = course

selectedCourse = ''

print('--------------Welcome to the UCalgary Prerequisites Wizard!-----------')
selectedCourse = input(
    'Select a UCalgary course you want to generate a prerequisites-tree from:')

prerequisitesTree = getPrerequisites(selectedCourse, courses)

importer = DictImporter()
root = importer.import_(prerequisitesTree)

RenderTreeGraph(root).to_picture("tree.png")
print('`tree.png` has been generated.')
if sys.platform == 'win32':
    subprocess.Popen(['start', 'tree.png'], shell=True)

elif sys.platform == 'darwin':
    subprocess.Popen(['open', 'tree.png'])

else:
    try:
        subprocess.Popen(['xdg-open', 'tree.png'])
    except OSError:
        print("Can't open file in system.")
        # er, think of something else to try
Example #31
    output = ", ".join([file1, file2, str(len(f1)), str(len(f2)),
                        str(mDist1), str(mDist2), str(mDist3), str(runTime)])
    print(output)
    return output


if __name__ == "__main__":
    rrp = RerankingParser.fetch_and_load('WSJ-PTB3', verbose=False)
    importer = DictImporter()

    newTweet = "The quick brown fox jumps over the lazy dog."

    newTree = rrp.simple_parse(newTweet)
    sTree = parse_sexp(newTree)
    pprint(sTree)
    dTree = tMake(sTree[0])
    root = importer.import_(dTree)

    print(RenderTree(root))

    newTweet = "The quick brown fox, it jumped over the lazy dog slowly."

    newTree = rrp.simple_parse(newTweet)
    sTree = parse_sexp(newTree)
    pprint(sTree)
    dTree = tMake(sTree[0])
    root = importer.import_(dTree)

    print(RenderTree(root))

    #print runSingle(sys.argv[1], rrp)