Example #1
0
def load_category_and_relationship(sheet,column_num,label_name, relation_name):
    """Link each listed company in ``sheet`` to the categories named in one column.

    Rows are processed from index 1 onwards (row 0 is skipped). For each row
    the company is looked up by the stock name in column 1 under the
    "上市公司" label; when no such node is matched, a local ``Node`` is built
    for it. The cell at ``column_num`` is stripped of bracketed text and split
    on ``;``. Every resulting name is matched as a ``label_name`` node (and
    created in the graph when missing), then a ``relation_name`` relationship
    from the company node to that category node is created.

    Processing stops at the first row whose cell at ``column_num`` is empty.

    Relies on the module-level ``graph`` and ``common_category`` objects.

    Args:
        sheet: worksheet object exposing ``nrows`` and ``row_values(rowx)``.
        column_num: index of the column holding the ``;``-separated names.
        label_name: node label used for the category nodes.
        relation_name: relationship type linking company -> category.
    """
    node_matcher = NodeMatcher(graph)
    # The raw data contains bracketed fragments that break
    # node_matcher.match(label, name=...), so they are removed first.
    bracket_pattern = re.compile(r"\(.*\)|\{.*?}|\[.*?]")
    row_count = 0

    for rowx in range(1, sheet.nrows):
        row_values = sheet.row_values(rowx)
        stock_name = row_values[1]
        listed_company_node = node_matcher.match("上市公司", name=stock_name).first()
        if listed_company_node is None:
            listed_company_node = Node("上市公司", name=stock_name)

        row_content_tmp = row_values[column_num]
        if len(row_content_tmp) == 0:
            # Missing content: report it and stop so the data can be completed.
            print("内容没有,需要补全")
            break

        row_content_list = bracket_pattern.sub("", row_content_tmp).split(';')
        row_count += 1
        print(row_count, ': ', listed_company_node, row_content_list)

        for row_content in row_content_list:
            # Several companies may share a category, so only create the
            # category node when it does not exist yet.
            category_node = node_matcher.match(label_name, name=row_content).first()
            if category_node is None:
                category_node = Node(label_name, name=row_content)
                graph.create(category_node)
                print(category_node)
            else:
                print("分类已存在,无需重复创建")
                common_category.append(category_node)

            # NOTE: if a failed run is repeated, relationships created by the
            # earlier run may be duplicated; existence of the relationship is
            # not checked here.
            relationship = Relationship(listed_company_node, relation_name, category_node)
            graph.create(relationship)
            print(relationship)
Example #2
0
def load_industry_category_layer4(sheet):
    """Link each listed company in ``sheet`` to its layer-4 industry categories.

    Rows are processed from index 1 onwards (row 0 is skipped). The company is
    looked up by the stock name in column 1 under the "上市公司" label; when
    no node is matched, a local ``Node`` is built for it. Column 13 is
    stripped of bracketed text and split on ``;``. Each resulting name is
    matched as a node labelled ``industry_category_layer4_label.name`` (and
    created in the graph when missing), then an ``industry_category_relation``
    relationship from the company to the category is created.

    Relies on the module-level ``graph``, ``common_category``,
    ``industry_category_layer4_label`` and ``industry_category_relation``.

    Args:
        sheet: worksheet object exposing ``nrows`` and ``row_values(rowx)``.
    """
    node_matcher = NodeMatcher(graph)
    # The raw data contains bracketed fragments that break
    # node_matcher.match(label, name=...), so they are removed first.
    bracket_pattern = re.compile(r"\(.*\)|\{.*?}|\[.*?]")
    row_count = 0
    new_industry_category_node_count = 0
    common_category_count = 0

    for rowx in range(1, sheet.nrows):
        row_values = sheet.row_values(rowx)
        stock_name = row_values[1]
        listed_company_node = node_matcher.match("上市公司", name=stock_name).first()
        if listed_company_node is None:
            listed_company_node = Node("上市公司", name=stock_name)

        industry_category_layer4_tmp = row_values[13]
        industry_category_layer4 = bracket_pattern.sub("", industry_category_layer4_tmp).split(';')
        row_count += 1
        print(row_count, ': ', listed_company_node, industry_category_layer4)

        for industry_category in industry_category_layer4:
            # Several companies may share an industry category, so only create
            # the category node when it does not exist yet.
            industry_category_node = node_matcher.match(
                industry_category_layer4_label.name, name=industry_category).first()
            if industry_category_node is None:
                industry_category_node = Node(industry_category_layer4_label.name,
                                              name=industry_category)
                graph.create(industry_category_node)
                new_industry_category_node_count += 1
                print(industry_category_node, new_industry_category_node_count)
            else:
                # The suffix is backslash + "/n", exactly what the previous
                # invalid escape "\/n" evaluated to, now spelled without the
                # invalid-escape warning.
                # NOTE(review): possibly meant to be a newline "\n" -- confirm.
                common_category.append(industry_category + "\\/n")
                common_category_count += 1
                print("重合类型: ", industry_category, common_category_count)

            # NOTE: if a failed run is repeated, relationships created by the
            # earlier run may be duplicated; existence of the relationship is
            # not checked here.
            relationship = Relationship(listed_company_node, industry_category_relation,
                                        industry_category_node)
            graph.create(relationship)
            print(relationship)
Example #3
0
 def match(self, primary_value=None):
     """Build a node match for this matcher's object class.

     Args:
         primary_value: value of the class's primary key to filter on, or
             ``None`` to match every node carrying the primary label.

     Returns:
         The match produced by ``NodeMatcher.match`` on the class's
         ``__primarylabel__``, filtered by internal node id when the primary
         key is ``"__id__"`` and by the primary-key property otherwise.
     """
     cls = self._object_class
     label = cls.__primarylabel__
     # Handle "no filter" first: with an "__id__" primary key, formatting
     # None through "%d" would raise TypeError.
     if primary_value is None:
         return NodeMatcher.match(self, label)
     if cls.__primarykey__ == "__id__":
         return NodeMatcher.match(self, label).where("id(_) = %d" % primary_value)
     return NodeMatcher.match(self, label).where(**{cls.__primarykey__: primary_value})
Example #4
0
def add_propterty():
    """Attach stock codes to stock nodes and link each company to its stock.

    Iterates the module-level ``sheet`` from row index 1 onwards, reading the
    stock code (column 0), stock name (column 1) and company name (column 2).
    For every row the stock node is looked up by name in the module-level
    ``graph``:

    * no match: a message is printed and a new local stock node is built;
    * more than one match: the duplicates are printed for data cleaning and
      the row is skipped, because there is no single node to update;
    * exactly one match: that node is used.

    The stock code is then set on the stock node, a company node is created,
    and a relationship from the company to the stock node is created.
    """
    STOCK_NAME_LABEL = '股票名称'
    COMPANY_NAME_LABEL = '公司名称'
    STOCK_CODE_PROPERTY = '股票代码'
    COMPANY_STOCK_RELATIONSHIP = '股票名称是'

    node_matcher = NodeMatcher(graph)
    for row_count, rowx in enumerate(range(1, sheet.nrows), start=1):
        row_values = sheet.row_values(rowx)
        stock_code = row_values[0]
        stock_name = row_values[1]
        company_name = row_values[2]

        # Materialize the match once instead of re-running it for every
        # length check and lookup.
        matched_stocks = list(node_matcher.match(STOCK_NAME_LABEL, name=stock_name))

        # Check 1: is this a stock that does not appear in the graph yet?
        # Check 2: does the graph contain duplicate stock names (data cleaning)?
        if not matched_stocks:
            print("没有此股票名称")
            stock_node = Node(STOCK_NAME_LABEL, name=stock_name)
            print(row_count, stock_code, stock_name, company_name)
        elif len(matched_stocks) > 1:
            for data in matched_stocks:
                print(data)
            print(row_count, stock_code, stock_name, company_name)
            # No unambiguous stock node exists for this row. Falling through
            # would reuse the previous row's node (or fail on the first row),
            # so skip it.
            continue
        else:
            stock_node = matched_stocks[0]

        # NOTE(review): for a node that already exists in the graph this only
        # sets the property on the local object; nothing in this function
        # pushes the node afterwards -- confirm whether a push is needed.
        stock_node[STOCK_CODE_PROPERTY] = stock_code
        company_node = Node(COMPANY_NAME_LABEL, name=company_name)
        graph.create(company_node)
        company_stock_relation = Relationship(company_node, COMPANY_STOCK_RELATIONSHIP, stock_node)
        graph.create(company_stock_relation)
        print(row_count, company_node)
Example #5
0
def match():
    """Query one Person node and all FRIENDS relationships.

    Observations recorded by the author for a node that has two properties,
    ``name`` ("Liz") and ``age`` (34):

        match("Person").where(age=34).first()               works
        match("Person").where(name='Liz').first()           works
        match("Person", name="Liz").first()                 works
        match("Person", age=34).first()                     works
        match("Person", age=34).where(name="Liz").first()   None
        match("Person", name="Liz").where(age=34).first()   None

    Returns:
        A 3-tuple: the list of matched FRIENDS relationships, the node found
        for Liz (see the TODO below), and the type of the relationship match
        object.
    """
    node_matcher = NodeMatcher(graph)
    relationship_matcher = RelationshipMatcher(graph)

    # TODO: combining the ``age`` filter in where() with a property given to
    # match() yields None here.
    liz_match = node_matcher.match("Person", name="Liz")
    liz = liz_match.where(age=34).first()

    friendships = relationship_matcher.match(r_type='FRIENDS')
    result = (list(friendships), liz, type(friendships))
    return result
Example #6
0
def get_command_last_run(course_key, graph):
    """
    Look up when this command was last run for a course.

    The timestamp is read from the ``time_last_dumped_to_neo4j`` property of
    the course's "course" node in neo4j.

    Args:
        course_key: a CourseKey
        graph: a py2neo Graph

    Returns: The datetime that the command was last run, converted into
        text, or None, if there's no record of this command last being run.
    """
    course_query = NodeMatcher(graph).match("course", course_key=str(course_key))
    found_course = course_query.first()

    # No (truthy) course node means there is no record of a previous run.
    if not found_course:
        return None
    return found_course['time_last_dumped_to_neo4j']
Example #7
0
 def match(self, primary_value=None):
     """Match nodes of this matcher's object class.

     With ``primary_value`` left as ``None`` the match is on the class's
     primary label only; otherwise the class's primary key must also equal
     ``primary_value``.
     """
     object_class = self._object_class
     if primary_value is None:
         return NodeMatcher.match(self, object_class.__primarylabel__)
     key_filter = {object_class.__primarykey__: primary_value}
     return NodeMatcher.match(self, object_class.__primarylabel__, **key_filter)
Example #8
0
# Walk every sheet of the product workbook and, for each company row, look up
# the listed-company node and the product-category nodes named in column 3.
file_name = '上市公司产品类型和名称.xlsx'
workbook = xlrd.open_workbook(file_name)
sheet_names = workbook.sheet_names()
relation = "产品属于"
product_lable = "产品类型"
common_category = []
count = 0

# One matcher serves the whole run; it does not need rebuilding per row.
node_matcher = NodeMatcher(graph)

for sheet_name in sheet_names:
    sheet = workbook.sheet_by_name(sheet_name)
    # Row 0 and the last two rows of each sheet are not processed.
    for rowx in range(1, sheet.nrows - 2):
        row_values = sheet.row_values(rowx)
        stock_name = row_values[1]
        listed_company_node = node_matcher.match("上市公司",
                                                 name=stock_name).first()
        product_categories_tmp = row_values[3]
        # The raw data contains bracketed fragments that break
        # node_matcher.match(label, name=...), so they are removed first.
        product_categories = re.sub(r"\(.*\)|\{.*?}|\[.*?]", "",
                                    product_categories_tmp).split('、')
        count += 1
        print(count, ': ', listed_company_node, product_categories)
        for product_category in product_categories:
            product_category_node = node_matcher.match(
                product_lable, name=product_category).first()
            print(product_category_node)
            # Several companies may share a product category, so only build
            # the category node when it does not exist yet.
            if product_category_node is None:
                product_category_node = Node(product_lable,
                                             name=product_category)
Example #9
0
class NLMGraph:
    """
    The Memory Graph.

    Wraps a Neo4j ``Graph`` with helpers to add, update and query nodes and
    relationships described by ``GraphNode`` / ``GraphRelation`` inputs.

    Parameters
    -----------
    graph: Graph
        The Neo4j Graph instance.
    """

    graph: Graph

    # NOTE(review): ``__post_init__`` is a dataclass hook and no ``@dataclass``
    # decorator is visible on this class here -- confirm it is applied,
    # otherwise the two matchers below are never created.
    def __post_init__(self):
        self.nmatcher = NodeMatcher(self.graph)
        self.rmatcher = RelationshipMatcher(self.graph)

    @raise_customized_error(Exception, DatabaseError)
    def push_graph(self, subgraph: Subgraph) -> bool:
        """
        Push a subgraph (node, relationship, subgraph) to the Neo database.

        The subgraph is created inside its own transaction, which is
        committed immediately.

        Returns
        --------
        out: the value of ``tx.finished()`` after the commit.
        """
        tx = self.graph.begin()
        tx.create(subgraph)
        tx.commit()
        return tx.finished()

    def add_node(self, label: str, name: str, props: dict) -> Node:
        """
        Add a Node to database.

        Parameters
        ------------
        label: Node label
        name: Node name
        props: Node property

        Returns
        --------
        out: a Node.
        """
        node = Node(label, name=name, **props)
        self.push_graph(node)
        return node

    def check_update_node(self,
                          nlmgn: GraphNode,
                          update_props: bool = False) -> Node:
        """
        Return the graph node for ``nlmgn``, creating it when it is missing.

        Parameters
        ------------
        nlmgn: GraphNode
            The defined Node data type.
            Includes name, labels and properties.
        update_props: bool
            When True and the node already exists, merge ``nlmgn.props``
            into the existing node.

        Returns
        --------
        out: Node
            The existing node (updated when ``update_props`` is True), or
            the newly added node when no node with that label and name was
            matched.
        """
        label, name, props = nlmgn.label, nlmgn.name, nlmgn.props
        neogn = self.nmatcher.match(label, name=name).first()
        if not neogn:
            return self.add_node(label, name, props)
        if update_props:
            return self.update_property(neogn, props)
        return neogn

    @raise_customized_error(Exception, DatabaseError)
    def update_property(self, neog_oj, props: dict):
        """
        Update a neo graph node or relationship.

        Parameters
        ------------
        neog_oj: neo graph object, Node or Relationship
        props: properties to merge into the object; nothing is pushed when
            it is empty or equal to the object's current properties.

        Returns
        --------
        out: updated  Node or Relationship
        """
        neog_oj_props = dict(neog_oj)
        if props and props != neog_oj_props:
            # New props come last so they override the existing values.
            neog_oj.update({**neog_oj_props, **props})
            # The object is already in the graph at this point, so a plain
            # push is used rather than push_graph.
            self.graph.push(neog_oj)
        return neog_oj

    def add_relationship(self, start: Node, end: Node, kind: str,
                         props: dict) -> Relationship:
        """
        Add a Relationship to database.

        Parameters
        ------------
        start: start Node
        end: end Node
        kind: Relationship kind
        props: Relationship property

        Returns
        --------
        out: a Relationship.
        """
        relation = Relationship(start, kind, end, **props)
        self.push_graph(relation)
        return relation

    def check_update_relationship(self,
                                  nlmgr: GraphRelation,
                                  update_props: bool = False) -> Relationship:
        """
        Return the graph relationship for ``nlmgr``, creating it when missing.

        Both end nodes are resolved first through ``check_update_node``.

        Parameters
        ------------
        nlmgr: GraphRelation
            The defined Relationship data type.
            Includes kind, start, end and properties.
        update_props: bool
            When True, merge the given properties into existing nodes and
            into an existing relationship.

        Returns
        --------
        out: Relationship
        """
        kind, props = nlmgr.kind, nlmgr.props
        start = self.check_update_node(nlmgr.start, update_props)
        end = self.check_update_node(nlmgr.end, update_props)
        neogr = self.rmatcher.match((start, end), r_type=kind).first()
        if not neogr:
            return self.add_relationship(start, end, kind, props)
        if update_props:
            return self.update_property(neogr, props)
        return neogr

    def add(self, gin: GraphRelation
            or GraphNode) -> Node or List[Node] or Relationship:
        """
        Add a Node or Relationship to the database.

        Parameters
        ------------
        gin: A GraphNode or GraphRelation (kind could be None)

        Returns
        --------
        out: A Node for a GraphNode, a Relationship for a GraphRelation with
            a kind, or the ``(start, end)`` node pair for a GraphRelation
            whose kind is None.

        Raises
        --------
        InputError: for any other input.
        """
        if isinstance(gin, GraphNode):
            return self.add_node(gin.label, gin.name, gin.props)
        elif isinstance(gin, GraphRelation) and gin.kind:
            start = self.check_update_node(gin.start)
            end = self.check_update_node(gin.end)
            return self.add_relationship(start, end, gin.kind, gin.props)
        elif isinstance(gin, GraphRelation) and gin.kind is None:
            start = self.check_update_node(gin.start)
            end = self.check_update_node(gin.end)
            return (start, end)
        else:
            raise InputError

    def update(
            self, gin: GraphRelation
        or GraphNode) -> Node or List[Node] or Relationship:
        """
        Update the property of a Node or Relationship to the database.

        Parameters
        ------------
        gin: A GraphNode or GraphRelation (kind could be None)

        Returns
        --------
        out: A Node for a GraphNode, a Relationship for a GraphRelation with
            a kind, or the ``(start, end)`` node pair for a GraphRelation
            whose kind is None.

        Raises
        --------
        InputError: for any other input.
        """
        if isinstance(gin, GraphNode):
            return self.check_update_node(gin, update_props=True)
        elif isinstance(gin, GraphRelation) and gin.kind:
            return self.check_update_relationship(gin, update_props=True)
        elif isinstance(gin, GraphRelation) and gin.kind is None:
            start = self.check_update_node(gin.start, update_props=True)
            end = self.check_update_node(gin.end, update_props=True)
            return (start, end)
        else:
            raise InputError

    def query(self, qin, topn=1, limit=10, fuzzy=False) -> list:
        """
        Query by user given.

        Parameters
        -----------
        qin: could be GraphNode, GraphRelation, or just Cypher.
        topn: maximum number of results returned for node/relation queries.
        limit: maximum number of candidates fetched before ranking.
        fuzzy: allow the fallback "name contains" node match.

        Returns
        ---------
        out: queried Nodes or Relationships.

        Raises
        --------
        InputError: when ``qin`` is none of the supported types.
        """
        if isinstance(qin, GraphNode):
            ret = self._query_by_node(qin, topn, limit, fuzzy)
        elif isinstance(qin, GraphRelation):
            ret = self._query_by_relation(qin, topn, limit, fuzzy)
        elif isinstance(qin, str):
            ret = self._query_by_cypher(qin)
        else:
            raise InputError
        return ret

    def _sort_matched(self, matched_nodes: list, props: dict) -> list:
        """
        Sort matched nodes by comparing their properties with the given props.

        Nodes sharing more key/value pairs with ``props`` come first; nodes
        with the same score keep their incoming order.
        """
        def shared_props(node) -> int:
            nprops = dict(node)
            return sum(1 for k, v in props.items()
                       if k in nprops and nprops[k] == v)

        return sorted(matched_nodes, key=shared_props, reverse=True)

    @raise_customized_error(Exception, QueryError)
    def _query_by_node(self, gn: GraphNode, topn: int, limit: int,
                       fuzzy: bool) -> List[Node]:
        """
        Query node by given label and name.
        If nothing matches and ``fuzzy`` is set, fall back to the nodes
        whose name contains the given name.
        """
        label, name, props = gn.label, gn.name, gn.props
        nmatch = self.nmatcher.match(label)
        nodes = nmatch.where(name=name).limit(limit)
        if fuzzy and nodes.first() is None:
            nodes = nmatch.where(name__contains=name).limit(limit)
        nmlst = list(nodes)
        return self.__from_match_to_return(nmlst, props, topn)

    @raise_customized_error(Exception, QueryError)
    def _query_by_relation(self, gr: GraphRelation, topn: int, limit: int,
                           fuzzy: bool) -> List[Relationship]:
        """
        Query relations by given start, end and kind.
        If neither a start nor an end node is found, return [].
        If only one of them is found, match by kind and that node.

        If nothing matches and both nodes were found, match again on the
        node pair alone, ignoring the kind.
        """
        starts = self._query_by_node(gr.start, topn=1, limit=5, fuzzy=fuzzy)
        ends = self._query_by_node(gr.end, topn=1, limit=5, fuzzy=fuzzy)
        start = starts[0] if starts else None
        end = ends[0] if ends else None
        kind, props = gr.kind, gr.props
        if not start and not end:
            return []
        # start, end could be None
        # r_type could be None
        relations = self.rmatcher.match((start, end), r_type=kind).limit(limit)
        if relations.first() is None and start and end:
            relations = self.rmatcher.match((start, end)).limit(limit)
        rmlst = list(relations)
        return self.__from_match_to_return(rmlst, props, topn)

    def _query_by_cypher(self, cypher: str) -> list:
        """
        Run a read-only Cypher query and return its records as a list.

        Only statements starting with ``MATCH`` (optionally preceded by one
        space) are accepted. At most ``N`` records are returned, where ``N``
        is taken from the first ``LIMIT N`` found in the query and defaults
        to 5. If the query yields fewer records, all of them are returned.

        Returns
        --------
        out: list of ``item.data()`` for each returned record.

        Raises
        --------
        OverstepError: when the statement does not start with MATCH.
        QueryError: when running the query or reading its records fails.
        """
        pattern_match = re.compile(r'^ ?MATCH')
        # ``\d+`` so that multi-digit limits such as "LIMIT 10" are read whole.
        pattern_limit = re.compile(r'LIMIT (\d+)')
        if not pattern_match.search(cypher):
            raise OverstepError
        searched_limit = pattern_limit.search(cypher)
        topn = int(searched_limit.group(1)) if searched_limit else 5
        try:
            cursor = self.graph.run(cypher)
            res = []
            for item in cursor:
                res.append(item.data())
                if len(res) == topn:
                    break
            # Returned here as well when the cursor ran out before ``topn``.
            return res
        except Exception as e:
            raise QueryError from e

    def __from_match_to_return(self, matched_list: list, props: dict,
                               topn: int) -> list:
        """
        Cut a match result down to ``topn`` items, ranking by ``props`` first
        when there is more than one item and props were given.
        """
        if not matched_list:
            return []
        if len(matched_list) > 1 and props:
            ret = self._sort_matched(matched_list, props)[:topn]
        else:
            ret = matched_list[:topn]
        return ret

    @property
    def labels(self) -> frozenset:
        """all labels"""
        return self.graph.schema.node_labels

    @property
    def relationship_types(self) -> frozenset:
        """all relation types"""
        return self.graph.schema.relationship_types

    @property
    def nodes_num(self) -> int:
        """all nodes amounts"""
        return len(self.graph.nodes)

    @property
    def relationships_num(self) -> int:
        """all relations amounts"""
        return len(self.graph.relationships)

    @property
    def nodes(self) -> types.GeneratorType:
        """all nodes (an iterator)"""
        return iter(self.graph.nodes.match())

    @property
    def relationships(self) -> types.GeneratorType:
        """all relations (an iterator)"""
        return iter(self.graph.relationships.match())

    def excute(self, cypher) -> dict:
        """
        Run an arbitrary Cypher statement and return its statistics.

        Be careful to use this function.
        Especially when you're updating the database.
        This function will not check the duplicated nodes or relationships.

        Raises
        --------
        InputError: when running the statement fails; the original error is
            kept as the cause.
        """
        try:
            run = self.graph.run(cypher)
            return dict(run.stats())
        except Exception as e:
            raise InputError from e
Example #10
0
class NodeMatcherTestCase(IntegrationTestCase):
    """Integration tests for ``NodeMatcher`` run against the movies data set."""

    def setUp(self):
        # Start every test from a graph holding only the movies fixture.
        self.graph.delete_all()
        fixture_path = path_join(dirname(__file__), "..", "resources", "movies.cypher")
        with open(fixture_path) as fixture:
            statements = fixture.read()
        self.graph.run(statements)
        self.matcher = NodeMatcher(self.graph)

    def tearDown(self):
        self.graph.delete_all()

    def _assert_only_keanu(self, found):
        """Assert that ``found`` holds exactly the Keanu Reeves node."""
        assert len(found) == 1
        node = found[0]
        assert isinstance(node, Node)
        assert node["name"] == "Keanu Reeves"
        assert node["born"] == 1964

    def _people_starting_with_k(self):
        """Return the match for Person nodes whose name starts with 'K'."""
        return self.matcher.match("Person").where("_.name =~ 'K.*'")

    def test_can_match_by_label_key_value(self):
        people = list(self.matcher.match("Person", name="Keanu Reeves"))
        self._assert_only_keanu(people)

    def test_can_match_by_label_only(self):
        people = list(self.matcher.match("Person"))
        assert len(people) == 131

    def test_can_match_all_nodes(self):
        everything = list(self.matcher.match())
        assert len(everything) == 169

    def test_can_count_all_nodes(self):
        self.assertEqual(len(self.matcher.match()), 169)

    def test_can_match_by_label_and_multiple_values(self):
        people = list(self.matcher.match("Person", name="Keanu Reeves", born=1964))
        self._assert_only_keanu(people)

    def test_multiple_values_must_intersect(self):
        people = list(self.matcher.match("Person", name="Keanu Reeves", born=1963))
        assert len(people) == 0

    def test_custom_conditions(self):
        names = {person["name"] for person in list(self._people_starting_with_k())}
        assert names == {'Keanu Reeves', 'Kelly McGillis', 'Kevin Bacon',
                         'Kevin Pollak', 'Kiefer Sutherland', 'Kelly Preston'}

    def test_custom_conditions_with_parameters(self):
        condition = ("_.name = {1}", {"1": "Keanu Reeves"})
        people = list(self.matcher.match("Person").where(condition))
        self._assert_only_keanu(people)

    def test_order_by(self):
        ordered = self._people_starting_with_k().order_by("_.name")
        names = [person["name"] for person in list(ordered)]
        assert names == ['Keanu Reeves', 'Kelly McGillis', 'Kelly Preston',
                         'Kevin Bacon', 'Kevin Pollak', 'Kiefer Sutherland']

    def test_skip(self):
        skipped = self._people_starting_with_k().order_by("_.name").skip(2)
        names = [person["name"] for person in list(skipped)]
        assert names == ['Kelly Preston', 'Kevin Bacon', 'Kevin Pollak', 'Kiefer Sutherland']

    def test_limit(self):
        page = self._people_starting_with_k().order_by("_.name").skip(2).limit(2)
        names = [person["name"] for person in list(page)]
        assert names == ['Kelly Preston', 'Kevin Bacon']

    def test_multiple_custom_conditions(self):
        sixties_js = self.matcher.match("Person").where(
            "_.name =~ 'J.*'", "_.born >= 1960", "_.born < 1970")
        names = {person["name"] for person in list(sixties_js)}
        assert names == {'James Marshall', 'John Cusack', 'John Goodman', 'John C. Reilly', 'Julia Roberts'}

    def test_one(self):
        first_person = self._people_starting_with_k().order_by("_.name").first()
        assert first_person["name"] == 'Keanu Reeves'

    def test_tuple_property_value(self):
        wanted = ("Kevin Bacon", "Kiefer Sutherland")
        names = {person["name"] for person in list(self.matcher.match("Person", name=wanted))}
        assert names == {"Kevin Bacon", "Kiefer Sutherland"}

    def test_set_property_value(self):
        wanted = {"Kevin Bacon", "Kiefer Sutherland"}
        names = {person["name"] for person in list(self.matcher.match("Person", name=wanted))}
        assert names == {"Kevin Bacon", "Kiefer Sutherland"}

    def test_frozenset_property_value(self):
        wanted = frozenset(["Kevin Bacon", "Kiefer Sutherland"])
        names = {person["name"] for person in list(self.matcher.match("Person", name=wanted))}
        assert names == {"Kevin Bacon", "Kiefer Sutherland"}