# Merge the relations listed in
# complete_list_of_awesome_list_collect_relation.json into the graph.
# Each record supplies a start URL, a relation type and an end URL; both URLs
# are looked up with find_awesome_list_entity() before the relationship is
# merged.  Records with an unresolved endpoint are logged and skipped.
import codecs
import json

from py2neo import Relationship

from skgraph.graph.accessor.graph_accessor import GraphClient
from skgraph.graph.accessor.graph_client_for_awesome import AwesomeGraphAccessor
from shared.logger_util import Logger

_logger = Logger("AwesomeImporter").get_log()

awesomeGraphAccessor = AwesomeGraphAccessor(GraphClient(server_number=0))
baseGraphClient = awesomeGraphAccessor.graph

file_name = "complete_list_of_awesome_list_collect_relation.json"
with codecs.open(file_name, 'r', 'utf-8') as f:
    relation_list = json.load(f)

for relation in relation_list:
    # Read all three fields up front so a malformed record fails before any
    # graph lookup is issued.
    start_url = relation["start_url"]
    relation_str = relation["relation"]
    end_url = relation["end_url"]

    start_node = awesomeGraphAccessor.find_awesome_list_entity(start_url)
    end_node = awesomeGraphAccessor.find_awesome_list_entity(end_url)

    # Without both endpoints there is nothing to link: report and move on.
    if start_node is None or end_node is None:
        _logger.warn("fail create relation" + str(relation))
        continue

    baseGraphClient.merge(Relationship(start_node, relation_str, end_node))
    _logger.info("create or merge relation" + str(relation))
# Driver for the awesome-graph clean-up: runs the rename/merge steps one after
# another against graph server 1, printing a banner before each step.
import sys

from script.graph_operation.graph_data_clean.awesome_item_rename_ambiguous import awesome_item_rename_ambiguous
from script.graph_operation.graph_data_clean.awesome_item_rename_duplicate import awesome_item_rename_duplicate
from script.graph_operation.graph_data_clean.merge_awesome_nodes_from_database import merge_awesome_nodes_from_database
from script.graph_operation.graph_data_clean.merge_awesome_nodes_from_file import merge_awesome_nodes_from_file
from skgraph.graph.accessor.graph_accessor import GraphClient
from skgraph.graph.accessor.graph_client_for_awesome import AwesomeGraphAccessor
from skgraph.graph.node_collection import NodeCollection

# Python 2 only: reload(sys) makes setdefaultencoding() reachable again so the
# process-wide default string encoding can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

graph_client = GraphClient(server_number=1)
awesome_graph_accessor = AwesomeGraphAccessor(graph_client)
node_collection = NodeCollection(graph_client)

# Banner line printed ahead of every step.
_STEP_BANNER = "----------------------------------------------------------"

# step1
print(_STEP_BANNER)
print("step1 begin: rename duplicate awesome items")
awesome_item_rename_duplicate(awesome_graph_accessor, node_collection)

# step2
print(_STEP_BANNER)
print("step2 begin: merge awesome nodes from file")
merge_awesome_nodes_from_file(awesome_graph_accessor)

# step3
print(_STEP_BANNER)
print("step3 begin: merge awesome nodes from database")
merge_awesome_nodes_from_database(awesome_graph_accessor, node_collection)

# step4
# NOTE(review): only the banner is visible for this step; presumably the
# imported awesome_item_rename_ambiguous() is meant to run here -- confirm.
print(_STEP_BANNER)
# Two-pass import into the awesome graph:
#   1. every record of new_complete_list_of_awesome_list.json is created or
#      updated as an awesome-list entity keyed by its URL;
#   2. every entry of awesome_item_category_entity_list.json is passed to
#      find_or_create_awesome_cate().
import codecs
import json

from skgraph.graph.accessor.graph_accessor import GraphClient
from skgraph.graph.accessor.graph_client_for_awesome import AwesomeGraphAccessor

awesomeGraphAccessor = AwesomeGraphAccessor(GraphClient(server_number=0))
baseGraphClient = awesomeGraphAccessor.graph

# ---- pass 1: awesome-list entities -----------------------------------------
file_name = "new_complete_list_of_awesome_list.json"
with codecs.open(file_name, 'r', 'utf-8') as f:
    awesome_item_category_entity_list = json.load(f)

for cate in awesome_item_category_entity_list:
    # Mandatory fields are copied verbatim (a missing one raises KeyError).
    property_dict = {
        field: cate[field]
        for field in ("name", "category", "repository name", "alias", "url")
    }
    # The list's own URL doubles as its source code repository.
    property_dict["source code repository"] = cate["url"]

    # The description is optional and only stored when it is not empty.
    description = cate.get("description", "")
    if description != "":
        property_dict["description"] = description

    node = awesomeGraphAccessor.create_or_update_awesome_list_entity(cate["url"], property_dict)

# ---- pass 2: awesome item categories ---------------------------------------
file_name = "awesome_item_category_entity_list.json"
with codecs.open(file_name, 'r', 'utf-8') as f:
    awesome_item_category_entity_list = json.load(f)

for cate in awesome_item_category_entity_list:
    node = awesomeGraphAccessor.find_or_create_awesome_cate(cate)
# Walk the GitHub records stored in temp_data.json and look up the awesome
# item that matches each record's "github:url".
import codecs
import json

from py2neo import watch

from skgraph.graph.accessor.graph_accessor import GraphClient
from skgraph.graph.accessor.graph_client_for_awesome import AwesomeGraphAccessor

# Switch on py2neo's watcher for "httpstream" (presumably HTTP-level logging
# used while debugging -- confirm against the installed py2neo version).
watch("httpstream")

awesomeGraphAccessor = AwesomeGraphAccessor(GraphClient(server_number=0))
baseGraphClient = awesomeGraphAccessor.graph

filename = "temp_data.json"
with codecs.open(filename, "r", "utf8") as f:
    github_info_list = json.load(f)

# Disabled trial run against the first record only.  It was kept as an inert
# string literal in the original; the indentation below is reconstructed.
#
#   each = github_info_list[0]
#   github_url = each["github:url"]
#   node = awesomeGraphAccessor.find_awesome_item_by_url(github_url)
#   if node is not None:
#       keys = each.keys()
#       for key in keys:
#           node[key] = each[key]
#       print github_info_list.index(each) + 1
#       print node
#       awesomeGraphAccessor.push_node(node)

for each in github_info_list:
    github_url = each["github:url"]
    node = awesomeGraphAccessor.find_awesome_item_by_url(github_url)
    if node is None:
        # No awesome item is registered under this URL; nothing to do.
        continue
    # NOTE(review): `keys` is not consumed in the visible part of the script;
    # the loop body appears to be cut off here.
    keys = each.keys()
# For every record of
# awesome_item_category_related_to_wikipedia_relation_list.json, resolve the
# awesome category (by name) and the wikipedia node (by link), creating the
# wikipedia entity when no node exists for the link yet.
import codecs
import json

from py2neo import Relationship

from skgraph.graph.accessor.graph_accessor import GraphClient, DefaultGraphAccessor
from skgraph.graph.accessor.graph_client_for_awesome import AwesomeGraphAccessor
from skgraph.graph.accessor.graph_client_for_wikipedia import WikipediaGraphAccessor
from shared.logger_util import Logger

_logger = Logger("AwesomeImporter").get_log()

# The wikipedia and default accessors are both built from the awesome accessor.
awesomeGraphAccessor = AwesomeGraphAccessor(GraphClient(server_number=0))
wikipediaGraphAccessor = WikipediaGraphAccessor(awesomeGraphAccessor)
defaultGraphAccessor = DefaultGraphAccessor(awesomeGraphAccessor)
baseGraphClient = awesomeGraphAccessor.graph

file_name = "awesome_item_category_related_to_wikipedia_relation_list.json"
with codecs.open(file_name, 'r', 'utf-8') as f:
    relation_list = json.load(f)

for tag_relation in relation_list:
    start_entity_name = tag_relation["start_entity_name"]
    relation = tag_relation["relation"]
    end_url = tag_relation["end_url"]

    start_node = awesomeGraphAccessor.find_awesome_cate_by_name(start_entity_name)

    # Reuse the node already stored for this wikipedia link if there is one,
    # otherwise create the wikipedia entity from the URL.
    end_node = defaultGraphAccessor.get_node_by_wikipedia_link(end_url)
    if end_node is None:
        end_node = wikipediaGraphAccessor.create_wikipedia_item_entity_by_url(end_url)
    # NOTE(review): the visible script stops here -- `relation`, `start_node`,
    # Relationship and _logger are not used yet, so the step that merges the
    # relationship is presumably cut off.
# Create or update one awesome-item entity per record of
# awesome_item_entity_list.json, keyed by the record's URL.
import codecs
import json

from skgraph.graph.accessor.graph_accessor import GraphClient
from skgraph.graph.accessor.graph_client_for_awesome import AwesomeGraphAccessor

awesomeGraphAccessor = AwesomeGraphAccessor(GraphClient(server_number=0))
baseGraphClient = awesomeGraphAccessor.graph

file_name = "awesome_item_entity_list.json"
with codecs.open(file_name, 'r', 'utf-8') as f:
    awesome_item_entity_list = json.load(f)

for cate in awesome_item_entity_list:
    property_dict = {"name": cate["name"], "url": cate["url"]}
    item_url = property_dict["url"]

    # Items hosted on GitHub also record the URL as their repository.
    if item_url.startswith("https://github.com/"):
        property_dict["source code repository"] = item_url

    # The description is optional and only stored when it is not empty.
    description = cate.get("description", "")
    if description != "":
        property_dict["description"] = description

    node = awesomeGraphAccessor.create_or_update_awesome_item_entity(item_url, property_dict)
# Merge one "awesome list topic" node into the graph for every entry of
# awesome_list_define_for_entity_list.json; the entry itself becomes the
# node's "name" property.
import codecs
import json

from skgraph.graph.accessor.factory import NodeBuilder
from skgraph.graph.accessor.graph_accessor import GraphClient
from skgraph.graph.accessor.graph_client_for_awesome import AwesomeGraphAccessor

awesomeGraphAccessor = AwesomeGraphAccessor(GraphClient(server_number=0))
baseGraphClient = awesomeGraphAccessor.graph

file_name = "awesome_list_define_for_entity_list.json"
with codecs.open(file_name, 'r', 'utf-8') as f:
    awesome_item_category_entity_list = json.load(f)

for cate in awesome_item_category_entity_list:
    builder = (
        NodeBuilder()
        .add_label("awesome list topic")
        .add_entity_label()
        .add_one_property("name", cate)
    )
    node = builder.build()
    # Echo the node before it is merged so progress shows on the console.
    print(node)
    node = baseGraphClient.merge(node)

# TODO: link different sources to the same wikidata concept
# Script: link awesome items to their category in the graph.
#
# Loads awesome_item_belong_relations.json and, for each record, obtains the
# item node via find_or_create_awesome_item_entity_by_url(start_url) and the
# category node via find_or_create_awesome_cate(end_entity_name).  When both
# nodes are present, a "main category" relationship from the item to the
# category is merged into the graph and the record is logged at info level.
#
# NOTE(review): the statements below are collapsed onto a single line and the
# text ends on a bare `else:` -- the failure branch is not visible here.
# Presumably it logs the record that could not be linked; confirm against the
# full script before relying on it.
import codecs import json from py2neo import Relationship from skgraph.graph.accessor.graph_accessor import GraphClient from skgraph.graph.accessor.graph_client_for_awesome import AwesomeGraphAccessor from shared.logger_util import Logger _logger = Logger("AwesomeImporter").get_log() awesomeGraphAccessor = AwesomeGraphAccessor(GraphClient(server_number=0)) baseGraphClient = awesomeGraphAccessor.graph file_name = "awesome_item_belong_relations.json" with codecs.open(file_name, 'r', 'utf-8') as f: relation_list = json.load(f) for relation in relation_list: start_url = relation["start_url"] relation_str = "main category" end_entity_name = relation["end_entity_name"] start_node = awesomeGraphAccessor.find_or_create_awesome_item_entity_by_url( start_url) end_node = awesomeGraphAccessor.find_or_create_awesome_cate( end_entity_name) if start_node is not None and end_node is not None: relationship = Relationship(start_node, relation_str, end_node) baseGraphClient.merge(relationship) _logger.info("create or merge relation" + str(relation)) else: