def get_test_instance(self, temp_location):
    """Create a ScienceDirectDS data source for use in tests.

    The project imports are performed inside the method, so they are only
    resolved when a test instance is actually requested.

    Args:
        temp_location: Value stored under ``init.location`` in the
            configuration handed to the data source.

    Returns:
        A ``ScienceDirectDS`` built from that configuration.
    """
    from data_platform.config import ConfigManager
    from data_platform.datasource import ScienceDirectDS

    settings = {"init": {"location": temp_location}}
    return ScienceDirectDS(ConfigManager(settings))
def get_test_instance(self, temp_location):
    """Create a MongoDBDS data source for use in tests.

    The connection settings are read from the global configuration under
    ``test.mongodb.uri`` and ``test.mongodb.database``.

    Args:
        temp_location: Not used by this implementation.

    Returns:
        A ``MongoDBDS`` configured with the looked-up URI and database.
    """
    from data_platform.config import ConfigManager, get_global_config
    from data_platform.datasource.mongodb import MongoDBDS

    settings = get_global_config()
    # Dict entries are evaluated top to bottom, so the URI is looked up
    # before the database name.
    init_options = {
        "uri": settings.check_get(['test', 'mongodb', 'uri']),
        'database': settings.check_get(['test', 'mongodb', 'database']),
    }
    return MongoDBDS(ConfigManager({"init": init_options}))
def get_test_instance(self, temp_location):
    """Create a SQLiteDS data source for use in tests.

    Args:
        temp_location: Directory path; the data source location is the
            file ``data.db`` joined onto it.

    Returns:
        A ``SQLiteDS`` whose ``init.location`` is ``<temp_location>/data.db``.
    """
    from data_platform.config import ConfigManager
    from data_platform.datasource import SQLiteDS

    db_file = os.path.join(temp_location, 'data.db')
    settings = {"init": {"location": db_file}}
    return SQLiteDS(ConfigManager(settings))
def set_nxds():
    """Return a NetworkXDS reading GraphML files from ``<cwd>/data/graph``.

    The location is derived from the process working directory at call
    time.
    """
    graph_dir = Path(os.getcwd(), 'data', 'graph')
    settings = {
        "init": {"location": graph_dir},
        "file_format": "graphml",
    }
    return NetworkXDS(ConfigManager(settings))
def init():
    """Build the NetworkXDS used for graphs stored under ``data/graph``.

    Returns:
        A ``NetworkXDS`` configured with ``init.location`` set to
        ``<current working directory>/data/graph`` and ``file_format``
        set to ``graphml``.
    """
    graph_dir = Path(os.getcwd()).joinpath('data', 'graph')
    options = ConfigManager({
        "init": {"location": graph_dir},
        'file_format': 'graphml',
    })
    return NetworkXDS(options)
def do_text():
    """Build a text network from the submitted form and summarise it.

    Reads the ``database``, ``source``, ``document``, ``node`` and
    ``relation`` fields from ``request.forms``, creates the database,
    builds the network with ``nc.create_network_text`` and then loads the
    resulting graph from ``<cwd>/data/graph`` to count its nodes and edges.

    Returns:
        A dict describing the generated network: the submitted fields, the
        GraphML file name (``<database>.graphml``), node and edge counts,
        and the network/weight type labels.
    """
    db_name = request.forms.get('database')
    print(db_name)
    db.create_database(db_name)
    db.flush()

    src = request.forms.get('source')
    doc = request.forms.get('document')
    node_kind = request.forms.get('node')
    rel = request.forms.get('relation')
    nc.create_network_text(src, doc, node_kind, rel, db_name)
    db.flush()

    graph_dir = Path(os.getcwd(), 'data', 'graph')
    options = ConfigManager({
        "init": {"location": graph_dir},
        "file_format": "graphml",
    })
    print(graph_dir)

    # Data source used to read the network back.
    graph_store = NetworkXDS(options)
    print(graph_store.read_graph())
    graph = graph_store.read_graph(db_name)[db_name]

    node_count = graph.number_of_nodes()
    edge_count = graph.number_of_edges()
    print(node_count)
    print(edge_count)

    summary = {
        'database': db_name + '.graphml',
        'source': src,
        'document': doc,
        'node': node_kind,
        'relation': rel,
        'node_number': node_count,
        'edge_number': edge_count,
        'nettype': node_kind + ' word network',
        'weighttype': 'count',
    }
    print(summary)
    return summary
def do_author():
    """Build an author citation network from the submitted form.

    Reads the ``database``, ``source``, ``document`` and ``relation``
    fields from ``request.forms``, creates the database, builds the
    network with ``nc.create_network_author`` and then loads the resulting
    graph from ``<cwd>/data/graph`` to count its nodes and edges.

    Returns:
        A dict describing the generated network: the submitted fields, the
        GraphML file name (``<database>.graphml``), node and edge counts,
        and fixed ``nettype``/``weighttype`` labels.
    """
    db_name = request.forms.get('database')
    print(db_name)
    db.flush()
    db.create_database(db_name)

    src = request.forms.get('source')
    doc = request.forms.get('document')
    rel = request.forms.get('relation')
    nc.create_network_author(src, doc, rel, db_name)
    db.flush()

    graph_dir = Path(os.getcwd(), 'data', 'graph')
    options = ConfigManager({
        "init": {"location": graph_dir},
        "file_format": "graphml",
    })
    print(graph_dir)

    # Data source used to read the network back.
    graph_store = NetworkXDS(options)
    print(graph_store.read_graph())
    graph = graph_store.read_graph(db_name)[db_name]

    node_count = graph.number_of_nodes()
    edge_count = graph.number_of_edges()
    print(node_count)
    print(edge_count)

    # nettype could also be "co work network", in which case weighttype
    # becomes "co_count"; that should be made dynamic in a later version.
    # For now only the citation variant is used.
    summary = {
        'database': db_name + '.graphml',
        'source': src,
        'document': doc,
        'node': "undefined",
        'relation': rel,
        'node_number': node_count,
        'edge_number': edge_count,
        'nettype': 'author citation network',
        'weighttype': 'cite_count',
    }
    print(summary)
    return summary
def get_test_instance(self, temp_location):
    """Return the ArangoDBDS test data source, creating it on first use.

    The instance is stored on ``self._cache_ds`` and returned again on
    later calls. Connection settings are read from the global
    configuration under ``test.arangodb``.

    Args:
        temp_location: Not used by this implementation.

    Returns:
        The ``ArangoDBDS`` held in ``self._cache_ds``.
    """
    from data_platform.config import ConfigManager, get_global_config
    from data_platform.datasource.arangodb import ArangoDBDS

    if self._cache_ds is not None:
        return self._cache_ds

    settings = get_global_config()
    # Look the keys up in a fixed order: uri, user, password, database.
    init_options = {
        key: settings.check_get(['test', 'arangodb', key])
        for key in ('uri', 'user', 'password', 'database')
    }
    self._cache_ds = ArangoDBDS(ConfigManager({"init": init_options}))
    return self._cache_ds
def do_author():
    """Build an author network from the submitted form and summarise it.

    Reads the ``database``, ``source``, ``document`` and ``relation``
    fields from ``request.forms``, creates the database, builds the
    network with ``nc.create_network_author`` and then loads the resulting
    graph from ``<cwd>/data/graph`` to count its nodes and edges.

    Returns:
        A dict with the submitted fields, a fixed ``node`` value of
        ``"undefined"``, and the node and edge counts of the graph.
    """
    db_name = request.forms.get('database')
    print(db_name)
    db.flush()
    db.create_database(db_name)

    src = request.forms.get('source')
    doc = request.forms.get('document')
    rel = request.forms.get('relation')
    nc.create_network_author(src, doc, rel, db_name)
    db.flush()

    graph_dir = Path(os.getcwd()).joinpath('data', 'graph')
    options = ConfigManager({
        "init": {"location": graph_dir},
        "file_format": "graphml",
    })
    print(graph_dir)

    # Data source used to read the network back.
    graph_store = NetworkXDS(options)
    print(graph_store.read_graph())
    graph = graph_store.read_graph(db_name)[db_name]

    node_count = graph.number_of_nodes()
    edge_count = graph.number_of_edges()
    print(node_count)
    print(edge_count)

    summary = {
        'database': db_name,
        'source': src,
        'document': doc,
        'node': "undefined",
        'relation': rel,
        'node_number': node_count,
        'edge_number': edge_count,
    }
    print(summary)
    return summary
def get_test_instance(self, temp_location):
    """Create a JSONDS data source for use in tests.

    Args:
        temp_location: Value stored under ``init.location`` in the
            configuration handed to the data source.

    Returns:
        A ``JSONDS`` built from that configuration.
    """
    from data_platform.config import ConfigManager
    from data_platform.datasource import JSONDS

    settings = {"init": {"location": temp_location}}
    return JSONDS(ConfigManager(settings))
from data_fetcher.id_manager import IDManager from data_fetcher.ieee.ieee_retrieval import IEEERetrieval from data_fetcher.ieee.ieee_fulltext_spider import IEEEFulltextSpider from data_fetcher.parser.pdf_parser import PDFParser, PDFFormat from data_platform.config import ConfigManager from data_platform.datasource.abc.doc import DocKeyPair from data_platform.datasource.mongodb import MongoDBDS current_path = Path(os.path.join(os.getcwd(), "..")) data_path = current_path / 'data' xml_path = data_path / 'unprocessed_articles_xml' pdf_path = data_path / 'pdf_files' config = ConfigManager({ "init": { "location": xml_path, "pdf": pdf_path, 'uri': None, 'database': 'db' } }) def main(query, paper_set, num_result): # 创建mongoDB数据源管理 mgdbds = MongoDBDS(config=config) # 指定ID管理器 pim = IDManager( config=config, key=DocKeyPair('paper_id', 'title'), auto_inc=DocKeyPair('id_inc', 'paper_id')
# encoding:utf-8 import os from pathlib import Path from data_platform.config import ConfigManager from network_construction import source as s from network_construction import database as db from network_construction import algorithm current_path = Path(os.getcwd()) data_path = current_path / 'data' xml_path = data_path / 'unprocessed_articles_xml' config = ConfigManager({"init": {"location": xml_path}}) # the output format is a dictionary; its key is node_key and the properties are id name email; # if there is no author-name, the key is author_null def node_extraction_author(source, document, database): authors = s.search_author(source, document) citations = s.search_citation(source, document) for a in authors: author_name = a['author_list'][0] node_key = "author_" + author_name node_struct = {} node_struct['id'] = "null" node_struct['name'] = author_name node_struct['email'] = "null" db.insert_author(node_key, node_struct, database) for c in citations: for value0 in c['bib_detail'].items(): value = value0[1] if 'authors' in value.keys():