Example #1
0
    def get_test_instance(self, temp_location):
        """Build a ``ScienceDirectDS`` configured with *temp_location*.

        Args:
            temp_location: value stored under ``init.location`` in the
                configuration handed to the data source.

        Returns:
            A newly constructed ``ScienceDirectDS``.
        """
        from data_platform.config import ConfigManager
        from data_platform.datasource import ScienceDirectDS

        init_options = {"location": temp_location}
        return ScienceDirectDS(ConfigManager({"init": init_options}))
Example #2
0
    def get_test_instance(self, temp_location):
        """Build a ``MongoDBDS`` from the ``test.mongodb`` global settings.

        The ``uri`` and ``database`` values are looked up (in that order) in
        the global configuration via ``check_get``.

        Args:
            temp_location: not used by this implementation.

        Returns:
            A newly constructed ``MongoDBDS``.
        """
        from data_platform.config import ConfigManager, get_global_config
        from data_platform.datasource.mongodb import MongoDBDS

        settings = get_global_config()
        init_options = {
            option: settings.check_get(['test', 'mongodb', option])
            for option in ('uri', 'database')
        }
        return MongoDBDS(ConfigManager({"init": init_options}))
Example #3
0
    def get_test_instance(self, temp_location):
        """Build a ``SQLiteDS`` pointing at ``data.db`` under *temp_location*.

        Args:
            temp_location: directory that is joined with ``'data.db'`` to
                form the ``init.location`` configuration value.

        Returns:
            A newly constructed ``SQLiteDS``.
        """
        from data_platform.config import ConfigManager
        from data_platform.datasource import SQLiteDS

        db_file = os.path.join(temp_location, 'data.db')
        return SQLiteDS(ConfigManager({"init": {"location": db_file}}))
Example #4
0
 def set_nxds():
     """Return a ``NetworkXDS`` configured for ``<cwd>/data/graph``.

     The configuration sets ``init.location`` to that directory and
     ``file_format`` to ``"graphml"``.
     """
     graph_dir = Path(os.getcwd()) / 'data' / 'graph'
     settings = {
         "init": {"location": graph_dir},
         "file_format": "graphml",
     }
     return NetworkXDS(ConfigManager(settings))
Example #5
0
def init():
    """Create the ``NetworkXDS`` data source for ``<cwd>/data/graph``.

    Returns:
        A ``NetworkXDS`` whose configuration has ``init.location`` set to the
        graph directory and ``file_format`` set to ``'graphml'``.
    """
    working_dir = Path(os.getcwd())
    graph_dir = working_dir.joinpath('data', 'graph')
    settings = ConfigManager({
        'init': {'location': graph_dir},
        'file_format': 'graphml',
    })
    return NetworkXDS(settings)
Example #6
0
def do_text():
    """Create a text network from the submitted form and return its summary.

    Reads ``database``, ``source``, ``document``, ``node`` and ``relation``
    from ``request.forms``, creates the database, builds the network with
    ``nc.create_network_text`` and then reads the graph named after the
    database through ``NetworkXDS`` to count its nodes and edges.

    Returns:
        dict: the form values plus ``node_number``, ``edge_number``,
        ``nettype`` and ``weighttype``; the ``database`` entry carries a
        ``.graphml`` suffix.
    """
    db_name = request.forms.get('database')
    print(db_name)
    db.create_database(db_name)
    db.flush()

    # Fetch the remaining form fields in the same order as they are used.
    src, doc, node_kind, rel = [
        request.forms.get(field)
        for field in ('source', 'document', 'node', 'relation')
    ]
    nc.create_network_text(src, doc, node_kind, rel, db_name)
    db.flush()

    graph_dir = Path(os.getcwd()) / 'data' / 'graph'
    settings = ConfigManager({
        'init': {'location': graph_dir},
        'file_format': 'graphml',
    })
    print(graph_dir)

    reader = NetworkXDS(settings)  # data source used to read the network
    print(reader.read_graph())
    graph = reader.read_graph(db_name)[db_name]
    node_total = graph.number_of_nodes()
    edge_total = graph.number_of_edges()
    print(node_total)
    print(edge_total)

    summary = {
        'database': db_name + '.graphml',
        'source': src,
        'document': doc,
        'node': node_kind,
        'relation': rel,
        'node_number': node_total,
        'edge_number': edge_total,
        'nettype': node_kind + ' word network',
        'weighttype': 'count',
    }
    print(summary)
    return summary
Example #7
0
def do_author():
    """Create an author network from the submitted form and return its summary.

    Reads ``database``, ``source``, ``document`` and ``relation`` from
    ``request.forms``, flushes and creates the database, builds the network
    with ``nc.create_network_author`` and then reads the graph named after
    the database through ``NetworkXDS`` to count its nodes and edges.

    Returns:
        dict: the form values plus ``node_number`` and ``edge_number``, with
        ``node`` fixed to ``"undefined"``, ``nettype`` fixed to
        ``'author citation network'`` and ``weighttype`` fixed to
        ``'cite_count'``; the ``database`` entry carries a ``.graphml``
        suffix.
    """
    db_name = request.forms.get('database')
    print(db_name)
    db.flush()
    db.create_database(db_name)

    # Fetch the remaining form fields in the same order as they are used.
    src, doc, rel = [
        request.forms.get(field)
        for field in ('source', 'document', 'relation')
    ]
    nc.create_network_author(src, doc, rel, db_name)
    db.flush()

    graph_dir = Path(os.getcwd()) / 'data' / 'graph'
    settings = ConfigManager({
        'init': {'location': graph_dir},
        'file_format': 'graphml',
    })
    print(graph_dir)

    reader = NetworkXDS(settings)  # data source used to read the network
    print(reader.read_graph())
    graph = reader.read_graph(db_name)[db_name]
    node_total = graph.number_of_nodes()
    edge_total = graph.number_of_edges()
    print(node_total)
    print(edge_total)

    # nettype could also be "co work network", in which case weighttype
    # becomes "co_count"; this should be made dynamic in a later version.
    # For now only the citation variant is used.
    summary = {
        'database': db_name + '.graphml',
        'source': src,
        'document': doc,
        'node': "undefined",
        'relation': rel,
        'node_number': node_total,
        'edge_number': edge_total,
        'nettype': 'author citation network',
        'weighttype': 'cite_count',
    }
    print(summary)
    return summary
Example #8
0
    def get_test_instance(self, temp_location):
        """Return the ``ArangoDBDS`` test instance, creating it on first use.

        The instance is stored on ``self._cache_ds`` and reused by later
        calls. On the first call the ``uri``, ``user``, ``password`` and
        ``database`` values are looked up (in that order) under
        ``test.arangodb`` in the global configuration.

        Args:
            temp_location: not used by this implementation.

        Returns:
            The cached ``ArangoDBDS`` instance.
        """
        from data_platform.config import ConfigManager, get_global_config
        from data_platform.datasource.arangodb import ArangoDBDS

        if self._cache_ds is not None:
            return self._cache_ds

        settings = get_global_config()
        init_options = {
            option: settings.check_get(['test', 'arangodb', option])
            for option in ('uri', 'user', 'password', 'database')
        }
        self._cache_ds = ArangoDBDS(ConfigManager({"init": init_options}))
        return self._cache_ds
Example #9
0
def do_author():
    """Create an author network from the submitted form and return its summary.

    Reads ``database``, ``source``, ``document`` and ``relation`` from
    ``request.forms``, flushes and creates the database, builds the network
    with ``nc.create_network_author`` and then reads the graph named after
    the database through ``NetworkXDS`` to count its nodes and edges.

    Returns:
        dict: the form values plus ``node_number`` and ``edge_number``, with
        ``node`` fixed to ``"undefined"``.
    """
    db_name = request.forms.get('database')
    print(db_name)
    db.flush()
    db.create_database(db_name)

    # Fetch the remaining form fields in the same order as they are used.
    src, doc, rel = [
        request.forms.get(field)
        for field in ('source', 'document', 'relation')
    ]
    nc.create_network_author(src, doc, rel, db_name)
    db.flush()

    graph_dir = Path(os.getcwd()) / 'data' / 'graph'
    settings = ConfigManager({
        'init': {'location': graph_dir},
        'file_format': 'graphml',
    })
    print(graph_dir)

    reader = NetworkXDS(settings)  # data source used to read the network
    print(reader.read_graph())
    graph = reader.read_graph(db_name)[db_name]
    node_total = graph.number_of_nodes()
    edge_total = graph.number_of_edges()
    print(node_total)
    print(edge_total)

    summary = {
        'database': db_name,
        'source': src,
        'document': doc,
        'node': "undefined",
        'relation': rel,
        'node_number': node_total,
        'edge_number': edge_total,
    }
    print(summary)
    return summary
Example #10
0
 def get_test_instance(self, temp_location):
     """Build a ``JSONDS`` configured with *temp_location*.

     Args:
         temp_location: value stored under ``init.location`` in the
             configuration handed to the data source.

     Returns:
         A newly constructed ``JSONDS``.
     """
     from data_platform.config import ConfigManager
     from data_platform.datasource import JSONDS
     init_options = {"location": temp_location}
     return JSONDS(ConfigManager({"init": init_options}))
Example #11
0
from data_fetcher.id_manager import IDManager
from data_fetcher.ieee.ieee_retrieval import IEEERetrieval
from data_fetcher.ieee.ieee_fulltext_spider import IEEEFulltextSpider
from data_fetcher.parser.pdf_parser import PDFParser, PDFFormat
from data_platform.config import ConfigManager
from data_platform.datasource.abc.doc import DocKeyPair
from data_platform.datasource.mongodb import MongoDBDS

# Data directories are resolved relative to the parent of the current
# working directory, so the result depends on where the script is started.
current_path = Path(os.path.join(os.getcwd(), ".."))
data_path = current_path / 'data'
xml_path = data_path / 'unprocessed_articles_xml'
pdf_path = data_path / 'pdf_files'
# Module-level configuration; main() passes it to both MongoDBDS and
# IDManager.
# NOTE(review): 'uri' is None here -- presumably the data source falls back
# to a default connection; confirm against MongoDBDS.
config = ConfigManager({
    "init": {
        "location": xml_path,
        "pdf": pdf_path,
        'uri': None,
        'database': 'db'
    }
})


def main(query, paper_set, num_result):

    # 创建mongoDB数据源管理
    mgdbds = MongoDBDS(config=config)

    # 指定ID管理器
    pim = IDManager(
        config=config,
        key=DocKeyPair('paper_id', 'title'),
        auto_inc=DocKeyPair('id_inc', 'paper_id')
Example #12
0
# encoding:utf-8
import os
from pathlib import Path
from data_platform.config import ConfigManager
from network_construction import source as s
from network_construction import database as db
from network_construction import algorithm

# Data directories are resolved relative to the current working directory,
# so the result depends on where the process is started.
current_path = Path(os.getcwd())
data_path = current_path / 'data'
xml_path = data_path / 'unprocessed_articles_xml'
# Module-level configuration whose init location is the directory of
# unprocessed article XML files.
config = ConfigManager({"init": {"location": xml_path}})


# the output format is a dictionary; its key is node_key and the properties are id name email;
# if there is no author name, the key is author_null
def node_extraction_author(source, document, database):
    authors = s.search_author(source, document)
    citations = s.search_citation(source, document)
    for a in authors:
        author_name = a['author_list'][0]
        node_key = "author_" + author_name
        node_struct = {}
        node_struct['id'] = "null"
        node_struct['name'] = author_name
        node_struct['email'] = "null"
        db.insert_author(node_key, node_struct, database)
    for c in citations:
        for value0 in c['bib_detail'].items():
            value = value0[1]
            if 'authors' in value.keys():