Beispiel #1
0
    def __init__(self, graph_set=None, repok=None, reperr=None,
                 context_map=None, default_dir="_", dir_split=0, n_file_item=1):
        """Initialise the storer and load every context file it is given.

        :param graph_set: optional object exposing ``graphs()``; when given,
            the value returned by that call is kept in ``self.g``, otherwise
            ``self.g`` starts as an empty list.
        :param repok: reporter used for informational messages; a default
            ``Reporter`` with an INFO prefix is created when omitted.
        :param reperr: reporter used for error messages; a default
            ``Reporter`` with an ERROR prefix is created when omitted.
        :param context_map: mapping from a context URL to the path of a local
            JSON file. Each file is parsed and ``self.context_map`` maps the
            same URL to the parsed JSON. ``None`` means "no contexts".
        :param default_dir: stored as ``self.default_dir``.
        :param dir_split: stored as ``self.dir_split``.
        :param n_file_item: stored as ``self.n_file_item``.
        :raises IOError: if a context file cannot be opened.
        :raises ValueError: if a context file does not contain valid JSON.
        """
        self.dir_split = dir_split
        self.n_file_item = n_file_item
        self.default_dir = default_dir

        # Build a dictionary owned by this instance: the parsed contexts must
        # neither overwrite the paths in the caller's mapping nor end up in a
        # default object shared by every instance.
        self.context_map = {}
        if context_map is not None:
            for context_url in context_map:
                with open(context_map[context_url]) as f:
                    self.context_map[context_url] = json.load(f)

        if graph_set is None:
            self.g = []
        else:
            self.g = graph_set.graphs()
        if repok is None:
            self.repok = Reporter(prefix="[Storer: INFO] ")
        else:
            self.repok = repok
        if reperr is None:
            self.reperr = Reporter(prefix="[Storer: ERROR] ")
        else:
            self.reperr = reperr
        self.preface_query = ""
Beispiel #2
0
    def __init__(self,
                 conf_file,
                 sec_to_wait=10,
                 max_iteration=6,
                 timeout=30,
                 query_interface='remote'):
        """Read the access token from *conf_file* and set up the query interface.

        :param conf_file: path of a JSON file containing an ``access_token`` key.
        :param sec_to_wait: stored as ``self.sec_to_wait`` and forwarded to the
            remote query interface.
        :param max_iteration: stored as ``self.max_iteration`` and forwarded to
            the remote query interface.
        :param timeout: stored as ``self.timeout`` and forwarded to the remote
            query interface.
        :param query_interface: ``'local'`` to use ``LocalQuery`` or
            ``'remote'`` to use ``RemoteQuery``.
        :raises KeyError: if the configuration has no ``access_token`` entry.
        :raises ValueError: if *query_interface* is neither ``'local'`` nor
            ``'remote'``.
        """
        # Only parsing needs the file handle, so release it before building
        # reporters and the query interface.
        with open(conf_file) as f:
            conf_json = json.load(f)

        self.headers = {
            "Authorization": "Bearer %s" % conf_json["access_token"],
            "Content-Type": "application/json"
        }
        self.id = "ORCID"
        self.name = "SPACIN " + self.__class__.__name__
        self.repok = Reporter(prefix="[%s - INFO] " % self.name)
        self.reper = Reporter(prefix="[%s - ERROR] " % self.name)
        self.__last_query_done = None
        self.sec_to_wait = sec_to_wait
        self.max_iteration = max_iteration
        self.timeout = timeout

        if query_interface == 'local':
            self.query_interface = LocalQuery(reperr=self.reper,
                                              repok=self.repok)
        elif query_interface == 'remote':
            self.query_interface = RemoteQuery(max_iteration=max_iteration,
                                               sec_to_wait=sec_to_wait,
                                               timeout=timeout,
                                               headers=self.headers,
                                               reperr=self.reper,
                                               repok=self.repok,
                                               is_json=True)
        else:
            raise ValueError(
                "query_interface param must be `local` or `remote`")
Beispiel #3
0
    def __init__(self,
                 base_iri,
                 context_path,
                 info_dir="",
                 n_file_item=1,
                 supplier_prefix="",
                 forced_type=False,
                 wanted_label=True):
        """Prepare an empty graph set rooted at *base_iri*.

        For every two-letter entity code the instance receives a graph IRI
        attribute ``g_<code>`` (``base_iri + "<code>/"``) and a counter-file
        path attribute ``<code>_info_path`` (``info_dir + "<code>.txt"``).

        :param base_iri: prefix of every graph IRI.
        :param context_path: stored as ``self.context_path``.
        :param info_dir: prefix of every ``*_info_path`` attribute.
        :param n_file_item: stored as ``self.n_file_item``.
        :param supplier_prefix: stored as ``self.supplier_prefix``.
        :param forced_type: stored as ``self.forced_type``.
        :param wanted_label: stored as ``self.wanted_label``.
        """
        entity_codes = ("an", "ar", "be", "br", "ci", "de",
                        "id", "pl", "ra", "re", "rp")

        self.r_count = 0
        # List of rdflib.Graph objects, one per subject entity
        self.g = []
        # URIRef -> graph (from the list above) related to that URIRef
        self.entity_g = {}
        # URIRef -> related graph entity
        self.res_to_entity = {}
        self.base_iri = base_iri
        self.context_path = context_path
        self.cur_name = "OCDM " + self.__class__.__name__
        self.n_file_item = n_file_item
        self.supplier_prefix = supplier_prefix
        self.wanted_label = wanted_label
        self.forced_type = forced_type

        # Graph IRIs. Other classes depend on this URL layout, so it must not
        # be changed; only the value of base_iri may vary.
        for code in entity_codes:
            setattr(self, "g_" + code, base_iri + (code + "/"))

        # Local paths of the per-entity info files
        self.info_dir = info_dir
        for code in entity_codes:
            setattr(self, code + "_info_path", info_dir + (code + ".txt"))

        self.reperr = Reporter(True)
        self.reperr.new_article()
        self.repok = Reporter(True)
        self.repok.new_article()
Beispiel #4
0
    def __init__(self,
                 base_iri,
                 context_base,
                 info_dir,
                 entries,
                 n_file_item,
                 supplier_prefix,
                 agent_id=None):
        """Copy the known fields out of *entries* and create the graph set.

        :param base_iri: forwarded to ``GraphSet``.
        :param context_base: forwarded to ``GraphSet``.
        :param info_dir: forwarded to ``GraphSet``.
        :param entries: mapping with the optional keys ``occ``, ``doi``,
            ``pmid``, ``pmcid``, ``url``, ``curator``, ``source``,
            ``source_provider``, ``references`` and ``reference_pointers``,
            or ``None``. Missing keys leave the matching attribute at ``None``.
        :param n_file_item: forwarded to ``GraphSet``.
        :param supplier_prefix: forwarded to ``GraphSet``.
        :param agent_id: stored as ``self.id``.
        """
        plain_fields = ("occ", "doi", "pmid", "pmcid", "url",
                        "curator", "source", "source_provider")
        # These values are normalised to lower case when present
        lowercased_fields = ("doi", "url")

        for field in plain_fields:
            setattr(self, field, None)
        self.entries = None
        self.reference_pointers = None

        if entries is not None:
            for field in plain_fields:
                if field not in entries:
                    continue
                value = entries[field]
                if field in lowercased_fields:
                    value = value.lower()
                setattr(self, field, value)

            # Reference pointers are only considered together with references
            if "references" in entries:
                self.entries = entries["references"]
                if "reference_pointers" in entries:
                    self.reference_pointers = entries["reference_pointers"]

        self.name = "SPACIN " + self.__class__.__name__
        # The graph set is created with labels disabled
        self.g_set = GraphSet(base_iri, context_base, info_dir,
                              n_file_item, supplier_prefix,
                              wanted_label=False)
        self.id = agent_id

        info_prefix = "[%s - INFO] " % self.name
        self.repok = Reporter(prefix=info_prefix)
        self.repok.new_article()
        error_prefix = "[%s - ERROR] " % self.name
        self.reperr = Reporter(prefix=error_prefix)
        self.reperr.new_article()
Beispiel #5
0
 def __init__(self, conf_file, sec_to_wait=10, max_iteration=6, timeout=30):
     """Read the access token from *conf_file* and store the query settings.

     :param conf_file: path of a JSON file containing an ``access_token`` key.
     :param sec_to_wait: stored as ``self.sec_to_wait``.
     :param max_iteration: stored as ``self.max_iteration``.
     :param timeout: stored as ``self.timeout``.
     :raises KeyError: if the configuration has no ``access_token`` entry.
     """
     # Only parsing needs the file handle, so release it before the rest of
     # the initialisation runs.
     with open(conf_file) as f:
         conf_json = json.load(f)

     self.headers = {
         "Authorization": "Bearer %s" % conf_json["access_token"],
         "Content-Type": "application/json"
     }
     self.id = "ORCID"
     self.name = "SPACIN " + self.__class__.__name__
     self.repok = Reporter(prefix="[%s - INFO] " % self.name)
     self.reper = Reporter(prefix="[%s - ERROR] " % self.name)
     self.__last_query_done = None
     self.sec_to_wait = sec_to_wait
     self.max_iteration = max_iteration
     self.timeout = timeout
Beispiel #6
0
 def __init__(self, tp_url_real, context_path, context_file_path,
              base_iri, base_dir, info_dir, dataset_home, tmp_dir, triplestore_url=None):
     """Store the dataset settings and prepare the storer used to save them.

     :param tp_url_real: wrapped in a ``URIRef`` and kept as ``self.tp_res``.
     :param context_path: stored as ``self.context_path`` and used as the key
         of the storer's context map.
     :param context_file_path: value associated with *context_path* in the
         storer's context map.
     :param base_iri: stored as ``self.base_iri``.
     :param base_dir: stored as ``self.base_dir``.
     :param info_dir: stored as ``self.info_dir``.
     :param dataset_home: wrapped in a ``URIRef`` and kept as
         ``self.dataset_home``.
     :param tmp_dir: stored as ``self.tmp_dir``.
     :param triplestore_url: stored as ``self.tp_url``.
     """
     self.tp_url = triplestore_url
     self.base_iri = base_iri
     self.base_dir = base_dir
     self.info_dir = info_dir
     self.context_path = context_path
     self.dataset_home = URIRef(dataset_home)
     self.tmp_dir = tmp_dir
     self.tp_res = URIRef(tp_url_real)

     self.repok = Reporter(prefix="[DatasetHandler: INFO] ")
     self.reperr = Reporter(prefix="[DatasetHandler: ERROR] ")

     contexts = {context_path: context_file_path}
     self.st = Storer(context_map=contexts,
                      repok=self.repok, reperr=self.reperr)

     # The preface query deletes the modification date currently attached
     # to any resource typed as a dataset.
     modified_iri = str(DatasetHandler.modified)
     dataset_iri = str(DatasetHandler.dataset)
     preface = u"DELETE { ?res <%s> ?date } WHERE { ?res a <%s> ; <%s> ?date }" % (
         modified_iri, dataset_iri, modified_iri)
     self.st.set_preface_query(preface)
Beispiel #7
0
    def __init__(self,
                 base_iri,
                 context_path,
                 info_dir="",
                 n_file_item=1,
                 supplier_prefix=""):
        """Prepare an empty graph set rooted at *base_iri*.

        For every two-letter entity code the instance receives a graph IRI
        attribute ``g_<code>`` (``base_iri + "<code>/"``) and a counter-file
        path attribute ``<code>_info_path`` (``info_dir + "<code>.txt"``).

        :param base_iri: prefix of every graph IRI.
        :param context_path: stored as ``self.context_path``.
        :param info_dir: prefix of every ``*_info_path`` attribute.
        :param n_file_item: stored as ``self.n_file_item``.
        :param supplier_prefix: stored as ``self.supplier_prefix``.
        """
        entity_codes = ("ar", "be", "br", "id", "ra", "re")

        self.r_count = 0
        # List of rdflib.Graph objects, one per subject entity
        self.g = []
        # URIRef -> graph (from the list above) related to that URIRef
        self.entity_g = {}
        # URIRef -> related graph entity
        self.res_to_entity = {}
        self.base_iri = base_iri
        self.context_path = context_path
        self.cur_name = "OCDM " + self.__class__.__name__
        self.n_file_item = n_file_item
        self.supplier_prefix = supplier_prefix

        # Graph IRIs. Other classes depend on this URL layout, so it must not
        # be changed; only the value of base_iri may vary.
        for code in entity_codes:
            setattr(self, "g_" + code, base_iri + (code + "/"))

        # Local paths of the per-entity info files
        self.info_dir = info_dir
        for code in entity_codes:
            setattr(self, code + "_info_path", info_dir + (code + ".txt"))

        self.reperr = Reporter(True)
        self.reperr.new_article()
        self.repok = Reporter(True)
        self.repok.new_article()
Beispiel #8
0
 def __init__(self,
              stored_file,
              reference_dir,
              error_dir,
              stopper,
              headers=None,
              sec_to_wait=10,
              max_iteration=6,
              timeout=30,
              debug=False,
              supplier_idx=()):
     """Store the request settings and create the reporters and reference storer.

     :param stored_file: forwarded to ``BibliographicReferenceStorer``.
     :param reference_dir: forwarded to ``BibliographicReferenceStorer``.
     :param error_dir: forwarded to ``BibliographicReferenceStorer``.
     :param stopper: stored as ``self.stopper``.
     :param headers: dictionary stored as ``self.headers``; when omitted, a
         new dictionary holding a Firefox ``User-Agent`` value is created.
     :param sec_to_wait: stored as ``self.sec_to_wait``.
     :param max_iteration: stored as ``self.max_iteration``.
     :param timeout: stored as ``self.timeout``.
     :param debug: passed to both reporters as ``print_sentences``.
     :param supplier_idx: forwarded to ``BibliographicReferenceStorer``.
     """
     if headers is None:
         # Create the default per instance so that changing one object's
         # headers can never leak into other instances.
         headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; "
                                  "rv:33.0) Gecko/20100101 Firefox/33.0"}
     self.headers = headers
     self.sec_to_wait = sec_to_wait
     self.max_iteration = max_iteration
     self.timeout = timeout
     self.stopper = stopper
     self.name = "BEE " + self.__class__.__name__
     self.repok = Reporter(print_sentences=debug, prefix="[%s - INFO] " % self.name)
     self.repok.new_article()
     self.reper = Reporter(print_sentences=debug, prefix="[%s - ERROR] " % self.name)
     self.reper.new_article()
     self.rs = BibliographicReferenceStorer(stored_file, reference_dir, error_dir, supplier_idx)
Beispiel #9
0
        "-i",
        "--input",
        dest="input",
        required=True,
        help="The file containing the RDF to execute, the JSON-LD to upload, "
        "or a directory containing several files with both queries and RDF.")

    args = arg_parser.parse_args()

    if args.conf is not None:
        my_conf = __import__(args.conf)
        for attr in dir(my_conf):
            if not attr.startswith("__"):
                globals()[attr] = getattr(my_conf, attr)

    storer = Storer(repok=Reporter(True),
                    reperr=Reporter(True),
                    context_map={context_path: context_file_path})

    all_files = []
    if os.path.isdir(args.input):
        for cur_dir, cur_subdir, cur_files in os.walk(args.input):
            for cur_file in cur_files:
                full_path = cur_dir + os.sep + cur_file
                if re.search(os.sep + "prov" + os.sep, full_path) is None and \
                        not full_path.endswith("index.json"):
                    all_files += [full_path]
    else:
        all_files += [args.input]

    for cur_file in all_files:
Beispiel #10
0
        description="This script create an nt file given a directory "
        "of the OCC containing data")
    arg_parser.add_argument("-i",
                            "--input",
                            dest="input",
                            required=True,
                            help="The directory containing the json-ld data.")
    arg_parser.add_argument("-o",
                            "--output",
                            dest="output",
                            required=True,
                            help="The output file.")

    args = arg_parser.parse_args()

    repok = Reporter(True, prefix="[creatent.py: INFO] ")
    reperr = Reporter(True, prefix="[creatent.py: ERROR] ")
    repok.new_article()
    reperr.new_article()

    s = Storer(context_map={context_path: context_file_path},
               dir_split=dir_split_number,
               n_file_item=items_per_file,
               default_dir=default_dir)

    for cur_dir, cur_subdir, cur_files in os.walk(args.input):
        with open(args.output, 'a') as f:
            for cur_file in cur_files:
                if match("^[0-9]+\.json", cur_file) is not None:
                    cur_g = s.load(cur_dir + os.sep + cur_file,
                                   tmp_dir=temp_dir_for_rdf_loading)