Beispiel #1
0
    def __init__(self, graph_set=None, repok=None, reperr=None,
                 context_map=None, default_dir="_", dir_split=0, n_file_item=1):
        """Set up the storer and load every JSON context it was given.

        :param graph_set: object exposing ``graphs()``; its result becomes
            ``self.g``. When None, ``self.g`` starts as an empty list.
        :param repok: reporter used for informational messages; a default
            ``Reporter`` is created when None.
        :param reperr: reporter used for error messages; a default
            ``Reporter`` is created when None.
        :param context_map: mapping ``context URL -> path of a JSON file``.
            Each file is parsed and ``self.context_map`` maps the same URL
            to the parsed content. None (the default) means no contexts.
        :param default_dir: stored as-is on the instance.
        :param dir_split: stored as-is on the instance.
        :param n_file_item: stored as-is on the instance.
        :raises IOError/OSError: if a context file cannot be opened.
        :raises ValueError: if a context file does not contain valid JSON.
        """
        self.dir_split = dir_split
        self.n_file_item = n_file_item
        self.default_dir = default_dir

        # Build a fresh dict instead of overwriting the entries of the one we
        # were handed: replacing paths with parsed JSON in place corrupted the
        # caller's mapping (and the shared default), so a second Storer built
        # from the same mapping tried to open() an already-parsed document.
        self.context_map = {}
        if context_map is not None:
            for context_url, context_file_path in context_map.items():
                with open(context_file_path) as f:
                    self.context_map[context_url] = json.load(f)

        if graph_set is None:
            self.g = []
        else:
            self.g = graph_set.graphs()
        if repok is None:
            self.repok = Reporter(prefix="[Storer: INFO] ")
        else:
            self.repok = repok
        if reperr is None:
            self.reperr = Reporter(prefix="[Storer: ERROR] ")
        else:
            self.reperr = reperr
        self.preface_query = ""
Beispiel #2
0
class ReferenceProcessor(object):
    """Base class holding the shared configuration of reference processors.

    The constructor stores the request/retry settings, creates one reporter
    for informational messages and one for errors, and creates the
    ``BibliographicReferenceStorer`` that subclasses can reach as ``self.rs``.
    Subclasses are expected to override :meth:`process`.
    """

    # Headers used when the caller does not supply any. Kept at class level
    # and copied per instance so that no two instances share one dict.
    DEFAULT_HEADERS = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; "
                      "rv:33.0) Gecko/20100101 Firefox/33.0"
    }

    def __init__(self,
                 stored_file,
                 reference_dir,
                 error_dir,
                 stopper,
                 headers=None,
                 sec_to_wait=10,
                 max_iteration=6,
                 timeout=30,
                 debug=False,
                 supplier_idx=()):
        """Store the configuration and create reporters and reference storer.

        :param stored_file: forwarded to ``BibliographicReferenceStorer``.
        :param reference_dir: forwarded to ``BibliographicReferenceStorer``.
        :param error_dir: forwarded to ``BibliographicReferenceStorer``.
        :param stopper: stored as ``self.stopper``.
        :param headers: dict stored as ``self.headers``; when None a private
            copy of ``DEFAULT_HEADERS`` is used.
        :param sec_to_wait: stored as ``self.sec_to_wait``.
        :param max_iteration: stored as ``self.max_iteration``.
        :param timeout: stored as ``self.timeout``.
        :param debug: passed to both reporters as ``print_sentences``.
        :param supplier_idx: forwarded to ``BibliographicReferenceStorer``.
        """
        # A caller-supplied object is stored untouched; only the default is
        # copied, so mutating one instance's headers cannot leak into others.
        if headers is None:
            self.headers = dict(self.DEFAULT_HEADERS)
        else:
            self.headers = headers
        self.sec_to_wait = sec_to_wait
        self.max_iteration = max_iteration
        self.timeout = timeout
        self.stopper = stopper
        self.name = "BEE " + self.__class__.__name__
        self.repok = Reporter(print_sentences=debug, prefix="[%s - INFO] " % self.name)
        self.repok.new_article()
        self.reper = Reporter(print_sentences=debug, prefix="[%s - ERROR] " % self.name)
        self.reper.new_article()
        self.rs = BibliographicReferenceStorer(stored_file, reference_dir, error_dir, supplier_idx)

    def process(self):
        """Do nothing; placeholder to be implemented in subclasses."""
        pass  # To implement in subclasses
Beispiel #3
0
    def __init__(self,
                 conf_file,
                 sec_to_wait=10,
                 max_iteration=6,
                 timeout=30,
                 query_interface='remote'):
        """Read the access token from ``conf_file`` and set up the query backend.

        :param conf_file: path of a JSON file containing an ``access_token``
            key; the token is placed in the ``Authorization`` header.
        :param sec_to_wait: stored on the instance and forwarded to
            ``RemoteQuery`` when the remote interface is selected.
        :param max_iteration: stored on the instance and forwarded to
            ``RemoteQuery`` when the remote interface is selected.
        :param timeout: stored on the instance and forwarded to
            ``RemoteQuery`` when the remote interface is selected.
        :param query_interface: ``'local'`` builds a ``LocalQuery``,
            ``'remote'`` builds a ``RemoteQuery``.
        :raises ValueError: if ``query_interface`` is neither ``'local'`` nor
            ``'remote'``.
        :raises KeyError: if the configuration has no ``access_token`` entry.
        """
        # Only the JSON parsing needs the file handle; close it before the
        # rest of the initialisation instead of holding it open throughout.
        with open(conf_file) as f:
            conf_json = json.load(f)

        self.headers = {
            "Authorization": "Bearer %s" % conf_json["access_token"],
            "Content-Type": "application/json"
        }
        self.id = "ORCID"
        self.name = "SPACIN " + self.__class__.__name__
        self.repok = Reporter(prefix="[%s - INFO] " % self.name)
        self.reper = Reporter(prefix="[%s - ERROR] " % self.name)
        self.__last_query_done = None
        self.sec_to_wait = sec_to_wait
        self.max_iteration = max_iteration
        self.timeout = timeout

        if query_interface == 'local':
            self.query_interface = LocalQuery(reperr=self.reper,
                                              repok=self.repok)
        elif query_interface == 'remote':
            self.query_interface = RemoteQuery(max_iteration=max_iteration,
                                               sec_to_wait=sec_to_wait,
                                               timeout=timeout,
                                               headers=self.headers,
                                               reperr=self.reper,
                                               repok=self.repok,
                                               is_json=True)
        else:
            raise ValueError(
                "query_interface param must be `local` or `remote`")
Beispiel #4
0
    def __init__(self,
                 base_iri,
                 context_path,
                 info_dir="",
                 n_file_item=1,
                 supplier_prefix="",
                 forced_type=False,
                 wanted_label=True):
        """Initialise an empty graph set rooted at ``base_iri``.

        For every entity short name (an, ar, be, br, ci, de, id, pl, ra, re,
        rp) two attributes are created: ``g_<name>``, the graph IRI
        ``base_iri + "<name>/"``, and ``<name>_info_path``, the counter file
        ``info_dir + "<name>.txt"``. Both are built by plain concatenation,
        so ``base_iri`` and ``info_dir`` must already end with the wanted
        separator.
        """
        short_names = ("an", "ar", "be", "br", "ci", "de",
                       "id", "pl", "ra", "re", "rp")

        # Counter of the resources created, and the per-entity containers:
        # the list of rdflib graphs (one per subject entity), the map from a
        # URIRef to its graph, and the map from a URIRef to its graph entity.
        self.r_count = 0
        self.g = []
        self.entity_g = {}
        self.res_to_entity = {}

        # Plain configuration
        self.base_iri = base_iri
        self.context_path = context_path
        self.cur_name = "OCDM " + self.__class__.__name__
        self.n_file_item = n_file_item
        self.supplier_prefix = supplier_prefix
        self.wanted_label = wanted_label
        self.forced_type = forced_type

        # Graph IRIs. Other classes rely on this URL structure, so only the
        # base_iri part may vary.
        for short_name in short_names:
            setattr(self, "g_" + short_name, base_iri + (short_name + "/"))

        # Local paths of the counter files (the "ci" one is not really used)
        self.info_dir = info_dir
        for short_name in short_names:
            setattr(self, short_name + "_info_path",
                    info_dir + (short_name + ".txt"))

        # Error reporter first, then the informational one
        self.reperr = Reporter(True)
        self.reperr.new_article()
        self.repok = Reporter(True)
        self.repok.new_article()
Beispiel #5
0
    def __init__(self,
                 base_iri,
                 context_base,
                 info_dir,
                 entries,
                 n_file_item,
                 supplier_prefix,
                 agent_id=None):
        """Copy the known fields out of ``entries`` and create the graph set.

        ``entries`` may be None; otherwise each of the keys occ, doi, pmid,
        pmcid, url, curator, source and source_provider is copied to the
        attribute of the same name when present (doi and url lower-cased).
        The ``references`` value is stored as ``self.entries`` and, only when
        references are present, ``reference_pointers`` is stored as well.
        Every attribute that is not supplied stays None.
        """
        plain_fields = ("occ", "doi", "pmid", "pmcid", "url",
                        "curator", "source", "source_provider")
        lowercased_fields = ("doi", "url")

        # Everything starts out as "not provided"
        for field_name in plain_fields + ("entries", "reference_pointers"):
            setattr(self, field_name, None)

        if entries is not None:
            for field_name in plain_fields:
                if field_name not in entries:
                    continue
                value = entries[field_name]
                if field_name in lowercased_fields:
                    value = value.lower()
                setattr(self, field_name, value)

            # Reference pointers are only considered alongside references
            if "references" in entries:
                self.entries = entries["references"]
                if "reference_pointers" in entries:
                    self.reference_pointers = entries["reference_pointers"]

        self.name = "SPACIN " + self.__class__.__name__
        # The graph set is created with labels switched off
        self.g_set = GraphSet(base_iri,
                              context_base,
                              info_dir,
                              n_file_item,
                              supplier_prefix,
                              wanted_label=False)
        self.id = agent_id

        info_prefix = "[%s - INFO] " % self.name
        error_prefix = "[%s - ERROR] " % self.name
        self.repok = Reporter(prefix=info_prefix)
        self.repok.new_article()
        self.reperr = Reporter(prefix=error_prefix)
        self.reperr.new_article()
Beispiel #6
0
 def __init__(self, conf_file, sec_to_wait=10, max_iteration=6, timeout=30):
     """Read the access token from ``conf_file`` and store the settings.

     :param conf_file: path of a JSON file containing an ``access_token``
         key; the token is placed in the ``Authorization`` header.
     :param sec_to_wait: stored as ``self.sec_to_wait``.
     :param max_iteration: stored as ``self.max_iteration``.
     :param timeout: stored as ``self.timeout``.
     :raises KeyError: if the configuration has no ``access_token`` entry.
     """
     # Only the JSON parsing needs the file handle; close it before the
     # rest of the initialisation instead of holding it open throughout.
     with open(conf_file) as f:
         conf_json = json.load(f)

     self.headers = {
         "Authorization": "Bearer %s" % conf_json["access_token"],
         "Content-Type": "application/json"
     }
     self.id = "ORCID"
     self.name = "SPACIN " + self.__class__.__name__
     self.repok = Reporter(prefix="[%s - INFO] " % self.name)
     self.reper = Reporter(prefix="[%s - ERROR] " % self.name)
     self.__last_query_done = None
     self.sec_to_wait = sec_to_wait
     self.max_iteration = max_iteration
     self.timeout = timeout
Beispiel #7
0
 def __init__(self, tp_url_real, context_path, context_file_path,
              base_iri, base_dir, info_dir, dataset_home, tmp_dir, triplestore_url=None):
     """Store the dataset settings and prepare the storer used to save them.

     ``dataset_home`` and ``tp_url_real`` are wrapped in ``URIRef``; every
     other argument is kept as given. The storer shares this object's
     reporters, loads the context found at ``context_file_path`` under the
     key ``context_path``, and receives a preface DELETE query built from
     ``DatasetHandler.modified`` and ``DatasetHandler.dataset``.
     """
     # Values kept verbatim
     self.tp_url = triplestore_url
     self.base_iri = base_iri
     self.base_dir = base_dir
     self.info_dir = info_dir
     self.context_path = context_path

     # Values converted to RDF terms (dataset home first, then triplestore)
     self.dataset_home = URIRef(dataset_home)
     self.tmp_dir = tmp_dir
     self.tp_res = URIRef(tp_url_real)

     self.repok = Reporter(prefix="[DatasetHandler: INFO] ")
     self.reperr = Reporter(prefix="[DatasetHandler: ERROR] ")

     contexts = {context_path: context_file_path}
     self.st = Storer(context_map=contexts,
                      repok=self.repok, reperr=self.reperr)

     # The "modified" predicate appears twice in the query template
     modified_iri = str(DatasetHandler.modified)
     dataset_iri = str(DatasetHandler.dataset)
     self.st.set_preface_query(
         u"DELETE { ?res <%s> ?date } WHERE { ?res a <%s> ; <%s> ?date }" %
         (modified_iri, dataset_iri, modified_iri))
Beispiel #8
0
    def __init__(self,
                 base_iri,
                 context_path,
                 info_dir="",
                 n_file_item=1,
                 supplier_prefix=""):
        """Initialise an empty graph set rooted at ``base_iri``.

        For every entity short name (ar, be, br, id, ra, re) two attributes
        are created: ``g_<name>``, the graph IRI ``base_iri + "<name>/"``,
        and ``<name>_info_path``, the counter file
        ``info_dir + "<name>.txt"``. Both are built by plain concatenation,
        so ``base_iri`` and ``info_dir`` must already end with the wanted
        separator.
        """
        short_names = ("ar", "be", "br", "id", "ra", "re")

        # Counter of the resources created, and the per-entity containers:
        # the list of rdflib graphs (one per subject entity), the map from a
        # URIRef to its graph, and the map from a URIRef to its graph entity.
        self.r_count = 0
        self.g = []
        self.entity_g = {}
        self.res_to_entity = {}

        # Plain configuration
        self.base_iri = base_iri
        self.context_path = context_path
        self.cur_name = "OCDM " + self.__class__.__name__
        self.n_file_item = n_file_item
        self.supplier_prefix = supplier_prefix

        # Graph IRIs. Other classes rely on this URL structure, so only the
        # base_iri part may vary.
        for short_name in short_names:
            setattr(self, "g_" + short_name, base_iri + (short_name + "/"))

        # Local paths of the counter files
        self.info_dir = info_dir
        for short_name in short_names:
            setattr(self, short_name + "_info_path",
                    info_dir + (short_name + ".txt"))

        # Error reporter first, then the informational one
        self.reperr = Reporter(True)
        self.reperr.new_article()
        self.repok = Reporter(True)
        self.repok.new_article()
Beispiel #9
0
class GraphSet(object):
    """Set of rdflib graphs, one per created entity, plus the file-based
    counters used to number newly created entities.

    Each ``add_<short name>`` method creates a new graph, registers it in
    ``self.g`` and returns the ``GraphEntity`` built on top of it.
    """

    # Human-readable label of every entity type, keyed by its short name
    labels = {
        "an": "annotation",
        "ar": "agent role",
        "be": "bibliographic entry",
        "br": "bibliographic resource",
        "ci": "citation",
        "de": "discourse element",
        "id": "identifier",
        "pl": "single location pointer list",
        "ra": "responsible agent",
        "re": "resource embodiment",
        "rp": "in-text reference pointer"
    }

    def __init__(self,
                 base_iri,
                 context_path,
                 info_dir="",
                 n_file_item=1,
                 supplier_prefix="",
                 forced_type=False,
                 wanted_label=True):
        """Initialise an empty graph set.

        :param base_iri: prefix of every graph IRI (``base_iri + "br/"`` ...).
        :param context_path: stored as ``self.context_path``.
        :param info_dir: prefix of every counter file path
            (``info_dir + "br.txt"`` ...); concatenated as-is, so it must
            end with a path separator when it names a directory.
        :param n_file_item: passed to ``find_local_line_id`` when a new
            entity is related to other entities.
        :param supplier_prefix: string prepended to the counter of entities
            that are not related to other entities.
        :param forced_type: forwarded to every ``GraphEntity`` created
            through ``_add``.
        :param wanted_label: when false, new entities get no label.
        """
        self.r_count = 0
        # A list of rdflib.Graphs, one for subject entity
        self.g = []
        # The following variable maps a URIRef with the graph in the graph list related to them
        self.entity_g = {}
        # The following variable maps a URIRef with the related graph entity
        self.res_to_entity = {}
        self.base_iri = base_iri
        self.context_path = context_path
        self.cur_name = "OCDM " + self.__class__.__name__
        self.n_file_item = n_file_item
        self.supplier_prefix = supplier_prefix
        self.wanted_label = wanted_label
        self.forced_type = forced_type

        # Graphs
        # The following structure of URL is quite important for the other classes
        # developed and should not be changed. The only part that can change is the
        # value of the base_iri
        self.g_an = base_iri + "an/"
        self.g_ar = base_iri + "ar/"
        self.g_be = base_iri + "be/"
        self.g_br = base_iri + "br/"
        self.g_ci = base_iri + "ci/"
        self.g_de = base_iri + "de/"
        self.g_id = base_iri + "id/"
        self.g_pl = base_iri + "pl/"
        self.g_ra = base_iri + "ra/"
        self.g_re = base_iri + "re/"
        self.g_rp = base_iri + "rp/"

        # Local paths
        self.info_dir = info_dir
        self.an_info_path = info_dir + "an.txt"
        self.ar_info_path = info_dir + "ar.txt"
        self.be_info_path = info_dir + "be.txt"
        self.br_info_path = info_dir + "br.txt"
        self.ci_info_path = info_dir + "ci.txt"  # not really used
        self.de_info_path = info_dir + "de.txt"
        self.id_info_path = info_dir + "id.txt"
        self.pl_info_path = info_dir + "pl.txt"
        self.ra_info_path = info_dir + "ra.txt"
        self.re_info_path = info_dir + "re.txt"
        self.rp_info_path = info_dir + "rp.txt"

        self.reperr = Reporter(True)
        self.reperr.new_article()
        self.repok = Reporter(True)
        self.repok.new_article()

    def res_count(self):
        """Return how many new entities have been created through ``_add``."""
        return self.r_count

    def get_entity(self, res):
        """Return the graph entity registered for ``res``, or None if unknown."""
        return self.res_to_entity.get(res)

    # Add resources related to bibliographic entities
    def add_an(self,
               resp_agent,
               source_agent=None,
               source=None,
               res=None):
        """Create an annotation entity (see ``_add`` for the arguments)."""
        return self._add(self.g_an, GraphEntity.note, res, resp_agent,
                         source_agent, source, self.an_info_path, "an")

    def add_ar(self, resp_agent, source_agent=None, source=None, res=None):
        """Create an agent role entity (see ``_add`` for the arguments)."""
        return self._add(self.g_ar, GraphEntity.role_in_time, res, resp_agent,
                         source_agent, source, self.ar_info_path, "ar")

    def add_be(self, resp_agent, source_agent=None, source=None, res=None):
        """Create a bibliographic entry entity (see ``_add``)."""
        return self._add(self.g_be, GraphEntity.bibliographic_reference, res,
                         resp_agent, source_agent, source, self.be_info_path,
                         "be")

    def add_br(self, resp_agent, source_agent=None, source=None, res=None):
        """Create a bibliographic resource entity (see ``_add``)."""
        return self._add(self.g_br, GraphEntity.expression, res, resp_agent,
                         source_agent, source, self.br_info_path, "br")

    # NOTE: citations are currently numbered like every other entity, through
    # _add. The _add_ci method below builds the identifier from the citing and
    # cited resources instead, but no add_* method calls it at the moment.
    def add_ci(self,
               resp_agent,
               source_agent=None,
               source=None,
               res=None):
        """Create a citation entity (see ``_add`` for the arguments)."""
        return self._add(self.g_ci, GraphEntity.citation, res, resp_agent,
                         source_agent, source, self.ci_info_path, "ci")

    def add_de(self,
               resp_agent,
               source_agent=None,
               source=None,
               res=None):
        """Create a discourse element entity (see ``_add``)."""
        return self._add(self.g_de, GraphEntity.discourse_element, res,
                         resp_agent, source_agent, source, self.de_info_path,
                         "de")

    def add_id(self, resp_agent, source_agent=None, source=None, res=None):
        """Create an identifier entity (see ``_add`` for the arguments)."""
        return self._add(self.g_id, GraphEntity.identifier, res, resp_agent,
                         source_agent, source, self.id_info_path, "id")

    def add_pl(self,
               resp_agent,
               source_agent=None,
               source=None,
               res=None):
        """Create a single location pointer list entity (see ``_add``)."""
        return self._add(self.g_pl, GraphEntity.singleloc_pointer_list, res,
                         resp_agent, source_agent, source, self.pl_info_path,
                         "pl")

    def add_rp(self,
               resp_agent,
               source_agent=None,
               source=None,
               res=None):
        """Create an in-text reference pointer entity (see ``_add``)."""
        return self._add(self.g_rp, GraphEntity.intextref_pointer, res,
                         resp_agent, source_agent, source, self.rp_info_path,
                         "rp")

    def add_ra(self, resp_agent, source_agent=None, source=None, res=None):
        """Create a responsible agent entity (see ``_add``)."""
        return self._add(self.g_ra, GraphEntity.agent, res, resp_agent,
                         source_agent, source, self.ra_info_path, "ra")

    def add_re(self, resp_agent, source_agent=None, source=None, res=None):
        """Create a resource embodiment entity (see ``_add``)."""
        return self._add(self.g_re, GraphEntity.manifestation, res, resp_agent,
                         source_agent, source, self.re_info_path, "re")

    def _add_ci(self,
                graph_url,
                main_type,
                citing_res,
                cited_res,
                rp_num,
                res,
                resp_agent,
                source_agent,
                source,
                info_file_path,
                short_name,
                list_of_entities=None):
        """Create a citation entity identified by its citing/cited resources.

        When ``res`` is given the entity is built around that existing
        resource. Otherwise the identifier is ``<citing>-<cited>``, where
        each part is the last path segment of the corresponding resource,
        followed by ``/<rp_num>`` when ``rp_num`` is not None. No counter
        file is touched: ``info_file_path`` is accepted but not used.
        """
        if list_of_entities is None:
            list_of_entities = []

        cur_g = Graph(identifier=graph_url)
        self._set_ns(cur_g)
        self.g.append(cur_g)

        if res is not None:
            return self._generate_entity(cur_g,
                                         res=res,
                                         resp_agent=resp_agent,
                                         source_agent=source_agent,
                                         source=source,
                                         list_of_entities=list_of_entities)

        citing_count = str(citing_res).rsplit('/', 1)[-1]
        cited_count = str(cited_res).rsplit('/', 1)[-1]
        count = citing_count + '-' + cited_count
        if rp_num is not None:
            # str() so that a numeric pointer index does not raise TypeError
            count += '/' + str(rp_num)

        return self._generate_entity(cur_g,
                                     res_type=main_type,
                                     resp_agent=resp_agent,
                                     source_agent=source_agent,
                                     source=source,
                                     count=count,
                                     label=None,
                                     short_name=short_name,
                                     list_of_entities=list_of_entities)

    def _add(self,
             graph_url,
             main_type,
             res,
             resp_agent,
             source_agent,
             source,
             info_file_path,
             short_name,
             list_of_entities=None):
        """Create a graph for one entity and return the entity built on it.

        :param graph_url: identifier of the new graph.
        :param main_type: type handed to the entity as ``res_type``.
        :param res: existing resource to wrap, or None to create a new one.
        :param resp_agent: forwarded to the entity.
        :param source_agent: forwarded to the entity.
        :param source: forwarded to the entity.
        :param info_file_path: counter file used to number a new entity.
        :param short_name: short name of the entity type (key of ``labels``).
        :param list_of_entities: entities the new one is related to; None
            is treated as an empty list.
        :return: whatever ``_generate_entity`` returns.
        """
        if list_of_entities is None:
            # Hand a real (fresh) list on, as overriding subclasses may
            # expect a sequence here.
            list_of_entities = []

        cur_g = Graph(identifier=graph_url)
        self._set_ns(cur_g)
        self.g.append(cur_g)

        # This is the case when 'res' is an existing resource. It allows one to create
        # the graph entity starting from an existing URIRef, without incrementing anything
        # at the graph set level. However, a new graph is created and reserved for such resource
        # and it is added to the graph set.
        if res is not None:
            return self._generate_entity(cur_g,
                                         res=res,
                                         res_type=main_type,
                                         resp_agent=resp_agent,
                                         source_agent=source_agent,
                                         source=source,
                                         list_of_entities=list_of_entities,
                                         forced_type=self.forced_type)

        # Otherwise a new individual is created and numbered.
        self._increment()
        related_to_label = ""
        related_to_short_label = ""

        # Note: even if list of entities is actually a list, it seems
        # that it would be composed by at most one item (e.g. for provenance)
        if list_of_entities:
            count = str(
                GraphSet._add_number(
                    info_file_path,
                    find_local_line_id(list_of_entities[0],
                                       self.n_file_item)))
            long_parts = []
            short_parts = []
            for cur_entity in list_of_entities:
                cur_short_name = get_short_name(cur_entity)
                cur_entity_count = get_count(cur_entity)
                cur_entity_prefix = get_prefix(cur_entity)
                # Citations are referred to without the supplier prefix
                if cur_short_name == 'ci':
                    cur_entity_id = "%s" % cur_entity_count
                else:
                    cur_entity_id = "%s%s" % (cur_entity_prefix,
                                              cur_entity_count)
                long_parts.append(
                    " %s %s" % (self.labels[cur_short_name], cur_entity_id))
                short_parts.append(
                    " %s/%s" % (cur_short_name, cur_entity_id))
            related_to_label = " related to" + ",".join(long_parts)
            related_to_short_label = " ->" + ",".join(short_parts)
        else:
            count = self.supplier_prefix + str(
                GraphSet._add_number(info_file_path))

        if self.wanted_label:
            label = "%s %s%s [%s/%s%s]" % (
                GraphSet.labels[short_name], count, related_to_label,
                short_name, count, related_to_short_label)
        else:
            label = None

        return self._generate_entity(cur_g,
                                     res_type=main_type,
                                     resp_agent=resp_agent,
                                     source_agent=source_agent,
                                     source=source,
                                     count=count,
                                     label=label,
                                     short_name=short_name,
                                     list_of_entities=list_of_entities,
                                     forced_type=self.forced_type)

    def _generate_entity(self,
                         g,
                         res=None,
                         res_type=None,
                         resp_agent=None,
                         source_agent=None,
                         source=None,
                         count=None,
                         label=None,
                         short_name="",
                         list_of_entities=None,
                         forced_type=False):
        """Build the ``GraphEntity`` living in graph ``g``.

        ``short_name`` and ``list_of_entities`` are accepted but not used
        here; everything else is forwarded to ``GraphEntity`` together with
        this graph set as ``g_set``.
        """
        return GraphEntity(g,
                           res=res,
                           res_type=res_type,
                           resp_agent=resp_agent,
                           source_agent=source_agent,
                           source=source,
                           count=count,
                           label=label,
                           g_set=self,
                           forced_type=forced_type)

    def graphs(self):
        """Return the list of the graphs of the set that are not empty."""
        return [cur_g for cur_g in self.g if len(cur_g) > 0]

    def _increment(self):
        """Count one more newly created entity."""
        self.r_count += 1

    def _set_ns(self, g):
        """Bind the entity and ontology prefixes on the namespace manager of ``g``."""
        bind = g.namespace_manager.bind

        # Prefixes of the entity graphs of this set (binding order preserved)
        for prefix, graph_iri in (("an", self.g_an),
                                  ("ar", self.g_ar),
                                  ("be", self.g_be),
                                  ("ci", self.g_ci),
                                  ("de", self.g_de),
                                  ("br", self.g_br),
                                  ("id", self.g_id),
                                  ("pl", self.g_pl),
                                  ("ra", self.g_ra),
                                  ("re", self.g_re),
                                  ("rp", self.g_rp)):
            bind(prefix, Namespace(graph_iri))

        # Prefixes of the namespaces exposed by GraphEntity
        for prefix, namespace in (("biro", GraphEntity.BIRO),
                                  ("co", GraphEntity.CO),
                                  ("c4o", GraphEntity.C4O),
                                  ("cito", GraphEntity.CITO),
                                  ("datacite", GraphEntity.DATACITE),
                                  ("dcterms", GraphEntity.DCTERMS),
                                  ("deo", GraphEntity.DEO),
                                  ("doco", GraphEntity.DOCO),
                                  ("fabio", GraphEntity.FABIO),
                                  ("foaf", GraphEntity.FOAF),
                                  ("frbr", GraphEntity.FRBR),
                                  ("literal", GraphEntity.LITERAL),
                                  ("oa", GraphEntity.OA),
                                  ("oco", GraphEntity.OCO),
                                  ("prism", GraphEntity.PRISM),
                                  ("pro", GraphEntity.PRO)):
            bind(prefix, namespace)

    @staticmethod
    def get_graph_iri(g):
        """Return the identifier of graph ``g`` as a string."""
        return str(g.identifier)

    @staticmethod
    def _read_number(file_path, line_number=1):
        """Return the integer stored at the 1-based ``line_number`` of a file.

        0 is returned when the file cannot be read, the line does not exist
        or the line does not contain an integer.
        """
        try:
            with open(file_path) as f:
                return int(f.readlines()[line_number - 1])
        except (IOError, OSError, IndexError, ValueError):
            # Missing file / missing line / empty or malformed line: the
            # counter has simply never been set.
            return 0

    @staticmethod
    def _add_number(file_path, line_number=1):
        """Increment the counter at the 1-based ``line_number`` of a file.

        The file and its parent directory are created when missing, and the
        file is padded with empty lines up to ``line_number``. A
        ``line_number`` lower than 1 leaves the file content untouched.

        :return: the new value of the counter.
        """
        cur_number = GraphSet._read_number(file_path, line_number) + 1

        # A bare file name has an empty dirname, and os.makedirs("") raises:
        # only create the directory when there is one to create.
        dir_path = os.path.dirname(file_path)
        if dir_path and not os.path.exists(dir_path):
            os.makedirs(dir_path)

        if os.path.exists(file_path):
            with open(file_path) as f:
                all_lines = f.readlines()
        else:
            all_lines = []

        if line_number > 0:
            missing_lines = line_number - len(all_lines)
            if missing_lines > 0:
                # Terminate an unterminated last line first, otherwise the
                # padding would be glued to it and two counters would merge.
                if all_lines and not all_lines[-1].endswith("\n"):
                    all_lines[-1] += "\n"
                all_lines.extend(["\n"] * missing_lines)
            all_lines[line_number - 1] = str(cur_number) + "\n"

        with open(file_path, "w") as f:
            f.writelines(all_lines)

        return cur_number
Beispiel #10
0
class Storer(object):
    def __init__(self,
                 graph_set=None,
                 repok=None,
                 reperr=None,
                 context_map={},
                 default_dir="_",
                 dir_split=0,
                 n_file_item=1,
                 nt=False,
                 nq=False):
        self.nt = nt
        self.nq = nq
        self.dir_split = dir_split
        self.n_file_item = n_file_item
        self.default_dir = default_dir

        if not nt and not nq:
            self.context_map = context_map
            for context_url in context_map:
                context_file_path = context_map[context_url]
                with open(context_file_path) as f:
                    context_json = json.load(f)
                    self.context_map[context_url] = context_json

        if graph_set is None:
            self.g = []
        else:
            self.g = graph_set.graphs()
        if repok is None:
            self.repok = Reporter(prefix="[Storer: INFO] ")
        else:
            self.repok = repok
        if reperr is None:
            self.reperr = Reporter(prefix="[Storer: ERROR] ")
        else:
            self.reperr = reperr
        self.preface_query = ""

    @staticmethod
    def hack_dates():
        """Remove the XSD.gYear and XSD.gYearMonth entries (when present)
        from `_toPythonMapping`."""
        for datatype in (XSD.gYear, XSD.gYearMonth):
            if datatype in _toPythonMapping:
                del _toPythonMapping[datatype]

    def store_graphs_in_file(self, file_path, context_path):
        """Merge all the graphs handled by this storer into one conjunctive
        graph (each triple keeps the identifier of its graph as context) and
        write the result to `file_path`."""
        for reporter in (self.repok, self.reperr):
            reporter.new_article()
        self.repok.add_sentence(
            "Store the graphs into a file: starting process")

        merged = ConjunctiveGraph()
        for cur_graph in self.g:
            quads = [triple + (cur_graph.identifier, )
                     for triple in list(cur_graph)]
            merged.addN(quads)

        self.__store_in_file(merged, file_path, context_path)

    def store_all(self,
                  base_dir,
                  base_iri,
                  context_path,
                  tmp_dir=None,
                  g_set=[],
                  override=False,
                  remove_data=False):
        for g in g_set:
            self.g += [g]

        self.repok.new_article()
        self.reperr.new_article()

        self.repok.add_sentence("Starting the process")

        processed_graphs = {}
        for cur_g in self.g:
            processed_graphs = self.store(cur_g, base_dir, base_iri,
                                          context_path, tmp_dir, override,
                                          processed_graphs, False, remove_data)

        stored_graph_path = []
        for cur_file_path in processed_graphs:
            stored_graph_path += [cur_file_path]

            self.__store_in_file(processed_graphs[cur_file_path],
                                 cur_file_path, context_path)

        return stored_graph_path

    def upload_and_store(self,
                         base_dir,
                         triplestore_url,
                         base_iri,
                         context_path,
                         tmp_dir=None,
                         g_set=[],
                         override=False):

        stored_graph_path = self.store_all(base_dir, base_iri, context_path,
                                           tmp_dir, g_set, override)

        # Some graphs were not stored properly, then no one will be updloaded to the triplestore
        # but we highlights those ones that could be added in principle, by mentioning them
        # with a ".notupdloaded" marker
        if None in stored_graph_path:
            for file_path in stored_graph_path:
                # Create a marker for the file not uploaded in the triplestore
                open("%s.notuploaded" % file_path, "w").close()
                self.reperr.add_sentence(
                    "[6] "
                    "The statements of in the JSON-LD file '%s' were not "
                    "uploaded into the triplestore." % file_path)
        else:  # All the files have been stored
            self.upload_all(self.g, triplestore_url, base_dir)

    def query(self,
              query_string,
              triplestore_url,
              n_statements=None,
              base_dir=None):
        if query_string != "":
            try:
                tp = SPARQLWrapper(triplestore_url)
                tp.setMethod('POST')
                tp.setQuery(query_string)
                tp.query()

                if n_statements is None:
                    self.repok.add_sentence(
                        "Triplestore updated by means of a SPARQL Update query."
                    )
                else:
                    self.repok.add_sentence(
                        "Triplestore updated with %s more RDF statements." %
                        n_statements)

                return True

            except Exception as e:
                self.reperr.add_sentence(
                    "[1] "
                    "Graph was not loaded into the "
                    "triplestore due to communication problems: %s" % str(e))
                if base_dir is not None:
                    tp_err_dir = base_dir + os.sep + "tp_err"
                    if not os.path.exists(tp_err_dir):
                        os.makedirs(tp_err_dir)
                    cur_file_err = tp_err_dir + os.sep + \
                                   datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f_not_uploaded.txt')
                    with io.open(cur_file_err, "w", encoding="utf-8") as f:
                        f.write(query_string)

        return False

    def do_action_all(self, all_g, triplestore_url, base_dir, query_f):
        result = True

        self.repok.new_article()
        self.reperr.new_article()

        query_string = None
        total_new_statements = None

        for idx, cur_g in enumerate(all_g):
            cur_idx = idx % 10
            if cur_idx == 0:
                if query_string is not None:
                    result &= self.query(query_string, triplestore_url,
                                         total_new_statements, base_dir)
                query_string = ""
                total_new_statements = 0
            else:
                query_string += " ; "
                total_new_statements += len(cur_g)

            query_string += self.get_preface_query(cur_g) + query_f(cur_g)

        if query_string is not None and query_string != "":
            result &= self.query(query_string, triplestore_url,
                                 total_new_statements, base_dir)

        return result

    def update_all(self, all_add_g, all_remove_g, triplestore_url, base_dir):
        return self.do_action_all(all_remove_g, triplestore_url, base_dir, Storer._make_delete_query) and \
               self.upload_all(all_add_g, triplestore_url, base_dir)

    def upload_all(self, all_g, triplestore_url, base_dir):
        return self.do_action_all(all_g, triplestore_url, base_dir,
                                  Storer._make_insert_query)

    def execute_upload_query(self, query_string, triplestore_url):
        self.repok.new_article()
        self.reperr.new_article()

        return self.query(query_string, triplestore_url)

    def upload(self, cur_g, triplestore_url):
        """Insert the statements of the graph `cur_g` into the triplestore,
        returning the result of `query`."""
        for reporter in (self.repok, self.reperr):
            reporter.new_article()

        insert_query = Storer._make_insert_query(cur_g)
        statement_count = len(cur_g)

        return self.query(insert_query, triplestore_url, statement_count)

    def set_preface_query(self, query_string):
        self.preface_query = query_string

    def get_preface_query(self, cur_g):
        if self.preface_query != "":
            if type(cur_g.identifier) is BNode:
                return "CLEAR DEFAULT ; "
            else:
                return "WITH <%s> " % str(
                    cur_g.identifier) + self.preface_query + " ; "
        else:
            return ""

    @staticmethod
    def _make_insert_query(cur_g):
        """Build the 'INSERT DATA' query for the statements of `cur_g`."""
        insert_query = Storer.__make_query(cur_g, "INSERT")
        return insert_query

    @staticmethod
    def _make_delete_query(cur_g):
        """Build the 'DELETE DATA' query for the statements of `cur_g`."""
        delete_query = Storer.__make_query(cur_g, "DELETE")
        return delete_query

    @staticmethod
    def __make_query(cur_g, query_type="INSERT"):
        """Build a '<query_type> DATA' query with the statements of `cur_g`
        serialised as 'nt11'.

        The statements are wrapped in a GRAPH block named after the graph
        identifier, unless that identifier is a blank node.
        """
        statements = cur_g.serialize(format="nt11",
                                     encoding="utf-8").decode("utf-8")
        graph_id = cur_g.identifier

        if type(graph_id) is BNode:
            return "%s DATA { %s }" % (query_type, statements)

        return "%s DATA { GRAPH <%s> { %s } }" % (query_type, str(graph_id),
                                                  statements)

    def __store_in_file(self, cur_g, cur_file_path, context_path):
        """Write the statements of `cur_g` to `cur_file_path`.

        The output is JSON-LD (with '@context' set to `context_path`) when
        neither `nt` nor `nq` is set and `context_path` is truthy; otherwise
        it is 'nt11' if `nt` is set, or 'nquads' if `nq` is set.
        """
        # Note: copying every triple into a new graph is a sort of hack for
        # including all the triples of the input graph into the final stored
        # file. Somehow, some of them are not written in such file otherwise
        # - in particular the provenance ones.
        new_g = ConjunctiveGraph()
        for triple in cur_g.triples((None, None, None)):
            # Only the first context found for the triple is considered
            first_context = next(iter(cur_g.contexts(triple)), None)
            if first_context is None:
                g_iri = None
            else:
                g_iri = first_context.identifier

            new_g.addN([triple + (g_iri, )])

        as_json_ld = not self.nt and not self.nq and context_path
        if as_json_ld:
            serialised = new_g.serialize(
                format="json-ld", context=self.__get_context(context_path))
            cur_json_ld = json.loads(serialised.decode("utf-8"))

            # The context is referred to by its URL in the stored file
            json_ld_items = [cur_json_ld] \
                if isinstance(cur_json_ld, dict) else cur_json_ld
            for item in json_ld_items:
                item["@context"] = context_path

            with open(cur_file_path, "w") as f:
                json.dump(cur_json_ld, f, indent=4, ensure_ascii=False)
        elif self.nt:
            new_g.serialize(cur_file_path, format="nt11", encoding="utf-8")
        elif self.nq:
            new_g.serialize(cur_file_path, format="nquads", encoding="utf-8")

        self.repok.add_sentence("File '%s' added." % cur_file_path)

    def dir_and_file_paths(self, cur_g, base_dir, base_iri):
        """Return what `find_paths` computes for one (arbitrary) subject of
        `cur_g`, according to the configuration of this storer. The result
        is used by the callers as a (directory path, file path) pair."""
        all_subjects = set(cur_g.subjects(None, None))
        cur_subject = all_subjects.pop()
        # JSON is the format used unless N-Triples or N-Quads are requested
        is_json = False if self.nt or self.nq else True

        return find_paths(str(cur_subject),
                          base_dir,
                          base_iri,
                          self.default_dir,
                          self.dir_split,
                          self.n_file_item,
                          is_json=is_json)

    def update(self,
               add_g,
               remove_g,
               base_dir,
               base_iri,
               context_path,
               tmp_dir=None,
               override=False,
               already_processed={},
               store_now=True):
        self.repok.new_article()
        self.reperr.new_article()

        if len(remove_g) > 0:
            cur_dir_path, cur_file_path = self.dir_and_file_paths(
                remove_g, base_dir, base_iri)

            if cur_file_path in already_processed:
                final_g = already_processed[cur_file_path]
            elif os.path.exists(cur_file_path):
                # This is a conjunctive graps that contains all the triples (and graphs)
                # the file is actually defining - they could be more than those using
                # 'cur_subject' as subject.
                final_g = self.load(cur_file_path, tmp_dir=tmp_dir)
                already_processed[cur_file_path] = final_g

            for s, p, o, g in [
                    item + (remove_g.identifier, ) for item in list(remove_g)
            ]:
                final_g.remove((s, p, o, g))

        if len(add_g) > 0:
            self.store(add_g, base_dir, base_iri, context_path, tmp_dir,
                       override, already_processed, store_now)
        elif len(remove_g) > 0 and store_now:
            self.__store_in_file(final_g, cur_file_path, context_path)

        return already_processed

    def store(self,
              cur_g,
              base_dir,
              base_iri,
              context_path,
              tmp_dir=None,
              override=False,
              already_processed={},
              store_now=True,
              remove_data=False):
        self.repok.new_article()
        self.reperr.new_article()

        if len(cur_g) > 0:
            cur_dir_path, cur_file_path = self.dir_and_file_paths(
                cur_g, base_dir, base_iri)

            try:
                if not os.path.exists(cur_dir_path):
                    os.makedirs(cur_dir_path)

                final_g = ConjunctiveGraph()
                final_g.addN(
                    [item + (cur_g.identifier, ) for item in list(cur_g)])

                # Remove the data
                if remove_data:
                    stored_g = None
                    if cur_file_path in already_processed:
                        stored_g = already_processed[cur_file_path]
                    elif os.path.exists(cur_file_path):
                        stored_g = self.load(cur_file_path, cur_g, tmp_dir)

                    for s, p, o, g in final_g.quads((None, None, None, None)):
                        stored_g.remove((s, p, o, g))

                    final_g = stored_g
                elif not override:  # Merging the data
                    if cur_file_path in already_processed:
                        stored_g = already_processed[cur_file_path]
                        stored_g.addN(final_g.quads((None, None, None, None)))
                        final_g = stored_g
                    elif os.path.exists(cur_file_path):
                        # This is a conjunctive graps that contains all the triples (and graphs)
                        # the file is actually defining - they could be more than those using
                        # 'cur_subject' as subject.
                        final_g = self.load(cur_file_path, cur_g, tmp_dir)

                already_processed[cur_file_path] = final_g

                if store_now:
                    self.__store_in_file(final_g, cur_file_path, context_path)

                return already_processed
            except Exception as e:
                self.reperr.add_sentence(
                    "[5] It was impossible to store the RDF statements in %s. %s"
                    % (cur_file_path, str(e)))

        return None

    def __get_context(self, context_url):
        if context_url in self.context_map:
            return self.context_map[context_url]
        else:
            return context_url

    def __get_first_context(self):
        for context_url in self.context_map:
            return self.context_map[context_url]

    def load(self, rdf_file_path, cur_graph=None, tmp_dir=None):
        self.repok.new_article()
        self.reperr.new_article()

        if os.path.isfile(rdf_file_path):
            Storer.hack_dates()
            # The line above has been added for handling gYear and gYearMonth correctly.
            # More info at https://github.com/RDFLib/rdflib/issues/806.

            try:
                cur_graph = self.__load_graph(rdf_file_path, cur_graph)
            except IOError:
                if tmp_dir is not None:
                    current_file_path = tmp_dir + os.sep + "tmp_rdf_file.rdf"
                    shutil.copyfile(rdf_file_path, current_file_path)
                    try:
                        cur_graph = self.__load_graph(current_file_path,
                                                      cur_graph)
                    except IOError as e:
                        self.reperr.add_sentence(
                            "[2] "
                            "It was impossible to handle the format used for "
                            "storing the file (stored in the temporary path) '%s'. "
                            "Additional details: %s" %
                            (current_file_path, str(e)))
                    os.remove(current_file_path)
                else:
                    self.reperr.add_sentence(
                        "[3] "
                        "It was impossible to try to load the file from the "
                        "temporary path '%s' since that has not been specified in "
                        "advance" % rdf_file_path)
        else:
            self.reperr.add_sentence(
                "[4] "
                "The file specified ('%s') doesn't exist." % rdf_file_path)

        return cur_graph

    def __load_graph(self, file_path, cur_graph=None):
        """Parse `file_path` into a new conjunctive graph, trying several
        RDF formats in turn.

        :param cur_graph: optional graph whose statements (serialised as
            'trig') are added to the new graph before parsing the file.
        :return: the conjunctive graph, as soon as one format succeeds.
        :raises IOError: if none of the formats can be used; the message
            lists the errors raised by all the attempts.
        """
        formats = ["json-ld", "rdfxml", "turtle", "trig", "nt11", "nquads"]

        current_graph = ConjunctiveGraph()

        if cur_graph is not None:
            current_graph.parse(data=cur_graph.serialize(format="trig"),
                                format="trig")

        # The map may be missing on instances that never loaded any context
        context_map = getattr(self, "context_map", {})

        errors = ""
        for cur_format in formats:
            try:
                if cur_format == "json-ld":
                    with open(file_path) as f:
                        json_ld_file = json.load(f)
                    if isinstance(json_ld_file, dict):
                        json_ld_file = [json_ld_file]

                    for json_ld_resource in json_ld_file:
                        # Trick to force the use of a pre-loaded context if
                        # the format specified is JSON-LD
                        if "@context" in json_ld_resource:
                            cur_context = json_ld_resource["@context"]
                            try:
                                is_preloaded = cur_context in context_map
                            except TypeError:
                                # Inline context (dict or list): it cannot be
                                # a key of the map and is used as it is
                                is_preloaded = False
                            if is_preloaded:
                                json_ld_resource["@context"] = \
                                    context_map[cur_context]["@context"]

                        current_graph.parse(data=json.dumps(
                            json_ld_resource, ensure_ascii=False),
                                            format=cur_format)
                else:
                    current_graph.parse(file_path, format=cur_format)

                return current_graph
            except Exception as e:
                # Keep the error of every attempt, then try another format
                errors += " | " + str(e)

        raise IOError(
            "1",
            "It was impossible to handle the format used for storing the file '%s'%s"
            % (file_path, errors))
Beispiel #11
0
        "-i",
        "--input",
        dest="input",
        required=True,
        help="The file containing the RDF to execute, the JSON-LD to upload, "
        "or a directory containing several files with both queries and RDF.")

    args = arg_parser.parse_args()

    if args.conf is not None:
        my_conf = __import__(args.conf)
        for attr in dir(my_conf):
            if not attr.startswith("__"):
                globals()[attr] = getattr(my_conf, attr)

    storer = Storer(repok=Reporter(True),
                    reperr=Reporter(True),
                    context_map={context_path: context_file_path})

    all_files = []
    if os.path.isdir(args.input):
        for cur_dir, cur_subdir, cur_files in os.walk(args.input):
            for cur_file in cur_files:
                full_path = cur_dir + os.sep + cur_file
                if re.search(os.sep + "prov" + os.sep, full_path) is None and \
                        not full_path.endswith("index.json"):
                    all_files += [full_path]
    else:
        all_files += [args.input]

    for cur_file in all_files:
Beispiel #12
0
class FormatProcessor(object):
    #doi_pattern = "[^A-z0-9\.]([0-9]+\.[0-9]+(\.[0-9]+)*/[^%\"# \?<>{}\^\[\]`\|\\\+]+)"
    doi_pattern = "[^A-z0-9\.](10\.[0-9]+(\.[0-9]+)*/[^%\"# \?<>{}\^\[\]`\|\\\+]+)"
    http_pattern = "(https?://([A-z]|[0-9]|%|&|\?|/|\.|_|~|-|:)+)"
    """This class is the abstract one for any kind of processors."""
    def __init__(self,
                 base_iri,
                 context_base,
                 info_dir,
                 entries,
                 n_file_item,
                 supplier_prefix,
                 agent_id=None):
        self.occ = None
        self.doi = None
        self.pmid = None
        self.pmcid = None
        self.url = None
        self.curator = None
        self.source = None
        self.source_provider = None
        self.entries = None
        self.reference_pointers = None

        if entries is not None:
            if "occ" in entries:
                self.occ = entries["occ"]
            if "doi" in entries:
                self.doi = entries["doi"].lower()
            if "pmid" in entries:
                self.pmid = entries["pmid"]
            if "pmcid" in entries:
                self.pmcid = entries["pmcid"]
            if "url" in entries:
                self.url = entries["url"].lower()
            if "curator" in entries:
                self.curator = entries["curator"]
            if "source" in entries:
                self.source = entries["source"]
            if "source_provider" in entries:
                self.source_provider = entries["source_provider"]
            if "references" in entries:
                self.entries = entries["references"]
                if "reference_pointers" in entries:
                    self.reference_pointers = entries["reference_pointers"]

        self.name = "SPACIN " + self.__class__.__name__
        self.g_set = GraphSet(base_iri,
                              context_base,
                              info_dir,
                              n_file_item,
                              supplier_prefix,
                              wanted_label=False)  # added no label param
        self.id = agent_id
        self.repok = Reporter(prefix="[%s - INFO] " % self.name)
        self.repok.new_article()
        self.reperr = Reporter(prefix="[%s - ERROR] " % self.name)
        self.reperr.new_article()

    def process(self):
        pass  # Implemented in the subclasses

    def graph_set(self):
        return self.g_set

    def graphs(self):
        return self.g_set.graphs()

    def message(self, mess):
        return "%s" % mess

    @staticmethod
    def clean_entry(entry):
        return quote(sa(re.sub(":", ",", entry)))

    @staticmethod
    def extract_data(string, pattern):
        if string is not None:
            result = re.search(pattern, string)
            if result:
                return result.group(1)

    @staticmethod
    def extract_doi(string):
        if string is not None:
            result = FormatProcessor.extract_data(string,
                                                  FormatProcessor.doi_pattern)
            if result:
                result = re.sub("(\.|,)?$", "", result)

            return result

    @staticmethod
    def extract_url(string):
        if string is not None:
            result = FormatProcessor.extract_data(string,
                                                  FormatProcessor.http_pattern)
            if result:
                result = re.sub("\\\\", "", re.sub("/?\.?$", "", result))

            return result
Beispiel #13
0
class ORCIDFinder(object):
    """Query the public ORCID API (v2.1) for the ORCID records associated
    with a DOI and for the personal details of their owners."""

    __api_url = "https://pub.orcid.org/v2.1/search?q="
    __personal_url = "https://pub.orcid.org/v2.1/%s/personal-details"

    def __init__(self, conf_file, sec_to_wait=10, max_iteration=6, timeout=30):
        """Read the access token from the JSON file `conf_file` (key
        'access_token') and keep the parameters later passed to
        `get_data`."""
        with open(conf_file) as f:
            conf_json = json.load(f)

        self.headers = {
            "Authorization": "Bearer %s" % conf_json["access_token"],
            "Content-Type": "application/json"
        }
        self.id = "ORCID"
        self.name = "SPACIN " + self.__class__.__name__
        self.repok = Reporter(prefix="[%s - INFO] " % self.name)
        self.reper = Reporter(prefix="[%s - ERROR] " % self.name)
        self.__last_query_done = None
        self.sec_to_wait = sec_to_wait
        self.max_iteration = max_iteration
        self.timeout = timeout

    def get_last_query(self):
        """Return the URL of the last query done (None if none yet)."""
        return self.__last_query_done

    def get_orcid_data(self, orcid_string):
        """Ask for the personal details of the ORCID `orcid_string` and
        return what `get_data` returns."""
        self.repok.new_article()
        self.reper.new_article()
        self.__last_query_done = ORCIDFinder.__personal_url % orcid_string
        return get_data(self.max_iteration, self.sec_to_wait,
                        self.__last_query_done, self.headers, self.timeout,
                        self.repok, self.reper)

    def get_orcid_records(self, doi_string, family_names=None):
        """Search the ORCID records mentioning the DOI `doi_string`.

        :param family_names: optional family names used to restrict the
            search to the records having one of them; the None items are
            ignored.
        :return: what `get_data` returns for the search query.
        """
        self.repok.new_article()
        self.reper.new_article()

        cur_query = "digital-object-ids:\"%s\"" % doi_string

        # Only the names actually specified are used: a list made only of
        # None items must not produce an empty ' AND ()' clause.
        known_names = [family_name for family_name in (family_names or [])
                       if family_name is not None]
        if known_names:
            name_clauses = ["family-name:\"%s\"" % na("" + family_name)
                            for family_name in known_names]
            cur_query += " AND (" + " OR ".join(name_clauses) + ")"

        self.__last_query_done = ORCIDFinder.__api_url + quote(cur_query)

        return get_data(self.max_iteration, self.sec_to_wait,
                        self.__last_query_done, self.headers, self.timeout,
                        self.repok, self.reper)

    def get_orcid_ids(self, doi_string, family_names=None):
        """Return a list with one item (built with `da`) for each ORCID
        record found for `doi_string` whose personal details could be
        retrieved, holding the ORCID and the names of its owner."""
        result = []

        records = self.get_orcid_records(doi_string, family_names)
        if records is None:
            return result

        for orcid_id in dg(records, ["result", "orcid-identifier", "path"]):
            personal_details = self.get_orcid_data(orcid_id)
            if personal_details is None:
                continue

            result.append(
                da({
                    "orcid": orcid_id,
                    "given": dg(personal_details,
                                ["name", "given-names", "value"]),
                    "family": dg(personal_details,
                                 ["name", "family-name", "value"]),
                    "credit": dg(personal_details,
                                 ["name", "credit-name", "value"]),
                    "other": dg(personal_details,
                                ["other-names", "other-name", "content"])
                }))

        return result
Beispiel #14
0
        description="This script create an nt file given a directory "
        "of the OCC containing data")
    arg_parser.add_argument("-i",
                            "--input",
                            dest="input",
                            required=True,
                            help="The directory containing the json-ld data.")
    arg_parser.add_argument("-o",
                            "--output",
                            dest="output",
                            required=True,
                            help="The output file.")

    args = arg_parser.parse_args()

    repok = Reporter(True, prefix="[creatent.py: INFO] ")
    reperr = Reporter(True, prefix="[creatent.py: ERROR] ")
    repok.new_article()
    reperr.new_article()

    s = Storer(context_map={context_path: context_file_path},
               dir_split=dir_split_number,
               n_file_item=items_per_file,
               default_dir=default_dir)

    for cur_dir, cur_subdir, cur_files in os.walk(args.input):
        with open(args.output, 'a') as f:
            for cur_file in cur_files:
                if match("^[0-9]+\.json", cur_file) is not None:
                    cur_g = s.load(cur_dir + os.sep + cur_file,
                                   tmp_dir=temp_dir_for_rdf_loading)
Beispiel #15
0
class ORCIDFinder(object):
    """Find ORCID identifiers and personal details for the authors of a DOI.

    The actual lookups are delegated to a query interface object, either a
    ``LocalQuery`` or a ``RemoteQuery``, selected at construction time.
    """
    __api_url = "https://pub.orcid.org/v2.1/search?q="
    __personal_url = "https://pub.orcid.org/v2.1/%s/personal-details"

    def __init__(self,
                 conf_file,
                 sec_to_wait=10,
                 max_iteration=6,
                 timeout=30,
                 query_interface='remote'):
        """Read the configuration and set up the query interface.

        :param conf_file: path of a JSON file that must contain an
            ``access_token`` key, used as bearer token in the HTTP headers.
        :param sec_to_wait: forwarded to ``RemoteQuery``.
        :param max_iteration: forwarded to ``RemoteQuery``.
        :param timeout: forwarded to ``RemoteQuery``.
        :param query_interface: either ``'local'`` or ``'remote'``.
        :raises ValueError: if ``query_interface`` is neither of the two.
        """
        # Only the JSON parsing needs the file handle; release it right away.
        with open(conf_file) as f:
            conf_json = json.load(f)

        self.headers = {
            "Authorization": "Bearer %s" % conf_json["access_token"],
            "Content-Type": "application/json"
        }
        self.id = "ORCID"
        self.name = "SPACIN " + self.__class__.__name__
        self.repok = Reporter(prefix="[%s - INFO] " % self.name)
        self.reper = Reporter(prefix="[%s - ERROR] " % self.name)
        self.__last_query_done = None
        self.sec_to_wait = sec_to_wait
        self.max_iteration = max_iteration
        self.timeout = timeout

        if query_interface == 'local':
            self.query_interface = LocalQuery(reperr=self.reper,
                                              repok=self.repok)
        elif query_interface == 'remote':
            self.query_interface = RemoteQuery(max_iteration=max_iteration,
                                               sec_to_wait=sec_to_wait,
                                               timeout=timeout,
                                               headers=self.headers,
                                               reperr=self.reper,
                                               repok=self.repok,
                                               is_json=True)
        else:
            raise ValueError(
                "query_interface param must be `local` or `remote`")

    def get_last_query(self):
        """Return the URL recorded by the most recent lookup, or None."""
        return self.__last_query_done

    def get_orcid_data(self, orcid_string):
        """Return the personal details of ``orcid_string``.

        The personal-details URL is recorded as the last query and printed to
        standard output before the query interface is asked for the data.
        """
        self.repok.new_article()
        self.reper.new_article()
        self.__last_query_done = ORCIDFinder.__personal_url % orcid_string
        print(self.__last_query_done)
        return self.query_interface.get_orcid_data(orcid_string)

    @staticmethod
    def _build_remote_query(doi_string, family_names=None):
        """Build the (not yet URL-quoted) search expression for a DOI.

        The DOI is searched as given plus, when they differ from it, in its
        lower-case and upper-case forms. Family names that are not None are
        added as an extra ``AND (... OR ...)`` restriction; when no usable
        name is left, no restriction is added at all (an empty ``AND ()``
        would make the expression malformed).
        """
        doi_clauses = ["doi-self:\"%s\"" % doi_string]
        for variant in (doi_string.lower(), doi_string.upper()):
            if variant != doi_string:
                doi_clauses.append("doi-self:\"%s\"" % variant)

        if len(doi_clauses) > 1:
            cur_query = "(" + " OR ".join(doi_clauses) + ")"
        else:
            cur_query = doi_clauses[0]

        usable_names = [name for name in (family_names or ())
                        if name is not None]
        if usable_names:
            # `na` is a helper defined elsewhere in the file; the "" + name
            # concatenation is kept from the original call.
            name_clauses = ["family-name:\"%s\"" % na("" + name)
                            for name in usable_names]
            cur_query += " AND (" + " OR ".join(name_clauses) + ")"

        return cur_query

    def get_orcid_records(self, doi_string, family_names=None):
        """Ask the query interface for the ORCID records linked to a DOI.

        :param doi_string: the DOI to look for.
        :param family_names: optional iterable of family names (None entries
            are ignored) used to narrow a remote search; unused for local
            queries.
        :return: whatever the query interface returns.
        """
        self.repok.new_article()
        self.reper.new_article()

        # If we're making a local query, we only need to use the doi string
        if isinstance(self.query_interface, LocalQuery):
            return self.query_interface.get_orcid_records(doi_string.lower())

        # Otherwise we send the URL-quoted search expression.
        encoded_query = quote(
            self._build_remote_query(doi_string, family_names))
        self.__last_query_done = ORCIDFinder.__api_url + encoded_query
        return self.query_interface.get_orcid_records(encoded_query)

    def get_orcid_ids(self, doi_string, family_names=None):
        """Return one entry per author found for the DOI.

        Each entry is the result of ``da`` applied to a dict with the keys
        ``orcid``, ``given``, ``family``, ``credit`` and ``other``. An empty
        list is returned when the record lookup yields None.
        """
        result = []
        records = self.get_orcid_records(doi_string, family_names)
        if records is None:
            return result

        if isinstance(self.query_interface, RemoteQuery):
            # Remote results only carry identifiers: the names require one
            # further personal-details request per ORCID.
            for orcid_id in dg(records,
                               ["result", "orcid-identifier", "path"]):
                personal_details = self.get_orcid_data(orcid_id)
                if personal_details is None:
                    continue
                result.append(da({
                    "orcid": orcid_id,
                    "given": dg(personal_details,
                                ["name", "given-names", "value"]),
                    "family": dg(personal_details,
                                 ["name", "family-name", "value"]),
                    "credit": dg(personal_details,
                                 ["name", "credit-name", "value"]),
                    "other": dg(personal_details,
                                ["other-names", "other-name", "content"])
                }))
        else:
            for author in records:
                result.append(da({
                    "orcid": author['orcid'],
                    "given": author['given_names'],
                    "family": author['family_name'],
                    "credit": "",  # actually we don't manage this
                    "other": ""  # actually we don't manage this
                }))

        return result