def __init__(self, api_url):
    """
    Store the service URL on ``self.api_url`` and a new ParsCitReader on
    ``self.reader``.

    Args:
        api_url: URL of the service. A "/" is appended when the value is
            non-empty and does not already end with one.
    """
    base_url = api_url
    needs_slash = len(base_url) > 0 and base_url[-1] != "/"
    if needs_slash:
        base_url += "/"
    self.api_url = base_url
    self.reader = ParsCitReader()
def __init__(self, api_url):
    """
    Keep the given URL (normalised to end with "/") and build the reader.

    Args:
        api_url: URL of the service. An empty value is stored untouched;
            any other value gets a trailing "/" if it lacks one.
    """
    url = api_url
    if len(url) > 0:
        # Only a non-empty URL can be missing its trailing slash.
        if url[-1] != "/":
            url += "/"
    self.api_url = url
    self.reader = ParsCitReader()
class ParsCitClient:
    """
    Client for a ParsCit web service: posts text to it and converts the
    response to metadata through a ParsCitReader.
    """

    def __init__(self, api_url):
        """
        Args:
            api_url: base URL of the web service. A "/" is appended when the
                value is non-empty and does not already end with one.
        """
        base_url = api_url
        if len(base_url) > 0 and base_url[-1] != "/":
            base_url += "/"
        self.api_url = base_url
        self.reader = ParsCitReader()

    def tagFullDocument(self, doc_text):
        """
        Placeholder; always raises NotImplementedError.
        """
        raise NotImplementedError

    def extractReferenceList(self, ref_list, format="raw"):
        """
        Main function: interface with ParsCit.

        Args:
            ref_list: list of strings to be processed (joined with blank
                lines), or a single string. Each string can contain multiple
                references.
            format: sent to the service as-is. "raw" prefixes the text with a
                "References" heading; any other value wraps it in
                <references> tags.

        Returns:
            Whatever the reader's parseParsCitXML produces for the
            "parsed_xml" field of the JSON response.

        Raises:
            ValueError: if the service answers with a status other than 200.
        """
        text = "\n\n".join(ref_list) if isinstance(ref_list, list) else ref_list

        # Hack so ParsCit will actually recognize the references
        if format == "raw":
            wrapper = u"References\n\n%s"
        else:
            wrapper = u"<references>%s</references>"
        text = wrapper % text

        endpoint = self.api_url + "extract_citations/"
        response = requests.post(endpoint, json={"text": text, "format": format})
        if response.status_code != 200:
            # TODO specialized exceptions
            print(text)
            raise ValueError("ParsCit exception")

        payload = json.loads(response.content)
        return self.reader.parseParsCitXML(payload["parsed_xml"])
class ParsCitClient:
    """
    Connects to ParsCit web service, sends documents and converts the response
    to metadata.
    """

    def __init__(self, api_url):
        """
        Args:
            api_url: base URL of the ParsCit web service. A trailing "/" is
                appended when a non-empty URL does not already end with one.
        """
        self.api_url = api_url
        if len(self.api_url) > 0 and self.api_url[-1] != "/":
            self.api_url += "/"
        self.reader = ParsCitReader()

    def tagFullDocument(self, doc_text):
        """
        Not implemented yet.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError

    def extractReferenceList(self, ref_list, format="raw"):
        """
        Main function: interface with ParsCit.

        Args:
            ref_list: list of strings to be processed, or a single string.
                A list is joined with blank lines before sending. Each string
                can contain multiple references.
            format: forwarded to the service. "raw" prefixes the text with a
                "References" heading; any other value wraps the text in
                <references> tags.

        Returns:
            The result of the reader's parseParsCitXML on the "parsed_xml"
            field of the JSON response.

        Raises:
            ValueError: if the service responds with a status other than 200.
        """
        if isinstance(ref_list, list):
            ref_list = "\n\n".join(ref_list)

        # Hack so ParsCit will actually recognize the references
        if format == "raw":
            ref_list = u"References\n\n%s" % ref_list
        else:
            ref_list = u"<references>%s</references>" % ref_list

        data = {"text": ref_list, "format": format}
        r = requests.post(self.api_url + "extract_citations/", json=data)
        if r.status_code != 200:
            # TODO specialized exceptions
            print(ref_list)
            # ValueError subclasses Exception, so handlers written for the
            # generic Exception still catch it; the message now carries the
            # status code so the failure can be diagnosed.
            raise ValueError(
                "ParsCit exception (HTTP status %s)" % r.status_code
            )

        json_data = json.loads(r.content)
        return self.reader.parseParsCitXML(json_data["parsed_xml"])