def validate_xml(coll, article_id, issue_id, api_host='localhost', api_port='7000'):
    """
    Validate an article against the WoS schema.

    The schema is read from ``ThomsonReuters_publishing.xsd`` (a relative
    path, so it is resolved against the current working directory) and the
    article XML is fetched over HTTP from the article API running at
    ``api_host``:``api_port``.

    When the document is valid, the record in ``coll`` whose ``code`` equals
    ``article_id`` gets its ``validated_scielo`` attribute set to the string
    ``'True'`` and the fetched XML is returned.

    When the XML cannot be parsed or does not validate, the problem is
    reported through ``write_log`` and ``None`` is returned.
    """
    # Use a context manager so the schema file handle is always released.
    with open('ThomsonReuters_publishing.xsd', 'r') as xsd_file:
        xsd = xsd_file.read()
    sch = Schema(xsd)

    xml_url = 'http://{0}:{1}/api/v1/article?code={2}&format=xml&show_citation=True'.format(api_host, api_port, article_id)

    # try/finally instead of ``with``: the object returned by urllib2.urlopen
    # is not guaranteed to support the context-manager protocol.
    response = urllib2.urlopen(xml_url, timeout=30)
    try:
        xml = response.read()
    finally:
        response.close()

    try:
        result = sch.validate(xml)
    except etree.XMLSyntaxError as e:
        msg = "{0}: Problems reading de XML, {1}".format(article_id, e.text)
        write_log(article_id, issue_id, sch, xml, msg)
        return None

    if result:
        # NOTE(review): the flag is stored as the string 'True', not a boolean.
        coll.update({'code': article_id}, {'$set': {'validated_scielo': 'True'}})
        return xml

    # One "<article_id>: <error text>" line per validation error.
    msg = "".join(
        "{0}: {1}\r\n".format(article_id, error[2])
        for error in sch.get_validation_errors(xml)
    )
    write_log(article_id, issue_id, sch, xml, msg)
    return None
def __init__(self, crossref_api_url=API_URL, query_email=QUERY_EMAIL,
             user=USER, passwd=PASSWD,
             xsd='../xsd/crossref_query_input2.0.xsd'):
    """
    Create a Brocker instance.

    ``xsd`` is the path of the schema file loaded into ``self.porteira``;
    the default points at the query input schema. The remaining arguments
    are stored unchanged on the instance.
    """
    # Read the schema through a context manager so the file handle is
    # closed, and keep the ``xsd`` path parameter from being overwritten.
    with open(xsd) as xsd_file:
        schema_source = xsd_file.read()

    self.porteira = Schema(schema_source)
    self.crossref_api_url = crossref_api_url
    self.user = user
    self.passwd = passwd
    self.query_email = query_email
class Brocker(object):
    """
    Small client for the ``query/`` and ``deposit`` endpoints that live
    under ``crossref_api_url``. XML payloads are checked against the schema
    held in ``self.porteira`` before anything is sent.
    """

    def __init__(self, crossref_api_url=API_URL, query_email=QUERY_EMAIL,
                 user=USER, passwd=PASSWD,
                 xsd='../xsd/crossref_query_input2.0.xsd'):
        """
        Create a Brocker instance.

        ``xsd`` is the path of the schema file loaded into
        ``self.porteira``; the default points at the query input schema.
        The remaining arguments are stored unchanged on the instance.
        """
        # Read the schema through a context manager so the file handle is
        # closed, and keep the ``xsd`` path parameter from being overwritten.
        with open(xsd) as xsd_file:
            schema_source = xsd_file.read()

        self.porteira = Schema(schema_source)
        self.crossref_api_url = crossref_api_url
        self.user = user
        self.passwd = passwd
        self.query_email = query_email

    def _fetch(self, target):
        """
        Open ``target`` (a URL string or a ``urllib2.Request``), return the
        response body and always close the response afterwards.
        """
        response = urllib2.urlopen(target)
        try:
            return response.read()
        finally:
            response.close()

    def query_doi(self, xml):
        """
        Look up a DOI from the metadata contained in ``xml``.

        The XML must be compatible with crossref_query_input2.0.xsd.

        Returns the DOI string found in the query result, or ``None`` when
        the XML does not validate or the result carries no DOI element.
        """
        if not self.porteira.validate(xml):
            return None

        request_url = "{0}query/?format=xsd_xml&pid={1}&qdata={2}".format(
            self.crossref_api_url, self.query_email, urllib2.quote(xml))
        dec = BeautifulSoup(self._fetch(request_url))

        try:
            return dec.query_result.doi.string
        except AttributeError:
            # A missing element in the parsed result shows up as an
            # attribute lookup on None.
            return None

    def is_resolved(self, doi):
        """
        Return True when the query result for ``doi`` has the status
        'resolved', False otherwise.
        """
        request_url = "{0}query/?format=xsd_xml&pid={1}&id={2}".format(
            self.crossref_api_url, self.query_email, doi)
        dec = BeautifulSoup(self._fetch(request_url))

        return dec.query_result.query['status'] == 'resolved'

    def request(self, xml):
        """
        Send ``xml`` to the deposit endpoint as a multipart upload.

        Returns True once the request has been sent and its response read,
        or ``None`` when ``xml`` does not validate against the schema.
        Errors raised by urllib2 while sending are not caught here.
        """
        if not self.porteira.validate(xml):
            return None

        xml_file = StringIO()
        xml_file.write(xml)
        # write() leaves the position at the end of the buffer; rewind so a
        # consumer that calls read() on the handle gets the payload instead
        # of an empty string.
        # NOTE(review): MultiPartForm.add_file is not visible here - confirm
        # how it consumes fileHandle.
        xml_file.seek(0)

        form = MultiPartForm()
        url = "{0}deposit".format(self.crossref_api_url)
        form.add_field('operation', 'doMDUpload')
        form.add_field('login_id', self.user)
        form.add_field('login_passwd', self.passwd)
        form.add_file('fname', 'crossref_query.xml', fileHandle=xml_file)
        body = str(form)

        req = urllib2.Request(url)
        req.add_header('Content-type', form.get_content_type())
        req.add_header('Content-length', len(body))
        req.add_data(body)

        # The response body is read (and discarded) so the exchange completes.
        self._fetch(req)
        return True
def xml_is_valid(xml):
    """
    Validate ``xml`` against a ``Schema`` built from ``str_schema`` (defined
    outside this function) and return the result of that validation.
    """
    return Schema(str_schema).validate(xml)