# Script: walk ./mch/ recursively, feed every *.xml file found to an Authors
# collection, and print the size of that collection at the end.
from ETL.E.G1.authors import Authors
import os

authors = Authors()

for dirpath, dnames, fnames in os.walk("./mch/"):
    for f in fnames:
        if f.endswith(".xml"):
            # Build the path once; it is both echoed and handed to the loader.
            xml_path = os.path.join(dirpath, f)
            # Single-argument print(...) prints the same text on Python 2 and 3.
            print(xml_path)
            authors.add_from_xml(xml_path)

# Report the size of the collection after all files have been added.
print(len(authors))
# Script: load authors from one XML file and issue a find request against the
# bncatalogo X service for each of them, logging the ones that are not found.
import xml.etree.ElementTree as ET
import json
import logging

from ETL.E.G1.authors import Authors
from ETL.E.G2.request.find_request import FindRequest
from ETL.E.G2.request.present_request import PresentRequest
from ETL.T.transformer import Transformer

logging.basicConfig()

# Seed the author collection from the XML file data/000 - 999.xml.
authors = Authors()
authors.add_from_xml("data/000 - 999.xml")

# Empty <aleph> root element; not used by the statements visible here.
aleph_data = ET.fromstring("<aleph></aleph>")

cnt = 0
total = 20
# The endpoint never changes, so it is assigned once instead of per iteration.
url = 'http://www.bncatalogo.cl/X'

for author in authors:
    cnt += 1
    # NOTE(review): the counter is incremented before this check, so the loop
    # stops before handling the author numbered `total`; at most total - 1
    # authors are queried. Confirm that this is intended.
    if cnt == total:
        break

    # Floor division keeps the percentage an integer on Python 2 and 3 alike.
    print("Loading authors: " + str(100 * cnt // total) + "%")

    request = FindRequest(base_url=url)
    metadata = request.find(name=author)

    # A result without any keys is treated as "author not found".
    if not metadata.keys():
        logging.warning("Author '" + author + "' not found.")
        continue

    # Pulled out of the find result; nothing visible here consumes them yet.
    no_entries = metadata['no_entries']
    author_id = metadata['set_number']
# Script: load authors from one XML file and issue a find request against the
# bncatalogo X service for each of them, logging the ones that are not found.
import xml.etree.ElementTree as ET
import json
import logging

from ETL.E.G1.authors import Authors
from ETL.E.G2.request.find_request import FindRequest
from ETL.E.G2.request.present_request import PresentRequest
from ETL.T.transformer import Transformer

logging.basicConfig()

# Seed the author collection from the XML file data/000 - 999.xml.
authors = Authors()
authors.add_from_xml("data/000 - 999.xml")

# Empty <aleph> root element; not used by the statements visible here.
aleph_data = ET.fromstring("<aleph></aleph>")

cnt = 0
total = 20
# The endpoint never changes, so it is assigned once instead of per iteration.
url = 'http://www.bncatalogo.cl/X'

for author in authors:
    cnt += 1
    # NOTE(review): the counter is incremented before this check, so the loop
    # stops before handling the author numbered `total`; at most total - 1
    # authors are queried. Confirm that this is intended.
    if cnt == total:
        break

    # Floor division keeps the percentage an integer on Python 2 and 3 alike.
    print("Loading authors: " + str(100 * cnt // total) + "%")

    request = FindRequest(base_url=url)
    metadata = request.find(name=author)

    # A result without any keys is treated as "author not found".
    if not metadata.keys():
        logging.warning("Author '" + author + "' not found.")
        continue

    # Pulled out of the find result; nothing visible here consumes them yet.
    no_entries = metadata['no_entries']
    author_id = metadata['set_number']