def build(self, dump_data=False, dump_filename=None):
    """Build the proceedings PDF book from DBLP metadata.

    Parses the proceedings found at ``self.DBLP_LINK``, resolves the DOIs
    of every track except the first one into paper data, and passes the
    collected result to ``ProceedingsBookBuilder.build_pdf_book``.

    Args:
        dump_data: When truthy, the intermediate data is additionally
            written out through ``self.dump_data``.
        dump_filename: Prefix of the dump files; ``"_DBLP_data"`` and
            ``"_book_data"`` are appended to it. If dumping is requested
            but this is ``None``, a warning is printed and no dump is
            written; the book is still built.
    """
    # Evaluate the dump condition once; it is needed for both dumps below.
    should_dump = bool(dump_data) and dump_filename is not None
    if dump_data and dump_filename is None:
        # Only complain when a dump was actually requested; a plain
        # build() call must not report a missing file name.
        print("'dump_filename' is missing!")

    proceedings = DBLPParser()
    proceedings_data = proceedings.parse_proceedings(
        self.DBLP_LINK, self.no_track)

    if should_dump:
        self.dump_data(proceedings_data, dump_filename + "_DBLP_data")
        print("Proceedings metadata has been dumped from DBLP!")

    papers = PaperParser()
    proceedings_book = {}
    for no, track in enumerate(proceedings_data):
        track_entries = proceedings_data[track]
        if no == 0:
            # The first entry is stored as-is (wrapped in a list) instead
            # of being resolved DOI by DOI.
            proceedings_book[track] = [track_entries]
        else:
            track_papers = []
            for doi in track_entries:
                status_code, paper = papers.parse_doi(doi)
                # Keep only the papers whose lookup reported status 200.
                if status_code == 200:
                    track_papers.append(paper)
            proceedings_book[track] = track_papers
        print('{}. "{}" track DONE!\n'.format(no, track))

    if should_dump:
        self.dump_data(proceedings_book, dump_filename + "_book_data")
        print("Proceedings book data has been dumped!")

    book_builder = ProceedingsBookBuilder(file_name=self.TITLE)
    book_builder.build_pdf_book(proceedings_book)
def __init__(self, *args, **kargs):
    """Initialise the application and load the DBLP XML data into the DB.

    The keyword arguments ``_mypassword``, ``_myhost``, ``_username``,
    ``_database`` and ``_xmlpath`` are required. They are removed from
    ``kargs`` before the remaining arguments are forwarded to
    ``Application.__init__``.

    Raises:
        KeyError: If one of the required keyword arguments is missing.
    """
    # Take our own settings out first so they are not forwarded to the
    # base class initialiser.
    db_password = kargs.pop('_mypassword')
    db_host = kargs.pop('_myhost')
    db_user = kargs.pop('_username')
    db_name = kargs.pop('_database')
    dump_path = kargs.pop('_xmlpath')

    Application.__init__(self, *args, **kargs)

    # After base initialisation the instance must not carry any of these
    # attribute names.
    for reserved in ('_password', '_host', '_username'):
        assert not hasattr(self, reserved)

    self._db = DBLPDatabaseDriver(
        host=db_host,
        username=db_user,
        password=db_password,
        database=db_name,
        create_db_on_start=True,
    )
    self._db.create_table()

    # Parse the XML file and push the parsed data into the database.
    xml_parser = DBLPParser(dump_path)
    xml_parser.visit()
    xml_parser.push_to_db(self._db)
"""
Example usage

@author: Weiyi Wang
@contact: [email protected]
@date: 11/8/2015
"""
from dblp_parser import DBLPParser
from rdf_driver import RDFDriver

"""
Level 1: Load DBLP data into RDFLib and run a simple query:
given a publication name, list its detailed information
"""
print("=========LEVEL 1========")
parser = DBLPParser("data/dblp_t100.xml")
parser.visit()  # Parse the data and store it in Python objects
print("Converting to rdf data..\n")
driver = RDFDriver()
driver.add_documents(parser.publications)  # Store the data in RDF format
print("Querying the detail of publication 'On Parallel Integer Sorting'")
res = driver.query_by_title('On Parallel Integer Sorting')
for row in res:
    # Parenthesised form works as a statement on Python 2 and as a
    # function call on Python 3, matching the other prints in this script.
    print(row)

"""
Level 2: turn the DBLP (XML) data into RDF format and add queries such as:
given author A, provide all of her co-authors together with the
corresponding publication information
"""
# NOTE(review): `config` and `PASSWORD` are used below but not defined in
# this part of the script; presumably they are set up earlier -- confirm.
DATABASE = config['database']
HOST = config['host']
USERNAME = config['username']

from dblp_dbdriver import DBLPDatabaseDriver
from dblp_parser import DBLPParser
import pprint

# Open the driver without creating the database on start, then create the
# table before loading data.
db = DBLPDatabaseDriver(
    host=HOST,
    password=PASSWORD,
    database=DATABASE,
    username=USERNAME,
    create_db_on_start=False,
)
db.create_table()

# Parse the XML file named in the configuration and push it to the database.
parser = DBLPParser(config['xmlpath'])
parser.visit()
parser.push_to_db(db)

# Each demo is a banner plus a lazily evaluated query, so the banner is
# printed before its query runs.
demo_queries = (
    ('\n\n----------Listing Co-Authors-------------',
     lambda: db.query_coauthor('Eike Best')),
    ('\n\n----------Querying by title-------------',
     lambda: db.query_by_name('Extended multi bottom-up tree transducers.')),
    ('\n\n----------Querying by author-------------',
     lambda: db.query_by_author('Eike Best')),
    ('\n\n----------Querying by keywords-------------',
     lambda: db.query_keywords(['multi', 'space'])),
)
for banner, run_query in demo_queries:
    print(banner)
    pprint.pprint(run_query())