def build(self, dump_data=False, dump_filename=None):
        """Build the proceedings PDF book from DBLP metadata.

        The proceedings referenced by ``self.DBLP_LINK`` are parsed with
        ``DBLPParser``; for every track except the first one each entry is
        passed to ``PaperParser.parse_doi`` and kept only when the returned
        status code is 200. The collected data is then handed to
        ``ProceedingsBookBuilder.build_pdf_book``.

        Args:
            dump_data: When truthy (and ``dump_filename`` is given), the raw
                DBLP data and the assembled book data are written through
                ``self.dump_data``.
            dump_filename: Base name for the dumps; the suffixes
                ``"_DBLP_data"`` and ``"_book_data"`` are appended to it.
        """
        proceedings = DBLPParser()
        proceedings_data = proceedings.parse_proceedings(
            self.DBLP_LINK, self.no_track)

        # Decide once whether dumping is possible; both dump sites share it.
        should_dump = bool(dump_data) and dump_filename is not None

        if should_dump:
            self.dump_data(proceedings_data, dump_filename + "_DBLP_data")
            print("Proceedings metadata has been dumped from DBLP!")
        elif dump_data:
            # Only complain when a dump was actually requested; previously
            # this message was also printed for the default dump_data=False.
            print("'dump_filename' is missing!")

        papers = PaperParser()
        proceedings_book = {}

        for no, track in enumerate(proceedings_data):
            entries = proceedings_data[track]
            if no == 0:
                # The first entry is stored as-is (wrapped in a list) and is
                # not resolved through PaperParser.
                # NOTE(review): presumably front-matter/metadata - confirm.
                proceedings_book[track] = [entries]
            else:
                parsed = (papers.parse_doi(doi) for doi in entries)
                # Keep only the papers whose lookup reported status 200.
                proceedings_book[track] = [
                    paper for status_code, paper in parsed
                    if status_code == 200
                ]
            print('{}. "{}" track DONE!\n'.format(no, track))

        if should_dump:
            self.dump_data(proceedings_book, dump_filename + "_book_data")
            print("Proceedings book data has been dumped!")

        book_builder = ProceedingsBookBuilder(file_name=self.TITLE)
        book_builder.build_pdf_book(proceedings_book)
Example #2
0
 def __init__(self, *args, **kargs):
   """Initialise the application and load the DBLP XML into the database.

   The keyword arguments ``_mypassword``, ``_myhost``, ``_username``,
   ``_database`` and ``_xmlpath`` are required. They are removed from
   ``kargs`` before the remaining arguments are forwarded to
   ``Application.__init__``; a missing one raises ``KeyError``.
   """
   # Pop order is kept so the first missing key reported stays the same.
   db_password = kargs.pop('_mypassword')
   db_host = kargs.pop('_myhost')
   db_user = kargs.pop('_username')
   db_name = kargs.pop('_database')
   dump_path = kargs.pop('_xmlpath')

   Application.__init__(self, *args, **kargs)

   # The base class must not already expose any of these attributes.
   for reserved in ('_password', '_host', '_username'):
     assert not hasattr(self, reserved)

   driver_options = dict(
     host=db_host,
     username=db_user,
     password=db_password,
     database=db_name,
     create_db_on_start=True,
   )
   self._db = DBLPDatabaseDriver(**driver_options)
   self._db.create_table()

   # Parse the XML dump and push the parsed content into the database.
   xml_parser = DBLPParser(dump_path)
   xml_parser.visit()
   xml_parser.push_to_db(self._db)
Example #3
0
Example usage

@author:  Weiyi Wang
@contact: [email protected]
@date:    11/8/2015
"""
from dblp_parser import DBLPParser
from rdf_driver import RDFDriver


"""
Level 1: Load DBLP data into RDFLib and run a simple query:
given a publication title, list its detailed information.
"""
# Level 1 demo: parse a DBLP XML sample, load it into the RDF driver and
# look up a single publication by its title.
print("=========LEVEL 1========")
parser = DBLPParser("data/dblp_t100.xml")
parser.visit()  # Parse the data and store in python

print("Converting to rdf data..\n")
driver = RDFDriver()
driver.add_documents(parser.publications)  # Store the data to RDF format

print("Querying the detail of publication 'On Parallel Integer Sorting'")
res = driver.query_by_title('On Parallel Integer Sorting')
for row in res:
    # Call form works on Python 2 and 3; the bare `print row` statement is a
    # SyntaxError on Python 3.
    print(row)

"""
Level 2: Convert the DBLP XML into RDF format and add queries such as:
given author A, list all of her co-authors together with the corresponding publication information.
"""
Example #4
0
# Connection settings read from the `config` mapping loaded earlier.
DATABASE = config['database']
HOST = config['host']
USERNAME = config['username']

from dblp_dbdriver import DBLPDatabaseDriver
from dblp_parser import DBLPParser

import pprint

# Open the database connection and make sure the table exists.
connection_settings = {
    'host': HOST,
    'password': PASSWORD,
    'database': DATABASE,
    'username': USERNAME,
    'create_db_on_start': False,
}
db = DBLPDatabaseDriver(**connection_settings)
db.create_table()

# Parse the configured XML file and push its content into the database.
parser = DBLPParser(config['xmlpath'])
parser.visit()
parser.push_to_db(db)

# Each demo is (banner, driver method name, argument). The method is looked
# up only when its turn comes, so banners and queries interleave exactly as
# in a hand-written sequence.
demo_queries = (
    ('\n\n----------Listing Co-Authors-------------',
     'query_coauthor', 'Eike Best'),
    ('\n\n----------Querying by title-------------',
     'query_by_name', 'Extended multi bottom-up tree transducers.'),
    ('\n\n----------Querying by author-------------',
     'query_by_author', 'Eike Best'),
    ('\n\n----------Querying by keywords-------------',
     'query_keywords', ['multi', 'space']),
)
for banner, method_name, argument in demo_queries:
    print(banner)
    pprint.pprint(getattr(db, method_name)(argument))