# Author: Xu Wang
# Email: [email protected]
#
# Filename: retrieve_publications.py
#
#
from database import Publications
from journal import *

# Table object that every retrieved publication title is written to.
PublicationsTable = Publications()

# --- Journals (currently disabled) -----------------------------------------
# To load journals instead of conferences, re-enable:
#
#     publication_type = 'Journals & Magazines'
#     for number, name in all_journal_titles():
#         print(name, number)
#         PublicationsTable.add_publication_title_type_number(
#             name, publication_type, number)

# --- Conferences -------------------------------------------------------------
# all_conference_titles() yields (number, name) pairs; each pair is echoed
# and then stored together with the fixed 'Conferences' type label.
publication_type = 'Conferences'
for number, name in all_conference_titles():
    print(name, number)
    PublicationsTable.add_publication_title_type_number(
        name, publication_type, number
    )
Example #2
0
              'authors', 'terms']
    with open(filename, 'ta', newline='') as fp:
        tsv_writer = csv.writer(fp, delimiter='\t')
        # tsv_writer.writerow(fields)
        try:
            print('get publication urls', ptitle)
            urls = journal_articles_requests_urls(ptitle,
                                                  articles_per_request=1000)
            for url in urls:
                try:
                    print('reading --', url)
                    paper_dicts = retrieve_documents_from_url(url)
                    paperlist = []
                    for pd in paper_dicts:
                        p = Paper(ptitle, pd)
                        paperlist.append(p)
                    paper_str_list = []
                    for p in paperlist:
                        paper_str_list.append(p.to_list())
                    tsv_writer.writerows(paper_str_list)
                except Exception as e:
                    print(str(e))
        except Exception as e:
            print(str(e))


if __name__ == '__main__':
    # Script entry point: export the papers of every publication title,
    # sequentially, one call to retrieve_papers_to_tsv() per title.
    publications_table = Publications()
    all_titles = publications_table.get_all_publications_title()
    for ptitle in all_titles:
        retrieve_papers_to_tsv(ptitle)
Example #3
0
def mp_retriever(inqueue):
    """
    Worker loop: consume jobs from a queue until a stop marker arrives.

    Each item taken from the queue is unpacked as a (title, type) pair and
    handed to retrieve_papers_to_tsv(). An item whose first element is None
    is the stop marker; the loop ends without processing it.

    :param inqueue: queue-like object whose get() returns 2-tuples
    :return: None
    """
    while True:
        title, kind = inqueue.get()
        if title is None:
            # Stop marker received: leave the loop and let the worker end.
            break
        retrieve_papers_to_tsv(title, kind)

if __name__ == '__main__':
    # Script entry point: fan the retrieval out over a fixed number of
    # worker processes that all read (ptitle, ptype) jobs from one queue.
    publications_table = Publications()
    workers = 128
    processes = []
    title_queue = Queue()

    # Enqueue one job per pair returned by get_all_titles_numbers().
    for ptitle, ptype in publications_table.get_all_titles_numbers():
        title_queue.put((ptitle, ptype))

    # One (None, None) stop marker per worker, placed after all real jobs,
    # so that every mp_retriever loop eventually terminates.
    for _ in range(workers):
        title_queue.put((None, None))

    for _ in range(workers):
        p = Process(target=mp_retriever, args=(title_queue, ))
        p.start()
        processes.append(p)

    # Explicitly wait for every worker instead of relying on the implicit
    # join performed at interpreter shutdown; the handles were already being
    # collected in `processes` but were never used.
    for p in processes:
        p.join()