Python get_datasets Examples

Programming Language: Python

Namespace/Package Name: bibjson_util

Method/Function: get_datasets

Examples at hotexamples.com: 7

Python get_datasets - 7 examples found. These are the top-rated real-world Python examples of bibjson_util.get_datasets, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: bibjson_rdf.py Project: opencitations/PubMed-OA-network-analysis-scripts

def process_articles(input_filename, writer, mappings):
    """Run ``process_article`` over every dataset read from a gzipped tar.

    Args:
        input_filename: Path of the gzip-compressed tar archive to open.
        writer: Passed through unchanged to ``process_article``.
        mappings: Passed through unchanged to ``process_article``.

    An exception raised while handling one dataset is printed to stderr and
    the loop moves on to the next dataset.  The archive is always closed
    before returning.
    """
    tar = tarfile.open(input_filename, 'r:gz')
    try:
        # get_datasets yields (tar_info, dataset) pairs; only the dataset is
        # needed here.  NOTE(review): types=('Article',) presumably limits
        # which record types are returned -- confirm in bibjson_util.
        for _tar_info, journal in get_datasets(tar, types=('Article',)):
            try:
                process_article(journal['recordList'], writer, mappings)
            except Exception:
                # Deliberate best-effort: report the failure and carry on.
                traceback.print_exc(file=sys.stderr)
    finally:
        # Previously the archive was never closed, leaking the file handle.
        tar.close()

Example #2

Show file

File: bibjson_rdf.py Project: viveksck/OpenCitationsCorpus

def process_articles(input_filename, writer, mappings):
    """Feed each dataset's record list from a tar.gz to ``process_article``.

    Args:
        input_filename: Path of the gzip-compressed tar archive to read.
        writer: Forwarded as-is to ``process_article``.
        mappings: Forwarded as-is to ``process_article``.

    Errors raised for an individual dataset are written to stderr as a
    traceback and do not abort the run.  The archive handle is released on
    every exit path.
    """
    tar = tarfile.open(input_filename, 'r:gz')
    try:
        # Each item from get_datasets is a (tar_info, dataset) pair; the
        # tar_info half is not used by this function.
        for _tar_info, journal in get_datasets(tar, types=('Article',)):
            try:
                process_article(journal['recordList'], writer, mappings)
            except Exception:
                # Best-effort by design: log and continue with the next one.
                traceback.print_exc(file=sys.stderr)
    finally:
        # The archive used to stay open after the loop; close it explicitly.
        tar.close()

Example #3

Show file

File: error_counting.py Project: HeinrichHartmann/OpenCitationsCorpus

# Shared table handed to the helper calls further down; looking up a missing
# key creates an empty dict for it.
mappings = defaultdict(dict)

# Gzip-compressed tar archive that the processing loop reads from.
tar = tarfile.open(
    name='../parsed-recent/articles-unified.bibjson.tar.gz',
    mode='r:gz'
)

def format_article(article, mappings, records):
    """Render one article as a fixed-width, single-line summary.

    The line has four columns separated by two spaces: DOI (30 characters),
    PMID (10), title (80) and authors (100).  Each value is truncated to its
    column width and padded with spaces on the right; a missing key is
    rendered as an empty column.

    Args:
        article: Mapping read via ``.get`` for 'doi', 'pmid', 'title' and
            'author'.
        mappings: Forwarded to ``normalize_field`` for every author entry.
        records: Forwarded to ``normalize_field`` for every author entry.

    Returns:
        The formatted line as a string.
    """
    cleaned_names = []
    for author_entry in article.get('author', ()):
        author = normalize_field(author_entry, mappings, records)
        # Authors without a name show up as '-'; commas and periods are
        # stripped from each name before the names are joined.
        name = author.get('name', '-')
        cleaned_names.append(name.replace(',', '').replace('.', ''))
    authors = ', '.join(cleaned_names)

    doi_col = article.get('doi', '')[:30].ljust(30)
    pmid_col = article.get('pmid', '')[:10].ljust(10)
    title_col = article.get('title', '')[:80].ljust(80)
    authors_col = authors[:100].ljust(100)

    return '%30s  %10s  %80s  %100s' % (doi_col, pmid_col, title_col, authors_col)



# For every dataset in the archive: compute the majority-vote "canonical"
# record, print the compare() result for each entry, then print the
# formatted canonical article followed by every individual article.
#
# print is called with a single parenthesised argument throughout, so the
# script produces the same output under Python 2 and parses under Python 3.
for tar_info, dataset in get_datasets(tar, ('Article',)):
    records = dataset['recordList']
    articles = [r for r in records if r.get('type') == 'Article']
    canonical, fields = majority_vote(records, ('Article',), mappings)

    # Re-key the records by their 'id'; from here on ``records`` is a dict.
    records = dict((r['id'], r) for r in records)

    # NOTE(review): iterating a dict yields its keys, so ``record`` is a
    # record id here rather than a record dict. Confirm this is what
    # compare() expects; if not, iterate over records.values() instead.
    for record in records:
        v = compare(canonical, record, records)
        print(v)

    print('=' * 80)
    print(format_article(canonical, mappings, records))
    print('-' * 80)
    for article in articles:
        print(format_article(article, mappings, records))

Example #4

Show file

File: bibjson_rdf.py Project: opencitations/PubMed-OA-network-analysis-scripts

def process_errors(input_filename, writer, mappings):
    """Run ``process_error`` over every dataset read from a gzipped tar.

    Args:
        input_filename: Path of the gzip-compressed tar archive to open.
        writer: Passed through unchanged to ``process_error``.
        mappings: Passed through unchanged to ``process_error``.

    Exceptions from ``get_datasets`` or ``process_error`` propagate to the
    caller; the archive is closed first in either case.
    """
    tar = tarfile.open(input_filename, 'r:gz')
    try:
        # get_datasets yields (tar_info, dataset) pairs; tar_info is unused.
        # NOTE(review): types=None presumably means "no type filter" --
        # confirm in bibjson_util.
        for _tar_info, dataset in get_datasets(tar, types=None):
            process_error(dataset['recordList'], writer, mappings)
    finally:
        # Previously the archive was never closed, leaking the file handle.
        tar.close()

Example #5

Show file

File: bibjson_rdf.py Project: opencitations/PubMed-OA-network-analysis-scripts

def process_citations(input_filename, writer, mappings):
    """Run ``process_citation`` over every dataset read from a gzipped tar.

    Args:
        input_filename: Path of the gzip-compressed tar archive to open.
        writer: Passed through unchanged to ``process_citation``.
        mappings: Passed through unchanged to ``process_citation``.

    Exceptions from ``get_datasets`` or ``process_citation`` propagate to the
    caller; the archive is closed first in either case.
    """
    tar = tarfile.open(input_filename, 'r:gz')
    try:
        # get_datasets yields (tar_info, dataset) pairs; tar_info is unused.
        # NOTE(review): types=('Article',) presumably limits which record
        # types are returned -- confirm in bibjson_util.
        for _tar_info, journal in get_datasets(tar, types=('Article',)):
            process_citation(journal['recordList'], writer, mappings)
    finally:
        # Previously the archive was never closed, leaking the file handle.
        tar.close()

Example #6

Show file

File: bibjson_rdf.py Project: viveksck/OpenCitationsCorpus

def process_errors(input_filename, writer, mappings):
    """Feed each dataset's record list from a tar.gz to ``process_error``.

    Args:
        input_filename: Path of the gzip-compressed tar archive to read.
        writer: Forwarded as-is to ``process_error``.
        mappings: Forwarded as-is to ``process_error``.

    Nothing is caught here: any exception reaches the caller, but only after
    the archive handle has been released.
    """
    tar = tarfile.open(input_filename, 'r:gz')
    try:
        # Each item from get_datasets is a (tar_info, dataset) pair; the
        # tar_info half is not used by this function.
        for _tar_info, dataset in get_datasets(tar, types=None):
            process_error(dataset['recordList'], writer, mappings)
    finally:
        # The archive used to stay open after the loop; close it explicitly.
        tar.close()

Example #7

Show file

File: bibjson_rdf.py Project: viveksck/OpenCitationsCorpus

def process_citations(input_filename, writer, mappings):
    """Feed each dataset's record list from a tar.gz to ``process_citation``.

    Args:
        input_filename: Path of the gzip-compressed tar archive to read.
        writer: Forwarded as-is to ``process_citation``.
        mappings: Forwarded as-is to ``process_citation``.

    Nothing is caught here: any exception reaches the caller, but only after
    the archive handle has been released.
    """
    tar = tarfile.open(input_filename, 'r:gz')
    try:
        # Each item from get_datasets is a (tar_info, dataset) pair; the
        # tar_info half is not used by this function.
        for _tar_info, journal in get_datasets(tar, types=('Article',)):
            process_citation(journal['recordList'], writer, mappings)
    finally:
        # The archive used to stay open after the loop; close it explicitly.
        tar.close()