예제 #1
0
from pyingest.parsers.adsfeedback import ADSFeedbackParser
from pyingest.serializers.classic import Tagged


# Input file holding the feedback record that this example converts.
infile = './feedback_test.json'

# Read the raw text, parse it with the ADS feedback parser, and serialize the
# parsed record. No output stream is passed to write(), so the destination is
# whatever Tagged.write() uses by default.
with open(infile, 'r') as feedback_file:
    raw_text = feedback_file.read()
    record = ADSFeedbackParser(raw_text).parse()
    serializer = Tagged()
    serializer.write(record)
        i = i - 1
        v = vols[i]

    # Gather every *.xml file directly under the directory path held in `v`.
    papers = glob(v + '/*.xml')
    print("VEE:", v)

    # Try the parser: each file is parsed independently so that one bad file
    # is reported and skipped rather than aborting the whole run.
    documents = []
    for p in papers:
        try:
            # Plain 'r' rather than 'rU': the 'U' flag was removed in
            # Python 3.11 and makes open() raise ValueError there, which the
            # handler below would report for every single file. Text mode
            # already applies universal-newline translation on Python 3.
            with open(p, 'r') as fp:
                doc = parser.parse(fp)
            documents.append(doc)
        except Exception as e:
            print("Error in IOP parser:", p, e)

    # Write everything out in Classic tagged format
    serializer = Tagged()
    refwriter = ReferenceWriter()
    refwriter.refsource = '.jats.iopft.xml'

    # The context manager guarantees the output file is closed even when
    # serializer.write() raises (that call is deliberately not guarded, so a
    # serialization failure still propagates to the caller).
    with open(outfile, 'a') as fo:
        for d in documents:
            serializer.write(d, fo)
            # Reference writing is best-effort: report the failure and carry
            # on with the next document.
            try:
                refwriter.writeref(d)
            except Exception as err:
                print("Error in refwriter: %s" % err)
예제 #3
0
#!/usr/bin/env python
import os
import sys
from pyingest.parsers.default import BaseRSSFeedParser
from pyingest.serializers.classic import Tagged

# Feed to harvest in this example.
rss_url = 'http://www.reddit.com/r/python/.rss'

# Parse the feed into a collection of records.
parser = BaseRSSFeedParser()
documents = parser.parse(rss_url)

# One serializer is reused for every record instead of being rebuilt on each
# iteration, and the output file is managed by a `with` block so it is closed
# even if a write fails part-way through. Records are appended to 'rss.tag'.
serializer = Tagged()
with open('rss.tag', 'a') as outputfp:
    for d in documents:
        serializer.write(d, outputfp)
예제 #4
0
            if not os.path.exists(archive_file):
                abs_source = urllib.urlopen(absURL).read()
                open(archive_file, 'w').write(abs_source)
                pnas = PNASParser()
                output = pnas.parse(absURL)
                records.append(output)
        except Exception, err:
            print("Error parsing %s: %s:" % (absURL, err))

# Append every harvested record to `outfile` and hand it to the reference
# writer. Serialization and reference writing are each best-effort per record:
# a failure is reported and processing continues with the next step/record.
if records:
    try:
        # `with` closes the output file on every exit path, including errors.
        with open(outfile, 'a') as fo:
            for rec in records:
                try:
                    serializer = Tagged()
                    serializer.write(rec, fo)
                except Exception as err:
                    print("Error in serializer: %s" % err)
                try:
                    ref_handler = ReferenceWriter()
                    ref_handler.writeref(rec, 'pnas')
                except Exception as err:
                    print("Error in writeref: %s" % err)
        print("New PNAS records available in %s" % outfile)
    except Exception as err:
        # Bind the exception here: the previous bare `except:` formatted a
        # name that was either unbound or left over from an inner handler,
        # and it also trapped KeyboardInterrupt/SystemExit.
        print("Error writing PNAS records: %s" % err)
else:
    print("No new PNAS records available.")

print("End PNAS harvest.")