from pyingest.parsers.adsfeedback import ADSFeedbackParser
from pyingest.serializers.classic import Tagged

infile = './feedback_test.json'

# Read the ADS feedback JSON export and parse it into an ingest record
with open(infile, 'r') as ff:
    data = ff.read()
output = ADSFeedbackParser(data).parse()

# Write the record out in Classic tagged format
serializer = Tagged()
serializer.write(output)
# Fragment of an IOP/JATS harvest script: `vols`, `i`, `outfile`, and `parser`
# (the IOP parser instance), along with the glob, Tagged, and ReferenceWriter
# imports, are set up earlier in the script.
i = i - 1
v = vols[i]
papers = glob(v + '/*.xml')
print("VEE:", v)

# Try the parser on every XML file in the volume
documents = []
for p in papers:
    try:
        with open(p, 'r') as fp:
            doc = parser.parse(fp)
            documents.append(doc)
    except Exception as e:
        print("Error in IOP parser:", p, e)

# Write everything out in Classic tagged format, plus the reference files
fo = open(outfile, 'a')
serializer = Tagged()
refwriter = ReferenceWriter()
refwriter.refsource = '.jats.iopft.xml'
for d in documents:
    serializer.write(d, fo)
    try:
        refwriter.writeref(d)
    except Exception as err:
        print("Error in refwriter: %s" % err)
fo.close()
#!/usr/bin/env python

import os
import sys

from pyingest.parsers.default import BaseRSSFeedParser
from pyingest.serializers.classic import Tagged

rss_url = 'http://www.reddit.com/r/python/.rss'

# Fetch and parse the RSS feed into a list of ingest records
parser = BaseRSSFeedParser()
documents = parser.parse(rss_url)

# Append each record to rss.tag in Classic tagged format
outputfp = open('rss.tag', 'a')
for d in documents:
    serializer = Tagged()
    serializer.write(d, outputfp)
outputfp.close()
# Fragment of a PNAS harvest script: `absURL`, `archive_file`, `records`,
# and `outfile` are defined earlier in the script.
try:
    # Cache the abstract source locally, then parse it into a record
    if not os.path.exists(archive_file):
        abs_source = urllib.urlopen(absURL).read()   # urllib.request.urlopen under Python 3
        open(archive_file, 'w').write(abs_source)
    pnas = PNASParser()
    output = pnas.parse(absURL)
    records.append(output)
except Exception as err:
    print("Error parsing %s: %s" % (absURL, err))

if records:
    try:
        fo = open(outfile, 'a')
        for rec in records:
            # Serialize each record in Classic tagged format
            try:
                serializer = Tagged()
                serializer.write(rec, fo)
            except Exception as err:
                print("Error in serializer: %s" % err)
            # Write the reference data for the record
            try:
                ref_handler = ReferenceWriter()
                ref_handler.writeref(rec, 'pnas')
            except Exception as err:
                print("Error in writeref: %s" % err)
        print("New PNAS records available in %s" % outfile)
        fo.close()
    except Exception as err:
        print("Error writing PNAS records: %s" % err)
else:
    print("No new PNAS records available.")

print("End PNAS harvest.")