# OUP harvest step.
#
# Read as a sequence of statements, this line: globs DIR+'/TagTextFiles/*.xml',
# parses each file with OUPJATSParser and collects the results in `documents`
# (a failure on one file is printed as "Error in OUP parser:" and the loop
# carries on), then opens 'oup.tag' in append mode, writes every document with
# Tagged().write and hands it to ReferenceWriter().writeref(d, 'oup').
#
# NOTE(review): the leading `sys.exit(1)` looks like the tail of a preceding
# block that is not visible here -- confirm its nesting before re-indenting.
# NOTE(review): the statements are run together on one physical line, which is
# not valid Python as it stands; presumably line breaks were lost. On a single
# line everything after the first '#' is comment text.
# NOTE(review): open mode 'rU' was removed in Python 3.11 (open() raises
# ValueError); with the broad `except Exception` every file would then be
# reported as a parser error. Confirm the target interpreter version.
# NOTE(review): `fo` is released only by the trailing fo.close(); an exception
# from write()/writeref() would leave it open.
sys.exit(1) outfile = 'oup.tag' parser = OUPJATSParser() files = glob(DIR+'/TagTextFiles/*.xml') documents = [] for f in files: try: with open(f,'rU') as fp: doc = parser.parse(fp) documents.append(doc) except Exception as e: print("Error in OUP parser:", f, e) # print(documents) # Write everything out in Classic tagged format fo = open(outfile, 'a') serializer = Tagged() ref_handler = ReferenceWriter() for d in documents: #print(d) serializer.write(d,fo) ref_handler.writeref(d,'oup') fo.close()
# Harvest the PNAS RSS feeds and append every parsed article to 'pnas.tag'.
#
# For each feed URL in PNAS_RSS_URLS, the 'link' of every feed entry is handed
# to PNASParser.parse.  When parsing succeeds the result is written to the
# tagged output file with Tagged.write and, independently of whether that
# write succeeded, passed to ReferenceWriter.writeref.  A failure in any of
# the three steps is printed and the harvest moves on to the next entry.
outfile = 'pnas.tag'

# The context manager guarantees the output file is closed even when
# something outside the per-entry handlers raises (for example while
# fetching a feed or on a keyboard interrupt); the previous open()/close()
# pair leaked the handle in that case.
with open(outfile, 'a') as fo:
    for k, v in PNAS_RSS_URLS.items():
        feed = feedparser.parse(v)
        for _item in feed['entries']:
            try:
                record = {}
                absURL = _item['link']
                # volno and ident are not used further down; the lookups are
                # kept so that an entry missing 'prism_volume' or
                # 'dc_identifier' is still rejected by the handler below,
                # exactly as before.
                volno = _item['prism_volume'].zfill(4)
                ident = _item['dc_identifier']
                ident = ident.replace('hwp:master-id:pnas;', '')
                pnas = PNASParser()
                output = pnas.parse(absURL)
            except Exception as err:
                print("Error in parser:", err)
            else:
                # Serialising and reference writing are guarded separately so
                # a failure in one does not prevent the other from running.
                try:
                    serializer = Tagged()
                    serializer.write(output, fo)
                except Exception as err:
                    print("Error in serializer:", err)
                try:
                    ref_handler = ReferenceWriter()
                    ref_handler.writeref(output, 'pnas')
                except Exception as err:
                    print("Error in writeref:", err)
# PNAS harvest: fetch, archive, parse, then write the collected records.
#
# NOTE(review): this line starts inside a `try:` whose header (and whatever
# encloses it) is not visible here; the first `except` below closes it.
#
# Visible behaviour: the page at absURL is downloaded with urllib.urlopen, the
# raw response is written to archive_file, parsed with PNASParser and appended
# to `records`; a failure is printed as "Error parsing <url>: <err>:".  When
# `records` is non-empty, outfile is opened in append mode and each record is
# written with Tagged().write and passed to ReferenceWriter().writeref(rec,
# 'pnas'), each call in its own try/except that prints the error.  Otherwise
# "No new PNAS records available." is printed.
#
# NOTE(review): `except Exception, err` and urllib.urlopen are Python 2 only.
# NOTE(review): the final bare `except:` does not bind `err`; the value printed
# there is whatever an earlier handler left bound (NameError if none ran).
# NOTE(review): `open(archive_file, 'w').write(...)` never closes the handle
# explicitly, and fo.close() is skipped if the surrounding try raises.
abs_source = urllib.urlopen(absURL).read() open(archive_file, 'w').write(abs_source) pnas = PNASParser() output = pnas.parse(abs_source) records.append(output) except Exception, err: print("Error parsing %s: %s:" % (absURL, err)) if records: try: fo = open(outfile, 'a') for rec in records: try: serializer = Tagged() serializer.write(rec, fo) except Exception, err: print("Error in serializer: %s" % err) try: ref_handler = ReferenceWriter() ref_handler.writeref(rec, 'pnas') except Exception, err: print("Error in writeref: %s" % err) print("New PNAS records available in %s" % outfile) fo.close() except: print("Error writing PNAS records: %s" % err) else: print("No new PNAS records available.") print("End PNAS harvest.")
# IOP harvest for one volume directory.
#
# Visible behaviour: decrements `i`, takes `vols[i]` as the volume path, globs
# its '*.xml' files and parses each with `parser`, collecting the results in
# `documents` (a failure is printed as "Error in IOP parser:" and the loop
# carries on).  Every document is then written to `outfile` (append mode) with
# Tagged().write and passed to a ReferenceWriter whose `refsource` is set to
# '.jats.iopft.xml'; only the writeref call is wrapped in try/except.
#
# NOTE(review): `i`, `vols`, `parser` and `outfile` are defined before this
# line; whether these statements sit inside a loop cannot be told from here.
# NOTE(review): `print "VEE:", v` and `except Exception, err` are Python 2 only
# syntax, while the other print calls use the function form -- confirm the
# target interpreter.
# NOTE(review): serializer.write is not guarded, so an exception there skips
# fo.close() and leaves the output file open.
i = i - 1 v = vols[i] papers = glob(v + '/*.xml') print "VEE:", v # Try the parser documents = [] for p in papers: try: with open(p, 'rU') as fp: doc = parser.parse(fp) documents.append(doc) except Exception as e: print("Error in IOP parser:", p, e) # Write everything out in Classic tagged format fo = open(outfile, 'a') serializer = Tagged() refwriter = ReferenceWriter() refwriter.refsource = '.jats.iopft.xml' for d in documents: serializer.write(d, fo) try: refwriter.writeref(d) except Exception, err: print("Error in refwriter: %s" % err) fo.close()