Esempio n. 1
0
    sys.exit(1)

# Parse every OUP JATS XML file under DIR/TagTextFiles and append the parsed
# documents to 'oup.tag' in Classic tagged format, emitting references for
# each document along the way.
outfile = 'oup.tag'

parser = OUPJATSParser()
files = glob(DIR+'/TagTextFiles/*.xml')
documents = []

for f in files:
    try:
        # Plain 'r' rather than 'rU': the 'U' flag was removed in Python 3.11
        # (open() raises ValueError), and that error would be swallowed by the
        # handler below, turning every file into a reported "parser error".
        # Text mode on Python 3 already applies universal-newline handling.
        with open(f, 'r') as fp:
            doc = parser.parse(fp)
            documents.append(doc)
    except Exception as e:
        # Best effort: report the failing file and continue with the rest.
        print("Error in OUP parser:", f, e)

# Write everything out in Classic tagged format
serializer = Tagged()
ref_handler = ReferenceWriter()

# The context manager guarantees the output file is flushed and closed even
# if serializing or reference writing raises part-way through.
with open(outfile, 'a') as fo:
    for d in documents:
        serializer.write(d, fo)
        ref_handler.writeref(d, 'oup')


Esempio n. 2
0
# Harvest every PNAS RSS feed listed in PNAS_RSS_URLS: parse each feed entry's
# linked page with PNASParser, append the result to 'pnas.tag' in tagged
# format, and emit its references.
outfile = 'pnas.tag'

# The context manager closes the output file even if fetching/parsing a feed
# raises outside the per-entry error handling below.
with open(outfile, 'a') as fo:
    for v in PNAS_RSS_URLS.values():
        feed = feedparser.parse(v)
        for _item in feed['entries']:
            try:
                absURL = _item['link']
                # NOTE(review): volno and ident are not used further down.
                # The lookups are kept because a missing key currently sends
                # the entry to the error branch instead of parsing it --
                # confirm whether that filtering is intended.
                volno = _item['prism_volume'].zfill(4)
                ident = _item['dc_identifier']
                ident = ident.replace('hwp:master-id:pnas;', '')
                pnas = PNASParser()
                output = pnas.parse(absURL)
            except Exception as err:
                print("Error in parser:", err)
            else:
                # Serializing and reference writing are attempted
                # independently so a failure in one does not skip the other.
                try:
                    serializer = Tagged()
                    serializer.write(output, fo)
                except Exception as err:
                    print("Error in serializer:", err)
                try:
                    ref_handler = ReferenceWriter()
                    ref_handler.writeref(output, 'pnas')
                except Exception as err:
                    print("Error in writeref:", err)
Esempio n. 3
0
                abs_source = urllib.urlopen(absURL).read()
                open(archive_file, 'w').write(abs_source)
                pnas = PNASParser()
                output = pnas.parse(abs_source)
                records.append(output)
        except Exception, err:
            print("Error parsing %s: %s:" % (absURL, err))

# Append every harvested record to the tagged output file and emit its
# references; report when there was nothing new to write.
if records:
    try:
        # The context manager closes the file even when an error escapes the
        # per-record handlers.
        with open(outfile, 'a') as fo:
            for rec in records:
                # Serializing and reference writing are attempted
                # independently so one failure does not skip the other.
                try:
                    serializer = Tagged()
                    serializer.write(rec, fo)
                except Exception as err:
                    print("Error in serializer: %s" % err)
                try:
                    ref_handler = ReferenceWriter()
                    ref_handler.writeref(rec, 'pnas')
                except Exception as err:
                    print("Error in writeref: %s" % err)
        print("New PNAS records available in %s" % outfile)
    except Exception as err:
        # Bind the exception here: the previous bare `except:` formatted an
        # `err` that is unbound when open() itself fails, and it also trapped
        # KeyboardInterrupt/SystemExit.
        print("Error writing PNAS records: %s" % err)
else:
    print("No new PNAS records available.")

print("End PNAS harvest.")
Esempio n. 4
0
        i = i - 1
        v = vols[i]

    # Collect the XML files found directly under v.
    papers = glob(v + '/*.xml')
    # Single pre-formatted argument prints the same text on Python 2 and 3.
    print("VEE: %s" % v)

    # Try the parser; a failing paper is reported and skipped.
    documents = []
    for p in papers:
        try:
            # Plain 'r' rather than 'rU': the 'U' flag was removed in
            # Python 3.11, where open() raises ValueError for it.
            # NOTE(review): on Python 2 this drops universal-newline
            # translation -- confirm the parser does not depend on it.
            with open(p, 'r') as fp:
                doc = parser.parse(fp)
            documents.append(doc)
        except Exception as e:
            # Formatted as one string so Python 2 does not print a tuple repr.
            print("Error in IOP parser: %s %s" % (p, e))

    # Write everything out in Classic tagged format
    serializer = Tagged()
    refwriter = ReferenceWriter()
    refwriter.refsource = '.jats.iopft.xml'

    # The context manager closes the output file even if serializer.write
    # raises for one of the documents.
    with open(outfile, 'a') as fo:
        for d in documents:
            serializer.write(d, fo)
            try:
                refwriter.writeref(d)
            except Exception as err:
                # Reference writing is best effort; keep going on failure.
                print("Error in refwriter: %s" % err)