# NOTE(review): the line below is the tail of a script whose line breaks and
# indentation were lost. It begins inside a body that opens before this point,
# so the exact nesting of the statements cannot be recovered from here.
# Read as a flat sequence of statements, it:
#   1. cleans the reference string `lit`: replaces ', pp. ' by ', ' and maps
#      U+201C/U+201D to '"', U+2013 to '-', and U+2018/U+2019 to "'";
#   2. prefixes the reference with '[<refnum>] ' when `refnum` is truthy and
#      appends it to rec['refs'] as [('x', ref)];
#   3. prints 'skip Editorial' for a record whose rec['tc'] == 'Editorial' and
#      otherwise appends the record to `recs`; also prints the record keys and
#      the number of references;
#   4. writes `recs` to <xmldir>/<jnlfilename>.xml through ejlmod2.writeXML;
#   5. appends '<jnlfilename>.xml\n' to the retfiles list unless that text is
#      already found in it.
# NOTE(review): ', pp. ' is used as a regex, so its '.' matches any character,
# not only a literal dot - confirm whether r', pp\. ' was meant.
# NOTE(review): the handle from open(retfiles_path, "r") is never closed, and
# `line in retfiles_text` is a substring test, so a name that is the tail of
# another listed name also counts as present - consider `with` blocks and a
# per-line comparison.
# NOTE(review): the print statements make this Python 2 code.
lit = re.sub(', pp. ', ', ', lit) lit = re.sub(u'\u201d', '"', lit) lit = re.sub(u'\u201c', '"', lit) lit = re.sub(u'\u2013', '-', lit) lit = re.sub(u'\u2018', "'", lit) lit = re.sub(u'\u2019', "'", lit) if refnum: ref = '[%s] %s' % (refnum, lit) else: ref = lit rec['refs'].append([('x', ref)]) if rec['tc'] == 'Editorial': print 'skip Editorial' else: recs.append(rec) print ' keys: ' + ', '.join(rec.keys()), '|', len(rec['refs']), 'refs' #write xml xmlf = os.path.join(xmldir, jnlfilename + '.xml') xmlfile = codecs.EncodedFile(codecs.open(xmlf, mode='wb'), 'utf8') ejlmod2.writeXML(recs, xmlfile, publisher) xmlfile.close() #retrieval retfiles_path = "/afs/desy.de/user/l/library/proc/retinspire/retfiles" retfiles_text = open(retfiles_path, "r").read() line = jnlfilename + '.xml' + "\n" if not line in retfiles_text: retfiles = open(retfiles_path, "a") retfiles.write(line) retfiles.close()
# NOTE(review): the line below is the tail of a script whose line breaks and
# indentation were lost. It begins inside a `try:` suite (an `except:` follows
# the first statement) whose header, and the condition paired with the later
# `else:`, are not visible here.
# Read as a flat sequence of statements, it:
#   1. takes `timespan` from int(sys.argv[1]); on failure prints a message and
#      exits with status 2; the `else:` branch uses the default 9;
#   2. calls requestarticles(timespan) to get (recids, jnlfilename) and
#      translatearticles(recids) to get the dict `records`;
#   3. for every key `tc` of length 1 holding more than `chunksize` records,
#      stores slices of at most `chunksize` records under tc + str(i)
#      (i = 0, chunksize, 2*chunksize, ...) and deletes the original key;
#   4. writes each entry to <xmldir>/<jnlfilename>.<tc>.xml through
#      ejlmod2.writeXML and appends '<jnlfilename>.<tc>.xml\n' to the retfiles
#      list unless that text is already found in it.
# NOTE(review): the except handler formats `timespan`, the very name whose
# assignment just failed. Unless it is bound earlier (not visible here), the
# handler raises NameError instead of printing - presumably sys.argv[1] was
# meant. The bare `except:` also catches more than ValueError.
# NOTE(review): `records` is modified while looping over records.keys(). That
# is safe only where keys() returns a list (Python 2, which the print
# statement implies).
# NOTE(review): the handle from open(retfiles_path, "r") is never closed, and
# `line in retfiles_text` is a substring test rather than a per-line match.
timespan = int(sys.argv[1]) except: print '"%s" is not a number' % (timespan) sys.exit(2) else: timespan = 9 (recids, jnlfilename) = requestarticles(timespan) records = translatearticles(recids) #split too large xmls for tc in records.keys(): if len(tc) == 1 and len(records[tc]) > chunksize: for i in range(0, len(records[tc]), chunksize): records[tc + str(i)] = records[tc][i:i + chunksize] del records[tc] for tc in records.keys(): #if not tc[0] in ['T', 'C', 'K']: # continue #write xml-file xmlf = os.path.join(xmldir, '%s.%s.xml' % (jnlfilename, tc)) xmlfile = codecs.EncodedFile(codecs.open(xmlf, mode='wb'), 'utf8') ejlmod2.writeXML(records[tc], xmlfile, publisher) xmlfile.close() #retrieval retfiles_path = "/afs/desy.de/user/l/library/proc/retinspire/retfiles" retfiles_text = open(retfiles_path, "r").read() line = "%s.%s.xml\n" % (jnlfilename, tc) if not line in retfiles_text: retfiles = open(retfiles_path, "a") retfiles.write(line) retfiles.close()
# NOTE(review): the line below is a fragment whose line breaks and indentation
# were lost. It begins inside an enclosing body that is not visible and ends on
# an `if "matter" in d3:` header whose suite is not on this line, so the
# nesting of the statements cannot be recovered from here.
# Read as a flat sequence of statements, it:
#   1. when `artfile` does not end in 'xml', replaces it by the entry at index
#      2 of os.listdir(d4), then prints the chosen path;
#   2. parses that file with xml.dom.minidom.parse into `artxml` and builds
#      `rec` with xmlExtractBook(), reading rec['vol'] and
#      rec['editordoesexist'] from it;
#   3. writes [rec] to <xmldir>/<jrnl>.<cday>.xml with publisher 'Springer',
#      where jrnl = jc[jnr][0] + vol;
#   4. appends '<jrnl>.<cday>.xml\n' to the retfiles list unless that text is
#      already found in it (`retfiles_path` is defined outside this fragment);
#   5. resets `pacs` and `kw` to empty lists and starts iterating
#      os.listdir(df3).
# NOTE(review): os.listdir returns entries in arbitrary order, so the fixed
# index [2] depends on the directory listing order - confirm this is reliable.
# NOTE(review): `artxml` is not used again on this line and xmlExtractBook()
# takes no arguments; presumably it reads module-level state - verify.
# NOTE(review): `editordoesexist or not editordoesexist` is true for every
# value; the commented-out `#if editordoesexist:` suggests the check was
# disabled on purpose - confirm.
# NOTE(review): the handle from open(retfiles_path, "r") is never closed, and
# `line in retfiles_text` is a substring test rather than a per-line match.
if not re.search('xml$', artfile): artfile = os.path.join(d4, os.listdir(d4)[2]) print '-[%s]=' % (artfile) artxml = xml.dom.minidom.parse(artfile) rec = xmlExtractBook() vol = rec['vol'] editordoesexist = rec['editordoesexist'] jrnl = jc[jnr][0] + vol #jrnl = 'TEST-FS-'+jrnl xmlf = os.path.join(xmldir, jrnl + '.' + str(cday) + '.xml') #xmlfile = open(xmlf,'w') xmlfile = codecs.EncodedFile(codecs.open(xmlf, mode='wb'), 'utf8') #print "tit=", rec['tit'] ejlmod2.writeXML([rec], xmlfile, 'Springer') xmlfile.close() #retrieval retfiles_text = open(retfiles_path, "r").read() line = jrnl + '.' + str(cday) + '.xml' + "\n" if not line in retfiles_text: retfiles = open(retfiles_path, "a") retfiles.write(line) retfiles.close() if isbook: pass pacs = [] kw = [] #if editordoesexist: # (FS) for books: take chapters only if they are not all by the same author(s) if editordoesexist or not editordoesexist: # (FS) for books: take chapters only if they are not all by the same author(s) for d3 in os.listdir(df3): # issue if "matter" in d3:
# NOTE(review): the line below is the tail of a script whose line breaks and
# indentation were lost. It begins inside a `try:` suite (an `except: pass`
# follows the first statement) whose header is not visible, so the nesting of
# the statements cannot be recovered from here.
# Read as a flat sequence of statements, it:
#   1. sets rec['tc'] = ['C'] when issn == '1748-0221' and the first character
#      of rec['p1'] is 'C'; any exception raised in that try suite is ignored;
#   2. builds the publication note pbn = rec['jnl'] + rec['vol'] + rec['p1'],
#      falling back to '' on any exception, and appends (pbn, rec) to
#      `recsunsrtd`;
#   3. sorts `recsunsrtd` and keeps the records, in that order, as `recs`;
#   4. writes `recs` to <xmldir>/<iopf>.xml with publisher 'IOP';
#   5. appends '<iopf>.xml\n' to the retfiles list unless that text is already
#      found in it.
# NOTE(review): the sort key is the concatenated string, so ordering is plain
# string comparison. Tuples with equal pbn (e.g. every record that fell back
# to '') are ordered by comparing the records themselves - presumably dicts,
# which only Python 2 can compare; verify.
# NOTE(review): the bare `except:` clauses hide every error, not only missing
# keys - confirm that is intended.
# NOTE(review): the handle from open(retfiles_path,"r") is never closed, and
# `line in retfiles_text` is a substring test rather than a per-line match.
if issn == '1748-0221' and rec['p1'][0] == 'C': rec['tc'] = ['C'] except: pass #publication note try: pbn = rec['jnl'] + rec['vol'] + rec['p1'] except: pbn = '' recsunsrtd.append((pbn, rec)) #sort articles by publication note recsunsrtd.sort() recs = [tupel[1] for tupel in recsunsrtd] xmlf = os.path.join(xmldir,iopf+'.xml') xmlfile = codecs.EncodedFile(codecs.open(xmlf,mode='wb'),'utf8') ejlmod2.writeXML(recs ,xmlfile,'IOP') xmlfile.close() #retrieval retfiles_path = "/afs/desy.de/user/l/library/proc/retinspire/retfiles" retfiles_text = open(retfiles_path,"r").read() line = iopf+'.xml'+ "\n" if not line in retfiles_text: retfiles = open(retfiles_path,"a") retfiles.write(line) retfiles.close()