Beispiel #1
0
            lit = re.sub(', pp. ', ', ', lit)
            lit = re.sub(u'\u201d', '"', lit)
            lit = re.sub(u'\u201c', '"', lit)
            lit = re.sub(u'\u2013', '-', lit)
            lit = re.sub(u'\u2018', "'", lit)
            lit = re.sub(u'\u2019', "'", lit)
            if refnum:
                ref = '[%s] %s' % (refnum, lit)
            else:
                ref = lit
            rec['refs'].append([('x', ref)])
    if rec['tc'] == 'Editorial':
        print 'skip Editorial'
    else:
        recs.append(rec)
        print ' keys: ' + ', '.join(rec.keys()), '|', len(rec['refs']), 'refs'

# Write the records in `recs` to <xmldir>/<jnlfilename>.xml.
xmlf = os.path.join(xmldir, jnlfilename + '.xml')
xmlfile = codecs.EncodedFile(codecs.open(xmlf, mode='wb'), 'utf8')
try:
    ejlmod2.writeXML(recs, xmlfile, publisher)
finally:
    # Close the output file even if writeXML raises.
    xmlfile.close()

# Retrieval: append the XML file name to the "retfiles" list unless the
# list already contains it.
retfiles_path = "/afs/desy.de/user/l/library/proc/retinspire/retfiles"
with open(retfiles_path, "r") as retfiles:
    retfiles_text = retfiles.read()
line = jnlfilename + '.xml' + "\n"
# NOTE(review): this is a substring test on the whole file content, so a
# name that is the tail of a longer listed name also counts as present --
# confirm that this is intended.
if line not in retfiles_text:
    with open(retfiles_path, "a") as retfiles:
        retfiles.write(line)
Beispiel #2
0
            timespan = int(sys.argv[1])
        except:
            print '"%s" is not a number' % (timespan)
            sys.exit(2)
    else:
        timespan = 9
    (recids, jnlfilename) = requestarticles(timespan)
    records = translatearticles(recids)
    #split too large xmls
    for tc in records.keys():
        if len(tc) == 1 and len(records[tc]) > chunksize:
            for i in range(0, len(records[tc]), chunksize):
                records[tc + str(i)] = records[tc][i:i + chunksize]
            del records[tc]
    for tc in records.keys():
        #if not tc[0] in ['T', 'C', 'K']:
        #    continue
        #write xml-file
        xmlf = os.path.join(xmldir, '%s.%s.xml' % (jnlfilename, tc))
        xmlfile = codecs.EncodedFile(codecs.open(xmlf, mode='wb'), 'utf8')
        ejlmod2.writeXML(records[tc], xmlfile, publisher)
        xmlfile.close()
        #retrieval
        retfiles_path = "/afs/desy.de/user/l/library/proc/retinspire/retfiles"
        retfiles_text = open(retfiles_path, "r").read()
        line = "%s.%s.xml\n" % (jnlfilename, tc)
        if not line in retfiles_text:
            retfiles = open(retfiles_path, "a")
            retfiles.write(line)
            retfiles.close()
Beispiel #3
0
             if not re.search('xml$', artfile):
                 artfile = os.path.join(d4, os.listdir(d4)[2])
             print '-[%s]=' % (artfile)
             artxml = xml.dom.minidom.parse(artfile)
             rec = xmlExtractBook()
             vol = rec['vol']
             editordoesexist = rec['editordoesexist']
             jrnl = jc[jnr][0] + vol
             #jrnl = 'TEST-FS-'+jrnl
             xmlf = os.path.join(xmldir,
                                 jrnl + '.' + str(cday) + '.xml')
             #xmlfile = open(xmlf,'w')
             xmlfile = codecs.EncodedFile(codecs.open(xmlf, mode='wb'),
                                          'utf8')
             #print "tit=", rec['tit']
             ejlmod2.writeXML([rec], xmlfile, 'Springer')
             xmlfile.close()
             #retrieval
             retfiles_text = open(retfiles_path, "r").read()
             line = jrnl + '.' + str(cday) + '.xml' + "\n"
             if not line in retfiles_text:
                 retfiles = open(retfiles_path, "a")
                 retfiles.write(line)
                 retfiles.close()
             if isbook: pass
             pacs = []
             kw = []
 #if editordoesexist: # (FS) for books: take chapters only if they are not all by the same author(s)
 if editordoesexist or not editordoesexist:  # (FS) for books: take chapters only if they are not all by the same author(s)
     for d3 in os.listdir(df3):  # issue
         if "matter" in d3:
Beispiel #4
0
        if issn == '1748-0221' and rec['p1'][0] == 'C':
            rec['tc'] = ['C']
    except:
        pass
    #publication note
    try:
        pbn = rec['jnl'] + rec['vol'] + rec['p1']
    except:
        pbn = ''
    recsunsrtd.append((pbn, rec))

# Sort articles by publication note. The list holds (pbn, rec) tuples, so
# plain tuple ordering puts the publication note first.
recsunsrtd.sort()
recs = [tupel[1] for tupel in recsunsrtd]

# Write the sorted records to <xmldir>/<iopf>.xml.
xmlf = os.path.join(xmldir, iopf + '.xml')
xmlfile = codecs.EncodedFile(codecs.open(xmlf, mode='wb'), 'utf8')
try:
    ejlmod2.writeXML(recs, xmlfile, 'IOP')
finally:
    # Close the output file even if writeXML raises.
    xmlfile.close()

# Retrieval: append the XML file name to the "retfiles" list unless the
# list already contains it.
retfiles_path = "/afs/desy.de/user/l/library/proc/retinspire/retfiles"
with open(retfiles_path, "r") as retfiles:
    retfiles_text = retfiles.read()
line = iopf + '.xml' + "\n"
# NOTE(review): this is a substring test on the whole file content, so a
# name that is the tail of a longer listed name also counts as present --
# confirm that this is intended.
if line not in retfiles_text:
    with open(retfiles_path, "a") as retfiles:
        retfiles.write(line)