def load_error_mail(ia, marc_display, subject):
    """Email a load-error report for the archive.org item ``ia``.

    The mail subject is ``subject`` followed by ': ' and the item id.  The
    body holds the item's details and download URLs followed by the
    contents of ``bad_binary``.

    ia -- archive.org item identifier (string).
    marc_display -- accepted for interface compatibility; not used here.
    subject -- prefix for the mail subject line.
    """
    msg_from = '*****@*****.**'
    msg_to = ['*****@*****.**']
    subject += ': ' + ia
    msg = 'http://www.archive.org/details/%s\n' % ia
    # Fix: the download link used to be appended with its '%s' placeholder
    # left unfilled, so the mail contained a literal "%s" instead of the id.
    msg += 'http://www.archive.org/download/%s\n' % ia
    # NOTE(review): bad_binary is not defined in this function and must come
    # from module scope; marc_display is unused.  Possibly marc_display was
    # meant here -- confirm before changing.
    msg += '\n' + bad_binary
    error_mail(msg_from, msg_to, subject, msg)
def load_error_mail(ia, marc_display, subject):
    """Email a load-error report for the archive.org item ``ia``.

    The mail subject is ``subject`` followed by ": " and the item id.  The
    body holds the item's details and download URLs followed by the
    contents of ``bad_binary``.

    ia -- archive.org item identifier (string).
    marc_display -- accepted for interface compatibility; not used here.
    subject -- prefix for the mail subject line.
    """
    msg_from = "*****@*****.**"
    msg_to = ["*****@*****.**"]
    subject += ": " + ia
    msg = "http://www.archive.org/details/%s\n" % ia
    # Fix: the download link used to be appended with its "%s" placeholder
    # left unfilled, so the mail contained a literal "%s" instead of the id.
    msg += "http://www.archive.org/download/%s\n" % ia
    # NOTE(review): bad_binary is not defined in this function and must come
    # from module scope; marc_display is unused.  Possibly marc_display was
    # meant here -- confirm before changing.
    msg += "\n" + bad_binary
    error_mail(msg_from, msg_to, subject, msg)
def bad_marc_alert(bad_marc):
    """Send a single email summarising a batch of bad MARC records.

    bad_marc -- non-empty sequence of (ia, data) pairs.  Each pair adds the
    item's details URL, its download URL and repr(data) to the mail body.
    The subject carries the number of pairs.
    """
    assert bad_marc
    sender = '*****@*****.**'
    recipients = ['*****@*****.**']
    subject = '%d bad MARC' % len(bad_marc)

    # One report entry per bad record; entries are separated by a newline.
    entry_template = (
        'http://www.archive.org/details/%s\n'
        'http://www.archive.org/download/%s\n\n'
        '%s\n\n'
    )
    entries = []
    for ia, data in bad_marc:
        entries.append(entry_template % (ia, ia, repr(data)))

    error_mail(sender, recipients, subject, '\n'.join(entries))
def bad_marc_alert(bad_marc):
    """Mail a report covering every (ia, data) pair in ``bad_marc``.

    bad_marc -- non-empty sequence of (ia, data) pairs; an empty value
    trips the assertion.  The subject states how many pairs there are and
    the body lists, per pair, the details URL, the download URL and
    repr(data).
    """
    assert bad_marc
    from_addr = '*****@*****.**'
    to_addrs = ['*****@*****.**']
    subject = '%d bad MARC' % len(bad_marc)

    details_line = 'http://www.archive.org/details/%s\n'
    download_line = 'http://www.archive.org/download/%s\n\n'
    record_fmt = details_line + download_line + '%s\n\n'

    body = '\n'.join(
        [record_fmt % (ia, ia, repr(data)) for ia, data in bad_marc]
    )
    error_mail(from_addr, to_addrs, subject, body)
def marc_formats(ia, host=None, path=None):
    """Report which MARC formats are listed for the archive.org item ``ia``.

    Fetches the item's ``<ia>_files.xml`` listing and looks for the entries
    ``<ia>_marc.xml`` (reported as 'xml') and ``<ia>_meta.mrc`` (reported
    as 'bin').

    ia -- archive.org item identifier (string).
    host, path -- when both are truthy the listing is read from
        http://<host><path>/<ia>_files.xml, otherwise from
        http://www.archive.org/download/<ia>/<ia>_files.xml.

    Returns a dict {'xml': bool, 'bin': bool}.  Both values stay False when
    the listing cannot be fetched (an error mail is sent in that case) or
    cannot be parsed.
    """
    files = {
        ia + '_marc.xml': 'xml',
        ia + '_meta.mrc': 'bin',
    }
    has = {'xml': False, 'bin': False}
    ending = 'files.xml'
    if host and path:
        url = 'http://%s%s/%s_%s' % (host, path, ia, ending)
    else:
        url = 'http://www.archive.org/download/' + ia + '/' + ia + '_' + ending

    # Up to ten fetch attempts, pausing ten seconds after each failure.
    f = None
    for _ in range(10):
        f = urlopen_keep_trying(url)
        if f is not None:
            break
        sleep(10)
    if f is None:
        msg_from = '*****@*****.**'
        msg_to = ['*****@*****.**']
        subject = "error reading %s_files.xml" % ia
        msg = url
        error_mail(msg_from, msg_to, subject, msg)
        return has

    data = f.read()
    try:
        root = etree.fromstring(data)
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit.  Unparseable listings are still reported and treated
        # as "no MARC available".  Same output as the old backtick print.
        print('bad: ' + repr(data))
        return has

    for e in root:
        # .get() so an entry without a name attribute is skipped instead of
        # raising KeyError.
        name = e.attrib.get('name')
        if name in files:
            has[files[name]] = True
            if all(has.values()):
                break  # both formats found; no need to scan further
    return has
def find_author(name, send_mail=True):
    """Look up author records whose name equals ``name``.

    Runs an ``ol.query`` for type '/type/author' with the given name and
    fetches every returned key with ``ol.get``.  If any fetched record is
    not of type '/type/author', a report is mailed (when ``send_mail`` is
    true) and the records are passed through ``walk_redirects``.

    name -- author name to query for.
    send_mail -- when false, no report mail is sent.

    Returns the list of author records.
    """
    # The original added a 'limit' key and deleted it again before querying;
    # the query is simply built without it.
    q = {'type': '/type/author', 'name': name}
    reply = list(ol.query(q))
    authors = [ol.get(k) for k in reply]
    if any(a['type'] != '/type/author' for a in authors):
        if send_mail:
            subject = 'author query redirect: ' + repr(q['name'])
            body = 'Error: author query result should not contain redirects\n\n'
            body += 'query: ' + repr(q) + '\n\nresult\n'
            for a in authors:
                if a['type'] == '/type/redirect':
                    body += a['key'] + ' redirects to ' + a['location'] + '\n'
                elif a['type'] == '/type/delete':
                    body += a['key'] + ' is deleted ' + '\n'
                elif a['type'] == '/type/author':
                    body += a['key'] + ' is an author: ' + repr(a['name']) + '\n'
                else:
                    # Fix: this branch concatenated the record object itself
                    # onto a string (TypeError) and lacked a separating space.
                    body += a['key'] + ' has bad type: ' + repr(a) + '\n'
            addr = '*****@*****.**'
            error_mail(addr, [addr], subject, body)
        # presumably walk_redirects records visited keys in `seen`, so
        # records already reached via an earlier redirect are skipped --
        # verify against walk_redirects.
        seen = set()
        authors = [walk_redirects(a, seen) for a in authors if a['key'] not in seen]
    return authors
def marc_formats(ia, host=None, path=None):
    """Report which MARC formats are listed for the archive.org item ``ia``.

    Fetches the item's ``<ia>_files.xml`` listing and looks for the entries
    ``<ia>_marc.xml`` (reported as 'xml') and ``<ia>_meta.mrc`` (reported
    as 'bin').

    ia -- archive.org item identifier (string).
    host, path -- when both are truthy the listing is read from
        http://<host><path>/<ia>_files.xml, otherwise from
        http://www.archive.org/download/<ia>/<ia>_files.xml.

    Returns a dict {'xml': bool, 'bin': bool}.  Both values stay False when
    the listing cannot be fetched (an error mail is sent in that case) or
    cannot be parsed.
    """
    files = {
        ia + '_marc.xml': 'xml',
        ia + '_meta.mrc': 'bin',
    }
    has = {'xml': False, 'bin': False}
    ending = 'files.xml'
    if host and path:
        url = 'http://%s%s/%s_%s' % (host, path, ia, ending)
    else:
        url = 'http://www.archive.org/download/' + ia + '/' + ia + '_' + ending

    # Up to ten fetch attempts, pausing ten seconds after each failure.
    f = None
    for _ in range(10):
        f = urlopen_keep_trying(url)
        if f is not None:
            break
        sleep(10)
    if f is None:
        msg_from = '*****@*****.**'
        msg_to = ['*****@*****.**']
        subject = "error reading %s_files.xml" % ia
        msg = url
        error_mail(msg_from, msg_to, subject, msg)
        return has

    data = f.read()
    try:
        root = etree.fromstring(data)
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit.  Unparseable listings are still reported and treated
        # as "no MARC available".  Same output as the old backtick print.
        print('bad: ' + repr(data))
        return has

    for e in root:
        # .get() so an entry without a name attribute is skipped instead of
        # raising KeyError.
        name = e.attrib.get('name')
        if name in files:
            has[files[name]] = True
            if all(has.values()):
                break  # both formats found; no need to scan further
    return has
def error_marc_403(ia):
    """Send a 'MARC 403' alert mail for the archive.org item ``ia``.

    The subject is 'MARC 403: ' plus the item id and the body is the
    item's details URL.
    """
    sender = '*****@*****.**'
    recipients = ['*****@*****.**']
    details_url = 'http://www.archive.org/details/' + ia
    error_mail(sender, recipients, 'MARC 403: ' + ia, details_url)
======= url = 'http://%s%s/%s_%s' % (host, path, ia, ending) >>>>>>> a6e890c72315ff97b2f8a600f189fce28668fefe else: url = 'http://www.archive.org/download/' + ia + '/' + ia + '_' + ending for attempt in range(10): f = urlopen_keep_trying(url) if f is not None: break sleep(10) if f is None: msg_from = '*****@*****.**' msg_to = ['*****@*****.**'] subject = "error reading %s_files.xml" % ia msg = url error_mail(msg_from, msg_to, subject, msg) return has data = f.read() try: root = etree.fromstring(data) except: print 'bad:', `data` return has for e in root: name = e.attrib['name'] if name in files: has[files[name]] = True if all(has.values()): break return has
msg_from = '*****@*****.**' msg_to = ['*****@*****.**'] msg = 'http://www.archive.org/details/' + ia subject = 'MARC 403: ' + ia error_mail(msg_from, msg_to, subject, msg) def bad_marc_alert(bad_marc): assert bad_marc msg_from = '*****@*****.**' msg_to = ['*****@*****.**'] subject = '%d bad MARC' % len(bad_marc) msg = '\n'.join(('http://www.archive.org/details/%s\n' + 'http://www.archive.org/download/%s\n\n' + '%s\n\n') % (ia, ia, ` data `) for ia, data in bad_marc) error_mail(msg_from, msg_to, subject, msg) if __name__ == '__main__': fh_log = open(load_scribe_log, 'a') open(config.runtime_config['state_dir'] + '/load_scribe.pid', 'w').write(os.getpid()) start = open(state_file).readline()[:-1] bad_marc_last_sent = time() bad_marc = [] while True: if args.item_id: db_iter = db.query(