def test_no_title(self):
    """Records without a title are indexed with the '__None__' placeholder."""
    for key, type_key in [('/books/OL1M', '/type/edition'),
                          ('/works/OL23W', '/type/work')]:
        requests = update_work.update_work({'key': key, 'type': {'key': type_key}})
        assert len(requests) == 1
        assert '<field name="title">__None__</field>' in requests[0].toxml()
def test_delete_work(self):
    """Deleting a work (or a work-keyed edition) yields exactly one DeleteRequest."""
    del_work = update_work.update_work(
        {'key': '/works/OL23W', 'type': {'key': '/type/delete'}})
    del_edition = update_work.update_work(
        {'key': '/works/OL23M', 'type': {'key': '/type/delete'}})
    assert isinstance(del_work, list)
    for result in (del_work, del_edition):
        assert len(result) == 1
        assert isinstance(result[0], update_work.DeleteRequest)
    assert del_work[0].toxml() == '<delete><query>key:/works/OL23W</query></delete>'
    assert del_edition[0].toxml() == '<delete><query>key:/works/OL23M</query></delete>'
def hide_books(start):
    """Remove 'ocaid' links from editions whose IA item is dark or noindex.

    Scans archive.org metadata rows updated after `start`, drops the
    'ocaid' field from the matching Open Library editions, saves them via
    ol.save_many, and reindexes the affected works in solr.  (Python 2.)
    """
    mend = []  # editions to save back with their 'ocaid' removed
    fix_works = set()  # keys of works that need a solr reindex
    db_iter = db.query("select identifier, collection, updated from metadata where (noindex is not null or curatestate='dark') and mediatype='texts' and scandate is not null and updated > $start order by updated", {'start': start})
    for row in db_iter:
        ia = row.identifier
        if row.collection:
            collections = set(i.lower().strip() for i in row.collection.split(';'))
            # printdisabled items keep their links even when dark/noindex
            if 'printdisabled' in collections:
                continue
        print `ia`, row.updated
        for eq in query({'type': '/type/edition', 'ocaid': ia}):
            print eq['key']
            e = ol.get(eq['key'])
            if 'ocaid' not in e:
                continue  # link already removed
            if 'works' in e:
                fix_works.update(e['works'])
            print e['key'], `e.get('title', None)`
            del e['ocaid']
            mend.append(e)
    print 'removing links from %d editions' % len(mend)
    print ol.save_many(mend, 'remove link')
    requests = []
    for wkey in fix_works:
        requests += update_work(withKey(wkey))
    if fix_works:
        solr_update(requests + ['<commit/>'], debug=True)
def solr_updates(i):
    """Apply one author-merge changeset `i` to the works solr index.

    Expects i['data']['changeset']['data'] to hold exactly a 'master' and a
    'duplicates' entry.  Re-derives subjects for each affected work, pushes
    the work updates to solr, queues the merge in `authors_to_update`, and
    records timing in `update_times`.  (Python 2.)
    """
    global subjects_to_update, authors_to_update
    t0 = time()
    d = i['data']
    changeset = d['changeset']
    print 'author:', d['author']
    try:
        # merge changesets must contain exactly 'master' + 'duplicates'
        assert len(changeset['data']) == 2 and 'master' in changeset['data'] and 'duplicates' in changeset['data']
    except:
        # bare except is deliberate: dump the offending changeset, then re-raise
        print d['changeset']
        raise
    master_key = changeset['data']['master']
    dup_keys = changeset['data']['duplicates']
    assert dup_keys
    print d['changeset']
    print 'timestamp:', i['timestamp']
    print 'dups:', dup_keys
    print 'records to update:', len(d['result'])
    master = None
    obj_by_key = {}  # every object in the changeset query, keyed for obj_cache
    works = []  # keys of works touched by this changeset
    editions_by_work = defaultdict(list)  # work key -> its editions
    for obj in d['query']:
        obj_type = obj['type']['key']
        k = obj['key']
        if obj_type == '/type/work':
            works.append(obj['key'])
        elif obj_type == '/type/edition':
            if 'works' not in obj:
                continue  # orphan edition: nothing to reindex
            for w in obj['works']:
                editions_by_work[w['key']].append(obj)
        obj_by_key[k] = obj
    master = obj_by_key.get(master_key)
    #print 'master:', master
    if len(d['result']) == 0:
        print i
    work_updates = []
    for wkey in works:
        #print 'editions_by_work:', editions_by_work
        work = obj_by_key[wkey]
        work['editions'] = editions_by_work[wkey]
        subjects = get_work_subjects(work)
        for subject_type, values in subjects.iteritems():
            subjects_to_update.update((subject_type, v) for v in values)
        ret = update_work(work, obj_cache=obj_by_key, debug=True)
        work_updates += ret
    solr_update(work_updates, debug=False, index='works')
    # queue the author merge for the authors index
    authors_to_update.append({
        'redirects': dup_keys,
        'master_key': master_key,
        'master': master})
    print 'authors to update:', len(authors_to_update)
    t1 = time() - t0
    update_times.append(t1)
    print 'update takes: %d seconds' % t1
    print
def test_no_title(self):
    """Solr docs for title-less records get the '__None__' placeholder title."""
    for key, type_key in (('/books/OL1M', '/type/edition'),
                          ('/works/OL23W', '/type/work')):
        requests = update_work.update_work({'key': key, 'type': {'key': type_key}})
        assert len(requests) == 1
        assert requests[0].doc['title'] == "__None__"
def test_work_no_title(self):
    """A title-less work is indexed with the title of one of its editions."""
    work = {'key': '/works/OL23W', 'type': {'key': '/type/work'}}
    edition = make_edition(work)
    edition['title'] = 'Some Title!'
    update_work.data_provider = FakeDataProvider([work, edition])
    result = update_work.update_work(work)
    assert len(result) == 1
    solr_doc = result[0].doc
    assert solr_doc['title'] == "Some Title!"
def test_work_no_title(self):
    """A title-less work picks up an edition title in its solr XML."""
    work = {'key': '/works/OL23W', 'type': {'key': '/type/work'}}
    edition = make_edition(work)
    edition['title'] = 'Some Title!'
    update_work.data_provider = FakeDataProvider([work, edition])
    result = update_work.update_work(work)
    assert len(result) == 1
    assert '<field name="title">Some Title!</field>' in result[0].toxml()
def test_delete_editions(self):
    """Deleting an edition record produces a single JSON delete command."""
    doc = {'key': '/works/OL23M', 'type': {'key': '/type/delete'}}
    requests = update_work.update_work(doc)
    assert len(requests) == 1
    assert requests[0].to_json_command() == '"delete": ["/works/OL23M"]'
def test_redirects(self):
    """A redirect record produces a single JSON delete command."""
    doc = {'key': '/works/OL23W', 'type': {'key': '/type/redirect'}}
    requests = update_work.update_work(doc)
    assert len(requests) == 1
    assert requests[0].to_json_command() == '"delete": ["/works/OL23W"]'
def test_redirects(self):
    """Redirected works are removed from solr via an id DeleteRequest."""
    doc = {'key': '/works/OL23W', 'type': {'key': '/type/redirect'}}
    requests = update_work.update_work(doc)
    assert len(requests) == 1
    delete_req = requests[0]
    assert isinstance(delete_req, update_work.DeleteRequest)
    assert delete_req.toxml() == '<delete><id>/works/OL23W</id></delete>'
def test_delete_editions(self):
    """Deleted editions are removed from solr via a key-query DeleteRequest."""
    doc = {'key': '/works/OL23M', 'type': {'key': '/type/delete'}}
    requests = update_work.update_work(doc)
    assert len(requests) == 1
    delete_req = requests[0]
    assert isinstance(delete_req, update_work.DeleteRequest)
    assert delete_req.toxml() == '<delete><query>key:/works/OL23M</query></delete>'
def run_update():
    """Flush the queued work and author updates to solr.  (Python 2.)

    Indexes every key in `works_to_update` (retrying up to 5 times,
    repairing author redirects in place on AuthorRedirect), batching solr
    requests in groups of 100; then indexes `authors_to_update`; finally
    resets both queues and checkpoints `offset` to `state_file`.
    """
    global authors_to_update
    global works_to_update
    global last_update
    print 'running update: %s works %s authors' % (len(works_to_update), len(authors_to_update))
    if works_to_update:
        requests = []
        num = 0
        total = len(works_to_update)
        for wkey in works_to_update:
            num += 1
            print 'update work: %s %d/%d' % (wkey, num, total)
            # keys with an extra '/' past the '/works/' prefix are malformed
            if '/' in wkey[7:]:
                print 'bad wkey:', wkey
                continue
            # retry loop: each AuthorRedirect is repaired, then retried
            for attempt in range(5):
                try:
                    requests += update_work(withKey(wkey))
                    break
                except AuthorRedirect:
                    print 'fixing author redirect'
                    w = ol.get(wkey)
                    need_update = False
                    for a in w['authors']:
                        r = ol.get(a['author'])
                        if r['type'] == '/type/redirect':
                            a['author'] = {'key': r['location']}
                            need_update = True
                    assert need_update
                    print w
                    if not done_login:
                        rc = read_rc()
                        ol.login('EdwardBot', rc['EdwardBot'])
                    ol.save(w['key'], w, 'avoid author redirect')
            # flush to solr in batches of 100 requests
            if len(requests) >= 100:
                solr_update(requests, debug=True)
                requests = []
            # if num % 1000 == 0:
            #     solr_update(['<commit/>'], debug=True)
        if requests:
            solr_update(requests, debug=True)
        solr_update(['<commit/>'], debug=True)
        last_update = time()
        print >> open(state_file, 'w'), offset
    if authors_to_update:
        requests = []
        for akey in authors_to_update:
            print 'update author:', akey
            requests += update_author(akey)
        solr_update(requests + ['<commit/>'], index='authors', debug=True)
    # reset the queues and checkpoint the feed offset
    authors_to_update = set()
    works_to_update = set()
    print >> open(state_file, 'w'), offset
def run_update():
    """Flush the queued work and author updates to solr.  (Python 2.)

    Same logic as the single-quoted run_update variant in this file:
    index queued works (retrying through author redirects), then queued
    authors, then reset the queues and checkpoint `offset`.
    """
    global authors_to_update
    global works_to_update
    global last_update
    print "running update: %s works %s authors" % (len(works_to_update), len(authors_to_update))
    if works_to_update:
        requests = []
        num = 0
        total = len(works_to_update)
        for wkey in works_to_update:
            num += 1
            print "update work: %s %d/%d" % (wkey, num, total)
            # keys with an extra '/' past the '/works/' prefix are malformed
            if "/" in wkey[7:]:
                print "bad wkey:", wkey
                continue
            # retry loop: each AuthorRedirect is repaired, then retried
            for attempt in range(5):
                try:
                    requests += update_work(withKey(wkey))
                    break
                except AuthorRedirect:
                    print "fixing author redirect"
                    w = ol.get(wkey)
                    need_update = False
                    for a in w["authors"]:
                        r = ol.get(a["author"])
                        if r["type"] == "/type/redirect":
                            a["author"] = {"key": r["location"]}
                            need_update = True
                    assert need_update
                    print w
                    if not done_login:
                        rc = read_rc()
                        ol.login("EdwardBot", rc["EdwardBot"])
                    ol.save(w["key"], w, "avoid author redirect")
            # flush to solr in batches of 100 requests
            if len(requests) >= 100:
                solr_update(requests, debug=True)
                requests = []
            # if num % 1000 == 0:
            #     solr_update(['<commit/>'], debug=True)
        if requests:
            solr_update(requests, debug=True)
        solr_update(["<commit/>"], debug=True)
        last_update = time()
        print >> open(state_file, "w"), offset
    if authors_to_update:
        requests = []
        for akey in authors_to_update:
            print "update author:", akey
            requests += update_author(akey)
        solr_update(requests + ["<commit/>"], index="authors", debug=True)
    # reset the queues and checkpoint the feed offset
    authors_to_update = set()
    works_to_update = set()
    print >> open(state_file, "w"), offset
def test_delete_work(self):
    """Deleting work and edition records each yields one key-query DeleteRequest."""
    del_work = update_work.update_work(
        {'key': '/works/OL23W', 'type': {'key': '/type/delete'}})
    del_edition = update_work.update_work(
        {'key': '/works/OL23M', 'type': {'key': '/type/delete'}})
    assert isinstance(del_work, list)
    for reqs in (del_work, del_edition):
        assert len(reqs) == 1
        assert isinstance(reqs[0], update_work.DeleteRequest)
    assert del_work[0].toxml() == '<delete><query>key:/works/OL23W</query></delete>'
    assert del_edition[0].toxml() == '<delete><query>key:/works/OL23M</query></delete>'
def hide_books(start):
    """Remove 'ocaid' links from editions whose IA item is dark or noindex.

    Resumes from the timestamp stored in `hide_state_file` (the `start`
    argument is not read here -- TODO confirm intent), scrubs matching
    editions, reindexes the affected works in solr, then writes the new
    resume timestamp back to `hide_state_file`.  (Python 2.)
    """
    hide_start = open(hide_state_file).readline()[:-1]
    print "hide start:", hide_start
    mend = []  # editions to save back with their 'ocaid' removed
    fix_works = set()  # keys of works that need a solr reindex
    db_iter = db.query(
        "select identifier, collection, updated from metadata where (noindex is not null or curatestate='dark') and mediatype='texts' and scandate is not null and updated > $start order by scandate_dt",
        {"start": hide_start},
    )
    last_updated = None
    for row in db_iter:
        ia = row.identifier
        if row.collection:
            collections = set(i.lower().strip() for i in row.collection.split(";"))
            # printdisabled/lendinglibrary items keep their links
            if "printdisabled" in collections or "lendinglibrary" in collections:
                continue
        print `ia`, row.updated
        for eq in query({"type": "/type/edition", "ocaid": ia}):
            print eq["key"]
            e = ol.get(eq["key"])
            if "ocaid" not in e:
                continue  # link already removed
            if "works" in e:
                fix_works.update(e["works"])
            print e["key"], `e.get("title", None)`
            del e["ocaid"]
            mend.append(e)
        # NOTE(review): placement at row level inferred from the collapsed
        # source -- confirm it should advance even for rows with no matches
        last_updated = row.updated
    print "removing links from %d editions" % len(mend)
    if not mend:
        return
    print ol.save_many(mend, "remove link")
    requests = []
    for wkey in fix_works:
        requests += update_work(withKey(wkey))
    if fix_works:
        solr_update(requests + ["<commit/>"], debug=True)
    print >> open(hide_state_file, "w"), last_updated
def hide_books(start):
    """Remove 'ocaid' links from editions whose IA item went dark or noindex.

    Resumes from the timestamp stored in `hide_state_file` (the `start`
    argument is not read here -- presumably superseded; confirm), scrubs
    matching editions via ol.save_many, reindexes the affected works in
    solr, and writes the new resume timestamp back to `hide_state_file`.
    """
    hide_start = open(hide_state_file).readline()[:-1]
    print('hide start:', hide_start)
    mend = []          # editions to save back with their 'ocaid' removed
    fix_works = set()  # keys of works that need a solr reindex
    rows = db.query(
        "select identifier, collection, updated from metadata where (noindex is not null or curatestate='dark') and mediatype='texts' and scandate is not null and updated > $start",
        {'start': hide_start})
    last_updated = None
    for row in rows:
        identifier = row.identifier
        if row.collection:
            row_collections = set(c.lower().strip() for c in row.collection.split(';'))
            if ignore_noindex & row_collections:
                continue  # collection is exempt from hiding
        print((repr(identifier), row.updated))
        for found in query({'type': '/type/edition', 'ocaid': identifier}):
            print(found['key'])
            edition = ol.get(found['key'])
            if 'ocaid' not in edition:
                continue  # link already removed
            if 'works' in edition:
                fix_works.update(edition['works'])
            print((edition['key'], repr(edition.get('title', None))))
            del edition['ocaid']
            mend.append(edition)
        last_updated = row.updated
    print('removing links from %d editions' % len(mend))
    if not mend:
        return
    print(ol.save_many(mend, 'remove link'))
    requests = []
    for work_key in fix_works:
        requests.extend(update_work(withKey(work_key)))
    if fix_works:
        solr_update(requests + ['<commit/>'], debug=True)
    print(last_updated, file=open(hide_state_file, 'w'))
def hide_books(start): hide_start = open(hide_state_file).readline()[:-1] print 'hide start:', hide_start mend = [] fix_works = set() db_iter = db.query("select identifier, collection, updated from metadata where (noindex is not null or curatestate='dark') and mediatype='texts' and scandate is not null and updated > $start", {'start': hide_start}) last_updated = None for row in db_iter: ia = row.identifier if row.collection: collections = set(i.lower().strip() for i in row.collection.split(';')) if ignore_noindex & collections: continue print(repr(ia), row.updated) for eq in query({'type': '/type/edition', 'ocaid': ia}): print eq['key'] e = ol.get(eq['key']) if 'ocaid' not in e: continue if 'works' in e: fix_works.update(e['works']) print(e['key'], repr(e.get('title', None))) del e['ocaid'] mend.append(e) last_updated = row.updated print 'removing links from %d editions' % len(mend) if not mend: return print ol.save_many(mend, 'remove link') requests = [] for wkey in fix_works: requests += update_work(withKey(wkey)) if fix_works: solr_update(requests + ['<commit/>'], debug=True) print >> open(hide_state_file, 'w'), last_updated
# NOTE(review): fragment of a larger merge-works script (Python 2) -- the
# enclosing function/loop begins before this chunk, so the code is kept
# verbatim rather than risk mis-reconstructing its indentation.  It appears
# to repoint editions at a surviving work, sync the work title, save via
# ol.save_many, then reindex works (adding covers) and commit to solr --
# confirm against the full script.
e = ol.get(ekey) e['works'] = [Reference(use_key)] update.append(e) if work_title[use_key] != w['title']: print 'update work title', `work_title[use_key]`, '->', `w['title']` existing_work = ol.get(use_key) existing_work['title'] = w['title'] update.append(existing_work) if do_updates: ol.save_many(update, 'merge works') all_existing.update(existing) for wkey in existing: cur = work_title[wkey] print ' ', wkey, cur == w['title'], `cur` print len(work_to_edition), len(all_existing) assert len(work_to_edition) == len(all_existing) if not do_updates: sys.exit(0) for key in work_keys: w = ol.get(key) add_cover_to_work(w) if 'cover_edition' not in w: print 'no cover found' update_work(withKey(key), debug=True) requests = ['<commit />'] solr_update(requests, debug=True)
def solr_updates(i):
    """Apply one author-merge changeset `i` to the works solr index.

    Expects i['data']['changeset']['data'] to hold exactly a 'master' and a
    'duplicates' entry.  Re-derives subjects for each affected work,
    pushes the work updates to solr (resolving author redirects on the
    fly), queues the merge in `authors_to_update`, and records timing in
    `update_times`.  (Python 2.)
    """
    global subjects_to_update, authors_to_update
    t0 = time()
    d = i['data']
    changeset = d['changeset']
    print 'author:', d['author']
    try:
        # merge changesets must contain exactly 'master' + 'duplicates'
        assert len(changeset['data']) == 2 and 'master' in changeset['data'] and 'duplicates' in changeset['data']
    except:
        # bare except is deliberate: dump the offending changeset, then re-raise
        print d['changeset']
        raise
    master_key = changeset['data']['master']
    dup_keys = changeset['data']['duplicates']
    assert dup_keys
    print 'timestamp:', i['timestamp']
    print 'dups:', dup_keys
    print 'records to update:', len(d['result'])
    master = None
    obj_by_key = {}  # every object in the changeset query, keyed for obj_cache
    works = []  # keys of works touched by this changeset
    editions_by_work = defaultdict(list)  # work key -> its editions
    for obj in d['query']:
        obj_type = obj['type']['key']
        k = obj['key']
        if obj_type == '/type/work':
            works.append(obj['key'])
        elif obj_type == '/type/edition':
            if 'works' not in obj:
                continue  # orphan edition: nothing to reindex
            for w in obj['works']:
                editions_by_work[w['key']].append(obj)
        obj_by_key[k] = obj
    master = obj_by_key.get(master_key)
    #print 'master:', master
    if len(d['result']) == 0:
        print i
    work_updates = []
    for wkey in works:
        #print 'editions_by_work:', editions_by_work
        work = obj_by_key[wkey]
        work['editions'] = editions_by_work[wkey]
        subjects = get_work_subjects(work)
        for subject_type, values in subjects.iteritems():
            subjects_to_update.update((subject_type, v) for v in values)
        try:
            ret = update_work(work, obj_cache=obj_by_key, debug=True)
        except AuthorRedirect:
            # cached objects are stale: refetch and resolve redirects explicitly
            work = withKey(wkey)
            work['editions'] = editions_by_work[wkey]
            ret = update_work(work, debug=True, resolve_redirects=True)
        work_updates += ret
    if work_updates:
        solr_update(work_updates, debug=False, index='works')
    # queue the author merge for the authors index
    authors_to_update.append({
        'redirects': dup_keys,
        'master_key': master_key,
        'master': master})
    print 'authors to update:', len(authors_to_update)
    t1 = time() - t0
    update_times.append(t1)
    print 'update takes: %d seconds' % t1
    print
def run_update(): global authors_to_update, works_to_update subjects_to_update = set() global last_update print 'running update: %s works %s authors' % (len(works_to_update), len(authors_to_update)) if works_to_update: requests = [] num = 0 total = len(works_to_update) for wkey in works_to_update: num += 1 print 'update work: %s %d/%d' % (wkey, num, total) if '/' in wkey[7:]: print 'bad wkey:', wkey continue work_to_update = withKey(wkey) for attempt in range(5): try: requests += update_work(work_to_update) except AuthorRedirect: print 'fixing author redirect' w = ol.get(wkey) need_update = False for a in w['authors']: r = ol.get(a['author']) if r['type'] == '/type/redirect': a['author'] = {'key': r['location']} need_update = True if need_update: if not done_login: rc = read_rc() ol.login('EdwardBot', rc['EdwardBot']) ol.save(w['key'], w, 'avoid author redirect') if work_to_update['type']['key'] == '/type/work' and work_to_update.get('title'): subjects = get_work_subjects(work_to_update) print subjects for subject_type, values in subjects.iteritems(): subjects_to_update.update((subject_type, v) for v in values) if len(requests) >= 100: solr_update(requests, debug=True) requests = [] # if num % 1000 == 0: # solr_update(['<commit/>'], debug=True) if requests: solr_update(requests, debug=True) if not args.no_commit: solr_update(['<commit/>'], debug=True) last_update = time() if not args.no_author_updates and authors_to_update: requests = [] for akey in authors_to_update: print 'update author:', `akey` try: request = update_author(akey) if request: requests += request except AttributeError: print 'akey:', `akey` raise if not args.no_commit: solr_update(requests + ['<commit/>'], index='authors', debug=True) subject_add = Element("add") print subjects_to_update for subject_type, subject_name in subjects_to_update: key = subject_type + '/' + subject_name count = subject_count(subject_type, subject_name) if not subject_need_update(key, count): print 'no updated needed:', 
(subject_type, subject_name, count) continue print 'updated needed:', (subject_type, subject_name, count) doc = Element("doc") add_field(doc, 'key', key) add_field(doc, 'name', subject_name) add_field(doc, 'type', subject_type) add_field(doc, 'count', count) subject_add.append(doc) if len(subject_add): print 'updating subjects' add_xml = tostring(subject_add).encode('utf-8') solr_update([add_xml, '<commit />'], debug=True, index='subjects') authors_to_update = set() works_to_update = set() subjects_to_update = set() print >> open(state_file, 'w'), offset
# NOTE(review): fragment of a merge-works helper plus the script's __main__
# entry point -- the enclosing function begins before this chunk, so the
# code is kept verbatim rather than risk mis-reconstructing its indentation.
# It mixes py3-style print(..., file=...) with py2 `iteritems` -- confirm
# the target interpreter.  The tail picks the best-matching existing work,
# applies the merge, then (under __main__) reindexes the author's works and
# the author record in solr.
best_match = max(w['existing_works'].iteritems(), key=lambda i:i[1])[0] w['best_match'] = work_by_key[best_match] updated = update_work_with_best_match(akey, w, work_to_edition, do_updates, fh_log) for wkey in updated: if wkey in works_updated_this_session: print(wkey, 'already updated!', file=fh_log) print(wkey, 'already updated!') works_updated_this_session.update(updated) #if not do_updates: # return [] return [withKey(key) for key in works_updated_this_session] if __name__ == '__main__': akey = '/authors/' + sys.argv[1] title_redirects = find_title_redirects(akey) works = find_works(akey, get_books(akey, books_query(akey)), existing=title_redirects) to_update = update_works(akey, works, do_updates=True) requests = [] for w in to_update: requests += update_work(w) if to_update: solr_update(requests + ['<commit />'], debug=True) requests = update_author(akey) solr_update(requests + ['<commit/>'], debug=True)
def run_update(): global authors_to_update, works_to_update subjects_to_update = set() global last_update print 'running update: %s works %s authors' % (len(works_to_update), len(authors_to_update)) if works_to_update: requests = [] num = 0 total = len(works_to_update) for wkey in works_to_update: num += 1 print 'update work: %s %d/%d' % (wkey, num, total) if '/' in wkey[7:]: print 'bad wkey:', wkey continue work_to_update = withKey(wkey) for attempt in range(5): try: requests += update_work(work_to_update) except AuthorRedirect: print 'fixing author redirect' w = ol.get(wkey) need_update = False for a in w['authors']: r = ol.get(a['author']) if r['type'] == '/type/redirect': a['author'] = {'key': r['location']} need_update = True if need_update: if not done_login: rc = read_rc() ol.login('EdwardBot', rc['EdwardBot']) ol.save(w['key'], w, 'avoid author redirect') if work_to_update['type'][ 'key'] == '/type/work' and work_to_update.get('title'): subjects = get_work_subjects(work_to_update) print subjects for subject_type, values in subjects.iteritems(): subjects_to_update.update( (subject_type, v) for v in values) if len(requests) >= 100: solr_update(requests, debug=True) requests = [] # if num % 1000 == 0: # solr_update(['<commit/>'], debug=True) if requests: solr_update(requests, debug=True) if not args.no_commit: solr_update(['<commit/>'], debug=True) last_update = time() if not args.no_author_updates and authors_to_update: requests = [] for akey in authors_to_update: print('update author:', repr(akey)) try: request = update_author(akey) if request: requests += request except AttributeError: print('akey:', repr(akey)) raise if not args.no_commit: solr_update(requests + ['<commit/>'], debug=True) subject_add = Element("add") print subjects_to_update for subject_type, subject_name in subjects_to_update: key = subject_type + '/' + subject_name count = subject_count(subject_type, subject_name) if not subject_need_update(key, count): print 'no updated needed:', 
(subject_type, subject_name, count) continue print 'updated needed:', (subject_type, subject_name, count) doc = Element("doc") add_field(doc, 'key', key) add_field(doc, 'name', subject_name) add_field(doc, 'type', subject_type) add_field(doc, 'count', count) subject_add.append(doc) if len(subject_add): print 'updating subjects' add_xml = tostring(subject_add).encode('utf-8') solr_update([add_xml, '<commit />'], debug=True) authors_to_update = set() works_to_update = set() subjects_to_update = set() print >> open(state_file, 'w'), offset
# NOTE(review): fragment (Python 2) of a merge-works helper plus the
# script's __main__ entry point -- the enclosing function begins before
# this chunk, so the code is kept verbatim rather than risk
# mis-reconstructing its indentation.  It logs already-updated works,
# returns the session's updated works, and (under __main__) reindexes the
# author's works and the author record in solr.
for wkey in updated: if wkey in works_updated_this_session: print >> fh_log, wkey, 'already updated!' print wkey, 'already updated!' works_updated_this_session.update(updated) #if not do_updates: # return [] return [withKey(key) for key in works_updated_this_session] if __name__ == '__main__': akey = '/authors/' + sys.argv[1] title_redirects = find_title_redirects(akey) works = find_works(akey, get_books(akey, books_query(akey)), existing=title_redirects) to_update = update_works(akey, works, do_updates=True) requests = [] for w in to_update: requests += update_work(w) if to_update: solr_update(requests + ['<commit />'], debug=True) requests = update_author(akey) solr_update(requests + ['<commit/>'], debug=True)