def insert_xml(path):
    """Create one ``Article`` per parsable line of the dump file at *path*.

    Lines are numbered from 1.  Lines numbered below the module-level
    ``START`` are skipped.  When the module-level ``LIMIT`` is positive the
    scan stops on reaching line number ``LIMIT``; that line's number is
    echoed but the line itself is not imported.

    For every other line the module-level patterns ``tpat``, ``idpat``,
    ``imgpat``, ``bpat`` and ``dpat`` are searched and their first group is
    used as the article's name, wid, image, birth and death respectively
    (wid, birth and death are converted with ``int``).  A line for which any
    pattern fails to match, any conversion fails, or the save fails is
    skipped silently.

    Progress is written to stdout: each examined line number followed by a
    space, and the title plus a newline after each successful save.

    Args:
        path: Path of the file to read, one record per line.
    """
    import sys

    # ``with`` guarantees the handle is released even if the loop raises.
    with open(path) as dump:
        for count, line in enumerate(dump, 1):
            if count < START:
                continue
            # Line number without a trailing newline, so that the title (if
            # the line imports cleanly) ends up on the same output line.
            sys.stdout.write('%d ' % count)
            if LIMIT > 0 and count == LIMIT:
                break
            try:
                title = tpat.search(line).group(1)
                wid = int(idpat.search(line).group(1))
                img = imgpat.search(line).group(1)
                birth = int(bpat.search(line).group(1))
                death = int(dpat.search(line).group(1))
                Article(name=title, wid=wid, image=img,
                        birth=birth, death=death).save()
            except Exception:
                # Best-effort import: a missing field (search() returned
                # None), a non-numeric value or a failed save skips the line.
                # Unlike a bare ``except`` this lets KeyboardInterrupt and
                # SystemExit through, so a long import can be interrupted.
                continue
            else:
                print(title)
def gr_insert_incunabula_articles(revw):
    """Insert per-edition articles from *revw* and link them to their masters.

    *revw* is a mapping.  Every key except ``'sizes'`` is taken as an
    underscore-separated article name; underscores are replaced by spaces
    and the result is looked up with ``Article.objects.get(name=...)``.
    Keys with no such article are skipped.

    For a found master ``ma`` and each edition ``ed`` in
    ``revw[key]['editions']``:

    * if ``ma.article_set`` already holds an article with ``art_ed=ed`` the
      edition is skipped, so a master gets at most one match per edition;
    * otherwise the first entry of ``revw[key]['editions'][ed]`` is used.
      Its ``'id'``, ``'name'`` and ``'txt'`` values build a new ``Article``
      named ``parse_name('<ed>_<id>_<name>')``; if ``parse_name`` returns
      ``None`` the edition is skipped.

    After the editions of a master have been processed its ``match_count``
    is set to ``ma.article_set.count()`` and the master is saved.

    Args:
        revw: Mapping as described above.

    Raises:
        Anything other than ``Article.DoesNotExist`` raised by the master
        lookup, and any error from the inserts, propagates to the caller.
    """
    for key in revw:
        # 'sizes' is a separate top-level entry, not an article record
        # (presumably summary data -- confirm with the producer of revw);
        # it must not be looked up as an article name.
        if key == 'sizes':
            continue

        master_name = key.replace('_', ' ')
        try:
            ma = Article.objects.get(name=master_name)
        except Article.DoesNotExist:
            continue

        editions = revw[key]['editions']
        for ed in editions:
            # Keep matches unique: one linked article per edition.
            if ma.article_set.filter(art_ed=ed).count() != 0:
                continue
            a = editions[ed][0]
            art_name = parse_name(str(ed) + '_' + str(a['id']) + '_' + a['name'])
            if art_name is None:
                continue
            ia = Article(name=art_name, wid=a['id'], art_ed=ed, text=a['txt'],
                         vscore=0.0, match_master=ma)
            ia.save()
            print(ia)

        # Refresh the stored count from the relation itself.
        ma.match_count = ma.article_set.count()
        ma.save()
def gr_insert_ed_15(split_dir):
    """Insert edition-15 articles from the pickle files in *split_dir*.

    Every entry of *split_dir* is opened and unpickled with ``cPickle``; the
    result is iterated as a sequence of dicts.  For each dict ``a`` a master
    article is searched among ``Article`` rows with ``art_ed=1000`` (this
    value appears to mark master articles -- confirm):

    1. by ``a['matched'][0]`` with underscores replaced by spaces;
    2. failing that, by each of the first five entries of ``a['candids']``
       in order, stopping at the first hit (the list is presumably ordered
       best-first and its entries are assumed to be strings like
       ``a['matched'][0]`` -- confirm against the producer of the pickles).

    If no master is found, ``"<a['name']> not matched"`` is appended to the
    file ``15notmatched`` in the current working directory (the file is
    truncated at the start of every run).  Otherwise a new ``Article`` named
    ``smart_unicode('15_<id>_<name>')`` with ``art_ed=15`` is saved and
    linked to the master, the master's ``match_count`` is refreshed from
    ``article_set.count()``, and a running ``"<n> ed15"`` line is printed.

    Args:
        split_dir: Directory containing the pickle files.  A trailing path
            separator is accepted but no longer required.
    """
    max_candidates = 5
    edition = 15

    def find_master(raw_name):
        # Returns the master article for an underscore-separated name, or
        # None when there is no single such article.
        try:
            return Article.objects.get(name=raw_name.replace('_', ' '),
                                       art_ed=1000)
        except (Article.DoesNotExist, Article.MultipleObjectsReturned):
            return None

    inserted = 0
    with open('15notmatched', 'w') as unmatched_log:
        for entry in os.listdir(split_dir):
            # os.path.join works with or without a trailing separator on
            # split_dir; plain concatenation needed the separator.
            # NOTE(review): unpickling executes arbitrary code -- only point
            # this at pickle files from a trusted source.  The open mode is
            # left as in the original; binary mode is required for pickle
            # protocols >= 1 on platforms that translate newlines.
            with open(os.path.join(split_dir, entry)) as pkl_file:
                arts = cPickle.load(pkl_file)

            for a in arts:
                ma = find_master(a['matched'][0])
                if ma is None:
                    for candid in a['candids'][:max_candidates]:
                        ma = find_master(candid)
                        if ma is not None:
                            break
                if ma is None:
                    unmatched_log.write(a['name'] + ' not matched\n')
                    continue

                name = smart_unicode(
                    str(edition) + '_' + str(a['id']) + '_' + a['name'])
                ia = Article(name=name, wid=a['id'], art_ed=edition,
                             text=a['txt'], vscore=0.0, match_master=ma)
                ia.save()
                ma.match_count = ma.article_set.count()
                ma.save()
                inserted += 1
                print('%d ed15' % inserted)