コード例 #1
0
def insert_xml(path):
    f = open(path)
    count = 0

    for line in f:
        count += 1
        if count < START:
            continue

        print count,
        
        if LIMIT > 0 and count == LIMIT:
            break

        try:
            title  = tpat.search(line).group(1)
            wid    = int(idpat.search(line).group(1))
            img    = imgpat.search(line).group(1)
            birth  = int(bpat.search(line).group(1))
            death  = int(dpat.search(line).group(1))
            #cats   = catpat.search(line).group(1).split('|')
            #others = olpat.search(line).group(1).split('|')
        
            art = Article(name=title, wid=wid, image=img, birth=birth, death=death)
            art.save()

            print title
        except:
            #raise
            continue
    f.close()
コード例 #2
0
def gr_insert_incunabula_articles(revw):
    #clean up first
    c = 0
    '''
    for a in Article.objects.filter(art_ed=15).iterator():
        a.delete()
        print c, 'del'
        c+=1
    '''


    sizes = revw['sizes']
    for k in revw.keys():
        name = ' '.join(k.split('_'))
        try:
            ma = Article.objects.get(name=name)

            for ed in revw[k]['editions']:
                #make matches unique
                if ma.article_set.filter(art_ed=ed).count() != 0:
                    continue
                
                a = revw[k]['editions'][ed][0]
                txt = a['txt']
                if ed == 15:
                    pass
                
                name = str(ed)+'_'+str(a['id'])+'_'+a['name']
                name = parse_name(name)
                if name == None:
                    continue

                ia = Article(name=name, wid=a['id'], art_ed=ed, text=txt,
                              vscore = 0.0,
                              match_master=ma)

                ia.save()
                print ia
                '''
                for cat in ma.categories.iterator():
                    ia.categories.add(cat)
                ia.save()
                '''

            ma.match_count = ma.article_set.count()
            ma.save()

        except Article.DoesNotExist:
            continue

        except:
            raise
            #print k,'not found'
            continue
コード例 #3
0
def gr_insert_ed_15(split_dir):
    #for a in Article.objects.filter(art_ed=15).iterator():
    #    a.delete()

    w = open('15notmatched', 'w')
    MAXC = 5
    c = 0
    for pkl in os.listdir(split_dir):

        pkl = split_dir+pkl
        f = open(pkl)
        arts = cPickle.load(f)
        f.close()

        for a in arts:
            match = ' '. join(a['matched'][0].split('_'))
            ma = None
            try:
                ma = Article.objects.get(name=match, art_ed=1000)
            except:
                for candid in a['candids'][:MAXC]:
                    try:
                        match = ' '. join(candids.split('_'))
                        ma = Article.objects.get(name=match, art_ed=1000)
                    except:
                        continue

            if ma == None:
                w.write(a['name']+' not matched\n')
                continue

            ed = 15
            name=str(ed)+'_'+str(a['id'])+'_'+a['name']
            name = smart_unicode(name)
            ia = Article(name=name, wid=a['id'], art_ed=ed, text=a['txt'],
                         vscore = 0.0,
                         match_master=ma)
            ia.save()

            ma.match_count = ma.article_set.count()
            ma.save()

            c+=1
            print c, 'ed15'
    w.close()