Example #1
0
    def import_yago_classes(self):
        """Load the bundled list of YAGO class identifiers into the database.

        Reads ``data/yago_classes_list.tsv.gz`` from the ``djity_cowst``
        package directory, converts every non-blank line to an integer and
        stores it as the ``md5`` of a new ``YagoClass`` row.  Each chunk
        produced by ``chunkify`` is saved inside its own
        ``transaction.commit_on_success`` block, and the running total of
        saved rows is printed after every chunk.

        Raises:
            ValueError: if a non-blank line cannot be parsed as an integer.
        """
        path = djity_cowst.__path__[0] + '/data/yago_classes_list.tsv.gz'
        imported = 0
        f = gzip.open(path)
        # try/finally instead of ``with gzip.open(...)``: GzipFile only gained
        # context-manager support in Python 2.7, and the handle must be
        # released even when a row fails to parse or save.
        try:
            for chunk in chunkify(f):
                with transaction.commit_on_success():
                    for line in chunk:
                        value = line.strip()
                        if not value:
                            # A blank line (e.g. a trailing newline) carries
                            # no identifier; skip it instead of crashing.
                            continue
                        YagoClass(md5=long(value)).save()
                        imported += 1
                # Report the number of rows actually saved so far rather than
                # an estimate derived from the chunk index.
                print("%d yago class imported" % imported)
        finally:
            f.close()
Example #2
0
    def import_infobox_properties(self):
        """Import the infobox property histogram read through ``hdfs_open``.

        Every input line is split on tabs and its first four fields are
        converted to integers.  Consecutive records sharing the same first
        three fields form one group; for each group the method saves, inside
        a single ``transaction.commit_on_success`` block:

        * an ``InfoboxProperty`` whose ``md5`` is field 0,
        * a ``YagoClass`` whose ``md5`` is field 1,
        * an ``InfoboxPropertyHistogram`` linking the two, with ``count`` set
          to field 2, and
        * one ``sample`` entry per record of the group, looked up as the
          ``Instance`` whose ``md5`` is field 3.

        The zero-based group index is printed before each group is processed.

        NOTE(review): grouping only merges adjacent records, so this presumably
        relies on the input being sorted by its first three fields -- confirm.
        NOTE(review): the handle returned by ``hdfs_open`` is never closed
        here -- confirm whether it needs explicit cleanup.
        """
        def to_longs(raw_line):
            # Split into at most five pieces and keep the first four, so any
            # content after the fourth field is ignored.
            fields = raw_line.split('\t', 4)[:4]
            return tuple(long(field) for field in fields)

        def group_key(record):
            # The first three fields identify one histogram entry.
            return record[:3]

        source = hdfs_open('swoct/dbpedia/infobox_properties_histogram')
        records = imap(to_longs, source)
        for index, (head, samples) in enumerate(groupby(records, key=group_key)):
            print(index)
            with transaction.commit_on_success():
                infobox_property = InfoboxProperty(md5=head[0])
                infobox_property.save()
                yago_class = YagoClass(md5=head[1])
                yago_class.save()
                histogram = InfoboxPropertyHistogram(
                    infobox_property=infobox_property,
                    count=head[2],
                    yago_class=yago_class,
                )
                histogram.save()
                # The histogram is saved before its samples are attached,
                # as in the original statement order.
                for record in samples:
                    histogram.sample.add(Instance.objects.get(md5=record[3]))