def import_yago_classes(self):
    """Load YAGO class identifiers from the bundled gzip file into the database.

    Reads ``data/yago_classes_list.tsv.gz`` from the ``djity_cowst`` package
    directory. Every line is stripped of trailing whitespace, converted to a
    ``long`` and stored as the ``md5`` of a new ``YagoClass`` row. Lines are
    processed in the chunks produced by ``chunkify``, one database
    transaction per chunk, and a progress line is printed after each chunk.

    Raises:
        ValueError: if a line cannot be converted to ``long``.
    """
    path = djity_cowst.__path__[0] + '/data/yago_classes_list.tsv.gz'
    f = gzip.open(path)
    # try/finally rather than ``with``: the gzip handle must be released even
    # when a chunk fails, and this form does not depend on GzipFile being a
    # context manager.
    try:
        for i, chunk in enumerate(chunkify(f)):
            # One transaction per chunk keeps each commit small.
            with transaction.commit_on_success():
                for line in chunk:
                    md5_code = long(line.rstrip())
                    yago = YagoClass(md5=md5_code)
                    yago.save()
            # NOTE(review): the count is i * 1000 with i starting at 0, so
            # the first message reports 0; presumably chunkify yields
            # 1000-line chunks -- confirm before relying on this figure.
            print("%d yago class imported" % (i * 1000))
    finally:
        f.close()
def import_infobox_properties(self):
    """Import the infobox property histogram read through ``hdfs_open``.

    Each input line is split on tabs and its first four fields are parsed as
    ``long`` values. Consecutive rows that share the same first three values
    form one group. For every group, inside a single transaction:

    * an ``InfoboxProperty`` is saved with the first value as ``md5``;
    * a ``YagoClass`` is saved with the second value as ``md5``;
    * an ``InfoboxPropertyHistogram`` linking both is saved with the third
      value as ``count``;
    * for every row of the group, the ``Instance`` whose ``md5`` equals the
      row's fourth value is fetched and added to the histogram's ``sample``.

    The zero-based index of each group is printed before it is processed.
    """
    def to_longs(raw_line):
        # At most four tab splits; only the first four fields are kept.
        fields = raw_line.split('\t', 4)[:4]
        return tuple(long(field) for field in fields)

    source = hdfs_open('swoct/dbpedia/infobox_properties_histogram')
    rows = (to_longs(raw_line) for raw_line in source)
    # groupby only merges adjacent rows, so the input is presumably already
    # ordered by its first three columns -- TODO confirm.
    grouped = groupby(rows, key=lambda row: row[:3])

    for index, (group_key, samples) in enumerate(grouped):
        print(index)
        with transaction.commit_on_success():
            infobox_property = InfoboxProperty(md5=group_key[0])
            infobox_property.save()

            yago_class = YagoClass(md5=group_key[1])
            yago_class.save()

            histogram = InfoboxPropertyHistogram(
                infobox_property=infobox_property,
                count=group_key[2],
                yago_class=yago_class)
            histogram.save()

            # The histogram must be saved before related samples are added.
            for row in samples:
                instance = Instance.objects.get(md5=row[3])
                histogram.sample.add(instance)