mapping = data.get('mapping') default_category = data.get('default_category') assert default_category in CATEGORIES, default_category entities = defaultdict(set) with open(os.path.join(dir_name, 'data.csv'), 'rb') as fh: for row in unicodecsv.DictReader(fh): label = row.get(mapping.get('label', 'label')) if label is None: continue category = row.get(mapping.get('category', 'category')) category = category or default_category selectors = [row.get(mapping.get('selector', 'selector'))] selectors = [s for s in selectors if s] entities[(label, category)].update(selectors) for (label, category), selectors in entities.items(): data = {'label': label, 'category': category, 'selectors': selectors, 'list': lst} try: Entity.create(data, None) except Invalid, inv: log.warn("Failed: %s", inv) db.session.commit() selectors.update(lst.terms) log.info('Created %s entities', len(entities)) refresh(selectors)
def refresh_selectors(selectors):
    """Pass ``selectors`` on to ``aleph.processing.entities.refresh``.

    Nothing is returned; whatever ``refresh`` yields is discarded.
    """
    # The import is resolved at call time rather than at module load.
    # NOTE(review): presumably this avoids a circular import -- confirm.
    from aleph.processing.entities import refresh as refresh_entities
    refresh_entities(selectors)
entities = defaultdict(set) with open(os.path.join(dir_name, 'data.csv'), 'rb') as fh: for row in unicodecsv.DictReader(fh): label = row.get(mapping.get('label', 'label')) if label is None: continue category = row.get(mapping.get('category', 'category')) category = category or default_category selectors = [row.get(mapping.get('selector', 'selector'))] selectors = [s for s in selectors if s] entities[(label, category)].update(selectors) for (label, category), selectors in entities.items(): data = { 'label': label, 'category': category, 'selectors': selectors, 'list': lst } try: Entity.create(data, None) except Invalid, inv: log.warn("Failed: %s", inv) db.session.commit() selectors.update(lst.terms) log.info('Created %s entities', len(entities)) refresh(selectors)