Example #1
def import_upload(dataset_name, id, account_id,
                  entity_col, alias_col):
    dataset = Dataset.find(dataset_name)
    account = Account.by_id(account_id)
    metadata, row_set = parse_upload(dataset, id)
    headers = detect_headers(row_set)
    for row in row_set:
        # Collect the row's cells into a plain dict, then split off the
        # entity and alias columns from the remaining attributes.
        data = {c.column: c.value for c in row}
        entity = data.pop(entity_col) if entity_col else None
        alias = data.pop(alias_col) if alias_col else None
        # Track what this row produced; guarding the match step on the
        # column names alone can raise a NameError when the alias is
        # empty or identical to the entity.
        alias_obj, entity_obj = None, None
        if alias_col and alias is not None and len(alias) and alias != entity:
            d = {'name': alias, 'data': data}
            alias_obj = Alias.lookup(dataset, d, account,
                                     match_entity=False)
            data = {}
        if entity_col and entity is not None and len(entity):
            d = {'name': entity, 'data': data}
            entity_obj = Entity.by_name(dataset, entity)
            if entity_obj is None:
                entity_obj = Entity.create(dataset, d, account)
            entity_obj.data = data
        if alias_obj is not None and entity_obj is not None:
            # Record the alias as a match for the entity it appeared with.
            alias_obj.match(dataset, {'choice': entity_obj.id}, account)
    db.session.commit()
    flush_cache(dataset)
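
For context, a minimal sketch of the row handling this example relies on. It assumes, as the parse_upload/detect_headers pairing suggests, that each row is a sequence of cell objects exposing column and value attributes; the Cell type below is a hypothetical stand-in, not the project's actual class:

from collections import namedtuple

# Hypothetical stand-in for the parser's cell type (assumption).
Cell = namedtuple('Cell', ['column', 'value'])

row = [Cell('entity', 'ACME Inc.'), Cell('alias', 'ACME'), Cell('country', 'US')]
data = {c.column: c.value for c in row}
entity = data.pop('entity')  # 'ACME Inc.'
alias = data.pop('alias')    # 'ACME'
print(data)                  # remaining attributes: {'country': 'US'}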
Example #2
def import_upload(dataset_name, sig, account_id,
                  value_col, link_col):
    dataset = Dataset.find(dataset_name)
    account = Account.by_id(account_id)
    metadata, row_set = parse_upload(dataset, sig)
    headers = detect_headers(row_set)
    for row in row_set:
        # Collect the row's cells into a plain dict, then split off the
        # value and link columns from the remaining attributes.
        data = {c.column: c.value for c in row}
        value = data.pop(value_col) if value_col else None
        link = data.pop(link_col) if link_col else None
        if link_col:
            d = {'key': link, 'data': data}
            link_obj = Link.lookup(dataset, d, account,
                                   match_value=False)
            data = {}
        if value_col:
            d = {'value': value, 'data': data}
            value_obj = Value.by_value(dataset, value)
            if value_obj is None:
                value_obj = Value.create(dataset, d, account)
            value_obj.data = data
        if link_col and value_col:
            link_obj.match(dataset, {'choice': value_obj.id}, account)
        # Unlike Example #1, this variant commits after every row.
        db.session.commit()
Example #3
def import_upload(upload_id, account_id, mapping):
    upload = Upload.all().filter_by(id=upload_id).first()
    account = Account.by_id(account_id)
    mapped = mapping['columns'].values()

    rows = [apply_mapping(r, mapping) for r in upload.tab.dict]
    # Put aliases second: rows with a 'canonical' value sort after plain
    # entities, so the entities they point at are created first.
    rows = sorted(rows, key=lambda r: 2 if r.get('canonical') else 1)

    for i, row in enumerate(rows):
        try:
            # Find an existing entity by id, then by name; otherwise create it.
            entity = None
            if row.get('id'):
                entity = Entity.by_id(row.get('id'))
            if entity is None:
                entity = Entity.by_name(upload.dataset, row.get('name'))
            if entity is None:
                entity = Entity.create(upload.dataset, row, account)

            # Restore some defaults for fields the mapping does not cover:
            if entity.canonical_id and 'canonical' not in mapped:
                row['canonical'] = entity.canonical_id
            if entity.invalid and 'invalid' not in mapped:
                row['invalid'] = entity.invalid

            # Merge the uploaded attributes into the entity's existing ones.
            if entity.attributes:
                attributes = entity.attributes.copy()
            else:
                attributes = {}
            attributes.update(row['attributes'])
            row['attributes'] = attributes

            entity.update(row, account)
            logging.debug(entity)
            if i % 100 == 0:
                db.session.commit()
                logging.debug('Commit')
        except Invalid as inv:
            logging.warning('Exception during import: {}'.format(str(inv)))
    db.session.commit()
    logging.info('Import Completed')
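
Examples #3 and #4 both lean on an apply_mapping helper that is not shown on this page. Below is a minimal sketch of what it might do, assuming mapping['columns'] maps source column headers to target fields (such as 'name', 'id' or 'canonical') and that unmapped columns land in the row's 'attributes' dict, which the loop above reads:

def apply_mapping(raw_row, mapping):
    # Hypothetical sketch; the real helper is not part of these examples.
    columns = mapping['columns']
    row = {'attributes': {}}
    for source, value in raw_row.items():
        target = columns.get(source)
        if target:
            row[target] = value  # mapped field, e.g. 'name' or 'canonical'
        else:
            row['attributes'][source] = value  # keep extras as attributes
    return row

mapping = {'columns': {'Company': 'name', 'Parent': 'canonical'}}
print(apply_mapping({'Company': 'ACME Inc.', 'Country': 'US'}, mapping))
# {'attributes': {'Country': 'US'}, 'name': 'ACME Inc.'}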
Example #4
def import_upload(upload_id, account_id, mapping):
    upload = Upload.all().filter_by(id=upload_id).first()
    account = Account.by_id(account_id)
    mapped = mapping['columns'].values()

    rows = [apply_mapping(r, mapping) for r in upload.tab.dict]
    # put aliases second.
    rows = sorted(rows, key=lambda r: 2 if r.get('canonical') else 1)

    for i, row in enumerate(rows):
        try:
            entity = None
            if row.get('id'):
                entity = Entity.by_id(row.get('id'))
            if entity is None:
                entity = Entity.by_name(upload.dataset, row.get('name'))
            if entity is None:
                entity = Entity.create(upload.dataset, row, account)

            # restore some defaults:
            if entity.canonical_id and 'canonical' not in mapped:
                row['canonical'] = entity.canonical_id
            if entity.invalid and 'invalid' not in mapped:
                row['invalid'] = entity.invalid

            if entity.attributes:
                attributes = entity.attributes.copy()
            else:
                attributes = {}
            attributes.update(row['attributes'])
            row['attributes'] = attributes

            entity.update(row, account)
            print(entity)
            if i % 100 == 0:
                print('COMMIT')
                db.session.commit()
        except Invalid as inv:
            # TODO: logging.
            print(inv)
    db.session.commit()  # commit rows processed since the last batch commit
Example #5
def import_upload(dataset_name, sig, account_id, entity_col, alias_col):
    dataset = Dataset.find(dataset_name)
    account = Account.by_id(account_id)
    metadata, row_set = parse_upload(dataset, sig)
    headers = detect_headers(row_set)
    for row in row_set:
        data = {c.column: c.value for c in row}
        entity = data.pop(entity_col) if entity_col else None
        alias = data.pop(alias_col) if alias_col else None
        # As in Example #1, guard the match step on the objects themselves
        # to avoid a NameError when the alias is empty or equals the entity.
        alias_obj, entity_obj = None, None
        if alias_col and alias is not None and len(alias) and alias != entity:
            d = {'name': alias, 'data': data}
            alias_obj = Alias.lookup(dataset, d, account, match_entity=False)
            data = {}
        if entity_col and entity is not None and len(entity):
            d = {'name': entity, 'data': data}
            entity_obj = Entity.by_name(dataset, entity)
            if entity_obj is None:
                entity_obj = Entity.create(dataset, d, account)
            entity_obj.data = data
        if alias_obj is not None and entity_obj is not None:
            alias_obj.match(dataset, {'choice': entity_obj.id}, account)
    db.session.commit()
    flush_cache()
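
As a usage sketch for the signature-based variants (all argument values here are hypothetical placeholders; the calling code is not part of these examples):

import_upload('companies', 'a3f9c2', 42,
              entity_col='company_name', alias_col='also_known_as')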