Ejemplo n.º 1
0
def upload(dataset):
    """Store the file sent with the current request as a new upload.

    The dataset is looked up, edit permission on it is required, and the
    file found under the ``file`` key of ``request.files`` is handed to
    ``Upload.create`` together with the requesting account.  The session
    is committed before the upload is returned through ``jsonify``.

    Raises:
        Invalid: when no file was sent or the file has no filename.
    """
    target = Dataset.find(dataset)
    authz.require(authz.dataset_edit(target))

    posted = request.files.get('file')
    if not (posted and posted.filename):
        # Nothing usable was sent; report the problem against the field.
        raise Invalid("No file.", None, None,
                      error_dict={'file': "You need to upload a file"})

    created = Upload.create(target, request.account, posted)
    db.session.commit()
    return jsonify(created)
Ejemplo n.º 2
0
def parse_upload(dataset, id):
    """Open a stored upload and return it with its first parsed table.

    Args:
        dataset: Not used by this function; kept for the existing signature.
        id: Identifier passed to ``Upload.by_id``.

    Returns:
        ``(upload, table)`` where ``table`` is the first entry of the
        parsed table set, or ``(None, None)`` when ``Upload.by_id`` finds
        nothing.
    """
    upload = Upload.by_id(id)
    if upload is None:
        return None, None
    # NOTE(review): the handle is intentionally not closed here -- the
    # returned table presumably still reads from it; confirm before adding
    # a ``with`` block.
    fh = open(upload.path, 'rb')
    _, ext = os.path.splitext(upload.filename)
    # splitext keeps the leading dot (".csv"), so strip it exactly once.
    # A filename without an extension yields None rather than being sliced.
    extension = ext[1:] if ext else None
    table_set = AnyTableSet.from_fileobj(fh,
                                         mimetype=upload.mimetype,
                                         extension=extension)
    return upload, table_set.tables[0]
Ejemplo n.º 3
0
def process(dataset, id):
    """Validate the submitted column mapping and queue the import.

    The mapping comes from ``request_data()``.  Its ``reviewed`` entry is
    normalised to ``False`` when falsy and ``columns`` defaults to an
    empty dict.  Every mapped header must be present in the upload's
    table headers, and at least one column must be mapped to ``name`` or
    ``id``.  The import itself is handed to ``import_upload.delay``.

    Raises:
        Invalid: on an unknown header, or when neither ``name`` nor
            ``id`` is among the mapped fields.
    """
    target = Dataset.find(dataset)
    authz.require(authz.dataset_edit(target))
    upload = Upload.find(target, id)

    mapping = request_data()
    mapping['reviewed'] = mapping.get('reviewed') or False
    mapping['columns'] = mapping.get('columns', {})

    columns = mapping['columns']
    fields = columns.values()
    for header in columns:
        if header in upload.tab.headers:
            continue
        raise Invalid("Invalid header: %s" % header, None, None)

    if not ('name' in fields or 'id' in fields):
        raise Invalid("You have not selected a field that definies entity names.", None, None)

    import_upload.delay(upload.id, request.account.id, mapping)
    return jsonify({'status': 'Loading data...'})
Ejemplo n.º 4
0
def import_upload(upload_id, account_id, mapping):
    """Turn the rows of a stored upload into entities of its dataset.

    Each row is passed through ``apply_mapping``.  An entity is then
    looked up by id, then by name, and created when neither lookup finds
    one.  The row is applied with ``entity.update``.  The session is
    committed on every 100th row index (starting with index 0) and once
    more after the loop.  Rows that raise ``Invalid`` are logged and
    skipped.
    """
    upload = Upload.all().filter_by(id=upload_id).first()
    account = Account.by_id(account_id)
    mapped = mapping['columns'].values()

    rows = [apply_mapping(raw, mapping) for raw in upload.tab.dict]
    # Stable sort: rows without a 'canonical' value first, aliases after.
    rows.sort(key=lambda r: 2 if r.get('canonical') else 1)

    for index, row in enumerate(rows):
        try:
            # Resolve the target entity: by id, then by name, else create.
            entity = Entity.by_id(row.get('id')) if row.get('id') else None
            if entity is None:
                entity = Entity.by_name(upload.dataset, row.get('name'))
            if entity is None:
                entity = Entity.create(upload.dataset, row, account)

            # Keep existing values for fields the mapping does not cover.
            if entity.canonical_id and 'canonical' not in mapped:
                row['canonical'] = entity.canonical_id
            if entity.invalid and 'invalid' not in mapped:
                row['invalid'] = entity.invalid

            # Row attributes are layered on top of the stored ones.
            merged = entity.attributes.copy() if entity.attributes else {}
            merged.update(row['attributes'])
            row['attributes'] = merged

            entity.update(row, account)
            print(entity)
            if not index % 100:
                db.session.commit()
                logging.debug('Commit')
        except Invalid as inv:
            message = 'Exception during import: {}'.format(str(inv))
            logging.warning(message)

    db.session.commit()
    logging.info('Import Completed')
Ejemplo n.º 5
0
def import_upload(upload_id, account_id, mapping):
    """Turn the rows of a stored upload into entities of its dataset.

    Each row is passed through ``apply_mapping``.  An entity is then
    looked up by id, then by name, and created when neither lookup finds
    one.  The row is applied with ``entity.update``.  The session is
    committed on every 100th row index and once more after the loop, so
    that rows processed after the last checkpoint are also committed.
    Rows that raise ``Invalid`` are printed and skipped.

    Args:
        upload_id: id used to filter ``Upload.all()``.
        account_id: id passed to ``Account.by_id``.
        mapping: dict with a ``columns`` entry; its values are the mapped
            field names.
    """
    upload = Upload.all().filter_by(id=upload_id).first()
    account = Account.by_id(account_id)
    mapped = mapping['columns'].values()

    rows = [apply_mapping(r, mapping) for r in upload.tab.dict]
    # put aliases second (sorted() is stable, so row order is kept otherwise).
    rows = sorted(rows, key=lambda r: 2 if r.get('canonical') else 1)

    for i, row in enumerate(rows):
        try:
            entity = None
            if row.get('id'):
                entity = Entity.by_id(row.get('id'))
            if entity is None:
                entity = Entity.by_name(upload.dataset, row.get('name'))
            if entity is None:
                entity = Entity.create(upload.dataset, row, account)

            # restore some defaults: keep the entity's current values for
            # fields the mapping does not cover.
            if entity.canonical_id and 'canonical' not in mapped:
                row['canonical'] = entity.canonical_id
            if entity.invalid and 'invalid' not in mapped:
                row['invalid'] = entity.invalid

            # Row attributes are layered on top of the stored ones.
            if entity.attributes:
                attributes = entity.attributes.copy()
            else:
                attributes = {}
            attributes.update(row['attributes'])
            row['attributes'] = attributes

            entity.update(row, account)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(entity)
            if i % 100 == 0:
                print('COMMIT')
                db.session.commit()
        except Invalid as inv:
            # TODO: logging.
            print(inv)

    # Flush the rows handled since the last periodic commit.
    db.session.commit()
Ejemplo n.º 6
0
def view(dataset, id):
    """Return one upload of a dataset through ``jsonify``.

    Edit permission on the dataset is required before the upload is
    looked up with ``Upload.find``.
    """
    target = Dataset.find(dataset)
    authz.require(authz.dataset_edit(target))
    return jsonify(Upload.find(target, id))
Ejemplo n.º 7
0
def upload_file(dataset, file_, account):
    """Create an upload record for *file_* and save the file to its path.

    The record is created from the file's ``filename`` and ``mimetype``,
    the file is saved to ``upload.path``, and the session is committed.

    Returns:
        The object returned by ``Upload.create``.
    """
    name, mimetype = file_.filename, file_.mimetype
    record = Upload.create(dataset, account, name, mimetype)
    file_.save(record.path)
    db.session.commit()
    return record