import os
import logging

from flask import request

from messytables import AnyTableSet

# Project-local imports (module paths assumed, not shown in this listing):
# db, authz, Invalid, jsonify, request_data, and the Dataset, Upload,
# Entity and Account models.


def upload(dataset):
    dataset = Dataset.find(dataset)
    authz.require(authz.dataset_edit(dataset))
    file_ = request.files.get('file')
    if not file_ or not file_.filename:
        err = {'file': "You need to upload a file"}
        raise Invalid("No file.", None, None, error_dict=err)
    upload = Upload.create(dataset, request.account, file_)
    db.session.commit()
    return jsonify(upload)
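
# A hedged usage sketch for the upload() view above: how a client might POST
# a file to it. Assumes the view is registered as a Flask route (the
# decorator is not part of this listing); the URL
# '/datasets/<dataset>/uploads' and the `app` object are placeholders.
def example_upload_request(app):
    from io import BytesIO
    with app.test_client() as client:
        data = {'file': (BytesIO(b'name,alias\nAcme Inc.,ACME\n'),
                         'entities.csv')}
        res = client.post('/datasets/demo/uploads', data=data,
                          content_type='multipart/form-data')
        return res.data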
def parse_upload(dataset, id):
    upload = Upload.by_id(id)
    if upload is None:
        return None, None
    fh = open(upload.path, 'rb')
    fn, ext = os.path.splitext(upload.filename)
    # os.path.splitext returns the extension with a leading dot; strip it
    # once here. (Passing ext[1:] again below would cut a further character
    # off the extension and crash for files without one.)
    ext = ext[1:] if ext else None
    table_set = AnyTableSet.from_fileobj(fh, mimetype=upload.mimetype,
                                         extension=ext)
    return upload, table_set.tables[0]
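
# A hedged sketch of consuming parse_upload() above. Iterating a messytables
# row set yields lists of Cell objects; the .headers/.dict accessors used on
# upload.tab elsewhere in this listing are assumed to be added by the Upload
# model and are not shown here.
def example_iterate_rows(dataset, id):
    upload, sheet = parse_upload(dataset, id)
    if sheet is None:
        return
    for row in sheet:
        # Each row is a list of messytables Cell objects.
        print([cell.value for cell in row])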
def process(dataset, id):
    dataset = Dataset.find(dataset)
    authz.require(authz.dataset_edit(dataset))
    upload = Upload.find(dataset, id)
    mapping = request_data()
    mapping['reviewed'] = mapping.get('reviewed') or False
    mapping['columns'] = mapping.get('columns', {})
    fields = mapping['columns'].values()
    for header in mapping['columns'].keys():
        if header not in upload.tab.headers:
            raise Invalid("Invalid header: %s" % header, None, None)
    if 'name' not in fields and 'id' not in fields:
        raise Invalid("You have not selected a field that defines "
                      "entity names.", None, None)
    import_upload.delay(upload.id, request.account.id, mapping)
    return jsonify({'status': 'Loading data...'})
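
# An illustrative request payload for process() above, derived from the
# checks it performs: every key in 'columns' must be one of the upload's
# headers, and the mapped values must include at least 'name' or 'id'. The
# header names here are invented for the example.
EXAMPLE_MAPPING = {
    'reviewed': False,
    'columns': {
        'Company Name': 'name',       # defines entity names
        'Canonical ID': 'canonical',  # optional: marks rows as aliases
    }
}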
def import_upload(upload_id, account_id, mapping):
    upload = Upload.all().filter_by(id=upload_id).first()
    account = Account.by_id(account_id)
    mapped = mapping['columns'].values()
    rows = [apply_mapping(r, mapping) for r in upload.tab.dict]
    # Put aliases second, so that the canonical entities they point to
    # already exist by the time the aliases are imported.
    rows = sorted(rows, key=lambda r: 2 if r.get('canonical') else 1)
    for i, row in enumerate(rows):
        try:
            entity = None
            if row.get('id'):
                entity = Entity.by_id(row.get('id'))
            if entity is None:
                entity = Entity.by_name(upload.dataset, row.get('name'))
            if entity is None:
                entity = Entity.create(upload.dataset, row, account)
            # Restore some defaults not covered by the column mapping:
            if entity.canonical_id and 'canonical' not in mapped:
                row['canonical'] = entity.canonical_id
            if entity.invalid and 'invalid' not in mapped:
                row['invalid'] = entity.invalid
            # Merge existing attributes with those from the imported row:
            attributes = entity.attributes.copy() if entity.attributes else {}
            attributes.update(row['attributes'])
            row['attributes'] = attributes
            entity.update(row, account)
            logging.debug(entity)
            # Commit in batches to keep transactions small:
            if i % 100 == 0:
                db.session.commit()
                logging.debug('Commit')
        except Invalid as inv:
            logging.warning('Exception during import: {}'.format(str(inv)))
    db.session.commit()
    logging.info('Import Completed')
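
# apply_mapping() is called above but is not part of this listing. A
# plausible sketch, assuming it renames mapped columns to their entity
# fields and collects unmapped columns under 'attributes'; the real
# implementation may differ.
def apply_mapping(row, mapping):
    out = {'attributes': {}}
    for header, value in row.items():
        field = mapping['columns'].get(header)
        if field is None:
            # Columns without a mapping become free-form attributes.
            out['attributes'][header] = value
        else:
            out[field] = value
    return out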
def view(dataset, id):
    dataset = Dataset.find(dataset)
    authz.require(authz.dataset_edit(dataset))
    upload = Upload.find(dataset, id)
    return jsonify(upload)
def upload_file(dataset, file_, account):
    upload = Upload.create(dataset, account, file_.filename, file_.mimetype)
    file_.save(upload.path)
    db.session.commit()
    return upload
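
# A hedged usage sketch for upload_file() above, e.g. from inside a request
# handler; assumes a Flask request context where request.account is set, as
# in the upload() view at the top of this listing.
def example_handle_upload(dataset):
    file_ = request.files.get('file')
    if file_ and file_.filename:
        return upload_file(dataset, file_, request.account)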