def post_user(self, doc, **kw):
    """Create a new user (with tags) from an XML document; the new user owns the tags.

    Expected document format::

        <user name="user">
            <tag name="password" value="12345"/>
            <tag name="email" value="*****@*****.**"/>
            <tag name="display_name" value="user"/>
        </user>

    @param doc: XML string describing the user
    @param kw: passed through to get_user for the response
    @return: the created user document (via self.get_user)
    Aborts with 400 on a malformed document, 405 on a duplicate name/email.
    """
    userxml = etree.fromstring(doc)
    required_tags = ['user_name', 'password', 'email', 'display_name']
    tags = {}
    if userxml.tag == 'user':
        user_name = userxml.attrib['name']
        if user_name:
            tags['user_name'] = user_name
        for t in userxml.xpath('tag'):
            # collect tag values before possibly stripping the tag from the doc
            tags[t.get('name')] = t.get('value')
            #if (t.attrib['name']=='password') or (t.attrib['name']=='email'):
            if t.get('name') in REMOVE_TAGS:
                t.getparent().remove(t)  #removes email and password
            # NOTE: t keeps its attributes even after removal from the tree
            if t.attrib['name'] == 'email':
                # NOTE(review): this sets the 'value' attribute on the <user>
                # element itself (email becomes the user's value) — confirm intended
                userxml.attrib['value'] = t.attrib['value']  #set it as value of the user
        if all(k in tags for k in required_tags):
            log.debug("ADMIN: Adding user: %s", str(user_name))
            u = User(user_name=tags['user_name'],
                     password=tags['password'],
                     email_address=tags['email'],
                     display_name=tags['display_name'])
            DBSession.add(u)
            self._update_groups(u, tags.get('groups', '').split(','))
            try:
                transaction.commit()
            except IntegrityError:
                # unique constraint on user name / email address
                abort(405, 'Another user already has this user name or email address')
            #r = BQUser.query.filter(BQUser.resource_name == tags['user_name']).first()
            r = data_service.query(resource_type='user', name=tags['user_name'], wpublic=1)
            if len(r) > 0:
                admin = get_username()  #get admin user
                # impersonate the new user so the remaining tags are owned by them
                set_current_user(tags['user_name'])  #change document as user so that all changes are owned by the new user
                r = data_service.update_resource('/data_service/%s' % r[0].attrib.get('resource_uniq'),
                                                 new_resource=userxml)
                set_current_user(admin)  #set back to admin user
                return self.get_user('%s' % r.attrib.get('resource_uniq'), **kw)
            else:
                abort(400)
    # not a <user> document, or required tags missing
    abort(400)
def _load_mount_path(self, store_name, path, **kw):
    """Load a store resource by walking *path* inside the named store.

    @param store_name: name of the mount/store to start from
    @param path: iterable of (url-quoted) path components
    @param kw: extra query args for the final full load (view defaults to 'full')
    @return: the resolved resource element, or None when a level is
             missing/ambiguous or the store itself cannot be loaded
    """
    log.debug("load_mount_path : %s %s", store_name, path)
    path = list(path)  # make a copy to leave argument alone
    view = kw.pop('view', 'full')
    q = self._load_store(store_name)  # This load is not a full load but may be a short load
    #log.debug ('ZOOM %s', q.get ('uri'))
    while q is not None and path:
        el = path.pop(0)
        el = urllib.unquote(el)
        #q = data_service.query(parent=q, resource_unid=el, view='full', )
        q = data_service.query(parent=q, resource_unid=el, view='short', )
        if len(q) != 1:
            # zero or several children match this name: cannot resolve uniquely
            log.error('multiple names (%s) in store level %s', el, q.get('uri'))
            return None
        q = q[0]
    # BUG FIX: was `(kw or len(q)) == 0`, which reloaded only when kw was empty
    # AND the node had no children — so caller-supplied kw were never applied.
    # Intended: reload fully when extra kwargs were given or the short view
    # returned an element without children (might be limiting result).
    if q is not None and (kw or len(q) == 0):
        log.debug("loading with %s view=%s and %s", q, view, kw)
        q = data_service.get_resource(q, view=view, **kw)
    return q
def get_all_users(self, *arg, **kw):
    """Return an XML listing of every user (public query), one node per user.

    Each user node is augmented with admin information (email, display name)
    by add_admin_info2node. Limited command support: does not honor
    view=deep,clean. (Note: may be removed in version 0.6 due to redundant
    functionality of data_service.)
    """
    kw['wpublic'] = 1
    found = data_service.query(resource_type='user', **kw)
    view = kw.pop('view', None)
    listing = etree.Element('resource', uri=str(request.url))
    listing.extend(self.add_admin_info2node(node, view) for node in found)
    return etree.tostring(listing)
def add_query(self, duri, resource_tag, tag_query, **kw):
    """Append tag-query results to an existing dataset.

    @param duri: dataset uri of an existing dataset
    @param resource_tag: resource type tag i.e. images
    @param tag_query: expression of tag search
    @return: the updated dataset document as an XML string
    """
    log.info("dataset: addquery members of %s tag %s query %s ", duri, resource_tag, tag_query)
    dataset = data_service.get_resource(duri, view='deep')
    existing = dataset.xpath('./value')
    # renumber current members so indexes are contiguous
    for position, member in enumerate(existing):
        member.set('index', str(position))
    matches = data_service.query(resource_tag, tag_query=tag_query, **kw)
    next_index = len(existing)
    for match in matches:
        uri = match.get('uri')
        # skip resources already referenced by the dataset
        if dataset.xpath('./value[text()="%s"]' % uri):
            continue
        value_el = etree.SubElement(dataset, 'value', type='object', index=str(next_index))
        value_el.text = uri
        next_index += 1
    log.debug("members = %s" % etree.tostring(dataset))
    updated = data_service.update(dataset)
    return etree.tostring(updated)
def do_notify_users(self, userlist, message):
    """Email *message* (a string.Template body) to every user in *userlist*.

    Iterates all public user records, substitutes per-user variables
    (user_name, email, display_name plus self.get_variables()) into the
    template, and sends via notify_service. Send failures are logged and
    do not stop the loop.

    @param userlist: collection of email addresses allowed to receive the message
    @param message: template text with $variable placeholders
    """
    log.debug(message)
    variables = self.get_variables()
    #for users
    users = data_service.query(resource_type='user', wpublic='true', view='full')
    for u in users:
        variables['user_name'] = u.get('name')
        variables['email'] = u.get('value')
        # ROBUSTNESS FIX: a user record may lack a display_name tag; find()
        # then returns None and .get('value') raised AttributeError, aborting
        # notification for every remaining user. Fall back to an empty name.
        display = u.find('tag[@name="display_name"]')
        variables['display_name'] = display.get('value') if display is not None else ''
        if variables['email'] not in userlist:
            continue
        # strings are immutable: the previous copy.deepcopy(message) was a no-op
        msg = string.Template(message).safe_substitute(variables)
        #for v,t in variables.iteritems():
        #    msg = msg.replace('$%s'%v, t)
        # send
        log.info('Sending message to: %s', variables['email'])
        log.info('Message:\n%s', msg)
        try:
            notify_service.send_mail(
                variables['bisque_email'],
                variables['email'],
                'Notification from %s service' % variables['service_name'],
                msg,
            )
        except Exception:
            # best-effort delivery: log and continue with the next user
            log.exception("Mail not sent")
def get_counts(self, resource_type, num_days):
    """Return per-day counts of *resource_type* for the last *num_days* days.

    @param resource_type: resource tag to count (e.g. 'image')
    @param num_days: how many 24h windows (ending today 23:59:59) to report
    @return: (counts, days) — two parallel lists, oldest day first; counts
             are strings ('0' when the query fails or matches nothing)
    """
    now = datetime.now().replace(hour=23, minute=59, second=59, microsecond=0)
    counts = []
    days = []
    for offset in range(num_days):
        day_end = now - timedelta(days=offset)
        day_start = now - timedelta(days=offset + 1)
        ts = ['>%s' % day_start.isoformat(), '<=%s' % day_end.isoformat()]
        days.append(day_end.isoformat(' '))
        # dima: some error happens in data_service and this throws
        try:
            req = data_service.query(resource_type, view='count', ts=ts, permcheck=False)
            log.debug('Daily Usage for [%s - %s] %s' %
                      (day_start.isoformat(), day_end.isoformat(), etree.tostring(req)))
            matched = req.xpath('//%s[@count]' % resource_type)
            counts.append(matched[0].get('count') if matched else '0')
        except AttributeError:
            counts.append('0')
    # queries ran newest-first; report oldest-first
    counts.reverse()
    days.reverse()
    return counts, days
def list(self, path=None, *args, **kwargs):
    """Locate image/file resources whose stored value matches *path*."""
    log.info("list( %s )", path)
    found = data_service.query('image|file', resource_value=path, wpublic='1', cache=False)
    return etree.tostring(found)
def delete_links(self, resource):
    """Remove every store link node that references *resource* (by uniq)."""
    log.debug("delete_links %s", resource.get('resource_uniq'))
    # Delete the reference in the store
    matching = data_service.query('link',
                                  parent=False,
                                  value=resource.get('resource_uniq'),
                                  cache=False)
    for entry in matching:
        log.debug("delete_blob: delete link %s", entry.get('uri'))
        # skip ACL / blob / reference checks: the link itself is being purged
        data_service.del_resource(entry, check_acl=False, check_blob=False, check_references=False)
def list_stores(username=None):
    """Print the configured mount drivers (one per line).

    @param username: limit the user query to one user; all users otherwise
    """
    if username is None:
        users = [x.get('name') for x in data_service.query('user', wpublic=1)]
    else:
        users = [username]
    drivers = load_default_drivers()
    print("\n\nDrivers:\n")
    for key in drivers:
        print("%s: %s" % (key, drivers[key]))
def _load_root_mount(self): "fetch the root mount and submounts" #root = data_service.query('store', resource_unid='(root)', view='full', cache=False) root = data_service.query('store', resource_unid='(root)', view='full') #root = data_service.query('store', resource_unid='(root)', view='short', cache=False) if len(root) == 1: return self._create_default_mounts(root[0]) elif len(root) == 0: raise IllegalOperation ("No root store not valid %s", etree.tostring (root)) return root[0]
def update_stores(username=None):
    """ Update stores to use current datadir specifications """
    # one user when given, otherwise every (public) user
    users = [username] if username is not None else \
        [x.get('name') for x in data_service.query('user', wpublic=1)]
    drivers = load_default_drivers()
    for name in users:
        # perform the mount update while impersonating each user
        with identity.as_user(name):
            _update_mounts(drivers)
def init_stores(username=None):
    """Ensure stores are initialized with correct values and tag order."""
    # one user when given, otherwise every (public) user
    users = [username] if username is not None else \
        [x.get('name') for x in data_service.query('user', wpublic=1)]
    drivers = load_default_drivers()
    for name in users:
        # create/verify the hidden root mount while impersonating each user
        with identity.as_user(name):
            _create_root_mount(drivers)
def remove(self, path, delete_blob=True, user=None, **kwargs):
    """Delete every file/image resource whose stored value matches *path*.

    @param path: stored resource value to match
    @param delete_blob: accepted for interface compatibility (unused here)
    @param user: when set and caller is admin, act on behalf of this user
    @return: XML string of the (pre-delete) query result
    """
    log.info("delete() called %s", path)
    # admins may impersonate another user for the delete
    if user is not None and identity.is_admin():
        identity.current.set_current_user(user)
    matches = data_service.query("file|image", resource_value=path, wpublic='1', cache=False)
    for entry in matches:
        data_service.del_resource(entry)
    return etree.tostring(matches)
def move_stores(from_store, to_store, username=None):
    """Move resources from one store to another for one or all users.

    @param from_store: source store name
    @param to_store: destination store name
    @param username: limit the move to one user; all users otherwise
    """
    if username is not None:
        users = [username]
    else:
        users = [x.get('name') for x in data_service.query('user', wpublic=1)]
    # local import avoids a circular dependency at module load time
    from bq.core.service import service_registry
    file_service = service_registry.find_service("mnt")
    drivers = load_default_drivers()
    for user in users:
        # BUG FIX: the format string was previously passed alongside its
        # arguments — print("MOVING %s -> %s for %s ", a, b, c) — so it was
        # never interpolated (printed a tuple under Python 2). Apply '%'.
        print("MOVING %s -> %s for %s" % (from_store, to_store, user))
        with identity.as_user(user):
            file_service.move(from_store, to_store)
def _create_root_mount(drivers):
    'create/find hidden root store for each user'
    found = data_service.query('store', resource_unid='(root)', view='full')
    n_roots = len(found)
    if n_roots == 0:
        # no root yet: build the whole default mount tree
        return _create_default_mounts(drivers)
    if n_roots == 1:
        # root exists: make sure the default submounts hang off it
        return _create_default_mounts(drivers, found[0])
    # more than one root store is a configuration error
    log.error("Root store created more than once: %s ", etree.tostring(found))
    return None
def init_classes_dataset(self):
    """Initialize the model's class table from its training dataset.

    Queries all typed gobjects under the training set, groups them by
    (adapted) class name, counts samples per class, and resets the
    trained-model class maps. Updates the model resource tags on success.

    Raises ConnoisseurException when no training set is configured or the
    model is locked by another process; re-raises any failure after
    recording it on the 'status.classes.init' tag.
    """
    if self.training_set is None:
        raise ConnoisseurException(responses.BAD_REQUEST,
                                   'Cannot initialize classes due to missing training dataset')
    # failonexist: only one process may (re)initialize classes at a time
    with Locks(None, self.lockable, failonexist=True) as l:
        if l.locked is False:  # the file is being written by another process
            raise ConnoisseurException(responses.LOCKED,
                                       'The model is locked for processing by another process')
        try:
            dataset_url = ensure_url(self.training_set)
            adapter_gobs = self.create_adapter_gobs(model=self, image=None)
            classes = {}
            # extract='gobject[type]' returns value nodes carrying a gobject
            # type attribute and a textual sample count
            gobs = data_service.query(resource_type='value',
                                      parent=dataset_url,
                                      extract='gobject[type]')
            idx = 0
            self.total_samples = 0
            for g in gobs:
                k = g.get('type')
                n = misc.safeint(g.text, 0)
                if k is None:
                    continue
                # adapt the class name, might need some change since the node is not a true gobject
                k = adapter_gobs.get_class_name(g)
                if k is None:
                    continue
                if k not in classes:
                    classes[k] = {
                        'label': k,
                        'id': idx,
                        'samples': n,
                    }
                    idx += 1
                else:
                    classes[k]['samples'] += n
                self.total_samples += n
            self.classes_data = classes
            # index the same class records by their numeric id
            self.classes_data_by_original_id = dict((v['id'], v) for k, v in self.classes_data.iteritems())
            #log.debug('Classes data: %s', str(self.classes_data))
            # reset trained-model class maps; filled in by later training steps
            self.classes_model = {}
            self.classes_model_by_id = {}
            self.classes_model_by_original_id = {}
            self.number_classes_in_model = 0
        except:
            # record the failure on the model resource, then propagate
            self.update_with_error('status.classes.init', 'Exception during init_classes_dataset')
            raise
        # update model resource
        set_tag(self.resource, 'total_samples', self.total_samples)
        set_classes(self.resource, 'classes_data', self.classes_data)
        set_classes(self.resource, 'classes_model', self.classes_model)
        set_tag(self.resource, 'status.classes.init', 'finished')
        self.sync_resource()
def _create_root_mount(self):
    'create/find hidden root store for each user'
    root = data_service.query('store', resource_unid='(root)', view='full')
    #root = data_service.query('store', resource_unid='(root)', view='short')
    # NOTE: these are two separate ifs (not if/elif): with len==0 the second
    # if and its elif are both false, so control falls through to mapuris.
    if len(root) == 0:
        # no root store yet: create the default mounts from scratch
        found_root = self._create_default_mounts()
    if len(root) == 1:
        # root exists: ensure default submounts exist under it
        found_root = self._create_default_mounts(root[0])
    elif len(root) > 1:
        # configuration error: refuse to guess which root is correct
        log.error("Root store created more than once: %s ", etree.tostring(root))
        return None
    self.mapuris(found_root)
    return found_root
def move(self, path, destination, user=None, **kw):
    """Move file/image resources matching *path* to a new store *destination*.

    @param path: current stored value of the resource(s) to move
    @param destination: full destination store url for the resource
    @param user: when set and caller is admin, act on behalf of this user
    @return: XML string of the last updated resource
    Aborts with 400 when destination (or a source) is not a valid store ref.
    """
    log.info("move(%s,%s) %s %s", path, destination, tg.request.method, kw)
    if user is not None and identity.is_admin():
        identity.current.set_current_user(user)
    # sanity check
    # build a throwaway element just to validate the destination store ref
    resource = etree.Element('resource', value=destination)
    store, driver = self.mounts.valid_store_ref(resource)
    if store is None:
        abort(400, "%s is not a valid store " % destination)
    # NOTE: 'resource' is reused below — first the query result, then the
    # per-child update result; the final tostring serializes the last update.
    resource = data_service.query("file|image", resource_value=path, wpublic='1', cache=False)
    for child in resource:
        old_store, old_driver = self.mounts.valid_store_ref(child)
        if old_store is None:
            abort(400, "%s is not a valid store " % destination)
        # Remove links in directory hierarchy
        self.mounts.delete_links(child)
        # Change the location
        child.set('value', destination)
        child.set('name', os.path.basename(destination))
        resource = data_service.update(child)
        # Update the tag
        q1 = data_service.query('tag', parent=resource, name='filename')
        if len(q1):
            q1[0].set('value', os.path.basename(destination))
            data_service.update(q1[0])
        # update the links
        # path of destination relative to the new store's mount url
        partial_path = destination.replace(driver.mount_url, '')
        self.mounts.insert_mount_path(store, partial_path, resource)
    return etree.tostring(resource)
def welcomebackground(self, **kw):
    """Redirect to a thumbnail URL for the welcome-page background image.

    Selection order: the configured 'bisque.background_resource'; otherwise a
    random image matching 'bisque.background_query' (retried with public
    visibility); otherwise a random image from the whole collection.
    Redirects to None (no thumbnail) when nothing is available.
    """
    log.info("BACKGROUND %s " % session)
    # default visibility: public when nobody is logged in
    wpublic = kw.pop('wpublic', not bq.core.identity.current)
    thumbnail = None
    imageurl = None
    welcome_resource = config.get('bisque.background_resource', None)
    thumb_size = kw.get('size', '800,600')
    if welcome_resource:
        # explicit background configured: use it directly
        imageurl = welcome_resource
        try:
            image = data_service.get_resource(imageurl)
            thumbnail = '/image_service/image/%s?thumbnail=%s' % (
                image.get('resource_uniq'), thumb_size)
        except Exception:
            log.exception('bisque.background (%s) set but not available' % imageurl)
    else:
        tag_query = config.get('bisque.background_query', "welcome_background:")
        image_count = data_service.count("image", tag_query=tag_query, wpublic=wpublic)
        wpublic_query = wpublic
        if image_count == 0 and wpublic == False:
            # nothing visible privately: retry the tag query against public images
            wpublic_query = True
            image_count = data_service.count("image", tag_query=tag_query, wpublic=wpublic_query)
        # None found .. pick a random
        if image_count == 0:
            image_count = data_service.count("image", wpublic=wpublic)
            tag_query = None
            wpublic_query = wpublic
        if image_count:
            # pick a random image by offsetting into the query results
            im = random.randint(0, image_count - 1)
            image = data_service.query('image',
                                       tag_query=tag_query,
                                       wpublic=wpublic_query,
                                       offset=im,
                                       limit=1)[0]
            #imageurl = self.viewlink(image.attrib['uri'])
            thumbnail = '/image_service/image/%s?thumbnail=%s' % (
                image.get('resource_uniq'), thumb_size)
    # NOTE(review): thumbnail may still be None here — confirm redirect(None) is intended
    redirect(base_url=thumbnail)
def delete_blob(self, resource):
    'Delete elements for a resource'
    # Removes store links for the resource, then deletes its blob file(s)
    # from the backing store — but only when no other resource references
    # the same store url. Returns None (and skips deletion) for invalid or
    # read-only stores.
    log.debug("delete_blob %s", resource.get('resource_uniq'))
    self.delete_links(resource)
    store, driver = self._find_store(resource)
    if store is None:
        log.warn('Not a valid store ref in %s', etree.tostring(resource))
        return None
    if driver.readonly:
        log.warn("Delete blob on readonly store.. skipping")
        return None
    with driver as driver:
        uniq = resource.get('resource_uniq')
        # blob url(s): single 'value' attribute, or one per <value> child
        bloburls = resource.get('value')
        if bloburls is None:
            bloburls = [x.text for x in resource.xpath('value')]
        elif bloburls:
            bloburls = [bloburls]
        else:
            bloburls = []
        log.debug("fetch_blob %s -> %s", resource.get('resource_uniq'), bloburls)
        # If image has subimages then check for other subimage otherwise for image itself.
        # NOTE(review): bloburls may be empty here (no value attr/children),
        # which would raise IndexError below — confirm callers guarantee a value.
        file_query, sub = split_subpath(bloburls[0])
        #if len(sub)>0:
        file_query = '%s*' % file_query
        # sanity check . ensure exactly one reference to store url before delete
        # Since storeurl can contain '#' marks for series files
        # What about directory URL.. this may match too many
        blobrefs = data_service.query(parent=False, value=file_query, cache=False)
        if len(blobrefs) < 2:
            # this resource is the only referrer: safe to delete the files
            for storeurl in bloburls:
                driver.delete(storeurl)
        else:
            log.warn("blob delete skipped >2 refs %s", str(blobrefs))
def resource_acl_query(resource, user_uniq=None, recurse=False, filter_resource_type=None, response=None):
    """Query a set of resource acl

    Builds (or extends) an XML <response> listing ACL entries for *resource*,
    optionally filtered to one sharing user (*user_uniq*). When *recurse*
    names a resource type, related resources that reference this resource
    (found via tag_query) are nested with their own ACL entries.

    @param resource: a Taggable DB object (or falsy to query all ACLs)
    @param user_uniq: restrict ACLs to this user's resource_uniq
    @param recurse: resource type to chase for related ACLs (e.g. 'mex'), or False
    @param filter_resource_type: unused here  # NOTE(review): confirm dead parameter
    @param response: existing element to append into; created when None
    @return: the response element
    """
    log.debug("ACL_QUERY %s %s", resource, user_uniq)
    if response is None:
        response = etree.Element('response')
    query = DBSession.query(TaggableAcl)
    if resource:
        query = query.filter(TaggableAcl.taggable_id == resource.id)
    if user_uniq:
        # join to Taggable to translate resource_uniq -> user row id
        query = query.filter(TaggableAcl.user_id == Taggable.id,
                             Taggable.resource_uniq == user_uniq)
    for auth in query:
        log.debug("Found %s with user %s", auth, auth.user)
        response.append(_aclelem(auth.resource, auth.user, auth))
        if recurse:
            # look for filter_resource have a value of resource uniq
            related = data_service.query(resource_type=recurse,
                                         tag_query="*%s*" % auth.resource.resource_uniq)
            # For given a given resource return the list non-empty
            # auth records of associated resources (all the mexes
            # that points to the current resource)
            for relation in related:
                #resource = DBSession.query (Taggable).filter_by (resource_uniq = relation.get ('resource_uniq')).first()
                # ACL of the related resource for the same sharing user
                rq = DBSession.query(TaggableAcl).filter(
                    TaggableAcl.taggable_id == Taggable.id,
                    TaggableAcl.user_id == auth.user_id,
                    Taggable.resource_uniq == relation.get('resource_uniq')).first()
                # NOTE(review): logs the outer 'resource' parameter, not the
                # related record — possibly a leftover from the commented line
                log.debug("LOADED %s %s", recurse, resource)
                if rq is not None:
                    # Nested response
                    relation.append(_aclelem(rq.resource, rq.user, rq))
                    response.append(relation)
                    #resource_acl_query (resource, auth.user.resource_uniq, response=relation)
                    # flat response
                    #resource_acl_query (resource, auth.user.resource_uniq, response=response)
    return response
def _add_resource_inputs_outputs(xnode, edges, checked, unchecked):
    """For the given xnode, find all other nodes that are connected to it by
    direct edges and record them in *edges* / *unchecked* (both mutated).

    For MEX type, input is all links in "inputs" section, output is all links
    in "outputs" section. For other types, input is all MEXs with it in
    "outputs" section, output is all MEXs with it in "inputs" section.

    Inputs: any ref in top "inputs" section without self-references
    Outputs: any ref in top "outputs" section without self-references or
    input references

    @param xnode: resource element (must carry resource_uniq)
    @param edges: set of (from_uniq, to_uniq) tuples, extended in place
    @param checked: set of already-visited uniqs (read only)
    @param unchecked: set of uniqs still to visit, extended in place
    """
    node = xnode.get('resource_uniq')
    if xnode.tag == 'mex':
        # a mex lists its neighbors directly via http refs in inputs/outputs
        points_from_list = [
            x.rsplit('/', 1)[1]
            for x in xnode.xpath('./tag[@name="inputs"]/tag/@value')
            if x.startswith("http")
        ]
        points_to_list = [
            x.rsplit('/', 1)[1]
            for x in xnode.xpath('./tag[@name="outputs"]/tag/@value')
            if x.startswith("http")
        ]
    else:
        # non-mex: find mexes referencing this node and classify the direction
        points_from_list = []
        points_to_list = []
        # TODO: the following will be very slow on large DBs... change to new query in 0.6!
        mexes_ref_node = data_service.query('mex', tag_query='"http*/%s"' % node, cache=False)
        for mex_ref_node in mexes_ref_node:
            mex_deep = data_service.resource_load(
                uniq=mex_ref_node.get('resource_uniq'), view='full')
            if mex_deep:
                found_in_inputs = False
                inputs_tag = mex_deep.xpath('./tag[@name="inputs"]')
                if inputs_tag:
                    input_id = inputs_tag[0].get('uri')
                    input_deep = data_service.get_resource(resource=input_id, view='full,clean')
                    if input_deep and len(
                            input_deep.xpath('./tag[@value="%s"]' % xnode.get("uri"))) > 0:
                        # found node in MEX's inputs
                        points_to_list.append(mex_ref_node.get('resource_uniq'))
                        found_in_inputs = True
                if not found_in_inputs:
                    outputs_tag = mex_deep.xpath('./tag[@name="outputs"]')
                    if outputs_tag:
                        output_id = outputs_tag[0].get('uri')
                        output_deep = data_service.get_resource(
                            resource=output_id, view='full,clean')
                        if output_deep and len(
                                output_deep.xpath('./tag[@value="%s"]' % xnode.get("uri"))) > 0:
                            # found node in MEX's outputs
                            points_from_list.append(mex_ref_node.get('resource_uniq'))
    # add edge unless it points to mex recursively
    points_from_list = [
        x for x in points_from_list if is_uniq_code(x) and x != node
    ]
    # add edge unless it points to mex recursively or back to an input
    points_to_list = [
        x for x in points_to_list
        if is_uniq_code(x) and x != node and x not in points_from_list
    ]
    log.debug("points_to_list %s", points_to_list)
    log.debug("points_from_list %s", points_from_list)
    for xlink in points_from_list:
        if (xlink, node) not in edges:
            log.debug("ADDING IN EDGE : %s" % str((xlink, node)))
            edges.add((xlink, node))
            # queue the neighbor for traversal if not seen before
            if xlink not in checked:
                unchecked.add(xlink)
    for xlink in points_to_list:
        if (node, xlink) not in edges:
            log.debug("ADDING OUT EDGE : %s" % str((node, xlink)))
            edges.add((node, xlink))
            if xlink not in checked:
                unchecked.add(xlink)
def fileInfoList(self, fileList, datasetList, urlList, dirList):
    """Build the list of file-descriptor dicts for an archive download.

    Resolves individual resources, dataset members, directory links and raw
    URLs into dicts with (at least) 'name', 'outpath' and either 'path'
    (on-disk file) or 'content' (in-memory bytes). When self.export_meta is
    set, companion .xml metadata entries are added; when self.export_mexs is
    set, mexes referencing each resource are included too.

    @param fileList: list of resource uris
    @param datasetList: list of dataset uris
    @param urlList: list of plain urls to fetch
    @param dirList: list of directory resource uris
    @return: list of file-info dicts
    """
    log.debug('fileInfoList fileList: %s' % fileList)
    log.debug('fileInfoList datasetList: %s' % datasetList)
    log.debug('fileInfoList urlList: %s' % urlList)
    log.debug('fileInfoList dirList: %s' % dirList)
    flist = []
    fileHash = {}  # Use a URI hash to look out for file repetitions

    def fileInfo(relpath, uri, index=0):
        # Resolve one resource uri into a list of file-info dicts
        # (one dict per physical file for multi-file resources).
        xml = data_service.get_resource(uri, view='deep,clean')
        if xml is None:
            log.warn('skipping unreadable uri %s', uri)
            return None
        name = xml.get('name')
        uniq = xml.get('resource_uniq', None)
        # try to figure out a name for the resource
        if not name:
            name = xml.xpath('./tag[@name="filename"]') or xml.xpath('./tag[@name="name"]')
            name = name and name[0].get('value')
        if not name and uniq:
            # NOTE(review): uniq[-4] is a single character; a last-4 suffix
            # (uniq[-4:]) was likely intended — confirm before changing
            name = uniq[-4]
        if not name:
            name = str(index)
        path = None
        files = None
        if uniq is not None:
            #del xml.attrib['resource_uniq'] # dima: strip resource_uniq from exported xml
            b = blob_service.localpath(uniq)
            if b:
                files = b.files
                if files is not None and len(files) > 0:
                    path = files[0]
                else:
                    path = b.path
                if path and not os.path.exists(path):
                    path = None
            else:
                log.warn("Resource %s ( %s ) did not have blob", uniq, xml.tag)
        # if resource is just an XML doc
        content = None
        if path is None:
            # no file on disk: ship the metadata document itself
            content = etree.tostring(xml)
            name = '%s_%s' % (name, uniq)
            xml = None
        # disambiguate file name if present
        ext = '' if path is not None else '.xml'
        outpath = os.path.join(relpath, '%s%s' % (name, ext)).replace('\\', '/')
        if outpath in fileHash:
            fname, ext = os.path.splitext(name)
            name = '%s%s%s' % (fname, uniq, ext)
            outpath = os.path.join(relpath, '%s%s' % (name, ext)).replace('\\', '/')
        fileHash[outpath] = name
        if files is None or len(files) < 2:
            # single-file (or metadata-only) resource: one entry
            return [{
                'xml': xml,
                'content': content,
                'name': name,
                'uniq': uniq,
                'path': path,
                'relpath': relpath,
                'outpath': outpath,
            }]
        log.debug('fileInfo name: %s, path: %s, relpath: %s, outpath: %s',
                  name, path, relpath, outpath)
        log.debug('fileInfo files: %s', files)
        # find minimum relative path
        min_length = sys.maxint
        for f in files:
            min_length = min(min_length, len(os.path.dirname(f)))
        minpath = files[0][:min_length + 1]
        log.debug('fileInfo minpath: %s', minpath)
        # check if file disimbiguation is needed
        subpath = files[0][min_length + 1:]
        outpath = os.path.join(relpath, name, subpath).replace('\\', '/')
        if outpath in fileHash:
            name = '%s.%s' % (name, uniq)
            outpath = os.path.join(relpath, name, subpath).replace('\\', '/')
        fileHash[outpath] = name
        infos = []
        first = True
        for f in files:
            subpath = f[min_length + 1:]
            info = {
                'name': os.path.basename(f),
                'uniq': uniq,
                'path': f,
                'relpath': relpath,
                'outpath': os.path.join(relpath, name, subpath).replace('\\', '/'),
                'subpath': subpath.replace('\\', '/'),
            }
            # only the first entry carries the resource xml/content
            if first is True:
                first = False
                info['xml'] = xml
                info['content'] = content
            infos.append(info)
        log.debug('fileInfo infos: %s', infos)
        return infos

    def xmlInfo(finfo):
        # Build the companion .xml metadata entry for a fileInfo result.
        if len(finfo) == 1:
            finfo = finfo[0]
            file = finfo.copy()
            file['outpath'] = '%s.xml' % file['outpath']
            # need to modify the resource value to point to a local file
            #file['xml'].set('value', os.path.basename(file['xml'].get('value', '')))
            file['xml'].set('value', finfo['name'])
            file['content'] = etree.tostring(file['xml'])
            del file['path']
            del file['xml']
            return file
        else:
            # multi-file resource: rewrite each <value> to its local subpath
            file = finfo[0].copy()
            file['outpath'] = '%s.xml' % file['outpath']
            i = 0
            for v in file['xml'].xpath('value'):
                v.text = finfo[i]['subpath']
                i += 1
            file['content'] = etree.tostring(file['xml'])
            del file['path']
            del file['xml']
            return file

    def urlInfo(url, index=0):
        # Fetch a raw URL and build an in-memory file entry for it.
        #httpReader = httplib2.Http( disable_ssl_certificate_validation=True)
        #httpReader = requests
        # This hack gets around bisque internal authentication mechanisms
        # please refer to http://biodev.ece.ucsb.edu/projects/bisquik/ticket/597
        headers = dict((name, request.headers.get(name, ''))
                       for name in ['Authorization', 'Mex', 'Cookie']
                       if name in request.headers)
        # test if URL is relative, httplib2 does not fetch relative
        if urlparse.urlparse(url).scheme == '':
            url = urlparse.urljoin(config.get('bisque.root'), url)
        log.debug('ArchiveStreamer: Sending %s with %s' % (url, headers))
        response = requests.get(url, headers=headers)  #pylint: disable=no-member
        if not response.status_code == requests.codes.ok:
            log.error("URL request returned %s", response.status_code)
            return None
        # derive the file name from the content-disposition header if any
        items = response.headers.get('content-disposition', '').split(';')
        fileName = str(index) + '.'
        log.debug('Respose headers: %s', response.headers)
        log.debug('items: %s' % items)
        for item in items:
            pair = item.split('=')
            if (pair[0].lower().strip() == 'filename'):
                fileName = pair[1].strip('"\'')
            if (pair[0].lower().strip() == 'filename*'):
                try:
                    fileName = pair[1].strip('"\'').decode('utf8')
                except UnicodeDecodeError:
                    pass
        return dict(name=fileName, content=response.content, outpath=fileName)

    # processing a list of resources
    if len(fileList) > 0:
        for index, uri in enumerate(fileList):
            finfo = fileInfo('', uri)
            if finfo is None:
                continue
            flist.extend(finfo)
            if self.export_meta is True and finfo[0].get('xml') is not None:
                flist.append(xmlInfo(finfo))
            # find all mexs that use this resource explicitly
            # dima: we'll not get any second level mexs
            # mexs that use mexs, will need closure query in the db for that
            if self.export_mexs:
                mexq = data_service.query('mex', tag_query=finfo[0]['xml'].get('uri'))
                members = mexq.xpath('//mex')
                for m in members:
                    uri = m.get('uri')
                    flist.extend(fileInfo('', uri))

    # processing a list of datasets
    if len(datasetList) > 0:
        for uri in datasetList:
            dataset = data_service.get_resource(uri, view='deep,clean')
            name = dataset.xpath('/dataset/@name')[0]
            members = dataset.xpath('/dataset/value')
            uniq = dataset.get('resource_uniq', '')
            #del dataset.attrib['resource_uniq'] # dima: strip resource_uniq from exported xml
            for index, member in enumerate(members):
                finfo = fileInfo(name, member.text, index)
                if finfo is None:
                    continue
                finfo[0]['dataset'] = name
                flist.extend(finfo)
                # update reference in the dataset xml
                if self.export_meta is True and finfo[0].get('xml') is not None:
                    flist.append(xmlInfo(finfo))
                    member.text = '%s.xml' % finfo[0].get('outpath', '')
                else:
                    member.text = finfo[0].get('outpath', '')
            if self.export_meta:
                # disambiguate file name if present
                name = '%s.xml' % name
                if name in fileHash:
                    fname, ext = os.path.splitext(name)
                    name = '%s%s%s' % (fname, uniq, ext)
                fileHash[name] = name
                # Insert dataset XML into file list
                flist.append(dict(name=name, content=etree.tostring(dataset), outpath=name))

    # processing a list of directories
    if len(dirList) > 0:
        for uri in dirList:
            # read dir from blob storage, dima: need to access blob storage
            folder = data_service.get_resource(uri, view='deep')
            members = folder.xpath('//link')
            for index, member in enumerate(members):
                # dima: need to compute proper URI
                uniq = member.get('value', None)
                uri = '/data_service/%s' % uniq  # compute URI from uniq, dima: does not work today: 403 forbidden
                # compute path for each link by traversing up the tree
                folder = []  # relative path to the resource from currently selected dir with no trailing slash
                parent = member
                while parent is not None:
                    parent = parent.xpath('..')
                    parent = parent[0] if len(parent) > 0 else None
                    if parent is not None:
                        folder.append(parent.get('name', None))
                folder.reverse()
                finfo = fileInfo('/'.join(folder), uri, index)
                if finfo is None:
                    continue
                flist.extend(finfo)
                if self.export_meta is True and finfo[0].get('xml') is not None:
                    flist.append(xmlInfo(finfo))

    # processing a list of URLs
    if len(urlList) > 0:
        for index, url in enumerate(urlList):
            # skip duplicate urls
            if fileHash.get(url) != None:
                continue
            else:
                fileHash[url] = 1
            # NOTE(review): urlInfo may return None on fetch failure; that
            # None is appended to flist — confirm downstream tolerates it
            finfo = urlInfo(url, index)
            flist.append(finfo)
    return flist
def _create_full_path(self, store, path, resource_uniq=None, resource_name=None, count=1, **kw):
    """Create the full path relative to store

    Walks the existing store hierarchy as far as possible, then creates any
    missing directories and a final dir/link element (a link when
    resource_uniq is given).

    @param store: a string name or etreeElement
    @param path: a path relative to the store
    @param resource_uniq: optional resource to be placed
    @param resource_name: options name of resource
    @param count: suffix used to disambiguate a conflicting link name
    @return: the newly created resource (data_service.new_resource result),
             or None on ambiguity
    """
    if isinstance(path, basestring):
        path = path.split('/')
    path = list(path)
    root = None
    log.debug("CREATE_PATH %s %s", store, path)
    if isinstance(store, basestring):
        # a plain name: start a new <store> element and anchor at the root mount
        resource = root = etree.Element('store', name=store, resource_unid=store)
        store = self._load_root_mount()
    # Scan path for all existing directories and possible existing filelink
    parent = store
    while parent is not None and path:
        el = path.pop(0)
        if not el:
            continue
        #q = data_service.query(parent=parent, resource_unid=el, view='full', cache=False)
        q = data_service.query(parent=parent, resource_unid=el, view='short', cache=False)
        if len(q) == 0:
            # no element we are done
            path.insert(0, el)
            break
        if len(q) > 1:
            log.error('multiple names (%s) in store level %s', el, q.get('uri'))
            #path.insert(0, el)
            parent = q[0]
            return None
            #break # return Fail?
        # len(q) == 1 .. we have a result .. just keep searching down the path
        # parent2 remembers the grandparent for the name-conflict case below
        parent2 = parent
        parent = q[0]
    # directories do not exists len(path) > 1
    # directories exist but filelink does not len(path) == 1
    # directories and filelink exists len(path) == 0
    log.debug("create: at %s rest %s", (parent is not None) and parent.get('uri'), path)
    while len(path) > 1:
        # any left over path needs to be created
        nm = path.pop(0)
        if root is None:
            resource = root = etree.Element('dir', name=nm, resource_unid=nm)
        else:
            resource = etree.SubElement(resource, 'dir', name=nm, resource_unid=nm)
    # The last element might be dir or a link
    if len(path) == 1:
        nm = resource_name or path.pop(0)
        #nm = path.pop(0)
        if root is None:
            resource = root = etree.Element('link' if resource_uniq else 'dir',
                                            name=nm, resource_unid=nm)
        else:
            resource = etree.SubElement(resource,
                                        'link' if resource_uniq else 'dir',
                                        name=nm, resource_unid=nm)
        if resource_uniq:
            resource.set('value', resource_uniq)
        # create the new resource
        log.debug("New resource %s at %s ", etree.tostring(root),
                  (parent is not None) and parent.get('uri'))
        q = data_service.new_resource(resource=root, parent=parent, flush=False)
    elif len(path) == 0:
        log.warn("NAME conflict? %s %s", resource_name, count)
        # Conflict in link name? Possible when mutltfile name has not be disambiguated by underlying filesystem
        nm = "%s-%s" % (resource_name, count)
        if root is None:
            resource = root = etree.Element('link' if resource_uniq else 'dir',
                                            name=nm, resource_unid=nm)
        else:
            resource = etree.SubElement(resource,
                                        'link' if resource_uniq else 'dir',
                                        name=nm, resource_unid=nm)
        if resource_uniq:
            resource.set('value', resource_uniq)
        # attach under the grandparent (parent2) to sidestep the conflicting node
        q = data_service.new_resource(resource=root, parent=parent2, flush=False)
    return q
def _default(self, *path, **kw):
    """Build a provenance graph around the resource in path[0].

    Walks references (inputs/outputs) starting at the query resource,
    summarizes the resulting node/edge sets, and returns an XML string:
    either a <graph> document or — when the path addresses a multi node
    ("<resource_uniq>/<multi_node_id>") — a "virtual" <dataset>.
    """
    path = list(path)
    res_uniq = path[0]
    query = path[0]
    # if path is of format "<resource_uniq>/<multi_node_id>", return the contents of the multi node as a "virtual" dataset
    if len(path) > 1 and path[1].isdigit():
        multi_node_id = "%s/%s" % (path[0], path[1])
        res_uniq = "%s-%s" % (path[0], path[1])
    else:
        multi_node_id = None
    extra = path[-1] if len(path) > 1 and not path[-1].isdigit() else None  # could be 'auth', 'value', or 'tag'
    view = kw.pop('view', 'short')
    offset = kw.pop('offset', 0)
    limit = kw.pop('limit', 1000)
    tag_query = kw.pop('tag_query', None)
    tag_order = kw.pop('tag_order', None)
    extract = kw.pop('extract', None)
    request_url = request.url
    if extra == 'auth' or extra == 'tag':
        # no sharing etc and no tags: answer with an empty resource element
        response = etree.Element('resource', uri=request_url)
        return etree.tostring(response)
    big_types = ('table', 'image', 'file')  # types with many instances (treat as identical for collapsing)
    nodes = set()       # set of (uniq, type) tuples
    edges = set()       # set of (uniq, uniq) reference pairs
    resources = set()   # plain resources (neither mex nor dataset)
    datasets = {}       # dataset uniq -> [ member id, member id, ... ]
    checked = set()
    unchecked = set()
    unchecked.add(query)
    query_node = None
    # Breadth-less worklist traversal of the reference graph
    while unchecked:
        log.debug("graph unchecked %s", unchecked)
        node = unchecked.pop()
        # Find everybody this node references:
        xnode = data_service.resource_load(uniq=node, view='short')
        if xnode is None:
            log.error('could not load %s', node)
            # drop the unloadable node from the graph entirely
            edges = set([(n1, n2) for (n1, n2) in edges if n1 != node and n2 != node])  # remove edges from/to node
            continue
        node_type = xnode.tag
        if node_type == 'resource':
            node_type = xnode.get('resource_type') or xnode.tag
        if node_type.startswith('mex') or node_type.startswith('dataset'):
            # need the full document to read outputs / values below
            xnode = data_service.resource_load(uniq=node, view='deep')  # TODO: this is too expensive... replace with 0.6 query!
        if node_type not in big_types:
            # keep the name so small types stay distinguishable after collapsing
            node_type = "%s(%s)" % (node_type, xnode.get('name', 'unknown'))
        nodes.add((node, node_type))
        if node == query:
            query_node = (node, node_type)
        checked.add(node)
        # find all inputs/outputs to this node and update edges and unchecked
        _add_resource_inputs_outputs(xnode, edges, checked, unchecked)
        # add dataset reference information
        if node_type.startswith('mex'):
            # treat Mex's submex outputs as a dataset
            datasets[node] = [x.rsplit('/', 1)[1] for x in xnode.xpath('./mex/tag[@name="outputs"]/tag/@value') if x.startswith('http')]
        else:
            if node_type.startswith('dataset'):
                datasets[node] = [x.rsplit('/', 1)[1] for x in xnode.xpath("./value/text()")]
            else:
                resources.add(node)
    log.debug("pre-summary Nodes : %s, Edges : %s" % (nodes, edges))
    # check if any resource is member of a dataset => add membership link
    members = set()
    for node in resources:
        for dataset in datasets:
            if node in datasets[dataset]:
                members.add((node, dataset))
    # summarize graph (collapses runs of same-typed nodes into SummaryNodes)
    (nodes, edges, members) = _summarize_nodes(query_node, nodes, edges, members, summ_by_types=True)
    log.debug("post-summary Nodes : %s, Edges : %s" % (nodes, edges))
    if multi_node_id:
        # caller wants specific multi node back as dataset
        response = etree.Element('dataset', uri=request_url, resource_uniq=res_uniq)  # data_service.uri() + ('vd-%s' % multi_node_id))
        if view != 'short' or extra == 'value':
            if extra == 'value':
                # ask data_service to expand all elements
                for node in nodes:
                    if isinstance(node, SummaryNode) and node.value == multi_node_id:
                        response = data_service.query(cache=False, resource_uniq='|'.join(node.node_ids), view=view, offset=offset, limit=limit, tag_query=tag_query, tag_order=tag_order, extract=extract)
                        response.set('uri', request_url)
                        break
            else:
                # list member uris as <value> children of the virtual dataset
                for node in nodes:
                    if isinstance(node, SummaryNode) and node.value == multi_node_id:
                        response.set('name', node.type)
                        idx = 0
                        for node_id in node.node_ids:
                            el = etree.SubElement(response, 'value', type='object', index=str(idx))
                            el.text = data_service.uri() + node_id
                            idx += 1
    else:
        response = etree.Element('graph', value=query)
        if view == 'count':
            # only the size of the first summary node is reported
            for node in nodes:
                if isinstance(node, SummaryNode):
                    response = etree.Element('resource')
                    etree.SubElement(response, 'resource', count=str(len(node.node_ids)))
                    break
        elif view != 'short':
            for node in nodes:
                if isinstance(node, SummaryNode):
                    etree.SubElement(response, 'node', value=str(node.value), type=node.type, count=str(len(node.node_ids)))
                else:
                    etree.SubElement(response, 'node', value=node[0], type=node[1])
            # NOTE(review): edges referencing nodes that were dropped or
            # collapsed are skipped with an error log rather than failing
            node_uniqs = [n[0] if not isinstance(n, SummaryNode) else n.value for n in nodes]
            for edge in edges:
                if edge[0] in node_uniqs and edge[1] in node_uniqs:
                    etree.SubElement(response, 'edge', value="%s:%s" % edge)
                else:
                    log.error("Skipping edge %s due to missing nodes", edge)
            for edge in members:
                if edge[0] in node_uniqs and edge[1] in node_uniqs:
                    etree.SubElement(response, 'member', value="%s:%s" % edge)
                else:
                    log.error("Skipping edge %s due to missing nodes", edge)
    return etree.tostring(response)
def run(self): load_config(self.options.config) from lxml import etree from tg import config, session, request from bq import data_service from bq.core.identity import set_admin_mode import transaction load_bisque_services() prefs = config_path('preferences.xml') set_admin_mode(True) if self.args[0].startswith('init'): x = data_service.query('system') if len(x): if self.options.force: print("deleting current system object") data_service.del_resource(x[0]) else: print("NO ACTION: System object initialized at %s " % etree.tostring(x[0])) sys.exit(1) if os.path.exists(prefs): if self.options.force: print("deleting %s" % prefs) os.remove(prefs) else: print('NO ACTION: %s exists.. cannot init' % prefs) sys.exit(1) system = etree.parse( defaults_path('preferences.xml.default')).getroot() for el in system.getiterator(tag=etree.Element): el.set('permission', 'published') system = data_service.new_resource(system, view='deep') elif self.args[0].startswith('read'): system = data_service.query('system', view='deep') if len(system): system = system[0] else: system = None else: if not os.path.exists(prefs): print "Need %s" % prefs return system = etree.parse(prefs).getroot() # Esnure all elements are published for el in system.getiterator(tag=etree.Element): el.set('permission', 'published') # Read system object uri = system.get('uri') print 'system = %s' % etree.tostring(system) system = data_service.update_resource(new_resource=system, resource=uri, view='deep') print etree.tostring(system) transaction.commit() if system is not None: with open(prefs, 'w') as f: f.write(etree.tostring(system, pretty_print=True)) print "Wrote %s" % prefs
def _update_mounts(drivers): update = False user_name = identity.current.user_name user_root = data_service.query('store', resource_unid='(root)', view='full') if len(user_root) == 0: log.warn("No store found") elif len(user_root) == 1: user_root = user_root[0] elif len(user_root) > 1: log.error( "Root store created more than once for %s: %s please check DB", user_name, etree.tostring(user_root)) return None user_stores = dict((x.get('name'), x) for x in user_root.xpath('store')) storeorder = get_tag(user_root, 'order') if storeorder is None: log.warn("order tag missing from root store adding") storeorder = etree.SubElement(user_root, 'tag', name='order', value=','.join(drivers.keys())) update = True elif len(storeorder) == 1: storeorder = storeorder[0] storelist = ','.join(drivers.keys()) if storeorder.get('value') != storelist: storeorder.set('value', storelist) update = True for store_name, driver in drivers.items(): if store_name not in user_stores: print("Need to create new store : %s" % store_name) mount_path = string.Template(driver['mounturl']).safe_substitute( datadir=data_url_path(), user=user_name) etree.SubElement(user_root, 'store', name=store_name, resource_unid=store_name, value=config2url(mount_path)) update = True continue store = user_stores[store_name] mounturl = driver.get('mounturl') mounturl = string.Template(mounturl).safe_substitute( datadir=data_url_path(), user=user_name) # ensure no $ are left mounturl = mounturl.split('$', 1)[0] mounturl = config2url(mounturl) store_value = store.get('value') print("examining store %s with %s" % (store_name, store_value)) if store_value is None or store_value != mounturl: print("Updating store with value %s to %s" % (store_value, mounturl)) store.set('value', mounturl) update = True if update: return data_service.update(user_root, new_resource=user_root, replace=False, view='full')