예제 #1
0
 def get_request(self, url, setmode):
     """Fetch the resource at ``url`` using a deep view.

     When the fetched element is a ``dataset`` and ``setmode`` is falsy, the
     ``<uri>/value`` resource of that dataset is fetched (also deep) and
     returned in its place, so the caller receives the linked documents.

     @param url: uri of the resource to fetch
     @param setmode: when truthy, a dataset is returned as-is
     @return: the element returned by data_service.get_resource
     """
     resource = data_service.get_resource(url, view='deep')
     # anything that is not a dataset (or any resource in set mode) is final
     if resource.tag != 'dataset' or setmode:
         return resource
     dataset_uri = resource.get('uri')
     if dataset_uri is None:
         return resource
     return data_service.get_resource('%s/value' % dataset_uri, view='deep')
예제 #2
0
 def _load_mount_path(self, store_name, path, **kw):
     """Load the resource found at ``path`` inside the store ``store_name``.

     The store root comes from ``self._load_store``; every path element is
     URL-unquoted and looked up as the ``resource_unid`` of a child of the
     previous level using a ``short`` view.

     @param store_name: name of the store to start from
     @param path: sequence of path elements below the store (not modified)
     @param kw: optional ``view`` (default ``'full'``) plus any extra
         arguments forwarded to ``data_service.get_resource`` on the final
         reload
     @return: the resolved resource, or None when the store could not be
         loaded or a path element does not match exactly one child
     """
     log.debug("load_mount_path : %s %s", store_name, path)
     path = list(path)  # make a copy to leave argument alone
     view = kw.pop('view', 'full')
     # This load is not a full load but may be a short load
     q = self._load_store(store_name)
     while q is not None and path:
         el = urllib.unquote(path.pop(0))
         parent = q
         matches = data_service.query(
             parent=parent,
             resource_unid=el,
             view='short',
         )
         if len(matches) != 1:
             # report the level that was searched, not the query result
             log.error('multiple names (%s) in store level %s', el,
                       parent.get('uri'))
             return None
         q = matches[0]
     # Reload when the caller passed extra arguments (they might be limiting
     # the result) or when the short load produced a resource without
     # children.  The comparison binds to len(q) only; comparing the whole
     # ``kw or len(q)`` expression to 0 dropped the caller's arguments.
     if q is not None and (kw or len(q) == 0):
         log.debug("loading with %s view=%s and %s", q, view, kw)
         q = data_service.get_resource(q, view=view, **kw)
     return q
예제 #3
0
    def _validate_store_update(self, storename, storexml):
        """Allow only the credential tag to be modified on a store.

        @param storename: name of the store to update
        @param storexml: XML text of the posted update; must be a
            ``<tag name="credentials" .../>`` element
        @return: result of data_service.update_resource, or None when the
            XML cannot be parsed, the store does not exist, or the posted
            element is not the credentials tag
        """
        try:
            posted = etree.XML(storexml)
        except etree.ParseError:
            log.error("bad storexml for update %s", storexml)
            return

        current = self._load_store(storename)
        # What constitutes a valid update?
        # 1.  Only admins can create new top level stores and they should be
        #     user specific (this is done using the site.cfg currently)
        if current is None:
            log.warn(
                "attempting modify non-existent store %s.. please add new store templates to site.cfg",
                storename)
            return None

        # 2. User can edit substores but may not change any attributes
        #    (only tags); the posted element must be tag[credentials]
        is_credentials = (posted.tag == 'tag'
                          and posted.get('name') == 'credentials')
        if not is_credentials:
            log.warn("invalid store resource (use tag[credentials]) %s",
                     storexml)
            return None

        current = data_service.get_resource(current, view="full")
        existing = get_tag(current, 'credentials')
        if existing is not None:
            # overwrite the value of the credentials tag already on the store
            existing[0].set('value', posted.get('value'))
        else:
            current.append(posted)

        return data_service.update_resource(current,
                                            new_resource=current,
                                            replace=False,
                                            view='full')
예제 #4
0
 def convert_node(self, node, kml, cnvf=None):
     """Recursively render ``node`` and its descendants into ``kml``.

     @param node: element to convert; None is ignored
     @param kml: output object handed through to render_gobjects
     @param cnvf: coordinate transform function; replaced by the result of
         create_transform_function when an ``image`` node is entered
     @return: None
     """
     if node is None:
         return

     kind = node.tag

     # a primitive, or a gobject typed as a primitive
     if kind in gobject_primitives or (
             kind == 'gobject' and node.get('type') in gobject_primitives):
         self.render_gobjects(node, kml, kind, cnvf=cnvf)
         return

     # special case of a gobject wrapper of a primitive
     if kind == 'gobject' and len(node) == 1:
         self.render_gobjects(node[0], kml, node.get('type'),
                              node.get('name'), cnvf=cnvf)
         return

     # skip any tags, they were added beforehand
     if kind == 'tag':
         return

     # the output has no hierarchical elements: dump dataset members flat
     if kind == 'dataset':
         for ref in node.xpath('value'):
             member = data_service.get_resource(ref.text, view='deep')
             self.convert_node(member, kml, cnvf=cnvf)
         return

     if kind in ['vertex', 'value']:
         return

     # any other node type is treated as a folder whose children are dumped
     # as a flat list
     if kind == 'image':
         # load metadata and create coordinate transformation function
         cnvf = self.create_transform_function(node)
     for child in node:
         self.convert_node(child, kml, cnvf=cnvf)
예제 #5
0
 def delete(self, duri, **kw):
     """Delete a dataset and run the ``delete`` operation over its members.

     The member list is captured before the dataset resource is removed and
     is then handed to ``self.iterate`` together with the loaded dataset.

     @param duri: uri of the dataset
     @param kw: extra operation parameters forwarded to iterate
     @return: whatever self.iterate returns
     """
     log.info ("dataset: delete members of %s to " , duri)
     target = data_service.get_resource(duri, view='full')
     # remember the members first; the dataset itself goes away next
     member_refs = target.xpath('./value')
     data_service.del_resource(target)
     return self.iterate(operation='delete',
                         dataset=target,
                         members=member_refs,
                         **kw)
예제 #6
0
    def add_query(self, duri, resource_tag, tag_query, **kw):
        """Append query results to a dataset

        Existing members are re-indexed from 0; every queried resource whose
        uri is not already a member is appended as a new ``value`` element
        of type ``object``.

        @param duri: dataset uri of an existing dataset
        @param resource_tag: resource type tag i.e. images
        @param tag_query: expression of tag search
        @param kw: extra arguments passed through to data_service.query
        @return: serialized XML of the result of data_service.update
        """
        log.info ("dataset: addquery members of %s tag %s query %s " , duri, resource_tag, tag_query)

        dataset = data_service.get_resource(duri, view='deep')
        members = dataset.xpath('./value')
        for n, val in enumerate(members):
            val.set('index', str(n))

        # Track member uris in a set instead of building an XPath per
        # result: interpolating the uri into the expression failed on uris
        # containing a double quote and re-scanned the dataset every time.
        known = set(val.text for val in members if val.text is not None)

        items = data_service.query (resource_tag, tag_query=tag_query, **kw)
        count = len(members)
        for resource in items:
            uri = resource.get('uri')
            if uri in known:
                continue
            val = etree.SubElement(dataset, 'value', type='object', index=str(count))
            val.text = uri
            known.add(uri)
            count += 1

        log.debug ("members = %s" % etree.tostring (dataset))
        r = data_service.update(dataset)
        return etree.tostring(r)
예제 #7
0
    def action(self, member, action, tagdoc, **kw):
        """Modify the tags of one dataset member.

        @param member: the member element of the dataset; its text is the
            uri of the resource to modify
        @param action: a string: append, delete, edit_value, edit_name,
            change_name (anything else is a no-op)
        @param tagdoc: tag document, either XML text or a parsed element
        @return: None
        """
        member = member.text
        if isinstance(tagdoc, basestring):
            tagdoc = etree.XML(tagdoc)

        log.debug ('TagEdit (%s) %s with %s' % (action, member, etree.tostring(tagdoc)))

        # These update operations should really be done in the database;
        # this is the brute-force way: load, modify, write back.
        if action == "append":
            target = data_service.get_resource(member, view='short')
            target.append(tagdoc)
            data_service.update(target)
            return None

        if action == 'delete':
            target = data_service.get_resource(member, view='full')
            for tag in tagdoc.xpath('./tag'):
                doomed = target.xpath('./tag[@name="%s"]' % tag.get('name'))
                for killtag in doomed:
                    data_service.del_resource(killtag.get('uri'))
            return None

        # (action, xpath selecting the resource tags, tagdoc attribute fed
        #  into that xpath, attribute to overwrite, tagdoc attribute that
        #  supplies the new content)
        edit_rules = (
            ('edit_value', './tag[@name="%s"]', 'name', 'value', 'value'),
            ('edit_name', './tag[@value="%s"]', 'value', 'name', 'name'),
            ('change_name', './tag[@name="%s"]', 'name', 'name', 'value'),
        )
        for rule_name, selector, match_attr, target_attr, source_attr in edit_rules:
            if action != rule_name:
                continue
            target = data_service.get_resource(member, view='full')
            for tag in tagdoc.xpath('./tag'):
                for mtag in target.xpath(selector % tag.get(match_attr)):
                    mtag.set(target_attr, tag.get(source_attr))
            data_service.update(target)
            break

        return None
예제 #8
0
 def action(self, member, permission):
     """Set the ``permission`` attribute of the resource named by ``member``.

     @param member: dataset member element; its text is the resource uri
     @param permission: value stored in the resource's ``permission``
         attribute
     @return: None
     """
     uri = member.text
     log.debug('permission action %s' % uri)
     target = data_service.get_resource(uri, view='short')
     log.debug('GOT %s' % etree.tostring (target))
     target.set('permission', permission)
     data_service.update(target)
     return None
예제 #9
0
    def _get(self, path, **kw):
        """GET from a path of the form /store_name/d1/d2/[value].

        A trailing ``value`` element requests de-referencing: the children
        of the addressed resource are listed, ``link`` children are replaced
        by the resources they point to, and ``limit``/``offset`` are
        emulated by slicing the children.

        @param path: list of path elements; consumed in place (the store
            name and a trailing ``value`` are popped off)
        @param kw: request arguments; ``view`` (default ``short``) is used
            for de-referenced members
        @return: serialized XML, the result of self.index() for an empty
            path, or ``'<resource/>'`` when the path cannot be loaded
        """
        log.info("GET %s with %s", path, kw)
        member_view = kw.pop('view', 'short')

        # Some crazy handling when de-referencing: the request arguments
        # then apply to the member fetches, not to the path load.
        dereference = len(path) and path[-1] == 'value'
        load_args = kw
        member_args = None
        if dereference:
            path.pop()
            member_args = kw
            load_args = {}

        # woops just want a list of stores.. index does that
        if not path:
            return self.index()
        store_name = path.pop(0)

        node = self._load_mount_path(store_name=store_name,
                                     path=path,
                                     view='full',
                                     **load_args)
        if node is None:
            log.warn("could not load store/path %s/%s", store_name, path)
            return '<resource/>'

        # crazy value handling (emulate limit and offset)
        if dereference:
            limit = member_args.pop('limit', None)
            offset = int(member_args.pop('offset', 0))
            listing = etree.Element('resource')
            for child in node[offset:limit and (int(limit) + offset)]:
                if child.tag != 'link':
                    listing.append(child)
                    continue
                fetched = data_service.get_resource(child.get('value'),
                                                    view=member_view,
                                                    **member_args)
                if fetched is None:
                    log.warn('element %s was not fetched',
                             etree.tostring(child))
                else:
                    listing.append(fetched)

            # sort the children of every parent element by name, recipe from
            # http://stackoverflow.com/questions/8385358/lxml-sorting-tag-order
            def by_name(element):
                return element.get('name', None) or ''

            for parent in listing.xpath('//*[./*]'):
                parent[:] = sorted(parent, key=by_name)
            node = listing

        top = "/".join(['', 'blob_service', 'store', store_name] + list(path))
        self.mapuris(node, top=top)

        return etree.tostring(node)
예제 #10
0
    def welcomebackground(self, **kw):
        """Redirect to the thumbnail of an image used as welcome background.

        If ``bisque.background_resource`` is configured that resource is
        used.  Otherwise a random image matching ``bisque.background_query``
        is picked, widening first to public images and then to any image
        when nothing matches.

        @param kw: optional ``wpublic`` (defaults to "no current identity")
            and ``size`` (thumbnail size, default ``800,600``)
        """
        log.info("BACKGROUND %s " % session)
        wpublic = kw.pop('wpublic', not bq.core.identity.current)
        thumbnail = None
        welcome_resource = config.get('bisque.background_resource', None)
        thumb_size = kw.get('size', '800,600')
        thumbnail_pattern = '/image_service/image/%s?thumbnail=%s'

        if welcome_resource:
            try:
                image = data_service.get_resource(welcome_resource)
                thumbnail = thumbnail_pattern % (image.get('resource_uniq'),
                                                 thumb_size)
            except Exception:
                log.exception('bisque.background (%s) set but not available' %
                              welcome_resource)
        else:
            tag_query = config.get('bisque.background_query',
                                   "welcome_background:")
            visibility = wpublic
            found = data_service.count("image",
                                       tag_query=tag_query,
                                       wpublic=wpublic)
            # nothing tagged among private images: retry including public
            if found == 0 and wpublic == False:
                visibility = True
                found = data_service.count("image",
                                           tag_query=tag_query,
                                           wpublic=visibility)
            # None found .. pick a random image regardless of tags
            if found == 0:
                found = data_service.count("image", wpublic=wpublic)
                tag_query = None
                visibility = wpublic
            if found:
                pick = random.randint(0, found - 1)
                candidates = data_service.query('image',
                                                tag_query=tag_query,
                                                wpublic=visibility,
                                                offset=pick,
                                                limit=1)
                image = candidates[0]
                thumbnail = thumbnail_pattern % (image.get('resource_uniq'),
                                                 thumb_size)

        redirect(base_url=thumbnail)
예제 #11
0
    def create_sample_db(self):
        """Build the sample database from the model's training dataset.

        Holds the model lock while it loads the training dataset, records
        its timestamp and image count on the model resource, feeds every
        member image to ``self.framework.update_sample_db`` and finally
        marks ``status.samples.init`` as finished.

        @return: the last ``value`` reference of the dataset (kept for
            callers that relied on it), or None for a dataset without
            object members
        @raise ConnoisseurException: BAD_REQUEST when no training set is
            configured or the resource is not a dataset; LOCKED when the
            model lock cannot be acquired
        """
        if self.training_set is None:
            raise ConnoisseurException(responses.BAD_REQUEST, 'Cannot create sample DB due to missing training dataset')

        with Locks(None, self.lockable, failonexist=True) as l:
            if l.locked is False: # the file is being written by another process
                raise ConnoisseurException(responses.LOCKED, 'The model is locked for processing by another process')

            dataset_url = ensure_url(self.training_set)
            dataset = data_service.get_resource(dataset_url, view='full')
            # compare the tag itself; the comparison used to wrap the whole
            # ``is None or tag`` expression and only worked by accident
            if dataset is None or dataset.tag != 'dataset':
                raise ConnoisseurException(responses.BAD_REQUEST, 'Provided resource is not a dataset')

            self.training_set_timestamp = dataset.get('ts')
            set_tag(self.resource, 'training_set_timestamp', self.training_set_timestamp)

            refs = dataset.xpath('value[@type="object"]')
            images = [XImage(base_url=ref.text) for ref in refs]

            self.total_images = len(images)
            set_tag(self.resource, 'total_images', self.total_images)

            self.update_status(status='Creating sample db')

            # dima: this should be parallelized
            log.info('STARTING samples:init for %s images', self.total_images)
            try:
                for i, image in enumerate(images):
                    log.info('PROCESSING samples:init %s/%s for %s', i, self.total_images, image)
                    self.framework.update_sample_db(image)
            except:
                # record the failure on the resource for any kind of
                # interruption, then let the caller see the original error
                self.update_with_error('status.samples.init', 'Exception during create_sample_db')
                raise
            log.info('FINSHED samples:init for %s images', self.total_images)

            set_classes(self.resource, 'classes_model', self.classes_model)
            set_tag(self.resource, 'status.samples.init', 'finished')
            self.sync_resource(status='finished')
            # The return value used to be a leaked loop variable: the last
            # reference when there were members, a NameError (after all the
            # work had finished) when there were none.
            return refs[-1] if refs else None
예제 #12
0
def iterate(duri=None, operation='idem', dataset=None, members=None, last=False, **kw):
    """Iterate over a dataset executing an operation on each member

    @param duri: dataset uri
    @param operation: an operation name (i.e. module, permission); names
        missing from DatasetServer.operations fall back to IdemOp
    @param dataset: already loaded dataset element; fetched from duri when
        None
    @param members: member elements to process; defaults to the dataset's
        ``value`` elements
    @param last: when true, the final member is processed separately with
        ``last=True`` passed to the operation
    @param kw: operation parameters by name
    @return: serialized ``resource`` element holding every non-None
        operation result
    """

    log.info('iterate op %s on  %s' , operation, duri or dataset.get('uri'))
    if dataset is None:
        dataset = data_service.get_resource(duri, view='full')
    if members is None:
        members = dataset.xpath('/dataset/value')

    op_klass = DatasetServer.operations.get(operation, IdemOp)
    op = op_klass(duri, dataset=dataset, members=members)

    log.debug ("%s on  members %s" , str( op ),  [ x.text for x in members ] )
    results = etree.Element('resource', uri=request.url)

    def collect(member, result):
        # log the outcome for one member and keep any non-None result
        log.debug ("%s on %s -> %s" , operation, member.text, result )
        if result is not None:
            results.append(result)

    # An empty member list has no final member to single out; indexing
    # members[-1] unconditionally raised IndexError in that case.
    final_member = None
    if last and len(members) > 0:
        final_member = members[-1]
        members = members[:-1]

    for val in members:
        collect(val, op(member=val, **kw))

    if final_member is not None:
        collect(final_member, op(member=final_member, last=True, **kw))

    return etree.tostring(results)
예제 #13
0
def _add_resource_inputs_outputs(xnode, edges, checked, unchecked):
    """
    Record the direct edges of ``xnode`` and queue its unvisited neighbours.

    For a MEX, incoming neighbours are the links in its "inputs" section and
    outgoing neighbours the links in its "outputs" section.  For any other
    resource, outgoing neighbours are the MEXs listing it in "inputs" and
    incoming neighbours the MEXs listing it in "outputs" (a MEX found in
    inputs is not checked for outputs).

    Self-references are dropped, and an outgoing neighbour that is also an
    incoming one is dropped from the outgoing side.

    @param xnode: resource element to expand
    @param edges: set of (from_uniq, to_uniq) pairs, extended in place
    @param checked: collection of uniqs that were already expanded
    @param unchecked: set receiving neighbours not present in checked
    """
    node = xnode.get('resource_uniq')

    def linked_uniqs(values_xpath):
        # last path component of every http(s) reference in a MEX section
        return [
            ref.rsplit('/', 1)[1]
            for ref in xnode.xpath(values_xpath)
            if ref.startswith("http")
        ]

    def section_mentions_node(mex_deep, section_xpath):
        # load the section tag of the MEX and look for a tag whose value is
        # the uri of xnode
        section = mex_deep.xpath(section_xpath)
        if not section:
            return False
        section_deep = data_service.get_resource(
            resource=section[0].get('uri'), view='full,clean')
        return bool(section_deep and len(
            section_deep.xpath('./tag[@value="%s"]' % xnode.get("uri"))) > 0)

    if xnode.tag == 'mex':
        sources = linked_uniqs('./tag[@name="inputs"]/tag/@value')
        targets = linked_uniqs('./tag[@name="outputs"]/tag/@value')
    else:
        sources = []
        targets = []
        # TODO: the following will be very slow on large DBs... change to new query in 0.6!
        referencing = data_service.query('mex',
                                         tag_query='"http*/%s"' % node,
                                         cache=False)
        for mex_ref_node in referencing:
            mex_uniq = mex_ref_node.get('resource_uniq')
            mex_deep = data_service.resource_load(uniq=mex_uniq, view='full')
            if not mex_deep:
                continue
            if section_mentions_node(mex_deep, './tag[@name="inputs"]'):
                # found node in MEX's inputs
                targets.append(mex_uniq)
            elif section_mentions_node(mex_deep, './tag[@name="outputs"]'):
                # found node in MEX's outputs
                sources.append(mex_uniq)

    # add edge unless it points to mex recursively
    sources = [u for u in sources if is_uniq_code(u) and u != node]
    # add edge unless it points to mex recursively or back to an input
    targets = [
        u for u in targets
        if is_uniq_code(u) and u != node and u not in sources
    ]

    log.debug("points_to_list %s", targets)
    log.debug("points_from_list %s", sources)

    for neighbour in sources:
        edge = (neighbour, node)
        if edge not in edges:
            log.debug("ADDING IN EDGE : %s" % str(edge))
            edges.add(edge)
        if neighbour not in checked:
            unchecked.add(neighbour)

    for neighbour in targets:
        edge = (node, neighbour)
        if edge not in edges:
            log.debug("ADDING OUT EDGE : %s" % str(edge))
            edges.add(edge)
        if neighbour not in checked:
            unchecked.add(neighbour)
예제 #14
0
        def fileInfo(relpath, uri, index=0):
            """Describe the archive entries needed to export one resource.

            @param relpath: directory inside the archive for the entries
            @param uri: uri of the resource to export
            @param index: fallback used as the name when nothing else is
                available
            @return: list of info dicts (one per file; the first also
                carries ``xml`` and ``content``), or None when the resource
                cannot be read
            """
            xml = data_service.get_resource(uri, view='deep,clean')
            if xml is None:
                log.warn('skipping unreadable uri %s', uri)
                return None

            name = xml.get('name')
            uniq = xml.get('resource_uniq', None)

            # try to figure out a name for the resource
            if not name:
                name = xml.xpath('./tag[@name="filename"]') or xml.xpath(
                    './tag[@name="name"]')
                name = name and name[0].get('value')
            if not name and uniq:
                # NOTE(review): this takes a single character of the uniq;
                # possibly uniq[-4:] was intended -- confirm before changing
                name = uniq[-4]
            if not name:
                name = str(index)

            path = None
            files = None
            if uniq is not None:
                b = blob_service.localpath(uniq)
                if b:
                    files = b.files
                    if files is not None and len(files) > 0:
                        path = files[0]
                    else:
                        path = b.path
                    if path and not os.path.exists(path):
                        path = None
                else:
                    log.warn("Resource %s ( %s ) did not have blob", uniq,
                             xml.tag)

            # if resource is just an XML doc
            content = None
            if path is None:
                content = etree.tostring(xml)
                name = '%s_%s' % (name, uniq)
                xml = None

            # disambiguate file name if present
            ext = '' if path is not None else '.xml'
            outpath = os.path.join(relpath,
                                   '%s%s' % (name, ext)).replace('\\', '/')
            if outpath in fileHash:
                # Keep the name's own extension separate from ``ext``:
                # reusing ``ext`` here doubled the extension of blob files
                # (a<uniq>.tif.tif) and dropped ".xml" from XML-only docs.
                stem, name_ext = os.path.splitext(name)
                name = '%s%s%s' % (stem, uniq, name_ext)
                outpath = os.path.join(relpath, '%s%s' % (name, ext)).replace(
                    '\\', '/')
            fileHash[outpath] = name

            if files is None or len(files) < 2:
                return [{
                    'xml': xml,
                    'content': content,
                    'name': name,
                    'uniq': uniq,
                    'path': path,
                    'relpath': relpath,
                    'outpath': outpath,
                }]

            log.debug('fileInfo name: %s, path: %s, relpath: %s, outpath: %s',
                      name, path, relpath, outpath)
            log.debug('fileInfo files: %s', files)

            # find minimum relative path: the shortest directory prefix
            # length among the files (there are at least two files here)
            min_length = min(len(os.path.dirname(f)) for f in files)
            minpath = files[0][:min_length + 1]
            log.debug('fileInfo minpath: %s', minpath)

            # check if file disambiguation is needed
            subpath = files[0][min_length + 1:]
            outpath = os.path.join(relpath, name, subpath).replace('\\', '/')
            if outpath in fileHash:
                name = '%s.%s' % (name, uniq)
                outpath = os.path.join(relpath, name,
                                       subpath).replace('\\', '/')
            fileHash[outpath] = name

            infos = []
            for position, f in enumerate(files):
                subpath = f[min_length + 1:]
                info = {
                    'name': os.path.basename(f),
                    'uniq': uniq,
                    'path': f,
                    'relpath': relpath,
                    'outpath': os.path.join(relpath, name,
                                            subpath).replace('\\', '/'),
                    'subpath': subpath.replace('\\', '/'),
                }
                # only the first entry carries the resource document
                if position == 0:
                    info['xml'] = xml
                    info['content'] = content
                infos.append(info)

            log.debug('fileInfo infos: %s', infos)
            return infos
예제 #15
0
    def fileInfoList(self, fileList, datasetList, urlList, dirList):
        log.debug('fileInfoList fileList: %s' % fileList)
        log.debug('fileInfoList datasetList: %s' % datasetList)
        log.debug('fileInfoList urlList: %s' % urlList)
        log.debug('fileInfoList dirList: %s' % dirList)
        flist = []
        fileHash = {}  # Use a URI hash to look out for file repetitions

        def fileInfo(relpath, uri, index=0):
            xml = data_service.get_resource(uri, view='deep,clean')
            if xml is None:
                log.warn('skipping unreadable uri %s', uri)
                return None

            name = xml.get('name')
            uniq = xml.get('resource_uniq', None)

            # try to figure out a name for the resource
            if not name:
                name = xml.xpath('./tag[@name="filename"]') or xml.xpath(
                    './tag[@name="name"]')
                name = name and name[0].get('value')
            if not name and uniq:
                name = uniq[-4]
            if not name:
                name = str(index)

            path = None
            files = None
            if uniq is not None:
                #del xml.attrib['resource_uniq'] # dima: strip resource_uniq from exported xml
                b = blob_service.localpath(uniq)
                if b:
                    files = b.files
                    if files is not None and len(files) > 0:
                        path = files[0]
                    else:
                        path = b.path
                    if path and not os.path.exists(path):
                        path = None
                else:
                    log.warn("Resource %s ( %s ) did not have blob", uniq,
                             xml.tag)

            # if resource is just an XML doc
            content = None
            if path is None:
                content = etree.tostring(xml)
                name = '%s_%s' % (name, uniq)
                xml = None

            # disambiguate file name if present
            ext = '' if path is not None else '.xml'
            outpath = os.path.join(relpath,
                                   '%s%s' % (name, ext)).replace('\\', '/')
            if outpath in fileHash:
                fname, ext = os.path.splitext(name)
                name = '%s%s%s' % (fname, uniq, ext)
                outpath = os.path.join(relpath, '%s%s' % (name, ext)).replace(
                    '\\', '/')
            fileHash[outpath] = name

            if files is None or len(files) < 2:
                return [{
                    'xml': xml,
                    'content': content,
                    'name': name,
                    'uniq': uniq,
                    'path': path,
                    'relpath': relpath,
                    'outpath': outpath,
                }]

            log.debug('fileInfo name: %s, path: %s, relpath: %s, outpath: %s',
                      name, path, relpath, outpath)
            log.debug('fileInfo files: %s', files)

            # find minimum relative path
            min_length = sys.maxint
            for f in files:
                min_length = min(min_length, len(os.path.dirname(f)))
            minpath = files[0][:min_length + 1]
            log.debug('fileInfo minpath: %s', minpath)

            # check if file disimbiguation is needed
            subpath = files[0][min_length + 1:]
            outpath = os.path.join(relpath, name, subpath).replace('\\', '/')
            if outpath in fileHash:
                name = '%s.%s' % (name, uniq)
                outpath = os.path.join(relpath, name,
                                       subpath).replace('\\', '/')
            fileHash[outpath] = name

            infos = []
            first = True
            for f in files:
                subpath = f[min_length + 1:]
                info = {
                    'name':
                    os.path.basename(f),
                    'uniq':
                    uniq,
                    'path':
                    f,
                    'relpath':
                    relpath,
                    'outpath':
                    os.path.join(relpath, name, subpath).replace('\\', '/'),
                    'subpath':
                    subpath.replace('\\', '/'),
                }
                if first is True:
                    first = False
                    info['xml'] = xml
                    info['content'] = content
                infos.append(info)

            log.debug('fileInfo infos: %s', infos)
            return infos

        def xmlInfo(finfo):
            if len(finfo) == 1:
                finfo = finfo[0]
                file = finfo.copy()
                file['outpath'] = '%s.xml' % file['outpath']
                # need to modify the resource value to point to a local file
                #file['xml'].set('value', os.path.basename(file['xml'].get('value', '')))
                file['xml'].set('value', finfo['name'])
                file['content'] = etree.tostring(file['xml'])
                del file['path']
                del file['xml']
                return file
            else:
                file = finfo[0].copy()
                file['outpath'] = '%s.xml' % file['outpath']
                i = 0
                for v in file['xml'].xpath('value'):
                    v.text = finfo[i]['subpath']
                    i += 1
                file['content'] = etree.tostring(file['xml'])
                del file['path']
                del file['xml']
                return file

        def urlInfo(url, index=0):
            """Fetch an external URL and describe it as an archive entry.

            Args:
                url: absolute URL, or a URL relative to the configured
                    ``bisque.root``.
                index: position of the URL in the request; used to build the
                    fallback file name when the response does not name one.

            Returns:
                A dict with ``name``, ``content`` and ``outpath`` keys, or
                ``None`` when the server does not answer with 200 OK.
            """
            # This hack gets around bisque internal authentication mechanisms
            # by forwarding the caller's own credentials, please refer to
            # http://biodev.ece.ucsb.edu/projects/bisquik/ticket/597
            headers = dict((name, request.headers.get(name, ''))
                           for name in ('Authorization', 'Mex', 'Cookie')
                           if name in request.headers)

            # relative URLs cannot be fetched, resolve them against the root
            if urlparse.urlparse(url).scheme == '':
                url = urlparse.urljoin(config.get('bisque.root'), url)

            # log header names only: the values are credentials
            log.debug('ArchiveStreamer: Sending %s with %s', url,
                      sorted(headers))
            response = requests.get(url, headers=headers)

            #pylint: disable=no-member
            if response.status_code != requests.codes.ok:
                log.error("URL request returned %s", response.status_code)
                return None

            items = response.headers.get('content-disposition', '').split(';')
            log.debug('Respose headers: %s', response.headers)
            log.debug('items: %s', items)

            plainName = None
            extendedName = None
            for item in items:
                # partition keeps any '=' that is part of the value and lets
                # us skip bare tokens such as "attachment" safely
                key, sep, value = item.partition('=')
                if not sep:
                    continue
                key = key.lower().strip()
                value = value.strip().strip('"\'')
                if key == 'filename':
                    plainName = value
                elif key == 'filename*':
                    # NOTE(review): an RFC 5987 ext-value (charset'lang'text)
                    # is not unpacked here; presumably the server sends raw
                    # UTF-8 in this parameter - confirm before changing.
                    try:
                        if isinstance(value, bytes):
                            value = value.decode('utf8')
                        extendedName = value
                    except UnicodeDecodeError:
                        # undecodable extended name: fall back to the plain one
                        pass

            # prefer the extended name whatever the parameter order is, and
            # never hand back an empty name
            # NOTE(review): the name comes from the remote server and is used
            # as the path inside the archive as-is; consider sanitizing it.
            fileName = extendedName or plainName or (str(index) + '.')

            return dict(name=fileName,
                        content=response.content,
                        outpath=fileName)

        # processing a list of resources
        if len(fileList) > 0:
            for index, uri in enumerate(fileList):
                finfo = fileInfo('', uri)
                if finfo is None:
                    continue
                flist.extend(finfo)
                if self.export_meta is True and finfo[0].get(
                        'xml') is not None:
                    flist.append(xmlInfo(finfo))
                # find all mexs that use this resource explicitly
                # dima: we'll not get any second level mexs
                # mexs that use mexs, will need closure query in the db for that
                if self.export_mexs:
                    mexq = data_service.query(
                        'mex', tag_query=finfo[0]['xml'].get('uri'))
                    members = mexq.xpath('//mex')
                    for m in members:
                        uri = m.get('uri')
                        flist.extend(fileInfo('', uri))

        # processing a list of datasets
        if len(datasetList) > 0:
            for uri in datasetList:
                dataset = data_service.get_resource(uri, view='deep,clean')
                name = dataset.xpath('/dataset/@name')[0]
                members = dataset.xpath('/dataset/value')
                uniq = dataset.get('resource_uniq', '')
                #del dataset.attrib['resource_uniq'] # dima: strip resource_uniq from exported xml

                for index, member in enumerate(members):
                    finfo = fileInfo(name, member.text, index)
                    if finfo is None:
                        continue
                    finfo[0]['dataset'] = name
                    flist.extend(finfo)

                    # update reference in the dataset xml
                    if self.export_meta is True and finfo[0].get(
                            'xml') is not None:
                        flist.append(xmlInfo(finfo))
                        member.text = '%s.xml' % finfo[0].get('outpath', '')
                    else:
                        member.text = finfo[0].get('outpath', '')

                if self.export_meta:
                    # disambiguate file name if present
                    name = '%s.xml' % name
                    if name in fileHash:
                        fname, ext = os.path.splitext(name)
                        name = '%s%s%s' % (fname, uniq, ext)
                    fileHash[name] = name

                    # Insert dataset XML into file list
                    flist.append(
                        dict(name=name,
                             content=etree.tostring(dataset),
                             outpath=name))

        # processing a list of directories
        if len(dirList) > 0:
            for uri in dirList:
                # read dir from blob storage, dima: need to access blob storage
                folder = data_service.get_resource(uri, view='deep')
                members = folder.xpath('//link')

                for index, member in enumerate(members):
                    # dima: need to compute proper URI
                    uniq = member.get('value', None)
                    uri = '/data_service/%s' % uniq  # compute URI from uniq, dima: does not work today: 403 forbidden

                    # compute path for each link by traversing up the tree
                    folder = [
                    ]  # relative path to the resource from currently selected dir with no trailing slash
                    parent = member
                    while parent is not None:
                        parent = parent.xpath('..')
                        parent = parent[0] if len(parent) > 0 else None
                        if parent is not None:
                            folder.append(parent.get('name', None))
                    folder.reverse()
                    finfo = fileInfo('/'.join(folder), uri, index)
                    if finfo is None:
                        continue
                    flist.extend(finfo)
                    if self.export_meta is True and finfo[0].get(
                            'xml') is not None:
                        flist.append(xmlInfo(finfo))

        # processing a list of URLs
        if len(urlList) > 0:
            for index, url in enumerate(urlList):
                if fileHash.get(url) != None:
                    continue
                else:
                    fileHash[url] = 1
                    finfo = urlInfo(url, index)
                    flist.append(finfo)

        return flist
예제 #16
0
 def get_user_name(self, uri):
     """Return the ``name`` value of the resource at *uri*, memoized.

     Results are remembered in ``self.user_map`` keyed by URI, so the data
     service is only queried the first time a given URI is requested.
     """
     known = self.user_map
     if uri not in known:
         # first request for this URI: fetch the resource and keep its name
         known[uri] = data_service.get_resource(uri).get('name')
     return known[uri]