Example no. 1
0
 def __init__(self):
     self._serverUrl = config["couchdb.url.dbadmin"]
     self._targetName = config["couchdb.db.resourcedata"]
     self.documents = deque()
     s = couchdb.Server(self._serverUrl)
     self._db = s[self._targetName]
     self.repl_helper = ResourceDataReplacement()
     self.threads = {}
     self.max_threads = 50
 def __init__(self):
     self._serverUrl = config["couchdb.url.dbadmin"]
     self._targetName = config["couchdb.db.resourcedata"]
     self.documents = []
     s = couchdb.Server(self._serverUrl)
     self._db = s[self._targetName]
     self.repl_helper = ResourceDataReplacement()
class IncomingCopyHandler(BaseChangeHandler):
    def __init__(self):
        self._serverUrl = config["couchdb.url.dbadmin"]
        self._targetName = config["couchdb.db.resourcedata"]
        self.documents = []
        s = couchdb.Server(self._serverUrl)
        self._db = s[self._targetName]
        self.repl_helper = ResourceDataReplacement()

    def _canHandle(self, change, database):
        if (_DOC in change) and (
            change[_DOC].get(_DOC_TYPE) == _RESOURCE_DISTRIBUTABLE_TYPE or change[_DOC].get(_DOC_TYPE) == _RESOURCE_TYPE
        ):
            return True
        return False

    def _handle(self, change, database):
        def handleDocument(newDoc):
            should_delete = True
            try:
                # newDoc['node_timestamp'] = h.nowToISO8601Zformat()
                ResourceDataModelValidator.set_timestamps(newDoc)
                del newDoc["_rev"]
                self.repl_helper.handle(newDoc)
                # rd = ResourceDataModel(newDoc)
                # rd.save(log_exceptions=False)
            except SpecValidationException as e:
                log.error(newDoc["_id"] + str(e))
            except ResourceConflict as ex:
                log.exception(ex)
            except Exception as ex:
                should_delete = False  # don't delete something unexpected happend
                log.error(ex)
            if should_delete:
                try:
                    del database[newDoc["_id"]]
                except Exception as ex:
                    log.error(ex)

        self.documents.append(change[_DOC])
        if len(self.documents) >= _DOCUMENT_UPDATE_THRESHOLD or len(self.documents) >= database.info()["doc_count"]:
            for doc in self.documents:
                t = Thread(target=handleDocument, args=(doc,))
                t.start()
            self.documents = []
Example no. 4
0
class IncomingCopyHandler(BaseChangeHandler):
    def __init__(self):
        self._serverUrl = config["couchdb.url.dbadmin"]
        self._targetName = config["couchdb.db.resourcedata"]
        self.documents = deque()
        s = couchdb.Server(self._serverUrl)
        self._db = s[self._targetName]
        self.repl_helper = ResourceDataReplacement()
        self.threads = {}
        self.max_threads = 50

    def _canHandle(self, change, database):
        if ((_DOC in change) and \
            (change[_DOC].get(_DOC_TYPE) == _RESOURCE_DISTRIBUTABLE_TYPE or \
            change[_DOC].get(_DOC_TYPE) == _RESOURCE_TYPE)):
            return True
        return False

    def _handle(self, change, database):
        def threadName(doc):
            return "T-" + doc["_id"]

        def handleDocument(newDoc):
            should_delete = True
            try:
                # newDoc['node_timestamp'] = h.nowToISO8601Zformat()
                ResourceDataModelValidator.set_timestamps(newDoc)
                del newDoc["_rev"]
                self.repl_helper.handle(newDoc)
                # rd = ResourceDataModel(newDoc)
                # rd.save(log_exceptions=False)
            except SpecValidationException as e:
                log.error("SpecValidationException: %s, %s", newDoc['_id'],
                          str(e))
            except couchdb.ResourceConflict as rc:
                log.error("Document conflicts", exc_info=1)
            except Exception as ex:
                should_delete = False  # don't delete something unexpected happend
                log.error("Unable to save %s", newDoc['_id'], exc_info=ex)
            if should_delete:
                try:
                    del database[newDoc['_id']]
                except Exception as ex:
                    log.error("Error when deleting", exc_info=ex)
            try:
                del self.threads[threadName(newDoc)]
            except:
                pass

        self.documents.append(change[_DOC])
        if len(self.documents) >= _DOCUMENT_UPDATE_THRESHOLD or len(
                self.documents) >= database.info()['doc_count']:
            while len(self.documents) > 0:
                doc = self.documents.popleft()
                tname = threadName(doc)
                t = Thread(target=handleDocument, name=tname, args=(doc, ))
                self.threads[tname] = t
                t.start()
                while len(self.threads) > self.max_threads:
                    time.sleep(.1)

    def isRunning(self):
        return len(self.threads) > 0

    def threadCount(self):
        return len(self.threads)
Example no. 5
0
class PublishController(BaseController):
    """REST Controller styled on the Atom Publishing Protocol"""
    # To properly map this controller, ensure your config/routing.py
    # file has a resource setup:
    #     map.resource('publish', 'publisher')
    __ERROR = 'error'
    __OK = "OK"
    __DOCUMENT_RESULTS = 'document_results'
    __DOCUMENTS = 'documents'

    repl_helper = ResourceDataReplacement()

    @oauth.authorize("oauth-sign",
                     _service_doc(True),
                     roles=None,
                     mapper=signing.lrsignature_mapper,
                     post_cond=_no_abort)
    @bauth.authorize("oauth-sign",
                     _service_doc(),
                     roles=None,
                     pre_cond=_continue_if_missing_oauth,
                     realm="Learning Registry")
    def create(self, *args, **kwargs):

        results = {self.__OK: True}
        error_message = None
        try:
            data = json.loads(request.body)
            doc_limit = _service_doc()()['service_data']['doc_limit']

            if not self.__DOCUMENTS in data.keys():
                # Comply with LR-RQST-009 'Missing documents in POST'
                results[
                    self.__ERROR] = "Missing field 'documents' in post body"
            elif len(data[self.__DOCUMENTS]) < 1:
                # Comply with LR-API-PUBLISH-001 'List of documents is empty'
                results[self.__ERROR] = "List of documents is empty"
            elif len(data[self.__DOCUMENTS]) > doc_limit:
                error_message = "number of posted docs {0} exceeds doc limit: {1}".format(
                    len(data['documents']), str(doc_limit))
                log.debug(error_message)
                results[self.__ERROR] = error_message
            else:
                results[self.__DOCUMENT_RESULTS] = map(
                    lambda doc: signing.sign_doc(
                        doc, cb=self._publish, session_key="oauth-sign"),
                    data[self.__DOCUMENTS])
        except Exception as ex:
            log.exception(ex)
            results[self.__ERROR] = str(ex)

        if results.has_key(self.__ERROR):
            results[self.__OK] = False
        return json.dumps(results)

    def _isResourceDataFilteredOut(self, resourceData):

        if (LRNode.filterDescription is None
                or LRNode.filterDescription.filter is None
                or LRNode.filterDescription.custom_filter == True):
            #Do custom the filter I supposed ... for now just resturn false.
            return [False, None]

        matchResult = False
        envelopFilter = ""
        for f in LRNode.filterDescription.filter:
            log.info("\n" + str(f) + "\n")
            # Ckeck if jsonObject object has the key if it has search
            # for the regular expression in the filter otherwise keep looking
            key = f['filter_key']
            resourceValue = None

            for k in resourceData.keys():
                if re.search(key, k) is not None:
                    resourceValue = str(resourceData[k])
                    break

            if resourceValue is None:
                continue

            value = f['filter_value']
            log.info("\n" + str(key) + ", " + str(value) + ", " +
                     resourceValue + "\n")
            if (re.search(value, resourceValue) is not None):
                matchResult = True
                envelopFilter = "Filter '"+str(f)+"' for key '"+str(key)+\
                                          "' matches '"+resourceValue+"'"
                break

        #Check if what matching means base on the include_exclude
        # True: the filters describe what documents to accept all others
        # are rejected
        # False: the filters describe what documents to reject
        # all others are accepted
        if LRNode.filterDescription.include_exclude is None or \
            LRNode.filterDescription.include_exclude == True:
            if matchResult == False:
                return True, "\nDocument failed to match filter: \n"+\
                               pprint.pformat(LRNode.filterDescription.specData, indent=4)+"\n"
        else:
            if matchResult == True:
                return True, "\nExcluded by filter: \n" + envelopFilter

        return [False, None]

    def _publish(self, resourceData):
        if isinstance(resourceData, unicode):
            resourceData = json.loads(resourceData)

        result = {self.__OK: True}

        try:
            # Set the envelop data timestaps.
            resourceData = ResourceDataModelValidator.set_timestamps(
                resourceData)

            #Check if the envelop get filtered out
            isFilteredOut, reason = self._isResourceDataFilteredOut(
                resourceData)
            if isFilteredOut:
                result[self.__ERROR] = reason
            else:
                resourceData[
                    "publishing_node"] = LRNode.nodeDescription.node_id
                result = self.repl_helper.handle(resourceData)
                # ResourceDataModelValidator.save(resourceData)
                result[ResourceDataModelValidator.DOC_ID] = resourceData[
                    ResourceDataModelValidator.DOC_ID]

        except SpecValidationException as ex:
            log.exception(ex)
            result[self.__ERROR] = "\n" + pprint.pformat(str(ex), indent=4)
        except Exception as ex:
            log.exception(ex)
            result[self.__ERROR] = str(ex)

        if result.has_key(self.__ERROR):
            result[self.__OK] = False

        return result
class IncomingCopyHandler(BaseChangeHandler):

    def __init__(self):
        self._serverUrl = config["couchdb.url.dbadmin"]
        self._targetName = config["couchdb.db.resourcedata"]
        self.documents = deque()
        s = couchdb.Server(self._serverUrl)
        self._db = s[self._targetName]
        self.repl_helper = ResourceDataReplacement()
        self.threads = {}
        self.max_threads = 50

    def _canHandle(self, change, database):
        if ((_DOC in change) and \
            (change[_DOC].get(_DOC_TYPE) == _RESOURCE_DISTRIBUTABLE_TYPE or \
            change[_DOC].get(_DOC_TYPE) == _RESOURCE_TYPE)):
            return True
        return False

    def _handle(self, change, database):
        def threadName(doc):
            return "T-"+doc["_id"]

        def handleDocument(newDoc):
            should_delete = True
            try:
                # newDoc['node_timestamp'] = h.nowToISO8601Zformat()
                ResourceDataModelValidator.set_timestamps(newDoc)
                del newDoc["_rev"]
                self.repl_helper.handle(newDoc)
                # rd = ResourceDataModel(newDoc)
                # rd.save(log_exceptions=False)
            except SpecValidationException as e:
                log.error("SpecValidationException: %s, %s",newDoc['_id'],str(e))
            except couchdb.ResourceConflict as rc:
                log.error("Document conflicts", exc_info=1)
            except Exception as ex:
                should_delete = False  # don't delete something unexpected happend
                log.error("Unable to save %s", newDoc['_id'], exc_info=ex)
            if should_delete:
                try:
                    del database[newDoc['_id']]
                except Exception as ex:
                    log.error("Error when deleting", exc_info=ex)
            try:
                del self.threads[threadName(newDoc)]
            except:
                pass
                    
        self.documents.append(change[_DOC])
        if len(self.documents) >= _DOCUMENT_UPDATE_THRESHOLD or len(self.documents) >= database.info()['doc_count']:
            while len(self.documents) > 0:
                doc = self.documents.popleft()
                tname = threadName(doc)
                t = Thread(target=handleDocument, name=tname, args=(doc,))
                self.threads[tname] = t
                t.start()
                while len(self.threads) > self.max_threads:
                    time.sleep(.1)




    def isRunning(self):
        return len(self.threads) > 0


    def threadCount(self):
        return len(self.threads)