Example #1
    def retrieve(self, document):
        """Read document CouchDB metadata.

        @param document: is a DMS Document() instance
        """
        docrule = document.get_docrule()
        mapping = docrule.get_docrule_plugin_mappings()
        # No action for uncategorized (no docrule) documents
        # No action for documents whose mapping has no DB storage plugins
        if document.get_docrule().uncategorized:
            return document
        else:
            if not mapping.get_database_storage_plugins():
                return document
            else:
                self.check_user(document)
                doc_name = document.get_code()
                couchdoc = CouchDocument()
                try:
                    couchdoc = CouchDocument.get(docid=doc_name)
                except Exception as e:
                    # Skip 'deleted'/'missing' errors (such documents are not used in DMS)
                    e_message = str(e)
                    if e_message not in ['deleted', 'missing']:
                        raise PluginError('CouchDB error: %s' % e, e)
                document = couchdoc.populate_into_dms(document)
                return document
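For orientation, a minimal usage sketch of the retrieve plugin above (illustrative only: in the real system the plugin pipeline invokes it, and the worker class is the CouchDBMetadataWorker shown in Example #24):
    # Hypothetical direct call, for illustration only
    worker = CouchDBMetadataWorker()
    document = worker.retrieve(document)   # document is a DMS Document() with its docrule set
    db_info = document.get_db_info()       # CouchDB metadata is now populated on the Document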
Example #2
    def update_document_metadata(self, document):
        """Updates document with new indexes and stores old one into another revision.

        @param document: is a DMS Document() instance
        """
        self.check_user(document)
        if 'update_file' in document.options and document.options['update_file']:
            name = document.get_code()
            # We need to create the couchdb document in case it does not exist in the database.
            couchdoc = CouchDocument.get_or_create(docid=name)
            couchdoc.update_file_revisions_metadata(document)
            couchdoc.save()
        if document.old_docrule:
            old_couchdoc = None
            couchdoc = CouchDocument.get_or_create(docid=document.file_name)
            try:
                old_couchdoc = CouchDocument.get(docid=document.old_name_code)
            except Exception as e:
                log.error('%s' % e)
            if old_couchdoc:
                # Migrate from existing CouchDB document
                couchdoc.migrate_metadata_for_docrule(document, old_couchdoc)
                couchdoc.save()
                old_couchdoc.delete()
            else:
                # store from current Document() instance
                user = document.user
                couchdoc.populate_from_dms(user, document)
                couchdoc.save()
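A sketch of how this update path might be driven (illustrative only; the options key is the one checked above, and the worker is the CouchDBMetadataWorker from Example #24):
    # Hypothetical invocation, for illustration only
    worker = CouchDBMetadataWorker()
    document.options['update_file'] = True   # triggers update_file_revisions_metadata() above
    worker.update_document_metadata(document)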
Example #3
    def retrieve(self, document):
        """Read document CouchDB metadata.

        @param document: is a DMS Document() instance
        """
        docrule = document.get_docrule()
        mapping = docrule.get_docrule_plugin_mappings()
        # No action for uncategorized (no docrule) documents
        # No action for documents whose mapping has no DB storage plugins
        if document.get_docrule().uncategorized:
            return document
        else:
            if not mapping.get_database_storage_plugins():
                return document
            else:
                self.check_user(document)
                doc_name = document.get_code()
                couchdoc = CouchDocument()
                try:
                    couchdoc = CouchDocument.get(docid=doc_name)
                except Exception as e:
                    # Skip 'deleted'/'missing' errors (such documents are not used in DMS)
                    e_message = str(e)
                    if e_message not in ['deleted', 'missing']:
                        raise PluginError('CouchDB error: %s' % e, e)
                document = couchdoc.populate_into_dms(document)
                return document
Example #4
    def update_document_metadata(self, document):
        """Updates document with new indexes and stores old one into another revision.

        @param document: is a DMS Document() instance
        """
        self.check_user(document)
        if 'update_file' in document.options and document.options['update_file']:
            name = document.get_code()
            # We need to create the couchdb document in case it does not exist in the database.
            couchdoc = CouchDocument.get_or_create(docid=name)
            couchdoc.update_file_revisions_metadata(document)
            couchdoc.save()
        if document.old_docrule:
            old_couchdoc = None
            couchdoc = CouchDocument.get_or_create(docid=document.file_name)
            try:
                old_couchdoc = CouchDocument.get(docid=document.old_name_code)
            except Exception as e:
                log.error('%s' % e)
            if old_couchdoc:
                # Migrate from existing CouchDB document
                couchdoc.migrate_metadata_for_docrule(document, old_couchdoc)
                couchdoc.save()
                old_couchdoc.delete()
            else:
                # store from current Document() instance
                user = document.user
                couchdoc.populate_from_dms(user, document)
                couchdoc.save()
Example #5
    def remove(self, document):
        """Updates document CouchDB metadata on removal.

        (Removes CouchDB document or acts as prescribed in removal workflows)
        @param document: is a DMS Document() instance
        """
        # A 'mark deleted' call only flags the CouchDB document; nothing is removed
        code = document.get_code()
        couchdoc = CouchDocument.get(docid=code)
        if 'mark_deleted' in document.options:
            couchdoc['deleted'] = 'deleted'
            couchdoc.save()
            return document
        if 'mark_revision_deleted' in document.options:
            mark_revision = document.options['mark_revision_deleted']
            if mark_revision in couchdoc.revisions:
                couchdoc.revisions[mark_revision]['deleted'] = True
            else:
                raise PluginError('Object has no revision: %s' % mark_revision,
                                  404)
            couchdoc.save()
            return document
        if 'delete_revision' in document.options:
            revision = document.options['delete_revision']
            del couchdoc.revisions[revision]
            couchdoc.save()
            return document
        if not document.get_file_obj():
            # Doc is fully deleted from the file storage
            couchdoc.delete()
        return document
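The branch taken above is selected by a key in document.options; a sketch of one removal mode (illustrative only, and it assumes remove() lives on the same worker class as store() in Example #24):
    # Hypothetical call, for illustration only
    worker = CouchDBMetadataWorker()
    document.options['mark_revision_deleted'] = '2'   # a revision key as stored in couchdoc.revisions
    document = worker.remove(document)                # flags revision '2' as deleted, keeps the rest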
Example #6
 def document_date_range_only_search(self, cleaned_document_keys, docrule_ids):
     log.debug('Date range search only')
     resp_list = []
     startkey = [None,]
     endkey = [None,]
     for docrule_id in docrule_ids:
         startkey = [docrule_id, str_date_to_couch(cleaned_document_keys["date"])]
         endkey = [docrule_id, str_date_to_couch(cleaned_document_keys["end_date"])]
         # Getting all documents within this date range
         all_docs = CouchDocument.view(
             'dmscouch/search_date',
             classes={None: CouchDocument},
             startkey=startkey,
             endkey=endkey
         )
         # Appending to fetch docs list if not already there
         for doc in all_docs:
             doc_name = doc.get_id
             if doc_name not in resp_list:
                 resp_list.append(doc_name)
     if resp_list:
         log_data = len(resp_list)
     else:
         log_data = None
     log.debug(
         'Search results by date range: from: "%s", to: "%s", docrules: "%s", documents: "%s"' %
         (cleaned_document_keys["date"], cleaned_document_keys["end_date"], docrule_ids, log_data)
     )
     return resp_list
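A sketch of the expected inputs for the search above (illustrative only; the owning search-manager class is not shown here, so searcher stands in for it, and the date strings use whatever format str_date_to_couch() expects):
    # Illustrative inputs only
    cleaned_document_keys = {'date': '2012-01-01', 'end_date': '2012-03-31'}
    docrule_ids = ['2', '7']
    doc_ids = searcher.document_date_range_only_search(cleaned_document_keys, docrule_ids)
    # doc_ids is a de-duplicated list of CouchDB document ids within the date range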
Example #7
    def remove(self, document):
        """Updates document CouchDB metadata on removal.

        (Removes CouchDB document or acts as prescribed in removal workflows)
        @param document: is a DMS Document() instance
        """
        # A 'mark deleted' call only flags the CouchDB document; nothing is removed
        code = document.get_code()
        couchdoc = CouchDocument.get(docid=code)
        if 'mark_deleted' in document.options:
            couchdoc['deleted'] = 'deleted'
            couchdoc.save()
            return document
        if 'mark_revision_deleted' in document.options:
            mark_revision = document.options['mark_revision_deleted']
            if mark_revision in couchdoc.revisions:
                couchdoc.revisions[mark_revision]['deleted'] = True
            else:
                raise PluginError('Object has no revision: %s' % mark_revision, 404)
            couchdoc.save()
            return document
        if 'delete_revision' in document.options:
            revision = document.options['delete_revision']
            del couchdoc.revisions[revision]
            couchdoc.save()
            return document
        if not document.get_file_obj():
            # Doc is fully deleted from the file storage
            couchdoc.delete()
        return document
Example #8
    def get_found_documents(self, document_names_list):
        """
        Retrieves documents' index data for a list of document names.

        @param document_names_list: list of document ids, e.g. ['DOC0001', 'MAS0001', '...' ]
        @return: CouchDB documents list.
        """
        documents = CouchDocument.view('dmscouch/all', keys=document_names_list, include_docs=True)
        return documents
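Usage sketch (illustrative only; the owning class is not shown, so searcher stands in for it). Examples #15 and #17 show the same method with an explicit list conversion added to work around a CouchDB ViewResults iteration issue:
    # Illustrative only: fetch full CouchDB documents for a list of document codes
    found = searcher.get_found_documents(['DOC0001', 'MAS0001'])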
Example #9
def check_docs_for_existence(key, value, docrule):
    """Check if at least one document with the specified docrule, key and value exists"""
    documents = CouchDocument.view('dmscouch/search_autocomplete', key=[str(docrule), key, value], reduce=False)
    if len(documents) > 0:
        # There is at least one document of this type...
        return True
    else:
        # No such documents
        return False
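A sketch of a call (illustrative only; the key name and value are hypothetical, and the docrule argument is whatever identifier str() should turn into the docrule id used in the view key):
    # Illustrative only: is there any docrule-2 document whose 'Employee Name' is 'JOHN'?
    exists = check_docs_for_existence('Employee Name', 'JOHN', 2)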
Example #10
 def document_date_range_with_keys_search(self, cleaned_document_keys,
                                          docrule_ids):
     log.debug('Date range search with additional keys specified')
     resp_set = {}
     docs_list = {}
     # For each docrule the user search was requested for
     for docrule_id in docrule_ids:
         # Getting list of date range filtered docs for each provided secondary key
         # Except for internal keys
         for key, value in cleaned_document_keys.iteritems():
             if not (key == 'date') and not (key == 'end_date'):
                 if not isinstance(value, tuple):
                     # Normal search
                     startkey = self.convert_to_search_keys_for_date_range(
                         cleaned_document_keys, key, docrule_id)
                     endkey = self.convert_to_search_keys_for_date_range(
                         cleaned_document_keys, key, docrule_id, end=True)
                 else:
                     # Got date range key
                     startkey = self.convert_to_search_keys_for_date_range(
                         cleaned_document_keys,
                         key,
                         docrule_id,
                         date_range=True)
                     endkey = self.convert_to_search_keys_for_date_range(
                         cleaned_document_keys,
                         key,
                         docrule_id,
                         end=True,
                         date_range=True)
                 if startkey and endkey:
                     # Appending results list to mixed set of results.
                     search_res = CouchDocument.view(
                         'dmscouch/search',
                         classes={None: CouchDocument},
                         startkey=startkey,
                         endkey=endkey)
                     if docrule_id in resp_set:
                         resp_set[docrule_id].append(search_res)
                     else:
                         resp_set[docrule_id] = [search_res]
         # Extracting documents for each CouchDB response set.
         docs_list[docrule_id] = self.convert_search_res_for_range(
             resp_set, cleaned_document_keys, docrule_id)
     # Listing all documents to retrieve and getting them
     retrieve_docs = []
     for d_list in docs_list.itervalues():
         if d_list:
             for item in d_list:
                 retrieve_docs.append(item)
     log.debug(
         'Search results by date range with additional keys: "%s", docrule: "%s", documents: "%s"'
         % (cleaned_document_keys, docrule_ids, retrieve_docs))
     return retrieve_docs
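The keys dictionary above mixes plain secondary-key values with per-key date ranges passed as tuples; a sketch of the expected shape (illustrative values only; searcher stands in for the owning class, and the tuple is assumed to be a (from, to) pair):
    # Illustrative only
    cleaned_document_keys = {
        'date': '2012-01-01',                         # overall indexing-date range
        'end_date': '2012-03-31',
        'Employee Name': 'JOHN',                      # plain secondary key
        'Report Date': ('2012-01-01', '2012-02-01'),  # per-key date range, passed as a tuple
    }
    docs = searcher.document_date_range_with_keys_search(cleaned_document_keys, ['2'])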
Example #11
def check_docs_for_existence(key, value, docrule):
    """Check if at least one document with the specified docrule, key and value exists"""
    documents = CouchDocument.view('dmscouch/search_autocomplete',
                                   key=[str(docrule), key, value],
                                   reduce=False)
    if len(documents) > 0:
        # There is at least one document of this type...
        return True
    else:
        # No such documents
        return False
Example #12
def check_for_secondary_keys_pairs(input_keys_list, docrule_id):
    """Checks whether parallel key pairs already exist in the secondary keys.

    Scenario:
    An existing parallel key pair:
        JOHN 1234
    The user enters:
        MIKE 1234
    MIKE may already exist in combination with another numeric id, yet we should still issue a warning,
    because the combination of key values is new (even though no new keys have been created).
    """
    # Copy the dictionary data and operate on the copy inside this function
    sec_keys_list = {}
    suspicious_keys_list = {}
    if input_keys_list:
        for key in input_keys_list.iterkeys():
            sec_keys_list[key] = input_keys_list[key]
    p_keys_manager = ParallelKeysManager()
    mdt_manager = MetaDataTemplateManager()
    keys_list = [key for key in sec_keys_list.iterkeys()]
    # Remove keys that are not secondary keys
    for key in keys_list:
        if key in ('date', 'description'):
            del sec_keys_list[key]
    # Getting list of parallel keys for this docrule.
    mdts = mdt_manager.get_mdts_for_docrule(docrule_id)
    pkeys = p_keys_manager.get_parallel_keys_for_mdts(mdts)
    # Getting Pkeys lists.
    checked_keys = []
    for key in sec_keys_list.iterkeys():
        key_pkeys = p_keys_manager.get_parallel_keys_for_key(pkeys, key)
        pkeys_with_values = p_keys_manager.get_parallel_keys_for_pkeys(
            key_pkeys, sec_keys_list)
        # Check whether this parallel keys group was already checked.
        if pkeys_with_values not in checked_keys:
            checked_keys.append(pkeys_with_values)
            # Check every key of the parallel keys group against existing document metadata.
            for pkey, pvalue in pkeys_with_values:
                documents = CouchDocument.view('dmscouch/search_autocomplete',
                                               key=[docrule_id, pkey, pvalue],
                                               reduce=False)
                # Append key/value pairs not found in any existing document.
                if not documents:
                    suspicious_keys_list[pkey] = pvalue
    if suspicious_keys_list:
        log.debug('Found new unique key/values in secondary keys: %s' % suspicious_keys_list)
    else:
        log.debug('Found NO new unique key/values in secondary keys')
    return suspicious_keys_list
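A sketch of a call (illustrative only; the key names, values and docrule id are hypothetical):
    # Illustrative only: warn the user when this combination of parallel key values is new
    new_pairs = check_for_secondary_keys_pairs({'Employee Name': 'MIKE', 'Employee ID': '1234'}, '2')
    if new_pairs:
        pass  # e.g. ask the user to confirm the new key/value combination before saving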
Example #13
    def get_sorting_docs_indexes(self, document_list):
        """
        Retrieves main document indexes from CouchDB view.

        @param document_list: list of document names, e.g. ['ADL-0001', 'CCC-0001', ... ]
        @return: CouchDB documents "view results" Object.
                 each document contains:
                     "mdt_indexes"
                     "metadata_created_date"
                     "metadata_description"
                     "metadata_doc_type_rule_id"
        """
        documents = CouchDocument.view('dmscouch/search_main_indexes', keys=document_list)
        return documents
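Usage sketch (illustrative only; searcher stands in for the owning class). Example #16 shows the same view call with classes={None: CouchDocument}, so each result row is wrapped as a CouchDocument:
    # Illustrative only: pull just the sortable index fields for a page of documents
    indexes = searcher.get_sorting_docs_indexes(['ADL-0001', 'CCC-0001'])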
Example #14
def check_for_secondary_keys_pairs(input_keys_list, docrule_id):
    """Checks whether parallel key pairs already exist in the secondary keys.

    Scenario:
    An existing parallel key pair:
        JOHN 1234
    The user enters:
        MIKE 1234
    MIKE may already exist in combination with another numeric id, yet we should still issue a warning,
    because the combination of key values is new (even though no new keys have been created).
    """
    # Copy the dictionary data and operate on the copy inside this function
    sec_keys_list = {}
    suspicious_keys_list = {}
    if input_keys_list:
        for key in input_keys_list.iterkeys():
            sec_keys_list[key] = input_keys_list[key]
    p_keys_manager = ParallelKeysManager()
    mdt_manager = MetaDataTemplateManager()
    keys_list = [key for key in sec_keys_list.iterkeys()]
    # Remove keys that are not secondary keys
    for key in keys_list:
        if key in ('date', 'description'):
            del sec_keys_list[key]
    # Getting list of parallel keys for this docrule.
    mdts = mdt_manager.get_mdts_for_docrule(docrule_id)
    pkeys = p_keys_manager.get_parallel_keys_for_mdts(mdts)
    # Getting Pkeys lists.
    checked_keys = []
    for key in sec_keys_list.iterkeys():
        key_pkeys = p_keys_manager.get_parallel_keys_for_key(pkeys, key)
        pkeys_with_values = p_keys_manager.get_parallel_keys_for_pkeys(key_pkeys, sec_keys_list)
        # Check whether this parallel keys group was already checked.
        if pkeys_with_values not in checked_keys:
            checked_keys.append(pkeys_with_values)
            # Check every key of the parallel keys group against existing document metadata.
            for pkey, pvalue in pkeys_with_values:
                documents = CouchDocument.view('dmscouch/search_autocomplete',
                                                key=[docrule_id, pkey, pvalue],
                                                reduce=False)
                # Append key/value pairs not found in any existing document.
                if not documents:
                    suspicious_keys_list[pkey] = pvalue
    if suspicious_keys_list:
        log.debug('Found new unique key/values in secondary keys: %s' % suspicious_keys_list)
    else:
        log.debug('Found NO new unique key/values in secondary keys')
    return suspicious_keys_list
Example #15
    def get_found_documents(self, document_names_list):
        """
        Retrieves documents' index data for a list of document names.

        @param document_names_list: list of document ids, e.g. ['DOC0001', 'MAS0001', '...' ]
        @return: CouchDB documents list.
        """
        documents = CouchDocument.view('dmscouch/all',
                                       keys=document_names_list,
                                       include_docs=True)
        # Convert to a plain list to work around the couchdb ViewResults iteration bug
        results = [doc for doc in documents]
        return results
Example #16
    def get_sorting_docs_indexes(self, document_list):
        """
        Retrieves main document indexes from CouchDB view.

        @param document_list: list of document names, e.g. ['ADL-0001', 'CCC-0001', ... ]
        @return: CouchDB documents "view results" Object.
                 each document contains:
                     "mdt_indexes"
                     "metadata_created_date"
                     "metadata_description"
                     "metadata_doc_type_rule_id"
        """
        documents = CouchDocument.view('dmscouch/search_main_indexes',
                                       classes={None: CouchDocument},
                                       keys=document_list)
        return documents
Example #17
    def get_found_documents(self, document_names_list):
        """
        Retrieves documents' index data for a list of document names.

        @param document_names_list: list of document ids, e.g. ['DOC0001', 'MAS0001', '...' ]
        @return: CouchDB documents list.
        """
        documents = CouchDocument.view(
            'dmscouch/all',
            keys=document_names_list,
            include_docs=True)
        # Convert to a plain list to work around the couchdb ViewResults iteration bug
        results = [doc for doc in documents]
        return results
Example #18
 def document_date_range_with_keys_search(self, cleaned_document_keys, docrule_ids):
     log.debug('Date range search with additional keys specified')
     resp_set = {}
     docs_list = {}
     # For each docrule the user search was requested for
     for docrule_id in docrule_ids:
         # Getting list of date range filtered docs for each provided secondary key
         # Except for internal keys
         for key, value in cleaned_document_keys.iteritems():
             if not (key == 'date') and not (key == 'end_date'):
                 if not isinstance(value, tuple):
                     # Normal search
                     startkey = self.convert_to_search_keys_for_date_range(cleaned_document_keys, key, docrule_id)
                     endkey = self.convert_to_search_keys_for_date_range(cleaned_document_keys, key, docrule_id, end=True)
                 else:
                     # Got date range key
                     startkey = self.convert_to_search_keys_for_date_range(cleaned_document_keys, key, docrule_id, date_range=True)
                     endkey = self.convert_to_search_keys_for_date_range(cleaned_document_keys, key, docrule_id, end=True, date_range=True)
                 if startkey and endkey:
                     # Appending results list to mixed set of results.
                     search_res = CouchDocument.view(
                         'dmscouch/search',
                         classes={None: CouchDocument},
                         startkey=startkey,
                         endkey=endkey
                     )
                     if docrule_id in resp_set:
                         resp_set[docrule_id].append(search_res)
                     else:
                         resp_set[docrule_id] = [search_res]
         # Extracting documents for each CouchDB response set.
         docs_list[docrule_id] = self.convert_search_res_for_range(resp_set, cleaned_document_keys, docrule_id)
     # Listing all documents to retrieve and getting them
     retrieve_docs = []
     for d_list in docs_list.itervalues():
         if d_list:
             for item in d_list:
                 retrieve_docs.append(item)
     log.debug(
         'Search results by date range with additional keys: "%s", docrule: "%s", documents: "%s"' %
         (cleaned_document_keys, docrule_ids, retrieve_docs)
     )
     return retrieve_docs
Example #19
 def document_date_range_only_search(self, cleaned_document_keys,
                                     docrule_ids):
     log.debug('Date range search only')
     resp_list = []
     startkey = [None]
     endkey = [None]
     for docrule_id in docrule_ids:
         startkey = [
             docrule_id,
             str_date_to_couch(cleaned_document_keys["date"])
         ]
         endkey = [
             docrule_id,
             str_date_to_couch(cleaned_document_keys["end_date"])
         ]
         # Getting all documents within this date range
         all_docs = CouchDocument.view('dmscouch/search_date',
                                       classes={None: CouchDocument},
                                       startkey=startkey,
                                       endkey=endkey)
         # Appending to fetch docs list if not already there
         for doc in all_docs:
             doc_name = doc.get_id
             if doc_name not in resp_list:
                 resp_list.append(doc_name)
     if resp_list:
         log_data = len(resp_list)
     else:
         log_data = None
     log.debug(
         'Search results by date range: from: "%s", to: "%s", docrules: "%s", documents: "%s"'
         % (cleaned_document_keys["date"], cleaned_document_keys["end_date"], docrule_ids, log_data))
     return resp_list
Example #20
 def get_codes(self):
     deleted_codes = CouchDocument.view('dmscouch/deleted')
     codes = [doc.get_id for doc in deleted_codes]
     return codes
Example #21
    def store(self, document):
        """Stores CouchDB object into DB.

        (Updates or overwrites CouchDB document)

        @param document: is a DMS Document() instance
        """
        # FIXME: Refactor me. We should upload new "secondary_indexes" or metatags with update() workflow,
        # not a create(), like it is now. Because this method is a mess.
        docrule = document.get_docrule()
        # Do nothing for uncategorized (no docrule) documents
        if docrule.uncategorized:
            return document
        else:
            user = self.check_user(document)
            processor = DocumentProcessor()
            # FIXME: there might be more than one mapping
            mapping = docrule.get_docrule_plugin_mappings()
            # Do nothing for documents whose mapping has no DB storage plugins
            if not mapping.get_database_storage_plugins():
                return document
            else:
                # If not all required metadata exists, fetch it via the docrule retrieve sequence
                if not document.file_revision_data:
                    # HACK: Preserving db_info here... (May be Solution!!!)
                    db_info = document.get_db_info()
                    document = processor.read(document.file_name, options={
                        'only_metadata': True,
                        'user': document.user
                    })

                    # Save NEW file_revision_data ONLY if present in the newly uploaded doc (preserving old indexes)
                    if db_info:
                        # Storing new indexes
                        document.set_db_info(db_info)
                    else:
                        # TODO: move this code into a proper place (UPDATE method)
                        # Ask CouchDB whether old file_revision_data exists and update it properly
                        current_revisions = document.file_revision_data
                        try:
                            # Only if the document exists in the DB; fail gracefully if not.
                            temp_doc = self.retrieve(document)
                            old_metadata = temp_doc.get_db_info()
                            old_index_revisions = None
                            if old_metadata['mdt_indexes']:
                                # Preserving Description, User, Created Date, indexes revisions
                                if temp_doc.index_revisions:
                                    old_index_revisions = temp_doc.index_revisions
                                old_metadata['mdt_indexes']['description'] = old_metadata['description']
                                old_metadata['mdt_indexes']['metadata_user_name'] = old_metadata['metadata_user_name']
                                old_metadata['mdt_indexes']['metadata_user_id'] = old_metadata['metadata_user_id']
                                old_cr_date = datetime.datetime.strftime(
                                    old_metadata['metadata_created_date'],
                                    settings.DATE_FORMAT
                                )
                                old_metadata['mdt_indexes']['date'] = old_cr_date
                                document.set_db_info(old_metadata['mdt_indexes'])
                                document.set_index_revisions(old_index_revisions)
                                document.set_file_revisions_data(current_revisions)
                            else:
                                # Preserving set revisions anyway.
                                document.set_file_revisions_data(current_revisions)
                        except ResourceNotFound:
                            pass
                # Update tags to sync with the Django DB
                self.sync_document_tags(document)
                # Save a new CouchDB document with this _id, overwriting any existing one
                couchdoc = CouchDocument()

                couchdoc.populate_from_dms(user, document)
                couchdoc.save(force_update=True)
                return document
Example #22
def process_pkeys_request(docrule_id, key_name, autocomplete_req, doc_mdts, letters_limit=2, suggestions_limit=8):
    """Helper method to process MDTs for autocomplete suggestions.

    # We can collect all the document keys for each docrule in the MDT related to the requested field and load them into a queue.
    # Then check them for duplicate values and/or build a big index with all the document keys in it,
    # so that we fetch only the document indexes we need on the first request (instead of 'include_docs=True').
    # E.g. make the autocomplete Couch view output an index with all documents' mdt_indexes ONLY.
    #
    # The total amount of requests will be 3 instead of 2 (for 2 docrules <> 1 MDT) but they will be smaller.
    # That will be good for, say, 1 000 000 documents. However, DB size will rise too.
    # (Because we would in fact copy all the doc indexes into a separate response specific to the typeahead.)
    # The final step is to load all unique suggestion documents that pass our filters.
    # (Or, if we build this special index, it won't be necessary.)
    # (Only if we require parallel keys to be parsed.)
    # It can be done by specifying multiple keys that we need to load here ('key' vs 'keys' arguments in the CouchDB request).
    """
    # TODO: Can be optimised for huge document counts in future (step: scalability testing)
    resp = []
    view_name = 'dmscouch/search_autocomplete'
    manager = ParallelKeysManager()
    for mdt in doc_mdts.itervalues():
        mdt_keys = [mdt[u'fields'][mdt_key][u'field_name'] for mdt_key in mdt[u'fields']]
        log.debug('mdt_parallel_keys selected for suggestion MDT-s keys: %s' % mdt_keys)
        if key_name in mdt_keys:
            # Autocomplete key belongs to this MDT
            mdt_docrules = mdt[u'docrule_id']
            if docrule_id:
                # In case of index get Parallel keys from all MDT for docrule
                mdt_fields = manager.get_keys_for_docrule(docrule_id, doc_mdts)
            else:
                # In case of search get only from selected MDT
                mdt_fields = manager.get_parallel_keys_for_mdts(doc_mdts)
            pkeys = manager.get_parallel_keys_for_key(mdt_fields, key_name)
            for docrule in mdt_docrules:
                # Only search through other docrules if the response is not full yet
                if len(resp) > suggestions_limit:
                    break
                # db call to search in docs
                if pkeys:
                    # Take no action if not enough letters have been typed
                    if len(autocomplete_req) > letters_limit:
                        # Suggestion for several parallel keys
                        documents = CouchDocument.view(
                            view_name,
                            startkey=[docrule, key_name, autocomplete_req],
                            endkey=[docrule, key_name, unicode(autocomplete_req)+u'\ufff0'],
                            include_docs=True,
                            reduce=False
                        )
                        # Adding each selected value to suggestions list
                        for doc in documents:
                            # Only append values until we've got 'suggestions_limit' results
                            if len(resp) > suggestions_limit:
                                break
                            resp_array = {}
                            if pkeys:
                                for pkey in pkeys:
                                    resp_array[pkey['field_name']] = doc.mdt_indexes[pkey['field_name']]
                            suggestion = json.dumps(resp_array)
                            # Filter out suggestions already in the results
                            if suggestion not in resp:
                                resp.append(suggestion)
                else:
                    # Simple 'single' key suggestion
                    documents = CouchDocument.view(
                        view_name,
                        startkey=[docrule, key_name, autocomplete_req],
                        endkey=[docrule, key_name, unicode(autocomplete_req)+u'\ufff0'],
                        group=True,
                    )
                    # Fetching unique responses to suggestion set
                    for doc in documents:
                        # Only append values until we've got 'suggestions_limit' results
                        if len(resp) > suggestions_limit:
                            break
                        resp_array = {key_name: doc['key'][2]}
                        suggestion = json.dumps(resp_array)
                        if suggestion not in resp:
                            resp.append(suggestion)
    return resp
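A sketch of how an autocomplete request might flow into this helper (illustrative values only; doc_mdts is assumed to be the MDT dictionary fetched elsewhere, e.g. via MetaDataTemplateManager, keyed the way the loop above expects):
    # Illustrative only: suggest values for the 'Employee Name' field as the user types 'JOH'
    suggestions = process_pkeys_request('2', 'Employee Name', 'JOH', doc_mdts, suggestions_limit=8)
    # Each entry is a JSON string of {field_name: value} pairs ready for the typeahead widget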
Example #23
def process_pkeys_request(docrule_id, key_name, autocomplete_req, doc_mdts, letters_limit=2, suggestions_limit=8):
    """
    Helper method to process MDTs for autocomplete suggestions.

    # We can collect all the document keys for each docrule in the MDT related to the requested field and load them into a queue.
    # Then check them for duplicate values and/or build a big index with all the document keys in it,
    # so that we fetch only the document indexes we need on the first request (instead of 'include_docs=True').
    # E.g. make the autocomplete Couch view output an index with all documents' mdt_indexes ONLY.
    #
    # The total amount of requests will be 3 instead of 2 (for 2 docrules <> 1 MDT) but they will be smaller.
    # That will be good for, say, 1 000 000 documents. However, DB size will rise too.
    # (Because we would in fact copy all the doc indexes into a separate response specific to the typeahead.)
    # The final step is to load all unique suggestion documents that pass our filters.
    # (Or, if we build this special index, it won't be necessary.)
    # (Only if we require parallel keys to be parsed.)
    # It can be done by specifying multiple keys that we need to load here ('key' vs 'keys' arguments in the CouchDB request).
    """
    # TODO: Can be optimised for huge document counts in future (step: scalability testing)
    resp = []
    view_name = 'dmscouch/search_autocomplete'
    manager = ParallelKeysManager()
    for mdt in doc_mdts.itervalues():
        mdt_keys = [mdt[u'fields'][mdt_key][u'field_name'] for mdt_key in mdt[u'fields']]
        log.debug('mdt_parallel_keys selected for suggestion MDT-s keys: %s' % mdt_keys)
        if key_name in mdt_keys:
            # Autocomplete key belongs to this MDT
            mdt_docrules = mdt[u'docrule_id']
            if docrule_id:
                # In case of index get Parallel keys from all MDT for docrule
                mdt_fields = manager.get_keys_for_docrule(docrule_id, doc_mdts)
            else:
                # In case of search get only from selected MDT
                mdt_fields = manager.get_parallel_keys_for_mdts(doc_mdts)
            pkeys = manager.get_parallel_keys_for_key(mdt_fields, key_name)
            for docrule in mdt_docrules:
                # Only search through other docrules if the response is not full yet
                if len(resp) > suggestions_limit:
                    break
                # db call to search in docs
                if pkeys:
                    # Take no action if not enough letters have been typed
                    if len(autocomplete_req) > letters_limit:
                        # Suggestion for several parallel keys
                        documents = CouchDocument.view(
                            view_name,
                            startkey=[docrule, key_name, autocomplete_req],
                            endkey=[docrule, key_name, unicode(autocomplete_req)+u'\ufff0'],
                            include_docs=True,
                            reduce=False
                        )
                        # Adding each selected value to suggestions list
                        for doc in documents:
                            # Only append values until we've got 'suggestions_limit' results
                            if len(resp) > suggestions_limit:
                                break
                            resp_array = {}
                            if pkeys:
                                for pkey in pkeys:
                                    resp_array[pkey['field_name']] = doc.mdt_indexes[pkey['field_name']]
                            suggestion = json.dumps(resp_array)
                            # Filter out suggestions already in the results
                            if suggestion not in resp:
                                resp.append(suggestion)
                else:
                    # Simple 'single' key suggestion
                    documents = CouchDocument.view(
                        view_name,
                        startkey=[docrule, key_name, autocomplete_req],
                        endkey=[docrule, key_name, unicode(autocomplete_req)+u'\ufff0'],
                        group=True,
                    )
                    # Fetching unique responses to suggestion set
                    for doc in documents:
                        # Only append values until we've got 'suggestions_limit' results
                        if len(resp) > suggestions_limit:
                            break
                        resp_array = {key_name: doc['key'][2]}
                        suggestion = json.dumps(resp_array)
                        if suggestion not in resp:
                            resp.append(suggestion)
    return resp
Example #24
class CouchDBMetadataWorker(object):
    """Stores metadata in CouchDB DatabaseManager.

    Handles required logic for metadata <==> Document(object) manipulations.
    """
    def store(self, document):
        """Stores CouchDB object into DB.

        (Updates or overwrites CouchDB document)

        @param document: is a DMS Document() instance
        """
        # FIXME: Refactor me. We should upload new "secondary_indexes" or metatags with update() workflow,
        # not a create(), like it is now. Because this method is a mess.
        docrule = document.get_docrule()
        # Do nothing for uncategorized (no docrule) documents
        if docrule.uncategorized:
            return document
        else:
            user = self.check_user(document)
            processor = DocumentProcessor()
            # FIXME: there might be more than one mapping
            mapping = docrule.get_docrule_plugin_mappings()
            # Do nothing for documents whose mapping has no DB storage plugins
            if not mapping.get_database_storage_plugins():
                return document
            else:
                # If not all required metadata exists, fetch it via the docrule retrieve sequence
                if not document.file_revision_data:
                    # HACK: Preserving db_info here... (May be Solution!!!)
                    db_info = document.get_db_info()
                    document = processor.read(document.file_name,
                                              options={
                                                  'only_metadata': True,
                                                  'user': document.user
                                              })

                    # Save NEW file_revision_data ONLY if present in the newly uploaded doc (preserving old indexes)
                    if db_info:
                        # Storing new indexes
                        document.set_db_info(db_info)
                    else:
                        # TODO: move this code into a proper place (UPDATE method)
                        # Ask CouchDB whether old file_revision_data exists and update it properly
                        current_revisions = document.file_revision_data
                        try:
                            # Only if the document exists in the DB; fail gracefully if not.
                            temp_doc = self.retrieve(document)
                            old_metadata = temp_doc.get_db_info()
                            old_index_revisions = None
                            if old_metadata['mdt_indexes']:
                                # Preserving Description, User, Created Date, indexes revisions
                                if temp_doc.index_revisions:
                                    old_index_revisions = temp_doc.index_revisions
                                old_metadata['mdt_indexes']['description'] = old_metadata['description']
                                old_metadata['mdt_indexes']['metadata_user_name'] = old_metadata['metadata_user_name']
                                old_metadata['mdt_indexes']['metadata_user_id'] = old_metadata['metadata_user_id']
                                old_cr_date = datetime.datetime.strftime(
                                    old_metadata['metadata_created_date'],
                                    settings.DATE_FORMAT)
                                old_metadata['mdt_indexes']['date'] = old_cr_date
                                document.set_db_info(old_metadata['mdt_indexes'])
                                document.set_index_revisions(old_index_revisions)
                                document.set_file_revisions_data(current_revisions)
                            else:
                                # Preserving set revisions anyway.
                                document.set_file_revisions_data(current_revisions)
                        except ResourceNotFound:
                            pass
                # Update tags to sync with the Django DB
                self.sync_document_tags(document)
                # Save a new CouchDB document with this _id, overwriting any existing one
                couchdoc = CouchDocument()

                couchdoc.populate_from_dms(user, document)
                couchdoc.save(force_update=True)
                return document

    def update_document_metadata(self, document):
        """Updates document with new indexes and stores old one into another revision.

        @param document: is a DMS Document() instance
        """
        self.check_user(document)
        if 'update_file' in document.options and document.options['update_file']:
            name = document.get_code()
            # We need to create the couchdb document in case it does not exist in the database.
            couchdoc = CouchDocument.get_or_create(docid=name)
            couchdoc.update_file_revisions_metadata(document)
            couchdoc.save()
        if document.old_docrule:
            old_couchdoc = None
            couchdoc = CouchDocument.get_or_create(docid=document.file_name)
            try:
                old_couchdoc = CouchDocument.get(docid=document.old_name_code)
            except Exception as e:
                log.error('%s' % e)
            if old_couchdoc:
                # Migrate from existing CouchDB document
                couchdoc.migrate_metadata_for_docrule(document, old_couchdoc)
                couchdoc.save()
                old_couchdoc.delete()
            else:
                # store from current Document() instance
                user = document.user
                couchdoc.populate_from_dms(user, document)
                couchdoc.save()
        # We have to do it after moving document names.
        if document.new_indexes and document.file_name:
            couchdoc = CouchDocument.get(docid=document.file_name)
            couchdoc.update_indexes_revision(document)
            couchdoc.save()
            document = couchdoc.populate_into_dms(document)
        return document
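An end-to-end sketch of the worker above (illustrative only; in the real system the plugin pipeline drives these calls and prepares the Document instance and its options):
    # Illustrative only
    worker = CouchDBMetadataWorker()
    document = worker.store(document)                     # create or overwrite the CouchDB metadata
    document = worker.update_document_metadata(document)  # later: apply new indexes, keeping old ones as a revision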
Example #25
    def store(self, document):
        """Stores CouchDB object into DB.

        (Updates or overwrites CouchDB document)

        @param document: is a DMS Document() instance
        """
        # FIXME: Refactor me. We should upload new "secondary_indexes" or metatags with update() workflow,
        # not a create(), like it is now. Because this method is a mess.
        docrule = document.get_docrule()
        # Do nothing for uncategorized (no docrule) documents
        if docrule.uncategorized:
            return document
        else:
            user = self.check_user(document)
            processor = DocumentProcessor()
            # FIXME: there might be more than one mapping
            mapping = docrule.get_docrule_plugin_mappings()
            # Do nothing for documents whose mapping has no DB storage plugins
            if not mapping.get_database_storage_plugins():
                return document
            else:
                # If not all required metadata exists, fetch it via the docrule retrieve sequence
                if not document.file_revision_data:
                    # HACK: Preserving db_info here... (May be Solution!!!)
                    db_info = document.get_db_info()
                    document = processor.read(document.file_name,
                                              options={
                                                  'only_metadata': True,
                                                  'user': document.user
                                              })

                    # Save NEW file_revision_data ONLY if present in the newly uploaded doc (preserving old indexes)
                    if db_info:
                        # Storing new indexes
                        document.set_db_info(db_info)
                    else:
                        # TODO: move this code into a proper place (UPDATE method)
                        # Ask CouchDB whether old file_revision_data exists and update it properly
                        current_revisions = document.file_revision_data
                        try:
                            # Only if the document exists in the DB; fail gracefully if not.
                            temp_doc = self.retrieve(document)
                            old_metadata = temp_doc.get_db_info()
                            old_index_revisions = None
                            if old_metadata['mdt_indexes']:
                                # Preserving Description, User, Created Date, indexes revisions
                                if temp_doc.index_revisions:
                                    old_index_revisions = temp_doc.index_revisions
                                old_metadata['mdt_indexes']['description'] = old_metadata['description']
                                old_metadata['mdt_indexes']['metadata_user_name'] = old_metadata['metadata_user_name']
                                old_metadata['mdt_indexes']['metadata_user_id'] = old_metadata['metadata_user_id']
                                old_cr_date = datetime.datetime.strftime(
                                    old_metadata['metadata_created_date'],
                                    settings.DATE_FORMAT)
                                old_metadata['mdt_indexes']['date'] = old_cr_date
                                document.set_db_info(old_metadata['mdt_indexes'])
                                document.set_index_revisions(old_index_revisions)
                                document.set_file_revisions_data(current_revisions)
                            else:
                                # Preserving set revisions anyway.
                                document.set_file_revisions_data(current_revisions)
                        except ResourceNotFound:
                            pass
                # Update tags to sync with the Django DB
                self.sync_document_tags(document)
                # Save a new CouchDB document with this _id, overwriting any existing one
                couchdoc = CouchDocument()

                couchdoc.populate_from_dms(user, document)
                couchdoc.save(force_update=True)
                return document
Example #26
 def get_revisions(self):
     """ Returns a list of (code, revision) tuples for documents' deleted revisions,
     e.g.: [('ADL-0001', '1'), ('BBB-0001', '3'), ... ] """
     deleted_revisions = CouchDocument.view('dmscouch/deleted_files_revisions')
     results = [(doc.get_id, doc['deleted_revision']) for doc in deleted_revisions]
     return results