def retrieve(self, document):
    """Read document CouchDB metadata into a DMS Document() instance.

    @param document: is a DMS Document() instance
    @return: the same Document() instance, populated with CouchDB metadata
        when the docrule mapping enables database storage plugins.
    """
    docrule = document.get_docrule()
    mapping = docrule.get_docrule_plugin_mappings()
    # No actions for uncategorized (no docrule) documents
    if docrule.uncategorized:
        return document
    # No actions for documents whose mapping has no DB storage plugins
    if not mapping.get_database_storage_plugins():
        return document
    self.check_user(document)
    doc_name = document.get_code()
    couchdoc = CouchDocument()
    try:
        couchdoc = CouchDocument.get(docid=doc_name)
    except Exception as e:  # py2.6+/py3 compatible syntax
        # Skip deleted/missing errors (they are not used in DMS);
        # anything else is a genuine CouchDB failure.
        if str(e) not in ['deleted', 'missing']:
            raise PluginError('CouchDB error: %s' % e, e)
    document = couchdoc.populate_into_dms(document)
    return document
def update_document_metadata(self, document):
    """Update document with new indexes, storing old ones in another revision.

    @param document: is a DMS Document() instance
    """
    self.check_user(document)
    if 'update_file' in document.options and document.options['update_file']:
        name = document.get_code()
        # We need to create the couchdb document in case it does not exist
        # in the database yet.
        couchdoc = CouchDocument.get_or_create(docid=name)
        couchdoc.update_file_revisions_metadata(document)
        couchdoc.save()
    if document.old_docrule:
        old_couchdoc = None
        couchdoc = CouchDocument.get_or_create(docid=document.file_name)
        try:
            old_couchdoc = CouchDocument.get(docid=document.old_name_code)
        except Exception as e:  # py2.6+/py3 compatible syntax
            # Old document may legitimately be absent; log and continue.
            log.error('%s' % e)
        if old_couchdoc:
            # Migrate from existing CouchDB document
            couchdoc.migrate_metadata_for_docrule(document, old_couchdoc)
            couchdoc.save()
            old_couchdoc.delete()
        else:
            # Store from current Document() instance
            user = document.user
            couchdoc.populate_from_dms(user, document)
            couchdoc.save()
def update_document_metadata(self, document):
    """Update document with new indexes, storing old ones in another revision.

    @param document: is a DMS Document() instance
    """
    self.check_user(document)
    if 'update_file' in document.options and document.options['update_file']:
        name = document.get_code()
        # We need to create the couchdb document in case it does not exist
        # in the database yet.
        couchdoc = CouchDocument.get_or_create(docid=name)
        couchdoc.update_file_revisions_metadata(document)
        couchdoc.save()
    if document.old_docrule:
        old_couchdoc = None
        couchdoc = CouchDocument.get_or_create(docid=document.file_name)
        try:
            old_couchdoc = CouchDocument.get(docid=document.old_name_code)
        except Exception as e:  # py2.6+/py3 compatible syntax
            # Old document may legitimately be absent; log and continue.
            log.error('%s' % e)
        if old_couchdoc:
            # Migrate from existing CouchDB document
            couchdoc.migrate_metadata_for_docrule(document, old_couchdoc)
            couchdoc.save()
            old_couchdoc.delete()
        else:
            # Store from current Document() instance
            user = document.user
            couchdoc.populate_from_dms(user, document)
            couchdoc.save()
def remove(self, document):
    """Update document CouchDB metadata on removal.

    Removes the CouchDB document, or acts as prescribed in removal
    workflows (mark deleted / mark revision deleted / delete revision).

    @param document: is a DMS Document() instance
    @raise PluginError: (404) when marking a revision that does not exist
    """
    code = document.get_code()
    couchdoc = CouchDocument.get(docid=code)
    # Doing nothing destructive for a "mark deleted" call
    if 'mark_deleted' in document.options:
        couchdoc['deleted'] = 'deleted'
        couchdoc.save()
        return document
    if 'mark_revision_deleted' in document.options:
        mark_revision = document.options['mark_revision_deleted']
        if mark_revision not in couchdoc.revisions:
            raise PluginError('Object has no revision: %s' % mark_revision, 404)
        couchdoc.revisions[mark_revision]['deleted'] = True
        couchdoc.save()
        return document
    if 'delete_revision' in document.options:
        revision = document.options['delete_revision']
        del couchdoc.revisions[revision]
        couchdoc.save()
        return document
    if not document.get_file_obj():
        # Doc is fully deleted from the file storage
        couchdoc.delete()
    # BUG FIX: original fell off the end (returning None) when a file
    # object still existed; return the document on every path.
    return document
def document_date_range_only_search(self, cleaned_document_keys, docrule_ids):
    """Search documents by creation date range only (no secondary keys).

    @param cleaned_document_keys: dict with 'date' and 'end_date' entries
    @param docrule_ids: iterable of docrule ids to search through
    @return: list of matching document names (codes)
    """
    log.debug('Date range search only')
    resp_list = []
    startkey = [None]
    endkey = [None]
    for docrule_id in docrule_ids:
        startkey = [docrule_id, str_date_to_couch(cleaned_document_keys["date"])]
        endkey = [docrule_id, str_date_to_couch(cleaned_document_keys["end_date"])]
        # Getting all documents within this date range
        all_docs = CouchDocument.view(
            'dmscouch/search_date',
            classes={None: CouchDocument},
            startkey=startkey,
            endkey=endkey
        )
        # Appending to fetched docs list if not already there
        for doc in all_docs:
            doc_name = doc.get_id
            if doc_name not in resp_list:
                resp_list.append(doc_name)
    log_data = len(resp_list) if resp_list else None
    # BUG FIX: index [0] is the docrule id; the "from"/"to" placeholders
    # should report the date bounds (last key element).
    log.debug(
        'Search results by date range: from: "%s", to: "%s", docrules: "%s", documents: "%s"'
        % (startkey[-1], endkey[-1], docrule_ids, log_data)
    )
    return resp_list
def get_found_documents(self, document_names_list):
    """Fetch documents index data for the given document names.

    @param document_names_list: list of document id's, e.g. ['DOC0001', 'MAS0001', '...' ]
    @return: CouchDB documents list.
    """
    view_results = CouchDocument.view(
        'dmscouch/all',
        keys=document_names_list,
        include_docs=True,
    )
    return view_results
def check_docs_for_existence(key, value, docrule):
    """Check if at least one document with specified docrule, key and value exists.

    @param key: secondary index key name
    @param value: secondary index key value
    @param docrule: docrule id (stringified for the view key)
    @return: True when at least one matching document exists, False otherwise
    """
    documents = CouchDocument.view(
        'dmscouch/search_autocomplete',
        key=[str(docrule), key, value],
        reduce=False
    )
    # len() instead of __len__(); the comparison is already a boolean,
    # so no if/else returning True/False is needed.
    return len(documents) > 0
def document_date_range_with_keys_search(self, cleaned_document_keys, docrule_ids):
    """Search documents by date range combined with secondary index keys.

    For every docrule id, each non-internal key (anything but 'date' /
    'end_date') is converted into a CouchDB start/end key pair and queried
    against the 'dmscouch/search' view; the per-docrule view results are
    then reduced to a flat list of documents to retrieve.

    @param cleaned_document_keys: dict of search keys; a tuple value marks
        a date-range key (see convert_to_search_keys_for_date_range)
    @param docrule_ids: iterable of docrule ids to search through
    @return: list of documents to retrieve
    """
    log.debug('Date range search with additional keys specified')
    resp_set = {}
    docs_list = {}
    # For each docrule user search is requested
    for docrule_id in docrule_ids:
        # Getting list of date range filtered docs for each provided secondary key
        # Except for internal keys ('date' / 'end_date')
        for key, value in cleaned_document_keys.iteritems():
            if not (key == 'date') and not (key == 'end_date'):
                if not value.__class__.__name__ == 'tuple':
                    # Normal search
                    startkey = self.convert_to_search_keys_for_date_range(
                        cleaned_document_keys, key, docrule_id)
                    endkey = self.convert_to_search_keys_for_date_range(
                        cleaned_document_keys, key, docrule_id, end=True)
                else:
                    # Got date range key (tuple value)
                    startkey = self.convert_to_search_keys_for_date_range(
                        cleaned_document_keys, key, docrule_id, date_range=True)
                    endkey = self.convert_to_search_keys_for_date_range(
                        cleaned_document_keys, key, docrule_id, end=True, date_range=True)
                if startkey and endkey:
                    # Appending results list to mixed set of results.
                    search_res = CouchDocument.view(
                        'dmscouch/search',
                        classes={None: CouchDocument},
                        startkey=startkey,
                        endkey=endkey)
                    if docrule_id in resp_set.iterkeys():
                        resp_set[docrule_id].append(search_res)
                    else:
                        resp_set[docrule_id] = [search_res]
        # Extracting documents for each CouchDB response set.
        docs_list[docrule_id] = self.convert_search_res_for_range(
            resp_set, cleaned_document_keys, docrule_id)
    # Listing all documents to retrieve and getting them
    retrieve_docs = []
    for d_list in docs_list.itervalues():
        if d_list:
            for item in d_list:
                retrieve_docs.append(item)
    log.debug(
        'Search results by date range with additional keys: "%s", docrule: "%s", documents: "%s"' %
        (cleaned_document_keys, docrule_ids, map(lambda doc: doc, retrieve_docs)))
    return retrieve_docs
def check_for_secondary_keys_pairs(input_keys_list, docrule_id):
    """Check if parallel key pairs already exist in Secondary Keys.

    Scenario: an existing parallel key pair is JOHN 1234 and the user enters
    MIKE 1234, where MIKE already exists in combination with another numeric
    id. We should still issue a warning: the combination of key values is
    new, even though no new keys have been created.

    @param input_keys_list: dict of secondary keys {name: value}
    @param docrule_id: docrule id used in the CouchDB view key
    @return: dict of key/value pairs not yet present in any document metadata
    """
    # Copying dictionary data and operating on the copy in this function
    sec_keys_list = {}
    suspicious_keys_list = {}
    if input_keys_list:
        sec_keys_list = dict(input_keys_list)
    p_keys_manager = ParallelKeysManager()
    mdt_manager = MetaDataTemplateManager()
    # Cleaning out keys that are not secondary keys
    # (iterate over a snapshot so deletion during iteration is safe)
    for key in list(sec_keys_list):
        if key in ('date', 'description'):
            del sec_keys_list[key]
    # Getting list of parallel keys for this docrule.
    mdts = mdt_manager.get_mdts_for_docrule(docrule_id)
    pkeys = p_keys_manager.get_parallel_keys_for_mdts(mdts)
    # Getting Pkeys lists.
    checked_keys = []
    for key in sec_keys_list:
        key_pkeys = p_keys_manager.get_parallel_keys_for_key(pkeys, key)
        pkeys_with_values = p_keys_manager.get_parallel_keys_for_pkeys(
            key_pkeys, sec_keys_list)
        # Checking if this parallel keys group was already checked.
        if pkeys_with_values not in checked_keys:
            checked_keys.append(pkeys_with_values)
            # Checking every key of the parallel key group for existence
            # in any document metadata already stored.
            for pkey, pvalue in pkeys_with_values:
                documents = CouchDocument.view(
                    'dmscouch/search_autocomplete',
                    key=[docrule_id, pkey, pvalue],
                    reduce=False)
                # Appending non existing keys into list to be checked.
                if not documents:
                    suspicious_keys_list[pkey] = pvalue
    if suspicious_keys_list:
        # BUG FIX: the dict was passed as a lazy logging argument with no
        # '%s' placeholder, which breaks log record formatting at emit time.
        log.debug('Found new unique key/values in secondary keys: %s',
                  suspicious_keys_list)
    else:
        log.debug('Found NO new unique key/values in secondary keys')
    return suspicious_keys_list
def get_sorting_docs_indexes(self, document_list):
    """Retrieve main document indexes from the CouchDB view.

    @param document_list: list of document names, e.g. ['ADL-0001', 'CCC-0001', ... ]
    @return: CouchDB documents "view results" Object. Each document contains:
        "mdt_indexes", "metadata_created_date", "metadata_description",
        "metadata_doc_type_rule_id"
    """
    view_results = CouchDocument.view(
        'dmscouch/search_main_indexes',
        keys=document_list,
    )
    return view_results
def check_for_secondary_keys_pairs(input_keys_list, docrule_id):
    """Check if parallel key pairs already exist in Secondary Keys.

    Scenario: an existing parallel key pair is JOHN 1234 and the user enters
    MIKE 1234, where MIKE already exists in combination with another numeric
    id. We should still issue a warning: the combination of key values is
    new, even though no new keys have been created.

    @param input_keys_list: dict of secondary keys {name: value}
    @param docrule_id: docrule id used in the CouchDB view key
    @return: dict of key/value pairs not yet present in any document metadata
    """
    # Copying dictionary data and operating on the copy in this function
    sec_keys_list = {}
    suspicious_keys_list = {}
    if input_keys_list:
        sec_keys_list = dict(input_keys_list)
    p_keys_manager = ParallelKeysManager()
    mdt_manager = MetaDataTemplateManager()
    # Cleaning out keys that are not secondary keys
    # (iterate over a snapshot so deletion during iteration is safe)
    for key in list(sec_keys_list):
        if key in ('date', 'description'):
            del sec_keys_list[key]
    # Getting list of parallel keys for this docrule.
    mdts = mdt_manager.get_mdts_for_docrule(docrule_id)
    pkeys = p_keys_manager.get_parallel_keys_for_mdts(mdts)
    # Getting Pkeys lists.
    checked_keys = []
    for key in sec_keys_list:
        key_pkeys = p_keys_manager.get_parallel_keys_for_key(pkeys, key)
        pkeys_with_values = p_keys_manager.get_parallel_keys_for_pkeys(
            key_pkeys, sec_keys_list)
        # Checking if this parallel keys group was already checked.
        if pkeys_with_values not in checked_keys:
            checked_keys.append(pkeys_with_values)
            # Checking every key of the parallel key group for existence
            # in any document metadata already stored.
            for pkey, pvalue in pkeys_with_values:
                documents = CouchDocument.view(
                    'dmscouch/search_autocomplete',
                    key=[docrule_id, pkey, pvalue],
                    reduce=False)
                # Appending non existing keys into list to be checked.
                if not documents:
                    suspicious_keys_list[pkey] = pvalue
    if suspicious_keys_list:
        # BUG FIX: the dict was passed as a lazy logging argument with no
        # '%s' placeholder, which breaks log record formatting at emit time.
        log.debug('Found new unique key/values in secondary keys: %s',
                  suspicious_keys_list)
    else:
        log.debug('Found NO new unique key/values in secondary keys')
    return suspicious_keys_list
def get_found_documents(self, document_names_list):
    """Retrieve documents index data by document names list.

    @param document_names_list: list of document id's, e.g. ['DOC0001', 'MAS0001', '...' ]
    @return: CouchDB documents list.
    """
    documents = CouchDocument.view(
        'dmscouch/all',
        keys=document_names_list,
        include_docs=True)
    # Materialize the results to omit a couchdb ViewResults iteration bug;
    # list() replaces the manual append loop.
    return list(documents)
def get_sorting_docs_indexes(self, document_list):
    """Retrieve main document indexes from the CouchDB view.

    @param document_list: list of document names, e.g. ['ADL-0001', 'CCC-0001', ... ]
    @return: CouchDB documents "view results" Object. Each document contains:
        "mdt_indexes", "metadata_created_date", "metadata_description",
        "metadata_doc_type_rule_id"
    """
    return CouchDocument.view(
        'dmscouch/search_main_indexes',
        classes={None: CouchDocument},
        keys=document_list,
    )
def get_found_documents(self, document_names_list):
    """Retrieve documents index data by document names list.

    @param document_names_list: list of document id's, e.g. ['DOC0001', 'MAS0001', '...' ]
    @return: CouchDB documents list.
    """
    documents = CouchDocument.view(
        'dmscouch/all',
        keys=document_names_list,
        include_docs=True)
    # Materialize the results to omit a couchdb ViewResults iteration bug;
    # list() replaces the manual append loop.
    return list(documents)
def document_date_range_with_keys_search(self, cleaned_document_keys, docrule_ids):
    """Search documents by date range combined with secondary index keys.

    For every docrule id, each non-internal key (anything but 'date' /
    'end_date') is converted into a CouchDB start/end key pair and queried
    against the 'dmscouch/search' view; the per-docrule view results are
    then reduced to a flat list of documents to retrieve.

    @param cleaned_document_keys: dict of search keys; a tuple value marks
        a date-range key (see convert_to_search_keys_for_date_range)
    @param docrule_ids: iterable of docrule ids to search through
    @return: list of documents to retrieve
    """
    log.debug('Date range search with additional keys specified')
    resp_set = {}
    docs_list = {}
    # For each docrule user search is requested
    for docrule_id in docrule_ids:
        # Getting list of date range filtered docs for each provided secondary key
        # Except for internal keys ('date' / 'end_date')
        for key, value in cleaned_document_keys.iteritems():
            if not (key == 'date') and not (key == 'end_date'):
                if not value.__class__.__name__ == 'tuple':
                    # Normal search
                    startkey = self.convert_to_search_keys_for_date_range(cleaned_document_keys, key, docrule_id)
                    endkey = self.convert_to_search_keys_for_date_range(cleaned_document_keys, key, docrule_id, end=True)
                else:
                    # Got date range key (tuple value)
                    startkey = self.convert_to_search_keys_for_date_range(cleaned_document_keys, key, docrule_id, date_range=True)
                    endkey = self.convert_to_search_keys_for_date_range(cleaned_document_keys, key, docrule_id, end=True, date_range=True)
                if startkey and endkey:
                    # Appending results list to mixed set of results.
                    search_res = CouchDocument.view(
                        'dmscouch/search',
                        classes={None: CouchDocument},
                        startkey=startkey,
                        endkey=endkey
                    )
                    if docrule_id in resp_set.iterkeys():
                        resp_set[docrule_id].append(search_res)
                    else:
                        resp_set[docrule_id] = [search_res]
        # Extracting documents for each CouchDB response set.
        docs_list[docrule_id] = self.convert_search_res_for_range(resp_set, cleaned_document_keys, docrule_id)
    # Listing all documents to retrieve and getting them
    retrieve_docs = []
    for d_list in docs_list.itervalues():
        if d_list:
            for item in d_list:
                retrieve_docs.append(item)
    log.debug(
        'Search results by date range with additional keys: "%s", docrule: "%s", documents: "%s"' %
        (cleaned_document_keys, docrule_ids, map(lambda doc: doc, retrieve_docs))
    )
    return retrieve_docs
def document_date_range_only_search(self, cleaned_document_keys, docrule_ids):
    """Search documents by creation date range only (no secondary keys).

    @param cleaned_document_keys: dict with 'date' and 'end_date' entries
    @param docrule_ids: iterable of docrule ids to search through
    @return: list of matching document names (codes)
    """
    log.debug('Date range search only')
    resp_list = []
    startkey = [None]
    endkey = [None]
    for docrule_id in docrule_ids:
        startkey = [docrule_id, str_date_to_couch(cleaned_document_keys["date"])]
        endkey = [docrule_id, str_date_to_couch(cleaned_document_keys["end_date"])]
        # Getting all documents within this date range
        all_docs = CouchDocument.view(
            'dmscouch/search_date',
            classes={None: CouchDocument},
            startkey=startkey,
            endkey=endkey)
        # Appending to fetched docs list if not already there
        for doc in all_docs:
            doc_name = doc.get_id
            if doc_name not in resp_list:
                resp_list.append(doc_name)
    log_data = len(resp_list) if resp_list else None
    # BUG FIX: index [0] is the docrule id; the "from"/"to" placeholders
    # should report the date bounds (last key element).
    log.debug(
        'Search results by date range: from: "%s", to: "%s", docrules: "%s", documents: "%s"'
        % (startkey[-1], endkey[-1], docrule_ids, log_data))
    return resp_list
def get_codes(self):
    """Return the codes (ids) of all documents marked deleted in CouchDB."""
    view_results = CouchDocument.view('dmscouch/deleted')
    codes = []
    for deleted_doc in view_results:
        codes.append(deleted_doc.get_id)
    return codes
def store(self, document):
    """Store a CouchDB object into the DB (updates or overwrites the document).

    Uncategorized documents and documents whose plugin mapping has no DB
    storage plugins pass through untouched. Otherwise, missing metadata is
    read back via the processor, existing CouchDB metadata (description,
    user, created date, index revisions) is preserved, tags are synced with
    the Django DB, and the CouchDB document is (re)saved.

    @param document: is a DMS Document() instance
    @return: the same Document() instance
    """
    # FIXME: Refactor me. We should upload new "secondary_indexes" or metatags with update() workflow,
    # not a create(), like it is now. Because this method is a mess.
    docrule = document.get_docrule()
    # doing nothing for no docrule documents
    if docrule.uncategorized:
        return document
    else:
        user = self.check_user(document)
        processor = DocumentProcessor()
        # FIXME: there might be more than one mapping
        mapping = docrule.get_docrule_plugin_mappings()
        # doing nothing for documents without mapping has DB plugins
        if not mapping.get_database_storage_plugins():
            return document
        else:
            # if not exists all required metadata getting them from docrule retrieve sequence
            if not document.file_revision_data:
                # HACK: Preserving db_info here... (May be Solution!!!)
                db_info = document.get_db_info()
                document = processor.read(document.file_name, options={
                    'only_metadata': True,
                    'user': document.user
                })
                # saving NEW file_revision_data ONLY if they exist in new uploaded doc (Preserving old indexes)
                if db_info:
                    # Storing new indexes
                    document.set_db_info(db_info)
            else:
                # TODO: move this code into a proper place (UPDATE method)
                # Asking couchdb about if old file_revision_data exists and updating them properly
                current_revisions = document.file_revision_data
                try:
                    # Only if document exists in DB. Falling gracefully if not.
                    temp_doc = self.retrieve(document)
                    old_metadata = temp_doc.get_db_info()
                    old_index_revisions = None
                    if old_metadata['mdt_indexes']:
                        # Preserving Description, User, Created Date, indexes revisions
                        if temp_doc.index_revisions:
                            old_index_revisions = temp_doc.index_revisions
                        old_metadata['mdt_indexes']['description'] = old_metadata['description']
                        old_metadata['mdt_indexes']['metadata_user_name'] = old_metadata['metadata_user_name']
                        old_metadata['mdt_indexes']['metadata_user_id'] = old_metadata['metadata_user_id']
                        # Re-render the created date back into the configured string format
                        old_cr_date = datetime.datetime.strftime(
                            old_metadata['metadata_created_date'],
                            settings.DATE_FORMAT
                        )
                        old_metadata['mdt_indexes']['date'] = old_cr_date
                        document.set_db_info(old_metadata['mdt_indexes'])
                        document.set_index_revisions(old_index_revisions)
                        document.set_file_revisions_data(current_revisions)
                    else:
                        # Preserving set revisions anyway.
                        document.set_file_revisions_data(current_revisions)
                except ResourceNotFound:
                    # Document not in CouchDB yet; nothing to preserve.
                    pass
            # updating tags to sync with Django DB
            self.sync_document_tags(document)
            # assuming no document with this _id exists. SAVING or overwriting existing
            couchdoc = CouchDocument()
            couchdoc.populate_from_dms(user, document)
            couchdoc.save(force_update=True)
            return document
def process_pkeys_request(docrule_id, key_name, autocomplete_req, doc_mdts, letters_limit=2, suggestions_limit=8):
    """Build autocomplete suggestions for a key from CouchDB documents.

    Helper method to process MDT's for special user.

    # We can collect all the documents keys for each docrule in MDT related to requested field
    # and load them into queue.
    # Then check them for duplicated values and/or make a big index with all the document's keys in it
    # to fetch only document indexes we need on first request. (Instead of 'include_docs=True')
    # E.g. Make autocomplete Couch View to output index with all Document's mdt_indexes ONLY.
    #
    # Total amount of requests will be 3 instead of 2 (for 2 docrules <> 1 MDT) but they will be smaller.
    # And that will be good for say 1 000 000 documents. However, DB size will rise too.
    # (Because we will copy all the doc's indexes into separate specific response for Typehead in fact)
    # Final step is to load all unique suggestion documents that are passed through our filters.
    # (Or if we will build this special index it won't be necessary)
    # (Only if we require parallel keys to be parsed)
    # It can be done by specifying multiple keys that we need to load here.
    # ('key' ws 'keys' *args in CouchDB request)

    @param docrule_id: docrule id when indexing; falsy when searching
    @param key_name: name of the field autocomplete was requested for
    @param autocomplete_req: the (partial) text the user typed so far
    @param doc_mdts: dict of MDTs relevant to this request
    @param letters_limit: minimum typed length before suggesting (parallel keys)
    @param suggestions_limit: soft cap on the number of suggestions returned
    @return: list of JSON-encoded suggestion dicts
    """
    # TODO: Can be optimised for huge document's amounts in future (Step: Scalability testing)
    resp = []
    view_name = 'dmscouch/search_autocomplete'
    manager = ParallelKeysManager()
    for mdt in doc_mdts.itervalues():
        mdt_keys = [mdt[u'fields'][mdt_key][u'field_name'] for mdt_key in mdt[u'fields']]
        log.debug('mdt_parallel_keys selected for suggestion MDT-s keys: %s' % mdt_keys)
        if key_name in mdt_keys:
            # Autocomplete key belongs to this MDT
            mdt_docrules = mdt[u'docrule_id']
            if docrule_id:
                # In case of index get Parallel keys from all MDT for docrule
                mdt_fields = manager.get_keys_for_docrule(docrule_id, doc_mdts)
            else:
                # In case of search get only from selected MDT
                mdt_fields = manager.get_parallel_keys_for_mdts(doc_mdts)
            pkeys = manager.get_parallel_keys_for_key(mdt_fields, key_name)
            for docrule in mdt_docrules:
                # Only search through another docrules if response is not full
                if resp.__len__() > suggestions_limit:
                    break
                # db call to search in docs
                if pkeys:
                    # Making no action if not enough letters
                    if autocomplete_req.__len__() > letters_limit:
                        # Suggestion for several parallel keys
                        documents = CouchDocument.view(
                            view_name,
                            startkey=[docrule, key_name, autocomplete_req],
                            endkey=[docrule, key_name, unicode(autocomplete_req)+u'\ufff0'],
                            include_docs=True,
                            reduce=False
                        )
                        # Adding each selected value to suggestions list
                        for doc in documents:
                            # Only append values until we've got 'suggestions_limit' results
                            if resp.__len__() > suggestions_limit:
                                break
                            resp_array = {}
                            if pkeys:
                                for pkey in pkeys:
                                    resp_array[pkey['field_name']] = doc.mdt_indexes[pkey['field_name']]
                            suggestion = json.dumps(resp_array)
                            # filtering from existing results
                            if not suggestion in resp:
                                resp.append(suggestion)
                else:
                    # Simple 'single' key suggestion (no parallel keys);
                    # grouped view keeps responses unique at the DB side.
                    documents = CouchDocument.view(
                        view_name,
                        startkey=[docrule, key_name, autocomplete_req],
                        endkey=[docrule, key_name, unicode(autocomplete_req)+u'\ufff0'],
                        group=True,
                    )
                    # Fetching unique responses to suggestion set
                    for doc in documents:
                        # Only append values until we've got 'suggestions_limit' results
                        if resp.__len__() > suggestions_limit:
                            break
                        resp_array = {key_name: doc['key'][2]}
                        suggestion = json.dumps(resp_array)
                        if not suggestion in resp:
                            resp.append(suggestion)
    return resp
def process_pkeys_request(docrule_id, key_name, autocomplete_req, doc_mdts, letters_limit=2, suggestions_limit=8):
    """Build autocomplete suggestions for a key from CouchDB documents.

    Helper method to process MDT's for special user.

    # We can collect all the documents keys for each docrule in MDT related to requested field
    # and load them into queue.
    # Then check them for duplicated values and/or make a big index with all the document's keys in it
    # to fetch only document indexes we need on first request. (Instead of 'include_docs=True')
    # E.g. Make autocomplete Couch View to output index with all Document's mdt_indexes ONLY.
    #
    # Total amount of requests will be 3 instead of 2 (for 2 docrules <> 1 MDT) but they will be smaller.
    # And that will be good for say 1 000 000 documents. However, DB size will rise too.
    # (Because we will copy all the doc's indexes into separate specific response for Typehead in fact)
    # Final step is to load all unique suggestion documents that are passed through our filters.
    # (Or if we will build this special index it won't be necessary)
    # (Only if we require parallel keys to be parsed)
    # It can be done by specifying multiple keys that we need to load here.
    # ('key' ws 'keys' *args in CouchDB request)

    @param docrule_id: docrule id when indexing; falsy when searching
    @param key_name: name of the field autocomplete was requested for
    @param autocomplete_req: the (partial) text the user typed so far
    @param doc_mdts: dict of MDTs relevant to this request
    @param letters_limit: minimum typed length before suggesting (parallel keys)
    @param suggestions_limit: soft cap on the number of suggestions returned
    @return: list of JSON-encoded suggestion dicts
    """
    # TODO: Can be optimised for huge document's amounts in future (Step: Scalability testing)
    resp = []
    view_name = 'dmscouch/search_autocomplete'
    manager = ParallelKeysManager()
    for mdt in doc_mdts.itervalues():
        mdt_keys = [mdt[u'fields'][mdt_key][u'field_name'] for mdt_key in mdt[u'fields']]
        log.debug('mdt_parallel_keys selected for suggestion MDT-s keys: %s' % mdt_keys)
        if key_name in mdt_keys:
            # Autocomplete key belongs to this MDT
            mdt_docrules = mdt[u'docrule_id']
            if docrule_id:
                # In case of index get Parallel keys from all MDT for docrule
                mdt_fields = manager.get_keys_for_docrule(docrule_id, doc_mdts)
            else:
                # In case of search get only from selected MDT
                mdt_fields = manager.get_parallel_keys_for_mdts(doc_mdts)
            pkeys = manager.get_parallel_keys_for_key(mdt_fields, key_name)
            for docrule in mdt_docrules:
                # Only search through another docrules if response is not full
                if resp.__len__() > suggestions_limit:
                    break
                # db call to search in docs
                if pkeys:
                    # Making no action if not enough letters
                    if autocomplete_req.__len__() > letters_limit:
                        # Suggestion for several parallel keys
                        documents = CouchDocument.view(
                            view_name,
                            startkey=[docrule, key_name, autocomplete_req],
                            endkey=[docrule, key_name, unicode(autocomplete_req)+u'\ufff0'],
                            include_docs=True,
                            reduce=False
                        )
                        # Adding each selected value to suggestions list
                        for doc in documents:
                            # Only append values until we've got 'suggestions_limit' results
                            if resp.__len__() > suggestions_limit:
                                break
                            resp_array = {}
                            if pkeys:
                                for pkey in pkeys:
                                    resp_array[pkey['field_name']] = doc.mdt_indexes[pkey['field_name']]
                            suggestion = json.dumps(resp_array)
                            # filtering from existing results
                            if not suggestion in resp:
                                resp.append(suggestion)
                else:
                    # Simple 'single' key suggestion (no parallel keys);
                    # grouped view keeps responses unique at the DB side.
                    documents = CouchDocument.view(
                        view_name,
                        startkey=[docrule, key_name, autocomplete_req],
                        endkey=[docrule, key_name, unicode(autocomplete_req)+u'\ufff0'],
                        group=True,
                    )
                    # Fetching unique responses to suggestion set
                    for doc in documents:
                        # Only append values until we've got 'suggestions_limit' results
                        if resp.__len__() > suggestions_limit:
                            break
                        resp_array = {key_name: doc['key'][2]}
                        suggestion = json.dumps(resp_array)
                        if not suggestion in resp:
                            resp.append(suggestion)
    return resp
class CouchDBMetadataWorker(object):
    """Stores metadata in CouchDB DatabaseManager.

    Handles required logic for metadata <==> Document(object) manipulations.
    """

    def store(self, document):
        """Stores CouchDB object into DB. (Updates or overwrites CouchDB document)

        @param document: is a DMS Document() instance
        @return: the same Document() instance
        """
        # FIXME: Refactor me. We should upload new "secondary_indexes" or metatags with update() workflow,
        # not a create(), like it is now. Because this method is a mess.
        docrule = document.get_docrule()
        # doing nothing for no docrule documents
        if docrule.uncategorized:
            return document
        else:
            user = self.check_user(document)
            processor = DocumentProcessor()
            # FIXME: there might be more than one mapping
            mapping = docrule.get_docrule_plugin_mappings()
            # doing nothing for documents without mapping has DB plugins
            if not mapping.get_database_storage_plugins():
                return document
            else:
                # if not exists all required metadata getting them from docrule retrieve sequence
                if not document.file_revision_data:
                    # HACK: Preserving db_info here... (May be Solution!!!)
                    db_info = document.get_db_info()
                    document = processor.read(document.file_name, options={
                        'only_metadata': True,
                        'user': document.user
                    })
                    # saving NEW file_revision_data ONLY if they exist in new uploaded doc (Preserving old indexes)
                    if db_info:
                        # Storing new indexes
                        document.set_db_info(db_info)
                else:
                    # TODO: move this code into a proper place (UPDATE method)
                    # Asking couchdb about if old file_revision_data exists and updating them properly
                    current_revisions = document.file_revision_data
                    try:
                        # Only if document exists in DB. Falling gracefully if not.
                        temp_doc = self.retrieve(document)
                        old_metadata = temp_doc.get_db_info()
                        old_index_revisions = None
                        if old_metadata['mdt_indexes']:
                            # Preserving Description, User, Created Date, indexes revisions
                            if temp_doc.index_revisions:
                                old_index_revisions = temp_doc.index_revisions
                            old_metadata['mdt_indexes']['description'] = old_metadata['description']
                            old_metadata['mdt_indexes']['metadata_user_name'] = old_metadata['metadata_user_name']
                            old_metadata['mdt_indexes']['metadata_user_id'] = old_metadata['metadata_user_id']
                            # Re-render the created date back into the configured string format
                            old_cr_date = datetime.datetime.strftime(
                                old_metadata['metadata_created_date'],
                                settings.DATE_FORMAT)
                            old_metadata['mdt_indexes']['date'] = old_cr_date
                            document.set_db_info(old_metadata['mdt_indexes'])
                            document.set_index_revisions(old_index_revisions)
                            document.set_file_revisions_data(current_revisions)
                        else:
                            # Preserving set revisions anyway.
                            document.set_file_revisions_data(current_revisions)
                    except ResourceNotFound:
                        # Document not in CouchDB yet; nothing to preserve.
                        pass
                # updating tags to sync with Django DB
                self.sync_document_tags(document)
                # assuming no document with this _id exists. SAVING or overwriting existing
                couchdoc = CouchDocument()
                couchdoc.populate_from_dms(user, document)
                couchdoc.save(force_update=True)
                return document

    def update_document_metadata(self, document):
        """Updates document with new indexes and stores old one into another revision.

        @param document: is a DMS Document() instance
        @return: the Document() instance populated back from the CouchDB document
        """
        self.check_user(document)
        if 'update_file' in document.options and document.options[
                'update_file']:
            name = document.get_code()
            # We need to create couchdb document in case it does not exists in database.
            couchdoc = CouchDocument.get_or_create(docid=name)
            couchdoc.update_file_revisions_metadata(document)
            couchdoc.save()
        if document.old_docrule:
            old_couchdoc = None
            couchdoc = CouchDocument.get_or_create(docid=document.file_name)
            try:
                old_couchdoc = CouchDocument.get(docid=document.old_name_code)
            except Exception, e:
                # Old document may legitimately be absent; log and continue.
                log.error('%s' % e)
                pass
            if old_couchdoc:
                # Migrate from existing CouchDB document
                couchdoc.migrate_metadata_for_docrule(document, old_couchdoc)
                couchdoc.save()
                old_couchdoc.delete()
            else:
                # store from current Document() instance
                user = document.user
                couchdoc.populate_from_dms(user, document)
                couchdoc.save()
        # We have to do it after moving document names.
        if document.new_indexes and document.file_name:
            couchdoc = CouchDocument.get(docid=document.file_name)
            couchdoc.update_indexes_revision(document)
            couchdoc.save()
        # NOTE(review): 'couchdoc' looks unbound here if none of the branches
        # above ran (no update_file, no old_docrule, no new_indexes) — confirm
        # callers always set at least one of these options.
        document = couchdoc.populate_into_dms(document)
        return document
def store(self, document):
    """Stores CouchDB object into DB. (Updates or overwrites CouchDB document)

    Uncategorized documents and documents whose plugin mapping has no DB
    storage plugins pass through untouched. Otherwise, missing metadata is
    read back via the processor, existing CouchDB metadata (description,
    user, created date, index revisions) is preserved, tags are synced with
    the Django DB, and the CouchDB document is (re)saved.

    @param document: is a DMS Document() instance
    @return: the same Document() instance
    """
    # FIXME: Refactor me. We should upload new "secondary_indexes" or metatags with update() workflow,
    # not a create(), like it is now. Because this method is a mess.
    docrule = document.get_docrule()
    # doing nothing for no docrule documents
    if docrule.uncategorized:
        return document
    else:
        user = self.check_user(document)
        processor = DocumentProcessor()
        # FIXME: there might be more than one mapping
        mapping = docrule.get_docrule_plugin_mappings()
        # doing nothing for documents without mapping has DB plugins
        if not mapping.get_database_storage_plugins():
            return document
        else:
            # if not exists all required metadata getting them from docrule retrieve sequence
            if not document.file_revision_data:
                # HACK: Preserving db_info here... (May be Solution!!!)
                db_info = document.get_db_info()
                document = processor.read(document.file_name, options={
                    'only_metadata': True,
                    'user': document.user
                })
                # saving NEW file_revision_data ONLY if they exist in new uploaded doc (Preserving old indexes)
                if db_info:
                    # Storing new indexes
                    document.set_db_info(db_info)
            else:
                # TODO: move this code into a proper place (UPDATE method)
                # Asking couchdb about if old file_revision_data exists and updating them properly
                current_revisions = document.file_revision_data
                try:
                    # Only if document exists in DB. Falling gracefully if not.
                    temp_doc = self.retrieve(document)
                    old_metadata = temp_doc.get_db_info()
                    old_index_revisions = None
                    if old_metadata['mdt_indexes']:
                        # Preserving Description, User, Created Date, indexes revisions
                        if temp_doc.index_revisions:
                            old_index_revisions = temp_doc.index_revisions
                        old_metadata['mdt_indexes']['description'] = old_metadata['description']
                        old_metadata['mdt_indexes']['metadata_user_name'] = old_metadata['metadata_user_name']
                        old_metadata['mdt_indexes']['metadata_user_id'] = old_metadata['metadata_user_id']
                        # Re-render the created date back into the configured string format
                        old_cr_date = datetime.datetime.strftime(
                            old_metadata['metadata_created_date'],
                            settings.DATE_FORMAT)
                        old_metadata['mdt_indexes']['date'] = old_cr_date
                        document.set_db_info(old_metadata['mdt_indexes'])
                        document.set_index_revisions(old_index_revisions)
                        document.set_file_revisions_data(current_revisions)
                    else:
                        # Preserving set revisions anyway.
                        document.set_file_revisions_data(current_revisions)
                except ResourceNotFound:
                    # Document not in CouchDB yet; nothing to preserve.
                    pass
            # updating tags to sync with Django DB
            self.sync_document_tags(document)
            # assuming no document with this _id exists. SAVING or overwriting existing
            couchdoc = CouchDocument()
            couchdoc.populate_from_dms(user, document)
            couchdoc.save(force_update=True)
            return document
def get_revisions(self):
    """Return deleted file revisions as a list of (code, revision) tuples.

    e.g.: [('ADL-0001', '1'), ('BBB-0001', '3'), ... ]
    """
    view_results = CouchDocument.view('dmscouch/deleted_files_revisions')
    revisions = []
    for deleted_doc in view_results:
        revisions.append((deleted_doc.get_id, deleted_doc['deleted_revision']))
    return revisions