def handle(self, *args, **options): history_manager = HistoryManager() rcs_obj = RCS() collection = get_database()[Triple.collection_name] cur = collection.Triple.find({'_type': 'GAttribute'}) for n in cur: if type(n['attribute_type']) == ObjectId: attr_type = collection.Node.one({'_id': n['attribute_type']}) if attr_type: collection.update({'_id': n['_id']}, { '$set': { 'attribute_type': { "$ref": attr_type.collection_name, "$id": attr_type._id, "$db": attr_type.db.name } } }) else: collection.remove({'_id': n['_id']}) subject_doc = collection.Node.one({'_id': n.subject}) n.name = subject_doc.name + " -- " + n.attribute_type[ 'name'] + " -- " + n.object_value # Creates a history (version-file) for GAttribute documents if history_manager.create_or_replace_json_file(n): fp = history_manager.get_file_path(n) message = "This document (" + n.name + ") is created on " + subject_doc.created_at.strftime( "%d %B %Y") rcs_obj.checkin(fp, 1, message.encode('utf-8'), "-i")
def handle(self, *args, **options): history_manager = HistoryManager() rcs_obj = RCS() collection = get_database()[Triple.collection_name] cur = collection.Triple.find({"_type": "GAttribute"}) for n in cur: if type(n["attribute_type"]) == ObjectId: attr_type = collection.Node.one({"_id": n["attribute_type"]}) if attr_type: collection.update( {"_id": n["_id"]}, { "$set": { "attribute_type": { "$ref": attr_type.collection_name, "$id": attr_type._id, "$db": attr_type.db.name, } } }, ) else: collection.remove({"_id": n["_id"]}) subject_doc = collection.Node.one({"_id": n.subject}) n.name = subject_doc.name + " -- " + n.attribute_type["name"] + " -- " + n.object_value # Creates a history (version-file) for GAttribute documents if history_manager.create_or_replace_json_file(n): fp = history_manager.get_file_path(n) message = "This document (" + n.name + ") is created on " + subject_doc.created_at.strftime("%d %B %Y") rcs_obj.checkin(fp, 1, message.encode("utf-8"), "-i")
def handle(self, *args, **options): try: triple_collection_name = Triple.collection_name node_collection_name = Node.collection_name if triple_collection_name not in db.collection_names(): try: # [A] Create Triples collection info_message = "\n\n Creating new collection named as \"" + triple_collection_name + "\"..." print info_message log_list.append(info_message) db.create_collection(triple_collection_name) info_message = "\n\tCollection (" + triple_collection_name + ") created successfully." print info_message log_list.append(info_message) info_message = "\n===================================================================================================" print info_message log_list.append(info_message) except Exception as e: error_message = "\n Collection (" + triple_collection_name + ") NOT created as following error occurred: " + str(e) print error_message log_list.append(error_message) return # Fetch "Nodes" collection node_collection = db[node_collection_name].Node # Fetch newly created "Triples" collection triple_collection = db[triple_collection_name].Triple info_message = "\n\n Before shifting document(s) from " + node_collection_name + " collection into " + triple_collection_name + " collection: " print info_message gattribute_cur = node_collection.find({"_type": "GAttribute"}) gattribute_cur_count = gattribute_cur.count() info_message = "\n\n\tNo. of GAttribute node(s) found in " + node_collection_name + " collection: " + str(gattribute_cur_count) print info_message log_list.append(info_message) grelation_cur = node_collection.find({"_type": "GRelation"}) grelation_cur_count = grelation_cur.count() info_message = "\n\tNo. of GRelation node(s) found in " + node_collection_name + " collection: " + str(grelation_cur_count) print info_message log_list.append(info_message) if gattribute_cur.count() == 0 and grelation_cur.count() == 0: info_message = "\n\n No records found in " + node_collection_name + " collection to be shifted into " + triple_collection_name + " collection." print info_message log_list.append(info_message) # info_message = "\n\n Triples collection already created and indexes, too, set on it." # print info_message # log_list.append(info_message) # info_message = "\n\tExisting index information on \"" + triple_collection_name + "\" collection is as follows:" + \ # "\n" + json.dumps(triple_collection.index_information(), indent=2, sort_keys=False) # print info_message # log_list.append(info_message) else: gtattribute_cur = triple_collection.find({"_type": "GAttribute"}) gtattribute_cur_count = gtattribute_cur.count() info_message = "\n\n\tNo. of GAttribute node(s) found in " + triple_collection_name + " collection: " + str(gtattribute_cur_count) print info_message log_list.append(info_message) gtrelation_cur = triple_collection.find({"_type": "GRelation"}) gtrelation_cur_count = gtrelation_cur.count() info_message = "\n\tNo. of GRelation node(s) found in " + triple_collection_name + " collection: " + str(gtrelation_cur_count) print info_message info_message = "\n===================================================================================================" print info_message log_list.append(info_message) info_message = "\n\n Existing index information on \"" + triple_collection_name + "\" collection are as follows:" + \ "\n" + json.dumps(triple_collection.collection.index_information(), indent=4, sort_keys=False) print info_message log_list.append(info_message) # [B] Creating following indexes for "Triples" collection info_message = "\n\n\tCreating following indexes for \"" + triple_collection_name + "\" collection..." + \ "\n\t\t1. _type(1) >> subject(1) >> attribute_type.$id(1) >> status(1)" + \ "\n\t\t2. _type(1) >> subject(1) >> relation_type.$id(1) >> status(1) >> right_subject(1)" + \ "\n\t\t3. _type(1) >> right_subject(1) >> relation_type.$id(1) >> status(1)" print info_message log_list.append(info_message) # 1. _type(1) >> subject(1) >> attribute_type.$id(1) >> status(1) index_val = triple_collection.collection.ensure_index([("_type", ASCENDING), ("subject", ASCENDING), ("attribute_type.$id", ASCENDING), ("status", ASCENDING)]) if index_val: info_message = "\n\n\t" + str(index_val) + " index created for " + str(triple_collection_name) + " collection successfully." else: info_message = "\n\n\t_type_1_subject_1_attribute_type.$id_1_status_1 index already created for " + str(triple_collection_name) + " collection." print info_message log_list.append(info_message) # 2. _type(1) >> subject(1) >> relation_type.$id(1) >> status(1) >> right_subject(1) index_val = triple_collection.collection.ensure_index([("_type", ASCENDING), ("subject", ASCENDING), ("relation_type.$id", ASCENDING), ("status", ASCENDING), ("right_subject", ASCENDING)]) if index_val: info_message = "\n\t" + str(index_val) + " index created for " + str(triple_collection_name) + " collection successfully." else: info_message = "\n\t_type_1_subject_1_relation_type.$id_1_status_1_right_subject_1 index already created for " + str(triple_collection_name) + " collection." print info_message log_list.append(info_message) # 3. _type(1) >> right_subject(1) >> relation_type.$id(1) >> status(1) index_val = triple_collection.collection.ensure_index([("_type", ASCENDING), ("right_subject", ASCENDING), ("relation_type.$id", ASCENDING), ("status", ASCENDING)]) if index_val: info_message = "\n\t" + str(index_val) + " index created for " + str(triple_collection_name) + " collection successfully." else: info_message = "\n\t_type_1_subject_1_relation_type.$id_1_status_1_right_subject_1 index already created for " + str(triple_collection_name) + " collection." print info_message log_list.append(info_message) info_message = "\n\n Modified index information on \"" + triple_collection_name + "\" collection are as follows:" + \ "\n" + json.dumps(triple_collection.collection.index_information(), indent=4, sort_keys=False) print info_message log_list.append(info_message) info_message = "\n===================================================================================================" print info_message log_list.append(info_message) # [C] Move GAttribute & GRelation nodes from Nodes collection to Triples collection info_message = "\n\n Moving GAttribute (" + str(gattribute_cur_count) + ") & GRelation (" + str(grelation_cur_count) + ") node(s) from Nodes collection to Triples collection..." + \ "\n THIS MAY TAKE MORE TIME DEPENDING UPON HOW MUCH DATA YOU HAVE.. SO PLEASE HAVE PATIENCE !" print info_message log_list.append(info_message) bulk_insert = triple_collection.collection.initialize_unordered_bulk_op() bulk_remove = node_collection.collection.initialize_unordered_bulk_op() triple_cur = node_collection.find({"_type": {"$in": ["GAttribute", "GRelation"]}}, timeout=False) delete_nodes = [] hm = HistoryManager() rcs_obj = RCS() existing_rcs_file = [] newly_created_rcs_file = [] at_rt_updated_node_list = [] tf1 = time.time() for i, doc in enumerate(triple_cur): info_message = "\n\n\tChecking attribute_type & relation_type fields of # " + str((i+1)) + " record :-" print info_message log_list.append(info_message) if doc["_type"] == "GAttribute": if (type(doc["attribute_type"]) != bson.dbref.DBRef) and ( (type(doc["attribute_type"]) == dict) or (type(doc["attribute_type"]) == AttributeType) ): doc["attribute_type"] = node_collection.collection.AttributeType(doc["attribute_type"]).get_dbref() at_rt_updated_node_list.append(str(doc._id)) info_message = "\n\tattribute_type field updated for # " + str((i+1)) + " record." print info_message log_list.append(info_message) elif doc["_type"] == "GRelation": if (type(doc["relation_type"]) != bson.dbref.DBRef) and ( (type(doc["relation_type"]) == dict) or (type(doc["relation_type"]) == RelationType) ): doc["relation_type"] = node_collection.collection.RelationType(doc["relation_type"]).get_dbref() at_rt_updated_node_list.append(str(doc._id)) info_message = "\n\trelation_type field updated for # " + str((i+1)) + " record." print info_message log_list.append(info_message) delete_nodes.append(doc._id) bulk_insert.insert(doc) try: node_rcs_file = hm.get_file_path(doc) # As we have changed collection-name for Triple from Nodes to Triples # Hence, we need to first replace Triples with Nodes # In order to move rcs-files from Nodes into Triples directory node_rcs_file = node_rcs_file.replace(triple_collection_name, node_collection_name) info_message = "\n\n\tMoving # " + str((i+1)) + " Node rcs-file (" + node_rcs_file + ")..." print info_message log_list.append(info_message) if os.path.exists(node_rcs_file + ",v"): node_rcs_file = node_rcs_file + ",v" elif os.path.exists(node_rcs_file): node_rcs_file = node_rcs_file info_message = "\n\t node_rcs_file (json/,v) : " + node_rcs_file print info_message log_list.append(info_message) # If exists copy to Triples directory # Then delete it if node_rcs_file[-2:] == ",v" and os.path.isfile(node_rcs_file): info_message = "\n\t File FOUND : " + node_rcs_file print info_message log_list.append(info_message) # Replacing Node collection-name (Nodes) with Triple collection-name (Triples) triple_rcs_file = node_rcs_file.replace(node_collection_name, triple_collection_name) info_message = "\n\t triple_rcs_file : " + triple_rcs_file print info_message log_list.append(info_message) triple_dir_path = os.path.dirname(triple_rcs_file) info_message = "\n\t triple_dir_path : " + triple_dir_path print info_message log_list.append(info_message) if not os.path.isdir(triple_dir_path): # Creates required directory path for Triples collection in rcs-repo os.makedirs(triple_dir_path) info_message = "\n\t CREATED PATH : " + triple_dir_path print info_message log_list.append(info_message) # Copy files keeping metadata intact shutil.copy2(node_rcs_file, triple_rcs_file) info_message = "\n\t COPIED TO : " + triple_rcs_file print info_message log_list.append(info_message) # Deleting file from Nodes directory os.remove(node_rcs_file) info_message = "\n\t DELETED : " + node_rcs_file print info_message log_list.append(info_message) # Append to list to keep track of those Triple nodes # for which corresponding rcs-file exists existing_rcs_file.append(str(doc._id)) else: error_message = "\n\t Version-File (.json,v) NOT FOUND : " + node_rcs_file + " !!!" print error_message log_list.append(error_message) if hm.create_or_replace_json_file(doc): fp = hm.get_file_path(doc) message = "This document (" + doc.name + ") is shifted (newly created) from Nodes collection to Triples collection on " + datetime.datetime.now().strftime("%d %B %Y") rcs_obj.checkin(fp, 1, message.encode('utf-8'), "-i") if os.path.isdir(os.path.dirname(fp)): # Append to list to keep track of those Triple nodes # for which corresponding rcs-file doesn't exists newly_created_rcs_file.append(str(doc._id)) info_message = "\n\t CREATED rcs-file : " + fp print info_message log_list.append(info_message) except OSError as ose: error_message = "\n\t OSError (" + node_rcs_file + ") : " + str(ose) + " !!!" print error_message log_list.append(error_message) continue except Exception as e: error_message = "\n\t Exception (" + node_rcs_file + ") : " + str(e) + " !!!" print error_message log_list.append(error_message) continue tf2 = time.time() info_message = "\n\n\tTime taken by for loop (list) : " + str(tf2 - tf1) + " secs" print info_message log_list.append(info_message) t1 = time.time() bulk_insert.execute() t2 = time.time() info_message = "\n\tTime taken to copy given no. of Triple's docmuents : " + str(t2 - t1) + " secs" print info_message log_list.append(info_message) t3 = time.time() bulk_remove.find({"_id": {"$in": delete_nodes}}).remove() bulk_remove.execute() t4 = time.time() info_message = "\n\tTime taken to delete given no. of Triple's docmuents () : " + str(t4 - t3) + " secs" print info_message log_list.append(info_message) info_message = "\n===================================================================================================" print info_message log_list.append(info_message) info_message = "\n\n After shifting document(s) from " + node_collection_name + " collection into " + triple_collection_name + " collection: " print info_message log_list.append(info_message) # Entries in Nodes collection gattribute_cur = node_collection.find({"_type": "GAttribute"}) gattribute_cur_count = gattribute_cur.count() info_message = "\n\n\tNo. of GAttribute node(s) found in " + node_collection_name + " collection: " + str(gattribute_cur_count) print info_message log_list.append(info_message) grelation_cur = node_collection.find({"_type": "GRelation"}) grelation_cur_count = grelation_cur.count() info_message = "\n\tNo. of GRelation node(s) found in " + node_collection_name + " collection: " + str(grelation_cur_count) print info_message log_list.append(info_message) # Entries in Triples collection gtattribute_cur = triple_collection.find({"_type": "GAttribute"}) gtattribute_cur_count = gtattribute_cur.count() info_message = "\n\n\tNo. of GAttribute node(s) found in " + triple_collection_name + " collection: " + str(gtattribute_cur_count) print info_message log_list.append(info_message) gtrelation_cur = triple_collection.find({"_type": "GRelation"}) gtrelation_cur_count = gtrelation_cur.count() info_message = "\n\tNo. of GRelation node(s) found in " + triple_collection_name + " collection: " + str(gtrelation_cur_count) print info_message log_list.append(info_message) # Information about attribute_type & relation_type fields updated info_message = "\n\n\tNo. of node(s) (# " + str(len(at_rt_updated_node_list)) + ") whose attribute_type & relation_type fields are updated: \n" + str(at_rt_updated_node_list) print info_message log_list.append(info_message) # Information about RCS files info_message = "\n\n\tRCS file(s) moved for follwoing node(s) (# " + str(len(existing_rcs_file)) + ") :- \n" + str(existing_rcs_file) print info_message log_list.append(info_message) info_message = "\n\tRCS file(s) re-created for follwoing node(s) (# " + str(len(newly_created_rcs_file)) + ") :- \n" + str(newly_created_rcs_file) print info_message log_list.append(info_message) if triple_cur.alive: triple_cur.close() info_message = "\n\n\tTriple's cursor closed." print info_message log_list.append(info_message) info_message = "\n\n===================================================================================================" print info_message log_list.append(info_message) """ info_message = "\n\n Looking for dict type value(s) in attribute_type" + \ " and relation_type fields of respective GAttribute and GRelation" + \ "\n\tIf found code will replace corresponding value(s) with respective AttributeType/RelationType instances" + \ "\n\tTHIS MAY TAKE MORE TIME DEPENDING UPON HOW MUCH DATA YOU HAVE.. SO PLEASE HAVE PATIENCE !\n" print info_message log_list.append(info_message) triple_cur = triple_collection.collection.find({"_type": {"$in": ["GAttribute", "GRelation"]}}, timeout=False) import bson hm = HistoryManager() sc = [] ec = [] tc = triple_cur.count() for i, each in enumerate(triple_cur): try: n = None info_message = "\n\n\tChecking # " + str((i+1)) + " record :-" print info_message log_list.append(info_message) if each["_type"] == "GAttribute": if (type(each["attribute_type"]) != bson.dbref.DBRef) and (type(each["attribute_type"]) == dict): each["attribute_type"] = node_collection.collection.AttributeType(each["attribute_type"]) n = triple_collection.collection.GAttribute(each) n.save() sc.append(str(n._id)) elif each["_type"] == "GRelation": if (type(each["relation_type"]) != bson.dbref.DBRef) and (type(each["relation_type"]) == dict): each["relation_type"] = node_collection.collection.RelationType(each["relation_type"]) n = triple_collection.collection.GRelation(each) n.save() sc.append(str(n._id)) except Exception as e: error_message = "\n Error (" + str(each["_id"]) + ") : ", str(e) + " !!!" print error_message log_list.append(error_message) ec.append(str(each["_id"])) continue info_message = "\n\n\tTotal node(s) found: " + str(tc) print info_message log_list.append(info_message) info_message = "\n\n\tTotal node(s) updated (" + str(len(sc)) + ") : \n" + str(sc) print info_message log_list.append(info_message) info_message = "\n\n\tTotal node(s) where error encountered (" + str(len(ec)) + ") : \n" + str(ec) print info_message log_list.append(info_message) if triple_cur.alive: triple_cur.close() info_message = "\n\n\tTriple's cursor closed." print info_message log_list.append(info_message) info_message = "\n\n===================================================================================================" print info_message log_list.append(info_message) """ except Exception as e: error_message = str(e) print error_message log_list.append("\n Error: " + error_message + " !!!\n") finally: if log_list: info_message = "\n\n================ End of Iteration ================\n" print info_message log_list.append(info_message) log_file_name = "shift_Triples" + ".log" log_file_path = os.path.join(SCHEMA_ROOT, log_file_name) with open(log_file_path, 'a') as log_file: log_file.writelines(log_list)