def queueCalculation(self, restriction, specificationRestriction, stream, groupId):
    """Queue the calculated-data tasks (ctree, custom columns, pins, Solr docs).

    Parameters
    ----------
    restriction : str or None
        Comma-separated subset of {"custom", "ctree", "pins", "solrDocuments"}
        naming which kinds of calculated data to queue; None queues all four.
    specificationRestriction : str or None
        Comma-separated specification names. Each name is single-quoted and
        the list re-joined (a,b -> 'a','b') for use by the downstream queuers.
    stream
        Task-queue stream name the tasks are queued on.
    groupId
        Task-group identifier attached to every queued task.
    """
    appLogger = self.settings.appLogger
    if specificationRestriction is not None:
        # Quote each name so downstream queuers can splice the list straight
        # into a SQL IN (...) clause.
        names = specificationRestriction.split(",")
        specificationRestriction = ",".join("'{0}'".format(n) for n in names)
    appLogger.debug("")
    appLogger.debug("Compute data")
    appLogger.debug("------------")
    if restriction is not None:
        # Set membership instead of repeated list scans; plain boolean
        # expressions instead of the redundant `True if x else False` form.
        requested = set(restriction.split(","))
        processCustomColumns = "custom" in requested
        processCtree = "ctree" in requested
        processPins = "pins" in requested
        processSolrDocuments = "solrDocuments" in requested
    else:
        # No restriction supplied: queue every kind of calculated data.
        processCustomColumns = True
        processCtree = True
        processPins = True
        processSolrDocuments = True
    appLogger.debug(" stream : {0}".format(stream))
    appLogger.debug(" specificationRestriction : {0}".format(specificationRestriction))
    appLogger.debug(" Restriction:")
    appLogger.debug(" processCtree : {0}".format(processCtree))
    appLogger.debug(" processCustomColumns : {0}".format(processCustomColumns))
    appLogger.debug(" processPins : {0}".format(processPins))
    appLogger.debug(" processSolrDocuments : {0}".format(processSolrDocuments))
    # Imports are deferred so only the selected queuer modules are loaded.
    if processCtree:
        import taskqueue.queueCtree as queueCtree
        queueCtree.queueTasks(self, self.settings, None, stream, specificationRestriction, groupId, appLogger)
    if processCustomColumns:
        import taskqueue.queueCustom as queueCustom
        queueCustom.queueTasks(self, self.settings, stream, specificationRestriction, groupId, appLogger)
    if processPins:
        import taskqueue.queuePins as queuePins
        queuePins.queueTasks(self, self.settings, stream, specificationRestriction, groupId, appLogger)
    if processSolrDocuments:
        import taskqueue.queueSolrDocument as queueSolrDocument
        queueSolrDocument.queueTasks(self, self.settings, stream, specificationRestriction, groupId, appLogger)
def queueImport(self, groupId):
    """Queue every task needed to run a full import for this specification.

    Builds, in phase order: file-ingest tasks (JSON / CSV / external loader),
    index drops plus MV/ctree disables when the import schema is empty,
    send-to-import tasks, optional sync-mode delete tasks, index rebuilds,
    the editable-schema pipeline, ctree/MV re-enables, and final vacuum and
    checkpoint housekeeping. Checkpoints are queued between phases so the
    task runner can commit at phase boundaries.

    Relies on instance state prepared elsewhere: self.stream,
    self.specificationName, self.commitFrequency, self.checkpointBehaviour,
    self.supportCursor and self.queue (see the disabled assignments below).
    Also uses `cs`, `json` and `queueCtree`, which are not imported in this
    method — presumably module-level imports outside this view (verify).

    groupId -- task-group identifier attached to every queued task.

    NOTE(review): this method reached review with its original indentation
    lost; the nesting below is a reconstruction and the ambiguous spots are
    flagged inline.
    """
    settings = self.settings
    # Stage into the dedicated staging area when the specification names one.
    if settings.specification.dedicatedStagingAreaName is None:
        nativeStageSchema = "stage"
    else:
        nativeStageSchema = settings.specification.dedicatedStagingAreaName
    enableMv = False      # becomes True once any MV is disabled and must be re-enabled
    enableCtree = False   # becomes True once any closure tree is disabled
    # self.stream = settings.args.streamname
    # self.specificationName = settings.specification.name
    #
    # supportConnection = settings.db.makeConnection("support")
    # supportCursor = supportConnection.makeCursor("supportCursor", False, False)
    # self.commitFrequency = settings.args.commitfrequency
    # self.checkpointBehaviour = settings.args.checkpointbehaviour
    self.importMode = settings.args.importmode
    #(supportConnection, supportCursor) = settings.db.makeConnection("support", False, False)
    self.removeDuplicates = settings.specification.autoRemoveStageDuplicates
    # ===============
    # [1] Queue files
    # ===============
    if settings.args.json is not None:
        (queuedTasks, minTaskId, maxTaskId) = self._queueJSON(groupId, settings.specification, self.stream, self.specificationName, settings.args.limit, settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour, settings.paths, self.removeDuplicates, self.importMode)
        fileIntent="undefined"
    elif settings.specification.sourceType=="csv":
        # CSV queuer determines the file intent itself (returned in the tuple).
        (queuedTasks, fileIntent, minTaskId, maxTaskId) = self._queueCsvFiles(groupId, settings.specification, self.stream, self.specificationName, settings.args.limit, settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour, settings.args.files, settings.paths, self.removeDuplicates,settings.args.recurse, settings.args.filenameregex, self.importMode)
    elif settings.specification.sourceType=="external":
        (queuedTasks, minTaskId, maxTaskId) = self._queueExternalLoaderFiles(groupId, self.stream, self.specificationName, settings.specification.externalLoaderName, nativeStageSchema, settings.specification.externalLoaderProfile, settings.specification.externalLoaderVariables, settings.args.limit,settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour, settings.args.files, settings.paths, settings.db.credentials, settings.env, self.removeDuplicates, settings.args.recurse, settings.args.filenameregex, self.importMode)
        fileIntent="full"
    args = {}
    args["specification"] = self.specificationName
    # =======================
    # Does the import schema already hold data for this specification?
    sql = "select import.%s_exists()" %(self.specificationName)
    self.supportCursor.execute(sql)
    hasData = self.supportCursor.fetchone()[0]
    if not hasData:
        # ADD RECORD INDEX DROPS
        for thisRecord in settings.specification.records:
            if thisRecord.useful:
                args = {}
                filename = cs.getChimpScriptFilenameToUse(settings.paths["repository"], ("specifications", self.specificationName, "resources", "sql", "indexes"), "drop_import_%s_indexes.sql" % (thisRecord.table))
                args["filename"] = filename
                self.queue.queueTask(groupId, self.stream, "script" , "Drop import.%s indexes" %(thisRecord.table), None, None, None, json.dumps(args), False)
        # ADD CHECKPOINT
        self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)
        # ADD ENTITY RECORD INDEX DROPS AND DISABLE
        for thisEntity in settings.specification.entities:
            enableMv = True
            args = {}
            filename = cs.getChimpScriptFilenameToUse(settings.paths["repository"], ("specifications",self.specificationName,"resources","sql","indexes"), "drop_mv_%s_indexes.sql" % (thisEntity.name))
            args["filename"] = filename
            self.queue.queueTask(groupId, self.stream, "script" , "Drop %s mv indexes" %(thisEntity.name), None, None, None, json.dumps(args), False)
            args = {}
            filename = cs.getChimpScriptFilenameToUse(settings.paths["repository"], ("specifications",self.specificationName,"resources", "sql", "mv"), "%s_disable.sql" % (thisEntity.name))
            args["filename"] = filename
            self.queue.queueTask(groupId, self.stream, "script" , "Disable %s mv" %(thisEntity.name), None, None, None,json.dumps(args), False)
        # ADD CHECKPOINT
        if enableMv:
            self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)
        # ADD CTREE INDEX DROPS AND DISABLE
        for thisRecord in settings.specification.records:
            if thisRecord.useful:
                if thisRecord.hasCtree():
                    enableCtree = True
                    self._queueCtreeDisable(settings, groupId, thisRecord.table)
        for thisEntity in settings.specification.entities:
            if thisEntity.hasCtree():
                enableCtree = True
                self._queueCtreeDisable(settings, groupId, thisEntity.name)
        if enableCtree:
            self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)
    # ADD SENT TO IMPORT
    # NOTE(review): reconstructed at function level — this phase should run
    # whether or not the import schema already held data; confirm against VCS.
    for record in settings.specification.records:
        if record.useful:
            args = {}
            args["specification"] = self.specificationName
            args["importMode"] = self.importMode
            args["fileIntent"] = fileIntent
            args["strategy"] = "speed"
            args["table"] = record.table
            args["hasData"]=hasData
            self.queue.queueTask(groupId, self.stream, "sendtoimport" , "Send '{0}' to import".format(record.table), None, None, None, json.dumps(args), False)
            self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)
            self.queue.queueAVacuum(settings.args.vacuumstrategy, groupId, self.stream, "import", record.table)
    # If we're in sync mode then we may need to delete some things
    if self.importMode=="sync":
        for record in settings.specification.records:
            if record.useful:
                args = {}
                args["specification"] = self.specificationName
                args["importMode"] = self.importMode
                args["fileIntent"] = fileIntent
                # Task-id window of the files queued above, so the delete pass
                # only considers this run's tasks.
                args["minTaskId"] = minTaskId
                args["maxTaskId"] = maxTaskId
                args["table"] = record.table
                args["hasData"]=hasData
                self.queue.queueTask(groupId, self.stream, "importsyncdeletes" , "Process '{0}' sync deletes".format(record.table), None, None, None, json.dumps(args), False)
                self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)
                self.queue.queueAVacuum(settings.args.vacuumstrategy, groupId, self.stream, "import", record.table)
    committedForIndexes=False
    if not hasData:
        for thisRecord in settings.specification.records:
            if thisRecord.useful:
                if not committedForIndexes:
                    # Checkpoint once, before the first index build.
                    committedForIndexes = True
                    self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)
                # ADD INDEXES
                args = {}
                filename = cs.getChimpScriptFilenameToUse(settings.paths["repository"], ("specifications",self.specificationName,"resources", "sql","indexes"), "create_import_%s_indexes.sql" % (thisRecord.table))
                args["filename"] = filename
                self.queue.queueTask(groupId, self.stream, "script" , "Create import.%s indexes" %(thisRecord.table), None, None, None, json.dumps(args), False)
        self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)
    # ================================
    args = None
    atLeastOneEditable = False
    for quickCheck in settings.specification.records:
        if quickCheck.editable:
            atLeastOneEditable = True
    #=================
    if atLeastOneEditable:
        # NOTE: hasData is reused here for the editable schema, clobbering the
        # import-schema value read earlier.
        sql = "select editable.%s_exists()" %(self.specificationName)
        self.supportCursor.execute(sql)
        hasData = self.supportCursor.fetchone()[0]
        if not hasData:
            for thisRecord in settings.specification.records:
                if thisRecord.useful:
                    args = {}
                    filename = cs.getChimpScriptFilenameToUse(settings.paths["repository"], ("specifications", self.specificationName, "resources", "sql","indexes"), "drop_editable_%s_indexes.sql" % (thisRecord.table))
                    args["filename"] = filename
                    self.queue.queueTask(groupId, self.stream, "script" , "Drop editable.%s indexes" %(thisRecord.table), None, None, None, json.dumps(args), False)
            self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)
        firstEditable=True
        for record in settings.specification.records:
            if record.useful:
                if firstEditable:
                    # Record the timestamp once, before the first editable copy.
                    firstEditable = False
                    args = {}
                    self.queue.queueTask(groupId, self.stream, "recordtimestamp" , "Record current timestamp", None, None, None, json.dumps(args), False)
                args = {}
                args["specification"] = self.specificationName
                args["table"] = record.table
                args["hasData"]=hasData
                self.queue.queueTask(groupId, self.stream, "sendtoeditable" , "Make '{0}' editable".format(record.table), None, None, None, json.dumps(args), False)
                self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)
                self.queue.queueAVacuum(settings.args.vacuumstrategy, groupId, self.stream, "editable", record.table)
        args = {}
        args["specification"] = self.specificationName
        self.queue.queueTask(groupId, self.stream, "finisheditable" , "Finish send to editable process", None, None, None, json.dumps(args), False)
        if not hasData:
            for thisRecord in settings.specification.records:
                if thisRecord.useful:
                    args = {}
                    filename = cs.getChimpScriptFilenameToUse(settings.paths["repository"], ("specifications",self.specificationName,"resources", "sql","indexes"), "create_editable_%s_indexes.sql" % (thisRecord.table))
                    args["filename"] = filename
                    self.queue.queueTask(groupId, self.stream, "script" , "Create editable.%s indexes" %(thisRecord.table), None, None, None, json.dumps(args), False)
            self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)
    #======================
    if enableCtree:
        # Re-enable every closure tree that was disabled above.
        for thisRecord in settings.specification.records:
            if thisRecord.useful:
                if thisRecord.hasCtree():
                    self._queueCtreeEnable(settings, groupId, thisRecord.table)
        for thisEntity in settings.specification.entities:
            if thisEntity.hasCtree():
                self._queueCtreeEnable(settings, groupId, thisEntity.name)
        self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)
        #======================
        # NOTE(review): nesting this rebuild loop under `if enableCtree:` is a
        # reconstruction (original indentation lost) — confirm against VCS.
        for thisRecord in settings.specification.records:
            if thisRecord.useful:
                if thisRecord.hasCtree():
                    # Editable records are rebuilt from the editable schema,
                    # everything else from the import schema.
                    if thisRecord.editable:
                        schemaRestriction="editable"
                    else:
                        schemaRestriction="import"
                    queueCtree.queueTasks(self, settings, schemaRestriction, self.stream, "'{0}'".format(self.specificationName), groupId, settings.appLogger)
    #======================
    if enableMv:
        # Re-enable and rebuild every materialised view disabled above, then
        # recreate its indexes.
        self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)
        for thisEntity in settings.specification.entities:
            args = {}
            filename = cs.getChimpScriptFilenameToUse(settings.paths["repository"], ("specifications",self.specificationName,"resources", "sql","mv"), "%s_enable_and_recreate.sql" % (thisEntity.name))
            args["filename"] = filename
            self.queue.queueTask(groupId, self.stream, "script" , "Enable %s mv" %(thisEntity.name), None, None, None, json.dumps(args), False)
            self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)
            args = {}
            filename = cs.getChimpScriptFilenameToUse(settings.paths["repository"], ("specifications",self.specificationName,"resources", "sql","indexes"), "create_mv_%s_indexes.sql" % (thisEntity.name))
            args["filename"] = filename
            self.queue.queueTask(groupId, self.stream, "script" , "Create %s indexes" %(thisEntity.name), None, None, None, json.dumps(args), False)
        self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)
    # OLD SEARCH WENT HERE
    # OLD PINHEAD WENT HERE
    # NOTE(review): large fully-commented-out legacy blocks (old ctree
    # enable_and_recreate scripts, search-source sync/rebuild, and per-record
    # computed-data queueing) were removed here during review — dead code.
    # Final housekeeping: checkpoint, whole-database vacuum, checkpoint, then
    # persist everything queued on the support connection.
    self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)
    self.queue.queueAVacuum(settings.args.vacuumstrategy, groupId, self.stream, None, None)
    self.queue.queueCheckpoint(groupId, self.stream, "major", settings.args.tolerancelevel, self.commitFrequency, self.checkpointBehaviour)
    self.supportCursor.connection.commit()