def run(self):
    """Main driver: install signal handlers, then run one harvest pass
    (connect to the PostgreSQL source, retrieve, store to the warehouse)
    wrapped in a ProcessingActivity for tracking.

    NOTE(review): the while True loop always breaks after one pass, so this
    currently executes exactly once — presumably scaffolding for a future
    continuous mode; confirm before relying on repetition.
    """
    # Let Ctrl-C / service stop requests go through the class's cleanup path.
    signal.signal(signal.SIGINT, self.exit_signal)
    signal.signal(signal.SIGTERM, self.exit_signal)
    self.logger.info('Starting program={} pid={}, uid={}({})'.format(os.path.basename(
        __file__), os.getpid(), os.geteuid(), pwd.getpwuid(os.geteuid()).pw_name))
    # Only a PostgreSQL source is supported; anything else is a config error.
    if self.src['scheme'] != 'postgresql':
        eprint('Source must be "postgresql"')
        sys.exit(1)
    while True:
        # Track that processing has started
        pa_application = os.path.basename(__file__)
        pa_function = 'Store_Destination'
        pa_id = 'xdcdb-usermap'
        pa_topic = 'Persons'
        pa_about = 'xsede.org'
        pa = ProcessingActivity(pa_application, pa_function, pa_id, pa_topic, pa_about)
        self.start_ts = datetime.utcnow()
        # Per-pass counters, updated by Store_Destination and reported below.
        self.MyUpdateStat = 0
        self.MyDeleteStat = 0
        self.MySkipStat = 0
        # Full pass: connect -> retrieve -> store -> disconnect.
        CURSOR = self.Connect_Source(self.src['uri'])
        INPUT = self.Retrieve_Source(CURSOR)
        (rc, warehouse_msg) = self.Store_Destination(INPUT)
        self.Disconnect_Source(CURSOR)
        self.end_ts = datetime.utcnow()
        summary_msg = 'Processed {} in {:.3f}/seconds: {}/updates, {}/deletes, {}/skipped'.format(
            self.MyName, (self.end_ts - self.start_ts).total_seconds(),
            self.MyUpdateStat, self.MyDeleteStat, self.MySkipStat)
        self.logger.info(summary_msg)
        # Record the pass outcome (rc from Store_Destination) and exit the loop.
        pa.FinishActivity(rc, summary_msg)
        break
def process_status_for_published_endpoint(pubendpoint):
    """Record a completed (status 0, empty message) ProcessingActivity for one
    published Globus endpoint entry."""
    activity = ProcessingActivity(
        os.path.basename(__file__),    # application
        'main',                        # function
        pubendpoint['ID'],             # id
        'GoEndpoints',                 # topic
        pubendpoint['ResourceID'])     # about
    activity.FinishActivity(0, "")
    return
def run(self):
    """Single-pass driver: connect to the PostgreSQL source and Elasticsearch,
    then execute each configured step by dispatching to the method named in
    the step's DSTURL path, recording a ProcessingActivity per step."""
    while True:
        # Connect to the source database only when configured as postgresql.
        if self.SOURCE_PARSE.scheme == 'postgresql':
            CURSOR = self.Connect_Source(self.SOURCE_PARSE)
        self.Connect_Elastic()
        # Per-pass bookkeeping reset before iterating the configured steps.
        self.STATS = Counter()
        self.PROCESSING_SECONDS = {}
        for stepconf in self.STEPS:
            start_utc = datetime.now(timezone.utc)
            pa_application = os.path.basename(__file__)
            # DSTURL path doubles as the name of the handler method to call.
            pa_function = stepconf['DSTURL'].path
            pa_topic = stepconf['LOCALTYPE']
            pa_about = self.Affiliation
            pa_id = '{}:{}:{}:{}->{}'.format(pa_application, pa_function, pa_topic,
                                             stepconf['SRCURL'].scheme, stepconf['DSTURL'].scheme)
            pa = ProcessingActivity(pa_application, pa_function, pa_id, pa_topic, pa_about)
            if stepconf['SRCURL'].scheme != 'sql':  # This is already checked in __init__
                self.logger.error('Source scheme must be "sql"')
                sys.exit(1)
            if stepconf['DSTURL'].scheme != 'function':  # This is already checked in __init__
                self.logger.error('Destination scheme must be "function"')
                sys.exit(1)
            # Retrieve from SOURCE
            content = self.Retrieve_CloudImages(stepconf['LOCALTYPE'], stepconf)
            # Content does not have the expected results
            if stepconf['LOCALTYPE'] not in content:
                (rc, message) = (
                    False, 'JSON results is missing the \'{}\' element'.format(
                        stepconf['LOCALTYPE']))
                self.logger.error(message)
                pa.FinishActivity(rc, message)
                continue
            # Dispatch to the handler method named by DSTURL's path.
            (rc, message) = getattr(self, pa_function)(content, stepconf['LOCALTYPE'], stepconf)
            # NOTE(review): falsy rc with an empty message is treated as
            # success here — confirm the handlers' return convention.
            if not rc and message == '':  # No errors
                message = 'Executed {} in {:.3f}/seconds'.format(
                    pa_function,
                    (datetime.now(timezone.utc) - start_utc).total_seconds())
            pa.FinishActivity(rc, message)
        # Not disconnecting from Elasticsearch
        #self.Disconnect_Source(CURSOR)
        break
def Run(self):
    """Main processing loop: for each configured step, read content from the
    step's source (file cache or HTTP), route it to the destination handler
    named by the step's DSTURL scheme, and record a ProcessingActivity.

    Runs once when ``--once`` was given, otherwise repeats after
    ``smart_sleep()``. Returns 0.
    """
    while True:
        loop_start_utc = datetime.now(timezone.utc)
        # Per-iteration bookkeeping, reset each pass.
        self.STATS = Counter()
        self.PROCESSING_SECONDS = {}
        for stepconf in self.STEPS:
            step_start_utc = datetime.now(timezone.utc)
            pa_application = os.path.basename(__file__)
            # For 'function' destinations the DSTURL path names the handler method.
            pa_function = stepconf['DSTURL'].path
            pa_topic = stepconf['LOCALTYPE']
            pa_about = self.Affiliation
            pa_id = '{}:{}:{}:{}->{}'.format(pa_application, pa_function, pa_topic,
                                             stepconf['SRCURL'].scheme, stepconf['DSTURL'].scheme)
            pa = ProcessingActivity(pa_application, pa_function, pa_id, pa_topic, pa_about)
            # Source: local cache file or HTTP fetch.
            if stepconf['SRCURL'].scheme == 'file':
                content = self.Read_CACHE(stepconf['SRCURL'].path, stepconf['LOCALTYPE'])
            else:
                content = self.Get_HTTP(stepconf['SRCURL'], stepconf['LOCALTYPE'])
            if stepconf['LOCALTYPE'] not in content:
                # BUGFIX: formerly referenced undefined names 'contype' and 'msg',
                # which raised NameError on this error path.
                (rc, message) = (False, 'JSON is missing the \'{}\' element'.format(
                    stepconf['LOCALTYPE']))
                self.logger.error(message)
            elif stepconf['DSTURL'].scheme == 'file':
                (rc, message) = self.Write_CACHE(stepconf['DSTURL'].path, content)
            elif stepconf['DSTURL'].scheme == 'analyze':
                (rc, message) = self.Analyze_CONTENT(content)
            elif stepconf['DSTURL'].scheme == 'memory':
                (rc, message) = self.Write_MEMORY(content, stepconf['LOCALTYPE'],
                                                  stepconf['DSTURL'].path)
            elif stepconf['DSTURL'].scheme == 'function':
                (rc, message) = getattr(self, pa_function)(content, stepconf['LOCALTYPE'],
                                                           stepconf)
            # NOTE(review): an unrecognized DSTURL scheme would leave rc/message
            # unbound (UnboundLocalError below) — presumably schemes are
            # validated at configuration time; confirm.
            if not rc and message == '':  # No errors
                message = 'Executed {} in {:.3f}/seconds'.format(
                    pa_function,
                    (datetime.now(timezone.utc) - step_start_utc).total_seconds())
            pa.FinishActivity(rc, message)
        self.logger.info('Iteration duration={:.3f}/seconds'.format(
            (datetime.now(timezone.utc) - loop_start_utc).total_seconds()))
        if self.args.once:
            break
        # Continuous
        self.smart_sleep()
    return(0)
#snarfing the whole database is not the way to do it, for this anyway) databasestate = serializers.serialize("json", TGResource.objects.all()) dbstate = json.loads(databasestate) dbhash = {} for obj in dbstate: #print obj dbhash[str(obj['pk'])] = obj with open(default_file, 'r') as my_file: tgcdb_csv = csv.DictReader(my_file) #Start ProcessActivity pa_application = os.path.basename(__file__) pa_function = 'main' pa_topic = 'XDCDB-Resource' pa_id = pa_topic pa_about = 'xsede.org' pa = ProcessingActivity(pa_application, pa_function, pa_id, pa_topic, pa_about) for row in tgcdb_csv: if row['ResourceID'] in dbhash.keys(): dbhash.pop(row['ResourceID']) #print len(dbhash.keys()) #if row['project_number']+row['ResourceID'] in dbhash.keys(): # print "something is wrong" objtoserialize = {} objtoserialize["model"] = "xdcdb.TGResource" objtoserialize["pk"] = row['ResourceID'] objtoserialize["fields"] = row jsonobj = json.dumps([objtoserialize]) modelobjects = serializers.deserialize("json", jsonobj) for obj in modelobjects:
def process(self, ts, doctype, resourceid, rawdata):
    """Validate and store one monitoring document.

    Parses rawdata (JSON string or already-parsed dict), persists it as a
    GLUE2 EntityHistory row, and hands it to Glue2NewMonitoring for further
    processing. Every outcome is recorded on a ProcessingActivity.

    Returns a (bool, message) tuple: (True, response) on success, otherwise
    (False, error message).
    """
    # Only these document types are processed; everything else is ignored.
    if doctype not in ['inca', 'nagios']:
        msg = 'Ignoring DocType (DocType={}, ResourceID={})'.format(
            doctype, resourceid)
        logg2.info(msg)
        return (False, msg)
    pa_id = '{}:{}'.format(doctype, resourceid)
    pa = ProcessingActivity(self.application, self.function, pa_id, doctype, resourceid)
    if isinstance(rawdata, dict):
        jsondata = rawdata
    else:
        try:
            jsondata = json.loads(rawdata)
        # Narrowed from a bare except: json.loads raises ValueError
        # (JSONDecodeError) on bad JSON and TypeError on a non-string.
        except (ValueError, TypeError):
            msg = 'Failed JSON parse (DocType={}, ResourceID={}, size={})'.format(
                doctype, resourceid, len(rawdata))
            logg2.error(msg)
            # BUGFIX: was FinishActivity('1', msg); every sibling call passes a
            # bool status, so use False for consistency.
            pa.FinishActivity(False, msg)
            return (False, msg)
    if doctype == 'inca' and 'rep:report' in jsondata:
        msg = 'Ignored legacy rep:report (DocType={}, ResourceID={})'.format(
            doctype, resourceid)
        logg2.info(msg)
        pa.FinishActivity(False, msg)
        return (False, msg)
    try:
        # Probe that the required nested element exists; the value itself is
        # not used further.
        internal_resourceid = jsondata['TestResult']['Associations'][
            'ResourceID']
    # Narrowed from a bare except: a missing key raises KeyError, a non-dict
    # level raises TypeError.
    except (KeyError, TypeError):
        msg = 'Missing Associations->ResourceID (DocType={}, ResourceID={})'.format(
            doctype, resourceid)
        logg2.error(msg)
        pa.FinishActivity(False, msg)
        return (False, msg)
    model = None
    try:
        model = EntityHistory(DocumentType=doctype, ResourceID=resourceid,
                              ReceivedTime=ts, EntityJSON=jsondata)
        model.save()
        logg2.info(
            'New GLUE2 EntityHistory.ID={} (DocType={}, ResourceID={})'.
            format(model.ID, model.DocumentType, model.ResourceID))
    except (ValidationError) as e:
        msg = 'Exception on GLUE2 EntityHistory (DocType={}, ResourceID={}): {}'.format(
            model.DocumentType, model.ResourceID, e.error_list)
        pa.FinishActivity(False, msg)
        return (False, msg)
    except (DataError, IntegrityError) as e:
        # BUGFIX: DataError/IntegrityError have no 'error_list' attribute
        # (that is ValidationError-specific), so the old code raised
        # AttributeError inside this handler. Use the exception text instead.
        msg = 'Exception on GLUE2 EntityHistory (DocType={}, ResourceID={}): {}'.format(
            model.DocumentType, model.ResourceID, e)
        pa.FinishActivity(False, msg)
        return (False, msg)
    g2doc = Glue2NewMonitoring(doctype, resourceid, ts,
                               'EntityHistory.ID=%s' % model.ID)
    try:
        response = g2doc.process(jsondata)
    # NOTE(review): this assumes both exception types carry a '.response'
    # attribute; Django's ValidationError does not normally — confirm these
    # are project-local classes that do.
    except (ValidationError, ProcessingException) as e:
        pa.FinishActivity(False, e.response)
        return (False, e.response)
    pa.FinishActivity(True, response)
    return (True, response)