def main():
    parser = argparse.ArgumentParser(
        description='example: python %s --auth --dns aliyun' % os.path.basename(__file__))
    parser.add_argument('-a', '--auth', help='auth hook', action='store_true')
    parser.add_argument('-c', '--cleanup', help='cleanup hook', action='store_true')
    parser.add_argument('-t', '--test', help='test DNS API', action='store_true')
    parser.add_argument('--dns', help='dns type, default: aliyun', default='aliyun')
    parser.add_argument('-d', '--domain', help='a domain for test DNS API')
    parser.add_argument('--challenge-alias', dest='alias',
                        help='challenge aliased domain, e.g. alias.domain.com')
    args = parser.parse_args()
    Logger.info(args)
    if args.test:
        if args.domain is None:
            parser.error('-t, --test requires --domain.')
            return
        test(args.alias or args.domain, args.dns)
    elif args.auth:
        auth(args.dns, args.alias)
    elif args.cleanup:
        cleanup(args.dns, args.alias)
def run(args):
    domains = map(lambda domain: '-d ' + domain, args.domains)
    domains = ' '.join(domains)
    Logger.info('obtain domains: ' + domains)
    deploy_hook = '--deploy-hook "python ' + deploy_path + '"' if Utils.is_enable_deployment() else ''
    cert_name = '--cert-name ' + args.cert if args.cert else ''
    force_renewal = '--force-renewal' if args.force else ''
    challenge_alias = '--challenge-alias ' + args.alias if args.alias else ''
    certbot_cmd = certbot_cmd_template % {
        'email': Config['base']['email'],
        'cert_name': cert_name,
        'force_renewal': force_renewal,
        'manual_path': manual_path,
        'dns': args.dns,
        'deploy_hook': deploy_hook,
        'domains': domains,
        'challenge_alias': challenge_alias
    }
    Logger.info('certbot obtain: ' + certbot_cmd)
    os.system(certbot_cmd)
def main():
    parser = argparse.ArgumentParser(
        description='example: python %s -d domain.com *.domain.com' % os.path.basename(__file__))
    parser.add_argument('-d', '--domains', help='domain list', required=True, nargs='+')
    parser.add_argument('-c', '--cert', help='certificate name, e.g. domain.com')
    parser.add_argument('-f', '--force', help='force renewal', default=False, action='store_true')
    parser.add_argument('--dns', help='dns type, default: aliyun', default='aliyun', choices=dns_types)
    parser.add_argument('--challenge-alias', dest='alias',
                        help='challenge aliased domain, e.g. alias.domain.com')
    args = parser.parse_args()
    Logger.info(args)
    run(args)
def upload(params):
    Debug = DebugManager.DebugManager()
    Debug.start()
    Debug.trace('start')
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    date = fn.getNestedElement(params, 'date')
    path = fn.getNestedElement(params, 'path')
    # url = fn.getNestedElement(params, 'callback_url');  # required params to handle callback_url
    paths, should_reset = ModelUpload.getPath(params)
    for idx in range(0, len(paths)):
        p = paths[idx]
        processed_filename = File.converExcelFileToCsv(p, ignore_index=True)
        Logger.v('processed_filename', processed_filename)
        Debug.trace('convert to json : path {0}'.format(processed_filename))
        if idx == 0 and should_reset:  # reset once at the beginning
            Logger.v('Reset Database.')
            reset(date)  # reset stock_issue collection
            ModelSIIntegrity.reset(date)  # reset stock_issue_datalog by date given
        File.readCsvFileInChunks(processed_filename, save, params, chunksize=chunksize)
        Debug.trace('uploaded to mongo.')
    generateIndex()
    ModelSIIntegrity.generateIndex()
    Debug.trace('indexing mongo collection.')
    saveIssueOption()
    Debug.trace('save option to json.')
    trigger_params = copy.deepcopy(params)
    trigger_params['result'] = 'data count: {0}'.format(params['data_count'][path])
    # Logger.v('trigger_params', trigger_params);
    dbManager.executeBulkOperations(None)  # insert all the remaining jobs at once
    ReportStock.triggerOnComplete(trigger_params)
    Debug.trace('trigger api on complete.')
    Debug.end()
    Debug.show('Stock.upload')
def tryGet(self):
    db = DBFactory.getInstance(self.db_name)
    if db is None:
        Logger.f(self.id(), "not suitable db instance: db=None")
        return None
    #Logger.d(self.id(), "[SQL]" + self.sql)
    result = {}
    for key in self.sql_contexts:
        sql = self.sql_contexts[key]['sql']
        i = 0
        tmp = sql.strip(';').split(';')
        sqls = []
        for sql in tmp:
            if sql is None:
                continue
            if sql.strip() == '':
                continue
            sqls.append(sql)
        # execute every statement except the last one without fetching
        while i < len(sqls) - 1:
            print sqls[i]
            cursor = db.cursor()
            cursor.execute(sqls[i])
            cursor.close()
            i = i + 1
        # the last statement provides the result set for this context
        print sqls[i]
        cursor = db.cursor()
        cursor.execute(sqls[i])
        res_list = cursor.fetchall()
        result[key] = res_list
    return result
def __compute_signature(self, params):
    sorted_params = sorted(params.items(), key=lambda params: params[0])
    query_string = ''
    for (k, v) in sorted_params:
        query_string += '&' + self.__percent_encode(k) + '=' + self.__percent_encode(str(v))
    string_to_sign = 'GET&%2F&' + self.__percent_encode(query_string[1:])
    try:
        if sys.version_info < (3, 0):
            digest = hmac.new(str(self.access_key_secret + "&"),
                              str(string_to_sign), hashlib.sha1).digest()
        else:
            digest = hmac.new((self.access_key_secret + "&").encode(encoding="utf-8"),
                              string_to_sign.encode(encoding="utf-8"),
                              hashlib.sha1).digest()
    except Exception as e:
        Logger.error(e)
    if sys.version_info < (3, 1):
        signature = base64.encodestring(digest).strip()
    else:
        signature = base64.encodebytes(digest).strip()
    return signature
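For reference, a minimal standalone Python 3 sketch of the same RPC-style signing flow (sort the parameters, percent-encode them, HMAC-SHA1 over 'GET&%2F&' plus the encoded query keyed with secret + '&', then Base64). The percent_encode helper, demo parameters, and secret below are illustrative assumptions, not part of the original client.

# Standalone sketch of the signature flow above; all values are made up for illustration.
import base64
import hashlib
import hmac
from urllib.parse import quote

def percent_encode(value):
    # RFC 3986 style encoding commonly required by signed GET APIs
    return quote(str(value), safe='~')

def compute_signature(params, access_key_secret):
    pairs = sorted(params.items(), key=lambda item: item[0])
    canonicalized = '&'.join('%s=%s' % (percent_encode(k), percent_encode(v)) for k, v in pairs)
    string_to_sign = 'GET&%2F&' + percent_encode(canonicalized)
    digest = hmac.new((access_key_secret + '&').encode('utf-8'),
                      string_to_sign.encode('utf-8'), hashlib.sha1).digest()
    return base64.encodebytes(digest).strip().decode('utf-8')

if __name__ == '__main__':
    demo_params = {'Action': 'DescribeDomainRecords', 'DomainName': 'example.com'}
    print(compute_signature(demo_params, 'my-secret'))  # hypothetical secret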
def refreshIsRequired(data, collection_name):
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    refresh_collection = False
    mongo_data = list(db[collection_name].find({}))
    # Logger.v('mongo_data', mongo_data);
    unique_values = []
    for row in data:
        unique_value = generateUniqueValue(data=row, collection_name=collection_name)
        unique_values.append('_'.join(unique_value))
    matched_row = db[collection_name].find({'unique_value': {'$in': unique_values}})
    matched_result = list(matched_row)
    # Logger.v('matched_result', matched_result)
    if not len(matched_result) == len(mongo_data) or len(mongo_data) == 0:
        # there is a difference between mongodb and the raw data
        Logger.v('matched_result len', len(matched_result))
        Logger.v('mongo_data len', len(mongo_data))
        refresh_collection = True
        return refresh_collection
    return refresh_collection
def __request(self, method, path, payload={}):
    url = 'https://%s%s?%s' % (self.__endpoint, self.__parse_path(path)[:-1],
                               self.__parse_query_string(path))
    data = json.dumps(payload).encode('utf8')
    sdk_date = self.__build_sdk_date()
    Logger.info('Request URL: ' + url)
    Logger.info('Request Data: ' + str(data))
    request = urllib2.Request(url=url, data=data)
    request.get_method = lambda: method
    request.add_header('Content-Type', 'application/json')
    request.add_header('Host', self.__endpoint)
    request.add_header('X-sdk-date', sdk_date)
    request.add_header('Authorization', self.__build_authorization(request))
    Logger.info('Request headers: ' + str(request.headers))
    try:
        f = urllib2.urlopen(request, timeout=45)
        response = f.read().decode('utf-8')
        Logger.info(response)
        return response
    except urllib2.HTTPError as e:
        Logger.error('huaweicloud#__request raise urllib2.HTTPError: ' + str(e))
        raise SystemExit(e)
def test(domain, dns_type='aliyun'):
    try:
        print('start to test ' + domain + ' in DNS ' + dns_type + ' API')
        client = __get_dns_client(dns_type)
        maindomain, acme_challenge = __extract_maindomain_and_challenge(domain)
        validation = ''.join(random.sample(string.ascii_letters + string.digits, 16))
        print('add TXT record(domain=' + maindomain + ', rr=' + acme_challenge +
              ', value=' + validation + ') to ' + dns_type + ' DNS')
        client.add_domain_record(maindomain, acme_challenge, validation)
        print('added TXT record')
        print('waiting %(time)i seconds...' % {'time': __get_wait_time()})
        time.sleep(__get_wait_time())
        print('remove above TXT record')
        client.delete_domain_record(maindomain, acme_challenge)
        print('removed TXT record')
        print('tested ' + domain + ' in DNS ' + dns_type + ' API')
    except Exception as e:
        Logger.error('test raise Exception:' + str(e))
        sys.exit()
def update(data):
    global collection_name
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    state_facility_code = '_'.join([str(data['state']), str(data['facility_code'])])
    if state_facility_code not in list(set(unique_facility)):
        state_name = fn.getNestedElement(data, 'state')
        state_code = fn.getNestedElement(data, 'state')
        facility_name = fn.getNestedElement(data, 'facility_name')
        facility_code = fn.getNestedElement(data, 'facility_code')
        date = fn.getNestedElement(data, 'upload_date')
        Logger.v('date', date)
        date_string = DateTime.toString(date)
        values = {
            'state_name': state_name,
            'state_code': state_code,
            'facility_name': facility_name,
            'facility_code': facility_code,
            'state_updated_at': date_string,
            'facility_updated_at': date_string,
            'date': date_string,
        }
        dbManager.addBulkInsert(collection_name, values, batch=True)
        unique_facility.append(state_facility_code)
    dbManager.executeBulkOperations(collection_name)
class Core:
    """Core class, contains core services (like listeners, executors, datapool)"""

    def __init__(self):
        self.cnf = cnf.get("core")
        self.logger = Logger("Core")
        self.logger.debug("Loading services")
        self._services = []
        for service_name in self.cnf.get("services"):
            service = Loader.by_id('services', service_name)
            self._services.append(service)

    def start(self):
        """Starts all loaded services"""
        self.logger.info("Starting")
        for service in self._services:
            service.start()
        self.logger.info("Started")

    def stop(self):
        """Stops all loaded services"""
        self.logger.info("Stopping Core")
        for service in self._services:
            service.stop()
        self.logger.info("Stopped")
def recordCrawledFile(data):
    global global_check_data
    path = fn.getNestedElement(data, 'path')
    report = fn.getNestedElement(data, 'report')
    split_path = path.split('/')
    file = split_path[-1]
    date = file.split('.')[0]
    year = date.split('-')[0]
    if file == '.json':
        os.remove(path)
        Logger.v('Removed', path)
    indexes = {
        'budget': {
            'year': -4,
            'state': -3,
            'all_facility': -2,
        },
        'procurement': {
            'year': -5,
            'state': -4,
            'all_facility': -2,
        },
    }
    state_idx = indexes[report]['state']
    year_idx = indexes[report]['year']
    all_facility_idx = indexes[report]['all_facility']
    conditions = {
        'budget': split_path[year_idx] == 'year_{0}'.format(year),
        'procurement': True,
    }
    if file not in ['.DS_Store', '.json'] and split_path[all_facility_idx] == 'all_facility' and conditions[report]:
        state_code = split_path[state_idx].replace('state_', '')
        global_check_data[report].append('_'.join([date, state_code]))
def get_domain_zone_id(self, domain):
    try:
        record = self.get_domain(domain)
        return record['id'] if record else None
    except Exception as e:
        Logger.error('huaweicloud#get_domain_zone_id raise: ' + str(e))
        return None
def check(params):
    global msia_tz, date_retrieve_limit, date_count, collection_name
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    today = DateTime.now(tzinfo=msia_tz)
    start_date = DateTime.getDaysAgo(date_retrieve_limit, datefrom=today)
    durations = DateTime.getBetween([start_date, today], element='date',
                                    offset=24)['order']  # offset 24 to include today
    Logger.v('durations', durations)
    data = db[collection_name].aggregate([{
        '$match': {
            'state_updated_at': {'$in': durations},
            'facility_updated_at': {'$in': durations}
        }
    }, {
        '$project': {
            '_id': 0,
            'inserted_at': 0,
            'updated_at': 0
        }
    }])
    data = list(data)
    Logger.v('Total stock issue integrity in', date_retrieve_limit, 'days:', len(data))
    state_data = {}
    facility_data_by_state = {}
    for idx in range(0, len(data)):
        row = data[idx]
        state_code = fn.getNestedElement(row, 'state_code')
        if state_code not in facility_data_by_state:
            facility_data_by_state[state_code] = {}
        state_data = addIntegrityData(data={
            'row': row,
            'to_update': state_data
        }, category='state')
        facility_data_by_state[state_code] = addIntegrityData(data={
            'row': row,
            'to_update': facility_data_by_state[state_code]
        }, category='facility')
        if date_count > date_retrieve_limit:  # limit loop data / show data in N days
            break
    date_count = 0  # reset to 0th day
    return {
        'state': state_data,
        'state_facility': facility_data_by_state,
    }
def updateDropdownOptions(params):
    option_keys = fn.getNestedElement(params, 'keys.option', ['state'])
    today = fn.getNestedElement(params, 'schedule_params.today',
                                DateTime.toString(DateTime.now(tzinfo=msia_tz)))
    data = {}
    crawled_data = {}
    # crawl from API URL (get options from API)
    # for key in keys:
    #     url = api_links[key];
    #     # url = generateUrl(api_links[key]);
    #     response = requests.get(url);
    #     json_response = json.loads(response.text);
    #     Logger.v('json_response', json_response);
    #     crawled_data[key] = json_response;
    #     Logger.v('Crawled', url);
    # Logger.v('Done crawling.');
    # save(data);
    # read from file
    for key in option_keys:
        filename = api_files[key]
        crawled_data[key] = File.readJson(filename)
    # convert key to snakecase, value to lower
    for key in crawled_data:
        if key not in data:
            data[key] = []
        for idx in range(0, len(crawled_data[key])):
            row = crawled_data[key][idx]
            obj_ = {}
            for row_key in row:
                row_value = row[row_key]
                new_key = fn.camelToSnakecase(str=row_key)
                if type(row_value) == str:
                    new_value = row_value.lower()
                elif row_value is None:
                    new_value = 'null'
                else:
                    new_value = row_value
                obj_[new_key] = new_value
            data[key].append(obj_)
    for key in data:
        folder_path = '/'.join([crawl_folder, key])
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)
        filename = '{0}/{1}'.format(folder_path, today)
        Logger.v('Saving', filename)
        fn.writeJSONFile(filename='{0}.json'.format(filename), data=data[key])
    for key in option_keys:
        directory = '/'.join([crawl_folder, key])
        raw = File.readLatestFile(directory=directory)
        refresh_collection = refreshIsRequired(data=raw, collection_name=key)
        if refresh_collection:
            refreshCollection(data=raw, collection_name=key)
            Logger.v('refreshed', key)
def runTest(cases):
    for case in cases:
        params = case['params']
        result = Router.route(params)
        Logger.v('Daniel result', result)
        # fn.show(result);
        filename = 'daniel_py_result'
        fn.writeTestFile(filename, result, minified=False)
def runTest(cases):
    for case in cases:
        params = case['params']
        result = Router.route(params)
        Logger.v('William result', result)
        # fn.show(result);
        filename = 'william_py_result'
        fn.writeTestFile(filename, result, minified=False)
def run():
    # Init web
    host = Config['API']['host']
    port = int(Config['API']['port'])
    Logger.info('Api running on port ' + str(port) + '.')
    server = WSGIServer((host, port), Api.api, log=None)
    server.serve_forever()
def get_domain_recordset_id(self, domain, rr, _type='TXT'):
    try:
        record = self.get_domain_record(domain, rr, _type)
        return record['id'] if record else None
    except Exception as e:
        Logger.error('huaweicloud#get_domain_recordset_id raise: ' + str(e))
        return None
def __init__(self, ip, port):
    self._send_lock = threading.Lock()
    self._ip = ip
    self._port = port
    self._url = "tcp://%s:%s" % (ip, port)
    self._logger = Logger("Remote %s" % self._url)
    self._z_ctx = None
    self._z_sck = None
def tryGet(self):
    self.tryLoadFromHiveToLocalFile()
    data = self.tryLoadFromLocalTempFile()
    Logger.w(self.id(), "data size=" + str(len(data)))
    return data
def sink(self, data):
    if data is None or len(data) == 0:
        Logger.e(self.id(), "no data, abort")
        return
    cols = self.cols.split(',')
    content = HtmlFactory.gethtml(self.title, cols, data)
    title = "[%s]" % self.title
    mail_to = [self.address]
    #MailSender.sendMail2(mail_to, title, content)
    MailSender.sendMail(mail_to, title, content)
def main():
    username = raw_input("username: ")
    password = raw_input("password: ")
    logger.initialize()
    stream = tweepy.Stream(username, password, twitlib.TwitterStream(), timeout=6.0)
    stream.sample()
def readJson(filename):
    data = []
    try:
        with open(filename) as f:
            data = json.load(f)
        Logger.v('Reading:', filename)
    except Exception as e:
        Logger.e(e)
    return data
def __init__(self):
    self.cnf = cnf.get("core")
    self.logger = Logger("Core")
    self.logger.debug("Loading services")
    self._services = []
    for service_name in self.cnf.get("services"):
        service = Loader.by_id('services', service_name)
        self._services.append(service)
def get_departures(self):
    # Logger.info('Request: ' + str(self.station))
    try:
        r = requests.get(Config['REISAPI']['base'] + 'StopVisit/GetDepartures/' + str(self.station))
    except (MaxRetryError, TimeoutError, ConnectionError) as e:
        Logger.warn("Ruter API returned exception: " + str(e))
        return []
    if r.status_code != 200:
        Logger.warn("Ruter API returned status " + str(r.status_code))
        return []
    return r.json()
def get_domain(self, domain):
    try:
        response = self.__request('GET', '/v2/zones?type=public&name=%s' % (domain))
        content = json.loads(response)
        return list(filter(lambda item: item['name'][:-1] == domain, content['zones']))[0]
    except Exception as e:
        Logger.error('huaweicloud#get_domain raise: ' + str(e))
        return None
def delete_domain_record(self, domain, rr, value, _type='TXT'):
    records = self.get_domain_records(domain, rr, _type)
    if records:
        for record in records:
            try:
                self.__request('DELETE',
                               '/%s/dns_records/%s' % (record['zone_id'], record['id']))
            except Exception as e:
                Logger.error(e)
    return
def __init__(self, thread, id, root=cnf):
    super().__init__(id, root)
    self._data = Loader.by_id('storage', self.lcnf.get("storage"))
    self._stop_timeout = 10
    self._running = False
    self._thread_to_run = thread
    self._run_thread = None
    self._logger = Logger('Service')
    self._init()
def __init__(self, station):
    self.station = station['ID']
    self.name = station['Name']
    position = utm.to_latlon(station['X'], station['Y'], 32, 'X')
    self.lat = position[0]
    self.lon = position[1]
    self.lastUpdate = datetime(1970, 1, 1)
    Logger.info('New stop ' + self.name + ' has been loaded ' + str(position) + '.')
def __extract_maindomain_and_challenge(domain):
    subdomain, maindomain = Utils.extract_domain(domain)
    acme_challenge = '_acme-challenge'
    if subdomain:
        acme_challenge += '.' + subdomain
    Logger.info('manual_hook maindomain: ' + maindomain)
    Logger.info('manual_hook acme_challenge: ' + acme_challenge)
    return (maindomain, acme_challenge)
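As a rough sketch of the intended mapping, assuming Utils.extract_domain splits a hostname into its subdomain part and registrable domain (the example hostnames are made up):

# Illustrative expectations only; Utils.extract_domain is assumed to split
# 'www.example.com' into ('www', 'example.com') and 'example.com' into ('', 'example.com').
# __extract_maindomain_and_challenge('www.example.com') -> ('example.com', '_acme-challenge.www')
# __extract_maindomain_and_challenge('example.com')     -> ('example.com', '_acme-challenge')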
def sink(self, sections):
    if sections is None:
        Logger.e(self.id(), "no data, abort")
        return
    content = ""
    for context in self.contexts:
        key = context["id"]
        section = sections[key]
        content += "</p>"
        content += HtmlFactory.getTable(context["title"], context['cols'].split(','), section)
    content = HtmlFactory.getHeader(content)
    return content
def sink(self, sections):
    if sections is None or len(sections) == 0:
        Logger.e(self.id(), "no data, abort")
        return
    content = ""
    for context in self.contexts:
        key = context["id"]
        section = sections[key]
        content += "</p>"
        content += HtmlFactory.getTable(context["title"], context['cols'].split(','), section)
    content = HtmlFactory.getHeader(content)
    title = "[%s]" % self.title
    mail_to = [self.address]
    #MailSender.sendMail2(mail_to, title, content)
    MailSender.sendMail(mail_to, title, content)
def sink(self, data):
    if data is None or len(data) == 0:
        Logger.e(self.id(), "no data, abort")
        return
    sections = self.buildDataGroup(data)
    keys = sections['__meta__']
    content = ""
    #for key in sections:
    for key in keys:
        section = sections[key]
        content += "</p>"
        content += HtmlFactory.getTable(section["title"], section['cols'].split(','), section['data'])
    content = HtmlFactory.getHeader(content)
    #print content
    title = "[%s]" % self.title
    mail_to = [self.address]
    #MailSender.sendMail2(mail_to, title, content)
    MailSender.sendMail(mail_to, title, content)
def getInstance(db_name):
    global db_instances
    global db_configs
    if db_name not in db_configs:
        Logger.f("[DBFactory]", "unknown db instance")
        return None
    instance = None
    conf = db_configs[db_name]
    if db_name in db_instances:
        instance = db_instances[db_name]
    if instance is None:
        instance = connect(conf)
        db_instances[db_name] = instance
    else:
        try:
            instance.ping()
        except:
            # stale connection: reconnect and re-cache the fresh instance
            instance = connect(conf)
            db_instances[db_name] = instance
    return instance
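A minimal usage sketch of the factory, assuming db_configs contains an entry named 'report_db' (a made-up name) and connect() returns a DB-API style connection:

# Hypothetical usage; 'report_db' must exist in db_configs for this to return a connection.
db = getInstance('report_db')
if db is not None:
    cursor = db.cursor()
    cursor.execute('SELECT 1')
    print(cursor.fetchall())
    cursor.close()

Caching instances by name keeps one live connection per database, and the ping() check transparently replaces a connection that has gone stale between calls.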
def tryLoadFromHiveToLocalFile(self):
    sql = "use {0};".format(self.db_name)
    i = 0
    tmp = self.sql.strip(';').split(';')
    sqls = []
    for sql_tmp in tmp:
        if sql_tmp is None:
            continue
        if sql_tmp.strip() == '':
            continue
        sqls.append(sql_tmp.strip('\r').strip('\n'))
    while i < len(sqls) - 1:
        sql += sqls[i] + ";"
        i = i + 1
    sql += " INSERT OVERWRITE LOCAL DIRECTORY '{0}' ".format(self.tmp_dir)
    #sql += sqls[i].replace("\n", "").replace("\r", "") + ";"
    sql += sqls[i] + ";"
    Logger.d(self.id(), sql)
    Logger.d(self.id(), "tmp_dir=" + self.tmp_dir)
    #child = subprocess.Popen(['mkdir', self.tmp_dir, '-p'])
    child1 = subprocess.Popen(['hive', '-e ' + sql])
    child1.wait()
def tryGet(self):
    body = self.buildQueryObject()
    Logger.d(self.id(), "query=" + json.dumps(body))
    resp = requests.post(url='http://10.1.11.100:8080/native_report/v2', data=json.dumps(body))
    content = resp.json()
    #print content
    data = []
    Logger.d(self.id(), "content size=" + str(len(content)))
    Logger.d(self.id(), "dimens=" + ','.join(self.dims))
    Logger.d(self.id(), "dimenFilters=" + ','.join(self.filters['slot']))
    for elem in content:
        cell = []
        cell.append(self.log_date)
        dimenValues = elem['dimenId'].split('|')
        i = 0
        while i < len(self.dims):
            dim = self.dims[i]
            dimValue = dimenValues[i]
            if self.filters is not None and 0 != len(self.filters):
                if dim in self.filters:
                    if dimValue not in self.filters[dim]:
                        i = i + 1
                        continue
            cell.append(dimValue)
            i = i + 1
        if len(cell) != len(self.dims) + 1:
            continue
        i = 0
        while i < len(self.cq):
            cols = elem['columns']
            if self.cq[i] in cols:
                cell.append(cols[self.cq[i]])
            else:
                cell.append('0')
            i = i + 1
        data.append(cell)
    return data
def run(self, task):
    Logger.d("[Executor-%s]" % task, "start running at " + str(datetime.datetime.now()))
    if not self.config.has_section(task):
        Logger.f("[Executor-%s]" % task,
                 "failed running at {0}, reason={1}".format(datetime.datetime.now(), "task not found"))
        return

    # build the supplier
    supplier = None
    supplier_type = self.config.get(task, "supplier.type").lower()
    Logger.d("[Executor-%s]" % task, "supplier_type=%s" % (supplier_type))
    if "sql" == supplier_type or "mysql" == supplier_type:
        supplier_db_name = self.config.get(task, "supplier.db_name")
        supplier_sql_file = self.config.get(task, "supplier.sql_file")
        supplier_cachable = False
        if self.config.has_option(task, "supplier.cachable"):
            supplier_cachable = self.config.get(task, "supplier.cachable")
        supplier_sql = ""
        with open(supplier_sql_file, "r") as sql_file:
            supplier_sql = sql_file.read()
        #Logger.d("[Executor-%s]" % task, "db=%s file=%s sql=%s" % (supplier_db_name, supplier_sql_file, supplier_sql))
        supplier = SQLBasedSupplier(task, supplier_db_name, supplier_sql, supplier_cachable)
    elif "hql" == supplier_type or "hive" == supplier_type:
        supplier_db_name = self.config.get(task, "supplier.db_name")
        supplier_sql_file = self.config.get(task, "supplier.sql_file")
        supplier_tmp_file = None
        if self.config.has_option(task, "supplier.tmp_file"):
            supplier_tmp_file = self.config.get(task, "supplier.tmp_file")
        supplier_cachable = False
        if self.config.has_option(task, "supplier.cachable"):
            supplier_cachable = self.config.get(task, "supplier.cachable")
        supplier_sql = ""
        with open(supplier_sql_file, "r") as sql_file:
            supplier_sql = sql_file.read()
        #Logger.d("[Executor-%s]" % task, "db=%s file=%s sql=%s" % (supplier_db_name, supplier_sql_file, supplier_sql))
        supplier = HiveBasedSupplier(task, supplier_db_name, supplier_sql, supplier_tmp_file, supplier_cachable)
    elif "multi-sql" == supplier_type:
        supplier_db_name = self.config.get(task, "supplier.db_name")
        supplier_cachable = False
        if self.config.has_option(task, "supplier.cachable"):
            supplier_cachable = self.config.get(task, "supplier.cachable")
        supplier_instances = self.config.get(task, "supplier.instances").split(",")
        supplier_sqls = {}
        for instance in supplier_instances:
            instance_sql_file = self.config.get(task, "supplier." + instance + ".sql_file")
            with open(instance_sql_file, "r") as sql_file:
                instance_sql = sql_file.read()
            supplier_sqls[instance] = {"sql": instance_sql}
        supplier = MultiSQLBasedSupplier(task, supplier_db_name, supplier_sqls, supplier_cachable)
    elif "http" == supplier_type:
        supplier_product = self.config.get(task, "supplier.product")
        supplier_cq = self.config.get(task, "supplier.cq")
        supplier_dimen = self.config.get(task, "supplier.dimen")
        supplier_log_date = datetime.datetime.strftime(datetime.datetime.now(), "%Y-%m-%d")
        if self.config.has_option(task, "supplier.date"):
            supplier_date = self.config.get(task, "supplier.date")
            if '{INTERVAL}' == supplier_date:
                supplier_date_interval = self.config.get(task, "supplier.date.interval")
                date_tmp = datetime.date.today() - datetime.timedelta(days=int(supplier_date_interval))
                supplier_log_date = datetime.datetime.strftime(date_tmp, "%Y-%m-%d")
        supplier_filters = {}
        if self.config.has_option(task, "supplier.filters"):
            supplier_filter_dims = self.config.get(task, "supplier.filters").split(",")
            for dim in supplier_filter_dims:
                dimValues = self.config.get(task, "supplier.filters." + dim).split(",")
                supplier_filters[dim] = dimValues
        supplier = HttpBasedSupplier(task, supplier_log_date, supplier_product, supplier_dimen,
                                     supplier_cq, supplier_filters)
    elif "test" == supplier_type:
        supplier = SimpleTestSupplier()
    if supplier is None:
        Logger.e("[Executor-%s]" % task, "no supplier, abort")
        return

    # build the sinker
    sinker = None
    sinker_type = self.config.get(task, "sinker.type").lower()
    if "html" == sinker_type:
        sinker_title = self.config.get(task, "sinker.title")
        sinker_cols = self.config.get(task, "sinker.cols")
        sinker_html_format = self.config.get(task, "sinker.format")
        sinker = HttpBasedSinker(task, sinker_title, sinker_cols, sinker_html_format)
    elif "multi-html" == sinker_type:
        sinker_title = self.config.get(task, "sinker.title")
        sinker_instances = self.config.get(task, "sinker.instances").split(",")
        sinker_contexts = []
        for instance in sinker_instances:
            instance_title = self.config.get(task, "sinker." + instance + ".title")
            instance_cols = self.config.get(task, "sinker." + instance + ".cols")
            sinker_contexts.append({"id": instance, "title": instance_title, "cols": instance_cols})
        sinker = MultiSectionHttpSinker(task, sinker_title, sinker_contexts)
    elif "email" == sinker_type:
        sinker_title = self.config.get(task, "sinker.title")
        sinker_cols = self.config.get(task, "sinker.cols")
        sinker_address = self.config.get(task, "sinker.address")
        sinker = EmailBasedSinker(task, sinker_address, sinker_title, sinker_cols)
    elif "multi-email" == sinker_type:
        sinker_address = self.config.get(task, "sinker.address")
        sinker_title = self.config.get(task, "sinker.title")
        sinker_instances = self.config.get(task, "sinker.instances").split(",")
        sinker_contexts = []
        for instance in sinker_instances:
            instance_title = self.config.get(task, "sinker." + instance + ".title")
            instance_cols = self.config.get(task, "sinker." + instance + ".cols")
            sinker_contexts.append({"id": instance, "title": instance_title, "cols": instance_cols})
        sinker = MultiSectionEmailSinker(task, sinker_address, sinker_title, sinker_contexts)
    elif "multi-csv" == sinker_type:
        sinker_instances = self.config.get(task, "sinker.instances").split(",")
        sinker_contexts = []
        for instance in sinker_instances:
            instance_write_header = self.config.get(task, "sinker." + instance + ".write_header")
            instance_cols = self.config.get(task, "sinker." + instance + ".cols")
            instance_output = self.config.get(task, "sinker." + instance + ".output")
            sinker_contexts.append({"id": instance, "write_header": instance_write_header,
                                    "cols": instance_cols, "output": instance_output})
        sinker = MultiSectionCsvSinker(task, sinker_contexts)
    elif "group-email" == sinker_type:
        sinker_address = self.config.get(task, "sinker.address")
        sinker_title = self.config.get(task, "sinker.title")
        sinker_cols = self.config.get(task, "sinker.cols")
        sinker_group_by = self.config.get(task, "sinker.group_by")
        sinker_limit = sys.maxint
        if self.config.has_option(task, "sinker.limit"):
            sinker_limit = self.config.get(task, "sinker.limit")
        sinker = GroupEmailSinker(task, sinker_address, sinker_title, sinker_cols,
                                  sinker_group_by, sinker_limit)
    if sinker is None:
        Logger.e("[Executor-%s]" % task, "no sinker, abort")
        return

    # build the (optional) extender
    extender = None
    extender_type = ""
    if self.config.has_option(task, "extender.type"):
        extender_type = self.config.get(task, "extender.type").lower()
    if "single-key" == extender_type:
        supplier_type = self.config.get(task, "extender.supplier.type")
        extender_supplier = None
        if "sql" == supplier_type or "mysql" == supplier_type:
            supplier_db_name = self.config.get(task, "extender.supplier.db_name")
            supplier_sql_file = self.config.get(task, "extender.supplier.sql_file")
            supplier_cachable = False
            if self.config.has_option(task, "extender.supplier.cachable"):
                supplier_cachable = self.config.get(task, "extender.supplier.cachable")
            supplier_sql = ""
            with open(supplier_sql_file, "r") as sql_file:
                supplier_sql = sql_file.read()
            #Logger.d("[Executor-%s]" % task, "db=%s file=%s sql=%s" % (supplier_db_name, supplier_sql_file, supplier_sql))
            extender_supplier = SQLBasedSupplier(task, supplier_db_name, supplier_sql, supplier_cachable)
        if extender_supplier is not None:
            extender_base_key_index = self.config.get(task, "extender.base_key_index")
            extender_data_key_index = self.config.get(task, "extender.data_key_index")
            extender_insert_index = self.config.get(task, "extender.insert_index")
            extender = KeyBasedExtender(task, extender_supplier, extender_base_key_index,
                                        extender_data_key_index, extender_insert_index)
    if "multi-stage" == extender_type:
        extender_stages = self.config.get(task, "extender.stages").split(',')
        contexts = []
        for stage in extender_stages:
            prefix = "extender." + stage + "."
            supplier_type = self.config.get(task, prefix + "supplier.type")
            extender_supplier = None
            if "sql" == supplier_type or "mysql" == supplier_type:
                supplier_db_name = self.config.get(task, prefix + "supplier.db_name")
                supplier_sql_file = self.config.get(task, prefix + "supplier.sql_file")
                supplier_cachable = False
                if self.config.has_option(task, prefix + "supplier.cachable"):
                    supplier_cachable = self.config.get(task, prefix + "supplier.cachable")
                supplier_sql = ""
                with open(supplier_sql_file, "r") as sql_file:
                    supplier_sql = sql_file.read()
                #Logger.d("[Executor-%s]" % task, "db=%s file=%s sql=%s" % (supplier_db_name, supplier_sql_file, supplier_sql))
                extender_supplier = SQLBasedSupplier(task, supplier_db_name, supplier_sql, supplier_cachable)
            if extender_supplier is not None:
                extender_base_key_index = self.config.get(task, prefix + "base_key_index")
                extender_data_key_index = self.config.get(task, prefix + "data_key_index")
                extender_insert_index = self.config.get(task, prefix + "insert_index")
                contexts.append({"supplier": extender_supplier,
                                 "base_key_index": extender_base_key_index,
                                 "data_key_index": extender_data_key_index,
                                 "insert_index": extender_insert_index})
        extender = MultiStageKeyExtender(task, contexts)

    # run the pipeline: supply -> (extend) -> sink
    data = supplier.get()
    if extender is not None:
        data = extender.extend(data)
    data = sinker.sink(data)
    Logger.d("[Executor-%s]" % task, "end running at " + str(datetime.datetime.now()))
    return data