def get_or_create_session():
    """
    Retrieve the current active global session.

    If no active session exists, attempt to load config and create a new session.
    If an active session exists, return the session without loading new config.

    Returns
    -------
    session : Session
        The global active session
    """
    global _session
    if _session is not None and _session.is_active():
        _getLogger(__name__).debug(
            "Active session found, ignoring session kwargs")
    else:
        config = load_config()
        if config is None:
            print("WARN: Missing config")
            writer = WriterConfig(type="local", output_path="output", formats=["all"])
            config = SessionConfig("default-project", "default-pipeline", [writer], False)
        _session = session_from_config(config)
    return _session
def get_or_create_session(path_to_config: Optional[str] = None,
                          report_progress: Optional[bool] = False):
    """
    Retrieve the current active global session.

    If no active session exists, attempt to load config and create a new session.
    If an active session exists, return the session without loading new config.

    :param path_to_config: Optional path to a config file to load.
    :type path_to_config: str
    :return: The global active session
    :rtype: Session
    """
    global _session
    if _session is not None and _session.is_active():
        _getLogger(__name__).debug("Active session found, ignoring session kwargs")
    else:
        config = load_config(path_to_config)
        if config is None:
            print("WARN: Missing config")
            config = SessionConfig(
                "default-project",
                "default-pipeline",
                [WriterConfig(type="local", output_path="output", formats=["all"])],
                MetadataConfig(type="local", output_path="output", input_path=""),
                False,
            )
        if report_progress is not None:
            config.report_progress = report_progress
        _session = session_from_config(config)
    return _session
def get_lang_for_preparetext(text, main_language="en") -> str:
    try:
        lang = detect(text)
        if lang not in _lang_names.keys():
            lang = main_language
        return lang
    except:
        _getLogger(__name__).warning('no features in text')
        return main_language
def get_lang(text, main_language="en") -> str:
    try:
        lang = detect(text)
        if lang not in _mongo_langs:
            lang = main_language
        return lang
    except:
        _getLogger(__name__).warning('no features in text')
        return main_language
def stemme_text(text: str, returnList=True, lang=None):
    try:
        lang = lang if lang != "none" else detect(text)
    except LangDetectException as e:
        _getLogger(__name__).warning(str(e))
        pass
    words = tokenize(text)
    words = stemme_words(words, lang)
    if returnList:
        return words
    else:
        return " ".join(words)
def SweepingTest(metrics=None, endpoint=None, batchSize=1, printVolume=10000):
    logger = _getLogger(__name__)
    # logger.info("in SweepingTest")
    st = _time.time()
    allR = []
    printFactor = 0
    for queryIndex in range(0, len(metrics), batchSize):
        printFactor += batchSize
        allR.append(
            _requests.get(
                '%s/api/query?start=2018/04/25-00:00:00&end=2018/06/12-00:00:00&%s'
                % (endpoint, "&".join(metrics[queryIndex:queryIndex + batchSize]))))
        if printFactor > printVolume:
            logger.info(queryIndex)
            logger.info(
                '%s/api/query?start=2018/04/25-00:00:00&end=2018/06/12-00:00:00&%s'
                % (endpoint, "&".join(metrics[queryIndex:queryIndex + batchSize])))
            et = _time.time()
            logger.info(et - st)
            logger.info("total throughput %f" % ((et - st) / (queryIndex + batchSize)))
            logger.debug(allR[-1].text)
            printFactor = 0
    et = _time.time()
    throughput = (et - st) / (len(metrics))
    logger.info("throughput=%f" % (throughput))
    return throughput, allR[-1]
def get_or_create_session():
    """
    Retrieve the current active session. If no active session is found,
    create the session.
    """
    global _session
    if _session is not None and _session.is_active():
        _getLogger(__name__).debug(
            'Active session found, ignoring session kwargs')
    else:
        config = load_config()
        if config is None:
            writer = WriterConfig(type='local', output_path='output', formats=['all'])
            config = SessionConfig('default-project', 'default-pipeline', False, [writer])
        _session = session_from_config(config)
    return _session
def __init__(
    self,
    project: str,
    pipeline: str,
    writers: List[Writer],
    metadata_writer: Optional[MetadataWriter] = None,
    verbose: bool = False,
    with_rotation_time: str = None,
    cache_size: int = None,
    report_progress: bool = False,
):
    self._py_logger = _getLogger(__name__)
    if writers is None:
        writers = []
    self.project = project
    self.pipeline = pipeline
    self.writers = writers
    self.metadata_writer = metadata_writer
    self.verbose = verbose
    self._active = True
    self._loggers = {}
    self._session_time = datetime.datetime.now()
    self._session_id = str(uuid4())
    self._config = SessionConfig(project, pipeline, writers, metadata_writer, verbose)
    self.with_rotation_time = with_rotation_time
    self.cache_size = cache_size
    self.report_progress = report_progress

    # enable special logic when starting/closing a Session if we're using the
    # whylabs client to save dataset profiles
    whylabs_writer_is_present = any(isinstance(w, WhyLabsWriter) for w in self.writers)
    self.use_whylabs_writer = _use_whylabs_client or whylabs_writer_is_present

    # add WhyLabs writer if it's not already present (which can happen if it's
    # not specified in the config)
    if _use_whylabs_client and whylabs_writer_is_present is False:
        self.writers.append(WhyLabsWriter(output_path=None, formats=["protobuf"]))
def _getLogger(name=None):
    logger = logging.getLogger(name)
    if not logger.handlers:
        handler = StreamHandler()
        logger.addHandler(handler)
    return logger
def configure_logger_level():
    """
    When called this will set the root logger level based on the
    ``agent_global_logger_level`` configuration variable.
    """
    # Import here to prevent circular imports and because we
    # don't want CONFIGURATION in the namespace of this module.
    from pyfarm.agent.logger.twistd import CONFIGURATION

    root_level = config["agent_global_logger_level"]
    if isinstance(root_level, STRING_TYPES):
        root_level = _levelNames[root_level.upper()]

    assert isinstance(root_level, int)
    levels = CONFIGURATION["levels"]

    for index, (name, level) in enumerate(levels):
        if name == "":
            levels[index] = ("", root_level)
            break
    else:
        levels.insert(0, ("", root_level))

    # Just to be safe, we also set pf's root level
    pf = _getLogger("pf")
    pf.setLevel(root_level)
def writeDataFrameToOpenTsdb(df=None,
                             valueColumns=None,
                             groupColumns=None,
                             apiEntryPoint='cviadqat07.office.comscore.com',
                             putApiEndPoint=None,
                             assignApiEndPoint=None,
                             port=None,
                             metric=None,
                             host_tag=False,
                             check_tsdb_alive=True,
                             send_metrics_batch_limit=50,
                             tagsToKeep=None,
                             max_queue_size=50000,
                             compressTags=False,
                             overrideMillisecond=False):
    """store dataframe into tsdb via client"""
    logger = _getLogger(__name__)
    if not (metric and metric.strip()):
        raise Exception('Metric must not be empty')
    result = putAPIOpentsdb(df=df,
                            valueColumns=valueColumns,
                            groupColumns=groupColumns,
                            metric=metric,
                            putApiEndPoint=putApiEndPoint,
                            tagsToKeep=tagsToKeep,
                            compressTags=compressTags,
                            overrideMillisecond=overrideMillisecond)
    return result
def ad_processor(self, input_data, parameter_dict=None, agg_col_name=None, default_result=None):
    logger = _getLogger(__name__)
    nan_map = input_data.isnull()
    result = {}
    default_result['timestamp'] = input_data.index.max()
    _STATUS_CODE = AlphaResultObject.STATUS_CODE
    if nan_map.agg('sum') == 0:
        if input_data.shape[0] > 1:
            result = self.apply_prediction(data=input_data,
                                           agg_col_name=agg_col_name,
                                           param_dict=parameter_dict)
            if not isinstance(result, dict):
                raise TypeError(
                    'Return type of apply_prediction should be a dict() object')
            elif not set(AlphaResultObject().metric_column_list).issubset(result.keys()):
                raise ValueError(
                    'Result dictionary key set should contain all elements of '
                    'AlphaResultObject().metric_column_list')
        else:
            result[_STATUS_CODE] = 2
    else:
        result[_STATUS_CODE] = 1
    result_dict = {**default_result, **result}
    return result_dict
def readData(filename):
    """open filename and return binary data"""
    logger = _getLogger(__name__)
    logger.info("in readData")
    data = None
    with open(filename, 'rb') as fid:
        data = fid.read()
    return data
def prepare_text(comment: str, lang=None) -> list:
    print('prepare_text called')
    # comment["lang"] is None if it's not supported by mongodb;
    # we have to detect the language to remove stop words and stem
    try:
        lang = get_lang_for_preparetext(comment)
    except LangDetectException as e:
        _getLogger(__name__).warning(str(e))
        pass
    # tokenize comment text
    words = tokenize(comment)
    # remove numbers and punctuation
    filtred_words = remove_punctuation(words)
    # remove stop words
    filtred_words = remove_stop_words(filtred_words, lang)
    # stem words
    if lang != 'ar':
        filtred_words = stemme_words(filtred_words, lang)
    return filtred_words
def findStartTime(timeColumnsDict=None, numberPoints=0, timePeriod='D'):
    """given time columns, end time and numberPoints return the python datetime object"""
    logger = _getLogger(__name__)
    logger.info("in findStartTime")
    if 'month_id' in timeColumnsDict:
        logger.info("found month_id")
        timestamp = _CSTimeToDatetime(month_id=timeColumnsDict['month_id'] - numberPoints)
    elif 'week_id' in timeColumnsDict:
        logger.info("found week_id")
        timestamp = _CSTimeToDatetime(week_id=timeColumnsDict['week_id'] - numberPoints)
    elif 'hour_id' in timeColumnsDict and 'time_id' in timeColumnsDict:
        logger.info("found time_id and hour_id")
        timestamp = _CSTimeToDatetime(time_id=timeColumnsDict['time_id'],
                                      hour_id=timeColumnsDict['hour_id'] - numberPoints)
    elif 'time_id' in timeColumnsDict:
        logger.info("found time_id")
        timestamp = _CSTimeToDatetime(time_id=timeColumnsDict['time_id'] - numberPoints)
    elif len(timeColumnsDict) == 1:
        # assume it is already a timestamp;
        # for a single column it is assumed that the time column is seconds since epoch
        secondsSinceEpoch = timeColumnsDict[next(iter(timeColumnsDict))]
        utcDateTime = _datetime.fromtimestamp(secondsSinceEpoch, _timezone.utc)
        time_id = 0
        hour_id = 0
        week_id = 0
        month_id = 0
        # set the offset used in the delta based on the time period
        needDelta = True
        if timePeriod == 'daily':
            time_id = -1 * numberPoints
        elif timePeriod == 'hourly':
            hour_id = -1 * numberPoints
        elif timePeriod == 'monthly':
            month_id = -1 * numberPoints
        elif timePeriod == 'weekly':
            week_id = -1 * numberPoints
        elif timePeriod == 'unknown':
            needDelta = False
            logger.warn("unknown time type defaulting to start time as epoch")
            timestamp = _datetime(1970, 1, 1, 0, 0, 0, 0, _pytz.UTC)
        else:
            needDelta = False
            logger.warn("unsupported time type defaulting to start time as epoch")
            timestamp = _datetime(1970, 1, 1, 0, 0, 0, 0, _pytz.UTC)
        if needDelta:
            timestamp = utcDateTime + _timedelta(
                days=time_id,
                hours=hour_id,
            ) + _relativedelta(months=month_id, weeks=week_id)
    return timestamp
def SweepingMetaTest(metaDF=None,
                     totalRows=None,
                     endpointsMap=None,
                     batchSize=1,
                     printVolume=10000,
                     testNumber=2,
                     additionalSearchParameters='',
                     tagsToKeep=None):
    logger = _getLogger(__name__)
    metaInfo = []
    for endpoint in endpointsMap:
        logger.info('using %s' % (endpoint[0]))
        try:
            _requests.get('%s/api/dropcaches' % (endpoint[0]))
            df = metaDF['df']
            timeColumns = metaDF['time_columns']
            valueColumns = metaDF['value_columns']
            groupColumns = metaDF['group_columns']
            pj = [
                generateOpentsdbJsonPayloadAsMetrics(
                    fields=dict(row[valueColumns]),
                    metric=endpoint[testNumber],
                    tags=dict(row[groupColumns]),
                    time=row['timestamp'],
                    tagsToKeep=tagsToKeep)
                for index, row in df[:totalRows].iterrows()
            ]
            if tagsToKeep is None:
                metrics = [
                    'm=none:%s%s' % (additionalSearchParameters, elem['metric'])
                    for elem in _itertools.chain.from_iterable(pj)
                ]
            else:
                metrics = [
                    'm=none:%s%s%s' % (additionalSearchParameters, elem['metric'],
                                       buildTagSearchFromKV(elem['tags']))
                    for elem in _itertools.chain.from_iterable(pj)
                ]
            logger.info(pj[0][0])
            logger.info(metrics[0])
            results = SweepingTest(metrics=metrics,
                                   endpoint=endpoint[0],
                                   batchSize=batchSize,
                                   printVolume=printVolume)
            metaInfo.append(results)
        except _requests.exceptions.ConnectionError as e:
            logger.info("Catching Connection Error: %s: process next record" % (str(e)))
            metaInfo.append([e])
            continue
    return metaInfo
def decodeData(inputData, encoding):
    """decode data using encoding to return string data"""
    logger = _getLogger(__name__)
    logger.info("in decodeData")
    data = None
    try:
        data = inputData.decode(encoding).strip()
    except Exception as e:
        logger.error("Exception in decode:%s" % (e))
        raise e
    return data
def putAPIOpentsdb(df=None,
                   valueColumns=None,
                   groupColumns=None,
                   metric=None,
                   tagsToKeep=None,
                   putApiEndPoint=None,
                   compressTags=False,
                   overrideMillisecond=False,
                   timestampCol='timestamp',
                   tolerance=0.1):
    logger = _getLogger(__name__)
    pj = get_metric_names(dataframe=df,
                          group_columns=groupColumns,
                          value_columns=valueColumns,
                          metric_prefix=metric,
                          tagsToKeep=tagsToKeep)
    pjAll = list(_itertools.chain.from_iterable(pj))
    result = list()
    if len(pjAll) > 0:
        if logger.isEnabledFor(_DEBUG):
            logger.debug(len(pjAll))
            logger.debug(len(pjAll[0]))
            logger.debug(type(pjAll[0]))
            logger.debug(pjAll[0])
        else:
            if _randint(1, 10001) < 10:
                logger.info(len(pjAll))
                logger.info(len(pjAll[0]))
                logger.info(type(pjAll[0]))
                logger.info(pjAll[0])
    else:
        logger.info('number of metrics generated are 0')
        logger.info('Not calling put API, returning empty list')
        return result
    session = retry_session(retries=5)
    r = session.post(url=putApiEndPoint, data=_json.dumps(pjAll))  # , timeout=8
    result.append(r.status_code)
    successful_puts = sum([elem < 300 for elem in result])
    logger.info('number 204:%d' % (successful_puts))
    if successful_puts == 0:
        raise ValueError('put api call was unsuccessful')
    return result
def json_parser(json_file=None, business_id=None, group_id=None, database_id=None, **kwargs):
    master_json_tmp = master_json_file + '.tmp'
    master_json_bak = master_json_file + '.bak'
    remove_bak_file = True
    remove_tmp_file = False
    shutil.copy2(master_json_file, master_json_bak)
    logger = _getLogger(__name__)
    if business_id is not None and group_id is not None and database_id is not None:
        meta_info = {}
        meta_info[business_id] = {}
        meta_info[business_id][group_id] = {}
    else:
        raise ValueError('business_id, group_id or database_id cannot be null')
    if json_file is not None:
        with open(json_file, 'r') as f_h:
            parameter_dict = _json.load(f_h)
        meta_info[business_id][group_id][database_id] = parameter_dict
    else:
        meta_info[business_id][group_id][database_id] = {
            'influxAPI': None, 'influxPort': None,
            'opentsdbAPI': None, 'opentsdbPort': None,
            'tableName': None,
            'anomaly_package': None, 'anomaly_class': None,
            'apply_AD_cols': None, 'time_range': 45,
            'threshold_dict': None, 'parameter_dict': None}
    with open(master_json_file, 'r') as f_h:
        existing_config = _json.load(f_h)
    new_config = {**existing_config, **meta_info}
    with open(master_json_tmp, 'w') as f_h:
        _json.dump(new_config, f_h)
    try:
        shutil.move(master_json_tmp, master_json_file)
        logger.info('New Json file created successfully')
    except Exception as e:
        logger.debug('The following error occurred when creating the new json file: %s' % e)
        remove_bak_file = False
        remove_tmp_file = True
    finally:
        if remove_tmp_file:
            os.remove(master_json_tmp)
        if remove_bak_file:
            os.remove(master_json_bak)
def getLogger(name, parent=None):
    if isinstance(parent, LoggerAdapter):
        klass = type(parent)
        extra = parent.extra
        parent = parent.logger
    else:
        klass = None
        extra = None
    if parent:
        name = parent.name + '.' + name
    logger = _getLogger(name)
    logger.settings = settings
    if extra:
        logger = klass(logger, extra)
        logger.settings = settings
    return logger
def writeDataFrameToInfluxDB(df=None,
                             valueColumns=None,
                             groupColumns=None,
                             apiEntryPoint='cviadqat07.office.comscore.com',
                             port=8086,
                             database='Panel_Only_Mobile_Data',
                             measurement=None,
                             username='',
                             password='',
                             timestampCol=['timestamp']):
    """store dataframe into tsdb via client"""
    logger = _getLogger(__name__)
    logger.info("in writeDataFrameToInfluxDB")
    restructured_df = df[groupColumns + valueColumns + timestampCol]
    num_group_cols = len(groupColumns)
    num_value_cols = len(valueColumns)
    value_col_start_index = num_group_cols + 1
    value_col_end_index = num_group_cols + num_value_cols

    # create a client connection
    client = _InfluxDBClient(apiEntryPoint, port, username, password, database)
    # create the db if needed
    client.create_database(database)

    # verify that we have a valid measurement name
    if not (measurement and measurement.strip()):
        raise Exception('Measurement must not be empty')

    # add the dataframe row by row, store the result of each load
    result = []
    # Using itertuples as it gives a speed improvement over iterrows.
    # The increase depends on the size of the input df, but for O(1000) rows it
    # can improve performance by 2x and for O(1e5) rows by 100x.
    for tup in restructured_df.itertuples():
        pj = generateInfluxJsonPayload(
            fields=dict(zip(valueColumns,
                            tup[value_col_start_index:value_col_end_index + 1])),
            measurement=measurement,
            tags=dict(zip(groupColumns, tup[1:num_group_cols + 1])),
            time=int(tup[-1]))
        result.append(client.write_points(pj, time_precision='s'))
    return result
async def load(manifest_path: Union[_Path, None]) -> Config:
    """Configuration loader function

    This architecture balances readiness for async-requiring configuration
    fetches (e.g. a cloud credential store) with the convenience of having
    synchronously constructed configuration objects. This way our configuration
    model classes can be nicely statically typed, at the cost of needing to make
    sure we have all the necessary data loaded and ready in time for synchronous
    cascading __init__ calls.
    """
    raw = {}
    if manifest_path:
        print(f"Loading configuration manifest {manifest_path}")
        with manifest_path.open() as manifest_file:
            raw = _safe_load(manifest_file)
    else:
        print("No configuration manifest supplied - using environment variables only")
    raw["env"] = _environ
    APPCONFIG = Config(raw)
    LOGGER = _getLogger(__name__)
    LOGGER.info(APPCONFIG)
    return APPCONFIG
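# A minimal usage sketch of the async loader above (not part of the original
# module); the manifest filename and the asyncio entry point are assumptions.
import asyncio
from pathlib import Path

async def main():
    # Await the async fetch, then hand the synchronously constructed Config
    # object to the rest of the application.
    appconfig = await load(Path("manifest.yaml"))  # or load(None) for env vars only
    print(appconfig)

asyncio.run(main())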
def syslog_handlers(logger_name, address=('127.0.0.1', 514), facility=0, level='DEBUG'):
    global _root_logger_name, root_logger, _stdout
    logger_names = []
    loggers = []
    if type(logger_name) in [str]:
        logger_names = [logger_name]
    elif type(logger_name) in [list, tuple]:
        logger_names = logger_name
    else:
        return loggers
    for name in logger_names:
        logger = _getLogger(name)
        _level = logger_filters.get(name) or level
        _level = _level.upper()
        if _level == 'OFF':
            handler = NullHandler()
            logger.addHandler(handler)
        elif _level in ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']:
            del logger.handlers[:]
            logger.setLevel(_level)
            handler = SysLogHandler(address=tuple(address),
                                    facility=SysLogHandler.LOG_LOCAL0 + facility)
            handler.setLevel(_level)
            logger.addHandler(handler)
            if _stdout:
                handler = StdoutHandler()
                handler.setLevel(_level)
                logger.addHandler(handler)
        logger.propagate = 0
        if name == _root_logger_name:
            root_logger = logger
        loggers.append(logger)
    return loggers
def normalizeTimeStamps(df=None, time_columns=None):
    """take in comscore time ids or assume the time column is a timestamp and
    return a timestamp series"""
    logger = _getLogger(__name__)
    logger.info("in normalizeTimeStamps")
    timestamps = None
    # overall policy is to check for month_id and, if found, use it alone;
    # otherwise if time_id and hour_id are present use those;
    # otherwise use time_id; else ensure the time column is of length 1 and
    # assume it is already a timestamp
    if 'month_id' in time_columns:
        logger.info("found month_id")
        timestamps = df.month_id.apply(
            lambda z: _CSTimeToDatetime(month_id=z).timestamp())
    elif 'week_id' in time_columns:
        logger.info("found week_id")
        timestamps = df.week_id.apply(
            lambda z: _CSTimeToDatetime(week_id=z).timestamp())
    elif 'hour_id' in time_columns and 'time_id' in time_columns:
        logger.info("found time_id and hour_id")
        timestamps = df[['time_id', 'hour_id']].apply(
            lambda row: _CSTimeToDatetime(time_id=row['time_id'],
                                          hour_id=row['hour_id']).timestamp(),
            axis=1)
    elif 'time_id' in time_columns:
        logger.info("found time_id")
        timestamps = df[['time_id']].apply(
            lambda row: _CSTimeToDatetime(time_id=row['time_id'], hour_id=0).timestamp(),
            axis=1)
    elif len(time_columns) == 1:
        # assume it is already a timestamp
        timestamps = df[time_columns[0]].astype(int)
    else:
        raise Exception('Unrecognized time format')
    # log diagnostic type info
    if len(timestamps) > 0:
        logger.info("type info:%s" % (type(timestamps[0])))
        logger.info("%s" % (timestamps[0]))
    return timestamps
def getLogger(name, parent=None):
    if parent:
        name = parent.name + '.' + name
    return _getLogger(name)
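# A small usage sketch of the wrapper above (logger names are illustrative, not
# from the original code): passing a parent yields a dotted child name that
# follows the standard logging hierarchy.
app_log = getLogger("app")          # plain logger named "app"
db_log = getLogger("db", app_log)   # child logger named "app.db"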
Core ML Tools is a python package for creating, examining, and testing models in
the .mlmodel format. In particular, it can be used to:

* Convert existing models to .mlmodel format from popular machine learning tools
  including: Keras, scikit-learn, libsvm, and XGBoost.
* Express models in .mlmodel format through a simple API.
* Make predictions with an .mlmodel (on select platforms for testing purposes).

For more information: http://developer.apple.com/documentation/coreml
"""

from enum import Enum as _Enum
from logging import getLogger as _getLogger

# Backup root logger handlers
_root_logger = _getLogger()
_root_logger_handlers_backup = _root_logger.handlers.copy()

from .version import __version__

# This is the basic Core ML specification format understood by iOS 11.0
SPECIFICATION_VERSION = 1

# New versions for iOS 11.2 features. Models which use these features should have these
# versions, but models created from this coremltools which do not use the features can
# still have the basic version.
_MINIMUM_CUSTOM_LAYER_SPEC_VERSION = 2
_MINIMUM_FP16_SPEC_VERSION = 2

# New versions for iOS 12.0 features. Models which use these features should have these
# versions, but models created from this coremltools which do not use the features can
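# A hedged sketch (not taken from the module above) of the scikit-learn
# conversion path it describes; the feature name "x" and output name "y" are
# assumptions made for illustration.
import numpy as np
from sklearn.linear_model import LinearRegression
import coremltools

sk_model = LinearRegression().fit(np.array([[1.0], [2.0], [3.0]]), np.array([2.0, 4.0, 6.0]))
mlmodel = coremltools.converters.sklearn.convert(sk_model, ["x"], "y")
mlmodel.save("linear.mlmodel")
# Predictions run only on select platforms (e.g. macOS):
# print(mlmodel.predict({"x": 2.0}))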
""" # publicly visible to plugins from stolos import argparse_shared as at from stolos.configuration_backend import TasksConfigBaseMapping, TasksConfigBaseSequence TasksConfigBaseSequence, TasksConfigBaseMapping from stolos import api at, api # imports hidden from plugins from logging import getLogger as _getLogger log = _getLogger("stolos.plugins") from stolos.exceptions import CodeError as _CodeError def log_and_raise(err, log_details): """The less unexpected way for plugins to fail. A helper function that logs the given exception and then raises an exception. Stolos will see this error, mark the job as failed and quit. Plugin exceptions not handled by this function will cause Stolos to complain that you have unexpected errors in your plugin code. """ log.exception(err, extra=log_details) raise _CodeError("Task failed. This particular error message will never appear in logs.")
def processInputData(inputa,
                     fieldSeparator=None,
                     recordSeparator=None,
                     otsdbExcludeCharRegex=r'[^a-zA-Z\d\-_%s/]+',
                     otsdbExcludeCharReplacer='_'):
    logger = _getLogger(__name__)
    logger.info("in processInputData")
    output = {}
    if fieldSeparator is None or recordSeparator is None:
        return output
    engine = 'c'
    if len(recordSeparator) > 1:
        engine = 'python'
    lines = inputa.strip().split('\n')
    partsAll = [elem.strip().split(recordSeparator) for elem in lines]
    subfields = []
    for selector in range(3):
        subfields.append(
            [elem[selector].split(fieldSeparator) for elem in partsAll])
    for i in range(len(subfields[0])):
        clean_row_part(subfields[0][i],
                       otsdbExcludeCharRegex,
                       otsdbExcludeCharReplacer=otsdbExcludeCharReplacer,
                       excludeChars='')
        clean_row_part(subfields[1][i],
                       otsdbExcludeCharRegex,
                       otsdbExcludeCharReplacer=otsdbExcludeCharReplacer,
                       excludeChars='')
        clean_row_part(subfields[2][i],
                       otsdbExcludeCharRegex,
                       otsdbExcludeCharReplacer=otsdbExcludeCharReplacer,
                       excludeChars='.')
    columns = [elem.split('=')[0] for elem in subfields[0][0]]
    times = [elem.split('=')[0] for elem in subfields[1][0]]
    values = [elem.split('=')[0] for elem in subfields[2][0]]
    dfAll = []
    for currentFieldIndex, currentField in enumerate(subfields):
        d1 = []
        logger.info(currentFieldIndex)
        for indexa, elema in enumerate(currentField):
            try:
                d1.append(
                    {elem.split('=')[0]: elem.split('=')[1] for elem in elema})
            except Exception as e:
                logger.info(e)
                logger.debug(elema)
                logger.debug(d1[0].keys())
                raise e
        dfAll.append(_DataFrame(d1))
    dfMaster = _concat(dfAll, axis=1)
    logger.info('before timestamp normalization')
    timestamp = normalizeTimeStamps(dfMaster, time_columns=times)
    dfMaster['timestamp'] = timestamp
    if logger.isEnabledFor(_DEBUG):
        logger.debug("type debug:%s" % (type(timestamp[0])))
        logger.debug("%s" % (timestamp[0]))
        logger.debug("type debug:%s" % (type(dfMaster['timestamp'][0])))
        logger.debug("%s" % (dfMaster['timestamp'][0]))
    output = {}
    output['df'] = dfMaster
    output['time_columns'] = times
    output['value_columns'] = values
    output['group_columns'] = columns
    logger.info("times=%s, values=%s, columns=%s" % (times, values, columns))
    logger.info("leave processInputData")
    return output
def getLogger(name, parent=None):
    if parent:
        name = parent.name + '.' + name
    logger = _getLogger(name)
    logger.settings = settings
    return logger
    of segments per control point
    res.export: format string to use when exporting floating point vertices
    """

    def __init__(self, **kwargs):
        self.seg_frac = .05
        self.seg_angle = .08
        self.max_sections = 10
        self.min_sections = 5
        self.export = '.5f'


tol_path = NumericalTolerancePath()
res_path = NumericalResolutionPath()

# logging
log = _getLogger('trimesh')
log.addHandler(_NullHandler())


def _log_time(method):
    def timed(*args, **kwargs):
        tic = time_function()
        result = method(*args, **kwargs)
        log.debug('%s executed in %.4f seconds.',
                  method.__name__,
                  time_function() - tic)
        return result
    timed.__name__ = method.__name__
    timed.__doc__ = method.__doc__
    return timed
This module contains general utility functions and shared constants used by
other TritonScraper modules.

:copyright: (c) 2010 by Christopher Rebert.
:license: MIT, see :file:`LICENSE.txt` for more details.
"""

from itertools import izip_longest as _izip_longest
from decimal import Decimal as _Decimal
from logging import getLogger as _getLogger

from triton_scraper.config import LOGGER_NAME as _LOGGER_NAME
from lxml.etree import XPath

#: Floating-point Not-a-Number (NaN) value.
NaN = _Decimal('NaN')
#: Floating-point infinity value; i.e. float('infinity')
INFINITY = float('infinity')

# Common XPath component
RELATIVE_PREFIX = "descendant-or-self::node()"

# TritonScraper's logger
LOGGER = _getLogger(_LOGGER_NAME)


# From the itertools cookbook: http://docs.python.org/library/itertools.html#recipes
def grouper(n, iterable, fillvalue=None):
    "grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx"
    args = [iter(iterable)] * n
    return _izip_longest(fillvalue=fillvalue, *args)
from graphlab.connect.aws._ec2 import LicenseValidationException
from graphlab.connect.aws._ec2 import get_credentials as _get_credentials
import graphlab as _gl
import graphlab.connect as _mt

# since _predictive_service_environment imports these, need to have them defined first
_MAX_CREATE_TIMEOUT_SECS = 600  # 10m

from _predictive_service._predictive_service_environment import Ec2PredictiveServiceEnvironment as _Ec2PredictiveServiceEnvironment
from _predictive_service._predictive_service_environment import LocalPredictiveServiceEnvironment as _LocalPredictiveServiceEnvironment
from _predictive_service._file_util import parse_s3_path as _parse_s3_path, s3_recursive_delete as _s3_recursive_delete, s3_delete_key as _s3_delete_key
from _predictive_service._predictive_service import PredictiveService as _PredictiveService

_logger = _getLogger(__name__)
_name_checker = _compile('^[a-zA-Z-]+$')


def create(name, environment, state_path, description=None, api_key=None,
           admin_key=None, ssl_credentials=None):
    '''
    Launch a Predictive Services cluster. This cluster can currently be launched
    on EC2 by specifying an EC2 environment.

    Parameters
    ----------
    name : str
        The name of the Predictive Service that will be launched. This string
        can only contain: a-z, A-Z and hyphens.
    res.export: format string to use when exporting floating point vertices
    """

    def __init__(self, **kwargs):
        self.seg_frac = 0.05
        self.seg_angle = 0.08
        self.max_sections = 10
        self.min_sections = 5
        self.export = ".5f"


tol_path = NumericalTolerancePath()
res_path = NumericalResolutionPath()

### logging
log = _getLogger("trimesh")
log.addHandler(_NullHandler())


def _log_time(method):
    def timed(*args, **kwargs):
        tic = time_function()
        result = method(*args, **kwargs)
        log.debug("%s executed in %.4f seconds.",
                  method.__name__,
                  time_function() - tic)
        return result
    timed.__name__ = method.__name__
    timed.__doc__ = method.__doc__
    return timed
def mainProcessor(input_file_glob=None,
                  apiEntryPointList=[
                      'cviadqat07.office.comscore.com',
                  ],
                  processes=None,
                  metric='bID.gID.daySession.daily.input_dataExploded',
                  tagsToKeep=None,
                  compressTags=False,
                  overrideMillisecond=False,
                  useInflux=False,
                  encoding=None,
                  recordSeparator=None,
                  fieldSeparator=None):
    logger = _getLogger(__name__)
    logger.info("in main")
    startTimeMain = _time.time()
    inputa = ""
    # Read the file as string
    allFiles = sorted(_glob.glob(input_file_glob))
    logger.info('processing %s' % (str(allFiles)))
    counter = 0
    for file_ in allFiles:
        logger.info(f"Processing file: {file_}")
        inputa = readData(file_)
        inputData = decodeData(inputa, encoding)
        logger.info(
            f"Time taken to read the file: {getTimeTaken(startTimeMain, _time.time())}")
        data = processInputData(inputData,
                                recordSeparator=recordSeparator,
                                fieldSeparator=fieldSeparator)
        if overrideMillisecond:
            # increment timestamp field
            df = data['df']
            df['timestamp'] *= 1000
            milliVector = _np.arange(1, df.shape[0] + 1)
            df['timestamp'] += milliVector
        splitedDF = split(data['df'], chunkSize=100)
        valueColumns = data['value_columns']
        groupColumns = data['group_columns']
        timeColumns = data['time_columns']
        logger.info(f"Starting put API call at: {_time.ctime()}")
        if useInflux:
            logger.info('before import into influx tsdb')
            # influx client needs leading http decorators removed
            r1 = _compile(r"^https?://(www\.)?")
            firstEntry = apiEntryPointList[0]
            firstEntry = r1.sub('', firstEntry)
            influxEntry = firstEntry.split(':')[0]
            influxPort = firstEntry.split(':')[1]
            cleanDataframe = data['df'].apply(_to_numeric, errors='ignore')
            logger.info('before import into influx tsdb %s' % (metric))
            result = writeDataFrameToInfluxDB(df=cleanDataframe,
                                              valueColumns=valueColumns,
                                              groupColumns=groupColumns,
                                              apiEntryPoint=influxEntry,
                                              port=influxPort,
                                              database=metric,
                                              measurement='autoload')
            logger.info('successfully imported %d of %d records into influxdb'
                        % (sum(result), len(result)))
        else:
            host_tag = False
            check_tsdb_alive = False
            putApiEndPoint = [
                f"{apiEntryPoint}/api/put/" for apiEntryPoint in apiEntryPointList
            ]
            assignApiEndPoint = [
                f"{apiEntryPoint}/api/uid/assign/" for apiEntryPoint in apiEntryPointList
            ]
            logger.info(putApiEndPoint)
            logger.info(assignApiEndPoint)
            # result = [
            #     writeDataFrameToOpenTsdb(
            #         sdf.reset_index(drop=True),
            #         valueColumns,
            #         groupColumns,
            #         apiEntryPoint,
            #         putApiEndPoint,
            #         assignApiEndPoint,
            #         port=4248,
            #         metric=metric,
            #         host_tag=False,
            #         check_tsdb_alive=False) for sdf in splitedDF
            # ]
            loadBalancerCount = len(putApiEndPoint)
            logger.info('before import into opentsdb %s' % (metric))
            with _multiprocessing.Pool(processes=processes * loadBalancerCount) as pool:
                result = pool.starmap_async(
                    writeDataFrameToOpenTsdb,
                    [(sdf.reset_index(drop=True), valueColumns, groupColumns, None,
                      putApiEndPoint[indexa % loadBalancerCount],
                      assignApiEndPoint[indexa % loadBalancerCount],
                      None, metric, host_tag, check_tsdb_alive, 50, tagsToKeep,
                      50000, compressTags, overrideMillisecond)
                     for indexa, sdf in enumerate(splitedDF)],
                    chunksize=None,
                    callback=None,
                    error_callback=None)
                result.get(timeout=None)
        logger.info(f"End put API call at: {_time.ctime()}")
    logger.info(f"Time taken for main: {getTimeTaken(startTimeMain, _time.time())}")
                                     planar=1e-5,
                                     seg_frac=.05,
                                     seg_angle=.25,
                                     aspect_frac=.1,
                                     radius_frac=1e-2,
                                     radius_min=1e-2,
                                     radius_max=50,
                                     tangent=.0175)
res_path = _NumericalResolutionPath(seg_frac=.04,
                                    seg_angle=.18,
                                    max_sections=10,
                                    min_sections=5,
                                    export='.5f')

### logging
log = _getLogger('trimesh')
log.addHandler(_NullHandler())


def _log_time(method):
    def timed(*args, **kwargs):
        tic = time_function()
        result = method(*args, **kwargs)
        log.debug('%s executed in %.4f seconds.',
                  method.__name__,
                  time_function() - tic)
        return result
    timed.__name__ = method.__name__
    timed.__doc__ = method.__doc__
    return timed


### exceptions
class MeshError(Exception):
def openTSDB_data_processor(metric_names=None, query_string=None, query_offset=20, tolerance=0.0):
    logger = _getLogger(__name__)
    logger.info('read failure tolerance is %s' % tolerance)
    logger.info('number of metric names: %d' % (len(metric_names)))
    metrics = list(
        set([
            'm=none:' + elem['metric']
            for elem in _itertools.chain.from_iterable(metric_names)
        ]))
    logger.info('number of metrics as set: %d' % (len(metrics)))
    response_df_list = [
        _requests.get('%s%s' % (query_string,
                                "&".join(metrics[queryIndex:queryIndex + query_offset]))).json()
        for queryIndex in range(0, len(metrics), query_offset)
    ]
    flattened_list = list(_itertools.chain(*response_df_list))
    logger.info('number of elements in response list: %d' % (len(flattened_list)))
    if len(metrics) > 0:
        logger.info(str(metrics[:1]))
    super_dict = {'metric': [], 'dps': [], 'tags': [], 'aggregateTags': []}
    num_defective_records = 0
    num_correct_records = 0
    for ite in flattened_list:
        if ite == 'error' or type(ite) is not dict:
            num_defective_records += 1
        else:
            merge_dicts(super_dict, ite)
            num_correct_records += 1
    max_defective_records = num_defective_records * query_offset
    total_num_queries = max_defective_records + num_correct_records
    logger.info('number of read request errors are: %s' % num_defective_records)
    logger.info('max number of failed reads are: %s' % max_defective_records)
    logger.info('number of total queries sent are %s' % total_num_queries)
    if total_num_queries > 0:
        read_error_perc = max_defective_records / total_num_queries
    else:
        # in state where metrics were sent to tsdb for query, but no time series were returned.
        logger.warn('total_num_queries is zero. no input data found in tsdb')
        raise ValueError("NoRecordsFound")
    if read_error_perc > tolerance:
        raise ValueError(
            'Stopping execution as read errors exceeded tolerance for num metrics queried '
            'that do not exist in OTSDB. Read error percent is %s and tolerance is %s'
            % (read_error_perc, tolerance))
    causal_data_frame = _DataFrame.from_dict(super_dict)
    new_df = causal_data_frame.drop(
        ['dps', 'aggregateTags', 'tags'], 1).assign(
            **_DataFrame(causal_data_frame.dps.values.tolist()))
    transposed_df = new_df.T
    cleaned_causal_df = transposed_df.rename(
        columns=dict(zip(transposed_df.columns.tolist(),
                         transposed_df.iloc[0].astype(str)))).drop(transposed_df.index[0])
    numeric_causal_df = cleaned_causal_df.apply(_to_numeric, errors='ignore')
    return numeric_causal_df
def get_logger(name):
    logger = _getLogger(name)
    logger.addFilter(context_filter)
    return logger
def __init__(self, module=__name__):
    self.logger = _noLogger()
    if DEBUG is True:
        self.logger = _getLogger('%s-%s(%x)' % (module, self.__class__, id(self)))
from logging import getLogger as _getLogger

### logging
log = _getLogger('truenorth-training')
else:
    VERSION_INFO = tuple(_metadata.VERSION.split('.'))

# 2. Imports
__all__ = [
    'board',
    'config',
    'game',
    'logger',
    'mainloop',
    'metadata',
    'piece',
    'square',
    'utils']

# 3. Configure settings
from pyrochess.config import SETTINGS

# 4. Configure logging
from logging import getLogger as _getLogger
from pyrochess.logger import init_logging as _init_logging

_init_logging(SETTINGS)
_log = _getLogger(_metadata.PROGRAM)
_log.debug("=== {} v{} begun at: {} ===".format(__package__, VERSION, _DATE))

# 5. Expose main and other chained imports
from pyrochess.cli import main