def __init__(self, config_file=None, project_name=None):
    configfile.get_config(sys.argv[1:], project_name=project_name,
                          config_file=config_file)
    conf = configfile.CONF
    credentials = config.Credentials(username=conf.connection.username,
                                     password=conf.connection.password)
    ssl_opts = None
    if conf.ssl:
        ssl_opts = {
            "keyfile": conf.ssl.keyfile,
            "certfile": conf.ssl.certfile,
            "server_side": False,
            "cert_reqs": conf.ssl.cert_reqs,
            "ssl_version": conf.ssl.ssl_version,
            "ca_certs": conf.ssl.ca_certs,
            "suppress_ragged_eofs": conf.ssl.suppress_ragged_eofs,
            "ciphers": conf.ssl.ciphers,
        }
    conn_conf = config.ConnectionConfig(
        host=conf.connection.host,
        credentials=credentials,
        port=conf.connection.port,
        virtual_host=conf.connection.virtual_host,
        channel_max=conf.connection.channel_max,
        frame_max=conf.connection.frame_max,
        heartbeat_interval=conf.connection.heartbeat_interval,
        ssl=conf.connection.ssl,
        ssl_options=ssl_opts,
        connection_attempts=conf.connection.connection_attempts,
        retry_delay=conf.connection.retry_delay,
        socket_timeout=conf.connection.socket_timeout,
        locale=conf.connection.locale,
        backpressure_detection=conf.connection.backpressure_detection,
        reconnect_attempts=conf.connection.reconnect_attempts,
        async_engine=conf.connection.async_engine)
    service_list = configfile.get_services_classes()
    service_mapping = configfile.get_service_name_class_mapping()
    print(configfile.get_services())  # log the configured services
    for service in configfile.get_services():
        df = discovery.DiscoveryFactory(service["discovery"])
        disc = df.get_discovery_service(service["name"],
                                        service.get("subscriptions"))
        service_mapping[service["name"]].set_discovery(disc)
    super(CLIServer, self).__init__(
        conn_conf,
        queue_name=conf.server.queue_name,
        exchange_name=conf.server.exchange_name,
        service_list=service_list)
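# Hypothetical usage sketch - the config path, project name, and the serve()
# entry point are assumptions, not taken from the source; the base class API
# is not shown in this excerpt.
if __name__ == '__main__':
    server = CLIServer(config_file='/etc/myproject/server.conf',
                       project_name='myproject')
    server.serve()  # assumed entry point on the (unshown) base class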
def __init__(self, options, daemon_url):
    """Setup objects:

       Methods
       WorkSpaces
    """
    self.config = configfile.get_config(options.config, verbose=False)
    self.debug = options.debug
    self.daemon_url = daemon_url
    # check config file
    configfile.sanity_check(self.config)
    self._update_methods()
    self.target_workdir = self.config['target_workdir']
    self.source_workdirs = {self.target_workdir} | self.config.get('source_workdirs', set())
    self.workspaces = {}
    for name, data in self.config['workdir'].items():
        if name in self.source_workdirs:
            path = data[0]
            slices = data[1]
            self.workspaces[name] = workspace.WorkSpace(name, path, slices)
    undefined_workdirs = self.source_workdirs - set(self.workspaces)
    if undefined_workdirs:
        print('\nERROR: Workdir(s) missing definition: ' +
              ', '.join('"' + x + '"' for x in undefined_workdirs) + '.')
        exit(1)
    check_slices = set(self.workspaces[name].slices for name in self.workspaces)
    if len(check_slices) > 1:
        print('\nERROR: Not all workdirs have the same number of slices!')
        exit(1)
    put_workspaces({k: v.path for k, v in self.workspaces.items()})
    self.DataBase = database.DataBase(self)
    self.update_database()
    self.broken = False
def init():
    # initialize locale - for number formatting
    resetlocale()
    # find config files near script location, build WORKSPACES from them
    rel = partial(join, sys.path[0])
    for fn in (glob(rel("*.conf")) + glob(rel("../*.conf")) +
               glob(rel("conf/*")) + glob(rel("../conf/*"))):
        if not fn.lower().endswith(".template"):
            try:
                cfg = get_config(fn, False)
            except Exception:
                continue
            WORKSPACES.update({k: v[0] for k, v in cfg['workspace'].items()})
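# The lookup order (script dir, its parent, and their conf/ subdirectories)
# is easy to miss in the chained glob calls. A minimal standalone sketch of
# the same search using only the standard library (patterns as above):
import sys
from glob import glob
from functools import partial
from os.path import join

rel = partial(join, sys.path[0])  # resolve patterns relative to the script
candidates = [fn
              for pattern in ("*.conf", "../*.conf", "conf/*", "../conf/*")
              for fn in glob(rel(pattern))
              if not fn.lower().endswith(".template")]
print(candidates)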
def __init__(self, options, daemon_url):
    """Setup objects:

       Methods
       WorkSpaces
    """
    self.config = configfile.get_config(options.config, verbose=False)
    self.debug = options.debug
    self.daemon_url = daemon_url
    # check config file
    configfile.sanity_check(self.config)
    self._update_methods()
    # initialise workspaces
    self.workspaces = {}
    for name, data in self.config['workspace'].items():
        path = data[0]
        slices = data[1]
        w = workspace.WorkSpace(name, path, slices)
        if w.ok:
            # add only if everything went well in __init__
            self.workspaces[name] = w
        else:
            # hmm, maybe new target workspace
            if name == self.config['main_workspace']:
                self.workspaces[name] = workspace.WorkSpace(
                    name, path, slices, True)
    put_workspaces({k: v.path for k, v in self.workspaces.items()})
    # set current workspace pointers
    self.set_workspace(self.config['main_workspace'])
    self.set_remote_workspaces(self.config.get('remote_workspaces', ''))
    # and update contents
    self.DataBase = database.DataBase(self)
    self.update_database()
    self.broken = False
import os

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import classification_report, auc, roc_curve

from dirutil import project_directory
from configfile import get_config
from transformer import clean_nulls, SamplerFactory

if __name__ == '__main__':
    sample_method = 'under'
    project_path = project_directory()
    config = get_config('standard_model')
    train_pct = .9
    performance_data_path = os.path.join(project_path, config['diw_path'],
                                         'diw.txt')
    performance_data = pd.read_csv(performance_data_path,
                                   sep=config['data_sep'])
    target = config['target']
    predictors = config['predictors']
    # Pull out the predictors & target
    model_data = performance_data[predictors + [target]]
    # Clean nulls and map f/c stat to bits
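# Hypothetical continuation (not from the source): hold out 10% for testing
# using the train_pct defined above, stratified on the target so both splits
# keep the class balance. The X/y names and random_state are assumptions.
X = model_data[predictors]
y = model_data[target]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=train_pct, stratify=y, random_state=0)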
def main(options):
    # all forks belong to the same happy family
    try:
        os.setpgrp()
    except OSError:
        print("Failed to create process group - there is probably already one (daemontools).",
              file=sys.stderr)

    # Set a low (but not too low) open file limit to make
    # dispatch.update_valid_fds faster.
    # The runners will set the highest limit they can
    # before actually running any methods.
    r1, r2 = resource.getrlimit(resource.RLIMIT_NOFILE)
    r1 = min(r1, r2, 1024)
    resource.setrlimit(resource.RLIMIT_NOFILE, (r1, r2))

    # setup statmsg sink and tell address using ENV
    statmsg_rd, statmsg_wr = socket.socketpair(socket.AF_UNIX, socket.SOCK_DGRAM)
    os.environ['BD_STATUS_FD'] = str(statmsg_wr.fileno())

    def buf_up(fh, opt):
        sock = socket.fromfd(fh.fileno(), socket.AF_UNIX, socket.SOCK_DGRAM)
        sock.setsockopt(socket.SOL_SOCKET, opt, 256 * 1024)
    buf_up(statmsg_wr, socket.SO_SNDBUF)
    buf_up(statmsg_rd, socket.SO_RCVBUF)

    CONFIG = configfile.get_config(options.config, verbose=False)
    t = Thread(target=statmsg_sink,
               args=(CONFIG['logfilename'], statmsg_rd),
               name="statmsg sink")
    t.daemon = True
    t.start()

    # do all main-stuff, i.e. run server
    sys.stdout = autoflush.AutoFlush(sys.stdout)
    sys.stderr = autoflush.AutoFlush(sys.stderr)
    atexit.register(exitfunction)
    signal.signal(signal.SIGTERM, exitfunction)
    signal.signal(signal.SIGINT, exitfunction)

    signal.signal(signal.SIGUSR1, siginfo)
    signal.siginterrupt(signal.SIGUSR1, False)
    if hasattr(signal, 'SIGINFO'):
        signal.signal(signal.SIGINFO, siginfo)
        signal.siginterrupt(signal.SIGINFO, False)

    if options.port:
        server = ThreadedHTTPServer(('', options.port), XtdHandler)
        daemon_url = 'http://localhost:%d' % (options.port,)
    else:
        check_socket(options.socket)
        # We want the socket to be world writeable,
        # protect it with dir permissions.
        u = os.umask(0)
        server = ThreadedUnixHTTPServer(options.socket, XtdHandler)
        os.umask(u)
        daemon_url = configfile.resolve_socket_url(options.socket)

    ctrl = control.Main(options, daemon_url)
    print()
    ctrl.print_workspaces()
    print()
    XtdHandler.ctrl = ctrl

    for n in ("result_directory", "source_directory", "urd"):
        print("%16s: %s" % (n.replace("_", " "), CONFIG.get(n),))
    print()

    if options.port:
        serving_on = "port %d" % (options.port,)
    else:
        serving_on = options.socket
    print("Serving on %s\n" % (serving_on,), file=sys.stderr)
    server.serve_forever()
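# BD_STATUS_FD lets child processes find the status socket by inherited file
# descriptor. A sketch of the child side (an assumption; the runner code is
# not shown in this excerpt):
import os
import socket

fd = int(os.environ['BD_STATUS_FD'])
status = socket.fromfd(fd, socket.AF_UNIX, socket.SOCK_DGRAM)
status.send(b'status: running')  # one datagram per message to the sink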
# The snippet opens at the tail of a preceding transformer's transform method.
        return X.reshape(self.shape)


class DataTypeTransformer(BaseEstimator, TransformerMixin):
    def __init__(self, dtype):
        self.dtype = dtype

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return X.astype(self.dtype)


if __name__ == '__main__':
    config = get_config()
    target = config['target']
    categorical_predictors = config['cat_predictors']
    numerical_predictors = config['num_predictors']
    diw_path = os.path.join(project_directory(), config['diw_path'], 'diw.txt')
    diw_df = pd.read_csv(diw_path, sep=config['data_sep'])
    num_pipeline = Pipeline([('selector', DataFrameSelector(numerical_predictors)),
                             ('imputer', Imputer()),
                             ('scaler', MinMaxScaler())])
    # The cat vars we have now don't require imputing
    cat_pipeline = Pipeline([('selector',
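# The excerpt above cuts off mid-definition of cat_pipeline. A common next
# step (an assumption here, not taken from the source) is to join the two
# pipelines with FeatureUnion so both feature sets are prepared in one call:
from sklearn.pipeline import FeatureUnion

full_pipeline = FeatureUnion(transformer_list=[
    ('num', num_pipeline),  # numeric: select -> impute -> scale
    ('cat', cat_pipeline),  # categorical: select (+ encode, presumably)
])
prepared = full_pipeline.fit_transform(diw_df)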
def main(options):
    # all forks belong to the same happy family
    try:
        os.setpgrp()
    except OSError:
        print("Failed to create process group - there is probably already one (daemontools).",
              file=sys.stderr)

    # increase number of open files per process
    r1, r2 = resource.getrlimit(resource.RLIMIT_NOFILE)
    resource.setrlimit(resource.RLIMIT_NOFILE, (r2, r2))
    print("DAEMON: Set max number of open files to (%d, %d)" %
          resource.getrlimit(resource.RLIMIT_NOFILE))

    # setup statmsg sink and tell address using ENV
    statmsg_rd, statmsg_wr = socket.socketpair(socket.AF_UNIX, socket.SOCK_DGRAM)
    os.environ['BD_STATUS_FD'] = str(statmsg_wr.fileno())

    def buf_up(fh, opt):
        sock = socket.fromfd(fh.fileno(), socket.AF_UNIX, socket.SOCK_DGRAM)
        sock.setsockopt(socket.SOL_SOCKET, opt, 256 * 1024)
    buf_up(statmsg_wr, socket.SO_SNDBUF)
    buf_up(statmsg_rd, socket.SO_RCVBUF)

    CONFIG = configfile.get_config(options.config)
    t = Thread(target=statmsg_sink,
               args=(CONFIG['logfilename'], statmsg_rd),
               name="statmsg sink")
    t.daemon = True
    t.start()

    # do all main-stuff, i.e. run server
    sys.stdout = autoflush.AutoFlush(sys.stdout)
    sys.stderr = autoflush.AutoFlush(sys.stderr)
    atexit.register(exitfunction)
    signal.signal(signal.SIGTERM, exitfunction)
    signal.signal(signal.SIGINT, exitfunction)

    signal.signal(signal.SIGUSR1, siginfo)
    signal.siginterrupt(signal.SIGUSR1, False)
    if hasattr(signal, 'SIGINFO'):
        signal.signal(signal.SIGINFO, siginfo)
        signal.siginterrupt(signal.SIGINFO, False)

    if options.port:
        server = ThreadedHTTPServer(('', options.port), XtdHandler)
        daemon_url = 'http://localhost:%d' % (options.port,)
    else:
        check_socket(options.socket)
        # We want the socket to be world writeable,
        # protect it with dir permissions.
        u = os.umask(0)
        server = ThreadedUnixHTTPServer(options.socket, XtdHandler)
        os.umask(u)
        daemon_url = configfile.resolve_socket_url(options.socket)

    ctrl = control.Main(options, daemon_url)
    print("DAEMON: Available workspaces")
    for x in ctrl.list_workspaces():
        print("DAEMON: %s" % x)
    print("DAEMON: Current workspace is \"%s\"" % ctrl.get_current_workspace())
    print("DAEMON: Current remote workspaces are %s" %
          ', '.join('"' + x + '"' for x in ctrl.get_current_remote_workspaces()))
    XtdHandler.ctrl = ctrl

    # report the port or the socket path, whichever is in use
    print("Start serving on %s." % (options.port or options.socket,),
          file=sys.stderr)
    print('-' * 79)
    print()
    server.serve_forever()
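# ThreadedHTTPServer is used above but not defined in this excerpt. A
# conventional definition (an assumption, not taken from the source) is the
# standard ThreadingMixIn pattern, which gives each request its own thread:
from http.server import HTTPServer
from socketserver import ThreadingMixIn


class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
    daemon_threads = True  # don't let live request threads block shutdown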