import subprocess
from os import path
from shlex import quote
from shlex import split as shlexsplit
from shutil import copyfile

def add_broker(self):
    """Adds a new broker to the cluster."""
    i = self.num_brokers
    self.log.info('Adding broker for total of {}'.format(i))
    server_properties = path.join(self.kafka_home, 'config/server.properties')
    new_server_properties = '{}-{}'.format(server_properties, i)
    copyfile(server_properties, new_server_properties)
    # Patch the copied server.properties in place: broker id, listener
    # port (9092 + i), a per-broker log dir and the topic deletion flag.
    cmds = [
        r'sed -r -i "s/(broker.id)=(.*)/\1={}/g"'.format(i),
        r'sed -r -i "s/#(listeners=PLAINTEXT:\/\/:)(.*)/\1{}/g"'.format(
            str(9092 + i)),
        r'sed -r -i "s/(log.dirs)=(.*)/\1=\/tmp\/kafka-logs-{}/g"'.format(i),
        r'sed -r -i "s/#(delete.topic.enable)=(.*)/\1=$DELETE_TOPIC_ENABLE/g"'
    ]
    for cmd in cmds:
        cmd = ' '.join([cmd, new_server_properties])
        self.log.debug('Running cmd {} ...'.format(cmd))
        subprocess.check_output(shlexsplit(quote(cmd).replace("'", '')))
    start_kafka = '$KAFKA_HOME/bin/kafka-server-start.sh {}'.format(
        new_server_properties)
    self.log.info('Initializing broker ...')
    subprocess.check_output(shlexsplit(quote(start_kafka).replace("'", '')))
    self.log.info('New broker (id:{}) successfully added'.format(i))
    self.num_brokers += 1
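
# The shlexsplit(quote(cmd).replace("'", '')) idiom above is easy to
# misread. A minimal standalone sketch of the round trip, assuming the
# same shlex aliases as the snippet (the command is a made-up sample):
from shlex import quote
from shlex import split as shlexsplit

cmd = 'sed -i "s/old value/new value/g" server.properties-1'
# quote() wraps the whole command in single quotes; stripping those
# restores the original string, which shlexsplit() then tokenises while
# honouring the inner double quotes:
print(shlexsplit(quote(cmd).replace("'", '')))
# -> ['sed', '-i', 's/old value/new value/g', 'server.properties-1']
# Caveat: this only round-trips cleanly when cmd has no single quotes.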

from shlex import split as shlexsplit

def _processKey(self, format, column):
    """
    (Internal)
    The inner part of _loadCSV() that decides what to do with each key.
    Kept in here, too, for security.
    """
    d = {}
    for key in format:
        if key not in ignorekeys:  # ignore these tags
            if isinstance(format[key], str) and "location" in format[key]:
                # locations are very common; support them out of the box:
                d[key] = eval(format[key])
            else:
                d[key] = self._guessDataType(column[format[key]])
        elif key == "gtf_decorators":
            # special exception for gtf files:
            # parse the 'key "value"; ...' attribute pairs
            gtf = column[format["gtf_decorators"]].strip()
            for item in gtf.split("; "):
                if item:
                    item = item.strip()
                    ss = shlexsplit(item)
                    key = ss[0]
                    value = ss[1].strip('"')
                    d[key] = self._guessDataType(value)
    return d
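
# The gtf_decorators branch leans on shlexsplit to peel apart GTF
# attribute pairs of the form 'key "value"'. A self-contained sketch of
# just that parsing step (the attribute string is a made-up sample):
from shlex import split as shlexsplit

gtf = 'gene_id "ENSG0001"; gene_name "ABC1"'
d = {}
for item in gtf.split('; '):
    item = item.strip()
    if item:
        ss = shlexsplit(item)        # e.g. ['gene_id', 'ENSG0001']
        d[ss[0]] = ss[1].strip('"')  # shlex already drops the quotes;
                                     # strip('"') is a safety net
print(d)  # {'gene_id': 'ENSG0001', 'gene_name': 'ABC1'}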

import os
import subprocess as sp
from shlex import split as shlexsplit

import pandas as pd

def disaggregate_chunk(self, mains, appliances=5):
    """In-memory disaggregation of mains data using the pre-trained model.

    The function writes the chunk into shared memory, calls the actual R
    implementation of the NFHMM algorithm to do the disaggregation, and
    then reads the results back from a CSV the R script writes.

    Parameters
    ----------
    mains : pd.Series
    appliances : int, initial guess of the total number of appliances

    Returns
    -------
    appliance_powers : pd.DataFrame where the rows represent time and
        each column represents a disaggregated appliance.
    """
    # Refuse chunks that are too short
    if len(mains) < self.MIN_CHUNK_LENGTH:
        raise RuntimeError('Chunk is too short.')

    # Temporary IO file paths in shared memory
    pfi = '/run/shm/nfhmm_in.csv'
    pfo = '/run/shm/nfhmm_out.csv'

    # Write the series into a CSV file with specific column headings
    chunk = pd.Series(mains, name='Aggregate')
    chunk.to_csv(pfi, index_label='Timestamp', header=True)

    # Run the actual R implementation of the algorithm, retrying on failure
    cmd = 'Rscript src/r_fhmm.R -b -i "%s" -o "%s" -a %d -p %d -n %d -v' \
          % (pfi, pfo, appliances, self.HEURISTIC_PARAMETER,
             self.SAMPLING_ITERATIONS)
    for i in range(1, self.MAX_DISAG_ATTEMPTS_PER_CHUNK + 1):
        print('Running "%s" (timeout=%ds)...'
              % (cmd, self.DISAG_ATTEMPT_TIMEOUT))
        p = sp.Popen(shlexsplit(cmd), cwd=self.NFHMM_ROOT_DIR)
        try:
            p.wait(timeout=self.DISAG_ATTEMPT_TIMEOUT)
            msg = 'with code %d' % p.returncode
        except sp.TimeoutExpired:
            p.kill()
            msg = ('due to surpassing the timeout (%ds)'
                   % self.DISAG_ATTEMPT_TIMEOUT)
        if p.returncode == 0:
            break
        print('Run attempt %d/%d failed %s!'
              % (i, self.MAX_DISAG_ATTEMPTS_PER_CHUNK, msg))
    if p.returncode != 0:
        print('Warning: Disaggregating the chunk failed!')
        return None
    print('The R NFHMM implementation finished successfully!')

    # Read the disaggregation results into a dataframe
    appliance_powers = pd.read_csv(pfo)

    # Reuse the existing index instead of the newly read timestamp column;
    # these should be equal despite the frequency downsampling
    del appliance_powers['Timestamp']
    appliance_powers.index = mains.index

    # Remove the temporary IO files
    for pf in pfi, pfo:
        os.unlink(pf)

    return appliance_powers
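
# The retry loop above combines Popen, wait(timeout=...) and kill().
# A stripped-down sketch of that pattern in isolation; the command and
# the limits are hypothetical stand-ins for the class constants:
import subprocess as sp
from shlex import split as shlexsplit

CMD = 'sleep 1'   # placeholder; the snippet runs an Rscript command here
MAX_ATTEMPTS = 3  # stand-in for MAX_DISAG_ATTEMPTS_PER_CHUNK
TIMEOUT = 60      # stand-in for DISAG_ATTEMPT_TIMEOUT (seconds)

for attempt in range(1, MAX_ATTEMPTS + 1):
    p = sp.Popen(shlexsplit(CMD))
    try:
        p.wait(timeout=TIMEOUT)
    except sp.TimeoutExpired:
        p.kill()  # returncode stays unset, so the loop falls through to retry
    if p.returncode == 0:
        break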

import subprocess
from shlex import quote
from shlex import split as shlexsplit

def _run_sh(self, script, args):
    """Run kafka-topics.sh with the provided list of arguments.

    We quote(cmd) for safety.
    """
    script = self._get_sh(script)
    cmd = [script, '--zookeeper', self.zkpr] + args  # might need to change
    cmd = ' '.join([str(c) for c in cmd])  # cmd needs to be a str
    self.log.info("running: {}".format(cmd))
    return subprocess.check_output(shlexsplit(quote(cmd).replace("'", '')))
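
# Joining the argument list into one string and re-splitting it with
# shlexsplit only round-trips cleanly while no argument contains
# whitespace or quotes; a quick check with made-up arguments:
from shlex import split as shlexsplit

cmd_list = ['kafka-topics.sh', '--zookeeper', 'localhost:2181', '--list']
cmd = ' '.join(str(c) for c in cmd_list)
assert shlexsplit(cmd) == cmd_list  # holds for these whitespace-free args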

from shlex import split as shlexsplit

def _processKey(self, format, column):
    """
    (Internal)
    The inner part of _loadCSV() that decides what to do with each key.
    Kept in here, too, for security.
    """
    d = {}
    for key in format:
        if key not in ignorekeys:  # ignore these tags
            #if not key in d:
            #    d[key] = {}
            if ('__ignore_empty_columns' in format
                    and format['__ignore_empty_columns']):
                # check the column exists; if not, pad in an empty value
                try:
                    column[format[key]]
                except IndexError:
                    d[key] = ''  # better than None for downstream compatibility
                    continue
            if isinstance(format[key], dict) and "code" in format[key]:
                # a code block insertion goes here - any valid lib and a
                # one-line python code fragment, stored as a dict with the
                # key "code"
                d[key] = eval(format[key]["code"])
            elif isinstance(format[key], str) and "location" in format[key]:
                # locations are very common; support them out of the box:
                d[key] = eval(format[key])
            else:
                d[key] = self._guessDataType(column[format[key]])
        elif key == "gtf_decorators":
            # special exception for gtf files
            gtf = column[format["gtf_decorators"]].strip()
            for item in gtf.split("; "):
                if item:
                    item = item.strip()
                    ss = shlexsplit(item)
                    key = ss[0]
                    value = ss[1].strip('"')
                    d[key] = self._guessDataType(value)
    return d
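
# The __ignore_empty_columns branch pads missing trailing columns with
# an empty string instead of raising. The effect in isolation, with a
# toy row and a toy format mapping:
column = ['chr1', '100', '200']  # the row has only three columns
fmt = {'name': 3}                # ...but the format asks for a fourth
try:
    value = column[fmt['name']]
except IndexError:
    value = ''  # better than None for downstream compatibility
print(repr(value))  # ''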

from shlex import split as shlexsplit

def normfunc():
    if args.All:
        args.MATs = MATList()
    elif isinstance(args.algorithm, str):
        # a single algorithm given as one shell-style string:
        # split it into an argument list
        args.algorithm = shlexsplit(args.algorithm)
    doit(args.algorithm, args.MATs, args.multi)
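
# Why the string branch exists: a single --algorithm value typed as one
# shell-style string becomes a proper argument list (the sample value is
# invented for illustration):
from shlex import split as shlexsplit

print(shlexsplit('method-a -k 4 --seed 7'))
# -> ['method-a', '-k', '4', '--seed', '7']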