# Assumed imports for this snippet: import os, subprocess;
# from os import path; from shutil import copyfile;
# from shlex import quote, split as shlexsplit
def add_broker(self):
     """Adds a new broker to the cluster"""
     i = self.num_brokers
     self.log.info('Adding broker for a new total of {}'.format(i + 1))
     server_properties = path.join(self.kafka_home,
                                   'config/server.properties')
     new_server_properties = '{}-{}'.format(server_properties, i)
     copyfile(server_properties, new_server_properties)
     cmds = [
         # raw strings keep sed's \1 backreferences intact (in a plain
         # Python string "\1" is an octal escape that corrupts the regex)
         r'sed -r -i "s/(broker.id)=(.*)/\1={}/g"'.format(i),
         # group 1 already ends with ':', so append the port directly
         r'sed -r -i "s/#(listeners=PLAINTEXT:\/\/:)(.*)/\1{}/g"'.format(
             str(9092 + i)),
         r'sed -r -i "s/(log.dirs)=(.*)/\1=\/tmp\/kafka-logs-{}/g"'.format(i),
         # no shell is involved below, so "$DELETE_TOPIC_ENABLE" would reach
         # sed literally; expand the environment variable here instead
         r'sed -r -i "s/#(delete.topic.enable)=(.*)/\1={}/g"'.format(
             os.environ['DELETE_TOPIC_ENABLE']),
     ]
     for cmd in cmds:
         cmd = ' '.join([cmd, new_server_properties])
         self.log.debug('Running cmd {} ...'.format(cmd))
         subprocess.check_output(shlexsplit(quote(cmd).replace("'", '')))
     start_kafka = '$KAFKA_HOME/bin/kafka-server-start.sh {}'.format(
         new_server_properties)
     self.log.info('Initializing broker ... ')
     subprocess.check_output(shlexsplit(
         quote(start_kafka).replace("'", '')))
     self.log.info('New broker (id:{}) successfully added'.format(i))
     self.num_brokers += 1
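
A minimal sketch of what the quote-then-split idiom above actually does, using only the standard library (the sed command is illustrative, mirroring the broker.id substitution): shlex.quote wraps the whole command in single quotes, the replace() strips them again, and shlex.split then tokenizes while honouring the remaining double quotes.

    from shlex import quote, split as shlexsplit

    cmd = 'sed -r -i "s/(broker.id)=(.*)/\\1=3/g" config/server.properties-3'
    print(quote(cmd))
    # 'sed -r -i "s/(broker.id)=(.*)/\1=3/g" config/server.properties-3'
    print(shlexsplit(quote(cmd).replace("'", '')))
    # ['sed', '-r', '-i', 's/(broker.id)=(.*)/\\1=3/g', 'config/server.properties-3']

For commands containing no single quotes this round-trip is equivalent to shlex.split(cmd) alone; its only effect is to strip any single quotes from the command.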
Example #2
    def _processKey(self, format, column):
        """
        (Internal)
        the inner part of _loadCSV() to determine what to do with the key.
        Better in here too for security.
        """

        d = {}
        for key in format:
            if key not in ignorekeys:  # ignore these tags ('ignorekeys' is module-level in the source)
                if isinstance(format[key], str) and "location" in format[key]:
                    # locations are very common, add support for them out of the box:
                    d[key] = eval(format[key])
                else:
                    d[key] = self._guessDataType(column[format[key]])
            elif key == "gtf_decorators":  # special exceptions for gtf files
                gtf = column[format["gtf_decorators"]].strip()
                for item in gtf.split("; "):
                    if item:
                        item = item.strip()
                        ss = shlexsplit(item)
                        key = ss[0]
                        value = ss[1].strip('"')
                        d[key] = self._guessDataType(value)
        return d
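
As a usage note, the gtf_decorators branch above leans on shlex.split to tokenize GTF attribute pairs. A small sketch with a typical GTF attribute (the identifier is illustrative, not taken from the code above):

    from shlex import split as shlexsplit

    item = 'gene_id "ENSG00000223972"'
    ss = shlexsplit(item)
    print(ss)  # ['gene_id', 'ENSG00000223972']
    # shlex already strips the double quotes, so the strip('"') above
    # is a harmless safety net rather than a required step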
Example #3
    # Assumed imports: import os; import pandas as pd; import subprocess as sp;
    # from shlex import split as shlexsplit
    def disaggregate_chunk(self, mains, appliances=5):
        """In-memory disaggregation of mains data using the pre-trained model.

        The function writes the chunk into shared memory, calls the actual
        R implementation of the NFHMM algorithm to do the disaggregation,
        and then reads the results from a CSV file the R script writes.

        Parameters
        ----------
        mains : pd.Series
        appliances : int, initial guess of the number of total appliances

        Returns
        -------
        appliance_powers : pd.DataFrame where the rows represent time and each
            column represents a disaggregated appliance.
        """
        # Refuse too short chunks
        if len(mains) < self.MIN_CHUNK_LENGTH:
            raise RuntimeError('Chunk is too short.')
        # Define temporary IO file paths
        pfi = '/run/shm/nfhmm_in.csv'
        pfo = '/run/shm/nfhmm_out.csv'
        # Write the series into a CSV file with specific column headings
        chunk = pd.Series(mains, name='Aggregate')
        chunk.to_csv(pfi, index_label='Timestamp', header=True)
        # Run the actual R implementation of the algorithm
        cmd = 'Rscript src/r_fhmm.R -b -i "%s" -o "%s" -a %d -p %d -n %d -v' \
            % (pfi, pfo, appliances, self.HEURISTIC_PARAMETER, self.SAMPLING_ITERATIONS)
        for i in range(1, self.MAX_DISAG_ATTEMPTS_PER_CHUNK + 1):
            print('Running "%s" (timeout=%ds)...' %
                  (cmd, self.DISAG_ATTEMPT_TIMEOUT))
            p = sp.Popen(shlexsplit(cmd), cwd=self.NFHMM_ROOT_DIR)
            try:
                p.wait(timeout=self.DISAG_ATTEMPT_TIMEOUT)
                msg = 'with code %d' % (p.returncode)
            except sp.TimeoutExpired:
                p.kill()
                p.wait()  # reap the killed process so returncode is set
                msg = 'due to surpassing the timeout (%ds)' % (
                    self.DISAG_ATTEMPT_TIMEOUT)
            if p.returncode == 0:
                break
            print('Run attempt %d/%d failed %s!' %
                  (i, self.MAX_DISAG_ATTEMPTS_PER_CHUNK, msg))
        if p.returncode != 0:
            print('Warning: Disaggregating the chunk failed!')
            return None
        print('The R NFHMM implementation finished successfully!')
        # Read the disaggregation results into a dataframe
        appliance_powers = pd.read_csv(pfo)
        # Reuse the existing index instead of the newly read timestamp column;
        # these should be equal despite the frequency downsampling
        del appliance_powers['Timestamp']
        appliance_powers.index = mains.index
        # Remove the temporary IO files
        for pf in pfi, pfo:
            os.unlink(pf)
        return appliance_powers
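
A brief sketch of how a command string like the one built above is tokenized before being handed to Popen; the paths match the temporary files used above, while the numeric parameters are illustrative stand-ins for the class constants:

    from shlex import split as shlexsplit

    cmd = 'Rscript src/r_fhmm.R -b -i "/run/shm/nfhmm_in.csv" -o "/run/shm/nfhmm_out.csv" -a 5 -p 10 -n 100 -v'
    print(shlexsplit(cmd))
    # ['Rscript', 'src/r_fhmm.R', '-b', '-i', '/run/shm/nfhmm_in.csv',
    #  '-o', '/run/shm/nfhmm_out.csv', '-a', '5', '-p', '10', '-n', '100', '-v']

The double quotes keep each path a single argv entry, so the call would survive paths containing spaces.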
Example #4
 # Assumed imports: import subprocess; from shlex import quote, split as shlexsplit
 def _run_sh(self, script, args):
     """Run kafka-topics.sh with the provided list of arguments.
        We quote(cmd) for safety.
     """
     script = self._get_sh(script)
     cmd = [script, '--zookeeper', self.zkpr] + args  # might need to change
     cmd = ' '.join([str(c) for c in cmd])  # cmd needs to be str
     self.log.info("running: {}".format(cmd))
     return subprocess.check_output(shlexsplit(quote(cmd).replace("'", '')))
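
A design note, sketched under the assumption that a shell is never actually needed here: cmd starts life as a list, so it could be passed to check_output directly, skipping the join/quote/split round-trip and preserving any argument that contains spaces. The script path and ZooKeeper address below are hypothetical:

    import subprocess

    cmd = ['/opt/kafka/bin/kafka-topics.sh', '--zookeeper', 'localhost:2181', '--list']
    out = subprocess.check_output(cmd)  # argv list; no re-tokenization required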
Example #5
    def _processKey(self, format, column):
        """
        (Internal)
        the inner part of _loadCSV() to determine what to do with the key.
        Better in here too for security.
        """

        d = {}
        for key in format:
            if key not in ignorekeys:  # ignore these tags ('ignorekeys' is module-level in the source)
                if format.get('__ignore_empty_columns'):
                    # check the column exists, if not, pad in an empty value
                    try:
                        column[format[key]]
                    except IndexError:
                        d[key] = ''  # Better than None for downstream compatibility
                        continue

                if isinstance(format[key], dict) and "code" in format[key]:
                    # a code block insertion goes here - any valid lib and one line python code fragment
                    # store it as a dict with the key "code"
                    d[key] = eval(format[key]["code"])
                elif isinstance(format[key], str) and "location" in format[key]:
                    # locations are very common, add support for them out of the box:
                    d[key] = eval(format[key])
                else:
                    d[key] = self._guessDataType(column[format[key]])
            elif key == "gtf_decorators":  # special exceptions for gtf files
                gtf = column[format["gtf_decorators"]].strip()
                for item in gtf.split("; "):
                    if item:
                        item = item.strip()
                        ss = shlexsplit(item)
                        key = ss[0]
                        value = ss[1].strip('"')
                        d[key] = self._guessDataType(value)
        return d
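
A hedged sketch of the kind of format/column pair the branches above expect; all names and values are hypothetical, and the "code" fragment is eval'd with column in scope:

    format = {"name": 0,                              # plain column index
              "score": {"code": "float(column[1])"}}  # one-line code fragment
    column = ["TP53", "0.93"]
    # _processKey(format, column) would yield {"name": 'TP53', "score": 0.93}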
Example #6
 # Assumed names from the enclosing script: args, MATList, doit
 def normfunc():
     if args.All:
         args.MATs = MATList()
     elif isinstance(args.algorithm, str):
         args.algorithm = shlexsplit(args.algorithm)
     doit(args.algorithm, args.MATs, args.multi)
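
A small sketch of the shlex.split step with a made-up algorithm string: a space-separated specification becomes the argument list that doit presumably expects.

    from shlex import split as shlexsplit

    print(shlexsplit('normalize --method quantile'))
    # ['normalize', '--method', 'quantile']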