Example #1
0
 def build_tree(self, buf):
     parser = etree.HTMLParser(recover=self.recover, encoding=self.encoding)
     tree = etree.parse(StringIO(buf), parser)
     return tree
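A minimal usage sketch for the method above, assuming it belongs to a small wrapper class (the HtmlLoader name and its attributes are hypothetical) and that lxml is installed:

from StringIO import StringIO
from lxml import etree

class HtmlLoader(object):
    # hypothetical holder for the attributes build_tree() reads
    def __init__(self, recover=True, encoding='utf-8'):
        self.recover = recover
        self.encoding = encoding

    def build_tree(self, buf):
        parser = etree.HTMLParser(recover=self.recover, encoding=self.encoding)
        tree = etree.parse(StringIO(buf), parser)
        return tree

# recover=True lets the parser repair the unclosed <p> tag
tree = HtmlLoader().build_tree('<html><body><p>hello</body></html>')
print tree.getroot().tag  # -> 'html'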
Example #2
0
#===============================================================================

import sys
from StringIO import StringIO

import unittest

from ..lib import mock

from .. import tee

#===============================================================================


@mock.patch('sys.stdout', new=StringIO())
class TestTee(unittest.TestCase):
    def setUp(self):
        self.string_file = StringIO()

    def test_write(self):
        tee.Tee(sys.stdout, self.string_file, log_header_title="Test Header")

        print("Hello")
        self.assertTrue(self.string_file.getvalue().endswith("Hello\n"))
        self.assertTrue("Test Header" in self.string_file.getvalue())

        print("Hi There Again")
        self.assertTrue(
            self.string_file.getvalue().endswith("Hello\nHi There Again\n"))
Example #3
0
                obj_not_before = datetime.strptime(x509.get_notBefore(),
                                                   '%Y%m%d%H%M%S%z')

            if x509.get_notAfter().endswith('Z'):
                print "entered here"
                obj_not_after = datetime.strptime(x509.get_notAfter()[0:-1],
                                                  '%Y%m%d%H%M%S')
                print obj_not_before
            else:
                print "entered there"
                obj_not_after = datetime.strptime(x509.get_notAfter(),
                                                  '%Y%m%d%H%M%S%z')

            from StringIO import StringIO
            from ConfigParser import SafeConfigParser
            output = StringIO()

            cfg_parser = SafeConfigParser()
            cfg_parser.add_section('certificate')
            cfg_parser.set('certificate', 'commonname', CN.commonName)
            cfg_parser.set('certificate', 'not_before', str(obj_not_before))
            cfg_parser.set('certificate', 'not_after', str(obj_not_after))
            cfg_parser.set('certificate', 'digest', x509.digest('sha1'))
            cfg_parser.write(output)
            str_out = output.getvalue()
            print "str_out = ", str_out
            from hashlib import sha1
            name = sha1(str_out).hexdigest() + '.ini'
            print "name = ", name
            manifest_name = join('/tmp', name)
            with file(manifest_name, 'w') as fpw:
                # assumed completion: persist the generated INI manifest
                fpw.write(str_out)
Example #4
0
    def test_exported_meeting_json_has_correct_file_names(self, browser):
        set_preferred_language(self.portal.REQUEST, 'de-ch')
        browser.append_request_header('Accept-Language', 'de-ch')
        self.login(self.committee_responsible, browser)

        self.meeting.model.title = u'9. Sitzung der Rechnungspr\xfcfungs' \
                                   u'kommission, ordentlich'
        self.schedule_paragraph(self.meeting, u'A Gesch\xfcfte')
        with freeze(localized_datetime(2017, 12, 13)):
            self.schedule_ad_hoc(
                self.meeting, u'Ad-hoc Traktand\xfem'
            ).decide()
        agenda_item = self.schedule_proposal(self.meeting, self.submitted_proposal)
        self.decide_agendaitem_generate_and_return_excerpt(agenda_item)
        with freeze(localized_datetime(2017, 12, 14)):
            self.meeting.model.close()

        browser.open(self.meeting, view='export-meeting-zip')
        self.assertEquals('application/zip', browser.contenttype)

        zip_file = ZipFile(StringIO(browser.contents), 'r')

        meeting_json = json.loads(zip_file.read('meeting.json'))

        # the protocol is generated during the tests and its checksum cannot
        # be predicted
        meeting_json['meetings'][0]['protocol']['checksum'] = 'unpredictable'
        meeting_json['meetings'][0].pop('opengever_id')
        for agenda_item in meeting_json['meetings'][0]['agenda_items']:
            agenda_item.pop('opengever_id')

        expected_meeting_json = {
            u'meetings': [{
                u'agenda_items': [
                    {u'sort_order': 1, u'title': u'A Gesch\xfcfte'},
                    {
                        u'number': u'1.',
                        u'number_raw': 1,
                        u'proposal': {
                            u'checksum': u'e00d6c8fb32c30d3ca3a3f8e5d873565482567561023016d9ca18243ff1cfa14',
                            u'file': u'Traktandum 1/Ad-hoc Traktandthm.docx',
                            u'modified': u'2017-12-13T00:00:00+01:00',
                        },
                        u'sort_order': 2,
                        u'title': u'Ad-hoc Traktand\xfem',
                    },
                    {
                        u'attachments': [{
                            u'checksum': u'51d6317494eccc4a73154625a6820cb6b50dc1455eb4cf26399299d4f9ce77b2',
                            u'file': u'Traktandum 2/Beilage/1_Vertraegsentwurf.docx',
                            u'modified': u'2016-08-31T16:09:37+02:00',
                            u'title': u'Vertr\xe4gsentwurf',
                        }],
                        u'number': u'2.',
                        u'number_raw': 2,
                        u'proposal': {
                            u'checksum': u'114e7a059dc34c7459dab90904685584e331089d80bb6310183a0de009b66c3b',
                            u'file': u'Traktandum 2/Vertraege.docx',
                            u'modified': u'2016-08-31T16:09:35+02:00',
                        },
                        u'sort_order': 3,
                        u'title': u'Vertr\xe4ge',
                    },
                ],
                u'committee': {u'oguid': u'plone:1009313300', u'title': u'Rechnungspr\xfcfungskommission'},
                u'end': u'2016-09-12T17:00:00+00:00',
                u'location': u'B\xfcren an der Aare',
                u'protocol': {
                    u'checksum': 'unpredictable',
                    u'file': u'Protokoll-9. Sitzung der Rechnungspruefungskommission- ordentlich.docx',
                    u'modified': u'2017-12-14T00:00:00+01:00',
                },
                u'start': u'2016-09-12T15:30:00+00:00',
                u'title': u'9. Sitzung der Rechnungspr\xfcfungskommission, ordentlich',
            }],
            u'version': u'1.0.0',
        }
        self.assert_json_structure_equal(expected_meeting_json, meeting_json)

        expected_file_names = [
            'Protokoll-9. Sitzung der Rechnungspruefungskommission- ordentlich.docx',
            'Traktandum 1/Ad-hoc Traktandthm.docx',
            'Traktandum 2/Beilage/1_Vertraegsentwurf.docx',
            'Traktandum 2/Vertraege.docx',
            'meeting.json',
            ]
        file_names = sorted(zip_file.namelist())
        self.assertEqual(expected_file_names, file_names)
Example #5
0
    def list():
        processes = Process.query.filter_by(user_id=current_user.id)
        changed = False

        res = []
        for p in processes:
            status, updated = BatchProcess.update_process_info(p)
            if not status:
                continue

            if not changed:
                changed = updated

            if p.start_time is None or (p.acknowledge is not None
                                        and p.end_time is None):
                continue

            execution_time = None

            stime = parser.parse(p.start_time)
            etime = parser.parse(p.end_time or get_current_time())

            execution_time = BatchProcess.total_seconds(etime - stime)
            desc = ""
            try:
                desc = loads(p.desc.encode('latin-1')) if \
                    IS_PY2 and hasattr(p.desc, 'encode') else loads(p.desc)
            except UnicodeDecodeError:
                desc = loads(p.desc.encode('utf-8')) if \
                    IS_PY2 and hasattr(p.desc, 'encode') else loads(p.desc)
            except Exception:
                desc = loads(p.desc.encode('utf-8', 'ignore')) if \
                    IS_PY2 and hasattr(p.desc, 'encode') else loads(p.desc)

            details = desc
            # default type description in case desc is not an IProcessDesc
            type_desc = ''

            if isinstance(desc, IProcessDesc):
                args = []
                args_csv = StringIO(
                    p.arguments.encode('utf-8') if hasattr(
                        p.arguments, 'decode') else p.arguments)
                args_reader = csv.reader(args_csv, delimiter=str(','))
                for arg in args_reader:
                    args = args + arg
                details = desc.details(p.command, args)
                type_desc = desc.type_desc
                desc = desc.message

            res.append({
                'id': p.pid,
                'desc': desc,
                'type_desc': type_desc,
                'details': details,
                'stime': stime,
                'etime': p.end_time,
                'exit_code': p.exit_code,
                'acknowledge': p.acknowledge,
                'execution_time': execution_time,
                'process_state': p.process_state
            })

        if changed:
            db.session.commit()

        return res
Example #6
0
 def pformat(cls, val, indent=0):
     cls.io = StringIO()
     cls.pprint_lookup(val, indent)
     return cls.io.getvalue()
Example #7
0
File: diff.py Project: drewp/photo
def hiresArray(uri):
    jpg = restkit.Resource(absoluteSite(uri)).get(size='screen').body_string()
    i = Image.open(StringIO(jpg)).convert('L')
    ar = numpy.asarray(i, dtype='f') / 255
    ar.shape = i.size[1], i.size[0]
    return ar
Example #8
0
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 12 14:26:49 2014

@author: karthik.ganapathy
"""

from sklearn.datasets import load_iris
from StringIO import StringIO
import pydot

from sklearn import tree
iris = load_iris()
clf = tree.DecisionTreeClassifier()
clf = clf.fit(iris.data, iris.target)
# print iris.keys
dot_data = StringIO()
# The command below exports the tree in Graphviz dot format
# into the dot_data StringIO buffer
tree.export_graphviz(clf, out_file=dot_data)
graph = pydot.graph_from_dot_data(dot_data.getvalue())
type(graph)
graph.write_pdf("iris.pdf")
# The line below prints the dot source of the tree
# print dot_data.getvalue()

#print clf.__class__

#k = clf(0)
#print k['splitter']
Example #9
0
def prepare_namelist(environ, **kwargs):
    ''' Read atmos namelist and update variables from environ as needed

    Used vars:
      name
      atmos_namelist
      TRC
      LV
      dt_atmos
      start
      restart
      finish
      rootexp
      workdir

    Depends on:
      None
    '''
    input_file = StringIO()
    get(fmt('{agcm_namelist[file]}', environ), input_file)
    data = nml_decode(input_file.getvalue())
    input_file.close()
    output = StringIO()

    try:
        tkeys = set(environ['atmos_namelist']['vars'].keys()) & set(
            data.keys())
    except KeyError:
        pass
    else:
        for k in tkeys:
            keys = set(environ['atmos_namelist']['vars'][k].keys()) & set(
                data[k].keys())
            data[k].update([(ke, environ['atmos_namelist']['vars'][k][ke])
                            for ke in keys])

    trunc = "%04d" % environ['TRC']
    lev = "%03d" % environ['LV']

    data['MODEL_RES']['trunc'] = trunc
    data['MODEL_RES']['vert'] = environ['LV']
    data['MODEL_RES']['dt'] = environ['dt_atmos']
    data['MODEL_RES']['IDATEI'] = format_atmos_date(environ['start'])
    data['MODEL_RES']['IDATEW'] = format_atmos_date(environ['restart'])
    data['MODEL_RES']['IDATEF'] = format_atmos_date(environ['finish'])
    data['MODEL_RES']['DHEXT'] = environ.get('DHEXT', 0)
    if environ.get('DHEXT', 0) != 0:
        begin = datetime.strptime(environ['restart'], "%Y%m%d%H")
        end = datetime.strptime(environ['finish'], "%Y%m%d%H")
        nhext = total_seconds(end - begin) / 3600
    else:
        nhext = 0
    data['MODEL_RES']['NHEXT'] = nhext

    # TODO: is this environ['agcm_model_inputs'] ?
    data['MODEL_RES']['path_in'] = fmt('{rootexp}/AGCM-1.0/model/datain',
                                       environ)

    data['MODEL_RES']['dirfNameOutput'] = (fmt(
        '{workdir}/model/dataout/TQ%sL%s' % (trunc, lev), environ))

    output.write(
        yaml2nml(data,
                 key_order=[
                     'MODEL_RES', 'MODEL_IN', 'PHYSPROC', 'PHYSCS', 'COMCON'
                 ]))

    # HACK: sigh, this is needed to run atmos post processing, even if we
    # don't use these forecasts.
    output.write("""
 17
   6.0 12.0  18.0  24.0
  30.0 36.0  42.0  48.0
  54.0 60.0  66.0  72.0
  84.0 96.0 120.0 144.0
 168.0
""")

    put(output, fmt('{workdir}/MODELIN', environ))
    output.close()
Example #10
0
    def assert_mds_crash(self, daemon_id):
        """
        Assert that a particular MDS daemon crashes (block until
        it does)
        """
        try:
            self.mds_cluster.mds_daemons[daemon_id].proc.wait()
        except CommandFailedError as e:
            log.info("MDS '{0}' crashed with status {1} as expected".format(
                daemon_id, e.exitstatus))
            self.mds_cluster.mds_daemons[daemon_id].proc = None

            # Go remove the coredump from the crash, otherwise teuthology.internal.coredump will
            # catch it later and treat it as a failure.
            p = self.mds_cluster.mds_daemons[daemon_id].remote.run(
                args=["sudo", "sysctl", "-n", "kernel.core_pattern"],
                stdout=StringIO())
            core_pattern = p.stdout.getvalue().strip()
            if os.path.dirname(
                    core_pattern
            ):  # Non-default core_pattern with a directory in it
                # We have seen a core_pattern that looks like it's from teuthology's coredump
                # task, so proceed to clear out the core file
                log.info(
                    "Clearing core from pattern: {0}".format(core_pattern))

                # Determine the PID of the crashed MDS by inspecting the MDSMap, it had
                # to talk to the mons to get assigned a rank to reach the point of crashing
                addr = self.mds_cluster.mon_manager.get_mds_status(
                    daemon_id)['addr']
                pid_str = addr.split("/")[1]
                log.info("Determined crasher PID was {0}".format(pid_str))

                # Substitute PID into core_pattern to get a glob
                core_glob = core_pattern.replace("%p", pid_str)
                core_glob = re.sub(
                    "%[a-z]", "*",
                    core_glob)  # Match all for all other % tokens

                # Verify that we see the expected single coredump matching the expected pattern
                ls_proc = self.mds_cluster.mds_daemons[daemon_id].remote.run(
                    args=["sudo", "ls", run.Raw(core_glob)], stdout=StringIO())
                cores = [
                    f for f in ls_proc.stdout.getvalue().strip().split("\n")
                    if f
                ]
                log.info("Enumerated cores: {0}".format(cores))
                self.assertEqual(len(cores), 1)

                log.info("Found core file {0}, deleting it".format(cores[0]))

                self.mds_cluster.mds_daemons[daemon_id].remote.run(
                    args=["sudo", "rm", "-f", cores[0]])
            else:
                log.info(
                    "No core_pattern directory set, nothing to clear (internal.coredump not enabled?)"
                )

        else:
            raise AssertionError(
                "MDS daemon '{0}' did not crash as expected".format(daemon_id))
Example #11
0
 def test_write_graph6(self):
     fh = StringIO()
     nx.write_graph6(nx.complete_bipartite_graph(6,9), fh)
     fh.seek(0)
     assert_equal(fh.read(), '>>graph6<<N??F~z{~Fw^_~?~?^_?\n')
Example #12
0
import datetime as dt
import dateutil.parser
from StringIO import StringIO

import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import matplotlib as mpl

fig = plt.figure()
ax = fig.add_subplot(111)
ax.yaxis.set_major_formatter(ff(sec2minsec))
plt.xlabel("Date of Parkrun")
plt.ylabel("Time (min:sec)")

ax.plot_date(x, winners, 'ro', label="Winning time")
ax.errorbar(x, medians, yerr=spreads, label="Median time (and spread)")

fig.autofmt_xdate()

legend = plt.legend()

format = "png"
imagedata = StringIO()
plt.savefig(imagedata, format=format, dpi=96)
utils.httpresponseheader("Content-Type", "image/%s" % format)
dumpMessage({
    "content": imagedata.getvalue().encode("base64"),
    "message_type": "console",
    "encoding": "base64"
})
Example #13
0
def writePython(tree, txt):
    f = StringIO()
    out = TextWriter(f)
    pw = PythonWriter(tree, txt)
    pw.output(out)
    return f.getvalue().strip()
Example #14
0
def run_flogtool(argv=None, run_by_human=True):
    if argv:
        command_name = argv[0]
    else:
        command_name = sys.argv[0]
    config = Options()
    try:
        config.parseOptions(argv)
    except usage.error, e:
        if not run_by_human:
            raise
        print "%s:  %s" % (command_name, e)
        print
        c = getattr(config, 'subOptions', config)
        print str(c)
        sys.exit(1)

    command = config.subCommand
    so = config.subOptions
    if not run_by_human:
        so.stdout = StringIO()
        so.stderr = StringIO()
    rc = dispatch(command, so)
    if rc is None:
        rc = 0
    if run_by_human:
        sys.exit(rc)
    else:
        return (so.stdout.getvalue(), so.stderr.getvalue())
Example #15
0
 def string_from_image(self, image):
     output = StringIO()
     image.save(output, 'png', quality=95)
     contents = output.getvalue()
     output.close()
     return contents.encode('base64')
Example #16
0
def format_function_infos(fninfos):
    buf = StringIO()
    try:
        print = bind_file_to_print(buf)

        title_line = "Lowering Listing"
        print(title_line)
        print('=' * len(title_line))

        print(description)

        commit = git_hash()

        def format_fname(fn):
            try:
                fname = "{0}.{1}".format(fn.__module__, get_func_name(fn))
            except AttributeError:
                fname = repr(fn)
            return fn, fname

        for fn, fname in sorted(map(format_fname, fninfos), key=lambda x: x[1]):
            impinfos = fninfos[fn]
            header_line = "``{0}``".format(fname)
            print(header_line)
            print('-' * len(header_line))
            print()

            formatted_sigs = map(
                lambda x: format_signature(x['sig']), impinfos)
            sorted_impinfos = sorted(zip(formatted_sigs, impinfos),
                                     key=lambda x: x[0])

            col_signatures = ['Signature']
            col_urls = ['Definition']

            for fmtsig, info in sorted_impinfos:
                impl = info['impl']

                filename = impl['filename']
                lines = impl['lines']
                fname = impl['name']

                source = '{0} lines {1}-{2}'.format(filename, *lines)
                link = github_url.format(commit=commit, path=filename,
                                         firstline=lines[0], lastline=lines[1])
                url = '``{0}`` `{1} <{2}>`_'.format(fname, source, link)

                col_signatures.append(fmtsig)
                col_urls.append(url)

            # table formatting
            max_width_col_sig = max(map(len, col_signatures))
            max_width_col_url = max(map(len, col_urls))
            padding = 2
            width_col_sig = padding * 2 + max_width_col_sig
            width_col_url = padding * 2 + max_width_col_url
            line_format = "{{0:^{0}}}  {{1:^{1}}}".format(width_col_sig,
                                                          width_col_url)
            print(line_format.format('=' * width_col_sig, '=' * width_col_url))
            print(line_format.format(col_signatures[0], col_urls[0]))
            print(line_format.format('=' * width_col_sig, '=' * width_col_url))
            for sig, url in zip(col_signatures[1:], col_urls[1:]):
                print(line_format.format(sig, url))
            print(line_format.format('=' * width_col_sig, '=' * width_col_url))
            print()

        return buf.getvalue()
    finally:
        buf.close()
Example #17
0
 def parse_yaml_string(self, ys):
     fd = StringIO(ys)
     dct = yaml.load(fd)
     return dct
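A quick usage sketch, assuming PyYAML is installed (yaml.safe_load is the safer choice for untrusted input):

import yaml
from StringIO import StringIO

fd = StringIO("name: test\nvalues: [1, 2, 3]")
print yaml.load(fd)  # -> a dict such as {'name': 'test', 'values': [1, 2, 3]}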
Example #18
0
 def parse(self, source):
     in_h = StringIO(dedent(source))
     return ParserExtra()(in_h, '/dev/null')
Example #19
0
def zip_extract(binaryInput):
    in_memory_data = StringIO(binaryInput)
    file_obj = zipfile.ZipFile(in_memory_data, "r")
    files = [i for i in file_obj.namelist()]
    return [file_obj.open(file).read().decode("utf-8", "ignore") for file in files]
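A round-trip sketch for zip_extract() above, building a small archive in memory with the standard zipfile module:

import zipfile
from StringIO import StringIO

buf = StringIO()
archive = zipfile.ZipFile(buf, 'w', zipfile.ZIP_DEFLATED)
archive.writestr('hello.txt', 'hello world')
archive.close()

print zip_extract(buf.getvalue())  # -> [u'hello world']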
Example #20
0
import gzip
from StringIO import StringIO

def gunzip(content):
    return gzip.GzipFile(fileobj=StringIO(content)).read()
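A matching round-trip check for gunzip(), compressing a string in memory first (reusing the imports above):

buf = StringIO()
writer = gzip.GzipFile(fileobj=buf, mode='wb')
writer.write('hello world')
writer.close()  # closing flushes the gzip trailer into buf

assert gunzip(buf.getvalue()) == 'hello world'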
Example #21
0
File: diff.py Project: drewp/photo
def inlineImage(img, cap):
    assert isinstance(img, Image.Image)
    out = StringIO()
    img.save(out, "jpeg")
    return '<img src="data:image/jpeg;base64,%s"><span>%s</span>' % (
        out.getvalue().encode('base64'), cap)
Example #22
0
 def start(self):
     self.stderr.append(sys.stderr)
     self._buf = StringIO()
     sys.stderr = self._buf
Example #23
0
    def _create_process(self, _desc, _cmd, _args):
        ctime = get_current_time(format='%y%m%d%H%M%S%f')
        log_dir = os.path.join(config.SESSION_DB_PATH, 'process_logs')

        def random_number(size):
            import random
            import string

            return ''.join(
                random.choice(string.ascii_uppercase + string.digits)
                for _ in range(size))

        created = False
        size = 0
        id = ctime
        while not created:
            try:
                id += random_number(size)
                log_dir = os.path.join(log_dir, id)
                size += 1
                if not os.path.exists(log_dir):
                    os.makedirs(log_dir, int('700', 8))
                    created = True
            except OSError as oe:
                import errno
                if oe.errno != errno.EEXIST:
                    raise

        # ID
        self.id = ctime
        # Description
        self.desc = _desc
        # Status Acknowledged time
        self.atime = None
        # Command
        self.cmd = _cmd
        # Log Directory
        self.log_dir = log_dir
        # Standard output log file
        self.stdout = os.path.join(log_dir, 'out')
        # Standard error log file
        self.stderr = os.path.join(log_dir, 'err')
        # Start time
        self.stime = None
        # End time
        self.etime = None
        # Exit code
        self.ecode = None
        # Process State
        self.process_state = PROCESS_NOT_STARTED

        # Arguments
        self.args = _args
        args_csv_io = StringIO()
        csv_writer = csv.writer(args_csv_io,
                                delimiter=str(','),
                                quoting=csv.QUOTE_MINIMAL)
        if sys.version_info[0] == 2:
            csv_writer.writerow([
                a.encode('utf-8') if isinstance(a, unicode) else a
                for a in _args
            ])
        else:
            csv_writer.writerow(_args)

        args_val = args_csv_io.getvalue().strip(str('\r\n'))
        tmp_desc = dumps(self.desc)
        try:
            tmp_desc = tmp_desc.decode('utf-8') if \
                IS_PY2 and hasattr(tmp_desc, 'decode') else tmp_desc
        except UnicodeDecodeError:
            tmp_desc = tmp_desc.decode('latin-1') if \
                IS_PY2 and hasattr(tmp_desc, 'decode') else tmp_desc
        except Exception:
            tmp_desc = tmp_desc.decode('utf-8', 'ignore') if \
                IS_PY2 and hasattr(tmp_desc, 'decode') else tmp_desc

        j = Process(pid=int(id),
                    command=_cmd,
                    arguments=args_val.decode('utf-8', 'replace')
                    if IS_PY2 and hasattr(args_val, 'decode') else args_val,
                    logdir=log_dir,
                    desc=tmp_desc,
                    user_id=current_user.id)
        db.session.add(j)
        db.session.commit()
Example #24
0
def pipeline():

    remote_data_path = sys.argv[1] if len(sys.argv) > 1 else \
        "s3a://asystem-astore-staging"
    remote_model_path = sys.argv[2] if len(sys.argv) > 2 else \
        "s3a://asystem-amodel-staging/asystem/amodel/energyforecastintraday"
    local_model_path = sys.argv[3] if len(sys.argv) > 3 else \
        tempfile.mkdtemp()
    print("Pipeline starting on [{}]\n".format(remote_data_path))

    time_start = int(round(time.time()))
    spark = SparkSession.builder \
        .appName("asystem-amodel-energyforecastintraday").getOrCreate()
    print("Session created ...")

    ds_energy = spark.read.parquet(
        *paths(qualify(remote_data_path +
                       "/[0-9]/asystem/astore/processed/canonical/parquet/dict/snappy"),
               ["/*/*/*/*/astore_metric=energy"], "/*.snappy.parquet"))
    ds_sun = spark.read.parquet(
        *paths(qualify(remote_data_path +
                       "/[0-9]/asystem/astore/processed/canonical/parquet/dict/snappy"),
               ["/*/*/*/*/astore_metric=sun"], "/*.snappy.parquet"))
    print("Listing finished ...")

    ds_energy.createOrReplaceTempView('energy')
    ds_energy.cache()
    df_energy = spark.sql("""
        SELECT
          bin_timestamp,
          data_value / data_scale AS bin_energy
        FROM energy
        WHERE
          data_metric='energy__production__inverter' AND 
          data_type='integral' AND
          bin_width=1 AND
          bin_unit='day'
        ORDER BY bin_timestamp ASC
    """).toPandas()
    ds_sun.createOrReplaceTempView('sun')
    ds_sun.cache()
    df_sun_rise = spark.sql("""
        SELECT
          bin_timestamp,
          data_value / data_scale AS bin_sunrise
        FROM sun
        WHERE          
          data_metric='sun__outdoor__rise' AND
          data_type='epoch' AND
          bin_width=1 AND
          bin_unit='day'
        ORDER BY bin_timestamp ASC
    """).toPandas()
    df_sun_set = spark.sql("""
        SELECT
          bin_timestamp,
          data_value / data_scale AS bin_sunset
        FROM sun
        WHERE          
          data_metric='sun__outdoor__set' AND
          data_type='epoch' AND
          bin_width=1 AND
          bin_unit='day'
        ORDER BY bin_timestamp ASC
    """).toPandas()
    spark.catalog.clearCache()
    print("Dataframes collected ...")

    df = df_energy.set_index(pd.to_datetime(df_energy['bin_timestamp'], unit='s')
                            .dt.tz_localize('UTC').dt.tz_convert(TIMEZONE))
    df['bin_date'] = df.index.date
    df.set_index('bin_date', inplace=True)
    df_energy_day = df.groupby(df.index)['bin_energy'].max().to_frame() \
        .rename(columns={'bin_energy': 'bin_energy_day'})
    df = df.merge(df_energy_day, how='inner', left_index=True, right_index=True)
    df_sun_rise.set_index(pd.to_datetime(df_sun_rise['bin_timestamp'], unit='s')
                        .dt.tz_localize('UTC').dt.tz_convert(TIMEZONE), inplace=True)
    df_sun_rise['bin_date'] = df_sun_rise.index.date
    df_sun_rise.set_index('bin_date', inplace=True)
    df = df.merge(df_sun_rise.groupby(df_sun_rise.index)['bin_sunrise'].max()
                  .to_frame(), how='inner', left_index=True, right_index=True)
    df_sun_set.set_index(
        pd.to_datetime(df_sun_set['bin_timestamp'], unit='s')
            .dt.tz_localize('UTC').dt.tz_convert(TIMEZONE), inplace=True)
    df_sun_set['bin_date'] = df_sun_set.index.date
    df_sun_set.set_index('bin_date', inplace=True)
    df = df.merge(df_sun_set.groupby(df_sun_set.index)['bin_sunset'].max()
                  .to_frame(), how='inner', left_index=True, right_index=True)
    df.set_index(pd.to_datetime(df['bin_timestamp'], unit='s')
                 .dt.tz_localize('UTC').dt.tz_convert(TIMEZONE), inplace=True)
    df.sort_index(inplace=True)
    print("Output compiled ...")
    print("\nTraining data:\n{}\n\n".format(df.describe()))

    dfvs = {'VETTED': {}, 'PURGED': {}, 'TOVETT': {}}
    for dfs in df.groupby(df.index.date):
        day = dfs[0].strftime('%Y/%m/%d')
        dfvs[('PURGED' if day in DAYS_BLACK_LIST else
              ('TOVETT' if day >= datetime.datetime.now().strftime("%Y/%m/%d")
                   else 'VETTED'))][day] = dfs[1]

    for vetting in dfvs:
        for day, dfv in sorted(dfvs[vetting].iteritems()):
            dfv.set_index(
                pd.to_datetime(dfv['bin_timestamp'], unit='s')
                    .dt.tz_localize('UTC').dt.tz_convert(TIMEZONE), inplace=True)
            if DAYS_PLOT and DAYS_PLOT_DEBUG:
                dfv.plot(title="Energy ({}) - {}"
                         .format(day, vetting), y=['bin_energy', 'bin_energy_day'])

    for vetting in dfvs:
        print("Processed {} {} days ...".format(len(dfvs[vetting]), vetting.lower()))

    dfnss = []
    bins = 1000
    for day, dfv in sorted(dfvs['VETTED'].iteritems()):
        dfv['normalised'] = dfv['bin_energy'] / dfv['bin_energy_day']
        dfv['standardised'] = bins * (
                dfv['bin_timestamp'] - dfv['bin_sunrise']) / \
                              (dfv['bin_sunset'] - dfv['bin_sunrise'])
        dfv['standardised'] = dfv['standardised'].clip(0, bins).astype(int)
        dfns = dfv.drop(['bin_timestamp', 'bin_energy',
                         'bin_energy_day', 'bin_sunrise', 'bin_sunset'],
                        axis=1).drop_duplicates()
        dfns.set_index('standardised', inplace=True)
        dfns.sort_index(inplace=True)
        dfns = dfns[~dfns.index.duplicated(keep='first')]
        dfns = dfns.reindex(np.arange(0, bins + 1)).ffill()
        dfns.loc[0:10] = 0
        dfns.loc[990:1000] = 1
        dfnss.append(dfns)
        if DAYS_PLOT and DAYS_PLOT_DEBUG:
            dfns.plot(title="Energy ({}) - VETTED".format(day))
    dfnsa = pd.concat(dfnss, axis=1, ignore_index=True)
    if DAYS_PLOT:
        dfnsa.plot(title="Energy Normalised/Standardised (All) - VETTED", legend=False)
    dfnsa = pd.concat(dfnss)
    dfnsa = dfnsa.groupby(dfnsa.index).mean()
    if DAYS_PLOT:
        dfnsa.plot(title="Energy Normalised/Standardised (Mean) - VETTED", legend=False)
    print("Model built ...")

    model_file = '/model/pickle/joblib/none/' \
                 'amodel_version=10.000.0140-SNAPSHOT/amodel_model=1003/model.pkl'
    local_model_file = local_model_path + model_file
    remote_model_file = remote_model_path + model_file
    if os.path.exists(os.path.dirname(local_model_file)):
        shutil.rmtree(os.path.dirname(local_model_file))
    os.makedirs(os.path.dirname(local_model_file))
    pickled_execute = StringIO()
    dill.dump(execute, pickled_execute)
    pickled_execute.flush()
    joblib.dump({'pipeline': dfnsa, 'execute': pickled_execute},
                local_model_file, compress=True)
    print("Model serialised ...")

    model = joblib.load(local_model_file)
    dfi = pd.DataFrame([
        {"energy__production_Dforecast_Ddaylight__inverter": 0},
        {"energy__production_Dforecast_Ddaylight__inverter": 250},
        {"energy__production_Dforecast_Ddaylight__inverter": 500},
        {"energy__production_Dforecast_Ddaylight__inverter": 750},
        {"energy__production_Dforecast_Ddaylight__inverter": 1000}
    ]).apply(pd.to_numeric, errors='ignore')
    dfo = dill.load(StringIO(model['execute'].getvalue())) \
        (model=model, features=dfi, prediction=True)
    print("Model de-serialised ...")
    print("\nEnergy Mean Input:\n{}\n\nEnergy Mean Output:\n{}\n".format(dfi, dfo))

    publish(local_model_file, remote_model_file)
    shutil.rmtree(local_model_path)
    print("Model published ...")

    print("\nPipeline finished in [{}] s".format(int(round(time.time())) - time_start))
Example #25
0
            # get session id
            tags = n.split('_')
            session = int(tags[1])
            name = tags[2]

            # parse each sensor log
            for log in logs:
                # split first line
                l = log.split('\n', 1)

                # get sensor type
                sensor = l[0].split(',')[2]

                # create directory for sensor
                if not os.path.exists(sensor):
                    os.makedirs(sensor)

                # create log dataframe
                df = pd.read_csv(StringIO(l[1]), parse_dates=[0])
                df.to_csv(sensor + '\\' + str(session) + '_' + name + '.csv')

                # add to data
                val = df['Value']
                data = data.append(pd.Series([name, sensor, val.max(), val.min(), \
                    val.mean(), val.median()], index=cols, name=session))

    # save analysis for each sensor type
    for sensor in data['sensor'].unique():
        data[data['sensor'] == sensor].to_csv(sensor + '\\Analysis.csv')
Example #26
0
 def __init__(self):
     self.file = StringIO()
     self.filename = '<dummy field storage>'
     self.headers = {}
Example #27
0
 def setUp(self):
     self.string_file = StringIO()
Example #28
0
from traceback import print_exc
from StringIO import StringIO

def getTraceBack():
    f = StringIO()
    print_exc(file=f)
    return f.getvalue()
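A small usage sketch for getTraceBack() above, capturing the current exception's traceback as a string instead of printing it to stderr:

try:
    1 / 0
except ZeroDivisionError:
    trace = getTraceBack()
    assert 'ZeroDivisionError' in trace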
Example #29
0
def parse_root(raw):
    "Efficiently parses the root element of a *raw* XML document, returning a tuple of its qualified name and attribute dictionary."
    fp = StringIO(raw)
    for event, element in etree.iterparse(fp, events=('start',)):
        return (element.tag, element.attrib)
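A self-contained usage sketch, binding the same function to the stdlib parser (lxml.etree exposes the same iterparse interface):

from StringIO import StringIO
from xml.etree import ElementTree as etree

def parse_root(raw):
    # identical to the example above, bound to the stdlib parser
    fp = StringIO(raw)
    for event, element in etree.iterparse(fp, events=('start',)):
        return (element.tag, element.attrib)

print parse_root('<feed version="1.1"><entry>one</entry></feed>')
# -> ('feed', {'version': '1.1'})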
Example #30
0
 def testasstring(self):
     params = Dictionary('a="one two" b="2.943" c="44 45"')
     from StringIO import StringIO
     s = StringIO()
     s.write(params.asstring(' '))
     self.assertEqual(s.getvalue(), 'a="one two" b=2.943 c="44 45"')