Example #1
    def __init__(self, url=None, sessionid=None, app_name=None):
        if url is None:
            port = config.getint('hydra_client', 'port', 80)
            domain = config.get('hydra_client', 'domain', '127.0.0.1')
            path = config.get('hydra_client', 'soap_path', 'soap')
            #The domain may or may not specify the protocol, so do a check.
            if domain.find('http') == -1:
                self.url = "http://%s:%s/%s?wsdl" % (domain, port, path)
            else:
                self.url = "%s:%s/%s?wsdl" % (domain, port, path)
        else:
            log.info("Using user-defined URL: %s", url)
            port = _get_port(url)
            hostname = _get_hostname(url)
            path = _get_path(url)
            protocol = _get_protocol(url)
            self.url = "%s://%s:%s%s/soap?wsdl" % (protocol, hostname, port, path)
        log.info("Setting URL %s", self.url)

        self.app_name = app_name
        self.sessionid = sessionid
        self.retxml = False
        self.client = Client(self.url,
                             timeout=3600,
                             plugins=[FixNamespace()],
                             retxml=self.retxml)
        self.client.add_prefix('hyd', 'soap_server.hydra_complexmodels')

        cache = self.client.options.cache
        cache.setduration(days=10)
Example #3
def reindex_timeseries(ts_string, new_timestamps):
    """
        get data for timesamp

        :param a JSON string, in pandas-friendly format
        :param a timestamp or list of timestamps (datetimes)
        :returns a pandas data frame, reindexed with the supplied timestamos or None if no data is found
    """
    #If a single timestamp is passed in, turn it into a list
    #Reindexing can't work if it's not a list
    if not isinstance(new_timestamps, list):
        new_timestamps = [new_timestamps]

    #Convert the incoming timestamps to datetimes
    #if they are not datetimes.
    new_timestamps_converted = []
    for t in new_timestamps:
        new_timestamps_converted.append(get_datetime(t))

    new_timestamps = new_timestamps_converted

    seasonal_year = config.get('DEFAULT', 'seasonal_year', '1678')
    seasonal_key = config.get('DEFAULT', 'seasonal_key', '9999')

    ts = ts_string.replace(seasonal_key, seasonal_year)

    timeseries = pd.read_json(ts)

    idx = timeseries.index

    ts_timestamps = new_timestamps

    #'Fix' the incoming timestamp in case it's a seasonal value
    if type(idx) == pd.DatetimeIndex:
        if set(idx.year) == set([int(seasonal_year)]):
            if isinstance(new_timestamps, list):
                seasonal_timestamp = []
                for t in ts_timestamps:
                    t_1900 = t.replace(year=int(seasonal_year))
                    seasonal_timestamp.append(t_1900)
                ts_timestamps = seasonal_timestamp

    #Reindex the timeseries to reflect the requested timestamps
    reindexed_ts = timeseries.reindex(ts_timestamps, method='ffill')

    i = reindexed_ts.index

    reindexed_ts.index = pd.Index(new_timestamps, names=i.names)

    #If there are no values at all, just return None
    if len(reindexed_ts.dropna()) == 0:
        return None

    #Replace all numpy NAN values with None
    pandas_ts = reindexed_ts.where(reindexed_ts.notnull(), None)

    return pandas_ts
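
A minimal sketch of the reindexing behaviour above, using only pandas (the JSON layout and the 'flow' column name are illustrative, not from the Hydra source):

import datetime
import json

import pandas as pd

#A two-point series in the column-oriented JSON layout pd.read_json expects.
ts_string = json.dumps({"flow": {"2014-01-01T00:00:00.000": 1.0,
                                 "2014-01-03T00:00:00.000": 3.0}})
frame = pd.read_json(ts_string)

#Reindexing onto an intermediate timestamp forward-fills the last known value.
requested = [datetime.datetime(2014, 1, 2)]
print(frame.reindex(requested, method='ffill'))  #flow == 1.0 at 2014-01-02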
Example #5
    def login(self, username=None, password=None):
        if username is None:
            username = config.get('hydra_client', 'user')
        if password is None:
            password = config.get('hydra_client', 'password')
        login_params = {'username': username, 'password': password}

        resp = self.call('login', login_params)
        #set variables for use in request headers
        log.info(resp)
Example #7
    def parse_value(self):
        """
            Turn the value of an incoming dataset into a hydra-friendly value.
        """
        try:
            #attr_data.value is a dictionary,
            #but the keys have namespaces which must be stripped.


            if self.value is None:
                log.warn("Cannot parse dataset. No value specified.")
                return None

            data = str(self.value)

            if data.upper().strip() == 'NULL':
                return 'NULL'

            if data.strip() == '':
                return "NULL"

            if len(data) > 100:
                log.debug("Parsing %s", data[0:100])
            else:
                log.debug("Parsing %s", data)

            if self.type == 'descriptor':
                #Hack to work with hashtables. REMOVE AFTER DEMO
                if self.get_metadata_as_dict().get('data_type') == 'hashtable':
                    df = pd.read_json(data)
                    data = df.transpose().to_json() 
                return data
            elif self.type == 'scalar':
                return data
            elif self.type == 'timeseries':
                timeseries_pd = pd.read_json(data)
                #Epoch doesn't work here because dates before 1970 are not
                # supported in read_json. Ridiculous.
                ts = timeseries_pd.to_json(date_format='iso', date_unit='ns')
                if len(data) > int(config.get('db', 'compression_threshold', 1000)):
                    return zlib.compress(ts)
                else:
                    return ts
            elif self.type == 'array':
                #check to make sure this is valid json
                json.loads(data)
                if len(data) > int(config.get('db', 'compression_threshold', 1000)):
                    return zlib.compress(data)
                else:
                    return data
        except Exception as e:
            log.exception(e)
            raise HydraError("Error parsing value %s: %s"%(self.value, e))
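
parse_value compresses large timeseries and array values with zlib once they exceed the db/compression_threshold setting, and get_val (Example #43) mirrors this by attempting decompression first. A minimal sketch of that round-trip, with the threshold lowered so the compressed branch is taken:

import json
import zlib

value = json.dumps({"index": {"2014-01-01": 1.0}})
threshold = 10  #stand-in for config.get('db', 'compression_threshold', 1000)

stored = zlib.compress(value.encode('utf-8')) if len(value) > threshold else value

#Reading back: try to decompress, and fall back to the raw value.
try:
    restored = zlib.decompress(stored).decode('utf-8')
except (zlib.error, TypeError):
    restored = stored

assert json.loads(restored) == json.loads(value)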
Example #8
def create_sqlite_backup_db(audit_tables):
    """
        return an inspector object
    """
    #we always want to create a whole new DB, so delete the old one first
    #if it exists.
    try:
        Popen("rm %s" % (config.get('sqlite', 'backup_url')), shell=True)
        logging.warn("Old sqlite backup DB removed")
    except Exception as e:
        logging.warn(e)

    try:
        aux_dir = config.get('DEFAULT', 'hydra_aux_dir')
        os.mkdir(aux_dir)
        logging.warn("%s created", aux_dir)
    except Exception as e:
        logging.warn(e)

    try:
        backup_dir = config.get('db', 'export_target')
        os.mkdir(backup_dir)
        logging.warn("%s created", backup_dir)
    except Exception as e:
        logging.warn(e)

    db = create_engine(sqlite_engine, echo=True)
    db.connect()
    metadata = MetaData(db)

    for main_audit_table in audit_tables:
        cols = []
        for c in main_audit_table.columns:
            col = c.copy()
            if col.type.python_type == Decimal:
                col.type = DECIMAL()

            cols.append(col)
        Table(main_audit_table.name,
              metadata,
              *cols,
              sqlite_autoincrement=True)

    metadata.create_all(db)
Example #10
def remove_image(name, **kwargs):
    path = config.get('filesys', 'img_src')

    path = os.path.join(path, name)
    if (os.path.exists(path)):
        os.remove(path)
    else:
        raise HydraError("File with name (%s) does not exist!" % (name))

    return True
Example #11
def connect(url=None):
    if url is None:
        port = config.getint('hydra_server', 'port', '8080')
        domain = config.get('hydra_server', 'domain', 'localhost')
        path = config.get('hydra_server', 'soap_path', 'soap')
        if path:
            if path[0] == '/':
                path = path[1:]
            url = 'http://%s:%s/%s?wsdl' % (domain, port, path)
        else:
            url = 'http://%s:%s?wsdl' % (domain, port)

    client = Client(url, plugins=[FixNamespace()])

    cache = client.options.cache
    cache.setduration(days=10)

    client.add_prefix('hyd', 'soap_server.hydra_complexmodels')
    return client
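
Assuming the suds-based connect() above is importable, a session might look like this (the login call mirrors Example #16):

client = connect()  #or connect('http://example.com:8080/soap?wsdl')
login_response = client.service.login('root', '')
print(login_response.sessionid)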
Example #13
def remove_file(resource_type, resource_id, name, **kwargs):
    path = config.get('filesys', 'file_src')

    path = os.path.join(path, resource_type, str(resource_id), name)

    if (os.path.exists(path)):
        os.remove(path)
    else:
        raise HydraError("File with name (%s) does not exist!" % (name))

    return True
Example #16
    def login(self, username=None, password=None):
        """Establish a connection to the specified server. If the URL of the
        server is not specified as an argument of this function, the URL
        defined in the configuration file is used."""

        # Connect
        token = self.client.factory.create('RequestHeader')
        if self.sessionid is None:
            if username is None:
                username = config.get('hydra_client', 'user')
            if password is None:
                password = config.get('hydra_client', 'password')
            login_response = self.client.service.login(username, password)
            token.user_id = login_response.user_id
            self.sessionid = login_response.sessionid
            token.username = username

        token.sessionid = self.sessionid
        self.client.set_options(soapheaders=token)

        return self.sessionid
Example #17
    def __init__(self, url=None, sessionid=None, app_name=None):
        if url is None:
            port = config.getint('hydra_client', 'port', 80)
            domain = config.get('hydra_client', 'domain', '127.0.0.1')
            path = config.get('hydra_client', 'json_path', 'json')
            #The domain may or may not specify the protocol, so do a check.
            if domain.find('http') == -1:
                self.url = "http://%s:%s/%s" % (domain, port, path)
            else:
                self.url = "%s:%s/%s" % (domain, port, path)
        else:
            log.info("Using user-defined URL: %s", url)
            port = _get_port(url)
            hostname = _get_hostname(url)
            path = _get_path(url)
            protocol = _get_protocol(url)
            self.url = "%s://%s:%s%s/json" % (protocol, hostname, port, path)
        log.info("Setting URL %s", self.url)
        self.app_name = app_name

        self.session_id = sessionid
Example #18
def connect():
    db_url = config.get('mysqld', 'url')
    log.info("Connecting to database: %s", db_url)
    global engine
    engine = create_engine(db_url)

    maker = sessionmaker(bind=engine, autoflush=False, autocommit=False,
                         extension=ZopeTransactionExtension())
    global DBSession
    DBSession = scoped_session(maker)

    DeclarativeBase.metadata.create_all(engine)
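
Because the session is configured with ZopeTransactionExtension, commits go through the transaction package rather than DBSession.commit(). A sketch of the intended usage:

import transaction

connect()
#DBSession.add(...) mapped objects here, then:
transaction.commit()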
Example #21
def export_table_to_csv(session, table, target=None):
    """
        @args: session so queries can be made, table so primary key columns
        can be determined and the audit_table to be truncated.
    """

    if target is None:
        target_dir = config.get('db', 'export_target')
        target = os.path.join(target_dir, table.name)
    else:
        target_dir = os.path.dirname(target)

    if not os.path.exists(target_dir):
        os.mkdir(target_dir)

    if os.path.exists(target):
        target_file = open(target, 'r+')

        rs = session.query(table).all()

        entries_in_db = set()
        for r in rs:
            entries_in_db.add("%s" % (r.__repr__()))

        contents = set(target_file.read().split('\n'))

        new_data = entries_in_db.difference(contents)

        if len(new_data) > 0:
            target_file.write("%s\n" % (datetime.datetime.now()))
            target_file.write("%s\n" %
                              ('.'.join([c.name for c in table.columns])))
            for d in new_data:
                target_file.write("%s\n" % (d))
    else:
        target_file = open(target, 'w')
        rs = session.query(table).all()
        target_file.write("%s\n" % (datetime.datetime.now()))
        target_file.write("%s\n" % ('.'.join([c.name for c in table.columns])))
        for r in rs:
            target_file.write("%s\n" % (r.__repr__()))
Example #23
    def test_create_template_from_network(self):
        network = self.create_network_with_data()

        net_template = self.client.service.get_network_as_xml_template(network.id)

        assert net_template is not None

        template_xsd_path = config.get('templates', 'template_xsd_path')
        xmlschema_doc = etree.parse(template_xsd_path)

        xmlschema = etree.XMLSchema(xmlschema_doc)

        xml_tree = etree.fromstring(net_template)

        xmlschema.assertValid(xml_tree)
Example #26
def add_file(resource_type, resource_id, name, file, **kwargs):
    base_path = config.get('filesys', 'file_src')
    path = os.path.join(base_path, resource_type)
    try:
        os.makedirs(path)
    except OSError:
        pass

    path = os.path.join(path, str(resource_id))
    try:
        os.makedirs(path)
    except OSError:
        pass

    path = os.path.join(path, name)

    #The safest way to check if a file exists is to try to open
    #it. If the open succeeds, then throw an exception to this effect.
    try:
        f = open(path)
        raise HydraError("A file with this name (%s) already exists!" % (name))
    except IOError:
        pass

    log.info("Path: %r" % path)
    #Make sure the file we are about to write stays inside the upload directory.
    if not path.startswith(base_path):
        log.critical("Could not open file: %s" % name)
        return False

    f = open(path, 'wb')  # if this fails, the client will see an
    # # internal error.

    try:
        for data in file:
            f.write(data)

        log.debug("File written: %r" % name)
        f.close()

    except:
        log.critical("Error writing to file: %s", name)
        f.close()
        os.remove(path)
        log.debug("File removed: %r" % name)
        return False

    return True
Example #28
def date_to_string(date, seasonal=False):
    """Convert a date to a standard string used by Hydra. The resulting string
    looks like this::

        '2013-10-03T00:49:17.568000'

    Hydra also accepts seasonal time series (yearly recurring). If the flag
    ``seasonal`` is set to ``True``, this function will generate a string
    recognised by Hydra as a seasonal time stamp.
    """

    seasonal_key = config.get('DEFAULT', 'seasonal_key', '9999')
    if seasonal:
        FORMAT = seasonal_key + '-%m-%dT%H:%M:%S.%f'
    else:
        FORMAT = '%Y-%m-%dT%H:%M:%S.%f'
    return date.strftime(FORMAT)
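
For example, with the default seasonal_key of '9999' (expected output shown as comments):

import datetime

d = datetime.datetime(2013, 10, 3, 0, 49, 17, 568000)
print(date_to_string(d))                 #2013-10-03T00:49:17.568000
print(date_to_string(d, seasonal=True))  #9999-10-03T00:49:17.568000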
Example #29
def create_timeseries(client, ResourceAttr):
    #A scenario attribute is a piece of data associated
    #with a resource attribute.
    #[[[1, 2, "hello"], [5, 4, 6]], [[10, 20, 30], [40, 50, 60]]]

    fmt = config.get('DEFAULT', 'datetime_format', "%Y-%m-%dT%H:%M:%S.%f000Z")

    t1 = datetime.datetime.now()
    t2 = t1 + datetime.timedelta(hours=1)
    t3 = t1 + datetime.timedelta(hours=2)

    val_1 = [[[1, 2, "hello"], [5, 4, 6]], [[10, 20, 30], [40, 50, 60]],
             [[9, 8, 7], [6, 5, 4]]]
    val_2 = [1.0, 2.0, 3.0]

    val_3 = [3.0, None, None]

    ts_val = {
        "index": {
            t1.strftime(fmt): val_1,
            t2.strftime(fmt): val_2,
            t3.strftime(fmt): val_3
        }
    }

    metadata_array = json.dumps({'created_by': 'Test user'})

    dataset = dict(
        id=None,
        type='timeseries',
        name='my time series',
        unit='cm^3',
        dimension='Volume',
        hidden='N',
        value=json.dumps(ts_val),
        metadata=metadata_array,
    )

    scenario_attr = dict(
        attr_id=ResourceAttr['attr_id'],
        resource_attr_id=ResourceAttr['id'],
        value=dataset,
    )

    return scenario_attr
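
The ts_val structure above is the same pandas-friendly layout the server parses with pd.read_json (see parse_value in Example #7): an "index" column mapping timestamps to values, where each value may itself be a nested array. A stripped-down sketch with scalar values:

import json

import pandas as pd

ts_val = {"index": {"2014-01-01T00:00:00.000000Z": 1.0,
                    "2014-01-01T01:00:00.000000Z": 2.0}}
#Prints a one-column frame indexed by the two timestamps.
print(pd.read_json(json.dumps(ts_val)))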
Example #30
def validate_plugin_xml(plugin_xml_file_path):
    log.info('Validating plugin xml file (%s).' % plugin_xml_file_path)

    try:
        with open(plugin_xml_file_path) as f:
            plugin_xml = f.read()
    except:
        raise HydraPluginError("Couldn't find plugin.xml.")

    try:
        plugin_xsd_path = os.path.expanduser(config.get('plugin',
                                                        'plugin_xsd_path'))
        log.info("Plugin Input xsd: %s", plugin_xsd_path)
        xmlschema_doc = etree.parse(plugin_xsd_path)
        xmlschema = etree.XMLSchema(xmlschema_doc)
        xml_tree = etree.fromstring(plugin_xml)
    except XMLSyntaxError as e:
        raise HydraPluginError("There is an error in your XML syntax: %s" % e)
Example #31
def check_plugin_status(plugin_name, pid, **kwargs):
    home = os.path.expanduser('~')
    log_dir = config.get('plugin', 'result_file')
    log_file = os.path.join(home, log_dir, plugin_name)
    try:
        f = open(log_file, 'r')
        file_text = f.read()
        pid_index = file_text.find("%%%s%%" % (pid))

        if pid_index < 0:
            return "No log found for PID %s in %s" % (pid, plugin_name)

        split_string = file_text.split("%%%s%%" % (pid))

        return split_string[1]

    except IOError as e:
        return "No log file found for %s in plugin %s. Error was: %s" % (pid, plugin_name, e)
Example #34
def get_image(name, **kwargs):
    path = config.get('filesys', 'img_src')

    path = os.path.join(path, name)

    #The safest way to check if a file exists is to try to open
    #it. If the open succeeds, then throw an exception to this effect.
    try:
        f = open(path, 'rb')
    except IOError:
        raise HydraError("File with name (%s) does not exist!" % (name))

    #read the contents of the file
    imageFile = f.read()

    #encode the contents of the file as a byte array
    #encodedFile = base64.b64encode(imageFile)

    return imageFile
Example #35
    def parse_timeseries(self, timeseries_value):
        """
            Convert a hobbes timeseries to a hydra timeseries
        """

        timeformat = config.get('DEFAULT', 'datetime_format')

        val = {}
        for timeval in timeseries_value[1:]:

            time = timeval[0]
            split = time.split('-')
            d = datetime(year=int(split[0]),
                         month=int(split[1]),
                         day=int(split[2]))

            tstime = datetime.strftime(d, timeformat)
            val[tstime] = float(timeval[1])

        return {"idx1": val}
Example #36
    def setUp(self):
        logging.getLogger('suds').setLevel(logging.ERROR)
        logging.getLogger('suds.client').setLevel(logging.CRITICAL)
        logging.getLogger('suds.metrics').setLevel(logging.CRITICAL)
        # Clear SUDS cache:
        #shutil.rmtree(os.path.join(tmp(), 'suds'), True)
        global CLIENT
        if CLIENT is None:
            CLIENT = util.connect(self.url)

        self.client = CLIENT

        self.login('root', '')

        self.create_user("UserA")
        self.create_user("UserB")
        self.create_user("UserC")
        self.project_id = self.create_project().id

        self.fmt = config.get('DEFAULT', 'datetime_format', "%Y-%m-%dT%H:%M:%S.%f000Z")
Example #39
def get_file(resource_type, resource_id, name, **kwargs):
    path = config.get('filesys', 'file_src')

    path = os.path.join(path, resource_type, str(resource_id), name)

    #The safest way to check if a file exists is to try to open
    #it. If the open succeeds, then throw an exception to this effect.
    try:
        f = open(path, 'rb')
    except IOError:
        raise HydraError("File with name (%s) does not exist!" % (name))

    #read the contents of the file
    file_to_send = f.read()
    f.close()

    #encode the contents of the file as a byte array

    #encodedFile = base64.b64encode(file_to_send)

    return file_to_send
Example #40
def run_plugin(plugin, **kwargs):
    """
        Run a plugin
    """

    args = [sys.executable]

    #Get plugin executable
    home = os.path.expanduser('~')
    path_to_plugin = os.path.join(home, 'svn/HYDRA/HydraPlugins', plugin.location)
    args.append(path_to_plugin)

    #Parse plugin arguments into a string
    plugin_params = " "
    for p in plugin.params:
        param = "--%s=%s " % (p.name, p.value)
        args.append("--%s" % p.name)
        args.append(p.value)
        plugin_params = plugin_params + param

    log_dir = config.get('plugin', 'result_file')
    log_file = os.path.join(home, log_dir, plugin.name)

    #this reads all the logs so far. We're not interested in them.
    #Everything after this is new content to the file.
    try:
        f = open(log_file, 'r')
        f.read()
    except IOError:
        f = open(log_file, 'w')
        f.close()
        f = open(log_file, 'r')

    pid = subprocess.Popen(args).pid
    #run plugin
    #os.system("%s %s"%(path_to_plugin, plugin_params))

    log.info("Process started! PID: %s", pid)

    return str(pid)
Example #42
def xsd_validate(template_file):
    """
        Validate a template against the xsd.
        Return the xml tree if successful.
    """

    with open(template_file) as f:
        xml_template = f.read()

    template_xsd_path = os.path.expanduser(config.get('templates',
                                                      'template_xsd_path'))
    log.info("Template xsd: %s", template_xsd_path)
    xmlschema_doc = etree.parse(template_xsd_path)
    xmlschema = etree.XMLSchema(xmlschema_doc)
    xml_tree = etree.fromstring(xml_template)

    try:
        xmlschema.assertValid(xml_tree)
    except etree.DocumentInvalid as e:
        raise HydraPluginError('Template validation failed: ' + e.message)

    log.info("Template XSD validation successful.")

    return xml_tree
Example #43
def get_val(dataset, timestamp=None):
    """
        Turn the string value of a dataset into an appropriate
        value, be it a decimal value, array or time series.

        If a timestamp is passed to this function, 
        return the values appropriate to the requested times.

        If the timestamp is *before* the start of the timeseries data, return None
        If the timestamp is *after* the end of the timeseries data, return the last
        value.

        The raw flag indicates whether timeseries should be returned raw -- exactly
        as they are in the DB (a timeseries being a list of timeseries data objects,
        for example) or as a single python dictionary

    """
    if dataset.data_type == 'array':
        try:
            return json.loads(dataset.value)
        except ValueError:
            #Didn't work? Maybe because it was compressed.
            val = zlib.decompress(dataset.value)
            return json.loads(val)
    elif dataset.data_type == 'descriptor':
        return str(dataset.value)
    elif dataset.data_type == 'scalar':
        return Decimal(str(dataset.value))
    elif dataset.data_type == 'timeseries':

        try:
            #The data might be compressed.
            val = zlib.decompress(dataset.value)
        except Exception:
            val = dataset.value

        seasonal_year = config.get('DEFAULT', 'seasonal_year', '1678')
        seasonal_key = config.get('DEFAULT', 'seasonal_key', '9999')
        val = val.replace(seasonal_key, seasonal_year)

        timeseries = pd.read_json(val)

        if timestamp is None:
            return timeseries
        else:
            try:
                idx = timeseries.index
                #Seasonal timeseries are stored in the year
                #1678 (the lowest year pandas allows for valid times).
                #Therefore if the timeseries is seasonal,
                #the request must be a seasonal request, not a
                #standard request

                if type(idx) == pd.DatetimeIndex:
                    if set(idx.year) == set([int(seasonal_year)]):
                        if isinstance(timestamp, list):
                            seasonal_timestamp = []
                            for t in timestamp:
                                t_1900 = t.replace(year=int(seasonal_year))
                                seasonal_timestamp.append(t_1900)
                            timestamp = seasonal_timestamp
                        else:
                            timestamp = [
                                timestamp.replace(year=int(seasonal_year))
                            ]

                pandas_ts = timeseries.reindex(timestamp, method='ffill')

                #If there are no values at all, just return None
                if len(pandas_ts.dropna()) == 0:
                    return None

                #Replace all numpy NAN values with None
                pandas_ts = pandas_ts.where(pandas_ts.notnull(), None)

                val_is_array = False
                if len(pandas_ts.columns) > 1:
                    val_is_array = True

                if val_is_array:
                    if type(timestamp) is list and len(timestamp) == 1:
                        ret_val = pandas_ts.loc[timestamp[0]].values.tolist()
                    else:
                        ret_val = pandas_ts.loc[timestamp].values.tolist()
                else:
                    col_name = pandas_ts.loc[timestamp].columns[0]
                    if type(timestamp) is list and len(timestamp) == 1:
                        ret_val = pandas_ts.loc[timestamp[0]].loc[col_name]
                    else:
                        ret_val = pandas_ts.loc[timestamp][
                            col_name].values.tolist()

                return ret_val

            except Exception as e:
                log.critical("Unable to retrive data. Check timestamps.")
                log.critical(e)
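
A sketch of calling get_val with a minimal stand-in for a dataset row; only data_type and value are consulted on the scalar path, and the class here is hypothetical:

class FakeDataset(object):
    data_type = 'scalar'
    value = '3.14'

print(get_val(FakeDataset()))  #Decimal('3.14')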
Example #44
    def run_server(self, port=None):

        log.info("home_dir %s", config.get('DEFAULT', 'home_dir'))
        log.info("hydra_base_dir %s", config.get('DEFAULT', 'hydra_base_dir'))
        log.info("common_app_data_folder %s",
                 config.get('DEFAULT', 'common_app_data_folder'))
        log.info("win_common_documents %s",
                 config.get('DEFAULT', 'win_common_documents'))
        log.info("sqlite url %s", config.get('mysqld', 'url'))
        log.info("layout_xsd_path %s",
                 config.get('hydra_server', 'layout_xsd_path'))
        log.info("default_directory %s",
                 config.get('plugin', 'default_directory'))
        log.info("result_file %s", config.get('plugin', 'result_file'))
        log.info("plugin_xsd_path %s", config.get('plugin', 'plugin_xsd_path'))
        log.info("log_config_path %s",
                 config.get('logging_conf', 'log_config_path'))

        if port is None:
            port = config.getint('hydra_server', 'port', 8080)

        domain = config.get('hydra_server', 'domain', '127.0.0.1')

        check_port_available(domain, port)

        spyne.const.xml_ns.DEFAULT_NS = 'soap_server.hydra_complexmodels'
        cp_wsgi_application = Server((domain, port),
                                     application,
                                     numthreads=10)

        log.info("listening to http://%s:%s", domain, port)
        log.info("wsdl is at: http://%s:%s/soap/?wsdl", domain, port)

        try:
            cp_wsgi_application.start()
        except KeyboardInterrupt:
            cp_wsgi_application.stop()
Example #45
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    result = sock.connect_ex((domain, port))
    if result == 0:
        raise HydraError("Something else is already running on port %s" % port)
    else:
        log.info("Port %s is available", port)


# These few lines are needed by mod_wsgi to turn the server into a WSGI script.
s = HydraServer()
soap_application = s.create_soap_application()
json_application = s.create_json_application()
jsonp_application = s.create_jsonp_application()
http_application = s.create_http_application()

apps = {
    config.get('hydra_server', 'soap_path', 'soap'): soap_application,
    config.get('hydra_server', 'json_path', 'json'): json_application,
    'jsonp': jsonp_application,
    config.get('hydra_server', 'http_path', 'http'): http_application,
}

if ui_app is not None:
    apps[''] = ui_app

wsgi_application = WsgiMounter(apps)

for server in wsgi_application.mounts.values():
    server.max_content_length = 100 * 0x100000  # 100 MB

# Configure the SessionMiddleware
session_opts = {
Example #46
        Integer,\
        String,\
        TIMESTAMP,\
        text,\
        DDL
from sqlalchemy.engine import reflection
import logging
from mysql.connector.connection import MySQLConnection
from HydraLib import config
from subprocess import Popen
from sqlalchemy.types import DECIMAL, NUMERIC
from sqlalchemy.dialects.mysql.base import DOUBLE
from decimal import Decimal
import os

engine_name = config.get('mysqld', 'url')
sqlite_engine = "sqlite:///%s" % (config.get('sqlite', 'backup_url'))


def connect():
    """
        return an inspector object
    """
    # MySQLConnection.get_characterset_info = MySQLConnection.get_charset

    db = create_engine(engine_name, echo=True)
    db.connect()

    return db
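
A sketch of building a schema inspector from the returned engine, using the reflection module already imported above:

db = connect()
insp = reflection.Inspector.from_engine(db)
print(insp.get_table_names())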

Example #47
def search_datasets(dataset_id=None,
                    dataset_name=None,
                    collection_name=None,
                    data_type=None,
                    dimension=None,
                    unit=None,
                    scenario_id=None,
                    metadata_name=None,
                    metadata_val=None,
                    attr_id=None,
                    type_id=None,
                    unconnected=None,
                    inc_metadata='N',
                    inc_val='N',
                    page_start=0,
                    page_size=2000,
                    **kwargs):
    """
        Get multiple datasets, based on several
        filters. If all filters are set to None, all
        datasets in the DB (that the user is allowed to see)
        will be returned.
    """

    log.info("Searching datasets: \ndatset_id: %s,\n"
             "datset_name: %s,\n"
             "collection_name: %s,\n"
             "data_type: %s,\n"
             "dimension: %s,\n"
             "unit: %s,\n"
             "scenario_id: %s,\n"
             "metadata_name: %s,\n"
             "metadata_val: %s,\n"
             "attr_id: %s,\n"
             "type_id: %s,\n"
             "unconnected: %s,\n"
             "inc_metadata: %s,\n"
             "inc_val: %s,\n"
             "page_start: %s,\n"
             "page_size: %s" %
             (dataset_id, dataset_name, collection_name, data_type, dimension,
              unit, scenario_id, metadata_name, metadata_val, attr_id, type_id,
              unconnected, inc_metadata, inc_val, page_start, page_size))

    if page_size is None:
        page_size = config.get('SEARCH', 'page_size', 2000)

    user_id = int(kwargs.get('user_id'))

    dataset_qry = DBSession.query(Dataset.dataset_id, Dataset.data_type,
                                  Dataset.data_units, Dataset.data_dimen,
                                  Dataset.data_name, Dataset.hidden,
                                  Dataset.cr_date, Dataset.created_by,
                                  DatasetOwner.user_id,
                                  null().label('metadata'), Dataset.start_time,
                                  Dataset.frequency, Dataset.value)

    #Dataset ID is unique, so there's no point using the other filters.
    #Only use other filters if the dataset ID is not specified.
    if dataset_id is not None:
        dataset_qry = dataset_qry.filter(Dataset.dataset_id == dataset_id)

    else:
        if dataset_name is not None:
            dataset_qry = dataset_qry.filter(
                func.lower(Dataset.data_name).like("%%%s%%" %
                                                   dataset_name.lower()))
        if collection_name is not None:
            dc = aliased(DatasetCollection)
            dci = aliased(DatasetCollectionItem)
            dataset_qry = dataset_qry.join(
                dc,
                func.lower(dc.collection_name).like(
                    "%%%s%%" % collection_name.lower())).join(
                        dci,
                        and_(dci.collection_id == dc.collection_id,
                             dci.dataset_id == Dataset.dataset_id))

        if data_type is not None:
            dataset_qry = dataset_qry.filter(
                func.lower(Dataset.data_type) == data_type.lower())

        #null is a valid dimension, so we need a way for the searcher
        #to specify that they want to search for datasets with a null dimension
        #rather than ignoring the dimension in the filter. We use 'null' to do this.
        if dimension is not None:
            dimension = dimension.lower()
            if dimension == 'null':
                dimension = None
            if dimension is not None:
                dataset_qry = dataset_qry.filter(
                    func.lower(Dataset.data_dimen) == dimension)
            else:
                dataset_qry = dataset_qry.filter(
                    Dataset.data_dimen == dimension)

        #null is a valid unit, so we need a way for the searcher
        #to specify that they want to search for datasets with a null unit
        #rather than ignoring the unit. We use 'null' to do this.
        if unit is not None:
            unit = unit.lower()
            if unit == 'null':
                unit = None
            if unit is not None:
                dataset_qry = dataset_qry.filter(
                    func.lower(Dataset.data_units) == unit)
            else:
                dataset_qry = dataset_qry.filter(Dataset.data_units == unit)

        if scenario_id is not None:
            dataset_qry = dataset_qry.join(
                ResourceScenario,
                and_(ResourceScenario.dataset_id == Dataset.dataset_id,
                     ResourceScenario.scenario_id == scenario_id))

        if attr_id is not None:
            dataset_qry = dataset_qry.join(
                ResourceScenario,
                ResourceScenario.dataset_id == Dataset.dataset_id).join(
                    ResourceAttr,
                    and_(
                        ResourceAttr.resource_attr_id ==
                        ResourceScenario.resource_attr_id,
                        ResourceAttr.attr_id == attr_id))

        if type_id is not None:
            dataset_qry = dataset_qry.join(
                ResourceScenario,
                ResourceScenario.dataset_id == Dataset.dataset_id).join(
                    ResourceAttr, ResourceAttr.resource_attr_id ==
                    ResourceScenario.resource_attr_id).join(
                        TypeAttr,
                        and_(TypeAttr.attr_id == ResourceAttr.attr_id,
                             TypeAttr.type_id == type_id))

        if unconnected == 'Y':
            stmt = DBSession.query(
                distinct(ResourceScenario.dataset_id).label('dataset_id'),
                literal_column("0").label('col')).subquery()
            dataset_qry = dataset_qry.outerjoin(
                stmt, stmt.c.dataset_id == Dataset.dataset_id)
            dataset_qry = dataset_qry.filter(stmt.c.col == None)
        elif unconnected == 'N':
            #The dataset has to be connected to something
            stmt = DBSession.query(
                distinct(ResourceScenario.dataset_id).label('dataset_id'),
                literal_column("0").label('col')).subquery()
            dataset_qry = dataset_qry.join(
                stmt, stmt.c.dataset_id == Dataset.dataset_id)
        if metadata_name is not None and metadata_val is not None:
            dataset_qry = dataset_qry.join(
                Metadata,
                and_(
                    Metadata.dataset_id == Dataset.dataset_id,
                    func.lower(Metadata.metadata_name).like(
                        "%%%s%%" % metadata_name.lower()),
                    func.lower(Metadata.metadata_val).like(
                        "%%%s%%" % metadata_val.lower())))
        elif metadata_name is not None and metadata_val is None:
            dataset_qry = dataset_qry.join(
                Metadata,
                and_(
                    Metadata.dataset_id == Dataset.dataset_id,
                    func.lower(Metadata.metadata_name).like(
                        "%%%s%%" % metadata_name.lower())))
        elif metadata_name is None and metadata_val is not None:
            dataset_qry = dataset_qry.join(
                Metadata,
                and_(
                    Metadata.dataset_id == Dataset.dataset_id,
                    func.lower(Metadata.metadata_val).like(
                        "%%%s%%" % metadata_val.lower())))

    #All datasets must be joined on dataset owner so only datasets that the
    #user can see are retrieved.
    dataset_qry = dataset_qry.outerjoin(
        DatasetOwner,
        and_(DatasetOwner.dataset_id == Dataset.dataset_id,
             DatasetOwner.user_id == user_id))

    dataset_qry = dataset_qry.filter(
        or_(Dataset.hidden == 'N',
            #"!= None" (not "is not None") so SQLAlchemy emits IS NOT NULL.
            and_(DatasetOwner.user_id != None, Dataset.hidden == 'Y')))

    log.info(str(dataset_qry))

    datasets = dataset_qry.all()

    log.info("Retrieved %s datasets", len(datasets))

    #page the datasets:
    if page_start + page_size > len(datasets):
        page_end = None
    else:
        page_end = page_start + page_size

    datasets = datasets[page_start:page_end]

    log.info("Datasets paged from result %s to %s", page_start, page_end)

    datasets_to_return = []
    for dataset_row in datasets:

        dataset_dict = dataset_row._asdict()

        if inc_val == 'N':
            dataset_dict['value'] = None
        else:
            #convert the value row into a string as it is returned as a binary
            if dataset_row.value is not None:
                dataset_dict['value'] = str(dataset_row.value)

        if inc_metadata == 'Y':
            metadata = DBSession.query(Metadata).filter(
                Metadata.dataset_id == dataset_row.dataset_id).all()
            dataset_dict['metadata'] = metadata
        else:
            dataset_dict['metadata'] = []

        dataset = namedtuple('Dataset', dataset_dict.keys())(**dataset_dict)

        datasets_to_return.append(dataset)

    return datasets_to_return
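
A hypothetical call: find connected timeseries datasets whose name contains 'flow', with values and metadata included (user_id arrives through **kwargs, as the function expects):

results = search_datasets(dataset_name='flow',
                          data_type='timeseries',
                          unconnected='N',
                          inc_metadata='Y',
                          inc_val='Y',
                          page_size=50,
                          user_id=1)
for dataset in results:
    print(dataset.dataset_id, dataset.data_name)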
Example #48
0
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    result = sock.connect_ex((domain,port))
    if result == 0:
        raise HydraError("Something else is already running on port %s"%port)
    else:
        log.info("Port %s is available", port)

# These few lines are needed by mod_wsgi to turn the server into a WSGI script.
s = HydraServer()
soap_application = s.create_soap_application()
json_application = s.create_json_application()
jsonp_application = s.create_jsonp_application()
http_application = s.create_http_application()

apps = {
        config.get('hydra_server', 'soap_path', 'soap'): soap_application,
        config.get('hydra_server', 'json_path', 'json'): json_application,
        'jsonp': jsonp_application,
        config.get('hydra_server', 'http_path', 'http'): http_application,
}

if ui_app is not None:
    apps[''] = ui_app

wsgi_application = WsgiMounter(apps)

for server in wsgi_application.mounts.values():
    server.max_content_length = 100 * 0x100000 # 10 MB

# Configure the SessionMiddleware
session_opts = {
Example #49
0
    def run_server(self, port=None):

        log.info("home_dir %s",config.get('DEFAULT', 'home_dir'))
        log.info("hydra_base_dir %s",config.get('DEFAULT', 'hydra_base_dir'))
        log.info("common_app_data_folder %s",config.get('DEFAULT', 'common_app_data_folder'))
        log.info("win_common_documents %s",config.get('DEFAULT', 'win_common_documents'))
        log.info("sqlite url %s",config.get('mysqld', 'url'))
        log.info("layout_xsd_path %s",config.get('hydra_server', 'layout_xsd_path'))
        log.info("default_directory %s",config.get('plugin', 'default_directory'))
        log.info("result_file %s",config.get('plugin', 'result_file'))
        log.info("plugin_xsd_path %s",config.get('plugin', 'plugin_xsd_path'))
        log.info("log_config_path %s",config.get('logging_conf', 'log_config_path'))

        if port is None:
            port = config.getint('hydra_server', 'port', 8080)

        domain = config.get('hydra_server', 'domain', '127.0.0.1')

        check_port_available(domain, port)

        spyne.const.xml_ns.DEFAULT_NS = 'soap_server.hydra_complexmodels'
        cp_wsgi_application = Server((domain,port), application, numthreads=10)

        log.info("listening to http://%s:%s", domain, port)
        log.info("wsdl is at: http://%s:%s/soap/?wsdl", domain, port)

        try:
            cp_wsgi_application.start()
        except KeyboardInterrupt:
            cp_wsgi_application.stop()
Example #50
0
def get_val(dataset, timestamp=None):
    """
        Turn the string value of a dataset into an appropriate
        value, be it a decimal value, array or time series.

        If a timestamp is passed to this function, 
        return the values appropriate to the requested times.

        If the timestamp is *before* the start of the timeseries data, return None
        If the timestamp is *after* the end of the timeseries data, return the last
        value.

        The raw flag indicates whether timeseries should be returned raw -- exactly
        as they are in the DB (a timeseries being a list of timeseries data objects,
        for example) or as a single python dictionary

    """
    if dataset.data_type == 'array':
        try:
            return json.loads(dataset.value)
        except ValueError:
            #Didn't work? Maybe because it was compressed.
            val = zlib.decompress(dataset.value)
            return json.loads(val)
    elif dataset.data_type == 'descriptor':
        return str(dataset.value)
    elif dataset.data_type == 'scalar':
        return Decimal(str(dataset.value))
    elif dataset.data_type == 'timeseries':

        try:
            #The data might be compressed.
            val = zlib.decompress(dataset.value)
        except Exception:
            val = dataset.value

        seasonal_year = config.get('DEFAULT', 'seasonal_year', '1678')
        seasonal_key = config.get('DEFAULT', 'seasonal_key', '9999')
        #Use the (possibly decompressed) value, not the raw DB value.
        val = val.replace(seasonal_key, seasonal_year)

        timeseries = pd.read_json(val)

        if timestamp is None:
            return timeseries
        else:
            try:
                idx = timeseries.index
                #Seasonal timeseries are stored in the year
                #1678 (the lowest year pandas allows for valid times).
                #Therefore if the timeseries is seasonal, 
                #the request must be a seasonal request, not a 
                #standard request

                if isinstance(idx, pd.DatetimeIndex):
                    if set(idx.year) == set([int(seasonal_year)]):
                        if isinstance(timestamp, list):
                            seasonal_timestamp = []
                            for t in timestamp:
                                t_seasonal = t.replace(year=int(seasonal_year))
                                seasonal_timestamp.append(t_seasonal)
                            timestamp = seasonal_timestamp
                        else:
                            timestamp = [timestamp.replace(year=int(seasonal_year))]

                pandas_ts = timeseries.reindex(timestamp, method='ffill')

                #If there are no values at all, just return None
                if len(pandas_ts.dropna()) == 0:
                    return None

                #Replace all numpy NAN values with None
                pandas_ts = pandas_ts.where(pandas_ts.notnull(), None)

                val_is_array = False
                if len(pandas_ts.columns) > 1:
                    val_is_array = True

                if val_is_array:
                    if type(timestamp) is list and len(timestamp) == 1:
                        ret_val = pandas_ts.loc[timestamp[0]].values.tolist()
                    else:
                        ret_val = pandas_ts.loc[timestamp].values.tolist()
                else:
                    col_name = pandas_ts.loc[timestamp].columns[0]
                    if type(timestamp) is list and len(timestamp) == 1:
                        ret_val = pandas_ts.loc[timestamp[0]].loc[col_name]
                    else:
                        ret_val = pandas_ts.loc[timestamp][col_name].values.tolist()

                return ret_val

            except Exception as e:
                log.critical("Unable to retrieve data. Check timestamps.")
                log.critical(e)
                return None
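
#A minimal standalone sketch of the reindex/ffill semantics get_val relies
#on. The column name 'flow' and the dates are purely illustrative.
import pandas as pd

ts = pd.DataFrame({'flow': [1.0, 3.0]},
                  index=pd.to_datetime(['2002-04-01', '2002-04-03']))

lookup = ts.reindex(pd.to_datetime(['2002-03-31', '2002-04-02', '2002-04-10']),
                    method='ffill')
#2002-03-31 -> NaN  (before the series starts, so get_val returns None)
#2002-04-02 -> 1.0  (forward-filled from the preceding entry)
#2002-04-10 -> 3.0  (after the end, so the last value is used)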
Example #51
0
def search_datasets(dataset_id=None,
                dataset_name=None,
                collection_name=None,
                data_type=None,
                dimension=None,
                unit=None,
                scenario_id=None,
                metadata_name=None,
                metadata_val=None,
                attr_id = None,
                type_id = None,
                unconnected = None,
                inc_metadata='N',
                inc_val = 'N',
                page_start = 0,
                page_size   = 2000,
                **kwargs):
    """
        Get multiple datasets, based on several
        filters. If all filters are set to None, all
        datasets in the DB (that the user is allowe to see)
        will be returned.
    """


    log.info("Searching datasets: \ndatset_id: %s,\n"
                                  "datset_name: %s,\n"
                                  "collection_name: %s,\n"
                                  "data_type: %s,\n"
                                  "dimension: %s,\n"
                                  "unit: %s,\n"
                                  "scenario_id: %s,\n"
                                  "metadata_name: %s,\n"
                                  "metadata_val: %s,\n"
                                  "attr_id: %s,\n"
                                  "type_id: %s,\n"
                                  "unconnected: %s,\n"
                                  "inc_metadata: %s,\n"
                                  "inc_val: %s,\n"
                                  "page_start: %s,\n"
                                  "page_size: %s" % (dataset_id,
                dataset_name,
                collection_name,
                data_type,
                dimension,
                unit,
                scenario_id,
                metadata_name,
                metadata_val,
                attr_id,
                type_id,
                unconnected,
                inc_metadata,
                inc_val,
                page_start,
                page_size))

    if page_size is None:
        #getint ensures the paging arithmetic below works on an integer.
        page_size = config.getint('SEARCH', 'page_size', 2000)

    user_id = int(kwargs.get('user_id'))

    dataset_qry = DBSession.query(Dataset.dataset_id,
            Dataset.data_type,
            Dataset.data_units,
            Dataset.data_dimen,
            Dataset.data_name,
            Dataset.hidden,
            Dataset.cr_date,
            Dataset.created_by,
            DatasetOwner.user_id,
            null().label('metadata'),
            Dataset.start_time,
            Dataset.frequency,
            Dataset.value
    )

    #Dataset ID is unique, so there's no point using the other filters.
    #Only use other filters if the dataset ID is not specified.
    if dataset_id is not None:
        dataset_qry = dataset_qry.filter(
            Dataset.dataset_id==dataset_id)

    else:
        if dataset_name is not None:
            dataset_qry = dataset_qry.filter(
                func.lower(Dataset.data_name).like("%%%s%%"%dataset_name.lower())
            )
        if collection_name is not None:
            dc = aliased(DatasetCollection)
            dci = aliased(DatasetCollectionItem)
            dataset_qry = dataset_qry.join(dc,
                        func.lower(dc.collection_name).like("%%%s%%"%collection_name.lower())
                        ).join(dci,and_(
                            dci.collection_id == dc.collection_id,
                            dci.dataset_id == Dataset.dataset_id))

        if data_type is not None:
            dataset_qry = dataset_qry.filter(
                func.lower(Dataset.data_type) == data_type.lower())

        #null is a valid dimension, so we need a way for the searcher
        #to specify that they want to search for datasets with a null dimension
        #rather than ignoring the dimension in the filter. We use 'null' to do this.
        if dimension is not None:
            dimension = dimension.lower()
            if dimension == 'null':
                dimension = None
            if dimension is not None:
                dataset_qry = dataset_qry.filter(
                    func.lower(Dataset.data_dimen) == dimension)
            else:
                dataset_qry = dataset_qry.filter(
                    Dataset.data_dimen == dimension)

        #null is a valid unit, so we need a way for the searcher
        #to specify that they want to search for datasets with a null unit
        #rather than ignoring the unit. We use 'null' to do this.
        if unit is not None:
            unit = unit.lower()
            if unit == 'null':
                unit = None
            if unit is not None:
                dataset_qry = dataset_qry.filter(
                    func.lower(Dataset.data_units) == unit)
            else:
                dataset_qry = dataset_qry.filter(
                    Dataset.data_units == unit)

        if scenario_id is not None:
            dataset_qry = dataset_qry.join(ResourceScenario,
                                and_(ResourceScenario.dataset_id == Dataset.dataset_id,
                                ResourceScenario.scenario_id == scenario_id))

        if attr_id is not None:
            dataset_qry = dataset_qry.join(
                ResourceScenario, ResourceScenario.dataset_id == Dataset.dataset_id).join(
                ResourceAttr, and_(ResourceAttr.resource_attr_id==ResourceScenario.resource_attr_id,
                                  ResourceAttr.attr_id==attr_id))

        if type_id is not None:
            dataset_qry = dataset_qry.join(
                ResourceScenario, ResourceScenario.dataset_id == Dataset.dataset_id).join(
                ResourceAttr, ResourceAttr.resource_attr_id==ResourceScenario.resource_attr_id).join(
                TypeAttr, and_(TypeAttr.attr_id==ResourceAttr.attr_id, TypeAttr.type_id==type_id))

        if unconnected == 'Y':
            stmt = DBSession.query(distinct(ResourceScenario.dataset_id).label('dataset_id'),
                                literal_column("0").label('col')).subquery()
            dataset_qry = dataset_qry.outerjoin(
                stmt, stmt.c.dataset_id == Dataset.dataset_id)
            dataset_qry = dataset_qry.filter(stmt.c.col == None)
        elif unconnected == 'N':
            #The dataset has to be connected to something
            stmt = DBSession.query(distinct(ResourceScenario.dataset_id).label('dataset_id'),
                                literal_column("0").label('col')).subquery()
            dataset_qry = dataset_qry.join(
                stmt, stmt.c.dataset_id == Dataset.dataset_id)
        if metadata_name is not None and metadata_val is not None:
            dataset_qry = dataset_qry.join(Metadata,
                                and_(Metadata.dataset_id == Dataset.dataset_id,
                                func.lower(Metadata.metadata_name).like("%%%s%%"%metadata_name.lower()),
                                func.lower(Metadata.metadata_val).like("%%%s%%"%metadata_val.lower())))
        elif metadata_name is not None and metadata_val is None:
            dataset_qry = dataset_qry.join(Metadata,
                                and_(Metadata.dataset_id == Dataset.dataset_id,
                                func.lower(Metadata.metadata_name).like("%%%s%%"%metadata_name.lower())))
        elif metadata_name is None and metadata_val is not None:
            dataset_qry = dataset_qry.join(Metadata,
                                and_(Metadata.dataset_id == Dataset.dataset_id,
                                func.lower(Metadata.metadata_val).like("%%%s%%"%metadata_val.lower())))

    #All datasets must be joined on dataset owner so only datasets that the
    #user can see are retrieved.
    dataset_qry = dataset_qry.outerjoin(DatasetOwner,
                                and_(DatasetOwner.dataset_id==Dataset.dataset_id,
                                DatasetOwner.user_id==user_id))

    #'!= None' is deliberate: SQLAlchemy renders it as 'IS NOT NULL', whereas
    #a python-level 'is not None' test would always be True at query-build time.
    dataset_qry = dataset_qry.filter(
        or_(Dataset.hidden == 'N',
            and_(DatasetOwner.user_id != None, Dataset.hidden == 'Y')))

    log.info(str(dataset_qry))

    datasets = dataset_qry.all()

    log.info("Retrieved %s datasets", len(datasets))

    #page the datasets:
    if page_start + page_size > len(datasets):
        page_end = None
    else:
        page_end = page_start + page_size

    datasets = datasets[page_start:page_end]

    log.info("Datasets paged from result %s to %s", page_start, page_end)

    datasets_to_return = []
    for dataset_row in datasets:

        dataset_dict = dataset_row._asdict()

        if inc_val == 'N':
            dataset_dict['value'] = None
        else:
            #convert the value row into a string as it is returned as a binary
            if dataset_row.value is not None:
                dataset_dict['value'] = str(dataset_row.value)

        if inc_metadata=='Y':
            metadata = DBSession.query(Metadata).filter(Metadata.dataset_id==dataset_row.dataset_id).all()
            dataset_dict['metadata'] = metadata
        else:
            dataset_dict['metadata'] = []

        dataset = namedtuple('Dataset', dataset_dict.keys())(**dataset_dict)

        datasets_to_return.append(dataset)

    return datasets_to_return
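
#Hypothetical invocation sketch (the filter values and user_id are
#illustrative): find unconnected scalar datasets whose name contains
#'rainfall', returning metadata but not the raw values.
found = search_datasets(dataset_name='rainfall',
                        data_type='scalar',
                        unconnected='Y',
                        inc_metadata='Y',
                        inc_val='N',
                        user_id=1)
for d in found:
    log.info("%s %s (%s metadata entries)", d.dataset_id, d.data_name, len(d.metadata))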
Example #52
0
from sqlalchemy import create_engine,\
        Integer,\
        String,\
        TIMESTAMP,\
        text,\
        DDL
from sqlalchemy.engine import reflection
import logging
from mysql.connector.connection import MySQLConnection
from HydraLib import config
from subprocess import Popen
from sqlalchemy.types import DECIMAL, NUMERIC
from sqlalchemy.dialects.mysql.base import DOUBLE
from decimal import Decimal
import os

engine_name = config.get('mysqld', 'url')
sqlite_engine = "sqlite:///%s"%(config.get('sqlite', 'backup_url'))

def connect():
    """
        Return a database engine, verifying that a connection can be made.
    """
    # MySQLConnection.get_characterset_info = MySQLConnection.get_charset

    db = create_engine(engine_name, echo=True)
    db.connect()

    return db
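
#A minimal follow-on sketch (the helper name is hypothetical, and it assumes
#the database behind engine_name is reachable): build an inspector from the
#reflection module imported above to enumerate the tables available for audit.
def list_audit_tables():
    db = connect()
    insp = reflection.Inspector.from_engine(db)
    return insp.get_table_names()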

def create_sqlite_backup_db(audit_tables):
    """
Example #53
0
def guess_timefmt(datestr):
    """
    Try to guess the format a date is written in.

    The following formats are supported:

    ================= ============== ===============
    Format            Example        Python format
    ----------------- -------------- ---------------
    ``YYYY-MM-DD``    2002-04-21     %Y-%m-%d
    ``YYYY.MM.DD``    2002.04.21     %Y.%m.%d
    ``YYYY MM DD``    2002 04 21     %Y %m %d
    ``DD-MM-YYYY``    21-04-2002     %d-%m-%Y
    ``DD.MM.YYYY``    21.04.2002     %d.%m.%Y
    ``DD MM YYYY``    21 04 2002     %d %m %Y
    ``DD/MM/YYYY``    21/04/2002     %d/%m/%Y
    ================= ============== ===============

    These formats can also be used for seasonal (yearly recurring) time series.
    The year needs to be replaced by ``9999`` or another configurable year
    representing the seasonal year.

    The following formats are recognised depending on your locale setting.
    There is no guarantee that this will work.

    ================= ============== ===============
    Format            Example        Python format
    ----------------- -------------- ---------------
    ``DD-mmm-YYYY``   21-Apr-2002    %d-%b-%Y
    ``DD.mmm.YYYY``   21.Apr.2002    %d.%b.%Y
    ``DD mmm YYYY``   21 Apr 2002    %d %b %Y
    ``mmm DD YYYY``   Apr 21 2002    %b %d %Y
    ``Mmmmm DD YYYY`` April 21 2002  %B %d %Y
    ================= ============== ===============

    .. note::
        - The time needs to follow this definition without exception:
            `%H:%M:%S.%f`. A complete date and time should therefore look like
            this::

                2002-04-21 15:29:37.522

        - Be aware that in a file with comma separated values you should not
          use a date format that contains commas.
    """

    seasonal_key = str(config.get('DEFAULT', 'seasonal_key', '9999'))

    #replace 'T' with space to handle ISO times.
    if datestr.find('T') > 0:
        dt_delim = 'T'
    else:
        dt_delim = ' '

    delimiters = ['-', '.', ' ', '/']
    formatstrings = [['%Y', '%m', '%d'], ['%d', '%m', '%Y'],
                     ['%d', '%b', '%Y'], ['XXXX', '%m', '%d'],
                     ['%d', '%m', 'XXXX'], ['%d', '%b', 'XXXX'],
                     [seasonal_key, '%m', '%d'], ['%d', '%m', seasonal_key],
                     ['%d', '%b', seasonal_key]]

    timeformats = [
        '%H:%M:%S.%f', '%H:%M:%S', '%H:%M', '%H:%M:%S.%f000Z', '%H:%M:%S.%fZ'
    ]

    # Check if a time is indicated or not
    for timefmt in timeformats:
        try:
            datetime.strptime(datestr.split(dt_delim)[-1].strip(), timefmt)
            usetime = True
            break
        except ValueError:
            usetime = False

    # Check the simple ones:
    for fmt in formatstrings:
        for delim in delimiters:
            datefmt = fmt[0] + delim + fmt[1] + delim + fmt[2]
            if usetime:
                for timefmt in timeformats:
                    complfmt = datefmt + dt_delim + timefmt
                    try:
                        datetime.strptime(datestr, complfmt)
                        return complfmt
                    except ValueError:
                        pass
            else:
                try:
                    datetime.strptime(datestr, datefmt)
                    return datefmt
                except ValueError:
                    pass

    # Check for other formats:
    custom_formats = [
        '%d/%m/%Y', '%b %d %Y', '%B %d %Y', '%d/%m/XXXX',
        '%d/%m/' + seasonal_key
    ]

    for fmt in custom_formats:
        if usetime:
            for timefmt in timeformats:
                complfmt = fmt + dt_delim + timefmt
                try:
                    datetime.strptime(datestr, complfmt)
                    return complfmt
                except ValueError:
                    pass

        else:
            try:
                datetime.strptime(datestr, fmt)
                return fmt
            except ValueError:
                pass

    return None
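
#Illustrative self-checks (the dates are arbitrary):
assert guess_timefmt('2002-04-21') == '%Y-%m-%d'
assert guess_timefmt('21/04/2002 15:29:37.522') == '%d/%m/%Y %H:%M:%S.%f'
assert guess_timefmt('not a date') is None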