def __init__(self, url=None, sessionid=None, app_name=None):
    """Create a SOAP client session.

    If ``url`` is None the server location is read from the
    ``hydra_client`` section of the config; otherwise the supplied URL
    is decomposed and rebuilt into a canonical ``.../soap?wsdl`` URL.
    """
    if url is None:
        cfg_port = config.getint('hydra_client', 'port', 80)
        cfg_domain = config.get('hydra_client', 'domain', '127.0.0.1')
        cfg_path = config.get('hydra_client', 'soap_path', 'soap')
        # The configured domain may or may not already carry a protocol.
        if cfg_domain.find('http') == -1:
            self.url = "http://%s:%s/%s?wsdl" % (cfg_domain, cfg_port, cfg_path)
        else:
            self.url = "%s:%s/%s?wsdl" % (cfg_domain, cfg_port, cfg_path)
    else:
        log.info("Using user-defined URL: %s", url)
        self.url = "%s://%s:%s%s/soap?wsdl" % (_get_protocol(url),
                                               _get_hostname(url),
                                               _get_port(url),
                                               _get_path(url))
    log.info("Setting URL %s", self.url)
    self.app_name = app_name
    self.sessionid = sessionid
    self.retxml = False
    self.client = Client(self.url, timeout=3600,
                         plugins=[FixNamespace()],
                         retxml=self.retxml)
    self.client.add_prefix('hyd', 'soap_server.hydra_complexmodels')
    # Cache the WSDL so repeated connections avoid re-downloading it.
    self.client.options.cache.setduration(days=10)
def __init__(self, url=None, sessionid=None, app_name=None):
    """Create a SOAP client session.

    If ``url`` is None the server location is read from the
    ``hydra_client`` section of the config; otherwise the supplied URL
    is decomposed and rebuilt into a canonical ``.../soap?wsdl`` URL.
    """
    if url is None:
        cfg_port = config.getint('hydra_client', 'port', 80)
        cfg_domain = config.get('hydra_client', 'domain', '127.0.0.1')
        cfg_path = config.get('hydra_client', 'soap_path', 'soap')
        # The configured domain may or may not already carry a protocol.
        if cfg_domain.find('http') == -1:
            self.url = "http://%s:%s/%s?wsdl" % (cfg_domain, cfg_port, cfg_path)
        else:
            self.url = "%s:%s/%s?wsdl" % (cfg_domain, cfg_port, cfg_path)
    else:
        log.info("Using user-defined URL: %s", url)
        self.url = "%s://%s:%s%s/soap?wsdl" % (_get_protocol(url),
                                               _get_hostname(url),
                                               _get_port(url),
                                               _get_path(url))
    log.info("Setting URL %s", self.url)
    self.app_name = app_name
    self.sessionid = sessionid
    self.retxml = False
    self.client = Client(self.url, timeout=3600,
                         plugins=[FixNamespace()],
                         retxml=self.retxml)
    self.client.add_prefix('hyd', 'soap_server.hydra_complexmodels')
    # Cache the WSDL so repeated connections avoid re-downloading it.
    self.client.options.cache.setduration(days=10)
def reindex_timeseries(ts_string, new_timestamps): """ get data for timesamp :param a JSON string, in pandas-friendly format :param a timestamp or list of timestamps (datetimes) :returns a pandas data frame, reindexed with the supplied timestamos or None if no data is found """ #If a single timestamp is passed in, turn it into a list #Reindexing can't work if it's not a list if not isinstance(new_timestamps, list): new_timestamps = [new_timestamps] #Convert the incoming timestamps to datetimes #if they are not datetimes. new_timestamps_converted = [] for t in new_timestamps: new_timestamps_converted.append(get_datetime(t)) new_timestamps = new_timestamps_converted seasonal_year = config.get('DEFAULT', 'seasonal_year', '1678') seasonal_key = config.get('DEFAULT', 'seasonal_key', '9999') ts = ts_string.replace(seasonal_key, seasonal_year) timeseries = pd.read_json(ts) idx = timeseries.index ts_timestamps = new_timestamps #'Fix' the incoming timestamp in case it's a seasonal value if type(idx) == pd.DatetimeIndex: if set(idx.year) == set([int(seasonal_year)]): if isinstance(new_timestamps, list): seasonal_timestamp = [] for t in ts_timestamps: t_1900 = t.replace(year=int(seasonal_year)) seasonal_timestamp.append(t_1900) ts_timestamps = seasonal_timestamp #Reindex the timeseries to reflect the requested timestamps reindexed_ts = timeseries.reindex(ts_timestamps, method='ffill') i = reindexed_ts.index reindexed_ts.index = pd.Index(new_timestamps, names=i.names) #If there are no values at all, just return None if len(reindexed_ts.dropna()) == 0: return None #Replace all numpy NAN values with None pandas_ts = reindexed_ts.where(reindexed_ts.notnull(), None) return pandas_ts
def reindex_timeseries(ts_string, new_timestamps): """ get data for timesamp :param a JSON string, in pandas-friendly format :param a timestamp or list of timestamps (datetimes) :returns a pandas data frame, reindexed with the supplied timestamos or None if no data is found """ #If a single timestamp is passed in, turn it into a list #Reindexing can't work if it's not a list if not isinstance(new_timestamps, list): new_timestamps = [new_timestamps] #Convert the incoming timestamps to datetimes #if they are not datetimes. new_timestamps_converted = [] for t in new_timestamps: new_timestamps_converted.append(get_datetime(t)) new_timestamps = new_timestamps_converted seasonal_year = config.get('DEFAULT','seasonal_year', '1678') seasonal_key = config.get('DEFAULT', 'seasonal_key', '9999') ts = ts_string.replace(seasonal_key, seasonal_year) timeseries = pd.read_json(ts) idx = timeseries.index ts_timestamps = new_timestamps #'Fix' the incoming timestamp in case it's a seasonal value if type(idx) == pd.DatetimeIndex: if set(idx.year) == set([int(seasonal_year)]): if isinstance(new_timestamps, list): seasonal_timestamp = [] for t in ts_timestamps: t_1900 = t.replace(year=int(seasonal_year)) seasonal_timestamp.append(t_1900) ts_timestamps = seasonal_timestamp #Reindex the timeseries to reflect the requested timestamps reindexed_ts = timeseries.reindex(ts_timestamps, method='ffill') i = reindexed_ts.index reindexed_ts.index = pd.Index(new_timestamps, names=i.names) #If there are no values at all, just return None if len(reindexed_ts.dropna()) == 0: return None #Replace all numpy NAN values with None pandas_ts = reindexed_ts.where(reindexed_ts.notnull(), None) return pandas_ts
def login(self, username=None, password=None):
    """Log in to the server, falling back to configured credentials."""
    user = config.get('hydra_client', 'user') if username is None else username
    passwd = config.get('hydra_client', 'password') if password is None else password
    resp = self.call('login', {'username': user, 'password': passwd})
    #set variables for use in request headers
    log.info(resp)
def login(self, username=None, password=None):
    """Log in to the server, falling back to configured credentials."""
    user = config.get('hydra_client', 'user') if username is None else username
    passwd = config.get('hydra_client', 'password') if password is None else password
    resp = self.call('login', {'username': user, 'password': passwd})
    #set variables for use in request headers
    log.info(resp)
def parse_value(self):
    """
    Turn the value of an incoming dataset into a hydra-friendly value.

    Dispatches on ``self.type`` ('descriptor', 'scalar', 'timeseries',
    'array'). Timeseries and array payloads longer than the configured
    ``db/compression_threshold`` are zlib-compressed before being
    returned. Returns None when no value is set (and, implicitly, for
    an unrecognised type); returns the string 'NULL' for explicit NULL
    or empty input. Raises HydraError on any parsing failure.
    """
    try:
        #attr_data.value is a dictionary,
        #but the keys have namespaces which must be stripped.
        if self.value is None:
            log.warn("Cannot parse dataset. No value specified.")
            return None

        data = str(self.value)
        # Normalise explicit NULL markers and empty strings.
        if data.upper().strip() == 'NULL':
            return 'NULL'
        if data.strip() == '':
            return "NULL"

        # Only log a prefix of very large values.
        if len(data) > 100:
            log.debug("Parsing %s", data[0:100])
        else:
            log.debug("Parsing %s", data)

        if self.type == 'descriptor':
            #Hack to work with hashtables. REMOVE AFTER DEMO
            if self.get_metadata_as_dict().get('data_type') == 'hashtable':
                df = pd.read_json(data)
                data = df.transpose().to_json()
            return data
        elif self.type == 'scalar':
            return data
        elif self.type == 'timeseries':
            timeseries_pd = pd.read_json(data)
            #Epoch doesn't work here because dates before 1970 are not
            # supported in read_json. Ridiculous.
            ts = timeseries_pd.to_json(date_format='iso', date_unit='ns')
            # Compress large payloads before they reach the DB.
            if len(data) > int(config.get('db', 'compression_threshold', 1000)):
                return zlib.compress(ts)
            else:
                return ts
        elif self.type == 'array':
            #check to make sure this is valid json
            json.loads(data)
            if len(data) > int(config.get('db', 'compression_threshold', 1000)):
                return zlib.compress(data)
            else:
                return data
    except Exception as e:
        log.exception(e)
        raise HydraError("Error parsing value %s: %s"%(self.value, e))
def create_sqlite_backup_db(audit_tables):
    """
    Create a fresh sqlite backup database containing a mirror of each
    audit table (tables are created as a side effect on the sqlite
    engine; nothing is returned).

    :param audit_tables: iterable of SQLAlchemy Table objects to mirror.
    """
    #we always want to create a whole new DB, so delete the old one first
    #if it exists.
    try:
        # Bug fix: use os.remove instead of Popen("rm %s", shell=True).
        # The shell form was vulnerable to metacharacters in the
        # configured path and returned before the file was actually
        # removed. A missing file raises here and is logged, as before.
        os.remove(config.get('sqlite', 'backup_url'))
        logging.warn("Old sqlite backup DB removed")
    except Exception as e:
        logging.warn(e)

    try:
        aux_dir = config.get('DEFAULT', 'hydra_aux_dir')
        os.mkdir(aux_dir)
        logging.warn("%s created", aux_dir)
    except Exception as e:
        logging.warn(e)

    try:
        backup_dir = config.get('db', 'export_target')
        os.mkdir(backup_dir)
        logging.warn("%s created", backup_dir)
    except Exception as e:
        logging.warn(e)

    db = create_engine(sqlite_engine, echo=True)
    db.connect()
    metadata = MetaData(db)

    for main_audit_table in audit_tables:
        cols = []
        for c in main_audit_table.columns:
            col = c.copy()
            # sqlite has no native Decimal type; store as DECIMAL.
            if col.type.python_type == Decimal:
                col.type = DECIMAL()
            cols.append(col)
        Table(main_audit_table.name, metadata, *cols, sqlite_autoincrement=True)

    metadata.create_all(db)
def remove_image(name, **kwargs):
    """Delete the named image from the configured image directory.

    Raises HydraError if no such file exists; returns True on success.
    """
    img_path = os.path.join(config.get('filesys', 'img_src'), name)
    if not os.path.exists(img_path):
        raise HydraError("File with name (%s) does not exist!"%(name))
    os.remove(img_path)
    return True
def remove_image(name, **kwargs):
    """Delete the named image from the configured image directory.

    Raises HydraError if no such file exists; returns True on success.
    """
    img_path = os.path.join(config.get('filesys', 'img_src'), name)
    if not os.path.exists(img_path):
        raise HydraError("File with name (%s) does not exist!" % (name))
    os.remove(img_path)
    return True
def connect(url=None):
    """Build a suds Client for the hydra server.

    The WSDL URL is derived from the ``hydra_server`` config section
    when ``url`` is not supplied.
    """
    if url is None:
        port = config.getint('hydra_server', 'port', '8080')
        domain = config.get('hydra_server', 'domain', 'localhost')
        path = config.get('hydra_server', 'soap_path', 'soap')
        if path:
            # Strip a leading slash so the URL is well-formed.
            if path[0] == '/':
                path = path[1:]
            url = 'http://%s:%s/%s?wsdl' % (domain, port, path)
        else:
            url = 'http://%s:%s?wsdl' % (domain, port)
    client = Client(url, plugins=[FixNamespace()])
    # Cache the WSDL for 10 days to avoid re-fetching on every connect.
    client.options.cache.setduration(days=10)
    client.add_prefix('hyd', 'soap_server.hydra_complexmodels')
    return client
def connect(url=None):
    """Build a suds Client for the hydra server.

    The WSDL URL is derived from the ``hydra_server`` config section
    when ``url`` is not supplied.
    """
    if url is None:
        port = config.getint('hydra_server', 'port', '8080')
        domain = config.get('hydra_server', 'domain', 'localhost')
        path = config.get('hydra_server', 'soap_path', 'soap')
        if path:
            # Strip a leading slash so the URL is well-formed.
            if path[0] == '/':
                path = path[1:]
            url = 'http://%s:%s/%s?wsdl' % (domain, port, path)
        else:
            url = 'http://%s:%s?wsdl' % (domain, port)
    client = Client(url, plugins=[FixNamespace()])
    # Cache the WSDL for 10 days to avoid re-fetching on every connect.
    client.options.cache.setduration(days=10)
    client.add_prefix('hyd', 'soap_server.hydra_complexmodels')
    return client
def remove_file(resource_type, resource_id, name, **kwargs):
    """Delete a stored file belonging to the given resource.

    Raises HydraError if no such file exists; returns True on success.
    """
    base = config.get('filesys', 'file_src')
    file_path = os.path.join(base, resource_type, str(resource_id), name)
    if not os.path.exists(file_path):
        raise HydraError("File with name (%s) does not exist!" % (name))
    os.remove(file_path)
    return True
def remove_file(resource_type, resource_id, name, **kwargs):
    """Delete a stored file belonging to the given resource.

    Raises HydraError if no such file exists; returns True on success.
    """
    base = config.get('filesys', 'file_src')
    file_path = os.path.join(base, resource_type, str(resource_id), name)
    if not os.path.exists(file_path):
        raise HydraError("File with name (%s) does not exist!"%(name))
    os.remove(file_path)
    return True
def create_sqlite_backup_db(audit_tables):
    """
    Create a fresh sqlite backup database containing a mirror of each
    audit table (tables are created as a side effect on the sqlite
    engine; nothing is returned).

    :param audit_tables: iterable of SQLAlchemy Table objects to mirror.
    """
    #we always want to create a whole new DB, so delete the old one first
    #if it exists.
    try:
        # Bug fix: use os.remove instead of Popen("rm %s", shell=True).
        # The shell form was vulnerable to metacharacters in the
        # configured path and returned before the file was actually
        # removed. A missing file raises here and is logged, as before.
        os.remove(config.get('sqlite', 'backup_url'))
        logging.warn("Old sqlite backup DB removed")
    except Exception as e:
        logging.warn(e)

    try:
        aux_dir = config.get('DEFAULT', 'hydra_aux_dir')
        os.mkdir(aux_dir)
        logging.warn("%s created", aux_dir)
    except Exception as e:
        logging.warn(e)

    try:
        backup_dir = config.get('db', 'export_target')
        os.mkdir(backup_dir)
        logging.warn("%s created", backup_dir)
    except Exception as e:
        logging.warn(e)

    db = create_engine(sqlite_engine, echo=True)
    db.connect()
    metadata = MetaData(db)

    for main_audit_table in audit_tables:
        cols = []
        for c in main_audit_table.columns:
            col = c.copy()
            # sqlite has no native Decimal type; store as DECIMAL.
            if col.type.python_type == Decimal:
                col.type = DECIMAL()
            cols.append(col)
        Table(main_audit_table.name, metadata, *cols, sqlite_autoincrement=True)

    metadata.create_all(db)
def login(self, username=None, password=None):
    """Establish a connection to the specified server.

    If the credentials are not given as arguments, those defined in
    the configuration file are used. Returns the session id of the
    (possibly pre-existing) session.
    """
    # Connect
    token = self.client.factory.create('RequestHeader')
    if self.sessionid is None:
        # Bug fix: explicitly supplied credentials were ignored --
        # 'user'/'passwd' were only bound when the argument was None,
        # raising NameError otherwise.
        user = username if username is not None else config.get('hydra_client', 'user')
        passwd = password if password is not None else config.get('hydra_client', 'password')
        login_response = self.client.service.login(user, passwd)
        token.user_id = login_response.user_id
        self.sessionid = login_response.sessionid
        token.username = user
        token.sessionid = self.sessionid
        self.client.set_options(soapheaders=token)
    # Bug fix: 'session_id' was an undefined name (always NameError);
    # return the session id actually obtained (or the existing one).
    return self.sessionid
def __init__(self, url=None, sessionid=None, app_name=None):
    """Create a JSON client.

    The endpoint URL is read from the ``hydra_client`` config section
    unless ``url`` is supplied, in which case the URL is decomposed
    and rebuilt into a canonical ``.../json`` endpoint.
    """
    if url is None:
        cfg_port = config.getint('hydra_client', 'port', 80)
        cfg_domain = config.get('hydra_client', 'domain', '127.0.0.1')
        cfg_path = config.get('hydra_client', 'json_path', 'json')
        # The configured domain may or may not already carry a protocol.
        if cfg_domain.find('http') == -1:
            self.url = "http://%s:%s/%s" % (cfg_domain, cfg_port, cfg_path)
        else:
            self.url = "%s:%s/%s" % (cfg_domain, cfg_port, cfg_path)
    else:
        log.info("Using user-defined URL: %s", url)
        self.url = "%s://%s:%s%s/json" % (_get_protocol(url),
                                          _get_hostname(url),
                                          _get_port(url),
                                          _get_path(url))
    log.info("Setting URL %s", self.url)
    self.app_name = app_name
    # NOTE(review): the SOAP client stores this as 'sessionid' -- confirm
    # which spelling downstream code expects.
    self.session_id = sessionid
def connect():
    """Connect to the configured database and initialise the module's
    global engine and scoped DBSession, creating missing tables."""
    global engine, DBSession
    db_url = config.get('mysqld', 'url')
    log.info("Connecting to database: %s", db_url)
    engine = create_engine(db_url)
    session_factory = sessionmaker(bind=engine,
                                   autoflush=False,
                                   autocommit=False,
                                   extension=ZopeTransactionExtension())
    DBSession = scoped_session(session_factory)
    # Create any declared tables that do not yet exist.
    DeclarativeBase.metadata.create_all(engine)
def login(self, username=None, password=None):
    """Establish a connection to the specified server.

    If the credentials are not given as arguments, those defined in
    the configuration file are used. Returns the session id of the
    (possibly pre-existing) session.
    """
    # Connect
    token = self.client.factory.create('RequestHeader')
    if self.sessionid is None:
        # Bug fix: explicitly supplied credentials were ignored --
        # 'user'/'passwd' were only bound when the argument was None,
        # raising NameError otherwise.
        user = username if username is not None else config.get('hydra_client', 'user')
        passwd = password if password is not None else config.get('hydra_client', 'password')
        login_response = self.client.service.login(user, passwd)
        token.user_id = login_response.user_id
        self.sessionid = login_response.sessionid
        token.username = user
        token.sessionid = self.sessionid
        self.client.set_options(soapheaders=token)
    # Bug fix: 'session_id' was an undefined name (always NameError);
    # return the session id actually obtained (or the existing one).
    return self.sessionid
def __init__(self, url=None, sessionid=None, app_name=None):
    """Create a JSON client.

    The endpoint URL is read from the ``hydra_client`` config section
    unless ``url`` is supplied, in which case the URL is decomposed
    and rebuilt into a canonical ``.../json`` endpoint.
    """
    if url is None:
        cfg_port = config.getint('hydra_client', 'port', 80)
        cfg_domain = config.get('hydra_client', 'domain', '127.0.0.1')
        cfg_path = config.get('hydra_client', 'json_path', 'json')
        # The configured domain may or may not already carry a protocol.
        if cfg_domain.find('http') == -1:
            self.url = "http://%s:%s/%s" % (cfg_domain, cfg_port, cfg_path)
        else:
            self.url = "%s:%s/%s" % (cfg_domain, cfg_port, cfg_path)
    else:
        log.info("Using user-defined URL: %s", url)
        self.url = "%s://%s:%s%s/json" % (_get_protocol(url),
                                          _get_hostname(url),
                                          _get_port(url),
                                          _get_path(url))
    log.info("Setting URL %s", self.url)
    self.app_name = app_name
    # NOTE(review): the SOAP client stores this as 'sessionid' -- confirm
    # which spelling downstream code expects.
    self.session_id = sessionid
def export_table_to_csv(session, table, target=None):
    """
    Export the rows of ``table`` to a text file: append rows missing
    from an existing file, or write the whole table to a new one.

    @args: session so queries can be made, table so primary key columns
           can be determined and the audit_table to be truncated.
    """
    if target is None:
        target = os.path.join(config.get('db', 'export_target'), table.name)
    # Bug fix: target_dir was only bound when target was None, raising
    # NameError whenever a target path was passed in explicitly.
    target_dir = os.path.dirname(target)
    if target_dir and not os.path.exists(target_dir):
        os.mkdir(target_dir)

    rs = session.query(table).all()
    header = '.'.join([c.name for c in table.columns])

    if os.path.exists(target):
        # Append only the rows that are not already in the file.
        # 'with' guarantees the handle is closed (it was leaked before).
        with open(target, 'r+') as target_file:
            entries_in_db = set()
            for r in rs:
                entries_in_db.add("%s" % (r.__repr__()))
            contents = set(target_file.read().split('\n'))
            new_data = entries_in_db.difference(contents)
            if len(new_data) > 0:
                target_file.write("%s\n" % (datetime.datetime.now()))
                target_file.write("%s\n" % (header))
                for d in new_data:
                    target_file.write("%s\n" % (d))
    else:
        with open(target, 'w') as target_file:
            target_file.write("%s\n" % (datetime.datetime.now()))
            target_file.write("%s\n" % (header))
            for r in rs:
                target_file.write("%s\n" % (r.__repr__()))
def export_table_to_csv(session, table, target=None):
    """
    Export the rows of ``table`` to a text file: append rows missing
    from an existing file, or write the whole table to a new one.

    @args: session so queries can be made, table so primary key columns
           can be determined and the audit_table to be truncated.
    """
    if target is None:
        target = os.path.join(config.get('db', 'export_target'), table.name)
    # Bug fix: target_dir was only bound when target was None, raising
    # NameError whenever a target path was passed in explicitly.
    target_dir = os.path.dirname(target)
    if target_dir and not os.path.exists(target_dir):
        os.mkdir(target_dir)

    rs = session.query(table).all()
    header = '.'.join([c.name for c in table.columns])

    if os.path.exists(target):
        # Append only the rows that are not already in the file.
        # 'with' guarantees the handle is closed (it was leaked before).
        with open(target, 'r+') as target_file:
            entries_in_db = set()
            for r in rs:
                entries_in_db.add("%s"%(r.__repr__()))
            contents = set(target_file.read().split('\n'))
            new_data = entries_in_db.difference(contents)
            if len(new_data) > 0:
                target_file.write("%s\n"%(datetime.datetime.now()))
                target_file.write("%s\n"%(header))
                for d in new_data:
                    target_file.write("%s\n"%(d))
    else:
        with open(target, 'w') as target_file:
            target_file.write("%s\n"%(datetime.datetime.now()))
            target_file.write("%s\n"%(header))
            for r in rs:
                target_file.write("%s\n"%(r.__repr__()))
def test_create_template_from_network(self):
    """Round-trip a network into an XML template and validate the
    result against the template XSD."""
    network = self.create_network_with_data()
    net_template = self.client.service.get_network_as_xml_template(network.id)
    assert net_template is not None
    # Validate the generated template against the configured schema.
    schema_path = config.get('templates', 'template_xsd_path')
    schema = etree.XMLSchema(etree.parse(schema_path))
    schema.assertValid(etree.fromstring(net_template))
def add_file(resource_type, resource_id, name, file, **kwargs):
    """
    Store an uploaded file under file_src/<resource_type>/<resource_id>/<name>.

    Raises HydraError if a file with that name already exists.
    Returns True on success, False if the write failed.
    """
    base = config.get('filesys', 'img_src') if False else config.get('filesys', 'file_src')
    path = os.path.join(base, resource_type)
    try:
        os.makedirs(path)
    except OSError:
        pass
    path = os.path.join(path, str(resource_id))
    try:
        os.makedirs(path)
    except OSError:
        pass
    path = os.path.join(path, name)

    #The safest way to check if a file exists is to try to open
    #it. If the open succeeds, then throw an exception to this effect.
    try:
        f = open(path)
        f.close()  # bug fix: the probe handle was previously leaked
        raise HydraError("A file with this name (%s) already exists!"%(name))
    except IOError:
        pass

    log.info("Path: %r" % path)

    # Bug fix: the original guard compared the path with itself
    # ('path.startswith(path)') and could never fail. Keep writes
    # inside the configured base directory instead.
    if not os.path.abspath(path).startswith(os.path.abspath(base)):
        log.critical("Could not open file: %s"%name)
        return False

    f = open(path, 'wb')  # if this fails, the client will see an
                          # internal error.
    try:
        for data in file:
            f.write(data)
        log.debug("File written: %r" % name)
        f.close()
    except:
        log.critical("Error writing to file: %s", name)
        f.close()
        # Bug fix: remove the partial file at its full path, not the
        # bare name (which resolved relative to the CWD).
        os.remove(path)
        log.debug("File removed: %r" % name)
        return False
    return True
def test_create_template_from_network(self):
    """Round-trip a network into an XML template and validate the
    result against the template XSD."""
    network = self.create_network_with_data()
    net_template = self.client.service.get_network_as_xml_template(network.id)
    assert net_template is not None
    # Validate the generated template against the configured schema.
    schema_path = config.get('templates', 'template_xsd_path')
    schema = etree.XMLSchema(etree.parse(schema_path))
    schema.assertValid(etree.fromstring(net_template))
def add_file(resource_type, resource_id, name, file, **kwargs):
    """
    Store an uploaded file under file_src/<resource_type>/<resource_id>/<name>.

    Raises HydraError if a file with that name already exists.
    Returns True on success, False if the write failed.
    """
    base = config.get('filesys', 'file_src')
    path = os.path.join(base, resource_type)
    try:
        os.makedirs(path)
    except OSError:
        pass
    path = os.path.join(path, str(resource_id))
    try:
        os.makedirs(path)
    except OSError:
        pass
    path = os.path.join(path, name)

    #The safest way to check if a file exists is to try to open
    #it. If the open succeeds, then throw an exception to this effect.
    try:
        f = open(path)
        f.close()  # bug fix: the probe handle was previously leaked
        raise HydraError("A file with this name (%s) already exists!" % (name))
    except IOError:
        pass

    log.info("Path: %r" % path)

    # Bug fix: the original guard compared the path with itself
    # ('path.startswith(path)') and could never fail. Keep writes
    # inside the configured base directory instead.
    if not os.path.abspath(path).startswith(os.path.abspath(base)):
        log.critical("Could not open file: %s" % name)
        return False

    f = open(path, 'wb')  # if this fails, the client will see an
                          # internal error.
    try:
        for data in file:
            f.write(data)
        log.debug("File written: %r" % name)
        f.close()
    except:
        log.critical("Error writing to file: %s", name)
        f.close()
        # Bug fix: remove the partial file at its full path, not the
        # bare name (which resolved relative to the CWD).
        os.remove(path)
        log.debug("File removed: %r" % name)
        return False
    return True
def date_to_string(date, seasonal=False):
    """Convert a date to a standard string used by Hydra.

    The resulting string looks like this::

        '2013-10-03 00:49:17.568-0400'

    Hydra also accepts seasonal time series (yearly recurring). If the
    flag ``seasonal`` is set to ``True``, the year field is replaced by
    the configured seasonal key so Hydra recognises the stamp as
    seasonal.
    """
    year_field = config.get('DEFAULT', 'seasonal_key', '9999') if seasonal else '%Y'
    return date.strftime(year_field + '-%m-%dT%H:%M:%S.%f')
def date_to_string(date, seasonal=False):
    """Convert a date to a standard string used by Hydra.

    The resulting string looks like this::

        '2013-10-03 00:49:17.568-0400'

    Hydra also accepts seasonal time series (yearly recurring). If the
    flag ``seasonal`` is set to ``True``, the year field is replaced by
    the configured seasonal key so Hydra recognises the stamp as
    seasonal.
    """
    year_field = config.get('DEFAULT', 'seasonal_key', '9999') if seasonal else '%Y'
    return date.strftime(year_field + '-%m-%dT%H:%M:%S.%f')
def create_timeseries(client, ResourceAttr):
    """Build a scenario-attribute dict holding a three-step timeseries
    dataset (a nested array, a float list, and a list with nulls)."""
    #A scenario attribute is a piece of data associated
    #with a resource attribute.
    #[[[1, 2, "hello"], [5, 4, 6]], [[10, 20, 30], [40, 50, 60]]]
    fmt = config.get('DEFAULT', 'datetime_format', "%Y-%m-%dT%H:%M:%S.%f000Z")
    start = datetime.datetime.now()
    stamps = [start + datetime.timedelta(hours=h) for h in range(3)]
    step_values = [
        [[[1, 2, "hello"], [5, 4, 6]],
         [[10, 20, 30], [40, 50, 60]],
         [[9, 8, 7], [6, 5, 4]]],
        [1.0, 2.0, 3.0],
        [3.0, None, None],
    ]
    ts_val = {"index": dict((t.strftime(fmt), v)
                            for t, v in zip(stamps, step_values))}
    metadata_array = json.dumps({'created_by': 'Test user'})
    dataset = dict(
        id=None,
        type='timeseries',
        name='my time series',
        unit='cm^3',
        dimension='Volume',
        hidden='N',
        value=json.dumps(ts_val),
        metadata=metadata_array,
    )
    return dict(
        attr_id=ResourceAttr['attr_id'],
        resource_attr_id=ResourceAttr['id'],
        value=dataset,
    )
def validate_plugin_xml(plugin_xml_file_path):
    """Validate a plugin.xml file against the plugin XSD.

    Raises HydraPluginError when the file is missing, syntactically
    malformed, or does not conform to the schema.
    """
    log.info('Validating plugin xml file (%s).' % plugin_xml_file_path)
    try:
        with open(plugin_xml_file_path) as f:
            plugin_xml = f.read()
    except:
        raise HydraPluginError("Couldn't find plugin.xml.")

    try:
        plugin_xsd_path = os.path.expanduser(config.get('plugin', 'plugin_xsd_path'))
        log.info("Plugin Input xsd: %s", plugin_xsd_path)
        xmlschema_doc = etree.parse(plugin_xsd_path)
        xmlschema = etree.XMLSchema(xmlschema_doc)
        xml_tree = etree.fromstring(plugin_xml)
    # Fix: 'except XMLSyntaxError, e' was Python-2-only syntax; use
    # 'as', consistent with the rest of this module.
    except XMLSyntaxError as e:
        raise HydraPluginError("There is an error in your XML syntax: %s" % e)

    # Bug fix: the schema was parsed but never applied, so any
    # well-formed XML passed "validation". Apply it, mirroring
    # xsd_validate().
    try:
        xmlschema.assertValid(xml_tree)
    except etree.DocumentInvalid as e:
        raise HydraPluginError("Plugin validation failed: %s" % e)
def check_plugin_status(plugin_name, pid, **kwargs):
    """Return the log output recorded for ``pid`` by the named plugin.

    The plugin log file contains sections delimited by '%%<pid>%%'
    markers; the text following this PID's marker is returned, or an
    explanatory message when the file or marker is missing.
    """
    home = os.path.expanduser('~')
    log_dir = config.get('plugin', 'result_file')
    log_file = os.path.join(home, log_dir, plugin_name)
    try:
        # 'with' closes the handle (it was previously leaked).
        with open(log_file, 'r') as f:
            file_text = f.read()
        marker = "%%%s%%" % (pid)
        if file_text.find(marker) < 0:
            return "No log found for PID %s in %s" % (pid, plugin_name)
        return file_text.split(marker)[1]
    except IOError as e:
        return "No log file found for %s in plugin %s Error was: %s" % (pid, plugin_name, e)
def parse_timeseries(self, timeseries_value):
    """
    Convert a hobbes timeseries to a hydra timeseries.
    """
    timeformat = config.get('DEFAULT', 'datetime_format')
    val = {}
    # The element at index 0 is skipped -- presumably a header entry;
    # TODO confirm against the hobbes format.
    for entry in timeseries_value[1:]:
        parts = entry[0].split('-')
        stamp = datetime(year=int(parts[0]),
                         month=int(parts[1]),
                         day=int(parts[2]))
        val[datetime.strftime(stamp, timeformat)] = float(entry[1])
    return {"idx1": val}
def get_image(name, **kwargs):
    """Return the raw bytes of the named image from the image directory.

    Raises HydraError if the file does not exist.
    """
    path = config.get('filesys', 'img_src')
    path = os.path.join(path, name)
    #The safest way to check if a file exists is to try to open
    #it. If the open succeeds, then throw an exception to this effect.
    try:
        f = open(path, 'rb')
    except IOError:
        raise HydraError("File with name (%s) does not exist!"%(name))
    #read the contents of the file
    imageFile = f.read()
    # Bug fix: close the handle (it was previously leaked; the sibling
    # get_file() closes its handle, so this restores consistency too).
    f.close()
    #encode the contents of the file as a byte array
    #encodedFile = base64.b64encode(imageFile)
    return imageFile
def get_image(name, **kwargs):
    """Return the raw bytes of the named image from the image directory.

    Raises HydraError if the file does not exist.
    """
    path = config.get('filesys', 'img_src')
    path = os.path.join(path, name)
    #The safest way to check if a file exists is to try to open
    #it. If the open succeeds, then throw an exception to this effect.
    try:
        f = open(path, 'rb')
    except IOError:
        raise HydraError("File with name (%s) does not exist!" % (name))
    #read the contents of the file
    imageFile = f.read()
    # Bug fix: close the handle (it was previously leaked; the sibling
    # get_file() closes its handle, so this restores consistency too).
    f.close()
    #encode the contents of the file as a byte array
    #encodedFile = base64.b64encode(imageFile)
    return imageFile
def parse_timeseries(self, timeseries_value):
    """
    Convert a hobbes timeseries to a hydra timeseries.
    """
    timeformat = config.get('DEFAULT', 'datetime_format')
    val = {}
    # The element at index 0 is skipped -- presumably a header entry;
    # TODO confirm against the hobbes format.
    for entry in timeseries_value[1:]:
        parts = entry[0].split('-')
        stamp = datetime(year=int(parts[0]),
                         month=int(parts[1]),
                         day=int(parts[2]))
        val[datetime.strftime(stamp, timeformat)] = float(entry[1])
    return {"idx1": val}
def setUp(self):
    """Silence suds logging, connect a module-wide shared client, and
    create the users and project used by the tests."""
    for noisy_logger, level in (('suds', logging.ERROR),
                                ('suds.client', logging.CRITICAL),
                                ('suds.metrics', logging.CRITICAL)):
        logging.getLogger(noisy_logger).setLevel(level)
    # Clear SUDS cache:
    #shutil.rmtree(os.path.join(tmp(), 'suds'), True)
    global CLIENT
    if CLIENT is None:
        CLIENT = util.connect(self.url)
    self.client = CLIENT
    self.login('root', '')
    for username in ("UserA", "UserB", "UserC"):
        self.create_user(username)
    self.project_id = self.create_project().id
    self.fmt = config.get('DEFAULT', 'datetime_format',
                          "%Y-%m-%dT%H:%M:%S.%f000Z")
def get_file(resource_type, resource_id, name, **kwargs):
    """Return the raw contents of a stored resource file.

    Raises HydraError if the file does not exist.
    """
    base = config.get('filesys', 'file_src')
    file_path = os.path.join(base, resource_type, str(resource_id), name)
    #The safest way to check if a file exists is to try to open
    #it. If the open succeeds, then throw an exception to this effect.
    try:
        f = open(file_path, 'rb')
    except IOError:
        raise HydraError("File with name (%s) does not exist!"%(name))
    file_to_send = f.read()
    f.close()
    #encode the contents of the file as a byte array
    #encodedFile = base64.b64encode(file_to_send)
    return file_to_send
def setUp(self):
    """Silence suds logging, connect a module-wide shared client, and
    create the users and project used by the tests."""
    for noisy_logger, level in (('suds', logging.ERROR),
                                ('suds.client', logging.CRITICAL),
                                ('suds.metrics', logging.CRITICAL)):
        logging.getLogger(noisy_logger).setLevel(level)
    # Clear SUDS cache:
    #shutil.rmtree(os.path.join(tmp(), 'suds'), True)
    global CLIENT
    if CLIENT is None:
        CLIENT = util.connect(self.url)
    self.client = CLIENT
    self.login('root', '')
    for username in ("UserA", "UserB", "UserC"):
        self.create_user(username)
    self.project_id = self.create_project().id
    self.fmt = config.get('DEFAULT', 'datetime_format',
                          "%Y-%m-%dT%H:%M:%S.%f000Z")
def get_file(resource_type, resource_id, name, **kwargs):
    """Return the raw contents of a stored resource file.

    Raises HydraError if the file does not exist.
    """
    base = config.get('filesys', 'file_src')
    file_path = os.path.join(base, resource_type, str(resource_id), name)
    #The safest way to check if a file exists is to try to open
    #it. If the open succeeds, then throw an exception to this effect.
    try:
        f = open(file_path, 'rb')
    except IOError:
        raise HydraError("File with name (%s) does not exist!" % (name))
    file_to_send = f.read()
    f.close()
    #encode the contents of the file as a byte array
    #encodedFile = base64.b64encode(file_to_send)
    return file_to_send
def run_plugin(plugin, **kwargs):
    """
    Run a plugin as a detached subprocess and return its PID as a
    string. The plugin's parameters are passed as ``--name value``
    argument pairs.
    """
    args = [sys.executable]
    #Get plugin executable
    home = os.path.expanduser('~')
    path_to_plugin = os.path.join(home, 'svn/HYDRA/HydraPlugins', plugin.location)
    args.append(path_to_plugin)
    # Pass each plugin parameter as a --name value pair. (The old
    # 'plugin_params' string built here was dead code.)
    for p in plugin.params:
        args.append("--%s" % p.name)
        args.append(p.value)

    log_dir = config.get('plugin', 'result_file')
    log_file = os.path.join(home, log_dir, plugin.name)
    # Ensure the log file exists before the plugin starts writing.
    # Bug fix: the handles opened here previously were never closed.
    if not os.path.exists(log_file):
        open(log_file, 'w').close()

    pid = subprocess.Popen(args).pid
    log.info("Process started! PID: %s", pid)
    return str(pid)
def create_timeseries(client, ResourceAttr):
    """Build a scenario-attribute dict holding a three-step timeseries
    dataset (a nested array, a float list, and a list with nulls)."""
    #A scenario attribute is a piece of data associated
    #with a resource attribute.
    #[[[1, 2, "hello"], [5, 4, 6]], [[10, 20, 30], [40, 50, 60]]]
    fmt = config.get('DEFAULT', 'datetime_format', "%Y-%m-%dT%H:%M:%S.%f000Z")
    start = datetime.datetime.now()
    stamps = [start + datetime.timedelta(hours=h) for h in range(3)]
    step_values = [
        [[[1, 2, "hello"], [5, 4, 6]],
         [[10, 20, 30], [40, 50, 60]],
         [[9, 8, 7], [6, 5, 4]]],
        [1.0, 2.0, 3.0],
        [3.0, None, None],
    ]
    ts_val = {"index": dict((t.strftime(fmt), v)
                            for t, v in zip(stamps, step_values))}
    metadata_array = json.dumps({'created_by': 'Test user'})
    dataset = dict(
        id=None,
        type='timeseries',
        name='my time series',
        unit='cm^3',
        dimension='Volume',
        hidden='N',
        value=json.dumps(ts_val),
        metadata=metadata_array,
    )
    return dict(
        attr_id=ResourceAttr['attr_id'],
        resource_attr_id=ResourceAttr['id'],
        value=dataset,
    )
def xsd_validate(template_file):
    """
    Validate a template against the xsd.
    Return the xml tree if successful.
    """
    with open(template_file) as f:
        xml_template = f.read()

    template_xsd_path = os.path.expanduser(config.get('templates', 'template_xsd_path'))
    log.info("Template xsd: %s", template_xsd_path)
    schema = etree.XMLSchema(etree.parse(template_xsd_path))
    xml_tree = etree.fromstring(xml_template)

    try:
        schema.assertValid(xml_tree)
    except etree.DocumentInvalid as e:
        raise HydraPluginError('Template validation failed: ' + e.message)

    log.info("Template XSD validation successful.")
    return xml_tree
def get_val(dataset, timestamp=None):
    """
    Turn the string value of a dataset into an appropriate value, be it
    a decimal value, array or time series.

    If a timestamp is passed to this function, return the values
    appropriate to the requested times.

    If the timestamp is *before* the start of the timeseries data,
    return None. If the timestamp is *after* the end of the timeseries
    data, return the last value.
    """
    if dataset.data_type == 'array':
        try:
            return json.loads(dataset.value)
        except ValueError:
            #Didn't work? Maybe because it was compressed.
            val = zlib.decompress(dataset.value)
            return json.loads(val)
    elif dataset.data_type == 'descriptor':
        return str(dataset.value)
    elif dataset.data_type == 'scalar':
        return Decimal(str(dataset.value))
    elif dataset.data_type == 'timeseries':
        try:
            #The data might be compressed.
            val = zlib.decompress(dataset.value)
        except Exception as e:
            val = dataset.value

        seasonal_year = config.get('DEFAULT', 'seasonal_year', '1678')
        seasonal_key = config.get('DEFAULT', 'seasonal_key', '9999')
        # Bug fix: previously this read 'dataset.value.replace(...)',
        # discarding the decompressed data and making compressed
        # timeseries unreadable. Operate on 'val' instead.
        val = val.replace(seasonal_key, seasonal_year)

        timeseries = pd.read_json(val)
        if timestamp is None:
            return timeseries
        else:
            try:
                idx = timeseries.index
                #Seasonal timeseries are stored in the year
                #1678 (the lowest year pandas allows for valid times).
                #Therefore if the timeseries is seasonal,
                #the request must be a seasonal request, not a
                #standard request
                if type(idx) == pd.DatetimeIndex:
                    if set(idx.year) == set([int(seasonal_year)]):
                        if isinstance(timestamp, list):
                            seasonal_timestamp = []
                            for t in timestamp:
                                t_1900 = t.replace(year=int(seasonal_year))
                                seasonal_timestamp.append(t_1900)
                            timestamp = seasonal_timestamp
                        else:
                            timestamp = [timestamp.replace(year=int(seasonal_year))]

                pandas_ts = timeseries.reindex(timestamp, method='ffill')

                #If there are no values at all, just return None
                if len(pandas_ts.dropna()) == 0:
                    return None

                #Replace all numpy NAN values with None
                pandas_ts = pandas_ts.where(pandas_ts.notnull(), None)

                # Multi-column frames are returned as nested lists;
                # single-column frames as a scalar or flat list.
                val_is_array = False
                if len(pandas_ts.columns) > 1:
                    val_is_array = True

                if val_is_array:
                    if type(timestamp) is list and len(timestamp) == 1:
                        ret_val = pandas_ts.loc[timestamp[0]].values.tolist()
                    else:
                        ret_val = pandas_ts.loc[timestamp].values.tolist()
                else:
                    col_name = pandas_ts.loc[timestamp].columns[0]
                    if type(timestamp) is list and len(timestamp) == 1:
                        ret_val = pandas_ts.loc[timestamp[0]].loc[col_name]
                    else:
                        ret_val = pandas_ts.loc[timestamp][col_name].values.tolist()

                return ret_val
            except Exception as e:
                log.critical("Unable to retrive data. Check timestamps.")
                log.critical(e)
def run_server(self, port=None):
    """Launch the CherryPy WSGI server hosting the Hydra application.

    :param port: TCP port to bind; when None, taken from the
                 'hydra_server' section of the config (default 8080).
    """
    # Echo the effective configuration so deployment issues are easy to spot.
    log.info("home_dir %s", config.get('DEFAULT', 'home_dir'))
    log.info("hydra_base_dir %s", config.get('DEFAULT', 'hydra_base_dir'))
    log.info("common_app_data_folder %s", config.get('DEFAULT', 'common_app_data_folder'))
    log.info("win_common_documents %s", config.get('DEFAULT', 'win_common_documents'))
    log.info("sqlite url %s", config.get('mysqld', 'url'))
    log.info("layout_xsd_path %s", config.get('hydra_server', 'layout_xsd_path'))
    log.info("default_directory %s", config.get('plugin', 'default_directory'))
    log.info("result_file %s", config.get('plugin', 'result_file'))
    log.info("plugin_xsd_path %s", config.get('plugin', 'plugin_xsd_path'))
    log.info("log_config_path %s", config.get('logging_conf', 'log_config_path'))

    listen_port = config.getint('hydra_server', 'port', 8080) if port is None else port
    listen_domain = config.get('hydra_server', 'domain', '127.0.0.1')

    # Refuse to start when something else is already bound to the port.
    check_port_available(listen_domain, listen_port)

    spyne.const.xml_ns.DEFAULT_NS = 'soap_server.hydra_complexmodels'

    wsgi_server = Server((listen_domain, listen_port), application, numthreads=10)
    log.info("listening to http://%s:%s", listen_domain, listen_port)
    log.info("wsdl is at: http://%s:%s/soap/?wsdl", listen_domain, listen_port)

    try:
        wsgi_server.start()
    except KeyboardInterrupt:
        wsgi_server.stop()
# NOTE(review): this chunk begins mid-way through a port-availability check --
# 'sock', 'domain' and 'port' are defined on earlier lines not visible here.
result = sock.connect_ex((domain, port))
if result == 0:
    # connect_ex returning 0 means something accepted the connection,
    # i.e. the port is already taken.
    raise HydraError("Something else is already running on port %s" % port)
else:
    log.info("Port %s is available", port)

# These few lines are needed by mod_wsgi to turn the server into a WSGI script.
s = HydraServer()
soap_application = s.create_soap_application()
json_application = s.create_json_application()
jsonp_application = s.create_jsonp_application()
http_application = s.create_http_application()

# Mount each protocol-specific application under its configured URL path.
apps = {
    config.get('hydra_server', 'soap_path', 'soap'): soap_application,
    config.get('hydra_server', 'json_path', 'json'): json_application,
    'jsonp': jsonp_application,
    config.get('hydra_server', 'http_path', 'http'): http_application,
}

# The optional UI app, when present, is served from the root path.
if ui_app is not None:
    apps[''] = ui_app

wsgi_application = WsgiMounter(apps)

for server in wsgi_application.mounts.values():
    # 100 * 0x100000 bytes = 100 MB (the original comment said "10 MB",
    # which does not match the value).
    server.max_content_length = 100 * 0x100000

# Configure the SessionMiddleware
session_opts = {
Integer,\
String,\
TIMESTAMP,\
text,\
DDL
# NOTE(review): the names above are the tail of a 'from sqlalchemy import'
# statement whose opening line is not visible in this chunk.
from sqlalchemy.engine import reflection
import logging
from mysql.connector.connection import MySQLConnection
from HydraLib import config
from subprocess import Popen
from sqlalchemy.types import DECIMAL, NUMERIC
from sqlalchemy.dialects.mysql.base import DOUBLE
from decimal import Decimal
import os

# Database URLs: the primary MySQL engine URL comes from config; the sqlite
# engine points at the configured backup database file.
engine_name = config.get('mysqld', 'url')
sqlite_engine = "sqlite:///%s" % (config.get('sqlite', 'backup_url'))


def connect():
    """
    Create and return a connected SQLAlchemy engine for the configured
    MySQL database.

    NOTE(review): the original docstring said "return an inspector object",
    but the function returns the engine itself (create_engine() followed by
    a connectivity check).
    """
    # MySQLConnection.get_characterset_info = MySQLConnection.get_charset
    db = create_engine(engine_name, echo=True)
    # Open (and implicitly validate) a connection before returning the engine.
    db.connect()
    return db
def search_datasets(dataset_id=None,
                    dataset_name=None,
                    collection_name=None,
                    data_type=None,
                    dimension=None,
                    unit=None,
                    scenario_id=None,
                    metadata_name=None,
                    metadata_val=None,
                    attr_id=None,
                    type_id=None,
                    unconnected=None,
                    inc_metadata='N',
                    inc_val='N',
                    page_start=0,
                    page_size=2000,
                    **kwargs):
    """
    Get multiple datasets, based on several filters.

    If all filters are set to None, all datasets in the DB (that the user
    is allowed to see) will be returned.

    :param dataset_id: exact dataset ID; when given, all other filters
                       are ignored.
    :param dataset_name: case-insensitive substring match on the name.
    :param collection_name: case-insensitive substring match on a collection
                            containing the dataset.
    :param data_type: exact (case-insensitive) data type.
    :param dimension: exact (case-insensitive) dimension; pass the string
                      'null' to find datasets with no dimension.
    :param unit: exact (case-insensitive) unit; pass the string 'null'
                 to find datasets with no unit.
    :param scenario_id: only datasets used in this scenario.
    :param metadata_name: substring match on a metadata name.
    :param metadata_val: substring match on a metadata value.
    :param attr_id: only datasets attached to this attribute.
    :param type_id: only datasets attached to this template type.
    :param unconnected: 'Y' for datasets used in no scenario,
                        'N' for datasets used in at least one.
    :param inc_metadata: 'Y' to fetch metadata for each returned dataset.
    :param inc_val: 'Y' to include the (stringified) dataset value.
    :param page_start: index of the first result to return.
    :param page_size: maximum number of results to return.
    :returns: a list of namedtuples, one per matching dataset.
    """
    log.info("Searching datasets: \ndataset_id: %s,\n"
             "dataset_name: %s,\n"
             "collection_name: %s,\n"
             "data_type: %s,\n"
             "dimension: %s,\n"
             "unit: %s,\n"
             "scenario_id: %s,\n"
             "metadata_name: %s,\n"
             "metadata_val: %s,\n"
             "attr_id: %s,\n"
             "type_id: %s,\n"
             "unconnected: %s,\n"
             "inc_metadata: %s,\n"
             "inc_val: %s,\n"
             "page_start: %s,\n"
             "page_size: %s" % (dataset_id,
                                dataset_name,
                                collection_name,
                                data_type,
                                dimension,
                                unit,
                                scenario_id,
                                metadata_name,
                                metadata_val,
                                attr_id,
                                type_id,
                                unconnected,
                                inc_metadata,
                                inc_val,
                                page_start,
                                page_size))

    if page_size is None:
        #BUG FIX: config values may come back as strings; the paging
        #arithmetic below requires an int.
        page_size = int(config.get('SEARCH', 'page_size', 2000))

    user_id = int(kwargs.get('user_id'))

    dataset_qry = DBSession.query(Dataset.dataset_id,
                                  Dataset.data_type,
                                  Dataset.data_units,
                                  Dataset.data_dimen,
                                  Dataset.data_name,
                                  Dataset.hidden,
                                  Dataset.cr_date,
                                  Dataset.created_by,
                                  DatasetOwner.user_id,
                                  null().label('metadata'),
                                  Dataset.start_time,
                                  Dataset.frequency,
                                  Dataset.value)

    #Dataset ID is unique, so there's no point using the other filters.
    #Only use other filters if the dataset ID is not specified.
    if dataset_id is not None:
        dataset_qry = dataset_qry.filter(Dataset.dataset_id == dataset_id)
    else:
        if dataset_name is not None:
            dataset_qry = dataset_qry.filter(
                func.lower(Dataset.data_name).like("%%%s%%" % dataset_name.lower()))
        if collection_name is not None:
            dc = aliased(DatasetCollection)
            dci = aliased(DatasetCollectionItem)
            dataset_qry = dataset_qry.join(
                dc,
                func.lower(dc.collection_name).like(
                    "%%%s%%" % collection_name.lower())).join(
                        dci,
                        and_(dci.collection_id == dc.collection_id,
                             dci.dataset_id == Dataset.dataset_id))
        if data_type is not None:
            dataset_qry = dataset_qry.filter(
                func.lower(Dataset.data_type) == data_type.lower())
        #null is a valid dimension, so we need a way for the searcher
        #to specify that they want to search for datasets with a null dimension
        #rather than ignoring the dimension in the filter. We use 'null' to do this.
        if dimension is not None:
            dimension = dimension.lower()
            if dimension == 'null':
                dimension = None
            if dimension is not None:
                dataset_qry = dataset_qry.filter(
                    func.lower(Dataset.data_dimen) == dimension)
            else:
                dataset_qry = dataset_qry.filter(
                    Dataset.data_dimen == dimension)
        #null is a valid unit, so we need a way for the searcher
        #to specify that they want to search for datasets with a null unit
        #rather than ignoring the unit. We use 'null' to do this.
        if unit is not None:
            unit = unit.lower()
            if unit == 'null':
                unit = None
            if unit is not None:
                dataset_qry = dataset_qry.filter(
                    func.lower(Dataset.data_units) == unit)
            else:
                dataset_qry = dataset_qry.filter(Dataset.data_units == unit)
        if scenario_id is not None:
            dataset_qry = dataset_qry.join(
                ResourceScenario,
                and_(ResourceScenario.dataset_id == Dataset.dataset_id,
                     ResourceScenario.scenario_id == scenario_id))
        if attr_id is not None:
            dataset_qry = dataset_qry.join(
                ResourceScenario,
                ResourceScenario.dataset_id == Dataset.dataset_id).join(
                    ResourceAttr,
                    and_(ResourceAttr.resource_attr_id == ResourceScenario.resource_attr_id,
                         ResourceAttr.attr_id == attr_id))
        if type_id is not None:
            dataset_qry = dataset_qry.join(
                ResourceScenario,
                ResourceScenario.dataset_id == Dataset.dataset_id).join(
                    ResourceAttr,
                    ResourceAttr.resource_attr_id == ResourceScenario.resource_attr_id).join(
                        TypeAttr,
                        and_(TypeAttr.attr_id == ResourceAttr.attr_id,
                             TypeAttr.type_id == type_id))
        if unconnected == 'Y':
            stmt = DBSession.query(
                distinct(ResourceScenario.dataset_id).label('dataset_id'),
                literal_column("0").label('col')).subquery()
            dataset_qry = dataset_qry.outerjoin(
                stmt, stmt.c.dataset_id == Dataset.dataset_id)
            #Outer join + NULL marker column means "no scenario uses it".
            dataset_qry = dataset_qry.filter(stmt.c.col == None)
        elif unconnected == 'N':
            #The dataset has to be connected to something
            stmt = DBSession.query(
                distinct(ResourceScenario.dataset_id).label('dataset_id'),
                literal_column("0").label('col')).subquery()
            dataset_qry = dataset_qry.join(
                stmt, stmt.c.dataset_id == Dataset.dataset_id)
        if metadata_name is not None and metadata_val is not None:
            dataset_qry = dataset_qry.join(
                Metadata,
                and_(Metadata.dataset_id == Dataset.dataset_id,
                     func.lower(Metadata.metadata_name).like(
                         "%%%s%%" % metadata_name.lower()),
                     func.lower(Metadata.metadata_val).like(
                         "%%%s%%" % metadata_val.lower())))
        elif metadata_name is not None and metadata_val is None:
            dataset_qry = dataset_qry.join(
                Metadata,
                and_(Metadata.dataset_id == Dataset.dataset_id,
                     func.lower(Metadata.metadata_name).like(
                         "%%%s%%" % metadata_name.lower())))
        elif metadata_name is None and metadata_val is not None:
            dataset_qry = dataset_qry.join(
                Metadata,
                and_(Metadata.dataset_id == Dataset.dataset_id,
                     func.lower(Metadata.metadata_val).like(
                         "%%%s%%" % metadata_val.lower())))

    #All datasets must be joined on dataset owner so only datasets that the
    #user can see are retrieved.
    dataset_qry = dataset_qry.outerjoin(
        DatasetOwner,
        and_(DatasetOwner.dataset_id == Dataset.dataset_id,
             DatasetOwner.user_id == user_id))

    #BUG FIX: the original used 'DatasetOwner.user_id is not None', a
    #Python-level identity test on the Column object (always True), which
    #exposed hidden datasets to non-owners. Use the SQL-level NULL test.
    dataset_qry = dataset_qry.filter(
        or_(Dataset.hidden == 'N',
            and_(DatasetOwner.user_id != None,  # noqa -- SQL "IS NOT NULL"
                 Dataset.hidden == 'Y')))

    log.info(str(dataset_qry))
    datasets = dataset_qry.all()

    log.info("Retrieved %s datasets", len(datasets))

    #page the datasets:
    if page_start + page_size > len(datasets):
        page_end = None
    else:
        page_end = page_start + page_size
    datasets = datasets[page_start:page_end]

    log.info("Datasets paged from result %s to %s", page_start, page_end)

    datasets_to_return = []
    for dataset_row in datasets:
        dataset_dict = dataset_row._asdict()

        if inc_val == 'N':
            dataset_dict['value'] = None
        else:
            #convert the value row into a string as it is returned as a binary
            if dataset_row.value is not None:
                dataset_dict['value'] = str(dataset_row.value)

        if inc_metadata == 'Y':
            metadata = DBSession.query(Metadata).filter(
                Metadata.dataset_id == dataset_row.dataset_id).all()
            dataset_dict['metadata'] = metadata
        else:
            dataset_dict['metadata'] = []

        dataset = namedtuple('Dataset', dataset_dict.keys())(**dataset_dict)
        datasets_to_return.append(dataset)

    return datasets_to_return
# NOTE(review): this chunk appears to be the tail of a port-availability
# check -- 'domain' and 'port' are defined on earlier lines not visible here.
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
result = sock.connect_ex((domain, port))
if result == 0:
    # connect_ex returning 0 means something accepted the connection,
    # i.e. the port is already taken.
    raise HydraError("Something else is already running on port %s" % port)
else:
    log.info("Port %s is available", port)

# These few lines are needed by mod_wsgi to turn the server into a WSGI script.
s = HydraServer()
soap_application = s.create_soap_application()
json_application = s.create_json_application()
jsonp_application = s.create_jsonp_application()
http_application = s.create_http_application()

# Mount each protocol-specific application under its configured URL path.
apps = {
    config.get('hydra_server', 'soap_path', 'soap'): soap_application,
    config.get('hydra_server', 'json_path', 'json'): json_application,
    'jsonp': jsonp_application,
    config.get('hydra_server', 'http_path', 'http'): http_application,
}

# The optional UI app, when present, is served from the root path.
if ui_app is not None:
    apps[''] = ui_app

wsgi_application = WsgiMounter(apps)

for server in wsgi_application.mounts.values():
    # 100 * 0x100000 bytes = 100 MB (the original comment said "10 MB",
    # which does not match the value).
    server.max_content_length = 100 * 0x100000

# Configure the SessionMiddleware
session_opts = {
def run_server(self, port=None):
    """Launch the CherryPy WSGI server hosting the Hydra application.

    :param port: TCP port to bind; when None, taken from the
                 'hydra_server' section of the config (default 8080).
    """
    # Echo the effective configuration so deployment issues are easy to spot.
    log.info("home_dir %s", config.get('DEFAULT', 'home_dir'))
    log.info("hydra_base_dir %s", config.get('DEFAULT', 'hydra_base_dir'))
    log.info("common_app_data_folder %s", config.get('DEFAULT', 'common_app_data_folder'))
    log.info("win_common_documents %s", config.get('DEFAULT', 'win_common_documents'))
    log.info("sqlite url %s", config.get('mysqld', 'url'))
    log.info("layout_xsd_path %s", config.get('hydra_server', 'layout_xsd_path'))
    log.info("default_directory %s", config.get('plugin', 'default_directory'))
    log.info("result_file %s", config.get('plugin', 'result_file'))
    log.info("plugin_xsd_path %s", config.get('plugin', 'plugin_xsd_path'))
    log.info("log_config_path %s", config.get('logging_conf', 'log_config_path'))

    listen_port = config.getint('hydra_server', 'port', 8080) if port is None else port
    listen_domain = config.get('hydra_server', 'domain', '127.0.0.1')

    # Refuse to start when something else is already bound to the port.
    check_port_available(listen_domain, listen_port)

    spyne.const.xml_ns.DEFAULT_NS = 'soap_server.hydra_complexmodels'

    wsgi_server = Server((listen_domain, listen_port), application, numthreads=10)
    log.info("listening to http://%s:%s", listen_domain, listen_port)
    log.info("wsdl is at: http://%s:%s/soap/?wsdl", listen_domain, listen_port)

    try:
        wsgi_server.start()
    except KeyboardInterrupt:
        wsgi_server.stop()
def get_val(dataset, timestamp=None):
    """
    Turn the string value of a dataset into an appropriate value, be it a
    decimal value, array or time series.

    :param dataset: object carrying ``data_type`` and ``value`` attributes;
                    ``value`` may be zlib-compressed.
    :param timestamp: optional timestamp or list of timestamps. Only used for
                      timeseries datasets: the values appropriate to the
                      requested times are returned. A timestamp *before* the
                      start of the data yields None; one *after* the end
                      yields the last value (forward-fill reindexing).
    :returns: list / str / Decimal / pandas object depending on data_type,
              or None when a timeseries lookup finds no values.
    """
    if dataset.data_type == 'array':
        try:
            return json.loads(dataset.value)
        except ValueError:
            #Didn't work? Maybe because it was compressed.
            val = zlib.decompress(dataset.value)
            return json.loads(val)
    elif dataset.data_type == 'descriptor':
        return str(dataset.value)
    elif dataset.data_type == 'scalar':
        return Decimal(str(dataset.value))
    elif dataset.data_type == 'timeseries':
        try:
            #The data might be compressed.
            val = zlib.decompress(dataset.value)
        except Exception:
            #Not compressed (or not decompressible): use the raw value.
            val = dataset.value

        if isinstance(val, bytes):
            #zlib returns bytes; the seasonal replacement and read_json
            #below need text.
            val = val.decode('utf-8')

        seasonal_year = config.get('DEFAULT', 'seasonal_year', '1678')
        seasonal_key = config.get('DEFAULT', 'seasonal_key', '9999')

        #BUG FIX: replace on the possibly-decompressed 'val', not on
        #dataset.value -- the original re-read the raw value here, silently
        #discarding the decompression above.
        val = val.replace(seasonal_key, seasonal_year)

        timeseries = pd.read_json(val)

        if timestamp is None:
            return timeseries
        else:
            try:
                idx = timeseries.index
                #Seasonal timeseries are stored in the year 1678 (the lowest
                #year pandas allows for valid times). Therefore if the
                #timeseries is seasonal, the request must be a seasonal
                #request, not a standard request.
                if type(idx) == pd.DatetimeIndex:
                    if set(idx.year) == set([int(seasonal_year)]):
                        if isinstance(timestamp, list):
                            seasonal_timestamp = []
                            for t in timestamp:
                                t_1900 = t.replace(year=int(seasonal_year))
                                seasonal_timestamp.append(t_1900)
                            timestamp = seasonal_timestamp
                        else:
                            timestamp = [timestamp.replace(year=int(seasonal_year))]

                #Reindex onto the requested timestamps; ffill so a timestamp
                #between two data points gets the earlier value.
                pandas_ts = timeseries.reindex(timestamp, method='ffill')

                #If there are no values at all, just return None
                if len(pandas_ts.dropna()) == 0:
                    return None

                #Replace all numpy NAN values with None
                pandas_ts = pandas_ts.where(pandas_ts.notnull(), None)

                #More than one column means each row is itself an array.
                val_is_array = False
                if len(pandas_ts.columns) > 1:
                    val_is_array = True

                if val_is_array:
                    if type(timestamp) is list and len(timestamp) == 1:
                        ret_val = pandas_ts.loc[timestamp[0]].values.tolist()
                    else:
                        ret_val = pandas_ts.loc[timestamp].values.tolist()
                else:
                    col_name = pandas_ts.loc[timestamp].columns[0]
                    if type(timestamp) is list and len(timestamp) == 1:
                        ret_val = pandas_ts.loc[timestamp[0]].loc[col_name]
                    else:
                        ret_val = pandas_ts.loc[timestamp][col_name].values.tolist()

                return ret_val
            except Exception as e:
                log.critical("Unable to retrieve data. Check timestamps.")
                log.critical(e)
def search_datasets(dataset_id=None,
                    dataset_name=None,
                    collection_name=None,
                    data_type=None,
                    dimension=None,
                    unit=None,
                    scenario_id=None,
                    metadata_name=None,
                    metadata_val=None,
                    attr_id=None,
                    type_id=None,
                    unconnected=None,
                    inc_metadata='N',
                    inc_val='N',
                    page_start=0,
                    page_size=2000,
                    **kwargs):
    """
    Get multiple datasets, based on several filters.

    If all filters are set to None, all datasets in the DB (that the user
    is allowed to see) will be returned.

    :param dataset_id: exact dataset ID; when given, all other filters
                       are ignored.
    :param dataset_name: case-insensitive substring match on the name.
    :param collection_name: case-insensitive substring match on a collection
                            containing the dataset.
    :param data_type: exact (case-insensitive) data type.
    :param dimension: exact (case-insensitive) dimension; pass the string
                      'null' to find datasets with no dimension.
    :param unit: exact (case-insensitive) unit; pass the string 'null'
                 to find datasets with no unit.
    :param scenario_id: only datasets used in this scenario.
    :param metadata_name: substring match on a metadata name.
    :param metadata_val: substring match on a metadata value.
    :param attr_id: only datasets attached to this attribute.
    :param type_id: only datasets attached to this template type.
    :param unconnected: 'Y' for datasets used in no scenario,
                        'N' for datasets used in at least one.
    :param inc_metadata: 'Y' to fetch metadata for each returned dataset.
    :param inc_val: 'Y' to include the (stringified) dataset value.
    :param page_start: index of the first result to return.
    :param page_size: maximum number of results to return.
    :returns: a list of namedtuples, one per matching dataset.
    """
    log.info("Searching datasets: \ndataset_id: %s,\n"
             "dataset_name: %s,\n"
             "collection_name: %s,\n"
             "data_type: %s,\n"
             "dimension: %s,\n"
             "unit: %s,\n"
             "scenario_id: %s,\n"
             "metadata_name: %s,\n"
             "metadata_val: %s,\n"
             "attr_id: %s,\n"
             "type_id: %s,\n"
             "unconnected: %s,\n"
             "inc_metadata: %s,\n"
             "inc_val: %s,\n"
             "page_start: %s,\n"
             "page_size: %s" % (dataset_id,
                                dataset_name,
                                collection_name,
                                data_type,
                                dimension,
                                unit,
                                scenario_id,
                                metadata_name,
                                metadata_val,
                                attr_id,
                                type_id,
                                unconnected,
                                inc_metadata,
                                inc_val,
                                page_start,
                                page_size))

    if page_size is None:
        #BUG FIX: config values may come back as strings; the paging
        #arithmetic below requires an int.
        page_size = int(config.get('SEARCH', 'page_size', 2000))

    user_id = int(kwargs.get('user_id'))

    dataset_qry = DBSession.query(Dataset.dataset_id,
                                  Dataset.data_type,
                                  Dataset.data_units,
                                  Dataset.data_dimen,
                                  Dataset.data_name,
                                  Dataset.hidden,
                                  Dataset.cr_date,
                                  Dataset.created_by,
                                  DatasetOwner.user_id,
                                  null().label('metadata'),
                                  Dataset.start_time,
                                  Dataset.frequency,
                                  Dataset.value)

    #Dataset ID is unique, so there's no point using the other filters.
    #Only use other filters if the dataset ID is not specified.
    if dataset_id is not None:
        dataset_qry = dataset_qry.filter(Dataset.dataset_id == dataset_id)
    else:
        if dataset_name is not None:
            dataset_qry = dataset_qry.filter(
                func.lower(Dataset.data_name).like("%%%s%%" % dataset_name.lower()))
        if collection_name is not None:
            dc = aliased(DatasetCollection)
            dci = aliased(DatasetCollectionItem)
            dataset_qry = dataset_qry.join(
                dc,
                func.lower(dc.collection_name).like(
                    "%%%s%%" % collection_name.lower())).join(
                        dci,
                        and_(dci.collection_id == dc.collection_id,
                             dci.dataset_id == Dataset.dataset_id))
        if data_type is not None:
            dataset_qry = dataset_qry.filter(
                func.lower(Dataset.data_type) == data_type.lower())
        #null is a valid dimension, so we need a way for the searcher
        #to specify that they want to search for datasets with a null dimension
        #rather than ignoring the dimension in the filter. We use 'null' to do this.
        if dimension is not None:
            dimension = dimension.lower()
            if dimension == 'null':
                dimension = None
            if dimension is not None:
                dataset_qry = dataset_qry.filter(
                    func.lower(Dataset.data_dimen) == dimension)
            else:
                dataset_qry = dataset_qry.filter(
                    Dataset.data_dimen == dimension)
        #null is a valid unit, so we need a way for the searcher
        #to specify that they want to search for datasets with a null unit
        #rather than ignoring the unit. We use 'null' to do this.
        if unit is not None:
            unit = unit.lower()
            if unit == 'null':
                unit = None
            if unit is not None:
                dataset_qry = dataset_qry.filter(
                    func.lower(Dataset.data_units) == unit)
            else:
                dataset_qry = dataset_qry.filter(Dataset.data_units == unit)
        if scenario_id is not None:
            dataset_qry = dataset_qry.join(
                ResourceScenario,
                and_(ResourceScenario.dataset_id == Dataset.dataset_id,
                     ResourceScenario.scenario_id == scenario_id))
        if attr_id is not None:
            dataset_qry = dataset_qry.join(
                ResourceScenario,
                ResourceScenario.dataset_id == Dataset.dataset_id).join(
                    ResourceAttr,
                    and_(ResourceAttr.resource_attr_id == ResourceScenario.resource_attr_id,
                         ResourceAttr.attr_id == attr_id))
        if type_id is not None:
            dataset_qry = dataset_qry.join(
                ResourceScenario,
                ResourceScenario.dataset_id == Dataset.dataset_id).join(
                    ResourceAttr,
                    ResourceAttr.resource_attr_id == ResourceScenario.resource_attr_id).join(
                        TypeAttr,
                        and_(TypeAttr.attr_id == ResourceAttr.attr_id,
                             TypeAttr.type_id == type_id))
        if unconnected == 'Y':
            stmt = DBSession.query(
                distinct(ResourceScenario.dataset_id).label('dataset_id'),
                literal_column("0").label('col')).subquery()
            dataset_qry = dataset_qry.outerjoin(
                stmt, stmt.c.dataset_id == Dataset.dataset_id)
            #Outer join + NULL marker column means "no scenario uses it".
            dataset_qry = dataset_qry.filter(stmt.c.col == None)
        elif unconnected == 'N':
            #The dataset has to be connected to something
            stmt = DBSession.query(
                distinct(ResourceScenario.dataset_id).label('dataset_id'),
                literal_column("0").label('col')).subquery()
            dataset_qry = dataset_qry.join(
                stmt, stmt.c.dataset_id == Dataset.dataset_id)
        if metadata_name is not None and metadata_val is not None:
            dataset_qry = dataset_qry.join(
                Metadata,
                and_(Metadata.dataset_id == Dataset.dataset_id,
                     func.lower(Metadata.metadata_name).like(
                         "%%%s%%" % metadata_name.lower()),
                     func.lower(Metadata.metadata_val).like(
                         "%%%s%%" % metadata_val.lower())))
        elif metadata_name is not None and metadata_val is None:
            dataset_qry = dataset_qry.join(
                Metadata,
                and_(Metadata.dataset_id == Dataset.dataset_id,
                     func.lower(Metadata.metadata_name).like(
                         "%%%s%%" % metadata_name.lower())))
        elif metadata_name is None and metadata_val is not None:
            dataset_qry = dataset_qry.join(
                Metadata,
                and_(Metadata.dataset_id == Dataset.dataset_id,
                     func.lower(Metadata.metadata_val).like(
                         "%%%s%%" % metadata_val.lower())))

    #All datasets must be joined on dataset owner so only datasets that the
    #user can see are retrieved.
    dataset_qry = dataset_qry.outerjoin(
        DatasetOwner,
        and_(DatasetOwner.dataset_id == Dataset.dataset_id,
             DatasetOwner.user_id == user_id))

    #BUG FIX: the original used 'DatasetOwner.user_id is not None', a
    #Python-level identity test on the Column object (always True), which
    #exposed hidden datasets to non-owners. Use the SQL-level NULL test.
    dataset_qry = dataset_qry.filter(
        or_(Dataset.hidden == 'N',
            and_(DatasetOwner.user_id != None,  # noqa -- SQL "IS NOT NULL"
                 Dataset.hidden == 'Y')))

    log.info(str(dataset_qry))
    datasets = dataset_qry.all()

    log.info("Retrieved %s datasets", len(datasets))

    #page the datasets:
    if page_start + page_size > len(datasets):
        page_end = None
    else:
        page_end = page_start + page_size
    datasets = datasets[page_start:page_end]

    log.info("Datasets paged from result %s to %s", page_start, page_end)

    datasets_to_return = []
    for dataset_row in datasets:
        dataset_dict = dataset_row._asdict()

        if inc_val == 'N':
            dataset_dict['value'] = None
        else:
            #convert the value row into a string as it is returned as a binary
            if dataset_row.value is not None:
                dataset_dict['value'] = str(dataset_row.value)

        if inc_metadata == 'Y':
            metadata = DBSession.query(Metadata).filter(
                Metadata.dataset_id == dataset_row.dataset_id).all()
            dataset_dict['metadata'] = metadata
        else:
            dataset_dict['metadata'] = []

        dataset = namedtuple('Dataset', dataset_dict.keys())(**dataset_dict)
        datasets_to_return.append(dataset)

    return datasets_to_return
Integer,\
String,\
TIMESTAMP,\
text,\
DDL
# NOTE(review): the names above are the tail of a 'from sqlalchemy import'
# statement whose opening line is not visible in this chunk.
from sqlalchemy.engine import reflection
import logging
from mysql.connector.connection import MySQLConnection
from HydraLib import config
from subprocess import Popen
from sqlalchemy.types import DECIMAL, NUMERIC
from sqlalchemy.dialects.mysql.base import DOUBLE
from decimal import Decimal
import os

# Database URLs: the primary MySQL engine URL comes from config; the sqlite
# engine points at the configured backup database file.
engine_name = config.get('mysqld', 'url')
sqlite_engine = "sqlite:///%s" % (config.get('sqlite', 'backup_url'))


def connect():
    """
    Create and return a connected SQLAlchemy engine for the configured
    MySQL database.

    NOTE(review): the original docstring said "return an inspector object",
    but the function returns the engine itself (create_engine() followed by
    a connectivity check).
    """
    # MySQLConnection.get_characterset_info = MySQLConnection.get_charset
    db = create_engine(engine_name, echo=True)
    # Open (and implicitly validate) a connection before returning the engine.
    db.connect()
    return db


def create_sqlite_backup_db(audit_tables):
    """
def guess_timefmt(datestr):
    """
    Try to guess the format a date is written in.

    The following formats are supported:

    ================= ============== ===============
    Format            Example        Python format
    ----------------- -------------- ---------------
    ``YYYY-MM-DD``    2002-04-21     %Y-%m-%d
    ``YYYY.MM.DD``    2002.04.21     %Y.%m.%d
    ``YYYY MM DD``    2002 04 21     %Y %m %d
    ``DD-MM-YYYY``    21-04-2002     %d-%m-%Y
    ``DD.MM.YYYY``    21.04.2002     %d.%m.%Y
    ``DD MM YYYY``    21 04 2002     %d %m %Y
    ``DD/MM/YYYY``    21/04/2002     %d/%m/%Y
    ================= ============== ===============

    These formats can also be used for seasonal (yearly recurring) time
    series. The year needs to be replaced by ``9999`` or another configurable
    year representing the seasonal year.

    The following formats are recognised depending on your locale setting.
    There is no guarantee that this will work.

    ================= ============== ===============
    Format            Example        Python format
    ----------------- -------------- ---------------
    ``DD-mmm-YYYY``   21-Apr-2002    %d-%b-%Y
    ``DD.mmm.YYYY``   21.Apr.2002    %d.%b.%Y
    ``DD mmm YYYY``   21 Apr 2002    %d %b %Y
    ``mmm DD YYYY``   Apr 21 2002    %b %d %Y
    ``Mmmmm DD YYYY`` April 21 2002  %B %d %Y
    ================= ============== ===============

    .. note::
        - The time needs to follow this definition without exception:
          `%H:%M:%S.%f`. A complete date and time should therefore look like
          this:: 2002-04-21 15:29:37.522
        - Be aware that in a file with comma separated values you should not
          use a date format that contains commas.

    :returns: the matching strptime format string, or None if no candidate
              format parses the input.
    """
    seasonal_key = str(config.get('DEFAULT', 'seasonal_key', '9999'))

    # ISO-style strings separate date and time with 'T'; otherwise a space.
    date_time_sep = 'T' if datestr.find('T') > 0 else ' '

    delims = ['-', '.', ' ', '/']
    triplets = [['%Y', '%m', '%d'],
                ['%d', '%m', '%Y'],
                ['%d', '%b', '%Y'],
                ['XXXX', '%m', '%d'],
                ['%d', '%m', 'XXXX'],
                ['%d', '%b', 'XXXX'],
                [seasonal_key, '%m', '%d'],
                ['%d', '%m', seasonal_key],
                ['%d', '%b', seasonal_key]]
    time_fmts = ['%H:%M:%S.%f',
                 '%H:%M:%S',
                 '%H:%M',
                 '%H:%M:%S.%f000Z',
                 '%H:%M:%S.%fZ']

    # Decide whether the string carries a time component at all by trying to
    # parse its last separator-delimited chunk as a time.
    time_part = datestr.split(date_time_sep)[-1].strip()
    has_time = False
    for tf in time_fmts:
        try:
            datetime.strptime(time_part, tf)
            has_time = True
            break
        except ValueError:
            has_time = False

    def _try(candidate):
        # Return the candidate format if datestr parses with it, else None.
        try:
            datetime.strptime(datestr, candidate)
            return candidate
        except ValueError:
            return None

    def _probe(date_fmt):
        # Test a date-only format, appending each time format when a time
        # component is present.
        if has_time:
            for tf in time_fmts:
                hit = _try(date_fmt + date_time_sep + tf)
                if hit is not None:
                    return hit
            return None
        return _try(date_fmt)

    # Check the simple delimiter-joined day/month/year permutations.
    for parts in triplets:
        for delim in delims:
            hit = _probe(delim.join(parts))
            if hit is not None:
                return hit

    # Check the remaining locale-dependent and slash-separated formats.
    for fmt in ['%d/%m/%Y', '%b %d %Y', '%B %d %Y',
                '%d/%m/XXXX', '%d/%m/' + seasonal_key]:
        hit = _probe(fmt)
        if hit is not None:
            return hit

    return None
def guess_timefmt(datestr):
    """
    Try to guess the format a date is written in.

    The following formats are supported:

    ================= ============== ===============
    Format            Example        Python format
    ----------------- -------------- ---------------
    ``YYYY-MM-DD``    2002-04-21     %Y-%m-%d
    ``YYYY.MM.DD``    2002.04.21     %Y.%m.%d
    ``YYYY MM DD``    2002 04 21     %Y %m %d
    ``DD-MM-YYYY``    21-04-2002     %d-%m-%Y
    ``DD.MM.YYYY``    21.04.2002     %d.%m.%Y
    ``DD MM YYYY``    21 04 2002     %d %m %Y
    ``DD/MM/YYYY``    21/04/2002     %d/%m/%Y
    ================= ============== ===============

    These formats can also be used for seasonal (yearly recurring) time
    series. The year needs to be replaced by ``9999`` or another configurable
    year representing the seasonal year.

    The following formats are recognised depending on your locale setting.
    There is no guarantee that this will work.

    ================= ============== ===============
    Format            Example        Python format
    ----------------- -------------- ---------------
    ``DD-mmm-YYYY``   21-Apr-2002    %d-%b-%Y
    ``DD.mmm.YYYY``   21.Apr.2002    %d.%b.%Y
    ``DD mmm YYYY``   21 Apr 2002    %d %b %Y
    ``mmm DD YYYY``   Apr 21 2002    %b %d %Y
    ``Mmmmm DD YYYY`` April 21 2002  %B %d %Y
    ================= ============== ===============

    .. note::
        - The time needs to follow this definition without exception:
          `%H:%M:%S.%f`. A complete date and time should therefore look like
          this:: 2002-04-21 15:29:37.522
        - Be aware that in a file with comma separated values you should not
          use a date format that contains commas.

    :returns: the matching strptime format string, or None if no candidate
              format parses the input.
    """
    seasonal_key = str(config.get('DEFAULT', 'seasonal_key', '9999'))

    # ISO-style strings separate date and time with 'T'; otherwise a space.
    date_time_sep = 'T' if datestr.find('T') > 0 else ' '

    delims = ['-', '.', ' ', '/']
    triplets = [['%Y', '%m', '%d'],
                ['%d', '%m', '%Y'],
                ['%d', '%b', '%Y'],
                ['XXXX', '%m', '%d'],
                ['%d', '%m', 'XXXX'],
                ['%d', '%b', 'XXXX'],
                [seasonal_key, '%m', '%d'],
                ['%d', '%m', seasonal_key],
                ['%d', '%b', seasonal_key]]
    time_fmts = ['%H:%M:%S.%f',
                 '%H:%M:%S',
                 '%H:%M',
                 '%H:%M:%S.%f000Z',
                 '%H:%M:%S.%fZ']

    # Decide whether the string carries a time component at all by trying to
    # parse its last separator-delimited chunk as a time.
    time_part = datestr.split(date_time_sep)[-1].strip()
    has_time = False
    for tf in time_fmts:
        try:
            datetime.strptime(time_part, tf)
            has_time = True
            break
        except ValueError:
            has_time = False

    def _try(candidate):
        # Return the candidate format if datestr parses with it, else None.
        try:
            datetime.strptime(datestr, candidate)
            return candidate
        except ValueError:
            return None

    def _probe(date_fmt):
        # Test a date-only format, appending each time format when a time
        # component is present.
        if has_time:
            for tf in time_fmts:
                hit = _try(date_fmt + date_time_sep + tf)
                if hit is not None:
                    return hit
            return None
        return _try(date_fmt)

    # Check the simple delimiter-joined day/month/year permutations.
    for parts in triplets:
        for delim in delims:
            hit = _probe(delim.join(parts))
            if hit is not None:
                return hit

    # Check the remaining locale-dependent and slash-separated formats.
    for fmt in ['%d/%m/%Y', '%b %d %Y', '%B %d %Y',
                '%d/%m/XXXX', '%d/%m/' + seasonal_key]:
        hit = _probe(fmt)
        if hit is not None:
            return hit

    return None