def do_POST(self):
    """ Handle a POST whose urlencoded body contains a 'query' parameter.

    If the query matches self.expression, execute it against Myria and
    relay the JSON response with CORS headers; otherwise respond 500.
    """
    length = int(self.headers['content-length'])
    body = self.rfile.read(length)
    # Naive urlencoded-body parse: a pair without '=' yields an empty value
    values = dict((pair.split("=") + [''])[:2] for pair in body.split("&"))
    match = re.search(self.expression, values.get('query', ''))
    query = match.group('query') if match else None
    if query:
        connection = MyriaConnection(
            rest_url=self.server.arguments.myria_url,
            execution_url=self.server.arguments.myria_web_url)
        # '+' is an URL-encoded space; the literal token 'plus' stands
        # in for a real '+' in the submitted program
        response = connection.execute_program(
            urllib.unquote(query).replace('+', ' ').replace('plus', '+'))
        self.send_response(200)
        self.send_access_control_headers()
        self.end_headers()
        self.wfile.write(json.dumps(response))
        return
    self.send_response(500)
    self.end_headers()
def import_(cls, source, intermediate, *args, **kwargs):
    """ Import `intermediate` data into Myria as relation `source.name`.

    Each intermediate URI is scattered to a Myria worker in parallel,
    then a plan scanning the scattered URIs is submitted.
    Returns the submitted MyriaQuery.
    """
    # Build a one-off connection only when explicit arguments are given;
    # otherwise reuse the shared default connection.
    connection = MyriaConnection(*args, **kwargs) \
        if args or kwargs else MyriaRelation.DefaultConnection
    # NOTE(review): sibling variants pass schema=intermediate.schema by
    # keyword; confirm the positional form is equivalent here
    schema = MyriaSchema(intermediate.schema).local
    # One scatter process per URI
    pool = multiprocessing.Pool(processes=len(intermediate.uris))
    workers = [(id, urlparse('//' + name).hostname)
               for (id, name) in connection.workers().items()]
    # TODO Need to expose identity and username, if it's still needed
    # for cross-cluster transfers
    identity, username = None, None
    # TODO Need to adjust when |myria| > |other|, since we may not be
    # able to argOverwriteTable=True
    # cycle(workers) lets URIs outnumber workers by wrapping around
    pipes = pool.map(partial(_scatter_uri, identity, username),
                     izip(intermediate.uris, cycle(workers)))
    # Only workers whose scatter produced a URI contribute work items
    work = ((id, {"dataType": "URI", "uri": uri})
            for (id, uri) in pipes if uri)
    plan = utility.get_plan(schema, work,
                            MyriaRelation._get_qualified_name(source.name))
    return MyriaQuery.submit_plan(plan, connection,
                                  timeout=kwargs.get('timeout', 60))
def import_(cls, source, intermediate, *args, **kwargs):
    """ Import `intermediate` data into Myria via direct worker sockets,
    using an implicit-dimension binary file scan.

    Returns the submitted MyriaQuery.
    """
    connection = MyriaConnection(*args, **kwargs) \
        if args or kwargs else MyriaRelation.DefaultConnection
    schema = MyriaSchema(schema=intermediate.schema).local
    workers = [(id, urlparse('//' + name).hostname)
               for (id, name) in connection.workers().items()]
    # Pair each worker id with the host/port of one intermediate URI,
    # positionally — assumes len(uris) matches the worker count (izip
    # silently truncates to the shorter sequence)
    work = ((id, {"dataType": "Socket",
                  "hostname": urlparse(uri).hostname,
                  "port": urlparse(uri).port})
            for ((id, name), uri) in izip(workers, intermediate.uris))
    plan = utility.get_plan(schema, work,
                            MyriaRelation._get_qualified_name(source.name),
                            scan_type='ImplicitDimensionBinaryFileScan')
    # TODO dimensions: assumes each attributes['dimensions'] entry is a
    # triple whose third element is an inclusive upper bound; +1 turns
    # it into a length — confirm against the intermediate format
    dimensions = map(lambda d: d[2] + 1,
                     intermediate.schema.attributes['dimensions'])
    # Patch the generated scan operator in every fragment: the binary
    # scan takes no 'skip' option and needs endianness plus dimensions
    for fragment in plan['fragments']:
        del fragment['operators'][0]['skip']
        fragment['operators'][0]['isLittleEndian'] = True
        fragment['operators'][0]['dimensions'] = dimensions
    return MyriaQuery.submit_plan(plan, connection,
                                  timeout=kwargs.get('timeout', 60))
class TestQuery(unittest.TestCase):
    """ Exercises query submission, execution, validation, and status. """

    def __init__(self, args):
        # All HTTP traffic is served by the local mock
        with HTTMock(local_mock):
            self.connection = MyriaConnection(hostname='localhost', port=12345)
        unittest.TestCase.__init__(self, args)

    def test_submit(self):
        program = query()
        with HTTMock(local_mock):
            status = self.connection.submit_query(program)
            self.assertEquals(status, query_status(program, status='ACCEPTED'))
            self.assertEquals(query_counter, 1)

    def test_execute(self):
        program = query()
        with HTTMock(local_mock):
            status = self.connection.execute_query(program)
            self.assertEquals(status, query_status(program))

    def test_validate(self):
        program = query()
        with HTTMock(local_mock):
            validated = self.connection.validate_query(program)
            self.assertEquals(validated, program)

    def test_query_status(self):
        program = query()
        with HTTMock(local_mock):
            status = self.connection.get_query_status(17)
            self.assertEquals(status, query_status(program))

    def x_test_queries(self):
        # Disabled: the x_ prefix keeps this out of test discovery
        with HTTMock(local_mock):
            count, _ = self.connection.queries()
            self.assertEquals(42, count)
def test_deploy_params(self):
    """ Connecting via explicit host/port exposes the configured workers. """
    with HTTMock(local_mock):
        connection = MyriaConnection(hostname='localhost', port=12345)
        assert connection is not None
        expected = {'1': 'localhost:12347', '2': 'localhost:12348'}
        self.assertEquals(connection.workers(), expected)
def test_deploy_params(self):
    """ Explicit host/port connection exposes the configured workers. """
    expected_workers = {
        '1': 'localhost:12347',
        '2': 'localhost:12348'
    }
    with HTTMock(local_mock):
        connection = MyriaConnection(hostname='localhost', port=12345)
        assert connection is not None
        self.assertEquals(connection.workers(), expected_workers)
def test_deploy_file(self):
    """ A connection built from a deployment file reports its workers. """
    with HTTMock(local_mock):
        connection = None
        with open('myria/test/deployment.cfg.local') as deploy_file:
            connection = MyriaConnection(deploy_file)
        assert connection is not None
        workers = connection.workers()
        self.assertEquals(workers,
                          {'1': 'localhost:12347', '2': 'localhost:12348'})
def test_deploy_file(self):
    """ Deployment-file connection exposes the configured workers. """
    expected_workers = {
        '1': 'localhost:12347',
        '2': 'localhost:12348'
    }
    with HTTMock(local_mock):
        connection = None
        with open('myria/test/deployment.cfg.local') as deploy_file:
            connection = MyriaConnection(deploy_file)
        assert connection is not None
        self.assertEquals(connection.workers(), expected_workers)
def import_(cls, source, intermediate, *args, **kwargs):
    """ Import `intermediate` data into Myria as relation `source.name`.

    Each Myria worker reads one intermediate URI over a direct socket;
    the resulting plan is submitted and the MyriaQuery returned.
    """
    # Build a one-off connection only when explicit arguments are given
    connection = MyriaConnection(*args, **kwargs) \
        if args or kwargs else MyriaRelation.DefaultConnection
    schema = MyriaSchema(schema=intermediate.schema).local
    # NOTE: the original created a multiprocessing.Pool here that was
    # never used or closed, leaking one worker process per URI — removed.
    workers = [(id, urlparse('//' + name).hostname)
               for (id, name) in connection.workers().items()]
    # Pair each worker with the host/port of one intermediate URI
    # (izip truncates to the shorter of the two sequences)
    work = ((id, {"dataType": "Socket",
                  "hostname": urlparse(uri).hostname,
                  "port": urlparse(uri).port})
            for ((id, name), uri) in izip(workers, intermediate.uris))
    plan = utility.get_plan(schema, work,
                            MyriaRelation._get_qualified_name(source.name))
    return MyriaQuery.submit_plan(plan, connection,
                                  timeout=kwargs.get('timeout', 60))
class MyriaRelation(object):
    """ Represents a relation stored in the Myria system """

    # Shared fallback connection used when none is passed to __init__
    DefaultConnection = MyriaConnection(hostname='localhost', port=8753)

    def __init__(self, relation, connection=DefaultConnection,
                 *args, **kwargs):
        """ relation: a name string (e.g. 'public:adhoc:r') or an object
        exposing a .name attribute; connection: Myria API endpoint """
        self.name = relation if isinstance(relation, basestring) \
            else relation.name
        self.components = self._get_name_components(self.name)
        self.connection = connection
        self.qualified_name = self._get_qualified_name(self.components)

    def toJson(self):
        """ Download the relation contents as JSON """
        return self.connection.download_dataset(self.qualified_name)

    @property
    def schema(self):
        """ The relation's schema, derived from server metadata """
        return MyriaSchema(json=self._metadata['schema'])

    @property
    def createdDate(self):
        """ Creation timestamp parsed from server metadata """
        return parse(self._metadata['created'])

    def __len__(self):
        """ Number of tuples, per server metadata """
        return int(self._metadata['numTuples'])

    @property
    def _metadata(self):
        # Lazily fetch the dataset metadata once and cache it on the
        # instance as self.metadata
        if 'metadata' not in self.__dict__:
            self.metadata = self.connection.dataset(self.qualified_name)
        return self.metadata

    @staticmethod
    def _get_name(qualified_name):
        """ Stringify qualified-name components into 'user:program:relation' """
        return ':'.join([
            qualified_name['userName'],
            qualified_name['programName'],
            qualified_name['relationName']
        ])

    @staticmethod
    def _get_name_components(name):
        """ Split a (possibly unqualified) name; default to public:adhoc """
        components = name.split(':')
        default_components = ['public', 'adhoc'][:max(3 - len(components), 0)]
        return default_components + components[:3]

    @staticmethod
    def _get_qualified_name(name_or_components):
        """ Build the {userName, programName, relationName} dict from a
        name string or a component list """
        if isinstance(name_or_components, basestring):
            return MyriaRelation._get_qualified_name(
                MyriaRelation._get_name_components(name_or_components))
        else:
            return dict(
                izip(('userName', 'programName', 'relationName'),
                     name_or_components[:3]))
def __init__(self, shell):
    """ Register these magics with the IPython shell and install a
    module-wide default Myria connection built from the configured
    rest/execution URLs and timeout. """
    Configurable.__init__(self, config=shell.config)
    Magics.__init__(self, shell=shell)
    # Every MyriaRelation created afterwards shares this connection
    MyriaRelation.DefaultConnection = MyriaConnection(
        rest_url=self.rest_url,
        execution_url=self.execution_url,
        timeout=self.timeout)
    self.shell.configurables.append(self)
class TestLogs(unittest.TestCase):
    """ Verifies retrieval of sent-message logs from the REST API. """

    def __init__(self, args):
        with HTTMock(local_mock):
            self.connection = MyriaConnection(hostname='localhost', port=12345)
        unittest.TestCase.__init__(self, args)

    def test_sent_logs(self):
        with HTTMock(local_mock):
            sent = self.connection.get_sent_logs(42)
            self.assertEquals(list(sent), [['foo', 'bar'], ['baz', 'ban']])
def connect(self, line):
    """ Connect to a Myria REST (and optionally web) server """
    options = parse_argstring(self.connect, line)
    self.timeout = options.timeout
    self.language = options.language
    # Install the new connection as the module-wide default and hand it back
    connection = MyriaConnection(rest_url=options.rest_url,
                                 execution_url=options.execution_url,
                                 timeout=options.timeout)
    MyriaRelation.DefaultConnection = connection
    return connection
class TestQuery(unittest.TestCase):
    """ Exercises the worker-related REST endpoints. """

    def __init__(self, args):
        with HTTMock(local_mock):
            self.connection = MyriaConnection(hostname='localhost', port=12345)
        unittest.TestCase.__init__(self, args)

    def test_workers(self):
        with HTTMock(local_mock):
            expected = {'1': 'localhost:12347', '2': 'localhost:12348'}
            self.assertEquals(self.connection.workers(), expected)

    def test_alive(self):
        with HTTMock(local_mock):
            alive = self.connection.workers_alive()
            self.assertEquals(set(alive), set([1, 2]))

    def test_worker_1(self):
        with HTTMock(local_mock):
            self.assertEquals(self.connection.worker(1), 'localhost:12347')
class TestQuery(unittest.TestCase):
    """ Covers worker enumeration, liveness, and per-worker lookup. """

    def __init__(self, args):
        with HTTMock(local_mock):
            self.connection = MyriaConnection(hostname='localhost', port=12345)
        unittest.TestCase.__init__(self, args)

    def test_workers(self):
        expected = {
            '1': 'localhost:12347',
            '2': 'localhost:12348'
        }
        with HTTMock(local_mock):
            self.assertEquals(self.connection.workers(), expected)

    def test_alive(self):
        with HTTMock(local_mock):
            living = self.connection.workers_alive()
            self.assertEquals(set(living), set([1, 2]))

    def test_worker_1(self):
        with HTTMock(local_mock):
            address = self.connection.worker(1)
            self.assertEquals(address, 'localhost:12347')
def do_GET(self):
    """ Handle a GET whose querystring contains a 'query' parameter.

    If the query matches self.expression, execute it against Myria and
    relay the JSON response with CORS headers; otherwise respond 500.
    """
    match = re.search(self.expression, self.querystring.get('query', ''))
    query = match.group('query') if match else None
    if query:
        connection = MyriaConnection(
            rest_url=self.server.arguments.myria_url,
            execution_url=self.server.arguments.myria_web_url)
        # '+' is an URL-encoded space; the literal token 'plus' stands
        # in for a real '+' in the submitted program
        response = connection.execute_program(
            urllib.unquote(query).replace('+', ' ').replace('plus', '+'))
        self.send_response(200)
        self.send_access_control_headers()
        self.end_headers()
        self.wfile.write(json.dumps(response))
        return
    self.send_response(500)
    self.end_headers()
class TestQuery(unittest.TestCase):
    """ Exercises submission, execution, validation, status, and listing. """

    def __init__(self, args):
        with HTTMock(local_mock):
            self.connection = MyriaConnection(hostname='localhost', port=12345)
        unittest.TestCase.__init__(self, args)

    def test_submit(self):
        plan = query()
        with HTTMock(local_mock):
            status = self.connection.submit_query(plan)
            self.assertEquals(status, query_status(plan, status='ACCEPTED'))
            self.assertEquals(query_counter, 1)

    def test_execute(self):
        plan = query()
        with HTTMock(local_mock):
            status = self.connection.execute_query(plan)
            self.assertEquals(status, query_status(plan))

    def test_validate(self):
        plan = query()
        with HTTMock(local_mock):
            validated = self.connection.validate_query(plan)
            self.assertEquals(validated, plan)

    def test_query_status(self):
        plan = query()
        with HTTMock(local_mock):
            status = self.connection.get_query_status(17)
            self.assertEquals(status, query_status(plan))

    def x_test_queries(self):
        # Disabled: the x_ prefix keeps this out of test discovery
        with HTTMock(local_mock):
            result = self.connection.queries()
            self.assertEquals(result['max'], 17)
            self.assertEquals(result['min'], 1)
            self.assertEquals(result['results'][0]['queryId'], 17)
def import_(cls, source, intermediate, *args, **kwargs):
    """ Import `intermediate` data into Myria as relation `source.name`.

    URIs are assigned to workers, scattered in parallel, and a plan
    scanning the scattered URIs is submitted.
    Returns the submitted MyriaQuery.
    """
    connection = MyriaConnection(*args, **kwargs) \
        if args or kwargs else MyriaRelation.DefaultConnection
    schema = MyriaSchema(intermediate.schema).local
    workers = [(id, urlparse('//' + name).hostname)
               for (id, name) in connection.workers().items()]
    # (Removed leftover debug print of the URI assignment, which wrote
    # to stdout and evaluated _assign_uris an extra time.)
    # TODO Need to expose identity and username, if it's still needed
    # for cross-cluster transfers
    identity, username = None, None
    # One scatter process per URI
    pool = multiprocessing.Pool(processes=len(intermediate.uris))
    pipes = pool.map(partial(_scatter_uri, identity, username),
                     _assign_uris(intermediate.uris, workers))
    # Only workers whose scatter produced a URI contribute work items
    work = ((id, {"dataType": "URI", "uri": uri})
            for (id, uri) in pipes if uri)
    plan = utility.get_plan(schema, work,
                            MyriaRelation._get_qualified_name(source.name))
    return MyriaQuery.submit_plan(plan, connection)
class TestQuery(unittest.TestCase):
    """ Exercises submission, compilation, validation, and status APIs. """

    def __init__(self, args):
        with HTTMock(create_mock()):
            self.connection = MyriaConnection(hostname='localhost', port=12345)
        unittest.TestCase.__init__(self, args)

    def test_submit(self):
        plan = query()
        with HTTMock(local_mock):
            status = self.connection.submit_query(plan)
            self.assertEquals(status, query_status(plan, status='ACCEPTED'))
            self.assertEquals(query_counter, 1)

    def test_execute(self):
        plan = query()
        with HTTMock(local_mock):
            status = self.connection.execute_query(plan)
            self.assertEquals(status, query_status(plan))

    def test_compile_plan(self):
        with HTTMock(create_mock()):
            myrial = "a = empty(i:int);\nstore(a, a);"
            compiled = self.connection.compile_program(myrial,
                                                       language="MyriaL")
            self.assertEqual(compiled['rawQuery'], myrial)

    def test_validate(self):
        plan = query()
        with HTTMock(local_mock):
            validated = self.connection.validate_query(plan)
            self.assertEquals(validated, plan)

    def test_query_status(self):
        plan = query()
        with HTTMock(local_mock):
            status = self.connection.get_query_status(17)
            self.assertEquals(status, query_status(plan))

    def test_queries(self):
        with HTTMock(local_mock):
            listing = self.connection.queries()
            self.assertEquals(listing['max'], 17)
            self.assertEquals(listing['min'], 1)
            self.assertEquals(listing['results'][0]['queryId'], 17)
def __init__(self, args):
    """ Build the shared mock-backed connection, then delegate to
    unittest.TestCase. """
    with HTTMock(local_mock):
        connection = MyriaConnection(hostname='localhost', port=12345)
    self.connection = connection
    unittest.TestCase.__init__(self, args)
def test_no_deployment(self):
    """ Parsing a missing deployment config yields None. """
    parsed = MyriaConnection._parse_deployment(None)
    assert parsed is None
class MyriaRelation(MyriaFluentQuery):
    """ Represents a relation in the Myria system """

    # Shared fallback connection used when none is passed to __init__
    DefaultConnection = MyriaConnection(hostname='localhost', port=8753)
    # Maximum number of tuples rendered by _repr_html_
    DisplayLimit = 500

    def __init__(self, relation, connection=None, schema=None, **kwargs):
        """ Attach to an existing Myria relation, or create a new one

        relation: the name of the relation.  One of:
          * qualified components: {'userName': '******',
                                   'programName': 'adhoc',
                                   'relationName': 'my_relation'}
          * qualified name: 'public:adhoc:my_relation'
          * unqualified name: 'my_relation' (assume public:adhoc)

        Keyword arguments:
          connection: attach to a specific Myria API endpoint
          schema: for a relation that does not yet exist, specify its schema
        """
        name = relation if isinstance(relation, basestring) \
            else self._get_name(relation)
        self.components = self._get_name_components(name)
        self.name = ':'.join(self.components)  # Qualify name
        self.connection = connection or self.DefaultConnection
        self.qualified_name = self._get_qualified_name(self.components)
        self._schema = None
        self._metadata = None
        # Shadow the static load() with the bound instance version so
        # relation.load(...) hits instance_load on instances
        self.load = self.instance_load

        # If the relation is already persisted, any schema parameter
        # must match the persisted version.
        if schema is not None and self.is_persisted and self.schema != schema:
            raise ValueError('Stored relation schema does not match '
                             'that specified as schema parameter.')
        elif schema is None and not self.is_persisted:
            raise ValueError('No schema specified for new relation.')
        elif schema is not None:
            self._schema = schema

        # Root the fluent query at a table scan (persisted relation)
        # or an empty relation with the given schema
        super(MyriaRelation, self).__init__(
            None,
            kwargs.get('query', (self._scan(self.components)
                                 if self.is_persisted
                                 else self._empty(self._schema))),
            self.connection)

    @staticmethod  # pylint: disable=E0202
    def load(name, url, schema, data_format='CSV', connection=None, **kwargs):
        """ Load data from a URL and save it as a new relation """
        relation = MyriaRelation(name, connection, schema)
        return (relation.load(url, data_format, **kwargs).execute(relation))

    def instance_load(self, url, data_format='CSV', **kwargs):
        """ Generate a query that loads data from the given URL into
        the relation """
        if self.parent is not None:
            raise MyriaError('Load must be first invocation in fluent query.')
        elif self._schema is None and 'schema' not in kwargs:
            raise MyriaError('Relation does not have a scheme.')
        else:
            self.query = MyriaFluentQuery._load(
                url, self._schema or kwargs.pop('schema'),
                data_format, **kwargs)
            return self

    def to_dict(self, limit=None):
        """ Download this relation as JSON """
        return self.connection.download_dataset(self.qualified_name,
                                                limit=limit) \
            if self.is_persisted else []

    def delete(self):
        """ Delete this relation"""
        self.connection.delete_dataset(self.qualified_name)
        # Drop cached metadata so is_persisted re-queries the server
        self._metadata = None

    def to_dataframe(self, index=None, limit=None):
        """ Convert the query result to a Pandas DataFrame """
        if not DataFrame:
            raise ImportError('Must execute `pip install pandas` to generate '
                              'Pandas DataFrames')
        else:
            return DataFrame.from_records(self.to_dict(limit), index=index)

    def _repr_html_(self, limit=None):
        """ Generate a representation of this query as HTML """
        limit = limit or MyriaRelation.DisplayLimit
        dataframe = self.to_dataframe(limit=limit)
        # NOTE(review): to_dataframe already truncates to `limit`, so
        # len(dataframe) > limit looks unreachable and the footer never
        # renders — confirm whether >= was intended
        footer = '<p>(First {} tuples shown)</p>'.format(limit) \
            if limit and len(dataframe) > limit else ''
        return dataframe.to_html() + footer

    @property
    def schema(self):
        """ The schema of the relation """
        if self._schema is None:
            self._schema = MyriaSchema(json=self.metadata['schema'])
        return self._schema

    @property
    def created_date(self):
        """ The creation date for this relation """
        return parse(self.metadata['created'])

    def __len__(self):
        """ The number of tuples in the relation (never negative) """
        return max(int(self.metadata['numTuples']), 0)

    @property
    def metadata(self):
        """ A JSON dictionary of relation metadata (lazily fetched) """
        if self._metadata is None:
            self._metadata = self.connection.dataset(self.qualified_name)
        return self._metadata

    @property
    def is_persisted(self):
        """ Does the relation exist in the Myria database? """
        try:
            return bool(self.metadata)
        except MyriaError:
            # Server raised for an unknown dataset: not persisted
            return False

    def __str__(self):
        return self.name

    @staticmethod
    def _get_name(qualified_name):
        """ Stringify a list of name components into a valid Myria name """
        return ':'.join([
            qualified_name['userName'],
            qualified_name['programName'],
            qualified_name['relationName']
        ])

    @staticmethod
    def _get_name_components(name):
        """ Parse a Myria relation name into a list of components """
        components = name.split(':')
        default_components = ['public', 'adhoc'][:max(3 - len(components), 0)]
        return default_components + components[:3]

    @staticmethod
    def _get_qualified_name(name_or_components):
        """ Generate a Myria relation dictionary from a string or list """
        if isinstance(name_or_components, basestring):
            return MyriaRelation._get_qualified_name(
                MyriaRelation._get_name_components(name_or_components))
        else:
            return dict(
                izip(('userName', 'programName', 'relationName'),
                     name_or_components[:3]))
def execute(self, system, data):
    """ Dispatch an execution request to the named backend system.

    system: 'iquery' (run a SciDB AFL query), 'myria' (download a
            relation as JSON), 'restart' (bounce both clusters);
            anything else yields a 404.
    data:   the query text ('iquery') or relation name ('myria').

    Writes the HTTP response directly to self.wfile.
    """
    if system == 'iquery':
        self.send_response(200)
        self.end_headers()
        # Unquote the AFL query; 'csv+' must survive the '+'-to-space
        # substitution, hence the csvplus round-trip
        command = [
            '{path}/bin/iquery'.format(
                path=self.server.arguments.scidb_path),
            '-anp',
            str(self.server.arguments.scidb_port),
            '-q',
            (urllib.unquote(data).replace('csv+', 'csvplus').replace(
                '+', ' ').replace('csvplus', 'csv+').replace(
                    "\\'", "'").replace(
                        '\n', ' ').replace('"', '\\"'))
        ]
        self.wfile.write(
            subprocess.check_output(command, stderr=subprocess.STDOUT))
    elif system == 'myria':
        self.send_response(200)
        self.send_header('Access-Control-Allow-Origin', '*')
        self.end_headers()
        connection = MyriaConnection(
            rest_url=self.server.arguments.myria_url)
        relation = MyriaRelation(data, connection=connection)
        self.wfile.write(json.dumps(relation.to_dict()))
    elif system == 'restart':
        # Kill and relaunch the Myria cluster...
        myria_path = self.server.arguments.myria_path
        body = subprocess.check_output([
            './kill_all_java_processes.py',
            os.path.join(myria_path, 'deploy/deployment.config')
        ], cwd=os.path.join(
            myria_path, 'stack/myria/myriadeploy'),
            stderr=subprocess.STDOUT)
        body += subprocess.check_output([
            './launch_cluster.sh',
            os.path.join(myria_path, 'deploy/deployment.config')
        ], cwd=os.path.join(
            myria_path, 'stack/myria/myriadeploy'),
            stderr=subprocess.STDOUT)
        # ...then stop and restart SciDB...
        scidb_path = self.server.arguments.scidb_path
        body += subprocess.check_output([
            'bin/scidb.py', 'stop_all', 'bhaynes',
            '{}/etc/config.ini'.format(scidb_path)
        ], cwd=scidb_path, stderr=subprocess.STDOUT)
        body += subprocess.check_output([
            'bin/scidb.py', 'start_all', 'bhaynes',
            '{}/etc/config.ini'.format(scidb_path)
        ], cwd=scidb_path, stderr=subprocess.STDOUT)
        # ...and run a scan of SciDB__Demo__Vectors (presumably a
        # sanity check / warm-up — confirm intent)
        body += subprocess.check_output([
            'bin/iquery', '-anp',
            str(self.server.arguments.scidb_port), '-q',
            'scan(SciDB__Demo__Vectors)'
        ], cwd=scidb_path, stderr=subprocess.STDOUT)
        self.send_response(200)
        self.send_header('Access-Control-Allow-Origin', '*')
        self.end_headers()
        self.wfile.write(body)
    else:
        self.send_response(404)
        self.end_headers()
        self.wfile.write('System not found.')
from myria import MyriaConnection from myria import MyriaSchema from myria import MyriaRelation import json import time port = 8753 connection = MyriaConnection(hostname = sys.argv[1], port=8753) configurations = [4,6,8,10,12] dimensionFiles = ['replicateDim1.json', 'replicateDim2.json', 'replicateDim3.json', 'replicateDim4.json', 'replicateDim5.json'] for c in configurations: #dimension tables for d in dimensionFiles: dim_file = open(d, 'r+') dim_json = json.load(dim_file) dim_json['rawQuery'] = "Replicate " + str(d) + " on " + str(c) dim_json['fragments'][1]['overrideWorkers'] = range(1,c+1) dim_json['fragments'][1]['operators'][1]['relationKey']['programName'] = 'syntheticBenchmark' + str(c) + 'W' print 'Replicating ' + d + ' on ' + str(c) + ' workers' query_status= connection.submit_query(dim_json) query_id = query_status['queryId'] status = (connection.get_query_status(query_id))['status'] while status!='SUCCESS': status = (connection.get_query_status(query_id))['status'] time.sleep(2); print 'done'
def test_parse_deploy_file(self):
    """ A deployment config parses into the REST hostname and port. """
    with open('myria/test/deployment.cfg.local') as deploy_file:
        hostname, port = MyriaConnection._parse_deployment(deploy_file)
        self.assertEqual(hostname, 'localhost')
        self.assertEqual(port, 12345)
import time
from raco.catalog import FromFileCatalog
import raco.myrial.parser as parser
import raco.myrial.interpreter as interpreter
import raco.algebra as alg
from raco.expression.expression import UnnamedAttributeRef
from myria import MyriaConnection
from myria import MyriaSchema
from myria import MyriaRelation
from raco.language.myrialang import compile_to_json
from raco.scheme import Scheme
from raco.language.myrialang import MyriaQueryScan
import subprocess
import json

# Experiment setup: connect to a local Myria master and prepare a
# TPC-H lineitem-like schema for the query plans built further below.
connection = MyriaConnection(hostname="localhost", port=8753)

schema = {"columnNames": ["l_orderkey", "l_linenumber", "l_custkey",
                          "l_partkey", "l_suppkey", "l_orderdate",
                          "l_orderpriority", "l_shippriority",
                          "l_quantity", "l_extendedprice",
                          "l_ordtotalprice", "l_discount", "l_revenue",
                          "l_supplycost", "l_tax", "l_commitdate",
                          "l_shipmode"],
          "columnTypes": ["LONG_TYPE", "LONG_TYPE", "LONG_TYPE",
                          "LONG_TYPE", "LONG_TYPE", "STRING_TYPE",
                          "STRING_TYPE", "LONG_TYPE", "DOUBLE_TYPE",
                          "DOUBLE_TYPE", "DOUBLE_TYPE", "DOUBLE_TYPE",
                          "LONG_TYPE", "LONG_TYPE", "DOUBLE_TYPE",
                          "LONG_TYPE", "STRING_TYPE"]}
zippedSchema = zip(schema['columnNames'], schema['columnTypes'])

# NOTE(review): `os` is not imported in this chunk — confirm it is
# imported elsewhere, otherwise the next line raises NameError
f = open(os.path.expanduser("runtimes.txt"), 'w');

# Experiment parameters; the bracketed comments preserve the full
# sweeps that were narrowed to a single case
startWorkers = 4
receiveWorkers = [6]  # [6,8,10,12]
correspondingChunks = [3]  # [3,2,5,3]
numberChunksToMove = [1, 1, 3, 2]
positionCount = 0
# Loop body continues beyond this chunk
for r in receiveWorkers:  # first case is 6
from raco.catalog import FromFileCatalog
import raco.myrial.parser as parser
import raco.myrial.interpreter as interpreter
import raco.algebra as alg
from raco.expression.expression import UnnamedAttributeRef
from myria import MyriaConnection
from myria import MyriaSchema
from myria import MyriaRelation

# CONFIGURE: information about the datasets in Myria
SNAPSHOT_LIST = ['002560', '002552', '002432']
USER_NAME = "jortiz"
PROGRAM_NAME = "romulustest"
# END CONFIGURE

connection = MyriaConnection(hostname="rest.myria.cs.washington.edu",
                             port=1776, ssl=True,
                             execution_url="https://myria-web.appspot.com")

# Fetch each snapshot's schema and write them all as one dictionary
# literal (a catalog file consumable by raco's FromFileCatalog).
# `with` guarantees the file is closed even if a fetch fails.
with open('schema.py', 'w') as f:
    f.write("{" + '\n')
    for i in SNAPSHOT_LIST:
        current_relation = USER_NAME + ":" + PROGRAM_NAME + ":" + "cosmo" + i
        current_schema = (MyriaRelation(relation=current_relation,
                                        connection=connection)
                          .schema.to_dict())
        # Encode to plain byte strings so the emitted literals carry
        # no u'' prefixes
        columnNames = [x.encode('utf-8')
                       for x in current_schema['columnNames']]
        columnTypes = [x.encode('utf-8')
                       for x in current_schema['columnTypes']]
        columns = zip(columnNames, columnTypes)
        f.write("'" + current_relation + "' : " + str(columns) + ',\n')
    f.write("}" + '\n')
def setUp(self):
    """ Point both this test and MyriaRelation at a local Myria stack. """
    self.connection = MyriaConnection(
        hostname='localhost', port=8753,
        execution_url="http://127.0.0.1:8080")
    MyriaRelation.DefaultConnection = self.connection
from myria import MyriaSchema
from myria import MyriaRelation
from raco.language.myrialang import compile_to_json
from raco.scheme import Scheme
from raco.language.myrialang import MyriaQueryScan

# CONFIGURE: information about the datasets in Myria
# (first snapshot must be most "recent")
SNAPSHOT_LIST = ['002560', '002552', '002432']
USER_NAME = "jortiz"
PROGRAM_NAME = "romulustest"
NON_GRP_PARTICLES = '0'
IORDER = 'iOrder'
# END CONFIGURE

# NOTE(review): MyriaConnection is not imported in this chunk — confirm
# an earlier import, otherwise the next line raises NameError
connection = MyriaConnection(hostname="rest.myria.cs.washington.edu",
                             port=1776, ssl=True,
                             execution_url="https://myria-web.appspot.com")

time_count = 1
union_string = None
relation_name_prefix = USER_NAME + ":" + PROGRAM_NAME + ":"

# Write the union: one subquery per adjacent snapshot pair, counting
# the particles shared between each (currentGroup, nextGroup) pairing.
# NOTE(review): the column alias 'currrentTime' (triple r) is emitted
# into SQL as-is — confirm downstream consumers expect that spelling.
for i in range(len(SNAPSHOT_LIST) - 1):
    current_relation_name = relation_name_prefix + "cosmo" + SNAPSHOT_LIST[i];
    next_relation_name = relation_name_prefix + "cosmo" + SNAPSHOT_LIST[i + 1];
    current_snapshot = \
        "(select s1.grp as currentGroup," + str(time_count) + \
        " as currrentTime, s2.grp as nextGroup, count(*) as sharedParticles from \"" + \
        current_relation_name + "\" s1, \"" + next_relation_name + \
        "\" s2 where s1.\"" + IORDER + "\" = s2.\"" + IORDER + \
        "\" and s1.grp > " + NON_GRP_PARTICLES + " and s2.grp >" + \
        NON_GRP_PARTICLES + " group by s1.grp, s2.grp)"
    if (union_string):
        union_string = union_string + " UNION " + current_snapshot
    else:
        union_string = current_snapshot
    time_count = time_count + 1
from myria import MyriaSchema
from myria import MyriaRelation

# CONFIGURE: information about the datasets in Myria
USER_NAME = "jortiz"
PROGRAM_NAME = "romulustest"
NODES_RELATION = "nodesTable"
EDGES_RELATION = "edgesTable"
# END CONFIGURE

table_prefix = USER_NAME + ":" + PROGRAM_NAME + ":"
nodes_table = table_prefix + NODES_RELATION;
edges_table = table_prefix + EDGES_RELATION;

# NOTE(review): MyriaConnection is not imported in this chunk — confirm
# an earlier import, otherwise the next line raises NameError
connection = MyriaConnection(hostname="rest.myria.cs.washington.edu",
                             port=1776, ssl=True,
                             execution_url="https://myria-web.appspot.com")

# *****************PART 1***************
# first get schemas for nodes and edges, and write them to schema.py
f = open('schema.py', 'w')
f.write("{" + '\n');

# --nodes
current_schema = (MyriaRelation(relation=nodes_table,
                                connection=connection).schema.to_dict())
# Encode to plain byte strings so the emitted literals carry no u'' prefixes
columnNames = [x.encode('utf-8') for x in current_schema['columnNames']]
columnTypes = [x.encode('utf-8') for x in current_schema['columnTypes']]
columns = zip(columnNames, columnTypes)
f.write("'" + nodes_table + "' : " + str(columns) + ',\n');

# --edges (the script continues past this chunk; the edges entry is
# presumably written there)
current_schema = (MyriaRelation(relation=edges_table,
                                connection=connection).schema.to_dict())
columnNames = [x.encode('utf-8') for x in current_schema['columnNames']]
columnTypes = [x.encode('utf-8') for x in current_schema['columnTypes']]
class MyriaRelation(object):
    """ Represents a relation in the Myria system """

    # Shared fallback connection used when none is passed to __init__
    DefaultConnection = MyriaConnection(hostname='localhost', port=8753)

    def __init__(self, relation, connection=None, schema=None):
        """ Attach to an existing Myria relation, or create a new one

        relation: the name of the relation.  One of:
          * qualified components: {'userName': '******',
                                   'programName': 'adhoc',
                                   'relationName': 'my_relation'}
          * qualified name: 'public:adhoc:my_relation'
          * unqualified name: 'my_relation' (assume public:adhoc)

        Keyword arguments:
          connection: attach to a specific Myria API endpoint
          schema: for a relation that does not yet exist, specify its schema
        """
        self.name = relation if isinstance(relation, basestring) \
            else self._get_name(relation)
        self.components = self._get_name_components(self.name)
        self.connection = connection or self.DefaultConnection
        self.qualified_name = self._get_qualified_name(self.components)
        self._schema = None
        self._metadata = None

        # If the relation is already persisted, any schema parameter
        # must match the persisted version.
        if schema is not None and self.is_persisted and self.schema != schema:
            raise ValueError('Stored relation schema does not match '
                             'that specified as schema parameter.')
        elif schema is not None:
            self._schema = schema

    def to_dict(self):
        """ Download this relation as JSON """
        return self.connection.download_dataset(self.qualified_name) \
            if self.is_persisted else []

    def delete(self):
        """ Delete this relation"""
        self.connection.delete_dataset(self.qualified_name)
        # Drop cached metadata so is_persisted re-queries the server
        self._metadata = None

    def to_dataframe(self, index=None):
        """ Convert the query result to a Pandas DataFrame """
        if not DataFrame:
            raise ImportError('Must execute `pip install pandas` to generate '
                              'Pandas DataFrames')
        else:
            return DataFrame.from_records(self.to_dict(), index=index)

    def _repr_html_(self):
        """ Generate a representation of this query as HTML """
        return self.to_dataframe().to_html()

    @property
    def schema(self):
        """ The schema of the relation """
        if self._schema is None:
            self._schema = MyriaSchema(json=self.metadata['schema'])
        return self._schema

    @property
    def created_date(self):
        """ The creation date for this relation """
        return parse(self.metadata['created'])

    def __len__(self):
        """ The number of tuples in the relation """
        return int(self.metadata['numTuples'])

    @property
    def metadata(self):
        """ A JSON dictionary of relation metadata (lazily fetched) """
        if self._metadata is None:
            self._metadata = self.connection.dataset(self.qualified_name)
        return self._metadata

    @property
    def is_persisted(self):
        """ Does the relation exist in the Myria database? """
        try:
            return bool(self.metadata)
        except MyriaError:
            # Server raised for an unknown dataset: not persisted
            return False

    @staticmethod
    def _get_name(qualified_name):
        """ Stringify a list of name components into a valid Myria name """
        return ':'.join([qualified_name['userName'],
                         qualified_name['programName'],
                         qualified_name['relationName']])

    @staticmethod
    def _get_name_components(name):
        """ Parse a Myria relation name into a list of components """
        components = name.split(':')
        default_components = ['public', 'adhoc'][:max(3 - len(components), 0)]
        return default_components + components[:3]

    @staticmethod
    def _get_qualified_name(name_or_components):
        """ Generate a Myria relation dictionary from a string or list """
        if isinstance(name_or_components, basestring):
            return MyriaRelation._get_qualified_name(
                MyriaRelation._get_name_components(name_or_components))
        else:
            return dict(izip(('userName', 'programName', 'relationName'),
                             name_or_components[:3]))