def test_pystreamliner_multiple_tables(local_context):
    """
        Tests if a pypeline reading and writing additional tables works
    """
    ctx = local_context()
    setup_spark(ctx)
    config = pystreamliner_config(extractor="multiple_tables_extractor.py", transformer="multiple_tables_transformer.py")

    # create some reference data
    conn = database.connect(host="127.0.0.1", port=3306, user="******", password="", database="information_schema")
    conn.execute("CREATE DATABASE reference")
    conn.execute("CREATE TABLE reference.data (foo int primary key)")
    conn.query("INSERT INTO reference.data VALUES (%s)", 1)

    ctx.pipeline_put(pipeline_id="pypeline", batch_interval=1, config=config)

    # wait for a few batches before stopping the pipeline
    batch_end = ctx.pipeline_wait_for_batches(pipeline_id="pypeline", count=5, timeout=300)
    assert batch_end["success"]
    assert batch_end["load"]["count"] == 1

    ctx.pipeline_update(pipeline_id="pypeline", active=False)

    conn = database.connect(host="127.0.0.1", port=3306, user="******", password="", database="db")
    resp = conn.get("SELECT COUNT(*) AS count FROM t")
    assert resp.count >= 5
    num_rows = resp.count

    resp = conn.get("SELECT SUM(foo) AS f FROM t")
    assert resp.f == num_rows

    for i in range(num_rows):
        table = "pystreamliner.table%d" % i
        resp = conn.get("SELECT SUM(foo) AS f FROM " + table)
        assert resp.f == 1
Example #2
def connect():
    master_agg = '192.168.65.1'  #  10.0.3.186:3306
    conn = database.connect(host=master_agg,
                            user='******',
                            password='******',
                            database=DB_NAME)
    return conn
Example #3
 def __init__(self):
   """Create a connection"""
   
   try:
     self.client = database.connect(host='localhost', port=3307, user='******', password="******" ,database='test')
   except Exception as e:
     raise
def get_connection(db=DATABASE):
    """ Returns a new connection to the database. """
    return database.connect(host=HOST,
                            port=PORT,
                            user=USER,
                            password=PASSWORD,
                            database=db)
Example #5
def drop_database(params):
    with database.connect(host=params.db_host,
                          port=params.db_port,
                          user=params.db_user,
                          password=params.db_pswd,
                          database=params.db_name) as conn:
        conn.query('DROP DATABASE %s' % params.db_name)
def test_pystreamliner_sanity(local_context):
    """
        Tests if a basic pypeline works
    """
    ctx = local_context()
    setup_spark(ctx)
    config = pystreamliner_config(extractor="sanity_extractor.py", transformer="sanity_transformer.py")

    ctx.pipeline_put(pipeline_id="pypeline", batch_interval=1, config=config)

    # enable tracing and check that log messages are recorded appropriately
    ctx.pipeline_update(pipeline_id="pypeline", active=True, trace_batch_count=20)

    # wait for a couple more batches before stopping the pipeline
    batch_end = ctx.pipeline_wait_for_batches(pipeline_id="pypeline", count=10, timeout=120)
    assert batch_end["success"]
    assert batch_end["load"]["count"] == 1
    assert batch_end["batch_type"] == "Traced"
    assert len(batch_end["extract"]["logs"]) == 1
    assert "extractor info message" in batch_end["extract"]["logs"][0]
    assert len(batch_end["transform"]["logs"]) == 1
    assert "transformer error message" in batch_end["transform"]["logs"][0]

    ctx.pipeline_update(pipeline_id="pypeline", active=False)

    conn = database.connect(host="127.0.0.1", port=3306, user="******", password="", database="db")
    resp = conn.get("SELECT COUNT(*) AS count FROM t")
    assert resp.count >= 10
    num_rows = resp.count

    resp = conn.get("SELECT SUM(foo_int_doubled) AS f FROM t")
    assert resp.f == 2 * sum(range(1, num_rows + 1))
def test_connection_options(test_db_args):
    args = copy.deepcopy(test_db_args)
    args["host"] = "memsql.com"
    args["options"] = { "connect_timeout": 1 }
    with pytest.raises(database.OperationalError):
        conn = database.connect(**args)
        conn.query("SHOW TABLES")
Example #8
def query_autotags(interests, host=None, port=3306, user="******",
                          pswd="", db=None):
    start_t = time.time()
    with database.connect(host=host, port=port, user=user,
                          password=pswd, database=db) as conn:
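        # match_regex requires every interest key to appear in the full-text MATCH clause;
        # val_regex extracts the numeric score that follows each key in the comma-separated
        # autotags string and compares it against the requested threshold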
        match_regex = ' AND '.join(['''{}":"$'''.format(key) for key in interests.keys()])
        val_regex = ' AND '.join(['''CONVERT(SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(a.autotags,'{}":"',-1),',',1),':',-1),DECIMAL)>=CONVERT({},DECIMAL)'''.format(key, value) for key, value in interests.items()])
        query = '''select b.line_number, b.id, a.autotags, b.download_url from test_autotags a JOIN test_metadata b ON a.id = b.id WHERE match (a.autotags) against ('{}') AND {}'''.format(match_regex, val_regex)
        sql_response = conn.query(query)
        response = []
        for res in sql_response:
            if all(key in res['autotags'] for key in interests.keys()):
                tags_str = []
                for t in res['autotags'].split(','):
                    val = t.split(":")
                    try:
                        if len(val) == 2:
                            val[1] = float(val[1]) if val[1] != '' else 0
                            if val[0] in interests and val[1] >= float(interests[val[0]]):
                                tags_str.append(val[0] + ":" + str(val[1]))
                    except (ValueError, TypeError):
                        print(val[0], val[1])
                if tags_str:
                    print('\tID: {}'.format(res['id']))
                    print('\tAutotags: {}'.format(','.join(tags_str)))
                    print('\tdownload_url: {}\n'.format(res['download_url']))
                    response.append(res)
    return response, time.time() - start_t
Example #9
def test_connection_options(test_db_args):
    args = copy.deepcopy(test_db_args)
    args["host"] = "example.com"
    args["options"] = {"connect_timeout": 1}
    with pytest.raises(database.OperationalError):
        conn = database.connect(**args)
        conn.query("SHOW TABLES")
def test_pystreamliner_multiple_tables(local_context):
    """
        Tests if a pypeline reading and writing additional tables works
    """
    ctx = local_context()
    setup_spark(ctx)
    config = pystreamliner_config(extractor="multiple_tables_extractor.py",
                                  transformer="multiple_tables_transformer.py")

    # create some reference data
    conn = database.connect(host="127.0.0.1",
                            port=3306,
                            user="******",
                            password="",
                            database="information_schema")
    conn.execute("CREATE DATABASE reference")
    conn.execute("CREATE TABLE reference.data (foo int primary key)")
    conn.query("INSERT INTO reference.data VALUES (%s)", 1)

    ctx.pipeline_put(pipeline_id="pypeline", batch_interval=1, config=config)

    # wait for a few batches before stopping the pipeline
    batch_end = ctx.pipeline_wait_for_batches(pipeline_id="pypeline",
                                              count=5,
                                              timeout=300)
    assert batch_end["success"]
    assert batch_end["load"]["count"] == 1

    ctx.pipeline_update(pipeline_id="pypeline", active=False)

    conn = database.connect(host="127.0.0.1",
                            port=3306,
                            user="******",
                            password="",
                            database="db")
    resp = conn.get("SELECT COUNT(*) AS count FROM t")
    assert resp.count >= 5
    num_rows = resp.count

    resp = conn.get("SELECT SUM(foo) AS f FROM t")
    assert resp.f == num_rows

    for i in range(num_rows):
        table = "pystreamliner.table%d" % i
        resp = conn.get("SELECT SUM(foo) AS f FROM " + table)
        assert resp.f == 1
    def x_conn(self, request, test_db_args, test_db_database):
        conn = database.connect(**test_db_args)
        conn.execute('CREATE DATABASE IF NOT EXISTS %s' % test_db_database)
        conn.select_db(test_db_database)

        def cleanup():
            conn.execute('DROP DATABASE %s' % test_db_database)
        request.addfinalizer(cleanup)

        return conn
Example #12
    def _x_conn(self, request, test_db_args, test_db_database):
        conn = database.connect(**test_db_args)
        conn.execute('CREATE DATABASE IF NOT EXISTS %s CHARACTER SET utf8 COLLATE utf8_general_ci' % test_db_database)
        conn.select_db(test_db_database)

        def cleanup():
            conn.execute('DROP DATABASE %s' % test_db_database)
        request.addfinalizer(cleanup)

        return conn
    def x_conn(self, request, test_db_args, test_db_database):
        conn = database.connect(**test_db_args)
        conn.execute('CREATE DATABASE IF NOT EXISTS %s' % test_db_database)
        conn.execute('USE %s' % test_db_database)

        def cleanup():
            conn.execute('DROP DATABASE %s' % test_db_database)
        request.addfinalizer(cleanup)

        return conn
def queue_setup(request, test_db_args, test_db_database):
    with database.connect(**test_db_args) as conn:
        conn.execute('CREATE DATABASE IF NOT EXISTS %s' % test_db_database)

    test_db_args['database'] = test_db_database
    q = sql_step_queue.SQLStepQueue('test').connect(**test_db_args).setup()

    def cleanup():
        q.destroy()
    request.addfinalizer(cleanup)
def manager_setup(request, test_db_args, test_db_database):
    with database.connect(**test_db_args) as conn:
        conn.execute('CREATE DATABASE IF NOT EXISTS %s' % test_db_database)

    test_db_args['database'] = test_db_database
    q = sql_lock.SQLLockManager('test').connect(**test_db_args).setup()

    def cleanup():
        q.destroy()
    request.addfinalizer(cleanup)
Example #16
 def _connect(self, host, port, user, password):
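     # note: this generator is presumably wrapped with contextlib.contextmanager in the
     # original source, so callers can use "with self._connect(...) as db:"; the finally
     # block below then guarantees the connection is closed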
     db = None
     try:
         db = database.connect(host=host, port=port, user=user, password=password)
         self.log.debug("Connected to MemSQL")
         yield db
     except Exception:
         raise
     finally:
         if db:
             db.close()
Example #17
 def get_connection(self):
   for attempt in xrange(1, self.max_attempts + 1):
     try:
       return database.connect(host=self.host, port=self.port, user=self.user, password=self.password, database=self.database)
     except Exception as e:
       print e.message, e.args
       if self.verbose: print "\tTrouble establishing a database connection, retrying... (attempt: %d/%d)" % (attempt, self.max_attempts)
       sleep(attempt)
       continue
   sys.exit('Establishing a database connection failed after %d attempts, giving up.' % self.max_attempts)
Example #18
def queue_setup(request, test_db_args, test_db_database):
    with database.connect(**test_db_args) as conn:
        conn.execute('CREATE DATABASE IF NOT EXISTS %s' % test_db_database)

    test_db_args['database'] = test_db_database
    q = sql_step_queue.SQLStepQueue('test').connect(**test_db_args).setup()

    def cleanup():
        q.destroy()

    request.addfinalizer(cleanup)
def manager_setup(request, test_db_args, test_db_database):
    with database.connect(**test_db_args) as conn:
        conn.execute('CREATE DATABASE IF NOT EXISTS %s' % test_db_database)

    test_db_args['database'] = test_db_database
    q = sql_lock.SQLLockManager('test').connect(**test_db_args).setup()

    def cleanup():
        q.destroy()

    request.addfinalizer(cleanup)
Example #20
    def run_query(self, query, user):

        cursor = None
        try:
            cursor = database.connect(**self.configuration.to_dict())

            res = cursor.query(query)
            # column_names = []
            # columns = []
            #
            # for column in cursor.description:
            #     column_name = column[COLUMN_NAME]
            #     column_names.append(column_name)
            #
            #     columns.append({
            #         'name': column_name,
            #         'friendly_name': column_name,
            #         'type': types_map.get(column[COLUMN_TYPE], None)
            #     })

            rows = [
                dict(zip(list(row.keys()), list(row.values()))) for row in res
            ]

            # ====================================================================================================
            # temporary - until https://github.com/memsql/memsql-python/pull/8 gets merged
            # ====================================================================================================
            columns = []
            column_names = rows[0].keys() if rows else None

            if column_names:
                for column in column_names:
                    columns.append({
                        'name': column,
                        'friendly_name': column,
                        'type': TYPE_STRING
                    })

            data = {'columns': columns, 'rows': rows}
            json_data = json.dumps(data, cls=JSONEncoder)
            error = None
        except KeyboardInterrupt:
            cursor.close()
            error = "Query cancelled by user."
            json_data = None
        except Exception as e:
            logging.exception(e)
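            # Python 2 three-argument raise: re-raise the original exception with its traceback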
            raise sys.exc_info()[1], None, sys.exc_info()[2]
        finally:
            if cursor:
                cursor.close()

        return json_data, error
Example #21
def get_connection(host=options.host, port=options.port, db=options.database):
    """ Returns a new connection to the database. """
    if host is None:
        host = HOST
    if port is None:
        port = PORT

    return database.connect(
        host=host,
        port=port,
        user=options.user,
        password=options.password,
        database=db)
Example #22
    def run_query(self, query, user):

        cursor = None
        try:
            cursor = database.connect(**self.configuration.to_dict())

            res = cursor.query(query)
            # column_names = []
            # columns = []
            #
            # for column in cursor.description:
            #     column_name = column[COLUMN_NAME]
            #     column_names.append(column_name)
            #
            #     columns.append({
            #         'name': column_name,
            #         'friendly_name': column_name,
            #         'type': types_map.get(column[COLUMN_TYPE], None)
            #     })

            rows = [dict(zip(list(row.keys()), list(row.values()))) for row in res]

            # ====================================================================================================
            # temporary - until https://github.com/memsql/memsql-python/pull/8 gets merged
            # ====================================================================================================
            columns = []
            column_names = rows[0].keys() if rows else None

            if column_names:
                for column in column_names:
                    columns.append({
                        'name': column,
                        'friendly_name': column,
                        'type': TYPE_STRING
                    })

            data = {'columns': columns, 'rows': rows}
            json_data = json.dumps(data, cls=JSONEncoder)
            error = None
        except KeyboardInterrupt:
            cursor.close()
            error = "Query cancelled by user."
            json_data = None
        except Exception as e:
            logging.exception(e)
            raise sys.exc_info()[1], None, sys.exc_info()[2]
        finally:
            if cursor:
                cursor.close()

        return json_data, error
Example #23
    def run_query(self, query, user):

        cursor = None
        try:
            cursor = database.connect(**self.configuration.to_dict())

            res = cursor.query(query)
            # column_names = []
            # columns = []
            #
            # for column in cursor.description:
            #     column_name = column[COLUMN_NAME]
            #     column_names.append(column_name)
            #
            #     columns.append({
            #         'name': column_name,
            #         'friendly_name': column_name,
            #         'type': types_map.get(column[COLUMN_TYPE], None)
            #     })

            rows = [dict(zip(row.keys(), row.values())) for row in res]

            # ====================================================================================================
            # temporary - until https://github.com/memsql/memsql-python/pull/8 gets merged
            # ====================================================================================================
            columns = []
            column_names = rows[0].keys() if rows else None

            if column_names:
                for column in column_names:
                    columns.append({
                        "name": column,
                        "friendly_name": column,
                        "type": TYPE_STRING
                    })

            data = {"columns": columns, "rows": rows}
            json_data = json_dumps(data)
            error = None
        except KeyboardInterrupt:
            cursor.close()
            error = "Query cancelled by user."
            json_data = None
        finally:
            if cursor:
                cursor.close()

        return json_data, error
def test_sanity(local_context):
    ctx = local_context()
    ctx.run_ops()
    ctx.deploy_memsql_cluster(num_aggs=0, num_leaves=1)
    ctx.deploy_spark()
    # wait for spark to be deployed
    time.sleep(30)
    # and then kill the spark interface so that we have spark resources to run a job
    ctx.stop_ops()
    ctx.kill_spark_interface()

    print("Running the job")
    resp = ctx.spark_submit("com.memsql.spark.examples.WriteToMemSQLApp")
    print("STDOUT: \n%s" % resp.output)
    print("STDERR: \n%s" % resp.stderr_output)

    conn = database.connect(host="127.0.0.1", port=3306, user="******", password="", database="memsqlrdd_db")
    assert conn.get("SELECT count(*) AS c FROM output").c == 1000
Example #25
def get_connection(host=db_info.hostname,
                   port=db_info.port,
                   user=db_info.username,
                   password=db_info.password,
                   db=db_info.path.strip('/'),
                   verbose=options.verbose):
    for attempt in xrange(1, 21):
        try:
            return database.connect(host=host,
                                    port=port,
                                    user=user,
                                    password=password,
                                    database=db)
        except:
            if verbose:
                print "\tTrouble establishing a database connection, retrying... (attempt: %d/20)" % attempt
            sleep(attempt * 2)
            continue
def test_pystreamliner_sanity(local_context):
    """
        Tests if a basic pypeline works
    """
    ctx = local_context()
    setup_spark(ctx)
    config = pystreamliner_config(extractor="sanity_extractor.py",
                                  transformer="sanity_transformer.py")

    ctx.pipeline_put(pipeline_id="pypeline", batch_interval=1, config=config)

    # enable tracing and check that log messages are recorded appropriately
    ctx.pipeline_update(pipeline_id="pypeline",
                        active=True,
                        trace_batch_count=20)

    # wait for a couple more batches before stopping the pipeline
    batch_end = ctx.pipeline_wait_for_batches(pipeline_id="pypeline",
                                              count=10,
                                              timeout=120)
    assert batch_end["success"]
    assert batch_end["load"]["count"] == 1
    assert batch_end["batch_type"] == "Traced"
    assert len(batch_end["extract"]["logs"]) == 1
    assert "extractor info message" in batch_end["extract"]["logs"][0]
    assert len(batch_end["transform"]["logs"]) == 1
    assert "transformer error message" in batch_end["transform"]["logs"][0]

    ctx.pipeline_update(pipeline_id="pypeline", active=False)

    conn = database.connect(host="127.0.0.1",
                            port=3306,
                            user="******",
                            password="",
                            database="db")
    resp = conn.get("SELECT COUNT(*) AS count FROM t")
    assert resp.count >= 10
    num_rows = resp.count

    resp = conn.get("SELECT SUM(foo_int_doubled) AS f FROM t")
    assert resp.f == 2 * sum(range(1, num_rows + 1))
Example #27
    def _promote_child_agg_memsql(self, cluster, child_agg):
        cluster.save(currently_promoting_master=True)

        try:
            logger.info(
                "Promoting child aggregator at %s:%d to master"
                % (child_agg.data.host_ip, child_agg.data.memsql_port))
            can_connect = False
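            # can_connect distinguishes "could not reach the child aggregator at all" (swallowed
            # here) from "connected, but the promotion itself failed" (which triggers a rollback)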
            try:
                with database.connect(host=child_agg.data.host_ip, port=child_agg.data.memsql_port, user="******", password="") as conn:
                    conn.query("SELECT 1")
                    can_connect = True
                    conn.execute("AGGREGATOR SET AS MASTER")
            except database.OperationalError as e:
                if can_connect:
                    logger.error(
                        "Could not promote child agg to master for cluster %s: %s"
                        % (cluster.name, str(e)))
                    self._rollback_cluster(cluster)
                    return
        finally:
            cluster.save(currently_promoting_master=False)
def test_sanity(local_context):
    ctx = local_context()
    ctx.run_ops()
    ctx.deploy_memsql_cluster(num_aggs=0, num_leaves=1)
    ctx.deploy_spark()
    # wait for spark to be deployed
    time.sleep(30)
    # and then kill the spark interface so that we have spark resources to run a job
    ctx.stop_ops()
    ctx.kill_spark_interface()

    print("Running the job")
    resp = ctx.spark_submit("com.memsql.spark.examples.WriteToMemSQLApp")
    print("STDOUT: \n%s" % resp.output)
    print("STDERR: \n%s" % resp.stderr_output)

    conn = database.connect(host="127.0.0.1",
                            port=3306,
                            user="******",
                            password="",
                            database="memsqlrdd_db")
    assert conn.get("SELECT count(*) AS c FROM output").c == 1000
def main():

  # TODO: pull from config
  HOST = 'localhost'
  PORT = 3306
  USER = '******'
  PASSWORD = '******'
  DATABASE = 'acme'

  conn = database.connect(host=HOST, port=PORT, user=USER, password=PASSWORD, database=DATABASE)

  try:

    conn.ping()

    id = create(conn, "Inserted row")
    print("Inserted row {0}".format(id))

    row = read_one(conn, id)
    print(row, sep=',')

    update(conn, id, "Updated row")
    print("Updated row {0}".format(id))

    rows = read_all(conn)
    print("All rows:")
    for row in rows:
      print(row, sep='\t')

    delete(conn, id)
    print("Deleted row {0}".format(id))

  except Exception as e:
    print("Error")
    print(e)
    traceback.print_exc(file=sys.stdout)

  finally:
    conn.close()
Example #30
def get_connection(params, db=None):
    """ Returns a new connection to the database. """
    if not db:
        db = params.db_name

    return database.connect(host=params.db_host, port=params.db_port, user=params.db_user, password=params.db_pswd, database=db)
Example #31
def get_connection():
    return database.connect(host='127.0.0.1', port=3306, user='******', password='', database='MemEx')
def bootstrap(request, test_db_args, test_db_database):
    with database.connect(**test_db_args) as conn:
        conn.execute('CREATE DATABASE IF NOT EXISTS %s' % test_db_database)
Example #34
from memsql.common import database

conn = database.connect(host="127.0.0.1", port=3306, user="******", password="")
print(conn.query("show databases"))
Example #35
 def get_connection(self, params):
     return database.connect(host=params.db_host, port=params.db_port,
                         user=params.db_user, password=params.db_pswd,
                         database="yfcc_" + params.db_name)
def main():
    # load the config and get cluster name and db connection
    options = getArgs()
    #config, specs = getConfigAndSpecs(options.config_path)

    config = {}
    config['scripts_path'] = '/tmp'
    # logging configuration
    base_logging_path = osPath.join(config['scripts_path'],
                                    G.LOG_DIRECTORY_NAME, __appname__)
    makeDirIfNeeded(base_logging_path)
    general_logging_path = osPath.join(base_logging_path, 'GENERAL')
    makeDirIfNeeded(general_logging_path)
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    level = logging.DEBUG if options.verbose else logging.INFO
    handler = logging.FileHandler(
        osPath.join(
            general_logging_path, 'general_{}.log'.format(
                datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')[:-3])))
    handler.setFormatter(formatter)
    log.addHandler(handler)
    log.setLevel(level)

    if options.console_log:
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(formatter)
        log.addHandler(console_handler)

    log.info("In the beginning...")
    log.info("Hostname : {}".format(options.hostname))
    log.info("Port number : {}".format(options.portnum))
    log.info("Database Target : {}".format(options.database_name))
    log.info("Source Directory : {}".format(options.source_dir))
    log.info("Pipeline Name : {}".format(options.pipeline_name))
    log.info("Done Directory : {}".format(options.done_dir))

    conn_params = {
        'user': '******',
        'password': '',
        'host': '0.0.0.0',
        'port': 3306,
        'database': 'ssb'
    }

    conn = database.connect(**conn_params)

    if not osPath.exists(options.source_dir):
        log.error("Source directory does not exist!")
        exit(1)

    # look up pipeline_retries
    select_sql = '''
    SELECT variable_value 
    FROM information_schema.global_variables
    WHERE variable_name = 'PIPELINES_MAX_RETRIES_PER_BATCH_PARTITION'
    '''
    log.debug('Pipeline retries query : ' + select_sql)
    row = conn.get(select_sql)
    pipeline_retries = int(row['variable_value'])
    log.info(
        'Pipelines will retry files {} times before moving to Fail'.format(
            pipeline_retries))

    # look up pipeline_name, state, and pipeline directory
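    # config_json::$connection_string uses MemSQL's JSON field-extraction syntax to pull the
    # pipeline's connection string (a path for a filesystem pipeline) out of its JSON config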
    select_sql = '''
    SELECT pipeline_name, state, config_json::$connection_string constr
    FROM information_schema.pipelines
    WHERE pipeline_name = '{}' and database_name = '{}'
    '''.format(options.pipeline_name, options.database_name)
    log.debug('Pipeline lookup query : ' + select_sql)

    row2 = conn.get(select_sql)
    if row2:
        log.info('Pipeline {} exists'.format(row2['pipeline_name']))
        pipeline_directory = osPath.dirname(row2['constr'].decode('utf-8'))
        log.info(pipeline_directory)
    else:
        log.error('Pipeline {} does NOT exist!'.format(options.pipeline_name))
        return (-1)

    # Check to see if pipeline is running
    if row2['state'] != 'Running':
        log.error('Pipeline {} is not Running!!'.format(options.pipeline_name))
        return (-1)

    # Move files to pipeline directory
    files = glob(osPath.join(options.source_dir, "*"))
    for f in files:
        bfile = osPath.basename(f)
        destfullfile = osPath.join(pipeline_directory, bfile)
        log.info("Move {} to {}".format(f, destfullfile))
        rename(f, destfullfile)

    # Find files that have loaded
    files = glob(osPath.join(pipeline_directory, "*"))
    for f in files:
        log.info("Full filename in Pipeline Directory: {}".format(f))
        bfile = osPath.basename(f)
        log.info("Files in Pipeline Directory: {}".format(bfile))

        # lookup file load status in pipelines_offsets table
        select_sql = '''
        SELECT pipeline_name, database_name, source_partition_id, latest_loaded_offset
        FROM information_schema.pipelines_offsets
        WHERE database_name = '{}' and pipeline_name = '{}' and source_partition_id like '%{}'
        '''.format(options.database_name, options.pipeline_name, bfile)
        log.debug('Pipeline offsets query : ' + select_sql)

        row = conn.get(select_sql)

        # check for errors in the pipeline
        select_sql = '''
        SELECT count(*) error_count 
        FROM information_schema.pipelines_errors
        WHERE database_name = '{}' and pipeline_name = '{}' and batch_source_partition_id like '%{}'
        '''.format(options.database_name, options.pipeline_name, bfile)
        log.debug('Pipeline errors query : ' + select_sql)

        row2 = conn.get(select_sql)

        if row:
            if row['latest_loaded_offset'] == 0:
                if row2['error_count'] == 0:
                    log.info(
                        'File {} of Pipeline {} has finished loading!!'.format(
                            row['source_partition_id'], row['pipeline_name']))
                    destfullfile = osPath.join(options.done_dir, bfile)
                    log.info('Moving file {} to {}'.format(f, destfullfile))
                    rename(f, destfullfile)
                if row2['error_count'] >= pipeline_retries:
                    log.info(
                        'File {} of Pipeline {} has FAILED loading {} times!!'.
                        format(row['source_partition_id'],
                               row['pipeline_name'], pipeline_retries))
                    destfullfile = osPath.join(options.done_dir,
                                               bfile + '.fail')
                    log.info('Moving file {} to {}'.format(f, destfullfile))
                    rename(f, destfullfile)
            else:
                log.info('File {} of Pipeline {} in process'.format(
                    row['source_partition_id'], row['pipeline_name']))

    conn.close()
from memsql.common import database
from memsql.perf.network_tester import NetworkTester

master_agg = 'master.cs.memcompute.com'
test_node = 'leaf-1.cs.memcompute.com'
iterations = 100
payload_size = 1024 * 500

conn = database.connect(host=master_agg, user='******')
conn.execute('CREATE DATABASE IF NOT EXISTS performance')
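# presumably raising max_allowed_packet to 10 MB so the ~500 KB test payload fits in a single packet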
conn.execute('SET GLOBAL max_allowed_packet=%d' % (1024 * 1024 * 10))

m = NetworkTester(payload_size=payload_size).connect(host=master_agg, user='******', database='performance')
if m.ready():
    m.destroy()
m.setup()

n = NetworkTester().connect(host=test_node, user='******', database='performance')

def pp(data, postfix, cb=lambda x: x):
    for k, v in data.items():
        print k, cb(v), postfix

print 'latency'
pp(n.estimate_latency(), 'ms')

print '\nroundtrip'
pp(n.estimate_roundtrip(iterations), 'MB/s', lambda x: (x / 1024 / 1024))

print '\nupload'
pp(n.estimate_upload(iterations), 'MB/s', lambda x: (x / 1024 / 1024))
Example #38
def test_db_conn(test_db_args):
    return database.connect(**test_db_args)
def get_connection(db=DATABASE):
    """ Returns a new connection to the database. """
    return database.connect(host=HOST, port=PORT, user=USER, password=PASSWORD, database=db)
Example #40
    def create_connection(self):
        conn = database.connect(host=self.config.mysql_host, port=int(self.config.mysql_port),
                                user=self.config.mysql_user, password=self.config.mysql_password,
                                database=self.config.mysql_db)

        self.conn = conn