Beispiel #1
0
 def test_basic_query(self):
     """Run a full-table SELECT and compare it with the expected rows.

     Also asserts that the connection object reports ``psql_optimized``.
     """
     connection = read.connect_sql(self.conn_str)
     self.assertTrue(connection.psql_optimized)
     result = connection.execute('SELECT * FROM employees')

     # Expected contents of the employees table as a structured array.
     expected_dtype = [('id', '<i8'), ('last_name', 'O'),
                       ('first_name', 'O'), ('salary', '<f8'),
                       ('height', '<f8'), ('usefulness', '<i8')]
     expected_rows = [
         (1, u'Arthur', u'King', 40000.0, 2.1, 10),
         (2, u'Jones', u'James', 1000000.0, 1.9, 2),
         (3, u'The Moabite', u'Ruth', 50000.0, 1.8, 6),
     ]
     expected = np.array(expected_rows, dtype=expected_dtype)
     self.assertTrue(np.array_equal(result, expected))
Beispiel #2
0
 def test_basic_query(self):
     """Check that selecting every employee yields the known table.

     The connection must report ``psql_optimized`` before the query is run.
     """
     db = read.connect_sql(self.conn_str)
     self.assertTrue(db.psql_optimized)
     fetched = db.execute('SELECT * FROM employees')

     # Column names and their numpy formats, paired up into a dtype spec.
     col_names = ('id', 'last_name', 'first_name',
                  'salary', 'height', 'usefulness')
     col_formats = ('<i8', 'O', 'O', '<f8', '<f8', '<i8')
     wanted = np.array(
         [(1, u'Arthur', u'King', 40000.0, 2.1, 10),
          (2, u'Jones', u'James', 1000000.0, 1.9, 2),
          (3, u'The Moabite', u'Ruth', 50000.0, 1.8, 6)],
         dtype=list(zip(col_names, col_formats)))
     self.assertTrue(np.array_equal(fetched, wanted))
Beispiel #3
0
    def test_connect_sql(self):
        """Query the ``small.db`` SQLite file with and without caching.

        A default connection runs the full-table query once.  A second
        connection opened with ``allow_caching=True`` runs the full-table
        query, an id-only query, and then the full-table query again; every
        result is compared against the expected structured array.
        """
        conn_str = 'sqlite:///{}'.format(
            utils_for_tests.path_of_data('small.db'))
        all_rows_query = 'SELECT * FROM employees'
        id_only_query = 'SELECT id FROM employees'

        expected_all = np.array(
            [(1, u'Arthur', u'King', 40000.0, 2.1, 10),
             (2, u'Jones', u'James', 1000000.0, 1.9, 2),
             (3, u'The Moabite', u'Ruth', 50000.0, 1.8, 6)],
            dtype=[('id', '<i8'), ('last_name', 'O'),
                   ('first_name', 'O'), ('salary', '<f8'),
                   ('height', '<f8'), ('usefulness', '<i8')])
        expected_ids = np.array([(1, ), (2, ), (3, )], dtype=[('id', '<i8')])

        plain_conn = read.connect_sql(conn_str)
        self.assertTrue(
            np.array_equal(plain_conn.execute(all_rows_query), expected_all))

        caching_conn = read.connect_sql(conn_str, allow_caching=True)
        # The full-table query is issued twice on purpose, with a different
        # query in between.
        checks = ((all_rows_query, expected_all),
                  (id_only_query, expected_ids),
                  (all_rows_query, expected_all))
        for query, expected in checks:
            self.assertTrue(
                np.array_equal(caching_conn.execute(query), expected))
Beispiel #4
0
    def test_connect_sql(self):
        """Compare query results from ``small.db`` against known arrays.

        The full employees table is fetched through a default connection,
        then through a connection created with ``allow_caching=True``, where
        it is fetched before and after an id-only query.
        """
        def matches(connection, query, expected):
            # True when running `query` yields exactly `expected`.
            return np.array_equal(connection.execute(query), expected)

        db_path = utils_for_tests.path_of_data('small.db')
        conn_str = 'sqlite:///{}'.format(db_path)

        employee_dtype = [('id', '<i8'), ('last_name', 'O'),
                          ('first_name', 'O'),
                          ('salary', '<f8'), ('height', '<f8'),
                          ('usefulness', '<i8')]
        employees = np.array(
            [(1, u'Arthur', u'King', 40000.0, 2.1, 10),
             (2, u'Jones', u'James', 1000000.0, 1.9, 2),
             (3, u'The Moabite', u'Ruth', 50000.0, 1.8, 6)],
            dtype=employee_dtype)
        employee_ids = np.array([(1,), (2,), (3,)], dtype=[('id', '<i8')])

        conn = read.connect_sql(conn_str)
        self.assertTrue(matches(conn, 'SELECT * FROM employees', employees))

        conn = read.connect_sql(conn_str, allow_caching=True)
        self.assertTrue(matches(conn, 'SELECT * FROM employees', employees))
        self.assertTrue(matches(conn, 'SELECT id FROM employees', employee_ids))
        self.assertTrue(matches(conn, 'SELECT * FROM employees', employees))
    def get_rg_from_csv(
        self,
        csv_file_path,
        parse_datetimes=None,
        unit_id_col=None,
        start_time_col=None,
        stop_time_col=None,
        feature_col=None,
        val_col=None,
    ):
        """ Get an RG-formatted table from a CSV file.

        The CSV is loaded into an in-memory SQLite database and a copy of
        this object is returned that points at the loaded table.

        Parameters
        ----------
        csv_file_path : str
            Path of the csv file to import table from

        parse_datetimes : list of col names or None
            Columns that should be interpreted as datetimes. None (the
            default) is treated as an empty list.

        unit_id_col : str or None
            The name of the column containing unique unit IDs. For example,
            in Table 1, this is 'student_id'. If None, ArrayEmitter will
            pick the first otherwise unspecified column

        start_time_col : str or None
            The name of the column containing start time. In Table 1,
            this is 'start_year'. If None, ArrayEmitter will pick the second
            otherwise unspecified column.

        stop_time_col : str or None
            The name of the column containing the stop time. In Table 1,
            this is 'end_year'. If None, ArrayEmitter will pick the third
            otherwise unspecified column.

        feature_col : str or None
            The name of the column containing the feature name. In Table 1,
            this is 'feature'. If None, ArrayEmitter will pick the fourth
            otherwise unspecified column.

        val_col : str or None
            The name of the column containing the value for the given
            feature for the given user at the given time. In Table 1,
            this is 'value'. If None, ArrayEmitter will pick the fifth
            otherwise unspecified column.

        Returns
        -------
        ArrayEmitter
            Copy of this ArrayEmitter which has rg_table specified

        Examples
        --------

        >>> ae = ArrayEmitter()
        >>> ae = ae.get_rg_from_csv('table_1.csv')
        """
        # Build a fresh list per call rather than sharing one mutable
        # default object between every invocation.
        if parse_datetimes is None:
            parse_datetimes = []
        cp = self.__copy()
        # in-memory db
        conn = connect_sql("sqlite://")
        cp.__rg_table_name = utils.csv_to_sql(conn, csv_file_path, parse_datetimes=parse_datetimes)
        cp.__conn = conn
        cp.__col_specs["unit_id"] = unit_id_col
        cp.__col_specs["start_time"] = start_time_col
        cp.__col_specs["stop_time"] = stop_time_col
        cp.__col_specs["feature"] = feature_col
        cp.__col_specs["val"] = val_col
        cp.__resolve_cols()
        # SQLite doesn't really have datetimes, so we transparently translate
        # to unix times.
        cp.__convert_to_unix_time = True
        return cp
    def get_rg_from_sql(
        self,
        conn_str,
        table_name,
        unit_id_col=None,
        start_time_col=None,
        stop_time_col=None,
        feature_col=None,
        val_col=None,
    ):
        """ Get an RG-formatted table from a SQL database.

        Parameters
        ----------
        conn_str : str
            SQLAlchemy connection string used to connect to the database.

        table_name : str
            The name of the RG-formatted table in the database

        unit_id_col : str or None
            The name of the column containing unique unit IDs. For example,
            in Table 1, this is 'student_id'. If None, ArrayEmitter will
            pick the first otherwise unspecified column

        start_time_col : str or None
            The name of the column containing start time. In Table 1,
            this is 'start_year'. If None, ArrayEmitter will pick the second
            otherwise unspecified column.

        stop_time_col : str or None
            The name of the column containing the stop time. In Table 1,
            this is 'end_year'. If None, ArrayEmitter will pick the third
            otherwise unspecified column.

        feature_col : str or None
            The name of the column containing the feature name. In Table 1,
            this is 'feature'. If None, ArrayEmitter will pick the fourth
            otherwise unspecified column.

        val_col : str or None
            The name of the column containing the value for the given
            feature for the given user at the given time. In Table 1,
            this is 'value'. If None, ArrayEmitter will pick the fifth
            otherwise unspecified column.

        Returns
        -------
        ArrayEmitter
            Copy of this ArrayEmitter which has rg_table specified

        Examples
        --------
        >>> conn_str = ...
        >>> ae = ArrayEmitter()
        >>> ae = ae.get_rg_from_sql(conn_str, 'table_1',
        ...                         unit_id_col='student_id')

        """
        emitter = self.__copy()
        emitter.__conn = connect_sql(
            conn_str, allow_pgres_copy_optimization=True)
        emitter.__rg_table_name = table_name
        # Record the caller's column choices; None entries are left for
        # __resolve_cols to deal with.
        requested_cols = (
            ("unit_id", unit_id_col),
            ("start_time", start_time_col),
            ("stop_time", stop_time_col),
            ("feature", feature_col),
            ("val", val_col),
        )
        for spec_key, col_name in requested_cols:
            emitter.__col_specs[spec_key] = col_name
        emitter.__resolve_cols()
        return emitter
Beispiel #7
0
from diogenes.array_emitter import M_to_rg
from diogenes.read import connect_sql

from settings import conn_str

to_table = 'vancouver._Z_test_rg'

# Drop the table named by to_table before the M_to_rg calls below, which
# are given the same table name.
conn = connect_sql(conn_str)
sql = 'DROP TABLE ' + to_table
conn.execute(sql)

# The two conversions share every positional argument and differ only in
# the start-time column and the feature columns.
sat_features = ('sat_score_math', 'sat_score_reading', 'sat_score_writing')
act_features = ('act_score_composite', 'act_score_english',
                'act_score_math', 'act_score_reading',
                'act_score_science')

for start_col, features in (('sat_date', sat_features),
                            ('act_date', act_features)):
    M_to_rg(conn_str,
            'vancouver.test',
            'vancouver._Z_test_rg',
            'student_id',
            start_time_col=start_col,
            feature_cols=features)

M_to_rg(conn_str,
        'vancouver.student',
        'vancouver._Z_test_rg',
Beispiel #8
0
from diogenes.array_emitter import M_to_rg
from diogenes.read import connect_sql

from settings import conn_str

to_table = 'vancouver._Z_test_rg'

# Issue a DROP TABLE for to_table ahead of the M_to_rg calls that follow.
conn = connect_sql(conn_str)
sql = 'DROP TABLE ' + to_table
conn.execute(sql)

# Positional arguments common to both calls below.
shared_args = (conn_str,
               'vancouver.test',
               'vancouver._Z_test_rg',
               'student_id')

M_to_rg(*shared_args,
        start_time_col='sat_date',
        feature_cols=('sat_score_math',
                      'sat_score_reading',
                      'sat_score_writing'))

M_to_rg(*shared_args,
        start_time_col='act_date',
        feature_cols=('act_score_composite',
                      'act_score_english',
                      'act_score_math',
                      'act_score_reading',
                      'act_score_science'))

M_to_rg(
        conn_str, 
        'vancouver.student', 
        'vancouver._Z_test_rg',