Example #1
import datetime
import os
import shutil
from tempfile import mkdtemp

import psycopg2
import pytest

# helpers under test; assumed to be importable from the package top level
from neurobooth_terra import (Table, create_table, drop_table, list_tables,
                              query, execute)


@pytest.fixture
def mock_data():
    """Create mock tables temporarily for testing purposes."""

    src_dirname = os.path.join(mkdtemp(), '')  # ensure path ends with /
    dest_dirname = mkdtemp()

    os.mkdir(os.path.join(src_dirname, 'test_subfolder'))

    with psycopg2.connect(port='5432', host='localhost', **db_args) as conn:
        table_id = 'log_sensor_file'
        column_names = ['log_sensor_file_id', 'sensor_file_path', 'another_column']
        dtypes = ['VARCHAR (255)', 'text[]', 'VARCHAR (255)']
        drop_table(table_id, conn)
        create_table(table_id, conn, column_names, dtypes,
                     primary_key='log_sensor_file_id')

        table_id = 'log_file'
        column_names = ['operation_id', 'log_sensor_file_id', 'src_dirname',
                        'dest_dirname', 'fname', 'time_verified',
                        'rsync_operation', 'is_deleted', 'is_finished']
        dtypes = ['SERIAL', 'text', 'text',
                  'text', 'text', 'timestamp', 'text',
                  'boolean', 'boolean']
        drop_table(table_id, conn)
        create_table(table_id, conn, column_names, dtypes,
                     primary_key='operation_id',
                     foreign_key={'log_sensor_file_id': 'log_sensor_file'})

    yield src_dirname, dest_dirname

    # cleanup
    shutil.rmtree(src_dirname)
    shutil.rmtree(dest_dirname)
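

# A test can request the ``mock_data`` fixture by name. A minimal sketch
# (``test_mock_dirs`` is a hypothetical consumer, not part of the library):
def test_mock_dirs(mock_data):
    """Check that the fixture set up the temporary directories."""
    src_dirname, dest_dirname = mock_data
    assert os.path.isdir(os.path.join(src_dirname, 'test_subfolder'))
    assert os.path.isdir(dest_dirname)
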
def test_compound_primary_key():
    """Test compound primary key."""
    conn = psycopg2.connect(connect_str)

    table_id = 'test'
    drop_table(table_id, conn)

    column_names = ['subject_id', 'first_name_birth', 'last_name_birth', 'Age']
    dtypes = ['VARCHAR (255)', 'VARCHAR (255)', 'VARCHAR (255)', 'INTEGER']
    primary_key = ['first_name_birth', 'last_name_birth']
    table_subject = create_table(table_id,
                                 conn=conn,
                                 column_names=column_names,
                                 dtypes=dtypes,
                                 primary_key=primary_key)
    assert set(table_subject.primary_key) == set(primary_key)
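    # The compound key should also round-trip when the table is re-read;
    # a sketch mirroring the ``Table`` usage in ``test_psql_connection``
    # below:
    table_again = Table('test', conn)
    assert set(table_again.primary_key) == set(primary_key)
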
def test_delete():
    """Test deleting rows"""

    conn = psycopg2.connect(connect_str)

    table_id = 'test'
    drop_table(table_id, conn)

    column_names = ['subject_id', 'first_name_birth', 'last_name_birth', 'Age']
    dtypes = ['VARCHAR (255)', 'VARCHAR (255)', 'VARCHAR (255)', 'INTEGER']
    table_subject = create_table(table_id,
                                 conn=conn,
                                 column_names=column_names,
                                 dtypes=dtypes)
    table_subject.insert_rows([('x5dc', 'mainak', 'jas', 21),
                               ('y5d3', 'anoopum', 'gupta', 25),
                               ('abcd', 'mayank', 'jas', 25)],
                              cols=column_names)
    table_subject.delete_row("first_name_birth LIKE 'ma%'")
    df = table_subject.query()
    assert len(df['first_name_birth']) == 1
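    # Pinning down the surviving row makes the check stronger: 'mainak' and
    # 'mayank' both match 'ma%', so only 'anoopum' should remain (assuming
    # ``query`` returns a pandas DataFrame, as the other tests suggest):
    assert df['first_name_birth'].iloc[0] == 'anoopum'
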
###############################################################################
# First, we will create a connection using ``psycopg2``.
connect_str = ("dbname='neurobooth' user='******' host='localhost' "
               "password='******'")

conn = psycopg2.connect(connect_str)

# drop all tables
for table_id in ['consent', 'study', 'register', 'contact', 'clinical',
                 'subject', 'demograph', 'human_obs_log', 'observer',
                 'tech_obs_data', 'tech_obs_log', 'stimulus', 'sensor',
                 'instruction', 'human_obs_data', 'device', 'collection',
                 'sensor_file_log']:
    drop_table(table_id, conn)
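
###############################################################################
# As a quick sanity check (a sketch using the ``list_tables`` helper from the
# tests above), confirm that the tables are gone:
remaining = list_tables(conn)
assert 'consent' not in remaining and 'subject' not in remaining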

###############################################################################
# Next comes the SQL command that creates the tables. It was exported from
# dbdesigner on:
# 6/7/2021, 17:02:05
create_cmd = """
CREATE TABLE "consent" (
    "subject_id" VARCHAR(255) NOT NULL,
    "study_id" VARCHAR(255) NOT NULL,
    "staff_id" VARCHAR(255) NOT NULL,
    "application_id" VARCHAR(255) NOT NULL,
    "site_date" VARCHAR(255) NOT NULL,
    CONSTRAINT "consent_pk" PRIMARY KEY ("subject_id","study_id")
) WITH (
  OIDS=FALSE
);
"""
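
###############################################################################
# Now send the DDL to the server. A minimal sketch using a plain psycopg2
# cursor (the ``execute`` helper seen in the tests above would work as well):
with conn.cursor() as cursor:
    cursor.execute(create_cmd)
conn.commit()
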
def test_upsert():
    """Test upsert operation."""

    conn = psycopg2.connect(connect_str)

    table_id = 'test'
    drop_table(table_id, conn)

    column_names = [
        'subject_id', 'first_name_birth', 'last_name_birth', 'Age',
        'attributes'
    ]
    dtypes = [
        'VARCHAR (255)', 'VARCHAR (255)', 'VARCHAR (255)', 'INTEGER', 'JSONB'
    ]
    table_subject = create_table(table_id,
                                 conn=conn,
                                 column_names=column_names,
                                 dtypes=dtypes)
    table_subject.insert_rows([('x5dc', 'mainak', 'jas', 21, {
        'a': 1
    }), ('y5d3', 'anoopum', 'gupta', 25, {
        'b': 2
    }), ('abcd', 'mayank', 'jas', 25, {
        'a': 1
    })],
                              cols=column_names)
    table_subject.insert_rows([('x5dc', 'mainak_new', 'jas_new', 21, {
        'a': 1
    }), ('zzzz', 'deepak', 'singh', 32, {
        'd': 1
    })],
                              cols=column_names,
                              on_conflict='nothing')
    df = table_subject.query()
    assert 'x5dc' in df.index
    assert 'zzzz' in df.index
    assert df.loc['x5dc']['first_name_birth'] == 'mainak'  # not mainak_new

    df = table_subject.query(where="subject_id = 'x5dc'")
    assert 'x5dc' in df.index
    assert 'zzzz' not in df.index

    # smoke test include_columns
    df = table_subject.query(include_columns='first_name_birth',
                             where="subject_id = 'x5dc'")
    df = table_subject.query(include_columns=['first_name_birth'],
                             where="subject_id = 'x5dc'")

    df = query(conn,
               "SELECT first_name_birth FROM test where subject_id = 'x5dc' ",
               column_names='first_name_birth')

    table_subject.insert_rows([('x5dc', 'mainak', 'jazz', 32, {
        'a': 1
    })],
                              cols=column_names,
                              on_conflict='update')
    df = table_subject.query(where="subject_id = 'x5dc'")
    assert df.loc['x5dc']['last_name_birth'] == 'jazz'
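    # Under the hood, ``on_conflict='update'`` corresponds to PostgreSQL's
    # INSERT ... ON CONFLICT DO UPDATE. A raw psycopg2 sketch of an
    # equivalent upsert on the primary key (illustrative, not the exact SQL
    # the library emits):
    with conn.cursor() as cursor:
        cursor.execute(
            'INSERT INTO test (subject_id, first_name_birth, last_name_birth)'
            ' VALUES (%s, %s, %s)'
            ' ON CONFLICT (subject_id) DO UPDATE'
            ' SET last_name_birth = EXCLUDED.last_name_birth',
            ('x5dc', 'mainak', 'jazz'))
    conn.commit()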

    # create index for conflicts
    cmd = 'DROP INDEX IF EXISTS subject_identifier'
    execute(table_subject.conn, table_subject.cursor, cmd)
    cmd = (f'CREATE UNIQUE INDEX subject_identifier ON '
           f'{table_id} (first_name_birth, last_name_birth);')
    execute(table_subject.conn, table_subject.cursor, cmd)

    table_subject.insert_rows(
        [('x5de', 'mainak', 'jazz', 32, {
            'a': 1
        })],
        cols=column_names,
        on_conflict='update',
        conflict_cols=['first_name_birth', 'last_name_birth'],
        update_cols='subject_id')
    df = table_subject.query()
    assert 'x5de' in df.index
    table_subject.insert_rows(
        [('blah', 'mainak', 'jazz', 32, {
            'a': 1
        }), ('yyyy', 'mainak', 'jazz', 31, {
            'a': 1
        })],
        cols=column_names,
        on_conflict='update',
        conflict_cols=['first_name_birth', 'last_name_birth'],
        update_cols='subject_id',
        where='test."Age" = excluded."Age"')
    df = table_subject.query()
    assert 'yyyy' not in df.index
    assert 'blah' in df.index


def test_psql_connection():
    """Test that we can connect to the database."""

    conn = psycopg2.connect(connect_str)

    table_id = 'test'
    drop_table(table_id, conn)
    assert 'test' not in list_tables(conn)

    column_names = ['subject_id', 'first_name_birth', 'last_name_birth', 'Age']
    dtypes = ['VARCHAR (255)', 'VARCHAR (255)', 'VARCHAR (255)', 'INTEGER']
    table_subject = create_table(table_id,
                                 conn=conn,
                                 column_names=column_names,
                                 dtypes=dtypes)
    with pytest.raises(Exception, match='already exists'):
        table_subject = create_table(table_id,
                                     conn=conn,
                                     column_names=column_names,
                                     dtypes=dtypes)
    conn.rollback()  # XXX: needed for the error testing above
    table_subject.insert_rows([('x5dc', 'mainak', 'jas', 21),
                               ('y5d3', 'anoopum', 'gupta', 25)],
                              cols=column_names)
    with pytest.raises(ValueError, match='vals must be a list of tuple'):
        table_subject.insert_rows('blah', ['subject_id'])
    with pytest.raises(ValueError, match='entries in vals must be tuples'):
        table_subject.insert_rows(['blah'], ['subject_id'])
    with pytest.raises(ValueError, match='tuple length must match'):
        table_subject.insert_rows([('x5dc', 'mainak', 'jas')], ['subject_id'])

    assert sorted(table_subject.data_types) == sorted(dtypes)
    table_subject.close()

    table_test = Table('test', conn)
    assert table_test.primary_key == ['subject_id']

    # test updating row
    table_test.update_row('y5d3', ('blah', 'anupum', 'gupta', 32),
                          cols=column_names)
    df = table_test.query()
    assert 'blah' in df.index

    with pytest.raises(ValueError, match='vals must be a tuple'):
        table_test.update_row('blah', 'mainak', ['first_name_birth'])

    # test updating row partially
    table_test.update_row('blah', ('mainak', ), ['first_name_birth'])
    df = table_test.query()
    assert df.loc['blah']['first_name_birth'] == 'mainak'

    with pytest.raises(ValueError, match='column blah is not present'):
        table_test.update_row('blah', ('mainak', ), ['blah'])

    with pytest.raises(ValueError, match='length of vals'):
        table_test.update_row('blah', ('mainak', ),
                              ['first_name_birth', 'last_name_birth'])

    # test dropping a column
    table_test.drop_column(col='subject_id')
    assert 'subject_id' not in table_test.column_names

    # test adding an auto-incrementing default value to the column
    table_test.add_column(col='subject_id', dtype='VARCHAR')
    table_test.alter_column(col='subject_id', default=dict(prefix='SUBJ'))
    pk_val = table_test.insert_rows(
        [('mainak', 'jas', 21)],
        cols=['first_name_birth', 'last_name_birth', 'Age'])
    assert pk_val == 'SUBJ1'
    df = table_test.query()
    assert 'SUBJ1' in df.index

    # test insertion of date
    table_id = 'test_consent'
    drop_table(table_id, conn)

    column_names = ['subject_id', 'consent_date']
    dtypes = ['VARCHAR (255)', 'date']
    table_consent = create_table(table_id, conn, column_names, dtypes)
    date = datetime.datetime.today().strftime('%Y-%m-%d')
    table_consent.insert_rows([('x5dc', date)], cols=column_names)
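    # psycopg2 adapts ``datetime.date`` objects natively, so the ``strftime``
    # round-trip above is optional; passing the object directly also works:
    table_consent.insert_rows([('y5d3', datetime.date.today())],
                              cols=column_names)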

    conn.close()

Example #7

                          'feature_of_interest', 'question', 'response_array']
)

with OptionalSSHTunnelForwarder(**ssh_args) as tunnel:
    with psycopg2.connect(port=tunnel.local_bind_port,
                          host=tunnel.local_bind_host, **db_args) as conn:

        table_metadata = Table('rc_data_dictionary', conn)
        table_metadata.insert_rows(rows_metadata, cols_metadata,
                                   on_conflict='update')

        # for table_id, table_info in table_infos.items():
        for table_id, _ in survey_ids.items():
            table_info = table_infos[table_id]
            print(f'Overwriting table {table_id}')
            drop_table('rc_' + table_id, conn)
            table = create_table('rc_' + table_id, conn, table_info['columns'],
                                 table_info['dtypes'],
                                 primary_key=['subject_id', 'redcap_event_name'])
            df = fetch_survey(project, survey_name=table_id,
                              survey_id=survey_ids[table_id])
            df = df.rename(columns={'record_id': 'subject_id'})

            # XXX: the '*_complete' column name is not consistent across
            # surveys, so locate it by suffix.
            complete_col = [col for col in df.columns if
                            col.endswith('complete')]
            if len(complete_col) == 0:
                warn(f'Skipping {table_id} because of missing complete col')
                continue
            df = df[df[complete_col[0]] == 2]  # REDCap status 2 == Complete