def mock_data():
    """Create mock tables and temp directories for testing purposes.

    Yields
    ------
    src_dirname : str
        Path to a temporary source directory (guaranteed to end with the
        path separator) containing an empty ``test_subfolder``.
    dest_dirname : str
        Path to a temporary destination directory.

    Notes
    -----
    Creates (dropping first if present) the ``log_sensor_file`` and
    ``log_file`` tables on the local database.  Fix over the original:
    cleanup now runs in a ``finally`` clause, so the temporary directories
    are removed even when table setup or the consuming test raises.
    """
    # os.path.join(..., '') appends a trailing separator, which rsync-style
    # tooling uses to mean "contents of" rather than "the directory itself".
    src_dirname = os.path.join(mkdtemp(), '')  # ensure path ends with /
    dest_dirname = mkdtemp()
    os.mkdir(os.path.join(src_dirname, 'test_subfolder'))
    try:
        # NOTE: psycopg2's connection context manager wraps a transaction
        # (commit/rollback); it does not close the connection.
        with psycopg2.connect(port='5432', host='localhost',
                              **db_args) as conn:
            table_id = 'log_sensor_file'
            column_names = ['log_sensor_file_id', 'sensor_file_path',
                            'another_column']
            dtypes = ['VARCHAR (255)', 'text[]', 'VARCHAR (255)']
            drop_table(table_id, conn)
            create_table(table_id, conn, column_names, dtypes,
                         primary_key='log_sensor_file_id')

            table_id = 'log_file'
            column_names = ['operation_id', 'log_sensor_file_id',
                            'src_dirname', 'dest_dirname', 'fname',
                            'time_verified', 'rsync_operation', 'is_deleted',
                            'is_finished']
            dtypes = ['SERIAL', 'text', 'text', 'text', 'text', 'timestamp',
                      'text', 'boolean', 'boolean']
            drop_table(table_id, conn)
            create_table(table_id, conn, column_names, dtypes,
                         primary_key='operation_id',
                         foreign_key={'log_sensor_file_id':
                                      'log_sensor_file'})
            yield src_dirname, dest_dirname
    finally:
        # cleanup: always remove the temp dirs, even on test failure
        shutil.rmtree(src_dirname, ignore_errors=True)
        shutil.rmtree(dest_dirname, ignore_errors=True)
def test_compound_primary_key():
    """Test that a compound (multi-column) primary key is honored.

    Fix over the original: the connection is now closed in a ``finally``
    clause instead of being leaked when the test finishes or fails.
    """
    conn = psycopg2.connect(connect_str)
    try:
        table_id = 'test'
        drop_table(table_id, conn)
        column_names = ['subject_id', 'first_name_birth', 'last_name_birth',
                        'Age']
        dtypes = ['VARCHAR (255)', 'VARCHAR (255)', 'VARCHAR (255)',
                  'INTEGER']
        primary_key = ['first_name_birth', 'last_name_birth']
        table_subject = create_table(table_id, conn=conn,
                                     column_names=column_names,
                                     dtypes=dtypes, primary_key=primary_key)
        # order of key columns is not significant, hence set comparison
        assert set(table_subject.primary_key) == set(primary_key)
    finally:
        conn.close()
def test_delete():
    """Test deleting rows.

    Fix over the original: the connection is now closed in a ``finally``
    clause instead of being leaked when the test finishes or fails.
    """
    conn = psycopg2.connect(connect_str)
    try:
        table_id = 'test'
        drop_table(table_id, conn)
        column_names = ['subject_id', 'first_name_birth', 'last_name_birth',
                        'Age']
        dtypes = ['VARCHAR (255)', 'VARCHAR (255)', 'VARCHAR (255)',
                  'INTEGER']
        table_subject = create_table(table_id, conn=conn,
                                     column_names=column_names,
                                     dtypes=dtypes)
        table_subject.insert_rows([('x5dc', 'mainak', 'jas', 21),
                                   ('y5d3', 'anoopum', 'gupta', 25),
                                   ('abcd', 'mayank', 'jas', 25)],
                                  cols=column_names)
        # 'mainak' and 'mayank' match the LIKE pattern; only one row remains
        table_subject.delete_row("first_name_birth LIKE 'ma%'")
        df = table_subject.query()
        assert len(df['first_name_birth']) == 1
    finally:
        conn.close()
# drop all tables ############################################################################### # First, we will create a connection using ``psycopg2``. connect_str = ("dbname='neurobooth' user='******' host='localhost' " "password='******'") conn = psycopg2.connect(connect_str) for table_id in ['consent', 'study', 'register', 'contact', 'clinical', 'subject', 'demograph', 'human_obs_log', 'observer', 'tech_obs_data', 'tech_obs_log', 'stimulus', 'sensor', 'instruction', 'human_obs_data', 'device', 'collection', 'sensor_file_log']: drop_table(table_id, conn) ############################################################################### # Then comes our sql command to create the SQL tables. This comes from # dbdesigner. # 6/7/2021, 17:02:05 create_cmd = """ CREATE TABLE "consent" ( "subject_id" VARCHAR(255) NOT NULL, "study_id" VARCHAR(255) NOT NULL, "staff_id" VARCHAR(255) NOT NULL, "application_id" VARCHAR(255) NOT NULL, "site_date" VARCHAR(255) NOT NULL, CONSTRAINT "consent_pk" PRIMARY KEY ("subject_id","study_id") ) WITH ( OIDS=FALSE
def test_upsert():
    """Test upsert operation.

    Exercises ``insert_rows`` with ``on_conflict='nothing'`` (keep the
    existing row) and ``on_conflict='update'`` (overwrite it), including
    conflicts detected via a unique index with ``conflict_cols`` /
    ``update_cols`` and a conditional ``where`` clause.
    """
    conn = psycopg2.connect(connect_str)
    table_id = 'test'
    drop_table(table_id, conn)
    column_names = [
        'subject_id', 'first_name_birth', 'last_name_birth', 'Age',
        'attributes'
    ]
    dtypes = [
        'VARCHAR (255)', 'VARCHAR (255)', 'VARCHAR (255)', 'INTEGER', 'JSONB'
    ]
    table_subject = create_table(table_id, conn=conn,
                                 column_names=column_names, dtypes=dtypes)
    table_subject.insert_rows([('x5dc', 'mainak', 'jas', 21, {'a': 1}),
                               ('y5d3', 'anoopum', 'gupta', 25, {'b': 2}),
                               ('abcd', 'mayank', 'jas', 25, {'a': 1})],
                              cols=column_names)
    # 'x5dc' conflicts on the primary key; with on_conflict='nothing'
    # the existing row must be left untouched while 'zzzz' is inserted.
    table_subject.insert_rows([('x5dc', 'mainak_new', 'jas_new', 21,
                                {'a': 1}),
                               ('zzzz', 'deepak', 'singh', 32, {'d': 1})],
                              cols=column_names, on_conflict='nothing')
    df = table_subject.query()
    assert 'x5dc' in df.index
    assert 'zzzz' in df.index
    assert df.loc['x5dc']['first_name_birth'] == 'mainak'  # not mainak_new

    # a where clause restricts which rows come back
    df = table_subject.query(where="subject_id = 'x5dc'")
    assert 'x5dc' in df.index
    assert 'zzzz' not in df.index

    # smoke test include_columns (both str and list forms)
    df = table_subject.query(include_columns='first_name_birth',
                             where="subject_id = 'x5dc'")
    df = table_subject.query(include_columns=['first_name_birth'],
                             where="subject_id = 'x5dc'")
    df = query(conn,
               "SELECT first_name_birth FROM test where subject_id = 'x5dc' ",
               column_names='first_name_birth')

    # on_conflict='update' overwrites the conflicting row
    table_subject.insert_rows([('x5dc', 'mainak', 'jazz', 32, {'a': 1})],
                              cols=column_names, on_conflict='update')
    df = table_subject.query(where="subject_id = 'x5dc'")
    assert df.loc['x5dc']['last_name_birth'] == 'jazz'

    # create index for conflicts: conflicts are now detected on the
    # (first_name_birth, last_name_birth) pair rather than the primary key
    cmd = 'DROP INDEX IF EXISTS subject_identifier'
    execute(table_subject.conn, table_subject.cursor, cmd)
    cmd = (f'CREATE UNIQUE INDEX subject_identifier ON '
           f'{table_id} (first_name_birth, last_name_birth);')
    execute(table_subject.conn, table_subject.cursor, cmd)
    # ('mainak', 'jazz') already exists -> only subject_id gets updated
    table_subject.insert_rows(
        [('x5de', 'mainak', 'jazz', 32, {'a': 1})], cols=column_names,
        on_conflict='update',
        conflict_cols=['first_name_birth', 'last_name_birth'],
        update_cols='subject_id')
    df = table_subject.query()
    assert 'x5de' in df.index
    # conditional update: only the row whose Age matches the incoming value
    # ('blah', Age 32) is applied; ('yyyy', Age 31) is not.
    table_subject.insert_rows(
        [('blah', 'mainak', 'jazz', 32, {'a': 1}),
         ('yyyy', 'mainak', 'jazz', 31, {'a': 1})],
        cols=column_names, on_conflict='update',
        conflict_cols=['first_name_birth', 'last_name_birth'],
        update_cols='subject_id', where='test."Age" = excluded."Age"')
    df = table_subject.query()
    assert 'yyyy' not in df.index
    assert 'blah' in df.index
def test_psql_connection():
    """Test that we can connect to the database.

    Also exercises the core Table API end to end: create/drop, input
    validation errors for insert/update, partial row updates, column
    drop/add/alter with an auto-incrementing prefixed default, and
    insertion of a ``date`` value.
    """
    conn = psycopg2.connect(connect_str)
    table_id = 'test'
    drop_table(table_id, conn)
    assert 'test' not in list_tables(conn)
    column_names = ['subject_id', 'first_name_birth', 'last_name_birth',
                    'Age']
    dtypes = ['VARCHAR (255)', 'VARCHAR (255)', 'VARCHAR (255)', 'INTEGER']
    table_subject = create_table(table_id, conn=conn,
                                 column_names=column_names, dtypes=dtypes)
    # creating the same table twice must fail
    with pytest.raises(Exception, match='already exists'):
        table_subject = create_table(table_id, conn=conn,
                                     column_names=column_names,
                                     dtypes=dtypes)
    conn.rollback()  # XXX: needed for the error testing above
    table_subject.insert_rows([('x5dc', 'mainak', 'jas', 21),
                               ('y5d3', 'anoopum', 'gupta', 25)],
                              cols=column_names)
    # input validation on insert_rows
    with pytest.raises(ValueError, match='vals must be a list of tuple'):
        table_subject.insert_rows('blah', ['subject_id'])
    with pytest.raises(ValueError, match='entries in vals must be tuples'):
        table_subject.insert_rows(['blah'], ['subject_id'])
    with pytest.raises(ValueError, match='tuple length must match'):
        table_subject.insert_rows([('x5dc', 'mainak', 'jas')],
                                  ['subject_id'])
    assert sorted(table_subject.data_types) == sorted(dtypes)
    table_subject.close()

    # reopening an existing table recovers its primary key
    table_test = Table('test', conn)
    assert table_test.primary_key == ['subject_id']

    # test updating row
    table_test.update_row('y5d3', ('blah', 'anupum', 'gupta', 32),
                          cols=column_names)
    df = table_test.query()
    assert 'blah' in df.index
    with pytest.raises(ValueError, match='vals must be a tuple'):
        table_test.update_row('blah', 'mainak', ['first_name_birth'])

    # test updating row partially
    table_test.update_row('blah', ('mainak', ), ['first_name_birth'])
    df = table_test.query()
    assert df.loc['blah']['first_name_birth'] == 'mainak'
    with pytest.raises(ValueError, match='column blah is not present'):
        table_test.update_row('blah', ('mainak', ), ['blah'])
    with pytest.raises(ValueError, match='length of vals'):
        table_test.update_row('blah', ('mainak', ),
                              ['first_name_birth', 'last_name_birth'])

    # test dropping a column
    table_test.drop_column(col='subject_id')
    assert 'subject_id' not in table_test.column_names

    # test adding an auto-incrementing default value to the column
    table_test.add_column(col='subject_id', dtype='VARCHAR')
    table_test.alter_column(col='subject_id', default=dict(prefix='SUBJ'))
    pk_val = table_test.insert_rows(
        [('mainak', 'jas', 21)],
        cols=['first_name_birth', 'last_name_birth', 'Age'])
    assert pk_val == 'SUBJ1'
    df = table_test.query()
    assert 'SUBJ1' in df.index

    # test insertion of date
    table_id = 'test_consent'
    drop_table(table_id, conn)
    column_names = ['subject_id', 'consent_date']
    dtypes = ['VARCHAR (255)', 'date']
    table_consent = create_table(table_id, conn, column_names, dtypes)
    date = datetime.datetime.today().strftime('%Y-%m-%d')
    table_consent.insert_rows([('x5dc', date)], cols=column_names)
    conn.close()
'feature_of_interest', 'question', 'response_array'] ) with OptionalSSHTunnelForwarder(**ssh_args) as tunnel: with psycopg2.connect(port=tunnel.local_bind_port, host=tunnel.local_bind_host, **db_args) as conn: table_metadata = Table('rc_data_dictionary', conn) table_metadata.insert_rows(rows_metadata, cols_metadata, on_conflict='update') # for table_id, table_info in table_infos.items(): for table_id, _ in survey_ids.items(): table_info = table_infos[table_id] print(f'Overwriting table {table_id}') drop_table('rc_' + table_id, conn) table = create_table('rc_' + table_id, conn, table_info['columns'], table_info['dtypes'], primary_key=['subject_id', 'redcap_event_name']) df = fetch_survey(project, survey_name=table_id, survey_id=survey_ids[table_id]) df = df.rename(columns={'record_id': 'subject_id'}) # XXX: not consistent. complete_col = [col for col in df.columns if col.endswith('complete')] if len(complete_col) == 0: warn(f'Skipping {table_id} because of missing complete col') continue df = df[df[complete_col[0]] == 2]