Code example #1
import psycopg2

from neurobooth_terra import Table
from neurobooth_terra.dataflow import copy_files


# `mock_data` is a pytest fixture yielding (src_dirname, dest_dirname);
# `db_args` holds the remaining psycopg2 connection keyword arguments.
def test_copy(mock_data):
    """Test copying of files."""
    src_dirname, dest_dirname = mock_data
    with psycopg2.connect(port='5432', host='localhost', **db_args) as conn:
        db_table = Table('log_file', conn)
        sensor_file_table = Table('log_sensor_file', conn)
        db_rows = copy_files(src_dirname, dest_dirname, db_table,
                             sensor_file_table)
Code example #2
import psycopg2

from neurobooth_terra import Table
# delete_files is assumed to live in neurobooth_terra.dataflow, next to copy_files.
from neurobooth_terra.dataflow import delete_files


def test_delete(mock_data):
    """Test deleting files."""
    src_dirname, dest_dirname = mock_data
    with psycopg2.connect(port='5432', host='localhost', **db_args) as conn:
        db_table = Table('log_file', conn)
        delete_files(db_table, target_dir=src_dirname,
                     suitable_dest_dir=dest_dirname,
                     threshold=0.1, older_than=-1)
        df = db_table.query(where='is_deleted=True')
        assert df.src_dirname.isna().all()  # written directly, not copied
        assert (df.dest_dirname == src_dirname).all()
Code example #3
import os
from tempfile import NamedTemporaryFile

import psycopg2

from neurobooth_terra import Table
# write_files is assumed to live in neurobooth_terra.dataflow, next to copy_files.
from neurobooth_terra.dataflow import write_files


def test_write(mock_data):
    """Test writing files."""
    src_dirname, _ = mock_data

    with psycopg2.connect(port='5432', host='localhost', **db_args) as conn:
        sensor_file_table = Table('log_sensor_file', conn)
        db_table = Table('log_file', conn)

        for id in range(5):
            with NamedTemporaryFile(dir=src_dirname, delete=False) as fp:
                fp.write(b'Hello world!')
                dest_dir, fname = os.path.split(fp.name)
                sensor_file_table.insert_rows([(f'sensor_file_{id}', [fname])],
                                              cols=['log_sensor_file_id', 'sensor_file_path'])

        write_files(sensor_file_table, db_table, dest_dir)

        # Write file to a subfolder
        dest_dir2 = os.path.join(src_dirname, 'test_subfolder')
        os.makedirs(dest_dir2, exist_ok=True)  # NamedTemporaryFile needs the directory to exist
        with NamedTemporaryFile(dir=dest_dir2, delete=False) as fp:
            fp.write(b'test')
            _, fname = os.path.split(fp.name)
            fname = os.path.join('test_subfolder', fname)
            sensor_file_table.insert_rows([(f'sensor_file{id + 1}', [fname])],
                                          cols=['log_sensor_file_id', 'sensor_file_path'])
        write_files(sensor_file_table, db_table, dest_dir)
        # no error should be thrown if no new files to write
        write_files(sensor_file_table, db_table, dest_dir)
Code example #4
import pygraphviz as pgv

from neurobooth_terra import Table

# `conn` (a psycopg2 connection) and the `query()` helper are defined in the
# truncated head of this script. The head of the query string was also cut
# off; a standard pg_catalog.pg_tables listing is assumed here (table[1]
# below is then the table name).
query_tables_cmd = """
    SELECT * FROM pg_catalog.pg_tables
    WHERE schemaname != 'pg_catalog' AND
          schemaname != 'information_schema';
"""

tables = query(query_tables_cmd)
table_ids = [table[1] for table in tables]

#### Create graph

A = pgv.AGraph(directed=True,
               repulsiveforce=10.0,
               overlap=False,
               splines='curved')

# Add nodes
for table_id in table_ids:
    table = Table(table_id, conn)

    rows = []
    # Graphviz HTML-like labels have no <th> tag; use a styled <tr> as the header row.
    rows.append(f'<tr><td bgcolor="lightsalmon"><b>{table_id}</b></td></tr>')
    for column_name in table.column_names:
        rows.append(f''
                    '<tr>'
                    f'<td port="{column_name}">{column_name}</td>'
                    '</tr>')
    label = "<<table border='0' cellborder='1' cellspacing='0' cellpadding='4'>"
    label += '\n'.join(rows)
    label += "</table>>"
    A.add_node(table_id, shape='plaintext', label=label)

# Add edges
for table_id in table_ids:
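# The edge-adding loop is truncated in this excerpt. Once the edges are in
# place, the graph could be rendered with pygraphviz roughly as follows
# (a sketch; the layout engine and output filename are illustrative, not from
# the original script):
#
#     A.layout(prog='fdp')
#     A.draw('schema.png')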
Code example #5
import psycopg2

from neurobooth_terra import Table, create_table
from neurobooth_terra.fixes import OptionalSSHTunnelForwarder
from neurobooth_terra.dataflow import copy_files

from config import ssh_args, db_args

src_dir = '/autofs/nas/neurobooth/data_test/'
dest_dir = '/autofs/nas/neurobooth/data_test_backup/'

with OptionalSSHTunnelForwarder(**ssh_args) as tunnel:
    with psycopg2.connect(port=tunnel.local_bind_port,
                          host=tunnel.local_bind_host,
                          **db_args) as conn:

        sensor_file_table = Table('log_sensor_file', conn)
        db_table = Table('log_file', conn)
        copy_files(src_dir, dest_dir, db_table, sensor_file_table)

# xdf and csv are not represented; other files did not get written due to
# non-graceful exits.
# Possible ways of handling retrospective entries added to log_sensor_file:
# 1. Do not affect write_files assuming files still exist on NAS.
#    It will simply copy over the new entries to log_file.
# 2. However, retrospective entries should be added for copy operations.
#    For example, if the data was copied from NAS to DrWho, the subsequent
#    rsync operation will not produce an output for that file and hence
#    it will not be added automatically to log_file.
# 3. For deletion, there are two options:
#    a. Retrospectively adding the date_verified by looking at the file header (?)
#    b. Use date_verified as current date and let the deletion happen x days
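# A minimal sketch of option 3b, assuming log_file has a `date_verified`
# column (as the comments above suggest) and using Table.update_row with the
# (pk_val, vals, cols) signature shown in the tests; '<log_file_pk>' is a
# placeholder, not a real identifier:
#
#     import datetime
#
#     db_table = Table('log_file', conn)
#     today = datetime.date.today().strftime('%Y-%m-%d')
#     db_table.update_row('<log_file_pk>', (today,), cols=['date_verified'])
#
# delete_files can then apply its `older_than` window relative to that date.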
Code example #6
import psycopg2

from neurobooth_terra.fixes import OptionalSSHTunnelForwarder
from config import ssh_args, db_args

from neurobooth_terra import Table


def sanitize_date(s):
    """Reduce an array of timestamps to its first entry, as a YYYY-MM-DD string."""
    return s[0].strftime('%Y-%m-%d')


with OptionalSSHTunnelForwarder(**ssh_args) as tunnel:
    with psycopg2.connect(port=tunnel.local_bind_port,
                          host=tunnel.local_bind_host,
                          **db_args) as conn:

        table_task = Table('log_task', conn)
        table_session = Table('log_session', conn)

        task_df = table_task.query(
            where='log_session_id IS NULL AND date_times IS NOT NULL')
        session_df = table_session.query().reset_index()

        task_df['date_times'] = task_df['date_times'].apply(sanitize_date)
        task_groups = task_df.groupby(by=['subject_id', 'date_times'])

        log_session_id = int(session_df.log_session_id.max() + 1)
        for group, df in task_groups:
            table_session.insert_rows(cols=[
                'log_session_id', 'subject_id', 'date', 'application_id'
            ],
                                      vals=[(log_session_id, group[0],
Code example #7
# Don't run rsync on weekends; run this file on weekends instead.
import shutil

import psycopg2

from neurobooth_terra import Table
from neurobooth_terra.fixes import OptionalSSHTunnelForwarder
# copy_table is assumed to be importable from the package top level (like
# create_table), and delete_files from neurobooth_terra.dataflow.
from neurobooth_terra import copy_table
from neurobooth_terra.dataflow import delete_files
from config import ssh_args, db_args

# dry_run, table_id, target_dir and suitable_dest_dir are defined in the
# truncated head of this script.
if dry_run:
    stats = shutil.disk_usage(target_dir)
    threshold = stats.used / stats.total - 0.1  # ensure that it deletes
    older_than = 1
else:
    threshold = 0.9
    older_than = 30
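# Worked example of the dry-run arithmetic above: if the disk holding
# target_dir is 75 % full, stats.used / stats.total = 0.75, so
# threshold = 0.75 - 0.1 = 0.65. Current usage then exceeds the threshold,
# which is what the "ensure that it deletes" comment is after.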

with OptionalSSHTunnelForwarder(**ssh_args) as tunnel:
    with psycopg2.connect(port=tunnel.local_bind_port,
                          host=tunnel.local_bind_host,
                          **db_args) as conn:

        if dry_run:
            copy_table(src_table_id=table_id,
                       target_table_id=table_id + '_copy',
                       conn=conn)
            db_table = Table(table_id + '_copy', conn)
        else:
            db_table = Table(table_id, conn)

        delete_files(db_table,
                     target_dir,
                     suitable_dest_dir,
                     threshold=threshold,
                     older_than=older_than,
                     dry_run=dry_run)
Code example #8
import datetime

import psycopg2
import pytest

# drop_table and list_tables are assumed to be importable from the package
# top level, alongside Table and create_table.
from neurobooth_terra import Table, create_table, drop_table, list_tables


# `connect_str` is the libpq connection string defined in the test module.
def test_psql_connection():
    """Test that we can connect to the database."""

    conn = psycopg2.connect(connect_str)

    table_id = 'test'
    drop_table(table_id, conn)
    assert 'test' not in list_tables(conn)

    column_names = ['subject_id', 'first_name_birth', 'last_name_birth', 'Age']
    dtypes = ['VARCHAR (255)', 'VARCHAR (255)', 'VARCHAR (255)', 'INTEGER']
    table_subject = create_table(table_id,
                                 conn=conn,
                                 column_names=column_names,
                                 dtypes=dtypes)
    with pytest.raises(Exception, match='already exists'):
        table_subject = create_table(table_id,
                                     conn=conn,
                                     column_names=column_names,
                                     dtypes=dtypes)
    conn.rollback()  # XXX: needed for the error testing above
    table_subject.insert_rows([('x5dc', 'mainak', 'jas', 21),
                               ('y5d3', 'anoopum', 'gupta', 25)],
                              cols=column_names)
    with pytest.raises(ValueError, match='vals must be a list of tuple'):
        table_subject.insert_rows('blah', ['subject_id'])
    with pytest.raises(ValueError, match='entries in vals must be tuples'):
        table_subject.insert_rows(['blah'], ['subject_id'])
    with pytest.raises(ValueError, match='tuple length must match'):
        table_subject.insert_rows([('x5dc', 'mainak', 'jas')], ['subject_id'])

    assert sorted(table_subject.data_types) == sorted(dtypes)
    table_subject.close()

    table_test = Table('test', conn)
    assert table_test.primary_key == ['subject_id']

    # test updating row
    table_test.update_row('y5d3', ('blah', 'anupum', 'gupta', 32),
                          cols=column_names)
    df = table_test.query()
    assert 'blah' in df.index

    with pytest.raises(ValueError, match='vals must be a tuple'):
        table_test.update_row('blah', 'mainak', ['first_name_birth'])

    # test updating row partially
    table_test.update_row('blah', ('mainak', ), ['first_name_birth'])
    df = table_test.query()
    assert df.loc['blah']['first_name_birth'] == 'mainak'

    with pytest.raises(ValueError, match='column blah is not present'):
        table_test.update_row('blah', ('mainak', ), ['blah'])

    with pytest.raises(ValueError, match='length of vals'):
        table_test.update_row('blah', ('mainak', ),
                              ['first_name_birth', 'last_name_birth'])

    # test dropping a column
    table_test.drop_column(col='subject_id')
    assert 'subject_id' not in table_test.column_names

    # test adding an auto-incrementing default value to the column
    table_test.add_column(col='subject_id', dtype='VARCHAR')
    table_test.alter_column(col='subject_id', default=dict(prefix='SUBJ'))
    pk_val = table_test.insert_rows(
        [('mainak', 'jas', 21)],
        cols=['first_name_birth', 'last_name_birth', 'Age'])
    assert pk_val == 'SUBJ1'
    df = table_test.query()
    assert 'SUBJ1' in df.index

    # test insertion of date
    table_id = 'test_consent'
    drop_table(table_id, conn)

    column_names = ['subject_id', 'consent_date']
    dtypes = ['VARCHAR (255)', 'date']
    table_consent = create_table(table_id, conn, column_names, dtypes)
    date = datetime.datetime.today().strftime('%Y-%m-%d')
    table_consent.insert_rows([('x5dc', date)], cols=column_names)

    conn.close()
Code example #9
# The head of this excerpt is truncated: `metadata` (the REDCap data
# dictionary as a DataFrame), `survey_ids`, `project`, and the helpers
# get_tables_structure, dataframe_to_tuple and fetch_survey are defined there.
import psycopg2

from neurobooth_terra import Table, create_table, drop_table
from neurobooth_terra.fixes import OptionalSSHTunnelForwarder
from config import ssh_args, db_args

metadata.to_csv('data_dictionary_modified.csv')

table_infos = get_tables_structure(metadata, include_surveys=survey_ids.keys())

metadata = metadata.reset_index()
rows_metadata, cols_metadata = dataframe_to_tuple(
    metadata, df_columns=['field_name', 'redcap_form_name',
                          'database_table_name', 'redcap_form_description',
                          'feature_of_interest', 'question', 'response_array']
)

with OptionalSSHTunnelForwarder(**ssh_args) as tunnel:
    with psycopg2.connect(port=tunnel.local_bind_port,
                          host=tunnel.local_bind_host, **db_args) as conn:

        table_metadata = Table('rc_data_dictionary', conn)
        table_metadata.insert_rows(rows_metadata, cols_metadata,
                                   on_conflict='update')

        # for table_id, table_info in table_infos.items():
        for table_id, _ in survey_ids.items():
            table_info = table_infos[table_id]
            print(f'Overwriting table {table_id}')
            drop_table('rc_' + table_id, conn)
            table = create_table('rc_' + table_id, conn, table_info['columns'],
                                 table_info['dtypes'],
                                 primary_key=['subject_id', 'redcap_event_name'])
            df = fetch_survey(project, survey_name=table_id,
                              survey_id=survey_ids[table_id])
            df = df.rename(columns={'record_id': 'subject_id'})