Example #1
def _create_file_meta_for_s3(postgres_url, s3_client, tmp_file):
    utils.create_tables(url=postgres_url)
    bucket_name = BUCKET_NAME
    s3_client.create_bucket(bucket_name, delete_contents_if_exists=True)

    # create file and upload
    filename = os.path.basename(tmp_file)
    project_id = "22"
    project_name = "battlestar"
    node_name = "galactica"
    node_id = "1006"
    file_name = filename
    file_uuid = os.path.join(str(project_id), str(node_id), str(file_name))
    display_name = os.path.join(str(project_name), str(node_name),
                                str(file_name))
    created_at = str(datetime.datetime.now())
    file_size = 1234

    d = {
        "object_name": os.path.join(str(project_id), str(node_id),
                                    str(file_name)),
        "bucket_name": bucket_name,
        "file_name": filename,
        "user_id": USER_ID,
        "user_name": "starbucks",
        "location": SIMCORE_S3_STR,
        "location_id": SIMCORE_S3_ID,
        "project_id": project_id,
        "project_name": project_name,
        "node_id": node_id,
        "node_name": node_name,
        "file_uuid": file_uuid,
        "file_id": file_uuid,
        "raw_file_path": file_uuid,
        "display_file_path": display_name,
        "created_at": created_at,
        "last_modified": created_at,
        "file_size": file_size,
    }

    fmd = FileMetaData(**d)

    return fmd
Example #2
def main():

    config = configparser.ConfigParser()
    config.read('dwh.cfg')

    print("Connecting to Amazon Redshift...")
    conn = psycopg2.connect(
        "host={} dbname={} user={} password={} port={}".format(
            *config['CLUSTER'].values()))
    cur = conn.cursor()
    print("Successfully connected to Amazon Redshift")

    print("Dropping any existing tables...")
    drop_tables(cur, conn)

    print("Creating Tables...")
    create_tables(cur, conn)
    print("Done Creating Tables.")

    conn.close()
    print("Disconnected from Amazon Redshift.")
Example #3
async def test_links_s3(postgres_service_url, s3_client, mock_files_factory, dsm_fixture):
    utils.create_tables(url=postgres_service_url)

    tmp_file = mock_files_factory(1)[0]
    fmd = _create_file_meta_for_s3(postgres_service_url, s3_client, tmp_file)

    dsm = dsm_fixture

    # request a presigned upload link from the DSM and PUT the raw file contents to it
    up_url = await dsm.upload_link(fmd.user_id, fmd.file_uuid)
    with io.open(tmp_file, 'rb') as fp:
        d = fp.read()
        req = urllib.request.Request(up_url, data=d, method='PUT')
        with urllib.request.urlopen(req) as _f:
            pass

    tmp_file2 = tmp_file + ".rec"
    user_id = 0
    down_url = await dsm.download_link(user_id, SIMCORE_S3_STR, fmd.file_uuid)

    urllib.request.urlretrieve(down_url, tmp_file2)

    assert filecmp.cmp(tmp_file2, tmp_file)
Example #4
async def test_dsm_datcore_to_local(postgres_service_url, dsm_fixture,
                                    mock_files_factory,
                                    datcore_structured_testbucket):
    if not has_datcore_tokens():
        return
    utils.create_tables(url=postgres_service_url)
    dsm = dsm_fixture
    user_id = USER_ID
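    # list the files this user can see on Datcore, restricted to the test bucket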
    data = await dsm.list_files(user_id=user_id,
                                location=DATCORE_STR,
                                uuid_filter=BUCKET_NAME)
    assert len(data)

    url, filename = await dsm.download_link_datcore(
        user_id, datcore_structured_testbucket["file_id1"])

    tmp_file = mock_files_factory(1)[0]
    tmp_file2 = tmp_file + ".fromdatcore"

    urllib.request.urlretrieve(url, tmp_file2)

    assert filecmp.cmp(tmp_file2, tmp_file)
Example #5
async def test_dsm_datcore(postgres_service_url, dsm_fixture,
                           datcore_structured_testbucket):
    if not has_datcore_tokens():
        return

    utils.create_tables(url=postgres_service_url)
    dsm = dsm_fixture
    user_id = "0"
    data = await dsm.list_files(user_id=user_id,
                                location=DATCORE_STR,
                                uuid_filter=BUCKET_NAME)
    # the fixture creates three files
    assert len(data) == 3

    # delete the first one
    fmd_to_delete = data[0].fmd
    print("Deleting", fmd_to_delete.bucket_name, fmd_to_delete.object_name)
    await dsm.delete_file(user_id, DATCORE_STR, fmd_to_delete.file_id)

    data = await dsm.list_files(user_id=user_id,
                                location=DATCORE_STR,
                                uuid_filter=BUCKET_NAME)
    assert len(data) == 2
Example #6
async def test_copy_s3_s3(postgres_service_url, s3_client, mock_files_factory,
                          dsm_fixture):
    utils.create_tables(url=postgres_service_url)

    tmp_file = mock_files_factory(1)[0]
    fmd = _create_file_meta_for_s3(postgres_service_url, s3_client, tmp_file)

    dsm = dsm_fixture
    data = await dsm.list_files(user_id=fmd.user_id, location=SIMCORE_S3_STR)
    assert len(data) == 0

    # upload the file
    up_url = await dsm.upload_link(fmd.user_id, fmd.file_uuid)
    with io.open(tmp_file, "rb") as fp:
        d = fp.read()
        req = urllib.request.Request(up_url, data=d, method="PUT")
        with urllib.request.urlopen(req) as _f:
            pass

    data = await dsm.list_files(user_id=fmd.user_id, location=SIMCORE_S3_STR)
    assert len(data) == 1

    # copy the uploaded file to a new project path within the same S3 location
    from_uuid = fmd.file_uuid
    new_project = "zoology"
    to_uuid = os.path.join(new_project, fmd.node_id, fmd.file_name)
    await dsm.copy_file(
        user_id=fmd.user_id,
        dest_location=SIMCORE_S3_STR,
        dest_uuid=to_uuid,
        source_location=SIMCORE_S3_STR,
        source_uuid=from_uuid,
    )

    data = await dsm.list_files(user_id=fmd.user_id, location=SIMCORE_S3_STR)

    assert len(data) == 2
Example #7
def dsm_mockup_db(postgres_service_url, s3_client, mock_files_factory):
    # db
    utils.create_tables(url=postgres_service_url)

    # s3 client
    bucket_name = BUCKET_NAME
    s3_client.create_bucket(bucket_name, delete_contents_if_exists=True)

    # TODO: use pip install Faker
    users = ["alice", "bob", "chuck", "dennis"]

    projects = [
        "astronomy",
        "biology",
        "chemistry",
        "dermatology",
        "economics",
        "futurology",
        "geology",
    ]
    location = SIMCORE_S3_STR

    nodes = ["alpha", "beta", "gamma", "delta"]

    N = 100
    files = mock_files_factory(count=N)
    counter = 0
    data = {}
    for _file in files:
        idx = randrange(len(users))
        user_name = users[idx]
        user_id = idx + 10
        idx = randrange(len(projects))
        project_name = projects[idx]
        project_id = idx + 100
        idx = randrange(len(nodes))
        node = nodes[idx]
        node_id = idx + 10000
        file_name = str(counter)
        object_name = Path(str(project_id), str(node_id), str(counter)).as_posix()
        file_uuid = Path(object_name).as_posix()
        raw_file_path = file_uuid
        display_file_path = str(Path(project_name) / Path(node) / Path(file_name))
        created_at = str(datetime.datetime.now())
        file_size = 1234
        assert s3_client.upload_file(bucket_name, object_name, _file)

        d = {
            "file_uuid": file_uuid,
            "location_id": "0",
            "location": location,
            "bucket_name": bucket_name,
            "object_name": object_name,
            "project_id": str(project_id),
            "project_name": project_name,
            "node_id": str(node_id),
            "node_name": node,
            "file_name": file_name,
            "user_id": str(user_id),
            "user_name": user_name,
            "file_id": str(uuid.uuid4()),
            "raw_file_path": file_uuid,
            "display_file_path": display_file_path,
            "created_at": created_at,
            "last_modified": created_at,
            "file_size": file_size,
        }

        counter = counter + 1

        data[object_name] = FileMetaData(**d)

        # pylint: disable=no-member
        utils.insert_metadata(postgres_service_url, data[object_name])

    total_count = 0
    for _obj in s3_client.list_objects_v2(bucket_name, recursive=True):
        total_count = total_count + 1

    assert total_count == N
    yield data

    # s3 client
    s3_client.remove_bucket(bucket_name, delete_contents=True)

    # db
    utils.drop_tables(url=postgres_service_url)
Example #8
import argparse
import sqlite3

from svm_line_classification.svm_predict_lines import svm_predict_lines
from dl_line_classification.rnn_predict_lines import rnn_predict_lines, LineClassifier
from dl_line_classification.data_generation import DataGenerator
from dl_line_classification.train import train_dl_classification_model
from info_extraction.extraction import extract_line_information
from utils import create_tables

parser = argparse.ArgumentParser(description='')
parser.add_argument('db_filepath',
                    type=str,
                    help="Specify database file to predict lines")
args = parser.parse_args()

cnx = sqlite3.connect(args.db_filepath)
cur = cnx.cursor()
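# make sure the schema exists in the SQLite database before any processing runs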
create_tables(cnx)

# Indexes of accessible conferences to process
PROCESS_LINES = False
GENERATE_VOCAB = False
TRAIN_LINE_CLASSIFIER = True
PREDICT_LINES_DL = False
EXTRACT_INFO = False
CONF_IDS = [i for i in range(1, 200)]
""" Process Lines
- Processes HTML of each page to lines, ordered by conference_id
"""
if PROCESS_LINES:
    add_page_lines(cnx, CONF_IDS)
    cnx.commit()
""" Generate Vocab
Example #9
def test_creating_a_table_succeeds(self, init_db):
    create_tables(init_db)
Example #10
def migrate():
    create_tables(db)
Example #11
def create_tables():
    """Use PeeWee to create tables."""
    utils.create_tables()
Example #12
def test_table_creation(postgres_service):
    utils.create_tables(url=postgres_service)
    a = 12
Example #13
def dsm_mockup_db(postgres_service_url, s3_client, mock_files_factory):
    # db
    utils.create_tables(url=postgres_service_url)

    # s3 client
    bucket_name = BUCKET_NAME
    s3_client.create_bucket(bucket_name, delete_contents_if_exists=True)

    # TODO: use pip install Faker
    users = ['alice', 'bob', 'chuck', 'dennis']

    projects = [
        'astronomy', 'biology', 'chemistry', 'dermatology', 'economics',
        'futurology', 'geology'
    ]
    location = SIMCORE_S3_STR

    nodes = ['alpha', 'beta', 'gamma', 'delta']

    N = 100
    files = mock_files_factory(count=N)
    counter = 0
    data = {}
    for _file in files:
        idx = randrange(len(users))
        user_name = users[idx]
        user_id = idx + 10
        idx = randrange(len(projects))
        project_name = projects[idx]
        project_id = idx + 100
        idx = randrange(len(nodes))
        node = nodes[idx]
        node_id = idx + 10000
        file_name = str(counter)
        object_name = Path(str(project_id), str(node_id),
                           str(counter)).as_posix()
        file_uuid = Path(object_name).as_posix()

        assert s3_client.upload_file(bucket_name, object_name, _file)

        d = {
            'file_uuid': file_uuid,
            'location_id': "0",
            'location': location,
            'bucket_name': bucket_name,
            'object_name': object_name,
            'project_id': str(project_id),
            'project_name': project_name,
            'node_id': str(node_id),
            'node_name': node,
            'file_name': file_name,
            'user_id': str(user_id),
            'user_name': user_name
        }

        counter = counter + 1

        data[object_name] = FileMetaData(**d)

        # pylint: disable=no-member
        utils.insert_metadata(postgres_service_url, data[object_name])

    total_count = 0
    for _obj in s3_client.list_objects_v2(bucket_name, recursive=True):
        total_count = total_count + 1

    assert total_count == N
    yield data

    # s3 client
    s3_client.remove_bucket(bucket_name, delete_contents=True)

    # db
    utils.drop_tables(url=postgres_service_url)
Example #14
def setUp(self):
    utils.create_tables()
    self.data = {
        "url": "https://ddg.gg/",
        "code": utils.gen_code(),
    }
Example #15
cache_proxy = CacheProxy(mc)
app.extensions = getattr(app, 'extensions', {})
app.extensions['cache_proxy'] = cache_proxy
app.config.from_object(DevConfig)

# db
db = Database(app)
auth = Auth(app, db)
# toolbar = DebugToolbarExtension(app)

mail = Mail(app)

import models

import utils
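# create the schema and seed the initial admin user when the app module is loaded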
utils.create_tables()
utils.init_admin_user()

import views
app.jinja_env.globals['static'] = (
    lambda filename: url_for('static', filename=filename))

# Blueprint for social login
# from social_login import qq_bp, weibo_bp, wechat_bp
# app.register_blueprint(qq_bp)
# app.register_blueprint(weibo_bp)
# app.register_blueprint(wechat_bp)

# configure for api (bluepint for api)
api_bp = Blueprint('api', __name__, url_prefix="/api")
api = Api(api_bp, default_mediatype='application/json')
Example #16
def get_db_session():
    engine = db_connect()
    create_tables(engine)
    Session = sessionmaker(bind=engine)
    session = Session()
    return session