def _create_file_meta_for_s3(postgres_url, s3_client, tmp_file):
    utils.create_tables(url=postgres_url)
    bucket_name = BUCKET_NAME
    s3_client.create_bucket(bucket_name, delete_contents_if_exists=True)

    # create file and upload
    filename = os.path.basename(tmp_file)
    project_id = "22"
    project_name = "battlestar"
    node_name = "galactica"
    node_id = "1006"
    file_name = filename
    file_uuid = os.path.join(str(project_id), str(node_id), str(file_name))
    display_name = os.path.join(str(project_name), str(node_name), str(file_name))
    created_at = str(datetime.datetime.now())
    file_size = 1234

    d = {
        "object_name": os.path.join(str(project_id), str(node_id), str(file_name)),
        "bucket_name": bucket_name,
        "file_name": filename,
        "user_id": USER_ID,
        "user_name": "starbucks",
        "location": SIMCORE_S3_STR,
        "location_id": SIMCORE_S3_ID,
        "project_id": project_id,
        "project_name": project_name,
        "node_id": node_id,
        "node_name": node_name,
        "file_uuid": file_uuid,
        "file_id": file_uuid,
        "raw_file_path": file_uuid,
        "display_file_path": display_name,
        "created_at": created_at,
        "last_modified": created_at,
        "file_size": file_size,
    }

    fmd = FileMetaData(**d)
    return fmd
def main():
    config = configparser.ConfigParser()
    config.read('dwh.cfg')

    print("Connecting to Amazon Redshift...")
    # NOTE: the positional unpacking below relies on the [CLUSTER] keys in
    # dwh.cfg being listed in exactly this order: host, dbname, user,
    # password, port
    conn = psycopg2.connect(
        "host={} dbname={} user={} password={} port={}".format(
            *config['CLUSTER'].values()))
    cur = conn.cursor()
    print("Successfully connected to Amazon Redshift")

    print("Dropping any existing tables...")
    drop_tables(cur, conn)

    print("Creating Tables...")
    create_tables(cur, conn)
    print("Done Creating Tables.")

    conn.close()
    print("Disconnected from Amazon Redshift.")
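# A minimal sketch of the dwh.cfg [CLUSTER] section that main() above assumes;
# the key names and values are placeholders, not taken from the source. What
# matters is that the keys appear in the order consumed by the format string:
# host, dbname, user, password, port.
#
# [CLUSTER]
# HOST = example-cluster.abc123.us-west-2.redshift.amazonaws.com
# DB_NAME = dwh
# DB_USER = dwhuser
# DB_PASSWORD = <redacted>
# DB_PORT = 5439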
async def test_links_s3(postgres_service_url, s3_client, mock_files_factory, dsm_fixture):
    utils.create_tables(url=postgres_service_url)

    tmp_file = mock_files_factory(1)[0]
    fmd = _create_file_meta_for_s3(postgres_service_url, s3_client, tmp_file)

    dsm = dsm_fixture
    up_url = await dsm.upload_link(fmd.user_id, fmd.file_uuid)
    with io.open(tmp_file, 'rb') as fp:
        d = fp.read()
        req = urllib.request.Request(up_url, data=d, method='PUT')
        with urllib.request.urlopen(req) as _f:
            pass

    tmp_file2 = tmp_file + ".rec"
    user_id = 0
    down_url = await dsm.download_link(user_id, SIMCORE_S3_STR, fmd.file_uuid)

    urllib.request.urlretrieve(down_url, tmp_file2)

    assert filecmp.cmp(tmp_file2, tmp_file)
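# For reference: the raw-urllib PUT in the test above can also be written with
# requests, which reads more directly. This is an alternative sketch, not part
# of the original test; the helper name is hypothetical.
import requests

def upload_via_presigned_url(url: str, path: str) -> None:
    """Stream a local file to a presigned upload URL via HTTP PUT."""
    with open(path, "rb") as fp:
        resp = requests.put(url, data=fp)
    resp.raise_for_status()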
async def test_dsm_datcore_to_local(postgres_service_url, dsm_fixture, mock_files_factory, datcore_structured_testbucket):
    if not has_datcore_tokens():
        return
    utils.create_tables(url=postgres_service_url)
    dsm = dsm_fixture
    user_id = USER_ID
    data = await dsm.list_files(user_id=user_id, location=DATCORE_STR, uuid_filter=BUCKET_NAME)
    assert len(data)

    url, filename = await dsm.download_link_datcore(
        user_id, datcore_structured_testbucket["file_id1"])

    tmp_file = mock_files_factory(1)[0]
    tmp_file2 = tmp_file + ".fromdatcore"

    urllib.request.urlretrieve(url, tmp_file2)

    assert filecmp.cmp(tmp_file2, tmp_file)
async def test_dsm_datcore(postgres_service_url, dsm_fixture, datcore_structured_testbucket):
    if not has_datcore_tokens():
        return
    utils.create_tables(url=postgres_service_url)
    dsm = dsm_fixture
    user_id = "0"
    data = await dsm.list_files(user_id=user_id, location=DATCORE_STR, uuid_filter=BUCKET_NAME)
    # the fixture creates three files (the assertions below rely on that)
    assert len(data) == 3

    # delete the first one
    fmd_to_delete = data[0].fmd
    print("Deleting", fmd_to_delete.bucket_name, fmd_to_delete.object_name)
    await dsm.delete_file(user_id, DATCORE_STR, fmd_to_delete.file_id)

    data = await dsm.list_files(user_id=user_id, location=DATCORE_STR, uuid_filter=BUCKET_NAME)
    assert len(data) == 2
async def test_copy_s3_s3(postgres_service_url, s3_client, mock_files_factory, dsm_fixture):
    utils.create_tables(url=postgres_service_url)

    tmp_file = mock_files_factory(1)[0]
    fmd = _create_file_meta_for_s3(postgres_service_url, s3_client, tmp_file)

    dsm = dsm_fixture
    data = await dsm.list_files(user_id=fmd.user_id, location=SIMCORE_S3_STR)
    assert len(data) == 0

    # upload the file
    up_url = await dsm.upload_link(fmd.user_id, fmd.file_uuid)
    with io.open(tmp_file, "rb") as fp:
        d = fp.read()
        req = urllib.request.Request(up_url, data=d, method="PUT")
        with urllib.request.urlopen(req) as _f:
            pass

    data = await dsm.list_files(user_id=fmd.user_id, location=SIMCORE_S3_STR)
    assert len(data) == 1

    from_uuid = fmd.file_uuid
    new_project = "zoology"
    to_uuid = os.path.join(new_project, fmd.node_id, fmd.file_name)
    await dsm.copy_file(
        user_id=fmd.user_id,
        dest_location=SIMCORE_S3_STR,
        dest_uuid=to_uuid,
        source_location=SIMCORE_S3_STR,
        source_uuid=from_uuid,
    )

    data = await dsm.list_files(user_id=fmd.user_id, location=SIMCORE_S3_STR)
    assert len(data) == 2
def dsm_mockup_db(postgres_service_url, s3_client, mock_files_factory):
    # db
    utils.create_tables(url=postgres_service_url)

    # s3 client
    bucket_name = BUCKET_NAME
    s3_client.create_bucket(bucket_name, delete_contents_if_exists=True)

    # TODO: use pip install Faker
    users = ["alice", "bob", "chuck", "dennis"]
    projects = [
        "astronomy",
        "biology",
        "chemistry",
        "dermatology",
        "economics",
        "futurology",
        "geology",
    ]
    location = SIMCORE_S3_STR

    nodes = ["alpha", "beta", "gamma", "delta"]

    N = 100
    files = mock_files_factory(count=N)

    counter = 0
    data = {}
    for _file in files:
        idx = randrange(len(users))
        user_name = users[idx]
        user_id = idx + 10
        idx = randrange(len(projects))
        project_name = projects[idx]
        project_id = idx + 100
        idx = randrange(len(nodes))
        node = nodes[idx]
        node_id = idx + 10000
        file_name = str(counter)
        object_name = Path(str(project_id), str(node_id), str(counter)).as_posix()
        file_uuid = Path(object_name).as_posix()
        raw_file_path = file_uuid
        display_file_path = str(Path(project_name) / Path(node) / Path(file_name))
        created_at = str(datetime.datetime.now())
        file_size = 1234

        assert s3_client.upload_file(bucket_name, object_name, _file)

        d = {
            "file_uuid": file_uuid,
            "location_id": "0",
            "location": location,
            "bucket_name": bucket_name,
            "object_name": object_name,
            "project_id": str(project_id),
            "project_name": project_name,
            "node_id": str(node_id),
            "node_name": node,
            "file_name": file_name,
            "user_id": str(user_id),
            "user_name": user_name,
            "file_id": str(uuid.uuid4()),
            "raw_file_path": file_uuid,
            "display_file_path": display_file_path,
            "created_at": created_at,
            "last_modified": created_at,
            "file_size": file_size,
        }

        counter = counter + 1
        data[object_name] = FileMetaData(**d)

        # pylint: disable=no-member
        utils.insert_metadata(postgres_service_url, data[object_name])

    total_count = 0
    for _obj in s3_client.list_objects_v2(bucket_name, recursive=True):
        total_count = total_count + 1
    assert total_count == N

    yield data

    # s3 client
    s3_client.remove_bucket(bucket_name, delete_contents=True)

    # db
    utils.drop_tables(url=postgres_service_url)
import argparse
import sqlite3

from svm_line_classification.svm_predict_lines import svm_predict_lines
from dl_line_classification.rnn_predict_lines import rnn_predict_lines, LineClassifier
from dl_line_classification.data_generation import DataGenerator
from dl_line_classification.train import train_dl_classification_model
from info_extraction.extraction import extract_line_information
from utils import create_tables

parser = argparse.ArgumentParser(description='')
parser.add_argument('db_filepath', type=str, help="Specify database file to predict lines")
args = parser.parse_args()

cnx = sqlite3.connect(args.db_filepath)
cur = cnx.cursor()
create_tables(cnx)

# Pipeline stage toggles
PROCESS_LINES = False
GENERATE_VOCAB = False
TRAIN_LINE_CLASSIFIER = True
PREDICT_LINES_DL = False
EXTRACT_INFO = False

# Indexes of accessible conferences to process
CONF_IDS = [i for i in range(1, 200)]

"""
Process Lines - Processes HTML of each page to lines, ordered by conference_id
"""
if PROCESS_LINES:
    add_page_lines(cnx, CONF_IDS)
    cnx.commit()

"""
Generate Vocab
def test_creating_a_table_succeeds(self, init_db):
    create_tables(init_db)
def migrate():
    create_tables(db)
def create_tables():
    """Use PeeWee to create tables."""
    utils.create_tables()
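# A minimal sketch of what a PeeWee-backed utils.create_tables() could look
# like, for context; the database handle and the User model below are
# hypothetical stand-ins, not taken from the source.
from peewee import SqliteDatabase, Model, CharField

db = SqliteDatabase("app.db")  # hypothetical database handle

class BaseModel(Model):
    class Meta:
        database = db

class User(BaseModel):
    name = CharField()

def create_tables():
    # safe=True issues CREATE TABLE IF NOT EXISTS, so re-runs are harmless
    db.create_tables([User], safe=True)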
def test_table_creation(postgres_service):
    utils.create_tables(url=postgres_service)
def dsm_mockup_db(postgres_service_url, s3_client, mock_files_factory):
    # db
    utils.create_tables(url=postgres_service_url)

    # s3 client
    bucket_name = BUCKET_NAME
    s3_client.create_bucket(bucket_name, delete_contents_if_exists=True)

    # TODO: use pip install Faker
    users = ['alice', 'bob', 'chuck', 'dennis']
    projects = [
        'astronomy', 'biology', 'chemistry', 'dermatology', 'economics',
        'futurology', 'geology'
    ]
    location = SIMCORE_S3_STR
    nodes = ['alpha', 'beta', 'gamma', 'delta']

    N = 100
    files = mock_files_factory(count=N)

    counter = 0
    data = {}
    for _file in files:
        idx = randrange(len(users))
        user_name = users[idx]
        user_id = idx + 10
        idx = randrange(len(projects))
        project_name = projects[idx]
        project_id = idx + 100
        idx = randrange(len(nodes))
        node = nodes[idx]
        node_id = idx + 10000
        file_name = str(counter)
        object_name = Path(str(project_id), str(node_id), str(counter)).as_posix()
        file_uuid = Path(object_name).as_posix()

        assert s3_client.upload_file(bucket_name, object_name, _file)

        d = {
            'file_uuid': file_uuid,
            'location_id': "0",
            'location': location,
            'bucket_name': bucket_name,
            'object_name': object_name,
            'project_id': str(project_id),
            'project_name': project_name,
            'node_id': str(node_id),
            'node_name': node,
            'file_name': file_name,
            'user_id': str(user_id),
            'user_name': user_name
        }

        counter = counter + 1
        data[object_name] = FileMetaData(**d)

        # pylint: disable=no-member
        utils.insert_metadata(postgres_service_url, data[object_name])

    total_count = 0
    for _obj in s3_client.list_objects_v2(bucket_name, recursive=True):
        total_count = total_count + 1
    assert total_count == N

    yield data

    # s3 client
    s3_client.remove_bucket(bucket_name, delete_contents=True)

    # db
    utils.drop_tables(url=postgres_service_url)
def setUp(self):
    utils.create_tables()
    self.data = {
        "url": "https://ddg.gg/",
        "code": utils.gen_code(),
    }
cache_proxy = CacheProxy(mc)
app.extensions = getattr(app, 'extensions', {})
app.extensions['cache_proxy'] = cache_proxy

app.config.from_object(DevConfig)

# db
db = Database(app)
auth = Auth(app, db)
# toolbar = DebugToolbarExtension(app)
mail = Mail(app)

import models
import utils

utils.create_tables()
utils.init_admin_user()

import views

app.jinja_env.globals['static'] = (
    lambda filename: url_for('static', filename=filename))

# Blueprint for social login
# from social_login import qq_bp, weibo_bp, wechat_bp
# app.register_blueprint(qq_bp)
# app.register_blueprint(weibo_bp)
# app.register_blueprint(wechat_bp)

# configure for api (blueprint for api)
api_bp = Blueprint('api', __name__, url_prefix="/api")
api = Api(api_bp, default_mediatype='application/json')
def get_db_session():
    engine = db_connect()
    create_tables(engine)
    Session = sessionmaker(bind=engine)
    session = Session()
    return session
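# A short usage sketch for get_db_session() above. db_connect() and
# create_tables() are assumed to come from the surrounding project (e.g. its
# models module); any mapped class used with the session is a hypothetical
# stand-in.
session = get_db_session()
try:
    # e.g. session.add(Item(...)) or session.query(Item).all(), where Item
    # is a hypothetical SQLAlchemy-mapped class
    session.commit()
finally:
    session.close()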