Example #1
def model_login(conn_params):
    link = URL(conn_params['database_driver'], username=conn_params['username'],
               password=conn_params['password'], host=conn_params['host'])
    if conn_params['connection_database']:
        link.database = conn_params['connection_database']
    elif conn_params['database_driver'] == 'postgresql':
        link.database = 'postgres'
    engine = create_engine(link)
    conn = ''
    dict_ret = {}
    try:
        conn = engine.connect()
    except OperationalError as e:
        dict_ret = {'login': False, 'msg': str(e)}
    else:
        # todo 'msg'
        dict_ret = {'login': True, 'msg': ''}
        conn.close()
    return dict_ret
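
A minimal usage sketch for model_login above, assuming the SQLAlchemy names it uses (create_engine, URL, OperationalError) are imported; the dict keys mirror the ones the function reads and the values are placeholders:

# Hypothetical connection parameters; keys match what model_login() reads.
params = {
    'database_driver': 'postgresql',
    'username': 'app_user',
    'password': 'secret',
    'host': 'localhost',
    'connection_database': 'appdb',  # leave empty to fall back to 'postgres'
}
result = model_login(params)
print(result['login'], result['msg'])
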
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Pedro H Knoll
Udacity FullStack NanoDegree
Data population script
    - ver: 0.1  04/2018
"""

from sqlalchemy import create_engine
from sqlalchemy.engine.url import URL
from sqlalchemy.orm import sessionmaker

from models import (Base, User, Category, Organization, DATABASE)

engine = create_engine(URL(**DATABASE))
Base.metadata.bind = engine
DBSession = sessionmaker(bind=engine)
session = DBSession()

# Create a user
User1 = User(
    name="Robot Machine",
    email="*****@*****.**",
    picture=
    'https://pbs.twimg.com/profile_images/2671170543/18debd694829ed78203a5a36dd364160_400x400.png'
)
session.add(User1)
session.commit()

# Create some test Category objects
def connect_database():
    return create_engine(URL(**settings.DATABASE))
Example #4
def adjust_database_uri(cls, uri: URL,
                        selected_schema: Optional[str]) -> None:
    if selected_schema:
        uri.database = parse.quote(selected_schema, safe="")
Example #5
def main(argv):

    # parse command line options
    opts, args = parse_options(argv)
    if not (opts or args):
        return 1

    urlfile = opts.urlfile
    csvfile = opts.csvfile
    dbuser = opts.dbuser
    dbpass = opts.dbpass
    dbhost = opts.dbhost
    dbport = opts.dbport
    dbschema = opts.dbschema
    depth = opts.depth
    resume = opts.resume
    cluster_jobs = opts.cluster_jobs

    # connect to database
    try:
        engine = create_engine(URL("mysql+mysqlconnector",
                               username=dbuser,
                               password=dbpass,
                               host=dbhost,
                               port=dbport))
        # create database schema
        engine.execute("CREATE DATABASE IF NOT EXISTS " + dbschema)
        engine.execute("USE " + dbschema)
    except Exception as e:
        print("ERROR: Can't connect to database (%s)" % e)
        return 1

    # if resume previous crawl, do not clean database
    if not resume:
        # read file with URL list
        if urlfile:
            try:
                with open(urlfile, 'r') as file:
                    url_list = [line.strip() for line in file.readlines()]
                if not url_list:
                    print("No data in file %s" % urlfile)
                    return 1
                print("Read file %s" % urlfile)
            except IOError as error:
                print("I/O error({0}): {1}".format(error.errno, error.strerror))
                return 1
            except:  # handle other exceptions such as attribute errors
                print("Unexpected error:", sys.exc_info()[0])
                return 1
        else:
            # URL in command line argument
            url_list = [args[0]]

        # clean database
        Base.metadata.drop_all(engine)
        Base.metadata.create_all(engine)

        session = Session(engine)

        # insert URL list into database
        links = []
        for url in url_list:
            links.append(Link(url))
        session.add_all(links)
        session.commit()

        session.close()

    # run in cluster mode
    if cluster_jobs > 0:
        # 'compute' needs definition of class Crawler
        cluster = dispy.JobCluster(compute, depends=[Crawler])
        jobs = []
        for i in range(1, cluster_jobs + 1):
            crawler = Crawler(i, dbuser, dbpass, dbhost, dbport, dbschema, depth)  # create object of Crawler
            job = cluster.submit(crawler)  # it is sent to a node for executing 'compute'
            job.id = crawler  # store this object for later use
            jobs.append(job)

        # waits until all jobs finish
        for job in jobs:
            job()  # wait for job to finish
            print('Job %s:\n%s\n%s\n%s' % (job.id.job_num, job.stdout, job.stderr, job.exception))
    else:  # run in standalone mode
        crawler = Crawler(0, dbuser, dbpass, dbhost, dbport, dbschema, depth)
        crawler.crawl()

    # write product list to .csv file
    if csvfile:
        if not csvfile.lower().endswith('.csv'):
            csvfile += '.csv'
        try:
            with open(csvfile, 'w', newline='') as outfile:
                writer = csv.writer(outfile, quoting=csv.QUOTE_ALL)

                session = Session(engine)
                # paginate results
                rows_per_page = 50
                page_number = 0
                dbquery = session.query(Product)

                products = dbquery.limit(rows_per_page).offset(page_number * rows_per_page).all()
                while products:
                    for product in products:
                        writer.writerow([product.url, product.title, product.name])

                    page_number += 1
                    products = dbquery.limit(rows_per_page).offset(page_number * rows_per_page).all()

            print("Write file %s" % csvfile)
        except IOError as error:
            print("I/O error({0}): {1}".format(error.errno, error.strerror))
            return 1
        except:  # handle other exceptions such as attribute errors
            print("Unexpected error:", sys.exc_info()[0])
            return 1

        session.close()

    engine.dispose()
    return 0
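
The compute function handed to dispy.JobCluster above is not shown; a minimal sketch of what it could look like, assuming each job simply delegates to the submitted Crawler object (hence depends=[Crawler]):

def compute(crawler):
    # Runs on a dispy node: the submitted Crawler instance does the work.
    return crawler.crawl()
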
def db_connect():
    """
    Performs database connection using database settings from settings.py.
    Returns sqlalchemy engine instance
    """
    return create_engine(URL(**settings.DATABASE))
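
URL(**settings.DATABASE) expects settings.DATABASE to be a dict of URL keyword arguments; a hypothetical settings.py entry could look like this (placeholder values):

# settings.py (hypothetical values)
DATABASE = {
    'drivername': 'postgresql+psycopg2',
    'host': 'localhost',
    'port': '5432',
    'username': 'scraper',
    'password': 'secret',
    'database': 'items',
}
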
Example #7
import os
from sqlalchemy import create_engine
from sqlalchemy.engine.url import URL
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, scoped_session

# DB connection info
DB = {
    'drivername': 'mysql',
    'host': '',
    'port': '',
    'username': "",
    'password': "",
    'database': "",
    'query': {
        'charset': 'utf8'
    }
}

ENGINE = create_engine(URL(**DB))

session = scoped_session(
    sessionmaker(autocommit=False, autoflush=False, bind=ENGINE))

Base = declarative_base()

Base.query = session.query_property()
Example #8
import sandman2
from sqlalchemy.engine.url import URL

from utvsapi import models

url = URL('mysql', query={'read_default_file': './mysql.cnf'})
app = sandman2.get_app(url, user_models=models.all(), read_only=True)

if __name__ == '__main__':
    app.run(debug=True)
Example #9

import os

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, scoped_session
from sqlalchemy.engine.url import URL
from sqlalchemy.ext.automap import automap_base


# TODO: Config-ify.
url = URL(drivername='postgresql', database='osp_graphs')

engine = create_engine(url)

factory = sessionmaker(bind=engine)

session = scoped_session(factory)

Base = automap_base()

Base.prepare(engine, reflect=True)

Text = Base.classes.text
Citation = Base.classes.citation
Field = Base.classes.field
Subfield = Base.classes.subfield
SubfieldDocument = Base.classes.subfield_document
Document = Base.classes.document

# added after original code:
Example #10
import sqlalchemy as sa
from sqlalchemy.orm import sessionmaker
from sqlalchemy.engine.url import URL

# Initialize the database connection:
url = URL(drivername='postgresql',
          username='******',
          password='******',
          host='localhost',
          database='employeeinfodb')
engine = sa.create_engine(url)

# Create the DBSession class:
DBSession = sessionmaker(engine)

dbSession = DBSession()
Example #11
from random import randint

import datetime
from sqlalchemy import create_engine, select, func
from sqlalchemy.engine.url import URL

import settings
import utils
import tables

engine = create_engine(URL(**settings.DATABASE))
tables.create_all(engine)

data = utils.get_static_data()

with engine.begin() as conn:
    for book in data['books']:
        ins = tables.Book.insert().values(title=book)
        conn.execute(ins)

ins = tables.Person.insert()
values = []
for person in data['persons']:
    dob = utils.get_dob(1980, 2010)
    values.append({'name': person, 'date_of_birth': dob})

with engine.begin() as conn:
    conn.execute(ins, values)

ins = tables.Write.insert()
with engine.begin() as conn:
Example #12
def _get_test_engine():
    return create_engine(URL(**connection_params, host=docker_ip))
Example #13
def get_connection_string(self):
    return str(URL('sqlite+pysqlite', None, None, None, None, self.dbpath))
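
The positional arguments above follow the legacy URL(drivername, username, password, host, port, database) order; a quick sketch of what the call renders to, assuming self.dbpath is 'data/app.db' and a pre-1.4 SQLAlchemy where this constructor is still available:

from sqlalchemy.engine.url import URL

url = URL('sqlite+pysqlite', None, None, None, None, 'data/app.db')
print(str(url))  # sqlite+pysqlite:///data/app.db
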
from sqlalchemy import create_engine
from sqlalchemy.engine.url import URL
from sqlalchemy import event, DDL

from alembic.config import Config
from alembic import command

from pipeline.results import Base
from pipeline.luigi_ext import DatabaseConfig

event.listen(Base.metadata, 'before_create',
             DDL("CREATE SCHEMA IF NOT EXISTS results"))

url = None

if not url:
    url = URL("postgres",
              host=DatabaseConfig().host,
              username=DatabaseConfig().user,
              database=DatabaseConfig().database,
              password=DatabaseConfig().password,
              port=DatabaseConfig().port)

engine = create_engine(url, echo=True)
Base.metadata.create_all(engine)

alembic_cfg = Config("alembic.ini")
command.stamp(alembic_cfg, "head")
Example #15
def make_url(url_params: dict) -> URL:
    return URL(**url_params)
Example #16
    def _initialize(self):
        models.Store_menu.objects.all().delete()

        query_store = "SELECT * FROM django_test.api_store where id="
        # query_menu="SELECT * FROM django_test.api_menu where id between 1 and 50000"
        # query_menu="SELECT * FROM django_test.api_menu where id between 50001 and 100000"
        # query_menu="SELECT * FROM django_test.api_menu where id between 100001 and 150000"
        # query_menu="SELECT * FROM django_test.api_menu where id between 150001 and 200000"
        # query_menu="SELECT * FROM django_test.api_menu where id between 200001 and 250000"
        # query_menu="SELECT * FROM django_test.api_menu where id between 250001 and 300000"
        # query_menu="SELECT * FROM django_test.api_menu where id between 300001 and 400000;"
        # query_menu="SELECT * FROM django_test.api_menu where id between 400001 and 550000"
        # query_menu="SELECT * FROM django_test.api_menu where id between 550001 and 700000"
        # query_menu="SELECT * FROM django_test.api_menu where id between 700001 and 1000000"

        # query_menu="SELECT * FROM django_test.api_menu where id between 1000001 and 1200000"

        # query_menu="SELECT * FROM django_test.api_menu where id between 1200001 and 1500000"

        query_menu = "SELECT * FROM django_test.api_menu where id between 1500001 and 1651857"
        query_menu = "SELECT * FROM django_test.api_menu"

        engine = create_engine(
            URL(drivername="mysql",
                username="******",
                password="******",
                host="52.78.173.64",
                port="3306",
                database="django_test",
                query={'charset': 'utf8mb4'}))
        # engine = create_engine("mysql+mysqldb://root:[email protected]/django_test",
        #                        encodeing='utf-8')
        conn = engine.connect()

        print("[*] loading menus...")
        menus = pd.read_sql(
            sql=query_menu,  # mysql query
            con=conn)
        print(menus)
        # menus= menus.head(10)
        print("[*] Initializing store_menus...")

        Store_menus_bulk = []
        tmp_store_id = menus.iloc[0].store_id
        print("temp store id: " + str(tmp_store_id))
        store = pd.read_sql(
            sql=query_store + str(tmp_store_id),  # mysql query
            con=conn)
        print(store.iloc[0].id)
        # First, fetch the store info for the first index.

        temp = ""
        for menu in menus.itertuples():
            if (menu.id % 100000) == 0:
                print("10만개 완료")
            now_store_id = menu.store_id
            if tmp_store_id != now_store_id:  # start saving
                Store_menus_bulk.append(
                    models.Store_menu(id=store.iloc[0].id,
                                      store_name=store.iloc[0].store_name,
                                      branch=store.iloc[0].branch,
                                      area=store.iloc[0].area,
                                      tel=store.iloc[0].tel,
                                      address=store.iloc[0].address,
                                      latitude=store.iloc[0].latitude,
                                      longitude=store.iloc[0].longitude,
                                      category=store.iloc[0].category,
                                      menu=temp + "|" +
                                      store.iloc[0].category))
                tmp_store_id = now_store_id
                temp = ""
                temp += menu.menu_name
                store = pd.read_sql(
                    sql=query_store + str(tmp_store_id),  # mysql query
                    con=conn)
            else:  # if the store id is the same, keep appending the menu name to the temp string
                temp += "|" + menu.menu_name

        models.Store_menu.objects.bulk_create(Store_menus_bulk)
        print("[*] query [" + query_menu + "] is completed")
Example #17
def open_spider(self, spider):
    self.engine = create_engine(URL.create(**self.database_settings))
Example #18
    "users.starlette_password.hashers.PBKDF2PasswordHasher",
    "users.starlette_password.hashers.PBKDF2SHA1PasswordHasher",
    "users.starlette_password.hashers.BCryptSHA256PasswordHasher",
]

# Services URLs

ASSOCIATION_FRONTEND_URL = config(
    "ASSOCIATION_FRONTEND_URL",
)

# Unit test configuration

RUNNING_TESTS = config("RUNNING_TESTS", cast=bool, default=False)

if RUNNING_TESTS:
    original_url = make_url(DATABASE_URL)
    test_db_url = URL.create(
        drivername=original_url.drivername,
        username=original_url.username,
        password=original_url.password,
        host=original_url.host,
        port=original_url.port,
        database=f"TEST_{original_url.database}",
        query=original_url.query,
    )

    DATABASE_URL = str(test_db_url)
    PASSWORD_HASHERS = ["users.starlette_password.plain_hasher.PlainPasswordHasher"]
    SERVICE_TO_SERVICE_SECRET = "test-service-to-service"
def get_connection_url():
    arguments = {k[3:].lower(): os.environ[k] for k in db_env_vars}
    return URL(**arguments)
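
db_env_vars is not shown here; the k[3:].lower() slice implies variable names built from a three-character prefix (e.g. 'DB_') plus a URL keyword name. A hypothetical setup that would satisfy get_connection_url():

import os

# Hypothetical environment; the 'DB_' prefix is stripped by k[3:] and the
# remainder lower-cased into URL keyword arguments.
db_env_vars = ['DB_DRIVERNAME', 'DB_USERNAME', 'DB_PASSWORD',
               'DB_HOST', 'DB_PORT', 'DB_DATABASE']
os.environ.update({
    'DB_DRIVERNAME': 'postgresql',
    'DB_USERNAME': 'app',
    'DB_PASSWORD': 'secret',
    'DB_HOST': 'localhost',
    'DB_PORT': '5432',
    'DB_DATABASE': 'appdb',
})
print(get_connection_url())
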
Example #20
from h3_helper import *
from s2_generate_cell_geometry import *
from EAGGR_generate_cell_geometry import *
from shapely.geometry import box

HOST = 'localhost'
DB = 'dggs'
USER = '******'
PORT = 5432
PWD = 'postgres'

workDataDir = r"D:\UTCloud\DGGS\working\Python\workingdata"
# Database info
db_url = URL(drivername='postgresql+psycopg2',
             host=HOST,
             database=DB,
             username=USER,
             port=PORT,
             password=PWD)

# Create engine
engine = create_engine(db_url)

# read extent file
gdf = gpd.read_file(workDataDir + "working.gpkg", layer='dev_extent2')

# convert extent to geoJSON
extentJSON = json.loads((gdf.to_json()))

# create geojson extent
extentJSON = gpd.GeoSeries(box(12.2, 47.3, 13.7, 48.2)).__geo_interface__
Example #21
def get_url():
    """
    Return the URL to be used with engine creation based on configuration
    """
    used = Profile.get_used()
    return URL(**dict(used))
Example #22
PG_DB_DATABASE = config("PG_DB_DATABASE", default=None)

# TESTING = config("TESTING", cast=bool, default=False)
# if TESTING:
#     if PG_DB_DATABASE:
#         PG_DB_DATABASE += "_test"
#     else:
#         PG_DB_DATABASE = "market_test"

PG_DB_DSN = config(
    "PG_DB_DSN",
    cast=make_url,
    default=URL(
        drivername=PG_DB_DRIVER,
        username=PG_DB_USER,
        password=PG_DB_PASSWORD,
        host=PG_DB_HOST,
        port=PG_DB_PORT,
        database=PG_DB_DATABASE,
    ),
)
PG_DB_POOL_MIN_SIZE = config("PG_DB_POOL_MIN_SIZE", cast=int, default=1)
PG_DB_POOL_MAX_SIZE = config("PG_DB_POOL_MAX_SIZE", cast=int, default=16)
PG_DB_ECHO = config("PG_DB_ECHO", cast=bool, default=False)
PG_DB_SSL = config("PG_DB_SSL", default=None)
PG_DB_USE_CONNECTION_FOR_REQUEST = config("PG_DB_USE_CONNECTION_FOR_REQUEST",
                                          cast=bool,
                                          default=True)
PG_DB_RETRY_LIMIT = config("PG_DB_RETRY_LIMIT", cast=int, default=1)
PG_DB_RETRY_INTERVAL = config("PG_DB_RETRY_INTERVAL", cast=int, default=1)

# first user infos
Example #23
from sqlalchemy import create_engine
from sqlalchemy import MetaData
from sqlalchemy import inspect
from sqlalchemy import Table
from sqlalchemy import Column, Integer, String
from sqlalchemy.engine.url import URL

db_url = {
    'drivername': 'postgres',
    'username': '******',
    'password': '******',
    'host': '192.168.99.100',
    'port': 5432
}
engine = create_engine(URL(**db_url))
m = MetaData()
table = Table('Test', m, Column('id', Integer, primary_key=True),
              Column('key', String, nullable=True), Column('val', String))

table.create(engine)
inspector = inspect(engine)
print('Test' in inspector.get_table_names())

table.drop(engine)
inspector = inspect(engine)
print('Test' in inspector.get_table_names())
def test_main():

    while not (pg_ready(PG_HOST) and pg_ready(PG_HOST_EMPTY)):
        print("Waiting for Postgres containers...")
        time.sleep(1)

    s3 = boto3.resource('s3', endpoint_url=S3_URL)
    while not (s3_ready(s3)):
        print("Waiting for S3 container...")
        time.sleep(1)

    ssm = boto3.client('ssm', endpoint_url=SSM_URL)
    while not (ssm_ready(ssm)):
        print("Waiting for SSM container...")
        time.sleep(1)

    bucket = s3.create_bucket(Bucket=S3_BUCKET)

    ssm.put_parameter(Name=POSTGRES_PASSWORD_SSM_KEY,
                      Value='password',
                      Type='SecureString')

    os.environ.update({
        'POSTGRES_HOST': PG_HOST,
        'POSTGRES_PORT': PG_PORT,
        'POSTGRES_USER': PG_USER,
        'POSTGRES_DATABASE': PG_DATABASE,
        'S3_BUCKET': S3_BUCKET,
    })

    lambda_handler(
        {
            'organization_schema': ORGANIZATION_SCHEMA,
            's3_key': S3_KEY,
        }, {})

    bucket.download_file(S3_KEY, DUMP)

    # Import the dump file into a clean database
    # There is not an easy way to import a whole file using SqlAlchemy, so use psql
    subprocess.check_output([
        PSQL, '--host', PG_HOST_EMPTY, '--port', PG_PORT, '--user', PG_USER,
        '--dbname', PG_DATABASE, '--file', DUMP, '-v', 'ON_ERROR_STOP=1'
    ],
                            env={
                                'PGPASSWORD': PG_PASSWORD,
                                'LD_LIBRARY_PATH': PG_PATH
                            })

    # Connect to the new database
    engine = create_engine(
        URL(drivername='postgresql',
            database=PG_DATABASE,
            username=PG_USER,
            password=PG_PASSWORD,
            host=PG_HOST_EMPTY,
            port=PG_PORT))

    # Does the pennsievedb seed data from pennsieve-api/local-seed.sql look good?
    with engine.connect() as conn:
        rows = conn.execute(
            f'SELECT * FROM "{ORGANIZATION_SCHEMA}".datasets;').fetchall()
        assert len(rows) == 1
        assert rows[0]['name'] == 'Pennsieve Dataset'
        assert rows[0][
            'node_id'] == 'N:dataset:c0f0db41-c7cb-4fb5-98b4-e90791f8a975'

        with pytest.raises(ProgrammingError):
            conn.execute('SELECT * FROM "2".datasets;').fetchall()

        rows = conn.execute(
            f'SELECT * FROM "{ORGANIZATION_SCHEMA}".files;').fetchall()
        assert len(rows) == 0
Example #25
    def crawl(self):
        import math
        import time
        import urllib.parse
        import urllib.request
        from crawler_db import (Status, Link, Product)
        from crawler_web import Webpage

        import sqlalchemy
        from sqlalchemy import (create_engine, exc)
        from sqlalchemy.engine.url import URL
        from sqlalchemy.orm import (Session, exc)

        # connect to database
        try:
            engine = create_engine(URL("mysql+mysqlconnector",
                                   username=self.dbuser,
                                   password=self.dbpass,
                                   host=self.dbhost,
                                   port=self.dbport))
            engine.execute("USE " + self.dbschema)  # select database
        except Exception as e:
            print("ERROR: Can't connect to database (%s)" % e)
            return 1

        session = Session(engine)
        start_time = time.time()

        num_retries = 3
        if self.job_num:
            attempts = num_retries  # running in cluster mode
            # retry if new links are not available momentarily,
            # otherwise process has finished
        else:
            attempts = 1  # running in standalone mode
            # no retry

        while attempts:
            # process same site first
            link = session.query(Link).filter(Link.depth > 1, Link.status == Status.new).with_for_update().first()
            if not link:
                link = session.query(Link).filter_by(status=Status.new).with_for_update().first()

            while link:
                attempts = num_retries  # restart attempts
                this_url = link.url
                link_depth = link.depth + 1
                self.num_processed += 1
                self.host = urllib.parse.urlparse(this_url)[1]

                status = Status.visited
                try:
                    page = Webpage(this_url)
                    if not page.fetch():
                        status = Status.error
                    else:
                        if not self.depth or (link_depth <= self.depth):
                            for link_url in [self._pre_visit_url_condense(l) for l in page.out_urls]:
                                # apply pre-visit filters.
                                do_not_follow = [f for f in self.pre_visit_filters if not f(link_url)]

                                # if no filters failed, process URL
                                if [] == do_not_follow:
                                    new_link = Link(link_url, depth=link_depth)
                                    session.begin_nested()  # establish a savepoint
                                    session.add(new_link)
                                    try:
                                        session.flush()
                                    except sqlalchemy.exc.IntegrityError:  # rollback duplicated entry
                                        session.rollback()
                                        continue
                                    except exc.FlushError:  # rollback duplicated entry
                                        session.rollback()
                                        continue
                                    except:
                                        session.rollback()
                                        raise
                                    session.commit()

                        # apply product filters.
                        is_product = [f for f in self.product_filters if not f(page)]

                        # if no filters failed, process product
                        if [] == is_product:
                            product = Product(this_url, title=page.title, name=page.product_name)
                            session.begin_nested()  # establish a savepoint
                            session.add(product)
                            try:
                                session.flush()
                            except:
                                session.rollback()
                                raise
                            session.commit()

                except Exception as e:
                    print("ERROR: Can't process url '%s' (%s)" % (this_url, e))
                    status = Status.error

                link.status = status
                session.commit()

                # process same site first
                link = session.query(Link).filter(Link.depth > 1, Link.status == Status.new).with_for_update().first()
                if not link:
                    link = session.query(Link).filter_by(status=Status.new).with_for_update().first()

            # sleep if running in cluster mode
            if self.job_num:
                time.sleep(5)
            attempts -= 1

        end_time = time.time()
        time_diff = end_time - start_time

        rate = 0
        if time_diff:
            rate = int(math.ceil(float(self.num_processed) / time_diff))

        print("\tProcessed:    %d" % self.num_processed)
        print("\tStats:        %d/s after %0.2fs" % (rate, time_diff))

        session.close()
        engine.dispose()
        return 0
def workers_queue(pid, q1, q2):
    """Receiving parameters from q1, then computing and finally put results
       into q2
    """
    engine = create_engine(
        URL(**CONF['database']['connect_args']),
        pool_size=1,
        pool_recycle=CONF['database']['pool_recycle'],
        client_encoding='utf8')
    Session = scoped_session(sessionmaker(bind=engine))
    session = Session()
    parser = BulkParser(platform_id=1, save_none_url_tweet=True)

    while True:
        try:
            data = q1.get(timeout=1)
        except Empty:
            logger.info('Worker process %s: queue is empty for 1 seconds', pid)
            q2.put((pid, 'STOP', None))
            break
        if data == 'STOP':
            logger.info('Worker process %s: STOP sign received from q1!', pid)
            q1.put('STOP')
            q2.put((pid, 'STOP', None))
            break
        else:
            logger.info('Worker process %s: data=%s received', pid, data)
        w_open_left, w_close_right = data
        jds = dict()
        g_urls_map = dict()
        query = """
            SELECT tw.id, tw.json_data, u.id, u.raw
            FROM tweet AS tw
            LEFT JOIN ass_tweet_url AS atu ON atu.tweet_id=tw.id
            LEFT JOIN url AS u ON u.id=atu.url_id
            WHERE tw.id>:l AND tw.id<=:r
            """
        for tw_id, jd, url_id, url in engine.execute(
                text(query).bindparams(l=w_open_left, r=w_close_right)):
            jds[tw_id] = jd
            if url_id is not None:
                g_urls_map[url] = url_id
        g_uusers_set = set()
        g_edges_set = set()
        for tw_id, jd in jds.iteritems():
            parser.parse_existed_one(
                tw_id,
                jd,
                session,
                g_urls_map=g_urls_map,
                g_uusers_set=g_uusers_set,
                g_edges_set=g_edges_set)
        edges = [
            dict(
                tweet_raw_id=t0,
                from_raw_id=t1,
                to_raw_id=t2,
                url_id=t3,
                is_quoted_url=t4,
                is_mention=t5,
                tweet_type=t6) for t0, t1, t2, t3, t4, t5, t6 in g_edges_set
            if t3 != -1
        ]
        uusers = [dict(raw_id=t1, screen_name=t2) for t1, t2 in g_uusers_set]
        # session.bulk_insert_mappings(TwitterNetworkEdge, edges)
        stmt_do_nothing = insert(TwitterNetworkEdge).values(
            edges).on_conflict_do_nothing(index_elements=[
                'tweet_raw_id', 'from_raw_id', 'to_raw_id', 'url_id',
                'is_quoted_url', 'is_mention', 'tweet_type'
            ])
        session.execute(stmt_do_nothing)
        session.commit()
        q2.put((pid, 'RUN', uusers))
        logger.info('Worker process %s: tweets from %s to %s done', pid,
                    w_open_left + 1, w_close_right)
Example #27
def db_connect():
    return create_engine(URL(**settings.DATABASE))
Example #28
    def __init__(
        self,
        domain: Optional[Domain] = None,
        dialect: Text = "sqlite",
        host: Optional[Text] = None,
        port: Optional[int] = None,
        db: Text = "rasa.db",
        username: Text = None,
        password: Text = None,
        event_broker: Optional[EventChannel] = None,
        login_db: Optional[Text] = None,
    ) -> None:
        import sqlalchemy
        from sqlalchemy.orm import sessionmaker
        from sqlalchemy.engine.url import URL
        from sqlalchemy import create_engine

        engine_url = URL(
            dialect,
            username,
            password,
            host,
            port,
            database=login_db if login_db else db,
        )

        logger.debug(
            "Attempting to connect to database "
            'via "{}"'.format(engine_url.__to_string__())
        )

        # Database might take a while to come up
        while True:
            try:
                self.engine = create_engine(engine_url)

                # if `login_db` has been provided, use current connection with
                # that database to create working database `db`
                if login_db:
                    self._create_database_and_update_engine(db, engine_url)

                try:
                    self.Base.metadata.create_all(self.engine)
                except (
                    sqlalchemy.exc.OperationalError,
                    sqlalchemy.exc.ProgrammingError,
                ) as e:
                    # Several Rasa services started in parallel may attempt to
                    # create tables at the same time. That is okay so long as
                    # the first services finishes the table creation.
                    logger.error("Could not create tables: {}".format(e))

                self.session = sessionmaker(bind=self.engine)()
                break
            except (
                sqlalchemy.exc.OperationalError,
                sqlalchemy.exc.IntegrityError,
            ) as e:

                logger.warning(e)
                sleep(5)

        logger.debug("Connection to SQL database '{}' successful".format(db))

        super(SQLTrackerStore, self).__init__(domain, event_broker)
Example #29
# Third party lib imports

from flask import Flask

from sqlalchemy.engine.url import URL

from flask.ext.sqlalchemy import SQLAlchemy

# Local imports
from naas import settings

app = Flask(__name__)

app.config['SQLALCHEMY_DATABASE_URI'] = URL(**settings.DATABASE)

db = SQLAlchemy(app)

import naas.views
Example #30
def test_create_connect_args():
    sfdialect = base.SnowflakeDialect()

    test_data = [
        (
            # 0: full host name and no account
            URL("snowflake",
                username="******",
                password="******",
                host='testaccount.snowflakecomputing.com',
                query={}),
            {
                'autocommit': False,
                'host': 'testaccount.snowflakecomputing.com',
                'password': '******',
                'user': '******'
            }),
        (
            # 1: account name only
            URL("snowflake",
                username="******",
                password="******",
                host='testaccount',
                query={}),
            {
                'autocommit': False,
                'host': 'testaccount.snowflakecomputing.com',
                'password': '******',
                'user': '******',
                'port': '443',
                'account': 'testaccount'
            }),
        (
            # 2: account name including region
            URL("snowflake",
                username="******",
                password="******",
                host='testaccount.eu-central-1',
                query={}),
            {
                'autocommit': False,
                'host': 'testaccount.eu-central-1.snowflakecomputing.com',
                'password': '******',
                'user': '******',
                'port': '443',
                'account': 'testaccount'
            }),
        (
            # 3: full host including region
            URL("snowflake",
                username="******",
                password="******",
                host='testaccount.eu-central-1.snowflakecomputing.com',
                query={}),
            {
                'autocommit': False,
                'host': 'testaccount.eu-central-1.snowflakecomputing.com',
                'password': '******',
                'user': '******'
            }),
        (
            # 4: full host including region and account
            URL("snowflake",
                username="******",
                password="******",
                host='testaccount.eu-central-1.snowflakecomputing.com',
                query={'account': 'testaccount'}),
            {
                'autocommit': False,
                'host': 'testaccount.eu-central-1.snowflakecomputing.com',
                'password': '******',
                'user': '******',
                'account': 'testaccount'
            }),
        (
            # 5: full host including region and account including region
            URL("snowflake",
                username="******",
                password="******",
                host='testaccount.eu-central-1.snowflakecomputing.com',
                query={'account': 'testaccount.eu-central-1'}),
            {
                'autocommit': False,
                'host': 'testaccount.eu-central-1.snowflakecomputing.com',
                'password': '******',
                'user': '******',
                'account': 'testaccount.eu-central-1'
            }),
        (
            # 6: full host including region and account including region
            URL("snowflake",
                username="******",
                password="******",
                host='snowflake.reg.local',
                port='8082',
                query={'account': 'testaccount'}),
            {
                'autocommit': False,
                'host': 'snowflake.reg.local',
                'password': '******',
                'user': '******',
                'port': 8082,
                'account': 'testaccount'
            }),
    ]

    for idx, ts in enumerate(test_data):
        _, opts = sfdialect.create_connect_args(ts[0])
        assert opts == ts[1], "Failed: {0}: {1}".format(idx, ts[0])
Example #31
    def create_engine(self):
        """Create the database engine"""
        log.debug("Creating ENGINE")
        options = {
            'convert_unicode': self.native_unicode,
            'poolclass': GreenQueuePool,
            'pool_size': self.pool_size,
            'pool_recycle': self.pool_recycle,
            'pool_timeout': self.pool_timeout

        }

        if self.database_uri.startswith('sqlite:'):

            options.pop('pool_timeout')

            match = _sqlite_re.match(self.database_uri)
            if match is None:
                raise ArgumentError('Could not parse rfc1738 URL')
            database, query = match.groups()
            if database is None:
                database = ':memory:'
            if query:
                query = url_decode(query).to_dict()
            else:
                query = {}
            info = URL('sqlite', database=database, query=query)
            pool_size = options.get('pool_size', 0)
            # we go to memory and the pool size was explicitly set to 0
            # which is fail.  Let the user know that
            if info.database in (None, '', ':memory:'):
                if pool_size == 0:
                    raise RuntimeError('SQLite in memory database with an '
                                       'empty queue not possible due to data '
                                       'loss.')
                # if pool size is None or explicitly set to 0 we assume the
                # user did not want a queue for this sqlite connection and
                # hook in the null pool.
                elif not pool_size:
                    log.warn("SQLite database is using the NullPool")
                    from sqlalchemy.pool import NullPool
                    options['poolclass'] = NullPool
        else:
            info = make_url(self.database_uri)
            # if mysql is the database engine, god forbid, and no connection
            # encoding is provided we set it to utf-8
            if info.drivername == 'mysql':
                info.query.setdefault('charset', 'utf8')
                options.setdefault('pool_size', 10)
                options.setdefault('pool_recycle', 7200)
            elif info.drivername.startswith('postgresql+psycopg2'):
                from psycopg2 import extensions, OperationalError
                from gevent.socket import wait_read, wait_write
                options['use_native_unicode'] = self.native_unicode

                def wait_callback(conn, timeout=None):
                    """
                    A wait callback useful to allow gevent to work with Psycopg.
                    https://bitbucket.org/dvarrazzo/psycogreen/src/tip/gevent/
                    """
                    while True:
                        state = conn.poll()
                        if state == extensions.POLL_OK:
                            break
                        elif state == extensions.POLL_READ:
                            wait_read(conn.fileno(), timeout=timeout)
                        elif state == extensions.POLL_WRITE:
                            wait_write(conn.fileno(), timeout=timeout)
                        else:
                            raise OperationalError(
                                "Bad result from poll: %r" % state
                            )
                if hasattr(extensions, 'set_wait_callback'):
                    extensions.set_wait_callback(wait_callback)

        dialect_cls = info.get_dialect()

        # get the correct DBAPI base on connection url
        dbapi_args = {}
        dbapi = dialect_cls.dbapi(**dbapi_args)

        # create the dialect
        dialect_args = {'dbapi':dbapi}
        dialect = dialect_cls(**dialect_args)

        # assemble connection arguments for this dialect
        (cargs, connection_params) = dialect.create_connect_args(info)
        log.debug("CARGS: %s; CONNECTION_PARAMS: %s;", cargs, connection_params)
        log.debug("Creating db engine. info: %s; options: %s;", info, options)
        engine = sqlalchemy.create_engine(info, **options)
        database_engine_created.send(self, engine=engine)
Example #32
    else:
        value = default

    if str(value).upper() == "TRUE":
        value = True
    elif str(value).upper() == "FALSE":
        value = False

    return value


postgres_url = str(
    URL(
        drivername="postgresql+psycopg2",
        host=env_var("POSTGRES_HOST", "localhost"),
        port=env_var("POSTGRES_PORT", "5432"),
        username=env_var("POSTGRES_USERNAME"),
        password=env_var("POSTGRES_PASSWORD"),
        database=env_var("POSTGRES_DATABASE", "matcher"),
    ))

postgres_test_url = str(
    URL(
        drivername="postgresql+psycopg2",
        host=env_var("POSTGRES_TEST_HOST", env_var("POSTGRES_HOST",
                                                   "localhost")),
        port=env_var("POSTGRES_TEST_PORT", env_var("POSTGRES_PORT", "5432")),
        username=env_var("POSTGRES_TEST_USERNAME",
                         env_var("POSTGRES_USERNAME")),
        password=env_var("POSTGRES_TEST_PASSWORD",
                         env_var("POSTGRES_PASSWORD")),
        database=env_var("POSTGRES_TEST_DATABASE",
Example #33
    def do_run(self):
        # R0914: Too many local variables.
        # pylint: disable=R0914

        import sqlalchemy
        from sqlalchemy import exc
        from sqlalchemy.event import listen
        from sqlalchemy.engine.url import URL

        def start_query(conn, *dummy):
            ''' Save time the query starts. '''
            conn.info['wh_time'] = timeit.default_timer()

        def end_query(conn, *dummy):
            ''' Save time the query's finished. '''
            conn.info['wh_time'] = timeit.default_timer() - conn.info['wh_time']

        config = {
            k: v for k, v in self.config.iteritems()
            if k in self.config_raw['properties'].keys()
        }
        config['drivername'] = config.pop('dbtype')

        if config['drivername'] == 'sqlite':
            config['database'] = config.pop('host')

        query = config.pop('query')

        url = URL(**config)

        try:
            engine = sqlalchemy.create_engine(url)
        except ImportError as err:
            return ((
                'error',
                '{}. Sensor checking sql requires it. Please install it.'
                .format(err)
            ),)

        error_msg = (
            '(database {})\nError: {{}}\n Message from database: "{{}}"'
            .format(url.__to_string__())
        )
        try:
            connection = engine.connect()
        except exc.TimeoutError:
            return (
                ('error', error_msg.format('Timeout getting connection', None)),
            )
        except exc.SQLAlchemyError as err:
            return (
                ('error', error_msg.format('Could not connect to database', err)),
            )

        listen(connection, 'before_cursor_execute', start_query)
        listen(connection, 'after_cursor_execute', end_query)
        try:
            result = connection.execute(query).fetchall()
            time = connection.info['wh_time']
        except exc.StatementError as err:
            return ((
                'error', error_msg.format(
                    'Error executing statement {}'.format(err.statement), err)
            ),)
        except exc.SQLAlchemyError as err:
            return ((
                'error', error_msg.format(
                    'Error executing statement, your query: {}'.format(
                        self.config['query']),
                    err.message)
            ),)
        finally:
            connection.close()

        try:
            if len(result) == 1 and len(result[0]) == 1:
                result = ('result_num', float(result[0][0]))
            else:
                raise ValueError
        except ValueError:
            result = ('result', str(result)[1:-1])

        return (result, ('query_time', time))
Example #34
    def __getitem__(self, name):

        if 'DATABASES' not in settings:
            raise exceptions.ImproperlyConfigured(
                'DATABASES not configured in project settings.')

        if name not in settings['DATABASES']:
            raise exceptions.ImproperlyConfigured(
                '%r not present in DATABASES configuration.' % name)

        config = settings['DATABASES'][name]

        if isinstance(config, six.string_types):
            url = make_url(config)
            options = {}

        else:
            config = dict(map(lambda i: (i[0].lower(), i[1]), config.items()))
            options = config.get('options', {})
            url = URL(
                config['engine'],
                username=config.get('username', config.get('user')),
                password=config.get('password', config.get('pass')),
                host=config.get('hostname', config.get('host')),
                port=config.get('port'),
                database=config.get('name', config.get('database')))

        # If alchemist is invoked by a test runner we should switch to using
        # testing databases.

        if settings['TESTING']:

            if url.drivername.startswith('sqlite'):

                # Switch to using an in-memory database for sqlite.
                url.database = ':memory:'

            else:

                # Switch to using a named testing database for other dialects.
                ident = threading.current_thread().ident
                url.database = 'test_%s_%s' % (url.database, ident)

        # Apply MySQL hacks to make MySQL play nice.
        pool_size = None
        pool_recycle = None
        if url.drivername.startswith('mysql'):
            pool_size = 10
            pool_recycle = 7200

        # Get "global" options for the database engine.
        pool_size = settings.get('DATABASE_POOL_SIZE', pool_size)
        if pool_size:
            options.setdefault('pool_size', pool_size)

        pool_recycle = settings.get('DATABASE_POOL_RECYCLE', pool_recycle)
        if pool_recycle:
            options.setdefault('pool_recycle', pool_recycle)

        pool_timeout = settings.get('DATABASE_POOL_TIMEOUT')
        if pool_timeout:
            options.setdefault('pool_timeout', pool_timeout)

        # Forward configuration to sqlalchemy and create the engine.
        engine = sa.create_engine(url, **options)

        if settings["DEBUG"]:
            # Create a no-op listener if we're in debug mode.
            from sqlalchemy.event import listen
            listen(engine, "after_cursor_execute", lambda *a, **kw: None)

        # Return the created engine.
        return engine