Exemple #1
0
def index_item(item):
    """Index ``item`` as a document using the client from ``get_client``.

    The target index name is read from the ``database`` field of the
    data source returned by ``get_data_source``.
    """
    source = get_data_source()
    es = get_client()
    index_name = source.get('database')
    es.index(index=index_name, body=item)
Exemple #2
0
def connect_postgres():
    """Open a ``psycopg2`` connection from the current data source settings.

    Falsy settings fall back to defaults: host ``'localhost'``, port
    ``'5432'``, database ``'test'``; username and password fall back to
    ``None``.
    """
    source = get_data_source()
    params = {
        'host': source.get('host') or 'localhost',
        'port': source.get('port') or '5432',
        'user': source.get('username') or None,
        'password': source.get('password') or None,
        'database': source.get('database') or 'test',
    }
    return psycopg2.connect(**params)
Exemple #3
0
def connect_mysql():
    """Open a ``pymysql`` connection from the current data source settings.

    Falsy settings fall back to defaults: host ``'localhost'``, port
    ``3306``, database ``'test'``; username and password fall back to
    ``None``. The connection always uses the ``'utf8'`` charset.

    Returns:
        The connection object returned by ``pymysql.connect``.

    Raises:
        ValueError: if the configured port is a non-empty value that
            cannot be converted to an integer.
    """
    ds = get_data_source()
    return pymysql.connect(
        host=ds.get('host') or 'localhost',
        # ``or`` treats a missing *and* an empty-string port as unset, like
        # the other fields; int('') would otherwise raise ValueError.
        port=int(ds.get('port') or 3306),
        user=ds.get('username') or None,
        password=ds.get('password') or None,
        database=ds.get('database') or 'test',
        charset='utf8',
    )
Exemple #4
0
def get_producer():
    """Create a ``KafkaProducer`` for the current data source.

    The bootstrap server address is ``"<host>:<port>"`` taken from the
    data source. When a non-empty username is configured the producer is
    set up for SASL/PLAIN over ``SASL_PLAINTEXT`` with the data source's
    username and password; otherwise no authentication options are passed.
    """
    source = get_data_source()
    servers = f'{source.get("host")}:{source.get("port")}'
    username = source.get('username')

    # No credentials configured: plain, unauthenticated producer.
    if username is None or len(username) == 0:
        return KafkaProducer(bootstrap_servers=servers)

    return KafkaProducer(
        sasl_mechanism="PLAIN",
        security_protocol='SASL_PLAINTEXT',
        sasl_plain_username=username,
        sasl_plain_password=source.get('password'),
        bootstrap_servers=servers,
    )
Exemple #5
0
def get_col():
    """Return the MongoDB collection named by ``CRAWLAB_COLLECTION``.

    The collection name comes from the ``CRAWLAB_COLLECTION`` environment
    variable (default ``'test'``). The connection settings depend on the
    data source returned by ``get_data_source``:

    * If its ``type`` is ``None``, the default data source is used and all
      settings are read from ``CRAWLAB_MONGO_*`` environment variables,
      with fallbacks (host ``'localhost'``, port ``27017``, database
      ``'test'``, empty username/password, auth source ``'admin'``).
    * Otherwise the host, port, username, password, auth source and
      database are taken from the data source itself. A missing or empty
      port falls back to ``27017`` instead of making ``int()`` raise.

    Returns:
        The collection object obtained from the ``MongoClient``.

    Raises:
        ValueError: if a configured port is a non-empty value that cannot
            be converted to an integer.
    """
    ds = get_data_source()
    collection = os.environ.get('CRAWLAB_COLLECTION') or 'test'

    if ds.get('type') is None:
        # default data source
        mongo = MongoClient(
            host=os.environ.get('CRAWLAB_MONGO_HOST') or 'localhost',
            # The trailing ``or`` keeps a literal "0" mapped to the default.
            port=int(os.environ.get('CRAWLAB_MONGO_PORT') or 27017) or 27017,
            username=os.environ.get('CRAWLAB_MONGO_USERNAME') or '',
            password=os.environ.get('CRAWLAB_MONGO_PASSWORD') or '',
            authSource=os.environ.get('CRAWLAB_MONGO_AUTHSOURCE') or 'admin',
        )
        db_name = os.environ.get('CRAWLAB_MONGO_DB') or 'test'
    else:
        # specified mongo data source
        mongo = MongoClient(
            host=ds.get('host'),
            # A missing/empty port would make int() raise; use Mongo's default.
            port=int(ds.get('port') or 27017),
            username=ds.get('username'),
            password=ds.get('password'),
            authSource=ds.get('auth_source') or 'admin',
        )
        db_name = ds.get('database')

    return mongo.get_database(db_name).get_collection(collection)
Exemple #6
0
def get_client() -> elasticsearch.Elasticsearch:
    """Build an Elasticsearch client for the current data source.

    The client is given a single host entry whose ``host`` and ``port``
    are read, unmodified, from the data source.
    """
    source = get_data_source()
    node = {
        'host': source.get('host'),
        'port': source.get('port'),
    }
    return elasticsearch.Elasticsearch(hosts=[node])
Exemple #7
0
def send_msg(item):
    """Publish ``item`` as UTF-8 encoded JSON to the data source's topic.

    The topic name is the ``database`` field of the data source returned
    by ``get_data_source``. A producer is obtained from ``get_producer``
    for this single message.

    Args:
        item: a JSON-serialisable object.

    Raises:
        TypeError: if ``item`` cannot be serialised by ``json.dumps``.
    """
    ds = get_data_source()
    producer = get_producer()
    try:
        producer.send(ds.get('database'), json.dumps(item).encode('utf-8'))
        # send() only queues the record; flush so it is actually delivered
        # before this short-lived producer goes away.
        producer.flush()
    finally:
        # get_producer() builds a new producer per call, so release it here
        # rather than leaking one producer per message.
        producer.close()