def index_item(item):
    """Index *item* as a document in Elasticsearch.

    The target index name is the ``database`` field of the mapping returned
    by ``get_data_source()``; the client comes from ``get_client()``.
    """
    source = get_data_source()
    es = get_client()
    index_name = source.get('database')
    es.index(index=index_name, body=item)
def connect_postgres():
    """Open a PostgreSQL connection using the configured data source.

    Missing or empty settings fall back to defaults: host ``localhost``,
    port ``'5432'``, database ``test``; username and password become
    ``None``.

    Returns:
        The connection object returned by ``psycopg2.connect``.
    """
    source = get_data_source()
    settings = {
        'host': source.get('host') or 'localhost',
        'port': source.get('port') or '5432',
        'user': source.get('username') or None,
        'password': source.get('password') or None,
        'database': source.get('database') or 'test',
    }
    return psycopg2.connect(**settings)
def connect_mysql():
    """Open a MySQL connection using the configured data source.

    Missing or empty settings fall back to defaults: host ``localhost``,
    port ``3306``, database ``test``; username and password become
    ``None``. The connection always uses the ``utf8`` charset.

    Returns:
        The connection object returned by ``pymysql.connect``.

    Raises:
        ValueError: If the configured port is non-empty but not numeric.
    """
    ds = get_data_source()
    return pymysql.connect(
        host=ds.get('host') or 'localhost',
        # Treat an empty port like a missing one, the same way every other
        # setting here is handled; int('') would raise ValueError.
        port=int(ds.get('port') or 3306),
        user=ds.get('username') or None,
        password=ds.get('password') or None,
        database=ds.get('database') or 'test',
        charset='utf8',
    )
def get_producer():
    """Build a ``KafkaProducer`` for the configured data source.

    The bootstrap server address is ``<host>:<port>`` taken from the data
    source. When a non-empty username is configured, the producer
    authenticates with SASL/PLAIN over ``SASL_PLAINTEXT`` using the
    configured username and password; otherwise it connects without
    authentication settings.
    """
    source = get_data_source()
    servers = f'{source.get("host")}:{source.get("port")}'
    username = source.get('username')

    # No credentials configured: plain, unauthenticated producer.
    if username is None or len(username) == 0:
        return KafkaProducer(bootstrap_servers=servers)

    return KafkaProducer(
        sasl_mechanism="PLAIN",
        security_protocol='SASL_PLAINTEXT',
        sasl_plain_username=username,
        sasl_plain_password=source.get('password'),
        bootstrap_servers=servers,
    )
def get_col():
    """Return the MongoDB collection that results should be written to.

    The collection name always comes from the ``CRAWLAB_COLLECTION``
    environment variable (default ``test``).

    When the data source has no ``type``, the connection is configured from
    the ``CRAWLAB_MONGO_*`` environment variables, with defaults of
    ``localhost:27017``, database ``test``, empty credentials and auth
    source ``admin``. Otherwise the connection settings and the database
    name are taken from the data source itself.

    Returns:
        The collection object obtained from the selected database.

    Raises:
        ValueError: If a configured port is non-empty but not numeric.
    """
    ds = get_data_source()
    collection = os.environ.get('CRAWLAB_COLLECTION') or 'test'

    if ds.get('type') is None:
        # default data source
        mongo = MongoClient(
            host=os.environ.get('CRAWLAB_MONGO_HOST') or 'localhost',
            # The trailing `or` also maps a configured port of 0 to 27017.
            port=int(os.environ.get('CRAWLAB_MONGO_PORT') or 27017) or 27017,
            username=os.environ.get('CRAWLAB_MONGO_USERNAME') or '',
            password=os.environ.get('CRAWLAB_MONGO_PASSWORD') or '',
            authSource=os.environ.get('CRAWLAB_MONGO_AUTHSOURCE') or 'admin',
        )
        db_name = os.environ.get('CRAWLAB_MONGO_DB') or 'test'
    else:
        # specified mongo data source
        mongo = MongoClient(
            host=ds.get('host'),
            # A missing or empty port falls back to 27017, matching the
            # default branch, instead of crashing inside int().
            port=int(ds.get('port') or 27017) or 27017,
            username=ds.get('username'),
            password=ds.get('password'),
            authSource=ds.get('auth_source') or 'admin',
        )
        db_name = ds.get('database')

    return mongo.get_database(db_name).get_collection(collection)
def get_client() -> elasticsearch.Elasticsearch:
    """Create an Elasticsearch client for the configured data source.

    The client is given a single node whose ``host`` and ``port`` are read
    from the mapping returned by ``get_data_source()``.
    """
    source = get_data_source()
    node = {key: source.get(key) for key in ('host', 'port')}
    return elasticsearch.Elasticsearch(hosts=[node])
def send_msg(item):
    """Serialize *item* to JSON and publish it to Kafka.

    The message is sent to the topic named by the ``database`` field of the
    configured data source, encoded as UTF-8 bytes.

    A fresh producer is obtained from ``get_producer()`` on every call, so
    it is closed before returning. Closing releases its connections and
    lets any buffered record be delivered rather than dropped if the
    process exits soon after.

    Raises:
        TypeError: If *item* is not JSON serializable.
    """
    ds = get_data_source()
    producer = get_producer()
    try:
        producer.send(ds.get('database'), json.dumps(item).encode('utf-8'))
    finally:
        # send() only queues the record; close() waits for pending sends
        # and shuts down the producer created for this call.
        producer.close()