Ejemplo n.º 1
0
  def set_sparkenv(cls, spark_home=None):
    """Resolve SPARK_HOME, optionally installing Spark, and expose pyspark.

    The Spark home is taken from the `spark_home` argument, falling back to
    the SPARK_HOME environment variable. When neither is set, the user is
    asked on stdin whether the Spark archive at `URL` should be downloaded
    and unpacked into the folder returned by `get_home_path()`.

    Args:
      spark_home: path of an existing Spark installation, or None to use the
        environment / interactive install.

    Returns:
      The resolved Spark home path. It is also stored in
      os.environ['SPARK_HOME'], and '<spark_home>/python' is appended to
      sys.path.

    Raises:
      Exception: if no Spark home could be determined (nothing configured
        and the download was declined), or if the resolved path does not
        exist.
    """
    import urllib.request, subprocess
    URL = 'http://apache.claz.org/spark/spark-2.4.3/spark-2.4.3-bin-hadoop2.7.tgz'
    spark_home = spark_home or os.environ.get('SPARK_HOME', None)

    if not spark_home:
      # download Spark binary
      ans = input(
        'SPARK_HOME is not found. Would you like download and install in your home folder? (Y or N): '
      )
      if ans.lower() == 'y':
        home_path = get_home_path()
        archive_path = home_path + '/' + URL.split('/')[-1]
        spark_home = archive_path.replace('.tgz', '')

        if Path(archive_path).exists():
          log('+Archive "{}" already downloaded.'.format(archive_path))
          downloaded = False
        else:
          log('+Downloading "{}" to "{}"'.format(URL, archive_path))
          urllib.request.urlretrieve(URL, archive_path)
          downloaded = True

        # Unpack after a fresh download, and also when an archive left over
        # from an earlier run was never extracted.
        if downloaded or not Path(spark_home).exists():
          # Argument list + cwd instead of a shell string, so paths containing
          # spaces or shell metacharacters are passed through unchanged.
          subprocess.call(['tar', '-xf', archive_path], cwd=home_path)
          pyspark_home = spark_home + '/python'
          subprocess.call(
            ['pip', 'install', '--target=' + pyspark_home, 'py4j'])

    if not spark_home:
      # os.environ only accepts strings; assigning None would raise an
      # unhelpful TypeError, so fail with an explicit message instead.
      raise Exception(
        'SPARK_HOME is not set and the Spark download was declined.')

    os.environ['SPARK_HOME'] = spark_home

    if not Path(spark_home).exists():
      raise Exception('SPARK_HOME="{}" does not exists.'.format(spark_home))

    # add pyspark of SPARK_HOME to path
    sys.path.append(spark_home + '/python')

    log('-Using SPARK_HOME=' + spark_home)

    return spark_home
Ejemplo n.º 2
0
def alias_cli():
    """Install 'alias.sh' into the home directory and source it from .bashrc.

    Asks for confirmation on stdin and does nothing unless the answer is
    'y' (case-insensitive). On confirmation, 'alias.sh' from the directory
    returned by get_dir_path() is copied to '.xutil.alias.sh' in the folder
    returned by get_home_path(). If the '.bashrc' in that folder does not
    already mention the copied file, a line sourcing it is appended.
    """
    from xutil.helpers import get_home_path, get_dir_path, get_script_path
    from xutil.diskio import read_file, write_file
    from shutil import copyfile

    reply = input("Install 'alias.sh' in home directory (Y to proceed)? ")
    if reply.lower() == 'y':
        alias_src = get_dir_path() + '/alias.sh'
        alias_dst = get_home_path() + '/.xutil.alias.sh'
        bashrc_path = get_home_path() + '/.bashrc'

        copyfile(alias_src, alias_dst)

        bashrc_text = read_file(bashrc_path)
        if alias_dst not in bashrc_text:
            # Append a '. <file>' line so new shells load the aliases.
            updated_text = '{}\n\n. {}\n'.format(bashrc_text, alias_dst)
            write_file(bashrc_path, updated_text)
            log('+Updated ' + bashrc_path)
Ejemplo n.º 3
0
from collections import deque
import time, socket
import os, hashlib
import dbnet.store as store

# Module-level state describing this worker process. Everything below runs
# at import time.
worker_db_prof = {}  # starts empty; presumably filled with DB profile data elsewhere -- verify
worker_hostname = socket.gethostname()  # host name of the machine running this process
worker_name = None  # not known at import time
worker_status = 'IDLE'  # initial status
worker_queue = deque([])  # starts empty; sync_queue reports its length
worker_pid = os.getpid()  # PID of the current process
worker_sql_cache = {}  # starts empty; presumably caches SQL-related data -- verify against users
# NOTE(review): presumably this can populate environment variables that the
# os.getenv() calls below read, so it must stay ahead of them -- confirm.
load_profile(create_if_missing=True)  # load profile environments

# Web app port, overridable through DBNET_WEBAPP_PORT (default 5566).
WEBAPP_PORT = int(os.getenv('DBNET_WEBAPP_PORT', default=5566))
# Base folder, overridable through DBNET_FOLDER (default: '<home>/dbnet').
DBNET_FOLDER = os.getenv('DBNET_FOLDER', default=get_home_path() + '/dbnet')
# The 'sql' and 'csv' sub-folders are created on import if they are missing.
SQL_FOLDER = DBNET_FOLDER + '/sql'
os.makedirs(SQL_FOLDER, exist_ok=True)
CSV_FOLDER = DBNET_FOLDER + '/csv'
os.makedirs(CSV_FOLDER, exist_ok=True)

def sync_queue():
    """Report this worker's current queue length to the store.

    Calls store.worker_set with this worker's hostname and name and the
    current length of worker_queue. The module globals are read at call
    time, so later changes to worker_name or the queue are picked up.

    Returns:
      Whatever store.worker_set returns.
    """
    return store.worker_set(hostname=worker_hostname,
                            worker_name=worker_name,
                            queue_length=len(worker_queue))

set_worker_idle = lambda: store.sqlx('workers').update_rec(
    hostname=worker_hostname,
    worker_name=worker_name,
    status='IDLE',
    task_id=None,
    task_function=None,