def worker_process_func(job_holder, status_holder):
    """
    Worker process

    :type job_holder: JobHolder
    :type status_holder: StatusHolder
    """
    signal.signal(signal.SIGTERM, Controller.slave_signal_handler)
    while not Controller.quit_flag:
        a_job = job_holder.get_job()
        if a_job is None:
            gevent.sleep(1)
            continue
        runner = JobRunner(a_job, job_holder, status_holder)
        runner.start()
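# The loop above leans on a Controller class that is not shown. Below is a
# minimal sketch of the pieces it relies on, plus a hypothetical launcher;
# only quit_flag and slave_signal_handler are taken from the call sites
# above, while the multiprocessing spawn loop and worker count are assumptions.
import multiprocessing
import signal


class Controller(object):
    # Class-level flag polled by worker_process_func; set by the handler below.
    quit_flag = False

    @staticmethod
    def slave_signal_handler(signum, frame):
        # On SIGTERM, ask the polling loop to exit once the current job is done.
        Controller.quit_flag = True


def spawn_workers(job_holder, status_holder, worker_count=4):
    # Hypothetical launcher: one OS process per worker loop.
    workers = [multiprocessing.Process(target=worker_process_func,
                                       args=(job_holder, status_holder))
               for _ in range(worker_count)]
    for worker in workers:
        worker.start()
    return workers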
def main(argv):
    if len(argv) == 0:
        argv = [
            'bigquery-e2e',
            'publicdata',
            'samples',
            'shakespeare',
            'bigquery-e2e',
            '3',
            '/tmp/bigquery'
        ]
    if len(argv) < 6:
        # Wrong number of args, print the usage and quit.
        arg_names = [
            sys.argv[0],
            '<project_id>',
            '<source_project_id>',
            '<source_dataset_id>',
            '<source_table_id>',
            '<destination_bucket>',
            '<partition_count>',
            '[output_directory]'
        ]
        print 'Usage: %s' % (' '.join(arg_names))
        print 'Got: %s' % (argv,)
        return
    gcs_bucket = argv[4]
    job_runner = JobRunner(project_id=argv[0])
    partition_count = int(argv[5])
    download_dir = argv[6] if len(argv) > 6 else None
    gcs_readers = []
    for index in range(partition_count):
        # Note: a separate GCS reader is required per partition.
        gcs_reader = GcsReader(gcs_bucket=gcs_bucket,
                               download_dir=download_dir)
        gcs_readers.append(gcs_reader)
    run_partitioned_extract_job(job_runner,
                                gcs_readers,
                                source_project_id=argv[1],
                                source_dataset_id=argv[2],
                                source_table_id=argv[3])
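# run_partitioned_extract_job itself is not shown in this example. Below is a
# rough sketch of what it might do, assuming BigQuery's multi-URI extract
# feature (one wildcard URI per partition), that GcsReader keeps the bucket it
# was built with as .gcs_bucket, and that JobRunner exposes the
# start_job/get_job calls used in the pipeline snippets below; all of these
# are assumptions, not the original implementation.
import time


def run_partitioned_extract_job(job_runner, gcs_readers, source_project_id,
                                source_dataset_id, source_table_id):
    # One destination URI pattern per reader; BigQuery spreads the output
    # files across the patterns, so each reader gets its own partition.
    destination_uris = [
        'gs://%s/output.%d.*.json' % (reader.gcs_bucket, index)
        for index, reader in enumerate(gcs_readers)]
    config = {
        'extract': {
            'sourceTable': {
                'projectId': source_project_id,
                'datasetId': source_dataset_id,
                'tableId': source_table_id},
            'destinationUris': destination_uris,
            'destinationFormat': 'NEWLINE_DELIMITED_JSON'}}
    job_runner.start_job(config)
    # Poll until the extract job completes, then let each reader download the
    # objects matching its own URI pattern (download loop omitted here).
    while job_runner.get_job()['status']['state'] != 'DONE':
        time.sleep(5)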
def run_bigquery_job(job_id_prefix, job_type, config):
    '''Run a BigQuery job and update pipeline status.'''
    global g_state
    runner = JobRunner(PROJECT_ID,
                       job_id_prefix + '_' + job_type,
                       client=bigquery)
    runner.start_job({job_type: config})
    with g_state_lock:
        g_state[job_type + '_job_id'] = runner.job_id
    job_state = 'STARTED'
    while job_state != 'DONE':
        time.sleep(5)
        result = runner.get_job()
        job_state = result['status']['state']
    with g_state_lock:
        g_state[job_type + '_result'] = pre(json.dumps(result, indent=2))
    if 'errorResult' in result['status']:
        raise RuntimeError(
            json.dumps(result['status']['errorResult'], indent=2))
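# A hypothetical call site for run_bigquery_job: a table-copy step in the
# pipeline. The configuration keys are standard BigQuery copy-job fields; the
# dataset/table names and the job_id_prefix are placeholders.
copy_config = {
    'sourceTable': {
        'projectId': PROJECT_ID,
        'datasetId': 'logs',
        'tableId': 'raw_events'},
    'destinationTable': {
        'projectId': PROJECT_ID,
        'datasetId': 'staging',
        'tableId': 'events_copy'},
    'writeDisposition': 'WRITE_TRUNCATE'}
run_bigquery_job('pipeline_20240101', 'copy', copy_config)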
def build(args):
    jr = JobRunner(cpus=args.j)
    epigenomes = WebEpigenomesLoader(args)
    for assembly in ["hg19", "mm10"]:
        for assays in ["H3K27ac", "DNase"]:
            epis = epigenomes.GetByAssemblyAndAssays(assembly, assays)
            for epi in epis.epis:
                for exp in epi.exps():
                    if exp.encodeID.startswith("EN"):
                        # print exp.encodeID
                        jr.append([[os.path.realpath(__file__),
                                    "--job", exp.encodeID,
                                    "--assembly", assembly,
                                    "--process"]])
                    else:  # ROADMAP
                        bigWig = exp.files[0]
                        if not bigWig:
                            print "missing", exp
                        else:
                            if not os.path.exists(bigWig.normFnp()):
                                jr.append([[normBin,
                                            "--assembly=" + bigWig.assembly,
                                            "--bwFnp=" + bigWig.normFnp(),
                                            bigWig.fnp()]])
    if args.test:
        return jr.runOne()
    if args.local:
        return jr.run()
    jobOptions = {"mem": 64000, "time": "3:59", "cores": 2, "queue": "short"}
    jr.cluster("/project/umw_zhiping_weng/encyc/norm", jobOptions)
from platform import uname
import sys

from job_runner import JobRunner

IS_MAC = (uname()[0] == 'Darwin')

if IS_MAC:
    cores_per_node = 8
    runner = JobRunner('Fifteen6', cores_per_node)
else:
    cores_per_node = int(sys.argv[1])
    runner = JobRunner(sys.argv[2], cores_per_node)

runner.run_jobs()
def main(argv):
    logging.basicConfig()
    parser = ArgumentParser(description='Read BigQuery table into a text file')
    parser.add_argument('-a', '--service_account', required=True,
                        help='BigQuery service account name')
    parser.add_argument('-s', '--client_secret', required=True,
                        help='Path to client_secrets.json file required for API login')
    parser.add_argument('-c', '--credentials',
                        help=('Path to credentials file (e.g. bigquery_credentials.dat) '
                              'required for API login. If the file is not present, a '
                              'browser window will be shown and you will be asked to '
                              'authenticate'))
    parser.add_argument('-k', '--keyfile',
                        help='Path to the key file (e.g., key.p12)')
    parser.add_argument('-o', '--download_dir', default='.',
                        help='The directory where the output will be exported')
    parser.add_argument('-p', '--project_id', required=True,
                        help='BigQuery source project ID')
    parser.add_argument('-d', '--dataset_id', required=True,
                        help='BigQuery source dataset ID')
    parser.add_argument('-t', '--table_id', help='Source table ID')
    parser.add_argument('-b', '--gcs_bucket',
                        help='Google Cloud Storage destination bucket')
    parser.add_argument('-n', '--partition_count', type=int,
                        help='Partition count for partitioned reader')
    parser.add_argument('--partitioned', dest='partitioned', required=False,
                        action='store_true', help='Use partitioned reader')
    parser.set_defaults(partitioned=False)
    # Parse the argument list passed in (conventionally sys.argv[1:]).
    args = parser.parse_args(argv)

    job_runner = JobRunner(project_id=args.project_id)
    auth = BigQuery_Auth(service_acc=args.service_account,
                         client_secrets=args.client_secret,
                         credentials=args.credentials,
                         key_file=args.keyfile)
    if args.partitioned:
        gcs_readers = []
        for index in range(int(args.partition_count)):
            # Note: a separate GCS reader is required per partition.
            gcs_readers.append(GcsReader(auth=auth,
                                         gcs_bucket=args.gcs_bucket,
                                         download_dir=args.download_dir))
        run_partitioned_extract_job(job_runner,
                                    gcs_readers,
                                    source_project_id=args.project_id,
                                    source_dataset_id=args.dataset_id,
                                    source_table_id=args.table_id)
    else:
        reader = SimpleReader
        gcs_reader = GcsReader(auth=auth,
                               gcs_bucket=args.gcs_bucket,
                               download_dir=args.download_dir)
        reader.run_extract_job(job_runner,
                               gcs_reader,
                               source_project_id=args.project_id,
                               source_dataset_id=args.dataset_id,
                               source_table_id=args.table_id)
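# A hypothetical invocation of main() with the flag list passed in directly;
# every account, path, project, dataset, table, and bucket name below is a
# placeholder.
if __name__ == '__main__':
    main(['-a', 'svc-account@developer.gserviceaccount.com',
          '-s', 'client_secrets.json',
          '-c', 'bigquery_credentials.dat',
          '-k', 'key.p12',
          '-p', 'my-project',
          '-d', 'my_dataset',
          '-t', 'my_table',
          '-b', 'my-gcs-bucket',
          '-o', '/tmp/bigquery',
          '--partitioned',
          '-n', '4'])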
# released into the public domain by the authors.
'''Runs Python commands used in Chapter 12'''

import auth

gcs_bucket = 'bigquery-e2e'
project_id = 'bigquery-e2e'

# Using GcsReader
from gcs_reader import GcsReader
GcsReader(gcs_bucket=gcs_bucket,
          download_dir='/tmp/bigquery').read('shakespeare.json')

# Extracting the publicdata:samples.shakespeare table and reading.
from job_runner import JobRunner
import extract_and_read
extract_and_read.run_extract_job(
    JobRunner(project_id=project_id),
    GcsReader(gcs_bucket=gcs_bucket, download_dir='/tmp/bigquery'),
    source_project_id='publicdata',
    source_dataset_id='samples',
    source_table_id='shakespeare')

# Partitioned extract and parallel read.
from extract_and_partitioned_read import run_partitioned_extract_job
run_partitioned_extract_job(
    JobRunner(project_id=project_id),
    [GcsReader(gcs_bucket=gcs_bucket, download_dir='/tmp/bigquery')
     for x in range(3)],
    source_project_id='publicdata',
    source_dataset_id='samples',
    source_table_id='shakespeare')
import numpy as np
import pandas as pd
from seaborn import jointplot, lmplot, hls_palette
import matplotlib.pyplot as plt
from matplotlib import gridspec
import os
import sys
from matplotlib import cm

from job_runner import JobRunner

export_plots = True
show_plots = False
job_name = 'Fifteen6'
runner = JobRunner(job_name, readonly=True)

figure_folder = os.path.join(os.path.sep, 'Users', 'Jason', 'Desktop',
                             'TempFig', job_name)
if not os.path.exists(figure_folder):
    os.makedirs(figure_folder)

runner.cursor.execute(
    "SELECT * FROM job_results WHERE objective_value IS NOT NULL "
    "AND job_name=\"{0}\"".format(job_name))
completed_job_data = runner.cursor.fetchall()

Y_all = []
X_all = []
for row in completed_job_data:
    Y_all.append(row['objective_value'])
    X_all.append([row[key] for key in runner.parameters_to_optimize])
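# The arrays collected above presumably feed the plotting imports at the top
# of the script. A sketch of one way the script could continue: plotting each
# optimized parameter against the objective with jointplot is an assumption,
# and the output file names are placeholders.
df = pd.DataFrame(X_all, columns=list(runner.parameters_to_optimize))
df['objective_value'] = Y_all

for param in runner.parameters_to_optimize:
    grid = jointplot(x=param, y='objective_value', data=df)
    if export_plots:
        grid.savefig(os.path.join(figure_folder, param + '_vs_objective.png'))
    if show_plots:
        plt.show()
    plt.close('all')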