def dispatch_request(ip, request):
    method = REQUEST_TABLE[request['request']]
    spec = inspect.getargspec(method)
    # figure out which args are required (those without default values)
    if spec[3] is not None:
        required_args = set(spec[0][:-len(spec[3])])  # set of args with no default value
    else:
        required_args = set(spec[0])  # all args are required
    # remove compulsory stuff from the request dict, add IP
    del request['version']
    del request['api_key']
    del request['nonce']
    del request['signature']
    del request['timestamp']
    del request['request']
    request['ip'] = ip
    # check required args are provided and we haven't been sent superfluous args
    keys = set(request.keys())
    if not keys.issubset(spec[0]):
        raise errors.InvalidArgumentError()  # some request args aren't in method args
    elif not required_args.issubset(keys):
        raise errors.InvalidArgumentError()  # some required method args aren't in request args
    return method(**request)  # call into target method
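
# A hypothetical usage sketch for dispatch_request. REQUEST_TABLE is used
# above but populated elsewhere; the registration and payload shown here are
# assumptions for illustration only (a real call also needs a live database
# behind market_depth).
def _dispatch_example():
    REQUEST_TABLE['market_depth'] = market_depth  # hypothetical registration
    request = {
        'version': 1, 'api_key': 'key', 'nonce': 42, 'signature': 'sig',
        'timestamp': 1300000000, 'request': 'market_depth', 'step': '0.1',
    }
    # envelope fields are stripped, 'ip' is injected, and the call becomes
    # market_depth(ip='127.0.0.1', step='0.1')
    return dispatch_request('127.0.0.1', request)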
def market_depth(ip, step=0.05):
    try:
        step = Decimal(str(step))
    except Exception:  # narrowed from a bare except so SystemExit/KeyboardInterrupt propagate
        raise errors.InvalidArgumentError()
    if step <= 0:
        raise errors.InvalidArgumentError()
    cursor_bid = run_market_depth_query(step, 'bid')
    cursor_ask = run_market_depth_query(step, 'ask')
    return {
        'bid': compile_market_depth_results(cursor_bid, step),
        'ask': compile_market_depth_results(cursor_ask, step)
    }
def run_market_depth_query(step, column):
    if column == 'bid':
        offer_currency = 'GBP'
    elif column == 'ask':
        offer_currency = 'BTC'
    else:
        raise errors.InvalidArgumentError()
    # `column` is validated against the whitelist above, so it is safe to
    # interpolate into the query text; the literal '%s' placeholders are kept
    # for the values bound by cursor.execute() below.
    query = """SELECT CAST(txdb_order.%s / %s AS UNSIGNED INTEGER) * %s as 'price',
                      SUM(txdb_order.want_amount) as 'volume'
               FROM txdb_order
               INNER JOIN txdb_balance ON txdb_order.balance_id = txdb_balance.id
               INNER JOIN txdb_currency ON txdb_balance.currency_id = txdb_currency.id
               WHERE txdb_order.filled = 0
                 AND txdb_order.cancelled = 0
                 AND txdb_currency.code = %s
               GROUP BY CAST(txdb_order.%s / %s AS UNSIGNED INTEGER)
               ORDER BY price ASC""" % (column, '%s', '%s', '%s', column, '%s')
    cursor = connection.cursor()
    cursor.execute(query, [step, step, offer_currency, step])
    return cursor
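
# compile_market_depth_results is called above but not defined in this file.
# A minimal sketch of what it might do, assuming each cursor row is a
# (price, volume) pair as produced by the query above and the caller wants
# parallel lists per column (this shape is an assumption, not the confirmed
# implementation):
def compile_market_depth_results_sketch(cursor, step):
    prices = []
    volumes = []
    row = cursor.fetchone()
    while row:
        prices.append(str(row[0]))   # price bucket, quantised to `step`
        volumes.append(str(row[1]))  # total open order volume at that bucket
        row = cursor.fetchone()
    return {'price': prices, 'volume': volumes}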
def one_task_per_interval(
        interval_count,
        validate_task_output,
        if_sequence=0,
        and_end_task=True,
        reuse_tasks=True,
        interval_list_param="interval_list",
        oldest_git_commit_to_reuse='6ca726fc265f9e55765bf1fdf71b86285b8a0ff2',
        task_key_params=['name', 'inputs', 'interval', 'ref'],
        script=arvados.current_job()['script']):
    """
    Queue one task for each of interval_count intervals, splitting
    the genome chunk (described by the .interval_list file) evenly.

    Each new task will have an "inputs" parameter: a manifest
    containing a set of one or more gVCF files and its corresponding
    index.

    Each new task will also have a "ref" parameter: a manifest
    containing the reference files to use.

    if_sequence and and_end_task arguments have the same significance
    as in arvados.job_setup.one_task_per_input_file().
    """
    if if_sequence != arvados.current_task()['sequence']:
        return

    interval_list_file = gatk_helper.mount_single_gatk_interval_list_input(
        interval_list_param=interval_list_param)

    interval_reader = open(interval_list_file, mode="r")
    lines = interval_reader.readlines()
    sn_intervals = dict()
    sns = []
    total_len = 0
    for line in lines:
        if line[0] == '@':
            # skip all lines starting with '@'
            continue
        fields = line.split("\t")
        if len(fields) != 5:
            raise errors.InvalidArgumentError(
                "interval_list %s has invalid line [%s] - expected 5 fields but got %s"
                % (interval_list_file, line, len(fields)))
        sn = fields[0]
        start = int(fields[1])
        end = int(fields[2])
        length = end - start + 1
        total_len += length
        sn_intervals[sn] = (start, end)
        sns.append(sn)

    print "Total chunk length is %s" % total_len
    interval_len = int(total_len / interval_count)
    intervals = []
    print "Splitting chunk into %s intervals of size ~%s" % (interval_count,
                                                             interval_len)
    for interval_i in range(0, interval_count):
        interval_num = interval_i + 1
        intervals_count = 0
        remaining_len = interval_len
        interval = []
        while len(sns) > 0:
            sn = sns.pop(0)
            if sn not in sn_intervals:
                raise ValueError(
                    "sn_intervals missing entry for sn [%s]" % sn)
            start, end = sn_intervals[sn]
            if (end - start + 1) > remaining_len:
                # not enough space for the whole sq, split it
                real_end = end
                end = remaining_len + start - 1
                assert ((end - start + 1) <= remaining_len)
                sn_intervals[sn] = (end + 1, real_end)
                sns.insert(0, sn)
            interval.append("%s:%s-%s" % (sn, start, end))
            remaining_len -= (end - start + 1)
            intervals_count += 1
            if remaining_len <= 0:
                break
        if intervals_count > 0:
            intervals.append(interval)
        else:
            print "WARNING: skipping empty interval %s" % interval_num

    print "Have %s intervals" % (len(intervals))

    if reuse_tasks:
        # get candidates for task reuse
        job_filters = [
            ['script', '=', script],
            ['repository', '=', arvados.current_job()['repository']],
            ['script_version', 'in git', oldest_git_commit_to_reuse],
            ['docker_image_locator', 'in docker',
             arvados.current_job()['docker_image_locator']],
        ]
        reusable_tasks = get_reusable_tasks(if_sequence + 1, task_key_params,
                                            job_filters)
        print "Have %s potentially reusable tasks" % (len(reusable_tasks))

    for interval in intervals:
        interval_str = ' '.join(interval)
        print "Creating task to process interval: [%s]" % interval_str
        new_task_params = arvados.current_task()['parameters']
        new_task_params['interval'] = interval_str
        if reuse_tasks:
            task = create_or_reuse_task(if_sequence + 1, new_task_params,
                                        reusable_tasks, task_key_params,
                                        validate_task_output)
        else:
            task = create_task(if_sequence + 1, new_task_params)

    if and_end_task:
        print "Ending task %s successfully" % if_sequence
        arvados.api().job_tasks().update(uuid=arvados.current_task()['uuid'],
                                         body={'success': True}).execute()
        exit(0)
def one_task_per_group_combined_inputs(ref_input,
                                       job_input,
                                       interval_lists,
                                       group_by_regex,
                                       if_sequence=0,
                                       and_end_task=True,
                                       create_task_func=create_task):
    """
    Queue one task for each group of gVCFs and corresponding
    interval_list in the inputs_collection, with grouping based on
    two things:
      - the stream in which the gVCFs are held within the collection
      - the value of the named capture group "group_by" in the
        group_by_regex against the filename in the inputs_collection

    Each new task will have an "inputs" parameter: a manifest
    containing a set of one or more gVCF files and its corresponding
    index.

    Each new task will also have a "ref" parameter: a manifest
    containing the reference files to use.

    Note that all gVCFs not matching the group_by_regex are ignored.

    if_sequence and and_end_task arguments have the same significance
    as in arvados.job_setup.one_task_per_input_file().
    """
    if if_sequence != arvados.current_task()['sequence']:
        return

    group_by_r = re.compile(group_by_regex)

    # prepare interval_lists
    il_cr = arvados.CollectionReader(interval_lists)
    il_ignored_files = []
    interval_list_by_group = {}
    for s in il_cr.all_streams():
        for f in s.all_files():
            m = re.search(group_by_r, f.name())
            if m:
                group_name = m.group('group_by')
                interval_list_m = re.search(r'\.interval_list', f.name())
                if interval_list_m:
                    if group_name not in interval_list_by_group:
                        interval_list_by_group[group_name] = dict()
                    interval_list_by_group[group_name][s.name(), f.name()] = f
                    continue
            # if we make it this far, we have files that we are ignoring
            il_ignored_files.append("%s/%s" % (s.name(), f.name()))

    # prepare gVCF input collections
    cr = arvados.CollectionReader(job_input)
    ignored_files = []
    last_stream_name = ""
    gvcf_by_group = {}
    gvcf_indices = {}
    for s in sorted(cr.all_streams(), key=lambda stream: stream.name()):
        stream_name = s.name()
        # handle each stream name separately
        if stream_name != last_stream_name:
            if last_stream_name != "":
                print "Done processing files in stream %s" % last_stream_name
                one_task_per_gvcf_group_in_stream_combined_inputs(
                    last_stream_name,
                    gvcf_by_group,
                    gvcf_indices,
                    interval_list_by_group,
                    if_sequence,
                    ref_input,
                    create_task_func=create_task_func)
            # now that we are done with last_stream_name, reinitialise dicts
            # to process data from the new stream
            print "Processing files in stream %s" % stream_name
            gvcf_by_group = {}
            gvcf_indices = {}
            last_stream_name = stream_name

        # loop over all the files in this stream (there may be only one)
        for f in s.all_files():
            if re.search(r'\.tbi$', f.name()):
                gvcf_indices[s.name(), f.name()] = f
                continue
            m = re.search(group_by_r, f.name())
            if m:
                group_name = m.group('group_by')
                gvcf_m = re.search(r'\.vcf\.gz$', f.name())
                if gvcf_m:
                    if group_name not in gvcf_by_group:
                        gvcf_by_group[group_name] = dict()
                    gvcf_by_group[group_name][s.name(), f.name()] = f
                    continue
                interval_list_m = re.search(r'\.interval_list', f.name())
                if interval_list_m:
                    if group_name not in interval_list_by_group:
                        interval_list_by_group[group_name] = dict()
                    if (s.name(), f.name()) in interval_list_by_group[group_name]:
                        if interval_list_by_group[group_name][
                                s.name(), f.name()].as_manifest() != f.as_manifest():
                            raise errors.InvalidArgumentError(
                                "Already have interval_list for group %s file %s/%s, but manifests are not identical!"
                                % (group_name, s.name(), f.name()))
                    else:
                        interval_list_by_group[group_name][s.name(), f.name()] = f
                    continue
            # if we make it this far, we have files that we are ignoring
            ignored_files.append("%s/%s" % (s.name(), f.name()))

    # finally, process the last stream
    print "Processing last stream"
    one_task_per_gvcf_group_in_stream_combined_inputs(
        stream_name,
        gvcf_by_group,
        gvcf_indices,
        interval_list_by_group,
        if_sequence,
        ref_input,
        create_task_func=create_task_func)

    # report on any ignored files
    if len(ignored_files) > 0:
        print "WARNING: ignored non-matching files in inputs_collection: %s" % (
            ' '.join(ignored_files))
        # TODO: could use `setmedian` from https://github.com/ztane/python-Levenshtein
        # to print the most representative "median" filename (i.e. "skipped 15
        # files like <median>"), then compare the rest of the files to that
        # median (perhaps with `ratio`)

    if and_end_task:
        print "Ending task %s successfully" % if_sequence
        arvados.api().job_tasks().update(uuid=arvados.current_task()['uuid'],
                                         body={'success': True}).execute()
        exit(0)
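
# The grouping above relies on a named capture group called "group_by". A
# small runnable illustration of the kind of regex a caller might pass; this
# particular pattern is an assumption, any regex with a (?P<group_by>...)
# group will work:
def _group_by_regex_example():
    group_by_regex = r'^(?P<group_by>[^.]+)\.'  # hypothetical pattern
    group_by_r = re.compile(group_by_regex)
    # a gVCF, its index, and its interval_list all land in group 'sampleA'
    for name in ['sampleA.vcf.gz', 'sampleA.vcf.gz.tbi', 'sampleA.interval_list']:
        m = re.search(group_by_r, name)
        assert m.group('group_by') == 'sampleA'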
def chunked_tasks_per_cram_file(
        ref_input,
        job_input,
        interval_lists,
        validate_task_output,
        if_sequence=0,
        and_end_task=True,
        reuse_tasks=True,
        reuse_tasks_retrieve_all=True,
        interval_list_param="interval_list",
        oldest_git_commit_to_reuse='6ca726fc265f9e55765bf1fdf71b86285b8a0ff2',
        script=arvados.current_job()['script']):
    """
    Queue one task for each cram file in this job's input collection.

    Each new task will have an "input" parameter: a manifest
    containing one .cram file and its corresponding .crai index file.

    Files in the input collection that are not named *.cram or *.crai
    (as well as *.crai files that do not match any .cram file present)
    are silently ignored.

    if_sequence and and_end_task arguments have the same significance
    as in arvados.job_setup.one_task_per_input_file().
    """
    if if_sequence != arvados.current_task()['sequence']:
        return

    # prepare interval lists
    cr = arvados.CollectionReader(interval_lists)
    chunk_interval_list = {}
    chunk_input_pdh_names = []
    for s in cr.all_streams():
        for f in s.all_files():
            if re.search(r'\.interval_list$', f.name()):
                chunk_interval_list[s.name(), f.name()] = f
    for ((s_name, f_name),
         chunk_interval_list_f) in sorted(chunk_interval_list.items()):
        # register each interval_list as its own collection so tasks can
        # mount it by portable data hash
        chunk_input = chunk_interval_list_f.as_manifest()
        r = arvados.api().collections().create(body={
            "manifest_text": chunk_input
        }).execute()
        chunk_input_pdh = r["portable_data_hash"]
        chunk_input_name = os.path.join(s_name, f_name)
        chunk_input_pdh_names.append((chunk_input_pdh, chunk_input_name))

    if len(chunk_input_pdh_names) == 0:
        raise errors.InvalidArgumentError(
            "No interval_list files found in %s" % (interval_lists))

    # prepare CRAM input collections
    cr = arvados.CollectionReader(job_input)
    cram = {}
    crai = {}
    for s in cr.all_streams():
        for f in s.all_files():
            if re.search(r'\.cram$', f.name()):
                cram[s.name(), f.name()] = f
            elif re.search(r'\.crai$', f.name()):
                crai[s.name(), f.name()] = f
    for ((s_name, f_name), cram_f) in cram.items():
        # the index may be named foo.crai or foo.cram.crai
        crai_f = crai.get(
            (s_name, re.sub(r'cram$', 'crai', f_name)),
            crai.get((s_name, re.sub(r'cram$', 'cram.crai', f_name)), None))
        task_input = cram_f.as_manifest()
        if crai_f:
            task_input += crai_f.as_manifest()
        else:
            # no CRAI for CRAM
            raise errors.InvalidArgumentError(
                "No corresponding CRAI file found for CRAM file %s" % f_name)

        # Create a portable data hash for the task's subcollection
        r = arvados.api().collections().create(body={
            "manifest_text": task_input
        }).execute()
        task_input_pdh = r["portable_data_hash"]

        if reuse_tasks:
            task_key_params = ['input', 'ref', 'chunk']
            # get candidates for task reuse
            job_filters = [
                ['script', '=', script],
                ['repository', '=', arvados.current_job()['repository']],
                ['script_version', 'in git', oldest_git_commit_to_reuse],
                ['docker_image_locator', 'in docker',
                 arvados.current_job()['docker_image_locator']],
            ]
            if reuse_tasks_retrieve_all:
                # retrieve a full set of all possible reusable tasks
                reusable_tasks = get_reusable_tasks(if_sequence + 1,
                                                    task_key_params,
                                                    job_filters)
                print "Have %s tasks for potential reuse" % (
                    len(reusable_tasks))
            else:
                reusable_task_jobs = get_jobs_for_task_reuse(job_filters)
                print "Have %s jobs for potential task reuse" % (
                    len(reusable_task_jobs))
                reusable_task_job_uuids = [
                    job['uuid'] for job in reusable_task_jobs['items']
                ]

        for chunk_input_pdh, chunk_input_name in chunk_input_pdh_names:
            # Create task for each CRAM / chunk
            new_task_params = {
                'input': task_input_pdh,
                'ref': ref_input,
                'chunk': chunk_input_pdh
            }
            print "Creating new task to process %s with chunk interval %s " % (
                f_name, chunk_input_name)
            if reuse_tasks:
                if reuse_tasks_retrieve_all:
                    task = create_or_reuse_task(if_sequence + 1,
                                                new_task_params,
                                                reusable_tasks,
                                                task_key_params,
                                                validate_task_output)
                else:
                    task = create_or_reuse_task_from_jobs(
                        if_sequence + 1, new_task_params,
                        reusable_task_job_uuids, task_key_params,
                        validate_task_output)
            else:
                task = create_task(if_sequence + 1, new_task_params)

    if and_end_task:
        print "Ending task %s successfully" % if_sequence
        arvados.api().job_tasks().update(uuid=arvados.current_task()['uuid'],
                                         body={'success': True}).execute()
        exit(0)
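
# create_or_reuse_task is defined elsewhere; the reuse test it performs is,
# in essence, a comparison of selected task parameters against candidate
# tasks fetched via job_filters. A minimal sketch of that matching step
# (the names and task-record structure here are assumptions):
def _task_params_match_sketch(new_task_params, old_task, task_key_params):
    # a candidate task is reusable only if every key parameter is identical
    old_params = old_task['parameters']
    return all(
        old_params.get(k) == new_task_params.get(k) for k in task_key_params)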
def one_task_per_gvcf_group_in_stream_combined_inputs(
        stream_name,
        gvcf_by_group,
        gvcf_indices,
        interval_list_by_group,
        if_sequence,
        ref_input_pdh,
        create_task_func=create_task):
    """
    Process one stream of data and launch a subtask for handling it
    """
    print "Finalising stream %s" % stream_name
    for group_name in sorted(gvcf_by_group.keys()):
        print "Have %s gVCFs in group %s" % (len(gvcf_by_group[group_name]),
                                             group_name)
        # require interval_list for this group
        if group_name not in interval_list_by_group:
            raise errors.InvalidArgumentError(
                "Inputs collection did not contain interval_list for group %s"
                % group_name)
        interval_lists = interval_list_by_group[group_name].keys()
        if len(interval_lists) > 1:
            raise errors.InvalidArgumentError(
                "Inputs collection contained more than one interval_list for group %s: %s"
                % (group_name, ' '.join(interval_lists)))
        interval_list_manifest = interval_list_by_group[group_name].get(
            interval_lists[0]).as_manifest()

        # "combined_inputs" style is to have interval_list and inputs in
        # the same collection
        task_inputs_manifest = interval_list_manifest
        for ((s_name, gvcf_name), gvcf_f) in gvcf_by_group[group_name].items():
            task_inputs_manifest += gvcf_f.as_manifest()
            # the index may be named foo.vcf.tbi or foo.vcf.gz.tbi
            gvcf_index_f = gvcf_indices.get(
                (s_name, re.sub(r'vcf.gz$', 'vcf.tbi', gvcf_name)),
                gvcf_indices.get(
                    (s_name, re.sub(r'vcf.gz$', 'vcf.gz.tbi', gvcf_name)),
                    None))
            if gvcf_index_f:
                task_inputs_manifest += gvcf_index_f.as_manifest()
            else:
                # no index for gVCF - TODO: should this be an error or warning?
                print "WARNING: No corresponding .tbi index file found for gVCF file %s" % gvcf_name
                #raise errors.InvalidArgumentError("No corresponding .tbi index file found for gVCF file %s" % gvcf_name)

        # Create a portable data hash for the task's subcollection
        r = arvados.api().collections().create(body={
            "manifest_text": task_inputs_manifest
        }).execute()
        task_inputs_pdh = r["portable_data_hash"]

        # Create task to process this group
        name_components = []
        if len(stream_name) > 0 and stream_name != ".":
            name_components.append(stream_name)
        if len(group_name) > 0:
            name_components.append(group_name)
        if len(name_components) == 0:
            name = "all"
        else:
            name = '::'.join(name_components)
        print "Creating task to process %s" % name
        new_task_params = {
            'inputs': task_inputs_pdh,
            'ref': ref_input_pdh,
            'name': name
        }
        task = create_task_func(if_sequence + 1, new_task_params)
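
# The dict shapes expected by this helper are easy to misread: both
# gvcf_by_group and interval_list_by_group are keyed by group name, and each
# value is itself a dict keyed by (stream_name, file_name) tuples mapping to
# Arvados file objects. A hypothetical call might look like:
#
#   one_task_per_gvcf_group_in_stream_combined_inputs(
#       stream_name='.',
#       gvcf_by_group={'sampleA': {('.', 'sampleA.vcf.gz'): gvcf_file_obj}},
#       gvcf_indices={('.', 'sampleA.vcf.gz.tbi'): tbi_file_obj},
#       interval_list_by_group={'sampleA': {('.', 'sampleA.interval_list'): il_file_obj}},
#       if_sequence=0,
#       ref_input_pdh='d41d8cd98f00b204e9800998ecf8427e+0')
#
# where gvcf_file_obj, tbi_file_obj and il_file_obj are file objects from an
# arvados.CollectionReader (all names above are illustrative assumptions).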
def historical_prices(ip, start_date=None, end_date=None, scope='daily'):
    try:
        start_date = int(start_date) if start_date is not None else None
        end_date = int(end_date) if end_date is not None else None
    except (TypeError, ValueError):
        raise errors.InvalidArgumentError()

    # are both valid unix timestamps?
    if start_date is not None and (start_date < 0 or start_date > 0x7FFFFFFF):
        raise errors.InvalidArgumentError()
    elif end_date is not None and (
            end_date < 0 or end_date > 0x7FFFFFFF or
            (start_date is not None and end_date <= start_date)):
        raise errors.InvalidArgumentError()

    # adjust scope of query
    if scope == 'monthly':
        group_by = 'YEAR(txdb_transaction.executed), MONTH(txdb_transaction.executed)'
    elif scope == 'weekly':
        group_by = 'YEARWEEK(txdb_transaction.executed)'
    elif scope == 'hourly':
        group_by = 'DATE(txdb_transaction.executed), HOUR(txdb_transaction.executed)'
    elif scope == '15mins':
        # FLOOR() rather than CAST(... AS INTEGER), which is not valid MySQL
        group_by = 'DATE(txdb_transaction.executed), HOUR(txdb_transaction.executed), FLOOR(MINUTE(txdb_transaction.executed) / 15)'
    else:  # scope == 'daily'
        group_by = 'DATE(txdb_transaction.executed)'

    # construct where clause limiting date range
    date_limits = ""
    date_params = []
    if start_date:
        date_limits += " AND txdb_transaction.executed >= FROM_UNIXTIME(%s)"
        date_params.append(start_date)
    if end_date:
        date_limits += " AND txdb_transaction.executed <= FROM_UNIXTIME(%s)"
        date_params.append(end_date)

    # abusing MySQL since 2011: GROUP_CONCAT/SUBSTRING_INDEX picks the first
    # and last trade price within each bucket to emulate open/close
    query = """SELECT txdb_transaction.executed as 'timestamp',
                      SUBSTRING_INDEX(GROUP_CONCAT(txdb_order.bid ORDER BY txdb_transaction.executed ASC), ',', 1) AS 'open',
                      SUBSTRING_INDEX(GROUP_CONCAT(txdb_order.bid ORDER BY txdb_transaction.executed DESC), ',', 1) AS 'close',
                      SUBSTRING_INDEX(GROUP_CONCAT(txdb_order.bid ORDER BY txdb_order.bid ASC), ',', 1) AS 'low',
                      SUBSTRING_INDEX(GROUP_CONCAT(txdb_order.bid ORDER BY txdb_order.bid DESC), ',', 1) AS 'high',
                      AVG(txdb_order.bid) as 'mean',
                      SUM(linked_transaction.amount) as 'volume'
               FROM txdb_transaction
               INNER JOIN txdb_balance from_balance ON txdb_transaction.from_balance_id = from_balance.id
               INNER JOIN txdb_currency from_currency ON from_balance.currency_id = from_currency.id
               INNER JOIN txdb_transaction linked_transaction ON txdb_transaction.linked_transaction_id = linked_transaction.id
               INNER JOIN txdb_order ON txdb_transaction.order_id = txdb_order.id
               WHERE txdb_transaction.reversed = 0
                 AND from_currency.code = 'GBP'
                 %s
               GROUP BY %s
               ORDER BY txdb_transaction.executed ASC
               LIMIT %d""" % (date_limits, group_by,
                              settings.HISTORICAL_PRICES_ROW_LIMIT)
    cursor = connection.cursor()
    cursor.execute(query, date_params)

    results = {
        'timestamp': [],
        'open': [],
        'close': [],
        'low': [],
        'high': [],
        'mean': [],
        'volume': []
    }

    # format the results into something usable by ChartDirector
    row = cursor.fetchone()
    while row:
        ts = calendar.timegm(floor_datetime(row[0], scope).timetuple())
        results['timestamp'].append(ts)
        results['open'].append(str(row[1]))
        results['close'].append(str(row[2]))
        results['low'].append(str(row[3]))
        results['high'].append(str(row[4]))
        results['mean'].append(str(row[5]))
        results['volume'].append(str(row[6]))
        row = cursor.fetchone()

    return results
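
# floor_datetime is referenced above but not defined in this file. A minimal
# sketch of what it presumably does - truncate a datetime to the start of its
# bucket for the given scope - assuming buckets mirror the GROUP BY clauses
# above (this is an assumption, not the confirmed helper; note MySQL's
# YEARWEEK defaults to Sunday-starting weeks, while this sketch floors to
# Monday for simplicity):
from datetime import timedelta

def floor_datetime_sketch(dt, scope):
    if scope == 'monthly':
        return dt.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
    elif scope == 'weekly':
        monday = dt - timedelta(days=dt.weekday())
        return monday.replace(hour=0, minute=0, second=0, microsecond=0)
    elif scope == 'hourly':
        return dt.replace(minute=0, second=0, microsecond=0)
    elif scope == '15mins':
        return dt.replace(minute=(dt.minute // 15) * 15, second=0,
                          microsecond=0)
    else:  # 'daily'
        return dt.replace(hour=0, minute=0, second=0, microsecond=0)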