def list_from_xsv(self, xsv_name, separator='\t', header=False):
    """
        Parse rows from a separated value file.  Return a list containing
        the values matched on each line of the file.

            Parameters:
                - **xsv_name**: String. Filename of the .xsv; it is assumed
                    to live in the project data folder.
                - **separator**: String. The separating character in the
                    file.  Defaults to tab.
                - **header**: Boolean. Flag indicating whether the file has
                    a header.

            Return:
                - List(List(string)). List of rows parsed from the xsv,
                    each row a list of string tokens.
    """
    out = list()
    try:
        xsv_file = open(projSet.__data_file_dir__ + xsv_name, 'r')
    except IOError as e:
        logging.info('Could not open xsv for reading: %s' % e.message)
        return out

    # Process file line-by-line
    if header:
        xsv_file.readline()
    while 1:
        line = xsv_file.readline().strip()
        if line == '':
            break
        tokens = line.split(separator)
        out.append([str(tokens[index]) for index in xrange(len(tokens))])

    xsv_file.close()
    return out

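# Hedged usage sketch for list_from_xsv.  The loader instance ``dl`` and the
# filename 'users.tsv' are illustrative only (not from the source); the file
# is assumed to live under projSet.__data_file_dir__:
#
#   >>> dl = DataLoader()
#   >>> rows = dl.list_from_xsv('users.tsv', separator='\t', header=True)
#   >>> # each element of ``rows`` is a list of string tokens, one per column
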
def _process_help(args):
    """
        Used by RevertRate::process() for forking.  Should not be called
        externally.
    """
    state = args[1]
    thread_args = RevertRateArgsClass(state[0], state[1], state[2],
                                      state[3], state[4], state[6],
                                      state[7], state[8])
    users = args[0]

    if thread_args.log_progress:
        logging.info(__name__ + ' :: Computing reverts on %s users (PID %s)'
                                % (len(users), str(os.getpid())))
    results_agg = list()
    dropped_users = 0

    umpd_obj = UMP_MAP[thread_args.group](users, thread_args)
    for user_data in umpd_obj:

        total_revisions = 0.0
        total_reverts = 0.0

        # Call query on revert rate for each user
        #
        # 1. Obtain user registration date
        # 2. Compute end date based on 't'
        # 3. Get user revisions in time period
        query_args = namedtuple('QueryArgs', 'date_start date_end')\
            (format_mediawiki_timestamp(user_data.start),
             format_mediawiki_timestamp(user_data.end))

        try:
            revisions = query_mod.\
                revert_rate_user_revs_query(user_data.user,
                                            thread_args.project,
                                            query_args)
        except query_mod.UMQueryCallError as e:
            logging.error(__name__ + ' :: Failed to '
                                     'get revisions: {0}'.format(e.message))
            dropped_users += 1
            continue

        results_thread = mpw.build_thread_pool(revisions,
                                               _revision_proc,
                                               thread_args.rev_threads,
                                               state)

        for r in results_thread:
            total_revisions += r[0]
            total_reverts += r[1]

        if not total_revisions:
            results_agg.append([user_data.user, 0.0, total_revisions])
        else:
            results_agg.append([user_data.user,
                                total_reverts / total_revisions,
                                total_revisions])

    if thread_args.log_progress:
        logging.debug(__name__ + ' :: PID {0} complete. Dropped users = {1}'.
                      format(str(os.getpid()), dropped_users))

    return results_agg

def get_elem_from_nested_list(self, in_list, index):
    """
        Extract the element at *index* from each sub-list of a nested
        list.  Return a list containing the values matched from each
        sub-list.

            Usage: ::

                >>> el = DL.ExperimentsLoader()
                >>> results = el.execute_SQL(SQL_query_string)
                >>> new_results = el.get_elem_from_nested_list(results, 0)

            Parameters:
                - **in_list**: List(List(\*)). List of lists from which to
                    parse elements.
                - **index**: Integer. Index of the element to retrieve.

            Return:
                - List(\*). List of sub-elements parsed from list.
    """
    out_list = list()
    for elem in in_list:
        try:
            out_list.append(elem[index])
        except Exception:
            logging.info('Unable to extract index %s from %s' % (
                str(index), str(elem)))
    return out_list

def wrapper(self, users, **kwargs):
    # If users are empty flag an error
    if not users:
        raise UserMetricError('No users to pass to process method.')

    # Ensure user IDs are strings
    users = dl.DataLoader().cast_elems_to_string(users)

    # Add attributes from _param_types
    self.assign_attributes(kwargs, 'process')

    # Echo input params for metric process call
    if hasattr(self, 'log_') and self.log_:
        logging.info(__name__ + ' :: parameters = ' + str(kwargs))

    return proc_func(self, users, **kwargs)

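# Hedged sketch of how ``wrapper`` above would typically be composed.  It
# closes over ``proc_func``, so it is the inner function of a decorator around
# a metric's process() method; the decorator name and the decorated class
# below are illustrative, not from the source:
#
#   def process_decorator(proc_func):
#       def wrapper(self, users, **kwargs):
#           ...  # body as above
#       return wrapper
#
#   class SomeMetric(um.UserMetric):
#       @process_decorator
#       def process(self, users, **kwargs):
#           ...
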
def get_users(cohort_expr):
    """ Get users from a cohort expression or tag name. """
    if search(COHORT_REGEX, cohort_expr):
        logging.info(__name__ + ' :: Processing cohort by expression.')
        users = [user for user in parse_cohorts(cohort_expr)]
    else:
        logging.info(__name__ + ' :: Processing cohort by tag name.')
        try:
            id = query_mod.get_cohort_id(cohort_expr)
            users = [u for u in query_mod.get_cohort_users(id)]
        except (IndexError, TypeError, query_mod.UMQueryCallError) as e:
            logging.error(__name__ + ' :: Could not retrieve users '
                                     'for cohort {0}: {1}'.
                          format(cohort_expr, str(e)))
            return []
    return users

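# Hedged usage sketch for get_users.  Both call forms are illustrative; the
# exact expression syntax is governed by COHORT_REGEX and parse_cohorts
# elsewhere in the project:
#
#   >>> get_users('my_test_cohort')        # tag name looked up via query_mod
#   >>> get_users(some_cohort_expression)  # expression matched by COHORT_REGEX
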
def _process_help(args):
    """
        First determine if the user has made an adequate number of edits.
        If so, compute the number of minutes that passed between the Nth
        and Mth edit.

            Parameters:
                - **user_handle** - List(int). List of user ids.
                - **first_edit** - Integer. The ordinal of the first edit
                    from which to measure the threshold.
                - **threshold_edit** - Integer. The ordinal of the
                    threshold edit up to which to measure.
    """
    # Unpack args
    state = args[1]
    users = args[0]

    thread_args = um.UserMetric._unpack_params(state)

    if thread_args.log_:
        logging.debug(__name__ + '::Computing Time to threshold on '
                                 '{0} users. (PID = {1})'.
                      format(len(users), getpid()))

    minutes_to_threshold = list()

    # For each user gather their revisions and produce a time diff
    for user in users:
        revs = query_mod.\
            time_to_threshold_revs_query(user, thread_args.project, None)
        revs = [rev[0] for rev in revs]
        minutes_to_threshold.append(
            [user, get_minute_diff_result(revs,
                                          thread_args.threshold_edit,
                                          thread_args.first_edit)])

    if thread_args.log_:
        logging.info(__name__ + '::Processed PID = {0}.'.format(getpid()))

    return minutes_to_threshold

def time_series_listener(process_queue, event_queue):
    """
        Listener for ``time_series_worker``.  Blocks and logs until all
        processes computing time series data are complete.  Returns time
        dependent data from metrics.

        Parameters
        ~~~~~~~~~~

            process_queue : list
                List of active processes computing metrics data.
            event_queue : multiprocessing.Queue
                Asynchronous data coming in from worker processes.
    """
    data = list()

    while 1:
        # sleep before checking worker threads
        time.sleep(PROCESS_SLEEP_TIME)

        logging.info(__name__ + ' :: Time series process queue\n'
                                '\t{0} threads. (PID = {1})'.
                     format(str(len(process_queue)), os.getpid()))

        while not event_queue.empty():
            data.extend(event_queue.get())

        for p in process_queue:
            if not p.is_alive():
                p.terminate()
                process_queue.remove(p)

        # exit if all processes have finished
        if not len(process_queue):
            break

    # sort
    return sorted(data, key=operator.itemgetter(0), reverse=False)

def _process_help(args):
    """ Worker thread method for NamespaceOfEdits::process(). """

    users = args[0]
    state = args[1]

    metric_params = um.UserMetric._unpack_params(state)

    query_args_type = namedtuple('QueryArgs', 'start end')

    if metric_params.log_:
        logging.info(__name__ + '::Computing namespace edits. (PID = %s)'
                                % getpid())

    # Tally counts of namespace edits
    results = dict()
    ump_res = UMP_MAP[metric_params.group](users, metric_params)
    for ump_rec in ump_res:
        results[str(ump_rec.user)] = OrderedDict()
        for ns in NamespaceEdits.VALID_NAMESPACES:
            results[str(ump_rec.user)][str(ns)] = 0

        query_results = query_mod.namespace_edits_rev_query(
            [ump_rec.user],
            metric_params.project,
            query_args_type(ump_rec.start, ump_rec.end))

        for row in query_results:
            try:
                if row[1] in NamespaceEdits.VALID_NAMESPACES:
                    results[str(row[0])][str(row[1])] = int(row[2])
            except (KeyError, IndexError):
                logging.error(__name__ + "::Could not process row: %s"
                                         % str(row))
                continue

    return [(user, results[user]) for user in results]

def _process_help(args):
    """
        Used by Threshold::process() for forking.  Should not be called
        externally.
    """
    # Unpack args
    users = args[0]
    state = args[1]

    metric_params = um.UserMetric._unpack_params(state)

    if metric_params.log_:
        logging.info(__name__ + ' :: Processing revision data ' +
                     '(%s users) by user... (PID = %s)' % (
                         len(users), os.getpid()))
        logging.info(__name__ + ' :: ' + str(metric_params))

    # only proceed if there is user data
    if not len(users):
        return []

    results = list()
    dropped_users = 0
    umpd_obj = UMP_MAP[metric_params.group](users, metric_params)
    for t in umpd_obj:
        uid = long(t.user)
        try:
            count = query_mod.rev_count_query(uid,
                                              metric_params.survival_,
                                              metric_params.namespace,
                                              metric_params.project,
                                              t.start,
                                              t.end)
        except query_mod.UMQueryCallError:
            dropped_users += 1
            continue

        if count < metric_params.n:
            results.append((uid, 0))
        else:
            results.append((uid, 1))

    if metric_params.log_:
        logging.info(__name__ + '::Processed PID = %s. '
                                'Dropped users = %s.' % (
                                    os.getpid(), str(dropped_users)))

    return results

def _process_help(args):
    """
        Used by PagesCreated::process() for forking.  Should not be called
        externally.
    """
    # Unpack args
    users = args[0]
    state = args[1]

    metric_params = um.UserMetric._unpack_params(state)

    if metric_params.log_:
        logging.info(__name__ + ' :: Processing pages created data ' +
                     '(%s users) by user... (PID = %s)' % (
                         len(users), os.getpid()))
        logging.info(__name__ + ' :: ' + str(metric_params))

    # only proceed if there is user data
    if not len(users):
        return []

    results = list()
    dropped_users = 0
    umpd_obj = UMP_MAP[metric_params.group](users, metric_params)
    for t in umpd_obj:
        uid = long(t.user)
        try:
            count = query_mod.pages_created_query(uid,
                                                  metric_params.project,
                                                  metric_params)
        except query_mod.UMQueryCallError:
            dropped_users += 1
            continue

        try:
            results.append((str(uid), count[0][0]))
        except TypeError:
            dropped_users += 1

    if metric_params.log_:
        logging.info(__name__ + '::Processed PID = %s. '
                                'Dropped users = %s.' % (
                                    os.getpid(), str(dropped_users)))

    return results

def generate_test_cohort(project,
                         max_size=10,
                         write=False,
                         user_interval_size=1,
                         rev_interval_size=7,
                         rev_lower_limit=0):
    """
        Build a test cohort (list of UIDs) for the given project.

        Parameters
        ~~~~~~~~~~

            project : str
                Wikipedia project e.g. 'enwiki'.
            max_size : uint
                Maximum number of users to include in the cohort.
            write : boolean
                Flag indicating whether to write the cohort to
                settings.__cohort_meta_db__ and settings.__cohort_db__.
            user_interval_size : uint
                Number of days within which to take registered users.
            rev_interval_size : uint
                Number of days within which to count revisions.
            rev_lower_limit : int
                Minimum number of revisions a user must have between
                registration and the end of the revision interval.

        Returns the list of UIDs from the corresponding project that
        defines the test cohort.
    """

    # Determine the time bounds that define the cohort acceptance criteria
    ts_start_o = datetime.now() + timedelta(days=-60)
    ts_end_user_o = ts_start_o + timedelta(days=int(user_interval_size))
    ts_end_revs_o = ts_start_o + timedelta(days=int(rev_interval_size))

    ts_start = format_mediawiki_timestamp(ts_start_o)
    ts_end_user = format_mediawiki_timestamp(ts_end_user_o)
    ts_end_revs = format_mediawiki_timestamp(ts_end_revs_o)

    # Synthesize query and execute
    logging.info(__name__ + ' :: Getting users from {0}.\n\n'
                            '\tUser interval: {1} - {2}\n'
                            '\tRevision interval: {1} - {3}\n'
                            '\tMax users = {4}\n'
                            '\tMin revs = {5}\n'.
                 format(project,
                        ts_start,
                        ts_end_user,
                        ts_end_revs,
                        max_size,
                        rev_lower_limit))
    query = sub_tokens(SELECT_PROJECT_IDS, db=escape_var(str(project)))

    # @TODO MOVE DB REFS INTO QUERY MODULE
    try:
        params = {
            'ts_start': str(ts_start),
            'ts_end_user': str(ts_end_user),
            'ts_end_revs': str(ts_end_revs),
            'max_size': int(max_size),
            'rev_lower_limit': int(rev_lower_limit),
        }
    except ValueError as e:
        raise Exception(__name__ + ' :: Bad params ' + str(e))

    conn = Connector(instance=settings.PROJECT_DB_MAP[project])
    conn._cur_.execute(query, params)

    users = [row for row in conn._cur_]
    del conn

    # get latest cohort id & cohort name
    utm_name = generate_test_cohort_name(project)

    # add new ids to usertags & usertags_meta
    if write:
        logging.info(__name__ + ' :: Inserting records...\n\n'
                                '\tCohort name - {0}\n'
                                '\t{1} - {2} record(s)\n'.
                     format(utm_name, settings.__cohort_db__, len(users)))
        query_mod.add_cohort_data(utm_name, users, project)

    return users

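# Hedged usage sketch for generate_test_cohort (argument values are
# illustrative only):
#
#   >>> uids = generate_test_cohort('enwiki',
#   ...                             max_size=50,
#   ...                             user_interval_size=1,
#   ...                             rev_interval_size=7,
#   ...                             rev_lower_limit=5,
#   ...                             write=False)
#   >>> # ``uids`` holds rows of user IDs registered in the one-day window
#   >>> # with at least 5 revisions by the end of the following week
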
def job_control():
    """
        Controls the execution of user metrics requests by polling the
        request broker, forking worker processes, and relaying their
        responses back to the broker.
    """

    # Store executed and pending jobs respectively
    job_queue = list()

    # Global job ID number
    job_id = 0

    # Tallies the number of concurrently running jobs
    concurrent_jobs = 0

    log_name = '{0} :: {1}'.format(__name__, job_control.__name__)

    logging.debug('{0} - STARTING...'.format(log_name))

    while 1:
        time.sleep(RESQUEST_TIMEOUT)

        # Request Queue Processing
        # ------------------------

        logging.debug(log_name + ' :: POLLING REQUESTS...')
        logging.debug(log_name + ' :: JOB QUEUE - {0}'.format(str(job_queue)))

        req_item = None

        # Only process if there are fewer than the maximum number of
        # concurrent jobs
        if concurrent_jobs < MAX_CONCURRENT_JOBS:

            # Pop from request target
            req_item = umapi_broker_context.pop(REQUEST_BROKER_TARGET)

            # Push to process target
            if req_item:
                url_hash = sha1(req_item.encode('utf-8')).hexdigest()
                umapi_broker_context.add(PROCESS_BROKER_TARGET,
                                         url_hash, req_item)

                logging.debug(log_name + ' :: PULLING item from request '
                                         'queue -> \n\t{0}'.format(req_item))

        # Process complete jobs
        # ---------------------

        if concurrent_jobs:
            for job_item in job_queue:
                if not job_item.queue.empty():

                    logging.info(log_name + ' :: READING RESPONSE - {0}'.
                                 format(job_item.request))

                    # Pull data off of the queue and add it to response queue
                    data = ''
                    while not job_item.queue.empty():
                        data += job_item.queue.get(True)

                    # Remove from process target
                    url_hash = sha1(job_item.request.encode('utf-8')).\
                        hexdigest()
                    try:
                        umapi_broker_context.remove(PROCESS_BROKER_TARGET,
                                                    url_hash)
                    except Exception as e:
                        logging.error(log_name + ' :: Could not process '
                                                 '{0} from {1} -- {2}'.
                                      format(job_item.request,
                                             PROCESS_BROKER_TARGET,
                                             e.message))

                    # Add to response target
                    umapi_broker_context.add(RESPONSE_BROKER_TARGET,
                                             url_hash,
                                             pack_response_for_broker(
                                                 job_item.request, data))

                    del job_queue[job_queue.index(job_item)]
                    concurrent_jobs -= 1

                    logging.debug(log_name + ' :: RUN -> RESPONSE - '
                                             'Job ID {0}'
                                             '\n\tConcurrent jobs = {1}'
                                  .format(str(job_item.id), concurrent_jobs))

        # Process request
        # ---------------

        if req_item:
            req_q = Queue()
            proc = Process(target=process_metrics, args=(req_q, req_item))
            proc.start()

            job_item = job_item_type(job_id, proc, req_item, req_q)
            job_queue.append(job_item)

            concurrent_jobs += 1
            job_id += 1

            logging.debug(log_name + ' :: WAIT -> RUN - Job ID {0}'
                                     '\n\tConcurrent jobs = {1}, REQ = {2}'
                          .format(str(job_id), concurrent_jobs, req_item))

    logging.debug('{0} - FINISHING.'.format(log_name))

def build_time_series(start, end, interval, metric, aggregator, cohort,
                      **kwargs):
    """
        Builds a timeseries dataset for a given metric.

            Parameters:

                start : str or datetime.
                    Date + time indicating start of time series.
                end : str or datetime.
                    Date + time indicating end of time series.
                interval : int.
                    Integer value in hours that defines the amount of
                    time between data-points.
                metric : class object.
                    Metrics class (derived from UserMetric).
                aggregator : method.
                    Aggregator method used to aggregate data for time
                    series data points.
                cohort : list(str).
                    List of user IDs.

            e.g.

            >>> cohort = ['156171', '13234584']
            >>> metric = ba.BytesAdded
            >>> aggregator = agg.list_sum_indices
            >>> build_time_series('20120101000000', '20120112000000', 24,
            ...                   metric, aggregator, cohort,
            ...                   num_threads=4, num_threads_metric=2,
            ...                   log=True)
    """
    log = bool(kwargs['log']) if 'log' in kwargs else False

    # Get datetime types, and the number of threads
    start = date_parse(format_mediawiki_timestamp(start))
    end = date_parse(format_mediawiki_timestamp(end))
    k = kwargs['kt_'] if 'kt_' in kwargs else MAX_THREADS

    # Compute window size and ensure that all the conditions
    # necessary to generate a proper time series are met
    num_intervals = int((end - start).total_seconds() / (3600 * interval))
    intervals_per_thread = num_intervals / k

    # Compose the sets of time series lists
    f = lambda t, i: t + datetime.timedelta(
        hours=int(intervals_per_thread * interval * i))
    time_series = [_get_timeseries(f(start, i), f(start, i + 1), interval)
                   for i in xrange(k)]
    if f(start, k) < end:
        time_series.append(_get_timeseries(f(start, k), end, interval))

    event_queue = Queue()
    process_queue = list()

    if log:
        logging.info(__name__ + ' :: Spawning procs\n'
                                '\t%s - %s, interval = %s\n'
                                '\tthreads = %s ... ' % (str(start),
                                                         str(end),
                                                         interval, k))
    for i in xrange(len(time_series)):
        p = Process(target=time_series_worker,
                    args=(time_series[i], metric, aggregator,
                          cohort, event_queue, kwargs))
        p.start()
        process_queue.append(p)

    # Call the listener
    return time_series_listener(process_queue, event_queue)

def time_series_worker(time_series, metric, aggregator, cohort,
                       event_queue, kwargs):
    """
        Worker thread which computes time series data for a set of points.

        Parameters
        ~~~~~~~~~~

            time_series : list(datetime)
                Datetimes defining series.
            metric : class
                Metric class (derived from UserMetric).
            aggregator : method
                Aggregator method reference.
            cohort : list(str)
                List of user IDs passed to the metric's process() call.
            event_queue : multiprocessing.Queue
                Asynchronous data-structure to communicate with parent proc.
            kwargs : dict
                Keyword arguments forwarded to the metric constructor and
                process() call.
    """
    log = bool(kwargs['log']) if 'log' in kwargs else False

    data = list()
    ts_s = time_series.next()
    new_kwargs = deepcopy(kwargs)

    # re-map some keyword args relating to thread counts
    if 'metric_threads' in new_kwargs:
        d = json.loads(new_kwargs['metric_threads'])
        for key in d:
            new_kwargs[key] = d[key]
        del new_kwargs['metric_threads']

    while 1:
        try:
            ts_e = time_series.next()
        except StopIteration:
            break

        if log:
            logging.info(__name__ + ' :: Processing thread:\n'
                                    '\t{0}, {1} - {2} ...'.
                         format(os.getpid(), str(ts_s), str(ts_e)))

        metric_obj = metric(datetime_start=ts_s,
                            datetime_end=ts_e,
                            **new_kwargs).process(cohort, **new_kwargs)

        r = agg_engine(aggregator, metric_obj, metric.header())

        if log:
            logging.info(__name__ + ' :: Processing complete:\n'
                                    '\t{0}, {1} - {2} ...'.
                         format(os.getpid(), str(ts_s), str(ts_e)))

        data.append([str(ts_s), str(ts_e)] + r.data)
        ts_s = ts_e

    event_queue.put(data)

def process_metrics(p, request_meta):
    """
        Worker process for requests, forked from the job controller.
        This method handles:

            * Filtering cohort type: "regular" cohort, single user,
              user group
            * Secondary validation
    """
    log_name = '{0} :: {1}'.format(__name__, process_metrics.__name__)

    logging.info(log_name + ' - START JOB'
                            '\n\tCOHORT = {0} - METRIC = {1}'
                            ' - PID = {2})'.
                 format(request_meta.cohort_expr, request_meta.metric,
                        getpid()))

    err_msg = __name__ + ' :: Request failed.'
    users = list()

    # obtain user list - handle the case where a lone user ID is passed
    # !! The username should already be validated
    if request_meta.is_user:
        uid = MediaWikiUser.is_user_name(request_meta.cohort_expr,
                                         request_meta.project)
        if uid:
            valid = True
            users = [uid]
        else:
            valid = False
            err_msg = error_codes[3]

    # The "all" user group.  All users within a time period.
    elif request_meta.cohort_expr == 'all':
        users = MediaWikiUser(query_type=1)

        try:
            users = [u for u in users.get_users(
                request_meta.start, request_meta.end,
                project=request_meta.project)]
            valid = True
        except Exception:
            valid = False
            err_msg = error_codes[5]

    # "TYPICAL" COHORT PROCESSING
    else:
        users = get_users(request_meta.cohort_expr)

        # Default project is what is stored in usertags_meta
        project = query_mod.get_cohort_project_by_meta(
            request_meta.cohort_expr)
        if project:
            request_meta.project = project
            logging.debug(__name__ + ' :: Using default project from '
                                     'usertags_meta {0}.'.format(project))

        valid = True
        err_msg = ''

    if valid:
        # process request
        results = process_data_request(request_meta, users)
        results = str(results)
        response_size = getsizeof(results, None)

        if response_size > MAX_BLOCK_SIZE:
            index = 0

            # Dump the data in pieces - block until it is picked up
            while index < response_size:
                p.put(results[index:index + MAX_BLOCK_SIZE], block=True)
                index += MAX_BLOCK_SIZE
        else:
            p.put(results, block=True)

        logging.info(log_name + ' - END JOB'
                                '\n\tCOHORT = {0} - METRIC = {1}'
                                ' - PID = {2})'.
                     format(request_meta.cohort_expr, request_meta.metric,
                            getpid()))
    else:
        p.put(err_msg, block=True)
        logging.info(log_name + ' - END JOB - FAILED.'
                                '\n\tCOHORT = {0} - METRIC = {1}'
                                ' - PID = {2})'.
                     format(request_meta.cohort_expr, request_meta.metric,
                            getpid()))

def process_data_request(request_meta, users):
    """
        Main entry point of the module, prepares results for a given
        request.  Coordinates a request based on the following
        parameters::

            metric_handle (string) - determines the type of metric object
                to build.  Keys metric_dict.

            users (list) - list of user IDs.

            **kwargs - Keyword arguments may contain a variety of
                variables.  Most notably, "aggregator" if the request
                requires aggregation, and the "time_series" flag
                indicating a time series request.  The remaining kwargs
                specify metric object parameters.
    """
    # Set interval length in hours if not present
    if not request_meta.slice:
        request_meta.slice = DEFAULT_INERVAL_LENGTH
    else:
        request_meta.slice = float(request_meta.slice)

    # Get the aggregator key
    agg_key = get_agg_key(request_meta.aggregator, request_meta.metric) if \
        request_meta.aggregator else None

    args = ParameterMapping.map(request_meta)

    # Initialize the results
    results, metric_class, metric_obj = format_response(request_meta)

    start = metric_obj.datetime_start
    end = metric_obj.datetime_end

    if results['type'] == request_types.time_series:

        # Get aggregator
        try:
            aggregator_func = get_aggregator_type(agg_key)
        except MetricsAPIError as e:
            results['data'] = 'Request failed. ' + e.message
            return results

        # Determine intervals and thread allocation
        total_intervals = (date_parse(end) - date_parse(start)).\
            total_seconds() / (3600 * request_meta.slice)
        time_threads = max(1, int(total_intervals / INTERVALS_PER_THREAD))
        time_threads = min(MAX_THREADS, time_threads)

        logging.info(__name__ + ' :: Initiating time series for %(metric)s\n'
                                '\tAGGREGATOR = %(agg)s\n'
                                '\tFROM: %(start)s,\tTO: %(end)s.' %
                     {
                         'metric': metric_class.__name__,
                         'agg': request_meta.aggregator,
                         'start': str(start),
                         'end': str(end),
                     })

        metric_threads = '"k_" : {0}, "kr_" : {1}'.format(USER_THREADS,
                                                          REVISION_THREADS)
        metric_threads = '{' + metric_threads + '}'

        new_kwargs = deepcopy(args)
        del new_kwargs['slice']
        del new_kwargs['aggregator']
        del new_kwargs['datetime_start']
        del new_kwargs['datetime_end']

        out = tspm.build_time_series(start,
                                     end,
                                     request_meta.slice,
                                     metric_class,
                                     aggregator_func,
                                     users,
                                     kt_=time_threads,
                                     metric_threads=metric_threads,
                                     log=True,
                                     **new_kwargs)

        results['header'] = ['timestamp'] + \
            getattr(aggregator_func, um.METRIC_AGG_METHOD_HEAD)
        for row in out:
            timestamp = date_parse(row[0][:19]).strftime(DATETIME_STR_FORMAT)
            results['data'][timestamp] = row[3:]

    elif results['type'] == request_types.aggregator:

        # Get aggregator
        try:
            aggregator_func = get_aggregator_type(agg_key)
        except MetricsAPIError as e:
            results['data'] = 'Request failed. ' + e.message
            return results

        logging.info(__name__ + ' :: Initiating aggregator for %(metric)s\n'
                                '\tAGGREGATOR = %(agg)s\n'
                                '\tFROM: %(start)s,\tTO: %(end)s.' %
                     {
                         'metric': metric_class.__name__,
                         'agg': request_meta.aggregator,
                         'start': str(start),
                         'end': str(end),
                     })
        try:
            metric_obj.process(users,
                               k_=USER_THREADS,
                               kr_=REVISION_THREADS,
                               log_=True,
                               **args)
        except UserMetricError as e:
            logging.error(__name__ + ' :: Metrics call failed: ' + str(e))
            results['data'] = str(e)
            return results

        r = um.aggregator(aggregator_func, metric_obj, metric_obj.header())
        results['header'] = to_string(r.header)
        results['data'] = r.data[1:]

    elif results['type'] == request_types.raw:
        logging.info(__name__ + ' :: Initiating raw request for %(metric)s\n'
                                '\tFROM: %(start)s,\tTO: %(end)s.' %
                     {
                         'metric': metric_class.__name__,
                         'start': str(start),
                         'end': str(end),
                     })
        try:
            metric_obj.process(users,
                               k_=USER_THREADS,
                               kr_=REVISION_THREADS,
                               log_=True,
                               **args)
        except UserMetricError as e:
            logging.error(__name__ + ' :: Metrics call failed: ' + str(e))
            results['data'] = str(e)
            return results

        for m in metric_obj.__iter__():
            results['data'][m[0]] = m[1:]

    return results

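# Hedged sketch of the result shapes produced above (keys are those populated
# in this function; the concrete values are illustrative):
#
#   raw request        -> results['data'][user_id] = per-user metric fields
#   aggregator request -> results['header'] = aggregate column names,
#                         results['data']   = aggregate values
#   time series        -> results['data'][timestamp] = aggregate values per
#                         interval of ``request_meta.slice`` hours
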
def build_table_query(self, select_fields, table_name,
                      where_fields=None, where_ops=None,
                      group_fields=None, order_fields=None):
    """
        Constructs a SQL query given the parameters.

        Parameters
        ~~~~~~~~~

            select_fields : List(string)
                Column names to return in query.
            table_name : string
                Name of the table to query.
            where_fields : List(string)
                Statements on which to condition results *[optional]*.
            where_ops : List(string)
                Logical operators with which to combine where statements
                *[optional]*.
            group_fields : List(string)
                Column names to group on *[optional]*.
            order_fields : List(string)
                Column names to order by *[optional]*.

        Return a formatted SQL query constructed from parameters.  Note
        that this may be an invalid query if the input was not well formed.
    """
    # Pre-process defaults
    if where_fields is None:
        where_fields = []
    if where_ops is None:
        where_ops = []
    if group_fields is None:
        group_fields = []
    if order_fields is None:
        order_fields = []

    # Begin function
    try:
        select_str = 'select '
        for field in select_fields:
            select_str += field + ','
        select_str = select_str[:-1]

        if where_fields:
            where_str = 'where '
            for index in range(len(where_ops)):
                where_str += where_fields[index] + ' ' + \
                    where_ops[index] + ' '
            where_str = where_str + where_fields[len(where_ops)]
        else:
            where_str = ''

        if group_fields:
            group_str = 'group by '
            for field in group_fields:
                group_str += field + ','
            group_str = group_str[:-1]
        else:
            group_str = ''

        if order_fields:
            order_str = 'order by '
            for field in order_fields:
                order_str += field + ','
            order_str = order_str[:-1]
        else:
            order_str = ''

        sql = '%s from %s %s %s %s' % (select_str, table_name, where_str,
                                       group_str, order_str)

    except Exception:
        logging.info('Could not build query for %s: ' % table_name)
        sql = ''

    return sql

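# Hedged usage sketch for build_table_query (table and column names are
# illustrative; the caller is responsible for well-formed input):
#
#   >>> self.build_table_query(['rev_user', 'count(*)'], 'revision',
#   ...                        where_fields=['rev_user = 42'],
#   ...                        group_fields=['rev_user'])
#   # -> roughly "select rev_user,count(*) from revision
#   #             where rev_user = 42 group by rev_user"
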