def rows_func(rows):
    """Run the aggregator over one BSON-encoded group of rows.

    ``rows`` is a BSON payload; the first decoded document must carry the
    group's rows under the ``'array'`` key.  The accumulator wrapper is reset
    to a copy of ``init_acc_values``, the aggregator function is applied to
    every row, and the result is built from the key values (read from the
    first row at each index in ``key_indices_wrapper``) followed by the
    accumulator data.

    Returns the result list passed through ``numpy_to_bson_friendly``.

    Raises ``IaPyWorkerError`` for any ``Exception`` raised while processing;
    its message is a URL-safe base64 encoding of a UTF-8 description.
    """
    try:
        bson_data = bson.decode_all(rows)[0]
        rows_data = bson_data['array']
        # Copy the initial values so the shared template is never handed out.
        acc_wrapper._set_data(list(init_acc_values))
        for row in rows_data:
            row_wrapper.load_row(row)
            aggregator_function(acc_wrapper, row_wrapper)
        # Key values are read from the first row of the group.
        result = [rows_data[0][key_index] for key_index in key_indices_wrapper]
        result.extend(acc_wrapper._get_data())
        return numpy_to_bson_friendly(result)
    except Exception as e:
        # Building the message is best-effort: every step has a fallback.
        # Only ``Exception`` is caught (not a bare ``except``) so that
        # KeyboardInterrupt / SystemExit are never swallowed here.
        try:
            e_msg = unicode(e)
        except Exception:
            e_msg = u'<unable to get exception message>'
        try:
            e_row = unicode(bson.decode_all(rows)[0]['array'])
        except Exception:
            e_row = u'<unable to get row data>'
        try:
            msg = base64.urlsafe_b64encode(
                (u'Exception: %s running UDF on row: %s' % (e_msg, e_row)).encode('utf-8'))
        except Exception:
            msg = base64.urlsafe_b64encode(
                u'Exception running UDF, unable to provide details.'.encode('utf-8'))
        raise IaPyWorkerError(msg)
def rows_func(rows):
    """Run the aggregator over one BSON-encoded group of rows.

    ``rows`` is a BSON payload; the first decoded document must carry the
    group's rows under ``'array'`` and the key column positions under
    ``'keyindices'``.  The accumulator wrapper is reset to a copy of
    ``init_acc_values``, the aggregator function is applied to every row, and
    the result is built from the key values (read from the first row at each
    key index) followed by the accumulator data.

    Returns the result list passed through ``numpy_to_bson_friendly``.

    Raises ``IaPyWorkerError`` for any ``Exception`` raised while processing;
    its message is a URL-safe base64 encoding of a UTF-8 description.
    """
    try:
        bson_data = bson.decode_all(rows)[0]
        rows_data = bson_data['array']
        key_indices = bson_data['keyindices']
        # Copy the initial values so the shared template is never handed out.
        acc_wrapper._set_data(list(init_acc_values))
        for row in rows_data:
            row_wrapper.load_row(row)
            aggregator_function(acc_wrapper, row_wrapper)
        # Key values are read from the first row of the group.
        result = [rows_data[0][key_index] for key_index in key_indices]
        result.extend(acc_wrapper._get_data())
        return numpy_to_bson_friendly(result)
    except Exception as e:
        # Building the message is best-effort: every step has a fallback.
        # Only ``Exception`` is caught (not a bare ``except``) so that
        # KeyboardInterrupt / SystemExit are never swallowed here.
        try:
            e_msg = unicode(e)
        except Exception:
            e_msg = u'<unable to get exception message>'
        try:
            e_row = unicode(bson.decode_all(rows)[0]['array'])
        except Exception:
            e_row = u'<unable to get row data>'
        try:
            msg = base64.urlsafe_b64encode(
                (u'Exception: %s running UDF on row: %s' % (e_msg, e_row)).encode('utf-8'))
        except Exception:
            msg = base64.urlsafe_b64encode(
                u'Exception running UDF, unable to provide details.'.encode('utf-8'))
        raise IaPyWorkerError(msg)
def add_many_columns(row):
    """Apply ``row_function`` to ``row`` and return one value per schema entry.

    For each position/type pair in ``data_types`` the matching item of the
    UDF result is cast with ``valid_data_types.cast`` and converted with
    ``numpy_to_bson_friendly``.

    Raises ``RuntimeError`` if the UDF result cannot be indexed or holds
    fewer items than ``data_types``.
    """
    udf_output = row_function(row)
    converted = []
    for position, expected_type in enumerate(data_types):
        try:
            raw_value = udf_output[position]
        except TypeError:
            raise RuntimeError("UDF returned non-indexable value. Provided schema indicated an Indexable return type")
        except IndexError:
            raise RuntimeError("UDF return value did not match the number of items in the provided schema")
        typed_value = valid_data_types.cast(raw_value, expected_type)
        converted.append(numpy_to_bson_friendly(typed_value))
    return converted
def add_many_columns(row):
    """Apply ``row_function`` to ``row`` and return one value per schema entry.

    For each position/type pair in ``data_types`` the matching item of the
    UDF result is cast with ``valid_data_types.cast`` and converted with
    ``numpy_to_bson_friendly``.

    Raises ``RuntimeError`` if the UDF result cannot be indexed or holds
    fewer items than ``data_types``.
    """
    udf_output = row_function(row)
    converted = []
    for position, expected_type in enumerate(data_types):
        try:
            raw_value = udf_output[position]
        except TypeError:
            raise RuntimeError("UDF returned non-indexable value. Provided schema indicated an Indexable return type")
        except IndexError:
            raise RuntimeError("UDF return value did not match the number of items in the provided schema")
        typed_value = valid_data_types.cast(raw_value, expected_type)
        converted.append(numpy_to_bson_friendly(typed_value))
    return converted
def project_columns(row):
    """Return the items of ``row`` at each position in ``indices``.

    Every selected item is converted with ``numpy_to_bson_friendly``; the
    output order follows the iteration order of ``indices``.
    """
    projected = []
    for position in indices:
        projected.append(numpy_to_bson_friendly(row[position]))
    return projected
def add_one_column(row):
    """Apply ``row_function`` to ``row`` and return the new value in a list.

    The UDF result is cast to ``data_type`` with ``valid_data_types.cast``
    and converted with ``numpy_to_bson_friendly``; the single converted
    value is returned wrapped in a one-element list.
    """
    udf_value = row_function(row)
    typed_value = valid_data_types.cast(udf_value, data_type)
    column = numpy_to_bson_friendly(typed_value)
    return [column]
def ifilterfalse(predicate, iterable):
    """Lazily yield decoded records for which ``predicate`` is false.

    ``predicate`` is called with each raw item.  Items it rejects are decoded
    with ``bson.decode_all``; the ``"array"`` entry of the first document is
    converted with ``numpy_to_bson_friendly`` and yielded, so that batch
    processing can encode the result.
    """
    def _decoded(record):
        # Only the first document of the decoded payload is used.
        return numpy_to_bson_friendly(bson.decode_all(record)[0]["array"])

    return (_decoded(record) for record in iterable if not predicate(record))
def ifilter(predicate, iterable):
    """Lazily yield decoded records for which ``predicate`` is true.

    ``predicate`` is called with each raw item.  Items it accepts are decoded
    with ``bson.decode_all``; the ``"array"`` entry of the first document is
    converted with ``numpy_to_bson_friendly`` and yielded, so that batch
    processing can work on decoded objects.
    """
    def _decoded(record):
        # Only the first document of the decoded payload is used.
        return numpy_to_bson_friendly(bson.decode_all(record)[0]["array"])

    return (_decoded(record) for record in iterable if predicate(record))
def add_one_column(row):
    """Apply ``row_function`` to ``row`` and return the new value in a list.

    The UDF result is cast to ``data_type`` with ``valid_data_types.cast``
    and converted with ``numpy_to_bson_friendly``; the single converted
    value is returned wrapped in a one-element list.
    """
    udf_value = row_function(row)
    typed_value = valid_data_types.cast(udf_value, data_type)
    column = numpy_to_bson_friendly(typed_value)
    return [column]
def ifilterfalse(predicate, iterable):
    """Lazily yield decoded records for which ``predicate`` is false.

    ``predicate`` is called with each raw item.  Items it rejects are decoded
    with ``bson.decode_all``; the ``"array"`` entry of the first document is
    converted with ``numpy_to_bson_friendly`` and yielded, so that batch
    processing can encode the result.
    """
    def _decoded(record):
        # Only the first document of the decoded payload is used.
        return numpy_to_bson_friendly(bson.decode_all(record)[0]["array"])

    return (_decoded(record) for record in iterable if not predicate(record))
def ifilter(predicate, iterable):
    """Lazily yield decoded records for which ``predicate`` is true.

    ``predicate`` is called with each raw item.  Items it accepts are decoded
    with ``bson.decode_all``; the ``"array"`` entry of the first document is
    converted with ``numpy_to_bson_friendly`` and yielded, so that batch
    processing can work on decoded objects.
    """
    def _decoded(record):
        # Only the first document of the decoded payload is used.
        return numpy_to_bson_friendly(bson.decode_all(record)[0]["array"])

    return (_decoded(record) for record in iterable if predicate(record))
def project_columns(row):
    """Return the items of ``row`` at each position in ``indices``.

    Every selected item is converted with ``numpy_to_bson_friendly``; the
    output order follows the iteration order of ``indices``.
    """
    projected = []
    for position in indices:
        projected.append(numpy_to_bson_friendly(row[position]))
    return projected