def test_between_date(self):
    g = df_generate(self.df, "2014-01-01:2014-02-01",
                    "filter__date__between__date__:Y-:m-:d")
    self.assertEquals(g, u"date in ['2014-01-01', '2014-01-02', "
                         "'2014-01-03', '2014-01-04', '2014-01-05', "
                         "'2014-01-06', '2014-01-07', '2014-01-08', "
                         "'2014-01-09', '2014-01-10', '2014-01-11', "
                         "'2014-01-12', '2014-01-13', '2014-01-14', "
                         "'2014-01-15', '2014-01-16', '2014-01-17', "
                         "'2014-01-18', '2014-01-19', '2014-01-20', "
                         "'2014-01-21', '2014-01-22', '2014-01-23', "
                         "'2014-01-24', '2014-01-25', '2014-01-26', "
                         "'2014-01-27', '2014-01-28', '2014-01-29', "
                         "'2014-01-30', '2014-01-31', '2014-02-01']")
def test_is(self):
    g = df_generate(self.df, "2014-01-01", "filter__date")
    self.assertEquals(g, u"date == '2014-01-01'")
def test_notin_int(self):
    g = df_generate(self.df, "1,2,3", "filter__int__notin__int")
    self.assertEquals(g, u"[1, 2, 3] not in int")
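# A minimal sketch of the filter grammar the tests in this file pin down:
# "filter__<column>[__<operator>[__<cast>]]", defaulting to an equality
# test with string casting. This is an illustration inferred from the
# expected strings, NOT the project's real df_generate (which also covers
# lte/gte, date ranges, etc.); `sketch_generate` is a hypothetical name.
def sketch_generate(value, token):
    parts = token.split('__')  # ['filter', column, operator, cast, ...]
    column = parts[1]
    operator = parts[2] if len(parts) > 2 else 'is'
    cast = parts[3] if len(parts) > 3 else 'str'

    def literal(v):
        # ints stay bare, everything else is quoted
        return str(int(v)) if cast == 'int' else "'{}'".format(v)

    if operator == 'is':
        return u"{} == {}".format(column, literal(value))
    if operator in ('in', 'notin'):
        items = "[{}]".format(', '.join(literal(v) for v in value.split(',')))
        if operator == 'in':
            return u"{} in {}".format(column, items)
        return u"{} not in {}".format(items, column)
    raise ValueError("operator not covered by this sketch: %s" % operator)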
def data(ws, mongodb, slug):
    if not ws:
        abort(400, 'Expected WebSocket request.')

    DW = DataWarehouse()

    element = mongodb['element'].find_one({'slug': slug})
    element['page_limit'] = 50
    if request.GET.get('limit', True) is False:
        element['page_limit'] = 9999999999

    data = DW.get(element.get('cube'))
    columns = data.get('columns') or []

    fields = columns
    if request.GET.get('fields', None):
        fields = request.GET.get('fields').split(',')

    cube_last_update = mongodb['cube'].find_one({'slug': element.get('cube')})
    ws.send(json.dumps({'type': 'last_update',
                        'data': str(cube_last_update.get('lastupdate', ''))}))

    ws.send(json.dumps({'type': 'columns', 'data': fields}))

    # query-string keys shaped like filter__<field>__<operator>
    filters = [i[0] for i in request.GET.iteritems()
               if len(i[0].split('filter__')) > 1]

    # grids are paginated; other element types get the full frame
    if element['type'] == 'grid':
        page = int(request.GET.get('page', 1))
        page_start = 0
        page_end = element['page_limit']
        if page >= 2:
            page_end = element['page_limit'] * page
            page_start = page_end - element['page_limit']
    else:
        page_start = None
        page_end = None

    df = DataFrame(data.get('data') or {}, columns=fields)
    if len(filters) >= 1:
        for f in filters:
            s = f.split('__')
            field = s[1]
            operator = s[2]
            value = request.GET.get(f)
            if operator == 'like':
                df = df[df[field].str.contains(value)]
            elif operator == 'regex':
                df = DataFrameSearchColumn(df, field, value, operator)
            else:
                df = df.query(df_generate(df, value, f))

    groupby = []
    if request.GET.get('groupby', None):
        groupby = request.GET.get('groupby', '').split(',')
    if len(groupby) >= 1:
        df = DataFrame(df.groupby(groupby).grouper.get_group_levels())

    if request.GET.get('orderby', element.get('orderby', None)) and \
            request.GET.get('orderby',
                            element.get('orderby', None)) in fields:
        orderby = request.GET.get('orderby', element.get('orderby', ''))
        if type(orderby) == str:
            orderby = orderby.split(',')
        orderby__order = request.GET.get('orderby__order',
                                         element.get('orderby__order', ''))
        if type(orderby__order) == str:
            orderby__order = orderby__order.split(',')
        ind = 0
        for orde in orderby__order:
            # '0' means descending, anything else ascending
            if orde == '0':
                orderby__order[ind] = False
            else:
                orderby__order[ind] = True
            ind += 1
        df = df.sort(orderby, ascending=orderby__order)

    ws.send(json.dumps({'type': 'max_page', 'data': len(df)}))

    # CLEAN MEMORY
    del filters, fields, columns
    gc.collect()

    categories = []
    for i in df.to_dict(outtype='records')[page_start:page_end]:
        if element.get('categories', None):
            categories.append(i[element.get('categories')])
        ws.send(json.dumps({'type': 'data', 'data': i}))

    # CLEAN MEMORY
    del df
    gc.collect()

    ws.send(json.dumps({'type': 'categories', 'data': categories}))
    ws.send(json.dumps({'type': 'close'}))

    # CLEAN MEMORY
    del categories
    gc.collect()
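# Client-side sketch of the frame sequence the WebSocket handler above
# emits: last_update -> columns -> max_page -> one 'data' frame per
# record -> categories -> close. Uses the websocket-client package; the
# URL, port, and slug are hypothetical.
import json
from websocket import create_connection

conn = create_connection("ws://localhost:8888/data/my-element-slug")
while True:
    frame = json.loads(conn.recv())
    if frame['type'] == 'close':
        break
    print(frame)
conn.close()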
def data(mongodb, slug, ext='xls'):
    DW = DataWarehouse()

    element = mongodb['element'].find_one({'slug': slug})
    element['page_limit'] = 50
    if request.GET.get('limit', True) is False:
        element['page_limit'] = 9999999999

    data = DW.get(element.get('cube'))
    columns = data.get('columns') or []

    fields = columns
    if request.GET.get('fields', None):
        fields = request.GET.get('fields').split(',')

    filters = [i[0] for i in request.GET.iteritems()
               if len(i[0].split('filter__')) > 1]

    df = DataFrame(data.get('data') or {}, columns=fields)
    if len(filters) >= 1:
        for f in filters:
            s = f.split('__')
            field = s[1]
            operator = s[2]
            value = request.GET.get(f)
            if operator == 'like':
                df = df[df[field].str.contains(value)]
            elif operator == 'regex':
                df = DataFrameSearchColumn(df, field, value, operator)
            else:
                df = df.query(df_generate(df, value, f))

    groupby = []
    if request.GET.get('groupby', None):
        groupby = request.GET.get('groupby', "").split(',')
    if len(groupby) >= 1:
        df = DataFrame(df.groupby(groupby).grouper.get_group_levels())

    if request.GET.get('orderby', element.get('orderby', None)) and \
            request.GET.get('orderby',
                            element.get('orderby', None)) in fields:
        orderby = request.GET.get('orderby', element.get('orderby', ''))
        if type(orderby) == str:
            orderby = orderby.split(',')
        orderby__order = request.GET.get('orderby__order',
                                         element.get('orderby__order', ''))
        if type(orderby__order) == str:
            orderby__order = orderby__order.split(',')
        ind = 0
        for orde in orderby__order:
            if orde == '0':
                orderby__order[ind] = False
            else:
                orderby__order[ind] = True
            ind += 1
        df = df.sort(orderby, ascending=orderby__order)

    # CLEAN MEMORY
    del filters, fields, columns
    gc.collect()

    file_name = '{}/assets/exports/openmining-{}.{}'.format(
        PROJECT_PATH, element.get('cube'), ext)
    if ext == 'csv':
        df.to_csv(file_name, sep=";")
        contenttype = 'text/csv'
    else:
        df.to_excel(file_name)
        contenttype = 'application/vnd.ms-excel'

    response.set_header('charset', 'utf-8')
    response.set_header('Content-disposition', 'attachment; '
                        'filename={}.{}'.format(element.get('cube'), ext))
    response.content_type = contenttype

    ifile = open(file_name, "r")
    o = ifile.read()
    ifile.close()
    return o
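# Note on the ordering parameters used above (and in the other variants
# in this file): `orderby` and `orderby__order` are parallel
# comma-separated lists, and a '0' flag maps to ascending=False. For
# example, ?orderby=date,amount&orderby__order=1,0 sorts by date
# ascending, then by amount descending.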
def test_is_type_str(self):
    g = df_generate(self.df, "1", "filter__int__is__str")
    self.assertEquals(g, u"int == '1'")

def test_in_str_text(self):
    g = df_generate(self.df, "Diamond,Angular", "filter__str__in__str")
    self.assertEquals(g, u"str in ['Diamond', 'Angular']")

def test_lte(self):
    g = df_generate(self.df, "1", "filter__int__lte")
    self.assertEquals(g, u"int <= 1")

def test_is_type_int(self):
    g = df_generate(self.df, "1", "filter__int__is__int")
    self.assertEquals(g, u"int == 1")

def test_is_type_str_text(self):
    g = df_generate(self.df, "Diamon", "filter__nivel__is__str")
    self.assertEquals(g, u"nivel == 'Diamon'")

def test_in_int(self):
    g = df_generate(self.df, "1,2,3", "filter__int__in__int")
    self.assertEquals(g, u"int in [1, 2, 3]")

def test_notin_str(self):
    g = df_generate(self.df, "1,2,3", "filter__int__notin")
    self.assertEquals(g, u"['1', '2', '3'] not in int")
def data(mongodb, slug, ext='xls'):
    MyClient = riak.RiakClient(protocol=conf("riak")["protocol"],
                               http_port=conf("riak")["http_port"],
                               host=conf("riak")["host"])

    MyBucket = MyClient.bucket(conf("riak")["bucket"])

    element = mongodb['element'].find_one({'slug': slug})

    # columns are stored as a JSON document under '<cube>-columns';
    # fall back to an empty JSON list when the key is missing
    columns = json.loads(MyBucket.get(
        '{}-columns'.format(element.get('cube'))).data or '[]')

    fields = columns
    if request.GET.get('fields', None):
        fields = request.GET.get('fields').split(',')

    filters = [i[0] for i in request.GET.iteritems()
               if len(i[0].split('filter__')) > 1]

    df = DataFrame(MyBucket.get(element.get('cube')).data, columns=fields)
    if len(filters) >= 1:
        for f in filters:
            s = f.split('__')
            field = s[1]
            operator = s[2]
            value = request.GET.get(f)
            if operator in ['like', 'regex']:
                df = DataFrameSearchColumn(df, field, value, operator)
            else:
                df = df.query(df_generate(df, value, f))

    groupby = []
    if request.GET.get('groupby', None):
        groupby = request.GET.get('groupby', '').split(',')
    if len(groupby) >= 1:
        df = df.groupby(groupby)

    if request.GET.get('orderby', None):
        orderby = request.GET.get('orderby', [])
        orderby__order = True
        if request.GET.get('orderby__order', 0) != 1:
            orderby__order = False
        df = df.sort(orderby, ascending=orderby__order)

    # CLEAN MEMORY
    del filters, fields, columns
    gc.collect()

    file_name = '{}/assets/exports/openmining-{}.{}'.format(
        PROJECT_PATH, element.get('cube'), ext)
    if ext == 'csv':
        df.to_csv(file_name, sep=";")
        contenttype = 'text/csv'
    else:
        df.to_excel(file_name)
        contenttype = 'application/vnd.ms-excel'

    response.set_header('charset', 'utf-8')
    response.set_header('Content-disposition', 'attachment; '
                        'filename={}.{}'.format(element.get('cube'), ext))
    response.content_type = contenttype

    ifile = open(file_name, "r")
    o = ifile.read()
    ifile.close()
    return o
def test_in_str(self):
    g = df_generate(self.df, "1,2,3", "filter__int__in")
    self.assertEquals(g, u"int in ['1', '2', '3']")
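# Usage note: as the data() views in this file show, the expression
# strings df_generate returns are passed to pandas DataFrame.query(),
# so e.g. df.query(df_generate(df, "1,2,3", "filter__int__in__int"))
# keeps only the rows whose `int` column is 1, 2 or 3.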
def data(mongodb, slug):
    # check protocol to work
    ws = request.environ.get("wsgi.websocket")
    protocol = "websocket"
    if not ws:
        response.content_type = "application/json"
        protocol = "http"

    DataManager = __from__(
        "mining.controllers.data.{}.DataManager".format(protocol))
    # instantiates the chosen protocol
    DM = DataManager(ws)

    # instantiate data warehouse
    DW = DataWarehouse()

    element = mongodb["element"].find_one({"slug": slug})
    element["page_limit"] = 50
    if request.GET.get("limit", True) is False:
        element["page_limit"] = 9999999999

    if element["type"] == "grid" and "download" not in request.GET.keys():
        page = int(request.GET.get("page", 1))
        page_start = 0
        page_end = element["page_limit"]
        if page >= 2:
            page_end = element["page_limit"] * page
            page_start = page_end - element["page_limit"]
    else:
        page = 1
        page_start = None
        page_end = None

    filters = [i[0] for i in request.GET.iteritems()
               if len(i[0].split("filter__")) > 1]

    if not DW.search:
        data = DW.get(element.get("cube"), page=page)
    else:
        data = DW.get(element.get("cube"), filters=filters, page=page)

    columns = data.get("columns") or []

    fields = columns
    if request.GET.get("fields", None):
        fields = request.GET.get("fields").split(",")

    cube_last_update = mongodb["cube"].find_one({"slug": element.get("cube")})
    DM.send(json.dumps({"type": "last_update",
                        "data": str(cube_last_update.get("lastupdate", ""))}))

    DM.send(json.dumps({"type": "columns", "data": fields}))

    df = DataFrame(data.get("data") or {}, columns=fields)
    if len(filters) >= 1:
        for f in filters:
            s = f.split("__")
            field = s[1]
            operator = s[2]
            value = request.GET.get(f)
            if operator == "like":
                df = df[df[field].str.contains(value)]
            elif operator == "regex":
                df = DataFrameSearchColumn(df, field, value, operator)
            else:
                df = df.query(df_generate(df, value, f))

    groupby = []
    if request.GET.get("groupby", None):
        groupby = request.GET.get("groupby", "").split(",")
    if len(groupby) >= 1:
        df = DataFrame(df.groupby(groupby).grouper.get_group_levels())

    if request.GET.get("orderby", element.get("orderby", None)) and \
            request.GET.get("orderby",
                            element.get("orderby", None)) in fields:
        orderby = request.GET.get("orderby", element.get("orderby", ""))
        if type(orderby) == str:
            orderby = orderby.split(",")
        orderby__order = request.GET.get("orderby__order",
                                         element.get("orderby__order", ""))
        if type(orderby__order) == str:
            orderby__order = orderby__order.split(",")
        ind = 0
        for orde in orderby__order:
            if orde == "0":
                orderby__order[ind] = False
            else:
                orderby__order[ind] = True
            ind += 1
        df = df.sort(orderby, ascending=orderby__order)

    DM.send(json.dumps({"type": "max_page",
                        "data": data.get("count", len(df))}))

    # CLEAN MEMORY
    del filters, fields, columns
    gc.collect()

    categories = []
    records = df.to_dict(orient="records")
    if not DW.search:
        records = records[page_start:page_end]
    for i in records:
        if element.get("categories", None):
            categories.append(i[element.get("categories")])
        DM.send(json.dumps({"type": "data", "data": i}))

    DM.send(json.dumps({"type": "categories", "data": categories}))
    DM.send(json.dumps({"type": "close"}))

    # CLEAN MEMORY
    del categories
    gc.collect()

    if not ws:
        if "download" in request.GET.keys():
            ext = request.GET.get("download", "xls")
            if ext == "":
                ext = "xls"

            file_name = "{}/frontend/assets/exports/openmining-{}.{}".format(
                PROJECT_PATH, element.get("cube"), ext)
            if ext == "csv":
                df.to_csv(file_name, sep=";")
                contenttype = "text/csv"
            else:
                df.to_excel(file_name)
                contenttype = "application/vnd.ms-excel"

            response.set_header("charset", "utf-8")
            response.set_header("Content-disposition",
                                "attachment; "
                                "filename={}.{}".format(element.get("cube"),
                                                        ext))
            response.content_type = contenttype

            ifile = open(file_name, "r")
            o = ifile.read()
            ifile.close()
            return o
        return json.dumps(DM.data)
def data(mongodb, slug, ext="xls"): DW = DataWarehouse() element = mongodb["element"].find_one({"slug": slug}) element["page_limit"] = 50 if request.GET.get("limit", True) is False: element["page_limit"] = 9999999999 data = DW.get(element.get("cube")) columns = data.get("columns") or [] fields = columns if request.GET.get("fields", None): fields = request.GET.get("fields").split(",") filters = [i[0] for i in request.GET.iteritems() if len(i[0].split("filter__")) > 1] df = DataFrame(data.get("data") or {}, columns=fields) if len(filters) >= 1: for f in filters: s = f.split("__") field = s[1] operator = s[2] value = request.GET.get(f) if operator == "like": df = df[df[field].str.contains(value)] elif operator == "regex": df = DataFrameSearchColumn(df, field, value, operator) else: df = df.query(df_generate(df, value, f)) groupby = [] if request.GET.get("groupby", None): groupby = request.GET.get("groupby").split(",") if len(groupby) >= 1: df = DataFrame(df.groupby(groupby).grouper.get_group_levels()) if ( request.GET.get("orderby", element.get("orderby", None)) and request.GET.get("orderby", element.get("orderby", None)) in fields ): orderby = request.GET.get("orderby", element.get("orderby", "")) if type(orderby) == str: orderby = orderby.split(",") orderby__order = request.GET.get("orderby__order", element.get("orderby__order", "")) if type(orderby__order) == str: orderby__order = orderby__order.split(",") ind = 0 for orde in orderby__order: if orde == "0": orderby__order[ind] = False else: orderby__order[ind] = True ind += 1 df = df.sort(orderby, ascending=orderby__order) # CLEAN MEMORY del filters, fields, columns gc.collect() file_name = "{}/assets/exports/openmining-{}.{}".format(PROJECT_PATH, element.get("cube"), ext) if ext == "csv": df.to_csv(file_name, sep=";") contenttype = "text/csv" else: df.to_excel(file_name) contenttype = "application/vnd.ms-excel" response.set_header("charset", "utf-8") response.set_header("Content-disposition", "attachment; " "filename={}.{}".format(element.get("cube"), ext)) response.content_type = contenttype ifile = open(file_name, "r") o = ifile.read() ifile.close() return o
def data(mongodb, slug):
    # check protocol to work
    ws = request.environ.get('wsgi.websocket')
    protocol = "websocket"
    if not ws:
        response.content_type = 'application/json'
        protocol = "http"

    DataManager = __from__(
        "mining.controllers.data.{}.DataManager".format(protocol))
    # instantiates the chosen protocol
    DM = DataManager(ws)

    # instantiate data warehouse
    DW = DataWarehouse()

    element = mongodb['element'].find_one({'slug': slug})
    element['page_limit'] = 50
    if request.GET.get('limit', True) is False:
        element['page_limit'] = 9999999999

    if element['type'] == 'grid' and "download" not in request.GET.keys():
        page = int(request.GET.get('page', 1))
        page_start = 0
        page_end = element['page_limit']
        if page >= 2:
            page_end = element['page_limit'] * page
            page_start = page_end - element['page_limit']
    else:
        page = 1
        page_start = None
        page_end = None

    filters = [i[0] for i in request.GET.iteritems()
               if len(i[0].split('filter__')) > 1]

    if not DW.search:
        data = DW.get(element.get('cube'), page=page)
    else:
        data = DW.get(element.get('cube'), filters=filters, page=page)

    columns = data.get('columns') or []

    fields = columns
    if request.GET.get('fields', None):
        fields = request.GET.get('fields').split(',')

    cube_last_update = mongodb['cube'].find_one({'slug': element.get('cube')})
    DM.send(json.dumps({'type': 'last_update',
                        'data': str(cube_last_update.get('lastupdate', ''))}))

    DM.send(json.dumps({'type': 'columns', 'data': fields}))

    df = DataFrame(data.get('data') or {}, columns=fields)
    if len(filters) >= 1:
        for f in filters:
            s = f.split('__')
            field = s[1]
            operator = s[2]
            value = request.GET.get(f)
            if operator == 'like':
                df = df[df[field].str.contains(value)]
            elif operator == 'regex':
                df = DataFrameSearchColumn(df, field, value, operator)
            else:
                df = df.query(df_generate(df, value, f))

    groupby = []
    if request.GET.get('groupby', None):
        groupby = request.GET.get('groupby', "").split(',')
    if len(groupby) >= 1:
        df = DataFrame(df.groupby(groupby).grouper.get_group_levels())

    if request.GET.get('orderby', element.get('orderby', None)) and \
            request.GET.get('orderby',
                            element.get('orderby', None)) in fields:
        orderby = request.GET.get('orderby', element.get('orderby', ''))
        if type(orderby) == str:
            orderby = orderby.split(',')
        orderby__order = request.GET.get('orderby__order',
                                         element.get('orderby__order', ''))
        if type(orderby__order) == str:
            orderby__order = orderby__order.split(',')
        ind = 0
        for orde in orderby__order:
            if orde == '0':
                orderby__order[ind] = False
            else:
                orderby__order[ind] = True
            ind += 1
        df = df.sort(orderby, ascending=orderby__order)

    DM.send(json.dumps({'type': 'max_page',
                        'data': data.get('count', len(df))}))

    # CLEAN MEMORY
    del filters, fields, columns
    gc.collect()

    categories = []

    # TODO: loop in aggregate (apply mult aggregate)
    aggregate = [i[0] for i in request.GET.iteritems()
                 if len(i[0].split('aggregate__')) > 1]
    if len(aggregate) >= 1:
        agg = aggregate[0].split('__')
        _agg = getattr(df.groupby(agg[1]), request.GET.get(aggregate[0]))()
        DF_A = DataFrame(_agg[_agg.keys()[0]]).to_dict().get(_agg.keys()[0])
        DM.send(json.dumps({'type': 'aggregate', 'data': DF_A}))

    records = df.to_dict(orient='records')
    if not DW.search:
        records = records[page_start:page_end]
    for i in records:
        if element.get('categories', None):
            categories.append(i[element.get('categories')])
        DM.send(json.dumps({'type': 'data', 'data': i}))

    DM.send(json.dumps({'type': 'categories', 'data': categories}))
    DM.send(json.dumps({'type': 'close'}))

    # CLEAN MEMORY
    del categories
    gc.collect()

    if not ws:
        if "download" in request.GET.keys():
            ext = request.GET.get("download", "xls")
            if ext == '':
                ext = 'xls'

            file_name = '{}/frontend/assets/exports/openmining-{}.{}'.format(
                PROJECT_PATH, element.get('cube'), ext)
            if ext == 'csv':
                df.to_csv(file_name, sep=";")
                contenttype = 'text/csv'
            else:
                df.to_excel(file_name)
                contenttype = 'application/vnd.ms-excel'

            response.set_header('charset', 'utf-8')
            response.set_header('Content-disposition', 'attachment; '
                                'filename={}.{}'.format(
                                    element.get('cube'), ext))
            response.content_type = contenttype

            ifile = open(file_name, "r")
            o = ifile.read()
            ifile.close()
            return o
        return json.dumps(DM.data)