def getBarChartRaceBySentimentAndDate(self):
    """Aggregate tweet counts per (day, sentiment) for a bar-chart race.

    Cuts the tweet cube on the configured concept, drills down by day and
    sentiment, and returns a list shaped like::

        [{'date': 'd/m/y',
          'sentimentsList': [{'sentiment': ..., 'numberOfTweets': ...}, ...]},
         ...]
    """
    cube = self.workspace.cube("tweet")
    cube.browser = self.browserTweet
    cell = Cell(cube, [PointCut("concept", [self.concept])])
    result = self.browserTweet.aggregate(
        cell,
        drilldown=["time:day", "sentiment"],
        aggregates=["numberOfTweets_sum"],
    )

    # Group the aggregated counts by formatted date, then by sentiment label.
    counts_by_date = defaultdict(dict)
    for row in result.table_rows("time"):
        record = row.record
        date_key = record['time.day'] + "/" + record['time.month'] + "/" + record['time.year']
        label = record['sentiment.sentimentLabel']
        counts_by_date[date_key][label] = record['numberOfTweets_sum']

    # Flatten the nested mapping into the list-of-dicts shape the chart expects.
    return [
        {
            'date': date_key,
            'sentimentsList': [
                {'sentiment': label, 'numberOfTweets': count}
                for label, count in sentiments.items()
            ],
        }
        for date_key, sentiments in counts_by_date.items()
    ]
def compare_kill_distances(browser, cube):
    """Compare the average kill distance of match winners against everyone.

    Aggregates kills (cut to the 2017 match-date slice) drilled down by
    killer and victim, computes the distance for each kill via
    ``get_distance`` (0 when any coordinate is missing), prints both
    averages, and returns ``first_place_average - overall_average``.

    :param browser: cubes AggregationBrowser for the kill cube.
    :param cube: the cube the cut is built against.
    """
    first_place_distances = []
    all_distances = []
    cuts = [PointCut("match_date_dimension", [2017])]
    cell = Cell(cube, cuts)
    # BUG FIX: the cell carrying the 2017 cut was built but never passed to
    # aggregate(), so the whole dataset was aggregated instead of one year.
    result = browser.aggregate(
        cell, drilldown=["killer_dimension", "victim_dimension"])
    for record in result:
        x1 = record["killer_dimension.killer_pos_x"]
        x2 = record["victim_dimension.victim_pos_x"]
        y1 = record["killer_dimension.killer_pos_y"]
        y2 = record["victim_dimension.victim_pos_y"]
        distance = 0
        # Identity check (`is not None`) rather than `!= None`.
        if x1 is not None and x2 is not None and y1 is not None and y2 is not None:
            distance = get_distance(x1, y1, x2, y2)
        if record["killer_dimension.killer_placement"] == 1:
            first_place_distances.append(distance)
        all_distances.append(distance)
    # Accumulate in Python lists and convert once: np.append inside the
    # loop reallocated the whole array each iteration (O(n^2)).
    first_place_average_kill_distance = np.mean(np.array(first_place_distances))
    average_kill_distance = np.mean(np.array(all_distances))
    print("First place averages a kill distance of: {}".format(
        first_place_average_kill_distance))
    print("Average kill distance in comparison: {}".format(
        average_kill_distance))
    return first_place_average_kill_distance - average_kill_distance
def cslice(self, dimension, values):
    """Slice the cube at time=5, drill down by item, and return the facts
    with their ``rating`` field coerced to float.

    NOTE(review): ``dimension`` and ``values`` are only used in the log
    line; the cut is hard-coded to time=5 — confirm whether the parameters
    were meant to drive the cut.
    """
    print("Slicing by %s" % dimension)
    cut = PointCut("time", [5])
    cell = Cell(self.browser.cube, [cut])
    result = self.browser.aggregate(cell, drilldown=["item"])
    print("result")
    print(result.summary)
    print(result.to_dict())
    for record in result:
        print(record)
    print("result cells")
    print(result.cell)
    print(self.browser.facts(cell))
    # BUG FIX: a throwaway `ratings` list comprehension issued an extra
    # facts() query whose result was never used; fetch the facts once here.
    rows = []
    for rating in self.browser.facts(cell):
        print("Before")
        print(rating)
        # Ratings come back as Decimal/str from the store; normalise to float.
        rating["rating"] = float(rating["rating"])
        print("After")
        print(rating)
        rows.append(rating)
    print(rows)
    return rows
def getPieChartSource(self):
    """Return per-continent tweet counts broken down by tweet source.

    Cuts the tweet cube on the configured concept and drills down by
    location and source. The result has one entry per continent (fixed
    order) with a 'sources' list covering iPhone/Android/Web/Unknown,
    using 0 when a source has no tweets for that continent.
    """
    cube = self.workspace.cube("tweet")
    cube.browser = self.browserTweet
    cell = Cell(cube, [PointCut("concept", [self.concept])])
    result = self.browserTweet.aggregate(
        cell,
        drilldown=["location", "source"],
        aggregates=["numberOfTweets_sum"],
    )

    # continent -> {source name -> tweet count}
    output = defaultdict(dict)
    for row in result.table_rows("location"):
        record = row.record
        continent = record['location.continentName']
        source = record['source.sourceName']
        output[continent][source] = record['numberOfTweets_sum']
    print("output ", output)

    # The four copy-pasted if/else blocks collapsed into one loop:
    # dict.get(name, 0) covers both the "no data for continent" and the
    # "continent present but source missing" cases identically.
    continents = ['Asia', 'Africa', 'Australia', 'Europe',
                  'North America', 'South America']
    source_names = ["iPhone", "Android", "Web", "Unknown"]
    data = []
    for continent in continents:
        counts = output[continent]
        data.append({
            'continentName': continent,
            'sources': [
                {'source': name, 'numberOfTweets': counts.get(name, 0)}
                for name in source_names
            ],
        })
    return data
def item_context_agg(browser, item):
    """Debug helper: cut the cube to one item and dump the time drill-down.

    Prints the aggregate summary, each drill-down record, and every fact
    in the sliced cell. Returns nothing.
    """
    print("Slicing by item")
    item_cut = PointCut("item", [item])
    sliced = Cell(browser.cube, [item_cut])
    result = browser.aggregate(sliced, drilldown=["time"])
    print("result")
    print(result.summary)
    print(result.to_dict())
    for record in result:
        print(record)
    print("result cells")
    for fact in browser.facts(sliced):
        print(fact)
def cslice(browser):
    """Debug helper: cut the cube at time=5 and dump the item drill-down.

    Prints the cell's attributes, the aggregate summary, each drill-down
    record, and every fact in the sliced cell. Returns nothing.
    """
    print("Slicing by time")
    time_cut = PointCut("time", [5])
    sliced = Cell(browser.cube, [time_cut])
    print("dir cell")
    print(dir(sliced))
    print(sliced)
    result = browser.aggregate(sliced, drilldown=["item"])
    print("result")
    print(result.summary)
    print(result.to_dict())
    for record in result:
        print(record)
    print("result cells")
    print(result.cell)
    print(browser.facts(sliced))
    for fact in browser.facts(sliced):
        print(fact)
def item_context_agg(self, item):
    """Aggregate the cube over time for a single item.

    Returns the drill-down records with their 'sum' and 'average' fields
    coerced to float (the store returns Decimal/str values). Also prints
    each record and every fact in the sliced cell for debugging.
    """
    print("Slicing by item")
    sliced = Cell(self.browser.cube, [PointCut("item", [item])])
    agg = self.browser.aggregate(sliced, drilldown=["time"])
    print("agg")
    print(agg.summary)
    print(agg.to_dict())
    rows = []
    for record in agg:
        print("Record in agg")
        print(record)
        record["sum"] = float(record["sum"])
        record["average"] = float(record["average"])
        rows.append(record)
    print("result cells")
    for fact in self.browser.facts(sliced):
        print(fact)
    return rows
def getBarChartRaceByLanguageAndDate(self):
    """Aggregate tweet counts per (day, language) for a bar-chart race.

    Cuts the tweet cube on the configured concept and drills down by day
    and language. Returns one entry per date containing a count for every
    language in the pickled language structure (0 when a language has no
    tweets that day), so all dates report the same set of languages.
    """
    cube = self.workspace.cube("tweet")
    cube.browser = self.browserTweet
    cell = Cell(cube, [PointCut("concept", [self.concept])])
    result = self.browserTweet.aggregate(
        cell,
        drilldown=["time:day", "language"],
        aggregates=["numberOfTweets_sum"],
    )

    # date string -> {language name -> tweet count}
    data = defaultdict(dict)
    for row in result.table_rows("time"):
        record = row.record
        date = record['time.day'] + "/" + record['time.month'] + "/" + record['time.year']
        data[date][record['language.languageName']] = record['numberOfTweets_sum']
        # (The original also accumulated languages seen here, but that list
        # was dead code — it was immediately overwritten by the pickle load.)

    # GET LIST OF LANGUAGES FROM FILE
    import pickle
    with open('../Docs/languagesStructure.pickle', 'rb') as file:
        languagesList = pickle.load(file)
    print(len(languagesList))

    dataList = []
    for date, counts in data.items():
        print(len(languagesList))
        # BUG FIX: the original created one `element` dict before the loop
        # and appended it on every iteration, so dataList ended up holding
        # N references to the SAME dict (every date showed the last date's
        # data). Build a fresh dict for each date instead.
        dataList.append({
            'date': date,
            'languagesList': [
                {'language': language, 'numberOfTweets': counts.get(language, 0)}
                for language in languagesList
            ],
        })
    return dataList
# Drill down on service print('\n\n...drilling down on services...') result = browser.aggregate(drilldown=["dm_service:service_name"]) for record in result: print(record) # print(record["dm_service.crown"], "\t", record['amount_sum']) # Cut print('\n\n...Cut services for Crown Service Only...') dimension = cube.dimension("dm_service") hierarchy = dimension.hierarchy() cell = Cell(cube, cuts_from_string(cube, "dm_service:True")) cut = cell.cut_for_dimension(dimension) if cut: path = cut.path else: path = [] # # Do the work, do the aggregation. # result = browser.aggregate(cell, drilldown=["dm_datetime"]) for record in result: print(record)
# 1. Create the workspace backed by the restaurant SQLite store and
#    load the logical cube model.
workspace = Workspace()
workspace.register_default_store("sql", url="sqlite:///restaurant.sqlite")
workspace.import_model("model.json")

# 2. Getting a browser
cube = workspace.cube("restaurant_details")
browser = workspace.browser(cube)
dimension = cube.dimension("location")

# Rolling up to State
print("\n"
      "Roll up to state\n"
      "================")
cell = Cell(browser.cube)
# `rollup` and `drilldown` are helper functions defined elsewhere in this file.
rollup(cell, "location")

# Drilling down into the cities of each state
print("\n"
      "Drill down by state\n"
      "===================")
drilldown(cell, "location", 3)

# Slicing by a particular state
print("\n"
      "Slice by State\n"
      "==============")
cell = cell.slice(PointCut("location", ["CA"]))
from __future__ import print_function
from cubes import Workspace, Cell, PointCut

# Open the F1 SQLite store and browse the qualifying cube.
workspace = Workspace()
workspace.register_default_store("sql", url="sqlite:///data_sqlite/f1.sqlite")
workspace.import_model("models/model.json")
browser = workspace.browser("qualifying")

# Empty point cuts just declare the dimensions the cell spans.
cut1 = PointCut("drivers", [])
cut2 = PointCut("races", [])
cell = Cell(browser.cube, cuts=[cut1, cut2])
result = browser.aggregate(cell, drilldown=["drivers", "races"])
list_res = [row for row in result]


def filter_racer(data, name, year):
    """Return the rows for one driver surname in one season, ordered by
    best qualifying position."""
    matching = [
        row for row in data
        if row['drivers.surname'] == name and row['races.year'] == year
    ]
    return sorted(matching, key=lambda row: row['position_min'])


for line in filter_racer(list_res, 'Hamilton', 2009):
    print(line)
def process(request):
    """Aggregate observed taxon occurrences for a selected entity.

    Builds cube cuts from the posted ``selected_entity`` (either a drawn
    area or a named entity) plus optional rainfall/elevation filters, then
    returns a DRF Response containing:

    * ``summary`` — occurrence sum, species richness, and the number of
      taxa unique to the selected entity,
    * ``records`` — occurrence/richness rows grouped by familia/genus/species,
    * ``columns`` — (name, label) pairs for table attributes and aggregates,
    * ``area`` — the entity's area (0 for drawn areas).
    """
    agg = [
        {
            "name": "occurrence_sum",
            "label": "Nombre d'occurrences observées",
            "function": "sum",
            "measure": "occurrence_count",
        },
        {
            "name": "richness",
            "label": "Nombre d'espèces observées",
            "measure": "taxon_dimension_id",
            "function": "count_distinct",
        },
    ]
    dm = get_dimensional_model(
        'taxon_observed_occurrences',
        agg,
    )
    workspace = dm.get_cubes_workspace()
    cube = workspace.cube('taxon_observed_occurrences')
    browser = workspace.browser(cube)
    selected_entity = json.loads(
        request.POST.get('selected_entity', None)
    )
    cuts = []
    invert_location_cuts = []
    invert_env_cuts = []
    invert_env = False
    if selected_entity['type'] == 'draw':
        # Drawn geometry: cuts come from the geometry helper; no stored area.
        location_cuts = get_occurrence_location_cuts(selected_entity)
        cuts += location_cuts
        invert_env_cuts += location_cuts
        invert_location_cuts += get_occurrence_location_cuts(
            selected_entity, invert=True
        )
        area = 0
    else:
        # Named entity: a plain point cut on the entity dimension.
        entity_cut = PointCut(
            selected_entity['type'],
            [selected_entity['value']]
        )
        cuts += [entity_cut]
        invert_env_cuts += [entity_cut]
        invert_location_cuts += [
            PointCut(
                selected_entity['type'],
                [selected_entity['value']],
                invert=True
            ),
        ]
        dim = get_dimension(selected_entity['type'])
        area = dim.get_value(selected_entity['value'], ["area"])[0]
    # Update cuts with rainfall filter
    rainfall_filter = request.POST.get('rainfall_filter', None)
    if rainfall_filter is not None and rainfall_filter != '':
        cuts += [
            PointCut('rainfall', [rainfall_filter])
        ]
        invert_env_cuts += [
            PointCut('rainfall', [rainfall_filter], invert=True)
        ]
        invert_env = True
    # Update cuts with elevation filter
    elevation_filter = request.POST.get('elevation_filter', None)
    if elevation_filter is not None and elevation_filter != '':
        cuts += [
            PointCut('elevation', [elevation_filter])
        ]
        invert_env_cuts += [
            PointCut('elevation', [elevation_filter], invert=True)
        ]
        invert_env = True
    df = pd.DataFrame(list(browser.facts(cell=Cell(cube, cuts))))
    summary = {'occurrence_sum': 0, 'richness': 0}
    records = []
    taxa_ids = pd.Index([])
    if len(df) > 0:
        # Init summary with occurrence_sum
        summary['occurrence_sum'] = df['occurrence_count'].sum()
        # Filter occurrences identified at species level for richness
        df_species = df[df['taxon_dimension.species'] != 'NS']
        # Init records with occurrence sum
        records = pd.DataFrame(
            df_species.groupby(
                ['taxon_dimension.familia',
                 'taxon_dimension.genus',
                 'taxon_dimension.species']
            )['occurrence_count'].sum(),
            columns=['occurrence_count']
        ).rename(
            columns={'occurrence_count': 'occurrence_sum'},
        )
        records['richness'] = 1
        if len(df_species) > 0:
            taxa_ids = pd.Index(df_species['taxon_dimension_id'].unique())
        # Update summary with richness
        summary['richness'] = df_species['taxon_dimension_id'].nunique()
        # Records to dict
        records = records.reset_index().to_dict(orient='index').values()
    # Compute unique taxa in selected location indicator
    invert_loc_cell = Cell(cube, invert_location_cuts)
    invert_loc_df = pd.DataFrame(list(browser.facts(cell=invert_loc_cell)))
    invert_loc_taxa_ids = pd.Index([])
    if len(invert_loc_df) > 0:
        invert_loc_taxa_ids = pd.Index(
            invert_loc_df['taxon_dimension_id'].unique()
        )
    diff = taxa_ids.difference(invert_loc_taxa_ids)
    # FIX: `invert_env` is a bool, so test it directly (was `invert_env > 0`).
    if invert_env:
        invert_env_cell = Cell(cube, invert_env_cuts)
        # BUG FIX: facts() was fetched twice here with the first result
        # discarded — one redundant database round-trip removed.
        invert_env_df = pd.DataFrame(list(browser.facts(cell=invert_env_cell)))
        if len(invert_env_df) > 0:
            invert_env_taxa_ids = pd.Index(
                invert_env_df['taxon_dimension_id'].unique()
            )
            diff = diff.difference(invert_env_taxa_ids)
    summary['unique_taxa_in_entity'] = len(diff)
    # Extract table attributes
    attributes = [
        'taxon_dimension.familia',
        'taxon_dimension.genus',
        'taxon_dimension.species',
    ]
    attributes_names = []
    for i in attributes:
        attributes_names.append((i, cube.attribute(i).label))
    aggregates_names = [(i.name, i.label) for i in cube.aggregates]
    return Response({
        'summary': summary,
        'records': records,
        'columns': attributes_names + aggregates_names,
        'area': area,
    })
# Connect to the Postgres store (credentials redacted) and load the
# sales-table cube model.
workspace = Workspace()
workspace.register_default_store(
    "sql", url="postgresql://*****:*****@localhost/willowood")
workspace.import_model("SalesTable.json")
browser = workspace.browser("salestable")

# Whole-cube totals: record count plus the Qty and Value aggregates.
result = browser.aggregate()
print(result.summary["record_count"])
print(result.summary["Qty"])
print(result.summary["Value"])

cube = browser.cube

# result = browser.aggregate(drilldown=["billing_date"])
#
# for record in result:
#     print(' record: ', record)

# TryingOut Yesterday, MTD , YTD, LYTD
# Cut the cube to a single master code; the billing-date cut is left
# commented out as an example of a second cut.
cuts = [
    PointCut("bh_master_code", [45000002]),
    # PointCut("billing_date", ["2018-08-17"]),
]
cell = Cell(cube, cuts)
result1 = browser.aggregate(cell)
print(' Cut Portion: Qty:- ', result1.summary["Qty"])
print(' Cut Portion: Value:- ', result1.summary["Value"])
"==================================================") # result = browser.aggregate(drilldown=["item"]) # print(("%-20s%10s%10s%10s\n"+"-"*50) % ("Category", "Count", "Total", "Double")) # for row in result.table_rows("item"): print("%-20s%10d%10d%10d" % ( row.label, row.record["record_count"], row.record["amount_sum"], row.record["double_amount_sum"]) ) print("\n" "Slice where Category = Equity\n" "==================================================") cut = PointCut("item", ["e"]) cell = Cell(browser.cube, cuts = [cut]) result = browser.aggregate(cell, drilldown=["item"]) print(("%-20s%10s%10s%10s\n"+"-"*50) % ("Sub-category", "Count", "Total", "Double")) for row in result.table_rows("item"): print("%-20s%10d%10d%10d" % ( row.label, row.record["record_count"], row.record["amount_sum"], row.record["double_amount_sum"], ))
def report(dim_name=None):
    """Render the aggregate report for one dimension of the cube.

    Without a dimension name, just list the cube's dimensions. Otherwise
    parse the ``cut`` query parameter into a cell, aggregate with a
    drill-down on the requested dimension, and work out where we are in
    that dimension's hierarchy so the template can offer the next level.
    """
    browser = workspace.browser(CUBE_NAME)
    cube = browser.cube

    if not dim_name:
        return render_template('report.html', dimensions=cube.dimensions)

    # The hierarchy gives the order of levels. Cubes supports multiple
    # hierarchies internally; this example browses only the default one.
    dimension = cube.dimension(dim_name)
    hierarchy = dimension.hierarchy()

    # Turn the `cut` request parameter into actual cube cuts — think of it
    # as a multi-dimensional path, though only one dimension is browsed here.
    cutstr = request.args.get("cut")
    cell = Cell(cube, cuts_from_string(cube, cutstr))

    # The cut on the browsed dimension tells us "where we are" — the
    # current dimension path.
    cut = cell.cut_for_dimension(dimension)
    path = cut.path if cut else []

    # Do the work, do the aggregation.
    result = browser.aggregate(cell, drilldown=[dim_name])

    # No path means no cut for the dimension, hence no corresponding detail.
    details = browser.cell_details(cell, dimension)[0] if path else []

    # Find the current level and the next drill-down level in the hierarchy.
    levels = hierarchy.levels_for_path(path)
    next_level = hierarchy.next_level(levels[-1] if levels else None)

    # Are we at the very detailed level?
    is_last = hierarchy.is_last(next_level)

    return render_template('report.html',
                           dimensions=cube.dimensions,
                           dimension=dimension,
                           levels=levels,
                           next_level=next_level,
                           result=result,
                           cell=cell,
                           is_last=is_last,
                           details=details)
This function is like recursively traversing directories on a file system and aggregating the file sizes, for example. * `cell` - cube cell to drill-down * `dimension` - dimension to be traversed through all levels """ if cell.is_base(dimension): return result = browser.aggregate(aggregates=["contract_amount_sum"], cell=cell, drilldown=[dimension]) # for row in cubes.drilldown_rows(cell, result, dimension): for row in result.table_rows(dimension): indent = " " * (len(row.path) - 1) print "%s%s: %d" % (indent, row.label, row.record["contract_amount_sum"]) new_cell = cell.drilldown(dimension, row.key) drilldown(new_cell, dimension) # Get whole cube cell = Cell(cube) print "Drill down through date hierarchy:" drilldown(cell, cube.dimension("date")) # print "Drill down through CPV hierarchy in 2011:" # cell = cell.slice(cubes.PointCut("date", [2010,5])) # drilldown(cell, cube.dimension("cpv"))