Esempio n. 1
0
    def getBarChartRaceBySentimentAndDate(self):
        """Return per-day tweet counts grouped by sentiment for ``self.concept``.

        Aggregates the "tweet" cube, cut on the current concept, drilling
        down by day and sentiment.

        Returns:
            A list with one entry per date, in first-seen order, shaped as
            ``{'date': 'day/month/year', 'sentimentsList': [
            {'sentiment': label, 'numberOfTweets': count}, ...]}``.
        """
        tweet_cube = self.workspace.cube("tweet")
        tweet_cube.browser = self.browserTweet

        concept_cell = Cell(tweet_cube, [PointCut("concept", [self.concept])])
        aggregation = self.browserTweet.aggregate(
            concept_cell,
            drilldown=["time:day", "sentiment"],
            aggregates=["numberOfTweets_sum"],
        )

        # Materialise the rows first, then regroup them by date.
        records = [row.record for row in aggregation.table_rows("time")]

        counts_by_date = {}
        for record in records:
            # The date parts are joined with "/", so they are presumably
            # strings in the model -- verify against the cube definition.
            day_label = "/".join(())  + record['time.day'] + "/" + record['time.month'] + "/" + record['time.year']
            sentiment_label = record['sentiment.sentimentLabel']
            per_sentiment = counts_by_date.setdefault(day_label, {})
            per_sentiment[sentiment_label] = record['numberOfTweets_sum']

        return [
            {
                'date': day_label,
                'sentimentsList': [
                    {'sentiment': sentiment_label, 'numberOfTweets': total}
                    for sentiment_label, total in per_sentiment.items()
                ],
            }
            for day_label, per_sentiment in counts_by_date.items()
        ]
def compare_kill_distances(browser, cube):
    """Compare the mean kill distance of first-place killers with the overall mean.

    Aggregates ``cube`` through ``browser``, restricted to the 2017 point cut
    on ``match_date_dimension`` and drilled down by killer and victim. For
    every record that has all four coordinates, the killer-victim distance
    is computed with ``get_distance``.

    Args:
        browser: aggregation browser used to run the query.
        cube: cube the 2017 cell is built on.

    Returns:
        Mean distance of records whose killer placement is 1, minus the mean
        distance over all records. Either mean is NaN when no distance was
        collected for it.
    """
    cuts = [PointCut("match_date_dimension", [2017])]
    cell = Cell(cube, cuts)
    # The cell used to be built and then dropped, so the 2017 cut was never
    # applied; pass it to the browser so the restriction takes effect.
    result = browser.aggregate(
        cell, drilldown=["killer_dimension", "victim_dimension"])

    # Collect into plain lists: np.append copies the whole array per call.
    first_place_distances = []
    all_distances = []
    for record in result:
        x1 = record["killer_dimension.killer_pos_x"]
        x2 = record["victim_dimension.victim_pos_x"]
        y1 = record["killer_dimension.killer_pos_y"]
        y2 = record["victim_dimension.victim_pos_y"]
        if x1 is None or x2 is None or y1 is None or y2 is None:
            # Skip records with an incomplete position.
            continue
        distance = get_distance(x1, y1, x2, y2)
        if record["killer_dimension.killer_placement"] == 1:
            first_place_distances.append(distance)
        all_distances.append(distance)

    first_place_average_kill_distance = np.mean(
        np.array(first_place_distances, dtype=float))
    average_kill_distance = np.mean(np.array(all_distances, dtype=float))

    print("First place averages a kill distance of: {}".format(
        first_place_average_kill_distance))
    print("Average kill distance in comparison: {}".format(
        average_kill_distance))

    return first_place_average_kill_distance - average_kill_distance
Esempio n. 3
0
 def cslice(self, dimension, values):
     """Print debugging output for a slice and return its facts with float ratings.

     NOTE(review): the cut is hard-coded to ``PointCut("time", [5])``;
     ``dimension`` is only echoed in the first message and ``values`` is
     not used at all -- confirm whether the cut should be built from them.

     Returns:
         A list of the fact records for the cell, each with its ``"rating"``
         value converted to ``float``.
     """
     print("Slicing by %s" % dimension)
     cut = PointCut("time", [5])
     cell = Cell(self.browser.cube, [cut])
     aggregation = self.browser.aggregate(cell, drilldown=["item"])
     print("result")
     print(aggregation.summary)
     print(aggregation.to_dict())
     for record in aggregation:
         print(record)
     print("result cells")
     print(aggregation.cell)
     print(self.browser.facts(cell))

     # A previous, unused list of facts was dropped here: it ran the facts
     # query one extra time only to discard the result.
     converted = []
     for rating in self.browser.facts(cell):
         print("Before")
         print(rating)
         rating["rating"] = float(rating["rating"])
         print("After")
         print(rating)
         converted.append(rating)
     print(converted)
     return converted
Esempio n. 4
0
    def getPieChartSource(self):
        """Return tweet counts per source for each of six fixed continents.

        Aggregates the "tweet" cube, cut on ``self.concept``, drilling down
        by location and source.

        Returns:
            A list with one entry per continent, in the fixed order below,
            shaped as ``{'continentName': name, 'sources': [
            {'source': source, 'numberOfTweets': count}, ...]}``. The sources
            are always iPhone, Android, Web and Unknown, in that order; a
            count is 0 when the aggregation has no row for that pair.
        """
        cube = self.workspace.cube("tweet")
        cube.browser = self.browserTweet

        cell = Cell(cube, [PointCut("concept", [self.concept])])
        result = self.browserTweet.aggregate(
            cell,
            drilldown=["location", "source"],
            aggregates=["numberOfTweets_sum"],
        )

        output = defaultdict(lambda: defaultdict())
        for row in result.table_rows("location"):
            continent = row.record['location.continentName']
            source = row.record['source.sourceName']
            output[continent][source] = row.record['numberOfTweets_sum']
        print("output ", output)

        continentsList = ['Asia', 'Africa', 'Australia', 'Europe',
                          'North America', 'South America']
        sourceNames = ("iPhone", "Android", "Web", "Unknown")

        data = []
        for continent in continentsList:
            # A continent missing from the aggregation yields an empty
            # mapping, so every lookup falls back to 0 without a separate
            # branch.
            counts = output.get(continent, {})
            data.append({
                'continentName': continent,
                'sources': [
                    {'source': name, 'numberOfTweets': counts.get(name, 0)}
                    for name in sourceNames
                ],
            })
        return data
Esempio n. 5
0
def item_context_agg(browser, item):
    """Print the by-time aggregation and the facts for a single item.

    Args:
        browser: aggregation browser; its ``cube`` is used to build the cell.
        item: path value for the point cut on the "item" dimension.
    """
    print("Slicing by item")
    item_cell = Cell(browser.cube, [PointCut("item", [item])])
    aggregation = browser.aggregate(item_cell, drilldown=["time"])

    print("result")
    print(aggregation.summary)
    print(aggregation.to_dict())
    for record in aggregation:
        print(record)

    print("result cells")
    for fact in browser.facts(item_cell):
        print(fact)
Esempio n. 6
0
def cslice(browser):
    """Print debugging output for the fixed ``("time", [5])`` slice.

    Shows the cell itself, its aggregation drilled down by "item", and
    every fact inside the cell.

    Args:
        browser: aggregation browser; its ``cube`` is used to build the cell.
    """
    print("Slicing by time")
    time_cell = Cell(browser.cube, [PointCut("time", [5])])

    print("dir cell")
    print(dir(time_cell))
    print(time_cell)

    aggregation = browser.aggregate(time_cell, drilldown=["item"])
    print("result")
    print(aggregation.summary)
    print(aggregation.to_dict())
    for record in aggregation:
        print(record)

    print("result cells")
    print(aggregation.cell)
    print(browser.facts(time_cell))
    for fact in browser.facts(time_cell):
        print(fact)
Esempio n. 7
0
 def item_context_agg(self, item):
     """Aggregate one item by time and return the records with float totals.

     Prints the aggregation summary, every record and every fact of the
     cell along the way.

     Args:
         item: path value for the point cut on the "item" dimension.

     Returns:
         The aggregation records, each with ``"sum"`` and ``"average"``
         converted to ``float``.
     """
     print("Slicing by item")
     item_cell = Cell(self.browser.cube, [PointCut("item", [item])])
     aggregation = self.browser.aggregate(item_cell, drilldown=["time"])
     print("agg")
     print(aggregation.summary)
     print(aggregation.to_dict())

     converted = []
     for record in aggregation:
         print("Record in agg")
         print(record)
         for field in ("sum", "average"):
             record[field] = float(record[field])
         converted.append(record)

     print("result cells")
     for fact in self.browser.facts(item_cell):
         print(fact)
     return converted
Esempio n. 8
0
    def getBarChartRaceByLanguageAndDate(self):
        """Return per-day tweet counts for every known language for ``self.concept``.

        Aggregates the "tweet" cube, cut on the current concept, drilling
        down by day and language. The list of languages to report is loaded
        from ``../Docs/languagesStructure.pickle``.

        Returns:
            A list with one entry per date, in first-seen order, shaped as
            ``{'date': 'day/month/year', 'languagesList': [
            {'language': name, 'numberOfTweets': count}, ...]}``. Every
            language from the pickle file appears for every date; the count
            is 0 when the aggregation has no row for that pair.
        """
        cube = self.workspace.cube("tweet")
        cube.browser = self.browserTweet

        cut = [PointCut("concept", [self.concept])]
        cell = Cell(cube, cut)

        result = self.browserTweet.aggregate(cell, drilldown=["time:day", "language"],
                                             aggregates=["numberOfTweets_sum"])
        output = [row.record for row in result.table_rows("time")]

        # date -> language -> number of tweets
        data = defaultdict(dict)
        for row in output:
            date = row['time.day'] + "/" + row['time.month'] + "/" + row['time.year']
            language = row['language.languageName']
            data[date][language] = row['numberOfTweets_sum']

        # GET LIST OF LANGUAGES FROM FILE
        # NOTE: pickle can execute arbitrary code on load; this file must
        # stay a trusted, project-controlled resource.
        import pickle
        with open('../Docs/languagesStructure.pickle', 'rb') as file:
            languagesList = pickle.load(file)
        print(len(languagesList))

        dataList = []
        for date, counts in data.items():
            print(len(languagesList))
            # Build a fresh dict per date. Reusing one dict across
            # iterations made every entry of the result alias the same
            # object, so all of them showed the last date's data.
            dataList.append({
                'date': date,
                'languagesList': [
                    {'language': language,
                     'numberOfTweets': counts.get(language, 0)}
                    for language in languagesList
                ],
            })
        return dataList
Esempio n. 9
0
# Drill down on service name and dump every aggregated record.
print('\n\n...drilling down on services...')
result = browser.aggregate(drilldown=["dm_service:service_name"])
for record in result:
    print(record)

# Cut: restrict the cell with the "dm_service:True" cut string.
print('\n\n...Cut services for Crown Service Only...')

dimension = cube.dimension("dm_service")
hierarchy = dimension.hierarchy()

cell = Cell(cube, cuts_from_string(cube, "dm_service:True"))

# Path of the cut on the browsed dimension, or an empty path without a cut.
cut = cell.cut_for_dimension(dimension)
path = cut.path if cut else []

# Aggregate the restricted cell by datetime and dump the records.
result = browser.aggregate(cell, drilldown=["dm_datetime"])
for record in result:
    print(record)
Esempio n. 10
0
# 1. Creating the workspace on top of the restaurant SQLite store
workspace = Workspace()
workspace.register_default_store(
    "sql",
    url="sqlite:///restaurant.sqlite",
)
workspace.import_model("model.json")

# 2. Getting a browser
cube = workspace.cube("restaurant_details")
browser = workspace.browser(cube)
dimension = cube.dimension("location")


# Rolling up to State, starting from the whole cube
print("\nRoll up to state\n================")

cell = Cell(browser.cube)
rollup(cell, "location")


# Drilling down into the cities of each state
print("\nDrill down by state\n===================")
drilldown(cell, "location", 3)


# Slicing by a particular state
print("\nSlice by State\n==============")
cell = cell.slice(PointCut("location", ["CA"]))
from __future__ import print_function
from cubes import Workspace, Cell, PointCut

# Workspace over the F1 SQLite store, browsing the "qualifying" cube.
workspace = Workspace()
workspace.register_default_store(
    "sql",
    url="sqlite:///data_sqlite/f1.sqlite",
)
workspace.import_model("models/model.json")

browser = workspace.browser("qualifying")

# Empty-path point cuts on the two dimensions that are drilled down below.
cut1 = PointCut("drivers", [])
cut2 = PointCut("races", [])
cell = Cell(browser.cube, cuts=[cut1, cut2])
result = browser.aggregate(cell, drilldown=["drivers", "races"])

# Materialise the aggregation so it can be filtered repeatedly.
list_res = list(result)


def filter_racer(data, name, year):
    """Return the records of one driver in one season, ordered by position.

    Args:
        data: iterable of mappings with the keys ``'drivers.surname'``,
            ``'races.year'`` and ``'position_min'``.
        name: surname to match exactly.
        year: race year to match exactly.

    Returns:
        A new list of the matching records, sorted ascending by
        ``'position_min'``. Ties keep their input order.
    """
    matches = [
        entry for entry in data
        if entry['drivers.surname'] == name and entry['races.year'] == year
    ]
    matches.sort(key=lambda entry: entry['position_min'])
    return matches


# Print the rows for surname 'Hamilton' in year 2009, ordered by
# 'position_min' (the ordering is applied inside filter_racer).
for line in filter_racer(list_res, 'Hamilton', 2009):
    print(line)
Esempio n. 12
0
def process(request):
    """Build the occurrence and richness report for the selected entity.

    Reads three values from ``request.POST``:

    * ``selected_entity`` -- JSON object with a ``type`` key; when the type
      is ``'draw'`` the location cuts come from
      ``get_occurrence_location_cuts``, otherwise a point cut is built from
      ``type`` and ``value``.
    * ``rainfall_filter`` / ``elevation_filter`` -- optional; a non-empty
      value adds a point cut on that dimension.

    Returns:
        A ``Response`` whose payload has the keys ``summary``
        (``occurrence_sum``, ``richness``, ``unique_taxa_in_entity``),
        ``records`` (a list of per-species rows; empty when no
        species-level occurrence matches), ``columns`` and ``area``.
    """
    agg = [
        {
            "name": "occurrence_sum",
            "label": "Nombre d'occurrences observées",
            "function": "sum",
            "measure": "occurrence_count",
        },
        {
            "name": "richness",
            "label": "Nombre d'espèces observées",
            "measure": "taxon_dimension_id",
            "function": "count_distinct",
        },
    ]
    dm = get_dimensional_model(
        'taxon_observed_occurrences',
        agg,
    )
    workspace = dm.get_cubes_workspace()
    cube = workspace.cube('taxon_observed_occurrences')
    browser = workspace.browser(cube)
    selected_entity = json.loads(
        request.POST.get('selected_entity', None)
    )
    # Three parallel cut lists:
    #   cuts                 -- the selection itself
    #   invert_location_cuts -- everything outside the selected location
    #   invert_env_cuts      -- same location, inverted environment filters
    cuts = []
    invert_location_cuts = []
    invert_env_cuts = []
    invert_env = False
    if selected_entity['type'] == 'draw':
        location_cuts = get_occurrence_location_cuts(selected_entity)
        cuts += location_cuts
        invert_env_cuts += location_cuts
        invert_location_cuts += get_occurrence_location_cuts(
            selected_entity,
            invert=True
        )
        area = 0
    else:
        entity_cut = PointCut(
            selected_entity['type'],
            [selected_entity['value']]
        )
        cuts += [entity_cut]
        invert_env_cuts += [entity_cut]
        invert_location_cuts += [
            PointCut(
                selected_entity['type'],
                [selected_entity['value']],
                invert=True
            ),
        ]
        dim = get_dimension(selected_entity['type'])
        area = dim.get_value(selected_entity['value'], ["area"])[0]
    # Update cuts with rainfall filter
    rainfall_filter = request.POST.get('rainfall_filter', None)
    if rainfall_filter is not None and rainfall_filter != '':
        cuts += [
            PointCut('rainfall', [rainfall_filter])
        ]
        invert_env_cuts += [
            PointCut('rainfall', [rainfall_filter], invert=True)
        ]
        invert_env = True
    # Update cuts with elevation filter
    elevation_filter = request.POST.get('elevation_filter', None)
    if elevation_filter is not None and elevation_filter != '':
        cuts += [
            PointCut('elevation', [elevation_filter])
        ]
        invert_env_cuts += [
            PointCut('elevation', [elevation_filter], invert=True)
        ]
        invert_env = True
    df = pd.DataFrame(list(browser.facts(cell=Cell(cube, cuts))))
    summary = {'occurrence_sum': 0, 'richness': 0}
    records = []
    taxa_ids = pd.Index([])
    if len(df) > 0:
        # Init summary with occurrence_sum
        summary['occurrence_sum'] = df['occurrence_count'].sum()
        # Filter occurrences identified at species level for richness
        df_species = df[df['taxon_dimension.species'] != 'NS']
        # Build the per-species table only when there is something to show,
        # so `records` is always a list and never a leftover DataFrame.
        if len(df_species) > 0:
            taxa_ids = pd.Index(df_species['taxon_dimension_id'].unique())
            # Update summary with richness
            summary['richness'] = df_species['taxon_dimension_id'].nunique()
            # Init records with occurrence sum
            records_df = pd.DataFrame(
                df_species.groupby(
                    ['taxon_dimension.familia', 'taxon_dimension.genus',
                     'taxon_dimension.species']
                )['occurrence_count'].sum(),
                columns=['occurrence_count']
            ).rename(
                columns={'occurrence_count': 'occurrence_sum'},
            )
            records_df['richness'] = 1
            # Records to a list of dicts
            records = list(
                records_df.reset_index().to_dict(orient='index').values()
            )
    # Compute unique taxa in selected location indicator
    invert_loc_cell = Cell(cube, invert_location_cuts)
    invert_loc_df = pd.DataFrame(list(browser.facts(cell=invert_loc_cell)))
    invert_loc_taxa_ids = pd.Index([])
    if len(invert_loc_df) > 0:
        invert_loc_taxa_ids = pd.Index(
            invert_loc_df['taxon_dimension_id'].unique()
        )
    diff = taxa_ids.difference(invert_loc_taxa_ids)
    if invert_env:
        # Also drop taxa seen in the same location under the inverted
        # environment filters. The facts query runs once here; an extra
        # call whose result was discarded has been removed.
        invert_env_cell = Cell(cube, invert_env_cuts)
        invert_env_df = pd.DataFrame(list(browser.facts(cell=invert_env_cell)))
        if len(invert_env_df) > 0:
            invert_env_taxa_ids = pd.Index(
                invert_env_df['taxon_dimension_id'].unique()
            )
            diff = diff.difference(invert_env_taxa_ids)
    summary['unique_taxa_in_entity'] = len(diff)
    # Extract table attributes
    attributes = [
        'taxon_dimension.familia',
        'taxon_dimension.genus',
        'taxon_dimension.species',
    ]
    attributes_names = [
        (name, cube.attribute(name).label) for name in attributes
    ]
    aggregates_names = [(i.name, i.label) for i in cube.aggregates]
    return Response({
        'summary': summary,
        'records': records,
        'columns': attributes_names + aggregates_names,
        'area': area,
    })
Esempio n. 13
0
# Workspace over the sales database, browsing the "salestable" cube.
workspace = Workspace()
workspace.register_default_store(
    "sql",
    url="postgresql://*****:*****@localhost/willowood",
)
workspace.import_model("SalesTable.json")

browser = workspace.browser("salestable")
cube = browser.cube

# Totals over the whole cube (no cell restriction).
result = browser.aggregate()
print(result.summary["record_count"])
print(result.summary["Qty"])
print(result.summary["Value"])

# Trying out Yesterday, MTD, YTD, LYTD: totals for one master code only.
# (A point cut on "billing_date" can be appended to this list as well.)
cuts = [PointCut("bh_master_code", [45000002])]
cell = Cell(cube, cuts)
result1 = browser.aggregate(cell)

print(' Cut Portion: Qty:-  ', result1.summary["Qty"])
print(' Cut Portion: Value:-  ', result1.summary["Value"])
Esempio n. 14
0
      "==================================================")
#
# Whole-cube aggregation by item, printed as a fixed-width table.
result = browser.aggregate(drilldown=["item"])

print("%-20s%10s%10s%10s\n" % ("Category", "Count", "Total", "Double")
      + "-" * 50)

for row in result.table_rows("item"):
    record = row.record
    print("%-20s%10d%10d%10d" % (
        row.label,
        record["record_count"],
        record["amount_sum"],
        record["double_amount_sum"],
    ))

print("\nSlice where Category = Equity\n"
      "==================================================")

# Same table, restricted to the cell cut at item path ["e"].
cut = PointCut("item", ["e"])
cell = Cell(browser.cube, cuts=[cut])

result = browser.aggregate(cell, drilldown=["item"])

print("%-20s%10s%10s%10s\n" % ("Sub-category", "Count", "Total", "Double")
      + "-" * 50)

for row in result.table_rows("item"):
    record = row.record
    print("%-20s%10d%10d%10d" % (
        row.label,
        record["record_count"],
        record["amount_sum"],
        record["double_amount_sum"],
    ))
Esempio n. 15
0
def report(dim_name=None):
    """Render the drill-down report for one dimension of the cube.

    Without ``dim_name`` only the list of dimensions is rendered. Otherwise
    the ``cut`` request argument is parsed into a cell, the cell is
    aggregated with a drill-down on the dimension, and the template gets
    the current levels, the next level and the cell details.

    Args:
        dim_name: name of the dimension to browse, or ``None``.

    Returns:
        The rendered ``report.html`` template.
    """
    browser = workspace.browser(CUBE_NAME)
    cube = browser.cube

    if not dim_name:
        return render_template('report.html', dimensions=cube.dimensions)

    # The hierarchy gives the order of levels for the browsed dimension.
    dimension = cube.dimension(dim_name)
    hierarchy = dimension.hierarchy()

    # Turn the `cut` request parameter into actual cube cuts; only one
    # dimension is browsed at a time in this example.
    cell = Cell(cube, cuts_from_string(cube, request.args.get("cut")))

    # The cut on the browsed dimension tells "where we are": its path is
    # the current dimension path, empty when there is no such cut.
    cut = cell.cut_for_dimension(dimension)
    path = cut.path if cut else []

    # Do the work, do the aggregation.
    result = browser.aggregate(cell, drilldown=[dim_name])

    # No path means no cut on the dimension, hence no detail to show.
    details = browser.cell_details(cell, dimension)[0] if path else []

    # Current levels and the level the next drill-down will go to.
    levels = hierarchy.levels_for_path(path)
    next_level = hierarchy.next_level(levels[-1] if levels else None)

    # Are we at the very detailed level?
    is_last = hierarchy.is_last(next_level)

    return render_template(
        'report.html',
        dimensions=cube.dimensions,
        dimension=dimension,
        levels=levels,
        next_level=next_level,
        result=result,
        cell=cell,
        is_last=is_last,
        details=details,
    )
Esempio n. 16
0
def report(dim_name=None):
    """Render ``report.html`` for the cube, optionally browsing one dimension.

    When ``dim_name`` is falsy the template only receives the cube's
    dimensions. Otherwise the ``cut`` query argument selects the cell, which
    is aggregated with a drill-down on ``dim_name``.

    Args:
        dim_name: name of the dimension to browse, or ``None``.

    Returns:
        The rendered template.
    """
    browser = workspace.browser(CUBE_NAME)
    cube = browser.cube

    if not dim_name:
        return render_template('report.html', dimensions=cube.dimensions)

    # Level order comes from the dimension's hierarchy.
    dimension = cube.dimension(dim_name)
    hierarchy = dimension.hierarchy()

    # Parse the `cut` request parameter into a list of cube cuts.
    cutstr = request.args.get("cut")
    cuts = cuts_from_string(cube, cutstr)
    cell = Cell(cube, cuts)

    # Current path within the browsed dimension; empty without a cut.
    cut = cell.cut_for_dimension(dimension)
    path = cut.path if cut else []

    # Do the work, do the aggregation.
    result = browser.aggregate(cell, drilldown=[dim_name])

    # Details exist only when the dimension is actually cut.
    details = []
    if path:
        details = browser.cell_details(cell, dimension)[0]

    # Levels reached so far, and the level to drill down to next.
    levels = hierarchy.levels_for_path(path)
    deepest_level = levels[-1] if levels else None
    next_level = hierarchy.next_level(deepest_level)

    # Are we at the very detailed level?
    is_last = hierarchy.is_last(next_level)

    context = {
        'dimensions': cube.dimensions,
        'dimension': dimension,
        'levels': levels,
        'next_level': next_level,
        'result': result,
        'cell': cell,
        'is_last': is_last,
        'details': details,
    }
    return render_template('report.html', **context)
Esempio n. 17
0
    This function is like recursively traversing directories on a file system
    and aggregating the file sizes, for example.
    
    * `cell` - cube cell to drill-down
    * `dimension` - dimension to be traversed through all levels
    """

    if cell.is_base(dimension):
        return

    result = browser.aggregate(aggregates=["contract_amount_sum"], cell=cell, drilldown=[dimension])

    # for row in cubes.drilldown_rows(cell, result, dimension):
    for row in result.table_rows(dimension):
        indent = "    " * (len(row.path) - 1)
        print "%s%s: %d" % (indent, row.label, row.record["contract_amount_sum"])

        new_cell = cell.drilldown(dimension, row.key)
        drilldown(new_cell, dimension)

# Get whole cube
cell = Cell(cube)

# A parenthesised single-argument print gives the same output under the
# Python 2 print statement and the Python 3 print function.
print("Drill down through date hierarchy:")
drilldown(cell, cube.dimension("date"))

# print "Drill down through CPV hierarchy in 2011:"
# cell = cell.slice(cubes.PointCut("date", [2010,5]))
# drilldown(cell, cube.dimension("cpv"))