def test_4():
    """Spider-dataset example.
    question : What are the themes of farm competitions sorted by year in
               ascending order?
    """
    competition_table = data.spider_eval.evaluation.get_table(
        'farm', 'farm_competition')
    query_result = topk.topk(competition_table,
                             'Year', ['Theme'],
                             True,
                             -1,
                             slices=None,
                             date_range=None,
                             date_column_name='date',
                             day_first=False,
                             group_columns=None,
                             summary_operator=None)
    print(query_result)
    result_table, suggestions = query_result
    expected_result = """   Year                Theme
0  2002               Aliens
1  2003             MTV Cube
2  2004      Valentine's Day
3  2005         MTV Asia Aid
4  2006          Codehunters
5  2013  Carnival M is back!"""
    expected_suggestions = """[]"""
    # Ascending sort by Year should surface all rows with no oversights.
    assert result_table.to_string() == expected_result
    assert str(suggestions) == expected_suggestions
def test_1():
    """IPL-dataset example.
    question : top-15 city based on win_by_runs in season = 2017
               in the date range 2008-05-08 to 2017-04-12
    """
    match_table = pandas.read_csv('data/matches.csv')
    query_result = topk.topk(match_table,
                             'win_by_runs', ['city'],
                             False,
                             40,
                             slices=[('season', enums.Filters.EQUAL_TO, 2017)],
                             date_range=('2008-05-08', '2017-04-12'),
                             date_column_name='date',
                             day_first=False)
    print(query_result)
    result_table, suggestions = query_result
    expected_result = """        city  win_by_runs
0       Pune           97
1  Hyderabad           35
2  Bangalore           15
3       Pune            0
4     Rajkot            0
5     Indore            0
6  Hyderabad            0
7     Mumbai            0
8     Indore            0
9     Mumbai            0"""
    expected_suggestions = """[{'suggestion': 'The results has duplicates', 'oversight': <Oversights.DUPLICATES_IN_TOPK: 1>}, {'suggestion': 'Instead of 40 only 10 rows are present in the results', 'oversight': <Oversights.TOPK_WHEN_LESS_THAN_K_PRESENT: 2>}]"""
    # Both the duplicates oversight and the fewer-than-k oversight fire here.
    assert result_table.to_string() == expected_result
    assert str(suggestions) == expected_suggestions
def test_6():
    """Spider-dataset example.
    question : What are the dates of publications in descending order of price?
    """
    publication_table = data.spider_eval.evaluation.get_table(
        'book_2', 'publication')
    query_result = topk.topk(publication_table,
                             'Price', ['Publication_Date'],
                             False,
                             -1,
                             slices=None,
                             date_range=None,
                             date_column_name='date',
                             day_first=False)
    print(query_result)
    result_table, suggestions = query_result
    expected_result = """  Publication_Date       Price
0      August 2008  15000000.0
1       March 2008   6000000.0
2        June 2006   4100000.0
3     October 2005   3000000.0
4      August 2008   3000000.0
5       March 2007   2000000.0
6       April 2007   2000000.0"""
    expected_suggestions = """[{'suggestion': 'The results has duplicates', 'oversight': <Oversights.DUPLICATES_IN_TOPK: 1>}]"""
    # "August 2008" appears twice, so the duplicates oversight is expected.
    assert result_table.to_string() == expected_result
    assert str(suggestions) == expected_suggestions
def test_1():
    """IPL-dataset example (date_format variant of the query).
    question : top-15 city based on win_by_runs in season = 2017
               in the date range 2008-05-08 to 2017-04-12
    """
    match_table = pandas.read_csv('data/matches.csv')
    query_result = topk.topk(match_table,
                             'win_by_runs', ['city'],
                             False,
                             40,
                             slices=[('season', enums.Filters.EQUAL_TO, 2017)],
                             date_range=('2008-05-08', '2017-04-12'),
                             date_column_name='date',
                             date_format='%Y-%m-%d')
    print(query_result)
    result_table, suggestions = query_result
    expected_result = """        city  win_by_runs
0       Pune           97
1  Hyderabad           35
2  Bangalore           15
3       Pune            0
4     Rajkot            0
5     Indore            0
6  Hyderabad            0
7     Mumbai            0
8     Indore            0
9     Mumbai            0"""
    expected_suggestions = """[{'suggestion': 'The results has duplicates', 'oversight_name': 'Duplicates in top-k'}]"""
    # This variant reports oversights by name rather than by enum member.
    assert result_table.to_string() == expected_result
    assert str(suggestions) == expected_suggestions
def test_2():
    """IPL-dataset example with an aggregation.
    question : top 5 player_of_match based on avg(win_by_runs)
               in season 2017 in date range 2008-05-08 to 2017-04-12
    """
    match_table = pandas.read_csv('data/matches.csv')
    query_result = topk.topk(match_table,
                             'win_by_runs', ['player_of_match'],
                             False,
                             5,
                             slices=[('season', enums.Filters.EQUAL_TO, 2017)],
                             date_range=('2017-05-09', '2017-05-12'),
                             date_column_name='date',
                             day_first=False,
                             summary_operator=enums.SummaryOperators.MEAN)
    print(query_result)
    result_table, suggestions = query_result
    expected_result = """  player_of_match  MEAN of win_by_runs
0       MM Sharma                   14
1         KK Nair                    7
2         WP Saha                    7
3         SS Iyer                    0"""
    expected_suggestions = """[{'suggestion': 'Instead of 5 only 4 rows are present in the results', 'oversight': <Oversights.TOPK_WHEN_LESS_THAN_K_PRESENT: 2>}, {'oversight': <Oversights.REGRESSION_TO_THE_MEAN: 4>, 'suggestion': "very few of the top-k in the given date range will be in the previous window's top-k"}]"""
    # Only 4 players qualify for a top-5, and the previous-window ranks
    # differ, so two oversights are expected.
    assert result_table.to_string() == expected_result
    assert str(suggestions) == expected_suggestions
# Beispiel #6
# 0
def test_6():
    """Spider-dataset example (large-k, date_format variant).
    question : What are the dates of publications in descending order of price?
    """
    publication_table = data.spider_eval.evaluation.get_table(
        'book_2', 'publication')
    query_result = topk.topk(publication_table,
                             'Price', ['Publication_Date'],
                             False,
                             10000,
                             slices=None,
                             date_range=None,
                             date_column_name='date',
                             date_format='%Y-%m-%d')
    print(query_result)
    result_table, suggestions = query_result
    expected_result = """  Publication_Date       Price
0      August 2008  15000000.0
1       March 2008   6000000.0
2        June 2006   4100000.0
3     October 2005   3000000.0
4      August 2008   3000000.0
5       March 2007   2000000.0
6       April 2007   2000000.0"""
    expected_suggestions = """['The results has duplicates, you forgot to apply group by']"""
    # This variant returns plain-string suggestions instead of dicts.
    assert result_table.to_string() == expected_result
    assert str(suggestions) == expected_suggestions
def test_5():
    """Spider-dataset example with a MAX aggregation.
    question : For each city list their names in decreasing order by their
               highest station latitude.
    """
    station_table = data.spider_eval.evaluation.get_table('bike_1', 'station')
    query_result = topk.topk(station_table,
                             'lat', ['city'],
                             False,
                             -1,
                             slices=None,
                             date_range=None,
                             date_column_name='date',
                             day_first=False,
                             summary_operator=enums.SummaryOperators.MAX)
    print(query_result)
    result_table, suggestions = query_result
    expected_result = """            city  MAX of lat
0  San Francisco   37.804770
1   Redwood City   37.491269
2      Palo Alto   37.448598
3  Mountain View   37.406940
4       San Jose   37.352601"""
    expected_suggestions = """[]"""
    # Grouping by city removes duplicates, so no oversights are expected.
    assert result_table.to_string() == expected_result
    assert str(suggestions) == expected_suggestions
# Beispiel #8
# 0
def topk_sim(input_vec, k_value):
    """Return the top-k training rows most similar to *input_vec*.

    Scores every row in the module-level ``train_rows`` against
    ``input_vec`` (using ``row[3]`` as the row's feature vector) and lets
    ``topk`` pick the ``k_value`` best-scoring pairs.

    Args:
        input_vec: feature vector to compare against the training set.
        k_value: number of neighbours to keep.

    Returns:
        List of ``(row, similarity)`` tuples selected by ``topk``.
    """
    # NOTE(review): relies on module-level train_rows, similarity and topk
    # defined elsewhere in this file.
    neighbours = [(row, similarity(input_vec, row[3])) for row in train_rows]
    # The original version re-iterated the result only to feed a
    # commented-out debug print; that dead loop has been removed.
    return topk(neighbours, k_value, True)
# Beispiel #9
# 0
def topk_sim(input_vec, k_value):
    """Return the top-k training rows most similar to *input_vec*.

    Scores every row in the module-level ``train_rows`` against
    ``input_vec`` (using ``row[3]`` as the row's feature vector) and lets
    ``topk`` pick the ``k_value`` best-scoring pairs.

    Args:
        input_vec: feature vector to compare against the training set.
        k_value: number of neighbours to keep.

    Returns:
        List of ``(row, similarity)`` tuples selected by ``topk``.
    """
    # NOTE(review): relies on module-level train_rows, similarity and topk
    # defined elsewhere in this file.
    neighbours = [(row, similarity(input_vec, row[3])) for row in train_rows]
    # The original version re-iterated the result only to feed a
    # commented-out debug print; that dead loop has been removed.
    return topk(neighbours, k_value, True)
def test_7():
    """Spider-dataset example, ascending sort over the full table.
    question : What is the name and salary of all employees in order of salary?
    """
    employee_table = data.spider_eval.evaluation.get_table(
        'flight_1', 'employee')
    query_result = topk.topk(employee_table,
                             'salary', ['name'],
                             True,
                             -1,
                             slices=None,
                             date_range=None,
                             date_column_name='date',
                             day_first=False)
    print(query_result)
    result_table, suggestions = query_result
    expected_result = """                name  salary
0        Milo Brooks      20
1        Donald King   18050
2    Richard Jackson   23980
3     Patricia Jones   24450
4        Linda Davis   27984
5   Elizabeth Taylor   32021
6      Haywood Kelly   32899
7       Chad Stewart   33546
8     David Anderson   43001
9     Barbara Wilson   43723
10      Robert Brown   44740
11    Michael Miller   48090
12     William Moore   48250
13   Jennifer Thomas   54921
14      William Ward   84476
15    Michael Miller   99890
16        Larry West  101745
17     William Jones  105743
18       Eric Cooper  114323
19       James Smith  120433
20      Dorthy Lewis  152013
21     John Williams  153972
22      Mary Johnson  178345
23       Karen Scott  205187
24        Mark Young  205187
25   Lawrence Sperry  212156
26   Angela Martinez  212156
27   Joseph Thompson  212156
28       Betty Adams  227489
29       Lisa Walker  256481
30     George Wright  289950"""
    expected_suggestions = """[{'suggestion': 'The results has duplicates', 'oversight': <Oversights.DUPLICATES_IN_TOPK: 1>}]"""
    # "Michael Miller" appears twice, triggering the duplicates oversight.
    assert result_table.to_string() == expected_result
    assert str(suggestions) == expected_suggestions
def test_3():
    """Spider-dataset example with a COUNT aggregation and k = 1.
    question : In which year were most departments established?
    """
    department_table = data.spider_eval.evaluation.get_table(
        'department_management', 'department')
    query_result = topk.topk(department_table,
                             'Department_ID', ['Creation'],
                             False,
                             1,
                             slices=None,
                             date_range=None,
                             date_column_name='date',
                             day_first=False,
                             summary_operator=enums.SummaryOperators.COUNT)
    print(query_result)
    result_table, suggestions = query_result
    expected_result = """  Creation  COUNT of Department_ID
0     1789                       2"""
    expected_suggestions = """[{'oversight': <Oversights.TOPK_VS_OTHERS: 6>, 'change_list': {'topKLimit': 14}, 'suggestion': 'The rows NOT in the top-k have a much larger sum over Department_ID than the rows in top-k', 'confidence_score': 0.15384615384615385}]"""
    # With k = 1 the excluded rows dominate, so the top-k-vs-others
    # oversight is expected.
    assert result_table.to_string() == expected_result
    assert str(suggestions) == expected_suggestions
# Beispiel #12
# 0
def test_3():
    """Spider-dataset example (date_format variant) with COUNT and k = 1.
    question : In which year were most departments established?
    """
    department_table = data.spider_eval.evaluation.get_table(
        'department_management', 'department')
    query_result = topk.topk(department_table,
                             'Department_ID', ['Creation'],
                             False,
                             1,
                             slices=None,
                             date_range=None,
                             date_column_name='date',
                             date_format='%Y-%m-%d',
                             summary_operator=SummaryOperators.COUNT)
    print(query_result)
    result_table, suggestions = query_result
    expected_result = """  Creation  Department_ID
0     1789              2"""
    expected_suggestions = """['No suggestions as date condition is not there.']"""
    # This variant keeps the original column name and returns a
    # plain-string suggestion.
    assert result_table.to_string() == expected_result
    assert str(suggestions) == expected_suggestions
def test_8():
    """Manually created dataset - to test similarity between rank vectors.
    question : Top-4 rating between 23/05/2010 to 25/05/2011
    """
    # In this database the ranks reverse if the previous window is considered.
    rating_table = pandas.read_csv('data/rating.csv')
    query_result = topk.topk(rating_table,
                             'Rating', ['User Name'],
                             True,
                             4,
                             slices=None,
                             date_range=('2010-05-23', '2011-05-25'),
                             date_column_name='date',
                             day_first=True)
    print(query_result)
    result_table, suggestions = query_result
    expected_result = """  User Name  Rating
0      Benq    3400
1     300iq    4300
2       cba    5200
3   tourist    6100"""
    expected_suggestions = """[{'oversight': <Oversights.REGRESSION_TO_THE_MEAN: 4>, 'suggestion': "The ranks of the top-k in the date range differs much from the previous window's top-k"}]"""
    # The reversed previous-window ranking triggers the
    # regression-to-the-mean oversight.
    assert result_table.to_string() == expected_result
    assert str(suggestions) == expected_suggestions
def test_2():
    """IPL-dataset example (date_format variant) with a MEAN aggregation.
    question : top 5 player_of_match based on avg(win_by_runs)
               in season 2017 in date range 2008-05-08 to 2017-04-12
    """
    match_table = pandas.read_csv('data/matches.csv')
    query_result = topk.topk(match_table,
                             'win_by_runs', ['player_of_match'],
                             False,
                             5,
                             slices=[('season', enums.Filters.EQUAL_TO, 2017)],
                             date_range=('2017-05-09', '2017-05-12'),
                             date_column_name='date',
                             date_format='%Y-%m-%d',
                             summary_operator=enums.SummaryOperators.MEAN)
    print(query_result)
    result_table, suggestions = query_result
    expected_result = """  player_of_match  win_by_runs
0       MM Sharma           14
1         KK Nair            7
2         WP Saha            7
3         SS Iyer            0"""
    expected_suggestions = """[{'oversight_name': 'Regression to the mean', 'suggestion': "very few of the top-k in the given date range will be in the previous window's top-k"}]"""
    # This variant keeps the original metric column name and reports the
    # oversight by name.
    assert result_table.to_string() == expected_result
    assert str(suggestions) == expected_suggestions
# Beispiel #15
# 0
def hello_http(request):
    """HTTP Cloud Function dispatching 'show', 'topk' and 'slice_compare'.

    Parses the query parameters out of the request JSON, builds a pandas
    DataFrame from the passed list-of-lists table, runs the requested
    intent, and returns the result table plus any oversight suggestions
    as a JSON string.

    Args:
        request (flask.Request): The request object.
        <http://flask.pocoo.org/docs/1.0/api/#flask.Request>
    Returns:
        The response text, or any set of values that can be turned into a
        Response object using `make_response`
        <http://flask.pocoo.org/docs/1.0/api/#flask.Flask.make_response>.
    Raises:
        Exception: if the intent name is not one of the supported ones.
    """
    request_json = request.get_json(silent=True)

    request_args = request.args

    # extracting the intent parameters from the json
    intent = request_json['intent']
    table = request_json['table']
    row_range = request_json['rowRange']
    metric = request_json['metric']
    dimensions = request_json['dimensions']
    summary_operator = request_json['summaryOperator']
    slices = request_json['slices']
    is_asc = request_json['isAsc']
    k = request_json['k']
    slice_compare_column = request_json['comparisonValue']
    date = request_json['dateRange']
    time_granularity = request_json['timeGranularity']
    row_start = row_range["rowStart"]
    row_end = row_range["rowEnd"]
    row_header = row_range["header"]

    # Converting the list of list into a pandas dataframe.
    # Row numbers in the request are 1-based; the header row is skipped.
    query_table = []
    for row in range(row_start - 1, row_end):
        if row != row_header - 1:
            query_table.append(table[row])
    query_table_dataframe = pandas.DataFrame(query_table,
                                             columns=table[row_header - 1])

    # Converting the variables that denote the date range into the
    # desired format.  A missing range arrives as the string "null".
    date_column_name = None
    date_range = None
    if date != "null":
        date_column_name = date['dateCol']
        date_range = (date['dateStart'], date['dateEnd'])

    # Converting the slices passed in the json into a
    # list of tuples (col, operator, val)
    slices_list = None
    if slices != "null":
        slices_list = []
        for item in slices:
            val = item['sliceVal']
            col = item['sliceCol']
            operator = _str_to_filter_enum(item['sliceOp'])
            slices_list.append((col, operator, val))

    if dimensions == 'null':
        dimensions = None

    if metric == 'null':
        metric = None

    summary_operator = _str_to_summary_operator_enum(summary_operator)

    suggestions = []

    if intent == 'show':
        query_table_dataframe = show(query_table_dataframe,
                                     slices=slices_list,
                                     metric=metric,
                                     dimensions=dimensions,
                                     summary_operator=summary_operator)
    elif intent == 'topk':
        query_result = topk.topk(query_table_dataframe,
                                 metric,
                                 dimensions,
                                 is_asc,
                                 k,
                                 summary_operator=summary_operator,
                                 date_column_name=date_column_name,
                                 date_range=date_range,
                                 slices=slices_list)
        query_table_dataframe = query_result[0]
        suggestions = query_result[1]

    elif intent == 'slice_compare':
        query_table_dataframe = slice_compare.slice_compare(
            query_table_dataframe,
            metric,
            dimensions, [], [],
            # Fixed: previously passed the undefined name
            # `slice_compare_column_list`, raising a NameError.
            slice_compare_column,
            summary_operator=summary_operator,
            date_column_name=date_column_name,
            date_range=date_range,
            slices=slices_list)
    else:
        raise Exception("Intent name does not match")

    # In updated suggestions, change_list is replaced with the json of
    # the new query.
    updated_suggestions = []
    for suggestion in suggestions:
        updated_suggestion = suggestion
        if 'change_list' in suggestion.keys():
            updated_suggestion['json'] = \
            _convert_change_list_to_new_query_json(request_json, suggestion['change_list'])
        updated_suggestions.append(updated_suggestion)

    suggestions = updated_suggestions
    final_table = []

    # converting into a json object and returning
    final_table = query_table_dataframe.values.tolist()
    final_table.insert(0, list(query_table_dataframe.columns.values))

    json_ret = {'outputTable': final_table, 'suggestions': suggestions}
    json_string = json.dumps(json_ret)
    return json_string
# Beispiel #16
# 0
def hello_http(request):
    """HTTP Cloud Function.

    Dispatches on the 'intent' field of the request JSON ('show', 'topk',
    'slice_compare', 'time_compare', 'correlation', 'trend'), runs the
    corresponding query on the passed table, and returns a JSON string
    with the output table and oversight suggestions.

    Args:
        request (flask.Request): The request object.
        <http://flask.pocoo.org/docs/1.0/api/#flask.Request>
    Returns:
        The response text, or any set of values that can be turned into a
        Response object using `make_response`
        <http://flask.pocoo.org/docs/1.0/api/#flask.Flask.make_response>.
    """
    request_json = request.get_json(silent=True)

    request_args = request.args

    # extracting the intent parameters from the json
    # NOTE(review): _get_value presumably returns None for missing keys —
    # the None checks below rely on that; confirm against its definition.
    intent = _get_value(request_json, 'intent')
    table = _get_value(request_json, 'table')
    metric = _get_value(request_json, 'metric')
    dimensions = _get_value(request_json, 'dimensions')
    summary_operator = _get_value(request_json, 'summaryOperator')
    slices = _get_value(request_json, 'slices')
    is_asc = _get_value(request_json, 'isAsc')
    k = _get_value(request_json, 'topKLimit')
    # NOTE(review): duplicate read of 'slices' — harmless but redundant.
    slices = _get_value(request_json, 'slices')
    slice_comparision_arg = _get_value(request_json, 'comparisonValue')
    time_comparision_arg = _get_value(request_json, 'compareDateRange')
    date = _get_value(request_json, 'dateRange')
    time_granularity = _get_value(request_json, 'timeGranularity')
    correlation_metrics = _get_value(request_json, 'correlationMetrics')
    rangeA1Notation = _get_value(request_json, 'rangeA1Notation')

    # Converting the list of list into a pandas dataframe.
    # Row 0 holds the column headers; the loop copies the data rows.
    # NOTE(review): `row != 0` is always true here since range starts at 1.
    query_table = []
    for row in range(1, len(table)):
        if row != 0:
            query_table.append(table[row])
    query_table_dataframe = pandas.DataFrame(query_table, columns=table[0])

    (all_dimensions,
     all_metrics) = _list_all_dimensions_metrics(query_table_dataframe,
                                                 dimensions, metric)

    # Remove empty columns
    query_table_dataframe = remove_empty_columns(query_table_dataframe)

    # Remove duplicate named columns
    query_table_dataframe = remove_duplicate_named_columns(
        query_table_dataframe)

    # Converting the variables that contain denote the
    # date range into the desired format.
    date_column_name = None
    date_range = None
    day_first = None
    if date != None:
        date_columns = request_json['dateColumns']
        date_column_name = date['dateCol']
        date_range = (date['dateStart'], date['dateEnd'])
        day_first = date_columns[date_column_name]['day_first']

    # Converting the Slices passed in the json into a
    # list of tuples (col, operator, val)
    slices_list = None
    if slices != None:
        slices_list = []
        for item in slices:
            val = item['sliceVal']
            col = item['sliceCol']
            operator = _str_to_filter_enum(item['sliceOp'])
            slices_list.append((col, operator, val))

    if dimensions == 'null':
        dimensions = None

    # NOTE(review): the variables bound in the next two branches are only
    # defined when the corresponding argument is present; the
    # 'slice_compare' / 'time_compare' intents below rely on them and
    # would raise NameError if their argument was omitted.
    if slice_comparision_arg is not None:
        slice_compare_column = slice_comparision_arg['comparisonColumn']
        slice1 = slice_comparision_arg['slice1']
        slice2 = slice_comparision_arg['slice2']

    if time_comparision_arg is not None:
        time_compare_column = time_comparision_arg['dateCol']
        date_range1 = (time_comparision_arg['dateStart1'],
                       time_comparision_arg['dateEnd1'])
        date_range2 = (time_comparision_arg['dateStart2'],
                       time_comparision_arg['dateEnd2'])
        day_first = request_json['dateColumns'][time_compare_column][
            'day_first']

    if metric == 'null':
        metric = None

    summary_operator = _str_to_summary_operator_enum(summary_operator)

    time_granularity = _str_to_time_granularity_enum(time_granularity)

    suggestions = []

    # Detect suspicious data points once, independent of the intent.
    wrong_points_suggestion = wrong_points.wrong_points(query_table_dataframe)

    if intent == 'show':
        query_table_dataframe = show(query_table_dataframe,
                                     slices=slices_list,
                                     metric=metric,
                                     dimensions=dimensions,
                                     summary_operator=summary_operator,
                                     date_column_name=date_column_name,
                                     day_first=day_first,
                                     date_range=date_range)

        if summary_operator == enums.SummaryOperators.MEAN:
            suggestions.append(get_hardcoded_mean_vs_median_suggestion())

        # Replace each suggestion's change_list with the JSON of the new
        # query it proposes.
        # NOTE(review): `func` is presumably the change-list-to-query-json
        # converter defined elsewhere in this module — confirm.
        updated_suggestions = []
        for suggestion in suggestions:
            updated_suggestion = suggestion
            if 'change_list' in suggestion.keys():
                updated_suggestion['json'] = func(request_json,
                                                  suggestion['change_list'])
            updated_suggestions.append(updated_suggestion)

        suggestions = updated_suggestions

    elif intent == 'topk':
        query_result = topk.topk(query_table_dataframe,
                                 metric,
                                 dimensions,
                                 is_asc,
                                 k,
                                 summary_operator=summary_operator,
                                 slices=slices_list,
                                 date_column_name=date_column_name,
                                 day_first=day_first,
                                 date_range=date_range)
        query_table_dataframe = query_result[0]
        suggestions = query_result[1]
        # Serialize each suggestion: expand change_list into a query JSON
        # and flatten the oversight enum to its name.
        updated_suggestions = []
        for suggestion in suggestions:
            updated_suggestion = suggestion
            if 'change_list' in suggestion.keys():
                updated_suggestion['json'] = func(request_json,
                                                  suggestion['change_list'])
            updated_suggestion['oversight'] = updated_suggestion[
                'oversight'].name
            updated_suggestions.append(updated_suggestion)

        suggestions = updated_suggestions

    elif intent == 'slice_compare':
        query_result = slice_compare.slice_compare(
            query_table_dataframe,
            metric,
            all_dimensions,
            all_metrics,
            slice_compare_column,
            slice1,
            slice2,
            summary_operator,
            date_column_name=date_column_name,
            date_range=date_range,
            day_first=day_first,
            slices=slices_list,
            dimensions=dimensions)
        query_table_dataframe = query_result[0]
        suggestions = query_result[1]
        updated_suggestions = []

        # Same suggestion post-processing as the 'topk' branch.
        for suggestion in suggestions:
            updated_suggestion = suggestion
            if 'change_list' in suggestion.keys():
                updated_suggestion['json'] = func(request_json,
                                                  suggestion['change_list'])
            updated_suggestion['oversight'] = updated_suggestion[
                'oversight'].name
            updated_suggestions.append(updated_suggestion)

        suggestions = updated_suggestions

    elif intent == 'time_compare':
        query_result = time_compare.time_compare(query_table_dataframe,
                                                 metric,
                                                 all_dimensions,
                                                 time_compare_column,
                                                 date_range1,
                                                 date_range2,
                                                 day_first,
                                                 summary_operator,
                                                 slices=slices_list,
                                                 dimensions=dimensions)
        query_table_dataframe = query_result[0]
        suggestions = query_result[1]
        updated_suggestions = []

        # Same suggestion post-processing as the 'topk' branch.
        for suggestion in suggestions:
            updated_suggestion = suggestion
            if 'change_list' in suggestion.keys():
                updated_suggestion['json'] = func(request_json,
                                                  suggestion['change_list'])
            updated_suggestion['oversight'] = updated_suggestion[
                'oversight'].name
            updated_suggestions.append(updated_suggestion)

        suggestions = updated_suggestions

    elif intent == 'correlation':
        query_table_dataframe = correlation.correlation(
            query_table_dataframe,
            correlation_metrics['metric1'],
            correlation_metrics['metric2'],
            slices=slices_list,
            date_column_name=date_column_name,
            day_first=day_first,
            date_range=date_range,
            dimensions=dimensions)

    elif intent == 'trend':
        query_table_dataframe = trend.trend(query_table_dataframe,
                                            metric,
                                            time_granularity,
                                            summary_operator,
                                            date_column_name=date_column_name,
                                            day_first=day_first,
                                            date_range=date_range,
                                            slices=slices_list)

    else:
        raise Exception("Intent name does not match")

    # Prepend the wrong-points warning (if any) so it is shown first.
    if wrong_points_suggestion is not None:
        wrong_points_suggestion['oversight'] = wrong_points_suggestion[
            'oversight'].name
        suggestions = [wrong_points_suggestion] + suggestions

    final_table = []

    # converting into a json object and returning
    final_table = query_table_dataframe.values.tolist()
    final_table.insert(0, list(query_table_dataframe.columns.values))

    json_ret = {'outputTable': final_table, 'suggestions': suggestions}

    # When the sheet range (A1 notation) is known, also return the
    # per-row pass/fail columns so the client can highlight cells.
    if rangeA1Notation is not None:
        all_row_labels = _get_all_row_labels(rangeA1Notation)
        all_column_labels = _get_all_column_labels(rangeA1Notation)
        cheader_to_clabel = _get_cheader_to_clabel(table, all_column_labels)

        if slices_list is not None:
            json_ret[
                'slicing_passed_list'] = insert_as_column.insert_as_column_show(
                    table,
                    cheader_to_clabel,
                    all_row_labels[0],
                    all_row_labels[-1],
                    all_column_labels[0],
                    all_column_labels[-1],
                    slices=slices_list)

        if intent == 'topk' and summary_operator is None:
            filter_column_label_number = _get_number_of_column_label(
                all_column_labels[-1]) + 1
            filter_column_label = _get_label_from_number(
                filter_column_label_number)

            json_ret[
                'list_topk_indices'] = insert_as_column.insert_as_column_topk_column(
                    table, cheader_to_clabel, all_row_labels[0],
                    all_row_labels[-1], all_column_labels[0],
                    all_column_labels[-1], filter_column_label, metric, is_asc,
                    k)

    json_string = json.dumps(json_ret)
    return json_string
# Beispiel #17
# 0
# NOTE(review): Python 2 script fragment. `args`, `n`, `m`, `sys`, `math`,
# `data` and `tk` are defined earlier in the file (not visible here).
# Parse the top-k size, data type and budget from the parsed arguments.
k=int(args.k)
t=int(args.t)
b=int(args.b)

# NOTE(review): this argument-count check runs after args were already
# used above; presumably argparse has already validated them — confirm.
if len(sys.argv) != 11:
	print "Incomplete arguments, type 'python run_ranked_insert.py -h' "
	exit()


# Calculate the budget:
budget = b

# Calculate the number of iterations
iterations = 50*math.log(0.51*(n+1))

print "Number of vectors " + str(n)
print "Size of vectors " + str(m)
print "The 'k' in top k " + str(k)
print "Type of data " + str(t)
print "Budget " + str(b)
print "Number of iterations "+str(iterations)
print "** ** ** "
print "Result"
print "** ** ** "

# Initialize sorted vectors
# NOTE(review): getSortedVectors presumably returns (vectors, per-vector
# means); 10000 looks like a value range or scale — confirm at definition.
vectors,means = data.getSortedVectors(n,m,10000,t)

# Run the ranked-insert top-k experiment and label the output "runs".
tk.topk(vectors,means,k,budget,iterations,"runs")
exit()