def name_usage(self, name, start_year=MAX_YEAR, stop_year=MAX_YEAR):
    """Tally how often ``name`` was used, per year and per gender.

    One categorized query per gender is run against ``self.names``; the
    ``'sum'`` statistic of every ``'year'`` category that comes back is
    copied into the result.

    :param name: value the ``name`` field must equal.
    :param start_year: first year of the range (inclusive).
    :param stop_year: last year of the range (inclusive).
    :returns: dict mapping every year in the range to ``{'M': n, 'F': n}``;
        a slot stays at 0 when no statistic was returned for it.
    """
    # Seed every requested year with zeros so the result has no gaps.
    usage = dict(
        (year, {'M': 0, 'F': 0})
        for year in xrange(start_year, stop_year + 1)
    )

    for gender in ['M', 'F']:
        name_matches = eq('name', name)
        gender_matches = eq('gender', gender)
        not_before = gte('year', start_year)
        not_after = lte('year', stop_year)
        q = Query(and_args(name_matches, gender_matches,
                           not_before, not_after))

        spec = CategorizeSpec()
        spec.stats = StatsSpec('year', 'count')
        q.add_categorization(spec)
        # NOTE(review): limit(0) presumably suppresses the document hits so
        # only the categories come back -- confirm against the client docs.
        q.limit(0)

        response = self.names.find(q.build())
        try:
            per_year = response.categories['year']
            for year_key, stats in per_year.items():
                # Keys go through float() first, presumably because they
                # arrive formatted like '1990.0' -- TODO confirm.
                usage[int(float(year_key))][gender] = stats['sum']
        except KeyError:
            # No 'year' stats means no results for that name; keep the zeros.
            pass

    return usage
def test_categorize_by_query(self):
    """Categorizing by a sub-query must yield at least one category."""
    match_all = Query(regex('name', '.*'))

    spec = CategorizeSpec()
    spec.add_categorize_query(eq('name', 'Dan'))
    match_all.add_categorization(spec)
    match_all.limit(0)

    response = self.test_collection.find(match_all.build())
    category_count = len(response.categories)
    self.assertTrue(category_count > 0)
def test_categorize_with_stats(self):
    """Categorizing with a ``StatsSpec`` must yield at least one category."""
    # Single parenthesised argument: prints the same text under Python 2.
    print('\nTest categorize with stats\n')
    match_all = Query(regex('name', '.*'))

    spec = CategorizeSpec()
    spec.stats = StatsSpec('name', 'age')
    match_all.add_categorization(spec)
    match_all.limit(0)

    response = self.test_collection.find(match_all.build())
    category_count = len(response.categories)
    self.assertTrue(category_count > 0)
def test_categorize_by_field(self):
    """Categorizing on the ``name`` field must yield at least one category."""
    # Single parenthesised argument: prints the same text under Python 2.
    print('\nTest categorize by field\n')
    match_all = Query(regex('name', '.*'))

    spec = CategorizeSpec()
    spec.field = 'name'
    match_all.add_categorization(spec)
    match_all.limit(0)

    response = self.test_collection.find(match_all.build())
    category_count = len(response.categories)
    self.assertTrue(category_count > 0)
def test_categorize_by_range(self):
    """Categorizing ``age`` by a ``RangeSpec`` must yield a category."""
    # Single parenthesised argument: prints the same text under Python 2.
    print('\nTest categorize by range')
    match_all = Query(regex('name', '.*'))

    age_buckets = RangeSpec('age', 1, 10, 1)
    spec = CategorizeSpec()
    spec.range_spec = age_buckets
    match_all.add_categorization(spec)
    match_all.limit(0)

    response = self.test_collection.find(match_all.build())
    category_count = len(response.categories)
    self.assertTrue(category_count > 0)
def categorize_by_field(query_field, field):
    """Demo: categorize every document by the values of ``field``.

    Prints a banner, the built query, and whatever ``curl`` returns for the
    equivalent ``PUT query/all`` request, then runs the query against
    ``demo_collection`` and pretty-prints the response document.

    :param query_field: field matched with the catch-all regex ``'.*'``.
    :param field: field assigned to the categorization spec.
    """
    banner = '\nCategorize by value for field, {field}'.format(field=field)
    print(banner)

    q = Query(regex(query_field, '.*'))
    q.limit(0)
    spec = CategorizeSpec()
    spec.field = field
    q.add_categorization(spec)

    # build() is called once per consumer, exactly as before.
    payload = json.dumps(q.build())
    query_pprint(q.build())
    curl_output = curl('PUT', 'query/all', data=payload, accept_header=True)
    print('\n' + curl_output)

    response = demo_collection.find(q.build())
    json_pprint(response.response_doc)
def categorize_with_stats(field, stat_field):
    """Demo: categorize with a ``StatsSpec(field, stat_field)``.

    Prints a banner, the built query, and whatever ``curl`` returns for the
    equivalent ``PUT query/all`` request, then runs the query against
    ``demo_collection`` and pretty-prints the response document.

    :param field: field matched with the catch-all regex ``'.*'`` and passed
        as the first ``StatsSpec`` argument.
    :param stat_field: second ``StatsSpec`` argument (presumably the field
        the statistics are computed over -- confirm against the client).
    """
    banner = '\nCategorize with stats on field, {stat_field}'.format(
        stat_field=stat_field)
    print(banner)

    q = Query(regex(field, '.*'))
    q.limit(0)
    spec = CategorizeSpec()
    spec.stats = StatsSpec(field, stat_field)
    q.add_categorization(spec)

    # build() is called once per consumer, exactly as before.
    payload = json.dumps(q.build())
    query_pprint(q.build())
    curl_output = curl('PUT', 'query/all', data=payload, accept_header=True)
    print('\n' + curl_output)

    response = demo_collection.find(q.build())
    json_pprint(response.response_doc)
def partial_name_search(self, name_prefix, start_year=MAX_YEAR,
                        stop_year=MAX_YEAR):
    """Sum usage per name for names matching ``name_prefix + '.*'``.

    Runs one categorized query against ``self.names`` and collects the
    ``'sum'`` statistic of every ``'name'`` category that comes back.

    :param name_prefix: text placed in front of ``'.*'`` to form the regex
        applied to the ``name`` field.
    :param start_year: lower bound passed to ``gte('year', ...)``.
    :param stop_year: upper bound passed to ``lte('year', ...)``.
    :returns: dict mapping each returned name to its ``'sum'`` statistic;
        empty when the response carries no ``'name'`` category.
    """
    # NOTE(review): name_prefix goes into the regex unescaped, so regex
    # metacharacters in it are interpreted -- confirm callers expect that.
    q = Query(and_args(regex('name', name_prefix + '.*'),
                       gte('year', start_year),
                       lte('year', stop_year)))
    q.limit(0)
    categorize = CategorizeSpec()
    categorize.stats = StatsSpec('name', 'count')
    q.add_categorization(categorize)
    result = self.names.find(q.build())

    try:
        name_stats = result.categories['name']
    except KeyError:
        # The 'name' category is presumably absent when nothing matched;
        # report that as "no names" instead of leaking the KeyError.
        return dict()

    return dict((matched, stat['sum']) for matched, stat in name_stats.items())
def categorize_with_stats(field, stat_field):
    """Demo: run a stats categorization and show the request and response.

    Prints a banner, the built query, and whatever ``curl`` returns for the
    equivalent ``PUT query/all`` request, then runs the query against
    ``demo_collection`` and pretty-prints the response document.

    :param field: field matched with the catch-all regex ``'.*'`` and passed
        as the first ``StatsSpec`` argument.
    :param stat_field: second ``StatsSpec`` argument (presumably the field
        the statistics are computed over -- confirm against the client).
    """
    heading = '\nCategorize with stats on field, {stat_field}'.format(
        stat_field=stat_field)
    print(heading)

    q = Query(regex(field, '.*'))
    q.limit(0)
    stats_category = CategorizeSpec()
    stats_category.stats = StatsSpec(field, stat_field)
    q.add_categorization(stats_category)

    # build() is called once per consumer, exactly as before.
    body = json.dumps(q.build())
    query_pprint(q.build())
    curl_text = curl('PUT', 'query/all', data=body, accept_header=True)
    print('\n' + curl_text)

    found = demo_collection.find(q.build())
    json_pprint(found.response_doc)
def categorize_by_range(query_field, field, start, end, increment):
    """Demo: categorize ``field`` with ``RangeSpec(field, start, end, increment)``.

    Prints a banner, the built query, and whatever ``curl`` returns for the
    equivalent ``PUT query/all`` request, then runs the query against
    ``demo_collection`` and pretty-prints the response document.

    :param query_field: field matched with the catch-all regex ``'.*'``.
    :param field: first ``RangeSpec`` argument.
    :param start: second ``RangeSpec`` argument.
    :param end: third ``RangeSpec`` argument.
    :param increment: fourth ``RangeSpec`` argument.
    """
    banner = '\nCategorize by range for field, {field}'.format(field=field)
    print(banner)

    q = Query(regex(query_field, '.*'))
    q.limit(0)
    spec = CategorizeSpec()
    buckets = RangeSpec(field, start, end, increment)
    spec.range_spec = buckets
    q.add_categorization(spec)

    # build() is called once per consumer, exactly as before.
    payload = json.dumps(q.build())
    query_pprint(q.build())
    curl_output = curl('PUT', 'query/all', data=payload, accept_header=True)
    print('\n' + curl_output)

    response = demo_collection.find(q.build())
    json_pprint(response.response_doc)