コード例 #1
0
    def _overall_stats(self):
        """Compute package- and resource-level summary statistics.

        Always assigns ``self.overall_package_stats`` (aggregate of the
        package table) and ``self.resource_stats`` (resource table with an
        added boolean ``open_format`` column).

        When the package/resource table has at least one row,
        ``self.resource_stats`` is replaced by its aggregate, and
        ``self.open_datasets``, ``self.open_format_count`` and
        ``self.format_count`` are assigned before ``self.compute_dates()``
        is called. With an empty table none of those extra attributes are
        assigned by this method.
        """
        def _tally_open_licenses(column):
            # Each membership test is a bool, so the sum is the number of
            # values found in utils.OPEN_LICENSES.
            return sum(value in utils.OPEN_LICENSES
                       for value in column.values())

        open_license_summary = agate.Summary(
            'license_id', agate.Number(), _tally_open_licenses)
        self.overall_package_stats = self._package_table().aggregate(
            [('open_data_count', open_license_summary)])

        resource_table = self._package_resource_table()
        open_format_flag = agate.Formula(agate.Boolean(), open_formats_count)
        self.resource_stats = resource_table.compute(
            [('open_format', open_format_flag)])

        # Without any resource rows there is nothing further to summarise.
        if len(self._package_resource_table()) == 0:
            return

        resource_aggregations = [
            ('open_format_count', agate.Count('open_format', True)),
            ('min_date', agate.Min('created')),
            ('max_date', agate.Max('created')),
        ]
        self.resource_stats = self.resource_stats.aggregate(
            resource_aggregations)

        # One row per distinct "format" value; counting those rows gives the
        # number of different formats.
        per_format = self._package_resource_table().group_by('format')
        format_table = per_format.aggregate([('count', agate.Count())])
        format_summary = format_table.aggregate(
            [('different_formats', agate.Count())])

        self.open_datasets = self.overall_package_stats.get(
            'open_data_count', 0)
        self.open_format_count = self.resource_stats.get(
            'open_format_count', 0)
        self.format_count = format_summary.get('different_formats', 0)
        self.compute_dates()
コード例 #2
0
 def get_package_date_aggregates(self, package_table):
     """Aggregate the minimum and maximum of the ``created`` column.

     Returns whatever ``package_table.aggregate`` yields for the two
     aggregations, named ``min_date`` and ``max_date``.
     """
     date_aggregations = [
         ('min_date', agate.Min('created')),
         ('max_date', agate.Max('created')),
     ]
     return package_table.aggregate(date_aggregations)
コード例 #3
0
    country_json = json.load(f)

# Map each record's 'name' to its 'parent' value from the loaded JSON.
country_dict = {dct['name']: dct['parent'] for dct in country_json}

# Add a text column 'continent' whose value is computed by get_country.
continent_formula = agate.Formula(agate.Text(), get_country)
cpi_and_cl = cpi_and_cl.compute([('continent', continent_formula)])

grp_by_cont = cpi_and_cl.group_by('continent')
print(grp_by_cont)

for cont, table in grp_by_cont.items():
    print(cont, len(table.rows))

# Eyeballing the output, Africa and Asia appear to have high values.
# That alone, however, does not make the data easy to reason about.
# This is where aggregation methods come in.
# Compare how the continents differ with respect to perceived government
# corruption and child labor.
continent_aggregations = [
    ('cl_mean', agate.Mean('Total (%)')),
    ('cl_max', agate.Max('Total (%)')),
    ('cpi_median', agate.Median('CPI 2013 Score')),
    ('cpi_min', agate.Min('CPI 2013 Score')),
]
agg = grp_by_cont.aggregate(continent_aggregations)
agg.print_table()
print()
agg.print_bars('continent', 'cl_max')

# Persist the enriched table next to this script.
script_dir = os.path.dirname(os.path.abspath(__file__))
pickle_path = os.path.join(script_dir, 'cpi_and_cl_2.pickle')
with open(pickle_path, 'wb') as f:
    pickle.dump(cpi_and_cl, f)
コード例 #4
0
ファイル: app.py プロジェクト: gutte/course-assign
courselist.output_csv('courselist')

# Round-trip the table through a CSV file as a workaround for a
# .normalize() indexing issue.
selected_courses.to_csv('temp.csv')
try:
    selected_courses = agate.Table.from_csv('temp.csv')
finally:
    # Remove the scratch file even when re-reading it fails, so a failed
    # run does not leave temp.csv behind.
    os.remove('temp.csv')


# Attach course details and each student's preference, then keep only the
# columns needed for the reports below.
selected_courses = (
    selected_courses
    .join(courselist, 'courseid', 'id')
    .join(prefs_n, ['student', 'course'], ['student', 'course'])
    .select(['student', 'courseid', 'course', 'block', 'preference'])
)

selected_courses = selected_courses.order_by(
    lambda r: (r['student'], r['block']))

selected_courses.output_csv('selections_by_student')

# Same selections, re-ordered by course and block for the per-course report.
by_course = selected_courses.select(
    ['course', 'block', 'courseid', 'student'])
by_course.order_by(
    lambda r: (r['course'], r['block'])).output_csv('selections_by_course')

# finally output something on the console

selected_courses.pivot('block').print_table()

print('Highest preference used:')
# First pivot: the maximum preference per student (column 'Max');
# second pivot: how many students share each maximum.
(
    selected_courses
    .pivot('student', aggregation=agate.Max('preference'))
    .pivot('Max')
    .order_by('Max')
    .print_table()
)
コード例 #5
0
    return new_table


if __name__ == '__main__':
    data_lists = generate_test_data()

    # Build the agate table from the generated rows.
    tbl = agate.Table(
        data_lists,
        column_names=column_names,
        column_types=column_types,
    )

    # Summarise the salary column per 'pb' group.
    payband_aggregations = [
        ('count', agate.Count()),
        ('sal_min', agate.Min('salary')),
        ('sal_max', agate.Max('salary')),
        ('sal_median', agate.Median('salary')),
    ]
    by_payband = tbl.group_by('pb')
    summary_tbl = by_payband.aggregate(payband_aggregations)

    # Print the summary as a table, then as a bar chart of group sizes.
    print('Model data summary:\n')
    summary_tbl.print_table()
    print()
    summary_tbl.print_bars('pb', 'count', width=40)

    # ---Generate random numbers for simulation
    new_table = _add_random_column(tbl)

    # Count rows per 'random_group' and sort by that column.
    rand_tbl_count = new_table.pivot('random_group')
    rand_tbl_count = rand_tbl_count.order_by('random_group')
コード例 #6
0
    print('{}: {}%'.format(r['Countries and areas'], r['Female']))

# Keep only rows that have a 'Female' value, then list the ten highest.
female_data = table.where(lambda r: r['Female'] is not None)
most_females = female_data.order_by('Female', reverse=True).limit(10)
for r in most_females.rows:
    print('{}: {}%'.format(r['Countries and areas'], r['Female']))

# Best-effort attempt on the unfiltered column; a failure is deliberately
# ignored because the same column is retried below on rows where the value
# is present.
try:
    table.columns['Place of residence (%) Urban'].mean()
except Exception:
    # Narrower than a bare ``except:`` so that KeyboardInterrupt and
    # SystemExit still propagate.
    pass

# Rows that have an urban place-of-residence value.
has_por = table.where(lambda r: r['Place of residence (%) Urban'] is not None)

has_por.columns['Place of residence (%) Urban'].aggregate(agate.Mean())
has_por.columns['Place of residence (%) Urban'].aggregate(agate.Max())

has_por.columns['Rural'].aggregate(agate.Mean())
has_por.columns['Rural'].aggregate(agate.Max())

# First row whose 'Rural' value exceeds 50 (the result is not stored).
has_por.find(lambda x: x['Rural'] > 50)

# NOTE(review): the (computation, column name) pair order matches older agate
# releases; newer releases expect (column name, computation) — confirm
# against the installed agate version before changing.
ranked = table.compute([(agate.Rank('Total (%)',
                                    reverse=True), 'Total Child Labor Rank')])

# If we wanted a column showing children not working percentage ...


def reverse_percent(row):
    """Return 100 minus the row's ``'Total (%)'`` value."""
    total_percent = row['Total (%)']
    return 100 - total_percent