def _count(self, args):
    """Count all persons, optionally broken down by one column.

    Every person returned by ``Person.find()`` is loaded into a DataFrame.
    When ``args`` carries a truthy ``'key'``, the response additionally
    contains ``'rows'``: one ``{<key>: value, 'count': n}`` entry per
    distinct value of that column, ordered by the value.

    Args:
        args: Mapping of request parameters; only ``'key'`` is read.

    Returns:
        dict with ``'status'``, ``'current_date'`` (the maximum of the
        ``release_date`` column), ``'total'`` (number of persons) and,
        when a key was given, ``'rows'``.
    """
    group_key = args.get('key')
    people = Person.find()
    frame = pd.DataFrame(people)

    response = {
        'status': 'success',
        'current_date': frame['release_date'].max(),
        'total': len(people),
    }

    if group_key:
        occurrences = frame[group_key].value_counts().to_dict()
        breakdown = [
            {group_key: value, 'count': count}
            for value, count in occurrences.items()
        ]
        # Present the breakdown ordered by the grouped value.
        breakdown.sort(key=lambda entry: entry[group_key])
        response['rows'] = breakdown

    return response
def _find(self, args=None):
    """Search persons using the filters described by ``args``.

    Filter values are taken from ``args`` itself (not from the global
    request object), so the method gives the same result whether it is
    handed the live query string or an ordinary dict.

    Recognised parameters (a parameter with a falsy value is ignored):
        age, sex, area, reason, status: equality filters.
        cluster_no: equality filter, converted with ``int()``.
        release_date: equality filter on ``release_date``.
        from_date / to_date: ``>=`` / ``<=`` bounds on ``release_date``;
            only considered when ``release_date`` is not given.
        offset: decimal string, defaults to 0 when missing or invalid.
        limit: decimal string, defaults to ``None`` when missing or invalid.

    Args:
        args: Mapping of query parameters. Presumably the request's query
            arguments in normal use - confirm against the caller.
            ``None`` (the default) means no parameters.

    Returns:
        dict with ``'status'``, ``'current_date'``, ``'persons'`` and
        ``'total'`` (the count for the same filters, ignoring paging).

    Raises:
        ValueError: if ``cluster_no`` is present but not parseable by
            ``int()``.
    """
    if args is None:
        # Avoid a shared mutable default argument.
        args = {}

    def to_int(raw, default):
        # Accept only plain decimal digits; everything else (missing,
        # empty, negative, non-numeric) falls back to the default.
        # ``str()`` keeps this safe for non-string values too.
        text = str(raw)
        return int(text) if text.isdecimal() else default

    filters = []

    # Simple equality filters, in a fixed order.
    for name in ('age', 'sex', 'area', 'reason', 'status'):
        value = args.get(name)
        if value:
            filters.append({'key': name, 'value': value})

    cluster_no = args.get('cluster_no')
    if cluster_no:
        filters.append({'key': 'cluster_no', 'value': int(cluster_no)})

    release_date = args.get('release_date')
    if release_date:
        filters.append({'key': 'release_date', 'value': release_date})
    else:
        # An exact release date takes precedence over a date range.
        for name, symbol in (('from_date', '>='), ('to_date', '<=')):
            bound = args.get(name)
            if bound:
                filters.append({
                    'key': 'release_date',
                    'symbol': symbol,
                    'value': bound,
                })

    offset = to_int(args.get('offset', ''), 0)
    limit = to_int(args.get('limit', ''), None)

    persons = Person.find(filters=filters, offset=offset, limit=limit)
    total = Person.count(filters=filters)
    current_date = Person.current_date()

    return {
        'status': 'success',
        'current_date': current_date,
        'persons': persons,
        'total': total,
    }
def _cross(self, args):
    """Cross-tabulate all persons by two columns named in ``args``.

    ``args['row']`` and ``args['col']`` name the two columns. If either is
    missing or falsy, a ``{'status': 'failure', 'message': ...}`` dict is
    returned and no data is loaded.

    Otherwise every person from ``Person.find()`` is loaded into a
    DataFrame and the response contains:
        rows: one entry per distinct value of the ``row`` column, holding
            that value under the ``row`` column's name, ``'values'`` (a
            list of ``{'name': <col value>, 'count': n}``) and ``'total'``
            (occurrences of the row value, 0 when unknown).
        col_total: ``{'name': <col value>, 'count': n}`` per distinct
            value of the ``col`` column.
        current_date: maximum of the ``release_date`` column.
        total: number of persons.
    """
    def failure(message):
        return {'status': 'failure', 'message': message}

    row_key = args.get('row')
    if not row_key:
        return failure('Parameter "row" not defined.')

    col_key = args.get('col')
    if not col_key:
        return failure('Parameter "col" not defined.')

    people = Person.find()
    frame = pd.DataFrame(people)
    latest_release = frame['release_date'].max()

    totals_by_row = frame[row_key].value_counts().to_dict()
    # to_dict() on the crosstab is keyed by the row column's values, each
    # mapping a col value to its count.
    cells_by_row = pd.crosstab(frame[col_key], frame[row_key]).to_dict()

    rows = [
        {
            row_key: row_value,
            'values': [
                {'name': col_value, 'count': count}
                for col_value, count in cells.items()
            ],
            'total': totals_by_row.get(row_value) or 0,
        }
        for row_value, cells in cells_by_row.items()
    ]

    totals_by_col = frame[col_key].value_counts().to_dict()
    col_total = [
        {'name': col_value, 'count': count}
        for col_value, count in totals_by_col.items()
    ]

    return {
        'status': 'success',
        'current_date': latest_release,
        'rows': rows,
        'col_total': col_total,
        'total': len(people),
    }
# 'refer': {'government': 'nagoya', 'release_no': 305}, # 'release_date': '2020-07-15', # 'remarks': [], # 'route': {'area': '東京都', 'text': '東京都'}, # 'sex': 'female', # }) return result # Main if __name__ == '__main__': last_date = None last_no = 0 current_persons = Person.find(order=['-no'], limit=1) if len(current_persons) > 0: last_date = current_persons[-1]['release_date'] last_no = current_persons[-1]['no'] releases = read_aichi_release(last_date=last_date) for release in releases: # filepath = os.path.join( # config.DATA_DIR, 'aichi/releases', # 'aichi_release_{}.pdf'.format(release['pdf']['current_date'].strftime(r'%Y%m%d'))) filepath = os.path.join( config.DATA_DIR, 'aichi/releases', 'aichi_release_{}.pdf'.format( re.sub(r'^.*\/(\d+)\.pdf$', r'\1', release['pdf']['url']))) if not os.path.exists(filepath): logger.info('Download PDF file %s', filepath) util.download_file(release['pdf']['url'], filepath=filepath)