def _convert(self, _list, created_at: date):
    """Append reference-date columns and city codes to the municipality table.

    The first row of ``_list`` is a header whose population column (index 2)
    and card column (index 3) embed their reference dates, for example
    "総数(人口) 【H28.1.1時点】" and "交付枚数 【H29.5.15時点】".  The dates are
    extracted and appended to every data row as two extra columns.

    If no population date can be extracted that column is ``None``; if no
    card date can be extracted ``created_at`` is used instead.  When the
    first data row is the '全国' row it is dropped.

    The rows and the normalised header are passed to
    ``self._citycode.add_citycode``; its result is stored on ``self._list``
    and returned.
    """
    source_header = _list[0]

    population_date = StringUtil.extract_date_from_header(source_header[2])
    population_ymd = (None if population_date is None
                      else population_date.strftime('%Y/%m/%d'))

    card_date = StringUtil.extract_date_from_header(source_header[3])
    # Fall back to ``created_at`` when the header carries no card date.
    card_ymd = (created_at if card_date is None
                else card_date).strftime('%Y/%m/%d')

    header = [
        "都道府県名", "市区町村名", "総数(人口)", "交付枚数", "人口に対する交付枚数率",
        "人口算出基準日", "交付件数基準日"
    ]

    # Skip the '全国' row when it directly follows the header.
    first_data_row = 2 if _list[1][0] == '全国' else 1
    rows = [row + [population_ymd, card_ymd]
            for row in _list[first_data_row:]]

    self._list = self._citycode.add_citycode(rows, header)
    return self._list
def _convert(self, _list, created_at: date):
    """Append reference-date columns to the prefecture table.

    The first row of ``_list`` is a header such as
    "都道府県名","総数(人口) 【H28.1.1時点】","交付枚数 【H29.5.15時点】",
    "人口に対する 交付枚数率".  The dates embedded in columns 1 and 2 are
    extracted and appended to every data row as two extra columns.

    If no population date can be extracted that column is ``None``; if no
    card date can be extracted ``created_at`` is used instead.

    The normalised header plus the extended rows are stored on
    ``self._list`` and returned.
    """
    top_row = _list[0]

    population_date = StringUtil.extract_date_from_header(top_row[1])
    population_ymd = (None if population_date is None
                      else population_date.strftime('%Y/%m/%d'))

    card_date = StringUtil.extract_date_from_header(top_row[2])
    # Fall back to ``created_at`` when the header carries no card date.
    effective_card_date = created_at if card_date is None else card_date
    card_ymd = effective_card_date.strftime('%Y/%m/%d')

    header = [
        "都道府県名", "総数(人口)", "交付枚数", "人口に対する交付枚数率",
        "人口算出基準日", "交付枚数算出基準日"
    ]
    body_rows = [row + [population_ymd, card_ymd] for row in _list[1:]]

    self._list = [header] + body_rows
    return self._list
def _convert(self, _list: list) -> list:
    """Flatten the two-row demographics header and append reference dates.

    In some dated versions of demographics.csv the population total is NULL
    and the cell to its right (cards issued, male) holds two values; see
    https://github.com/codeforjapan/mynumbercard_statistics/issues/86.
    ``StringUtil.fix_numberfield_error`` is applied first to repair this.

    The CSV header spans two rows:
    ["年齢","人口(H28.1.1時点)","","","交付件数(H29.5.15時点)","","","交付率",
     "","","全体に対する交付件数割合","",""]
    ["","男","女","計","男","女","計","男","女","計","男","女","計"]
    It is replaced by a single header row, and the dates embedded in
    columns 1 and 4 are appended to every data row as two extra columns.
    A date that cannot be extracted is written as ``None``.

    The result is stored on ``self._list`` and returned.
    """
    # NOTE(review): the meaning of the arguments (1, 13, [0, 1]) is defined
    # by StringUtil.fix_numberfield_error and is not visible here.
    _list = StringUtil.fix_numberfield_error(_list, 1, 13, [0, 1])

    # The header cells are read from ``self._list`` (as the original did),
    # not from the repaired ``_list``.
    population_date = StringUtil.extract_date_from_header(self._list[0][1])
    # extract_date_from_header may find no date; do not call strftime on None.
    population_ymd = (population_date.strftime('%Y/%m/%d')
                      if population_date is not None else None)

    card_date = StringUtil.extract_date_from_header(self._list[0][4])
    card_ymd = (card_date.strftime('%Y/%m/%d')
                if card_date is not None else None)

    header = [
        "年齢", "人口(男)", "人口(女)", "人口(計)", "交付件数(男)", "交付件数(女)",
        "交付件数(計)", "交付率(男)", "交付率(女)", "交付率(計)",
        "全体に対する交付件数割合(男)", "全体に対する交付件数割合(女)",
        "全体に対する交付件数割合(計)", "人口算出基準日", "交付枚数算出基準日"
    ]

    # Rows 0 and 1 are the original two-row header; data starts at row 2.
    self._list = [header] + [
        row + [population_ymd, card_ymd] for row in _list[2:]
    ]
    return self._list
def _convert(self, _list: list, created_at: date) -> list:
    """Flatten the two-row demographics header and append reference dates.

    The CSV header spans two rows:
    ["年齢","人口(H28.1.1時点)","","","交付件数(H29.5.15時点)","","","交付率",
     "","","全体に対する交付件数割合","",""]
    ["","男","女","計","男","女","計","男","女","計","男","女","计"]
    It is replaced by a single header row, and the dates embedded in
    columns 1 and 4 are appended to every data row as two extra columns.

    Since June of Reiwa 3 the card column no longer carries a reference
    date, so ``created_at`` is used when none can be extracted.  A missing
    population date is written as ``None``.

    The result is stored on ``self._list`` and returned.
    """
    # NOTE(review): the meaning of the arguments (1, 13, [0, 1]) is defined
    # by StringUtil.fix_numberfield_error and is not visible here.
    _list = StringUtil.fix_numberfield_error(_list, 1, 13, [0, 1])

    # The population header cell is read from ``self._list`` and the card
    # header cell from ``_list``, exactly as the original did.
    population_date = StringUtil.extract_date_from_header(self._list[0][1])
    # Guard the population date the same way the card date is guarded.
    population_ymd = (population_date.strftime('%Y/%m/%d')
                      if population_date is not None else None)

    card_date = StringUtil.extract_date_from_header(_list[0][4])
    if card_date is not None:
        card_ymd = card_date.strftime('%Y/%m/%d')
    else:
        card_ymd = created_at.strftime('%Y/%m/%d')

    header = [
        "年齢", "人口(男)", "人口(女)", "人口(計)", "交付件数(男)", "交付件数(女)",
        "交付件数(計)", "交付率(男)", "交付率(女)", "交付率(計)",
        "全体に対する交付件数割合(男)", "全体に対する交付件数割合(女)",
        "全体に対する交付件数割合(計)", "人口算出基準日", "交付枚数算出基準日"
    ]

    # Rows 0 and 1 are the original two-row header; data starts at row 2.
    self._list = [header] + [
        row + [population_ymd, card_ymd] for row in _list[2:]
    ]
    return self._list
def _convert(self, _list: list) -> list:
    """Repair the fused population cell, flatten the header, append dates.

    In the first data row the female and total population values arrive
    fused in one cell (column 3), e.g.
    '65,269,421 127,443,563 11,249,560'.  The first two space-separated
    numbers are written back to columns 2 and 3 as integers.  ``_list`` is
    modified in place.

    The CSV header spans two rows:
    ["年齢","人口(H28.1.1時点)","","","交付件数(H29.5.15時点)","","","交付率",
     "","","全体に対する交付件数割合","",""]
    ["","男","女","計","男","女","計","男","女","計","男","女","計"]
    It is replaced by a single header row, and the dates embedded in
    columns 1 and 4 are appended to every data row as two extra columns.
    A date that cannot be extracted is written as ``None``.

    The result is stored on ``self._list`` and returned.
    """
    fused = _list[2][3]
    if isinstance(fused, str):
        parts = fused.split(' ')
        # Only split when there really are two values in the cell.
        if len(parts) >= 2:
            _list[2][2] = int(parts[0].replace(',', ''))
            _list[2][3] = int(parts[1].replace(',', ''))

    # The header cells are read from ``self._list``, as the original did.
    population_date = StringUtil.extract_date_from_header(self._list[0][1])
    # extract_date_from_header may find no date; do not call strftime on None.
    population_ymd = (population_date.strftime('%Y/%m/%d')
                      if population_date is not None else None)

    card_date = StringUtil.extract_date_from_header(self._list[0][4])
    card_ymd = (card_date.strftime('%Y/%m/%d')
                if card_date is not None else None)

    # "人口(計)" previously lacked its closing parenthesis.
    header = [
        "年齢", "人口(男)", "人口(女)", "人口(計)", "交付件数(男)", "交付件数(女)",
        "交付件数(計)", "交付率(男)", "交付率(女)", "交付率(計)",
        "全体に対する交付件数割合(男)", "全体に対する交付件数割合(女)",
        "全体に対する交付件数割合(計)", "人口算出基準日", "交付件数基準日"
    ]

    # Rows 0 and 1 are the original two-row header; data starts at row 2.
    self._list = [header] + [
        row + [population_ymd, card_ymd] for row in _list[2:]
    ]
    return self._list
def parse_file(self, fo):
    """Parse a comma-separated parcel file and store its rows by route.

    Each line of ``fo`` is split on ','.  Columns used:
      0  postcode
      1, 2  internal codes defining the parcel type (not used here)
      3  recipient name (title-cased)
      4  street
      5  phone number
      6  sender (company/person sending the package), added to the notes
      7  additional info, added to the notes
      8  route, taken from characters 3..5 of the column
    Column 9 (user info) is present in the format but not used.

    Lines whose postcode fails ``RWAddressParser.validatePostcode`` are
    skipped, as are lines the address parser cannot resolve.  The collected
    rows are stored in ``self.m_rwdata`` under the route of the last line
    read.  An empty input stores nothing.
    """
    # Use a distinct local name: assigning to ``parser`` would make it a
    # local variable and the call itself would raise UnboundLocalError.
    address_parser = parser(self.config[PATHS]['db_path'],
                            self.config[FILES]['db_name'])

    lines = fo.readlines()
    if not lines:
        return

    data = []
    route = ''
    for line in lines:
        blocks = line.split(',')
        count = len(blocks)

        # Missing trailing columns default to the empty string.
        postcode = blocks[0]
        name = StringUtil.titlecase(blocks[3]) if count > 3 else ''
        street = blocks[4].strip() if count > 4 else ''
        phone = blocks[5].strip() if count > 5 else ''
        sender = self.expandSender(blocks[6]) if count > 6 else ''
        info = blocks[7] if count > 7 else ''
        route = blocks[8][3:6] if count > 8 else ''

        postcodevalid, postcode = RWAddressParser.validatePostcode(postcode)
        if not postcodevalid:
            # TODO some form of error checking and reporting
            continue

        address = address_parser.parse(street, postcode, name)
        if not address:
            continue

        notes = sender + ' : ' + info
        data.append([
            address.address(),        # should be number + street
            address.city,             # town/city
            address.region2,          # County/State
            postcode,                 # post code
            address.country,          # Country
            "1.0",                    # Priority
            phone,                    # Phone number
            StringUtil.chomp(notes),
            address.lat,
            address.lon,
        ])

    # NOTE(review): assumes one route per file; every row is stored under
    # the route of the last line read - confirm against the callers.
    self.m_rwdata[route] = data
def appendData(self, list: list, created_at: date):
    """Convert ``list`` and merge the result into ``self._alllist``.

    When ``self._alllist`` is empty the converted rows are appended to it.
    Otherwise the converted rows are placed in front of the existing rows,
    with the first existing row dropped.  The merged list is then passed
    through ``StringUtil.complement_error_lines(..., 6, True)`` and stored
    back on ``self._alllist``.
    """
    if len(self._alllist) != 0:
        # Row 0 of the accumulated list is dropped; the converted rows
        # take its place at the front.
        previous_rows = self.convert(list, created_at) + self._alllist[1:]
        self._alllist = previous_rows
    else:
        self._alllist.extend(self.convert(list, created_at))

    repaired = StringUtil.complement_error_lines(self._alllist, 6, True)
    self._alllist = repaired
def _convert(self, _list: list) -> list:
    """Append reference-date columns to the category table.

    The first row of ``_list`` is a header such as
    "区分","","人口 (H29.1.1時点)","交付枚数 (H29.8.31時点)",
    "人口に対する交付枚数率".  The dates embedded in columns 2 and 3 are
    extracted and appended to every data row; a date that cannot be
    extracted is written as ``None``.

    The extended rows are passed through ``self.merge_rows(rows, 0, 1)``
    (presumably merging the two leading category columns - confirm against
    merge_rows), prefixed with the normalised header, stored on
    ``self._list`` and returned.
    """
    top_row = _list[0]
    date_columns = []
    # Column 2 holds the population date, column 3 the card date.
    for column in (2, 3):
        found = StringUtil.extract_date_from_header(top_row[column])
        date_columns.append(
            None if found is None else found.strftime('%Y/%m/%d'))

    rows = [row + date_columns for row in _list[1:]]

    header = ["区分", "人口", "交付枚数", "人口に対する交付枚数率", "人口算出基準日", "交付枚数算出基準日"]
    self._list = [header] + self.merge_rows(rows, 0, 1)
    return self._list
def _convert(self, _list: list) -> list:
    """Flatten the two-row demographics header and append reference dates.

    The CSV header spans two rows:
    ["年齢","人口(H28.1.1時点)","","","交付件数(H29.5.15時点)","","","交付率",
     "","","全体に対する交付件数割合","",""]
    ["","男","女","計","男","女","計","男","女","計","男","女","計"]
    It is replaced by a single header row, and the dates embedded in
    columns 1 and 4 are appended to every data row as two extra columns.
    A date that cannot be extracted is written as ``None``.

    The result is stored on ``self._list`` and returned.
    """
    # The header cells are read from ``self._list``, as the original did.
    population_date = StringUtil.extract_date_from_header(self._list[0][1])
    # extract_date_from_header may find no date; do not call strftime on None.
    population_ymd = (population_date.strftime('%Y/%m/%d')
                      if population_date is not None else None)

    card_date = StringUtil.extract_date_from_header(self._list[0][4])
    card_ymd = (card_date.strftime('%Y/%m/%d')
                if card_date is not None else None)

    header = [
        "年齢", "人口(男)", "人口(女)", "人口(計)", "交付件数(男)", "交付件数(女)",
        "交付件数(計)", "交付率(男)", "交付率(女)", "交付率(計)",
        "全体に対する交付件数割合(男)", "全体に対する交付件数割合(女)",
        "全体に対する交付件数割合(計)", "人口算出基準日", "交付枚数算出基準日"
    ]

    # Rows 0 and 1 are the original two-row header; data starts at row 2.
    self._list = [header] + [
        row + [population_ymd, card_ymd] for row in _list[2:]
    ]
    return self._list
def generate_upsert_docs(self, line, fields_define_list):
    """Build the query document and the ``$set`` update document for a line.

    ``line`` is split on ``self.delimiter``; the n-th value is converted
    with ``StringUtil.formatvalue`` using the type info of the n-th entry
    of ``fields_define_list`` (each entry is indexed as
    ``(field_name, type_info)``).  Every field goes into the update
    document; fields that also appear in ``self.update_query`` (split on
    ``MongoLoader.fields_delim``) go into the query document as well.

    Returns a ``(query_doc, update_doc)`` tuple where ``update_doc`` is
    ``{"$set": {...}}``.
    """
    query_fields = self.update_query.split(MongoLoader.fields_delim)
    values = line.split(self.delimiter)

    query_doc = {}
    set_doc = {}
    for position, definition in enumerate(fields_define_list):
        field = definition[0]
        typeinfo = definition[1]
        converted = StringUtil.formatvalue(values[position], typeinfo)
        set_doc[field] = converted
        if field in query_fields:
            query_doc[field] = converted

    return query_doc, {"$set": set_doc}
exit(1)  # NOTE(review): tail of a guard whose header precedes this excerpt

# load data file
with open(DATA_FILE) as f:
    loaded: dict = json.load(f)

# create dir if it does not exist
if (not os.path.exists(OUT_DIR)):
    os.makedirs(OUT_DIR)

# delete total files
delete_total_csvs(OUT_DIR + "/total")

# load csv data
for key in loaded.keys():
    date = StringUtil.extract_date_from_title(loaded.get(key))
    if (not date):
        print(
            'The system could not retrieve date string from the title "{0}" '.
            format(loaded.get(key)))
        continue
    # Format the date only after the check above; calling strftime first
    # would raise on a missing date instead of skipping the entry.
    ymd = date.strftime('%Y%m%d')
    processor = Processor(date)  # create processor instance
    print('Create file for the date {0}'.format(date))
    target_dir = RAW_DIR + '/' + key
    if (not os.path.exists(target_dir)):
        print('The data for the key {0} does not exists. Skip this key'.format(
            key))
        continue
    # read all csv file of the raw data
# Insert a few cities; the second 'Dartmouth' should be ignored as a repeat.
for town in ('Brixham', 'Kingswear', 'Dartmouth', 'Dartmouth'):
    city_id = db.insertCityIntoDB(town, region_id)

# Resolve the region, using the preferred region name when one is registered.
region = g_address.region2
preferred = db.getPreferredRegionFromDB(region, country_id)
if preferred is not None:
    region = preferred
region_id = db.getRegionIdFromDB(region, country_id)

city = g_address.city
city_id = db.getCityIdFromDB(city, region_id)

street = g_address.street
s_address = expand_address(street)[0]  # pypostal expands rd to road etc
s_address = StringUtil.titlecase(s_address)

def_lat = g_address.lat
def_lon = g_address.lon
street_id = db.insertStreetIntoDB(s_address, city_id, postcode, def_lat,
                                  def_lon)

lat, lon = db.getDefaultLatLonFromDB(postcode)
print(lat)
print(lon)
db.close()