def _convert(self, _list, created_at: date):
     """Add the reference dates embedded in the CSV header as extra columns.

     The incoming header row has the columns
     都道府県名 / 市区町村名 / 総数(人口)【H28.1.1時点】 /
     交付枚数【H29.5.15時点】 / 交付率,
     so the population date and the card-issuance date only exist as text
     inside header cells 2 and 3.  They are extracted here and appended to
     every data row.

     Args:
         _list: Rows of the CSV; row 0 is the header row.
         created_at: Date used for the card-issuance column when no date
             can be extracted from header cell 3.

     Returns:
         The value returned by ``self._citycode.add_citycode`` for the
         extended rows and the normalized header.  The same value is also
         stored in ``self._list``.
     """
     source_header = _list[0]

     # The population date stays None when the header cell carries no date.
     found_population = StringUtil.extract_date_from_header(source_header[2])
     population_ymd = (None if found_population is None else
                       found_population.strftime('%Y/%m/%d'))

     # The card-issuance date falls back to the supplied created_at.
     found_card = StringUtil.extract_date_from_header(source_header[3])
     card_basis = created_at if found_card is None else found_card
     card_ymd = card_basis.strftime('%Y/%m/%d')

     header = [
         "都道府県名", "市区町村名", "総数(人口)", "交付枚数", "人口に対する交付枚数率", "人口算出基準日",
         "交付件数基準日"
     ]

     # Skip the leading nationwide ('全国') row when it is present.
     first_data_row = 2 if _list[1][0] == '全国' else 1
     date_columns = [population_ymd, card_ymd]
     data = [row + date_columns for row in _list[first_data_row:]]

     self._list = self._citycode.add_citycode(data, header)
     return self._list
 def _convert(self, _list, created_at: date):
     """Add the reference dates embedded in the CSV header as extra columns.

     The incoming header row has the columns
     都道府県名 / 総数(人口)【H28.1.1時点】 / 交付枚数【H29.5.15時点】 /
     人口に対する交付枚数率,
     so the population date and the card-issuance date only exist as text
     inside header cells 1 and 2.  They are extracted here and appended to
     every data row.

     Args:
         _list: Rows of the CSV; row 0 is the header row.
         created_at: Date used for the card-issuance column when no date
             can be extracted from header cell 2.

     Returns:
         The normalized header followed by the extended data rows.  The
         same list is also stored in ``self._list``.
     """
     source_header = _list[0]

     # The population date stays None when the header cell carries no date.
     found_population = StringUtil.extract_date_from_header(source_header[1])
     population_ymd = (None if found_population is None else
                       found_population.strftime('%Y/%m/%d'))

     # The card-issuance date falls back to the supplied created_at.
     found_card = StringUtil.extract_date_from_header(source_header[2])
     card_basis = created_at if found_card is None else found_card
     card_ymd = card_basis.strftime('%Y/%m/%d')

     header = [
         "都道府県名", "総数(人口)", "交付枚数", "人口に対する交付枚数率", "人口算出基準日", "交付枚数算出基準日"
     ]
     date_columns = [population_ymd, card_ymd]
     data = [row + date_columns for row in _list[1:]]

     self._list = [header] + data
     return self._list
 def _convert(self, _list: list) -> list:
     """Flatten the two-row demographics header and append reference dates.

     The CSV header spans two rows:
     ["年齢","人口(H28.1.1時点)","","","交付件数(H29.5.15時点)","","", ...]
     ["","男","女","計","男","女","計", ...]
     It is replaced by a single header row, and the dates found in the
     "(... 時点)" parts are appended to every data row as two extra columns.

     Args:
         _list: Rows of the CSV; rows 0 and 1 are the two header rows.

     Returns:
         The single-row header followed by the extended data rows.  The
         same list is also stored in ``self._list``.
     """
     # In demographics.csv files for some dates, 人口(計) is NULL and the
     # cell to its right, 交付件数(男), holds two values.  See
     # https://github.com/codeforjapan/mynumbercard_statistics/issues/86
     rows = StringUtil.fix_numberfield_error(_list, 1, 13, [0, 1])

     # NOTE(review): the dates are read from self._list, not from the rows
     # passed in -- presumably the caller sets self._list to the raw rows
     # beforehand; confirm.
     top_header = self._list[0]
     population_date = StringUtil.extract_date_from_header(top_header[1])
     population_ymd = population_date.strftime('%Y/%m/%d')
     card_date = StringUtil.extract_date_from_header(top_header[4])
     card_ymd = card_date.strftime('%Y/%m/%d')

     header = [
         "年齢", "人口(男)", "人口(女)", "人口(計)", "交付件数(男)", "交付件数(女)", "交付件数(計)",
         "交付率(男)", "交付率(女)", "交付率(計)", "全体に対する交付件数割合(男)", "全体に対する交付件数割合(女)",
         "全体に対する交付件数割合(計)", "人口算出基準日", "交付枚数算出基準日"
     ]
     date_columns = [population_ymd, card_ymd]
     self._list = [header] + [row + date_columns for row in rows[2:]]
     return self._list
 def _convert(self, _list: list, created_at: date) -> list:
     """Flatten the two-row demographics header and append reference dates.

     The CSV header spans two rows:
     ["年齢","人口(H28.1.1時点)","","","交付件数(H29.5.15時点)","","", ...]
     ["","男","女","計","男","女","計", ...]
     It is replaced by a single header row, and the dates found in the
     "(... 時点)" parts are appended to every data row as two extra columns.
     From June of Reiwa 3 the card-issuance header no longer carries a
     date, so ``created_at`` is used as the default in that case.

     Args:
         _list: Rows of the CSV; rows 0 and 1 are the two header rows.
         created_at: Date used for the card-issuance column when no date
             can be extracted from header cell 4.

     Returns:
         The single-row header followed by the extended data rows.  The
         same list is also stored in ``self._list``.
     """
     rows = StringUtil.fix_numberfield_error(_list, 1, 13, [0, 1])

     # NOTE(review): the population date is read from self._list while the
     # card date is read from the rows passed in -- presumably the caller
     # sets self._list to the raw rows beforehand; confirm.
     population_date = StringUtil.extract_date_from_header(self._list[0][1])
     population_ymd = population_date.strftime('%Y/%m/%d')

     found_card = StringUtil.extract_date_from_header(rows[0][4])
     card_basis = created_at if found_card is None else found_card
     card_ymd = card_basis.strftime('%Y/%m/%d')

     header = [
         "年齢", "人口(男)", "人口(女)", "人口(計)", "交付件数(男)", "交付件数(女)", "交付件数(計)",
         "交付率(男)", "交付率(女)", "交付率(計)", "全体に対する交付件数割合(男)", "全体に対する交付件数割合(女)",
         "全体に対する交付件数割合(計)", "人口算出基準日", "交付枚数算出基準日"
     ]
     date_columns = [population_ymd, card_ymd]
     self._list = [header] + [row + date_columns for row in rows[2:]]
     return self._list
    def _convert(self, _list: list) -> list:
        """Repair the first data row, flatten the header and append dates.

        In this file the first data row (row index 2) has 人口(女) and
        人口(計) fused into one cell, for example
        '65,269,421 127,443,563  11,249,560'.  The first two numbers of that
        cell are written back into columns 2 and 3 as integers; the row in
        ``_list`` is modified in place.

        The CSV header spans two rows:
        ["年齢","人口(H28.1.1時点)","","","交付件数(H29.5.15時点)","","", ...]
        ["","男","女","計","男","女","計", ...]
        It is replaced by a single header row, and the dates found in the
        "(... 時点)" parts are appended to every data row as two extra
        columns.

        Args:
            _list: Rows of the CSV; rows 0 and 1 are the two header rows.

        Returns:
            The single-row header followed by the extended data rows.  The
            same list is also stored in ``self._list``.
        """
        fused_cell = _list[2][3]
        if isinstance(fused_cell, str):
            # split() without a separator tolerates the repeated spaces that
            # appear in the data; split(' ') would yield empty strings there.
            parts = fused_cell.split()
            # A string holding a single value has nothing to separate, so it
            # is left untouched instead of raising IndexError.
            if len(parts) >= 2:
                _list[2][2] = int(parts[0].replace(',', ''))
                _list[2][3] = int(parts[1].replace(',', ''))

        # NOTE(review): the dates are read from self._list, not from the rows
        # passed in -- presumably the caller sets self._list to the raw rows
        # beforehand; confirm.
        population_ymd = StringUtil.extract_date_from_header(
            self._list[0][1]).strftime('%Y/%m/%d')
        card_ymd = StringUtil.extract_date_from_header(
            self._list[0][4]).strftime('%Y/%m/%d')
        # "人口(計)" previously lacked its closing parenthesis.
        header = [
            "年齢", "人口(男)", "人口(女)", "人口(計)", "交付件数(男)", "交付件数(女)", "交付件数(計)",
            "交付率(男)", "交付率(女)", "交付率(計)", "全体に対する交付件数割合(男)", "全体に対する交付件数割合(女)",
            "全体に対する交付件数割合(計)", "人口算出基準日", "交付件数基準日"
        ]
        date_columns = [population_ymd, card_ymd]
        self._list = [header] + [row + date_columns for row in _list[2:]]
        return self._list
Beispiel #6
0
    def parse_file(self, fo):
        """Parse a comma-separated parcel file into geocoded rows.

        Every line is split on ',' and read by position:
        0 postcode, 1-2 internal parcel-type codes (not used), 3 name,
        4 street, 5 phone, 6 sender, 7 info, 8 route (characters 3-5 of the
        field), 9 user info (not used).  Missing trailing fields default to
        the empty string.

        Lines whose postcode fails ``RWAddressParser.validatePostcode`` and
        lines for which the address parser returns nothing are skipped.  The
        collected rows are stored in ``self.m_rwdata`` under the route of
        the last line read ('' when the file has no lines or that line has
        no route field).

        Args:
            fo: Open text file object providing ``readlines()``.
        """
        # The result is bound to a separate local name: the former
        # `parser = parser(...)` made `parser` local to this function, so the
        # call itself always raised UnboundLocalError.
        # NOTE(review): `parser` must resolve to a callable in the enclosing
        # scope; presumably RWAddressParser was intended -- confirm.
        address_parser = parser(self.config[PATHS]['db_path'],
                                self.config[FILES]['db_name'])
        data = []
        route = ''  # keeps the final store valid when the file is empty
        for line in fo.readlines():
            blocks = line.split(',')
            field_count = len(blocks)

            # Reset every per-line field, including `name`, so a short line
            # can never reuse a value from the previous line.
            route = street = sender = info = phone = name = ''
            postcode = blocks[0]  # split() always yields at least one item
            # 1 & 2 are internal codes defining parcel type - not used here
            if field_count > 3:
                name = StringUtil.titlecase(blocks[3])
            if field_count > 4:
                street = blocks[4].strip()
            if field_count > 5:
                phone = blocks[5].strip()
            if field_count > 6:
                # the company/person sending the package - added to note
                sender = self.expandSender(blocks[6])
            if field_count > 7:
                info = blocks[7]
            if field_count > 8:
                route = blocks[8][3:6]
            # field 9 (user info) is present in the file but not used here

            postcodevalid, postcode = RWAddressParser.validatePostcode(
                postcode)
            if not postcodevalid:
                # TODO some form of error checking and reporting
                continue

            address = address_parser.parse(street, postcode, name)
            if address:
                notes = sender + ' : ' + info

                rowlist = [
                    address.address(),  # should be number + street
                    address.city,  # town/city
                    address.region2,  # County/State
                    postcode,  # post code
                    address.country,  # Country
                    "1.0",  # Priority
                    phone,  # Phone number
                    StringUtil.chomp(notes),
                    address.lat,
                    address.lon,
                ]
                data.append(rowlist)
        self.m_rwdata[route] = data
 def appendData(self, list: list, created_at: date):
     """Convert one file's rows and merge them into the accumulated table.

     The rows returned by ``self.convert`` are placed in front of the rows
     accumulated so far, and the first previously accumulated row is
     dropped (presumably the old header, since the converted rows start
     with their own -- confirm).  The merged table is then passed through
     ``StringUtil.complement_error_lines``.

     Args:
         list: Raw rows of one source file.
         created_at: Date forwarded unchanged to ``self.convert``.
     """
     if self._alllist:
         # Newest data first; row 0 of the existing table is discarded.
         self._alllist = self.convert(list, created_at) + self._alllist[1:]
     else:
         # Nothing accumulated yet: take the converted rows as they are.
         self._alllist.extend(self.convert(list, created_at))
     self._alllist = StringUtil.complement_error_lines(
         self._alllist, 6, True)
 def _convert(self, _list: list) -> list:
     """Add the reference dates embedded in the CSV header as extra columns.

     The incoming header row has the columns
     区分 / (empty) / 人口(H29.1.1時点) / 交付枚数(H29.8.31時点) /
     人口に対する交付枚数率,
     so the population date and the card-issuance date only exist as text
     inside header cells 2 and 3.  They are extracted here and appended to
     every data row; a date that cannot be extracted is stored as None.

     Args:
         _list: Rows of the CSV; row 0 is the header row.

     Returns:
         The normalized header followed by the result of
         ``self.merge_rows`` applied to the extended data rows.  The same
         list is also stored in ``self._list``.
     """
     source_header = _list[0]

     found_population = StringUtil.extract_date_from_header(source_header[2])
     population_ymd = (None if found_population is None else
                       found_population.strftime('%Y/%m/%d'))

     found_card = StringUtil.extract_date_from_header(source_header[3])
     card_ymd = (None if found_card is None else
                 found_card.strftime('%Y/%m/%d'))

     header = ["区分", "人口", "交付枚数", "人口に対する交付枚数率", "人口算出基準日", "交付枚数算出基準日"]
     date_columns = [population_ymd, card_ymd]
     data = [row + date_columns for row in _list[1:]]

     # presumably merges columns 0 and 1 of each row -- verify in merge_rows
     self._list = [header] + self.merge_rows(data, 0, 1)
     return self._list
 def _convert(self, _list: list) -> list:
     """Flatten the two-row demographics header and append reference dates.

     The CSV header spans two rows:
     ["年齢","人口(H28.1.1時点)","","","交付件数(H29.5.15時点)","","", ...]
     ["","男","女","計","男","女","計", ...]
     It is replaced by a single header row, and the dates found in the
     "(... 時点)" parts are appended to every data row as two extra columns.

     Args:
         _list: Rows of the CSV; rows 0 and 1 are the two header rows.

     Returns:
         The single-row header followed by the extended data rows.  The
         same list is also stored in ``self._list``.
     """
     # NOTE(review): the dates are read from self._list, not from the rows
     # passed in -- presumably the caller sets self._list to the raw rows
     # beforehand; confirm.
     top_header = self._list[0]
     population_date = StringUtil.extract_date_from_header(top_header[1])
     population_ymd = population_date.strftime('%Y/%m/%d')
     card_date = StringUtil.extract_date_from_header(top_header[4])
     card_ymd = card_date.strftime('%Y/%m/%d')

     header = [
         "年齢", "人口(男)", "人口(女)", "人口(計)", "交付件数(男)", "交付件数(女)", "交付件数(計)",
         "交付率(男)", "交付率(女)", "交付率(計)", "全体に対する交付件数割合(男)", "全体に対する交付件数割合(女)",
         "全体に対する交付件数割合(計)", "人口算出基準日", "交付枚数算出基準日"
     ]
     date_columns = [population_ymd, card_ymd]
     self._list = [header] + [row + date_columns for row in _list[2:]]
     return self._list
 def generate_upsert_docs(self, line, fields_define_list):
     """Build the query and update documents for one delimited input line.

     The line is split on ``self.delimiter`` and its values are matched to
     ``fields_define_list`` by position.  Every field goes into the update
     document; fields named in ``self.update_query`` also go into the
     query document.

     Args:
         line: One input record as a delimited string.
         fields_define_list: Sequence of entries whose item 0 is the field
             name and item 1 the type information handed to
             ``StringUtil.formatvalue``.

     Returns:
         A ``(query_doc, update_doc)`` tuple, where ``update_doc`` has the
         form ``{"$set": {...}}``.

     Raises:
         IndexError: If the line has fewer values than defined fields.
     """
     key_fields = self.update_query.split(MongoLoader.fields_delim)
     values = line.split(self.delimiter)
     query_doc = {}
     set_doc = {}
     for position, definition in enumerate(fields_define_list):
         name = definition[0]
         type_info = definition[1]
         converted = StringUtil.formatvalue(values[position], type_info)
         set_doc[name] = converted
         if name in key_fields:
             query_doc[name] = converted
     return query_doc, {"$set": set_doc}
Beispiel #11
0
 def generate_upsert_docs(self, line, fields_define_list):
     """Turn one delimited line into a (query, update) document pair.

     Values obtained by splitting ``line`` on ``self.delimiter`` are paired
     with ``fields_define_list`` by index.  All fields are written to the
     update document; those listed in ``self.update_query`` (split on
     ``MongoLoader.fields_delim``) are additionally written to the query
     document.

     Args:
         line: One input record as a delimited string.
         fields_define_list: Sequence of entries with the field name at
             index 0 and the type information for
             ``StringUtil.formatvalue`` at index 1.

     Returns:
         ``(query_doc, update_doc)`` with ``update_doc`` wrapped as
         ``{"$set": {...}}``.

     Raises:
         IndexError: If the line has fewer values than defined fields.
     """
     query_doc = {}
     fields_to_set = {}
     query_names = self.update_query.split(MongoLoader.fields_delim)
     raw_values = line.split(self.delimiter)
     for index, field_def in enumerate(fields_define_list):
         field_name, field_type = field_def[0], field_def[1]
         value = StringUtil.formatvalue(raw_values[index], field_type)
         fields_to_set[field_name] = value
         if field_name in query_names:
             query_doc[field_name] = value
     update_doc = {"$set": fields_to_set}
     return query_doc, update_doc
Beispiel #12
0
    exit(1)

# load data file (maps each raw-data key to the title its date is parsed from)
with open(DATA_FILE) as f:
    loaded: dict = json.load(f)

# create the output dir if it does not exist
os.makedirs(OUT_DIR, exist_ok=True)

# delete total files
delete_total_csvs(OUT_DIR + "/total")

# load csv data
for key, title in loaded.items():
    date = StringUtil.extract_date_from_title(title)
    # Check the date before formatting it: calling strftime first raised
    # AttributeError on an unparseable title, so this skip was never reached.
    if (not date):
        print(
            'The system could not retrieve date string from the title "{0}" '.
            format(title))
        continue
    ymd = date.strftime('%Y%m%d')
    processor = Processor(date)  # create processor instance
    print('Create file for the date {0}'.format(date))
    target_dir = RAW_DIR + '/' + key
    if (not os.path.exists(target_dir)):
        print('The data for the key {0} does not exists. Skip this key'.format(
            key))
        continue

    # read all csv file of the raw data
Beispiel #13
0
    city_id = db.insertCityIntoDB('Brixham', region_id)
    city_id = db.insertCityIntoDB('Kingswear', region_id)
    city_id = db.insertCityIntoDB('Dartmouth', region_id)

    city_id = db.insertCityIntoDB('Dartmouth', region_id) # should be ignored as is a repeat

    region = g_address.region2
    preferred = db.getPreferredRegionFromDB(region, country_id)
    if preferred != None:
        region = preferred
    region_id = db.getRegionIdFromDB(region, country_id)
    city = g_address.city
    city_id = db.getCityIdFromDB(city, region_id)
    street = g_address.street
    s_address = expand_address(street)[0] # pypostal expands rd to road etc
    s_address = StringUtil.titlecase(s_address)
    def_lat = g_address.lat
    def_lon = g_address.lon

    street_id = db.insertStreetIntoDB(s_address, city_id, postcode, def_lat, def_lon)

    lat,lon = db.getDefaultLatLonFromDB(postcode)
    print(lat)
    print(lon)

    db.close()