Example #1
0
def test_prepare_csv_read():
    """Check ``prepare_csv_read()`` with an open file and a list of lines.

    The first expectation reads ``tests/data/real_file.csv`` through an open
    file object; the second passes CSV lines directly as a ``list``.  In both
    cases the reader must yield one ``dict`` per row, keyed by the supplied
    field names.
    """
    # Open the fixture in a context manager so the handle is closed even when
    # the expectation fails, rather than leaking it until garbage collection.
    with open('tests/data/real_file.csv') as csv_file:
        expect(list(prepare_csv_read(csv_file,
                                     ('type', 'bool', 'string')))) == \
            [{'bool': 'true', 'type': 'file', 'string': 'test'}]
    test_list = ['James,Rowe', 'ell,caro']
    expect(list(prepare_csv_read(test_list, ('first', 'last')))) == \
        [{'last': 'Rowe', 'first': 'James'}, {'last': 'caro', 'first': 'ell'}]
Example #2
0
    def import_locations(self, cells_file):
        """Parse OpenCellID.org data files.

        Every complete row of ``cells_file`` is converted to a ``Cell``
        object and stored on ``self`` under the row's OpenCellID.org_
        database identifier.

        It expects cell files in the following format::

            22747,52.0438995361328,-0.2246370017529,234,33,2319,647,0,1,
            2008-04-05 21:32:40,2008-04-05 21:32:40
            22995,52.3305015563965,-0.2255620062351,234,10,20566,4068,0,1,
            2008-04-05 21:32:59,2008-04-05 21:32:59
            23008,52.3506011962891,-0.2234109938145,234,10,10566,4068,0,1,
            2008-04-05 21:32:59,2008-04-05 21:32:59

        The above file processed by ``import_locations()`` produces the
        following ``Cell`` objects, shown here against their identifiers::

            {23008: Cell(23008, 52.3506011963, -0.223410993814, 234, 10, 10566,
                         4068, 0, 1, datetime.datetime(2008, 4, 5, 21, 32, 59),
                         datetime.datetime(2008, 4, 5, 21, 32, 59)),
             22747: Cell(22747, 52.0438995361, -0.224637001753, 234, 33, 2319,
                         647, 0, 1, datetime.datetime(2008, 4, 5, 21, 32, 40),
                         datetime.datetime(2008, 4, 5, 21, 32, 40)),
             22995: Cell(22995, 52.3305015564, -0.225562006235, 234, 10, 20566,
                         4068, 0, 1, datetime.datetime(2008, 4, 5, 21, 32, 59),
                         datetime.datetime(2008, 4, 5, 21, 32, 59))}

        Rows that fail to parse because they contain the export's
        missing-data marker are logged at debug level and skipped; the
        remaining rows are still imported.

        :type cells_file: ``file``, ``list`` or ``str``
        :param cells_file: Cell data to read
        :raise FileFormatError: A row fails to parse and does not contain
            the missing-data marker

        .. _OpenCellID.org: http://opencellid.org/
        """
        self._cells_file = cells_file
        field_names = ('ident', 'latitude', 'longitude', 'mcc', 'mnc', 'lac',
                       'cellid', 'crange', 'samples', 'created', 'updated')
        parse_date = lambda s: datetime.datetime.strptime(s,
                                                          '%Y-%m-%d %H:%M:%S')
        # One parser per entry in field_names, in the same order
        field_parsers = (int, float, float, int, int, int, int, int, int,
                         parse_date, parse_date)
        data = utils.prepare_csv_read(cells_file, field_names)

        for row in data:
            try:
                cell = dict((n, p(row[n]))
                            for n, p in zip(field_names, field_parsers))
            except ValueError:
                if r"\N" in row.values():
                    # A few entries are incomplete, and when that occurs the
                    # export includes the string "\N" to denote missing
                    # data.  We just ignore them for now
                    logging.debug('Skipping incomplete entry %r' % row)
                    # Skip only this row; a ``break`` here would silently
                    # discard every entry that follows the incomplete one.
                    continue
                else:
                    raise utils.FileFormatError('opencellid.org')
            else:
                # NOTE(review): the key is the identifier as read from the
                # file (``row['ident']``), not the parsed ``cell['ident']``
                # integer shown in the docstring example -- confirm which one
                # callers rely on before changing it.
                self[row['ident']] = Cell(**cell)
Example #3
0
    def import_locations(self, marker_file):
        """Load trigpoint waypoints from a marker file into this container.

        Each waypoint record is converted to a :class:`Trigpoint` and stored
        on ``self`` under its integer trigpoint identifier.

        Marker files are expected in the layout published as
        alltrigs-wgs84.txt_, for example::

            H  SOFTWARE NAME & VERSION
            I  GPSU 4.04,
            S SymbolSet=0
            ...
            W,500936,N52.066035,W000.281449,    37.0,Broom Farm
            W,501097,N52.010585,W000.173443,    97.0,Bygrave
            W,505392,N51.910886,W000.186462,   136.0,Sish Lane

        Only records whose first field is ``W`` are imported; all other lines
        are skipped.  For the sample above the container ends up holding
        entries equivalent to::

            {500936: point.Point(52.066035, -0.281449, 37.0, "Broom Farm"),
             501097: point.Point(52.010585, -0.173443, 97.0, "Bygrave"),
             505392: point.Point(51.910886, -0.186462, 136.0, "Sish Lane")}

        :type marker_file: ``file``, ``list`` or ``str``
        :param marker_file: Trigpoint marker data to read
        :raise ValueError: A field of a waypoint record cannot be converted

        .. _alltrigs-wgs84.txt: http://www.haroldstreet.org.uk/trigpoints/
        """
        self._marker_file = marker_file
        field_names = ('tag', 'identity', 'latitude', 'longitude', 'altitude',
                       'name')

        def signed_degrees(positive_prefix, text):
            # The leading hemisphere letter decides the sign: the given
            # prefix is positive, any other letter negates the value.
            if text[0] == positive_prefix:
                return float(text[1:])
            return 0 - float(text[1:])

        def parse_latitude(text):
            return signed_degrees('N', text)

        def parse_longitude(text):
            return signed_degrees('E', text)

        def parse_altitude(text):
            # A value of 8888.0 denotes unavailable data
            if text.strip() == '8888.0':
                return None
            return float(text)

        # One converter per entry in field_names, in the same order
        converters = (str, int, parse_latitude, parse_longitude,
                      parse_altitude, str)

        for record in utils.prepare_csv_read(marker_file, field_names):
            if record['tag'] != 'W':
                continue
            for field, convert in zip(field_names, converters):
                record[field] = convert(record[field])
            # The tag is only needed for filtering, it isn't a Trigpoint
            # argument
            del record['tag']
            try:
                self[record['identity']] = Trigpoint(**record)
            except TypeError:
                # Workaround formatting error in 506514 entry that contains
                # spurious comma; the surplus fields end up under the
                # ``None`` key
                del record[None]
                self[record['identity']] = Trigpoint(**record)
Example #4
0
    def import_locations(self, zone_file):
        """Read zoneinfo zone descriptions and append them as ``Zone`` objects.

        Every non-comment row of ``zone_file`` becomes a :class:`Zone` that is
        appended to ``self``.

        The data is expected to be tab separated, in the following format::

            AN	+1211-06900	America/Curacao
            AO	-0848+01314	Africa/Luanda
            AQ	-7750+16636	Antarctica/McMurdo	McMurdo Station, Ross Island

        Data in this format can be found in the :file:`zone.tab` file that is
        normally found in :file:`/usr/share/zoneinfo` on UNIX-like systems, or
        from the `standard distribution site`_.

        Rows whose first field starts with ``#`` are skipped.  A non-empty
        comments field is split on ``", "`` into a ``list`` before the
        :class:`Zone` is created.

        >>> zones = Zones(open("timezones"))
        >>> for value in sorted(zones, key=attrgetter("zone")):
        ...     print(value)
        Africa/Luanda (AO: 08°48'00"S, 013°14'00"E)
        America/Curacao (AN: 12°11'00"N, 069°00'00"W)
        Antarctica/McMurdo (AQ: 77°50'00"S, 166°36'00"E also McMurdo Station,
        Ross Island)

        :type zone_file: ``file``, ``list`` or ``str``
        :param zone_file: ``zone.tab`` data to read

        .. _standard distribution site: ftp://elsie.nci.nih.gov/pub/

        """
        self._zone_file = zone_file
        columns = ("country", "location", "zone", "comments")

        for entry in utils.prepare_csv_read(zone_file, columns,
                                            delimiter=r"	"):
            # Comment lines are recognised by a leading "#" in the first field
            if entry['country'].startswith("#"):
                continue
            remarks = entry['comments']
            if remarks:
                entry['comments'] = remarks.split(", ")
            self.append(Zone(**entry))
Example #5
0
    def import_timezones_file(self, data):
        """Load a geonames.org_ timezone export into ``self.timezones``.

        ``self.timezones`` is replaced with a ``dict`` that maps each timezone
        identifier to a two item ``list``: the UTC offset and the UTC offset
        during daylight savings time, both in minutes.

        The data is expected to be tab separated, in the following format::

            Europe/Andorra	1.0	2.0
            Asia/Dubai	4.0	4.0
            Asia/Kabul	4.5	4.5

        Files in this format can be downloaded from the geonames site in
        their `database export page`_.  For the sample above the resulting
        mapping is::

            {"Europe/Andorra": [60, 120],
             "Asia/Dubai": [240, 240],
             "Asia/Kabul": [270, 270]}

        A row whose identifier is ``TimeZoneId`` is treated as a header and
        skipped.

        :type data: ``file``, ``list`` or ``str``
        :param data: geonames.org timezones data to read
        :raise FileFormatError: An offset cannot be parsed as a number

        .. _geonames.org: http://www.geonames.org/
        .. _database export page: http://download.geonames.org/export/dump/
        """
        self._tzfile = data
        columns = ("ident", "gmt_offset", "dst_offset")

        def to_minutes(hours):
            # Offsets are given in (possibly fractional) hours
            return int(float(hours) * 60)

        rows = utils.prepare_csv_read(data, columns, delimiter=r"	")

        self.timezones = {}
        for entry in rows:
            zone_id = entry["ident"]
            if zone_id == "TimeZoneId":
                continue
            raw_offsets = (entry["gmt_offset"], entry["dst_offset"])
            try:
                offsets = [to_minutes(value) for value in raw_offsets]
            except ValueError:
                raise utils.FileFormatError("geonames.org")
            self.timezones[zone_id] = offsets
Example #6
0
def read_csv(filename):
    """Collect named locations from a user's CSV file.

    The input is read as gpsbabel_'s CSV output format, with the columns
    latitude, longitude and name.

    Each row is given a label built from its 1-based position in the file and
    its name, e.g. ``01:Home``.

    .. _gpsbabel: http://www.gpsbabel.org/

    :param str filename: CSV file to parse (STDIN if '-')
    :rtype: ``tuple`` of ``dict`` and ``list``
    :return: Mapping of label to ``(latitude, longitude)`` as read from the
        file, and the labels in file order
    """
    source = sys.stdin if filename == "-" else filename
    columns = ("latitude", "longitude", "name")
    rows = utils.prepare_csv_read(source, columns, skipinitialspace=True)

    locations = {}
    args = []
    for position, row in enumerate(rows, 1):
        label = "%02i:%s" % (position, row["name"])
        locations[label] = (row["latitude"], row["longitude"])
        args.append(label)
    return locations, args
Example #7
0
    def import_timezones_file(self, data):
        """
        Parse geonames.org_ timezone exports

        ``import_timezones_file()`` fills ``self.timezones`` with a dictionary
        whose keys are the timezone identifiers, and whose values are a
        two item ``list`` holding the UTC offset and the UTC offset during
        daylight savings time, both in minutes.

        It expects data files in the following format::

            Europe/Andorra	1.0	2.0
            Asia/Dubai	4.0	4.0
            Asia/Kabul	4.5	4.5

        Files containing the data in this format can be downloaded from the
        geonames site in their `database export page`_

        Files downloaded from the geonames site when processed by
        ``import_timezones_file()`` will produce a ``dict`` object of the
        following style::

            {"Europe/Andorra": [60, 120],
             "Asia/Dubai": [240, 240],
             "Asia/Kabul": [270, 270]}

        A row whose identifier is ``TimeZoneId`` is treated as a header and
        skipped.

        >>> timezones = Locations(None, open("geonames_timezones")).timezones
        >>> for key, value in sorted(timezones.items()):
        ...     print("%s - %s" % (key, value))
        Asia/Dubai - [240, 240]
        Asia/Kabul - [270, 270]
        Europe/Andorra - [60, 120]
        >>> header_skip_check = Locations(None,
        ...                               open("geonames_timezones_header"))
        >>> print(header_skip_check) # doctest: +ELLIPSIS
        Locations(None, <open file ...>)
        >>> broken_file_check = Locations(None,
        ...                               open("geonames_timezones_broken"))
        Traceback (most recent call last):
            ...
        FileFormatError: Incorrect data format, if you're using a file
        downloaded from geonames.org please report this to James Rowe
        <*****@*****.**>

        :type data: ``file``, ``list`` or ``str``
        :param data: geonames.org timezones data to read
        :raise FileFormatError: An offset cannot be parsed as a number

        .. _geonames.org: http://geonames.org/
        .. _database export page: http://download.geonames.org/export/dump/

        """
        self._tzfile = data
        field_names = ("ident", "gmt_offset", "dst_offset")
        # Offsets are given in (possibly fractional) hours; store minutes
        time_parse = lambda n: int(float(n) * 60)
        data = utils.prepare_csv_read(data, field_names, delimiter=r"	")

        self.timezones = {}
        for row in data:
            # Skip the column header row present in some exports
            if row['ident'] == "TimeZoneId":
                continue
            try:
                # ``map()`` is lazy on Python 3, so it must be materialised
                # here: otherwise the ValueError from a malformed offset
                # escapes this guard and a one-shot iterator is stored
                # instead of a list.
                delta = list(map(time_parse,
                                 (row['gmt_offset'], row['dst_offset'])))
            except ValueError:
                raise utils.FileFormatError("geonames.org")
            self.timezones[row['ident']] = delta
Example #8
0
    def import_locations(self, data):
        """
        Read a geonames.org country database export into this container

        Every row of ``data`` is converted field by field and appended to
        ``self`` as a :class:`Location` object, using the data exported by
        geonames.org_.

        It expects data files in the following tab separated format::

            2633441	Afon Wyre	Afon Wyre	River Wayrai,River Wyrai,Wyre	52.3166667	-4.1666667	H	STM	GB	GB	00				0		-9999	Europe/London	1994-01-13
            2633442	Wyre	Wyre	Viera	59.1166667	-2.9666667	T	ISL	GB	GB	V9				0		1	Europe/London	2004-09-24
            2633443	Wraysbury	Wraysbury	Wyrardisbury	51.45	-0.55	P	PPL	GB		P9				0		28	Europe/London	2006-08-21

        Files containing the data in this format can be downloaded from the
        geonames.org_ site in their `database export page`_.

        Most empty fields are stored as ``None``.  The alternative names and
        alternative country fields are always split on commas.  The timezone
        name is replaced with the first value recorded for it in
        ``self.timezones``, or with ``None`` when ``self.timezones`` is empty.

        >>> locations = Locations(open("geonames"))
        >>> for location in sorted(locations, key=attrgetter("geonameid")):
        ...     print("%i - %s" % (location.geonameid, location))
        2633441 - Afon Wyre (River Wayrai, River Wyrai, Wyre - N52.317°;
        W004.167°)
        2633442 - Wyre (Viera - N59.117°; W002.967°)
        2633443 - Wraysbury (Wyrardisbury - N51.450°; W000.550°)
        >>> broken_locations = Locations(open("broken_geonames"))
        Traceback (most recent call last):
            ...
        FileFormatError: Incorrect data format, if you're using a file
        downloaded from geonames.org please report this to James Rowe
        <*****@*****.**>

        :type data: ``file``, ``list`` or ``str``
        :param data: geonames.org locations data to read
        :raise FileFormatError: A field cannot be converted

        .. _geonames.org: http://geonames.org/
        .. _database export page: http://download.geonames.org/export/dump/

        """
        self._data = data
        field_names = ("geonameid", "name", "asciiname", "alt_names",
                       "latitude", "longitude", "feature_class", "feature_code",
                       "country", "alt_country", "admin1", "admin2", "admin3",
                       "admin4", "population", "altitude", "gtopo30", "tzname",
                       "modified_date")

        def optional(convert):
            # Wrap a converter so that empty values become None
            def parse(text):
                return convert(text) if text else None
            return parse

        def split_commas(text):
            return text.split(",")

        def parse_date(text):
            return datetime.date(*[int(part) for part in text.split("-")])

        def parse_timezone(name):
            if self.timezones:
                return self.timezones[name][0]
            return None

        text_field = optional(str)
        float_field = optional(float)
        int_field = optional(int)

        # One converter per entry in field_names, in the same order
        converters = (int_field, text_field, text_field, split_commas,
                      float_field, float_field, text_field, text_field,
                      text_field, split_commas, text_field, text_field,
                      text_field, text_field, int_field, int_field,
                      int_field, parse_timezone, parse_date)

        for record in utils.prepare_csv_read(data, field_names,
                                             delimiter=r"	"):
            try:
                for field, convert in zip(field_names, converters):
                    record[field] = convert(record[field])
            except ValueError:
                raise utils.FileFormatError("geonames.org")
            self.append(Location(**record))
Example #9
0
def test_prepare_csv_read(data, keys, result):
    """Rows read from ``data`` using ``keys`` must equal ``result``."""
    rows = list(prepare_csv_read(data, keys))
    assert rows == result
Example #10
0
    def import_locations(self, marker_file):
        """
        Load trigpoint waypoints from a marker file into this container

        Each waypoint record is converted to a :class:`Trigpoint` object and
        stored on ``self`` under its integer trigpoint identifier.

        It expects trigpoint marker files in the format provided at
        alltrigs-wgs84.txt_, which is the following format::

            H  SOFTWARE NAME & VERSION
            I  GPSU 4.04,
            S SymbolSet=0
            ...
            W,500936,N52.066035,W000.281449,    37.0,Broom Farm
            W,501097,N52.010585,W000.173443,    97.0,Bygrave
            W,505392,N51.910886,W000.186462,   136.0,Sish Lane

        Only records whose first field is ``W`` are imported; all other lines
        are skipped.  For the sample above the container ends up holding
        entries equivalent to::

            {500936: point.Point(52.066035, -0.281449, 37.0, "Broom Farm"),
             501097: point.Point(52.010585, -0.173443, 97.0, "Bygrave"),
             505392: point.Point(51.910886, -0.186462, 136.0, "Sish Lane")}

        >>> marker_file = open("trigpoints")
        >>> markers = Trigpoints(marker_file)
        >>> for key, value in sorted(markers.items()):
        ...     print("%s - %s" % (key, value))
        500936 - Broom Farm (52°03'57"N, 000°16'53"W alt 37m)
        501097 - Bygrave (52°00'38"N, 000°10'24"W alt 97m)
        505392 - Sish Lane (51°54'39"N, 000°11'11"W alt 136m)
        >>> marker_file.seek(0)
        >>> markers = Trigpoints(marker_file.readlines())
        >>> markers = Trigpoints(open("southern_trigpoints"))
        >>> print(markers[1])
        FakeLand (48°07'23"S, 000°07'23"W alt 12m)
        >>> markers = Trigpoints(open("broken_trigpoints"))
        >>> for key, value in sorted(markers.items()):
        ...     print("%s - %s" % (key, value))
        500968 - Brown Hill Nm  See The Heights (53°38'23"N, 001°39'34"W)
        501414 - Cheriton Hill Nm  See Paddlesworth (51°06'03"N, 001°08'33"E)

        :type marker_file: ``file``, ``list`` or ``str``
        :param marker_file: Trigpoint marker data to read
        :raise ValueError: A field of a waypoint record cannot be converted

        .. _alltrigs-wgs84.txt: http://www.haroldstreet.org.uk/trigpoints.php

        """
        self._marker_file = marker_file
        field_names = ("tag", "identity", "latitude", "longitude", "altitude",
                       "name")

        def hemisphere_parser(positive):
            # Build a parser for values such as "N52.066035": the given
            # hemisphere letter is positive, any other letter negates.
            def parse(text):
                if text[0] == positive:
                    return float(text[1:])
                return 0 - float(text[1:])
            return parse

        def altitude_parser(text):
            # A value of 8888.0 denotes unavailable data
            if text.strip() == "8888.0":
                return None
            return float(text)

        # One parser per entry in field_names, in the same order
        parsers = (str, int, hemisphere_parser("N"), hemisphere_parser("E"),
                   altitude_parser, str)

        rows = utils.prepare_csv_read(marker_file, field_names)
        waypoints = (entry for entry in rows if entry['tag'] == "W")

        for waypoint in waypoints:
            for key, parse in zip(field_names, parsers):
                waypoint[key] = parse(waypoint[key])
            # The tag is only needed for filtering, it isn't a Trigpoint
            # argument
            del waypoint['tag']
            try:
                self[waypoint['identity']] = Trigpoint(**waypoint)
            except TypeError:
                # Workaround formatting error in 506514 entry that contains
                # spurious comma; the surplus fields end up under the
                # ``None`` key
                del waypoint[None]
                self[waypoint['identity']] = Trigpoint(**waypoint)