コード例 #1
0
ファイル: importer.py プロジェクト: MarkBennett/ichnaea
def load_file(session, source_file, batch_size=10000):
    """Import wifi measurements from a tab-separated file.

    Every row is read as ``time, lat, lon, signal, channel, key``
    (columns 0-5) and handed to ``process_measure`` as a single-wifi
    measurement. Rows are skipped, and not counted, when they are too
    short, when time/signal/channel are not integers, when the timestamp
    cannot be converted to a datetime, or when the normalized key is
    all zeros.

    :param session: passed through to ``process_measure``; ``flush()`` is
        called on it after every ``batch_size`` accepted rows and once
        more after the file has been read.
    :param source_file: path of the tab-separated input file.
    :param batch_size: number of accepted rows between flushes.
    :returns: the number of rows handed to ``process_measure``.
    """
    utcnow = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)
    # substitute timestamp for rows whose time column is 0: 120 days ago
    utcmin = utcnow - datetime.timedelta(120)

    with open(source_file, 'r') as fd:
        reader = csv.reader(fd, delimiter='\t', quotechar=None)
        counter = 0

        for fields in reader:
            try:
                time = int(fields[0])
                if time == 0:  # pragma: no cover
                    # unknown time gets an old date
                    time = utcmin
                else:
                    # convert from unixtime to utc
                    # NOTE(review): this yields a naive datetime while
                    # utcmin/utcnow are timezone-aware - confirm that
                    # process_measure copes with both.
                    time = datetime.datetime.utcfromtimestamp(time)

                key = normalize_wifi_key(str(fields[5]))
                if key == '000000000000':  # pragma: no cover
                    continue

                lat = fields[1]
                lon = fields[2]
                signal = int(fields[3])
                channel = int(fields[4])
            except (ValueError, IndexError, OverflowError, OSError):
                # Malformed row: short, non-numeric, or carrying a timestamp
                # outside the platform's supported range (utcfromtimestamp
                # raises OverflowError/OSError for those on Python 3).
                # Skip it instead of aborting the whole import.
                continue

            wifi = dict(
                key=key,
                channel=channel,
                signal=signal,
            )
            data = dict(
                lat=lat,
                lon=lon,
                time=time,
                accuracy=0,
                altitude=0,
                altitude_accuracy=0,
                radio='',
                cell=(),
                wifi=[wifi],
            )
            # side effect, schedules async tasks
            process_measure(data, utcnow, session)

            # flush after every batch_size accepted records
            counter += 1
            if counter % batch_size == 0:
                session.flush()
                print('Added %s records.' % counter)

    # add the rest
    session.flush()
    return counter
コード例 #2
0
ファイル: importer.py プロジェクト: MarkBennett/ichnaea
def load_file(session, source_file, batch_size=10000):
    """Import wifi measurements from a tab-separated file.

    Every row is read as ``time, lat, lon, signal, channel, key``
    (columns 0-5) and handed to ``process_measure`` as a single-wifi
    measurement. Rows are skipped, and not counted, when they are too
    short, when time/signal/channel are not integers, when the timestamp
    cannot be converted to a datetime, or when the normalized key is
    all zeros.

    :param session: passed through to ``process_measure``; ``flush()`` is
        called on it after every ``batch_size`` accepted rows and once
        more after the file has been read.
    :param source_file: path of the tab-separated input file.
    :param batch_size: number of accepted rows between flushes.
    :returns: the number of rows handed to ``process_measure``.
    """
    utcnow = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)
    # substitute timestamp for rows whose time column is 0: 120 days ago
    utcmin = utcnow - datetime.timedelta(120)

    with open(source_file, 'r') as fd:
        reader = csv.reader(fd, delimiter='\t', quotechar=None)
        counter = 0

        for fields in reader:
            try:
                time = int(fields[0])
                if time == 0:  # pragma: no cover
                    # unknown time gets an old date
                    time = utcmin
                else:
                    # convert from unixtime to utc
                    # NOTE(review): this yields a naive datetime while
                    # utcmin/utcnow are timezone-aware - confirm that
                    # process_measure copes with both.
                    time = datetime.datetime.utcfromtimestamp(time)

                key = normalize_wifi_key(str(fields[5]))
                if key == '000000000000':  # pragma: no cover
                    continue

                lat = fields[1]
                lon = fields[2]
                signal = int(fields[3])
                channel = int(fields[4])
            except (ValueError, IndexError, OverflowError, OSError):
                # Malformed row: short, non-numeric, or carrying a timestamp
                # outside the platform's supported range (utcfromtimestamp
                # raises OverflowError/OSError for those on Python 3).
                # Skip it instead of aborting the whole import.
                continue

            wifi = dict(
                key=key,
                channel=channel,
                signal=signal,
            )
            data = dict(
                lat=lat,
                lon=lon,
                time=time,
                accuracy=0,
                altitude=0,
                altitude_accuracy=0,
                radio='',
                cell=(),
                wifi=[wifi],
            )
            # side effect, schedules async tasks
            process_measure(data, utcnow, session)

            # flush after every batch_size accepted records
            counter += 1
            if counter % batch_size == 0:
                session.flush()
                print('Added %s records.' % counter)

    # add the rest
    session.flush()
    return counter