Example #1
0
def collect_tweets(day):
    """Check that the tweet archives needed for ``day`` exist, then unpack one.

    Three archive paths are derived from ``day`` via ``input0``, ``input1``
    and ``input2``.  All three must exist on disk; only the ``input1``
    archive is decompressed (with ``lzop``) into ``SINGLE_DAY_PATH``,
    overwriting whatever is already there (``--force``).

    NOTE(review): ``input0``/``input2`` presumably point at the neighbouring
    days' archives -- confirm against their definitions.

    Args:
        day: object supporting ``strftime``; passed to the ``input*`` helpers.

    Raises:
        FluDetectorError: if any of the three archive files is missing.
    """
    day_label = day.strftime('%Y-%m-%d')
    logger.debug('Collecting Tweets for %s' % day_label)

    # These two archives only have to exist; they are not unpacked here.
    for locate in (input0, input2):
        if not os.path.isfile(locate(day)):
            raise FluDetectorError(
                "Couldn't find tweets from %s" % locate(day))

    archive = input1(day)
    if os.path.isfile(archive):
        logger.debug("  Decompressing day's Tweets")
        lzop('--decompress', archive, '--output', SINGLE_DAY_PATH, '--force')
        return
    raise FluDetectorError("Couldn't find tweets from %s" % archive)
Example #2
0
def find_matching_index(headers, possible, required=False):
    """Return the position of the first header that is one of ``possible``.

    Args:
        headers: sequence of column header strings, scanned left to right.
        possible: container of acceptable header names.
        required: when true, a missing header is an error instead of ``None``.

    Returns:
        The zero-based index of the first matching header, or ``None`` when
        nothing matches and ``required`` is false.  Note that ``0`` is a
        valid result, so callers must compare against ``None`` rather than
        rely on truthiness.

    Raises:
        FluDetectorError: if nothing matches and ``required`` is true.
    """
    for i, h in enumerate(headers):
        if h in possible:
            return i
    if required:
        e = FluDetectorError('No %s header in CSV file' % ','.join(possible))
        # Not inside an ``except`` block, so there is no active traceback to
        # attach: use error() rather than exception().
        logger.error(e)
        raise e
    return None
Example #3
0
def find_region_index(headers):
    """Map region codes to the CSV column that holds each region's values.

    Every ``(code, name)`` pair in ``REGIONS`` is looked up in ``headers``;
    a column matches if its header equals either the code or the name.

    Args:
        headers: sequence of column header strings.

    Returns:
        dict mapping region code -> zero-based column index, containing only
        the regions that were found.

    Raises:
        FluDetectorError: if no region column is present at all.
    """
    indexes = {}
    # items() rather than iteritems() so this runs on Python 2 and 3 alike.
    for code, name in REGIONS.items():
        index = find_matching_index(headers, [code, name])
        # Column 0 is a legitimate match; a plain truthiness test would
        # silently drop a region stored in the first column.
        if index is not None:
            indexes[code] = index
    if not indexes:
        e = FluDetectorError('No region headers found')
        # No exception is being handled here, so log with error() instead of
        # exception() (which would append an empty traceback).
        logger.error(e)
        raise e
    return indexes
Example #4
0
def run(model, start, end, csv_file=None, **kwargs):
    """Load per-region scores for ``model`` from a CSV file into the database.

    The first CSV row is treated as the header.  It must contain a ``Day``
    or ``Date`` column and at least one region column (see
    ``find_region_index``).  For every data row whose date lies within
    ``[start, end]``, each region value that parses as a float is written to
    a ``ModelScore`` row, updating the existing row for that
    (model, day, region) if there is one and creating it otherwise.  All
    changes are committed once, after the last row.

    Args:
        model: the model the scores belong to; its ``id`` is used to look up
            existing scores.
        start: first day to import (inclusive), comparable to
            ``datetime.date``.
        end: last day to import (inclusive), comparable to ``datetime.date``.
        csv_file: iterable of CSV lines, as accepted by ``csv.reader``.
        **kwargs: accepted and ignored.

    Raises:
        FluDetectorError: if no file is given, the file is empty, or the
            required headers are missing.
        ValueError: if a row's date is not in ``YYYY-MM-DD`` format.
    """
    if csv_file is None:
        raise FluDetectorError('No CSV file provided')
    logger.info('Reading CSV file into %s', model)
    csv_reader = csv.reader(csv_file)

    try:
        headers = next(csv_reader)
    except StopIteration:
        # An empty file has no header row; report it as a domain error
        # instead of leaking a bare StopIteration to the caller.
        raise FluDetectorError('CSV file is empty')

    day_index = find_matching_index(headers, ['Day', 'Date'], required=True)
    region_index = find_region_index(headers)

    logger.debug('Found columns for regions %s',
                 ', '.join(region_index.keys()))

    logger.info('Reading rows...')
    for row_index, row in enumerate(csv_reader):
        # csv.reader yields [] for blank lines, and truncated rows may stop
        # before the date column; neither carries importable data.
        if len(row) <= day_index:
            logger.debug('Skipping row %d, no date column', row_index + 1)
            continue

        day = datetime.strptime(row[day_index], '%Y-%m-%d').date()

        if not start <= day <= end:
            continue

        # items() rather than iteritems() so this runs on Python 2 and 3.
        for region, col_index in region_index.items():
            try:
                value = float(row[col_index])
            except (ValueError, IndexError):
                # ValueError: cell is empty or not numeric.
                # IndexError: row is shorter than the header.
                logger.debug('Skipping row %d column %d, not a float',
                             row_index + 1, col_index)
                continue
            try:
                ms = ModelScore.query.filter_by(model_id=model.id,
                                                day=day,
                                                region=region).one()
            except NoResultFound:
                # First score for this (model, day, region): create it.
                ms = ModelScore()
                ms.region = region
                ms.model = model
                ms.day = day
            ms.value = value
            db.session.add(ms)

    db.session.commit()
    logger.info('Done!')