def collect_google_scores(terms, start, end):
    """Query the Google Trends health API for each term and yield GoogleScores."""
    logging.getLogger('googleapiclient.discovery_cache').setLevel(logging.ERROR)
    logger.info('Querying %d terms between %s and %s' % (len(terms), start, end))
    logger.debug(', '.join(t.term for t in terms))
    service = build(
        'trends',
        'v1beta',
        developerKey=os.environ['GOOGLE_API_KEY'],
        discoveryServiceUrl='https://www.googleapis.com/discovery/v1/apis/trends/v1beta/rest',
        cache_discovery=False)
    graph = service.getTimelinesForHealth(
        terms=[t.term for t in terms],
        geoRestriction_region='GB-ENG',
        time_startDate=start.strftime('%Y-%m-%d'),
        time_endDate=end.strftime('%Y-%m-%d'),
        timelineResolution='day')
    try:
        response = graph.execute()
    except HttpError as e:
        logger.exception(e)
        raise  # re-raise as-is to preserve the original traceback
    for line in response['lines']:
        term = next(t for t in terms if t.term == line['term'])
        for point in line['points']:
            day = datetime.strptime(point['date'], '%b %d %Y').date()
            gs = get_google_score(term, day)
            gs.value = float(point['value'])
            yield gs
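# A minimal sketch of how this generator might be consumed, assuming the
# SQLAlchemy `db.session` used elsewhere in this module and a model whose
# `google_terms` relationship holds the terms to query:
#
#     for gs in collect_google_scores(model.google_terms, start, end):
#         db.session.add(gs)
#     db.session.commit()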
def calculate_moving_averages(model, day):
    """Yield (term, moving average) pairs for each of the model's Google terms."""
    window_size = model.get_data()['average_window_size']
    logger.debug('Calculating %d-day averages on %s' % (window_size, day))
    for term in model.google_terms:
        avg = calculate_moving_average(term, day, window_size)
        if avg is None:
            continue
        yield (term.term, avg)
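# `calculate_moving_average` is defined elsewhere in the codebase; this is a
# hypothetical sketch of the behaviour the call site above relies on (the
# `google_scores` relationship and its field names are assumptions):
#
#     def calculate_moving_average(term, day, window_size):
#         window = [gs.value for gs in term.google_scores
#                   if day - timedelta(days=window_size) < gs.day <= day]
#         return sum(window) / len(window) if window else None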
def shrew(day):
    """Run the Shrew pipeline over the Tweets collected on the given day."""
    logger.debug('Running Shrew over Tweets from %s' % day.strftime('%Y-%m-%d'))
    cmd = sh.Command('/home/deploy/shrew/shrew')
    cmd('run',
        '/home/deploy/fludetector/fludetector/sources/twitter/shrew-profile.yaml',
        '--as', 'local',
        '--day', day.strftime('%Y%m%d'),
        '--input0', input0(day),
        '--input1', input1(day),
        '--input2', input2(day),
        '--output', SHREW_OUTPUT_PATH)
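# For reference, the call above is roughly equivalent to this shell invocation
# (flags are taken from the call site; the date and input paths are illustrative):
#
#     /home/deploy/shrew/shrew run \
#         /home/deploy/fludetector/fludetector/sources/twitter/shrew-profile.yaml \
#         --as local --day 20160101 \
#         --input0 <input0> --input1 <input1> --input2 <input2> \
#         --output $SHREW_OUTPUT_PATH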
def send_to_matlab(model, averages):
    """Run the model's MATLAB function over the averaged scores on fmedia13."""
    # Write the (term, average) pairs to a temp file and copy it to fmedia13.
    fd = tempfile.NamedTemporaryFile()
    fd.write('\n'.join('%s,%f' % a for a in averages))
    fd.flush()
    logger.debug('Sending query list and GoogleScores to fmedia13')
    scp(fd.name, 'fmedia13:/tmp/fludetector_google_matlab_input')
    fd.close()
    fmedia13 = ssh.bake('fmedia13')
    # Statement list executed by `matlab -r` on the remote host.
    matlab_cmd = ';'.join([
        "fin='/tmp/fludetector_google_matlab_input'",
        "fout='/tmp/fludetector_google_matlab_output'",
        "cd /home/vlampos/website_v2",
        "run('gpml/startup.m')",
        "%s(fin,fout)" % model.get_data()['matlab_function'],
        "exit"])
    logger.debug('Running matlab function over scores')
    fmedia13('matlab', '-nodisplay', '-nojvm', '-r', '"%s"' % matlab_cmd)
    logger.debug('Reading matlab results back')
    value = float(fmedia13('cat', '/tmp/fludetector_google_matlab_output').strip())
    logger.debug('Cleaning up temp files')
    fmedia13('rm', '/tmp/fludetector_google_matlab_input',
             '/tmp/fludetector_google_matlab_output')
    return value
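# The remote invocation assembled above expands to roughly the following,
# where `gp_model` stands in for the model's stored `matlab_function` name
# (illustrative):
#
#     ssh fmedia13 matlab -nodisplay -nojvm -r \
#         "fin='/tmp/fludetector_google_matlab_input';fout='/tmp/fludetector_google_matlab_output';cd /home/vlampos/website_v2;run('gpml/startup.m');gp_model(fin,fout);exit"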
def collect_tweets(day):
    """Check that all three Tweet inputs exist for the day, then decompress the single-day file."""
    logger.debug('Collecting Tweets for %s' % day.strftime('%Y-%m-%d'))
    if not os.path.isfile(input0(day)):
        raise FluDetectorError("Couldn't find tweets from %s" % input0(day))
    if not os.path.isfile(input2(day)):
        raise FluDetectorError("Couldn't find tweets from %s" % input2(day))
    path = input1(day)
    if not os.path.isfile(path):
        raise FluDetectorError("Couldn't find tweets from %s" % path)
    logger.debug("Decompressing day's Tweets")
    lzop('--decompress', path, '--output', SINGLE_DAY_PATH, '--force')
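# The decompression step corresponds to this shell command (input path illustrative):
#
#     lzop --decompress /path/to/tweets.lzo --output $SINGLE_DAY_PATH --force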
def run(model, start, end, csv_file=None, **kwargs):
    """Read ModelScores for a CSV-backed model between start and end into the database."""
    if csv_file is None:
        raise FluDetectorError('No CSV file provided')
    logger.info('Reading CSV file into %s' % str(model))
    csv_reader = csv.reader(csv_file)
    headers = next(csv_reader)
    day_index = find_matching_index(headers, ['Day', 'Date'], required=True)
    region_index = find_region_index(headers)
    logger.debug('Found columns for regions %s' % ', '.join(region_index.keys()))
    logger.info('Reading rows...')
    for row_index, row in enumerate(csv_reader):
        day = datetime.strptime(row[day_index], '%Y-%m-%d').date()
        if day < start or day > end:
            continue
        for region, col_index in region_index.iteritems():
            try:
                value = float(row[col_index])
            except ValueError:
                logger.debug('Skipping row %d column %d, not a float' % (
                    row_index + 1, col_index))
                continue
            try:
                # Update the existing score for this (model, day, region) if
                # there is one, otherwise create it.
                ms = ModelScore.query.filter_by(
                    model_id=model.id, day=day, region=region).one()
            except NoResultFound:
                ms = ModelScore()
                ms.region = region
                ms.model = model
                ms.day = day
            ms.value = value
            db.session.add(ms)
    db.session.commit()
    logger.info('Done!')
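# The expected CSV layout, assuming a `Day` (or `Date`) column plus one column
# per region; the region names and values here are illustrative:
#
#     Day,England,London
#     2016-01-04,0.52,0.61
#     2016-01-05,0.49,0.58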