예제 #1
0
from geopy.geocoders import Nominatim #open street map -- free :)

# import sqlalchemy
from foodbornenyc.models.models import get_db_session
from foodbornenyc.models.download_history import YelpDownloadHistory
from foodbornenyc.models.businesses import Business, YelpCategory
from foodbornenyc.models.businesses import businesses, categories, business_category_table
from foodbornenyc.models.locations import Location, locations, location_id
from foodbornenyc.models.documents import YelpReview, Document
from foodbornenyc.models.documents import yelp_reviews, documents, document_associations

from foodbornenyc.settings import yelp_download_config as config
from foodbornenyc.db_settings import database_config as dbconfig

from foodbornenyc.util.util import get_logger, xstr, xuni, sec_to_hms
logger = get_logger(__name__, level='INFO')

def download_url_to_file(url, data_dir, filename):
    """Download a url to local file.

        Write to file as stream. This keeps a low memory footprint

        Args:
            url (str): the url to download from

            data_dir (str): the local directory to write the file

            filename (str): the name of the file to write

        Returns:
            None
예제 #2
0
Simple Twitter search: query for keywords once every 5 seconds and save the results to the database.
"""
from time import time, sleep

from twython import Twython
from twython.exceptions import TwythonError
from sqlalchemy.exc import OperationalError

from foodbornenyc.models.documents import Tweet
from foodbornenyc.db_settings import twitter_config
from foodbornenyc.sources.twitter.util \
    import tweet_to_Tweet, user_to_TwitterUser, place_to_Location,\
           reset_location_cache, twitter, db

from foodbornenyc.util.util import sec_to_hms, get_logger, xuni
logger = get_logger(__name__, level="INFO")


# Terms for the Twitter keyword search, in their original order.
# Entries that carry embedded double quotes are presumably exact-phrase
# queries -- TODO confirm against the search API's operator rules.
search_terms = [
    '#foodpoisoning', '#stomachache',          # hashtags
    '"food poison"', '"food poisoning"',       # quoted phrases
    'stomach', 'vomit', 'puke', 'diarrhea',    # bare keywords
    '"the runs"',                              # quoted phrase
]

def search(keywords, since_id=None, count=100):
예제 #3
0
import datetime
from time import time

from sqlalchemy import func, select
from sqlalchemy.exc import OperationalError
from sklearn.externals import joblib


from foodbornenyc.settings import yelp_classify_config as config

from foodbornenyc.models.models import get_db_session
from foodbornenyc.models.documents import YelpReview, documents
#import foodbornenyc.models.businesses

from foodbornenyc.util.util import get_logger, sec_to_hms
logger = get_logger(__name__)

class YelpClassify(object):
    """Method to classify yelp reviews already loaded into the database"""
    def __init__(self, sick_path=None):
        """ Initialize the method object and it's classifier

        Args:
            sick_path (str): if specified, load the classifier from this path
                             else load from config

        Returns:
            None

        """
        if sick_path: