Ejemplo n.º 1
0
 def __init__(self):
     """Read the AWS key pair from ``secrets`` and start with every other field unset."""
     # Credentials are looked up by name in the project's secrets store.
     self.__aws_access_key = secrets.get("AWS_ACCESS_KEY")
     self.__aws_secret_key = secrets.get("AWS_SECRET_KEY")

     # The accident types this instance works with.
     self.__accidents_types = [ACCIDENTS_TYPE_1, ACCIDENTS_TYPE_3]

     # S3 handles, working directories, year and callback all begin as None.
     self.__s3_resource = self.__s3_bucket = None
     self.__temp_directory = self.__local_files_directory = None
     self.__current_year = self.__download_from_s3_callback = None
Ejemplo n.º 2
0
def scrape(screen_name, latest_tweet_id=None, count=100):
    """
    Lazily yield the parsed recent tweets of a Twitter user.

    :param screen_name: screen name of the account whose timeline is read
    :param latest_tweet_id: when given, it is passed as ``since_id`` to the timeline request
    :param count: value passed as ``count`` to the timeline request
    :return: generator yielding ``parse_tweet(tweet, screen_name)`` for every timeline entry
    """
    consumer_key = secrets.get("TWITTER_CONSUMER_KEY")
    consumer_secret = secrets.get("TWITTER_CONSUMER_SECRET")
    oauth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    oauth.set_access_token(secrets.get("TWITTER_ACCESS_KEY"), secrets.get("TWITTER_ACCESS_SECRET"))
    # JSONParser is used so the API responses are easy to fake in tests.
    client = tweepy.API(oauth, parser=tweepy.parsers.JSONParser())

    # Without a known latest tweet id, the most recent `count` tweets are requested.
    timeline_params = {"screen_name": screen_name, "count": count, "tweet_mode": "extended"}
    if latest_tweet_id is not None:
        # NOTE(review): it is an open question whether `count` is still needed
        # once `since_id` is supplied.
        timeline_params["since_id"] = latest_tweet_id

    for raw_tweet in client.user_timeline(**timeline_params):
        yield parse_tweet(raw_tweet, screen_name)
Ejemplo n.º 3
0
def geocode_extract(location):
    """
    Geocode a free-text location with Google Maps and describe the first match.

    The API key is read from ``secrets`` under "GOOGLE_MAPS_KEY" and the request is
    sent with ``region="il"``. Only the first geocoding result is inspected.

    :param location: string representing location
    :return: None when Google Maps returns no results. Otherwise a dict with the keys
        street, road_no [road number], intersection, city, address, subdistrict,
        district and geom [the location geometry]. If the lookup raises, the exception
        is logged and the dict is still returned, holding whatever was collected before
        the failure (all None when nothing was).
    """
    street = None
    road_no = None
    intersection = None
    subdistrict = None
    city = None
    district = None
    address = None
    geom = {"lat": None, "lng": None}
    try:
        gmaps = googlemaps.Client(key=secrets.get("GOOGLE_MAPS_KEY"))
        geocode_result = gmaps.geocode(location, region="il")
        if not geocode_result:
            return None
        response = geocode_result[0]
        geom = response["geometry"]["location"]
        for item in response["address_components"]:
            types = item["types"]
            if "route" in types:
                # A purely numeric route name is treated as a road number.
                if item["short_name"].isdigit():
                    road_no = int(item["short_name"])
                else:
                    street = item["long_name"]
            elif "point_of_interest" in types or "intersection" in types:
                intersection = item["long_name"]
            elif "locality" in types:
                city = item["long_name"]
            elif "administrative_area_level_2" in types:
                subdistrict = item["long_name"]
            elif "administrative_area_level_1" in types:
                district = item["long_name"]
        address = response["formatted_address"]
        if road_no is None:
            # Fall back to the road number in the raw text; stays None if there is none.
            road_no = extract_road_number(location)
    except Exception:
        # Best effort: log the failure and return what has been gathered so far.
        logging.exception("geocode extract location %s", location)

    return {
        "street": street,
        "road_no": road_no,
        "intersection": intersection,
        "city": city,
        "address": address,
        "subdistrict": subdistrict,
        "district": district,
        "geom": geom,
    }
Ejemplo n.º 4
0
def geocode_extract(location):
    """
    Geocode a location with Google Maps, trying candidate location strings in turn.

    Candidates come from ``get_candidate_location_strings(location)``. A candidate for
    which Google Maps returns no results is skipped. The first candidate that either
    yields a result or raises ends the search. The API key is read from ``secrets``
    under "GOOGLE_MAPS_KEY" and requests are sent with ``region="il"``.

    :param location: string representing location
    :return: a dict with the keys street, road_no [road number], intersection, city,
        address, subdistrict, district and geom [the location geometry]. After an
        exception the dict holds whatever was collected before the failure. Returns
        None when no candidate produced any result.
    """
    street = None
    road_no = None
    intersection = None
    subdistrict = None
    city = None
    district = None
    address = None
    geom = {"lat": None, "lng": None}
    for candidate_location_string in get_candidate_location_strings(location):
        try:
            logging.debug(f'using location string: "{candidate_location_string}"')
            gmaps = googlemaps.Client(key=secrets.get("GOOGLE_MAPS_KEY"))
            geocode_result = gmaps.geocode(candidate_location_string, region="il")

            # if we got no results, move to next iteration of location string
            if not geocode_result:
                logging.warning(
                    f'location string: "{candidate_location_string}" returned no results from gmaps'
                )
                continue

            response = geocode_result[0]
            geom = response["geometry"]["location"]
            for item in response["address_components"]:
                types = item["types"]
                if "route" in types:
                    # A purely numeric route name is treated as a road number.
                    if item["short_name"].isdigit():
                        road_no = int(item["short_name"])
                    else:
                        street = item["long_name"]
                elif "point_of_interest" in types or "intersection" in types:
                    intersection = item["long_name"]
                elif "locality" in types:
                    city = item["long_name"]
                elif "administrative_area_level_2" in types:
                    subdistrict = item["long_name"]
                elif "administrative_area_level_1" in types:
                    district = item["long_name"]
            address = response["formatted_address"]
            if road_no is None:
                # Fall back to the road number in the candidate text; stays None if absent.
                road_no = extract_road_number(candidate_location_string)
        except Exception:
            logging.exception(
                f'exception caught while extracting geocode location for: "{candidate_location_string}"'
            )

        # Reached on success and after a logged exception alike; only the
        # "no results" case above moves on to the next candidate.
        return {
            "street": street,
            "road_no": road_no,
            "intersection": intersection,
            "city": city,
            "address": address,
            "subdistrict": subdistrict,
            "district": district,
            "geom": geom,
        }

    # we can no longer rectify the location string, log and return None
    # (logging.error, not logging.exception: no exception is being handled here)
    logging.error(f"Failed to extract location for {location}")
    return None
Ejemplo n.º 5
0
 def __init__(self):
     """Read the mail credentials from ``secrets``; no IMAP session is set yet."""
     # Both lookups run left to right before either attribute is assigned.
     self._username, self._password = secrets.get("MAILUSER"), secrets.get("MAILPASS")
     # Starts as None.
     self.imap_session = None
Ejemplo n.º 6
0
 def __init__(self):
     """Read the AWS key pair from ``secrets`` and leave the S3 handles and client unset."""
     # Credentials are looked up by name in the project's secrets store.
     self._aws_access_key = secrets.get("AWS_ACCESS_KEY")
     self._aws_secret_key = secrets.get("AWS_SECRET_KEY")

     # Resource, bucket and client all start as None.
     self._s3_resource = self._s3_bucket = self._client = None
Ejemplo n.º 7
0
def main(detach_dir, username=None, password=None, email_search_start_date=""):
    """
    Save the first new ``.zip`` mail attachment found in the mailbox to ``detach_dir``.

    Logs in to imap.gmail.com, selects the module-level ``mail_dir`` mailbox and
    visits the messages in the order the IMAP search returns them. Every ``.zip``
    attachment is stored as ``cbs_data-<date>_<hour>-<minute>.zip``, named after the
    mail's Date header. If that file already exists, the rest of that message is
    skipped. The scan stops after the first message that had a new file written.

    :param detach_dir: directory the attachment is written to (created if missing)
    :param username: mail user; falls back to ``secrets.get("MAILUSER")``
    :param password: mail password; falls back to ``secrets.get("MAILPASS")``
    :param email_search_start_date: optional "dd.mm.YYYY" date; when non-empty only
        mails since that date are searched, otherwise all mails are
    :return: path of the last zip file considered (which may be a file that already
        existed), or None when no zip attachment was seen or an error occurred
    :raises SystemExit: when credentials are missing, login is rejected or the
        mailbox search fails
    """
    imapsession = None
    try:
        username = username or secrets.get("MAILUSER")
        password = password or secrets.get("MAILPASS")
        if not username:
            logging.error(
                "Username not set. Please set env var MAILUSER or use the --username argument"
            )
        if not password:
            logging.error(
                "Password not set. Please set env var MAILPASS or use the --password argument"
            )
        if not username or not password:
            sys.exit()

        imapsession = imaplib.IMAP4_SSL("imap.gmail.com")
        try:
            imapsession.login(username, password)
        except imaplib.IMAP4.error:
            logging.error("Bad credentials, unable to sign in!")
            sys.exit()

        try:
            imapsession.select(mail_dir)
            if email_search_start_date == "":
                typ, data = imapsession.search(None, "ALL")
            else:
                # IMAP SINCE expects dd-Mon-YYYY, the caller supplies dd.mm.YYYY.
                search_start_date = datetime.strptime(
                    email_search_start_date, "%d.%m.%Y"
                ).strftime("%d-%b-%Y")
                typ, data = imapsession.search(None, '(SINCE "{0}")'.format(search_start_date))
        except imaplib.IMAP4.error:
            logging.error("Error searching given mailbox: %s", mail_dir)
            sys.exit()

        file_found = False
        os.makedirs(detach_dir, exist_ok=True)
        total = 0

        # Iterating over all emails
        started = datetime.now()
        logging.info("Login successful! Importing files, please hold...")
        filepath = None
        for msgId in data[0].split():
            typ, message_parts = imapsession.fetch(msgId, "(RFC822)")
            if typ != "OK":
                logging.error("Error fetching mail.")
                raise Exception("Error fetching mail")

            email_body = message_parts[0][1]
            # imaplib hands back bytes on Python 3; parse with the matching function.
            if isinstance(email_body, bytes):
                mail = email.message_from_bytes(email_body)
            else:
                mail = email.message_from_string(email_body)
            # The Date header is parsed after cutting its tail: 6 characters
            # first, 12 if that attempt does not match the format.
            try:
                mtime = datetime.strptime(mail["Date"][:-6], "%a, %d %b %Y %H:%M:%S")
            except ValueError:
                mtime = datetime.strptime(mail["Date"][:-12], "%a, %d %b %Y %H:%M:%S")

            for part in mail.walk():
                if (
                    part.get_content_maintype() == "multipart"
                    or part.get("Content-Disposition") is None
                ):
                    continue
                filename = part.get_filename()

                if filename and filename.endswith(".zip"):
                    filename = "{0}-{1}_{2}-{3}.zip".format(
                        "cbs_data", mtime.date(), mtime.hour, mtime.minute
                    )
                    filepath = os.path.join(detach_dir, filename)
                    if os.path.isfile(filepath):
                        # Already downloaded: skip the rest of this message.
                        break
                    total += 1
                    print("Currently loading: " + filename + "       ")
                    # Move the cursor up one line so the next progress line overwrites this one.
                    sys.stdout.write("\033[F")
                    time.sleep(0.1)
                    with open(filepath, "wb") as fp:
                        fp.write(part.get_payload(decode=True))
                    file_found = True

            if file_found:
                break

        logging.info("Imported {0} file(s) in {1}".format(total, time_delta(started)))
        return filepath
    except Exception:
        # Still best effort (callers get None), but the failure is no longer invisible.
        # TODO: send an error email to anyway email
        logging.exception("Failed to import mail attachments")
        return None
    finally:
        # Release the connection on every path, including SystemExit.
        if imapsession is not None:
            try:
                # close() is only valid once a mailbox has been selected.
                imapsession.close()
            except (imaplib.IMAP4.error, OSError):
                logging.debug("IMAP close failed", exc_info=True)
            try:
                imapsession.logout()
            except (imaplib.IMAP4.error, OSError):
                logging.debug("IMAP logout failed", exc_info=True)