def __init__(self):
    """
    Read the AWS credentials through ``secrets.get`` and reset every other
    private field of the instance to ``None``.
    """
    read_secret = secrets.get

    # Credentials are looked up under the "AWS_ACCESS_KEY" / "AWS_SECRET_KEY" names.
    self.__aws_access_key = read_secret("AWS_ACCESS_KEY")
    self.__aws_secret_key = read_secret("AWS_SECRET_KEY")

    # The two accident-type constants this instance works with.
    self.__accidents_types = [ACCIDENTS_TYPE_1, ACCIDENTS_TYPE_3]

    # Everything below starts unset; nothing in this constructor populates it.
    self.__s3_resource = self.__s3_bucket = None
    self.__temp_directory = self.__local_files_directory = None
    self.__current_year = self.__download_from_s3_callback = None
def scrape(screen_name, latest_tweet_id=None, count=100):
    """
    Lazily yield the parsed recent tweets of ``screen_name``.

    :param screen_name: twitter screen name whose timeline is fetched
    :param latest_tweet_id: when given, passed to the API as ``since_id``;
        when None, no ``since_id`` filter is sent
    :param count: passed to the API as ``count`` in both cases
    :return: a generator of ``parse_tweet(tweet, screen_name)`` results
    """
    credentials = tweepy.OAuthHandler(
        secrets.get("TWITTER_CONSUMER_KEY"), secrets.get("TWITTER_CONSUMER_SECRET")
    )
    credentials.set_access_token(
        secrets.get("TWITTER_ACCESS_KEY"), secrets.get("TWITTER_ACCESS_SECRET")
    )

    # We use JSONParser to enable testing
    client = tweepy.API(credentials, parser=tweepy.parsers.JSONParser())

    # Without a known latest tweet (no tweets in the DB) the plain timeline is
    # fetched; otherwise the request is additionally limited by since_id.
    timeline_query = {
        "screen_name": screen_name,
        "count": count,
        "tweet_mode": "extended",
    }
    if latest_tweet_id is not None:
        timeline_query["since_id"] = latest_tweet_id
    # FIX (carried over from the original): why is the count param also sent
    # together with since_id?

    for raw_tweet in client.user_timeline(**timeline_query):
        yield parse_tweet(raw_tweet, screen_name)
def geocode_extract(location):
    """
    Geocode a free-text location with Google Maps and summarise the first hit.

    The API key is not a parameter: it is read via ``secrets.get("GOOGLE_MAPS_KEY")``.
    The lookup is made with ``region="il"``.

    :param location: string representing location
    :return: ``None`` when Google Maps returns no results. Otherwise a dict
        with the keys: street, road_no [road number], intersection, city,
        address, subdistrict, district and geom (the ``lat``/``lng`` of the
        location). Keys that could not be determined are ``None``. If anything
        raises during the lookup the exception is logged and the dict is still
        returned with whatever was collected up to that point.
    """
    street = None
    road_no = None
    intersection = None
    subdistrict = None
    city = None
    district = None
    address = None
    geom = {"lat": None, "lng": None}
    try:
        gmaps = googlemaps.Client(key=secrets.get("GOOGLE_MAPS_KEY"))
        geocode_result = gmaps.geocode(location, region="il")
        if not geocode_result:
            return None

        # Only the first (best) match is used.
        response = geocode_result[0]
        geom = response["geometry"]["location"]
        for item in response["address_components"]:
            types = item["types"]
            if "route" in types:
                # A purely numeric route name is a road number, anything else a street.
                if item["short_name"].isdigit():
                    road_no = int(item["short_name"])
                else:
                    street = item["long_name"]
            elif "point_of_interest" in types or "intersection" in types:
                intersection = item["long_name"]
            elif "locality" in types:
                city = item["long_name"]
            elif "administrative_area_level_2" in types:
                subdistrict = item["long_name"]
            elif "administrative_area_level_1" in types:
                district = item["long_name"]
        address = response["formatted_address"]

        # Fall back to a road number found in the raw text; the helper is
        # called once and a None result simply leaves road_no unset.
        if road_no is None:
            road_no = extract_road_number(location)
    except Exception:
        # Deliberate best-effort: log and fall through to return partial data.
        logging.exception("geocode extract location {}".format(location))
    return {
        "street": street,
        "road_no": road_no,
        "intersection": intersection,
        "city": city,
        "address": address,
        "subdistrict": subdistrict,
        "district": district,
        "geom": geom,
    }
def geocode_extract(location):
    """
    Geocode a free-text location with Google Maps, trying each candidate
    string from ``get_candidate_location_strings(location)`` in turn.

    The API key is not a parameter: it is read via ``secrets.get("GOOGLE_MAPS_KEY")``.
    Lookups are made with ``region="il"``. A candidate that returns no results
    is skipped; the first candidate that returns results (or raises) ends the
    search.

    :param location: string representing location
    :return: ``None`` when no candidate string produced a result. Otherwise a
        dict with the keys: street, road_no [road number], intersection, city,
        address, subdistrict, district and geom (the ``lat``/``lng`` of the
        location). Keys that could not be determined are ``None``. If a lookup
        raises, the exception is logged and the dict is returned with whatever
        was collected up to that point.
    """
    street = None
    road_no = None
    intersection = None
    subdistrict = None
    city = None
    district = None
    address = None
    geom = {"lat": None, "lng": None}
    for candidate_location_string in get_candidate_location_strings(location):
        try:
            logging.debug(f'using location string: "{candidate_location_string}"')
            # Kept inside the try so a client/key failure is handled like any
            # other lookup failure.
            gmaps = googlemaps.Client(key=secrets.get("GOOGLE_MAPS_KEY"))
            geocode_result = gmaps.geocode(candidate_location_string, region="il")

            # if we got no results, move to next iteration of location string
            if not geocode_result:
                logging.warning(
                    f'location string: "{candidate_location_string}" returned no results from gmaps'
                )
                continue

            # Only the first (best) match is used.
            response = geocode_result[0]
            geom = response["geometry"]["location"]
            for item in response["address_components"]:
                types = item["types"]
                if "route" in types:
                    # A purely numeric route name is a road number, anything else a street.
                    if item["short_name"].isdigit():
                        road_no = int(item["short_name"])
                    else:
                        street = item["long_name"]
                elif "point_of_interest" in types or "intersection" in types:
                    intersection = item["long_name"]
                elif "locality" in types:
                    city = item["long_name"]
                elif "administrative_area_level_2" in types:
                    subdistrict = item["long_name"]
                elif "administrative_area_level_1" in types:
                    district = item["long_name"]
            address = response["formatted_address"]

            # Fall back to a road number found in the candidate text; the helper
            # is called once and a None result simply leaves road_no unset.
            if road_no is None:
                road_no = extract_road_number(candidate_location_string)
        except Exception:
            # Deliberate best-effort: log and fall through to return partial data.
            logging.exception(
                f'exception caught while extracting geocode location for: "{candidate_location_string}"'
            )

        return {
            "street": street,
            "road_no": road_no,
            "intersection": intersection,
            "city": city,
            "address": address,
            "subdistrict": subdistrict,
            "district": district,
            "geom": geom,
        }

    # we can no longer rectify the location string, log and return None
    # (logging.error, not logging.exception: no exception is being handled here)
    logging.error(f"Failed to extract location for {location}")
    return None
def __init__(self):
    """
    Read the mail credentials through ``secrets.get``; the IMAP session
    attribute starts as ``None``.
    """
    read_secret = secrets.get

    # Credentials are looked up under the "MAILUSER" / "MAILPASS" names.
    self._username = read_secret("MAILUSER")
    self._password = read_secret("MAILPASS")

    # No session is opened by the constructor.
    self.imap_session = None
def __init__(self):
    """
    Read the AWS credentials through ``secrets.get``; the S3 resource, bucket
    and client attributes start as ``None``.
    """
    read_secret = secrets.get

    # Credentials are looked up under the "AWS_ACCESS_KEY" / "AWS_SECRET_KEY" names.
    self._aws_access_key = read_secret("AWS_ACCESS_KEY")
    self._aws_secret_key = read_secret("AWS_SECRET_KEY")

    # Nothing is connected by the constructor.
    self._s3_resource = self._s3_bucket = None
    self._client = None
def _parse_mail_date(date_header):
    """Parse a mail ``Date`` header into a naive datetime, dropping the zone suffix."""
    try:
        # Drops the last 6 characters, e.g. a trailing " +0200".
        return datetime.strptime(date_header[:-6], "%a, %d %b %Y %H:%M:%S")
    except ValueError:
        # Drops the last 12 characters, e.g. a trailing " +0000 (UTC)".
        return datetime.strptime(date_header[:-12], "%a, %d %b %Y %H:%M:%S")


def _close_imap_session(imapsession):
    """Best-effort ``close()`` then ``logout()``; cleanup failures are only logged."""
    for action in (imapsession.close, imapsession.logout):
        try:
            action()
        except (imaplib.IMAP4.error, OSError):
            # e.g. close() is rejected when no mailbox was ever selected.
            logging.debug("IMAP %s failed during cleanup", action.__name__, exc_info=True)


def main(detach_dir, username=None, password=None, email_search_start_date=""):
    """
    Save the first not-yet-downloaded ``.zip`` attachment found in the mailbox.

    Logs in to ``imap.gmail.com`` over SSL, selects the module-level
    ``mail_dir`` mailbox and walks the messages in the order the search returns
    them. Each ``.zip`` attachment is stored in ``detach_dir`` as
    ``cbs_data-<date>_<hour>-<minute>.zip``, named after the mail's ``Date``
    header. If that file already exists the rest of the message is skipped.
    The walk stops after the first message from which a file was written.

    :param detach_dir: directory to write attachments to (created if missing)
    :param username: mailbox user; falls back to ``secrets.get("MAILUSER")``
    :param password: mailbox password; falls back to ``secrets.get("MAILPASS")``
    :param email_search_start_date: optional ``dd.mm.YYYY`` date; when given,
        only mails SINCE that date are searched, otherwise ALL mails
    :return: path of the last ``.zip`` attachment considered (written or
        already present), or ``None`` if none was seen or an unexpected error
        occurred (the error is logged)
    :raises SystemExit: when credentials are missing, login is rejected, or the
        mailbox search fails
    """
    try:
        username = username or secrets.get("MAILUSER")
        password = password or secrets.get("MAILPASS")
        if not username:
            logging.error(
                "Username not set. Please set env var MAILUSER or use the --username argument"
            )
        if not password:
            logging.error(
                "Password not set. Please set env var MAILPASS or use the --password argument"
            )
        if not username or not password:
            sys.exit()

        imapsession = imaplib.IMAP4_SSL("imap.gmail.com")
        try:
            try:
                imapsession.login(username, password)
            except imaplib.IMAP4.error:
                logging.error("Bad credentials, unable to sign in!")
                sys.exit()

            try:
                imapsession.select(mail_dir)
                if email_search_start_date == "":
                    _, data = imapsession.search(None, "ALL")
                else:
                    # IMAP expects dates like 01-Jan-2018.
                    search_start_date = datetime.strptime(
                        email_search_start_date, "%d.%m.%Y"
                    ).strftime("%d-%b-%Y")
                    _, data = imapsession.search(
                        None, '(SINCE "{0}")'.format(search_start_date)
                    )
            except imaplib.IMAP4.error:
                logging.error("Error searching given mailbox: %s", mail_dir)
                sys.exit()

            file_found = False
            os.makedirs(detach_dir, exist_ok=True)
            total = 0

            # Iterating over all emails
            started = datetime.now()
            logging.info("Login successful! Importing files, please hold...")
            filepath = None
            for msg_id in data[0].split():
                typ, message_parts = imapsession.fetch(msg_id, "(RFC822)")
                if typ != "OK":
                    logging.error("Error fetching mail.")
                    raise Exception("Error fetching mail")

                email_body = message_parts[0][1]
                # imaplib hands back bytes on Python 3; message_from_string
                # would raise TypeError on them.
                if isinstance(email_body, bytes):
                    mail = email.message_from_bytes(email_body)
                else:
                    mail = email.message_from_string(email_body)
                mtime = _parse_mail_date(mail["Date"])

                for part in mail.walk():
                    if (
                        part.get_content_maintype() == "multipart"
                        or part.get("Content-Disposition") is None
                    ):
                        continue
                    filename = part.get_filename()
                    if filename and filename.endswith(".zip"):
                        filename = "{0}-{1}_{2}-{3}.zip".format(
                            "cbs_data", mtime.date(), mtime.hour, mtime.minute
                        )
                        filepath = os.path.join(detach_dir, filename)
                        # Already downloaded: skip the rest of this message.
                        if os.path.isfile(filepath):
                            break
                        total += 1
                        print("Currently loading: " + filename + " ")
                        # ANSI escape: move the cursor up so the next status
                        # line overwrites this one.
                        sys.stdout.write("\033[F")
                        time.sleep(0.1)
                        with open(filepath, "wb") as fp:
                            fp.write(part.get_payload(decode=True))
                        file_found = True
                # One message that yielded a new file is enough.
                if file_found:
                    break

            logging.info("Imported {0} file(s) in {1}".format(total, time_delta(started)))
            return filepath
        finally:
            # Runs on success, on errors and on sys.exit() alike.
            _close_imap_session(imapsession)
    except Exception:
        # Todo - send an error email to anyway email
        # Best-effort contract kept (callers get None), but no longer silent.
        logging.exception("Failed to import files from mailbox")
        return None