def import_to_datastore(directory, provider_code, batch_size):
    """
    Goes through all the files in a given directory, parses and commits them.
    """
    try:
        files_from_lms = dict(get_files(directory))
        if len(files_from_lms) == 0:
            return 0
        logging.info("Importing '{}'".format(directory))
        started = datetime.now()

        accidents = list(import_accidents(provider_code=provider_code, **files_from_lms))
        db.session.execute(Marker.__table__.insert(), accidents)
        db.session.commit()

        involved = list(import_involved(provider_code=provider_code, **files_from_lms))
        db.session.execute(Involved.__table__.insert(), involved)
        db.session.commit()

        vehicles = list(import_vehicles(provider_code=provider_code, **files_from_lms))
        db.session.execute(Vehicle.__table__.insert(), vehicles)
        db.session.commit()

        total = len(accidents) + len(involved) + len(vehicles)
        logging.info("\t{0} items in {1}".format(total, time_delta(started)))
        return total
    except ValueError as e:
        failed_dirs[directory] = e.message
        return 0
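# batch_size is accepted above but never used. A hedged sketch of how the bulk
# inserts could honor it; chunks() is a hypothetical helper, not project code:
def chunks(rows, size):
    """Yield successive size-sized slices of a list of row dicts."""
    for i in xrange(0, len(rows), size):
        yield rows[i:i + size]

# usage sketch, in place of one monolithic bulk insert:
#   for chunk in chunks(accidents, batch_size):
#       db.session.execute(Marker.__table__.insert(), chunk)
#   db.session.commit()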
def import_to_datastore(directory, provider_code, batch_size):
    """
    Goes through all the files in a given directory, parses and commits them.
    Skips accidents whose (id, provider_code) pair is already in the DB.
    """
    try:
        files_from_lms = dict(get_files(directory))
        if len(files_from_lms) == 0:
            return 0
        logging.info("Importing '{}'".format(directory))
        started = datetime.now()

        accidents = list(import_accidents(provider_code=provider_code, **files_from_lms))
        # keep only accidents not already present in the Marker table
        new_ids = [m["id"] for m in accidents
                   if 0 == Marker.query.filter(and_(Marker.id == m["id"],
                                                    Marker.provider_code == m["provider_code"])).count()]
        if not new_ids:
            logging.info("\t\tNothing loaded, all accidents already in DB")
            return 0

        db.session.execute(Marker.__table__.insert(),
                           [m for m in accidents if m["id"] in new_ids])
        db.session.commit()

        involved = list(import_involved(provider_code=provider_code, **files_from_lms))
        db.session.execute(Involved.__table__.insert(),
                           [i for i in involved if i["accident_id"] in new_ids])
        db.session.commit()

        vehicles = list(import_vehicles(provider_code=provider_code, **files_from_lms))
        db.session.execute(Vehicle.__table__.insert(),
                           [v for v in vehicles if v["accident_id"] in new_ids])
        db.session.commit()

        # total counts all parsed rows, including those filtered out above
        total = len(accidents) + len(involved) + len(vehicles)
        logging.info("\t{0} items in {1}".format(total, time_delta(started)))
        return total
    except ValueError as e:
        failed_dirs[directory] = e.message
        return 0
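# The per-row membership tests above scan new_ids (a Python list) once per
# record; for large imports a set makes each lookup O(1). A minimal sketch of
# the filtering step, meant to slot into the function body right after new_ids
# is built; _filter_new() is a hypothetical helper, not project code:
def _filter_new(rows, key, new_ids):
    # keep only rows whose key belongs to the set of newly-seen accident ids
    new_id_set = set(new_ids)
    return [row for row in rows if row[key] in new_id_set]

# e.g. db.session.execute(Marker.__table__.insert(),
#                         _filter_new(accidents, "id", new_ids))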
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--specific_folder', dest='specific_folder', action='store_true', default=False)
    parser.add_argument('--delete_all', dest='delete_all', action='store_true', default=True)
    parser.add_argument('--path', type=str, default="static/data/lms")
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('--provider_code', type=int)
    args = parser.parse_args()

    if args.specific_folder:
        if fileDialog:
            dir_name = tkFileDialog.askdirectory(initialdir=os.path.abspath(args.path),
                                                 title='Please select a directory')
        else:
            dir_name = raw_input('Please provide the directory path: ')
        dir_list = [dir_name]
        if args.delete_all:
            confirm_delete_all = raw_input("Are you sure you want to delete all the current data? (y/n)\n")
            if confirm_delete_all.lower() == 'n':
                args.delete_all = False
    else:
        dir_list = glob.glob("{0}/*/*".format(args.path))

    # wipe all the Markers and Involved data first
    if args.delete_all:
        tables = (Vehicle, Involved, Marker)
        logging.info("Deleting tables: " + ", ".join(table.__name__ for table in tables))
        for table in tables:
            db.session.query(table).delete()
        db.session.commit()

    started = datetime.now()
    total = 0L
    for directory in dir_list:
        # the provider code is derived from the parent directory name unless given explicitly
        parent_directory = os.path.basename(os.path.dirname(os.path.join(os.pardir, directory)))
        provider_code = args.provider_code if args.provider_code else get_provider_code(parent_directory)
        total += import_to_datastore(directory, provider_code, args.batch_size)

    delete_invalid_entries()

    failed = ["\t'{0}' ({1})".format(directory, fail_reason)
              for directory, fail_reason in failed_dirs.iteritems()]
    logging.info("Finished processing all directories{0}{1}".format(", except:\n" if failed else "",
                                                                    "\n".join(failed)))
    logging.info("Total: {0} items in {1}".format(total, time_delta(started)))
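# Example invocations of the importer above (the script name is illustrative;
# the flags and the default path are the ones defined in main()):
#   python process.py                      # import everything under static/data/lms/*/*
#   python process.py --provider_code 3   # force one provider code for all directories
#   python process.py --specific_folder   # pick a single directory via dialog/prompt
# Note that --delete_all is declared with action='store_true' and default=True,
# so passing the flag is a no-op; the data wipe is only skipped when the user
# answers 'n' at the --specific_folder confirmation prompt.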
def main(username=None, password=None, lastmail=False):
    username = username or os.environ.get("MAILUSER")
    password = password or os.environ.get("MAILPASS")
    if not username:
        logging.error("Username not set. Please set env var MAILUSER or use the --username argument")
    if not password:
        logging.error("Password not set. Please set env var MAILPASS or use the --password argument")
    if not username or not password:
        exit()

    imapsession = imaplib.IMAP4_SSL("imap.gmail.com")
    try:
        imapsession.login(username, password)
    except imaplib.IMAP4.error:
        logging.error("Bad credentials, unable to sign in!")
        exit()

    try:
        imapsession.select(mail_dir)
        typ, data = imapsession.search(None, "ALL")
    except imaplib.IMAP4.error:
        logging.error("Error searching given mailbox: %s" % mail_dir)
        exit()

    file_found = False
    listdir = os.listdir(detach_dir)
    is_empty = len(listdir) <= 1 or not lastmail
    total = 0

    # Iterating over all emails
    started = datetime.now()
    logging.info("Login successful! Importing files, please hold...")
    for msgId in data[0].split():
        typ, message_parts = imapsession.fetch(msgId, "(RFC822)")
        if typ != "OK":
            logging.error("Error fetching mail.")
            raise Exception("Error fetching mail.")

        email_body = message_parts[0][1]
        mail = email.message_from_string(email_body)
        mtime = datetime.strptime(mail["Date"][:-6], "%a, %d %b %Y %H:%M:%S")
        if not is_empty:
            # Accident folder is not empty, we only need the latest
            if datetime.now() - mtime < timedelta(hours=4):
                file_found = True
            else:
                continue

        # Handles Gmail bug which hasn't physically removed some of the deleted files
        mail_date = datetime(2015, 10, 6, 10)
        if mtime < mail_date:
            continue

        for part in mail.walk():
            if part.get_content_maintype() == "multipart" or part.get("Content-Disposition") is None:
                continue
            filename = part.get_filename()
            if bool(filename) and filename.endswith(".csv"):
                filename = "UH-{0}_{1}-{2}.csv".format(mtime.date(), mtime.hour, mtime.minute)
                filepath = os.path.join(detach_dir, filename)
                if os.path.isfile(filepath):
                    break
                total += 1
                # pad the status line so it overwrites the previous one, then move the cursor back up
                print "Currently loading: " + filename + "          "
                sys.stdout.write("\033[F")
                time.sleep(0.1)
                with open(filepath, "wb") as fp:
                    fp.write(part.get_payload(decode=True))

        if file_found:
            break

    logging.info("Imported {0} file(s) in {1}".format(total, time_delta(started)))
    imapsession.close()
    imapsession.logout()
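# mail["Date"][:-6] above assumes every Date header ends with a fixed-width
# timezone suffix; headers without one (or with a trailing comment such as
# "(UTC)") will crash strptime. A more tolerant sketch using only the stdlib;
# parse_mail_date() is a hypothetical replacement for the mtime line, with
# behavior otherwise unchanged:
from email.utils import mktime_tz, parsedate_tz

def parse_mail_date(date_header):
    # parsedate_tz copes with missing weekdays, numeric zones and comments
    parsed = parsedate_tz(date_header)
    if parsed is None:
        raise ValueError("Unparseable Date header: %r" % date_header)
    return datetime.fromtimestamp(mktime_tz(parsed))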
# (fragment: tail of TemperatureClient.parse_data)
            continue
        self.cache[prop] = [response.get('daily').get('data')[0].get(prop)]


if __name__ == '__main__':
    temperature_client = TemperatureClient('https://api.darksky.net/forecast')
    properties = ['time', 'sunriseTime', 'sunsetTime', 'temperatureHigh',
                  'dewPoint', 'humidity', 'windSpeed', 'cloudCover']
    start_date = starting_date()
    end_date = start_date + year()
    # locations = ['USA_AK_FAIRBANKS', 'USA_CA_LOS_ANGELES', 'USA_IL_CHICAGO-OHARE', 'USA_MN_MINNEAPOLIS', 'USA_TX_HOUSTON', 'USA_WA_SEATTLE']
    locations = ['USA_NV_LAS_VEGAS', 'USA_CA_SAN_FRANCISCO', 'USA_AZ_PHOENIX',
                 'USA_GA_ATLANTA', 'USA_MD_BALTIMORE', 'USA_CO_BOULDER']
    for location in locations:
        temperature_client.cache = {}
        coordinates = get_lat_lng(location)
        current_date = start_date
        while current_date <= end_date:
            response = temperature_client.get_data(coordinates + ',' + str(current_date))
            temperature_client.parse_data(response, properties)
            current_date = current_date + time_delta()
        data = temperature_client.get_cache()
        temperature_data = pd.DataFrame.from_dict(data)
        temperature_data.to_csv('data/' + location + '_temperature_usage.csv')
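# The __main__ block above depends on TemperatureClient and several helpers
# (starting_date, year, time_delta, get_lat_lng) that this fragment does not
# include. A minimal sketch of the client inferred from its call sites; the
# names and endpoint format are assumptions, and a real Dark Sky request would
# also carry an API key segment not shown in the fragment:
import requests

class TemperatureClient(object):
    def __init__(self, base_url):
        self.base_url = base_url
        self.cache = {}

    def get_data(self, query):
        # query arrives as "<lat>,<lng>,<unix_time>" per the loop above
        return requests.get(self.base_url + '/' + query).json()

    def parse_data(self, response, properties):
        # mirrors the fragment: record each property of the first daily data
        # point (the fragment assigns a one-element list; appending, as here,
        # would be needed to accumulate a full year per location)
        for prop in properties:
            value = response.get('daily', {}).get('data', [{}])[0].get(prop)
            self.cache.setdefault(prop, []).append(value)

    def get_cache(self):
        return self.cache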