import os
import warnings

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split
from skafossdk import *

from helpers.logger import get_logger
from helpers.schema import MODEL_SCHEMA
from helpers.modeling import save_model
from helpers.data import fetch_data

warnings.filterwarnings("ignore")

TEST_SIZE = float(os.getenv('TEST_SIZE', 0.2))

log = get_logger('no-show-training')
ska = Skafos()

## Grab data using the Skafos data engine
log.info("Fetching historical appointment data over a 3 month range!")
X, y = fetch_data(engine=ska.engine, location="S3")
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=42
)

## Build model on training data
# NOTE: Perform more feature and hyperparameter tuning
log.info("Building a basic random forest classifier with balanced classes")
rf = RandomForestClassifier(class_weight='balanced')
rf.fit(X_train, y_train)
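## Evaluate on the held-out test set and persist the model
# A minimal sketch of the remaining steps this script sets up for: the
# save_model call shape below is an assumption based on the imports above,
# not a confirmed helpers.modeling API.
preds = rf.predict(X_test)
probs = rf.predict_proba(X_test)[:, 1]
log.info("Test accuracy: {:.3f}".format(accuracy_score(y_test, preds)))
log.info("Test ROC-AUC: {:.3f}".format(roc_auc_score(y_test, probs)))

## Save the trained model to S3 via the Skafos data engine
# Hypothetical call -- adjust to the real save_model helper signature.
save_model(model=rf, engine=ska.engine, schema=MODEL_SCHEMA)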
from skafossdk import *

from social.entity import SocialStatements
from soundcloud.soundcloud_proccessor import SoundcloudProcessor
from helpers.logger import get_logger

# Initialize the skafos sdk
ska = Skafos()
ingest_log = get_logger('user-fetch')

if __name__ == "__main__":
    ingest_log.info('Starting job')
    ingest_log.info('Fetching soundcloud user data')
    entity = SocialStatements(ingest_log, ska.engine)
    processor = SoundcloudProcessor(entity, ingest_log).fetch()
# 1. Check S3 for new files
# 2. If any files less than <time> old, run, otherwise sleep
import os
import glob
from datetime import datetime
from typing import List, Dict

import boto3

from generators import gallery
from helpers.logger import get_logger

logger = get_logger("icu_generator")


class C:
    BUCKET = str(os.getenv("BUCKET"))
    UNPROCESSED = str(os.getenv("UNPROCESSED"))
    TEMP = str(os.getenv("TEMP"))
    CACHE = str(os.getenv("CACHE"))
    LIMIT = int(os.getenv("LIMIT"))
    HUGODIR = str(os.getenv("HUGODIR"))


class Album(object):
    def __init__(self, name):
        self.name: str = name
        self.images: List["Image"] = []  # forward reference to the Image class below
        self.date: datetime = None
        self.date_pretty: str = ""
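# The Album class above annotates its images with an Image type that is not
# included in this snippet. A minimal hypothetical sketch of what that class
# might hold, assuming one entry per gallery photo:
class Image(object):
    def __init__(self, filename):
        self.filename: str = filename  # S3 object key or local path
        self.date: datetime = None     # capture or upload timestamp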
#!/usr/bin/env python3.7
import os
import sys
import time
from multiprocessing import Queue

# Import custom subpackages
from config import config
from helpers import logger, generic
from binders import gps_device_binder
from core import recorder, monitor

# Initialize the logger
logger = logger.get_logger('gps_locator')

if __name__ == '__main__':
    # Clear console
    generic.clear_console()
    logger.info('--------------------------------------------------')
    logger.info(f'Main PID: {os.getpid()}')

    # Initialization
    config_file = "./config/config.json"

    # Setup telemetry queue used by the Monitor and Recorder
    q = Queue()
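    ## Start the Monitor and Recorder around the shared telemetry queue
    # A hypothetical sketch of how the imported core.monitor and
    # core.recorder modules might be wired up; the Monitor/Recorder class
    # names and constructor arguments are assumptions, not this repo's
    # confirmed API, so the calls are left commented out:
    # mon = monitor.Monitor(q, config_file)
    # rec = recorder.Recorder(q, config_file)
    # mon.start()
    # rec.start()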
import time
import warnings

import pandas as pd

from helpers.logger import get_logger
from helpers.modeling import load_latest_model
from helpers.data import normalize_gender, batches, fetch_upcoming, save_predictions
from helpers.schema import PREDICTION_SCHEMA, FEATURES, OUTPUT
from skafossdk import DataSourceType, Skafos

warnings.filterwarnings("ignore")

## Load the most recent noshow model that has been pre-trained and stored on s3
start = time.time()
log = get_logger('no-show-scoring')
ska = Skafos()

log.info("Loading latest pre-trained no-show predictor!")
latest_model = load_latest_model(engine=ska.engine, keyspace='4d5ba8393483f7a07a2ba4ca')

## Pull in upcoming appts from no_shows keyspace
log.info("Loading upcoming appointments")
upcoming = fetch_upcoming(engine=ska.engine, location="S3")
log.info("Loaded {} upcoming appointments to score".format(len(upcoming)))

## Load data to a pandas dataframe and perform some normalization steps
log.info("Prepping data for scoring")
df = normalize_gender(upcoming)
X = df[FEATURES]

## Score the batch of appointments
log.info("Scoring all appointments")
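# A minimal sketch of the scoring and persistence steps this script sets up
# for. The save_predictions signature, the batch size, and the use of the
# batches helper are assumptions based on the imports above, not a confirmed
# helpers.data API.
df['probability'] = latest_model.predict_proba(X)[:, 1]

## Write predictions back out in batches through the Skafos data engine
log.info("Saving predictions")
for batch in batches(df[OUTPUT].to_dict(orient='records'), 500):
    save_predictions(engine=ska.engine, data=batch, schema=PREDICTION_SCHEMA)

log.info("Done in {:.1f} seconds".format(time.time() - start))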
import os
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from dateutil.relativedelta import relativedelta
from dotenv import load_dotenv

from helpers import logger, gsheets, data_cleaning, db_connections, dates

# Load .env file
load_dotenv()

# Define Google Spreadsheets target
REPORT_KEY = os.getenv('RFAM_SAMPLE_REPORT')
REPORT_TAB_UPDATE = 'Last Update'
REPORT_TAB_DATA = 'Data'

log_update = logger.get_logger(REPORT_TAB_UPDATE)
log_data = logger.get_logger(REPORT_TAB_DATA)
client_update = gsheets.db_client(log_update)
client_data = gsheets.db_client(log_data)


# Timestamp
def curdate():
    now = dates.current_datetime_jkt().strftime('%Y-%m-%d %H:%M:%S')
    currentdate = pd.DataFrame(pd.Series([now])).transpose()
    # Push data to Google Spreadsheets
    gsheets.save_sheet(client_update, REPORT_KEY, REPORT_TAB_UPDATE, currentdate, 'B1', False)


curdate()
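# client_data and the Data tab are set up above, but the step that builds and
# pushes the report frame is not included in this snippet. A hypothetical
# sketch of what it might look like -- fetch_report_frame is an assumed
# helper, not this repo's confirmed db_connections API, so it is left
# commented out:
# df_report = db_connections.fetch_report_frame(log_data)
# gsheets.save_sheet(client_data, REPORT_KEY, REPORT_TAB_DATA, df_report, 'A1', True)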
from skafossdk import *

from social.entity import SocialStatements
from gramlist.gramlist_proccessor import GramlistProcessor
from helpers.logger import get_logger

# Initialize the skafos sdk
ska = Skafos()
ingest_log = get_logger('gramlist-fetch')

if __name__ == "__main__":
    ingest_log.info('Starting job')
    ingest_log.info('Fetching gramlist user data')
    entity = SocialStatements(ingest_log, ska.engine)
    processor = GramlistProcessor(entity, ingest_log).fetch()