Beispiel #1
0
import os
import warnings

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split
from skafossdk import *

from helpers.logger import get_logger
from helpers.schema import MODEL_SCHEMA
from helpers.modeling import save_model
from helpers.data import fetch_data

# Silence third-party warning chatter (e.g. sklearn deprecations) in job logs.
warnings.filterwarnings("ignore")

# Fraction of the data held out for evaluation; override via the TEST_SIZE
# environment variable (defaults to 0.2).
TEST_SIZE = float(os.getenv('TEST_SIZE', 0.2))
log = get_logger('no-show-training')
# Skafos SDK handle; its data engine backs the fetch below.
ska = Skafos()

## Grab data using the Skafos data engine
log.info("Fetching historical appointment data over a 3 month range!")
X, y = fetch_data(engine=ska.engine, location="S3")
# Fixed seed keeps the split deterministic across repeated training runs.
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=TEST_SIZE,
                                                    random_state=42)

## Build model on training data
# NOTE: Perform more feature and hyperparameter tuning
log.info("Building a basic random forest classifier with balanced classes")
# 'balanced' reweights classes inversely to frequency — no-shows are
# presumably the minority class here; confirm against the fetched data.
rf = RandomForestClassifier(class_weight='balanced')
rf.fit(X_train, y_train)
Beispiel #2
0
from skafossdk import *
from social.entity import SocialStatements
from soundcloud.soundcloud_proccessor import SoundcloudProcessor
from helpers.logger import get_logger


# Initialize the Skafos SDK client; its engine backs the data access below.
ska = Skafos()

ingest_log = get_logger('user-fetch')

if __name__ == "__main__":
    ingest_log.info('Starting job')

    # Build the social-statements entity, then run the Soundcloud fetch.
    ingest_log.info('Fetching soundcloud user data')
    statements = SocialStatements(ingest_log, ska.engine)
    processor = SoundcloudProcessor(statements, ingest_log).fetch()
Beispiel #3
0
# 1. Check S3 for new files
# 2. If any files less than <time> old, run, otherwise sleep
import os
import boto3
import glob
from typing import List, Dict


from generators import gallery
from helpers.logger import get_logger
from datetime import datetime


logger = get_logger("icu_generator")


class C:
    """Runtime configuration, read once from environment variables at import."""

    # NOTE(review): str(os.getenv(...)) turns a *missing* variable into the
    # literal string "None" instead of failing fast — verify every variable
    # is guaranteed to be set in the deployment environment.
    BUCKET = str(os.getenv("BUCKET"))
    UNPROCESSED = str(os.getenv("UNPROCESSED"))
    TEMP = str(os.getenv("TEMP"))
    CACHE = str(os.getenv("CACHE"))
    # NOTE(review): int(None) raises TypeError when LIMIT is unset, crashing
    # at import time — confirm this fail-fast behavior is intended.
    LIMIT = int(os.getenv("LIMIT"))
    HUGODIR = str(os.getenv("HUGODIR"))


class Album(object):
    """A named collection of images plus the album's (optional) date."""

    def __init__(self, name):
        """Create an empty album titled *name*; contents are filled in later."""
        # Album title, used as the gallery key.
        self.name: str = name
        # Images are appended as they are discovered; starts empty.
        self.images: List[Image] = []
        # Raw datetime and its pretty-printed form, both set by the caller.
        self.date: datetime = None
        self.date_pretty: str = ""
Beispiel #4
0
#!/usr/bin/env python3.7

from multiprocessing import Queue

# Import custom subpackages
from config import config
from helpers import logger, generic
from binders import gps_device_binder
from core import recorder, monitor

import os
import sys
import time

# Initialize the logger
# NOTE(review): this rebinds the imported `logger` module name to a Logger
# instance, so `logger.get_logger` is unreachable after this line — consider
# a distinct variable name.
logger = logger.get_logger('gps_locator')

if __name__ == '__main__':

    # Clear console
    generic.clear_console()

    # Visual separator plus the main process PID for log correlation.
    logger.info(f'--------------------------------------------------')
    logger.info(f'Main PID: {os.getpid()}')

    # Initialization
    # Path to the JSON configuration consumed by the components below;
    # presumably loaded later in this script (continuation not shown here).
    config_file = "./config/config.json"

    # Setup telemetry queue used by the Monitor and Recorder
    q = Queue()
Beispiel #5
0
import time
import warnings
import pandas as pd
from helpers.logger import get_logger
from helpers.modeling import load_latest_model
from helpers.data import normalize_gender, batches, fetch_upcoming, save_predictions
from helpers.schema import PREDICTION_SCHEMA, FEATURES, OUTPUT
from skafossdk import DataSourceType, Skafos

# Silence third-party warning chatter in job logs.
warnings.filterwarnings("ignore")

## Load the most recent noshow model that has been pre-trained and stored on s3
# Wall-clock start for timing the scoring run (the end-of-run log presumably
# appears later in this script — continuation not shown here).
start = time.time()
log = get_logger('no-show-scoring')
ska = Skafos()
log.info("Loading latest pre-trained no-show predictor!")
# Keyspace is a fixed project identifier for the stored model artifacts.
latest_model = load_latest_model(engine=ska.engine,
                                 keyspace='4d5ba8393483f7a07a2ba4ca')

## Pull in upcoming appts from no_shows keyspace
log.info("Loading upcoming appointments")
upcoming = fetch_upcoming(engine=ska.engine, location="S3")
log.info("Loaded {} upcoming appointments to score".format(len(upcoming)))

## Load data to a pandas dataframe and perform some normalization steps
log.info("Prepping data for scoring")
# FEATURES selects the model's input columns — TODO confirm the column set
# matches what the model was trained on.
df = normalize_gender(upcoming)
X = df[FEATURES]

## Score the batch of appointments
log.info("Scoring all appointments")
from dotenv import load_dotenv
from helpers import logger, gsheets, data_cleaning, db_connections, dates
import os
import pandas as pd
from datetime import datetime, timedelta
import numpy as np
from dateutil.relativedelta import relativedelta

# Load .env file
load_dotenv()

# Define Google Spreadsheets target
# Spreadsheet key comes from the environment; tab names are fixed literals.
REPORT_KEY = os.getenv('RFAM_SAMPLE_REPORT')
REPORT_TAB_UPDATE = 'Last Update'
REPORT_TAB_DATA = 'Data'
# One logger and one gsheets client per tab being written.
# NOTE(review): db_client is constructed at import time — this performs
# whatever connection/auth work the helper does as a module side effect.
log_update = logger.get_logger(REPORT_TAB_UPDATE)
log_data = logger.get_logger(REPORT_TAB_DATA)
client_update = gsheets.db_client(log_update)
client_data = gsheets.db_client(log_data)


# Timestamp
def curdate():
    """Write the current Jakarta-time timestamp to the 'Last Update' tab."""
    stamp = dates.current_datetime_jkt().strftime('%Y-%m-%d %H:%M:%S')
    # Single-cell (1x1) frame holding the timestamp string.
    frame = pd.DataFrame([[stamp]])
    # Push data to Google Spreadsheets, anchored at cell B1, no header row.
    gsheets.save_sheet(client_update, REPORT_KEY, REPORT_TAB_UPDATE,
                       frame, 'B1', False)


curdate()
Beispiel #7
0
from skafossdk import *
from social.entity import SocialStatements
from gramlist.gramlist_proccessor import GramlistProcessor
from helpers.logger import get_logger

# Initialize the Skafos SDK client; its engine backs the data access below.
ska = Skafos()

ingest_log = get_logger('gramlist-fetch')

if __name__ == "__main__":
    ingest_log.info('Starting job')

    # Build the social-statements entity, then run the Gramlist fetch.
    ingest_log.info('Fetching gramlist user data')
    statements = SocialStatements(ingest_log, ska.engine)
    processor = GramlistProcessor(statements, ingest_log).fetch()