def get_stackoverflow_data(self, model_dir, test_size, random_state=None):
        """Retrieves the stackoverflow dataset and preprocesses this by splitting and tokenizing.
        (https://storage.googleapis.com/tensorflow-workshop-examples/stack-overflow-data.csv)

        Args:
            model_dir: String. Path to where the trained model is saved.
            test_size: Float. Fraction of the dataset to use for test.
            random_state: Int. Seed for train/test split.

        Returns:
            x_train: List. Training examples.
            x_test: List. Test examples.
            y_train: List. Encoded labels for training set.
            y_test: List. Encoded labels for test set.
            label_encoder: sklearn.preprocessing.LabelEncoder()
        """
        logger = custom_logger.get_logger()
        if not os.path.exists("/app/data/stack-overflow-data.csv"):
            logger.info("Downloading stackoverflow data.")
            pf.utils.fetch_url(
                "https://storage.googleapis.com/tensorflow-workshop-examples/stack-overflow-data.csv",
                fetch_dir="/app/data/",
            )
            logger.info("Finished downloading data.")
        df = pd.read_csv("/app/data/stack-overflow-data.csv")
        df_train, df_test = train_test_split(df,
                                             test_size=test_size,
                                             random_state=random_state)

        label_encoder = preprocessing.LabelEncoder()
        label_encoder.fit(df["tags"])
        np.save(os.path.join(model_dir, "label_encoder.npy"),
                label_encoder.classes_)

        y_train = label_encoder.transform(df_train["tags"])
        y_test = label_encoder.transform(df_test["tags"])

        logger.info("Converting data to {} format...".format(
            self.model_type.upper()))
        x_train = self.convert_data(df_train["post"])
        x_test = self.convert_data(df_test["post"])
        logger.info("Finished converting data to {} format".format(
            self.model_type.upper()))

        logger.info("x_train shape: {}".format(x_train.shape))
        logger.info("y_train shape: {}".format(y_train.shape))
        return x_train, x_test, y_train, y_test, label_encoder
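
A minimal usage sketch for the method above, assuming it belongs to a preprocessing class (called Preprocessor here) that defines model_type and convert_data; the class name, model_dir path, and split parameters are illustrative assumptions, not from the original code.

# Hypothetical usage; Preprocessor, the model_dir path and the split
# parameters are assumptions for illustration only.
preprocessor = Preprocessor(model_name="uncased_L-12_H-768_A-12")
x_train, x_test, y_train, y_test, label_encoder = preprocessor.get_stackoverflow_data(
    model_dir="/app/models", test_size=0.2, random_state=42)
print(label_encoder.classes_)  # the tag names used as class labels
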
Example #2
import re, base64
import tensorflow as tf
from tensorflow.keras.models import model_from_json
from config import MNIST, APP
from custom_logger import get_logger

LOGGER = get_logger(__name__)


def stringToImage(img):
    # Extract the base64 payload from a data URL such as "data:image/png;base64,...."
    # (tolerating an optional space after the comma) and write the decoded bytes
    # to the app's media directory.
    imgstr = re.search(r'base64,\s*(.*)', str(img)).group(1)
    with open(APP.MEDIA / 'image.png', 'wb') as out:
        out.write(base64.b64decode(imgstr))


def load_model(json_model: str, weights: str):
    with open(MNIST.SAVED_MODELS / json_model) as f:
        model = model_from_json(f.read())
    model.load_weights(str(MNIST.SAVED_MODELS / weights))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'],
                  run_eagerly=True)
    graph = tf.compat.v1.get_default_graph()
    LOGGER.debug("Loaded model %s with graph %s", type(model), type(graph))
    return graph, model
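
A short usage sketch for the two helpers above; the saved-model file names and the data-URL payload are illustrative assumptions.

# Hypothetical usage; the file names and the payload value are assumptions.
data_url_payload = "data:image/png;base64,aGVsbG8="  # placeholder payload, not a real PNG
stringToImage(data_url_payload)  # writes the decoded bytes to APP.MEDIA / 'image.png'
graph, model = load_model('model.json', 'weights.h5')
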
Example #3
import sys
import multiprocessing as multiproc
import custom_logger
import datetime

import ztorch_simulation as zsim

if __name__ == '__main__':

    start_time = datetime.datetime.utcnow()

    logger = custom_logger.get_logger('Data_Generation')
    logger.info('Starting data generation...')

    num_time_steps = 1000
    if len(sys.argv) > 1:
        num_time_steps = int(sys.argv[1])

    # (std, num_vnf_profiles, num_time_steps, output_file_prefix)
    params = [(0.1, 750, num_time_steps, True),
              (0.1, 1000, num_time_steps, True),
              (0.1, 1250, num_time_steps, True),
              (0.06, 1000, num_time_steps, True),
              (0.08, 1000, num_time_steps, True),
              (0.12, 1000, num_time_steps, True)]

    procs = []

    for param in params:
        proc = multiproc.Process(target=zsim.Simulation, args=param)
        proc.start()
        procs.append(proc)

    # Wait for every simulation process to finish.
    for proc in procs:
        proc.join()
Example #4
import sys
import custom_logger
import datetime

import matplotlib.pyplot as plt
from scipy.interpolate import interp1d
from scipy.stats import pearsonr

import numpy as np

import ztorch_simulation as zsim

if __name__ == '__main__':

    start_time = datetime.datetime.utcnow()

    logger = custom_logger.get_logger('Run_Simulations')
    logger.info('Starting simulations...')

    num_time_steps = 10000
    if len(sys.argv) > 1:
        num_time_steps = int(sys.argv[1])

    on_the_fly = True
    if len(sys.argv) > 2:
        # bool("False") is truthy, so parse the flag explicitly.
        on_the_fly = sys.argv[2].lower() in ('1', 'true', 'yes')

    # (std, num_vnf_profiles, num_time_steps, output_file_prefix, input_file_prefix)
    params = [
        #{
        #    'std': 0.50,
        #    'num_init_profiles': 100,
Example #5
    if attached_file is not None:
        logger.info(f"attach file: {attached_file}")
        with open(attached_file, "r") as f:
            msg.add_attachment(f.read(),
                               filename=os.path.basename(attached_file))
    else:
        logger.info("No attachments")
    logger.info(f"List emails: {receiver_emails}")
    for one_receiver in receiver_emails:
        # Message headers accumulate, so clear any previous 'To' value
        # before assigning the next recipient.
        del msg['To']
        msg['To'] = one_receiver
        try:
            server.sendmail(sender_email, one_receiver, msg.as_string())
        except Exception:
            logger.error(f"ERROR: Can't send email to {one_receiver}:\n" +
                         traceback.format_exc())
    server.quit()
    logger.debug(">>>>send_email.send_mail end")


if __name__ == "__main__":
    import custom_logger

    program_file = os.path.realpath(__file__)
    logger = custom_logger.get_logger(program_file=program_file)

    receiver_emails = SETTINGS.settings['recipient_emails']
    subject = "DEBUG: send_email"
    message = "DEBUG: send_email"
    # attached_file = None
    attached_file = logger.handlers[0].baseFilename
    send_email(receiver_emails, subject, message, logger, attached_file)
Example #6

#!/usr/bin/env python
import subprocess
import datetime

from custom_logger import get_logger

import sys
__author__ = 'cenk'

logger = get_logger()


def hourly_rollup(args):
    keyspace = args[1]
    now = datetime.datetime.now()
    end_time = datetime.datetime(now.year, now.month, now.day, now.hour, 0)
    # strftime("%s") is a non-portable GNU extension; timestamp() is equivalent here.
    end_time = int(end_time.timestamp())
    start_time = end_time - (60 * 60)
    logger.debug("End Time: %s, Start Time: %s", end_time, start_time)
    command = "nohup /data/spark/bin/spark-submit --class net.egemsoft.rrd.Main  " \
              "--master spark://ipam-ulus-db-2  target/cassandra-spark-rollup-1.0-driver.jar " \
              " spMaster=spark://ipam-ulus-db-2:7077 casHost=ipam-ulus-db-2 " \
              "casKeyspace=%s casTable=metric rollup=300 start=%s end=%s destRollup=3600 ttl=7776000 &\n" % (
                  keyspace, start_time, end_time)
    logger.debug("Command: %s", command)
    try:
        p = subprocess.call(command, shell=True, stdout=subprocess.PIPE, cwd="/home/sparkuser/cassandra-spark-rollup")
        logger.debug(p)
    except Exception as e:
        logger.error(str(e))
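
The snippet does not show an entry point; below is a minimal sketch, assuming the keyspace is passed as the first command-line argument.

# Hypothetical entry point; the original snippet does not show how
# hourly_rollup is invoked, so this guard is an assumption.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        logger.error("Usage: hourly_rollup.py <keyspace>")
        sys.exit(1)
    hourly_rollup(sys.argv)
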
Example #7
    "albert_large_v2",
    "albert_xlarge_v2",
    "albert_xxlarge_v2",
]

supported_bert_models = [
    "uncased_L-12_H-768_A-12",
    "uncased_L-24_H-1024_A-16",
    "cased_L-12_H-768_A-12",
    "cased_L-24_H-1024_A-16",
    "multi_cased_L-12_H-768_A-12",
    "wwm_uncased_L-24_H-1024_A-16",
    "wwm_cased_L-24_H-1024_A-16",
]

logger = custom_logger.get_logger()


def validate_model(model_name):
    """Validates the provided model name.
    Args:
        model_name: String. Name of the model. See supported models at the top.

    Returns:
        model_type: String. Either "albert" or "bert".
    """
    if model_name in supported_albert_models:
        model_type = "albert"
    elif model_name in supported_bert_models:
        model_type = "bert"
    else:
        # The model name is in neither supported list.
        raise ValueError("Model {} is not supported.".format(model_name))
    return model_type


def get_process_list():
    """Return last 10 items sorted dict processes by process.memory_percent()"""
    process = {
        p.memory_percent(): p.info
        for p in psutil.process_iter(['name', 'username'])
    }
    if PASSWORDS.DEBUG:
        logger.debug(f"process:\n{pprint.pformat(process)}")
    process = dict((sorted(process.items(), reverse=True))[:10])
    # print(process)
    return process
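
A hedged illustration of how the returned dict might be consumed; the printed names and percentages are made up.

# Hypothetical usage; process names and percentages are illustrative only.
top = get_process_list()
for mem_pct, info in top.items():
    print(f"{mem_pct:5.2f}%  {info['name']}  ({info['username']})")
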


if __name__ == "__main__":
    program_file = os.path.realpath(__file__)
    logger = get_logger(program_file=program_file)
    print(f"Log file: {logger.handlers[0].baseFilename}")
    counter = 1
    logger.info(">>>> BEGIN PROBE >>>>")
    while True:
        work_done = False
        memory_utilization = psutil.virtual_memory().percent  # float
        logger.info(memory_utilization)
        if memory_utilization > 90:
            logger.info(f">>>> probe #{counter}")
            process_list = pprint.pformat(get_process_list())
            receiver_emails = PASSWORDS.settings['recipient_emails']
            subject = "MaxMemoryUtilization"
            message = f"List processes:\n{process_list}"
            logger.info(message)
            # attached_file = None