def get_stackoverflow_data(self, model_dir, test_size, random_state=None):
    """Retrieves the stackoverflow dataset and preprocesses it by splitting
    and tokenizing.

    (https://storage.googleapis.com/tensorflow-workshop-examples/stack-overflow-data.csv)

    Args:
        model_dir: String. Path to where the trained model is saved.
        test_size: Float. Fraction of the dataset to use for test.
        random_state: Int. Seed for train/test split.

    Returns:
        x_train: List. Training examples.
        x_test: List. Test examples.
        y_train: List. Encoded labels for training set.
        y_test: List. Encoded labels for test set.
        label_encoder: sklearn.preprocessing.LabelEncoder()
    """
    logger = custom_logger.get_logger()
    if not os.path.exists("/app/data/stack-overflow-data.csv"):
        logger.info("Downloading stackoverflow data.")
        pf.utils.fetch_url(
            "https://storage.googleapis.com/tensorflow-workshop-examples/stack-overflow-data.csv",
            fetch_dir="/app/data/",
        )
        logger.info("Finished downloading data.")

    df = pd.read_csv("/app/data/stack-overflow-data.csv")
    df_train, df_test = train_test_split(df, test_size=test_size,
                                         random_state=random_state)

    # Fit the encoder on the full dataset and persist the classes so that
    # inference can decode predicted indices back to tag names.
    label_encoder = preprocessing.LabelEncoder()
    label_encoder.fit(df["tags"])
    np.save(os.path.join(model_dir, "label_encoder.npy"), label_encoder.classes_)
    y_train = label_encoder.transform(df_train["tags"])
    y_test = label_encoder.transform(df_test["tags"])

    logger.info("Converting data to {} format...".format(self.model_type.upper()))
    x_train = self.convert_data(df_train["post"])
    x_test = self.convert_data(df_test["post"])
    logger.info("Finished converting data to {} format".format(self.model_type.upper()))

    logger.info("x_train shape: {}".format(x_train.shape))
    logger.info("y_train shape: {}".format(y_train.shape))
    return x_train, x_test, y_train, y_test, label_encoder
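# A minimal usage sketch for get_stackoverflow_data (hypothetical: it assumes
# the method lives on a preprocessor class, here called TextPreprocessor, that
# defines model_type and convert_data; the class name and constructor
# arguments below are illustrative, not part of the original source):
#
#     prep = TextPreprocessor(model_name="uncased_L-12_H-768_A-12")
#     x_train, x_test, y_train, y_test, label_encoder = prep.get_stackoverflow_data(
#         model_dir="/app/models", test_size=0.2, random_state=42)
#     tags = label_encoder.inverse_transform(y_test[:5])  # indices -> tag names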
import base64
import re

import tensorflow as tf
from tensorflow.keras.models import model_from_json

from config import MNIST, APP
from custom_logger import get_logger

LOGGER = get_logger(__name__)


def stringToImage(img):
    """Decode a base64 data URL and write the payload to disk as a PNG."""
    imgstr = re.search(r'base64, (.*)', str(img)).group(1)
    with open(APP.MEDIA / 'image.png', 'wb') as out:
        out.write(base64.b64decode(imgstr))


def load_model(json_model: str, weights: str):
    """Rebuild a Keras model from its JSON architecture and weights files."""
    with open(MNIST.SAVED_MODELS / json_model) as f:
        model = model_from_json(f.read())
    model.load_weights(str(MNIST.SAVED_MODELS / weights))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'],
                  run_eagerly=True)
    graph = tf.compat.v1.get_default_graph()
    LOGGER.debug("graph: %s, model: %s", type(graph), type(model))
    return graph, model
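# A sketch of how these helpers might be combined in a predict path (the
# request payload, image preprocessing, and file names are assumptions, not
# part of the original source):
#
#     graph, model = load_model('model.json', 'weights.h5')
#     stringToImage(request_data)              # writes APP.MEDIA / 'image.png'
#     # ...load the PNG, normalize, reshape to (1, 28, 28, 1)...
#     # digit = model.predict(x).argmax(axis=1)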
import datetime
import multiprocessing as multiproc
import sys

import custom_logger
import ztorch_simulation as zsim

if __name__ == '__main__':
    start_time = datetime.datetime.utcnow()
    logger = custom_logger.get_logger('Data_Generation')
    logger.info('Starting data generation...')

    num_time_steps = 1000
    if len(sys.argv) > 1:
        num_time_steps = int(sys.argv[1])

    # (std, num_vnf_profiles, num_time_steps, output_file_prefix)
    params = [(0.1, 750, num_time_steps, True),
              (0.1, 1000, num_time_steps, True),
              (0.1, 1250, num_time_steps, True),
              (0.06, 1000, num_time_steps, True),
              (0.08, 1000, num_time_steps, True),
              (0.12, 1000, num_time_steps, True)]

    procs = []
    for param in params:
        proc = multiproc.Process(target=zsim.Simulation, args=param)
        proc.start()
        procs.append(proc)  # keep a handle so the runs can be awaited

    # Wait for every simulation process to finish.
    for proc in procs:
        proc.join()
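# Usage sketch (the script file name is an assumption):
#
#     python generate_data.py 5000    # run all six simulations with 5000 steps
#
# Each tuple in params spawns an independent zsim.Simulation process, so the
# six parameter settings run in parallel rather than back to back.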
import datetime
import sys

import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import interp1d
from scipy.stats import pearsonr

import custom_logger
import ztorch_simulation as zsim

if __name__ == '__main__':
    start_time = datetime.datetime.utcnow()
    logger = custom_logger.get_logger('Run_Simulations')
    logger.info('Starting simulations...')

    num_time_steps = 10000
    if len(sys.argv) > 1:
        num_time_steps = int(sys.argv[1])

    on_the_fly = True
    if num_time_steps is not None and len(sys.argv) > 2:
        # bool("False") is truthy, so parse the flag against explicit strings.
        on_the_fly = sys.argv[2].lower() in ('true', '1')

    # (std, num_vnf_profiles, num_time_steps, output_file_prefix, input_file_prefix)
    params = [
        #{
        #    'std': 0.50,
        #    'num_init_profiles': 100,
    if attached_file is not None:
        logger.info(f"attach file: {attached_file}")
        with open(attached_file, "r") as fh:
            msg.add_attachment(fh.read(),
                               filename=os.path.basename(attached_file))
    else:
        logger.info("No attachments")

    logger.info(f"List emails: {receiver_emails}")
    for one_receiver in receiver_emails:
        # Assigning msg['To'] repeatedly appends headers instead of replacing
        # them, so drop any previous recipient first.
        del msg['To']
        msg['To'] = one_receiver
        try:
            server.sendmail(sender_email, one_receiver, msg.as_string())
        except Exception:
            logger.error(f"ERROR: Can't send email to {one_receiver}:\n"
                         + traceback.format_exc())
    server.quit()
    logger.debug(">>>>send_email.send_mail end")


if __name__ == "__main__":
    import custom_logger

    program_file = os.path.realpath(__file__)
    logger = custom_logger.get_logger(program_file=program_file)
    receiver_emails = SETTINGS.settings['recipient_emails']
    subject = "DEBUG: send_email"
    message = "DEBUG: send_email"
    # attached_file = None
    attached_file = logger.handlers[0].baseFilename
    send_email(receiver_emails, subject, message, logger, attached_file)
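# For context, the msg and server objects used above are presumably created
# earlier in send_email(); a minimal sketch under that assumption (host and
# port are illustrative):
#
#     import smtplib
#     from email.message import EmailMessage
#
#     msg = EmailMessage()
#     msg['Subject'] = subject
#     msg['From'] = sender_email
#     msg.set_content(message)
#     server = smtplib.SMTP('localhost', 25)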
#!/usr/bin/env python
import datetime
import subprocess
import sys

from custom_logger import get_logger

__author__ = 'cenk'

logger = get_logger()


def hourly_rollup(args):
    keyspace = args[1]
    now = datetime.datetime.now()
    # Truncate to the top of the current hour and roll up the preceding hour.
    end_time = datetime.datetime(now.year, now.month, now.day, now.hour, 0)
    end_time = int(end_time.timestamp())  # portable, unlike strftime("%s")
    start_time = end_time - (60 * 60)
    logger.debug("End Time: %s, Start Time: %s", end_time, start_time)
    command = "nohup /data/spark/bin/spark-submit --class net.egemsoft.rrd.Main " \
              "--master spark://ipam-ulus-db-2 target/cassandra-spark-rollup-1.0-driver.jar " \
              "spMaster=spark://ipam-ulus-db-2:7077 casHost=ipam-ulus-db-2 " \
              "casKeyspace=%s casTable=metric rollup=300 start=%s end=%s " \
              "destRollup=3600 ttl=7776000 &\n" % (keyspace, start_time, end_time)
    logger.debug("Command: %s", command)
    try:
        p = subprocess.call(command, shell=True, stdout=subprocess.PIPE,
                            cwd="/home/sparkuser/cassandra-spark-rollup")
        logger.debug(p)
    except Exception as e:
        logger.error(str(e))
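# Usage sketch: the function expects argv-style input with the keyspace as the
# second element (the script name and keyspace value are illustrative), and the
# module presumably ends with a guard like:
#
#     if __name__ == '__main__':
#         hourly_rollup(sys.argv)      # e.g. python hourly_rollup.py metrics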
"albert_large_v2", "albert_xlarge_v2", "albert_xxlarge_v2", ] supported_bert_models = [ "uncased_L-12_H-768_A-12", "uncased_L-24_H-1024_A-16", "cased_L-12_H-768_A-12", "cased_L-24_H-1024_A-16", "multi_cased_L-12_H-768_A-12", "wwm_uncased_L-24_H-1024_A-16", "wwm_cased_L-24_H-1024_A-16", ] logger = custom_logger.get_logger() def validate_model(model_name): """Validates the provided model name. Args: model_name: String. Name of the model. See supported models at the top. Returns: model_type: String. Either "albert" or "bert". """ if model_name in supported_albert_models: model_type = "albert" elif model_name in supported_bert_models: model_type = "bert" else:
def get_process_list():
    """Return the 10 processes with the highest memory usage, keyed by
    process.memory_percent()."""
    # NOTE: keying by memory percentage means processes with identical values
    # collide; acceptable for a coarse top-10 snapshot.
    process = {
        p.memory_percent(): p.info
        for p in psutil.process_iter(['name', 'username'])
    }
    if PASSWORDS.DEBUG:
        logger.debug(f"process:\n{pprint.pformat(process)}")
    process = dict(sorted(process.items(), reverse=True)[:10])
    return process


if __name__ == "__main__":
    program_file = os.path.realpath(__file__)
    logger = get_logger(program_file=program_file)
    print(f"Log file: {logger.handlers[0].baseFilename}")
    counter = 1
    logger.info(">>>> BEGIN PROBE >>>>")
    while True:
        work_done = False
        memory_utilization = psutil.virtual_memory().percent  # float
        logger.info(memory_utilization)
        if memory_utilization > 90:
            logger.info(f">>>> probe #{counter}")
            process_list = pprint.pformat(get_process_list())
            receiver_emails = PASSWORDS.settings['recipient_emails']
            subject = "MaxMemoryUtilization"
            message = f"List processes:\n{process_list}"
            logger.info(message)
            # attached_file = None