def generate_default_logger():
    """
    Build the fallback logger used by the exception handler when the
    caller does not pass a logger object of its own.

    :return: default ``BaseMLLogger`` instance writing to the
        exception-handler log file
    """
    fallback_logger = BaseMLLogger(
        log_name='exception_handler',
        log_file_name='sds-ml-exception-handler'
    )
    return fallback_logger
""" MODULE DOCSTRING THERE!!! """ # TODO write module docstring import gzip import numpy from rdkit import Chem from MLLogger import BaseMLLogger from learner.fingerprints import get_molstring_and_headers from general_helper import NUMPY_PROCESSOR_DTYPES LOGGER = BaseMLLogger(log_name='sds_ml_processor_logger') def sdf_to_csv(infile, fptype, write_sdf=False, find_classes=False, find_values=False, value_name_list=None, class_name_list=None, units_prop=None, cut_off=None, relation_prop=None, stream=None, molecules=None, processing_errors=None): """ This script is designed to simplify data preparation for ML-methods.
from exception_handler import MLExceptionHandler from general_helper import (get_model_info, get_oauth, fetch_token, prepare_prediction_parameters, molecules_from_mol_strings, logging_exception_message, cache_model, molecules_from_smiles, prepare_prediction_files, MODELS_IN_MEMORY_CACHE, clear_models_folder) from learner.algorithms import algorithm_code_by_name from mass_transit.MTMessageProcessor import PureConsumer, PurePublisher from mass_transit.mass_transit_constants import (PREDICT_SINGLE_STRUCTURE, SINGLE_STRUCTURE_PREDICTED) from messages import single_structure_property_predicted from predictor.Predictor import MLPredictor API_MODELS_ENTITIES_URL = os.environ['API_MODELS_ENTITIES_URL'] LOGGER = BaseMLLogger(log_name='logger', log_file_name='sds-ml-single-structure-predictor') os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1' # TODO make it better! move to other place (general_helper.py?) # set hardcoded unitless property units # using to remove units if it unitless UNITLESS = 'Unitless' @MLExceptionHandler(logger=LOGGER) def callback(body): """ Pika callback function used by single structure predictor. Make list of json with prediction data for each model prediction. :param body: RabbitMQ MT message's body """
from learner import algorithms from MLLogger import BaseMLLogger from exception_handler import MLExceptionHandler from general_helper import (logging_exception_message, get_molecules_from_sdf_bytes, numpy_to_csv, get_inchi_key, make_directory) from mass_transit.MTMessageProcessor import PureConsumer, PurePublisher from mass_transit.mass_transit_constants import ( CALCULATE_FEATURE_VECTORS, FEATURE_VECTORS_CALCULATED, FEATURE_VECTORS_CALCULATION_FAILED) from messages import (feature_vectors_calculated_message, feature_vectors_calculation_failed) from processor import sdf_to_csv from structure_featurizer import generate_csv LOGGER = BaseMLLogger(log_name='logger', log_file_name='sds-feature-vector-calculator') os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1' REDIS_CLIENT = redis.StrictRedis(host='redis', db=0) TEMP_FOLDER = os.environ['OSDR_TEMP_FILES_FOLDER'] # make temporary folder if it does not exists make_directory(TEMP_FOLDER) try: EXPIRATION_TIME = int(os.environ['REDIS_EXPIRATION_TIME_SECONDS']) except KeyError: EXPIRATION_TIME = 12 * 60 * 60 # 12 hours LOGGER.error('Max thread number not defined. Set it to 1') @MLExceptionHandler(logger=LOGGER,
import re import matplotlib import numpy as np import pandas import seaborn as sns from keras.callbacks import Callback from sklearn.metrics import r2_score, confusion_matrix, roc_curve, auc from MLLogger import BaseMLLogger matplotlib.use('Agg') import matplotlib.pyplot as plt from scipy import interp LOGGER = BaseMLLogger(log_name='sds_ml_plotter_logger') # plots types names ROC_PLOT = 'roc_plot' CONFUSION_MATRIX = 'confusion_matrix' REGRESSION_RESULT_TEST = 'regression_results_test' REGRESSION_RESULT_TRAIN = 'regression_results_train' RADAR_PLOT = 'radar_plot' THUMBNAIL_IMAGE = 'thumbnail_image' TRAIN_COLOR = 'blue' TEST_COLOR = 'red' VALIDATION_COLOR = 'green' class BatchLogger(Callback): def __init__(self, display): """
from MLLogger import BaseMLLogger from general_helper import make_directory, numpy_to_csv, get_distance from learner.algorithms import CLASSIFIER, REGRESSOR from processor import sdf_to_csv try: BLOB_URL = '{}/blobs'.format(os.environ['OSDR_BLOB_SERVICE_URL']) except KeyError: BLOB_URL = '' TEMP_FOLDER = os.environ['OSDR_TEMP_FILES_FOLDER'] # create temporary folder if it not exist make_directory(TEMP_FOLDER) LOGGER = BaseMLLogger(log_name='predictor_logger', log_file_name='predictor-logger') class MLPredictor(object): def __init__(self, parameters, dataframe=None): """ Creation predictor object from given parameters (user input) and logger (if defined) :param parameters: user input with prediction parameters :param dataframe: prepared dataframe """ self.dataset_file_name = parameters['DatasetFileName'] self.primary_field = parameters['ClassName'] self.fptype = parameters['Fingerprints']
get_oauth, fetch_token, get_multipart_object, post_data_to_blob, get_file_info_from_blob ) from learner.algorithms import ( CODES, DNN_REGRESSOR, DNN_CLASSIFIER, ELASTIC_NETWORK, LOGISTIC_REGRESSION ) from learner.algorithms import NAIVE_BAYES from mass_transit.MTMessageProcessor import MTPublisher from mass_transit.mass_transit_constants import ( MODELER_FAIL_TEST, MODEL_TRAINED_TEST, PREDICTOR_FAIL_TEST, PROPERTIES_PREDICTED_TEST, GENERATE_REPORT_TEST, OPTIMIZE_TRAINING_TEST, OPTIMIZE_TRAINING_FAIL_TEST, PREDICT_SINGLE_STRUCTURE_TEST, FEATURE_VECTORS_CALCULATOR_TEST, FEATURE_VECTORS_CALCULATOR_FAIL_TEST) TEMP_FOLDER = os.environ['OSDR_TEMP_FILES_FOLDER'] LOGGER = BaseMLLogger(log_name='ml_test_logger', log_file_name='ml_test') REDIS_CLIENT = redis.StrictRedis(host='redis', db=0) MODELER_FAIL_FLAG = False CLASSIC_CLASSIFICATION_NAIVE_TRAINED_FLAG = False CLASSIC_REGRESSION_TRAINED_FLAG = False PREDICTOR_FAIL_FLAG = False CLASSIC_CLASSIFICATION_PREDICTED_FLAG = False REGRESSOR_TRAINING_OPTIMIZED = False CLASSIFIER_TRAINING_OPTIMIZED = False NAIVE_BAYES_MODEL_BLOB_ID = None LOGISTIC_REGRESSION_MODEL_BLOB_ID = None CLASSIC_CLASSIFICATION_MODEL_BUCKET = None CLASSIC_CLASSIFICATION_FILES_BLOB_IDS = [] ELASTIC_NETWORK_MODEL_BLOB_ID = None CLASSIC_REGRESSION_FILES_BLOB_IDS = []
make_directory) from learner.algorithms import CLASSIFIER, REGRESSOR from learner.plotters import radar_plot from mass_transit.MTMessageProcessor import PureConsumer, PurePublisher from mass_transit.mass_transit_constants import ( GENERATE_REPORT, REPORT_GENERATED, TRAINING_REPORT_GENERATION_FAILED) from messages import (training_report_generated_message, training_report_generation_failed) from report_helper.TMP_text import (TRAINING_CSV_METRICS, ALL_MODELS_TRAINING_CSV_METRICS) from report_helper.html_render import make_pdf_report os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1' TEMP_FOLDER = os.environ['OSDR_TEMP_FILES_FOLDER'] REDIS_CLIENT = redis.StrictRedis(host='redis', db=0) LOGGER = BaseMLLogger(log_name='logger', log_file_name='sds-ml-training-reporter') OPTIMIZER_FORMATTER = '{:.04f}'.format @MLExceptionHandler(logger=LOGGER, fail_publisher=TRAINING_REPORT_GENERATION_FAILED, fail_message_constructor=training_report_generation_failed) def generate_training_report(body): """ Pika callback function used by training report generator. Make plots files, general metrics csv file and report file if success. :param body: RabbitMQ MT message's body """ oauth = get_oauth()
MODELS_IN_MEMORY_CACHE, clear_models_folder) from mass_transit.MTMessageProcessor import PureConsumer, PurePublisher from mass_transit.mass_transit_constants import (PREDICTION_FAILED, PREDICT_PROPERTIES, PROPERTIES_PREDICTED) from predictor.Predictor import MLPredictor os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1' os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID' os.environ['CUDA_VISIBLE_DEVICES'] = '1' CLIENT_ID = os.environ['OSDR_ML_MODELER_CLIENT_ID'] TEMP_FOLDER = os.environ['OSDR_TEMP_FILES_FOLDER'] BLOB_URL = '{}/blobs'.format(os.environ['OSDR_BLOB_SERVICE_URL']) BLOB_VERSION_URL = '{}/version'.format(os.environ['OSDR_BLOB_SERVICE_URL']) LOGGER = BaseMLLogger(log_name='logger', log_file_name='sds-ml-predictor-core') LOGGER.info('Checking BLOB service: {}'.format(BLOB_VERSION_URL)) RESPONSE = requests.get(BLOB_VERSION_URL, verify=False) LOGGER.info('BLOB version received: {}'.format(RESPONSE.text)) @MLExceptionHandler(logger=LOGGER, fail_publisher=PREDICTION_FAILED, fail_message_constructor=prediction_failed) def callback(body): """ Pika callback function used by ml predictor. Make file with predicted properties by picked model. Send file to blob storage for OSDR
TRAINER_CLASS) from learner.fingerprints import validate_fingerprints from learner.plotters import radar_plot, distribution_plot, THUMBNAIL_IMAGE from mass_transit.MTMessageProcessor import PurePublisher, PureConsumer from mass_transit.mass_transit_constants import (TRAIN_MODEL, TRAINING_FAILED, MODEL_TRAINED, MODEL_TRAINING_STARTED, MODEL_THUMBNAIL_GENERATED) from messages import (model_trained_message, utc_now_str, model_training_start_message, thumbnail_generated_message, training_failed) from processor import sdf_to_csv from report_helper.TMP_text import MODEL_PDF_REPORT from report_helper.html_render import make_pdf_report LOGGER = BaseMLLogger(log_name='logger', log_file_name='sds-ml-modeler') LOGGER.info('Configuring from environment variables') os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1' CLIENT_ID = os.environ['OSDR_ML_MODELER_CLIENT_ID'] BLOB_URL = '{}/blobs'.format(os.environ['OSDR_BLOB_SERVICE_URL']) BLOB_VERSION_URL = '{}/version'.format(os.environ['OSDR_BLOB_SERVICE_URL']) TEMP_FOLDER = os.environ['OSDR_TEMP_FILES_FOLDER'] LOGGER.info('Configured') LOGGER.info('Checking BLOB service: {}'.format(BLOB_VERSION_URL)) RESPONSE = requests.get(BLOB_VERSION_URL, verify=False) LOGGER.info('BLOB version received: {}'.format(RESPONSE.text)) @MLExceptionHandler(logger=LOGGER,
CLASSIFIER, REGRESSOR, model_type_by_code, NAIVE_BAYES, ELASTIC_NETWORK, TRAINER_CLASS, ALGORITHM, CODES ) from mass_transit.MTMessageProcessor import PureConsumer, PurePublisher from mass_transit.mass_transit_constants import ( OPTIMIZE_TRAINING, TRAINING_OPTMIZATION_FAILED, TRAINING_OPTIMIZED ) from messages import training_optimization_failed, model_training_optimized from processor import sdf_to_csv os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1' BLOB_URL = '{}/blobs'.format(os.environ['OSDR_BLOB_SERVICE_URL']) REDIS_CLIENT = redis.StrictRedis(host='redis', db=0) TEMP_FOLDER = os.environ['OSDR_TEMP_FILES_FOLDER'] LOGGER = BaseMLLogger( log_name='logger', log_file_name='sds-ml-training-optimizer') try: EXPIRATION_TIME = int(os.environ['REDIS_EXPIRATION_TIME_SECONDS']) except KeyError: EXPIRATION_TIME = 12*60*60 # 12 hours LOGGER.error('Max thread number not defined. Set it to 1') OPTIMIZER_FORMATTER = '{:.04f}'.format # set optimizer fingerprints sets # will found optimal set from this list, and use it later for training model # all other sets will be shown on optimizer report and on training report BASE_FINGERPRINTS = [ [ {'Type': 'DESC'}, {'Type': 'AVALON', 'Size': 512}, {'Type': 'ECFP', 'Radius': 3, 'Size': 128}, {'Type': 'FCFC', 'Radius': 2, 'Size': 256} ], [
import keras import numpy import requests import tensorflow from oauthlib.oauth2 import BackendApplicationClient from rdkit import Chem from requests_oauthlib import OAuth2Session from requests_toolbelt import MultipartEncoder from scipy import sparse from sklearn.externals import joblib from MLLogger import BaseMLLogger os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1' LOGGER = BaseMLLogger(log_name='logger', log_file_name='sds-ml-logger') # define modules default global variables CLIENT_ID = None CLIENT_SECRET = None SCOPE = None TOKEN_URL = None BLOB_URL = None TEMP_FOLDER = os.getcwd() OSDR_API_URL = 'https://api.dev.dataledger.io/osdr/v1/api' SCALER_FILENAME = 'scaler.sav' DENSITY_MODEL_FILENAME = 'density_model.sav' DISTANCE_MATRIX_FILENAME = 'distance_matrix.npz' TRAIN_MEAN_FILENAME = 'train_mean.npy' K_MEANS_FILENAME = 'k_means.ksav' MODEL_ADDITIONAL_FILES = [ SCALER_FILENAME, DENSITY_MODEL_FILENAME, TRAIN_MEAN_FILENAME,
""" General methods for MT constants and MT-emulation Python classes """ import json import uuid import pika from MLLogger import BaseMLLogger LOGGER = BaseMLLogger(log_name='mt_library_logger', log_file_name='sds-mt-library') # json to Python types mapping table # key is json type, value is Python type TYPES_TABLE = { 'string': str, 'integer': int, 'decimal': float, 'boolean': bool, 'array': list, 'Guid': uuid.UUID, 'object': dict } class Guid(str): def __init__(self, string_value): try: uuid.UUID(string_value)