def generate_default_logger():
    """
    Build the fallback logger which is used when no logger object
    was given in argument

    :return: BaseMLLogger created with the 'exception_handler' log name
        and the 'sds-ml-exception-handler' log file name
    """

    # keep the logger settings in one place, then unpack them as keywords
    default_logger_settings = {
        'log_name': 'exception_handler',
        'log_file_name': 'sds-ml-exception-handler',
    }

    return BaseMLLogger(**default_logger_settings)
Beispiel #2
0
"""
Data preparation helpers for ML methods (see sdf_to_csv).
"""
# TODO extend module docstring with details about the processing steps
import gzip

import numpy
from rdkit import Chem

from MLLogger import BaseMLLogger
from learner.fingerprints import get_molstring_and_headers
from general_helper import NUMPY_PROCESSOR_DTYPES

# module-level logger instance, created once when the module is imported
LOGGER = BaseMLLogger(log_name='sds_ml_processor_logger')


def sdf_to_csv(infile,
               fptype,
               write_sdf=False,
               find_classes=False,
               find_values=False,
               value_name_list=None,
               class_name_list=None,
               units_prop=None,
               cut_off=None,
               relation_prop=None,
               stream=None,
               molecules=None,
               processing_errors=None):
    """
    This script is designed to simplify data preparation for ML-methods.
Beispiel #3
0
from exception_handler import MLExceptionHandler
from general_helper import (get_model_info, get_oauth, fetch_token,
                            prepare_prediction_parameters,
                            molecules_from_mol_strings,
                            logging_exception_message, cache_model,
                            molecules_from_smiles, prepare_prediction_files,
                            MODELS_IN_MEMORY_CACHE, clear_models_folder)
from learner.algorithms import algorithm_code_by_name
from mass_transit.MTMessageProcessor import PureConsumer, PurePublisher
from mass_transit.mass_transit_constants import (PREDICT_SINGLE_STRUCTURE,
                                                 SINGLE_STRUCTURE_PREDICTED)
from messages import single_structure_property_predicted
from predictor.Predictor import MLPredictor

# read from the environment, a missing variable raises KeyError
# when the module is imported
API_MODELS_ENTITIES_URL = os.environ['API_MODELS_ENTITIES_URL']
LOGGER = BaseMLLogger(log_name='logger',
                      log_file_name='sds-ml-single-structure-predictor')
# tell oauthlib to accept non-HTTPS transport for OAuth2 requests
os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1'
# TODO make it better! move to other place (general_helper.py?)
# hardcoded units value of unitless properties
# used to remove the units of a property when it is unitless
UNITLESS = 'Unitless'


@MLExceptionHandler(logger=LOGGER)
def callback(body):
    """
    Pika callback function used by single structure predictor.
    Make list of json with prediction data for each model prediction.

    :param body: RabbitMQ MT message's body
    """
Beispiel #4
0
from learner import algorithms
from MLLogger import BaseMLLogger
from exception_handler import MLExceptionHandler
from general_helper import (logging_exception_message,
                            get_molecules_from_sdf_bytes, numpy_to_csv,
                            get_inchi_key, make_directory)
from mass_transit.MTMessageProcessor import PureConsumer, PurePublisher
from mass_transit.mass_transit_constants import (
    CALCULATE_FEATURE_VECTORS, FEATURE_VECTORS_CALCULATED,
    FEATURE_VECTORS_CALCULATION_FAILED)
from messages import (feature_vectors_calculated_message,
                      feature_vectors_calculation_failed)
from processor import sdf_to_csv
from structure_featurizer import generate_csv

# module-level logger instance, created once when the module is imported
LOGGER = BaseMLLogger(log_name='logger',
                      log_file_name='sds-feature-vector-calculator')

# tell oauthlib to accept non-HTTPS transport for OAuth2 requests
os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1'
# redis client for the 'redis' host, database 0
REDIS_CLIENT = redis.StrictRedis(host='redis', db=0)
# required environment variable, KeyError at import time if it is missing
TEMP_FOLDER = os.environ['OSDR_TEMP_FILES_FOLDER']
# make temporary folder if it does not exist
make_directory(TEMP_FOLDER)

# expiration time in seconds, taken from the REDIS_EXPIRATION_TIME_SECONDS
# environment variable when it is defined
try:
    EXPIRATION_TIME = int(os.environ['REDIS_EXPIRATION_TIME_SECONDS'])
except KeyError:
    # variable is not set, fall back to the default value
    EXPIRATION_TIME = 12 * 60 * 60  # 12 hours
    # the message must describe the value which was actually defaulted
    LOGGER.error(
        'REDIS_EXPIRATION_TIME_SECONDS not defined. Set it to 12 hours')


@MLExceptionHandler(logger=LOGGER,
Beispiel #5
0
import re

import matplotlib
import numpy as np
import pandas
import seaborn as sns
from keras.callbacks import Callback
from sklearn.metrics import r2_score, confusion_matrix, roc_curve, auc

from MLLogger import BaseMLLogger

matplotlib.use('Agg')
import matplotlib.pyplot as plt
from scipy import interp

# module-level logger instance, created once when the module is imported
LOGGER = BaseMLLogger(log_name='sds_ml_plotter_logger')
# plot type names
ROC_PLOT = 'roc_plot'
CONFUSION_MATRIX = 'confusion_matrix'
REGRESSION_RESULT_TEST = 'regression_results_test'
REGRESSION_RESULT_TRAIN = 'regression_results_train'
RADAR_PLOT = 'radar_plot'
THUMBNAIL_IMAGE = 'thumbnail_image'
# color names for train, test and validation
# NOTE(review): presumably used as matplotlib colors by the plotting code,
# which is not visible here - confirm
TRAIN_COLOR = 'blue'
TEST_COLOR = 'red'
VALIDATION_COLOR = 'green'


class BatchLogger(Callback):
    def __init__(self, display):
        """
Beispiel #6
0
from MLLogger import BaseMLLogger
from general_helper import make_directory, numpy_to_csv, get_distance
from learner.algorithms import CLASSIFIER, REGRESSOR

from processor import sdf_to_csv

# blob service endpoint, empty string when the service url is not defined
# in the environment
BLOB_URL = ''
if 'OSDR_BLOB_SERVICE_URL' in os.environ:
    BLOB_URL = '{}/blobs'.format(os.environ['OSDR_BLOB_SERVICE_URL'])

# required environment variable, KeyError at import time if it is missing
TEMP_FOLDER = os.environ['OSDR_TEMP_FILES_FOLDER']
# create temporary folder if it does not exist
make_directory(TEMP_FOLDER)
LOGGER = BaseMLLogger(
    log_name='predictor_logger', log_file_name='predictor-logger')


class MLPredictor(object):
    def __init__(self, parameters, dataframe=None):
        """
        Creation predictor object from given parameters (user input) and logger
        (if defined)

        :param parameters: user input with prediction parameters
        :param dataframe: prepared dataframe
        """

        self.dataset_file_name = parameters['DatasetFileName']
        self.primary_field = parameters['ClassName']
        self.fptype = parameters['Fingerprints']
Beispiel #7
0
    get_oauth, fetch_token, get_multipart_object, post_data_to_blob,
    get_file_info_from_blob
)
from learner.algorithms import (
    CODES, DNN_REGRESSOR, DNN_CLASSIFIER, ELASTIC_NETWORK, LOGISTIC_REGRESSION
)
from learner.algorithms import NAIVE_BAYES
from mass_transit.MTMessageProcessor import MTPublisher
from mass_transit.mass_transit_constants import (
    MODELER_FAIL_TEST, MODEL_TRAINED_TEST, PREDICTOR_FAIL_TEST,
    PROPERTIES_PREDICTED_TEST, GENERATE_REPORT_TEST, OPTIMIZE_TRAINING_TEST,
    OPTIMIZE_TRAINING_FAIL_TEST, PREDICT_SINGLE_STRUCTURE_TEST,
    FEATURE_VECTORS_CALCULATOR_TEST, FEATURE_VECTORS_CALCULATOR_FAIL_TEST)

# required environment variable, KeyError at import time if it is missing
TEMP_FOLDER = os.environ['OSDR_TEMP_FILES_FOLDER']
LOGGER = BaseMLLogger(log_name='ml_test_logger', log_file_name='ml_test')
# redis client for the 'redis' host, database 0
REDIS_CLIENT = redis.StrictRedis(host='redis', db=0)

# module-level test state with its initial values:
# boolean flags start as False, blob ids and bucket start as None,
# blob ids lists start empty
# NOTE(review): presumably updated by the test message handlers, which are
# not visible here - confirm
MODELER_FAIL_FLAG = False
CLASSIC_CLASSIFICATION_NAIVE_TRAINED_FLAG = False
CLASSIC_REGRESSION_TRAINED_FLAG = False
PREDICTOR_FAIL_FLAG = False
CLASSIC_CLASSIFICATION_PREDICTED_FLAG = False
REGRESSOR_TRAINING_OPTIMIZED = False
CLASSIFIER_TRAINING_OPTIMIZED = False
NAIVE_BAYES_MODEL_BLOB_ID = None
LOGISTIC_REGRESSION_MODEL_BLOB_ID = None
CLASSIC_CLASSIFICATION_MODEL_BUCKET = None
CLASSIC_CLASSIFICATION_FILES_BLOB_IDS = []
ELASTIC_NETWORK_MODEL_BLOB_ID = None
CLASSIC_REGRESSION_FILES_BLOB_IDS = []
                            make_directory)
from learner.algorithms import CLASSIFIER, REGRESSOR
from learner.plotters import radar_plot
from mass_transit.MTMessageProcessor import PureConsumer, PurePublisher
from mass_transit.mass_transit_constants import (
    GENERATE_REPORT, REPORT_GENERATED, TRAINING_REPORT_GENERATION_FAILED)
from messages import (training_report_generated_message,
                      training_report_generation_failed)
from report_helper.TMP_text import (TRAINING_CSV_METRICS,
                                    ALL_MODELS_TRAINING_CSV_METRICS)
from report_helper.html_render import make_pdf_report

# tell oauthlib to accept non-HTTPS transport for OAuth2 requests
os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1'
# required environment variable, KeyError at import time if it is missing
TEMP_FOLDER = os.environ['OSDR_TEMP_FILES_FOLDER']
# redis client for the 'redis' host, database 0
REDIS_CLIENT = redis.StrictRedis(host='redis', db=0)
LOGGER = BaseMLLogger(log_name='logger',
                      log_file_name='sds-ml-training-reporter')
# callable which formats a number with four digits after the decimal point
OPTIMIZER_FORMATTER = '{:.04f}'.format


@MLExceptionHandler(logger=LOGGER,
                    fail_publisher=TRAINING_REPORT_GENERATION_FAILED,
                    fail_message_constructor=training_report_generation_failed)
def generate_training_report(body):
    """
    Pika callback function used by training report generator.
    Make plots files, general metrics csv file and report file if success.

    :param body: RabbitMQ MT message's body
    """

    oauth = get_oauth()
                            MODELS_IN_MEMORY_CACHE, clear_models_folder)
from mass_transit.MTMessageProcessor import PureConsumer, PurePublisher
from mass_transit.mass_transit_constants import (PREDICTION_FAILED,
                                                 PREDICT_PROPERTIES,
                                                 PROPERTIES_PREDICTED)
from predictor.Predictor import MLPredictor

# tell oauthlib to accept non-HTTPS transport for OAuth2 requests
os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1'
# enumerate CUDA devices in PCI bus order and expose only the device
# with index 1 to this process
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
# required environment variables, KeyError at import time if any is missing
CLIENT_ID = os.environ['OSDR_ML_MODELER_CLIENT_ID']
TEMP_FOLDER = os.environ['OSDR_TEMP_FILES_FOLDER']
BLOB_URL = '{}/blobs'.format(os.environ['OSDR_BLOB_SERVICE_URL'])
BLOB_VERSION_URL = '{}/version'.format(os.environ['OSDR_BLOB_SERVICE_URL'])

LOGGER = BaseMLLogger(log_name='logger', log_file_name='sds-ml-predictor-core')

# request the blob service version at import time and log the response text
# NOTE(review): certificate verification is disabled (verify=False) and no
# timeout is passed, so this call can block the module import - confirm that
# both are intended
LOGGER.info('Checking BLOB service: {}'.format(BLOB_VERSION_URL))
RESPONSE = requests.get(BLOB_VERSION_URL, verify=False)
LOGGER.info('BLOB version received: {}'.format(RESPONSE.text))


@MLExceptionHandler(logger=LOGGER,
                    fail_publisher=PREDICTION_FAILED,
                    fail_message_constructor=prediction_failed)
def callback(body):
    """
    Pika callback function used by ml predictor.
    Make file with predicted properties by picked model.
    Send file to blob storage for OSDR
                                TRAINER_CLASS)
from learner.fingerprints import validate_fingerprints
from learner.plotters import radar_plot, distribution_plot, THUMBNAIL_IMAGE
from mass_transit.MTMessageProcessor import PurePublisher, PureConsumer
from mass_transit.mass_transit_constants import (TRAIN_MODEL, TRAINING_FAILED,
                                                 MODEL_TRAINED,
                                                 MODEL_TRAINING_STARTED,
                                                 MODEL_THUMBNAIL_GENERATED)
from messages import (model_trained_message, utc_now_str,
                      model_training_start_message,
                      thumbnail_generated_message, training_failed)
from processor import sdf_to_csv
from report_helper.TMP_text import MODEL_PDF_REPORT
from report_helper.html_render import make_pdf_report

# module-level logger instance, created once when the module is imported
LOGGER = BaseMLLogger(log_name='logger', log_file_name='sds-ml-modeler')

LOGGER.info('Configuring from environment variables')
# tell oauthlib to accept non-HTTPS transport for OAuth2 requests
os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1'
# required environment variables, KeyError at import time if any is missing
CLIENT_ID = os.environ['OSDR_ML_MODELER_CLIENT_ID']
BLOB_URL = '{}/blobs'.format(os.environ['OSDR_BLOB_SERVICE_URL'])
BLOB_VERSION_URL = '{}/version'.format(os.environ['OSDR_BLOB_SERVICE_URL'])
TEMP_FOLDER = os.environ['OSDR_TEMP_FILES_FOLDER']

LOGGER.info('Configured')
# request the blob service version at import time and log the response text
# NOTE(review): certificate verification is disabled (verify=False) and no
# timeout is passed, so this call can block the module import - confirm that
# both are intended
LOGGER.info('Checking BLOB service: {}'.format(BLOB_VERSION_URL))
RESPONSE = requests.get(BLOB_VERSION_URL, verify=False)
LOGGER.info('BLOB version received: {}'.format(RESPONSE.text))


@MLExceptionHandler(logger=LOGGER,
Beispiel #11
0
    CLASSIFIER, REGRESSOR, model_type_by_code, NAIVE_BAYES, ELASTIC_NETWORK,
    TRAINER_CLASS, ALGORITHM, CODES
)
from mass_transit.MTMessageProcessor import PureConsumer, PurePublisher
from mass_transit.mass_transit_constants import (
    OPTIMIZE_TRAINING, TRAINING_OPTMIZATION_FAILED, TRAINING_OPTIMIZED
)
from messages import training_optimization_failed, model_training_optimized
from processor import sdf_to_csv

# tell oauthlib to accept non-HTTPS transport for OAuth2 requests
os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1'
# required environment variable, KeyError at import time if it is missing
BLOB_URL = '{}/blobs'.format(os.environ['OSDR_BLOB_SERVICE_URL'])
# redis client for the 'redis' host, database 0
REDIS_CLIENT = redis.StrictRedis(host='redis', db=0)

# required environment variable, KeyError at import time if it is missing
TEMP_FOLDER = os.environ['OSDR_TEMP_FILES_FOLDER']
LOGGER = BaseMLLogger(
    log_name='logger', log_file_name='sds-ml-training-optimizer')
# expiration time in seconds, taken from the REDIS_EXPIRATION_TIME_SECONDS
# environment variable when it is defined
try:
    EXPIRATION_TIME = int(os.environ['REDIS_EXPIRATION_TIME_SECONDS'])
except KeyError:
    # variable is not set, fall back to the default value
    EXPIRATION_TIME = 12*60*60  # 12 hours
    # the message must describe the value which was actually defaulted
    LOGGER.error(
        'REDIS_EXPIRATION_TIME_SECONDS not defined. Set it to 12 hours')
OPTIMIZER_FORMATTER = '{:.04f}'.format
# set optimizer fingerprints sets
# the optimal set will be found in this list and used later for model training
# all other sets will be shown in the optimizer report and the training report
BASE_FINGERPRINTS = [
    [
        {'Type': 'DESC'}, {'Type': 'AVALON', 'Size': 512},
        {'Type': 'ECFP', 'Radius': 3, 'Size': 128},
        {'Type': 'FCFC', 'Radius': 2, 'Size': 256}
    ], [
Beispiel #12
0
import keras
import numpy
import requests
import tensorflow
from oauthlib.oauth2 import BackendApplicationClient
from rdkit import Chem
from requests_oauthlib import OAuth2Session
from requests_toolbelt import MultipartEncoder
from scipy import sparse
from sklearn.externals import joblib

from MLLogger import BaseMLLogger

# tell oauthlib to accept non-HTTPS transport for OAuth2 requests
os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1'
LOGGER = BaseMLLogger(log_name='logger', log_file_name='sds-ml-logger')
# define default values of the module global variables
# NOTE(review): the None placeholders are presumably replaced later by code
# which is not visible here - confirm
CLIENT_ID = None
CLIENT_SECRET = None
SCOPE = None
TOKEN_URL = None
BLOB_URL = None
# defaults to the working directory of the process at import time
TEMP_FOLDER = os.getcwd()
OSDR_API_URL = 'https://api.dev.dataledger.io/osdr/v1/api'
# file names
SCALER_FILENAME = 'scaler.sav'
DENSITY_MODEL_FILENAME = 'density_model.sav'
DISTANCE_MATRIX_FILENAME = 'distance_matrix.npz'
TRAIN_MEAN_FILENAME = 'train_mean.npy'
K_MEANS_FILENAME = 'k_means.ksav'
MODEL_ADDITIONAL_FILES = [
    SCALER_FILENAME, DENSITY_MODEL_FILENAME, TRAIN_MEAN_FILENAME,
Beispiel #13
0
"""
General methods for MT constants and MT-emulation Python classes
"""

import json
import uuid

import pika

from MLLogger import BaseMLLogger

# module-level logger instance, created once when the module is imported
LOGGER = BaseMLLogger(log_name='mt_library_logger',
                      log_file_name='sds-mt-library')

# lookup table which translates a json type name to the matching Python type
# every pair is (json type name, Python type)
TYPES_TABLE = dict([
    ('string', str),
    ('integer', int),
    ('decimal', float),
    ('boolean', bool),
    ('array', list),
    ('Guid', uuid.UUID),
    ('object', dict),
])


class Guid(str):
    def __init__(self, string_value):
        try:
            uuid.UUID(string_value)