Example #1
import sys

from commonsLib import loggerElk


def handle_error(e):
    logger = loggerElk(__name__)
    logger.Information("Error Handler")
    code = 500
    # HTTPError comes from the HTTP framework in use; its import
    # is not shown in the original snippet.
    if isinstance(e, HTTPError):
        code = e.code
    logger.Error(str(e), sys.exc_info())
    return {'message': 'Something went wrong: ' + str(e)}, code
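
Handlers like this are typically registered with the web framework so every unhandled exception flows through the same logging path. A minimal registration sketch, assuming a Flask app (this wiring is not part of the original snippet):

from flask import Flask

app = Flask(__name__)

# Route uncaught exceptions through handle_error; Flask uses the
# (body, status_code) tuple the handler returns.
app.register_error_handler(Exception, handle_error)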
Example #2
    def __init__(self, conf, nlp):
        self.conf = conf
        self.nlp = nlp
        # The logger must be created before it is used below.
        self.logger = loggerElk(__name__, True)
        self.logger.Information('GbcMlDocumentClassifierPrediction::POST - loading dictionary...')
        self.conf.load_dict()
        self.tf = None
        self.tf_idf = None
        self.vectorizer = None
        self.from_file = ClassFile()
Example #3
    def __init__(self, conf: Configuration):
        self.conf = conf
        self.clf = None
        self.logger = loggerElk(__name__, True)
Example #4
    def __init__(self):
        self.conf = Configuration()
        self.logger = loggerElk(__name__, True)
Example #5
        def __init__(self,
                     hidden_layer_sizes=(100, ),
                     activation="relu",
                     solver='adam',
                     alpha=0.0001,
                     batch_size='auto',
                     learning_rate="constant",
                     learning_rate_init=0.001,
                     power_t=0.5,
                     max_iter=200,
                     shuffle=True,
                     random_state=None,
                     tol=1e-4,
                     verbose=False,
                     warm_start=False,
                     momentum=0.9,
                     nesterovs_momentum=True,
                     early_stopping=False,
                     validation_fraction=0.1,
                     beta_1=0.9,
                     beta_2=0.999,
                     epsilon=1e-8,
                     n_iter_no_change=10,
                     max_fun=15000,
                     conf=None):
            super().__init__(hidden_layer_sizes=hidden_layer_sizes,
                             activation=activation,
                             solver=solver,
                             alpha=alpha,
                             batch_size=batch_size,
                             learning_rate=learning_rate,
                             learning_rate_init=learning_rate_init,
                             power_t=power_t,
                             max_iter=max_iter,
                             loss='log_loss',
                             shuffle=shuffle,
                             random_state=random_state,
                             tol=tol,
                             verbose=verbose,
                             warm_start=warm_start,
                             momentum=momentum,
                             nesterovs_momentum=nesterovs_momentum,
                             early_stopping=early_stopping,
                             validation_fraction=validation_fraction,
                             beta_1=beta_1,
                             beta_2=beta_2,
                             epsilon=epsilon,
                             n_iter_no_change=n_iter_no_change,
                             max_fun=max_fun)
            # Store the configuration
            self.conf = conf
            self.logger = loggerElk(__name__, True)

            # Building the model
            self.classifier = Sequential()

            # Building the convolutional layers
            # Step 1- Convolution
            self.classifier.add(
                Convolution2D(128, (5, 5),
                              input_shape=(self.conf.nn_image_size,
                                           self.conf.nn_image_size, 1),
                              activation='relu'))
            # Adding another layer
            self.classifier.add(Convolution2D(64, (4, 4), activation='relu'))
            # Pooling it
            self.classifier.add(MaxPooling2D(pool_size=(2, 2)))
            # Adding another layer
            self.classifier.add(Convolution2D(32, (3, 3), activation='relu'))
            # Pooling
            self.classifier.add(MaxPooling2D(pool_size=(2, 2)))
            # Adding another layer
            self.classifier.add(Convolution2D(32, (3, 3), activation='relu'))
            # Pooling
            self.classifier.add(MaxPooling2D(pool_size=(2, 2)))
            # Step 2- Flattening
            self.classifier.add(Flatten())
            # Step 3- Full connection
            self.classifier.add(Dense(units=128, activation='relu'))
            # For the output step
            self.classifier.add(
                Dense(units=self.conf.nn_class_size, activation='softmax'))
            self.classifier.add(Dropout(0.02))
            # Add regularizers
            # classifier.add(Dense(128,
            #                input_dim = 128,
            #                kernel_regularizer = regularizers.l1(0.001),
            #                activity_regularizer = regularizers.l1(0.001),
            #                activation = 'relu'))

            self.classifier.compile(optimizer='adam',
                                    loss='categorical_crossentropy',
                                    metrics=['accuracy'])
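
The compiled network can then be trained with the standard Keras fit call. A minimal sketch, assuming grayscale inputs shaped (n, conf.nn_image_size, conf.nn_image_size, 1) and one-hot labels with conf.nn_class_size columns; CnnClassifier, X_train and y_train are placeholder names, not from the original:

model = CnnClassifier(conf=conf)  # hypothetical name for the class defined above
model.classifier.fit(X_train, y_train,
                     batch_size=conf.nn_batch_size,
                     epochs=conf.nn_epochs,
                     validation_split=0.1)
# evaluate returns [loss, accuracy] for the metrics compiled above
loss, accuracy = model.classifier.evaluate(X_train, y_train)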
Example #6
class GbcMachineLearningService(Resource):
    from api import api

    logger = loggerElk(__name__, True)
    nlp = None

    machineLearningService = api.model('MachineLearningService', {
        'source': fields.String(required=True, description='Source of the data (PLAINTEXT | FILE | IMAGE | S3)'),
        'data': fields.String(required=True, description='Repo resource identifier (url)'),
        'domain': fields.String(required=True, description='Repo resource identifier (domain name)'),
        'model': fields.String(required=True, description='Source of classifier '
                                                          '(BAGGING | BOOSTING_ADA | BOOSTING_SGD '
                                                          '| DECISION_TREE | EXTRA_TREES | NAIVE_BAYES_MULTI '
                                                          '| NAIVE_BAYES_COMPLEMENT | RANDOM_FOREST | VOTING '
                                                          '| CNN_NETWORK)'),
        'lang': fields.String(required=True, description='Language (es, en)'),
    })

    def __init__(self, *args, **kwargs):
        # start - JAEGER
        config = Config(config={'sampler': {'type': 'const', 'param': 1},
                                'logging': True
                                },
                        service_name=__name__)
        config.initialize_tracer()
        super().__init__(*args, **kwargs)

    trace_requests()  # noqa

    # end - JAEGER
    @api.doc(
        description='Machine Learning Service',
        responses={
            200: 'OK',
            400: 'Invalid Argument',
            500: 'Internal Error'})
    @api.expect(machineLearningService)
    def post(self):
        root_span = None
        try:
            self.logger.Information('GbcMachineLearningService::POST - init')
            # start - JAEGER
            root_span = opentracing.tracer.start_span(operation_name=inspect.currentframe().f_code.co_name)
            # end - JAEGER

            request_payload = request.get_json()
            # self.logger.LogInput('GbcMachineLearningService::POST: ', request_payload)

            source = request_payload['source']
            data = request_payload['data']
            domain = request_payload['domain']
            model = request_payload['model']
            lang = request_payload['lang']

            if source == 'PLAINTEXT':
                response = 'PLAINTEXT'
            elif source == 'FILE':
                response = 'FILE'
            elif source == 'IMAGE':
                response = 'IMAGE'
            elif source == 'S3':
                s3 = self.getS3Session()
                return {'result': 'not implemented'}
            else:
                response = ''
                raise Exception('No valid source provided')

            res = {
                'result': 'ok',
                'response': response
            }

            return jsonify(res)

        except Exception as e:
            self.logger.Error('ERROR - GbcMachineLearningService::POST' + str(e.args), sys.exc_info())
            return {'message': 'Something went wrong: ' + str(e)}, 500

        finally:
            # root_span stays None if start_span itself failed
            if root_span is not None:
                root_span.finish()

    class Student(object):
        def __init__(self, first_name: str, last_name: str):
            self.first_name = first_name
            self.last_name = last_name

    @classmethod
    def getS3Session(cls):
        session = boto3.Session(
            aws_access_key_id=os.environ['ENV_ACCESS_KEY_ID'],
            aws_secret_access_key=os.environ['ENV_SECRET_ACCESS_KEY']
        )
        s3 = session.client(u's3')
        return s3
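
getS3Session returns a plain boto3 client, so the usual S3 calls apply. A brief usage sketch (the bucket and key names are made up for illustration; valid credentials must be present in ENV_ACCESS_KEY_ID / ENV_SECRET_ACCESS_KEY):

s3 = GbcMachineLearningService.getS3Session()

# List the caller's buckets.
for bucket in s3.list_buckets()['Buckets']:
    print(bucket['Name'])

# Download an object; 'my-bucket' and 'data/input.txt' are placeholders.
s3.download_file('my-bucket', 'data/input.txt', '/tmp/input.txt')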
Example #7
from commonsLib import loggerElk
from controller.ClassFile import ClassFile

logger = loggerElk(__name__, True)


class Singleton:
    # Here will be the instance stored.
    __instance = None
    vectorizers = None

    @staticmethod
    def getInstance(conf):
        """ Static access method. """
        if Singleton.__instance is None:
            Singleton(conf)
        return Singleton.__instance

    def __init__(self, conf):
        """ Virtually private constructor. """
        if Singleton.__instance is not None:
            raise Exception("This class is a singleton!")
        else:
            # Register the instance so getInstance returns it.
            Singleton.__instance = self
            self.vectorizers = {}

            files = ClassFile.list_files_ext(conf.base_dir, 'vectorizer.tfidf')
            logger.Information('GbcMlDocumentClassifierPrediction::POST - loading vectorizers...')

            for f in files:
                key = ClassFile.get_containing_dir_name(f)
                logger.Information(f'GbcMlDocumentClassifierPrediction::POST - loading model: {key}...')
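
Callers are expected to go through the static accessor rather than the constructor, so the vectorizers are loaded only once per process. A minimal usage sketch, assuming a Configuration instance as in the earlier examples:

conf = Configuration()

# The first call constructs the singleton and loads the vectorizers;
# subsequent calls return the same cached instance.
store = Singleton.getInstance(conf)
assert store is Singleton.getInstance(conf)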
Example #8
    def __init__(self, conf_route="model/config.yml"):
        self.logger = loggerElk(__name__, True)

        # load config file
        try:
            with open(conf_route, 'r') as yml_file:
                cfg = yaml.safe_load(yml_file)
        except Exception:
            self.logger.Error("Error loading model/config.yml...")
            # Re-raise: cfg is undefined past this point, so continuing
            # would fail anyway.
            raise

        development = cfg['development']

        # data augmentation
        self.examples_per_case = development['augmentation']

        # server
        server = development['server']
        self.server_host = server['host']
        self.server_port = server['port']

        # image processing
        image_processing = development['image_processing']
        self.resize_width = image_processing['resize_width']
        self.resize_height = image_processing['resize_height']
        self.crop_width = image_processing['crop_width']
        self.crop_height = image_processing['crop_height']

        # working directories
        directories = development['directories']
        self.base_dir = directories['base_dir']
        self.sample = directories['sample']
        self.cat_file = directories['cat_file']
        self.dictionary = directories['dictionary']
        self.classes = directories['classes']
        self.working_path = ''

        # pre process
        pre_process = development['pre_process']
        self.pre_process_batch_size = pre_process['pre_process_batch_size']
        self.max_string_size = pre_process['max_string_size']
        self.spa_dict = []

        # files
        files = development['files']
        self.tf = files['tf']
        self.tfidf = files['tfidf']
        self.vectorizer = files['vectorizer']

        # system separator
        self.sep = os.path.sep

        # cnn layers sizes
        nn_layer = development['nn_layer']
        self.filters = nn_layer['filters']
        self.kernels = nn_layer['kernels']
        self.units = nn_layer['units']
        self.epochs = nn_layer['epochs']

        # bagging
        bagging = development['bagging']
        self.bagging_model = bagging['bagging_model']
        self.bagging_n_estimators = bagging['bagging_n_estimators']
        self.bagging_max_samples = bagging['bagging_max_samples']
        self.bagging_bootstrap = bagging['bagging_bootstrap']
        self.bagging_n_jobs = bagging['bagging_n_jobs']

        # boosting
        boosting = development['boosting']
        self.boosting_model = boosting['boosting_model']
        self.boosting_n_estimators = boosting['boosting_n_estimators']

        # decision tree
        decision_tree = development['decision tree']
        self.dt_model = decision_tree['dt_model']
        self.dt_max_depth = decision_tree['dt_max_depth']

        # extra trees
        extra_trees = development['extra trees']
        self.et_model = extra_trees['et_model']
        self.et_n_estimators = extra_trees['et_n_estimators']
        self.et_max_features = extra_trees['et_max_features']
        self.et_bootstrap = extra_trees['et_bootstrap']
        self.et_n_jobs = extra_trees['et_n_jobs']

        # naive bayes
        naive_bayes = development['naive bayes']
        self.nb_model = naive_bayes['nb_model']

        # random forest
        random_forest = development['random forest']
        self.rf_model = random_forest['rf_model']
        self.rf_n_estimators = random_forest['rf_n_estimators']
        self.rf_max_leaf_nodes = random_forest['rf_max_leaf_nodes']
        self.rf_n_jobs = random_forest['rf_n_jobs']

        # n_network
        n_network = development['nnetwork']
        self.nn_model_name = n_network['nn_model_name']
        self.nn_solver = n_network['nn_solver']
        self.nn_alpha = n_network['nn_alpha']
        self.nn_hidden_layer_sizes = n_network['nn_hidden_layer_sizes']
        self.nn_random_state = n_network['nn_random_state']
        self.nn_image_size = n_network['nn_image_size']
        self.nn_class_size = n_network['nn_class_size']
        self.nn_batch_size = n_network['nn_batch_size']
        self.nn_epochs = n_network['nn_epochs']

        # voting
        voting = development['voting']
        self.voting_model = voting['voting_model']
        self.voting = voting['voting']
        self.voting_n_jobs = voting['voting_n_jobs']
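
Once constructed, the Configuration object exposes each YAML section as flat attributes, so consumers never touch the raw dictionary. A short usage sketch, assuming the default model/config.yml exists with the sections read above:

conf = Configuration()

# Attributes mirror the YAML sections parsed in __init__.
print(conf.server_host, conf.server_port)
print(conf.nn_image_size, conf.nn_class_size)

# Paths are assembled with the platform separator captured in conf.sep.
sample_dir = conf.base_dir + conf.sep + conf.sample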