コード例 #1
0
    def transform(self, sents):
        """Run this pipeline step on ``sents`` and return the filtered result.

        The parent ``transform`` is applied first, then every entry of the
        column named by ``self.operant_column_name`` is mapped through
        ``self.sentence_to_embeding_tokens``.  The tasks reported by
        ``self._collect_tasks`` are executed, and for each enabled
        persistence task the fraction of completed inserts is stored in
        ``self.metrics``.

        Returns:
            The result of the ``'filter_unknown_words'`` task.
        """
        step_name = self.__class__.__name__
        debug('Progressing %s/%s steps (%s)' %
              (self.order, self.num_pipeline_steps, step_name))

        sents = super().transform(sents)

        # Basic sentence filtering on the operant column.
        column = self.operant_column_name
        sents[column] = sents[column].apply(self.sentence_to_embeding_tokens)

        # Collect the tasks and run each one with its own argument list.
        operators, arguments = self._collect_tasks(sents)
        results = {}
        for task_name, operator in operators.items():
            results[task_name] = operator(*arguments[task_name])

        # Measure the persisted fraction for every enabled persistence task.
        for key in ('persist_sentences', 'persist_unknown_words'):
            if not getattr(self, key):
                continue
            outcomes = results[key]
            try:
                fraction = float(sum(outcomes)) / float(len(outcomes))
            except ZeroDivisionError:
                # An empty outcome list means nothing was inserted.
                warn(
                    'Caught zero division error. Try increasing the batch size.'
                )
            else:
                self.metrics[key] = {'completed_inserts': fraction}

        return results['filter_unknown_words']
コード例 #2
0
 def number_to_string(num):
     """Spell out ``num`` in words, or return ``''`` if the engine fails.

     ``self`` is not a parameter here; it is resolved from an enclosing
     scope that is not part of this snippet.  Any failure of
     ``self.underlying_engine.number_to_words`` is logged as a warning
     and an empty string is returned (best-effort conversion).
     """
     try:
         return self.underlying_engine.number_to_words(num)
     except NumOutOfRangeError:
         warn('NumOutOfRangeError caught from inflect engine for %s' %
              num)
     except Exception:
         warn('Caught unknown exception from inflect engine')
     # Only reached when one of the handlers above ran.
     return ''
コード例 #3
0
    def _check_derived_class_argument(self, arguments, default_values):
        """Give every attribute that is missing on ``self`` its default value.

        ``arguments`` and ``default_values`` are walked in lockstep; for
        each attribute name that ``self`` does not have yet, a warning is
        logged and the paired default is assigned.  Attributes that are
        already present are left untouched.

        Raises:
            Exception: whatever logging or ``setattr`` raised, re-raised
                after an error message has been logged.
        """
        class_name = self.__class__.__name__

        for arg, val in zip(arguments, default_values):
            if hasattr(self, arg):
                continue  # already configured by the derived class

            try:
                warn('%s: argument "%s" has no value using defaults:' %
                     (class_name, arg))
                debug(val)
                setattr(self, arg, val)
            except Exception:
                error('Cannot set default valeus for argument %s' % arg)
                raise
コード例 #4
0
def persist(backend, insert_qry):
    """Execute ``insert_qry`` on ``backend`` and report whether it committed.

    Args:
        backend: object exposing ``execute_insert(query)``.
        insert_qry: the insert statement handed to the backend.

    Returns:
        Whatever ``backend.execute_insert`` returned, or ``False`` when the
        insert failed with error code ``'23505'`` (a duplicate key).

    Raises:
        Exception: any other failure of ``execute_insert`` is logged and
            re-raised unchanged.
    """
    committed = False
    try:
        committed = backend.execute_insert(insert_qry)
        debug('Excecuted query: %s' % insert_qry)

    except Exception as err:
        # Only database errors carry a ``pgcode`` (presumably psycopg2-style
        # exceptions).  Reading it defensively keeps an unrelated failure
        # from being masked by an AttributeError raised inside this handler.
        pgcode = getattr(err, 'pgcode', None)

        if pgcode == '23505':
            # SQLSTATE 23505 is PostgreSQL's unique_violation: the row
            # already exists, so the insert is skipped rather than fatal.
            warn('Caught primary key vioaltion, when %s' % insert_qry)
        else:
            error('Throwing unknown runtime exception, when: %s' % insert_qry)
            print(err, pgcode)
            raise

    return committed
コード例 #5
0
    def _create_steps(self, specs, confs):
        """Instantiate every pipeline step described by ``specs``.

        Args:
            specs: iterable of ``(module_name, class_name, step_name)``
                triples, in execution order.
            confs: mapping that may hold a ``'<step_name>_conf'`` entry per
                step, with optional ``'args'`` and ``'kwargs'`` items.

        Returns:
            ``self.pipeline_steps``: a list of ``(step_name, instance)``
            tuples, one per spec.
        """
        # Materialise once so the total is known up front, even for a
        # one-shot iterable.
        specs = list(specs)
        total_steps = len(specs)

        self.pipeline_steps = []
        for order, (module_name, class_name, step_name) in enumerate(specs):

            try:  # default conf safety
                step_conf = confs['%s_conf' % step_name]
            except KeyError:
                warn('No backend configuration found for %s. Using defaults.' %
                     class_name)
                args, kwargs = [], {}
            else:
                args = step_conf.get('args', [])
                # Copy: the wrapper_* keys added below must not leak back
                # into the caller's configuration dict.
                kwargs = dict(step_conf.get('kwargs', {}))

            kwargs['wrapper_db'] = self._db_backend
            kwargs['wrapper_order'] = order + 1
            # Total number of steps.  The previous value,
            # len(self.pipeline_steps) + 1, was always equal to
            # wrapper_order.
            kwargs['wrapper_num_pipeline_steps'] = total_steps
            class_instance = instansiate_engine(module_name, class_name, args,
                                                kwargs)

            self.pipeline_steps.append((step_name, class_instance))

        return self.pipeline_steps
コード例 #6
0
def persist_sentences(*args):
    """Persist every row of a sentence table, one insert per row.

    Positional args:
        args[0]: db backend, forwarded to ``persist``.
        args[1]: raw insert data; must expose ``shape`` and
            ``apply(func, axis=1)`` (presumably a pandas DataFrame).
        args[2]: name of the target table.
        Any further positional arguments are ignored.

    Returns:
        A list with the result of ``persist`` for each row, or an empty
        list when there is nothing to persist.

    Raises:
        IndexError: fewer than three positional arguments were given.
    """
    try:  # parse args
        db = args[0]    # db_backend
        data = args[1]  # raw insert_data
        name = args[2]  # table_name
    except IndexError:
        # Indexing a tuple out of range raises IndexError, never KeyError.
        error('Not enough arguments to persist sentences')
        raise

    def insert_frmter(row):
        """Render the first two row values as ``(id, '{items}')``, brackets stripped."""
        row_string = "(%s, '{%s}')" % (row.values[0], row.values[1])
        return row_string.replace('[', '').replace(']', '')

    # prepare insert
    if data.shape[0] == 0:
        warn('Nothing to persist.')
        return []

    # NOTE(review): values are interpolated straight into the statement;
    # confirm the data is trusted or switch to parameterised queries.
    insert_data = data.apply(insert_frmter, axis=1)
    return [persist(db, insert_qry(name, row)) for row in insert_data]
コード例 #7
0
 def fit(self, sents):
     """Default no-op ``fit``: log a warning and return ``sents`` unchanged."""
     owner = self.__class__.__name__
     warn('Default "%s.fit" method does not do anything' % owner)
     return sents