import time
import traceback

from scipy.io import arff

import evaluation  # project-local module providing the Evaluation class


def evaluate_method(classifier, stream_name, method_name, initial_size,
                    step_size):
    print(stream_name, method_name)
    try:
        start = time.time()
        data, meta = arff.loadarff("streams/%s.arff" % stream_name)
        if data is None:
            print("Empty data")
            raise Exception("Empty data")
        # The last ARFF attribute holds the class labels
        classes = meta[meta.names()[-1]][1]
        evl = evaluation.Evaluation(classifier=classifier,
                                    stream_name=stream_name,
                                    method_name=method_name,
                                    tqdm=False)
        evl.test_and_train(data=data, classes=classes,
                           initial_size=initial_size, step_size=step_size)
        evl.compute_metrics()
        evl.save_to_csv_metrics()
        print("End", stream_name, method_name, time.time() - start)
    except Exception as ex:
        print(str(ex))
        traceback.print_exc()
        print("Exception in", stream_name, method_name)
import logging


# Variant of evaluate_method that also logs progress to a file and saves
# confusion matrices instead of the computed metrics.
def evaluate_method(classifier, stream_name, method_name, initial_size,
                    step_size):
    # basicConfig is a no-op after the first call, so repeated invocations
    # are harmless.
    logging.basicConfig(filename='realStreams.log', filemode="a",
                        format='%(asctime)s - %(levelname)s: %(message)s',
                        level=logging.DEBUG)
    try:
        logging.info("Start %s %s", stream_name, method_name)
        print(stream_name, method_name)
        start = time.time()
        data, meta = arff.loadarff("streams/%s.arff" % stream_name)
        if data is None:
            print("Empty data")
            raise Exception("Empty data")
        classes = meta[meta.names()[-1]][1]
        evl = evaluation.Evaluation(classifier=classifier,
                                    stream_name=stream_name,
                                    method_name=method_name,
                                    experiment_name=experiment_name,  # module-level setting
                                    tqdm=False)
        evl.test_and_train(data=data, classes=classes,
                           initial_size=initial_size, step_size=step_size)
        evl.save_to_csv_confmat()
        logging.info("End %s %s %f", stream_name, method_name,
                     time.time() - start)
        print("End", stream_name, method_name, time.time() - start)
    except Exception as ex:
        logging.exception("Exception in %s %s", stream_name, method_name)
        print(str(ex))
        traceback.print_exc()
        print("Exception in %s %s" % (stream_name, method_name))
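
# A minimal driver sketch (not part of the original sources): the stream
# names and chunk sizes below are hypothetical placeholders, and the
# scikit-learn classifiers are stand-ins for whatever incremental learners
# the experiments actually used. It fans evaluate_method out over a process
# pool so each stream/method pair runs, and logs, independently.
from multiprocessing import Pool

from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier


def run_all():
    methods = {"GNB": GaussianNB(), "MLP": MLPClassifier()}
    streams = ["electricity", "covtype"]  # hypothetical .arff files in streams/
    jobs = [(clf, stream, name, 1000, 500)
            for stream in streams
            for name, clf in methods.items()]
    with Pool() as pool:
        pool.starmap(evaluate_method, jobs)


if __name__ == "__main__":
    run_all()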
# NOTE: this method relies on module-level imports (sys, evaluation, timer,
# annotations, jsonrpc) and an avg() helper from the surrounding codebase.
def evaluate(self, learner, partition='test', debug_idxs=None, skip_idxs=(),
             decoder='ilp', n_eval=(1, 2, 3, 4), streaming=True,
             overwritten_params=(), eval_path=None, output_path=None,
             lm_proxy=None, **kwargs):
    """Run the transduction model on designated test instances and report
    performance metrics.
    """
    # When evaluating multiple iterations of the same model over a fixed
    # partition, decoding should ensure that initialization isn't
    # unnecessarily repeated.
    if partition == 'test' and kwargs.get('subcorpus') == 'final':
        print "FINAL"
        eval_instances = self.get_instances(partition=partition,
                                            debug_idxs=debug_idxs,
                                            skip_idxs=skip_idxs)
        system_name = learner.name
    elif learner is not None:
        eval_instances = self.decode_instances(
                learner,
                partition=partition,
                debug_idxs=debug_idxs,
                skip_idxs=skip_idxs,
                decoder=decoder,
                streaming=streaming,
                overwritten_params=overwritten_params,
                **kwargs)
        system_name = learner.name
    else:
        eval_instances = self.get_instances(partition=partition,
                                            debug_idxs=debug_idxs,
                                            skip_idxs=skip_idxs)
        system_name = 'baseline'
    num_instances = len(eval_instances)

    # Record overwritten parameters in the filenames; overwritten_params is
    # expected to be a dict when non-empty.
    overwriting_str = None
    if len(overwritten_params) > 0:
        overwriting_str = '_OW-'
        i = 0
        for param_name, value in overwritten_params.iteritems():
            if isinstance(value, (list, tuple)):
                overwriting_str += '+'.join(str(v) for v in sorted(value))
            else:
                overwriting_str += str(value)
            i += 1
            if i < len(overwritten_params):
                overwriting_str += '-'

    if output_path is not None:
        output_filename = ''.join(
                (output_path, '/', '_'.join((partition, 'under', system_name)),
                 overwriting_str if overwriting_str is not None else '',
                 '_', decoder, '.out'))
        outf = open(output_filename, 'wb')

    # Determine the evaluations to run by looking at a representative
    # instance that actually produced output
    i = 0
    while i < len(eval_instances) and \
            not hasattr(eval_instances[i], 'output_sent'):
        i += 1
    if i == len(eval_instances):
        print "WARNING: all instances failed; skipping evaluation"
        sys.exit()
    some_instance = eval_instances[i]
    has_labels = hasattr(some_instance, 'label_sentences')
    has_rasp = hasattr(some_instance.gold_sentences[0], 'relgraph')
    has_outtrees = hasattr(some_instance.output_sent, 'outtree')
    has_outframes = hasattr(some_instance.output_sent, 'outframes')

    # FIXME TEMPORARY! MUST MAKE "False" FOR TEST!
    skip_failed = False

    # Initialize the evaluations
    eval_obj = evaluation.Evaluation(title='TRANSDUCTION_EVAL')
    output_sents = []
    with timer.AvgTimer(num_instances):
        for i, instance in enumerate(eval_instances):
            sys.stdout.write("Evaluating " + str(num_instances) +
                             (" " + partition if partition is not None
                              else "") +
                             " instances: " + str(i + 1) + '\r')

            # Duration and failure status
            eval_obj.include(
                    system=system_name,
                    corpus='other',
                    decode_time=instance.decode_times[-1],
                    solution_time=instance.solution_times[-1]
                            if len(instance.solution_times) > 0 else 0,
                    inputs=len(instance.input_sents),
                    _failed=int(not hasattr(instance, 'output_sent')),
                    )
            if skip_failed and not hasattr(instance, 'output_sent'):
                print "WARNING: Skipping failed instance", instance.idx
                continue

            # POS tag recall over content words (NN* and VB*)
            for use_labels in set([False]) | set([has_labels]):
                p, r, f = instance.score_content_words(
                        use_labels=use_labels, prefixes=('NN', 'VB'))
                eval_obj.add_metrics(
                        precision=p,
                        recall=r,
                        system=system_name,
                        corpus='LBLs NN+VB' if use_labels else 'GOLD NN+VB',
                        )

            try:
                if lm_proxy is not None:
                    output_tokens = instance.output_sent.tokens \
                            if hasattr(instance, 'output_sent') else []
                    eval_obj.include(system=system_name,
                                     corpus='other',
                                     lm=lm_proxy.score_sent(output_tokens))
            except jsonrpc.RPCTransportError:
                print "ERROR: JSON-RPC hiccups; skipping LM scoring"

            if decoder.startswith('dp+'):
                # Record convergence of dual decomposition or bisection.
                # Will be 0 if neither are used.
                eval_obj.include(
                        system=system_name,
                        corpus='other',
                        convergence_=int(instance.converged),
                        iterations=instance.num_iterations,
                        )

            if len(instance.sentences) == 1:
                # Paraphrasing or compression-specific metrics
                eval_obj.include(
                        system=system_name,
                        corpus='STATS gold',
                        comp_=instance.get_gold_compression_rate(),
                        length=instance.avg_gold_len,
                        proj_=avg(int(gold_sent.dparse.is_projective())
                                  for gold_sent in instance.gold_sentences),
                        overlap_=avg(instance.get_overlap(gold_sent)
                                     for gold_sent in instance.gold_sentences),
                        )
                eval_obj.include(
                        system=system_name,
                        corpus='STATS input',
                        comp_=1.0,
                        length=instance.avg_len,
                        proj_=int(instance.sentences[0].dparse.is_projective()),
                        overlap_=instance.get_overlap(instance.sentences[0]),
                        )
                eval_obj.include(
                        system=system_name,
                        corpus='STATS output',
                        comp_=instance.get_compression_rate(),
                        length=len(instance.output_sent.tokens)
                                if hasattr(instance, 'output_sent') else 0,
                        )
                if hasattr(instance, 'output_sent') and has_outtrees:
                    eval_obj.include(
                            system=system_name,
                            corpus='STATS output',
                            proj_=int(instance.output_sent.outtree
                                      .is_projective())
                                    if hasattr(instance.output_sent.outtree,
                                               'is_projective') else 0,
                            overlap_=instance.get_overlap(
                                    instance.output_sent,
                                    parse_type='outtree'),
                            )
            # n-gram precision and recall
            for use_labels in set([False]) | set([has_labels]):
                for n in n_eval:
                    p, r, f = instance.score_ngrams(n=n,
                                                    use_labels=use_labels)
                    eval_obj.add_metrics(
                            precision=p,
                            recall=r,
                            system=system_name,
                            corpus='LBLs n=' + str(n) if use_labels
                                    else 'GOLD n=' + str(n),
                            )

                if hasattr(instance, 'output_sent') and has_outframes:
                    # Precision and recall for frames
                    p, r, f = instance.score_frames(fes=False,
                                                    frames_type='outframes',
                                                    use_labels=use_labels)
                    eval_obj.add_metrics(
                            precision=p,
                            recall=r,
                            system=system_name,
                            corpus="GOLD frames",
                            )

                    # Precision and recall for frame elements
                    p, r, f = instance.score_frames(fes=True,
                                                    frames_type='outframes',
                                                    use_labels=use_labels)
                    eval_obj.add_metrics(
                            precision=p,
                            recall=r,
                            system=system_name,
                            corpus="GOLD fes",
                            )

            # Parse output sentences for syntactic evaluation. The 100-token
            # limit is intended for the Stanford parser.
            if hasattr(instance, 'output_sent') and \
                    len(instance.output_sent.tokens) <= 100:
                output_sents.append(instance.output_sent)

            # Write the output to a file
            if output_path is not None:
                outf.write(instance.get_display_string())

    print  # terminate the '\r' progress line
    if output_path is not None:
        outf.close()

    # Parse-based evaluations
    try:
        parse_types = ['dparse']
        if has_outtrees:
            parse_types.append('outtree')

        # Get annotations. Only run RASP if the inputs have RASP annotations
        # since it's slow.
        annotations.annotate(output_sents, 'Stanford')
        if has_rasp:
            annotations.annotate(output_sents, 'Rasp')
            parse_types.append('relgraph')

        # Add dependency results to evaluations
        for i, instance in enumerate(eval_instances):
            if skip_failed and not hasattr(instance, 'output_sent'):
                print "WARNING: Skipping failed instance", instance.idx, \
                        "again"
                continue
            for parse_type in parse_types:
                for use_labels in set([False]) | set([has_labels]):
                    name = ('LBLs ' if use_labels else 'GOLD ') + parse_type
                    p, r, f = instance.score_dependencies(
                            parse_type=parse_type, use_labels=use_labels)
                    eval_obj.add_metrics(
                            precision=p,
                            recall=r,
                            system=system_name,
                            corpus=name,
                            _failed=int(not instance.has_output_parses(
                                    parse_type=parse_type)))
    except OSError:
        print "Skipping parser evaluations"

    print eval_obj.title
    print eval_obj.table(skip_single_keys=True)

    if eval_path is not None and debug_idxs is None:
        eval_filename = ''.join(
                (eval_path, '/', '_'.join((partition, 'under', system_name)),
                 overwriting_str if overwriting_str is not None else '',
                 '_', decoder, '.eval'))
        eval_obj.save(eval_filename, append=False)
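
# Illustrative sketch (not from the original codebase) of the kind of n-gram
# precision/recall that score_ngrams() above is assumed to compute: output
# n-grams are matched against each gold sentence and the best-F1 pairing is
# kept. All names here (ngram_prf, output_tokens, gold_token_lists) are
# hypothetical.
from collections import Counter

def ngram_prf(output_tokens, gold_token_lists, n=2):
    def ngrams(tokens):
        return Counter(tuple(tokens[i:i + n])
                       for i in range(len(tokens) - n + 1))

    out_counts = ngrams(output_tokens)
    best = (0.0, 0.0, 0.0)
    for gold_tokens in gold_token_lists:
        gold_counts = ngrams(gold_tokens)
        # Multiset intersection counts each shared n-gram at most as often
        # as it appears in both sentences.
        overlap = sum((out_counts & gold_counts).values())
        p = float(overlap) / max(sum(out_counts.values()), 1)
        r = float(overlap) / max(sum(gold_counts.values()), 1)
        f = 2 * p * r / (p + r) if p + r > 0 else 0.0
        if f > best[2]:
            best = (p, r, f)
    return best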
# Inside the logistic-regression grid search: p is the retained PCA variance
# and c the inverse regularization strength of the current iteration;
# X_scaled_pca_val, y_val, ks_val, result_val_truth, tops, models, f, and
# path are defined by the enclosing script.
pred_pca_val = logreg.predict_proba(X_scaled_pca_val)
# pred_pca_val[:, 0] is P(class 0) (assuming labels {0, 1}), so smaller
# values mean "more likely positive"; it is used as the ranking score here
# and below.
fpr, tpr, thresholds = metrics.roc_curve(y_val, pred_pca_val[:, 0])
roc_auc = metrics.auc(fpr, tpr)
models.append([p, c, X_scaled_pca_val.shape, scaler, pca, logreg,
               pred_pca_val[:, 0]])

for top in tops:
    # Force the `top` examples most likely to be positive into class 1
    t1 = np.argsort(pred_pca_val[:, 0])[0:top]
    y_pred_val = logreg.predict(X_scaled_pca_val)
    y_pred_val[t1] = 1

    result_val_pred = []
    for i in xrange(len(ks_val)):
        if y_pred_val[i] == 1:
            result_val_pred.append(ks_val[i])

    E1 = evaluation.Evaluation(result_val_pred, result_val_truth)
    f1 = E1.F1()
    i1 = E1.intersection()
    p1 = E1.precision()
    r1 = E1.recall()
    line = 'pca: %f, c: %e, top: %d, auc: %f, f1: %f, i1: %d, p1: %f, r1: %f' % (
            p, c, top, roc_auc, f1, i1, p1, r1)
    print line
    f.write(line + '\n')

models_file = path + '/models/logreg1.pkl'
with open(models_file, 'wb') as fp:
    pickle.dump(models, fp, protocol=2)
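
# Sketch (not in the original script) of loading the pickled grid-search
# results back for scoring new data; it assumes the list layout saved above:
# [p, c, shape, scaler, pca, logreg, val_scores]. X_new is a hypothetical
# feature matrix with the same columns used at training time.
with open(models_file, 'rb') as fp:
    saved_models = pickle.load(fp)

p, c, shape, scaler, pca, logreg, val_scores = saved_models[0]
# Reapply the exact preprocessing chain before predicting.
scores_new = logreg.predict_proba(pca.transform(scaler.transform(X_new)))[:, 0]

### Tuning GBDT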