import time
import traceback

from scipy.io import arff  # ARFF loader assumed to be scipy.io.arff

import evaluation  # project-local module providing the Evaluation class


def evaluate_method(classifier, stream_name, method_name, initial_size,
                    step_size):
    print(stream_name, method_name)
    try:
        start = time.time()
        data, meta = arff.loadarff("streams/%s.arff" % stream_name)
        if data is None:
            print("Empty data")
            raise Exception("Empty data")

        classes = meta[meta.names()[-1]][1]
        evl = evaluation.Evaluation(classifier=classifier,
                                    stream_name="%s" % stream_name,
                                    method_name=method_name,
                                    tqdm=False)
        evl.test_and_train(data=data,
                           classes=classes,
                           initial_size=initial_size,
                           step_size=step_size)
        evl.compute_metrics()
        evl.save_to_csv_metrics()
        print("End", stream_name, method_name, time.time() - start)
    except Exception as ex:
        print(str(ex))
        traceback.print_exc()
        print("Exception in ", stream_name, method_name)
def evaluate_method(classifier, stream_name, method_name, initial_size,
                    step_size):

    logging.basicConfig(filename='realStreams.log', filemode="a",
                        format='%(asctime)s - %(levelname)s: %(message)s',
                        level='DEBUG')

    try:
        logging.info("Start %s %s", stream_name, method_name)
        print(stream_name, method_name)
        start = time.time()

        data, meta = arff.loadarff("streams/%s.arff" % stream_name)
        if data is None:
            print("Empty data")
            raise Exception("Empty data")

        classes = meta[meta.names()[-1]][1]
        # experiment_name is assumed to be defined at module level.
        evl = evaluation.Evaluation(classifier=classifier,
                                    stream_name="%s" % stream_name,
                                    method_name=method_name,
                                    experiment_name=experiment_name,
                                    tqdm=False)
        evl.test_and_train(data=data,
                           classes=classes,
                           initial_size=initial_size,
                           step_size=step_size)
        evl.save_to_csv_confmat()
        logging.info("End %s %s %f", stream_name, method_name, time.time()-start)
        print("End", stream_name, method_name, time.time()-start)

    except Exception as ex:
        logging.exception("Exception in %s %s", stream_name, method_name)
        print(str(ex))
        traceback.print_exc()
        print("Exception in %s %s" % (stream_name, method_name))
Example #3
    def evaluate(self,
                 learner,
                 partition='test',
                 debug_idxs=None,
                 skip_idxs=(),
                 decoder='ilp',
                 n_eval=(1, 2, 3, 4),
                 streaming=True,
                 overwritten_params=(),
                 eval_path=None,
                 output_path=None,
                 lm_proxy=None,
                 **kwargs):
        """Run the transduction model on designated test instances and report
        performance metrics.
        """
        # When evaluating multiple iterations of the same model over a fixed
        # partition, decoding should ensure that initialization isn't
        # unnecessarily repeated.
        #print(kwargs)

        if partition == 'test' and kwargs.get('subcorpus') == 'final':
            print("FINAL")
            eval_instances = self.get_instances(partition=partition,
                                                debug_idxs=debug_idxs,
                                                skip_idxs=skip_idxs)
            system_name = learner.name

        elif learner is not None:
            eval_instances = self.decode_instances(learner,
                                                   partition=partition,
                                                   debug_idxs=debug_idxs,
                                                   skip_idxs=skip_idxs,
                                                   decoder=decoder,
                                                   streaming=streaming,
                                                   overwritten_params=overwritten_params,
                                                   **kwargs)
            system_name = learner.name
        else:
            eval_instances = self.get_instances(partition=partition,
                                                debug_idxs=debug_idxs,
                                                skip_idxs=skip_idxs)
            system_name = 'baseline'

        num_instances = len(eval_instances)

        # Record overwritten parameters in the filenames.
        # overwritten_params is expected to be a dict when non-empty.
        overwriting_str = None
        if len(overwritten_params) > 0:
            overwriting_str = '_OW-'
            i = 0
            for param_name, value in overwritten_params.iteritems():
                if isinstance(value, (list, tuple)):
                    overwriting_str += '+'.join(str(v) for v in sorted(value))
                else:
                    overwriting_str += str(value)
                i += 1
                if i < len(overwritten_params):
                    overwriting_str += '-'

        if output_path is not None:
            output_filename = ''.join(
                (output_path, '/', '_'.join((partition, 'under', system_name)),
                 overwriting_str if overwriting_str is not None else '', '_',
                 decoder, '.out'))
            outf = open(output_filename, 'wb')

        # Determine the evaluations to run by looking at a representative
        # instance
        i = 0
        while i < len(eval_instances) and \
                not hasattr(eval_instances[i], 'output_sent'):
            i += 1
        if i == len(eval_instances):
            print "WARNING: all instances failed; skipping evaluation"
            sys.exit()
        some_instance = eval_instances[i]
        has_labels = hasattr(some_instance, 'label_sentences')
        has_rasp = hasattr(some_instance.gold_sentences[0], 'relgraph')
        has_outtrees = hasattr(some_instance.output_sent, 'outtree')
        has_outframes = hasattr(some_instance.output_sent, 'outframes')

        # FIXME TEMPORARY! MUST MAKE "False" FOR TEST!
        skip_failed = False

        # Initialize the evaluations
        eval_obj = evaluation.Evaluation(title='TRANSDUCTION_EVAL')
        output_sents = []
        with timer.AvgTimer(num_instances):
            for i, instance in enumerate(eval_instances):
                sys.stdout.write("Evaluating " + str(num_instances) +
                                 (" " +
                                  partition if partition is not None else "") +
                                 " instances: " + str(i + 1) + '\r')

                # Duration and failure status
                eval_obj.include(
                        system=system_name,
                        corpus='other',
                        decode_time=instance.decode_times[-1],
                        solution_time=instance.solution_times[-1] \
                                if len(instance.solution_times) > 0 else 0,
                        inputs=len(instance.input_sents),
                        _failed=int(not hasattr(instance, 'output_sent')),
                        )

                if skip_failed and not hasattr(instance, 'output_sent'):
                    print "WARNING: Skipping failed instance", instance.idx
                    continue

                # POS tag recall
                for use_labels in set([False]) | set([has_labels]):
                    #for prefix in ('NN', 'VB', 'JJ', 'RB'):
                    #    p, r, f = instance.score_content_words(
                    #            use_labels=use_labels, prefixes=(prefix,))
                    #    eval_obj.add_metrics(
                    #            precision=p,
                    #            recall=r,
                    #            system=system_name,
                    #            corpus=('LBLs ' + prefix) if use_labels \
                    #                    else ('GOLD ' + prefix),
                    #            )
                    p, r, f = instance.score_content_words(
                        use_labels=use_labels, prefixes=('NN', 'VB'))
                    eval_obj.add_metrics(
                            precision=p,
                            recall=r,
                            system=system_name,
                            corpus=('LBLs ' + 'NN+VB') if use_labels \
                                else ('GOLD ' + 'NN+VB'),
                            )

                try:
                    if lm_proxy is not None:
                        output_tokens = instance.output_sent.tokens \
                                if hasattr(instance, 'output_sent') else []
                        eval_obj.include(system=system_name,
                                         corpus='other',
                                         lm=lm_proxy.score_sent(output_tokens))
                except jsonrpc.RPCTransportError:
                    print "ERROR: JSON-RPC hiccups; skipping LM scoring"

                if decoder.startswith('dp+'):
                    # Record convergence of dual decomposition or
                    # bisection. Will be 0 if neither are used.
                    eval_obj.include(
                        system=system_name,
                        corpus='other',
                        convergence_=int(instance.converged),
                        iterations=instance.num_iterations,
                    )

                if len(instance.sentences) == 1:
                    # Paraphrasing or compression-specific metrics
                    eval_obj.include(
                        system=system_name,
                        corpus='STATS gold',
                        comp_=instance.get_gold_compression_rate(),
                        length=instance.avg_gold_len,
                        proj_=avg(
                            int(gold_sent.dparse.is_projective())
                            for gold_sent in instance.gold_sentences),
                        overlap_=avg(
                            instance.get_overlap(gold_sent)
                            for gold_sent in instance.gold_sentences),
                    )
                    eval_obj.include(
                        system=system_name,
                        corpus='STATS input',
                        comp_=1.0,
                        length=instance.avg_len,
                        proj_=int(
                            instance.sentences[0].dparse.is_projective()),
                        overlap_=instance.get_overlap(instance.sentences[0]))
                    eval_obj.include(
                        system=system_name,
                        corpus='STATS output',
                        comp_=instance.get_compression_rate(),
                        length=len(instance.output_sent.tokens) if hasattr(
                            instance, 'output_sent') else 0,
                    )
                    if hasattr(instance, 'output_sent') and has_outtrees:
                        outtree = instance.output_sent.outtree
                        eval_obj.include(
                                system=system_name,
                                corpus='STATS output',
                                proj_=int(outtree.is_projective())
                                      if hasattr(outtree, 'is_projective')
                                      else 0,
                                overlap_=instance.get_overlap(
                                    instance.output_sent,
                                    parse_type='outtree')
                                )

#                    print "INSTANCE ", instance.idx
#                    crossing_edges = \
#                        instance.output_sent.outtree.get_crossing_edges()
#                    print "\n\nINPUT:",
#                    self.dump_parse(instance.sentences[0])
#
#                    for gs, gold_sent in enumerate(
#                            instance.gold_sentences):
#                        # get output indices for gold
#                        gold_idxs = []
#                        i = 0
#                        for token in gold_sent.tokens:
#                            while instance.sentences[0].tokens[i] != token:
#                                i += 1
#                            gold_idxs.append((0,i))
#
#                        print "\nGOLD:", gs,
#                        self.dump_parse(gold_sent,
#                            idx_mapper=gold_idxs)
#
#                    print "\n\nOUTPUT:",
#                    self.dump_parse(instance.output_sent,
#                            parse_type='outtree',
#                            crossing_edges=crossing_edges,
#                            idx_mapper=instance.output_idxs)

                # n-gram precision and recall
                for use_labels in set([False]) | set([has_labels]):
                    for n in n_eval:
                        p, r, f = instance.score_ngrams(n=n,
                                                        use_labels=use_labels)
                        eval_obj.add_metrics(
                            precision=p,
                            recall=r,
                            system=system_name,
                            corpus='LBLs n=' +
                            str(n) if use_labels else 'GOLD n=' + str(n),
                        )
                if hasattr(instance, 'output_sent') and has_outframes:
                    # Precision and recall for frames
                    p, r, f = instance.score_frames(fes=False,
                                                    frames_type='outframes',
                                                    use_labels=use_labels)
                    eval_obj.add_metrics(
                        precision=p,
                        recall=r,
                        system=system_name,
                        corpus="GOLD frames",
                    )

                    # Precision and recall for frame elements
                    p, r, f = instance.score_frames(fes=True,
                                                    frames_type='outframes',
                                                    use_labels=use_labels)
                    eval_obj.add_metrics(
                        precision=p,
                        recall=r,
                        system=system_name,
                        corpus="GOLD fes",
                    )

                # Parse output sentences for syntactic evaluation. The
                # 100 token limit is intended for the Stanford parser.
                if hasattr(instance, 'output_sent') and \
                        len(instance.output_sent.tokens) <= 100:
                    output_sents.append(instance.output_sent)

                # Write the output to a file
                if output_path is not None:
                    outf.write(instance.get_display_string())
#            print
            if output_path is not None:
                outf.close()

            # Parse-based evaluations
            try:
                parse_types = ['dparse']
                if has_outtrees:
                    parse_types.append('outtree')

                # Get annotations. Only run RASP if the inputs have RASP
                # annotations since it's slow
                annotations.annotate(output_sents, 'Stanford')
                if has_rasp:
                    annotations.annotate(output_sents, 'Rasp')
                    parse_types.append('relgraph')

                # Add dependency results to evaluations
                for i, instance in enumerate(eval_instances):
                    if skip_failed and not hasattr(instance, 'output_sent'):
                        print "WARNING: Skipping failed instance",
                        print instance.idx, "again"
                        continue

                    for parse_type in parse_types:
                        for use_labels in set([False]) | set([has_labels]):
                            name = ('LBLs ' if use_labels else 'GOLD ') + \
                                parse_type
                            p, r, f = instance.score_dependencies(
                                parse_type=parse_type, use_labels=use_labels)
                            eval_obj.add_metrics(
                                precision=p,
                                recall=r,
                                system=system_name,
                                corpus=name,
                                _failed=int(not instance.has_output_parses(
                                    parse_type=parse_type)))
            except OSError:
                print "Skipping parser evaluations"

        print eval_obj.title
        print eval_obj.table(skip_single_keys=True)
        if eval_path is not None and debug_idxs is None:
            eval_filename = ''.join(
                (eval_path, '/', '_'.join((partition, 'under', system_name)),
                 overwriting_str if overwriting_str is not None else '', '_',
                 decoder, '.eval'))
            eval_obj.save(eval_filename, append=False)
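
A hedged sketch of how this evaluate() method might be invoked; 'model' and 'learner' are hypothetical stand-ins for an object exposing the method and a trained learner with a .name attribute, and the paths are placeholders.

# Hypothetical call: keyword values mirror the defaults in the signature above.
model.evaluate(learner,
               partition='test',
               decoder='ilp',
               n_eval=(1, 2, 3, 4),
               streaming=True,
               eval_path='results/eval',      # placeholder directory
               output_path='results/output',  # placeholder directory
               subcorpus=None)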
Example #4
            pred_pca_val = logreg.predict_proba(X_scaled_pca_val)
            fpr, tpr, thresholds = metrics.roc_curve(y_val, pred_pca_val[:, 0])
            roc_auc = metrics.auc(fpr, tpr)
            models.append([
                p, c, X_scaled_pca_val.shape, scaler, pca, logreg,
                pred_pca_val[:, 0]
            ])
            for top in tops:
                t1 = np.argsort(pred_pca_val[:, 0])[0:top]
                y_pred_val = logreg.predict(X_scaled_pca_val)
                y_pred_val[t1] = 1
                result_val_pred = []
                for i in xrange(len(ks_val)):
                    if y_pred_val[i] == 1:
                        result_val_pred.append(ks_val[i])
                E1 = evaluation.Evaluation(result_val_pred, result_val_truth)
                f1 = E1.F1()
                i1 = E1.intersection()
                p1 = E1.precision()
                r1 = E1.recall()
                print 'pca: %f, c: %e, top: %d, auc: %f, f1: %f, i1: %d, p1: %f, r1: %f' % (
                    p, c, top, roc_auc, f1, i1, p1, r1)
                f.write(
                    'pca: %f, c: %e, top: %d, auc: %f, f1: %f, i1: %d, p1: %f, r1: %f\n'
                    % (p, c, top, roc_auc, f1, i1, p1, r1))

models_file = path + '/models/logreg1.pkl'
with open(models_file, 'wb') as fp:
    pickle.dump(models, fp, protocol=2)
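
For completeness, a small sketch of reading the pickled list back; each entry follows the [p, c, shape, scaler, pca, logreg, predictions] layout appended in the loop above.

# Reload the saved models and unpack one entry
# (stored as [p, c, shape, scaler, pca, logreg, predictions]).
with open(models_file, 'rb') as fp:
    saved_models = pickle.load(fp)

p, c, shape, scaler, pca, logreg, pred = saved_models[0]
print 'pca: %f, c: %e, val shape: %s' % (p, c, str(shape))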

### Tuning GBDT