def run_make_submission(settings, targets_and_pipelines, split_ratio):
    """Build one ensemble submission across all targets and write it out.

    Each entry of ``targets_and_pipelines`` is unpacked as
    ``(target, pipeline, feature_masks, classifier, classifier_name)``.

    Phase 1 submits ``make_submission_predictions`` for every
    (entry, feature mask) pair to a pool of ``settings.N_jobs`` workers and
    waits for the pool to finish. The async results are not collected.
    NOTE(review): presumably ``make_submission_predictions`` caches its
    result so the serial calls in phase 2 are cheap -- confirm.

    Phase 2 calls ``make_submission_predictions`` again, serially, for each
    mask, averages the ``mean_predictions`` over the masks of a target with
    ``np.mean(axis=0)`` and appends the rows produced by
    ``make_csv_for_target_predictions`` after the ``'clip,preictal'``
    header line.

    Every entry must carry the same number of feature masks; this is
    checked with an ``assert`` while the entries are processed in phase 2.
    The joined text is handed to ``write_submission_file`` together with a
    name built from the mask count and ``split_ratio``.
    """
    # Phase 1: fan the per-mask work out to the worker pool.
    workers = Pool(settings.N_jobs)
    for target_no, entry in enumerate(targets_and_pipelines, 1):
        target, pipeline, feature_masks, classifier, classifier_name = entry
        for mask_no, feature_mask in enumerate(feature_masks, 1):
            progress_str = 'T=%d/%d M=%d/%d' % (
                target_no, len(targets_and_pipelines),
                mask_no, len(feature_masks))
            job_kwargs = dict(feature_mask=feature_mask,
                              progress_str=progress_str,
                              quiet=True)
            workers.apply_async(
                make_submission_predictions,
                [settings, target, pipeline, classifier, classifier_name],
                job_kwargs)
    workers.close()
    workers.join()

    # Phase 2: gather predictions serially and assemble the CSV lines.
    csv_lines = ['clip,preictal']
    mask_count = None
    names_seen = []
    for target, pipeline, feature_masks, classifier, classifier_name in targets_and_pipelines:
        names_seen.append(classifier_name)

        # The first entry fixes the expected mask count; later ones must match.
        current_count = len(feature_masks)
        if mask_count is None:
            mask_count = current_count
        assert mask_count == current_count

        per_mask = [
            make_submission_predictions(
                settings, target, pipeline, classifier, classifier_name,
                feature_mask=mask).mean_predictions
            for mask in feature_masks
        ]
        averaged = np.mean(per_mask, axis=0)
        csv_lines += make_csv_for_target_predictions(target, averaged)

    submission_text = '\n'.join(csv_lines)
    submission_name = 'ensemble n=%d split_ratio=%s' % (mask_count, split_ratio)
    write_submission_file(settings, submission_text, submission_name, None,
                          str(names_seen), targets_and_pipelines)
def run_make_submission(settings, targets, classifiers, pipelines,
                        use_median_submissions=False):
    """Produce one submission file per (pipeline, classifier) combination.

    Args:
        settings: project settings object; ``settings.N_jobs`` sizes the
            worker pool and the object is forwarded to the helpers.
        targets: iterable of target name strings (they are joined with
            ``', '`` for the log line).
        classifiers: sequence of ``(classifier, classifier_name)`` pairs.
        pipelines: sequence of pipeline objects exposing ``get_name()``.
        use_median_submissions: when true, and the median-based output
            differs from the mean-based one, both are written (named
            ``'mean'`` and ``'median'``). Defaults to ``False``, which
            reproduces the previous hard-coded behaviour: only the mean
            output is written, with name ``None``.

    The function first runs ``run_prepare_data_for_submission``, then
    submits ``make_submission_csv`` for every (pipeline, classifier, target)
    combination to a worker pool and waits for it. The async results are
    not collected.
    NOTE(review): presumably ``make_submission_csv`` caches its result so
    the serial calls below are cheap -- confirm.
    """
    # Single-argument parenthesised prints produce the same output under
    # Python 2's print statement and Python 3's print function.
    print('Submissions task')
    print('Targets ' + ', '.join(targets))
    print('Pipelines ' + ', '.join([p.get_name() for p in pipelines]))
    print('Classifiers ' + ', '.join([c[1] for c in classifiers]))

    run_prepare_data_for_submission(settings, targets, pipelines)

    # Fan the per-target work out to the worker pool.
    pool = Pool(settings.N_jobs)
    for pipeline in pipelines:
        for classifier, classifier_name in classifiers:
            for target in targets:
                pool.apply_async(
                    make_submission_csv,
                    [settings, target, pipeline, classifier, classifier_name])
    pool.close()
    pool.join()

    # Assemble and write one submission per (pipeline, classifier).
    for pipeline in pipelines:
        for classifier, classifier_name in classifiers:
            guesses_mean = ['clip,preictal']
            guesses_median = ['clip,preictal']
            for target in targets:
                print('Target %s pipeline %s classifier %s' % (
                    target, pipeline.get_name(), classifier_name))
                predictions_mean, predictions_median = make_submission_csv(
                    settings, target, pipeline, classifier, classifier_name)
                guesses_mean += predictions_mean
                guesses_median += predictions_median

            mean_output = '\n'.join(guesses_mean)
            median_output = '\n'.join(guesses_median)

            # Only emit a separate median file when it was requested and
            # actually differs from the mean file.
            if use_median_submissions and mean_output != median_output:
                out = [(mean_output, 'mean'), (median_output, 'median')]
            else:
                out = [(mean_output, None)]

            for guesses, name in out:
                write_submission_file(settings, guesses, name, pipeline,
                                      classifier_name)
def run_make_submission(settings, targets_and_pipelines, classifier, classifier_name):
    """Build a single-classifier submission over all targets and write it.

    Each entry of ``targets_and_pipelines`` is unpacked as
    ``(target, pipeline, feature_masks)``; the same ``classifier`` and
    ``classifier_name`` are used for every entry.

    The work is first submitted per (target, feature mask) to a pool of
    ``settings.N_jobs`` workers, whose async results are not collected.
    NOTE(review): presumably ``make_submission_predictions`` caches its
    result so the serial calls afterwards are cheap -- confirm.

    Afterwards the predictions are requested again serially, the
    ``mean_predictions`` of all masks of a target are averaged with
    ``np.mean(axis=0)``, and the rows from
    ``make_csv_for_target_predictions`` are appended after the
    ``'clip,preictal'`` header. The result goes to
    ``write_submission_file`` along with 5-tuples that extend each entry
    with the classifier and its name.
    """
    n_targets_of = len  # evaluated lazily inside the loop, as before
    job_pool = Pool(settings.N_jobs)
    for t_idx, (target, pipeline, feature_masks) in enumerate(targets_and_pipelines):
        for m_idx, mask in enumerate(feature_masks):
            label = 'T=%d/%d M=%d/%d' % (
                t_idx + 1, n_targets_of(targets_and_pipelines),
                m_idx + 1, len(feature_masks))
            positional = [settings, target, pipeline, classifier, classifier_name]
            keyword = dict(feature_mask=mask, quiet=True, progress_str=label)
            job_pool.apply_async(make_submission_predictions, positional, keyword)
    job_pool.close()
    job_pool.join()

    rows = ['clip,preictal']
    for target, pipeline, feature_masks in targets_and_pipelines:
        collected = [
            make_submission_predictions(
                settings, target, pipeline, classifier, classifier_name,
                feature_mask=mask).mean_predictions
            for mask in feature_masks
        ]
        # Average across feature masks for this target.
        rows += make_csv_for_target_predictions(target, np.mean(collected, axis=0))

    # write_submission_file receives each entry extended with the classifier info.
    extended_entries = []
    for target, pipeline, feature_masks in targets_and_pipelines:
        extended_entries.append(
            (target, pipeline, feature_masks, classifier, classifier_name))

    write_submission_file(settings, '\n'.join(rows), None, None,
                          classifier_name, extended_entries)
def run_make_submission(settings, targets, classifiers, pipelines,
                        use_median_submissions=False):
    """Write submission files for every pipeline/classifier pairing.

    Args:
        settings: project settings object; ``settings.N_jobs`` sizes the
            worker pool and the object is forwarded to the helpers.
        targets: iterable of target name strings (joined with ``', '`` for
            logging).
        classifiers: sequence of ``(classifier, classifier_name)`` pairs.
        pipelines: sequence of pipeline objects exposing ``get_name()``.
        use_median_submissions: previously a hard-coded ``False`` local
            that left the median branch unreachable. When true and the
            median-based text differs from the mean-based text, both are
            written under the names ``'mean'`` and ``'median'``; otherwise
            only the mean text is written with name ``None``.

    Steps: log the task, call ``run_prepare_data_for_submission``, submit
    ``make_submission_csv`` for each (pipeline, classifier, target) to a
    worker pool and wait for it (async results are not collected), then
    call ``make_submission_csv`` again serially to assemble the output.
    NOTE(review): presumably the pooled pass populates a cache that the
    serial pass reads -- confirm against ``make_submission_csv``.
    """
    header = 'clip,preictal'

    # Each print takes one pre-built string so the output is identical under
    # Python 2 (print statement) and Python 3 (print function).
    print('Submissions task')
    print('Targets ' + ', '.join(targets))
    print('Pipelines ' + ', '.join([p.get_name() for p in pipelines]))
    print('Classifiers ' + ', '.join([c[1] for c in classifiers]))

    run_prepare_data_for_submission(settings, targets, pipelines)

    pool = Pool(settings.N_jobs)
    for pipeline in pipelines:
        for classifier, classifier_name in classifiers:
            for target in targets:
                job_args = [settings, target, pipeline, classifier, classifier_name]
                pool.apply_async(make_submission_csv, job_args)
    pool.close()
    pool.join()

    for pipeline in pipelines:
        for classifier, classifier_name in classifiers:
            guesses_mean = [header]
            guesses_median = [header]
            for target in targets:
                print('Target %s pipeline %s classifier %s' % (
                    target, pipeline.get_name(), classifier_name))
                predictions_mean, predictions_median = make_submission_csv(
                    settings, target, pipeline, classifier, classifier_name)
                guesses_mean += predictions_mean
                guesses_median += predictions_median

            mean_output = '\n'.join(guesses_mean)
            median_output = '\n'.join(guesses_median)

            # The mean output is always written; it is only labelled 'mean'
            # when a distinct median output accompanies it.
            write_both = use_median_submissions and mean_output != median_output
            out = [(mean_output, 'mean' if write_both else None)]
            if write_both:
                out.append((median_output, 'median'))

            for guesses, name in out:
                write_submission_file(settings, guesses, name, pipeline,
                                      classifier_name)
def run_make_submission(settings, targets_and_pipelines, classifier, classifier_name):
    """Create and write one submission using a single classifier.

    ``targets_and_pipelines`` holds ``(target, pipeline, feature_masks)``
    entries. For every entry and every feature mask,
    ``make_submission_predictions`` is first dispatched to a pool of
    ``settings.N_jobs`` workers (the async results are discarded), and is
    then called again serially to obtain the data object whose
    ``mean_predictions`` are used.
    NOTE(review): presumably the pooled calls fill a cache that the serial
    calls read back -- confirm against ``make_submission_predictions``.

    Per target, the per-mask ``mean_predictions`` are averaged with
    ``np.mean(axis=0)`` and turned into CSV lines by
    ``make_csv_for_target_predictions``. The lines, preceded by the
    ``'clip,preictal'`` header, are joined with newlines and passed to
    ``write_submission_file`` together with the entries extended by
    ``classifier`` and ``classifier_name``.
    """

    def _averaged_predictions(target, pipeline, feature_masks):
        # Serial pass: one prediction set per mask, then the mean over masks.
        per_mask = []
        for mask in feature_masks:
            result = make_submission_predictions(
                settings, target, pipeline, classifier, classifier_name,
                feature_mask=mask)
            per_mask.append(result.mean_predictions)
        return np.mean(per_mask, axis=0)

    # Parallel pass over every (target, mask) combination.
    pool = Pool(settings.N_jobs)
    for target_pos, (target, pipeline, feature_masks) in enumerate(targets_and_pipelines):
        for mask_pos, mask in enumerate(feature_masks):
            progress = 'T=%d/%d M=%d/%d' % (
                target_pos + 1, len(targets_and_pipelines),
                mask_pos + 1, len(feature_masks))
            pool.apply_async(
                make_submission_predictions,
                [settings, target, pipeline, classifier, classifier_name],
                {'feature_mask': mask, 'quiet': True, 'progress_str': progress})
    pool.close()
    pool.join()

    lines = ['clip,preictal']
    for target, pipeline, feature_masks in targets_and_pipelines:
        averaged = _averaged_predictions(target, pipeline, feature_masks)
        lines += make_csv_for_target_predictions(target, averaged)
    text = '\n'.join(lines)

    with_classifier = [
        (tgt, pipe, masks, classifier, classifier_name)
        for tgt, pipe, masks in targets_and_pipelines
    ]
    write_submission_file(settings, text, None, None, classifier_name,
                          with_classifier)