Example #1
from optparse import OptionParser

import pandas as pd


# fh is the project's file-handling helper module (its import is not shown in
# the original snippet); see the sketch of its assumed behavior after this example.
def main():

    usage = "%prog results_file"
    parser = OptionParser(usage=usage)

    (options, args) = parser.parse_args()
    results_file = args[0]

    results = pd.DataFrame(columns=('masked', 'test', 'valid', 'dir'))

    lines = fh.read_text(results_file)

    for i, line in enumerate(lines):

        if i > 0:
            parts = line.split()
            date = parts[0]
            time = parts[1]
            name = parts[2]
            masked = float(parts[3])
            test = float(parts[4])

            name_parts = name.split('_')
            run_num = int(name_parts[-1])

            if run_num < 41:

                if test > 0:
                    valid = parts[5][1:-1]
                else:
                    valid = masked

                #results.loc[run_num, 'iteration'] = run_num
                results.loc[run_num, 'masked'] = masked
                results.loc[run_num, 'test'] = test
                results.loc[run_num, 'valid'] = valid
                results.loc[run_num, 'dir'] = name

    results.to_csv(results_file + 'results.csv', columns=results.columns)

    results_sorted = results.sort_values('masked')
    print(results_sorted)

    print("best by masked")
    print(results_sorted.values[-1, :])

    print("best by valid")
    results_sorted = results.sort_values('valid')
    print(results_sorted.values[-1, :])
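All of these examples assume that fh.read_text returns the contents of a text file as a list of lines (the snippets iterate over the result, index into it, and strip trailing newlines). The fh module itself is project-specific and is not shown here; a minimal stand-in with that assumed behavior might look like the following.

# Hypothetical stand-in for the project's fh.read_text helper (assumption:
# it returns the file's lines, including trailing newlines).
import codecs

def read_text(filename):
    with codecs.open(filename, 'r', encoding='utf-8') as input_file:
        return input_file.readlines()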
Example #2
File: stanford.py Project: dallascard/guac
import re


# fh is the project's file-handling helper; parse_sentence and parse_coref are
# defined elsewhere in stanford.py.
def parse_xml_output(xml_filename):
    raw_xml = fh.read_text(xml_filename)
    sentences = []
    sentiments = []
    dependencies = []
    dependency_tuples = []
    entities = []
    coref = []
    groups = []
    detailed_groups = []
    coref_indices = []
    coref_heads = []
    line_index = 0
    sentence_id = 0
    while line_index < len(raw_xml):
        line = raw_xml[line_index].lstrip()
        match = re.search(r'<sentence id="(\d+)" line="\d+" sentimentValue="(\d+)" sentiment="(.*)"', line)
        if match is not None:
            line_index, sentence, sent_dependencies, sent_dependency_tuples,\
                sent_entities = parse_sentence(raw_xml, line_index)
            sentences.append(sentence)
            sentiments.append([match.group(3)])
            dependencies.append(sent_dependencies)
            dependency_tuples.append(sent_dependency_tuples)
            entities.append(sent_entities)
        if re.search('<coreference>', line) is not None:
            line_index, coref, groups, detailed_groups, coref_indices,\
                coref_heads = parse_coref(raw_xml, line_index, len(sentences))

        line_index += 1

    # deal with the cases where there are no entities
    if len(coref) == 0:
        for s in sentences:
            coref.append([])

    return sentences, sentiments, dependencies, dependency_tuples, entities, coref, groups, detailed_groups,\
        coref_indices, coref_heads
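A hedged usage sketch for parse_xml_output: given one Stanford CoreNLP XML output file, it returns per-sentence structures (tokens, sentiments, dependencies, entities) plus document-level coreference information. The file path below is made up for illustration.

# Hypothetical usage of parse_xml_output (the path is an assumption).
sentences, sentiments, dependencies, dependency_tuples, entities, coref, groups, \
    detailed_groups, coref_indices, coref_heads = parse_xml_output('parsed/doc_01.txt.xml')
print("parsed %d sentences" % len(sentences))
print("sentiment of first sentence:", sentiments[0])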
Example #3
import os

import numpy as np
import pandas as pd


# dirs, fh, features, train, predict, evaluation, evaluate_predictions,
# make_model_basename and get_estimate_and_std are project-local helpers
# (their imports are not shown in the original snippet).
def test_over_time(project_dir, subset, config_file, model_type, field, train_start, train_end, test_start, test_end, n_train=None, n_calib=0, penalty='l2', suffix='', loss='log', objective='f1', do_ensemble=True, dh=300, label='label', intercept=True, n_dev_folds=5, average='micro', seed=None, alpha_min=0.01, alpha_max=1000.0, n_alphas=8, sample_labels=False, group_identical=False, annotated_subset=None, nonlinearity='tanh', init_lr=1e-2, min_epochs=2, max_epochs=50, patience=5, tol=1e-4, list_size=1, repeats=1, oracle=False, lower=None, interactive=False, stoplist_file=None, cshift=False, n_cshift=None, do_cfm=True, do_platt=True, dropout=0.0, min_test=None, test_prop=None, verbose=False):
    # Just run a regular model, one per year, training on the past, and save the results

    if seed is not None:
        seed = int(seed)
        np.random.seed(seed)

    log = {
        'project': project_dir,
        'subset': subset,
        'config_file': config_file,
        'model_type': model_type,
        'field': field,
        'train_start': train_start,
        'train_end': train_end,
        'test_start': test_start,
        'test_end': test_end,
        'n_train': n_train,
        'n_calib': n_calib,
        'penalty': penalty,
        'cshift': cshift,
        'n_cshift': n_cshift,
        'suffix': suffix,
        'loss': loss,
        'objective': objective,
        'do_ensemble': do_ensemble,
        'dh': dh,
        'label': label,
        'intercept': intercept,
        'n_dev_folds': n_dev_folds,
        'average': average,
        'seed': seed,
        'alpha_min': alpha_min,
        'alpha_max': alpha_max,
        'n_alphas': n_alphas,
        'sample_labels': sample_labels,
        'group_identical': group_identical,
        'annotated_subset': annotated_subset,
        'nonlinearity': nonlinearity,
        'init_lr': init_lr,
        'min_epochs': min_epochs,
        'max_epochs': max_epochs,
        'patience': patience,
        'tol': tol,
        'interactive': interactive,
        'stoplist_file': stoplist_file,
        'list_size': list_size
    }

    model_basename = make_model_basename(log)

    # save the experiment parameters to a log file
    logfile = os.path.join(dirs.dir_logs(project_dir), model_basename + '.json')
    fh.makedirs(dirs.dir_logs(project_dir))

    fh.write_to_json(log, logfile)

    # load the features specified in the config file
    config = fh.read_json(config_file)
    feature_defs = []
    for f in config['feature_defs']:
        feature_defs.append(features.parse_feature_string(f))

    # load the file that contains metadata about each item
    metadata_file = os.path.join(dirs.dir_subset(project_dir, subset), 'metadata.csv')
    metadata = fh.read_csv_to_df(metadata_file)
    field_vals = list(set(metadata[field].values))
    field_vals.sort()
    print("Splitting data according to %s", field)
    print("Values:", field_vals)

    print("\nTesting on %s to %s" % (test_start, test_end))

    # first, split into training and non-train data based on the field of interest
    all_items = list(metadata.index)
    test_selector_all = (metadata[field] >= int(test_start)) & (metadata[field] <= int(test_end))
    test_subset_all = metadata[test_selector_all]
    test_items_all = test_subset_all.index.tolist()
    n_test_all = len(test_items_all)

    if min_test is not None:
        if n_test_all < min_test:
            print("Not enough test samples; exiting")
            return

    if train_end is None:
        if train_start is None:
            train_selector_all = metadata[field] < int(test_start)
        else:
            train_selector_all = (metadata[field] < int(test_start)) & (metadata[field] >= train_start)
    else:
        if train_start is None:
            train_selector_all = metadata[field] <= int(train_end)
        else:
            train_selector_all = (metadata[field] <= int(train_end)) & (metadata[field] >= train_start)

    train_subset_all = metadata[train_selector_all]
    train_items_all = list(train_subset_all.index)
    n_train_all = len(train_items_all)
    # only keep the items in the train and test sets
    all_items = train_items_all + test_items_all

    print("Train: %d, Test: %d (labeled and unlabeled)" % (n_train_all, n_test_all))

    # load all labels
    label_dir = dirs.dir_labels(project_dir, subset)
    labels_df = fh.read_csv_to_df(os.path.join(label_dir, label + '.csv'), index_col=0, header=0)
    labels_df = labels_df.loc[all_items]

    # if desired, attempt to learn weights for the training data using techniques for covariate shift
    if cshift:
        print("Training a classifier for covariate shift")
        # start by learning to discriminate train from non-train data
        # Label items based on whether they come from train or test
        train_test_labels = np.zeros((len(all_items), 2), dtype=int)
        train_test_labels[:n_train_all, 0] = 1
        train_test_labels[n_train_all:, 1] = 1
        if np.sum(train_test_labels[:, 0]) < np.sum(train_test_labels[:, 1]):
            cshift_pos_label = 0
        else:
            cshift_pos_label = 1
        train_test_labels_df = pd.DataFrame(train_test_labels, index=all_items, columns=[0, 1])

        if n_cshift is not None and len(all_items) >= n_cshift:
            print("Taking a random sample of %d items for reweighting" % n_cshift)
            #np.random.shuffle(all_items)
            cshift_items = np.random.choice(all_items, size=n_cshift, replace=False)
        else:
            print("Using all train items")
            cshift_items = all_items

        print(train_test_labels_df.loc[cshift_items].mean(axis=0))

        # create a cshift model using the same specification as our model below (e.g. LR/MLP, etc.)
        model_name = model_basename + '_' + str(test_start) + '-' + str(test_end) + 'cshift'
        model, dev_f1, dev_acc, dev_cal, dev_cal_overall = train.train_model_with_labels(project_dir, model_type, loss, model_name, subset, train_test_labels_df, feature_defs, items_to_use=cshift_items, penalty=penalty, alpha_min=alpha_min, alpha_max=alpha_max, n_alphas=n_alphas, intercept=intercept, n_dev_folds=n_dev_folds, save_model=True, do_ensemble=False, dh=dh, seed=seed, pos_label=cshift_pos_label, verbose=False)
        print("cshift results: %0.4f f1, %0.4f acc" % (dev_f1, dev_acc))

        #X_cshift, features_concat = predict.load_data(project_dir, model_name, subset, items_to_use=all_items)
        X_cshift, features_concat = predict.load_data(project_dir, model_name, subset, items_to_use=all_items)
        cshift_pred_probs = model.predict_probs(X_cshift)
        f_items = features_concat.get_items()
        assert len(f_items) == len(all_items)
        for i in range(len(all_items)):
            assert all_items[i] == f_items[i]
        cshift_pred_probs_df = pd.DataFrame(cshift_pred_probs, index=features_concat.get_items(), columns=range(2))

        # display the min and max probs
        print("Min: %0.6f" % cshift_pred_probs_df[1].values[:n_train_all].min())
        print("Mean: %0.6f" % cshift_pred_probs_df[1].values[:n_train_all].mean())
        print("Max: %0.6f" % cshift_pred_probs_df[1].values[:n_train_all].max())
        # HACK: need to prevent 0s in prob(y=0|x)
        p_train_values = cshift_pred_probs_df[0].values
        threshold = 0.01
        p_train_values[p_train_values < threshold] = threshold
        print("After thresholding")
        print("Min: %0.6f" % p_train_values[:n_train_all].min())
        print("Mean: %0.6f" % p_train_values[:n_train_all].mean())
        print("Max: %0.6f" % p_train_values[:n_train_all].max())

        # use the estimated probability of each item being a training item to compute item weights
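        # Derivation note (not in the original comments): with p = P(train | x)
        # from the discriminator above, the importance weight
        # p_test(x) / p_train(x) is proportional to (1 - p) / p * (n_train / n_test),
        # and (1.0 / p - 1) equals (1 - p) / p, which is exactly what is computed below.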
        weights = n_train_all / float(n_test_all) * (1.0/p_train_values - 1)
        weights_df_all = pd.DataFrame(weights, index=all_items)
        # print a summary of the weights from just the training items
        print("Min weight: %0.4f" % weights[:n_train_all].min())
        print("Ave weight: %0.4f" % weights[:n_train_all].mean())
        print("Max weight: %0.4f" % weights[:n_train_all].max())
        # print a summary of all weights
        #print("Min weight: %0.4f" % weights.min())
        #print("Ave weight: %0.4f" % weights.mean())
        #print("Max weight: %0.4f" % weights.max())
        # create a data frame with this information
    else:
        weights_df_all = None

    # find the labeled items
    print("Subsetting items with labels")
    label_sums_df = labels_df.sum(axis=1)
    labeled_item_selector = label_sums_df > 0
    labels_df = labels_df[labeled_item_selector]
    n_labeled_items, n_classes = labels_df.shape
    print("%d labeled items" % n_labeled_items)
    labeled_items = set(labels_df.index)

    train_items_labeled = [i for i in train_items_all if i in labeled_items]

    test_items = [i for i in test_items_all if i in labeled_items]
    #n_train = len(train_items)
    n_test = len(test_items)

    for r in range(repeats):

        # set seed very explicitly here to make sure experiments are comparable
        if seed is not None:
            seed += 1
            np.random.seed(seed)

        print("* Starting repetition %d *" % r)
        model_name = model_basename + '_' + str(test_start) + '-' + str(test_end) + '_' + str(r)
        if n_train is not None and len(train_items_labeled) >= n_train:
            np.random.shuffle(train_items_labeled)
            train_items = np.random.choice(train_items_labeled, size=n_train, replace=False)
        else:
            print("Using all train items")
            train_items = train_items_labeled
        n_train_r = len(train_items)

        # now, choose a calibration set
        if n_calib > 0 and n_test >= n_calib:
            np.random.shuffle(test_items)
            calib_items = np.random.choice(test_items, size=n_calib, replace=False)
        elif n_test < n_calib:
            print("Error: Only %d labeled test instances available" % n_test)
            calib_items = test_items
        else:
            calib_items = []

        if weights_df_all is not None:
            weights_df = weights_df_all[labeled_item_selector]
        else:
            weights_df = None

        print("Labeled train: %d, test: %d" % (n_train_r, n_test))

        # create a data frame to hold a summary of the results
        output_df = pd.DataFrame([], columns=['N', 'training data', 'test data', 'cal', 'estimate', 'MAE', '95lcl', '95ucl', 'contains_test'])

        test_labels_df = labels_df.loc[test_items]
        # do a fake adjustment of the test label proportions
        if test_prop is not None:
            test_prop = float(test_prop)
            test_label_values = test_labels_df.values
            test_label_props = test_label_values[:, 1] / (test_label_values[:, 1] + test_label_values[:, 0])
            order = list(np.argsort(test_label_props))

            true_prop = np.mean(test_label_props)
            if test_prop < true_prop:
                i = 0
                running = test_label_props[order[i]]
                new_test_items = [test_items[order[i]]]
                i += 1
                while (running / i) <= test_prop:
                    running += test_label_props[order[i]]
                    new_test_items.append(test_items[order[i]])
                    i += 1
                print("Taking %d test_items" % len(new_test_items))
                test_items = new_test_items[:]
            else:
                order.reverse()
                i = 0
                running = test_label_props[order[i]]
                new_test_items = [test_items[order[i]]]
                i += 1
                while (running / i) >= test_prop:
                    running += test_label_props[order[i]]
                    new_test_items.append(test_items[order[i]])
                    i += 1
                print("Taking %d test_items" % len(new_test_items))
                test_items = new_test_items[:]

            test_labels_df = labels_df.loc[test_items]
            test_label_values = test_labels_df.values
            test_label_props = test_label_values[:, 1] / (test_label_values[:, 1] + test_label_values[:, 0])
            print("New props = %0.3f" % np.mean(test_label_props))

        # if instructed, sample labels in proportion to annotations (to simulate having one label per item)
        if sample_labels:
            print("Sampling labels")
            # normalize the labels
            temp = labels_df.values / np.array(labels_df.values.sum(axis=1).reshape((n_labeled_items, 1)), dtype=float)
            samples = np.zeros([n_labeled_items, n_classes], dtype=int)
            for i in range(n_labeled_items):
                index = np.random.choice(np.arange(n_classes), size=1, p=temp[i, :])
                samples[i, index] = 1
            sampled_labels_df = pd.DataFrame(samples, index=labels_df.index, columns=labels_df.columns)
        else:
            sampled_labels_df = labels_df

        train_labels_df = sampled_labels_df.loc[train_items].copy()
        if n_calib > 0:
            calib_labels_df = sampled_labels_df.loc[calib_items].copy()
        else:
            calib_labels_df = None

        # get the true proportion of labels in the test OR non-training data (calibration and test combined)
        target_props, target_estimate, target_std = get_estimate_and_std(test_labels_df, use_n_annotations=True)
        output_df.loc['target'] = [n_test, 'test', 'test', 'n/a', target_estimate, 0, target_estimate - 2 * target_std, target_estimate + 2 * target_std, np.nan]

        # get the same estimate from training data
        train_props, train_estimate, train_std = get_estimate_and_std(train_labels_df, use_n_annotations=True)
        print("Train props:", train_props, train_estimate)
        train_rmse = np.abs(train_estimate - target_estimate)
        train_contains_test = target_estimate > train_estimate - 2 * train_std and target_estimate < train_estimate + 2 * train_std
        output_df.loc['train'] = [n_train_r, 'train', 'test', 'n/a', train_estimate, train_rmse, train_estimate - 2 * train_std, train_estimate + 2 * train_std, train_contains_test]

        # get the same estimate from the calibration data
        if n_calib > 0:
            calib_props, calib_estimate, calib_std = get_estimate_and_std(calib_labels_df, use_n_annotations=True)
            # compute the error of this estimate
            calib_rmse = np.abs(calib_estimate - target_estimate)
            calib_contains_test = target_estimate > calib_estimate - 2 * calib_std and target_estimate < calib_estimate + 2 * calib_std
            output_df.loc['calib'] = [n_calib, 'calib', 'test', 'n/a', calib_estimate, calib_rmse, calib_estimate - 2 * calib_std, calib_estimate + 2 * calib_std, calib_contains_test]
        else:
            calib_estimate = 0.0
            calib_std = 1.0
            output_df.loc['calib'] = [n_calib, 'calib', 'test', 'n/a', np.nan, np.nan, np.nan, np.nan, np.nan]

        if train_estimate > 0.5:
            pos_label = 0
        else:
            pos_label = 1
        print("Using %d as the positive label" % pos_label)

        results_df = pd.DataFrame([], columns=['f1', 'acc', 'mae', 'estimated calibration'])

        # Now train a model on the training data, holding out the calibration items for calibration

        if stoplist_file is not None:
            stoplist = fh.read_text(stoplist_file)
            stoplist = {s.strip() for s in stoplist}
            print(stoplist)
        else:
            stoplist = None

        print("Training a LR model")
        model, dev_f1, dev_acc, dev_cal_mae, dev_cal_est = train.train_model_with_labels(project_dir, model_type, 'log', model_name, subset, sampled_labels_df, feature_defs, weights_df=weights_df, items_to_use=train_items, penalty=penalty, alpha_min=alpha_min, alpha_max=alpha_max, n_alphas=n_alphas, intercept=intercept, objective=objective, n_dev_folds=n_dev_folds, do_ensemble=do_ensemble, dh=dh, seed=seed, pos_label=pos_label, vocab=None, group_identical=group_identical, nonlinearity=nonlinearity, init_lr=init_lr, min_epochs=min_epochs, max_epochs=max_epochs, patience=patience, do_cfm=do_cfm, do_platt=do_platt, lower=lower, stoplist=stoplist, dropout=dropout, verbose=verbose)
        results_df.loc['cross_val'] = [dev_f1, dev_acc, dev_cal_mae, dev_cal_est]

        X_test, features_concat = predict.load_data(project_dir, model_name, subset, items_to_use=test_items)
        test_predictions = model.predict(X_test)
        test_predictions_df = pd.DataFrame(test_predictions, index=features_concat.get_items(), columns=[label])
        test_pred_probs = model.predict_probs(X_test)
        _, n_labels = test_pred_probs.shape
        test_pred_probs_df = pd.DataFrame(test_pred_probs, index=features_concat.get_items(), columns=range(n_labels))

        f1_test, acc_test = evaluate_predictions.evaluate_predictions(test_labels_df, test_predictions_df, test_pred_probs_df, pos_label=pos_label, average=average)
        true_test_vector = np.argmax(test_labels_df.values, axis=1)
        test_cal_est = evaluation.evaluate_calibration_rmse(true_test_vector, test_pred_probs_df.values, min_bins=1, max_bins=1)
        test_cc_estimate, test_pcc_estimate = model.predict_proportions(X_test)

        test_cc_mae = np.mean(np.abs(test_cc_estimate[1] - target_estimate))
        test_pcc_mae = np.mean(np.abs(test_pcc_estimate[1] - target_estimate))

        results_df.loc['test'] = [f1_test, acc_test, test_pcc_mae, test_cal_est]

        output_df.loc['CC'] = [n_train_r, 'train', 'test', 'n/a', test_cc_estimate[1], test_cc_mae, np.nan, np.nan, np.nan]
        output_df.loc['PCC'] = [n_train_r, 'train', 'test', 'n/a', test_pcc_estimate[1], test_pcc_mae, np.nan, np.nan, np.nan]

        test_acc_estimate_internal, test_acc_ms_estimate_internal = model.predict_proportions(X_test, do_cfm=do_cfm)

        test_acc_rmse_internal = np.abs(test_acc_estimate_internal[1] - target_estimate)
        test_acc_ms_rmse_internal = np.abs(test_acc_ms_estimate_internal[1] - target_estimate)

        output_df.loc['ACC_internal'] = [n_train_r, 'train', 'test', 'n/a', test_acc_estimate_internal[1], test_acc_rmse_internal, np.nan, np.nan, np.nan]
        output_df.loc['MS_internal'] = [n_train_r, 'train', 'nontrain', 'predicted', test_acc_ms_estimate_internal[1], test_acc_ms_rmse_internal, np.nan, np.nan, np.nan]

        test_platt1_estimate, test_platt2_estimate = model.predict_proportions(X_test, do_platt=do_platt)

        test_platt1_rmse = np.abs(test_platt1_estimate[1] - target_estimate)
        test_platt2_rmse = np.abs(test_platt2_estimate[1] - target_estimate)

        output_df.loc['PCC_platt1'] = [n_train_r, 'train', 'test', 'n/a', test_platt1_estimate[1], test_platt1_rmse, np.nan, np.nan, np.nan]
        output_df.loc['PCC_platt2'] = [n_train_r, 'train', 'nontrain', 'predicted', test_platt2_estimate[1], test_platt2_rmse, np.nan, np.nan, np.nan]

        if n_calib > 0:
            cc_plus_cal_estimate = (test_cc_estimate[1] + calib_estimate) / 2.0
            pcc_plus_cal_estimate = (test_pcc_estimate[1] + calib_estimate) / 2.0
            cc_plus_cal_mae = np.mean(np.abs(cc_plus_cal_estimate - target_estimate))
            pcc_plus_cal_mae = np.mean(np.abs(pcc_plus_cal_estimate - target_estimate))

            #output_df.loc['CC_plus_cal'] = [n_train, 'train', 'test', 'n/a', cc_plus_cal_estimate, cc_plus_cal_mae, np.nan, np.nan, np.nan]
            output_df.loc['PCC_plus_cal'] = [n_train_r, 'train', 'test', 'n/a', pcc_plus_cal_estimate, pcc_plus_cal_mae, np.nan, np.nan, np.nan]

        results_df.to_csv(os.path.join(dirs.dir_models(project_dir), model_name, 'accuracy.csv'))
        output_df.to_csv(os.path.join(dirs.dir_models(project_dir), model_name, 'results.csv'))

        """
Example #4
File: stanford.py Project: dallascard/guac
import codecs
import os


# fh and dirs are project-local helpers; parse_xml_output and find_jk_grams are
# defined elsewhere in stanford.py.
def parse_xml_files(xml_filelist_filename, output_dir):
    filelist = fh.read_text(xml_filelist_filename)
    parsed_files = {}
    sentiments = {}
    dependencies = {}
    dependency_tuples = {}
    entities = {}
    coref = {}
    coref_entities = {}
    coref_heads = {}
    all_groups = {}
    jk_grams = {}
    amalgram_pairs = {}
    for file in filelist:
        file = file.rstrip('\n')
        print(file)
        # peel off both .txt and .xml
        basename = fh.get_basename_wo_ext(fh.get_basename_wo_ext(file))
        sentences, doc_sentiments, doc_dependencies, doc_dependency_tuples, doc_entities, doc_coref, groups, _,\
            doc_coref_entities, doc_coref_heads = parse_xml_output(file)
        parsed_files[basename] = sentences
        sentiments[basename] = doc_sentiments
        dependencies[basename] = doc_dependencies
        dependency_tuples[basename] = doc_dependency_tuples
        entities[basename] = doc_entities
        coref[basename] = doc_coref
        coref_entities[basename] = doc_coref_entities
        coref_heads[basename] = doc_coref_heads

        doc_jk_grams, doc_jk_indices = find_jk_grams(sentences)
        jk_grams[basename] = doc_jk_grams

        # output documents to amalgram format
        #amalgram_dir = os.path.join(dirs.data_amalgram_dir, 'input')
        #if not os.path.exists(amalgram_dir):
        #    os.makedirs(amalgram_dir)
        # (unused) tagged sentences as tab-separated strings; immediately overwritten below
        #tagged_sents = ['\n'.join([t['word'] + '\t' + t['POS'] for t in s]) + '\n' for s in sentences]
        # save word/tag pairs for amalgram
        tagged_sents = [[(t['word'], t['POS']) for t in s] for s in sentences]
        amalgram_pairs[basename] = tagged_sents

        # uncomment for extracting story elements...
        parsed_dir = os.path.join(output_dir, 'parsed')
        if not os.path.exists(parsed_dir):
            os.makedirs(parsed_dir)
        parsed_filename = os.path.join(parsed_dir, basename + '.json')
        fh.write_to_json(sentences, parsed_filename, sort_keys=False)

    sentiment_filename = fh.make_filename(output_dir, 'sentiments', 'json')
    fh.write_to_json(sentiments, sentiment_filename, sort_keys=False)

    dependencies_filename = fh.make_filename(output_dir, 'dependency_tuple_ids', 'json')
    fh.write_to_json(dependency_tuples, dependencies_filename, sort_keys=False)

    coref_filename = fh.make_filename(output_dir, 'entities', 'json')
    fh.write_to_json(coref, coref_filename, sort_keys=False)

    jkgrams_filename = fh.make_filename(output_dir, 'jkgrams', 'json')
    fh.write_to_json(jk_grams, jkgrams_filename, sort_keys=False)

    coref_heads_filename = fh.make_filename(output_dir, 'coref_heads', 'json')
    fh.write_to_json(coref_heads, coref_heads_filename, sort_keys=False)

    amalgram_keys = sorted(amalgram_pairs.keys())
    amalgram_data_file = os.path.join(dirs.data_amalgram_dir, 'input.txt')
    with codecs.open(amalgram_data_file, 'w', encoding='utf-8') as output_file:
        for k in amalgram_keys:
            sents = amalgram_pairs[k]
            for s in sents:
                for p in s:
                    output_file.write(p[0] + '\t' + p[1] + '\n')
                output_file.write('\n')

    for k in amalgram_keys:
        amalgram_data_file = os.path.join(dirs.data_amalgram_dir, k + '.txt')
        with codecs.open(amalgram_data_file, 'w', encoding='utf-8') as output_file:
            sents = amalgram_pairs[k]
            for s in sents:
                for p in s:
                    output_file.write(p[0] + '\t' + p[1] + '\n')
                output_file.write('\n')

    amalgram_index_file = os.path.join(dirs.data_amalgram_dir, 'index.txt')
    with codecs.open(amalgram_index_file, 'w', encoding='utf-8') as output_file:
        for k in amalgram_keys:
            sents = amalgram_pairs[k]
            for s in sents:
                output_file.write(k + '\n')

    #all_groups_filename = fh.make_filename(output_dir, 'all_groups', 'json')
    #fh.write_to_json(all_groups, all_groups_filename)

    return parsed_files, dependencies
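A hedged usage sketch for parse_xml_files: it takes a file listing CoreNLP XML output paths and an output directory, writes the JSON and amalgram files described above, and returns the parsed sentences and dependency structures keyed by document basename. The paths below are made up for illustration.

# Hypothetical usage of parse_xml_files (paths are assumptions).
parsed_files, dependencies = parse_xml_files('parsed/xml_filelist.txt', 'processed/')
print("parsed %d documents" % len(parsed_files))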