Example #1
0
def main(corpus_directory: str = "./../Data/polnear-conll/train-conll-foreval/") -> None:
    """Count attribution spans that stay in one sentence vs. cross sentences.

    Every file in ``corpus_directory`` is loaded with
    ``import_attribution_doc``. For each document whose first ``attribution``
    value is not 0, the attribution spans are extracted and the two counts
    returned by ``count_span_sentence_overlaps`` are added to running totals.
    Running totals are printed every 50 files and the final totals at the end.

    Args:
        corpus_directory: Directory containing the corpus files to scan.
            Defaults to the polnear training path; pass e.g.
            ``"./../Data/parc30-conll/train-conll-foreval/"`` to run on the
            parc corpus instead. The folder structure is expected to look
            like ``./../Data/corpus/corpus_subset/corpus_file1.xml``.
    """
    one_sentence_total = 0
    multiple_sentences_total = 0

    for file_number, filename in enumerate(os.listdir(corpus_directory), start=1):
        if file_number % 50 == 0:
            # Progress report: shows the current file and the totals so far
            # (printed every 50 files).
            print(filename)
            print('one sentence:', one_sentence_total, 'multiple sentence:',
                  multiple_sentences_total)

        # os.path.join works whether or not the directory ends with a slash.
        df = import_attribution_doc(os.path.join(corpus_directory, filename))

        # NOTE(review): a first attribution value of 0 presumably marks a
        # document without attributions -- confirm against the loader.
        if df["attribution"][0] != 0:
            atts = extract_attributions(df)
            att_spans = extract_attribution_spans(atts)
            one_sentence, multiple_sentences = count_span_sentence_overlaps(
                df, att_spans)
            one_sentence_total += one_sentence
            multiple_sentences_total += multiple_sentences

    print()
    print('one sentence:', one_sentence_total)
    print('multiple sentence:', multiple_sentences_total)
def main(corpus_directory: str = "../Data/polnear-conll/polnear-conll/train-conll-foreval/") -> None:
    """Tally single-sentence and multi-sentence attribution spans in a corpus.

    Each file listed in ``corpus_directory`` is read via
    ``import_attribution_doc``. When the first ``attribution`` value of the
    resulting frame is not 0, its attributions and their spans are extracted
    and the pair of counts from ``count_span_sentence_overlaps`` is added to
    the running totals. Intermediate totals are printed every 50 files and
    the final totals once all files have been visited.

    Args:
        corpus_directory: Directory containing the corpus files to scan.
            Defaults to the polnear training path; replace with your own
            path, e.g. ``"./../Data/parc30-conll/train-conll-foreval/"`` for
            the parc corpus.
    """
    one_sentence_total = 0
    multiple_sentences_total = 0

    filenames = os.listdir(corpus_directory)
    for position, filename in enumerate(filenames, start=1):
        if position % 50 == 0:
            # Progress indicator: current file plus totals accumulated so far
            # (emitted every 50 files).
            print(filename)
            print('one sentence:', one_sentence_total, 'multiple sentence:',
                  multiple_sentences_total)

        # Join instead of "+" so a directory without a trailing slash works.
        document_path = os.path.join(corpus_directory, filename)
        df = import_attribution_doc(document_path)

        # NOTE(review): presumably a leading attribution value of 0 means the
        # document has no attributions to count -- confirm.
        if df["attribution"][0] == 0:
            continue

        atts = extract_attributions(df)
        att_spans = extract_attribution_spans(atts)
        one_sentence, multiple_sentences = count_span_sentence_overlaps(
            df, att_spans)
        one_sentence_total += one_sentence
        multiple_sentences_total += multiple_sentences

    print()
    print('one sentence:', one_sentence_total)
    print('multiple sentence:', multiple_sentences_total)