def main():
    """Tally sentence-overlap counts of attribution spans over a corpus folder.

    Every file in the selected corpus directory is loaded with
    ``import_attribution_doc``. For files that pass the attribution check,
    the pair returned by ``count_span_sentence_overlaps`` is added to two
    running totals, which are printed once all files have been processed.

    NOTE(review): this file defines ``main`` a second time further down; the
    later definition replaces this one when the module is loaded.
    """
    # Replace with your path (obvs).
    # Remember the folder structure should be
    # ./../Data/corpus/corpus_subset/corpus_file1.xml
    parc_directory = "./../Data/parc30-conll/train-conll-foreval/"
    polnear_directory = "./../Data/polnear-conll/train-conll-foreval/"

    # Single switch for the corpus to process: assign parc_directory here to
    # run on PARC instead. Both the listing and the loading below use it.
    corpus_directory = polnear_directory

    progress_interval = 50  # report progress on every 50th file

    one_sentence_total = 0
    multiple_sentences_total = 0

    for file_number, filename in enumerate(os.listdir(corpus_directory), start=1):
        if file_number % progress_interval == 0:
            # This bit just lets you know where you are; the totals shown
            # cover the files processed before the current one.
            print(filename)
            print('one sentence:', one_sentence_total,
                  'multiple sentence:', multiple_sentences_total)

        df = import_attribution_doc(os.path.join(corpus_directory, filename))

        # Skip documents whose first "attribution" entry equals 0
        # (presumably a file without attributions -- confirm against
        # import_attribution_doc).
        if df["attribution"][0] == 0:
            continue

        atts = extract_attributions(df)
        att_spans = extract_attribution_spans(atts)
        one_sentence, multiple_sentences = count_span_sentence_overlaps(
            df, att_spans)
        one_sentence_total += one_sentence
        multiple_sentences_total += multiple_sentences

    print()
    print('one sentence:', one_sentence_total)
    print('multiple sentence:', multiple_sentences_total)
def main():
    """Sum up attribution-span sentence-overlap counts for one corpus directory.

    Each file listed in the chosen directory is read via
    ``import_attribution_doc``. When the document passes the attribution
    check, its spans are extracted and the two counts returned by
    ``count_span_sentence_overlaps`` are accumulated. The grand totals are
    printed at the end.

    NOTE(review): an earlier definition of ``main`` exists above in this
    file; this later definition is the one that takes effect.
    """
    # Replace with your path (obvs).
    parc_directory = "./../Data/parc30-conll/train-conll-foreval/"
    polnear_directory = "../Data/polnear-conll/polnear-conll/train-conll-foreval/"

    # Pick the corpus in exactly one place; use parc_directory to switch.
    corpus_directory = polnear_directory

    report_every = 50  # print a progress line on every 50th file

    one_sentence_total = 0
    multiple_sentences_total = 0

    filenames = os.listdir(corpus_directory)
    for position, filename in enumerate(filenames, start=1):
        if position % report_every == 0:
            # Progress report: totals reflect the files handled so far,
            # not yet including the current one.
            print(filename)
            print('one sentence:', one_sentence_total,
                  'multiple sentence:', multiple_sentences_total)

        document_path = os.path.join(corpus_directory, filename)
        df = import_attribution_doc(document_path)

        # Only documents whose first "attribution" value is non-zero are
        # counted (presumably zero marks "no attributions" -- TODO confirm).
        if df["attribution"][0] != 0:
            atts = extract_attributions(df)
            att_spans = extract_attribution_spans(atts)
            one_sentence, multiple_sentences = count_span_sentence_overlaps(
                df, att_spans)
            one_sentence_total += one_sentence
            multiple_sentences_total += multiple_sentences

    print()
    print('one sentence:', one_sentence_total)
    print('multiple sentence:', multiple_sentences_total)