def execute(): in_fname = INFILE out_fname = INFILE + '.out.txt' # generate output filenames and open them (SPLIT_OUT can be a list of any length) temp = [[x, INFILE + '.' + x + '.txt'] for x in SPLIT_OUT] out_fnames = dict(temp) out_files = dict() for o in out_fnames: fn = out_fnames[o] f = open(fn, mode='w', encoding='utf-8') out_files[o] = f f.write("File created by SFMMinor.py while splitting out minor entries marked by {}\n\n".format(o)) with open(in_fname, encoding='utf-8') as infile: sfm_records = sfm.SFMRecordReader(infile) recs = list(sfm_records) # load entire file into memory entries, _entries_stripped = sfm.build_indexes(recs, exclude_fields=DONT_INDEX_IF) with open(out_fname, mode='w', encoding='utf-8') as outfile: outfile.write(sfm_records.header) for rec in recs: r = rec.as_lists() is_minor = rec.find_first(MINOR_MKRS) if is_minor: minlx = r[0][1].strip() report = "Probable minor entry {} identified due to link {}".format(minlx, str(is_minor)) #Strip it down to ASCII so the console can handle it report = report.encode('ascii', 'replace') #print(report) mn = is_minor[1].strip() matches = [main[2] for main in entries[mn]] if len(matches) > 1: print("ERROR: ambiguous link.") print(report) for main in matches: for bref in BACKREF_MKRS: found = main.find_values(bref) if minlx in found: #The following needs to be stripped down to ASCII #print(" Main entry found ({}); it mentions the minor entry here: {} {}".format(main.as_lists()[0], bref, str(found))) if not is_minor[0].endswith(bref): #The following needs to be stripped down to ASCII #print(" Updating the minor entry by appending {} to marker {}.".format(bref, is_minor[0])) is_minor[0] += bref # print(rec.as_string()) break s = rec.as_string() if SPLIT_OUT: found = rec.find_first(SPLIT_OUT) if found: f = out_files[found[0]] #The following needs to be stripped down to ASCII #print(" Removing entry {} from the main file since it contains field {}; saving it in {} instead".format(rec.as_lists()[0], found, f)) f.write(s) # write the minor entry out to the appropriate separate file s = '' outfile.write(s) print("Done writing to file {}".format(out_fname)) for o in out_files: out_files[o].close() print("Done writing to file {}".format(out_fnames[o]))
def execute(): in_fname = INFILE out_fname = INFILE + '.out.txt' # generate output filenames and open them (SPLIT_OUT can be a list of any length) temp = [[x, INFILE + '.' + x + '.txt'] for x in SPLIT_OUT] out_fnames = dict(temp) out_files = dict() for o in out_fnames: fn = out_fnames[o] f = open(fn, mode='w', encoding='utf-8') out_files[o] = f f.write( "File created by SFMMinor.py while splitting out minor entries marked by {}\n\n" .format(o)) with open(in_fname, encoding='utf-8') as infile: sfm_records = sfm.SFMRecordReader(infile) recs = list(sfm_records) # load entire file into memory entries, _entries_stripped = sfm.build_indexes( recs, exclude_fields=DONT_INDEX_IF) with open(out_fname, mode='w', encoding='utf-8') as outfile: outfile.write(sfm_records.header) for rec in recs: r = rec.as_lists() is_minor = rec.find_first(MINOR_MKRS) if is_minor: minlx = r[0][1].strip() report = "Probable minor entry {} identified due to link {}".format( minlx, str(is_minor)) #Strip it down to ASCII so the console can handle it report = report.encode('ascii', 'replace') #print(report) mn = is_minor[1].strip() matches = [main[2] for main in entries[mn]] if len(matches) > 1: print("ERROR: ambiguous link.") print(report) for main in matches: for bref in BACKREF_MKRS: found = main.find_values(bref) if minlx in found: #The following needs to be stripped down to ASCII #print(" Main entry found ({}); it mentions the minor entry here: {} {}".format(main.as_lists()[0], bref, str(found))) if not is_minor[0].endswith(bref): #The following needs to be stripped down to ASCII #print(" Updating the minor entry by appending {} to marker {}.".format(bref, is_minor[0])) is_minor[0] += bref # print(rec.as_string()) break s = rec.as_string() if SPLIT_OUT: found = rec.find_first(SPLIT_OUT) if found: f = out_files[found[0]] #The following needs to be stripped down to ASCII #print(" Removing entry {} from the main file since it contains field {}; saving it in {} instead".format(rec.as_lists()[0], found, f)) f.write( s ) # write the minor entry out to the appropriate separate file s = '' outfile.write(s) print("Done writing to file {}".format(out_fname)) for o in out_files: out_files[o].close() print("Done writing to file {}".format(out_fnames[o]))