def get_missing_records():
    """Return the records present in the other files but absent from the canon.

    ``get_files()`` supplies the canon file and the files to compare against
    it. Every file is parsed with ``my_xml.from_file`` and converted with
    ``my_xml.to_set``; the result is the union of all per-file sets minus the
    canon set.
    """
    canon_file, files = get_files()
    canon_records = my_xml.to_set(my_xml.from_file(canon_file))

    file_records = set()
    for path in files:
        # Parse the file being visited. The previous code re-read the canon
        # file here, which made the final difference always empty.
        file_records |= my_xml.to_set(my_xml.from_file(path))

    return file_records - canon_records
def create_upload_file(directory, filename):
    """Write the de-duplicated upload file for *filename* and refresh the canon.

    The records of *filename* are compared against the canon file of
    *directory* (when that file exists) through ``remove_duplicates``. The
    first set it returns is written to ``get_output_path(filename)`` and the
    second set is written back to the canon path.

    Returns:
        1 when ``my_xml.from_file(filename)`` yields ``None``, otherwise 0.
    """
    tree = my_xml.from_file(filename)
    if tree is None:
        return 1

    canon_path = get_canon_path(directory)

    # Start from an empty canon and fold in the stored one only if it exists.
    known_records = set()
    if os.path.isfile(canon_path):
        known_records |= my_xml.to_set(my_xml.from_file(canon_path))

    incoming_records = my_xml.to_set(tree)
    upload_records, known_records = remove_duplicates(incoming_records, known_records)

    my_xml.print_as_xml(upload_records, get_output_path(filename), 'wb')
    my_xml.print_as_xml(known_records, canon_path, 'wb')
    return 0
def main():
    """Write a copy of each test file with repeated purchase orders dropped.

    For every file returned by ``get_test_files()``, the ``Purchase`` elements
    are visited in the order ``findall`` returns them. Only the first element
    carrying a given ``PurchaseOrderNumber`` is kept. The kept elements are
    written to ``"clean-" + file``.
    """
    for file in get_test_files():
        xml = my_xml.from_file(file)
        seen_pos = set()
        unique_purchases = []
        for purchase in xml.findall("Purchase"):
            po = my_xml.get_value(purchase, "PurchaseOrderNumber")
            if po in seen_pos:
                # Also drop the repeat from the tree so it matches the output.
                xml.getroot().remove(purchase)
            else:
                seen_pos.add(po)
                unique_purchases.append(purchase)

        # Write only the kept elements. The list returned by findall() is a
        # snapshot that still contains the removed duplicates, so passing it
        # through (as before) put them back into the "clean" file.
        my_xml.print_as_xml(my_xml.to_set(unique_purchases), "clean-" + file, "wb")
def merge(directory):
    """Merge the unmerged files of *directory* into one upload file per agency.

    Each file name returned by ``getunmergedfiles(directory)`` that does not
    contain ``"merged"`` is parsed, and its records are added to the set
    stored in ``agency_collection`` under the agency taken from the file name.
    Afterwards, every entry of ``agencies`` is printed and its collected set
    is written to ``upload-<agencies[int(agency)]>-<yymmdd>-merged.xml``.
    """
    # Filter out already-merged outputs up front so the progress total matches
    # the number of files actually processed. Previously the loop iterated a
    # second, directory-less getunmergedfiles() call, which ignored
    # *directory* and could disagree with the progress total.
    files = [name for name in getunmergedfiles(directory) if "merged" not in name]

    with tqdm(total=len(files), unit_scale=True, desc='Merging files') as progress_bar:
        for filename in files:
            # the agency abbreviation is the third section of a hyphen-separated filename
            agency = filename.split('-')[2]
            agencyset = set()
            # NOTE(review): membership is tested against `agencies` but the
            # lookup is done in `agency_collection`; confirm both always share
            # the same keys.
            if agency in agencies:
                agencyset = agency_collection[agency]
            agencyset |= my_xml.to_set(my_xml.from_file(filename))
            agency_collection[agency] = agencyset
            progress_bar.update(1)

    for agency in agencies:
        print(agency)
        my_xml.print_as_xml(
            agency_collection[agency],
            "upload-" + agencies[int(agency)] + "-" + time.strftime("%y%m%d") + "-merged.xml",
            "wb",
        )
def build_canon(directory):
    """Build the canon file of *directory* from all of its XML files.

    The records of every file returned by ``get_xml_files(directory)`` are
    unioned together. The canon file at ``get_canon_path(directory)`` is
    written only when at least one record was collected.
    """
    combined = set()
    for xml_path in get_xml_files(directory):
        parsed = my_xml.from_file(xml_path)
        combined |= my_xml.to_set(parsed)

    # Nothing collected: leave any existing canon file untouched.
    if not combined:
        return

    my_xml.print_as_xml(combined, get_canon_path(directory), 'wb')