Beispiel #1
0
def get_missing_records():
    """Return the records present in the input files but absent from the canon.

    ``get_files()`` supplies the canon file and the list of files to compare.
    Every file is parsed with ``my_xml.from_file`` and converted with
    ``my_xml.to_set``; the result is the union of the input files' records
    minus the canon's records.
    """
    canon_file, files = get_files()
    canon_xml = my_xml.to_set(my_xml.from_file(canon_file))
    files_xml = set()
    for path in files:
        # Parse the input file itself. Re-reading canon_file here made the
        # accumulated set equal to the canon set, so the difference below
        # was always empty.
        files_xml |= my_xml.to_set(my_xml.from_file(path))
    return files_xml - canon_xml
Beispiel #2
0
def create_upload_file(directory, filename):
    """Write the upload output for ``filename`` and rewrite the canon file.

    The records parsed from ``filename`` and the records of the directory's
    canon file (an empty set when that file does not exist) are handed to
    ``remove_duplicates``. Its first result is written to the output path
    derived from ``filename``; its second result is written to the canon path.

    Returns 1 if ``my_xml.from_file`` gives ``None`` for ``filename``,
    otherwise 0.
    """
    parsed = my_xml.from_file(filename)
    if parsed is None:
        return 1

    known = set()
    if os.path.isfile(get_canon_path(directory)):
        canon_tree = my_xml.from_file(get_canon_path(directory))
        known |= my_xml.to_set(canon_tree)

    fresh, known = remove_duplicates(my_xml.to_set(parsed), known)

    my_xml.print_as_xml(fresh, get_output_path(filename), 'wb')
    my_xml.print_as_xml(known, get_canon_path(directory), 'wb')
    return 0
Beispiel #3
0
def create_upload_file(directory, filename):
    """Produce the upload file for ``filename`` and update the canon file.

    Status codes: 1 when ``filename`` could not be loaded
    (``my_xml.from_file`` returned ``None``), 0 once both files are written.
    """
    incoming = my_xml.from_file(filename)
    if incoming is not None:
        # Start from the existing canon records when a canon file is present.
        canon_records = set()
        if os.path.isfile(get_canon_path(directory)):
            canon_records |= my_xml.to_set(
                my_xml.from_file(get_canon_path(directory))
            )

        # remove_duplicates returns the records to upload and the new canon.
        to_upload, canon_records = remove_duplicates(
            my_xml.to_set(incoming), canon_records
        )
        my_xml.print_as_xml(to_upload, get_output_path(filename), 'wb')
        my_xml.print_as_xml(canon_records, get_canon_path(directory), 'wb')
        return 0
    return 1
Beispiel #4
0
def main():
    """Write a ``clean-`` copy of each test file without duplicate purchases.

    For every file from ``get_test_files()``, the ``Purchase`` elements are
    scanned in the order ``findall`` returns them and only the first element
    seen for each ``PurchaseOrderNumber`` value is kept. The kept elements are
    written to ``"clean-" + file``.
    """
    for file in get_test_files():
        xml = my_xml.from_file(file)
        if xml is None:
            # Nothing to clean if the file could not be loaded; avoids an
            # AttributeError on ``None.findall``.
            continue

        seen_pos = set()
        unique_purchases = []
        for purchase in xml.findall("Purchase"):
            po = my_xml.get_value(purchase, "PurchaseOrderNumber")
            if po in seen_pos:
                continue
            seen_pos.add(po)
            unique_purchases.append(purchase)

        # Write only the kept elements. The previous version removed the
        # duplicates from the tree but then printed the original findall()
        # list, which still contained them.
        my_xml.print_as_xml(my_xml.to_set(unique_purchases), "clean-" + file, "wb")
Beispiel #5
0
def main():
    """Write a ``clean-`` copy of each test file without duplicate purchases.

    Each file returned by ``get_test_files()`` is parsed, its ``Purchase``
    elements are filtered so that only the first element per
    ``PurchaseOrderNumber`` value remains, and the remaining elements are
    written to ``"clean-" + file``.
    """
    for file in get_test_files():
        xml = my_xml.from_file(file)
        if xml is None:
            # A file that could not be loaded has nothing to clean; skipping
            # it avoids an AttributeError on ``None.findall``.
            continue

        pos = set()
        kept = []
        for purchase in xml.findall("Purchase"):
            po = my_xml.get_value(purchase, "PurchaseOrderNumber")
            if po not in pos:
                pos.add(po)
                kept.append(purchase)

        # Output the filtered list. Printing the unfiltered findall() result,
        # as before, wrote the duplicates back out even though they had been
        # removed from the tree.
        my_xml.print_as_xml(my_xml.to_set(kept), "clean-" + file, "wb")
Beispiel #6
0
def merge(directory):
    """Merge the unmerged XML files of ``directory`` into one file per agency.

    Every file returned by ``getunmergedfiles(directory)`` whose name does not
    contain ``"merged"`` is parsed and its records are added to the set stored
    in ``agency_collection`` under the agency taken from the filename. After
    that, one ``upload-<agency>-<yymmdd>-merged.xml`` file is written for each
    entry of ``agencies``.

    NOTE(review): ``agencies`` and ``agency_collection`` are defined outside
    this block. The lookups ``agency in agencies``, ``agency_collection[agency]``
    and ``agencies[int(agency)]`` are kept exactly as they were; confirm that
    the key types line up with the agency strings parsed from filenames.
    """
    # Fetch the file list once and use it for both the progress total and the
    # loop. The previous version called getunmergedfiles() without the
    # directory for the loop, so ``directory`` was ignored there and the
    # progress total could disagree with the number of iterations.
    files = getunmergedfiles(directory)
    with tqdm(total=len(files), unit_scale=True, desc='Merging files') as progress_bar:
        for filename in files:
            if "merged" in filename:
                # Count skipped files too so the bar can reach its total.
                progress_bar.update(1)
                continue
            # the agency abbreviation is the third section of a hyphen-separated filename
            agency = filename.split('-')[2]
            agencyset = set()
            if agency in agencies:
                agencyset = agency_collection[agency]
            agencyset |= my_xml.to_set(my_xml.from_file(filename))
            agency_collection[agency] = agencyset
            progress_bar.update(1)
    for agency in agencies:
        print(agency)
        my_xml.print_as_xml(agency_collection[agency], "upload-" + agencies[int(agency)] + "-" + time.strftime("%y%m%d") +
                            "-merged.xml", "wb")
Beispiel #7
0
def build_canon(directory):
    """Build the canon file for ``directory`` from all of its XML files.

    The records of every file returned by ``get_xml_files(directory)`` are
    unioned into one set. The canon file is written only when that set is
    non-empty.
    """
    records = set()
    for xml_path in get_xml_files(directory):
        parsed = my_xml.from_file(xml_path)
        records |= my_xml.to_set(parsed)

    if not records:
        return
    my_xml.print_as_xml(records, get_canon_path(directory), 'wb')
Beispiel #8
0
def build_canon(directory):
    """Collect the records of every XML file in ``directory`` into the canon.

    Nothing is written when the files yield no records at all.
    """
    canon = set()
    for source in get_xml_files(directory):
        canon |= my_xml.to_set(my_xml.from_file(source))

    has_records = len(canon) > 0
    if has_records:
        target = get_canon_path(directory)
        my_xml.print_as_xml(canon, target, 'wb')