Example #1
0
def ParseGHGInventoryFile(data_file, uid_mapping_file, sep1=None):
    """
    Read a GHG inventory data file into a list of time series.

    The data file can contain
    A) line comments: every line containing exactly one '#' is skipped, and
    B) a comment in front of a time series, starting and ending with '#':
       everything up to and including the last '#' of the line is dropped.
    The rest of the line is split into a list of strings with `sep1`
    (str.split default, i.e. runs of white space, when `sep1` is None).
    Lines that contain only white space after the comment has been removed
    are skipped, also when an explicit separator is given.

    The first item of each series is the UID. Spaces and surrounding curly
    braces are removed from it and the result is passed through
    MapUID340to500 using the mapping created from `uid_mapping_file`.

    Returns a list of lists (datalss) of data series (datals), each one
    starting with its mapped UID.
    """
    (uid340set, uiddict340to500) = Create340to500UIDMapping(uid_mapping_file)
    datalss = []
    #The with statement closes the file also if reading fails
    with open(data_file) as f:
        for line in f:
            if line.count('#') == 1:
                continue
            series_text = line.rpartition('#')[2]
            #Remove empty lines. Checking the text instead of the split
            #result is needed because '\n'.split(';') is ['\n'], not []
            if not series_text.strip():
                continue
            datalss.append(series_text.split(sep=sep1))
    for datals in datalss:
        uid_stripped = datals[0].replace(' ', '').strip('{}')
        #Some UIDs have changed from CRFReporter version 3.4.0 to 5.0.0
        datals[0] = MapUID340to500(uid_stripped, uid340set, uiddict340to500)
    return datalss
Example #2
0
def InsertAllNKComments(t, dirfilels, uidmapping):
    """
    Read the comment files in `dirfilels` and pass each entry to
    InsertNKComment.

    t:          passed unchanged to InsertNKComment
                (presumably the parsed CRFReporter xml tree, verify
                against the caller)
    dirfilels:  list of comment file names
    uidmapping: file name given to Create340to500UIDMapping

    For each file the start year is crfxmlconstants.kp_start_year if the
    base name of the file starts with 'CKP', otherwise
    crfxmlconstants.lulu_start_year. Lines containing exactly one '#' are
    skipped, the text up to and including the last '#' of a line is dropped
    and the rest is split using ';' as the separator. The first field is
    the UID, the remaining fields are handed to InsertNKComment together
    with a running entry number that starts from 1 in each file.

    Returns None.
    """
    (uid340set, uiddict340to500) = Create340to500UIDMapping(uidmapping)
    not_found_uid_ls = []
    for file_name in dirfilels:
        #The with statement closes the file also if reading fails
        with open(file_name) as f:
            print("Reading file", file_name)
            if os.path.basename(file_name).startswith('CKP'):
                start_year = crfxmlconstants.kp_start_year
            else:
                start_year = crfxmlconstants.lulu_start_year
            print("Start year", start_year)
            #Split comments using ';' separator. A line with white space
            #only becomes an empty list: split(';') alone never returns
            #an empty list so blank lines would otherwise go unnoticed
            datals = []
            for line in f:
                if line.count('#') == 1:
                    continue
                comment_text = line.rpartition('#')[2]
                if comment_text.strip():
                    datals.append(comment_text.split(';'))
                else:
                    datals.append([])
        counter = 1
        for nkcomments_ls in datals:
            if len(nkcomments_ls) == 0:
                print("Found empty line")
                continue
            #The first string in the list is the uid
            uid = nkcomments_ls.pop(0)
            #Spaces around the uid would prevent it from matching
            uid = uid.replace(' ', '')
            #The old reporter wrapped uid in {}, the new one does not
            uid_new = uid.strip('{}')
            #Some UIDs have changed from CRFReporter version 3.4.0 to 5.0.0
            uid_changed = MapUID340to500(uid_new, uid340set, uiddict340to500)
            if uid_changed != uid_new:
                print("UID changed:", uid_new, "-->", uid_changed)
                uid_new = uid_changed
            InsertNKComment(uid_new, t, nkcomments_ls, counter,
                            not_found_uid_ls, start_year)
            counter = counter + 1
Example #3
0
        print("No CRFReporter 340 to 500 UID mapping file")
        quit()
    activity_file = options.f5
    start_year = lulu_start_year
    if os.path.basename(activity_file).startswith('KP'):
        start_year = kp_start_year
    elif os.path.basename(activity_file).startswith('LU'):
        start_year = lulu_start_year
    else:
        print("Cannot decide if KP LULUCF or LULUCF file",
              activity_file,
              file=sys.stderr)
        quit()

    print("Summary for", activity_file, "begins")
    (uid340set, uiddict340to500) = Create340to500UIDMapping(options.f6)

    #Parse xml tree
    print("Parsing Party Profile xml from:", options.f1)
    t = ET()
    t.parse(options.f1)
    #Find all varibles once
    it = t.iter('variable')
    variablels = [e for e in it if e.tag == 'variable']
    print("Reading GHG Inventory files")
    dirfilels = glob.glob(options.f2)
    print("Inserting GHG files into a dictionary")
    ghg_dict = CreateGHGDictionary(dirfilels)
    print("Reading UID files for Activities and Regions:", options.f5)

    #Check data available
Example #4
0
def _find_variables(tree):
    """Return a list of the 'variable' elements yielded by tree.iter()."""
    return [e for e in tree.iter('variable') if e.tag == 'variable']


def _write_variable_file(file_name, tree, end_year, write_variable):
    """
    Write a header and one entry for each 'variable' element to `file_name`.

    The variables of `tree` are sorted with SortKey, the header is written
    with WriteHeader(f, lulu_start_year, end_year) and
    write_variable(f, x) is called for each variable x.
    """
    variablels = _find_variables(tree)
    variablels.sort(key=SortKey)
    #The with statement closes the file also if writing fails
    with open(file_name, 'w') as f:
        WriteHeader(f, lulu_start_year, end_year)
        for x in variablels:
            write_variable(f, x)


def _read_time_series(file_name):
    """
    Read one GHG inventory file into a list of time series.

    Each data row is split to a list of strings, delimiters are white
    space characters. The data file can contain A) comment lines, i.e.
    lines containing exactly one '#', which are filtered out and B) a
    comment for the time series in front of it, starting and ending with
    '#', which is dropped. A blank line gives an empty list.
    """
    with open(file_name) as f:
        return [
            line.rpartition('#')[2].split() for line in f
            if line.count('#') != 1
        ]


def _print_uid_owners(uid_ls, dictionary):
    """
    Print to stderr a 'UID File Owner' header and for each uid in `uid_ls`
    the uid with items 0 and 2 of dictionary[uid].
    """
    print("UID", "File", "Owner", file=sys.stderr)
    for uid in uid_ls:
        print(uid, dictionary[uid][0], dictionary[uid][2], file=sys.stderr)


def main():
    """
    Command line program to populate a CRFReporter Party Profile xml with
    GHG inventory results.

    Depending on the command line options the program
    - writes a '#' separated UID text file for excel (-u),
    - writes inventory checks to a file and quits (-o, -e),
    - writes the scenario excel file and quits (-s) or
    - inserts the time series of the GHG inventory files (-c) into the
      Party Profile xml (-p) and writes the result to a new xml file (-x).
    The options -p and -y are always required, -c and -m are required for
    importing inventory files.
    """
    #---------------------------------The main program begins--------------------------------------------------
    #Command line generator
    parser = OP()
    parser.add_option(
        "-u",
        "--uid",
        dest="f1",
        help="Create '#' separated text file for each UID for excel")
    parser.add_option("-p",
                      "--pxml",
                      dest="f2",
                      help="Read CRFReporter Party Profile xml file")
    parser.add_option(
        "-x",
        "--xml",
        dest="f3",
        help="Write new Party profile populated with inventory results")
    parser.add_option("-c",
                      "--csv",
                      dest="f4",
                      help="Read GHG inventory csv files")
    parser.add_option("-a",
                      "--all",
                      action="store_true",
                      dest="f5",
                      default=False,
                      help="Print all UID identifiers")
    parser.add_option("-m",
                      "--map",
                      dest="f6",
                      help="CRFReporter 3.4.0 --> 5.0.0 UID mapping file")
    parser.add_option("-o",
                      "--check",
                      dest="f7",
                      help="Check inventory and quit")
    parser.add_option("-e",
                      "--false",
                      dest="f8",
                      help="Check for erroneus input and quit")
    parser.add_option(
        "-s",
        "--scen",
        action="store_true",
        dest="f9",
        default=False,
        help="Create '#' separated UID text file for excel and quit")
    parser.add_option("-y",
                      "--year",
                      dest="f10",
                      help="Current inventory year")
    parser.add_option("--oldxml",
                      dest="f11",
                      help="Check if UID exists in older CRFReporter version")
    (options, args) = parser.parse_args()

    if options.f2 is None:
        print("No input Reporter Party Profile XML data file")
        quit()
    #Generate UID file for those who make inventory
    if not os.path.isfile(user_information_file):
        print("Missing file", user_information_file,
              "mapping ghg inventory files to file owners")
        quit()

    dictionary = CreateUserInformation(user_information_file)

    #Print all UID identifiers (do not filter with LULU/KP keywords)
    write_all = options.f5 is True
    write_scen = options.f9 is True

    if options.f10 is None:
        print("Missing current inventory year")
        quit()

    current_inventory_year = int(options.f10)

    #Parse xml tree
    print("Parsing Party Profile xml from:", options.f2)
    t = ET()
    t.parse(options.f2)

    def write_uid_row(f, x):
        WriteVariables(f, x, dictionary, write_all)

    if options.f4 is None and options.f1:
        print("Generating UID file:", options.f1)
        _write_variable_file(options.f1, t, current_inventory_year,
                             write_uid_row)
        print("Done")
        quit()
    #-------Optional checking of inventory---------------
    #NOTE(review): the two checks below write the header up to the fixed
    #year 2014 and not up to current_inventory_year, confirm if intended
    if options.f7 is not None:
        print("Checking inventory to file", options.f7)
        _write_variable_file(options.f7, t, 2014, WriteEmptyAndFalseVariables)
        print("Done")
        quit()

    if options.f8 is not None:
        print("Checking inventory for erroneous input to file", options.f8)
        _write_variable_file(options.f8, t, 2014, WriteFalseInput)
        print("Done")
        quit()
    #------------------------------------------------------

    #List the inventory files to be imported
    if options.f4 is None:
        print("Missing GHG Inventory csv files")
        quit()
    dirfilels = glob.glob(options.f4)
    #UID mapping from CRFReporter 3.4.0-->5.0.0
    if options.f6 is None:
        print("No CRFReporter 3.4.0 --> CRFReporter 5.0.0 UID mapping file")
        quit()
    (uid340set, uiddict340to500) = Create340to500UIDMapping(options.f6)
    #This is for Paula's scenario in year 2015
    if write_scen:
        #Print the file name itself, not the text 'options.f1'
        print("Writing # separated scenario text file for excel to:",
              options.f1)
        CreateScenarioExcel(options.f1, t, dirfilels, uid340set,
                            uiddict340to500)
        quit()

    time_series_count = 0
    not_found_uid_ls = []
    separator = "--------------------------------------------------------------------------"
    separator_err = "------------------------------------------------------"

    #Populate xml with inventory results and write new xml file
    if options.f3 is not None:
        print("Populating Party Profile xml:", options.f2,
              "with inventory results")
        for file in dirfilels:
            #Important!: all LULU files shall start with 'LU' and KP LULU files with 'KP'
            #glob returns the files with their directory part, so the
            #prefix must be checked from the base name of the file
            base_name = os.path.basename(file)
            start_year = lulu_start_year
            if base_name.startswith('KP'):
                start_year = kp_start_year
            elif not base_name.startswith('LU'):
                print("Cannot decide if KP LULUCF or LULUCF file", file)
            datals = _read_time_series(file)
            ##Retrieve user based on the first time series uid.
            #Blank lines are skipped when looking for it, a file
            #without any time series has no owner and nothing to insert
            first_series = next((ts for ts in datals if len(ts) > 0), None)
            if first_series is None:
                print("Empty file", file, file=sys.stderr)
                continue
            #The first string in the list is the uid
            fowner = dictionary[first_series[0].strip('{}')][2]
            #Find all variables, once for each file
            variablels = _find_variables(t)
            print(separator)
            print("File:", file, "User:", fowner)
            print(separator)
            for time_series in datals:
                if len(time_series) == 0:
                    print(file, "Found empty line")
                else:
                    #The first string in the list is the uid
                    uid = time_series.pop(0)
                    uid_new = uid.strip('{}')
                    #The owner is not used at the moment, but the lookup
                    #still stops the program for a uid without an owner
                    fowner = dictionary[uid_new][2]
                    time_series_count += 1
                    #Some UIDs have changed from CRFReporter version 3.4.0 to 5.0.0
                    uid_changed = MapUID340to500(uid_new, uid340set,
                                                 uiddict340to500)
                    if uid_changed != uid_new:
                        print("UID changed:", uid_new, "-->", uid_changed)
                        uid_new = uid_changed
                    else:
                        print("UID:", uid_new)
                    InsertInventoryData(uid_new, variablels, time_series, file,
                                        not_found_uid_ls, start_year,
                                        current_inventory_year)
                print(separator)
        print("Done, total of", time_series_count, "time series")
        if len(not_found_uid_ls) == 0:
            print("Found all UIDs")
        else:
            print(
                "REMEMBER TO UPDATE PARTY PROFILE XML AFTER NEW NODES IN THE INVENTORY!",
                file=sys.stderr)
            if options.f11 is not None:
                #Check the missing UIDs also against the older xml
                t2 = ET()
                print("Parsing Party Profile xml from:",
                      options.f11,
                      file=sys.stderr)
                t2.parse(options.f11)
                old_variablels = _find_variables(t2)
                truly_missing_uid_ls = [
                    uid for uid in not_found_uid_ls
                    if not CheckUIDInXml(uid, old_variablels)
                ]
                print(separator_err, file=sys.stderr)
                print("The following",
                      len(truly_missing_uid_ls),
                      "UIDs not found in the current inventory:",
                      options.f2,
                      file=sys.stderr)
                print("or in inventory:", options.f11, file=sys.stderr)
                _print_uid_owners(truly_missing_uid_ls, dictionary)
                if len(truly_missing_uid_ls) == 0:
                    print("All UIDs found", file=sys.stderr)
            print(separator_err, file=sys.stderr)
            print("The following",
                  len(not_found_uid_ls),
                  "UIDs not found in current invetory:",
                  options.f2,
                  file=sys.stderr)
            _print_uid_owners(not_found_uid_ls, dictionary)
            print(separator_err, file=sys.stderr)

    if options.f1 is not None:
        print("Generating UID file:", options.f1)
        _write_variable_file(options.f1, t, current_inventory_year,
                             write_uid_row)
        print("Done")
    print("Pretty print xml for humans")
    PrettyPrint(t.getroot(), 0, "   ")
    if options.f3 is not None:
        print("Writing xml to:", options.f3)
        t.write(options.f3)
        print("Done")

    print("Exit program")
Example #5
0
def GHGToCRFReporter(file_ls,
                     partyprofile_xml_file,
                     crf_xml_file,
                     uid_mapping_file,
                     current_inventory_year,
                     sep1=None,
                     kp_1990=None):
    """
    Insert the time series of the GHG inventory files into the Party
    Profile xml and write the result to a new xml file.

    file_ls:                list of GHG inventory file names
    partyprofile_xml_file:  xml file to be parsed and populated
    crf_xml_file:           name of the xml file to be written
    uid_mapping_file:       file name given to Create340to500UIDMapping
    current_inventory_year: passed to crfreporter.InsertInventoryData
    sep1:                   separator for the items of a time series,
                            None (default) means white space characters
    kp_1990:                passed to crfreporter.InsertInventoryData

    The start year is crfxmlconstants.kp_start_year for files whose base
    name starts with 'KP', otherwise crfxmlconstants.lulu_start_year (a
    note is printed to stderr if the name does not start with 'LU').
    UIDs not found are collected and reported to stderr for each file.

    Returns None.
    """
    time_series_count = 0
    t = ET()
    t.parse(partyprofile_xml_file)
    (uid340set, uiddict340to500) = Create340to500UIDMapping(uid_mapping_file)
    #Find all variables once
    variablels = [e for e in t.iter('variable') if e.tag == 'variable']
    separator = "--------------------------------------------------------------------------"
    separator_err = "------------------------------------------------------"
    for file_name in file_ls:
        #Important!: all LULU files shall start with 'LU' and KPLULU files with 'KP'
        base_name = os.path.basename(file_name)
        start_year = crfxmlconstants.lulu_start_year
        if base_name.startswith('KP'):
            start_year = crfxmlconstants.kp_start_year
        elif not base_name.startswith('LU'):
            print("Cannot decide if KP LULUCF or LULUCF file",
                  file_name,
                  file=sys.stderr)
        #Split each data row to a list of strings, delimiters are white
        #space characters unless sep1 is given. The data file can contain
        #A) comment lines, i.e. lines containing exactly one '#', which
        #are filtered out and B) a comment for the time series in front of
        #it, starting and ending with '#', which is dropped. A row with
        #white space only becomes an empty list: with an explicit
        #separator split() alone would return ['\n'] for a blank line.
        datals = []
        #The with statement closes the file also if reading fails
        with open(file_name) as f:
            for line in f:
                if line.count('#') == 1:
                    continue
                series_text = line.rpartition('#')[2]
                if series_text.strip():
                    datals.append(series_text.split(sep=sep1))
                else:
                    datals.append([])
        if len(datals) == 0:
            print("Empty file", file_name, file=sys.stderr)
            continue
        #UIDs not found are collected and reported for each file separately
        not_found_uid_ls = []
        print(separator)
        print("File:", file_name)
        print(separator)
        for time_series in datals:
            if len(time_series) == 0:
                print(file_name, "Found empty line")
                continue
            #The first string in the list is the uid
            uid = time_series.pop(0)
            uid_new = uid.replace(' ', '').strip('{}')
            time_series_count += 1
            #Some UIDs have changed from CRFReporter version 3.4.0 to 5.0.0
            uid_changed = MapUID340to500(uid_new, uid340set, uiddict340to500)
            if uid_changed != uid_new:
                print("UID changed:", uid_new, "-->", uid_changed)
                uid_new = uid_changed
            else:
                print("UID:", uid_new)
            crfreporter.InsertInventoryData(uid_new, variablels, time_series,
                                            file_name, not_found_uid_ls,
                                            start_year,
                                            current_inventory_year, kp_1990)
            print(separator)
        #The count is the running total over the files read so far
        print("Done, total of", time_series_count, "time series")
        if len(not_found_uid_ls) == 0:
            print("Found all UIDs")
        else:
            print(
                "REMEMBER TO UPDATE PARTY PROFILE XML AFTER NEW NODES IN THE INVENTORY!",
                file=sys.stderr)
            print(separator_err, file=sys.stderr)
            print("The following",
                  len(not_found_uid_ls),
                  "UIDs not found in current invetory:",
                  file=sys.stderr)
            print("UID", file=sys.stderr)
            for uid in not_found_uid_ls:
                print(uid, file_name, file=sys.stderr)
            print(separator_err, file=sys.stderr)
    print("Pretty print xml")
    ppxml.PrettyPrint(t.getroot(), 0, "   ")
    print("Writing xml to:", crf_xml_file)
    t.write(crf_xml_file)
    print("Done")