def ParseGHGInventoryFile(data_file, uid_mapping_file, sep1=None):
    """
    Read a GHG inventory data file and return its time series as a list
    of lists of strings (datalss).

    The data file can contain
    A) comment lines that contain exactly one '#'; these lines are
       skipped altogether, and
    B) a comment in front of a time series, starting and ending with '#';
       everything up to and including the last '#' is discarded.

    data_file:        name of the inventory data file
    uid_mapping_file: file passed to Create340to500UIDMapping to build the
                      CRFReporter 3.4.0 --> 5.0.0 UID mapping
    sep1:             item delimiter given to str.split, white space
                      characters by default (None)

    The first item of each returned series (datals) is the UID with
    spaces and surrounding curly braces removed and mapped with
    MapUID340to500. The rest of the items are returned as split.
    """
    (uid340set, uiddict340to500) = Create340to500UIDMapping(uid_mapping_file)
    #The with statement closes the file also when reading fails
    with open(data_file) as f:
        datalss = [
            line.rpartition('#')[2].split(sep=sep1) for line in f
            if line.count('#') != 1
        ]
    #Remove empty lines. With white space splitting an empty line is [],
    #but with an explicit separator it is e.g. ['\n']; drop both cases
    datalss = [
        datals for datals in datalss if any(item.strip() for item in datals)
    ]
    for datals in datalss:
        #The old reporter wrapped uid in {}, the new one does not
        uid_stripped = datals[0].replace(' ', '').strip('{}')
        #Some UIDs have changed from CRFReporter version 3.4.0 to 5.0.0
        datals[0] = MapUID340to500(uid_stripped, uid340set, uiddict340to500)
    return datalss
def InsertAllNKComments(t, dirfilels, uidmapping):
    """
    Read the comment files in dirfilels and pass each comment row to
    InsertNKComment.

    t:          passed unchanged to InsertNKComment (presumably the parsed
                Party Profile xml tree -- confirm against InsertNKComment)
    dirfilels:  list of comment file names. Files whose base name starts
                with 'CKP' use crfxmlconstants.kp_start_year, all the
                others crfxmlconstants.lulu_start_year
    uidmapping: file passed to Create340to500UIDMapping to build the
                CRFReporter 3.4.0 --> 5.0.0 UID mapping

    Each row of a file is split with ';'. The first item is the UID, the
    rest of the items are handed to InsertNKComment together with the row
    number (starting from 1 in each file). Lines containing exactly one
    '#' are skipped and for other lines everything up to and including
    the last '#' is discarded.
    """
    (uid340set, uiddict340to500) = Create340to500UIDMapping(uidmapping)
    not_found_uid_ls = []
    for file in dirfilels:
        print("Reading file", file)
        if os.path.basename(file).startswith('CKP'):
            start_year = crfxmlconstants.kp_start_year
        else:
            start_year = crfxmlconstants.lulu_start_year
        print("Start year", start_year)
        #Split comments using ';' separator. The with statement closes
        #the file also when reading fails
        with open(file) as f:
            datals = [
                x.rpartition('#')[2].split(';') for x in f
                if x.count('#') != 1
            ]
        #The counter is the row number, empty rows included
        for counter, nkcomments_ls in enumerate(datals, start=1):
            #split(';') never returns an empty list: an empty line shows
            #up as a row with white space items only
            if not any(item.strip() for item in nkcomments_ls):
                print("Found empty line")
                continue
            #The first string in the list is the uid
            uid = nkcomments_ls.pop(0)
            #The old reporter wrapped uid in {}, the new one does not
            uid_new = uid.strip('{}')
            #Some UIDs have changed from CRFReporter version 3.4.0 to 5.0.0
            uid_changed = MapUID340to500(uid_new, uid340set, uiddict340to500)
            if uid_changed != uid_new:
                print("UID changed:", uid_new, "-->", uid_changed)
                uid_new = uid_changed
            InsertNKComment(uid_new, t, nkcomments_ls, counter,
                            not_found_uid_ls, start_year)
print("No CRFReporter 340 to 500 UID mapping file") quit() activity_file = options.f5 start_year = lulu_start_year if os.path.basename(activity_file).startswith('KP'): start_year = kp_start_year elif os.path.basename(activity_file).startswith('LU'): start_year = lulu_start_year else: print("Cannot decide if KP LULUCF or LULUCF file", activity_file, file=sys.stderr) quit() print("Summary for", activity_file, "begins") (uid340set, uiddict340to500) = Create340to500UIDMapping(options.f6) #Parse xml tree print("Parsing Party Profile xml from:", options.f1) t = ET() t.parse(options.f1) #Find all varibles once it = t.iter('variable') variablels = [e for e in it if e.tag == 'variable'] print("Reading GHG Inventory files") dirfilels = glob.glob(options.f2) print("Inserting GHG files into a dictionary") ghg_dict = CreateGHGDictionary(dirfilels) print("Reading UID files for Activities and Regions:", options.f5) #Check data available
def _WriteVariableFile(file_name, t, end_year, write_variable):
    """
    Write the header and one entry for each 'variable' element of the
    xml tree t to file_name.

    The variables are sorted with SortKey. The header is written with
    WriteHeader(f, lulu_start_year, end_year) and each variable x with
    write_variable(f, x), where f is the open output file.
    """
    variablels = [e for e in t.iter('variable') if e.tag == 'variable']
    variablels.sort(key=SortKey)
    #The with statement closes the file also when writing fails
    with open(file_name, 'w') as f:
        WriteHeader(f, lulu_start_year, end_year)
        for x in variablels:
            write_variable(f, x)


def main():
    """
    The main program: parse the command line and, depending on the
    options, either

    - generate the UID file (-u without -c) and quit,
    - check the inventory (-o) or check for erroneous input (-e) and quit,
    - create the scenario file (-s) and quit, or
    - populate the Party Profile xml (-p) with the GHG inventory files
      (-c) and write the result (-x).

    The Party Profile xml (-p) and the current inventory year (-y) are
    always required. The inventory files (-c) and the UID mapping file
    (-m) are required unless the program quits before needing them.

    NOTE(review): the block structure after the inventory file loop was
    reconstructed from the order of the statements; confirm that the
    summary and the UID file generation belong outside the -x branch.
    """
    #Command line generator
    parser = OP()
    parser.add_option(
        "-u", "--uid", dest="f1",
        help="Create '#' separated text file for each UID for excel")
    parser.add_option("-p", "--pxml", dest="f2",
                      help="Read CRFReporter Party Profile xml file")
    parser.add_option(
        "-x", "--xml", dest="f3",
        help="Write new Party profile populated with inventory results")
    parser.add_option("-c", "--csv", dest="f4",
                      help="Read GHG inventory csv files")
    parser.add_option("-a", "--all", action="store_true", dest="f5",
                      default=False, help="Print all UID identifiers")
    parser.add_option("-m", "--map", dest="f6",
                      help="CRFReporter 3.0.0 --> 5.0.0 UID mapping file")
    parser.add_option("-o", "--check", dest="f7",
                      help="Check inventory and quit")
    parser.add_option("-e", "--false", dest="f8",
                      help="Check for erroneus input and quit")
    parser.add_option(
        "-s", "--scen", action="store_true", dest="f9", default=False,
        help="Create '#' separated UID text file for excel and quit")
    parser.add_option("-y", "--year", dest="f10",
                      help="Current inventory year")
    parser.add_option("--oldxml", dest="f11",
                      help="Check if UID exists in older CRFReporter version")
    (options, args) = parser.parse_args()

    if options.f2 is None:
        print("No input Reporter Party Profile XML data file")
        quit()
    #The user information maps UIDs to ghg inventory files and file owners
    if not os.path.isfile(user_information_file):
        print("Missing file", user_information_file,
              "mapping ghg inventory files to file owners")
        quit()
    dictionary = CreateUserInformation(user_information_file)
    #Print all UID identifiers (do not filter with LULU/KP keywords)
    write_all = options.f5 is True
    write_scen = options.f9 is True
    if options.f10 is None:
        print("Missing current inventory year")
        quit()
    current_inventory_year = int(options.f10)

    #Parse xml tree
    print("Parsing Party Profile xml from:", options.f2)
    t = ET()
    t.parse(options.f2)

    def write_uid_variable(f, x):
        #One row of the UID file for the variable x
        WriteVariables(f, x, dictionary, write_all)

    #Generate UID file for those who make inventory
    if options.f4 is None and options.f1:
        print("Generating UID file:", options.f1)
        _WriteVariableFile(options.f1, t, current_inventory_year,
                           write_uid_variable)
        print("Done")
        quit()

    #-------Optional checking of inventory---------------
    #NOTE(review): the end year 2014 is hard coded in both checks although
    #the current inventory year is available; confirm if this is intended
    if options.f7 is not None:
        print("Checking inventory to file", options.f7)
        _WriteVariableFile(options.f7, t, 2014, WriteEmptyAndFalseVariables)
        print("Done")
        quit()
    if options.f8 is not None:
        print("Checking inventory for erroneous input to file", options.f8)
        _WriteVariableFile(options.f8, t, 2014, WriteFalseInput)
        print("Done")
        quit()
    #------------------------------------------------------

    #List the inventory files to be imported
    if options.f4 is None:
        print("Missing GHG Inventory csv files")
        quit()
    dirfilels = glob.glob(options.f4)
    #UID mapping from CRFReporter 3.4.0-->5.0.0
    if options.f6 is None:
        print("No CRFReporter 3.4.0 --> CRFReporter 5.0.0 UID mapping file")
        quit()
    (uid340set, uiddict340to500) = Create340to500UIDMapping(options.f6)

    #This is for Paula's scenario in year 2015
    if write_scen:
        #Print the file name itself, not the text 'options.f1'
        print("Writing # separated scenario text file for excel to:",
              options.f1)
        CreateScenarioExcel(options.f1, t, dirfilels, uid340set,
                            uiddict340to500)
        quit()

    time_series_count = 0
    not_found_uid_ls = []
    separator = "-" * 74
    #Populate xml with inventory results and write new xml file
    if options.f3 is not None:
        print("Populating Party Profile xml:", options.f2,
              "with inventory results")
        #Find all variables once
        variablels = [e for e in t.iter('variable') if e.tag == 'variable']
        for file in dirfilels:
            #Important!: all LULU files shall start with 'LU' and KP LULU
            #files with 'KP'. glob returns the names with their directory
            #part, so the test must be made with the base name
            base_name = os.path.basename(file)
            start_year = lulu_start_year
            if base_name.startswith('KP'):
                start_year = kp_start_year
            elif not base_name.startswith('LU'):
                print("Cannot decide if KP LULUCF or LULUCF file", file)
            #Split each data row to list of strings, delimiters are white
            #space characters. The data file can contain A) comment lines
            #with a single '#' and B) comment for the time series in front
            #of it starting and ending with '#'. The list comprehension
            #filters out lines with single '#', then separates the time
            #series from the data comment and finally splits the time
            #series into a list of strings.
            with open(file) as f:
                datals = [
                    x.rpartition('#')[2].split() for x in f
                    if x.count('#') != 1
                ]
            #Retrieve user based on the first time series. Empty lines
            #are [], so take the first row that has data
            first_series = next((row for row in datals if row), None)
            if first_series is None:
                print("Empty file", file, file=sys.stderr)
                continue
            #The first string in the list is the uid
            fowner = dictionary[first_series[0].strip('{}')][2]
            print(separator)
            print("File:", file, "User:", fowner)
            print(separator)
            for time_series in datals:
                if not time_series:
                    print(file, "Found empty line")
                    continue
                #The first string in the list is the uid
                uid = time_series.pop(0)
                #The old reporter wrapped uid in {}, the new one does not
                uid_new = uid.strip('{}')
                #NOTE(review): fowner is not used after this, but the
                #lookup raises KeyError for a UID missing from the user
                #information; kept so that such input is still rejected
                fowner = dictionary[uid_new][2]
                time_series_count += 1
                #Some UIDs have changed from CRFReporter version 3.4.0
                #to 5.0.0
                uid_changed = MapUID340to500(uid_new, uid340set,
                                             uiddict340to500)
                if uid_changed != uid_new:
                    print("UID changed:", uid_new, "-->", uid_changed)
                    uid_new = uid_changed
                else:
                    print("UID:", uid_new)
                InsertInventoryData(uid_new, variablels, time_series, file,
                                    not_found_uid_ls, start_year,
                                    current_inventory_year)
            print(separator)

    print("Done, total of", time_series_count, "time series")
    short_separator = "-" * 54
    if not not_found_uid_ls:
        print("Found all UIDs")
    else:
        print(
            "REMEMBER TO UPDATE PARTY PROFILE XML AFTER NEW NODES IN THE INVENTORY!",
            file=sys.stderr)
        if options.f11 is not None:
            #Check the missing UIDs against the older Party Profile xml
            t2 = ET()
            print("Parsing Party Profile xml from:", options.f11,
                  file=sys.stderr)
            t2.parse(options.f11)
            old_variablels = [
                e for e in t2.iter('variable') if e.tag == 'variable'
            ]
            truly_missing_uid_ls = [
                uid for uid in not_found_uid_ls
                if not CheckUIDInXml(uid, old_variablels)
            ]
            print(short_separator, file=sys.stderr)
            print("The following", len(truly_missing_uid_ls),
                  "UIDs not found in the current inventory:", options.f2,
                  file=sys.stderr)
            print("or in inventory:", options.f11, file=sys.stderr)
            print("UID", "File", "Owner", file=sys.stderr)
            for uid in truly_missing_uid_ls:
                print(uid, dictionary[uid][0], dictionary[uid][2],
                      file=sys.stderr)
            if not truly_missing_uid_ls:
                print("All UIDs found", file=sys.stderr)
        print(short_separator, file=sys.stderr)
        print("The following", len(not_found_uid_ls),
              "UIDs not found in current invetory:", options.f2,
              file=sys.stderr)
        print("UID", "File", "Owner", file=sys.stderr)
        for uid in not_found_uid_ls:
            print(uid, dictionary[uid][0], dictionary[uid][2],
                  file=sys.stderr)
        print(short_separator, file=sys.stderr)

    if options.f1 is not None:
        print("Generating UID file:", options.f1)
        _WriteVariableFile(options.f1, t, current_inventory_year,
                           write_uid_variable)
        print("Done")

    print("Pretty print xml for humans")
    PrettyPrint(t.getroot(), 0, " ")
    print("Writing xml to:", options.f3)
    if options.f3 is not None:
        t.write(options.f3)
        print("Done")
    print("Exit program")
def GHGToCRFReporter(file_ls, partyprofile_xml_file, crf_xml_file,
                     uid_mapping_file, current_inventory_year, sep1=None,
                     kp_1990=None):
    """
    Populate the Party Profile xml with the time series in the GHG
    inventory files and write the result to a new xml file.

    file_ls:                list of GHG inventory file names. Important!:
                            the base name of all LULU files shall start
                            with 'LU' and KP LULU files with 'KP'. If
                            neither, a warning is printed and the LULU
                            start year is used
    partyprofile_xml_file:  Party Profile xml file to read
    crf_xml_file:           xml file to write
    uid_mapping_file:       file passed to Create340to500UIDMapping to
                            build the CRFReporter 3.4.0 --> 5.0.0 UID
                            mapping
    current_inventory_year: passed to crfreporter.InsertInventoryData
    sep1:                   item delimiter given to str.split, white space
                            characters by default (None)
    kp_1990:                passed to crfreporter.InsertInventoryData

    UIDs not found are collected from all the files and listed once in
    the end, each with the file it came from.
    """
    time_series_count = 0
    t = ET()
    t.parse(partyprofile_xml_file)
    (uid340set, uiddict340to500) = Create340to500UIDMapping(uid_mapping_file)
    #(uid, file name) pairs of the UIDs not found, over all the files
    not_found_ls = []
    #Find all variables once
    variablels = [e for e in t.iter('variable') if e.tag == 'variable']
    separator = "-" * 74
    for file_name in file_ls:
        base_name = os.path.basename(file_name)
        start_year = crfxmlconstants.lulu_start_year
        if base_name.startswith('KP'):
            start_year = crfxmlconstants.kp_start_year
        elif not base_name.startswith('LU'):
            print("Cannot decide if KP LULUCF or LULUCF file", file_name,
                  file=sys.stderr)
        #Split each data row to list of strings, delimiters are white
        #space characters by default. The data file can contain A) comment
        #lines with a single '#' and B) comment for the time series in
        #front of it starting and ending with '#'. The list comprehension
        #filters out lines with single '#', then separates the time series
        #from the data comment and finally splits the time series into a
        #list of strings. The with statement closes the file also when
        #reading fails.
        with open(file_name) as f:
            datals = [
                x.rpartition('#')[2].split(sep=sep1) for x in f
                if x.count('#') != 1
            ]
        if len(datals) == 0:
            print("Empty file", file_name, file=sys.stderr)
            continue
        #UIDs not found in this file; presumably InsertInventoryData
        #appends to the list -- confirm against crfreporter
        not_found_uid_ls = []
        print(separator)
        print("File:", file_name)
        print(separator)
        for time_series in datals:
            #With white space splitting an empty line is [], but with an
            #explicit separator it is e.g. ['\n']; catch both cases
            if not any(item.strip() for item in time_series):
                print(file_name, "Found empty line")
                continue
            #The first string in the list is the uid
            uid = time_series.pop(0)
            #The old reporter wrapped uid in {}, the new one does not
            uid_new = uid.replace(' ', '').strip('{}')
            time_series_count += 1
            #Some UIDs have changed from CRFReporter version 3.4.0 to 5.0.0
            uid_changed = MapUID340to500(uid_new, uid340set, uiddict340to500)
            if uid_changed != uid_new:
                print("UID changed:", uid_new, "-->", uid_changed)
                uid_new = uid_changed
            else:
                print("UID:", uid_new)
            crfreporter.InsertInventoryData(uid_new, variablels, time_series,
                                            file_name, not_found_uid_ls,
                                            start_year,
                                            current_inventory_year, kp_1990)
        print(separator)
        #Remember the file for each UID not found before moving on
        not_found_ls.extend((uid, file_name) for uid in not_found_uid_ls)

    print("Done, total of", time_series_count, "time series")
    if not not_found_ls:
        print("Found all UIDs")
    else:
        short_separator = "-" * 54
        print(
            "REMEMBER TO UPDATE PARTY PROFILE XML AFTER NEW NODES IN THE INVENTORY!",
            file=sys.stderr)
        print(short_separator, file=sys.stderr)
        print("The following", len(not_found_ls),
              "UIDs not found in current invetory:", file=sys.stderr)
        print("UID", file=sys.stderr)
        for (uid, uid_file_name) in not_found_ls:
            print(uid, uid_file_name, file=sys.stderr)
        print(short_separator, file=sys.stderr)
    print("Pretty print xml")
    ppxml.PrettyPrint(t.getroot(), 0, " ")
    print("Writing xml to:", crf_xml_file)
    t.write(crf_xml_file)
    print("Done")