class Filter:
    """Copy selected records from an input stream into a new shelf.

    A record is kept when its index, rendered as ``str(index) + '.0'``,
    appears in the inclusion list and does not appear in the exclusion list.
    """

    def __init__(self, inputFilePath, outputFilePath):
        """Create empty filter lists, the input stream and the output path.

        Args:
            inputFilePath: Passed to ``Stream`` together with the key
                ``'Letter'``.
            outputFilePath: Handed to ``ShelveManager`` when ``filter`` runs.
        """
        self.inclusionList = []
        self.exclusionList = []
        self.stream = Stream(inputFilePath, 'Letter')
        self.new_shelve_file = outputFilePath

    def inclusionListAdd(self, filterList):
        """Extend the inclusion list with the values of ``filterList()``.

        Args:
            filterList: Zero-argument callable returning an iterable of keys.
        """
        self.inclusionList += filterList()

    def exclusionListAdd(self, filterList):
        """Extend the exclusion list with the values of ``filterList()``.

        Args:
            filterList: Zero-argument callable returning an iterable of keys.
        """
        self.exclusionList += filterList()

    def filter(self):
        """Write every included, non-excluded record to the output shelf."""
        # Build the lookup once: a set gives constant-time membership tests,
        # whereas scanning both lists for every streamed record is quadratic.
        # Assumes the list entries are hashable (e.g. strings).
        wanted = set(self.inclusionList).difference(self.exclusionList)

        with ShelveManager(self.new_shelve_file) as new_shelf:
            for index, fields in self.stream.stream():
                # The lists hold keys with a trailing '.0', so the streamed
                # index is rendered the same way before comparing.
                key = str(index) + '.0'
                if key in wanted:
                    new_shelf[index] = self._get_fields(fields)

    @editLogger('Filtered from Completed List', 'PythonScript')
    def _get_fields(self, fields):
        """Return ``fields`` unchanged; exists so ``editLogger`` wraps it."""
        return fields
class Filter:
    """Copy selected records from an input stream into a new shelf.

    A record is kept when its index, rendered as ``str(index) + '.0'``,
    appears in the inclusion list and does not appear in the exclusion list.
    Progress is reported on standard output.
    """

    def __init__(self, inputFilePath, outputFilePath):
        """Create empty filter lists, the input stream and the output path.

        Args:
            inputFilePath: Passed to ``Stream`` together with the key
                ``'Letter'``.
            outputFilePath: Handed to ``ShelveManager`` when ``filter`` runs.
        """
        self.inclusionList = []
        self.exclusionList = []
        self.stream = Stream(inputFilePath, 'Letter')
        self.new_shelve_file = outputFilePath

    def inclusionListAdd(self, filterList):
        """Extend the inclusion list with the values of ``filterList()``.

        Args:
            filterList: Zero-argument callable returning an iterable of keys.
        """
        self.inclusionList += filterList()

    def exclusionListAdd(self, filterList):
        """Extend the exclusion list with the values of ``filterList()``.

        Args:
            filterList: Zero-argument callable returning an iterable of keys.
        """
        self.exclusionList += filterList()

    def filter(self):
        """Write included, non-excluded records to the output shelf.

        Prints the size of the inclusion list first, then one line per
        streamed record saying whether it was included or removed.
        """
        print(len(self.inclusionList))

        # Build the lookup once: a set gives constant-time membership tests,
        # whereas scanning both lists for every streamed record is quadratic.
        # Assumes the list entries are hashable (e.g. strings).
        wanted = set(self.inclusionList).difference(self.exclusionList)

        with ShelveManager(self.new_shelve_file) as new_shelf:
            for index, fields in self.stream.stream():
                # The lists hold keys with a trailing '.0', so the streamed
                # index is rendered the same way before comparing.
                key = str(index) + '.0'
                if key in wanted:
                    print('Filtering letter, included ,', index)
                    new_shelf[index] = self._get_fields(fields)
                else:
                    print('Filtering letter, removed ,', index)

    @editLogger('Filtered from Completed List', 'PythonScript')
    def _get_fields(self, fields):
        """Return ``fields`` unchanged; exists so ``editLogger`` wraps it."""
        return fields
def run_templater(inputFile, outputDir, tmpF):
    """Render every streamed item through one Jinja2 template into XML files.

    Each ``(key, item)`` pair from ``Stream(inputFile, 'Letter')`` is rendered
    and written to ``outputDir + key + ".xml"``. The rendered text is then
    parsed as a rough well-formedness check, and the totals of parseable and
    unparseable documents are printed at the end.

    Args:
        inputFile: Source handed to ``Stream``.
        outputDir: Prefix for output paths. It is concatenated as-is, so it
            must already end with a path separator.
        tmpF: Path of the Jinja2 template file.
    """
    # Context managers make sure the handles are closed even when reading or
    # writing raises.
    with open(tmpF) as template_source:
        templateFile = template_source.read()

    env = jinja2.Environment()
    env.globals.update(sorted=sorted, to_snake=to_snake_case)
    template = env.from_string(templateFile)

    s = Stream(inputFile, 'Letter')

    # Rough tallies of well-formed (wf) and badly-formed (bf) outputs.
    wf = 0
    bf = 0
    for key, item in s.stream():
        templatedText = template.render(item)
        with open(outputDir + key + ".xml", 'w') as f:
            f.write(templatedText)

        # Best-effort check only: the file stays on disk either way, and any
        # parser failure is reported rather than aborting the run.
        try:
            ET.fromstring(templatedText)
        except Exception as e:
            print(key, " is BAD:: ", e)
            bf += 1
        else:
            wf += 1

    print('GOOD: ', wf)
    print('BAD: ', bf)
def getValuesFromFile(self):
    """Yield the '.0'-suffixed keys of rows dated before ``self.date``.

    Streams ``(key, record)`` pairs from the "ID" sheet of ``self.filePath``
    (keyed on ``self.column``). Pairs whose key equals the string ``'None'``
    are ignored; for the rest, the key is yielded as ``str(key) + '.0'``
    when ``record["DATE"] < self.date``.
    """
    rows = Stream(self.filePath, self.column, sheet="ID").stream()
    for key, record in rows:
        if key != 'None':
            if record["DATE"] < self.date:
                yield f"{key!s}.0"
class Processor:
    """Stream records into a shelf, transforming new ones and merging repeats.

    The class reads ``inputFilePath``, ``dict_key`` and ``outputFilePath``
    from ``self`` and calls ``self.resolve`` / ``self.transform``; none of
    these are defined here, so they are presumably supplied by subclasses.
    """

    def __init__(self):
        """Open the input stream and remember where the shelf is written."""
        source_path, key_name = self.inputFilePath, self.dict_key
        self.stream = Stream(source_path, key_name)
        self.new_shelf_file = self.outputFilePath

    def process(self):
        """Store one shelf entry per streamed index.

        The first time an index is seen its fields go through
        ``self.transform``; when the index is already in the shelf, the
        stored entry and the new fields are combined by ``self.resolve``.
        """
        with ShelveManager(self.new_shelf_file) as shelf:
            for index, fields in self.stream.stream():
                # TODO: decide whether an empty result from resolve() should
                # leave the existing entry untouched instead of replacing it.
                entry = (
                    self.resolve(shelf[index], fields)
                    if index in shelf
                    else self.transform(fields)
                )
                shelf[index] = entry
def getValuesFromFile(self):
    """Yield the '.0'-suffixed keys of rows dated before ``self.date``.

    Streams ``(key, record)`` pairs from the "ID NUMBERS" sheet of
    ``self.filePath`` (keyed on ``self.column``). A pair is reported, as
    ``str(key) + '.0'``, only when its key is not the string ``'None'`` and
    ``record['DATE'] < self.date``.
    """
    source = Stream(self.filePath, self.column, sheet="ID NUMBERS")
    for identifier, row in source.stream():
        has_real_key = identifier != 'None'
        if has_real_key and row['DATE'] < self.date:
            yield f"{identifier!s}.0"
def run_templater(inputFile, outputDir, templateFilePath):
    """Render every streamed item through one Jinja2 template into XML files.

    For each ``(key, item)`` pair from ``Stream(inputFile, 'Letter')`` the
    item is updated in place before rendering:

    * ``ProcessingVersion`` is set to ``"2"``;
    * ``Contributor_List`` is split into ``Editors`` (entries found in the
      ``editorList.shelve`` shelf) and the remaining contributors;
    * a "TEI template built" entry is appended to ``Edits``, which is then
      sorted by timestamp.

    The rendered text is written to ``outputDir + key + ".xml"`` and parsed
    as a rough well-formedness check.

    Args:
        inputFile: Source handed to ``Stream``.
        outputDir: Prefix for output paths. It is concatenated as-is, so it
            must already end with a path separator.
        templateFilePath: Path of the Jinja2 template used for every item.
    """
    print(templateFilePath)
    # A context manager closes the handle even if reading fails.
    with open(templateFilePath) as template_source:
        templateFile = template_source.read()

    # The same template serves every item, so the environment and the
    # compiled template are built once rather than once per item.
    env = jinja2.Environment()
    env.globals.update(sorted=sorted, to_snake=to_snake_case,
                       replace_contribs=replace_contrib_names,
                       abstract_split=abstract_split)
    template = env.from_string(templateFile)

    # Rough tallies of well-formed (wf) and badly-formed (bf) outputs; the
    # visible code only counts them.
    wf = 0
    bf = 0

    s = Stream(inputFile, 'Letter')
    for key, item in s.stream():
        print("Templating item: ", key, "-type ", item["Type"])

        item["ProcessingVersion"] = "2"

        new_cont_list = []
        item["Editors"] = []
        # NOTE(review): the nesting below was reconstructed from collapsed
        # source; the `elif` is taken to pair with the outer `if` - confirm.
        with shelve.open('editorList.shelve') as editor_list:
            for editor in item["Contributor_List"]:
                if editor in editor_list and "Python" not in editor:
                    # Fetch once: every shelf access unpickles the value.
                    editor_entry = editor_list[editor]
                    if editor_entry != ("Susan Schreibman", "SS"):
                        item["Editors"].append(editor_entry)
                elif editor != "NULL":
                    new_cont_list.append(editor)
        item["Contributor_List"] = new_cont_list

        # Second-precision ISO timestamp. Slicing str(now) by a fixed width
        # breaks when microseconds happen to be 0 (no fractional part is
        # printed), which would make the strptime() sort below raise.
        template_log = {
            'editType': 'TEI template built',
            'editor': "PythonScript",
            'datetime': datetime.datetime.now().replace(
                microsecond=0).isoformat(),
        }
        item["Edits"].append(template_log)
        item["Edits"] = sorted(
            item["Edits"],
            key=lambda edit: datetime.datetime.strptime(
                edit['datetime'], "%Y-%m-%dT%H:%M:%S"))

        # Drop a stray None key before the item is used as render context.
        if None in item:
            del item[None]

        templatedText = template.render(item)
        with open(outputDir + key + ".xml", 'w') as f:
            f.write(templatedText)

        # Best-effort check only: the file stays on disk either way and
        # failures are counted, not raised.
        try:
            ET.fromstring(templatedText)
        except Exception:
            bf += 1
        else:
            wf += 1