def process_one_file(bibcode, fname, provider):
    """Return the cleaned text of the last matching node of an XML file.

    The file is opened and parsed through
    ``StandardFileExtract.StandardExtractorXML``. Nodes are selected with
    ``sections_xpath``; if that yields nothing, ``paragraphs_xpath`` is
    tried instead. The text content of the *last* selected node is passed
    through ``TextCleaner(...).run()``.

    Args:
        bibcode: Identifier stored under ``CONSTANTS["BIBCODE"]`` in the
            metadata dict handed to the extractor.
        fname: Path of the source file. The part after its last ``'.'`` is
            used as the format; a name without a dot is used whole.
        provider: Value stored under ``CONSTANTS["PROVIDER"]``.

    Returns:
        The result of ``TextCleaner(text=...).run()`` for the last matching
        node, or ``None`` when no node matches either XPath or the node's
        text is empty.
    """
    ext = fname.split('.')[-1]
    meta = {
        CONSTANTS["BIBCODE"]: bibcode,
        CONSTANTS["PROVIDER"]: provider,
        CONSTANTS["FORMAT"]: ext,
        CONSTANTS["FILE_SOURCE"]: fname,
    }

    extractor = StandardFileExtract.StandardExtractorXML(meta)
    extractor.open_xml()
    xml = extractor.parse_xml()

    sections = xml.xpath(sections_xpath) or xml.xpath(paragraphs_xpath)
    if not sections:
        # Guard BEFORE indexing: the previous code touched sections[-1]
        # ahead of this check and raised IndexError on documents where
        # neither XPath matched anything.
        return None

    raw_summary = sections[-1].text_content()
    # Diagnostic kept from the original: reports the type returned by
    # text_content() before it is coerced to unicode.
    sys.stderr.write("summary is of type {}\n".format(type(raw_summary)))

    # NOTE: `unicode` is the Python 2 builtin; this module targets Python 2.
    summary = unicode(raw_summary)
    if not summary:
        return None

    return TextCleaner(text=summary).run()