def main():
    """Normalise one temporal expression found in a clinical note and print it.

    Command-line parameters:
      - sys.argv[1]: path and file of the note, e.g. "/usr/xxx/data/XXX.xml"
      - sys.argv[2]: temporal expression, e.g. "every two days"
    """
    expression = sys.argv[2]
    note_location = sys.argv[1]

    analyser = DocumentAnalyser()

    # analyse() concatenates directory and file name verbatim, so the
    # directory is made absolute and given a trailing slash here.
    directory, note_name = os.path.split(note_location)
    note_dir = os.path.abspath(directory) + "/"

    # The True flag asks analyse() for normalised dates.
    note = analyser.analyse(note_dir, note_name, True)

    reference_time = select_utterance_time(expression, note)
    print(normalise(expression, reference_time, True))
def analyse(self, path, file, normalisation=False):
    """Collect the key dates of a clinical note into a ClinicalDocument.

    The note is read from ``path + file``. Lines that start with markup
    (an optional ``]]>`` followed by ``<``, ``<?`` or ``</`` and a tag
    name) are skipped; every other line is lower-cased and appended to a
    single text buffer. Date mentions are located with
    ``self.date_syntaxes[0]`` and the admission, discharge, operation and
    transfer dates are looked up through ``self.search_closest``.

    Args:
        path: Directory prefix. It is concatenated with ``file`` as-is, so
            it must already end with a path separator.
        file: Name of the note inside ``path``.
        normalisation: When true, each date found is passed to
            ``normalise`` and element ``[2]`` of the result is stored.
            Otherwise the raw ``search_closest`` results are stored and
            only the course length is computed from normalised values.

    Returns:
        The populated ClinicalDocument. ``discharge_date_not_in_header``
        is only set in the non-normalised mode, when the discharge date
        had to be looked up with the wider window.
    """
    clinical_note = ClinicalDocument()

    markup_line = re.compile(r"^(?:\]\]>)?<(?:\?|/)?[A-Za-z]+")

    # The context manager closes the file even if reading fails.
    # NOTE: the 'U' flag is kept for Python 2 universal-newline handling;
    # Python 3.11 removed it from open().
    with codecs.open(path + file, 'rU') as note_file:
        source_name = note_file.name
        text = ''.join(line.lower() for line in note_file
                       if not markup_line.match(line))

    date_refs = [(match.start(), match.end())
                 for match in re.finditer(self.date_syntaxes[0], text)]

    clinical_note.file_name = source_name
    clinical_note.file_path = path

    def find(signals, direction, *window):
        # Every lookup shares the same date spans and text buffer.
        return self.search_closest(signals, date_refs, text, direction, *window)

    if normalisation:
        clinical_note.admission_date = normalise(
            find(self.admission_signals, 'forward', 6))[2]
        clinical_note.discharge_date = normalise(
            find(self.discharge_signals, 'forward', 6))[2]
        if clinical_note.discharge_date == 'NONE':
            # Nothing usable with the narrow search (6): retry with the
            # wider one (50). Presumably the number is a search window
            # size -- confirm against search_closest.
            clinical_note.discharge_date = normalise(
                find(self.discharge_signals, 'forward', 50))[2]

        # Operation and transfer dates are normalised against the admission
        # date with its dashes removed.
        admission_anchor = clinical_note.admission_date.replace('-', '')
        clinical_note.operation_date = normalise(
            find(self.operation_signals, 'both'), admission_anchor)[2]
        clinical_note.transfer_date = normalise(
            find(self.transfer_signals, 'both'), admission_anchor)[2]

        clinical_note.course_length = self.get_difference_from_normalised_dates(
            clinical_note.admission_date, clinical_note.discharge_date)
    else:
        clinical_note.admission_date = find(self.admission_signals, 'forward', 6)
        clinical_note.discharge_date = find(self.discharge_signals, 'forward', 6)
        if not clinical_note.discharge_date:
            # Same wider retry as above; this mode also records that the
            # narrow search found nothing.
            clinical_note.discharge_date = find(self.discharge_signals, 'forward', 50)
            clinical_note.discharge_date_not_in_header = True

        clinical_note.operation_date = find(self.operation_signals, 'both')
        clinical_note.transfer_date = find(self.transfer_signals, 'both')

        # The course length is always computed from normalised dates.
        clinical_note.course_length = self.get_difference_from_normalised_dates(
            normalise(clinical_note.admission_date)[2],
            normalise(clinical_note.discharge_date)[2])

    return clinical_note