def init_y_annotations(self):
    """
    Initialise the joint y vector with data from manually annotated abstracts.

    Iterates over every study yielded by LabeledAbstractReader, re-fetches
    its abstract by Biview id, passes it through swap_num and tag_words, and
    collects the words whose tag contains 'n'.  The first such word is
    matched against an optional "N"/"n", an optional "=", and a positive
    integer; the integer is stored in self.data["y_lookup_init"][study_id].

    Side effects:
        - fills self.data["y_lookup_init"]
        - sets self.seed_abstracts to the number of studies stored

    Raises:
        TypeError: if a study has a tagged number whose first entry does
            not start with a parsable positive integer.
    """
    logging.info("Identifying seed data from annotated data")

    # NOTE(review): this progress bar is never advanced in this method; the
    # construction is kept only because it may have side effects - confirm
    # and remove if it does not.
    p = progressbar.ProgressBar(len(self.biviewer), timer=True)

    annotation_viewer = LabeledAbstractReader()

    # Loop-invariant pattern, compiled once; no leading zero is accepted.
    number_pattern = re.compile("[Nn]?=?([1-9]+[0-9]*)")

    counter = 0
    for study in annotation_viewer:
        study_id = int(study["Biview_id"])
        text = swap_num(
            annotation_viewer.get_biview_id(study_id)['abstract'])
        parsed_tags = tag_words(text, flatten=True)
        tagged_number = [w[0] for w in parsed_tags if 'n' in w[1]]

        if not tagged_number:
            # Nothing tagged in this abstract: it is not a seed.
            continue

        # Only the first tagged word is considered.
        number = number_pattern.match(tagged_number[0])
        if number is None:
            # Interpolate the offending token into the message; exceptions
            # do not apply logging-style lazy %-arguments.
            raise TypeError(
                'Unable to convert tagged number %s to integer'
                % (tagged_number[0],))

        self.data["y_lookup_init"][study_id] = int(number.group(1))
        counter += 1

    self.seed_abstracts = counter
    logging.info("%d seed abstracts found", counter)
def init_y_annotations(self):
    """
    Initialise the joint y vector with data from manually annotated abstracts.

    For each study yielded by LabeledAbstractReader the abstract is looked
    up by its Biview id, run through swap_num and tag_words, and the words
    whose tag contains 'n' are collected.  The first collected word is
    matched against an optional "N"/"n", an optional "=", and a positive
    integer, which is then stored in self.data["y_lookup_init"][study_id].

    Side effects:
        - fills self.data["y_lookup_init"]
        - sets self.seed_abstracts to the number of studies stored

    Raises:
        TypeError: if the first tagged word of a study does not start with
            a parsable positive integer.
    """
    logging.info("Identifying seed data from annotated data")

    # NOTE(review): the progress bar is created but never advanced here; it
    # is kept in case its construction has side effects - confirm.
    p = progressbar.ProgressBar(len(self.biviewer), timer=True)

    annotation_viewer = LabeledAbstractReader()

    # Compiled once outside the loop; rejects numbers with a leading zero.
    number_pattern = re.compile("[Nn]?=?([1-9]+[0-9]*)")

    counter = 0
    for study in annotation_viewer:
        study_id = int(study["Biview_id"])
        text = swap_num(annotation_viewer.get_biview_id(study_id)['abstract'])
        parsed_tags = tag_words(text, flatten=True)
        tagged_number = [w[0] for w in parsed_tags if 'n' in w[1]]

        if not tagged_number:
            # No tagged word in this abstract, so it contributes no seed.
            continue

        # Only the first tagged word is inspected.
        number = number_pattern.match(tagged_number[0])
        if number is None:
            # The token must be formatted into the message explicitly;
            # passing it as a second argument leaves "%s" uninterpolated.
            raise TypeError(
                'Unable to convert tagged number %s to integer'
                % (tagged_number[0],))

        self.data["y_lookup_init"][study_id] = int(number.group(1))
        counter += 1

    self.seed_abstracts = counter
    logging.info("%d seed abstracts found", counter)
def __init__(self, text, window_size):
    """
    Build the pipeline from raw text or from already-tagged sentences.

    text: a str, which is passed through swap_num and then tag_words, or a
        list, which is stored unchanged as self.tag_tuple_sents.  In the
        list case self.text is not set.  For any other type
        self.tag_tuple_sents is not assigned here, so the set_functions
        call below fails unless the class provides that attribute.
    window_size: stored as self.w_pos_window.
    """
    if isinstance(text, str):
        # NOTE(review): the original first assigned to self.text a re.sub
        # that deleted digit-comma-digit runs, then immediately overwrote
        # it with swap_num(text) on the *unmodified* input.  The
        # substitution therefore never took effect, and its non-raw
        # pattern contained an invalid "\," escape.  It is removed here
        # with behaviour unchanged; if stripping such numbers was
        # intended, apply it to the text before calling swap_num.
        self.text = swap_num(text)
        self.tag_tuple_sents = tag_words(self.text)
    elif isinstance(text, list):
        self.tag_tuple_sents = text

    # Order preserved from the original: functions are derived before the
    # window size is stored and before templates are loaded.
    self.functions = self.set_functions(self.tag_tuple_sents)
    self.w_pos_window = window_size
    self.load_templates()
def __init__(self, text, window_size):
    """
    Build the pipeline from raw text or from already-tagged sentences.

    text: a str, which is run through swap_num and then tag_words, or a
        list, which is kept as-is in self.tag_tuple_sents (self.text is
        then left unset).  Any other type leaves self.tag_tuple_sents
        unassigned by this method, so the set_functions call below fails
        unless the class defines that attribute elsewhere.
    window_size: stored as self.w_pos_window.
    """
    if isinstance(text, str):
        # NOTE(review): a re.sub removing digit-comma-digit runs used to be
        # assigned to self.text here, but the very next statement replaced
        # it with swap_num(text) computed from the original input, so the
        # substitution was a dead store (and its non-raw pattern used the
        # invalid escape "\,").  Dropped with no behaviour change; chain it
        # into swap_num if the stripping was actually wanted.
        self.text = swap_num(text)
        self.tag_tuple_sents = tag_words(self.text)
    elif isinstance(text, list):
        self.tag_tuple_sents = text

    # Statement order kept as in the original.
    self.functions = self.set_functions(self.tag_tuple_sents)
    self.w_pos_window = window_size
    self.load_templates()