def get_target_phrases_item_data(target_phrases):
    """Collect custom target phrases into an itemData container.

    One contextItem is created per phrase; the phrase itself is supplied
    for all four positions of the contextItem definition.

    target_phrases: iterable of phrases
    returns: an itemData.itemData() holding the contextItems in input order
    """
    collected = itemData.itemData()
    for phrase in target_phrases:
        # the same phrase fills every one of the four contextItem slots
        quadruple = [phrase] * 4
        collected.append(itemData.contextItem(quadruple))
    return collected
def get_items(_file):
    """Read contextItem definitions from a multi-document YAML source.

    _file: a URL, or a plain filesystem path (a path without a URL scheme
        is prefixed with "file://" before being opened)
    returns: a list with one contextItem per YAML document, built from the
        document's "Lex", "Type", "Regex" and "Direction" keys
    raises: KeyError if a document lacks one of those keys; whatever
        urllib.request.urlopen raises if the source cannot be opened
    """
    def get_fileobj(_file):
        # urlopen needs a scheme; treat scheme-less input as a local path
        if not urllib.parse.urlparse(_file).scheme:
            _file = "file://" + _file
        return urllib.request.urlopen(_file, data=None)

    # The response is used as a context manager so the connection / file
    # handle is released even when a document is malformed.
    with get_fileobj(_file) as f0:
        # NOTE(review): the source may be a remote URL, so the safe loader is
        # used instead of yaml.load_all (which can construct arbitrary
        # objects and requires an explicit Loader on PyYAML >= 6).
        # The comprehension consumes the lazy loader before the stream closes.
        context_items = [
            contextItem((d["Lex"], d["Type"], r"%s" % d["Regex"], d["Direction"]))
            for d in yaml.safe_load_all(f0)
        ]
    return context_items
def get_target_items(target_list):
    """Turn a concepts response (already parsed from JSON) into contextItems.

    target_list: sequence of mappings indexed by "lex", "type", "regex"
        and "direction"; may be empty or None
    returns: a list of itemData.contextItem objects, empty when
        target_list is falsy
    """
    built = []
    # a falsy target_list (None, [], ...) simply yields no iterations
    for entry in target_list or ():
        fields = (
            entry["lex"],
            entry["type"],
            r"%s" % entry["regex"],
            entry["direction"],
        )
        built.append(itemData.contextItem(fields))
    return built
def convertCSVtoitemData(csvFile, encoding='utf-8', delimiter="\t",
                         headerRows=1, literalColumn=0, categoryColumn=1,
                         regexColumn=2, ruleColumn=3):
    """
    Read a delimited file of itemData rules into a single itemData instance.

    csvFile: name of file to read items from
    encoding: text encoding used to open the file; default = 'utf-8'
    delimiter: field separator passed to csv.reader; default = tab
    headerRows: number of header rows to skip; default = 1
    literalColumn: column from which to read the literal; default = 0
    categoryColumn: column from which to read the category; default = 1
    regexColumn: column from which to read the regular expression; default = 2
    ruleColumn: column from which to read the rule; default = 3

    returns: itemData.itemData() with one contextItem per data row
    raises: StopIteration if the file has fewer than headerRows rows;
        IndexError if a non-blank row lacks one of the requested columns
    """
    items = itemData.itemData()  # itemData to be returned to the user
    # The former 'rU' mode is rejected by Python 3.11+, and the encoding
    # argument was never applied. newline='' lets the csv module do its own
    # line-ending handling; the with-block closes the file on any error.
    with open(csvFile, 'r', encoding=encoding, newline='') as f:
        reader = csv.reader(f, delimiter=delimiter)
        # discard the specified number of header rows
        for _ in range(headerRows):
            next(reader)
        # every remaining row describes one contextItem
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; they carry no rule
                continue
            # csv fields are already plain strings, so the regex column is
            # handed over unchanged (no escape processing takes place)
            item = itemData.contextItem([
                row[literalColumn],
                row[categoryColumn],
                row[regexColumn],
                row[ruleColumn],
            ])
            items.append(item)
    return items
def test_instantiate_contextItem0(items):
    """Each entry supplied by `items` must produce a truthy contextItem."""
    for definition in items:
        built = itemData.contextItem(definition)
        assert built
def test_contextItem_rule(items):
    """getRule() on the second item returns "forward"."""
    second = itemData.contextItem(items[1])
    observed_rule = second.getRule()
    assert observed_rule == "forward"
def test_contextItem_category(items):
    """getCategory() on the second item returns a one-element list."""
    second = itemData.contextItem(items[1])
    observed_category = second.getCategory()
    assert observed_category == ["probable_negated_existence"]
# Script fragment (whitespace-collapsed; the original line breaks and
# indentation are not recoverable from here, so the code is left untouched).
#
# What the statements below do, in order:
#   * append row[1] to `iris` and row[5] to `sentences`
#     (NOTE(review): `row` presumably comes from an enclosing reader loop
#     that is not visible in this fragment - confirm)
#   * load the modifier items from a commit-pinned pneumonia_modifiers.yml
#     URL via itemData.get_items
#   * for every index i of `sentences`:
#       - open ./hpo_labels.txt as a tab-delimited file and, for each row
#         whose second column equals iris[i], add
#         contextItem((row[0], row[1], '', '')) to `targets`
#       - build a pyConText.ConTextMarkup from the lower-cased sentence,
#         clean it, mark `modifiers` (mode="modifier") and `targets`
#         (mode="target"), apply the modifiers, then prune marks, prune
#         self-modifying relationships and drop inactive modifiers
#
# NOTE(review): hpo_labels.txt is re-opened and fully re-read on every
# iteration of the sentence loop; reading it once and grouping rows by
# row[1] before the loop would avoid the repeated I/O.
iris.append(row[1]) sentences.append(row[5]) modifiers = itemData.get_items( "https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/20c752d6bd5191833f21ab81fc7f41877dca1db6/KB/pneumonia_modifiers.yml" ) for i in range(len(sentences)): # omg targets = [] with open('./hpo_labels.txt') as labelfile: reader = csv.reader(labelfile, delimiter='\t') for row in reader: if (row[1] == iris[i]): targets.append(contextItem((row[0], row[1], '', ''))) s = sentences[i] markup = pyConText.ConTextMarkup() markup.setRawText(s.lower()) markup.cleanText() markup.markItems(modifiers, mode="modifier") markup.markItems(targets, mode="target") markup.applyModifiers() markup.pruneMarks() markup.pruneSelfModifyingRelationships() markup.dropInactiveModifiers()
def test_contextItem_isa2(items):
    """isA() accepts the upper-case category name for the second item."""
    second = itemData.contextItem(items[1])
    matches = second.isA("PROBABLE_NEGATED_EXISTENCE")
    assert matches
def test_contextItem_getRE1(items):
    """getRE() on the first item returns its explicit regular expression."""
    first = itemData.contextItem(items[0])
    observed_re = first.getRE()
    assert observed_re == r"pulmonary\s(artery )?(embol[a-z]+)"
def test_instantiate_contextItem(self):
    """A contextItem built from the first stored item must be truthy."""
    first_item = itemData.contextItem(self.items[0])
    assert first_item
def test_contextItem_isa(items):
    """isA() accepts the lower-case category name for the first item."""
    first = itemData.contextItem(items[0])
    matches = first.isA("pulmonary_embolism")
    assert matches
def test_contextItem_getRE(items):
    """getRE() on the second item is its literal wrapped in \\b anchors."""
    second = itemData.contextItem(items[1])
    observed_re = second.getRE()
    literal = items[1][0]
    assert observed_re == r'\b%s\b' % literal
def test_contextItem_isa1(items):
    """isA() accepts the upper-case category name for the first item."""
    first = itemData.contextItem(items[0])
    matches = first.isA("PULMONARY_EMBOLISM")
    assert matches
def test_contextItem_literal(items):
    """getLiteral() on the first item returns "pulmonary embolism"."""
    first = itemData.contextItem(items[0])
    observed_literal = first.getLiteral()
    assert observed_literal == "pulmonary embolism"
def test_instantiate_itemData(self):
    """An itemData holding one contextItem must be truthy."""
    first_item = itemData.contextItem(self.items[0])
    container = itemData.itemData()
    container.append(first_item)
    assert container
def test_contextItem_getRE(items):
    """With no explicit regex, getRE() is the literal between \\b anchors."""
    second = itemData.contextItem(items[1])
    generated = second.getRE()
    wrapped_literal = r'\b%s\b' % items[1][0]
    assert generated == wrapped_literal