def init():
    # Registers this UDF with ddext: one library, one text input and three
    # text outputs.
    # NOTE(review): registration order presumably has to match the UDF's
    # argument/column order -- confirm before reordering anything.

    # Library
    ddext.import_lib('re')

    # Input
    ddext.input('lattice_id', 'text')

    # Outputs
    ddext.returns('lattice_id', 'text')
    ddext.returns('speaker_id', 'text')
    ddext.returns('sentenceid', 'text')
def init():
    # Registers this UDF with ddext: the nltk library, three inputs
    # (lattice_id, candidate_id, word) and three outputs
    # (lattice_id, candidate_id, pos).
    # NOTE(review): registration order presumably has to match the UDF's
    # argument/column order -- confirm before reordering anything.

    # Library
    ddext.import_lib('nltk')

    # Inputs
    ddext.input('lattice_id', 'text')
    ddext.input('candidate_id', 'bigint')
    ddext.input('word', 'text')

    # Outputs
    ddext.returns('lattice_id', 'text')
    ddext.returns('candidate_id', 'bigint')
    ddext.returns('pos', 'text')
def init():
    # Registers this UDF with ddext: the json library, a publication id plus
    # a name prefix and two array inputs (fname, fval), and two outputs
    # (publication_id, features).
    # NOTE(review): registration order presumably has to match the UDF's
    # argument/column order -- confirm before reordering anything.

    # Library
    ddext.import_lib('json')

    # Inputs
    ddext.input('publication_id', 'bigint')
    ddext.input('fname_prefix', 'text')
    ddext.input('fname', 'text[]')
    ddext.input('fval', 'float[]')

    # Outputs
    ddext.returns('publication_id', 'bigint')
    ddext.returns('features', 'text')
def init():
    # Registers this UDF with ddext: the re and sys libraries, a word array
    # with an id and two (start, length) spans as inputs, and
    # (relation_id, feature) as outputs.

    # Libraries
    ddext.import_lib('re')
    ddext.import_lib('sys')

    # Inputs -- the order of these calls matters, keep it as is.
    ddext.input('words', 'text[]')
    ddext.input('id', 'bigint')
    ddext.input('p1_start', 'int')
    ddext.input('p1_length', 'int')
    ddext.input('p2_start', 'int')
    ddext.input('p2_length', 'int')

    # Outputs
    ddext.returns('relation_id', 'bigint')
    ddext.returns('feature', 'text')
def init():
    # Registers this UDF with ddext: the re and sys libraries, a word array
    # with an id and two (start, length) spans as inputs, and
    # (relation_id, feature) as outputs.

    # Libraries
    ddext.import_lib("re")
    ddext.import_lib("sys")

    # Inputs -- the order of these calls matters, keep it as is.
    ddext.input("words", "text[]")
    ddext.input("id", "bigint")
    ddext.input("p1_start", "int")
    ddext.input("p1_length", "int")
    ddext.input("p2_start", "int")
    ddext.input("p2_length", "int")

    # Outputs
    ddext.returns("relation_id", "bigint")
    ddext.returns("feature", "text")
def init():
    # Registers this UDF with ddext: the itertools library, a sentence id
    # with its words and NER tags as inputs, and one row per mention
    # (sentence_id, start_position, length, text, mention_id) as outputs.

    # Library
    ddext.import_lib('itertools')

    # Inputs -- these calls MUST stay in the correct order.
    ddext.input('sentence_id', 'text')
    ddext.input('words', 'text[]')
    ddext.input('ner_tags', 'text[]')

    # Outputs -- these calls MUST stay in the correct order.
    ddext.returns('sentence_id', 'text')
    ddext.returns('start_position', 'int')
    # Disabled output column:
    # ddext.returns('start_index', 'int')
    ddext.returns('length', 'int')
    ddext.returns('text', 'text')
    ddext.returns('mention_id', 'text')
def init():
    # Registers this UDF's libraries, inputs and outputs with ddext, then
    # reads the id/name table and the parent/child training pairs from the
    # data directory next to this file.
    #
    # Raises IOError/OSError when either data file cannot be opened.
    #
    # NOTE(review): the collections built below are plain locals and are not
    # stored anywhere visible in this block, so they are gone once init()
    # returns -- confirm how the rest of the UDF is meant to reach them.

    # Libraries. Names are passed as strings (as every other ddext UDF here
    # does); the bare identifiers used before were not library names.
    ddext.import_lib('csv')
    ddext.import_lib('os')
    ddext.import_lib('sys')
    ddext.import_lib('re')

    # Inputs
    ddext.input('sentence_id', 'text')
    ddext.input('p1_mention_id', 'text')
    ddext.input('p1_text', 'text')
    ddext.input('p2_mention_id', 'text')
    ddext.input('p2_text', 'text')

    # Outputs
    ddext.returns('person1_id', 'text')
    ddext.returns('person2_id', 'text')
    ddext.returns('sentence_id', 'text')
    ddext.returns('description', 'text')
    ddext.returns('is_true', 'boolean')
    ddext.returns('relation_id', 'text')
    # NOTE(review): 'description' is registered a second time here. Left
    # untouched because the rows produced by run() are not visible from this
    # block -- confirm whether this duplicate column is intended.
    ddext.returns('description', 'text')
    ddext.returns('id', 'bigint')

    # The directory of this UDF file
    BASE_DIR = os.path.dirname(os.path.realpath(__file__))

    # Load the id -> name table (tab-separated: id, name).
    ids_names = {}
    with open(BASE_DIR + '/../data/ids_names.tsv') as f:
        for line in f:
            fields = line.split('\t')
            if len(fields) < 2:
                # Blank or tab-less line: skip it rather than fail with an
                # IndexError, like the training-data loader below does.
                continue
            ids_names[fields[0]] = fields[1].rstrip()

    # Load the parent dictionary for distant supervision.
    # The first person is the child, the second the parent.
    kid_parent_relationship = set()
    people_already_seen_as_kid = set()
    people_already_seen_as_parent = set()
    non_kid_parent_relationship = set()
    with open(BASE_DIR + '/../data/training-data-parent.tsv') as f:
        for line in f:
            # Raw string: \s must reach the regex engine unchanged.
            arr = re.split(r'\s*\t\s*', line.strip().lower())
            if len(arr) != 3:
                # Malformed row: ignore it.
                continue
            kid_name, parent_name, relation = arr
            if relation == "1":
                # Positive example: a known kid/parent pair.
                kid_parent_relationship.add((kid_name, parent_name))
                people_already_seen_as_kid.add(kid_name)
                people_already_seen_as_parent.add(parent_name)
            else:
                # Negative example: a pair known not to be kid/parent.
                non_kid_parent_relationship.add((kid_name, parent_name))
def init():
    # Registers this UDF with ddext: the nltk library, a paragraph with its
    # three ids as inputs, and one row per sentence (ids, sentence text,
    # words, POS tags, sentence offset) as outputs.
    # NOTE(review): registration order presumably has to match the UDF's
    # argument/column order -- confirm before reordering anything.

    # Library
    ddext.import_lib('nltk')

    # Inputs
    ddext.input('document_id', 'bigint')
    ddext.input('pid', 'bigint')
    ddext.input('paragraph_id', 'bigint')
    ddext.input('paragraph', 'text')

    # Outputs
    ddext.returns('document_id', 'bigint')
    ddext.returns('pid', 'bigint')
    ddext.returns('paragraph_id', 'bigint')
    ddext.returns('sentence', 'text')
    ddext.returns('words', 'text[]')
    # Disabled output column:
    # ddext.returns('lemma', 'text[]')
    ddext.returns('pos_tags', 'text[]')
    # Disabled output columns:
    # ddext.returns('dependencies', 'text[]')
    # ddext.returns('ner_tags', 'text[]')
    ddext.returns('sentence_offset', 'bigint')
def init():
    # Copies APP_HOME from the environment into SD, then registers this
    # UDF's libraries, inputs and outputs with ddext.
    # Raises KeyError when the APP_HOME environment variable is not set.
    SD['APP_HOME'] = os.environ['APP_HOME']

    # Libraries
    ddext.import_lib('csv')
    ddext.import_lib('os')
    # Equivalent of "from collections import defaultdict":
    ddext.import_lib('defaultdict', 'collections')
    # Other import_lib forms, for reference:
    #   "from collections import defaultdict as defdict":
    #       ddext.import_lib('defaultdict', 'collections', 'defdict')
    #   "import defaultdict as defdict":
    #       ddext.import_lib('defaultdict', as_name='defdict')

    # Inputs -- MUST be in the correct order: the same as the SELECT order
    # and the same as the argument order of "run".
    ddext.input('sentence_id', 'text')
    ddext.input('p1_id', 'text')
    ddext.input('p1_text', 'text')
    ddext.input('p2_id', 'text')
    ddext.input('p2_text', 'text')

    # Outputs -- MUST be in the correct order.
    ddext.returns('person1_id', 'text')
    ddext.returns('person2_id', 'text')
    ddext.returns('sentence_id', 'text')
    ddext.returns('description', 'text')
    ddext.returns('is_true', 'boolean')
    ddext.returns('relation_id', 'text')
def init():
    # Registers this UDF with ddext: csv, os and collections.defaultdict as
    # libraries, a sentence id with two (id, text) person mentions as inputs,
    # and one labelled relation row as output.

    # Libraries
    ddext.import_lib('csv')
    ddext.import_lib('os')
    # Equivalent of "from collections import defaultdict":
    ddext.import_lib('defaultdict', 'collections')
    # Other import_lib forms, for reference:
    #   "from collections import defaultdict as defdict":
    #       ddext.import_lib('defaultdict', 'collections', 'defdict')
    #   "import defaultdict as defdict":
    #       ddext.import_lib('defaultdict', as_name='defdict')

    # Inputs -- MUST be in the correct order: the same as the SELECT order
    # and the same as the argument order of "run".
    ddext.input('sentence_id', 'text')
    ddext.input('p1_id', 'text')
    ddext.input('p1_text', 'text')
    ddext.input('p2_id', 'text')
    ddext.input('p2_text', 'text')

    # Outputs -- MUST be in the correct order.
    ddext.returns('person1_id', 'text')
    ddext.returns('person2_id', 'text')
    ddext.returns('sentence_id', 'text')
    ddext.returns('description', 'text')
    ddext.returns('is_true', 'boolean')
    ddext.returns('relation_id', 'text')
def init():
    # Registers this UDF with ddext: the re library, one text input
    # (author_names) and one text output (name).

    # Library
    ddext.import_lib('re')

    # Input
    ddext.input('author_names', 'text')

    # Output
    ddext.returns('name', 'text')