# Factory helper: assembles a rotor from its name. Renamed from `Rotor`
# so that the final call resolves to the Rotor class rather than
# recursing into this function.
def make_rotor(name):
    wiringConfiguration = WiringFactory.Wiring(name)
    offset = Offset(wiringConfiguration, "A")
    ringSetting = Ringsetting(offset, "A")
    turnover = RotorFactory.turnovers[name]
    return Rotor(ringSetting, turnover)
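# The Offset above belongs to an Enigma rotor model; by the usual
# convention it shifts letters through the alphabet by the distance of
# a position letter from 'A'. A minimal illustrative sketch of that
# idea only: the real Offset also wraps a wiring configuration, and
# the helper name below is hypothetical.
import string


def shift_letter(letter, position):
    alphabet = string.ascii_uppercase
    k = alphabet.index(position)
    # rotate through the alphabet, wrapping past 'Z'
    return alphabet[(alphabet.index(letter) + k) % 26]


assert shift_letter('A', 'A') == 'A'  # position 'A' is the identity
assert shift_letter('Z', 'B') == 'A'  # shifting by one wraps around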
def process_file(json_filename, nb):
    docId, sentNr = re.search(r'wsj_(\d+)\.(\d+)\.json', json_filename).groups()
    sentNr = int(sentNr)
    data = json.load(open(json_filename))
    data['nom'] = []
    # index adjustments for consistency with ontonotes parses
    ptb_tree = Tree.parse(data['ptbparse'])
    ptbstring = tree_to_string(ptb_tree)  # wrap traces
    onftree = Tree.parse(data['goldparse'])
    onfstring = tree_to_string(onftree)  # wrap traces
    raw_onfstring = tree_to_string(onftree, wrap_traces=False)
    ptbstring_tok = add_spaces(ptbstring, onfstring)
    tokenize_offsets = split_offsets(ptbstring, ptbstring_tok)
    trace_offsets = Offset(ptbstring_tok.split(), onfstring.split(),
                           ignore_braces=True)
    pt = SpanTree.parse(data['ptbparse'])
    for nb_data in nb[docId][sentNr]:
        args = nb_data['args']
        # TODO: arguments that are chains or concatenations of multiple nodes
        new_args = []
        for pos, role in args:
            words, start, end = [], None, None
            leaf_id, depth = pt.parse_pos(pos)
            if leaf_id is not None and depth is not None:
                treepos = pt.get_treepos(leaf_id, depth)
                # follow co-indexed traces until a non-trace node is reached
                while is_trace(pt[treepos]):
                    trace_id = int(pt[treepos].leaves()[0].split('-')[-1])
                    print('looking for trace', trace_id)
                    tracepos = pt.find_trace(trace_id)
                    if tracepos is not None:
                        print('trace %s found! Here:' % trace_id, tracepos)
                        print(pt[tracepos].pprint())
                        treepos = tracepos
                    else:
                        break  # could not follow trace
                words = pt[treepos].leaves()
                start, end = span_from_treepos(pt, treepos)
                # adjust for different tokenization
                assert start in tokenize_offsets
                start = min(tokenize_offsets[start])
                assert end in tokenize_offsets
                end = max(tokenize_offsets[end])
                # adjust for traces inserted in ontonotes
                start = trace_offsets.map_to_longer(start)
                end = trace_offsets.map_to_longer(end)
            phrase = ''
            if words:
                phrase = ' '.join(raw_onfstring.split()[start:end + 1])
            new_args.append([role, pos, start, end, phrase])
        nb_data['args'] = new_args
        data['nom'].append(nb_data)
    json.dump(data, open(json_filename, 'w'), indent=2, sort_keys=True)
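# For readers unfamiliar with the trace following above: empty-category
# leaves like '*T*-1' carry a numeric index that names their filler
# constituent, and the while loop rewrites the tree position until it
# lands on a non-trace node. A minimal hedged illustration with nltk;
# SpanTree, is_trace, and find_trace are project-local helpers not
# shown here, so this sketch only demonstrates the indexing convention.
from nltk import Tree

# Toy parse with a wh-trace: "what" is coindexed with the object gap.
t = Tree.fromstring(
    "(SBARQ (WHNP-1 (WP what)) (SQ (VBD did) (NP (PRP you)) "
    "(VP (VB see) (NP (-NONE- *T*-1)))))")

# A leaf like '*T*-1' is a trace; its numeric suffix names the filler.
trace_leaf = '*T*-1'
trace_id = int(trace_leaf.split('-')[-1])
# The filler is the constituent whose label carries the same index.
fillers = [st for st in t.subtrees() if st.label().endswith('-%d' % trace_id)]
print(fillers[0])  # (WHNP-1 (WP what))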
from offset import Offset
from math import sqrt

offset = Offset()


def is_unit(obj, process):
    # object type 3 marks a unit in the object manager
    return process.readuint4(obj + offset.ObjectManager.ObjType) == 3


def is_player(obj, process):
    # object type 4 marks a player
    return process.readuint4(obj + offset.ObjectManager.ObjType) == 4


def position(unit, process):
    # units and players share the same position field layout
    if is_unit(unit, process) or is_player(unit, process):
        x = process.readfloat(unit + offset.Unit.PosX)
        y = process.readfloat(unit + offset.Unit.PosY)
        z = process.readfloat(unit + offset.Unit.PosZ)
        return x, y, z
    return False
br.set_handle_robots(False)
# Follow refresh 0 but do not hang on refresh > 0
br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
# User-Agent
br.addheaders = [('User-agent', 'Firefox')]
# my user-agent:
# Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.73.11 (KHTML, like Gecko) Version/7.0.1 Safari/537.73.11

# Open a site
# response = br.open("http://www.fracfocusdata.org/DisclosureSearch/")
# html = response.read()

# Offset
offset = Offset()

# start...
count = 0
urlcount = url.rowcount
# urlcount = 1292
# row_id = 0

# Start time, encoded as an integer YYYYMMDDHH0000
stime = int(time.strftime("%Y%m%d%H0000"))

# DB connection
conn = psycopg2.connect("dbname=XXX user=XXX host=XXX password=XXX")
cur = conn.cursor()
cur.execute("SELECT * FROM url;")
import sys
import json

from offset import Offset
from brackets import escape_brackets, unescape_brackets

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('json', action='store', help="json input file")
    parser.add_argument('jsonout', action='store', help="json output file")
    arguments = parser.parse_args(sys.argv[1:])

    data = json.load(open(arguments.json))
    cleanstring = escape_brackets(data['text']).split()
    tracestring = escape_brackets(data['treebank_sentence']).split()
    off = Offset(cleanstring, tracestring)
    words = data['words']
    for i, w in enumerate(words):
        lemma = escape_brackets(w[0])
        assert len(w) == 2
        adjusted_idx = off.map_to_longer(i)
        assert lemma == cleanstring[i]
        assert lemma == tracestring[adjusted_idx]
        w[1]['idx'] = adjusted_idx
    json.dump(data, open(arguments.jsonout, 'w'), indent=2, sort_keys=True)
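# The parsing snippets above both lean on the same Offset helper from
# offset.py: built from a shorter and a longer token sequence, it maps
# an index in the shorter sequence to the matching index in the longer
# one, skipping inserted tokens such as traces. A minimal illustrative
# sketch of that interface under those assumptions; the real class may
# differ (for instance, the ignore_braces option is not modeled here).
class Offset(object):
    def __init__(self, shorter, longer):
        self.mapping = {}
        j = 0
        for i, tok in enumerate(shorter):
            while j < len(longer) and longer[j] != tok:
                j += 1  # skip tokens inserted into the longer sequence
            self.mapping[i] = j
            j += 1

    def map_to_longer(self, i):
        return self.mapping[i]


if __name__ == '__main__':
    short = 'the dog barked'.split()
    longer = 'the dog *T*-1 barked'.split()
    off = Offset(short, longer)
    assert off.map_to_longer(2) == 3  # 'barked' is shifted by the trace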