Example #1
    # Build a rotor from its name: look up its wiring, apply ring setting "A",
    # and attach the rotor's turnover notch.
    def Rotor(name):
        wiringConfiguration = WiringFactory.Wiring(name)
        offset = Offset(wiringConfiguration, "A")
        ringSetting = Ringsetting(offset, "A")

        turnover = RotorFactory.turnovers[name]
        return Rotor(ringSetting, turnover)
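A hypothetical call to the factory above (the rotor name "I" and the RotorFactory qualification are assumptions, since the enclosing class is not shown):

rotor = RotorFactory.Rotor("I")   # would build rotor "I" with ring setting "A" and its turnover notch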
Example #2
def process_file(json_filename, nb):
    docId, sentNr = re.search(r'wsj_(\d+)\.(\d+)\.json', json_filename).groups()
    sentNr = int(sentNr)
    data = json.load(open(json_filename))
    data['nom'] = []

    # index adjustments for consistency with ontonotes parses
    ptb_tree = Tree.parse(data['ptbparse'])
    ptbstring = tree_to_string(ptb_tree) # wrap traces

    onftree = Tree.parse(data['goldparse'])
    onfstring = tree_to_string(onftree) # wrap traces
    raw_onfstring = tree_to_string(onftree, wrap_traces=False)

    ptbstring_tok = add_spaces(ptbstring, onfstring)

    tokenize_offsets = split_offsets(ptbstring, ptbstring_tok)
    trace_offsets = Offset(ptbstring_tok.split(), onfstring.split(), ignore_braces=True)

    #print ptbstring
    #print ptbstring_tok
    #print onfstring
    #print tokenize_offsets
    #print trace_offsets

    pt = SpanTree.parse(data['ptbparse'])

    for nb_data in nb[docId][sentNr]:
        args = nb_data['args']

        # TODO: handle arguments that are chains or concatenations of multiple nodes

        new_args = []
        for pos, role in args:
            words, start, end = [], None, None
            leaf_id, depth = pt.parse_pos(pos)
            if leaf_id is not None and depth is not None:
                treepos = pt.get_treepos(leaf_id, depth)
                while is_trace(pt[treepos]):
                    trace_id = int(pt[treepos].leaves()[0].split('-')[-1])
                    print 'looking for trace', trace_id
                    tracepos = pt.find_trace(trace_id)
                    if tracepos is not None:
                        print 'trace %s found! Here:' % trace_id, tracepos
                        print pt[tracepos].pprint()
                        treepos = tracepos
                    else:
                        break # could not follow trace

                words = pt[treepos].leaves()
                start, end = span_from_treepos(pt, treepos)
                #print start, end,

                # adjust for the different tokenization
                assert start in tokenize_offsets
                start = min(tokenize_offsets[start])
                assert end in tokenize_offsets
                end = max(tokenize_offsets[end])

                # adjust for traces inserted in the OntoNotes parse
                start = trace_offsets.map_to_longer(start)
                end = trace_offsets.map_to_longer(end)
                #print '->', start, end

            phrase = ''
            if words:
                phrase = ' '.join(raw_onfstring.split()[start:end+1])
            new_args.append( [role, pos, start, end, phrase] )

        nb_data['args'] = new_args
        data['nom'].append(nb_data)

        #print nb_data
    json.dump(data, open(json_filename, 'w'), indent=2, sort_keys=True)
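Both this example and Example #5 build an Offset from a shorter token sequence and a longer one (the longer one containing extra tokens such as traces) and then call map_to_longer to carry indices from the shorter sequence over to the longer one. A minimal sketch of that assumed interface, for illustration only; the class name and its internals are assumptions, and the ignore_braces option is omitted:

class OffsetSketch(object):
    """Illustrative stand-in for the assumed Offset interface."""
    def __init__(self, shorter, longer):
        self.mapping = {}
        j = 0
        for i, tok in enumerate(shorter):
            # skip tokens that only occur in the longer sequence (e.g. traces)
            while j < len(longer) and longer[j] != tok:
                j += 1
            self.mapping[i] = j
            j += 1

    def map_to_longer(self, i):
        return self.mapping[i]

# OffsetSketch('a b c'.split(), 'a b *T*-1 c'.split()).map_to_longer(2) == 3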
Example #3
from offset import Offset
from math import sqrt

offset = Offset()


def is_unit(obj, process):
    # object type 3 = unit
    return process.readuint4(obj + offset.ObjectManager.ObjType) == 3


def is_player(obj, process):
    # object type 4 = player
    return process.readuint4(obj + offset.ObjectManager.ObjType) == 4


def position(unit, process):
    # units and players store their coordinates at the same offsets
    if is_unit(unit, process) or is_player(unit, process):
        x = process.readfloat(unit + offset.Unit.PosX)
        y = process.readfloat(unit + offset.Unit.PosY)
        z = process.readfloat(unit + offset.Unit.PosZ)
        return x, y, z
    else:
        return False
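A hypothetical way to exercise these helpers without attaching to a real game process; FakeProcess, the base address, and the stored values are all made up for illustration:

class FakeProcess(object):
    """Illustrative stand-in for the process reader assumed above."""
    def __init__(self, mem):
        self.mem = mem

    def readuint4(self, addr):
        return int(self.mem[addr])

    def readfloat(self, addr):
        return float(self.mem[addr])

base = 0x1000
mem = {
    base + offset.ObjectManager.ObjType: 3,   # type 3: treat this object as a unit
    base + offset.Unit.PosX: 1.0,
    base + offset.Unit.PosY: 2.0,
    base + offset.Unit.PosZ: 3.0,
}
print position(base, FakeProcess(mem))   # -> (1.0, 2.0, 3.0)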
Example #4
br.set_handle_robots(False)

# Follow refresh 0, but do not hang on refresh > 0
br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

#User-Agent
br.addheaders = [('User-agent', 'Firefox')]
#my user-agent:
#Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.73.11 (KHTML, like Gecko) Version/7.0.1 Safari/537.73.11

#Open a site
#response=br.open("http://www.fracfocusdata.org/DisclosureSearch/")
#html=response.read()

#Offset
offset = Offset()

#start...
count = 0
urlcount = url.rowcount
#urlcount=1292
#row_id=0

#Start time
stime = long(time.strftime("%Y%m%d%H0000"))

#DB connection
conn = psycopg2.connect("dbname=XXX user=XXX host=XXX password=XXX")
cur = conn.cursor()

cur.execute("SELECT * FROM url;")
Example #5
import sys
import json
from offset import Offset
from brackets import escape_brackets, unescape_brackets

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('json', action='store', help="json input file")
    parser.add_argument('jsonout', action='store', help="json output file")
    arguments = parser.parse_args(sys.argv[1:])

    data = json.load(open(arguments.json))

    cleanstring = escape_brackets(data['text']).split()
    tracestring = escape_brackets(data['treebank_sentence']).split()
    off = Offset(cleanstring, tracestring)

    words = data['words']
    #print 'c:', cleanstring
    #print 't:', tracestring
    for i, w in enumerate(words):
        lemma = escape_brackets(w[0])
        assert len(w) == 2
        adjusted_idx = off.map_to_longer(i)
        assert lemma == cleanstring[i]
        assert lemma == tracestring[adjusted_idx]
        w[1]['idx'] = adjusted_idx

    json.dump(data, open(arguments.jsonout,'w'), indent=2, sort_keys=True)
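escape_brackets and unescape_brackets are not shown; given the treebank context they presumably swap literal brackets for the usual Penn Treebank escape tokens. A guess at that behaviour (the real brackets module may differ):

PTB_ESCAPES = {'(': '-LRB-', ')': '-RRB-', '[': '-LSB-', ']': '-RSB-',
               '{': '-LCB-', '}': '-RCB-'}

def escape_brackets_sketch(text):
    return ' '.join(PTB_ESCAPES.get(tok, tok) for tok in text.split())

def unescape_brackets_sketch(text):
    reverse = dict((v, k) for k, v in PTB_ESCAPES.items())
    return ' '.join(reverse.get(tok, tok) for tok in text.split())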