Example #1
def cutActivity(row):
    fromdate = utils.parse(row[2])
    todate = utils.parse(row[3])
    # print fromdate, todate
    if (todate.date() - fromdate.date()).days != 1:
        print "error cutting activity: wrong date range"
        return [0, 0]
    else:
        middate = copy.deepcopy(todate).replace(hour=0, minute=0, second=0)
        firsthalf = [row[0], utils.deltatohours(middate - fromdate), fromdate.isoformat(' '), middate.isoformat(' ')]
        secondhalf = [row[0], utils.deltatohours(todate - middate), middate.isoformat(' '), todate.isoformat(' ')]
        # print firsthalf, secondhalf
        return [firsthalf, secondhalf]
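A minimal, self-contained sketch of the same midnight-split idea using only the standard library (the original relies on a project-specific `utils` module, so the helper name and sample timestamps below are illustrative assumptions):

from datetime import datetime

def split_at_midnight(start, end):
    # Split an interval that crosses exactly one midnight into two halves.
    if (end.date() - start.date()).days != 1:
        raise ValueError("interval must span exactly one midnight")
    midnight = datetime.combine(end.date(), datetime.min.time())
    return (start, midnight), (midnight, end)

first, second = split_at_midnight(datetime(2024, 1, 1, 22, 30),
                                  datetime(2024, 1, 2, 6, 0))
print(first)   # (datetime(2024, 1, 1, 22, 30), datetime(2024, 1, 2, 0, 0))
print(second)  # (datetime(2024, 1, 2, 0, 0), datetime(2024, 1, 2, 6, 0))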
Example #2
 def join(self, layer_attr, data_table, data_attr, prefix=''):
     """
     Joins a data table to this feature collection.
     """
     # Build first a dictionary for the layer attribute
     idx = {}
     for feat in self.features:
         idx[parse(feat['properties'][layer_attr])] = feat
     # Now go through the data table and retrieve all data one by one
     for d in data_table:
         feat = idx.get(parse(d[data_attr]), None)
         if feat is None: continue
         for k in d:
             feat['properties'][prefix+k] = parse(d[k])
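For reference, a self-contained sketch of the same index-then-join pattern on plain dictionaries; the `parse` normalisation step of the original is skipped here (raw values are copied as-is), an assumption made purely to keep the sketch runnable:

def join_tables(features, layer_attr, data_table, data_attr, prefix=''):
    # Index features by the join key, then copy matching rows into 'properties'.
    idx = {feat['properties'][layer_attr]: feat for feat in features}
    for row in data_table:
        feat = idx.get(row[data_attr])
        if feat is None:
            continue
        for k, v in row.items():
            feat['properties'][prefix + k] = v
    return features

features = [{'properties': {'id': 1}}, {'properties': {'id': 2}}]
table = [{'id': 1, 'population': 1200}]
print(join_tables(features, 'id', table, 'id', prefix='data_'))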
Example #3
	def rewriteQuestion(inputSentence, addWHword = False):
		utils = RewriterUtils()
		utils.parse(inputSentence)
		vbzIndex = QuestionRewriter.getTheIndexOfVBZ(utils.wordList, utils.posMap)
		if vbzIndex == -1:
			raise Exception("No VBZ found!")

		rewrittenQuestion = [] 
		for i in xrange(vbzIndex + 1, len(utils.wordList)):
			rewrittenQuestion.append(utils.wordList[i])
		if addWHword:
			rewrittenQuestion.append(utils.wordList[vbzIndex])
		for i in xrange(0, vbzIndex):
			rewrittenQuestion.append(utils.wordList[i])
		return rewrittenQuestion
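Because the original depends on a project-specific RewriterUtils, here is a minimal, self-contained sketch of the same rotation idea (the hard-coded POS tags are an assumption for illustration): everything after the first VBZ-tagged token moves to the front, the VBZ itself is optionally kept, and the words that preceded it go last.

def rewrite_question(words, pos_tags, add_wh_word=False):
    if 'VBZ' not in pos_tags:
        raise ValueError("No VBZ found!")
    vbz_index = pos_tags.index('VBZ')
    rewritten = words[vbz_index + 1:]
    if add_wh_word:
        rewritten.append(words[vbz_index])
    rewritten.extend(words[:vbz_index])
    return rewritten

print(rewrite_question(['What', 'is', 'the', 'capital', 'of', 'France'],
                       ['WP', 'VBZ', 'DT', 'NN', 'IN', 'NNP'],
                       add_wh_word=True))
# ['the', 'capital', 'of', 'France', 'is', 'What']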
Example #4
def main():
    usage = "usage: %prog <search>"
    opts = utils.parse(sys.argv[1:], {}, ".splunkrc", usage=usage)

    if len(opts.args) != 1:
        utils.error("Search expression required", 2)
    search = opts.args[0]

    service = connect(**opts.kwargs)

    try:
        result = service.get(
            "search/jobs/export",
            search=search,
            earliest_time="rt", 
            latest_time="rt", 
            search_mode="realtime")

        reader = results.ResultsReader(result.body)
        while True:
            kind = reader.read()
            if kind is None: break
            if kind == results.RESULT:
                event = reader.value
                pprint(event)

    except KeyboardInterrupt:
        print "\nInterrupted."
Example #5
def main(argv):
    usage = "usage: %prog [options]"

    redirect_port_args = {
        "redirectport": {
            "flags": ["--redirectport"],
            "default": PORT,
            "help": "Port to use for redirect server (default: %s)" % PORT,
        },
    }

    opts = utils.parse(argv, redirect_port_args, ".splunkrc", usage=usage)

    # We have to provide a sensible value for namespace
    namespace = opts.kwargs["namespace"]
    namespace = namespace if namespace else "-"

    # Encode these arguments
    args = urllib.urlencode([
            ("scheme", opts.kwargs["scheme"]),
            ("host", opts.kwargs["host"]),
            ("port", opts.kwargs["port"]),
            ("redirecthost", "localhost"),
            ("redirectport", opts.kwargs["redirectport"]),
            ("username", opts.kwargs["username"]),
            ("password", opts.kwargs["password"]),
            ("namespace", namespace)
        ])

    # Launch the browser
    webbrowser.open("file://%s" % os.path.join(os.getcwd(), "explorer.html?%s" % args))

    # And serve the files
    server.serve(opts.kwargs["redirectport"])
Example #6
def main():
    usage = ""

    argv = sys.argv[1:]

    splunk_opts = utils.parse(argv, {}, ".splunkrc", usage=usage)
    tracker = AnalyticsTracker("cli_app", splunk_opts.kwargs)
Example #7
def main():
    """Main program."""

    usage = "usage: %prog [options] <command> [<args>]"

    argv = sys.argv[1:]

    command = None
    commands = ['create', 'delete', 'list']

    # parse args, connect and setup 
    opts = parse(argv, {}, ".splunkrc", usage=usage)
    service = connect(**opts.kwargs)
    program = Program(service)

    if len(opts.args) == 0:
        # no args means list
        command = "list"
    elif opts.args[0] in commands:
        # args present and the first one is in our list of commands: extract
        # the command and remove it from the regular args
        command = opts.args[0]
        opts.args.remove(command)
    else:
        # first one not in our list, default to list
        command = "list"

    program.run(command, opts)
Example #8
def main(argv):
    usage = "usage: %prog [options]"
    opts = parse(argv, {}, ".splunkrc", usage=usage)
    service = client.connect(**opts.kwargs)

    for logger in service.loggers:
        print "%s (%s)" % (logger.name, logger['level'])
Example #9
    def test_10(self):
        ans = -8448

        nums = parse('test10.txt')
        result = median_maintenance(nums)

        self.assertEqual(ans, result)
Example #10
def main():
    usage = "usage: follow.py <search>"
    opts = utils.parse(sys.argv[1:], {}, ".splunkrc", usage=usage)

    if len(opts.args) != 1:
        utils.error("Search expression required", 2)
    search = opts.args[0]

    service = client.connect(**opts.kwargs)

    job = service.jobs.create(search, earliest_time="rt", latest_time="rt", search_mode="realtime")

    # Wait for the job to transition out of QUEUED and PARSING so that
    # we can tell whether it's a transforming search or not.
    while True:
        job.refresh()
        if job["dispatchState"] not in ["QUEUED", "PARSING"]:
            break
        time.sleep(2)  # Wait

    if job["reportSearch"] is not None:  # Is it a transforming search?
        count = lambda: int(job["numPreviews"])
        items = lambda _: job.preview()
    else:
        count = lambda: int(job["eventCount"])
        items = lambda offset: job.events(offset=offset)

    try:
        follow(job, count, items)
    except KeyboardInterrupt:
        print "\nInterrupted."
    finally:
        job.cancel()
Example #11
def main(argv):
    usage = "usage: %prog [options]"

    redirect_port_args = {
        "redirectport": {
            "flags": ["--redirectport"],
            "default": PORT,
            "help": "Port to use for redirect server (default: %s)" % PORT,
        },
    }

    opts = utils.parse(argv, redirect_port_args, ".splunkrc", usage=usage)

    args = [("scheme", opts.kwargs["scheme"]),
            ("host", opts.kwargs["host"]),
            ("port", opts.kwargs["port"]),
            ("redirecthost", "localhost"),
            ("redirectport", opts.kwargs["redirectport"]),
            ("username", opts.kwargs["username"]),
            ("password", opts.kwargs["password"])]
    if 'app' in opts.kwargs.keys():
        args.append(('app', opts.kwargs['app']))
    if 'owner' in opts.kwargs.keys():
        args.append(('owner', opts.kwargs['owner']))

    # Encode these arguments
    args = urllib.urlencode(args)

    # Launch the browser
    webbrowser.open("file://%s" % os.path.join(os.getcwd(), "explorer.html?%s" % args))

    # And serve the files
    server.serve(opts.kwargs["redirectport"])
Example #12
def main():
    """ main entry """
    options = parse(sys.argv[1:], CLIRULES, ".splunkrc")

    if options.kwargs['omode'] not in OUTPUT_MODES:
        print "output mode must be one of %s, found %s" % (OUTPUT_MODES,
              options.kwargs['omode'])
        sys.exit(1)

    service = connect(**options.kwargs)

    if path.exists(options.kwargs['output']):
        if options.kwargs['recover'] == False:
            error("Export file exists, and recover option nor specified")
        else:
            options.kwargs['end'] = recover(options)
            options.kwargs['fixtail'] = True
            openmode = "a"
    else:
        openmode = "w"
        options.kwargs['fixtail'] = False
        
    try:
        options.kwargs['fd'] = open(options.kwargs['output'], openmode)
    except IOError:
        print "Failed to open output file %s w/ mode %s" % \
                             (options.kwargs['output'], openmode)
        sys.exit(1)

    export(options, service)
Example #13
def main(argv):
    weeks = []
    sessions = []

    with open('weeks.csv', newline='') as csvfile:
        weeksreader = reader(csvfile, quotechar='"')
        weeks = make_weeks(weeksreader)

    with open('tt.csv', newline='') as csvfile:
        ttreader = reader(csvfile, quotechar='"')
        errors = []
        sessions = parse(ttreader, config.MODULES, weeks, errors)
        for i, error in enumerate(errors):
            if i == len(errors) - 1:
                print(error[0] + '\n' + error[1])
            else:
                print(error[0] + '\n' + error[1] + '\n')

    calendar = []
    calendar.append(['Subject', 'Start Date', 'End Date',
                    'Start Time', 'End Time', 'Location'])
    for session in sessions:
        calendar.append([
            session.title + '-' + session.kind,
            session.start.strftime('%Y/%m/%d'),
            session.end.strftime('%Y/%m/%d'),
            session.start.strftime('%H:%M'),
            session.end.strftime('%H:%M'),
            session.location
        ])

    with open('parsed.csv', 'w', newline='') as csvfile:
        csvwriter = writer(csvfile, delimiter=',', quotechar='"',
                           quoting=QUOTE_MINIMAL)
        csvwriter.writerows(calendar)
Example #14
def main():
    usage = ""

    argv = sys.argv[1:]

    opts = utils.parse(argv, {}, ".splunkrc", usage=usage)
    retriever = AnalyticsRetriever(opts.args[0], opts.kwargs)    
Example #15
def main():
    usage = "usage: %prog <search>"
    opts = utils.parse(sys.argv[1:], {}, ".splunkrc", usage=usage)
    
    service = connect(**opts.kwargs)

    try:
        result = service.get(
            "search/jobs/export",
            search="search instantaneous_eps",
            index="_internal",
            earliest_time="rt", 
            latest_time="rt", 
            search_mode="realtime")

        for result in ResultsReader(result.body):
            if result is not None:
                if isinstance(result, dict):
                    # extract only the event contents
                    event = result.items()[2][1]
                    # strip out the leading timestamp fields, they don't read well
                    shorte = event[61:]
                    # send the shortened event contents to the speech synth
                    subprocess.call(["/usr/bin/say", shorte])

    except KeyboardInterrupt:
        print "\nInterrupted."
Example #16
    def setUpClass(cls):
        cls.opts = parse([], {}, ".splunkrc")

        # Before we start, make sure splunk doesn't need a restart.
        service = client.connect(**cls.opts.kwargs)
        if service.restart_required:
            service.restart(timeout=120)
Example #17
    def test_sample_1(self):
        graph = parse('sample-1.txt')

        result = karger(graph)
        ans = 2

        self.assertEqual(ans, result)
Example #18
 def test_parse(self):
     for func_etalon, func_str in self.funcs:
         func = parse(func_str, "x, y, z")
         assert all((
             x == y for x, y in zip(
                 (func(x, y, z) for x, y, z in self.xs),
                 (func_etalon(x, y, z) for x, y, z in self.xs)
             )
         ))
Example #19
def main(argv):
    opts = parse(argv, {}, ".splunkrc")
    context = connect(**opts.kwargs)
    service = Service(context)
    assert service.apps().status == 200
    assert service.indexes().status == 200
    assert service.info().status == 200
    assert service.settings().status == 200
    assert service.search("search 404").status == 200
Example #20
def main():
    try:
        in_file_name = sys.argv[1]
    except Exception:
        sys.stderr.write('Usage: python median_maintenance.py ${in_file_name}')
        return

    nums = parse(in_file_name)

    result = median_maintenance(nums)

    print(result)
Example #21
def main(argv):
    """ main entry """
    usage = 'usage: %prog --help for options'
    opts = utils.parse(argv, RULES, ".splunkrc", usage=usage)

    context = binding.connect(**opts.kwargs)
    operation = None

    # splunk.binding.debug = True # for verbose information (helpful for debugging)

    # Extract from command line and build into variable args
    kwargs = {}
    for key in RULES.keys():
        if opts.kwargs.has_key(key):
            if key == "operation":
                operation = opts.kwargs[key]
            else:
                kwargs[key] = urllib.quote(opts.kwargs[key])

    # no operation? if name present, default to list, otherwise list-all
    if not operation:
        if kwargs.has_key('name'):
            operation = 'list'
        else:
            operation = 'list-all'

    # pre-sanitize
    if (operation != "list" and operation != "create" 
                            and operation != "delete"
                            and operation != "list-all"):
        print "operation %s not one of list-all, list, create, delete" % operation
        sys.exit(0)

    if not kwargs.has_key('name') and operation != "list-all":
        print "operation requires a name"
        sys.exit(0)

    # remove arg 'name' from passing through to the operation builder, except on create and list-all
    if operation != "create" and operation != "list-all":
        name = kwargs['name']
        kwargs.pop('name')

    # perform operation on saved search created with args from cli
    if operation == "list-all":
        result = context.get("saved/searches",  **kwargs)
    elif operation == "list":
        result = context.get("saved/searches/%s" % name, **kwargs)
    elif operation == "create":
        result = context.post("saved/searches", **kwargs)
    else:
        result = context.delete("saved/searches/%s" % name, **kwargs)
    print "HTTP STATUS: %d" % result.status
    xml_data = result.body.read()
    sys.stdout.write(xml_data)
Example #22
def main():
    argv = sys.argv[1:]

    opts = utils.parse(argv, {}, ".splunkrc")
    global splunk_opts
    splunk_opts = opts.kwargs

    global tracker
    tracker = AnalyticsTracker("analytics", splunk_opts)

    debug(True)
    run(reloader=True)
Example #23
def main():
    usage = "usage: oneshot.py <search>"
    opts = utils.parse(sys.argv[1:], {}, ".splunkrc", usage=usage)
    if len(opts.args) != 1:
        utils.error("Search expression required", 2)

    search = opts.args[0]
    service = connect(**opts.kwargs)
    socket.setdefaulttimeout(None)
    response = service.jobs.oneshot(search)

    pretty(response)
Example #24
def main():
    try:
        in_file_name = sys.argv[1]
    except Exception:
        sys.stderr.write('usage: python karger.py ${in_file_name}')
        return

    graph = parse(in_file_name)

    result = karger(graph)

    print(result)
Example #25
def main():
    opts = parse(sys.argv[1:], {}, ".splunkrc")
    service = connect(**opts.kwargs)

    for item in service.event_types:
        print "%s" % item.name
        print "=" * len(item.name)
        content = item.content
        for key in sorted(content.keys()):
            value = content[key]
            print "%s: %s" % (key, value)
        print
Example #26
def getWeekActivity(fromdate=(datetime.datetime.today()).date().isoformat()):
    # first get this monday
    # if after or equal to this monday, this week
    # otherwise, previous weeks
    fromdate = utils.parse(fromdate)
    today = datetime.datetime.today()
    d1 = today.day - today.weekday()
    thismonday = datetime.datetime.today().replace(day=d1)
    if fromdate > thismonday:
        return getActivities(thismonday.date().isoformat(), fromdate.isoformat())
    else:
        thatmonday = (fromdate - td(days=(fromdate.weekday())))
        return getActivities(thatmonday.date().isoformat(), (thatmonday + td(days=7)).date().isoformat())
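Note that `replace(day=d1)` can raise ValueError when the current week starts in the previous month (d1 becomes zero or negative). A hedged, self-contained alternative for "the Monday of the week containing a date" is to subtract the weekday directly:

from datetime import date, timedelta

def monday_of_week(d):
    # Monday is weekday 0, so stepping back weekday() days always lands on it.
    return d - timedelta(days=d.weekday())

print(monday_of_week(date(2024, 3, 1)))  # 2024-02-26, a Monday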
Example #27
def main():
    opts = parse(sys.argv[1:], {}, ".splunkrc")
    service = connect(**opts.kwargs)

    for item in service.inputs:
        header =  "%s (%s)" % (item.name, item.kind)
        print header
        print '='*len(header)
        content = item.content
        for key in sorted(content.keys()):
            value = content[key]
            print "%s: %s" % (key, value)
        print
Example #28
def main():
    for filename in os.listdir(DATA):
        filename = join(DATA, filename)
        with open(filename) as f:
            text = f.read()
            sections = extract_sections(text)
            for s in sections:
                print s
                parsed = parse(Lexer, Parser, ParserState, s)
                print s
                print filename
                pprint.pprint(parsed)
            import pdb;pdb.set_trace()
Example #29
def main():
    usage = "usage: oneshot.py <search>"
    opts = utils.parse(sys.argv[1:], {}, ".splunkrc", usage=usage)
    opts.kwargs["namespace"] = "*:*" # Override namespace

    if len(opts.args) != 1:
        utils.error("Search expression required", 2)
    search = opts.args[0]

    service = connect(**opts.kwargs)
    socket.setdefaulttimeout(None)
    response = service.jobs.create(search, exec_mode="oneshot")

    pretty(response)
Example #30
def main():
    opts = parse(sys.argv[1:], {}, ".splunkrc")
    service = connect(**opts.kwargs)

    for group in service.fired_alerts:
        header = "%s (count: %d)" % (group.name, group.count)
        print "%s" % header
        print '='*len(header)
        alerts = group.alerts
        for alert in alerts.list():
            content = alert.content
            for key in sorted(content.keys()):
                value = content[key]
                print "%s: %s" % (key, value)
            print
Example #31
                 torch.pow((torch.mean(train_y) - test_y), 2) /
                 (2. * torch.pow(y_std_train, 2)))
    msll = torch.mean(sll)
    nll_sum = nll.sum()
    print("Summed NLL: {}".format(nll_sum))
    print("MSLL: {}".format(msll))

    del data_lh
    del data_mod

    return float(test_rmse), float(unnorm_test_rmse), float(
        alt_sampler.total_time), float(nll_sum), float(msll)


if __name__ == '__main__':
    args = utils.parse()
    if args.data != 'all':
        # data_l = ['fertility2', 'concreteslump2', 'servo2', 'machine2', 'yacht2', 'housing2', 'energy2']
        # data_l = ['yacht2','housing2','energy2']
        # data_l = ['concreteslump2']
        data_l = [args.data]
        with open('log_file_{}_{}_latent.out'.format(args.mlatent, args.data),
                  'w+') as f:
            for dataset in data_l:
                try:
                    test_rmses = []
                    unnorm_test_rmses = []
                    times = []
                    nlls = []
                    mslls = []
                    for experiment in range(10):
Example #32
 def __init__(self, *args, **kwargs):
     super(TestGenerator, self).__init__(*args, **kwargs)
     initial_state, self.rules = parse(test_data)
     self.initial_state = '.' * 3 + initial_state + '.' * 11
Example #33
    if len(sys.argv) != 2:
        print('[Usage] python script <input img dir>')
        print('[Example] python script ../input_image/parrington')
        sys.exit(0)

    input_dirname = sys.argv[1]

    result_f = os.path.join(result_folder, os.path.basename(input_dirname))

    if not os.path.isdir(result_f):
        os.makedirs(result_f)

    pool = mp.Pool(mp.cpu_count())

    img_list, focal_length = utils.parse(input_dirname)

    # img_list = img_list[2:4]

    print('Warp images to cylinder')
    cylinder_img_list = pool.starmap(utils.cylindrical_projection,
                                     [(img_list[i], focal_length[i])
                                      for i in range(len(img_list))])

    _, img_width, _ = img_list[0].shape
    stitched_image = cylinder_img_list[0].copy()

    shifts = [[0, 0]]
    cache_feature = [[], []]

    # add first img for end to end align
Example #34
def main():
    opts = utils.parse(sys.argv[1:], {}, ".splunkrc")
    for arg in opts.args: 
        print_response(invoke(arg, **opts.kwargs))
Example #35
    return tests


if __name__ == '__main__':
    if len(sys.argv) != 3:
        print 'Expected input format: python EvaluateCFList.py <method> <testList>'
    else:
        filename = 'data/jester-data-1.csv'
        items = {}
        users = {}
        matrix = []

        size = int(sys.argv[2])

        matrix, users, items = parse(filename)
        testData = gen_tests(users, size)
        f = Filter(matrix, users, items)

        method = sys.argv[1]
        print "Starting predictions"
        if method == 'all':
            w_results = f.execute('weighted_sum', testData)
            a_w_results = f.execute('adj_weighted_sum', testData)
            c_w_results = f.execute('cosine_weighted_sum', testData)
            c_a_w_results = f.execute('cosine_adj_weighted_sum', testData)
            print_evaluation(f, "Weighted Sum", w_results)
            print_evaluation(f, "Adjusted Weighted Sum", a_w_results)
            print_evaluation(f, "Cosine Weighted Sum", c_w_results)
            print_evaluation(f, "Cosine Adjusted Weighted Sum", c_a_w_results)
        else:
Example #36
# parse.py
# Author: Moises Marin
# Date: November 27, 2017
# Purpose: Call parse function
#
#
import utils

utils.parse("./_config/config.json")
Example #37
            raise
    except urllib2.HTTPError, response:
        pass  # Propagate HTTP errors via the returned response message
    return {
        'status': response.code,
        'reason': response.msg,
        'headers': response.info().dict,
        'body': StringIO(response.read())
    }


def handler(proxy):
    proxy_handler = urllib2.ProxyHandler({'http': proxy, 'https': proxy})
    opener = urllib2.build_opener(proxy_handler)
    urllib2.install_opener(opener)
    return request


opts = utils.parse(sys.argv[1:], RULES, ".splunkrc")
proxy = opts.kwargs['proxy']
try:
    service = client.connect(handler=handler(proxy), **opts.kwargs)
    pprint([app.name for app in service.apps])
except urllib2.URLError as e:
    if e.reason.errno == 1 and sys.version_info < (2, 6, 3):
        # There is a bug in Python < 2.6.3 that does not allow proxies with
        # HTTPS. You can read more at: http://bugs.python.org/issue1424152
        pass
    else:
        raise
Example #38
def main(argv):
    global urllib2
    usage = "async.py <sync | async>"

    # Parse the command line args.
    opts = parse(argv, {}, ".splunkrc")

    # We have to see if we got either the "sync" or
    # "async" command line arguments.
    allowed_args = ["sync", "async"]
    if len(opts.args) == 0 or opts.args[0] not in allowed_args:
        error("Must supply either of: %s" % allowed_args, 2)

    # Note whether or not we are async.
    is_async = opts.args[0] == "async"

    # If we're async, we'll import `eventlet` and `eventlet`'s version
    # of `urllib2`. Otherwise, import the stdlib version of `urllib2`.
    #
    # The reason for the funky import syntax is that Python imports
    # are scoped to functions, and we need to make it global.
    # In a real application, you would only import one of these.
    if is_async:
        urllib2 = __import__('eventlet.green', globals(), locals(),
                             ['urllib2'], -1).urllib2
    else:
        urllib2 = __import__("urllib2", globals(), locals(), [], -1)

    # Create the service instance using our custom HTTP request handler.
    service = client.Service(handler=request, **opts.kwargs)
    service.login()

    # Record the current time at the start of the
    # "benchmark".
    oldtime = datetime.datetime.now()

    def do_search(query):
        # Create a search job for the query.

        # In the async case, eventlet will "relinquish" the coroutine
        # worker, and let others go through. In the sync case, we will
        # block the entire thread waiting for the request to complete.
        job = service.jobs.create(query, exec_mode="blocking")

        # We fetch the results, and cancel the job
        results = job.results()
        job.cancel()

        return results

    # We specify many queries to show the advantages
    # of parallelism.
    queries = ['search * | head 100'] * 22

    # Check if we are async or not, and execute all the
    # specified queries.
    if is_async:
        import eventlet

        # Create an `eventlet` pool of workers.
        pool = eventlet.GreenPool(16)

        # If we are async, we use our worker pool to farm
        # out all the queries. We just pass, as we don't
        # actually care about the result.
        for results in pool.imap(do_search, queries):
            pass
    else:
        # If we are sync, then we just execute the queries one by one,
        # and we can also ignore the result.
        for query in queries:
            do_search(query)

    # Record the current time at the end of the benchmark,
    # and print the delta elapsed time.
    newtime = datetime.datetime.now()
    print("Elapsed Time: %s" % (newtime - oldtime))
Example #39
        predict = self.sess.run(tf.argmax(self.predict, 2),
                                feed_dict={self.X: sent_matrix})
        # convert to tag
        tags = []
        for i in range(len(predict)):
            tag_predict = []
            for j in range(len(sent_token[i])):
                tag_predict.append(idx2tag[predict[i][j]])
            tags.append(tag_predict)
        return sent_token, tags


if __name__ == '__main__':
    print("Đọc dữ liệu...")
    sentences = utils.load_data(PATH_TRAIN)
    all_words, all_tags, word2idx, idx2word, tag2idx, idx2tag = utils.parse(
        sentences)
    X_train, Y_train = utils.sentence_to_number(sentences, MAX_LENGTH,
                                                word2idx, tag2idx)
    X_train, X_val, Y_train, Y_val = train_test_split(X_train,
                                                      Y_train,
                                                      test_size=0.3,
                                                      random_state=0)
    utils.save_config(all_words, all_tags, word2idx, idx2word, tag2idx,
                      idx2tag, PATH_ALLWORDS, PATH_ALLTAGS, PATH_WORD2IDX,
                      PATH_IDX2WORD, PATH_TAG2IDX, PATH_IDX2TAG)
    print("Lưu tham số thành công! Tiến hành training...")
    VOCAB_SIZE = len(word2idx.items())
    NUM_CLASSES = len(tag2idx.items())
    model = Model(VOCAB_SIZE, NUM_CLASSES)
    model.build_model()
    model.fit(X_train, Y_train, X_val, Y_val, save=True)
Example #40
#!/usr/bin/env python

from __future__ import print_function

from utils import parse
from utils import combat

if __name__ == '__main__':
    first, top = 52, 100
    while first <= top:
        boost = (first + top) // 2
        with open('data.txt', 'r') as f:
            immune, infection = parse(f, boost)

        while len(immune) and len(infection):
            combat(immune, infection)

        score = immune.score()
        print(boost, score)
        if score > 0:
            top = boost
        else:
            first = boost

    print('Answer:', boost)
    # 4 wrong
    # 3 loops forever
    # 53
Example #41
"""An example that prints Splunk service info & settings."""

import sys, os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

import splunklib.client as client

try:
    from utils import parse
except ImportError:
    raise Exception("Add the SDK repository to your PYTHONPATH to run the examples "
                "(e.g., export PYTHONPATH=~/splunk-sdk-python.")

if __name__ == "__main__":
    opts = parse(sys.argv[1:], {}, ".splunkrc")
    service = client.connect(**opts.kwargs)

    content = service.info
    for key in sorted(content.keys()):
        value = content[key]
        if isinstance(value, list):
            print "%s:" % key
            for item in value: print "    %s" % item
        else:
            print "%s: %s" % (key, value)

    print "Settings:"
    content = service.settings.content
    for key in sorted(content.keys()):
        value = content[key]
Example #42
    else:
        meta_params = pickle.load(
            open(
                os.path.join(dic_path['PATH_TO_MODEL'],
                             'params_%d.pkl' % (batch_id - 1)), 'rb'))
    tot_grads = dict(zip(meta_params.keys(), [0] * len(meta_params.keys())))
    for key in meta_params.keys():
        for g in grads:
            tot_grads[key] += g[key]
    _beta = dic_agent_conf['BETA']
    meta_params = dict(
        zip(meta_params.keys(), [
            meta_params[key] - _beta * tot_grads[key]
            for key in meta_params.keys()
        ]))

    # save the meta parameters
    pickle.dump(
        meta_params,
        open(
            os.path.join(dic_path['PATH_TO_MODEL'],
                         'params' + "_" + str(batch_id) + ".pkl"), 'wb'))


if __name__ == '__main__':
    import os
    args = parse()  # defined in utils.py
    os.environ["CUDA_VISIBLE_DEVICES"] = args.visible_gpu

    main(args)
Example #43
from utils import parse
from utils import test_data

try:
    from functools import reduce
except:
    pass



def visit(node):
    return sum(node.metadata) + sum(visit(n) for n in node.children)





if __name__ == '__main__':
    with open('data.txt', 'r') as f:
        tree = parse(f.readline())

    debug = False
    if debug:
        tree = parse(test_data)

    print(tree)

    answer = visit(tree)
    print('Answer:', answer)
    # 138
    # 40908
Example #44
def get_func_macro(name_exp, form_exp, func, func_name):
    name = utils.parse(name_exp)
    form = utils.parse(form_exp)
    func_node = Node(FUNC, val=func_name, func=func)
    product = utils.paren([func_node])
    return get_macro(name, form, product)
Example #45
#!/usr/bin/env python

from __future__ import print_function

from utils import grow_generation
from utils import parse
from utils import grow

if __name__ == '__main__':
    with open('data.txt', 'r') as f:
        initial_state, rules = parse(f)

    print(initial_state)
    print(rules)
    width = 5000
    state = '.' * width + initial_state + '.' * width
    prev_count = 0
    for n in range(1100):
        state = grow(state, rules)

        count = 0
        for i, x in enumerate(state):
            if x == '#':
                count += i - width
        diff = count - prev_count
        prev_count = count
        print(n + 1, count, diff)

    answer = (50000000000 - (n + 1)) * diff + count
    print('Answer:', answer)
    # 250000045224 too high
Example #46
def main(argv, dataset, seed=88):
    '''
    runs ESS with fixed hyperparameters:
    run with -h for CL arguments description
    '''
    # parse CL arguments #
    args = utils.parse()
    gen_pars = [args.lengthscale, args.period]
    linear_pars = [args.slope, args.intercept]
    mlatent = args.mlatent

    # TODO: set seed from main call
    torch.random.manual_seed(seed)
    ##########################################
    ## some set up and initialization stuff ##
    ##########################################

    print("Dataset: {}".format(dataset))
    train_x, train_y, test_x, test_y, y_std, y_std_train, gen_kern = data.read_data(
        dataset,
        nx=args.nx,
        gen_pars=gen_pars,
        linear_pars=linear_pars,
        spacing=args.spacing,
        noise=args.noise)
    in_dims = 1 if train_x.dim() == 1 else train_x.size(1)

    use_cuda = torch.cuda.is_available()
    print('Cuda is available', use_cuda)
    if use_cuda:
        torch.set_default_tensor_type(torch.cuda.DoubleTensor)
        train_x, train_y, test_x, test_y, y_std = train_x.cuda(), train_y.cuda(
        ), test_x.cuda(), test_y.cuda(), y_std.cuda()
        if gen_kern is not None:
            gen_kern = gen_kern.cuda()

    ###########################################
    ## set up the spectral and latent models ##
    ###########################################
    print("Input Dimensions {}".format(in_dims))

    shared = True if mlatent == 'shared' else False

    data_lh = gpytorch.likelihoods.GaussianLikelihood(
        noise_prior=gpytorch.priors.SmoothedBoxPrior(1e-8, 1e-3))
    data_mod = spectralgp.models.ProductKernelSpectralModel(
        train_x,
        train_y,
        data_lh,
        shared=shared,
        normalize=False,
        symmetrize=False,
        num_locs=args.nomg,
        spacing=args.spacing,
        pretrain=False,
        omega_max=8.,
        nonstat=True)
    #data_lh.raw_noise = torch.tensor(-3.5)
    ###############################
    ## set up sampling factories ##
    ###############################

    ess_fact = lambda nsamples, ide: spectralgp.sampling_factories.ess_factory(
        nsamples, data_mod, data_lh, ide)

    ss_fact = lambda nsamples, ide: spectralgp.sampling_factories.ss_factory(
        nsamples, data_mod, data_lh, ide)

    ################################
    ## set up alternating sampler ##
    ################################

    alt_sampler = spectralgp.samplers.AlternatingSamplerMultiDim(
        ss_fact,
        ess_fact,
        totalSamples=args.iters,
        numInnerSamples=args.ess_iters,
        numOuterSamples=args.optim_iters,
        in_dims=in_dims)

    alt_sampler.run()

    data_mod.eval()
    data_lh.eval()
    d = data_mod(test_x).mean - test_y
    du = d * y_std

    test_rmse = torch.sqrt(torch.mean(torch.pow(d, 2)))
    unnorm_test_rmse = torch.sqrt(torch.mean(torch.pow(du, 2)))
    print("Normalised RMSE: {}".format(test_rmse))
    print("Unnormalised RMSE: {}".format(unnorm_test_rmse))

    y_preds = data_lh(data_mod(test_x))
    # y_var = f_var + data_noise
    y_var = y_preds.variance

    nll = 0.5 * torch.log(2. * math.pi * y_var) + torch.pow(
        (data_mod(test_x).mean - test_y), 2) / (2. * y_var)
    sll = nll - (0.5 * torch.log(2. * math.pi * torch.pow(y_std_train, 2)) +
                 torch.pow((torch.mean(train_y) - test_y), 2) /
                 (2. * torch.pow(y_std_train, 2)))
    msll = torch.mean(sll)
    nll_sum = nll.sum()
    print("Summed NLL: {}".format(nll_sum))
    print("MSLL: {}".format(msll))

    del data_lh
    del data_mod

    return float(test_rmse), float(unnorm_test_rmse), float(
        alt_sampler.total_time), float(nll_sum), float(msll)
Example #47
            # Check for collision and remove carts who collided.
            if c.p in cart_map and cart_map[c.p]._alive:
                c._alive = False
                cart_map[c.p]._alive = False
                return c.p
            else:
                cart_map[c.p] = c





if __name__ == '__main__':
    with open('data.txt', 'r') as f:
        track, carts = parse(f)

    debug = False
    if debug:
        track, carts = parse(test_data)

    carts = sorted(carts, key=lambda c: c.p)
    print('track:\n', '\n'.join(track), sep='')
    print('carts:', carts)

    answer = finc_collision_location(track, carts)


    print('Answer:', answer)
    # [Nice visualization](https://mk-hill.github.io/TrialAndError/cart-visualizer/)
    # Answer: 117,62
Example #48
def parse_extract(idioms, documents):
    '''
    Extracts idioms based on the dependency parse of the idiom and sentence.
    Parse all idioms, optionally in context, get their parse trees and top node
    lemmata. Then, parse each sentence, check if the top node lemma is present,
    and match the idiom parse tree to a subtree of the sentence parse. Deal
    with idioms containing indefinite pronouns and em-dashes properly.
    '''

    parser = utils.load_parser(config.PARSER)
    extracted_idioms = [
    ]  # List of dicts, format: {'snippet': "", 'idiom': "", 'start': 0, 'end': 0, 'bnc_doc_id': "", 'bnc_sent': "", 'bnc_char_start': 0, 'bnc_char_end': 0}
    # Use a PoS-ambiguous word to parse idioms containing em-dash wildcards
    ambiguous_word = 'fine'

    # Parse idioms in context
    if config.SENTENCES:
        cache_file = '{0}/example_sentences_{1}_{2}_{3}.json'.format(
            config.WORK_DIR, '_'.join(config.DICT),
            config.SENTENCES.split('/')[-1][:-4], config.TIME)
        idioms_with_sentences = utils.get_example_sentences(
            idioms, config.SENTENCES, cache_file)
        parsed_idioms = utils.parse_example_sentences(idioms_with_sentences,
                                                      ambiguous_word, parser)
    # Parse idioms without context
    else:
        parsed_idioms = []
        for idiom in idioms:
            parsed_idioms.append(
                utils.parse_idiom(idiom, ambiguous_word, parser))

    # Extract idiom instances by matching parse trees
    for sentences in documents:
        time_0 = time.time()
        print('Parsing document...')
        # Get sentence strings from BNC data and parse
        if config.CORPUS_TYPE[0:3] == 'bnc':
            sentences_with_metadata = sentences
            sentences = [
                sentence_with_metadata['sentence']
                for sentence_with_metadata in sentences_with_metadata
            ]
            # Parse sentence, and turn resulting Doc into Span object
            parsed_sentences = [
                utils.parse(parser, sentence)[:] for sentence in sentences
            ]
        # Parse corpus as a whole, let Spacy do the sentence splitting
        else:
            parsed_corpus = utils.parse(parser, ' '.join(sentences))
            parsed_sentences = parsed_corpus.sents

        print('\nDone! Parsing document took: {0:.2f} seconds'.format(
            time.time() - time_0))
        # Cycle through sentences, attempt to match parse trees
        for sentence_idx, parsed_sentence in enumerate(parsed_sentences):
            for parsed_idiom in parsed_idioms:

                # Get idiom information
                idiom_top_lemma = parsed_idiom[0]
                idiom_top_token = parsed_idiom[1]
                idiom_subtree = parsed_idiom[2]
                # If not parsed in context, there is no stored list, so get generator
                if not idiom_subtree:
                    idiom_subtree = idiom_top_token.subtree
                # Use list, rather than generator
                idiom_subtree = [x for x in idiom_subtree]
                has_em_dash = parsed_idiom[3]
                # Save previously matched indices to check for overlapping spans
                previously_matched_indices = []

                # When idiom top lemma is em-dash, check if other lemma-tokens occur in sentence, only then try matching the parse trees
                consider_this_em_dash_idiom = False
                if has_em_dash and idiom_top_lemma == ambiguous_word:
                    idiom_content_tokens = [
                        token for token in idiom_subtree if
                        token.tag_ not in ['DT'] and token != idiom_top_token
                    ]
                    sentence_lemmata = [
                        token.lemma_ for token in parsed_sentence
                    ]
                    if all([
                            idiom_content_token.lemma_ in sentence_lemmata
                            for idiom_content_token in idiom_content_tokens
                    ]):
                        consider_this_em_dash_idiom = True

                # Cycle through sentence parse, match top lemma to sentence lemma and idiom parse tree to sentence parse tree
                for sentence_token in parsed_sentence:
                    # Match top lemma or em-dash heuristic or match any idiom token as possible top token in case of no directionality
                    if sentence_token.lemma_ == idiom_top_token.lemma_ or consider_this_em_dash_idiom or (
                            config.NO_DIRECTION and sentence_token.lemma_
                            in [x.lemma_ for x in idiom_subtree]):
                        sentence_top_token = sentence_token
                        # Keep track of indices of matching tokens for later span extraction
                        matched_indices = [sentence_top_token.i]
                        # Match parse trees, account for many special cases
                        for idiom_subtree_token in idiom_subtree:
                            # Skip top token and articles
                            if idiom_subtree_token != idiom_top_token and idiom_subtree_token.lower_ not in [
                                    'a', 'the', 'an'
                            ]:
                                matched_subtree_token = False
                                for sentence_subtree_token in sentence_token.subtree:
                                    # Match condition components
                                    # Spacy gives same lemma for all pronouns, so match on lower-cased form
                                    matching_lemma = (
                                        idiom_subtree_token.lemma_
                                        == sentence_subtree_token.lemma_ and
                                        idiom_subtree_token.lemma_ != u'-PRON-'
                                    ) or (idiom_subtree_token.lemma_
                                          == u'-PRON-'
                                          and idiom_subtree_token.lower_
                                          == sentence_subtree_token.lower_)
                                    # Optionally, ignore dependency labels
                                    matching_dep = idiom_subtree_token.dep_ == sentence_subtree_token.dep_ or config.NO_LABELS
                                    matching_head_lemma = (
                                        idiom_subtree_token.head.lemma_
                                        == sentence_subtree_token.head.lemma_
                                        and idiom_subtree_token.head.lemma_ !=
                                        u'-PRON-'
                                    ) or (
                                        idiom_subtree_token.head.lemma_
                                        == u'-PRON-'
                                        and idiom_subtree_token.head.lower_
                                        == sentence_subtree_token.head.lower_)
                                    # Optionally, allow for direction reversal
                                    if config.NO_DIRECTION:
                                        if idiom_subtree_token.head.lemma_ == u'-PRON-':
                                            matched_children = [
                                                x for x in
                                                sentence_subtree_token.children
                                                if x.lower_ ==
                                                idiom_subtree_token.head.lower_
                                            ]
                                        else:
                                            matched_children = [
                                                x for x in
                                                sentence_subtree_token.children
                                                if x.lemma_ ==
                                                idiom_subtree_token.head.lemma_
                                            ]
                                        matching_child_lemma = matched_children != []
                                        matching_head_lemma = matching_head_lemma or matching_child_lemma
                                    em_dash_lemma = has_em_dash and idiom_subtree_token.lemma_ == ambiguous_word
                                    em_dash_head_lemma = has_em_dash and idiom_subtree_token.head.lemma_ == ambiguous_word
                                    inverted_dep = idiom_subtree_token.dep_ == 'dobj' and sentence_subtree_token.dep_ == 'nsubjpass' or config.NO_LABELS
                                    # Default case: lemma, dep-rel and head lemma have to match.
                                    # In case of em-dash, match lemma or head lemma, and the other one to the ambiguous word
                                    if (matching_lemma and matching_dep
                                            and matching_head_lemma
                                            or em_dash_lemma
                                            and matching_head_lemma
                                            or matching_lemma
                                            and em_dash_head_lemma):
                                        matched_subtree_token = True
                                    # Passivization: match lemma, head lemma and inverted dep-rels
                                    elif matching_lemma and inverted_dep and matching_head_lemma:
                                        matched_subtree_token = True
                                    # Deal with someone and someone's
                                    elif idiom_subtree_token.lemma_ == 'someone':
                                        idiom_right_children = [
                                            right for right in
                                            idiom_subtree_token.rights
                                        ]
                                        # Deal with someone's - match any other PRP$ or NN(P)(S) + POS for lemma
                                        if idiom_right_children and idiom_right_children[
                                                0].lemma_ == "'s":
                                            sentence_right_children = [
                                                right for right in
                                                sentence_subtree_token.rights
                                            ]
                                            if (matching_dep
                                                    and matching_head_lemma and
                                                (sentence_subtree_token.tag_
                                                 == 'PRP$' or
                                                 sentence_subtree_token.tag_ in
                                                 ['NN', 'NNS', 'NNP', 'NNPS']
                                                 and sentence_right_children
                                                 and sentence_right_children[0]
                                                 .lemma_ == "'s")):
                                                matched_subtree_token = True
                                        # Deal with someone - match any other PRP or NN(P)(S) for lemma
                                        else:
                                            if ((matching_dep or inverted_dep)
                                                    and matching_head_lemma and
                                                    sentence_subtree_token.tag_
                                                    in [
                                                        'PRP', 'NN', 'NNS',
                                                        'NNP', 'NNPS'
                                                    ]):
                                                matched_subtree_token = True
                                    # Deal with one's - match any PRP$ for lemma
                                    elif idiom_subtree_token.lemma_ == 'one':
                                        idiom_right_children = [
                                            right for right in
                                            idiom_subtree_token.rights
                                        ]
                                        if idiom_right_children and idiom_right_children[
                                                0].lemma_ == "'s":
                                            if matching_dep and matching_head_lemma and sentence_subtree_token.tag_ == 'PRP$':
                                                matched_subtree_token = True
                                    # Deal with something and something's
                                    elif idiom_subtree_token.lemma_ == 'something':
                                        idiom_right_children = [
                                            right for right in
                                            idiom_subtree_token.rights
                                        ]
                                        # Deal with something's - match any other PRP$ or NN(P)(S) + POS for lemma
                                        if idiom_right_children and idiom_right_children[
                                                0].lemma_ == "'s":
                                            sentence_right_children = [
                                                right for right in
                                                sentence_subtree_token.rights
                                            ]
                                            if (matching_dep
                                                    and matching_head_lemma and
                                                (sentence_subtree_token.tag_
                                                 == 'PRP$' or
                                                 sentence_subtree_token.tag_ in
                                                 ['NN', 'NNS', 'NNP', 'NNPS']
                                                 and sentence_right_children
                                                 and sentence_right_children[0]
                                                 .lemma_ == "'s")):
                                                matched_subtree_token = True
                                        # Deal with something - match any other PRP or NN(P)(S) or this/that/these/those for lemma
                                        else:
                                            if ((matching_dep or inverted_dep)
                                                    and matching_head_lemma and
                                                (sentence_subtree_token.tag_
                                                 in [
                                                     'PRP', 'NN', 'NNS', 'NNP',
                                                     'NNPS'
                                                 ] or
                                                 sentence_subtree_token.lemma_
                                                 in [
                                                     'this', 'that', 'these',
                                                     'those'
                                                 ])):
                                                matched_subtree_token = True
                                    # Deal with 's of someone's, one's and something's by ignoring it
                                    elif idiom_subtree_token.lemma_ == "'s" and idiom_subtree_token.head.lemma_ in [
                                            'someone', 'one', 'something'
                                    ]:
                                        matched_subtree_token = True
                                        break

                                    if matched_subtree_token:  # Match, go to next idiom subtree token
                                        # Add child in case of no-directionality child match
                                        if config.NO_DIRECTION and matching_child_lemma:
                                            matched_indices.append(
                                                matched_children[0].i)
                                        else:
                                            matched_indices.append(
                                                sentence_subtree_token.i)
                                        break
                                if not matched_subtree_token:  # No match, go to next sentence token
                                    break

                        # If everything matches, extract snippet
                        if matched_subtree_token:
                            # Text of idiom subtree is dictionary form
                            dictionary_form = ''.join([
                                idiom_subtree_token.text_with_ws
                                for idiom_subtree_token in idiom_subtree
                            ]).strip()
                            # Deal with em-dash wildcard idiom, substitute em-dash back in for ambiguous word
                            if has_em_dash:
                                dictionary_form = re.sub(
                                    ambiguous_word, u'\u2014', dictionary_form)
                            # Get idiom token span
                            first_idiom_token_i = min(
                                matched_indices) - parsed_sentence.start
                            last_idiom_token_i = max(
                                matched_indices) - parsed_sentence.start
                            first_idiom_token = parsed_sentence[
                                first_idiom_token_i]
                            last_idiom_token = parsed_sentence[
                                last_idiom_token_i]
                            # Extract n-word context
                            if config.CONTEXT_TYPE == 'w':
                                span_start = max(
                                    0, first_idiom_token_i -
                                    config.CONTEXT_NUMBER)
                                span_end = min(
                                    len(parsed_sentence), last_idiom_token_i +
                                    1 + config.CONTEXT_NUMBER)
                                snippet = parsed_sentence[
                                    span_start:span_end].text
                                # Store character offset of snippet start
                                char_offset_span = parsed_sentence[
                                    span_start].idx
                            # Extract n-sentence context
                            elif config.CONTEXT_TYPE == 's':
                                if config.CONTEXT_NUMBER == 0:
                                    snippet = parsed_sentence.text
                                    # Store character offset of sentence (==snippet) start
                                    char_offset_span = parsed_sentence.start_char
                                else:
                                    snippet = ""
                                    # Get snippet sentences
                                    first_sentence_idx = sentence_idx - config.CONTEXT_NUMBER
                                    last_sentence_idx = sentence_idx + config.CONTEXT_NUMBER
                                    # Re-iterate over sentences to extract the sentence contents
                                    for sentence_idx_2, parsed_sentence_2 in enumerate(
                                            parsed_corpus.sents):
                                        if sentence_idx_2 >= first_sentence_idx and sentence_idx_2 <= last_sentence_idx:
                                            # Store character offset of snippet start
                                            if sentence_idx_2 == first_sentence_idx:
                                                char_offset_span = parsed_sentence_2.start_char
                                            # Add space between sentences
                                            if snippet:
                                                snippet += ' '
                                            snippet += parsed_sentence_2.text
                            # Get idiom character offsets in snippet
                            char_offset_start = first_idiom_token.idx - char_offset_span
                            char_offset_end = last_idiom_token.idx + len(
                                last_idiom_token.text) - char_offset_span
                            # Get BNC metadata/set dummy values
                            if config.CORPUS_TYPE[0:3] == 'bnc':
                                bnc_document_id = sentences_with_metadata[
                                    sentence_idx]['document_id']
                                bnc_sentence = sentences_with_metadata[
                                    sentence_idx]['sentence_number']
                                bnc_char_start = first_idiom_token.idx
                                bnc_char_end = last_idiom_token.idx + len(
                                    last_idiom_token.text)
                            else:
                                bnc_document_id = '-'
                                bnc_sentence = '-'
                                bnc_char_start = 0
                                bnc_char_end = 0

                            extracted_idiom = {
                                'snippet': snippet,
                                'idiom': dictionary_form,
                                'start': char_offset_start,
                                'end': char_offset_end,
                                'bnc_document_id': bnc_document_id,
                                'bnc_sentence': bnc_sentence,
                                'bnc_char_start': bnc_char_start,
                                'bnc_char_end': bnc_char_end
                            }

                            # Check whether the instance has already been added, with a larger span (this can happen with em-dash idioms). Don't do this for NLD matches.
                            if previously_matched_indices:
                                # Remove most recent entry if it has a larger span than the current entry
                                if min(previously_matched_indices) <= min(
                                        matched_indices
                                ) and max(previously_matched_indices) >= max(
                                        matched_indices) and (
                                            sentence_token.lemma_
                                            == idiom_top_token.lemma_
                                            or consider_this_em_dash_idiom):
                                    del extracted_idioms[-1]
                                # Only add current entry if it doesn't have a larger span than the most recent entry
                                if not (min(previously_matched_indices) >=
                                        min(matched_indices)
                                        and max(previously_matched_indices) <=
                                        max(matched_indices)) and (
                                            sentence_token.lemma_
                                            == idiom_top_token.lemma_
                                            or consider_this_em_dash_idiom):
                                    extracted_idioms.append(extracted_idiom)
                                    previously_matched_indices = matched_indices
                            else:
                                extracted_idioms.append(extracted_idiom)
                                previously_matched_indices = matched_indices

    return extracted_idioms
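
A minimal, hypothetical sketch (the helper name, output path, and TSV format are illustrative, not part of the original code) of how the records returned by the extractor above could be serialised; the field names mirror the dictionary built in the extraction loop.

import csv

def write_extracted_idioms(extracted_idioms, out_path='extracted_idioms.tsv'):
    # Field names mirror the keys of each extracted_idiom dictionary above.
    fieldnames = ['snippet', 'idiom', 'start', 'end',
                  'bnc_document_id', 'bnc_sentence', 'bnc_char_start', 'bnc_char_end']
    with open(out_path, 'w', encoding='utf-8', newline='') as out_file:
        writer = csv.DictWriter(out_file, fieldnames=fieldnames, delimiter='\t')
        writer.writeheader()
        for record in extracted_idioms:
            writer.writerow(record)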
Ejemplo n.º 49
0
import matplotlib.pyplot as plt
import neural_net as nn
import numpy as np
import accuracy as acc
import utils

# get data
attr, label = utils.parse('E:/training_set_transformed.csv')
attr_test, target = utils.parse('E:/Features_TestSet_transformed.csv')

# train
mlp, scaler = nn.mlp_train(attr, label)

# mean square error
print 'mean square error: ', acc.mse(target,mlp.predict(scaler.transform(attr_test)))

# actual-predicted ratio
print acc.ratio(target, mlp.predict(scaler.transform(attr_test)), 2)

# hits@10
hit_10 = []
for i in np.arange(0,1000,100):
	hit_10.append(acc.get_hits_at_ten(target[i:i+100],mlp.predict(scaler.transform(attr_test[i:i+100]))))
print 'Hit@10 average: ', np.mean(hit_10)
hit_10.append(np.mean(hit_10))

x = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', 'Avg.']
plt.bar(np.arange(len(hit_10)), hit_10, align='center', alpha=0.5)
plt.xticks(np.arange(len(hit_10)), x)
axes = plt.gca()
axes.set_ylim([0, 10])
plt.show()
Ejemplo n.º 50
0
def main(argv, dataset, seed, iteration):
    '''
    runs ESS with fixed hyperparameters:
    run with -h for CL arguments description
    '''
    # parse CL arguments #
    args = utils.parse()
    gen_pars = [args.lengthscale, args.period]
    linear_pars = [args.slope, args.intercept]
    mlatent = args.mlatent
    model_avg = args.model_avg

    # TODO: set seed from main call
    torch.random.manual_seed(seed)
    ##########################################
    ## some set up and initialization stuff ##
    ##########################################

    print("Dataset: {}".format(dataset))
    train_x, train_y, test_x, test_y, y_std, y_std_train, gen_kern = data.read_data(
        dataset,
        nx=args.nx,
        gen_pars=gen_pars,
        linear_pars=linear_pars,
        spacing=args.spacing,
        noise=args.noise)
    in_dims = 1 if train_x.dim() == 1 else train_x.size(1)

    use_cuda = torch.cuda.is_available()
    print('CUDA available:', use_cuda)
    if use_cuda:
        torch.set_default_tensor_type(torch.cuda.DoubleTensor)
        train_x, train_y, test_x, test_y, y_std = train_x.cuda(), train_y.cuda(
        ), test_x.cuda(), test_y.cuda(), y_std.cuda()
        if gen_kern is not None:
            gen_kern = gen_kern.cuda()

    ###########################################
    ## set up the spectral and latent models ##
    ###########################################
    print("Input Dimensions {}".format(in_dims))

    shared = (mlatent == 'shared')

    data_lh = gpytorch.likelihoods.GaussianLikelihood(
        noise_prior=gpytorch.priors.SmoothedBoxPrior(1e-8, 1e-3))
    data_mod = spectralgp.models.ProductKernelSpectralModel(
        train_x,
        train_y,
        data_lh,
        shared=shared,
        normalize=False,
        symmetrize=False,
        num_locs=args.nomg,
        spacing=args.spacing,
        period_factor=36.)

    #plot_prior_kernel(in_dims, data_mod, dataset, mlatent)
    plot_prior_subkernel(in_dims, data_mod, dataset, mlatent)
    #plot_prior_subkernel_individual(in_dims, data_mod, dataset, mlatent)

    ################################
    ## set up alternating sampler ##
    ################################

    #alt_sampler = spectralgp.samplers.AlternatingSampler(
    #[data_mod], [data_lh],
    #spectralgp.sampling_factories.ss_factory, [spectralgp.sampling_factories.ess_factory],
    #totalSamples=args.iters, numInnerSamples=args.ess_iters, numOuterSamples=args.optim_iters, num_dims=in_dims, num_tasks=1, lr=0.01)

    alt_sampler = spectralgp.samplers.AlternatingSampler(
        [data_mod], [data_lh],
        spectralgp.sampling_factories.ss_factory,
        [spectralgp.sampling_factories.ess_factory],
        totalSamples=args.iters,
        numInnerSamples=args.ess_iters,
        numOuterSamples=args.optim_iters,
        num_dims=in_dims)

    alt_sampler.run()

    meaned_data_mod_means, total_variance = model_average(
        data_mod, data_lh, alt_sampler, train_x, train_y, test_x, in_dims,
        model_avg)

    test_rmse = 0.0
    unnorm_test_rmse = 0.0
    nll_sum = 0.0
    msll = 0.0

    d = meaned_data_mod_means - test_y
    du = d * y_std

    test_rmse = torch.sqrt(torch.mean(torch.pow(d, 2)))
    unnorm_test_rmse = torch.sqrt(torch.mean(torch.pow(du, 2)))

    nll = 0.5 * torch.log(2. * math.pi * total_variance) + torch.pow(
        (meaned_data_mod_means - test_y), 2) / (2. * total_variance)
    sll = nll - (0.5 * torch.log(2. * math.pi * torch.pow(y_std_train, 2)) +
                 torch.pow((torch.mean(train_y) - test_y), 2) /
                 (2. * torch.pow(y_std_train, 2)))
    msll += torch.mean(sll)
    nll_sum += nll.sum()

    print("Normalised RMSE: {}".format(test_rmse))
    print("Unnormalised RMSE: {}".format(unnorm_test_rmse))
    print("Summed NLL: {}".format(nll_sum))
    print("MSLL: {}".format(msll))

    #plot_kernel(alt_sampler, data_mod, dataset, mlatent)
    plot_subkernel(alt_sampler, data_mod, dataset, mlatent)
    #plot_subkernel_individual(alt_sampler, data_mod, dataset, mlatent)

    del data_lh
    del data_mod

    return float(test_rmse), float(unnorm_test_rmse), float(
        alt_sampler.total_time), float(nll_sum), float(msll)
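
A hedged sketch of a driver loop around the main() above; the dataset name and seed list are placeholders, and it simply averages the five returned metrics (RMSE, unnormalised RMSE, sampler time, summed NLL, MSLL) over seeds.

import sys
import numpy as np

def run_over_seeds(dataset='airline', seeds=(0, 1, 2)):
    # main() returns (rmse, unnorm_rmse, total_time, nll_sum, msll) as floats.
    results = [main(sys.argv[1:], dataset, seed, iteration=i)
               for i, seed in enumerate(seeds)]
    return np.asarray(results).mean(axis=0)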
Ejemplo n.º 51
0
                    path.append(parent)
                path.reverse()
                return True, path
            else:
                if unchanged_state not in visitedStates:
                    stateQueue.put([unchanged_state, action])
            visitedStates.append(unchanged_state)
    return False, []


# Do not modify the following
if __name__ == '__main__':
    assert (len(sys.argv) == 3)
    stype = sys.argv[1]
    fname = sys.argv[2]
    initial, goal, actions, groundObjects = utils.parse(fname)

    print "Actions"
    for a in actions:
        print str(a)
    print "\nInitial\n"
    for i in initial:
        print str(i)
    print "\nGoal\n"
    for g in goal:
        print str(g)
    print '\n'

    if stype == 'forward':
        foundPlan, plan = forward_search(initial, goal, actions, groundObjects)
        print foundPlan
Ejemplo n.º 52
0
    for key in y_means:
        y_means[key] = y_means[key].cpu()

    output_dict = {
        "observations": {
            "x": train_x.cpu(),
            "y": train_y.cpu(),
            "means": y_means,
            "latent_y": latent_y.cpu(),
        },
        "results": DataFrame(all_outputs),
        "args": args
    }
    torch.save(output_dict, args.output)


if __name__ == "__main__":
    args = parse()
    use_fast_pred_var = not args.use_exact

    with use_toeplitz(args.toeplitz), \
            max_cholesky_size(args.cholesky_size), \
            max_root_decomposition_size(args.sketch_size), \
            cholesky_jitter(1e-3), \
            fast_pred_var(use_fast_pred_var), \
            fast_pred_samples(True):
        main(args)
Ejemplo n.º 53
0
import json
from flask import Flask, render_template, request, redirect
from utils.parse import *
from model import *

# Create the Flask application used by the route decorators below.
app = Flask(__name__)

data = parse('static/data/svn_list.xml', 'static/data/svn_log.xml')


# home page
@app.route('/', methods=['GET'])
def home():
    return render_template('index.html')


# assignment with idx specified in url
@app.route('/assignment<idx>', methods=['GET'])
def assignment(idx):
    return render_template('assignment.html',
                           project=data['mliu60/Assignment' + idx])


# comment with path and parent id specified in url, post only
@app.route('/comment/<path>/<parent>', methods=['POST'])
def comment(path, parent):
    print "post"
    split = path.split()
    project_name = split[0] + '/' + split[1]
    content, date, parent, id = add_comment(content=request.form["content" +
                                                                 parent],
                                            file='/'.join(split),
                                            parent=int(parent))
Ejemplo n.º 54
0
 def test_parse_all(self):
     for string, data in samples:
         _data = parse(Lexer, Parser, ParserState, string)
         pprint.pprint(data)
         pprint.pprint(_data)
         self.assertEqual(data, _data)
Ejemplo n.º 55
0
         actual_page,
         "start_date":
         utils.date_to_string(actual_date),
         "end_date":
         utils.date_to_string(actual_date + datetime.timedelta(days=1))
     })
 replays = replays_info.json()["replays"]
 if len(replays) == 0:
     break
 for replay in replays:
     if replay['game_type'] in [
             'TeamLeague', 'HeroLeague', 'UnrankedDraft'
     ]:  # We are only interested in replays with a draft
         print(replay["filename"])
         try:
             picks_and_bans = utils.parse(replay["filename"] +
                                          ".StormReplay")
         except Exception as error:
             print(error)
             continue
         print(picks_and_bans)
         if len(picks_and_bans["picks"]) != 10:
             print("Incorrect number of pick")
             continue
         if len(picks_and_bans["bans"]) != 6:
             for i in range(6 - len(picks_and_bans["bans"])):
                 picks_and_bans["bans"].append(None)
         # Add the data into the database
         query = """INSERT INTO replay 
         (game_type, map, winner, ban1, ban2, ban3, ban4, ban5, ban6,
         pick1, pick2, pick3, pick4, pick5, pick6, pick7, pick8, pick9, pick10,
         level1, level2, level3, level4, level5, level6, level7, level8, level9, level10) 
Ejemplo n.º 56
0
def cmdline(argv, flags):
    """A cmdopts wrapper that takes a list of flags and builds the
       corresponding cmdopts rules to match those flags."""
    rules = dict([(flag, {'flags': ["--%s" % flag]}) for flag in flags])
    return parse(argv, rules)
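
A hypothetical usage of the wrapper above (the flag names are illustrative): each entry becomes a --flag cmdopts rule, and the parsed values come back on opts.kwargs as in the other command-line examples.

import sys

opts = cmdline(sys.argv[1:], ["hostname", "port", "verbose"])
print(opts.kwargs.get("hostname"))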
Ejemplo n.º 57
0
                    for n in neighbors:
                        if self.ranks[n] is not None:
                            outlinks = len(self.graph.neighbors(n))
                            rank_sum += (1 / float(outlinks)) * self.ranks[n]
            
                # actual page rank compution
                self.ranks[key] = ((1 - float(self.d)) * (1/float(self.V))) + self.d*rank_sum

        return p

if __name__ == '__main__':
    filename = "E:\Download\social_network\email-EuAll.txt\Email-EuAll.txt"
    isDirected = True
    time_start=time.time()

    graph = parse(filename, isDirected)
    # p = PageRank(graph, isDirected)
    # p.rank()

    # sorted_r = sorted(p.ranks.items(), key=operator.itemgetter(1), reverse=True)
    pr = nx.pagerank(graph)
    time_end = time.time()
    print(time_end - time_start)
    # print(pr)
    # for tup in sorted_r:
    #     print ('{0:30} :{1:10}'.format(str(tup[0]), tup[1]))

 #       for node in graph.nodes():
 #          print node + rank(graph, node)

            #neighbs = graph.neighbors(node)
Ejemplo n.º 58
0
    raise Exception(
        "Add the SDK repository to your PYTHONPATH to run the examples "
        "(e.g., export PYTHONPATH=~/splunk-sdk-python.")


def request(url, message, **kwargs):
    method = message['method'].lower()
    data = message.get('body', "") if method == 'post' else None
    headers = dict(message.get('headers', []))
    # If running Python 2.7.9+, disable SSL certificate validation
    req = urllib2.Request(url, data, headers)
    try:
        if sys.version_info >= (2, 7, 9):
            response = urllib2.urlopen(
                req, context=ssl._create_unverified_context())
        else:
            response = urllib2.urlopen(req)
    except urllib2.HTTPError, response:
        pass  # Propagate HTTP errors via the returned response message
    return {
        'status': response.code,
        'reason': response.msg,
        'headers': response.info().dict,
        'body': StringIO(response.read())
    }


opts = utils.parse(sys.argv[1:], {}, ".splunkrc")
service = client.connect(handler=request, **opts.kwargs)
pprint([app.name for app in service.apps])
Ejemplo n.º 59
0
 def load_instructions(self, f):
     self._register0, self.instructions = parse(f)
Ejemplo n.º 60
0
def runCrawl(datadir='../data',
             storagedir='../MEDLINE',
             resultdir='.',
             verbose=1):
    '''
    Crawls the XML to extract the relationships between PubMed articles and MeSH / SCR terms. Takes ~3 hrs on local desktop
    '''
    ### Run Crawl ###
    tic1 = time.time()
    addedids = []
    addedmesh = []
    missed = 0
    allinds = []
    # Process all PMIDs
    step = 5000

    examplegenelinks, enumpmid, enumui, uis = setup(datadir=datadir,
                                                    resultdir=resultdir,
                                                    verbose=verbose)

    count = len(enumpmid.keys())
    if verbose:
        print('IDs from file: {}'.format(count))

    # Contains metadata of pmids
    meta = defaultdict(dict)
    indicies = []
    tic = time.time()
    log = open('Log.txt', 'w')
    xmlFls = glob.glob(os.path.join(storagedir, '*.xml'))
    for xmlFl in xmlFls:
        if os.stat(xmlFl).st_size != 0:
            # Sometimes the tags are malformed; if so, retry the parse with HTML recovery to fix the tags.
            try:
                indicies, pmids, meshids, miss = parse(xmlFl, enumpmid, enumui,
                                                       uis, meta)
            except:
                try:
                    indicies, pmids, meshids, miss = parse(xmlFl,
                                                           enumpmid,
                                                           enumui,
                                                           uis,
                                                           meta,
                                                           recover=True)
                    if verbose:
                        print('Recovering XML')
                except:
                    if verbose:
                        print('Removing XML lines that are problematic')
                    try:
                        fl2 = fixXML(
                            xmlFl
                        )  # Try removing lines that have symbols within tags
                        xmlFl = fl2
                    except:
                        pass
                    # try fixed xml fl
                    try:
                        indicies, pmids, meshids, miss = parse(xmlFl,
                                                               enumpmid,
                                                               enumui,
                                                               uis,
                                                               meta,
                                                               recover=True)
                    except:
                        missed += 1
                        e = traceback.format_exc()
                        s = 'XML File:{} has a problem. Skipping...Error:{}\n'.format(
                            xmlFl, e)
                        log.write(s)
                        print(s)
                    continue
        else:
            os.remove(xmlFl)
            s = 'Failed to load: {}. Continuing\n'.format(xmlFl)
            log.write(s)
            print(s)
            missed += 1
            continue

        # Write out UIs
        OUT = open('{}/UIs.txt'.format(resultdir), 'w')
        OUT.write('\n'.join(uis))
        OUT.close()

        # Output indices
        OUT = open('{}_IND.txt'.format(xmlFl), 'w')
        for x, y in indicies:
            OUT.write('{}\t{}\n'.format(x, y))
        OUT.close()

        allinds += indicies
        missed += miss
        addedids += pmids
        addedmesh += meshids

        if verbose:
            toc = time.time()
            print(toc - tic, ' for {}'.format(xmlFls.index(xmlFl)))
            print('Added PMIDs: {}'.format(len(addedids)))
            print('Added MeSH PMIDs: {}'.format(len(addedmesh)))
            print('Missed files: {}'.format(missed))
            tic = time.time()
    log.close()
    toc = time.time()
    if verbose:
        print('{} seconds to build'.format((toc - tic1)))
        print('{} minutes to build'.format((toc - tic1) / 60))

    ### Output data
    # Write out UIs
    OUT = open('{}/UIs.txt'.format(resultdir), 'w')
    OUT.write('\n'.join(uis))
    OUT.close()

    # Write out Meta
    OUT = open('{}/meta.txt'.format(resultdir), 'w')
    for key in meta:
        OUT.write('{}\t{}\n'.format(
            key, ''.join('{}:{}'.format(key, val)
                         for key, val in sorted(meta[key].items()))))
    OUT.close()

    # Write out all Indices
    OUT = open('{}/AllIND.txt'.format(storagedir), 'w')
    for x, y in allinds:
        OUT.write('{}\t{}\n'.format(x, y))
    OUT.close()

    # Write out PMIDs
    OUT = open('{}/pmids.txt'.format(resultdir), 'w')
    currentPMIDs = np.empty(shape=(len(enumpmid.keys()), 1))
    for pmid, ind in enumpmid.items():
        pmid = pmid.replace('\x00', '')
        currentPMIDs[ind] = pmid
    for i in range(0, len(currentPMIDs)):
        OUT.write('{}\n'.format(int(currentPMIDs[i][0])))
    OUT.close()

    currentPMIDs = [str(int(x[0])) for x in currentPMIDs]
    total = len(allinds)
    rows = len(currentPMIDs)
    cols = len(uis)
    return rows, cols, total, currentPMIDs, meta
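
A hedged sketch (the scipy/numpy usage and helper name are assumptions, not part of the original crawl) showing how the tab-separated index pairs written to AllIND.txt could be loaded back into a sparse PMID-by-UI incidence matrix using the rows and cols returned above.

import numpy as np
from scipy.sparse import coo_matrix

def load_incidence_matrix(ind_path, rows, cols):
    # Each line of AllIND.txt holds one tab-separated (pmid_index, ui_index) pair.
    pairs = np.loadtxt(ind_path, dtype=int, delimiter='\t', ndmin=2)
    data = np.ones(pairs.shape[0], dtype=np.int8)
    return coo_matrix((data, (pairs[:, 0], pairs[:, 1])), shape=(rows, cols))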