Example #1
def parse(text):
    log("Checking for coherence in '{0}'".format(text), 2)

    family_hits = []
    family_stem_words = stemmed_words(family_words, 'family_words')
    for sentence in sentence_tokenizer.parse(text):
        tree = parsers.parse(sentence)[0]
        family_hits += [
            (a_tree.node, a_tree[0].lower(), stemmer.stem(a_tree[0].lower())
             in family_stem_words)
            for a_tree in tree.subtrees(lambda x: x.node in noun_tags)
        ]
    log("Family hits: {0}".format(family_hits), 4)
    family_hit_values = (len([hit for hit in family_hits
                              if hit[2]]), len(family_hits))
    log("%d/%d" % family_hit_values, 3)

    work_hits = []
    work_stem_words = stemmed_words(work_words, 'work_words')
    for sentence in sentence_tokenizer.parse(text):
        tree = parsers.parse(sentence)[0]
        work_hits += [
            (a_tree.node, a_tree[0].lower(), stemmer.stem(a_tree[0].lower())
             in work_stem_words)
            for a_tree in tree.subtrees(lambda x: x.node in noun_tags)
        ]
    log("Work hits: {0}".format(work_hits), 4)
    work_hit_values = (len([hit for hit in work_hits
                            if hit[2]]), len(work_hits))
    log("%d/%d" % work_hit_values, 3)

    return family_hit_values[0], work_hit_values[0], work_hit_values[1]
    def test000(self):
        "watering a simple network"

        # XXX DISABLED by Remco
        #
        # First, the compute_lost_water_depth function has changed slightly
        # (the z values of puts are now different). This needs to be reflected
        # in this function.
        #
        # But, the reported flooding depth has also changed, and currently
        # I trust the function better than this test. It should probably be
        # replaced by a set of smaller tests that can more easily be inspected
        # by hand.

        return

        pool = {}
        G = nx.Graph()
        parse("lizard_riool/data/f3478-bb.rmb", pool)
        convert_to_graph(pool, G)

        compute_lost_water_depth(G, (0.0, 0.0))

        target = [((0.0, 0.0), 0.0, 0),

                  ((0.0, 1.0), 2.0, 0),
                  ((0.0, 2.0), 0.0, 2.0),
                  ((0.0, 3.0), 1.0, 1.0),
                  ((0.0, 4.0), 2.0, 0),
                  ((0.0, 5.0), 3.0, 0),

                  ((0.0, 6.0), 4.0, 0),
                  ((0.0, 7.0), 3.0, 1.0),
                  ((0.0, 8.0), 4.0, 0),

                  ((0.8, 5.6), 4.0, 0),
                  ((1.6, 6.2), 3.0, 1.0),
                  ((2.4000000000000004, 6.8), 3.0, 1.0),
                  ((3.2, 7.4), 3.0, 1.0),
                  ((4.0, 8.0), 4.0, 0),

                  ((1.0, 0.0), 2.0, 0),
                  ((2.0, 0.0), 1.0, 1.0),
                  ((3.0, 0.0), 2.0, 0),
                  ((3.5, 0.0), 3.0, 0),
                  ((5.0, 0.0), 3.0, 0),

                  ((1.0, 5.0), 4.0, 0),
                  ((2.0, 5.0), 3.0, 1.0),
                  ((3.0, 5.0), 4.0, 0),

                  ((4.0, 5.0), 4.2, 0),
                  ((5.0, 5.0), 4.6, 0),
                  ((6.0, 5.0), 5.0, 0),
                  ]

        current = [(n, G.node[n]['obj'].z, G.node[n]['obj'].flooded)
                   for n in sorted(G.node)]

        self.assertEqual(sorted(target), current)
    def test100(self):
        "watering a complex network"

        pool = {}
        G = nx.Graph()
        parse("lizard_riool/data/4F1 asfalt werk.RMB", pool)
        convert_to_graph(pool, G)
        compute_lost_water_depth(G, (138736.31, 485299.37))
    def test020(self):
        "file is read into dictionary and first values are Riool objects"

        pool = {}
        parse("lizard_riool/data/f3478-bb.rmb", pool)
        target = [Riool] * len(pool)
        current = [pool[k][0].__class__ for k in sorted(pool.keys())]
        self.assertEqual(target, current)
    def test010(self):
        "file is read into dictionary and values have correct length"

        pool = {}
        parse("lizard_riool/data/f3478-bb.rmb", pool)
        target = [5, 5, 3, 3, 3, 5]
        current = [len(pool[k]) for k in sorted(pool.keys())]
        self.assertEqual(target, current)
    def test000(self):
        "file is read into dictionary and keys are correct"

        pool = {}
        parse("lizard_riool/data/f3478-bb.rmb", pool)
        target = ['6400001', '6400002', '6400003',
                  '6400004', '6400005', '6400006']
        current = sorted(pool.keys())
        self.assertEqual(target, current)
    def test000(self):
        "raise error on an inconsistent request"

        pool = {}
        parse("lizard_riool/data/f3478-bb.rmb", pool)
        self.assertRaises(KeyError,
                          string_of_riool_to_string_of_rioolmeting,
                          pool,
                          ['6400001', '6400003', '6400004'])
    def test030(self):
        "testing a ZYB == 2 string"

        pool = {}
        parse("lizard_riool/data/f3478.rmb", pool)
        mrios = string_of_riool_to_string_of_rioolmeting(
            pool, ['6400001', '6400002', '6400003', '6400004'])
        self.assertEqual(
            ['6400004:00001.50', '6400004:00000.75'],
            [mrios[-2].suf_id, mrios[-1].suf_id])
    def test010(self):
        "graph associates nodes with 'obj'"

        pool = {}
        G = nx.Graph()
        parse("lizard_riool/data/f3478-bb.rmb", pool)
        convert_to_graph(pool, G)

        target = [True] * len(G.node)
        current = ['obj' in G.node[i] for i in G.node]
        self.assertEqual(target, current)
    def test030(self):
        """file is read into dictionary and all subsequent values are
        Rioolmeting objects"""

        pool = {}
        parse("lizard_riool/data/f3478-bb.rmb", pool)
        target = [[Rioolmeting] * len(pool[k][1:])
                  for k in sorted(pool.keys())]
        current = [[i.__class__ for i in pool[k][1:]]
                   for k in sorted(pool.keys())]
        self.assertEqual(target, current)
    def test010(self):
        "simple case: everything read in same direction"

        pool = {}
        parse("lizard_riool/data/f3478-bb.rmb", pool)
        target = []
        target.extend(pool['6400002'][1:])
        target.extend(pool['6400003'][1:])
        target.extend(pool['6400004'][1:])
        current = string_of_riool_to_string_of_rioolmeting(
            pool, ['6400002', '6400003', '6400004'])
        self.assertEqual(target, current)
    def test012(self):
        "graph nodes have a Put or a Rioolmeting 'obj'"

        pool = {}
        G = nx.Graph()
        parse("lizard_riool/data/f3478-bb.rmb", pool)
        convert_to_graph(pool, G)

        target = [True] * len(G.node)
        current = [G.node[i]['obj'].__class__ in [Put, Rioolmeting]
                   for i in G.node]
        self.assertEqual(target, current)
    def test001(self):
        "we empty graph before we populate it"

        pool = {}
        G = nx.Graph()
        G.add_node('abc')
        parse("lizard_riool/data/f3478-bb.rmb", pool)
        convert_to_graph(pool, G)

        target = [tuple] * len(G.node)
        current = [i.__class__ for i in G.node]
        self.assertEqual(target, current)
    def test020(self):
        "graph nodes have a Put or a Rioolmeting 'obj'"

        self.maxDiff = None
        pool = {}
        G = nx.Graph()
        parse("lizard_riool/data/f3478-bb.rmb", pool)
        convert_to_graph(pool, G)

        target = [(3.0, 0.0), (3.0, 5.0), (0.0, 1.0),
                  (3.2000000000000002, 7.4000000000000004),
                  (1.6000000000000001, 6.2000000000000002),
                  (3.5, 0.0), (2.4000000000000004, 6.7999999999999998),
                  (0.0, 6.0), (0.0, 4.0), (0.0, 5.0),
                  (2.0, 0.0), (4.0, 5.0),
                  (6.0, 5.0), (2.0, 5.0), (0.0, 2.0),
                  (0.80000000000000004, 5.5999999999999996),
                  (0.0, 3.0), (0.0, 0.0), (5.0, 0.0),
                  (5.0, 5.0), (0.0, 7.0),
                  (1.0, 5.0), (1.0, 0.0), (4.0, 8.0),
                  (0.0, 8.0)]

        current = G.node.keys()
        self.assertEqual(sorted(target), sorted(current))

        manholes = sorted(
            [k for k in G.node if isinstance(G.node[k]['obj'], Put)])
        self.assertEqual([(0.0, 0.0), (0.0, 5.0), (0.0, 8.0),
                          (3.0, 5.0), (4.0, 8.0),
                          (5.0, 0.0), (6.0, 5.0)],
                         manholes)

        self.assertEqual([(0.0, 1.0), (1.0, 0.0)],
                         G.edge[(0.0, 0.0)].keys())
        self.assertEqual([(0.0, 6.0),
                          (0.80000000000000004, 5.5999999999999996),
                          (0.0, 4.0),
                          (1.0, 5.0)],
                         G.edge[(0.0, 5.0)].keys())
        self.assertEqual([(0.0, 7.0)],
                         G.edge[(0.0, 8.0)].keys())
        self.assertEqual([(4.0, 5.0), (2.0, 5.0)],
                         G.edge[(3.0, 5.0)].keys())
        self.assertEqual([(3.2000000000000002, 7.4000000000000004)],
                         G.edge[(4.0, 8.0)].keys())
        self.assertEqual([(3.5, 0.0)],
                         G.edge[(5.0, 0.0)].keys())
        self.assertEqual([(5.0, 5.0)],
                         G.edge[(6.0, 5.0)].keys())
    def test000(self):
        "nodes are 3D tuples"

        pool = {}
        G = nx.Graph()
        parse("lizard_riool/data/f3478-bb.rmb", pool)
        convert_to_graph(pool, G)

        target = [tuple] * len(G.node)
        current = [i.__class__ for i in G.node]
        self.assertEqual(target, current)

        target = [2] * len(G.node)
        current = [len(i) for i in G.node]
        self.assertEqual(target, current)
Example #16
def to_txt(dir='resumes/'):
    '''
    convert the CVs to plain text and save a mapping of their id and path
    '''
    i = 0  # numeric id
    files = pr.explore(dir)  # get list of all supported files

    # lists of cv details
    cv = []
    cv_txt = []
    cv_id = []

    for f in files:
        if (pr.parse(f, i) == 1):
            # add cv details
            cv_id.append(i)
            cv.append(f)
            cv_txt.append('corpus/op/' + str(i) + '.txt')
            i += 1

    d = {
        'cid': cv_id,
        'cv': cv,
        'txt': cv_txt
    }  # make dataframe of cv-id-path mapping
    df = pd.DataFrame(d)
    df = df.set_index('cid')  # set_index returns a new frame, so keep the result
    print(df)
    df.to_csv('db.csv')
def box_office(movie_names):
    """Movie selection. 
    """
    movie_names = [movie.lower() for movie in movie_names]
    text = "Welcome to the box office. Which movie would you like to watch? "
    text += "We have tickets for %s" % englishify(movie_names)
    text = wrap_text(text, "GoodNews")
    speak(text)
    
    while True:
        inp = get_input()
        resp = parse(inp, WKSPACE_ID)
        if get_intent(resp) == 'buy_ticket':
            entities = get_entities(resp)
            movie_choice = entities[0]
            if movie_choice in movie_names:
                break
            else:
                msg = "Sorry, we're not currently showing %s at the moment. "\
                        % movie_choice
                msg += "Please choose another movie to watch."
                speak(wrap_text(msg, "Apology"))
        else:
            e_msg = "Sorry, I didn't understand what you said. Could you try rephrasing?"
            speak(wrap_text(e_msg, "Apology"))

    text = "Here's your ticket. Enjoy the show. "
    text += "Would you like to go to the concessions or the auditorium?"
    text = wrap_text(text, "GoodNews")
    speak(text)

    return {'movie_choice': movie_choice}
    def test200(self):
        "watering a less complex network"

        pool = {}
        G = nx.Graph()
        parse("lizard_riool/data/f3478.rmb", pool)
        convert_to_graph(pool, G)
        self.assertEqual(
            [-4.5, -2.4029999999999996, -1.28],
            [i.z for i in pool['6400001'][1:]])
        self.assertEqual(
            [-4.0, -2.8452994616207485, -4.0],
            [i.z for i in pool['6400002'][1:]])
        self.assertEqual(
            [-1.8, -1.2000000000000002, -1.3000000000000003],
            [i.z for i in pool['6400003'][1:]])
        self.assertEqual([0.0, 1.046], [i.z for i in pool['6400004'][1:]])
    def test200(self):
        "watering a simple network, ZYB == 2 strings"

        pool = {}
        G = nx.Graph()
        parse("lizard_riool/data/f3478_2zyb2.rmb", pool)
        convert_to_graph(pool, G)
        # sink = node 1 of Riool 6400001 = 64D0001
        sink = tuple(pool['6400001'][0].point(1, False)[:2])
        compute_lost_water_depth(G, sink)
        target = [0, 0, 0]
        current = [
            pool['6400001'][1].flooded,
            pool['6400001'][2].flooded,
            pool['6400001'][3].flooded
            ]
        self.assertEqual(target, current)
    def test300(self):
        """Testing MRIO with ZYR=A (slope) and ZYS=E/F (degrees/%).

        The distance (ZYA) should be the hypotenuse!?
        """

        pool = {}
        G = nx.Graph()
        parse("lizard_riool/data/f3478.rmb", pool)
        convert_to_graph(pool, G)
        # Slope in degrees
        target = -5.0 + math.sin(math.pi / 4)
        current = pool['6400002'][1].z
        self.assertEqual('%.4f' % target, '%.4f' % current)
        # Slope in percentage
        target = -2.0 + math.sin(math.atan(0.2))
        current = pool['6400003'][1].z
        self.assertEqual('%.4f' % target, '%.4f' % current)
Example #21
def test_parser_parse_pose():
    snapshot = utils.protocol.Snapshot(1234)
    snapshot.translation = (1.0, 2.0, 3.0)
    snapshot.rotation = (0.5, -0.5, 0.25, 0.75)
    parsed_data = parsers.parse('pose', (200639318).to_bytes(8, 'little') + \
        (1234).to_bytes(8, 'little') + snapshot.serialize())
    assert parsed_data == {
        'translation': (1.0, 2.0, 3.0),
        "rotation": (0.5, -0.5, 0.25, 0.75)
    }
Example #22
def test_parser_parse_user():
    hello = utils.protocol.Hello(200639318, 'Zeevi Iosub',
                                 datetime(1988, 4, 27).timestamp(), 'm')
    parsed_data = parsers.parse('user', hello.serialize())
    assert parsed_data == {
        'user_id': 200639318,
        'username': '******',
        'birth_date': datetime(1988, 4, 27).timestamp(),
        'gender': 'm'
    }
Example #23
def parse(text):
    log("Checking for coherence in '{0}'".format(text), 2)

    family_hits = []
    family_stem_words = stemmed_words(family_words, 'family_words')
    for sentence in sentence_tokenizer.parse(text):
        tree = parsers.parse(sentence)[0]
        family_hits += [(a_tree.node, a_tree[0].lower(), stemmer.stem(a_tree[0].lower()) in family_stem_words) for a_tree in tree.subtrees(lambda x: x.node in noun_tags)]
    log("Family hits: {0}".format(family_hits), 4)
    family_hit_values = (len([hit for hit in family_hits if hit[2]]), len(family_hits))
    log("%d/%d" % family_hit_values, 3)

    work_hits = []
    work_stem_words = stemmed_words(work_words, 'work_words')
    for sentence in sentence_tokenizer.parse(text):
        tree = parsers.parse(sentence)[0]
        work_hits += [(a_tree.node, a_tree[0].lower(), stemmer.stem(a_tree[0].lower()) in work_stem_words) for a_tree in tree.subtrees(lambda x: x.node in noun_tags)]
    log("Work hits: {0}".format(work_hits), 4)
    work_hit_values = (len([hit for hit in work_hits if hit[2]]), len(work_hits))
    log("%d/%d" % work_hit_values, 3)

    return family_hit_values[0], work_hit_values[0], work_hit_values[1]
Example #24
def test_parser_parse_feelings():
    snapshot = utils.protocol.Snapshot(1234)
    snapshot.hunger = 1.0
    snapshot.thirst = -1.0
    snapshot.exhaustion = 0.0
    snapshot.happiness = -0.5
    parsed_data = parsers.parse('feelings', (200639318).to_bytes(8, 'little') + \
        (1234).to_bytes(8, 'little') + snapshot.serialize())
    assert parsed_data == {
        'hunger': 1.0,
        'thirst': -1.0,
        'exhaustion': 0.0,
        'happiness': -0.5
    }
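
The pose and feelings tests above both prepend the same 16-byte header to the serialized snapshot: the user id and the snapshot timestamp, each as an 8-byte little-endian integer. A minimal sketch of splitting that header off, assuming only the layout used in these tests (the real parsers module may handle this differently):

import struct

def split_header(data):
    """Split a message into (user_id, timestamp, snapshot_bytes).

    Assumes the layout used by the tests above: two unsigned 64-bit
    little-endian integers followed by the serialized snapshot.
    """
    user_id, timestamp = struct.unpack_from('<QQ', data, 0)
    return user_id, timestamp, data[16:]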
Example #25
    def __init__(self, docx_file):

        self.file_dir = docx_file

        # Unzip docx file 
        unzip = zipfile.ZipFile(docx_file, 'r')
        document_xml = unzip.read('word/document.xml')
        footnotes_xml = unzip.read('word/footnotes.xml')
        endnotes_xml = unzip.read('word/endnotes.xml')

        # Extract all XML files (for testing)
        unzip.extractall(path='docx_extract')
        unzip.close()

        # Ensure main document text in unicode 
        if not isinstance(document_xml, str):
            document_xml = document_xml.decode()

        # Parse XML files
        self.paragraphs = parse(document_xml, PARA)
        self.footnotes = parse_notes(footnotes_xml, FOOTNOTE)
        self.endnotes = parse_notes(endnotes_xml, ENDNOTE)
        self.tables = parse(document_xml, TABLE)
Example #26
 def _listen(self, intent, entity_type='', n_entities=0, verify_with='', 
         context_key='', fail_message='', only_if=None):
     """Get input and listen for intent
     """
     if only_if and not self._check_only_if(*only_if):
         return
     assert self.workspace_id, 'No valid workspace ID'
     while True:
         # transcribe audio and parse it
         inp = self._input_fct()
         resp = parse(inp, self.workspace_id)
         if get_intent(resp) != intent.strip():
             error_msg = "Sorry, I didn't understand what you said. " +\
                     "Could you try rephrasing?"
             self._output_fct(error_msg)
             continue # mismatching intent so start over
         
         entities = get_entities(resp)
         # chop off entities if necessary
         if n_entities:
             entities = entities[:n_entities]
         # print(entities)
         
         # print('key:', context_key)
         if context_key:
             if verify_with:
                 # print('verifying with:', verify_with)
                 valid_entities = self._context[verify_with]
                 has_invalid_entities = False
                 for entity in entities:
                     if entity not in valid_entities:
                         has_invalid_entities = True
                         break
                 if has_invalid_entities:
                     # print('has invalid entities')
                     default_msg = "I didn't recognize something you said. " +\
                             "Could you repeat yourself?"
                     msg = fail_message if fail_message else default_msg
                     self._output_fct(msg)
                     continue
             if len(entities) == 0:
                 # print('entites has len 0')
                 pass
             elif len(entities) == 1:
                 # print('context updated')
                 self._context[context_key] = entities[0]
             else:
                 # print('context updated')
                 self._context[context_key] = entities
         return # none of the continues were hit
def concessions(menu):
    """Getting snacks. 
    """
    menu = [item.lower() for item in menu]
    bought = []
    text = "What can I get for you? We have " 
    text += englishify(menu)
    text = wrap_text(text, "GoodNews")
    speak(text)

    while True:
        inp = get_input() 
        resp = parse(inp, WKSPACE_ID)
        intent = get_intent(resp)
        if intent == 'order_food':
            # print('in order_food')
            entities = get_entities(resp)
            missing = []
            available = []
            for item in entities:
                if item not in menu:
                    missing.append(item)
                elif item not in bought:
                    available.append(item)
                    bought.append(item)
            missing_msg = "" 
            if missing:
                missing_msg = "Sorry we don't have %s on our menu. "\
                        % englishify(missing, conj=False)
                missing_msg = wrap_text(missing_msg, 'Apology')
                # print(missing_msg)
            msg = "I'll get some %s for you. " % englishify(available)
            msg += "Can I get you anything else?" 
            speak(missing_msg + wrap_text(msg, 'GoodNews'))
        elif intent == 'done_ordering':
            # print('done ordering')
            break
        else:
            # print('misunderstanding')
            msg = "I'm sorry, I didn't understand what you said. Could you rephrase?"
            speak(wrap_text(msg, 'Apology'))
            
    text = "Thank you. Here's your %s. " % englishify(bought)
    text += "If you do not have your ticket yet, go to the box office."
    text += "Otherwise, you can go to the auditorium."
    text = wrap_text(text, "GoodNews")
    speak(text)

    return {'bought': bought}
Example #28
def watch(f):
    """Ask whether the use wants to stay and watch. If yes, play the gif at `f`,
    else do nothing
    """
    while True:
        inp = get_input() 
        resp = parse(inp, WKSPACE_ID)
        intent = get_intent(resp)
        if intent == 'watch':
            print('watching %s' % f) # TODO: TEMPORARY
            # runGif(f)
            return
        elif intent  == 'no_watch':
            return
        else:
            msg = "I'm sorry I don't understand what you said. Could you rephrase?"
            speak(wrap_text(msg, 'Apology'))
Example #29
def run_model(file_or_directory, param_map={}):

    # Determine if input is single or multi run.
    if os.path.isfile(file_or_directory):
        # Single run mode, open file and run.
        logger.debug("Importing file: {}".format(file_or_directory))
        input_file = open(file_or_directory, "r")

        # Parse into dataset.
        dataset = parse(input_file)
        logger.debug("Built dataset: {}".format(dataset.get("name")))
        for attribute in sorted(dataset.to_dict().keys()):
            logger.debug("    {} = {}".format(attribute, dataset.get(attribute)))

        # Parse model parameters and call model.
        result = None
        if param_map:
            logger.debug("Running model with custom parameters: {}".format(param_map))
            result = run_simulation(dataset, **param_map)
        else:
            logger.debug("Running model with default parameters.")
            result = run_simulation(dataset)

        # Run complete, return results.
        logger.info("Completed simulation run: {} ({} - {})".format(dataset.get("name"),
            dataset.get("start_year"), dataset.get("end_year")))
        return result

    elif os.path.isdir(file_or_directory):
        # Gather files and directories, and perform a recursive descent
        # into the contents, collecting results from runs.
        root_path = os.path.abspath(file_or_directory)
        dir_files = os.listdir(file_or_directory)
        dir_results = []

        for dir_file in dir_files:
            # Merge results, which may be lists of results, into a single list.
            abs_path = "{}/{}".format(root_path, dir_file)
            dir_results += [run_model(abs_path, param_map)]

        logger.info("Processed {} dataset runs.".format(len(dir_results)))
        return dir_results
    else:
        # Not a file or directory, exit.
        raise Exception("Not a file or directory: {}".format(file_or_directory))
def parse(text):
    treebank_rules = get_treebank_rules(cutoff=0)

    sentence_probs = []
    for line in text.split("\n"):
        sentences = sentence_tokenizer.parse(line)

        for sentence in sentences:

            # Add a period to the end of the sentence, which sometimes
            # forces a better parse
            #if sentence[-1] not in ('.', '!', '?'):
            #                    sentence += '.'

            parse_trees = parsers.parse(sentence)
            for tree in parse_trees:
                if cmd_utils.cmd_log_level() > 2:
                    print tree.pprint()

                evindenced_lexical_rules = set(lexical_rules(tree).keys())
                differences = evindenced_lexical_rules.difference(
                    treebank_rules)

                bad_generations = len(differences)
                log(
                    "Found {0} bad generations ({1})".format(
                        bad_generations, differences), 3)

                #bad_parse_prob = 1 if prob == 0 else 0
                #log("Scored {0} for prob {1}".format(bad_parse_prob, prob), 3)

                bad_tag_problems = num_tag_problems(tree)
                log("Found {0} X or FRAG tags".format(bad_tag_problems), 3)

                bad_sbar_problems = num_sbar_problems(tree)
                log("Found {0} bad SBAR issues".format(bad_sbar_problems), 3)

                total_problems = bad_sbar_problems + bad_tag_problems + bad_generations
                log("In '{0}'".format(sentence), 2)
                log(
                    "Found {0} sentence formation problems".format(
                        total_problems), 1)
                sentence_probs.append(total_problems)
    return sentence_probs
Example #31
def process(f, file):
  detail = None
  pure = None
  count = None
  p = None

  try:
    p = parsers.parse(f.parser, file, f.options)
  except parsers.exceptions.ParserError:
    error(f, "error: unable to parse the file \"" + file + "\".")

  try:
    pure, detail, count = styles.check(f.parser, f.style, p, f.options)
  except:
    traceback.print_exc()
    error(f, "error: FATAL !!")

  if pure or detail:
    f.pure += pure
    f.detail += detail
    f.count += count
def parse(text):
    treebank_rules = get_treebank_rules(cutoff=0)

    sentence_probs = []
    for line in text.split("\n"):
        sentences = sentence_tokenizer.parse(line)

        for sentence in sentences:

            # Add a period to the end of the sentence, which sometimes
            # forces a better parse
            #if sentence[-1] not in ('.', '!', '?'):
            #                    sentence += '.'

            parse_trees = parsers.parse(sentence)
            for tree in parse_trees:
                if cmd_utils.cmd_log_level() > 2:
                    print tree.pprint()

                evindenced_lexical_rules = set(lexical_rules(tree).keys())
                differences = evindenced_lexical_rules.difference(treebank_rules)

                bad_generations = len(differences)
                log("Found {0} bad generations ({1})".format(bad_generations, differences), 3)

                #bad_parse_prob = 1 if prob == 0 else 0
                #log("Scored {0} for prob {1}".format(bad_parse_prob, prob), 3)

                bad_tag_problems = num_tag_problems(tree)
                log("Found {0} X or FRAG tags".format(bad_tag_problems), 3)


                bad_sbar_problems = num_sbar_problems(tree)
                log("Found {0} bad SBAR issues".format(bad_sbar_problems), 3)

                total_problems = bad_sbar_problems + bad_tag_problems + bad_generations
                log("In '{0}'".format(sentence), 2)
                log("Found {0} sentence formation problems".format(total_problems), 1)
                sentence_probs.append(total_problems)
    return sentence_probs
Example #33
    def _get_next(self):
        """Gets the next node from the user and returns the appropriate node
        """
        if not self._current:
            return
        elif not self.neighbors(self._current): # current is a leaf node
            self._is_finished = True
            return

        while True:
            user_inp = self._input_fct() 
            resp = parse(user_inp, self.workspace_id)
            intent = get_intent(resp)

            if intent in self.neighbors(self._current):
                if self._is_runnable(intent): # will output something when False
                    return self._select(intent)
            elif intent:
                msg = "Sorry I can't go to %s" % user_inp
                self.output_fct(msg)
            else:
                msg = "Sorry I didn't catch that. Could you repeat yourself?"
                self.output_fct(msg)
Example #34
from parsers import parse
import _regexes
# this is a test for DOPs typeform

ua_string = 'Mozilla/5.0(iPad; U; CPU iPhone OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B314 Safari/531.21.10'
user_agent = parse(ua_string)
print user_agent.is_mobile # returns True
print user_agent.is_tablet # returns False
print user_agent.is_touch_capable # returns True
print user_agent.is_pc # returns False
print user_agent.is_bot # returns False
print str(user_agent) # returns "Samsung GT-I9300 / Android 4.0.4 / Android 4.0.4"
Example #35
import parsers as p

data = p.parse('testdata/sample.xlsx', p.EXCEL_DOC)
print data
Example #36
def test_parser_parse_depth_image():
    snapshot = utils.protocol.Snapshot(1234)
    parsed_data = parsers.parse('depth_image', (200639318).to_bytes(8, 'little') + \
        (1234).to_bytes(8, 'little') + snapshot.serialize())
    assert parsed_data == '../../static/200639318_1234_depth.png'
Example #37
def parse(text, use_cache=True):
    num_agrees = 0
    num_not_agrees = 0
    num_unsure = 0

    lines = text.split("\n")
    for line in lines:
        sentences = sentence_tokenizer.parse(line, use_cache=use_cache)
        for sentence in sentences:

            line_agreements, line_non_agreements, line_unsure = 0, 0, 0

            # Possession seems to be tricky for the parser, so we fudge
            # a little here
            sentence = sentence.replace("'s", '')
            if sentence[-1] != ".":
                sentence += "."

            if use_cache:
                cache_rs = cache_utils.cache_get('sub_verb_agreement',
                                                 sentence)
                if cache_rs:
                    line_agreements, line_non_agreements, line_unsure = cache_rs
                    num_agrees += line_agreements
                    num_not_agrees += line_non_agreements
                    num_unsure += line_unsure
                    continue

            log("Looking for Sub-Verb agreement in '%s'" % (sentence, ), 1)

            tree = parsers.parse(sentence)[0]
            dependencies = parsers.dependences(sentence)
            sub_verb_deps = [
                dep for dep in dependencies if dep['dep_name'] == 'nsubj'
            ]

            if len(sub_verb_deps) == 0:
                log("Couldn't find Subject-Verb dependency info", 1)
                cache_utils.cache_set('sub_verb_agreement', sentence,
                                      (0, 0, 0))
                continue

            for sub_verb in sub_verb_deps:
                first_node = node_in_tree(tree, sub_verb['first_word'])
                sec_node = node_in_tree(tree, sub_verb['second_word'])
                if first_node and sec_node:

                    log("First Dep Node: %s" % (first_node, ), 2)
                    log("Sec Dep Node: %s" % (sec_node, ), 2)

                    try:
                        is_agreement = check_node_agreement(
                            first_node, sec_node)
                        if is_agreement:
                            line_agreements += 1
                        else:
                            line_non_agreements += 1
                        log("Agreement in sentence? %s" % (is_agreement, ), 1)
                    except Exception as e:
                        line_unsure += 1
                        log("Error looking for agreement? %s" % (e.message, ),
                            2)

                        # No agreement in pair.  Not sure how to handle.
                        # More exhaustive search?
            if use_cache:
                cache_utils.cache_set(
                    'sub_verb_agreement', sentence,
                    (line_agreements, line_non_agreements, line_unsure))
            num_agrees += line_agreements
            num_not_agrees += line_non_agreements
            num_unsure += line_unsure

    return num_agrees, num_not_agrees, num_unsure
Example #38
def parse_sentences(line, use_cache=True, include_prob=False):

    log("Working on: %s" % (line, ), 2)

    if use_cache:
        correct_parse = cache_get("sentence_tokenizer", line)
        if correct_parse:
            log("Cache Hit: %s" % (correct_parse[0], ), 4)
            log("-------------\n", 4)
            return correct_parse if include_prob else correct_parse[0]

    all_possible_sentences = _possible_sentences_in_line(line)
    all_possible_sentence_probs = []
    invalid_possible_sentences = []
    stored_probs = {}

    for possible_sentences in all_possible_sentences:

        log("Examining: %s" % (possible_sentences, ), 1)
        prob_for_sentences = []
        sent_is_impossible = False

        for possible_sentence in possible_sentences:

            if use_cache:
                possible_sentence_prob = cache_get('possible_sentences',
                                                   possible_sentence)
                if possible_sentence_prob is not None:
                    log(
                        "Cache Hit: %s (from %s)" %
                        (possible_sentence, 'possible sentences'), 4)
                    prob_for_sentences.append(possible_sentence_prob)
                    continue

            if contains_any_invalid_setences(
                    possible_sentences,
                    invalid_possible_sentences) or sent_is_impossible:
                prob_for_sentences.append(0)
                continue
            elif possible_sentence in stored_probs:
                prob_for_sentences.append(stored_probs[possible_sentence])
                continue

            sentence_trees = parsers.parse(possible_sentence)
            if len(sentence_trees) == 0:
                log("Wasn't able to parse input %s" % (possible_sentence, ), 0)
                prob_for_sentences.append(0)
                invalid_possible_sentences.append(possible_sentence)
                sent_is_impossible = True
                continue
            else:
                sentence_tree = sentence_trees[0]

            if cmd_log_level() >= 4:
                print "--------"
                print "Pre Simplified Tree"
                print sentence_tree

            tree_utils.simplify_tree(
                sentence_tree,
                remove_starting_cc=possible_sentences.index(
                    possible_sentence) == 0)

            if cmd_log_level() >= 4:
                print "--------"
                print "Post Simplified Tree"
                print sentence_tree

            sentence_transitions = tree_utils.transitions_in_tree(
                sentence_tree)

            if not is_possible_sentence(sentence_tree):
                log("%s" % (sentence_transitions, ), 2)
                log("Invalid parse", 2)
                prob_for_sentences.append(0)
                invalid_possible_sentences.append(possible_sentence)
                sent_is_impossible = True
                if use_cache:
                    cache_set('possible_sentences', possible_sentence, 0)
            else:
                log("%s" % (sentence_transitions, ), 2)
                sentence_probs = []
                for transition in sentence_transitions:
                    try:
                        probs = hmm_utils.prob_of_all_transitions(transition,
                                                                  counts,
                                                                  gram_size=3)
                    except KeyError, e:
                        log("'Imposible' Tag order", 2, sep=' ** ')
                        log("%s" % (e, ), 2, sep=' ** ')
                        probs = [0]
                    sentence_probs += probs
                    log("Transitions: %s" % (transition, ), 3)
                    log("Probabilities: %s" % (probs, ), 3)

                attempt_sentence_prob = prod(sentence_probs)

                sentence_prob_boost = boost_for_sentence_tree(sentence_tree)
                attempt_sentence_prob *= sentence_prob_boost

                prob_for_sentences.append(attempt_sentence_prob)
                stored_probs[possible_sentence] = attempt_sentence_prob
                if use_cache:
                    cache_set('possible_sentences', possible_sentence,
                              attempt_sentence_prob)
        weighted_score = prod(prob_for_sentences) * (weight**(
            len(possible_sentences) - 1))
        if weighted_score > 0:
            log("Valid Parse: %s" % (possible_sentences, ), 2)
            log(weighted_score, 2)

        all_possible_sentence_probs.append(weighted_score)
Example #39
            # First write the header line
            text = [line.strip() for line in open(os.path.join(dirpath, name)).readlines() if len(line.strip()) > 1]
            row = [int(grade_utils.grade_text("\n".join(text), test)) for test in grade_utils.cols]

            row.append(round_to(float(sum(row) + row[3] + (row[5] * 2)) / 10, 0.5))
            new_line = ",".join([str(v) for v in row])
            output.append(new_line)
    f = open('output.txt', 'w')
    file_contents = "\n".join(output)
    f.write(file_contents)
    f.close()
    print "Finished writing %d scores to output.txt" % (len(output) - 1,)

elif score_stdin or parse_stdin:
    import tree_utils
    trees = parsers.parse(cmd_utils.get_stdin())
    for tree in trees:
        print tree
        if score_stdin:
            sentence_transitions = tree_utils.transitions_in_tree(tree)
            sentence_probs = []
            for transition in sentence_transitions:
                print "Transitions: %s" % (transition)
                probs = hmm_utils.prob_of_all_transitions(transition, counts, gram_size=3)
                print "Probs: %s" % (probs)
                sentence_probs += probs
            total = 1
            for prob in sentence_probs:
                total *= prob
            print "Total: %f" % (total,)
elif sentence_parse_stdin:
def parse_sentences(line, use_cache=True, include_prob=False):

    log("Working on: %s" % (line,), 2)

    if use_cache:
        correct_parse = cache_get("sentence_tokenizer", line)
        if correct_parse:
            log("Cache Hit: %s" % (correct_parse[0],), 4)
            log("-------------\n", 4)
            return correct_parse if include_prob else correct_parse[0]

    all_possible_sentences = _possible_sentences_in_line(line)
    all_possible_sentence_probs = []
    invalid_possible_sentences = []
    stored_probs = {}

    for possible_sentences in all_possible_sentences:

        log("Examining: %s" % (possible_sentences,), 1)
        prob_for_sentences = []
        sent_is_impossible = False

        for possible_sentence in possible_sentences:

            if use_cache:
                possible_sentence_prob = cache_get('possible_sentences', possible_sentence)
                if possible_sentence_prob is not None:
                    log("Cache Hit: %s (from %s)" % (possible_sentence, 'possible sentences'), 4)
                    prob_for_sentences.append(possible_sentence_prob)
                    continue

            if contains_any_invalid_setences(possible_sentences, invalid_possible_sentences) or sent_is_impossible:
                prob_for_sentences.append(0)
                continue
            elif possible_sentence in stored_probs:
                prob_for_sentences.append(stored_probs[possible_sentence])
                continue

            sentence_trees = parsers.parse(possible_sentence)
            if len(sentence_trees) == 0:
                log("Wasn't able to parse input %s" % (possible_sentence,), 0)
                prob_for_sentences.append(0)
                invalid_possible_sentences.append(possible_sentence)
                sent_is_impossible = True
                continue
            else:
                sentence_tree = sentence_trees[0]

            if cmd_log_level() >= 4:
                print "--------"
                print "Pre Simplified Tree"
                print sentence_tree

            tree_utils.simplify_tree(sentence_tree,
                                     remove_starting_cc=possible_sentences.index(possible_sentence) == 0)

            if cmd_log_level() >= 4:
                print "--------"
                print "Post Simplified Tree"
                print sentence_tree

            sentence_transitions = tree_utils.transitions_in_tree(sentence_tree)

            if not is_possible_sentence(sentence_tree):
                log("%s" % (sentence_transitions,), 2)
                log("Invalid parse", 2)
                prob_for_sentences.append(0)
                invalid_possible_sentences.append(possible_sentence)
                sent_is_impossible = True
                if use_cache:
                    cache_set('possible_sentences', possible_sentence, 0)
            else:
                log("%s" % (sentence_transitions,), 2)
                sentence_probs = []
                for transition in sentence_transitions:
                    try:
                        probs = hmm_utils.prob_of_all_transitions(transition, counts, gram_size=3)
                    except KeyError, e:
                        log("'Imposible' Tag order", 2, sep=' ** ')
                        log("%s" % (e,), 2, sep=' ** ')
                        probs = [0]
                    sentence_probs += probs
                    log("Transitions: %s" % (transition,), 3)
                    log("Probabilities: %s" % (probs,), 3)

                attempt_sentence_prob = prod(sentence_probs)

                sentence_prob_boost = boost_for_sentence_tree(sentence_tree)
                attempt_sentence_prob *= sentence_prob_boost

                prob_for_sentences.append(attempt_sentence_prob)
                stored_probs[possible_sentence] = attempt_sentence_prob
                if use_cache:
                    cache_set('possible_sentences', possible_sentence, attempt_sentence_prob)
        weighted_score = prod(prob_for_sentences) * (weight ** (len(possible_sentences) - 1))
        if weighted_score > 0:
            log("Valid Parse: %s" % (possible_sentences,), 2)
            log(weighted_score, 2)

        all_possible_sentence_probs.append(weighted_score)
Example #41
def fullparse(str):
    return func_to_term(p.parse(str))
Example #42
import tunepid
import sys

try:
    parsers = reload(parsers)
    pids = reload(pids)
    tunepid = reload(tunepid)
except:
    pass

# load config
if len(sys.argv) < 2:
    print 'Input a .xacro or .urdf file'
    print 'syntax : gen_pid.py <package> <urdf/xacro file>'
    sys.exit(0)        
pid, Umax, Fmax, mass, damping, config_file = parsers.parse(sys.argv[1], sys.argv[2])

axes = ('x','y','z','roll','pitch','yaw')
max_gains = {'p': 150., 'i': 50., 'd': 10.}


class TunePID(QtWidgets.QMainWindow):

    def __init__(self, parent=None):
        QtWidgets.QMainWindow.__init__(self, parent)
        self.ui = tunepid.Ui_TunePID()
        self.ui.setupUi(self)
        
        self.psim = pids.Sim('p')
        self.vsim = pids.Sim('v')
        self.ui.p_sim.addWidget(self.psim.canvas)
Example #43
    'a': 'a_example.txt',
    'b': 'b_read_on.txt',
    'c': 'c_incunabula.txt',
    'd': 'd_tough_choices.txt',
    'e': 'e_so_many_books.txt',
    'f': 'f_libraries_of_the_world.txt',
}


numero_libros = []
tiempos_registro = {}

in_file = 'input_data/' + BOOKS[sys.argv[1]]
out_file = 'output_data/' + BOOKS[sys.argv[1]]
print(in_file)
books, libraries, days = parse(in_file)


import pickle
with open('libros.pkl', 'wb') as f:
    pickle.dump(books, f)

with open('libraries.pkl', 'wb') as f:
    pickle.dump(libraries, f)

print('volcado')
def mejor_biblio(libraries, scanned):
    puntuaciones = []
    best_library = libraries[0]
    rest = []
    best_score = 0
Example #44
def parse_query(query):
    def append_operator(term):
        assert not(lastType in (BINARY_OPERATOR, UNARY_OPERATOR) and get_type(term) == BINARY_OPERATOR)

        if get_type(term) == UNARY_OPERATOR and lastType == TERM:
            operators.append('AND')
        
        while len(operators) > 0 and OPERATORS[term][1] < OPERATORS[operators[-1]][1]:
            if get_type(operators[-1]) == UNARY_OPERATOR:
                terms.append( OPERATORS[ operators.pop() ][0](terms.pop()) )
            else:
                assert get_type(operators[-1]) == BINARY_OPERATOR
                terms.append( OPERATORS[ operators.pop() ][0] ( terms.pop(), terms.pop() ) )
            
        operators.append(term)


    for r in list(OPERATORS.keys()) + list(MODIFIERS.keys()) + ['(',')']:
        query = query.replace(r, ' ' + r + ' ')
    query = query.split(' ')

    terms = []
    operators = []
    lastType = BINARY_OPERATOR

    parenthesis_level = 0
    parenthesis_start = -1

    modifier = None
    modifier_terms = []

    for pos, term in enumerate(query):
        if not term:
            continue

        # Parenthesis
        if term == '(':
            parenthesis_level += 1
            if parenthesis_level == 1:
                parenthesis_start = pos + 1
        elif term == ')':
            parenthesis_level -= 1
            if parenthesis_level == 0:
                if lastType == TERM:
                    append_operator('AND')
                terms.append( parse_query(' '.join(query[parenthesis_start:pos])) )
                lastType = TERM
            continue
        if parenthesis_level > 0:
            continue

        # Modifier
        if get_type(term) == MODIFIER:
            if modifier is None:
                modifier = MODIFIERS[term]
            else:
                assert MODIFIERS[term] == modifier

                if lastType == TERM:
                    append_operator('AND')

                terms.append(modifier(modifier_terms))
                lastType = TERM
                modifier = None
                modifier_terms = []
            continue
        if modifier is not None:
            term_list = parse(term)
            modifier_terms.extend(nodes.KwNode(i) for i in term_list)
            continue

        # Operator or terms

        if get_type(term) in (BINARY_OPERATOR, UNARY_OPERATOR):
            append_operator(term)

        else:
            term_list = tuple(parse(term))
            if len(term_list) == 0:
                continue
            elif len(term_list) == 1:
                terms.append(nodes.KwNode(term_list[0]))
            else:
                terms.append(nodes.ExactNode([nodes.KwNode(i) for i in term_list]))

            if lastType == TERM:
                append_operator('AND')
        
        lastType = get_type(term)

    assert len(terms) > 0

    while len(terms) > 1:
        if get_type(operators[-1]) == UNARY_OPERATOR:
            terms.append( OPERATORS[ operators.pop() ][0](terms.pop()) )
        else:
            assert get_type(operators[-1]) == BINARY_OPERATOR
            terms.append( OPERATORS[ operators.pop() ][0] ( terms.pop(), terms.pop() ) )

    return terms[0]
Example #45
 def test_scientific(self):
     assert parsers.parse("1.4E-5") == "14*10**-6"
     assert parsers.parse("+0045") == None
     assert parsers.parse("8.34e+3") == "834*10**1"
     assert parsers.parse("25.e25") == None
Example #46
def issues_in_sentence(sentence, use_cache=True):
    """'Brute force' check for a bunch of possible word ordering issues.
    Specifically, looking for the following:
        - VP coming before NP in standard sentence
        - NP coming before VP in inverted sentence
        - JJ coming after Noun in NP
        - VB before PP in VP
        - VB before NP in VP
        - VP before S in standard sentence (with embedded sentences)
        - NN before CD in NP
        - NNP before CD in NP
    """
    if use_cache:
        result = cache_get('word_order_issues', sentence)
        if result is not None:
            return result

    tree = parsers.parse(sentence)[0]
    tree_utils.simplify_tree(tree, trim_adjecent_prop_nouns=True,
                             normalize_sent_roots=True,
                             normalize_plural=True,
                             normalize_case=True)

    log("Looking for order issues in: %s" % (sentence,), 1)
    if cmd_log_level() >= 4:
        print "Simplified Parse Tree"
        print tree

    problems = []
    problems += ["VP->NP in S"] * num_forbidden_orders(tree, ("S",), ('VP', 'NP'))
    problems += ["NP->VP in SINV"] * num_forbidden_orders(tree, ('SINV',), ('NP', 'VP'))
    problems += ["NN->JJ in NP"] * num_forbidden_orders(tree, ('NP',), ('NN', 'JP'))

    problems += ["PP->VB in VP"] * num_forbidden_orders(tree, ('VP',), ('PP', 'VB'))
    problems += ["NP->VP in VP"] * num_forbidden_orders(tree, ('VP',), ('NP', 'VP'))

    problems += ["S->VP in S"] * num_forbidden_orders(tree, ('S',), ('S', 'VP'))

    problems += ["S->VB in VP"] * num_forbidden_orders(tree, ('VP',), ('S', 'VB'))
    # problems += ["VB->VP in VP"] * num_forbidden_orders(tree, ('VP',), ('VB', 'VP'))

    problems += ["NP->RBR in ADVP"] * num_forbidden_orders(tree, ('ADVP',), ('NP', 'RBR'))
    problems += ["NN->DT in NP"] * num_forbidden_orders(tree, ('NP',), ('NN', 'DT'))
    problems += ["NNP->DT in NP"] * num_forbidden_orders(tree, ('NP',), ('NNP', 'DT'))
    problems += ["NN->CD in NP"] * num_forbidden_orders(tree, ('NP',), ('NN', 'CD'))
    problems += ["NNP->CD in NP"] * num_forbidden_orders(tree, ('NP',), ('NNP', 'CD'))

    problems += ['PP->NP in S'] * num_forbidden_orders(tree, ('S',), ('PP', 'NP'))

    # Toggle?
    problems += ['NP->VP in NP'] * num_forbidden_orders(tree, ('NP',), ('NP', 'VP'))

    # Seems like it should be VB->ADVP->PP
    problems += ['VB->PP->ADVP in VP'] * num_forbidden_orders(tree, ('VP',), ('VB', 'PP', 'ADVP'))
    problems += ['VB->PP->SBAR in VP'] * num_forbidden_orders(tree, ('VP',), ('VB', 'PP', 'SBAR'))

    problems += ['NP->S in NP'] * num_forbidden_orders(tree, ('NP',), ('NP', 'S'))

    # Seems like the ADJP should be in a NP or somewhere else, not a sibling
    # of a noun phrase
    problems += ['NP->ADJP in S'] * num_forbidden_orders(tree, ('S',), ('NP', 'ADJP'))

    # Last, if there is an S w/ only one child, we call it a word order problem...
    problems += ['Single Child S'] * len(list(tree.subtrees(lambda x: x in tree_utils.semi_tree_roots and len(x) == 1)))

    if tree[0].node not in tree_utils.semi_tree_roots and not hasattr(tree[0], '_has_error'):
        tree[0]._has_error = True
        problems += ['No S Root']

    log("Found %d order issues" % (len(problems),), 1)
    log("Issues: %s", (problems,), 2)

    if use_cache:
        cache_set('word_order_issues', sentence, problems)

    return problems
Example #47
def parse(text):
    # Strip numbers out, since that seems to cause problems for my approach
    text = re.sub(r'\d+ ?', 'some ', text)

    sentences = sentence_tokenizer.parse(text)
    sentence_pronouns = []

    for sentence in sentences:
        log("Looking for pronouns in '{0}'".format(sentence), 2)

        pronoun_totals = [[], [], []]
        tree = parsers.parse(sentence)[0]
        pronoun_trees = tree.subtrees(lambda x: x.node in pronoun_tags)
        for pronoun_tree in pronoun_trees:
            # First total up all the first person pronouns
            for i in range(3):
                if pronoun_tree[0].lower() in pronouns[i]:
                    pronoun_totals[i].append(pronoun_tree[0])
        log("First Person '{0}'".format(pronoun_totals[0]), 3)
        log("Second Person '{0}'".format(pronoun_totals[1]), 3)
        log("Third Person '{0}'".format(pronoun_totals[2]), 3)
        sentence_pronouns.append(pronoun_totals)

    log("Pronouns found in text: %s" % (sentence_pronouns), 2)

    # If there are 3rd person pronouns in any sentence, we have to decide
    # if they are used correctly.  We do this in the following, very
    # expensive, but possibly correct manner.
    #
    # Start from the top down
    #   1. Look back 2 sentences and see if we can find a reference.
    #       IF NOT - it's an error and do no more
    #   2. If so, replace the referenced word with "RUNNING"
    #      and search again, to see if there is a previous word it could refer
    #      to.
    #       IF NOT, it's correct.  Replace the pronoun with the referenced word
    #       and continue
    #   3. Else, it's not felicitous.  Give bad credit
    for i in range(len(sentences)):
        if len(sentence_pronouns[i][2]) > 0:
            pronoun_results = []
            for third_pronoun in sentence_pronouns[i][2]:
                all_sentences = sentences[max(0, i - 2):i + 1]
                norm_sentences = ". ".join(
                    [a_sen.strip(".") for a_sen in all_sentences]) + "."
                log(
                    "Looking for pronoun coherence for '{0}'".format(
                        norm_sentences), 4)
                pronouns_refs = parsers.parse_coref(norm_sentences)

                log("Recieved co-references {0}".format(pronouns_refs), 5)

                found_bundle = False

                for j in range(len(pronouns_refs)):
                    if third_pronoun == pronouns_refs[j]['pronoun']:
                        found_bundle = pronouns_refs[j]
                        break

                if not found_bundle:
                    log("Found NO anticedent for {0}".format(third_pronoun), 3)
                    pronoun_results.append((third_pronoun, -1))
                else:
                    log("Found anticedent for {0}".format(third_pronoun), 3)
                    ref_index = int(found_bundle['ref_sentence']) - 1 + (i - 2)

                    sentences[ref_index] = sentences[ref_index].replace(
                        found_bundle['ref'], 'RUNNING')
                    log(
                        "Replacing '{0}' with 'RUNNING'".format(
                            found_bundle['ref']), 3)

                    altered_sentences = sentences[max(0, i - 2):i + 1]
                    norm_altered_sentences = ". ".join(
                        [a_sen.strip(".")
                         for a_sen in altered_sentences]) + "."
                    log(
                        "New test sentences are '{0}'".format(
                            norm_altered_sentences), 4)
                    altered_pronouns_refs = parsers.parse_coref(
                        norm_altered_sentences)

                    if third_pronoun not in [
                            a_ref['pronoun'] for a_ref in altered_pronouns_refs
                    ]:
                        log("Anticedent is unambigious!", 3)

                        pro_index = int(
                            found_bundle['pronoun_sentence']) - 1 + (i - 2)
                        sentences[pro_index] = sentences[pro_index].replace(
                            found_bundle['pronoun'], found_bundle['ref'])

                        pronoun_results.append(
                            (third_pronoun, found_bundle['ref']))
                    else:
                        log("Anticedent is ambigious", 3)
                        log("New Sentences: {0}".format(altered_pronouns_refs),
                            4)
                        pronoun_results.append((third_pronoun, .5))
            sentence_pronouns[i][2] = pronoun_results
    return sentence_pronouns
Example #48
blackberry_bold_touch_ua_string = 'Mozilla/5.0 (BlackBerry; U; BlackBerry 9930; en-US) AppleWebKit/534.11+ (KHTML, like Gecko) Version/7.0.0.241 Mobile Safari/534.11+'
windows_rt_ua_string = 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; ARM; Trident/6.0)'
j2me_opera_ua_string = 'Opera/9.80 (J2ME/MIDP; Opera Mini/9.80 (J2ME/22.478; U; en) Presto/2.5.25 Version/10.54'
ie_ua_string = 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0)'
ie_touch_ua_string = 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0; Touch)'
mac_safari_ua_string = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.13+ (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2'
windows_ie_ua_string = 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)'
ubuntu_firefox_ua_string = 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:15.0) Gecko/20100101 Firefox/15.0.1'
google_bot_ua_string = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
nokia_n97_ua_string = 'Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/12.0.024; Profile/MIDP-2.1 Configuration/CLDC-1.1; en-us) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.12344'
android_firefox_aurora_ua_string = 'Mozilla/5.0 (Android; Mobile; rv:27.0) Gecko/27.0 Firefox/27.0'
thunderbird_ua_string = 'Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.2.0 Lightning/4.0.2'
outlook_usa_string = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/6.0; Microsoft Outlook 15.0.4420)'
chromebook_ua_string = 'Mozilla/5.0 (X11; CrOS i686 0.12.433) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.77 Safari/534.30'

iphone_ua = parse(iphone_ua_string)
ipad_ua = parse(ipad_ua_string)
galaxy_tab = parse(galaxy_tab_ua_string)
galaxy_s3_ua = parse(galaxy_s3_ua_string)
kindle_fire_ua = parse(kindle_fire_ua_string)
playbook_ua = parse(playbook_ua_string)
nexus_7_ua = parse(nexus_7_ua_string)
windows_phone_ua = parse(windows_phone_ua_string)
windows_rt_ua = parse(windows_rt_ua_string)
blackberry_torch_ua = parse(blackberry_torch_ua_string)
blackberry_bold_ua = parse(blackberry_bold_ua_string)
blackberry_bold_touch_ua = parse(blackberry_bold_touch_ua_string)
j2me_opera_ua = parse(j2me_opera_ua_string)
ie_ua = parse(ie_ua_string)
ie_touch_ua = parse(ie_touch_ua_string)
mac_safari_ua = parse(mac_safari_ua_string)
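
The parse() calls above return UserAgent objects whose boolean flags and nested browser/os/device fields are what downstream code typically inspects. A minimal sketch of that inspection, assuming the user_agents library and one of the strings defined above:

from user_agents import parse

ua = parse(ie_touch_ua_string)

# High-level device classification flags exposed by the parsed object.
print(ua.is_mobile, ua.is_tablet, ua.is_pc, ua.is_touch_capable, ua.is_bot)

# Browser, OS and device families extracted from the raw string.
print(ua.browser.family, ua.browser.version_string)
print(ua.os.family, ua.os.version_string)
print(ua.device.family)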
Beispiel #49
0
import os

from docopt import docopt
from parsers import parse
from my_json_dumps import my_json_dumps
from make_diff_dict import make_diff_dict
from make_diff_list import make_diff_list

if __name__ == '__main__':
    args = docopt(__doc__, version='gendiff 0.3')

    file_1, file_2 = args['<firstConfig>'], args['<secondConfig>']

    if not os.path.isfile(file_1):
        print('"{}" is not file'.format(file_1))
    elif not os.path.isfile(file_2):
        print('"{}" is not file'.format(file_2))

    else:
        before = parse(file_1)
        after = parse(file_2)

        if args['--format'] == 'plain':
            diff = make_diff_list(before, after, [], [])
            for row in diff:
                print(row)

        else:
            diff = make_diff_dict(before, after, {})
            pretty = my_json_dumps(diff)
            print(pretty)
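
docopt builds args from the module docstring, which is not included in the excerpt above. A hypothetical docstring consistent with the options the snippet reads (<firstConfig>, <secondConfig>, --format) might look like this:

"""Generate diff between two configuration files.

Usage:
  gendiff [--format <fmt>] <firstConfig> <secondConfig>

Options:
  -h --help       show this help
  --version       show version
  --format <fmt>  output format: plain or json [default: json]
"""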
Beispiel #50
0
                for test in grade_utils.cols
            ]

            row.append(
                round_to(float(sum(row) + row[3] + (row[5] * 2)) / 10, 0.5))
            new_line = ",".join([str(v) for v in row])
            output.append(new_line)
    f = open('output.txt', 'w')
    file_contents = "\n".join(output)
    f.write(file_contents)
    f.close()
    print "Finished writing %d scores to output.txt" % (len(output) - 1, )

elif score_stdin or parse_stdin:
    import tree_utils
    trees = parsers.parse(cmd_utils.get_stdin())
    for tree in trees:
        print tree
        if score_stdin:
            sentence_transitions = tree_utils.transitions_in_tree(tree)
            sentence_probs = []
            for transition in sentence_transitions:
                print "Transitions: %s" % (transition)
                probs = hmm_utils.prob_of_all_transitions(transition,
                                                          counts,
                                                          gram_size=3)
                print "Probs: %s" % (probs)
                sentence_probs += probs
            total = 1
            for prob in sentence_probs:
                total *= prob
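
The final loop above multiplies many small per-transition probabilities, which can underflow toward 0.0 for longer sentences. A common alternative, not used in the original snippet, is to accumulate log-probabilities instead; a minimal sketch under that assumption:

import math

# Hypothetical per-transition probabilities, standing in for sentence_probs above.
sentence_probs = [0.12, 0.4, 0.003, 0.25]

# Summing logs is numerically safer than multiplying the raw probabilities.
log_total = sum(math.log(p) for p in sentence_probs)
print("log probability: %.4f (prob %.6g)" % (log_total, math.exp(log_total)))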
Beispiel #51
0
def pass_up(self, data):
    val = parse(self.encoding, data)
    print("Got data id=%d: " % self.id, val)
    sim.set(self.fsxid, float(val))
Beispiel #52
0
stl_headers = r""

# Path to the C header files
c_headers = r""

# Other macros
target_macros = []

# Directories containing the header files
header_paths = [
    r"./src/service", r"./src/viewmodel", r"./src/viewmodel/TestViewModel"
]

# Parse the entities out of all the files
models, enums, viewmodels, services = parse(full_paths, libclang_path,
                                            stl_headers, c_headers,
                                            target_macros, header_paths)

# Print the number of entities parsed
print('Parsed' + (' models: %d' % len(models) if len(models) else '') +
      (' enums: %d' % len(enums) if len(enums) else '') +
      (' viewmodels: %d' % len(viewmodels) if len(viewmodels) else '') +
      (' services: %d' % len(services) if len(services) else ''))

# Print detailed information for the parsed models, view models and enums
for model in models:
    print(model.accept_printer(printers.JSONPrinter()))
for viewmodel in viewmodels:
    print(viewmodel.accept_printer(printers.JSONPrinter()))
for enum in enums:
    print(enum.accept_printer(printers.JSONPrinter()))
Beispiel #53
0
    analogy_file = datasets.getpath(options.dataset, config, eval_mode.ALL_INFO)

    configlogger.writeConfig(log, settings=[
        ('Config file', options.config),
        ('Dataset', options.dataset),
        ('Path to dataset', analogy_file),
        ('Lowercasing analogies', options.to_lower),
        ('Output vocab file', vocabf),
    ], title='Vocabulary extraction from analogy dataset')

    log.writeln('Reading %s analogies from %s...' % (options.dataset, analogy_file))
    analogies = parsers.parse(
        analogy_file,
        options.dataset,
        eval_mode.ALL_INFO,
        data_mode.String,
        to_lower=options.to_lower
    )
    log.writeln('Read {0:,} analogies in {1:,} relations.\n'.format(
        sum([len(anlg_set) for anlg_set in analogies.values()]),
        len(analogies)
    ))

    log.writeln('Extracting vocabulary...')
    vocab = set()
    for (_, anlg_set) in analogies.items():
        for (a, b, c, d) in anlg_set:
            vocab.add(a)
            vocab.add(c)
            if options.dataset != datasets.Google:
Beispiel #54
0
def analogyTask(analogy_file,
                dataset,
                setting,
                analogy_type,
                embeddings,
                log=log,
                report_top_k=5,
                predictions_file=None,
                predictions_file_mode='w',
                to_lower=False):
    analogies = parsers.parse(analogy_file,
                              dataset,
                              setting,
                              analogy_type,
                              to_lower=to_lower)

    # if we're saving the predictions, start that file first
    if predictions_file:
        pred_stream = codecs.open(predictions_file, predictions_file_mode,
                                  'utf-8')

    # build the analogy completion model
    (vocab, emb_arr) = embeddings.toarray()
    vocab_indexer = {vocab[i]: i for i in range(len(vocab))}
    sess = tf.Session()
    grph = AnalogyModel(sess, emb_arr)

    completed, results = 0, {}
    for (relation, rel_analogies) in analogies.items():
        t_file = log.startTimer('  Starting relation: %s (%d/%d)' %
                                (relation, completed + 1, len(analogies)))

        rel_results = completeAnalogySet(rel_analogies,
                                         setting,
                                         emb_arr,
                                         vocab,
                                         vocab_indexer,
                                         grph,
                                         report_top_k,
                                         log=log)
        results[relation] = rel_results

        (correct, MAP, MRR, total, skipped, predictions) = rel_results
        log.stopTimer(
            t_file,
            message=
            '  Completed file: %s (%d/%d) [{0:.2f}s]\n    >> Skipped %d/%d' %
            (relation, completed + 1, len(analogies), skipped, total))

        if predictions_file:
            pred_stream.write(('{0}\n  %s\n{0}\n'.format('-' * 79)) % relation)
            for prediction in predictions:
                ((a, b, c, d), is_correct, num_candidates, top_k) = prediction
                pred_stream.write(
                    '\n%s:%s::%s:%s\nCorrect: %s\nPredictions: %d\n%s\n' %
                    (a, b, c, d, str(is_correct), num_candidates, '\n'.join(
                        [('    %s' % guess) for guess in top_k])))

        completed += 1

    # tie off the predictions file
    if predictions_file: pred_stream.close()

    return results
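
For context, a hypothetical call to analogyTask is sketched below. The dataset and setting constants, the file path, and the embeddings object (which must expose a toarray() method returning a (vocab, matrix) pair, as used above) all come from the surrounding project and are only placeholders here.

# Hypothetical usage sketch; paths, dataset/setting constants and the embeddings
# wrapper are placeholders, not part of the original snippet.
results = analogyTask(
    'data/analogies.txt',   # analogy_file (placeholder path)
    dataset,                # dataset constant from the project's datasets module
    setting,                # evaluation setting constant
    analogy_type,
    embeddings,             # must provide .toarray() -> (vocab, emb_arr)
    report_top_k=5,
    predictions_file='predictions.txt',
    to_lower=True,
)

# Each relation maps to the tuple built above: (correct, MAP, MRR, total, skipped, predictions).
for relation, (correct, MAP, MRR, total, skipped, predictions) in results.items():
    print(relation, correct, total, MAP, MRR)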