def parse(text):
    log("Checking for coherence in '{0}'".format(text), 2)

    family_hits = []
    family_stem_words = stemmed_words(family_words, 'family_words')
    for sentence in sentence_tokenizer.parse(text):
        tree = parsers.parse(sentence)[0]
        family_hits += [(a_tree.node, a_tree[0].lower(),
                         stemmer.stem(a_tree[0].lower()) in family_stem_words)
                        for a_tree in tree.subtrees(lambda x: x.node in noun_tags)]
    log("Family hits: {0}".format(family_hits), 4)
    family_hit_values = (len([hit for hit in family_hits if hit[2]]), len(family_hits))
    log("%d/%d" % family_hit_values, 3)

    work_hits = []
    work_stem_words = stemmed_words(work_words, 'work_words')
    for sentence in sentence_tokenizer.parse(text):
        tree = parsers.parse(sentence)[0]
        work_hits += [(a_tree.node, a_tree[0].lower(),
                       stemmer.stem(a_tree[0].lower()) in work_stem_words)
                      for a_tree in tree.subtrees(lambda x: x.node in noun_tags)]
    log("Work hits: {0}".format(work_hits), 4)
    work_hit_values = (len([hit for hit in work_hits if hit[2]]), len(work_hits))
    log("%d/%d" % work_hit_values, 3)

    return family_hit_values[0], work_hit_values[0], work_hit_values[1]
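A minimal usage sketch for the coherence checker above; the module name coherence and the sample text are assumptions, and the NLP backends it relies on (parsers, sentence_tokenizer, stemmer, the word lists) must already be configured for it to run.

# Hedged usage sketch: the module name "coherence" and the sample text are assumptions.
from coherence import parse

family_nouns, work_nouns, nouns_seen = parse("My brother started a new job at the office.")
print("family: %d, work: %d, nouns seen: %d" % (family_nouns, work_nouns, nouns_seen))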
def test000(self):
    "watering a simple network"
    # XXX DISABLED by Remco
    #
    # First, the compute_lost_water_depth function has changed slightly
    # (the z values of puts are now different). This needs to be reflected
    # in this function.
    #
    # But, the reported flooding depth has also changed, and currently
    # I trust the function better than this test. It should probably be
    # replaced by a set of smaller tests that can more easily be inspected
    # by hand.
    return

    pool = {}
    G = nx.Graph()
    parse("lizard_riool/data/f3478-bb.rmb", pool)
    convert_to_graph(pool, G)
    compute_lost_water_depth(G, (0.0, 0.0))
    target = [((0.0, 0.0), 0.0, 0),
              ((0.0, 1.0), 2.0, 0),
              ((0.0, 2.0), 0.0, 2.0),
              ((0.0, 3.0), 1.0, 1.0),
              ((0.0, 4.0), 2.0, 0),
              ((0.0, 5.0), 3.0, 0),
              ((0.0, 6.0), 4.0, 0),
              ((0.0, 7.0), 3.0, 1.0),
              ((0.0, 8.0), 4.0, 0),
              ((0.8, 5.6), 4.0, 0),
              ((1.6, 6.2), 3.0, 1.0),
              ((2.4000000000000004, 6.8), 3.0, 1.0),
              ((3.2, 7.4), 3.0, 1.0),
              ((4.0, 8.0), 4.0, 0),
              ((1.0, 0.0), 2.0, 0),
              ((2.0, 0.0), 1.0, 1.0),
              ((3.0, 0.0), 2.0, 0),
              ((3.5, 0.0), 3.0, 0),
              ((5.0, 0.0), 3.0, 0),
              ((1.0, 5.0), 4.0, 0),
              ((2.0, 5.0), 3.0, 1.0),
              ((3.0, 5.0), 4.0, 0),
              ((4.0, 5.0), 4.2, 0),
              ((5.0, 5.0), 4.6, 0),
              ((6.0, 5.0), 5.0, 0),
              ]
    current = [(n, G.node[n]['obj'].z, G.node[n]['obj'].flooded)
               for n in sorted(G.node)]
    self.assertEqual(sorted(target), current)
def test100(self):
    "watering a complex network"
    pool = {}
    G = nx.Graph()
    parse("lizard_riool/data/4F1 asfalt werk.RMB", pool)
    convert_to_graph(pool, G)
    compute_lost_water_depth(G, (138736.31, 485299.37))
def test020(self):
    "file is read into dictionary and first values are Riool objects"
    pool = {}
    parse("lizard_riool/data/f3478-bb.rmb", pool)
    target = [Riool] * len(pool)
    current = [pool[k][0].__class__ for k in sorted(pool.keys())]
    self.assertEqual(target, current)
def test010(self):
    "file is read into dictionary and values have correct length"
    pool = {}
    parse("lizard_riool/data/f3478-bb.rmb", pool)
    target = [5, 5, 3, 3, 3, 5]
    current = [len(pool[k]) for k in sorted(pool.keys())]
    self.assertEqual(target, current)
def test000(self):
    "file is read into dictionary and keys are correct"
    pool = {}
    parse("lizard_riool/data/f3478-bb.rmb", pool)
    target = ['6400001', '6400002', '6400003',
              '6400004', '6400005', '6400006']
    current = sorted(pool.keys())
    self.assertEqual(target, current)
def test000(self):
    "raise error on an inconsistent request"
    pool = {}
    parse("lizard_riool/data/f3478-bb.rmb", pool)
    self.assertRaises(KeyError,
                      string_of_riool_to_string_of_rioolmeting,
                      pool,
                      ['6400001', '6400003', '6400004'])
def test030(self):
    "testing a ZYB == 2 string"
    pool = {}
    parse("lizard_riool/data/f3478.rmb", pool)
    mrios = string_of_riool_to_string_of_rioolmeting(
        pool, ['6400001', '6400002', '6400003', '6400004'])
    self.assertEqual(
        ['6400004:00001.50', '6400004:00000.75'],
        [mrios[-2].suf_id, mrios[-1].suf_id])
def test010(self):
    "graph associates nodes with 'obj'"
    pool = {}
    G = nx.Graph()
    parse("lizard_riool/data/f3478-bb.rmb", pool)
    convert_to_graph(pool, G)
    target = [True] * len(G.node)
    current = ['obj' in G.node[i] for i in G.node]
    self.assertEqual(target, current)
def test030(self):
    """file is read into dictionary and all subsequent values are
    Rioolmeting objects"""
    pool = {}
    parse("lizard_riool/data/f3478-bb.rmb", pool)
    target = [[Rioolmeting] * len(pool[k][1:]) for k in sorted(pool.keys())]
    current = [[i.__class__ for i in pool[k][1:]] for k in sorted(pool.keys())]
    self.assertEqual(target, current)
def test010(self):
    "simple case: everything read in same direction"
    pool = {}
    parse("lizard_riool/data/f3478-bb.rmb", pool)
    target = []
    target.extend(pool['6400002'][1:])
    target.extend(pool['6400003'][1:])
    target.extend(pool['6400004'][1:])
    current = string_of_riool_to_string_of_rioolmeting(
        pool, ['6400002', '6400003', '6400004'])
    self.assertEqual(target, current)
def test012(self):
    "graph nodes have a Put or a Rioolmeting 'obj'"
    pool = {}
    G = nx.Graph()
    parse("lizard_riool/data/f3478-bb.rmb", pool)
    convert_to_graph(pool, G)
    target = [True] * len(G.node)
    current = [G.node[i]['obj'].__class__ in [Put, Rioolmeting]
               for i in G.node]
    self.assertEqual(target, current)
def test001(self):
    "we empty the graph before we populate it"
    pool = {}
    G = nx.Graph()
    G.add_node('abc')
    parse("lizard_riool/data/f3478-bb.rmb", pool)
    convert_to_graph(pool, G)
    target = [tuple] * len(G.node)
    current = [i.__class__ for i in G.node]
    self.assertEqual(target, current)
def test020(self):
    "graph nodes have a Put or a Rioolmeting 'obj'"
    self.maxDiff = None
    pool = {}
    G = nx.Graph()
    parse("lizard_riool/data/f3478-bb.rmb", pool)
    convert_to_graph(pool, G)
    target = [(3.0, 0.0), (3.0, 5.0), (0.0, 1.0),
              (3.2000000000000002, 7.4000000000000004),
              (1.6000000000000001, 6.2000000000000002), (3.5, 0.0),
              (2.4000000000000004, 6.7999999999999998), (0.0, 6.0),
              (0.0, 4.0), (0.0, 5.0), (2.0, 0.0), (4.0, 5.0), (6.0, 5.0),
              (2.0, 5.0), (0.0, 2.0), (0.80000000000000004, 5.5999999999999996),
              (0.0, 3.0), (0.0, 0.0), (5.0, 0.0), (5.0, 5.0), (0.0, 7.0),
              (1.0, 5.0), (1.0, 0.0), (4.0, 8.0), (0.0, 8.0)]
    current = G.node.keys()
    self.assertEqual(sorted(target), sorted(current))
    manholes = sorted(
        [k for k in G.node if isinstance(G.node[k]['obj'], Put)])
    self.assertEqual([(0.0, 0.0), (0.0, 5.0), (0.0, 8.0), (3.0, 5.0),
                      (4.0, 8.0), (5.0, 0.0), (6.0, 5.0)],
                     manholes)
    self.assertEqual([(0.0, 1.0), (1.0, 0.0)],
                     G.edge[(0.0, 0.0)].keys())
    self.assertEqual([(0.0, 6.0), (0.80000000000000004, 5.5999999999999996),
                      (0.0, 4.0), (1.0, 5.0)],
                     G.edge[(0.0, 5.0)].keys())
    self.assertEqual([(0.0, 7.0)], G.edge[(0.0, 8.0)].keys())
    self.assertEqual([(4.0, 5.0), (2.0, 5.0)], G.edge[(3.0, 5.0)].keys())
    self.assertEqual([(3.2000000000000002, 7.4000000000000004)],
                     G.edge[(4.0, 8.0)].keys())
    self.assertEqual([(3.5, 0.0)], G.edge[(5.0, 0.0)].keys())
    self.assertEqual([(5.0, 5.0)], G.edge[(6.0, 5.0)].keys())
def test000(self):
    "nodes are 2D tuples"
    pool = {}
    G = nx.Graph()
    parse("lizard_riool/data/f3478-bb.rmb", pool)
    convert_to_graph(pool, G)
    target = [tuple] * len(G.node)
    current = [i.__class__ for i in G.node]
    self.assertEqual(target, current)
    target = [2] * len(G.node)
    current = [len(i) for i in G.node]
    self.assertEqual(target, current)
def to_txt(dir='resumes/'):
    '''Convert the CVs to plain text and save a mapping of their id and path.'''
    i = 0  # numeric id
    files = pr.explore(dir)  # get list of all supported files

    # lists of cv details
    cv = []
    cv_txt = []
    cv_id = []

    for f in files:
        if (pr.parse(f, i) == 1):
            # add cv details
            cv_id.append(i)
            cv.append(f)
            cv_txt.append('corpus/op/' + str(i) + '.txt')
            i += 1

    d = {'cid': cv_id, 'cv': cv, 'txt': cv_txt}

    # make dataframe of cv-id-path mapping
    df = pd.DataFrame(d)
    df = df.set_index('cid')
    print(df)
    df.to_csv('db.csv')
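A hedged driver for to_txt above; the module name resume_tools is an assumption, and the pr helper and pandas must be importable for it to run.

# Hypothetical driver: assumes to_txt lives in a module named resume_tools.
from resume_tools import to_txt

# Converts every supported CV under resumes/ and writes the id/path mapping to db.csv.
to_txt(dir='resumes/')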
def box_office(movie_names):
    """Movie selection.
    """
    movie_names = [movie.lower() for movie in movie_names]
    text = "Welcome to the box office. Which movie would you like to watch? "
    text += "We have tickets for %s" % englishify(movie_names)
    text = wrap_text(text, "GoodNews")
    speak(text)

    while True:
        inp = get_input()
        resp = parse(inp, WKSPACE_ID)
        if get_intent(resp) == 'buy_ticket':
            entities = get_entities(resp)
            movie_choice = entities[0]
            if movie_choice in movie_names:
                break
            else:
                msg = "Sorry, we're not showing %s at the moment. "\
                    % movie_choice
                msg += "Please choose another movie to watch."
                speak(wrap_text(msg, "Apology"))
        else:
            e_msg = "Sorry, I didn't understand what you said. Could you try rephrasing?"
            speak(wrap_text(e_msg, "Apology"))

    text = "Here's your ticket. Enjoy the show. "
    text += "Would you like to go to the concessions or the auditorium?"
    text = wrap_text(text, "GoodNews")
    speak(text)
    return {'movie_choice': movie_choice}
def test200(self):
    "watering a less complex network"
    pool = {}
    G = nx.Graph()
    parse("lizard_riool/data/f3478.rmb", pool)
    convert_to_graph(pool, G)
    self.assertEqual(
        [-4.5, -2.4029999999999996, -1.28],
        [i.z for i in pool['6400001'][1:]])
    self.assertEqual(
        [-4.0, -2.8452994616207485, -4.0],
        [i.z for i in pool['6400002'][1:]])
    self.assertEqual(
        [-1.8, -1.2000000000000002, -1.3000000000000003],
        [i.z for i in pool['6400003'][1:]])
    self.assertEqual([0.0, 1.046], [i.z for i in pool['6400004'][1:]])
def test200(self):
    "watering a simple network, ZYB == 2 strings"
    pool = {}
    G = nx.Graph()
    parse("lizard_riool/data/f3478_2zyb2.rmb", pool)
    convert_to_graph(pool, G)
    # sink = node 1 of Riool 6400001 = 64D0001
    sink = tuple(pool['6400001'][0].point(1, False)[:2])
    compute_lost_water_depth(G, sink)
    target = [0, 0, 0]
    current = [
        pool['6400001'][1].flooded,
        pool['6400001'][2].flooded,
        pool['6400001'][3].flooded,
    ]
    self.assertEqual(target, current)
def test300(self):
    """Testing MRIO with ZYR=A (slope) and ZYS=E/F (degrees/%).

    The distance (ZYA) should be the hypotenuse!?
    """
    pool = {}
    G = nx.Graph()
    parse("lizard_riool/data/f3478.rmb", pool)
    convert_to_graph(pool, G)
    # Slope in degrees
    target = -5.0 + math.sin(math.pi / 4)
    current = pool['6400002'][1].z
    self.assertEqual('%.4f' % target, '%.4f' % current)
    # Slope in percentage
    target = -2.0 + math.sin(math.atan(0.2))
    current = pool['6400003'][1].z
    self.assertEqual('%.4f' % target, '%.4f' % current)
def test_parser_parse_pose():
    snapshot = utils.protocol.Snapshot(1234)
    snapshot.translation = (1.0, 2.0, 3.0)
    snapshot.rotation = (0.5, -0.5, 0.25, 0.75)
    parsed_data = parsers.parse('pose',
                                (200639318).to_bytes(8, 'little') +
                                (1234).to_bytes(8, 'little') +
                                snapshot.serialize())
    assert parsed_data == {
        'translation': (1.0, 2.0, 3.0),
        'rotation': (0.5, -0.5, 0.25, 0.75),
    }
def test_parser_parse_user():
    hello = utils.protocol.Hello(200639318, 'Zeevi Iosub',
                                 datetime(1988, 4, 27).timestamp(), 'm')
    parsed_data = parsers.parse('user', hello.serialize())
    assert parsed_data == {
        'user_id': 200639318,
        'username': '******',
        'birth_date': datetime(1988, 4, 27).timestamp(),
        'gender': 'm',
    }
def test_parser_parse_feelings():
    snapshot = utils.protocol.Snapshot(1234)
    snapshot.hunger = 1.0
    snapshot.thirst = -1.0
    snapshot.exhaustion = 0.0
    snapshot.happiness = -0.5
    parsed_data = parsers.parse('feelings',
                                (200639318).to_bytes(8, 'little') +
                                (1234).to_bytes(8, 'little') +
                                snapshot.serialize())
    assert parsed_data == {
        'hunger': 1.0,
        'thirst': -1.0,
        'exhaustion': 0.0,
        'happiness': -0.5,
    }
def __init__(self, docx_file):
    self.file_dir = docx_file

    # Unzip docx file
    unzip = zipfile.ZipFile(docx_file, 'r')
    document_xml = unzip.read('word/document.xml')
    footnotes_xml = unzip.read('word/footnotes.xml')
    endnotes_xml = unzip.read('word/endnotes.xml')
    # Extract all XML files (for testing)
    unzip.extractall(path='docx_extract')
    unzip.close()

    # Ensure main document text is unicode
    if not isinstance(document_xml, str):
        document_xml = document_xml.decode()

    # Parse XML files
    self.paragraphs = parse(document_xml, PARA)
    self.footnotes = parse_notes(footnotes_xml, FOOTNOTE)
    self.endnotes = parse_notes(endnotes_xml, ENDNOTE)
    self.tables = parse(document_xml, TABLE)
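A usage sketch for the class whose __init__ appears above; the class and module names (DocxFile, docx_parser) are assumptions, since the snippet does not show them.

# Hypothetical usage: the enclosing class is assumed to be exposed as docx_parser.DocxFile.
from docx_parser import DocxFile

doc = DocxFile('thesis.docx')
print(len(doc.paragraphs), 'paragraphs,', len(doc.footnotes), 'footnotes,', len(doc.tables), 'tables')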
def _listen(self, intent, entity_type='', n_entities=0, verify_with='',
            context_key='', fail_message='', only_if=None):
    """Get input and listen for intent
    """
    if only_if and not self._check_only_if(*only_if):
        return

    assert self.workspace_id, 'No valid workspace ID'

    while True:
        # transcribe audio and parse it
        inp = self._input_fct()
        resp = parse(inp, self.workspace_id)
        if get_intent(resp) != intent.strip():
            error_msg = "Sorry, I didn't understand what you said. " +\
                        "Could you try rephrasing?"
            self._output_fct(error_msg)
            continue  # mismatching intent so start over

        entities = get_entities(resp)
        # chop off entities if necessary
        if n_entities:
            entities = entities[:n_entities]
        # print(entities)
        # print('key:', context_key)
        if context_key:
            if verify_with:
                # print('verifying with:', verify_with)
                valid_entities = self._context[verify_with]
                has_invalid_entities = False
                for entity in entities:
                    if entity not in valid_entities:
                        has_invalid_entities = True
                        break
                if has_invalid_entities:
                    # print('has invalid entities')
                    default_msg = "I didn't recognize something you said. " +\
                                  "Could you repeat yourself?"
                    msg = fail_message if fail_message else default_msg
                    self._output_fct(msg)
                    continue
            if len(entities) == 0:
                # print('entites has len 0')
                pass
            elif len(entities) == 1:
                # print('context updated')
                self._context[context_key] = entities[0]
            else:
                # print('context updated')
                self._context[context_key] = entities
        return  # none of the continues were hit
def concessions(menu):
    """Getting snacks.
    """
    menu = [item.lower() for item in menu]
    bought = []
    text = "What can I get for you? We have "
    text += englishify(menu)
    text = wrap_text(text, "GoodNews")
    speak(text)

    while True:
        inp = get_input()
        resp = parse(inp, WKSPACE_ID)
        intent = get_intent(resp)
        if intent == 'order_food':
            # print('in order_food')
            entities = get_entities(resp)
            missing = []
            available = []
            for item in entities:
                if item not in menu:
                    missing.append(item)
                elif item not in bought:
                    available.append(item)
                    bought.append(item)
            missing_msg = ""
            if missing:
                missing_msg = "Sorry we don't have %s on our menu. "\
                    % englishify(missing, conj=False)
                missing_msg = wrap_text(missing_msg, 'Apology')
            # print(missing_msg)
            msg = "I'll get some %s for you. " % englishify(available)
            msg += "Can I get you anything else?"
            speak(missing_msg + wrap_text(msg, 'GoodNews'))
        elif intent == 'done_ordering':
            # print('done ordering')
            break
        else:
            # print('misunderstanding')
            msg = "I'm sorry, I didn't understand what you said. Could you rephrase?"
            speak(wrap_text(msg, 'Apology'))

    text = "Thank you. Here's your %s. " % englishify(bought)
    text += "If you do not have your ticket yet, go to the box office. "
    text += "Otherwise, you can go to the auditorium."
    text = wrap_text(text, "GoodNews")
    speak(text)
    return {'bought': bought}
def watch(f):
    """Ask whether the user wants to stay and watch.

    If yes, play the gif at `f`, else do nothing.
    """
    while True:
        inp = get_input()
        resp = parse(inp, WKSPACE_ID)
        intent = get_intent(resp)
        if intent == 'watch':
            print('watching %s' % f)  # TODO: TEMPORARY
            # runGif(f)
            return
        elif intent == 'no_watch':
            return
        else:
            msg = "I'm sorry I don't understand what you said. Could you rephrase?"
            speak(wrap_text(msg, 'Apology'))
def run_model(file_or_directory, param_map={}):
    # Determine if input is single or multi run.
    if os.path.isfile(file_or_directory):
        # Single run mode, open file and run.
        logger.debug("Importing file: {}".format(file_or_directory))
        input_file = open(file_or_directory, "r")

        # Parse into dataset.
        dataset = parse(input_file)
        logger.debug("Built dataset: {}".format(dataset.get("name")))
        for attribute in sorted(dataset.to_dict().keys()):
            logger.debug(" {} = {}".format(attribute, dataset.get(attribute)))

        # Parse model parameters and call model.
        result = None
        if param_map:
            logger.debug("Running model with custom parameters: {}".format(param_map))
            result = run_simulation(dataset, **param_map)
        else:
            logger.debug("Running model with default parameters.")
            result = run_simulation(dataset)

        # Run complete, return results.
        logger.info("Completed simulation run: {} ({} - {})".format(
            dataset.get("name"), dataset.get("start_year"), dataset.get("end_year")))
        return result
    elif os.path.isdir(file_or_directory):
        # Gather files and directories, and perform a recursive descent
        # into the contents, collecting results from runs.
        root_path = os.path.abspath(file_or_directory)
        dir_files = os.listdir(file_or_directory)
        dir_results = []
        for dir_file in dir_files:
            # Merge results, which may be lists of results, into a single list.
            abs_path = "{}/{}".format(root_path, dir_file)
            dir_results += [run_model(abs_path, param_map)]
        logger.info("Processed {} dataset runs.".format(len(dir_results)))
        return dir_results
    else:
        # Not a file or directory, exit.
        raise Exception("Not a file or directory: {}".format(file_or_directory))
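A hedged invocation of run_model above; the dataset path and the parameter name are placeholders rather than values taken from the project.

# Hypothetical invocation: path and parameter name are illustrative only.
single_result = run_model("datasets/example_run.txt", param_map={"growth_rate": 0.02})

# Passing a directory recurses into its contents and returns one entry per file found.
batch_results = run_model("datasets/")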
def parse(text):
    treebank_rules = get_treebank_rules(cutoff=0)

    sentence_probs = []
    for line in text.split("\n"):
        sentences = sentence_tokenizer.parse(line)
        for sentence in sentences:
            # Add a period to the end of the sentence, which sometimes
            # forces a better parse
            #if sentence[-1] not in ('.', '!', '?'):
            #    sentence += '.'
            parse_trees = parsers.parse(sentence)
            for tree in parse_trees:
                if cmd_utils.cmd_log_level() > 2:
                    print tree.pprint()

                evidenced_lexical_rules = set(lexical_rules(tree).keys())
                differences = evidenced_lexical_rules.difference(treebank_rules)
                bad_generations = len(differences)
                log("Found {0} bad generations ({1})".format(bad_generations, differences), 3)

                #bad_parse_prob = 1 if prob == 0 else 0
                #log("Scored {0} for prob {1}".format(bad_parse_prob, prob), 3)

                bad_tag_problems = num_tag_problems(tree)
                log("Found {0} X or FRAG tags".format(bad_tag_problems), 3)

                bad_sbar_problems = num_sbar_problems(tree)
                log("Found {0} bad SBAR issues".format(bad_sbar_problems), 3)

                total_problems = bad_sbar_problems + bad_tag_problems + bad_generations
                log("In '{0}'".format(sentence), 2)
                log("Found {0} sentence formation problems".format(total_problems), 1)
                sentence_probs.append(total_problems)
    return sentence_probs
def process(f, file):
    detail = None
    pure = None
    count = None
    p = None

    try:
        p = parsers.parse(f.parser, file, f.options)
    except parsers.exceptions.ParserError:
        error(f, "error: unable to parse the file \"" + file + "\".")

    try:
        pure, detail, count = styles.check(f.parser, f.style, p, f.options)
    except:
        traceback.print_exc()
        error(f, "error: FATAL !!")

    if pure or detail:
        f.pure += pure
        f.detail += detail
        f.count += count
def _get_next(self):
    """Gets the next node from the user and returns the appropriate node
    """
    if not self._current:
        return
    elif not self.neighbors(self._current):
        # current is a leaf node
        self._is_finished = True
        return

    while True:
        user_inp = self._input_fct()
        resp = parse(user_inp, self.workspace_id)
        intent = get_intent(resp)
        if intent in self.neighbors(self._current):
            if self._is_runnable(intent):  # will output something when False
                return self._select(intent)
        elif intent:
            msg = "Sorry I can't go to %s" % user_inp
            self.output_fct(msg)
        else:
            msg = "Sorry I didn't catch that. Could you repeat yourself?"
            self.output_fct(msg)
from parsers import parse
import _regexes

# this is a test for DOPs typeform
ua_string = 'Mozilla/5.0(iPad; U; CPU iPhone OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B314 Safari/531.21.10'
user_agent = parse(ua_string)

print user_agent.is_mobile         # returns True
print user_agent.is_tablet         # returns False
print user_agent.is_touch_capable  # returns True
print user_agent.is_pc             # returns False
print user_agent.is_bot            # returns False

print str(user_agent)  # returns a readable "device / OS / browser" summary for the UA above
import parsers as p

data = p.parse('testdata/sample.xlsx', p.EXCEL_DOC)
print data
def test_parser_parse_depth_image():
    snapshot = utils.protocol.Snapshot(1234)
    parsed_data = parsers.parse('depth_image',
                                (200639318).to_bytes(8, 'little') +
                                (1234).to_bytes(8, 'little') +
                                snapshot.serialize())
    assert parsed_data == '../../static/200639318_1234_depth.png'
def parse(text, use_cache=True):
    num_agrees = 0
    num_not_agrees = 0
    num_unsure = 0

    lines = text.split("\n")
    for line in lines:
        sentences = sentence_tokenizer.parse(line, use_cache=use_cache)
        for sentence in sentences:
            line_agreements, line_non_agreements, line_unsure = 0, 0, 0
            # Possession seems to be tricky for the parser, so we fudge
            # a little here
            sentence = sentence.replace("'s", '')
            if sentence[-1] != ".":
                sentence += "."

            if use_cache:
                cache_rs = cache_utils.cache_get('sub_verb_agreement', sentence)
                if cache_rs:
                    line_agreements, line_non_agreements, line_unsure = cache_rs
                    num_agrees += line_agreements
                    num_not_agrees += line_non_agreements
                    num_unsure += line_unsure
                    continue

            log("Looking for Sub-Verb agreement in '%s'" % (sentence,), 1)
            tree = parsers.parse(sentence)[0]
            dependencies = parsers.dependences(sentence)
            sub_verb_deps = [dep for dep in dependencies
                             if dep['dep_name'] == 'nsubj']
            if len(sub_verb_deps) == 0:
                log("Couldn't find Subject-Verb dependency info", 1)
                cache_utils.cache_set('sub_verb_agreement', sentence, (0, 0, 0))
                continue

            for sub_verb in sub_verb_deps:
                first_node = node_in_tree(tree, sub_verb['first_word'])
                sec_node = node_in_tree(tree, sub_verb['second_word'])
                if first_node and sec_node:
                    log("First Dep Node: %s" % (first_node,), 2)
                    log("Sec Dep Node: %s" % (sec_node,), 2)
                    try:
                        is_agreement = check_node_agreement(first_node, sec_node)
                        if is_agreement:
                            line_agreements += 1
                        else:
                            line_non_agreements += 1
                        log("Agreement in sentence? %s" % (is_agreement,), 1)
                    except Exception as e:
                        line_unsure += 1
                        log("Error looking for agreement? %s" % (e.message,), 2)
                        # No agreement in pair. Not sure how to handle.
                        # More exhaustive search?

            if use_cache:
                cache_utils.cache_set('sub_verb_agreement', sentence,
                                      (line_agreements, line_non_agreements, line_unsure))

            num_agrees += line_agreements
            num_not_agrees += line_non_agreements
            num_unsure += line_unsure

    return num_agrees, num_not_agrees, num_unsure
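A usage sketch for the agreement checker above; the module name grammar_checks is an assumption, and the parser and cache utilities it relies on must already be available.

# Hedged example: the module name grammar_checks is an assumption.
from grammar_checks import parse as check_agreement

agree, disagree, unsure = check_agreement("The dogs barks loudly.\nThe cat sleeps.")
print("%d agreements, %d disagreements, %d unsure" % (agree, disagree, unsure))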
def parse_sentences(line, use_cache=True, include_prob=False):
    log("Working on: %s" % (line,), 2)

    if use_cache:
        correct_parse = cache_get("sentence_tokenizer", line)
        if correct_parse:
            log("Cache Hit: %s" % (correct_parse[0],), 4)
            log("-------------\n", 4)
            return correct_parse if include_prob else correct_parse[0]

    all_possible_sentences = _possible_sentences_in_line(line)
    all_possible_sentence_probs = []
    invalid_possible_sentences = []
    stored_probs = {}
    for possible_sentences in all_possible_sentences:
        log("Examining: %s" % (possible_sentences,), 1)
        prob_for_sentences = []
        sent_is_impossible = False
        for possible_sentence in possible_sentences:
            if use_cache:
                possible_sentence_prob = cache_get('possible_sentences', possible_sentence)
                if possible_sentence_prob is not None:
                    log("Cache Hit: %s (from %s)" % (possible_sentence, 'possible sentences'), 4)
                    prob_for_sentences.append(possible_sentence_prob)
                    continue

            if contains_any_invalid_setences(possible_sentences, invalid_possible_sentences) or sent_is_impossible:
                prob_for_sentences.append(0)
                continue
            elif possible_sentence in stored_probs:
                prob_for_sentences.append(stored_probs[possible_sentence])
                continue

            sentence_trees = parsers.parse(possible_sentence)
            if len(sentence_trees) == 0:
                log("Wasn't able to parse input %s" % (possible_sentence,), 0)
                prob_for_sentences.append(0)
                invalid_possible_sentences.append(possible_sentence)
                sent_is_impossible = True
                continue
            else:
                sentence_tree = sentence_trees[0]

            if cmd_log_level() >= 4:
                print "--------"
                print "Pre Simplified Tree"
                print sentence_tree

            tree_utils.simplify_tree(sentence_tree,
                                     remove_starting_cc=possible_sentences.index(possible_sentence) == 0)

            if cmd_log_level() >= 4:
                print "--------"
                print "Post Simplified Tree"
                print sentence_tree

            sentence_transitions = tree_utils.transitions_in_tree(sentence_tree)

            if not is_possible_sentence(sentence_tree):
                log("%s" % (sentence_transitions,), 2)
                log("Invalid parse", 2)
                prob_for_sentences.append(0)
                invalid_possible_sentences.append(possible_sentence)
                sent_is_impossible = True
                if use_cache:
                    cache_set('possible_sentences', possible_sentence, 0)
            else:
                log("%s" % (sentence_transitions,), 2)
                sentence_probs = []
                for transition in sentence_transitions:
                    try:
                        probs = hmm_utils.prob_of_all_transitions(transition, counts, gram_size=3)
                    except KeyError, e:
                        log("'Impossible' Tag order", 2, sep=' ** ')
                        log("%s" % (e,), 2, sep=' ** ')
                        probs = [0]
                    sentence_probs += probs
                    log("Transitions: %s" % (transition,), 3)
                    log("Probabilities: %s" % (probs,), 3)

                attempt_sentence_prob = prod(sentence_probs)

                sentence_prob_boost = boost_for_sentence_tree(sentence_tree)
                attempt_sentence_prob *= sentence_prob_boost

                prob_for_sentences.append(attempt_sentence_prob)
                stored_probs[possible_sentence] = attempt_sentence_prob
                if use_cache:
                    cache_set('possible_sentences', possible_sentence, attempt_sentence_prob)

        weighted_score = prod(prob_for_sentences) * (weight ** (len(possible_sentences) - 1))
        if weighted_score > 0:
            log("Valid Parse: %s" % (possible_sentences,), 2)
            log(weighted_score, 2)
        all_possible_sentence_probs.append(weighted_score)
    # First write the header line
    text = [line.strip() for line in open(os.path.join(dirpath, name)).readlines()
            if len(line.strip()) > 1]
    row = [int(grade_utils.grade_text("\n".join(text), test))
           for test in grade_utils.cols]
    row.append(round_to(float(sum(row) + row[3] + (row[5] * 2)) / 10, 0.5))
    new_line = ",".join([str(v) for v in row])
    output.append(new_line)

    f = open('output.txt', 'w')
    file_contents = "\n".join(output)
    f.write(file_contents)
    f.close()
    print "Finished writing %d scores to output.txt" % (len(output) - 1,)
elif score_stdin or parse_stdin:
    import tree_utils
    trees = parsers.parse(cmd_utils.get_stdin())
    for tree in trees:
        print tree
        if score_stdin:
            sentence_transitions = tree_utils.transitions_in_tree(tree)
            sentence_probs = []
            for transition in sentence_transitions:
                print "Transitions: %s" % (transition)
                probs = hmm_utils.prob_of_all_transitions(transition, counts, gram_size=3)
                print "Probs: %s" % (probs)
                sentence_probs += probs
            total = 1
            for prob in sentence_probs:
                total *= prob
            print "Total: %f" % (total,)
elif sentence_parse_stdin:
def fullparse(str):
    return func_to_term(p.parse(str))
import tunepid
import sys

try:
    parsers = reload(parsers)
    pids = reload(pids)
    tunepid = reload(tunepid)
except:
    pass

# load config
if len(sys.argv) < 3:
    print 'Input a .xacro or .urdf file'
    print 'syntax : gen_pid.py <package> <urdf/xacro file>'
    sys.exit(0)

pid, Umax, Fmax, mass, damping, config_file = parsers.parse(sys.argv[1], sys.argv[2])

axes = ('x', 'y', 'z', 'roll', 'pitch', 'yaw')
max_gains = {'p': 150., 'i': 50., 'd': 10.}


class TunePID(QtWidgets.QMainWindow):
    def __init__(self, parent=None):
        QtWidgets.QMainWindow.__init__(self, parent)
        self.ui = tunepid.Ui_TunePID()
        self.ui.setupUi(self)
        self.psim = pids.Sim('p')
        self.vsim = pids.Sim('v')
        self.ui.p_sim.addWidget(self.psim.canvas)
BOOKS = {
    'a': 'a_example.txt',
    'b': 'b_read_on.txt',
    'c': 'c_incunabula.txt',
    'd': 'd_tough_choices.txt',
    'e': 'e_so_many_books.txt',
    'f': 'f_libraries_of_the_world.txt',
}

numero_libros = []
tiempos_registro = {}

in_file = 'input_data/' + BOOKS[sys.argv[1]]
out_file = 'output_data/' + BOOKS[sys.argv[1]]
print(in_file)

books, libraries, days = parse(in_file)

import pickle
with open('libros.pkl', 'wb') as f:
    pickle.dump(books, f)
with open('libraries.pkl', 'wb') as f:
    pickle.dump(libraries, f)
print('volcado')


def mejor_biblio(libraries, scanned):
    puntuaciones = []
    best_library = libraries[0]
    rest = []
    best_score = 0
def parse_query(query):
    def append_operator(term):
        assert not (lastType in (BINARY_OPERATOR, UNARY_OPERATOR)
                    and get_type(term) == BINARY_OPERATOR)
        if get_type(term) == UNARY_OPERATOR and lastType == TERM:
            operators.append('AND')
        while len(operators) > 0 and OPERATORS[term][1] < OPERATORS[operators[-1]][1]:
            if get_type(operators[-1]) == UNARY_OPERATOR:
                terms.append(OPERATORS[operators.pop()][0](terms.pop()))
            else:
                assert get_type(operators[-1]) == BINARY_OPERATOR
                terms.append(OPERATORS[operators.pop()][0](terms.pop(), terms.pop()))
        operators.append(term)

    for r in list(OPERATORS.keys()) + list(MODIFIERS.keys()) + ['(', ')']:
        query = query.replace(r, ' ' + r + ' ')
    query = query.split(' ')

    terms = []
    operators = []
    lastType = BINARY_OPERATOR
    parenthesis_level = 0
    parenthesis_start = -1
    modifier = None
    modifier_terms = []

    for pos, term in enumerate(query):
        if not term:
            continue

        # Parenthesis
        if term == '(':
            parenthesis_level += 1
            if parenthesis_level == 1:
                parenthesis_start = pos + 1
        elif term == ')':
            parenthesis_level -= 1
            if parenthesis_level == 0:
                if lastType == TERM:
                    append_operator('AND')
                terms.append(parse_query(' '.join(query[parenthesis_start:pos])))
                lastType = TERM
            continue
        if parenthesis_level > 0:
            continue

        # Modifier
        if get_type(term) == MODIFIER:
            if modifier is None:
                modifier = MODIFIERS[term]
            else:
                assert MODIFIERS[term] == modifier
                if lastType == TERM:
                    append_operator('AND')
                terms.append(modifier(modifier_terms))
                lastType = TERM
                modifier = None
                modifier_terms = []
            continue
        if modifier is not None:
            term_list = parse(term)
            modifier_terms.extend(nodes.KwNode(i) for i in term_list)
            continue

        # Operator or terms
        if get_type(term) in (BINARY_OPERATOR, UNARY_OPERATOR):
            append_operator(term)
        else:
            term_list = tuple(parse(term))
            if len(term_list) == 0:
                continue
            elif len(term_list) == 1:
                terms.append(nodes.KwNode(term_list[0]))
            else:
                terms.append(nodes.ExactNode([nodes.KwNode(i) for i in term_list]))
            if lastType == TERM:
                append_operator('AND')
        lastType = get_type(term)

    assert len(terms) > 0
    while len(terms) > 1:
        if get_type(operators[-1]) == UNARY_OPERATOR:
            terms.append(OPERATORS[operators.pop()][0](terms.pop()))
        else:
            assert get_type(operators[-1]) == BINARY_OPERATOR
            terms.append(OPERATORS[operators.pop()][0](terms.pop(), terms.pop()))
    return terms[0]
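A sketch of how parse_query above might be driven; the module name query_parser and the exact operator spellings are assumptions ('AND' is the only operator the code names explicitly).

# Hypothetical usage: module name and query syntax are assumptions based on the code above.
from query_parser import parse_query

tree = parse_query('(cats AND dogs) AND birds')
print(tree)  # an expression tree built from nodes.KwNode / nodes.ExactNode terms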
def test_scientific(self):
    assert parsers.parse("1.4E-5") == "14*10**-6"
    assert parsers.parse("+0045") == None
    assert parsers.parse("8.34e+3") == "834*10**1"
    assert parsers.parse("25.e25") == None
def issues_in_sentence(sentence, use_cache=True):
    """'Brute force' check for a bunch of possible word ordering issues.

    Specifically, looking for the following:
     - VP coming before NP in standard sentence
     - NP coming before VP in inverted sentence
     - JJ coming after Noun in NP
     - VB before PP in VP
     - VB before NP in VP
     - VP before S in standard sentence (with embedded sentences)
     - NN before CD in NP
     - NNP before CD in NP
    """
    if use_cache:
        result = cache_get('word_order_issues', sentence)
        if result is not None:
            return result

    tree = parsers.parse(sentence)[0]
    tree_utils.simplify_tree(tree,
                             trim_adjecent_prop_nouns=True,
                             normalize_sent_roots=True,
                             normalize_plural=True,
                             normalize_case=True)

    log("Looking for order issues in: %s" % (sentence,), 1)
    if cmd_log_level() >= 4:
        print "Simplified Parse Tree"
        print tree

    problems = []
    problems += ["VP->NP in S"] * num_forbidden_orders(tree, ("S",), ('VP', 'NP'))
    problems += ["NP->VP in SINV"] * num_forbidden_orders(tree, ('SINV',), ('NP', 'VP'))
    problems += ["NN->JJ in NP"] * num_forbidden_orders(tree, ('NP',), ('NN', 'JP'))
    problems += ["PP->VB in VP"] * num_forbidden_orders(tree, ('VP',), ('PP', 'VB'))
    problems += ["NP->VP in VP"] * num_forbidden_orders(tree, ('VP',), ('NP', 'VP'))
    problems += ["S->VP in S"] * num_forbidden_orders(tree, ('S',), ('S', 'VP'))
    problems += ["S->VB in VP"] * num_forbidden_orders(tree, ('VP',), ('S', 'VB'))
    # problems += ["VB->VP in VP"] * num_forbidden_orders(tree, ('VP',), ('VB', 'VP'))
    problems += ["NP->RBR in ADVP"] * num_forbidden_orders(tree, ('ADVP',), ('NP', 'RBR'))
    problems += ["NN->DT in NP"] * num_forbidden_orders(tree, ('NP',), ('NN', 'DT'))
    problems += ["NNP->DT in NP"] * num_forbidden_orders(tree, ('NP',), ('NNP', 'DT'))
    problems += ["NN->CD in NP"] * num_forbidden_orders(tree, ('NP',), ('NN', 'CD'))
    problems += ["NNP->CD in NP"] * num_forbidden_orders(tree, ('NP',), ('NNP', 'CD'))
    problems += ['PP->NP in S'] * num_forbidden_orders(tree, ('S',), ('PP', 'NP'))

    # Toggle?
    problems += ['NP->VP in NP'] * num_forbidden_orders(tree, ('NP',), ('NP', 'VP'))

    # Seems like it should be VB->ADVP->PP
    problems += ['VB->PP->ADVP in VP'] * num_forbidden_orders(tree, ('VP',), ('VB', 'PP', 'ADVP'))
    problems += ['VB->PP->SBAR in VP'] * num_forbidden_orders(tree, ('VP',), ('VB', 'PP', 'SBAR'))
    problems += ['NP->S in NP'] * num_forbidden_orders(tree, ('NP',), ('NP', 'S'))

    # Seems like the ADJP should be in a NP or somewhere else, not a sibling
    # of a noun phrase
    problems += ['NP->ADJP in S'] * num_forbidden_orders(tree, ('S',), ('NP', 'ADJP'))

    # Last, if there is an S w/ only one child, we call it a word order problem...
    problems += ['Single Child S'] * len(list(tree.subtrees(
        lambda x: x in tree_utils.semi_tree_roots and len(x) == 1)))

    if tree[0].node not in tree_utils.semi_tree_roots and not hasattr(tree[0], '_has_error'):
        tree[0]._has_error = True
        problems += ['No S Root']

    log("Found %d order issues" % (len(problems),), 1)
    log("Issues: %s" % (problems,), 2)

    if use_cache:
        cache_set('word_order_issues', sentence, problems)

    return problems
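A hedged usage sketch for issues_in_sentence above; the module name word_order is an assumption, and the parser and cache utilities must already be configured.

# Hypothetical module name; the return value is a list of human-readable issue labels.
from word_order import issues_in_sentence

problems = issues_in_sentence("Quickly ran the dog home.")
print("%d issues: %s" % (len(problems), problems))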
def parse(text):
    # Strip numbers out, since that seems to cause problems for my approach
    text = re.sub(r'\d+ ?', 'some ', text)

    sentences = sentence_tokenizer.parse(text)
    sentence_pronouns = []
    for sentence in sentences:
        log("Looking for pronouns in '{0}'".format(sentence), 2)
        pronoun_totals = [[], [], []]
        tree = parsers.parse(sentence)[0]
        pronoun_trees = tree.subtrees(lambda x: x.node in pronoun_tags)
        for pronoun_tree in pronoun_trees:
            # First total up all the first person pronouns
            for i in range(3):
                if pronoun_tree[0].lower() in pronouns[i]:
                    pronoun_totals[i].append(pronoun_tree[0])
        log("First Person '{0}'".format(pronoun_totals[0]), 3)
        log("Second Person '{0}'".format(pronoun_totals[1]), 3)
        log("Third Person '{0}'".format(pronoun_totals[2]), 3)
        sentence_pronouns.append(pronoun_totals)

    log("Pronouns found in text: %s" % (sentence_pronouns), 2)

    # If there are 3rd person pronouns in any sentence, we have to decide
    # if they are used correctly. We do this in the following, very
    # expensive, but possibly correct manner.
    #
    # Start from the top down
    # 1. Look back 2 sentences and see if we can find a reference.
    #    IF NOT - it's an error and do no more
    # 2. If so, replace the referenced word with "RUNNING"
    #    and search again, to see if there is a previous word it could refer
    #    to.
    #    IF NOT, it's correct. Replace the pronoun with the referenced word
    #    and continue
    # 3. Else, it's not felicitous. Give bad credit
    for i in range(len(sentences)):
        if len(sentence_pronouns[i][2]) > 0:
            pronoun_results = []
            for third_pronoun in sentence_pronouns[i][2]:
                all_sentences = sentences[max(0, i - 2):i + 1]
                norm_sentences = ". ".join(
                    [a_sen.strip(".") for a_sen in all_sentences]) + "."
                log("Looking for pronoun coherence for '{0}'".format(norm_sentences), 4)
                pronouns_refs = parsers.parse_coref(norm_sentences)
                log("Received co-references {0}".format(pronouns_refs), 5)

                found_bundle = False
                for j in range(len(pronouns_refs)):
                    if third_pronoun == pronouns_refs[j]['pronoun']:
                        found_bundle = pronouns_refs[j]
                        break

                if not found_bundle:
                    log("Found NO antecedent for {0}".format(third_pronoun), 3)
                    pronoun_results.append((third_pronoun, -1))
                else:
                    log("Found antecedent for {0}".format(third_pronoun), 3)
                    ref_index = int(found_bundle['ref_sentence']) - 1 + (i - 2)
                    sentences[ref_index] = sentences[ref_index].replace(
                        found_bundle['ref'], 'RUNNING')
                    log("Replacing '{0}' with 'RUNNING'".format(found_bundle['ref']), 3)

                    altered_sentences = sentences[max(0, i - 2):i + 1]
                    norm_altered_sentences = ". ".join(
                        [a_sen.strip(".") for a_sen in altered_sentences]) + "."
                    log("New test sentences are '{0}'".format(norm_altered_sentences), 4)
                    altered_pronouns_refs = parsers.parse_coref(norm_altered_sentences)

                    if third_pronoun not in [a_ref['pronoun'] for a_ref in altered_pronouns_refs]:
                        log("Antecedent is unambiguous!", 3)
                        pro_index = int(found_bundle['pronoun_sentence']) - 1 + (i - 2)
                        sentences[pro_index] = sentences[pro_index].replace(
                            found_bundle['pronoun'], found_bundle['ref'])
                        pronoun_results.append((third_pronoun, found_bundle['ref']))
                    else:
                        log("Antecedent is ambiguous", 3)
                        log("New Sentences: {0}".format(altered_pronouns_refs), 4)
                        pronoun_results.append((third_pronoun, .5))
            sentence_pronouns[i][2] = pronoun_results
    return sentence_pronouns
blackberry_bold_touch_ua_string = 'Mozilla/5.0 (BlackBerry; U; BlackBerry 9930; en-US) AppleWebKit/534.11+ (KHTML, like Gecko) Version/7.0.0.241 Mobile Safari/534.11+'
windows_rt_ua_string = 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; ARM; Trident/6.0)'
j2me_opera_ua_string = 'Opera/9.80 (J2ME/MIDP; Opera Mini/9.80 (J2ME/22.478; U; en) Presto/2.5.25 Version/10.54'
ie_ua_string = 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0)'
ie_touch_ua_string = 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0; Touch)'
mac_safari_ua_string = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.13+ (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2'
windows_ie_ua_string = 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)'
ubuntu_firefox_ua_string = 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:15.0) Gecko/20100101 Firefox/15.0.1'
google_bot_ua_string = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
nokia_n97_ua_string = 'Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/12.0.024; Profile/MIDP-2.1 Configuration/CLDC-1.1; en-us) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.12344'
android_firefox_aurora_ua_string = 'Mozilla/5.0 (Android; Mobile; rv:27.0) Gecko/27.0 Firefox/27.0'
thunderbird_ua_string = 'Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.2.0 Lightning/4.0.2'
outlook_usa_string = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/6.0; Microsoft Outlook 15.0.4420)'
chromebook_ua_string = 'Mozilla/5.0 (X11; CrOS i686 0.12.433) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.77 Safari/534.30'

iphone_ua = parse(iphone_ua_string)
ipad_ua = parse(ipad_ua_string)
galaxy_tab = parse(galaxy_tab_ua_string)
galaxy_s3_ua = parse(galaxy_s3_ua_string)
kindle_fire_ua = parse(kindle_fire_ua_string)
playbook_ua = parse(playbook_ua_string)
nexus_7_ua = parse(nexus_7_ua_string)
windows_phone_ua = parse(windows_phone_ua_string)
windows_rt_ua = parse(windows_rt_ua_string)
blackberry_torch_ua = parse(blackberry_torch_ua_string)
blackberry_bold_ua = parse(blackberry_bold_ua_string)
blackberry_bold_touch_ua = parse(blackberry_bold_touch_ua_string)
j2me_opera_ua = parse(j2me_opera_ua_string)
ie_ua = parse(ie_ua_string)
ie_touch_ua = parse(ie_touch_ua_string)
mac_safari_ua = parse(mac_safari_ua_string)
import os

from docopt import docopt
from parsers import parse
from my_json_dumps import my_json_dumps
from make_diff_dict import make_diff_dict
from make_diff_list import make_diff_list


if __name__ == '__main__':
    args = docopt(__doc__, version='gendiff 0.3')
    file_1, file_2 = args['<firstConfig>'], args['<secondConfig>']
    if not os.path.isfile(file_1):
        print('"{}" is not a file'.format(file_1))
    elif not os.path.isfile(file_2):
        print('"{}" is not a file'.format(file_2))
    else:
        before = parse(file_1)
        after = parse(file_2)
        if args['--format'] == 'plain':
            diff = make_diff_list(before, after, [], [])
            for row in diff:
                print(row)
        else:
            diff = make_diff_dict(before, after, {})
            pretty = my_json_dumps(diff)
            print(pretty)
def pass_up(self, data):
    val = parse(self.encoding, data)
    print("Got data id=%d: " % self.id, val)
    sim.set(self.fsxid, float(val))
stl_headers = r""
# C header file path
c_headers = r""
# other macros
target_macros = []
# directories containing the header files
header_paths = [
    r"./src/service",
    r"./src/viewmodel",
    r"./src/viewmodel/TestViewModel"
]

# parse the entities out of all files
models, enums, viewmodels, services = parse(full_paths, libclang_path, stl_headers,
                                             c_headers, target_macros, header_paths)

# print how many entities were parsed
print('Parsed' +
      (' models: %d' % len(models) if len(models) else '') +
      (' enums: %d' % len(enums) if len(enums) else '') +
      (' viewmodels: %d' % len(viewmodels) if len(viewmodels) else '') +
      (' services: %d' % len(services) if len(services) else ''))

# print detailed model information
for model in models:
    print(model.accept_printer(printers.JSONPrinter()))

for viewmodel in viewmodels:
    print(viewmodel.accept_printer(printers.JSONPrinter()))

for enum in enums:
    print(enum.accept_printer(printers.JSONPrinter()))
analogy_file = datasets.getpath(options.dataset, config, eval_mode.ALL_INFO)

configlogger.writeConfig(log, settings=[
    ('Config file', options.config),
    ('Dataset', options.dataset),
    ('Path to dataset', analogy_file),
    ('Lowercasing analogies', options.to_lower),
    ('Output vocab file', vocabf),
], title='Vocabulary extraction from analogy dataset')

log.writeln('Reading %s analogies from %s...' % (options.dataset, analogy_file))
analogies = parsers.parse(
    analogy_file,
    options.dataset,
    eval_mode.ALL_INFO,
    data_mode.String,
    to_lower=options.to_lower
)
log.writeln('Read {0:,} analogies in {1:,} relations.\n'.format(
    sum([len(anlg_set) for anlg_set in analogies.values()]),
    len(analogies)
))

log.writeln('Extracting vocabulary...')
vocab = set()
for (_, anlg_set) in analogies.items():
    for (a, b, c, d) in anlg_set:
        vocab.add(a)
        vocab.add(c)
        if options.dataset != datasets.Google:
def analogyTask(analogy_file, dataset, setting, analogy_type, embeddings,
                log=log, report_top_k=5, predictions_file=None,
                predictions_file_mode='w', to_lower=False):
    analogies = parsers.parse(analogy_file, dataset, setting, analogy_type,
                              to_lower=to_lower)

    # if we're saving the predictions, start that file first
    if predictions_file:
        pred_stream = codecs.open(predictions_file, predictions_file_mode, 'utf-8')

    # build the analogy completion model
    (vocab, emb_arr) = embeddings.toarray()
    vocab_indexer = {vocab[i]: i for i in range(len(vocab))}
    sess = tf.Session()
    grph = AnalogyModel(sess, emb_arr)

    completed, results = 0, {}
    for (relation, rel_analogies) in analogies.items():
        t_file = log.startTimer(' Starting relation: %s (%d/%d)' % (
            relation, completed + 1, len(analogies)))

        rel_results = completeAnalogySet(rel_analogies, setting, emb_arr, vocab,
                                         vocab_indexer, grph, report_top_k, log=log)
        results[relation] = rel_results
        (correct, MAP, MRR, total, skipped, predictions) = rel_results

        log.stopTimer(t_file, message=' Completed file: %s (%d/%d) [{0:.2f}s]\n >> Skipped %d/%d' % (
            relation, completed + 1, len(analogies), skipped, total))

        if predictions_file:
            pred_stream.write(('{0}\n %s\n{0}\n'.format('-' * 79)) % relation)
            for prediction in predictions:
                ((a, b, c, d), is_correct, num_candidates, top_k) = prediction
                pred_stream.write('\n%s:%s::%s:%s\nCorrect: %s\nPredictions: %d\n%s\n' % (
                    a, b, c, d, str(is_correct), num_candidates,
                    '\n'.join([(' %s' % guess) for guess in top_k])))

        completed += 1

    # tie off the predictions file
    if predictions_file:
        pred_stream.close()

    return results