def test_query_replace(self):
    # Set the "link" key of DATAPLAN_REPLACE (updated in place) to the
    # local mockup URL before handing the dataplan to the parser.
    link_override = {"link": u"http://localhost/mockup"}
    DATAPLAN_REPLACE.update(link_override)

    parser = HTMLParser(DATAPLAN_REPLACE)
    parser.run()
    results = parser.get_records()

    # Each (field, value) pair is checked in order against the records.
    expectations = (
        ("username", "lexndru"),
        ("sentence", "This is a sentence about lexndru"),
    )
    for field, wanted in expectations:
        self.assertEqual(wanted, results.get(field))
def test_query_remove_glue(self):
    # Set the "link" key of DATAPLAN_REMOVE (updated in place) to the
    # local mockup URL before handing the dataplan to the parser.
    link_override = {"link": u"http://localhost/mockup"}
    DATAPLAN_REMOVE.update(link_override)

    parser = HTMLParser(DATAPLAN_REMOVE)
    parser.run()
    results = parser.get_records()

    # Each (field, value) pair is checked in order against the records.
    expectations = (
        ("time", "12:00"),
        ("alert", "The time is 12:00"),
    )
    for field, wanted in expectations:
        self.assertEqual(wanted, results.get(field))
def main():
    """Hap! bootstrap.

    Parses the shell arguments, loads the dataplan from the piped standard
    input or from ``Shell.input``, runs ``HTMLParser`` over it, optionally
    writes the resulting dataplan back to a file and passes the records to
    ``print_json`` unless ``Shell.silent`` is set.

    Returns:
        None. The version and samples switches print their message and
        return early.

    Raises:
        SystemExit: when the input cannot be read or parsed, when the
            parsed input is not a dict, or when no link is available. It is
            raised with ``None`` (clean exit) after the help was printed.
    """

    # Parse shell arguments
    Shell.parse()

    # Print version
    if Shell.version:
        return print("Hap! v{}".format(__version__))

    # Log config: verbose output only when not silenced
    verbose = not Shell.silent and Shell.verbose
    Log.configure(verbose)

    # Dump info and exit
    if Shell.sample:
        return print(SAMPLES_MESSAGE)

    # Input reader
    def read_json():
        """Return ``(ok, payload)``.

        ``payload`` is the loaded data on success; on failure it is an
        error message, or None once the help text has been printed.
        """
        if not sys.stdin.isatty():
            # Piped input wins over the input file; Shell.input is then
            # only used further down as the file to save to.
            data = sys.stdin.read()
            if not data:
                return False, "Invalid input stream"
            retval = FileReader.parse_json(data)
            if retval is not None:
                return True, retval
            return False, "Input stream is not a valid JSON"

        # Interactive terminal from here on: an input file is required
        if Shell.input is None:
            Shell.psr.print_help()
            return False, None

        ok, data = FileReader(Shell.input).read()
        # On failure the second item is forwarded as the exit message
        return (True, data) if ok else (False, data)

    # Read data
    status, data_in = read_json()
    if not status:
        raise SystemExit(data_in)

    # A dataplan is always an object
    if not isinstance(data_in, dict):
        raise SystemExit("Corrupted input provided. Please fix and try again")

    # Log shell params
    if verbose:
        Log.info(u"Filepath: {}".format(Shell.input))
        Log.info(u"Save to file? {}".format(Shell.save))

    # Update link?
    if Shell.link is not None:
        data_in.update({"link": unicode(Shell.link)})

    # Crash if no link is provided. A falsy check is used so that a JSON
    # null link (None) is rejected just like a missing or empty one.
    if not data_in.get("link"):
        raise SystemExit("No link provided. See --help")

    # Parse document
    psr = HTMLParser(data_in, no_cache=Shell.no_cache,
                     refresh=(Shell.save and Shell.refresh))
    psr.run()
    records = psr.get_records()
    dataplan = psr.get_dataplan()

    # Update dataplan
    if Shell.save:
        filename = Shell.input
        if filename is None:
            # No input file to overwrite: save under a random name
            filename = "{}.json".format(uuid.uuid4().hex)
        FileWriter(filename).write(dataplan)

    # Print output
    if not Shell.silent:
        print_json(records)
def test_query_pattern(self):
    # Set the "link" key of DATAPLAN_PATTERN (updated in place) to the
    # local mockup URL before handing the dataplan to the parser.
    link_override = {"link": u"http://localhost/mockup"}
    DATAPLAN_PATTERN.update(link_override)

    parser = HTMLParser(DATAPLAN_PATTERN)
    parser.run()
    results = parser.get_records()

    # The "topic" record must come back as "dogs".
    topic = results.get("topic")
    self.assertEqual("dogs", topic)
def test_query_css(self):
    # Set the "link" key of DATAPLAN_CSS (updated in place) to the
    # local mockup URL before handing the dataplan to the parser.
    link_override = {"link": u"http://localhost/mockup"}
    DATAPLAN_CSS.update(link_override)

    parser = HTMLParser(DATAPLAN_CSS)
    parser.run()
    results = parser.get_records()

    # The "github" record must come back as "Hap GitHub".
    github = results.get("github")
    self.assertEqual("Hap GitHub", github)
def test_query_xpath(self):
    # Set the "link" key of DATAPLAN_XPATH (updated in place) to the
    # local mockup URL before handing the dataplan to the parser.
    link_override = {"link": u"http://localhost/mockup"}
    DATAPLAN_XPATH.update(link_override)

    parser = HTMLParser(DATAPLAN_XPATH)
    parser.run()
    results = parser.get_records()

    # The "url" record must come back as the project's GitHub address.
    url = results.get("url")
    self.assertEqual("https://github.com/lexndru/hap", url)