def parse_data(self):
    v = parse.findall('value {:d} goes to bot {:d}', self.data)
    for v_, b in v:
        self.bots[b].add(v_)
    z = parse.findall(
        'bot {bot:d} gives low to {low[bot]:w} {low[value]:d} and high to {high[bot]:w} {high[value]:d}',
        self.data)
    self.instructions = {i['bot']: i.named for i in z}
def test_case_sensitivity(self):
    l = [r.fixed[0] for r in parse.findall("x({})x", "X(hi)X")]
    self.assertEqual(l, ["hi"])
    l = [
        r.fixed[0]
        for r in parse.findall("x({})x", "X(hi)X", case_sensitive=True)
    ]
    self.assertEqual(l, [])
def get_domains_from_text(text):
    # find and yield tidied https domains
    https_results = parse.findall("https://{domain}/", text)
    for result in https_results:
        domain = result["domain"]
        if domain.startswith("www."):
            domain = domain[4:]
        yield domain
    # find and yield raw http-without-s domains
    http_unsecured_results = parse.findall("http://{domain}/", text)
    for result in http_unsecured_results:
        yield "http://" + result["domain"]
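# A minimal usage sketch for get_domains_from_text (assumes the parse module
# is imported as above); the sample text and domains are hypothetical.
sample = "see https://www.example.com/ and http://legacy.example.org/ too"
print(list(get_domains_from_text(sample)))
# -> ['example.com', 'http://legacy.example.org']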
def main(args):
    input_file = Path(args.input_file)
    suffix = "end" if args.end else "start"
    out_dir = Path(args.out_dir)
    for line in open(input_file):
        batch_size = next(parse.findall("b-{}/", line))[0]
        iteration = next(parse.findall("-t{}.log", line))[0]
        time = next(parse.findall(".log:{:ta} -", line))[0]
        output_file = out_dir.joinpath(f'b-{batch_size}',
                                       f't{iteration}-{suffix}.txt')
        output_file.parent.mkdir(parents=True, exist_ok=True)
        timestamp = '{}-{}-{}-{}-{}-{}'.format(time.year, time.month,
                                               time.day, time.hour,
                                               time.minute, time.second)
        output_file.write_text(timestamp)
def _label_ticket(data):
    fields, my_ticket, tickets = data.split('\n\n')
    fields = parse.findall(r'{:w}:_{:d}-{:d}_or_{:d}-{:d}',
                           fields.replace(' ', '_'))
    fields = {
        field: set((*range(start, end + 1), *range(start_2, end_2 + 1)))
        for field, start, end, start_2, end_2 in fields
    }
    all_field_values = set(itertools.chain.from_iterable(fields.values()))
    my_ticket = _parse_ticket(my_ticket)
    tickets = [_parse_ticket(ticket) for ticket in tickets.split('\n')[1:]]
    tickets = [my_ticket, *tickets]
    tickets = [
        ticket for ticket in tickets
        if set(ticket.values()).issubset(all_field_values)
    ]
    tickets = dict(enumerate(list(zip(*[i.values() for i in tickets]))))
    possibles = defaultdict(list)
    for field in fields:
        for index, values in tickets.items():
            if set(values).issubset(fields[field]):
                possibles[field].append(index)
    actuals = {}
    while len(actuals) != len(possibles):
        possibles, actuals = _possibles_to_actuals(possibles, actuals)
    return actuals
def __init__(self, path):
    file = open(path, "r")
    source = file.readline()
    all_elec_id = []
    for i in parse.findall("({:d})", source):
        all_elec_id.append(i.fixed[0])
    all_elec_id = np.array(all_elec_id)
    self.record_elec_id = all_elec_id
    # channel_id -> elec_id: if not connected, value is -1
    chan_elec_table = np.full(SIZE_CHANNEL, -1)
    for i, j in parse.findall("{:d}({:d})", source):
        chan_elec_table[i] = j
    self.chan_elec_table = chan_elec_table
def search_all(self, template: str) -> _Result:
    """Search the :class:`Element <Element>` (multiple times) for the given
    parse template.

    :param template: The Parse template to use.
    """
    return [r for r in findall(template, self.html)]
def __init__(self, path, enc="utf-8"):
    for ext in ServerResource.MEDIA_EXTENSIONS:
        if path.endswith(ext):
            with open(path, "rb") as f:
                self.content = f.read()
            break
        elif path.endswith("amz"):
            # "Templating engine" stuff comes here...
            with open(path, 'r', encoding=enc) as f:
                self.content = f.read()
            self.soup = BeautifulSoup(self.content, "lxml")
            self.json_dicts = [
                json.loads(' '.join(r.fixed[0].split()))
                for r in findall("|{}|", self.content)
            ]
            for json_dict in self.json_dicts:
                for condition in json_dict:
                    if eval(condition):
                        self.soup.body.insert_before(
                            json_dict[condition]["True"])
                    else:
                        self.soup.body.insert_before(
                            json_dict[condition]["False"])
            [s.extract() for s in self.soup("amz")]
            self.content = self.soup.prettify(formatter=None)
            break  # avoid re-running the templating pass for every extension
        else:
            with open(path, 'r', encoding=enc) as f:
                self.content = f.read()
            break
def query_find(self, string):
    # set up an empty key registry
    key_registry = []
    # run through all keys and try to parse against them
    for key in self.keys():
        key_registry.append({
            'key': key,
            'find_resp': findall(key, str(string).lower())
        })
    # get rid of false keys or None responses
    key_registry = [x for x in key_registry if x['find_resp'] is not None]
    joined = ''
    # get key from registry and unpack its associated response
    for i in key_registry:
        for j in i['find_resp']:
            joined += j[0]
        # query self dictionary for matching key and function
        for func in self.get(i['key']):
            return func(joined)
def remove_scene_direction(text, to_remove="({})"):
    """
    Given an input string `text`, remove the scene direction.

    Defaults to removing text between parentheses, but can be configured
    via the `to_remove` kwarg for other delimiters. For instance,
    `to_remove="[{}]"` would remove text between square brackets.
    """
    scene_direction_occurrences = parse.findall(to_remove, text)
    indexes_to_remove = []
    for occurrence in scene_direction_occurrences:
        for start_end_index in occurrence.spans.values():
            # widen the span by one character on each side so the
            # delimiters themselves are included in the removed text
            start_parens = start_end_index[0] - 1
            end_parens = start_end_index[1] + 1
            indexes_to_remove.append((start_parens, end_parens))
    indexes_to_remove_in_order = sorted(indexes_to_remove)
    text_no_scene_direction = ""
    begin_index = 0
    for start_end_index in indexes_to_remove_in_order:
        start_parens = start_end_index[0]
        end_parens = start_end_index[1]
        # grab the text up to the opening paren
        text_no_scene_direction = (text_no_scene_direction +
                                   text[begin_index:start_parens])
        # on the next iteration, grab text starting after the closing paren
        begin_index = end_parens
    # grab any text hanging at the end
    text_no_scene_direction = text_no_scene_direction + text[begin_index:]
    return text_no_scene_direction
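# A minimal usage sketch for remove_scene_direction; the line of dialogue
# below is a hypothetical example of the expected behavior.
line = "JAKE: (whispering) we should go"
print(remove_scene_direction(line))
# -> "JAKE:  we should go"  (the parenthetical is gone; both surrounding
# spaces remain, since only the delimiters and their contents are removed)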
def findall(self, s):
    import parse
    sTransformed = self._createEscapeSequencesMap(s)
    parseResults = parse.findall(self.pattern,
                                 sTransformed,
                                 extra_types=self.extra_types,
                                 case_sensitive=self.case_sensitive)
    for parseResult in parseResults:
        yield self._resultToMyResult(parseResult, s)
def parse_input(puzzle_input) -> Bags:
    bags = defaultdict(dict)
    for line in puzzle_input:
        color, raw_contains = line.split(" bags contain ")
        for c in parse.findall("{count:d} {color} bag", raw_contains):
            bags[color][c["color"]] = c["count"]
    return bags
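# A minimal usage sketch for parse_input; the rule below is hypothetical
# sample input in the "X bags contain N Y bags" format the function expects.
rules = ["light red bags contain 1 bright white bag, 2 muted yellow bags."]
print(dict(parse_input(rules)))
# -> {'light red': {'bright white': 1, 'muted yellow': 2}}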
def importTable(file, macroKeyword):
    table = []
    with open(file) as f:
        for line in f:
            for val in findall(macroKeyword + '({:g})', line):
                table.append(val[0])
    assert len(table), "No table data found"
    return np.array(table)
def _get_failure_info(stdout, vcd_path):
    waveform = _render_vcd(vcd_path)
    search_format = 'Assert failed in top: {}:{linenumber:d}'
    line_num = search(search_format, stdout)['linenumber']
    step_format = 'Checking assertions in step {step_num:d}..'
    steps = findall(step_format, stdout)
    step_num = list(steps)[-1]['step_num']
    return BMC_Result(Result.FAIL, stdout, waveform, step_num, line_num)
def parse_data(self):
    rooms = list(parse.findall(r'{:D}-{:d}[{:w}]', self.data))
    rooms = [Room(*room) for room in rooms]
    checksummed = (room for room in rooms if room.valid_checksum())
    real = [(room.decrypt(), room.sector) for room in checksummed]
    r = dict(real)
    return r
def search(self, template: str, first: bool = False) -> _Result:
    """Search the :class:`Element <Element>` for the given parse template.

    :param template: The Parse template to use.
    """
    elements = [r for r in findall(template, self.xml)]
    return _get_first_or_list(elements, first)
def __init__(self, logbookfd):
    self.fileContents = logbookfd.read()
    self.rawLogEntries = list(
        r.fixed[0] for r in parse.findall(
            "+++Begin log entry+++{}+++End log entry+++", self.fileContents))
    self.logEntries = []
    for entry in self.rawLogEntries:
        timestamp = parse.search("Time:{i}\n", entry)['i']
        user = parse.search("User:{i}\n", entry)['i']
        note = parse.search("Note:{i}\n", entry)['i']
        tags = list(r.fixed[0] for r in parse.findall("'+{}'", entry))
        addedFiles = list(
            r.fixed[0] for r in parse.findall(addFileStr + "{}\n", entry))
        removedFiles = list(
            r.fixed[0] for r in parse.findall(removeFileStr + "{}\n", entry))
        self.logEntries.append(
            logEntry(timestamp, user, note, tags, addedFiles, removedFiles))
def test_no_evaluate_result(self):
    # basic findall() test
    s = ''.join(
        m.evaluate_result().fixed[0]
        for m in parse.findall(">{}<", "<p>some <b>bold</b> text</p>",
                               evaluate_result=False))
    self.assertEqual(s, "some bold text")
def main(args):
    input_file = Path(args.input_file)
    lines = input_file.read_text()
    breakdowns = {
        k: next(parse.findall(f"{k} ({v}) ({{:n}} samples", lines))[0]
        for k, v in components.items()
    }
    for k, v in breakdowns.items():
        print(f'{k}, {v}')
def search_all(self, template):
    """Search the :class:`Element <Element>` (multiple times) for the given
    parse template.

    :param template: The Parse template to use.
    """
    if not isinstance(template, str):
        raise TypeError("Expected string, got %r" % type(template))
    return [r for r in findall(template, self.html)]
def _get_percent(self):
    """Get the percentage at which the analysis stopped."""
    step_percent = "{:d} of {:d} steps ({percent:g}%) done"
    # keep only the last reported percentage
    last_percent_parse = [
        x for x in parse.findall(step_percent, self.snakemaster)
    ]
    return last_percent_parse[-1]["percent"]
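# A minimal illustration of the template used by _get_percent; the progress
# lines below are a hypothetical excerpt of snakemake output.
import parse

log = "3 of 10 steps (30%) done\n7 of 10 steps (70%) done\n"
last = [x for x in parse.findall("{:d} of {:d} steps ({percent:g}%) done", log)][-1]
print(last["percent"])  # -> 70.0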
def part_a(data):
    fields, _, tickets = data.split('\n\n')
    ranges = PARSER.findall(fields)
    ranges = (range(start, end + 1) for start, end in ranges)
    valid = set(itertools.chain.from_iterable(ranges))
    values = parse.findall('{num:d}', tickets)
    values = (match['num'] for match in values)
    values = (v for v in values if v not in valid)
    return sum(values)
def getKeys(auxfile, key_map, bib):
    f = open(auxfile, "r")
    # raw string: the pattern matches literal "\citation{...}" commands
    cites = parse.findall(r"\citation{{{}}}", f.read())
    f.close()
    keys = [s for c in cites for s in c[0].split(",")]
    remap = {}
    for key in keys:
        if ":" in key:
            result = resolve_key(key, key_map, bib)
            remap[key] = result
            # print(key + " : " + result)
    return remap
def _get_rules_with_errors(self):
    """Return names and log files of rules which returned an error."""
    errors = """Error in rule {rule:S}:
    jobid: {jobid:d}
    output: {output}
    log: {log:S} (check log file(s) for error message)
cluster_jobid: Submitted batch job {slurm_id:d}"""
    parsed_errors = list(parse.findall(errors, self.snakemaster))
    if parsed_errors:
        return parsed_errors
    else:
        errors = """Error in rule {rule:S}:
    jobid: {jobid:d}
    output: {output}
cluster_jobid: Submitted batch job {slurm_id:d}"""
        return list(parse.findall(errors, self.snakemaster))
def process(data):
    p = parse.compile("{bag} bags contain {bag_string}")
    rules = {}
    for record in data:
        parsed = p.parse(record)
        rules[parsed['bag']] = parsed['bag_string']
    for rule in rules:
        r = rules[rule]
        sub_rules = []
        for bag_rule in parse.findall("{num:d} {bag} bag", r):
            d = {'num': bag_rule['num'], 'bag': bag_rule['bag']}
            sub_rules.append(d)
        rules[rule] = sub_rules
    return rules
def search_all(self, template):
    """Search the :class:`Element <Element>` (multiple times) for the given
    parse template.

    :param template: The Parse template to use.
    """
    if not isinstance(template, str):
        raise TypeError("Expected string, got %r" % type(template))
    try:
        import parse
    except ImportError:
        raise ImportError("parse module is not installed. "
                          "Install it using pip: $ pip install parse")
    return [r for r in parse.findall(template, self.html)]
def updateFileList(logbookfd, logbookFilename, baseDir):
    addFileStr = "addFile:"  # CARE: check init string for logbook
    removeFileStr = "removeFile:"
    filesInDirectories = []
    # check for files that are logged in the logbook
    logbookfd.seek(0)
    fileContents = logbookfd.read()
    filesInLogbook = list(
        r.fixed[0] for r in parse.findall(addFileStr + "{}\n", fileContents))
    # subtract off files that we've already noted as removed
    removedFilesInLogbook = list(
        r.fixed[0]
        for r in parse.findall(removeFileStr + "{}\n", fileContents))
    # final set of files in the logbook
    filesInLogbook = list(set(filesInLogbook) - set(removedFilesInLogbook))
    # check for files in all directories under baseDir
    for root, dirs, files in os.walk(baseDir):
        for file in files:
            filesInDirectories.append(
                os.path.relpath(os.path.join(root, file), baseDir))
    # take the set differences to determine which files need to be
    # added and which need to be removed
    filesToAdd = list(set(filesInDirectories) - set(filesInLogbook))
    filesToRemove = list(set(filesInLogbook) - set(filesInDirectories))
    print(filesToAdd)
    print(filesToRemove)
    return (filesToAdd, filesToRemove)
def unique_compiler_cmds(run_f):
    list_compiler_commands = run_f["-###", "-c"]
    _, _, stderr = list_compiler_commands.run()
    stderr = stderr.split('\n')
    for line in stderr:
        res = parse.search('"{0}"', line)
        if res and os.path.exists(res[0]):
            results = parse.findall('"{0}"', line)
            cmd = res[0]
            args = [x[0] for x in results][1:]
            compiler_cmd = local[cmd]
            compiler_cmd = compiler_cmd[args]
            compiler_cmd = compiler_cmd["-S", "-emit-llvm"]
            yield compiler_cmd
def parse_all(content, patterns):
    """Extract the fields from the content.

    Args:
        content : str
            The content to be parsed.
        patterns : list of str
            The list of patterns to find.
    """
    data = defaultdict(list)
    for pat in patterns:
        for match in parse.findall(pat, content):
            for key, value in match.named.items():
                data[key].append(value)
    return data
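# A minimal usage sketch for parse_all; the content and patterns below are
# hypothetical and only illustrate how named fields are aggregated per key.
content = "user=alice code=200\nuser=bob code=404\n"
patterns = ["user={user:w}", "code={code:d}"]
print(dict(parse_all(content, patterns)))
# -> {'user': ['alice', 'bob'], 'code': [200, 404]}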
def copy_markdown_images(root: str, file: str, repo: Repository,
                         markdown: str) -> str:
    template = "![{name}]({path})"
    paths = [result["path"] for result in parse.findall(template, markdown)]
    parent = Path(file).parent
    for path in paths:
        if path.startswith("http"):
            continue
        img_path = Path(parent / path).resolve().relative_to(
            Path(".").resolve())
        img = repo.get_contents(str(img_path))
        destination = os.path.realpath(f"{root}/gen_/{img_path}")
        os.makedirs(os.path.dirname(destination), exist_ok=True)
        urllib.request.urlretrieve(img.download_url, destination)
        markdown = markdown.replace(path, f"gen_/{img_path}")
    return markdown
def inject_events(self, wait_max_seconds=2):
    t = None
    timestamp_offset = 0
    with open(self.filename) as f:
        idx = 0
        dev = None
        if idx in self._devices:
            dev = self._devices[idx]
        for l in f:
            if l.startswith('D:'):
                r = parse('D: {idx:d}', l)
                assert r is not None
                dev = self._devices[r['idx']]
            elif l.startswith('E:'):
                r = parse('E: {sec:d}.{usec:d} {len:2d}{data}', l)
                assert r is not None
                length = r['len']
                timestamp = r['sec'] + r['usec'] / 1000000
                r_ = findall(' {:S}', r['data'])
                data = [int(x[0], 16) for x in r_]
                assert len(data) == int(length)
                now = datetime.today()
                if t is None:
                    t = now
                    timestamp_offset = timestamp
                target_time = t + timedelta(seconds=timestamp -
                                            timestamp_offset)
                sleep = 0
                if target_time > now:
                    sleep = target_time - now
                    sleep = sleep.seconds + sleep.microseconds / 1000000
                if sleep < 0.01:
                    pass
                elif sleep < wait_max_seconds:
                    time.sleep(sleep)
                else:
                    t = now
                    timestamp_offset = timestamp
                    time.sleep(wait_max_seconds)
                dev.call_input_event(data)
                self.replayed_count += 1
def pip_versions(package_name):
    pip_command = "{0} install {1}==0.xx".format(which_pip(), package_name)
    c = delegator.run(pip_command)
    # bail out when pip reports no distribution and gives no versions list
    no_matching = ("No matching distribution found for" in c.err
                   and "from versions:" not in c.err)
    if no_matching:
        return False
    main_result = parse.search("from versions: {})", c.err)
    # pad with a leading space and trailing comma so every version,
    # including the first and last, matches the " {:S}," pattern
    results = [
        result.fixed[0]
        for result in parse.findall(" {:S},", " " + main_result.fixed[0] + ",")
    ]
    if not results:
        return []
    return results
def validate_path_spec(
    cls, value: Optional[str], values: Dict[str, Any]
) -> Optional[str]:
    if value is None:
        return None
    if not value.startswith("./"):  # enforce this for semantics
        raise ValueError("Path_spec must start with './'")
    name_indices = sorted([x[0] for x in parse.findall("{{name[{:d}]}}", value)])
    if len(name_indices) == 0:
        raise ValueError("Path spec must contain at least one name identifier")
    if name_indices != list(range(max(name_indices) + 1)):
        raise ValueError(
            "Path spec must contain consecutive name identifiers, starting at 0"
        )
    return value
st = os.stat(os.path.join(root, filename))
atime = st.st_atime  # access time
mtime = st.st_mtime  # modification time
# 'U' opens in universal-newline mode, converting line endings automatically
inputfile = open(os.path.join(root, filename), 'rU')
my_text = inputfile.read()  # read the whole text file
for match in picasablock.finditer(my_text):
    # find the album header
    parsealbum = parse.parse("[.album:{albumid}]{:s}{fieldstart}",
                             match.group())
    if parsealbum:  # then we have an album
        parse_album_fields = parse.findall("{field}={fieldresult}\n",
                                           match.group(),
                                           parsealbum.spans["fieldstart"][0])
        # check if the album exists already
        albumidlist = [z["albumid"] for z in albumsfound]
        if parsealbum.named["albumid"] in albumidlist:
            # duplicate album
            idx = albumidlist.index(parsealbum.named["albumid"])
            print("duplicate album in:",
                  os.path.relpath(os.path.join(root, filename),
                                  rootPath).replace(os.path.sep, '/'),
                  parsealbum.named["albumid"])
            for r in parse_album_fields:
                if r.named["field"] in albumsfound[idx]:
                    if albumsfound[idx][r.named["field"]] != r.named["fieldresult"]:
                        print("WARNING: " + r.named["field"] + " was: " +
                              albumsfound[idx][r.named["field"]] +
                              " replaced by: " + r.named["fieldresult"])
                albumsfound[idx][r.named["field"]] = r.named["fieldresult"]
def test_findall(self):
    # basic findall() test
    s = "".join(
        r.fixed[0]
        for r in parse.findall(">{}<", "<p>some <b>bold</b> text</p>"))
    self.assertEqual(s, "some bold text")
args = parser.parse_args()
data_directory = "../../data/"
scripts_path = os.path.join(data_directory, "scripts.txt")
output_directory = os.path.join(data_directory, "characters")
try:
    script_file_handle = open(scripts_path, 'r')
except OSError:
    print("Need master script file (scripts.txt) in the data directory.")
    exit()
if args.character:
    # use lower case to find the character
    character_to_extract = args.character_to_extract.lower()
else:
    characters = get_all_characters(script_file_handle)
    for character in characters:
        character = clean_character_name(character)
        script_file_handle.seek(0)
        # the :^ spec strips surrounding whitespace, so this finds
        # strings like "Jake:text" or " Jake: text" etc.
        lines = '\n'.join(
            r.fixed[0]
            for r in parse.findall(character + ": {:^}\n",
                                   script_file_handle.read()))
        if lines:
            character_file = open(
                os.path.join(output_directory, character) + '.txt', 'w')
            character_file.write('%s\n' % lines)