Python findall Examples, parse.findall Python Examples

Example #1

0

Show file

File: day_10.py Project: cqkh42/advent-of-code

    def parse_data(self):
        """Load chip assignments and bot instructions from ``self.data``.

        Every "value N goes to bot B" statement adds N to ``self.bots[B]``.
        Every "bot B gives low to ... and high to ..." statement is stored
        in ``self.instructions`` under the giving bot's number, as the dict
        of named fields captured by the template.
        """
        for chip, bot in parse.findall('value {:d} goes to bot {:d}', self.data):
            self.bots[bot].add(chip)

        rule_template = (
            'bot {bot:d} gives low to {low[bot]:w} {low[value]:d} '
            'and high to {high[bot]:w} {high[value]:d}'
        )
        instructions = {}
        for rule in parse.findall(rule_template, self.data):
            instructions[rule['bot']] = rule.named
        self.instructions = instructions

Example #2

0

Show file

    def test_case_sensitivity(self):
        # by default the template matches regardless of letter case
        insensitive = parse.findall("x({})x", "X(hi)X")
        self.assertEqual([match.fixed[0] for match in insensitive], ["hi"])

        # with case_sensitive=True the lower-case "x" no longer matches "X"
        sensitive = parse.findall("x({})x", "X(hi)X", case_sensitive=True)
        self.assertEqual([match.fixed[0] for match in sensitive], [])

Example #3

0

Show file

def get_domains_from_text(text):
    """Yield the domain of every URL found in ``text``.

    HTTPS domains are yielded first, with a leading "www." removed.
    Plain-HTTP domains follow, untouched but prefixed with "http://" so
    they remain recognisable as unsecured.
    """
    www_prefix = "www."

    # tidied https domains
    for match in parse.findall("https://{domain}/", text):
        name = match["domain"]
        yield name[len(www_prefix):] if name.startswith(www_prefix) else name

    # raw http-without-s domains
    for match in parse.findall("http://{domain}/", text):
        yield "http://" + match["domain"]

Example #4

0

Show file

File: extract_duration.py Project: csarron/sustainlp2020-energy

def main(args):
    """Write one timestamp file per line of the input listing.

    Each line of ``args.input_file`` must contain a ``b-<batch>/`` fragment,
    a ``-t<iteration>.log`` fragment and a ``.log:<date/time> -`` fragment.
    For every line the file ``<out_dir>/b-<batch>/t<iteration>-<suffix>.txt``
    is (re)written with the date/time rendered as
    ``year-month-day-hour-minute-second``. ``<suffix>`` is "end" when
    ``args.end`` is truthy and "start" otherwise.

    :param args: Object exposing ``input_file``, ``out_dir`` and ``end``.
    :raises StopIteration: If a line lacks one of the expected fragments
        (``next()`` on an exhausted ``findall`` iterator).
    """
    input_file = Path(args.input_file)
    suffix = "end" if args.end else "start"
    out_dir = Path(args.out_dir)
    # Use a context manager so the handle is closed even when a line fails
    # to parse; the original iterated over a bare open() and leaked it.
    with open(input_file) as lines:
        for line in lines:
            batch_size = next(parse.findall("b-{}/", line))[0]
            iteration = next(parse.findall("-t{}.log", line))[0]
            # Named ``stamp`` (not ``time``) so the stdlib module name is not
            # shadowed inside this function.
            stamp = next(parse.findall(".log:{:ta} -", line))[0]
            output_file = out_dir.joinpath(f'b-{batch_size}',
                                           f't{iteration}-{suffix}.txt')
            output_file.parent.mkdir(parents=True, exist_ok=True)
            timestamp = '{}-{}-{}-{}-{}-{}'.format(stamp.year, stamp.month,
                                                   stamp.day, stamp.hour,
                                                   stamp.minute, stamp.second)
            output_file.write_text(timestamp)

Example #5

0

Show file

def _label_ticket(data):
    """Work out which ticket column belongs to which field.

    ``data`` consists of three sections separated by blank lines: the field
    rules, "my" ticket and the other tickets (whose first line is skipped).
    Tickets holding a value that no field allows are dropped. For each
    field, the columns whose values all fit the field are recorded as
    candidates, and ``_possibles_to_actuals`` is applied repeatedly until
    every candidate field has been resolved.

    Returns the ``actuals`` mapping produced by ``_possibles_to_actuals``.
    """
    field_block, own_block, other_block = data.split('\n\n')

    # Spaces are replaced by underscores so that multi-word field names can
    # be captured by a single {:w} field.
    allowed = {}
    for name, low_1, high_1, low_2, high_2 in parse.findall(
            r'{:w}:_{:d}-{:d}_or_{:d}-{:d}', field_block.replace(' ', '_')):
        allowed[name] = (set(range(low_1, high_1 + 1))
                         | set(range(low_2, high_2 + 1)))

    every_allowed_value = set(itertools.chain.from_iterable(allowed.values()))

    parsed = [_parse_ticket(own_block)]
    parsed.extend(_parse_ticket(row) for row in other_block.split('\n')[1:])

    usable = [
        ticket for ticket in parsed
        if set(ticket.values()).issubset(every_allowed_value)
    ]

    # Transpose: column index -> tuple of that column's value on each ticket.
    columns = dict(enumerate(zip(*[ticket.values() for ticket in usable])))

    possibles = defaultdict(list)
    for name, permitted in allowed.items():
        for position, column in columns.items():
            if set(column).issubset(permitted):
                possibles[name].append(position)

    actuals = {}
    while len(actuals) != len(possibles):
        possibles, actuals = _possibles_to_actuals(possibles, actuals)

    return actuals

Example #6

0

Show file

File: data.py Project: evistream/201910

    def __init__(self, path):
        """Read the electrode/channel mapping from the first line of ``path``.

        Sets two attributes:

        * ``record_elec_id`` -- array of every integer that appears in
          parentheses on that line, in order of appearance.
        * ``chan_elec_table`` -- array of length ``SIZE_CHANNEL`` filled with
          -1, where entry ``c`` is set to ``e`` for every ``c(e)`` pair found.

        :param path: Path of the file whose first line holds the mapping.
        """
        # Only the first line is needed. The context manager closes the file
        # straight away; the original left the handle open.
        with open(path, "r") as file:
            source = file.readline()

        self.record_elec_id = np.array(
            [match.fixed[0] for match in parse.findall("({:d})", source)]
        )

        # channel_id -> elec_id: if not connected, value is -1
        chan_elec_table = np.full(SIZE_CHANNEL, -1)
        for chan_id, elec_id in parse.findall("{:d}({:d})", source):
            chan_elec_table[chan_id] = elec_id
        self.chan_elec_table = chan_elec_table

Example #7

0

Show file

File: requests-html.py Project: soo-pecialist/Automate_The_Boring_Stuff_With_Python

    def search_all(self, template: str) -> _Result:
        """Collect every match of a parse template in this element's HTML.

        :param template: The Parse template to use.
        """
        return list(findall(template, self.html))

Example #8

0

Show file

 def __init__(self, path, enc="utf-8"):
     """Load the resource at ``path`` into ``self.content``.

     * Paths ending in one of ``ServerResource.MEDIA_EXTENSIONS`` are read
       as raw bytes.
     * Paths ending in "amz" are read as text, parsed into ``self.soup`` and
       run through the templating step: every ``|...|`` section is decoded
       as JSON into ``self.json_dicts``; each key is evaluated as a Python
       expression and the matching "True"/"False" entry is inserted before
       the document body. All ``amz`` tags are then removed and the
       prettified document becomes ``self.content``.
     * Any other path is read as text.

     :param path: Path of the file to load.
     :param enc: Encoding used when the file is read as text.
     """
     # str.endswith accepts a tuple, so every media extension is tested in
     # one call. The original looped over the extensions and fell into the
     # text branch (then broke out) as soon as the first extension did not
     # match, so later extensions were never honoured; it also re-ran the
     # "amz" branch once per extension.
     if path.endswith(tuple(ServerResource.MEDIA_EXTENSIONS)):
         with open(path, "rb") as f:
             self.content = f.read()
     elif path.endswith("amz"):
         # "Templating engine" stuff comes here...
         with open(path, 'r', encoding=enc) as f:
             self.content = f.read()
         self.soup = BeautifulSoup(self.content, "lxml")
         self.json_dicts = [
             # collapse all whitespace runs before decoding the JSON text
             json.loads(' '.join(r.fixed[0].split()))
             for r in findall("|{}|", self.content)
         ]
         for json_dict in self.json_dicts:
             for condition in json_dict:
                 # NOTE(security): eval() runs arbitrary code taken from the
                 # template file; only load templates from trusted sources.
                 branch = "True" if eval(condition) else "False"
                 self.soup.body.insert_before(json_dict[condition][branch])
         for tag in self.soup("amz"):
             tag.extract()
         self.content = self.soup.prettify(formatter=None)
     else:
         with open(path, 'r', encoding=enc) as f:
             self.content = f.read()

Example #9

0

Show file

File: requests_html.py Project: CrackerCat/requests-html

    def search_all(self, template: str) -> _Result:
        """Return all occurrences of the given parse template found in the
        HTML of this :class:`Element <Element>`.

        :param template: The Parse template to use.
        """
        matches = findall(template, self.html)
        return list(matches)

Example #10

0

Show file

    def query_find(self, string):
        """Use each key of this mapping as a parse template over ``string``.

        The input is converted with ``str()`` and lower-cased before
        matching. The first captured value of every match is appended to a
        running text, key by key. As soon as a key has at least one
        registered function, that function is called with the text
        accumulated so far and its result is returned. Returns ``None`` when
        no function is found.
        """
        # pair every key with the (lazy) findall result for the input
        candidates = [
            {'key': key, 'find_resp': findall(key, str(string).lower())}
            for key in self.keys()
        ]

        # get rid of None responses
        candidates = [
            entry for entry in candidates if entry['find_resp'] is not None
        ]

        joined = ''
        for entry in candidates:
            # unpack the response associated with this key
            joined += ''.join(match[0] for match in entry['find_resp'])

            # query self for the functions registered under the key and
            # answer with the first one
            for func in self.get(entry['key']):
                return func(joined)

Example #11

0

Show file

File: get_scripts.py Project: ryeakle/jake-and-amir-generator

def remove_scene_direction(text, to_remove="({})"):
    """
    Strip the scene directions out of `text` and return what is left.

    By default everything between parentheses is removed together with the
    parentheses. A different parse template can be supplied via the
    `to_remove` kwarg; `to_remove="[{}]"`, for instance, removes text
    between square brackets.
    """
    # Widen every captured span by one character on each side so that the
    # enclosing delimiters are cut out together with the text.
    cut_ranges = sorted(
        (begin - 1, end + 1)
        for match in parse.findall(to_remove, text)
        for begin, end in match.spans.values()
    )

    kept = []
    cursor = 0
    for cut_start, cut_end in cut_ranges:
        # keep the text up to the opening delimiter ...
        kept.append(text[cursor:cut_start])
        # ... and resume right after the closing one
        cursor = cut_end

    # whatever follows the last scene direction
    kept.append(text[cursor:])
    return "".join(kept)

Example #12

0

Show file

File: common_higher.py Project: bryanwills/labs_youthful_projects

 def findall(self, s):
     """Yield one converted result for each match of ``self.pattern`` in ``s``.

     The pattern is matched against the output of
     ``self._createEscapeSequencesMap(s)``; every raw result is passed to
     ``self._resultToMyResult`` together with the original ``s``.
     """
     import parse
     escaped = self._createEscapeSequencesMap(s)
     matches = parse.findall(
         self.pattern,
         escaped,
         extra_types=self.extra_types,
         case_sensitive=self.case_sensitive,
     )
     for match in matches:
         yield self._resultToMyResult(match, s)

Example #13

0

Show file

File: day_007.py Project: jwelch92/advent-of-code-2020

def parse_input(puzzle_input) -> Bags:
    """Build a mapping of outer bag colour -> {inner colour: count}.

    Every line is split at " bags contain "; each "<count> <colour> bag"
    phrase in the right-hand part adds one entry for the outer colour.
    Outer colours whose line has no such phrase get no entry.
    """
    contents_by_color = defaultdict(dict)

    for rule in puzzle_input:
        outer, inner_text = rule.split(" bags contain ")
        for inner in parse.findall("{count:d} {color} bag", inner_text):
            contents_by_color[outer][inner["color"]] = inner["count"]
    return contents_by_color

Example #14

0

Show file

def importTable(file, macroKeyword):
    """Collect the numbers written as ``macroKeyword(<number>)`` in a file.

    The file is scanned line by line; every occurrence contributes its
    numeric value. Returns the values as a NumPy array and fails an
    assertion when none was found.
    """
    values = []
    with open(file) as handle:
        for row in handle:
            values.extend(hit[0] for hit in findall(macroKeyword + '({:g})', row))
    assert len(values), "No table data found"
    return np.array(values)

Example #15

0

Show file

def _get_failure_info(stdout, vcd_path):
    """Build the FAIL result for a run whose output reports a failed assert.

    The waveform is rendered from ``vcd_path``. The failing line number is
    taken from the "Assert failed in top" message, and the step number from
    the last "Checking assertions in step" message in ``stdout``.
    """
    waveform = _render_vcd(vcd_path)

    failure = search('Assert failed in top: {}:{linenumber:d}', stdout)
    line_num = failure['linenumber']

    # the last reported step is the one in which the assertion failed
    step_reports = list(
        findall('Checking assertions in step {step_num:d}..', stdout)
    )
    step_num = step_reports[-1]['step_num']

    return BMC_Result(Result.FAIL, stdout, waveform, step_num, line_num)

Example #16

0

Show file

    def parse_data(self):
        """Return ``{decrypted name: sector}`` for every room in ``self.data``
        whose checksum is valid.
        """
        rooms = [
            Room(*fields)
            for fields in parse.findall(r'{:D}-{:d}[{:w}]', self.data)
        ]
        return {
            room.decrypt(): room.sector
            for room in rooms
            if room.valid_checksum()
        }

Example #17

0

Show file

File: haruspex.py Project: johnhowe/haruspex

def importTable(file, macroKeyword):
    """Read every ``macroKeyword(<number>)`` value from ``file``.

    Returns a NumPy array of the numbers in file order. An assertion fails
    if the file contains no such value.
    """
    table = []
    with open(file) as source:
        for line in source:
            matches = findall(macroKeyword + '({:g})', line)
            table += [match[0] for match in matches]
    assert len(table), "No table data found"
    return np.array(table)

Example #18

0

Show file

File: requests_xml.py Project: InsaneLoafer/Python_Practice

    def search(self, template: str, first: bool = False) -> _Result:
        """Look for the given parse template in the XML of this
        :class:`Element <Element>`.

        :param template: The Parse template to use.
        :param first: Forwarded, together with the list of matches, to
            ``_get_first_or_list``.
        """
        return _get_first_or_list(list(findall(template, self.xml)), first)

Example #19

0

Show file

File: logViewer.py Project: nigeil/lablog

 def __init__(self, logbookfd):
     """Parse every log entry of the logbook read from ``logbookfd``.

     The whole text is kept in ``self.fileContents`` and the raw text of each
     "+++Begin log entry+++ ... +++End log entry+++" section in
     ``self.rawLogEntries``. Each section is turned into a ``logEntry`` built
     from its time, user, note, tags and added/removed files, and collected
     in ``self.logEntries``.
     """
     def captures(template, text):
         # first positional capture of every match of `template` in `text`
         return [match.fixed[0] for match in parse.findall(template, text)]

     self.fileContents = logbookfd.read()
     self.rawLogEntries = captures(
         "+++Begin log entry+++{}" + "+++End log entry+++",
         self.fileContents)

     self.logEntries = []
     for entry in self.rawLogEntries:
         timestamp = parse.search("Time:{i}\n", entry)['i']
         user = parse.search("User:{i}\n", entry)['i']
         note = parse.search("Note:{i}\n", entry)['i']
         tags = captures("\'+{}\'", entry)
         addedFiles = captures(addFileStr + "{}\n", entry)
         removedFiles = captures(removeFileStr + "{}\n", entry)
         self.logEntries.append(
             logEntry(timestamp, user, note, tags, addedFiles, removedFiles))

Example #20

0

Show file

File: test_parse.py Project: silygose/parse

 def test_no_evaluate_result(self):
     # findall() with deferred evaluation: each match is evaluated on demand
     matches = parse.findall(
         ">{}<", "<p>some <b>bold</b> text</p>", evaluate_result=False
     )
     pieces = [match.evaluate_result().fixed[0] for match in matches]
     self.assertEqual(''.join(pieces), "some bold text")

Example #21

0

Show file

def main(args):
    """Print the sample count reported for every entry of ``components``.

    For each ``key -> label`` pair, the first "<key> (<label>) (<number>
    samples" occurrence in ``args.input_file`` supplies the number. All
    numbers are gathered before anything is printed, one "key, number" line
    per component.
    """
    lines = Path(args.input_file).read_text()

    breakdowns = {}
    for k, v in components.items():
        first_match = next(parse.findall(f"{k} ({v}) ({{:n}} samples", lines))
        breakdowns[k] = first_match[0]

    for k, v in breakdowns.items():
        print(f'{k}, {v}')

Example #22

0

Show file

File: parsers.py Project: shishaktkumarCLS/pywebcopy

    def search_all(self, template):
        """Collect every match of the given parse template in the HTML of
        this :class:`Element <Element>`.

        :param template: The Parse template to use.
        :raises TypeError: If ``template`` is not a string.
        """
        if isinstance(template, str):
            return list(findall(template, self.html))

        raise TypeError("Expected string, got %r" % type(template))

Example #23

0

Show file

File: slurm.py Project: sequana/sequana_pipetools

    def _get_percent(self):
        """Get at which percentage the analysis stopped"""

        progress_template = "{:d} of {:d} steps ({percent:g}%) done"

        # Only the most recent progress report matters
        reports = list(parse.findall(progress_template, self.snakemaster))
        return reports[-1]["percent"]

Example #24

0

Show file

def part_a(data):
    """Sum every number in the third section of ``data`` that lies outside
    all ranges parsed from the first section.
    """
    fields, _, tickets = data.split('\n\n')

    valid = set()
    for start, end in PARSER.findall(fields):
        valid.update(range(start, end + 1))

    return sum(
        match['num']
        for match in parse.findall('{num:d}', tickets)
        if match['num'] not in valid
    )

Example #25

0

Show file

File: betterbib.py Project: dwferrer/betterbib

def getKeys(auxfile,key_map,bib):
    """Map the colon-containing citation keys of an aux file to resolved keys.

    Every ``\\citation{...}`` entry in ``auxfile`` is split on commas; each
    key that contains a ":" is passed to ``resolve_key`` with ``key_map``
    and ``bib``.

    :param auxfile: Path of the file to scan.
    :param key_map: Forwarded to ``resolve_key``.
    :param bib: Forwarded to ``resolve_key``.
    :return: Dict of original key -> value returned by ``resolve_key``.
    """
    # The context manager closes the file even if reading fails.
    with open(auxfile, "r") as f:
        aux_text = f.read()

    # Raw string: "\c" is not a valid escape sequence and only worked because
    # Python keeps unknown escapes verbatim (newer versions warn about it).
    # The template text is unchanged; "{{" and "}}" are literal braces.
    cites = parse.findall(r"\citation{{{}}}", aux_text)

    remap = {}
    for cite in cites:
        for key in cite[0].split(","):
            if ":" in key:
                remap[key] = resolve_key(key, key_map, bib)
    return remap

Example #26

0

Show file

File: slurm.py Project: sequana/sequana_pipetools

    def _get_rules_with_errors(self):
        """Return the parsed error reports of the rules that failed.

        Reports that include a "log:" line are looked for first; only when
        none is found are reports without that line returned instead.
        """

        with_log = """Error in rule {rule:S}:
    jobid: {jobid:d}
    output: {output}
    log: {log:S} (check log file(s) for error message)
    cluster_jobid: Submitted batch job {slurm_id:d}"""

        without_log = """Error in rule {rule:S}:
    jobid: {jobid:d}
    output: {output}
    cluster_jobid: Submitted batch job {slurm_id:d}"""

        return (
            list(parse.findall(with_log, self.snakemaster))
            or list(parse.findall(without_log, self.snakemaster))
        )

Example #27

0

Show file

File: day7.py Project: osbornnick/AdventOfCode2020

def process(data):
    """Turn "<bag> bags contain <contents>" records into structured rules.

    Returns a dict mapping each outer bag to a list of
    ``{'num': <count>, 'bag': <inner bag>}`` dicts, one per
    "<count> <bag> bag" phrase found in its contents.
    """
    record_parser = parse.compile("{bag} bags contain {bag_string}")

    # first pass: outer bag -> raw contents text
    raw_rules = {}
    for record in data:
        parsed = record_parser.parse(record)
        raw_rules[parsed['bag']] = parsed['bag_string']

    # second pass: break each contents text into its individual bag rules
    return {
        outer: [
            {'num': inner['num'], 'bag': inner['bag']}
            for inner in parse.findall("{num:d} {bag} bag", contents)
        ]
        for outer, contents in raw_rules.items()
    }

Example #28

0

Show file

    def search_all(self, template):
        """Collect every match of the given parse template in the HTML of
        this :class:`Element <Element>`.

        :param template: The Parse template to use.
        :raises TypeError: If ``template`` is not a string.
        :raises ImportError: If the ``parse`` module cannot be imported.
        """
        if not isinstance(template, str):
            raise TypeError("Expected string, got %r" % type(template))

        # imported on demand so the dependency is only needed when used
        try:
            import parse
        except ImportError:
            message = ("parse module is not installed. "
                       "Install it using pip: $ pip install parse")
            raise ImportError(message)

        return list(parse.findall(template, self.html))

Example #29

0

Show file

File: updateFileList.py Project: nigeil/lablog

def updateFileList(logbookfd, logbookFilename, baseDir):
    """Compare the files recorded in the logbook with those under ``baseDir``.

    The logbook is re-read from its start. Files logged with "addFile:" and
    not later logged with "removeFile:" count as present in the logbook.
    Both result lists are printed and then returned.

    :param logbookfd: Seekable, readable file object of the logbook.
    :param logbookFilename: Not used by this function.
    :param baseDir: Directory that is walked recursively; paths are taken
        relative to it.
    :return: ``(filesToAdd, filesToRemove)`` -- files on disk but not in the
        logbook, and files in the logbook but not on disk.
    """
    addFileStr = "addFile:" #CARE: check init string for logbook
    removeFileStr = "removeFile:"

    # files that are logged in the logbook
    logbookfd.seek(0)
    fileContents = logbookfd.read()
    loggedAdded = [match.fixed[0] for match in
                   parse.findall(addFileStr+"{}\n", fileContents)]
    loggedRemoved = [match.fixed[0] for match in
                     parse.findall(removeFileStr+"{}\n", fileContents)]
    # final set of files in logbook: added minus already noted as removed
    filesInLogbook = list(set(loggedAdded) - set(loggedRemoved))

    # files in all directories under baseDir
    filesInDirectories = [
        os.path.relpath(os.path.join(root, name), baseDir)
        for root, _dirs, names in os.walk(baseDir)
        for name in names
    ]

    # the two one-sided differences tell which files to add and to remove
    filesToAdd = list(set(filesInDirectories) - set(filesInLogbook))
    filesToRemove = list(set(filesInLogbook) - set(filesInDirectories))
    print(filesToAdd)
    print(filesToRemove)
    return (filesToAdd, filesToRemove)

Example #30

0

Show file

File: pj_sequence.py Project: simbuerg/benchbuild

def unique_compiler_cmds(run_f):
    """Yield a compiler command for each suitable line of ``-###`` output.

    ``run_f`` is run with ``-### -c`` and its stderr is scanned line by line.
    A line qualifies when its first double-quoted string names an existing
    path. That string becomes the command, the remaining quoted strings its
    arguments, and ``-S -emit-llvm`` is appended.
    """
    _, _, stderr = run_f["-###", "-c"].run()
    for line in stderr.split('\n'):
        first_quoted = parse.search('\"{0}\"', line)
        if not first_quoted or not os.path.exists(first_quoted[0]):
            continue

        quoted = [match[0] for match in parse.findall('\"{0}\"', line)]
        cmd = first_quoted[0]
        # the first quoted string is the command itself, the rest are args
        args = quoted[1:]

        yield local[cmd][args]["-S", "-emit-llvm"]

Example #31

0

Show file

File: parse2csv.py Project: cartoonist/parse2csv

def parse_all(content, patterns):
    """Gather the named fields of every pattern match in the content.

    Args:
        content : str
            The content to be parsed.
        patterns : list of str
            The list of patterns to find.

    Returns:
        A ``defaultdict(list)`` mapping each field name to the values
        captured for it, in the order the matches were found.
    """
    collected = defaultdict(list)
    for template in patterns:
        for hit in parse.findall(template, content):
            for field, captured in hit.named.items():
                collected[field].append(captured)
    return collected

Example #32

0

Show file

File: util.py Project: mercari/mkdocs-git-snippet

def copy_markdown_images(root: str, file: str, repo: Repository,
                         markdown: str) -> str:
    """Download the local images referenced by ``markdown`` and re-point them.

    Every ``![name](path)`` image whose path does not start with "http" is
    resolved relative to the directory of ``file``, fetched from ``repo``
    and saved under ``<root>/gen_/``. Occurrences of the original path in the
    markdown are replaced by the ``gen_/`` path. Returns the updated text.
    """
    image_paths = [
        match["path"]
        for match in parse.findall("![{name}]({path})", markdown)
    ]
    parent = Path(file).parent
    for path in image_paths:
        if not path.startswith("http"):
            # path of the image relative to the current working directory
            resolved = Path(parent / path).resolve()
            img_path = resolved.relative_to(Path(".").resolve())

            img = repo.get_contents(str(img_path))
            destination = os.path.realpath(f"{root}/gen_/{img_path}")
            os.makedirs(os.path.dirname(destination), exist_ok=True)
            urllib.request.urlretrieve(img.download_url, destination)

            markdown = markdown.replace(path, f"gen_/{img_path}")
    return markdown

Example #33

0

Show file

 def inject_events(self, wait_max_seconds=2):
     """Replay the events recorded in ``self.filename`` on their devices.

     Lines starting with "D:" select the device (by index into
     ``self._devices``) that subsequent events are sent to. Lines starting
     with "E:" carry a timestamp, a length and the event bytes in
     hexadecimal; each is forwarded with ``dev.call_input_event``. Before an
     event is sent, the method sleeps so that the gaps between recorded
     timestamps are reproduced, but never for more than ``wait_max_seconds``
     at a time. ``self.replayed_count`` is incremented once per call.

     :param wait_max_seconds: Upper bound, in seconds, for a single sleep.
     """
     # wall-clock reference point and the recorded timestamp it maps to
     t = None
     timestamp_offset = 0
     with open(self.filename) as f:
         # default to device 0 (if present) until a "D:" line says otherwise
         idx = 0
         dev = None
         if idx in self._devices:
             dev = self._devices[idx]
         for l in f:
             if l.startswith('D:'):
                 # device selection line
                 r = parse('D: {idx:d}', l)
                 assert r is not None
                 dev = self._devices[r['idx']]
             elif l.startswith('E:'):
                 # event line: seconds.microseconds, byte count, hex bytes
                 r = parse('E: {sec:d}.{usec:d} {len:2d}{data}', l)
                 assert r is not None
                 length = r['len']
                 timestamp = r['sec'] + r['usec'] / 1000000
                 # every space-separated token is one byte in hexadecimal
                 r_ = findall(' {:S}', r['data'])
                 data = [int(x[0], 16) for x in r_]
                 assert len(data) == int(length)
                 now = datetime.today()
                 if t is None:
                     # first event: anchor the recording to the current time
                     t = now
                     timestamp_offset = timestamp
                 # moment at which this event is due relative to the anchor
                 target_time = t + timedelta(seconds=timestamp -
                                             timestamp_offset)
                 sleep = 0
                 if target_time > now:
                     sleep = target_time - now
                     # convert the timedelta to seconds as a float
                     sleep = sleep.seconds + sleep.microseconds / 1000000
                 if sleep < 0.01:
                     # due (or overdue) within 10 ms: send immediately
                     pass
                 elif sleep < wait_max_seconds:
                     time.sleep(sleep)
                 else:
                     # gap too long: re-anchor on this event and wait only
                     # the maximum allowed time
                     t = now
                     timestamp_offset = timestamp
                     time.sleep(wait_max_seconds)
                 dev.call_input_event(data)
     self.replayed_count += 1

Example #34

0

Show file

File: piper.py Project: etos/mr-piper

def pip_versions(package_name):
    """List the versions pip reports as available for ``package_name``.

    pip is asked to install the version "0.xx" of the package, and the
    "from versions: ...)" list in its error output is parsed.

    :param package_name: Name of the package to query.
    :return: ``False`` when the error output contains the "No matching
        distribution" message checked below; otherwise a list of version
        strings, which is empty when no version list could be parsed.
    """
    pip_command = "{0} install {1}==0.xx".format(which_pip(), package_name)
    c = delegator.run(pip_command)
    # NOTE(review): the package name in this message is hard-coded to
    # "mrpiper" instead of using ``package_name`` -- confirm this is intended.
    if "No matching distribution found for mrpiper" in c.err:
        return False

    main_result = parse.search("from versions: {})", c.err)
    if main_result is None:
        # No "from versions: ...)" list in the output. The original crashed
        # here with AttributeError on ``None.fixed``.
        return []

    # A trailing comma is appended so that the last version is also followed
    # by "," and matches the template.
    # NOTE(review): the template needs a leading space, so a first version
    # that directly follows "from versions: " looks like it is skipped --
    # confirm against real pip output.
    return [
        result.fixed[0]
        for result in parse.findall(" {:S},", main_result.fixed[0] + ",")
    ]

Example #35

0

Show file

File: config.py Project: swaroopjagadish/datahub

    def validate_path_spec(
        cls, value: Optional[str], values: Dict[str, Any]
    ) -> Optional[str]:
        """Check that a path spec is relative and numbers its names 0..n.

        ``None`` is accepted as is. Any other value must start with "./" and
        contain ``{name[i]}`` identifiers whose indices, once sorted, are
        exactly 0, 1, ..., n.

        :raises ValueError: If one of these requirements is not met.
        """
        if value is None:
            return None

        if not value.startswith("./"):
            # enforce this for semantics
            raise ValueError("Path_spec must start with './'")

        name_indices = sorted(
            match[0] for match in parse.findall("{{name[{:d}]}}", value)
        )

        if not name_indices:
            raise ValueError("Path spec must contain at least one name identifier")

        # the list is sorted, so its last element is the largest index
        expected_indices = list(range(name_indices[-1] + 1))
        if name_indices != expected_indices:
            raise ValueError(
                "Path spec must contain consecutive name identifiers, starting at 0"
            )

        return value

Example #36

0

Show file

File: amazonia.py Project: EricsonWillians/Amazonia

	def __init__(self, path, enc="utf-8"):
		"""Load the resource at ``path`` into ``self.content``.

		* Paths ending in one of ``ServerResource.MEDIA_EXTENSIONS`` are read
		  as raw bytes.
		* Paths ending in "amz" are read as text, parsed into ``self.soup`` and
		  run through the templating step: every ``|...|`` section is decoded
		  as JSON into ``self.json_dicts``; each key is evaluated as a Python
		  expression and the matching "True"/"False" entry is inserted before
		  the document body. All ``amz`` tags are then removed and the
		  prettified document becomes ``self.content``.
		* Any other path is read as text.

		:param path: Path of the file to load.
		:param enc: Encoding used when the file is read as text.
		"""
		# str.endswith accepts a tuple, so every media extension is tested in
		# one call. The original looped over the extensions and fell into the
		# text branch (then broke out) as soon as the first extension did not
		# match, so later extensions were never honoured; it also re-ran the
		# "amz" branch once per extension.
		if path.endswith(tuple(ServerResource.MEDIA_EXTENSIONS)):
			with open(path, "rb") as f:
				self.content = f.read()
		elif path.endswith("amz"):
			# "Templating engine" stuff comes here...
			with open(path, 'r', encoding=enc) as f:
				self.content = f.read()
			self.soup = BeautifulSoup(self.content, "lxml")
			self.json_dicts = [
				# collapse all whitespace runs before decoding the JSON text
				json.loads(' '.join(r.fixed[0].split()))
				for r in findall("|{}|", self.content)
			]
			for json_dict in self.json_dicts:
				for condition in json_dict:
					# NOTE(security): eval() runs arbitrary code taken from the
					# template file; only load templates from trusted sources.
					branch = "True" if eval(condition) else "False"
					self.soup.body.insert_before(json_dict[condition][branch])
			for tag in self.soup("amz"):
				tag.extract()
			self.content = self.soup.prettify(formatter=None)
		else:
			with open(path, 'r', encoding=enc) as f:
				self.content = f.read()

Example #37

0

Show file

File: picasa_album_trawl_relpath.py Project: pcr20/flickr-uploader

        st = os.stat(os.path.join(root, filename))
        atime = st.st_atime #access time
        mtime = st.st_mtime #modification time

        #inputfile = open("I:\\Documents and Settings\\pcr20\\My Documents\\My Pictures\\2012_06_23\\.picasa.ini")
        inputfile = open(os.path.join(root, filename),'rU') #U for universal line ending mode, convert to unix line ending automatically if necessary
        
        my_text = inputfile.read() #reads to whole text file
        
        for match in picasablock.finditer(my_text):
            #print "%s: %s" % (match.start(), match.group(1))
            #find album    
            parsealbum=parse.parse("[.album:{albumid}]{:s}{fieldstart}",match.group())
            if parsealbum:
                #then we have an album
                parse_album_fields=parse.findall("{field}={fieldresult}\n",match.group(),parsealbum.spans["fieldstart"][0])                
                
                #check if album exists already
                albumidlist=[z["albumid"] for z in albumsfound]
                if parsealbum.named["albumid"] in albumidlist: 
                    idx=albumidlist.index(parsealbum.named["albumid"]) #duplicate album
                    
                    print "duplicate album in: ",os.path.relpath(os.path.join(root, filename),rootPath).replace(os.path.sep, '/'),str(parsealbum.named["albumid"])
                    
                    for r in parse_album_fields:
                        #print r.named
                        if albumsfound[idx].has_key(r.named["field"]):
                            if albumsfound[idx][r.named["field"]]!=r.named["fieldresult"]:
                                print("WARNING: "+r.named["field"]+" was: "+albumsfound[idx][r.named["field"]]+" replaced by: "+r.named["fieldresult"])
                        albumsfound[idx][r.named["field"]]=r.named["fieldresult"]

Example #38

0

Show file

File: test_parse.py Project: amigadave/parse

 def test_findall(self):
     # findall() yields one result per ">...<" section, in document order
     matches = parse.findall(">{}<", "<p>some <b>bold</b> text</p>")
     pieces = [match.fixed[0] for match in matches]
     self.assertEqual("".join(pieces), "some bold text")

Example #39

0

Show file

File: test_parse.py Project: r1chardj0n3s/parse

 def test_no_evaluate_result(self):
     # with evaluate_result=False each match must be evaluated explicitly
     collected = []
     for match in parse.findall(">{}<", "<p>some <b>bold</b> text</p>",
                                evaluate_result=False):
         collected.append(match.evaluate_result().fixed[0])
     self.assertEqual(''.join(collected), "some bold text")

Example #40

0

Show file

File: extract_characters.py Project: ryeakle/jake-and-amir-generator

    args = parser.parse_args()

    data_directory = "../../data/"
    scripts_path = os.path.join(data_directory, "scripts.txt")
    output_directory = os.path.join(data_directory, "characters")

    try:
        script_file_handle = open(scripts_path, 'r')
    except:
        print "Need master script file (scripts.txt) in the data directory."
        exit()

    if args.character:
        # user lower case to find character
        character_to_extract = args.character_to_extract.lower()
    else:
        characters = get_all_characters(script_file_handle)

    for character in characters:
        character = clean_character_name(character)

        script_file_handle.seek(0)

        # :^ option handles whitespace
        # e.g. can find strings like "Jake:text" or " Jake: text" etc.
        lines = '\n'.join(r.fixed[0] for r in parse.findall(character + ": {:^}\n", script_file_handle.read()))

        if lines:
            character_file = open(os.path.join(output_directory, character) + '.txt', 'w')
            character_file.write('%s\n' % lines)