def __init__(self, name, cap=16, create=False, size=0):
        """Attach to (or create) the shared buffer *name*, start the loader
        process and the listener thread.

        NOTE(review): this def sits outside any class in this chunk and
        duplicates BufferManger.__init__ below -- likely paste residue.

        name   -- identifier of the shared buffer / loader channel.
        cap    -- max size of the id/data queues (loader prefetch bound).
        create -- forwarded to Buffer(); presumably "create vs. attach" -- TODO confirm.
        size   -- forwarded to Buffer(); presumably the buffer size -- TODO confirm.
        """
        #buffer
        # NOTE(review): 602116 is a magic constant passed to Buffer() --
        # confirm its meaning against the Buffer definition.
        self.buffer = Buffer(name, 602116, create, size)

        # start loader service
        # Loader.loading runs in a separate process, consuming ids from
        # id_queue and emitting results on data_queue.
        self.id_queue = multiprocessing.Manager().Queue(maxsize=cap)
        self.data_queue = multiprocessing.Manager().Queue(maxsize=cap)
        self.loader = multiprocessing.Process(
            target=Loader.loading, args=(self.id_queue, self.data_queue, name, int(cap/2)))
        self.loader.start()
        assert(self.loader.is_alive() == True)

        # table
        self.data_lock = threading.Lock()
        self.id_table = {}   #id -> datanode
        self.data_refs = {}  #id -> refs
        self.task_tails = {}  # task name -> tail inode index
        self.task_heads = {}  # task name -> head inode index

        self.pending_id_lock = threading.Lock()
        self.pending_id = {} #id->namelist

        #replacer
        self.replacer = Replacer()


        # start a thread to listen data queue
        executor = ThreadPoolExecutor(max_workers=1)
        executor.submit(self.listener)
# Beispiel #2
# 0
def main():
    """Build a ``Replacer`` from the parsed command-line options.

    The two assertions enforce that source/target training (and dev) files
    are given as pairs: either both present or both absent.
    """
    # http://stackoverflow.com/questions/15206010/how-to-pass-on-argparse-argument-to-function-as-kwargs
    args = command_line()
    assert (args.src_train_file is None) == (args.tgt_train_file is None)
    assert (args.src_dev_file is None) == (args.tgt_dev_file is None)
    # The pairing guarantees above mean checking only the src side here
    # covers the tgt side as well.
    only_test = args.src_train_file is None and args.src_dev_file is None
    replacer_options = dict(
        mode=args.mode,
        src_embedding=args.src_embedding,
        src_embedding_topn=args.src_embedding_topn,
        tgt_embedding=args.tgt_embedding,
        src_voc_size=args.src_voc_size,
        tgt_voc_size=args.tgt_voc_size,
        only_test=only_test,
        dictionary=args.dictionary,
        dictionary_topn=args.dictionary_topn,
        src_sim_threshold=args.src_sim_threshold,
        tgt_sim_threshold=args.tgt_sim_threshold,
        lex_prob_threshold=args.lex_prob_threshold,
        backoff_to_unk=args.backoff_to_unk,
        replace_both=args.replace_both,
        guarantee_in_vocab_replace=args.guarantee_in_vocab_replace,
    )
    replacer = Replacer(args, **replacer_options)
class BufferManger(object):
    """Manage a shared ``Buffer``: a loader process fills data slots, a
    listener thread links loaded data into per-task inode chains, and a
    ``Replacer`` policy picks victims when the buffer is full.

    NOTE(review): the class name keeps the original "Manger" spelling
    because external callers may reference it by that name.
    """

    def __init__(self, name, cap=16, create=False, size=0):
        """Attach to (or create) the shared buffer *name* and start services.

        name   -- identifier of the shared buffer / loader channel.
        cap    -- max size of the id/data queues (loader prefetch bound).
        create -- forwarded to Buffer(); presumably "create vs. attach" -- TODO confirm.
        size   -- forwarded to Buffer(); presumably the buffer size -- TODO confirm.
        """
        # NOTE(review): 602116 is a magic constant passed to Buffer() --
        # confirm its meaning against the Buffer definition.
        self.buffer = Buffer(name, 602116, create, size)

        # Start the loader service: a separate process that consumes ids
        # from id_queue and emits loaded data on data_queue.
        self.id_queue = multiprocessing.Manager().Queue(maxsize=cap)
        self.data_queue = multiprocessing.Manager().Queue(maxsize=cap)
        self.loader = multiprocessing.Process(
            target=Loader.loading, args=(self.id_queue, self.data_queue, name, cap // 2))
        self.loader.start()
        assert self.loader.is_alive()

        # Bookkeeping tables, guarded by data_lock.
        self.data_lock = threading.Lock()
        self.id_table = {}    # id -> datanode index
        self.data_refs = {}   # id -> list of inode refs
        self.task_tails = {}  # task name -> tail inode index
        self.task_heads = {}  # task name -> head inode index

        # ids whose data is still being loaded:
        # id -> [name_list, expect_diff], guarded by pending_id_lock.
        self.pending_id_lock = threading.Lock()
        self.pending_id = {}

        # Eviction policy for datanodes.
        self.replacer = Replacer()

        # Background thread draining data_queue.
        executor = ThreadPoolExecutor(max_workers=1)
        executor.submit(self.listener)

    def listener(self):
        """Forever drain data_queue: record where each loaded id landed and
        link it into the tasks that were waiting for it."""
        while True:
            item = self.data_queue.get()
            p_ticker = m.tiker("data process")
            p_ticker.end()

            w_ticker = m.tiker("data write")
            w_ticker.start()

            data_id, data_idx = item
            with self.pending_id_lock:
                # pop() fuses the lookup and delete of the original code.
                name_list, expect_diff = self.pending_id.pop(data_id)
            with self.data_lock:
                self.id_table[data_id] = data_idx
            logging.info("buffer write data %d in %d with tasks %s", data_id, data_idx, str(name_list))

            self.write(data_id, name_list, expect_diff)

    def write(self, data_id, name_list, expect_diff):
        """Link *data_id* into the chain of every task in *name_list*.

        On a miss the request is queued for the loader (merging with any
        pending request for the same id); the actual linking happens when
        listener() re-invokes write() once the data has arrived.
        """
        hit = True
        with self.data_lock:
            if data_id not in self.id_table:
                hit = False
            else:
                logging.info("data %d with %s hit", data_id, name_list)
                self.replacer.delete(data_id)

        if hit is False:
            logging.info("data %d with %s miss", data_id, name_list)
            # Only the first requester enqueues a load; later requesters
            # merely merge their task names into the pending entry.
            if self._merge_pendingid(data_id, name_list, expect_diff):
                p_ticker = m.tiker("pool")
                p_ticker.start()
                data_idx = self.allocate_datanode()
                self.id_queue.put((data_id, data_idx))
            return

        with self.data_lock:
            for name in name_list:
                data_idx = self.id_table[data_id]
                inode_idx = self.allocate_inode()
                # setdefault replaces the manual "not in keys()" check.
                self.data_refs.setdefault(data_id, []).append(inode_idx)
                self.buffer.write_inode(inode_idx, self.task_tails[name], data_idx)
                # FIX: log-message typo "wirte" -> "write".
                logging.info("write %s's data [%d]-->[%d]-->(%d)", name, self.task_tails[name], inode_idx, data_idx)
                self.task_tails[name] = inode_idx
            self.replacer.update(data_id, expect_diff)

        w_ticker = m.tiker("data write")
        w_ticker.end()

    def _merge_pendingid(self, data_id, name_list, expect_diff):
        """Merge a request into pending_id.

        Returns True iff this call created the entry, i.e. the caller is
        responsible for enqueuing the actual load.
        """
        with self.pending_id_lock:
            res = data_id not in self.pending_id
            if res:
                self.pending_id[data_id] = [[], 0]
            self.pending_id[data_id][0].extend(name_list)
            self.pending_id[data_id][1] = expect_diff
        return res

    def add_task(self, task_name):
        """Register a new task: allocate its head inode and return its index.

        Returns -1 if a task of that name already exists.
        """
        if task_name in self.task_heads:
            return -1
        inode_idx = self.allocate_inode()
        self.buffer.write_inode(inode_idx)
        self.task_heads[task_name] = inode_idx
        self.task_tails[task_name] = inode_idx

        logging.info("add task %s with head %d", task_name, inode_idx)
        return inode_idx

    def allocate_inode(self):
        """Return a free inode index.

        If the buffer is exhausted, busy-loop over task heads and reclaim
        the first one that is no longer marked used, advancing that task's
        head to its successor.
        """
        inode_idx = self.buffer.allocate_inode()
        if inode_idx != -1:
            return inode_idx

        # Free some inode: steal a consumed head.
        while True:
            for task_name in self.task_heads.keys():
                head_inode = self.task_heads[task_name]
                if not self.buffer.is_used(head_inode):
                    _, next_head = self.buffer.parse_inode(head_inode)
                    self.task_heads[task_name] = next_head
                    return head_inode

    def allocate_datanode(self):
        """Return a free datanode index.

        If the buffer is exhausted, repeatedly ask the replacer for a
        victim id and evict it once none of its inode refs still point at
        valid data.
        """
        datanode_idx = self.buffer.allocate_datanode()
        if datanode_idx != -1:
            return datanode_idx

        # Free some datanode.
        while True:
            with self.data_lock:
                # FIX: re-initialise per attempt. The original hoisted
                # `valid = True` out of the loop, so a stale value leaked
                # across iterations and an id with an empty ref list could
                # never be evicted (livelock). No refs means nothing points
                # at the data, so it is safe to evict.
                valid = False
                data_id = self.replacer.next()
                data_idx = self.id_table[data_id]
                for ref in self.data_refs[data_id]:
                    valid = self.buffer.is_datavalid(ref, data_idx)
                    if valid is True:
                        break

                if valid is False:
                    logging.info("evict data %d in %d", data_id, data_idx)
                    del self.id_table[data_id]
                    del self.data_refs[data_id]
                    self.replacer.delete(data_id)
                    self.replacer.reset()
                    return data_idx

    def delete_task(self, name):
        """Detach task *name*: drop it from pending requests, unregister its
        head, and walk its inode chain to the -1 terminator."""
        with self.pending_id_lock:
            for data_id in self.pending_id:
                # FIX: pending_id maps id -> [name_list, expect_diff]; the
                # task name lives in the inner name_list. The original
                # called .remove(name) on the outer pair, which always
                # raised ValueError. Guard the removal so tasks that never
                # requested this id are skipped.
                name_list = self.pending_id[data_id][0]
                if name in name_list:
                    name_list.remove(name)

        head = self.task_heads[name]

        with self.data_lock:
            del self.task_heads[name]

        # Traverse the chain to its end (side effects, if any, happen in
        # buffer.get_next); -1 terminates the list.
        while head != -1:
            head = self.buffer.get_next(head)

    def terminate(self):
        """Kill the loader process and wait for it to exit before closing.

        Requires Python >= 3.7 for Process.kill()/close().
        """
        self.loader.kill()
        while self.loader.is_alive():
            time.sleep(0.1)
        self.loader.close()
# Beispiel #4
# 0
from parser import Parser
from scrambler import Scrambler
from replacer import Replacer

import argparse

if __name__ == "__main__":
    # Parse CLI options for the function-name scrambler.
    # FIX: the original reused the name `parser` for both the argparse
    # parser and the source-code Parser, and shadowed the builtin `file`;
    # distinct local names are used here instead.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--file", help="The file that contains the function names to scramble")
    arg_parser.add_argument("--password",
                            help="The password used to encrypt the function names")
    args = arg_parser.parse_args()

    # Parse the source, excluding 'main' definitions and 'printf' calls.
    code_parser = Parser(args.file, def_excludes=['main'], call_excludes=['printf'])

    scrambler = Scrambler(code_parser, args.password)

    func_names = scrambler.scramble_func_names()

    with open(args.file, 'r') as src:
        source_code = src.read()

    replacer = Replacer(source_code, func_names)

    print(replacer.replace())
# Beispiel #5
# 0
 def __init__(self):
     """Initialise the base Replacer and compile the positional-argument
     placeholder pattern (``{0}`` ... ``{9}``)."""
     Replacer.__init__(self)
     # FIX: the original used ur'{[0-9]}' -- the `ur` prefix is Python 2
     # only and a SyntaxError on Python 3. A plain raw string is
     # equivalent here since the pattern contains no non-ASCII characters.
     self.argument_pattern = re.compile(r'{[0-9]}')
def main():
    """Transcribe a ``.pytex`` source file into a compilable ``.tex`` file.

    Reads the file named by the module-level ``args.file``, strips ``%``
    comments, builds a LaTeX preamble from ``..``-directives (unless the
    source declares its own full preamble), applies macro and command
    replacements, writes ``<name>.tex``, and optionally runs ``pdflatex``.

    Returns the output file name without its ``.tex`` extension.

    NOTE(review): depends on module-level names defined elsewhere in this
    file (``args``, ``find_content``, ``find_index``, ``load_pairs``,
    ``load_renew_commands``, ``load_replacements``, ``_print``) and on
    ``datetime``/``pytz``/``subprocess`` imports -- confirm availability.
    """
    file_name = args.file

    with open(file_name, 'r') as f:
        content = f.read()

    replacer = Replacer()

    has_full_preamble_declaration = replacer.check_for_full_pream(
        content
    )  # If contains '..begin full preamble' and '..end full preamble'

    # Pull preamble and "..begin ignore" regions out of the way; they are
    # restored near the end of this function.
    content = replacer.set_aside_preamble(content)
    content = replacer.set_aside_ignore_markers(content)

    # Deal with comments
    # Strip block comments delimited by a pair of '%%%' markers.
    while '%%%' in content:
        ind1 = content.index('%%%')
        ind2 = ind1 + 3 + content[ind1 + 3:].index('%%%')
        content = content[:ind1] + content[ind2 + 3:]

    # Strip trailing ' %' / tab-% line comments and whole-line % comments.
    lines = content.split('\n')
    for i in range(len(lines)):
        if ' %' in lines[i]:
            lines[i] = lines[i][:lines[i].index(' %')]
        elif '\t%' in lines[i]:
            lines[i] = lines[i][:lines[i].index('\t%')]
        elif len(lines[i]) > 1 and '%' == lines[i][0]:
            lines[i] = ''
        lines[i] = lines[i].strip()

    if not has_full_preamble_declaration:  # Ignore all other stuff in the preamble if "..begin full preamble" defined

        # Set font
        font = find_content(lines, '..font', '12')

        # Set packages
        packages = [
            'amsmath', 'amssymb', 'amsthm', 'geometry', 'enumitem', 'fancyhdr'
        ]
        packages.extend(find_content(lines, '..usepackage', '').split())

        # See if name is specified. Overrides --name flag if given.
        my_name = find_content(lines, '..name', None)

        # Set page size
        paper = find_content(lines, '..paper', 'letter')

        # Set page orientation
        orientation = find_content(lines, '..orient', 'portrait')

        # Set margin
        margin = find_content(lines, '..margin', '1')

        # Set indent
        indent = find_content(lines, '..indent', '0')

        # Set spacing
        spacing = find_content(lines, '..spacing', '1.5')
        line_spread = 1.3
        if spacing == '1':
            line_spread = 1.0
        elif spacing == '2':
            line_spread = 1.6

        # Obvious
        qed_symbol = find_content(lines, '..qed', None)

        # Puts whatever follows "..assignment" as the left footer
        assignment = find_content(lines, '..assignment', None)

        # Asset path
        asset_path = find_content(lines, '..assetpath', None)
        assets_path = find_content(lines, '..assetspath', None)
        graphics_path = find_content(lines, '..graphicspath', None)

        if asset_path and assets_path or asset_path and graphics_path or assets_path and graphics_path:
            exit('Specify only 1 of asset(s)path or graphicspath, not both!')

        # The three aliases collapse into asset_path for later use.
        if assets_path:
            asset_path = assets_path
        if graphics_path:
            asset_path = graphics_path

    # Replacements occur here
    replacer.replace(lines, find_content, find_index)

    _start = find_index(lines, '..begin main')
    _end = find_index(lines, '..end main')

    # NOTE(review): if '..begin main' is absent, _start is presumably -1
    # and the slices below silently start from index 0 -- confirm against
    # find_index's contract.
    if _end != -1:
        main_content = lines[_start + 1:_end]
    else:
        main_content = lines[_start + 1:]

    _content = "\n".join(main_content)
    """
    Ignore and not transcribe commands surrounded by \verb||. 
    Currently only ignores commands if they immediately follow "\verb|"
    Better detection still needed, use "..begin ignore" as an alternative. 
    """
    # Collect only the macros / renewed commands the document actually uses.
    used_macros = []
    pairs = load_pairs()
    for pair in pairs:
        if _content.find(pair[0]) != -1:
            used_macros.append(pair)

    used_renewed_commands = []
    renewed = load_renew_commands()
    for pair in renewed:
        if _content.find(pair[0]) != -1:
            used_renewed_commands.append(pair)

    # Apply text replacements, shielding occurrences that immediately follow
    # \verb| by temporarily mangling them (the 'afoswj' marker) and
    # restoring them afterwards.
    replacement_pairs = load_replacements()
    for pair in replacement_pairs:
        _content = _content.replace(
            '\\verb|{0}'.format(pair[0]),
            '\\verb|{0}'.format(pair[0][0] + 'afoswj' + pair[0][1:]))
        _content = _content.replace(pair[0], pair[1])
        _content = _content.replace(
            '\\verb|{0}'.format(pair[0][0] + 'afoswj' + pair[0][1:]),
            '\\verb|{0}'.format(pair[0]))

    output = "\n% Created by Roger Hu's .pytex --> .tex latex transcriber\n"
    time_rn = datetime.now(pytz.timezone('America/Los_Angeles'))
    output += "% Compiled on {0} PDT\n\n".format(str(time_rn)[:-13])
    compile_attempts = 1

    if not has_full_preamble_declaration:
        # Generate the whole preamble from the directives gathered above.
        output += '\n\\documentclass[{0}pt]{{article}}\n'.format(font)
        output += '\\usepackage{'
        if 'graphicx' not in packages and asset_path:
            packages.append('graphicx')
        if 'pdfpages' not in packages and '\\includepdf' in _content:
            packages.append('pdfpages')
        output += ', '.join(packages)
        output += '}\n'
        output += '\n'
        output += '\\geometry{{{0}paper, {1}, margin={2}in}}\n'.format(
            paper, orientation, margin)
        output += '\\setlength{{\\parindent}}{{{0}em}}\n'.format(indent)
        output += '\\linespread{{{0}}}\n'.format(line_spread)
        output += '\\pagestyle{fancy}\n'
        output += '\\fancyhf{}\n\n'

        # Commands
        # Documents with \label need a second pdflatex pass to resolve refs.
        compile_attempts = 1
        if '\\label{' in _content:
            compile_attempts = 2
        for pair in used_macros:
            output += '\\newcommand{' + pair[0] + '}{' + pair[1] + '}\n'
        if '\\begin{Parts}' in _content:
            output += "\n\\newcounter{resumer}\n\\setcounter{resumer}{0}\n"
            output += "\\newenvironment{Parts}{\n\\setcounter{resumer}{0}\n\\begin{enumerate}[label=(\\alph*)]\n"
            output += "\\newcommand\\Part{\\item}}{\\setcounter{resumer}{\\value{enumi}}\\end{enumerate}}\n"
        if '\\begin{ResumeParts}' in _content:
            output += "\n\\newenvironment{ResumeParts}{\n" + "\\begin{enumerate}[label=(\\alph*)]\n"
            output += "\\setcounter{enumi}{\\value{resumer}}\\newcommand\\Part{\\item}}{\\setcounter{resumer}{\\value{enumi}}\\end{enumerate}}\n"
        if '\\norm' in _content:
            output += '\\newcommand{\\norm}[1]{\\|#1\\|}\n'
        if '\\ddef' in _content:
            output += '\\newcommand{\\ddef}{\\mathrel{\\stackrel{\\makebox[0pt]{\\mbox{\\normalfont\\tiny def}}}{=}}}\n'
        if '\\sset' in _content:
            output += '\\newcommand{\\sset}{\\mathrel{\\stackrel{\\makebox[0pt]{\\mbox{\\normalfont\\tiny set}}}{=}}}\n'
        if "\\floor" in _content:
            output += '\\newcommand{\\floor}[1]{\\left\\lfloor #1 \\right\\rfloor}\n'
        if "\\ceil" in _content:
            output += '\\newcommand{\\ceil}[1]{\\left\\lceil #1 \\right\\rceil}\n'
        for pair in used_renewed_commands:
            output += '\\renewcommand' + pair[0] + '{' + pair[1] + '}\n'
        if qed_symbol:
            output += '\\renewcommand\\qedsymbol{{{0}}}\n'.format(qed_symbol)
        output += '\n'
        output += '\\renewcommand{\\headrulewidth}{0pt}\n'
        output += '\\renewcommand{\\footrulewidth}{1pt}\n'

        # Headers and Footers
        if my_name:
            output += '\\rhead{{{0}}}\n'.format(my_name)
        elif args.name:
            output += '\\rhead{{{0}}}\n'.format(args.name)
        output += '\\rfoot{\\fontsize{8}{8} \\selectfont \\thepage}\n'
        if assignment:
            output += '\\lfoot{\\fontsize{8}{8} \\selectfont ' + assignment + '}\n'

        # Theorem definition
        output += '\n'
        output += replacer.theorem_def
        output += '\n'

        if asset_path:
            output += '\\graphicspath{'
            paths = asset_path.split(' ')
            for path in paths:
                output += '{{{0}}}'.format(path)
            output += '}\n'

        # Between "..begin preamble" and "..end preamble"
        if replacer.preamble:
            output += '% This part is unaffected by transcription\n\n'
            output += replacer.preamble
            output += '\n% End of unaffected portion\n\n'

    else:  # Between "..begin full preamble" and "..end full preamble"
        output += replacer.full_preamble + '\n'
        for pair in used_macros:
            output += '\\newcommand{' + pair[0] + '}{' + pair[1] + '}\n'
        for pair in used_renewed_commands:
            output += '\\renewcommand' + pair[0] + '{' + pair[1] + '}\n'

    output += "\\begin{document}\n\n"

    # Put back the content between "..begin ignore" and "..end ignore"
    for i in range(len(replacer.ignored_contents)):
        _content = _content.replace(
            '\\\\marker{0}//'.format(i), '% Ignored by transcriber\n' +
            replacer.ignored_contents[i] + '% End ignored region')

    # Main content
    output += _content

    # Final alignment if not "..align justify"
    end_align = replacer.end_align
    if end_align:
        output += '\n\\end{{{0}}}\n'.format(end_align)

    output += "\n\\end{document}\n"

    # Write to .tex file
    if not args.output:
        tex_file_name = file_name[:file_name.index('.')] + '.tex'
    else:
        tex_file_name = args.output + '.tex'
    with open(tex_file_name, 'w') as f:
        f.write(output)
    _print("Successfully transcribed to {0}!".format(tex_file_name))

    # Optional flags override the pdflatex timeout and pass count; bad or
    # missing values fall back to the defaults set above.
    time_out = 5
    try:
        time_out = int(args.time)
    except Exception:
        pass

    try:
        compile_attempts = int(args.attempts)
    except Exception:
        pass

    if args.compile:
        try:
            for i in range(compile_attempts):
                exe = subprocess.run(['pdflatex', '{0}'.format(tex_file_name)],
                                     timeout=time_out,
                                     capture_output=True)
                _print("Successfully compiled to {0}.pdf!".format(
                    tex_file_name[:-4]))
        except Exception:
            _print("An error has occured. Please manually compile the file.")
    elif args.force_compile:
        # -interaction=nonstopmode keeps pdflatex from halting on errors.
        try:
            for i in range(compile_attempts):
                exe = subprocess.run([
                    'pdflatex', '-interaction=nonstopmode',
                    '{0}'.format(tex_file_name)
                ],
                                     timeout=time_out,
                                     capture_output=True)
                _print("Forceably compiled to {0}.pdf!".format(
                    tex_file_name[:-4]))
        except Exception:
            _print("An error has occured. Please manually compile the file.")

    return tex_file_name[:-4]
# Beispiel #7
# 0
 def __init__(self):
     Replacer.__init__(self)