Example #1
def processFile(js_file_path):
    
    pid = int(multiprocessing.current_process().ident)
    
    try:
        signal.alarm(600)
        
        prepro = Preprocessor(js_file_path)
        prepro.write_temp_file('tmp_%d.js' % pid)
            
        beauty = Beautifier()
        ok = beauty.run('tmp_%d.js' % pid, 'tmp_%d.b.js' % pid)
        
        if ok:
            mc = MiniChecker('tmp_%d.b.js' % pid)
            try:
                isMini = mc.compare(keep_mini=False)
            except Exception as e:
                isMini = str(e)
                
            cleanup(pid)
            return [os.path.basename(js_file_path), isMini]
        
        else:
            cleanup(pid)
            return [os.path.basename(js_file_path), 'Beautifier failed']
        
    except TimeExceededError:
        
        cleanup(pid)
        return [os.path.basename(js_file_path), 'Timeout']
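Example #1 relies on a SIGALRM handler and a cleanup helper defined elsewhere in the corpus; below is a minimal sketch of that plumbing. The handler name is hypothetical, but TimeExceededError and cleanup(pid) are the names the example itself uses.

import os
import signal

class TimeExceededError(Exception):
    pass

def _raise_timeout(signum, frame):
    # Installed once per worker so that signal.alarm(600) aborts slow files.
    raise TimeExceededError('processing exceeded the time limit')

signal.signal(signal.SIGALRM, _raise_timeout)

def cleanup(pid):
    # Best-effort removal of this worker's temp files
    # ('tmp_<pid>.js', 'tmp_<pid>.b.js').
    for name in ('tmp_%d.js' % pid, 'tmp_%d.b.js' % pid):
        try:
            os.remove(name)
        except OSError:
            pass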
Example #2
def processFile(l):

    js_file_path = l[0]

    pid = int(multiprocessing.current_process().ident)

    try:
        # Temp files to be created during processing
        path_tmp = 'tmp_%d.js' % pid
        path_tmp_b = 'tmp_%d.b.js' % pid

        # Strip comments, replace literals, etc
        try:
            prepro = Preprocessor(os.path.join(corpus_root, js_file_path))
            prepro.write_temp_file(path_tmp)
        except:
            cleanup(pid)
            return (js_file_path, None, 'Preprocessor fail')

        # Pass through beautifier to fix layout
        clear = Beautifier()
        ok = clear.run(path_tmp, path_tmp_b)

        if not ok:
            cleanup(pid)
            return (js_file_path, None, 'Beautifier fail')

        try:
            iBuilder_clear = IndexBuilder(Lexer(path_tmp_b).tokenList)
        except:
            cleanup(pid)
            return (js_file_path, None, 'IndexBuilder fail')

        n_lines = len(iBuilder_clear.tokens)
        # Use a distinct loop variable; 'l' is already the function parameter.
        max_line_len = max(len(line) for line in iBuilder_clear.tokens)

        cleanup(pid)
        return (js_file_path, n_lines, max_line_len)

    except Exception as e:
        cleanup(pid)
        return (js_file_path, None, str(e))
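The processFile variants in this corpus are written to run under a worker pool, which is why temp files are keyed by multiprocessing.current_process().ident. A hedged driver sketch follows; the input file name and CSV column layout are assumptions.

import csv
import multiprocessing

def main():
    # Assumed input: a CSV whose first column is the JS file path,
    # matching processFile(l) reading l[0].
    with open('file_list.csv') as f:
        rows = [row for row in csv.reader(f)]
    pool = multiprocessing.Pool(processes=4)
    for result in pool.imap_unordered(processFile, rows):
        print(result)  # e.g. (js_file_path, n_lines, max_line_len)
    pool.close()
    pool.join()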
Example #3
def processFile(row):

    js_file_path = os.path.join(corpus_root, row[0])

    pid = int(multiprocessing.current_process().ident)
    base_name = os.path.splitext(os.path.basename(js_file_path))[0]

    # Temp files to be created during processing
    temp_files = {
        'path_tmp': 'tmp_%d.js' % pid,
        'path_tmp_b': 'tmp_%d.b.js' % pid,
        'path_tmp_b_a': 'tmp_%d.b.a.js' % pid,
        'path_tmp_u': 'tmp_%d.u.js' % pid,
        'path_tmp_u_a': 'tmp_%d.u.a.js' % pid
    }

    try:
        # Pass through beautifier to fix layout:
        #         # - once through JSNice without renaming
        #         jsNiceBeautifier = JSNice(flags=['--no-types', '--no-rename'])
        #
        #         (ok, _out, _err) = jsNiceBeautifier.run(js_file_path,
        #                                                temp_files['path_tmp'])
        #         if not ok:
        #             cleanup(temp_files)
        #             return (js_file_path, False, 'JSNice Beautifier fail')
        #
        #
        #         # Weird JSNice renamings despite --no-rename
        #         try:
        #             before = set([token for (token, token_type) in
        #                           Lexer(js_file_path).tokenList
        #                           if is_token_subtype(token_type, Token.Name)])
        #             after = set([token for (token, token_type) in
        #                           Lexer(temp_files['path_tmp']).tokenList
        #                           if is_token_subtype(token_type, Token.Name)])
        #
        #             if not before == after:
        #                 return (js_file_path, False, 'Weird JSNice renaming')
        #
        #         except:
        #             cleanup(temp_files)
        #             return (js_file_path, False, 'Lexer fail')

        # Strip comments, replace literals, etc
        try:
            prepro = Preprocessor(os.path.join(corpus_root, js_file_path))
            prepro.write_temp_file(temp_files['path_tmp'])
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Preprocessor fail')

        # - and another time through uglifyjs pretty print only
        clear = Beautifier()
        ok = clear.run(temp_files['path_tmp'], temp_files['path_tmp_b'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, False, 'Beautifier fail')

        # Minify
        ugly = Uglifier()
        ok = ugly.run(temp_files['path_tmp_b'], temp_files['path_tmp_u'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, False, 'Uglifier fail')

        # Num tokens before vs after
        try:
            tok_clear = Lexer(temp_files['path_tmp_b']).tokenList
            tok_ugly = Lexer(temp_files['path_tmp_u']).tokenList
        except:
            cleanup(temp_files)
            return (js_file_path, False, 'Lexer fail')

        # For now only work with minified files that have
        # the same number of tokens as the originals
        if not len(tok_clear) == len(tok_ugly):
            cleanup(temp_files)
            return (js_file_path, False, 'Num tokens mismatch')

        # Align minified and clear files, in case the beautifier
        # did something weird
        try:
            aligner = Aligner()
            # This is already the baseline corpus, no (smart) renaming yet
            aligner.align(temp_files['path_tmp_b'], temp_files['path_tmp_u'])
        except:
            cleanup(temp_files)
            return (js_file_path, False, 'Aligner fail')

        # Check if minification resulted in any change
        # It's not very interesting otherwise
        if open(temp_files['path_tmp_b_a']).read() == \
                open(temp_files['path_tmp_u_a']).read():
            cleanup(temp_files)
            return (js_file_path, False, 'Not minified')

        try:
            lex_ugly = Lexer(temp_files['path_tmp_u_a'])
            _iBuilder_ugly = IndexBuilder(lex_ugly.tokenList)
        except:
            cleanup(temp_files)
            return (js_file_path, False, 'IndexBuilder fail')

        # Store original and uglified versions
        ok = clear.run(temp_files['path_tmp_b_a'],
                       os.path.join(output_path, '%s.js' % base_name))
        if not ok:
            cleanup(temp_files)
            cleanupProcessed(base_name)
            return (js_file_path, False, 'Beautifier fail')

        ok = clear.run(temp_files['path_tmp_u_a'],
                       os.path.join(output_path, '%s.u.js' % base_name))
        if not ok:
            cleanup(temp_files)
            cleanupProcessed(base_name)
            return (js_file_path, False, 'Beautifier fail')

        cleanup(temp_files)
        return (js_file_path, True, 'OK')

    except Exception as e:
        cleanup(temp_files)
        return (js_file_path, False, str(e))
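From Example #3 onward, cleanup receives the temp-file dictionary rather than a pid. The corpus defines the real helper elsewhere; a plausible reconstruction is:

import os

def cleanup(temp_files):
    # Best-effort removal of every temp file registered for this worker.
    for path in temp_files.values():
        try:
            os.remove(path)
        except OSError:
            pass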
Example #4
def processFile(l):
    
    js_file_path = l[0]
    
    if js_file_path in seen:
        return (js_file_path, None, 'Skipped')
    
    pid = int(multiprocessing.current_process().ident)
    
    # Temp files to be created during processing
    temp_files = {'path_tmp': 'tmp_%d.js' % pid,
                  'path_tmp_b': 'tmp_%d.b.js' % pid,
                  'path_tmp_b_n': 'tmp_%d.b.n.js' % pid,
                  'path_tmp_u': 'tmp_%d.u.js' % pid,
                  'path_tmp_u_n': 'tmp_%d.u.n.js' % pid,
                  'path_tmp_b_a': 'tmp_%d.b.a.js' % pid,
                  'path_tmp_u_a': 'tmp_%d.u.a.js' % pid}
    
    try:        
        # Strip comments, replace literals, etc
        try:
            prepro = Preprocessor(os.path.join(corpus_root, js_file_path))
            prepro.write_temp_file(temp_files['path_tmp'])
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Preprocessor fail')
        
        
        # Pass through beautifier to fix layout:
        # - once through JSNice without renaming
#         jsNiceBeautifier = JSNice(flags=['--no-types', '--no-rename'])
#         
#         (ok, _out, _err) = jsNiceBeautifier.run(temp_files['path_tmp'], 
#                                                 temp_files['path_tmp_b_n'])
#         if not ok:
#             cleanup(temp_files)
#             return (js_file_path, None, 'JSNice Beautifier fail')
        
        
#         # - and another time through uglifyjs pretty print only 
#         clear = Beautifier()
#         ok = clear.run(temp_files['path_tmp_b_n'], 
#                        temp_files['path_tmp_b'])
#         if not ok:
#             cleanup(temp_files)
#             return (js_file_path, None, 'Beautifier fail')
        
#         # JSNice is down! 
        clear = Beautifier()
        ok = clear.run(temp_files['path_tmp'], 
                       temp_files['path_tmp_b_n'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Beautifier fail')
        # Normalize
        norm = Normalizer()
        ok = norm.run(os.path.join(os.path.dirname(os.path.realpath(__file__)), 
                                 temp_files['path_tmp_b_n']),
                      False, 
                      temp_files['path_tmp_b'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Normalizer fail')
        
        
        
        # Minify
        ugly = Uglifier()
        ok = ugly.run(temp_files['path_tmp_b'], 
                      temp_files['path_tmp_u_n'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Uglifier fail')
        # Normalize
        norm = Normalizer()
        ok = norm.run(os.path.join(os.path.dirname(os.path.realpath(__file__)), 
                                 temp_files['path_tmp_u_n']),
                      False, 
                      temp_files['path_tmp_u'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Normalizer fail')
        
        
        
        # Num tokens before vs after
        try:
            tok_clear = Lexer(temp_files['path_tmp_b']).tokenList
            tok_ugly = Lexer(temp_files['path_tmp_u']).tokenList
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Lexer fail')
        
        # For now only work with minified files that have
        # the same number of tokens as the originals
        if not len(tok_clear) == len(tok_ugly):
            cleanup(temp_files)
            return (js_file_path, None, 'Num tokens mismatch')
        
        
        # Align minified and clear files, in case the beautifier 
        # did something weird
        try:
            aligner = Aligner()
            # This is already the baseline corpus, no (smart) renaming yet
            aligner.align(temp_files['path_tmp_b'], 
                          temp_files['path_tmp_u'])
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Aligner fail')
        
        try:
            lex_clear = Lexer(temp_files['path_tmp_b_a'])
            iBuilder_clear = IndexBuilder(lex_clear.tokenList)
            
            lex_ugly = Lexer(temp_files['path_tmp_u_a'])
            iBuilder_ugly = IndexBuilder(lex_ugly.tokenList)
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'IndexBuilder fail')
        
        
        
        # Normalize
        norm = Normalizer()
        ok = norm.run(os.path.join(os.path.dirname(os.path.realpath(__file__)), 
                                 temp_files['path_tmp_b']),
                      True, 
                      temp_files['path_tmp_u_n'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Normalizer fail')
        
        try:
            lex_norm = Lexer(temp_files['path_tmp_u_n'])
            iBuilder_norm = IndexBuilder(lex_norm.tokenList)
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'IndexBuilder fail')
        
        normalized = []
        for line_idx, line in enumerate(iBuilder_norm.tokens):
            normalized.append(' '.join([t for (_tt,t) in line]) + "\n")
        
        
        
        # Compute scoping: name2scope is a dictionary where keys
        # are (name, start_index) tuples and values are scope identifiers. 
        # Note: start_index is a flat (unidimensional) index, 
        # not a (line_chr_idx, col_chr_idx) index.
        try:
            scopeAnalyst = ScopeAnalyst(os.path.join(
                                 os.path.dirname(os.path.realpath(__file__)), 
                                 temp_files['path_tmp_u_a']))
#             _name2defScope = scopeAnalyst.resolve_scope()
#             _isGlobal = scopeAnalyst.isGlobal
#             _name2useScope = scopeAnalyst.resolve_use_scope()
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'ScopeAnalyst fail')
        
        orig = []
        no_renaming = []
        
        for line_idx, line in enumerate(iBuilder_ugly.tokens):
            orig.append(' '.join([t for (_tt,t) in \
                                  iBuilder_clear.tokens[line_idx]]) + "\n")
            
            no_renaming.append(' '.join([t for (_tt,t) in line]) + "\n")
            
        # Simple renaming: disambiguate overloaded names using scope id
        basic_renaming = renameUsingScopeId(scopeAnalyst, 
                                            iBuilder_ugly)
        
        # More complicated renaming: collect the context around  
        # each name (global variables, API calls, punctuation)
        # and build a hash of the concatenation.
#         hash_renaming = renameUsingHashAllPrec(scopeAnalyst, 
#                                                 iBuilder_ugly,
#                                                 debug=True)
        
        hash_def_one_renaming = renameUsingHashDefLine(scopeAnalyst, 
                                                   iBuilder_ugly, 
                                                   twoLines=False,
                                                   debug=False)

        hash_def_two_renaming = renameUsingHashDefLine(scopeAnalyst, 
                                                    iBuilder_ugly, 
                                                    twoLines=True,
                                                    debug=False)

        cleanup(temp_files)
        return (js_file_path,
                orig, 
                no_renaming, 
                basic_renaming,
                normalized, 
#                 hash_renaming,
                hash_def_one_renaming,
                hash_def_two_renaming)
        
    except Exception as e:
        cleanup(temp_files)
        return (js_file_path, None, str(e))
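Example #4 rebuilds source lines from (token_type, token) pairs in three separate loops. The repeated idiom factors into one small helper; this is a sketch, not the author's code.

def join_token_lines(token_lines):
    # token_lines is IndexBuilder.tokens: a list of lines, each a list of
    # (token_type, token) pairs; returns one space-joined string per line.
    return [' '.join(tok for (_tt, tok) in line) + '\n'
            for line in token_lines]

# e.g. normalized = join_token_lines(iBuilder_norm.tokens)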
Example #5
def detect_images(image_path, model_path, configfile, output_dir):
    # Load model
    configs = configparser.ConfigParser()
    configs.read(configfile)

    try:
        input_shape = json.loads(configs['MODEL']['input_shape'])
        try:
            ratios = json.loads(configs['MODEL']['ratios'])
            scales = json.loads(configs['MODEL']['scales'])
        except Exception as e:
            print(e)
            print('USING DEFAULT RATIOS AND SCALES')
            ratios = None
            scales = None

    except:
        print("CONFIG FILE DOES NOT HAVE INPUT_SHAPE")
        sys.exit()

    preprocessor = Preprocessor(input_width=input_shape[2],
                                input_height=input_shape[1],
                                mean=np.array([[[0.485, 0.456, 0.406]]]),
                                std=np.array([[[0.229, 0.224, 0.225]]]))
    postprocessor = PostProcessor(ratios=ratios, scales=scales)

    # Get labelmap
    labels = load_classes_from_configfile(configfile)

    # GPU WARMUP
    img_name = [
        i for i in os.listdir(image_path) if i.endswith((
            '.jpg',
            '.png',
        ))
    ][0]
    image, image_orig, batch, scales = preprocess_image(
        os.path.join(image_path, img_name), preprocessor)
    gpu_warmup(image_orig, trt_engine_path=model_path)

    print('Getting engine')
    engine = get_engine(model_path)
    print('Engine retrieved')
    context = engine.create_execution_context()
    print('Execution context created')

    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    print('Buffers allocated')

    input_shapes = []
    output_shapes = []

    for binding in engine:
        if engine.binding_is_input(binding):
            input_shapes.append(engine.get_binding_shape(binding))
        else:  # and one output
            output_shapes.append(engine.get_binding_shape(binding))

    print(f'INPUT SHAPES:{input_shapes}, OUTPUT SHAPES:{output_shapes}')

    if not os.path.exists(output_dir):
        os.makedirs(output_dir, exist_ok=True)

    assert (os.path.abspath(output_dir) != os.path.abspath(image_path))

    class_scores_list = {i: [] for i in labels.values()}
    for img_name in os.listdir(image_path):

        image, image_orig, batch, scales = preprocess_image(
            os.path.join(image_path, img_name), preprocessor)
        if (image is None):
            continue
        st = time.time()

        # TRT INFERENCE
        inputs[0].host = batch

        trt_outputs = common.do_inference(
            context=context,
            bindings=bindings,
            inputs=inputs,
            outputs=outputs,
            stream=stream,
            batch_size=1,
        )

        regression, classification = [
            output.reshape(shape)
            for output, shape in zip(trt_outputs, output_shapes)
        ]

        if torch.cuda.is_available():
            regression = torch.from_numpy(regression).cuda()
            classification = torch.from_numpy(classification).cuda()
        else:
            regression = torch.from_numpy(regression)
            classification = torch.from_numpy(classification)

        scores, classification, transformed_anchors = postprocessor(
            batch, regression, classification)

        print('Elapsed time: {}'.format(time.time() - st))
        idxs = np.where(scores.cpu() > 0.01)

        for j in range(idxs[0].shape[0]):
            label_name = labels[int(classification[idxs[0][j]])]
            # Index with the thresholded positions, like the lines above.
            score = scores[idxs[0][j]]
            class_scores_list[label_name].append(score.cpu().numpy())

    # Get quartile distribution of all the scores.
    for label_name, scores_list in class_scores_list.items():
        print(f'{label_name} class has mean_score: {np.mean(scores_list)}, '
              f'0.25:{np.quantile(scores_list, 0.25)}, '
              f'0.5:{np.quantile(scores_list, 0.5)},'
              f'0.75:{np.quantile(scores_list, 0.75)}')
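Example #5 calls a preprocess_image helper it does not define. Example #8 below inlines the same steps, so a faithful reconstruction looks roughly like this (a sketch based on that inlined code):

import cv2
import numpy as np

def preprocess_image(img_path, preprocessor):
    image = cv2.imread(img_path)
    if image is None:
        return None, None, None, None
    image_orig = image.copy()
    image, scales = preprocessor(image)
    # HWC -> NCHW float32 batch of size 1, as the TRT engine expects.
    batch = np.transpose(np.expand_dims(image, 0), (0, 3, 1, 2))
    return image, image_orig, batch.astype(np.float32), scales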
Example #6
def processFile(l):

    js_file_path = l[0]
    base_name = os.path.splitext(os.path.basename(js_file_path))[0]

    pid = int(multiprocessing.current_process().ident)

    temp_files = {
        'path_tmp': 'tmp_%d.js' % pid,
        'path_tmp_b': 'tmp_%d.b.js' % pid,
        'path_tmp_b_1': 'tmp_%d.b.1.js' % pid,
        'path_tmp_b_2': 'tmp_%d.b.2.js' % pid,
        'path_tmp_b_a': 'tmp_%d.b.a.js' % pid,
        'path_tmp_u': 'tmp_%d.u.js' % pid,
        'path_tmp_u_a': 'tmp_%d.u.a.js' % pid,
        'path_tmp_unugly': 'tmp_%d.n2p.js' % pid,
        'path_tmp_unugly_1': 'tmp_%d.n2p.1.js' % pid,
        'path_tmp_unugly_2': 'tmp_%d.n2p.2.js' % pid,
        'path_tmp_jsnice': 'tmp_%d.jsnice.js' % pid,
        'f2': 'tmp_%d.no_renaming.js' % pid,
        #                   'f3': 'tmp_%d.basic_renaming.js' % pid,
        #                   'f4': 'tmp_%d.hash_renaming.js' % pid,
        'f5': 'tmp_%d.hash_def_one_renaming.js' % pid,
        #                   'f6': 'tmp_%d.hash_def_two_renaming.js' % pid,
        'f7': 'tmp_%d.hash_def_one_renaming_fb.js' % pid,
        'path_orig': os.path.join(output_path, '%s.js' % base_name),
        'path_ugly': os.path.join(output_path, '%s.u.js' % base_name),
        'path_unugly': os.path.join(output_path, '%s.n2p.js' % base_name),
        'path_jsnice': os.path.join(output_path, '%s.jsnice.js' % base_name)
    }

    #     for strategy in ['js', 'lm.js', 'len.js', 'freqlen.js']:
    #         for renaming in ['no_renaming', 'hash_def_one_renaming']:
    #             temp_files['path_tmp_%s_%s' % (renaming, strategy)] = \
    #                     'tmp_%d.%s.%s' % (pid, renaming, strategy)

    candidates = []

    #     if True:
    try:

        # Strip comments, replace literals, etc
        try:
            prepro = Preprocessor(os.path.join(corpus_root, js_file_path))
            prepro.write_temp_file(temp_files['path_tmp'])
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Preprocessor fail')

        # Pass through beautifier to fix layout
        clear = Beautifier()
        ok = clear.run(temp_files['path_tmp'], temp_files['path_tmp_b'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Beautifier fail')

#         # Pass through beautifier to fix layout
#         clear = Beautifier()
#         ok = clear.run(temp_files['path_tmp'],
#                        temp_files['path_tmp_b_1'])
#         if not ok:
#             cleanup(temp_files)
#             return (js_file_path, None, 'Beautifier fail')
#
#         jsNiceBeautifier = JSNice(flags=['--no-types', '--no-rename'])
#
#         (ok, _out, _err) = jsNiceBeautifier.run(temp_files['path_tmp_b_1'],
#                                                 temp_files['path_tmp_b_2'])
#         if not ok:
#             cleanup(temp_files)
#             print js_file_path, _err
#             return (js_file_path, None, 'JSNice Beautifier fail')
#
#         ok = clear.run(temp_files['path_tmp_b_2'],
#                        temp_files['path_tmp_b'])
#         if not ok:
#             cleanup(temp_files)
#             return (js_file_path, None, 'Beautifier fail')
#
#
#         # Weird JSNice renamings despite --no-rename
#         try:
#             before = set([token for (token, token_type) in
#                           Lexer(temp_files['path_tmp_b_1']).tokenList
#                           if is_token_subtype(token_type, Token.Name)])
#             after = set([token for (token, token_type) in
#                           Lexer(temp_files['path_tmp_b']).tokenList
#                           if is_token_subtype(token_type, Token.Name)])
#
#             if not before == after:
#                 return (js_file_path, None, 'Weird JSNice renaming')
#
#         except:
#             cleanup(temp_files)
#             return (js_file_path, None, 'Lexer fail')

        # Minify
        ugly = Uglifier()
        ok = ugly.run(temp_files['path_tmp_b'], temp_files['path_tmp_u'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Uglifier fail')

        # Num tokens before vs after
        try:
            tok_clear = Lexer(temp_files['path_tmp_b']).tokenList
            tok_ugly = Lexer(temp_files['path_tmp_u']).tokenList
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Lexer fail')

        # For now only work with minified files that have
        # the same number of tokens as the originals
        if not len(tok_clear) == len(tok_ugly):
            cleanup(temp_files)
            return (js_file_path, None, 'Num tokens mismatch')

        # Align minified and clear files, in case the beautifier
        # did something weird
        try:
            aligner = Aligner()
            # This is already the baseline corpus, no (smart) renaming yet
            aligner.align(temp_files['path_tmp_b'], temp_files['path_tmp_u'])
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Aligner fail')



        if open(temp_files['path_tmp_b']).read() == \
                open(temp_files['path_tmp_u']).read():
            cleanup(temp_files)
            return (js_file_path, None, 'Not minified')

        try:
            lex_ugly = Lexer(temp_files['path_tmp_u_a'])
            iBuilder_ugly = IndexBuilder(lex_ugly.tokenList)
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'IndexBuilder fail')

        ############################################################
        # From now on only work with path_tmp_b_a and path_tmp_u_a
        ############################################################

        # Store original and uglified versions
        ok = clear.run(temp_files['path_tmp_b_a'], temp_files['path_orig'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Beautifier fail')

        ok = clear.run(temp_files['path_tmp_u_a'], temp_files['path_ugly'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Beautifier fail')

        # Run the JSNice from http://www.nice2predict.org
        unuglifyJS = UnuglifyJS()
        (ok, _out, _err) = unuglifyJS.run(temp_files['path_tmp_u_a'],
                                          temp_files['path_tmp_unugly'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Nice2Predict fail')

        ok = clear.run(temp_files['path_tmp_unugly'],
                       temp_files['path_unugly'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Beautifier fail')

#         ok = clear.run(temp_files['path_tmp_unugly'],
#                        temp_files['path_tmp_unugly_1'])
#         if not ok:
#             cleanup(temp_files)
#             return (js_file_path, None, 'Beautifier fail')
#
#         (ok, _out, _err) = jsNiceBeautifier.run(temp_files['path_tmp_unugly_1'],
#                                                 temp_files['path_tmp_unugly_2'])
#         if not ok:
#             cleanup(temp_files)
#             print js_file_path, _err
#             return (js_file_path, None, 'JSNice Beautifier fail')
#
#         ok = clear.run(temp_files['path_tmp_unugly_2'],
#                        temp_files['path_unugly'])
#         if not ok:
#             cleanup(temp_files)
#             return (js_file_path, None, 'Beautifier fail')

        try:
            lexer = Lexer(temp_files['path_unugly'])
            iBuilder = IndexBuilder(lexer.tokenList)
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'IndexBuilder fail')

        try:
            scopeAnalyst = ScopeAnalyst(
                os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             temp_files['path_unugly']))
            nameOrigin = scopeAnalyst.nameOrigin
            isGlobal = scopeAnalyst.isGlobal

            for (name, def_scope) in nameOrigin.iterkeys():

                pos = scopeAnalyst.nameDefScope2pos[(name, def_scope)]
                (lin, col) = iBuilder.revFlatMat[pos]
                (tok_lin, tok_col) = iBuilder.revTokMap[(lin, col)]

                candidates.append(('Nice2Predict', def_scope, tok_lin, tok_col,
                                   isGlobal.get((name, pos),
                                                True), name, '', ''))
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'ScopeAnalyst fail')

#         # Run the JSNice from http://www.jsnice.org
#         jsNice = JSNice()
#         (ok, _out, _err) = jsNice.run(temp_files['path_tmp_u_a'],
#                                       temp_files['path_tmp_jsnice'])
#         if not ok:
#             cleanup(temp_files)
#             return (js_file_path, None, 'JSNice fail')
#
#         ok = clear.run(temp_files['path_tmp_jsnice'],
#                        temp_files['path_jsnice'])
#         if not ok:
#             cleanup(temp_files)
#             return (js_file_path, None, 'Beautifier fail')
#
#         try:
#             lexer = Lexer(temp_files['path_jsnice'])
#             iBuilder = IndexBuilder(lexer.tokenList)
#         except:
#             cleanup(temp_files)
#             return (js_file_path, None, 'IndexBuilder fail')
#
#         try:
#             scopeAnalyst = ScopeAnalyst(os.path.join(
#                                  os.path.dirname(os.path.realpath(__file__)),
#                                  temp_files['path_jsnice']))
#             nameOrigin = scopeAnalyst.nameOrigin
#             isGlobal = scopeAnalyst.isGlobal
#
#             for (name, def_scope) in nameOrigin.iterkeys():
#
#                 pos = scopeAnalyst.nameDefScope2pos[(name, def_scope)]
#                 (lin,col) = iBuilder.revFlatMat[pos]
#                 (tok_lin,tok_col) = iBuilder.revTokMap[(lin,col)]
#
#                 candidates.append(('JSNice', def_scope,
#                                    tok_lin, tok_col,
#                                    isGlobal.get((name, pos), True),
#                                    name, '',''))
#         except:
#             cleanup(temp_files)
#             return (js_file_path, None, 'ScopeAnalyst fail')

        # Compute scoping: name2scope is a dictionary where keys
        # are (name, start_index) tuples and values are scope identifiers.
        # Note: start_index is a flat (unidimensional) index,
        # not a (line_chr_idx, col_chr_idx) index.
        try:
            scopeAnalyst = ScopeAnalyst(
                os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             temp_files['path_tmp_u_a']))
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'ScopeAnalyst fail')

        # Baseline translation: No renaming, no scoping
        no_renaming = []
        for _line_idx, line in enumerate(iBuilder_ugly.tokens):
            no_renaming.append(' '.join([t for (_tt, t) in line]) + "\n")

        with open(temp_files['f2'], 'w') as f_no_renaming:
            f_no_renaming.writelines(no_renaming)

        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                           'train.no_renaming', 'tuning', 'moses.ini'))
        (_moses_ok, translation_no_renaming,
         _err) = moses.run(temp_files['f2'])

        nc = processTranslationUnscoped(translation_no_renaming, iBuilder_ugly,
                                        lm_path, temp_files['f2'], output_path,
                                        base_name)
        if nc:
            candidates += nc

        #  translation, iBuilder, lm_path,
        #  f_path, output_path, base_name
        # Default translation: No renaming
#         no_renaming = []
#         for _line_idx, line in enumerate(iBuilder_ugly.tokens):
#             no_renaming.append(' '.join([t for (_tt,t) in line]) + "\n")
#
#         with open(temp_files['f2'], 'w') as f_no_renaming:
#             f_no_renaming.writelines(no_renaming)
#
#         moses = MosesDecoder(ini_path=os.path.join(ini_path, \
#                            'train.no_renaming', 'tuning', 'moses.ini'))
#         (_moses_ok, translation, _err) = moses.run(temp_files['f2'])

        nc = processTranslationScoped(translation_no_renaming, iBuilder_ugly,
                                      scopeAnalyst, lm_path, temp_files['f2'],
                                      output_path, base_name)
        if nc:
            candidates += nc

        # More complicated renaming: collect the context around
        # each name (global variables, API calls, punctuation)
        # and build a hash of the concatenation.
        hash_def_one_renaming = renameUsingHashDefLine(scopeAnalyst,
                                                       iBuilder_ugly,
                                                       twoLines=False,
                                                       debug=False)
        with open(temp_files['f5'], 'w') as f_hash_def_one_renaming:
            f_hash_def_one_renaming.writelines(hash_def_one_renaming)

#        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
#                           'train.hash_def_one_renaming', 'tuning', 'moses.ini'))
#        (_moses_ok,
#            translation_hash_renaming,
#            _err) = moses.run(temp_files['f5'])

        mosesParams = {}
        mosesParams["text"] = hash_def_one_renaming  #lex_ugly.collapsedText
        #mosesParams["align"] = "true"
        #mosesParams["report-all-factors"] = "true"

        mresults = proxy.translate(
            mosesParams)  # __request("translate", mosesParams)
        rawText = Postprocessor(mresults["nbest"])
        translation_hash_renaming = rawText.getProcessedOutput()

        nc = processTranslationScoped(translation_hash_renaming, iBuilder_ugly,
                                      scopeAnalyst, lm_path, temp_files['f5'],
                                      output_path, base_name)
        if nc:
            candidates += nc


#        nc = processTranslationScopedFallback(translation_hash_renaming,
#                                              translation_no_renaming,
#                                              iBuilder_ugly,
#                                              scopeAnalyst,
#                                              lm_path,
#                                              temp_files['f7'],
#                                              output_path,
#                                              base_name)
#        if nc:
#            candidates += nc

        cleanup(temp_files)
        cleanupRenamed(pid)
        return (js_file_path, 'OK', candidates)

    except Exception as e:
        cleanup(temp_files)
        cleanupRenamed(pid)
        return (js_file_path, None, str(e).replace("\n", ""))
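Example #6 translates via a module-level XML-RPC proxy. Example #10 below shows the wiring, which amounts to the following (the server URL is illustrative):

import xmlrpclib  # Python 2; xmlrpc.client in Python 3

proxy = xmlrpclib.ServerProxy("http://localhost:8080/RPC2")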
Example #7
def processFile(l):
    
    def localCleanup(output_path, base_names):
        for base_name in base_names:
            tryRemove(os.path.join(output_path, base_name))
    
    js_file_path = l[0]
    base_name = os.path.splitext(os.path.basename(js_file_path))[0]
    
    pid = int(multiprocessing.current_process().ident)

    candidates = []
    
    try:
#     if True:
        # Temp files to be created during processing
        path_tmp = 'tmp_%d.js' % (pid)
        path_tmp_b = 'tmp_%d.b.js' % (pid)
        path_tmp_b_a = 'tmp_%d.b.a.js' % (pid)
        path_tmp_u = 'tmp_%d.u.js' % (pid)
        path_tmp_u_a = 'tmp_%d.u.a.js' % (pid)
        path_tmp_unugly = 'tmp_%d.n2p.js' % (pid)
        path_tmp_jsnice = 'tmp_%d.jsnice.js' % (pid)
        
        f2 = 'tmp_%d.no_renaming.js' % (pid)
        f3 = 'tmp_%d.basic_renaming.js' % (pid)
        f4 = 'tmp_%d.hash_renaming.js' % (pid)
        f5 = 'tmp_%d.hash_def_one_renaming.js' % (pid)
        f6 = 'tmp_%d.hash_def_two_renaming.js' % (pid)
        
        path_orig = '%s.js' % (base_name)
        path_ugly = '%s.u.js' % (base_name)
        path_unugly = '%s.n2p.js' % (base_name)
        path_jsnice = '%s.jsnice.js' % (base_name)
        
        # Strip comments, replace literals, etc
        try:
            prepro = Preprocessor(os.path.join(corpus_root, js_file_path))
            prepro.write_temp_file(path_tmp)
        except:
            cleanup(pid)
            return (js_file_path, None, 'Preprocessor fail')
        
        # Pass through beautifier to fix layout
        clear = Beautifier()
        ok = clear.run(path_tmp, path_tmp_b+'.tmp1')
        if not ok:
            cleanup(pid)
            return (js_file_path, None, 'Beautifier 1 fail')
         
        jsNiceBeautifier = JSNice(flags=['--no-types', '--no-rename'])
        
        (ok, _out, _err) = jsNiceBeautifier.run(path_tmp_b+'.tmp1', path_tmp_b+'.tmp2')
        if not ok:
            cleanup(pid)
            return (js_file_path, None, 'JSNice Beautifier 1 fail')

        ok = clear.run(path_tmp_b+'.tmp2', path_tmp_b)
        if not ok:
            cleanup(pid)
            return (js_file_path, None, 'Beautifier 1 fail')
         
        # Minify
        ugly = Uglifier()
        ok = ugly.run(path_tmp_b, path_tmp_u)
        
        if not ok:
            cleanup(pid)
            return (js_file_path, None, 'Uglifier fail')
        
        # Num tokens before vs after
        try:
            tok_clear = Lexer(path_tmp_b).tokenList
            tok_ugly = Lexer(path_tmp_u).tokenList
        except:
            cleanup(pid)
            return (js_file_path, None, 'Lexer fail')
       
        # For now only work with minified files that have
        # the same number of tokens as the originals
        if not len(tok_clear) == len(tok_ugly):
            cleanup(pid)
            return (js_file_path, None, 'Num tokens mismatch')
        
        # Align minified and clear files, in case the beautifier 
        # did something weird
        try:
            aligner = Aligner()
            # This is already the baseline corpus, no (smart) renaming yet
            aligner.align(path_tmp_b, path_tmp_u)
        except:
            cleanup(pid)
            return (js_file_path, None, 'Aligner fail')
        
        try:
#             iBuilder_clear = IndexBuilder(Lexer(path_tmp_b_a).tokenList)
            iBuilder_ugly = IndexBuilder(Lexer(path_tmp_u_a).tokenList)
        except:
            cleanup(pid)
            return (js_file_path, None, 'IndexBuilder fail')
        
        
        # Store original and uglified versions
        ok = clear.run(path_tmp_u_a, os.path.join(output_path, path_ugly))
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly])
            return (js_file_path, None, 'Beautifier 2 fail')
        
        ok = clear.run(path_tmp_b_a, os.path.join(output_path, path_orig))
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig])
            return (js_file_path, None, 'Beautifier 3 fail')
        
        
        # Run the JSNice from http://www.nice2predict.org
        unuglifyJS = UnuglifyJS()
        (ok, _out, _err) = unuglifyJS.run(path_tmp_b_a, path_tmp_unugly)
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig])
            return (js_file_path, None, 'Nice2Predict fail')
        
        ok = clear.run(path_tmp_unugly, path_tmp_unugly+'.tmp1')
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, path_unugly])
            return (js_file_path, None, 'Beautifier 4 fail')
        
        (ok, _out, _err) = jsNiceBeautifier.run(path_tmp_unugly+'.tmp1', path_tmp_unugly+'.tmp2')
        if not ok:
            cleanup(pid)
            return (js_file_path, None, 'JSNice Beautifier 2 fail')
    
        ok = clear.run(path_tmp_unugly+'.tmp2', os.path.join(output_path, path_unugly))
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, path_unugly])
            return (js_file_path, None, 'Beautifier 4 fail')

        try:
            scopeAnalyst = ScopeAnalyst(os.path.join(
                                 os.path.dirname(os.path.realpath(__file__)), 
                                 path_tmp_unugly))
            nameOrigin = scopeAnalyst.nameOrigin
            for (name, def_scope) in nameOrigin.iterkeys():
                candidates.append(('Nice2Predict', def_scope, name, '', ''))
        except:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, path_unugly])
            return (js_file_path, None, 'ScopeAnalyst fail')
    
    
    
        # Run the JSNice from http://www.jsnice.org
        jsNice = JSNice()
        (ok, _out, _err) = jsNice.run(path_tmp_b_a, path_tmp_jsnice)
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, path_unugly])
            return (js_file_path, None, 'JSNice fail')

        ok = clear.run(path_tmp_jsnice, os.path.join(output_path, path_jsnice))
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, \
                                       path_unugly, path_jsnice])
            return (js_file_path, None, 'Beautifier 5 fail')
        
        try:
            scopeAnalyst = ScopeAnalyst(os.path.join(
                                 os.path.dirname(os.path.realpath(__file__)), 
                                 path_tmp_jsnice))
            nameOrigin = scopeAnalyst.nameOrigin
            for (name, def_scope) in nameOrigin.iterkeys():
                candidates.append(('JSNice', def_scope, name, '', ''))
        except:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, \
                                       path_unugly, path_jsnice])
            return (js_file_path, None, 'ScopeAnalyst fail')
        
        
        
        # Compute scoping: name2scope is a dictionary where keys
        # are (name, start_index) tuples and values are scope identifiers. 
        # Note: start_index is a flat (unidimensional) index, 
        # not a (line_chr_idx, col_chr_idx) index.
        try:
            scopeAnalyst = ScopeAnalyst(os.path.join(
                                 os.path.dirname(os.path.realpath(__file__)), 
                                 path_tmp_u_a))
            _name2defScope = scopeAnalyst.resolve_scope()
            _isGlobal = scopeAnalyst.isGlobal
            _name2useScope = scopeAnalyst.resolve_use_scope()
        except:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, \
                                       path_unugly, path_jsnice])
            return (js_file_path, None, 'ScopeAnalyst fail')
        
        
        no_renaming = []
        for _line_idx, line in enumerate(iBuilder_ugly.tokens):
            no_renaming.append(' '.join([t for (_tt,t) in line]) + "\n")
        
        with open(f2, 'w') as f_no_renaming:
            f_no_renaming.writelines(no_renaming)
        
        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                           'train.no_renaming', 'tuning', 'moses.ini'))
        (_moses_ok, translation, _err) = moses.run(f2)

        nc = processTranslation(translation, iBuilder_ugly, 
                       scopeAnalyst, lm_path, f2,
                       output_path, base_name, clear)
        if nc:
            candidates += nc
        
        
        # Simple renaming: disambiguate overloaded names using scope id
        basic_renaming = renameUsingScopeId(scopeAnalyst, iBuilder_ugly)
        with open(f3, 'w') as f_basic_renaming:
            f_basic_renaming.writelines(basic_renaming)
        
        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                           'train.basic_renaming', 'tuning', 'moses.ini'))
        (_moses_ok, translation, _err) = moses.run(f3)
        nc = processTranslation(translation, iBuilder_ugly, 
                       scopeAnalyst, lm_path, f3,
                       output_path, base_name, clear)
        if nc:
            candidates += nc
            
        
        # More complicated renaming: collect the context around  
        # each name (global variables, API calls, punctuation)
        # and build a hash of the concatenation.
        hash_renaming = renameUsingHashAllPrec(scopeAnalyst, 
                                               iBuilder_ugly,
                                               debug=False)
#         print hash_renaming
        with open(f4, 'w') as f_hash_renaming:
            f_hash_renaming.writelines(hash_renaming)
        
        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                           'train.hash_renaming', 'tuning', 'moses.ini'))
        (_moses_ok, translation, _err) = moses.run(f4)
        
        nc = processTranslation(translation, iBuilder_ugly, 
                       scopeAnalyst, lm_path, f4,
                       output_path, base_name, clear)
        if nc:
            candidates += nc
        
        hash_def_one_renaming = renameUsingHashDefLine(scopeAnalyst, 
                                                   iBuilder_ugly, 
                                                   twoLines=False,
                                                   debug=False)
        with open(f5, 'w') as f_hash_def_one_renaming:
            f_hash_def_one_renaming.writelines(hash_def_one_renaming)

        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                           'train.hash_def_one_renaming', 'tuning', 'moses.ini'))
        (_moses_ok, translation, _err) = moses.run(f5)
        
        nc = processTranslation(translation, iBuilder_ugly, 
                       scopeAnalyst, lm_path, f5,
                       output_path, base_name, clear)
        if nc:
            candidates += nc
            

        hash_def_two_renaming = renameUsingHashDefLine(scopeAnalyst, 
                                                   iBuilder_ugly, 
                                                   twoLines=True,
                                                   debug=False)
        with open(f6, 'w') as f_hash_def_two_renaming: 
            f_hash_def_two_renaming.writelines(hash_def_two_renaming)
        
        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                           'train.hash_def_two_renaming', 'tuning', 'moses.ini'))
        (_moses_ok, translation, _err) = moses.run(f6)
        
        nc = processTranslation(translation, iBuilder_ugly, 
                       scopeAnalyst, lm_path, f6,
                       output_path, base_name, clear)
        if nc:
            candidates += nc
            
        
        cleanup(pid)
        cleanupRenamed(pid)
        return (js_file_path, 'OK', candidates)


    except Exception as e:
        cleanup(pid)
        cleanupRenamed(pid)
        return (js_file_path, None, str(e).replace("\n", ""))
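The five decoding passes in Example #7 repeat one shape: write the renamed lines, run Moses with the matching tuning moses.ini, post-process the translation. A hedged refactoring sketch using the example's own names (ini_path, lm_path, output_path, and base_name are in scope there):

def run_moses_strategy(strategy, lines, tmp_path,
                       iBuilder_ugly, scopeAnalyst, clear):
    # Write the renamed token lines, decode them with the strategy's
    # tuned model, then turn the translation into renaming candidates.
    with open(tmp_path, 'w') as f:
        f.writelines(lines)
    moses = MosesDecoder(ini_path=os.path.join(
        ini_path, 'train.%s' % strategy, 'tuning', 'moses.ini'))
    (_ok, translation, _err) = moses.run(tmp_path)
    return processTranslation(translation, iBuilder_ugly, scopeAnalyst,
                              lm_path, tmp_path, output_path, base_name,
                              clear)

# e.g. nc = run_moses_strategy('hash_renaming', hash_renaming, f4,
#                              iBuilder_ugly, scopeAnalyst, clear)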
Example #8
def detect_images(image_path, model_path, configfile, output_dir):
    # Load model
    configs = configparser.ConfigParser()
    configs.read(configfile)

    try:
        input_shape = json.loads(configs['MODEL']['input_shape'])
        try:
            ratios = json.loads(configs['MODEL']['ratios'])
            scales = json.loads(configs['MODEL']['scales'])
        except Exception as e:
            print(e)
            print('USING DEFAULT RATIOS AND SCALES')
            ratios = None
            scales = None

    except:
        print("CONFIG FILE DOES NOT HAVE INPUT_SHAPE")
        sys.exit()

    sess, output_names, input_names = load_onnx_model(model_path)

    preprocessor = Preprocessor(input_width=input_shape[2],
                                input_height=input_shape[1],
                                mean=np.array([[[0.485, 0.456, 0.406]]]),
                                std=np.array([[[0.229, 0.224, 0.225]]]))
    postprocessor = PostProcessor(ratios=ratios, scales=scales)

    # Get labelmap
    labels = load_classes_from_configfile(configfile)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir, exist_ok=True)

    assert (os.path.abspath(output_dir) != os.path.abspath(image_path))

    for img_name in os.listdir(image_path):

        image = cv2.imread(os.path.join(image_path, img_name))
        if image is None:
            continue

        image_orig = image.copy()
        image, scales = preprocessor(image)
        image = np.expand_dims(image, 0)
        image = np.transpose(image, (0, 3, 1, 2))
        image = image.astype(np.float32)
        batch = image.copy()
        st = time.time()
        regression, classification = sess.run(output_names,
                                              {input_names[0]: batch})
        if torch.cuda.is_available():
            regression = torch.from_numpy(regression).cuda()
            classification = torch.from_numpy(classification).cuda()
        else:
            regression = torch.from_numpy(regression)
            classification = torch.from_numpy(classification)

        scores, classification, transformed_anchors = postprocessor(
            batch, regression, classification)

        print('Elapsed time: {}'.format(time.time() - st))
        idxs = np.where(scores.cpu() > 0.35)

        for j in range(idxs[0].shape[0]):
            bbox = transformed_anchors[idxs[0][j], :]

            x1 = int(bbox[0] / scales[0])
            y1 = int(bbox[1] / scales[1])
            x2 = int(bbox[2] / scales[0])
            y2 = int(bbox[3] / scales[1])
            label_name = labels[int(classification[idxs[0][j]])]
            # Index with the thresholded positions, like the lines above.
            score = scores[idxs[0][j]]
            caption = '{} {:.3f}'.format(label_name, score)
            draw_caption(image_orig, (x1, y1, x2, y2), caption)
            cv2.rectangle(image_orig, (x1, y1), (x2, y2),
                          color=(0, 0, 255),
                          thickness=2)

        cv2.imwrite(os.path.join(output_dir, img_name), image_orig)
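Example #8 assumes a load_onnx_model helper. A minimal onnxruntime-based sketch consistent with the sess.run call above (a reconstruction, not the original code):

import onnxruntime as ort

def load_onnx_model(model_path):
    sess = ort.InferenceSession(model_path)
    output_names = [o.name for o in sess.get_outputs()]
    input_names = [i.name for i in sess.get_inputs()]
    return sess, output_names, input_names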
Example #9
# The snippet begins mid-definition; a hypothetical signature is restored.
def writeTempLines(lines,
                   out_file_path):
    
    js_tmp = open(out_file_path, 'w')
    js_tmp.write('\n'.join([' '.join([token for (_token_type, token) in line]) 
                            for line in lines]).encode('utf8'))
    js_tmp.write('\n')
    js_tmp.close()


    
input_file = os.path.abspath(sys.argv[1])
output_file = os.path.abspath(sys.argv[2])
mode = int(sys.argv[3])


prepro = Preprocessor(input_file)
prepro.write_temp_file('tmp.js')

clear = Beautifier()
ok = clear.run('tmp.js', 
               'tmp.b.js')
  
lexer = Lexer('tmp.b.js')
iBuilder = IndexBuilder(lexer.tokenList)

scopeAnalyst = ScopeAnalyst(os.path.join(
                         os.path.dirname(os.path.realpath(__file__)), 
                         'tmp.b.js'))

hash_renaming = renameUsingHashDefLine(scopeAnalyst, 
                                   iBuilder, 
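The snippet cuts off mid-call. For reference, the complete invocation as it appears in Examples #4 and #7 of this corpus is:

hash_renaming = renameUsingHashDefLine(scopeAnalyst,
                                       iBuilder,
                                       twoLines=False,
                                       debug=False)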
Example #10
'''
@author: caseycas
'''

import xmlrpclib
from tools import Preprocessor
from tools.postprocessor import Postprocessor

proxy = xmlrpclib.ServerProxy("http://godeep.cs.ucdavis.edu:8080/RPC2")

mosesParams = {}

input_path = "/Users/caseycas/jsnaughty/data/js_files.sample/98440.js"

# Strip comments, replace literals, etc
try:
    prepro = Preprocessor(input_path)
    #prepro.write_temp_file(output_path)
except:
    print("Preprocessor failed")
    quit()
    
#print(prepro.__str__())
#quit()

#mosesParams["text"] = "var m = [ ]"
mosesParams["text"] = prepro.__str__()
mosesParams["align"] = "true"
mosesParams["report-all-factors"] = "true"


results = proxy.translate(mosesParams)  # __request("translate", mosesParams)
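Example #6 shows what happens to the reply: the server returns an n-best list, which is run through the Postprocessor. A short sketch reusing the imports above:

rawText = Postprocessor(results["nbest"])
translation = rawText.getProcessedOutput()
print(translation)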