    def test_jquery(self):
        source, minified, min_map = self.get_fixtures('jquery')

        source_lines = source.splitlines()

        assert sourcemap.discover(minified) == 'jquery.min.map'

        index = sourcemap.loads(min_map)
        assert index.raw == json.loads(min_map)
        for token in index:
            # Ignore tokens whose name is None; there's no
            # simple way to verify they're correct.
            if token.name is None:
                continue
            source_line = source_lines[token.src_line]
            start = token.src_col
            end = start + len(token.name)
            substring = source_line[start:end]

            # jQuery's sourcemap identifies a few tokens incorrectly.
            # For example, it has a token for 'embed' that maps to
            # '"embe', which is wrong. This only happens for a few
            # strings, so we ignore them.
            if substring[0] == '"':
                continue
            assert token.name == substring
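The test above walks every token in the parsed index and checks it against the original source. For reference, the fields on a single token can also be inspected directly. A minimal, self-contained sketch, assuming the handcrafted one-entry map below (its "AAAAA" mappings string is a single all-zero VLQ segment) is accepted by sourcemap.loads:

import sourcemap

tiny_map = '{"version": 3, "sources": ["a.js"], "names": ["foo"], "mappings": "AAAAA"}'
index = sourcemap.loads(tiny_map)
token = list(index)[0]

# Each token maps a (dst_line, dst_col) position in the minified output back
# to (src, src_line, src_col) in the original source, plus an optional name.
assert token.name == 'foo'
assert (token.src, token.src_line, token.src_col) == ('a.js', 0, 0)
assert (token.dst_line, token.dst_col) == (0, 0)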
Example #2
def discover_sourcemap(result):
    """
    Given a UrlResult object, attempt to discover a sourcemap.
    """
    # First, check the header
    smap = result.headers.get('SourceMap', result.headers.get('X-SourceMap'))
    if not smap:
        smap = sourcemap.discover(result.body)
    return smap
Example #3
def discover_sourcemap(result):
    """
    Given a UrlResult object, attempt to discover a sourcemap.
    """
    # When the headers returned by urllib are coerced to a dict,
    # all keys become lowercase, so these lookups are already normalized.
    map_path = result.headers.get('sourcemap', result.headers.get('x-sourcemap'))
    if not map_path:
        map_path = sourcemap.discover(result.body)

    if map_path:
        # Ensure the URL is absolute
        map_path = urljoin(result.url, map_path)

    return map_path
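A usage sketch of the helper above. The UrlResult stand-in below is a hypothetical namedtuple with the url/headers/body attributes the function expects (Python 3 imports shown):

from collections import namedtuple
from urllib.parse import urljoin  # urlparse.urljoin on Python 2

import sourcemap

UrlResult = namedtuple('UrlResult', ['url', 'headers', 'body'])

result = UrlResult(
    url='http://example.com/static/app.min.js',
    headers={},  # no 'sourcemap' / 'x-sourcemap' header in this sketch
    body='console.log(1);\n//# sourceMappingURL=app.min.js.map\n',
)

map_path = result.headers.get('sourcemap', result.headers.get('x-sourcemap'))
if not map_path:
    map_path = sourcemap.discover(result.body)  # finds the sourceMappingURL comment

# Resolve the (usually relative) map reference against the script's URL.
print(urljoin(result.url, map_path))  # http://example.com/static/app.min.js.map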
Example #4
    def test_coolstuff(self):
        source, minified, min_map = self.get_fixtures('coolstuff')

        source_lines = source.splitlines()

        assert sourcemap.discover(minified) == 'tests/fixtures/coolstuff.min.map'

        index = sourcemap.loads(min_map)
        assert index.raw == json.loads(min_map)
        for token in index:
            if token.name is None:
                continue

            source_line = source_lines[token.src_line]
            start = token.src_col
            end = start + len(token.name)
            substring = source_line[start:end]
            assert token.name == substring
    def assertFoundSourcemap(self, fixture, expected):
        self.assertEqual(sourcemap.discover(fixture), expected)

    def assertNotFoundSourcemap(self, fixture):
        self.assertIsNone(sourcemap.discover(fixture))
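A sketch of fixtures such helpers might be exercised with. The assumption here is that discover() recognizes both the current '//#' and the legacy '//@' sourceMappingURL comment styles, and returns None when neither is present:

import sourcemap

current = 'console.log("hi");\n//# sourceMappingURL=hi.min.map'
legacy = 'console.log("hi");\n//@ sourceMappingURL=hi.min.map'
plain = 'console.log("hi");'

assert sourcemap.discover(current) == 'hi.min.map'
assert sourcemap.discover(legacy) == 'hi.min.map'
assert sourcemap.discover(plain) is None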
Example #7
def processFile(js_file_path):

    js_file_path = os.path.abspath(js_file_path)

    print 'READING:', js_file_path

    acorn = Acorn()
    (_stdout, acorn_ok) = acorn.run(js_file_path)
    print 'RUNNING Acorn:', acorn_ok

    # Load in the minified file
    minified = open(js_file_path).read()

    b = Beautifier()
    (ok, out, err) = b.web_run(minified)
    #     print out

    # Create lexer
    lexer = get_lexer_for_filename(js_file_path)

    # Tokenize input and compute mappings between the different
    # indices used: (line, col), flat, (l,c) in token list
    indexBuilder = IndexBuilder(lex(minified, lexer))
    tokens = indexBuilder.tokens
    print 'RUNNING IndexBuilder:', len(tokens) > 0

    #nice1 = JSNice()
    #(ok, _out, _err) = nice1.run(js_file_path)
    #print 'RUNNING JSNice:', ok

    #nice2 = UnuglifyJS()
    #(ok, _out, _err) = nice2.run(js_file_path)
    #print 'RUNNING UnuglifyJS:', ok

    _pid = multiprocessing.current_process().ident

    # Compute scoping: name2defScope is a dictionary where keys
    # are (name, start_index) tuples and values are scope identifiers.
    # Note: start_index is a flat (unidimensional) index,
    # not a (line_chr_idx, col_chr_idx) index.
    #     scopeAnalyst = ScopeAnalyst(js_file_path)
    #     name2defScope = scopeAnalyst.resolve_scope()
    #     isGlobal = scopeAnalyst.isGlobal

    scopeAnalyst = WebScopeAnalyst(minified)
    name2defScope = scopeAnalyst.resolve_scope()
    isGlobal = scopeAnalyst.isGlobal

    print 'RUNNING ScopeAnalyst:', len(name2defScope) > 0

    name2useScope = scopeAnalyst.name2useScope
    name2pth = scopeAnalyst.name2pth
    nameOrigin = scopeAnalyst.nameOrigin

    scopes = set(name2useScope.values())

    for scope in scopes:
        print scope
        lc_list = [
            indexBuilder.revTokMap[indexBuilder.revFlatMat[pos]]
            for (t, pos) in name2useScope.keys()
            if name2useScope[(t, pos)] == scope
        ]
        highlight(tokens, lc_list)
        print

    # Discover the path to the source map
    _map_path = sourcemap.discover(minified)
    # Read and parse our sourcemap
    #     sourcemapIndex = sourcemap.load(open(map_path))

    # Cluster names by scope
    nameScope2Positions = {}

    # Index data by (name,scope)
    for token, l in indexBuilder.name2CharPositions.iteritems():
        for (line, col) in sorted(l, key=lambda (a, b): (a, b)):
            pos = indexBuilder.flatMap[(line, col)]
            if name2defScope.has_key((token, pos)):
                scope = name2defScope[(token, pos)]
                use_scope = name2useScope[(token, pos)]
                pth = name2pth[(token, pos)]

                glb = isGlobal[(token, pos)]

                nameScope2Positions.setdefault((token, scope, glb), [])
                nameScope2Positions[(token, scope, glb)].append((line, col))

#                 print token, pos
#                 print 'def:', scope
#                 print 'use:', use_scope
#                 print 'pth:', pth
#                 highlight(tokens, [indexBuilder.revTokMap[indexBuilder.revFlatMat[pos]]])
#                 print


    for (token, scope, glb), positions in sorted(nameScope2Positions.iteritems(),
                                                 key=lambda (x, y): x[0]):

        if glb:
            continue

        pos = sorted(positions, key=lambda e: (e[0], e[1]))
        #         t = []
        tt = []
        line_tok_idxs = set([])
        for (l, c) in pos:
            #             orig = sourcemapIndex.lookup(line=l, column=c).name
            (tl, tc) = indexBuilder.revTokMap[(l, c)]
            line_tok_idxs.add(tl)
            p = indexBuilder.flatMap[(l, c)]
            tt.append(((tl, tc), p))
#             t.append(orig)

#         if token == 'n':
        print '\nNAME:', token.encode('utf-8'), 'isGlobal =', glb
        #         print scope
        #         highlight(tokens, [indexBuilder.revTokMap[indexBuilder.revFlatMat[pos]]])

        for ((tli, tci), p) in tt:
            scope = name2defScope[(token, p)]
            use_scope = name2useScope[(token, p)]
            pth = name2pth[(token, p)]
            origin = nameOrigin[(token, scope)]
#             print token #, p, origin
#             print
#             print 'def:', scope
#             print 'use:', use_scope
#             print 'pth:', pth
#             print

        for tl in sorted(set([tli for ((tli, tci), p) in tt])):
            l = list(tokens[tl])
            for tc in [tci for ((tli, tci), p) in tt if tli == tl]:
                l[tc] = (l[tc][0], unichr(0x2588) + token + unichr(0x2588))


#                 pos = indexBuilder.flatMap[(line,col)]

            print '  ', '%d:' % (tl + 1), ' '.join(
                [x[1].encode('utf-8') for x in l])

        print

    return
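processFile above juggles three coordinate systems: (line, col) character positions in the minified text, flat character offsets, and (line, index-within-line) positions in the token list. IndexBuilder is project-specific rather than part of the sourcemap package, so the sketch below only restates, under that assumption, the round-trips the code above already relies on:

def roundtrip(indexBuilder, line, col):
    # (line, col) -> flat offset, and back again
    pos = indexBuilder.flatMap[(line, col)]
    assert indexBuilder.revFlatMat[pos] == (line, col)

    # (line, col) -> (token line, token column) in indexBuilder.tokens
    tl, tc = indexBuilder.revTokMap[(line, col)]
    return indexBuilder.tokens[tl][tc]  # the (token_type, text) pair at that spot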
Example #8
    def compare(self, mini_js_path=None, keep_mini=True):
        pid = int(multiprocessing.current_process().ident)

        lexer = get_lexer_for_filename(self.js_path)

        # before
        tmp_b = open(self.js_path, 'r').read()
        tokens_b = list(lex(tmp_b, lexer))

        # Discover the path to the source map
        map_path = sourcemap.discover(tmp_b)
        if map_path is not None:
            # A file can't have a source map unless it is already minified
            return True

        # after
        if mini_js_path is None:
            uglifier = Uglifier()
            mini_js_path = os.path.abspath('tmp_%d.u.js' % pid)
            uglifyjs_ok = uglifier.run(self.js_path, mini_js_path)
            if not uglifyjs_ok:
                raise Exception('Uglifier failed')

        uglified = open(mini_js_path, 'r').read()
        tokens_u = list(lex(uglified, lexer))  # returns a generator of tuples

        if len(tokens_b) != len(tokens_u):
            if not keep_mini:
                remove_file(mini_js_path)
            raise Exception('Different number of tokens')

        clean_names = [
            token for (token_type, token) in tokens_b
            if is_token_subtype(token_type, Token.Name)
        ]

        ugly_names = [
            token for (token_type, token) in tokens_u
            if is_token_subtype(token_type, Token.Name)
        ]

        same = [
            idx for (idx, token) in enumerate(clean_names)
            if ugly_names[idx] == token
        ]

        clean_names_n = [
            token for (idx, token) in enumerate(clean_names) if idx not in same
        ]
        ugly_names_n = [
            token for (idx, token) in enumerate(ugly_names) if idx not in same
        ]

        if not clean_names_n:
            if not keep_mini:
                remove_file(mini_js_path)
            return False

        if sum([len(v) for v in clean_names_n]) <= \
                sum([len(v) for v in ugly_names_n]):
            if not keep_mini:
                remove_file(mini_js_path)
            return True

        if not keep_mini:
            remove_file(mini_js_path)
        return False
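The heuristic in compare() can be illustrated on its own: lex both versions, keep only Name tokens, drop identifiers that survived unchanged, and treat the file as minified when the remaining original names are no longer than the uglified ones. A self-contained sketch with two hardcoded snippets standing in for the file contents:

from pygments import lex
from pygments.lexers import JavascriptLexer
from pygments.token import Token, is_token_subtype


def name_tokens(source):
    # Keep only identifier-like tokens, mirroring the filter used above.
    return [text for ttype, text in lex(source, JavascriptLexer())
            if is_token_subtype(ttype, Token.Name)]


clean = 'function add(first, second) { return first + second; }'
ugly = 'function add(n, r) { return n + r; }'

clean_names, ugly_names = name_tokens(clean), name_tokens(ugly)
changed = [i for i, name in enumerate(clean_names) if ugly_names[i] != name]

looks_minified = (sum(len(clean_names[i]) for i in changed) <=
                  sum(len(ugly_names[i]) for i in changed))
print(looks_minified)  # False: the original identifiers are longer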
Example #9
def processFile(js_file_path):

    # Load in the minified file
    minified = open(js_file_path).read()

    # Create lexer
    lexer = get_lexer_for_filename(js_file_path)

    # Tokenize input and compute mappings between the different
    # indices used: (line, col), flat, (l,c) in token list
    indexBuilder = IndexBuilder(lex(minified, lexer))
    tokens = indexBuilder.tokens
    #    print 'RUNNING IndexBuilder:', len(tokens)>0

    # Compute scoping: name2defScope is a dictionary where keys
    # are (name, start_index) tuples and values are scope identifiers.
    # Note: start_index is a flat (unidimensional) index,
    # not a (line_chr_idx, col_chr_idx) index.
    scopeAnalyst = ScopeAnalyst(js_file_path)
    name2defScope = scopeAnalyst.resolve_scope()
    isGlobal = scopeAnalyst.isGlobal

    name2useScope = scopeAnalyst.name2useScope
    name2pth = scopeAnalyst.name2pth
    nameOrigin = scopeAnalyst.nameOrigin

    scopes = set(name2useScope.values())

    print
    print '=== FOUND %d SCOPES ===' % len(scopes)
    print

    for scope in scopes:
        print 'USE SCOPE:', scope
        lc_list = [
            indexBuilder.revTokMap[indexBuilder.revFlatMat[pos]]
            for (t, pos) in name2useScope.keys()
            if name2useScope[(t, pos)] == scope
        ]
        highlight(tokens, lc_list)
        print

    scopes = set(name2defScope.values())

    print
    print '=== FOUND %d NAME SCOPES ===' % len(scopes)
    print

    for scope in scopes:
        print 'DEF SCOPE:', scope
        lc_list = [
            indexBuilder.revTokMap[indexBuilder.revFlatMat[pos]]
            for (t, pos) in name2defScope.keys()
            if name2defScope[(t, pos)] == scope
        ]
        highlight(tokens, lc_list)
        print

    # Discover the path to the source map
    map_path = sourcemap.discover(minified)
    # Read and parse our sourcemap
    if map_path:
        sourcemapIndex = sourcemap.load(open(map_path))

    # Cluster names by scope
    nameScope2Positions = {}

    # Index data by (name,scope)
    for token, l in indexBuilder.name2CharPositions.iteritems():
        for (line, col) in sorted(l, key=lambda (a, b): (a, b)):
            pos = indexBuilder.flatMap[(line, col)]
            if name2defScope.has_key((token, pos)):
                scope = name2defScope[(token, pos)]
                use_scope = name2useScope[(token, pos)]
                pth = name2pth[(token, pos)]

                glb = isGlobal[(token, pos)]

                nameScope2Positions.setdefault((token, scope, glb), [])
                nameScope2Positions[(token, scope, glb)].append((line, col))

#                print token, pos
#                print 'def:', scope
#                print 'use:', use_scope
#                print 'pth:', pth
#                highlight(tokens, [indexBuilder.revTokMap[indexBuilder.revFlatMat[pos]]])
#                print

    print
    print

    for (token, scope, glb), positions in sorted(nameScope2Positions.iteritems(),
                                                 key=lambda (x, y): x[0]):

        pos = sorted(positions, key=lambda e: (e[0], e[1]))
        tt = []
        line_tok_idxs = set([])
        for (l, c) in pos:
            (tl, tc) = indexBuilder.revTokMap[(l, c)]
            line_tok_idxs.add(tl)
            p = indexBuilder.flatMap[(l, c)]
            if map_path:
                orig = sourcemapIndex.lookup(line=l, column=c).name
            else:
                orig = token
            print token, scope, (l, c), orig
            tt.append(((tl, tc), p, orig))
#             t.append(orig)

#         if token == 'n':
        print '\nNAME:', token.encode(
            'utf-8'), '( isGlobal =', glb, '; original =', orig, ')'
        #         print scope
        #         highlight(tokens, [indexBuilder.revTokMap[indexBuilder.revFlatMat[pos]]])

        for ((tli, tci), p, orig) in tt:
            scope = name2defScope[(token, p)]
            use_scope = name2useScope[(token, p)]
            pth = name2pth[(token, p)]
            origin = nameOrigin[(token, scope)]
#             print token #, p, origin
#             print
#             print 'def:', scope
#             print 'use:', use_scope
#             print 'pth:', pth
#             print

        for tl in sorted(set([tli for ((tli, tci), p, orig) in tt])):
            l = list(tokens[tl])
            for tc in [tci for ((tli, tci), p, orig) in tt if tli == tl]:
                l[tc] = (l[tc][0], unichr(0x2588) + token + unichr(0x2588))


#                 pos = indexBuilder.flatMap[(line,col)]

            print '  ', '%d:' % (tl + 1), ' '.join(
                [x[1].encode('utf-8') for x in l])

        print

    return
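For completeness, the lookup used above (sourcemapIndex.lookup) resolves a single position in the minified file back to its original name. A short sketch, with a hypothetical map path:

import sourcemap

with open('build/app.min.js.map') as fh:   # hypothetical path
    index = sourcemap.load(fh)             # load() reads a file object, loads() a string

token = index.lookup(line=0, column=42)    # a position in the minified file
print(token.src, token.src_line, token.src_col, token.name)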