Example #1
def matchTreesFromDomainWithScriptsFromURLListS2(domain, url_list_path):
  treedict = getTreesForDomainFromDB(domain)
  if treedict == None or len(treedict) == 0:
    print "failed to fetch trees for domain ", domain
    return None, None
  passed_sc = []
  failed_sc = []
  f = open(url_list_path)
  for line in f:
    url = line.strip()
    print "process url "+url
    hosts, inlines = fetchScripts(url)
    if inlines==None or len(inlines) ==0:
      print "no inlines for "+url
      continue
    for inline in inlines:
      passed, failed = matchTreesFromDomainWithScript(domain, inline, treedict)
      if passed == None:
        print "failed for inline [S] ", inline[:100],' [E]'
      else:
        passed_sc += passed
        failed_sc += failed
  rate = float(len(passed_sc))/float(len(passed_sc)+len(failed_sc))
  print "passed %d; failed: %d; rate:%f" %(len(passed_sc), len(failed_sc), rate)
  print "match details : ", str(global_count)
Example #2
def extractScriptsAndGenerateASTNodesFromURLListFinerBlock(path):
  f = open(path)
  scriptdict = {}
  total_script_count = {}
  total_uniq_script_blocks = 0
  total_json_count = {}
  total_uniq_json_blocks = 0
  for line in f:
    url = line.strip()
    print "process url "+url
    hosts, inlines = fetchScripts(url)
    if inlines==None or len(inlines) ==0:
      print "no inlines for "+url
      continue
    for inline in inlines:
      #print "INLINE:%s" % inline
      is_json = False
      #rs = analyzeJSCodes(inline)
      rs, sc = analyzeJSCodesFinerBlock(inline)
      if rs == None:
        rs = analyzeJSON(inline)
        is_json = True
      if rs == None:
        continue
      
      if is_json:
        tree = TemplateTree(rs, None)
        if not tree.key in scriptdict:
          scriptdict[tree.key] = [(json.dumps(rs), url, tree, -1)]
          total_json_count[tree.key] = 1
          total_script_count[tree.key] = 1
        else:
          scriptdict[tree.key].append((inline, url, tree, -1))
          total_json_count[tree.key] += 1
          total_script_count[tree.key] += 1
      else:
        for index in range(len(rs)):
          seq = rs[index]
          tree = TemplateTree(seq, None)
          key = tree.key
          if not key in scriptdict:
            scriptdict[key] = [(sc[index], url, tree, index)]
            total_script_count[key] = 1
            print "  add key  %s" %key
          else:
            contents = [x[0] for x in scriptdict[key]]
            if not sc[index] in contents: 
              scriptdict[key].append((sc[index],url, tree, index))
              print "  item %s has %d unique scripts" %(key, len(scriptdict[key]))
            total_script_count[key] += 1

  return scriptdict, total_script_count, total_json_count
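The returned scriptdict maps a tree key to a list of (script, url, tree, index) tuples, while total_script_count and total_json_count track how often each key was seen. A minimal usage sketch follows; the input file name url_list.txt is hypothetical.

# Sketch only: summarize the per-key counts produced above.
scriptdict, total_script_count, total_json_count = \
  extractScriptsAndGenerateASTNodesFromURLListFinerBlock('url_list.txt')
for key in sorted(scriptdict, key=lambda k: len(scriptdict[k]), reverse=True):
  uniq = len(scriptdict[key])             # unique (script, url, tree, index) entries
  total = total_script_count.get(key, 0)  # occurrences across all pages
  print "%s: %d unique blocks, %d total occurrences" % (key, uniq, total)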
Example #3
def matchScriptsFromURLFileWithDomainTemplate(domain, url_list_path):
    treedict = getTreesForDomainFromDB(domain)
    if treedict == None or len(treedict) == 0:
        print "failed to fetch trees for domain ", domain
        return None, None
    passed_sc = []
    failed_sc = []
    passed_dict = {}
    failed_dict = {}
    f_pass = open('passlist', 'w')
    f_empty = open('emptylist', 'w')
    f_fail = open('faillist', 'w')

    f = open(url_list_path)
    for line in f:
        url = line.strip()
        print "process url " + url
        hosts, inlines = fetchScripts(url)
        if inlines == None or len(inlines) == 0:
            print "no inlines for " + url
            f_empty.write(url + '\n')
            continue
        for inline in inlines:
            passed, failed = matchScriptWithDomainTemplate(
                domain, inline, treedict)
            # check for a failed match before using the result lists;
            # len(failed) would raise TypeError if the matcher returned None
            if passed == None:
                print "failed for inline [S] ", inline[:100], ' [E]'
                continue
            if len(failed) == 0:
                for fa in passed:
                    passed_dict[fa] = 1
                    f_pass.write(fa + '\n')
            else:
                for fa in failed:
                    failed_dict[fa] = 1
                    f_fail.write(fa + '\n')
            passed_sc += passed
            failed_sc += failed
    rate = float(len(passed_sc)) / float(len(passed_sc) + len(failed_sc))
    rate2 = float(
        len(passed_dict)) / float(len(passed_dict) + len(failed_dict))
    print "passed %d; failed: %d; rate:%f" % (len(passed_dict),
                                              len(failed_dict), rate2)
    print "passed %d; failed: %d; rate:%f" % (len(passed_sc), len(failed_sc),
                                              rate)
    print "match details : ", str(global_count)
Example #4
def matchScriptsFromURLFileWithDomainTemplate(domain, url_list_path):
  treedict = getTreesForDomainFromDB(domain)
  if treedict == None or len(treedict) == 0:
    print "failed to fetch trees for domain ", domain
    return None, None
  passed_sc = []
  failed_sc = []
  passed_dict = {}
  failed_dict = {}
  f_pass = open('passlist','w') 
  f_empty = open('emptylist','w') 
  f_fail = open('faillist','w') 

  f = open(url_list_path)
  for line in f:
    url = line.strip()
    print "process url "+url
    hosts, inlines = fetchScripts(url)
    if inlines==None or len(inlines) ==0:
      print "no inlines for "+url
      f_empty.write(url+'\n')
      continue
    for inline in inlines:
      passed, failed = matchScriptWithDomainTemplate(domain, inline, treedict)
      # check for a failed match before using the result lists;
      # len(failed) would raise TypeError if the matcher returned None
      if passed == None:
        print "failed for inline [S] ", inline[:100],' [E]'
        continue
      if len(failed) == 0:
        for fa in passed:
          passed_dict[fa] = 1
          f_pass.write(fa+'\n')
      else:
        for fa in failed:
          failed_dict[fa] = 1
          f_fail.write(fa+'\n')
      passed_sc += passed
      failed_sc += failed
  rate = float(len(passed_sc))/float(len(passed_sc)+len(failed_sc))
  rate2 = float(len(passed_dict))/float(len(passed_dict)+len(failed_dict))
  print "passed %d; failed: %d; rate:%f" %(len(passed_dict), len(failed_dict), rate2)
  print "passed %d; failed: %d; rate:%f" %(len(passed_sc), len(failed_sc), rate)
  print "match details : ", str(global_count)
Example #5
def extractAndStoreScriptsFromFileList(file_list_path):
    f = open(file_list_path)
    urls = set()
    for line in f:
        urls.add(line.strip())

    for url in urls:
        print "prcossing scripts of %s " % url
        hosts, inlines = fetchScripts(url)
        if hosts == None or inlines == None:
            contents = fetchURLContents(url)
            if contents == None or len(contents) == 0:
                print >> sys.stderr, "%s doesn't have contents " % url
                continue
            content = findAverageContents(contents)
            if content == None:
                print >> sys.stderr, "failed to extract average content for %s" % url
                continue
            extractAndStoreScriptsFromDOM(url, content)

        else:
            print "%s already has %d hosts and %d inline scripts " \
              %(url, len(hosts), len(inlines))
Example #6
def extractAndStoreScriptsFromFileList(file_list_path):
  f = open(file_list_path)
  urls = set()
  for line in f:
    urls.add(line.strip())

  for url in urls:
    print "prcossing scripts of %s " % url
    hosts, inlines = fetchScripts(url)
    if hosts == None or inlines == None:
      contents = fetchURLContents(url)
      if contents == None or len(contents) == 0:
        print >> sys.stderr, "%s doesn't have contents " %url
        continue
      content = findAverageContents(contents)
      if content == None:
        print >> sys.stderr, "failed to extract average content for %s" %url
        continue
      extractAndStoreScriptsFromDOM(url, content)

    else:
      print "%s already has %d hosts and %d inline scripts " \
        %(url, len(hosts), len(inlines))   
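The URL list is read into a set so duplicate lines are only crawled once. Below is a minimal sketch of that dedup step as a standalone helper; readURLSet and the input file name are hypothetical, not part of the original module.

# Sketch only: one URL per line, whitespace stripped, blanks and duplicates dropped.
def readURLSet(path):
  with open(path) as f:
    return set(line.strip() for line in f if line.strip())

# usage: urls = readURLSet('crawl_targets.txt')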
Example #7
def extractScriptsAndGenerateASTNodesFromURLList(url_path):
  scriptdict = {}
  f = open(url_path)
  for line in f:
    url = line.strip()
    print "process url "+url
    hosts, inlines = fetchScripts(url)
    if inlines==None or len(inlines) ==0:
      print "no inlines for "+url
      continue
    for inline in inlines:
      #print "INLINE:%s" % inline
      is_json = False
      rs = analyzeJSCodes(inline)
      if rs == None:
        rs = analyzeJSON(inline)
        is_json = True
      if rs == None:
        continue
      m = hashlib.md5()
      if not is_json:
        for node in rs:
          m.update(node.tag)
      else:
        for k in rs:
          m.update(k)
      key = m.hexdigest()
      if not key in scriptdict:
        scriptdict[key] = [(inline,url,rs)]
        print "  add key  %s" %key
      else:
        contents = [x[0] for x in scriptdict[key]]
        if not inline in contents:
          scriptdict[key].append((inline,url, rs) )
          print "  item %s has %d distinct scripts" %(key, len(scriptdict[key]))
  f.close()
  return scriptdict
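The dedup key here is an MD5 digest over the AST node tags (for JavaScript) or the top-level keys (for JSON), so scripts that differ only in literal values land in the same bucket. Below is a standalone sketch of that keying idea; structuralKey and the tag lists are illustrative, not part of the original module.

import hashlib

# Sketch only: the key depends on the tag sequence, not on literal values.
def structuralKey(tags):
  m = hashlib.md5()
  for tag in tags:
    m.update(tag)
  return m.hexdigest()

print structuralKey(["Var", "String", "Call"])  # identical tag sequences ...
print structuralKey(["Var", "String", "Call"])  # ... give the identical key
print structuralKey(["Var", "Number", "Call"])  # a different tag gives a new key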
Example #8
def generateTemplateBasedOnURLsFromFile(path, dst_path):
    f = open(path)
    scriptdict = {}
    total_script_blocks = 0
    total_uniq_script_blocks = 0
    debug_dict = {}

    static_scripts = 0
    dynamic_scripts = 0

    for line in f:
        url = line.strip()
        print "process url " + url
        hosts, inlines = fetchScripts(url)
        if inlines == None or len(inlines) == 0:
            print "no inlines for " + url
            continue
        for inline in inlines:
            is_json = False
            rs, sc = analyzeJSCodesFinerBlock(inline)
            if rs == None:
                rs = analyzeJSON(inline)
                is_json = True
            if rs == None:
                continue

            if is_json:
                tree = TemplateTree(rs, None)
                if not tree.key in scriptdict:
                    scriptdict[tree.key] = [(inline, url, tree, -1)]
                    debug_dict[tree.key] = [inline]
                else:
                    debug_dict[tree.key].append(inline)
                    contents = [x[0] for x in scriptdict[tree.key]]
                    if not inline in contents:
                        scriptdict[tree.key].append((inline, url, tree, -1))
                        total_uniq_script_blocks += 1
                total_script_blocks += 1

            else:
                for index in range(len(rs)):
                    total_script_blocks += 1
                    seq = rs[index]
                    tree = TemplateTree(seq, None)
                    key = tree.key
                    if not key in scriptdict:
                        debug_dict[key] = [sc[index]]
                        scriptdict[key] = [(sc[index], url, tree, index)]
                        print "  add key  %s" % key
                    else:
                        contents = [x[0] for x in scriptdict[key]]
                        debug_dict[key].append(sc[index])
                        if not sc[index] in contents:
                            scriptdict[key].append(
                                (sc[index], url, tree, index))
                            print "  item %s has %d unique scripts" % (
                                key, len(scriptdict[key]))
                            total_uniq_script_blocks += 1

    fw = open(os.path.join(dst_path, 'debug'), 'w')
    for k in debug_dict:
        vals = debug_dict[k]
        fw.write("%d %s \n" % (len(vals), k))
        fw.write("  --EXAMPLE-- %s\n" % vals[0])
    fw.close()
    #start to analyze trees
    #scriptdict[tree_key] = [(script, url, tree, index)]
    trees = []
    insufficient_urls = {}
    keys = sorted(scriptdict.keys(), key=lambda k: len(scriptdict[k]))
    for key in keys:
        is_static = True
        name = "%d_%s" % (len(scriptdict[key]), key)
        fw = open(os.path.join(dst_path, name), 'w')
        for item in scriptdict[key]:
            fw.write(item[1] + "||" + str(item[3]) + "  " + str(item[0]) +
                     "\n")

        #make sure all template trees with the same key are the same
        script_list = scriptdict[key]
        length_list = sorted([len(item[2].nodes) for item in script_list])
        seq_length = 0
        if length_list[0] != length_list[-1]:
            fw.write("[ALERT] seq length is not consistent")
            fw.close()
            continue
        else:
            seq_length = length_list[0]

        #only handle JavaScript for now
        tree = script_list[0][2]
        if tree.type == "json":
            print "the inline is json!"
            fw.write("[TODO]: the inline is json. This is next step\n")
            fw.close()
            trees.append(tree)
            continue

        #process String/Object/Array nodes
        #script_list: [(script, url, tree, index)]
        fw.write("start analyzeing values\n")
        script_length = len(script_list)

        for i in range(seq_length):
            node = script_list[0][2].nodes[i]
            try:
                if node.tag == "String":
                    vals = [item[2].nodes[i].value for item in script_list]
                    encoded_val = [b64encode(x) for x in vals]
                    #item = 'string%d: %s' %(i, ','.join(encoded_val))
                    #fw.write(item+"\n")
                    tree.strings[i] = vals
                    node_pattern = generateNodePattern(vals)
                    if is_static and \
                      ((node_pattern.tp!=StringType.CONST) and (node_pattern.tp!=StringType.INSUFFICIENT)):
                        is_static = False
                        dynamic_scripts += script_length

                    tree.string_types_str[str(i)] = node_pattern.dumps()
                    if node_pattern.is_insufficient():
                        if not key in insufficient_urls:
                            insufficient_urls[key] = \
                              [item[1] for item in script_list]
                        else:
                            insufficient_urls[key] += [
                                item[1] for item in script_list
                            ]
                    # testing
                    #node_pattern = NodePattern()
                    #r = node_pattern.loads(tree.string_types_str[i])
                    #if r == False:
                    #  print "node_pattern failed to load: "+tree.string_types_str[i]
                    #else:
                    #  print "successfully loaded tree: "+tree.string_types_str[i]
                    print "STRING%d: [TYPE:%s] [VALUE:%s]" \
                      %(i, tree.string_types_str[str(i)],','.join(encoded_val))
                if node.tag == "Object":
                    #debug = "tag:%s val:%s" \
                    #  %(script_list[0][2].nodes[i].tag,str(script_list[0][2].nodes[i].value))
                    #print "DEBUG: %s" %debug
                    rs = analyzeObjectResultHelper(script_list, i)
                    rs = extractObjectValues(rs)
                    type_dict = {}
                    for k in rs:
                        encoded_val = [b64encode(x) for x in rs[k]]
                        node_pattern = generateNodePattern(rs[k])
                        if is_static and \
                          ((node_pattern.tp!=StringType.CONST) and (node_pattern.tp!=StringType.INSUFFICIENT)):
                            is_static = False
                            dynamic_scripts += script_length

                        type_dict[k] = node_pattern.dumps()
                        if node_pattern.is_insufficient():
                            if not key in insufficient_urls:
                                insufficient_urls[key] = \
                                  [item[1] for item in script_list]
                            else:
                                insufficient_urls[key] += [
                                    item[1] for item in script_list
                                ]
                        #testing
                        #node_pattern = NodePattern()
                        #r = node_pattern.loads(type_dict[k])
                        #if r == False:
                        #  print "node_pattern failed to load: "+type_dict[k]
                        #else:
                        #  print "successfully loaded tree: "+type_dict[k]
                        print "OBJECT%d: [TYPE:%s] [KEY:%s][VALUE:%s]" \
                          %(i, type_dict[k], k, ','.join(encoded_val))
                    tree.objects[i] = rs
                    tree.object_types_str[str(i)] = type_dict
                if node.tag == "Array":
                    rs = analyzeArrayResultHelper(script_list, i)
                    rs = extractObjectValues(rs)
                    type_dict = {}
                    for k in rs:
                        encoded_val = [b64encode(x) for x in rs[k]]
                        #fw.write("array%d: %s:%s\n" % (i, k, ','.join(encoded_val)) )
                        node_pattern = generateNodePattern(rs[k])
                        if is_static and \
                          ((node_pattern.tp!=StringType.CONST) and (node_pattern.tp!=StringType.INSUFFICIENT)):
                            is_static = False
                            dynamic_scripts += script_length

                        type_dict[k] = node_pattern.dumps()
                        if node_pattern.is_insufficient():
                            if not key in insufficient_urls:
                                insufficient_urls[key] = \
                                  [item[1] for item in script_list]
                            else:
                                insufficient_urls[key] += [
                                    item[1] for item in script_list
                                ]
                        #testing
                        #node_pattern = NodePattern()
                        #r = node_pattern.loads(type_dict[k])
                        #if r == False:
                        #  print "node_pattern failed to load: "+type_dict[k]
                        #else:
                        #  print "successfully loaded tree: "+type_dict[k]
                        print "ARRAY%d: [TYPE:%s] [KEY:%s][VALUE:%s]" \
                          %(i, type_dict[k], k, ','.join(encoded_val))
                    tree.arrays[i] = rs
                    tree.array_types_str[str(i)] = type_dict
            except Exception as e:
                displayErrorMsg("fetchAndProcessScriptsOfURLsFromFile",\
                   "excpetion in analyzing node %d %s " %(i, str(e)))

        if is_static:
            static_scripts += script_length

        print "Done writing %d items for file %s " % (len(
            scriptdict[key]), name)
        trees.append(tree)

        fw.close()

    #store trees
    trees = sorted(trees, key=lambda x: x.get_length())
    fw = open(os.path.join(dst_path, "trees"), 'w')
    fw_json = open(os.path.join(dst_path, "jsons"), 'w')
    for i in range(len(trees)):
        tree_val = trees[i].dumps()
        url = scriptdict[trees[i].key][0][1]
        storeTree(url, trees[i].key, tree_val)
        fw.write("1 %.3d: %s\n" % (i, tree_val))
        new_tree = TemplateTree(None, None)
        new_tree.loads(tree_val)

        if trees[i].type == "js":
            fw.write("2 %.3d: %s\n" % (i, getTreeSeq(new_tree.nodes)))
        elif trees[i].type == 'json':
            fw.write("2 %.3d: %s\n" % (i, json.dumps(new_tree.nodes)))
    fw.close()
    fw_json.close()
    print "generate %d trees for %d scripts uniqe[%d]" \
      %(len(trees), total_script_blocks, total_uniq_script_blocks)

    print "static_scripts:%d  dynamic_scripts:%d" % (static_scripts,
                                                     dynamic_scripts)

    return insufficient_urls
Example #9
def generateTemplateBasedOnURLsFromFile(path, dst_path):
  f = open(path)
  scriptdict = {}
  total_script_blocks = 0
  total_uniq_script_blocks = 0
  debug_dict = {}

  static_scripts = 0
  dynamic_scripts = 0

  for line in f:
    url = line.strip()
    print "process url "+url
    hosts, inlines = fetchScripts(url)
    if inlines==None or len(inlines) ==0:
      print "no inlines for "+url
      continue
    for inline in inlines:
      is_json = False
      rs, sc = analyzeJSCodesFinerBlock(inline)
      if rs == None:
        rs = analyzeJSON(inline)
        is_json = True
      if rs == None:
        continue
      
      if is_json:
        tree = TemplateTree(rs, None)
        if not tree.key in scriptdict:
          scriptdict[tree.key] = [(inline, url, tree, -1)]
          debug_dict[tree.key] = [inline]
        else:
          debug_dict[tree.key].append(inline)
          contents = [x[0] for x in scriptdict[tree.key]]
          if not inline in contents:
            scriptdict[tree.key].append((inline, url, tree, -1))
            total_uniq_script_blocks += 1
        total_script_blocks += 1

      else:
        for index in range(len(rs)):
          total_script_blocks += 1
          seq = rs[index]
          tree = TemplateTree(seq, None)
          key = tree.key
          if not key in scriptdict:
            debug_dict[key] = [sc[index]]
            scriptdict[key] = [(sc[index], url, tree, index)]
            print "  add key  %s" %key
          else:
            contents = [x[0] for x in scriptdict[key]]
            debug_dict[key].append(sc[index])
            if not sc[index] in contents: 
              scriptdict[key].append((sc[index],url, tree, index))
              print "  item %s has %d unique scripts" %(key, len(scriptdict[key]))
              total_uniq_script_blocks += 1
 
  fw = open(os.path.join(dst_path,'debug'),'w')
  for k in debug_dict:
    vals = debug_dict[k]
    fw.write("%d %s \n" %(len(vals),k)) 
    fw.write("  --EXAMPLE-- %s\n" %vals[0])
  fw.close()
  #start to analyze trees
  #scriptdict[tree_key] = [(script, url, tree, index)]
  trees = []
  insufficient_urls = {}
  keys = sorted(scriptdict.keys(), key=lambda k:len(scriptdict[k]))
  for key in keys:
    is_static = True
    name = "%d_%s" %(len(scriptdict[key]),key)
    fw = open(os.path.join(dst_path,name), 'w')
    for item in scriptdict[key]:
      fw.write(item[1]+"||"+str(item[3])+"  "+str(item[0])+"\n")
    
    #make sure all template trees with the same key are the same
    script_list = scriptdict[key]
    length_list = sorted([len(item[2].nodes) for item in script_list])
    seq_length = 0
    if length_list[0] != length_list[-1]:
      fw.write("[ALERT] seq length is not consistent")
      fw.close()
      continue
    else:
      seq_length = length_list[0]

    #only handle JavaScript for now
    tree = script_list[0][2]
    if tree.type == "json":
      print "the inline is json!"
      fw.write("[TODO]: the inline is json. This is next step\n")
      fw.close()
      trees.append(tree)
      continue  
    
    #process String/Object/Array nodes
    #script_list: [(script, url, tree, index)]
    fw.write("start analyzeing values\n")    
    script_length = len(script_list)

    for i in range(seq_length):
      node = script_list[0][2].nodes[i]
      try:
        if node.tag == "String":
          vals = [item[2].nodes[i].value for item in script_list]
          encoded_val = [b64encode(x) for x in vals]
          #item = 'string%d: %s' %(i, ','.join(encoded_val))
          #fw.write(item+"\n")
          tree.strings[i] = vals
          node_pattern = generateNodePattern(vals)
          if is_static and \
            ((node_pattern.tp!=StringType.CONST) and (node_pattern.tp!=StringType.INSUFFICIENT)):
            is_static = False
            dynamic_scripts += script_length

          tree.string_types_str[str(i)] = node_pattern.dumps()
          if node_pattern.is_insufficient():
            if not key in insufficient_urls:
              insufficient_urls[key] = \
                [item[1] for item in script_list]
            else:
              insufficient_urls[key] += [item[1] for item in script_list]
          # testing
          #node_pattern = NodePattern()
          #r = node_pattern.loads(tree.string_types_str[i])
          #if r == False:
          #  print "node_pattern failed to load: "+tree.string_types_str[i]
          #else:
          #  print "successfully loaded tree: "+tree.string_types_str[i]
          print "STRING%d: [TYPE:%s] [VALUE:%s]" \
            %(i, tree.string_types_str[str(i)],','.join(encoded_val))
        if node.tag == "Object":
          #debug = "tag:%s val:%s" \
          #  %(script_list[0][2].nodes[i].tag,str(script_list[0][2].nodes[i].value))
          #print "DEBUG: %s" %debug
          rs = analyzeObjectResultHelper(script_list, i)
          rs = extractObjectValues(rs)
          type_dict = {}
          for k in rs:
            encoded_val = [b64encode(x) for x in rs[k]]
            node_pattern = generateNodePattern(rs[k])
            if is_static and \
              ((node_pattern.tp!=StringType.CONST) and (node_pattern.tp!=StringType.INSUFFICIENT)):
              is_static = False
              dynamic_scripts += script_length

            type_dict[k] = node_pattern.dumps()
            if node_pattern.is_insufficient():
              if not key in insufficient_urls:
                insufficient_urls[key] = \
                  [item[1] for item in script_list]
              else:
                insufficient_urls[key] += [item[1] for item in script_list]
            #testing
            #node_pattern = NodePattern()
            #r = node_pattern.loads(type_dict[k])
            #if r == False:
            #  print "node_pattern failed to load: "+type_dict[k]
            #else:
            #  print "successfully loaded tree: "+type_dict[k]
            print "OBJECT%d: [TYPE:%s] [KEY:%s][VALUE:%s]" \
              %(i, type_dict[k], k, ','.join(encoded_val))
          tree.objects[i] = rs
          tree.object_types_str[str(i)] = type_dict
        if node.tag == "Array":
          rs = analyzeArrayResultHelper(script_list, i)
          rs = extractObjectValues(rs)
          type_dict = {}
          for k in rs:
            encoded_val = [b64encode(x) for x in rs[k]]
            #fw.write("array%d: %s:%s\n" % (i, k, ','.join(encoded_val)) )
            node_pattern = generateNodePattern(rs[k])
            if is_static and \
              ((node_pattern.tp!=StringType.CONST) and (node_pattern.tp!=StringType.INSUFFICIENT)):
              is_static = False
              dynamic_scripts += script_length

            type_dict[k] = node_pattern.dumps()
            if node_pattern.is_insufficient():
              if not key in insufficient_urls:
                insufficient_urls[key] = \
                  [item[1] for item in script_list]
              else:
                insufficient_urls[key] += [item[1] for item in script_list]
            #testing
            #node_pattern = NodePattern()
            #r = node_pattern.loads(type_dict[k])
            #if r == False:
            #  print "node_pattern failed to load: "+type_dict[k]
            #else:
            #  print "successfully loaded tree: "+type_dict[k]
            print "ARRAY%d: [TYPE:%s] [KEY:%s][VALUE:%s]" \
              %(i, type_dict[k], k, ','.join(encoded_val))
          tree.arrays[i] = rs
          tree.array_types_str[str(i)] = type_dict
      except Exception as e:
        displayErrorMsg("fetchAndProcessScriptsOfURLsFromFile",\
           "excpetion in analyzing node %d %s " %(i, str(e))) 
    
    if is_static:
      static_scripts += script_length

    print "Done writing %d items for file %s " %(len(scriptdict[key]), name)
    trees.append(tree)
    
    fw.close()
  
  #store trees
  trees = sorted(trees, key=lambda x:x.get_length())
  fw = open(os.path.join(dst_path,"trees"), 'w')
  fw_json = open(os.path.join(dst_path,"jsons"), 'w')
  for i in range(len(trees)):
    tree_val = trees[i].dumps()
    url = scriptdict[trees[i].key][0][1]
    storeTree(url,trees[i].key, tree_val)
    fw.write( "1 %.3d: %s\n" %(i, tree_val))
    new_tree = TemplateTree(None, None)
    new_tree.loads(tree_val)

    if trees[i].type == "js":
      fw.write( "2 %.3d: %s\n" %(i, getTreeSeq(new_tree.nodes)))
    elif trees[i].type == 'json':
      fw.write("2 %.3d: %s\n" % (i, json.dumps(new_tree.nodes)))
  fw.close()
  fw_json.close()
  print "generate %d trees for %d scripts uniqe[%d]" \
    %(len(trees), total_script_blocks, total_uniq_script_blocks)

  print "static_scripts:%d  dynamic_scripts:%d" %(static_scripts, dynamic_scripts)

  return insufficient_urls
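The function returns insufficient_urls, a dict mapping a tree key to the URLs whose observed values were too few to derive a stable node pattern. A minimal usage sketch follows; the input file and output directory names are hypothetical.

# Sketch only: list templates that still need more crawled samples.
insufficient = generateTemplateBasedOnURLsFromFile('url_list.txt', '/tmp/templates')
for key in insufficient:
  urls = set(insufficient[key])  # the same URL may be recorded more than once
  print "template %s needs more samples; %d candidate urls to re-crawl" \
    % (key, len(urls))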