def main(): parser = OptionParser() parser.add_option("-c", "--country-name", type="string", help="the country from which you want run the test", dest="medialist") parser.add_option("-o", "--output-dir", type="string", default=None, help="directory to store results", dest="user_outputdir") parser.add_option("-l", "--local-phantom", action="store_true", help="use local phantomjs instead of the downloaded one", dest="lp") parser.add_option("-d", "--disable-sending", action="store_true", help="disable the result sending at the end of the test", dest="disable_send") parser.add_option("-i", "--instable-internet", action="store_true", help="If your internet is instable, please enable this option", dest="shitty_internet") parser.add_option("-s", "--send", type="string", dest="targz_output", help="do not perform test, submit a previously collected result.") parser.add_option("-v", "--version", action="store_true", dest="version", help="print version, spoiler: %d" % ANALYSIS_VERSION) (args, _) = parser.parse_args() if args.version: print "analysis format version:", ANALYSIS_VERSION quit(0) if args.targz_output: if args.disable_send: print colored("You can't use -s (--send) and -d (--disable-sending) options together") quit(-1) if not os.path.isfile(args.targz_output): print colored("Invalid file: %s" % args.targz_output) quit(-1) print colored(" ࿓ Sending previous results...", 'blue', 'on_white', attrs=['underline']) send_results(args.targz_output, hiddenservice_tuple) quit(0) if not args.medialist: print colored("Usage: %s -c $YOUR_COUNTRY_NAME" % sys.argv[0], "red", 'on_white') print colored("\t-l (local phantom, instead of the symlink here)", "red", 'on_white') print colored("\t-o output directory, used to collect test results", "red", 'on_white') print "" print " -l option is needed if you want use your own /usr/bin/phantomjs" print " (if you follow README.md, this is not needed because you downloaded phantomjs 1.9.2)" print " ",colored("By default, this software is 
looking for symlink 'phantom-1.9.2'", "green", "on_white") if os.path.islink('phantom-1.9.2'): print " ",colored("phantom-1.9.2 is a link, as expected.", "green", "on_white") else: print " ",colored("The phantom-1.9.2 link is missing!", "red", "on_white") print "Look in the verified_media/ for a list of countries." print "TrackMap collection tool version: %d" % ANALYSIS_VERSION quit(-1) # check if the user is running phantom as installed on the system (also vagrant make this) # of if is using if args.lp: local_phantomjs = True print colored("You're using your local installed phantomjs. A version >= than 1.9.0 is needed.", 'blue', 'on_white') print colored("I'm not going to compare the string. Be aware: this is your version:", 'red') phantom_version = Popen(['phantomjs', '-v'], stdout=PIPE).stdout.readline() print colored(phantom_version, 'blue', 'on_white') else: if not os.path.islink('phantom-1.9.2'): print colored("Missing phantom-1.9.2. A symbolic link named phantom-1.9.2 was expected, but not found. Please consult README.md and make sure you've followed the installation procedure exactly.", 'red', 'on_white') quit(-1) local_phantomjs = False if not args.disable_send: tor_test = ("127.0.0.1", 9050) c = socket.socket() try: c.connect( tor_test ) c.close() except Exception as xxx: print colored("Unable to connect to %s, Tor is needed to send results" % str(tor_test), "red") print colored(xxx, "red") print colored("You can disable result sending with the option -d", "yellow") quit(-1) del c # country check proposed_country = args.medialist country_f = os.path.join('verified_media', proposed_country.lower()) if not os.path.isfile(country_f): print colored("Invalid country! 
not found %s in directory 'verified_media/' " % proposed_country, 'red') print "Available countries are:" for existing_c in os.listdir('verified_media'): if existing_c in ['README.md', 'test']: continue print "\t", existing_c print colored("You can propose your own country media list following these instructions:", 'blue', 'on_white') print colored("https://github.com/vecna/trackmap/blob/master/unverified_media_list/README.md", 'blue', 'on_white') quit(-1) # check if the output directory is not the default and/or if need to be created if args.user_outputdir: OUTPUTDIR = args.user_outputdir else: OUTPUTDIR = 'output/' if not os.path.isdir(OUTPUTDIR): try: os.mkdir(OUTPUTDIR) except OSError as error: print "unable to create %s: %s" % (OUTPUTDIR, error) # ask free information to the script runner info_f = os.path.join(OUTPUTDIR, 'information') if os.path.isfile(info_f): f = open(info_f, 'r') information = json.load(f) f.close() print colored("Recovered information of previous collection:", 'green') print " name:", information['name'] print " contact:", information['contact'] print " ISP:", information['ISP'] print " city:", information['city'] else: information = {} print colored("Optionally, provide the information requested below, or press Enter to skip:", 'green') def question(description): print colored(description, 'white', 'on_blue') answer = sys.stdin.readline() answer = answer.strip('\n') return None if not len(answer) else answer information['name'] = question('Your name:') information['contact'] = question('Mail or jabber contact:') information['ISP'] = question('Which ISP is providing your link:') information['city'] = question('From which city you\'re running this script:') information['version'] = ANALYSIS_VERSION with file(info_f, 'w+') as f: json.dump(information, f) # writing in a file which country you're using! 
with file(os.path.join(OUTPUTDIR, 'country'), 'w+') as f: f.write(proposed_country.lower()) # reading media list, cleaning media list and copy media list cfp = file(country_f, 'r') unclean_lines = cfp.readlines() with file(os.path.join(OUTPUTDIR, 'used_media_list'), 'w+') as f: f.writelines(unclean_lines) # reconding an unique number is always useful, also if I've not yet in mind an usage right now. with file( os.path.join(OUTPUTDIR, "unique_id"), "w+") as f: f.write("%d%d%d" % (random.randint(0, 0xffff), random.randint(0, 0xffff), random.randint(0, 0xffff)) ) print colored(" ࿓ Importing media list:", 'blue', 'on_white', attrs=['underline']) media_entries = media_file_cleanings(unclean_lines) cfp.close() print colored(" ࿓ Checking your network source.", 'blue', 'on_white', attrs=['underline']) do_wget( os.path.join(OUTPUTDIR, 'first.json')) print colored(" ࿓ Starting media crawling:", 'blue', 'on_white', attrs=['underline']) # here start iteration over the media! phantom_stats = {} for cleanurl, media_kind in media_entries.iteritems(): urldir = os.path.join(OUTPUTDIR, cleanurl) title_check = os.path.join(urldir, '__title') if os.path.isdir(urldir) and os.path.isfile(title_check): print "-", urldir, "already present: skipped" phantom_stats.setdefault('resumed', []).append(cleanurl) continue if os.path.isdir(urldir): # being here means that is empty or incomplete shutil.rmtree(urldir) print "+ Creating directory", urldir os.mkdir(urldir) retinfo = do_phantomjs(local_phantomjs, cleanurl, urldir, media_kind, OUTPUTDIR) assert retinfo in [ 'first', 'second', 'failures' ] phantom_stats.setdefault(retinfo, []).append(cleanurl) # take every directory in 'output/', get the included URL and dump in a dict included_url_dict = sortify(OUTPUTDIR) assert included_url_dict, "No url included after phantom scraping and collection !?" with file(os.path.join(OUTPUTDIR, 'domain.infos'), 'w+') as f: json.dump(included_url_dict, f) # generate DNS resolution map. 
for every host resolve an IP, for every IP resolve again DNS print colored(" ࿓ DNS resolution and reverse of %d domains..." % len(included_url_dict), 'blue', 'on_white', attrs=['underline']) # new format contain: # first dict: resolution error # second dict: reverse error dns_error = [{}, {}] # now, until there is not refactor based on classes, # the resolution of the previously failed DN will not happen resolution_dns_f = os.path.join(OUTPUTDIR, 'resolution.dns') if os.path.isfile(resolution_dns_f): fp = file(resolution_dns_f, 'r') ip_map = json.load(fp) fp.close() else: ip_map = {} counter = 0 percentage_bound = len(included_url_dict.keys()) / 10.0 if not int(percentage_bound): percentage_bound = 1.0 for domain in included_url_dict.keys(): counter += 1 if not counter % int(percentage_bound): print "%d\t%d%%\t%s" % (counter, (counter * (10 / percentage_bound) ), time.ctime()) # other random possibility based on birthday paradox to show counters... if random.randint(0, int(percentage_bound * 10 )) == counter: print "%d\t%d%%\t%s" % (counter, (counter * (10 / percentage_bound) ), time.ctime()) try: if args.shitty_internet: socket.setdefaulttimeout(1.1) else: socket.setdefaulttimeout(0.5) resolved_v4 = socket.gethostbyname(domain) except Exception as xxx: dns_error[0].setdefault(xxx.strerror, []).append(domain) continue ip_map.setdefault(resolved_v4, []).append(domain) with file(resolution_dns_f, 'w+') as f: json.dump(ip_map, f) print colored("\nResolved %d unique IPv4 from %d unique domain" % (len(ip_map.keys()), len(included_url_dict.keys())), 'green') if not len(ip_map.keys()): print colored("It appears that you can't access the internet. Please fix that and restart the test.", 'red') quit(-1) print colored("\nReversing DNS for %d unique IP address..." 
% len(ip_map.keys() ), 'green') reverse_dns_f = os.path.join(OUTPUTDIR, 'reverse.dns') if os.path.isfile(reverse_dns_f): fp = file(reverse_dns_f, 'r') true_domain_map = json.load(fp) fp.close() else: true_domain_map = {} counter = 0 percentage_bound = len(ip_map.keys()) / 10.0 if not int(percentage_bound): percentage_bound = 1.0 for ipv4 in ip_map.keys(): counter += 1 if not (counter % int(percentage_bound) ): print "%d\t%d%%\t%s" % (counter, (counter * (10 / percentage_bound) ), time.ctime()) # other random possibility based on birthday paradox to show counters... if random.randint(0, int(percentage_bound * 10 )) == counter: print "%d\t%d%%\t%s" % (counter, (counter * (10 / percentage_bound) ), time.ctime()) try: if args.shitty_internet: socket.setdefaulttimeout(1.9) else: socket.setdefaulttimeout(0.9) resolved_set = socket.gethostbyaddr(ipv4) resolved_name = resolved_set[0] except Exception as xxx: dns_error[1].setdefault(xxx.strerror, []).append(ipv4) continue true_domain_map.setdefault(resolved_name, []).append(ipv4) with file(reverse_dns_f, 'w+') as f: json.dump(true_domain_map, f) print colored("\nReversed %d unique FQDN" % len(true_domain_map.keys() ), 'green') print colored("Saving DNS errors in 'errors.dns'") with file(os.path.join(OUTPUTDIR, 'errors.dns'), 'w+') as f: json.dump(dns_error, f) # traceroutes contains all the output of traceroute in JSON format, separated # for logs. this output is not in the media directory, because some host like # google are included multiple times. trace_output = os.path.join(OUTPUTDIR, '_traceroutes') if not os.path.isdir(trace_output): os.mkdir(trace_output) # _verbotracelogs instead contain the detailed log of traceroute, # they would be useful in the future because AS number is not yet used # as information in the backend, but, who knows... 
verbotracelogs = os.path.join(OUTPUTDIR, '_verbotracelogs') if not os.path.isdir(verbotracelogs): os.mkdir(verbotracelogs) # saving again information about network location do_wget( os.path.join(OUTPUTDIR, 'second.json') ) # starting traceroute to all the collected IP print colored(" ࿓ Running traceroute to %d IP address (from %d hosts)" % ( len(ip_map.keys()), len(included_url_dict.keys())), 'blue', 'on_white', attrs=['underline']) counter = 1 trace_stats = {} gi = GeoIP.new(GeoIP.GEOIP_MEMORY_CACHE) for ip_addr, hostlist in ip_map.iteritems(): progress_string = "%d/%d" % (counter, len(ip_map.keys())) print colored("%s%s" % (progress_string, (10 - len(progress_string)) * " " ), "cyan" ), t = Traceroute(OUTPUTDIR, ip_addr, hostlist, gi, args.shitty_internet) counter += 1 if t.already_traced(): print colored ("%s already traced (%d hosts): skipping" % (ip_addr, len(hostlist) ), "green") retinfo = "recover" elif not t.do_trace(): retinfo = "fail" print colored("Traceroute fails! (%d/10)" % TraceStats.three_hundres, "red") else: retinfo = "success" try: t.resolve_target_geoip() t.file_dump() except Exception: retinfo = "anomaly" del t assert retinfo in [ 'recover', 'success', 'anomaly', 'fail' ] trace_stats.setdefault(retinfo, []).append(ip_addr) # Traceroute class need to be enhanced with some kind of: # * failure measurement and GUESSING WHY # * retry after a while # * estimation of shared path - optimization and stabler collection if trace_stats.has_key('fail') and len(trace_stats['fail']): print colored(" ࿓ Testing again the failed traceroute to %d IP address" % len(trace_stats['fail'])) else: # just here to skip a KeyError below trace_stats.update({'fail': []}) counter = 1 fail_list_copy = list(trace_stats['fail']) # a list is done because inside of the loop is changed the # content of trace_stats['fail'] for case_n, failed_trace in enumerate(fail_list_copy): hostlist = ip_map[failed_trace] t = Traceroute(OUTPUTDIR, failed_trace, hostlist, gi, 
args.shitty_internet) counter += 1 if not t.do_trace(): print colored("Failure again.", "red") retinfo = "fail" else: retinfo = "retry" trace_stats['fail'].remove(failed_trace) try: t.resolve_target_geoip() t.file_dump() except Exception: retinfo = "anomaly" del t assert retinfo in [ 'recover', 'success', 'anomaly', 'fail', 'retry' ] trace_stats.setdefault(retinfo, []).append(failed_trace) TraceStats([]).dump_stats(OUTPUTDIR) if trace_stats.values().count(False): print colored("Registered %d failures" % trace_stats.values().count(False), "red") ptsj = os.path.join(OUTPUTDIR, '_phantom.trace.stats.json') if os.path.isfile(ptsj): os.unlink(ptsj) with file(ptsj, 'w+') as fp: json.dump([ phantom_stats, trace_stats ], fp) # saving again*again information about network location do_wget(os.path.join(OUTPUTDIR, 'third.json')) output_name = 'results-%s.tar.gz' % proposed_country.lower() print colored(" ࿓ Analysis done! compressing the output in %s" % output_name, "blue", 'on_white', attrs=['underline']) if os.path.isfile(output_name): os.unlink(output_name) tar = Popen(['tar', '-z', '-c', '-v', '-f', output_name, OUTPUTDIR], stdout=PIPE) counter_line = 0 while True: line = tar.stdout.readline() counter_line += 1 if not line: break if args.disable_send: print colored("%d file added to %s" % (counter_line, output_name), "green") print colored("Sending disable, test complete.", "yellow"), print colored("亷 亸", 'blue', 'on_white') quit(0) print colored("%d file added to %s, Starting to submit results via Tor network\n" % (counter_line, output_name), "green") print colored("If submitting results fails please run:", "red") print colored("./perform_analysis.py -s %s" % output_name, "yellow") send_results(output_name, hiddenservice_tuple)
def main():
    """Legacy (positional-argument) collection run.

    Usage: script $YOUR_COUNTRY_NAME [lp]. Crawls the country's media
    list with phantomjs, traceroutes the included hosts, tars the output
    directory and hands the tarball to ./sender_results.py via torify.
    Exits with quit(-1) on bad usage or missing prerequisites.

    Relies on module-level names: OUTPUTDIR, colored, media_file_cleanings,
    do_phantomjs, sortify, do_trace.
    """
    # Ensure the (module-level) output directory exists; creation errors are
    # only printed, not fatal here.
    if not os.path.isdir(OUTPUTDIR):
        try:
            os.mkdir(OUTPUTDIR)
        except OSError as error:
            print "unable to create %s: %s" % (OUTPUTDIR, error)

    # Usage check: at least the country name is required.
    if len(sys.argv) < 2:
        print colored("Usage: %s $YOUR_COUNTRY_NAME <lp>" % sys.argv[0], "red", 'on_white')
        print ""
        print " 'lp' as 3rd argument is needed if you want use your own /usr/bin/phantomjs"
        print " (if you follow README.md, this is not needed because you've phantomjs 1.9.2)"
        print " ", colored("By default, this software is looking for symlink 'phantom-1.9.2'", "green", "on_white")
        if os.path.islink('phantom-1.9.2'):
            print " ", colored("Link that I've checked: you have ;)", "green", "on_white")
        else:
            print " ", colored("Link that I've checked: YOU HAVE NOT!", "red", "on_white")
        quit(-1)

    # Choose phantomjs: 'lp' selects the system binary, otherwise the
    # phantom-1.9.2 symlink from README.md is required.
    if len(sys.argv) == 3 and sys.argv[2] == 'lp':
        local_phantomjs = True
        print colored("You're using your local installed phantomjs. It is needed a version >= than 1.9.0", 'blue', 'on_white')
        print colored("I'm not gonna to compare the string, so, be aware: this is your version:", 'red')
        phantom_version = Popen(['phantomjs', '-v'], stdout=PIPE).stdout.readline()
        print colored(phantom_version, 'blue', 'on_white')
    else:
        if not os.path.islink('phantom-1.9.2'):
            print colored("You have not followd README.md :( I was expecting a symbolick link called phantom-1.9.2", 'red', 'on_white')
            quit(-1)
        local_phantomjs = False

    # Country check: the argument must match a file in verified_media/.
    proposed_country = sys.argv[1]
    country_f = os.path.join('verified_media', proposed_country.lower())
    if not os.path.isfile(country_f):
        print colored("Invalid country! not found %s in directory 'verified_media/' " % proposed_country, 'red')
        print "Available countries are:"
        for existing_c in os.listdir('verified_media'):
            if existing_c in ['README.md', 'test']:
                continue
            print "\t", existing_c
        print colored("You can propose your own country media list following these instructions:", 'blue', 'on_yellow')
        print colored("https://github.com/vecna/helpagainsttrack/blob/master/unverified_media_list/README.md", 'blue', 'on_yellow')
        quit(-1)

    # Record which country this run used.
    with file(os.path.join(OUTPUTDIR, 'country'), 'w+') as f:
        f.write(proposed_country.lower())

    # Read the media list and keep a verbatim copy next to the results.
    cfp = file(country_f, 'r')
    unclean_lines = cfp.readlines()
    with file(os.path.join(OUTPUTDIR, 'used_media_list'), 'w+') as f:
        f.writelines(unclean_lines)

    media_entries = media_file_cleanings(unclean_lines)
    cfp.close()

    # Crawl every media site; completed ones (directory + __title marker)
    # are skipped, partial ones are wiped and redone.
    for cleanurl, media_kind in media_entries.iteritems():
        urldir = os.path.join(OUTPUTDIR, cleanurl)
        title_check = os.path.join(urldir, '__title')
        if os.path.isdir(urldir) and os.path.isfile(title_check):
            print "-", urldir, "already present: skipped"
            continue
        if os.path.isdir(urldir):
            # being here means that the directory is empty or incomplete
            shutil.rmtree(urldir)
        print "+ Creating directory", urldir
        os.mkdir(urldir)
        do_phantomjs(local_phantomjs, cleanurl, urldir, media_kind)

    # Take every directory in 'output/' and work on the content.
    included_url_dict = sortify(OUTPUTDIR)
    assert included_url_dict, "No url included after phantom scraping and collection !?"
    with file(os.path.join(OUTPUTDIR, 'domain.infos'), 'w+') as f:
        json.dump(included_url_dict, f)

    # _traceroutes holds traceroute output in JSON format, kept out of the
    # per-media directories because some hosts (e.g. google) are included
    # multiple times.
    trace_output = os.path.join(OUTPUTDIR, '_traceroutes')
    if not os.path.isdir(trace_output):
        os.mkdir(trace_output)

    # _verbotracelogs keeps the raw detailed traceroute logs; AS numbers are
    # not used by the backend yet, but the logs may be useful later.
    verbotracelogs = os.path.join(OUTPUTDIR, '_verbotracelogs')
    if not os.path.isdir(verbotracelogs):
        os.mkdir(verbotracelogs)

    print "Running traceroute to", len(included_url_dict.keys()), "hosts!"
    counter = 1
    failure = 0
    # NOTE(review): domain_info is never used in the loop body, and
    # do_trace is called with the same url for both arguments — presumably
    # (target, label); verify against do_trace's signature.
    for url, domain_info in included_url_dict.iteritems():
        progress_string = "%d/%d" % (counter, len(included_url_dict.keys()))
        print colored("%s%s" % (progress_string, (10 - len(progress_string)) * " "), "cyan"),
        if not do_trace(url, url):
            failure += 1
        counter += 1

    if failure:
        print colored("Registered %d failures" % failure, "red")

    # putting the unique number into the output directory
    with file(os.path.join(OUTPUTDIR, "unique_id"), "w+") as f:
        f.write("%d%d%d" % (random.randint(0, 0xffff), random.randint(0, 0xffff), random.randint(0, 0xffff)))

    output_name = 'results-%s.tar.gz' % proposed_country.lower()
    print colored("Finished! compressing the data in %s" % output_name, "green")

    if os.path.isfile(output_name):
        os.unlink(output_name)

    tar = Popen(['tar', '-z', '-c', '-v', '-f', output_name, OUTPUTDIR], stdout=PIPE)
    counter_line = 0
    while True:
        line = tar.stdout.readline()
        # NOTE(review): the counter is incremented before the EOF check, so
        # the reported file count is one higher than the real one.
        counter_line += 1
        if not line:
            break

    print colored("%d file added to %s, Starting 'result_sender.py'" % (counter_line, output_name), "green")

    # The result sender has hardcoded our hidden service; run it through
    # torify and relay its output, muting known libtorsocks noise.
    p = Popen(['torify', 'python', './sender_results.py', output_name], stdout=PIPE, stderr=PIPE)
    while True:
        line = p.stdout.readline()
        exx = p.stderr.readline()
        if not line and not exx:
            break
        if exx.find('failed to find the symbol') != -1:
            continue
        if exx.find('libtorsocks') != -1:
            continue
        if line:
            print colored(" %s" % line, 'yellow'),
        if exx:
            print colored(exx, 'red'),
def main():
    """Run a full TrackMap collection: crawl a country's media list with
    phantomjs, resolve/reverse DNS for every included domain, traceroute
    every unique IP, then tar the output and (unless disabled with -d)
    submit it over Tor.

    Command line only; no parameters, no return value. Exits via quit()
    on bad usage, missing prerequisites, or after a successful -s/-v run.
    Relies on module-level helpers/constants: colored, send_results,
    do_wget, do_phantomjs, media_file_cleanings, sortify, Traceroute,
    TraceStats, GeoIP, ANALYSIS_VERSION, hiddenservice_tuple.
    """
    parser = OptionParser()
    parser.add_option("-c", "--country-name", type="string",
                      help="the country from which you want run the test",
                      dest="medialist")
    parser.add_option("-o", "--output-dir", type="string", default=None,
                      help="directory to store results", dest="user_outputdir")
    parser.add_option("-l", "--local-phantom", action="store_true",
                      help="use local phantomjs instead of the downloaded one",
                      dest="lp")
    parser.add_option("-d", "--disable-sending", action="store_true",
                      help="disable the result sending at the end of the test",
                      dest="disable_send")
    parser.add_option("-i", "--instable-internet", action="store_true",
                      help="If your internet is instable, please enable this option",
                      dest="shitty_internet")
    parser.add_option("-s", "--send", type="string", dest="targz_output",
                      help="do not perform test, submit a previously collected result.")
    parser.add_option("-v", "--version", action="store_true", dest="version",
                      help="print version, spoiler: %d" % ANALYSIS_VERSION)

    (args, _) = parser.parse_args()

    # -v: report the analysis format version and stop.
    if args.version:
        print "analysis format version:", ANALYSIS_VERSION
        quit(0)

    # -s: submit a previously collected tarball instead of running a test.
    if args.targz_output:
        if args.disable_send:
            print colored(
                "You can't use -s (--send) and -d (--disable-sending) options together"
            )
            quit(-1)
        if not os.path.isfile(args.targz_output):
            print colored("Invalid file: %s" % args.targz_output)
            quit(-1)
        print colored(" ࿓ Sending previous results...", 'blue', 'on_white',
                      attrs=['underline'])
        send_results(args.targz_output, hiddenservice_tuple)
        quit(0)

    # -c is mandatory for a collection run: without it, print usage and exit.
    if not args.medialist:
        print colored("Usage: %s -c $YOUR_COUNTRY_NAME" % sys.argv[0], "red", 'on_white')
        print colored("\t-l (local phantom, instead of the symlink here)", "red", 'on_white')
        print colored("\t-o output directory, used to collect test results", "red", 'on_white')
        print ""
        print " -l option is needed if you want use your own /usr/bin/phantomjs"
        print " (if you follow README.md, this is not needed because you downloaded phantomjs 1.9.2)"
        print " ", colored("By default, this software is looking for symlink 'phantom-1.9.2'", "green", "on_white")
        if os.path.islink('phantom-1.9.2'):
            print " ", colored("phantom-1.9.2 is a link, as expected.", "green", "on_white")
        else:
            print " ", colored("The phantom-1.9.2 link is missing!", "red", "on_white")
        print "Look in the verified_media/ for a list of countries."
        print "TrackMap collection tool version: %d" % ANALYSIS_VERSION
        quit(-1)

    # Decide which phantomjs binary to use: the system-wide one (-l) or the
    # phantom-1.9.2 symlink shipped per README.md.
    if args.lp:
        local_phantomjs = True
        print colored(
            "You're using your local installed phantomjs. A version >= than 1.9.0 is needed.",
            'blue', 'on_white')
        print colored(
            "I'm not going to compare the string. Be aware: this is your version:",
            'red')
        phantom_version = Popen(['phantomjs', '-v'], stdout=PIPE).stdout.readline()
        print colored(phantom_version, 'blue', 'on_white')
    else:
        if not os.path.islink('phantom-1.9.2'):
            print colored(
                "Missing phantom-1.9.2. A symbolic link named phantom-1.9.2 was expected, but not found. Please consult README.md and make sure you've followed the installation procedure exactly.",
                'red', 'on_white')
            quit(-1)
        local_phantomjs = False

    # Results are submitted through Tor: verify the local SOCKS port is
    # reachable up front, unless sending was disabled with -d.
    if not args.disable_send:
        tor_test = ("127.0.0.1", 9050)
        c = socket.socket()
        try:
            c.connect(tor_test)
            c.close()
        except Exception as xxx:
            print colored(
                "Unable to connect to %s, Tor is needed to send results" % str(tor_test),
                "red")
            print colored(xxx, "red")
            print colored("You can disable result sending with the option -d", "yellow")
            quit(-1)
        del c

    # Country check: the -c value must match a file in verified_media/.
    proposed_country = args.medialist
    country_f = os.path.join('verified_media', proposed_country.lower())
    if not os.path.isfile(country_f):
        print colored(
            "Invalid country! not found %s in directory 'verified_media/' " % proposed_country,
            'red')
        print "Available countries are:"
        for existing_c in os.listdir('verified_media'):
            if existing_c in ['README.md', 'test']:
                continue
            print "\t", existing_c
        print colored(
            "You can propose your own country media list following these instructions:",
            'blue', 'on_white')
        print colored(
            "https://github.com/vecna/helpagainsttrack/blob/master/unverified_media_list/README.md",
            'blue', 'on_white')
        quit(-1)

    # Output directory: -o overrides the default 'output/'. Note: this is a
    # local binding (shadows any module-level OUTPUTDIR for the rest of main).
    if args.user_outputdir:
        OUTPUTDIR = args.user_outputdir
    else:
        OUTPUTDIR = 'output/'

    if not os.path.isdir(OUTPUTDIR):
        try:
            os.mkdir(OUTPUTDIR)
        except OSError as error:
            print "unable to create %s: %s" % (OUTPUTDIR, error)

    # Collector identity: reuse answers from a previous run if present,
    # otherwise ask (every answer optional) and persist them.
    info_f = os.path.join(OUTPUTDIR, 'information')
    if os.path.isfile(info_f):
        f = open(info_f, 'r')
        information = json.load(f)
        f.close()
        print colored("Recovered information of previous collection:", 'green')
        print " name:", information['name']
        print " contact:", information['contact']
        print " ISP:", information['ISP']
        print " city:", information['city']
    else:
        information = {}
        print colored(
            "Optionally, provide the information requested below, or press Enter to skip:",
            'green')

        def question(description):
            # Prompt and return the stripped answer, or None if left blank.
            print colored(description, 'white', 'on_blue')
            answer = sys.stdin.readline()
            answer = answer.strip('\n')
            return None if not len(answer) else answer

        information['name'] = question('Your name:')
        information['contact'] = question('Mail or jabber contact:')
        information['ISP'] = question('Which ISP is providing your link:')
        information['city'] = question(
            'From which city you\'re running this script:')
        information['version'] = ANALYSIS_VERSION

        with file(info_f, 'w+') as f:
            json.dump(information, f)

    # Record which country this run used.
    with file(os.path.join(OUTPUTDIR, 'country'), 'w+') as f:
        f.write(proposed_country.lower())

    # Read the media list and keep a verbatim copy next to the results.
    cfp = file(country_f, 'r')
    unclean_lines = cfp.readlines()
    with file(os.path.join(OUTPUTDIR, 'used_media_list'), 'w+') as f:
        f.writelines(unclean_lines)

    # Recording an unique number is always useful, also if there is no
    # defined usage for it right now.
    with file(os.path.join(OUTPUTDIR, "unique_id"), "w+") as f:
        f.write("%d%d%d" % (random.randint(0, 0xffff), random.randint(
            0, 0xffff), random.randint(0, 0xffff)))

    print colored(" ࿓ Importing media list:", 'blue', 'on_white',
                  attrs=['underline'])
    media_entries = media_file_cleanings(unclean_lines)
    cfp.close()

    print colored(" ࿓ Checking your network source.", 'blue', 'on_white',
                  attrs=['underline'])
    do_wget(os.path.join(OUTPUTDIR, 'first.json'))

    print colored(" ࿓ Starting media crawling:", 'blue', 'on_white',
                  attrs=['underline'])

    # Crawl every media site; completed ones (directory + __title marker)
    # are resumed, partial ones are wiped and redone.
    phantom_stats = {}
    for cleanurl, media_kind in media_entries.iteritems():
        urldir = os.path.join(OUTPUTDIR, cleanurl)
        title_check = os.path.join(urldir, '__title')
        if os.path.isdir(urldir) and os.path.isfile(title_check):
            print "-", urldir, "already present: skipped"
            phantom_stats.setdefault('resumed', []).append(cleanurl)
            continue
        if os.path.isdir(urldir):
            # being here means that the directory is empty or incomplete
            shutil.rmtree(urldir)
        print "+ Creating directory", urldir
        os.mkdir(urldir)
        retinfo = do_phantomjs(local_phantomjs, cleanurl, urldir, media_kind,
                               OUTPUTDIR)
        assert retinfo in ['first', 'second', 'failures']
        phantom_stats.setdefault(retinfo, []).append(cleanurl)

    # Take every directory in 'output/', get the included URLs and dump
    # them in a dict keyed by domain.
    included_url_dict = sortify(OUTPUTDIR)
    assert included_url_dict, "No url included after phantom scraping and collection !?"
    with file(os.path.join(OUTPUTDIR, 'domain.infos'), 'w+') as f:
        json.dump(included_url_dict, f)

    # Generate the DNS resolution map: for every host resolve an IP, then
    # for every IP do a reverse lookup.
    print colored(" ࿓ DNS resolution and reverse of %d domains..." % len(included_url_dict),
                  'blue', 'on_white', attrs=['underline'])

    # dns_error[0]: forward-resolution errors, dns_error[1]: reverse errors,
    # both keyed by the exception's strerror.
    dns_error = [{}, {}]

    # Resume a previous partial resolution if its dump exists; until a
    # class-based refactor, previously failed names are not retried.
    resolution_dns_f = os.path.join(OUTPUTDIR, 'resolution.dns')
    if os.path.isfile(resolution_dns_f):
        fp = file(resolution_dns_f, 'r')
        ip_map = json.load(fp)
        fp.close()
    else:
        ip_map = {}

    counter = 0
    percentage_bound = len(included_url_dict.keys()) / 10.0
    if not int(percentage_bound):
        percentage_bound = 1.0

    for domain in included_url_dict.keys():
        counter += 1
        # Progress line roughly every 10%, plus a random extra line
        # (birthday-paradox style) so long runs look alive.
        if not counter % int(percentage_bound):
            print "%d\t%d%%\t%s" % (counter, (counter * (10 / percentage_bound)),
                                    time.ctime())
        if random.randint(0, int(percentage_bound * 10)) == counter:
            print "%d\t%d%%\t%s" % (counter, (counter * (10 / percentage_bound)),
                                    time.ctime())
        try:
            # Longer timeout when the user declared an unstable link (-i).
            if args.shitty_internet:
                socket.setdefaulttimeout(1.1)
            else:
                socket.setdefaulttimeout(0.5)
            resolved_v4 = socket.gethostbyname(domain)
        except Exception as xxx:
            # NOTE(review): non-socket exceptions may lack .strerror — TODO confirm.
            dns_error[0].setdefault(xxx.strerror, []).append(domain)
            continue
        ip_map.setdefault(resolved_v4, []).append(domain)

    with file(resolution_dns_f, 'w+') as f:
        json.dump(ip_map, f)

    print colored(
        "\nResolved %d unique IPv4 from %d unique domain" % (len(ip_map.keys()),
                                                             len(included_url_dict.keys())),
        'green')

    if not len(ip_map.keys()):
        print colored(
            "It appears that you can't access the internet. Please fix that and restart the test.",
            'red')
        quit(-1)

    print colored(
        "\nReversing DNS for %d unique IP address..." % len(ip_map.keys()), 'green')

    # Resume a previous partial reverse map if present.
    reverse_dns_f = os.path.join(OUTPUTDIR, 'reverse.dns')
    if os.path.isfile(reverse_dns_f):
        fp = file(reverse_dns_f, 'r')
        true_domain_map = json.load(fp)
        fp.close()
    else:
        true_domain_map = {}

    counter = 0
    percentage_bound = len(ip_map.keys()) / 10.0
    if not int(percentage_bound):
        percentage_bound = 1.0

    for ipv4 in ip_map.keys():
        counter += 1
        if not (counter % int(percentage_bound)):
            print "%d\t%d%%\t%s" % (counter, (counter * (10 / percentage_bound)),
                                    time.ctime())
        if random.randint(0, int(percentage_bound * 10)) == counter:
            print "%d\t%d%%\t%s" % (counter, (counter * (10 / percentage_bound)),
                                    time.ctime())
        try:
            if args.shitty_internet:
                socket.setdefaulttimeout(1.9)
            else:
                socket.setdefaulttimeout(0.9)
            resolved_set = socket.gethostbyaddr(ipv4)
            resolved_name = resolved_set[0]
        except Exception as xxx:
            dns_error[1].setdefault(xxx.strerror, []).append(ipv4)
            continue
        true_domain_map.setdefault(resolved_name, []).append(ipv4)

    with file(reverse_dns_f, 'w+') as f:
        json.dump(true_domain_map, f)

    print colored("\nReversed %d unique FQDN" % len(true_domain_map.keys()), 'green')

    print colored("Saving DNS errors in 'errors.dns'")
    with file(os.path.join(OUTPUTDIR, 'errors.dns'), 'w+') as f:
        json.dump(dns_error, f)

    # _traceroutes holds the traceroute output in JSON format, kept out of
    # the per-media directories because some hosts (e.g. google) are
    # included multiple times.
    trace_output = os.path.join(OUTPUTDIR, '_traceroutes')
    if not os.path.isdir(trace_output):
        os.mkdir(trace_output)

    # _verbotracelogs keeps the raw detailed traceroute logs; AS numbers are
    # not used by the backend yet, but the logs may be useful later.
    verbotracelogs = os.path.join(OUTPUTDIR, '_verbotracelogs')
    if not os.path.isdir(verbotracelogs):
        os.mkdir(verbotracelogs)

    # Save again information about network location.
    do_wget(os.path.join(OUTPUTDIR, 'second.json'))

    # Traceroute every collected IP.
    print colored(" ࿓ Running traceroute to %d IP address (from %d hosts)" %
                  (len(ip_map.keys()), len(included_url_dict.keys())), 'blue',
                  'on_white', attrs=['underline'])

    counter = 1
    trace_stats = {}
    gi = GeoIP.new(GeoIP.GEOIP_MEMORY_CACHE)
    for ip_addr, hostlist in ip_map.iteritems():
        progress_string = "%d/%d" % (counter, len(ip_map.keys()))
        print colored(
            "%s%s" % (progress_string, (10 - len(progress_string)) * " "), "cyan"),
        t = Traceroute(OUTPUTDIR, ip_addr, hostlist, gi, args.shitty_internet)
        counter += 1
        if t.already_traced():
            print colored(
                "%s already traced (%d hosts): skipping" % (ip_addr, len(hostlist)),
                "green")
            retinfo = "recover"
        elif not t.do_trace():
            retinfo = "fail"
            print colored(
                "Traceroute fails! (%d/10)" % TraceStats.three_hundres, "red")
        else:
            retinfo = "success"
            # NOTE(review): nesting of this try under the success branch is
            # reconstructed from mangled formatting — confirm geoip/dump were
            # not meant to run on the recover/fail paths too.
            try:
                t.resolve_target_geoip()
                t.file_dump()
            except Exception:
                retinfo = "anomaly"
        del t
        assert retinfo in ['recover', 'success', 'anomaly', 'fail']
        trace_stats.setdefault(retinfo, []).append(ip_addr)

    # The Traceroute class needs to be enhanced with some kind of:
    #  * failure measurement and GUESSING WHY
    #  * retry after a while
    #  * estimation of shared path - optimization and stabler collection
    if trace_stats.has_key('fail') and len(trace_stats['fail']):
        print colored(
            " ࿓ Testing again the failed traceroute to %d IP address" % len(trace_stats['fail']))
    else:
        # just here to skip a KeyError below
        trace_stats.update({'fail': []})

    counter = 1
    fail_list_copy = list(trace_stats['fail'])
    # a list is done because inside of the loop is changed the
    # content of trace_stats['fail']
    for case_n, failed_trace in enumerate(fail_list_copy):
        hostlist = ip_map[failed_trace]
        t = Traceroute(OUTPUTDIR, failed_trace, hostlist, gi,
                       args.shitty_internet)
        counter += 1
        if not t.do_trace():
            print colored("Failure again.", "red")
            # NOTE(review): on this path the IP is appended to
            # trace_stats['fail'] below without having been removed first,
            # so it ends up duplicated in the 'fail' list.
            retinfo = "fail"
        else:
            retinfo = "retry"
            trace_stats['fail'].remove(failed_trace)
            try:
                t.resolve_target_geoip()
                t.file_dump()
            except Exception:
                retinfo = "anomaly"
        del t
        assert retinfo in ['recover', 'success', 'anomaly', 'fail', 'retry']
        trace_stats.setdefault(retinfo, []).append(failed_trace)

    TraceStats([]).dump_stats(OUTPUTDIR)

    # NOTE(review): trace_stats.values() is a list of lists and a list never
    # equals False, so .count(False) is always 0 and this report never
    # prints; len(trace_stats['fail']) is presumably what was intended.
    if trace_stats.values().count(False):
        print colored(
            "Registered %d failures" % trace_stats.values().count(False), "red")

    ptsj = os.path.join(OUTPUTDIR, '_phantom.trace.stats.json')
    if os.path.isfile(ptsj):
        os.unlink(ptsj)
    with file(ptsj, 'w+') as fp:
        json.dump([phantom_stats, trace_stats], fp)

    # Save again*again information about network location.
    do_wget(os.path.join(OUTPUTDIR, 'third.json'))

    output_name = 'results-%s.tar.gz' % proposed_country.lower()
    print colored(" ࿓ Analysis done! compressing the output in %s" % output_name,
                  "blue", 'on_white', attrs=['underline'])

    if os.path.isfile(output_name):
        os.unlink(output_name)

    tar = Popen(['tar', '-z', '-c', '-v', '-f', output_name, OUTPUTDIR],
                stdout=PIPE)
    counter_line = 0
    while True:
        line = tar.stdout.readline()
        # NOTE(review): the counter is incremented before the EOF check, so
        # the reported file count is one higher than the real one.
        counter_line += 1
        if not line:
            break

    if args.disable_send:
        print colored("%d file added to %s" % (counter_line, output_name), "green")
        print colored("Sending disable, test complete.", "yellow"),
        print colored("亷 亸", 'blue', 'on_white')
        quit(0)

    print colored(
        "%d file added to %s, Starting to submit results via Tor network\n" %
        (counter_line, output_name), "green")
    print colored("If submitting results fails please run:", "red")
    print colored("./perform_analysis.py -s %s" % output_name, "yellow")
    send_results(output_name, hiddenservice_tuple)
def main(): if not os.path.isdir(OUTPUTDIR): try: os.mkdir(OUTPUTDIR) except OSError as error: print "unable to create %s: %s" % (OUTPUTDIR, error) if len(sys.argv) < 2: print colored("Usage: %s $YOUR_COUNTRY_NAME <lp>" % sys.argv[0], "red", 'on_white') print "" print " 'lp' as 3rd argument is needed if you want use your own /usr/bin/phantomjs" print " (if you follow README.md, this is not needed because you downloaded phantomjs 1.9.2)" print " ",colored("By default, this software is looking for symlink 'phantom-1.9.2'", "green", "on_white") if os.path.islink('phantom-1.9.2'): print " ",colored("phantom-1.9.2 is a link, as expected.", "green", "on_white") else: print " ",colored("The phantom-1.9.2 link is missing!", "red", "on_white") print "Look in the verified_media/ for a list of countries." quit(-1) # check if the user is running phantom as installed on the system (also vagrant make this) # of if is using if len(sys.argv) == 3 and sys.argv[2] == 'lp': local_phantomjs = True print colored("You're using your local installed phantomjs. A version >= than 1.9.0 is needed.", 'blue', 'on_white') print colored("I'm not going to compare the string. Be aware: this is your version:", 'red') phantom_version = Popen(['phantomjs', '-v'], stdout=PIPE).stdout.readline() print colored(phantom_version, 'blue', 'on_white') else: if not os.path.islink('phantom-1.9.2'): print colored("Missing phantom-1.9.2. A symbolic link named phantom-1.9.2 was expected, but not found. Please consult README.md and make sure you've followed the installation procedure exactly.", 'red', 'on_white') quit(-1) local_phantomjs = False # country check proposed_country = sys.argv[1] country_f = os.path.join('verified_media', proposed_country.lower()) if not os.path.isfile(country_f): print colored("Invalid country! 
not found %s in directory 'verified_media/' " % proposed_country, 'red') print "Available countries are:" for existing_c in os.listdir('verified_media'): if existing_c in ['README.md', 'test']: continue print "\t", existing_c print colored("You can propose your own country media list following these instructions:", 'blue', 'on_white') print colored("https://github.com/vecna/helpagainsttrack/blob/master/unverified_media_list/README.md", 'blue', 'on_white') quit(-1) # ask free information to the script runner info_f = os.path.join(OUTPUTDIR, 'information') if os.path.isfile(info_f): f = open(info_f, 'r') information = json.load(f) f.close() print colored("Recovered information of previous collection:", 'green') print " name:", information['name'] print " contact:", information['contact'] print " ISP:", information['ISP'] print " city:", information['city'] else: information = {} print colored("Optionally, provide the information requested below, or press Enter to skip:", 'green') def question(description): print colored(description, 'white', 'on_blue') answer = sys.stdin.readline() answer = answer.strip('\n') return None if not len(answer) else answer information['name'] = question('Your name:') information['contact'] = question('Mail or jabber contact:') information['ISP'] = question('Which ISP is providing your link:') information['city'] = question('From which city you\'re running this script:') information['version'] = ANALYSIS_VERSION with file(info_f, 'w+') as f: json.dump(information, f) # writing in a file which country you're using! with file(os.path.join(OUTPUTDIR, 'country'), 'w+') as f: f.write(proposed_country.lower()) # reading media list, cleaning media list and copy media list cfp = file(country_f, 'r') unclean_lines = cfp.readlines() with file(os.path.join(OUTPUTDIR, 'used_media_list'), 'w+') as f: f.writelines(unclean_lines) # reconding an unique number is always useful, also if I've not yet in mind an usage right now. 
with file( os.path.join(OUTPUTDIR, "unique_id"), "w+") as f: f.write("%d%d%d" % (random.randint(0, 0xffff), random.randint(0, 0xffff), random.randint(0, 0xffff)) ) print colored(" ࿓ Importing media list:", 'blue', 'on_white', attrs=['underline']) media_entries = media_file_cleanings(unclean_lines) cfp.close() print colored(" ࿓ Checking your network source.", 'blue', 'on_white', attrs=['underline']) do_wget('first.json') print colored(" ࿓ Starting media crawling:", 'blue', 'on_white', attrs=['underline']) # here start iteration over the media! phantom_stats = {} for cleanurl, media_kind in media_entries.iteritems(): urldir = os.path.join(OUTPUTDIR, cleanurl) title_check = os.path.join(urldir, '__title') if os.path.isdir(urldir) and os.path.isfile(title_check): print "-", urldir, "already present: skipped" phantom_stats.setdefault('resumed', []).append(cleanurl) continue if os.path.isdir(urldir): # being here means that is empty or incomplete shutil.rmtree(urldir) print "+ Creating directory", urldir os.mkdir(urldir) retinfo = do_phantomjs(local_phantomjs, cleanurl, urldir, media_kind) phantom_stats.setdefault(retinfo, []).append(cleanurl) # take every directory in 'output/', get the included URL and dump in a dict included_url_dict = sortify(OUTPUTDIR) assert included_url_dict, "No url included after phantom scraping and collection !?" with file(os.path.join(OUTPUTDIR, 'domain.infos'), 'w+') as f: json.dump(included_url_dict, f) # generate DNS resolution map. for every host resolve an IP, for every IP resolve again DNS print colored(" ࿓ DNS resolution and reverse of %d domains" % len(included_url_dict), 'blue', 'on_white', attrs=['underline']) # when a "+" is printed, mean that a new IP/reverse has been added, # when a "*" is printed, mean that an older IP/reverse has a new associate # when a "-" is printed, has been an error! 
dns_error = [] resolution_dns_f = os.path.join(OUTPUTDIR, 'resolution.dns') if os.path.isfile(resolution_dns_f): fp = file(resolution_dns_f, 'r') ip_map = json.load(fp) fp.close() else: ip_map = {} counter = 0 percentage_bound = len(included_url_dict.keys()) / 10.0 if not int(percentage_bound): percentage_bound = 1.0 for domain in included_url_dict.keys(): counter += 1 if not counter % int(percentage_bound): print "%d\t%d%%\t%s" % (counter, (counter * (10 / percentage_bound) ), time.ctime()) try: socket.setdefaulttimeout(0.5) resolved_v4 = socket.gethostbyname(domain) except Exception as xxx: dns_error.append([domain, xxx.strerror]) continue ip_map.setdefault(resolved_v4, []).append(domain) with file(resolution_dns_f, 'w+') as f: json.dump(ip_map, f) print colored("\nResolved %d unique IPv4 from %d unique domain" % (len(ip_map.keys()), len(included_url_dict.keys()) ), 'green') if len(dns_error) == len(included_url_dict.keys()): print colored("It appears that you can't access the internet. 
Please fix that and restart the test.", 'red') quit(-1) reverse_dns_f = os.path.join(OUTPUTDIR, 'reverse.dns') if os.path.isfile(reverse_dns_f): fp = file(reverse_dns_f, 'r') true_domain_map = json.load(fp) fp.close() else: true_domain_map = {} counter = 0 percentage_bound = len(ip_map.keys()) / 10.0 if not int(percentage_bound): percentage_bound = 1.0 for ipv4 in ip_map.keys(): counter += 1 if not (counter % int(percentage_bound) ): print "%d\t%d%%\t%s" % (counter, (counter * (10 / percentage_bound) ), time.ctime()) try: socket.setdefaulttimeout(0.9) resolved_set = socket.gethostbyaddr(ipv4) resolved_name = resolved_set[0] except Exception as xxx: dns_error.append([ipv4, xxx.strerror]) continue true_domain_map.setdefault(resolved_name, []).append(ipv4) with file(reverse_dns_f, 'w+') as f: json.dump(true_domain_map, f) print colored("\nReversed %d unique FQDN" % len(true_domain_map.keys() ), 'green') if len(dns_error): print colored("Saving %d errors in 'errors.dns'" % len(dns_error)) with file(os.path.join(OUTPUTDIR, 'errors.dns'), 'w+') as f: json.dump(dns_error, f) # traceroutes contains all the output of traceroute in JSON format, separated # for logs. this output is not in the media directory, because some host like # google are included multiple times. trace_output = os.path.join(OUTPUTDIR, '_traceroutes') if not os.path.isdir(trace_output): os.mkdir(trace_output) # _verbotracelogs instead contain the detailed log of traceroute, # they would be useful in the future because AS number is not yet used # as information in the backend, but, who knows... 
verbotracelogs = os.path.join(OUTPUTDIR, '_verbotracelogs') if not os.path.isdir(verbotracelogs): os.mkdir(verbotracelogs) # saving again information about network location do_wget('second.json') # starting traceroute to all the collected IP print colored(" ࿓ Running traceroute to %d IP address (from %d hosts)" % ( len(ip_map.keys()), len(included_url_dict.keys())), 'blue', 'on_white', attrs=['underline']) counter = 1 trace_stats = {} for ip_addr, hostlist in ip_map.iteritems(): progress_string = "%d/%d" % (counter, len(ip_map.keys())) print colored("%s%s" % (progress_string, (10 - len(progress_string)) * " " ), "cyan" ), if not do_trace(hostlist, ip_addr): trace_stats.update({ip_addr : False }) else: trace_stats.update({ip_addr : True }) counter += 1 # TraceStats([]).dump_stats() if trace_stats.values().count(False): print colored("Registered %d failures" % trace_stats.values().count(False), "red") ptsj = os.path.join(OUTPUTDIR, '_phantom.trace.stats.json') if os.path.isfile(ptsj): os.unlink(ptsj) with file(ptsj, 'w+') as fp: json.dump([ phantom_stats, trace_stats ], fp) # saving again*again information about network location do_wget('third.json') output_name = 'results-%s.tar.gz' % proposed_country.lower() print colored(" ࿓ Analysis done! 
compressing the output in %s" % output_name, "blue", 'on_white', attrs=['underline']) if os.path.isfile(output_name): os.unlink(output_name) tar = Popen(['tar', '-z', '-c', '-v', '-f', output_name, OUTPUTDIR], stdout=PIPE) counter_line = 0 while True: line = tar.stdout.readline() counter_line += 1 if not line: break print colored("%d file added to %s, Starting 'result_sender.py'\n" % (counter_line, output_name), "green") print colored("If submitting results fails please type:", "red") print colored(" torify python ./sender_results.py %s" % output_name, "green") print colored("If this command also fails (and raise a python Exception), please report the error to trackmap at tacticaltech dot org :)", 'red') # result sender has hardcoded our hidden service p = Popen(['torify', 'python', './sender_results.py', output_name], stdout=PIPE, stderr=PIPE) while True: line = p.stdout.readline() exx = p.stderr.readline() if not line and not exx: break if exx.find('failed to find the symbol') != -1: continue if exx.find('libtorsocks') != -1: continue if line: print colored(" %s" % line, 'yellow') if exx: print colored(exx, 'red')
def main(): if not os.path.isdir(OUTPUTDIR): try: os.mkdir(OUTPUTDIR) except OSError as error: print "unable to create %s: %s" % (OUTPUTDIR, error) if len(sys.argv) < 2: print colored("Usage: %s $YOUR_COUNTRY_NAME <lp>" % sys.argv[0], "red", 'on_white') print "" print " 'lp' as 3rd argument is needed if you want use your own /usr/bin/phantomjs" print " (if you follow README.md, this is not needed because you've phantomjs 1.9.2)" print " ",colored("By default, this software is looking for symlink 'phantom-1.9.2'", "green", "on_white") if os.path.islink('phantom-1.9.2'): print " ",colored("Link that I've checked: you have ;)", "green", "on_white") else: print " ",colored("Link that I've checked: YOU HAVE NOT!", "red", "on_white") quit(-1) # check if the user is running phantom as installed on the system (also vagrant make this) # of if is using if len(sys.argv) == 3 and sys.argv[2] == 'lp': local_phantomjs = True print colored("You're using your local installed phantomjs. It is needed a version >= than 1.9.0", 'blue', 'on_white') print colored("I'm not gonna to compare the string, so, be aware: this is your version:", 'red') phantom_version = Popen(['phantomjs', '-v'], stdout=PIPE).stdout.readline() print colored(phantom_version, 'blue', 'on_white') else: if not os.path.islink('phantom-1.9.2'): print colored("You have not followd README.md :( I was expecting a symbolick link called phantom-1.9.2", 'red', 'on_white') quit(-1) local_phantomjs = False # country check proposed_country = sys.argv[1] country_f = os.path.join('verified_media', proposed_country.lower()) if not os.path.isfile(country_f): print colored("Invalid country! 
not found %s in directory 'verified_media/' " % proposed_country, 'red') print "Available countries are:" for existing_c in os.listdir('verified_media'): if existing_c in ['README.md', 'test']: continue print "\t", existing_c print colored("You can propose your own country media list following these instructions:", 'blue', 'on_white') print colored("https://github.com/vecna/helpagainsttrack/blob/master/unverified_media_list/README.md", 'blue', 'on_white') quit(-1) # ask free information to the script runner information = {} print colored("Optionally, provide the informations requested below, or press Enter to skip:", 'green') def question(description): print colored(description, 'white', 'on_blue') answer = sys.stdin.readline() answer = answer.strip('\n') return None if not len(answer) else answer information['name'] = question('Your name:') information['contact'] = question('Mail or jabber contact:') information['ISP'] = question('Which ISP is providing your link:') information['city'] = question('From which city you\'re running this script:') with file(os.path.join(OUTPUTDIR, 'information'), 'w+') as f: json.dump(information, f) # writing in a file which country you're using! with file(os.path.join(OUTPUTDIR, 'country'), 'w+') as f: f.write(proposed_country.lower()) # reading media list, cleaning media list and copy media list cfp = file(country_f, 'r') unclean_lines = cfp.readlines() with file(os.path.join(OUTPUTDIR, 'used_media_list'), 'w+') as f: f.writelines(unclean_lines) print colored(" ࿓ Importing media list:", 'blue', 'on_white', attrs=['underline']) media_entries = media_file_cleanings(unclean_lines) cfp.close() print colored(" ࿓ Starting media crawling:", 'blue', 'on_white', attrs=['underline']) # here start iteration over the media! 
for cleanurl, media_kind in media_entries.iteritems(): urldir = os.path.join(OUTPUTDIR, cleanurl) title_check = os.path.join(urldir, '__title') if os.path.isdir(urldir) and os.path.isfile(title_check): print "-", urldir, "already present: skipped" continue if os.path.isdir(urldir): # being here means that is empty or incomplete shutil.rmtree(urldir) print "+ Creating directory", urldir os.mkdir(urldir) do_phantomjs(local_phantomjs, cleanurl, urldir, media_kind) # take every directory in 'output/', get the included URL and dump in a dict included_url_dict = sortify(OUTPUTDIR) assert included_url_dict, "No url included after phantom scraping and collection !?" with file(os.path.join(OUTPUTDIR, 'domain.infos'), 'w+') as f: json.dump(included_url_dict, f) # TODO optimization # if os.path.isfile(os.path.join(OUTPUTDIR, 'resolution.dns')): # if os.path.isfile(os.path.join(OUTPUTDIR, 'reverse.dns')): print colored(" ࿓ DNS resolution and reverse of %d domains" % len(included_url_dict), 'blue', 'on_white', attrs=['underline']) # when a "+" is printed, mean that a new IP/reverse has been added, # when a "*" is printed, mean that an older IP/reverse has a new associate # when a "-" is printed, has been an error! dns_error = [] # generate DNS resolution map. 
for every host resolve an IP, for every IP resolve again DNS ip_map = {} counter = 0 percentage_bound = len(included_url_dict.keys()) / 10.0 for domain in included_url_dict.keys(): counter += 1 if not (counter % int(percentage_bound) ): print "%d\t%d%%\t%s" % (counter, (counter * (10 / percentage_bound) ), time.ctime()) try: socket.setdefaulttimeout(0.5) resolved_v4 = socket.gethostbyname(domain) except Exception as xxx: dns_error.append([domain, xxx.strerror]) continue if ip_map.has_key(resolved_v4): ip_map[resolved_v4].append(domain) else: ip_map.update({resolved_v4 : [ domain ] }) print colored("\nResolved %d unique IPv4 from %d unique domain" % (len(ip_map.keys()), len(included_url_dict.keys()) ), 'green') with file(os.path.join(OUTPUTDIR, 'resolution.dns'), 'w+') as f: json.dump(ip_map, f) if len(dns_error) == len(included_url_dict.keys()): print colored("Very probably your network is broken, right ? restart the test when fixed.", 'red') quit(-1) true_domain_map = {} counter = 0 percentage_bound = len(ip_map.keys()) / 10.0 for ipv4 in ip_map.keys(): counter += 1 if not (counter % int(percentage_bound) ): print "%d\t%d%%\t%s" % (counter, (counter * (10 / percentage_bound) ), time.ctime()) try: socket.setdefaulttimeout(0.9) resolved_set = socket.gethostbyaddr(ipv4) resolved_name = resolved_set[0] except Exception as xxx: dns_error.append([ipv4, xxx.strerror]) continue if true_domain_map.has_key(resolved_name): true_domain_map[resolved_name].append(ipv4) else: true_domain_map.update({resolved_name : [ ipv4 ] }) print colored("\nReversed %d unique FQDN" % len(true_domain_map.keys() ), 'green') with file(os.path.join(OUTPUTDIR, 'reverse.dns'), 'w+') as f: json.dump(true_domain_map, f) with file(os.path.join(OUTPUTDIR, 'errors.dns'), 'w+') as f: json.dump(dns_error, f) # traceroutes contains all the output of traceroute in JSON format, separated # for logs. this output is not in the media directory, because some host like # google are included multiple times. 
trace_output = os.path.join(OUTPUTDIR, '_traceroutes') if not os.path.isdir(trace_output): os.mkdir(trace_output) # _verbotracelogs instead contain the detailed log of traceroute, # they would be useful in the future because AS number is not yet used # as information in the backend, but, who knows... verbotracelogs = os.path.join(OUTPUTDIR, '_verbotracelogs') if not os.path.isdir(verbotracelogs): os.mkdir(verbotracelogs) print colored(" ࿓ Running traceroute to %d hosts" % len(included_url_dict.keys()), 'blue', 'on_white', attrs=['underline']) counter = 1 failure = 0 for url, domain_info in included_url_dict.iteritems(): progress_string = "%d/%d" % (counter, len(included_url_dict.keys())) print colored("%s%s" % (progress_string, (10 - len(progress_string)) * " " ), "cyan" ), if not do_trace(url, url): failure += 1 counter += 1 if failure: print colored("Registered %d failures" % failure, "red") # putting the unique number into with file( os.path.join(OUTPUTDIR, "unique_id"), "w+") as f: f.write("%d%d%d" % (random.randint(0, 0xffff), random.randint(0, 0xffff), random.randint(0, 0xffff)) ) output_name = 'results-%s.tar.gz' % proposed_country.lower() print colored(" ࿓ Analysis done! 
compressing the output in %s" % output_name, "blue", 'on_white', attrs=['underline']) if os.path.isfile(output_name): os.unlink(output_name) tar = Popen(['tar', '-z', '-c', '-v', '-f', output_name, OUTPUTDIR], stdout=PIPE) counter_line = 0 while True: line = tar.stdout.readline() counter_line += 1 if not line: break print colored("%d file added to %s, Starting 'result_sender.py'" % (counter_line, output_name), "green") # result sender has hardcoded our hidden service p = Popen(['torify', 'python', './sender_results.py', output_name], stdout=PIPE, stderr=PIPE) while True: line = p.stdout.readline() exx = p.stderr.readline() if not line and not exx: break if exx.find('failed to find the symbol') != -1: continue if exx.find('libtorsocks') != -1: continue if line: print colored(" %s" % line, 'yellow'), if exx: print colored(exx, 'red'),
print "Error unexpected command:", command quit(-1) # understand the third argument target = sys.argv[2] if os.path.isfile(target): print "Found file", target, "using as media list" # reading media list, cleaning media list and copy media list cfp = file(target, 'r') unclean_lines = cfp.readlines() print colored(" ࿓ Importing media list:", 'blue', 'on_white', attrs=['underline']) media_entries = media_file_cleanings(unclean_lines) cfp.close() else: print "Not found file", target, "assuming as single host" media_entries = {target: 'hand'} if not os.path.isdir('_hostseer'): os.mkdir('_hostseer') def check_section(kind): if kind and len(sys.argv) == 4: assert sys.argv[3] in PERMITTED_SECTIONS, PERMITTED_SECTIONS return kind != sys.argv[3] if command == 'phantom':
# Sub-command dispatch fragment of the host-inspection helper script.
# NOTE(review): the indentation below is reconstructed -- this region arrived
# collapsed onto a single line, so block boundaries are inferred.
command = sys.argv[1]
# only these three analysis phases are recognised as sub-commands
if command not in [ 'DNS', 'phantom', 'Geo' ]:
    print "Error unexpected command:", command
    quit(-1)
# understand the third argument: either a media-list file or a single host
target = sys.argv[2]
if os.path.isfile(target):
    print "Found file", target, "using as media list"
    # reading media list, cleaning media list and copy media list
    cfp = file(target, 'r')
    unclean_lines = cfp.readlines()
    print colored(" ࿓ Importing media list:", 'blue', 'on_white', attrs=['underline'])
    media_entries = media_file_cleanings(unclean_lines)
    cfp.close()
else:
    print "Not found file", target, "assuming as single host"
    # 'hand' marks a manually supplied single-host entry
    media_entries = {target: 'hand'}
# working directory for this helper's output
if not os.path.isdir('_hostseer'):
    os.mkdir('_hostseer')

def check_section(kind):
    # With a 4th argv naming one of PERMITTED_SECTIONS, return True when this
    # section (kind) differs from the requested one, i.e. should be skipped;
    # otherwise falls through and returns None.
    # NOTE(review): the placement of the return inside the if is inferred from
    # the collapsed source -- confirm against the original file.
    if kind and len(sys.argv) == 4:
        assert sys.argv[3] in PERMITTED_SECTIONS, PERMITTED_SECTIONS
        return kind != sys.argv[3]