def verify_media_country(the_user_input, special): # this function left the media file open forever. :( if special: special_f = os.path.join('special_media', the_user_input) if not os.path.isfile(special_f): print colored( "Invaild special URL source, check in special_media ", 'red') quit(-1) cfp = file(special_f, 'r') unclean_lines = cfp.readlines() print colored(" ࿓ Importing special media list:", 'blue', 'on_white', attrs=['underline']) media_entries = media_file_cleanings(unclean_lines, permit_flexible_category=True) cfp.close() return special_f, media_entries # if not special, is media list country_f = os.path.join('verified_media', the_user_input.lower()) if not os.path.isfile(country_f): print colored( "Invalid country! not found %s in directory 'verified_media/' " % proposed_country, 'red') print "Available countries are:" for existing_c in os.listdir('verified_media'): if existing_c in ['README.md', 'test']: continue print "\t", existing_c print colored( "You can propose your own country media list following these instructions:", 'blue', 'on_white') print colored( "https://github.com/vecna/trackmap/blob/master/unverified_media_list/README.md", 'blue', 'on_white') quit(-1) cfp = file(country_f, 'r') # reading media list, cleaning media list and copy media list unclean_lines = cfp.readlines() print colored(" ࿓ Importing media list from %s:" % the_user_input.lower(), 'blue', 'on_white', attrs=['underline']) media_entries = media_file_cleanings(unclean_lines) cfp.close() return country_f, media_entries
def verify_media_country(the_user_input, special): if special: special_f = os.path.join("special_media", the_user_input) if not os.path.isfile(special_f): print colored("Invaild special URL source, check in special_media ", "red") quit(-1) cfp = file(special_f, "r") unclean_lines = cfp.readlines() print colored(" ࿓ Importing special media list:", "blue", "on_white", attrs=["underline"]) media_entries = media_file_cleanings(unclean_lines, permit_flexible_category=True) cfp.close() return special_f, media_entries # if not special, is media list country_name = the_user_input.lower() country_f = os.path.join("verified_media", country_name) if not os.path.isfile(country_f): print colored("Invalid country! not found %s in directory 'verified_media/' " % country_name, "red") print "Available countries are:" for existing_c in os.listdir("verified_media"): if existing_c in ["README.md", "test"]: continue print "\t", existing_c print colored("You can propose your own country media list following these instructions:", "blue", "on_white") print colored( "https://github.com/vecna/trackmap/blob/master/unverified_media_list/README.md", "blue", "on_white" ) quit(-1) cfp = file(country_f, "r") # reading media list, cleaning media list and copy media list unclean_lines = cfp.readlines() print colored(" ࿓ Importing media list from %s:" % the_user_input.lower(), "blue", "on_white", attrs=["underline"]) media_entries = media_file_cleanings(unclean_lines) cfp.close() return country_f, media_entries
def verify_media_country(the_user_input, special): # this function left the media file open forever. :( if special: special_f = os.path.join('special_media', the_user_input) if not os.path.isfile(special_f): print colored("Invaild special URL source, check in special_media ", 'red') quit(-1) cfp = file(special_f, 'r') unclean_lines = cfp.readlines() print colored(" ࿓ Importing special media list:", 'blue', 'on_white', attrs=['underline']) media_entries = media_file_cleanings(unclean_lines, permit_flexible_category=True) cfp.close() return special_f, media_entries # if not special, is media list country_f = os.path.join('verified_media', the_user_input.lower()) if not os.path.isfile(country_f): print colored("Invalid country! not found %s in directory 'verified_media/' " % proposed_country, 'red') print "Available countries are:" for existing_c in os.listdir('verified_media'): if existing_c in ['README.md', 'test']: continue print "\t", existing_c print colored("You can propose your own country media list following these instructions:", 'blue', 'on_white') print colored("https://github.com/vecna/trackmap/blob/master/unverified_media_list/README.md", 'blue', 'on_white') quit(-1) cfp = file(country_f, 'r') # reading media list, cleaning media list and copy media list unclean_lines = cfp.readlines() print colored(" ࿓ Importing media list from %s:" % the_user_input.lower(), 'blue', 'on_white', attrs=['underline']) media_entries = media_file_cleanings(unclean_lines) cfp.close() return country_f, media_entries
def main(): parser = OptionParser() parser.add_option("-c", "--country-name", type="string", help="the country from which you want run the test", dest="medialist") parser.add_option("-o", "--output-dir", type="string", default=None, help="directory to store results", dest="user_outputdir") parser.add_option("-l", "--local-phantom", action="store_true", help="use local phantomjs instead of the downloaded one", dest="lp") parser.add_option("-d", "--disable-sending", action="store_true", help="disable the result sending at the end of the test", dest="disable_send") parser.add_option("-i", "--instable-internet", action="store_true", help="If your internet is instable, please enable this option", dest="shitty_internet") parser.add_option("-s", "--send", type="string", dest="targz_output", help="do not perform test, submit a previously collected result.") parser.add_option("-t", "--twitter-handle", type="string", dest="twit", help="put your twitter handler, you'll be mentioned when test is imported.") parser.add_option("-v", "--version", action="store_true", dest="version", help="print version, spoiler: %d" % ANALYSIS_VERSION) parser.add_option("-T", "--Tor", action="store_true", dest="hiddensubmit", help="submit via hidden service (require Tor running)") parser.add_option("-k", "--keep", action="store_true", dest="keep", help="don't remove the results-[country].tar.gz ") (args, _) = parser.parse_args() if args.version: print "analysis format version:", ANALYSIS_VERSION quit(0) if args.targz_output: if args.disable_send: print colored("You can't use -s (--send) and -d (--disable-sending) options together") quit(-1) if not os.path.isfile(args.targz_output): print colored("Invalid file: %s" % args.targz_output) quit(-1) print colored(" ࿓ Sending previous results...", 'blue', 'on_white', attrs=['underline']) if args.hiddensubmit: quit(send_results(args.targz_output, hiddenservice_tuple, tor_proxy=True)) else: quit(send_results(args.targz_output, server_tuple, 
tor_proxy=False)) try: local_phantom_v = get_local_phantom_v() except Exception as xxx: print xxx local_phantom_v = None if not args.medialist: print colored("Usage: %s -c $YOUR_COUNTRY_NAME" % sys.argv[0], "red", 'on_white') print parser.format_help() if os.path.islink('phantom-1.9.8'): print colored("found phantom-1.9.8 as link, good.", "green", "on_white") elif not local_phantom_v: print colored("phantomjs missing as link and missing in the system!", "red", "on_white") print colored("Please refer to the RADME or asks support to us", 'red', 'on_white') print colored("The script can't work in this status!", red) else: print colored("You have to use the option -l, and your installation is quite uncommon", red) print print "Look in the verified_media/ for a list of countries." print "TrackMap collection tool version: %d" % ANALYSIS_VERSION quit(-1) # check if the user is running phantom as installed on the system (also vagrant make this) # of if is using if args.lp and local_phantom_v: print colored("You're using your local installed phantomjs. A version >= than 1.9.0 is needed.", 'blue', 'on_white') print colored("I'm not going to compare the string. Be aware: this is your version:", 'red') print colored(local_phantom_v, 'blue', 'on_white') print "If is wrong, just press ^c and use the proper README instruction, or asks support to us" elif args.lp: print colored("phantomjs missing as link and missing in the system!", "red", "on_white") print colored("Please refer to the README or asks support to us", 'red', 'on_white') print colored("The script can't work in this status!", red) quit(-1) elif not os.path.islink('phantom-1.9.8'): print colored("Missing phantom-1.9.8. A symbolic link named phantom-1.9.8 was expected, but not found. 
Please consult README.md and make sure you've followed the installation procedure exactly.", 'red', 'on_white') quit(-1) if args.hiddensubmit: try: import socks except ImportError: print "You are missing 'PySocks' module, needed to proxy over Tor" tor_test = ("127.0.0.1", 9050) c = socket.socket() try: c.connect( tor_test ) c.close() except Exception as xxx: print colored("Unable to connect to %s, Tor is needed to send results" % str(tor_test), "red") print colored(xxx, "red") print colored("You can disable result sending with the option -d", "yellow") quit(-1) del c # country check proposed_country = args.medialist country_f = os.path.join('verified_media', proposed_country.lower()) if not os.path.isfile(country_f): print colored("Invalid country! not found %s in directory 'verified_media/' " % proposed_country, 'red') print "Available countries are:" for existing_c in os.listdir('verified_media'): if existing_c in ['README.md', 'test']: continue print "\t", existing_c print colored("You can propose your own country media list following these instructions:", 'blue', 'on_white') print colored("https://github.com/vecna/trackmap/blob/master/unverified_media_list/README.md", 'blue', 'on_white') quit(-1) # check if the output directory is not the default and/or if need to be created if args.user_outputdir: OUTPUTDIR = args.user_outputdir else: OUTPUTDIR = 'output/' if not os.path.isdir(OUTPUTDIR): try: os.mkdir(OUTPUTDIR) except OSError as error: print "unable to create %s: %s" % (OUTPUTDIR, error) if args.twit is None: print colored("You can specify your Twitter handle with -t and get mentioned by @trackography_", 'blue', 'on_yellow' ) # ask free information to the script runner info_f = os.path.join(OUTPUTDIR, 'information') information = { 'contact' : args.twit, 'version' : ANALYSIS_VERSION, 'city' : None, 'ISP' : None, 'name' : None, } with file(info_f, 'w+') as f: json.dump(information, f) # writing in a file which country you've selected! 
with file(os.path.join(OUTPUTDIR, 'country'), 'w+') as f: f.write(proposed_country.lower()) # reading media list, cleaning media list and copy media list cfp = file(country_f, 'r') unclean_lines = cfp.readlines() # reconding an unique number is always useful, also if I've not yet in mind an usage right now. with file( os.path.join(OUTPUTDIR, "unique_id"), "w+") as f: f.write("%d%d%d" % (random.randint(0, 0xffff), random.randint(0, 0xffff), random.randint(0, 0xffff)) ) print colored(" ࿓ Importing media list:", 'blue', 'on_white', attrs=['underline']) media_entries = media_file_cleanings(unclean_lines) cfp.close() with file(os.path.join(OUTPUTDIR, 'used_media.json'), 'w+') as f: json.dump(media_entries, f) print colored(" ࿓ Checking your network source.", 'blue', 'on_white', attrs=['underline']) get_client_info(os.path.join(OUTPUTDIR, 'first.json')) # Init of class method/vars PhantomCrawl.media_amount = len(media_entries.keys()) PhantomCrawl.status_file = os.path.join(OUTPUTDIR, 'phantom.results.json') PhantomCrawl.load_status_disk() print colored(" ࿓ Starting media crawling (%d)" % PhantomCrawl.media_amount, 'blue', 'on_white', attrs=['underline']) # here start iteration over the media! skipped = 0 for cleanurl, media_kind in media_entries.iteritems(): if PhantomCrawl.status.has_key(cleanurl) and PhantomCrawl.status[cleanurl]['status']: skipped += 1 PhantomCrawl.media_done += 1 continue urldir = os.path.join(OUTPUTDIR, cleanurl) if skipped: print colored("skipped %d media from interrupted test" % skipped, 'yellow') skipped = 0 if os.path.isdir(urldir): # being here means that is empty or incomplete shutil.rmtree(urldir) os.mkdir(urldir) PhantomCrawl(args.lp, cleanurl, urldir, media_kind, OUTPUTDIR).start() # XXX I can think to a return value here ? previous_running_test = 0 while PhantomCrawl.media_running: if previous_running_test == PhantomCrawl.media_running: I_want_thread_to_zero(70) print colored("Media completed %d over %d: phase complete!" 
% (PhantomCrawl.media_amount, PhantomCrawl.media_done), 'magenta', 'on_yellow' ) break previous_running_test = PhantomCrawl.media_running print colored("Running %d, completed %d (on %d): sleeping 25s." % \ (PhantomCrawl.media_running, PhantomCrawl.media_done, PhantomCrawl.media_amount), 'green', 'on_white') time.sleep(25) # finally, enforce a complete sync in the disk. is probably already happen, but for safety: PhantomCrawl.sync_status_disk(mandatory=True) # take every directory in 'output/', get the included URL and dump in a dict included_url_dict = sortify(OUTPUTDIR) assert included_url_dict, "No url included after phantom scraping and collection !?" with file(os.path.join(OUTPUTDIR, 'domain.infos'), 'w+') as f: json.dump(included_url_dict, f) # RESOLUTION multi-thread HERE start DNSresolve.host_amount = len(included_url_dict.keys()) DNSresolve.status_file = os.path.join(OUTPUTDIR, 'resolution.status.json') DNSresolve.resolution_file = os.path.join(OUTPUTDIR, 'resolution.dns') DNSresolve.errors_file = os.path.join(OUTPUTDIR, 'resolution.errors.json') DNSresolve.load_status_disk() # generate DNS resolution map. for every host resolve an IP, for every IP resolve again DNS print colored(" ࿓ DNS resolution of %d domains..." % len(included_url_dict.keys()), 'blue', 'on_white', attrs=['underline']) for domain in included_url_dict.keys(): if DNSresolve.status.has_key(domain) and DNSresolve.status[domain]: DNSresolve.host_done += 1 continue DNSresolve(domain, args.shitty_internet).start() I_want_thread_to_zero(8) print colored("\nResolved %d unique IPv4 from %d unique domain (Errors %d)" % (len(DNSresolve.ip_map.keys()), len(included_url_dict.keys()), DNSresolve.resolve_errors ), 'green') DNSresolve.save_status(mandatory=True) if not len(DNSresolve.ip_map.keys()): print colored("It appears that you can't access the internet. 
Please fix that and restart the test.", 'red') quit(-1) ### -----------------------------------------------------`### ### Reversing multithread start HERE ### DNSreverse.ip_amount = len(DNSresolve.ip_map.keys()) DNSreverse.status_file = os.path.join(OUTPUTDIR, 'reverse.status.json') DNSreverse.reverse_file = os.path.join(OUTPUTDIR, 'reverse.dns') DNSreverse.errors_file = os.path.join(OUTPUTDIR, 'reverse.errors.json') DNSreverse.load_status_disk() print colored(" ࿓ DNS reverse of %d domains..." % DNSreverse.ip_amount, 'blue', 'on_white', attrs=['underline']) for ip in DNSresolve.ip_map.keys(): if DNSreverse.status.has_key(ip) and DNSreverse.status[ip]: DNSreverse.ip_done += 1 continue DNSreverse(ip, args.shitty_internet).start() I_want_thread_to_zero(12) print colored("\nReversed %d unique FQDN from %d IPaddrs (Errors %d)" % ( len(DNSreverse.fqdn_map.keys()), len(DNSresolve.ip_map.keys()), DNSreverse.reverse_errors), 'green') DNSreverse.save_status(mandatory=True) # ------------------------------------------------------------------------ # traceroutes contains all the output of traceroute in JSON format, # for logs. this output is not in the media directory, because some # host (think to fbcdn or google) are included multiple times. 
# ------------------------------------------------------------------------ verbotracelogs = os.path.join(OUTPUTDIR, '_verbotracelogs') if not os.path.isdir(verbotracelogs): os.mkdir(verbotracelogs) # saving again information about network location get_client_info(os.path.join(OUTPUTDIR, 'second.json')) # Traceroute is not yet multithread # starting traceroute to all the collected IP print colored(" ࿓ Running traceroute to %d IP address (from %d hosts)" % ( len(DNSresolve.ip_map.keys()), len(included_url_dict.keys())), 'blue', 'on_white', attrs=['underline']) Multitrace.amount = len(DNSresolve.ip_map.keys()) for ip_addr, hostlist in DNSresolve.ip_map.iteritems(): assert ip_addr.count('.') == 3, "Invalid IPv4 format %s" % ip_addr if Traceroute.is_already_trace(ip_addr, OUTPUTDIR): Multitrace.done += 1 continue Multitrace(OUTPUTDIR, ip_addr, hostlist, args.shitty_internet).start() I_want_thread_to_zero(80) ## ----------- END TRACEROUTE ------------- # saving again*again information about network location get_client_info(os.path.join(OUTPUTDIR, 'third.json')) output_name = 'results-%s.tar.gz' % proposed_country.lower() print colored(" ࿓ Analysis done! 
compressing the output in %s" % output_name, "blue", 'on_white', attrs=['underline']) if os.path.isfile(output_name): os.unlink(output_name) tar = subprocess.Popen(['tar', '-z', '-c', '-v', '-f', output_name, OUTPUTDIR], stdout=subprocess.PIPE) counter_line = 0 while True: line = tar.stdout.readline() counter_line += 1 if not line: break if args.disable_send: print colored("%d files added to %s" % (counter_line, output_name), "green") print colored("Sending disable, test complete.", "yellow"), print colored("亷 亸", 'blue', 'on_white') os.kill(os.getpid(), 15) quit(0) print colored("%d file added to %s, Starting to submit results" % (counter_line, output_name), "green") if not args.keep: print "..removing of", OUTPUTDIR shutil.rmtree(OUTPUTDIR) print colored("If submitting results fails please run:", "red") print colored("./perform_analysis.py -s %s" % output_name, "yellow") if args.hiddensubmit: ret = send_results(output_name, hiddenservice_tuple, tor_proxy=True) else: ret = send_results(output_name, server_tuple, tor_proxy=False) print "" os.kill(os.getpid(), 15)
def get_alexa_list():
    """Parse the list stored in 'special_media/alexa/world_top_100_per_country'.

    Returns a tuple: the fixed label 'special/alexa100' and the value
    produced by media_file_cleanings() for the lines of that file.
    """
    alexa_path = 'special_media/alexa/world_top_100_per_country'

    with open(alexa_path) as handle:
        raw_lines = handle.readlines()

    return 'special/alexa100', media_file_cleanings(raw_lines)