def main():

    parser = OptionParser()

    parser.add_option("-c", "--country-name", type="string",
                      help="the country from which you want run the test", dest="medialist")
    parser.add_option("-o", "--output-dir", type="string", default=None,
                      help="directory to store results", dest="user_outputdir")
    parser.add_option("-l", "--local-phantom", action="store_true",
                      help="use local phantomjs instead of the downloaded one", dest="lp")
    parser.add_option("-d", "--disable-sending", action="store_true",
                      help="disable the result sending at the end of the test", dest="disable_send")
    parser.add_option("-i", "--instable-internet", action="store_true",
                      help="If your internet is instable, please enable this option", dest="shitty_internet")
    parser.add_option("-s", "--send", type="string", dest="targz_output",
                      help="do not perform test, submit a previously collected result.")
    parser.add_option("-v", "--version", action="store_true", dest="version",
                      help="print version, spoiler: %d" % ANALYSIS_VERSION)

    (args, _) = parser.parse_args()

    if args.version:
        print "analysis format version:", ANALYSIS_VERSION
        quit(0)

    if args.targz_output:
        if args.disable_send:
            print colored("You can't use -s (--send) and -d (--disable-sending) options together")
            quit(-1)

        if not os.path.isfile(args.targz_output):
            print colored("Invalid file: %s" % args.targz_output)
            quit(-1)

        print colored(" ࿓  Sending previous results...", 'blue', 'on_white', attrs=['underline'])
        send_results(args.targz_output, hiddenservice_tuple)
        quit(0)

    if not args.medialist:
        print colored("Usage: %s -c $YOUR_COUNTRY_NAME" % sys.argv[0], "red", 'on_white')
        print colored("\t-l (local phantom, instead of the symlink here)", "red", 'on_white')
        print colored("\t-o output directory, used to collect test results", "red", 'on_white')
        print ""
        print " -l option is needed if you want use your own /usr/bin/phantomjs"
        print " (if you follow README.md, this is not needed because you downloaded phantomjs 1.9.2)"
        print " ",colored("By default, this software is looking for symlink 'phantom-1.9.2'", "green", "on_white")
        if os.path.islink('phantom-1.9.2'):
            print " ",colored("phantom-1.9.2 is a link, as expected.", "green", "on_white")
        else:
            print " ",colored("The phantom-1.9.2 link is missing!", "red", "on_white")
        print "Look in the verified_media/ for a list of countries."
        print "TrackMap collection tool version: %d" % ANALYSIS_VERSION
        quit(-1)

    # check if the user is running phantom as installed on the system (also vagrant make this)
    # of if is using
    if args.lp:
        local_phantomjs = True

        print colored("You're using your local installed phantomjs. A version >= than 1.9.0 is needed.", 'blue', 'on_white')
        print colored("I'm not going to compare the string. Be aware: this is your version:", 'red')

        phantom_version = Popen(['phantomjs', '-v'], stdout=PIPE).stdout.readline()
        print colored(phantom_version, 'blue', 'on_white')
    else:
        if not os.path.islink('phantom-1.9.2'):
            print colored("Missing phantom-1.9.2. A symbolic link named phantom-1.9.2 was expected, but not found. Please consult README.md and make sure you've followed the installation procedure exactly.", 'red', 'on_white')
            quit(-1)

        local_phantomjs = False

    if not args.disable_send:
        tor_test = ("127.0.0.1", 9050)
        c = socket.socket()
        try:
            c.connect( tor_test )
            c.close()
        except Exception as xxx:
            print colored("Unable to connect to %s, Tor is needed to send results" % str(tor_test), "red")
            print colored(xxx, "red")
            print colored("You can disable result sending with the option -d", "yellow")
            quit(-1)
        del c

    # country check
    proposed_country = args.medialist
    country_f = os.path.join('verified_media', proposed_country.lower())
    if not os.path.isfile(country_f):
        print colored("Invalid country! not found %s in directory 'verified_media/' " % proposed_country, 'red')
        print "Available countries are:"
        for existing_c in os.listdir('verified_media'):
            if existing_c in ['README.md', 'test']:
                continue
            print "\t", existing_c
        print colored("You can propose your own country media list following these instructions:", 'blue', 'on_white')
        print colored("https://github.com/vecna/trackmap/blob/master/unverified_media_list/README.md", 'blue', 'on_white')
        quit(-1)

    # check if the output directory is not the default and/or if need to be created
    if args.user_outputdir:
        OUTPUTDIR = args.user_outputdir
    else:
        OUTPUTDIR = 'output/'

    if not os.path.isdir(OUTPUTDIR):
        try:
            os.mkdir(OUTPUTDIR)
        except OSError as error:
            print "unable to create %s: %s" % (OUTPUTDIR, error)


    # ask free information to the script runner
    info_f = os.path.join(OUTPUTDIR, 'information')
    if os.path.isfile(info_f):
        f = open(info_f, 'r')
        information = json.load(f)
        f.close()
        print colored("Recovered information of previous collection:", 'green')
        print " name:", information['name']
        print " contact:", information['contact']
        print " ISP:", information['ISP']
        print " city:", information['city']
    else:
        information = {}
        print colored("Optionally, provide the information requested below, or press Enter to skip:", 'green')

        def question(description):
            print colored(description, 'white', 'on_blue')
            answer = sys.stdin.readline()
            answer = answer.strip('\n')
            return None if not len(answer) else answer

        information['name'] = question('Your name:')
        information['contact'] = question('Mail or jabber contact:')
        information['ISP'] = question('Which ISP is providing your link:')
        information['city'] = question('From which city you\'re running this script:')
        information['version'] = ANALYSIS_VERSION

        with file(info_f, 'w+') as f:
            json.dump(information, f)


    # writing in a file which country you're using!
    with file(os.path.join(OUTPUTDIR, 'country'), 'w+') as f:
        f.write(proposed_country.lower())

    # reading media list, cleaning media list and copy media list
    cfp = file(country_f, 'r')
    unclean_lines = cfp.readlines()

    with file(os.path.join(OUTPUTDIR, 'used_media_list'), 'w+') as f:
        f.writelines(unclean_lines)

    # reconding an unique number is always useful, also if I've not yet in mind an usage right now.
    with file( os.path.join(OUTPUTDIR, "unique_id"), "w+") as f:
        f.write("%d%d%d" % (random.randint(0, 0xffff), random.randint(0, 0xffff), random.randint(0, 0xffff)) )

    print colored(" ࿓  Importing media list:", 'blue', 'on_white', attrs=['underline'])
    media_entries = media_file_cleanings(unclean_lines)
    cfp.close()

    print colored(" ࿓  Checking your network source.", 'blue', 'on_white', attrs=['underline'])
    do_wget( os.path.join(OUTPUTDIR, 'first.json'))

    print colored(" ࿓  Starting media crawling:", 'blue', 'on_white', attrs=['underline'])
    # here start iteration over the media!
    phantom_stats = {}
    for cleanurl, media_kind in media_entries.iteritems():

        urldir = os.path.join(OUTPUTDIR, cleanurl)
        title_check = os.path.join(urldir, '__title')

        if os.path.isdir(urldir) and os.path.isfile(title_check):
            print "-", urldir, "already present: skipped"
            phantom_stats.setdefault('resumed', []).append(cleanurl)
            continue

        if os.path.isdir(urldir):
            # being here means that is empty or incomplete
            shutil.rmtree(urldir)

        print "+ Creating directory", urldir
        os.mkdir(urldir)

        retinfo = do_phantomjs(local_phantomjs, cleanurl, urldir, media_kind, OUTPUTDIR)
        assert retinfo in [ 'first', 'second', 'failures' ]
        phantom_stats.setdefault(retinfo, []).append(cleanurl)

    # take every directory in 'output/', get the included URL and dump in a dict
    included_url_dict = sortify(OUTPUTDIR)
    assert included_url_dict, "No url included after phantom scraping and collection !?"
    with file(os.path.join(OUTPUTDIR, 'domain.infos'), 'w+') as f:
        json.dump(included_url_dict, f)

    # generate DNS resolution map. for every host resolve an IP, for every IP resolve again DNS
    print colored(" ࿓  DNS resolution and reverse of %d domains..." % len(included_url_dict), 'blue', 'on_white', attrs=['underline'])

    # new format contain:
    # first dict: resolution error
    # second dict: reverse error
    dns_error = [{}, {}]

    # now, until there is not refactor based on classes,
    # the resolution of the previously failed DN will not happen
    resolution_dns_f = os.path.join(OUTPUTDIR, 'resolution.dns')
    if os.path.isfile(resolution_dns_f):
        fp = file(resolution_dns_f, 'r')
        ip_map = json.load(fp)
        fp.close()
    else:
        ip_map = {}
        counter = 0
        percentage_bound = len(included_url_dict.keys()) / 10.0

        if not int(percentage_bound):
            percentage_bound = 1.0

        for domain in included_url_dict.keys():
            counter += 1
            if not counter % int(percentage_bound):
                print "%d\t%d%%\t%s" % (counter, (counter * (10 / percentage_bound) ), time.ctime())
            # other random possibility based on birthday paradox to show counters...
            if random.randint(0, int(percentage_bound * 10 )) == counter:
                print "%d\t%d%%\t%s" % (counter, (counter * (10 / percentage_bound) ), time.ctime())

            try:

                if args.shitty_internet:
                    socket.setdefaulttimeout(1.1)
                else:
                    socket.setdefaulttimeout(0.5)

                resolved_v4 = socket.gethostbyname(domain)
            except Exception as xxx:
                dns_error[0].setdefault(xxx.strerror, []).append(domain)
                continue

            ip_map.setdefault(resolved_v4, []).append(domain)

            with file(resolution_dns_f, 'w+') as f:
                json.dump(ip_map, f)

    print colored("\nResolved %d unique IPv4 from %d unique domain" % (len(ip_map.keys()), len(included_url_dict.keys())), 'green')

    if not len(ip_map.keys()):
        print colored("It appears that you can't access the internet. Please fix that and restart the test.", 'red')
        quit(-1)

    print colored("\nReversing DNS for %d unique IP address..." % len(ip_map.keys() ), 'green')
    reverse_dns_f = os.path.join(OUTPUTDIR, 'reverse.dns')
    if os.path.isfile(reverse_dns_f):
        fp = file(reverse_dns_f, 'r')
        true_domain_map = json.load(fp)
        fp.close()
    else:
        true_domain_map = {}
        counter = 0
        percentage_bound = len(ip_map.keys()) / 10.0

        if not int(percentage_bound):
            percentage_bound = 1.0

        for ipv4 in ip_map.keys():
            counter += 1

            if not (counter % int(percentage_bound) ):
                print "%d\t%d%%\t%s" % (counter, (counter * (10 / percentage_bound) ), time.ctime())
            # other random possibility based on birthday paradox to show counters...
            if random.randint(0, int(percentage_bound * 10 )) == counter:
                print "%d\t%d%%\t%s" % (counter, (counter * (10 / percentage_bound) ), time.ctime())

            try:

                if args.shitty_internet:
                    socket.setdefaulttimeout(1.9)
                else:
                    socket.setdefaulttimeout(0.9)

                resolved_set = socket.gethostbyaddr(ipv4)
                resolved_name = resolved_set[0]
            except Exception as xxx:
                dns_error[1].setdefault(xxx.strerror, []).append(ipv4)
                continue

            true_domain_map.setdefault(resolved_name, []).append(ipv4)

        with file(reverse_dns_f, 'w+') as f:
            json.dump(true_domain_map, f)

    print colored("\nReversed %d unique FQDN" % len(true_domain_map.keys() ), 'green')

    print colored("Saving DNS errors in 'errors.dns'")
    with file(os.path.join(OUTPUTDIR, 'errors.dns'), 'w+') as f:
        json.dump(dns_error, f)

    # traceroutes contains all the output of traceroute in JSON format, separated
    # for logs. this output is not in the media directory, because some host like
    # google are included multiple times.
    trace_output = os.path.join(OUTPUTDIR, '_traceroutes')
    if not os.path.isdir(trace_output):
        os.mkdir(trace_output)

    # _verbotracelogs instead contain the detailed log of traceroute,
    # they would be useful in the future because AS number is not yet used
    # as information in the backend, but, who knows...
    verbotracelogs = os.path.join(OUTPUTDIR, '_verbotracelogs')
    if not os.path.isdir(verbotracelogs):
        os.mkdir(verbotracelogs)

    # saving again information about network location
    do_wget( os.path.join(OUTPUTDIR, 'second.json') )

    # starting traceroute to all the collected IP
    print colored(" ࿓  Running traceroute to %d IP address (from %d hosts)" % (
        len(ip_map.keys()), len(included_url_dict.keys())), 'blue', 'on_white', attrs=['underline'])

    counter = 1
    trace_stats = {}
    gi = GeoIP.new(GeoIP.GEOIP_MEMORY_CACHE)
    for ip_addr, hostlist in ip_map.iteritems():

        progress_string = "%d/%d" % (counter, len(ip_map.keys()))
        print colored("%s%s" % (progress_string, (10 - len(progress_string)) * " " ), "cyan" ),

        t = Traceroute(OUTPUTDIR, ip_addr, hostlist, gi, args.shitty_internet)

        counter += 1

        if t.already_traced():
            print colored ("%s already traced (%d hosts): skipping" % (ip_addr, len(hostlist) ), "green")
            retinfo = "recover"
        elif not t.do_trace():
            retinfo = "fail"
            print colored("Traceroute fails! (%d/10)" % TraceStats.three_hundres, "red")
        else:
            retinfo = "success"
            try:
                t.resolve_target_geoip()
                t.file_dump()
            except Exception:
                retinfo = "anomaly"

        del t
        assert retinfo in [ 'recover', 'success', 'anomaly', 'fail'  ]
        trace_stats.setdefault(retinfo, []).append(ip_addr)

    # Traceroute class need to be enhanced with some kind of:
    #  *  failure measurement and GUESSING WHY
    #  *  retry after a while
    #  *  estimation of shared path - optimization and stabler collection
    if trace_stats.has_key('fail') and len(trace_stats['fail']):
        print colored(" ࿓  Testing again the failed traceroute to %d IP address" %
                len(trace_stats['fail']))
    else:
        # just here to skip a KeyError below
        trace_stats.update({'fail': []})

    counter = 1
    fail_list_copy = list(trace_stats['fail'])
    # a list is done because inside of the loop is changed the
    # content of trace_stats['fail']
    for case_n, failed_trace in enumerate(fail_list_copy):

        hostlist = ip_map[failed_trace]
        t = Traceroute(OUTPUTDIR, failed_trace, hostlist, gi, args.shitty_internet)
        counter += 1
        if not t.do_trace():
            print colored("Failure again.", "red")
            retinfo = "fail"
        else:
            retinfo = "retry"
            trace_stats['fail'].remove(failed_trace)
            try:
                t.resolve_target_geoip()
                t.file_dump()
            except Exception:
                retinfo = "anomaly"

        del t
        assert retinfo in [ 'recover', 'success', 'anomaly', 'fail', 'retry' ]
        trace_stats.setdefault(retinfo, []).append(failed_trace)


    TraceStats([]).dump_stats(OUTPUTDIR)

    if trace_stats.values().count(False):
        print colored("Registered %d failures" % trace_stats.values().count(False), "red")

    ptsj = os.path.join(OUTPUTDIR, '_phantom.trace.stats.json')
    if os.path.isfile(ptsj):
        os.unlink(ptsj)
    with file(ptsj, 'w+') as fp:
        json.dump([ phantom_stats, trace_stats ], fp)

    # saving again*again information about network location
    do_wget(os.path.join(OUTPUTDIR, 'third.json'))

    output_name = 'results-%s.tar.gz' % proposed_country.lower()
    print colored(" ࿓  Analysis done! compressing the output in %s" % output_name, "blue", 'on_white', attrs=['underline'])

    if os.path.isfile(output_name):
        os.unlink(output_name)

    tar = Popen(['tar', '-z', '-c', '-v', '-f', output_name, OUTPUTDIR], stdout=PIPE)

    counter_line = 0
    while True:
        line = tar.stdout.readline()
        counter_line += 1
        if not line:
            break

    if args.disable_send:
        print colored("%d file added to %s" % (counter_line, output_name), "green")
        print colored("Sending disable, test complete.", "yellow"),
        print colored("亷 亸", 'blue', 'on_white')
        quit(0)

    print colored("%d file added to %s, Starting to submit results via Tor network\n" % (counter_line, output_name), "green")
    print colored("If submitting results fails please run:", "red")
    print colored("./perform_analysis.py -s %s" % output_name, "yellow")
    send_results(output_name, hiddenservice_tuple)
# Exemple #2 (scraped example separator)
def main():
    """Legacy (positional-argument) variant of the collection script.

    Takes the country name as sys.argv[1] and an optional 'lp' flag as
    sys.argv[2] (use the locally installed phantomjs instead of the
    phantom-1.9.2 symlink).  Crawls every entry of the country's media
    list with phantomjs, traceroutes every included URL, compresses the
    output directory and hands the archive to sender_results.py through
    torify.

    Relies on the module-level OUTPUTDIR constant.  Exits via quit() on
    fatal errors; returns nothing.
    """
    if not os.path.isdir(OUTPUTDIR):
        try:
            os.mkdir(OUTPUTDIR)
        except OSError as error:
            print "unable to create %s: %s" % (OUTPUTDIR, error)
            # FIX: without the output directory every later write would
            # crash anyway; exit cleanly instead of continuing.
            quit(-1)

    if len(sys.argv) < 2:
        print colored("Usage: %s $YOUR_COUNTRY_NAME <lp>" % sys.argv[0], "red", 'on_white')
        print ""
        print " 'lp' as 3rd argument is needed if you want use your own /usr/bin/phantomjs"
        print " (if you follow README.md, this is not needed because you've phantomjs 1.9.2)"
        print " ",colored("By default, this software is looking for symlink 'phantom-1.9.2'", "green", "on_white")
        if os.path.islink('phantom-1.9.2'):
            print " ",colored("Link that I've checked: you have ;)", "green", "on_white")
        else:
            print " ",colored("Link that I've checked: YOU HAVE NOT!", "red", "on_white")
        quit(-1)

    # check if the user is running phantom as installed on the system (also vagrant make this)
    # of if is using
    if len(sys.argv) == 3 and sys.argv[2] == 'lp':
        local_phantomjs = True

        print colored("You're using your local installed phantomjs. It is needed a version >= than 1.9.0", 'blue', 'on_white')
        print colored("I'm not gonna to compare the string, so, be aware: this is your version:", 'red')

        phantom_version = Popen(['phantomjs', '-v'], stdout=PIPE).stdout.readline()
        print colored(phantom_version, 'blue', 'on_white')
    else:
        if not os.path.islink('phantom-1.9.2'):
            print colored("You have not followd README.md :( I was expecting a symbolick link called phantom-1.9.2", 'red', 'on_white')
            quit(-1)

        local_phantomjs = False

    # country check
    proposed_country = sys.argv[1]
    country_f = os.path.join('verified_media', proposed_country.lower())
    if not os.path.isfile(country_f):
        print colored("Invalid country! not found %s in directory 'verified_media/' " % proposed_country, 'red')
        print "Available countries are:"
        for existing_c in os.listdir('verified_media'):
            if existing_c in ['README.md', 'test']:
                continue
            print "\t", existing_c
        print colored("You can propose your own country media list following these instructions:", 'blue', 'on_yellow')
        print colored("https://github.com/vecna/helpagainsttrack/blob/master/unverified_media_list/README.md", 'blue', 'on_yellow')
        quit(-1)


    # writing in a file which country you're using!
    with file(os.path.join(OUTPUTDIR, 'country'), 'w+') as f:
        f.write(proposed_country.lower())

    # reading media list, cleaning media list and copy media list
    cfp = file(country_f, 'r')
    unclean_lines = cfp.readlines()

    with file(os.path.join(OUTPUTDIR, 'used_media_list'), 'w+') as f:
        f.writelines(unclean_lines)

    media_entries = media_file_cleanings(unclean_lines)
    cfp.close()

    # here start iteration over the media!
    for cleanurl, media_kind in media_entries.iteritems():

        urldir = os.path.join(OUTPUTDIR, cleanurl)
        title_check = os.path.join(urldir, '__title')

        # __title is written only when a crawl completed: its presence
        # means this media was fully collected by a previous run.
        if os.path.isdir(urldir) and os.path.isfile(title_check):
            print "-", urldir, "already present: skipped"
            continue

        if os.path.isdir(urldir):
            # being here means that is empty or incomplete
            shutil.rmtree(urldir)

        print "+ Creating directory", urldir
        os.mkdir(urldir)

        do_phantomjs(local_phantomjs, cleanurl, urldir, media_kind)

    # take every directory in 'output/' and works on the content
    included_url_dict = sortify(OUTPUTDIR)

    assert included_url_dict, "No url included after phantom scraping and collection !?"

    with file(os.path.join(OUTPUTDIR, 'domain.infos'), 'w+') as f:
        json.dump(included_url_dict, f)

    # traceroutes contains all the output of traceroute in JSON format, separated
    # for logs. this output is not in the media directory, because some host like
    # google are included multiple times.
    trace_output = os.path.join(OUTPUTDIR, '_traceroutes')
    if not os.path.isdir(trace_output):
        os.mkdir(trace_output)

    # _verbotracelogs instead contain the detailed log of traceroute,
    # they would be useful in the future because AS number is not yet used
    # as information in the backend, but, who knows...
    verbotracelogs = os.path.join(OUTPUTDIR, '_verbotracelogs')
    if not os.path.isdir(verbotracelogs):
        os.mkdir(verbotracelogs)

    print "Running traceroute to", len(included_url_dict.keys()), "hosts!"
    counter = 1
    failure = 0
    for url, domain_info in included_url_dict.iteritems():

        progress_string = "%d/%d" % (counter, len(included_url_dict.keys()))
        print colored("%s%s" % (progress_string, (10 - len(progress_string)) * " " ), "cyan" ),

        # NOTE(review): url is passed twice and domain_info is unused —
        # looks like the second argument was meant to carry domain_info;
        # verify against do_trace's signature before changing.
        if not do_trace(url, url):
            failure += 1
        counter += 1

    if failure:
        print colored("Registered %d failures" % failure, "red")

    # putting the unique number into
    with file( os.path.join(OUTPUTDIR, "unique_id"), "w+") as f:
        f.write("%d%d%d" % (random.randint(0, 0xffff), random.randint(0, 0xffff), random.randint(0, 0xffff)) )

    output_name = 'results-%s.tar.gz' % proposed_country.lower()
    print colored("Finished! compressing the data in %s" % output_name, "green")

    if os.path.isfile(output_name):
        os.unlink(output_name)

    tar = Popen(['tar', '-z', '-c', '-v', '-f', output_name, OUTPUTDIR], stdout=PIPE)

    # count the files archived by reading tar's verbose output
    counter_line = 0
    while True:
        line = tar.stdout.readline()
        if not line:
            break
        # FIX: increment only for real lines; the old order also counted
        # the empty readline() at EOF, inflating the count by one.
        counter_line += 1
    # reap the tar child so it does not linger as a zombie
    tar.wait()


    print colored("%d file added to %s, Starting 'result_sender.py'" % (counter_line, output_name), "green")
    # result sender has hardcoded our hidden service
    p = Popen(['torify', 'python', './sender_results.py', output_name], stdout=PIPE, stderr=PIPE)

    # Relay the sender's output, filtering the well-known noisy torify
    # warnings from stderr.
    # NOTE(review): reading stdout and stderr in lockstep can block if one
    # stream stays silent while the other is verbose; fine for this tool's
    # typical output, but a select()/communicate() based loop would be safer.
    while True:
        line = p.stdout.readline()
        exx = p.stderr.readline()

        if not line and not exx:
            break

        if exx.find('failed to find the symbol') != -1:
            continue
        if exx.find('libtorsocks') != -1:
            continue

        if line:
            print colored("   %s" % line, 'yellow'),
        if exx:
            print colored(exx, 'red'),
# Exemple #3 (scraped example separator)
def main():

    parser = OptionParser()

    parser.add_option("-c",
                      "--country-name",
                      type="string",
                      help="the country from which you want run the test",
                      dest="medialist")
    parser.add_option("-o",
                      "--output-dir",
                      type="string",
                      default=None,
                      help="directory to store results",
                      dest="user_outputdir")
    parser.add_option("-l",
                      "--local-phantom",
                      action="store_true",
                      help="use local phantomjs instead of the downloaded one",
                      dest="lp")
    parser.add_option("-d",
                      "--disable-sending",
                      action="store_true",
                      help="disable the result sending at the end of the test",
                      dest="disable_send")
    parser.add_option(
        "-i",
        "--instable-internet",
        action="store_true",
        help="If your internet is instable, please enable this option",
        dest="shitty_internet")
    parser.add_option(
        "-s",
        "--send",
        type="string",
        dest="targz_output",
        help="do not perform test, submit a previously collected result.")
    parser.add_option("-v",
                      "--version",
                      action="store_true",
                      dest="version",
                      help="print version, spoiler: %d" % ANALYSIS_VERSION)

    (args, _) = parser.parse_args()

    if args.version:
        print "analysis format version:", ANALYSIS_VERSION
        quit(0)

    if args.targz_output:
        if args.disable_send:
            print colored(
                "You can't use -s (--send) and -d (--disable-sending) options together"
            )
            quit(-1)

        if not os.path.isfile(args.targz_output):
            print colored("Invalid file: %s" % args.targz_output)
            quit(-1)

        print colored(" ࿓  Sending previous results...",
                      'blue',
                      'on_white',
                      attrs=['underline'])
        send_results(args.targz_output, hiddenservice_tuple)
        quit(0)

    if not args.medialist:
        print colored("Usage: %s -c $YOUR_COUNTRY_NAME" % sys.argv[0], "red",
                      'on_white')
        print colored("\t-l (local phantom, instead of the symlink here)",
                      "red", 'on_white')
        print colored("\t-o output directory, used to collect test results",
                      "red", 'on_white')
        print ""
        print " -l option is needed if you want use your own /usr/bin/phantomjs"
        print " (if you follow README.md, this is not needed because you downloaded phantomjs 1.9.2)"
        print " ", colored(
            "By default, this software is looking for symlink 'phantom-1.9.2'",
            "green", "on_white")
        if os.path.islink('phantom-1.9.2'):
            print " ", colored("phantom-1.9.2 is a link, as expected.",
                               "green", "on_white")
        else:
            print " ", colored("The phantom-1.9.2 link is missing!", "red",
                               "on_white")
        print "Look in the verified_media/ for a list of countries."
        print "TrackMap collection tool version: %d" % ANALYSIS_VERSION
        quit(-1)

    # check if the user is running phantom as installed on the system (also vagrant make this)
    # of if is using
    if args.lp:
        local_phantomjs = True

        print colored(
            "You're using your local installed phantomjs. A version >= than 1.9.0 is needed.",
            'blue', 'on_white')
        print colored(
            "I'm not going to compare the string. Be aware: this is your version:",
            'red')

        phantom_version = Popen(['phantomjs', '-v'],
                                stdout=PIPE).stdout.readline()
        print colored(phantom_version, 'blue', 'on_white')
    else:
        if not os.path.islink('phantom-1.9.2'):
            print colored(
                "Missing phantom-1.9.2. A symbolic link named phantom-1.9.2 was expected, but not found. Please consult README.md and make sure you've followed the installation procedure exactly.",
                'red', 'on_white')
            quit(-1)

        local_phantomjs = False

    if not args.disable_send:
        tor_test = ("127.0.0.1", 9050)
        c = socket.socket()
        try:
            c.connect(tor_test)
            c.close()
        except Exception as xxx:
            print colored(
                "Unable to connect to %s, Tor is needed to send results" %
                str(tor_test), "red")
            print colored(xxx, "red")
            print colored("You can disable result sending with the option -d",
                          "yellow")
            quit(-1)
        del c

    # country check
    proposed_country = args.medialist
    country_f = os.path.join('verified_media', proposed_country.lower())
    if not os.path.isfile(country_f):
        print colored(
            "Invalid country! not found %s in directory 'verified_media/' " %
            proposed_country, 'red')
        print "Available countries are:"
        for existing_c in os.listdir('verified_media'):
            if existing_c in ['README.md', 'test']:
                continue
            print "\t", existing_c
        print colored(
            "You can propose your own country media list following these instructions:",
            'blue', 'on_white')
        print colored(
            "https://github.com/vecna/helpagainsttrack/blob/master/unverified_media_list/README.md",
            'blue', 'on_white')
        quit(-1)

    # check if the output directory is not the default and/or if need to be created
    if args.user_outputdir:
        OUTPUTDIR = args.user_outputdir
    else:
        OUTPUTDIR = 'output/'

    if not os.path.isdir(OUTPUTDIR):
        try:
            os.mkdir(OUTPUTDIR)
        except OSError as error:
            print "unable to create %s: %s" % (OUTPUTDIR, error)

    # ask free information to the script runner
    info_f = os.path.join(OUTPUTDIR, 'information')
    if os.path.isfile(info_f):
        f = open(info_f, 'r')
        information = json.load(f)
        f.close()
        print colored("Recovered information of previous collection:", 'green')
        print " name:", information['name']
        print " contact:", information['contact']
        print " ISP:", information['ISP']
        print " city:", information['city']
    else:
        information = {}
        print colored(
            "Optionally, provide the information requested below, or press Enter to skip:",
            'green')

        def question(description):
            print colored(description, 'white', 'on_blue')
            answer = sys.stdin.readline()
            answer = answer.strip('\n')
            return None if not len(answer) else answer

        information['name'] = question('Your name:')
        information['contact'] = question('Mail or jabber contact:')
        information['ISP'] = question('Which ISP is providing your link:')
        information['city'] = question(
            'From which city you\'re running this script:')
        information['version'] = ANALYSIS_VERSION

        with file(info_f, 'w+') as f:
            json.dump(information, f)

    # writing in a file which country you're using!
    with file(os.path.join(OUTPUTDIR, 'country'), 'w+') as f:
        f.write(proposed_country.lower())

    # reading media list, cleaning media list and copy media list
    cfp = file(country_f, 'r')
    unclean_lines = cfp.readlines()

    with file(os.path.join(OUTPUTDIR, 'used_media_list'), 'w+') as f:
        f.writelines(unclean_lines)

    # reconding an unique number is always useful, also if I've not yet in mind an usage right now.
    with file(os.path.join(OUTPUTDIR, "unique_id"), "w+") as f:
        f.write("%d%d%d" % (random.randint(0, 0xffff), random.randint(
            0, 0xffff), random.randint(0, 0xffff)))

    print colored(" ࿓  Importing media list:",
                  'blue',
                  'on_white',
                  attrs=['underline'])
    media_entries = media_file_cleanings(unclean_lines)
    cfp.close()

    print colored(" ࿓  Checking your network source.",
                  'blue',
                  'on_white',
                  attrs=['underline'])
    do_wget(os.path.join(OUTPUTDIR, 'first.json'))

    print colored(" ࿓  Starting media crawling:",
                  'blue',
                  'on_white',
                  attrs=['underline'])
    # here start iteration over the media!
    phantom_stats = {}
    for cleanurl, media_kind in media_entries.iteritems():

        urldir = os.path.join(OUTPUTDIR, cleanurl)
        title_check = os.path.join(urldir, '__title')

        if os.path.isdir(urldir) and os.path.isfile(title_check):
            print "-", urldir, "already present: skipped"
            phantom_stats.setdefault('resumed', []).append(cleanurl)
            continue

        if os.path.isdir(urldir):
            # being here means that is empty or incomplete
            shutil.rmtree(urldir)

        print "+ Creating directory", urldir
        os.mkdir(urldir)

        retinfo = do_phantomjs(local_phantomjs, cleanurl, urldir, media_kind,
                               OUTPUTDIR)
        assert retinfo in ['first', 'second', 'failures']
        phantom_stats.setdefault(retinfo, []).append(cleanurl)

    # take every directory in 'output/', get the included URL and dump in a dict
    included_url_dict = sortify(OUTPUTDIR)
    assert included_url_dict, "No url included after phantom scraping and collection !?"
    with file(os.path.join(OUTPUTDIR, 'domain.infos'), 'w+') as f:
        json.dump(included_url_dict, f)

    # generate DNS resolution map. for every host resolve an IP, for every IP resolve again DNS
    print colored(" ࿓  DNS resolution and reverse of %d domains..." %
                  len(included_url_dict),
                  'blue',
                  'on_white',
                  attrs=['underline'])

    # new format contain:
    # first dict: resolution error
    # second dict: reverse error
    dns_error = [{}, {}]

    # now, until there is not refactor based on classes,
    # the resolution of the previously failed DN will not happen
    resolution_dns_f = os.path.join(OUTPUTDIR, 'resolution.dns')
    if os.path.isfile(resolution_dns_f):
        fp = file(resolution_dns_f, 'r')
        ip_map = json.load(fp)
        fp.close()
    else:
        ip_map = {}
        counter = 0
        percentage_bound = len(included_url_dict.keys()) / 10.0

        if not int(percentage_bound):
            percentage_bound = 1.0

        for domain in included_url_dict.keys():
            counter += 1
            if not counter % int(percentage_bound):
                print "%d\t%d%%\t%s" % (counter,
                                        (counter * (10 / percentage_bound)),
                                        time.ctime())
            # other random possibility based on birthday paradox to show counters...
            if random.randint(0, int(percentage_bound * 10)) == counter:
                print "%d\t%d%%\t%s" % (counter,
                                        (counter * (10 / percentage_bound)),
                                        time.ctime())

            try:

                if args.shitty_internet:
                    socket.setdefaulttimeout(1.1)
                else:
                    socket.setdefaulttimeout(0.5)

                resolved_v4 = socket.gethostbyname(domain)
            except Exception as xxx:
                dns_error[0].setdefault(xxx.strerror, []).append(domain)
                continue

            ip_map.setdefault(resolved_v4, []).append(domain)

            with file(resolution_dns_f, 'w+') as f:
                json.dump(ip_map, f)

    print colored(
        "\nResolved %d unique IPv4 from %d unique domain" %
        (len(ip_map.keys()), len(included_url_dict.keys())), 'green')

    if not len(ip_map.keys()):
        print colored(
            "It appears that you can't access the internet. Please fix that and restart the test.",
            'red')
        quit(-1)

    print colored(
        "\nReversing DNS for %d unique IP address..." % len(ip_map.keys()),
        'green')
    reverse_dns_f = os.path.join(OUTPUTDIR, 'reverse.dns')
    if os.path.isfile(reverse_dns_f):
        fp = file(reverse_dns_f, 'r')
        true_domain_map = json.load(fp)
        fp.close()
    else:
        true_domain_map = {}
        counter = 0
        percentage_bound = len(ip_map.keys()) / 10.0

        if not int(percentage_bound):
            percentage_bound = 1.0

        for ipv4 in ip_map.keys():
            counter += 1

            if not (counter % int(percentage_bound)):
                print "%d\t%d%%\t%s" % (counter,
                                        (counter * (10 / percentage_bound)),
                                        time.ctime())
            # other random possibility based on birthday paradox to show counters...
            if random.randint(0, int(percentage_bound * 10)) == counter:
                print "%d\t%d%%\t%s" % (counter,
                                        (counter * (10 / percentage_bound)),
                                        time.ctime())

            try:

                if args.shitty_internet:
                    socket.setdefaulttimeout(1.9)
                else:
                    socket.setdefaulttimeout(0.9)

                resolved_set = socket.gethostbyaddr(ipv4)
                resolved_name = resolved_set[0]
            except Exception as xxx:
                dns_error[1].setdefault(xxx.strerror, []).append(ipv4)
                continue

            true_domain_map.setdefault(resolved_name, []).append(ipv4)

        with file(reverse_dns_f, 'w+') as f:
            json.dump(true_domain_map, f)

    print colored("\nReversed %d unique FQDN" % len(true_domain_map.keys()),
                  'green')

    print colored("Saving DNS errors in 'errors.dns'")
    with file(os.path.join(OUTPUTDIR, 'errors.dns'), 'w+') as f:
        json.dump(dns_error, f)

    # traceroutes contains all the output of traceroute in JSON format, separated
    # for logs. this output is not in the media directory, because some host like
    # google are included multiple times.
    trace_output = os.path.join(OUTPUTDIR, '_traceroutes')
    if not os.path.isdir(trace_output):
        os.mkdir(trace_output)

    # _verbotracelogs instead contain the detailed log of traceroute,
    # they would be useful in the future because AS number is not yet used
    # as information in the backend, but, who knows...
    verbotracelogs = os.path.join(OUTPUTDIR, '_verbotracelogs')
    if not os.path.isdir(verbotracelogs):
        os.mkdir(verbotracelogs)

    # saving again information about network location
    do_wget(os.path.join(OUTPUTDIR, 'second.json'))

    # starting traceroute to all the collected IP
    print colored(" ࿓  Running traceroute to %d IP address (from %d hosts)" %
                  (len(ip_map.keys()), len(included_url_dict.keys())),
                  'blue',
                  'on_white',
                  attrs=['underline'])

    counter = 1
    trace_stats = {}
    gi = GeoIP.new(GeoIP.GEOIP_MEMORY_CACHE)
    for ip_addr, hostlist in ip_map.iteritems():

        progress_string = "%d/%d" % (counter, len(ip_map.keys()))
        print colored(
            "%s%s" % (progress_string, (10 - len(progress_string)) * " "),
            "cyan"),

        t = Traceroute(OUTPUTDIR, ip_addr, hostlist, gi, args.shitty_internet)

        counter += 1

        if t.already_traced():
            print colored(
                "%s already traced (%d hosts): skipping" %
                (ip_addr, len(hostlist)), "green")
            retinfo = "recover"
        elif not t.do_trace():
            retinfo = "fail"
            print colored(
                "Traceroute fails! (%d/10)" % TraceStats.three_hundres, "red")
        else:
            retinfo = "success"
            try:
                t.resolve_target_geoip()
                t.file_dump()
            except Exception:
                retinfo = "anomaly"

        del t
        assert retinfo in ['recover', 'success', 'anomaly', 'fail']
        trace_stats.setdefault(retinfo, []).append(ip_addr)

    # Traceroute class need to be enhanced with some kind of:
    #  *  failure measurement and GUESSING WHY
    #  *  retry after a while
    #  *  estimation of shared path - optimization and stabler collection
    if trace_stats.has_key('fail') and len(trace_stats['fail']):
        print colored(
            " ࿓  Testing again the failed traceroute to %d IP address" %
            len(trace_stats['fail']))
    else:
        # just here to skip a KeyError below
        trace_stats.update({'fail': []})

    counter = 1
    fail_list_copy = list(trace_stats['fail'])
    # a list is done because inside of the loop is changed the
    # content of trace_stats['fail']
    for case_n, failed_trace in enumerate(fail_list_copy):

        hostlist = ip_map[failed_trace]
        t = Traceroute(OUTPUTDIR, failed_trace, hostlist, gi,
                       args.shitty_internet)
        counter += 1
        if not t.do_trace():
            print colored("Failure again.", "red")
            retinfo = "fail"
        else:
            retinfo = "retry"
            trace_stats['fail'].remove(failed_trace)
            try:
                t.resolve_target_geoip()
                t.file_dump()
            except Exception:
                retinfo = "anomaly"

        del t
        assert retinfo in ['recover', 'success', 'anomaly', 'fail', 'retry']
        trace_stats.setdefault(retinfo, []).append(failed_trace)

    TraceStats([]).dump_stats(OUTPUTDIR)

    if trace_stats.values().count(False):
        print colored(
            "Registered %d failures" % trace_stats.values().count(False),
            "red")

    ptsj = os.path.join(OUTPUTDIR, '_phantom.trace.stats.json')
    if os.path.isfile(ptsj):
        os.unlink(ptsj)
    with file(ptsj, 'w+') as fp:
        json.dump([phantom_stats, trace_stats], fp)

    # saving again*again information about network location
    do_wget(os.path.join(OUTPUTDIR, 'third.json'))

    output_name = 'results-%s.tar.gz' % proposed_country.lower()
    print colored(" ࿓  Analysis done! compressing the output in %s" %
                  output_name,
                  "blue",
                  'on_white',
                  attrs=['underline'])

    if os.path.isfile(output_name):
        os.unlink(output_name)

    tar = Popen(['tar', '-z', '-c', '-v', '-f', output_name, OUTPUTDIR],
                stdout=PIPE)

    counter_line = 0
    while True:
        line = tar.stdout.readline()
        counter_line += 1
        if not line:
            break

    if args.disable_send:
        print colored("%d file added to %s" % (counter_line, output_name),
                      "green")
        print colored("Sending disable, test complete.", "yellow"),
        print colored("亷 亸", 'blue', 'on_white')
        quit(0)

    print colored(
        "%d file added to %s, Starting to submit results via Tor network\n" %
        (counter_line, output_name), "green")
    print colored("If submitting results fails please run:", "red")
    print colored("./perform_analysis.py -s %s" % output_name, "yellow")
    send_results(output_name, hiddenservice_tuple)
def main():
    # Entry point (Python 2): crawl a country's media list with phantomjs,
    # resolve/reverse the collected domains, traceroute the IPs, then tar the
    # results and submit them through Tor.
    #
    # Relies on module-level names defined elsewhere in this file:
    # OUTPUTDIR, ANALYSIS_VERSION, colored, do_wget, do_phantomjs, do_trace,
    # media_file_cleanings, sortify -- TODO(review): confirm their contracts.

    # Make sure the output directory exists.  A failure here is only printed,
    # not fatal; the later file writes would fail loudly instead.
    if not os.path.isdir(OUTPUTDIR):
        try:
            os.mkdir(OUTPUTDIR)
        except OSError as error:
            print "unable to create %s: %s" % (OUTPUTDIR, error)

    # Positional CLI: argv[1] = country name, optional argv[2] = 'lp'.
    if len(sys.argv) < 2:
        print colored("Usage: %s $YOUR_COUNTRY_NAME <lp>" % sys.argv[0], "red", 'on_white')
        print ""
        print " 'lp' as 3rd argument is needed if you want use your own /usr/bin/phantomjs"
        print " (if you follow README.md, this is not needed because you downloaded phantomjs 1.9.2)"
        print " ",colored("By default, this software is looking for symlink 'phantom-1.9.2'", "green", "on_white")
        if os.path.islink('phantom-1.9.2'):
            print " ",colored("phantom-1.9.2 is a link, as expected.", "green", "on_white")
        else:
            print " ",colored("The phantom-1.9.2 link is missing!", "red", "on_white")
        print "Look in the verified_media/ for a list of countries."
        quit(-1)

    # Decide which phantomjs binary to use: 'lp' means the system-installed
    # one, otherwise the phantom-1.9.2 symlink shipped per README.md.
    if len(sys.argv) == 3 and sys.argv[2] == 'lp':
        local_phantomjs = True

        print colored("You're using your local installed phantomjs. A version >= than 1.9.0 is needed.", 'blue', 'on_white')
        print colored("I'm not going to compare the string. Be aware: this is your version:", 'red')

        # Only the first line of `phantomjs -v` output is shown.
        phantom_version = Popen(['phantomjs', '-v'], stdout=PIPE).stdout.readline()
        print colored(phantom_version, 'blue', 'on_white')
    else:
        if not os.path.islink('phantom-1.9.2'):
            print colored("Missing phantom-1.9.2. A symbolic link named phantom-1.9.2 was expected, but not found. Please consult README.md and make sure you've followed the installation procedure exactly.", 'red', 'on_white')
            quit(-1)

        local_phantomjs = False

    # country check: the lowercase country name must match a file under
    # verified_media/ containing that country's media list.
    proposed_country = sys.argv[1]
    country_f = os.path.join('verified_media', proposed_country.lower())
    if not os.path.isfile(country_f):
        print colored("Invalid country! not found %s in directory 'verified_media/' " % proposed_country, 'red')
        print "Available countries are:"
        for existing_c in os.listdir('verified_media'):
            if existing_c in ['README.md', 'test']:
                continue
            print "\t", existing_c
        print colored("You can propose your own country media list following these instructions:", 'blue', 'on_white')
        print colored("https://github.com/vecna/helpagainsttrack/blob/master/unverified_media_list/README.md", 'blue', 'on_white')
        quit(-1)

    # ask free information to the script runner; if a previous run already
    # saved it, reuse it instead of asking again (makes reruns resumable).
    info_f = os.path.join(OUTPUTDIR, 'information')
    if os.path.isfile(info_f):
        f = open(info_f, 'r')
        information = json.load(f)
        f.close()
        print colored("Recovered information of previous collection:", 'green')
        print " name:", information['name']
        print " contact:", information['contact']
        print " ISP:", information['ISP']
        print " city:", information['city']
    else:
        information = {}
        print colored("Optionally, provide the information requested below, or press Enter to skip:", 'green')

        def question(description):
            # Prompt the operator; an empty line (just Enter) becomes None.
            print colored(description, 'white', 'on_blue')
            answer = sys.stdin.readline()
            answer = answer.strip('\n')
            return None if not len(answer) else answer

        information['name'] = question('Your name:')
        information['contact'] = question('Mail or jabber contact:')
        information['ISP'] = question('Which ISP is providing your link:')
        information['city'] = question('From which city you\'re running this script:')
        information['version'] = ANALYSIS_VERSION

        with file(info_f, 'w+') as f:
            json.dump(information, f)


    # writing in a file which country you're using!
    with file(os.path.join(OUTPUTDIR, 'country'), 'w+') as f:
        f.write(proposed_country.lower())

    # reading media list, cleaning media list and copy media list
    cfp = file(country_f, 'r')
    unclean_lines = cfp.readlines()

    with file(os.path.join(OUTPUTDIR, 'used_media_list'), 'w+') as f:
        f.writelines(unclean_lines)

    # recording an unique number is always useful, also if I've not yet in
    # mind an usage right now (identifies this collection run).
    with file( os.path.join(OUTPUTDIR, "unique_id"), "w+") as f:
        f.write("%d%d%d" % (random.randint(0, 0xffff), random.randint(0, 0xffff), random.randint(0, 0xffff)) )

    print colored(" ࿓  Importing media list:", 'blue', 'on_white', attrs=['underline'])
    media_entries = media_file_cleanings(unclean_lines)
    cfp.close()

    # Snapshot of the network vantage point before the crawl starts.
    print colored(" ࿓  Checking your network source.", 'blue', 'on_white', attrs=['underline'])
    do_wget('first.json')

    print colored(" ࿓  Starting media crawling:", 'blue', 'on_white', attrs=['underline'])
    # here start iteration over the media!
    phantom_stats = {}
    for cleanurl, media_kind in media_entries.iteritems():

        urldir = os.path.join(OUTPUTDIR, cleanurl)
        # '__title' acts as a completion marker: present only when a previous
        # crawl of this URL finished successfully.
        title_check = os.path.join(urldir, '__title')

        if os.path.isdir(urldir) and os.path.isfile(title_check):
            print "-", urldir, "already present: skipped"
            phantom_stats.setdefault('resumed', []).append(cleanurl)
            continue

        if os.path.isdir(urldir):
            # being here means that is empty or incomplete
            shutil.rmtree(urldir)

        print "+ Creating directory", urldir
        os.mkdir(urldir)

        retinfo = do_phantomjs(local_phantomjs, cleanurl, urldir, media_kind)
        phantom_stats.setdefault(retinfo, []).append(cleanurl)

    # take every directory in 'output/', get the included URL and dump in a dict
    included_url_dict = sortify(OUTPUTDIR)
    assert included_url_dict, "No url included after phantom scraping and collection !?"
    with file(os.path.join(OUTPUTDIR, 'domain.infos'), 'w+') as f:
        json.dump(included_url_dict, f)

    # generate DNS resolution map. for every host resolve an IP, for every IP resolve again DNS
    print colored(" ࿓  DNS resolution and reverse of %d domains" % len(included_url_dict), 'blue', 'on_white', attrs=['underline'])
    # dns_error accumulates [name, error] pairs from BOTH the forward and the
    # reverse phases below, and is dumped to 'errors.dns' at the end.
    dns_error = []

    # Forward resolution is cached in 'resolution.dns' so a rerun skips it.
    resolution_dns_f = os.path.join(OUTPUTDIR, 'resolution.dns')
    if os.path.isfile(resolution_dns_f):
        fp = file(resolution_dns_f, 'r')
        ip_map = json.load(fp)
        fp.close()
    else:
        ip_map = {}
        counter = 0
        # One progress line roughly every 10% of the domains.
        percentage_bound = len(included_url_dict.keys()) / 10.0

        # Guard against int(percentage_bound) == 0 (fewer than 10 domains),
        # which would make the modulo below divide by zero.
        if not int(percentage_bound):
            percentage_bound = 1.0

        for domain in included_url_dict.keys():
            counter += 1
            if not counter % int(percentage_bound):
                print "%d\t%d%%\t%s" % (counter, (counter * (10 / percentage_bound) ), time.ctime())

            try:
                # Short global socket timeout keeps a dead resolver from
                # stalling the whole run.
                socket.setdefaulttimeout(0.5)
                resolved_v4 = socket.gethostbyname(domain)
            except Exception as xxx:
                dns_error.append([domain, xxx.strerror])
                continue

            # Many domains can share one IPv4: map IP -> [domains].
            ip_map.setdefault(resolved_v4, []).append(domain)

        with file(resolution_dns_f, 'w+') as f:
            json.dump(ip_map, f)

    print colored("\nResolved %d unique IPv4 from %d unique domain" % (len(ip_map.keys()), len(included_url_dict.keys()) ), 'green')

    # If every single resolution failed, the network is presumed down.
    if len(dns_error) == len(included_url_dict.keys()):
        print colored("It appears that you can't access the internet. Please fix that and restart the test.", 'red')
        quit(-1)

    # Reverse resolution is likewise cached, in 'reverse.dns'.
    reverse_dns_f = os.path.join(OUTPUTDIR, 'reverse.dns')
    if os.path.isfile(reverse_dns_f):
        fp = file(reverse_dns_f, 'r')
        true_domain_map = json.load(fp)
        fp.close()
    else:
        true_domain_map = {}
        counter = 0
        percentage_bound = len(ip_map.keys()) / 10.0

        # Same divide-by-zero guard as in the forward phase.
        if not int(percentage_bound):
            percentage_bound = 1.0

        for ipv4 in ip_map.keys():
            counter += 1

            if not (counter % int(percentage_bound) ):
                print "%d\t%d%%\t%s" % (counter, (counter * (10 / percentage_bound) ), time.ctime())

            try:
                socket.setdefaulttimeout(0.9)
                # gethostbyaddr returns (hostname, aliaslist, ipaddrlist);
                # only the primary hostname is kept.
                resolved_set = socket.gethostbyaddr(ipv4)
                resolved_name = resolved_set[0]
            except Exception as xxx:
                dns_error.append([ipv4, xxx.strerror])
                continue

            true_domain_map.setdefault(resolved_name, []).append(ipv4)

        with file(reverse_dns_f, 'w+') as f:
            json.dump(true_domain_map, f)

    print colored("\nReversed %d unique FQDN" % len(true_domain_map.keys() ), 'green')

    if len(dns_error):
        print colored("Saving %d errors in 'errors.dns'" % len(dns_error))
        with file(os.path.join(OUTPUTDIR, 'errors.dns'), 'w+') as f:
            json.dump(dns_error, f)

    # traceroutes contains all the output of traceroute in JSON format, separated
    # for logs. this output is not in the media directory, because some host like
    # google are included multiple times.
    trace_output = os.path.join(OUTPUTDIR, '_traceroutes')
    if not os.path.isdir(trace_output):
        os.mkdir(trace_output)

    # _verbotracelogs instead contain the detailed log of traceroute,
    # they would be useful in the future because AS number is not yet used
    # as information in the backend, but, who knows...
    verbotracelogs = os.path.join(OUTPUTDIR, '_verbotracelogs')
    if not os.path.isdir(verbotracelogs):
        os.mkdir(verbotracelogs)

    # saving again information about network location
    do_wget('second.json')

    # starting traceroute to all the collected IP
    print colored(" ࿓  Running traceroute to %d IP address (from %d hosts)" % (
        len(ip_map.keys()),
        len(included_url_dict.keys())),
                  'blue', 'on_white', attrs=['underline'])

    counter = 1
    # trace_stats maps ip -> True/False (traceroute success/failure).
    trace_stats = {}
    for ip_addr, hostlist in ip_map.iteritems():

        # Fixed-width "n/total" progress prefix; trailing comma keeps the
        # next print on the same line (Python 2 print statement).
        progress_string = "%d/%d" % (counter, len(ip_map.keys()))
        print colored("%s%s" % (progress_string, (10 - len(progress_string)) * " " ), "cyan" ),

        if not do_trace(hostlist, ip_addr):
            trace_stats.update({ip_addr : False })
        else:
            trace_stats.update({ip_addr : True })

        counter += 1
        # TraceStats([]).dump_stats()

    if trace_stats.values().count(False):
        print colored("Registered %d failures" % trace_stats.values().count(False), "red")

    # Persist both the phantomjs and traceroute outcome summaries.
    ptsj = os.path.join(OUTPUTDIR, '_phantom.trace.stats.json')
    if os.path.isfile(ptsj):
        os.unlink(ptsj)
    with file(ptsj, 'w+') as fp:
        json.dump([ phantom_stats, trace_stats ], fp)

    # saving again*again information about network location
    do_wget('third.json')

    output_name = 'results-%s.tar.gz' % proposed_country.lower()
    print colored(" ࿓  Analysis done! compressing the output in %s" % output_name, "blue", 'on_white', attrs=['underline'])

    if os.path.isfile(output_name):
        os.unlink(output_name)

    tar = Popen(['tar', '-z', '-c', '-v', '-f', output_name, OUTPUTDIR], stdout=PIPE)

    # Count the files tar lists on stdout (NOTE(review): counter_line is
    # incremented before the EOF check, so the total is off by one).
    counter_line = 0
    while True:
        line = tar.stdout.readline()
        counter_line += 1
        if not line:
            break


    print colored("%d file added to %s, Starting 'result_sender.py'\n" % (counter_line, output_name), "green")
    print colored("If submitting results fails please type:", "red")
    print colored(" torify python ./sender_results.py %s" % output_name, "green")
    print colored("If this command also fails (and raise a python Exception), please report the error to trackmap at tacticaltech dot org :)", 'red')

    # result sender has hardcoded our hidden service
    p = Popen(['torify', 'python', './sender_results.py', output_name], stdout=PIPE, stderr=PIPE)

    # Relay the sender's output, filtering the noisy torsocks warnings.
    while True:
        line = p.stdout.readline()
        exx = p.stderr.readline()

        if not line and not exx:
            break

        if exx.find('failed to find the symbol') != -1:
            continue
        if exx.find('libtorsocks') != -1:
            continue

        if line:
            print colored("   %s" % line, 'yellow')
        if exx:
            print colored(exx, 'red')
# Exemple #5
# 0
def main():
    if not os.path.isdir(OUTPUTDIR):
        try:
            os.mkdir(OUTPUTDIR)
        except OSError as error:
            print "unable to create %s: %s" % (OUTPUTDIR, error)

    if len(sys.argv) < 2:
        print colored("Usage: %s $YOUR_COUNTRY_NAME <lp>" % sys.argv[0], "red", 'on_white')
        print ""
        print " 'lp' as 3rd argument is needed if you want use your own /usr/bin/phantomjs"
        print " (if you follow README.md, this is not needed because you've phantomjs 1.9.2)"
        print " ",colored("By default, this software is looking for symlink 'phantom-1.9.2'", "green", "on_white")
        if os.path.islink('phantom-1.9.2'):
            print " ",colored("Link that I've checked: you have ;)", "green", "on_white")
        else:
            print " ",colored("Link that I've checked: YOU HAVE NOT!", "red", "on_white")
        quit(-1)

    # check if the user is running phantom as installed on the system (also vagrant make this)
    # of if is using
    if len(sys.argv) == 3 and sys.argv[2] == 'lp':
        local_phantomjs = True

        print colored("You're using your local installed phantomjs. It is needed a version >= than 1.9.0", 'blue', 'on_white')
        print colored("I'm not gonna to compare the string, so, be aware: this is your version:", 'red')

        phantom_version = Popen(['phantomjs', '-v'], stdout=PIPE).stdout.readline()
        print colored(phantom_version, 'blue', 'on_white')
    else:
        if not os.path.islink('phantom-1.9.2'):
            print colored("You have not followd README.md :( I was expecting a symbolick link called phantom-1.9.2", 'red', 'on_white')
            quit(-1)

        local_phantomjs = False

    # country check
    proposed_country = sys.argv[1]
    country_f = os.path.join('verified_media', proposed_country.lower())
    if not os.path.isfile(country_f):
        print colored("Invalid country! not found %s in directory 'verified_media/' " % proposed_country, 'red')
        print "Available countries are:"
        for existing_c in os.listdir('verified_media'):
            if existing_c in ['README.md', 'test']:
                continue
            print "\t", existing_c
        print colored("You can propose your own country media list following these instructions:", 'blue', 'on_white')
        print colored("https://github.com/vecna/helpagainsttrack/blob/master/unverified_media_list/README.md", 'blue', 'on_white')
        quit(-1)

    # ask free information to the script runner
    information = {}
    print colored("Optionally, provide the informations requested below, or press Enter to skip:", 'green')

    def question(description):
        print colored(description, 'white', 'on_blue')
        answer = sys.stdin.readline()
        answer = answer.strip('\n')
        return None if not len(answer) else answer

    information['name'] = question('Your name:')
    information['contact'] = question('Mail or jabber contact:')
    information['ISP'] = question('Which ISP is providing your link:')
    information['city'] = question('From which city you\'re running this script:')

    with file(os.path.join(OUTPUTDIR, 'information'), 'w+') as f:
        json.dump(information, f)

    # writing in a file which country you're using!
    with file(os.path.join(OUTPUTDIR, 'country'), 'w+') as f:
        f.write(proposed_country.lower())

    # reading media list, cleaning media list and copy media list
    cfp = file(country_f, 'r')
    unclean_lines = cfp.readlines()

    with file(os.path.join(OUTPUTDIR, 'used_media_list'), 'w+') as f:
        f.writelines(unclean_lines)

    print colored(" ࿓  Importing media list:", 'blue', 'on_white', attrs=['underline'])
    media_entries = media_file_cleanings(unclean_lines)
    cfp.close()

    print colored(" ࿓  Starting media crawling:", 'blue', 'on_white', attrs=['underline'])
    # here start iteration over the media!
    for cleanurl, media_kind in media_entries.iteritems():

        urldir = os.path.join(OUTPUTDIR, cleanurl)
        title_check = os.path.join(urldir, '__title')

        if os.path.isdir(urldir) and os.path.isfile(title_check):
            print "-", urldir, "already present: skipped"
            continue

        if os.path.isdir(urldir):
            # being here means that is empty or incomplete
            shutil.rmtree(urldir)

        print "+ Creating directory", urldir
        os.mkdir(urldir)

        do_phantomjs(local_phantomjs, cleanurl, urldir, media_kind)

    # take every directory in 'output/', get the included URL and dump in a dict
    included_url_dict = sortify(OUTPUTDIR)
    assert included_url_dict, "No url included after phantom scraping and collection !?"
    with file(os.path.join(OUTPUTDIR, 'domain.infos'), 'w+') as f:
        json.dump(included_url_dict, f)

    # TODO optimization
    # if os.path.isfile(os.path.join(OUTPUTDIR, 'resolution.dns')):
    # if os.path.isfile(os.path.join(OUTPUTDIR, 'reverse.dns')):

    print colored(" ࿓  DNS resolution and reverse of %d domains" % len(included_url_dict), 'blue', 'on_white', attrs=['underline'])
    # when a "+" is printed, mean that a new IP/reverse has been added,
    # when a "*" is printed, mean that an older IP/reverse has a new associate
    # when a "-" is printed, has been an error!
    dns_error = []
    # generate DNS resolution map. for every host resolve an IP, for every IP resolve again DNS
    ip_map = {}
    counter = 0
    percentage_bound = len(included_url_dict.keys()) / 10.0
    for domain in included_url_dict.keys():
        counter += 1

        if not (counter % int(percentage_bound) ):
            print "%d\t%d%%\t%s" % (counter, (counter * (10 / percentage_bound) ), time.ctime())

        try:
            socket.setdefaulttimeout(0.5)
            resolved_v4 = socket.gethostbyname(domain)
        except Exception as xxx:
            dns_error.append([domain, xxx.strerror])
            continue

        if ip_map.has_key(resolved_v4):
            ip_map[resolved_v4].append(domain)
        else:
            ip_map.update({resolved_v4 : [ domain ] })

    print colored("\nResolved %d unique IPv4 from %d unique domain" % (len(ip_map.keys()), len(included_url_dict.keys()) ), 'green')
    with file(os.path.join(OUTPUTDIR, 'resolution.dns'), 'w+') as f:
        json.dump(ip_map, f)

    if len(dns_error) == len(included_url_dict.keys()):
        print colored("Very probably your network is broken, right ? restart the test when fixed.", 'red')
        quit(-1)

    true_domain_map = {} 
    counter = 0
    percentage_bound = len(ip_map.keys()) / 10.0
    for ipv4 in ip_map.keys():
        counter += 1

        if not (counter % int(percentage_bound) ):
            print "%d\t%d%%\t%s" % (counter, (counter * (10 / percentage_bound) ), time.ctime())

        try:
            socket.setdefaulttimeout(0.9)
            resolved_set = socket.gethostbyaddr(ipv4)
            resolved_name = resolved_set[0]
        except Exception as xxx:
            dns_error.append([ipv4, xxx.strerror])
            continue

        if true_domain_map.has_key(resolved_name):
            true_domain_map[resolved_name].append(ipv4)
        else:
            true_domain_map.update({resolved_name : [ ipv4 ] })

    print colored("\nReversed %d unique FQDN" % len(true_domain_map.keys() ), 'green')
    with file(os.path.join(OUTPUTDIR, 'reverse.dns'), 'w+') as f:
        json.dump(true_domain_map, f)

    with file(os.path.join(OUTPUTDIR, 'errors.dns'), 'w+') as f:
        json.dump(dns_error, f)

    # _traceroutes contains all the output of traceroute in JSON format,
    # separated per log. This output is not kept in the media directory,
    # because some hosts (like google) are included multiple times.
    trace_output = os.path.join(OUTPUTDIR, '_traceroutes')
    if not os.path.isdir(trace_output):
        os.mkdir(trace_output)

    # _verbotracelogs instead contains the detailed traceroute logs;
    # they may be useful in the future because the AS number is not yet
    # used as information in the backend, but, who knows...
    verbotracelogs = os.path.join(OUTPUTDIR, '_verbotracelogs')
    if not os.path.isdir(verbotracelogs):
        os.mkdir(verbotracelogs)


    print colored(" ࿓  Running traceroute to %d hosts" % len(included_url_dict.keys()), 'blue', 'on_white', attrs=['underline'])
    counter = 1
    failure = 0
    for url, domain_info in included_url_dict.iteritems():

        # left-aligned "n/total" progress prefix padded to 10 columns;
        # the trailing comma keeps the traceroute output on the same line
        progress_string = "%d/%d" % (counter, len(included_url_dict.keys()))
        print colored("%s%s" % (progress_string, (10 - len(progress_string)) * " " ), "cyan" ),

        # NOTE(review): do_trace receives url twice and domain_info is
        # never used in this loop -- confirm the second argument is really
        # meant to be url and not domain_info
        if not do_trace(url, url):
            failure += 1
        counter += 1

    if failure:
        print colored("Registered %d failures" % failure, "red")

    # write a random id for this result set, so the backend can tell
    # different submissions apart
    with file( os.path.join(OUTPUTDIR, "unique_id"), "w+") as f:
        # three random 16-bit ints concatenated as decimal strings
        f.write("%d%d%d" % (random.randint(0, 0xffff), random.randint(0, 0xffff), random.randint(0, 0xffff)) )

    output_name = 'results-%s.tar.gz' % proposed_country.lower()
    print colored(" ࿓  Analysis done! compressing the output in %s" % output_name, "blue", 'on_white', attrs=['underline'])

    # drop a stale archive from a previous run before re-creating it
    if os.path.isfile(output_name):
        os.unlink(output_name)

    # compress the whole output directory; stdout is read afterwards to
    # count the files listed by "tar -v"
    tar = Popen(['tar', '-z', '-c', '-v', '-f', output_name, OUTPUTDIR], stdout=PIPE)

    counter_line = 0
    while True:
        line = tar.stdout.readline()
        counter_line += 1
        if not line:
            break


    print colored("%d file added to %s, Starting 'result_sender.py'" % (counter_line, output_name), "green")
    # result sender has hardcoded our hidden service
    p = Popen(['torify', 'python', './sender_results.py', output_name], stdout=PIPE, stderr=PIPE)

    while True:
        line = p.stdout.readline()
        exx = p.stderr.readline()

        if not line and not exx:
            break

        if exx.find('failed to find the symbol') != -1:
            continue
        if exx.find('libtorsocks') != -1:
            continue

        if line:
            print colored("   %s" % line, 'yellow'),
        if exx:
            print colored(exx, 'red'),
        print "Error unexpected command:", command
        quit(-1)

    # understand the third argument: either a media-list file or a
    # single host name given by hand.
    # NOTE(review): this whole section (argv parsing through the
    # _hostseer mkdir and check_section def) appears twice in the file --
    # looks like a bad merge; confirm which copy is the live one.
    target = sys.argv[2]
    if os.path.isfile(target):
        print "Found file", target, "using as media list"
        # reading media list, cleaning media list and copy media list
        cfp = file(target, 'r')
        unclean_lines = cfp.readlines()

        print colored(" ࿓  Importing media list:",
                      'blue',
                      'on_white',
                      attrs=['underline'])
        media_entries = media_file_cleanings(unclean_lines)
        cfp.close()
    else:
        print "Not found file", target, "assuming as single host"
        # single-host mode: mark the entry as manually supplied
        media_entries = {target: 'hand'}

    # working directory for the hostseer output
    if not os.path.isdir('_hostseer'):
        os.mkdir('_hostseer')

    def check_section(kind):
        """Tell whether the section `kind` has to be skipped.

        When a fourth CLI argument is present it names the only section
        to run (and must be one of PERMITTED_SECTIONS): any different
        kind yields True (skip it). With no section filter on the command
        line the function returns None.
        """
        if not kind or len(sys.argv) != 4:
            return None
        wanted = sys.argv[3]
        assert wanted in PERMITTED_SECTIONS, PERMITTED_SECTIONS
        return kind != wanted

    if command == 'phantom':
    command = sys.argv[1]

    if command not in [ 'DNS', 'phantom', 'Geo' ]:
        print "Error unexpected command:", command
        quit(-1)

    # understand the third argument: either a media-list file or a
    # single host name given by hand.
    # NOTE(review): duplicate of the identical section a few lines above
    # (same argv parsing, same _hostseer mkdir) -- apparently a bad
    # merge; confirm which of the two copies is the live one.
    target = sys.argv[2]
    if os.path.isfile(target):
        print "Found file", target, "using as media list"
        # reading media list, cleaning media list and copy media list
        cfp = file(target, 'r')
        unclean_lines = cfp.readlines()

        print colored(" ࿓  Importing media list:", 'blue', 'on_white', attrs=['underline'])
        media_entries = media_file_cleanings(unclean_lines)
        cfp.close()
    else:
        print "Not found file", target, "assuming as single host"
        # single-host mode: mark the entry as manually supplied
        media_entries = { target : 'hand' }

    # working directory for the hostseer output
    if not os.path.isdir('_hostseer'):
        os.mkdir('_hostseer')

    def check_section(kind):
        """Tell whether the section `kind` has to be skipped.

        When a fourth CLI argument is present it names the only section
        to run (and must be one of PERMITTED_SECTIONS): any different
        kind returns True (skip).  With no section filter the function
        returns None.
        """

        # NOTE(review): duplicate definition -- the same function is
        # already defined a few lines above; confirm which copy is live.
        if kind and len(sys.argv) == 4:
            assert sys.argv[3] in PERMITTED_SECTIONS, PERMITTED_SECTIONS
            return kind != sys.argv[3]