Example #1
def launch():
    """
        Fetch all the whois entries assigned to the server of this :class:`Connector`
    """
    i = 0
    while True:
        try:
            entry = temp_db.spop(key_ris)
            if not entry:
                __disconnect()
                i = 0
                publisher.debug("Disconnected of " + server)
                time.sleep(sleep_timer)
                continue
            if cache_db.get(entry) is None:
                if not connected:
                    __connect()
                publisher.debug(server + ", query : " + str(entry))
                whois = fetch_whois(entry)
                if whois != '':
                    cache_db.setex(entry, server + '\n' + unicode(whois,  errors="replace"), cache_ttl)
                if not keepalive:
                    __disconnect()
            i += 1
            if i%10000 == 0:
                publisher.info(str(temp_db.scard(key_ris)) + ' to process on ' + server)
        except IOError as text:
            publisher.error("IOError on " + server + ': ' + str(text))
            time.sleep(sleep_timer)
            __disconnect()
Example #2
def display_listof_pid(r_serv, arg):
    """Display the pid list from redis

    This function display infos in the shell about lauched process

    """
    jobs = {}
    joblist = []
    try:
        for job in r_serv.smembers("pid"):
            jobs = r_serv.hgetall(job)

            # hgetall returns an empty dict when the hash does not exist
            if jobs:
                start = datetime.strptime(r_serv.hget(job, "startime"), "%Y-%m-%d_%H:%M:%S")

                end = datetime.strptime(time.strftime("%Y-%m-%d_%H:%M:%S"), "%Y-%m-%d_%H:%M:%S")
                jobs['uptime'] = str(abs(start - end))
                joblist.append(jobs)
            else:
                publisher.debug("display_list_of_pid Aborted due to lack of Information in Redis")

        joblist = sorted(joblist, key=lambda k: k['uptime'], reverse=True)

        for job in joblist:
            print format_display_listof_pid(job, arg)

        if arg == "remain":
            print "Remaining: {0}".format(r_serv.llen("filelist"))

        if arg == "processed":
            print "processed: {0}".format(r_serv.llen("processed"))

    except TypeError:
        publisher.error("TypeError for display_listof_pid")
Example #3
def launch_fetcher(module):
    """
        Launch a process which fetches a dataset into a directory
    """
    service_fetcher = os.path.join(services_dir, "fetch_raw_files.py")
    timer = '3600'
    if module is None:
        publisher.error('Unable to start fetching : module is None')
        return
    url = config_db.get(module + "|" + "url")
    if url is None:
        publisher.info(module + ' does not have an URL, no fetcher.')
        config_db.set(module + "|" + "fetching", 0)
        return
    directory = config_db.get(module + "|" + "home_dir")
    if directory is not None:
        subprocess.Popen([
            "python", service_fetcher, '-n', module, '-d', directory, '-u',
            url, '-t', timer
        ])
        config_db.set(module + "|" + "fetching", 1)
        publisher.info('Fetching of ' + module + ' started.')
    else:
        publisher.error('Unable to start fetching of ' + module + \
                ': home_dir unknown.')
        config_db.set(module + "|" + "fetching", 0)
Example #4
def fetcher():
    """
        Main function which fetches the datasets
    """
    while config_db.sismember('modules', module):
        try:
            urllib.urlretrieve(url, temp_filename)
        except:
            publisher.error('Unable to fetch ' + url)
            __check_exit()
            continue
        drop_file = False
        """
            Check is the file already exists, if the same file is found,
            the downloaded file is dropped. Else, it is moved in his
            final directory.
        """
        to_check = glob.glob( os.path.join(old_directory, '*') )
        to_check += glob.glob( os.path.join(directory, '*') )
        for file in to_check:
            if filecmp.cmp(temp_filename, file):
                drop_file = True
                break
        if drop_file:
            os.unlink(temp_filename)
            publisher.debug('No new file on ' + url)
        else:
            os.rename(temp_filename, filename)
            publisher.info('New file on ' + url)
        __check_exit()
    config_db.delete(module + "|" + "fetching")
Example #5
def crawl_onion(url, domain, date, date_month, message):

    #if not r_onion.sismember('full_onion_up', domain) and not r_onion.sismember('onion_down:'+date , domain):
    super_father = r_serv_metadata.hget('paste_metadata:'+paste, 'super_father')
    if super_father is None:
        super_father=paste

    retry = True
    nb_retry = 0
    while retry:
        try:
            r = requests.get(splash_url , timeout=30.0)
            retry = False
        except Exception:
            # TODO: relaunch docker or send error message
            nb_retry += 1

            if nb_retry == 30:
                on_error_send_message_back_in_queue(type_hidden_service, domain, message)
                publisher.error('{} SPLASH DOWN'.format(splash_url))
                print('--------------------------------------')
                print('         \033[91m DOCKER SPLASH DOWN\033[0m')
                print('          {} DOWN'.format(splash_url))
                exit(1)

            print('         \033[91m DOCKER SPLASH NOT AVAILABLE\033[0m')
            print('          Retry({}) in 10 seconds'.format(nb_retry))
            time.sleep(10)

    if r.status_code == 200:
        process = subprocess.Popen(["python", './torcrawler/tor_crawler.py', splash_url, type_hidden_service, url, domain, paste, super_father],
                                   stdout=subprocess.PIPE)
        while process.poll() is None:
            time.sleep(1)

        if process.returncode == 0:
            output = process.stdout.read().decode()
            print(output)
            # error: splash:Connection to proxy refused
            if 'Connection to proxy refused' in output:
                on_error_send_message_back_in_queue(type_hidden_service, domain, message)
                publisher.error('{} SPLASH, PROXY DOWN OR BAD CONFIGURATION'.format(splash_url))
                print('------------------------------------------------------------------------')
                print('         \033[91m SPLASH: Connection to proxy refused')
                print('')
                print('            PROXY DOWN OR BAD CONFIGURATION\033[0m'.format(splash_url))
                print('------------------------------------------------------------------------')
                exit(-2)
        else:
            print(process.stdout.read())
            exit(-1)
    else:
        on_error_send_message_back_in_queue(type_hidden_service, domain, message)
        print('--------------------------------------')
        print('         \033[91m DOCKER SPLASH DOWN\033[0m')
        print('          {} DOWN'.format(splash_url))
        exit(1)
Example #6
def __query_logging(ip, user_agent, method, q_ip=None, announce_date=None,
                    days_limit=None, level=None):
    if level == 'warning':
        publisher.warning(__csv2string([ip, user_agent, method, q_ip,
                                        announce_date, days_limit, level]))
    elif level == 'error':
        publisher.error(__csv2string([ip, user_agent, method, q_ip,
                                      announce_date, days_limit, level]))
    else:
        publisher.info(__csv2string([ip, user_agent, method, q_ip,
                                     announce_date, days_limit, level]))
Example #7
def __query_logging(ip, user_agent, method, q_ip=None, announce_date=None,
                    days_limit=None, level=None):
    if level == 'warning':
        publisher.warning(__csv2string([ip, user_agent, method, q_ip,
                                        announce_date, days_limit, level]))
    elif level == 'error':
        publisher.error(__csv2string([ip, user_agent, method, q_ip,
                                      announce_date, days_limit, level]))
    else:
        publisher.info(__csv2string([ip, user_agent, method, q_ip,
                                     announce_date, days_limit, level]))
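Note: __csv2string is not shown in these two snippets. As a rough, hypothetical sketch (assumed behaviour, not the project's actual helper), it could simply serialise the fields as a single CSV record so the log line stays machine-parseable:

import csv
import io

def __csv2string(fields):
    # Hypothetical helper: serialise the query-logging fields as one CSV row.
    # None values become empty columns.
    buf = io.StringIO()
    csv.writer(buf).writerow(['' if f is None else f for f in fields])
    return buf.getvalue().strip()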
Example #8
def main():
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'DomClassifier'

    p = Process(config_section)
    addr_dns = p.config.get("DomClassifier", "dns")

    publisher.info("""ZMQ DomainClassifier is Running""")

    c = DomainClassifier.domainclassifier.Extract(rawtext="",
                                                  nameservers=[addr_dns])

    cc = p.config.get("DomClassifier", "cc")
    cc_tld = p.config.get("DomClassifier", "cc_tld")

    while True:
        try:
            message = p.get_from_set()

            if message is not None:
                PST = Paste.Paste(message)
            else:
                publisher.debug("Script DomClassifier is idling 1s")
                time.sleep(1)
                continue
            paste = PST.get_p_content()
            mimetype = PST._get_p_encoding()

            if mimetype == "text/plain":
                c.text(rawtext=paste)
                c.potentialdomain()
                c.validdomain(rtype=['A'], extended=True)
                localizeddomains = c.include(expression=cc_tld)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning(
                        'DomainC;{};{};{};Checked {} located in {};{}'.format(
                            PST.p_source, PST.p_date, PST.p_name,
                            localizeddomains, cc_tld, PST.p_path))
                localizeddomains = c.localizedomain(cc=cc)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning(
                        'DomainC;{};{};{};Checked {} located in {};{}'.format(
                            PST.p_source, PST.p_date, PST.p_name,
                            localizeddomains, cc, PST.p_path))
        except IOError:
            print("CRC Checksum Failed on :", PST.p_path)
            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
                PST.p_source, PST.p_date, PST.p_name))
Example #9
    def test_publisher(self):
        for i in range(0, 21):
            if i % 2 == 0:
                publisher.info('test' + str(i))
            elif i % 3 == 0:
                publisher.warning('test' + str(i))
            elif i % 5 == 0:
                publisher.error('test' + str(i))
            elif i % 7 == 0:
                publisher.critical('test' + str(i))
            else:
                publisher.debug('test' + str(i))
            time.sleep(1)
Example #10
    def test_publisher(self):
        for i in range(0, 21):
            if i % 2 == 0:
                publisher.info('test' + str(i))
            elif i % 3 == 0:
                publisher.warning('test' + str(i))
            elif i % 5 == 0:
                publisher.error('test' + str(i))
            elif i % 7 == 0:
                publisher.critical('test' + str(i))
            else:
                publisher.debug('test' + str(i))
            time.sleep(1)
Example #11
def service_start_once(servicename=None, param=None, processname=None):
    """
        Start a service and save its pid.
        Check that it is not already running.
    """
    config, pid_path = init_static()
    processname = os.path.basename(processname)
    pidpath = os.path.join(pid_path,processname+".pid")
    if not os.path.exists(pidpath):
        proc = service_start(servicename, param)
        writepid(processname, proc)
    else:
        print(processname + ' already running on pid ' + str(pidof(processname)[0]))
        publisher.error("%s already running with pid %s" % (param, pidof(processname)[0]))
Example #12
def get_pgp_packet(message, save_path):
    save_path = '{}'.format(save_path)
    if len(save_path) > 131072:
        save_in_file(message, save_path)
        return ''
    else:
        process1 = subprocess.Popen([ 'echo', '-e', save_path], stdout=subprocess.PIPE)
        process2 = subprocess.Popen([ 'pgpdump'], stdin=process1.stdout, stdout=subprocess.PIPE)
        process1.stdout.close()
        output = process2.communicate()[0]
        try:
            output = output.decode()
        except UnicodeDecodeError:
            publisher.error('Error PgpDump UnicodeDecodeError: {}'.format(message))
            output = ''
        return output
Example #13
def service_start_once(servicename=None, param=None, processname=None):
    """
        Start a service and save its pid.
        Check that it is not already running.
    """
    config, pid_path = init_static()
    processname = os.path.basename(processname)
    pidpath = os.path.join(pid_path, processname + ".pid")
    if not os.path.exists(pidpath):
        proc = service_start(servicename, param)
        writepid(processname, proc)
    else:
        print(processname + ' already running on pid ' +
              str(pidof(processname)[0]))
        publisher.error("%s already running with pid %s" %
                        (param, pidof(processname)[0]))
Example #14
def launch_parser(module):
    """
        Launch a parser on a dataset for a module
    """
    service_parser = os.path.join(services_dir, "parse_raw_files.py")
    if module is None:
        publisher.error('Unable to start parsing : module is None')
        return
    directory = config_db.get(module + "|" + "home_dir")
    if directory is not None:
        subprocess.Popen(["python", service_parser, '-n', module, '-d', directory])
        config_db.set(module + "|" + "parsing", 1)
        publisher.info('Parsing of ' + module + ' started.')
    else:
        publisher.error('Unable to start parsing of ' + module + ': home_dir unknown.')
        config_db.set(module + "|" + "parsing", 0)
Example #15
def main():
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'DomClassifier'

    p = Process(config_section)
    addr_dns = p.config.get("DomClassifier", "dns")

    publisher.info("""ZMQ DomainClassifier is Running""")

    c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns])

    cc = p.config.get("DomClassifier", "cc")
    cc_tld = p.config.get("DomClassifier", "cc_tld")

    while True:
        try:
            message = p.get_from_set()

            if message is not None:
                PST = Paste.Paste(message)
            else:
                publisher.debug("Script DomClassifier is idling 1s")
                time.sleep(1)
                continue
            paste = PST.get_p_content()
            mimetype = PST._get_p_encoding()

            if mimetype == "text/plain":
                c.text(rawtext=paste)
                c.potentialdomain()
                c.validdomain(rtype=['A'], extended=True)
                localizeddomains = c.include(expression=cc_tld)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
                        PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_path))
                localizeddomains = c.localizedomain(cc=cc)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
                        PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path))
        except IOError:
            print("CRC Checksum Failed on :", PST.p_path)
            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
                PST.p_source, PST.p_date, PST.p_name))
Example #16
def launch_parser(module):
    """
        Launch a parser on a dataset for a module
    """
    service_parser = os.path.join(services_dir, "parse_raw_files.py")
    timer = '60'
    if module is None:
        publisher.error('Unable to start parsing : module is None')
        return
    directory = config_db.get(module + "|" + "home_dir")
    if directory is not None:
        subprocess.Popen(["python", service_parser, '-n', module,
            '-d', directory, '-t', timer])
        config_db.set(module + "|" + "parsing", 1)
        publisher.info('Parsing of ' + module + ' started.')
    else:
        publisher.error('Unable to start parsing of ' + module + \
                ': home_dir unknown.')
        config_db.set(module + "|" + "parsing", 0)
Example #17
def prepare_keys_for_ranking():
    # Add all announced subnets by ASN
    pipeline = history_db_static.pipeline()
    for asn in routing_db.smembers('asns'):
        blocks = routing_db.smembers(asn)
        pipeline.sadd('{asn}{sep}{date}{sep}clean_set'.format(sep = separator,
            asn = asn, date = date), *blocks)
        temp_db.sadd('full_asn_db', *[str(IPy.IP(b)[0]) for b in blocks])
        temp_db.sadd('no_asn', 'full_asn_db')
    pipeline.execute()

    # Cleanup the old keys, setup the list of asns to rank
    sources = global_db.smembers('{date}{sep}{key}'.\
            format(date = date, sep = separator, key = index_sources))

    pipeline = history_db.pipeline()
    pipeline_static = history_db_static.pipeline()
    to_delete = []
    for source in sources:
        asns = global_db.smembers('{date}{sep}{source}{sep}{key}'.\
                format(date = date, sep = separator, source = source,
                    key = index_asns_details))
        for asn in asns:
            global_asn = asn.split(separator)[0]
            asn_key_v4 = '{asn}{sep}{date}{sep}{source}{sep}rankv4'.\
                    format(sep = separator, asn = global_asn,
                            date = date, source = source)
            asn_key_v6 = '{asn}{sep}{date}{sep}{source}{sep}rankv6'.\
                    format(sep = separator, asn = global_asn,
                            date = date, source = source)
            to_delete.append(asn_key_v4)
            to_delete.append(asn_key_v6)

            pipeline.sadd(key_to_rank,
                    '{asn}{sep}{date}{sep}{source}'.format(sep = separator,
                        asn = asn, date = date, source = source))
    to_delete = set(to_delete)
    if len(to_delete) > 0:
        pipeline_static.delete(*to_delete)
    else:
        publisher.error('You *do not* have anything to rank!')
    pipeline.execute()
    pipeline_static.execute()
Example #18
def prepare_keys_for_ranking():
    # Add all announced subnets by ASN
    pipeline = history_db_static.pipeline()
    for asn in routing_db.smembers('asns'):
        blocks = routing_db.smembers(asn)
        pipeline.sadd('{asn}|{date}|clean_set'.format(asn=asn, date=date),
                      *blocks)
        temp_db.sadd('full_asn_db', *[str(IPy.IP(b)[0]) for b in blocks])
        temp_db.sadd('no_asn', 'full_asn_db')
    pipeline.execute()

    # Cleanup the old keys, setup the list of asns to rank
    sources = global_db.smembers('{date}|sources'.format(date=date))

    pipeline = history_db.pipeline()
    pipeline_static = history_db_static.pipeline()
    to_delete = []
    for source in sources:
        asns = global_db.smembers('{date}|{source}|asns_details'.format(
            date=date, source=source))
        for asn in asns:
            global_asn = asn.split('|')[0]
            asn_key_v4 = '{asn}|{date}|{source}|rankv4'.format(asn=global_asn,
                                                               date=date,
                                                               source=source)
            asn_key_v6 = '{asn}|{date}|{source}|rankv6'.format(asn=global_asn,
                                                               date=date,
                                                               source=source)
            to_delete.append(asn_key_v4)
            to_delete.append(asn_key_v6)

            pipeline.sadd(
                key_to_rank, '{asn}|{date}|{source}'.format(asn=asn,
                                                            date=date,
                                                            source=source))
    to_delete = set(to_delete)
    if len(to_delete) > 0:
        pipeline_static.delete(*to_delete)
    else:
        publisher.error('You *do not* have anything to rank!')
    pipeline.execute()
    pipeline_static.execute()
Example #19
def launch():
    """
        Fetch all the whois entries assigned to the server of this :class:`Connector`
    """
    i = 0
    while True:
        try:
            entry = temp_db.spop(key_ris)
            if not entry:
                __disconnect()
                i = 0
                publisher.debug("Disconnected of " + server)
                time.sleep(sleep_timer)
                continue
            if cache_db.get(entry) is None:
                if not connected:
                    __connect()
                publisher.debug(server + ", query : " + str(entry))
                whois = fetch_whois(entry)
                if whois != '':
                    cache_db.setex(
                        entry,
                        server + '\n' + unicode(whois, errors="replace"),
                        cache_ttl)
                if not keepalive:
                    __disconnect()
            i += 1
            if i % 10000 == 0:
                publisher.info(
                    str(temp_db.scard(key_ris)) + ' to process on ' + server)
        except IOError as text:
            publisher.error("IOError on " + server + ': ' + str(text))
            publisher.info(
                str(temp_db.scard(key_ris)) + ' to process on ' + server)
            time.sleep(sleep_timer)
            __disconnect()
        except Exception as e:
            publisher.error("Error on " + server + ': ' + str(e))
            publisher.info(
                str(temp_db.scard(key_ris)) + ' to process on ' + server)
            time.sleep(sleep_timer)
            __disconnect()
Example #20
def fetch_whois(query):
    """
        Fetch the RIS RIPE information. Keep the connection open if possible.
    """
    server_socket.send('-k -M ' + query + '\n')
    text = ''
    fs = server_socket.makefile()
    prec = ''
    while 1:
        temp = fs.readline()
        if not temp or len(temp) == 0 or prec == temp == '\n':
            break
        text += temp
        prec = temp
    if len(text) == 0:
        publisher.error("error (no response) with query: " + query +
                        " on server " + server)
        time.sleep(sleep_timer)
    if not keepalive:
        __disconnect()
    return text
Example #21
def fetch_whois(query):
    """
        Fetch the RIS RIPE information. Keep the connection open if possible.
    """
    server_socket.send('-k -M ' + query + '\n')
    text = ''
    fs = server_socket.makefile()
    prec = ''
    while 1:
        temp = fs.readline()
        if not temp or len(temp) == 0 or prec == temp == '\n':
            break
        text += temp
        prec = temp
    if len(text) == 0:
        publisher.error("error (no response) with query: " + query +
                " on server " + server)
        time.sleep(sleep_timer)
    if not keepalive:
        __disconnect()
    return text
Example #22
def launch_fetcher(module):
    """
        Launch a process which fetches a dataset into a directory
    """
    service_fetcher = os.path.join(services_dir, "fetch_raw_files.py")
    if module is None:
        publisher.error('Unable to start fetching : module is None')
        return
    url = config_db.get(module + "|" + "url")
    if url is None:
        publisher.info(module + ' does not have an URL, no fetcher.')
        config_db.set(module + "|" + "fetching", 0)
        return
    directory = config_db.get(module + "|" + "home_dir")
    if directory is not None:
        subprocess.Popen(["python", service_fetcher, '-n', module, '-d', directory, '-u', url])
        config_db.set(module + "|" + "fetching", 1)
        publisher.info('Fetching of ' + module + ' started.')
    else:
        publisher.error('Unable to start fetching of ' + module + ': home_dir unknown.')
        config_db.set(module + "|" + "fetching", 0)
Example #23
def importer(raw_dir, listname):
    publisher.channel = 'ParseRawFiles'
    has_files = False
    if temp_db is None:
        __prepare()
    try:
        parser = importlib.import_module(listname).parser
    except:
        parser = __default_parser
    date = datetime.date.today()
    for filename in __get_files(raw_dir):
        try:
            date_from_module = parser(filename, listname, date)
            has_files = True
            if date_from_module is not None:
                date = date_from_module
            os.rename(filename, os.path.join(raw_dir, old_dir, date.isoformat()))
        except:
            new_file = os.path.join(raw_dir, old_dir,
                                    'INVALID_' + str(date).replace(' ', '-'))
            os.rename(filename, new_file)
            publisher.error('Invalid file: ' + new_file)
    return has_files
Example #24
def importer(raw_dir, listname):
    publisher.channel = 'ParseRawFiles'
    has_files = False
    if temp_db is None:
        __prepare()
    try:
        parser = importlib.import_module(listname).parser
    except:
        parser = __default_parser
    date = datetime.date.today()
    for filename in __get_files(raw_dir):
        try:
            date_from_module = parser(filename, listname, date)
            has_files = True
            if date_from_module is not None:
                date = date_from_module
            os.rename(filename, os.path.join(raw_dir, old_dir,
                                             date.isoformat()))
        except:
            new_file = os.path.join(raw_dir, old_dir,
                                    'INVALID_' + str(date).replace(' ', '-'))
            os.rename(filename, new_file)
            publisher.error('Invalid file: ' + new_file)
    return has_files
Example #25
def get_pgp_packet(message, save_path):
    save_path = '{}'.format(save_path)
    # remove Version
    all_version = re.findall(regex_tool_version, save_path)
    for version in all_version:
        save_path = save_path.replace(version, '')
    # remove comment
    all_comment = re.findall(regex_block_comment, save_path)
    for comment in all_comment:
        save_path = save_path.replace(comment, '')
    # remove empty line
    save_path = [s for s in save_path.splitlines() if s]
    save_path[0] = save_path[0] + '\n'
    save_path[-1] = '\n' + save_path[-1]
    save_path = '\n'.join(save_path)

    #print(save_path)

    if len(save_path) > 131072:
        save_in_file(message, save_path)
        return ''
    else:
        process1 = subprocess.Popen(['echo', '-e', save_path],
                                    stdout=subprocess.PIPE)
        process2 = subprocess.Popen(['pgpdump'],
                                    stdin=process1.stdout,
                                    stdout=subprocess.PIPE)
        process1.stdout.close()
        output = process2.communicate()[0]
        try:
            output = output.decode()
        except UnicodeDecodeError:
            publisher.error(
                'Error PgpDump UnicodeDecodeError: {}'.format(message))
            output = ''
        return output
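Note: regex_tool_version and regex_block_comment are defined elsewhere in that module. Plausible patterns for stripping the optional armor headers of a PGP block (an assumption about their exact form, not the project's actual definitions) would be:

# Hypothetical patterns: match the 'Version:' and 'Comment:' armor header lines
# so they can be removed from the block before piping it to pgpdump.
regex_tool_version = r'Version:.*\n'
regex_block_comment = r'Comment:.*\n'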
Example #26
def main():
    """Main Function"""

    # CONFIG #
    cfg = ConfigParser.ConfigParser()
    cfg.read(configfile)

    # Redis
    r_serv1 = redis.StrictRedis(
        host = cfg.get("Redis_Queues", "host"),
        port = cfg.getint("Redis_Queues", "port"),
        db = cfg.getint("Redis_Queues", "db"))

    # Indexer configuration - index dir and schema setup
    indexpath = cfg.get("Indexer", "path")
    indexertype = cfg.get("Indexer", "type")
    if indexertype == "whoosh":
        schema = Schema(title=TEXT(stored=True), path=ID(stored=True,unique=True), content=TEXT)

        if not os.path.exists(indexpath):
            os.mkdir(indexpath)

        if not exists_in(indexpath):
            ix = create_in(indexpath, schema)
        else:
            ix = open_dir(indexpath)

    # LOGGING #
    publisher.channel = "Script"

    # ZMQ #
    #Subscriber
    channel = cfg.get("PubSub_Global", "channel")
    subscriber_name = "indexer"
    subscriber_config_section = "PubSub_Global"

    Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)

    # FUNCTIONS #
    publisher.info("""ZMQ Indexer is Running""")

    while True:
        try:
            message = Sub.get_msg_from_queue(r_serv1)

            if message != None:
                PST = P.Paste(message.split(" ",-1)[-1])
            else:
                if r_serv1.sismember("SHUTDOWN_FLAGS", "Indexer"):
                    r_serv1.srem("SHUTDOWN_FLAGS", "Indexer")
                    publisher.warning("Shutdown Flag Up: Terminating.")
                    break
                publisher.debug("Script Indexer is idling 10s")
                time.sleep(1)
                continue
            docpath = message.split(" ",-1)[-1]
            paste = PST.get_p_content()
            print "Indexing :", docpath
            if indexertype == "whoosh":
                indexwriter = ix.writer()
                indexwriter.update_document(title=unicode(docpath, errors='ignore'),path=unicode(docpath, errors='ignore'),content=unicode(paste, errors='ignore'))
                indexwriter.commit()
        except IOError:
            print "CRC Checksum Failed on :", PST.p_path
            publisher.error('{0};{1};{2};{3};{4}'.format("Duplicate", PST.p_source, PST.p_date, PST.p_name, "CRC Checksum Failed"))
            pass
Example #27
def insert():
    """
        Re-insert in the database the data provided by the module and
        extracted by :meth:`get_all_information` in a sorted form.
    """
    while True:
        i = 0
        try:
            while temp_db.scard(uid_list) > 0:
                infos = get_all_information()
                if infos is None:
                    continue
                uid, ip, src, timestamp = infos
                if ip is None:
                    publisher.error('Entry without IP, invalid')
                    continue
                if src is None:
                    publisher.error(ip + ' without source, invalid')
                    continue
                if timestamp.date() < datetime.date.today() - \
                        datetime.timedelta(1) and not accept_old_entries:
                    publisher.warning('The timestamp ({ts}) of {ip} from {source} is too old.'.\
                            format(ts = timestamp.isoformat(), ip = ip, source = src))
                    continue
                try:
                    # Check and normalize the IP
                    ip_bin = IPy.IP(ip)
                    if ip_bin.iptype() != 'PUBLIC':
                        publisher.warning(str(ip_bin) + ' is not a PUBLIC IP Address')
                        continue
                    ip = ip_bin.strCompressed()
                except:
                    publisher.error('This IP: ' + ip + ' is invalid.')
                    continue

                iso_timestamp = timestamp.isoformat()
                date = timestamp.date().isoformat()
                index_day_src = '{date}{sep}{key}'.format(sep = separator,
                        date=date, key=list_sources)
                index_day_ips = 'temp{sep}{date}{sep}{source}{sep}{key}'.format(
                        sep = separator, date=date, source=src, key=list_ips)
                ip_details = '{ip}{sep}{timestamp}'.format(sep = separator,
                        ip = ip, timestamp = iso_timestamp)

                global_db.sadd(index_day_src, src)
                pipeline_temp_db = temp_db.pipeline()
                pipeline_temp_db.sadd(index_day_ips, ip_details)
                pipeline_temp_db.sadd(temp_ris, ip)
                pipeline_temp_db.sadd(temp_no_asn, index_day_ips)
                pipeline_temp_db.delete(uid)
                pipeline_temp_db.execute()
                i += 1
                if i%100 == 0 and config_db.exists(stop_db_input):
                    break
                if i%10000 == 0:
                    publisher.info('{nb} new entries to insert'\
                            .format(nb = temp_db.scard(uid_list)))
        except:
            publisher.critical('Unable to insert, redis does not respond')
            break
        time.sleep(sleep_timer)
        if config_db.exists(stop_db_input):
            publisher.info('DatabaseInput stopped.')
            break
Example #28
def main():
    """Main Function"""

    # CONFIG #
    cfg = ConfigParser.ConfigParser()
    cfg.read(configfile)

    # Redis
    r_serv1 = redis.StrictRedis(host=cfg.get("Redis_Queues", "host"),
                                port=cfg.getint("Redis_Queues", "port"),
                                db=cfg.getint("Redis_Queues", "db"))

    # LOGGING #
    publisher.channel = "Script"

    # ZMQ #
    # Subscriber
    channel = cfg.get("PubSub_Global", "channel")
    subscriber_name = "DomainClassifier"
    subscriber_config_section = "PubSub_Global"

    cc = cfg.get("PubSub_DomainClassifier", "cc")
    cc_tld = cfg.get("PubSub_DomainClassifier", "cc_tld")

    sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel,
                            subscriber_name)

    # FUNCTIONS #
    publisher.info("""ZMQ DomainClassifier is Running""")
    c = DomainClassifier.domainclassifier.Extract(rawtext="")

    while True:
        try:
            message = sub.get_msg_from_queue(r_serv1)

            if message is not None:
                PST = Paste.Paste(message.split(" ", -1)[-1])
            else:
                if r_serv1.sismember("SHUTDOWN_FLAGS", "Indexer"):
                    r_serv1.srem("SHUTDOWN_FLAGS", "Indexer")
                    publisher.warning("Shutdown Flag Up: Terminating.")
                    break
                publisher.debug("Script DomainClassifier is idling 10s")
                time.sleep(1)
                continue
            docpath = message.split(" ", -1)[-1]
            paste = PST.get_p_content()
            mimetype = PST._get_p_encoding()
            if mimetype == "text/plain":
                c.text(rawtext=paste)
                c.potentialdomain()
                c.validdomain(rtype=['A'], extended=True)
                localizeddomains = c.include(expression=cc_tld)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning(
                        'DomainC;{};{};{};Checked {} located in {}'.format(
                            PST.p_source, PST.p_date, PST.p_name,
                            localizeddomains, cc_tld))
                localizeddomains = c.localizedomain(cc=cc)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning(
                        'DomainC;{};{};{};Checked {} located in {}'.format(
                            PST.p_source, PST.p_date, PST.p_name,
                            localizeddomains, cc))
        except IOError:
            print "CRC Checksum Failed on :", PST.p_path
            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
                PST.p_source, PST.p_date, PST.p_name))
            pass
Example #29
def crawl_onion(url, domain, port, type_service, message, crawler_config):
    crawler_config['url'] = url
    crawler_config['port'] = port
    print('Launching Crawler: {}'.format(url))

    r_cache.hset('metadata_crawler:{}'.format(splash_port), 'crawling_domain',
                 domain)
    r_cache.hset('metadata_crawler:{}'.format(splash_port), 'started_time',
                 datetime.datetime.now().strftime("%Y/%m/%d  -  %H:%M.%S"))

    retry = True
    nb_retry = 0
    while retry:
        try:
            r = requests.get(splash_url, timeout=30.0)
            retry = False
        except Exception:
            # TODO: relaunch docker or send error message
            nb_retry += 1

            if nb_retry == 6:
                on_error_send_message_back_in_queue(type_service, domain,
                                                    message)
                publisher.error('{} SPLASH DOWN'.format(splash_url))
                print('--------------------------------------')
                print('         \033[91m DOCKER SPLASH DOWN\033[0m')
                print('          {} DOWN'.format(splash_url))
                r_cache.hset('metadata_crawler:{}'.format(splash_port),
                             'status', 'SPLASH DOWN')
                nb_retry = 0  # reset the counter and keep retrying

            print('         \033[91m DOCKER SPLASH NOT AVAILABLE\033[0m')
            print('          Retry({}) in 10 seconds'.format(nb_retry))
            time.sleep(10)

    if r.status_code == 200:
        r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status',
                     'Crawling')
        # save config in cache
        UUID = str(uuid.uuid4())
        r_cache.set('crawler_request:{}'.format(UUID),
                    json.dumps(crawler_config))

        process = subprocess.Popen(
            ["python", './torcrawler/tor_crawler.py', UUID],
            stdout=subprocess.PIPE)
        while process.poll() is None:
            time.sleep(1)

        if process.returncode == 0:
            output = process.stdout.read().decode()
            print(output)
            # error: splash:Connection to proxy refused
            if 'Connection to proxy refused' in output:
                on_error_send_message_back_in_queue(type_service, domain,
                                                    message)
                publisher.error(
                    '{} SPLASH, PROXY DOWN OR BAD CONFIGURATION'.format(
                        splash_url))
                print(
                    '------------------------------------------------------------------------'
                )
                print('         \033[91m SPLASH: Connection to proxy refused')
                print('')
                print('            PROXY DOWN OR BAD CONFIGURATION\033[0m'.
                      format(splash_url))
                print(
                    '------------------------------------------------------------------------'
                )
                r_cache.hset('metadata_crawler:{}'.format(splash_port),
                             'status', 'Error')
                exit(-2)
        else:
            print(process.stdout.read())
            exit(-1)
    else:
        on_error_send_message_back_in_queue(type_service, domain, message)
        print('--------------------------------------')
        print('         \033[91m DOCKER SPLASH DOWN\033[0m')
        print('          {} DOWN'.format(splash_url))
        r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status',
                     'Crawling')
        exit(1)
Example #30
    ##################### Similarity found  #######################

            # if there is data in this dictionary
            if len(hash_dico) != 0:
                # paste_tuple = (hash_type, date, paste_path, percent)
                for dico_hash, paste_tuple in hash_dico.items():
                    dupl.add(paste_tuple)

                # Creating the object attribute and save it.
                to_print = 'Duplicate;{};{};{};'.format(
                    PST.p_source, PST.p_date, PST.p_name)
                if dupl:
                    dupl = list(dupl)
                    PST.__setattr__("p_duplicate", dupl)
                    PST.save_attribute_duplicate(dupl)
                    PST.save_others_pastes_attribute_duplicate(dupl)
                    publisher.info('{}Detected {};{}'.format(to_print, len(dupl), PST.p_path))
                    print('{}Detected {}'.format(to_print, len(dupl)))
                    print('')

                y = time.time()

                publisher.debug('{}Processed in {} sec'.format(to_print, y-x))

        except IOError:
            to_print = 'Duplicate;{};{};{};'.format(
                PST.p_source, PST.p_date, PST.p_name)
            print("CRC Checksum Failed on :", PST.p_path)
            publisher.error('{}CRC Checksum Failed'.format(to_print))
Example #31
def crawl_onion(url, domain, date, date_month, message):

    #if not r_onion.sismember('full_onion_up', domain) and not r_onion.sismember('onion_down:'+date , domain):
    super_father = r_serv_metadata.hget('paste_metadata:' + paste,
                                        'super_father')
    if super_father is None:
        super_father = paste

    try:
        r = requests.get(splash_url, timeout=30.0)
    except Exception:
        # TODO: relaunch docker or send error message

        on_error_send_message_back_in_queue(type_hidden_service, domain,
                                            message)
        publisher.error('{} SPLASH DOWN'.format(splash_url))
        print('--------------------------------------')
        print('         \033[91m DOCKER SPLASH DOWN\033[0m')
        print('          {} DOWN'.format(splash_url))
        exit(1)

    if r.status_code == 200:
        process = subprocess.Popen([
            "python", './torcrawler/tor_crawler.py', splash_url,
            type_hidden_service, url, domain, paste, super_father
        ],
                                   stdout=subprocess.PIPE)
        while process.poll() is None:
            time.sleep(1)

        if process.returncode == 0:
            output = process.stdout.read().decode()
            print(output)
            # error: splash:Connection to proxy refused
            if 'Connection to proxy refused' in output:
                on_error_send_message_back_in_queue(type_hidden_service,
                                                    domain, message)
                publisher.error(
                    '{} SPLASH, PROXY DOWN OR BAD CONFIGURATION'.format(
                        splash_url))
                print(
                    '------------------------------------------------------------------------'
                )
                print('         \033[91m SPLASH: Connection to proxy refused')
                print('')
                print('            PROXY DOWN OR BAD CONFIGURATION\033[0m'.
                      format(splash_url))
                print(
                    '------------------------------------------------------------------------'
                )
                exit(-2)
        else:
            print(process.stdout.read())
            exit(-1)
    else:
        on_error_send_message_back_in_queue(type_hidden_service, domain,
                                            message)
        print('--------------------------------------')
        print('         \033[91m DOCKER SPLASH DOWN\033[0m')
        print('          {} DOWN'.format(splash_url))
        exit(1)
Example #32
# -*- coding: utf-8 -*-

"""
    :file:`bin/services/microblog.py` - Microblogging client
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    Start the microblogging client which posts on twitter and identica
"""

import time
from pubsublogger import publisher
import microblog

dev_mode = True

if __name__ == '__main__':

    sleep_timer = 3600

    publisher.channel = 'API_Twitter'

    while 1:
        try:
            if microblog.post_new_top_ranking():
                publisher.info('New Ranking posted on twitter and identica.')
                print 'New Ranking posted on twitter and identica.'
        except Exception as e:
            publisher.error("Something bad occurs: " + e)
            print "Something bad occurs: " + str(e)
        time.sleep(sleep_timer)
Example #33
    def get_ip_info(self, ip, days_limit=None):
        """
            Return information related to an IP address.

            :param ip: The IP address
            :param days_limit: The number of days we want to check in the past
                (default: around 2 years)
            :rtype: Dictionary

                .. note:: Format of the output:

                    .. code-block:: python

                        {
                            'ip': ip,
                            'days_limit' : days_limit,
                            'ptrrecord' : 'ptr.record.com',
                            'history':
                                [
                                    {
                                        'asn': asn,
                                        'interval': [first, last],
                                        'block': block,
                                        'timestamp': timestamp,
                                        'descriptions':
                                            [
                                                [date, descr],
                                                ...
                                            ]
                                    },
                                    ...
                                ]
                        }
        """
        if days_limit is None:
            days_limit = 750
        to_return = {'ip': ip, 'days_limit': days_limit, 'history': []}
        if self.has_ptr:
            to_return['ptrrecord'] = self.get_ptr_record(ip)
        if not self.has_ipasn:
            publisher.debug('IPASN not enabled.')
            to_return['error'] = 'IPASN not enabled.'
            return to_return
        if not ip:
            to_return['error'] = 'No IP provided.'
            return to_return
        for first, last, asn, block in self.ipasn.aggregate_history(ip, days_limit):
            first_date = parser.parse(first).replace(tzinfo=tz.tzutc()).date()
            last_date = parser.parse(last).replace(tzinfo=tz.tzutc()).date()
            if self.has_asnhistory:
                desc_history = self.asnhistory.get_all_descriptions(asn)
                valid_descriptions = []
                for date, descr in desc_history:
                    date = date.astimezone(tz.tzutc()).date()
                    test_date = date - datetime.timedelta(days=1)
                    if last_date < test_date:
                        # Too new
                        continue
                    elif last_date >= test_date and first_date <= test_date:
                        # Changes within the interval
                        valid_descriptions.append([date.isoformat(), descr])
                    elif first_date > test_date:
                        # get the most recent change before the interval
                        valid_descriptions.append([date.isoformat(), descr])
                        break
            else:
                publisher.debug('ASN History not enabled.')
                valid_descriptions = [[datetime.date.today().isoformat(), 'ASN History not enabled.']]
            if len(valid_descriptions) == 0:
                if len(desc_history) != 0:
                    # fallback, use the oldest description.
                    date = desc_history[-1][0].astimezone(tz.tzutc()).date()
                    descr = desc_history[-1][1]
                    valid_descriptions.append([date.isoformat(), descr])
                else:
                    # No history found for this ASN
                    if last_date > datetime.date(2013, 1, 1):
                        # ASN has been seen recently, should not happen
                        # as the asn history module is running since early 2013
                        publisher.error('Unable to find the ASN description of {}. IP address: {}. ASN History might be down.'.format(asn, ip))
                    valid_descriptions.append(['0000-00-00', 'No ASN description has been found.'])
            entry = {}
            entry['asn'] = asn
            entry['interval'] = [first_date.isoformat(), last_date.isoformat()]
            entry['block'] = block
            entry['timestamp'] = self.get_first_seen(asn, block)
            entry['descriptions'] = valid_descriptions
            to_return['history'].append(entry)
        return to_return
Example #34
def main():
    """Main Function"""

    # CONFIG #
    cfg = ConfigParser.ConfigParser()
    cfg.read(configfile)

    # REDIS #
    r_serv = redis.StrictRedis(
        host = cfg.get("Redis_Data_Merging", "host"),
        port = cfg.getint("Redis_Data_Merging", "port"),
        db = cfg.getint("Redis_Data_Merging", "db"))

    r_serv1 = redis.StrictRedis(
        host = cfg.get("Redis_Queues", "host"),
        port = cfg.getint("Redis_Queues", "port"),
        db = cfg.getint("Redis_Queues", "db"))

    p_serv = r_serv.pipeline(False)

    # LOGGING #
    publisher.channel = "Script"

    # ZMQ #
    #Subscriber
    channel = cfg.get("PubSub_Global", "channel")
    subscriber_name = "attributes"
    subscriber_config_section = "PubSub_Global"

    Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)

    # FUNCTIONS #
    publisher.info("""ZMQ Attribute is Running""")

    while True:
        try:
            message = Sub.get_msg_from_queue(r_serv1)

            if message != None:
                PST = P.Paste(message.split(" ",-1)[-1])
            else:
                if r_serv1.sismember("SHUTDOWN_FLAGS", "Attributes"):
                    r_serv1.srem("SHUTDOWN_FLAGS", "Attributes")
                    print "Shutdown Flag Up: Terminating"
                    publisher.warning("Shutdown Flag Up: Terminating.")
                    break
                publisher.debug("Script Attribute is idling 10s")
                time.sleep(10)
                continue

            encoding = PST._get_p_encoding()
            language = PST._get_p_language()

            PST.save_attribute_redis(r_serv, "p_encoding", encoding)
            PST.save_attribute_redis(r_serv, "p_language", language)

            r_serv.sadd("Pastes_Objects",PST.p_path)

            PST.save_all_attributes_redis(r_serv)
        except IOError:
            print "CRC Checksum Failed on :", PST.p_path
            publisher.error('{0};{1};{2};{3};{4}'.format("Duplicate", PST.p_source, PST.p_date, PST.p_name, "CRC Checksum Failed"))
            pass
Example #35
def main():
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'DomClassifier'

    p = Process(config_section)
    addr_dns = p.config.get("DomClassifier", "dns")

    publisher.info("""ZMQ DomainClassifier is Running""")

    c = DomainClassifier.domainclassifier.Extract(rawtext="",
                                                  nameservers=[addr_dns])

    cc = p.config.get("DomClassifier", "cc")
    cc_tld = p.config.get("DomClassifier", "cc_tld")

    while True:
        try:
            item_id = p.get_from_set()

            if item_id is None:
                publisher.debug("Script DomClassifier is idling 1s")
                time.sleep(1)
                continue

            item_content = item_basic.get_item_content(item_id)
            mimetype = item_basic.get_item_mimetype(item_id)
            item_basename = item_basic.get_basename(item_id)
            item_source = item_basic.get_source(item_id)
            item_date = item_basic.get_item_date(item_id)

            if mimetype.split('/')[0] == "text":
                c.text(rawtext=item_content)
                c.potentialdomain()
                c.validdomain(passive_dns=True, extended=False)
                print(c.vdomain)

                if c.vdomain and d4.is_passive_dns_enabled():
                    for dns_record in c.vdomain:
                        p.populate_set_out(dns_record)

                localizeddomains = c.include(expression=cc_tld)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning(
                        f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {cc_tld};{item_id}"
                    )
                localizeddomains = c.localizedomain(cc=cc)

                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning(
                        f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {cc};{item_id}"
                    )

        except IOError:
            print("CRC Checksum Failed on :", item_id)
            publisher.error(
                f"Duplicate;{item_source};{item_date};{item_basename};CRC Checksum Failed"
            )
Example #36
    from helpers.initscript import *
    services_dir = os.path.join(root_dir,config.get('directories','services'))

    service = os.path.join(services_dir, "fetch_ris_entries")

    if args.action == "start":
        publisher.info( "Starting fetching...")
        for option in servers_available:
            print(option + " to start...")
            publisher.info( option + " to start...")
            service_start_multiple(servicename = service,
                    param = ['-s', option],
                    number = int(config.get('processes','whois_fetch')))

    elif args.action == "stop":
        print("Stopping fetching...")
        publisher.info("Stopping fetching...")
        pids = pidof(processname=service)
        if pids:
            print(service + " to be stopped...")
            publisher.info(service + " to be stopped...")
            for pid in pids:
                try:
                    os.kill(int(pid), signal.SIGKILL)
                except OSError, e:
                    print(service + " unsuccessfully stopped")
                    publisher.error(service + " unsuccessfully stopped")
            rmpid(processname=service)
    else:
        usage()
Example #37
        else:
            ix = open_dir(indexpath)

    # LOGGING #
    publisher.info("ZMQ Indexer is Running")

    while True:
        try:
            message = p.get_from_set()

            if message is not None:
                PST = Paste.Paste(message)
            else:
                publisher.debug("Script Indexer is idling 1s")
                time.sleep(1)
                continue
            docpath = message.split(" ", -1)[-1]
            paste = PST.get_p_content()
            print "Indexing :", docpath
            if indexertype == "whoosh":
                indexwriter = ix.writer()
                indexwriter.update_document(
                    title=unicode(docpath, errors='ignore'),
                    path=unicode(docpath, errors='ignore'),
                    content=unicode(paste, errors='ignore'))
                indexwriter.commit()
        except IOError:
            print "CRC Checksum Failed on :", PST.p_path
            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
                PST.p_source, PST.p_date, PST.p_name))
Example #38
                # Creating the object attribute and save it.
                to_print = 'Duplicate;{};{};{};'.format(
                    PST.p_source, PST.p_date, PST.p_name)
                if dupl != []:
                    PST.__setattr__("p_duplicate", dupl)
                    PST.save_attribute_redis("p_duplicate", dupl)
                    publisher.info('{}Detected {}'.format(to_print, len(dupl)))
                    print '{}Detected {}'.format(to_print, len(dupl))

                y = time.time()

                publisher.debug('{}Processed in {} sec'.format(
                    to_print, y - x))

            # Adding the hash in the dico of the month
            today_dico[index] = paste_hash

            if flag_write_to_disk:
                time_1 = time.time()
                flag_write_to_disk = False
                flag_reload_from_disk = True
                print 'writing'
                with open(filedicopath, 'w') as fp:
                    json.dump(today_dico, fp)
        except IOError:
            to_print = 'Duplicate;{};{};{};'.format(PST.p_source, PST.p_date,
                                                    PST.p_name)
            print "CRC Checksum Failed on :", PST.p_path
            publisher.error('{}CRC Checksum Failed'.format(to_print))
Example #39
def crawl_onion(url, domain, date, date_month, message):

    r_cache.hset('metadata_crawler:{}'.format(splash_port), 'crawling_domain',
                 domain)
    r_cache.hset('metadata_crawler:{}'.format(splash_port), 'started_time',
                 datetime.datetime.now().strftime("%Y/%m/%d  -  %H:%M.%S"))

    #if not r_onion.sismember('full_onion_up', domain) and not r_onion.sismember('onion_down:'+date , domain):
    super_father = r_serv_metadata.hget('paste_metadata:' + paste,
                                        'super_father')
    if super_father is None:
        super_father = paste

    retry = True
    nb_retry = 0
    while retry:
        try:
            r = requests.get(splash_url, timeout=30.0)
            retry = False
        except Exception:
            # TODO: relaunch docker or send error message
            nb_retry += 1

            if nb_retry == 6:
                on_error_send_message_back_in_queue(type_hidden_service,
                                                    domain, message)
                publisher.error('{} SPLASH DOWN'.format(splash_url))
                print('--------------------------------------')
                print('         \033[91m DOCKER SPLASH DOWN\033[0m')
                print('          {} DOWN'.format(splash_url))
                r_cache.hset('metadata_crawler:{}'.format(splash_port),
                             'status', 'SPLASH DOWN')
                nb_retry = 0  # reset the retry counter after reporting

            print('         \033[91m DOCKER SPLASH NOT AVAILABLE\033[0m')
            print('          Retry({}) in 10 seconds'.format(nb_retry))
            time.sleep(10)

    if r.status_code == 200:
        r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status',
                     'Crawling')
        process = subprocess.Popen([
            "python", './torcrawler/tor_crawler.py', splash_url,
            type_hidden_service, url, domain, paste, super_father
        ],
                                   stdout=subprocess.PIPE)
        while process.poll() is None:
            time.sleep(1)

        if process.returncode == 0:
            output = process.stdout.read().decode()
            print(output)
            # error: splash:Connection to proxy refused
            if 'Connection to proxy refused' in output:
                on_error_send_message_back_in_queue(type_hidden_service,
                                                    domain, message)
                publisher.error(
                    '{} SPLASH, PROXY DOWN OR BAD CONFIGURATION'.format(
                        splash_url))
                print(
                    '------------------------------------------------------------------------'
                )
                print('         \033[91m SPLASH: Connection to proxy refused')
                print('')
                print('            PROXY DOWN OR BAD CONFIGURATION\033[0m')
                print(
                    '------------------------------------------------------------------------'
                )
                r_cache.hset('metadata_crawler:{}'.format(splash_port),
                             'status', 'Error')
                exit(-2)
        else:
            print(process.stdout.read())
            exit(-1)
    else:
        on_error_send_message_back_in_queue(type_hidden_service, domain,
                                            message)
        print('--------------------------------------')
        print('         \033[91m DOCKER SPLASH DOWN\033[0m')
        print('          {} DOWN'.format(splash_url))
        r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status',
                     'SPLASH DOWN')
        exit(1)
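The on_error_send_message_back_in_queue helper is called above but not shown in this example. The sketch below is only an assumption about its intent, namely re-queueing the message so the domain is retried later; the Redis key name and connection are illustrative, not the project's actual implementation.

# Hypothetical sketch (assumption): re-queue a message after a crawler error so
# the domain can be retried. Key name and connection details are illustrative.
import redis
from pubsublogger import publisher

r_onion = redis.StrictRedis(host='localhost', port=6379, db=0)

def on_error_send_message_back_in_queue(type_hidden_service, domain, message):
    r_onion.sadd('{}_crawler_queue'.format(type_hidden_service), message)
    publisher.info('{} sent back in the {} queue after a crawler error'.format(
        domain, type_hidden_service))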
Example #40
0
    args = parser.parse_args()
    __prepare(args.directory)

    publisher.port = redis_port
    publisher.channel = 'ASN_History'
    time.sleep(5)
    publisher.info('Importer started.')
    while True:
        for timestamp, data in parse(args.directory):
            r = redis.Redis(host=redis_host, port=redis_port, db=redis_db)

            last_update = r.get('last_update')
            if last_update > timestamp:
                msg = 'Trying to import an old file ({old}). Latest: {new}'.\
                        format(old=timestamp, new=last_update)
                publisher.error(msg)
                continue
            else:
                msg = '===== Importing new file: {new} ====='.format(
                    new=timestamp)
                publisher.info(msg)
                p = r.pipeline(transaction=False)
                p.set('last_update', timestamp)
                p.sadd('all_timestamps', timestamp)
                new_asns = 0
                updated_descrs = 0
                for asn, descr in data:
                    all_descrs = r.hgetall(asn)
                    if len(all_descrs) == 0:
                        p.hset(asn, timestamp, descr)
                        publisher.debug('New asn: {asn}'.format(asn=asn))
Example #41
0
def handle_error():
    cherrypy.response.status = 500
    cherrypy.response.body = ["<html><body>Sorry, an error occurred</body></html>"]
    publisher.error('Request: ' + str(cherrypy.request.params) + '\n' + _cperror.format_exc())
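A handler like handle_error above is usually registered through CherryPy's request.error_response hook. The sketch below shows one way to wire it up; the Root application and mount point are illustrative assumptions, only the hook itself is standard CherryPy.

# Sketch: register handle_error() (defined above) as the error response hook.
# The Root class and mount point are illustrative.
import cherrypy

class Root(object):
    @cherrypy.expose
    def index(self):
        return 'up'

cherrypy.config.update({'request.error_response': handle_error})
cherrypy.quickstart(Root(), '/')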
Example #42
0
    parser.add_argument("-H", "--hostname", default='localhost',
                        type=str, help='Set the hostname of the server.')
    parser.add_argument("-p", "--port", default=6379,
                        type=int, help='Set the server port.')
    parser.add_argument("-c", "--channel",
                        type=str, required=True, help='Channel to publish into.')

    args = parser.parse_args()

    if args.use_unix_socket:
        publisher.use_tcp_socket = False
        publisher.unix_socket = args.unix_socket_path
    else:
        publisher.hostname = args.hostname
        publisher.port = args.port

    publisher.channel = args.channel

    for i in range(0, 21):
        if i % 2 == 0:
            publisher.info('test' + str(i))
        elif i % 3 == 0:
            publisher.warning('test' + str(i))
        elif i % 5 == 0:
            publisher.error('test' + str(i))
        elif i % 7 == 0:
            publisher.critical('test' + str(i))
        else:
            publisher.debug('test' + str(i))
        time.sleep(1)
Example #43
0
                        help='Set the server port.')
    parser.add_argument("-c",
                        "--channel",
                        type=str,
                        required=True,
                        help='Channel to publish into.')

    args = parser.parse_args()

    if args.use_unix_socket:
        publisher.use_tcp_socket = False
        publisher.unix_socket = args.unix_socket_path
    else:
        publisher.hostname = args.hostname
        publisher.port = args.port

    publisher.channel = args.channel

    for i in range(0, 21):
        if i % 2 == 0:
            publisher.info('test' + str(i))
        elif i % 3 == 0:
            publisher.warning('test' + str(i))
        elif i % 5 == 0:
            publisher.error('test' + str(i))
        elif i % 7 == 0:
            publisher.critical('test' + str(i))
        else:
            publisher.debug('test' + str(i))
        time.sleep(1)
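Both test snippets above pick up after the argument parser has already been built. A hypothetical reconstruction of that setup is given below; only -H, -p and -c are taken from the examples, while the unix-socket options are inferred from the args attributes used afterwards.

# Hypothetical reconstruction (assumption) of the argparse setup the test
# snippets rely on. Only the attribute names are taken from the examples.
import argparse

parser = argparse.ArgumentParser(description='Publish test messages on a channel.')
parser.add_argument('--use_unix_socket', action='store_true',
                    help='Reach the log server over a unix socket instead of TCP.')
parser.add_argument('--unix_socket_path', default='/tmp/redis.sock',
                    help='Path of the unix socket.')
parser.add_argument("-H", "--hostname", default='localhost',
                    type=str, help='Set the hostname of the server.')
parser.add_argument("-p", "--port", default=6379,
                    type=int, help='Set the server port.')
parser.add_argument("-c", "--channel",
                    type=str, required=True, help='Channel to publish into.')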
Example #44
0
def crawl_onion(url, domain, port, type_service, message, crawler_config):
    crawler_config['url'] = url
    crawler_config['port'] = port
    print('Launching Crawler: {}'.format(url))

    r_cache.hset('metadata_crawler:{}'.format(splash_port), 'crawling_domain', domain)
    r_cache.hset('metadata_crawler:{}'.format(splash_port), 'started_time', datetime.datetime.now().strftime("%Y/%m/%d  -  %H:%M.%S"))

    retry = True
    nb_retry = 0
    while retry:
        try:
            r = requests.get(splash_url , timeout=30.0)
            retry = False
        except Exception:
            # TODO: relaunch docker or send error message
            nb_retry += 1

            if nb_retry == 6:
                on_error_send_message_back_in_queue(type_service, domain, message)
                publisher.error('{} SPLASH DOWN'.format(splash_url))
                print('--------------------------------------')
                print('         \033[91m DOCKER SPLASH DOWN\033[0m')
                print('          {} DOWN'.format(splash_url))
                r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'SPLASH DOWN')
                nb_retry = 0  # reset the retry counter after reporting

            print('         \033[91m DOCKER SPLASH NOT AVAILABLE\033[0m')
            print('          Retry({}) in 10 seconds'.format(nb_retry))
            time.sleep(10)

    if r.status_code == 200:
        r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Crawling')
        # save config in cache
        UUID = str(uuid.uuid4())
        r_cache.set('crawler_request:{}'.format(UUID), json.dumps(crawler_config))

        process = subprocess.Popen(["python", './torcrawler/tor_crawler.py', UUID],
                                   stdout=subprocess.PIPE)
        while process.poll() is None:
            time.sleep(1)

        if process.returncode == 0:
            output = process.stdout.read().decode()
            print(output)
            # error: splash:Connection to proxy refused
            if 'Connection to proxy refused' in output:
                on_error_send_message_back_in_queue(type_service, domain, message)
                publisher.error('{} SPLASH, PROXY DOWN OR BAD CONFIGURATION'.format(splash_url))
                print('------------------------------------------------------------------------')
                print('         \033[91m SPLASH: Connection to proxy refused')
                print('')
                print('            PROXY DOWN OR BAD CONFIGURATION\033[0m')
                print('------------------------------------------------------------------------')
                r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Error')
                exit(-2)
        else:
            print(process.stdout.read())
            exit(-1)
    else:
        on_error_send_message_back_in_queue(type_service, domain, message)
        print('--------------------------------------')
        print('         \033[91m DOCKER SPLASH DOWN\033[0m')
        print('          {} DOWN'.format(splash_url))
        r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'SPLASH DOWN')
        exit(1)
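Example #44 only hands a UUID to tor_crawler.py after storing the full configuration under crawler_request:<UUID>. The sketch below is an assumption about how the child process could read that configuration back; it is not the crawler's actual code, and the connection details are illustrative.

# Hypothetical sketch (assumption): read back the configuration stored by the
# parent process under crawler_request:<UUID>.
import json
import sys
import redis

r_cache = redis.StrictRedis(host='localhost', port=6379, db=0, decode_responses=True)

request_uuid = sys.argv[1]
raw = r_cache.get('crawler_request:{}'.format(request_uuid))
if raw is None:
    sys.exit('No crawler request found for {}'.format(request_uuid))
crawler_config = json.loads(raw)
print(crawler_config.get('url'), crawler_config.get('port'))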
Example #45
0
    from helpers.initscript import *
    from helpers.files_splitter import *
    services_dir = os.path.join(root_dir,config.get('directories','services'))
    raw_data = os.path.join(root_dir,config.get('directories','raw_data'))

    service = os.path.join(services_dir, "push_update_routing")

    if args.action == "start":
        print("Start pushing routes...")
        publisher.info( "Start pushing routes...")
        print(service + " to start...")
        publisher.info(service + " to start...")
        proc = service_start_once(servicename = service,
                processname = service)

    elif args.action == "stop":
        print("Stop pushing routes...")
        publisher.info("Stop pushing routes...")
        pids = pidof(processname=service)
        if pids:
            print(service + " to be stopped...")
            publisher.info(service + " to be stopped...")
            for pid in pids:
                try:
                    os.kill(int(pid), signal.SIGKILL)
                except OSError as e:
                    print(service + " unsuccessfully stopped")
                    publisher.error(service + " unsuccessfully stopped")
            rmpid(processname=service)

Example #46
0
def insert():
    """
        Re-insert in the database the data provided by the module and
        extracted by :meth:`get_all_information` in a sorted form.
    """
    while True:
        i = 0
        try:
            while temp_db.scard(uid_list) > 0:
                infos = get_all_information()
                if infos is None:
                    continue
                uid, ip, src, timestamp = infos
                if ip is None:
                    publisher.error('Entry without IP, invalid')
                    continue
                if src is None:
                    publisher.error(ip + ' without source, invalid')
                    continue
                if timestamp.date() < datetime.date.today() - \
                        datetime.timedelta(1) and not accept_old_entries:
                    publisher.warning('The timestamp ({ts}) of {ip} from {source} is too old.'.\
                            format(ts = timestamp.isoformat(), ip = ip, source = src))
                    continue
                try:
                    # Check and normalize the IP
                    ip_bin = IPy.IP(ip)
                    if ip_bin.iptype() != 'PUBLIC':
                        publisher.warning(
                            str(ip_bin) + ' is not a PUBLIC IP Address')
                        continue
                    ip = ip_bin.strCompressed()
                except:
                    publisher.error('This IP: ' + ip + ' is invalid.')
                    continue

                iso_timestamp = timestamp.isoformat()
                date = timestamp.date().isoformat()
                index_day_src = '{date}{sep}{key}'.format(sep=separator,
                                                          date=date,
                                                          key=list_sources)
                index_day_ips = 'temp{sep}{date}{sep}{source}{sep}{key}'.format(
                    sep=separator, date=date, source=src, key=list_ips)
                ip_details = '{ip}{sep}{timestamp}'.format(
                    sep=separator, ip=ip, timestamp=iso_timestamp)

                global_db.sadd(index_day_src, src)
                pipeline_temp_db = temp_db.pipeline()
                pipeline_temp_db.sadd(index_day_ips, ip_details)
                pipeline_temp_db.sadd(temp_ris, ip)
                pipeline_temp_db.sadd(temp_no_asn, index_day_ips)
                pipeline_temp_db.delete(uid)
                pipeline_temp_db.execute()
                i += 1
                if i % 100 == 0 and config_db.exists(stop_db_input):
                    break
                if i % 10000 == 0:
                    publisher.info('{nb} new entries to insert'\
                            .format(nb = temp_db.scard(uid_list)))
        except:
            publisher.critical('Unable to insert, redis does not respond')
            break
        time.sleep(sleep_timer)
        if config_db.exists(stop_db_input):
            publisher.info('DatabaseInput stopped.')
            break
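insert() consumes (uid, ip, src, timestamp) tuples from get_all_information(), which the example does not show. The sketch below is one hypothetical way it could be implemented; the hash-per-uid storage layout, key names and timestamp format are assumptions.

# Hypothetical sketch (assumption): return one (uid, ip, src, timestamp) tuple
# for insert() above. Storage layout and key names are illustrative.
import datetime
import redis

temp_db = redis.StrictRedis(host='localhost', port=6379, db=1, decode_responses=True)
uid_list = 'uid_list'

def get_all_information():
    uid = temp_db.spop(uid_list)
    if uid is None:
        return None
    entry = temp_db.hgetall(uid)          # one hash per uid (assumed layout)
    if not entry or entry.get('timestamp') is None:
        return None
    timestamp = datetime.datetime.strptime(entry['timestamp'],
                                           '%Y-%m-%d %H:%M:%S')
    return uid, entry.get('ip'), entry.get('src'), timestamp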
Example #47
0
def main():
    """Main Function"""

    # CONFIG #
    cfg = ConfigParser.ConfigParser()
    cfg.read(configfile)

    # Redis
    r_serv1 = redis.StrictRedis(
        host=cfg.get("Redis_Queues", "host"),
        port=cfg.getint("Redis_Queues", "port"),
        db=cfg.getint("Redis_Queues", "db"))

    # LOGGING #
    publisher.channel = "Script"

    # ZMQ #
    # Subscriber
    channel = cfg.get("PubSub_Global", "channel")
    subscriber_name = "DomainClassifier"
    subscriber_config_section = "PubSub_Global"

    cc = cfg.get("PubSub_DomainClassifier", "cc")
    cc_tld = cfg.get("PubSub_DomainClassifier", "cc_tld")

    sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)

    # FUNCTIONS #
    publisher.info("""ZMQ DomainClassifier is Running""")
    c = DomainClassifier.domainclassifier.Extract(rawtext="")

    while True:
        try:
            message = sub.get_msg_from_queue(r_serv1)

            if message is not None:
                PST = Paste.Paste(message.split(" ", -1)[-1])
            else:
                if r_serv1.sismember("SHUTDOWN_FLAGS", "DomainClassifier"):
                    r_serv1.srem("SHUTDOWN_FLAGS", "DomainClassifier")
                    publisher.warning("Shutdown Flag Up: Terminating.")
                    break
                publisher.debug("Script DomainClassifier is idling 10s")
                time.sleep(1)
                continue
            docpath = message.split(" ", -1)[-1]
            paste = PST.get_p_content()
            mimetype = PST._get_p_encoding()
            if mimetype == "text/plain":
                c.text(rawtext=paste)
                c.potentialdomain()
                c.validdomain(rtype=['A'],extended=True)
                localizeddomains = c.include(expression=cc_tld)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning('DomainC;{};{};{};Checked {} located in {}'.format( PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld))
                localizeddomains =  c.localizedomain(cc=cc)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning('DomainC;{};{};{};Checked {} located in {}'.format( PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc))
        except IOError:
            print "CRC Checksum Failed on :", PST.p_path
            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(PST.p_source, PST.p_date, PST.p_name))
            pass
Example #48
0
    # LOGGING #
    publisher.info("ZMQ Indexer is Running")

    while True:
        try:
            message = p.get_from_set()

            if message is not None:
                PST = Paste.Paste(message)
            else:
                publisher.debug("Script Indexer is idling 1s")
                time.sleep(1)
                continue
            docpath = message.split(" ", -1)[-1]
            paste = PST.get_p_content()
            print "Indexing :", docpath
            if indexertype == "whoosh":
                indexwriter = ix.writer()
                indexwriter.update_document(title=unicode(docpath,
                                                          errors='ignore'),
                                            path=unicode(docpath,
                                                         errors='ignore'),
                                            content=unicode(paste,
                                                            errors='ignore'))
                indexwriter.commit()
        except IOError:
            print "CRC Checksum Failed on :", PST.p_path
            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
                PST.p_source, PST.p_date, PST.p_name))
Example #49
0
    config.read(config_file)
    root_dir = config.get('directories','root')
    sys.path.append(os.path.join(root_dir,config.get('directories','libraries')))
    from helpers.initscript import *
    services_dir = os.path.join(root_dir,config.get('directories','services'))

    service = os.path.join(services_dir, "fetch_bview")

    if args.action == "start":
        print('Start fetching of bview')
        publisher.info('Start fetching of bview')
        print(service + " to start...")
        publisher.info(service + " to start...")
        proc = service_start_once(servicename = service,
                processname = service)

    elif args.action == "stop":
        print('Stop fetching of bview')
        publisher.info('Stop fetching of bview')
        pid = pidof(processname=service)
        if pid:
            pid = pid[0]
            print(service + " to be stopped...")
            publisher.info(service + " to be stopped...")
            try:
                os.kill(int(pid), signal.SIGKILL)
            except OSError as e:
                print(service + ' unsuccessfully stopped')
                publisher.error(service + ' unsuccessfully stopped')
            rmpid(processname=service)
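The init-script examples above depend on pidof, rmpid and service_start_once from helpers.initscript, none of which are shown. Below is a hypothetical pid-file based sketch of the two lookup helpers; the pid directory and file naming are assumptions, not the project's actual implementation.

# Hypothetical sketch (assumption): pid-file based equivalents of the pidof()
# and rmpid() helpers used by the init-script examples. Paths are illustrative.
import os

pid_dir = '/tmp/pids'

def _pidfile(processname):
    return os.path.join(pid_dir, os.path.basename(processname) + '.pid')

def pidof(processname):
    """Return the recorded pids for a process name, or an empty list."""
    path = _pidfile(processname)
    if not os.path.exists(path):
        return []
    with open(path) as f:
        return [line.strip() for line in f if line.strip()]

def rmpid(processname):
    """Forget the pid file once the process has been stopped."""
    path = _pidfile(processname)
    if os.path.exists(path):
        os.remove(path)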
Example #50
0
    def get_ip_info(self, ip, days_limit=None):
        """
            Return information related to an IP address.

            :param ip: The IP address
            :param days_limit: The number of days we want to check in the past
                (default: around 2 years)
            :rtype: Dictionary

                .. note:: Format of the output:

                    .. code-block:: python

                        {
                            'ip': ip,
                            'days_limit' : days_limit,
                            'ptrrecord' : 'ptr.record.com',
                            'history':
                                [
                                    {
                                        'asn': asn,
                                        'interval': [first, last],
                                        'block': block,
                                        'timestamp': timestamp,
                                        'descriptions':
                                            [
                                                [date, descr],
                                                ...
                                            ]
                                    },
                                    ...
                                ]
                        }
        """
        if days_limit is None:
            days_limit = 750
        to_return = {'ip': ip, 'days_limit': days_limit, 'history': []}
        if self.has_ptr:
            to_return['ptrrecord'] = self.get_ptr_record(ip)
        if not self.has_ipasn:
            publisher.debug('IPASN not enabled.')
            to_return['error'] = 'IPASN not enabled.'
            return to_return
        if not ip:
            to_return['error'] = 'No IP provided.'
            return to_return
        for first, last, asn, block in self.ipasn.aggregate_history(
                ip, days_limit):
            first_date = parser.parse(first).replace(tzinfo=tz.tzutc()).date()
            last_date = parser.parse(last).replace(tzinfo=tz.tzutc()).date()
            if self.has_asnhistory:
                desc_history = self.asnhistory.get_all_descriptions(asn)
                valid_descriptions = []
                for date, descr in desc_history:
                    date = date.astimezone(tz.tzutc()).date()
                    test_date = date - datetime.timedelta(days=1)
                    if last_date < test_date:
                        # Too new
                        continue
                    elif last_date >= test_date and first_date <= test_date:
                        # Changes within the interval
                        valid_descriptions.append([date.isoformat(), descr])
                    elif first_date > test_date:
                        # get the most recent change before the interval
                        valid_descriptions.append([date.isoformat(), descr])
                        break
            else:
                publisher.debug('ASN History not enabled.')
                valid_descriptions = [[
                    datetime.date.today().isoformat(),
                    'ASN History not enabled.'
                ]]
            if len(valid_descriptions) == 0:
                if len(desc_history) != 0:
                    # fallback, use the oldest description.
                    date = desc_history[-1][0].astimezone(tz.tzutc()).date()
                    descr = desc_history[-1][1]
                    valid_descriptions.append([date.isoformat(), descr])
                else:
                    # No history found for this ASN
                    if last_date > datetime.date(2013, 1, 1):
                        # ASN has been seen recently, should not happen
                        # as the asn history module is running since early 2013
                        publisher.error(
                            'Unable to find the ASN description of {}. IP address: {}. ASN History might be down.'
                            .format(asn, ip))
                    valid_descriptions.append(
                        ['0000-00-00', 'No ASN description has been found.'])
            entry = {}
            entry['asn'] = asn
            entry['interval'] = [first_date.isoformat(), last_date.isoformat()]
            entry['block'] = block
            entry['timestamp'] = self.get_first_seen(asn, block)
            entry['descriptions'] = valid_descriptions
            to_return['history'].append(entry)
        return to_return
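A short usage sketch for get_ip_info(), matching the output format documented in the docstring. The query argument below stands for whatever instance exposes the method; its construction is not part of the example.

# Usage sketch (assumption): 'query' is whatever object exposes get_ip_info().
def print_ip_history(query, ip):
    info = query.get_ip_info(ip, days_limit=30)
    print(info.get('ptrrecord'))
    for entry in info['history']:
        first, last = entry['interval']
        print('AS{0} announced {1} between {2} and {3}'.format(
            entry['asn'], entry['block'], first, last))
        for date, descr in entry['descriptions']:
            print('    {0}: {1}'.format(date, descr))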
Example #51
0
    config.read(config_file)
    root_dir = config.get('directories', 'root')
    sys.path.append(os.path.join(root_dir, config.get('directories', 'libraries')))
    from helpers.initscript import *
    services_dir = os.path.join(root_dir, config.get('directories', 'services'))

    service = os.path.join(services_dir, "ip_zmq")

    if args.action == "start":
        print("Starting ZeroMQ IP publisher...")
        publisher.info('Starting ZeroMQ IP publisher...')
        print(service+" to start...")
        publisher.info(service + ' to start...')
        proc = service_start_once(servicename=service, processname=service)

    elif args.action == "stop":
        print("Stopping ZeroMQ IP publisher...")
        publisher.info('Stopping ZeroMQ IP publisher...')
        pid = pidof(processname=service)
        if pid:
            pid = pid[0]
            try:
                os.kill(int(pid), signal.SIGHUP)
            except OSError as e:
                print(service + " unsuccessfully stopped")
                publisher.error(service + ' unsuccessfully stopped')
            rmpid(processname=service)
        else:
            print('No running ZeroMQ IP publisher process')
            publisher.info('No running ZeroMQ IP publisher process')
Example #52
0
    args = parser.parse_args()
    __prepare(args.directory)

    publisher.port = redis_port
    publisher.channel = 'ASN_History'
    time.sleep(5)
    publisher.info('Importer started.')
    while True:
        for timestamp, data in parse(args.directory):
            r = redis.Redis(host=redis_host, port=redis_port, db=redis_db)

            last_update = r.get('last_update')
            if last_update > timestamp:
                msg = 'Trying to import an old file ({old}). Latest: {new}'.format(
                    old=timestamp, new=last_update)
                publisher.error(msg)
                continue
            else:
                msg = '===== Importing new file: {new} ====='.format(new=timestamp)
                publisher.info(msg)
                p = r.pipeline(transaction=False)
                p.set('last_update', timestamp)
                p.sadd('all_timestamps', timestamp)
                new_asns = 0
                updated_descrs = 0
                for asn, descr in data:
                    all_descrs = r.hgetall(asn)
                    if len(all_descrs) == 0:
                        p.hset(asn, timestamp, descr)
                        publisher.debug('New asn: {asn}'.format(asn=asn))
                        new_asns += 1
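The importer loops above iterate over parse(args.directory), which is expected to yield (timestamp, data) pairs where data is a sequence of (asn, description) tuples. The generator below is a purely illustrative assumption about that contract; the on-disk format is invented for the sketch.

# Hypothetical sketch (assumption): a parse() generator matching the loops
# above, yielding (timestamp, [(asn, description), ...]). The file layout
# (one file per timestamp, 'asn,description' lines) is invented.
import os

def parse(directory):
    for filename in sorted(os.listdir(directory)):
        data = []
        with open(os.path.join(directory, filename)) as f:
            for line in f:
                asn, _, descr = line.rstrip('\n').partition(',')
                data.append((asn, descr))
        yield filename, data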
Example #53
0
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
    :file:`bin/services/microblog.py` - Microblogging client
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    Start the microblogging client which posts on twitter and identica
"""

import time
from pubsublogger import publisher
import microblog

dev_mode = True

if __name__ == '__main__':

    sleep_timer = 3600

    publisher.channel = 'API_Twitter'

    while 1:
        try:
            if microblog.post_new_top_ranking():
                publisher.info('New Ranking posted on twitter and identica.')
                print 'New Ranking posted on twitter and identica.'
        except Exception as e:
            publisher.error("Something bad occurs: " + e)
            print "Something bad occurs: " + str(e)
        time.sleep(sleep_timer)