Example #1
def slexy(vars_dict):
    """
    Scraping function for slexy. It is almost identical to the ix.io scraper, except that the
    paste links are nested in tables. slexy also rate-limits more aggressively, so a minimum
    limiter of five seconds is enforced

    :param vars_dict: dict of necessary variables returned from config()
    :return: nothing
    """
    lib.print_status("Starting slexy run...")
    # Connect to archive and get parameters for individual documents
    soup = BeautifulSoup(
        lib.connect("https://slexy.org/recent", verify_ssl=False).text,
        'html.parser')
    table = soup.find("table", attrs={'id': "recent_pastes"})
    parameters = {a['href'] for a in table.findAll('a', href=True)}
    # Loop through parameters
    for param in parameters:
        # Connect and fetch the raw text
        document_soup = BeautifulSoup(
            lib.connect(f'https://slexy.org{param}', verify_ssl=False).text,
            'html.parser')
        document_table = document_soup.findAll("table")
        raw_parameter = [
            a['href'] for a in document_table[1].findAll('a', href=True)
            if 'raw' in a['href']
        ][0]
        unprocessed = BeautifulSoup(
            lib.connect(f'https://slexy.org{raw_parameter}',
                        verify_ssl=False).text, 'html.parser')
        # Pass to archive engine
        # We remove the /view/ from the param for file naming purposes
        identifier = f'slexy-{param.split("/view/")[1]}'
        lib.archive_engine(str(unprocessed), identifier, vars_dict)
        sleep(max(vars_dict['limiter'], 5))  # slexy bans aggressively, so never sleep less than five seconds
    lib.print_success("All slexy pastes processed.")
Example #2
def pastebin(vars_dict):
    """
    This function fetches the pastebin archive and all the pastes in it. It passes each one to archive_engine(),
    then sleeps for the number of seconds specified by vars_dict['limiter']

    :param vars_dict: dict of necessary variables returned from config()
    :return: Nothing
    """
    # Fetch the pastebin public archive
    lib.print_status(f"Starting pastebin run...")
    arch_page = lib.connect("https://pastebin.com/archive")
    arch_soup = BeautifulSoup(arch_page.text, 'html.parser')
    sleep(2)
    # Parse the archive HTML to get the individual document URLs
    table = arch_soup.find("table", attrs={'class': "maintable"})
    parameters = [
        a['href'] for a in table.findAll('a', href=True)
        if 'archive' not in a['href']
    ]
    # For each paste listed, connect and pass the text to archive_engine()
    for param in parameters:
        param = param[1:]  # removes the leading forward slash
        document_page = lib.connect(f"https://pastebin.com/{param}")
        document_soup = BeautifulSoup(document_page.text, 'html.parser')
        # Fetch the raw text and pass to archive_engine()
        unprocessed = document_soup.find('textarea').contents[0]
        identifier = f'pastebin-{param}'
        lib.archive_engine(unprocessed, identifier, vars_dict)
        sleep(vars_dict['limiter'])
    lib.print_success("All pastebin pastes processed.")
Example #3
def archive_engine(prescan_text, proch, vars_dict):
    if vars_dict['yara_scanning'] is True:
        matches = vars_dict['search_rules'].match(data=prescan_text)
        if matches:
            # In yara-python < 4.3, match.strings is a list of (offset, identifier, data) tuples
            rule = matches[0].rule
            identifier = matches[0].strings[0][1]
            matched_data = matches[0].strings[0][2].decode('UTF-8')
            if rule == 'blacklist':
                lib.print_status(f"Blacklisted term detected: [{matched_data}]")
            elif rule == 'b64Artifacts':
                lib.print_success(f"Base64 Artifact Found: [{matched_data}]")
                decoded_content = b64decode(prescan_text)
                if matched_data == "H4sI":  # "H4sI" is the base64 encoding of a gzip magic header
                    # b64decode() already returns bytes, so decompress it directly
                    decompressed_string = gzip.decompress(decoded_content).decode('utf-8')
                    with codecs.open(f"{vars_dict['workpath']}{proch}.file", 'w+', 'utf-8') as savefile:
                        savefile.write(decompressed_string)
                else:
                    with codecs.open(f"{vars_dict['workpath']}{identifier}_{proch}.txt", 'w+', 'utf-8') as savefile:
                        # the decoded bytes may not be valid UTF-8, so replace undecodable bytes
                        savefile.write(decoded_content.decode('utf-8', 'replace'))
            elif rule == 'powershellArtifacts':
                lib.print_success(f"Powershell Artifact Found: [{matched_data}]")
                with codecs.open(f"{vars_dict['workpath']}{matched_data}_{proch}.ps1", 'w+', 'utf-8') as savefile:
                    savefile.write(prescan_text)
            elif rule == 'keywords':
                lib.print_success(f"Keyword found: [{matched_data}]")
                with codecs.open(f"{vars_dict['workpath']}{matched_data}_{proch}.txt", 'w+', 'utf-8') as savefile:
                    savefile.write(prescan_text)
            else:
                with codecs.open(f"{vars_dict['workpath']}{matched_data}_{proch}.txt", 'w+', 'utf-8') as savefile:
                    savefile.write(prescan_text)
        else:
            with codecs.open(f"{vars_dict['workpath']}{proch}.txt", 'w+', 'utf-8') as savefile:
                savefile.write(prescan_text)
    else:
        with codecs.open(f"{vars_dict['workpath']}{proch}.txt", 'w+', 'utf-8') as savefile:
            savefile.write(prescan_text)
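The rule names archive_engine() branches on (blacklist, b64Artifacts, powershellArtifacts, keywords) come from the project's compiled YARA rules, which are not shown on this page. A hypothetical, minimal ruleset compiled from source, just to exercise each branch:

import yara

# Hypothetical rules for illustration; BinBot's real rule files live under yara_rules/
search_rules = yara.compile(source=r'''
rule blacklist { strings: $s = "do not save me" nocase condition: $s }
rule b64Artifacts { strings: $gzip = "H4sI" condition: $gzip }
rule powershellArtifacts { strings: $ps = "Invoke-Expression" nocase condition: $ps }
rule keywords { strings: $kw = "password" nocase condition: $kw }
''')

matches = search_rules.match(data="H4sIAAAAAAAA/w==")
print(matches[0].rule)  # b64Artifacts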
Example #4
def main(args):
    lib.print_title("""
    _________________________________________
    [                                       ]
    [                                       ]
    [           Welcome to BinBot           ]
    [            Made by Mili-NT            ]
    [                                       ]
    [_______________________________________]
    Note: To load a config file, pass it as an argument
    """)
    # If filepath is passed, it passes that to config().
    # If not, it passes an invalid path "" which results in manual setup
    vars_dict = config(args[1]) if len(args) > 1 else config("")
    try:
        # This creates a thread for every service enabled
        runs = 0
        while True:
            with ThreadPoolExecutor(
                    max_workers=len(vars_dict['services'])) as executor:
                for service in vars_dict['services']:
                    executor.submit(collectors.services[service], vars_dict)
            runs += 1
            # stop_input is either the literal True (run forever) or an int run count.
            # isinstance(vars_dict['stop_input'], int) can't tell them apart because
            # True == 1 in Python, so the value is compared as a string instead.
            if str(vars_dict['stop_input']) != 'True':
                if runs >= vars_dict['stop_input']:
                    lib.print_success("Runs Complete, Operation Finished...")
                    exit()
            lib.print_status(
                f"All services scraped, cooling down for {vars_dict['cooldown']} seconds"
            )
            sleep(vars_dict['cooldown'] / 2)
            lib.print_status("Halfway through cooldown.")
            sleep(vars_dict['cooldown'] / 2)
            lib.print_status("Continuing...")

    except KeyboardInterrupt:
        lib.print_status(f"Operation cancelled...")
        exit()
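main() dispatches each enabled service through collectors.services, which the other examples imply is a name-to-function mapping alongside a numbered menu used by config(). A plausible sketch of those module-level tables (the exact contents are an assumption; only the slexy, pastebin, and ixio collectors appear on this page):

# collectors.py (sketch): map service names to their scraping functions
services = {
    'slexy': slexy,
    'pastebin': pastebin,
    'ixio': ixio,
}
# Numbered menu shown by config() when prompting for services to enable
service_names = {1: 'slexy', 2: 'pastebin', 3: 'ixio'}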
Example #5
def Non_API_Search(vars_dict):
    arch_runs = 0
    while True:
        if arch_runs > 0:
            lib.print_status(f"Runs: {arch_runs}")
            if vars_dict['stop_input'] is not True and arch_runs >= vars_dict['stop_input']:
                lib.print_success("Runs Complete, Operation Finished...")
                exit()
            else:
                lib.print_status(f"Pastes fetched, cooling down for {vars_dict['cooldown']} seconds...")
                sleep(vars_dict['cooldown']/2)
                lib.print_status("Halfway through cooldown.")
                sleep(vars_dict['cooldown']/2)
                lib.print_status("Resuming...")
        if vars_dict['stop_input'] is True or arch_runs < vars_dict['stop_input']:
            arch_page = archive_connect()
            arch_soup = BeautifulSoup(arch_page.text, 'html.parser')
            sleep(2)
            lib.print_status("Getting archived pastes...")
            if 'access denied' in arch_page.text:
                lib.print_error("IP temporarily suspended; pausing until the ban is lifted. Estimated time: one hour...")
                sleep(vars_dict['cooldown'])
                lib.print_status("Process resumed...")
                continue
            lib.print_status("Finding params...")
            table = arch_soup.find("table", class_="maintable")  # Fetch the table of recent pastes
            while True:
                try:
                    tablehrefs = table.findAll('a', href=True)  # Find the <a> tags for every paste
                    break
                except AttributeError:
                    # table is None when a ban page is served instead of the archive, so
                    # re-fetch it after the cooldown rather than retrying the same soup forever
                    lib.print_error("IP temporarily suspended; pausing until the ban is lifted. Estimated time: one hour...")
                    sleep(vars_dict['cooldown'])
                    table = BeautifulSoup(archive_connect().text, 'html.parser').find("table", class_="maintable")
                    lib.print_status("Process resumed...")
                    continue
            lib.print_success("Params fetched...")
            for h in tablehrefs:
                proch = h['href'].replace("/", "")  # fetch the URL param for each paste
                lib.print_status(f"Acting on param {proch}...")
                full_archpage, full_arch_url = parameter_connect(proch)
                item_soup = BeautifulSoup(full_archpage.text, 'html.parser')
                unprocessed = item_soup.find('textarea')  # Fetch the raw text in the paste
                # Strip the <textarea> wrapper tags, which vary slightly between page layouts
                taglist = [
                    '<textarea class="paste_code" id="paste_code" name="paste_code" onkeydown="return catchTab(this,event)">',
                    '<textarea class="paste_textarea" id="paste_code" name="paste_code" onkeydown="return catchTab(this,event)" rows="10">',
                    '</textarea>',
                ]
                for tag in taglist:
                    unprocessed = str(unprocessed).replace(tag, "")
                archive_engine(unprocessed, proch, vars_dict)
                arch_runs += 1
                sleep(vars_dict['limiter'])
        else:
            lib.print_success("Operation Finished...")
            break
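Stripping the textarea wrapper by exact string replacement is brittle: any attribute change on the site's side silently breaks the taglist. A more robust sketch of the same step, letting BeautifulSoup extract the inner text directly (a substitute technique, not what the function above does):

from bs4 import BeautifulSoup

html = '<textarea class="paste_code" id="paste_code">print("hello")</textarea>'
textarea = BeautifulSoup(html, 'html.parser').find('textarea')
unprocessed = textarea.get_text()  # returns the inner text, so there are no tags to strip
print(unprocessed)  # print("hello")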
Example #6
def ixio(vars_dict):
    """
    This is the scraping function for ix.io. It works very similarly to the pastebin() function:
    it fetches a list of documents from the archive, processes each one, and cools down

    :param vars_dict: dict of necessary variables returned from config()
    :return: nothing
    """
    lib.print_status("Starting ix.io run...")
    # Connect to archive and gather individual document parameters
    soup = BeautifulSoup(lib.connect("http://ix.io/user/").text, 'html.parser')
    # The parameter is sanitized (has its leading and trailing forward slashes removed) during this comprehension
    parameters = {a['href'].replace('/', '') for a in soup.findAll('a', href=True)}
    # Loop through parameters and get raw text
    for param in parameters:
        document_soup = BeautifulSoup(
            lib.connect(f'http://ix.io/{param}').text, 'html.parser')
        # Pass raw text to archive engine
        identifier = f'ixio-{param}'
        lib.archive_engine(str(document_soup), identifier, vars_dict)
        sleep(vars_dict['limiter'])
    lib.print_success("All ix.io pastes processed.")
Example #7
def archive_engine(prescan_text, proch, vars_dict):
    if vars_dict['yara_scanning'] is True:
        matches = vars_dict['search_rules'].match(data=prescan_text)
        if matches:
            # In yara-python < 4.3, match.strings is a list of (offset, identifier, data) tuples
            rule = matches[0].rule
            identifier = matches[0].strings[0][1]
            matched_data = matches[0].strings[0][2].decode('UTF-8')
            timestamp = datetime.now().strftime('%X')
            if rule == 'blacklist':
                lib.print_status(f"Blacklisted term detected: [{matched_data}] at [{timestamp}]")
            elif rule == 'b64Artifacts':
                lib.print_success(f"Base64 Artifact Found: [{matched_data}] at [{timestamp}]")
                with codecs.open(f"{vars_dict['workpath']}{identifier}_{proch}.b64", 'w+', 'utf-8') as savefile:
                    savefile.write(prescan_text)
            elif rule == 'powershellArtifacts':
                lib.print_success(f"Powershell Artifact Found: [{matched_data}] at [{timestamp}]")
                with codecs.open(f"{vars_dict['workpath']}{matched_data}_{proch}.ps1", 'w+', 'utf-8') as savefile:
                    savefile.write(prescan_text)
            elif rule == 'keywords':
                lib.print_success(f"Keyword found: [{matched_data}] at [{timestamp}]")
                with codecs.open(f"{vars_dict['workpath']}{matched_data}_{proch}.txt", 'w+', 'utf-8') as savefile:
                    savefile.write(prescan_text)
            else:
                with codecs.open(f"{vars_dict['workpath']}{matched_data}_{proch}.txt", 'w+', 'utf-8') as savefile:
                    savefile.write(prescan_text)
        else:
            with codecs.open(f"{vars_dict['workpath']}{proch}.txt", 'w+', 'utf-8') as savefile:
                savefile.write(prescan_text)
    else:
        with codecs.open(f"{vars_dict['workpath']}{proch}.txt", 'w+', 'utf-8') as savefile:
            savefile.write(prescan_text)
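Both archive_engine() versions index match.strings as (offset, identifier, data) tuples, which is the yara-python pre-4.3 API; in yara-python 4.3 and later, match.strings holds StringMatch objects instead. A small compatibility helper, sketched under the assumption that only those two API shapes need handling:

def first_match_details(match):
    """Return (identifier, matched_bytes) for a yara Match across yara-python versions."""
    first = match.strings[0]
    if isinstance(first, tuple):
        # yara-python < 4.3: (offset, identifier, data) tuples
        _, identifier, data = first
        return identifier, data
    # yara-python >= 4.3: StringMatch objects holding StringMatchInstance entries
    return first.identifier, first.instances[0].matched_data

# usage: identifier, data = first_match_details(matches[0])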
Example #8
def manual_setup():
    # Save path
    while True:
        workpath = lib.print_input("Enter the path you wish to save text documents to (enter curdir for current directory)")
        if workpath.lower() == 'curdir':
            if name.lower() == 'nt':
                workpath = getcwd()
            else:
                workpath = syspath[0]
        if path.isdir(workpath):
            lib.print_success("Valid Path...")
            # os.name is already lowercase ('nt' on Windows), so compare it directly
            if not (workpath.endswith('\\') or workpath.endswith('/')):
                workpath += '\\' if name == 'nt' else '/'
            break
        else:
            lib.print_error("Invalid path, check input...")
            continue
    # Looping
    while True:
        try:
            stopinput_input = lib.print_input("Run in a constant loop? [y]/[n]")
            if stopinput_input.lower() == 'y':
                stop_input = True
            elif stopinput_input.lower() == 'n':
                stop_input = int(lib.print_input("Enter the amount of successful pulls you wish to make (enter 0 for infinite)"))
            else:
                lib.print_error("Invalid Input.")
                continue
            # Limiter and Cooldown (fall back to the recommended values on non-numeric input)
            try: limiter = int(lib.print_input("Enter the request limit you wish to use (recommended: 5)"))
            except ValueError: limiter = 5
            try: cooldown = int(lib.print_input("Enter the cooldown between IP bans/Archive scrapes (recommended: 1200)"))
            except ValueError: cooldown = 1200
            break
        except ValueError:
            lib.print_error("Invalid Input.")
            continue
    while True:
        yara_choice = lib.print_input("Enable scanning documents using YARA rules? [y/n]")
        if yara_choice.lower() not in ['y', 'n', 'yes', 'no']:
            lib.print_error("Invalid Input.")
            continue
        elif yara_choice.lower() in ['y', 'yes']:
            yara_scanning = True
            break
        elif yara_choice.lower() in ['n', 'no']:
            yara_scanning = False
            break
    # Yara Compiling
    if yara_scanning is True:
        yara_dir = f"{getcwd()}/yara_rules"
        general_dir = f"{yara_dir}/general_rules/"
        binary_dir = f"{yara_dir}/binary_rules/"
        # isfile() must be checked against the subdirectory the rule file actually lives in
        search_rules = yara.compile(
            filepaths={f.replace(".yar", ""): path.join(general_dir, f) for f in listdir(general_dir)
                       if path.isfile(path.join(general_dir, f)) and f.endswith(".yar")})
        binary_rules = yara.compile(
            filepaths={f.replace(".yar", ""): path.join(binary_dir, f) for f in listdir(binary_dir)
                       if path.isfile(path.join(binary_dir, f)) and f.endswith(".yar")})
    else:
        search_rules = []
        binary_rules = []
    # Saving
    while True:
        savechoice = lib.print_input('Save configuration to file for repeated use? [y]/[n]')
        if savechoice.lower() == 'n':
            break
        elif savechoice.lower() == 'y':
            configname = lib.print_input("Enter the config name (no extension)")
            try:
                with open(configname + '.ini', 'w+') as cfile:
                    cfile.write(
f"""[initial_vars]
workpath = {workpath}
stop_input = {stop_input}
limiter = {limiter}
cooldown = {cooldown}
yara_scanning = {yara_scanning}""")
                    break
            except Exception as e:
                print(f"{e}")
                break
    vars_dict = {
        'workpath': workpath,
        'stop_input': stop_input,
        'limiter': limiter,
        'cooldown': cooldown,
        'yara_scanning': yara_scanning,
        'search_rules': search_rules,
        'binary_rules': binary_rules,
    }
    try:
        print("\n")
        for x in vars_dict.keys():
            if x not in ('search_rules', 'binary_rules'):
                if name == 'nt':
                    print(f"[{x}]: {vars_dict[x]}")
                    print("---------------------")
                else:
                    print(f"\x1b[94m[{x}]\x1b[0m: " + f"\x1b[1;32;40m{vars_dict[x]}\x1b[0m")
                    print("\x1b[94m---------------------\x1b[0m")
    finally:
        print("\n")
    return vars_dict
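manual_setup() persists its answers as an .ini file, and everything read back through configparser comes back as a string, so stop_input = True is saved as the text "True". A sketch of loading such a file, assuming the [initial_vars] section written above (BinBot's actual ini loader is not shown on this page):

from configparser import ConfigParser

parser = ConfigParser()
parser.read('myconfig.ini')  # 'myconfig.ini' is a placeholder name
section = parser['initial_vars']
vars_dict = {
    'workpath': section['workpath'],
    # configparser returns strings, so convert each value back to its real type
    'stop_input': True if section['stop_input'] == 'True' else int(section['stop_input']),
    'limiter': section.getint('limiter'),
    'cooldown': section.getint('cooldown'),
    'yara_scanning': section.getboolean('yara_scanning'),
}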
Example #9
def config(configpath):
    """
    :param configpath: path to config file, if it is blank or non-existent, it runs manual setup
    :return: vars_dict, a dictionary containing all the variables needed to run the main functions
    """
    # Manual Setup:
    if not path.isfile(configpath):
        # Saving options (workpath and saveall):
        while True:
            workpath = lib.print_input(
                "Enter the path you wish to save text documents to (enter curdir for current directory)"
            )
            workpath = syspath[0] if workpath.lower() == 'curdir' else workpath
            if path.isdir(workpath):
                lib.print_success("Valid Path...")
                workpath = workpath if any(
                    [workpath.endswith('\\'),
                     workpath.endswith('/')]) else f'{workpath}/'
            else:
                lib.print_error("Invalid path, check input...")
                continue
            savechoice = input(
                "Save all documents (Enter N to only save matched documents)? [y/n]: "
            )
            saveall = savechoice.lower() in ['y', 'yes']
            break
        # Services to Enable (services):
        while True:
            for x in collectors.service_names.keys():
                lib.print_status(f"[{x}]: {collectors.service_names[x]}")
            service_choice = lib.print_input(
                "Enter the number(s) of the services you wish to scrape, "
                "separated by a comma").replace(" ", '').split(',')
            services = [
                collectors.service_names[int(x)] for x in service_choice
                if int(x) in collectors.service_names.keys()
            ]
            services = list(collectors.service_names.values()
                            ) if services == [] else services
            break
        # Looping, Limiter, and Cooldown Input (stop_input, limiter, cooldown):
        while True:
            loop_input = lib.print_input("Run in a constant loop? [y]/[n]")
            if loop_input.lower() == 'y':
                stop_input = True
            else:
                try:
                    stop_input = int(lib.print_input("Enter the amount of times you want to fetch the archives: "))
                except ValueError:
                    lib.print_error("Invalid Input.")
                    continue
                # Entering 0 or below means run in an infinite loop
                stop_input = True if stop_input <= 0 else stop_input
            # Limiter and Cooldown; empty or non-numeric input falls back to the recommended values
            try:
                limiter = int(lib.print_input("Enter the request limit you wish to use (recommended: 5)"))
            except ValueError:
                limiter = 5
            try:
                cooldown = int(lib.print_input("Enter the cooldown between IP bans/Archive scrapes (recommended: 600)"))
            except ValueError:
                cooldown = 600
            # Non-positive values also fall back to the recommended defaults
            limiter = 5 if limiter <= 0 else limiter
            cooldown = 600 if cooldown <= 0 else cooldown
            break
        # YARA (yara_scanning)
        while True:
            yara_choice = lib.print_input(
                "Enable scanning documents using YARA rules? [y/n]")
            if yara_choice.lower() not in ['y', 'n', 'yes', 'no']:
                lib.print_error("Invalid Input.")
                continue
            elif yara_choice.lower() in ['y', 'yes']:
                yara_scanning = True
            elif yara_choice.lower() in ['n', 'no']:
                yara_scanning = False
            break
        # Building Settings Dict:
        vars_dict = {
            'workpath': workpath,
            'stop_input': stop_input,
            'limiter': limiter,
            'cooldown': cooldown,
            'yara_scanning': yara_scanning,
            'services': services,
            'saveall': saveall,
        }
        # Saving
        savechoice = lib.print_input(
            'Save configuration to file for repeated use? [y]/[n]')
        if savechoice.lower() == 'y':
            configname = lib.print_input(
                "Enter the config name (no extension)")
            configname = configname.split(
                ".")[0] if '.json' in configname else configname
            json.dump(vars_dict, open(f"{configname}.json", 'w'))
    # Loading Config:
    else:
        vars_dict = json.load(open(configpath))
    # YARA Compilation:
    # YARA rules aren't written to the config file because compiled rules can't be serialized
    if vars_dict['yara_scanning']:
        general_dir = f'{syspath[0]}/yara_rules/general_rules/'
        binary_dir = f'{syspath[0]}/yara_rules/binary_rules/'
        # The extension check must be parenthesized: "A and B or C" would admit any
        # .yara path without verifying that it is a file
        vars_dict['search_rules'] = yara.compile(
            filepaths={
                f.split('.')[0]: path.join(general_dir, f)
                for f in listdir(general_dir)
                if path.isfile(path.join(general_dir, f))
                and (f.endswith(".yar") or f.endswith(".yara"))
            })
        vars_dict['binary_rules'] = yara.compile(
            filepaths={
                f.split('.')[0]: path.join(binary_dir, f)
                for f in listdir(binary_dir)
                if path.isfile(path.join(binary_dir, f))
                and (f.endswith(".yar") or f.endswith(".yara"))
            })
    # Display and Return:
    # (returning from a finally block would silently swallow any exception raised above,
    # so the display loop runs plainly and the return comes last)
    print("\n")
    for x in vars_dict.keys():
        if x not in ('search_rules', 'binary_rules'):
            print(f"\x1b[94m[{x}]\x1b[0m: " +
                  f"\x1b[1;32;40m{str(vars_dict[x])}\x1b[0m")
            print("\x1b[94m---------------------\x1b[0m")
    print("\n")
    return vars_dict
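config() serializes everything except the compiled YARA rules to JSON, so a saved config is just the settings dict. A representative file, written the same way config() writes it (all values are illustrative and the file name is a placeholder):

import json

# Illustrative contents of a saved config such as myconfig.json
sample = {
    "workpath": "/home/user/binbot/",
    "stop_input": True,        # or an int run count
    "limiter": 5,
    "cooldown": 600,
    "yara_scanning": True,
    "services": ["slexy", "pastebin", "ixio"],
    "saveall": False,
}
json.dump(sample, open("myconfig.json", "w"))
# On later runs, passing the file path as the first argument loads it via config()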