Beispiel #1
0
    def __init__(self, inputfile, phishtank, openphish, cleanmx, output, alivecheck, emailnotifiers):
        """Store the run options and hand the configuration to the enabled components.

        Args:
            inputfile: Input file to read instead of downloading; when falsy,
                a Downloader and an Extractor are created.
            phishtank: Passed through unchanged to Downloader and Extractor.
            openphish: Passed through unchanged to Downloader and Extractor.
            cleanmx: Passed through unchanged to Downloader and Extractor.
            output: String or collection tested with ``in`` for 'json',
                'csv' and 'xml' to decide which adapters get the config.
            alivecheck: When truthy, AliveChecker receives the config.
            emailnotifiers: When truthy, EmailNotifiers receives the config.

        Raises:
            SystemExit: With status 1 when ``CheckDependencies.run()``
                returns a falsy value.
        """
        logging.debug("Instantiating the '%s' class", self.__class__.__name__)

        self._inputfile = inputfile
        self._phishtank = phishtank
        self._openphish = openphish
        self._cleanmx = cleanmx
        self._output = output
        self._alivecheck = alivecheck
        self._emailnotifiers = emailnotifiers

        # The bare exit() helper is injected by the `site` module for
        # interactive use and exits with status 0; a failed dependency check
        # must terminate with a non-zero status instead.
        if not CheckDependencies.run():
            raise SystemExit(1)

        self._cfg = ConfigReader.run()

        # Without an input file the data has to be downloaded and extracted.
        if not self._inputfile:
            self._downloader = Downloader(self._cfg, self._phishtank, self._openphish, self._cleanmx)
            self._extractor = Extractor(self._cfg, self._phishtank, self._openphish, self._cleanmx)

        # The components below are configured through a class attribute,
        # and only when they were requested.
        if self._alivecheck:
            AliveChecker.config = self._cfg

        if 'json' in self._output:
            JSONAdapter.config = self._cfg
        if 'csv' in self._output:
            CSVAdapter.config = self._cfg
        if 'xml' in self._output:
            XMLAdapter.config = self._cfg

        if self._emailnotifiers:
            EmailNotifiers.config = self._cfg
if __name__ == '__main__':
    # The URL list to process; its base name also names the save directory
    # and the log file.
    urls_file = 'emotioNet_1.txt'
    links = get_urls(os.path.join(BASE_URL_PATH, urls_file))

    f_name, _ext = os.path.splitext(urls_file)
    save_path = os.path.join(BASE_SAVE_PATH, f_name)

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(save_path, exist_ok=True)
    log_file = os.path.join(BASE_LOGS_PATH, f_name + '.log')

    # asyncio.get_event_loop() is deprecated when no loop is running and
    # fails on recent Python versions; create the loop explicitly and install
    # it as the current one so any get_event_loop() call still finds it.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

    downloader = Downloader(links,
                            save_path,
                            log_file,
                            headers,
                            max_tasks=150,
                            max_tries=4,
                            max_sem=1000,
                            conn_time=30,
                            loop=loop)

    try:
        loop.run_until_complete(downloader.run())
    except Exception as e:
        print(e)
    finally:
        # stop() before run_forever() makes the loop run one last iteration
        # (processing already-scheduled callbacks) and return, after which
        # it can be closed.
        loop.stop()
        loop.run_forever()
        loop.close()
Beispiel #3
0
from download.download_configuration import DownloadConfiguration, DateRange
from download.downloader import Downloader
import concurrent.futures

import requests

BASE_URL = "https://bulkdata.uspto.gov/"

# Fetch the landing page. A timeout keeps the script from hanging forever on
# an unresponsive server, and raise_for_status() stops it from parsing an
# HTTP error page as if it were the real listing.
page = requests.get(BASE_URL, timeout=30)
page.raise_for_status()
html = page.content

soup = BeautifulSoup(html, "html.parser")

# "redbook" is the pattern to match, e.g.:
# https://bulkdata.uspto.gov/data/patent/grant/redbook/2021
# https://bulkdata.uspto.gov/data/patent/grant/redbook/2021/I20210105.tar

date_range = DateRange(start=2021, end=2021)
conf = DownloadConfiguration(Path("tarfiles"), date_range)
df = DataFilter(conf)
redbook_data = df.get_redbook_data(soup=soup)
redbook_for_years = df.filter_by_year(redbook_data)
urls: List[str] = df.get_all_download_urls(redbook_for_years)
print(urls)

downloader = Downloader(config=conf, d_filter=df)
print(f"Number of URLS: {len(urls)}")
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    # Executor.map() only re-raises worker exceptions when its results are
    # consumed, so the original call dropped every failure silently. Submit
    # each URL individually and report failures without aborting the rest.
    futures = {executor.submit(downloader.download_tar, url): url for url in urls}
    for future in concurrent.futures.as_completed(futures):
        try:
            future.result()
        except Exception as exc:
            print(f"Download failed for {futures[future]}: {exc}")
Beispiel #4
0
class Core:
    """Run the pipeline: collect website entries, optionally check and save them.

    Entries come either from a Downloader/Extractor pair or from a
    user-supplied input file. They are optionally passed through AliveChecker,
    written by the selected output adapters, and followed by an optional
    e-mail notification.
    """

    def __init__(self, inputfile, phishtank, openphish, cleanmx, output, alivecheck, emailnotifiers):
        """Store the run options and hand the configuration to the enabled components.

        Args:
            inputfile: Path of a text file with one URL per line; when falsy,
                a Downloader and an Extractor are created instead.
            phishtank: Passed through unchanged to Downloader and Extractor.
            openphish: Passed through unchanged to Downloader and Extractor.
            cleanmx: Passed through unchanged to Downloader and Extractor.
            output: String or collection tested with ``in`` for 'json',
                'csv' and 'xml' to select the output adapters.
            alivecheck: When truthy, entries are passed through AliveChecker.
            emailnotifiers: When truthy, EmailNotifiers is triggered after
                the results have been saved.

        Raises:
            SystemExit: With status 1 when ``CheckDependencies.run()``
                returns a falsy value.
        """
        logging.debug("Instantiating the '%s' class", self.__class__.__name__)

        self._inputfile = inputfile
        self._phishtank = phishtank
        self._openphish = openphish
        self._cleanmx = cleanmx
        self._output = output
        self._alivecheck = alivecheck
        self._emailnotifiers = emailnotifiers

        # The bare exit() helper is injected by the `site` module for
        # interactive use and exits with status 0; a failed dependency check
        # must terminate with a non-zero status instead.
        if not CheckDependencies.run():
            raise SystemExit(1)

        self._cfg = ConfigReader.run()

        # Without an input file the data has to be downloaded and extracted.
        if not self._inputfile:
            self._downloader = Downloader(self._cfg, self._phishtank, self._openphish, self._cleanmx)
            self._extractor = Extractor(self._cfg, self._phishtank, self._openphish, self._cleanmx)

        # The components below are configured through a class attribute,
        # and only when they were requested.
        if self._alivecheck:
            AliveChecker.config = self._cfg

        if 'json' in self._output:
            JSONAdapter.config = self._cfg
        if 'csv' in self._output:
            CSVAdapter.config = self._cfg
        if 'xml' in self._output:
            XMLAdapter.config = self._cfg

        if self._emailnotifiers:
            EmailNotifiers.config = self._cfg

    def _read_input_file(self):
        """Build the 'custom' entry dictionary from the lines of the input file.

        Returns:
            dict: ``{'custom': [entry, ...]}`` with one entry per non-blank
            line. The line is used for both 'domain' and 'url'; 'ip', 'time'
            and 'country' are set to 'n.d.' and 'brand' to 'custom'.
        """
        logging.debug("Reading possible phishing website from '%s'", self._inputfile)

        # Text mode yields str lines. The original 'rb' mode yields bytes on
        # Python 3, where bytes.replace('\n', '') raises TypeError. The
        # with-block also closes the file if reading fails.
        with open(self._inputfile, 'r') as fp:
            lines = fp.readlines()
        logging.debug(lines)

        entries = []
        for line in lines:
            # strip() also removes a stray '\r' from Windows line endings.
            url = line.strip()
            if not url:
                # A blank line carries no URL; skip it instead of emitting
                # an entry with an empty domain.
                continue
            entries.append({
                'domain': url,
                'url': url,
                'ip': 'n.d.',
                'brand': 'custom',
                'time': 'n.d.',
                'country': 'n.d.',
            })
        return {'custom': entries}

    def run(self):
        """Collect the entries, optionally alive-check them, save and notify.

        Nothing is saved and no notification is sent when the resulting
        collection is empty; a debug message is logged instead.
        """
        print("Running...")

        if not self._inputfile:
            self._downloader.run()
            ph_ws = self._extractor.run()
        else:
            ph_ws = self._read_input_file()

        if self._alivecheck:
            ph_ws = AliveChecker.are_alive(ph_ws)

        if ph_ws:
            if 'json' in self._output:
                JSONAdapter.save_ph_ws(ph_ws)
            if 'csv' in self._output:
                CSVAdapter.save_ph_ws(ph_ws)
            if 'xml' in self._output:
                XMLAdapter.save_ph_ws(ph_ws)

            if self._emailnotifiers:
                EmailNotifiers.notify()
        else:
            logging.debug('Empty phishing_website list')

        print("Done...")