예제 #1
0
    def __execute_query_test(self, dirindex, chunks_info_in_memory):
        '''Run the loaded query set against one index and collect timings.

        The whole query set is evaluated ``self.__iterations + 1`` times.
        The first pass (number 0) is a warm-up: it is executed and reported
        on screen, but its timings are not stored.

        Args:
            dirindex (string): index directory.
            chunks_info_in_memory (bool): whether the chunk information must
                be loaded in RAM.

        Returns:
            index (Index): index the statistics belong to.
            min_times (double list): minimum query time of each measured pass.
            avg_times (double list): average query time of each measured pass.
            max_times (double list): maximum query time of each measured pass.

            Every stored value is the browser benchmark multiplied by 1000
            (presumably seconds converted to milliseconds -- confirm against
            ``Browser.get_benchmark``).
        '''
        index = Index(dirindex)
        self.__check_queries_load()

        browser = Browser(index, BrowseType.Boolean)

        # Per-pass statistics.
        min_times = []
        avg_times = []
        max_times = []

        index.load(chunks_info_in_memory)
        index_name = path.basename(dirindex)
        evtype = "inmemory" if chunks_info_in_memory else "indisk"
        queries_count = len(self.__queries)

        # One extra pass: pass 0 is the warm-up.
        iterations = self.__iterations + 1
        info = "{0} ({1}) - evaluación nro. {2}/{3}"
        for i in range(iterations):
            print(info.format(index_name, evtype, i, iterations))
            min_time = None
            sum_time = 0
            max_time = 0
            for query in self.__queries:
                # Resolve the query; only the timing is used here.
                browser.browse(query)

                # Benchmark of the search that has just run.
                elapsed = browser.get_benchmark()

                sum_time += elapsed

                if elapsed > max_time:
                    max_time = elapsed

                # Compare against None explicitly: a measured minimum of 0 is
                # falsy and would otherwise be replaced by a larger reading.
                if min_time is None or elapsed < min_time:
                    min_time = elapsed

            avg_time = sum_time / queries_count

            # Store the timings unless this was the warm-up pass.
            if i > 0:
                min_times.append(min_time * 1000)
                avg_times.append(avg_time * 1000)
                max_times.append(max_time * 1000)

        return index, min_times, avg_times, max_times
예제 #2
0
def main():
    """Collect one Hacker News page and build its layout indexes.

    Opens a page in a new browser tab, feeds the collected snapshot, tree and
    screenshot to a ``LayoutParser``, builds the r-tree and DOM indexes,
    writes the DOM index to ``dom-build.json`` and generates the report.
    """
    target_url = "https://news.ycombinator.com/front?day=2020-03-20"

    browser = Browser()
    with browser.new_page() as tab:
        collected = tab.collect_data(target_url)

        snapshot = collected["snapshot"]
        tree = collected["tree"]
        screenshot = collected["screenshot"]
        parser = LayoutParser(snapshot, tree, screenshot)

        parser.build_rtree_index()
        parser.build_dom_index()

        write_json(parser.dom_index, "dom-build.json")
        # write_image(parser.screenshot["data"], "page.png")
        parser.create_report1()
예제 #3
0
def index():
    """Build a layout report for the Hacker News front page and render it.

    Progress is printed at each stage. The page screenshot is written to
    ``page.png`` before the report is created. ``report`` stays ``None`` if
    the ``with`` block finishes without assigning it.

    Returns:
        The result of rendering ``index.html`` with the report.
    """
    report = None
    browser = Browser()

    with browser.new_page() as tab:
        print("Loading page")
        collected = tab.collect_data("https://news.ycombinator.com/")

        snapshot = collected["snapshot"]
        tree = collected["tree"]
        screenshot = collected["screenshot"]
        parser = LayoutParser(snapshot, tree, screenshot)

        print("Building rtree")
        parser.build_rtree_index()

        print("Building dom tree")
        parser.build_dom_index()

        write_image(parser.screenshot["data"], "page.png")

        print("Creating report")
        report = parser.create_report1()

    return render_template("index.html", report=report)
예제 #4
0
    def parse_resp(self, browser: Browser, proxy_sig: str) -> None:
        '''Record the outcome of a browser and release its slot.

        Does nothing when the owner is no longer alive or the browser has
        already been processed. Otherwise the browser is marked as processed
        and:

        * on authentication, its password is stored as the account password;
        * on a password attempt without a proxy failure, the attempt counter
          is incremented;
        * on a proxy failure, the proxy is flagged as expired, the password
          is passed back to ``add_password`` and the proxy signature is put
          on the expired-proxies queue.

        Finally the active-browser counter is decremented.
        '''

        if not (self.is_alive and not browser.is_processed):
            return

        browser.is_processed = True

        if browser.is_authenticated:
            self.account_password = browser.password
        else:
            if browser.password_attempted and not browser.proxy_failed:
                self.attempts += 1

        if browser.proxy_failed:
            # Look the proxy entry up first, exactly like the lookup order
            # callers have always seen (a missing signature raises here).
            proxy_entry = self.proxies_browsers[proxy_sig]
            self.proxies.proxy_expired(browser.proxy)
            proxy_entry.is_expired = True

            # Hand the password back; it was not really tested.
            self.add_password(browser.password)

            with self.lock_expired_proxies:
                self.expired_proxies.put(proxy_sig)

        self.active_browsers -= 1
예제 #5
0
    def create_browsers(self, password: str, proxy_sig: str, csrftoken: str) -> None:
        '''Create a browser for one password and start its login thread.

        Args:
            password: password the new browser will try.
            proxy_sig: key into ``self.proxies_browsers`` selecting the proxy.
            csrftoken: token forwarded to the ``Browser`` constructor.

        Whenever the browser cannot be created, started or registered, the
        password is passed back to ``add_password`` (presumably re-queued for
        a later attempt -- confirm against that method) so it is not lost.
        '''

        try:
            proxy = self.proxies_browsers[proxy_sig].proxy
            browser = Browser(self.username, password, proxy, csrftoken)
        except KeyError:
            # The proxy signature is not (or no longer) registered.
            self.add_password(password)
            return

        try:
            Thread(target=browser.login, daemon=True).start()
        except Exception:
            # The thread never ran, so the password was not tried. Hand it
            # back instead of dropping it silently. ``Exception`` (not a bare
            # ``except``) lets KeyboardInterrupt/SystemExit propagate.
            self.add_password(password)
            return

        # Count the browser only once its thread is really running.
        self.active_browsers += 1

        if self.verbose and self.is_alive:
            with self.lock_display:
                print('[-] Trying: {} ...'.format(
                    password
                ))

        try:
            with self.lock_proxies_browsers:
                self.proxies_browsers[proxy_sig].add_browser(
                    browser
                )
        except Exception:
            # Registration failed (e.g. the proxy entry disappeared): undo
            # the accounting and hand the password back.
            self.add_password(password)
            self.active_browsers -= 1
예제 #6
0
def main(args):
    '''App entry point: compare boolean searches on two indexes.

    Loads both test indexes and then loops forever reading queries from the
    user. Each query is run on both indexes; for each one the total number of
    hits, the first 25 matching documents and the browser benchmark are
    printed. If the two posting lists differ, a warning is shown and the
    program waits for the user before continuing.

    Args:
        args: command-line arguments (not used).
    '''
    utils.clearscreen()

    index1 = Index("../output/test/index-1/")
    index2 = Index("../output/test/index-2/")

    print("Cargando índice 1...")
    index1.load()
    print("Cargando índice 2...")
    index2.load()

    # One browser per index. The second one must search index2; building it
    # over index1 would make the comparison below meaningless.
    index1_browser = Browser(index1, BrowseType.Boolean)
    index2_browser = Browser(index2, BrowseType.Boolean)

    while True:
        text = input("\nBúsqueda: ").lower().strip()

        for i in range(0, 2):
            if i == 0:
                index, browser = index1, index1_browser
                doc_ids = browser.browse(text)
                print("Búsqueda en índice 1 (total {0}):".format(len(doc_ids)))
                # Kept to be compared with the result of the second index.
                doc_ids1 = doc_ids
            else:
                index, browser = index2, index2_browser
                print("")
                doc_ids = browser.browse(text)
                print("Búsqueda en índice 2 (total {0}):".format(len(doc_ids)))

                if doc_ids != doc_ids1:
                    input("ATENCIÓN: Las postings son distintas...")

            if not doc_ids:
                print("Ningún documento coincide con la búsqueda.")
            else:
                # Print the first 25 documents.
                for doc_number, doc_id in enumerate(doc_ids[0:25], start=1):
                    doc = index.get_doc_by_id(int(doc_id))
                    # Format all parts in a single call: the document name is
                    # data and must not be interpreted as a format template
                    # (it may contain '{' or '}').
                    print("{0}. {1} ({2})".format(doc_number, doc, doc_id))

            print("")
            print("Benchmark:", browser.get_benchmark(), "segundos")

        input("\nPresione una tecla para continuar...")
        utils.clearscreen()
예제 #7
0
    def attack(self):
        '''Keep creating and launching browsers while ``self.is_alive``.

        Each pass of the outer loop has two phases:

        1. Walk the password list and queue up to ``proxy_per_pwd`` browsers
           per password in ``self.unstarted_browsers``, stopping once that
           queue holds ``self.total_threads`` entries.
        2. Move queued browsers into ``self.browsers`` (also capped at
           ``self.total_threads``) and start one daemon thread per moved
           browser, running ``br.attempt``.
        '''
        attack_started = False
        # Maximum number of browsers queued for a single password.
        proxy_per_pwd = 3

        while self.is_alive:
            for pwd in self.password_manager.passlist:
                if not self.is_alive:
                    break

                # Stop queueing once the unstarted queue is full.
                with self.lock_unstarted_browsers:
                    if len(self.unstarted_browsers) >= self.total_threads:
                        break

                # Skip passwords that are already marked as active.
                with self.lock_active_passwords:
                    if pwd in self.active_passwords:
                        continue

                # Becomes True once at least one browser is queued for pwd.
                is_added = False

                for _ in range(proxy_per_pwd):

                    with self.lock_unstarted_browsers:
                        if len(self.unstarted_browsers) >= self.total_threads:
                            break

                    proxy = self.proxy_manager.get_proxy()

                    # No proxy returned this time: use up this try and go on.
                    if not proxy:
                        continue

                    with self.lock_unstarted_browsers:
                        self.unstarted_browsers.append(
                            Browser(self.username, pwd, proxy))

                        is_added = True

                # Nothing could be queued for this password (queue full or no
                # proxy obtained): leave the password loop and go on to the
                # launch phase.
                if not is_added:
                    break

                with self.lock_active_passwords:
                    self.active_passwords.append(pwd)

                # Announce the attack only once.
                if not attack_started:
                    self.display.info("Starting attack...")
                    attack_started = True

            with self.lock_unstarted_browsers:
                # Iterate over a copy because entries are removed inside the
                # loop.
                for br in list(self.unstarted_browsers):
                    with self.lock_browsers:
                        # Running set is full: leave the rest queued.
                        if len(self.browsers) >= self.total_threads:
                            break
                        else:
                            self.browsers.append(br)

                    self.unstarted_browsers.remove(br)
                    threading.Thread(target=br.attempt, daemon=True).start()
예제 #8
0
def main(args):
    '''App entry point: interactive boolean search on a single index.

    Loads the test index without keeping the chunk information in memory and
    then loops forever reading queries from the user. For each query the
    total number of hits, the first 25 matching documents and the browser
    benchmark are printed; the program then waits for the user and clears
    the screen before asking for the next query.

    Args:
        args: command-line arguments (not used).
    '''

    utils.clearscreen()

    index = Index("../output/test/index-1/")

    print("Cargando índice...")
    index.load(chunks_info_in_memory=False)

    # Browser over the loaded index.
    browser = Browser(index, BrowseType.Boolean)

    while True:
        text = input("\nBúsqueda: ").lower().strip()

        print("")
        doc_ids = browser.browse(text)
        print("Búsqueda en índice (total {0}):".format(len(doc_ids)))

        if not doc_ids:
            print("Ningún documento coincide con la búsqueda.")
        else:
            # Print the first 25 documents.
            for doc_number, doc_id in enumerate(doc_ids[0:25], start=1):
                doc = index.get_doc_by_id(int(doc_id))
                # Format all parts in a single call: the document name is
                # data and must not be interpreted as a format template (it
                # may contain '{' or '}').
                print("{0}. {1} ({2})".format(doc_number, doc, doc_id))

        print("")

        print("Benchmark:", browser.get_benchmark(), "segundos")

        # These two statements used to sit after the endless loop, where they
        # could never run; they belong to each iteration.
        input("\nPresione una tecla para continuar...")
        utils.clearscreen()
예제 #9
0
    def attack(self):
        '''Create and launch browsers for untried passwords while alive.

        On every pass over the password list a browser is created for each
        password that is not yet in ``self.active_passwords``. One proxy is
        shared by consecutive browsers until ``self.bots_per_proxy`` reaches
        ``max_bots_per_proxy``; the password being visited when the limit is
        hit is skipped for this pass and a new proxy is requested for the
        next one. After the pass, each new browser is started in a daemon
        thread running ``browser.attempt``.
        '''
        proxy = None
        is_attack_started = False
        while self.is_alive:

            browsers = []
            for password in self.password_manager.passlist:

                if not self.is_alive:
                    break

                if not proxy:
                    proxy = self.proxy_manager.get_proxy()
                    self.bots_per_proxy = 0

                # Proxy used up: drop it so a new one is requested next time.
                if self.bots_per_proxy >= max_bots_per_proxy:
                    proxy = None

                if not proxy:
                    continue

                # The second membership test re-checks the list being
                # iterated; presumably it can be modified concurrently --
                # kept as in the original.
                if (password not in self.active_passwords
                        and password in self.password_manager.passlist):
                    browser = Browser(self.username, password, proxy)
                    browsers.append(browser)
                    self.bots_per_proxy += 1

                    # Announce the attack only once.
                    if not is_attack_started:
                        self.display.info('Starting attack ...')
                        is_attack_started = True

                    with self.lock:
                        self.browsers.append(browser)
                        self.active_passwords.append(password)

            for browser in browsers:
                thread = Thread(target=browser.attempt, daemon=True)
                try:
                    thread.start()
                except Exception:
                    # The thread could not be started: unregister the browser.
                    # ``Exception`` (not a bare ``except``) lets
                    # KeyboardInterrupt/SystemExit propagate.
                    self.remove_browser(browser)
예제 #10
0
import os
import logging
import time
import sys

from lib.browser import Browser
from lib.flow import Flow

# Log INFO and above to stdout, with timestamp, source location and level.
logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s][%(filename)s:%(lineno)d][%(levelname)s] %(message)s',
    handlers=[logging.StreamHandler(sys.stdout)])
log = logging.getLogger('Olsanka App')
log.setLevel(logging.INFO)

if __name__ == '__main__':
    browser = Browser(display_visible=False, display_size=(1000, 900))
    try:
        flow = Flow(browser)
        flow.login_and_find_next_free_and_book()
        # Wait five seconds after the flow finishes before closing.
        time.sleep(5)
    finally:
        # Close the browser even when the flow raises, so that it is not
        # left open after a failure.
        browser.close()
예제 #11
0
def main(args):
    '''App entry point: build two indexes and compare searches on them.

    Two indexes are created from the same collection with different settings
    (the second one after changing the indexer's document and frequency
    encodings). Both are then loaded and the program loops forever reading
    queries from the user. Each query is run on both indexes; the matching
    documents and the browser benchmark are printed for each one. If the two
    posting lists differ, both are printed and the program waits for the
    user before continuing.

    Returns early if the collection directory of either index does not
    exist.

    Args:
        args: command-line arguments (not used).
    '''
    utils.clearscreen()

    indexer = Indexer(DIRIN, CorpusTypes.Text, reuse_tmp=True)

    # Build index 1.
    index1, index1_error = indexer.create_index(INDEX1_DIROUT, False, 64)

    # Build index 2 with different encodings.
    indexer.doc_encode = [
        EncodeTypes.Unary, EncodeTypes.VariableByte, EncodeTypes.VariableByte,
        EncodeTypes.EliasFano, EncodeTypes.Simple16, EncodeTypes.BitPacking,
        EncodeTypes.PForDelta
    ]

    indexer.freq_encode = [EncodeTypes.Unary, EncodeTypes.VariableByte]

    index2, index2_error = indexer.create_index(INDEX2_DIROUT, False, 128)

    if index1_error == IndexerStatusTypes.Already_Indexed:
        print("Indice 1 ya indexado.")
    elif index1_error == IndexerStatusTypes.Collection_Non_Existent:
        print("Directorio de colección de índice 1 inexistente.")
        return

    if index2_error == IndexerStatusTypes.Already_Indexed:
        print("Indice 2 ya indexado.")
    elif index2_error == IndexerStatusTypes.Collection_Non_Existent:
        print("Directorio de colección de índice 2 inexistente.")
        return

    print("Cargando índice 1...")
    index1.load(chunks_info_in_memory=True)
    print("Cargando índice 2...")
    index2.load()

    # One browser per index.
    index1_browser = Browser(index1, BrowseType.Boolean)
    index2_browser = Browser(index2, BrowseType.Boolean)

    while True:
        text = input("\nBúsqueda: ").lower().strip()

        for i in range(0, 2):
            if i == 0:
                index, browser = index1, index1_browser
                print("Búsqueda en índice 1:")
                doc_ids = browser.browse(text)
                # Kept to be compared with the result of the second index.
                doc_ids1 = doc_ids
            else:
                index, browser = index2, index2_browser
                print("")
                print("Búsqueda en índice 2:")
                doc_ids = browser.browse(text)

                if doc_ids != doc_ids1:
                    print("")
                    print(doc_ids)
                    print(doc_ids1)
                    input(
                        "ATENCIÓN: Las postings son distintas ({0})...".format(
                            text))

            if not doc_ids:
                print("Ningún documento coincide con la búsqueda.")

            for doc_number, doc_id in enumerate(doc_ids, start=1):
                doc = index.get_doc_by_id(int(doc_id))
                # Format all parts in a single call: the document name is
                # data and must not be interpreted as a format template (it
                # may contain '{' or '}').
                print("{0}. {1} ({2})".format(doc_number, doc, doc_id))

            print("")
            print("Benchmark:", browser.get_benchmark(), "segundos")

        input("\nPresione una tecla para continuar...")
        utils.clearscreen()
예제 #12
0
def before_all(context):
    """Announce the hook and attach a new ``Browser`` to ``context``.

    Presumably the test-runner hook executed once before all tests --
    confirm against the test framework in use.
    """
    print("==> Executing  before_all")
    shared_browser = Browser()
    context.browser = shared_browser