Example #1
0
def dump_flash_cookies(start_time, visit_id, webdriver, browser_params,
                       manager_params):
    """Save newly changed Flash LSOs to the database.

    We determine which LSOs to save by the `start_time` timestamp.
    This timestamp should be taken prior to calling the `get` which
    creates these changes.

    Args:
        start_time: Timestamp handed to `get_flash_cookies` to select the
            LSOs to save.
        visit_id: Value stored in the `visit_id` column of every row.
        webdriver: Browser driver handed to `tab_restart_browser`.
        browser_params: Mapping that provides `crawl_id`.
        manager_params: Mapping that provides `aggregator_address`, which
            is unpacked into the arguments of `sock.connect`.
    """
    # The statement never changes, so build it once instead of per cookie.
    insert_sql = ("INSERT INTO flash_cookies (crawl_id, visit_id, domain, "
                  "filename, local_path, key, content) VALUES (?,?,?,?,?,?,?)")

    tab_restart_browser(webdriver)  # kills traffic

    # Set up a connection to DataAggregator
    sock = clientsocket()
    try:
        sock.connect(*manager_params['aggregator_address'])

        # Flash cookies: one (statement, parameters) pair is sent per cookie.
        for cookie in get_flash_cookies(start_time):
            row = (browser_params['crawl_id'], visit_id, cookie.domain,
                   cookie.filename, cookie.local_path, cookie.key,
                   cookie.content)
            sock.send((insert_sql, row))
    finally:
        # Close connection to db even when connecting, reading the cookies
        # or sending fails, so the socket is never left open.
        sock.close()
Example #2
0
def dump_storage_vectors(top_url, start_time, webdriver, browser_params):
    """Grab the newly changed items in supported storage vectors.

    Flash cookies and profile cookies selected by `start_time` are sent to
    the DataAggregator as (statement, parameters) pairs.

    Args:
        top_url: Value stored in the `page_url` column of every row.
        start_time: Timestamp handed to `get_flash_cookies` and
            `get_cookies` to select the items to save.
        webdriver: Browser driver handed to `tab_restart_browser`.
        browser_params: Mapping that provides `crawl_id`, `profile_path`
            and `aggregator_address` (unpacked into `sock.connect`).
    """
    # The statements never change, so build them once instead of per row.
    # The run of spaces inside each literal keeps the statement text
    # identical to what has always been sent.
    flash_sql = (
        "INSERT INTO flash_cookies (crawl_id, page_url, domain, filename, local_path, "
        "                  key, content) VALUES (?,?,?,?,?,?,?)")
    cookie_sql = (
        "INSERT INTO profile_cookies (crawl_id, page_url, baseDomain, name, value, "
        "                      host, path, expiry, accessed, creationTime, isSecure, isHttpOnly) "
        "                      VALUES (?,?,?,?,?,?,?,?,?,?,?,?)")

    # kills traffic so we can cleanly record data
    tab_restart_browser(webdriver)

    # Set up a connection to DataAggregator
    sock = clientsocket()
    try:
        sock.connect(*browser_params['aggregator_address'])

        # Wait for SQLite Checkpointing - never happens when browser open

        # Flash cookies
        for cookie in get_flash_cookies(start_time):
            params = (browser_params['crawl_id'], top_url, cookie.domain,
                      cookie.filename, cookie.local_path, cookie.key,
                      cookie.content)
            sock.send((flash_sql, params))

        # Cookies: `get_cookies` may return None, in which case nothing is sent.
        rows = get_cookies(browser_params['profile_path'], start_time)
        if rows is not None:
            for row in rows:
                # NOTE(review): `row` must be a tuple for this concatenation;
                # presumably it carries the 10 remaining columns - confirm.
                sock.send((cookie_sql,
                           (browser_params['crawl_id'], top_url) + row))

        # TODO: localStorage is not dumped - it doesn't have modified time
        # support, so entries changed since `start_time` cannot be selected.
        # (An earlier draft inserted (crawl_id, page_url, scope, KEY, value)
        # rows into a `localStorage` table from `get_localStorage`.)
    finally:
        # Close connection to db even when connecting, reading the stores
        # or sending fails, so the socket is never left open.
        sock.close()
Example #3
0
def dump_storage_vectors(top_url, start_time, webdriver, browser_params, manager_params):
    """Grab the newly changed items in supported storage vectors.

    Flash cookies and profile cookies selected by `start_time` are sent to
    the DataAggregator as (statement, parameters) pairs.

    Args:
        top_url: Value stored in the `page_url` column of every row.
        start_time: Timestamp handed to `get_flash_cookies` and
            `get_cookies` to select the items to save.
        webdriver: Browser driver handed to `tab_restart_browser`.
        browser_params: Mapping that provides `crawl_id` and `profile_path`.
        manager_params: Mapping that provides `aggregator_address`, which
            is unpacked into the arguments of `sock.connect`.
    """
    # The statements never change, so build them once instead of per row.
    # The run of spaces inside each literal keeps the statement text
    # identical to what has always been sent.
    flash_sql = (
        "INSERT INTO flash_cookies (crawl_id, page_url, domain, filename, local_path, "
        "                  key, content) VALUES (?,?,?,?,?,?,?)")
    cookie_sql = (
        "INSERT INTO profile_cookies (crawl_id, page_url, baseDomain, name, value, "
        "                      host, path, expiry, accessed, creationTime, isSecure, isHttpOnly) "
        "                      VALUES (?,?,?,?,?,?,?,?,?,?,?,?)")

    tab_restart_browser(webdriver)  # kills traffic so we can cleanly record data

    # Set up a connection to DataAggregator
    sock = clientsocket()
    try:
        sock.connect(*manager_params['aggregator_address'])

        # Wait for SQLite Checkpointing - never happens when browser open

        # Flash cookies
        for cookie in get_flash_cookies(start_time):
            params = (browser_params['crawl_id'], top_url, cookie.domain,
                      cookie.filename, cookie.local_path, cookie.key,
                      cookie.content)
            sock.send((flash_sql, params))

        # Cookies: `get_cookies` may return None, in which case nothing is sent.
        rows = get_cookies(browser_params['profile_path'], start_time)
        if rows is not None:
            for row in rows:
                # NOTE(review): `row` must be a tuple for this concatenation;
                # presumably it carries the 10 remaining columns - confirm.
                sock.send((cookie_sql,
                           (browser_params['crawl_id'], top_url) + row))

        # TODO: localStorage is not dumped - it doesn't have modified time
        # support, so entries changed since `start_time` cannot be selected.
        # (An earlier draft inserted (crawl_id, page_url, scope, KEY, value)
        # rows into a `localStorage` table from `get_localStorage`.)
    finally:
        # Close connection to db even when connecting, reading the stores
        # or sending fails, so the socket is never left open.
        sock.close()
Example #4
0
def dump_flash_cookies(start_time, visit_id, webdriver, browser_params, manager_params):
    """Save newly changed Flash LSOs to the database.

    We determine which LSOs to save by the `start_time` timestamp.
    This timestamp should be taken prior to calling the `get` which
    creates these changes.

    Args:
        start_time: Timestamp handed to `get_flash_cookies` to select the
            LSOs to save.
        visit_id: Value stored in the `visit_id` column of every row.
        webdriver: Browser driver handed to `tab_restart_browser`.
        browser_params: Mapping that provides `crawl_id`.
        manager_params: Mapping that provides `aggregator_address`, which
            is unpacked into the arguments of `sock.connect`.
    """
    # The statement never changes, so build it once instead of per cookie.
    # The run of spaces inside the literal keeps the statement text
    # identical to what has always been sent.
    insert_sql = (
        "INSERT INTO flash_cookies (crawl_id, visit_id, domain, filename, local_path, "
        "                  key, content) VALUES (?,?,?,?,?,?,?)")

    tab_restart_browser(webdriver)  # kills traffic so we can cleanly record data

    # Set up a connection to DataAggregator
    sock = clientsocket()
    try:
        sock.connect(*manager_params['aggregator_address'])

        # Flash cookies: one (statement, parameters) pair is sent per cookie.
        for cookie in get_flash_cookies(start_time):
            row = (browser_params['crawl_id'], visit_id, cookie.domain,
                   cookie.filename, cookie.local_path, cookie.key,
                   cookie.content)
            sock.send((insert_sql, row))
    finally:
        # Close connection to db even when connecting, reading the cookies
        # or sending fails, so the socket is never left open.
        sock.close()