def identify_line(line):
    """
    Asynchronously performs CMS identification on a particular URL. The process
    is as follows:

    - Make a request to the site's root.
        - If 403, 500 or other error code, or connection error, raise.
        - If redirect, change base URL to redirected URL (after repairing the
          URL).
    - For each URL:
        - Request common JS files for all CMS.
        - If files for a single CMS are found, determine that to be the CMS.
        - If server responds with 200 OK to everything, break.
        - If no files for any CMS break.
        - Else, break. (this shouldn't happen too often)
    - Perform version identification:
        - Request all required files and return a deferredlist with a callback.
        - Hash all these files.
        - Use the hashes to calculate the version.

    @param line: the line to identify.
    @return: deferred
    """
    base_url, host_header = f.process_host_line(line)
    base_url = f.repair_url(base_url)

    try:
        yield async .request_url(base_url, host_header)
    except PageRedirect as e:
        base_url, host_header = f.repair_url(e.location), None

    cms_name, tempdir = yield identify_url(base_url, host_header)
    versions = yield identify_version_url(base_url, host_header, cms_name,
                                          tempdir)

    out = Output()
    out.host = (base_url, host_header)
    out.version = versions

    print(json.dumps(out.__dict__))
Example #2
0
def identify_line(line):
    """
    Asynchronously performs CMS identification on a particular URL. The process
    is as follows:

    - Make a request to the site's root.
        - If 403, 500 or other error code, or connection error, raise.
        - If redirect, change base URL to redirected URL (after repairing the
          URL).
    - For each URL:
        - Request common JS files for all CMS.
        - If files for a single CMS are found, determine that to be the CMS.
        - If server responds with 200 OK to everything, break.
        - If no files for any CMS break.
        - Else, break. (this shouldn't happen too often)
    - Perform version identification:
        - Request all required files and return a deferredlist with a callback.
        - Hash all these files.
        - Use the hashes to calculate the version.

    @param line: the line to identify.
    @return: deferred
    """
    base_url, host_header = f.process_host_line(line)
    base_url = f.repair_url(base_url)

    try:
        yield async.request_url(base_url, host_header)
    except PageRedirect as e:
        base_url, host_header = f.repair_url(e.location), None

    cms_name, tempdir = yield identify_url(base_url, host_header)
    versions = yield identify_version_url(base_url, host_header, cms_name, tempdir)

    out = Output()
    out.host = (base_url, host_header)
    out.version = versions

    print(json.dumps(out.__dict__))
 def _process_host_line(self, line):
     """
     Parse a host line by delegating to f.process_host_line.

     @param line: the raw host line to parse.
     @return: whatever f.process_host_line returns for that line, unchanged.
     """
     parsed = f.process_host_line(line)
     return parsed
 def _process_host_line(self, line):
     """
     Thin wrapper around f.process_host_line; self is not used.

     @param line: the raw host line to parse.
     @return: the result of f.process_host_line(line), passed through as is.
     """
     host_info = f.process_host_line(line)
     return host_info