def identify_line(line): """ Asynchronously performs CMS identification on a particular URL. The process is as follows: - Make a request to the site's root. - If 403, 500 or other error code, or connection error, raise. - If redirect, change base URL to redirected URL (after repairing the URL). - For each URL: - Request common JS files for all CMS. - If files for a single CMS are found, determine that to be the CMS. - If server responds with 200 OK to everything, break. - If no files for any CMS break. - Else, break. (this shouldn't happen too often) - Perform version identification: - Request all required files and return a deferredlist with a callback. - Hash all these files. - Use the hashes to calculate the version. @param line: the line to identify. @return: deferred """ base_url, host_header = f.process_host_line(line) base_url = f.repair_url(base_url) try: yield async .request_url(base_url, host_header) except PageRedirect as e: base_url, host_header = f.repair_url(e.location), None cms_name, tempdir = yield identify_url(base_url, host_header) versions = yield identify_version_url(base_url, host_header, cms_name, tempdir) out = Output() out.host = (base_url, host_header) out.version = versions print(json.dumps(out.__dict__))
def identify_line(line): """ Asynchronously performs CMS identification on a particular URL. The process is as follows: - Make a request to the site's root. - If 403, 500 or other error code, or connection error, raise. - If redirect, change base URL to redirected URL (after repairing the URL). - For each URL: - Request common JS files for all CMS. - If files for a single CMS are found, determine that to be the CMS. - If server responds with 200 OK to everything, break. - If no files for any CMS break. - Else, break. (this shouldn't happen too often) - Perform version identification: - Request all required files and return a deferredlist with a callback. - Hash all these files. - Use the hashes to calculate the version. @param line: the line to identify. @return: deferred """ base_url, host_header = f.process_host_line(line) base_url = f.repair_url(base_url) try: yield async.request_url(base_url, host_header) except PageRedirect as e: base_url, host_header = f.repair_url(e.location), None cms_name, tempdir = yield identify_url(base_url, host_header) versions = yield identify_version_url(base_url, host_header, cms_name, tempdir) out = Output() out.host = (base_url, host_header) out.version = versions print(json.dumps(out.__dict__))
def _process_host_line(self, line):
    """
    Delegate processing of a host line to C{f.process_host_line}.

    The instance itself is not consulted; the call is forwarded unchanged.

    @param line: the host line to process.
    @return: whatever C{f.process_host_line} returns for C{line}
        (elsewhere in this file that result is unpacked as a
        C{(base_url, host_header)} pair).
    """
    processed = f.process_host_line(line)
    return processed