Example #1
async def main():
    version_string = f"%(prog)s {__version__}\n" +  \
                     f"{requests.__description__}:  {requests.__version__}\n" + \
                     f"Python:  {platform.python_version()}"

    parser = ArgumentParser(
        formatter_class=RawDescriptionHelpFormatter,
        description=f"{module_name} (Version {__version__})")
    parser.add_argument("--version",
                        action="version",
                        version=version_string,
                        help="Display version information and dependencies.")
    parser.add_argument("--verbose",
                        "-v",
                        action="store_true",
                        dest="verbose",
                        default=False,
                        help="Display extra information and metrics.")
    parser.add_argument(
        "-d",
        "--debug",
        action="store_true",
        dest="debug",
        default=False,
        help="Saving debugging information and sites responses in debug.txt.")
    parser.add_argument(
        "--rank",
        "-r",
        action="store_true",
        dest="rank",
        default=False,
        help=
        "Present websites ordered by their Alexa.com global rank in popularity."
    )
    parser.add_argument(
        "--folderoutput",
        "-fo",
        dest="folderoutput",
        help=
        "If using multiple usernames, the output of the results will be saved to this folder."
    )
    parser.add_argument(
        "--output",
        "-o",
        dest="output",
        help=
        "If using single username, the output of the result will be saved to this file."
    )
    parser.add_argument(
        "--tor",
        "-t",
        action="store_true",
        dest="tor",
        default=False,
        help=
        "Make requests over Tor; increases runtime; requires Tor to be installed and in system path."
    )
    parser.add_argument(
        "--unique-tor",
        "-u",
        action="store_true",
        dest="unique_tor",
        default=False,
        help=
        "Make requests over Tor with new Tor circuit after each request; increases runtime; requires Tor to be installed and in system path."
    )
    parser.add_argument("--csv",
                        action="store_true",
                        dest="csv",
                        default=False,
                        help="Create Comma-Separated Values (CSV) File.")
    parser.add_argument(
        "--site",
        action="append",
        metavar='SITE_NAME',
        dest="site_list",
        default=None,
        help=
        "Limit analysis to just the listed sites. Add multiple options to specify more than one site."
    )
    parser.add_argument(
        "--proxy",
        "-p",
        metavar='PROXY_URL',
        action="store",
        dest="proxy",
        default=None,
        help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080")
    parser.add_argument(
        "--json",
        "-j",
        metavar="JSON_FILE",
        dest="json_file",
        default=None,
        help="Load data from a JSON file or an online, valid, JSON file.")
    parser.add_argument(
        "--timeout",
        action="store",
        metavar='TIMEOUT',
        dest="timeout",
        type=timeout_check,
        default=10,
        help="Time (in seconds) to wait for response to requests."
        "Default timeout of 10.0s."
        "A longer timeout will be more likely to get results from slow sites."
        "On the other hand, this may cause a long delay to gather all results."
    )
    parser.add_argument(
        "--print-found",
        action="store_true",
        dest="print_found_only",
        default=False,
        help="Do not output sites where the username was not found.")
    parser.add_argument(
        "--skip-errors",
        action="store_true",
        dest="skip_check_errors",
        default=False,
        help=
        "Do not print errors messages: connection, captcha, site country ban, etc."
    )
    parser.add_argument("--no-color",
                        action="store_true",
                        dest="no_color",
                        default=False,
                        help="Don't color terminal output")
    parser.add_argument("--browse",
                        "-b",
                        action="store_true",
                        dest="browse",
                        default=False,
                        help="Browse to all results on default bowser.")
    parser.add_argument(
        "--ids",
        "-i",
        action="store_true",
        dest="ids_search",
        default=False,
        help=
        "Make scan of pages for other usernames and recursive search by them.")
    parser.add_argument(
        "--parse",
        dest="parse_url",
        default='',
        help="Parse page by URL and extract username and IDs to use for search."
    )
    parser.add_argument(
        "username",
        nargs='+',
        metavar='USERNAMES',
        action="store",
        help="One or more usernames to check with social networks.")
    parser.add_argument("--tags",
                        dest="tags",
                        default='',
                        help="Specify tags of sites.")
    args = parser.parse_args()

    # Logging
    log_level = logging.ERROR
    logging.basicConfig(
        format=
        '[%(filename)s:%(lineno)d] %(levelname)-3s  %(asctime)s %(message)s',
        datefmt='%H:%M:%S',
        level=logging.ERROR)

    if args.debug:
        log_level = logging.DEBUG
    elif args.verbose:
        log_level = logging.WARNING

    logger = logging.getLogger('maigret')
    logger.setLevel(log_level)

    # Usernames initial list
    usernames = {u: 'username' for u in args.username if u != '-'}

    # TODO regex check on args.proxy
    if args.tor and (args.proxy is not None):
        raise Exception("Tor and Proxy cannot be set at the same time.")

    # Make prompts
    if args.proxy is not None:
        print("Using the proxy: " + args.proxy)

    if args.tor or args.unique_tor:
        print("Using Tor to make requests")
        print(
            "Warning: some websites might refuse connections over Tor, so using this option may increase connection errors."
        )

    # Check if both output methods are entered as input.
    if args.output is not None and args.folderoutput is not None:
        print("You can only use one of the output methods.")
        sys.exit(1)

    # Check validity for single username output.
    if args.output is not None and len(args.username) != 1:
        print("You can only use --output with a single username")
        sys.exit(1)

    if args.parse_url:
        page, _ = parse(args.parse_url, cookies_str='')
        info = extract(page)
        text = 'Extracted ID data from webpage: ' + ', '.join(
            [f'{a}: {b}' for a, b in info.items()])
        print(text)
        for k, v in info.items():
            if 'username' in k:
                usernames[v] = 'username'
            if k in supported_recursive_search_ids:
                usernames[v] = k

    if args.tags:
        args.tags = set(args.tags.split(','))

    # Create object with all information about sites we are aware of.
    try:
        sites = SitesInformation(args.json_file)
    except Exception as error:
        print(f"ERROR:  {error}")
        sys.exit(1)

    # Create original dictionary from SitesInformation() object.
    # Eventually, the rest of the code will be updated to use the new object
    # directly, but this will glue the two pieces together.
    site_data_all = {}
    for site in sites:
        site_data_all[site.name] = site.information

    if args.site_list is None:
        # Not desired to look at a sub-set of sites
        site_data = site_data_all
    else:
        # User desires to selectively run queries on a sub-set of the site list.

        # Make sure that the sites are supported & build up pruned site database.
        site_data = {}
        site_missing = []
        for site in args.site_list:
            counter = 0
            for existing_site in site_data_all:
                if site.lower() == existing_site.lower():
                    site_data[existing_site] = site_data_all[existing_site]
                    counter += 1
            if counter == 0:
                # Build up list of sites not supported for future error message.
                site_missing.append(f"'{site}'")

        if site_missing:
            print(
                f"Error: Desired sites not found: {', '.join(site_missing)}.")
            sys.exit(1)

    if args.rank:
        # Sort sites by rank; sites without a "rank" entry go last.
        site_dataCpy = dict(site_data)
        ranked_sites = sorted(
            site_data,
            key=lambda k: ("rank" not in site_data[k],
                           site_data[k].get("rank", sys.maxsize)))
        site_data = {}
        for site in ranked_sites:
            site_data[site] = site_dataCpy.get(site)

    # Create notify object for query results.
    query_notify = QueryNotifyPrint(result=None,
                                    verbose=args.verbose,
                                    print_found_only=args.print_found_only,
                                    skip_check_errors=args.skip_check_errors,
                                    color=not args.no_color)

    already_checked = set()

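    # The usernames dict doubles as a work queue: the ids_search handling
    # below can enqueue newly discovered usernames while this loop runs.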
    while usernames:
        username, id_type = list(usernames.items())[0]
        del usernames[username]

        if username.lower() in already_checked:
            continue
        else:
            already_checked.add(username.lower())

        # Check for characters that sites generally do not support.
        found_unsupported_chars = set(unsupported_characters).intersection(
            set(username))

        if found_unsupported_chars:
            pretty_chars_str = ','.join(
                map(lambda s: f'"{s}"', found_unsupported_chars))
            print(
                f'Found unsupported URL characters: {pretty_chars_str}; skipping search for username "{username}"'
            )
            continue

        results = await sherlock(username,
                                 site_data,
                                 query_notify,
                                 tor=args.tor,
                                 unique_tor=args.unique_tor,
                                 proxy=args.proxy,
                                 timeout=args.timeout,
                                 ids_search=args.ids_search,
                                 id_type=id_type,
                                 tags=args.tags,
                                 debug=args.verbose,
                                 logger=logger)

        if args.output:
            result_file = args.output
        elif args.folderoutput:
            # The usernames results should be stored in a targeted folder.
            # If the folder doesn't exist, create it first
            os.makedirs(args.folderoutput, exist_ok=True)
            result_file = os.path.join(args.folderoutput, f"{username}.txt")
        else:
            result_file = f"{username}.txt"

        with open(result_file, "w", encoding="utf-8") as file:
            exists_counter = 0
            for website_name in results:
                dictionary = results[website_name]

                new_usernames = dictionary.get('ids_usernames')
                if new_usernames:
                    for u, utype in new_usernames.items():
                        usernames[u] = utype

                if dictionary.get("status").status == QueryStatus.CLAIMED:
                    exists_counter += 1
                    file.write(dictionary["url_user"] + "\n")
            file.write(
                f"Total Websites Username Detected On : {exists_counter}")

        if args.csv:
            with open(username + ".csv", "w", newline='',
                      encoding="utf-8") as csv_report:
                writer = csv.writer(csv_report)
                writer.writerow([
                    'username', 'name', 'url_main', 'url_user', 'exists',
                    'http_status', 'response_time_s'
                ])
                for site in results:
                    response_time_s = results[site]['status'].query_time
                    if response_time_s is None:
                        response_time_s = ""
                    writer.writerow([
                        username, site, results[site]['url_main'],
                        results[site]['url_user'],
                        str(results[site]['status'].status),
                        results[site]['http_status'], response_time_s
                    ])
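
Example #1's main is a coroutine, so it only runs inside an event loop. A minimal entry-point sketch (the guard and asyncio.run call are standard Python, assumed here rather than taken from the example itself):

import asyncio

if __name__ == "__main__":
    asyncio.run(main())
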
Example #2
def main():

    version_string = f"%(prog)s {__version__}\n" +  \
                     f"{requests.__description__}:  {requests.__version__}\n" + \
                     f"Python:  {platform.python_version()}"

    parser = ArgumentParser(
        formatter_class=RawDescriptionHelpFormatter,
        description=f"{module_name} (Version {__version__})")
    parser.add_argument("--version",
                        action="version",
                        version=version_string,
                        help="Display version information and dependencies.")
    parser.add_argument(
        "--verbose",
        "-v",
        "-d",
        "--debug",
        action="store_true",
        dest="verbose",
        default=False,
        help="Display extra debugging information and metrics.")
    parser.add_argument(
        "--folderoutput",
        "-fo",
        dest="folderoutput",
        help=
        "If using multiple usernames, the output of the results will be saved to this folder."
    )
    parser.add_argument(
        "--output",
        "-o",
        dest="output",
        help=
        "If using single username, the output of the result will be saved to this file."
    )
    parser.add_argument(
        "--tor",
        "-t",
        action="store_true",
        dest="tor",
        default=False,
        help=
        "Make requests over Tor; increases runtime; requires Tor to be installed and in system path."
    )
    parser.add_argument(
        "--unique-tor",
        "-u",
        action="store_true",
        dest="unique_tor",
        default=False,
        help=
        "Make requests over Tor with new Tor circuit after each request; increases runtime; requires Tor to be installed and in system path."
    )
    parser.add_argument("--csv",
                        action="store_true",
                        dest="csv",
                        default=False,
                        help="Create Comma-Separated Values (CSV) File.")
    parser.add_argument(
        "--site",
        action="append",
        metavar='SITE_NAME',
        dest="site_list",
        default=None,
        help=
        "Limit analysis to just the listed sites. Add multiple options to specify more than one site."
    )
    parser.add_argument(
        "--proxy",
        "-p",
        metavar='PROXY_URL',
        action="store",
        dest="proxy",
        default=None,
        help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080")
    parser.add_argument(
        "--json",
        "-j",
        metavar="JSON_FILE",
        dest="json_file",
        default=None,
        help="Load data from a JSON file or an online, valid, JSON file.")
    parser.add_argument(
        "--timeout",
        action="store",
        metavar='TIMEOUT',
        dest="timeout",
        type=timeout_check,
        default=None,
        help="Time (in seconds) to wait for response to requests. "
        "Default timeout is infinity. "
        "A longer timeout will be more likely to get results from slow sites. "
        "On the other hand, this may cause a long delay to gather all results."
    )
    parser.add_argument("--print-all",
                        action="store_true",
                        dest="print_all",
                        help="Output sites where the username was not found.")
    parser.add_argument("--print-found",
                        action="store_false",
                        dest="print_all",
                        default=False,
                        help="Output sites where the username was found.")
    parser.add_argument("--no-color",
                        action="store_true",
                        dest="no_color",
                        default=False,
                        help="Don't color terminal output")
    parser.add_argument(
        "username",
        nargs='+',
        metavar='USERNAMES',
        action="store",
        help="One or more usernames to check with social networks.")
    parser.add_argument("--browse",
                        "-b",
                        action="store_true",
                        dest="browse",
                        default=False,
                        help="Browse to all results on default browser.")

    parser.add_argument("--local",
                        "-l",
                        action="store_true",
                        default=False,
                        help="Force the use of the local data.json file.")

    args = parser.parse_args()

    # Check for newer version of Sherlock. If it exists, let the user know about it
    try:
        r = requests.get(
            "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock/sherlock.py"
        )

        remote_version = str(re.findall('__version__ = "(.*)"', r.text)[0])
        local_version = __version__

        if remote_version != local_version:
            print(
                "Update Available!\n" +
                f"You are running version {local_version}. Version {remote_version} is available at https://git.io/sherlock"
            )

    except Exception as error:
        print(f"A problem occured while checking for an update: {error}")

    # Argument check
    # TODO regex check on args.proxy
    if args.tor and (args.proxy is not None):
        raise Exception("Tor and Proxy cannot be set at the same time.")

    # Make prompts
    if args.proxy is not None:
        print("Using the proxy: " + args.proxy)

    if args.tor or args.unique_tor:
        print("Using Tor to make requests")
        print(
            "Warning: some websites might refuse connections over Tor, so using this option may increase connection errors."
        )

    # Check if both output methods are entered as input.
    if args.output is not None and args.folderoutput is not None:
        print("You can only use one of the output methods.")
        sys.exit(1)

    # Check validity for single username output.
    if args.output is not None and len(args.username) != 1:
        print("You can only use --output with a single username")
        sys.exit(1)

    # Create object with all information about sites we are aware of.
    try:
        if args.local:
            sites = SitesInformation(
                os.path.join(os.path.dirname(__file__), 'resources/data.json'))
        else:
            sites = SitesInformation(args.json_file)
    except Exception as error:
        print(f"ERROR:  {error}")
        sys.exit(1)

    # Create original dictionary from SitesInformation() object.
    # Eventually, the rest of the code will be updated to use the new object
    # directly, but this will glue the two pieces together.
    site_data_all = {}
    for site in sites:
        site_data_all[site.name] = site.information

    if args.site_list is None:
        # Not desired to look at a sub-set of sites
        site_data = site_data_all
    else:
        # User desires to selectively run queries on a sub-set of the site list.

        # Make sure that the sites are supported & build up pruned site database.
        site_data = {}
        site_missing = []
        for site in args.site_list:
            counter = 0
            for existing_site in site_data_all:
                if site.lower() == existing_site.lower():
                    site_data[existing_site] = site_data_all[existing_site]
                    counter += 1
            if counter == 0:
                # Build up list of sites not supported for future error message.
                site_missing.append(f"'{site}'")

        if site_missing:
            print(
                f"Error: Desired sites not found: {', '.join(site_missing)}.")

        if not site_data:
            sys.exit(1)

    # Create notify object for query results.
    query_notify = QueryNotifyPrint(result=None,
                                    verbose=args.verbose,
                                    print_all=args.print_all,
                                    color=not args.no_color)

    # Run report on all specified users.
    for username in args.username:
        results = sherlock(username,
                           site_data,
                           query_notify,
                           tor=args.tor,
                           unique_tor=args.unique_tor,
                           proxy=args.proxy,
                           timeout=args.timeout)

        if args.output:
            result_file = args.output
        elif args.folderoutput:
            # The usernames results should be stored in a targeted folder.
            # If the folder doesn't exist, create it first
            os.makedirs(args.folderoutput, exist_ok=True)
            result_file = os.path.join(args.folderoutput, f"{username}.txt")
        else:
            result_file = f"{username}.txt"

        with open(result_file, "w", encoding="utf-8") as file:
            exists_counter = 0
            for website_name in results:
                dictionary = results[website_name]
                if dictionary.get("status").status == QueryStatus.CLAIMED:
                    exists_counter += 1
                    file.write(website_name + " : " + dictionary["url_user"] +
                               "\n")
            file.write(
                f"Total Websites Username Detected On : {exists_counter}\n")

        if args.csv:
            result_file = f"{username}.csv"
            if args.folderoutput:
                # The usernames results should be stored in a targeted folder.
                # If the folder doesn't exist, create it first
                os.makedirs(args.folderoutput, exist_ok=True)
                result_file = os.path.join(args.folderoutput, result_file)

            with open(result_file, "w", newline='',
                      encoding="utf-8") as csv_report:
                writer = csv.writer(csv_report)
                writer.writerow([
                    'username', 'name', 'url_main', 'url_user', 'exists',
                    'http_status', 'response_time_s'
                ])
                for site in results:
                    response_time_s = results[site]['status'].query_time
                    if response_time_s is None:
                        response_time_s = ""
                    writer.writerow([
                        username, site, results[site]['url_main'],
                        results[site]['url_user'],
                        str(results[site]['status'].status),
                        results[site]['http_status'], response_time_s
                    ])
        print()
Example #3
def main():

    version_string = f"%(prog)s {__version__}\n" +  \
                     f"{requests.__description__}:  {requests.__version__}\n" + \
                     f"Python:  {platform.python_version()}"

    parser = ArgumentParser(
        formatter_class=RawDescriptionHelpFormatter,
        description=f"{module_name} (Version {__version__})")
    parser.add_argument("--version",
                        action="version",
                        version=version_string,
                        help="Display version information and dependencies.")
    parser.add_argument(
        "--verbose",
        "-v",
        "-d",
        "--debug",
        action="store_true",
        dest="verbose",
        default=False,
        help="Display extra debugging information and metrics.")
    parser.add_argument(
        "--rank",
        "-r",
        action="store_true",
        dest="rank",
        default=False,
        help=
        "Present websites ordered by their Alexa.com global rank in popularity."
    )
    parser.add_argument(
        "--folderoutput",
        "-fo",
        dest="folderoutput",
        help=
        "If using multiple usernames, the output of the results will be saved to this folder."
    )
    parser.add_argument(
        "--output",
        "-o",
        dest="output",
        help=
        "If using single username, the output of the result will be saved to this file."
    )
    parser.add_argument(
        "--tor",
        "-t",
        action="store_true",
        dest="tor",
        default=False,
        help=
        "Make requests over Tor; increases runtime; requires Tor to be installed and in system path."
    )
    parser.add_argument(
        "--unique-tor",
        "-u",
        action="store_true",
        dest="unique_tor",
        default=False,
        help=
        "Make requests over Tor with new Tor circuit after each request; increases runtime; requires Tor to be installed and in system path."
    )
    parser.add_argument("--csv",
                        action="store_true",
                        dest="csv",
                        default=False,
                        help="Create Comma-Separated Values (CSV) File.")
    parser.add_argument(
        "--site",
        action="append",
        metavar='SITE_NAME',
        dest="site_list",
        default=None,
        help=
        "Limit analysis to just the listed sites. Add multiple options to specify more than one site."
    )
    parser.add_argument(
        "--proxy",
        "-p",
        metavar='PROXY_URL',
        action="store",
        dest="proxy",
        default=None,
        help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080")
    parser.add_argument(
        "--json",
        "-j",
        metavar="JSON_FILE",
        dest="json_file",
        default=None,
        help="Load data from a JSON file or an online, valid, JSON file.")
    parser.add_argument(
        "--timeout",
        action="store",
        metavar='TIMEOUT',
        dest="timeout",
        type=timeout_check,
        default=None,
        help="Time (in seconds) to wait for response to requests. "
        "Default timeout of 60.0s."
        "A longer timeout will be more likely to get results from slow sites."
        "On the other hand, this may cause a long delay to gather all results."
    )
    parser.add_argument(
        "--print-found",
        action="store_true",
        dest="print_found_only",
        default=False,
        help="Do not output sites where the username was not found.")
    parser.add_argument(
        "--skip-errors",
        action="store_true",
        dest="skip_check_errors",
        default=False,
        help=
        "Do not print errors messages: connection, captcha, site country ban, etc."
    )
    parser.add_argument("--no-color",
                        action="store_true",
                        dest="no_color",
                        default=False,
                        help="Don't color terminal output")
    parser.add_argument("--browse",
                        "-b",
                        action="store_true",
                        dest="browse",
                        default=False,
                        help="Browse to all results on default bowser.")
    parser.add_argument(
        "--ids",
        "-i",
        action="store_true",
        dest="ids_search",
        default=False,
        help=
        "Make scan of pages for other usernames and recursive search by them.")
    parser.add_argument(
        "--parse",
        dest="parse_url",
        default='',
        help="Parse page by URL and extract username and IDs to use for search."
    )
    parser.add_argument(
        "username",
        nargs='+',
        metavar='USERNAMES',
        action="store",
        help="One or more usernames to check with social networks.")
    parser.add_argument("--tags",
                        dest="tags",
                        default='',
                        help="Specify tags of sites.")

    args = parser.parse_args()
    # Argument check

    # Usernames initial list
    usernames = {u: 'username' for u in args.username if u != '-'}

    # TODO regex check on args.proxy
    if args.tor and (args.proxy is not None):
        raise Exception("Tor and Proxy cannot be set at the same time.")

    # Make prompts
    if args.proxy is not None:
        print("Using the proxy: " + args.proxy)

    if args.tor or args.unique_tor:
        print("Using Tor to make requests")
        print(
            "Warning: some websites might refuse connections over Tor, so using this option may increase connection errors."
        )

    # Check if both output methods are entered as input.
    if args.output is not None and args.folderoutput is not None:
        print("You can only use one of the output methods.")
        sys.exit(1)

    # Check validity for single username output.
    if args.output is not None and len(args.username) != 1:
        print("You can only use --output with a single username")
        sys.exit(1)

    if args.parse_url:
        page, _ = parse(
            args.parse_url,
            cookies_str=
            'collections_gid=213; cph=948; cpw=790; yandexuid=2146767031582893378; yuidss=2146767031582893378; gdpr=0; _ym_uid=1582893380492618461; mda=0; ymex=1898253380.yrts.1582893380#1900850969.yrtsi.1585490969; font_loaded=YSv1; yandex_gid=213; my=YwA=; _ym_uid=1582893380492618461; _ym_d=1593451737; L=XGJfaARJWEAARGILWAQKbXJUUU5NSEJHNAwrIxkaE11SHD4P.1593608730.14282.352228.74f1540484d115d5f534c370a0d54d14; yandex_login=danilovdelta; i=pQT2fDoFQAd1ZkIJW/qOXaKw+KI7LXUGoTQbUy5dPTdftfK7HFAnktwsf4MrRy4aQEk0sqxbZGY18+bnpKkrDgt29/8=; ys=udn.cDpkYW5pbG92ZGVsdGE%3D#wprid.1593608013100941-1715475084842016754100299-production-app-host-man-web-yp-306#ymrefl.DD2F275B69BCF594; zm=m-white_bender.webp.css-https%3As3home-static_KgOlxZDBNvw0efFr5riblj4yPtY%3Al; yp=1908968730.udn.cDpkYW5pbG92ZGVsdGE%3D#1595886694.ygu.1#1609637986.szm.2:1680x1050:1644x948#1596131262.csc.2#1908664615.sad.1593304615:1593304615:1#1908965951.multib.1; _ym_d=1593869990; yc=1594225567.zen.cach%3A1593969966; yabs-frequency=/5/0m0004s7_5u00000/8Y10RG00003uEo7ptt9m00000FWx8KRMFsq00000w3j-/; ys_fp=form-client%3DWeb%26form-page%3Dhttps%253A%252F%252Fyandex.ru%252Fchat%2523%252F%2540%252Fchats%252F1%25252F0%25252F964d3b91-5972-49c2-84d3-ed614622223f%2520%25D0%25AF%25D0%25BD%25D0%25B4%25D0%25B5%25D0%25BA%25D1%2581.%25D0%259C%25D0%25B5%25D1%2581%25D1%2581%25D0%25B5%25D0%25BD%25D0%25B4%25D0%25B6%25D0%25B5%25D1%2580%26form-referrer%3Dhttps%253A%252F%252Fyandex.ru%252Fchat%26form-browser%3DMozilla%252F5.0%2520(Macintosh%253B%2520Intel%2520Mac%2520OS%2520X%252010_15_5)%2520AppleWebKit%252F537.36%2520(KHTML%252C%2520like%2520Gecko)%2520Chrome%252F83.0.4103.116%2520Safari%252F537.36%26form-screen%3D1680%25C3%25971050%25C3%259730%26form-window%3D792%25C3%2597948%26form-app_version%3D2.8.0%26form-reqid%3D1593966167731077-1230441077775610555700303-production-app-host-sas-web-yp-249; skid=8069161091593972389; device_id="a9eb41b4cb3b056e5da4f9a4029a9e7cfea081196"; cycada=xPXy0sesbr5pVmRDiBiYZnAFhHtmn6zZ/YSDpCUU2Gs=; Session_id=3:1594143924.5.1.1593295629841:JeDkBQ:f.1|611645851.-1.0.1:114943352|33600788.310322.2.2:310322|219601.339772.5aiiRX9iIGUU6gzDuKnO4dqTM24; sessionid2=3:1594143924.5.1.1593295629841:JeDkBQ:f.1|611645851.-1.0.1:114943352|33600788.310322.2.2:310322|219601.678091.QGFa-AEA5z46AzNAmKFAL4_4jdM; _ym_isad=2; active-browser-timestamp=1594143926414; q-csrf-token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiIzMzYwMDc4OCIsImV4cCI6MTU5NDIzMDMzMX0.w4FkWaag4t1D7j42MD2ILP0oenqZiIjo4iOZnshCiwY; ar=1594145799547993-792214; _ym_visorc_10630330=w; spravka=dD0xNTk0MTQ1ODMwO2k9NS4yMjguMjI0LjM3O3U9MTU5NDE0NTgzMDI5MTk5NTkwMjtoPWMyZTI1Mjk4NmVmZjFhNGNjMGZhYmIwZWQ3ZDIyMmZk'
        )
        info = extract(page)
        text = 'Extracted ID data from webpage: ' + ', '.join(
            [f'{a}: {b}' for a, b in info.items()])
        print(text)
        for k, v in info.items():
            if 'username' in k:
                usernames[v] = 'username'
            if k in ('yandex_public_id', 'wikimapia_uid', 'gaia_id'):
                usernames[v] = k

    if args.tags:
        args.tags = set(args.tags.split(','))

    # Create object with all information about sites we are aware of.
    try:
        sites = SitesInformation(args.json_file)
    except Exception as error:
        print(f"ERROR:  {error}")
        sys.exit(1)

    # Create original dictionary from SitesInformation() object.
    # Eventually, the rest of the code will be updated to use the new object
    # directly, but this will glue the two pieces together.
    site_data_all = {}
    for site in sites:
        site_data_all[site.name] = site.information

    if args.site_list is None:
        # Not desired to look at a sub-set of sites
        site_data = site_data_all
    else:
        # User desires to selectively run queries on a sub-set of the site list.

        # Make sure that the sites are supported & build up pruned site database.
        site_data = {}
        site_missing = []
        for site in args.site_list:
            counter = 0
            for existing_site in site_data_all:
                if site.lower() == existing_site.lower():
                    site_data[existing_site] = site_data_all[existing_site]
                    counter += 1
            if counter == 0:
                # Build up list of sites not supported for future error message.
                site_missing.append(f"'{site}'")

        if site_missing:
            print(
                f"Error: Desired sites not found: {', '.join(site_missing)}.")
            sys.exit(1)

    if args.rank:
        # Sort sites by rank; sites without a "rank" entry go last.
        site_dataCpy = dict(site_data)
        ranked_sites = sorted(
            site_data,
            key=lambda k: ("rank" not in site_data[k],
                           site_data[k].get("rank", sys.maxsize)))
        site_data = {}
        for site in ranked_sites:
            site_data[site] = site_dataCpy.get(site)

    # Create notify object for query results.
    query_notify = QueryNotifyPrint(result=None,
                                    verbose=args.verbose,
                                    print_found_only=args.print_found_only,
                                    skip_check_errors=args.skip_check_errors,
                                    color=not args.no_color)

    already_checked = set()

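    # The usernames dict doubles as a work queue: the ids_search handling
    # below can enqueue newly discovered usernames while this loop runs.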
    while usernames:
        username, id_type = list(usernames.items())[0]
        del usernames[username]

        if username.lower() in already_checked:
            continue
        else:
            already_checked.add(username.lower())

        results = sherlock(username,
                           site_data,
                           query_notify,
                           tor=args.tor,
                           unique_tor=args.unique_tor,
                           proxy=args.proxy,
                           timeout=args.timeout,
                           ids_search=args.ids_search,
                           id_type=id_type,
                           tags=args.tags)

        if args.output:
            result_file = args.output
        elif args.folderoutput:
            # The usernames results should be stored in a targeted folder.
            # If the folder doesn't exist, create it first
            os.makedirs(args.folderoutput, exist_ok=True)
            result_file = os.path.join(args.folderoutput, f"{username}.txt")
        else:
            result_file = f"{username}.txt"

        with open(result_file, "w", encoding="utf-8") as file:
            exists_counter = 0
            for website_name in results:
                dictionary = results[website_name]

                new_usernames = dictionary.get('ids_usernames')
                if new_usernames:
                    for u, utype in new_usernames.items():
                        usernames[u] = utype

                if dictionary.get("status").status == QueryStatus.CLAIMED:
                    exists_counter += 1
                    file.write(dictionary["url_user"] + "\n")
            file.write(
                f"Total Websites Username Detected On : {exists_counter}")

        if args.csv:
            with open(username + ".csv", "w", newline='',
                      encoding="utf-8") as csv_report:
                writer = csv.writer(csv_report)
                writer.writerow([
                    'username', 'name', 'url_main', 'url_user', 'exists',
                    'http_status', 'response_time_s'
                ])
                for site in results:
                    response_time_s = results[site]['status'].query_time
                    if response_time_s is None:
                        response_time_s = ""
                    writer.writerow([
                        username, site, results[site]['url_main'],
                        results[site]['url_user'],
                        str(results[site]['status'].status),
                        results[site]['http_status'], response_time_s
                    ])
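
All three examples pass type=timeout_check to the --timeout argument without defining it. A minimal sketch of such a validator, assuming it only needs to turn the raw CLI string into a positive float (the body below is an assumption, not the projects' actual implementation):

from argparse import ArgumentTypeError

def timeout_check(value):
    """Validate a --timeout value: must parse as a number greater than zero."""
    try:
        timeout = float(value)  # argparse hands the type callable the raw string
    except ValueError:
        raise ArgumentTypeError(f"Timeout '{value}' must be a number.")
    if timeout <= 0:
        # A zero or negative timeout would make every request fail immediately.
        raise ArgumentTypeError(f"Timeout '{value}' must be greater than 0.")
    return timeout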