def test_steam():
    info = extract(
        parse('https://steamcommunity.com/id/GabrielSantosMariano/')[0])

    assert info.get('uid') == '76561198315585536'
    assert info.get('username') == 'GabrielSantosMariano'
    assert info.get('name') == 'Gabriel! Santos, Mariano.'
def test_behance():
    info = extract(parse('https://www.behance.net/Skyratov', 'ilo0=1')[0])

    assert info.get('uid') == '39065909'
    assert info.get('username') == 'Skyratov'
    assert info.get('last_name') == 'Skuratov'
    assert info.get('first_name') == 'Vasiliy'
Example #3
def extract_ids_from_page(url, logger, timeout=5) -> dict:
    results = {}
    # url, headers
    reqs: List[Tuple[str, set]] = [(url, set())]
    try:
        # temporary workaround for URL mutations MVP
        from socid_extractor import mutate_url

        reqs += list(mutate_url(url))
    except Exception as e:
        logger.warning(e)

    for req in reqs:
        url, headers = req
        print(f'Scanning webpage by URL {url}...')
        page, _ = parse(url, cookies_str='', headers=headers, timeout=timeout)
        logger.debug(page)
        info = extract(page)
        if not info:
            print('Nothing extracted')
        else:
            print(get_dict_ascii_tree(info.items(), new_line=False), ' ')
        for k, v in info.items():
            if 'username' in k:
                results[v] = 'username'
            if k in SUPPORTED_IDS:
                results[v] = k

    return results
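A minimal usage sketch for extract_ids_from_page, assuming it sits in a module where maigret's parse, extract, get_dict_ascii_tree and SUPPORTED_IDS are already available; the URL and timeout below are illustrative only.

import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger('maigret')

# Hypothetical call: scans the page (plus mutated URL variants) and returns
# a mapping of extracted identifier values to their types,
# e.g. {'soxoj': 'username', '31013580': 'uid'}.
found_ids = extract_ids_from_page('https://github.com/soxoj', logger, timeout=10)
for value, id_type in found_ids.items():
    print(f'{id_type}: {value}')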
def test_yandex_realty_offer_cookies():
    cookies = open('yandex.test.cookies').read()
    info = extract(
        parse('https://realty.yandex.ru/offer/363951114410351104/',
              cookies)[0])

    assert info.get('uid') == '86903473'
    assert info.get('name') == 'Севостьянова Мария Владимировна'
def test_livejournal():
    info = extract(parse('https://julia-klay.livejournal.com/')[0])

    assert info.get('uid') == '83505610'
    assert info.get('name') == 'julia_klay'
    assert info.get('username') == 'julia_klay'
    assert info.get('is_personal') == 'True'
    assert info.get('is_community') == 'False'
def test_medium():
    info = extract(parse('https://medium.com/@lys1n')[0])

    assert info.get('uid') == '4894fec6b289'
    assert info.get('username') == 'lys1n'
    assert info.get('twitter_username') == 'lys1n'
    assert info.get('name') == 'Марк Лясин'
    assert info.get('facebook_uid') == '1726256597385716'
def test_google_documents_cookies():
    cookies = open('google.test.cookies').read()
    info = extract(
        parse(
            'https://docs.google.com/spreadsheets/d/1HtZKMLRXNsZ0HjtBmo0Gi03nUPiJIA4CC4jTYbCAnXw/edit#gid=0',
            cookies)[0])

    assert info.get('org_domain') == 'breakoutcommerce.com'
    assert info.get('org_name') == 'Gooten'
def test_yandex_znatoki_user_profile():
    info = extract(
        parse(
            'https://yandex.ru/znatoki/user/e3795016-b18e-58ba-9112-21c301e53f37/'
        )[0])

    assert info.get('uid') == 'e3795016-b18e-58ba-9112-21c301e53f37'
    assert info.get('yandex_uid') == '980797984'
    assert info.get('username') == 'uid-hwcuuacg'
    assert info.get('name') == 'Настя Рогозинская'
def test_500px():
    info = extract(parse('https://500px.com/the-maksimov')[0])

    assert info.get('uid') == '23896'
    assert info.get('username') == 'The-Maksimov'
    assert info.get('name') == 'Maxim Maximov'
    assert info.get('qq_uid') is None
    assert info.get('fb_uid') is None
    assert info.get('instagram_username') == 'the.maksimov'
    assert info.get('twitter_username') == 'The_Maksimov'
    assert info.get('website') == 'vk.com/id156603747'
    assert info.get('facebook_page') == 'facebook.com/the.maksimov'
    assert info.get('facebook_uid') == '100001789363632'
def test_github():
    info = extract(parse('https://github.com/soxoj')[0])

    assert info.get('uid') == '31013580'
    assert info.get('username') == 'soxoj'
def test_facebook_group():
    info = extract(parse('https://www.facebook.com/discordapp/')[0])

    assert info.get('uid') == '858412104226521'
    assert info.get('username') == 'discordapp'
def test_facebook_user_profile():
    info = extract(parse('https://ru-ru.facebook.com/anatolijsharij/')[0])

    assert info.get('uid') == '1486042157'
    assert info.get('username') == 'anatolijsharij'
def test_reddit():
    info = extract(parse('https://www.reddit.com/user/postvolta/')[0])

    assert info.get('uid') == 't2_dexuehm'
    assert info.get('username') == 'postvolta'
def test_stack_exchange():
    info = extract(parse('https://stackoverflow.com/users/758202/zzart')[0])

    assert info.get('uid') == '758202'
    assert info.get('stack_exchange_uid') == '395311'
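All of the tests above follow the same two-step pattern: parse() fetches the page (only the first element of the returned tuple is used) and extract() turns it into a flat dict of account fields. A minimal standalone sketch of that pattern, assuming the socid_extractor package is installed and exports parse and extract at the top level:

from socid_extractor import extract, parse

page, _ = parse('https://github.com/soxoj', cookies_str='', timeout=10)
info = extract(page)

# info is a flat dict of strings, e.g. {'uid': '31013580', 'username': 'soxoj'}
for field, value in info.items():
    print(f'{field}: {value}')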
Example #15
async def main():
    version_string = f"%(prog)s {__version__}\n" +  \
                     f"{requests.__description__}:  {requests.__version__}\n" + \
                     f"Python:  {platform.python_version()}"

    parser = ArgumentParser(
        formatter_class=RawDescriptionHelpFormatter,
        description=f"{module_name} (Version {__version__})")
    parser.add_argument("--version",
                        action="version",
                        version=version_string,
                        help="Display version information and dependencies.")
    parser.add_argument("--verbose",
                        "-v",
                        action="store_true",
                        dest="verbose",
                        default=False,
                        help="Display extra information and metrics.")
    parser.add_argument(
        "-d",
        "--debug",
        action="store_true",
        dest="debug",
        default=False,
        help="Saving debugging information and sites responses in debug.txt.")
    parser.add_argument(
        "--rank",
        "-r",
        action="store_true",
        dest="rank",
        default=False,
        help=
        "Present websites ordered by their Alexa.com global rank in popularity."
    )
    parser.add_argument(
        "--folderoutput",
        "-fo",
        dest="folderoutput",
        help=
        "If using multiple usernames, the output of the results will be saved to this folder."
    )
    parser.add_argument(
        "--output",
        "-o",
        dest="output",
        help=
        "If using single username, the output of the result will be saved to this file."
    )
    parser.add_argument(
        "--tor",
        "-t",
        action="store_true",
        dest="tor",
        default=False,
        help=
        "Make requests over Tor; increases runtime; requires Tor to be installed and in system path."
    )
    parser.add_argument(
        "--unique-tor",
        "-u",
        action="store_true",
        dest="unique_tor",
        default=False,
        help=
        "Make requests over Tor with new Tor circuit after each request; increases runtime; requires Tor to be installed and in system path."
    )
    parser.add_argument("--csv",
                        action="store_true",
                        dest="csv",
                        default=False,
                        help="Create Comma-Separated Values (CSV) File.")
    parser.add_argument(
        "--site",
        action="append",
        metavar='SITE_NAME',
        dest="site_list",
        default=None,
        help=
        "Limit analysis to just the listed sites. Add multiple options to specify more than one site."
    )
    parser.add_argument(
        "--proxy",
        "-p",
        metavar='PROXY_URL',
        action="store",
        dest="proxy",
        default=None,
        help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080")
    parser.add_argument(
        "--json",
        "-j",
        metavar="JSON_FILE",
        dest="json_file",
        default=None,
        help="Load data from a JSON file or an online, valid, JSON file.")
    parser.add_argument(
        "--timeout",
        action="store",
        metavar='TIMEOUT',
        dest="timeout",
        type=timeout_check,
        default=10,
        help="Time (in seconds) to wait for response to requests."
        "Default timeout of 10.0s."
        "A longer timeout will be more likely to get results from slow sites."
        "On the other hand, this may cause a long delay to gather all results."
    )
    parser.add_argument(
        "--print-found",
        action="store_true",
        dest="print_found_only",
        default=False,
        help="Do not output sites where the username was not found.")
    parser.add_argument(
        "--skip-errors",
        action="store_true",
        dest="skip_check_errors",
        default=False,
        help=
        "Do not print errors messages: connection, captcha, site country ban, etc."
    )
    parser.add_argument("--no-color",
                        action="store_true",
                        dest="no_color",
                        default=False,
                        help="Don't color terminal output")
    parser.add_argument("--browse",
                        "-b",
                        action="store_true",
                        dest="browse",
                        default=False,
                        help="Browse to all results on default bowser.")
    parser.add_argument(
        "--ids",
        "-i",
        action="store_true",
        dest="ids_search",
        default=False,
        help=
        "Make scan of pages for other usernames and recursive search by them.")
    parser.add_argument(
        "--parse",
        dest="parse_url",
        default='',
        help="Parse page by URL and extract username and IDs to use for search."
    )
    parser.add_argument(
        "username",
        nargs='+',
        metavar='USERNAMES',
        action="store",
        help="One or more usernames to check with social networks.")
    parser.add_argument("--tags",
                        dest="tags",
                        default='',
                        help="Specify tags of sites.")
    args = parser.parse_args()

    # Logging
    log_level = logging.ERROR
    logging.basicConfig(
        format=
        '[%(filename)s:%(lineno)d] %(levelname)-3s  %(asctime)s %(message)s',
        datefmt='%H:%M:%S',
        level=logging.ERROR)

    if args.debug:
        log_level = logging.DEBUG
    elif args.verbose:
        log_level = logging.WARNING

    logger = logging.getLogger('maigret')
    logger.setLevel(log_level)

    # Usernames initial list
    usernames = {u: 'username' for u in args.username if u not in ('-',)}

    # TODO regex check on args.proxy
    if args.tor and (args.proxy is not None):
        raise Exception("Tor and Proxy cannot be set at the same time.")

    # Make prompts
    if args.proxy is not None:
        print("Using the proxy: " + args.proxy)

    if args.tor or args.unique_tor:
        print("Using Tor to make requests")
        print(
            "Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors."
        )

    # Check if both output methods are entered as input.
    if args.output is not None and args.folderoutput is not None:
        print("You can only use one of the output methods.")
        sys.exit(1)

    # Check validity for single username output.
    if args.output is not None and len(args.username) != 1:
        print("You can only use --output with a single username")
        sys.exit(1)

    if args.parse_url:
        page, _ = parse(args.parse_url, cookies_str='')
        info = extract(page)
        text = 'Extracted ID data from webpage: ' + ', '.join(
            [f'{a}: {b}' for a, b in info.items()])
        print(text)
        for k, v in info.items():
            if 'username' in k:
                usernames[v] = 'username'
            if k in supported_recursive_search_ids:
                usernames[v] = k

    if args.tags:
        args.tags = set(args.tags.split(','))

    #Create object with all information about sites we are aware of.
    try:
        sites = SitesInformation(args.json_file)
    except Exception as error:
        print(f"ERROR:  {error}")
        sys.exit(1)

    #Create original dictionary from SitesInformation() object.
    #Eventually, the rest of the code will be updated to use the new object
    #directly, but this will glue the two pieces together.
    site_data_all = {}
    for site in sites:
        site_data_all[site.name] = site.information

    if args.site_list is None:
        # Not desired to look at a sub-set of sites
        site_data = site_data_all
    else:
        # User desires to selectively run queries on a sub-set of the site list.

        # Make sure that the sites are supported & build up pruned site database.
        site_data = {}
        site_missing = []
        for site in args.site_list:
            site_found = False
            for existing_site in site_data_all:
                if site.lower() == existing_site.lower():
                    site_data[existing_site] = site_data_all[existing_site]
                    site_found = True
            if not site_found:
                # Build up list of sites not supported for future error message.
                site_missing.append(f"'{site}'")

        if site_missing:
            print(
                f"Error: Desired sites not found: {', '.join(site_missing)}.")
            sys.exit(1)

    if args.rank:
        # Sort data by rank
        site_dataCpy = dict(site_data)
        ranked_sites = sorted(
            site_data,
            key=lambda k: ("rank" not in site_data[k],
                           site_data[k].get("rank", sys.maxsize)))
        site_data = {}
        for site in ranked_sites:
            site_data[site] = site_dataCpy.get(site)

    #Create notify object for query results.
    query_notify = QueryNotifyPrint(result=None,
                                    verbose=args.verbose,
                                    print_found_only=args.print_found_only,
                                    skip_check_errors=args.skip_check_errors,
                                    color=not args.no_color)

    already_checked = set()

    while usernames:
        username, id_type = list(usernames.items())[0]
        del usernames[username]

        if username.lower() in already_checked:
            continue
        else:
            already_checked.add(username.lower())

        # check for characters not generally supported by sites
        found_unsupported_chars = set(unsupported_characters).intersection(
            set(username))

        if found_unsupported_chars:
            pretty_chars_str = ','.join(
                map(lambda s: f'"{s}"', found_unsupported_chars))
            print(
                f'Found unsupported URL characters: {pretty_chars_str}, skip search by username "{username}"'
            )
            continue

        results = await sherlock(username,
                                 site_data,
                                 query_notify,
                                 tor=args.tor,
                                 unique_tor=args.unique_tor,
                                 proxy=args.proxy,
                                 timeout=args.timeout,
                                 ids_search=args.ids_search,
                                 id_type=id_type,
                                 tags=args.tags,
                                 debug=args.verbose,
                                 logger=logger)

        if args.output:
            result_file = args.output
        elif args.folderoutput:
            # The username's results should be stored in a targeted folder.
            # If the folder doesn't exist, create it first
            os.makedirs(args.folderoutput, exist_ok=True)
            result_file = os.path.join(args.folderoutput, f"{username}.txt")
        else:
            result_file = f"{username}.txt"

        with open(result_file, "w", encoding="utf-8") as file:
            exists_counter = 0
            for website_name in results:
                dictionary = results[website_name]

                new_usernames = dictionary.get('ids_usernames')
                if new_usernames:
                    for u, utype in new_usernames.items():
                        usernames[u] = utype

                if dictionary.get("status").status == QueryStatus.CLAIMED:
                    exists_counter += 1
                    file.write(dictionary["url_user"] + "\n")
            file.write(
                f"Total Websites Username Detected On : {exists_counter}")

        if args.csv:
            with open(username + ".csv", "w", newline='',
                      encoding="utf-8") as csv_report:
                writer = csv.writer(csv_report)
                writer.writerow([
                    'username', 'name', 'url_main', 'url_user', 'exists',
                    'http_status', 'response_time_s'
                ])
                for site in results:
                    response_time_s = results[site]['status'].query_time
                    if response_time_s is None:
                        response_time_s = ""
                    writer.writerow([
                        username, site, results[site]['url_main'],
                        results[site]['url_user'],
                        str(results[site]['status'].status),
                        results[site]['http_status'], response_time_s
                    ])
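Example #15 defines main() as a coroutine but the excerpt ends before the launcher; a minimal sketch of the usual entry point, assuming Python 3.7+ and that the coroutine above lives in the same module:

import asyncio

if __name__ == '__main__':
    # Run the async CLI entry point defined above.
    asyncio.run(main())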
Example #16
File: maigret.py  Project: uMag/maigret
def main():

    version_string = f"%(prog)s {__version__}\n" +  \
                     f"{requests.__description__}:  {requests.__version__}\n" + \
                     f"Python:  {platform.python_version()}"

    parser = ArgumentParser(
        formatter_class=RawDescriptionHelpFormatter,
        description=f"{module_name} (Version {__version__})")
    parser.add_argument("--version",
                        action="version",
                        version=version_string,
                        help="Display version information and dependencies.")
    parser.add_argument(
        "--verbose",
        "-v",
        "-d",
        "--debug",
        action="store_true",
        dest="verbose",
        default=False,
        help="Display extra debugging information and metrics.")
    parser.add_argument(
        "--rank",
        "-r",
        action="store_true",
        dest="rank",
        default=False,
        help=
        "Present websites ordered by their Alexa.com global rank in popularity."
    )
    parser.add_argument(
        "--folderoutput",
        "-fo",
        dest="folderoutput",
        help=
        "If using multiple usernames, the output of the results will be saved to this folder."
    )
    parser.add_argument(
        "--output",
        "-o",
        dest="output",
        help=
        "If using single username, the output of the result will be saved to this file."
    )
    parser.add_argument(
        "--tor",
        "-t",
        action="store_true",
        dest="tor",
        default=False,
        help=
        "Make requests over Tor; increases runtime; requires Tor to be installed and in system path."
    )
    parser.add_argument(
        "--unique-tor",
        "-u",
        action="store_true",
        dest="unique_tor",
        default=False,
        help=
        "Make requests over Tor with new Tor circuit after each request; increases runtime; requires Tor to be installed and in system path."
    )
    parser.add_argument("--csv",
                        action="store_true",
                        dest="csv",
                        default=False,
                        help="Create Comma-Separated Values (CSV) File.")
    parser.add_argument(
        "--site",
        action="append",
        metavar='SITE_NAME',
        dest="site_list",
        default=None,
        help=
        "Limit analysis to just the listed sites. Add multiple options to specify more than one site."
    )
    parser.add_argument(
        "--proxy",
        "-p",
        metavar='PROXY_URL',
        action="store",
        dest="proxy",
        default=None,
        help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080")
    parser.add_argument(
        "--json",
        "-j",
        metavar="JSON_FILE",
        dest="json_file",
        default=None,
        help="Load data from a JSON file or an online, valid, JSON file.")
    parser.add_argument(
        "--timeout",
        action="store",
        metavar='TIMEOUT',
        dest="timeout",
        type=timeout_check,
        default=None,
        help="Time (in seconds) to wait for response to requests. "
        "Default timeout of 60.0s."
        "A longer timeout will be more likely to get results from slow sites."
        "On the other hand, this may cause a long delay to gather all results."
    )
    parser.add_argument(
        "--print-found",
        action="store_true",
        dest="print_found_only",
        default=False,
        help="Do not output sites where the username was not found.")
    parser.add_argument(
        "--skip-errors",
        action="store_true",
        dest="skip_check_errors",
        default=False,
        help=
        "Do not print errors messages: connection, captcha, site country ban, etc."
    )
    parser.add_argument("--no-color",
                        action="store_true",
                        dest="no_color",
                        default=False,
                        help="Don't color terminal output")
    parser.add_argument("--browse",
                        "-b",
                        action="store_true",
                        dest="browse",
                        default=False,
                        help="Browse to all results on default bowser.")
    parser.add_argument(
        "--ids",
        "-i",
        action="store_true",
        dest="ids_search",
        default=False,
        help=
        "Make scan of pages for other usernames and recursive search by them.")
    parser.add_argument(
        "--parse",
        dest="parse_url",
        default='',
        help="Parse page by URL and extract username and IDs to use for search."
    )
    parser.add_argument(
        "username",
        nargs='+',
        metavar='USERNAMES',
        action="store",
        help="One or more usernames to check with social networks.")
    parser.add_argument("--tags",
                        dest="tags",
                        default='',
                        help="Specify tags of sites.")

    args = parser.parse_args()
    # Argument check

    # Usernames initial list
    usernames = {u: 'username' for u in args.username if u not in ('-',)}

    # TODO regex check on args.proxy
    if args.tor and (args.proxy is not None):
        raise Exception("Tor and Proxy cannot be set at the same time.")

    # Make prompts
    if args.proxy is not None:
        print("Using the proxy: " + args.proxy)

    if args.tor or args.unique_tor:
        print("Using Tor to make requests")
        print(
            "Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors."
        )

    # Check if both output methods are entered as input.
    if args.output is not None and args.folderoutput is not None:
        print("You can only use one of the output methods.")
        sys.exit(1)

    # Check validity for single username output.
    if args.output is not None and len(args.username) != 1:
        print("You can only use --output with a single username")
        sys.exit(1)

    if args.parse_url:
        page, _ = parse(
            args.parse_url,
            cookies_str=
            'collections_gid=213; cph=948; cpw=790; yandexuid=2146767031582893378; yuidss=2146767031582893378; gdpr=0; _ym_uid=1582893380492618461; mda=0; ymex=1898253380.yrts.1582893380#1900850969.yrtsi.1585490969; font_loaded=YSv1; yandex_gid=213; my=YwA=; _ym_uid=1582893380492618461; _ym_d=1593451737; L=XGJfaARJWEAARGILWAQKbXJUUU5NSEJHNAwrIxkaE11SHD4P.1593608730.14282.352228.74f1540484d115d5f534c370a0d54d14; yandex_login=danilovdelta; i=pQT2fDoFQAd1ZkIJW/qOXaKw+KI7LXUGoTQbUy5dPTdftfK7HFAnktwsf4MrRy4aQEk0sqxbZGY18+bnpKkrDgt29/8=; ys=udn.cDpkYW5pbG92ZGVsdGE%3D#wprid.1593608013100941-1715475084842016754100299-production-app-host-man-web-yp-306#ymrefl.DD2F275B69BCF594; zm=m-white_bender.webp.css-https%3As3home-static_KgOlxZDBNvw0efFr5riblj4yPtY%3Al; yp=1908968730.udn.cDpkYW5pbG92ZGVsdGE%3D#1595886694.ygu.1#1609637986.szm.2:1680x1050:1644x948#1596131262.csc.2#1908664615.sad.1593304615:1593304615:1#1908965951.multib.1; _ym_d=1593869990; yc=1594225567.zen.cach%3A1593969966; yabs-frequency=/5/0m0004s7_5u00000/8Y10RG00003uEo7ptt9m00000FWx8KRMFsq00000w3j-/; ys_fp=form-client%3DWeb%26form-page%3Dhttps%253A%252F%252Fyandex.ru%252Fchat%2523%252F%2540%252Fchats%252F1%25252F0%25252F964d3b91-5972-49c2-84d3-ed614622223f%2520%25D0%25AF%25D0%25BD%25D0%25B4%25D0%25B5%25D0%25BA%25D1%2581.%25D0%259C%25D0%25B5%25D1%2581%25D1%2581%25D0%25B5%25D0%25BD%25D0%25B4%25D0%25B6%25D0%25B5%25D1%2580%26form-referrer%3Dhttps%253A%252F%252Fyandex.ru%252Fchat%26form-browser%3DMozilla%252F5.0%2520(Macintosh%253B%2520Intel%2520Mac%2520OS%2520X%252010_15_5)%2520AppleWebKit%252F537.36%2520(KHTML%252C%2520like%2520Gecko)%2520Chrome%252F83.0.4103.116%2520Safari%252F537.36%26form-screen%3D1680%25C3%25971050%25C3%259730%26form-window%3D792%25C3%2597948%26form-app_version%3D2.8.0%26form-reqid%3D1593966167731077-1230441077775610555700303-production-app-host-sas-web-yp-249; skid=8069161091593972389; device_id="a9eb41b4cb3b056e5da4f9a4029a9e7cfea081196"; cycada=xPXy0sesbr5pVmRDiBiYZnAFhHtmn6zZ/YSDpCUU2Gs=; Session_id=3:1594143924.5.1.1593295629841:JeDkBQ:f.1|611645851.-1.0.1:114943352|33600788.310322.2.2:310322|219601.339772.5aiiRX9iIGUU6gzDuKnO4dqTM24; sessionid2=3:1594143924.5.1.1593295629841:JeDkBQ:f.1|611645851.-1.0.1:114943352|33600788.310322.2.2:310322|219601.678091.QGFa-AEA5z46AzNAmKFAL4_4jdM; _ym_isad=2; active-browser-timestamp=1594143926414; q-csrf-token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiIzMzYwMDc4OCIsImV4cCI6MTU5NDIzMDMzMX0.w4FkWaag4t1D7j42MD2ILP0oenqZiIjo4iOZnshCiwY; ar=1594145799547993-792214; _ym_visorc_10630330=w; spravka=dD0xNTk0MTQ1ODMwO2k9NS4yMjguMjI0LjM3O3U9MTU5NDE0NTgzMDI5MTk5NTkwMjtoPWMyZTI1Mjk4NmVmZjFhNGNjMGZhYmIwZWQ3ZDIyMmZk'
        )
        info = extract(page)
        text = 'Extracted ID data from webpage: ' + ', '.join(
            [f'{a}: {b}' for a, b in info.items()])
        print(text)
        for k, v in info.items():
            if 'username' in k:
                usernames[v] = 'username'
            if k in ('yandex_public_id', 'wikimapia_uid', 'gaia_id'):
                usernames[v] = k

    if args.tags:
        args.tags = set(args.tags.split(','))

    #Create object with all information about sites we are aware of.
    try:
        sites = SitesInformation(args.json_file)
    except Exception as error:
        print(f"ERROR:  {error}")
        sys.exit(1)

    #Create original dictionary from SitesInformation() object.
    #Eventually, the rest of the code will be updated to use the new object
    #directly, but this will glue the two pieces together.
    site_data_all = {}
    for site in sites:
        site_data_all[site.name] = site.information

    if args.site_list is None:
        # Not desired to look at a sub-set of sites
        site_data = site_data_all
    else:
        # User desires to selectively run queries on a sub-set of the site list.

        # Make sure that the sites are supported & build up pruned site database.
        site_data = {}
        site_missing = []
        for site in args.site_list:
            site_found = False
            for existing_site in site_data_all:
                if site.lower() == existing_site.lower():
                    site_data[existing_site] = site_data_all[existing_site]
                    site_found = True
            if not site_found:
                # Build up list of sites not supported for future error message.
                site_missing.append(f"'{site}'")

        if site_missing:
            print(
                f"Error: Desired sites not found: {', '.join(site_missing)}.")
            sys.exit(1)

    if args.rank:
        # Sort data by rank
        site_dataCpy = dict(site_data)
        ranked_sites = sorted(
            site_data,
            key=lambda k: ("rank" not in site_data[k],
                           site_data[k].get("rank", sys.maxsize)))
        site_data = {}
        for site in ranked_sites:
            site_data[site] = site_dataCpy.get(site)

    #Create notify object for query results.
    query_notify = QueryNotifyPrint(result=None,
                                    verbose=args.verbose,
                                    print_found_only=args.print_found_only,
                                    skip_check_errors=args.skip_check_errors,
                                    color=not args.no_color)

    already_checked = set()

    while usernames:
        username, id_type = list(usernames.items())[0]
        del usernames[username]

        if username.lower() in already_checked:
            continue
        else:
            already_checked.add(username.lower())

        results = sherlock(username,
                           site_data,
                           query_notify,
                           tor=args.tor,
                           unique_tor=args.unique_tor,
                           proxy=args.proxy,
                           timeout=args.timeout,
                           ids_search=args.ids_search,
                           id_type=id_type,
                           tags=args.tags)

        if args.output:
            result_file = args.output
        elif args.folderoutput:
            # The username's results should be stored in a targeted folder.
            # If the folder doesn't exist, create it first
            os.makedirs(args.folderoutput, exist_ok=True)
            result_file = os.path.join(args.folderoutput, f"{username}.txt")
        else:
            result_file = f"{username}.txt"

        with open(result_file, "w", encoding="utf-8") as file:
            exists_counter = 0
            for website_name in results:
                dictionary = results[website_name]

                new_usernames = dictionary.get('ids_usernames')
                if new_usernames:
                    for u, utype in new_usernames.items():
                        usernames[u] = utype

                if dictionary.get("status").status == QueryStatus.CLAIMED:
                    exists_counter += 1
                    file.write(dictionary["url_user"] + "\n")
            file.write(
                f"Total Websites Username Detected On : {exists_counter}")

        if args.csv:
            with open(username + ".csv", "w", newline='',
                      encoding="utf-8") as csv_report:
                writer = csv.writer(csv_report)
                writer.writerow([
                    'username', 'name', 'url_main', 'url_user', 'exists',
                    'http_status', 'response_time_s'
                ])
                for site in results:
                    response_time_s = results[site]['status'].query_time
                    if response_time_s is None:
                        response_time_s = ""
                    writer.writerow([
                        username, site, results[site]['url_main'],
                        results[site]['url_user'],
                        str(results[site]['status'].status),
                        results[site]['http_status'], response_time_s
                    ])
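Example #16 hard-codes a long Yandex session cookie string into the --parse handler; the cookie-based tests earlier in this listing instead read the cookies from a file, which is the more maintainable pattern. A minimal sketch of that approach, assuming socid_extractor's parse/extract and a local yandex.test.cookies file containing a raw "name=value; name2=value2" cookie string:

from socid_extractor import extract, parse

# Load the raw cookie string saved beforehand.
with open('yandex.test.cookies') as f:
    cookies = f.read().strip()

page, _ = parse('https://realty.yandex.ru/offer/363951114410351104/', cookies_str=cookies)
info = extract(page)
print(info.get('uid'), info.get('name'))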
def test_vk_user_profile_full():
    info = extract(parse('https://vk.com/idsvyatoslavs')[0])

    assert info.get('uid') == '134173165'
    assert info.get('username') == 'idsvyatoslavs'
    assert info.get('name') in ('Святослав Степанов', 'Svyatoslav Stepanov')
def test_yandex_disk():
    info = extract(parse('https://yadi.sk/d/KDk-D4vhGFbhb')[0])

    assert info.get('uid') == '106917461'
    assert info.get('name') == 'samografova.viktoria'
def test_instagram():
    info = extract(parse('https://www.instagram.com/xenia_sobchak/')[0])

    assert info.get('uid') == '21965519'
    assert info.get('username') == 'xenia_sobchak'
def test_myspace():
    info = extract(parse('https://myspace.com/katelynryry')[0])

    assert info.get('uid') == '8158005'
    assert info.get('username') == 'katelynryry'
def test_blogger():
    info = extract(parse('https://b0ltay.blogspot.ru')[0])

    assert info.get('uid') == '10725121405978587846'
    assert info.get('blog_id') == '9057808199412143402'
def test_vcru():
    info = extract(parse('https://vc.ru/u/6587-pavel-stolyarov')[0])

    assert info.get('uid') == '6587'
    assert info.get('username') == '6587-pavel-stolyarov'
    assert info.get('name') == 'Павел Столяров'
def test_soundcloud():
    info = extract(parse('https://soundcloud.com/danielpatterson')[0])

    assert info.get('uid') == '78365'
    assert info.get('username') == 'danielpatterson'
    assert info.get('name') == 'Daniel Patterson'
def test_yandex_disk_photos():
    info = extract(parse('https://yadi.sk/a/oiySK_wg3Vv5p4')[0])

    assert info.get('uid') == '38569641'
    assert info.get('username') == 'nikitina-nm'
    assert info.get('name') == 'Вербочка'
Example #25
File: maigret.py  Project: sec-js/maigret
async def main():
    version_string = f"%(prog)s {__version__}\n" + \
                     f"{requests.__description__}:  {requests.__version__}\n" + \
                     f"Python:  {platform.python_version()}"

    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
                            description=f"Maigret v{__version__}"
                            )
    parser.add_argument("--version",
                        action="version", version=version_string,
                        help="Display version information and dependencies."
                        )
    parser.add_argument("--info",
                        action="store_true", dest="info", default=False,
                        help="Display service information."
                        )
    parser.add_argument("--verbose", "-v",
                        action="store_true", dest="verbose", default=False,
                        help="Display extra information and metrics."
                        )
    parser.add_argument("-d", "--debug",
                        action="store_true", dest="debug", default=False,
                        help="Saving debugging information and sites responses in debug.txt."
                        )
    parser.add_argument("--folderoutput", "-fo", dest="folderoutput", default="reports",
                        help="If using multiple usernames, the output of the results will be saved to this folder."
                        )
    parser.add_argument("--csv",
                        action="store_true", dest="csv", default=False,
                        help="Create Comma-Separated Values (CSV) File."
                        )
    parser.add_argument("--html",
                        action="store_true", dest="html", default=False,
                        help="Create HTML report file."
                        )
    parser.add_argument("--site",
                        action="append", metavar='SITE_NAME',
                        dest="site_list", default=[],
                        help="Limit analysis to just the listed sites (use several times to specify more than one)"
                        )
    parser.add_argument("--proxy", "-p", metavar='PROXY_URL',
                        action="store", dest="proxy", default=None,
                        help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080"
                        )
    parser.add_argument("--json", "-j", metavar="JSON_FILE",
                        dest="json_file", default=None,
                        help="Load data from a JSON file or an online, valid, JSON file.")
    parser.add_argument("--timeout",
                        action="store", metavar='TIMEOUT',
                        dest="timeout", type=timeout_check, default=10,
                        help="Time (in seconds) to wait for response to requests."
                             "Default timeout of 10.0s."
                             "A longer timeout will be more likely to get results from slow sites."
                             "On the other hand, this may cause a long delay to gather all results."
                        )
    parser.add_argument("--top-sites",
                        action="store", default=500, type=int,
                        help="Count of sites for checking ranked by Alexa Top (default: 500)."
                        )
    parser.add_argument("--print-not-found",
                        action="store_true", dest="print_not_found", default=False,
                        help="Print sites where the username was not found."
                        )
    parser.add_argument("--print-errors",
                        action="store_true", dest="print_check_errors", default=False,
                        help="Print errors messages: connection, captcha, site country ban, etc."
                        )
    parser.add_argument("--no-color",
                        action="store_true", dest="no_color", default=False,
                        help="Don't color terminal output"
                        )
    parser.add_argument("--browse", "-b",
                        action="store_true", dest="browse", default=False,
                        help="Browse to all results on default bowser."
                        )
    parser.add_argument("--no-recursion",
                        action="store_true", dest="disable_recursive_search", default=False,
                        help="Disable parsing pages for other usernames and recursive search by them."
                        )
    parser.add_argument("--self-check",
                        action="store_true", default=False,
                        help="Do self check for sites and database and disable non-working ones."
                        )
    parser.add_argument("--use-disabled-sites",
                        action="store_true", default=False,
                        help="Use disabled sites to search (may cause many false positives)."
                        )
    parser.add_argument("--parse",
                        dest="parse_url", default='',
                        help="Parse page by URL and extract username and IDs to use for search."
                        )
    parser.add_argument("username",
                        nargs='+', metavar='USERNAMES',
                        action="store",
                        help="One or more usernames to check with social networks."
                        )
    parser.add_argument("--tags",
                        dest="tags", default='',
                        help="Specify tags of sites."
                        )

    parser.add_argument("-x","--xmind",
                        action="store_true",
                        dest="xmind", default=False,
                        help="Generate an xmind 8 mindmap"
                        )

    parser.add_argument("-P", "--pdf",
                        action="store_true",
                        dest="pdf", default=False,
                        help="Generate a pdf report"
                        )

    args = parser.parse_args()

    # Logging
    log_level = logging.ERROR
    logging.basicConfig(
        format='[%(filename)s:%(lineno)d] %(levelname)-3s  %(asctime)s %(message)s',
        datefmt='%H:%M:%S',
        level=log_level
    )

    if args.debug:
        log_level = logging.DEBUG
    elif args.info:
        log_level = logging.INFO
    elif args.verbose:
        log_level = logging.WARNING

    logger = logging.getLogger('maigret')
    logger.setLevel(log_level)

    # Usernames initial list
    usernames = {
        u: 'username'
        for u in args.username
        if u not in ['-']
    }

    recursive_search_enabled = not args.disable_recursive_search

    # Make prompts
    if args.proxy is not None:
        print("Using the proxy: " + args.proxy)

    if args.parse_url:
        page, _ = parse(args.parse_url, cookies_str='')
        info = extract(page)
        text = 'Extracted ID data from webpage: ' + ', '.join([f'{a}: {b}' for a, b in info.items()])
        print(text)
        for k, v in info.items():
            if 'username' in k:
                usernames[v] = 'username'
            if k in supported_recursive_search_ids:
                usernames[v] = k

    if args.tags:
        args.tags = list(set(str(args.tags).split(',')))

    if args.json_file is None:
        args.json_file = \
            os.path.join(os.path.dirname(os.path.realpath(__file__)),
                         "resources/data.json"
                         )

    if args.top_sites == 0:
        args.top_sites = sys.maxsize

    # Create object with all information about sites we are aware of.
    try:
        db = MaigretDatabase().load_from_file(args.json_file)
        site_data = db.ranked_sites_dict(top=args.top_sites, tags=args.tags, names=args.site_list)
    except Exception as error:
        print(f"ERROR:  {error}")
        sys.exit(1)

    # Database self-checking
    if args.self_check:
        print('Maigret sites database self-checking...')
        await self_check(db, site_data, logger)
        if input('Do you want to save changes permanently? [yYnN]\n').lower() == 'y':
            db.save_to_file(args.json_file)
            print('Database was successfully updated.')
        else:
            print('Updates will be applied only for current search session.')

    # Database consistency
    enabled_count = len(list(filter(lambda x: not x.disabled, site_data.values())))
    print(f'Sites in database, enabled/total: {enabled_count}/{len(site_data)}')

    if not enabled_count:
        print('No sites to check, exiting!')
        sys.exit(2)

    if not usernames:
        # magic params to exit after init
        print('No usernames to check, exiting.')
        sys.exit(0)

    # Create notify object for query results.
    query_notify = QueryNotifyPrint(result=None,
                                    verbose=args.verbose,
                                    print_found_only=not args.print_not_found,
                                    skip_check_errors=not args.print_check_errors,
                                    color=not args.no_color)

    already_checked = set()

    general_results = []

    while usernames:
        username, id_type = list(usernames.items())[0]
        del usernames[username]

        if username.lower() in already_checked:
            continue
        else:
            already_checked.add(username.lower())

        # check for characters not generally supported by sites
        found_unsupported_chars = set(unsupported_characters).intersection(set(username))

        if found_unsupported_chars:
            pretty_chars_str = ','.join(map(lambda s: f'"{s}"', found_unsupported_chars))
            print(f'Found unsupported URL characters: {pretty_chars_str}, skip search by username "{username}"')
            continue

        results = await maigret(username,
                                dict(site_data),
                                query_notify,
                                proxy=args.proxy,
                                timeout=args.timeout,
                                recursive_search=recursive_search_enabled,
                                id_type=id_type,
                                tags=args.tags,
                                debug=args.verbose,
                                logger=logger,
                                forced=args.use_disabled_sites,
                                )
        general_results.append((username, id_type, results))

        if args.folderoutput:
            # The username's results should be stored in a targeted folder.
            # If the folder doesn't exist, create it first
            os.makedirs(args.folderoutput, exist_ok=True)
            result_path = os.path.join(args.folderoutput, f"{username}.")
        else:
            result_path = os.path.join("reports", f"{username}.")

        if args.xmind:
            genxmindfile(result_path + "xmind", username, results)

        with open(result_path + "txt", "w", encoding="utf-8") as file:
            exists_counter = 0
            for website_name in results:
                dictionary = results[website_name]
                # TODO: fix no site data issue
                if not dictionary:
                    continue
                new_usernames = dictionary.get('ids_usernames')
                if new_usernames:
                    for u, utype in new_usernames.items():
                        usernames[u] = utype

                if dictionary.get("status").status == QueryStatus.CLAIMED:
                    exists_counter += 1
                    file.write(dictionary["url_user"] + "\n")
            file.write(f"Total Websites Username Detected On : {exists_counter}")
            file.close()

        if args.csv:
            save_csv_report(username, results, result_path + "csv")

        pathPDF = None
        pathHTML = None
        if args.html:
            pathHTML = result_path + "html"
        if args.pdf:
            pathPDF = result_path + "pdf"

        if pathPDF or pathHTML:
            save_html_pdf_report(general_results, pathHTML, pathPDF)

    db.save_to_file(args.json_file)
def test_ok():
    info = extract(parse('https://ok.ru/profile/46054003')[0])

    assert info.get('uid') == '46054003'
def test_habr():
    info = extract(parse('https://habr.com/ru/users/m1rko/')[0])

    assert info.get('uid') == '1371978'
    assert info.get('username') == 'm1rko'
def test_twitter():
    info = extract(parse('https://twitter.com/esquireru')[0])

    assert info.get('uid') == '163060799'
    assert info.get('username') == 'esquireru'
    assert info.get('name') == 'Esquire Russia'
Example #29
async def main():
    version_string = '\n'.join([
        f'%(prog)s {__version__}',
        f'Socid-extractor:  {socid_version}',
        f'Aiohttp:  {aiohttp.__version__}',
        f'Requests:  {requests.__version__}',
        f'Python:  {platform.python_version()}',
    ])

    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
                            description=f"Maigret v{__version__}")
    parser.add_argument("--version",
                        action="version",
                        version=version_string,
                        help="Display version information and dependencies.")
    parser.add_argument("--info",
                        "-vv",
                        action="store_true",
                        dest="info",
                        default=False,
                        help="Display service information.")
    parser.add_argument("--verbose",
                        "-v",
                        action="store_true",
                        dest="verbose",
                        default=False,
                        help="Display extra information and metrics.")
    parser.add_argument(
        "-d",
        "--debug",
        "-vvv",
        action="store_true",
        dest="debug",
        default=False,
        help="Saving debugging information and sites responses in debug.txt.")
    parser.add_argument(
        "--site",
        action="append",
        metavar='SITE_NAME',
        dest="site_list",
        default=[],
        help=
        "Limit analysis to just the listed sites (use several times to specify more than one)"
    )
    parser.add_argument(
        "--proxy",
        "-p",
        metavar='PROXY_URL',
        action="store",
        dest="proxy",
        default=None,
        help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080")
    parser.add_argument(
        "--db",
        metavar="DB_FILE",
        dest="db_file",
        default=None,
        help=
        "Load Maigret database from a JSON file or an online, valid, JSON file."
    )
    parser.add_argument("--cookies-jar-file",
                        metavar="COOKIE_FILE",
                        dest="cookie_file",
                        default=None,
                        help="File with cookies.")
    parser.add_argument(
        "--timeout",
        action="store",
        metavar='TIMEOUT',
        dest="timeout",
        type=timeout_check,
        default=10,
        help="Time (in seconds) to wait for response to requests."
        "Default timeout of 10.0s. "
        "A longer timeout will be more likely to get results from slow sites."
        "On the other hand, this may cause a long delay to gather all results."
    )
    parser.add_argument("-n",
                        "--max-connections",
                        action="store",
                        type=int,
                        dest="connections",
                        default=100,
                        help="Allowed number of concurrent connections.")
    parser.add_argument("-a",
                        "--all-sites",
                        action="store_true",
                        dest="all_sites",
                        default=False,
                        help="Use all sites for scan.")
    parser.add_argument(
        "--top-sites",
        action="store",
        default=500,
        type=int,
        help="Count of sites for scan ranked by Alexa Top (default: 500).")
    parser.add_argument("--print-not-found",
                        action="store_true",
                        dest="print_not_found",
                        default=False,
                        help="Print sites where the username was not found.")
    parser.add_argument(
        "--print-errors",
        action="store_true",
        dest="print_check_errors",
        default=False,
        help=
        "Print errors messages: connection, captcha, site country ban, etc.")
    parser.add_argument("--submit",
                        metavar='EXISTING_USER_URL',
                        type=str,
                        dest="new_site_to_submit",
                        default=False,
                        help="URL of existing profile in new site to submit.")
    parser.add_argument("--no-color",
                        action="store_true",
                        dest="no_color",
                        default=False,
                        help="Don't color terminal output")
    parser.add_argument("--browse",
                        "-b",
                        action="store_true",
                        dest="browse",
                        default=False,
                        help="Browse to all results on default bowser.")
    parser.add_argument(
        "--no-recursion",
        action="store_true",
        dest="disable_recursive_search",
        default=False,
        help="Disable recursive search by additional data extracted from pages."
    )
    parser.add_argument(
        "--no-extracting",
        action="store_true",
        dest="disable_extracting",
        default=False,
        help="Disable parsing pages for additional data and other usernames.")
    parser.add_argument(
        "--self-check",
        action="store_true",
        default=False,
        help=
        "Do self check for sites and database and disable non-working ones.")
    parser.add_argument("--stats",
                        action="store_true",
                        default=False,
                        help="Show database statistics.")
    parser.add_argument(
        "--use-disabled-sites",
        action="store_true",
        default=False,
        help="Use disabled sites to search (may cause many false positives).")
    parser.add_argument(
        "--parse",
        dest="parse_url",
        default='',
        help="Parse page by URL and extract username and IDs to use for search."
    )
    parser.add_argument("--id-type",
                        dest="id_type",
                        default='username',
                        help="Specify identifier(s) type (default: username).")
    parser.add_argument(
        "--ignore-ids",
        action="append",
        metavar='IGNORED_IDS',
        dest="ignore_ids_list",
        default=[],
        help="Do not make search by the specified username or other ids.")
    parser.add_argument(
        "username",
        nargs='+',
        metavar='USERNAMES',
        action="store",
        help="One or more usernames to check with social networks.")
    parser.add_argument("--tags",
                        dest="tags",
                        default='',
                        help="Specify tags of sites.")
    # reports options
    parser.add_argument(
        "--folderoutput",
        "-fo",
        dest="folderoutput",
        default="reports",
        help=
        "If using multiple usernames, the output of the results will be saved to this folder."
    )
    parser.add_argument("-T",
                        "--txt",
                        action="store_true",
                        dest="txt",
                        default=False,
                        help="Create a TXT report (one report per username).")
    parser.add_argument("-C",
                        "--csv",
                        action="store_true",
                        dest="csv",
                        default=False,
                        help="Create a CSV report (one report per username).")
    parser.add_argument(
        "-H",
        "--html",
        action="store_true",
        dest="html",
        default=False,
        help="Create an HTML report file (general report on all usernames).")
    parser.add_argument(
        "-X",
        "--xmind",
        action="store_true",
        dest="xmind",
        default=False,
        help="Generate an XMind 8 mindmap report (one report per username).")
    parser.add_argument(
        "-P",
        "--pdf",
        action="store_true",
        dest="pdf",
        default=False,
        help="Generate a PDF report (general report on all usernames).")
    parser.add_argument(
        "-J",
        "--json",
        action="store",
        metavar='REPORT_TYPE',
        dest="json",
        default='',
        type=check_supported_json_format,
        help=
        f"Generate a JSON report of specific type: {', '.join(SUPPORTED_JSON_REPORT_FORMATS)}"
        " (one report per username).")

    args = parser.parse_args()

    # Logging
    log_level = logging.ERROR
    logging.basicConfig(
        format=
        '[%(filename)s:%(lineno)d] %(levelname)-3s  %(asctime)s %(message)s',
        datefmt='%H:%M:%S',
        level=log_level)

    if args.debug:
        log_level = logging.DEBUG
    elif args.info:
        log_level = logging.INFO
    elif args.verbose:
        log_level = logging.WARNING

    logger = logging.getLogger('maigret')
    logger.setLevel(log_level)

    # Usernames initial list
    usernames = {
        u: args.id_type
        for u in args.username
        if u not in ['-'] and u not in args.ignore_ids_list
    }

    parsing_enabled = not args.disable_extracting
    recursive_search_enabled = not args.disable_recursive_search

    # Make prompts
    if args.proxy is not None:
        print("Using the proxy: " + args.proxy)

    if args.parse_url:
        page, _ = parse(args.parse_url, cookies_str='')
        info = extract(page)
        text = 'Extracted ID data from webpage: ' + ', '.join(
            [f'{a}: {b}' for a, b in info.items()])
        print(text)
        for k, v in info.items():
            if 'username' in k:
                usernames[v] = 'username'
            if k in supported_recursive_search_ids:
                usernames[v] = k

    if args.tags:
        args.tags = list(set(str(args.tags).split(',')))

    if args.db_file is None:
        args.db_file = \
            os.path.join(os.path.dirname(os.path.realpath(__file__)),
                         "resources/data.json"
                         )

    if args.top_sites == 0 or args.all_sites:
        args.top_sites = sys.maxsize

    # Create notify object for query results.
    query_notify = QueryNotifyPrint(
        result=None,
        verbose=args.verbose,
        print_found_only=not args.print_not_found,
        skip_check_errors=not args.print_check_errors,
        color=not args.no_color)

    # Create object with all information about sites we are aware of.
    db = MaigretDatabase().load_from_file(args.db_file)
    get_top_sites_for_id = lambda x: db.ranked_sites_dict(top=args.top_sites,
                                                          tags=args.tags,
                                                          names=args.site_list,
                                                          disabled=False,
                                                          id_type=x)

    site_data = get_top_sites_for_id(args.id_type)

    if args.new_site_to_submit:
        is_submitted = await submit_dialog(db, args.new_site_to_submit,
                                           args.cookie_file)
        if is_submitted:
            db.save_to_file(args.db_file)

    # Database self-checking
    if args.self_check:
        print('Maigret sites database self-checking...')
        is_need_update = await self_check(db,
                                          site_data,
                                          logger,
                                          max_connections=args.connections)
        if is_need_update:
            if input('Do you want to save changes permanently? [Yn]\n').lower(
            ) == 'y':
                db.save_to_file(args.db_file)
                print('Database was successfully updated.')
            else:
                print(
                    'Updates will be applied only for current search session.')
        print(db.get_scan_stats(site_data))

    if args.stats:
        print(db.get_db_stats(db.sites_dict))

    # Make the reports folder if it does not exist
    os.makedirs(args.folderoutput, exist_ok=True)
    report_path = args.folderoutput

    # Define one report filename template
    report_filepath_tpl = os.path.join(args.folderoutput,
                                       'report_{username}{postfix}')

    # Database stats
    # TODO: verbose info about filtered sites
    # enabled_count = len(list(filter(lambda x: not x.disabled, site_data.values())))
    # print(f'Sites in database, enabled/total: {enabled_count}/{len(site_data)}')

    if usernames == {}:
        # magic params to exit after init
        query_notify.warning('No usernames to check, exiting.')
        sys.exit(0)

    if not site_data:
        query_notify.warning('No sites to check, exiting!')
        sys.exit(2)
    else:
        query_notify.warning(
            f'Starting a search on top {len(site_data)} sites from the Maigret database...'
        )
        if not args.all_sites:
            query_notify.warning(
                'You can run a search over the full list of sites with the `-a` flag',
                '!')

    already_checked = set()
    general_results = []

    while usernames:
        username, id_type = list(usernames.items())[0]
        del usernames[username]

        if username.lower() in already_checked:
            continue
        else:
            already_checked.add(username.lower())

        if username in args.ignore_ids_list:
            query_notify.warning(
                f'Skipping the search by username {username} because it\'s marked as ignored.'
            )
            continue

        # check for characters not generally supported by sites
        found_unsupported_chars = set(unsupported_characters).intersection(
            set(username))

        if found_unsupported_chars:
            pretty_chars_str = ','.join(
                map(lambda s: f'"{s}"', found_unsupported_chars))
            query_notify.warning(
                f'Found unsupported URL characters: {pretty_chars_str}, skip search by username "{username}"'
            )
            continue

        sites_to_check = get_top_sites_for_id(id_type)

        results = await maigret(
            username,
            dict(sites_to_check),
            query_notify,
            proxy=args.proxy,
            timeout=args.timeout,
            is_parsing_enabled=parsing_enabled,
            id_type=id_type,
            debug=args.verbose,
            logger=logger,
            cookies=args.cookie_file,
            forced=args.use_disabled_sites,
            max_connections=args.connections,
        )

        general_results.append((username, id_type, results))

        # TODO: tests
        for website_name in results:
            dictionary = results[website_name]
            # TODO: fix no site data issue
            if not dictionary or not recursive_search_enabled:
                continue

            new_usernames = dictionary.get('ids_usernames')
            if new_usernames:
                for u, utype in new_usernames.items():
                    usernames[u] = utype

            for url in dictionary.get('ids_links', []):
                for s in db.sites:
                    u = s.detect_username(url)
                    if u:
                        usernames[u] = 'username'

        # reporting for a one username
        if args.xmind:
            filename = report_filepath_tpl.format(username=username,
                                                  postfix='.xmind')
            save_xmind_report(filename, username, results)
            query_notify.warning(
                f'XMind report for {username} saved in {filename}')

        if args.csv:
            filename = report_filepath_tpl.format(username=username,
                                                  postfix='.csv')
            save_csv_report(filename, username, results)
            query_notify.warning(
                f'CSV report for {username} saved in {filename}')

        if args.txt:
            filename = report_filepath_tpl.format(username=username,
                                                  postfix='.txt')
            save_txt_report(filename, username, results)
            query_notify.warning(
                f'TXT report for {username} saved in {filename}')

        if args.json:
            filename = report_filepath_tpl.format(username=username,
                                                  postfix=f'_{args.json}.json')
            save_json_report(filename,
                             username,
                             results,
                             report_type=args.json)
            query_notify.warning(
                f'JSON {args.json} report for {username} saved in {filename}')

    # reporting for all the result
    if general_results:
        if args.html or args.pdf:
            query_notify.warning('Generating report info...')
        report_context = generate_report_context(general_results)
        # determine main username
        username = report_context['username']

        if args.html:
            filename = report_filepath_tpl.format(username=username,
                                                  postfix='.html')
            save_html_report(filename, report_context)
            query_notify.warning(
                f'HTML report on all usernames saved in {filename}')

        if args.pdf:
            filename = report_filepath_tpl.format(username=username,
                                                  postfix='.pdf')
            save_pdf_report(filename, report_context)
            query_notify.warning(
                f'PDF report on all usernames saved in {filename}')
    # update database
    db.save_to_file(args.db_file)
def test_d3():
    info = extract(parse('https://d3.ru/user/deer00hunter')[0])

    assert info.get('uid') == '75504'
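The test functions scattered through this listing are plain pytest-style tests (bare asserts, no classes or fixtures); a minimal sketch of invoking one programmatically, assuming pytest is installed and the tests are saved to a file such as test_extractor.py (the filename is an assumption):

import pytest

# Equivalent to running `pytest -v -k test_d3 test_extractor.py` from the shell.
pytest.main(['-v', '-k', 'test_d3', 'test_extractor.py'])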