def yandexSearch(api_user, api_key, top_k_results, prefixs, suffixs, outputFileName):
    """Run a Yandex search for every prefix+suffix pair and save top URLs.

    Requires a Yandex XML-search account. Registration is free; see
    https://pypi.org/project/yandex-search/ for more details.

    :param api_user: [str] your credentials username
    :param api_key: [str] your credentials api key
    :param top_k_results: [int] only keep top k results
    :param prefixs: [list] prefix for searches
    :param suffixs: [list] suffix for searches
    :param outputFileName: [str] output name (".txt" is appended)
    :return: void
    """
    yandex = yandex_search.Yandex(api_user=api_user, api_key=api_key)
    # Context manager guarantees the output file is flushed and closed even
    # on error (the original leaked the handle).
    with open(outputFileName + ".txt", 'w') as output:
        for prefix in prefixs:
            for suffix in suffixs:
                query = prefix + suffix
                output.write("=====" + query + "=====" + "\n")
                try:
                    results = yandex.search(query).items
                    # Slice instead of indexing so queries with fewer than
                    # top_k_results hits no longer raise IndexError.
                    for item in results[:top_k_results]:
                        output.write(str(item['url']) + "\n")
                except Exception as exc:
                    # Narrowed from a bare except: keep the best-effort
                    # behavior but say what actually failed.
                    print("quest failed: {}".format(exc))
def main(args):
    """Fetch Yandex search results for each query and save the pages to disk.

    For every non-empty line in ``args.queries_file``, pages
    ``0 .. args.get_pages-1`` of Yandex results are retrieved and each hit's
    HTML is written to ``args.out_dir`` as ``<query idx>_<result idx>.html``.

    :param args: namespace with out_dir, user, key, queries_file, get_pages
    """
    os.makedirs(args.out_dir, exist_ok=True)
    logger = setup_logger()
    yandex = yandex_search.Yandex(api_user=args.user, api_key=args.key)
    with open(args.queries_file, 'r') as queries_f:
        for query_i, query in enumerate(queries_f):
            query = query.strip()
            if not query:
                continue
            logger.info(f'Query {query_i}: {query}')
            query_res_i = 0
            for page_i in range(args.get_pages):
                for found_item in yandex.search(query, page=page_i).items:
                    url = found_item['url']
                    logger.info(f'Found item {query_res_i}: {url}')
                    resp = requests.get(url)
                    out_path = os.path.join(
                        args.out_dir,
                        f'{query_i:03d}_{query_res_i:05d}.html')
                    # resp.content is bytes, so the file must be opened in
                    # binary mode (the original used 'w' and raised
                    # TypeError on the first write).
                    with open(out_path, 'wb') as item_f:
                        item_f.write(resp.content)
                    query_res_i += 1
def get_data(phishtank_key, force_update=False):
    """Return (phishing, benign) URL DataFrames, downloading/caching as CSV.

    Phishing URLs come from PhishTank; "benign" URLs are the top-10 Yandex
    hits for Google-autocomplete suggestions of every keyword in the
    "keywordList" file.  Both sets are cached on disk and only rebuilt when
    the cache file is missing or force_update is True.

    :param phishtank_key: PhishTank API key interpolated into the data URL
    :param force_update: rebuild both CSV caches even if they exist
    :return: tuple (phishtank DataFrame, common DataFrame)
    """
    if not os.path.isfile("phishtank.csv") or force_update:
        urllib.request.urlretrieve(
            "http://data.phishtank.com/data/{}/online-valid.csv".format(
                phishtank_key),
            "phishtank.csv", show_progress)
    if not os.path.isfile("common.csv") or force_update:
        data = {"url": []}
        # The with-block closes the file; the original's extra close() call
        # inside it was redundant.
        with open("keywordList") as wordlist:
            keywords = wordlist.read().split("\n")
        headers = {'User-agent': 'Mozilla/5.0'}
        suggestions = []
        for word in keywords:
            URL = (
                "http://suggestqueries.google.com/complete/search?client=firefox&q="
                + word)
            response = requests.get(URL, headers=headers)
            result = json.loads(response.content.decode('utf-8'))
            for r in result[1]:
                suggestions.append(r)
        # SECURITY NOTE(review): credentials are hard-coded here; they
        # should come from configuration or the environment.
        yandex = yandex_search.Yandex(
            api_user='******',
            api_key='03.1041007756:28d93f7d79ff3c91b861da63e38a8e5c')
        for word in suggestions:
            top10 = yandex.search(word).items[0:10]
            for site in top10:
                # Each result item is a dict; store only its URL so the
                # "url" column holds strings (the original appended the
                # whole item dict).
                data["url"].append(site['url'])
        common = pd.DataFrame(data)
        common.to_csv("common.csv")
    urls = (pd.read_csv("phishtank.csv"), pd.read_csv("common.csv"))
    return urls
def check_url(self, url):
    """Classify *url*; blacklist it and bump spam points when flagged.

    Builds feature data for the URL (using a Yandex search client) and,
    if the model predicts it is malicious, blacklists the domain and
    increments the spam score by the configured sensitivity.
    """
    search_engine = yandex_search.Yandex(
        api_user='******',
        api_key='03.1042294429:b8e679f9acadef49ebab0d9726ccef58')
    features = self.get_url_data(url, search_engine, timeout=10)
    if not self.aiPredict(features):
        return
    self.add_domain_to_blacklist(url)
    self.spam_points += self.sensitivity
def test_no_results():
    """A no-results XML reply must raise NoResultsException."""
    @all_requests
    def mocked_no_results(url, request):
        # Serve the canned "no results" fixture for every request.
        with open('tests/noresults_error.xml', 'rb') as fixture:
            body = fixture.read()
        return {'content': body}

    with HTTMock(mocked_no_results):
        client = yd.Yandex(api_user='******', api_key='fake')
        with pytest.raises(yd.NoResultsException):
            client.search(query='asdf')
def test_xml_parse():
    """A successful XML reply parses into 7 items carrying the expected keys."""
    @all_requests
    def mocked_success(url, request):
        # Serve the canned success fixture for every request.
        with open('tests/success.xml', 'rb') as fixture:
            body = fixture.read()
        return {'status_code': 200, 'content': body}

    with HTTMock(mocked_success):
        client = yd.Yandex(api_user='******', api_key='fake')
        results = client.search(query='asdf')
        assert results.found['strict'] == '7'
        assert len(results.items) == 7
        expected_keys = ('url', 'title', 'snippet', 'domain')
        for item in results.items:
            for key in expected_keys:
                assert key in item
import yandex_search

# Demo: run one Yandex XML-search query and dump the raw result items.
# NOTE(review): real account credentials used to be pasted in a comment
# here — keep API keys out of source control.
client = yandex_search.Yandex(api_user='******', api_key='pt598t6x')
query = "котики википедия"
print(client.search(query).items)
### ANSWERS DICT LIKE THIS ### ('Мыши', 'Пчёлы', 'Мухи') # 3 ELEMENTS!!!
import difflib
import yandex_search
import config

yandex = yandex_search.Yandex(api_user=config.API_USER, api_key=config.API_KEY)

# Map look-alike Latin letters to their Cyrillic counterparts so
# mixed-alphabet text compares as equal.
replacement = {'x': 'х', 'o': 'о', 'у': 'у', 'e': 'е', 'a': 'а'}


class Compare():
    """One quiz question with exactly three candidate answers.

    Normalizes the question (lowercase + Latin→Cyrillic look-alike
    substitution) on construction.
    """

    def __init__(self, question: str, answers: list):
        """:param question: question text (must be non-empty)
        :param answers: exactly three answer strings
        :raises Exception: on empty inputs or an answer count other than 3
        """
        self.question = question.lower()
        self.answers = answers
        if not self.question or \
                not self.answers:
            raise Exception("Question or answers is not defined")
        if len(answers) != 3:  # simplified from "< 3 or > 3"
            raise Exception("Answers list is not correct")
        # str.replace returns a new string; the original discarded the
        # result, so no substitution was ever applied.  Rebind instead.
        for (k, v) in replacement.items():
            self.question = self.question.replace(k, v)
            self.answers = [a.replace(k, v) for a in self.answers]

    def check(self):
        # NOTE(review): self.search() is not defined in the visible part of
        # this file; this method appears truncated here.
        r = self.search()
import yandex_search
import json

# Credentials are read from a JSON file kept outside the repository.
# Context manager closes the handle (the original leaked it).
with open("../api.txt") as api_file:
    api = json.load(api_file)
user = api["api_user"]
key = api["api_key"]
yandex = yandex_search.Yandex(api_user=user, api_key=key)


def yandex_query(key_word):
    """Search Yandex for *key_word* and print "title - url" per result.

    :param key_word: query string passed straight to the search API
    """
    results = yandex.search(key_word)
    # Iterate every item directly; the original used range(len - 1) and
    # silently dropped the last result.
    for c in results.items:
        print(c['title'], ' - ', c['url'], '\n')
import re
import os
import yandex_search
import pickle
from urllib import error
from bs4 import BeautifulSoup as BS
import cfscrape
import requests
import shutil
import data

# noinspection PyBroadException
# Mods whose metadata is known to be unusable / handled specially.
mods_exception = ['VoxelMap']

# SECURITY NOTE(review): hard-coded credentials; move to configuration.
yandex = yandex_search.Yandex(
    api_user='******', api_key='03.907013875:1908728c0c5f64a885f21721a1f1f4ee')


def unzip(file_path):
    # Extract only 'mcmod.info' from the archive at file_path into the
    # current directory.  Returns False when the archive has no such
    # member, True otherwise.
    # NOTE(review): zipfile is not imported in the visible part of this
    # file — confirm it is imported elsewhere.
    try:
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            zip_ref.extract('mcmod.info')
    except KeyError:
        return False
    return True


def get_mod_info(file_name):
    # Read mod metadata from the mcmod.info inside the named mod archive
    # under data.user_mc_path.
    # NOTE(review): this function is truncated in the visible chunk.
    if unzip(os.path.join(data.user_mc_path, file_name)):
        with open('mcmod.info', 'rb') as file:
import additional
import requests
from telegram.ext import Updater, MessageHandler, Filters, CallbackQueryHandler
from apixu.client import ApixuClient
import logging
from azure.cognitiveservices.search.imagesearch import ImageSearchAPI
from msrest.authentication import CognitiveServicesCredentials
from newsapi import NewsApiClient

# Log everything to main.log; DEBUG level also captures library traffic.
logging.basicConfig(filename='main.log',
                    format='%(asctime)s %(levelname)s %(name)s %(message)s',
                    level=logging.DEBUG)

# Third-party service clients, all keyed from the external keys module.
# NOTE(review): keys, apiai and yandex_search are not imported in the
# visible part of this file — confirm they are imported elsewhere.
newsapi = NewsApiClient(api_key=keys.news_api)
app = apiai.ApiAI(keys.apiai)
yandex = yandex_search.Yandex(api_key=keys.yandex_key, api_user=keys.yandex_user)
client = ApixuClient(keys.apixu)
image_search = ImageSearchAPI(
    credentials=CognitiveServicesCredentials(keys.visual_search_key))

# Per-session dialogue state.
session_storage = {}

err = " Если у вас постоянно возникает ошибка с поиском, поиском по изображению или новостями," \
    " то рекомендую вам перезапустить меня командой /start ."


def get_toponym_delta(toponym):
    # Derive a span from the toponym's bounding box; corners arrive as
    # "lon lat" strings under boundedBy.Envelope.
    # NOTE(review): this function is truncated mid-expression in the
    # visible chunk.
    toponym_bounded_lower = tuple(
        toponym["boundedBy"]["Envelope"]["lowerCorner"].split(" "))
    toponym_bounded_upper = tuple(
        toponym["boundedBy"]["Envelope"]["upperCorner"].split(" "))
    return str(abs(float(toponym_bounded_lower[0]) -
def _append_url_features(data, url_data, label):
    # Append one URL's feature vector to the dataset columns.
    # label: 1 for phishing rows, 0 for benign rows.
    data["phishing"].append(label)
    data["length"].append(url_data["length"])
    data["dir_num"].append(url_data["dir_num"])
    data["special_char_num"].append(url_data["special_char_num"])
    data["tld_trust"].append(url_data["tld_trust"])
    data["index_num"].append(url_data["index_num"])
    data["subdomain_len"].append(url_data["subdomain_len"])
    data["subdomain_num"].append(url_data["subdomain_num"])
    data["out_resources"].append(url_data["out_resources"])
    data["robots_entries"].append(url_data["robots_entries"])
    data["url"].append(url_data["url"])


def extract_data(raw_data, force_update=False):
    """Build (or load) the URL feature dataset from phishing/benign URLs.

    Iterates both URL tables, extracting per-URL features via get_url_data,
    checkpointing every 300 rows to dataset<reps>.csv and resuming from the
    largest checkpoint already on disk.  The finished table is written to
    dataset.csv, which is read back and returned.

    :param raw_data: tuple (phishing DataFrame, benign DataFrame)
    :param force_update: rebuild dataset.csv even if it already exists
    :return: pandas DataFrame loaded from dataset.csv
    """
    reps = 0
    phishing, benign = raw_data[0], raw_data[1]
    data = {
        "phishing": [],
        "length": [],
        "out_resources": [],
        "dir_num": [],
        "special_char_num": [],
        "robots_entries": [],
        "tld_trust": [],
        "index_num": [],
        "subdomain_len": [],
        "subdomain_num": [],
        "url": []
    }
    if not os.path.isfile("dataset.csv") or force_update:
        # Resume from the largest 300-row checkpoint file on disk.  The
        # original passed the bare int (largest_dataset + 300) to
        # os.path.isfile, so the resume never matched a checkpoint.
        largest_dataset = 0
        while os.path.isfile("dataset{}.csv".format(largest_dataset + 300)):
            largest_dataset += 300
        try:
            # Drop phishing entries submitted before 2020.
            old = []
            for index, row in phishing.iterrows():
                date = datetime.strptime(row["submission_time"],
                                         "%Y-%m-%dT%H:%M:%S+00:00")
                if date.year < 2020:
                    old.append(index)
            phishing = phishing.drop(old)
            # SECURITY NOTE(review): hard-coded credentials; move to config.
            yandex = yandex_search.Yandex(
                api_user='******',
                api_key='03.1041007756:28d93f7d79ff3c91b861da63e38a8e5c')
            for index, row in phishing.iterrows():
                reps += 1
                if reps < largest_dataset:
                    continue
                if reps % 300 == 0:
                    pd.DataFrame(data).to_csv("dataset{}.csv".format(reps))
                url = row['url']
                print("[INFO]: {} : {}".format(reps, url))
                _append_url_features(data, get_url_data(url, yandex), 1)
            for index, row in benign.iterrows():
                reps += 1
                if reps < largest_dataset:
                    continue
                if reps % 300 == 0:
                    pd.DataFrame(data).to_csv("dataset{}.csv".format(reps))
                url = row['url']
                print("[INFO]: {} : {}".format(reps, url))
                # Benign rows are labeled 0; the original appended 1 here
                # too, which made every sample positive.
                _append_url_features(data, get_url_data(url, yandex), 0)
            pd.DataFrame(data).to_csv("dataset.csv")
        except Exception as e:
            print("[ERROR]: {}".format(e))
    return pd.read_csv("dataset.csv")
import yandex_search
import sys

# CLI: print Yandex results restricted to the site given as argv[1].
if len(sys.argv) < 2:
    print("algo deu errado")
else:
    target_site = sys.argv[1]
    client = yandex_search.Yandex(api_user='******', api_key='mykey')
    print(client.search('site:' + target_site).items)
# Parameters FILE_NAME = "sites" # name of input file NUM_OF_RESULTS = 5 # number of results kept for each search OUTPUT_FILE_NAME = "output" # name of output file KEYWORD = " Privacy Policy" # keyword to search; format = company_name + keyword START = 8000 # start with # of company END = 8500 # terminate when reaches # of company API_KEY = "" count = 0 file = open(FILE_NAME + ".txt", 'r') ts = time.time() st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S') output = open(OUTPUT_FILE_NAME + st + ".txt", 'w') yandex = yandex_search.Yandex(api_user='******', api_key=API_KEY) for line in file: if count == END: break count += 1 if count < START: continue if line.split()[1] == "Hidden": continue output.write("=====" + str(count) + " " + line.split()[1] + "=====" + "\n") try: results = yandex.search("'" + line.split()[1] + KEYWORD + "'").items print("Request#" + str(count) + " succeeded:" + line.split()[1]) except: