Code example #1
def download(query,
             limit=100,
             output_dir='dataset',
             adult_filter_off=True,
             force_replace=False,
             timeout=60,
             verbose=True):

    # engine = 'bing'
    if adult_filter_off:
        adult = 'off'
    else:
        adult = 'on'

    image_dir = Path(output_dir).joinpath(query).absolute()

    if force_replace:
        if image_dir.is_dir():
            shutil.rmtree(image_dir)

    # check directory and create if necessary
    try:
        if not image_dir.is_dir():
            image_dir.mkdir(parents=True)

    except Exception as e:
        print('[Error] Failed to create directory.', e)
        sys.exit(1)

    print("[%] Downloading Images to {}".format(str(image_dir.absolute())))
    bing = Bing(query, limit, image_dir, adult, timeout, verbose)
    bing.run()
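
A minimal usage sketch for this variant, assuming this download() has been imported; the query and limit are arbitrary:

# Fetch up to 10 "cat" images into ./dataset/cat, wiping any earlier run.
download('cat', limit=10, force_replace=True)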
Code example #2
def download(query,
             limit=100,
             output_dir='dataset',
             image_dir='sample',
             adult_filter_off=True,
             force_replace=False,
             timeout=60):

    # engine = 'bing'
    if adult_filter_off:
        adult = 'off'
    else:
        adult = 'on'

    cwd = os.getcwd()
    #     image_dir = os.path.join(cwd, output_dir, query)

    if force_replace:
        # NOTE: image_dir defaults to 'sample' and is not one of the
        # directories created below; only that path is removed here.
        if os.path.isdir(image_dir):
            shutil.rmtree(image_dir)

    # check directory and create if necessary
    try:
        if not os.path.isdir("{}/{}/".format(cwd, output_dir)):
            os.makedirs("{}/{}/".format(cwd, output_dir))
    except OSError:
        pass
    if not os.path.isdir("{}/{}/{}".format(cwd, output_dir, query)):
        os.makedirs("{}/{}/{}".format(cwd, output_dir, query))

    bing = Bing(query, limit, output_dir, adult, timeout)
    bing.run()
Code example #3
def download(query, limit=100, adult_filter_off=True, force_replace=False):

    engine = 'bing'
    if adult_filter_off:
        adult = 'off'
    else:
        adult = 'on'

    cwd = os.getcwd()
    image_dir = os.path.join(cwd, 'dataset', engine, query)

    if force_replace:
        if os.path.isdir(image_dir):
            shutil.rmtree(image_dir)

    # check directory and create if necessary
    try:
        if not os.path.isdir("{}/dataset/".format(cwd)):
            os.makedirs("{}/dataset/".format(cwd))
    except OSError:
        pass
    if not os.path.isdir("{}/dataset/{}/{}".format(cwd, engine, query)):
        os.makedirs("{}/dataset/{}/{}".format(cwd, engine, query))

    Bing().bing(query, limit, adult)
Code example #4
File: downloader.py  Project: elgood/CoverGenerator
def download(query, limit=100, output_dir='dataset', adult_filter_off=True,
             timeout=60, dedup=True):

    # engine = 'bing'
    if adult_filter_off:
        adult = 'off'
    else:
        adult = 'on'

    # NOTE: 'dedup' is accepted above but never used in this snippet.
    bing = Bing(query, limit, output_dir, adult, timeout)
    bing.run()
Code example #5
    def __init__(self,
                 model_file,
                 pretrained_file,
                 gpu=False,
                 mean=None,
                 input_scale=None,
                 raw_scale=None,
                 channel_swap=None,
                 context_pad=None,
                 weights_1st_stage_bing=None,
                 sizes_idx_bing=None,
                 weights_2nd_stage_bing=None,
                 num_bbs_psz_bing=130,
                 num_bbs_final_bing=1500):
        """
        Take
        gpu, mean, input_scale, raw_scale, channel_swap: params for
            preprocessing options.
        context_pad: amount of surrounding context to take s.t. a `context_pad`
            sized border of pixels in the network input image is context, as in
            R-CNN feature extraction.
        """
        caffe.Net.__init__(self, model_file, pretrained_file)
        self.set_phase_test()

        if gpu:
            self.set_mode_gpu()
        else:
            self.set_mode_cpu()

        if mean is not None:
            self.set_mean(self.inputs[0], mean)
        if input_scale is not None:
            self.set_input_scale(self.inputs[0], input_scale)
        if raw_scale is not None:
            self.set_raw_scale(self.inputs[0], raw_scale)
        if channel_swap is not None:
            self.set_channel_swap(self.inputs[0], channel_swap)

        self.configure_crop(context_pad)

        if (bing_flag and weights_1st_stage_bing is not None
                and sizes_idx_bing is not None
                and weights_2nd_stage_bing is not None):
            self.bing = Bing(weights_1st_stage=weights_1st_stage_bing,
                             sizes_idx=sizes_idx_bing,
                             weights_2nd_stage=weights_2nd_stage_bing,
                             num_bbs_per_size_1st_stage=num_bbs_psz_bing,
                             num_bbs_final=num_bbs_final_bing)
        else:
            self.bing = None
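
A hedged instantiation sketch; the class name Detector and all file paths below are placeholders, since the snippet shows only the constructor:

# Hypothetical wiring of the constructor above; every path is a placeholder.
det = Detector('deploy.prototxt', 'weights.caffemodel',
               gpu=False,
               context_pad=16,
               weights_1st_stage_bing='bing_w1st.bin',
               sizes_idx_bing='bing_sizes.bin',
               weights_2nd_stage_bing='bing_w2nd.bin')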
Code example #6
 def bing_search(self):
     key = my_keys.MICROSOFT_API_KEY
     bing = Bing(key)
     items = bing.web_search(self.query, 50, ['Title', 'Url', 'Description'])
     pages = []
     for item in items:
         if type(item) == str:
             continue
         page = WebPage(item['Url'])
         page.query = self.query
         # normalize field names to match the Google client's format
         page.title = item['Title']
         page.snippet = item['Description']
         pages.append(page)
     return pages
Code example #7
def download(query,
             limit=100,
             output_dir='dataset',
             adult_filter_off=True,
             force_replace=False,
             timeout=60):
    # engine = 'bing'
    if adult_filter_off:
        adult = 'off'
    else:
        adult = 'on'
    try:
        li = pd.read_csv('links.csv')
        link = li['Links'].to_list()
        fname = li['Files'].to_list()
        queries = li['Queries'].to_list()
        # start = fname.split('.')[0] + 1
    except Exception:  # start fresh if links.csv is missing or unreadable
        link = []
        fname = []
        queries = []
        # start = '1'
    cwd = os.getcwd()
    image_dir = os.path.join(cwd, output_dir, query)

    if force_replace:
        if os.path.isdir(image_dir):
            shutil.rmtree(image_dir)

    # check directory and create if necessary
    try:
        if not os.path.isdir("{}/{}/".format(cwd, output_dir)):
            os.makedirs("{}/{}/".format(cwd, output_dir))
    except OSError:
        pass
    if not os.path.isdir("{}/{}/{}".format(cwd, output_dir, query)):
        os.makedirs("{}/{}/{}".format(cwd, output_dir, query))

    bing = Bing(query, limit, output_dir, adult, timeout, link, fname, queries)
    links, files, queries = bing.run()
    d = {'Files': files, 'Queries': queries, 'Links': links}
    lin = pd.DataFrame(d)
    # DataFrame.to_csv overwrites an existing file, so write unconditionally
    lin.to_csv("{}/{}".format(cwd, "links.csv"))
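
A hypothetical back-to-back run; the second call reads links.csv back in, so the Bing helper starts with the first call's download history:

download('guitar', limit=20)
download('guitar', limit=40)  # resumes with the links/files/queries saved above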
Code example #8
def extract_features(extractor, img_idx):
    bing_params = bing_param_setting(bing_param_file)
    bing_detector = Bing(bing_params['w_1st'],
                         bing_params['sizes'],
                         bing_params['w_2nd'],
                         num_bbs_per_size_1st_stage=bing_params["num_win_psz"],
                         num_bbs_final=bing_params["num_bbs"])

    pca = joblib.load("data/learned_PCA.pkl")

    relations = {}
    formatted_proposals = []
    indexes = []

    # read the image list and keep only this call's 200-name window
    with open(img_list_file) as list_f:
        img_lst = list_f.read().split()
    img_lst = img_lst[img_idx - 200:img_idx]

    for img_name in img_lst:
        img_name = img_name.strip()
        if img_name == "" or img_name[-3:] != "jpg":
            continue
        img = os.path.join(data_dir, img_name)

        # k : number of regions
        proposals, rels = get_proposals(extractor, bing_detector, img, k=30)

        for idx in range(len(proposals[1])):
            indexes.append((img_name, idx))
        formatted_proposals.append(proposals)
        relations[img_name] = reduce_rel(rels)

    features = extractor.extract_features(formatted_proposals, layer='fc6')
    features = post_process(features, pca)

    # pickle this batch's features, index mapping, and relations
    with open("data/features/%d.pkl" % img_idx, "wb") as f:
        pickle.dump(features, f)

    with open("data/indexes/%d.pkl" % img_idx, "wb") as f:
        pickle.dump(indexes, f)

    with open("data/relations/%d.pkl" % img_idx, "wb") as f:
        pickle.dump(relations, f)
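
A hedged call sketch; given the slicing above, passing 200 processes the first 200 image names ('extractor' comes from the surrounding project):

# Hypothetical: handles img_lst[0:200] and writes data/features/200.pkl,
# data/indexes/200.pkl and data/relations/200.pkl.
extract_features(extractor, 200)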
Code example #9
File: __init__.py  Project: flebel/geocoder
def bing(location, key='', proxies='', timeout=5.0):
    """
    Retrieves geocoding data from Bing's REST location API.

        >>> key = 'XXXXX'
        >>> g = geocoder.bing('Medina, Washington', key=key)
        >>> g.latlng
        (47.615821838378906, -122.23892211914062)
        >>> g.country
        'United States'
        ...

    Official Docs
    -------------
    http://msdn.microsoft.com/en-us/library/ff701714.aspx
    """
    provider = Bing(location, key=key)
    return Geocoder(provider, proxies=proxies, timeout=timeout)
Code example #10
def extract_features(extractor):
    bing_params = bing_param_setting(bing_param_file)
    bing_detector = Bing(bing_params['w_1st'],
                         bing_params['sizes'],
                         bing_params['w_2nd'],
                         num_bbs_per_size_1st_stage=bing_params["num_win_psz"],
                         num_bbs_final=bing_params["num_bbs"])

    pca = joblib.load("data/learned_PCA.pkl")

    query_list = open(query_list_file, "r")

    query_dict = {}

    for q in query_list:
        query_name = q.strip()
        if query_name == "":
            continue
        print(query_name)

        query, crop = ox5k_get_query(gt_dir, query_name)
        img = os.path.join(data_dir, query)

        proposals, rels = get_proposals(extractor,
                                        bing_detector,
                                        img,
                                        k=30,
                                        crop=crop)

        formatted_proposals = [proposals]
        features = extractor.extract_features(formatted_proposals, layer='fc6')
        features = post_process(features, pca)

        query_dict[query_name] = {}
        query_dict[query_name]["feature"] = features
        query_dict[query_name]["relation"] = reduce_rel(rels)

    query_list.close()

    f = open("data/query.pkl", "wb")
    pickle.dump(query_dict, f)
    f.close()
Code example #11
def download(query,
             limit=100,
             adult_filter_off=True,
             force_replace=False,
             output_dir=None,
             timeout=30,
             page_counter_limit=5):

    engine = 'bing'
    if adult_filter_off:
        adult = 'off'
    else:
        adult = 'on'

    if output_dir is None:
        output_dir = os.path.join(os.getcwd(), 'dataset')
    query_dir = os.path.join(output_dir, query)

    if force_replace:
        if os.path.isdir(query_dir):
            shutil.rmtree(query_dir)

    # check output directory and create if necessary
    try:
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
    except OSError:
        pass
    # check query directory and create if necessary
    print('Query dir: {}'.format(query_dir))
    if not os.path.isdir(query_dir):
        os.makedirs(query_dir)

    Bing().bing(query=query,
                limit=limit,
                adlt=adult,
                output_dir=query_dir,
                timeout=timeout,
                page_counter_limit=page_counter_limit)
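
A hypothetical call of the variant above; leaving output_dir as None stores images under <cwd>/dataset/<query>:

download('sunset', limit=15)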
Code example #12
File: downloader.py  Project: mashuk999/botyoutube
def download(query,
             limit=100,
             output_dir='dataset',
             adult_filter_off=True,
             force_replace=False,
             timeout=60):
    try:

        # engine = 'bing'
        if adult_filter_off:
            adult = 'off'
        else:
            adult = 'on'

        cwd = os.getcwd()
        image_dir = os.path.join(cwd, output_dir, query)

        if force_replace:
            if os.path.isdir(image_dir):
                shutil.rmtree(image_dir)

        # check directory and create if necessary
        try:
            if not os.path.isdir("{}/{}/".format(cwd, output_dir)):
                os.makedirs("{}/{}/".format(cwd, output_dir))
        except OSError:
            pass
        if not os.path.isdir("{}/{}/{}".format(cwd, output_dir, query)):
            os.makedirs("{}/{}/{}".format(cwd, output_dir, query))

        bing = Bing(query, limit, output_dir, adult, timeout)
        bing.run()
    except Exception as e:
        print('downloader.py')
        print(e)


if __name__ == '__main__':
    download('abitabh', limit=10, timeout=1)
Code example #13
def collect_data():
    key = "TIwk7p7nC7HlKijRb5Z42IHx0S2+MKHqAS0BNIOdKqM"
    name_list = ['Hillary', 'bill']
    bing = Bing(key)
    save_dir = './raw_image/'

    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    for name in name_list:
        save_dir = './raw_image/' + name + '/'

        if not os.path.exists(save_dir):
            os.mkdir(save_dir)

        results = bing.web_search(name, 3, ["MediaUrl"])

        for num, result in enumerate(results):
            try:
                scrape_image(result['MediaUrl'], save_dir + str(num) + '.jpg')
            except Exception as e:
                print(e)
                continue
Code example #14
def download(query,
             limit=100,
             output_dir='dataset',
             adult_filter_off=True,
             force_replace=False,
             timeout=60,
             no_directory=False):

    # engine = 'bing'
    if adult_filter_off:
        adult = 'off'
    else:
        adult = 'on'

    cwd = os.getcwd()

    image_dir = os.path.join(cwd, output_dir, query)

    if force_replace:
        if os.path.isdir(image_dir):
            shutil.rmtree(image_dir)

    # check output directory and create if necessary
    try:
        if not os.path.isdir("{}/{}/".format(cwd, output_dir)):
            os.makedirs("{}/{}/".format(cwd, output_dir))
    except OSError:
        pass

    # create extra directories if they don't exist and if no_directory parameter is false
    if not no_directory:
        if not os.path.isdir("{}/{}/{}".format(cwd, output_dir, query)):
            # print("making dirs")
            os.makedirs("{}/{}/{}".format(cwd, output_dir, query))

    bing = Bing(query, limit, output_dir, adult, timeout, no_directory)
    bing.run()
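
A hypothetical call showing the extra flag; with no_directory=True the per-query subfolder is skipped and images go straight into the output directory:

download('lighthouse', limit=5, no_directory=True)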
Code example #15
def download(query,
             limit=100,
             output_dir='dataset',
             adult_filter_off=True,
             force_replace=False,
             timeout=60,
             visited_urls=None,
             return_visited_url=False):

    # engine = 'bing'
    if adult_filter_off:
        adult = 'off'
    else:
        adult = 'on'

    # visited_urls=None avoids sharing one dict across calls (mutable default)
    if visited_urls is None:
        visited_urls = {}

    cwd = os.getcwd()
    image_dir = os.path.join(cwd, output_dir, query)

    if force_replace:
        if os.path.isdir(image_dir):
            shutil.rmtree(image_dir)

    # check directory and create if necessary
    try:
        if not os.path.isdir("{}/{}/".format(cwd, output_dir)):
            os.makedirs("{}/{}/".format(cwd, output_dir))
    except OSError:
        pass
    if not os.path.isdir("{}/{}/{}".format(cwd, output_dir, query)):
        os.makedirs("{}/{}/{}".format(cwd, output_dir, query))

    bing = Bing(query, limit, output_dir, adult, timeout, visited_urls)
    bing.run()

    # added: return the dict of visited URLs if the caller wants it
    if return_visited_url:
        return bing.visited_urls
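
A usage sketch for this resumable variant; the returned dict can seed a later call so already-visited URLs are skipped (argument names come from the signature above):

seen = download('owls', limit=20, return_visited_url=True)
download('owls', limit=40, visited_urls=seen)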
Code example #16
from flask import Flask, url_for, render_template, redirect, session, request
from app import app

from bing import Bing
import os

bing = Bing(None)
bing.toggle()


@app.route('/search', methods=["GET"])
def search():
    query = request.args.get("q")

    if not query:
        return redirect('/index')

    bing.query = query
    images = bing.get_images()

    return render_template("search.html",
                           images=images,
                           cache_bust=os.path.getmtime("app/static/style.css"))


@app.route('/index', methods=["GET"])
@app.route('/', methods=["GET"])
def index():
    return render_template("search.html",
                           images=[],
                           cache_bust=os.path.getmtime("app/static/style.css"))
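
A hypothetical smoke test against the routes above, assuming the app is served locally on Flask's default port (e.g. via flask run):

import requests

# /search renders search.html with the images Bing returned for the query.
html = requests.get('http://127.0.0.1:5000/search', params={'q': 'cats'}).text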
Code example #17
from __future__ import print_function
import os, json, sys
from google import Google
from duckduckgo import Duckduckgo
from bing import Bing
from yahoo import Yahoo
scrapers = {
    'g': Google(),
    'b': Bing(),
    'y': Yahoo(),
    'd': Duckduckgo(),
}


def read_in():
    lines = sys.stdin.readlines()
    return json.loads(lines[0])


def small_test():
    assert isinstance(scrapers['g'].results_search('fossasia'), list)


def feedgen(query, engine):
    urls = scrapers[engine].results_search(query)
    result = urls
    print(result)
    print(len(result))
    return result
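
A usage sketch; the single-letter key picks the scraper, so 'b' runs the query through Bing:

urls = feedgen('fossasia', 'b')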
Code example #18
 def __init__(self):
     self.searchType = {
         SEARCH_IMAGE: self.__searchImage,
         SEARCH_WEB: self.__searchWeb
     }
     self.bing = Bing()
Code example #19
 def load_bing_model(self, model_file=DEFAULT_MODEL_FILE):
     logging.info("Load Bing Model ...")
     self.bing = Bing(2, 8, 2)
     self.bing.loadTrainModel(model_file)
Code example #20
 def _search(self, page_num):
     key = my_keys.MICROSOFT_API_KEY_2
     bing = Bing(key)
     items = bing.web_search(self.query, page_num,
                             ['Title', 'Url', 'Description'])
     return items
Code example #21
# NOTE: Ask and Baidu are used below; these two imports were missing from the
# snippet and their module paths are assumed to match the pattern.
from ask import Ask
from baidu import Baidu
from bing import Bing
from dailymotion import Dailymotion
from duckduckgo import Duckduckgo
from exalead import Exalead
from google import Google
from mojeek import Mojeek
from parsijoo import Parsijoo
from quora import Quora
from yahoo import Yahoo
from yandex import Yandex
from youtube import Youtube

scrapers = {
    'ask': Ask(),
    'baidu': Baidu(),
    'bing': Bing(),
    'dailymotion': Dailymotion(),
    'duckduckgo': Duckduckgo(),
    'exalead': Exalead(),
    'google': Google(),
    'mojeek': Mojeek(),
    'parsijoo': Parsijoo(),
    'quora': Quora(),
    'yahoo': Yahoo(),
    'yandex': Yandex(),
    'youtube': Youtube()
}


def small_test():
    assert isinstance(scrapers['google'].search('fossasia'), list)
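
A usage sketch mirroring small_test, assuming every scraper in the dict exposes the same search() method:

results = scrapers['bing'].search('fossasia')
print(len(results))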
Code example #22
from bs4 import BeautifulSoup as BS
import logging
import urllib, urllib2
import requests
import ssl
import chardet
import re
from geolcation import Geolocation
from YoutubeSearch import YtubeSearch
from bing import Bing

b = Bing()
print b.searchWeb('akb', {'location': 'JP'})
#jgeo = Geolocation()
#print geo.lookup('129.97.224.225')
'''
re.compile = r'/\[(.*?)\]/'
url1 = "http://en.wikipedia.org/wiki/Tom_cruise"
url2 = "http://en.wikipedia.org/wiki/Cat"
#result = requests.get(url2)

f = open('fake.html')
soup = BS(f)
cont = str(soup.p)
print type(cont)
print type(u'abc')
print type(u'abc'.encode('ascii'))
print type(u'abc'+'abc')

soup = BS(result.text)
ps = []
Code example #23
 def __init__(self):
     super().__init__()
     self.b = Bing()
     self.appData = AppData()
     self.icon = None