import shutil
import sys
from pathlib import Path

from bing import Bing


def download(query, limit=100, output_dir='dataset', adult_filter_off=True,
             force_replace=False, timeout=60, verbose=True):
    # engine = 'bing'
    if adult_filter_off:
        adult = 'off'
    else:
        adult = 'on'

    image_dir = Path(output_dir).joinpath(query).absolute()

    if force_replace:
        if image_dir.is_dir():
            shutil.rmtree(image_dir)

    # check directory and create if necessary
    try:
        if not image_dir.is_dir():
            image_dir.mkdir(parents=True)
    except Exception as e:
        print('[Error] Failed to create directory.', e)
        sys.exit(1)

    print("[%] Downloading Images to {}".format(str(image_dir.absolute())))
    bing = Bing(query, limit, image_dir, adult, timeout, verbose)
    bing.run()
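# A minimal usage sketch for the pathlib-based download() above, assuming it
# lives in a module alongside the Bing class; the query string 'sunflower'
# is an illustrative placeholder, not from the original code.
if __name__ == '__main__':
    # saves up to 20 images under dataset/sunflower/
    download('sunflower', limit=20, output_dir='dataset', timeout=60)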
def download(query, limit=100, output_dir='dataset', image_dir='sample',
             adult_filter_off=True, force_replace=False, timeout=60):
    # engine = 'bing'
    if adult_filter_off:
        adult = 'off'
    else:
        adult = 'on'

    cwd = os.getcwd()
    # image_dir = os.path.join(cwd, output_dir, query)

    if force_replace:
        if os.path.isdir(image_dir):
            shutil.rmtree(image_dir)

    # check directory and create if necessary
    try:
        if not os.path.isdir("{}/{}/".format(cwd, output_dir)):
            os.makedirs("{}/{}/".format(cwd, output_dir))
    except OSError:
        pass
    if not os.path.isdir("{}/{}/{}".format(cwd, output_dir, query)):
        os.makedirs("{}/{}/{}".format(cwd, output_dir, query))

    bing = Bing(query, limit, output_dir, adult, timeout)
    bing.run()
def download(query, limit=100, adult_filter_off=True, force_replace=False):
    engine = 'bing'
    if adult_filter_off:
        adult = 'off'
    else:
        adult = 'on'

    cwd = os.getcwd()
    image_dir = os.path.join(cwd, 'dataset', engine, query)

    if force_replace:
        if os.path.isdir(image_dir):
            shutil.rmtree(image_dir)

    # check directory and create if necessary
    try:
        if not os.path.isdir("{}/dataset/".format(cwd)):
            os.makedirs("{}/dataset/".format(cwd))
    except OSError:
        pass
    if not os.path.isdir("{}/dataset/{}/{}".format(cwd, engine, query)):
        os.makedirs("{}/dataset/{}/{}".format(cwd, engine, query))

    Bing().bing(query, limit, adult)
def download(query, limit=100, output_dir='dataset', adult_filter_off=True,
             timeout=60, dedup=True):
    # engine = 'bing'
    if adult_filter_off:
        adult = 'off'
    else:
        adult = 'on'

    bing = Bing(query, limit, output_dir, adult, timeout)
    bing.run()
def __init__(self, model_file, pretrained_file, gpu=False, mean=None,
             input_scale=None, raw_scale=None, channel_swap=None,
             context_pad=None, weights_1st_stage_bing=None,
             sizes_idx_bing=None, weights_2nd_stage_bing=None,
             num_bbs_psz_bing=130, num_bbs_final_bing=1500):
    """
    Take
    gpu, mean, input_scale, raw_scale, channel_swap: params for
        preprocessing options.
    context_pad: amount of surrounding context to take s.t. a `context_pad`
        sized border of pixels in the network input image is context, as in
        R-CNN feature extraction.
    """
    caffe.Net.__init__(self, model_file, pretrained_file)
    self.set_phase_test()

    if gpu:
        self.set_mode_gpu()
    else:
        self.set_mode_cpu()

    if mean is not None:
        self.set_mean(self.inputs[0], mean)
    if input_scale is not None:
        self.set_input_scale(self.inputs[0], input_scale)
    if raw_scale is not None:
        self.set_raw_scale(self.inputs[0], raw_scale)
    if channel_swap is not None:
        self.set_channel_swap(self.inputs[0], channel_swap)
    self.configure_crop(context_pad)

    # Build the BING proposal generator only when all of its weights are given.
    if (bing_flag and weights_1st_stage_bing is not None
            and sizes_idx_bing is not None
            and weights_2nd_stage_bing is not None):
        self.bing = Bing(weights_1st_stage=weights_1st_stage_bing,
                         sizes_idx=sizes_idx_bing,
                         weights_2nd_stage=weights_2nd_stage_bing,
                         num_bbs_per_size_1st_stage=num_bbs_psz_bing,
                         num_bbs_final=num_bbs_final_bing)
    else:
        self.bing = None
def bing_search(self):
    key = my_keys.MICROSOFT_API_KEY
    bing = Bing(key)
    items = bing.web_search(self.query, 50, ['Title', 'Url', 'Description'])
    pages = []
    for item in items:
        if isinstance(item, str):
            continue
        page = WebPage(item['Url'])
        page.query = self.query
        # normalized to the same format as the Google results
        page.title = item['Title']
        page.snippet = item['Description']
        pages.append(page)
    return pages
def download(query, limit=100, output_dir='dataset', adult_filter_off=True,
             force_replace=False, timeout=60):
    # engine = 'bing'
    if adult_filter_off:
        adult = 'off'
    else:
        adult = 'on'

    # Resume from a previous run if a links.csv bookkeeping file exists.
    try:
        li = pd.read_csv('links.csv')
        link = li['Links'].to_list()
        fname = li['Files'].to_list()
        queries = li['Queries'].to_list()
        # start = fname.split('.')[0] + 1
    except Exception:
        link = []
        fname = []
        queries = []
        # start = '1'

    cwd = os.getcwd()
    image_dir = os.path.join(cwd, output_dir, query)

    if force_replace:
        if os.path.isdir(image_dir):
            shutil.rmtree(image_dir)

    # check directory and create if necessary
    try:
        if not os.path.isdir("{}/{}/".format(cwd, output_dir)):
            os.makedirs("{}/{}/".format(cwd, output_dir))
    except OSError:
        pass
    if not os.path.isdir("{}/{}/{}".format(cwd, output_dir, query)):
        os.makedirs("{}/{}/{}".format(cwd, output_dir, query))

    bing = Bing(query, limit, output_dir, adult, timeout, link, fname, queries)
    links, files, queries = bing.run()

    # Persist the updated download log; to_csv overwrites any existing file.
    d = {'Files': files, 'Queries': queries, 'Links': links}
    lin = pd.DataFrame(d)
    lin.to_csv("{}/{}".format(cwd, "links.csv"))
def extract_features(extractor, img_idx):
    bing_params = bing_param_setting(bing_param_file)
    bing_detector = Bing(bing_params['w_1st'], bing_params['sizes'],
                         bing_params['w_2nd'],
                         num_bbs_per_size_1st_stage=bing_params["num_win_psz"],
                         num_bbs_final=bing_params["num_bbs"])
    pca = joblib.load("data/learned_PCA.pkl")

    relations = {}
    formatted_proposals = []
    indexes = []

    with open(img_list_file) as list_f:
        img_lst = list_f.read().split()
    # process the 200-image slice ending at img_idx
    img_lst = img_lst[img_idx - 200:img_idx]
    for img_name in img_lst:
        img_name = img_name.strip()
        if img_name == "" or img_name[-3:] != "jpg":
            continue
        img = os.path.join(data_dir, img_name)
        # k: number of region proposals
        proposals, rels = get_proposals(extractor, bing_detector, img, k=30)
        for idx in range(len(proposals[1])):
            indexes.append((img_name, idx))
        formatted_proposals.append(proposals)
        relations[img_name] = reduce_rel(rels)

    features = extractor.extract_features(formatted_proposals, layer='fc6')
    features = post_process(features, pca)

    with open("data/features/%d.pkl" % img_idx, "wb") as f:
        pickle.dump(features, f)
    with open("data/indexes/%d.pkl" % img_idx, "wb") as f:
        pickle.dump(indexes, f)
    with open("data/relations/%d.pkl" % img_idx, "wb") as f:
        pickle.dump(relations, f)
def bing(location, key='', proxies='', timeout=5.0):
    """
    Retrieves geocoding data from Bing's REST location API.

    >>> key = 'XXXXX'
    >>> g = geocoder.bing('Medina, Washington', key=key)
    >>> g.latlng
    (47.615821838378906, -122.23892211914062)
    >>> g.country
    'United States'
    ...

    Official Docs
    -------------
    http://msdn.microsoft.com/en-us/library/ff701714.aspx
    """
    provider = Bing(location, key=key)
    return Geocoder(provider, proxies=proxies, timeout=timeout)
def extract_features(extractor):
    bing_params = bing_param_setting(bing_param_file)
    bing_detector = Bing(bing_params['w_1st'], bing_params['sizes'],
                         bing_params['w_2nd'],
                         num_bbs_per_size_1st_stage=bing_params["num_win_psz"],
                         num_bbs_final=bing_params["num_bbs"])
    pca = joblib.load("data/learned_PCA.pkl")

    query_list = open(query_list_file, "r")
    query_dict = {}
    for q in query_list:
        query_name = q.strip()
        # skip blank lines (the stripped name is empty, not the raw line)
        if query_name == "":
            continue
        print(query_name)
        query, crop = ox5k_get_query(gt_dir, query_name)
        img = os.path.join(data_dir, query)
        proposals, rels = get_proposals(extractor, bing_detector, img,
                                        k=30, crop=crop)
        formatted_proposals = [proposals]
        features = extractor.extract_features(formatted_proposals, layer='fc6')
        features = post_process(features, pca)
        query_dict[query_name] = {}
        query_dict[query_name]["feature"] = features
        query_dict[query_name]["relation"] = reduce_rel(rels)
    query_list.close()

    with open("data/query.pkl", "wb") as f:
        pickle.dump(query_dict, f)
def download(query, limit=100, adult_filter_off=True, force_replace=False,
             output_dir=None, timeout=30, page_counter_limit=5):
    engine = 'bing'
    if adult_filter_off:
        adult = 'off'
    else:
        adult = 'on'

    if output_dir is None:
        output_dir = os.path.join(os.getcwd(), 'dataset')
    query_dir = os.path.join(output_dir, query)

    if force_replace:
        if os.path.isdir(query_dir):
            shutil.rmtree(query_dir)

    # check output directory and create if necessary
    try:
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
    except OSError:
        pass

    # check query directory and create if necessary
    print('Query dir: {}'.format(query_dir))
    if not os.path.isdir(query_dir):
        os.makedirs(query_dir)

    Bing().bing(query=query, limit=limit, adlt=adult, output_dir=query_dir,
                timeout=timeout, page_counter_limit=page_counter_limit)
import os
import shutil

from bing import Bing


def download(query, limit=100, output_dir='dataset', adult_filter_off=True,
             force_replace=False, timeout=60):
    try:
        # engine = 'bing'
        if adult_filter_off:
            adult = 'off'
        else:
            adult = 'on'

        cwd = os.getcwd()
        image_dir = os.path.join(cwd, output_dir, query)

        if force_replace:
            if os.path.isdir(image_dir):
                shutil.rmtree(image_dir)

        # check directory and create if necessary
        try:
            if not os.path.isdir("{}/{}/".format(cwd, output_dir)):
                os.makedirs("{}/{}/".format(cwd, output_dir))
        except OSError:
            pass
        if not os.path.isdir("{}/{}/{}".format(cwd, output_dir, query)):
            os.makedirs("{}/{}/{}".format(cwd, output_dir, query))

        bing = Bing(query, limit, output_dir, adult, timeout)
        bing.run()
    except Exception as e:
        print('downloader.py')
        print(e)


if __name__ == '__main__':
    # timeout is a number of seconds
    download('abitabh', limit=10, timeout=1)
def collect_data():
    key = "TIwk7p7nC7HlKijRb5Z42IHx0S2+MKHqAS0BNIOdKqM"
    name_list = ['Hillary', 'bill']
    bing = Bing(key)

    save_dir = './raw_image/'
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    for name in name_list:
        save_dir = './raw_image/' + name + '/'
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        results = bing.web_search(name, 3, ["MediaUrl"])
        for num, result in enumerate(results):
            try:
                scrape_image(result['MediaUrl'], save_dir + str(num) + '.jpg')
            except Exception as e:
                print(e)
                continue
def download(query, limit=100, output_dir='dataset', adult_filter_off=True,
             force_replace=False, timeout=60, no_directory=False):
    # engine = 'bing'
    if adult_filter_off:
        adult = 'off'
    else:
        adult = 'on'

    cwd = os.getcwd()
    image_dir = os.path.join(cwd, output_dir, query)

    if force_replace:
        if os.path.isdir(image_dir):
            shutil.rmtree(image_dir)

    # check output directory and create if necessary
    try:
        if not os.path.isdir("{}/{}/".format(cwd, output_dir)):
            os.makedirs("{}/{}/".format(cwd, output_dir))
    except OSError:
        pass

    # create the per-query directory unless no_directory is set
    if not no_directory:
        if not os.path.isdir("{}/{}/{}".format(cwd, output_dir, query)):
            # print("making dirs")
            os.makedirs("{}/{}/{}".format(cwd, output_dir, query))

    bing = Bing(query, limit, output_dir, adult, timeout, no_directory)
    bing.run()
def download(query, limit=100, output_dir='dataset', adult_filter_off=True,
             force_replace=False, timeout=60, visited_urls=None,
             return_visited_url=False):
    # engine = 'bing'
    # avoid a shared mutable default argument
    if visited_urls is None:
        visited_urls = {}

    if adult_filter_off:
        adult = 'off'
    else:
        adult = 'on'

    cwd = os.getcwd()
    image_dir = os.path.join(cwd, output_dir, query)

    if force_replace:
        if os.path.isdir(image_dir):
            shutil.rmtree(image_dir)

    # check directory and create if necessary
    try:
        if not os.path.isdir("{}/{}/".format(cwd, output_dir)):
            os.makedirs("{}/{}/".format(cwd, output_dir))
    except OSError:
        pass
    if not os.path.isdir("{}/{}/{}".format(cwd, output_dir, query)):
        os.makedirs("{}/{}/{}".format(cwd, output_dir, query))

    bing = Bing(query, limit, output_dir, adult, timeout, visited_urls)
    bing.run()

    # added: return the dict of visited urls if the caller wants it
    if return_visited_url:
        return bing.visited_urls
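# A hedged sketch of chaining the visited_urls / return_visited_url options
# above so a second query skips URLs already fetched; both query strings are
# illustrative placeholders.
seen = download('cat', limit=10, return_visited_url=True)
download('kitten', limit=10, visited_urls=seen)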
from flask import Flask, url_for, render_template, redirect, session, request

from app import app
from bing import Bing
import os

bing = Bing(None)
bing.toggle()


@app.route('/search', methods=["GET"])
def search():
    query = request.args.get("q")
    if not query:
        # redirect() only builds a response; it must be returned to take effect
        return redirect('/index')
    bing.query = query
    images = bing.get_images()
    return render_template("search.html", images=images,
                           cache_bust=os.path.getmtime("app/static/style.css"))


@app.route('/index', methods=["GET"])
@app.route('/', methods=["GET"])
def index():
    return render_template("search.html", images=[],
                           cache_bust=os.path.getmtime("app/static/style.css"))
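# A small sketch exercising the /search route above with Flask's built-in
# test client (no live server needed); the query value 'akb' is just an
# illustrative placeholder.
if __name__ == '__main__':
    with app.test_client() as client:
        resp = client.get('/search', query_string={'q': 'akb'})
        print(resp.status_code)  # 200 once bing.get_images() succeeds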
from __future__ import print_function
import os, json, sys

from google import Google
from duckduckgo import Duckduckgo
from bing import Bing
from yahoo import Yahoo

scrapers = {
    'g': Google(),
    'b': Bing(),
    'y': Yahoo(),
    'd': Duckduckgo(),
}


def read_in():
    lines = sys.stdin.readlines()
    return json.loads(lines[0])


def small_test():
    # scrapers is a dict, so index it by key rather than attribute access
    assert type(scrapers['g'].results_search('fossasia')) is list


def feedgen(query, engine):
    urls = scrapers[engine].results_search(query)
    result = urls
    print(result)
    print(len(result))
    return result
def __init__(self):
    # dispatch table mapping a search-type constant to its handler
    self.searchType = {
        SEARCH_IMAGE: self.__searchImage,
        SEARCH_WEB: self.__searchWeb
    }
    self.bing = Bing()
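# A hedged sketch of consuming the dispatch table built above; this dispatch()
# method is hypothetical (not in the original class) and assumes only the
# SEARCH_IMAGE / SEARCH_WEB keys shown there and handlers taking a query.
def dispatch(self, search_type, query):
    handler = self.searchType[search_type]  # e.g. the image or web handler
    return handler(query)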
def load_bing_model(self, model_file=DEFAULT_MODEL_FILE):
    logging.info("Load Bing Model ...")
    self.bing = Bing(2, 8, 2)
    self.bing.loadTrainModel(model_file)
def _search(self, page_num):
    key = my_keys.MICROSOFT_API_KEY_2
    bing = Bing(key)
    items = bing.web_search(self.query, page_num,
                            ['Title', 'Url', 'Description'])
    return items
from ask import Ask
from baidu import Baidu
from bing import Bing
from dailymotion import Dailymotion
from duckduckgo import Duckduckgo
from exalead import Exalead
from google import Google
from mojeek import Mojeek
from parsijoo import Parsijoo
from quora import Quora
from yahoo import Yahoo
from yandex import Yandex
from youtube import Youtube

scrapers = {
    'ask': Ask(),
    'baidu': Baidu(),
    'bing': Bing(),
    'dailymotion': Dailymotion(),
    'duckduckgo': Duckduckgo(),
    'exalead': Exalead(),
    'google': Google(),
    'mojeek': Mojeek(),
    'parsijoo': Parsijoo(),
    'quora': Quora(),
    'yahoo': Yahoo(),
    'yandex': Yandex(),
    'youtube': Youtube()
}


def small_test():
    assert isinstance(scrapers['google'].search('fossasia'), list)
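# A minimal sketch, assuming only the scrapers mapping above and the
# search(query) method already used in small_test(): dispatch one query to
# any engine by key.
def feedgen(query, engine='bing'):
    return scrapers[engine].search(query)

# e.g. feedgen('fossasia', 'duckduckgo')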
from bs4 import BeautifulSoup as BS
import logging
import urllib, urllib2  # Python 2 module; this script targets Python 2
import requests
import ssl
import chardet
import re

from geolcation import Geolocation  # (sic) module name misspelled on disk
from YoutubeSearch import YtubeSearch
from bing import Bing

b = Bing()
print b.searchWeb('akb', {'location': 'JP'})

# geo = Geolocation()
# print geo.lookup('129.97.224.225')

# dead scratch code, kept disabled inside a string literal
'''
re.compile = r'/\[(.*?)\]/'

url1 = "http://en.wikipedia.org/wiki/Tom_cruise"
url2 = "http://en.wikipedia.org/wiki/Cat"
#result = requests.get(url2)

f = open('fake.html')
soup = BS(f)
cont = str(soup.p)
print type(cont)
print type(u'abc')
print type(u'abc'.encode('ascii'))
print type(u'abc'+'abc')

soup = BS(result.text)
ps = []
'''
def __init__(self):
    super().__init__()
    self.b = Bing()
    self.appData = AppData()
    self.icon = None