def download(query,
             limit=100,
             output_dir='dataset',
             adult_filter_off=True,
             force_replace=False,
             timeout=60,
             verbose=True):

    # engine = 'bing'
    if adult_filter_off:
        adult = 'off'
    else:
        adult = 'on'

    image_dir = Path(output_dir).joinpath(query).absolute()

    if force_replace:
        if image_dir.is_dir():
            shutil.rmtree(image_dir)

    # check directory and create if necessary
    try:
        if not image_dir.is_dir():
            image_dir.mkdir(parents=True)

    except Exception as e:
        print('[Error] Failed to create directory.', e)
        sys.exit(1)

    print("[%] Downloading Images to {}".format(str(image_dir.absolute())))
    bing = Bing(query, limit, image_dir, adult, timeout, verbose)
    bing.run()
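
For orientation, a minimal call sketch for the downloader above; the import path is an assumption, and the query, limit, and timeout values are only illustrative.

# Hypothetical usage of the download() helper above; adjust the import to
# wherever this function actually lives.
from downloader import download

# Fetch up to 20 images for a query into dataset/<query>/ with a 30-second
# per-request timeout, printing progress as it goes.
download('red panda', limit=20, output_dir='dataset', timeout=30, verbose=True)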
Example #2
def create_files_from_internet(target, query, file_types=[Bing.TXT_FILE_TYPE], num_created=1, verbose=False):
    """
    Download files based on a query, to the target directory.

    :param target: target directory (created if it does not exist)
    :param query: query (for the Bing search engine)
    :param file_types: list of file types to search for
    :param num_created: number of files to download
    :param verbose: print to console messages for each file downloaded
    :return: None
    """
    if not num_created or int(num_created) <= 0:
        return
    count = int(num_created)
    # create target directory if it does not exist
    if not os.path.exists(target):
        os.makedirs(target)

    if verbose:
        print 'created target directory %s' % target

    bing = Bing(KEY)
    files_per_type = count / len(file_types)
    remainder = count - files_per_type * len(file_types)
    file_tuples = []
    for ft in file_types:
        num = files_per_type + 1 if remainder > 0 else files_per_type
        file_tuples.append((ft, num))
        remainder -= 1

    display = print_file_details if verbose else None
    total = bing.execute(target, query, file_tuples, display=display)
    if verbose:
        print 'created %d files' % total
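
A hedged usage sketch for create_files_from_internet; the import path for the helper is an assumption, while Bing.TXT_FILE_TYPE and the keyword arguments come from the signature above.

# Assumed import paths; the real module layout is not shown in this example.
from bing import Bing
from file_creator import create_files_from_internet

# Download roughly 10 text files matching the query into ./downloads,
# printing a line for each file as it is written.
create_files_from_internet('downloads', 'open data report',
                           file_types=[Bing.TXT_FILE_TYPE],
                           num_created=10, verbose=True)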
Example #3
def download(query,
             limit=100,
             output_dir='dataset',
             image_dir='sample',
             adult_filter_off=True,
             force_replace=False,
             timeout=60):

    # engine = 'bing'
    if adult_filter_off:
        adult = 'off'
    else:
        adult = 'on'

    cwd = os.getcwd()
    #     image_dir = os.path.join(cwd, output_dir, query)

    if force_replace:
        if os.path.isdir(image_dir):
            shutil.rmtree(image_dir)

    # check directory and create if necessary
    try:
        if not os.path.isdir("{}/{}/".format(cwd, output_dir)):
            os.makedirs("{}/{}/".format(cwd, output_dir))
    except:
        pass
    if not os.path.isdir("{}/{}/{}".format(cwd, output_dir, query)):
        os.makedirs("{}/{}/{}".format(cwd, output_dir, query))

    bing = Bing(query, limit, output_dir, adult, timeout)
    bing.run()
Example #4
def download(query, limit=100, output_dir='dataset', adult_filter_off=True, timeout=60,
              dedup=True):

    # engine = 'bing'
    if adult_filter_off:
        adult = 'off'
    else:
        adult = 'on'

    bing = Bing(query, limit, output_dir, adult, timeout)
    bing.run()
Example #5
class SearchEngine:
    def __init__(self):
        self.searchType = {
            SEARCH_IMAGE: self.__searchImage,
            SEARCH_WEB: self.__searchWeb
        }
        self.bing = Bing()

    def __searchImage(self, query, options=None):
        imageFilter = ''

        if options and options['searchEntity']:
            options['filter'] = IMAGE_FILTER[options['searchEntity']]

        result = self.bing.searchImage(query, options)

        return {'url': result[u'd'][u'results']}

    def __searchWeb(self, query, options=None):
        result = self.bing.searchWeb(query, options)
        websites, allURL = [], defaultdict(list)

        for entry in result[u'd'][u'results']:
            entryURL = entry[u'Url']
            urlObj = tldextract.extract(entryURL)
            siteName = urlObj.domain

            if not siteName:
                continue  # ignoring anything that fails parser
            else:
                allURL[siteName.lower()].append(entryURL)
                websites.append(entryURL)

        return {'ranking': allURL, 'url': websites}

    def search(self, query, options=None):
        '''returns a list of URLs from search engine result'''
        result = {SEARCH_IMAGE: None, SEARCH_WEB: None}

        query += " " + options['searchEntity']

        if options and 'type' in options:
            for searchType in options['type']:
                result[searchType] = self.searchType[searchType](query,
                                                                 options)
        else:
            for searchType in result:
                result[searchType] = self.searchType[searchType](query,
                                                                 options)
        return result
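
As a rough illustration of how this class is driven, a call might look like the following; SEARCH_WEB and the 'searchEntity' option come from the code above, while the concrete query and entity values are assumptions.

# Hypothetical driver for the SearchEngine class above.
engine = SearchEngine()
options = {'searchEntity': 'person', 'type': [SEARCH_WEB]}
result = engine.search('Alan Turing', options)

# result[SEARCH_WEB] holds {'ranking': domain -> [urls], 'url': [urls]}
for url in result[SEARCH_WEB]['url']:
    print(url)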
Example #6
    def __init__(self,
                 model_file,
                 pretrained_file,
                 gpu=False,
                 mean=None,
                 input_scale=None,
                 raw_scale=None,
                 channel_swap=None,
                 context_pad=None,
                 weights_1st_stage_bing=None,
                 sizes_idx_bing=None,
                 weights_2nd_stage_bing=None,
                 num_bbs_psz_bing=130,
                 num_bbs_final_bing=1500):
        """
        Take
        gpu, mean, input_scale, raw_scale, channel_swap: params for
            preprocessing options.
        context_pad: amount of surrounding context to take s.t. a `context_pad`
            sized border of pixels in the network input image is context, as in
            R-CNN feature extraction.
        """
        caffe.Net.__init__(self, model_file, pretrained_file)
        self.set_phase_test()

        if gpu:
            self.set_mode_gpu()
        else:
            self.set_mode_cpu()

        if mean is not None:
            self.set_mean(self.inputs[0], mean)
        if input_scale is not None:
            self.set_input_scale(self.inputs[0], input_scale)
        if raw_scale is not None:
            self.set_raw_scale(self.inputs[0], raw_scale)
        if channel_swap is not None:
            self.set_channel_swap(self.inputs[0], channel_swap)

        self.configure_crop(context_pad)

        if (bing_flag and weights_1st_stage_bing is not None
                and sizes_idx_bing is not None
                and weights_2nd_stage_bing is not None):
            self.bing = Bing(weights_1st_stage=weights_1st_stage_bing,
                             sizes_idx=sizes_idx_bing,
                             weights_2nd_stage=weights_2nd_stage_bing,
                             num_bbs_per_size_1st_stage=num_bbs_psz_bing,
                             num_bbs_final=num_bbs_final_bing)
        else:
            self.bing = None
Example #7
 def bing_search(self):
     key = my_keys.MICROSOFT_API_KEY
     bing = Bing(key)
     items = bing.web_search(self.query, 50, ['Title', 'Url', 'Description'])
     pages = []
     for item in items:
         if type(item) == str:
             continue
         page = WebPage(item['Url'])
         page.query = self.query
         # unify the format with the Google results
         page.title = item['Title']
         page.snippet = item['Description']
         pages.append(page)
     return pages
Example #8
def download(query, limit=100, adult_filter_off=True, force_replace=False):

    engine = 'bing'
    if adult_filter_off:
        adult = 'off'
    else:
        adult = 'on'

    cwd = os.getcwd()
    image_dir = os.path.join(cwd, 'dataset', engine, query)

    if force_replace:
        if os.path.isdir(image_dir):
            shutil.rmtree(image_dir)

    # check directory and create if necessary
    try:
        if not os.path.isdir("{}/dataset/".format(cwd)):
            os.makedirs("{}/dataset/".format(cwd))
    except:
        pass
    if not os.path.isdir("{}/dataset/{}/{}".format(cwd, engine, query)):
        os.makedirs("{}/dataset/{}/{}".format(cwd, engine, query))

    Bing().bing(query, limit, adult)
Example #9
class SearchEngine:
    def __init__(self):
        self.searchType = {SEARCH_IMAGE:self.__searchImage,
                            SEARCH_WEB:self.__searchWeb}
        self.bing = Bing()

    def __searchImage(self, query, options=None):
        imageFilter = ''

        if options and options['searchEntity']: 
            options['filter'] = IMAGE_FILTER[options['searchEntity']]

        result = self.bing.searchImage(query, options)

        return {'url':result[u'd'][u'results']}

    def __searchWeb(self, query, options=None):
        result = self.bing.searchWeb(query, options)
        websites, allURL = [], defaultdict(list)

        for entry in result[u'd'][u'results']:
            entryURL = entry[u'Url']
            urlObj = tldextract.extract(entryURL)
            siteName = urlObj.domain
            
            if not siteName:
                continue # ignoring anything that fails parser
            else:
                allURL[siteName.lower()].append(entryURL) 
                websites.append(entryURL)
        
        return {'ranking':allURL, 'url':websites}

    def search(self, query, options=None):
        '''returns a list of URLs from search engine result'''
        result = {SEARCH_IMAGE:None,
                  SEARCH_WEB:None}

        query += " "+options['searchEntity']

        if options and 'type' in options:
            for searchType in options['type']:
                result[searchType] = self.searchType[searchType](query, options)     
        else:
            for searchType in result:
                result[searchType] = self.searchType[searchType](query, options) 
        return result
Example #10
def download(query,
             limit=100,
             output_dir='dataset',
             adult_filter_off=True,
             force_replace=False,
             timeout=60):
    # engine = 'bing'
    if adult_filter_off:
        adult = 'off'
    else:
        adult = 'on'
    try:
        li = pd.read_csv('links.csv')
        link = li['Links'].to_list()
        fname = li['Files'].to_list()
        queries = li['Queries'].to_list()
        # start = fname.split('.')[0] + 1
    except:
        link = []
        fname = []
        queries = []
        # start = '1'
    cwd = os.getcwd()
    image_dir = os.path.join(cwd, output_dir, query)

    if force_replace:
        if os.path.isdir(image_dir):
            shutil.rmtree(image_dir)

    # check directory and create if necessary
    try:
        if not os.path.isdir("{}/{}/".format(cwd, output_dir)):
            os.makedirs("{}/{}/".format(cwd, output_dir))
    except:
        pass
    if not os.path.isdir("{}/{}/{}".format(cwd, output_dir, query)):
        os.makedirs("{}/{}/{}".format(cwd, output_dir, query))

    bing = Bing(query, limit, output_dir, adult, timeout, link, fname, queries)
    links, files, queries = bing.run()
    d = {'Files': files, 'Queries': queries, 'Links': links}
    lin = pd.DataFrame(d)
    if not os.path.exists("{}/{}".format(cwd, "links.csv")):
        lin.to_csv("{}/{}".format(cwd, "links.csv"))
    else:
        os.remove("{}/{}".format(cwd, "links.csv"))
        lin.to_csv("{}/{}".format(cwd, "links.csv"))
Example #11
def download(query,
             limit=100,
             output_dir='dataset',
             adult_filter_off=True,
             force_replace=False,
             timeout=60):
    try:

        # engine = 'bing'
        if adult_filter_off:
            adult = 'off'
        else:
            adult = 'on'

        cwd = os.getcwd()
        image_dir = os.path.join(cwd, output_dir, query)

        if force_replace:
            if os.path.isdir(image_dir):
                shutil.rmtree(image_dir)

        # check directory and create if necessary
        try:
            if not os.path.isdir("{}/{}/".format(cwd, output_dir)):
                os.makedirs("{}/{}/".format(cwd, output_dir))
        except:
            pass
        if not os.path.isdir("{}/{}/{}".format(cwd, output_dir, query)):
            os.makedirs("{}/{}/{}".format(cwd, output_dir, query))

        bing = Bing(query, limit, output_dir, adult, timeout)
        bing.run()
    except Exception as e:
        print('downloader.py')
        print(e)


if __name__ == '__main__':
    download('abitabh', limit=10, timeout=1)
Example #12
class Main:
    def __init__(self):
        super().__init__()
        self.b = Bing()
        self.appData = AppData()
        self.icon = None

    def run(self):
        self.createIcon()
        self.icon.run()

    def createIcon(self):
        image = self.b.getNextImage()
        self.appData.saveImage(image)
        wall = WindowsWallpaper()
        wall.setWallpaper(self.appData.getImagePath())
        image = Image.open("icon.png")
        menu = Menu(MenuItem('Next Image', self.nextI),
                    MenuItem('Prev Image', self.prevI),
                    MenuItem('Quit', self.quitI))
        self.icon = pystray.Icon("pyBing v0.1", image, "pyBing v0.1", menu)

    def nextI(self):
        image = self.b.getNextImage()
        self.appData.saveImage(image)
        wall = WindowsWallpaper()
        wall.setWallpaper(self.appData.getImagePath())

    def prevI(self):
        image = self.b.getPreviousImage()
        self.appData.saveImage(image)
        wall = WindowsWallpaper()
        wall.setWallpaper(self.appData.getImagePath())

    def quitI(self):
        print('Exit')
        self.icon.stop()
Example #13
def collect_data():
    key = "TIwk7p7nC7HlKijRb5Z42IHx0S2+MKHqAS0BNIOdKqM"
    name_list = ['Hillary', 'bill']
    bing = Bing(key)
    save_dir = './raw_image/'

    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    for name in name_list:
        save_dir = './raw_image/' + name + '/'

        if not os.path.exists(save_dir):
            os.mkdir(save_dir)

        results = bing.web_search(name, 3, ["MediaUrl"])

        for num, result in enumerate(results):
            try:
                scrape_image(result['MediaUrl'], save_dir + str(num) + '.jpg')
            except Exception as e:
                print(e)
                continue
Example #14
def download(query,
             limit=100,
             output_dir='dataset',
             adult_filter_off=True,
             force_replace=False,
             timeout=60,
             no_directory=False):

    # engine = 'bing'
    if adult_filter_off:
        adult = 'off'
    else:
        adult = 'on'

    cwd = os.getcwd()

    image_dir = os.path.join(cwd, output_dir, query)

    if force_replace:
        if os.path.isdir(image_dir):
            shutil.rmtree(image_dir)

    # check output directory and create if necessary
    try:
        if not os.path.isdir("{}/{}/".format(cwd, output_dir)):
            os.makedirs("{}/{}/".format(cwd, output_dir))
    except:
        pass

    # create extra directories if they don't exist and if no_directory parameter is false
    if not no_directory:
        if not os.path.isdir("{}/{}/{}".format(cwd, output_dir, query)):
            # print("making dirs")
            os.makedirs("{}/{}/{}".format(cwd, output_dir, query))

    bing = Bing(query, limit, output_dir, adult, timeout, no_directory)
    bing.run()
Example #15
def download(query,
             limit=100,
             output_dir='dataset',
             adult_filter_off=True,
             force_replace=False,
             timeout=60,
             visited_urls={},
             return_visited_url=False):

    # engine = 'bing'
    if adult_filter_off:
        adult = 'off'
    else:
        adult = 'on'

    cwd = os.getcwd()
    image_dir = os.path.join(cwd, output_dir, query)

    if force_replace:
        if os.path.isdir(image_dir):
            shutil.rmtree(image_dir)

    # check directory and create if necessary
    try:
        if not os.path.isdir("{}/{}/".format(cwd, output_dir)):
            os.makedirs("{}/{}/".format(cwd, output_dir))
    except:
        pass
    if not os.path.isdir("{}/{}/{}".format(cwd, output_dir, query)):
        os.makedirs("{}/{}/{}".format(cwd, output_dir, query))

    bing = Bing(query, limit, output_dir, adult, timeout, visited_urls)
    bing.run()

    # added: return the dict of visited URLs if the caller asks for it
    if return_visited_url:
        return bing.visited_urls
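
The visited_urls / return_visited_url additions let a caller carry the set of already-fetched URLs across runs; a sketch of that pattern, with illustrative argument values:

# First run: collect images and keep the URLs that were actually fetched.
seen = download('mountain bike', limit=50, output_dir='dataset',
                return_visited_url=True)

# Later run: pass the previous URL dict back in so duplicates are skipped.
download('mountain bike', limit=50, output_dir='dataset', visited_urls=seen)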
Example #16
def extract_features(extractor, img_idx):
    bing_params = bing_param_setting(bing_param_file)
    bing_detector = Bing(bing_params['w_1st'],
                         bing_params['sizes'],
                         bing_params['w_2nd'],
                         num_bbs_per_size_1st_stage=bing_params["num_win_psz"],
                         num_bbs_final=bing_params["num_bbs"])

    pca = joblib.load("data/learned_PCA.pkl")

    relations = {}
    formatted_proposals = []
    indexes = []

    list_f = open(img_list_file)
    img_lst = list_f.read().split()
    img_lst = img_lst[img_idx - 200:img_idx]

    for img_name in img_lst:
        img_name = img_name.strip()
        if img_name == "" or img_name[-3:] != "jpg":
            continue
        img = os.path.join(data_dir, img_name)

        # k : number of regions
        proposals, rels = get_proposals(extractor, bing_detector, img, k=30)

        for idx in range(len(proposals[1])):
            indexes.append((img_name, idx))
        formatted_proposals.append(proposals)
        relations[img_name] = reduce_rel(rels)

    features = extractor.extract_features(formatted_proposals, layer='fc6')
    features = post_process(features, pca)

    f = open("data/features/%d.pkl" % img_idx, "wb")
    pickle.dump(features, f)
    f.close()

    f = open("data/indexes/%d.pkl" % img_idx, "wb")
    pickle.dump(indexes, f)
    f.close()

    f = open("data/relations/%d.pkl" % img_idx, "wb")
    pickle.dump(relations, f)
    f.close()
Example #17
def bing(location, key='', proxies='', timeout=5.0):
    """
    Retrieves geocoding data from Bing's REST location API.

        >>> key = 'XXXXX'
        >>> g = geocoder.bing('Medina, Washington', key=key)
        >>> g.latlng
        (47.615821838378906, -122.23892211914062)
        >>> g.country
        'United States'
        ...

    Official Docs
    -------------
    http://msdn.microsoft.com/en-us/library/ff701714.aspx
    """
    provider = Bing(location, key=key)
    return Geocoder(provider, proxies=proxies, timeout=timeout)
Example #18
def extract_features(extractor):
    bing_params = bing_param_setting(bing_param_file)
    bing_detector = Bing(bing_params['w_1st'],
                         bing_params['sizes'],
                         bing_params['w_2nd'],
                         num_bbs_per_size_1st_stage=bing_params["num_win_psz"],
                         num_bbs_final=bing_params["num_bbs"])

    pca = joblib.load("data/learned_PCA.pkl")

    query_list = open(query_list_file, "r")

    query_dict = {}

    for q in query_list:
        query_name = q.strip()
        if query_name == "":
            continue
        print query_name

        query, crop = ox5k_get_query(gt_dir, query_name)
        img = os.path.join(data_dir, query)

        proposals, rels = get_proposals(extractor,
                                        bing_detector,
                                        img,
                                        k=30,
                                        crop=crop)

        formatted_proposals = [proposals]
        features = extractor.extract_features(formatted_proposals, layer='fc6')
        features = post_process(features, pca)

        query_dict[query_name] = {}
        query_dict[query_name]["feature"] = features
        query_dict[query_name]["relation"] = reduce_rel(rels)

    query_list.close()

    f = open("data/query.pkl", "wb")
    pickle.dump(query_dict, f)
    f.close()
Example #19
class EvaluateRecall(object):
    
    def __init__(self, w_1st, sizes_idx, w_2nd,
                 num_bbs_per_size_1st_stage=130, num_bbs_final=1500):

        self.w_1st = w_1st
        self.sizes_idx = sizes_idx
        self.w_2nd = w_2nd
        self.bing = Bing(w_1st, sizes_idx, w_2nd,
                         num_bbs_per_size_1st_stage=num_bbs_per_size_1st_stage,
                         num_bbs_final=num_bbs_final)
    
    def evaluate_test_set(self, test_annotations):
        
        tot_num_gt_bbs = 0
        
        print "Getting ground truth and predicted bounding boxes from testing images."
        images_bbs_dict = dict()
        for key in test_annotations.keys():
            ann_dict = test_annotations[key]
            fn = os.path.join(ann_dict["path"],ann_dict["basename"])
            img = cv2.imread(fn)
            if img is None:
                warnings.warn("The image %s does not exist in the filesystem."%fn)
            #calculating features for each ground truth bounding box
            bbs = ann_dict["bbs"]
            predicted_bbs, _ = self.bing.predict(img)
            tot_num_gt_bbs = tot_num_gt_bbs + len(bbs)        
            images_bbs_dict[ann_dict["basename"]] = (bbs, predicted_bbs)
        
        print "Calculate the recall of predicted bounding boxes that overlap at least the 50% with ground truth bounding boxes."
        overlaps_array = np.zeros(tot_num_gt_bbs)
        gt_bbs_idx = 0
        for img_bn in images_bbs_dict.keys():
            gt_bbs, predicted_bbs = images_bbs_dict[img_bn]
            for i, gt_bb in enumerate(gt_bbs):
                overlaps_array[gt_bbs_idx+i] = bounding_box_overlap(predicted_bbs, bb_query = gt_bb["bb"])
            gt_bbs_idx = gt_bbs_idx + len(gt_bbs)      
        detected = (overlaps_array>0.5).astype(float)
        
        recall = np.sum(detected)/len(detected)
        
        return recall
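
evaluate_test_set thresholds each ground-truth box's best overlap at 0.5, so bounding_box_overlap presumably returns a maximum intersection-over-union; below is a minimal sketch of that computation, assuming [x1, y1, x2, y2] boxes (the coordinate convention and the helper name are assumptions, not taken from the code above).

import numpy as np

def iou_best_match(predicted_bbs, bb_query):
    # predicted_bbs: (N, 4) array of [x1, y1, x2, y2]; bb_query: a single box.
    # Returns the best intersection-over-union of the query box against any prediction.
    px1, py1, px2, py2 = np.asarray(predicted_bbs, dtype=float).T
    qx1, qy1, qx2, qy2 = bb_query
    ix1, iy1 = np.maximum(px1, qx1), np.maximum(py1, qy1)
    ix2, iy2 = np.minimum(px2, qx2), np.minimum(py2, qy2)
    inter = np.maximum(0.0, ix2 - ix1) * np.maximum(0.0, iy2 - iy1)
    union = (px2 - px1) * (py2 - py1) + (qx2 - qx1) * (qy2 - qy1) - inter
    return float(np.max(inter / union))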
Example #20
def download(query,
             limit=100,
             adult_filter_off=True,
             force_replace=False,
             output_dir=None,
             timeout=30,
             page_counter_limit=5):

    engine = 'bing'
    if adult_filter_off:
        adult = 'off'
    else:
        adult = 'on'

    if output_dir is None:
        output_dir = os.path.join(os.getcwd(), 'dataset')
    query_dir = os.path.join(output_dir, query)

    if force_replace:
        if os.path.isdir(query_dir):
            shutil.rmtree(query_dir)

    # check output directory and create if necessary
    try:
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
    except:
        pass
    # check query directory and create if necessary
    print('Query dir: {}'.format(query_dir))
    if not os.path.isdir(query_dir):
        os.makedirs(query_dir)

    Bing().bing(query=query,
                limit=limit,
                adlt=adult,
                output_dir=query_dir,
                timeout=timeout,
                page_counter_limit=page_counter_limit)
Example #21
    def __init__(self, model_file, pretrained_file, gpu=False, mean=None,
                 input_scale=None, raw_scale=None, channel_swap=None,
                 context_pad=None, weights_1st_stage_bing = None, sizes_idx_bing = None,
                 weights_2nd_stage_bing = None, 
                 num_bbs_psz_bing = 130, num_bbs_final_bing = 1500):
        """
        Take
        gpu, mean, input_scale, raw_scale, channel_swap: params for
            preprocessing options.
        context_pad: amount of surrounding context to take s.t. a `context_pad`
            sized border of pixels in the network input image is context, as in
            R-CNN feature extraction.
        """
        caffe.Net.__init__(self, model_file, pretrained_file)
        self.set_phase_test()

        if gpu:
            self.set_mode_gpu()
        else:
            self.set_mode_cpu()

        if mean is not None:
            self.set_mean(self.inputs[0], mean)
        if input_scale is not None:
            self.set_input_scale(self.inputs[0], input_scale)
        if raw_scale is not None:
            self.set_raw_scale(self.inputs[0], raw_scale)
        if channel_swap is not None:
            self.set_channel_swap(self.inputs[0], channel_swap)

        self.configure_crop(context_pad)
        
        if (bing_flag and weights_1st_stage_bing is not None
                and sizes_idx_bing is not None
                and weights_2nd_stage_bing is not None):
            self.bing = Bing(weights_1st_stage=weights_1st_stage_bing,
                             sizes_idx=sizes_idx_bing,
                             weights_2nd_stage=weights_2nd_stage_bing,
                             num_bbs_per_size_1st_stage=num_bbs_psz_bing,
                             num_bbs_final=num_bbs_final_bing)
        else:
            self.bing = None
Example #22
 def test_bing_scrape_search_result(self):
     with open('ipbing') as fp:
         bing_search_result = Bing.scrape_search_result(BeautifulSoup(fp))
         self.assertEqual(BING_SEARCH_RESULT, bing_search_result)
Example #23
 def __init__(self):
     self.searchType = {
         SEARCH_IMAGE: self.__searchImage,
         SEARCH_WEB: self.__searchWeb
     }
     self.bing = Bing()
Example #24
 def load_bing_model(self, model_file=DEFAULT_MODEL_FILE):
     logging.info("Load Bing Model ...")
     self.bing = Bing(2, 8, 2)
     self.bing.loadTrainModel(model_file)
Example #25
 def _search(self, page_num):
     key = my_keys.MICROSOFT_API_KEY_2
     bing = Bing(key)
     items = bing.web_search(self.query, page_num, ['Title', 'Url', 'Description'])
     return items
Example #26
import sys
sys.path.append('lib')
from bing import Bing

appID = 'Hko5cXg5U8h/WIE46pYQjmo/MLXNNkXYr+VXx/a66Ig'

bing = Bing(appID)

print bing.search(query='mootools',sources='web')

Example #27
class Detector(caffe.Net):
    """
    Detector extends Net for windowed detection by a list of crops or
    selective search proposals.
    """
    def __init__(self,
                 model_file,
                 pretrained_file,
                 gpu=False,
                 mean=None,
                 input_scale=None,
                 raw_scale=None,
                 channel_swap=None,
                 context_pad=None,
                 weights_1st_stage_bing=None,
                 sizes_idx_bing=None,
                 weights_2nd_stage_bing=None,
                 num_bbs_psz_bing=130,
                 num_bbs_final_bing=1500):
        """
        Take
        gpu, mean, input_scale, raw_scale, channel_swap: params for
            preprocessing options.
        context_pad: amount of surrounding context to take s.t. a `context_pad`
            sized border of pixels in the network input image is context, as in
            R-CNN feature extraction.
        """
        caffe.Net.__init__(self, model_file, pretrained_file)
        self.set_phase_test()

        if gpu:
            self.set_mode_gpu()
        else:
            self.set_mode_cpu()

        if mean is not None:
            self.set_mean(self.inputs[0], mean)
        if input_scale is not None:
            self.set_input_scale(self.inputs[0], input_scale)
        if raw_scale is not None:
            self.set_raw_scale(self.inputs[0], raw_scale)
        if channel_swap is not None:
            self.set_channel_swap(self.inputs[0], channel_swap)

        self.configure_crop(context_pad)

        if (bing_flag and weights_1st_stage_bing is not None
                and sizes_idx_bing is not None
                and weights_2nd_stage_bing is not None):
            self.bing = Bing(weights_1st_stage=weights_1st_stage_bing,
                             sizes_idx=sizes_idx_bing,
                             weights_2nd_stage=weights_2nd_stage_bing,
                             num_bbs_per_size_1st_stage=num_bbs_psz_bing,
                             num_bbs_final=num_bbs_final_bing)
        else:
            self.bing = None

    def detect_bing(self, image):

        assert self.bing is not None

        if not bing_flag:
            print "Bing detection invoked but error while importing bing module!"
            sys.exit(1)

        t0 = time.time()
        bbs, scores = self.bing.predict(image)
        t1 = time.time()
        print "Bing prediction: {0:.2f}s.".format(t1 - t0)
        images_windows = self.detect_windows(image, bbs)

        return self.get_predictions_from_cropped_images(images_windows)

    def detect_windows(self, image, bbs):
        """
        Do windowed detection over given images and windows. Windows are
        extracted then warped to the input dimensions of the net.

        Take
        images_windows: (image filename, window list) iterable.
        context_crop: size of context border to crop in pixels.

        Give
        detections: list of {filename: image filename, window: crop coordinates,
            predictions: prediction vector} dicts.
        """
        images_windows = []

        image_fl = img_as_float(image)
        t0 = time.time()
        for bb in bbs:
            bb = np.array((bb[1], bb[0], bb[3], bb[2]))
            images_windows.append((self.crop(image_fl, bb), bb))
        t1 = time.time()
        print "Bounding boxes cropping: {0:.2f}s.".format(t1 - t0)

        return images_windows

    def get_predictions_from_cropped_images(self, images_windows):

        # Run through the net (warping windows to input dimensions).
        caffe_in = np.zeros(
            (len(images_windows), images_windows[0][0].shape[2]) +
            self.blobs[self.inputs[0]].data.shape[2:],
            dtype=np.float32)
        bbs = []
        for ix, (window_in, bb) in enumerate(images_windows):
            caffe_in[ix] = self.preprocess(self.inputs[0], window_in)
            bbs.append(bb)
        out = self.forward_all(**{self.inputs[0]: caffe_in})
        predictions = out[self.outputs[0]].squeeze(axis=(2, 3))

        # Package predictions with images and windows.
        detections = []
        ix = 0
        for bb in bbs:
            detections.append({
                'window': bb,
                'prediction': predictions[ix],
            })
            ix += 1
        return detections, predictions

    def crop(self, im, window):
        """
        Crop a window from the image for detection. Include surrounding context
        according to the `context_pad` configuration.

        Take
        im: H x W x K image ndarray to crop.
        window: bounding box coordinates as ymin, xmin, ymax, xmax.

        Give
        crop: cropped window.
        """
        # Crop window from the image.
        crop = im[window[0]:window[2], window[1]:window[3]]

        if self.context_pad:
            box = window.copy()
            crop_size = self.blobs[self.inputs[0]].width  # assumes square
            scale = crop_size / (1. * crop_size - self.context_pad * 2)
            # Crop a box + surrounding context.
            half_h = (box[2] - box[0] + 1) / 2.
            half_w = (box[3] - box[1] + 1) / 2.
            center = (box[0] + half_h, box[1] + half_w)
            scaled_dims = scale * np.array((-half_h, -half_w, half_h, half_w))
            box = np.round(np.tile(center, 2) + scaled_dims)
            full_h = box[2] - box[0] + 1
            full_w = box[3] - box[1] + 1
            scale_h = crop_size / full_h
            scale_w = crop_size / full_w
            pad_y = round(max(0, -box[0]) * scale_h)  # amount out-of-bounds
            pad_x = round(max(0, -box[1]) * scale_w)

            # Clip box to image dimensions.
            im_h, im_w = im.shape[:2]
            box = np.clip(box, 0., [im_h, im_w, im_h, im_w])
            clip_h = box[2] - box[0] + 1
            clip_w = box[3] - box[1] + 1
            assert (clip_h > 0 and clip_w > 0)
            crop_h = round(clip_h * scale_h)
            crop_w = round(clip_w * scale_w)
            if pad_y + crop_h > crop_size:
                crop_h = crop_size - pad_y
            if pad_x + crop_w > crop_size:
                crop_w = crop_size - pad_x

            # collect with context padding and place in input
            # with mean padding
            context_crop = im[box[0]:box[2], box[1]:box[3]]
            context_crop = caffe.io.resize_image(context_crop,
                                                 (crop_h, crop_w))
            crop = self.crop_mean.copy()
            crop[pad_y:(pad_y + crop_h), pad_x:(pad_x + crop_w)] = context_crop

        return crop

    def configure_crop(self, context_pad):
        """
        Configure amount of context for cropping.
        If context is included, make the special input mean for context padding.

        Take
        context_pad: amount of context for cropping.
        """
        self.context_pad = context_pad
        if self.context_pad:
            raw_scale = self.raw_scale.get(self.inputs[0])
            channel_order = self.channel_swap.get(self.inputs[0])
            # Padding context crops needs the mean in unprocessed input space.
            mean = self.mean.get(self.inputs[0])
            if mean is not None:
                crop_mean = mean.copy().transpose((1, 2, 0))
                if channel_order is not None:
                    channel_order_inverse = [
                        channel_order.index(i)
                        for i in range(crop_mean.shape[2])
                    ]
                    crop_mean = crop_mean[:, :, channel_order_inverse]
                if raw_scale is not None:
                    crop_mean /= raw_scale
                self.crop_mean = crop_mean
            else:
                self.crop_mean = np.zeros(
                    self.blobs[self.inputs[0]].data.shape, dtype=np.float32)
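
For orientation, a rough sketch of how this Detector might be instantiated and driven with BING proposals; the prototxt, caffemodel, and BING weight paths are placeholders, and the surrounding module is assumed to have set bing_flag and imported the legacy Caffe Python API used above.

# Hypothetical driver; every path below is a placeholder.
import cv2

detector = Detector('deploy.prototxt', 'model.caffemodel',
                    gpu=False,
                    context_pad=16,
                    weights_1st_stage_bing='bing_w1.txt',
                    sizes_idx_bing='bing_sizes.txt',
                    weights_2nd_stage_bing='bing_w2.txt')

image = cv2.imread('example.jpg')
detections, predictions = detector.detect_bing(image)
print(len(detections))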
Example #28
 def test_bing_scrape_search_result(self):
     with open('ipbing') as fp:
         bing_search_result = Bing.scrape_search_result(BeautifulSoup(fp, 'html.parser'))
         self.assertEqual(BING_SEARCH_RESULT, bing_search_result)
Example #29
from ask import Ask
from baidu import Baidu
from bing import Bing
from dailymotion import Dailymotion
from duckduckgo import Duckduckgo
from exalead import Exalead
from google import Google
from mojeek import Mojeek
from parsijoo import Parsijoo
from quora import Quora
from yahoo import Yahoo
from yandex import Yandex
from youtube import Youtube

scrapers = {
    'ask': Ask(),
    'baidu': Baidu(),
    'bing': Bing(),
    'dailymotion': Dailymotion(),
    'duckduckgo': Duckduckgo(),
    'exalead': Exalead(),
    'google': Google(),
    'mojeek': Mojeek(),
    'parsijoo': Parsijoo(),
    'quora': Quora(),
    'yahoo': Yahoo(),
    'yandex': Yandex(),
    'youtube': Youtube()
}


def small_test():
    assert isinstance(scrapers['google'].search('fossasia'), list)
Example #30
 def _search(self, page_num):
     key = my_keys.MICROSOFT_API_KEY_2
     bing = Bing(key)
     items = bing.web_search(self.query, page_num,
                             ['Title', 'Url', 'Description'])
     return items
Example #31
 def __init__(self):
     super().__init__()
     self.b = Bing()
     self.appData = AppData()
     self.icon = None
Example #32
from flask import Flask, url_for, render_template, redirect, session, request
from app import app

from bing import Bing
import os

bing = Bing(None)
bing.toggle()


@app.route('/search', methods=["GET"])
def search():
    query = request.args.get("q")

    if not query:
        return redirect('/index')

    bing.query = query
    images = bing.get_images()

    return render_template("search.html",
                           images=images,
                           cache_bust=os.path.getmtime("app/static/style.css"))


@app.route('/index', methods=["GET"])
@app.route('/', methods=["GET"])
def index():
    return render_template("search.html",
                           images=[],
                           cache_bust=os.path.getmtime("app/static/style.css"))
Example #33
 def run1(self):
     s = Bing.search(self.search_term)
     return s
Example #34
 def __init__(self, w_1st, sizes_idx, w_2nd, num_bbs_per_size_1st_stage= 130, num_bbs_final = 1500):
 
     self.w_1st = w_1st
     self.sizes_idx = sizes_idx
     self.w_2nd = w_2nd      
     self.bing = Bing(w_1st,sizes_idx,w_2nd, num_bbs_per_size_1st_stage = num_bbs_per_size_1st_stage, num_bbs_final = num_bbs_final)  
Example #35
 def test_bing_scrape_news_result(self):
     with open('ipbingnews') as fp:
         bing_news_result = Bing.scrape_news_result(BeautifulSoup(fp, 'html.parser'))
         self.assertEqual(BING_NEWS_RESULT, bing_news_result)
Example #36
 def test_bing_scrape_news_result(self):
     with open('ipbingnews') as fp:
         bing_news_result = Bing.scrape_news_result(BeautifulSoup(fp))
         self.assertEqual(BING_NEWS_RESULT, bing_news_result)
Example #37
class bing_cluster:
    def __init__(self, cluster_num=10, top_k=10, max_ratio=4, min_size=100):
        logging.info("Init the bing and cluster parameter")
        self.cluster_num = cluster_num
        self.top_k = top_k
        self.max_ratio = max_ratio
        self.min_size = min_size
        self.spectral = cluster.SpectralClustering(n_clusters=self.cluster_num,
                                                   affinity='precomputed')

    def load_bing_model(self, model_file=DEFAULT_MODEL_FILE):
        logging.info("Load Bing Model ...")
        self.bing = Bing(2, 8, 2)
        self.bing.loadTrainModel(model_file)

    def get_bing_of_image(self, image_filename, numPerSz=130):
        boxes = self.bing.getBoxesOfOneImage(image_filename, numPerSz)
        ymins = [s for s in boxes.ymins()]
        ymaxs = [s for s in boxes.ymaxs()]
        xmins = [s for s in boxes.xmins()]
        xmaxs = [s for s in boxes.xmaxs()]
        bing_windows = pd.DataFrame({
            'ymin': ymins,
            'xmin': xmins,
            'ymax': ymaxs,
            'xmax': xmaxs
        })
        return bing_windows

    def get_iou_distance_matrix(self, bing_windows):
        window_size = bing_windows.shape[0]
        y1 = bing_windows["ymin"].values
        x1 = bing_windows["xmin"].values
        y2 = bing_windows["ymax"].values
        x2 = bing_windows["xmax"].values
        w = x2 - x1
        h = y2 - y1
        area = (w * h).astype(float)
        distances = np.zeros((window_size, window_size))
        for i in range(window_size):
            xx1 = np.maximum(x1[i], x1)
            yy1 = np.maximum(y1[i], y1)
            xx2 = np.minimum(x2[i], x2)
            yy2 = np.minimum(y2[i], y2)
            w = np.maximum(0., xx2 - xx1)
            h = np.maximum(0., yy2 - yy1)
            wh = w * h
            distances[i] = wh / (area[i] + area - wh)
        return distances

    def cluster_boxes(self, bing_windows):
        starttime = time.time()
        distance_matrix = self.get_iou_distance_matrix(bing_windows)
        self.spectral.fit(distance_matrix)
        #get top of each cluster
        window_size = bing_windows.shape[0]
        y1 = bing_windows["ymin"].values
        x1 = bing_windows["xmin"].values
        y2 = bing_windows["ymax"].values
        x2 = bing_windows["xmax"].values
        w = x2 - x1
        h = y2 - y1
        area = (w * h).astype(float)
        index_dictionary = {}
        for i in range(window_size):
            #if(area[i]<self.min_size):
            #    continue
            #if(w[i]*1.0/h[i]>self.max_ratio or h[i]*1.0/w[i]>self.max_ratio):
            #    continue
            label = self.spectral.labels_[i]
            if not label in index_dictionary:
                index_dictionary[label] = []
            if len(index_dictionary[label]) >= self.top_k:
                continue
            index_dictionary[label].append(i)
        index_list = []
        #for key in index_dictionary:
        #    index_list.extend(index_dictionary[key])
        while True:
            empty = True
            for key in index_dictionary:
                one_list = index_dictionary[key]
                if len(one_list):
                    empty = False
                    index_list.append(one_list.pop(0))
            if empty:
                break
        bing_windows = pd.DataFrame({
            "ymin": y1[index_list],
            "xmin": x1[index_list],
            "ymax": y2[index_list],
            "xmax": x2[index_list]
        })
        endtime = time.time()
        #logging.info("Cluster spend {:.3f}".format(endtime-starttime))
        return bing_windows
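
A hedged end-to-end sketch of driving this class; the image path is a placeholder, and load_bing_model falls back to the module's DEFAULT_MODEL_FILE.

# Illustrative driver for bing_cluster; the image path is a placeholder.
bc = bing_cluster(cluster_num=10, top_k=10)
bc.load_bing_model()                      # uses DEFAULT_MODEL_FILE from this module

windows = bc.get_bing_of_image('example.jpg', numPerSz=130)
kept = bc.cluster_boxes(windows)          # spectral clustering on the IoU affinity matrix
print(kept.head())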
Example #38
from __future__ import print_function
import os, json, sys
from google import Google
from duckduckgo import Duckduckgo
from bing import Bing
from yahoo import Yahoo
scrapers = {
    'g': Google(),
    'b': Bing(),
    'y': Yahoo(),
    'd': Duckduckgo(),
}


def read_in():
    lines = sys.stdin.readlines()
    return json.loads(lines[0])


def small_test():
    assert type(scrapers['g'].results_search('fossasia')) is list


def feedgen(query, engine):
    urls = scrapers[engine].results_search(query)
    result = urls
    print(result)
    print(len(result))
    return result
Example #39
class Detector(caffe.Net):
    """
    Detector extends Net for windowed detection by a list of crops or
    selective search proposals.
    """
    def __init__(self, model_file, pretrained_file, gpu=False, mean=None,
                 input_scale=None, raw_scale=None, channel_swap=None,
                 context_pad=None, weights_1st_stage_bing = None, sizes_idx_bing = None,
                 weights_2nd_stage_bing = None, 
                 num_bbs_psz_bing = 130, num_bbs_final_bing = 1500):
        """
        Take
        gpu, mean, input_scale, raw_scale, channel_swap: params for
            preprocessing options.
        context_pad: amount of surrounding context to take s.t. a `context_pad`
            sized border of pixels in the network input image is context, as in
            R-CNN feature extraction.
        """
        caffe.Net.__init__(self, model_file, pretrained_file)
        self.set_phase_test()

        if gpu:
            self.set_mode_gpu()
        else:
            self.set_mode_cpu()

        if mean is not None:
            self.set_mean(self.inputs[0], mean)
        if input_scale is not None:
            self.set_input_scale(self.inputs[0], input_scale)
        if raw_scale is not None:
            self.set_raw_scale(self.inputs[0], raw_scale)
        if channel_swap is not None:
            self.set_channel_swap(self.inputs[0], channel_swap)

        self.configure_crop(context_pad)
        
        if (bing_flag and weights_1st_stage_bing is not None
                and sizes_idx_bing is not None
                and weights_2nd_stage_bing is not None):
            self.bing = Bing(weights_1st_stage=weights_1st_stage_bing,
                             sizes_idx=sizes_idx_bing,
                             weights_2nd_stage=weights_2nd_stage_bing,
                             num_bbs_per_size_1st_stage=num_bbs_psz_bing,
                             num_bbs_final=num_bbs_final_bing)
        else:
            self.bing = None
        
    def detect_bing(self, image):

        assert self.bing is not None
        
        if not bing_flag:
            print "Bing detection invoked but error while importing bing module!"
            sys.exit(1)
                
        
        t0 = time.time()
        bbs, scores = self.bing.predict(image)
        t1 = time.time()
        print "Bing prediction: {0:.2f}s.".format(t1-t0)
        images_windows = self.detect_windows(image, bbs)
        
        return self.get_predictions_from_cropped_images(images_windows)
        
    def detect_windows(self, image, bbs):
        """
        Do windowed detection over given images and windows. Windows are
        extracted then warped to the input dimensions of the net.

        Take
        images_windows: (image filename, window list) iterable.
        context_crop: size of context border to crop in pixels.

        Give
        detections: list of {filename: image filename, window: crop coordinates,
            predictions: prediction vector} dicts.
        """
        images_windows = []
        
        image_fl = img_as_float(image)
        t0 = time.time()
        for bb in bbs:
            bb = np.array((bb[1],bb[0],bb[3],bb[2]))
            images_windows.append((self.crop(image_fl, bb), bb))
        t1 = time.time()
        print "Bounding boxes cropping: {0:.2f}s.".format(t1-t0)

        return images_windows
    
    def get_predictions_from_cropped_images(self, images_windows):
        
        # Run through the net (warping windows to input dimensions).
        caffe_in = np.zeros((len(images_windows), images_windows[0][0].shape[2])
                            + self.blobs[self.inputs[0]].data.shape[2:],
                            dtype=np.float32)
        bbs = []
        for ix, (window_in, bb) in enumerate(images_windows):
            caffe_in[ix] = self.preprocess(self.inputs[0], window_in)
            bbs.append(bb)
        out = self.forward_all(**{self.inputs[0]: caffe_in})
        predictions = out[self.outputs[0]].squeeze(axis=(2,3))

        # Package predictions with images and windows.
        detections = []
        ix = 0
        for bb in bbs:
            detections.append({
                'window': bb,
                'prediction': predictions[ix],
            })
            ix += 1
        return detections, predictions

    def crop(self, im, window):
        """
        Crop a window from the image for detection. Include surrounding context
        according to the `context_pad` configuration.

        Take
        im: H x W x K image ndarray to crop.
        window: bounding box coordinates as ymin, xmin, ymax, xmax.

        Give
        crop: cropped window.
        """
        # Crop window from the image.
        crop = im[window[0]:window[2], window[1]:window[3]]

        if self.context_pad:
            box = window.copy()
            crop_size = self.blobs[self.inputs[0]].width  # assumes square
            scale = crop_size / (1. * crop_size - self.context_pad * 2)
            # Crop a box + surrounding context.
            half_h = (box[2] - box[0] + 1) / 2.
            half_w = (box[3] - box[1] + 1) / 2.
            center = (box[0] + half_h, box[1] + half_w)
            scaled_dims = scale * np.array((-half_h, -half_w, half_h, half_w))
            box = np.round(np.tile(center, 2) + scaled_dims)
            full_h = box[2] - box[0] + 1
            full_w = box[3] - box[1] + 1
            scale_h = crop_size / full_h
            scale_w = crop_size / full_w
            pad_y = round(max(0, -box[0]) * scale_h)  # amount out-of-bounds
            pad_x = round(max(0, -box[1]) * scale_w)

            # Clip box to image dimensions.
            im_h, im_w = im.shape[:2]
            box = np.clip(box, 0., [im_h, im_w, im_h, im_w])
            clip_h = box[2] - box[0] + 1
            clip_w = box[3] - box[1] + 1
            assert(clip_h > 0 and clip_w > 0)
            crop_h = round(clip_h * scale_h)
            crop_w = round(clip_w * scale_w)
            if pad_y + crop_h > crop_size:
                crop_h = crop_size - pad_y
            if pad_x + crop_w > crop_size:
                crop_w = crop_size - pad_x

            # collect with context padding and place in input
            # with mean padding
            context_crop = im[box[0]:box[2], box[1]:box[3]]
            context_crop = caffe.io.resize_image(context_crop, (crop_h, crop_w))
            crop = self.crop_mean.copy()
            crop[pad_y:(pad_y + crop_h), pad_x:(pad_x + crop_w)] = context_crop

        return crop

    def configure_crop(self, context_pad):
        """
        Configure amount of context for cropping.
        If context is included, make the special input mean for context padding.

        Take
        context_pad: amount of context for cropping.
        """
        self.context_pad = context_pad
        if self.context_pad:
            raw_scale = self.raw_scale.get(self.inputs[0])
            channel_order = self.channel_swap.get(self.inputs[0])
            # Padding context crops needs the mean in unprocessed input space.
            mean = self.mean.get(self.inputs[0])
            if mean is not None:
                crop_mean = mean.copy().transpose((1,2,0))
                if channel_order is not None:
                    channel_order_inverse = [channel_order.index(i)
                                            for i in range(crop_mean.shape[2])]
                    crop_mean = crop_mean[:,:, channel_order_inverse]
                if raw_scale is not None:
                    crop_mean /= raw_scale
                self.crop_mean = crop_mean
            else:
                self.crop_mean = np.zeros(self.blobs[self.inputs[0]].data.shape,
                                          dtype=np.float32)
Example #40
def create_directory_with_files_from_internet(target, query, file_types=[Bing.TXT_FILE_TYPE], dir_prefix='folder',
                                              levels=1, num_dir_per_level_created=1, num_files_per_dir_created=1,
                                              verbose=False, delay=False):
    """
    Create a hierarchy of directories with files downloaded from the Internet, based on a query.

    :param target: target directory (root of the created hierarchy)
    :param query: query for retrieving the files
    :param file_types: a list of file types to download (splits the total number of files approx. equally between all file types)
    :param dir_prefix: prefix for created directory names
    :param levels: the depth of the directory hierarchy created
    :param num_dir_per_level_created: number of directories for each level in the hierarchy (same for all levels)
    :param num_files_per_dir_created: number of files in each directory created (same for all directories)
    :param verbose: print to console a message for each directory and file downloaded
    :param delay: not used
    :return: None
    """
    if not levels or int(levels) <= 0:
        return
    if not num_dir_per_level_created or int(num_dir_per_level_created) <= 0:
        return
    if not num_files_per_dir_created or int(num_files_per_dir_created) <= 0:
        return
    # create target directory if it does not exist
    if not os.path.exists(target):
        os.makedirs(target)

    # make the naming pattern for file
    _, files_count_per_dir, total_files = make_file_pattern('', '', levels, num_dir_per_level_created,
                                                            num_files_per_dir_created)
    # make the naming pattern for directory
    dir_name_pattern, dirs_count_per_level, total_dirs = make_dir_pattern(dir_prefix, levels, num_dir_per_level_created)

    if verbose:
        print 'creating directories (%d) and files (%d)' % (total_dirs, total_files)

    # recursive function to build each level of directories and files
    def create_level(current_target, urls, from_to_list, _num_levels, _current_level, _num_dirs_per_level,
                     _num_files_per_dir, _current_dirs_count, show_details=None):

        _dirs_count = _current_dirs_count
        if _current_level <= 0:
            return _dirs_count

        for i in range(_num_dirs_per_level):
            dir_name = dir_name_pattern % (_num_levels - _current_level, i)
            dir_path = os.path.join(current_target, dir_name)
            # create directory
            try:
                os.makedirs(dir_path)
                _dirs_count += 1
                # num_operations += 1
                if show_details:
                    show_details(dir_path)

                # prepare the files download list
                src_list = []
                for i in range(_num_files_per_dir):
                    src_list.append(urls.pop(0))
                from_to_list.append({'src': src_list, 'dst': dir_path})

                # recurse into the next level
                _dirs_count = create_level(dir_path, urls, from_to_list, _num_levels, _current_level - 1,
                                           _num_dirs_per_level, _num_files_per_dir, _dirs_count,
                                           show_details=show_details)
            except:
                pass

        return _dirs_count

    bing = Bing(KEY)
    urls = []
    files_per_type = total_files / len(file_types)
    remainder = total_files - files_per_type * len(file_types)
    file_tuples = []
    for ft in file_types:
        num = files_per_type + 1 if remainder > 0 else files_per_type
        file_tuples.append((ft, num))
        remainder -= 1

    result_total = 0
    for file_tuple in file_tuples:
        result_per_file_type = 0
        for url in bing.get_files(query, file_tuple[0], file_tuple[1]):
            urls.append(url)
            result_per_file_type += 1
        print 'found %d (%d) results for %s type' % (result_per_file_type, file_tuple[1], file_tuple[0])
        result_total += result_per_file_type
    print 'found %d results for querying "%s"' % (result_total, query)

    show_details = print_file_details if verbose else None
    # create the directory structure and prepare list of files to download
    from_to_list = []
    d = create_level(target, urls, from_to_list, levels, levels, num_dir_per_level_created,
                     num_files_per_dir_created, 0, show_details=show_details)

    show_download_details = print_file_download_details if verbose else None
    # parallel downloading of files into their respective directories
    f = bing.execute2(from_to_list, display=show_download_details)

    global num_operations
    num_operations = d + f

    if verbose:
        print 'created %d directories with %d files' % (d, f)
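
A usage sketch for the hierarchy builder above, in the same Python 2 style as the surrounding code; the target directory and query are illustrative.

# Build a 2-level tree with 3 folders per level and 5 text files per folder.
create_directory_with_files_from_internet('testdata', 'project report',
                                          file_types=[Bing.TXT_FILE_TYPE],
                                          dir_prefix='folder',
                                          levels=2,
                                          num_dir_per_level_created=3,
                                          num_files_per_dir_created=5,
                                          verbose=True)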
Example #41
from bs4 import BeautifulSoup as BS
import logging
import urllib, urllib2
import requests
import ssl
import chardet
import re
from geolcation import Geolocation
from YoutubeSearch import YtubeSearch
from bing import Bing

b = Bing()
print b.searchWeb('akb', {'location': 'JP'})
#jgeo = Geolocation()
#print geo.lookup('129.97.224.225')
'''
re.compile = r'/\[(.*?)\]/'
url1 = "http://en.wikipedia.org/wiki/Tom_cruise"
url2 = "http://en.wikipedia.org/wiki/Cat"
#result = requests.get(url2)

f = open('fake.html')
soup = BS(f)
cont = str(soup.p)
print type(cont)
print type(u'abc')
print type(u'abc'.encode('ascii'))
print type(u'abc'+'abc')

soup = BS(result.text)
ps = []
Example #42
from bs4 import BeautifulSoup as BS
import logging
import urllib,urllib2
import requests
import ssl
import chardet
import re
from geolcation import Geolocation
from YoutubeSearch import YtubeSearch 
from bing import Bing
b = Bing()
print b.searchWeb('akb',{'location':'JP'})
#jgeo = Geolocation()
#print geo.lookup('129.97.224.225')
'''
re.compile = r'/\[(.*?)\]/'
url1 = "http://en.wikipedia.org/wiki/Tom_cruise"
url2 = "http://en.wikipedia.org/wiki/Cat"
#result = requests.get(url2)

f = open('fake.html')
soup = BS(f)
cont = str(soup.p)
print type(cont)
print type(u'abc')
print type(u'abc'.encode('ascii'))
print type(u'abc'+'abc')

soup = BS(result.text)
ps = []
content = ''
Example #43
 def __init__(self):
     self.searchType = {SEARCH_IMAGE:self.__searchImage,
                         SEARCH_WEB:self.__searchWeb}
     self.bing = Bing()