Ejemplo n.º 1
0
 def __init__(self, radius, minPoint, distance=DDistance.DefaultDistance()):
     """Validate the arguments and store them on the instance.

     :param radius: stored as ``self.radius``; checked with ``judge_null``
     :param minPoint: stored as ``self.minPoint``; checked with ``judge_null``
     :param distance: stored as ``self.distance``; checked with
         ``judge_type`` against ``DDistance.DDdistance``

     NOTE: the default ``distance`` object is created once, when this
     method is defined, and is shared by every instance that relies on it.
     """
     # Validation order is kept: distance type first, then the two
     # required values.
     exceptions2.judge_type(distance, DDistance.DDdistance)
     for required in (radius, minPoint):
         exceptions2.judge_null(required)
     self.radius = radius
     self.minPoint = minPoint
     self.distance = distance
Ejemplo n.º 2
0
 def __init__(self, radius, minPoint, distance=DDistance.DefaultDistance()):
     """Check the constructor arguments, then keep them as attributes.

     :param radius: required value (``judge_null``), kept as ``self.radius``
     :param minPoint: required value (``judge_null``), kept as
         ``self.minPoint``
     :param distance: must satisfy ``judge_type`` for
         ``DDistance.DDdistance``; kept as ``self.distance``

     NOTE: the default ``distance`` is evaluated a single time at
     definition time, so instances using the default share one object.
     """
     exceptions2.judge_type(distance, DDistance.DDdistance)
     exceptions2.judge_null(radius)
     exceptions2.judge_null(minPoint)
     self.radius, self.minPoint, self.distance = radius, minPoint, distance
Ejemplo n.º 3
0
 def _add_site(self, site):
     """Add one site, or a collection of sites, to ``self.sites``.

     :param site: a single string, or a list/tuple/set of entries
     :raises ValueError: when ``site`` is neither a string nor a
         list/tuple/set
     """
     exceptions2.judge_null(site)
     # Strings are tested first: they are iterable too, and ``update``
     # would otherwise add them character by character.
     if isinstance(site, basestring):
         self.sites.add(site)
         return
     if isinstance(site, (list, tuple, set)):
         self.sites.update(site)
         return
     raise ValueError
Ejemplo n.º 4
0
 def __init__(self, t1, t2, calc_distance=DDistance.DefaultDistance()):
     """Validate the two thresholds and the distance object, then store them.

     :param t1: int or float, stored as ``self.t1``
     :param t2: int or float, stored as ``self.t2``
     :param calc_distance: checked with ``judge_null`` and ``judge_type``
         against ``DDistance.DDdistance``; stored as ``self._calc_distance``

     NOTE: the default ``calc_distance`` is built once at definition time
     and shared by all instances that do not pass their own.
     """
     exceptions2.judge_null(calc_distance)
     exceptions2.judge_type(calc_distance, DDistance.DDdistance)
     # t1 is checked before t2, as in the original call order.
     for threshold in (t1, t2):
         exceptions2.judge_type(threshold, (int, float))
     # NOTE(review): presumably enforces t2 < t1 -- confirm against
     # exceptions2.judge_smaller.
     exceptions2.judge_smaller(t2, t1)
     self._calc_distance = calc_distance
     self.t1 = t1
     self.t2 = t2
Ejemplo n.º 5
0
 def __init__(self, t1, t2, calc_distance=DDistance.DefaultDistance()):
     """Check ``t1``, ``t2`` and ``calc_distance`` and keep them on ``self``.

     :param t1: numeric (int or float) value kept as ``self.t1``
     :param t2: numeric (int or float) value kept as ``self.t2``
     :param calc_distance: non-null ``DDistance.DDdistance`` (per
         ``judge_null`` / ``judge_type``), kept as ``self._calc_distance``

     NOTE: the default ``calc_distance`` instance is created once when the
     method is defined, not on each call.
     """
     numeric = (int, float)
     exceptions2.judge_null(calc_distance)
     exceptions2.judge_type(calc_distance, DDistance.DDdistance)
     exceptions2.judge_type(t1, numeric)
     exceptions2.judge_type(t2, numeric)
     # NOTE(review): argument order is (t2, t1); presumably requires
     # t2 to be the smaller one -- confirm in exceptions2.
     exceptions2.judge_smaller(t2, t1)
     self.t1, self.t2 = t1, t2
     self._calc_distance = calc_distance
Ejemplo n.º 6
0
def entropy(probs):
    """Return the entropy, in bits, of the given probabilities.

    :param probs: a list/tuple of probabilities, or a single probability
        given as an int or float
    :return: the sum of ``-p * log(p, 2)`` over the sequence, or that single
        term for a scalar; ``None`` when ``probs`` is of any other type
    :raises ValueError: from ``log`` when a probability is negative

    A probability of zero contributes ``0.0`` (the usual
    ``0 * log 0 = 0`` convention) instead of raising a math domain error.
    """
    exceptions2.judge_null(probs)

    def term(prob):
        # log(0, 2) raises ValueError; a zero-probability outcome adds
        # nothing to the entropy, so short-circuit it.
        return -prob * log(prob, 2) if prob else 0.0

    if isinstance(probs, (list, tuple)):
        return sum(term(prob) for prob in probs)
    if isinstance(probs, (int, float)):
        # NOTE(review): whether a scalar 0 gets past judge_null depends on
        # that helper -- confirm.
        return term(probs)
    # Unsupported types fall through and yield None, as before.
    return None
Ejemplo n.º 7
0
 def __sub__(self, value):
     """Return ``self._distance`` between this object's vector and ``value``.

     :param value: a raw list/tuple vector, a ``Center``, or any object
         exposing a list/tuple ``vector`` attribute
     :return: whatever ``self._distance(self.vector, other_vector)`` returns
     :raises TypeError: when ``value`` is none of the supported kinds
     """
     exceptions2.judge_null(value)
     # The original tested and passed the undefined name ``vector`` here,
     # which raised NameError; the operand is ``value``.
     if isinstance(value, (list, tuple)):
         return self._distance(self.vector, value)
     elif isinstance(value, Center):
         return self._distance(self.vector, value.vector)
     elif hasattr(value, "vector") and isinstance(
             getattr(value, "vector"), (list, tuple)):
         return self._distance(self.vector, value.vector)
     else:
         raise TypeError
Ejemplo n.º 8
0
 def __init__(self, name, *argv, **kw):
     """Configure the spider from ``name`` and keyword options.

     :param name: spider name; also passed to the stream logger
     :param argv: accepted but not used here
     :param kw: options read by this constructor:
         ``log_level`` (default ``"warn"``), ``allow_site`` (default ``[]``),
         ``start_urls`` (checked with ``judge_null``), ``page_processor``
         (a list/tuple or a single ``PageProcessor``), ``proxy_policy``
         (``None`` or a ``BaseProxyPolicy``), ``fetcher``, ``pipeline``
         (stored as ``self.pipelines``), ``queue`` (stored as
         ``self.url_pool``), ``before_crawl``, ``url_filters``,
         ``fetch_coding``, ``listeners``, ``crawled_filter`` and
         ``timeout`` (default ``120``)
     :raises TypeError: when ``page_processor`` is neither a list/tuple nor
         a ``PageProcessor``
     """
     self.log_level = kw.get("log_level", "warn")
     self.logger = log2.get_stream_logger(self.log_level, name)
     self.name = name
     self.allow_site = kw.get("allow_site", [])
     self.logger.info("spider {} init , allow_site {}".format(
         name, self.allow_site))
     self.start_urls = kw.get("start_urls", [])
     exceptions2.judge_null(self.start_urls)
     page_processor = kw.get("page_processor")
     if isinstance(page_processor, (list, tuple)):
         self.page_processor = page_processor
     elif isinstance(page_processor, PageProcessor):
         # Normalise a single processor to a one-element list.
         self.page_processor = [page_processor]
     else:
         raise TypeError("page_processor is list or PageProcessor")
     self.proxy_policy = kw.get("proxy_policy", None)
     if self.proxy_policy is not None:
         exceptions2.judge_type(self.proxy_policy, BaseProxyPolicy)
     self.fetcher = kw.get("fetcher", BaseRequestsFetcher())
     self.fetcher.setProxy(self.proxy_policy)
     self.pipelines = kw.get("pipeline", [ConsolePipeLine()])
     self.run_flag = True
     self.spid = rand2.get_random_seq(10)
     self.url_pool = kw.get("queue", MemoryFifoQueue(10000))
     self.logger.info("init")
     self.before_crawl = kw.get("before_crawl",
                                [])  # before crawl do something
     self.site_filters = [SiteFilter(site) for site in self.allow_site]
     # Copy the caller's list: extending and sorting it in place would
     # leak this spider's site filters into a list the caller may reuse.
     url_filters = list(kw.get("url_filters", []))
     url_filters.extend(self.site_filters)
     # Order the filters by their ``_priority`` attribute.
     sort2.sort_list_object(url_filters, "_priority")
     self.url_filters = url_filters
     self.listeners = SpiderListener()
     self.fetch_coding = kw.get("fetch_coding", None)
     self.listeners.addListener(
         kw.get("listeners", [DefaultSpiderListener()]))
     self.link_extractors = CssSelector("a[href]")
     self.crawled_filter = kw.get("crawled_filter", None)
     self.timeout = kw.get("timeout", 120)
Ejemplo n.º 9
0
 def __init__(self, **kw):
     """Initialise the selector and build a ``JPath`` from ``query``.

     :param kw: forwarded unchanged to the parent initialiser (after the
         positional ``"json"``); must contain a ``query`` entry that
         passes ``judge_null``
     """
     super(JsonSelector, self).__init__("json", **kw)
     # ``query`` is mandatory: a missing key yields None, which is then
     # handed to judge_null.
     self.query = kw.get("query")
     exceptions2.judge_null(self.query)
     self.jpath = JPath(self.query)
Ejemplo n.º 10
0
 def train(self, datas, labels, *argv, **kw):
     """Feed each (data, label) pair to the private ``__train`` method.

     :param datas: list, tuple or ``DataSet.DataSet`` of samples
     :param labels: labels paired positionally with ``datas``
     :param argv: accepted but not used here
     :param kw: accepted but not used here

     Pairing uses ``zip``, so iteration stops at the shorter input.
     """
     for required in (datas, labels):
         exceptions2.judge_null(required)
     # Only ``datas`` is type-checked; ``labels`` is not.
     exceptions2.judge_type(datas, (list, tuple, DataSet.DataSet))
     for sample, target in zip(datas, labels):
         self.__train(sample, target)
Ejemplo n.º 11
0
 def train(self, datas, labels, *argv, **kw):
     """Train on every sample/label pair through ``self.__train``.

     :param datas: samples, as a list, tuple or ``DataSet.DataSet``
     :param labels: one label per sample, matched by position
     :param argv: unused by this method
     :param kw: unused by this method

     ``zip`` pairs the inputs, so extra items in the longer one are ignored.
     """
     exceptions2.judge_null(datas)
     exceptions2.judge_null(labels)
     accepted = (list, tuple, DataSet.DataSet)
     exceptions2.judge_type(datas, accepted)
     for pair in zip(datas, labels):
         self.__train(pair[0], pair[1])