def __init__(self, radius, minPoint, distance=DDistance.DefaultDistance()):
    """Check the constructor arguments and store them on the instance.

    :param radius: stored as ``self.radius``; checked with
        ``exceptions2.judge_null``
    :param minPoint: stored as ``self.minPoint``; checked with
        ``exceptions2.judge_null``
    :param distance: stored as ``self.distance``; checked with
        ``exceptions2.judge_type`` against ``DDistance.DDdistance``.
        The default object is built once, when the function is defined,
        and is therefore shared by every call that omits the argument.
    """
    # The distance type is checked first, then the two required values.
    exceptions2.judge_type(distance, DDistance.DDdistance)
    for required in (radius, minPoint):
        exceptions2.judge_null(required)
    self.distance, self.radius, self.minPoint = distance, radius, minPoint
def __init__(self, radius, minPoint, distance=DDistance.DefaultDistance()):
    """Validate ``distance``, ``radius`` and ``minPoint`` and keep them.

    :param radius: value kept as ``self.radius``; passed through
        ``exceptions2.judge_null``
    :param minPoint: value kept as ``self.minPoint``; passed through
        ``exceptions2.judge_null``
    :param distance: object kept as ``self.distance``; passed through
        ``exceptions2.judge_type`` with ``DDistance.DDdistance``.
        NOTE: the default instance is created at definition time and
        reused across calls.
    """
    checker = exceptions2

    # Validation order: distance type, then radius, then minPoint.
    checker.judge_type(distance, DDistance.DDdistance)
    checker.judge_null(radius)
    checker.judge_null(minPoint)

    # Nothing is stored unless every check above returned normally.
    self.distance = distance
    self.radius = radius
    self.minPoint = minPoint
def _add_site(self, site):
    """Add one site string, or a collection of sites, to ``self.sites``.

    :param site: a string (``basestring``), which is added as a single
        entry, or a list/tuple/set, whose items are merged in
    :raises ValueError: when ``site`` is neither a string nor a
        list/tuple/set
    """
    exceptions2.judge_null(site)

    # A single string is one entry; it must not be split into characters.
    if isinstance(site, basestring):
        self.sites.add(site)
        return

    if not isinstance(site, (list, tuple, set)):
        raise ValueError

    self.sites.update(site)
def __init__(self, t1, t2, calc_distance=DDistance.DefaultDistance()):
    """Check the two thresholds and the distance object, then store them.

    :param t1: int or float, stored as ``self.t1``
    :param t2: int or float, stored as ``self.t2``; compared with ``t1``
        through ``exceptions2.judge_smaller(t2, t1)`` (presumably t2 must
        be the smaller one -- confirm against ``exceptions2``)
    :param calc_distance: stored as ``self._calc_distance``; checked with
        ``judge_null`` and with ``judge_type`` against
        ``DDistance.DDdistance``. The default object is created once at
        definition time and shared between calls.
    """
    exceptions2.judge_null(calc_distance)
    exceptions2.judge_type(calc_distance, DDistance.DDdistance)

    # Both thresholds must be plain numbers; t1 is checked before t2.
    numeric = (int, float)
    for threshold in (t1, t2):
        exceptions2.judge_type(threshold, numeric)
    exceptions2.judge_smaller(t2, t1)

    self.t1, self.t2 = t1, t2
    self._calc_distance = calc_distance
def __init__(self, t1, t2, calc_distance=DDistance.DefaultDistance()):
    """Validate ``t1``, ``t2`` and ``calc_distance`` and keep them.

    :param t1: numeric (int/float) value kept as ``self.t1``
    :param t2: numeric (int/float) value kept as ``self.t2``
    :param calc_distance: object kept as ``self._calc_distance``; it is
        passed through ``exceptions2.judge_null`` and then
        ``exceptions2.judge_type`` with ``DDistance.DDdistance``.
        NOTE: the default instance is built at definition time and
        reused by every call that omits it.
    """
    checker = exceptions2
    number_types = (int, float)

    # Distance object first, then each threshold, then their relation.
    checker.judge_null(calc_distance)
    checker.judge_type(calc_distance, DDistance.DDdistance)
    checker.judge_type(t1, number_types)
    checker.judge_type(t2, number_types)
    # NOTE(review): presumably enforces t2 < t1 -- confirm in exceptions2.
    checker.judge_smaller(t2, t1)

    self.t1 = t1
    self.t2 = t2
    self._calc_distance = calc_distance
def entropy(probs):
    """Compute the entropy (log base 2) of a probability or of a list of them.

    :param probs: a single probability (int or float) or a list/tuple of
        probabilities
    :return: ``-p * log(p, 2)`` for a single number, or the sum of that
        term over a list/tuple; ``None`` when ``probs`` is any other type
    :raises ValueError: presumably for a negative probability, assuming
        ``log`` is ``math.log`` (its domain error propagates unchanged)
    """
    exceptions2.judge_null(probs)
    if isinstance(probs, (list, tuple)):
        # A zero probability contributes nothing (p * log p -> 0 as p -> 0).
        # Skipping it also avoids the domain error log(0, 2) would raise,
        # assuming ``log`` is ``math.log``.
        return sum(-prob * log(prob, 2) for prob in probs if prob != 0)
    elif isinstance(probs, (int, float)):
        if probs == 0:
            return 0.0
        return -probs * log(probs, 2)
    # Unsupported input types fall through, as before; made explicit here.
    return None
def __sub__(self, value):
    """Return ``self._distance`` between this object's vector and ``value``.

    :param value: one of
        * a list/tuple, used directly as the other vector;
        * a ``Center`` instance, whose ``vector`` attribute is used;
        * any object with a ``vector`` attribute that is a list/tuple.
    :return: whatever ``self._distance(self.vector, other_vector)`` returns
    :raises TypeError: when ``value`` matches none of the accepted forms
    """
    exceptions2.judge_null(value)
    # Fix: this branch previously tested and passed the undefined name
    # ``vector`` instead of the ``value`` parameter, raising NameError.
    if isinstance(value, (list, tuple)):
        return self._distance(self.vector, value)
    elif isinstance(value, Center):
        return self._distance(self.vector, value.vector)
    elif hasattr(value, "vector") and isinstance(
            getattr(value, "vector"), (list, tuple)):
        return self._distance(self.vector, value.vector)
    else:
        raise TypeError
def __init__(self, name, *argv, **kw):
    """Configure the spider from keyword options.

    :param name: spider name; also passed to ``log2.get_stream_logger``
    :param argv: accepted but not used here
    :param kw: recognised keys and their defaults:
        * ``log_level`` (``"warn"``)
        * ``allow_site`` (``[]``) -- each entry is wrapped in a ``SiteFilter``
        * ``start_urls`` (``[]``) -- checked with ``exceptions2.judge_null``
        * ``page_processor`` -- required; a list/tuple, or a single
          ``PageProcessor`` which is wrapped in a list
        * ``proxy_policy`` (``None``) -- type-checked against
          ``BaseProxyPolicy`` when given
        * ``fetcher`` (``BaseRequestsFetcher()``)
        * ``pipeline`` (``[ConsolePipeLine()]``) -- stored as ``self.pipelines``
        * ``queue`` (``MemoryFifoQueue(10000)``) -- stored as ``self.url_pool``
        * ``before_crawl`` (``[]``)
        * ``url_filters`` (``[]``) -- combined with the site filters
        * ``fetch_coding`` (``None``)
        * ``listeners`` (``[DefaultSpiderListener()]``)
        * ``crawled_filter`` (``None``)
        * ``timeout`` (``120``)
    :raises TypeError: when ``page_processor`` is neither a list/tuple nor
        a ``PageProcessor``
    """
    self.log_level = kw.get("log_level", "warn")
    self.logger = log2.get_stream_logger(self.log_level, name)
    self.name = name
    self.allow_site = kw.get("allow_site", [])
    self.logger.info("spider {} init , allow_site {}".format(
        name, self.allow_site))

    self.start_urls = kw.get("start_urls", [])
    exceptions2.judge_null(self.start_urls)

    # Normalise page_processor to a sequence of processors.
    page_processor = kw.get("page_processor")
    if isinstance(page_processor, (list, tuple)):
        self.page_processor = page_processor
    elif isinstance(page_processor, PageProcessor):
        self.page_processor = [page_processor]
    else:
        raise TypeError("page_processor is list or PageProcessor")

    self.proxy_policy = kw.get("proxy_policy", None)
    if self.proxy_policy is not None:
        exceptions2.judge_type(self.proxy_policy, BaseProxyPolicy)
    self.fetcher = kw.get("fetcher", BaseRequestsFetcher())
    # The fetcher receives the proxy policy even when it is None.
    self.fetcher.setProxy(self.proxy_policy)

    self.pipelines = kw.get("pipeline", [ConsolePipeLine()])
    self.run_flag = True
    self.spid = rand2.get_random_seq(10)
    self.url_pool = kw.get("queue", MemoryFifoQueue(10000))
    self.logger.info("init")
    self.before_crawl = kw.get("before_crawl", [])  # before crawl do something

    self.site_filters = [SiteFilter(site) for site in self.allow_site]
    # Work on a copy: extending and sorting the list object handed in by
    # the caller would leak this spider's site filters into the caller's
    # list (and into any other spider built from the same list).
    url_filters = list(kw.get("url_filters", []))
    url_filters.extend(self.site_filters)
    # Sort filter order by init param priority (the "_priority" attribute).
    sort2.sort_list_object(url_filters, "_priority")
    self.url_filters = url_filters

    self.listeners = SpiderListener()
    self.fetch_coding = kw.get("fetch_coding", None)
    self.listeners.addListener(
        kw.get("listeners", [DefaultSpiderListener()]))
    self.link_extractors = CssSelector("a[href]")
    self.crawled_filter = kw.get("crawled_filter", None)
    self.timeout = kw.get("timeout", 120)
def __init__(self, **kw):
    """Initialise the selector as type ``"json"`` and compile its query.

    :param kw: forwarded unchanged to the parent constructor; the
        ``query`` key is stored as ``self.query``, checked with
        ``exceptions2.judge_null`` and wrapped in a ``JPath`` stored as
        ``self.jpath``
    """
    super(JsonSelector, self).__init__("json", **kw)

    query = kw.get("query")
    # Stored before validation, so the attribute exists even if the
    # check below raises.
    self.query = query
    exceptions2.judge_null(query)
    self.jpath = JPath(query)
def train(self, datas, labels, *argv, **kw):
    """Feed every (data, label) pair to ``self.__train``.

    :param datas: list, tuple or ``DataSet.DataSet`` of samples
    :param labels: iterable of labels, paired with ``datas`` via ``zip``
        (pairing stops at the shorter of the two)
    :param argv: accepted but not used here
    :param kw: accepted but not used here
    """
    for required in (datas, labels):
        exceptions2.judge_null(required)
    exceptions2.judge_type(datas, (list, tuple, DataSet.DataSet))

    for sample, target in zip(datas, labels):
        self.__train(sample, target)
def train(self, datas, labels, *argv, **kw):
    """Train on each sample/label pair in turn.

    :param datas: samples; must be a list, tuple or ``DataSet.DataSet``
    :param labels: labels matched positionally to ``datas`` with ``zip``,
        so surplus items on either side are ignored
    :param argv: unused
    :param kw: unused
    """
    exceptions2.judge_null(datas)
    exceptions2.judge_null(labels)

    accepted = (list, tuple, DataSet.DataSet)
    exceptions2.judge_type(datas, accepted)

    # Each pair is a (data, label) 2-tuple unpacked into the call.
    pairs = zip(datas, labels)
    for pair in pairs:
        self.__train(*pair)