Example 1
    def parse_1(self, response):
        # Parse the news category here

        hostname = urlparse(response.url).hostname
        info("hostname ------ " + hostname)

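        # Use the first label of the hostname (the subdomain) as the news category.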
        class_ = hostname.split('.')[0]

        if class_ in class_config:
            cur_count = count_config.get(class_)
            if cur_count < 1000:
                count_config[class_] += 1

                item = sinanewsItem()
                item['class_'] = class_

                x = self.parse_with_rules(response, self.content_css_rules,
                                          dict)

                item['content'] = x[0]
                item['url'] = response.url
                info('class_ ====== ' + item['class_'])
                yield item

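                # Follow in-page links to other article pages (hrefs ending in doc...shtml) with the same callback.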
                soup = BeautifulSoup(response.text, 'lxml')
                for a in soup.find_all(
                        'a', {'href': re.compile(".*doc[^/]*shtml$")}):
                    try:
                        new_url = urljoin(response.url, a['href'])
                        yield scrapy.Request(new_url, callback=self.parse_1)
                    except Exception:
                        continue
Example 2
def db_eval(db, segmentations, measures, n_jobs=cfg.N_JOBS, verbose=True):
    """
  Evaluate video sequence results.
	Arguments:
		segmentations (list of ndarrya): segmentations masks.
		annotations   (list of ndarrya): ground-truth  masks.
    measure       (char): evaluation metric (J,F,T)
    n_jobs        (int) : number of CPU cores.
  Returns:
    results (dict): [sequence]: per-frame sequence results.
                    [dataset] : aggreated dataset results.
  """

    s_eval = defaultdict(dict)  # sequence evaluation
    d_eval = defaultdict(dict)  # dataset  evaluation

    for measure in measures:
        log.info("Evaluating measure: {}".format(measure))
        for sid in range(len(db)):
            sg = segmentations[sid]
            s_eval[sg.name][measure] = db_eval_sequence(
                sg, db[sg.name].annotations, measure=measure, n_jobs=n_jobs)

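        # Average each per-sequence statistic over all sequences to obtain the dataset-level score.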
        for statistic in cfg.EVAL.STATISTICS:
            raw_data = np.hstack([
                s_eval[sequence][measure][statistic]
                for sequence in s_eval.keys()
            ])
            d_eval[measure][statistic] = float(np.mean(raw_data))

    g_eval = {'sequence': dict(s_eval), 'dataset': dict(d_eval)}

    return g_eval
Example 3
    def parse_blog_detail(self, response):
        info('process blog detail response ' + str(response.url))
        sel = Selector(response)

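        # Pull the image, byline/date and description out of the article header via helper methods.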
        header = sel.css('#main-content article header')
        desc_img = self._prepare_desc_img(header)
        source_created, author, raw_description = self._prepare_copy_right(header)
        description = self._prepare_description(raw_description)

        # content = sel.css('#main-content article div.content div.field-type-text-with-summary')
        title = "".join(sel.css('#main-content h1.page-title').xpath('./text()').extract()).strip()

        item = LoveMattersItem()
        # prepare basic info
        item['base_url'] = self.base_url
        item['source_url'] = response.url
        item['title'] = title

        # prepare content
        result = self.prepare_content(response)
        result.insert(0, desc_img)
        result.insert(0, description)
        item['content'] = json.dumps(result)

        item['author'] = author
        item['source_created'] = source_created

        # prepare comment
        item['comment'] = self.prepare_comment(sel)

        # get comment num
        item['comment_num'] = 0

        # get view num
        item['view_num'] = 0

        # get like num
        item['like_num'] = 0

        # get forward num
        item['forward_num'] = 0

        # get play num
        item['play_num'] = 0

        yield item
Example 4
    def process_request(self, r):
        info('process ' + str(r))
        return r
Example 5
    def parse_blog_list(self, response):
        info('process blog list response ' + str(response.url))
Example 6
async def on_guild_remove(guild: Guild):
    log.info("Guild removed : {} !".format(guild.name))
    await update_presence()
Example 7
async def on_guild_join(guild: Guild):
    log.info("New Guild : {} !".format(guild.name))
    await update_presence()