Ejemplo n.º 1
0
 def __init__(self, topic_info, log):
     """Initialise the base extractor and declare the schema field names.

     Args:
         topic_info: Forwarded unchanged to ``DefaultExtractor.__init__``.
         log: Forwarded unchanged to ``DefaultExtractor.__init__``.
     """
     DefaultExtractor.__init__(self, topic_info, log)
     # One field name per line; the order matches the original declaration.
     schema_fields = [
         u'inside_square',
         u'deal_type',
         u'type_building',
         u'mortgage',
         u'elevator',
         u'heat',
         u'own_year',
         u'house_type',
         u'unique',
         u'type_xiaoqu',
         u'property_right',
     ]
     self.lst_schema = schema_fields
Ejemplo n.º 2
0
 def __init__(self, topic_info, log):
     """Initialise the base extractor, lookup tables and mapping configs.

     Args:
         topic_info: Forwarded unchanged to ``DefaultExtractor.__init__``.
         log: Forwarded unchanged to ``DefaultExtractor.__init__``.
     """
     DefaultExtractor.__init__(self, topic_info, log)

     # Date suffix (end of a quarter) -> report period label.
     self.quarter_map = {
         u"-03-31": u"一季",
         u"-06-30": u"中期",
         u"-09-30": u"三季",
         u"-12-31": u"年度",
     }
     # NOTE: despite the ``_list`` suffix this attribute is a set.
     self.year_mode_list = {
         u"2014",
         u"2015",
         u"2016",
     }
     # Month range or period label -> date suffix.
     self.month_mode_map = {
         u"1-3": u"-03-31",
         u"1-6": u"-06-30",
         u"1-9": u"-09-30",
         u"1-12": u"-12-31",
         u"一季度": u"-03-31",
         u"一季": u"-03-31",
         u"中期": u"-06-30",
         u"三季度": u"-09-30",
         u"三季": u"-09-30",
         u"年度": u"-12-31",
     }

     # ``self.basic_path`` is not set here; presumably provided by the
     # base class initialiser -- confirm against DefaultExtractor.
     self.config_path = (
         self.basic_path
         + "i_entity_extractor/extractors/ssgs_caibao_profit/mapping.conf"
     )
     self.mapping_conf = self.read_config(self.config_path)
     self.profit_config_path = (
         self.basic_path
         + "i_entity_extractor/extractors/ssgs_caibao_profit/profit_mapping.conf"
     )
     self.profit_mapping_conf = self.read_config(self.profit_config_path)

     self.money_type_list = [
         u'美元',
         u'欧元',
         u'港元',
         u'港币',
     ]
     # Numeric code (as text) -> report title.
     self.title_map = {
         u"108": u"利润表",
         u"110": u"资产负债表",
         u"111": u"现金流量表",
         u"112": u"公司综合能力指标",
     }
 def __init__(self, topic_info, log):
     """Initialise the base extractor and the label-to-field lookup table.

     Args:
         topic_info: Forwarded unchanged to ``DefaultExtractor.__init__``.
         log: Forwarded unchanged to ``DefaultExtractor.__init__``.
     """
     DefaultExtractor.__init__(self, topic_info, log)
     # Chinese label -> ASCII field name. Two different area labels
     # deliberately share the ``acreage`` field.
     label_to_field = {
         u"宗地编号": u"code",
         u"宗地总面积": u"acreage",
         u"宗地面积": u"acreage",
         u"宗地坐落": u"address",
         u"出让年限": u"land_use_year",
         u"保证金": u"margin",
         u"起始价": u"starting_price",
         u"容积率": u"volume_ratio",
     }
     self.info_dic = label_to_field
Ejemplo n.º 4
0
 def __init__(self, topic_info, log):
     """Initialise the base extractor, mapping config and sector lookups.

     Args:
         topic_info: Forwarded unchanged to ``DefaultExtractor.__init__``.
         log: Forwarded unchanged to ``DefaultExtractor.__init__``.
     """
     DefaultExtractor.__init__(self, topic_info, log)
     # ``self.basic_path`` is not set here; presumably provided by the
     # base class initialiser -- confirm against DefaultExtractor.
     self.config_path = self.basic_path + "i_entity_extractor/extractors/ssgs_caibao/mapping.conf"
     self.mapping_conf = self.read_config()
     # Raw string: ``\D`` is not a valid string escape, so the non-raw
     # spelling triggers an invalid-escape warning on modern Python.
     # The compiled pattern (one or more non-digit characters) is unchanged.
     self.public_sector_regex = re.compile(r"\D+")
     # Short sector code -> display name of the market board.
     self.public_sector_dict = {
         "szmb": u"深市主板",
         "szsme": u"中小企业板",
         "szcn": u"创业板",
         "shmb": u"沪市主板",
         "hkmb": u"香港主板",
         "hkgem": u"香港创业板",
     }
Ejemplo n.º 5
0
 def __init__(self, topic_info, log):
     """Initialise the base extractor, time units and negative-word index.

     Reads the negative-word configuration file (one word per line) and
     enters every non-empty word into an ``esm.Index``.

     Args:
         topic_info: Forwarded unchanged to ``DefaultExtractor.__init__``.
         log: Forwarded unchanged to ``DefaultExtractor.__init__``.

     Raises:
         IOError: If the negative-word file cannot be opened or read
             (``OSError`` on Python 3).
     """
     DefaultExtractor.__init__(self, topic_info, log)
     # Time unit label -> number of seconds in that unit.
     self.time_map = {
         u"小时": 3600,
         u"分钟": 60,
         u"秒": 1,
     }
     negative_word_conf = self.basic_path + 'i_entity_extractor/dict/negative_word.conf'
     # Use a context manager so the file handle is closed deterministically
     # instead of being left to the garbage collector.
     with open(negative_word_conf) as conf_file:
         negative_words = conf_file.read().split('\n')
     # ``split`` always yields at least one element. Drop the final one only
     # when it is the empty string left by a trailing newline, so a file
     # without a trailing newline no longer loses its last word.
     if not negative_words[-1]:
         negative_words.pop()
     self.negative_word_list = negative_words

     self.negative_word_index = esm.Index()
     for negative_word in self.negative_word_list:
         # Skip blank lines inside the file.
         if negative_word:
             self.negative_word_index.enter(negative_word)
     # Presumably finalises the index so it can be queried -- see esm docs.
     self.negative_word_index.fix()
Ejemplo n.º 6
0
    def __init__(self, topic_info, log):
        """Initialise the base extractor, mapping config and text patterns.

        Args:
            topic_info: Forwarded unchanged to ``DefaultExtractor.__init__``.
            log: Forwarded unchanged to ``DefaultExtractor.__init__``.
        """
        DefaultExtractor.__init__(self, topic_info, log)
        # ``self.basic_path`` is not set here; presumably provided by the
        # base class initialiser -- confirm against DefaultExtractor.
        self.config_path = self.basic_path + "i_entity_extractor/extractors/gsxx/mapping.conf"
        self.mapping_conf = self.read_config()

        # Two date-like tokens (4 digits, 1-2 digits, 1-2 digits, each
        # separated by any single character), captured as a pair.
        period_pair_pattern = u"(\d{4}.\d{1,2}.\d{1,2}).*?(\d{4}.\d{1,2}.\d{1,2})"
        self.period_regex = re.compile(period_pair_pattern)
        # A single date-like token of the same shape.
        self.period_regex2 = re.compile(u"\d{4}.\d{1,2}.\d{1,2}")

        # Hand-picked punctuation first, then every character of
        # ``string.punctuation`` (duplicates are kept, as before).
        punctuation = [
            '+', '!', '。', ',', '?', '&', '#', '@', '、', '~', '*', '……', '(',
            ')', ';'
        ]
        punctuation.extend(string.punctuation)
        self.punctuation_list = punctuation

        # Label prefix, then a separator, then the captured value text.
        self.extract_re = re.compile(
            u'^(.{0,5}名称|企业基本信息:名称|企业\(机构\)名称|名称序号: 企业名称|变更前内容|变更后内容|【变更前内容|【变更后内容)\s{0,1}(:|:|】|\s) {0,3}([^;\:\.]+)'
        )
Ejemplo n.º 7
0
 def __init__(self, topic_info, log):
     """Initialise the base extractor and the case-type lookup tables.

     Args:
         topic_info: Forwarded unchanged to ``DefaultExtractor.__init__``.
         log: Forwarded unchanged to ``DefaultExtractor.__init__``.
     """
     DefaultExtractor.__init__(self, topic_info, log)

     # Numeric code (as text) -> case type name.
     by_code = {
         "1": u"刑事案件",
         "2": u"民事案件",
         "3": u"行政案件",
         "4": u"赔偿案件",
         "5": u"执行案件",
     }
     # Single character -> case type name. Two different characters
     # map to the same civil-case value.
     by_character = {
         u"刑": u"刑事案件",
         u"民": u"民事案件",
         u"商": u"民事案件",
         u"行": u"行政案件",
         u"赔": u"赔偿案件",
         u"执": u"执行案件",
     }
     self.case_type_map = by_code
     self.case_id_type_map = by_character
Ejemplo n.º 8
0
    def __init__(self, topic_info, log):
        """Initialise the base extractor and compile the money patterns.

        Args:
            topic_info: Forwarded unchanged to ``DefaultExtractor.__init__``.
            log: Forwarded unchanged to ``DefaultExtractor.__init__``.
        """
        DefaultExtractor.__init__(self, topic_info, log)

        # Arabic-digit amounts followed by a currency unit.
        self.money_regex = re.compile(u'\d+\.\d+万元|\d+万元|\d+\.\d+元|\d+元')
        # Amounts written with Chinese numerals.
        self.money_regex_chs = re.compile(
            u'[一二三四五六七八九十壹贰叁肆伍陆柒捌玖拾]万\S+元|[一二三四五六七八九十壹贰叁肆伍陆柒捌玖拾]千\S+元|[一二三四五六七八九十壹贰叁肆伍陆柒捌玖拾]百\S+元'
        )

        # Each configured keyword is combined with these numeric suffixes,
        # most specific first, so longer alternatives win in the final
        # alternation.
        numeric_suffixes = ('\d+\.\d+万', '\d+万', '\d+\.\d+', '\d+')
        alternatives = [u'¥\d+\.\d+']
        for keyword in shixin_conf.money_keyword_list:
            alternatives.extend(keyword + suffix for suffix in numeric_suffixes)
        self.money_regex_last = re.compile('|'.join(alternatives))

        # Bare number, with or without a decimal part.
        self.money_regex3 = re.compile(u'\d+\.\d+|\d+')
Ejemplo n.º 9
0
 def __init__(self, topic_info, log):
     """Initialise the extractor by delegating to ``DefaultExtractor``.

     Args:
         topic_info: Forwarded unchanged to ``DefaultExtractor.__init__``.
         log: Forwarded unchanged to ``DefaultExtractor.__init__``.
     """
     DefaultExtractor.__init__(self, topic_info, log)
Ejemplo n.º 10
0
 def __init__(self, topic_info, log):
     """Initialise the base extractor, separator list and common parser.

     Args:
         topic_info: Forwarded unchanged to ``DefaultExtractor.__init__``.
         log: Forwarded to ``DefaultExtractor.__init__`` and also handed
             to the ``CommonParser`` created here.
     """
     DefaultExtractor.__init__(self, topic_info, log)
     # Separator characters stored for later use.
     separators = [',', ':', '\t']
     self.seps = separators
     # ``self.parser_tool`` is not set here; presumably provided by the
     # base class initialiser -- confirm against DefaultExtractor.
     self.parser_obj = CommonParser(
         self.parser_tool,
         log,
     )
Ejemplo n.º 11
0
 def __init__(self, topic_info, log):
     """Initialise the base extractor and the litigant separator list.

     Args:
         topic_info: Forwarded unchanged to ``DefaultExtractor.__init__``.
         log: Forwarded unchanged to ``DefaultExtractor.__init__``.
     """
     DefaultExtractor.__init__(self, topic_info, log)
     # Separator characters; repeated entries are kept exactly as declared.
     separators = [',', ':', ',', ':', '。', '、', ";", ";", '\t']
     self.litiants_seps = separators
Ejemplo n.º 12
0
 def __init__(self, topic_info, log):
     """Initialise the base extractor and record the source table name.

     Args:
         topic_info: Forwarded unchanged to ``DefaultExtractor.__init__``.
         log: Forwarded unchanged to ``DefaultExtractor.__init__``.
     """
     DefaultExtractor.__init__(self, topic_info, log)
     # Name of the source table stored on the instance.
     table_name = 'stock_info'
     self.src_table = table_name
Ejemplo n.º 13
0
 def __init__(self, topic_info, log):
     """Initialise the base extractor and declare the schema field names.

     Args:
         topic_info: Forwarded unchanged to ``DefaultExtractor.__init__``.
         log: Forwarded unchanged to ``DefaultExtractor.__init__``.
     """
     DefaultExtractor.__init__(self, topic_info, log)
     # Field names kept in ``lst_schema``, in their original order.
     schema_fields = [
         u'building_usage',
         u'years',
         u'decoration',
     ]
     self.lst_schema = schema_fields
Ejemplo n.º 14
0
 def __init__(self, topic_info, log):
     """Initialise the base extractor and load the mapping configuration.

     Args:
         topic_info: Forwarded unchanged to ``DefaultExtractor.__init__``.
         log: Forwarded unchanged to ``DefaultExtractor.__init__``.
     """
     DefaultExtractor.__init__(self, topic_info, log)
     # ``config_path`` is assigned before ``read_config()`` is called with
     # no arguments; presumably the helper reads it -- confirm in the
     # base class.
     self.config_path = (
         self.basic_path
         + "i_entity_extractor/extractors/annual_reports/mapping.conf"
     )
     self.mapping_conf = self.read_config()
Ejemplo n.º 15
0
 def __init__(self, topic_info, log):
     """Initialise the base extractor, schema fields and province parser.

     Args:
         topic_info: Forwarded unchanged to ``DefaultExtractor.__init__``.
         log: Forwarded unchanged to ``DefaultExtractor.__init__``.
     """
     DefaultExtractor.__init__(self, topic_info, log)
     # Field names kept in ``lst_schema``, in their original order.
     schema_fields = [
         u'building_usage',
         u'years',
         u'decoration',
     ]
     self.lst_schema = schema_fields
     # The four lookup objects are defined outside this method.
     self.province_parser = ProvinceParser(
         province_city,
         phone_city,
         region_city,
         city_city,
     )
Ejemplo n.º 16
0
 def __init__(self, topic_info, log):
     """Initialise the base extractor and declare the area field names.

     Args:
         topic_info: Forwarded unchanged to ``DefaultExtractor.__init__``.
         log: Forwarded unchanged to ``DefaultExtractor.__init__``.
     """
     DefaultExtractor.__init__(self, topic_info, log)
     # Area field names, in their original order.
     area_fields = [
         u'province',
         u'city',
         u'county',
     ]
     self.lst_cfg_area = area_fields