Esempio n. 1
0
def build_score_dict(jsonlistfile):
    """Build a nested ``{text_id: {score_id: score}}`` mapping from score files.

    ``jsonlistfile`` is a text file with one path per line. Each path is
    split on "/": the components at positions -4 and -3 are joined with "_"
    to form the text id, and the last 8 characters of the component at
    position -2 are used as the time stamp. Every listed file is read as
    one JSON object per line, carrying the audio name under "AudioName" or
    "audioName" and the score under "Score" or "score".

    Returns:
        dict mapping each text id to a dict that maps
        "<time_stamp>_<audio_id>" to the score as a float.
    """
    score_dict = {}
    # Context managers make sure both file handles are closed.
    with open(jsonlistfile) as list_file:
        json_paths = [line.strip() for line in list_file]

    for json_path in json_paths:
        if not json_path:
            # A blank line carries no path to split or open.
            continue
        fields = json_path.split("/")
        text_id = "_".join(fields[-4:-2])
        time_stamp = fields[-2][-8:]
        text_scores = score_dict.setdefault(text_id, {})

        with open(json_path) as json_file:
            json_items = json_file.readlines()

        for json_item in json_items:
            if not json_item.strip():
                continue
            json_entity = json.loads(json_item.strip())

            # `in` replaces dict.has_key, which no longer exists on Python 3.
            if "AudioName" in json_entity:
                audio_id = str(json_entity["AudioName"])
            elif "audioName" in json_entity:
                audio_id = str(json_entity["audioName"])
            else:
                log.Error("bad json file:%s" % json_item)
                # Skip the record instead of reusing the previous audio_id.
                continue

            if "Score" in json_entity:
                score = float(json_entity["Score"])
            elif "score" in json_entity:
                score = float(json_entity["score"])
            else:
                log.Error("bad json file:%s" % json_item)
                # Skip the record instead of reusing the previous score.
                continue

            score_id = time_stamp + "_" + audio_id
            if score_id in text_scores:
                log.Error("repeated score_id: %s for text_id: %s" %
                          (score_id, text_id))
            text_scores[score_id] = score
    return score_dict
Esempio n. 2
0
 def _send(self):
     """Build the message, log in through the parent and send the mail.

     Returns True once the message has been sent and the SMTP session has
     been quit. Returns False when any exception is raised on the way; the
     failure is reported through ``log.Error``.
     """
     delivered = False
     try:
         mail = self._MakeMessage()
         session = self.m_parent.Login()
         session.sendmail(self.m_sender, self.m_receive, mail.as_string())
         session.quit()
         delivered = True
     except smtplib.SMTPException as smtp_error:
         log.Error("send mail false", smtp_error)
     except BaseException as other_error:
         # NOTE(review): BaseException also covers KeyboardInterrupt and
         # SystemExit; confirm that swallowing them here is intended.
         log.Error("sned mail false by other reason",other_error)
     return delivered
Esempio n. 3
0
def get_community_info(cityName, keyword, filter_word=None):
    '''
    Look up communities by name through the headerSearch endpoint.

    cityName: city to search in
    keyword: community name to search for
    filter_word: optional filter keyword; an entry is skipped when its
        "region" does not contain it
    @return
        dict mapping community id to its info dict ("city", "name", "id",
        "region", "house_url_list", "house_data"). An empty dict is
        returned when the reply has a non-zero "errno" or empty "data".
        NOTE(review): None (not an empty dict) is returned when the HTTP
        request itself is not ok; callers must handle both.
    '''
    global g_session

    query = {
        "cityName": cityName,
        "channel": "xiaoqu",
        "keyword": keyword,
        "query": keyword,
    }
    response, _ = get_url(
        url_encode("https://ajax.api.ke.com/sug/headerSearch", query),
        session=g_session)
    if is_not_ok(response):
        log.Error("get_community_info url false", cityName, keyword,
                  response.status_code)
        return None

    communities = {}
    payload = js2py_val(response.content)
    if payload["errno"] != 0:
        log.Error("get_community_info not ok", cityName, keyword, keyword)
        return communities
    if len(payload["data"]) == 0:
        log.Waring("get_community_info data is nil", cityName, response)
        return communities

    for item in payload["data"]["result"]:
        if filter_word and filter_word not in item["region"]:
            log.Info("get_community_info ingore by filter_word", cityName,
                     keyword, item)
            continue
        community = {
            "city": cityName,
            "name": item["text"],
            "id": item["id"],
            "region": item["region"],
            "house_url_list": get_house_list(item["id"]),
            "house_data": {},
        }
        communities[community["id"]] = community

    return communities
Esempio n. 4
0
def set_wav(wav_dict, score_dict):
    """Register every scored wav of each text on its ``wav_dict`` entry.

    For each text id in ``wav_dict``, calls
    ``wav_dict[text_id].setWavList(wav_id, score)`` for every
    ``(wav_id, score)`` pair found in ``score_dict[text_id]``. A text id
    that has no entry in ``score_dict`` is reported through ``log.Error``
    and skipped.
    """
    for text_id in wav_dict:
        # `in` replaces dict.has_key, which no longer exists on Python 3.
        if text_id not in score_dict:
            log.Error("we don't have audio for the text_id:%s"%text_id)
            # Nothing to attach; indexing score_dict here would raise KeyError.
            continue

        for wav_id, score in score_dict[text_id].items():
            wav_dict[text_id].setWavList(wav_id, score)
Esempio n. 5
0
 def validatePhonePosition(self):
     """Check every entry of ``self.phone_position``.

     An entry whose element 0 is greater than its element 1 is reported
     through ``log.Error``; an entry whose two elements are equal is
     reported through ``log.Warn`` as a phone with only one frame.
     """
     for position in self.phone_position:
         first, last = position[0], position[1]
         if first > last:
             log.Error("phone position error")
         elif first == last:
             log.Warn("this phone only has one frame: " + self.__file_id +
                      " " + str(self.phone_position))
Esempio n. 6
0
def GetPlatform():
  """Return string describing current OS platform.

  Returns PLATFORM_OSX when ``platform.mac_ver()`` reports a release,
  PLATFORM_LINUX when ``platform.system()`` is 'Linux', and otherwise logs
  an error naming the detected system and returns PLATFORM_DEFAULT.
  """
  if platform.mac_ver()[0]:
    return PLATFORM_OSX
  # platform.dist() was removed in Python 3.8; platform.system() is the
  # portable way to recognise Linux.
  system_name = platform.system()
  if system_name == 'Linux':
    return PLATFORM_LINUX
  # Report the detected system name (the old message formatted the builtin
  # `type` by mistake).
  log.Error('Error: Unknown platform type: %s' % system_name)
  return PLATFORM_DEFAULT
Esempio n. 7
0
def set_wav(wav_dict, score_dict, exclude_dict):
    """Register every non-excluded scored wav on its ``wav_dict`` entry.

    For each text id in ``wav_dict``, calls
    ``wav_dict[text_id].setWavList(wav_id, score)`` for every
    ``(wav_id, score)`` pair in ``score_dict[text_id]`` whose wav id is not
    in ``exclude_dict[text_id]``. ``log.Error`` is called when the text id
    has no scores at all (the text id is then skipped) or when the number
    of excluded ids equals the number of scored ids.
    """
    for text_id in wav_dict:
        # `in` replaces dict.has_key, which no longer exists on Python 3.
        if text_id not in score_dict:
            log.Error("we don't have audio for the text_id:%s" % text_id)
            # Nothing to attach; indexing score_dict here would raise KeyError.
            continue

        scores = score_dict[text_id]
        excluded = exclude_dict[text_id]
        if len(excluded) == len(scores):
            log.Error("we don't have audio for the text_id:%s" % text_id)

        for wav_id, score in scores.items():
            if wav_id not in excluded:
                wav_dict[text_id].setWavList(wav_id, score)
Esempio n. 8
0
def innetProduct_dot(matrix_a, matrix_b):
    """Return the row-wise dot products of two matrices as a 1-D array.

    Element ``i`` of the result is ``np.dot(matrix_a[i, :], matrix_b[i, :])``
    for every row index of ``matrix_a``. A differing row count is reported
    through ``log.Error`` before the products are computed.
    """
    rows_a = matrix_a.shape[0]
    rows_b = matrix_b.shape[0]
    if rows_a != rows_b:
        log.Error("the lenght of two matrixes are not same")
    return np.array([
        np.dot(matrix_a[row, :], matrix_b[row, :]) for row in range(rows_a)
    ])
Esempio n. 9
0
 def getWav1(self, lowscore, highscore, num):
     """Randomly pick ``num`` wav ids whose score lies strictly in the range.

     Candidates are the wav ids with ``lowscore < score < highscore``. While
     fewer than ``num`` candidates are found, the range is widened by 2 on
     each side and the search is repeated; the number of retries is logged.
     Widening stops as soon as every known wav is already a candidate, so
     the loop terminates even when fewer than ``num`` wavs exist. In that
     case the shortage is reported through ``log.Error`` and, if that call
     returns, ``random.sample`` raises ValueError.

     Returns:
         list of ``num`` wav ids sampled without replacement.
     """
     if self.__wav_list is None:
         log.Error("wav-score list is null")
     wav_id_list = []
     i = 0
     while len(wav_id_list) < num:
         wav_id_list = [
             wav_id for (wav_id, score) in self.__wav_list.items()
             if lowscore < score < highscore
         ]
         lowscore -= 2
         highscore += 2
         i += 1
         if len(wav_id_list) == len(self.__wav_list):
             # Every wav is already selected; widening further cannot help.
             break
     if i > 1:
         log.Log("retry %d times for sentence %s" % (i - 1, self.__text_id))
     if len(wav_id_list) < num:
         log.Error(
             "not enough wav file meet the requirment of score, has: %d, require: %d"
             % (len(wav_id_list), num))
     return random.sample(wav_id_list, num)
Esempio n. 10
0
    def RunTask(self):
        """Run every runnable task once and drop the ones that are over.

        Tasks in ``self.m_task`` whose ``CanRun()`` is false are left alone.
        Each other task is run; if ``IsOver()`` is then true it is logged
        and removed from ``self.m_task`` after the pass completes.
        """
        finished = []
        for task in self.m_task:
            if task.CanRun():
                task.Run()
                if task.IsOver():
                    log.Error("task over", task)
                    finished.append(task)

        # Removal is deferred so the list is not mutated while iterating it.
        for task in finished:
            self.m_task.remove(task)
Esempio n. 11
0
 def getSamplingFunction(self, sampling_type):
     """Return the sampling method registered under ``sampling_type``.

     Recognised names are "median", "mean", "lastframe", "uniform" and
     "uniform_mean". For any other name the problem is reported through
     ``log.Error`` and None is returned.
     """
     sampling_type_enum = {
         "median": self.medianSampling,
         "mean": self.meanSampling,
         "lastframe": self.lastframeSampling,
         "uniform": self.uniformSampling,
         "uniform_mean": self.uniformMeanSampling
     }
     # `in` replaces dict.has_key, which no longer exists on Python 3.
     if sampling_type in sampling_type_enum:
         return sampling_type_enum[sampling_type]
     log.Error("No such kind of sampling function")
     return None
Esempio n. 12
0
 def getWav2(self, lowscore, highscore, min_count=300):
     """Return all wav ids whose score lies strictly inside the range.

     Args:
         lowscore: exclusive lower score bound.
         highscore: exclusive upper score bound.
         min_count: number of matches below which a warning is logged;
             defaults to 300, the value that was previously hard-coded.

     Returns:
         list of wav ids with ``lowscore < score < highscore``; it may hold
         fewer than ``min_count`` entries.
     """
     if self.__wav_list is None:
         log.Error("wav-score list is null")
     wav_id_list = [
         wav_id for (wav_id, score) in self.__wav_list.items()
         if lowscore < score < highscore
     ]
     if len(wav_id_list) < min_count:
         log.Warn(
             "not enough wav file meet the requirment of score, has: %d, require: %d"
             % (len(wav_id_list), min_count))
     return wav_id_list
Esempio n. 13
0
 def Run(self):
     """Fire the trigger, invoke the task callback and update the status.

     If either call raises, the status becomes ``CTask.__Over``, the
     exception is logged and the method returns. Otherwise the status
     becomes ``CTask.__Over`` for a run-once task, or ``CTask.__None`` for
     a run-forever task.
     """
     try:
         self.m_trigger.Trigger()
         self.m_func(self, *self.m_args)
     except BaseException as exc:
         # NOTE(review): BaseException also covers KeyboardInterrupt and
         # SystemExit; confirm that swallowing them here is intended.
         self.m_status = CTask.__Over
         logging.exception(exc)
         log.Error("run task fail", str(self))
         return

     # Success path: decide the next status from the task's run mode.
     if self.IsRunOnce():
         self.m_status = CTask.__Over
     elif self.IsRunForver():
         self.m_status = CTask.__None
Esempio n. 14
0
def KLdivergence_dot(matrix_a, matrix_b, symmetrized=1):
    """Return the row-wise KL divergence between two matrices as a 1-D array.

    For row ``i`` the forward term is
    ``dot(a_i, log a_i) - dot(a_i, log b_i)``. When ``symmetrized`` is
    truthy the result is the mean of the forward term and the same term
    with the two matrices swapped; otherwise it is the forward term alone.
    A differing row count is reported through ``log.Error`` first.
    """
    rows_a = matrix_a.shape[0]
    rows_b = matrix_b.shape[0]
    if rows_a != rows_b:
        log.Error("the lenght of two matrixes are not same")

    # The logarithms are taken once for the whole matrices.
    log_a = np.log(matrix_a)
    log_b = np.log(matrix_b)

    divergences = []
    for row in range(rows_a):
        p, q = matrix_a[row, :], matrix_b[row, :]
        log_p, log_q = log_a[row, :], log_b[row, :]
        forward = np.dot(p, log_p) - np.dot(p, log_q)
        if not symmetrized:
            divergences.append(forward)
            continue
        backward = np.dot(q, log_q) - np.dot(q, log_p)
        divergences.append(0.5 * (forward + backward))
    return np.array(divergences)
Esempio n. 15
0
def MakeArgumentList(args):
  """Normalise ``args`` into a new list of argument values.

  ``args`` may be None (no arguments), a single string, or a list. Each
  list entry is either a string, which is always kept, or a
  ``(value, allowed_platform)`` tuple, whose value is kept only when
  ``IsPlatform(allowed_platform)`` is true. Any other entry type is
  reported through ``log.Error`` and left out.
  """
  if args is None:
    candidates = []
  elif isinstance(args, str):
    candidates = [args]
  else:
    assert isinstance(args, list)
    candidates = args

  accepted = []
  for entry in candidates:
    if isinstance(entry, str):
      accepted.append(entry)
      continue
    if not isinstance(entry, tuple):
      log.Error('Unknown argument type')
      continue
    value, allowed_platform = entry
    if IsPlatform(allowed_platform):
      accepted.append(value)
  return accepted
Esempio n. 16
0
def thread_spike_fund(threadobj, *fund_list):
    """Fetch each fund code and collect its yield data and stock counts.

    For every code, ``fund_api.spiker_fund_and_save`` is called. A fund is
    skipped when its base "type" is not in ``__filter_stop_type`` or its
    "new_worth_ratio" is not a float according to ``tools.is_float``. For
    the remaining funds, each key of the base "stock" mapping is counted
    and the fund's current ratio, yield history and name are recorded.
    Progress is logged every 50 codes. Any exception raised while handling
    a code is logged and counted as an error.

    Returns:
        dict with "ok" (funds recorded), "error" (codes that raised) and
        "data", which holds "stock" (occurrence count per stock key) and
        "yeild" (the per-fund record keyed by code).
    """
    total = len(fund_list)
    ok_count = 0
    err_count = 0
    fund_yeild = {}
    stock_total = {}
    for index, code in enumerate(fund_list, 1):
        if index % 50 == 0:
            log.Info("thread_spike_fund", threadobj.getName(), index,
                     total - index)
        try:
            data = fund_api.spiker_fund_and_save(code)
            base_data = data["base"]
            if base_data["type"] not in __filter_stop_type:
                continue
            if not tools.is_float(base_data["new_worth_ratio"]):
                continue
            for stock_code, _ in base_data["stock"].items():
                stock_total[stock_code] = stock_total.get(stock_code, 0) + 1
            fund_yeild[code] = {
                "now": base_data["new_worth_ratio"],
                "history": data["data"]["yield"],
                "name": data["name"],
            }
            ok_count += 1
        except BaseException as error:
            log.Error("spiker fund false", threadobj.getName(), code, error)
            err_count += 1

    return {
        "ok": ok_count,
        "error": err_count,
        "data": {
            "stock": stock_total,
            "yeild": fund_yeild,
        },
    }
Esempio n. 17
0
def get_house_info(url, house_data):
    """Fetch one house detail page and store its parsed fields.

    The house id is taken from the first group of ``__pHouseID`` matched
    against ``url``. The page is downloaded with ``get_url``; the total
    price and the "base" / "transaction" attribute lists are extracted with
    XPath, and the resulting dict is stored as ``house_data[<house id>]``.

    Always returns None. Nothing is stored when the url does not match the
    id pattern or the response status is not 200.
    """
    def get_total(data, htree):
        # Total price shown in the overview box.
        totals = htree.xpath(
            './/div[@class="overview"]//span[@class="total"]/text()')
        if not totals or not tools.is_float(totals[0]):
            log.Waring("get_house_info -> get_total false")
            return
        data["价格"] = tools.tofloat(totals[0])

    def get_info(data, htree):
        # Basic attributes.

        def get_info2(key):
            # `key` is the CSS class of the attribute list to read.
            rows = htree.xpath(
                './div[@data-component="baseinfo"]//div[@class="introContent"]//div[@class="%s"]//ul//li'
                % key)
            if not rows:
                log.Waring("get_house_info -> get_info -> get_info2 false",
                           key)
                return
            for row in rows:
                labels = row.xpath('./span/text()')

                if len(labels) > 1 and trim_str(labels[0]) == "抵押信息":
                    # Special case: this row keeps its value in the
                    # following <span> elements.
                    values = labels[1:]
                elif key == "transaction":
                    # Transaction attributes keep their value in a link.
                    values = row.xpath('.//a/text()')
                else:
                    values = row.xpath('./text()')

                if not labels or not values:
                    log.Waring("get_house_info -> get_info base false",
                               labels, values)
                    continue
                name = trim_str(labels[0])
                value = trim_str(values[0])
                if name in ("建筑面积", "套内面积"):
                    # Area values carry a unit suffix; strip it and convert.
                    value = tools.tofloat(value.replace("㎡", ""), 5)

                data[name] = value

        get_info2("base")
        get_info2("transaction")
        if "价格" in data and "建筑面积" in data:
            # Average price = total price / building area.
            data["均价"] = tools.tofloat(
                float(data["价格"]) / float(data["建筑面积"]), 5)

    matched = re.match(__pHouseID, url)
    if not matched:
        log.Error("get_house_info no hid", url)
        return None
    house_info = {
        "id": matched.group(1),
    }

    response, _ = get_url(url, session=g_session)
    if response.status_code != 200:
        log.Waring("request house url false", url)
        return None
    page = etree.HTML(response.text)

    overview = page.xpath(
        '//div[@class="sellDetailPage"]//div[@data-component="overviewIntro"]')
    if overview:
        get_total(house_info, overview[0])

    content = page.xpath(
        '//div[@class="sellDetailPage"]//div[@class="m-content"]//div[@class="box-l"]'
    )
    if content:
        get_info(house_info, content[0])

    house_data[house_info["id"]] = house_info